summaryrefslogtreecommitdiff
path: root/upb_parse.h
diff options
context:
space:
mode:
Diffstat (limited to 'upb_parse.h')
-rw-r--r--upb_parse.h137
1 files changed, 137 insertions, 0 deletions
diff --git a/upb_parse.h b/upb_parse.h
new file mode 100644
index 0000000..75752a0
--- /dev/null
+++ b/upb_parse.h
@@ -0,0 +1,137 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * This file contains parsing routines; both stream-oriented and tree-oriented
+ * models are supported.
+ *
+ * Copyright (c) 2008 Joshua Haberman. See LICENSE for details.
+ */
+
+#ifndef UPB_PARSE_H_
+#define UPB_PARSE_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* A deserialized value as described in a .proto file. */
+struct upb_tagged_value {
+ struct upb_field *field;
+ union upb_value v;
+};
+
+/* A value as it is encoded on-the-wire, before it has been interpreted as
+ * any particular .proto type. */
+struct upb_tagged_wire_value {
+ upb_wire_type_t type;
+ union upb_wire_value v;
+};
+
+/* Definition of a single field in a message, for the purposes of the parser's
+ * fieldmap. Note that this does not include nearly all of the information
+ * that can be specified about a field in a .proto file. For example, we don't
+ * even know the field's name. We keep only the information necessary to parse
+ * the field. */
+struct upb_field {
+ upb_field_number_t field_number;
+ int32_t type; /* google_protobuf_FieldDescriptorProto_Type */
+ struct upb_fieldset *fieldset; /* if type == MESSAGE */
+};
+
+struct upb_parse_stack_frame {
+ struct upb_fieldset *fieldset;
+ size_t end_offset; /* unknown for the top frame, so we set to SIZE_MAX */
+};
+
+/* The stream parser's state. */
+struct upb_parse_state {
+ size_t offset;
+ struct upb_parse_stack_frame stack[UPB_MAX_STACK];
+ struct upb_parse_stack_frame *top, *limit;
+};
+
+/* Call this once before parsing to initialize the data structures.
+ * message_type can be NULL, in which case all fields will be reported as
+ * unknown. */
+void upb_init_parser(struct upb_parse_state *state,
+ struct upb_fieldset *toplevel_fieldset);
+
+/* Status as returned by upb_parse(). Status codes <0 are fatal errors
+ * that cannot be recovered. Status codes >0 are unusual but nonfatal events,
+ * which nonetheless must be handled differently since they do not return data
+ * in val. */
+typedef enum upb_status {
+ UPB_STATUS_OK = 0,
+ UPB_STATUS_SUBMESSAGE_END = 1, // No data is stored in val or wv.
+
+ /** FATAL ERRORS: these indicate corruption, and cannot be recovered. */
+
+ // A varint did not terminate before hitting 64 bits.
+ UPB_ERROR_UNTERMINATED_VARINT = -1,
+
+ // A submessage ended in the middle of data.
+ UPB_ERROR_BAD_SUBMESSAGE_END = -2,
+
+ // Encountered a "group" on the wire (deprecated and unsupported).
+ UPB_ERROR_GROUP = -3,
+
+ // Input was nested more than UPB_MAX_NESTING deep.
+ UPB_ERROR_STACK_OVERFLOW = -4,
+
+ // The input data caused the pb's offset (a size_t) to overflow.
+ UPB_ERROR_OVERFLOW = -5,
+
+ /** NONFATAL ERRORS: the input was invalid, but we can continue if desired. */
+
+ // A value was encountered that was not defined in the .proto file. The
+ // unknown value is stored in wv.
+ UPB_ERROR_UNKNOWN_VALUE = 2,
+
+ // A field was encoded with the wrong wire type. The wire value is stored in
+ // wv.
+ UPB_ERROR_MISMATCHED_TYPE = 3,
+} upb_status_t;
+struct upb_parse_state;
+
+/* The main parsing function. Parses the next value from buf, storing the
+ * parsed value in val. If val is of type UPB_TYPE_MESSAGE, then a
+ * submessage was entered.
+ *
+ * IMPORTANT NOTE: for efficiency, the parsing routines do not do bounds checks,
+ * and may read as much as far as buf+10. So the caller must ensure that buf is
+ * not within 10 bytes of unmapped memory, or the program will segfault. Clients
+ * are encouraged to overallocate their buffers by ten bytes to compensate. */
+upb_status_t upb_parse_field(struct upb_parse_state *s,
+ uint8_t *buf,
+ upb_field_number_t *fieldnum,
+ struct upb_tagged_value *val,
+ struct upb_tagged_wire_value *wv);
+
+/* Low-level parsing functions. ***********************************************/
+
+/* Parses a single tag from the character data starting at buf, and updates
+ * buf to point one past the bytes that were consumed. buf will be incremented
+ * by at most ten bytes. */
+upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag);
+
+/* Parses a wire value with the given type (which must have been obtained from
+ * a tag that was just parsed) and adds the number of bytes that were consumed
+ * to *offset. For delimited types, offset is advanced past the delimited
+ * data. */
+upb_status_t upb_parse_wire_value(uint8_t *buf, size_t *offset,
+ upb_wire_type_t wt,
+ union upb_wire_value *wv);
+
+/* Like the above, but discards the wire value instead of saving it. */
+upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
+ upb_wire_type_t wt);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_PARSE_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback