From d1f78c88faafea7e672c7c45e20f6f040942a92a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 3 Jun 2009 22:06:24 -0700 Subject: A bunch more work, a fast table for field lookup. --- upb_parse.h | 110 +++++++++--------------------------------------------------- 1 file changed, 15 insertions(+), 95 deletions(-) (limited to 'upb_parse.h') diff --git a/upb_parse.h b/upb_parse.h index 75752a0..829e97c 100644 --- a/upb_parse.h +++ b/upb_parse.h @@ -18,99 +18,6 @@ extern "C" { #endif -/* A deserialized value as described in a .proto file. */ -struct upb_tagged_value { - struct upb_field *field; - union upb_value v; -}; - -/* A value as it is encoded on-the-wire, before it has been interpreted as - * any particular .proto type. */ -struct upb_tagged_wire_value { - upb_wire_type_t type; - union upb_wire_value v; -}; - -/* Definition of a single field in a message, for the purposes of the parser's - * fieldmap. Note that this does not include nearly all of the information - * that can be specified about a field in a .proto file. For example, we don't - * even know the field's name. We keep only the information necessary to parse - * the field. */ -struct upb_field { - upb_field_number_t field_number; - int32_t type; /* google_protobuf_FieldDescriptorProto_Type */ - struct upb_fieldset *fieldset; /* if type == MESSAGE */ -}; - -struct upb_parse_stack_frame { - struct upb_fieldset *fieldset; - size_t end_offset; /* unknown for the top frame, so we set to SIZE_MAX */ -}; - -/* The stream parser's state. */ -struct upb_parse_state { - size_t offset; - struct upb_parse_stack_frame stack[UPB_MAX_STACK]; - struct upb_parse_stack_frame *top, *limit; -}; - -/* Call this once before parsing to initialize the data structures. - * message_type can be NULL, in which case all fields will be reported as - * unknown. */ -void upb_init_parser(struct upb_parse_state *state, - struct upb_fieldset *toplevel_fieldset); - -/* Status as returned by upb_parse(). Status codes <0 are fatal errors - * that cannot be recovered. Status codes >0 are unusual but nonfatal events, - * which nonetheless must be handled differently since they do not return data - * in val. */ -typedef enum upb_status { - UPB_STATUS_OK = 0, - UPB_STATUS_SUBMESSAGE_END = 1, // No data is stored in val or wv. - - /** FATAL ERRORS: these indicate corruption, and cannot be recovered. */ - - // A varint did not terminate before hitting 64 bits. - UPB_ERROR_UNTERMINATED_VARINT = -1, - - // A submessage ended in the middle of data. - UPB_ERROR_BAD_SUBMESSAGE_END = -2, - - // Encountered a "group" on the wire (deprecated and unsupported). - UPB_ERROR_GROUP = -3, - - // Input was nested more than UPB_MAX_NESTING deep. - UPB_ERROR_STACK_OVERFLOW = -4, - - // The input data caused the pb's offset (a size_t) to overflow. - UPB_ERROR_OVERFLOW = -5, - - /** NONFATAL ERRORS: the input was invalid, but we can continue if desired. */ - - // A value was encountered that was not defined in the .proto file. The - // unknown value is stored in wv. - UPB_ERROR_UNKNOWN_VALUE = 2, - - // A field was encoded with the wrong wire type. The wire value is stored in - // wv. - UPB_ERROR_MISMATCHED_TYPE = 3, -} upb_status_t; -struct upb_parse_state; - -/* The main parsing function. Parses the next value from buf, storing the - * parsed value in val. If val is of type UPB_TYPE_MESSAGE, then a - * submessage was entered. - * - * IMPORTANT NOTE: for efficiency, the parsing routines do not do bounds checks, - * and may read as much as far as buf+10. So the caller must ensure that buf is - * not within 10 bytes of unmapped memory, or the program will segfault. Clients - * are encouraged to overallocate their buffers by ten bytes to compensate. */ -upb_status_t upb_parse_field(struct upb_parse_state *s, - uint8_t *buf, - upb_field_number_t *fieldnum, - struct upb_tagged_value *val, - struct upb_tagged_wire_value *wv); - /* Low-level parsing functions. ***********************************************/ /* Parses a single tag from the character data starting at buf, and updates @@ -118,6 +25,19 @@ upb_status_t upb_parse_field(struct upb_parse_state *s, * by at most ten bytes. */ upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag); +extern upb_wire_type_t upb_expected_wire_types[]; +/* Returns true if wt is the correct on-the-wire type for ft. */ +inline bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { + return upb_expected_wire_types[ft] == wt; +} + +/* Parses and converts a value from the character data starting at buf. The + * caller must have previously checked that the wire type is appropriate for + * this field type. For delimited data, buf is advanced to the beginning of + * the delimited data, not the end. */ +upb_status_t upb_parse_value(uint8_t **buf, upb_field_type_t ft, + union upb_value *value); + /* Parses a wire value with the given type (which must have been obtained from * a tag that was just parsed) and adds the number of bytes that were consumed * to *offset. For delimited types, offset is advanced past the delimited @@ -127,8 +47,8 @@ upb_status_t upb_parse_wire_value(uint8_t *buf, size_t *offset, union upb_wire_value *wv); /* Like the above, but discards the wire value instead of saving it. */ -upb_status_t skip_wire_value(uint8_t *buf, size_t *offset, - upb_wire_type_t wt); +upb_status_t upb_skip_wire_value(uint8_t *buf, size_t *offset, + upb_wire_type_t wt); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3