From 73978bfc74666fb13c6e65f8c9c4cec90de24236 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 25 Feb 2009 17:39:49 -0800 Subject: API cleanup. --- pbstream.c | 32 +++++++++---------- pbstream.h | 104 ++++++++++++++++++++++++++++++------------------------------- 2 files changed, 67 insertions(+), 69 deletions(-) diff --git a/pbstream.c b/pbstream.c index 441b216..c8711e7 100644 --- a/pbstream.c +++ b/pbstream.c @@ -190,7 +190,7 @@ static pbstream_status_t get_MESSAGE(struct pbstream_parse_state *s, char *buf, s->top = s->base + cur_size; s->limit = s->base + new_size; } - s->top->message_descriptor = d->field_descriptor->message; + s->top->fieldset = d->field->fieldset; s->top->end_offset = d->v.delimited.offset + d->v.delimited.len; return PBSTREAM_STATUS_OK; } @@ -250,19 +250,20 @@ static pbstream_status_t parse_unknown_value( return PBSTREAM_STATUS_OK; } -static struct pbstream_field_descriptor *find_field( - struct pbstream_message_descriptor* md, - pbstream_field_number_t field_number) +static struct pbstream_field *find_field(struct pbstream_fieldset* fs, + pbstream_field_number_t num) { - /* TODO */ - return NULL; + /* TODO: a hybrid array/hashtable structure. */ + if(num < fs->num_fields) return &fs->fields[num]; + else return NULL; } -/* Parses and processes the next value from buf (but not past end). */ -pbstream_status_t parse_field(struct pbstream_parse_state *s, char *buf, - pbstream_field_number_t *fieldnum, - struct pbstream_value *val, - struct pbstream_wire_value *wv) +/* Parses and processes the next value from buf. */ +pbstream_status_t pbstream_parse_field(struct pbstream_parse_state *s, + char *buf, + pbstream_field_number_t *fieldnum, + struct pbstream_value *val, + struct pbstream_wire_value *wv) { char *b = buf; /* Check for end-of-message at the current stack depth. 
*/ @@ -277,8 +278,7 @@ pbstream_status_t parse_field(struct pbstream_parse_state *s, char *buf, struct pbstream_tag tag; CHECK(parse_tag(&b, &tag)); size_t val_offset = s->offset + (b-buf); - struct pbstream_field_descriptor *fd = find_field(s->top->message_descriptor, - tag.field_number); + struct pbstream_field *fd = find_field(s->top->fieldset, tag.field_number); pbstream_status_t unknown_value_status; if(unlikely(!fd)) { unknown_value_status = PBSTREAM_ERROR_UNKNOWN_VALUE; @@ -291,7 +291,7 @@ pbstream_status_t parse_field(struct pbstream_parse_state *s, char *buf, } *fieldnum = tag.field_number; - val->field_descriptor = fd; + val->field = fd; CHECK(info->get(s, b, val)); return PBSTREAM_STATUS_OK; @@ -304,7 +304,7 @@ unknown_value: void pbstream_init_parser( struct pbstream_parse_state *state, - struct pbstream_message_descriptor *message_descriptor, + struct pbstream_fieldset *toplevel_fieldset, void *user_data) { state->offset = 0; @@ -313,6 +313,6 @@ void pbstream_init_parser( const int initial_stack = 20; state->top = state->base = malloc(sizeof(*state->base) * initial_stack); state->limit = state->base + initial_stack; - state->top->message_descriptor = message_descriptor; + state->top->fieldset = toplevel_fieldset; state->top->end_offset = SIZE_MAX; } diff --git a/pbstream.h b/pbstream.h index 9277843..2713974 100644 --- a/pbstream.h +++ b/pbstream.h @@ -41,9 +41,8 @@ typedef enum pbstream_wire_type { typedef int32_t pbstream_field_number_t; /* A deserialized value as described in a .proto file. */ -struct pbstream_field_descriptor; struct pbstream_value { - struct pbstream_field_descriptor *field_descriptor; + struct pbstream_field *field; union { double _double; float _float; @@ -60,6 +59,7 @@ struct pbstream_value { } v; }; +/* A tag occurs before each value on-the-wire. 
*/ struct pbstream_tag { pbstream_field_number_t field_number; pbstream_wire_type_t wire_type; @@ -79,30 +79,47 @@ struct pbstream_wire_value { } v; }; -/* The definition of a field as defined in a pbstream (within a message). - * For example: - * required int32 a = 1; - */ -struct pbstream_field_descriptor { +/* Definition of a single field in a message. */ +struct pbstream_field { pbstream_field_number_t field_number; pbstream_type_t type; - struct pbstream_message_descriptor *message; /* if type == MESSAGE */ + struct pbstream_fieldset *fieldset; /* if type == MESSAGE */ +}; + +/* The set of fields corresponding to a message definition. */ +struct pbstream_fieldset { + /* TODO: a hybrid array/hashtable structure. */ + int num_fields; + struct pbstream_field fields[]; }; -/* A message as defined by the "message" construct in a .proto file. */ -typedef int pbstream_fieldset_t; /* TODO */ -struct pbstream_message_descriptor { - pbstream_fieldset_t fieldset; +struct pbstream_parse_stack_frame { + struct pbstream_fieldset *fieldset; + size_t end_offset; /* unknown for the top frame, so we set to SIZE_MAX */ }; -/* Callback for when an error occurred. - * The description is a static buffer which the client must not free. The - * offset is the location in the input where the error was detected (this - * offset is relative to the beginning of the stream). If is_fatal is true, - * parsing cannot continue. */ +/* The stream parser's state. */ +struct pbstream_parse_state { + size_t offset; + void *user_data; + struct pbstream_parse_stack_frame *base, *top, *limit; +}; + +/* Call this once before parsing to initialize the data structures. + * message_type can be NULL, in which case all fields will be reported as + * unknown. */ +void pbstream_init_parser( + struct pbstream_parse_state *state, + struct pbstream_fieldset *toplevel_fieldset, + void *user_data); + +/* Status as returned by pbstream_parse(). Status codes <0 are fatal errors + * that cannot be recovered. 
Status codes >0 are unusual but nonfatal events, + * which nonetheless must be handled differently since they do not return data + * in val. */ typedef enum pbstream_status { PBSTREAM_STATUS_OK = 0, - PBSTREAM_STATUS_SUBMESSAGE_END = 1, + PBSTREAM_STATUS_SUBMESSAGE_END = 1, // No data is stored in val or wv. /** FATAL ERRORS: these indicate corruption, and cannot be recovered. */ @@ -117,45 +134,26 @@ typedef enum pbstream_status { /** NONFATAL ERRORS: the input was invalid, but we can continue if desired. */ - // A value was encountered that was not defined in the .proto file. + // A value was encountered that was not defined in the .proto file. The + // unknown value is stored in wv. PBSTREAM_ERROR_UNKNOWN_VALUE = 2, - // A field was encoded with the wrong wire type. + // A field was encoded with the wrong wire type. The wire value is stored in + // wv. PBSTREAM_ERROR_MISMATCHED_TYPE = 3, } pbstream_status_t; struct pbstream_parse_state; -struct pbstream_parse_stack_frame { - struct pbstream_message_descriptor *message_descriptor; - size_t end_offset; /* unknown for the top frame, so we set to SIZE_MAX */ -}; - -/* The stream parser's state. */ -struct pbstream_parse_state { - size_t offset; - void *user_data; - struct pbstream_parse_stack_frame *base, *top, *limit; -}; - -/* Call this once before parsing to initialize the data structures. - * message_type can be NULL, in which case all fields will be reported as - * unknown. */ -void pbstream_init_parser( - struct pbstream_parse_state *state, - struct pbstream_message_descriptor *message_descriptor, - void *user_data); - -/* Call this to parse as much of buf as possible, calling callbacks as - * appropriate. buf need not be a complete pbstream. Returns the number of - * bytes consumed. In subsequent calls, buf should point to the first byte not - * consumed by previous calls. +/* The main parsing function. Parses the next value from buf, storing the + * parsed value in val. 
If val is of type PBSTREAM_TYPE_MESSAGE, then a + * submessage was entered. * - * If need_more_bytes is non-zero when parse() returns, this indicates that the - * beginning of a string or sub-message was recognized, but not all bytes of - * the string were in memory. The string will not be successfully parsed (and - * thus parsing of the pbstream cannot proceed) unless need_more_bytes more - * data is available upon the next call to parse. The caller may need to - * increase its buffer size. */ - -pbstream_status_t pbstream_parse(struct pbstream_parse_state *state, - char *buf, int buf_len, int buf_offset); + * IMPORTANT NOTE: for efficiency, the parsing routines do not do bounds checks, + * and may read as far as buf+10. So the caller must ensure that buf is + * not within 10 bytes of unmapped memory, or the program will segfault. Clients + * are encouraged to overallocate their buffers by ten bytes to compensate. */ +pbstream_status_t pbstream_parse_field(struct pbstream_parse_state *s, + char *buf, + pbstream_field_number_t *fieldnum, + struct pbstream_value *val, + struct pbstream_wire_value *wv); -- cgit v1.2.3