From 5e2691460e9fb2ec9b77c1f9d133ae6b667afc3a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 28 Jun 2009 15:41:53 -0700 Subject: Ditch abbreviated field business. --- upb_context.c | 5 ++-- upb_msg.c | 68 +++++++++++++++++++++++++++++++++++++++++++----------- upb_msg.h | 74 +++++++++++++++++++++++++++++------------------------------ upb_parse.c | 17 ++++++++++++++ 4 files changed, 109 insertions(+), 55 deletions(-) diff --git a/upb_context.c b/upb_context.c index e2c7aa9..79ce20d 100644 --- a/upb_context.c +++ b/upb_context.c @@ -185,7 +185,6 @@ bool upb_context_addfd(struct upb_context *c, (fd->set_flags.has.service ? fd->service->len : 0); upb_strtable_init(&tmp, symcount, sizeof(struct upb_symtab_entry)); - /* TODO: properly handle redefinitions and unresolvable symbols. */ if(fd->set_flags.has.message_type) for(unsigned int i = 0; i < fd->message_type->len; i++) if(!insert_message(&tmp, fd->message_type->elements[i], &package)) @@ -207,7 +206,7 @@ bool upb_context_addfd(struct upb_context *c, struct upb_msg *m = e->ref.msg; for(unsigned int i = 0; i < m->num_fields; i++) { struct upb_msg_field *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = f->descriptor; + google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; union upb_symbol_ref ref; if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) ref = resolve2(&c->symtab, &tmp, &e->e.key, fd->name, UPB_SYM_MESSAGE); @@ -215,7 +214,7 @@ bool upb_context_addfd(struct upb_context *c, ref = resolve2(&c->symtab, &tmp, &e->e.key, fd->name, UPB_SYM_ENUM); else continue; /* No resolving necessary. */ - if(!ref.msg) goto error; + if(!ref.msg) goto error; /* Ref. to undefined symbol. */ upb_msg_ref(m, f, ref); } } diff --git a/upb_msg.c b/upb_msg.c index 0517cf0..f2ad6c4 100644 --- a/upb_msg.c +++ b/upb_msg.c @@ -6,6 +6,7 @@ #include #include "descriptor.h" #include "upb_msg.h" +#include "upb_parse.h" #define ALIGN_UP(p, t) (t + ((p - 1) & (~t - 1))) @@ -17,9 +18,7 @@ static int div_round_up(int numerator, int denominator) { } static int compare_fields(const void *e1, const void *e2) { - const struct upb_msg_field *f1 = e1, *f2 = e2; - const google_protobuf_FieldDescriptorProto *fd1 = f1->descriptor; - const google_protobuf_FieldDescriptorProto *fd2 = f2->descriptor; + const google_protobuf_FieldDescriptorProto *fd1 = e1, *fd2 = e2; /* Required fields go before non-required. */ bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; @@ -48,18 +47,19 @@ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d) m->num_required_fields = 0; m->size = m->set_flags_bytes; - m->fields = malloc(sizeof(struct upb_msg_field) * m->num_fields); + m->fields = malloc(sizeof(*m->fields) * m->num_fields); + m->field_descriptors = malloc(sizeof(*m->field_descriptors) * m->num_fields); for(unsigned int i = 0; i < m->num_fields; i++) { /* We count on the caller to keep this pointer alive. */ - m->fields[i].descriptor = d->field->elements[i]; + m->field_descriptors[i] = d->field->elements[i]; } - qsort(m->fields, m->num_fields, sizeof(struct upb_msg_field), compare_fields); + qsort(m->field_descriptors, m->num_fields, sizeof(void*), compare_fields); size_t max_align = 0; for(unsigned int i = 0; i < m->num_fields; i++) { struct upb_msg_field *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = f->descriptor; + google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; struct upb_type_info *type_info = &upb_type_info[fd->type]; /* General alignment rules are: each member must be at an address that is a @@ -72,14 +72,11 @@ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d) if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) m->num_required_fields++; - /* Insert into the tables. Note that af->ref will be uninitialized, even in - * the tables' copies of *af, which is why we must update them separately + /* Insert into the tables. Note that f->ref will be uninitialized, even in + * the tables' copies of *f, which is why we must update them separately * when the references are resolved. */ - struct upb_abbrev_msg_field af = {.byte_offset = f->byte_offset, - .field_index = f->field_index, - .type = fd->type}; - struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = af}; - struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = af}; + struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f}; + struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f}; upb_inttable_insert(&m->fields_by_num, &nument.e); upb_strtable_insert(&m->fields_by_name, &strent.e); } @@ -94,3 +91,46 @@ void upb_msg_free(struct upb_msg *m) upb_strtable_free(&m->fields_by_name); free(m->fields); } + +#if 0 +struct parse_frame_data { + struct upb_msg *m; + void *data; +}; + +static void set_frame_data(struct upb_parse_state *s, struct upb_msg *m) +{ +} + +static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag, + void **user_field_desc) +{ + struct upb_msg *m = (struct upb_msg*)s->top->user_data; + struct upb_msg_field *f = upb_msg_fieldbynum(m, tag->field_number); + if(!f || !upb_check_type(tag->wire_type, f->type)) + return 0; /* Skip unknown or fields of the wrong type. */ + *user_field_desc = f->ref.msg; + return f->type; +} + +static void value_cb(struct upb_parse_state *s, union upb_value *v, + void *str, void *user_field_desc) +{ + *user_field_desc = f->ref.msg; +} + +static void submsg_start_cb(struct upb_parse_state *s, void *user_field_desc) +{ + set_frame_data(s, user_field_desc); +} + +void *upb_msg_parse(struct upb_msg *m, struct upb_string *str) +{ + struct upb_parse_state s; + upb_parse_state_init(&s, sizeof(struct parse_frame_data)); + set_frame_data(&s, m); + s.tag_cb = tag_cb; + s.value_cb = value_cb; + s.submsg_start_cb = submsg_start_cb; +} +#endif diff --git a/upb_msg.h b/upb_msg.h index 70d4405..9608e64 100644 --- a/upb_msg.h +++ b/upb_msg.h @@ -4,8 +4,11 @@ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. * * upb_msg contains a full description of a message as defined in a .proto file. - * This allows for run-time reflection over .proto types, but also defines an - * in-memory byte-level format for storing protobufs. + * It supports many features and operations for dealing with proto messages: + * - reflection over .proto types at runtime (list fields, get names, etc). + * - an in-memory byte-level format for efficiently storing and accessing msgs. + * - serializing and deserializing from the in-memory format to a protobuf. + * - optional memory management for handling strings, arrays, and submessages. * * The in-memory format is very much like a C struct that you can define at * run-time, but also supports reflection. Like C structs it supports @@ -25,9 +28,12 @@ * this format. This format is designed to allow the fastest possible random * access of individual fields. * - * Note that no memory management is defined, which should make it easier to - * integrate this format with existing memory-management schemes. Any memory - * management semantics can be used with the format as defined here. + * Note that clients need not use the memory management facilities defined here. + * They are for convenience only -- clients wishing to do their own memory + * management may do so (allowing clients to perform advanced techniques like + * reference-counting, garbage collection, and string references). Different + * clients can read each others messages regardless of what memory management + * scheme each is using. */ #ifndef UPB_MSG_H_ @@ -49,13 +55,13 @@ extern "C" { struct google_protobuf_DescriptorProto; struct google_protobuf_FieldDescriptorProto; -/* Structure definition. ******************************************************/ +/* Message definition. ********************************************************/ /* Structure that describes a single field in a message. */ struct upb_msg_field { - struct google_protobuf_FieldDescriptorProto *descriptor; uint32_t byte_offset; /* Where to find the data. */ uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ + upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ union upb_symbol_ref ref; }; @@ -69,62 +75,53 @@ struct upb_msg { struct upb_inttable fields_by_num; struct upb_strtable fields_by_name; struct upb_msg_field *fields; + struct google_protobuf_FieldDescriptorProto **field_descriptors; }; /* The num->field and name->field maps in upb_msg allow fast lookup of fields * by number or name. These lookups are in the critical path of parsing and * field lookup, so they must be as fast as possible. To make these more - * cache-friendly, we put the data in the table by value, but use only an - * abbreviated set of data (ie. not all the data in upb_msg_field). Notably, - * we don't include the pointer to the field descriptor. But the upb_msg_field - * can be retrieved in its entirety using the function below.*/ - -struct upb_abbrev_msg_field { - uint32_t byte_offset; /* Where to find the data. */ - uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ - upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ - union upb_symbol_ref ref; -}; + * cache-friendly, we put the data in the table by value. */ struct upb_fieldsbynum_entry { struct upb_inttable_entry e; - struct upb_abbrev_msg_field f; + struct upb_msg_field f; }; struct upb_fieldsbyname_entry { struct upb_strtable_entry e; - struct upb_abbrev_msg_field f; + struct upb_msg_field f; }; -INLINE struct upb_msg_field *upb_get_msg_field( - struct upb_abbrev_msg_field *f, struct upb_msg *m) { - return &m->fields[f->field_index]; +/* Can be used to retrieve a field descriptor given the upb_msg_field ref. */ +INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( + struct upb_msg_field *f, struct upb_msg *m) { + return m->field_descriptors[f->field_index]; } /* Initialize and free a upb_msg. Caller retains ownership of d, but the msg * will contain references to it, so it must outlive the msg. Note that init - * does not resolve upb_msg_field.ref -- that is left to the caller. */ + * does not resolve upb_msg_field.ref -- the caller should do that + * post-initialization by calling upb_msg_ref() below. */ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d); void upb_msg_free(struct upb_msg *m); /* Clients use this function on a previously initialized upb_msg to resolve the - * "ref" field in the upb_msg_field and upb_abbrev_msg_field. Since messages - * can refer to each other in mutually-recursive ways, this step must be - * separated from initialization. The function is necessary because there are - * multiple internal maps in which the ref appears. */ + * "ref" field in the upb_msg_field. Since messages can refer to each other in + * mutually-recursive ways, this step must be separated from initialization. */ void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref); /* While these are written to be as fast as possible, it will still be faster * to cache the results of this lookup if possible. These return NULL if no * such field is found. */ -INLINE struct upb_abbrev_msg_field *upb_msg_fieldbynum(struct upb_msg *m, - uint32_t number) { +INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m, + uint32_t number) { struct upb_fieldsbynum_entry *e = upb_inttable_lookup( &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); return e ? &e->f : NULL; } -INLINE struct upb_abbrev_msg_field *upb_msg_fieldbyname(struct upb_msg *m, - struct upb_string *name) { +INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m, + struct upb_string *name) { struct upb_fieldsbyname_entry *e = upb_strtable_lookup(&m->fields_by_name, name); return e ? &e->f : NULL; @@ -179,7 +176,7 @@ UPB_DEFINE_PRIMITIVE_ARRAY(bool, bool) /* For each primitive type we define a set of six functions: * - * // For fetching out of a struct (s points to the raw struct data). + * // For fetching out of a msg (s points to the raw msg data). * int32_t *upb_msg_get_int32_ptr(void *s, struct upb_msg_field *f); * int32_t upb_msg_get_int32(void *s, struct upb_msg_field *f); * void upb_msg_set_int32(void *s, struct upb_msg_field *f, int32_t val); @@ -232,9 +229,11 @@ UPB_DEFINE_ALL_ACCESSORS(uint64_t, uint64, INLINE) UPB_DEFINE_ALL_ACCESSORS(bool, bool, INLINE) UPB_DEFINE_ALL_ACCESSORS(struct upb_string*, bytes, INLINE) UPB_DEFINE_ALL_ACCESSORS(struct upb_string*, string, INLINE) -UPB_DEFINE_ALL_ACCESSORS(void*, substruct, INLINE) +UPB_DEFINE_ALL_ACCESSORS(void*, submsg, INLINE) UPB_DEFINE_ACCESSORS(struct upb_array*, array, INLINE) +/* "Set" flag reading and writing. *******************************************/ + INLINE size_t upb_isset_offset(uint32_t field_index) { return field_index / 8; } @@ -243,10 +242,9 @@ INLINE uint8_t upb_isset_mask(uint32_t field_index) { return 1 << (field_index % 8); } -/* Functions for reading and writing the "set" flags in the pbstruct. Note - * that these do not perform any memory management associated with any dynamic - * memory these fields may be referencing; that is the client's responsibility. - * These *only* set and test the flags. */ +/* Functions for reading and writing the "set" flags in the msg. Note that + * these do not perform memory management associated with any dynamic memory + * these fields may be referencing. These *only* set and test the flags. */ INLINE void upb_msg_set(void *s, struct upb_msg_field *f) { ((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index); diff --git a/upb_parse.c b/upb_parse.c index 2b9c875..57cca2b 100644 --- a/upb_parse.c +++ b/upb_parse.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "descriptor.h" @@ -266,6 +267,22 @@ upb_status_t upb_parse_value(void **b, upb_field_type_t ft, #undef CASE } +void upb_parse_state_init(struct upb_parse_state *state, size_t udata_size) +{ + state->offset = 0; + size_t stack_bytes = (sizeof(*state->stack) + udata_size) * UPB_MAX_NESTING; + state->stack = state->top = malloc(stack_bytes); + state->limit = (struct upb_parse_stack_frame*)((char*)state->stack + stack_bytes); + state->udata_size = udata_size; + state->done = false; + state->packed_end_offset = 0; +} + +void upb_parse_state_free(struct upb_parse_state *state) +{ + free(state->stack); +} + static void pop_stack_frame(struct upb_parse_state *s) { s->submsg_end_cb(s); -- cgit v1.2.3