From 9e3f5e343b8a729331dd6448bddb9150ae60d63c Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 26 Nov 2009 20:03:07 -0800 Subject: Make upb_msgdef own all its data. This is in anticipation of making upb_msgdef's easy to dup. This involved removing all traces of any descriptors from the defs. --- src/upb.h | 3 +++ src/upb_context.c | 14 ++++++-------- src/upb_def.c | 32 +++++++++++++++++++------------- src/upb_def.h | 45 ++++++++++++++++++++------------------------- src/upb_msg.c | 54 ++++++++++++++++++++++++++---------------------------- src/upb_parse.c | 6 +++--- src/upb_text.c | 44 +++++++++++++++++++++----------------------- 7 files changed, 98 insertions(+), 100 deletions(-) (limited to 'src') diff --git a/src/upb.h b/src/upb.h index cc09ab1..092ea66 100644 --- a/src/upb.h +++ b/src/upb.h @@ -223,6 +223,9 @@ union upb_symbol_ref { struct upb_msgdef *msg; struct upb_enumdef *_enum; struct upb_svc *svc; + + /* Used only temporarily before a reference has been resolved. */ + struct upb_string *str; }; // Status codes used as a return value. Codes >0 are not fatal and can be diff --git a/src/upb_context.c b/src/upb_context.c index 2d558d9..0e53ad1 100644 --- a/src/upb_context.c +++ b/src/upb_context.c @@ -243,7 +243,7 @@ static void insert_message(struct upb_strtable *t, e.e.key = fqname; e.type = UPB_SYM_MESSAGE; e.ref.msg = malloc(sizeof(*e.ref.msg)); - upb_msgdef_init(e.ref.msg, d, fqname, sort, c, status); + upb_msgdef_init(e.ref.msg, d, &fqname, sort, c, status); if(!upb_ok(status)) { free(fqname.ptr); return; @@ -292,14 +292,12 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, struct upb_msgdef *m = e->ref.msg; for(unsigned int i = 0; i < m->num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; union upb_symbol_ref ref; - if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE || - fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) - ref = resolve2(existingdefs, addto, &e->e.key, fd->type_name, + if(f->type == UPB_TYPENUM(MESSAGE) || f->type == UPB_TYPENUM(GROUP)) + ref = resolve2(existingdefs, addto, &e->e.key, f->ref.str, UPB_SYM_MESSAGE); - else if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM) - ref = resolve2(existingdefs, addto, &e->e.key, fd->type_name, + else if(f->type == UPB_TYPENUM(ENUM)) + ref = resolve2(existingdefs, addto, &e->e.key, f->ref.str, UPB_SYM_ENUM); else continue; /* No resolving necessary. */ @@ -307,7 +305,7 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, upb_seterr(status, UPB_STATUS_ERROR, "could not resolve symbol '" UPB_STRFMT "'" " in context '" UPB_STRFMT "'", - UPB_STRARG(fd->type_name), UPB_STRARG(&e->e.key)); + UPB_STRARG(f->ref.str), UPB_STRARG(&e->e.key)); return; } upb_msgdef_setref(m, f, ref); diff --git a/src/upb_def.c b/src/upb_def.c index 00a0610..e130563 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -37,7 +37,7 @@ void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) } void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, - struct upb_string fqname, bool sort, struct upb_context *c, + struct upb_string *fqname, bool sort, struct upb_context *c, struct upb_status *status) { (void)status; // Nothing that can fail at the moment. @@ -47,8 +47,7 @@ void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, upb_strtable_init(&m->fields_by_name, num_fields, sizeof(struct upb_fieldsbyname_entry)); - m->descriptor = d; - m->fqname = fqname; + m->fqname = upb_strdup(fqname); m->context = c; m->num_fields = num_fields; m->set_flags_bytes = div_round_up(m->num_fields, 8); @@ -57,17 +56,20 @@ void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, m->size = m->set_flags_bytes; m->fields = malloc(sizeof(*m->fields) * m->num_fields); - m->field_descriptors = malloc(sizeof(*m->field_descriptors) * m->num_fields); + + /* Create a sorted list of the fields. */ + google_protobuf_FieldDescriptorProto **fds = + malloc(sizeof(*fds) * m->num_fields); for(unsigned int i = 0; i < m->num_fields; i++) { /* We count on the caller to keep this pointer alive. */ - m->field_descriptors[i] = d->field->elements[i]; + fds[i] = d->field->elements[i]; } - if(sort) upb_msgdef_sortfds(m->field_descriptors, m->num_fields); + if(sort) upb_msgdef_sortfds(fds, m->num_fields); size_t max_align = 0; for(unsigned int i = 0; i < m->num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; + google_protobuf_FieldDescriptorProto *fd = fds[i]; struct upb_type_info *type_info = &upb_type_info[fd->type]; /* General alignment rules are: each member must be at an address that is a @@ -77,6 +79,9 @@ void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, f->byte_offset = ALIGN_UP(m->size, type_info->align); f->type = fd->type; f->label = fd->label; + f->number = fd->number; + f->name = upb_strdup(fd->name); + f->ref.str = fd->type_name; m->size = f->byte_offset + type_info->size; max_align = UPB_MAX(max_align, type_info->align); if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) @@ -93,24 +98,26 @@ void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, if(max_align > 0) m->size = ALIGN_UP(m->size, max_align); + free(fds); } void upb_msgdef_free(struct upb_msgdef *m) { upb_inttable_free(&m->fields_by_num); upb_strtable_free(&m->fields_by_name); + upb_string_unref(m->fqname); + for (unsigned int i = 0; i < m->num_fields; i++) { + upb_string_unref(m->fields[i].name); + } free(m->fields); - free(m->field_descriptors); } void upb_msgdef_setref(struct upb_msgdef *m, struct upb_fielddef *f, union upb_symbol_ref ref) { - struct google_protobuf_FieldDescriptorProto *d = - upb_msg_field_descriptor(f, m); struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup( - &m->fields_by_num, d->number, sizeof(struct upb_fieldsbynum_entry)); + &m->fields_by_num, f->number, sizeof(struct upb_fieldsbynum_entry)); struct upb_fieldsbyname_entry *str_e = - upb_strtable_lookup(&m->fields_by_name, d->name); + upb_strtable_lookup(&m->fields_by_name, f->name); assert(int_e && str_e); f->ref = ref; int_e->f.ref = ref; @@ -122,7 +129,6 @@ void upb_enumdef_init(struct upb_enumdef *e, struct google_protobuf_EnumDescriptorProto *ed, struct upb_context *c) { int num_values = ed->set_flags.has.value ? ed->value->len : 0; - e->descriptor = ed; e->context = c; upb_atomic_refcount_init(&e->refcount, 0); upb_strtable_init(&e->nametoint, num_values, diff --git a/src/upb_def.h b/src/upb_def.h index 8cbaab6..b98eb08 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -9,6 +9,8 @@ * - upb_enumdef: describes an enum. * (TODO: descriptions of extensions and services). * + * Defs are immutable and reference-counted. + * * This file contains routines for creating and manipulating the definitions * themselves. To create and manipulate actual messages, see upb_msg.h. */ @@ -27,12 +29,14 @@ extern "C" { struct upb_fielddef; struct upb_context; +struct google_protobuf_EnumDescriptorProto; +struct google_protobuf_DescriptorProto; +struct google_protobuf_FieldDescriptorProto; /* Structure that describes a single .proto message type. */ struct upb_msgdef { struct upb_context *context; struct upb_msg *default_msg; /* Message with all default values set. */ - struct google_protobuf_DescriptorProto *descriptor; - struct upb_string fqname; /* Fully qualified. */ + struct upb_string *fqname; /* Fully qualified. */ size_t size; uint32_t num_fields; uint32_t set_flags_bytes; @@ -40,19 +44,20 @@ struct upb_msgdef { struct upb_inttable fields_by_num; struct upb_strtable fields_by_name; struct upb_fielddef *fields; - struct google_protobuf_FieldDescriptorProto **field_descriptors; }; -/* Structure that describes a single field in a message. This structure is very - * consciously designed to fit into 12/16 bytes (32/64 bit, respectively), - * because copies of this struct are in the hash table that is read in the - * critical path of parsing. Minimizing the size of this struct increases - * cache-friendliness. */ +/* Structure that describes a single field in a message. */ struct upb_fielddef { union upb_symbol_ref ref; uint32_t byte_offset; /* Where to find the data. */ - uint16_t field_index; /* Indexes upb_msgdef.fields and indicates set bit */ - upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ + uint16_t field_index; /* Indicates set bit. */ + + /* TODO: Performance test whether it's better to move the name and number + * into an array in upb_msgdef, indexed by field_index. */ + upb_field_number_t number; + struct upb_string *name; + + upb_field_type_t type; upb_label_t label; }; @@ -89,12 +94,6 @@ INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_fielddef *f) { else return -1; } -/* Can be used to retrieve a field descriptor given the upb_fielddef. */ -INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( - struct upb_fielddef *f, struct upb_msgdef *m) { - return m->field_descriptors[f->field_index]; -} - /* Number->field and name->field lookup. *************************************/ /* The num->field and name->field maps in upb_msgdef allow fast lookup of fields @@ -136,7 +135,6 @@ INLINE struct upb_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, struct upb_enumdef { upb_atomic_refcount_t refcount; struct upb_context *context; - struct google_protobuf_EnumDescriptorProto *descriptor; struct upb_strtable nametoint; struct upb_inttable inttoname; }; @@ -154,8 +152,8 @@ struct upb_enumdef_iton_entry { /* Initializes and frees an enum, respectively. Caller retains ownership of * ed, but it must outlive e. */ void upb_enumdef_init(struct upb_enumdef *e, - struct google_protobuf_EnumDescriptorProto *ed, - struct upb_context *c); + struct google_protobuf_EnumDescriptorProto *ed, + struct upb_context *c); void upb_enumdef_free(struct upb_enumdef *e); @@ -164,21 +162,18 @@ void upb_enumdef_free(struct upb_enumdef *e); /* Initializes/frees a upb_msgdef. Usually this will be called by upb_context, * and clients will not have to construct one directly. * - * Caller retains ownership of d, but the msg will contain references to it, so - * it must outlive the msg. Note that init does not resolve + * Caller retains ownership of d. Note that init does not resolve * upb_fielddef.ref the caller should do that post-initialization by * calling upb_msg_ref() below. * - * fqname indicates the fully-qualified name of this message. Ownership of - * fqname passes to the msg, but the msg will contain references to it, so it - * must outlive the msg. + * fqname indicates the fully-qualified name of this message. * * sort indicates whether or not it is safe to reorder the fields from the order * they appear in d. This should be false if code has been compiled against a * header for this type that expects the given order. */ void upb_msgdef_init(struct upb_msgdef *m, struct google_protobuf_DescriptorProto *d, - struct upb_string fqname, bool sort, + struct upb_string *fqname, bool sort, struct upb_context *c, struct upb_status *status); void upb_msgdef_free(struct upb_msgdef *m); diff --git a/src/upb_msg.c b/src/upb_msg.c index 3786a63..5f96980 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -173,17 +173,16 @@ static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m); /* Returns a size of a value as it will be serialized. Does *not* include * the size of the tag -- that is already accounted for. */ static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p, - struct upb_fielddef *f, - google_protobuf_FieldDescriptorProto *fd) + struct upb_fielddef *f) { switch(f->type) { default: assert(false); return 0; /* Internal corruption. */ - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: { + case UPB_TYPENUM(MESSAGE): { size_t submsg_size = get_msgsize(sizes, *p.msg); return upb_get_INT32_size(submsg_size) + submsg_size; } - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP: { - size_t endgrp_tag_size = upb_get_tag_size(fd->number); + case UPB_TYPENUM(GROUP): { + size_t endgrp_tag_size = upb_get_tag_size(f->number); return endgrp_tag_size + get_msgsize(sizes, *p.msg); } #define CASE(type, member) \ @@ -216,19 +215,18 @@ static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m) /* We iterate over fields and arrays in reverse order. */ for(int32_t i = m->def->num_fields - 1; i >= 0; i--) { struct upb_fielddef *f = &m->def->fields[i]; - google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m->def); if(!upb_msg_isset(m, f)) continue; union upb_value_ptr p = upb_msg_getptr(m, f); if(upb_isarray(f)) { for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) { union upb_value_ptr elem = upb_array_getelementptr(*p.arr, j); /* TODO: for packed arrays tag size goes outside the loop. */ - size += upb_get_tag_size(fd->number); - size += get_valuesize(sizes, elem, f, fd); + size += upb_get_tag_size(f->number); + size += get_valuesize(sizes, elem, f); } } else { - size += upb_get_tag_size(fd->number); - size += get_valuesize(sizes, p, f, fd); + size += upb_get_tag_size(f->number); + size += get_valuesize(sizes, p, f); } } /* Resize the 'sizes' array if necessary. */ @@ -322,8 +320,8 @@ size_t upb_msg_serialize(struct upb_msg_serialize_state *s, struct upb_fielddef *f = &m->fields[i]; //union upb_value_ptr p = upb_msg_getptr(msg, f); buf = serialize_tag(buf, end, f, status); - if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { - } else if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) { + if(f->type == UPB_TYPENUM(MESSAGE)) { + } else if(f->type == UPB_TYPENUM(GROUP)) { } else if(upb_isstring(f)) { } else { //upb_serialize_value(buf, end, f->type, p, status); @@ -341,29 +339,29 @@ bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2, { #define CMP(type) return *p1.type == *p2.type; switch(type) { - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE: + case UPB_TYPENUM(DOUBLE): CMP(_double) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT: + case UPB_TYPENUM(FLOAT): CMP(_float) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64: + case UPB_TYPENUM(INT64): + case UPB_TYPENUM(SFIXED64): + case UPB_TYPENUM(SINT64): CMP(int64) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64: + case UPB_TYPENUM(UINT64): + case UPB_TYPENUM(FIXED64): CMP(uint64) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32: + case UPB_TYPENUM(INT32): + case UPB_TYPENUM(SFIXED32): + case UPB_TYPENUM(SINT32): CMP(int32) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM: + case UPB_TYPENUM(UINT32): + case UPB_TYPENUM(FIXED32): + case UPB_TYPENUM(ENUM): CMP(uint32); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL: + case UPB_TYPENUM(BOOL): CMP(_bool); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: + case UPB_TYPENUM(STRING): + case UPB_TYPENUM(BYTES): return upb_streql(*p1.str, *p2.str); default: return false; } diff --git a/src/upb_parse.c b/src/upb_parse.c index 2e910f2..3abaedf 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -467,9 +467,9 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, void *_buf, size_t len, buf = delim_end; // Could be >end. } } else { - if(!f || !upb_check_type(tag.wire_type, f->type)) { - buf = skip_wire_value(buf, end, tag.wire_type, status); - } else if (f->type == UPB_TYPENUM(GROUP)) { + //if(!f || !upb_check_type(tag.wire_type, f->type)) { + // buf = skip_wire_value(buf, end, tag.wire_type, status); + if (f->type == UPB_TYPENUM(GROUP)) { submsg_end = push(p, start, 0, f, status); msgdef = p->top->msgdef; } else { diff --git a/src/upb_text.c b/src/upb_text.c index ade8888..133552c 100644 --- a/src/upb_text.c +++ b/src/upb_text.c @@ -15,29 +15,29 @@ void upb_text_printval(upb_field_type_t type, union upb_value val, FILE *file) { #define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break; switch(type) { - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE: + case UPB_TYPENUM(DOUBLE): CASE("%0.f", _double); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT: + case UPB_TYPENUM(FLOAT): CASE("%0.f", _float) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64: + case UPB_TYPENUM(INT64): + case UPB_TYPENUM(SFIXED64): + case UPB_TYPENUM(SINT64): CASE("%" PRId64, int64) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64: + case UPB_TYPENUM(UINT64): + case UPB_TYPENUM(FIXED64): CASE("%" PRIu64, uint64) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32: + case UPB_TYPENUM(INT32): + case UPB_TYPENUM(SFIXED32): + case UPB_TYPENUM(SINT32): CASE("%" PRId32, int32) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM: + case UPB_TYPENUM(UINT32): + case UPB_TYPENUM(FIXED32): + case UPB_TYPENUM(ENUM): CASE("%" PRIu32, uint32); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL: + case UPB_TYPENUM(BOOL): CASE("%hhu", _bool); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: + case UPB_TYPENUM(STRING): + case UPB_TYPENUM(BYTES): /* TODO: escaping. */ fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break; } @@ -84,7 +84,6 @@ void upb_text_pop(struct upb_text_printer *p, static void printval(struct upb_text_printer *printer, union upb_value_ptr p, struct upb_fielddef *f, - google_protobuf_FieldDescriptorProto *fd, FILE *stream); static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg, @@ -93,32 +92,31 @@ static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg, struct upb_msgdef *m = msg->def; for(uint32_t i = 0; i < m->num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m); if(!upb_msg_isset(msg, f)) continue; union upb_value_ptr p = upb_msg_getptr(msg, f); if(upb_isarray(f)) { struct upb_array *arr = *p.arr; for(uint32_t j = 0; j < arr->len; j++) { union upb_value_ptr elem_p = upb_array_getelementptr(arr, j); - printval(printer, elem_p, f, fd, stream); + printval(printer, elem_p, f, stream); } } else { - printval(printer, p, f, fd, stream); + printval(printer, p, f, stream); } } } static void printval(struct upb_text_printer *printer, union upb_value_ptr p, struct upb_fielddef *f, - google_protobuf_FieldDescriptorProto *fd, FILE *stream) { if(upb_issubmsg(f)) { - upb_text_push(printer, fd->name, stream); + upb_text_push(printer, f->name, stream); printmsg(printer, *p.msg, stream); upb_text_pop(printer, stream); } else { - upb_text_printfield(printer, fd->name, f->type, upb_value_read(p, f->type), stream); + upb_text_printfield(printer, f->name, f->type, + upb_value_read(p, f->type), stream); } } -- cgit v1.2.3