From 18291eedc3cb6bf4386698620ad9d02ad367126a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 5 Dec 2009 10:32:53 -0800 Subject: Make defs refcounted, rename upb_context->upbsymtab. There is currently a memory leak when type definitions form cycles. This will need to be dealt with. --- src/upb_def.h | 294 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 141 insertions(+), 153 deletions(-) (limited to 'src/upb_def.h') diff --git a/src/upb_def.h b/src/upb_def.h index e58f01f..7c8cf80 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -7,16 +7,17 @@ * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. * - upb_enumdef: describes an enum. - * (TODO: descriptions of extensions and services). + * (TODO: definitions of extensions and services). * - * Defs should be obtained from a upb_context object; the APIs for creating - * them directly are internal-only. + * Defs are obtained from a upb_symtab object. A upb_symtab is empty when + * constructed, and definitions can be added by supplying serialized + * descriptors. * - * Defs are immutable and reference-counted. Contexts reference any defs - * that are the currently in their symbol table. If an extension is loaded - * that adds a field to an existing message, a new msgdef is constructed that - * includes the new field and the old msgdef is unref'd. The old msgdef will - * still be ref'd by message (if any) that were constructed with that msgdef. + * Defs are immutable and reference-counted. Symbol tables reference any defs + * that are the "current" definitions. If an extension is loaded that adds a + * field to an existing message, a new msgdef is constructed that includes the + * new field and the old msgdef is unref'd. The old msgdef will still be ref'd + * by messages (if any) that were constructed with that msgdef. * * This file contains routines for creating and manipulating the definitions * themselves. 
To create and manipulate actual messages, see upb_msg.h. @@ -32,16 +33,16 @@ extern "C" { #endif -/* "Base class" for defs; defines common members and functions. **************/ +/* upb_def: base class for defs **********************************************/ // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. enum upb_def_type { - UPB_DEF_MESSAGE, + UPB_DEF_MSG, UPB_DEF_ENUM, - UPB_DEF_SERVICE, - UPB_DEF_EXTENSION, - // Represented by a string, symbol hasn't been resolved yet. + UPB_DEF_SVC, + UPB_DEF_EXT, + // Internal-only, placeholder for a def that hasn't been resolved yet. UPB_DEF_UNRESOLVED }; @@ -52,17 +53,40 @@ struct upb_def { upb_atomic_refcount_t refcount; }; -void upb_def_init(struct upb_def *def, enum upb_def_type type, - struct upb_string *fqname); -void upb_def_uninit(struct upb_def *def); -INLINE void upb_def_ref(struct upb_def *def) { upb_atomic_ref(&def->refcount); } +void _upb_def_free(struct upb_def *def); // Must not be called directly! -/* Field definition. **********************************************************/ +// Call to ref/deref a def. +INLINE void upb_def_ref(struct upb_def *def) { + upb_atomic_ref(&def->refcount); +} +INLINE void upb_def_unref(struct upb_def *def) { + if(upb_atomic_unref(&def->refcount)) _upb_def_free(def); +} + +// Downcasts. These return NULL if the def is not of the requested type. +#define UPB_DOWNCAST_DEF(lower, upper) \ + struct upb_ ## lower; /* Forward-declare. 
*/ \ + INLINE struct upb_ ## lower *upb_downcast_ ## lower(struct upb_def *def) { \ + if(def->type != UPB_DEF_ ## upper) return NULL; \ + return (struct upb_ ## lower*)def; \ + } +UPB_DOWNCAST_DEF(msgdef, MSG); +UPB_DOWNCAST_DEF(enumdef, ENUM); +UPB_DOWNCAST_DEF(svcdef, SVC); +UPB_DOWNCAST_DEF(extdef, EXT); +UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DOWNCAST_DEF + +#define UPB_UPCAST(ptr) (&(ptr)->base) + +/* upb_fielddef ***************************************************************/ // A upb_fielddef describes a single field in a message. It isn't a full def // in the sense that it derives from upb_def. It cannot stand on its own; it // is either a field of a upb_msgdef or contained inside a upb_extensiondef. +// It is also reference-counted. struct upb_fielddef { + upb_atomic_refcount_t refcount; upb_field_type_t type; upb_label_t label; upb_field_number_t number; @@ -85,11 +109,11 @@ INLINE bool upb_isstring(struct upb_fielddef *f) { return upb_isstringtype(f->type); } INLINE bool upb_isarray(struct upb_fielddef *f) { - return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED; + return f->label == UPB_LABEL(REPEATED); } // Does the type of this field imply that it should contain an associated def? -INLINE bool upb_fielddef_hasdef(struct upb_fielddef *f) { - return upb_issubmsg(f) || f->type == UPB_TYPENUM(ENUM); +INLINE bool upb_hasdef(struct upb_fielddef *f) { + return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } INLINE bool upb_field_ismm(struct upb_fielddef *f) { @@ -115,31 +139,21 @@ INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_fielddef *f) { else return -1; } -struct google_protobuf_FieldDescriptorProto; - -// Interfaces for constructing/destroying fielddefs. These are internal-only. - -// Initializes a upb_fielddef from a FieldDescriptorProto. The caller must -// have previously allocated the upb_fielddef. 
-void upb_fielddef_init(struct upb_fielddef *f, - struct google_protobuf_FieldDescriptorProto *fd); -struct upb_fielddef *upb_fielddef_dup(struct upb_fielddef *f); -void upb_fielddef_uninit(struct upb_fielddef *f); - -// Sort the given fielddefs in-place, according to what we think is an optimal +// Internal-only interface for the upb compiler. +// Sorts the given fielddefs in-place, according to what we think is an optimal // ordering of fields. This can change from upb release to upb release. -void upb_fielddef_sort(struct upb_fielddef *defs, size_t num); +struct google_protobuf_FieldDescriptorProto; void upb_fielddef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, size_t num); -/* Message definition. ********************************************************/ +/* upb_msgdef *****************************************************************/ struct google_protobuf_EnumDescriptorProto; struct google_protobuf_DescriptorProto; // Structure that describes a single .proto message type. struct upb_msgdef { - struct upb_def def; + struct upb_def base; struct upb_msg *default_msg; // Message with all default values set. size_t size; uint32_t num_fields; @@ -148,150 +162,124 @@ struct upb_msgdef { struct upb_fielddef *fields; // We have exclusive ownership of these. // Tables for looking up fields by number and name. - struct upb_inttable fields_by_num; - struct upb_strtable fields_by_name; + struct upb_inttable itof; // int to field + struct upb_strtable ntof; // name to field }; -// The num->field and name->field maps in upb_msgdef allow fast lookup of fields -// by number or name. These lookups are in the critical path of parsing and -// field lookup, so they must be as fast as possible. -struct upb_fieldsbynum_entry { +// Hash table entries for looking up fields by name or number. 
+struct upb_itof_ent { struct upb_inttable_entry e; - struct upb_fielddef f; + struct upb_fielddef *f; }; -struct upb_fieldsbyname_entry { +struct upb_ntof_ent { struct upb_strtable_entry e; - struct upb_fielddef f; + struct upb_fielddef *f; }; // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. -INLINE struct upb_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m, - uint32_t number) { - struct upb_fieldsbynum_entry *e = (struct upb_fieldsbynum_entry*) - upb_inttable_fast_lookup( - &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); - return e ? &e->f : NULL; +INLINE struct upb_fielddef *upb_msg_itof(struct upb_msgdef *m, uint32_t num) { + struct upb_itof_ent *e; + e = (struct upb_itof_ent*)upb_inttable_fast_lookup( + &m->itof, num, sizeof(struct upb_itof_ent)); + return e ? e->f : NULL; } -INLINE struct upb_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, - struct upb_string *name) { - struct upb_fieldsbyname_entry *e = (struct upb_fieldsbyname_entry*) - upb_strtable_lookup( - &m->fields_by_name, name); - return e ? &e->f : NULL; +INLINE struct upb_fielddef *upb_msg_ntof(struct upb_msgdef *m, + struct upb_string *name) { + struct upb_ntof_ent *e; + e = (struct upb_ntof_ent*) upb_strtable_lookup(&m->ntof, name); + return e ? e->f : NULL; } -// Internal-only functions for constructing a msgdef. Caller retains ownership -// of d and fqname. Ownership of fields passes to the msgdef. -// -// Note that init does not resolve upb_fielddef.ref; the caller should do that -// post-initialization by calling upb_msgdef_resolve() below. 
-struct upb_msgdef *upb_msgdef_new(struct upb_fielddef *fields, int num_fields, - struct upb_string *fqname); -void _upb_msgdef_free(struct upb_msgdef *m); -INLINE void upb_msgdef_ref(struct upb_msgdef *m) { - upb_def_ref(&m->def); -} -INLINE void upb_msgdef_unref(struct upb_msgdef *m) { - if(upb_atomic_unref(&m->def.refcount)) _upb_msgdef_free(m); -} - -// Clients use this function on a previously initialized upb_msgdef to resolve -// the "ref" field in the upb_fielddef. Since messages can refer to each -// other in mutually-recursive ways, this step must be separated from -// initialization. -void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f, - struct upb_def *def); - -// Downcasts. They are checked only if asserts are enabled. -INLINE struct upb_msgdef *upb_downcast_msgdef(struct upb_def *def) { - assert(def->type == UPB_DEF_MESSAGE); - return (struct upb_msgdef*)def; -} - -/* Enum defintion. ************************************************************/ +/* upb_enumdef ****************************************************************/ struct upb_enumdef { - struct upb_def def; - struct upb_strtable nametoint; - struct upb_inttable inttoname; + struct upb_def base; + struct upb_strtable ntoi; + struct upb_inttable iton; }; -struct upb_enumdef_ntoi_entry { - struct upb_strtable_entry e; - uint32_t value; -}; - -struct upb_enumdef_iton_entry { - struct upb_inttable_entry e; - struct upb_string *string; +typedef int32_t upb_enumval_t; + +// Lookups from name to integer and vice-versa. +bool upb_enumdef_ntoi(struct upb_enumdef *e, struct upb_string *name, + upb_enumval_t *num); +struct upb_string *upb_enumdef_iton(struct upb_enumdef *e, upb_enumval_t num); + +// Iteration over name/value pairs. The order is undefined. +// struct upb_enum_iter i; +// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// // ... +// } +struct upb_enum_iter { + struct upb_enumdef *e; + void *state; // Internal iteration state. 
+ struct upb_string *name; + upb_enumval_t val; }; +void upb_enum_begin(struct upb_enum_iter *iter, struct upb_enumdef *e); +void upb_enum_next(struct upb_enum_iter *iter); +bool upb_enum_done(struct upb_enum_iter *iter); -// Internal-only functions for creating/destroying an enumdef. Caller retains -// ownership of ed. The enumdef is initialized with one ref. -struct upb_enumdef *upb_enumdef_new( - struct google_protobuf_EnumDescriptorProto *ed, struct upb_string *fqname); -void _upb_enumdef_free(struct upb_enumdef *e); -INLINE void upb_enumdef_ref(struct upb_enumdef *e) { upb_def_ref(&e->def); } -INLINE void upb_enumdef_unref(struct upb_enumdef *e) { - if(upb_atomic_unref(&e->def.refcount)) _upb_enumdef_free(e); -} -INLINE struct upb_enumdef *upb_downcast_enumdef(struct upb_def *def) { - assert(def->type == UPB_DEF_ENUM); - return (struct upb_enumdef*)def; -} +/* upb_symtab *****************************************************************/ -/* Unresolved definition. *****************************************************/ +// A SymbolTable is where upb_defs live. It is empty when first constructed. +// Clients add definitions to the symtab by supplying unserialized or +// serialized descriptors (as defined in descriptor.proto). +struct upb_symtab { + upb_atomic_refcount_t refcount; + upb_rwlock_t lock; // Protects all members except the refcount. + struct upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. -// This is a placeholder definition that contains only the name of the type -// that should eventually be referenced. Once symbols are resolved, this -// definition is replaced with a real definition. -struct upb_unresolveddef { - struct upb_def def; - struct upb_string *name; // Not fully-qualified. + // Our symbol tables; we own refs to the defs therein. + struct upb_strtable symtab; // The main symbol table. + struct upb_strtable psymtab; // Private symbols, for internal use. 
}; -INLINE struct upb_unresolveddef *upb_unresolveddef_new(struct upb_string *name) { - struct upb_unresolveddef *d = (struct upb_unresolveddef*)malloc(sizeof(*d)); - upb_def_init(&d->def, UPB_DEF_UNRESOLVED, name); - d->name = name; - upb_string_ref(name); - return d; -} -INLINE void _upb_unresolveddef_free(struct upb_unresolveddef *def) { - upb_def_uninit(&def->def); - upb_string_unref(def->name); +// Initializes a upb_symtab. Symtabs are not freed explicitly, but unref'd +// when the caller is done with them. +struct upb_symtab *upb_symtab_new(void); +void _upb_symtab_free(struct upb_symtab *s); // Must not be called directly! + +INLINE void upb_symtab_ref(struct upb_symtab *s) { + upb_atomic_ref(&s->refcount); } -INLINE struct upb_unresolveddef *upb_downcast_unresolveddef(struct upb_def *def) { - assert(def->type == UPB_DEF_UNRESOLVED); - return (struct upb_unresolveddef*)def; +INLINE void upb_symtab_unref(struct upb_symtab *s) { + if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); } -INLINE void upb_def_unref(struct upb_def *def) { - if(upb_atomic_unref(&def->refcount)) { - switch(def->type) { - case UPB_DEF_MESSAGE: - _upb_msgdef_free((struct upb_msgdef*)def); - break; - case UPB_DEF_ENUM: - _upb_enumdef_free((struct upb_enumdef*)def); - break; - case UPB_DEF_SERVICE: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_EXTENSION: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_UNRESOLVED: - _upb_unresolveddef_free((struct upb_unresolveddef*)def); - break; - default: - assert(false); - } - } -} +// Resolves the given symbol using the rules described in descriptor.proto, +// namely: +// +// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like +// scoping rules are used to find the type (i.e. first the nested types +// within this message are searched, then within the parent, on up to the +// root namespace). +// +// Returns NULL if no such symbol has been defined. 
+struct upb_def *upb_symtab_resolve(struct upb_symtab *s, + struct upb_string *base, + struct upb_string *symbol); + +// Find an entry in the symbol table with this exact name. Returns NULL if no +// such symbol name has been defined. +struct upb_def *upb_symtab_lookup(struct upb_symtab *s, + struct upb_string *sym); + +// Gets an array of pointers to all currently active defs in this symtab. The +// caller owns the returned array (which is of length *count) as well as a ref +// to each symbol inside. +struct upb_def **upb_symtab_getandref_defs(struct upb_symtab *s, int *count); + +// Adds the definitions in the given serialized descriptor to this symtab. All +// types that are referenced from desc must have previously been defined (or be +// defined in desc). desc may not attempt to define any names that are already +// defined in this symtab. Caller retains ownership of desc. status indicates +// whether the operation was successful or not, and the error message (if any). +void upb_symtab_add_desc(struct upb_symtab *s, struct upb_string *desc, + struct upb_status *status); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3