From 040f7e6ba2e2282b80f332a031b77d7d34b4fc85 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 Aug 2009 21:44:22 -0700 Subject: Significant memory-management refactoring any Python extension. --- src/upb.h | 93 +++++++++++++++++++++- src/upb_array.h | 89 ++++----------------- src/upb_context.c | 16 ++-- src/upb_enum.h | 9 --- src/upb_inlinedefs.c | 1 + src/upb_mm.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++++ src/upb_mm.h | 168 ++++++++++++++++++++++++++++++++++++++++ src/upb_msg.c | 213 +++++++++++++-------------------------------------- src/upb_msg.h | 152 +++++++++++------------------------- src/upb_parse.c | 2 +- src/upb_parse.h | 10 --- src/upb_string.c | 23 +++--- src/upb_string.h | 59 ++++++-------- src/upb_struct.h | 119 ++++++++++++++++++++++++++++ src/upb_text.c | 54 ++++++++++++- 15 files changed, 797 insertions(+), 419 deletions(-) create mode 100644 src/upb_mm.c create mode 100644 src/upb_mm.h create mode 100644 src/upb_struct.h (limited to 'src') diff --git a/src/upb.h b/src/upb.h index 27bf5fc..af026f5 100644 --- a/src/upb.h +++ b/src/upb.h @@ -12,7 +12,7 @@ #include #include #include /* for size_t. */ -#include "upb_string.h" +#include "descriptor_const.h" #ifdef __cplusplus extern "C" { @@ -23,6 +23,9 @@ extern "C" { #define INLINE static inline #endif +#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) +#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) + /* The maximum that any submessages can be nested. Matches proto2's limit. */ #define UPB_MAX_NESTING 64 @@ -55,12 +58,22 @@ typedef uint8_t upb_wire_type_t; * errors, and we use it to represent exceptional circumstances. 
*/ typedef uint8_t upb_field_type_t; +INLINE bool upb_issubmsgtype(upb_field_type_t type) { + return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP || + type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE; +} + +INLINE bool upb_isstringtype(upb_field_type_t type) { + return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || + type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES; +} + /* Information about a given value type (upb_field_type_t). */ struct upb_type_info { uint8_t align; uint8_t size; upb_wire_type_t expected_wire_type; - struct upb_string ctype; + char *ctype; }; /* Contains information for all .proto types. Indexed by upb_field_type_t. */ @@ -90,6 +103,10 @@ struct upb_tag { /* Polymorphic values of .proto types *****************************************/ +struct upb_string; +struct upb_array; +struct upb_msg; + /* A single .proto value. The owner must have an out-of-band way of knowing * the type, so that it knows which union member to use. */ union upb_value { @@ -121,15 +138,83 @@ union upb_value_ptr { void *_void; }; +/* Unfortunately there is no way to define this so that it can be used as a + * generic expression, a la: + * foo(UPB_VALUE_ADDROF(bar)); + * ...you have to use it as the initializer of a upb_value_ptr: + * union upb_value_ptr p = UPB_VALUE_ADDROF(bar); + * foo(p); + */ +#define UPB_VALUE_ADDROF(val) {(void*)&val._double} + /* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need * to know the field type to perform this operation, because we need to know * how much memory to copy. 
*/ -INLINE union upb_value upb_deref(union upb_value_ptr ptr, upb_field_type_t t) { +INLINE union upb_value upb_value_read(union upb_value_ptr ptr, + upb_field_type_t ft) { union upb_value val; - memcpy(&val, ptr._void, upb_type_info[t].size); +#define CASE(t, member_name) \ + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ + val.member_name = *ptr.member_name; \ + break; + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } +#undef CASE return val; } +/* Writes a upb_value through a upb_value_ptr (the inverse of upb_value_read). We need + * to know the field type to perform this operation, because we need to know + * which union member to store. 
*/ +INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val, + upb_field_type_t ft) { +#define CASE(t, member_name) \ + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ + *ptr.member_name = val.member_name; \ + break; + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } +#undef CASE +} + union upb_symbol_ref { struct upb_msgdef *msg; struct upb_enum *_enum; diff --git a/src/upb_array.h b/src/upb_array.h index 370f6eb..732c4aa 100644 --- a/src/upb_array.h +++ b/src/upb_array.h @@ -23,7 +23,7 @@ #define UPB_ARRAY_H_ #include -#include "upb.h" +#include "upb_msg.h" /* Because we use upb_msg_fielddef */ #ifdef __cplusplus extern "C" { @@ -31,41 +31,6 @@ extern "C" { struct upb_string; -/* upb_arrays can be at most 2**32 elements long. */ -typedef uint32_t upb_arraylen_t; - -/* Represents an array (a repeated field) of any type. The interpretation of - * the data in the array depends on the type. */ -struct upb_array { - union upb_value_ptr elements; - upb_arraylen_t len; /* Number of elements in "elements". */ - upb_arraylen_t size; /* Memory we own (0 if by reference). 
*/ - void *gptr; -}; - -INLINE void upb_array_init(struct upb_array *arr) -{ - arr->elements._void = NULL; - arr->len = 0; - arr->size = 0; -} - -INLINE void upb_array_uninit(struct upb_array *arr) -{ - if(arr->size) free(arr->elements._void); -} - -INLINE struct upb_array *upb_array_new(void) { - struct upb_array *arr = malloc(sizeof(*arr)); - upb_array_init(arr); - return arr; -} - -INLINE void upb_array_free(struct upb_array *arr) { - upb_array_uninit(arr); - free(arr); -} - /* Returns a pointer to an array element. Does not perform a bounds check! */ INLINE union upb_value_ptr upb_array_getelementptr( struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type) @@ -75,10 +40,17 @@ INLINE union upb_value_ptr upb_array_getelementptr( return ptr; } -INLINE union upb_value upb_array_getelement( - struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type) +/* Allocation/Deallocation/Resizing. ******************************************/ + +INLINE struct upb_array *upb_array_new(struct upb_msg_fielddef *f) { - return upb_deref(upb_array_getelementptr(arr, n, type), type); + struct upb_array *arr = malloc(sizeof(*arr)); + upb_mmhead_init(&arr->mmhead); + arr->elements._void = NULL; + arr->len = 0; + arr->size = 0; + arr->fielddef = f; + return arr; } INLINE uint32_t upb_round_up_to_pow2(uint32_t v) @@ -94,13 +66,10 @@ INLINE uint32_t upb_round_up_to_pow2(uint32_t v) return v; } -/* Resizes array to be "len" elements long and ensures we have write access - * to the array (reallocating if necessary). Returns true iff we were - * referencing memory for the array and dropped the reference. */ -INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen, - upb_field_type_t type) +/* Resizes array to be "len" elements long (reallocating if necessary). 
*/ +INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen) { - size_t type_size = upb_type_info[type].size; + size_t type_size = upb_type_info[arr->fielddef->type].size; bool dropped = false; bool ref = arr->size == 0; /* Ref'ing external memory. */ void *data = arr->elements._void; @@ -114,39 +83,11 @@ INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen, memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size); dropped = true; } + /* TODO: fill with defaults. */ arr->len = newlen; return dropped; } -/* These are all overlays on upb_array, pointers between them can be cast. */ -#define UPB_DEFINE_ARRAY_TYPE(name, type) \ - struct name ## _array { \ - struct upb_fielddef *f; \ - void *gptr; \ - type *elements; \ - upb_arraylen_t len; \ - upb_arraylen_t size; \ - }; - -UPB_DEFINE_ARRAY_TYPE(upb_double, double) -UPB_DEFINE_ARRAY_TYPE(upb_float, float) -UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t) -UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t) -UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t) -UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t) -UPB_DEFINE_ARRAY_TYPE(upb_bool, bool) -UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*) -UPB_DEFINE_ARRAY_TYPE(upb_msg, void*) - -/* Defines an array of a specific message type (an overlay of upb_array). */ -#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array -#define UPB_DEFINE_MSG_ARRAY(msg_type) \ - UPB_MSG_ARRAY(msg_type) { \ - msg_type **elements; \ - upb_arraylen_t len; \ - upb_arraylen_t size; \ - }; - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_context.c b/src/upb_context.c index 12ad8c7..0d64c3e 100644 --- a/src/upb_context.c +++ b/src/upb_context.c @@ -10,6 +10,7 @@ #include "upb_context.h" #include "upb_enum.h" #include "upb_msg.h" +#include "upb_mm.h" /* Search for a character in a string, in reverse. 
*/ static int my_memrchr(char *data, char c, size_t len) @@ -66,7 +67,7 @@ static void free_context(struct upb_context *c) { free_symtab(&c->symtab); for(size_t i = 0; i < c->fds_len; i++) - upb_msg_free((struct upb_msg*)c->fds[i]); + upb_msg_unref((struct upb_msg*)c->fds[i]); free_symtab(&c->psymtab); free(c->fds); } @@ -77,9 +78,9 @@ void upb_context_unref(struct upb_context *c) upb_rwlock_wrlock(&c->lock); free_context(c); upb_rwlock_unlock(&c->lock); + free(c); + upb_rwlock_destroy(&c->lock); } - free(c); - upb_rwlock_destroy(&c->lock); } bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol, @@ -325,10 +326,9 @@ bool upb_context_addfds(struct upb_context *c, } bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) { - google_protobuf_FileDescriptorSet *fds = - (google_protobuf_FileDescriptorSet*)upb_msg_parsenew(c->fds_msg, fds_str); - if(!fds) return false; - if(!upb_context_addfds(c, fds)) return false; + struct upb_msg *fds = upb_msg_new(c->fds_msg); + if(upb_msg_parsestr(fds, fds_str->ptr, fds_str->byte_len) != UPB_STATUS_OK) return false; + if(!upb_context_addfds(c, (google_protobuf_FileDescriptorSet*)fds)) return false; { /* We own fds now, need to keep a ref so we can free it later. 
*/ @@ -337,7 +337,7 @@ bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) { c->fds_size *= 2; c->fds = realloc(c->fds, c->fds_size); } - c->fds[c->fds_len++] = fds; + c->fds[c->fds_len++] = (google_protobuf_FileDescriptorSet*)fds; upb_rwlock_unlock(&c->lock); } return true; diff --git a/src/upb_enum.h b/src/upb_enum.h index e43a203..9acc075 100644 --- a/src/upb_enum.h +++ b/src/upb_enum.h @@ -33,15 +33,6 @@ struct upb_enum_iton_entry { struct upb_string *string; }; -INLINE void upb_enum_ref(struct upb_enum *e) { - if(upb_atomic_ref(&e->refcount)) upb_context_ref(e->context); -} - -INLINE void upb_enum_unref(struct upb_enum *e) { - if(upb_atomic_unref(&e->refcount)) upb_context_unref(e->context); -} - - /* Initializes and frees an enum, respectively. Caller retains ownership of * ed, but it must outlive e. */ void upb_enum_init(struct upb_enum *e, diff --git a/src/upb_inlinedefs.c b/src/upb_inlinedefs.c index dae5c01..7a55e06 100644 --- a/src/upb_inlinedefs.c +++ b/src/upb_inlinedefs.c @@ -15,6 +15,7 @@ #include "upb_array.h" #include "upb_context.h" #include "upb_enum.h" +#include "upb_mm.h" #include "upb_msg.h" #include "upb_parse.h" #include "upb_serialize.h" diff --git a/src/upb_mm.c b/src/upb_mm.c new file mode 100644 index 0000000..853d572 --- /dev/null +++ b/src/upb_mm.c @@ -0,0 +1,208 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
+ */ + +#include "upb_mm.h" +#include "upb_string.h" +#include "upb_array.h" +#include "upb_msg.h" + +void upb_msg_destroy(struct upb_msg *msg) { + uint32_t i; + for(i = 0; i < msg->def->num_fields; i++) { + struct upb_msg_fielddef *f = &msg->def->fields[i]; + if(!upb_msg_isset(msg, f) || !upb_field_ismm(f)) continue; + upb_mm_ptrtype type = upb_field_ptrtype(f); + union upb_mmptr mmptr = upb_mmptr_read(upb_msg_getptr(msg, f), type); + upb_mm_unref(mmptr, type); + } + free(msg); +} + +void upb_array_destroy(struct upb_array *arr) +{ + if(upb_elem_ismm(arr->fielddef)) { + upb_arraylen_t i; + /* Unref elements. */ + for(i = 0; i < arr->len; i++) { + union upb_value_ptr p = upb_array_getelementptr(arr, i, arr->fielddef->type); + upb_mm_ptrtype type = upb_elem_ptrtype(arr->fielddef); + union upb_mmptr mmptr = upb_mmptr_read(p, type); + upb_mm_unref(mmptr, type); + } + } + if(arr->size != 0) free(arr->elements._void); + free(arr); +} + +static union upb_mmptr upb_mm_newptr(upb_mm_ptrtype type, + struct upb_msg_fielddef *f) +{ + union upb_mmptr p = {NULL}; + switch(type) { + case UPB_MM_MSG_REF: p.msg = upb_msg_new(f->ref.msg); + case UPB_MM_STR_REF: p.str = upb_string_new(); + case UPB_MM_ARR_REF: p.arr = upb_array_new(f); + default: assert(false); break; + } + return p; +} + +static struct upb_mm_ref *find_or_create_ref(struct upb_mm_ref *fromref, + struct upb_mm *mm, + union upb_mmptr p, upb_mm_ptrtype type, + bool *created) +{ + struct upb_mmhead *head = upb_mmhead_addr(p, type); + struct upb_mm_ref **ref = &head->refs; + while(*ref && (*ref)->mm <= mm) { + if((*ref)->mm == mm) { + return *ref; + *created = false; + } + ref = &((*ref)->next); + } + *created = true; + struct upb_mm_ref *newref = mm->newref_cb(fromref, p, type); + newref->p = p; + newref->type = type; + newref->mm = mm; + newref->next = *ref; + *ref = newref; + return newref; +} + +struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type, + struct upb_mm *mm, bool *created) +{ + return 
find_or_create_ref(NULL, mm, p, type, created); +} + +struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm) +{ + struct upb_msg *msg = upb_msg_new(def); + union upb_mmptr mmptr = {.msg = msg}; + bool created; + struct upb_mm_ref *ref = find_or_create_ref(NULL, mm, mmptr, UPB_MM_MSG_REF, &created); + upb_mm_unref(mmptr, UPB_MM_MSG_REF); /* Shouldn't have any counted refs. */ + assert(created); + return ref; +} + +struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref, + struct upb_msg_fielddef *f, + bool *refcreated) +{ + assert(upb_field_ismm(f)); + upb_mm_ptrtype ptrtype = upb_field_ptrtype(f); + struct upb_msg *msg = msgref->p.msg; + union upb_mmptr val; + union upb_value_ptr p = upb_msg_getptr(msg, f); + + /* Create the upb value if it doesn't already exist. */ + if(!upb_msg_isset(msg, f)) { + upb_msg_set(msg, f); + val = upb_mm_newptr(ptrtype, f); + upb_mmptr_write(p, val, ptrtype); + } else { + val = upb_mmptr_read(p, ptrtype); + } + + return find_or_create_ref(msgref, msgref->mm, val, ptrtype, refcreated); +} + +struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i, + bool *refcreated) +{ + struct upb_array *arr = arrref->p.arr; + struct upb_msg_fielddef *f = arr->fielddef; + assert(upb_elem_ismm(f)); + assert(i < arr->len); + union upb_value_ptr p = upb_array_getelementptr(arr, i, f->type); + upb_mm_ptrtype type = upb_elem_ptrtype(f); + union upb_mmptr val = upb_mmptr_read(p, type); + return find_or_create_ref(arrref, arrref->mm, val, type, refcreated); +} + +void upb_mm_release(struct upb_mm_ref *ref) +{ + struct upb_mm_ref **ref_head = (void*)ref->p.msg; + struct upb_mm_ref **ref_elem = ref_head; + struct upb_mm *mm = ref->mm; + while(true) { + assert(*ref_elem); /* Client asserts r->mm is in the list. */ + if((*ref_elem)->mm == mm) { + *ref_elem = (*ref_elem)->next; /* Remove from the list. */ + break; + } + } + + if(upb_mmhead_norefs(&ref->p.msg->mmhead)) { + /* Destroy the dynamic object. 
*/ + switch(ref->type) { + case UPB_MM_MSG_REF: + upb_msg_destroy(ref->p.msg); + break; + case UPB_MM_ARR_REF: + upb_array_destroy(ref->p.arr); + break; + case UPB_MM_STR_REF: + upb_string_destroy(ref->p.str); + break; + default: assert(false); break; + } + } +} + +void upb_mm_msg_set(struct upb_mm_ref *from_msg_ref, struct upb_mm_ref *to_ref, + struct upb_msg_fielddef *f) +{ + assert(upb_field_ismm(f)); + union upb_mmptr fromval = from_msg_ref->p; + union upb_mmptr toval = to_ref->p; + union upb_value_ptr field_p = upb_msg_getptr(fromval.msg, f); + upb_mm_ptrtype type = upb_field_ptrtype(f); + if(upb_msg_isset(fromval.msg, f)) { + union upb_mmptr existingval = upb_mmptr_read(field_p, type); + if(existingval.msg == toval.msg) + return; /* Setting to its existing value, do nothing. */ + upb_mm_unref(existingval, type); + } + upb_msg_set(fromval.msg, f); + upb_mmptr_write(field_p, toval, type); + upb_mm_ref(toval, type); +} + +void upb_mm_msgclear(struct upb_mm_ref *from_msg_ref, struct upb_msg_fielddef *f) +{ + assert(upb_field_ismm(f)); + union upb_mmptr fromval = from_msg_ref->p; + upb_mm_ptrtype type = upb_field_ptrtype(f); + if(upb_msg_isset(fromval.msg, f)) { + union upb_value_ptr field_p = upb_msg_getptr(fromval.msg, f); + union upb_mmptr existingval = upb_mmptr_read(field_p, type); + upb_msg_unset(fromval.msg, f); + upb_mm_unref(existingval, type); + } +} + +void upb_mm_msgclear_all(struct upb_mm_ref *from) +{ + struct upb_msgdef *def = from->p.msg->def; + for(uint32_t i = 0; i < def->num_fields; i++) { + struct upb_msg_fielddef *f = &def->fields[i]; + if(!upb_field_ismm(f)) continue; + upb_mm_msgclear(from, f); + } +} + +void upb_mm_arr_set(struct upb_mm_ref *from, struct upb_mm_ref *to, + upb_arraylen_t i, upb_field_type_t type) +{ + (void)from; + (void)to; + (void)i; + (void)type; +} diff --git a/src/upb_mm.h b/src/upb_mm.h new file mode 100644 index 0000000..88cb043 --- /dev/null +++ b/src/upb_mm.h @@ -0,0 +1,168 @@ +/* + * upb - a minimalist 
implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * A parsed protobuf is represented in memory as a tree. The three kinds of + * nodes in this tree are messages, arrays, and strings. This file defines + * a memory-management scheme for making sure that these nodes are collected + * at the right times. + * + * The basic strategy is reference-counting, but with a twist. Since any + * dynamic language that wishes to reference these nodes will need its own, + * language-specific structure, we provide two different kinds of references: + * + * - counted references. These are references that are tracked with only a + * reference count. They are used for two separate purposes: + * 1. for references within the tree, from one node to another. + * 2. for external references into the tree, where the referrer does not need + * a separate message structure. + * - listed references. These are references that have their own separate + * data record. These separate records are kept in a linked list. + */ + +#ifndef UPB_MM_H_ +#define UPB_MM_H_ + +#include "upb.h" +#include "upb_string.h" +#include "upb_array.h" +#include "upb_msg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Structure definitions. *****************************************************/ + +typedef int16_t upb_mm_id; + +struct upb_msg; +struct upb_array; +struct upb_string; +struct upb_msg_fielddef; + +struct upb_mm_ref; +/* Info about a mm. */ +struct upb_mm { + /* fromref is set iff this call is from getfieldref or getelemref. */ + struct upb_mm_ref *(*newref_cb)(struct upb_mm_ref *fromref, + union upb_mmptr p, upb_mm_ptrtype type); +}; + +struct upb_mm_ref { + union upb_mmptr p; + /* This is slightly wasteful, because the mm-specific ref will probably also + * contain the information about what kind of ref this is, in a different + * form. */ + upb_mm_ptrtype type; + struct upb_mm *mm; /* TODO: There are ways to shrink this. 
*/ + struct upb_mm_ref *next; /* Linked list for refs to the same value. */ +}; + +/* Functions for working with listed references. *****************************/ + +/* Create a new top-level message and create a single ref for it. */ +struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm); + +/* Given a pointer to an existing msg, array, or string, find a ref for this + * mm, creating one if necessary. 'created' indicates whether the returned + * reference was just created. */ +struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type, + struct upb_mm *mm, bool *created); + +/* f must be ismm == true. The msg field may or may not be set (will be + * created if it doesn't exist). If a ref already exists for the given field, + * returns it, otherwise calls the given callback to create one. 'refcreated' + * indicates whether a new reference was created. */ +struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref, + struct upb_msg_fielddef *f, + bool *refcreated); +/* i must be < the array's len. */ +struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i, + bool *refcreated); + +/* Remove this ref from the list for this msg. + * If that was the last reference, deletes the msg itself. */ +void upb_mm_release(struct upb_mm_ref *ref); + +void upb_mm_msgset(struct upb_mm_ref *msg, struct upb_mm_ref *to, + struct upb_msg_fielddef *f); +void upb_mm_msgclear(struct upb_mm_ref *from, struct upb_msg_fielddef *f); +void upb_mm_msgclear_all(struct upb_mm_ref *from); + +void upb_mm_arrset(struct upb_mm_ref *from, struct upb_mm_ref *to, uint32_t i); + +/* Defined iff upb_field_ismm(f). */ +INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f); +/* Defined iff upb_elem_ismm(f). 
*/ +INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f); + +INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type); + +/* These methods are all a bit silly, since all branches of the case compile + * to the same thing (which the compiler will recognize), but we do it this way + * for full union correctness. */ +INLINE union upb_mmptr upb_mmptr_read(union upb_value_ptr p, upb_mm_ptrtype t) +{ + union upb_mmptr val; + switch(t) { + case UPB_MM_MSG_REF: val.msg = *p.msg; break; + case UPB_MM_STR_REF: val.str = *p.str; break; + case UPB_MM_ARR_REF: val.arr = *p.arr; break; + default: assert(false); val.msg = *p.msg; break; /* Shouldn't happen. */ + } + return val; +} + +INLINE void upb_mmptr_write(union upb_value_ptr p, union upb_mmptr val, + upb_mm_ptrtype t) +{ + switch(t) { + case UPB_MM_MSG_REF: *p.msg = val.msg; break; + case UPB_MM_STR_REF: *p.str = val.str; break; + case UPB_MM_ARR_REF: *p.arr = val.arr; break; + default: assert(false); val.msg = *p.msg; break; /* Shouldn't happen. */ + } +} + +void upb_array_destroy(struct upb_array *arr); +void upb_msg_destroy(struct upb_msg *msg); + +INLINE void upb_msg_unref(struct upb_msg *msg) { + if(upb_mmhead_unref(&msg->mmhead)) upb_msg_destroy(msg); +} + +INLINE void upb_array_unref(struct upb_array *arr) { + if(upb_mmhead_unref(&arr->mmhead)) upb_array_destroy(arr); +} + +INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type) +{ + switch(type) { + case UPB_MM_MSG_REF: upb_msg_unref(p.msg); break; + case UPB_MM_STR_REF: upb_string_unref(p.str); break; + case UPB_MM_ARR_REF: upb_array_unref(p.arr); + } +} + +static struct upb_mmhead *upb_mmhead_addr(union upb_mmptr p, upb_mm_ptrtype t) +{ + switch(t) { + case UPB_MM_MSG_REF: return &((*p.msg).mmhead); + case UPB_MM_STR_REF: return &((*p.str).mmhead); + case UPB_MM_ARR_REF: return &((*p.arr).mmhead); + default: assert(false); return &((*p.msg).mmhead); /* Shouldn't happen. 
*/ + } +} + +INLINE void upb_mm_ref(union upb_mmptr p, upb_mm_ptrtype type) +{ + upb_mmhead_ref(upb_mmhead_addr(p, type)); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_MM_MSG_H_ */ diff --git a/src/upb_msg.c b/src/upb_msg.c index 45f889d..80602dd 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -6,8 +6,9 @@ #include #include -#include "descriptor.h" #include "upb_msg.h" +#include "descriptor.h" +#include "upb_mm.h" #include "upb_parse.h" #include "upb_serialize.h" #include "upb_text.h" @@ -47,7 +48,6 @@ bool upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, /* TODO: more complete validation. */ if(!d->set_flags.has.field) return false; - upb_atomic_refcount_init(&m->refcount, 0); upb_inttable_init(&m->fields_by_num, d->field->len, sizeof(struct upb_fieldsbynum_entry)); upb_strtable_init(&m->fields_by_name, d->field->len, @@ -123,113 +123,43 @@ void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f, str_e->f.ref = ref; } -/* Simple, one-shot parsing ***************************************************/ - -static void *upb_msg_new(struct upb_msgdef *md) -{ - size_t size = md->size + (sizeof(void*) * 2); - struct upb_msg *msg = malloc(size); - memset(msg, 0, size); - msg->def = md; - return msg; -} +/* Parsing. ******************************************************************/ -/* Allocation callbacks. */ -struct upb_array *getarray_cb( - void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f) -{ - (void)from_gptr; - (void)existingval; /* Don't care -- always zero. */ - (void)f; - return upb_array_new(); -} +struct upb_msg_parser_frame { + struct upb_msg *msg; +}; -static struct upb_string *getstring_cb( - void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f, - bool byref) -{ - (void)from_gptr; - (void)existingval; /* Don't care -- always zero. 
*/ - (void)f; - (void)byref; - return upb_strnew(); -} +struct upb_msg_parser { + struct upb_stream_parser s; + bool merge; + bool byref; + struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top; +}; -static struct upb_msg *getmsg_cb( - void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f) -{ - (void)from_gptr; - (void)existingval; /* Don't care -- always zero. */ - return upb_msg_new(f->ref.msg); -} +void upb_msg_parser_reset(struct upb_msg_parser *p, + struct upb_msg *msg, bool byref); -struct upb_msg *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s) -{ - struct upb_msg_parser mp; - struct upb_msg *msg = upb_msg_new(md); - upb_msg_parser_reset(&mp, msg, false); - mp.getarray_cb = getarray_cb; - mp.getstring_cb = getstring_cb; - mp.getmsg_cb = getmsg_cb; - size_t read; - upb_status_t status = upb_msg_parser_parse(&mp, s->ptr, s->byte_len, &read); - if(status == UPB_STATUS_OK && read == s->byte_len) { - return msg; - } else { - upb_msg_free(msg); - return NULL; - } -} +/* Parses protocol buffer data out of data which has length of len. The data + * need not be a complete protocol buffer. The number of bytes parsed is + * returned in *read, and the next call to upb_msg_parse must supply data that + * is *read bytes past data in the logical stream. */ +upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p, + void *data, size_t len, size_t *read); -/* For simple, one-shot parsing we assume that a dynamic field exists (and - * needs to be freed) iff its set bit is set. */ -static void free_value(union upb_value_ptr p, struct upb_msg_fielddef *f) -{ - if(upb_isstring(f)) { - free((*p.str)->ptr); - free(*p.str); - } else if(upb_issubmsg(f)) { - upb_msg_free(*p.msg); - } -} -void upb_msg_free(struct upb_msg *msg) -{ - if(!msg) return; /* A very free-like thing to do. 
*/ - struct upb_msgdef *m = msg->def; - for(unsigned int i = 0; i < m->num_fields; i++) { - struct upb_msg_fielddef *f = &m->fields[i]; - if(!upb_msg_isset(msg, f)) continue; - union upb_value_ptr p = upb_msg_getptr(msg, f); - if(upb_isarray(f)) { - assert(*p.arr); - for(upb_arraylen_t j = 0; j < (*p.arr)->len; j++) - free_value(upb_array_getelementptr(*p.arr, j, f->type), f); - upb_array_free(*p.arr); - } else { - free_value(p, f); - } - } - free(msg); -} - -/* Parsing. ******************************************************************/ /* Helper function that returns a pointer to where the next value for field "f" * should be stored, taking into account whether f is an array that may need to * be allocated or resized. */ static union upb_value_ptr get_value_ptr(struct upb_msg *msg, - struct upb_msg_fielddef *f, - void **gptr, - upb_msg_getandref_array_cb_t getarray_cb) + struct upb_msg_fielddef *f) { union upb_value_ptr p = upb_msg_getptr(msg, f); if(upb_isarray(f)) { bool isset = upb_msg_isset(msg, f); size_t len = isset ? 
(*p.arr)->len : 0; - if(!isset) *p.arr = getarray_cb(*gptr, *p.arr, f); - upb_array_resize(*p.arr, len+1, f->type); - *gptr = (*p.arr)->gptr; + if(!isset) *p.arr = upb_array_new(f); + upb_array_resize(*p.arr, len+1); p = upb_array_getelementptr(*p.arr, len, f->type); } return p; @@ -255,8 +185,7 @@ static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end, struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = user_field_desc; struct upb_msg *msg = mp->top->msg; - void *gptr = upb_msg_gptr(msg); - union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb); + union upb_value_ptr p = get_value_ptr(msg, f); upb_msg_set(msg, f); UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf)); return UPB_STATUS_OK; @@ -269,21 +198,20 @@ static void str_cb(void *udata, uint8_t *str, struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = udesc; struct upb_msg *msg = mp->top->msg; - void *gptr = upb_msg_gptr(msg); - union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb); + union upb_value_ptr p = get_value_ptr(msg, f); upb_msg_set(msg, f); if(avail_len != total_len) abort(); /* TODO: support streaming. 
*/ - bool byref = avail_len == total_len && mp->byref; - *p.str = mp->getstring_cb(gptr, *p.str, f, byref); - if(byref) { - upb_strdrop(*p.str); - (*p.str)->ptr = (char*)str; - (*p.str)->byte_len = avail_len; - } else { - upb_stralloc(*p.str, total_len); + //bool byref = avail_len == total_len && mp->byref; + *p.str = upb_string_new(); + //if(byref) { + // upb_strdrop(*p.str); + // (*p.str)->ptr = (char*)str; + // (*p.str)->byte_len = avail_len; + //} else { + upb_string_resize(*p.str, total_len); memcpy((*p.str)->ptr, str, avail_len); (*p.str)->byte_len = avail_len; - } + //} } static void submsg_start_cb(void *udata, void *user_field_desc) @@ -291,22 +219,39 @@ static void submsg_start_cb(void *udata, void *user_field_desc) struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = user_field_desc; struct upb_msg *oldmsg = mp->top->msg; - void *gptr = upb_msg_gptr(oldmsg); - union upb_value_ptr p = get_value_ptr(oldmsg, f, &gptr, mp->getarray_cb); + union upb_value_ptr p = get_value_ptr(oldmsg, f); + struct upb_msg **submsg = p.msg; + //if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) { + // /* We can reuse the existing submsg. */ + //} else { + *submsg = upb_msg_new(f->ref.msg); + //} + upb_msg_clear(*submsg); upb_msg_set(oldmsg, f); - *p.msg = mp->getmsg_cb(gptr, *p.msg, f); mp->top++; - mp->top->msg = *p.msg; + mp->top->msg = *submsg; } static void submsg_end_cb(void *udata) { struct upb_msg_parser *mp = udata; + struct upb_msg *msg = mp->top->msg; + /* TODO: free any remaining dynamic storage that was not reused. */ + (void)msg; mp->top--; } /* Externally-visible functions for the msg parser. 
*/ +upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len) +{ + struct upb_msg_parser mp; + upb_msg_parser_reset(&mp, msg, false); + size_t read; + upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read); + return ret; +} + void upb_msg_parser_reset(struct upb_msg_parser *s, struct upb_msg *msg, bool byref) { upb_stream_parser_reset(&s->s, s); @@ -592,51 +537,3 @@ bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive) } return true; } - - -static void printval(struct upb_text_printer *printer, union upb_value_ptr p, - struct upb_msg_fielddef *f, - google_protobuf_FieldDescriptorProto *fd, - FILE *stream); - -static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg, - FILE *stream) -{ - struct upb_msgdef *m = msg->def; - for(uint32_t i = 0; i < m->num_fields; i++) { - struct upb_msg_fielddef *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m); - if(!upb_msg_isset(msg, f)) continue; - union upb_value_ptr p = upb_msg_getptr(msg, f); - if(upb_isarray(f)) { - struct upb_array *arr = *p.arr; - for(uint32_t j = 0; j < arr->len; j++) { - union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type); - printval(printer, elem_p, f, fd, stream); - } - } else { - printval(printer, p, f, fd, stream); - } - } -} - -static void printval(struct upb_text_printer *printer, union upb_value_ptr p, - struct upb_msg_fielddef *f, - google_protobuf_FieldDescriptorProto *fd, - FILE *stream) -{ - if(upb_issubmsg(f)) { - upb_text_push(printer, fd->name, stream); - printmsg(printer, *p.msg, stream); - upb_text_pop(printer, stream); - } else { - upb_text_printfield(printer, fd->name, f->type, upb_deref(p, f->type), stream); - } -} - -void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream) -{ - struct upb_text_printer printer; - upb_text_printer_init(&printer, single_line); - printmsg(&printer, msg, stream); -} diff --git a/src/upb_msg.h b/src/upb_msg.h index 
9dc1827..abec479 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -52,10 +52,10 @@ #include #include +#include +#include "descriptor.h" #include "upb.h" -#include "upb_atomic.h" -#include "upb_context.h" #include "upb_parse.h" #include "upb_table.h" @@ -66,10 +66,11 @@ extern "C" { /* Message definition. ********************************************************/ struct upb_msg_fielddef; +struct upb_context; /* Structure that describes a single .proto message type. */ struct upb_msgdef { - upb_atomic_refcount_t refcount; struct upb_context *context; + struct upb_msg *default_msg; /* Message with all default values set. */ struct google_protobuf_DescriptorProto *descriptor; struct upb_string fqname; /* Fully qualified. */ size_t size; @@ -82,7 +83,6 @@ struct upb_msgdef { struct google_protobuf_FieldDescriptorProto **field_descriptors; }; - /* Structure that describes a single field in a message. This structure is very * consciously designed to fit into 12/16 bytes (32/64 bit, respectively), * because copies of this struct are in the hash table that is read in the @@ -96,14 +96,6 @@ struct upb_msg_fielddef { upb_label_t label; }; -INLINE void upb_msgdef_ref(struct upb_msgdef *m) { - if(upb_atomic_ref(&m->refcount)) upb_context_ref(m->context); -} - -INLINE void upb_msgdef_unref(struct upb_msgdef *m) { - if(upb_atomic_unref(&m->refcount)) upb_context_unref(m->context); -} - INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) { return upb_issubmsgtype(f->type); } @@ -114,6 +106,29 @@ INLINE bool upb_isarray(struct upb_msg_fielddef *f) { return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED; } +INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) { + return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); +} + +INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) { + return upb_isstring(f) || upb_issubmsg(f); +} + +/* Defined iff upb_field_ismm(f). 
*/ +INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) { + if(upb_isarray(f)) return UPB_MM_ARR_REF; + else if(upb_isstring(f)) return UPB_MM_STR_REF; + else if(upb_issubmsg(f)) return UPB_MM_MSG_REF; + else return -1; +} + +/* Defined iff upb_elem_ismm(f). */ +INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) { + if(upb_isstring(f)) return UPB_MM_STR_REF; + else if(upb_issubmsg(f)) return UPB_MM_MSG_REF; + else return -1; +} + /* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( struct upb_msg_fielddef *f, struct upb_msgdef *m) { @@ -122,14 +137,15 @@ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( /* Message structure. *********************************************************/ -struct upb_msg { - struct upb_msgdef *def; - void *gptr; /* Generic pointer for use by subclasses. */ - uint8_t data[1]; -}; - -INLINE void *upb_msg_gptr(struct upb_msg *msg) { - return msg->gptr; +/* Constructs a new msg corresponding to the given msgdef, and having one + * counted reference. */ +INLINE struct upb_msg *upb_msg_new(struct upb_msgdef *md) { + size_t size = md->size + offsetof(struct upb_msg, data); + struct upb_msg *msg = malloc(size); + memset(msg, 0, size); + upb_mmhead_init(&msg->mmhead); + msg->def = md; + return msg; } /* Field access. **************************************************************/ @@ -146,12 +162,6 @@ INLINE union upb_value_ptr upb_msg_getptr(struct upb_msg *msg, return p; } -/* Returns a a specific field in a message. */ -INLINE union upb_value upb_msg_get(struct upb_msg *msg, - struct upb_msg_fielddef *f) { - return upb_deref(upb_msg_getptr(msg, f), f->type); -} - /* "Set" flag reading and writing. 
*******************************************/ /* All upb code and code using upb should guarantee that the set flags are @@ -244,85 +254,10 @@ INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, } -/* Simple, one-shot parsing ***************************************************/ - -/* A simple interface for parsing into a newly-allocated message. This - * interface should only be used when the message will be read-only with - * respect to memory management (eg. won't add or remove internal references to - * dynamic memory). For more flexible (but also more complicated) interfaces, - * see below and in upb_mm_msg.h. */ - -/* Parses the protobuf in s (which is expected to be complete) and allocates - * new message data to hold it. If byref is set, strings in the returned - * upb_msg will reference s instead of copying from it, but this requires that - * s will live for as long as the returned message does. */ -struct upb_msg *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s); - -/* This function should be used to free messages that were parsed with - * upb_msg_parsenew. It will free the message appropriately (including all - * submessages). */ -void upb_msg_free(struct upb_msg *msg); - - -/* Parsing with (re)allocation callbacks. *************************************/ - -/* This interface parses protocol buffers into upb_msgs, but allows the client - * to supply allocation callbacks whenever the parser needs to obtain a string, - * array, or submsg (a "dynamic field"). If the parser sees that a dynamic - * field is already present (its "set bit" is set) it will use that, resizing - * it if necessary in the case of an array. Otherwise it will call the - * allocation callback to obtain one. - * - * This may seem trivial (since nearly all clients will use malloc and free for - * memory management), but the allocation callback can be used for more than - * just allocation. 
If we are parsing data into an existing upb_msg, the - * allocation callback can examine any existing memory that is allocated for - * the dynamic field and determine whether it can reuse it. It can also - * perform memory management like refing the new field. - * - * This parser is layered on top of the event-based parser in upb_parse.h. The - * parser is upb_mm_msg.h is layered on top of this parser. - * - * This parser is fully streaming-capable. */ - -/* Should return an initialized array. */ -typedef struct upb_array *(*upb_msg_getandref_array_cb_t)( - void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f); - -/* Callback to allocate a string. If byref is true, the client should assume - * that the string will be referencing the input data. */ -typedef struct upb_string *(*upb_msg_getandref_string_cb_t)( - void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f, - bool byref); - -/* Should return a cleared message. */ -typedef struct upb_msg *(*upb_msg_getandref_msg_cb_t)( - void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f); - -struct upb_msg_parser_frame { - struct upb_msg *msg; -}; - -struct upb_msg_parser { - struct upb_stream_parser s; - bool merge; - bool byref; - struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top; - upb_msg_getandref_array_cb_t getarray_cb; - upb_msg_getandref_string_cb_t getstring_cb; - upb_msg_getandref_msg_cb_t getmsg_cb; -}; - -void upb_msg_parser_reset(struct upb_msg_parser *p, - struct upb_msg *msg, bool byref); - -/* Parses protocol buffer data out of data which has length of len. The data - * need not be a complete protocol buffer. The number of bytes parsed is - * returned in *read, and the next call to upb_msg_parse must supply data that - * is *read bytes past data in the logical stream. 
*/ -upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p, - void *data, size_t len, size_t *read); +/* Parsing ********************************************************************/ +/* TODO: a stream parser. */ +upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len); /* Serialization *************************************************************/ @@ -336,7 +271,7 @@ upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p, struct upb_msgsizes; /* Initialize/free a upb_msgsizes for the given message. */ -void upb_msgsizes_init(struct upb_msgsizes *sizes); +struct upb_msgsizes *upb_msgsizes_new(void); void upb_msgsizes_free(struct upb_msgsizes *sizes); /* Given a previously initialized sizes, recurse over the message and store its @@ -366,6 +301,10 @@ void upb_msg_serialize_init(struct upb_msg_serialize_state *s, upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s, void *buf, size_t len, size_t *written); +upb_status_t upb_msg_serialize_all(struct upb_msg *msg, + struct upb_msgsizes *sizes, + void *buf); + /* Text dump *****************************************************************/ bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive); @@ -397,7 +336,8 @@ void upb_msgdef_free(struct upb_msgdef *m); /* Sort the given field descriptors in-place, according to what we think is an * optimal ordering of fields. This can change from upb release to upb * release. */ -void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num); +void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, + size_t num); /* Clients use this function on a previously initialized upb_msgdef to resolve * the "ref" field in the upb_msg_fielddef. Since messages can refer to each diff --git a/src/upb_parse.c b/src/upb_parse.c index b7f3832..7c1ad66 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -11,7 +11,7 @@ /* May want to move this to upb.c if enough other things warrant it. 
*/ #define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, UPB_STRLIT(#ctype)}, +#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype}, struct upb_type_info upb_type_info[] = { TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE, UPB_WIRE_TYPE_64BIT, double) TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT, UPB_WIRE_TYPE_32BIT, float) diff --git a/src/upb_parse.h b/src/upb_parse.h index de4cb2c..1454dd5 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -21,16 +21,6 @@ extern "C" { #endif -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP || - type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE; -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || - type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES; -} - /* High-level parsing interface. 
**********************************************/ /* The general scheme is that the client registers callbacks that will be diff --git a/src/upb_string.c b/src/upb_string.c index 7754936..54df4f1 100644 --- a/src/upb_string.c +++ b/src/upb_string.c @@ -7,19 +7,20 @@ #include #include "upb_string.h" -bool upb_strreadfile(const char *filename, struct upb_string *data) { +struct upb_string *upb_strreadfile(const char *filename) { FILE *f = fopen(filename, "rb"); if(!f) return false; - if(fseek(f, 0, SEEK_END) != 0) return false; + if(fseek(f, 0, SEEK_END) != 0) goto error; long size = ftell(f); - if(size < 0) return false; - if(fseek(f, 0, SEEK_SET) != 0) return false; - data->byte_len = size; - upb_stralloc(data, data->byte_len); - if(fread(data->ptr, size, 1, f) != 1) { - free(data->ptr); - return false; - } + if(size < 0) goto error; + if(fseek(f, 0, SEEK_SET) != 0) goto error; + struct upb_string *s = upb_string_new(); + upb_string_resize(s, size); + if(fread(s->ptr, size, 1, f) != 1) goto error; fclose(f); - return true; + return s; + +error: + fclose(f); + return NULL; } diff --git a/src/upb_string.h b/src/upb_string.h index 9740a0b..aa62575 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -32,48 +32,35 @@ extern "C" { #include #include -/* inline if possible, emit standalone code if required. */ -#ifndef INLINE -#define INLINE static inline -#endif - -#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) -#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) +#include "upb_struct.h" -struct upb_string { - /* We expect the data to be 8-bit clean (uint8_t), but char* is such an - * ingrained convention that we follow it. */ - char *ptr; - uint32_t byte_len; - uint32_t byte_size; /* How many bytes of ptr we own. */ -}; +/* Allocation/Deallocation/Resizing. 
******************************************/ -INLINE void upb_strinit(struct upb_string *str) +INLINE struct upb_string *upb_string_new(void) { + struct upb_string *str = (struct upb_string*)malloc(sizeof(*str)); + upb_mmhead_init(&str->mmhead); str->ptr = NULL; str->byte_len = 0; str->byte_size = 0; + return str; } -INLINE void upb_struninit(struct upb_string *str) -{ - if(str->byte_size) free(str->ptr); -} - -INLINE struct upb_string *upb_strnew(void) +/* For internal use only. */ +INLINE void upb_string_destroy(struct upb_string *str) { - struct upb_string *str = (struct upb_string*)malloc(sizeof(*str)); - upb_strinit(str); - return str; + if(str->byte_size != 0) free(str->ptr); + free(str); } -INLINE void upb_strfree(struct upb_string *str) +INLINE void upb_string_unref(struct upb_string *str) { - upb_struninit(str); - free(str); + if(upb_mmhead_unref(&str->mmhead)) upb_string_destroy(str); } -INLINE void upb_stralloc(struct upb_string *str, uint32_t size) +/* Resizes the string to size, reallocating if necessary. Does not preserve + * existing data. */ +INLINE void upb_string_resize(struct upb_string *str, uint32_t size) { if(str->byte_size < size) { /* Need to resize. */ @@ -81,12 +68,10 @@ INLINE void upb_stralloc(struct upb_string *str, uint32_t size) void *oldptr = str->byte_size == 0 ? NULL : str->ptr; str->ptr = (char*)realloc(oldptr, str->byte_size); } + str->byte_len = size; } -INLINE void upb_strdrop(struct upb_string *str) -{ - upb_struninit(str); -} +/* Library functions. 
*********************************************************/ INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) { return s1->byte_len == s2->byte_len && @@ -101,26 +86,26 @@ INLINE int upb_strcmp(struct upb_string *s1, struct upb_string *s2) { INLINE void upb_strcpy(struct upb_string *dest, struct upb_string *src) { dest->byte_len = src->byte_len; - upb_stralloc(dest, dest->byte_len); + upb_string_resize(dest, dest->byte_len); memcpy(dest->ptr, src->ptr, src->byte_len); } INLINE struct upb_string *upb_strdup(struct upb_string *s) { - struct upb_string *copy = upb_strnew(); + struct upb_string *copy = upb_string_new(); upb_strcpy(copy, s); return copy; } INLINE struct upb_string *upb_strdupc(char *s) { - struct upb_string *copy = upb_strnew(); + struct upb_string *copy = upb_string_new(); copy->byte_len = strlen(s); - upb_stralloc(copy, copy->byte_len); + upb_string_resize(copy, copy->byte_len); memcpy(copy->ptr, s, copy->byte_len); return copy; } /* Reads an entire file into a newly-allocated string. */ -bool upb_strreadfile(const char *filename, struct upb_string *data); +struct upb_string *upb_strreadfile(const char *filename); /* Allows defining upb_strings as literals, ie: * struct upb_string str = UPB_STRLIT("Hello, World!\n"); diff --git a/src/upb_struct.h b/src/upb_struct.h new file mode 100644 index 0000000..9c1bb2e --- /dev/null +++ b/src/upb_struct.h @@ -0,0 +1,119 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file defines the in-memory format for messages, arrays, and strings + * (which are the three dynamically-allocated structures that make up all + * protobufs). */ + +#ifndef UPB_STRUCT_H +#define UPB_STRUCT_H + +#include "upb.h" + +/* mmhead -- this is a "base class" for strings, arrays, and messages ********/ + +struct upb_mm_ref; +struct upb_mmhead { + struct upb_mm_ref *refs; /* Head of linked list. 
*/ + uint32_t refcount; +}; + +INLINE void upb_mmhead_init(struct upb_mmhead *head) { + head->refs = NULL; + head->refcount = 1; +} + +INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) { + return head->refcount == 0 && head->refs == NULL; +} + +INLINE bool upb_mmhead_unref(struct upb_mmhead *head) { + head->refcount--; + return upb_mmhead_norefs(head); +} + +INLINE void upb_mmhead_ref(struct upb_mmhead *head) { + head->refcount++; +} + +/* Structures for msg, string, and array. *************************************/ + +/* These are all self describing. */ + +struct upb_msgdef; +struct upb_msg_fielddef; + +struct upb_msg { + struct upb_mmhead mmhead; + struct upb_msgdef *def; + uint8_t data[1]; +}; + +typedef uint32_t upb_arraylen_t; /* can be at most 2**32 elements long. */ +struct upb_array { + struct upb_mmhead mmhead; + struct upb_msg_fielddef *fielddef; /* Defines the type of the array. */ + union upb_value_ptr elements; + upb_arraylen_t len; /* Number of elements in "elements". */ + upb_arraylen_t size; /* Memory we own (0 if by reference). */ +}; + +struct upb_string { + struct upb_mmhead mmhead; + /* We expect the data to be 8-bit clean (uint8_t), but char* is such an + * ingrained convention that we follow it. */ + char *ptr; + uint32_t byte_len; + uint32_t byte_size; /* How many bytes of ptr we own, 0 if we reference. */ +}; + +/* Type-specific overlays on upb_array. 
***************************************/ + +#define UPB_DEFINE_ARRAY_TYPE(name, type) \ + struct name ## _array { \ + struct upb_mmhead mmhead; \ + struct upb_msg_fielddef *fielddef; \ + type elements; \ + upb_arraylen_t len; \ + upb_arraylen_t size; \ + }; + +UPB_DEFINE_ARRAY_TYPE(upb_double, double) +UPB_DEFINE_ARRAY_TYPE(upb_float, float) +UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t) +UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t) +UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t) +UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t) +UPB_DEFINE_ARRAY_TYPE(upb_bool, bool) +UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*) +UPB_DEFINE_ARRAY_TYPE(upb_msg, void*) + +/* Defines an array of a specific message type (an overlay of upb_array). */ +#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array +#define UPB_DEFINE_MSG_ARRAY(msg_type) \ + UPB_MSG_ARRAY(msg_type) { \ + struct upb_mmhead mmhead; \ + struct upb_msg_fielddef *fielddef; \ + msg_type **elements; \ + upb_arraylen_t len; \ + upb_arraylen_t size; \ + }; + +/* mmptr -- a pointer which polymorphically points to one of the above. 
*******/ + +union upb_mmptr { + struct upb_msg *msg; + struct upb_array *arr; + struct upb_string *str; +}; + +enum { + UPB_MM_MSG_REF, + UPB_MM_STR_REF, + UPB_MM_ARR_REF +}; +typedef uint8_t upb_mm_ptrtype; + +#endif diff --git a/src/upb_text.c b/src/upb_text.c index c9aad7e..6d43152 100644 --- a/src/upb_text.c +++ b/src/upb_text.c @@ -5,8 +5,11 @@ */ #include -#include "upb_text.h" #include "descriptor.h" +#include "upb_text.h" +#include "upb_string.h" +#include "upb_msg.h" +#include "upb_array.h" void upb_text_printval(upb_field_type_t type, union upb_value val, FILE *file) { @@ -78,3 +81,52 @@ void upb_text_pop(struct upb_text_printer *p, print_indent(p, stream); fprintf(stream, "}\n"); } + +static void printval(struct upb_text_printer *printer, union upb_value_ptr p, + struct upb_msg_fielddef *f, + google_protobuf_FieldDescriptorProto *fd, + FILE *stream); + +static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg, + FILE *stream) +{ + struct upb_msgdef *m = msg->def; + for(uint32_t i = 0; i < m->num_fields; i++) { + struct upb_msg_fielddef *f = &m->fields[i]; + google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m); + if(!upb_msg_isset(msg, f)) continue; + union upb_value_ptr p = upb_msg_getptr(msg, f); + if(upb_isarray(f)) { + struct upb_array *arr = *p.arr; + for(uint32_t j = 0; j < arr->len; j++) { + union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type); + printval(printer, elem_p, f, fd, stream); + } + } else { + printval(printer, p, f, fd, stream); + } + } +} + +static void printval(struct upb_text_printer *printer, union upb_value_ptr p, + struct upb_msg_fielddef *f, + google_protobuf_FieldDescriptorProto *fd, + FILE *stream) +{ + if(upb_issubmsg(f)) { + upb_text_push(printer, fd->name, stream); + printmsg(printer, *p.msg, stream); + upb_text_pop(printer, stream); + } else { + upb_text_printfield(printer, fd->name, f->type, upb_value_read(p, f->type), stream); + } +} + + +void upb_msg_print(struct 
upb_msg *msg, bool single_line, FILE *stream) +{ + struct upb_text_printer printer; + upb_text_printer_init(&printer, single_line); + printmsg(&printer, msg, stream); +} + -- cgit v1.2.3