summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2009-08-24 21:44:22 -0700
committerJoshua Haberman <joshua@reverberate.org>2009-08-24 21:44:22 -0700
commit040f7e6ba2e2282b80f332a031b77d7d34b4fc85 (patch)
tree1d5e273fb9fcca51f6ce299b766ee0a97ee92863 /src
parenta223f9af30738cf00c313fabee8de75d04fb9a1a (diff)
Significant memory-management refactoring and Python extension.
Diffstat (limited to 'src')
-rw-r--r--src/upb.h93
-rw-r--r--src/upb_array.h89
-rw-r--r--src/upb_context.c16
-rw-r--r--src/upb_enum.h9
-rw-r--r--src/upb_inlinedefs.c1
-rw-r--r--src/upb_mm.c208
-rw-r--r--src/upb_mm.h168
-rw-r--r--src/upb_msg.c213
-rw-r--r--src/upb_msg.h152
-rw-r--r--src/upb_parse.c2
-rw-r--r--src/upb_parse.h10
-rw-r--r--src/upb_string.c23
-rw-r--r--src/upb_string.h59
-rw-r--r--src/upb_struct.h119
-rw-r--r--src/upb_text.c54
15 files changed, 797 insertions, 419 deletions
diff --git a/src/upb.h b/src/upb.h
index 27bf5fc..af026f5 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -12,7 +12,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h> /* for size_t. */
-#include "upb_string.h"
+#include "descriptor_const.h"
#ifdef __cplusplus
extern "C" {
@@ -23,6 +23,9 @@ extern "C" {
#define INLINE static inline
#endif
+#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+
/* The maximum that any submessages can be nested. Matches proto2's limit. */
#define UPB_MAX_NESTING 64
@@ -55,12 +58,22 @@ typedef uint8_t upb_wire_type_t;
* errors, and we use it to represent exceptional circumstances. */
typedef uint8_t upb_field_type_t;
+/* Reports whether "type" is one of the two descriptor types that hold a
+ * nested message: GROUP or MESSAGE. */
+INLINE bool upb_issubmsgtype(upb_field_type_t type) {
+  switch(type) {
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
+      return true;
+    default:
+      return false;
+  }
+}
+
+/* Reports whether "type" is one of the two descriptor types stored as a
+ * string: STRING or BYTES. */
+INLINE bool upb_isstringtype(upb_field_type_t type) {
+  switch(type) {
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
+      return true;
+    default:
+      return false;
+  }
+}
+
/* Information about a given value type (upb_field_type_t). */
struct upb_type_info {
uint8_t align;
uint8_t size;
upb_wire_type_t expected_wire_type;
- struct upb_string ctype;
+ char *ctype;
};
/* Contains information for all .proto types. Indexed by upb_field_type_t. */
@@ -90,6 +103,10 @@ struct upb_tag {
/* Polymorphic values of .proto types *****************************************/
+struct upb_string;
+struct upb_array;
+struct upb_msg;
+
/* A single .proto value. The owner must have an out-of-band way of knowing
* the type, so that it knows which union member to use. */
union upb_value {
@@ -121,15 +138,83 @@ union upb_value_ptr {
void *_void;
};
+/* Expands to a brace-enclosed initializer holding the address of val's
+ * _double member, cast to void*.
+ *
+ * Unfortunately there is no way to define this so that it can be used as a
+ * generic expression, a la:
+ *   foo(UPB_VALUE_ADDROF(bar));
+ * ...you have to use it as the initializer of a upb_value_ptr:
+ *   union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
+ *   foo(p);
+ *
+ * "val" is parenthesized in the expansion so that an argument which is itself
+ * an expression (e.g. *valp) has its member selected from the whole
+ * expression rather than from its last operand. */
+#define UPB_VALUE_ADDROF(val) {(void*)&(val)._double}
+
/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need
* to know the field type to perform this operation, because we need to know
* how much memory to copy. */
-INLINE union upb_value upb_deref(union upb_value_ptr ptr, upb_field_type_t t) {
+INLINE union upb_value upb_value_read(union upb_value_ptr ptr,
+ upb_field_type_t ft) {
union upb_value val;
- memcpy(&val, ptr._void, upb_type_info[t].size);
+#define CASE(t, member_name) \
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
+ val.member_name = *ptr.member_name; \
+ break;
+ switch(ft) {
+ CASE(DOUBLE, _double)
+ CASE(FLOAT, _float)
+ CASE(INT32, int32)
+ CASE(INT64, int64)
+ CASE(UINT32, uint32)
+ CASE(UINT64, uint64)
+ CASE(SINT32, int32)
+ CASE(SINT64, int64)
+ CASE(FIXED32, uint32)
+ CASE(FIXED64, uint64)
+ CASE(SFIXED32, int32)
+ CASE(SFIXED64, int64)
+ CASE(BOOL, _bool)
+ CASE(ENUM, int32)
+ CASE(STRING, str)
+ CASE(BYTES, str)
+ CASE(MESSAGE, msg)
+ CASE(GROUP, msg)
+ default: break;
+ }
+#undef CASE
return val;
}
+/* Converts upb_value -> upb_value_ptr by storing "val" through the pointer
+ * (the opposite direction of upb_value_read). We need to know the field type
+ * to perform this operation, because it selects which union member of "val"
+ * is copied and through which member of "ptr" it is stored. */
+INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val,
+ upb_field_type_t ft) {
+/* Each CASE(t, member_name) expands to one switch arm for descriptor type t
+ * that assigns val.member_name through ptr.member_name. */
+#define CASE(t, member_name) \
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
+ *ptr.member_name = val.member_name; \
+ break;
+ switch(ft) {
+ CASE(DOUBLE, _double)
+ CASE(FLOAT, _float)
+ CASE(INT32, int32)
+ CASE(INT64, int64)
+ CASE(UINT32, uint32)
+ CASE(UINT64, uint64)
+ CASE(SINT32, int32)
+ CASE(SINT64, int64)
+ CASE(FIXED32, uint32)
+ CASE(FIXED64, uint64)
+ CASE(SFIXED32, int32)
+ CASE(SFIXED64, int64)
+ CASE(BOOL, _bool)
+ CASE(ENUM, int32)
+ CASE(STRING, str)
+ CASE(BYTES, str)
+ CASE(MESSAGE, msg)
+ CASE(GROUP, msg)
+ default: break; /* Any other field type: nothing is written. */
+ }
+#undef CASE
+}
+
union upb_symbol_ref {
struct upb_msgdef *msg;
struct upb_enum *_enum;
diff --git a/src/upb_array.h b/src/upb_array.h
index 370f6eb..732c4aa 100644
--- a/src/upb_array.h
+++ b/src/upb_array.h
@@ -23,7 +23,7 @@
#define UPB_ARRAY_H_
#include <stdlib.h>
-#include "upb.h"
+#include "upb_msg.h" /* Because we use upb_msg_fielddef */
#ifdef __cplusplus
extern "C" {
@@ -31,41 +31,6 @@ extern "C" {
struct upb_string;
-/* upb_arrays can be at most 2**32 elements long. */
-typedef uint32_t upb_arraylen_t;
-
-/* Represents an array (a repeated field) of any type. The interpretation of
- * the data in the array depends on the type. */
-struct upb_array {
- union upb_value_ptr elements;
- upb_arraylen_t len; /* Number of elements in "elements". */
- upb_arraylen_t size; /* Memory we own (0 if by reference). */
- void *gptr;
-};
-
-INLINE void upb_array_init(struct upb_array *arr)
-{
- arr->elements._void = NULL;
- arr->len = 0;
- arr->size = 0;
-}
-
-INLINE void upb_array_uninit(struct upb_array *arr)
-{
- if(arr->size) free(arr->elements._void);
-}
-
-INLINE struct upb_array *upb_array_new(void) {
- struct upb_array *arr = malloc(sizeof(*arr));
- upb_array_init(arr);
- return arr;
-}
-
-INLINE void upb_array_free(struct upb_array *arr) {
- upb_array_uninit(arr);
- free(arr);
-}
-
/* Returns a pointer to an array element. Does not perform a bounds check! */
INLINE union upb_value_ptr upb_array_getelementptr(
struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
@@ -75,10 +40,17 @@ INLINE union upb_value_ptr upb_array_getelementptr(
return ptr;
}
-INLINE union upb_value upb_array_getelement(
- struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
+/* Allocation/Deallocation/Resizing. ******************************************/
+
+INLINE struct upb_array *upb_array_new(struct upb_msg_fielddef *f)
{
- return upb_deref(upb_array_getelementptr(arr, n, type), type);
+ struct upb_array *arr = malloc(sizeof(*arr));
+ upb_mmhead_init(&arr->mmhead);
+ arr->elements._void = NULL;
+ arr->len = 0;
+ arr->size = 0;
+ arr->fielddef = f;
+ return arr;
}
INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
@@ -94,13 +66,10 @@ INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
return v;
}
-/* Resizes array to be "len" elements long and ensures we have write access
- * to the array (reallocating if necessary). Returns true iff we were
- * referencing memory for the array and dropped the reference. */
-INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen,
- upb_field_type_t type)
+/* Resizes array to be "len" elements long (reallocating if necessary). */
+INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen)
{
- size_t type_size = upb_type_info[type].size;
+ size_t type_size = upb_type_info[arr->fielddef->type].size;
bool dropped = false;
bool ref = arr->size == 0; /* Ref'ing external memory. */
void *data = arr->elements._void;
@@ -114,39 +83,11 @@ INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen,
memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size);
dropped = true;
}
+ /* TODO: fill with defaults. */
arr->len = newlen;
return dropped;
}
-/* These are all overlays on upb_array, pointers between them can be cast. */
-#define UPB_DEFINE_ARRAY_TYPE(name, type) \
- struct name ## _array { \
- struct upb_fielddef *f; \
- void *gptr; \
- type *elements; \
- upb_arraylen_t len; \
- upb_arraylen_t size; \
- };
-
-UPB_DEFINE_ARRAY_TYPE(upb_double, double)
-UPB_DEFINE_ARRAY_TYPE(upb_float, float)
-UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t)
-UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t)
-UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
-UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
-UPB_DEFINE_ARRAY_TYPE(upb_bool, bool)
-UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
-UPB_DEFINE_ARRAY_TYPE(upb_msg, void*)
-
-/* Defines an array of a specific message type (an overlay of upb_array). */
-#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
-#define UPB_DEFINE_MSG_ARRAY(msg_type) \
- UPB_MSG_ARRAY(msg_type) { \
- msg_type **elements; \
- upb_arraylen_t len; \
- upb_arraylen_t size; \
- };
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/upb_context.c b/src/upb_context.c
index 12ad8c7..0d64c3e 100644
--- a/src/upb_context.c
+++ b/src/upb_context.c
@@ -10,6 +10,7 @@
#include "upb_context.h"
#include "upb_enum.h"
#include "upb_msg.h"
+#include "upb_mm.h"
/* Search for a character in a string, in reverse. */
static int my_memrchr(char *data, char c, size_t len)
@@ -66,7 +67,7 @@ static void free_context(struct upb_context *c)
{
free_symtab(&c->symtab);
for(size_t i = 0; i < c->fds_len; i++)
- upb_msg_free((struct upb_msg*)c->fds[i]);
+ upb_msg_unref((struct upb_msg*)c->fds[i]);
free_symtab(&c->psymtab);
free(c->fds);
}
@@ -77,9 +78,9 @@ void upb_context_unref(struct upb_context *c)
upb_rwlock_wrlock(&c->lock);
free_context(c);
upb_rwlock_unlock(&c->lock);
+ free(c);
+ upb_rwlock_destroy(&c->lock);
}
- free(c);
- upb_rwlock_destroy(&c->lock);
}
bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol,
@@ -325,10 +326,9 @@ bool upb_context_addfds(struct upb_context *c,
}
bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
- google_protobuf_FileDescriptorSet *fds =
- (google_protobuf_FileDescriptorSet*)upb_msg_parsenew(c->fds_msg, fds_str);
- if(!fds) return false;
- if(!upb_context_addfds(c, fds)) return false;
+ struct upb_msg *fds = upb_msg_new(c->fds_msg);
+ if(upb_msg_parsestr(fds, fds_str->ptr, fds_str->byte_len) != UPB_STATUS_OK) return false;
+ if(!upb_context_addfds(c, (google_protobuf_FileDescriptorSet*)fds)) return false;
{
/* We own fds now, need to keep a ref so we can free it later. */
@@ -337,7 +337,7 @@ bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
c->fds_size *= 2;
c->fds = realloc(c->fds, c->fds_size);
}
- c->fds[c->fds_len++] = fds;
+ c->fds[c->fds_len++] = (google_protobuf_FileDescriptorSet*)fds;
upb_rwlock_unlock(&c->lock);
}
return true;
diff --git a/src/upb_enum.h b/src/upb_enum.h
index e43a203..9acc075 100644
--- a/src/upb_enum.h
+++ b/src/upb_enum.h
@@ -33,15 +33,6 @@ struct upb_enum_iton_entry {
struct upb_string *string;
};
-INLINE void upb_enum_ref(struct upb_enum *e) {
- if(upb_atomic_ref(&e->refcount)) upb_context_ref(e->context);
-}
-
-INLINE void upb_enum_unref(struct upb_enum *e) {
- if(upb_atomic_unref(&e->refcount)) upb_context_unref(e->context);
-}
-
-
/* Initializes and frees an enum, respectively. Caller retains ownership of
* ed, but it must outlive e. */
void upb_enum_init(struct upb_enum *e,
diff --git a/src/upb_inlinedefs.c b/src/upb_inlinedefs.c
index dae5c01..7a55e06 100644
--- a/src/upb_inlinedefs.c
+++ b/src/upb_inlinedefs.c
@@ -15,6 +15,7 @@
#include "upb_array.h"
#include "upb_context.h"
#include "upb_enum.h"
+#include "upb_mm.h"
#include "upb_msg.h"
#include "upb_parse.h"
#include "upb_serialize.h"
diff --git a/src/upb_mm.c b/src/upb_mm.c
new file mode 100644
index 0000000..853d572
--- /dev/null
+++ b/src/upb_mm.c
@@ -0,0 +1,208 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ */
+
+#include "upb_mm.h"
+#include "upb_string.h"
+#include "upb_array.h"
+#include "upb_msg.h"
+
+/* Tears down "msg": for every field that is both set and memory-managed,
+ * drops one counted reference on the field's value, then frees the message's
+ * own storage. */
+void upb_msg_destroy(struct upb_msg *msg) {
+  for(uint32_t n = 0; n < msg->def->num_fields; n++) {
+    struct upb_msg_fielddef *field = &msg->def->fields[n];
+    if(upb_msg_isset(msg, field) && upb_field_ismm(field)) {
+      upb_mm_ptrtype ptrtype = upb_field_ptrtype(field);
+      union upb_value_ptr slot = upb_msg_getptr(msg, field);
+      upb_mm_unref(upb_mmptr_read(slot, ptrtype), ptrtype);
+    }
+  }
+  free(msg);
+}
+
+/* Destroys "arr". When the array's elements are memory-managed, each of the
+ * arr->len elements is unref'd first. The element storage is freed only when
+ * arr->size is nonzero; the array structure itself is always freed. */
+void upb_array_destroy(struct upb_array *arr)
+{
+  struct upb_msg_fielddef *f = arr->fielddef;
+  if(upb_elem_ismm(f)) {
+    for(upb_arraylen_t n = 0; n < arr->len; n++) {
+      union upb_value_ptr slot = upb_array_getelementptr(arr, n, f->type);
+      upb_mm_ptrtype elemtype = upb_elem_ptrtype(f);
+      upb_mm_unref(upb_mmptr_read(slot, elemtype), elemtype);
+    }
+  }
+  if(arr->size) free(arr->elements._void);
+  free(arr);
+}
+
+/* Allocates a new dynamic object of the kind named by "type" and returns it
+ * in the matching member of a upb_mmptr:
+ *   UPB_MM_MSG_REF: a message of f's referenced msgdef (f->ref.msg).
+ *   UPB_MM_STR_REF: a string.
+ *   UPB_MM_ARR_REF: an array for field f.
+ * Any other type asserts; the returned pointer is then NULL.
+ *
+ * Each case ends in its own break: without them every type would fall
+ * through, allocate the later kinds as well (leaking them) and reach the
+ * assert. */
+static union upb_mmptr upb_mm_newptr(upb_mm_ptrtype type,
+                                     struct upb_msg_fielddef *f)
+{
+  union upb_mmptr p = {NULL};
+  switch(type) {
+    case UPB_MM_MSG_REF: p.msg = upb_msg_new(f->ref.msg); break;
+    case UPB_MM_STR_REF: p.str = upb_string_new(); break;
+    case UPB_MM_ARR_REF: p.arr = upb_array_new(f); break;
+    default: assert(false); break;
+  }
+  return p;
+}
+
+/* Returns the listed reference that "mm" holds on the object "p" (of kind
+ * "type"), creating one if none exists.
+ *
+ * The object's ref list is kept ordered by ascending mm pointer, so the
+ * search stops at the first entry whose mm is greater than "mm"; a new ref is
+ * linked in at that position. New refs are obtained from mm->newref_cb, which
+ * receives "fromref" unchanged; p, type, mm and next are then filled in here.
+ *
+ * *created is always written: false when an existing ref is returned, true
+ * when a new one was made. */
+static struct upb_mm_ref *find_or_create_ref(struct upb_mm_ref *fromref,
+                                             struct upb_mm *mm,
+                                             union upb_mmptr p, upb_mm_ptrtype type,
+                                             bool *created)
+{
+  struct upb_mmhead *head = upb_mmhead_addr(p, type);
+  struct upb_mm_ref **ref = &head->refs;
+  while(*ref && (*ref)->mm <= mm) {
+    if((*ref)->mm == mm) {
+      /* Must be stored before returning; previously this assignment sat
+       * after the return and never ran. */
+      *created = false;
+      return *ref;
+    }
+    ref = &((*ref)->next);
+  }
+  *created = true;
+  struct upb_mm_ref *newref = mm->newref_cb(fromref, p, type);
+  newref->p = p;
+  newref->type = type;
+  newref->mm = mm;
+  newref->next = *ref;
+  *ref = newref;
+  return newref;
+}
+
+/* Finds, or creates, the listed reference that "mm" holds on the object "p"
+ * of kind "type". No originating ref is passed along to the mm's callback.
+ * "created" is forwarded to find_or_create_ref. */
+struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type,
+                                 struct upb_mm *mm, bool *created)
+{
+  struct upb_mm_ref *no_fromref = NULL;
+  struct upb_mm_ref *ref = find_or_create_ref(no_fromref, mm, p, type, created);
+  return ref;
+}
+
+/* Creates a new message of type "def" and returns a listed reference to it
+ * for "mm". After the listed ref is made, one counted reference on the
+ * message is dropped, so that the message shouldn't have any counted refs. */
+struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm)
+{
+  bool created;
+  union upb_mmptr newmsg;
+  newmsg.msg = upb_msg_new(def);
+  struct upb_mm_ref *ref =
+      find_or_create_ref(NULL, mm, newmsg, UPB_MM_MSG_REF, &created);
+  upb_mm_unref(newmsg, UPB_MM_MSG_REF);
+  assert(created);  /* A brand-new message cannot already have a ref. */
+  return ref;
+}
+
+/* Returns a listed reference (for msgref's mm) to the value of field "f" of
+ * the message behind "msgref". f must be a memory-managed field. If the
+ * field is not set, it is marked set and a new empty value is allocated and
+ * stored first. *refcreated is written by find_or_create_ref. */
+struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref,
+                                      struct upb_msg_fielddef *f,
+                                      bool *refcreated)
+{
+  assert(upb_field_ismm(f));
+  upb_mm_ptrtype ptrtype = upb_field_ptrtype(f);
+  struct upb_msg *msg = msgref->p.msg;
+  union upb_value_ptr slot = upb_msg_getptr(msg, f);
+  union upb_mmptr val;
+
+  if(upb_msg_isset(msg, f)) {
+    val = upb_mmptr_read(slot, ptrtype);
+  } else {
+    /* No value yet: flag the field as set, then allocate and store one. */
+    upb_msg_set(msg, f);
+    val = upb_mm_newptr(ptrtype, f);
+    upb_mmptr_write(slot, val, ptrtype);
+  }
+
+  return find_or_create_ref(msgref, msgref->mm, val, ptrtype, refcreated);
+}
+
+/* Returns a listed reference (for arrref's mm) to element "i" of the array
+ * behind "arrref". The array's elements must be memory-managed and i must be
+ * less than the array's len; both are asserted. *refcreated is written by
+ * find_or_create_ref. */
+struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i,
+                                     bool *refcreated)
+{
+  struct upb_array *arr = arrref->p.arr;
+  struct upb_msg_fielddef *f = arr->fielddef;
+  assert(upb_elem_ismm(f));
+  assert(i < arr->len);
+  union upb_value_ptr slot = upb_array_getelementptr(arr, i, f->type);
+  upb_mm_ptrtype elemtype = upb_elem_ptrtype(f);
+  union upb_mmptr elem = upb_mmptr_read(slot, elemtype);
+  return find_or_create_ref(arrref, arrref->mm, elem, elemtype, refcreated);
+}
+
+/* Unlinks "ref" from the ref list of the object it points to. If
+ * upb_mmhead_norefs then reports true for that object, the object itself is
+ * destroyed according to ref->type. The upb_mm_ref record is not freed here.
+ *
+ * The entry is located by matching ref->mm; the caller guarantees that such
+ * an entry is in the list (asserted). */
+void upb_mm_release(struct upb_mm_ref *ref)
+{
+  /* Locate the list through the object's mmhead, as find_or_create_ref does,
+   * instead of assuming the list pointer is at the object's first byte. */
+  struct upb_mmhead *head = upb_mmhead_addr(ref->p, ref->type);
+  struct upb_mm_ref **ref_elem = &head->refs;
+  struct upb_mm *mm = ref->mm;
+  while(true) {
+    assert(*ref_elem);  /* Client asserts r->mm is in the list. */
+    if((*ref_elem)->mm == mm) {
+      *ref_elem = (*ref_elem)->next;  /* Remove from the list. */
+      break;
+    }
+    /* Step to the next link; without this the loop never terminated unless
+     * the first entry happened to match. */
+    ref_elem = &(*ref_elem)->next;
+  }
+
+  if(upb_mmhead_norefs(head)) {
+    /* Destroy the dynamic object. */
+    switch(ref->type) {
+      case UPB_MM_MSG_REF:
+        upb_msg_destroy(ref->p.msg);
+        break;
+      case UPB_MM_ARR_REF:
+        upb_array_destroy(ref->p.arr);
+        break;
+      case UPB_MM_STR_REF:
+        upb_string_destroy(ref->p.str);
+        break;
+      default: assert(false); break;
+    }
+  }
+}
+
+/* Points memory-managed field "f" of the message behind "from_msg_ref" at
+ * the object behind "to_ref". If the field already holds that same object,
+ * nothing happens. Otherwise any existing value is unref'd, the field is
+ * marked set, the new pointer is stored and the new object gains a counted
+ * reference.
+ *
+ * NOTE(review): upb_mm.h declares this operation as upb_mm_msgset (no
+ * underscore before "set") -- confirm which spelling callers expect. */
+void upb_mm_msg_set(struct upb_mm_ref *from_msg_ref, struct upb_mm_ref *to_ref,
+                    struct upb_msg_fielddef *f)
+{
+  assert(upb_field_ismm(f));
+  struct upb_msg *msg = from_msg_ref->p.msg;
+  union upb_mmptr newval = to_ref->p;
+  union upb_value_ptr slot = upb_msg_getptr(msg, f);
+  upb_mm_ptrtype ptrtype = upb_field_ptrtype(f);
+
+  if(upb_msg_isset(msg, f)) {
+    union upb_mmptr oldval = upb_mmptr_read(slot, ptrtype);
+    if(oldval.msg == newval.msg) {
+      return;  /* Setting to its existing value, do nothing. */
+    }
+    upb_mm_unref(oldval, ptrtype);
+  }
+
+  upb_msg_set(msg, f);
+  upb_mmptr_write(slot, newval, ptrtype);
+  upb_mm_ref(newval, ptrtype);
+}
+
+/* Clears memory-managed field "f" of the message behind "from_msg_ref". An
+ * unset field is left alone; otherwise the field is marked unset and the
+ * value it held is unref'd. */
+void upb_mm_msgclear(struct upb_mm_ref *from_msg_ref, struct upb_msg_fielddef *f)
+{
+  assert(upb_field_ismm(f));
+  struct upb_msg *msg = from_msg_ref->p.msg;
+  upb_mm_ptrtype ptrtype = upb_field_ptrtype(f);
+  if(!upb_msg_isset(msg, f)) return;
+
+  union upb_value_ptr slot = upb_msg_getptr(msg, f);
+  union upb_mmptr oldval = upb_mmptr_read(slot, ptrtype);
+  upb_msg_unset(msg, f);
+  upb_mm_unref(oldval, ptrtype);
+}
+
+/* Clears every memory-managed field of the message behind "from" by calling
+ * upb_mm_msgclear on each; fields that are not memory-managed are skipped. */
+void upb_mm_msgclear_all(struct upb_mm_ref *from)
+{
+  struct upb_msgdef *def = from->p.msg->def;
+  uint32_t n = 0;
+  while(n < def->num_fields) {
+    struct upb_msg_fielddef *field = &def->fields[n];
+    if(upb_field_ismm(field)) {
+      upb_mm_msgclear(from, field);
+    }
+    n++;
+  }
+}
+
+/* Placeholder: currently does nothing. Every parameter is explicitly
+ * discarded so that the empty body does not produce unused-parameter
+ * warnings.
+ *
+ * NOTE(review): upb_mm.h declares upb_mm_arrset(from, to, i) -- a different
+ * spelling and without the "type" parameter; confirm the intended
+ * signature. */
+void upb_mm_arr_set(struct upb_mm_ref *from, struct upb_mm_ref *to,
+                    upb_arraylen_t i, upb_field_type_t type)
+{
+  (void)type;
+  (void)i;
+  (void)to;
+  (void)from;
+}
diff --git a/src/upb_mm.h b/src/upb_mm.h
new file mode 100644
index 0000000..88cb043
--- /dev/null
+++ b/src/upb_mm.h
@@ -0,0 +1,168 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ *
+ * A parsed protobuf is represented in memory as a tree. The three kinds of
+ * nodes in this tree are messages, arrays, and strings. This file defines
+ * a memory-management scheme for making sure that these nodes are collected
+ * at the right times.
+ *
+ * The basic strategy is reference-counting, but with a twist. Since any
+ * dynamic language that wishes to reference these nodes will need its own,
+ * language-specific structure, we provide two different kinds of references:
+ *
+ * - counted references. these are references that are tracked with only a
+ * reference count. They are used for two separate purposes:
+ * 1. for references within the tree, from one node to another.
+ * 2. for external references into the tree, where the referer does not need
+ * a separate message structure.
+ * - listed references. these are references that have their own separate
+ * data record. these separate records are kept in a linked list.
+ */
+
+#ifndef UPB_MM_H_
+#define UPB_MM_H_
+
+#include "upb.h"
+#include "upb_string.h"
+#include "upb_array.h"
+#include "upb_msg.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Structure definitions. *****************************************************/
+
+typedef int16_t upb_mm_id;
+
+struct upb_msg;
+struct upb_array;
+struct upb_string;
+struct upb_msg_fielddef;
+
+struct upb_mm_ref;
+/* Info about a mm. Listed references are kept per mm: each upb_mm_ref
+ * records the mm it belongs to in its "mm" member. */
+struct upb_mm {
+ /* Callback that supplies the upb_mm_ref record for a new listed reference
+  * to object "p" of kind "type". It is invoked as
+  * mm->newref_cb(fromref, p, type), and the result is dereferenced without
+  * a NULL check.
+  * fromref is set iff this call is from getfieldref or getelemref. */
+ struct upb_mm_ref *(*newref_cb)(struct upb_mm_ref *fromref,
+ union upb_mmptr p, upb_mm_ptrtype type);
+};
+
+/* A listed reference: a separate record describing one reference to a msg,
+ * array or string node. */
+struct upb_mm_ref {
+ /* The object this reference points to; which member is valid is given by
+  * "type". */
+ union upb_mmptr p;
+ /* This is slightly wasteful, because the mm-specific ref will probably also
+ * contain the information about what kind of ref this is, in a different
+ * form. */
+ upb_mm_ptrtype type;
+ /* The mm that this reference belongs to. */
+ struct upb_mm *mm; /* TODO: There are ways to shrink this. */
+ struct upb_mm_ref *next; /* Linked list for refs to the same value. */
+};
+
+/* Functions for working with listed references. *****************************/
+
+/* Create a new top-level message and create a single ref for it. */
+struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm);
+
+/* Given a pointer to an existing msg, array, or string, find a ref for this
+ * mm, creating one if necessary. 'created' indicates whether the returned
+ * reference was just created. */
+struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type,
+ struct upb_mm *mm, bool *created);
+
+/* f must be ismm == true. The msg field may or may not be set (will be
+ * created if it doesn't exist). If a ref already exists for the given field,
+ * returns it, otherwise calls the given callback to create one. 'created'
+ * indicates whether a new reference was created. */
+struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref,
+ struct upb_msg_fielddef *f,
+ bool *refcreated);
+/* i must be < the array's len. */
+struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i,
+ bool *refcreated);
+
+/* Remove this ref from the list for this msg.
+ * If that was the last reference, deletes the msg itself. */
+void upb_mm_release(struct upb_mm_ref *ref);
+
+void upb_mm_msgset(struct upb_mm_ref *msg, struct upb_mm_ref *to,
+ struct upb_msg_fielddef *f);
+void upb_mm_msgclear(struct upb_mm_ref *from, struct upb_msg_fielddef *f);
+void upb_mm_msgclear_all(struct upb_mm_ref *from);
+
+void upb_mm_arrset(struct upb_mm_ref *from, struct upb_mm_ref *to, uint32_t i);
+
+/* Defined iff upb_field_ismm(f). */
+INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f);
+/* Defined iff upb_elem_ismm(f). */
+INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f);
+
+INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type);
+
+/* These methods are all a bit silly, since all branches of the case compile
+ * to the same thing (which the compiler will recognize), but we do it this way
+ * for full union correctness. */
+/* Loads the object pointer stored at "p" into the upb_mmptr member selected
+ * by "t". A t that is none of the three known kinds asserts and is then
+ * read as a msg pointer. */
+INLINE union upb_mmptr upb_mmptr_read(union upb_value_ptr p, upb_mm_ptrtype t)
+{
+  union upb_mmptr val;
+  if(t == UPB_MM_STR_REF) {
+    val.str = *p.str;
+  } else if(t == UPB_MM_ARR_REF) {
+    val.arr = *p.arr;
+  } else {
+    if(t != UPB_MM_MSG_REF) assert(false);  /* Shouldn't happen. */
+    val.msg = *p.msg;
+  }
+  return val;
+}
+
+/* Stores the object pointer held in the member of "val" selected by "t"
+ * through the matching member of "p". A t that is none of the three known
+ * kinds asserts and stores nothing. */
+INLINE void upb_mmptr_write(union upb_value_ptr p, union upb_mmptr val,
+                            upb_mm_ptrtype t)
+{
+  switch(t) {
+    case UPB_MM_MSG_REF: *p.msg = val.msg; break;
+    case UPB_MM_STR_REF: *p.str = val.str; break;
+    case UPB_MM_ARR_REF: *p.arr = val.arr; break;
+    /* The former "val.msg = *p.msg" here only read through p and assigned to
+     * the by-value parameter, so it had no effect; it has been dropped. */
+    default: assert(false); break;  /* Shouldn't happen. */
+  }
+}
+
+void upb_array_destroy(struct upb_array *arr);
+void upb_msg_destroy(struct upb_msg *msg);
+
+/* Drops one counted reference on "msg" and destroys the message when
+ * upb_mmhead_unref reports true. */
+INLINE void upb_msg_unref(struct upb_msg *msg) {
+  if(!upb_mmhead_unref(&msg->mmhead)) return;
+  upb_msg_destroy(msg);
+}
+
+/* Drops one counted reference on "arr" and destroys the array when
+ * upb_mmhead_unref reports true. */
+INLINE void upb_array_unref(struct upb_array *arr) {
+  if(!upb_mmhead_unref(&arr->mmhead)) return;
+  upb_array_destroy(arr);
+}
+
+/* Drops one counted reference on the object "p", dispatching on "type" to
+ * the unref function for messages, strings or arrays. As in the other
+ * ptrtype switches in this file, an unknown type asserts; nothing is
+ * unref'd in that case. */
+INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type)
+{
+  switch(type) {
+    case UPB_MM_MSG_REF: upb_msg_unref(p.msg); break;
+    case UPB_MM_STR_REF: upb_string_unref(p.str); break;
+    case UPB_MM_ARR_REF: upb_array_unref(p.arr); break;
+    default: assert(false); break;  /* Shouldn't happen. */
+  }
+}
+
+/* Returns the address of the mmhead embedded in the object "p", using the
+ * union member selected by "t". A t that is none of the three known kinds
+ * asserts and is then treated as a msg. */
+static struct upb_mmhead *upb_mmhead_addr(union upb_mmptr p, upb_mm_ptrtype t)
+{
+  switch(t) {
+    case UPB_MM_MSG_REF: return &p.msg->mmhead;
+    case UPB_MM_STR_REF: return &p.str->mmhead;
+    case UPB_MM_ARR_REF: return &p.arr->mmhead;
+    default: break;
+  }
+  assert(false);  /* Shouldn't happen. */
+  return &p.msg->mmhead;
+}
+
+/* Takes one counted reference on the object "p" of kind "type", by calling
+ * upb_mmhead_ref on the object's mmhead. */
+INLINE void upb_mm_ref(union upb_mmptr p, upb_mm_ptrtype type)
+{
+  struct upb_mmhead *head = upb_mmhead_addr(p, type);
+  upb_mmhead_ref(head);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_MM_H_ */
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 45f889d..80602dd 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -6,8 +6,9 @@
#include <inttypes.h>
#include <stdlib.h>
-#include "descriptor.h"
#include "upb_msg.h"
+#include "descriptor.h"
+#include "upb_mm.h"
#include "upb_parse.h"
#include "upb_serialize.h"
#include "upb_text.h"
@@ -47,7 +48,6 @@ bool upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d,
/* TODO: more complete validation. */
if(!d->set_flags.has.field) return false;
- upb_atomic_refcount_init(&m->refcount, 0);
upb_inttable_init(&m->fields_by_num, d->field->len,
sizeof(struct upb_fieldsbynum_entry));
upb_strtable_init(&m->fields_by_name, d->field->len,
@@ -123,113 +123,43 @@ void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
str_e->f.ref = ref;
}
-/* Simple, one-shot parsing ***************************************************/
-
-static void *upb_msg_new(struct upb_msgdef *md)
-{
- size_t size = md->size + (sizeof(void*) * 2);
- struct upb_msg *msg = malloc(size);
- memset(msg, 0, size);
- msg->def = md;
- return msg;
-}
+/* Parsing. ******************************************************************/
-/* Allocation callbacks. */
-struct upb_array *getarray_cb(
- void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f)
-{
- (void)from_gptr;
- (void)existingval; /* Don't care -- always zero. */
- (void)f;
- return upb_array_new();
-}
+/* One entry of the msg parser's stack. */
+struct upb_msg_parser_frame {
+ /* The message that parsed values are stored into while this frame is the
+  * top of the stack. */
+ struct upb_msg *msg;
+};
-static struct upb_string *getstring_cb(
- void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f,
- bool byref)
-{
- (void)from_gptr;
- (void)existingval; /* Don't care -- always zero. */
- (void)f;
- (void)byref;
- return upb_strnew();
-}
+/* State for parsing protobuf data into a upb_msg. */
+struct upb_msg_parser {
+ /* The underlying stream parser. */
+ struct upb_stream_parser s;
+ /* NOTE(review): the use of "merge" is not visible here -- confirm. */
+ bool merge;
+ /* NOTE(review): presumably requests that strings reference the input
+  * buffer instead of copying it -- confirm. */
+ bool byref;
+ /* Stack of frames with room for UPB_MAX_NESTING entries; "top" points at
+  * the frame of the message currently being parsed. */
+ struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
+};
-static struct upb_msg *getmsg_cb(
- void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f)
-{
- (void)from_gptr;
- (void)existingval; /* Don't care -- always zero. */
- return upb_msg_new(f->ref.msg);
-}
+void upb_msg_parser_reset(struct upb_msg_parser *p,
+ struct upb_msg *msg, bool byref);
-struct upb_msg *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s)
-{
- struct upb_msg_parser mp;
- struct upb_msg *msg = upb_msg_new(md);
- upb_msg_parser_reset(&mp, msg, false);
- mp.getarray_cb = getarray_cb;
- mp.getstring_cb = getstring_cb;
- mp.getmsg_cb = getmsg_cb;
- size_t read;
- upb_status_t status = upb_msg_parser_parse(&mp, s->ptr, s->byte_len, &read);
- if(status == UPB_STATUS_OK && read == s->byte_len) {
- return msg;
- } else {
- upb_msg_free(msg);
- return NULL;
- }
-}
+/* Parses protocol buffer data out of data which has length of len. The data
+ * need not be a complete protocol buffer. The number of bytes parsed is
+ * returned in *read, and the next call to upb_msg_parse must supply data that
+ * is *read bytes past data in the logical stream. */
+upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
+ void *data, size_t len, size_t *read);
-/* For simple, one-shot parsing we assume that a dynamic field exists (and
- * needs to be freed) iff its set bit is set. */
-static void free_value(union upb_value_ptr p, struct upb_msg_fielddef *f)
-{
- if(upb_isstring(f)) {
- free((*p.str)->ptr);
- free(*p.str);
- } else if(upb_issubmsg(f)) {
- upb_msg_free(*p.msg);
- }
-}
-void upb_msg_free(struct upb_msg *msg)
-{
- if(!msg) return; /* A very free-like thing to do. */
- struct upb_msgdef *m = msg->def;
- for(unsigned int i = 0; i < m->num_fields; i++) {
- struct upb_msg_fielddef *f = &m->fields[i];
- if(!upb_msg_isset(msg, f)) continue;
- union upb_value_ptr p = upb_msg_getptr(msg, f);
- if(upb_isarray(f)) {
- assert(*p.arr);
- for(upb_arraylen_t j = 0; j < (*p.arr)->len; j++)
- free_value(upb_array_getelementptr(*p.arr, j, f->type), f);
- upb_array_free(*p.arr);
- } else {
- free_value(p, f);
- }
- }
- free(msg);
-}
-
-/* Parsing. ******************************************************************/
/* Helper function that returns a pointer to where the next value for field "f"
* should be stored, taking into account whether f is an array that may need to
* be allocated or resized. */
static union upb_value_ptr get_value_ptr(struct upb_msg *msg,
- struct upb_msg_fielddef *f,
- void **gptr,
- upb_msg_getandref_array_cb_t getarray_cb)
+ struct upb_msg_fielddef *f)
{
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
bool isset = upb_msg_isset(msg, f);
size_t len = isset ? (*p.arr)->len : 0;
- if(!isset) *p.arr = getarray_cb(*gptr, *p.arr, f);
- upb_array_resize(*p.arr, len+1, f->type);
- *gptr = (*p.arr)->gptr;
+ if(!isset) *p.arr = upb_array_new(f);
+ upb_array_resize(*p.arr, len+1);
p = upb_array_getelementptr(*p.arr, len, f->type);
}
return p;
@@ -255,8 +185,7 @@ static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end,
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = user_field_desc;
struct upb_msg *msg = mp->top->msg;
- void *gptr = upb_msg_gptr(msg);
- union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb);
+ union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_set(msg, f);
UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf));
return UPB_STATUS_OK;
@@ -269,21 +198,20 @@ static void str_cb(void *udata, uint8_t *str,
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = udesc;
struct upb_msg *msg = mp->top->msg;
- void *gptr = upb_msg_gptr(msg);
- union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb);
+ union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_set(msg, f);
if(avail_len != total_len) abort(); /* TODO: support streaming. */
- bool byref = avail_len == total_len && mp->byref;
- *p.str = mp->getstring_cb(gptr, *p.str, f, byref);
- if(byref) {
- upb_strdrop(*p.str);
- (*p.str)->ptr = (char*)str;
- (*p.str)->byte_len = avail_len;
- } else {
- upb_stralloc(*p.str, total_len);
+ //bool byref = avail_len == total_len && mp->byref;
+ *p.str = upb_string_new();
+ //if(byref) {
+ // upb_strdrop(*p.str);
+ // (*p.str)->ptr = (char*)str;
+ // (*p.str)->byte_len = avail_len;
+ //} else {
+ upb_string_resize(*p.str, total_len);
memcpy((*p.str)->ptr, str, avail_len);
(*p.str)->byte_len = avail_len;
- }
+ //}
}
static void submsg_start_cb(void *udata, void *user_field_desc)
@@ -291,22 +219,39 @@ static void submsg_start_cb(void *udata, void *user_field_desc)
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = user_field_desc;
struct upb_msg *oldmsg = mp->top->msg;
- void *gptr = upb_msg_gptr(oldmsg);
- union upb_value_ptr p = get_value_ptr(oldmsg, f, &gptr, mp->getarray_cb);
+ union upb_value_ptr p = get_value_ptr(oldmsg, f);
+ struct upb_msg **submsg = p.msg;
+ //if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) {
+ // /* We can reuse the existing submsg. */
+ //} else {
+ *submsg = upb_msg_new(f->ref.msg);
+ //}
+ upb_msg_clear(*submsg);
upb_msg_set(oldmsg, f);
- *p.msg = mp->getmsg_cb(gptr, *p.msg, f);
mp->top++;
- mp->top->msg = *p.msg;
+ mp->top->msg = *submsg;
}
static void submsg_end_cb(void *udata)
{
struct upb_msg_parser *mp = udata;
+ struct upb_msg *msg = mp->top->msg;
+ /* TODO: free any remaining dynamic storage that was not reused. */
+ (void)msg;
mp->top--;
}
/* Externally-visible functions for the msg parser. */
+/* One-shot convenience wrapper: sets up a temporary msg parser on "msg"
+ * (byref disabled), feeds it the "len" bytes at "buf" in a single call and
+ * returns the parser's status.
+ *
+ * NOTE(review): the number of bytes consumed is obtained but not compared
+ * with len -- confirm whether a short parse should be reported as an
+ * error. */
+upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len)
+{
+  size_t read;
+  struct upb_msg_parser mp;
+  upb_msg_parser_reset(&mp, msg, false);
+  return upb_msg_parser_parse(&mp, buf, len, &read);
+}
+
void upb_msg_parser_reset(struct upb_msg_parser *s, struct upb_msg *msg, bool byref)
{
upb_stream_parser_reset(&s->s, s);
@@ -592,51 +537,3 @@ bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive)
}
return true;
}
-
-
-static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
- struct upb_msg_fielddef *f,
- google_protobuf_FieldDescriptorProto *fd,
- FILE *stream);
-
-static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
- FILE *stream)
-{
- struct upb_msgdef *m = msg->def;
- for(uint32_t i = 0; i < m->num_fields; i++) {
- struct upb_msg_fielddef *f = &m->fields[i];
- google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
- if(!upb_msg_isset(msg, f)) continue;
- union upb_value_ptr p = upb_msg_getptr(msg, f);
- if(upb_isarray(f)) {
- struct upb_array *arr = *p.arr;
- for(uint32_t j = 0; j < arr->len; j++) {
- union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
- printval(printer, elem_p, f, fd, stream);
- }
- } else {
- printval(printer, p, f, fd, stream);
- }
- }
-}
-
-static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
- struct upb_msg_fielddef *f,
- google_protobuf_FieldDescriptorProto *fd,
- FILE *stream)
-{
- if(upb_issubmsg(f)) {
- upb_text_push(printer, fd->name, stream);
- printmsg(printer, *p.msg, stream);
- upb_text_pop(printer, stream);
- } else {
- upb_text_printfield(printer, fd->name, f->type, upb_deref(p, f->type), stream);
- }
-}
-
-void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream)
-{
- struct upb_text_printer printer;
- upb_text_printer_init(&printer, single_line);
- printmsg(&printer, msg, stream);
-}
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 9dc1827..abec479 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -52,10 +52,10 @@
#include <stdbool.h>
#include <stdint.h>
+#include <stddef.h>
+#include "descriptor.h"
#include "upb.h"
-#include "upb_atomic.h"
-#include "upb_context.h"
#include "upb_parse.h"
#include "upb_table.h"
@@ -66,10 +66,11 @@ extern "C" {
/* Message definition. ********************************************************/
struct upb_msg_fielddef;
+struct upb_context;
/* Structure that describes a single .proto message type. */
struct upb_msgdef {
- upb_atomic_refcount_t refcount;
struct upb_context *context;
+ struct upb_msg *default_msg; /* Message with all default values set. */
struct google_protobuf_DescriptorProto *descriptor;
struct upb_string fqname; /* Fully qualified. */
size_t size;
@@ -82,7 +83,6 @@ struct upb_msgdef {
struct google_protobuf_FieldDescriptorProto **field_descriptors;
};
-
/* Structure that describes a single field in a message. This structure is very
* consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
* because copies of this struct are in the hash table that is read in the
@@ -96,14 +96,6 @@ struct upb_msg_fielddef {
upb_label_t label;
};
-INLINE void upb_msgdef_ref(struct upb_msgdef *m) {
- if(upb_atomic_ref(&m->refcount)) upb_context_ref(m->context);
-}
-
-INLINE void upb_msgdef_unref(struct upb_msgdef *m) {
- if(upb_atomic_unref(&m->refcount)) upb_context_unref(m->context);
-}
-
INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) {
return upb_issubmsgtype(f->type);
}
@@ -114,6 +106,29 @@ INLINE bool upb_isarray(struct upb_msg_fielddef *f) {
return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED;
}
+INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) {
+ return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
+}
+
+INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) {
+ return upb_isstring(f) || upb_issubmsg(f);
+}
+
+/* Defined iff upb_field_ismm(f). */
+INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) {
+ if(upb_isarray(f)) return UPB_MM_ARR_REF;
+ else if(upb_isstring(f)) return UPB_MM_STR_REF;
+ else if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
+ else return -1;
+}
+
+/* Defined iff upb_elem_ismm(f). */
+INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) {
+ if(upb_isstring(f)) return UPB_MM_STR_REF;
+ else if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
+ else return -1;
+}
+
/* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */
INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
struct upb_msg_fielddef *f, struct upb_msgdef *m) {
@@ -122,14 +137,15 @@ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
/* Message structure. *********************************************************/
-struct upb_msg {
- struct upb_msgdef *def;
- void *gptr; /* Generic pointer for use by subclasses. */
- uint8_t data[1];
-};
-
-INLINE void *upb_msg_gptr(struct upb_msg *msg) {
- return msg->gptr;
+/* Constructs a new msg corresponding to the given msgdef, and having one
+ * counted reference. */
+INLINE struct upb_msg *upb_msg_new(struct upb_msgdef *md) {
+ size_t size = md->size + offsetof(struct upb_msg, data);
+ struct upb_msg *msg = malloc(size);
+ memset(msg, 0, size);
+ upb_mmhead_init(&msg->mmhead);
+ msg->def = md;
+ return msg;
}
/* Field access. **************************************************************/
@@ -146,12 +162,6 @@ INLINE union upb_value_ptr upb_msg_getptr(struct upb_msg *msg,
return p;
}
-/* Returns a a specific field in a message. */
-INLINE union upb_value upb_msg_get(struct upb_msg *msg,
- struct upb_msg_fielddef *f) {
- return upb_deref(upb_msg_getptr(msg, f), f->type);
-}
-
/* "Set" flag reading and writing. *******************************************/
/* All upb code and code using upb should guarantee that the set flags are
@@ -244,85 +254,10 @@ INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m,
}
-/* Simple, one-shot parsing ***************************************************/
-
-/* A simple interface for parsing into a newly-allocated message. This
- * interface should only be used when the message will be read-only with
- * respect to memory management (eg. won't add or remove internal references to
- * dynamic memory). For more flexible (but also more complicated) interfaces,
- * see below and in upb_mm_msg.h. */
-
-/* Parses the protobuf in s (which is expected to be complete) and allocates
- * new message data to hold it. If byref is set, strings in the returned
- * upb_msg will reference s instead of copying from it, but this requires that
- * s will live for as long as the returned message does. */
-struct upb_msg *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s);
-
-/* This function should be used to free messages that were parsed with
- * upb_msg_parsenew. It will free the message appropriately (including all
- * submessages). */
-void upb_msg_free(struct upb_msg *msg);
-
-
-/* Parsing with (re)allocation callbacks. *************************************/
-
-/* This interface parses protocol buffers into upb_msgs, but allows the client
- * to supply allocation callbacks whenever the parser needs to obtain a string,
- * array, or submsg (a "dynamic field"). If the parser sees that a dynamic
- * field is already present (its "set bit" is set) it will use that, resizing
- * it if necessary in the case of an array. Otherwise it will call the
- * allocation callback to obtain one.
- *
- * This may seem trivial (since nearly all clients will use malloc and free for
- * memory management), but the allocation callback can be used for more than
- * just allocation. If we are parsing data into an existing upb_msg, the
- * allocation callback can examine any existing memory that is allocated for
- * the dynamic field and determine whether it can reuse it. It can also
- * perform memory management like refing the new field.
- *
- * This parser is layered on top of the event-based parser in upb_parse.h. The
- * parser is upb_mm_msg.h is layered on top of this parser.
- *
- * This parser is fully streaming-capable. */
-
-/* Should return an initialized array. */
-typedef struct upb_array *(*upb_msg_getandref_array_cb_t)(
- void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f);
-
-/* Callback to allocate a string. If byref is true, the client should assume
- * that the string will be referencing the input data. */
-typedef struct upb_string *(*upb_msg_getandref_string_cb_t)(
- void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f,
- bool byref);
-
-/* Should return a cleared message. */
-typedef struct upb_msg *(*upb_msg_getandref_msg_cb_t)(
- void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f);
-
-struct upb_msg_parser_frame {
- struct upb_msg *msg;
-};
-
-struct upb_msg_parser {
- struct upb_stream_parser s;
- bool merge;
- bool byref;
- struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
- upb_msg_getandref_array_cb_t getarray_cb;
- upb_msg_getandref_string_cb_t getstring_cb;
- upb_msg_getandref_msg_cb_t getmsg_cb;
-};
-
-void upb_msg_parser_reset(struct upb_msg_parser *p,
- struct upb_msg *msg, bool byref);
-
-/* Parses protocol buffer data out of data which has length of len. The data
- * need not be a complete protocol buffer. The number of bytes parsed is
- * returned in *read, and the next call to upb_msg_parse must supply data that
- * is *read bytes past data in the logical stream. */
-upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
- void *data, size_t len, size_t *read);
+/* Parsing ********************************************************************/
+/* TODO: a stream parser. */
+upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len);
/* Serialization *************************************************************/
@@ -336,7 +271,7 @@ upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
struct upb_msgsizes;
/* Initialize/free a upb_msgsizes for the given message. */
-void upb_msgsizes_init(struct upb_msgsizes *sizes);
+struct upb_msgsizes *upb_msgsizes_new(void);
void upb_msgsizes_free(struct upb_msgsizes *sizes);
/* Given a previously initialized sizes, recurse over the message and store its
@@ -366,6 +301,10 @@ void upb_msg_serialize_init(struct upb_msg_serialize_state *s,
upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
void *buf, size_t len, size_t *written);
+upb_status_t upb_msg_serialize_all(struct upb_msg *msg,
+ struct upb_msgsizes *sizes,
+ void *buf);
+
/* Text dump *****************************************************************/
bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive);
@@ -397,7 +336,8 @@ void upb_msgdef_free(struct upb_msgdef *m);
/* Sort the given field descriptors in-place, according to what we think is an
* optimal ordering of fields. This can change from upb release to upb
* release. */
-void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num);
+void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds,
+ size_t num);
/* Clients use this function on a previously initialized upb_msgdef to resolve
* the "ref" field in the upb_msg_fielddef. Since messages can refer to each
diff --git a/src/upb_parse.c b/src/upb_parse.c
index b7f3832..7c1ad66 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -11,7 +11,7 @@
/* May want to move this to upb.c if enough other things warrant it. */
#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, UPB_STRLIT(#ctype)},
+#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype},
struct upb_type_info upb_type_info[] = {
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE, UPB_WIRE_TYPE_64BIT, double)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT, UPB_WIRE_TYPE_32BIT, float)
diff --git a/src/upb_parse.h b/src/upb_parse.h
index de4cb2c..1454dd5 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -21,16 +21,6 @@
extern "C" {
#endif
-INLINE bool upb_issubmsgtype(upb_field_type_t type) {
- return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP ||
- type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE;
-}
-
-INLINE bool upb_isstringtype(upb_field_type_t type) {
- return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
- type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;
-}
-
/* High-level parsing interface. **********************************************/
/* The general scheme is that the client registers callbacks that will be
diff --git a/src/upb_string.c b/src/upb_string.c
index 7754936..54df4f1 100644
--- a/src/upb_string.c
+++ b/src/upb_string.c
@@ -7,19 +7,20 @@
#include <stdio.h>
#include "upb_string.h"
-bool upb_strreadfile(const char *filename, struct upb_string *data) {
+struct upb_string *upb_strreadfile(const char *filename) {
FILE *f = fopen(filename, "rb");
if(!f) return false;
- if(fseek(f, 0, SEEK_END) != 0) return false;
+ if(fseek(f, 0, SEEK_END) != 0) goto error;
long size = ftell(f);
- if(size < 0) return false;
- if(fseek(f, 0, SEEK_SET) != 0) return false;
- data->byte_len = size;
- upb_stralloc(data, data->byte_len);
- if(fread(data->ptr, size, 1, f) != 1) {
- free(data->ptr);
- return false;
- }
+ if(size < 0) goto error;
+ if(fseek(f, 0, SEEK_SET) != 0) goto error;
+ struct upb_string *s = upb_string_new();
+ upb_string_resize(s, size);
+ if(fread(s->ptr, size, 1, f) != 1) goto error;
fclose(f);
- return true;
+ return s;
+
+error:
+ fclose(f);
+ return NULL;
}
diff --git a/src/upb_string.h b/src/upb_string.h
index 9740a0b..aa62575 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -32,48 +32,35 @@ extern "C" {
#include <stdlib.h>
#include <string.h>
-/* inline if possible, emit standalone code if required. */
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
-#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+#include "upb_struct.h"
-struct upb_string {
- /* We expect the data to be 8-bit clean (uint8_t), but char* is such an
- * ingrained convention that we follow it. */
- char *ptr;
- uint32_t byte_len;
- uint32_t byte_size; /* How many bytes of ptr we own. */
-};
+/* Allocation/Deallocation/Resizing. ******************************************/
-INLINE void upb_strinit(struct upb_string *str)
+INLINE struct upb_string *upb_string_new(void)
{
+ struct upb_string *str = (struct upb_string*)malloc(sizeof(*str));
+ upb_mmhead_init(&str->mmhead);
str->ptr = NULL;
str->byte_len = 0;
str->byte_size = 0;
+ return str;
}
-INLINE void upb_struninit(struct upb_string *str)
-{
- if(str->byte_size) free(str->ptr);
-}
-
-INLINE struct upb_string *upb_strnew(void)
+/* For internal use only. */
+INLINE void upb_string_destroy(struct upb_string *str)
{
- struct upb_string *str = (struct upb_string*)malloc(sizeof(*str));
- upb_strinit(str);
- return str;
+ if(str->byte_size != 0) free(str->ptr);
+ free(str);
}
-INLINE void upb_strfree(struct upb_string *str)
+INLINE void upb_string_unref(struct upb_string *str)
{
- upb_struninit(str);
- free(str);
+ if(upb_mmhead_unref(&str->mmhead)) upb_string_destroy(str);
}
-INLINE void upb_stralloc(struct upb_string *str, uint32_t size)
+/* Resizes the string to size, reallocating if necessary. Does not preserve
+ * existing data. */
+INLINE void upb_string_resize(struct upb_string *str, uint32_t size)
{
if(str->byte_size < size) {
/* Need to resize. */
@@ -81,12 +68,10 @@ INLINE void upb_stralloc(struct upb_string *str, uint32_t size)
void *oldptr = str->byte_size == 0 ? NULL : str->ptr;
str->ptr = (char*)realloc(oldptr, str->byte_size);
}
+ str->byte_len = size;
}
-INLINE void upb_strdrop(struct upb_string *str)
-{
- upb_struninit(str);
-}
+/* Library functions. *********************************************************/
INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) {
return s1->byte_len == s2->byte_len &&
@@ -101,26 +86,26 @@ INLINE int upb_strcmp(struct upb_string *s1, struct upb_string *s2) {
INLINE void upb_strcpy(struct upb_string *dest, struct upb_string *src) {
dest->byte_len = src->byte_len;
- upb_stralloc(dest, dest->byte_len);
+ upb_string_resize(dest, dest->byte_len);
memcpy(dest->ptr, src->ptr, src->byte_len);
}
INLINE struct upb_string *upb_strdup(struct upb_string *s) {
- struct upb_string *copy = upb_strnew();
+ struct upb_string *copy = upb_string_new();
upb_strcpy(copy, s);
return copy;
}
INLINE struct upb_string *upb_strdupc(char *s) {
- struct upb_string *copy = upb_strnew();
+ struct upb_string *copy = upb_string_new();
copy->byte_len = strlen(s);
- upb_stralloc(copy, copy->byte_len);
+ upb_string_resize(copy, copy->byte_len);
memcpy(copy->ptr, s, copy->byte_len);
return copy;
}
/* Reads an entire file into a newly-allocated string. */
-bool upb_strreadfile(const char *filename, struct upb_string *data);
+struct upb_string *upb_strreadfile(const char *filename);
/* Allows defining upb_strings as literals, ie:
* struct upb_string str = UPB_STRLIT("Hello, World!\n");
diff --git a/src/upb_struct.h b/src/upb_struct.h
new file mode 100644
index 0000000..9c1bb2e
--- /dev/null
+++ b/src/upb_struct.h
@@ -0,0 +1,119 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ *
+ * This file defines the in-memory format for messages, arrays, and strings
+ * (which are the three dynamically-allocated structures that make up all
+ * protobufs). */
+
+#ifndef UPB_STRUCT_H
+#define UPB_STRUCT_H
+
+#include "upb.h"
+
+/* mmhead -- this is a "base class" for strings, arrays, and messages ********/
+
+struct upb_mm_ref;
+struct upb_mmhead {
+ struct upb_mm_ref *refs; /* Head of linked list. */
+ uint32_t refcount;
+};
+
+INLINE void upb_mmhead_init(struct upb_mmhead *head) {
+ head->refs = NULL;
+ head->refcount = 1;
+}
+
+INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) {
+ return head->refcount == 0 && head->refs == NULL;
+}
+
+INLINE bool upb_mmhead_unref(struct upb_mmhead *head) {
+ head->refcount--;
+ return upb_mmhead_norefs(head);
+}
+
+INLINE void upb_mmhead_ref(struct upb_mmhead *head) {
+ head->refcount++;
+}
+
+/* Structures for msg, string, and array. *************************************/
+
+/* These are all self describing. */
+
+struct upb_msgdef;
+struct upb_msg_fielddef;
+
+struct upb_msg {
+ struct upb_mmhead mmhead;
+ struct upb_msgdef *def;
+ uint8_t data[1];
+};
+
+typedef uint32_t upb_arraylen_t; /* can be at most 2**32 - 1 elements long. */
+struct upb_array {
+ struct upb_mmhead mmhead;
+ struct upb_msg_fielddef *fielddef; /* Defines the type of the array. */
+ union upb_value_ptr elements;
+ upb_arraylen_t len; /* Number of elements in "elements". */
+ upb_arraylen_t size; /* Memory we own (0 if by reference). */
+};
+
+struct upb_string {
+ struct upb_mmhead mmhead;
+ /* We expect the data to be 8-bit clean (uint8_t), but char* is such an
+ * ingrained convention that we follow it. */
+ char *ptr;
+ uint32_t byte_len;
+ uint32_t byte_size; /* How many bytes of ptr we own, 0 if we reference. */
+};
+
+/* Type-specific overlays on upb_array. ***************************************/
+
+#define UPB_DEFINE_ARRAY_TYPE(name, type) \
+ struct name ## _array { \
+ struct upb_mmhead mmhead; \
+ struct upb_msg_fielddef *fielddef; \
+ type elements; \
+ upb_arraylen_t len; \
+ upb_arraylen_t size; \
+ };
+
+UPB_DEFINE_ARRAY_TYPE(upb_double, double)
+UPB_DEFINE_ARRAY_TYPE(upb_float, float)
+UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t)
+UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t)
+UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
+UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
+UPB_DEFINE_ARRAY_TYPE(upb_bool, bool)
+UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
+UPB_DEFINE_ARRAY_TYPE(upb_msg, void*)
+
+/* Defines an array of a specific message type (an overlay of upb_array). */
+#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
+#define UPB_DEFINE_MSG_ARRAY(msg_type) \
+ UPB_MSG_ARRAY(msg_type) { \
+ struct upb_mmhead mmhead; \
+ struct upb_msg_fielddef *fielddef; \
+ msg_type **elements; \
+ upb_arraylen_t len; \
+ upb_arraylen_t size; \
+ };
+
+/* mmptr -- a pointer which polymorphically points to one of the above. *******/
+
+union upb_mmptr {
+ struct upb_msg *msg;
+ struct upb_array *arr;
+ struct upb_string *str;
+};
+
+enum {
+ UPB_MM_MSG_REF,
+ UPB_MM_STR_REF,
+ UPB_MM_ARR_REF
+};
+typedef uint8_t upb_mm_ptrtype;
+
+#endif
diff --git a/src/upb_text.c b/src/upb_text.c
index c9aad7e..6d43152 100644
--- a/src/upb_text.c
+++ b/src/upb_text.c
@@ -5,8 +5,11 @@
*/
#include <inttypes.h>
-#include "upb_text.h"
#include "descriptor.h"
+#include "upb_text.h"
+#include "upb_string.h"
+#include "upb_msg.h"
+#include "upb_array.h"
void upb_text_printval(upb_field_type_t type, union upb_value val, FILE *file)
{
@@ -78,3 +81,52 @@ void upb_text_pop(struct upb_text_printer *p,
print_indent(p, stream);
fprintf(stream, "}\n");
}
+
+static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
+ struct upb_msg_fielddef *f,
+ google_protobuf_FieldDescriptorProto *fd,
+ FILE *stream);
+
+static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
+ FILE *stream)
+{
+ struct upb_msgdef *m = msg->def;
+ for(uint32_t i = 0; i < m->num_fields; i++) {
+ struct upb_msg_fielddef *f = &m->fields[i];
+ google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
+ if(!upb_msg_isset(msg, f)) continue;
+ union upb_value_ptr p = upb_msg_getptr(msg, f);
+ if(upb_isarray(f)) {
+ struct upb_array *arr = *p.arr;
+ for(uint32_t j = 0; j < arr->len; j++) {
+ union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
+ printval(printer, elem_p, f, fd, stream);
+ }
+ } else {
+ printval(printer, p, f, fd, stream);
+ }
+ }
+}
+
+static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
+ struct upb_msg_fielddef *f,
+ google_protobuf_FieldDescriptorProto *fd,
+ FILE *stream)
+{
+ if(upb_issubmsg(f)) {
+ upb_text_push(printer, fd->name, stream);
+ printmsg(printer, *p.msg, stream);
+ upb_text_pop(printer, stream);
+ } else {
+ upb_text_printfield(printer, fd->name, f->type, upb_value_read(p, f->type), stream);
+ }
+}
+
+
+void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream)
+{
+ struct upb_text_printer printer;
+ upb_text_printer_init(&printer, single_line);
+ printmsg(&printer, msg, stream);
+}
+
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback