From 209dce5eb08709bfb5b21e19289b3814619ca6cc Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 7 Jul 2010 10:39:08 -0700 Subject: Defined the function for getting a upb_symtab for descriptor.proto. --- src/upb.c | 38 ++-- src/upb_data.c | 500 -------------------------------------------------- src/upb_data.h | 552 -------------------------------------------------------- src/upb_def.c | 27 ++- src/upb_def.h | 5 + src/upb_table.c | 2 +- 6 files changed, 50 insertions(+), 1074 deletions(-) delete mode 100644 src/upb_data.c delete mode 100644 src/upb_data.h (limited to 'src') diff --git a/src/upb.c b/src/upb.c index 189dfe4..3c5efe8 100644 --- a/src/upb.c +++ b/src/upb.c @@ -18,25 +18,25 @@ #ctype}, upb_type_info upb_types[] = { - {0, 0, 0, ""} // There is no type 0. - TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1), // DOUBLE - TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1), // FLOAT - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1), // INT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1), // UINT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1), // INT32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1), // FIXED64 - TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1), // FIXED32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1), // BOOL - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // STRING - TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0), // GROUP - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // MESSAGE - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // BYTES - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1), // UINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1), // ENUM - TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1), // SFIXED32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1), // SFIXED64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1), // SINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1), // SINT64 + {0, 0, 0, 0, ""}, // There is no type 0. + TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1) // DOUBLE + TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1) // FLOAT + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // INT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1) // UINT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // INT32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1) // FIXED64 + TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1) // FIXED32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1) // BOOL + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // STRING + TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0) // GROUP + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // MESSAGE + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // BYTES + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // UINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // ENUM + TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1) // SFIXED32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1) // SFIXED64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // SINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // SINT64 }; void upb_seterr(upb_status *status, enum upb_status_code code, diff --git a/src/upb_data.c b/src/upb_data.c deleted file mode 100644 index 3b4f7ab..0000000 --- a/src/upb_data.c +++ /dev/null @@ -1,500 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include -#include "upb_data.h" -#include "upb_decoder.h" -#include "upb_def.h" - -static uint32_t round_up_to_pow2(uint32_t v) -{ - /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -/* upb_data *******************************************************************/ - -static void data_elem_unref(upb_valueptr p, upb_fielddef *f) { - if(upb_issubmsg(f)) { - upb_msg_unref(*p.msg, upb_downcast_msgdef(f->def)); - } else if(upb_isstring(f)) { - upb_string_unref(*p.str); - } else { - assert(false); - } -} - -static void data_unref(upb_valueptr p, upb_fielddef *f) { - if(upb_isarray(f)) { - upb_array_unref(*p.arr, f); - } else { - data_elem_unref(p, f); - } -} - -INLINE void data_init(upb_data *d, int flags) { - d->v = REFCOUNT_ONE | flags; -} - -static void check_not_frozen(upb_data *d) { - // On one hand I am reluctant to put abort() calls in a low-level library - // that are enabled in a production build. On the other hand, this is a bug - // in the client code that we cannot recover from, and it seems better to get - // the error here than later. - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) abort(); -} - - -/* upb_string *******************************************************************/ - -void _upb_string_setptr(upb_strptr s, char *ptr) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - s.refcounted->ptr = ptr; - else - s.norefcount->ptr = ptr; -} - -static void _upb_string_set_bytelen(upb_strptr s, upb_strlen_t newlen) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - s.refcounted->byte_len = newlen; - } else { - s.norefcount->byte_len = newlen; - } -} - -upb_strptr upb_string_new() { - upb_strptr s; - s.refcounted = malloc(sizeof(struct upb_refcounted_string)); - data_init(s.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - s.refcounted->byte_size = 0; - s.refcounted->byte_len = 0; - s.refcounted->ptr = NULL; - return s; -} - -static upb_strlen_t string_get_bytesize(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - return s.refcounted->byte_size; - } else { - return (s.norefcount->byte_size_and_flags & 0xFFFFFFF8) >> 3; - } -} - -static void string_set_bytesize(upb_strptr s, upb_strlen_t newsize) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - s.refcounted->byte_size = newsize; - } else { - s.norefcount->byte_size_and_flags &= 0x7; - s.norefcount->byte_size_and_flags |= (newsize << 3); - } -} - -void _upb_string_free(upb_strptr s) -{ - if(string_get_bytesize(s) != 0) free((void*)upb_string_getrobuf(s)); - free(s.base); -} - -void upb_string_resize(upb_strptr s, upb_strlen_t byte_len) { - check_not_frozen(s.base); - if(string_get_bytesize(s) < byte_len) { - // Need to resize. - size_t new_byte_size = round_up_to_pow2(byte_len); - _upb_string_setptr(s, realloc(_upb_string_getptr(s), new_byte_size)); - string_set_bytesize(s, new_byte_size); - } - _upb_string_set_bytelen(s, byte_len); -} - -upb_strptr upb_string_getref(upb_strptr s, int ref_flags) { - if(_upb_data_incref(s.base, ref_flags)) return s; - upb_strptr copy = upb_strdup(s); - if(ref_flags == UPB_REF_FROZEN) - upb_data_setflag(copy.base, UPB_DATA_FROZEN); - return copy; -} - -upb_strptr upb_strreadfile(const char *filename) { - FILE *f = fopen(filename, "rb"); - if(!f) return UPB_STRING_NULL; - if(fseek(f, 0, SEEK_END) != 0) goto error; - long size = ftell(f); - if(size < 0) goto error; - if(fseek(f, 0, SEEK_SET) != 0) goto error; - upb_strptr s = upb_string_new(); - char *buf = upb_string_getrwbuf(s, size); - if(fread(buf, size, 1, f) != 1) goto error; - fclose(f); - return s; - -error: - fclose(f); - return UPB_STRING_NULL; -} - -upb_strptr upb_strdupc(const char *src) { - upb_strptr copy = upb_string_new(); - upb_strlen_t len = strlen(src); - char *buf = upb_string_getrwbuf(copy, len); - memcpy(buf, src, len); - return copy; -} - -void upb_strcat(upb_strptr s, upb_strptr append) { - upb_strlen_t s_len = upb_strlen(s); - upb_strlen_t append_len = upb_strlen(append); - upb_strlen_t newlen = s_len + append_len; - memcpy(upb_string_getrwbuf(s, newlen) + s_len, - upb_string_getrobuf(append), append_len); -} - -upb_strptr upb_strslice(upb_strptr s, int offset, int len) { - upb_strptr slice = upb_string_new(); - len = UPB_MIN((upb_strlen_t)len, upb_strlen(s) - (upb_strlen_t)offset); - memcpy(upb_string_getrwbuf(slice, len), upb_string_getrobuf(s) + offset, len); - return slice; -} - -upb_strptr upb_strdup(upb_strptr s) { - upb_strptr copy = upb_string_new(); - upb_strcpy(copy, s); - return copy; -} - -int upb_strcmp(upb_strptr s1, upb_strptr s2) { - upb_strlen_t common_length = UPB_MIN(upb_strlen(s1), upb_strlen(s2)); - int common_diff = memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), - common_length); - return common_diff == - 0 ? ((int)upb_strlen(s1) - (int)upb_strlen(s2)) : common_diff; -} - - -/* upb_array ******************************************************************/ - -static void _upb_array_setptr(upb_arrayptr a, void *ptr) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - a.refcounted->elements._void = ptr; - else - a.norefcount->elements._void = ptr; -} - -static void _upb_array_setlen(upb_arrayptr a, upb_strlen_t newlen) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - a.refcounted->len = newlen; - } else { - a.norefcount->len = newlen; - } -} - -upb_arrayptr upb_array_new() { - upb_arrayptr a; - a.refcounted = malloc(sizeof(struct upb_refcounted_array)); - data_init(a.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - a.refcounted->size = 0; - a.refcounted->len = 0; - a.refcounted->elements._void = NULL; - return a; -} - -// ONLY handles refcounted arrays for the moment. -void _upb_array_free(upb_arrayptr a, upb_fielddef *f) -{ - if(upb_elem_ismm(f)) { - for(upb_arraylen_t i = 0; i < a.refcounted->size; i++) { - upb_valueptr p = _upb_array_getptr(a, f, i); - if(!*p.data) continue; - data_elem_unref(p, f); - } - } - if(a.refcounted->size != 0) free(a.refcounted->elements._void); - free(a.refcounted); -} - -static upb_arraylen_t array_get_size(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - return a.refcounted->size; - } else { - return (a.norefcount->base.v & 0xFFFFFFF8) >> 3; - } -} - -static void array_set_size(upb_arrayptr a, upb_arraylen_t newsize) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - a.refcounted->size = newsize; - } else { - a.norefcount->base.v &= 0x7; - a.norefcount->base.v |= (newsize << 3); - } -} - -void upb_array_resize(upb_arrayptr a, upb_fielddef *f, upb_strlen_t len) { - check_not_frozen(a.base); - size_t type_size = upb_types[f->type].size; - upb_arraylen_t old_size = array_get_size(a); - if(old_size < len) { - // Need to resize. - size_t new_size = round_up_to_pow2(len); - _upb_array_setptr(a, realloc(_upb_array_getptr_raw(a, 0, 0)._void, new_size * type_size)); - array_set_size(a, new_size); - memset(_upb_array_getptr_raw(a, old_size, type_size)._void, - 0, - (new_size - old_size) * type_size); - } - _upb_array_setlen(a, len); -} - - -/* upb_msg ********************************************************************/ - -static void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { - msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); -} - -upb_msg *upb_msg_new(upb_msgdef *md) { - upb_msg *msg = malloc(md->size); - memset(msg, 0, md->size); - data_init(&msg->base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - upb_def_ref(UPB_UPCAST(md)); - return msg; -} - -// ONLY handles refcounted messages for the moment. -void _upb_msg_free(upb_msg *msg, upb_msgdef *md) -{ - for(int i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[i]; - upb_valueptr p = _upb_msg_getptr(msg, f); - if(!upb_field_ismm(f) || !*p.data) continue; - data_unref(p, f); - } - upb_def_unref(UPB_UPCAST(md)); - free(msg); -} - -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status) -{ - upb_decoder *d = upb_decoder_new(md); - upb_msgsink *s = upb_msgsink_new(md); - - upb_msgsink_reset(s, msg); - upb_decoder_reset(d, upb_msgsink_sink(s)); - upb_msg_clear(msg, md); - upb_decoder_decode(d, str, status); - - upb_decoder_free(d); - upb_msgsink_free(s); -} - -#if 0 -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status) -{ - upb_sizebuilder *sb = upb_sizebuilder_new(md); - upb_encoder *e = upb_encoder_new(md); - upb_strsink *sink = upb_strsink_new(); - - // Get sizes. We could avoid performing this step in some cases by having a - // bool in the msgdef indicating whether it or any of its children have - // submessages in the def (groups don't count). - upb_sizebuilder_reset(sb); - upb_msgsrc_produce(msg, md, upb_sizebuilder_sink(sb), true); - - upb_strsink_reset(); - upb_encoder_reset(e, sb, sink); - upb_msgsrc_produce(msg, md, sink, false); -} -#endif - -/* upb_msgsrc ****************************************************************/ - -static void _upb_msgsrc_produceval(upb_value v, upb_fielddef *f, upb_sink *sink, - bool reverse, upb_status *status) -{ - // TODO: We need to check status for failure, but how often? - if(upb_issubmsg(f)) { - upb_msgdef *md = upb_downcast_msgdef(f->def); - upb_sink_onstart(sink, f, status); - upb_msgsrc_produce(v.msg, md, sink, reverse, status); - upb_sink_onend(sink, f, status); - } else if(upb_isstring(f)) { - upb_sink_onstr(sink, f, v.str, 0, upb_strlen(v.str), status); - } else { - upb_sink_onvalue(sink, f, v, status); - } -} - -void upb_msgsrc_produce(upb_msg *msg, upb_msgdef *md, upb_sink *sink, - bool reverse, upb_status *status) -{ - for(int i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[reverse ? md->num_fields - i - 1 : i]; - if(!upb_msg_has(msg, f)) continue; - upb_value v = upb_msg_get(msg, f); - if(upb_isarray(f)) { - upb_arrayptr arr = v.arr; - upb_arraylen_t len = upb_array_len(arr); - for(upb_arraylen_t j = 0; j < upb_array_len(arr); j++) { - upb_value elem = upb_array_get(arr, f, reverse ? len - j - 1 : j); - _upb_msgsrc_produceval(elem, f, sink, reverse, status); - } - } else { - _upb_msgsrc_produceval(v, f, sink, reverse, status); - } - } -} - - -/* upb_msgsink ***************************************************************/ - -typedef struct { - upb_msg *msg; - upb_msgdef *md; -} upb_msgsink_frame; - -struct upb_msgsink { - upb_sink base; - upb_msgdef *toplevel_msgdef; - upb_msgsink_frame stack[UPB_MAX_NESTING], *top; -}; - -/* Helper function that returns a pointer to where the next value for field "f" - * should be stored, taking into account whether f is an array that may need to - * be allocated or resized. */ -static upb_valueptr get_valueptr(upb_msg *msg, upb_fielddef *f) -{ - upb_valueptr p = _upb_msg_getptr(msg, f); - if(upb_isarray(f)) { - if(!upb_msg_has(msg, f)) { - if(upb_array_isnull(*p.arr) || !upb_data_only(*p.data)) { - if(!upb_array_isnull(*p.arr)) - upb_array_unref(*p.arr, f); - *p.arr = upb_array_new(); - } - upb_array_truncate(*p.arr); - upb_msg_sethas(msg, f); - } else { - assert(!upb_array_isnull(*p.arr)); - } - upb_arraylen_t oldlen = upb_array_len(*p.arr); - upb_array_resize(*p.arr, f, oldlen + 1); - p = _upb_array_getptr(*p.arr, f, oldlen); - } - return p; -} - -// Callbacks for upb_sink. -// TODO: implement these in terms of public interfaces. - -static upb_sink_status _upb_msgsink_valuecb(upb_sink *s, upb_fielddef *f, - upb_value val, upb_status *status) -{ - (void)status; // No detectable errors can occur. - upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *msg = ms->top->msg; - upb_valueptr p = get_valueptr(msg, f); - upb_msg_sethas(msg, f); - upb_value_write(p, val, f->type); - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_strcb(upb_sink *s, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - (void)status; // No detectable errors can occur. - upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *msg = ms->top->msg; - upb_valueptr p = get_valueptr(msg, f); - upb_msg_sethas(msg, f); - if(end > upb_strlen(str)) abort(); /* TODO: support streaming. */ - if(upb_string_isnull(*p.str) || !upb_data_only(*p.data)) { - if(!upb_string_isnull(*p.str)) - upb_string_unref(*p.str); - *p.str = upb_string_new(); - } - upb_strcpylen(*p.str, upb_string_getrobuf(str) + start, end - start); - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_startcb(upb_sink *s, upb_fielddef *f, - upb_status *status) -{ - (void)status; // No detectable errors can occur. - upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *oldmsg = ms->top->msg; - upb_valueptr p = get_valueptr(oldmsg, f); - ms->top++; - - if(upb_isarray(f) || !upb_msg_has(oldmsg, f)) { - upb_msgdef *md = upb_downcast_msgdef(f->def); - if(!*p.msg || !upb_data_only(*p.data)) { - if(*p.msg) - upb_msg_unref(*p.msg, md); - *p.msg = upb_msg_new(md); - } - upb_msg_clear(*p.msg, md); - upb_msg_sethas(oldmsg, f); - } - - ms->top->msg = *p.msg; - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_endcb(upb_sink *s, upb_fielddef *f, - upb_status *status) -{ - (void)status; // No detectable errors can occur. - (void)f; // Unused. - upb_msgsink *ms = (upb_msgsink*)s; - ms->top--; - return UPB_SINK_CONTINUE; -} - -static upb_sink_callbacks _upb_msgsink_vtbl = { - _upb_msgsink_valuecb, - _upb_msgsink_strcb, - _upb_msgsink_startcb, - _upb_msgsink_endcb -}; - -// -// External upb_msgsink interface. -// - -upb_msgsink *upb_msgsink_new(upb_msgdef *md) -{ - upb_msgsink *ms = malloc(sizeof(*ms)); - upb_sink_init(&ms->base, &_upb_msgsink_vtbl); - ms->toplevel_msgdef = md; - return ms; -} - -void upb_msgsink_free(upb_msgsink *sink) -{ - free(sink); -} - -upb_sink *upb_msgsink_sink(upb_msgsink *sink) -{ - return &sink->base; -} - -void upb_msgsink_reset(upb_msgsink *ms, upb_msg *msg) -{ - ms->top = ms->stack; - ms->top->msg = msg; - ms->top->md = ms->toplevel_msgdef; -} diff --git a/src/upb_data.h b/src/upb_data.h deleted file mode 100644 index c0f53ff..0000000 --- a/src/upb_data.h +++ /dev/null @@ -1,552 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file defines the in-memory format for messages, arrays, and strings - * (which are the three dynamically-allocated structures that make up all - * protobufs). - * - * The members of all structs should be considered private. Access should - * only happen through the provided functions. - * - * Unlike Google's protobuf, messages contain *pointers* to strings and arrays - * instead of including them by value. This makes unused strings and arrays - * use less memory, and lets the strings and arrays have multiple possible - * representations (for example, a string could be a slice). It also gives - * us more flexibility wrt refcounting. The cost is that when a field *is* - * being used, the net memory usage is one pointer more than if we had - * included the thing directly. */ - -#ifndef UPB_DATA_H -#define UPB_DATA_H - -#include -#include -#include "upb.h" -#include "upb_atomic.h" -#include "upb_def.h" -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_data *******************************************************************/ - -// The "base class" of strings, arrays, and messages. Contains a few flags and -// possibly a reference count. None of the functions for upb_data are public, -// but some of the constants are. - -// typedef upb_atomic_refcount_t upb_data; - -// The flags in upb_data. -typedef enum { - // Set if the object itself was allocated with malloc() and should be freed - // with free(). This flag would be false if the object was allocated on the - // stack or is data from the static segment of an object file. Note that this - // flag does not apply to the data being referenced by a string or array. - // - // If this flag is false, UPB_FLAG_HAS_REFCOUNT must be false also; there is - // no sense refcounting something that does not need to be freed. - UPB_DATA_HEAPALLOCATED = 1, - - // Set if the object is frozen against modification. While an object is - // frozen, it is suitable for concurrent readonly access. Note that this - // flag alone is not a sufficient mechanism for preventing any kind of writes - // to the object's memory, because the object could still have a refcount. - UPB_DATA_FROZEN = (1<<1), - - // Set if the object has an embedded refcount. - UPB_DATA_REFCOUNTED = (1<<2) -} upb_data_flag; - -#define REFCOUNT_MASK 0xFFFFFFF8 -#define REFCOUNT_SHIFT 3 -#define REFCOUNT_ONE (1<v & flag; -} - -// INTERNAL-ONLY -INLINE void upb_data_setflag(upb_data *d, upb_data_flag flag) { - d->v |= flag; -} - -INLINE uint32_t upb_data_getrefcount(upb_data *d) { - int data; - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) - data = upb_atomic_read(d); - else - data = d->v; - return (data & REFCOUNT_MASK) >> REFCOUNT_SHIFT; -} - -// Returns true if the given data has only one owner. -INLINE bool upb_data_only(upb_data *data) { - return !upb_data_hasflag(data, UPB_DATA_REFCOUNTED) || - upb_data_getrefcount(data) == 1; -} - -// Specifies the type of ref that is requested based on the kind of access the -// caller needs to the object. -typedef enum { - // Use when the client plans to perform read-only access to the object, and - // only in one thread at a time. This imposes the least requirements on the - // object; it can be either frozen or not. As a result, requesting a - // reference of this type never performs a copy unless the object has no - // refcount. - // - // A ref of this type can always be explicitly converted to frozen or - // unfrozen later. - UPB_REF_THREADUNSAFE_READONLY = 0, - - // Use when the client plans to perform read-only access, but from multiple - // threads concurrently. This will force the object to eagerly perform any - // parsing that may have been lazily deferred, and will force a copy if the - // object is not current frozen. - // - // Asking for a reference of this type is equivalent to: - // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); - // x = freeze(x); - // ...except it is more efficient. - UPB_REF_FROZEN = 1, - - // Use when the client plans to perform read/write access. As a result, the - // reference will not be thread-safe for concurrent reading *or* writing; the - // object must be externally synchronized if it is being accessed from more - // than one thread. This will force a copy if the object is currently frozen. - // - // Asking for a reference of this type is equivalent to: - // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); - // x = thaw(x); - // ...except it is more efficient. - UPB_REF_MUTABLE = 2 -} upb_reftype; - -// INTERNAL-ONLY FUNCTION: -// Attempts to increment the reference on d with the given type of ref. If -// this is not possible, returns false. -INLINE bool _upb_data_incref(upb_data *d, upb_reftype reftype) { - bool frozen = upb_data_hasflag(d, UPB_DATA_FROZEN); - if((reftype == UPB_REF_FROZEN && !frozen) || - (reftype == UPB_REF_MUTABLE && frozen) || - (upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED) && - !upb_data_hasflag(d, UPB_DATA_REFCOUNTED))) { - return false; - } - // Increment the ref. Only need to use atomic ops if the ref is frozen. - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) upb_atomic_add(d, REFCOUNT_ONE); - else d->v += REFCOUNT_ONE; - return true; -} - -// INTERNAL-ONLY FUNCTION: -// Releases a reference on d, returning true if the object should be deleted. -INLINE bool _upb_data_unref(upb_data *d) { - if(upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED)) { - // A heap-allocated object without a refcount should never be decref'd. - // Its owner owns it exlusively and should free it directly. - assert(upb_data_hasflag(d, UPB_DATA_REFCOUNTED)); - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) { - int32_t old_val = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE); - return (old_val & REFCOUNT_MASK) == REFCOUNT_ONE; - } else { - d->v -= REFCOUNT_ONE; - return (d->v & REFCOUNT_MASK) == 0; - } - } else { - // Non heap-allocated data never should be deleted. - return false; - } -} - -/* upb_string *****************************************************************/ - -// We have several different representations for string, depending on whether -// it has a refcount (and likely in the future, depending on whether it is a -// slice of another string). We could just have one representation with -// members that are sometimes unused, but this is wasteful in memory. The -// flags that are always part of the first word tell us which representation -// to use. -// -// In a way, this is like inheritance but instead of using a virtual pointer, -// we do switch/case in every "virtual" method. This may sound expensive but -// in many cases the different cases compile to exactly the same code, so there -// is no branch. - -struct upb_norefcount_string { - uint32_t byte_size_and_flags; - upb_strlen_t byte_len; - // We expect the data to be 8-bit clean (uint8_t), but char* is such an - // ingrained convention that we follow it. - char *ptr; -}; - -// Used for a string with a refcount. -struct upb_refcounted_string { - upb_data base; - upb_strlen_t byte_len; - char *ptr; - uint32_t byte_size; -}; - - -// Returns a newly constructed, refcounted string which starts out empty. -// Caller owns one ref on it. The returned string will not be frozen. -upb_strptr upb_string_new(void); - -// INTERNAL-ONLY: -// Frees the given string, alone with any memory the string owned. -void _upb_string_free(upb_strptr s); - -// Returns a string to which caller owns a ref, and contains the same contents -// as src. The returned value may be a copy of src, if the requested flags -// were incompatible with src's. -upb_strptr upb_string_getref(upb_strptr s, int ref_flags); - -#define UPB_STRING_NULL_INITIALIZER {NULL} -static const upb_strptr UPB_STRING_NULL = UPB_STRING_NULL_INITIALIZER; -INLINE bool upb_string_isnull(upb_strptr s) { return s.base == NULL; } - -// The caller releases a ref on src, which it must previously have owned a ref -// on. -INLINE void upb_string_unref(upb_strptr s) { - if(_upb_data_unref(s.base)) _upb_string_free(s); -} - -// The string is resized to byte_len. The string must not be frozen. -void upb_string_resize(upb_strptr s, upb_strlen_t len); - -// Returns a buffer to which the caller may write. The string is resized to -// byte_len (which may or may not trigger a reallocation). The string must not -// be frozen. -INLINE char *upb_string_getrwbuf(upb_strptr s, upb_strlen_t byte_len) { - upb_string_resize(s, byte_len); - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->ptr; - else - return s.norefcount->ptr; -} - -INLINE void upb_string_clear(upb_strptr s) { - upb_string_getrwbuf(s, 0); -} - -// INTERNAL-ONLY: -// Gets/sets the pointer. -INLINE char *_upb_string_getptr(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->ptr; - else - return s.norefcount->ptr; -} - -// Returns a buffer that the caller may use to read the current contents of -// the string. The number of bytes available is upb_strlen(s). -INLINE const char *upb_string_getrobuf(upb_strptr s) { - return _upb_string_getptr(s); -} - -// Returns the current length of the string. -INLINE upb_strlen_t upb_strlen(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->byte_len; - else - return s.norefcount->byte_len; -} - -/* upb_string library functions ***********************************************/ - -// Named like their counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. - -// More efficient than upb_strcmp if all you need is to test equality. -INLINE bool upb_streql(upb_strptr s1, upb_strptr s2) { - upb_strlen_t len = upb_strlen(s1); - if(len != upb_strlen(s2)) { - return false; - } else { - return memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; - } -} - -// Like strcmp(). -int upb_strcmp(upb_strptr s1, upb_strptr s2); - -// Like upb_strcpy, but copies from a buffer and length. -INLINE void upb_strcpylen(upb_strptr dest, const void *src, upb_strlen_t len) { - memcpy(upb_string_getrwbuf(dest, len), src, len); -} - -// Replaces the contents of "dest" with the contents of "src". -INLINE void upb_strcpy(upb_strptr dest, upb_strptr src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_strlen(src)); -} - -// Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_strptr dest, const char *src) { - // This does two passes over src, but that is necessary unless we want to - // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); -} - -// Returns a new string whose contents are a copy of s. -upb_strptr upb_strdup(upb_strptr s); - -// Like upb_strdup(), but duplicates a given buffer and length. -INLINE upb_strptr upb_strduplen(const void *src, upb_strlen_t len) { - upb_strptr s = upb_string_new(); - upb_strcpylen(s, src, len); - return s; -} - -// Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_strptr upb_strdupc(const char *src); - -// Appends 'append' to 's' in-place, resizing s if necessary. -void upb_strcat(upb_strptr s, upb_strptr append); - -// Returns a string that is a substring of the given string. Currently this -// returns a copy, but in the future this may return an object that references -// the original string data instead of copying it. Both now and in the future, -// the caller owns a ref on whatever is returned. -upb_strptr upb_strslice(upb_strptr s, int offset, int len); - -// Reads an entire file into a newly-allocated string (caller owns one ref). -upb_strptr upb_strreadfile(const char *filename); - -// Typedef for a read-only string that is allocated statically or on the stack. -// Initialize with the given macro, which must resolve to a const char*. You -// must not dynamically allocate this type. Example usage: -// -// upb_static_string mystr = UPB_STATIC_STRING_INIT("biscuits"); -// upb_strptr mystr_ptr = UPB_STATIC_STRING_PTR_INIT(mystr); -// -// If C99 compund literals are available, the much nicer UPB_STRLIT macro is -// available instead: -// -// upb_strtr mystr_ptr = UPB_STRLIT("biscuits"); -// -typedef struct upb_norefcount_string upb_static_string; -#define UPB_STATIC_STRING_INIT_LEN(str, len) {0 | UPB_DATA_FROZEN, len, str} -#define UPB_STATIC_STRING_INIT(str) UPB_STATIC_STRING_INIT_LEN(str, sizeof(str)-1) -#define UPB_STATIC_STRING_PTR_INIT(static_string) {&static_string} -#define UPB_STRLIT(str) (upb_strptr){&(upb_static_string)UPB_STATIC_STRING_INIT(str)} - -// Allows using upb_strings in printf, ie: -// upb_strptr str = UPB_STRLIT("Hello, World!\n"); -// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) upb_strlen(str), upb_string_getrobuf(str) -#define UPB_STRFMT "%.*s" - -/* upb_array ******************************************************************/ - -// The comments attached to upb_string above also apply here. -struct upb_norefcount_array { - upb_data base; // We co-opt the refcount for the size. - upb_arraylen_t len; - upb_valueptr elements; -}; - -struct upb_refcounted_array { - upb_data base; - upb_arraylen_t len; - upb_valueptr elements; - upb_arraylen_t size; -}; - -typedef struct upb_norefcount_array upb_static_array; -#define UPB_STATIC_ARRAY_INIT(arr, len) {{0 | UPB_DATA_FROZEN}, len, {._void=arr}} -#define UPB_STATIC_ARRAY_PTR_TYPED_INIT(static_arr) {{&static_arr}} - -#define UPB_ARRAY_NULL_INITIALIZER {NULL} -static const upb_arrayptr UPB_ARRAY_NULL = UPB_ARRAY_NULL_INITIALIZER; -INLINE bool upb_array_isnull(upb_arrayptr a) { return a.base == NULL; } -INLINE bool upb_array_ptreql(upb_arrayptr a1, upb_arrayptr a2) { - return a1.base == a2.base; -} - -#define UPB_MSG_ARRAYPTR(type) type ## _array -#define UPB_DEFINE_MSG_ARRAY(type) \ -typedef struct { upb_arrayptr ptr; } UPB_MSG_ARRAYPTR(type); \ -INLINE upb_arraylen_t type ## _array_len(UPB_MSG_ARRAYPTR(type) a) { \ - return upb_array_len(a.ptr); \ -} \ -INLINE type* type ## _array_get(UPB_MSG_ARRAYPTR(type) a, upb_arraylen_t elem) { \ - return *(type**)_upb_array_getptr_raw(a.ptr, elem, sizeof(void*))._void; \ -} - -// Constructs a newly-allocated, reference-counted array which starts out -// empty. Caller owns one ref on it. -upb_arrayptr upb_array_new(void); - -// Returns the current number of elements in the array. -INLINE size_t upb_array_len(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - return a.refcounted->len; - else - return a.norefcount->len; -} - -// INTERNAL-ONLY: -// Frees the given message and releases references on members. -void _upb_array_free(upb_arrayptr a, upb_fielddef *f); - -// INTERNAL-ONLY: -// Returns a pointer to the given elem. -INLINE upb_valueptr _upb_array_getptr_raw(upb_arrayptr a, upb_arraylen_t elem, - size_t type_size) { - upb_valueptr p; - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - p._void = &a.refcounted->elements.uint8[elem * type_size]; - else - p._void = &a.norefcount->elements.uint8[elem * type_size]; - return p; -} - -INLINE upb_valueptr _upb_array_getptr(upb_arrayptr a, upb_fielddef *f, - upb_arraylen_t elem) { - return _upb_array_getptr_raw(a, elem, upb_types[f->type].size); -} - -INLINE upb_value upb_array_get(upb_arrayptr a, upb_fielddef *f, - upb_arraylen_t elem) { - assert(elem < upb_array_len(a)); - return upb_value_read(_upb_array_getptr(a, f, elem), f->type); -} - -// The caller releases a ref on the given array, which it must previously have -// owned a ref on. -INLINE void upb_array_unref(upb_arrayptr a, upb_fielddef *f) { - if(_upb_data_unref(a.base)) _upb_array_free(a, f); -} - -#if 0 -// Returns an array to which caller owns a ref, and contains the same contents -// as src. The returned value may be a copy of src, if the requested flags -// were incompatible with src's. -INLINE upb_arrayptr upb_array_getref(upb_arrayptr src, int ref_flags); - -// Sets the given element in the array to val. The current length of the array -// must be greater than elem. If the field type is dynamic, the array will -// take a ref on val and release a ref on what was previously in the array. -INLINE void upb_array_set(upb_arrayptr a, upb_fielddef *f, int elem, - upb_value val); - - -// Note that array_append will attempt to take a reference on the given value, -// so to avoid a copy use append_default and get. -INLINE void upb_array_append(upb_arrayptr a, upb_fielddef *f, - upb_value val); -INLINE void upb_array_append_default(upb_arrayptr a, upb_fielddef *f, - upb_value val); -#endif - -INLINE void upb_array_truncate(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - a.refcounted->len = 0; - else - a.norefcount->len = 0; -} - - -/* upb_msg ********************************************************************/ - -// Note that some inline functions for upb_msg are defined in upb_def.h since -// they rely on the defs. - -struct _upb_msg { - upb_data base; - uint8_t data[4]; // We allocate the appropriate amount per message. -}; - -// Creates a new msg of the given type. -upb_msg *upb_msg_new(upb_msgdef *md); - -// INTERNAL-ONLY: -// Frees the given message and releases references on members. -void _upb_msg_free(upb_msg *msg, upb_msgdef *md); - -// INTERNAL-ONLY: -// Returns a pointer to the given field. -INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { - upb_valueptr p; - p._void = &msg->data[f->byte_offset]; - return p; -} - -// Releases a references on msg. -INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { - if(_upb_data_unref(&msg->base)) _upb_msg_free(msg, md); -} - -// Tests whether the given field is explicitly set, or whether it will return -// a default. -INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { - return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; -} - -// Returns the current value if set, or the default value if not set, of the -// specified field. The caller does *not* own a ref. -INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - if(upb_msg_has(msg, f)) { - return upb_value_read(_upb_msg_getptr(msg, f), f->type); - } else { - return f->default_value; - } -} - -// Sets the given field to the given value. The msg will take a ref on val, -// and will drop a ref on whatever was there before. -void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); - -INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { - memset(msg->data, 0, md->set_flags_bytes); -} - -// A convenience function for decoding an entire protobuf all at once, without -// having to worry about setting up the appropriate objects. -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status); - -// A convenience function for encoding an entire protobuf all at once. If an -// error occurs, the null string is returned and the status object contains -// the error. -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status); - - -/* upb_msgsrc *****************************************************************/ - -// A nonresumable, non-interruptable (but simple and fast) source for pushing -// the data of a upb_msg to a upb_sink. -void upb_msgsrc_produce(upb_msg *msg, upb_msgdef *md, upb_sink *sink, - bool reverse, upb_status *status); - - -/* upb_msgsink ****************************************************************/ - -// A upb_msgsink can accept the data from a source and write it into a message. -struct upb_msgsink; -typedef struct upb_msgsink upb_msgsink; - -// Allocate and free a msgsink, respectively. -upb_msgsink *upb_msgsink_new(upb_msgdef *md); -void upb_msgsink_free(upb_msgsink *sink); - -// Returns the upb_sink (like an upcast). -upb_sink *upb_msgsink_sink(upb_msgsink *sink); - -// Resets the msgsink for the given msg. -void upb_msgsink_reset(upb_msgsink *sink, upb_msg *msg); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_def.c b/src/upb_def.c index 31f14fa..bb1f07a 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -6,6 +6,7 @@ #include #include "descriptor_const.h" +#include "descriptor.h" #include "upb_def.h" #define CHECKSRC(x) if(!(x)) goto src_err @@ -840,6 +841,7 @@ err: upb_deflist_uninit(&defs); } + /* upb_baredecoder ************************************************************/ // upb_baredecoder is a upb_src that can parse a subset of the protocol buffer @@ -977,7 +979,7 @@ static upb_src_vtable upb_baredecoder_src_vtbl = { (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, }; -upb_baredecoder *upb_baredecoder_new(upb_string *str) +static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); @@ -987,9 +989,30 @@ upb_baredecoder *upb_baredecoder_new(upb_string *str) return d; } -void upb_baredecoder_free(upb_baredecoder *d) +static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); upb_string_unref(d->str); free(d); } + +static upb_src *upb_baredecoder_src(upb_baredecoder *d) +{ + return &d->src; +} + +upb_symtab *upb_get_descriptor_symtab() +{ + // TODO: implement sharing of symtabs, so that successive calls to this + // function will return the same symtab. + upb_symtab *symtab = upb_symtab_new(); + // TODO: allow upb_strings to be static or on the stack. + upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); + upb_baredecoder *decoder = upb_baredecoder_new(descriptor); + upb_status status; + upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); + assert(upb_ok(&status)); + upb_baredecoder_free(decoder); + upb_string_unref(descriptor); + return symtab; +} diff --git a/src/upb_def.h b/src/upb_def.h index 033dcde..b73b0f9 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -257,6 +257,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // more useful? Maybe it should be an option. void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); +// Returns a symtab that defines google.protobuf.DescriptorProto and all other +// types that are defined in descriptor.proto. This allows you to load other +// proto types. The caller owns a ref on the returned symtab. +upb_symtab *upb_get_descriptor_symtab(); + /* upb_def casts **************************************************************/ diff --git a/src/upb_table.c b/src/upb_table.c index 6fd2c20..b91776c 100644 --- a/src/upb_table.c +++ b/src/upb_table.c @@ -232,7 +232,7 @@ void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { do { cur = (void*)((char*)cur + t->t.entry_size); if(cur == end) return NULL; - } while(upb_string_isnull(cur->key)); + } while(cur->key == NULL); return cur; } -- cgit v1.2.3