From fd184f0df2e5e428873eadfaf1ae829d2e4d8e51 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 22 Feb 2011 01:54:31 -0800 Subject: Major work on Lua extension and default values. Default values are now supported, and the Lua extension can now create and modify individual protobuf objects. --- src/descriptor.h | 26 -------- src/upb_decoder.c | 126 ++++++------------------------------ src/upb_decoder_x64.asm | 4 +- src/upb_def.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++- src/upb_def.h | 16 ++++- src/upb_msg.c | 16 +++++ src/upb_msg.h | 47 +++++++++++++- src/upb_string.c | 13 +++- src/upb_string.h | 10 +++ src/upbc.c | 1 - 10 files changed, 282 insertions(+), 143 deletions(-) delete mode 100644 src/descriptor.h (limited to 'src') diff --git a/src/descriptor.h b/src/descriptor.h deleted file mode 100644 index f6d3ca3..0000000 --- a/src/descriptor.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file contains declarations for an array that contains the contents - * of descriptor.proto, serialized as a protobuf. xxd is used to create - * the actual definition. - */ - -#ifndef UPB_DESCRIPTOR_H_ -#define UPB_DESCRIPTOR_H_ - -#include "upb_string.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern upb_string descriptor_str; - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DESCRIPTOR_H_ */ diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 8b10522..78fc8b1 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -1,10 +1,11 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2008-2011 Joshua Haberman. See LICENSE for details. 
*/ #include "upb_decoder.h" +#include "upb_varint_decoder.h" #include #include @@ -21,105 +22,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end, upb_value_handler_t value_cb, void *closure, void *table, int table_size); -/* Pure Decoding **************************************************************/ - -// The key fast-path varint-decoding routine. Here we can assume we have at -// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of -// possibilities for optimization/experimentation here. - -#ifdef USE_SSE_VARINT_DECODING -#include - -// This works, but is empirically slower than the branchy version below. Why? -// Most varints are very short. Next step: use branches for 1/2-byte varints, -// but use the SSE version for 3-10 byte varints. -INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) { - const char *p = *ptr; - __m128i val128 = _mm_loadu_si128((void*)p); - unsigned int continuation_bits = _mm_movemask_epi8(val128); - unsigned int bsr_val = ~continuation_bits; - int varint_length = __builtin_ffs(bsr_val); - if (varint_length > 10) { - upb_seterr(s, UPB_ERROR, "Unterminated varint"); - return false; - } - - uint16_t twob; - memcpy(&twob, p, 2); - twob &= 0x7f7f; - twob = ((twob & 0xff00) >> 1) | (twob & 0xff); - - uint64_t eightb; - memcpy(&eightb, p + 2, 8); - eightb &= 0x7f7f7f7f7f7f7f7f; - eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff); - eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff); - eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff); - - uint64_t all_bits = twob | (eightb << 14); - int varint_bits = varint_length * 7; - uint64_t mask = varint_bits == 70 ? 
(uint64_t)-1 : (1ULL << (varint_bits)) - 1; - *val = all_bits & mask; - *ptr = p + varint_length; - return true; -} - -#else - -INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) { - const char *p = *ptr; - uint32_t low, high = 0; - uint32_t b; - b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done; - - upb_seterr(s, UPB_ERROR, "Unterminated varint"); - return false; - -done: - *val = ((uint64_t)high << 32) | low; - *ptr = p; - return true; -} - -typedef struct { - const char *newbuf; - uint64_t val; -} retval; - -retval upb_decode_varint_fast64(const char *p) { - uint64_t ret; - uint64_t b; - retval r = {(void*)0, 0}; - b = *(p++); ret = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done; - return r; - -done: - r.val = ret; - r.newbuf = p; - return 
r; -} - -#endif - /* Decoding/Buffering of individual values ************************************/ @@ -233,11 +135,13 @@ done: INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) { if (upb_decoder_bufleft(d) >= 16) { // Common (fast) case. - uint64_t val64; - const char *p = d->ptr; - if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; - upb_decoder_advance(d, p - d->ptr); - upb_value_setraw(val, val64); + upb_decoderet r = upb_decode_varint_fast(d->ptr); + if (r.p == NULL) { + upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n"); + return false; + } + upb_value_setraw(val, r.val); + upb_decoder_advance(d, r.p - d->ptr); return true; } else { return upb_decode_varint_slow(d, val); @@ -352,11 +256,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) { d->dispatcher.top->handlers.set->value, d->dispatcher.top->handlers.closure, d->top->msgdef->itof.array, - d->top->msgdef->itof.array_size); + d->top->msgdef->itof.array_size, + d->tmp); CHECK_FLOW(ret.flow); + if (ret.ptr - d->ptr > 0) { + DEBUGPRINTF("Fast path parsed %d bytes of data!\n", ret.ptr - d->ptr); + } d->ptr = ret.ptr; if (end - d->ptr < 12) { - DEBUGPRINTF("Off the fast path because <12 bytes of data\n"); + if (end == d->submsg_end && end != d->end) { + DEBUGPRINTF("Off the fast path because <12 bytes of data, but ONLY because of submsg end.\n"); + } else { + DEBUGPRINTF("Off the fast path because <12 bytes of data, NOT because of submsg end.\n"); + } } else { DEBUGPRINTF("Off the fast path for some other reason.\n"); } diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm index c59d131..032ea86 100644 --- a/src/upb_decoder_x64.asm +++ b/src/upb_decoder_x64.asm @@ -33,7 +33,7 @@ SECTION .text ; Register allocation. %define BUF rbx ; const char *p, current buf position. 
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end) -%define FREE r12 ; unused +%define STRING r12 ; unused %define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call. %define CALLBACK r14 %define CLOSURE r15 @@ -143,6 +143,7 @@ _upb_fastdecode: ; Parse arguments into reg vals and stack. mov BUF, rdi + mov COMMITTED_BUF_SPILL, rdi mov END, rsi mov CALLBACK, rdx mov CLOSURE, rcx @@ -210,7 +211,6 @@ align 16 align 16 .string: - .cant_fast_path: mov rax, 0 ; UPB_CONTINUE -- continue as before. .done: diff --git a/src/upb_def.c b/src/upb_def.c index 0382610..d77e29a 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -6,9 +6,11 @@ #include #include +#include #include "descriptor.c" #include "descriptor_const.h" #include "upb_def.h" +#include "upb_msg.h" #define alignof(t) offsetof(struct { char c; t x; }, x) @@ -261,6 +263,8 @@ struct _upb_defbuilder { bool saw_number; bool saw_name; + upb_string *default_string; + upb_fielddef *f; }; typedef struct _upb_defbuilder upb_defbuilder; @@ -276,12 +280,18 @@ static void upb_defbuilder_init(upb_defbuilder *b) { upb_status_init(&b->status); b->stack_len = 0; b->name = NULL; + b->default_string = NULL; } static void upb_defbuilder_uninit(upb_defbuilder *b) { upb_string_unref(b->name); upb_status_uninit(&b->status); upb_deflist_uninit(&b->defs); + upb_string_unref(b->default_string); + while (b->stack_len > 0) { + upb_defbuilder_frame *f = &b->stack[--b->stack_len]; + upb_string_unref(f->name); + } } static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { @@ -587,6 +597,19 @@ upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { /* upb_fielddef ***************************************************************/ static void upb_fielddef_free(upb_fielddef *f) { + if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) { + upb_string_unref(upb_value_getstr(f->default_value)); + } else if (upb_issubmsg(f)) { + upb_msg *m = upb_value_getmsg(f->default_value); + 
assert(m); + // We cheat a bit here. We need to unref msg, but we don't have a reliable + // way of accessing the msgdef (which is required by upb_msg_unref()), + // because f->def may have already been collected as part of a cycle if + // this is an unowned ref. But we know that default messages never contain + // references to other messages, and their only string references are to + // the singleton empty string, so we can safely unref+free msg directly. + if (upb_atomic_unref(&m->refcount)) free(m); + } upb_string_unref(f->name); if(f->owned) { upb_def_unref(f->def); @@ -606,6 +629,109 @@ static upb_flow_t upb_fielddef_startmsg(void *_b) { return UPB_CONTINUE; } +// Converts the default value in string "dstr" into "d". Passes a ref on dstr. +// Returns true on success. +static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) { + bool success = true; + if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) { + // We'll keep the ref we had on it. We include enums in this case because + // we need the enumdef to resolve the name, but we may not have it yet. + // We'll resolve it later. + if (dstr) { + upb_value_setstr(d, dstr); + } else { + upb_value_setstr(d, upb_emptystring()); + } + } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) { + // We don't expect to get a default value. + upb_string_unref(dstr); + if (dstr != NULL) { + printf("Returning false because I got a default string for a message!\n"); + success = false; + } + } else { + // The strto* functions need the string to be NULL-terminated. + char *strz = upb_string_isempty(dstr) ? 
NULL : upb_string_newcstr(dstr); + char *end; + upb_string_unref(dstr); + switch (type) { + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(SFIXED32): + if (strz) { + long val = strtol(strz, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) + success = false; + else + upb_value_setint32(d, val); + } else { + upb_value_setint32(d, 0); + } + break; + case UPB_TYPE(INT64): + case UPB_TYPE(SINT64): + case UPB_TYPE(SFIXED64): + if (strz) { + upb_value_setint64(d, strtoll(strz, &end, 0)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setint64(d, 0); + } + break; + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + if (strz) { + long val = strtoul(strz, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) + success = false; + else + upb_value_setuint32(d, val); + } else { + upb_value_setuint32(d, 0); + } + break; + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + if (strz) { + upb_value_setuint64(d, strtoull(strz, &end, 0)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setuint64(d, 0); + } + break; + case UPB_TYPE(DOUBLE): + if (strz) { + upb_value_setdouble(d, strtod(strz, &end)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setdouble(d, 0.0); + } + break; + case UPB_TYPE(FLOAT): + if (strz) { + upb_value_setfloat(d, strtof(strz, &end)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setfloat(d, 0.0); + } + break; + case UPB_TYPE(BOOL): + if (!strz || strcmp(strz, "false") == 0) + upb_value_setbool(d, false); + else if (strcmp(strz, "true") == 0) + upb_value_setbool(d, true); + else + success = false; + break; + } + if (!success) { + printf("Returning false on the int conversion path, was trying to convert: %s, type=%d\n", strz, type); + } + free(strz); + } + return success; +} + static upb_flow_t upb_fielddef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_fielddef *f = b->f; @@ -619,6 +745,15 @@ static upb_flow_t 
upb_fielddef_endmsg(void *_b) { upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, f->number, &itof_ent); upb_strtable_insert(&m->ntof, &ntof_ent.e); + + upb_string *dstr = b->default_string; + b->default_string = NULL; + if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) { + // We don't worry too much about giving a great error message since the + // compiler should have ensured this was correct. + upb_seterr(&b->status, UPB_ERROR, "Error converting default value."); + return UPB_BREAK; + } return UPB_CONTINUE; } @@ -644,6 +779,12 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->owned = true; break; } + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNUM: + // Have to convert from string to the correct type, but we might not know + // the type yet. + upb_string_unref(b->default_string); + b->default_string = upb_string_getref(upb_value_getstr(val)); + break; } return UPB_CONTINUE; } @@ -683,6 +824,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_b) { upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); + m->default_message = NULL; upb_deflist_push(&b->defs, UPB_UPCAST(m)); upb_defbuilder_startcontainer(b); return UPB_CONTINUE; @@ -703,7 +845,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { upb_field_count_t field = 0; upb_msg_iter i; for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - sorted_fields[field++]= upb_msg_iter_field(i); + sorted_fields[field++] = upb_msg_iter_field(i); } qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); @@ -745,6 +887,18 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); + // Create default message instance, an immutable message with all default + // values set (except submessages, which are simply marked as unset). 
We + // could alternatively leave all set bits unset, but this would make + // upb_msg_get() take its unexpected branch more often for no good reason. + m->default_message = upb_msg_new(m); + for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if (!upb_issubmsg(f) && !f->type == UPB_TYPE(ENUM)) { + upb_msg_set(m->default_message, f, f->default_value); + } + } + upb_defbuilder_endcontainer(b); return UPB_CONTINUE; } @@ -802,6 +956,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, static void upb_msgdef_free(upb_msgdef *m) { + upb_msg_unref(m->default_message, m); upb_msg_iter i; for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) upb_fielddef_free(upb_msg_iter_field(i)); @@ -818,6 +973,10 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { // We will later make the ref unowned if it is a part of a cycle. f->owned = true; upb_def_ref(def); + if (upb_issubmsg(f)) { + upb_msgdef *md = upb_downcast_msgdef(def); + upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message)); + } } upb_msg_iter upb_msg_begin(upb_msgdef *m) { @@ -937,7 +1096,8 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) } // Given a table of pending defs "tmptab" and a table of existing defs "symtab", -// resolves all of the unresolved refs for the defs in tmptab. +// resolves all of the unresolved refs for the defs in tmptab. Also resolves +// default values for enumerations and submessages. bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, upb_status *status) { @@ -1352,7 +1512,7 @@ upb_def *upb_getdescriptordef(upb_string *str) { // upb itself is corrupt. abort(); } - upb_def_unref(UPB_UPCAST(def)); // The symtab already holds a ref on it. + upb_msgdef_unref(def); // The symtab already holds a ref on it. 
atexit(upb_free_descriptor_symtab); } return upb_symtab_resolve( diff --git a/src/upb_def.h b/src/upb_def.h index 121d5bc..3f79895 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -81,6 +81,9 @@ INLINE void upb_def_unref(upb_def *def) { if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); } +#define UPB_UPCAST(ptr) (&(ptr)->base) + + /* upb_fielddef ***************************************************************/ // A upb_fielddef describes a single field in a message. It isn't a full def @@ -158,6 +161,10 @@ typedef struct _upb_msgdef { // Tables for looking up fields by number and name. upb_inttable itof; // int to field upb_strtable ntof; // name to field + + // Immutable msg instance that has all default values set. + // TODO: need a way of making this immutable! + struct _upb_msg *default_message; } upb_msgdef; // Hash table entries for looking up fields by name or number. @@ -172,6 +179,13 @@ typedef struct { upb_fielddef *f; } upb_ntof_ent; +INLINE void upb_msgdef_unref(upb_msgdef *md) { + upb_def_unref(UPB_UPCAST(md)); +} +INLINE void upb_msgdef_ref(upb_msgdef *md) { + upb_def_ref(UPB_UPCAST(md)); +} + // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. 
@@ -361,8 +375,6 @@ UPB_DOWNCAST_DEF(extdef, EXT); UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); #undef UPB_DOWNCAST_DEF -#define UPB_UPCAST(ptr) (&(ptr)->base) - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_msg.c b/src/upb_msg.c index 9dfbea4..211004c 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -145,6 +145,22 @@ INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { msg->data[f->set_bit_offset] |= f->set_bit_mask; } +void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { + assert(val.type == upb_field_valuetype(f)); + upb_valueptr ptr = _upb_msg_getptr(msg, f); + if (upb_field_ismm(f)) { + // Unref any previous value we may have had there. + upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f)); + upb_field_unref(oldval, f); + + // Ref the new value. + upb_atomic_refcount_t *refcount = upb_value_getrefcount(val); + if (refcount) upb_atomic_ref(refcount); + } + upb_msg_sethas(msg, f); + return upb_value_write(ptr, val, upb_field_valuetype(f)); +} + static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) { upb_valueptr p = _upb_msg_getptr(msg, f); if (upb_isarray(f)) { diff --git a/src/upb_msg.h b/src/upb_msg.h index 3246971..ff8489c 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -135,6 +135,7 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, #undef CASE } + /* upb_array ******************************************************************/ typedef uint32_t upb_arraylen_t; @@ -172,8 +173,17 @@ INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f, return upb_value_read(_upb_array_getptr(arr, f, i), f->type); } + /* upb_msg ********************************************************************/ +// upb_msg is not self-describing; the upb_msg does not contain a pointer to the +// upb_msgdef. While this makes the API a bit more cumbersome to use, this +// choice was made for a few important reasons: +// +// 1. it would make every message 8 bytes larger on 64-bit platforms. 
This is +// a high overhead for small messages. +// 2. you would want the msg to own a ref on its msgdef, but this would require +// an atomic operation for every message create or destroy! struct _upb_msg { upb_atomic_refcount_t refcount; uint8_t data[4]; // We allocate the appropriate amount per message. @@ -194,6 +204,11 @@ upb_msg *upb_msg_new(upb_msgdef *md); INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); } +INLINE upb_msg *upb_msg_getref(upb_msg *msg) { + assert(msg); + upb_atomic_ref(&msg->refcount); + return msg; +} void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef); @@ -203,10 +218,40 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0; } +// We have several options for handling default values: +// 1. inside upb_msg_clear(), overwrite all values to be their defaults, +// overwriting submessage pointers to point to the default instance again. +// 2. inside upb_msg_get(), test upb_msg_has() and return md->default_value +// if it is not set. upb_msg_clear() only clears the set bits. +// We lazily clear objects if/when we reuse them. +// 3. inside upb_msg_clear(), overwrite all values to be their default, +// and recurse into submessages to set all their values to defaults also. +// 4. as a hybrid of (1) and (3), make each "set bit" tri-state, where it +// can have a value of "unset, but cached sub-message needs to be cleared." +// Like (2) we can cache sub-messages and lazily clear, but primitive values +// can always be returned straight from the message. +// +// (1) is undesirable, because it prevents us from caching sub-objects. +// (2) makes clear() cheaper, but makes get() branchier. +// (3) makes get() less branchy, but makes clear() have worse cache behavior. +// (4) makes get() differently branchy (only returns default from msgdef if +// NON-primitive value is unset), but uses more set bits. 
It's questionable +// whether it would be a performance improvement. +// +// For the moment we go with (2). Google's protobuf does (3), which is likely +// part of the reason we beat it in some benchmarks. + +// For submessages and strings, the returned value is not owned. INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); + if (upb_msg_has(msg, f)) { + return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); + } else { + return f->default_value; + } } +void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); + // Unsets all field values back to their defaults. INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); diff --git a/src/upb_string.c b/src/upb_string.c index 81b152d..29ce7d4 100644 --- a/src/upb_string.c +++ b/src/upb_string.c @@ -147,4 +147,15 @@ error: return NULL; } -void upb_string_noninlinerecycle(upb_string **_str) { return upb_string_recycle(_str); } +upb_string *upb_emptystring() { + static upb_string empty = UPB_STATIC_STRING(""); + return &empty; +} + +char *upb_string_newcstr(upb_string *str) { + upb_strlen_t len = upb_string_len(str); + char *ret = malloc(len+1); + memcpy(ret, upb_string_getrobuf(str), len); + ret[len] = '\0'; + return ret; +} diff --git a/src/upb_string.h b/src/upb_string.h index 3799c5e..efafa44 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -134,6 +134,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) { // Returns the length of the string. INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } +INLINE bool upb_string_isempty(upb_string *str) { + return !str || upb_string_len(str) == 0; +} // Use to read the bytes of the string. The caller *must* call // upb_string_endread() after the data has been read. 
The window between @@ -273,6 +276,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str, //#endif #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) +// Returns a singleton empty string. +upb_string *upb_emptystring(); + + /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer @@ -339,6 +346,9 @@ INLINE upb_string *upb_strdupc(const char *src) { return upb_strduplen(src, strlen(src)); } +// Returns a newly-allocated NULL-terminated copy of str. +char *upb_string_newcstr(upb_string *str); + // Appends 'append' to 's' in-place, resizing s if necessary. void upb_strcat(upb_string *s, upb_string *append); diff --git a/src/upbc.c b/src/upbc.c index 428ec41..4fa8a71 100644 --- a/src/upbc.c +++ b/src/upbc.c @@ -12,7 +12,6 @@ #include #include #include -#include "descriptor.h" #include "upb_def.h" #include "upb_msg.h" #include "upb_glue.h" -- cgit v1.2.3