summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2011-02-22 01:54:31 -0800
committerJoshua Haberman <joshua@reverberate.org>2011-02-22 01:54:31 -0800
commitfd184f0df2e5e428873eadfaf1ae829d2e4d8e51 (patch)
tree19c4a1d9099f04c74de60eb4d8149ea1b5d930a0 /src
parent0c6786c6fad563f181e66c90df2a74597ce6d18b (diff)
Major work on Lua extension and default values.
Default values are now supported, and the Lua extension can now create and modify individual protobuf objects.
Diffstat (limited to 'src')
-rw-r--r--src/descriptor.h26
-rw-r--r--src/upb_decoder.c126
-rw-r--r--src/upb_decoder_x64.asm4
-rw-r--r--src/upb_def.c166
-rw-r--r--src/upb_def.h16
-rw-r--r--src/upb_msg.c16
-rw-r--r--src/upb_msg.h47
-rw-r--r--src/upb_string.c13
-rw-r--r--src/upb_string.h10
-rw-r--r--src/upbc.c1
10 files changed, 282 insertions, 143 deletions
diff --git a/src/descriptor.h b/src/descriptor.h
deleted file mode 100644
index f6d3ca3..0000000
--- a/src/descriptor.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * This file contains declarations for an array that contains the contents
- * of descriptor.proto, serialized as a protobuf. xxd is used to create
- * the actual definition.
- */
-
-#ifndef UPB_DESCRIPTOR_H_
-#define UPB_DESCRIPTOR_H_
-
-#include "upb_string.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern upb_string descriptor_str;
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_DESCRIPTOR_H_ */
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 8b10522..78fc8b1 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -1,10 +1,11 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
+ * Copyright (c) 2008-2011 Joshua Haberman. See LICENSE for details.
*/
#include "upb_decoder.h"
+#include "upb_varint_decoder.h"
#include <inttypes.h>
#include <stddef.h>
@@ -21,105 +22,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
upb_value_handler_t value_cb, void *closure,
void *table, int table_size);
-/* Pure Decoding **************************************************************/
-
-// The key fast-path varint-decoding routine. Here we can assume we have at
-// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of
-// possibilities for optimization/experimentation here.
-
-#ifdef USE_SSE_VARINT_DECODING
-#include <emmintrin.h>
-
-// This works, but is empirically slower than the branchy version below. Why?
-// Most varints are very short. Next step: use branches for 1/2-byte varints,
-// but use the SSE version for 3-10 byte varints.
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
- const char *p = *ptr;
- __m128i val128 = _mm_loadu_si128((void*)p);
- unsigned int continuation_bits = _mm_movemask_epi8(val128);
- unsigned int bsr_val = ~continuation_bits;
- int varint_length = __builtin_ffs(bsr_val);
- if (varint_length > 10) {
- upb_seterr(s, UPB_ERROR, "Unterminated varint");
- return false;
- }
-
- uint16_t twob;
- memcpy(&twob, p, 2);
- twob &= 0x7f7f;
- twob = ((twob & 0xff00) >> 1) | (twob & 0xff);
-
- uint64_t eightb;
- memcpy(&eightb, p + 2, 8);
- eightb &= 0x7f7f7f7f7f7f7f7f;
- eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff);
- eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff);
- eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff);
-
- uint64_t all_bits = twob | (eightb << 14);
- int varint_bits = varint_length * 7;
- uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1;
- *val = all_bits & mask;
- *ptr = p + varint_length;
- return true;
-}
-
-#else
-
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
- const char *p = *ptr;
- uint32_t low, high = 0;
- uint32_t b;
- b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 28;
- high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
-
- upb_seterr(s, UPB_ERROR, "Unterminated varint");
- return false;
-
-done:
- *val = ((uint64_t)high << 32) | low;
- *ptr = p;
- return true;
-}
-
-typedef struct {
- const char *newbuf;
- uint64_t val;
-} retval;
-
-retval upb_decode_varint_fast64(const char *p) {
- uint64_t ret;
- uint64_t b;
- retval r = {(void*)0, 0};
- b = *(p++); ret = (b & 0x7f) ; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
- return r;
-
-done:
- r.val = ret;
- r.newbuf = p;
- return r;
-}
-
-#endif
-
/* Decoding/Buffering of individual values ************************************/
@@ -233,11 +135,13 @@ done:
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
- uint64_t val64;
- const char *p = d->ptr;
- if (!upb_decode_varint_fast(&p, &val64, d->status)) return false;
- upb_decoder_advance(d, p - d->ptr);
- upb_value_setraw(val, val64);
+ upb_decoderet r = upb_decode_varint_fast(d->ptr);
+ if (r.p == NULL) {
+ upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
+ return false;
+ }
+ upb_value_setraw(val, r.val);
+ upb_decoder_advance(d, r.p - d->ptr);
return true;
} else {
return upb_decode_varint_slow(d, val);
@@ -352,11 +256,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
d->dispatcher.top->handlers.set->value,
d->dispatcher.top->handlers.closure,
d->top->msgdef->itof.array,
- d->top->msgdef->itof.array_size);
+ d->top->msgdef->itof.array_size,
+ d->tmp);
CHECK_FLOW(ret.flow);
+    if (ret.ptr - d->ptr > 0) {
+      // A pointer difference has type ptrdiff_t, which does not match "%d" on
+      // LP64 targets.  Narrow it explicitly so the argument agrees with the
+      // format (assuming DEBUGPRINTF forwards to a printf-style function).
+      DEBUGPRINTF("Fast path parsed %d bytes of data!\n", (int)(ret.ptr - d->ptr));
+    }
d->ptr = ret.ptr;
if (end - d->ptr < 12) {
- DEBUGPRINTF("Off the fast path because <12 bytes of data\n");
+ if (end == d->submsg_end && end != d->end) {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data, but ONLY because of submsg end.\n");
+ } else {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data, NOT because of submsg end.\n");
+ }
} else {
DEBUGPRINTF("Off the fast path for some other reason.\n");
}
diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm
index c59d131..032ea86 100644
--- a/src/upb_decoder_x64.asm
+++ b/src/upb_decoder_x64.asm
@@ -33,7 +33,7 @@ SECTION .text
; Register allocation.
%define BUF rbx ; const char *p, current buf position.
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
-%define FREE r12 ; unused
+%define STRING r12 ; unused
%define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call.
%define CALLBACK r14
%define CLOSURE r15
@@ -143,6 +143,7 @@ _upb_fastdecode:
; Parse arguments into reg vals and stack.
mov BUF, rdi
+ mov COMMITTED_BUF_SPILL, rdi
mov END, rsi
mov CALLBACK, rdx
mov CLOSURE, rcx
@@ -210,7 +211,6 @@ align 16
align 16
.string:
-
.cant_fast_path:
mov rax, 0 ; UPB_CONTINUE -- continue as before.
.done:
diff --git a/src/upb_def.c b/src/upb_def.c
index 0382610..d77e29a 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -6,9 +6,11 @@
#include <stdlib.h>
#include <stddef.h>
+#include <errno.h>
#include "descriptor.c"
#include "descriptor_const.h"
#include "upb_def.h"
+#include "upb_msg.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
@@ -261,6 +263,8 @@ struct _upb_defbuilder {
bool saw_number;
bool saw_name;
+ upb_string *default_string;
+
upb_fielddef *f;
};
typedef struct _upb_defbuilder upb_defbuilder;
@@ -276,12 +280,18 @@ static void upb_defbuilder_init(upb_defbuilder *b) {
upb_status_init(&b->status);
b->stack_len = 0;
b->name = NULL;
+ b->default_string = NULL;
}
static void upb_defbuilder_uninit(upb_defbuilder *b) {
upb_string_unref(b->name);
upb_status_uninit(&b->status);
upb_deflist_uninit(&b->defs);
+ upb_string_unref(b->default_string);
+ while (b->stack_len > 0) {
+ upb_defbuilder_frame *f = &b->stack[--b->stack_len];
+ upb_string_unref(f->name);
+ }
}
static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) {
@@ -587,6 +597,19 @@ upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) {
/* upb_fielddef ***************************************************************/
static void upb_fielddef_free(upb_fielddef *f) {
+ if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) {
+ upb_string_unref(upb_value_getstr(f->default_value));
+ } else if (upb_issubmsg(f)) {
+ upb_msg *m = upb_value_getmsg(f->default_value);
+ assert(m);
+ // We cheat a bit here. We need to unref msg, but we don't have a reliable
+ // way of accessing the msgdef (which is required by upb_msg_unref()),
+ // because f->def may have already been collected as part of a cycle if
+ // this is an unowned ref. But we know that default messages never contain
+ // references to other messages, and their only string references are to
+ // the singleton empty string, so we can safely unref+free msg directly.
+ if (upb_atomic_unref(&m->refcount)) free(m);
+ }
upb_string_unref(f->name);
if(f->owned) {
upb_def_unref(f->def);
@@ -606,6 +629,109 @@ static upb_flow_t upb_fielddef_startmsg(void *_b) {
return UPB_CONTINUE;
}
+// Converts the textual default value "dstr" into the typed value "*d" for a
+// field whose descriptor type is "type".
+//
+// Ownership: the caller hands over its ref on dstr.  String, bytes and enum
+// fields keep that ref inside *d; every other type releases it here.  dstr
+// may be NULL (no explicit default), which produces the empty string, zero or
+// false as appropriate.
+//
+// Returns true on success; false if the text does not parse completely as a
+// value of the requested type, is out of range, or if a message/group field
+// was given a default at all.
+static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) {
+  bool success = true;
+  if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
+    // We'll keep the ref we had on it.  We include enums in this case because
+    // we need the enumdef to resolve the name, but we may not have it yet.
+    // We'll resolve it later.
+    upb_value_setstr(d, dstr ? dstr : upb_emptystring());
+  } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
+    // We don't expect to get a default value.  Test the pointer before
+    // releasing our ref so it is never inspected after a potential free.
+    if (dstr != NULL) {
+      printf("Returning false because I got a default string for a message!\n");
+      success = false;
+    }
+    upb_string_unref(dstr);
+  } else {
+    // The strto* functions need the string to be NUL-terminated.
+    char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
+    char *end;
+    upb_string_unref(dstr);
+    // strto* signal overflow only by setting errno and never clear it, so
+    // reset it here; otherwise a stale ERANGE left behind by unrelated code
+    // would make a perfectly valid default look out of range.
+    errno = 0;
+    switch (type) {
+      case UPB_TYPE(INT32):
+      case UPB_TYPE(SINT32):
+      case UPB_TYPE(SFIXED32):
+        if (strz) {
+          long val = strtol(strz, &end, 0);
+          if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
+            success = false;
+          } else {
+            upb_value_setint32(d, val);
+          }
+        } else {
+          upb_value_setint32(d, 0);
+        }
+        break;
+      case UPB_TYPE(INT64):
+      case UPB_TYPE(SINT64):
+      case UPB_TYPE(SFIXED64):
+        if (strz) {
+          upb_value_setint64(d, strtoll(strz, &end, 0));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setint64(d, 0);
+        }
+        break;
+      case UPB_TYPE(UINT32):
+      case UPB_TYPE(FIXED32):
+        if (strz) {
+          // Keep the result unsigned: storing it in a signed long lets very
+          // large inputs wrap negative and slip past the range check.
+          unsigned long val = strtoul(strz, &end, 0);
+          if (val > UINT32_MAX || errno == ERANGE || *end) {
+            success = false;
+          } else {
+            upb_value_setuint32(d, val);
+          }
+        } else {
+          upb_value_setuint32(d, 0);
+        }
+        break;
+      case UPB_TYPE(UINT64):
+      case UPB_TYPE(FIXED64):
+        if (strz) {
+          upb_value_setuint64(d, strtoull(strz, &end, 0));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setuint64(d, 0);
+        }
+        break;
+      case UPB_TYPE(DOUBLE):
+        if (strz) {
+          upb_value_setdouble(d, strtod(strz, &end));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setdouble(d, 0.0);
+        }
+        break;
+      case UPB_TYPE(FLOAT):
+        if (strz) {
+          upb_value_setfloat(d, strtof(strz, &end));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setfloat(d, 0.0);
+        }
+        break;
+      case UPB_TYPE(BOOL):
+        // An absent default means false; otherwise only the exact spellings
+        // "false" and "true" are accepted.
+        if (!strz || strcmp(strz, "false") == 0)
+          upb_value_setbool(d, false);
+        else if (strcmp(strz, "true") == 0)
+          upb_value_setbool(d, true);
+        else
+          success = false;
+        break;
+      default:
+        // Unrecognized type: leave *d untouched, as before.
+        break;
+    }
+    if (!success) {
+      // Every failure path above is reached only with a non-NULL strz.
+      printf("Returning false on the int conversion path, was trying to convert: %s, type=%d\n", strz, type);
+    }
+    free(strz);
+  }
+  return success;
+}
+
static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_defbuilder *b = _b;
upb_fielddef *f = b->f;
@@ -619,6 +745,15 @@ static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e);
+
+ upb_string *dstr = b->default_string;
+ b->default_string = NULL;
+ if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) {
+ // We don't worry too much about giving a great error message since the
+ // compiler should have ensured this was correct.
+ upb_seterr(&b->status, UPB_ERROR, "Error converting default value.");
+ return UPB_BREAK;
+ }
return UPB_CONTINUE;
}
@@ -644,6 +779,12 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) {
b->f->owned = true;
break;
}
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNUM:
+ // Have to convert from string to the correct type, but we might not know
+ // the type yet.
+ upb_string_unref(b->default_string);
+ b->default_string = upb_string_getref(upb_value_getstr(val));
+ break;
}
return UPB_CONTINUE;
}
@@ -683,6 +824,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_b) {
upb_atomic_refcount_init(&m->cycle_refcount, 0);
upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
+ m->default_message = NULL;
upb_deflist_push(&b->defs, UPB_UPCAST(m));
upb_defbuilder_startcontainer(b);
return UPB_CONTINUE;
@@ -703,7 +845,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
upb_field_count_t field = 0;
upb_msg_iter i;
for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- sorted_fields[field++]= upb_msg_iter_field(i);
+ sorted_fields[field++] = upb_msg_iter_field(i);
}
qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
@@ -745,6 +887,18 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
if (max_align > 0) m->size = upb_align_up(m->size, max_align);
+  // Create default message instance, an immutable message with all default
+  // values set (except submessages, which are simply marked as unset).  We
+  // could alternatively leave all set bits unset, but this would make
+  // upb_msg_get() take its unexpected branch more often for no good reason.
+  //
+  // Enum fields are skipped as well: at this point their default is still the
+  // unresolved value name (a string), not an enum number.  The test must be
+  // written with !=, because "!f->type == X" parses as "(!f->type) == X",
+  // which is never true for a valid type and silently disabled this loop.
+  // NOTE(review): with the condition corrected the loop now really runs;
+  // confirm upb_msg_set() is appropriate for repeated fields.
+  m->default_message = upb_msg_new(m);
+  for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+    upb_fielddef *f = upb_msg_iter_field(i);
+    if (!upb_issubmsg(f) && f->type != UPB_TYPE(ENUM)) {
+      upb_msg_set(m->default_message, f, f->default_value);
+    }
+  }
+
+
upb_defbuilder_endcontainer(b);
return UPB_CONTINUE;
}
@@ -802,6 +956,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b,
static void upb_msgdef_free(upb_msgdef *m)
{
+ upb_msg_unref(m->default_message, m);
upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
upb_fielddef_free(upb_msg_iter_field(i));
@@ -818,6 +973,10 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
// We will later make the ref unowned if it is a part of a cycle.
f->owned = true;
upb_def_ref(def);
+ if (upb_issubmsg(f)) {
+ upb_msgdef *md = upb_downcast_msgdef(def);
+ upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message));
+ }
}
upb_msg_iter upb_msg_begin(upb_msgdef *m) {
@@ -937,7 +1096,8 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
}
// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
-// resolves all of the unresolved refs for the defs in tmptab.
+// resolves all of the unresolved refs for the defs in tmptab. Also resolves
+// default values for enumerations and submessages.
bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
upb_status *status)
{
@@ -1352,7 +1512,7 @@ upb_def *upb_getdescriptordef(upb_string *str) {
// upb itself is corrupt.
abort();
}
- upb_def_unref(UPB_UPCAST(def)); // The symtab already holds a ref on it.
+ upb_msgdef_unref(def); // The symtab already holds a ref on it.
atexit(upb_free_descriptor_symtab);
}
return upb_symtab_resolve(
diff --git a/src/upb_def.h b/src/upb_def.h
index 121d5bc..3f79895 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -81,6 +81,9 @@ INLINE void upb_def_unref(upb_def *def) {
if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
+
/* upb_fielddef ***************************************************************/
// A upb_fielddef describes a single field in a message. It isn't a full def
@@ -158,6 +161,10 @@ typedef struct _upb_msgdef {
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
+
+ // Immutable msg instance that has all default values set.
+ // TODO: need a way of making this immutable!
+ struct _upb_msg *default_message;
} upb_msgdef;
// Hash table entries for looking up fields by name or number.
@@ -172,6 +179,13 @@ typedef struct {
upb_fielddef *f;
} upb_ntof_ent;
+// Releases one ref on "md" by forwarding to upb_def_unref() on its base def
+// (obtained with UPB_UPCAST).
+INLINE void upb_msgdef_unref(upb_msgdef *md) {
+ upb_def_unref(UPB_UPCAST(md));
+}
+// Takes one ref on "md" by forwarding to upb_def_ref() on its base def
+// (obtained with UPB_UPCAST).
+INLINE void upb_msgdef_ref(upb_msgdef *md) {
+ upb_def_ref(UPB_UPCAST(md));
+}
+
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
@@ -361,8 +375,6 @@ UPB_DOWNCAST_DEF(extdef, EXT);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
-#define UPB_UPCAST(ptr) (&(ptr)->base)
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 9dfbea4..211004c 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -145,6 +145,22 @@ INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->set_bit_offset] |= f->set_bit_mask;
}
+// Stores "val" into field "f" of "msg" and marks the field as set.  "val"
+// must carry the value type that upb_field_valuetype(f) reports.
+//
+// For memory-managed fields (upb_field_ismm) the message takes its own ref on
+// the new value and drops the one it held on whatever was stored before.
+void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) {
+  assert(val.type == upb_field_valuetype(f));
+  upb_valueptr ptr = _upb_msg_getptr(msg, f);
+  if (upb_field_ismm(f)) {
+    // Ref the new value *before* releasing the old one: if both are the same
+    // object and the message holds its last ref, unref-first would free the
+    // object we are about to store.
+    upb_atomic_refcount_t *refcount = upb_value_getrefcount(val);
+    if (refcount) upb_atomic_ref(refcount);
+
+    // Unref any previous value we may have had there.
+    upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f));
+    upb_field_unref(oldval, f);
+  }
+  upb_msg_sethas(msg, f);
+  // upb_value_write() returns void, and C forbids "return <expr>;" in a void
+  // function, so this is a plain call.
+  upb_value_write(ptr, val, upb_field_valuetype(f));
+}
+
static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p = _upb_msg_getptr(msg, f);
if (upb_isarray(f)) {
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 3246971..ff8489c 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -135,6 +135,7 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
#undef CASE
}
+
/* upb_array ******************************************************************/
typedef uint32_t upb_arraylen_t;
@@ -172,8 +173,17 @@ INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
}
+
/* upb_msg ********************************************************************/
+// upb_msg is not self-describing; the upb_msg does not contain a pointer to the
+// upb_msgdef. While this makes the API a bit more cumbersome to use, this
+// choice was made for a few important reasons:
+//
+// 1. it would make every message 8 bytes larger on 64-bit platforms. This is
+// a high overhead for small messages.
+// 2. you would want the msg to own a ref on its msgdef, but this would require
+// an atomic operation for every message create or destroy!
struct _upb_msg {
upb_atomic_refcount_t refcount;
uint8_t data[4]; // We allocate the appropriate amount per message.
@@ -194,6 +204,11 @@ upb_msg *upb_msg_new(upb_msgdef *md);
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
}
+// Takes an additional ref on "msg" and returns the same pointer, so the call
+// can be used inline wherever a msg is expected.  "msg" must be non-NULL
+// (asserted).
+INLINE upb_msg *upb_msg_getref(upb_msg *msg) {
+ assert(msg);
+ upb_atomic_ref(&msg->refcount);
+ return msg;
+}
void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
@@ -203,10 +218,40 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0;
}
+// We have several options for handling default values:
+// 1. inside upb_msg_clear(), overwrite all values to be their defaults,
+// overwriting submessage pointers to point to the default instance again.
+// 2. inside upb_msg_get(), test upb_msg_has() and return md->default_value
+// if it is not set. upb_msg_clear() only clears the set bits.
+// We lazily clear objects if/when we reuse them.
+// 3. inside upb_msg_clear(), overwrite all values to be their default,
+// and recurse into submessages to set all their values to defaults also.
+// 4. as a hybrid of (1) and (3), make each "set bit" tri-state, where it
+// can have a value of "unset, but cached sub-message needs to be cleared."
+// Like (2) we can cache sub-messages and lazily clear, but primitive values
+// can always be returned straight from the message.
+//
+// (1) is undesirable, because it prevents us from caching sub-objects.
+// (2) makes clear() cheaper, but makes get() branchier.
+// (3) makes get() less branchy, but makes clear() have worse cache behavior.
+// (4) makes get() differently branchy (only returns default from msgdef if
+// NON-primitive value is unset), but uses more set bits. It's questionable
+// whether it would be a performance improvement.
+//
+// For the moment we go with (2). Google's protobuf does (3), which is likely
+// part of the reason we beat it in some benchmarks.
+
+// For submessages and strings, the returned value is not owned.
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
- return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+ if (upb_msg_has(msg, f)) {
+ return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+ } else {
+ return f->default_value;
+ }
}
+void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val);
+
// Unsets all field values back to their defaults.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
memset(msg->data, 0, md->set_flags_bytes);
diff --git a/src/upb_string.c b/src/upb_string.c
index 81b152d..29ce7d4 100644
--- a/src/upb_string.c
+++ b/src/upb_string.c
@@ -147,4 +147,15 @@ error:
return NULL;
}
-void upb_string_noninlinerecycle(upb_string **_str) { return upb_string_recycle(_str); }
+// Returns the singleton empty string.  Every call yields a pointer to the
+// same function-local static object.
+upb_string *upb_emptystring(void) {
+  static upb_string empty = UPB_STATIC_STRING("");
+  return &empty;
+}
+
+// Returns a malloc()ed, NUL-terminated copy of the bytes of "str", or NULL if
+// the allocation fails.  The caller owns the result and must free() it.
+char *upb_string_newcstr(upb_string *str) {
+  upb_strlen_t len = upb_string_len(str);
+  char *ret = malloc(len + 1);
+  if (ret == NULL) return NULL;
+  memcpy(ret, upb_string_getrobuf(str), len);
+  // upb_string.h requires every upb_string_getrobuf() to be followed by
+  // upb_string_endread() once the bytes have been consumed.
+  upb_string_endread(str);
+  ret[len] = '\0';
+  return ret;
+}
diff --git a/src/upb_string.h b/src/upb_string.h
index 3799c5e..efafa44 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -134,6 +134,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) {
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
+// True when "str" holds no bytes; a NULL pointer is treated as empty too.
+INLINE bool upb_string_isempty(upb_string *str) {
+  if (str == NULL) return true;
+  return upb_string_len(str) == 0;
+}
// Use to read the bytes of the string. The caller *must* call
// upb_string_endread() after the data has been read. The window between
@@ -273,6 +276,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str,
//#endif
#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
+// Returns a singleton empty string.  Declared with (void) so that calls
+// passing arguments are rejected at compile time.
+upb_string *upb_emptystring(void);
+
+
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
@@ -339,6 +346,9 @@ INLINE upb_string *upb_strdupc(const char *src) {
return upb_strduplen(src, strlen(src));
}
+// Returns a newly-allocated NUL-terminated copy of str; the caller must free() it.
+char *upb_string_newcstr(upb_string *str);
+
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
diff --git a/src/upbc.c b/src/upbc.c
index 428ec41..4fa8a71 100644
--- a/src/upbc.c
+++ b/src/upbc.c
@@ -12,7 +12,6 @@
#include <inttypes.h>
#include <stdarg.h>
#include <stdlib.h>
-#include "descriptor.h"
#include "upb_def.h"
#include "upb_msg.h"
#include "upb_glue.h"
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback