summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/descriptor.h26
-rw-r--r--src/upb_decoder.c126
-rw-r--r--src/upb_decoder_x64.asm4
-rw-r--r--src/upb_def.c166
-rw-r--r--src/upb_def.h16
-rw-r--r--src/upb_msg.c16
-rw-r--r--src/upb_msg.h47
-rw-r--r--src/upb_string.c13
-rw-r--r--src/upb_string.h10
-rw-r--r--src/upbc.c1
10 files changed, 282 insertions, 143 deletions
diff --git a/src/descriptor.h b/src/descriptor.h
deleted file mode 100644
index f6d3ca3..0000000
--- a/src/descriptor.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * This file contains declarations for an array that contains the contents
- * of descriptor.proto, serialized as a protobuf. xxd is used to create
- * the actual definition.
- */
-
-#ifndef UPB_DESCRIPTOR_H_
-#define UPB_DESCRIPTOR_H_
-
-#include "upb_string.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern upb_string descriptor_str;
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_DESCRIPTOR_H_ */
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 8b10522..78fc8b1 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -1,10 +1,11 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
+ * Copyright (c) 2008-2011 Joshua Haberman. See LICENSE for details.
*/
#include "upb_decoder.h"
+#include "upb_varint_decoder.h"
#include <inttypes.h>
#include <stddef.h>
@@ -21,105 +22,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
upb_value_handler_t value_cb, void *closure,
void *table, int table_size);
-/* Pure Decoding **************************************************************/
-
-// The key fast-path varint-decoding routine. Here we can assume we have at
-// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of
-// possibilities for optimization/experimentation here.
-
-#ifdef USE_SSE_VARINT_DECODING
-#include <emmintrin.h>
-
-// This works, but is empirically slower than the branchy version below. Why?
-// Most varints are very short. Next step: use branches for 1/2-byte varints,
-// but use the SSE version for 3-10 byte varints.
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
- const char *p = *ptr;
- __m128i val128 = _mm_loadu_si128((void*)p);
- unsigned int continuation_bits = _mm_movemask_epi8(val128);
- unsigned int bsr_val = ~continuation_bits;
- int varint_length = __builtin_ffs(bsr_val);
- if (varint_length > 10) {
- upb_seterr(s, UPB_ERROR, "Unterminated varint");
- return false;
- }
-
- uint16_t twob;
- memcpy(&twob, p, 2);
- twob &= 0x7f7f;
- twob = ((twob & 0xff00) >> 1) | (twob & 0xff);
-
- uint64_t eightb;
- memcpy(&eightb, p + 2, 8);
- eightb &= 0x7f7f7f7f7f7f7f7f;
- eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff);
- eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff);
- eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff);
-
- uint64_t all_bits = twob | (eightb << 14);
- int varint_bits = varint_length * 7;
- uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1;
- *val = all_bits & mask;
- *ptr = p + varint_length;
- return true;
-}
-
-#else
-
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
- const char *p = *ptr;
- uint32_t low, high = 0;
- uint32_t b;
- b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 28;
- high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
-
- upb_seterr(s, UPB_ERROR, "Unterminated varint");
- return false;
-
-done:
- *val = ((uint64_t)high << 32) | low;
- *ptr = p;
- return true;
-}
-
-typedef struct {
- const char *newbuf;
- uint64_t val;
-} retval;
-
-retval upb_decode_varint_fast64(const char *p) {
- uint64_t ret;
- uint64_t b;
- retval r = {(void*)0, 0};
- b = *(p++); ret = (b & 0x7f) ; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
- return r;
-
-done:
- r.val = ret;
- r.newbuf = p;
- return r;
-}
-
-#endif
-
/* Decoding/Buffering of individual values ************************************/
@@ -233,11 +135,13 @@ done:
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
- uint64_t val64;
- const char *p = d->ptr;
- if (!upb_decode_varint_fast(&p, &val64, d->status)) return false;
- upb_decoder_advance(d, p - d->ptr);
- upb_value_setraw(val, val64);
+ upb_decoderet r = upb_decode_varint_fast(d->ptr);
+ if (r.p == NULL) {
+ upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
+ return false;
+ }
+ upb_value_setraw(val, r.val);
+ upb_decoder_advance(d, r.p - d->ptr);
return true;
} else {
return upb_decode_varint_slow(d, val);
@@ -352,11 +256,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
d->dispatcher.top->handlers.set->value,
d->dispatcher.top->handlers.closure,
d->top->msgdef->itof.array,
- d->top->msgdef->itof.array_size);
+ d->top->msgdef->itof.array_size,
+ d->tmp);
CHECK_FLOW(ret.flow);
+ if (ret.ptr - d->ptr > 0) {
+ DEBUGPRINTF("Fast path parsed %d bytes of data!\n", (int)(ret.ptr - d->ptr)); // Pointer difference is a ptrdiff_t; cast it to match %d.
+ }
d->ptr = ret.ptr;
if (end - d->ptr < 12) {
- DEBUGPRINTF("Off the fast path because <12 bytes of data\n");
+ if (end == d->submsg_end && end != d->end) {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data, but ONLY because of submsg end.\n");
+ } else {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data, NOT because of submsg end.\n");
+ }
} else {
DEBUGPRINTF("Off the fast path for some other reason.\n");
}
diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm
index c59d131..032ea86 100644
--- a/src/upb_decoder_x64.asm
+++ b/src/upb_decoder_x64.asm
@@ -33,7 +33,7 @@ SECTION .text
; Register allocation.
%define BUF rbx ; const char *p, current buf position.
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
-%define FREE r12 ; unused
+%define STRING r12 ; unused
%define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call.
%define CALLBACK r14
%define CLOSURE r15
@@ -143,6 +143,7 @@ _upb_fastdecode:
; Parse arguments into reg vals and stack.
mov BUF, rdi
+ mov COMMITTED_BUF_SPILL, rdi
mov END, rsi
mov CALLBACK, rdx
mov CLOSURE, rcx
@@ -210,7 +211,6 @@ align 16
align 16
.string:
-
.cant_fast_path:
mov rax, 0 ; UPB_CONTINUE -- continue as before.
.done:
diff --git a/src/upb_def.c b/src/upb_def.c
index 0382610..d77e29a 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -6,9 +6,11 @@
#include <stdlib.h>
#include <stddef.h>
+#include <errno.h>
#include "descriptor.c"
#include "descriptor_const.h"
#include "upb_def.h"
+#include "upb_msg.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
@@ -261,6 +263,8 @@ struct _upb_defbuilder {
bool saw_number;
bool saw_name;
+ upb_string *default_string;
+
upb_fielddef *f;
};
typedef struct _upb_defbuilder upb_defbuilder;
@@ -276,12 +280,18 @@ static void upb_defbuilder_init(upb_defbuilder *b) {
upb_status_init(&b->status);
b->stack_len = 0;
b->name = NULL;
+ b->default_string = NULL;
}
static void upb_defbuilder_uninit(upb_defbuilder *b) {  // Releases everything the builder still references.
upb_string_unref(b->name);
upb_status_uninit(&b->status);
upb_deflist_uninit(&b->defs);
+ upb_string_unref(b->default_string);  // Drop a default-value string that was never consumed.
+ while (b->stack_len > 0) {  // Pop every remaining frame, unref'ing its name.
+ upb_defbuilder_frame *f = &b->stack[--b->stack_len];
+ upb_string_unref(f->name);
+ }
}
static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) {
@@ -587,6 +597,19 @@ upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) {
/* upb_fielddef ***************************************************************/
static void upb_fielddef_free(upb_fielddef *f) {
+ if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) {
+ upb_string_unref(upb_value_getstr(f->default_value));
+ } else if (upb_issubmsg(f)) {
+ upb_msg *m = upb_value_getmsg(f->default_value);
+ assert(m);
+ // We cheat a bit here. We need to unref msg, but we don't have a reliable
+ // way of accessing the msgdef (which is required by upb_msg_unref()),
+ // because f->def may have already been collected as part of a cycle if
+ // this is an unowned ref. But we know that default messages never contain
+ // references to other messages, and their only string references are to
+ // the singleton empty string, so we can safely unref+free msg directly.
+ if (upb_atomic_unref(&m->refcount)) free(m);
+ }
upb_string_unref(f->name);
if(f->owned) {
upb_def_unref(f->def);
@@ -606,6 +629,109 @@ static upb_flow_t upb_fielddef_startmsg(void *_b) {
return UPB_CONTINUE;
}
+// Converts the default value in string "dstr" into "d". Passes a ref on dstr.
+// Returns true on success.
+//
+// String, bytes and enum fields keep the string itself (or the empty-string
+// singleton when dstr is NULL). Message and group fields must not carry a
+// default. Every other type is parsed with the strto* family; a NULL or empty
+// dstr yields zero/false.
+static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) {
+ bool success = true;
+ if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
+ // We'll keep the ref we had on it. We include enums in this case because
+ // we need the enumdef to resolve the name, but we may not have it yet.
+ // We'll resolve it later.
+ if (dstr) {
+ upb_value_setstr(d, dstr);
+ } else {
+ upb_value_setstr(d, upb_emptystring());
+ }
+ } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
+ // We don't expect to get a default value. Test the pointer before giving
+ // up our ref so we never inspect it after it may have been released.
+ if (dstr != NULL) {
+ printf("Returning false because I got a default string for a message!\n");
+ success = false;
+ }
+ upb_string_unref(dstr);
+ } else {
+ // The strto* functions need the string to be NUL-terminated.
+ bool has_default = !upb_string_isempty(dstr);
+ char *strz = has_default ? upb_string_newcstr(dstr) : NULL;
+ char *end;
+ upb_string_unref(dstr);
+ // A NULL copy of a non-empty string is a failure, not "no default".
+ if (has_default && !strz) return false;
+ // The strto* functions only set errno on failure, so clear any stale value
+ // left behind by earlier calls before testing it below.
+ errno = 0;
+ switch (type) {
+ case UPB_TYPE(INT32):
+ case UPB_TYPE(SINT32):
+ case UPB_TYPE(SFIXED32):
+ if (strz) {
+ long val = strtol(strz, &end, 0);
+ if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
+ success = false;
+ else
+ upb_value_setint32(d, val);
+ } else {
+ upb_value_setint32(d, 0);
+ }
+ break;
+ case UPB_TYPE(INT64):
+ case UPB_TYPE(SINT64):
+ case UPB_TYPE(SFIXED64):
+ if (strz) {
+ upb_value_setint64(d, strtoll(strz, &end, 0));
+ if (errno == ERANGE || *end) success = false;
+ } else {
+ upb_value_setint64(d, 0);
+ }
+ break;
+ case UPB_TYPE(UINT32):
+ case UPB_TYPE(FIXED32):
+ if (strz) {
+ // Keep the result unsigned: stored in a signed long, a huge value
+ // would wrap negative and slip past the range check.
+ unsigned long val = strtoul(strz, &end, 0);
+ if (val > UINT32_MAX || errno == ERANGE || *end)
+ success = false;
+ else
+ upb_value_setuint32(d, val);
+ } else {
+ upb_value_setuint32(d, 0);
+ }
+ break;
+ case UPB_TYPE(UINT64):
+ case UPB_TYPE(FIXED64):
+ if (strz) {
+ upb_value_setuint64(d, strtoull(strz, &end, 0));
+ if (errno == ERANGE || *end) success = false;
+ } else {
+ upb_value_setuint64(d, 0);
+ }
+ break;
+ case UPB_TYPE(DOUBLE):
+ if (strz) {
+ upb_value_setdouble(d, strtod(strz, &end));
+ if (errno == ERANGE || *end) success = false;
+ } else {
+ upb_value_setdouble(d, 0.0);
+ }
+ break;
+ case UPB_TYPE(FLOAT):
+ if (strz) {
+ upb_value_setfloat(d, strtof(strz, &end));
+ if (errno == ERANGE || *end) success = false;
+ } else {
+ upb_value_setfloat(d, 0.0);
+ }
+ break;
+ case UPB_TYPE(BOOL):
+ if (!strz || strcmp(strz, "false") == 0)
+ upb_value_setbool(d, false);
+ else if (strcmp(strz, "true") == 0)
+ upb_value_setbool(d, true);
+ else
+ success = false;
+ break;
+ }
+ if (!success) {
+ printf("Returning false on the int conversion path, was trying to convert: %s, type=%d\n", strz, type);
+ }
+ free(strz);
+ }
+ return success;
+}
+
static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_defbuilder *b = _b;
upb_fielddef *f = b->f;
@@ -619,6 +745,15 @@ static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e);
+
+ upb_string *dstr = b->default_string;
+ b->default_string = NULL;
+ if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) {
+ // We don't worry too much about giving a great error message since the
+ // compiler should have ensured this was correct.
+ upb_seterr(&b->status, UPB_ERROR, "Error converting default value.");
+ return UPB_BREAK;
+ }
return UPB_CONTINUE;
}
@@ -644,6 +779,12 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) {
b->f->owned = true;
break;
}
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNUM:
+ // Have to convert from string to the correct type, but we might not know
+ // the type yet.
+ upb_string_unref(b->default_string);
+ b->default_string = upb_string_getref(upb_value_getstr(val));
+ break;
}
return UPB_CONTINUE;
}
@@ -683,6 +824,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_b) {
upb_atomic_refcount_init(&m->cycle_refcount, 0);
upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
+ m->default_message = NULL;
upb_deflist_push(&b->defs, UPB_UPCAST(m));
upb_defbuilder_startcontainer(b);
return UPB_CONTINUE;
@@ -703,7 +845,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
upb_field_count_t field = 0;
upb_msg_iter i;
for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- sorted_fields[field++]= upb_msg_iter_field(i);
+ sorted_fields[field++] = upb_msg_iter_field(i);
}
qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
@@ -745,6 +887,18 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
if (max_align > 0) m->size = upb_align_up(m->size, max_align);
+ // Create default message instance, an immutable message with all default
+ // values set (except submessages, which are simply marked as unset). We
+ // could alternatively leave all set bits unset, but this would make
+ // upb_msg_get() take its unexpected branch more often for no good reason.
+ m->default_message = upb_msg_new(m);
+ for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+ upb_fielddef *f = upb_msg_iter_field(i);
+ // Enum defaults are still held as unresolved name strings here, so they
+ // are skipped too. The type test must be "!=": "!f->type == X" parses as
+ // "(!f->type) == X".
+ // NOTE(review): before this fix the condition never held, so upb_msg_set()
+ // was never reached from here -- confirm it copes with every field kind.
+ if (!upb_issubmsg(f) && f->type != UPB_TYPE(ENUM)) {
+ upb_msg_set(m->default_message, f, f->default_value);
+ }
+ }
+
upb_defbuilder_endcontainer(b);
return UPB_CONTINUE;
}
@@ -802,6 +956,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b,
static void upb_msgdef_free(upb_msgdef *m)
{
+ upb_msg_unref(m->default_message, m);
upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
upb_fielddef_free(upb_msg_iter_field(i));
@@ -818,6 +973,10 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
// We will later make the ref unowned if it is a part of a cycle.
f->owned = true;
upb_def_ref(def);
+ if (upb_issubmsg(f)) {
+ upb_msgdef *md = upb_downcast_msgdef(def);
+ upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message));
+ }
}
upb_msg_iter upb_msg_begin(upb_msgdef *m) {
@@ -937,7 +1096,8 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
}
// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
-// resolves all of the unresolved refs for the defs in tmptab.
+// resolves all of the unresolved refs for the defs in tmptab. Also resolves
+// default values for enumerations and submessages.
bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
upb_status *status)
{
@@ -1352,7 +1512,7 @@ upb_def *upb_getdescriptordef(upb_string *str) {
// upb itself is corrupt.
abort();
}
- upb_def_unref(UPB_UPCAST(def)); // The symtab already holds a ref on it.
+ upb_msgdef_unref(def); // The symtab already holds a ref on it.
atexit(upb_free_descriptor_symtab);
}
return upb_symtab_resolve(
diff --git a/src/upb_def.h b/src/upb_def.h
index 121d5bc..3f79895 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -81,6 +81,9 @@ INLINE void upb_def_unref(upb_def *def) {
if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
+
/* upb_fielddef ***************************************************************/
// A upb_fielddef describes a single field in a message. It isn't a full def
@@ -158,6 +161,10 @@ typedef struct _upb_msgdef {
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
+
+ // Immutable msg instance that has all default values set.
+ // TODO: need a way of making this immutable!
+ struct _upb_msg *default_message;
} upb_msgdef;
// Hash table entries for looking up fields by name or number.
@@ -172,6 +179,13 @@ typedef struct {
upb_fielddef *f;
} upb_ntof_ent;
+INLINE void upb_msgdef_unref(upb_msgdef *md) {  // Typed wrapper: unrefs md through its base upb_def.
+ upb_def_unref(UPB_UPCAST(md));
+}
+INLINE void upb_msgdef_ref(upb_msgdef *md) {  // Typed wrapper: refs md through its base upb_def.
+ upb_def_ref(UPB_UPCAST(md));
+}
+
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
@@ -361,8 +375,6 @@ UPB_DOWNCAST_DEF(extdef, EXT);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
-#define UPB_UPCAST(ptr) (&(ptr)->base)
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 9dfbea4..211004c 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -145,6 +145,22 @@ INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->set_bit_offset] |= f->set_bit_mask;
}
+// Stores "val" into field "f" of "msg" and marks the field as set. For
+// memory-managed fields the new value gains a ref and the value previously
+// stored in that slot is unref'd.
+void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) {
+ assert(val.type == upb_field_valuetype(f));
+ upb_valueptr ptr = _upb_msg_getptr(msg, f);
+ if (upb_field_ismm(f)) {
+ // Ref the new value first, so that storing the value the field already
+ // holds cannot drop its last reference before we take ours.
+ upb_atomic_refcount_t *refcount = upb_value_getrefcount(val);
+ if (refcount) upb_atomic_ref(refcount);
+
+ // Unref any previous value we may have had there.
+ upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f));
+ upb_field_unref(oldval, f);
+ }
+ upb_msg_sethas(msg, f);
+ // This function returns void, so the write must not be "return"ed.
+ upb_value_write(ptr, val, upb_field_valuetype(f));
+}
+
static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p = _upb_msg_getptr(msg, f);
if (upb_isarray(f)) {
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 3246971..ff8489c 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -135,6 +135,7 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
#undef CASE
}
+
/* upb_array ******************************************************************/
typedef uint32_t upb_arraylen_t;
@@ -172,8 +173,17 @@ INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
}
+
/* upb_msg ********************************************************************/
+// upb_msg is not self-describing; the upb_msg does not contain a pointer to the
+// upb_msgdef. While this makes the API a bit more cumbersome to use, this
+// choice was made for a few important reasons:
+//
+// 1. it would make every message 8 bytes larger on 64-bit platforms. This is
+// a high overhead for small messages.
+// 2. you would want the msg to own a ref on its msgdef, but this would require
+// an atomic operation for every message create or destroy!
struct _upb_msg {
upb_atomic_refcount_t refcount;
uint8_t data[4]; // We allocate the appropriate amount per message.
@@ -194,6 +204,11 @@ upb_msg *upb_msg_new(upb_msgdef *md);
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
}
+INLINE upb_msg *upb_msg_getref(upb_msg *msg) {  // Takes a new ref on msg and returns the same pointer.
+ assert(msg);  // Unlike upb_msg_unref(), NULL is not accepted here.
+ upb_atomic_ref(&msg->refcount);
+ return msg;
+}
void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
@@ -203,10 +218,40 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0;
}
+// We have several options for handling default values:
+// 1. inside upb_msg_clear(), overwrite all values to be their defaults,
+// overwriting submessage pointers to point to the default instance again.
+// 2. inside upb_msg_get(), test upb_msg_has() and return f->default_value
+// if it is not set. upb_msg_clear() only clears the set bits.
+// We lazily clear objects if/when we reuse them.
+// 3. inside upb_msg_clear(), overwrite all values to be their default,
+// and recurse into submessages to set all their values to defaults also.
+// 4. as a hybrid of (1) and (3), make each "set bit" tri-state, where it
+// can have a value of "unset, but cached sub-message needs to be cleared."
+// Like (2) we can cache sub-messages and lazily clear, but primitive values
+// can always be returned straight from the message.
+//
+// (1) is undesirable, because it prevents us from caching sub-objects.
+// (2) makes clear() cheaper, but makes get() branchier.
+// (3) makes get() less branchy, but makes clear() have worse cache behavior.
+// (4) makes get() differently branchy (only returns default from msgdef if
+// NON-primitive value is unset), but uses more set bits. It's questionable
+// whether it would be a performance improvement.
+//
+// For the moment we go with (2). Google's protobuf does (3), which is likely
+// part of the reason we beat it in some benchmarks.
+
+// For submessages and strings, the returned value is not owned.
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
- return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+ if (upb_msg_has(msg, f)) {  // Field is set: read the value stored in the message.
+ return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+ } else {  // Field is unset: fall back to the fielddef's default.
+ return f->default_value;
+ }
}
+void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val);
+
// Unsets all field values back to their defaults.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
memset(msg->data, 0, md->set_flags_bytes);
diff --git a/src/upb_string.c b/src/upb_string.c
index 81b152d..29ce7d4 100644
--- a/src/upb_string.c
+++ b/src/upb_string.c
@@ -147,4 +147,15 @@ error:
return NULL;
}
-void upb_string_noninlinerecycle(upb_string **_str) { return upb_string_recycle(_str); }
+// Returns the singleton empty string: the same function-local static
+// upb_string is handed back on every call.
+upb_string *upb_emptystring(void) {
+ static upb_string empty = UPB_STATIC_STRING("");
+ return &empty;
+}
+
+// Returns a newly malloc()ed, NUL-terminated copy of str's bytes, or NULL if
+// the allocation fails. The caller must free() the result.
+// NOTE(review): upb_string.h says readers must call upb_string_endread() once
+// they are done reading -- confirm whether that applies to the
+// upb_string_getrobuf() call below.
+char *upb_string_newcstr(upb_string *str) {
+ upb_strlen_t len = upb_string_len(str);
+ char *ret = malloc(len+1);
+ if (!ret) return NULL;
+ memcpy(ret, upb_string_getrobuf(str), len);
+ ret[len] = '\0';
+ return ret;
+}
diff --git a/src/upb_string.h b/src/upb_string.h
index 3799c5e..efafa44 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -134,6 +134,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) {
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
+INLINE bool upb_string_isempty(upb_string *str) {  // A NULL str counts as empty.
+ return !str || upb_string_len(str) == 0;
+}
// Use to read the bytes of the string. The caller *must* call
// upb_string_endread() after the data has been read. The window between
@@ -273,6 +276,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str,
//#endif
#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
+// Returns a singleton empty string.
+upb_string *upb_emptystring(void);
+
+
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
@@ -339,6 +346,9 @@ INLINE upb_string *upb_strdupc(const char *src) {
return upb_strduplen(src, strlen(src));
}
+// Returns a newly-allocated NUL-terminated copy of str.
+char *upb_string_newcstr(upb_string *str);
+
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
diff --git a/src/upbc.c b/src/upbc.c
index 428ec41..4fa8a71 100644
--- a/src/upbc.c
+++ b/src/upbc.c
@@ -12,7 +12,6 @@
#include <inttypes.h>
#include <stdarg.h>
#include <stdlib.h>
-#include "descriptor.h"
#include "upb_def.h"
#include "upb_msg.h"
#include "upb_glue.h"
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback