summaryrefslogtreecommitdiff
path: root/upb
diff options
context:
space:
mode:
authorJoshua Haberman <jhaberman@gmail.com>2011-08-31 20:03:59 -0700
committerJoshua Haberman <jhaberman@gmail.com>2011-08-31 20:03:59 -0700
commit4a8b9be46c9485a35383b52d400ba086d3f40ace (patch)
treec04b977e5db364a1c4dc53bca78ab2b1f860c5c7 /upb
parentd1d126acf453608fd797ceaec73d73fa25517166 (diff)
Header cleanup, clarify/correct comments for interfaces.
Diffstat (limited to 'upb')
-rw-r--r--upb/atomic.h2
-rw-r--r--upb/bytestream.h68
-rw-r--r--upb/def.c9
-rw-r--r--upb/def.h63
-rw-r--r--upb/descriptor.c6
-rw-r--r--upb/descriptor.h8
-rw-r--r--upb/pb/decoder.c9
-rw-r--r--upb/pb/decoder.h28
-rw-r--r--upb/pb/glue.c7
-rw-r--r--upb/table.h8
10 files changed, 114 insertions, 94 deletions
diff --git a/upb/atomic.h b/upb/atomic.h
index 87a0141..e82ad8f 100644
--- a/upb/atomic.h
+++ b/upb/atomic.h
@@ -7,7 +7,7 @@
* Only a very small part of upb is thread-safe. Notably, individual
* messages, arrays, and strings are *not* thread safe for mutating.
* However, we do make message *metadata* such as upb_msgdef and
- * upb_context thread-safe, and their ownership is tracked via atomic
+ * upb_symtab thread-safe, and their ownership is tracked via atomic
* refcounting. This header implements the small number of atomic
* primitives required to support this. The primitives we implement
* are:
diff --git a/upb/bytestream.h b/upb/bytestream.h
index 83976a3..0a744f6 100644
--- a/upb/bytestream.h
+++ b/upb/bytestream.h
@@ -10,6 +10,7 @@
* can get the data from a fd, a string, a cord, etc.
*
* Byte streams are NOT thread-safe! (Like f{read,write}_unlocked())
+ * This may change (in particular, bytesrc objects may be better thread-safe).
*/
#ifndef UPB_BYTESTREAM_H
@@ -28,7 +29,12 @@ extern "C" {
/* upb_bytesrc ****************************************************************/
// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as
-// they become available, and to preserve some trailing amount of data.
+// they become available, and to preserve some trailing amount of data, which
+// is useful for lazy parsing (among other things). If there is a submessage
+// that we want to parse later we can take a reference on that region of the
+// input buffer. This will guarantee that the bytesrc keeps the submessage
+// data around for later use, without requiring a copy out of the input
+// buffers.
typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*);
typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*);
@@ -53,10 +59,15 @@ INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
}
// Fetches at least one byte starting at ofs, returning the actual number of
-// bytes fetched (or 0 on error: see "s" for details). Gives caller a ref on
-// the fetched region. It is safe to re-fetch existing regions but only if
-// they are ref'd. "ofs" may not greater than the end of the region that was
-// previously fetched.
+// bytes fetched (or 0 on error: see "s" for details). A successful return
+// gives caller a ref on the fetched region.
+//
+// If "ofs" may be greater or equal than the end of the already-fetched region.
+// It may also be less than the end of the already-fetch region *if* either of
+// the following is true:
+//
+// * the region is ref'd (this implies that the data is still in-memory)
+// * the bytesrc is seekable (this implies that the data can be fetched again).
INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) {
return src->vtbl->fetch(src, ofs, s);
}
@@ -68,20 +79,21 @@ INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len,
src->vtbl->read(src, src_ofs, len, dst);
}
-// Returns a pointer to the bytesrc's internal buffer, returning how much data
-// was actually returned (which may be less than "len" if the given region is
-// not contiguous). The caller must own refs on the entire region from [ofs,
-// ofs+len]. The returned buffer is valid for as long as the region remains
-// ref'd.
+// Returns a pointer to the bytesrc's internal buffer, storing in *len how much
+// data is available. The caller must own refs on the given region. The
+// returned buffer is valid for as long as the region remains ref'd.
//
-// TODO: is "len" really required here?
+// TODO: if more data is available than the caller has ref'd is it ok for the
+// caller to read *len bytes?
INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs,
size_t *len) {
return src->vtbl->getptr(src, ofs, len);
}
// Gives the caller a ref on the given region. The caller must know that the
-// given region is already ref'd.
+// given region is already ref'd (for example, inside a upb_handlers callback
+// that receives a upb_strref, the region is guaranteed to be ref'd -- this
+// function allows that handler to take its own ref).
INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
src->vtbl->refregion(src, ofs, len);
}
@@ -110,16 +122,17 @@ INLINE void upb_bytesrc_unref(upb_bytesrc *src) {
src->vtbl->unref(src);
}
+
/* upb_strref *****************************************************************/
-// The structure we pass for a string.
+// The structure we pass to upb_handlers for a string value.
typedef struct _upb_strref {
// Pointer to the string data. NULL if the string spans multiple input
// buffers (in which case upb_bytesrc_getptr() must be called to obtain
// the actual pointers).
const char *ptr;
- // Length of the string.
+ // Total length of the string.
uint32_t len;
// Offset in the bytesrc that represents the beginning of this string.
@@ -139,12 +152,21 @@ typedef struct _upb_strref {
char *upb_strref_dup(struct _upb_strref *r);
INLINE void upb_strref_read(struct _upb_strref *r, char *buf) {
- upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, buf);
+ if (r->ptr) {
+ memcpy(buf, r->ptr, r->len);
+ } else {
+ assert(r->bytesrc);
+ upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, buf);
+ }
}
/* upb_bytesink ***************************************************************/
+// A bytesink is an interface that allows the caller to push byte-wise data.
+// It is very simple -- the only special capability is the ability to "rewind"
+// the stream, which is really only a mechanism of having the bytesink ignore
+// some subsequent calls.
typedef int upb_bytesink_write_func(void*, const void*, int);
typedef int upb_bytesink_vprintf_func(void*, const char *fmt, va_list args);
@@ -194,6 +216,22 @@ INLINE uint64_t upb_bytesink_getoffset(upb_bytesink *sink) {
return sink->offset;
}
+// Rewinds the stream to the given offset. This cannot actually "unput" any
+// data, it is for situations like:
+//
+// // If false is returned (because of error), call again later to resume.
+// bool write_some_data(upb_bytesink *sink, int indent) {
+// uint64_t start_offset = upb_bytesink_getoffset(sink);
+// if (upb_bytesink_writestr(sink, "Some data") < 0) goto err;
+// if (upb_bytesink_putrepeated(sink, ' ', indent) < 0) goto err;
+// return true;
+// err:
+// upb_bytesink_rewind(sink, start_offset);
+// return false;
+// }
+//
+// The subsequent bytesink writes *must* be identical to the writes that were
+// rewinded past.
INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) {
// TODO
(void)sink;
diff --git a/upb/def.c b/upb/def.c
index f83f5eb..ee793a9 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -523,6 +523,13 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
/* upb_symtab *****************************************************************/
+struct _upb_symtab {
+ upb_atomic_t refcount;
+ upb_rwlock_t lock; // Protects all members except the refcount.
+ upb_strtable symtab; // The symbol table.
+ upb_deflist olddefs;
+};
+
typedef struct {
upb_def *def;
} upb_symtab_ent;
@@ -568,6 +575,8 @@ static void upb_symtab_free(upb_symtab *s) {
free(s);
}
+void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
+
void upb_symtab_unref(upb_symtab *s) {
if(s && upb_atomic_unref(&s->refcount)) {
upb_symtab_free(s);
diff --git a/upb/def.h b/upb/def.h
index 86f84ce..4dc9c16 100644
--- a/upb/def.h
+++ b/upb/def.h
@@ -123,12 +123,12 @@ INLINE const char *upb_fielddef_typename(upb_fielddef *f) {
return f->def ? f->def->fqname : NULL;
}
-// Only meaningful once the def is in a symtab (returns NULL otherwise, or for
-// a fielddef where !upb_hassubdef(f)).
+// The enum or submessage def for this field, if any. Only meaningful for
+// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true).
+// Since defs are not linked together until they are in a symtab, this
+// will return NULL until the msgdef is in a symtab.
upb_def *upb_fielddef_subdef(upb_fielddef *f);
-// NULL until the fielddef has been added to a msgdef.
-
// Write accessors. "Number" and "name" must be set before the fielddef is
// added to a msgdef. For the moment we do not allow these to be set once
// the fielddef is added to a msgdef -- this could be relaxed in the future.
@@ -214,11 +214,12 @@ void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end);
-// Adds a fielddef to a msgdef. Caller retains its ref on the fielddef.
-// May only be done before the msgdef is in a symtab. The fielddef's name and
-// number must be set, and the message may not already contain any field with
-// this name or number, and this fielddef may not be part of another message,
-// otherwise false is returned and no action is performed.
+// Adds a fielddef to a msgdef. Caller retains its ref on the fielddef. May
+// only be done before the msgdef is in a symtab (requires upb_def_ismutable(m)
+// for the msgdef). The fielddef's name and number must be set, and the
+// message may not already contain any field with this name or number, and this
+// fielddef may not be part of another message, otherwise false is returned and
+// no action is performed.
bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef **f, int n);
INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
return upb_msgdef_addfields(m, &f, 1);
@@ -252,6 +253,7 @@ INLINE int upb_msgdef_numfields(upb_msgdef *m) {
}
// Iteration over fields. The order is undefined.
+// TODO: the iteration should be in field order.
// Iterators are invalidated when a field is added or removed.
// upb_msg_iter i;
// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
@@ -296,7 +298,7 @@ upb_enumdef *upb_enumdef_dup(upb_enumdef *e);
INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }
-// May only be set before the enumdef is in a symtab.
+// May only be set if upb_def_ismutable(e).
void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
// Adds a value to the enumdef. Requires that no existing val has this
@@ -334,30 +336,12 @@ INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
/* upb_symtab *****************************************************************/
-// A SymbolTable is where upb_defs live. It is empty when first constructed.
-// Clients add definitions to the symtab (or replace existing definitions) by
-// using a upb_symtab_commit() or calling upb_symtab_add().
-
-// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
-typedef struct {
- upb_def **defs;
- uint32_t len;
- uint32_t size;
-} upb_deflist;
-
-void upb_deflist_init(upb_deflist *l);
-void upb_deflist_uninit(upb_deflist *l);
-void upb_deflist_push(upb_deflist *l, upb_def *d);
-
-struct _upb_symtab {
- upb_atomic_t refcount;
- upb_rwlock_t lock; // Protects all members except the refcount.
- upb_strtable symtab; // The symbol table.
- upb_deflist olddefs;
-};
+// A symtab (symbol table) is where upb_defs live. It is empty when first
+// constructed. Clients add definitions to the symtab (or replace existing
+// definitions) by calling upb_symtab_add().
upb_symtab *upb_symtab_new(void);
-INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
+void upb_symtab_ref(upb_symtab *s);
void upb_symtab_unref(upb_symtab *s);
// Resolves the given symbol using the rules described in descriptor.proto,
@@ -427,6 +411,21 @@ UPB_DOWNCAST_DEF(svcdef, SERVICE);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
+
+/* upb_deflist ****************************************************************/
+
+// upb_deflist is an internal-only dynamic array for storing a growing list of
+// upb_defs.
+typedef struct {
+ upb_def **defs;
+ uint32_t len;
+ uint32_t size;
+} upb_deflist;
+
+void upb_deflist_init(upb_deflist *l);
+void upb_deflist_uninit(upb_deflist *l);
+void upb_deflist_push(upb_deflist *l, upb_def *d);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/upb/descriptor.c b/upb/descriptor.c
index 90ba4f7..26cfef5 100644
--- a/upb/descriptor.c
+++ b/upb/descriptor.c
@@ -29,10 +29,6 @@ static char *upb_join(char *base, char *name) {
/* upb_descreader ************************************************************/
-// A upb_descreader builds a list of defs by handling a parse of a protobuf in
-// the format defined in descriptor.proto. The output of a upb_descreader is
-// a upb_symtabtxn.
-
static upb_def *upb_deflist_last(upb_deflist *l) {
return l->defs[l->len-1];
}
@@ -480,8 +476,6 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) {
upb_status_seterrliteral(status, "Encountered message with no name.");
return;
}
-
- upb_msgdef_layout(m);
upb_descreader_endcontainer(r);
}
diff --git a/upb/descriptor.h b/upb/descriptor.h
index 2c279a5..21099b3 100644
--- a/upb/descriptor.h
+++ b/upb/descriptor.h
@@ -21,8 +21,6 @@ extern "C" {
/* upb_descreader ************************************************************/
-// upb_descreader reads a descriptor and puts defs in a upb_symtabtxn.
-
// We keep a stack of all the messages scopes we are currently in, as well as
// the top-level file scope. This is necessary to correctly qualify the
// definitions that are contained inside. "name" tracks the name of the
@@ -50,13 +48,11 @@ typedef struct {
upb_fielddef *f;
} upb_descreader;
-// Creates a new descriptor builder that will add defs to the given txn.
void upb_descreader_init(upb_descreader *r);
void upb_descreader_uninit(upb_descreader *r);
-// Registers handlers that will load descriptor data into a symtabtxn.
-// Pass the descreader as the closure. The messages will have
-// upb_msgdef_layout() called on them before adding to the txn.
+// Registers handlers that will build the defs. Pass the descreader as the
+// closure.
upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
// Gets the array of defs that have been parsed and removes them from the
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index fd04efc..ff39685 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -400,7 +400,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
#endif
}
-void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
+void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(
&d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
#ifdef UPB_USE_JIT_X64
@@ -423,13 +423,6 @@ void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
}
}
-void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
- upb_handlers *h = upb_handlers_new();
- upb_accessors_reghandlers(h, m);
- upb_decoder_initforhandlers(d, h);
- upb_handlers_unref(h);
-}
-
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
uint64_t end_ofs, void *closure) {
upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index 921697a..2232c52 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -5,13 +5,8 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*
* upb_decoder implements a high performance, streaming decoder for protobuf
- * data that works by implementing upb_src and getting its data from a
- * upb_bytesrc.
- *
- * The decoder does not currently support non-blocking I/O, in the sense that
- * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
- * decoder when data becomes available again. Support for this could be added,
- * but it would add complexity and perhaps cost efficiency also.
+ * data that works by getting its input data from a upb_bytesrc and calling
+ * into a upb_handlers.
*/
#ifndef UPB_DECODER_H_
@@ -36,18 +31,18 @@ typedef struct _upb_decoder {
upb_status *status; // Where we will store any errors that occur.
upb_strref strref; // For passing string data to callbacks.
- // Offsets for the region we currently have ref'd.
+ // Offsets for the bytesrc region we currently have ref'd.
uint64_t refstart_ofs, refend_ofs;
- // Current buffer and its stream offset.
+ // Current input buffer and its stream offset.
const char *buf, *ptr, *end;
uint64_t bufstart_ofs, bufend_ofs;
// Stream offset for the end of the top-level message, if any.
uint64_t end_ofs;
- // Buf offset as of which we've delivered calbacks; needed for rollback on
- // UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
+ // Buf offset as of which we've delivered calbacks; needed for rollback if
+ // a callback returns UPB_BREAK.
const char *completed_ptr;
// End of the delimited region, relative to ptr, or NULL if not in this buf.
@@ -66,6 +61,7 @@ typedef struct _upb_decoder {
struct dasm_State *dynasm;
#endif
+ // For exiting the decoder on error.
sigjmp_buf exitjmp;
} upb_decoder;
@@ -75,14 +71,8 @@ typedef struct _upb_decoder {
// Initializes/uninitializes a decoder for calling into the given handlers
// or to write into the given msgdef, given its accessors). Takes a ref
-// on the handlers or msgdef.
-void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *h);
-
-// Equivalent to:
-// upb_accessors_reghandlers(m, h);
-// upb_decoder_initforhandlers(d, h);
-// except possibly more efficient, by using cached state in the msgdef.
-void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m);
+// on the handlers.
+void upb_decoder_init(upb_decoder *d, upb_handlers *h);
void upb_decoder_uninit(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 8034c54..6981aa2 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -19,7 +19,10 @@ void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md,
upb_stringsrc_reset(&strsrc, str, len);
upb_decoder d;
- upb_decoder_initformsgdef(&d, md);
+ upb_handlers *h = upb_handlers_new();
+ upb_accessors_reghandlers(h, md);
+ upb_decoder_init(&d, h);
+ upb_handlers_unref(h);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
upb_decoder_decode(&d, status);
@@ -63,7 +66,7 @@ upb_def **upb_load_descriptor(const char *str, size_t len, int *n,
upb_descreader_reghandlers(h);
upb_decoder d;
- upb_decoder_initforhandlers(&d, h);
+ upb_decoder_init(&d, h);
upb_handlers_unref(h);
upb_descreader r;
upb_descreader_init(&r);
diff --git a/upb/table.h b/upb/table.h
index bd9a5d4..4a44a6f 100644
--- a/upb/table.h
+++ b/upb/table.h
@@ -11,6 +11,9 @@
* The table uses internal chaining with Brent's variation (inspired by the
* Lua implementation of hash tables). The hash function for strings is
* Austin Appleby's "MurmurHash."
+ *
+ * This header is internal to upb; its interface should not be considered
+ * public or stable.
*/
#ifndef UPB_TABLE_H_
@@ -128,21 +131,16 @@ INLINE void *_upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
upb_inttable_value *arrval =
(upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
if (_upb_inttable_isarrkey(t, key)) {
- //DEBUGPRINTF("array lookup for key %d, &val=%p, has_entry=%d\n", key, val, val->has_entry);
return (arrval->has_entry) ? arrval : NULL;
}
uint32_t bucket = _upb_inttable_bucket(t, key);
upb_inttable_entry *e =
(upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
- //DEBUGPRINTF("looking in first bucket %d, entry size=%zd, addr=%p\n", bucket, entry_size, e);
while (1) {
- //DEBUGPRINTF("%d, %d, %d\n", e->val.has_entry, e->hdr.key, key);
if (e->hdr.key == key) {
- //DEBUGPRINTF("returning val from hash part\n");
return &e->val;
}
if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
- //DEBUGPRINTF("looking in bucket %d\n", bucket);
e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
}
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback