From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:20:33 -0700 Subject: Split src/ into core/ and stream/. --- core/upb_def.h | 302 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 core/upb_def.h (limited to 'core/upb_def.h') diff --git a/core/upb_def.h b/core/upb_def.h new file mode 100644 index 0000000..c297e83 --- /dev/null +++ b/core/upb_def.h @@ -0,0 +1,302 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * Provides definitions of .proto constructs: + * - upb_msgdef: describes a "message" construct. + * - upb_fielddef: describes a message field. + * - upb_enumdef: describes an enum. + * (TODO: definitions of extensions and services). + * + * Defs are obtained from a upb_symtab object. A upb_symtab is empty when + * constructed, and definitions can be added by supplying serialized + * descriptors. + * + * Defs are immutable and reference-counted. Symbol tables reference any defs + * that are the "current" definitions. If an extension is loaded that adds a + * field to an existing message, a new msgdef is constructed that includes the + * new field and the old msgdef is unref'd. The old msgdef will still be ref'd + * by messages (if any) that were constructed with that msgdef. + * + * This file contains routines for creating and manipulating the definitions + * themselves. To create and manipulate actual messages, see upb_msg.h. + */ + +#ifndef UPB_DEF_H_ +#define UPB_DEF_H_ + +#include "upb_atomic.h" +#include "upb_stream.h" +#include "upb_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_def: base class for defs **********************************************/ + +// All the different kind of defs we support. These correspond 1:1 with +// declarations in a .proto file. 
+typedef enum { + UPB_DEF_MSG = 0, + UPB_DEF_ENUM, + UPB_DEF_SVC, + UPB_DEF_EXT, + // Internal-only, placeholder for a def that hasn't been resolved yet. + UPB_DEF_UNRESOLVED, + + // For specifying that defs of any type are requested from getdefs. + UPB_DEF_ANY = -1 +} upb_def_type; + +// This typedef is more space-efficient than declaring an enum var directly. +typedef int8_t upb_def_type_t; + +typedef struct { + upb_string *fqname; // Fully qualified. + upb_atomic_refcount_t refcount; + upb_def_type_t type; + + // The is_cyclic flag could go in upb_msgdef instead of here, because only + // messages can be involved in cycles. However, putting it here is free + // from a space perspective because structure alignment will otherwise leave + // three bytes empty after type. It also makes ref and unref more + // efficient, because we don't have to downcast to msgdef before checking the + // is_cyclic flag. + bool is_cyclic; + uint16_t search_depth; // Used during initialization dfs. +} upb_def; + +// These must not be called directly! +void _upb_def_cyclic_ref(upb_def *def); +void _upb_def_reftozero(upb_def *def); + +// Call to ref/deref a def. +INLINE void upb_def_ref(upb_def *def) { + if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); +} +INLINE void upb_def_unref(upb_def *def) { + if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); +} + +/* upb_fielddef ***************************************************************/ + +// A upb_fielddef describes a single field in a message. It isn't a full def +// in the sense that it derives from upb_def. It cannot stand on its own; it +// is either a field of a upb_msgdef or contained inside a upb_extensiondef. +// It is also reference-counted.
+typedef struct _upb_fielddef { + upb_atomic_refcount_t refcount; + upb_string *name; + upb_field_number_t number; + upb_field_type_t type; + upb_label_t label; + upb_value default_value; + + // For the case of an enum or a submessage, points to the def for that type. + upb_def *def; + + // True if we own a ref on "def" (above). This is true unless this edge is + // part of a cycle. + bool owned; + + // These are set only when this fielddef is part of a msgdef. + uint32_t byte_offset; // Where in a upb_msg to find the data. + upb_field_count_t field_index; // Indicates set bit. +} upb_fielddef; + +// A variety of tests about the type and label of a field. +INLINE bool upb_issubmsg(upb_fielddef *f) { + return upb_issubmsgtype(f->type); +} +INLINE bool upb_isstring(upb_fielddef *f) { + return upb_isstringtype(f->type); +} +INLINE bool upb_isarray(upb_fielddef *f) { + return f->label == UPB_LABEL(REPEATED); +} +// Does the type of this field imply that it should contain an associated def? +INLINE bool upb_hasdef(upb_fielddef *f) { + return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); +} + +INLINE bool upb_field_ismm(upb_fielddef *f) { // "mm": memory-managed? (confirm) + return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); +} + +INLINE bool upb_elem_ismm(upb_fielddef *f) { // Same test minus upb_isarray(). + return upb_isstring(f) || upb_issubmsg(f); +} + +/* upb_msgdef *****************************************************************/ + +// Structure that describes a single .proto message type. +typedef struct _upb_msgdef { + upb_def base; + upb_atomic_refcount_t cycle_refcount; + size_t size; + upb_field_count_t num_fields; + uint32_t set_flags_bytes; + uint32_t num_required_fields; // Required fields have the lowest set bytemasks. + upb_fielddef *fields; // We have exclusive ownership of these. + + // Tables for looking up fields by number and name. + upb_inttable itof; // int to field + upb_strtable ntof; // name to field +} upb_msgdef; + +// Hash table entries for looking up fields by name or number.
+typedef struct { + upb_inttable_entry e; + upb_fielddef *f; +} upb_itof_ent; +typedef struct { + upb_strtable_entry e; + upb_fielddef *f; +} upb_ntof_ent; + +// Looks up a field by number (itof) or by name (ntof). These are written to +// be as fast as possible, but caching the result of a lookup is still faster +// when possible. Both return NULL if no such field is found. +INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { + upb_itof_ent *e = + (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); + return e ? e->f : NULL; +} + +INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { + upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); + return e ? e->f : NULL; +} + +/* upb_enumdef ****************************************************************/ + +typedef struct _upb_enumdef { + upb_def base; + upb_strtable ntoi; + upb_inttable iton; +} upb_enumdef; + +typedef int32_t upb_enumval_t; + +// Lookups from name to integer and vice-versa. +bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); + +// Iteration over name/value pairs. The order is undefined. +// upb_enum_iter i; +// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// // ... +// } +typedef struct { + upb_enumdef *e; + void *state; // Internal iteration state. + upb_string *name; + upb_enumval_t val; +} upb_enum_iter; +void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); +void upb_enum_next(upb_enum_iter *iter); +bool upb_enum_done(upb_enum_iter *iter); + +/* upb_symtab *****************************************************************/ + +// A upb_symtab is where upb_defs live. It is empty when first constructed. +// Clients add definitions to the symtab by supplying unserialized or +// serialized descriptors (as defined in descriptor.proto).
+typedef struct { + upb_atomic_refcount_t refcount; + upb_rwlock_t lock; // Protects all members except the refcount. + upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. + + // Our symbol tables; we own refs to the defs therein. + upb_strtable symtab; // The main symbol table. + upb_strtable psymtab; // Private symbols, for internal use. +} upb_symtab; + +// Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd +// when the caller is done with them. +upb_symtab *upb_symtab_new(void); +void _upb_symtab_free(upb_symtab *s); // Must not be called directly! + +INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); } +INLINE void upb_symtab_unref(upb_symtab *s) { + if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); +} + +// Resolves the given symbol using the rules described in descriptor.proto, +// namely: +// +// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like +// scoping rules are used to find the type (i.e. first the nested types +// within this message are searched, then within the parent, on up to the +// root namespace). +// +// If a def is found, the caller owns one ref on the returned def. Otherwise +// returns NULL. +upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); + +// Finds an entry in the symbol table with this exact name. If a def is found, +// the caller owns one ref on the returned def. Otherwise returns NULL. +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); + +// Gets an array of pointers to all currently active defs in this symtab. The +// caller owns the returned array (which is of length *count) as well as a ref +// to each def inside. If type is UPB_DEF_ANY then defs of all types are +// returned, otherwise only defs of the requested type are returned.
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); + +// "desc" is a upb_src that will yield data from the +// google.protobuf.FileDescriptorSet message type. upb_symtab_addfds() adds +// all the definitions from the given FileDescriptorSet to the symtab. +// status indicates whether the operation was successful or not, and holds +// the error message (if any). +// +// TODO: should this allow redefinition? Either is possible, but which is +// more useful? Maybe it should be an option. +void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); + +// Returns a symtab that defines google.protobuf.DescriptorProto and all other +// types that are defined in descriptor.proto. This allows you to load other +// proto types. The caller owns a ref on the returned symtab. +upb_symtab *upb_get_descriptor_symtab(); + + +/* upb_def casts **************************************************************/ + +// Dynamic casts, for determining if a def is of a particular type at runtime. +#define UPB_DYNAMIC_CAST_DEF(lower, upper) \ + struct _upb_ ## lower; /* Forward-declare. */ \ + INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ + if(def->type != UPB_DEF_ ## upper) return NULL; \ + return (struct _upb_ ## lower*)def; \ + } +UPB_DYNAMIC_CAST_DEF(msgdef, MSG); +UPB_DYNAMIC_CAST_DEF(enumdef, ENUM); +UPB_DYNAMIC_CAST_DEF(svcdef, SVC); +UPB_DYNAMIC_CAST_DEF(extdef, EXT); +UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DYNAMIC_CAST_DEF + +// Downcasts, for when someone wants to assert that a def is of a particular type. +// These are only checked if we are building debug. +#define UPB_DOWNCAST_DEF(lower, upper) \ + struct _upb_ ## lower; /* Forward-declare.
*/ \ + INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ + assert(def->type == UPB_DEF_ ## upper); \ + return (struct _upb_ ## lower*)def; \ + } +UPB_DOWNCAST_DEF(msgdef, MSG); +UPB_DOWNCAST_DEF(enumdef, ENUM); +UPB_DOWNCAST_DEF(svcdef, SVC); +UPB_DOWNCAST_DEF(extdef, EXT); +UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DOWNCAST_DEF + +#define UPB_UPCAST(ptr) (&(ptr)->base) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_DEF_H_ */ -- cgit v1.2.3 From 67b16cbe5c55d00d7e576cdf479392f3a0e927a5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 14:37:02 -0700 Subject: Basic test_def links and passes no-op test! --- Makefile | 8 +++++--- core/upb_def.c | 42 ++++++++++++++++++++--------------------- core/upb_def.h | 6 +----- core/upb_stream_vtbl.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++- stream/upb_decoder.c | 1 + 5 files changed, 78 insertions(+), 30 deletions(-) (limited to 'core/upb_def.h') diff --git a/Makefile b/Makefile index 2abe0c7..568dcad 100644 --- a/Makefile +++ b/Makefile @@ -102,14 +102,16 @@ VALGRIND=valgrind --leak-check=full --error-exitcode=1 #VALGRIND= test: tests @echo Running all tests under valgrind. + @set -e # Abort on error. # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table @for test in tests/*; do \ if [ -x ./$$test ] ; then \ - echo $(VALGRIND) ./$$test: \\c; \ - $(VALGRIND) ./$$test; \ + echo !!! $(VALGRIND) ./$$test; \ + $(VALGRIND) ./$$test || exit 1; \ fi \ - done; + done; \ + echo "All tests passed!" 
tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2: \ diff --git a/core/upb_def.c b/core/upb_def.c index 1f57c70..cc4fd80 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -764,7 +764,6 @@ static void upb_free_symtab(upb_strtable *t) void _upb_symtab_free(upb_symtab *s) { upb_free_symtab(&s->symtab); - upb_free_symtab(&s->psymtab); upb_rwlock_destroy(&s->lock); free(s); } @@ -932,30 +931,30 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) { - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - d->str = upb_string_tryrecycle(d->str); - upb_string_substr(d->str, d->input, d->offset, d->delimited_len); - } else { - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: - *val.uint64 = upb_baredecoder_readv64(d); - break; - case UPB_WIRE_TYPE_32BIT_VARINT: - *val.uint32 = upb_baredecoder_readv32(d); - break; - case UPB_WIRE_TYPE_64BIT: - *val.uint64 = upb_baredecoder_readf64(d); - break; - case UPB_WIRE_TYPE_32BIT: - *val.uint32 = upb_baredecoder_readf32(d); - break; - default: - assert(false); - } + switch(d->wire_type) { + case UPB_WIRE_TYPE_VARINT: + *val.uint64 = upb_baredecoder_readv64(d); + break; + case UPB_WIRE_TYPE_32BIT_VARINT: + *val.uint32 = upb_baredecoder_readv32(d); + break; + case UPB_WIRE_TYPE_64BIT: + *val.uint64 = upb_baredecoder_readf64(d); + break; + case UPB_WIRE_TYPE_32BIT: + *val.uint32 = upb_baredecoder_readf32(d); + break; + default: + assert(false); } return true; } +static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { + upb_string_substr(str, d->input, d->offset, d->delimited_len); + return true; +} + static bool upb_baredecoder_skipval(upb_baredecoder *d) { upb_value val; @@ -977,6 +976,7 @@ static bool upb_baredecoder_endmsg(upb_baredecoder *d) static upb_src_vtable upb_baredecoder_src_vtbl = { (upb_src_getdef_fptr)&upb_baredecoder_getdef, (upb_src_getval_fptr)&upb_baredecoder_getval, + 
(upb_src_getstr_fptr)&upb_baredecoder_getstr, (upb_src_skipval_fptr)&upb_baredecoder_skipval, (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, diff --git a/core/upb_def.h b/core/upb_def.h index c297e83..5c8c11e 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -207,11 +207,7 @@ bool upb_enum_done(upb_enum_iter *iter); typedef struct { upb_atomic_refcount_t refcount; upb_rwlock_t lock; // Protects all members except the refcount. - upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. - - // Our symbol tables; we own refs to the defs therein. - upb_strtable symtab; // The main symbol table. - upb_strtable psymtab; // Private symbols, for internal use. + upb_strtable symtab; // The symbol table. } upb_symtab; // Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 0ec45d2..52172d2 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -27,28 +27,35 @@ struct upb_bytesink; typedef struct upb_bytesink upb_bytesink; // Typedefs for function pointers to all of the virtual functions. -typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); + +// upb_src. +typedef struct _upb_fielddef *(*upb_src_getdef_fptr)(upb_src *src); typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); +typedef bool (*upb_src_getstr_fptr)(upb_src *src, upb_string *str); typedef bool (*upb_src_skipval_fptr)(upb_src *src); typedef bool (*upb_src_startmsg_fptr)(upb_src *src); typedef bool (*upb_src_endmsg_fptr)(upb_src *src); +// upb_sink. typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); +// upb_bytesrc. 
typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); typedef bool (*upb_bytesrc_append_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t len); +// upb_bytesink. typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); // Vtables for the above interfaces. typedef struct { upb_src_getdef_fptr getdef; upb_src_getval_fptr getval; + upb_src_getstr_fptr getstr; upb_src_skipval_fptr skipval; upb_src_startmsg_fptr startmsg; upb_src_endmsg_fptr endmsg; @@ -86,6 +93,48 @@ INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { #endif } +// Implementation of virtual function dispatch. +INLINE struct _upb_fielddef *upb_src_getdef(upb_src *src) { + return src->vtbl->getdef(src); +} +INLINE bool upb_src_getval(upb_src *src, upb_valueptr val) { + return src->vtbl->getval(src, val); +} +INLINE bool upb_src_getstr(upb_src *src, upb_string *str) { + return src->vtbl->getstr(src, str); +} +INLINE bool upb_src_skipval(upb_src *src) { return src->vtbl->skipval(src); } +INLINE bool upb_src_startmsg(upb_src *src) { return src->vtbl->startmsg(src); } +INLINE bool upb_src_endmsg(upb_src *src) { return src->vtbl->endmsg(src); } + +// Implementation of type-specific upb_src accessors. If we encounter a upb_src +// where these can be implemented directly in a measurably more efficient way, +// we can make these part of the vtable also. +// +// For <64-bit types we have to use a temporary to accommodate baredecoder, +// which does not know the actual width of the type. +INLINE bool upb_src_getbool(upb_src *src, bool *_bool) { + upb_value val; + bool ret = upb_src_getval(src, upb_value_addrof(&val)); + *_bool = val._bool; + return ret; +} + +INLINE bool upb_src_getint32(upb_src *src, int32_t *i32) { + upb_value val; + bool ret = upb_src_getval(src, upb_value_addrof(&val)); + *i32 = val.int32; + return ret; +} + +// TODO. 
+bool upb_src_getint32(upb_src *src, int32_t *val); +bool upb_src_getint64(upb_src *src, int64_t *val); +bool upb_src_getuint32(upb_src *src, uint32_t *val); +bool upb_src_getuint64(upb_src *src, uint64_t *val); +bool upb_src_getfloat(upb_src *src, float *val); +bool upb_src_getdouble(upb_src *src, double *val); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index e3fdc49..52fc72b 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -536,6 +536,7 @@ static bool upb_decoder_skipgroup(upb_decoder *d) upb_src_vtable upb_decoder_src_vtbl = { (upb_src_getdef_fptr)&upb_decoder_getdef, (upb_src_getval_fptr)&upb_decoder_getval, + (upb_src_getstr_fptr)&upb_decoder_getstr, (upb_src_skipval_fptr)&upb_decoder_skipval, (upb_src_startmsg_fptr)&upb_decoder_startmsg, (upb_src_endmsg_fptr)&upb_decoder_endmsg, -- cgit v1.2.3 From db6c7387bc1df49deac41155a173e33017a75ed8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 18:11:24 -0700 Subject: Incremental progress towards getting upb_def to bootstrap. --- Makefile | 3 +- core/upb.c | 9 ++--- core/upb.h | 7 +++- core/upb_def.c | 102 ++++++++++++++++++++++--------------------------- core/upb_def.h | 62 +++++++++++++++++++++--------- core/upb_stream_vtbl.h | 1 + core/upb_table.c | 2 +- tests/test_string.c | 3 ++ 8 files changed, 105 insertions(+), 84 deletions(-) (limited to 'core/upb_def.h') diff --git a/Makefile b/Makefile index 568dcad..2b2a269 100644 --- a/Makefile +++ b/Makefile @@ -48,12 +48,13 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ descriptor/descriptor.c +$(SRC): perf-cppflags # Parts of core that are yet to be converted. OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC diff --git a/core/upb.c b/core/upb.c index a98512d..9ed5617 100644 --- a/core/upb.c +++ b/core/upb.c @@ -44,12 +44,11 @@ void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...) { if(upb_ok(status)) { // The first error is the most interesting. - status->str = upb_string_new(); - char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN); status->code = code; + status->str = upb_string_tryrecycle(status->str); va_list args; va_start(args, msg); - vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args); + upb_string_vprintf(status->str, msg, args); va_end(args); } } @@ -57,10 +56,10 @@ void upb_seterr(upb_status *status, enum upb_status_code code, void upb_copyerr(upb_status *to, upb_status *from) { to->code = from->code; - to->str = upb_string_getref(from->str); + if(from->str) to->str = upb_string_getref(from->str); } -void upb_reset(upb_status *status) { +void upb_status_reset(upb_status *status) { status->code = UPB_STATUS_OK; upb_string_unref(status->str); status->str = NULL; diff --git a/core/upb.h b/core/upb.h index 230e638..630d9e1 100644 --- a/core/upb.h +++ b/core/upb.h @@ -195,7 +195,12 @@ INLINE bool upb_ok(upb_status *status) { return status->code == UPB_STATUS_OK; } -void upb_reset(upb_status *status); +INLINE void upb_status_init(upb_status *status) { + status->code = UPB_STATUS_OK; + status->str = NULL; +} + +void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); diff --git a/core/upb_def.c b/core/upb_def.c index cc4fd80..0f48559 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -155,8 +155,9 @@ static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, } else { 
open_defs[num_open_defs++] = m; } - for(int i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter iter = upb_msg_begin(m); + for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { + upb_fielddef *f = upb_msg_iter_field(iter); upb_def *def = f->def; if(upb_issubmsg(f) && def->is_cyclic) { upb_msgdef *sub_m = upb_downcast_msgdef(def); @@ -230,16 +231,6 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ -typedef struct { - upb_strtable_entry e; - uint32_t value; -} ntoi_ent; - -typedef struct { - upb_inttable_entry e; - upb_string *string; -} iton_ent; - static void upb_enumdef_free(upb_enumdef *e) { upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -271,8 +262,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; } - ntoi_ent ntoi_ent = {{name, 0}, number}; - iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{name, 0}, number}; + upb_iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the @@ -291,11 +282,14 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + e->base.fqname = upb_string_tryrecycle(e->base.fqname); + CHECKSRC(upb_src_getstr(src, e->base.fqname)); case 
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: CHECK(upb_addenum_val(src, e, status)); break; @@ -304,37 +298,25 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) break; } } + assert(e->base.fqname); upb_deflist_push(defs, UPB_UPCAST(e)); return true; +src_err: + upb_copyerr(status, upb_src_status(src)); err: upb_enumdef_free(e); return false; } -static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { - iter->state = ent; - iter->name = ent->e.key; - iter->val = ent->value; -} - -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) { +upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. - ntoi_ent *ent = upb_strtable_begin(&e->ntoi); - iter->e = e; - fill_iter(iter, ent); + return upb_inttable_begin(&e->iton); } -void upb_enum_next(upb_enum_iter *iter) { - ntoi_ent *ent = iter->state; - assert(ent); - ent = upb_strtable_next(&iter->e->ntoi, &ent->e); - iter->state = ent; - if(ent) fill_iter(iter, ent); -} - -bool upb_enum_done(upb_enum_iter *iter) { - return iter->state == NULL; +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { + assert(iter); + return upb_inttable_next(&e->iton, &iter->e); } @@ -346,7 +328,7 @@ static void upb_fielddef_free(upb_fielddef *f) { static void upb_fielddef_uninit(upb_fielddef *f) { upb_string_unref(f->name); - if(upb_hasdef(f) && f->owned) { + if(f->owned) { upb_def_unref(f->def); } } @@ -354,6 +336,8 @@ static void upb_fielddef_uninit(upb_fielddef *f) { static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) { upb_fielddef *f = malloc(sizeof(*f)); + f->number = -1; + f->name = NULL; f->def = NULL; f->owned = false; upb_fielddef *parsed_f; @@ -388,6 +372,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) } CHECKSRC(upb_src_eof(src)); // TODO: verify that all required fields were present. 
+ assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. @@ -461,9 +446,9 @@ err: static void upb_msgdef_free(upb_msgdef *m) { - for (upb_field_count_t i = 0; i < m->num_fields; i++) - upb_fielddef_uninit(&m->fields[i]); - free(m->fields); + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_fielddef_uninit(upb_msg_iter_field(i)); upb_strtable_free(&m->ntof); upb_inttable_free(&m->itof); upb_def_uninit(&m->base); @@ -479,6 +464,13 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { upb_def_ref(def); } +upb_msg_iter upb_msg_begin(upb_msgdef *m) { + return upb_inttable_begin(&m->itof); +} + +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { + return upb_inttable_next(&m->itof, &iter->e); +} /* symtab internal ***********************************************************/ @@ -601,8 +593,9 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) } else { UPB_UPCAST(m)->search_depth = ++depth; bool cycle_found = false; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_issubmsg(f)) continue; upb_def *sub_def = f->def; upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); @@ -632,8 +625,9 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // Type names are resolved relative to the message in which they appear. upb_string *base = e->e.key; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_hasdef(f)) continue; // No resolving necessary. 
upb_string *name = upb_downcast_unresolveddef(f->def)->name; @@ -873,7 +867,6 @@ typedef struct { upb_wire_type_t wire_type; upb_strlen_t delimited_len; upb_strlen_t stack[UPB_MAX_NESTING], *top; - upb_string *str; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -929,6 +922,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) return &d->field; } +static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { + upb_string_substr(str, d->input, d->offset, d->delimited_len); + d->offset += d->delimited_len; + return true; +} + static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) { switch(d->wire_type) { @@ -950,11 +949,6 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) return true; } -static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { - upb_string_substr(str, d->input, d->offset, d->delimited_len); - return true; -} - static bool upb_baredecoder_skipval(upb_baredecoder *d) { upb_value val; @@ -986,7 +980,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); - d->str = upb_string_new(); d->top = &d->stack[0]; upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; @@ -995,7 +988,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); - upb_string_unref(d->str); free(d); } @@ -1004,11 +996,8 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) return &d->src; } -upb_symtab *upb_get_descriptor_symtab() +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { - // TODO: implement sharing of symtabs, so that successive calls to this - // function will return the same symtab. - upb_symtab *symtab = upb_symtab_new(); // TODO: allow upb_strings to be static or on the stack. 
upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); @@ -1017,5 +1006,4 @@ upb_symtab *upb_get_descriptor_symtab() assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); - return symtab; } diff --git a/core/upb_def.h b/core/upb_def.h index 5c8c11e..82d8520 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -135,11 +135,6 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; - size_t size; - upb_field_count_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; // Required fields have the lowest set bytemasks. - upb_fielddef *fields; // We have exclusive ownership of these. // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -170,6 +165,21 @@ INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +// Iteration over fields. The order is undefined. +// upb_msg_iter i; +// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { +// // ... +// } +typedef upb_itof_ent *upb_msg_iter; + +upb_msg_iter upb_msg_begin(upb_msgdef *m); +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter); +INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; } + +INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { + return iter->f; +} + /* upb_enumdef ****************************************************************/ typedef struct _upb_enumdef { @@ -178,6 +188,16 @@ typedef struct _upb_enumdef { upb_inttable iton; } upb_enumdef; +typedef struct { + upb_strtable_entry e; + uint32_t value; +} upb_ntoi_ent; + +typedef struct { + upb_inttable_entry e; + upb_string *string; +} upb_iton_ent; + typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa.
@@ -186,18 +206,22 @@ upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. // upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { // // ... // } -typedef struct { - upb_enumdef *e; - void *state; // Internal iteration state. - upb_string *name; - upb_enumval_t val; -} upb_enum_iter; -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); -void upb_enum_next(upb_enum_iter *iter); -bool upb_enum_done(upb_enum_iter *iter); +typedef upb_iton_ent *upb_enum_iter; + +upb_enum_iter upb_enum_begin(upb_enumdef *e); +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); +INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; } + +INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { + return iter->string; +} +INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { + return iter->e.key; +} + /* upb_symtab *****************************************************************/ @@ -252,10 +276,10 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // more useful? Maybe it should be an option. void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); -// Returns a symtab that defines google.protobuf.DescriptorProto and all other -// types that are defined in descriptor.proto. This allows you to load other -// proto types. The caller owns a ref on the returned symtab. -upb_symtab *upb_get_descriptor_symtab(); +// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab. +// This is necessary for bootstrapping, since these are the upb_defs that +// specify other defs and allow them to be loaded. 
+void upb_symtab_add_descriptorproto(upb_symtab *s); /* upb_def casts **************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 52172d2..ba2670e 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -88,6 +88,7 @@ struct upb_bytesrc { INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; + upb_status_init(&s->status); #ifndef DEBUG // TODO: initialize debug-mode checking. #endif diff --git a/core/upb_table.c b/core/upb_table.c index b91776c..b860204 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -179,7 +179,7 @@ static void strinsert(upb_strtable *t, upb_strtable_entry *e) memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ upb_strtable_entry *evictee_e = strent(t, evictee_bucket); while(1) { - assert(!upb_string_isnull(evictee_e->key)); + assert(evictee_e->key); assert(evictee_e->next != UPB_END_OF_CHAIN); if(evictee_e->next == bucket) { evictee_e->next = empty_bucket; diff --git a/tests/test_string.c b/tests/test_string.c index 5e6e2a9..5869b70 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -66,4 +66,7 @@ int main() { upb_string_unref(str); upb_string_unref(str2); + + // Unref of NULL is harmless. + upb_string_unref(NULL); } -- cgit v1.2.3 From 4b6c8b6b2317436ab77b38e17b49a7c7b03bf3f4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 19:00:40 -0700 Subject: Fixed bugs in textoutput. Text output from descriptor.proto is now identical to protoc! 
--- core/upb_def.c | 6 +++++ core/upb_def.h | 1 + stream/upb_textprinter.c | 62 +++++++++++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 27 deletions(-) (limited to 'core/upb_def.h') diff --git a/core/upb_def.c b/core/upb_def.c index c0d72db..fd00895 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -331,6 +331,12 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { return upb_inttable_next(&e->iton, &iter->e); } +upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { + upb_iton_ent *e = + (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); + return e ? e->string : NULL; +} + /* upb_fielddef ***************************************************************/ diff --git a/core/upb_def.h b/core/upb_def.h index 82d8520..9cdc54d 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -202,6 +202,7 @@ typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +// Caller does not own a ref on the returned string. upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 75668a3..2d2e237 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -29,38 +29,48 @@ static void upb_textprinter_endfield(upb_textprinter *p) } static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { - p->str = upb_string_tryrecycle(p->str); + upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); + upb_enumdef *enum_def; + upb_string *enum_label; + if(p->f->type == UPB_TYPE(ENUM) && + (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && + (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { + // This is an enum value for which we found a corresponding string. 
+ upb_bytesink_put(p->bytesink, enum_label); + } else { + p->str = upb_string_tryrecycle(p->str); #define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; - switch(p->f->type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); + switch(p->f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): + CASE("%" PRIu32, uint32); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + } + upb_bytesink_put(p->bytesink, p->str); } - upb_bytesink_put(p->bytesink, p->str); upb_textprinter_endfield(p); return upb_ok(upb_bytesink_status(p->bytesink)); } static bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { - upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); // TODO: escaping. 
upb_bytesink_put(p->bytesink, str); upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); @@ -79,14 +89,12 @@ static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) { upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, f->name); - upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); p->f = f; return upb_ok(upb_bytesink_status(p->bytesink)); } static bool upb_textprinter_startmsg(upb_textprinter *p) { - upb_bytesink_put(p->bytesink, p->f->def->fqname); upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); p->indent_depth++; -- cgit v1.2.3 From 5871ed0d02ff69b20b65f577dd3be18a2e92dec7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 18 Jul 2010 22:45:15 -0700 Subject: First go at Lua bindings. --- Makefile | 10 +++ core/upb_def.c | 4 +- core/upb_def.h | 8 +- core/upb_string.h | 5 +- lang_ext/lua/upb.c | 254 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 274 insertions(+), 7 deletions(-) create mode 100644 lang_ext/lua/upb.c (limited to 'core/upb_def.h') diff --git a/Makefile b/Makefile index 10ef96d..749c5a7 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,13 @@ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. 
CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread +ifeq ($(shell uname), Darwin) + CPPFLAGS += -I/usr/include/lua5.1 + LDFLAGS += -L/usr/local/lib -llua +else + CFLAGS += $(strip $(shell pkg-config --silence-errors --cflags lua || pkg-config --cflags lua5.1)) + LDFLAGS += $(strip $(shell pkg-config --silence-errors --libs lua || pkg-config --libs lua5.1)) +endif LIBUPB=core/libupb.a LIBUPB_PIC=core/libupb_pic.a @@ -59,6 +66,9 @@ core/upb_def.o: core/upb_def.c core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC +lang_ext/lua/upb.so: lang_ext/lua/upb.lo + $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a + STATICOBJ=$(patsubst %.c,%.o,$(SRC)) SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC)) diff --git a/core/upb_def.c b/core/upb_def.c index fd00895..0d97982 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -190,7 +190,7 @@ void _upb_def_cyclic_ref(upb_def *def) { upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); } -static void upb_def_init(upb_def *def, upb_def_type type) { +static void upb_def_init(upb_def *def, upb_deftype type) { def->type = type; def->is_cyclic = 0; // We detect this later, after resolving refs. def->search_depth = 0; @@ -779,7 +779,7 @@ void _upb_symtab_free(upb_symtab *s) free(s); } -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) { upb_rwlock_rdlock(&s->lock); int total = upb_strtable_count(&s->symtab); diff --git a/core/upb_def.h b/core/upb_def.h index 9cdc54d..ae9e0fa 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -48,15 +48,15 @@ typedef enum { // For specifying that defs of any type are requsted from getdefs. UPB_DEF_ANY = -1 -} upb_def_type; +} upb_deftype; // This typedef is more space-efficient than declaring an enum var directly. 
-typedef int8_t upb_def_type_t; +typedef int8_t upb_deftype_t; typedef struct { upb_string *fqname; // Fully qualified. upb_atomic_refcount_t refcount; - upb_def_type_t type; + upb_deftype_t type; // The is_cyclic flag could go in upb_msgdef instead of here, because only // messages can be involved in cycles. However, putting them here is free @@ -265,7 +265,7 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); // caller owns the returned array (which is of length *count) as well as a ref // to each symbol inside. If type is UPB_DEF_ANY then defs of all types are // returned, otherwise only defs of the required type are returned. -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type); // "fds" is a upb_src that will yield data from the // google.protobuf.FileDescriptorSet message type. upb_symtab_addfds() adds diff --git a/core/upb_string.h b/core/upb_string.h index 65ba404..bd89f67 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -197,7 +197,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str, _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) #define UPB_STATIC_STRING_LEN(str, len) \ _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) -#define UPB_STACK_STRING(str) _UPB_STRING_INIT(str, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STACK_STRING(str) \ + _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STACK_STRING_LEN(str, len) \ + _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) /* upb_string library functions ***********************************************/ diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c new file mode 100644 index 0000000..ac7f188 --- /dev/null +++ b/lang_ext/lua/upb.c @@ -0,0 +1,254 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. 
See LICENSE for details. + * + * A Lua extension for upb. + */ + +#include "lauxlib.h" +#include "upb_def.h" + +/* lupb_def *******************************************************************/ + +// All the def types share the same C layout, even though they are differen Lua +// types with different metatables. +typedef struct { + upb_def *def; +} lupb_def; + +static void lupb_pushnewdef(lua_State *L, upb_def *def) { + lupb_def *ldef = lua_newuserdata(L, sizeof(lupb_def)); + ldef->def = def; + const char *type_name; + switch(def->type) { + case UPB_DEF_MSG: + type_name = "upb.msgdef"; + break; + case UPB_DEF_ENUM: + type_name = "upb.enumdef"; + break; + default: + luaL_error(L, "unknown deftype %d", def->type); + } + luaL_getmetatable(L, type_name); + lua_setmetatable(L, -2); +} + +static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.msgdef"); +} + +static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.enumdef"); +} + +static int lupb_msgdef_gc(lua_State *L) { + lupb_def *ldef = lupb_msgdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + +static int lupb_enumdef_gc(lua_State *L) { + lupb_def *ldef = lupb_enumdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + +static const struct luaL_Reg lupb_msgdef_methods[] = { + {"__gc", lupb_msgdef_gc}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_enumdef_methods[] = { + {"__gc", lupb_enumdef_gc}, + {NULL, NULL} +}; + + +/* lupb_symtab ****************************************************************/ + +// lupb_symtab caches the Lua objects it vends (defs) via lookup or resolve. +// It does this (instead of creating a new Lua object every time) for two +// reasons: +// * it uses less memory, because we can reuse existing objects. +// * it gives the expected equality semantics, eg. symtab[sym] == symtab[sym]. +// +// The downside is a bit of complexity. 
We need a place to store these +// cached defs; the only good answer is in the metatable. This means we need +// a new metatable for every symtab instance (instead of one shared by all +// instances). Since this is different than the regular pattern, we can't +// use luaL_checkudata(), we have to implement it ourselves. +typedef struct { + upb_symtab *symtab; +} lupb_symtab; + +static int lupb_symtab_gc(lua_State *L); + +// Inherits a ref on the symtab. +static void lupb_pushnewsymtab(lua_State *L, upb_symtab *symtab) { + lupb_symtab *lsymtab = lua_newuserdata(L, sizeof(lupb_symtab)); + lsymtab->symtab = symtab; + // Create its metatable (see note above about mt-per-object). + lua_createtable(L, 0, 1); + luaL_getmetatable(L, "upb.symtab"); + lua_setfield(L, -2, "__index"); // Uses the type metatable to find methods. + lua_pushcfunction(L, lupb_symtab_gc); + lua_setfield(L, -2, "__gc"); + + // Put this metatable in the registry so we can find it for type validation. + lua_pushlightuserdata(L, lsymtab); + lua_pushvalue(L, -2); + lua_rawset(L, LUA_REGISTRYINDEX); + + // Set the symtab's metatable. + lua_setmetatable(L, -2); +} + +// Checks that narg is a proper lupb_symtab object. If it is, leaves its +// metatable on the stack for cache lookups/updates. +lupb_symtab *lupb_symtab_check(lua_State *L, int narg) { + lupb_symtab *symtab = lua_touserdata(L, narg); + if (symtab != NULL) { + if (lua_getmetatable(L, narg)) { + // We use a metatable-per-object to support memoization of defs. + lua_pushlightuserdata(L, symtab); + lua_rawget(L, LUA_REGISTRYINDEX); + if (lua_rawequal(L, -1, -2)) { // Does it have the correct mt? + lua_pop(L, 1); // Remove one copy of the mt, keep the other. + return symtab; + } + } + } + luaL_typerror(L, narg, "upb.symtab"); + return NULL; // Placate the compiler; luaL_typerror will longjmp out of here. 
+} + +static int lupb_symtab_gc(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_symtab_unref(s->symtab); + + // Remove its metatable from the registry. + lua_pushlightuserdata(L, s); + lua_pushnil(L); + lua_rawset(L, LUA_REGISTRYINDEX); + return 0; +} + +// "mt" is the index of the metatable, -1 is the fqname of this def. +// Leaves the Lua object for the def at the top of the stack. +// Inherits a ref on "def". +static void lupb_symtab_getorcreate(lua_State *L, upb_def *def, int mt) { + // We may have this def cached, in which case we should return the same Lua + // object (as long as the value in the underlying symtab has not changed. + lua_rawget(L, mt); + if (!lua_isnil(L, -1)) { + // Def is cached, make sure it hasn't changed. + lupb_def *ldef = lua_touserdata(L, -1); + if (!ldef) luaL_error(L, "upb's internal cache is corrupt."); + if (ldef->def == def) { + // Cache is good, we can just return the cached value. + upb_def_unref(def); + return; + } + } + // Cached entry didn't exist or wasn't good. + lua_pop(L, 1); // Remove bad cached value. + lupb_pushnewdef(L, def); + + // Set it in the cache. + lua_pushvalue(L, 2); // push name (arg to this function). + lua_pushvalue(L, -2); // push the new def. + lua_rawset(L, mt); // set in the cache (the mt). +} + +static int lupb_symtab_lookup(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + size_t len; + const char *name = luaL_checklstring(L, 2, &len); + upb_string namestr = UPB_STACK_STRING_LEN(name, len); + upb_def *def = upb_symtab_lookup(s->symtab, &namestr); + if (!def) { + // There shouldn't be a value in our cache either because the symtab + // currently provides no API for deleting syms from a table. In case + // this changes in the future, we explicitly delete from the cache here. + lua_pushvalue(L, 2); // push name (arg to this function). + lua_pushnil(L); + lua_rawset(L, -3); // lupb_symtab_check() left our mt on the stack. + + // Return nil because the symbol was not found. 
+ lua_pushnil(L); + return 1; + } else { + lua_pushvalue(L, 2); + lupb_symtab_getorcreate(L, def, 3); + return 1; + } +} + +static int lupb_symtab_getdefs(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_deftype_t type = luaL_checkint(L, 2); + int count; + upb_def **defs = upb_symtab_getdefs(s->symtab, &count, type); + + // Create the table in which we will return the defs. + lua_createtable(L, 0, count); + int ret = lua_gettop(L); + + for (int i = 0; i < count; i++) { + upb_def *def = defs[i]; + // Look it up in the cache by name. + upb_string *name = def->fqname; + lua_pushlstring(L, upb_string_getrobuf(name), upb_string_len(name)); + lua_pushvalue(L, -1); // Push it again since the getorcreate consumes one. + lupb_symtab_getorcreate(L, def, 3); + + // Add it to our return table. + lua_settable(L, ret); + } + return 1; +} + +static int lupb_symtab_add_descriptorproto(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_symtab_add_descriptorproto(s->symtab); + return 0; // No args to return. 
+} + +static const struct luaL_Reg lupb_symtab_methods[] = { + {"add_descriptorproto", lupb_symtab_add_descriptorproto}, + //{"addfds", lupb_symtab_addfds}, + {"getdefs", lupb_symtab_getdefs}, + {"lookup", lupb_symtab_lookup}, + //{"resolve", lupb_symtab_resolve}, + {NULL, NULL} +}; + + +/* lupb toplevel **************************************************************/ + +static int lupb_symtab_new(lua_State *L) { + upb_symtab *s = upb_symtab_new(); + lupb_pushnewsymtab(L, s); + return 1; +} + +static const struct luaL_Reg lupb_toplevel_methods[] = { + {"symtab", lupb_symtab_new}, + {NULL, NULL} +}; + +int luaopen_upb(lua_State *L) { + luaL_newmetatable(L, "upb.msgdef"); + luaL_register(L, NULL, lupb_msgdef_methods); + + luaL_newmetatable(L, "upb.enumdef"); + luaL_register(L, NULL, lupb_enumdef_methods); + + luaL_newmetatable(L, "upb.symtab"); + luaL_register(L, NULL, lupb_symtab_methods); + + luaL_register(L, "upb", lupb_toplevel_methods); + return 1; // Return package table. +} -- cgit v1.2.3 From 21ee24a7300dbdabef707457d2407b4f9187603b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 18:59:01 -0700 Subject: Updated Lua extension to handle fielddefs. 
--- core/upb_def.c | 1 + core/upb_def.h | 22 ++++++++++------- core/upb_table.c | 2 +- lang_ext/lua/upb.c | 71 +++++++++++++++++++++++++++++++++++++++++++----------- tests/test_def.c | 2 ++ 5 files changed, 74 insertions(+), 24 deletions(-) (limited to 'core/upb_def.h') diff --git a/core/upb_def.c b/core/upb_def.c index 1feaf9d..e40e1f0 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -355,6 +355,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->name = NULL; f->def = NULL; f->owned = false; + f->msgdef = m; upb_fielddef *parsed_f; int32_t tmp; while((parsed_f = upb_src_getdef(src))) { diff --git a/core/upb_def.h b/core/upb_def.h index ae9e0fa..5c19a7a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -87,23 +87,27 @@ INLINE void upb_def_unref(upb_def *def) { // is either a field of a upb_msgdef or contained inside a upb_extensiondef. // It is also reference-counted. typedef struct _upb_fielddef { - upb_atomic_refcount_t refcount; - upb_string *name; - upb_field_number_t number; - upb_field_type_t type; - upb_label_t label; upb_value default_value; + upb_string *name; + + struct _upb_msgdef *msgdef; + // For the case of an enum or a submessage, points to the def for that type. upb_def *def; - // True if we own a ref on "def" (above). This is true unless this edge is - // part of a cycle. - bool owned; + upb_atomic_refcount_t refcount; + uint32_t byte_offset; // Where in a upb_msg to find the data. // These are set only when this fielddef is part of a msgdef. - uint32_t byte_offset; // Where in a upb_msg to find the data. upb_field_count_t field_index; // Indicates set bit. + + upb_field_number_t number; + upb_field_type_t type; + upb_label_t label; + // True if we own a ref on "def" (above). This is true unless this edge is + // part of a cycle. + bool owned; } upb_fielddef; // A variety of tests about the type of a field. 
diff --git a/core/upb_table.c b/core/upb_table.c index b860204..a6e0a56 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -28,7 +28,7 @@ void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) { t->count = 0; t->entry_size = entry_size; - t->size_lg2 = 1; + t->size_lg2 = 0; while(size >>= 1) t->size_lg2++; size_t bytes = upb_table_size(t) * t->entry_size; t->mask = upb_table_size(t) - 1; diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index bfc1355..a16a187 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -15,10 +15,14 @@ // We cache all the lua objects (userdata) we vend in a weak table, indexed by // the C pointer of the object they are caching. -typedef void (*lupb_unref)(void *cobj); +typedef void (*lupb_cb)(void *cobj); + +static void lupb_nop(void *foo) { + (void)foo; +} static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, - lupb_unref unref) { + lupb_cb ref, lupb_cb unref) { // Lookup our cache in the registry (we don't put our objects in the registry // directly because we need our cache to be a weak table). lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache"); @@ -40,6 +44,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, lua_pushlightuserdata(L, cobj); lua_pushvalue(L, -2); lua_rawset(L, -4); + ref(cobj); } else { unref(cobj); } @@ -73,29 +78,21 @@ static void lupb_def_getorcreate(lua_State *L, upb_def *def) { luaL_error(L, "unknown deftype %d", def->type); type_name = NULL; // Placate the compiler. 
} - return lupb_cache_getorcreate(L, def, type_name, lupb_def_unref); + return lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref); } +// msgdef + static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { return luaL_checkudata(L, narg, "upb.msgdef"); } -static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { - return luaL_checkudata(L, narg, "upb.enumdef"); -} - static int lupb_msgdef_gc(lua_State *L) { lupb_def *ldef = lupb_msgdef_check(L, 1); upb_def_unref(ldef->def); return 0; } -static int lupb_enumdef_gc(lua_State *L) { - lupb_def *ldef = lupb_enumdef_check(L, 1); - upb_def_unref(ldef->def); - return 0; -} - static const struct luaL_Reg lupb_msgdef_mm[] = { {"__gc", lupb_msgdef_gc}, {NULL, NULL} @@ -105,6 +102,18 @@ static const struct luaL_Reg lupb_msgdef_m[] = { {NULL, NULL} }; +// enumdef + +static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.enumdef"); +} + +static int lupb_enumdef_gc(lua_State *L) { + lupb_def *ldef = lupb_enumdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + static const struct luaL_Reg lupb_enumdef_mm[] = { {"__gc", lupb_enumdef_gc}, {NULL, NULL} @@ -115,6 +124,40 @@ static const struct luaL_Reg lupb_enumdef_m[] = { }; +/* lupb_fielddef **************************************************************/ + +typedef struct { + upb_fielddef *field; +} lupb_fielddef; + +static void lupb_fielddef_ref(void *cobj) { + upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef)); +} + +static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) { + lupb_cache_getorcreate(L, f, "upb.fielddef", lupb_fielddef_ref, lupb_nop); +} + +static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.fielddef"); +} + +static int lupb_fielddef_gc(lua_State *L) { + lupb_fielddef *lfielddef = lupb_fielddef_check(L, 1); + upb_def_unref(UPB_UPCAST(lfielddef->field->msgdef)); + return 0; +} + +static const struct luaL_Reg 
lupb_fielddef_mm[] = { + {"__gc", lupb_fielddef_gc}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_fielddef_m[] = { + {NULL, NULL} +}; + + /* lupb_symtab ****************************************************************/ typedef struct { @@ -193,7 +236,7 @@ static const struct luaL_Reg lupb_symtab_mm[] = { static int lupb_symtab_new(lua_State *L) { upb_symtab *s = upb_symtab_new(); - lupb_cache_getorcreate(L, s, "upb.symtab", lupb_symtab_unref); + lupb_cache_getorcreate(L, s, "upb.symtab", lupb_nop, lupb_symtab_unref); return 1; } diff --git a/tests/test_def.c b/tests/test_def.c index e6f95d7..732835d 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -14,6 +14,8 @@ int main() { } free(defs); + printf("Size: %zd\n", sizeof(upb_ntof_ent)); + upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); upb_def *fds = upb_symtab_lookup(s, str); assert(fds != NULL); -- cgit v1.2.3 From 672f4617e2ab7923806c6d6a44d16e128e16b3a4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 22:36:31 -0700 Subject: Lua support for fielddefs and getting their properties. --- core/upb_def.h | 4 +-- lang_ext/lua/upb.c | 79 +++++++++++++++++++++++++++++++++++++++++++++------- stream/upb_decoder.c | 2 +- 3 files changed, 72 insertions(+), 13 deletions(-) (limited to 'core/upb_def.h') diff --git a/core/upb_def.h b/core/upb_def.h index 5c19a7a..3294a8d 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -158,13 +158,13 @@ typedef struct { // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. -INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { +INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) { upb_itof_ent *e = (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); return e ? 
e->f : NULL; } -INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { +INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); return e ? e->f : NULL; } diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index a8165c7..5ab07ba 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -10,6 +10,10 @@ #include "lauxlib.h" #include "upb_def.h" +void lupb_pushstring(lua_State *L, upb_string *str) { + lua_pushlstring(L, upb_string_getrobuf(str), upb_string_len(str)); +} + /* object cache ***************************************************************/ // We cache all the lua objects (userdata) we vend in a weak table, indexed by @@ -38,6 +42,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, void **obj = lua_newuserdata(L, sizeof(void*)); *obj = cobj; luaL_getmetatable(L, type); + assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. lua_setmetatable(L, -2); // Set it in the cache. 
@@ -83,22 +88,53 @@ static void lupb_def_getorcreate(lua_State *L, upb_def *def) { // msgdef -static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { - return luaL_checkudata(L, narg, "upb.msgdef"); +static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) { + lupb_def *ldef = luaL_checkudata(L, narg, "upb.msgdef"); + return upb_downcast_msgdef(ldef->def); } static int lupb_msgdef_gc(lua_State *L) { - lupb_def *ldef = lupb_msgdef_check(L, 1); + lupb_def *ldef = luaL_checkudata(L, 1, "upb.msgdef"); upb_def_unref(ldef->def); return 0; } +static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f); + +static int lupb_msgdef_fieldbyname(lua_State *L) { + upb_msgdef *m = lupb_msgdef_check(L, 1); + size_t len; + const char *name = luaL_checklstring(L, 2, &len); + upb_string namestr = UPB_STACK_STRING_LEN(name, len); + upb_fielddef *f = upb_msgdef_ntof(m, &namestr); + if (f) { + lupb_fielddef_getorcreate(L, f); + } else { + lua_pushnil(L); + } + return 1; +} + +static int lupb_msgdef_fieldbynum(lua_State *L) { + upb_msgdef *m = lupb_msgdef_check(L, 1); + int num = luaL_checkint(L, 2); + upb_fielddef *f = upb_msgdef_itof(m, num); + if (f) { + lupb_fielddef_getorcreate(L, f); + } else { + lua_pushnil(L); + } + return 1; +} + static const struct luaL_Reg lupb_msgdef_mm[] = { {"__gc", lupb_msgdef_gc}, {NULL, NULL} }; static const struct luaL_Reg lupb_msgdef_m[] = { + {"fieldbyname", lupb_msgdef_fieldbyname}, + {"fieldbynum", lupb_msgdef_fieldbynum}, {NULL, NULL} }; @@ -142,6 +178,29 @@ static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { return luaL_checkudata(L, narg, "upb.fielddef"); } +static int lupb_fielddef_index(lua_State *L) { + lupb_fielddef *f = lupb_fielddef_check(L, 1); + const char *str = luaL_checkstring(L, 2); + if (strcmp(str, "name") == 0) { + lupb_pushstring(L, f->field->name); + } else if (strcmp(str, "number") == 0) { + lua_pushinteger(L, f->field->number); + } else if (strcmp(str, "type") == 0) { + lua_pushinteger(L, 
f->field->type); + } else if (strcmp(str, "label") == 0) { + lua_pushinteger(L, f->field->label); + } else if (strcmp(str, "def") == 0) { + upb_def_ref(f->field->def); + lupb_def_getorcreate(L, f->field->def); + } else if (strcmp(str, "msgdef") == 0) { + upb_def_ref(UPB_UPCAST(f->field->msgdef)); + lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef)); + } else { + lua_pushnil(L); + } + return 1; +} + static int lupb_fielddef_gc(lua_State *L) { lupb_fielddef *lfielddef = lupb_fielddef_check(L, 1); upb_def_unref(UPB_UPCAST(lfielddef->field->msgdef)); @@ -150,10 +209,7 @@ static int lupb_fielddef_gc(lua_State *L) { static const struct luaL_Reg lupb_fielddef_mm[] = { {"__gc", lupb_fielddef_gc}, - {NULL, NULL} -}; - -static const struct luaL_Reg lupb_fielddef_m[] = { + {"__index", lupb_fielddef_index}, {NULL, NULL} }; @@ -206,7 +262,7 @@ static int lupb_symtab_getdefs(lua_State *L) { for (int i = 0; i < count; i++) { upb_def *def = defs[i]; upb_string *name = def->fqname; - lua_pushlstring(L, upb_string_getrobuf(name), upb_string_len(name)); + lupb_pushstring(L, name); lupb_def_getorcreate(L, def); // Add it to our return table. lua_settable(L, -3); @@ -255,14 +311,17 @@ static void lupb_register_type(lua_State *L, const char *name, luaL_newmetatable(L, name); luaL_register(L, NULL, mm); lua_createtable(L, 0, 0); - luaL_register(L, NULL, m); - lua_setfield(L, -2, "__index"); + if (m) { + luaL_register(L, NULL, m); + lua_setfield(L, -2, "__index"); + } lua_pop(L, 1); // The mt. } int luaopen_upb(lua_State *L) { lupb_register_type(L, "upb.msgdef", lupb_msgdef_m, lupb_msgdef_mm); lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm); + lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm); lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm); // Create our object cache. TODO: need to make this table weak! 
diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 949ce2d..74ef5c5 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -342,7 +342,7 @@ again: } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number); + upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, field_number); if (!f) { // Unknown field. If/when the upb_src interface supports reporting -- cgit v1.2.3 From a9e998159c5ac8c4f2644b5ed0eda2e8ff1f8706 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 2 Aug 2010 10:25:24 -0700 Subject: Fleshed out upb_msg: test_vs_proto2 compiles but fails. --- Makefile | 10 ++-- core/upb.h | 98 ++++++++++++++++++++++++++++++++++---- core/upb_atomic.h | 4 ++ core/upb_def.c | 65 ++++++++++++++++++++++++- core/upb_def.h | 28 +++++++++-- core/upb_msg.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ core/upb_msg.h | 114 ++++++++++++++++++++++++++++++++++++++++---- stream/upb_decoder.c | 8 ++-- stream/upb_strstream.h | 2 +- tests/test_vs_proto2.cc | 54 ++++++++++++--------- 10 files changed, 452 insertions(+), 54 deletions(-) create mode 100644 core/upb_msg.c (limited to 'core/upb_def.h') diff --git a/Makefile b/Makefile index 203bed6..131b3c0 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ + core/upb_msg.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. 
@@ -101,14 +102,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ tests/test_def \ - tests/test_decoder -tests: $(TESTS) - -OTHER_TESTS=tests/tests \ - tests/test_table \ + tests/test_decoder \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ $(TESTS): core/libupb.a VALGRIND=valgrind --leak-check=full --error-exitcode=1 diff --git a/core/upb.h b/core/upb.h index b605fd9..7ee0469 100644 --- a/core/upb.h +++ b/core/upb.h @@ -80,24 +80,16 @@ enum upb_wire_type { typedef uint8_t upb_wire_type_t; -// Value type as defined in a .proto file. eg. string, int32, etc. The +// Type of a field as defined in a .proto file. eg. string, int32, etc. The // integers that represent this are defined by descriptor.proto. Note that // descriptor.proto reserves "0" for errors, and we use it to represent // exceptional circumstances. -typedef uint8_t upb_field_type_t; +typedef uint8_t upb_fieldtype_t; // For referencing the type constants tersely. #define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type #define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} - // Info for a given field type. 
typedef struct { uint8_t align; @@ -129,6 +121,10 @@ typedef union { struct _upb_string; typedef struct _upb_string upb_string; +struct _upb_array; +typedef struct _upb_array upb_array; +struct _upb_msg; +typedef struct _upb_msg upb_msg; typedef uint32_t upb_strlen_t; @@ -142,6 +138,11 @@ typedef union { uint32_t uint32; uint64_t uint64; bool _bool; + upb_string *str; + upb_msg *msg; + upb_array *arr; + upb_atomic_refcount_t *refcount; + void *_void; } upb_value; // A pointer to a .proto value. The owner must have an out-of-band way of @@ -155,13 +156,90 @@ typedef union { uint32_t *uint32; uint64_t *uint64; bool *_bool; + upb_string **str; + upb_msg **msg; + upb_array **arr; + void *_void; } upb_valueptr; +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 + INLINE upb_valueptr upb_value_addrof(upb_value *val) { upb_valueptr ptr = {&val->_double}; return ptr; } +// Converts upb_value_ptr -> upb_value by reading from the pointer. We need to +// know the value type to perform this operation, because we need to know how +// much memory to copy. +INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { + upb_value val; + +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.member_name = *ptr.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + return val; + +#undef CASE +} + +// Writes a upb_value to a upb_value_ptr location. 
We need to know the value +// type to perform this operation, because we need to know how much memory to +// copy. +INLINE void upb_value_write(upb_valueptr ptr, upb_value val, + upb_fieldtype_t ft) { +#define CASE(t, member_name) \ + case UPB_TYPE(t): *ptr.member_name = val.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + +#undef CASE +} + // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { diff --git a/core/upb_atomic.h b/core/upb_atomic.h index 01fc8a2..1cd848b 100644 --- a/core/upb_atomic.h +++ b/core/upb_atomic.h @@ -127,6 +127,10 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { Implement them or compile with UPB_THREAD_UNSAFE. #endif +INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) { + return upb_atomic_read(a) == 1; +} + /* Reader/Writer lock. ********************************************************/ #ifdef UPB_THREAD_UNSAFE diff --git a/core/upb_def.c b/core/upb_def.c index e117455..1c8fbdc 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -12,6 +12,16 @@ #define CHECKSRC(x) if(!(x)) goto src_err #define CHECK(x) if(!(x)) goto err +/* Rounds p up to the next multiple of t. */ +static size_t upb_align_up(size_t val, size_t align) { + return val % align == 0 ? val : val + align - (val % align); +} + +static int upb_div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? 
(numerator - 1) / denominator + 1 : 0; +} + // A little dynamic array for storing a growing list of upb_defs. typedef struct { upb_def **defs; @@ -409,6 +419,19 @@ src_err: /* upb_msgdef *****************************************************************/ +static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { + // Sort by data size (ascending) to reduce padding. + size_t size1 = upb_types[f1->type].size; + size_t size2 = upb_types[f2->type].size; + if (size1 != size2) return size1 - size2; + // Otherwise return in number order (just so we get a reproduceable order. + return f1->number - f2->number; +} + +static int upb_compare_fields(const void *f1, const void *f2) { + return upb_compare_typed_fields(*(void**)f1, *(void**)f2); +} + // Processes a google.protobuf.DescriptorProto, adding defs to "defs." static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) { @@ -418,7 +441,6 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); int32_t start_count = defs->len; - upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { @@ -451,6 +473,45 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); goto err; } + + + // Create an ordering over the fields. + upb_field_count_t n = upb_msgdef_numfields(m); + upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); + upb_field_count_t field = 0; + upb_msg_iter i; + for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + sorted_fields[field++]= upb_msg_iter_field(i); + } + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); + + // Assign offsets in the msg. 
+ m->set_flags_bytes = upb_div_round_up(n, 8); + m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes; + + size_t max_align = 0; + for (int i = 0; i < n; i++) { + upb_fielddef *f = sorted_fields[i]; + upb_type_info *type_info = &upb_types[f->type]; + + // This identifies the set bit. When we implement is_initialized (a + // general check about whether all required bits are set) we will probably + // want to use a different ordering that puts all the required bits + // together. + f->field_index = i; + + // General alignment rules are: each member must be at an address that is a + // multiple of that type's alignment. Also, the size of the structure as a + // whole must be a multiple of the greatest alignment of any member. + size_t offset = upb_align_up(m->size, type_info->align); + // Offsets are relative to the end of the refcount. + f->byte_offset = offset - sizeof(upb_atomic_refcount_t); + m->size = offset + type_info->size; + max_align = UPB_MAX(max_align, type_info->align); + } + + if (max_align > 0) m->size = upb_align_up(m->size, max_align); + upb_deflist_qualify(defs, m->base.fqname, start_count); upb_deflist_push(defs, UPB_UPCAST(m)); return true; @@ -664,7 +725,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, } // Check the type of the found def. - upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(found->def->type != expected) { upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); return false; diff --git a/core/upb_def.h b/core/upb_def.h index 3294a8d..9eb961a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -103,7 +103,7 @@ typedef struct _upb_fielddef { upb_field_count_t field_index; // Indicates set bit. upb_field_number_t number; - upb_field_type_t type; + upb_fieldtype_t type; upb_label_t label; // True if we own a ref on "def" (above). This is true unless this edge is // part of a cycle. 
@@ -112,10 +112,10 @@ typedef struct _upb_fielddef { // A variety of tests about the type of a field. INLINE bool upb_issubmsg(upb_fielddef *f) { - return upb_issubmsgtype(f->type); + return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE); } INLINE bool upb_isstring(upb_fielddef *f) { - return upb_isstringtype(f->type); + return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES); } INLINE bool upb_isarray(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); @@ -125,6 +125,19 @@ INLINE bool upb_hasdef(upb_fielddef *f) { return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } +INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) { + if (upb_isarray(f)) { + return UPB_VALUETYPE_ARRAY; + } else { + return f->type; + } +} + +INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) { + assert(upb_isarray(f)); + return f->type; +} + INLINE bool upb_field_ismm(upb_fielddef *f) { return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); } @@ -139,6 +152,8 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; + uint32_t size; + uint32_t set_flags_bytes; // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -169,9 +184,14 @@ INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) { + return upb_strtable_count(&m->ntof); +} + // Iteration over fields. The order is undefined. // upb_msg_iter i; -// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { +// upb_fielddef *f = upb_msg_iter_field(i); // // ... 
// } typedef upb_itof_ent *upb_msg_iter; diff --git a/core/upb_msg.c b/core/upb_msg.c new file mode 100644 index 0000000..75f7a35 --- /dev/null +++ b/core/upb_msg.c @@ -0,0 +1,123 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + * Data structure for storing a message of protobuf data. + */ + +#include "upb_msg.h" + +void _upb_elem_free(upb_value v, upb_fielddef *f) { + switch(f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + _upb_msg_free(v.msg, upb_downcast_msgdef(f->def)); + break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + _upb_string_free(v.str); + break; + default: + abort(); + } +} + +void _upb_field_free(upb_value v, upb_fielddef *f) { + if (upb_isarray(f)) { + _upb_array_free(v.arr, f); + } else { + _upb_elem_free(v, f); + } +} + +upb_msg *upb_msg_new(upb_msgdef *md) { + upb_msg *msg = malloc(md->size); + // Clear all set bits and cached pointers. + memset(msg, 0, md->size); + upb_atomic_refcount_init(&msg->refcount, 1); + return msg; +} + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { + // Need to release refs on all sub-objects. + upb_msg_iter i; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) _upb_field_unref(upb_value_read(p, type), f); + } + free(msg); +} + +upb_array *upb_array_new(void) { + upb_array *arr = malloc(sizeof(*arr)); + upb_atomic_refcount_init(&arr->refcount, 1); + arr->size = 0; + arr->len = 0; + arr->elements._void = NULL; + return arr; +} + +void _upb_array_free(upb_array *arr, upb_fielddef *f) { + if (upb_elem_ismm(f)) { + // Need to release refs on sub-objects. 
+ upb_valuetype_t type = upb_elem_valuetype(f); + for (upb_arraylen_t i = 0; i < arr->size; i++) { + upb_valueptr p = _upb_array_getptr(arr, f, i); + _upb_elem_unref(upb_value_read(p, type), f); + } + } + if (arr->elements._void) free(arr->elements._void); + free(arr); +} + +upb_value upb_field_new(upb_fielddef *f, upb_valuetype_t type) { + upb_value v; + switch(type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + v.msg = upb_msg_new(upb_downcast_msgdef(f->def)); + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + v.str = upb_string_new(); + case UPB_VALUETYPE_ARRAY: + v.arr = upb_array_new(); + default: + abort(); + } + return v; +} + +static void upb_field_recycle(upb_value val) { + (void)val; +} + +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, + upb_valuetype_t type) { + if (val._void == NULL || !upb_atomic_only(val.refcount)) { + if (val._void != NULL) upb_atomic_unref(val.refcount); + val = upb_field_new(f, type); + upb_value_write(p, val, type); + } else { + upb_field_recycle(val); + } + return val; +} + +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} + +void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} diff --git a/core/upb_msg.h b/core/upb_msg.h index 5215bd9..2db67c0 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -9,14 +9,39 @@ #ifndef UPB_MSG_H #define UPB_MSG_H +#include "upb.h" +#include "upb_def.h" +#include + #ifdef __cplusplus extern "C" { #endif -typedef struct { +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value v, upb_fielddef *f, + upb_valuetype_t type); + +INLINE void _upb_value_ref(upb_value v) { upb_atomic_ref(v.refcount); } + +void _upb_field_free(upb_value v, upb_fielddef *f); +void _upb_elem_free(upb_value v, upb_fielddef *f); +INLINE void _upb_field_unref(upb_value v, upb_fielddef *f) { + 
assert(upb_field_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_field_free(v, f); +} +INLINE void _upb_elem_unref(upb_value v, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_elem_free(v, f); +} + +/* upb_array ******************************************************************/ + +typedef uint32_t upb_arraylen_t; +struct _upb_array { upb_atomic_refcount_t refcount; - uint32_t len; - uint32_t size; + upb_arraylen_t len; + upb_arraylen_t size; upb_valueptr elements; }; @@ -31,29 +56,70 @@ INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); } +INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, + uint32_t elem) { + upb_valueptr p; + p._void = &a->elements.uint8[elem * upb_types[f->type].size]; + return p; +} + INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { assert(elem < upb_array_len(a)); return upb_value_read(_upb_array_getptr(a, f, elem), f->type); } // For string or submessages, will release a ref on the previously set value. +// and take a ref on the new value. The array must already be at least "elem" +// long; to append use append_mutable. INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem, upb_value val) { + assert(elem < upb_array_len(a)); + upb_valueptr p = _upb_array_getptr(a, f, elem); + if (upb_elem_ismm(f)) { + _upb_elem_unref(upb_value_read(p, f->type), f); + _upb_value_ref(val); + } + upb_value_write(p, val, f->type); } -// Append an element with the default value, returning it. For strings or -// submessages, this will try to reuse previously allocated memory. 
-INLINE upb_value upb_array_append_mutable(upb_array *a, upb_fielddef *f) { +INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) { + if (a->len == a->size) { + a->len *= 2; + a->elements._void = realloc(a->elements._void, + a->len * upb_types[f->type].size); + } } -typedef struct { +// Append an element to an array of string or submsg with the default value, +// returning it. This will try to reuse previously allocated memory. +INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + upb_array_resize(a, f); + upb_valueptr p = _upb_array_getptr(a, f, a->len++); + upb_valuetype_t type = upb_elem_valuetype(f); + upb_value val = upb_value_read(p, type); + val = upb_field_tryrecycle(p, val, f, type); + return val; +} + + +/* upb_msg ********************************************************************/ + +struct _upb_msg { upb_atomic_refcount_t refcount; uint8_t data[4]; // We allocate the appropriate amount per message. -} upb_msg; +}; // Creates a new msg of the given type. upb_msg *upb_msg_new(upb_msgdef *md); +// Returns a pointer to the given field. +INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { + upb_valueptr p; + p._void = &msg->data[f->byte_offset]; + return p; +} + void _upb_msg_free(upb_msg *msg, upb_msgdef *md); INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); @@ -65,6 +131,10 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; } +INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { + msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); +} + // Returns the current value of the given field if set, or the default value if // not set. 
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { @@ -79,12 +149,29 @@ INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { // Otherwise sets it and returns an empty instance, attempting to reuse any // previously allocated memory. INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f) { + assert(upb_field_ismm(f)); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + upb_value val = upb_value_read(p, type); + if (!upb_msg_has(msg, f)) { + upb_msg_sethas(msg, f); + val = upb_field_tryrecycle(p, val, f, type); + } + return val; } // Sets the current value of the field. If this is a string, array, or // submessage field, releases a ref on the value (if any) that was previously // set. INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) { + _upb_field_unref(upb_value_read(p, type), f); + _upb_value_ref(val); + } + upb_msg_sethas(msg, f); + upb_value_write(p, val, upb_field_valuetype(f)); } // Unsets all field values back to their defaults. @@ -92,6 +179,17 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } +// A convenience function for decoding an entire protobuf all at once, without +// having to worry about setting up the appropriate objects. +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + +// A convenience function for encoding an entire protobuf all at once. If an +// error occurs, the null string is returned and the status object contains +// the error. 
+void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 7591f78..c35212e 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -14,8 +14,10 @@ // Returns true if the give wire type and field type combination is valid, // taking into account both packed and non-packed encodings. -static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - return (1 << wt) & upb_types[ft].allowed_wire_types; +static bool upb_check_type(upb_wire_type_t wt, upb_fielddef *f) { + // TODO: need to take into account the label; only repeated fields are + // allowed to use packed encoding. + return (1 << wt) & upb_types[f->type].allowed_wire_types; } // Performs zig-zag decoding, which is used by sint32 and sint64. @@ -358,7 +360,7 @@ again: // unknown fields we will implement that here. upb_decoder_skipval(d); goto again; - } else if (!upb_check_type(wire_type, f->type)) { + } else if (!upb_check_type(wire_type, f)) { // This is a recoverable error condition. We skip the value but also // return NULL and report the error. upb_decoder_skipval(d); diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h index fa9bace..d01d21f 100644 --- a/stream/upb_strstream.h +++ b/stream/upb_strstream.h @@ -31,7 +31,7 @@ void upb_stringsrc_free(upb_stringsrc *s); void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); // Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
-upb_bytesrc *upb_stringsrc_bytesrc(); +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); /* upb_stringsink *************************************************************/ diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 9083788..9446b8f 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -4,9 +4,10 @@ #include #include #include -#include "upb_data.h" +#include "upb_msg.h" #include "upb_def.h" #include "upb_decoder.h" +#include "upb_strstream.h" int num_assertions = 0; #define ASSERT(expr) do { \ @@ -25,7 +26,7 @@ void compare_arrays(const google::protobuf::Reflection *r, upb_msg *upb_msg, upb_fielddef *upb_f) { ASSERT(upb_msg_has(upb_msg, upb_f)); - upb_arrayptr arr = upb_msg_get(upb_msg, upb_f).arr; + upb_array *arr = upb_msg_get(upb_msg, upb_f).arr; ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f)); for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) { upb_value v = upb_array_get(arr, upb_f, i); @@ -63,7 +64,7 @@ void compare_arrays(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -116,7 +117,7 @@ void compare_values(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetString(proto2_msg, proto2_f); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -133,9 +134,10 @@ void compare(const google::protobuf::Message& proto2_msg, const google::protobuf::Reflection *r = proto2_msg.GetReflection(); const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor(); - ASSERT((upb_field_count_t)d->field_count() == upb_md->num_fields); - for(upb_field_count_t 
i = 0; i < upb_md->num_fields; i++) { - upb_fielddef *upb_f = &upb_md->fields[i]; + ASSERT((upb_field_count_t)d->field_count() == upb_msgdef_numfields(upb_md)); + upb_msg_iter i; + for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) { + upb_fielddef *upb_f = upb_msg_iter_field(i); const google::protobuf::FieldDescriptor *proto2_f = d->FindFieldByNumber(upb_f->number); // Make sure the definitions are equal. @@ -143,7 +145,7 @@ void compare(const google::protobuf::Message& proto2_msg, ASSERT(proto2_f); ASSERT(upb_f->number == proto2_f->number()); ASSERT(std::string(upb_string_getrobuf(upb_f->name), - upb_strlen(upb_f->name)) == + upb_string_len(upb_f->name)) == proto2_f->name()); ASSERT(upb_f->type == proto2_f->type()); ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated()); @@ -166,10 +168,10 @@ void compare(const google::protobuf::Message& proto2_msg, void parse_and_compare(MESSAGE_CIDENT *proto2_msg, upb_msg *upb_msg, upb_msgdef *upb_md, - upb_strptr str) + upb_string *str) { // Parse to both proto2 and upb. - ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_strlen(str))); + ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str))); upb_status status = UPB_STATUS_INIT; upb_msg_decodestr(upb_msg, upb_md, str, &status); ASSERT(upb_ok(&status)); @@ -194,22 +196,32 @@ int main(int argc, char *argv[]) // Initialize upb state, parse descriptor. 
upb_status status = UPB_STATUS_INIT; - upb_symtab *c = upb_symtab_new(); - upb_strptr fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); - if(upb_string_isnull(fds)) { + upb_symtab *symtab = upb_symtab_new(); + upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds == NULL) { fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); return 1; } - upb_symtab_add_desc(c, fds, &status); + upb_symtab_add_descriptorproto(symtab); + upb_def *fds_msgdef = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds); + upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef)); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc)); + upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status); if(!upb_ok(&status)) { - fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": %s.\n", - status.msg); + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); + upb_printerr(&status); return 1; } upb_string_unref(fds); + upb_decoder_free(decoder); + upb_stringsrc_free(ssrc); - upb_strptr proto_name = upb_strdupc(MESSAGE_NAME); - upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(c, proto_name)); + upb_string *proto_name = upb_strdupc(MESSAGE_NAME); + upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name)); if(!def) { fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", UPB_STRARG(proto_name)); @@ -218,8 +230,8 @@ int main(int argc, char *argv[]) upb_string_unref(proto_name); // Read the message data itself. 
- upb_strptr str = upb_strreadfile(MESSAGE_FILE); - if(upb_string_isnull(str)) { + upb_string *str = upb_strreadfile(MESSAGE_FILE); + if(str == NULL) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return 1; } @@ -234,7 +246,7 @@ int main(int argc, char *argv[]) upb_msg_unref(upb_msg, def); upb_def_unref(UPB_UPCAST(def)); upb_string_unref(str); - upb_symtab_unref(c); + upb_symtab_unref(symtab); return 0; } -- cgit v1.2.3 From c9df91b04a429f9324afeefece28f21e7078e3ac Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 22 Jan 2011 01:03:02 -0800 Subject: upb bootstraps again! and with no memory leaks! --- core/upb_def.c | 40 +++++++++++++++++----------------------- core/upb_def.h | 2 +- core/upb_stream_vtbl.h | 1 + core/upb_string.c | 1 + tests/test_def.c | 4 +--- tests/test_string.c | 11 +++++++++++ 6 files changed, 32 insertions(+), 27 deletions(-) (limited to 'core/upb_def.h') diff --git a/core/upb_def.c b/core/upb_def.c index a935930..c21843e 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -429,7 +429,7 @@ typedef struct _upb_unresolveddef { static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = str; + def->name = upb_string_getref(str); return def; } @@ -445,6 +445,7 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { static void upb_enumdef_free(upb_enumdef *e) { upb_enum_iter i; for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + // Frees the ref taken when the string was parsed. 
upb_string_unref(upb_enum_iter_name(i)); } upb_strtable_free(&e->ntoi); @@ -468,7 +469,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); - upb_string_getref(upb_value_getstr(val)); + b->name = upb_string_getref(upb_value_getstr(val)); b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: @@ -495,6 +496,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. + b->name = NULL; return UPB_CONTINUE; } @@ -641,7 +643,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - if(b->f->def) upb_def_unref(b->f->def); + upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; @@ -847,6 +849,7 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t, return e; } else { // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. 
upb_string *sym_str = upb_string_new(); int baselen = upb_string_len(base); while(1) { @@ -1212,21 +1215,14 @@ static void upb_baredecoder_run(upb_src *src, upb_status *status) { upb_baredecoder *d = (upb_baredecoder*)src; assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; - upb_strlen_t stack[UPB_MAX_NESTING]; + upb_strlen_t stack[UPB_MAX_NESTING] = {UPB_STRLEN_MAX}; upb_strlen_t *top = &stack[0]; - *top = upb_string_len(d->input); d->offset = 0; #define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { - // Detect end-of-submessage. - while(d->offset >= *top) { - CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); - d->offset = *(top--); - } - uint32_t key = upb_baredecoder_readv64(d); upb_fielddef f; f.number = key >> 3; @@ -1266,22 +1262,22 @@ static void upb_baredecoder_run(upb_src *src, upb_status *status) { } CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } + // Detect end-of-submessage. 
+ while(d->offset >= *top) { + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); + d->offset = *(top--); + } } CHECK(upb_dispatch_endmsg(&d->dispatcher)); - printf("SUCCESS!!\n"); upb_string_unref(str); return; err: upb_copyerr(status, d->dispatcher.top->handlers.status); - upb_printerr(d->dispatcher.top->handlers.status); - upb_printerr(status); upb_string_unref(str); - printf("ERROR!!\n"); } -static upb_baredecoder *upb_baredecoder_new(upb_string *str) -{ +static upb_baredecoder *upb_baredecoder_new(upb_string *str) { static upb_src_vtbl vtbl = { &upb_baredecoder_sethandlers, &upb_baredecoder_run, @@ -1294,19 +1290,16 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) return d; } -static void upb_baredecoder_free(upb_baredecoder *d) -{ +static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); free(d); } -static upb_src *upb_baredecoder_src(upb_baredecoder *d) -{ +static upb_src *upb_baredecoder_src(upb_baredecoder *d) { return &d->src; } -void upb_symtab_add_descriptorproto(upb_symtab *symtab) -{ +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // For the moment we silently decline to perform the operation if the symbols // already exist in the symtab. Revisit this when we have a better story // about whether syms in a table can be replaced. 
@@ -1329,4 +1322,5 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) upb_symtab_unref(symtab); abort(); } + upb_status_uninit(&status); } diff --git a/core/upb_def.h b/core/upb_def.h index 9eb961a..d9bab97 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -77,7 +77,7 @@ INLINE void upb_def_ref(upb_def *def) { if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); } INLINE void upb_def_unref(upb_def *def) { - if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); + if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); } /* upb_fielddef ***************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index e462122..fd71b2d 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -275,6 +275,7 @@ INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { ret = d->top->handlers.set->endmsg(d->top->handlers.closure); if (ret != UPB_CONTINUE) return ret; --d->top; + assert(d->top >= d->stack); } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); } diff --git a/core/upb_string.c b/core/upb_string.c index e9ff0d9..c599728 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -67,6 +67,7 @@ void upb_string_recycle(upb_string **_str) { str->ptr = NULL; upb_string_release(str); } else { + upb_string_unref(str); *_str = upb_string_new(); } } diff --git a/tests/test_def.c b/tests/test_def.c index 5be0672..2d2658f 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,13 +10,10 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { - printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); - printf("Size: %zd\n", sizeof(upb_ntof_ent)); - upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); upb_def *fds = upb_symtab_lookup(s, str); assert(fds != NULL); @@ -24,4 +21,5 @@ int main() { 
upb_def_unref(fds); upb_string_unref(str); upb_symtab_unref(s); + return 0; } diff --git a/tests/test_string.c b/tests/test_string.c index 6446806..ef0e2a9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -40,6 +40,17 @@ static void test_dynamic() { upb_string_recycle(&str); assert(str != NULL); + // Take a ref and recycle; should create a new string and release a ref + // on the old one. + upb_string *strcp = upb_string_getref(str); + assert(strcp == str); + assert(upb_atomic_read(&str->refcount) == 2); + upb_string_recycle(&str); + assert(strcp != str); + assert(upb_atomic_read(&str->refcount) == 1); + assert(upb_atomic_read(&strcp->refcount) == 1); + upb_string_unref(strcp); + upb_strcpyc(str, static_str); assert(upb_string_len(str) == (sizeof(static_str) - 1)); const char *robuf = upb_string_getrobuf(str); -- cgit v1.2.3 From fbb9fd35e05b88908beeca2c2b88b15aec1fca01 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 28 Jan 2011 10:11:48 -0800 Subject: Improve comments in headers, to better explain core interfaces. --- core/upb_def.h | 9 ++-- core/upb_stream.h | 123 ++++++++++++++++++++++++++++++++----------------- core/upb_stream_vtbl.h | 2 +- core/upb_string.h | 7 ++- 4 files changed, 91 insertions(+), 50 deletions(-) (limited to 'core/upb_def.h') diff --git a/core/upb_def.h b/core/upb_def.h index d9bab97..e95aec3 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -1,17 +1,18 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2009-2011 Joshua Haberman. See LICENSE for details. * - * Provides definitions of .proto constructs: + * Provides a mechanism for loading proto definitions from descriptors, and + * data structures to represent those definitions. These form the protobuf + * schema, and are used extensively throughout upb: * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. 
* - upb_enumdef: describes an enum. * (TODO: definitions of extensions and services). * * Defs are obtained from a upb_symtab object. A upb_symtab is empty when - * constructed, and definitions can be added by supplying serialized - * descriptors. + * constructed, and definitions can be added by supplying descriptors. * * Defs are immutable and reference-counted. Symbol tables reference any defs * that are the "current" definitions. If an extension is loaded that adds a diff --git a/core/upb_stream.h b/core/upb_stream.h index d0045cc..09e4025 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -1,23 +1,46 @@ /* * upb - a minimalist implementation of protocol buffers. * - * This file defines four general-purpose streaming interfaces for protobuf - * data or bytes: + * This file defines four general-purpose streaming data interfaces. * - * - upb_src: pull interface for protobuf data. - * - upb_sink: push interface for protobuf data. - * - upb_bytesrc: pull interface for bytes. - * - upb_bytesink: push interface for bytes. + * - upb_handlers: represents a set of callbacks, very much like in XML's SAX + * API, that a client can register to do a streaming tree traversal over a + * stream of structured protobuf data, without knowing where that data is + * coming from. There is only one upb_handlers type (it is not a virtual + * base class), but the object lets you register any set of handlers. * - * These interfaces are used as general-purpose glue in upb. For example, the - * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + * The upb_handlers interface supports delegation: when entering a submessage, + * you can delegate to another set of upb_handlers instead of handling the + * submessage yourself. This allows upb_handlers objects to *compose* -- you + * can implement a set of upb_handlers without knowing or caring whether this + * is the top-level message or not. * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
+ * The other interfaces are the C equivalent of "virtual base classes" that + * anyone can implement: + * + * - upb_src: an interface that represents a source of streaming protobuf data. + * It lets you register a set of upb_handlers, and then call upb_src_run(), + * which pulls the protobuf data from somewhere and then calls the handlers. + * + * - upb_bytesrc: a pull interface for streams of bytes, basically an + * abstraction of read()/fread(), but it avoids copies where possible. + * + * - upb_bytesink: push interface for streams of bytes, basically an + * abstraction of write()/fwrite(), but it avoids copies where possible. + * + * All of the encoders and decoders are based on these generic interfaces, + * which lets you write streaming algorithms that do not depend on a specific + * serialization format; for example, you can write a pretty printer that works + * with input that came from protobuf binary format, protobuf text format, or + * even an in-memory upb_msg -- the pretty printer will not know the + * difference. + * + * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. * */ -#ifndef UPB_SRCSINK_H -#define UPB_SRCSINK_H +#ifndef UPB_STREAM_H +#define UPB_STREAM_H #include "upb.h" @@ -53,8 +76,10 @@ typedef enum { // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been - // registered on the provided upb_handlers object. May not be returned - // from any other callback. + // registered on the provided upb_handlers object. This allows upb_handlers + // objects to compose; a set of upb_handlers need not know whether it is the + // top-level message or a sub-message. May not be returned from any other + // callback. 
UPB_DELEGATE, } upb_flow_t; @@ -105,9 +130,19 @@ typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, // // static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, // upb_value val) { -// Called with an unknown value is encountered. +// // Called with an unknown value is encountered. // return UPB_CONTINUE; // } +// +// // Any handlers you don't need can be set to NULL. +// static upb_handlerset handlers = { +// startmsg, +// endmsg, +// value, +// startsubmsg, +// endsubmsg, +// unknownval, +// }; typedef struct { upb_startmsg_handler_t startmsg; upb_endmsg_handler_t endmsg; @@ -128,26 +163,12 @@ INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); // from automatically being converted to strings in the value callback. // INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); -// The closure will be passed to every handler. The status will be used -// only immediately after a handler has returned UPB_STOP. +// The closure will be passed to every handler. The status will be read by the +// upb_src immediately after a handler has returned UPB_BREAK and used as the +// overall upb_src status; it will not be referenced at any other time. INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, upb_status *status); -// An object that transparently handles delegation so that the caller needs -// only follow the protocol as if delegation did not exist. 
-struct _upb_dispatcher; -typedef struct _upb_dispatcher upb_dispatcher; -INLINE void upb_dispatcher_init(upb_dispatcher *d); -INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); -INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, - upb_value val); -INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, - upb_field_number_t fieldnum, upb_value val); - /* upb_src ********************************************************************/ @@ -171,6 +192,24 @@ INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); INLINE void upb_src_run(upb_src *src, upb_status *status); +// A convenience object that a upb_src can use to invoke handlers. It +// transparently handles delegation so that the upb_src needs only follow the +// protocol as if delegation did not exist. 
+struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val); + /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes @@ -178,16 +217,16 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -// Like upb_bytesrc_read(), but modifies "str" in-place. "str" MUST be newly -// created or just recycled. Returns "false" if no data was returned, either -// due to error or EOF (check status for details). +// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure +// that "str" is created or just recycled. Returns "false" if no data was +// returned, either due to error or EOF (check status for details). // // In comparison to upb_bytesrc_read(), this call can possibly alias existing // string data (which avoids a copy). On the other hand, if the data was *not* // already in an existing string, this copies it into a upb_string, and if the // data needs to be put in a specific range of memory (because eg. you need to // put it into a different kind of string object) then upb_bytesrc_get() could -// be better. +// save you a copy. 
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, upb_status *status); @@ -206,15 +245,13 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Writes up to "count" bytes from "buf", returning the total number of bytes -// written. If <0, indicates error (check upb_bytesink_status() for details). -INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, - upb_strlen_t count); +INLINE bool upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...); -// Puts the given string, which may alias the string data (which avoids a -// copy). Returns the number of bytes that were actually, consumed, which may -// be fewer than were in the string, or <0 on error. -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); +// Puts the given string, returning true if the operation was successful, otherwise +// check "status" for details. Ownership of the string is *not* passed; if +// the callee wants a reference he must call upb_string_getref() on it. +INLINE bool upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status); // Returns the current error status for the stream. 
INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ddefba9..ef655fd 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -139,7 +139,7 @@ INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, return sink->vtbl->write(sink, buf, count); } -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { return sink->vtbl->putstr(sink, str); } diff --git a/core/upb_string.h b/core/upb_string.h index 1a7e06b..7d0ae87 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -9,7 +9,9 @@ * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() * wheverever possible, while keeping both CPU and memory overhead low. * Throughout upb there are situations where one wants to reference all or part - * of another string without copying. upb_string provides APIs for doing this. + * of another string without copying. upb_string provides APIs for doing this, + * and allows the referenced string to be kept alive for as long as anyone is + * referencing it. * * Characteristics of upb_string: * - strings are reference-counted. @@ -22,7 +24,8 @@ * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations - * unless the string has been marked thread-safe. + * unless the string has been marked thread-safe. Time will tell whether this + * scheme is easy and convenient enough to be practical. * * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched * idiom that we go with it instead of making our pointers uint8_t*. -- cgit v1.2.3