From 6bdbb45e88e7b88b294dfb6e4cb493cbc3c8cf74 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 13 Feb 2011 12:59:54 -0800 Subject: Merged core/ and stream/ -> src/. The split wasn't worth it. --- Makefile | 42 +- core/upb.c | 75 --- core/upb.h | 262 --------- core/upb_atomic.h | 189 ------- core/upb_def.c | 1349 ---------------------------------------------- core/upb_def.h | 362 ------------- core/upb_glue.c | 54 -- core/upb_glue.h | 46 -- core/upb_msg.c | 253 --------- core/upb_msg.h | 232 -------- core/upb_stream.h | 276 ---------- core/upb_stream_vtbl.h | 295 ---------- core/upb_string.c | 143 ----- core/upb_string.h | 360 ------------- core/upb_table.c | 411 -------------- core/upb_table.h | 133 ----- src/upb.c | 75 +++ src/upb.h | 262 +++++++++ src/upb_atomic.h | 189 +++++++ src/upb_decoder.c | 441 +++++++++++++++ src/upb_decoder.h | 86 +++ src/upb_def.c | 1349 ++++++++++++++++++++++++++++++++++++++++++++++ src/upb_def.h | 362 +++++++++++++ src/upb_encoder.c | 420 +++++++++++++++ src/upb_encoder.h | 56 ++ src/upb_glue.c | 54 ++ src/upb_glue.h | 46 ++ src/upb_msg.c | 253 +++++++++ src/upb_msg.h | 232 ++++++++ src/upb_stdio.c | 104 ++++ src/upb_stdio.h | 42 ++ src/upb_stream.h | 276 ++++++++++ src/upb_stream_vtbl.h | 295 ++++++++++ src/upb_string.c | 143 +++++ src/upb_string.h | 360 +++++++++++++ src/upb_strstream.c | 65 +++ src/upb_strstream.h | 65 +++ src/upb_table.c | 411 ++++++++++++++ src/upb_table.h | 133 +++++ src/upb_textprinter.c | 143 +++++ src/upb_textprinter.h | 29 + stream/upb_decoder.c | 441 --------------- stream/upb_decoder.h | 86 --- stream/upb_encoder.c | 420 --------------- stream/upb_encoder.h | 56 -- stream/upb_stdio.c | 104 ---- stream/upb_stdio.h | 42 -- stream/upb_strstream.c | 65 --- stream/upb_strstream.h | 65 --- stream/upb_textprinter.c | 143 ----- stream/upb_textprinter.h | 29 - 51 files changed, 5912 insertions(+), 5912 deletions(-) delete mode 100644 core/upb.c delete mode 100644 core/upb.h delete mode 100644 
core/upb_atomic.h delete mode 100644 core/upb_def.c delete mode 100644 core/upb_def.h delete mode 100644 core/upb_glue.c delete mode 100644 core/upb_glue.h delete mode 100644 core/upb_msg.c delete mode 100644 core/upb_msg.h delete mode 100644 core/upb_stream.h delete mode 100644 core/upb_stream_vtbl.h delete mode 100644 core/upb_string.c delete mode 100644 core/upb_string.h delete mode 100644 core/upb_table.c delete mode 100644 core/upb_table.h create mode 100644 src/upb.c create mode 100644 src/upb.h create mode 100644 src/upb_atomic.h create mode 100644 src/upb_decoder.c create mode 100644 src/upb_decoder.h create mode 100644 src/upb_def.c create mode 100644 src/upb_def.h create mode 100644 src/upb_encoder.c create mode 100644 src/upb_encoder.h create mode 100644 src/upb_glue.c create mode 100644 src/upb_glue.h create mode 100644 src/upb_msg.c create mode 100644 src/upb_msg.h create mode 100644 src/upb_stdio.c create mode 100644 src/upb_stdio.h create mode 100644 src/upb_stream.h create mode 100644 src/upb_stream_vtbl.h create mode 100644 src/upb_string.c create mode 100644 src/upb_string.h create mode 100644 src/upb_strstream.c create mode 100644 src/upb_strstream.h create mode 100644 src/upb_table.c create mode 100644 src/upb_table.h create mode 100644 src/upb_textprinter.c create mode 100644 src/upb_textprinter.h delete mode 100644 stream/upb_decoder.c delete mode 100644 stream/upb_decoder.h delete mode 100644 stream/upb_encoder.c delete mode 100644 stream/upb_encoder.h delete mode 100644 stream/upb_stdio.c delete mode 100644 stream/upb_stdio.h delete mode 100644 stream/upb_strstream.c delete mode 100644 stream/upb_strstream.h delete mode 100644 stream/upb_textprinter.c delete mode 100644 stream/upb_textprinter.h diff --git a/Makefile b/Makefile index a8c47cb..6b0df54 100644 --- a/Makefile +++ b/Makefile @@ -27,9 +27,9 @@ rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $ CC=gcc CXX=g++ CFLAGS=-std=c99 -INCLUDE=-Idescriptor -Icore 
-Itests -Istream -I. +INCLUDE=-Idescriptor -Isrc -Itests -I. CPPFLAGS=-Wall -Wextra -Wno-missing-field-initializers -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) -LDLIBS=-lpthread core/libupb.a +LDLIBS=-lpthread src/libupb.a ifeq ($(shell uname), Darwin) CPPFLAGS += -I/usr/include/lua5.1 LDFLAGS += -L/usr/local/lib -llua @@ -38,14 +38,14 @@ else LDFLAGS += $(strip $(shell pkg-config --silence-errors --libs lua || pkg-config --libs lua5.1)) endif -LIBUPB=core/libupb.a -LIBUPB_PIC=core/libupb_pic.a -LIBUPB_SHARED=core/libupb.so +LIBUPB=src/libupb.a +LIBUPB_PIC=src/libupb_pic.a +LIBUPB_SHARED=src/libupb.so ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) all: $(ALL) clean: rm -rf $(LIBUPB) $(LIBUPB_PIC) - rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*) + rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gcno) $(call rwildcard,,*.dSYM) rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* rm -rf $(TESTS) tests/t.* rm -rf descriptor/descriptor.pb @@ -59,20 +59,20 @@ deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) # The core library -- the absolute minimum you must compile in to successfully # bootstrap. CORE= \ - core/upb.c \ - core/upb_table.c \ - core/upb_string.c \ - core/upb_def.c \ + src/upb.c \ + src/upb_table.c \ + src/upb_string.c \ + src/upb_def.c \ descriptor/descriptor.c # Common encoders/decoders and upb_msg -- you're almost certain to want these. 
STREAM= \ - stream/upb_decoder.c \ - stream/upb_stdio.c \ - stream/upb_textprinter.c \ - stream/upb_strstream.c \ - core/upb_msg.c \ - core/upb_glue.c \ + src/upb_decoder.c \ + src/upb_stdio.c \ + src/upb_textprinter.c \ + src/upb_strstream.c \ + src/upb_msg.c \ + src/upb_glue.c \ SRC=$(CORE) $(STREAM) @@ -81,13 +81,13 @@ $(SRC): perf-cppflags OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. -core/upb_def.o: core/upb_def.c +src/upb_def.o: src/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -core/upb_def.lo: core/upb_def.c +src/upb_def.lo: src/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo - $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a + $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< src/libupb_pic.a STATICOBJ=$(patsubst %.c,%.o,$(SRC)) @@ -171,10 +171,10 @@ tests/test_table: tests/test_table.cc # Includes which is a deprecated header. $(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< $(LIBUPB) -tests/tests: core/libupb.a +tests/tests: src/libupb.a # Tools -tools/upbc: core/libupb.a +tools/upbc: src/libupb.a # Benchmarks #UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table \ diff --git a/core/upb.c b/core/upb.c deleted file mode 100644 index 897ca4e..0000000 --- a/core/upb.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- * - */ - -#include -#include -#include - -#include "upb.h" -#include "upb_string.h" - -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(wire_type, ctype, allows_delimited, inmemory_type) \ - {alignof(ctype), sizeof(ctype), wire_type, \ - (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \ - UPB_TYPE(inmemory_type), #ctype}, - -const upb_type_info upb_types[] = { - {0, 0, 0, 0, 0, ""}, // There is no type 0. - TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1, DOUBLE) // DOUBLE - TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1, FLOAT) // FLOAT - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // INT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1, UINT64) // UINT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // INT32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1, UINT64) // FIXED64 - TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1, UINT32) // FIXED32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1, BOOL) // BOOL - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // STRING - TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0, MESSAGE) // GROUP - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, MESSAGE) // MESSAGE - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // BYTES - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, UINT32) // UINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, ENUM) // ENUM - TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1, INT32) // SFIXED32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1, INT64) // SFIXED64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // SINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // SINT64 -}; - -void upb_seterr(upb_status *status, enum upb_status_code code, - const char *msg, ...) 
{ - status->code = code; - upb_string_recycle(&status->str); - va_list args; - va_start(args, msg); - upb_string_vprintf(status->str, msg, args); - va_end(args); -} - -void upb_copyerr(upb_status *to, upb_status *from) -{ - to->code = from->code; - if(from->str) to->str = upb_string_getref(from->str); -} - -void upb_clearerr(upb_status *status) { - status->code = UPB_OK; - upb_string_recycle(&status->str); -} - -void upb_printerr(upb_status *status) { - if(status->str) { - fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n", - status->code, UPB_STRARG(status->str)); - } else { - fprintf(stderr, "code: %d, no msg\n", status->code); - } -} - -void upb_status_uninit(upb_status *status) { - upb_string_unref(status->str); -} diff --git a/core/upb.h b/core/upb.h deleted file mode 100644 index 837fc52..0000000 --- a/core/upb.h +++ /dev/null @@ -1,262 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file contains shared definitions that are widely used across upb. - */ - -#ifndef UPB_H_ -#define UPB_H_ - -#include -#include -#include // only for size_t. -#include -#include "descriptor_const.h" -#include "upb_atomic.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// inline if possible, emit standalone code if required. -#ifndef INLINE -#define INLINE static inline -#endif - -#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) -#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) -#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) - -// The maximum that any submessages can be nested. Matches proto2's limit. -#define UPB_MAX_NESTING 64 - -// The maximum number of fields that any one .proto type can have. Note that -// this is very different than the max field number. It is hard to imagine a -// scenario where more than 32k fields makes sense. -#define UPB_MAX_FIELDS (1<<15) -typedef int16_t upb_field_count_t; - -// Nested type names are separated by periods. 
-#define UPB_SYMBOL_SEPARATOR '.' - -// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType -#define UPB_SYMBOL_MAXLEN 128 - -// The longest chain that mutually-recursive types are allowed to form. For -// example, this is a type cycle of length 2: -// message A { -// B b = 1; -// } -// message B { -// A a = 1; -// } -#define UPB_MAX_TYPE_CYCLE_LEN 16 - -// The maximum depth that the type graph can have. Note that this setting does -// not automatically constrain UPB_MAX_NESTING, because type cycles allow for -// unlimited nesting if we do not limit it. -#define UPB_MAX_TYPE_DEPTH 64 - -// The biggest possible single value is a 10-byte varint. -#define UPB_MAX_ENCODED_SIZE 10 - - -/* Fundamental types and type constants. **************************************/ - -// A list of types as they are encoded on-the-wire. -enum upb_wire_type { - UPB_WIRE_TYPE_VARINT = 0, - UPB_WIRE_TYPE_64BIT = 1, - UPB_WIRE_TYPE_DELIMITED = 2, - UPB_WIRE_TYPE_START_GROUP = 3, - UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5, - - // This isn't a real wire type, but we use this constant to describe varints - // that are expected to be a maximum of 32 bits. - UPB_WIRE_TYPE_32BIT_VARINT = 8 -}; - -typedef uint8_t upb_wire_type_t; - -// Type of a field as defined in a .proto file. eg. string, int32, etc. The -// integers that represent this are defined by descriptor.proto. Note that -// descriptor.proto reserves "0" for errors, and we use it to represent -// exceptional circumstances. -typedef uint8_t upb_fieldtype_t; - -// For referencing the type constants tersely. -#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type -#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type - -// Info for a given field type. -typedef struct { - uint8_t align; - uint8_t size; - upb_wire_type_t native_wire_type; - uint8_t allowed_wire_types; // For packable fields, also allows delimited. 
- uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32 - char *ctype; -} upb_type_info; - -// A static array of info about all of the field types, indexed by type number. -extern const upb_type_info upb_types[]; - -// The number of a field, eg. "optional string foo = 3". -typedef int32_t upb_field_number_t; - -// Label (optional, repeated, required) as defined in a .proto file. The -// values of this are defined by google.protobuf.FieldDescriptorProto.Label -// (from descriptor.proto). -typedef uint8_t upb_label_t; - -// A scalar (non-string) wire value. Used only for parsing unknown fields. -typedef union { - uint64_t varint; - uint64_t _64bit; - uint32_t _32bit; -} upb_wire_value; - -/* Polymorphic values of .proto types *****************************************/ - -struct _upb_string; -typedef struct _upb_string upb_string; -struct _upb_array; -typedef struct _upb_array upb_array; -struct _upb_msg; -typedef struct _upb_msg upb_msg; -struct _upb_bytesrc; -typedef struct _upb_bytesrc upb_bytesrc; - -typedef int32_t upb_strlen_t; -#define UPB_STRLEN_MAX INT32_MAX - -// The type of a upb_value. This is like a upb_fieldtype_t, but adds the -// constant UPB_VALUETYPE_ARRAY to represent an array. -typedef uint8_t upb_valuetype_t; -#define UPB_VALUETYPE_ARRAY 32 -#define UPB_VALUETYPE_BYTESRC 32 -#define UPB_VALUETYPE_RAW 33 - -// A single .proto value. The owner must have an out-of-band way of knowing -// the type, so that it knows which union member to use. -typedef struct { - union { - double _double; - float _float; - int32_t int32; - int64_t int64; - uint32_t uint32; - uint64_t uint64; - bool _bool; - upb_string *str; - upb_bytesrc *bytesrc; - upb_msg *msg; - upb_array *arr; - upb_atomic_refcount_t *refcount; - void *_void; - } val; - - // In debug mode we carry the value type around also so we can check accesses - // to be sure the right member is being read. 
-#ifndef NDEBUG - upb_valuetype_t type; -#endif -} upb_value; - -#ifdef NDEBUG -#define SET_TYPE(dest, val) -#else -#define SET_TYPE(dest, val) dest = val -#endif - -#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ - INLINE ctype upb_value_get ## name(upb_value val) { \ - assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ - return val.val.membername; \ - } \ - INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ - SET_TYPE(val->type, proto_type); \ - val->val.membername = cval; \ - } -UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); -UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); -UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); -UPB_VALUE_ACCESSORS(enumval, int32, int32_t, UPB_TYPE(ENUM)); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); -UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); -UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); -UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE)); -UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY); -UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); - -INLINE void upb_value_setraw(upb_value *val, uint64_t cval) { - SET_TYPE(val->type, UPB_VALUETYPE_RAW); - val->val.uint64 = cval; -} - -INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) { - assert(val.type == UPB_TYPE(MESSAGE) || - val.type == UPB_TYPE(STRING) || - val.type == UPB_VALUETYPE_ARRAY); - return val.val.refcount; -} - -// Status codes used as a return value. Codes >0 are not fatal and can be -// resumed. -enum upb_status_code { - // The operation completed successfully. - UPB_OK = 0, - - // The bytesrc is at EOF and all data was read successfully. - UPB_EOF = 1, - - // A read or write from a streaming src/sink could not be completed right now. 
- UPB_TRYAGAIN = 2, - - // An unrecoverable error occurred. - UPB_ERROR = -1, - - // A recoverable error occurred (for example, data of the wrong type was - // encountered which we can skip over). - // UPB_STATUS_RECOVERABLE_ERROR = -2 -}; - -// TODO: consider adding error space and code, to let ie. errno be stored -// as a proper code, or application-specific error codes. -struct _upb_status { - char code; - upb_string *str; -}; - -typedef struct _upb_status upb_status; - -#define UPB_STATUS_INIT {UPB_OK, NULL} -#define UPB_ERRORMSG_MAXLEN 256 - -INLINE bool upb_ok(upb_status *status) { - return status->code == UPB_OK; -} - -INLINE void upb_status_init(upb_status *status) { - status->code = UPB_OK; - status->str = NULL; -} - -void upb_status_uninit(upb_status *status); - -void upb_printerr(upb_status *status); -void upb_clearerr(upb_status *status); -void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, - ...); -void upb_copyerr(upb_status *to, upb_status *from); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_H_ */ diff --git a/core/upb_atomic.h b/core/upb_atomic.h deleted file mode 100644 index 1cd848b..0000000 --- a/core/upb_atomic.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * Only a very small part of upb is thread-safe. Notably, individual - * messages, arrays, and strings are *not* thread safe for mutating. - * However, we do make message *metadata* such as upb_msgdef and - * upb_context thread-safe, and their ownership is tracked via atomic - * refcounting. This header implements the small number of atomic - * primitives required to support this. The primitives we implement - * are: - * - * - a reader/writer lock (wrappers around platform-provided mutexes). - * - an atomic refcount. 
- */ - -#ifndef UPB_ATOMIC_H_ -#define UPB_ATOMIC_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* inline if possible, emit standalone code if required. */ -#ifndef INLINE -#define INLINE static inline -#endif - -#ifdef UPB_THREAD_UNSAFE - -/* Non-thread-safe implementations. ******************************************/ - -typedef struct { - int v; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - a->v = val; -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return a->v++ == 0; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return --a->v == 0; -} - -INLINE int upb_atomic_read(upb_atomic_refcount_t *a) { - return a->v; -} - -INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) { - a->v += val; - return a->v == 0; -} - -INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { - int ret = a->v; - a->v += val; - return ret; -} - -#endif - -/* Atomic refcount ************************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -/* Already defined above. */ - -#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 - -/* GCC includes atomic primitives. */ - -typedef struct { - volatile int v; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - a->v = val; - __sync_synchronize(); /* Ensure the initialized value is visible. */ -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return __sync_fetch_and_add(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) { - return __sync_add_and_fetch(&a->v, n) == 0; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return __sync_sub_and_fetch(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) { - return __sync_fetch_and_add(&a->v, 0); -} - -#elif defined(WIN32) - -/* Windows defines atomic increment/decrement. 
*/ -#include - -typedef struct { - volatile LONG val; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - InterlockedExchange(&a->val, val); -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return InterlockedIncrement(&a->val) == 1; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return InterlockedDecrement(&a->val) == 0; -} - -#else -#error Atomic primitives not defined for your platform/CPU. \ - Implement them or compile with UPB_THREAD_UNSAFE. -#endif - -INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) { - return upb_atomic_read(a) == 1; -} - -/* Reader/Writer lock. ********************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -typedef struct { -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } - -#elif defined(UPB_USE_PTHREADS) - -#include - -typedef struct { - pthread_rwlock_t lock; -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_init(&l->lock, NULL); -} - -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_destroy(&l->lock); -} - -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_rdlock(&l->lock); -} - -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_wrlock(&l->lock); -} - -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_unlock(&l->lock); -} - -#else -#error Reader/writer lock is not defined for your platform/CPU. \ - Implement it or compile with UPB_THREAD_UNSAFE. 
-#endif - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_ATOMIC_H_ */ diff --git a/core/upb_def.c b/core/upb_def.c deleted file mode 100644 index 651afc1..0000000 --- a/core/upb_def.c +++ /dev/null @@ -1,1349 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. - */ - -#include -#include -#include "descriptor_const.h" -#include "descriptor.h" -#include "upb_def.h" - -#define alignof(t) offsetof(struct { char c; t x; }, x) - -/* Rounds p up to the next multiple of t. */ -static size_t upb_align_up(size_t val, size_t align) { - return val % align == 0 ? val : val + align - (val % align); -} - -static int upb_div_round_up(int numerator, int denominator) { - /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ - return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; -} - -/* Joins strings together, for example: - * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" - * Caller owns a ref on the returned string. */ -static upb_string *upb_join(upb_string *base, upb_string *name) { - if (!base || upb_string_len(base) == 0) { - return upb_string_getref(name); - } else { - return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, - UPB_STRARG(base), UPB_STRARG(name)); - } -} - -/* Search for a character in a string, in reverse. */ -static int my_memrchr(char *data, char c, size_t len) -{ - int off = len-1; - while(off > 0 && data[off] != c) --off; - return off; -} - -/* upb_def ********************************************************************/ - -// Defs are reference counted, but can have cycles when types are -// self-recursive or mutually recursive, so we need to be capable of collecting -// the cycles. In our situation defs are immutable (so cycles cannot be -// created or destroyed post-initialization). 
We need to be thread-safe but -// want to avoid locks if at all possible and rely only on atomic operations. -// -// Our scheme is as follows. First we give each def a flag indicating whether -// it is part of a cycle or not. Because defs are immutable, this flag will -// never change. For acyclic defs, we can use a naive algorithm and avoid the -// overhead of dealing with cycles. Most defs will be acyclic, and most cycles -// will be very short. -// -// For defs that participate in cycles we keep two reference counts. One -// tracks references that come from outside the cycle (we call these external -// references), and is incremented and decremented like a regular refcount. -// The other is a cycle refcount, and works as follows. Every cycle is -// considered distinct, even if two cycles share members. For example, this -// graph has two distinct cycles: -// -// A-->B-->C -// ^ | | -// +---+---+ -// -// The cycles in this graph are AB and ABC. When A's external refcount -// transitions from 0->1, we say that A takes "cycle references" on both -// cycles. Taking a cycle reference means incrementing the cycle refcount of -// all defs in the cycle. Since A and B are common to both cycles, A and B's -// cycle refcounts will be incremented by two, and C's will be incremented by -// one. Likewise, when A's external refcount transitions from 1->0, we -// decrement A and B's cycle refcounts by two and C's by one. We collect a -// cyclic type when its cycle refcount drops to zero. A precondition for this -// is that the external refcount has dropped to zero also. -// -// This algorithm is relatively cheap, since it only requires extra work when -// the external refcount on a cyclic type transitions from 0->1 or 1->0. 
- -static void upb_msgdef_free(upb_msgdef *m); -static void upb_enumdef_free(upb_enumdef *e); -static void upb_unresolveddef_free(struct _upb_unresolveddef *u); - -static void upb_def_free(upb_def *def) -{ - switch(def->type) { - case UPB_DEF_MSG: - upb_msgdef_free(upb_downcast_msgdef(def)); - break; - case UPB_DEF_ENUM: - upb_enumdef_free(upb_downcast_enumdef(def)); - break; - case UPB_DEF_SVC: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_UNRESOLVED: - upb_unresolveddef_free(upb_downcast_unresolveddef(def)); - break; - default: - assert(false); - } -} - -// Depth-first search for all cycles that include cycle_base. Returns the -// number of paths from def that lead to cycle_base, which is equivalent to the -// number of cycles def is in that include cycle_base. -// -// open_defs tracks the set of nodes that are currently being visited in the -// search so we can stop the search if we detect a cycles that do not involve -// cycle_base. We can't color the nodes as we go by writing to a member of the -// def, because another thread could be performing the search concurrently. -static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, - upb_msgdef **open_defs, int num_open_defs, - bool ref) { - bool found = false; - for(int i = 0; i < num_open_defs; i++) { - if(open_defs[i] == m) { - // We encountered a cycle that did not involve cycle_base. 
- found = true; - break; - } - } - - if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) { - return 0; - } else if(m == cycle_base) { - return 1; - } else { - int path_count = 0; - if(cycle_base == NULL) { - cycle_base = m; - } else { - open_defs[num_open_defs++] = m; - } - upb_msg_iter iter = upb_msg_begin(m); - for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { - upb_fielddef *f = upb_msg_iter_field(iter); - upb_def *def = f->def; - if(upb_issubmsg(f) && def->is_cyclic) { - upb_msgdef *sub_m = upb_downcast_msgdef(def); - path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs, - num_open_defs, ref); - } - } - if(ref) { - upb_atomic_add(&m->cycle_refcount, path_count); - } else { - if(upb_atomic_add(&m->cycle_refcount, -path_count)) - upb_def_free(UPB_UPCAST(m)); - } - return path_count; - } -} - -void _upb_def_reftozero(upb_def *def) { - if(def->is_cyclic) { - upb_msgdef *m = upb_downcast_msgdef(def); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false); - } else { - upb_def_free(def); - } -} - -void _upb_def_cyclic_ref(upb_def *def) { - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); -} - -static void upb_def_init(upb_def *def, upb_deftype type) { - def->type = type; - def->is_cyclic = 0; // We detect this later, after resolving refs. - def->search_depth = 0; - def->fqname = NULL; - upb_atomic_refcount_init(&def->refcount, 1); -} - -static void upb_def_uninit(upb_def *def) { - upb_string_unref(def->fqname); -} - - -/* upb_defbuilder ************************************************************/ - -// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in -// the format defined in descriptor.proto. The output of a upb_defbuilder is -// a list of upb_def* that possibly contain unresolved references. 
-// -// We use a separate object (upb_defbuilder) instead of having the defs handle -// the parse themselves because we need to store state that is only necessary -// during the building process itself. - -// When we are bootstrapping descriptor.proto, we must help the bare decoder out -// by telling it when to descend into a submessage, because with the wire format -// alone we cannot tell the difference between a submessage and a string. -// -// TODO: In the long-term, we should bootstrap from a serialization format that -// contains this information, so we can remove this special-case code. This -// would involve defining a serialization format very similar to the existing -// protobuf format, but that contains more information about the wire type. -#define BEGIN_SUBMSG 100 - -// upb_deflist: A little dynamic array for storing a growing list of upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; -} - -static void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) - if(l->defs[i]) upb_def_unref(l->defs[i]); - free(l->defs); -} - -static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); - } - l->defs[l->len++] = d; -} - -static upb_def *upb_deflist_last(upb_deflist *l) { - return l->defs[l->len-1]; -} - -// Qualify the defname for all defs starting with offset "start" with "str". -static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } -} - -// We keep a stack of all the messages scopes we are currently in, as well as -// the top-level file scope. 
This is necessary to correctly qualify the -// definitions that are contained inside. "name" tracks the name of the -// message or package (a bare name -- not qualified by any enclosing scopes). -typedef struct { - upb_string *name; - // Index of the first def that is under this scope. For msgdefs, the - // msgdef itself is at start-1. - int start; -} upb_defbuilder_frame; - -struct _upb_defbuilder { - upb_deflist defs; - upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; - int stack_len; - upb_status status; - - uint32_t number; - upb_string *name; - bool saw_number; - bool saw_name; - - upb_fielddef *f; -}; -typedef struct _upb_defbuilder upb_defbuilder; - -// Forward declares for top-level file descriptors. -static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h); -static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, - upb_handlers *h); - - -static void upb_defbuilder_init(upb_defbuilder *b) { - upb_deflist_init(&b->defs); - upb_status_init(&b->status); - b->stack_len = 0; - b->name = NULL; -} - -static void upb_defbuilder_uninit(upb_defbuilder *b) { - upb_string_unref(b->name); - upb_status_uninit(&b->status); - upb_deflist_uninit(&b->defs); -} - -static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { - if (b->stack_len <= 1) return NULL; - int index = b->stack[b->stack_len-1].start - 1; - assert(index >= 0); - return upb_downcast_msgdef(b->defs.defs[index]); -} - -static upb_def *upb_defbuilder_last(upb_defbuilder *b) { - return upb_deflist_last(&b->defs); -} - -// Start/end handlers for FileDescriptorProto and DescriptorProto (the two -// entities that have names and can contain sub-definitions. 
-void upb_defbuilder_startcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = &b->stack[b->stack_len++]; - f->start = b->defs.len; - f->name = NULL; -} - -void upb_defbuilder_endcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = &b->stack[--b->stack_len]; - upb_deflist_qualify(&b->defs, f->name, f->start); - upb_string_unref(f->name); -} - -void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { - upb_defbuilder_frame *f = &b->stack[b->stack_len-1]; - upb_string_unref(f->name); - f->name = upb_string_getref(str); -} - -// Handlers for google.protobuf.FileDescriptorProto. -static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_defbuilder_startcontainer(b); - return UPB_CONTINUE; -} - -static upb_flow_t upb_defbuilder_FileDescriptorProto_endmsg(void *_b) { - upb_defbuilder *b = _b; - upb_defbuilder_endcontainer(b); - return UPB_CONTINUE; -} - -static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, - upb_fielddef *f, - upb_value val) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: - upb_defbuilder_setscopename(b, upb_value_getstr(val)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - return BEGIN_SUBMSG; - } - return UPB_CONTINUE; -} - -static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( - void *_b, upb_fielddef *f, upb_handlers *h) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - upb_msgdef_register_DescriptorProto(b, h); - return UPB_DELEGATE; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_enumdef_register_EnumDescriptorProto(b, h); - return UPB_DELEGATE; - default: - // TODO: services and extensions. 
- return UPB_SKIPSUBMSG; - } -} - -static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, - upb_handlers *h) { - static upb_handlerset handlers = { - &upb_defbuilder_FileDescriptorProto_startmsg, - &upb_defbuilder_FileDescriptorProto_endmsg, - &upb_defbuilder_FileDescriptorProto_value, - &upb_defbuilder_FileDescriptorProto_startsubmsg, - }; - upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b, &b->status); -} - -// Handlers for google.protobuf.FileDescriptorSet. -static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, - upb_fielddef *f, - upb_value val) { - (void)b; - (void)val; - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - return BEGIN_SUBMSG; - } - return UPB_CONTINUE; -} - -static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( - void *_b, upb_fielddef *f, upb_handlers *h) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - upb_defbuilder_register_FileDescriptorProto(b, h); - return UPB_DELEGATE; - } - return UPB_SKIPSUBMSG; -} - -static void upb_defbuilder_register_FileDescriptorSet( - upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg - &upb_defbuilder_FileDescriptorSet_value, - &upb_defbuilder_FileDescriptorSet_startsubmsg, - }; - upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b, &b->status); -} - - -/* upb_unresolveddef **********************************************************/ - -// Unresolved defs are used as temporary placeholders for a def whose name has -// not been resolved yet. During the name resolution step, all unresolved defs -// are replaced with pointers to the actual def being referenced. -typedef struct _upb_unresolveddef { - upb_def base; - - // The target type name. This may or may not be fully qualified. 
It is - // tempting to want to use base.fqname for this, but that will be qualified - // which is inappropriate for a name we still have to resolve. - upb_string *name; -} upb_unresolveddef; - -// Is passed a ref on the string. -static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { - upb_unresolveddef *def = malloc(sizeof(*def)); - upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = upb_string_getref(str); - return def; -} - -static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { - upb_string_unref(def->name); - upb_def_uninit(&def->base); - free(def); -} - - -/* upb_enumdef ****************************************************************/ - -static void upb_enumdef_free(upb_enumdef *e) { - upb_enum_iter i; - for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { - // Frees the ref taken when the string was parsed. - upb_string_unref(upb_enum_iter_name(i)); - } - upb_strtable_free(&e->ntoi); - upb_inttable_free(&e->iton); - upb_def_uninit(&e->base); - free(e); -} - -// google.protobuf.EnumValueDescriptorProto. 
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - b->saw_number = false; - b->saw_name = false; - return UPB_CONTINUE; -} - -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, - upb_fielddef *f, - upb_value val) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(b->name); - b->name = upb_string_getref(upb_value_getstr(val)); - b->saw_name = true; - break; - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - b->number = upb_value_getint32(val); - b->saw_number = true; - break; - default: - break; - } - return UPB_CONTINUE; -} - -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { - upb_defbuilder *b = _b; - if(!b->saw_number || !b->saw_name) { - upb_seterr(&b->status, UPB_ERROR, "Enum value missing name or number."); - return UPB_BREAK; - } - upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; - upb_iton_ent iton_ent = {{b->number, 0}, b->name}; - upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, &iton_ent.e); - // We don't unref "name" because we pass our ref to the iton entry of the - // table. strtables can ref their keys, but the inttable doesn't know that - // the value is a string. - b->name = NULL; - return UPB_CONTINUE; -} - -static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, - upb_handlers *h) { - static upb_handlerset handlers = { - &upb_enumdef_EnumValueDescriptorProto_startmsg, - &upb_enumdef_EnumValueDescriptorProto_endmsg, - &upb_enumdef_EnumValueDescriptorProto_value, - }; - upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b, &b->status); -} - -// google.protobuf.EnumDescriptorProto. 
-static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); - upb_deflist_push(&b->defs, UPB_UPCAST(e)); - return UPB_CONTINUE; -} - -static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { - (void)_b; - assert(upb_defbuilder_last((upb_defbuilder*)_b)->fqname != NULL); - return UPB_CONTINUE; -} - -static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, - upb_fielddef *f, - upb_value val) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: { - upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); - upb_string_unref(e->base.fqname); - e->base.fqname = upb_string_getref(upb_value_getstr(val)); - return UPB_CONTINUE; - } - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: - return BEGIN_SUBMSG; - default: - return UPB_CONTINUE; - } -} - -static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(void *_b, - upb_fielddef *f, - upb_handlers *h) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: - upb_enumdef_register_EnumValueDescriptorProto(b, h); - return UPB_DELEGATE; - default: - return UPB_SKIPSUBMSG; - } -} - -static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, - upb_handlers *h) { - static upb_handlerset handlers = { - &upb_enumdef_EnumDescriptorProto_startmsg, - &upb_enumdef_EnumDescriptorProto_endmsg, - &upb_enumdef_EnumDescriptorProto_value, - &upb_enumdef_EnumDescriptorProto_startsubmsg, - }; - upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b, &b->status); -} - -upb_enum_iter upb_enum_begin(upb_enumdef *e) { - // We could iterate over either table here; the choice is arbitrary. 
- return upb_inttable_begin(&e->iton); -} - -upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { - assert(iter); - return upb_inttable_next(&e->iton, &iter->e); -} - -upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { - upb_iton_ent *e = - (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); - return e ? e->string : NULL; -} - - -/* upb_fielddef ***************************************************************/ - -static void upb_fielddef_free(upb_fielddef *f) { - upb_string_unref(f->name); - if(f->owned) { - upb_def_unref(f->def); - } - free(f); -} - -static upb_flow_t upb_fielddef_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_fielddef *f = malloc(sizeof(*f)); - f->number = -1; - f->name = NULL; - f->def = NULL; - f->owned = false; - f->msgdef = upb_defbuilder_top(b); - b->f = f; - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_endmsg(void *_b) { - upb_defbuilder *b = _b; - upb_fielddef *f = b->f; - // TODO: verify that all required fields were present. - assert(f->number != -1 && f->name != NULL); - assert((f->def != NULL) == upb_hasdef(f)); - - // Field was successfully read, add it as a field of the msgdef. 
- upb_msgdef *m = upb_defbuilder_top(b); - upb_itof_ent itof_ent = {{f->number, 0}, f}; - upb_ntof_ent ntof_ent = {{f->name, 0}, f}; - upb_inttable_insert(&m->itof, &itof_ent.e); - upb_strtable_insert(&m->ntof, &ntof_ent.e); - return UPB_CONTINUE; -} - -static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - b->f->type = upb_value_getenumval(val); - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - b->f->label = upb_value_getenumval(val); - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - b->f->number = upb_value_getint32(val); - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(b->f->name); - b->f->name = upb_string_getref(upb_value_getstr(val)); - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_def_unref(b->f->def); - b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); - b->f->owned = true; - break; - } - } - return UPB_CONTINUE; -} - -static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, - upb_handlers *h) { - static upb_handlerset handlers = { - &upb_fielddef_startmsg, - &upb_fielddef_endmsg, - &upb_fielddef_value, - }; - upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b, &b->status); -} - - -/* upb_msgdef *****************************************************************/ - -static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { - // Sort by data size (ascending) to reduce padding. - size_t size1 = upb_types[f1->type].size; - size_t size2 = upb_types[f2->type].size; - if (size1 != size2) return size1 - size2; - // Otherwise return in number order (just so we get a reproduceable order. 
- return f1->number - f2->number; -} - -static int upb_compare_fields(const void *f1, const void *f2) { - return upb_compare_typed_fields(*(void**)f1, *(void**)f2); -} - -// google.protobuf.DescriptorProto. -static upb_flow_t upb_msgdef_startmsg(void *_b) { - upb_defbuilder *b = _b; - upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG); - upb_atomic_refcount_init(&m->cycle_refcount, 0); - upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); - upb_deflist_push(&b->defs, UPB_UPCAST(m)); - upb_defbuilder_startcontainer(b); - return UPB_CONTINUE; -} - -static upb_flow_t upb_msgdef_endmsg(void *_b) { - upb_defbuilder *b = _b; - upb_msgdef *m = upb_defbuilder_top(b); - if(!m->base.fqname) { - upb_seterr(&b->status, UPB_ERROR, "Encountered message with no name."); - return UPB_BREAK; - } - - // Create an ordering over the fields. - upb_field_count_t n = upb_msgdef_numfields(m); - upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); - upb_field_count_t field = 0; - upb_msg_iter i; - for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - sorted_fields[field++]= upb_msg_iter_field(i); - } - qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); - - // Assign offsets in the msg. - m->set_flags_bytes = upb_div_round_up(n, 8); - m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes; - - size_t max_align = 0; - for (int i = 0; i < n; i++) { - upb_fielddef *f = sorted_fields[i]; - const upb_type_info *type_info = &upb_types[f->type]; - - // This identifies the set bit. When we implement is_initialized (a - // general check about whether all required bits are set) we will probably - // want to use a different ordering that puts all the required bits - // together. 
- f->field_index = i; - f->set_bit_mask = 1 << (i % 8); - f->set_bit_offset = i / 8; - - size_t size, align; - if (upb_isarray(f)) { - size = sizeof(void*); - align = alignof(void*); - } else { - size = type_info->size; - align = type_info->align; - } - // General alignment rules are: each member must be at an address that is a - // multiple of that type's alignment. Also, the size of the structure as a - // whole must be a multiple of the greatest alignment of any member. - size_t offset = upb_align_up(m->size, align); - // Offsets are relative to the end of the refcount. - f->byte_offset = offset - sizeof(upb_atomic_refcount_t); - m->size = offset + size; - max_align = UPB_MAX(max_align, align); - } - free(sorted_fields); - - if (max_align > 0) m->size = upb_align_up(m->size, max_align); - - upb_defbuilder_endcontainer(b); - return UPB_CONTINUE; -} - -static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: { - upb_msgdef *m = upb_defbuilder_top(b); - upb_string_unref(m->base.fqname); - m->base.fqname = upb_string_getref(upb_value_getstr(val)); - upb_defbuilder_setscopename(b, upb_value_getstr(val)); - return UPB_CONTINUE; - } - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - return BEGIN_SUBMSG; - default: - // TODO: extensions. 
- return UPB_CONTINUE; - } -} - -static upb_flow_t upb_msgdef_startsubmsg(void *_b, upb_fielddef *f, - upb_handlers *h) { - upb_defbuilder *b = _b; - switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - upb_fielddef_register_FieldDescriptorProto(b, h); - return UPB_DELEGATE; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: - upb_msgdef_register_DescriptorProto(b, h); - return UPB_DELEGATE; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_enumdef_register_EnumDescriptorProto(b, h); - return UPB_DELEGATE; - break; - default: - return UPB_SKIPSUBMSG; - } -} - -static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, - upb_handlers *h) { - static upb_handlerset handlers = { - &upb_msgdef_startmsg, - &upb_msgdef_endmsg, - &upb_msgdef_value, - &upb_msgdef_startsubmsg, - }; - upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b, &b->status); -} - -static void upb_msgdef_free(upb_msgdef *m) -{ - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_free(upb_msg_iter_field(i)); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); - upb_def_uninit(&m->base); - free(m); -} - -static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { - (void)m; - if(f->owned) upb_def_unref(f->def); - f->def = def; - // We will later make the ref unowned if it is a part of a cycle. - f->owned = true; - upb_def_ref(def); -} - -upb_msg_iter upb_msg_begin(upb_msgdef *m) { - return upb_inttable_begin(&m->itof); -} - -upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { - return upb_inttable_next(&m->itof, &iter->e); -} - -/* upb_symtab adding defs *****************************************************/ - -// This is a self-contained group of functions that, given a list of upb_defs -// whose references are not yet resolved, resolves references and adds them -// atomically to a upb_symtab. 
- -typedef struct { - upb_strtable_entry e; - upb_def *def; -} upb_symtab_ent; - -// Given a symbol and the base symbol inside which it is defined, find the -// symbol's definition in t. -static upb_symtab_ent *upb_resolve(upb_strtable *t, - upb_string *base, upb_string *sym) -{ - if(upb_string_len(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN || - upb_string_len(sym) == 0) return NULL; - - if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { - // Symbols starting with '.' are absolute, so we do a single lookup. - // Slice to omit the leading '.' - upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1); - upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); - upb_string_unref(sym_str); - return e; - } else { - // Remove components from base until we find an entry or run out. - // TODO: This branch is totally broken, but currently not used. - upb_string *sym_str = upb_string_new(); - int baselen = upb_string_len(base); - while(1) { - // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym - upb_strlen_t len = baselen + upb_string_len(sym) + 1; - char *buf = upb_string_getrwbuf(sym_str, len); - memcpy(buf, upb_string_getrobuf(base), baselen); - buf[baselen] = UPB_SYMBOL_SEPARATOR; - memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym)); - - upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); - if (e) return e; - else if(baselen == 0) return NULL; // No more scopes to try. - - baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); - } - } -} - -// Performs a pass over the type graph to find all cycles that include m. -static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) -{ - if(depth > UPB_MAX_TYPE_DEPTH) { - // We have found a non-cyclic path from the base of the type tree that - // exceeds the maximum allowed depth. 
There are many situations in upb - // where we recurse over the type tree (like for example, right now) and an - // absurdly deep tree could cause us to stack overflow on systems with very - // limited stacks. - upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at " - "depth %d in the type graph, which exceeds the maximum type " - "depth of %d.", UPB_UPCAST(m)->fqname, depth, - UPB_MAX_TYPE_DEPTH); - return false; - } else if(UPB_UPCAST(m)->search_depth == 1) { - // Cycle! - int cycle_len = depth - 1; - if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved " - "in a cycle of length %d, which exceeds the maximum type " - "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, - UPB_MAX_TYPE_CYCLE_LEN); - return false; - } - return true; - } else if(UPB_UPCAST(m)->search_depth > 0) { - // This was a cycle, but did not originate from the base of our search tree. - // We'll find it when we call find_cycles() on this node directly. - return false; - } else { - UPB_UPCAST(m)->search_depth = ++depth; - bool cycle_found = false; - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if(!upb_issubmsg(f)) continue; - upb_def *sub_def = f->def; - upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); - if(upb_symtab_findcycles(sub_m, depth, status)) { - cycle_found = true; - UPB_UPCAST(m)->is_cyclic = true; - if(f->owned) { - upb_atomic_unref(&sub_def->refcount); - f->owned = false; - } - } - } - UPB_UPCAST(m)->search_depth = 0; - return cycle_found; - } -} - -// Given a table of pending defs "tmptab" and a table of existing defs "symtab", -// resolves all of the unresolved refs for the defs in tmptab. 
-bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, - upb_status *status) -{ - upb_symtab_ent *e; - for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // Type names are resolved relative to the message in which they appear. - upb_string *base = e->e.key; - - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_string *name = upb_downcast_unresolveddef(f->def)->name; - - // Resolve from either the tmptab (pending adds) or symtab (existing - // defs). If both exist, prefer the pending add, because it will be - // overwriting the existing def. - upb_symtab_ent *found; - if(!(found = upb_resolve(tmptab, base, name)) && - !(found = upb_resolve(symtab, base, name))) { - upb_seterr(status, UPB_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(base)); - return false; - } - - // Check the type of the found def. - upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(found->def->type != expected) { - upb_seterr(status, UPB_ERROR, "Unexpected type"); - return false; - } - upb_msgdef_resolve(m, f, found->def); - } - } - - // Deal with type cycles. - for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // The findcycles() call will decrement the external refcount of the - upb_symtab_findcycles(m, 0, status); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); - } - - return true; -} - -// Given a list of defs, a list of extensions (in the future), and a flag -// indicating whether the new defs can overwrite existing defs in the symtab, -// attempts to add the given defs to the symtab. 
The whole operation either -// succeeds or fails. Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, - bool allow_redef, upb_status *status) -{ - upb_rwlock_wrlock(&s->lock); - - // Build a table of the defs we mean to add, for duplicate detection and name - // resolution. - upb_strtable tmptab; - upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); - for (int i = 0; i < num_defs; i++) { - upb_def *def = defs[i]; - upb_symtab_ent e = {{def->fqname, 0}, def}; - - // Redefinition is never allowed within a single FileDescriptorSet. - // Additionally, we only allow overwriting of an existing definition if - // allow_redef is set. - if (upb_strtable_lookup(&tmptab, def->fqname) || - (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { - upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT, - UPB_STRARG(def->fqname)); - goto err; - } - - // Pass ownership from the deflist to the strtable. - upb_strtable_insert(&tmptab, &e.e); - defs[i] = NULL; - } - - // TODO: process the list of extensions by modifying entries from - // tmptab in-place (copying them from the symtab first if necessary). - - if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; - - // The defs in tmptab have been vetted, and can be added to the symtab - // without causing errors. Now add all tmptab defs to the symtab, - // overwriting (and releasing a ref on) any existing defs with the same - // names. Ownership for tmptab defs passes from the tmptab to the symtab. 
- upb_symtab_ent *tmptab_e; - for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e; - tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) { - upb_symtab_ent *symtab_e = - upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); - if(symtab_e) { - upb_def_unref(symtab_e->def); - symtab_e->def = tmptab_e->def; - } else { - upb_strtable_insert(&s->symtab, &tmptab_e->e); - } - } - - upb_rwlock_unlock(&s->lock); - upb_strtable_free(&tmptab); - return true; - -err: - // We need to free all defs from "tmptab." - upb_rwlock_unlock(&s->lock); - for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; - e = upb_strtable_next(&tmptab, &e->e)) { - upb_def_unref(e->def); - } - upb_strtable_free(&tmptab); - for (int i = 0; i < num_defs; i++) upb_def_unref(defs[i]); - return false; -} - - -/* upb_symtab public interface ************************************************/ - -upb_symtab *upb_symtab_new() -{ - upb_symtab *s = malloc(sizeof(*s)); - upb_atomic_refcount_init(&s->refcount, 1); - upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); - s->fds_msgdef = NULL; - return s; -} - -static void upb_free_symtab(upb_strtable *t) -{ - upb_symtab_ent *e; - for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) - upb_def_unref(e->def); - upb_strtable_free(t); -} - -void _upb_symtab_free(upb_symtab *s) -{ - upb_free_symtab(&s->symtab); - upb_rwlock_destroy(&s->lock); - free(s); -} - -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) -{ - upb_rwlock_rdlock(&s->lock); - int total = upb_strtable_count(&s->symtab); - // We may only use part of this, depending on how many symbols are of the - // correct type. 
- upb_def **defs = malloc(sizeof(*defs) * total); - upb_symtab_ent *e = upb_strtable_begin(&s->symtab); - int i = 0; - for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { - upb_def *def = e->def; - assert(def); - if(type == UPB_DEF_ANY || def->type == type) - defs[i++] = def; - } - upb_rwlock_unlock(&s->lock); - *count = i; - for(i = 0; i < *count; i++) - upb_def_ref(defs[i]); - return defs; -} - -upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) -{ - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - - -upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - -void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) -{ - upb_defbuilder b; - upb_defbuilder_init(&b); - upb_handlers handlers; - upb_handlers_init(&handlers); - upb_defbuilder_register_FileDescriptorSet(&b, &handlers); - upb_src_sethandlers(src, &handlers); - upb_src_run(src, status); - if (upb_ok(status)) - upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); - upb_defbuilder_uninit(&b); - upb_handlers_uninit(&handlers); -} - - -/* upb_baredecoder ************************************************************/ - -// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer -// binary format. It is only used for bootstrapping. It can parse without -// having a upb_msgdef, which is why it is useful for bootstrapping the first -// msgdef. On the downside, it does not support: -// -// * having its input span multiple upb_strings. -// * reading any field of the returned upb_fielddef's except f->number. 
-// * keeping a pointer to the upb_fielddef* and reading it later (the same -// upb_fielddef is reused over and over). -// * detecting errors in the input (we trust that our input is known-good). -// * skipping the rest of the submessage (UPB_SKIPSUBMSG). -// -// It also does not support any of the follow protobuf features: -// * packed fields. -// * groups. -// * zig-zag-encoded types like sint32 and sint64. -// -// Since it cannot tell the difference between submessages and strings, it -// always reports them as strings first, but if the value callback returns -// UPB_TREAT_AS_SUBMSG this signals to the baredecoder that it should be -// treated like a submessage instead. -// -// TODO: for bootstrapping we should define a slightly different wire format -// that includes enough information to know the precise integer types and -// that distinguishes between strings and submessages. This will allow -// us to get rid of the UPB_TREAT_AS_SUBMSG hack. It will also allow us -// to get rid of the upb_value_setraw() scheme, which would be more -// complicated to support on big-endian machines. - -typedef struct { - upb_src src; - upb_string *input; - upb_strlen_t offset; - upb_dispatcher dispatcher; -} upb_baredecoder; - -static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) -{ - const uint8_t *start = (uint8_t*)upb_string_getrobuf(d->input) + d->offset; - const uint8_t *buf = start; - uint8_t last = 0x80; - uint64_t val = 0; - for(int bitpos = 0; (last & 0x80); buf++, bitpos += 7) - val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; - d->offset += buf - start; - return val; -} - -static uint32_t upb_baredecoder_readv32(upb_baredecoder *d) -{ - return (uint32_t)upb_baredecoder_readv64(d); // Truncate. 
-} - -static uint64_t upb_baredecoder_readf64(upb_baredecoder *d) -{ - uint64_t val; - memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 8); - d->offset += 8; - return val; -} - -static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) -{ - uint32_t val; - memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 4); - d->offset += 4; - return val; -} - -static void upb_baredecoder_sethandlers(upb_src *src, upb_handlers *handlers) { - upb_baredecoder *d = (upb_baredecoder*)src; - upb_dispatcher_reset(&d->dispatcher, handlers, false); -} - -static void upb_baredecoder_run(upb_src *src, upb_status *status) { - upb_baredecoder *d = (upb_baredecoder*)src; - assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); - upb_string *str = NULL; - upb_strlen_t stack[UPB_MAX_NESTING] = {UPB_STRLEN_MAX}; - upb_strlen_t *top = &stack[0]; - d->offset = 0; - -#define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; - - CHECK(upb_dispatch_startmsg(&d->dispatcher)); - while(d->offset < upb_string_len(d->input)) { - uint32_t key = upb_baredecoder_readv64(d); - upb_fielddef f; - f.number = key >> 3; - upb_wire_type_t wt = key & 0x7; - if(wt == UPB_WIRE_TYPE_DELIMITED) { - uint32_t delim_len = upb_baredecoder_readv32(d); - // We don't know if it's a string or a submessage; deliver first as - // string. - upb_string_recycle(&str); - upb_string_substr(str, d->input, d->offset, delim_len); - upb_value v; - upb_value_setstr(&v, str); - upb_flow_t ret = upb_dispatch_value(&d->dispatcher, &f, v); - CHECK(ret); - if(ret == BEGIN_SUBMSG) { - // Should deliver as a submessage instead. 
- CHECK(upb_dispatch_startsubmsg(&d->dispatcher, &f)); - *(++top) = d->offset + delim_len; - } else { - d->offset += delim_len; - } - } else { - upb_value v; - switch(wt) { - case UPB_WIRE_TYPE_VARINT: - upb_value_setraw(&v, upb_baredecoder_readv64(d)); - break; - case UPB_WIRE_TYPE_64BIT: - upb_value_setraw(&v, upb_baredecoder_readf64(d)); - break; - case UPB_WIRE_TYPE_32BIT: - upb_value_setraw(&v, upb_baredecoder_readf32(d)); - break; - default: - assert(false); - abort(); - } - CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); - } - // Detect end-of-submessage. - while(d->offset >= *top) { - CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); - d->offset = *(top--); - } - } - CHECK(upb_dispatch_endmsg(&d->dispatcher)); - upb_string_unref(str); - return; - -err: - upb_copyerr(status, d->dispatcher.top->handlers.status); - upb_string_unref(str); -} - -static upb_baredecoder *upb_baredecoder_new(upb_string *str) { - static upb_src_vtbl vtbl = { - &upb_baredecoder_sethandlers, - &upb_baredecoder_run, - }; - upb_baredecoder *d = malloc(sizeof(*d)); - upb_src_init(&d->src, &vtbl); - d->input = upb_string_getref(str); - d->offset = 0; - upb_dispatcher_init(&d->dispatcher); - return d; -} - -static void upb_baredecoder_free(upb_baredecoder *d) { - upb_string_unref(d->input); - free(d); -} - -static upb_src *upb_baredecoder_src(upb_baredecoder *d) { - return &d->src; -} - -void upb_symtab_add_descriptorproto(upb_symtab *symtab) { - // For the moment we silently decline to perform the operation if the symbols - // already exist in the symtab. Revisit this when we have a better story - // about whether syms in a table can be replaced. - if(symtab->fds_msgdef) upb_def_unref(UPB_UPCAST(symtab->fds_msgdef)); - - upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); - upb_status status = UPB_STATUS_INIT; - upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); - upb_baredecoder_free(decoder); - - if(!upb_ok(&status)) { - // upb itself is corrupt. 
- upb_printerr(&status); - upb_clearerr(&status); - upb_symtab_unref(symtab); - abort(); - } - upb_def *def = upb_symtab_lookup( - symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); - if (!def || (symtab->fds_msgdef = upb_dyncast_msgdef(def)) == NULL) { - // upb itself is corrupt. - abort(); - } - upb_def_unref(def); // The symtab already holds a ref on it. - upb_status_uninit(&status); -} - -upb_msgdef *upb_symtab_fds_def(upb_symtab *s) { - assert(s->fds_msgdef != NULL); - upb_def_ref(UPB_UPCAST(s->fds_msgdef)); - return s->fds_msgdef; -} diff --git a/core/upb_def.h b/core/upb_def.h deleted file mode 100644 index 28cc258..0000000 --- a/core/upb_def.h +++ /dev/null @@ -1,362 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009-2011 Joshua Haberman. See LICENSE for details. - * - * Provides a mechanism for loading proto definitions from descriptors, and - * data structures to represent those definitions. These form the protobuf - * schema, and are used extensively throughout upb: - * - upb_msgdef: describes a "message" construct. - * - upb_fielddef: describes a message field. - * - upb_enumdef: describes an enum. - * (TODO: definitions of extensions and services). - * - * Defs are obtained from a upb_symtab object. A upb_symtab is empty when - * constructed, and definitions can be added by supplying descriptors. - * - * Defs are immutable and reference-counted. Symbol tables reference any defs - * that are the "current" definitions. If an extension is loaded that adds a - * field to an existing message, a new msgdef is constructed that includes the - * new field and the old msgdef is unref'd. The old msgdef will still be ref'd - * by messages (if any) that were constructed with that msgdef. - * - * This file contains routines for creating and manipulating the definitions - * themselves. To create and manipulate actual messages, see upb_msg.h. 
- */ - -#ifndef UPB_DEF_H_ -#define UPB_DEF_H_ - -#include "upb_atomic.h" -#include "upb_stream.h" -#include "upb_table.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_def: base class for defs **********************************************/ - -// All the different kind of defs we support. These correspond 1:1 with -// declarations in a .proto file. -typedef enum { - UPB_DEF_MSG = 0, - UPB_DEF_ENUM, - UPB_DEF_SVC, - UPB_DEF_EXT, - // Internal-only, placeholder for a def that hasn't be resolved yet. - UPB_DEF_UNRESOLVED, - - // For specifying that defs of any type are requsted from getdefs. - UPB_DEF_ANY = -1 -} upb_deftype; - -// This typedef is more space-efficient than declaring an enum var directly. -typedef int8_t upb_deftype_t; - -typedef struct { - upb_string *fqname; // Fully qualified. - upb_atomic_refcount_t refcount; - upb_deftype_t type; - - // The is_cyclic flag could go in upb_msgdef instead of here, because only - // messages can be involved in cycles. However, putting them here is free - // from a space perspective because structure alignment will otherwise leave - // three bytes empty after type. It is also makes ref and unref more - // efficient, because we don't have to downcast to msgdef before checking the - // is_cyclic flag. - bool is_cyclic; - uint16_t search_depth; // Used during initialization dfs. -} upb_def; - -// These must not be called directly! -void _upb_def_cyclic_ref(upb_def *def); -void _upb_def_reftozero(upb_def *def); - -// Call to ref/deref a def. -INLINE void upb_def_ref(upb_def *def) { - if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); -} -INLINE void upb_def_unref(upb_def *def) { - if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); -} - -/* upb_fielddef ***************************************************************/ - -// A upb_fielddef describes a single field in a message. It isn't a full def -// in the sense that it derives from upb_def. 
It cannot stand on its own; it -// is either a field of a upb_msgdef or contained inside a upb_extensiondef. -// It is also reference-counted. -typedef struct _upb_fielddef { - upb_value default_value; - - upb_string *name; - - struct _upb_msgdef *msgdef; - - // For the case of an enum or a submessage, points to the def for that type. - upb_def *def; - - upb_atomic_refcount_t refcount; - uint32_t byte_offset; // Where in a upb_msg to find the data. - - // These are set only when this fielddef is part of a msgdef. - upb_field_number_t number; - upb_field_count_t field_index; // Indicates set bit. - - upb_fieldtype_t type; - upb_label_t label; - // True if we own a ref on "def" (above). This is true unless this edge is - // part of a cycle. - bool owned; - uint8_t set_bit_mask; - uint16_t set_bit_offset; -} upb_fielddef; - -// A variety of tests about the type of a field. -INLINE bool upb_issubmsg(upb_fielddef *f) { - return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE); -} -INLINE bool upb_isstring(upb_fielddef *f) { - return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES); -} -INLINE bool upb_isarray(upb_fielddef *f) { - return f->label == UPB_LABEL(REPEATED); -} -// Does the type of this field imply that it should contain an associated def? 
-INLINE bool upb_hasdef(upb_fielddef *f) { - return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); -} - -INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) { - if (upb_isarray(f)) { - return UPB_VALUETYPE_ARRAY; - } else { - return f->type; - } -} - -INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) { - assert(upb_isarray(f)); - return f->type; -} - -INLINE bool upb_field_ismm(upb_fielddef *f) { - return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); -} - -INLINE bool upb_elem_ismm(upb_fielddef *f) { - return upb_isstring(f) || upb_issubmsg(f); -} - -/* upb_msgdef *****************************************************************/ - -// Structure that describes a single .proto message type. -typedef struct _upb_msgdef { - upb_def base; - upb_atomic_refcount_t cycle_refcount; - uint32_t size; - uint32_t set_flags_bytes; - - // Tables for looking up fields by number and name. - upb_inttable itof; // int to field - upb_strtable ntof; // name to field -} upb_msgdef; - -// Hash table entries for looking up fields by name or number. -typedef struct { - upb_inttable_entry e; - upb_fielddef *f; -} upb_itof_ent; -typedef struct { - upb_strtable_entry e; - upb_fielddef *f; -} upb_ntof_ent; - -// Looks up a field by name or number. While these are written to be as fast -// as possible, it will still be faster to cache the results of this lookup if -// possible. These return NULL if no such field is found. -INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) { - upb_itof_ent *e = - (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); - return e ? e->f : NULL; -} - -INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { - upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); - return e ? e->f : NULL; -} - -INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) { - return upb_strtable_count(&m->ntof); -} - -// Iteration over fields. The order is undefined. 
-// upb_msg_iter i; -// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { -// upb_fielddef *f = upb_msg_iter_field(i); -// // ... -// } -typedef upb_itof_ent *upb_msg_iter; - -upb_msg_iter upb_msg_begin(upb_msgdef *m); -upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter); -INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; } - -INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { - return iter->f; -} - -/* upb_enumdef ****************************************************************/ - -typedef struct _upb_enumdef { - upb_def base; - upb_strtable ntoi; - upb_inttable iton; -} upb_enumdef; - -typedef struct { - upb_strtable_entry e; - uint32_t value; -} upb_ntoi_ent; - -typedef struct { - upb_inttable_entry e; - upb_string *string; -} upb_iton_ent; - -typedef int32_t upb_enumval_t; - -// Lookups from name to integer and vice-versa. -bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); -// Caller does not own a ref on the returned string. -upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); - -// Iteration over name/value pairs. The order is undefined. -// upb_enum_iter i; -// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { -// // ... -// } -typedef upb_iton_ent *upb_enum_iter; - -upb_enum_iter upb_enum_begin(upb_enumdef *e); -upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); -INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; } - -INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { - return iter->string; -} -INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { - return iter->e.key; -} - - -/* upb_symtab *****************************************************************/ - -// A SymbolTable is where upb_defs live. It is empty when first constructed. -// Clients add definitions to the symtab by supplying unserialized or -// serialized descriptors (as defined in descriptor.proto). 
-struct _upb_symtab { - upb_atomic_refcount_t refcount; - upb_rwlock_t lock; // Protects all members except the refcount. - upb_strtable symtab; // The symbol table. - upb_msgdef *fds_msgdef; // Msgdef for google.protobuf.FileDescriptorSet. -}; -typedef struct _upb_symtab upb_symtab; - -// Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd -// when the caller is done with them. -upb_symtab *upb_symtab_new(void); -void _upb_symtab_free(upb_symtab *s); // Must not be called directly! - -INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); } -INLINE void upb_symtab_unref(upb_symtab *s) { - if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); -} - -// Resolves the given symbol using the rules described in descriptor.proto, -// namely: -// -// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like -// scoping rules are used to find the type (i.e. first the nested types -// within this message are searched, then within the parent, on up to the -// root namespace). -// -// If a def is found, the caller owns one ref on the returned def. Otherwise -// returns NULL. -upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); - -// Find an entry in the symbol table with this exact name. If a def is found, -// the caller owns one ref on the returned def. Otherwise returns NULL. -upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); - -// Gets an array of pointers to all currently active defs in this symtab. The -// caller owns the returned array (which is of length *count) as well as a ref -// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are -// returned, otherwise only defs of the required type are returned. -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type); - -// "fds" is a upb_src that will yield data from the -// google.protobuf.FileDescriptorSet message type. 
It is not necessary that -// the upb_def for FileDescriptorSet came from this symtab, but it must be -// compatible with the official descriptor.proto, as published by Google. -// -// upb_symtab_addfds() adds all the definitions from the given -// FileDescriptorSet and adds them to the symtab. status indicates whether the -// operation was successful or not, and the error message (if any). -// -// TODO: should this allow redefinition? Either is possible, but which is -// more useful? Maybe it should be an option. -void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); - -// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab. -// This is necessary for bootstrapping, since these are the upb_defs that -// specify other defs and allow them to be loaded. -void upb_symtab_add_descriptorproto(upb_symtab *s); - -// Returns the upb_msgdef for google.protobuf.FileDescriptorSet, which the -// caller owns a ref on. This is a convenience method that is equivalent to -// looking up the symbol called "google.protobuf.FileDescriptorSet" yourself, -// except that it only will return a def that was added by -// upb_symtab_add_descriptorproto(). -upb_msgdef *upb_symtab_fds_def(upb_symtab *s); - - -/* upb_def casts **************************************************************/ - -// Dynamic casts, for determining if a def is of a particular type at runtime. -#define UPB_DYNAMIC_CAST_DEF(lower, upper) \ - struct _upb_ ## lower; /* Forward-declare. */ \ - INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ - if(def->type != UPB_DEF_ ## upper) return NULL; \ - return (struct _upb_ ## lower*)def; \ - } -UPB_DYNAMIC_CAST_DEF(msgdef, MSG); -UPB_DYNAMIC_CAST_DEF(enumdef, ENUM); -UPB_DYNAMIC_CAST_DEF(svcdef, SVC); -UPB_DYNAMIC_CAST_DEF(extdef, EXT); -UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); -#undef UPB_DYNAMIC_CAST_DEF - -// Downcasts, for when some wants to assert that a def is of a particular type. 
-// These are only checked if we are building debug. -#define UPB_DOWNCAST_DEF(lower, upper) \ - struct _upb_ ## lower; /* Forward-declare. */ \ - INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ - assert(def->type == UPB_DEF_ ## upper); \ - return (struct _upb_ ## lower*)def; \ - } -UPB_DOWNCAST_DEF(msgdef, MSG); -UPB_DOWNCAST_DEF(enumdef, ENUM); -UPB_DOWNCAST_DEF(svcdef, SVC); -UPB_DOWNCAST_DEF(extdef, EXT); -UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); -#undef UPB_DOWNCAST_DEF - -#define UPB_UPCAST(ptr) (&(ptr)->base) - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DEF_H_ */ diff --git a/core/upb_glue.c b/core/upb_glue.c deleted file mode 100644 index 541827e..0000000 --- a/core/upb_glue.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_glue.h" -#include "upb_msg.h" -#include "upb_decoder.h" -#include "upb_strstream.h" - -void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, - upb_status *status) { - upb_stringsrc strsrc; - upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str); - - upb_decoder d; - upb_decoder_init(&d, md); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc)); - upb_src *src = upb_decoder_src(&d); - - upb_msgpopulator p; - upb_msgpopulator_init(&p); - upb_msgpopulator_reset(&p, msg, md); - - upb_handlers h; - upb_handlers_init(&h); - upb_msgpopulator_register_handlers(&p, &h); - upb_src_sethandlers(src, &h); - - upb_src_run(src, status); - - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_msgpopulator_uninit(&p); - upb_handlers_uninit(&h); -} - -void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) { - upb_stringsrc strsrc; - upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str); - - upb_decoder d; - upb_msgdef *fds_msgdef = upb_symtab_fds_def(symtab); - upb_decoder_init(&d, fds_msgdef); - 
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc)); - - upb_symtab_addfds(symtab, upb_decoder_src(&d), status); - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_def_unref(UPB_UPCAST(fds_msgdef)); -} diff --git a/core/upb_glue.h b/core/upb_glue.h deleted file mode 100644 index ca32436..0000000 --- a/core/upb_glue.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * upb's core components like upb_decoder and upb_msg are carefully designed to - * avoid depending on each other for maximum orthogonality. In other words, - * you can use a upb_decoder to decode into *any* kind of structure; upb_msg is - * just one such structure. You can use upb_decoder without having to link in - * upb_msg. - * - * However, for convenience we provide functions here for doing common - * operations like deserializing protobuf binary format into a upb_msg. The - * compromise is that this file drags in almost all of upb as a dependency, - * which could be undesirable if you're trying to use a trimmed-down build of - * upb. - * - * Copyright (c) 2011 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_GLUE_H -#define UPB_GLUE_H - -#ifdef __cplusplus -extern "C" { -#endif - -// Forward-declares so we don't have to include everything in this .h file. -// Clients should use the regular, typedef'd names (eg. upb_string). -struct _upb_msg; -struct _upb_msgdef; -struct _upb_status; -struct _upb_string; -struct _upb_symtab; - -// Decodes the given string, which must be in protobuf binary format, to the -// given upb_msg with msgdef "md", storing the status of the operation in "s". 
-void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg, - struct _upb_msgdef *md, struct _upb_status *s); - -void upb_parsedesc(struct _upb_symtab *symtab, struct _upb_string *str, - struct _upb_status *status); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/core/upb_msg.c b/core/upb_msg.c deleted file mode 100644 index 9dfbea4..0000000 --- a/core/upb_msg.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - * Data structure for storing a message of protobuf data. - */ - -#include "upb_msg.h" -#include "upb_decoder.h" -#include "upb_strstream.h" - -static uint32_t upb_round_up_pow2(uint32_t v) { - // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -static void upb_elem_free(upb_value v, upb_fielddef *f) { - switch(f->type) { - case UPB_TYPE(MESSAGE): - case UPB_TYPE(GROUP): - _upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def)); - break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - _upb_string_free(upb_value_getstr(v)); - break; - default: - abort(); - } -} - -static void upb_elem_unref(upb_value v, upb_fielddef *f) { - assert(upb_elem_ismm(f)); - upb_atomic_refcount_t *refcount = upb_value_getrefcount(v); - if (refcount && upb_atomic_unref(refcount)) - upb_elem_free(v, f); -} - -static void upb_field_free(upb_value v, upb_fielddef *f) { - if (upb_isarray(f)) { - _upb_array_free(upb_value_getarr(v), f); - } else { - upb_elem_free(v, f); - } -} - -static void upb_field_unref(upb_value v, upb_fielddef *f) { - assert(upb_field_ismm(f)); - upb_atomic_refcount_t *refcount = upb_value_getrefcount(v); - if (refcount && upb_atomic_unref(refcount)) - upb_field_free(v, f); -} - - -/* upb_array ******************************************************************/ - -upb_array 
*upb_array_new(void) { - upb_array *arr = malloc(sizeof(*arr)); - upb_atomic_refcount_init(&arr->refcount, 1); - arr->size = 0; - arr->len = 0; - arr->ptr = NULL; - return arr; -} - -void upb_array_recycle(upb_array **_arr, upb_fielddef *f) { - upb_array *arr = *_arr; - if(arr && upb_atomic_only(&arr->refcount)) { - arr->len = 0; - } else { - upb_array_unref(arr, f); - *_arr = upb_array_new(); - } -} - -void _upb_array_free(upb_array *arr, upb_fielddef *f) { - if (upb_elem_ismm(f)) { - // Need to release refs on sub-objects. - upb_valuetype_t type = upb_elem_valuetype(f); - for (upb_arraylen_t i = 0; i < arr->size; i++) { - upb_valueptr p = _upb_array_getptr(arr, f, i); - upb_elem_unref(upb_value_read(p, type), f); - } - } - free(arr->ptr); - free(arr); -} - -void upb_array_resize(upb_array *arr, upb_fielddef *f, upb_arraylen_t len) { - size_t type_size = upb_types[f->type].size; - upb_arraylen_t old_size = arr->size; - if (old_size < len) { - // Need to resize. - size_t new_size = upb_round_up_pow2(len); - arr->ptr = realloc(arr->ptr, new_size * type_size); - arr->size = new_size; - memset(arr->ptr + (old_size * type_size), 0, - (new_size - old_size) * type_size); - } - arr->len = len; -} - - -/* upb_msg ********************************************************************/ - -upb_msg *upb_msg_new(upb_msgdef *md) { - upb_msg *msg = malloc(md->size); - // Clear all set bits and cached pointers. - memset(msg, 0, md->size); - upb_atomic_refcount_init(&msg->refcount, 1); - return msg; -} - -void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { - // Need to release refs on all sub-objects. 
- upb_msg_iter i; - for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - upb_valueptr p = _upb_msg_getptr(msg, f); - upb_valuetype_t type = upb_field_valuetype(f); - if (upb_field_ismm(f)) upb_field_unref(upb_value_read(p, type), f); - } - free(msg); -} - -void upb_msg_recycle(upb_msg **_msg, upb_msgdef *msgdef) { - upb_msg *msg = *_msg; - if(msg && upb_atomic_only(&msg->refcount)) { - upb_msg_clear(msg, msgdef); - } else { - upb_msg_unref(msg, msgdef); - *_msg = upb_msg_new(msgdef); - } -} - -INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { - msg->data[f->set_bit_offset] |= f->set_bit_mask; -} - -static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) { - upb_valueptr p = _upb_msg_getptr(msg, f); - if (upb_isarray(f)) { - // Create/recycle/resize the array if necessary, and find a pointer to - // a newly-appended element. - if (!upb_msg_has(msg, f)) { - upb_array_recycle(p.arr, f); - upb_msg_sethas(msg, f); - } - assert(*p.arr != NULL); - upb_arraylen_t oldlen = upb_array_len(*p.arr); - upb_array_resize(*p.arr, f, oldlen + 1); - p = _upb_array_getptr(*p.arr, f, oldlen); - } - return p; -} - -static void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val) { - upb_valueptr p = upb_msg_getappendptr(msg, f); - if (upb_isstring(f)) { - // We do: - // - upb_string_recycle(), upb_string_substr() instead of - // - upb_string_unref(), upb_string_getref() - // because we can conveniently cache these upb_string objects in the - // upb_msg, whereas the upb_src who is sending us these strings may not - // have a good way of caching them. This saves the upb_src from allocating - // new upb_strings all the time to give us. 
- // - // If you were using this to copy one upb_msg to another this would - // allocate string objects whereas a upb_string_getref could have avoided - // those allocations completely; if this is an issue, we could make it an - // option of the upb_msgpopulator which behavior is desired. - upb_string *src = upb_value_getstr(val); - upb_string_recycle(p.str); - upb_string_substr(*p.str, src, 0, upb_string_len(src)); - } else { - upb_value_write(p, val, f->type); - } - upb_msg_sethas(msg, f); -} - -upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) { - upb_valueptr p = upb_msg_getappendptr(msg, f); - if (upb_isarray(f) || !upb_msg_has(msg, f)) { - upb_msg_recycle(p.msg, msgdef); - upb_msg_sethas(msg, f); - } - return *p.msg; -} - - -/* upb_msgpopulator ***********************************************************/ - -void upb_msgpopulator_init(upb_msgpopulator *p) { - upb_status_init(&p->status); -} - -void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md) { - p->top = p->stack; - p->limit = p->stack + sizeof(p->stack); - p->top->msg = m; - p->top->msgdef = md; -} - -void upb_msgpopulator_uninit(upb_msgpopulator *p) { - upb_status_uninit(&p->status); -} - -static upb_flow_t upb_msgpopulator_value(void *_p, upb_fielddef *f, upb_value val) { - upb_msgpopulator *p = _p; - upb_msg_appendval(p->top->msg, f, val); - return UPB_CONTINUE; -} - -static upb_flow_t upb_msgpopulator_startsubmsg(void *_p, upb_fielddef *f, - upb_handlers *delegate_to) { - upb_msgpopulator *p = _p; - (void)delegate_to; - upb_msg *oldmsg = p->top->msg; - if (++p->top == p->limit) { - upb_seterr(&p->status, UPB_ERROR, "Exceeded maximum nesting"); - return UPB_BREAK; - } - upb_msgdef *msgdef = upb_downcast_msgdef(f->def); - p->top->msgdef = msgdef; - p->top->msg = upb_msg_appendmsg(oldmsg, f, msgdef); - return UPB_CONTINUE; -} - -static upb_flow_t upb_msgpopulator_endsubmsg(void *_p) { - upb_msgpopulator *p = _p; - --p->top; - return UPB_CONTINUE; -} - 
-void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h) { - static upb_handlerset handlerset = { - NULL, // startmsg - NULL, // endmsg - &upb_msgpopulator_value, - &upb_msgpopulator_startsubmsg, - &upb_msgpopulator_endsubmsg, - }; - upb_register_handlerset(h, &handlerset); - upb_set_handler_closure(h, p, &p->status); -} diff --git a/core/upb_msg.h b/core/upb_msg.h deleted file mode 100644 index 8a3c63f..0000000 --- a/core/upb_msg.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. - * - * Data structure for storing a message of protobuf data. Unlike Google's - * protobuf, upb_msg and upb_array are reference counted instead of having - * exclusive ownership of their fields. This is a better match for dynamic - * languages where statements like a.b = other_b are normal. - * - * upb's parsers and serializers could also be used to populate and serialize - * other kinds of message objects (even one generated by Google's protobuf). - */ - -#ifndef UPB_MSG_H -#define UPB_MSG_H - -#include "upb.h" -#include "upb_def.h" -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// A pointer to a .proto value. The owner must have an out-of-band way of -// knowing the type, so it knows which union member to use. -typedef union { - double *_double; - float *_float; - int32_t *int32; - int64_t *int64; - uint8_t *uint8; - uint32_t *uint32; - uint64_t *uint64; - bool *_bool; - upb_string **str; - upb_msg **msg; - upb_array **arr; - void *_void; -} upb_valueptr; - -INLINE upb_valueptr upb_value_addrof(upb_value *val) { - upb_valueptr ptr = {&val->val._double}; - return ptr; -} - -// Reads or writes a upb_value from an address represented by a upb_value_ptr. 
-// We need to know the value type to perform this operation, because we need to -// know how much memory to copy (and for big-endian machines, we need to know -// where in the upb_value the data goes). -// -// For little endian-machines where we didn't mind overreading, we could make -// upb_value_read simply use memcpy(). -INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { - upb_value val; - -#ifdef NDEBUG -#define CASE(t, member_name) \ - case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break; -#else -#define CASE(t, member_name) \ - case UPB_TYPE(t): val.val.member_name = *ptr.member_name; val.type = upb_types[ft].inmemory_type; break; -#endif - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - case UPB_VALUETYPE_ARRAY: - val.val.arr = *ptr.arr; -#ifndef NDEBUG - val.type = UPB_VALUETYPE_ARRAY; -#endif - break; - default: assert(false); - } - return val; - -#undef CASE -} - -INLINE void upb_value_write(upb_valueptr ptr, upb_value val, - upb_fieldtype_t ft) { - if (ft == UPB_VALUETYPE_ARRAY) { - assert(val.type == UPB_VALUETYPE_ARRAY); - } else { - assert(val.type == upb_types[ft].inmemory_type); - } -#define CASE(t, member_name) \ - case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break; - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, 
msg) - CASE(GROUP, msg) - case UPB_VALUETYPE_ARRAY: - *ptr.arr = val.val.arr; - break; - default: assert(false); - } - -#undef CASE -} - -/* upb_array ******************************************************************/ - -typedef uint32_t upb_arraylen_t; -struct _upb_array { - upb_atomic_refcount_t refcount; - // "len" and "size" are measured in elements, not bytes. - upb_arraylen_t len; - upb_arraylen_t size; - char *ptr; -}; - -void _upb_array_free(upb_array *a, upb_fielddef *f); -INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, - uint32_t elem) { - upb_valueptr p; - p._void = &a->ptr[elem * upb_types[f->type].size]; - return p; -} - -upb_array *upb_array_new(void); - -INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { - if (a && upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); -} - -void upb_array_recycle(upb_array **arr, upb_fielddef *f); - -INLINE uint32_t upb_array_len(upb_array *a) { - return a->len; -} - -INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f, - upb_arraylen_t i) { - assert(i < upb_array_len(arr)); - return upb_value_read(_upb_array_getptr(arr, f, i), f->type); -} - -/* upb_msg ********************************************************************/ - -struct _upb_msg { - upb_atomic_refcount_t refcount; - uint8_t data[4]; // We allocate the appropriate amount per message. -}; - -void _upb_msg_free(upb_msg *msg, upb_msgdef *md); - -INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { - upb_valueptr p; - p._void = &msg->data[f->byte_offset]; - return p; -} - -// Creates a new msg of the given type. -upb_msg *upb_msg_new(upb_msgdef *md); - -// Unrefs the given message. -INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { - if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); -} - -void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef); - -// Tests whether the given field is explicitly set, or whether it will return a -// default. 
-INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { - return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0; -} - -INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); -} - -// Unsets all field values back to their defaults. -INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { - memset(msg->data, 0, md->set_flags_bytes); -} - -typedef struct { - upb_msg *msg; - upb_msgdef *msgdef; -} upb_msgpopulator_frame; - -typedef struct { - upb_msgpopulator_frame stack[UPB_MAX_NESTING], *top, *limit; - upb_status status; -} upb_msgpopulator; - -void upb_msgpopulator_init(upb_msgpopulator *p); -void upb_msgpopulator_uninit(upb_msgpopulator *p); -void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md); -void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/core/upb_stream.h b/core/upb_stream.h deleted file mode 100644 index 3f7c843..0000000 --- a/core/upb_stream.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file defines four general-purpose streaming data interfaces. - * - * - upb_handlers: represents a set of callbacks, very much like in XML's SAX - * API, that a client can register to do a streaming tree traversal over a - * stream of structured protobuf data, without knowing where that data is - * coming from. There is only one upb_handlers type (it is not a virtual - * base class), but the object lets you register any set of handlers. - * - * The upb_handlers interface supports delegation: when entering a submessage, - * you can delegate to another set of upb_handlers instead of handling the - * submessage yourself. This allows upb_handlers objects to *compose* -- you - * can implement a set of upb_handlers without knowing or caring whether this - * is the top-level message or not. 
- * - * The other interfaces are the C equivalent of "virtual base classes" that - * anyone can implement: - * - * - upb_src: an interface that represents a source of streaming protobuf data. - * It lets you register a set of upb_handlers, and then call upb_src_run(), - * which pulls the protobuf data from somewhere and then calls the handlers. - * - * - upb_bytesrc: a pull interface for streams of bytes, basically an - * abstraction of read()/fread(), but it avoids copies where possible. - * - * - upb_bytesink: push interface for streams of bytes, basically an - * abstraction of write()/fwrite(), but it avoids copies where possible. - * - * All of the encoders and decoders are based on these generic interfaces, - * which lets you write streaming algorithms that do not depend on a specific - * serialization format; for example, you can write a pretty printer that works - * with input that came from protobuf binary format, protobuf text format, or - * even an in-memory upb_msg -- the pretty printer will not know the - * difference. - * - * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. - * - */ - -#ifndef UPB_STREAM_H -#define UPB_STREAM_H - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Forward-declare. We can't include upb_def.h; it would be circular. -struct _upb_fielddef; - -/* upb_handlers ***************************************************************/ - -// upb_handlers define the interface by which a upb_src passes data to a -// upb_sink. - -// Constants that a handler returns to indicate to its caller whether it should -// continue or not. -typedef enum { - // Caller should continue sending values to the sink. - UPB_CONTINUE, - - // Stop processing for now; check status for details. If no status was set, - // a generic error will be returned. If the error is resumable, it is not - // (yet) defined where processing will resume -- waiting for real-world - // examples of resumable decoders and resume-requiring clients. 
upb_src - // implementations that are not capable of resuming will override the return - // status to be non-resumable if a resumable status was set by the handlers. - UPB_BREAK, - - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). - UPB_SKIPSUBMSG, - - // When returned from a startsubmsg handler, indicates that the submessage - // should be handled by a different set of handlers, which have been - // registered on the provided upb_handlers object. This allows upb_handlers - // objects to compose; a set of upb_handlers need not know whether it is the - // top-level message or a sub-message. May not be returned from any other - // callback. - UPB_DELEGATE, -} upb_flow_t; - -// upb_handlers -struct _upb_handlers; -typedef struct _upb_handlers upb_handlers; - -typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); -typedef upb_flow_t (*upb_endmsg_handler_t)(void *closure); -typedef upb_flow_t (*upb_value_handler_t)(void *closure, - struct _upb_fielddef *f, - upb_value val); -typedef upb_flow_t (*upb_startsubmsg_handler_t)(void *closure, - struct _upb_fielddef *f, - upb_handlers *delegate_to); -typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure); -typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, - upb_field_number_t fieldnum, - upb_value val); - -// An empty set of handlers, for convenient copy/paste: -// -// static upb_flow_t startmsg(void *closure) { -// // Called when the top-level message begins. -// return UPB_CONTINUE; -// } -// -// static upb_flow_t endmsg(void *closure) { -// // Called when the top-level message ends. -// return UPB_CONTINUE; -// } -// -// static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { -// // Called for every value in the stream. 
-// return UPB_CONTINUE; -// } -// -// static upb_flow_t startsubmsg(void *closure, upb_fielddef *f, -// upb_handlers *delegate_to) { -// // Called when a submessage begins; can delegate by returning UPB_DELEGATE. -// return UPB_CONTINUE; -// } -// -// static upb_flow_t endsubmsg(void *closure) { -// // Called when a submessage ends. -// return UPB_CONTINUE; -// } -// -// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, -// upb_value val) { -// // Called with an unknown value is encountered. -// return UPB_CONTINUE; -// } -// -// // Any handlers you don't need can be set to NULL. -// static upb_handlerset handlers = { -// startmsg, -// endmsg, -// value, -// startsubmsg, -// endsubmsg, -// unknownval, -// }; -typedef struct { - upb_startmsg_handler_t startmsg; - upb_endmsg_handler_t endmsg; - upb_value_handler_t value; - upb_startsubmsg_handler_t startsubmsg; - upb_endsubmsg_handler_t endsubmsg; - upb_unknownval_handler_t unknownval; -} upb_handlerset; - -// Functions to register handlers on a upb_handlers object. -INLINE void upb_handlers_init(upb_handlers *h); -INLINE void upb_handlers_uninit(upb_handlers *h); -INLINE void upb_handlers_reset(upb_handlers *h); -INLINE bool upb_handlers_isempty(upb_handlers *h); -INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); - -// TODO: for clients that want to increase efficiency by preventing bytesrcs -// from automatically being converted to strings in the value callback. -// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); - -// The closure will be passed to every handler. The status will be read by the -// upb_src immediately after a handler has returned UPB_BREAK and used as the -// overall upb_src status; it will not be referenced at any other time. 
-INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, - upb_status *status); - - -/* upb_src ********************************************************************/ - -struct _upb_src; -typedef struct _upb_src upb_src; - -// upb_src_sethandlers() must be called once and only once before upb_src_run() -// is called. This sets up the callbacks that will handle the parse. A -// upb_src that is fully initialized except for the call to -// upb_src_sethandlers() is called "prepared" -- this is useful for library -// functions that want to consume the output of a generic upb_src. -// Calling sethandlers() multiple times is an error and will trigger an abort(). -INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); - -// Runs the src, calling the callbacks that were registered with -// upb_src_sethandlers(), and returning the status of the operation in -// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a -// non-blocking socket) or a resumable error; in both cases upb_src_run can be -// called again later. TRYAGAIN could come from either the src (input buffers -// are empty) or the handlers (output buffers are full). -INLINE void upb_src_run(upb_src *src, upb_status *status); - - -// A convenience object that a upb_src can use to invoke handlers. It -// transparently handles delegation so that the upb_src needs only follow the -// protocol as if delegation did not exist. 
-struct _upb_dispatcher; -typedef struct _upb_dispatcher upb_dispatcher; -INLINE void upb_dispatcher_init(upb_dispatcher *d); -INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h, - bool supports_skip); -INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, - struct _upb_fielddef *f); -INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, - upb_value val); -INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, - upb_field_number_t fieldnum, - upb_value val); - -/* upb_bytesrc ****************************************************************/ - -// Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If 0, indicates error and puts details in "status". -INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count, upb_status *status); - -// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure -// that "str" is created or just recycled. Returns "false" if no data was -// returned, either due to error or EOF (check status for details). -// -// In comparison to upb_bytesrc_read(), this call can possibly alias existing -// string data (which avoids a copy). On the other hand, if the data was *not* -// already in an existing string, this copies it into a upb_string, and if the -// data needs to be put in a specific range of memory (because eg. you need to -// put it into a different kind of string object) then upb_bytesrc_get() could -// save you a copy. -INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_status *status); - -// A convenience function for getting all the remaining data in a upb_bytesrc -// as a upb_string. Returns false and sets "status" if the operation fails. 
-INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, - upb_status *status); -INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, - upb_status *status) { - return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); -} - - -/* upb_bytesink ***************************************************************/ - -struct _upb_bytesink; -typedef struct _upb_bytesink upb_bytesink; - -// TODO: Figure out how buffering should be handled. Should the caller buffer -// data and only call these functions when a buffer is full? Seems most -// efficient, but then buffering has to be configured in the caller, which -// could be anything, which makes it hard to have a standard interface for -// controlling buffering. -// -// The downside of having the bytesink buffer is efficiency: the caller is -// making more (virtual) function calls, and the caller can't arrange to have -// a big contiguous buffer. The bytesink can do this, but will have to copy -// to make the data contiguous. - -// Returns the number of bytes written. -INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, - const char *fmt, ...); - -// Puts the given string, returning true if the operation was successful, otherwise -// check "status" for details. Ownership of the string is *not* passed; if -// the callee wants a reference he must call upb_string_getref() on it. -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, - upb_status *status); - -#include "upb_stream_vtbl.h" - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h deleted file mode 100644 index e1f9cb8..0000000 --- a/core/upb_stream_vtbl.h +++ /dev/null @@ -1,295 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * vtable declarations for types that are implementing any of the src or sink - * interfaces. 
Only components that are implementing these interfaces need - * to worry about this file. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_SRCSINK_VTBL_H_ -#define UPB_SRCSINK_VTBL_H_ - -#include -#include "upb_stream.h" -#include "upb_string.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Typedefs for function pointers to all of the virtual functions. - -// upb_src -typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers); -typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); - -// upb_bytesrc. -typedef upb_strlen_t (*upb_bytesrc_read_fptr)( - upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -typedef bool (*upb_bytesrc_getstr_fptr)( - upb_bytesrc *src, upb_string *str, upb_status *status); - -// upb_bytesink. -typedef upb_strlen_t (*upb_bytesink_write_fptr)( - upb_bytesink *bytesink, void *buf, upb_strlen_t count); -typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( - upb_bytesink *bytesink, upb_string *str, upb_status *status); -typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( - upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); - -// Vtables for the above interfaces. -typedef struct { - upb_bytesrc_read_fptr read; - upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtbl; - -typedef struct { - upb_bytesink_write_fptr write; - upb_bytesink_putstr_fptr putstr; - upb_bytesink_vprintf_fptr vprintf; -} upb_bytesink_vtbl; - -typedef struct { - upb_src_sethandlers_fptr sethandlers; - upb_src_run_fptr run; -} upb_src_vtbl; - - -// "Base Class" definitions; components that implement these interfaces should -// contain one of these structures. 
- -struct _upb_bytesrc { - upb_bytesrc_vtbl *vtbl; -}; - -struct _upb_bytesink { - upb_bytesink_vtbl *vtbl; -}; - -struct _upb_src { - upb_src_vtbl *vtbl; -}; - -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { - s->vtbl = vtbl; -} - -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { - s->vtbl = vtbl; -} - -INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) { - s->vtbl = vtbl; -} - -// Implementation of virtual function dispatch. - -// upb_src -INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) { - src->vtbl->sethandlers(src, handlers); -} - -INLINE void upb_src_run(upb_src *src, upb_status *status) { - src->vtbl->run(src, status); -} - -// upb_bytesrc -INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count, upb_status *status) { - return src->vtbl->read(src, buf, count, status); -} - -INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_status *status) { - return src->vtbl->getstr(src, str, status); -} - -INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, - upb_status *status) { - // We start with a getstr, because that could possibly alias data instead of - // copying. - if (!upb_bytesrc_getstr(src, str, status)) return false; - // Trade-off between number of read calls and amount of overallocation. - const size_t bufsize = 4096; - do { - upb_strlen_t len = upb_string_len(str); - char *buf = upb_string_getrwbuf(str, len + bufsize); - upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize, status); - if (read < 0) return false; - // Resize to proper size. 
- upb_string_getrwbuf(str, len + read); - } while (!status->code != UPB_EOF); - return true; -} - - -// upb_bytesink -INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, - upb_strlen_t count) { - return sink->vtbl->write(sink, buf, count); -} - -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { - return sink->vtbl->putstr(sink, str, status); -} - -INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { - va_list args; - va_start(args, fmt); - upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); - va_end(args); - return ret; -} - -// upb_handlers -struct _upb_handlers { - upb_handlerset *set; - void *closure; - upb_status *status; // We don't own this. -}; - -INLINE void upb_handlers_init(upb_handlers *h) { - (void)h; -} -INLINE void upb_handlers_uninit(upb_handlers *h) { - (void)h; -} - -INLINE void upb_handlers_reset(upb_handlers *h) { - h->set = NULL; - h->closure = NULL; -} - -INLINE bool upb_handlers_isempty(upb_handlers *h) { - return !h->set && !h->closure; -} - -INLINE upb_flow_t upb_nop(void *closure) { - (void)closure; - return UPB_CONTINUE; -} - -INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) { - (void)closure; - (void)f; - (void)val; - return UPB_CONTINUE; -} - -INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f, - upb_handlers *delegate_to) { - (void)closure; - (void)f; - (void)delegate_to; - return UPB_CONTINUE; -} - -INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, - upb_value val) { - (void)closure; - (void)fieldnum; - (void)val; - return UPB_CONTINUE; -} - -INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { - if (!set->startmsg) set->startmsg = &upb_nop; - if (!set->endmsg) set->endmsg = &upb_nop; - if (!set->value) set->value = &upb_value_nop; - if (!set->startsubmsg) set->startsubmsg = 
&upb_startsubmsg_nop; - if (!set->endsubmsg) set->endsubmsg = &upb_nop; - if (!set->unknownval) set->unknownval = &upb_unknownval_nop; - h->set = set; -} - -INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, - upb_status *status) { - h->closure = closure; - h->status = status; -} - -// upb_dispatcher -typedef struct { - upb_handlers handlers; - int depth; -} upb_dispatcher_frame; - -struct _upb_dispatcher { - upb_dispatcher_frame stack[UPB_MAX_NESTING], *top, *limit; - bool supports_skip; -}; - -INLINE void upb_dispatcher_init(upb_dispatcher *d) { - d->limit = d->stack + sizeof(d->stack); -} - -INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h, - bool supports_skip) { - d->top = d->stack; - d->top->depth = 1; // Never want to trigger end-of-delegation. - d->top->handlers = *h; - d->supports_skip = supports_skip; -} - -INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { - assert(d->stack == d->top); - return d->top->handlers.set->startmsg(d->top->handlers.closure); -} - -INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) { - assert(d->stack == d->top); - return d->top->handlers.set->endmsg(d->top->handlers.closure); -} - -// TODO: several edge cases to fix: -// - delegated start returns UPB_BREAK, should replay the start on resume. -// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg. 
-INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, - struct _upb_fielddef *f) { - upb_handlers handlers; - upb_handlers_init(&handlers); - upb_handlers_reset(&handlers); - upb_flow_t ret = d->top->handlers.set->startsubmsg(d->top->handlers.closure, f, &handlers); - assert((ret == UPB_DELEGATE) == !upb_handlers_isempty(&handlers)); - if (ret == UPB_DELEGATE) { - ++d->top; - d->top->handlers = handlers; - d->top->depth = 0; - ret = d->top->handlers.set->startmsg(d->top->handlers.closure); - } - if (ret == UPB_CONTINUE || !d->supports_skip) ++d->top->depth; - upb_handlers_uninit(&handlers); - return ret; -} - -INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { - upb_flow_t ret; - if (--d->top->depth == 0) { - ret = d->top->handlers.set->endmsg(d->top->handlers.closure); - if (ret != UPB_CONTINUE) return ret; - --d->top; - assert(d->top >= d->stack); - } - return d->top->handlers.set->endsubmsg(d->top->handlers.closure); -} - -INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, - struct _upb_fielddef *f, - upb_value val) { - return d->top->handlers.set->value(d->top->handlers.closure, f, val); -} - -INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, - upb_field_number_t fieldnum, - upb_value val) { - return d->top->handlers.set->unknownval(d->top->handlers.closure, - fieldnum, val); -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/core/upb_string.c b/core/upb_string.c deleted file mode 100644 index 30ed88f..0000000 --- a/core/upb_string.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
- */ - -#include "upb_string.h" - -#include -#ifdef __GLIBC__ -#include -#elif defined(__APPLE__) -#include -#endif - -static uint32_t upb_round_up_pow2(uint32_t v) { - // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -upb_string *upb_string_new() { - upb_string *str = malloc(sizeof(*str)); - str->ptr = NULL; - str->cached_mem = NULL; - str->len = 0; -#ifndef UPB_HAVE_MSIZE - str->size = 0; -#endif - str->src = NULL; - upb_atomic_refcount_init(&str->refcount, 1); - return str; -} - -uint32_t upb_string_size(upb_string *str) { -#ifdef __GLIBC__ - return malloc_usable_size(str->cached_mem); -#elif defined(__APPLE__) - return malloc_size(str->cached_mem); -#else - return str->size; -#endif -} - -void _upb_string_free(upb_string *str) { - free(str->cached_mem); - _upb_string_release(str); - free(str); -} - -char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - // assert(str->ptr == NULL); - upb_strlen_t size = upb_string_size(str); - if (size < len) { - size = upb_round_up_pow2(len); - str->cached_mem = realloc(str->cached_mem, size); -#ifndef UPB_HAVE_MSIZE - str->size = size; -#endif - } - str->len = len; - str->ptr = str->cached_mem; - return str->cached_mem; -} - -void upb_string_substr(upb_string *str, upb_string *target_str, - upb_strlen_t start, upb_strlen_t len) { - if(str->ptr) *(char*)0 = 0; - assert(str->ptr == NULL); - str->src = upb_string_getref(target_str); - str->ptr = upb_string_getrobuf(target_str) + start; - str->len = len; -} - -void upb_string_vprintf(upb_string *str, const char *format, va_list args) { - // Try once without reallocating. We have to va_copy because we might have - // to call vsnprintf again. 
- uint32_t size = UPB_MAX(upb_string_size(str), 16); - char *buf = upb_string_getrwbuf(str, size); - va_list args_copy; - va_copy(args_copy, args); - uint32_t true_size = vsnprintf(buf, size, format, args_copy); - va_end(args_copy); - - if (true_size >= size) { - // Need to reallocate. We reallocate even if the sizes were equal, - // because snprintf excludes the terminating NULL from its count. - // We don't care about the terminating NULL, but snprintf might - // bail out of printing even other characters if it doesn't have - // enough space to write the NULL also. - upb_string_recycle(&str); - buf = upb_string_getrwbuf(str, true_size + 1); - vsnprintf(buf, true_size + 1, format, args); - } - str->len = true_size; -} - -upb_string *upb_string_asprintf(const char *format, ...) { - upb_string *str = upb_string_new(); - va_list args; - va_start(args, format); - upb_string_vprintf(str, format, args); - va_end(args); - return str; -} - -upb_string *upb_strdup(upb_string *s) { - upb_string *str = upb_string_new(); - upb_strcpy(str, s); - return str; -} - -void upb_strcat(upb_string *s, upb_string *append) { - uint32_t old_size = upb_string_len(s); - uint32_t append_size = upb_string_len(append); - uint32_t new_size = old_size + append_size; - char *buf = upb_string_getrwbuf(s, new_size); - memcpy(buf + old_size, upb_string_getrobuf(append), append_size); -} - -upb_string *upb_strreadfile(const char *filename) { - FILE *f = fopen(filename, "rb"); - if(!f) return NULL; - if(fseek(f, 0, SEEK_END) != 0) goto error; - long size = ftell(f); - if(size < 0) goto error; - if(fseek(f, 0, SEEK_SET) != 0) goto error; - upb_string *s = upb_string_new(); - char *buf = upb_string_getrwbuf(s, size); - if(fread(buf, size, 1, f) != 1) goto error; - fclose(f); - return s; - -error: - fclose(f); - return NULL; -} diff --git a/core/upb_string.h b/core/upb_string.h deleted file mode 100644 index 0694a23..0000000 --- a/core/upb_string.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - * upb - a 
minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - * This file defines a simple string type which is length-delimited instead - * of NULL-terminated, and which has useful sharing semantics. - * - * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() - * wheverever possible, while keeping both CPU and memory overhead low. - * Throughout upb there are situations where one wants to reference all or part - * of another string without copying. upb_string provides APIs for doing this, - * and allows the referenced string to be kept alive for as long as anyone is - * referencing it. - * - * Characteristics of upb_string: - * - strings are reference-counted. - * - strings are immutable (can be mutated only when first created or recycled). - * - if a string has no other referents, it can be "recycled" into a new string - * without having to reallocate the upb_string. - * - strings can be substrings of other strings (owning a ref on the source - * string). - * - * Reference-counted strings have recently fallen out of favor because of the - * performance impacts of doing thread-safe reference counting with atomic - * operations. We side-step this issue by not performing atomic operations - * unless the string has been marked thread-safe. Time will tell whether this - * scheme is easy and convenient enough to be practical. - * - * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched - * idiom that we go with it instead of making our pointers uint8_t*. - * - * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE - * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! 
What this means is that if - * you are logically passing a reference to a upb_string to another thread - * (which implies that the other thread must eventually call unref of recycle), - * you have two options: - * - * - create a copy of the string that will be used in the other thread only. - * - call upb_string_get_synchronized_ref(), which will make getref, unref, and - * recycle thread-safe for this upb_string. - */ - -#ifndef UPB_STRING_H -#define UPB_STRING_H - -#include -#include -#include -#include "upb_atomic.h" -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// All members of this struct are private, and may only be read/written through -// the associated functions. -struct _upb_string { - // The string's refcount. - upb_atomic_refcount_t refcount; - - // The pointer to our currently active data. This may be memory we own - // or a pointer into memory we don't own. - const char *ptr; - - // If non-NULL, this is a block of memory we own. We keep this cached even - // if "ptr" is currently aliasing memory we don't own. - char *cached_mem; - - // The effective length of the string (the bytes at ptr). - int32_t len; -#ifndef UPB_HAVE_MSIZE - // How many bytes are allocated in cached_mem. - // - // Many platforms have a function that can tell you the size of a block - // that was previously malloc'd. In this case we can avoid storing the - // size explicitly. - uint32_t size; -#endif - - // Used if this is a slice of another string, NULL otherwise. We own a ref - // on src. - struct _upb_string *src; -}; - -// Internal-only initializer for upb_string instances. -#ifdef UPB_HAVE_MSIZE -#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, NULL} -#else -#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, 0, NULL} -#endif - -// Special pseudo-refcounts for static/stack-allocated strings, respectively. 
-#define _UPB_STRING_REFCOUNT_STATIC -1 -#define _UPB_STRING_REFCOUNT_STACK -2 - -// Returns a newly-created, empty, non-finalized string. When the string is no -// longer needed, it should be unref'd, never freed directly. -upb_string *upb_string_new(); - -// Internal-only; clients should call upb_string_unref(). -void _upb_string_free(upb_string *str); - -// Releases a ref on the given string, which may free the memory. "str" -// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE -// UNLESS THE STRING IS SYNCHRONIZED. -INLINE void upb_string_unref(upb_string *str) { - if (str && upb_atomic_read(&str->refcount) > 0 && - upb_atomic_unref(&str->refcount)) { - _upb_string_free(str); - } -} - -static void _upb_string_release(upb_string *str) { - if(str->src) { - upb_string_unref(str->src); - str->src = NULL; - } -} - -upb_string *upb_strdup(upb_string *s); // Forward-declare. - -// Returns a string with the same contents as "str". The caller owns a ref on -// the returned string, which may or may not be the same object as "str. -// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED! -INLINE upb_string *upb_string_getref(upb_string *str) { - int refcount = upb_atomic_read(&str->refcount); - if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); - // We don't ref the special <0 refcount for static strings. - if (refcount > 0) upb_atomic_ref(&str->refcount); - return str; -} - -// Returns the length of the string. -INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } - -// Use to read the bytes of the string. The caller *must* call -// upb_string_endread() after the data has been read. The window between -// upb_string_getrobuf() and upb_string_endread() should be kept as short as -// possible, because any pending upb_string_detach() may be blocked until -// upb_string_endread is called(). No other functions may be called on the -// string during this window except upb_string_len(). 
-INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } -INLINE void upb_string_endread(upb_string *str) { (void)str; } - -// Convenience method for getting the end of the string. Calls -// upb_string_getrobuf() so inherits the caveats of calling that function. -INLINE const char *upb_string_getbufend(upb_string *str) { - return upb_string_getrobuf(str) + upb_string_len(str); -} - -// Attempts to recycle the string "str" so it may be reused and have different -// data written to it. After the function returns, "str" points to a writable -// string, which is either the original string if it had no other references -// or a newly created string if it did have other references. -// -// As a special case, passing a pointer to NULL will allocate a new string. -// This is convenient for the pattern: -// -// upb_string *str = NULL; -// while (x) { -// if (y) { -// upb_string_recycle(&str); -// upb_src_getstr(str); -// } -// } -INLINE void upb_string_recycle(upb_string **_str) { - upb_string *str = *_str; - if(str && upb_atomic_only(&str->refcount)) { - str->ptr = NULL; - str->len = 0; - _upb_string_release(str); - } else { - upb_string_unref(str); - *_str = upb_string_new(); - } -} - - -// The options for setting the contents of a string. These may only be called -// when a string is first created or recycled; once other functions have been -// called on the string, these functions are not allowed until the string is -// recycled. - -// Gets a pointer suitable for writing to the string, which is guaranteed to -// have at least "len" bytes of data available. The size of the string will -// become "len". -char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); - -// Replaces the contents of str with the contents of the given printf. -void upb_string_vprintf(upb_string *str, const char *format, va_list args); -INLINE void upb_string_printf(upb_string *str, const char *format, ...) 
{ - va_list args; - va_start(args, format); - upb_string_vprintf(str, format, args); - va_end(args); -} - -// Sets the contents of "str" to be the given substring of "target_str", to -// which the caller must own a ref. -void upb_string_substr(upb_string *str, upb_string *target_str, - upb_strlen_t start, upb_strlen_t len); - -// Sketch of an API for allowing upb_strings to reference external, unowned -// data. Waiting for a clear use case before actually implementing it. -// -// Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until a -// matching call to upb_string_detach(), which may block until any concurrent -// readers have finished reading. upb_string_detach() preserves the contents -// of the string by copying the referenced data if there are any other -// referents. -// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -// void upb_string_detach(upb_string *str); - -// Allows using upb_strings in printf, ie: -// upb_strptr str = UPB_STRLIT("Hello, World!\n"); -// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) -#define UPB_STRFMT "%.*s" - -// Macros for constructing upb_string objects statically or on the stack. These -// can be used like: -// -// upb_string static_str = UPB_STATIC_STRING("Foo"); -// -// int main() { -// upb_string stack_str = UPB_STACK_STRING("Foo"); -// // Now: -// // upb_streql(&static_str, &stack_str) == true -// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true -// } -// -// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays, -// but you must not change the underlying data once you've passed the string on: -// -// void foo() { -// char data[] = "ABC123"; -// upb_string stack_str = UPB_STACK_STR(data); -// bar(&stack_str); -// data[0] = "B"; // NOT ALLOWED!! 
-// } -// -// TODO: should the stack business just be like attach/detach? The latter seems -// more flexible, though it does require a stack allocation. Maybe put this off -// until there is a clear use case. -#define UPB_STATIC_STRING(str) \ - _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) -#define UPB_STATIC_STRING_LEN(str, len) \ - _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) -#define UPB_STACK_STRING(str) \ - _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) -#define UPB_STACK_STRING_LEN(str, len) \ - _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) - -// A convenient way of specifying upb_strings as literals, like: -// -// upb_streql(UPB_STRLIT("expected"), other_str); -// -// However, this requires either C99 compound initializers or C++. -// Must ONLY be called with a string literal as its argument! -//#ifdef __cplusplus -//namespace upb { -//class String : public upb_string { -// // This constructor must ONLY be called with a string literal. -// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {} -//}; -//} -//#define UPB_STRLIT(str) upb::String(str) -//#endif -#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) - -/* upb_string library functions ***********************************************/ - -// Named like their counterparts, these are all safe against buffer -// overflow. For the most part these only use the public upb_string interface. - -// More efficient than upb_strcmp if all you need is to test equality. -INLINE bool upb_streql(upb_string *s1, upb_string *s2) { - upb_strlen_t len = upb_string_len(s1); - if(len != upb_string_len(s2)) { - return false; - } else { - bool ret = - memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; - upb_string_endread(s1); - upb_string_endread(s2); - return ret; - } -} - -// Like strcmp(). -int upb_strcmp(upb_string *s1, upb_string *s2); - -// Compare a upb_string with memory or a NULL-terminated C string. 
-INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) { - return len == upb_string_len(str) && - memcmp(upb_string_getrobuf(str), buf, len) == 0; -} - -INLINE bool upb_streqlc(upb_string *str, const void *buf) { - // Could be made one-pass. - return upb_streqllen(str, buf, strlen((const char*)buf)); -} - -// Like upb_strcpy, but copies from a buffer and length. -INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { - memcpy(upb_string_getrwbuf(dest, len), src, len); -} - -// Replaces the contents of "dest" with the contents of "src". -INLINE void upb_strcpy(upb_string *dest, upb_string *src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src)); - upb_string_endread(src); -} - -// Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_string *dest, const void *src) { - // This does two passes over src, but that is necessary unless we want to - // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen((const char*)src)); -} - -// Returns a new string whose contents are a copy of s. -upb_string *upb_strdup(upb_string *s); - -// Like upb_strdup(), but duplicates a given buffer and length. -INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { - upb_string *s = upb_string_new(); - upb_strcpylen(s, src, len); - return s; -} - -// Like upb_strdup(), but duplicates a C NULL-terminated string. -INLINE upb_string *upb_strdupc(const char *src) { - return upb_strduplen(src, strlen(src)); -} - -// Appends 'append' to 's' in-place, resizing s if necessary. -void upb_strcat(upb_string *s, upb_string *append); - -// Returns a new string that is a substring of the given string. -INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { - upb_string *str = upb_string_new(); - upb_string_substr(str, s, offset, len); - return str; -} - -// Reads an entire file into a newly-allocated string. 
-upb_string *upb_strreadfile(const char *filename); - -// Returns a new string with the contents of the given printf. -upb_string *upb_string_asprintf(const char *format, ...); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/core/upb_table.c b/core/upb_table.c deleted file mode 100644 index a6e0a56..0000000 --- a/core/upb_table.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_table.h" -#include "upb_string.h" - -#include -#include -#include - -static const upb_inttable_key_t EMPTYENT = 0; -static const double MAX_LOAD = 0.85; - -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); - -/* We use 1-based indexes into the table so that 0 can be "NULL". */ -static upb_inttable_entry *intent(upb_inttable *t, int32_t i) { - return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); -} -static upb_strtable_entry *strent(upb_strtable *t, int32_t i) { - return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); -} - -void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) -{ - t->count = 0; - t->entry_size = entry_size; - t->size_lg2 = 0; - while(size >>= 1) t->size_lg2++; - size_t bytes = upb_table_size(t) * t->entry_size; - t->mask = upb_table_size(t) - 1; - t->entries = malloc(bytes); - memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */ -} - -void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize) -{ - upb_table_init(&t->t, size, entsize); -} - -void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) -{ - upb_table_init(&t->t, size, entsize); -} - -void upb_table_free(upb_table *t) { free(t->entries); } -void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); } -void upb_strtable_free(upb_strtable *t) { - // Free refs from the strtable. 
- upb_strtable_entry *e = upb_strtable_begin(t); - for(; e; e = upb_strtable_next(t, e)) { - upb_string_unref(e->key); - } - upb_table_free(&t->t); -} - -static uint32_t strtable_bucket(upb_strtable *t, upb_string *key) -{ - uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0); - return (hash & (upb_strtable_size(t)-1)) + 1; -} - -void *upb_strtable_lookup(upb_strtable *t, upb_string *key) -{ - uint32_t bucket = strtable_bucket(t, key); - upb_strtable_entry *e; - do { - e = strent(t, bucket); - if(e->key && upb_streql(e->key, key)) return e; - } while((bucket = e->next) != UPB_END_OF_CHAIN); - return NULL; -} - -static uint32_t empty_intbucket(upb_inttable *table) -{ - /* TODO: does it matter that this is biased towards the front of the table? */ - for(uint32_t i = 1; i <= upb_inttable_size(table); i++) { - upb_inttable_entry *e = intent(table, i); - if(e->key == EMPTYENT) return i; - } - assert(false); - return 0; -} - -/* The insert routines have a lot more code duplication between int/string - * variants than I would like, but there's just a bit too much that varies to - * parameterize them. */ -static void intinsert(upb_inttable *t, upb_inttable_entry *e) -{ - assert(upb_inttable_lookup(t, e->key) == NULL); - t->t.count++; - uint32_t bucket = upb_inttable_bucket(t, e->key); - upb_inttable_entry *table_e = intent(t, bucket); - if(table_e->key != EMPTYENT) { /* Collision. */ - if(bucket == upb_inttable_bucket(t, table_e->key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_intbucket(t); - while (table_e->next != UPB_END_OF_CHAIN) - table_e = intent(t, table_e->next); - table_e->next = empty_bucket; - table_e = intent(t, empty_bucket); - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. 
*/ - uint32_t empty_bucket = empty_intbucket(t); - uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key); - memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_inttable_entry *evictee_e = intent(t, evictee_bucket); - while(1) { - assert(evictee_e->key != UPB_EMPTY_ENTRY); - assert(evictee_e->next != UPB_END_OF_CHAIN); - if(evictee_e->next == bucket) { - evictee_e->next = empty_bucket; - break; - } - evictee_e = intent(t, evictee_e->next); - } - /* table_e remains set to our mainpos. */ - } - } - memcpy(table_e, e, t->t.entry_size); - table_e->next = UPB_END_OF_CHAIN; - assert(upb_inttable_lookup(t, e->key) == table_e); -} - -void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e) -{ - assert(e->key != 0); - if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) { - /* Need to resize. New table of double the size, add old elements to it. */ - upb_inttable new_table; - upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size); - new_table.t.count = t->t.count; - upb_inttable_entry *old_e; - for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e)) - intinsert(&new_table, old_e); - upb_inttable_free(t); - *t = new_table; - } - intinsert(t, e); -} - -static uint32_t empty_strbucket(upb_strtable *table) -{ - /* TODO: does it matter that this is biased towards the front of the table? */ - for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { - upb_strtable_entry *e = strent(table, i); - if(!e->key) return i; - } - assert(false); - return 0; -} - -static void strinsert(upb_strtable *t, upb_strtable_entry *e) -{ - assert(upb_strtable_lookup(t, e->key) == NULL); - e->key = upb_string_getref(e->key); - t->t.count++; - uint32_t bucket = strtable_bucket(t, e->key); - upb_strtable_entry *table_e = strent(t, bucket); - if(table_e->key) { /* Collision. */ - if(bucket == strtable_bucket(t, table_e->key)) { - /* Existing element is in its main posisiton. 
Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_strbucket(t); - while (table_e->next != UPB_END_OF_CHAIN) - table_e = strent(t, table_e->next); - table_e->next = empty_bucket; - table_e = strent(t, empty_bucket); - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. */ - uint32_t empty_bucket = empty_strbucket(t); - uint32_t evictee_bucket = strtable_bucket(t, table_e->key); - memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_strtable_entry *evictee_e = strent(t, evictee_bucket); - while(1) { - assert(evictee_e->key); - assert(evictee_e->next != UPB_END_OF_CHAIN); - if(evictee_e->next == bucket) { - evictee_e->next = empty_bucket; - break; - } - evictee_e = strent(t, evictee_e->next); - } - /* table_e remains set to our mainpos. */ - } - } - memcpy(table_e, e, t->t.entry_size); - table_e->next = UPB_END_OF_CHAIN; - assert(upb_strtable_lookup(t, e->key) == table_e); -} - -void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e) -{ - if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { - /* Need to resize. New table of double the size, add old elements to it. 
*/ - upb_strtable new_table; - upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); - upb_strtable_entry *old_e; - for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e)) - strinsert(&new_table, old_e); - upb_strtable_free(t); - *t = new_table; - } - strinsert(t, e); -} - -void *upb_inttable_begin(upb_inttable *t) { - return upb_inttable_next(t, intent(t, 0)); -} - -void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) { - upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1); - do { - cur = (void*)((char*)cur + t->t.entry_size); - if(cur == end) return NULL; - } while(cur->key == UPB_EMPTY_ENTRY); - return cur; -} - -void *upb_strtable_begin(upb_strtable *t) { - return upb_strtable_next(t, strent(t, 0)); -} - -void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { - upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1); - do { - cur = (void*)((char*)cur + t->t.entry_size); - if(cur == end) return NULL; - } while(cur->key == NULL); - return cur; -} - -#ifdef UPB_UNALIGNED_READS_OK -//----------------------------------------------------------------------------- -// MurmurHash2, by Austin Appleby (released as public domain). -// Reformatted and C99-ified by Joshua Haberman. -// Note - This code makes a few assumptions about how your machine behaves - -// 1. We can read a 4-byte value from any address without crashing -// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t -// And it has a few limitations - -// 1. It will not work incrementally. -// 2. It will not produce the same results on little-endian and big-endian -// machines. -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) -{ - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. 
- const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - - // Initialize the hash to a 'random' value - uint32_t h = seed ^ len; - - // Mix 4 bytes at a time into the hash - const uint8_t * data = (const uint8_t *)key; - while(len >= 4) { - uint32_t k = *(uint32_t *)data; - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - // Handle the last few bytes of the input array - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} - -#else // !UPB_UNALIGNED_READS_OK - -//----------------------------------------------------------------------------- -// MurmurHashAligned2, by Austin Appleby -// Same algorithm as MurmurHash2, but only does aligned reads - should be safer -// on certain platforms. -// Performance will be lower than MurmurHash2 - -#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } - -static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) -{ - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - const uint8_t * data = (const uint8_t *)key; - uint32_t h = seed ^ len; - uint8_t align = (uintptr_t)data & 3; - - if(align && (len >= 4)) { - // Pre-load the temp registers - uint32_t t = 0, d = 0; - - switch(align) { - case 1: t |= data[2] << 16; - case 2: t |= data[1] << 8; - case 3: t |= data[0]; - } - - t <<= (8 * align); - - data += 4-align; - len -= 4-align; - - int32_t sl = 8 * (4-align); - int32_t sr = 8 * align; - - // Mix - - while(len >= 4) { - d = *(uint32_t *)data; - t = (t >> sr) | (d << sl); - - uint32_t k = t; - - MIX(h,k,m); - - t = d; - - data += 4; - len -= 4; - } - - // Handle leftover data in temp registers - - d = 0; - - if(len >= align) { - switch(align) { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - } - - uint32_t k 
= (t >> sr) | (d << sl); - MIX(h,k,m); - - data += align; - len -= align; - - //---------- - // Handle tail bytes - - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - } else { - switch(len) { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - case 0: h ^= (t >> sr) | (d << sl); h *= m; - } - } - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } else { - while(len >= 4) { - uint32_t k = *(uint32_t *)data; - - MIX(h,k,m); - - data += 4; - len -= 4; - } - - //---------- - // Handle tail bytes - - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } -} -#undef MIX - -#endif // UPB_UNALIGNED_READS_OK diff --git a/core/upb_table.h b/core/upb_table.h deleted file mode 100644 index 20dae92..0000000 --- a/core/upb_table.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file defines very fast int->struct (inttable) and string->struct - * (strtable) hash tables. The struct can be of any size, and it is stored - * in the table itself, for cache-friendly performance. - * - * The table uses internal chaining with Brent's variation (inspired by the - * Lua implementation of hash tables). The hash function for strings is - * Austin Appleby's "MurmurHash." - */ - -#ifndef UPB_TABLE_H_ -#define UPB_TABLE_H_ - -#include -#include "upb.h" -#include "upb_string.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Note: the key cannot be zero! Zero is used by the implementation. */ -typedef uint32_t upb_inttable_key_t; - -#define UPB_END_OF_CHAIN (uint32_t)0 -#define UPB_EMPTY_ENTRY (uint32_t)0 - -typedef struct { - upb_inttable_key_t key; - uint32_t next; /* Internal chaining. 
*/ -} upb_inttable_entry; - -// TODO: consider storing the hash in the entry. This would avoid the need to -// rehash on table resizes, but more importantly could possibly improve lookup -// performance by letting us compare hashes before comparing lengths or the -// strings themselves. -typedef struct { - upb_string *key; // We own a ref. - uint32_t next; // Internal chaining. -} upb_strtable_entry; - -typedef struct { - void *entries; - uint32_t count; /* How many elements are currently in the table? */ - uint16_t entry_size; /* How big is each entry? */ - uint8_t size_lg2; /* The table is 2^size_lg2 in size. */ - uint32_t mask; -} upb_table; - -typedef struct { - upb_table t; -} upb_strtable; - -typedef struct { - upb_table t; -} upb_inttable; - -/* Initialize and free a table, respectively. Specify the initial size - * with 'size' (the size will be increased as necessary). Entry size - * specifies how many bytes each entry in the table is. */ -void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t entry_size); -void upb_inttable_free(upb_inttable *table); -void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); -void upb_strtable_free(upb_strtable *table); - -INLINE uint32_t upb_table_size(upb_table *t) { return 1 << t->size_lg2; } -INLINE uint32_t upb_inttable_size(upb_inttable *t) { - return upb_table_size(&t->t); -} -INLINE uint32_t upb_strtable_size(upb_strtable *t) { - return upb_table_size(&t->t); -} - -INLINE uint32_t upb_table_count(upb_table *t) { return t->count; } -INLINE uint32_t upb_inttable_count(upb_inttable *t) { - return upb_table_count(&t->t); -} -INLINE uint32_t upb_strtable_count(upb_strtable *t) { - return upb_table_count(&t->t); -} - -/* Inserts the given key into the hashtable with the given value. The key must - * not already exist in the hash table. The data will be copied from e into - * the hashtable (the amount of data copied comes from entry_size when the - * table was constructed). 
Therefore the data at val may be freed once the - * call returns. */ -void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e); -void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e); - -INLINE uint32_t upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) { - return (k & t->t.mask) + 1; /* Identity hash for ints. */ -} - -/* Looks up key in this table. Inlined because this is in the critical path of - * decoding. We have the caller specify the entry_size because fixing this as - * a literal (instead of reading table->entry_size) gives the compiler more - * ability to optimize. */ -INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key, - uint32_t entry_size) { - assert(key != 0); - uint32_t bucket = upb_inttable_bucket(t, key); - upb_inttable_entry *e; - do { - e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size); - if(e->key == key) return e; - } while((bucket = e->next) != UPB_END_OF_CHAIN); - return NULL; /* Not found. */ -} - -INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { - return upb_inttable_fastlookup(t, key, t->t.entry_size); -} - -void *upb_strtable_lookup(upb_strtable *t, upb_string *key); - -/* Provides iteration over the table. The order in which the entries are - * returned is undefined. Insertions invalidate iterators. The _next - * functions return NULL when the end has been reached. */ -void *upb_inttable_begin(upb_inttable *t); -void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur); - -void *upb_strtable_begin(upb_strtable *t); -void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TABLE_H_ */ diff --git a/src/upb.c b/src/upb.c new file mode 100644 index 0000000..897ca4e --- /dev/null +++ b/src/upb.c @@ -0,0 +1,75 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
+ * + */ + +#include +#include +#include + +#include "upb.h" +#include "upb_string.h" + +#define alignof(t) offsetof(struct { char c; t x; }, x) +#define TYPE_INFO(wire_type, ctype, allows_delimited, inmemory_type) \ + {alignof(ctype), sizeof(ctype), wire_type, \ + (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \ + UPB_TYPE(inmemory_type), #ctype}, + +const upb_type_info upb_types[] = { + {0, 0, 0, 0, 0, ""}, // There is no type 0. + TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1, DOUBLE) // DOUBLE + TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1, FLOAT) // FLOAT + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // INT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1, UINT64) // UINT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // INT32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1, UINT64) // FIXED64 + TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1, UINT32) // FIXED32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1, BOOL) // BOOL + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // STRING + TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0, MESSAGE) // GROUP + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, MESSAGE) // MESSAGE + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // BYTES + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, UINT32) // UINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, ENUM) // ENUM + TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1, INT32) // SFIXED32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1, INT64) // SFIXED64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // SINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // SINT64 +}; + +void upb_seterr(upb_status *status, enum upb_status_code code, + const char *msg, ...) 
{ + status->code = code; + upb_string_recycle(&status->str); + va_list args; + va_start(args, msg); + upb_string_vprintf(status->str, msg, args); + va_end(args); +} + +void upb_copyerr(upb_status *to, upb_status *from) +{ + to->code = from->code; + if(from->str) to->str = upb_string_getref(from->str); +} + +void upb_clearerr(upb_status *status) { + status->code = UPB_OK; + upb_string_recycle(&status->str); +} + +void upb_printerr(upb_status *status) { + if(status->str) { + fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n", + status->code, UPB_STRARG(status->str)); + } else { + fprintf(stderr, "code: %d, no msg\n", status->code); + } +} + +void upb_status_uninit(upb_status *status) { + upb_string_unref(status->str); +} diff --git a/src/upb.h b/src/upb.h new file mode 100644 index 0000000..837fc52 --- /dev/null +++ b/src/upb.h @@ -0,0 +1,262 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file contains shared definitions that are widely used across upb. + */ + +#ifndef UPB_H_ +#define UPB_H_ + +#include +#include +#include // only for size_t. +#include +#include "descriptor_const.h" +#include "upb_atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// inline if possible, emit standalone code if required. +#ifndef INLINE +#define INLINE static inline +#endif + +#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) +#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) +#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) + +// The maximum that any submessages can be nested. Matches proto2's limit. +#define UPB_MAX_NESTING 64 + +// The maximum number of fields that any one .proto type can have. Note that +// this is very different than the max field number. It is hard to imagine a +// scenario where more than 32k fields makes sense. +#define UPB_MAX_FIELDS (1<<15) +typedef int16_t upb_field_count_t; + +// Nested type names are separated by periods. 
+#define UPB_SYMBOL_SEPARATOR '.' + +// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType +#define UPB_SYMBOL_MAXLEN 128 + +// The longest chain that mutually-recursive types are allowed to form. For +// example, this is a type cycle of length 2: +// message A { +// B b = 1; +// } +// message B { +// A a = 1; +// } +#define UPB_MAX_TYPE_CYCLE_LEN 16 + +// The maximum depth that the type graph can have. Note that this setting does +// not automatically constrain UPB_MAX_NESTING, because type cycles allow for +// unlimited nesting if we do not limit it. +#define UPB_MAX_TYPE_DEPTH 64 + +// The biggest possible single value is a 10-byte varint. +#define UPB_MAX_ENCODED_SIZE 10 + + +/* Fundamental types and type constants. **************************************/ + +// A list of types as they are encoded on-the-wire. +enum upb_wire_type { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5, + + // This isn't a real wire type, but we use this constant to describe varints + // that are expected to be a maximum of 32 bits. + UPB_WIRE_TYPE_32BIT_VARINT = 8 +}; + +typedef uint8_t upb_wire_type_t; + +// Type of a field as defined in a .proto file. eg. string, int32, etc. The +// integers that represent this are defined by descriptor.proto. Note that +// descriptor.proto reserves "0" for errors, and we use it to represent +// exceptional circumstances. +typedef uint8_t upb_fieldtype_t; + +// For referencing the type constants tersely. +#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type +#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type + +// Info for a given field type. +typedef struct { + uint8_t align; + uint8_t size; + upb_wire_type_t native_wire_type; + uint8_t allowed_wire_types; // For packable fields, also allows delimited. 
+ uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32 + char *ctype; +} upb_type_info; + +// A static array of info about all of the field types, indexed by type number. +extern const upb_type_info upb_types[]; + +// The number of a field, eg. "optional string foo = 3". +typedef int32_t upb_field_number_t; + +// Label (optional, repeated, required) as defined in a .proto file. The +// values of this are defined by google.protobuf.FieldDescriptorProto.Label +// (from descriptor.proto). +typedef uint8_t upb_label_t; + +// A scalar (non-string) wire value. Used only for parsing unknown fields. +typedef union { + uint64_t varint; + uint64_t _64bit; + uint32_t _32bit; +} upb_wire_value; + +/* Polymorphic values of .proto types *****************************************/ + +struct _upb_string; +typedef struct _upb_string upb_string; +struct _upb_array; +typedef struct _upb_array upb_array; +struct _upb_msg; +typedef struct _upb_msg upb_msg; +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; + +typedef int32_t upb_strlen_t; +#define UPB_STRLEN_MAX INT32_MAX + +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 +#define UPB_VALUETYPE_BYTESRC 32 +#define UPB_VALUETYPE_RAW 33 + +// A single .proto value. The owner must have an out-of-band way of knowing +// the type, so that it knows which union member to use. +typedef struct { + union { + double _double; + float _float; + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + bool _bool; + upb_string *str; + upb_bytesrc *bytesrc; + upb_msg *msg; + upb_array *arr; + upb_atomic_refcount_t *refcount; + void *_void; + } val; + + // In debug mode we carry the value type around also so we can check accesses + // to be sure the right member is being read. 
+#ifndef NDEBUG + upb_valuetype_t type; +#endif +} upb_value; + +#ifdef NDEBUG +#define SET_TYPE(dest, val) +#else +#define SET_TYPE(dest, val) dest = val +#endif + +#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ + INLINE ctype upb_value_get ## name(upb_value val) { \ + assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ + return val.val.membername; \ + } \ + INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ + SET_TYPE(val->type, proto_type); \ + val->val.membername = cval; \ + } +UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); +UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); +UPB_VALUE_ACCESSORS(enumval, int32, int32_t, UPB_TYPE(ENUM)); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); +UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); +UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE)); +UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY); +UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); + +INLINE void upb_value_setraw(upb_value *val, uint64_t cval) { + SET_TYPE(val->type, UPB_VALUETYPE_RAW); + val->val.uint64 = cval; +} + +INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) { + assert(val.type == UPB_TYPE(MESSAGE) || + val.type == UPB_TYPE(STRING) || + val.type == UPB_VALUETYPE_ARRAY); + return val.val.refcount; +} + +// Status codes used as a return value. Codes >0 are not fatal and can be +// resumed. +enum upb_status_code { + // The operation completed successfully. + UPB_OK = 0, + + // The bytesrc is at EOF and all data was read successfully. + UPB_EOF = 1, + + // A read or write from a streaming src/sink could not be completed right now. 
+ UPB_TRYAGAIN = 2, + + // An unrecoverable error occurred. + UPB_ERROR = -1, + + // A recoverable error occurred (for example, data of the wrong type was + // encountered which we can skip over). + // UPB_STATUS_RECOVERABLE_ERROR = -2 +}; + +// TODO: consider adding error space and code, to let ie. errno be stored +// as a proper code, or application-specific error codes. +struct _upb_status { + char code; + upb_string *str; +}; + +typedef struct _upb_status upb_status; + +#define UPB_STATUS_INIT {UPB_OK, NULL} +#define UPB_ERRORMSG_MAXLEN 256 + +INLINE bool upb_ok(upb_status *status) { + return status->code == UPB_OK; +} + +INLINE void upb_status_init(upb_status *status) { + status->code = UPB_OK; + status->str = NULL; +} + +void upb_status_uninit(upb_status *status); + +void upb_printerr(upb_status *status); +void upb_clearerr(upb_status *status); +void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, + ...); +void upb_copyerr(upb_status *to, upb_status *from); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_H_ */ diff --git a/src/upb_atomic.h b/src/upb_atomic.h new file mode 100644 index 0000000..1cd848b --- /dev/null +++ b/src/upb_atomic.h @@ -0,0 +1,189 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * Only a very small part of upb is thread-safe. Notably, individual + * messages, arrays, and strings are *not* thread safe for mutating. + * However, we do make message *metadata* such as upb_msgdef and + * upb_context thread-safe, and their ownership is tracked via atomic + * refcounting. This header implements the small number of atomic + * primitives required to support this. The primitives we implement + * are: + * + * - a reader/writer lock (wrappers around platform-provided mutexes). + * - an atomic refcount. 
+ */ + +#ifndef UPB_ATOMIC_H_ +#define UPB_ATOMIC_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* inline if possible, emit standalone code if required. */ +#ifndef INLINE +#define INLINE static inline +#endif + +#ifdef UPB_THREAD_UNSAFE + +/* Non-thread-safe implementations. ******************************************/ + +typedef struct { + int v; +} upb_atomic_refcount_t; + +INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { + a->v = val; +} + +INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { + return a->v++ == 0; +} + +INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { + return --a->v == 0; +} + +INLINE int upb_atomic_read(upb_atomic_refcount_t *a) { + return a->v; +} + +INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) { + a->v += val; + return a->v == 0; +} + +INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { + int ret = a->v; + a->v += val; + return ret; +} + +#endif + +/* Atomic refcount ************************************************************/ + +#ifdef UPB_THREAD_UNSAFE + +/* Already defined above. */ + +#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 + +/* GCC includes atomic primitives. */ + +typedef struct { + volatile int v; +} upb_atomic_refcount_t; + +INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { + a->v = val; + __sync_synchronize(); /* Ensure the initialized value is visible. */ +} + +INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { + return __sync_fetch_and_add(&a->v, 1) == 0; +} + +INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) { + return __sync_add_and_fetch(&a->v, n) == 0; +} + +INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { + return __sync_sub_and_fetch(&a->v, 1) == 0; +} + +INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) { + return __sync_fetch_and_add(&a->v, 0); +} + +#elif defined(WIN32) + +/* Windows defines atomic increment/decrement. 
*/ +#include + +typedef struct { + volatile LONG val; +} upb_atomic_refcount_t; + +INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { + InterlockedExchange(&a->val, val); +} + +INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { + return InterlockedIncrement(&a->val) == 1; +} + +INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { + return InterlockedDecrement(&a->val) == 0; +} + +#else +#error Atomic primitives not defined for your platform/CPU. \ + Implement them or compile with UPB_THREAD_UNSAFE. +#endif + +INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) { + return upb_atomic_read(a) == 1; +} + +/* Reader/Writer lock. ********************************************************/ + +#ifdef UPB_THREAD_UNSAFE + +typedef struct { +} upb_rwlock_t; + +INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } + +#elif defined(UPB_USE_PTHREADS) + +#include + +typedef struct { + pthread_rwlock_t lock; +} upb_rwlock_t; + +INLINE void upb_rwlock_init(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_init(&l->lock, NULL); +} + +INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_destroy(&l->lock); +} + +INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_rdlock(&l->lock); +} + +INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_wrlock(&l->lock); +} + +INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_unlock(&l->lock); +} + +#else +#error Reader/writer lock is not defined for your platform/CPU. \ + Implement it or compile with UPB_THREAD_UNSAFE. 
+#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_ATOMIC_H_ */ diff --git a/src/upb_decoder.c b/src/upb_decoder.c new file mode 100644 index 0000000..4a43c4b --- /dev/null +++ b/src/upb_decoder.c @@ -0,0 +1,441 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_decoder.h" + +#include +#include +#include +#include "upb_def.h" + +/* Pure Decoding **************************************************************/ + +// The key fast-path varint-decoding routine. Here we can assume we have at +// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of +// possibilities for optimization/experimentation here. + +#ifdef USE_SSE_VARINT_DECODING +#include + +// This works, but is empirically slower than the branchy version below. Why? +// Most varints are very short. Next step: use branches for 1/2-byte varints, +// but use the SSE version for 3-10 byte varints. +INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) { + const char *p = *ptr; + __m128i val128 = _mm_loadu_si128((void*)p); + unsigned int continuation_bits = _mm_movemask_epi8(val128); + unsigned int bsr_val = ~continuation_bits; + int varint_length = __builtin_ffs(bsr_val); + if (varint_length > 10) { + upb_seterr(s, UPB_ERROR, "Unterminated varint"); + return false; + } + + uint16_t twob; + memcpy(&twob, p, 2); + twob &= 0x7f7f; + twob = ((twob & 0xff00) >> 1) | (twob & 0xff); + + uint64_t eightb; + memcpy(&eightb, p + 2, 8); + eightb &= 0x7f7f7f7f7f7f7f7f; + eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff); + eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff); + eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff); + + uint64_t all_bits = twob | (eightb << 14); + int varint_bits = varint_length * 7; + uint64_t mask = varint_bits == 70 ? 
(uint64_t)-1 : (1ULL << (varint_bits)) - 1; + *val = all_bits & mask; + *ptr = p + varint_length; + return true; +} + +#else + +INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) { + const char *p = *ptr; + uint32_t low, high = 0; + uint32_t b; + b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 28; + high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done; + + upb_seterr(s, UPB_ERROR, "Unterminated varint"); + return false; + +done: + *val = ((uint64_t)high << 32) | low; + *ptr = p; + return true; +} + +#endif + + +/* Decoding/Buffering of individual values ************************************/ + +// Performs zig-zag decoding, which is used by sint32 and sint64. +INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } + +typedef struct { + // Our current position in the data buffer. + const char *ptr; + + // End of this submessage, relative to *ptr. + const char *submsg_end; + + // Number of bytes available at ptr. + size_t len; + + // Msgdef for the current level. + upb_msgdef *msgdef; +} upb_dstate; + +// Constant used to signal that the submessage is a group and therefore we +// don't know its end offset. This cannot be the offset of a real submessage +// end because it takes at least one byte to begin a submessage. 
+#define UPB_GROUP_END_OFFSET 0 +#define UPB_MAX_VARINT_ENCODED_SIZE 10 + +INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { + s->ptr += len; + s->len -= len; +} + +INLINE void upb_dstate_setmsgend(upb_decoder *d, upb_dstate *s) { + s->submsg_end = (d->top->end_offset == UPB_GROUP_END_OFFSET) ? + (void*)UINTPTR_MAX : + upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); +} + +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s); + +// Called only from the slow path, this function copies the next "len" bytes +// from the stream to "data", adjusting the dstate appropriately. +static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, + upb_dstate *s) { + while (1) { + size_t to_copy = UPB_MIN(bytes_wanted, s->len); + memcpy(data, s->ptr, to_copy); + upb_dstate_advance(s, to_copy); + bytes_wanted -= to_copy; + if (bytes_wanted == 0) { + upb_dstate_setmsgend(d, s); + return true; + } + + // Get next buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + upb_string_recycle(&d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + s->ptr = upb_string_getrobuf(d->buf); + s->len = upb_string_len(d->buf); + } +} + +// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous +// bytes available in our current buffer. We don't inline this because we +// accept that it will be slow and we don't want to pay for two copies of it. +static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, + upb_value *val) { + char byte = 0x80; + uint64_t val64 = 0; + int bitpos; + for(bitpos = 0; + bitpos < 70 && (byte & 0x80) && upb_getbuf(d, &byte, 1, s); + bitpos += 7) + val64 |= ((uint64_t)byte & 0x7F) << bitpos; + + if(bitpos == 70) { + upb_seterr(d->status, UPB_ERROR, + "Varint was unterminated after 10 bytes.\n"); + return false; + } else if (d->status->code == UPB_EOF && bitpos == 0) { + // Regular EOF. 
+ return false; + } else if (d->status->code == UPB_EOF && (byte & 0x80)) { + upb_seterr(d->status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + return false; + } else { + // Success. + upb_value_setraw(val, val64); + return true; + } +} + +typedef struct { + upb_wire_type_t wire_type; + upb_field_number_t field_number; +} upb_tag; + +INLINE bool upb_decode_tag(upb_decoder *d, upb_dstate *s, upb_tag *tag) { + const char *p = s->ptr; + uint32_t tag_int; + upb_value val; + // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). + if (s->len < 2) goto slow; // unlikely. + tag_int = *p & 0x7f; + if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*p & 0x7f) << 7; + if ((*(p++) & 0x80) == 0) goto done; // likely +slow: + // Decode a full varint starting over from ptr. + if (!upb_decode_varint_slow(d, s, &val)) return false; + tag_int = upb_value_getint64(val); + p = s->ptr; // Trick the next line into not overwriting us. +done: + upb_dstate_advance(s, p - s->ptr); + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return true; +} + +INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { + if (s->len >= 16) { + // Common (fast) case. + uint64_t val64; + const char *p = s->ptr; + if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; + upb_dstate_advance(s, p - s->ptr); + upb_value_setraw(val, val64); + return true; + } else { + return upb_decode_varint_slow(d, s, val); + } +} + +INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, + upb_dstate *s, upb_value *val) { + static const char table[] = {0, 8, 0, 0, 0, 4}; + size_t bytes = table[wt]; + if (s->len >= bytes) { + // Common (fast) case. 
+ memcpy(val, s->ptr, bytes); + upb_dstate_advance(s, bytes); + } else { + if (!upb_getbuf(d, val, bytes, s)) return false; + } + return true; +} + +// "val" initially holds the length of the string, this is replaced by the +// contents of the string. +INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, + upb_dstate *s) { + upb_string_recycle(str); + uint32_t strlen = upb_value_getint32(*val); + if (s->len >= strlen) { + // Common (fast) case. + upb_string_substr(*str, d->buf, s->ptr - upb_string_getrobuf(d->buf), strlen); + upb_dstate_advance(s, strlen); + } else { + if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) + return false; + } + upb_value_setstr(val, *str); + return true; +} + + +/* The main decoding loop *****************************************************/ + +extern upb_wire_type_t upb_expected_wire_types[]; +// Returns true if wt is the correct on-the-wire type for ft. +INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { + // This doesn't currently support packed arrays. + return upb_types[ft].native_wire_type == wt; +} + +static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, + upb_value submsg_len, upb_fieldtype_t type) { + d->top++; + if(d->top >= d->limit) { + upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); + return UPB_ERROR; + } + d->top->end_offset = (type == UPB_TYPE(GROUP)) ? 
+ UPB_GROUP_END_OFFSET : + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + + upb_value_getint32(submsg_len); + d->top->msgdef = upb_downcast_msgdef(f->def); + upb_dstate_setmsgend(d, s); + return upb_dispatch_startsubmsg(&d->dispatcher, f); +} + +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s) { + d->top--; + upb_dstate_setmsgend(d, s); + return upb_dispatch_endsubmsg(&d->dispatcher); +} + +void upb_decoder_run(upb_src *src, upb_status *status) { + upb_decoder *d = (upb_decoder*)src; + d->status = status; + // We put our dstate on the stack so the compiler knows they can't be changed + // by external code (like when we dispatch a callback). We must be sure not + // to let its address escape this source file. + upb_dstate state = {NULL, (void*)0x1, 0, d->top->msgdef}; + +// TODO: handle UPB_SKIPSUBMSG +#define CHECK_FLOW(expr) if ((expr) == UPB_BREAK) { assert(!upb_ok(status)); goto err; } +#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; } + + CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); + + // Main loop: executed once per tag/field pair. + while(1) { + // Check for end-of-submessage. + while (state.ptr >= state.submsg_end) { + if (state.ptr > state.submsg_end) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); + goto err; + } + CHECK_FLOW(upb_pop(d, &state)); + } + + // Parse/handle tag. + upb_tag tag; + if (!upb_decode_tag(d, &state, &tag)) { + if (status->code == UPB_EOF && d->top == d->stack) { + // Normal end-of-file. + upb_clearerr(status); + CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + return; + } else { + if (status->code == UPB_EOF) { + upb_seterr(status, UPB_ERROR, + "Input ended in the middle of a submessage."); + } + goto err; + } + } + + // Decode wire data. Hopefully this branch will predict pretty well + // since most types will read a varint here. 
+ upb_value val; + switch (tag.wire_type) { + case UPB_WIRE_TYPE_START_GROUP: + break; // Nothing to do now, below we will push appropriately. + case UPB_WIRE_TYPE_END_GROUP: + if(d->top->end_offset != UPB_GROUP_END_OFFSET) { + upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); + goto err; + } + CHECK_FLOW(upb_pop(d, &state)); + continue; // We have no value to dispatch. + case UPB_WIRE_TYPE_VARINT: + case UPB_WIRE_TYPE_DELIMITED: + // For the delimited case we are parsing the length. + CHECK(upb_decode_varint(d, &state, &val)); + break; + case UPB_WIRE_TYPE_32BIT: + case UPB_WIRE_TYPE_64BIT: + CHECK(upb_decode_fixed(d, tag.wire_type, &state, &val)); + break; + } + + // Look up field by tag number. + upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, tag.field_number); + + if (!f) { + if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) + CHECK(upb_decode_string(d, &val, &d->tmp, &state)); + CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, tag.field_number, val)); + } else if (!upb_check_type(tag.wire_type, f->type)) { + // TODO: put more details in this error msg. + upb_seterr(status, UPB_ERROR, "Field had incorrect type, name: " UPB_STRFMT, UPB_STRARG(f->name)); + upb_printerr(status); + *(int*)0 = 0; + goto err; + } + + // Perform any further massaging of the data now that we have the fielddef. + // Now we can distinguish strings from submessages, and we know about + // zig-zag-encoded types. + // TODO: handle packed encoding. + // TODO: if we were being paranoid, we could check for 32-bit-varint types + // that the top 32 bits all match the highest bit of the low 32 bits. + // If this is not true we are losing data. But the main protobuf library + // doesn't check this, and it would slow us down, so pass for now. + switch (f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + CHECK_FLOW(upb_push(d, &state, f, val, f->type)); + continue; // We have no value to dispatch. 
+ case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + CHECK(upb_decode_string(d, &val, &d->tmp, &state)); + break; + case UPB_TYPE(SINT32): + upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); + break; + case UPB_TYPE(SINT64): + upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); + break; + default: +#ifndef NDEBUG + val.type = upb_types[f->type].inmemory_type; +#endif + break; // Other types need no further processing at this point. + } + CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); + } + +err: + if (upb_ok(status)) { + upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); + } +} + +void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_decoder *d = (upb_decoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers, false); + d->top = d->stack; + d->buf_stream_offset = 0; + d->top->msgdef = d->toplevel_msgdef; + // The top-level message is not delimited (we can keep receiving data for it + // indefinitely), so we treat it like a group. + d->top->end_offset = 0; +} + +void upb_decoder_init(upb_decoder *d, upb_msgdef *msgdef) { + static upb_src_vtbl vtbl = { + &upb_decoder_sethandlers, + &upb_decoder_run, + }; + upb_src_init(&d->src, &vtbl); + upb_dispatcher_init(&d->dispatcher); + d->toplevel_msgdef = msgdef; + d->limit = &d->stack[UPB_MAX_NESTING]; + d->buf = NULL; + d->tmp = NULL; +} + +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { + d->bytesrc = bytesrc; + d->top = &d->stack[0]; + d->top->msgdef = d->toplevel_msgdef; + // Never want to end top-level message, so treat it like a group. 
+ d->top->end_offset = UPB_GROUP_END_OFFSET; +} + +void upb_decoder_uninit(upb_decoder *d) { + upb_string_unref(d->buf); + upb_string_unref(d->tmp); +} + +upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; } diff --git a/src/upb_decoder.h b/src/upb_decoder.h new file mode 100644 index 0000000..1c62753 --- /dev/null +++ b/src/upb_decoder.h @@ -0,0 +1,86 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * upb_decoder implements a high performance, streaming decoder for protobuf + * data that works by implementing upb_src and getting its data from a + * upb_bytesrc. + * + * The decoder does not currently support non-blocking I/O, in the sense that + * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the + * decoder when data becomes available again. Support for this could be added, + * but it would add complexity and perhaps cost efficiency also. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_DECODER_H_ +#define UPB_DECODER_H_ + +#include +#include +#include "upb_def.h" +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_decoder *****************************************************************/ + +// The decoder keeps a stack with one entry per level of recursion. +// upb_decoder_frame is one frame of that stack. +typedef struct { + upb_msgdef *msgdef; + size_t end_offset; // For groups, 0. +} upb_decoder_frame; + +struct _upb_decoder { + // Immutable state of the decoder. + upb_src src; + upb_dispatcher dispatcher; + upb_bytesrc *bytesrc; + upb_msgdef *toplevel_msgdef; + upb_decoder_frame stack[UPB_MAX_NESTING]; + + // Mutable state of the decoder. + + // Where we will store any errors that occur. + upb_status *status; + + // Stack entries store the offset where the submsg ends (for groups, 0). + upb_decoder_frame *top, *limit; + + // Current input buffer. + upb_string *buf; + + // Temporary string for passing to callbacks. 
+ upb_string *tmp; + + // The offset within the overall stream represented by the *beginning* of buf. + size_t buf_stream_offset; +}; + +// A upb_decoder decodes the binary protocol buffer format, writing the data it +// decodes to a upb_sink. +struct _upb_decoder; +typedef struct _upb_decoder upb_decoder; + +// Allocates and frees a upb_decoder, respectively. +void upb_decoder_init(upb_decoder *d, upb_msgdef *md); +void upb_decoder_uninit(upb_decoder *d); + +// Resets the internal state of an already-allocated decoder. This puts it in a +// state where it has not seen any data, and expects the next data to be from +// the beginning of a new protobuf. Parsers must be reset before they can be +// used. A decoder can be reset multiple times. +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); + +// Returns a upb_src pointer by which the decoder can be used. The returned +// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). +upb_src *upb_decoder_src(upb_decoder *d); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_DECODER_H_ */ diff --git a/src/upb_def.c b/src/upb_def.c new file mode 100644 index 0000000..651afc1 --- /dev/null +++ b/src/upb_def.c @@ -0,0 +1,1349 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + */ + +#include +#include +#include "descriptor_const.h" +#include "descriptor.h" +#include "upb_def.h" + +#define alignof(t) offsetof(struct { char c; t x; }, x) + +/* Rounds p up to the next multiple of t. */ +static size_t upb_align_up(size_t val, size_t align) { + return val % align == 0 ? val : val + align - (val % align); +} + +static int upb_div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? 
(numerator - 1) / denominator + 1 : 0; +} + +/* Joins strings together, for example: + * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" + * join("", "Baz") -> "Baz" + * Caller owns a ref on the returned string. */ +static upb_string *upb_join(upb_string *base, upb_string *name) { + if (!base || upb_string_len(base) == 0) { + return upb_string_getref(name); + } else { + return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, + UPB_STRARG(base), UPB_STRARG(name)); + } +} + +/* Search for a character in a string, in reverse. */ +static int my_memrchr(char *data, char c, size_t len) +{ + int off = len-1; + while(off > 0 && data[off] != c) --off; + return off; +} + +/* upb_def ********************************************************************/ + +// Defs are reference counted, but can have cycles when types are +// self-recursive or mutually recursive, so we need to be capable of collecting +// the cycles. In our situation defs are immutable (so cycles cannot be +// created or destroyed post-initialization). We need to be thread-safe but +// want to avoid locks if at all possible and rely only on atomic operations. +// +// Our scheme is as follows. First we give each def a flag indicating whether +// it is part of a cycle or not. Because defs are immutable, this flag will +// never change. For acyclic defs, we can use a naive algorithm and avoid the +// overhead of dealing with cycles. Most defs will be acyclic, and most cycles +// will be very short. +// +// For defs that participate in cycles we keep two reference counts. One +// tracks references that come from outside the cycle (we call these external +// references), and is incremented and decremented like a regular refcount. +// The other is a cycle refcount, and works as follows. Every cycle is +// considered distinct, even if two cycles share members. For example, this +// graph has two distinct cycles: +// +// A-->B-->C +// ^ | | +// +---+---+ +// +// The cycles in this graph are AB and ABC. 
When A's external refcount +// transitions from 0->1, we say that A takes "cycle references" on both +// cycles. Taking a cycle reference means incrementing the cycle refcount of +// all defs in the cycle. Since A and B are common to both cycles, A and B's +// cycle refcounts will be incremented by two, and C's will be incremented by +// one. Likewise, when A's external refcount transitions from 1->0, we +// decrement A and B's cycle refcounts by two and C's by one. We collect a +// cyclic type when its cycle refcount drops to zero. A precondition for this +// is that the external refcount has dropped to zero also. +// +// This algorithm is relatively cheap, since it only requires extra work when +// the external refcount on a cyclic type transitions from 0->1 or 1->0. + +static void upb_msgdef_free(upb_msgdef *m); +static void upb_enumdef_free(upb_enumdef *e); +static void upb_unresolveddef_free(struct _upb_unresolveddef *u); + +static void upb_def_free(upb_def *def) +{ + switch(def->type) { + case UPB_DEF_MSG: + upb_msgdef_free(upb_downcast_msgdef(def)); + break; + case UPB_DEF_ENUM: + upb_enumdef_free(upb_downcast_enumdef(def)); + break; + case UPB_DEF_SVC: + assert(false); /* Unimplemented. */ + break; + case UPB_DEF_UNRESOLVED: + upb_unresolveddef_free(upb_downcast_unresolveddef(def)); + break; + default: + assert(false); + } +} + +// Depth-first search for all cycles that include cycle_base. Returns the +// number of paths from def that lead to cycle_base, which is equivalent to the +// number of cycles def is in that include cycle_base. +// +// open_defs tracks the set of nodes that are currently being visited in the +// search so we can stop the search if we detect a cycles that do not involve +// cycle_base. We can't color the nodes as we go by writing to a member of the +// def, because another thread could be performing the search concurrently. 
+static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, + upb_msgdef **open_defs, int num_open_defs, + bool ref) { + bool found = false; + for(int i = 0; i < num_open_defs; i++) { + if(open_defs[i] == m) { + // We encountered a cycle that did not involve cycle_base. + found = true; + break; + } + } + + if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) { + return 0; + } else if(m == cycle_base) { + return 1; + } else { + int path_count = 0; + if(cycle_base == NULL) { + cycle_base = m; + } else { + open_defs[num_open_defs++] = m; + } + upb_msg_iter iter = upb_msg_begin(m); + for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { + upb_fielddef *f = upb_msg_iter_field(iter); + upb_def *def = f->def; + if(upb_issubmsg(f) && def->is_cyclic) { + upb_msgdef *sub_m = upb_downcast_msgdef(def); + path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs, + num_open_defs, ref); + } + } + if(ref) { + upb_atomic_add(&m->cycle_refcount, path_count); + } else { + if(upb_atomic_add(&m->cycle_refcount, -path_count)) + upb_def_free(UPB_UPCAST(m)); + } + return path_count; + } +} + +void _upb_def_reftozero(upb_def *def) { + if(def->is_cyclic) { + upb_msgdef *m = upb_downcast_msgdef(def); + upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false); + } else { + upb_def_free(def); + } +} + +void _upb_def_cyclic_ref(upb_def *def) { + upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); +} + +static void upb_def_init(upb_def *def, upb_deftype type) { + def->type = type; + def->is_cyclic = 0; // We detect this later, after resolving refs. 
+ def->search_depth = 0; + def->fqname = NULL; + upb_atomic_refcount_init(&def->refcount, 1); +} + +static void upb_def_uninit(upb_def *def) { + upb_string_unref(def->fqname); +} + + +/* upb_defbuilder ************************************************************/ + +// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in +// the format defined in descriptor.proto. The output of a upb_defbuilder is +// a list of upb_def* that possibly contain unresolved references. +// +// We use a separate object (upb_defbuilder) instead of having the defs handle +// the parse themselves because we need to store state that is only necessary +// during the building process itself. + +// When we are bootstrapping descriptor.proto, we must help the bare decoder out +// by telling it when to descend into a submessage, because with the wire format +// alone we cannot tell the difference between a submessage and a string. +// +// TODO: In the long-term, we should bootstrap from a serialization format that +// contains this information, so we can remove this special-case code. This +// would involve defining a serialization format very similar to the existing +// protobuf format, but that contains more information about the wire type. +#define BEGIN_SUBMSG 100 + +// upb_deflist: A little dynamic array for storing a growing list of upb_defs. 
+typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +} upb_deflist; + +static void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(void*)); + l->len = 0; +} + +static void upb_deflist_uninit(upb_deflist *l) { + for(uint32_t i = 0; i < l->len; i++) + if(l->defs[i]) upb_def_unref(l->defs[i]); + free(l->defs); +} + +static void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(void*)); + } + l->defs[l->len++] = d; +} + +static upb_def *upb_deflist_last(upb_deflist *l) { + return l->defs[l->len-1]; +} + +// Qualify the defname for all defs starting with offset "start" with "str". +static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } +} + +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). +typedef struct { + upb_string *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. + int start; +} upb_defbuilder_frame; + +struct _upb_defbuilder { + upb_deflist defs; + upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; + int stack_len; + upb_status status; + + uint32_t number; + upb_string *name; + bool saw_number; + bool saw_name; + + upb_fielddef *f; +}; +typedef struct _upb_defbuilder upb_defbuilder; + +// Forward declares for top-level file descriptors. 
+static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, + upb_handlers *h); + + +static void upb_defbuilder_init(upb_defbuilder *b) { + upb_deflist_init(&b->defs); + upb_status_init(&b->status); + b->stack_len = 0; + b->name = NULL; +} + +static void upb_defbuilder_uninit(upb_defbuilder *b) { + upb_string_unref(b->name); + upb_status_uninit(&b->status); + upb_deflist_uninit(&b->defs); +} + +static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { + if (b->stack_len <= 1) return NULL; + int index = b->stack[b->stack_len-1].start - 1; + assert(index >= 0); + return upb_downcast_msgdef(b->defs.defs[index]); +} + +static upb_def *upb_defbuilder_last(upb_defbuilder *b) { + return upb_deflist_last(&b->defs); +} + +// Start/end handlers for FileDescriptorProto and DescriptorProto (the two +// entities that have names and can contain sub-definitions. +void upb_defbuilder_startcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = &b->stack[b->stack_len++]; + f->start = b->defs.len; + f->name = NULL; +} + +void upb_defbuilder_endcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = &b->stack[--b->stack_len]; + upb_deflist_qualify(&b->defs, f->name, f->start); + upb_string_unref(f->name); +} + +void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { + upb_defbuilder_frame *f = &b->stack[b->stack_len-1]; + upb_string_unref(f->name); + f->name = upb_string_getref(str); +} + +// Handlers for google.protobuf.FileDescriptorProto. 
+static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, + upb_fielddef *f, + upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + } + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( + void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + upb_msgdef_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_enumdef_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + default: + // TODO: services and extensions. + return UPB_SKIPSUBMSG; + } +} + +static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_defbuilder_FileDescriptorProto_startmsg, + &upb_defbuilder_FileDescriptorProto_endmsg, + &upb_defbuilder_FileDescriptorProto_value, + &upb_defbuilder_FileDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b, &b->status); +} + +// Handlers for google.protobuf.FileDescriptorSet. 
+static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, + upb_fielddef *f, + upb_value val) { + (void)b; + (void)val; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + return BEGIN_SUBMSG; + } + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( + void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + upb_defbuilder_register_FileDescriptorProto(b, h); + return UPB_DELEGATE; + } + return UPB_SKIPSUBMSG; +} + +static void upb_defbuilder_register_FileDescriptorSet( + upb_defbuilder *b, upb_handlers *h) { + static upb_handlerset handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorSet_value, + &upb_defbuilder_FileDescriptorSet_startsubmsg, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b, &b->status); +} + + +/* upb_unresolveddef **********************************************************/ + +// Unresolved defs are used as temporary placeholders for a def whose name has +// not been resolved yet. During the name resolution step, all unresolved defs +// are replaced with pointers to the actual def being referenced. +typedef struct _upb_unresolveddef { + upb_def base; + + // The target type name. This may or may not be fully qualified. It is + // tempting to want to use base.fqname for this, but that will be qualified + // which is inappropriate for a name we still have to resolve. + upb_string *name; +} upb_unresolveddef; + +// Is passed a ref on the string. 
+static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { + upb_unresolveddef *def = malloc(sizeof(*def)); + upb_def_init(&def->base, UPB_DEF_UNRESOLVED); + def->name = upb_string_getref(str); + return def; +} + +static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { + upb_string_unref(def->name); + upb_def_uninit(&def->base); + free(def); +} + + +/* upb_enumdef ****************************************************************/ + +static void upb_enumdef_free(upb_enumdef *e) { + upb_enum_iter i; + for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + // Frees the ref taken when the string was parsed. + upb_string_unref(upb_enum_iter_name(i)); + } + upb_strtable_free(&e->ntoi); + upb_inttable_free(&e->iton); + upb_def_uninit(&e->base); + free(e); +} + +// google.protobuf.EnumValueDescriptorProto. +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + b->saw_number = false; + b->saw_name = false; + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, + upb_fielddef *f, + upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(b->name); + b->name = upb_string_getref(upb_value_getstr(val)); + b->saw_name = true; + break; + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + b->number = upb_value_getint32(val); + b->saw_number = true; + break; + default: + break; + } + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + if(!b->saw_number || !b->saw_name) { + upb_seterr(&b->status, UPB_ERROR, "Enum value missing name or number."); + return UPB_BREAK; + } + upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; + upb_iton_ent iton_ent = {{b->number, 0}, b->name}; + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); + upb_strtable_insert(&e->ntoi, 
&ntoi_ent.e); + upb_inttable_insert(&e->iton, &iton_ent.e); + // We don't unref "name" because we pass our ref to the iton entry of the + // table. strtables can ref their keys, but the inttable doesn't know that + // the value is a string. + b->name = NULL; + return UPB_CONTINUE; +} + +static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_enumdef_EnumValueDescriptorProto_startmsg, + &upb_enumdef_EnumValueDescriptorProto_endmsg, + &upb_enumdef_EnumValueDescriptorProto_value, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b, &b->status); +} + +// google.protobuf.EnumDescriptorProto. +static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + upb_deflist_push(&b->defs, UPB_UPCAST(e)); + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { + (void)_b; + assert(upb_defbuilder_last((upb_defbuilder*)_b)->fqname != NULL); + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, + upb_fielddef *f, + upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: { + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); + upb_string_unref(e->base.fqname); + e->base.fqname = upb_string_getref(upb_value_getstr(val)); + return UPB_CONTINUE; + } + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_CONTINUE; + } +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(void *_b, + upb_fielddef *f, + upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + 
upb_enumdef_register_EnumValueDescriptorProto(b, h); + return UPB_DELEGATE; + default: + return UPB_SKIPSUBMSG; + } +} + +static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_enumdef_EnumDescriptorProto_startmsg, + &upb_enumdef_EnumDescriptorProto_endmsg, + &upb_enumdef_EnumDescriptorProto_value, + &upb_enumdef_EnumDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b, &b->status); +} + +upb_enum_iter upb_enum_begin(upb_enumdef *e) { + // We could iterate over either table here; the choice is arbitrary. + return upb_inttable_begin(&e->iton); +} + +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { + assert(iter); + return upb_inttable_next(&e->iton, &iter->e); +} + +upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { + upb_iton_ent *e = + (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); + return e ? e->string : NULL; +} + + +/* upb_fielddef ***************************************************************/ + +static void upb_fielddef_free(upb_fielddef *f) { + upb_string_unref(f->name); + if(f->owned) { + upb_def_unref(f->def); + } + free(f); +} + +static upb_flow_t upb_fielddef_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_fielddef *f = malloc(sizeof(*f)); + f->number = -1; + f->name = NULL; + f->def = NULL; + f->owned = false; + f->msgdef = upb_defbuilder_top(b); + b->f = f; + return UPB_CONTINUE; +} + +static upb_flow_t upb_fielddef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_fielddef *f = b->f; + // TODO: verify that all required fields were present. + assert(f->number != -1 && f->name != NULL); + assert((f->def != NULL) == upb_hasdef(f)); + + // Field was successfully read, add it as a field of the msgdef. 
+ upb_msgdef *m = upb_defbuilder_top(b); + upb_itof_ent itof_ent = {{f->number, 0}, f}; + upb_ntof_ent ntof_ent = {{f->name, 0}, f}; + upb_inttable_insert(&m->itof, &itof_ent.e); + upb_strtable_insert(&m->ntof, &ntof_ent.e); + return UPB_CONTINUE; +} + +static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: + b->f->type = upb_value_getenumval(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: + b->f->label = upb_value_getenumval(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: + b->f->number = upb_value_getint32(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(b->f->name); + b->f->name = upb_string_getref(upb_value_getstr(val)); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { + upb_def_unref(b->f->def); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); + b->f->owned = true; + break; + } + } + return UPB_CONTINUE; +} + +static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_fielddef_startmsg, + &upb_fielddef_endmsg, + &upb_fielddef_value, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b, &b->status); +} + + +/* upb_msgdef *****************************************************************/ + +static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { + // Sort by data size (ascending) to reduce padding. + size_t size1 = upb_types[f1->type].size; + size_t size2 = upb_types[f2->type].size; + if (size1 != size2) return size1 - size2; + // Otherwise return in number order (just so we get a reproduceable order. 
+ return f1->number - f2->number; +} + +static int upb_compare_fields(const void *f1, const void *f2) { + return upb_compare_typed_fields(*(void**)f1, *(void**)f2); +} + +// google.protobuf.DescriptorProto. +static upb_flow_t upb_msgdef_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_msgdef *m = malloc(sizeof(*m)); + upb_def_init(&m->base, UPB_DEF_MSG); + upb_atomic_refcount_init(&m->cycle_refcount, 0); + upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); + upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); + upb_deflist_push(&b->defs, UPB_UPCAST(m)); + upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_msgdef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_msgdef *m = upb_defbuilder_top(b); + if(!m->base.fqname) { + upb_seterr(&b->status, UPB_ERROR, "Encountered message with no name."); + return UPB_BREAK; + } + + // Create an ordering over the fields. + upb_field_count_t n = upb_msgdef_numfields(m); + upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); + upb_field_count_t field = 0; + upb_msg_iter i; + for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + sorted_fields[field++]= upb_msg_iter_field(i); + } + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); + + // Assign offsets in the msg. + m->set_flags_bytes = upb_div_round_up(n, 8); + m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes; + + size_t max_align = 0; + for (int i = 0; i < n; i++) { + upb_fielddef *f = sorted_fields[i]; + const upb_type_info *type_info = &upb_types[f->type]; + + // This identifies the set bit. When we implement is_initialized (a + // general check about whether all required bits are set) we will probably + // want to use a different ordering that puts all the required bits + // together. 
+ f->field_index = i; + f->set_bit_mask = 1 << (i % 8); + f->set_bit_offset = i / 8; + + size_t size, align; + if (upb_isarray(f)) { + size = sizeof(void*); + align = alignof(void*); + } else { + size = type_info->size; + align = type_info->align; + } + // General alignment rules are: each member must be at an address that is a + // multiple of that type's alignment. Also, the size of the structure as a + // whole must be a multiple of the greatest alignment of any member. + size_t offset = upb_align_up(m->size, align); + // Offsets are relative to the end of the refcount. + f->byte_offset = offset - sizeof(upb_atomic_refcount_t); + m->size = offset + size; + max_align = UPB_MAX(max_align, align); + } + free(sorted_fields); + + if (max_align > 0) m->size = upb_align_up(m->size, max_align); + + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: { + upb_msgdef *m = upb_defbuilder_top(b); + upb_string_unref(m->base.fqname); + m->base.fqname = upb_string_getref(upb_value_getstr(val)); + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + return UPB_CONTINUE; + } + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + default: + // TODO: extensions. 
+ return UPB_CONTINUE; + } +} + +static upb_flow_t upb_msgdef_startsubmsg(void *_b, upb_fielddef *f, + upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: + upb_fielddef_register_FieldDescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: + upb_msgdef_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_enumdef_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + break; + default: + return UPB_SKIPSUBMSG; + } +} + +static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_msgdef_startmsg, + &upb_msgdef_endmsg, + &upb_msgdef_value, + &upb_msgdef_startsubmsg, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b, &b->status); +} + +static void upb_msgdef_free(upb_msgdef *m) +{ + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_fielddef_free(upb_msg_iter_field(i)); + upb_strtable_free(&m->ntof); + upb_inttable_free(&m->itof); + upb_def_uninit(&m->base); + free(m); +} + +static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { + (void)m; + if(f->owned) upb_def_unref(f->def); + f->def = def; + // We will later make the ref unowned if it is a part of a cycle. + f->owned = true; + upb_def_ref(def); +} + +upb_msg_iter upb_msg_begin(upb_msgdef *m) { + return upb_inttable_begin(&m->itof); +} + +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { + return upb_inttable_next(&m->itof, &iter->e); +} + +/* upb_symtab adding defs *****************************************************/ + +// This is a self-contained group of functions that, given a list of upb_defs +// whose references are not yet resolved, resolves references and adds them +// atomically to a upb_symtab. 
+ +typedef struct { + upb_strtable_entry e; + upb_def *def; +} upb_symtab_ent; + +// Given a symbol and the base symbol inside which it is defined, find the +// symbol's definition in t. +static upb_symtab_ent *upb_resolve(upb_strtable *t, + upb_string *base, upb_string *sym) +{ + if(upb_string_len(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN || + upb_string_len(sym) == 0) return NULL; + + if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { + // Symbols starting with '.' are absolute, so we do a single lookup. + // Slice to omit the leading '.' + upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1); + upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); + upb_string_unref(sym_str); + return e; + } else { + // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. + upb_string *sym_str = upb_string_new(); + int baselen = upb_string_len(base); + while(1) { + // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym + upb_strlen_t len = baselen + upb_string_len(sym) + 1; + char *buf = upb_string_getrwbuf(sym_str, len); + memcpy(buf, upb_string_getrobuf(base), baselen); + buf[baselen] = UPB_SYMBOL_SEPARATOR; + memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym)); + + upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); + if (e) return e; + else if(baselen == 0) return NULL; // No more scopes to try. + + baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); + } + } +} + +// Performs a pass over the type graph to find all cycles that include m. +static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) +{ + if(depth > UPB_MAX_TYPE_DEPTH) { + // We have found a non-cyclic path from the base of the type tree that + // exceeds the maximum allowed depth. 
There are many situations in upb + // where we recurse over the type tree (like for example, right now) and an + // absurdly deep tree could cause us to stack overflow on systems with very + // limited stacks. + upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at " + "depth %d in the type graph, which exceeds the maximum type " + "depth of %d.", UPB_UPCAST(m)->fqname, depth, + UPB_MAX_TYPE_DEPTH); + return false; + } else if(UPB_UPCAST(m)->search_depth == 1) { + // Cycle! + int cycle_len = depth - 1; + if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { + upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved " + "in a cycle of length %d, which exceeds the maximum type " + "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, + UPB_MAX_TYPE_CYCLE_LEN); + return false; + } + return true; + } else if(UPB_UPCAST(m)->search_depth > 0) { + // This was a cycle, but did not originate from the base of our search tree. + // We'll find it when we call find_cycles() on this node directly. + return false; + } else { + UPB_UPCAST(m)->search_depth = ++depth; + bool cycle_found = false; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if(!upb_issubmsg(f)) continue; + upb_def *sub_def = f->def; + upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); + if(upb_symtab_findcycles(sub_m, depth, status)) { + cycle_found = true; + UPB_UPCAST(m)->is_cyclic = true; + if(f->owned) { + upb_atomic_unref(&sub_def->refcount); + f->owned = false; + } + } + } + UPB_UPCAST(m)->search_depth = 0; + return cycle_found; + } +} + +// Given a table of pending defs "tmptab" and a table of existing defs "symtab", +// resolves all of the unresolved refs for the defs in tmptab. 
+bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, + upb_status *status) +{ + upb_symtab_ent *e; + for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + // Type names are resolved relative to the message in which they appear. + upb_string *base = e->e.key; + + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if(!upb_hasdef(f)) continue; // No resolving necessary. + upb_string *name = upb_downcast_unresolveddef(f->def)->name; + + // Resolve from either the tmptab (pending adds) or symtab (existing + // defs). If both exist, prefer the pending add, because it will be + // overwriting the existing def. + upb_symtab_ent *found; + if(!(found = upb_resolve(tmptab, base, name)) && + !(found = upb_resolve(symtab, base, name))) { + upb_seterr(status, UPB_ERROR, + "could not resolve symbol '" UPB_STRFMT "'" + " in context '" UPB_STRFMT "'", + UPB_STRARG(name), UPB_STRARG(base)); + return false; + } + + // Check the type of the found def. + upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + if(found->def->type != expected) { + upb_seterr(status, UPB_ERROR, "Unexpected type"); + return false; + } + upb_msgdef_resolve(m, f, found->def); + } + } + + // Deal with type cycles. + for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + // The findcycles() call will decrement the external refcount of the + upb_symtab_findcycles(m, 0, status); + upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); + } + + return true; +} + +// Given a list of defs, a list of extensions (in the future), and a flag +// indicating whether the new defs can overwrite existing defs in the symtab, +// attempts to add the given defs to the symtab. 
The whole operation either +// succeeds or fails. Ownership of "defs" and "exts" is taken. +bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, + bool allow_redef, upb_status *status) +{ + upb_rwlock_wrlock(&s->lock); + + // Build a table of the defs we mean to add, for duplicate detection and name + // resolution. + upb_strtable tmptab; + upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); + for (int i = 0; i < num_defs; i++) { + upb_def *def = defs[i]; + upb_symtab_ent e = {{def->fqname, 0}, def}; + + // Redefinition is never allowed within a single FileDescriptorSet. + // Additionally, we only allow overwriting of an existing definition if + // allow_redef is set. + if (upb_strtable_lookup(&tmptab, def->fqname) || + (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { + upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT, + UPB_STRARG(def->fqname)); + goto err; + } + + // Pass ownership from the deflist to the strtable. + upb_strtable_insert(&tmptab, &e.e); + defs[i] = NULL; + } + + // TODO: process the list of extensions by modifying entries from + // tmptab in-place (copying them from the symtab first if necessary). + + if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; + + // The defs in tmptab have been vetted, and can be added to the symtab + // without causing errors. Now add all tmptab defs to the symtab, + // overwriting (and releasing a ref on) any existing defs with the same + // names. Ownership for tmptab defs passes from the tmptab to the symtab. 
+ upb_symtab_ent *tmptab_e; + for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e; + tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) { + upb_symtab_ent *symtab_e = + upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); + if(symtab_e) { + upb_def_unref(symtab_e->def); + symtab_e->def = tmptab_e->def; + } else { + upb_strtable_insert(&s->symtab, &tmptab_e->e); + } + } + + upb_rwlock_unlock(&s->lock); + upb_strtable_free(&tmptab); + return true; + +err: + // We need to free all defs from "tmptab." + upb_rwlock_unlock(&s->lock); + for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; + e = upb_strtable_next(&tmptab, &e->e)) { + upb_def_unref(e->def); + } + upb_strtable_free(&tmptab); + for (int i = 0; i < num_defs; i++) upb_def_unref(defs[i]); + return false; +} + + +/* upb_symtab public interface ************************************************/ + +upb_symtab *upb_symtab_new() +{ + upb_symtab *s = malloc(sizeof(*s)); + upb_atomic_refcount_init(&s->refcount, 1); + upb_rwlock_init(&s->lock); + upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); + s->fds_msgdef = NULL; + return s; +} + +static void upb_free_symtab(upb_strtable *t) +{ + upb_symtab_ent *e; + for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) + upb_def_unref(e->def); + upb_strtable_free(t); +} + +void _upb_symtab_free(upb_symtab *s) +{ + upb_free_symtab(&s->symtab); + upb_rwlock_destroy(&s->lock); + free(s); +} + +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) +{ + upb_rwlock_rdlock(&s->lock); + int total = upb_strtable_count(&s->symtab); + // We may only use part of this, depending on how many symbols are of the + // correct type. 
+ upb_def **defs = malloc(sizeof(*defs) * total); + upb_symtab_ent *e = upb_strtable_begin(&s->symtab); + int i = 0; + for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { + upb_def *def = e->def; + assert(def); + if(type == UPB_DEF_ANY || def->type == type) + defs[i++] = def; + } + upb_rwlock_unlock(&s->lock); + *count = i; + for(i = 0; i < *count; i++) + upb_def_ref(defs[i]); + return defs; +} + +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) +{ + upb_rwlock_rdlock(&s->lock); + upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } + upb_rwlock_unlock(&s->lock); + return ret; +} + + +upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { + upb_rwlock_rdlock(&s->lock); + upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); + upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } + upb_rwlock_unlock(&s->lock); + return ret; +} + +void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) +{ + upb_defbuilder b; + upb_defbuilder_init(&b); + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_defbuilder_register_FileDescriptorSet(&b, &handlers); + upb_src_sethandlers(src, &handlers); + upb_src_run(src, status); + if (upb_ok(status)) + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_defbuilder_uninit(&b); + upb_handlers_uninit(&handlers); +} + + +/* upb_baredecoder ************************************************************/ + +// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer +// binary format. It is only used for bootstrapping. It can parse without +// having a upb_msgdef, which is why it is useful for bootstrapping the first +// msgdef. On the downside, it does not support: +// +// * having its input span multiple upb_strings. +// * reading any field of the returned upb_fielddef's except f->number. 
+// * keeping a pointer to the upb_fielddef* and reading it later (the same +// upb_fielddef is reused over and over). +// * detecting errors in the input (we trust that our input is known-good). +// * skipping the rest of the submessage (UPB_SKIPSUBMSG). +// +// It also does not support any of the follow protobuf features: +// * packed fields. +// * groups. +// * zig-zag-encoded types like sint32 and sint64. +// +// Since it cannot tell the difference between submessages and strings, it +// always reports them as strings first, but if the value callback returns +// UPB_TREAT_AS_SUBMSG this signals to the baredecoder that it should be +// treated like a submessage instead. +// +// TODO: for bootstrapping we should define a slightly different wire format +// that includes enough information to know the precise integer types and +// that distinguishes between strings and submessages. This will allow +// us to get rid of the UPB_TREAT_AS_SUBMSG hack. It will also allow us +// to get rid of the upb_value_setraw() scheme, which would be more +// complicated to support on big-endian machines. + +typedef struct { + upb_src src; + upb_string *input; + upb_strlen_t offset; + upb_dispatcher dispatcher; +} upb_baredecoder; + +static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) +{ + const uint8_t *start = (uint8_t*)upb_string_getrobuf(d->input) + d->offset; + const uint8_t *buf = start; + uint8_t last = 0x80; + uint64_t val = 0; + for(int bitpos = 0; (last & 0x80); buf++, bitpos += 7) + val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; + d->offset += buf - start; + return val; +} + +static uint32_t upb_baredecoder_readv32(upb_baredecoder *d) +{ + return (uint32_t)upb_baredecoder_readv64(d); // Truncate. 
+} + +static uint64_t upb_baredecoder_readf64(upb_baredecoder *d) +{ + uint64_t val; + memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 8); + d->offset += 8; + return val; +} + +static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) +{ + uint32_t val; + memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 4); + d->offset += 4; + return val; +} + +static void upb_baredecoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_baredecoder *d = (upb_baredecoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers, false); +} + +static void upb_baredecoder_run(upb_src *src, upb_status *status) { + upb_baredecoder *d = (upb_baredecoder*)src; + assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); + upb_string *str = NULL; + upb_strlen_t stack[UPB_MAX_NESTING] = {UPB_STRLEN_MAX}; + upb_strlen_t *top = &stack[0]; + d->offset = 0; + +#define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; + + CHECK(upb_dispatch_startmsg(&d->dispatcher)); + while(d->offset < upb_string_len(d->input)) { + uint32_t key = upb_baredecoder_readv64(d); + upb_fielddef f; + f.number = key >> 3; + upb_wire_type_t wt = key & 0x7; + if(wt == UPB_WIRE_TYPE_DELIMITED) { + uint32_t delim_len = upb_baredecoder_readv32(d); + // We don't know if it's a string or a submessage; deliver first as + // string. + upb_string_recycle(&str); + upb_string_substr(str, d->input, d->offset, delim_len); + upb_value v; + upb_value_setstr(&v, str); + upb_flow_t ret = upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(ret); + if(ret == BEGIN_SUBMSG) { + // Should deliver as a submessage instead. 
+ CHECK(upb_dispatch_startsubmsg(&d->dispatcher, &f)); + *(++top) = d->offset + delim_len; + } else { + d->offset += delim_len; + } + } else { + upb_value v; + switch(wt) { + case UPB_WIRE_TYPE_VARINT: + upb_value_setraw(&v, upb_baredecoder_readv64(d)); + break; + case UPB_WIRE_TYPE_64BIT: + upb_value_setraw(&v, upb_baredecoder_readf64(d)); + break; + case UPB_WIRE_TYPE_32BIT: + upb_value_setraw(&v, upb_baredecoder_readf32(d)); + break; + default: + assert(false); + abort(); + } + CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); + } + // Detect end-of-submessage. + while(d->offset >= *top) { + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); + d->offset = *(top--); + } + } + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + upb_string_unref(str); + return; + +err: + upb_copyerr(status, d->dispatcher.top->handlers.status); + upb_string_unref(str); +} + +static upb_baredecoder *upb_baredecoder_new(upb_string *str) { + static upb_src_vtbl vtbl = { + &upb_baredecoder_sethandlers, + &upb_baredecoder_run, + }; + upb_baredecoder *d = malloc(sizeof(*d)); + upb_src_init(&d->src, &vtbl); + d->input = upb_string_getref(str); + d->offset = 0; + upb_dispatcher_init(&d->dispatcher); + return d; +} + +static void upb_baredecoder_free(upb_baredecoder *d) { + upb_string_unref(d->input); + free(d); +} + +static upb_src *upb_baredecoder_src(upb_baredecoder *d) { + return &d->src; +} + +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { + // For the moment we silently decline to perform the operation if the symbols + // already exist in the symtab. Revisit this when we have a better story + // about whether syms in a table can be replaced. + if(symtab->fds_msgdef) upb_def_unref(UPB_UPCAST(symtab->fds_msgdef)); + + upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); + upb_status status = UPB_STATUS_INIT; + upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); + upb_baredecoder_free(decoder); + + if(!upb_ok(&status)) { + // upb itself is corrupt. 
+ upb_printerr(&status); + upb_clearerr(&status); + upb_symtab_unref(symtab); + abort(); + } + upb_def *def = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + if (!def || (symtab->fds_msgdef = upb_dyncast_msgdef(def)) == NULL) { + // upb itself is corrupt. + abort(); + } + upb_def_unref(def); // The symtab already holds a ref on it. + upb_status_uninit(&status); +} + +upb_msgdef *upb_symtab_fds_def(upb_symtab *s) { + assert(s->fds_msgdef != NULL); + upb_def_ref(UPB_UPCAST(s->fds_msgdef)); + return s->fds_msgdef; +} diff --git a/src/upb_def.h b/src/upb_def.h new file mode 100644 index 0000000..28cc258 --- /dev/null +++ b/src/upb_def.h @@ -0,0 +1,362 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009-2011 Joshua Haberman. See LICENSE for details. + * + * Provides a mechanism for loading proto definitions from descriptors, and + * data structures to represent those definitions. These form the protobuf + * schema, and are used extensively throughout upb: + * - upb_msgdef: describes a "message" construct. + * - upb_fielddef: describes a message field. + * - upb_enumdef: describes an enum. + * (TODO: definitions of extensions and services). + * + * Defs are obtained from a upb_symtab object. A upb_symtab is empty when + * constructed, and definitions can be added by supplying descriptors. + * + * Defs are immutable and reference-counted. Symbol tables reference any defs + * that are the "current" definitions. If an extension is loaded that adds a + * field to an existing message, a new msgdef is constructed that includes the + * new field and the old msgdef is unref'd. The old msgdef will still be ref'd + * by messages (if any) that were constructed with that msgdef. + * + * This file contains routines for creating and manipulating the definitions + * themselves. To create and manipulate actual messages, see upb_msg.h. 
+ */ + +#ifndef UPB_DEF_H_ +#define UPB_DEF_H_ + +#include "upb_atomic.h" +#include "upb_stream.h" +#include "upb_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_def: base class for defs **********************************************/ + +// All the different kinds of defs we support. These correspond 1:1 with +// declarations in a .proto file. +typedef enum { + UPB_DEF_MSG = 0, + UPB_DEF_ENUM, + UPB_DEF_SVC, + UPB_DEF_EXT, + // Internal-only, placeholder for a def that hasn't been resolved yet. + UPB_DEF_UNRESOLVED, + + // For specifying that defs of any type are requested from getdefs. + UPB_DEF_ANY = -1 +} upb_deftype; + +// This typedef is more space-efficient than declaring an enum var directly. +typedef int8_t upb_deftype_t; + +typedef struct { + upb_string *fqname; // Fully qualified. + upb_atomic_refcount_t refcount; + upb_deftype_t type; + + // The is_cyclic flag could go in upb_msgdef instead of here, because only + // messages can be involved in cycles. However, putting them here is free + // from a space perspective because structure alignment will otherwise leave + // three bytes empty after type. It also makes ref and unref more + // efficient, because we don't have to downcast to msgdef before checking the + // is_cyclic flag. + bool is_cyclic; + uint16_t search_depth; // Used during initialization dfs. +} upb_def; + +// These must not be called directly! +void _upb_def_cyclic_ref(upb_def *def); +void _upb_def_reftozero(upb_def *def); + +// Call to ref/deref a def. +INLINE void upb_def_ref(upb_def *def) { + if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); +} +INLINE void upb_def_unref(upb_def *def) { + if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); +} + +/* upb_fielddef ***************************************************************/ + +// A upb_fielddef describes a single field in a message. It isn't a full def +// in the sense that it derives from upb_def.
It cannot stand on its own; it +// is either a field of a upb_msgdef or contained inside a upb_extensiondef. +// It is also reference-counted. +typedef struct _upb_fielddef { + upb_value default_value; + + upb_string *name; + + struct _upb_msgdef *msgdef; + + // For the case of an enum or a submessage, points to the def for that type. + upb_def *def; + + upb_atomic_refcount_t refcount; + uint32_t byte_offset; // Where in a upb_msg to find the data. + + // These are set only when this fielddef is part of a msgdef. + upb_field_number_t number; + upb_field_count_t field_index; // Indicates set bit. + + upb_fieldtype_t type; + upb_label_t label; + // True if we own a ref on "def" (above). This is true unless this edge is + // part of a cycle. + bool owned; + uint8_t set_bit_mask; + uint16_t set_bit_offset; +} upb_fielddef; + +// A variety of tests about the type of a field. +INLINE bool upb_issubmsg(upb_fielddef *f) { + return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE); +} +INLINE bool upb_isstring(upb_fielddef *f) { + return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES); +} +INLINE bool upb_isarray(upb_fielddef *f) { + return f->label == UPB_LABEL(REPEATED); +} +// Does the type of this field imply that it should contain an associated def? 
+INLINE bool upb_hasdef(upb_fielddef *f) { + return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); +} + +INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) { + if (upb_isarray(f)) { + return UPB_VALUETYPE_ARRAY; + } else { + return f->type; + } +} + +INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) { + assert(upb_isarray(f)); + return f->type; +} + +INLINE bool upb_field_ismm(upb_fielddef *f) { + return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); +} + +INLINE bool upb_elem_ismm(upb_fielddef *f) { + return upb_isstring(f) || upb_issubmsg(f); +} + +/* upb_msgdef *****************************************************************/ + +// Structure that describes a single .proto message type. +typedef struct _upb_msgdef { + upb_def base; + upb_atomic_refcount_t cycle_refcount; + uint32_t size; + uint32_t set_flags_bytes; + + // Tables for looking up fields by number and name. + upb_inttable itof; // int to field + upb_strtable ntof; // name to field +} upb_msgdef; + +// Hash table entries for looking up fields by name or number. +typedef struct { + upb_inttable_entry e; + upb_fielddef *f; +} upb_itof_ent; +typedef struct { + upb_strtable_entry e; + upb_fielddef *f; +} upb_ntof_ent; + +// Looks up a field by name or number. While these are written to be as fast +// as possible, it will still be faster to cache the results of this lookup if +// possible. These return NULL if no such field is found. +INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) { + upb_itof_ent *e = + (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); + return e ? e->f : NULL; +} + +INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { + upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); + return e ? e->f : NULL; +} + +INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) { + return upb_strtable_count(&m->ntof); +} + +// Iteration over fields. The order is undefined. 
+// upb_msg_iter i; +// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { +// upb_fielddef *f = upb_msg_iter_field(i); +// // ... +// } +typedef upb_itof_ent *upb_msg_iter; + +upb_msg_iter upb_msg_begin(upb_msgdef *m); +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter); +INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; } + +INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { + return iter->f; +} + +/* upb_enumdef ****************************************************************/ + +typedef struct _upb_enumdef { + upb_def base; + upb_strtable ntoi; + upb_inttable iton; +} upb_enumdef; + +typedef struct { + upb_strtable_entry e; + uint32_t value; +} upb_ntoi_ent; + +typedef struct { + upb_inttable_entry e; + upb_string *string; +} upb_iton_ent; + +typedef int32_t upb_enumval_t; + +// Lookups from name to integer and vice-versa. +bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +// Caller does not own a ref on the returned string. +upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); + +// Iteration over name/value pairs. The order is undefined. +// upb_enum_iter i; +// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { +// // ... +// } +typedef upb_iton_ent *upb_enum_iter; + +upb_enum_iter upb_enum_begin(upb_enumdef *e); +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); +INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; } + +INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { + return iter->string; +} +INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { + return iter->e.key; +} + + +/* upb_symtab *****************************************************************/ + +// A SymbolTable is where upb_defs live. It is empty when first constructed. +// Clients add definitions to the symtab by supplying unserialized or +// serialized descriptors (as defined in descriptor.proto). 
+struct _upb_symtab { + upb_atomic_refcount_t refcount; + upb_rwlock_t lock; // Protects all members except the refcount. + upb_strtable symtab; // The symbol table. + upb_msgdef *fds_msgdef; // Msgdef for google.protobuf.FileDescriptorSet. +}; +typedef struct _upb_symtab upb_symtab; + +// Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd +// when the caller is done with them. +upb_symtab *upb_symtab_new(void); +void _upb_symtab_free(upb_symtab *s); // Must not be called directly! + +INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); } +INLINE void upb_symtab_unref(upb_symtab *s) { + if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); +} + +// Resolves the given symbol using the rules described in descriptor.proto, +// namely: +// +// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like +// scoping rules are used to find the type (i.e. first the nested types +// within this message are searched, then within the parent, on up to the +// root namespace). +// +// If a def is found, the caller owns one ref on the returned def. Otherwise +// returns NULL. +upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); + +// Find an entry in the symbol table with this exact name. If a def is found, +// the caller owns one ref on the returned def. Otherwise returns NULL. +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); + +// Gets an array of pointers to all currently active defs in this symtab. The +// caller owns the returned array (which is of length *count) as well as a ref +// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are +// returned, otherwise only defs of the required type are returned. +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type); + +// "fds" is a upb_src that will yield data from the +// google.protobuf.FileDescriptorSet message type. 
It is not necessary that +// the upb_def for FileDescriptorSet came from this symtab, but it must be +// compatible with the official descriptor.proto, as published by Google. +// +// upb_symtab_addfds() adds all the definitions from the given +// FileDescriptorSet and adds them to the symtab. status indicates whether the +// operation was successful or not, and the error message (if any). +// +// TODO: should this allow redefinition? Either is possible, but which is +// more useful? Maybe it should be an option. +void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); + +// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab. +// This is necessary for bootstrapping, since these are the upb_defs that +// specify other defs and allow them to be loaded. +void upb_symtab_add_descriptorproto(upb_symtab *s); + +// Returns the upb_msgdef for google.protobuf.FileDescriptorSet, which the +// caller owns a ref on. This is a convenience method that is equivalent to +// looking up the symbol called "google.protobuf.FileDescriptorSet" yourself, +// except that it will only return a def that was added by +// upb_symtab_add_descriptorproto(). +upb_msgdef *upb_symtab_fds_def(upb_symtab *s); + + +/* upb_def casts **************************************************************/ + +// Dynamic casts, for determining if a def is of a particular type at runtime. +#define UPB_DYNAMIC_CAST_DEF(lower, upper) \ + struct _upb_ ## lower; /* Forward-declare. */ \ + INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ + if(def->type != UPB_DEF_ ## upper) return NULL; \ + return (struct _upb_ ## lower*)def; \ + } +UPB_DYNAMIC_CAST_DEF(msgdef, MSG); +UPB_DYNAMIC_CAST_DEF(enumdef, ENUM); +UPB_DYNAMIC_CAST_DEF(svcdef, SVC); +UPB_DYNAMIC_CAST_DEF(extdef, EXT); +UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DYNAMIC_CAST_DEF + +// Downcasts, for when someone wants to assert that a def is of a particular type.
+// These are only checked if we are building debug. +#define UPB_DOWNCAST_DEF(lower, upper) \ + struct _upb_ ## lower; /* Forward-declare. */ \ + INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ + assert(def->type == UPB_DEF_ ## upper); \ + return (struct _upb_ ## lower*)def; \ + } +UPB_DOWNCAST_DEF(msgdef, MSG); +UPB_DOWNCAST_DEF(enumdef, ENUM); +UPB_DOWNCAST_DEF(svcdef, SVC); +UPB_DOWNCAST_DEF(extdef, EXT); +UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DOWNCAST_DEF + +#define UPB_UPCAST(ptr) (&(ptr)->base) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_DEF_H_ */ diff --git a/src/upb_encoder.c b/src/upb_encoder.c new file mode 100644 index 0000000..304a423 --- /dev/null +++ b/src/upb_encoder.c @@ -0,0 +1,420 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_encoder.h" + +#include +#include "descriptor.h" + +/* Functions for calculating sizes of wire values. ****************************/ + +static size_t upb_v_uint64_t_size(uint64_t val) { +#ifdef __GNUC__ + int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. +#else + int high_bit = 0; + uint64_t tmp = val; + while(tmp >>= 1) high_bit++; +#endif + return val == 0 ? 1 : high_bit / 7 + 1; +} + +static size_t upb_v_int32_t_size(int32_t val) { + // v_int32's are sign-extended to maintain wire compatibility with int64s. + return upb_v_uint64_t_size((int64_t)val); +} +static size_t upb_v_uint32_t_size(uint32_t val) { + return upb_v_uint64_t_size(val); +} +static size_t upb_f_uint64_t_size(uint64_t val) { + (void)val; // Length is independent of value. + return sizeof(uint64_t); +} +static size_t upb_f_uint32_t_size(uint32_t val) { + (void)val; // Length is independent of value. + return sizeof(uint32_t); +} + + +/* Functions to write wire values.
/* ********************************************/

// Since we know in advance the longest that the value could be, we always make
// sure that our buffer is long enough.  This saves us from having to perform
// bounds checks.

// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT).
//
// Emits "val" seven bits at a time, least-significant group first, setting the
// high bit of every byte except the last.  Writes between 1 and 10 bytes and
// returns a pointer one past the last byte written.
static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
{
  do {
    uint8_t byte = val & 0x7f;
    val >>= 7;
    if(val) byte |= 0x80;  // More groups follow: set the continuation bit.
    *buf++ = byte;
  } while(val);
  return buf;
}

// Puts an unsigned 32-bit varint, verbatim.  Never uses the high 32 bits.
// Returns a pointer one past the last byte written.
static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
{
  return upb_put_v_uint64_t(buf, val);
}

// Puts a signed 32-bit varint, first sign-extending to 64-bits.  We do this to
// maintain wire-compatibility with 64-bit signed integers.  Returns a pointer
// one past the last byte written.
static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
{
  return upb_put_v_uint64_t(buf, (int64_t)val);
}

// Stores "val" into buf[0..3], least-significant byte first.
static void upb_put32(uint8_t *buf, uint32_t val) {
  buf[0] = val & 0xff;
  buf[1] = (val >> 8) & 0xff;
  buf[2] = (val >> 16) & 0xff;
  buf[3] = (val >> 24);
}

// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
// Always writes exactly 4 bytes and returns a pointer one past them.
static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
{
  uint8_t *uint32_end = buf + sizeof(uint32_t);
#if UPB_UNALIGNED_READS_OK
  // Direct store in host byte order; this matches the byte-wise path below
  // only on little-endian hosts.
  *(uint32_t*)buf = val;
#else
  upb_put32(buf, val);
#endif
  return uint32_end;
}

// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
// Always writes exactly 8 bytes and returns a pointer one past them.
static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
{
  uint8_t *uint64_end = buf + sizeof(uint64_t);
#if UPB_UNALIGNED_READS_OK
  // Direct store in host byte order; this matches the byte-wise path below
  // only on little-endian hosts.
  *(uint64_t*)buf = val;
#else
  // Low word goes in bytes 0..3, high word in bytes 4..7.  The high word must
  // be written at buf + 4; writing it at buf would clobber the low word and
  // leave the upper four bytes of the output unset.
  upb_put32(buf, (uint32_t)val);
  upb_put32(buf + sizeof(uint32_t), (uint32_t)(val >> 32));
#endif
  return uint64_end;
}

/* Functions to write and calculate sizes for .proto values. ******************/

// Performs zig-zag encoding, which is used by sint32 and sint64.
+static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } +static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } + +/* Use macros to define a set of two functions for each .proto type: + * + * // Converts and writes a .proto value into buf. "end" indicates the end + * // of the current available buffer (if the buffer does not contain enough + * // space UPB_STATUS_NEED_MORE_DATA is returned). On success, *outbuf will + * // point one past the data that was written. + * uint8_t *upb_put_INT32(uint8_t *buf, int32_t val); + * + * // Returns the number of bytes required to encode val. + * size_t upb_get_INT32_size(int32_t val); + * + * // Given a .proto value s (source) convert it to a wire value. + * uint32_t upb_vtowv_INT32(int32_t s); + */ + +#define VTOWV(type, wire_t, val_t) \ + static wire_t upb_vtowv_ ## type(val_t s) + +#define PUT(type, v_or_f, wire_t, val_t, member_name) \ + static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \ + wire_t tmp = upb_vtowv_ ## type(val); \ + return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \ + } + +#define T(type, v_or_f, wire_t, val_t, member_name) \ + static size_t upb_get_ ## type ## _size(val_t val) { \ + return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \ + } \ + VTOWV(type, wire_t, val_t); /* prototype for PUT below */ \ + PUT(type, v_or_f, wire_t, val_t, member_name) \ + VTOWV(type, wire_t, val_t) + +T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; } +T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; } +T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } +T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } +T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); } +T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); } +T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } +T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } +T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; } +T(SFIXED64, f, uint64_t, int64_t, 
int64) { return (uint64_t)s; } +T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; } +T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; } +T(DOUBLE, f, uint64_t, double, _double) { + upb_value v; + v._double = s; + return v.uint64; +} +T(FLOAT, f, uint32_t, float, _float) { + upb_value v; + v._float = s; + return v.uint32; +} +#undef VTOWV +#undef PUT +#undef T + +static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) +{ +#define CASE(t, member_name) \ + case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name); + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + default: assert(false); return buf; + } +#undef CASE +} + +static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v) +{ +#define CASE(t, member_name) \ + case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + default: assert(false); return 0; + } +#undef CASE +} + +static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num, + upb_wire_type_t wt) +{ + return upb_put_UINT32(buf, wt | (num << 3)); +} + +static uint32_t _upb_get_tag_size(upb_field_number_t num) +{ + return upb_get_UINT32_size(num << 3); +} + + +/* upb_sizebuilder ************************************************************/ + +struct upb_sizebuilder { + // Accumulating size for the current level. + uint32_t size; + + // Stack of sizes for our current nesting. 
+ uint32_t stack[UPB_MAX_NESTING], *top; + + // Vector of sizes. + uint32_t *sizes; + int sizes_len; + int sizes_size; + + upb_status status; +}; + +// upb_sink callbacks. +static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f, + upb_value val, + upb_status *status) +{ + (void)status; + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + uint32_t size = 0; + size += _upb_get_tag_size(f->number); + size += _upb_get_value_size(f->type, val); + sb->size += size; + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end, + upb_status *status) +{ + (void)status; + (void)str; // String data itself is not used. + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(start >= 0) { + uint32_t size = 0; + size += _upb_get_tag_size(f->number); + size += upb_get_UINT32_size(end - start); + sb->size += size; + } + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + (void)status; + (void)f; // Unused (we calculate tag size and delimiter in endcb). + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(f->type == UPB_TYPE(MESSAGE)) { + *sb->top = sb->size; + sb->top++; + sb->size = 0; + } else { + assert(f->type == UPB_TYPE(GROUP)); + sb->size += _upb_get_tag_size(f->number); + } + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + (void)status; + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(f->type == UPB_TYPE(MESSAGE)) { + sb->top--; + if(sb->sizes_len == sb->sizes_size) { + sb->sizes_size *= 2; + sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes)); + } + uint32_t child_size = sb->size; + uint32_t parent_size = *sb->top; + sb->sizes[sb->sizes_len++] = child_size; + // The size according to the parent includes the tag size and delimiter of + // the submessage. 
+ parent_size += upb_get_UINT32_size(child_size); + parent_size += _upb_get_tag_size(f->number); + // Include size accumulated in parent before child began. + sb->size = child_size + parent_size; + } else { + assert(f->type == UPB_TYPE(GROUP)); + // As an optimization, we could just add this number twice in startcb, to + // avoid having to recalculate it. + sb->size += _upb_get_tag_size(f->number); + } + return UPB_SINK_CONTINUE; +} + +upb_sink_callbacks _upb_sizebuilder_sink_vtbl = { + _upb_sizebuilder_valuecb, + _upb_sizebuilder_strcb, + _upb_sizebuilder_startcb, + _upb_sizebuilder_endcb +}; + + +/* upb_sink callbacks *********************************************************/ + +struct upb_encoder { + upb_sink base; + //upb_bytesink *bytesink; + uint32_t *sizes; + int size_offset; +}; + + +// Within one callback we may need to encode up to two separate values. +#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2) + +static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf, + size_t len, upb_status *status) +{ + // TODO: conjure a upb_strptr that points to buf. + //upb_strptr ptr; + (void)s; + (void)buf; + (void)status; + size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr); + if(written < len) { + // TODO: mark to skip "written" bytes next time. + return UPB_SINK_STOP; + } else { + return UPB_SINK_CONTINUE; + } +} + +static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f, + upb_value val, upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + upb_wire_type_t wt = upb_types[f->type].expected_wire_type; + // TODO: handle packed encoding. 
+ ptr = _upb_put_tag(ptr, f->number, wt); + ptr = upb_encode_value(ptr, f->type, val); + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(start >= 0) { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); + ptr = upb_put_UINT32(ptr, end - start); + } + // TODO: properly handle partially consumed strings and partially supplied + // strings. + _upb_encoder_push_buf(s, buf, ptr - buf, status); + return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status); +} + +static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(f->type == UPB_TYPE(GROUP)) { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP); + } else { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); + ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]); + } + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE; + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP); + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +upb_sink_callbacks _upb_encoder_sink_vtbl = { + _upb_encoder_valuecb, + _upb_encoder_strcb, + _upb_encoder_startcb, + _upb_encoder_endcb +}; + diff --git a/src/upb_encoder.h b/src/upb_encoder.h new file mode 100644 index 0000000..e879b0b --- /dev/null +++ b/src/upb_encoder.h @@ -0,0 +1,56 @@ +/* + * upb - a minimalist implementation of protocol buffers. 
+ * + * Implements a upb_sink that writes protobuf data to the binary wire format. + * + * For messages that have any submessages, the encoder needs a buffer + * containing the submessage sizes, so they can be properly written at the + * front of each message. Note that groups do *not* have this requirement. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_ENCODER_H_ +#define UPB_ENCODER_H_ + +#include "upb.h" +#include "upb_srcsink.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_encoder ****************************************************************/ + +// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol +// buffer binary wire format. +struct upb_encoder; +typedef struct upb_encoder upb_encoder; + +upb_encoder *upb_encoder_new(upb_msgdef *md); +void upb_encoder_free(upb_encoder *e); + +// Resets the given upb_encoder such that it is ready to begin encoding, +// outputting data to "bytesink" (which must live until the encoder is +// reset or destroyed). +void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink); + +// Returns the upb_sink to which data can be written. The sink is invalidated +// when the encoder is reset or destroyed. Note that if the client wants to +// encode any length-delimited submessages it must first call +// upb_encoder_buildsizes() below. +upb_sink *upb_encoder_sink(upb_encoder *e); + +// Call prior to pushing any data with embedded submessages. "src" must yield +// exactly the same data as what will next be encoded, but in reverse order. +// The encoder iterates over this data in order to determine the sizes of the +// submessages. If any errors are returned by the upb_src, the status will +// be saved in *status. If the client is sure that the upb_src will not throw +// any errors, "status" may be NULL.
+void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_ENCODER_H_ */ diff --git a/src/upb_glue.c b/src/upb_glue.c new file mode 100644 index 0000000..541827e --- /dev/null +++ b/src/upb_glue.c @@ -0,0 +1,54 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_glue.h" +#include "upb_msg.h" +#include "upb_decoder.h" +#include "upb_strstream.h" + +void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, + upb_status *status) { + upb_stringsrc strsrc; + upb_stringsrc_init(&strsrc); + upb_stringsrc_reset(&strsrc, str); + + upb_decoder d; + upb_decoder_init(&d, md); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc)); + upb_src *src = upb_decoder_src(&d); + + upb_msgpopulator p; + upb_msgpopulator_init(&p); + upb_msgpopulator_reset(&p, msg, md); + + upb_handlers h; + upb_handlers_init(&h); + upb_msgpopulator_register_handlers(&p, &h); + upb_src_sethandlers(src, &h); + + upb_src_run(src, status); + + upb_stringsrc_uninit(&strsrc); + upb_decoder_uninit(&d); + upb_msgpopulator_uninit(&p); + upb_handlers_uninit(&h); +} + +void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) { + upb_stringsrc strsrc; + upb_stringsrc_init(&strsrc); + upb_stringsrc_reset(&strsrc, str); + + upb_decoder d; + upb_msgdef *fds_msgdef = upb_symtab_fds_def(symtab); + upb_decoder_init(&d, fds_msgdef); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc)); + + upb_symtab_addfds(symtab, upb_decoder_src(&d), status); + upb_stringsrc_uninit(&strsrc); + upb_decoder_uninit(&d); + upb_def_unref(UPB_UPCAST(fds_msgdef)); +} diff --git a/src/upb_glue.h b/src/upb_glue.h new file mode 100644 index 0000000..ca32436 --- /dev/null +++ b/src/upb_glue.h @@ -0,0 +1,46 @@ +/* + * upb - a minimalist implementation of protocol buffers. 
+ * + * upb's core components like upb_decoder and upb_msg are carefully designed to + * avoid depending on each other for maximum orthogonality. In other words, + * you can use a upb_decoder to decode into *any* kind of structure; upb_msg is + * just one such structure. You can use upb_decoder without having to link in + * upb_msg. + * + * However, for convenience we provide functions here for doing common + * operations like deserializing protobuf binary format into a upb_msg. The + * compromise is that this file drags in almost all of upb as a dependency, + * which could be undesirable if you're trying to use a trimmed-down build of + * upb. + * + * Copyright (c) 2011 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_GLUE_H +#define UPB_GLUE_H + +#ifdef __cplusplus +extern "C" { +#endif + +// Forward-declares so we don't have to include everything in this .h file. +// Clients should use the regular, typedef'd names (eg. upb_string). +struct _upb_msg; +struct _upb_msgdef; +struct _upb_status; +struct _upb_string; +struct _upb_symtab; + +// Decodes the given string, which must be in protobuf binary format, to the +// given upb_msg with msgdef "md", storing the status of the operation in "s". +void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg, + struct _upb_msgdef *md, struct _upb_status *s); + +void upb_parsedesc(struct _upb_symtab *symtab, struct _upb_string *str, + struct _upb_status *status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_msg.c b/src/upb_msg.c new file mode 100644 index 0000000..9dfbea4 --- /dev/null +++ b/src/upb_msg.c @@ -0,0 +1,253 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + * Data structure for storing a message of protobuf data. 
+ */ + +#include "upb_msg.h" +#include "upb_decoder.h" +#include "upb_strstream.h" + +static uint32_t upb_round_up_pow2(uint32_t v) { + // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +static void upb_elem_free(upb_value v, upb_fielddef *f) { + switch(f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + _upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def)); + break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + _upb_string_free(upb_value_getstr(v)); + break; + default: + abort(); + } +} + +static void upb_elem_unref(upb_value v, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + upb_atomic_refcount_t *refcount = upb_value_getrefcount(v); + if (refcount && upb_atomic_unref(refcount)) + upb_elem_free(v, f); +} + +static void upb_field_free(upb_value v, upb_fielddef *f) { + if (upb_isarray(f)) { + _upb_array_free(upb_value_getarr(v), f); + } else { + upb_elem_free(v, f); + } +} + +static void upb_field_unref(upb_value v, upb_fielddef *f) { + assert(upb_field_ismm(f)); + upb_atomic_refcount_t *refcount = upb_value_getrefcount(v); + if (refcount && upb_atomic_unref(refcount)) + upb_field_free(v, f); +} + + +/* upb_array ******************************************************************/ + +upb_array *upb_array_new(void) { + upb_array *arr = malloc(sizeof(*arr)); + upb_atomic_refcount_init(&arr->refcount, 1); + arr->size = 0; + arr->len = 0; + arr->ptr = NULL; + return arr; +} + +void upb_array_recycle(upb_array **_arr, upb_fielddef *f) { + upb_array *arr = *_arr; + if(arr && upb_atomic_only(&arr->refcount)) { + arr->len = 0; + } else { + upb_array_unref(arr, f); + *_arr = upb_array_new(); + } +} + +void _upb_array_free(upb_array *arr, upb_fielddef *f) { + if (upb_elem_ismm(f)) { + // Need to release refs on sub-objects. 
+ upb_valuetype_t type = upb_elem_valuetype(f); + for (upb_arraylen_t i = 0; i < arr->size; i++) { + upb_valueptr p = _upb_array_getptr(arr, f, i); + upb_elem_unref(upb_value_read(p, type), f); + } + } + free(arr->ptr); + free(arr); +} + +void upb_array_resize(upb_array *arr, upb_fielddef *f, upb_arraylen_t len) { + size_t type_size = upb_types[f->type].size; + upb_arraylen_t old_size = arr->size; + if (old_size < len) { + // Need to resize. + size_t new_size = upb_round_up_pow2(len); + arr->ptr = realloc(arr->ptr, new_size * type_size); + arr->size = new_size; + memset(arr->ptr + (old_size * type_size), 0, + (new_size - old_size) * type_size); + } + arr->len = len; +} + + +/* upb_msg ********************************************************************/ + +upb_msg *upb_msg_new(upb_msgdef *md) { + upb_msg *msg = malloc(md->size); + // Clear all set bits and cached pointers. + memset(msg, 0, md->size); + upb_atomic_refcount_init(&msg->refcount, 1); + return msg; +} + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { + // Need to release refs on all sub-objects. 
+ upb_msg_iter i; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) upb_field_unref(upb_value_read(p, type), f); + } + free(msg); +} + +void upb_msg_recycle(upb_msg **_msg, upb_msgdef *msgdef) { + upb_msg *msg = *_msg; + if(msg && upb_atomic_only(&msg->refcount)) { + upb_msg_clear(msg, msgdef); + } else { + upb_msg_unref(msg, msgdef); + *_msg = upb_msg_new(msgdef); + } +} + +INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { + msg->data[f->set_bit_offset] |= f->set_bit_mask; +} + +static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) { + upb_valueptr p = _upb_msg_getptr(msg, f); + if (upb_isarray(f)) { + // Create/recycle/resize the array if necessary, and find a pointer to + // a newly-appended element. + if (!upb_msg_has(msg, f)) { + upb_array_recycle(p.arr, f); + upb_msg_sethas(msg, f); + } + assert(*p.arr != NULL); + upb_arraylen_t oldlen = upb_array_len(*p.arr); + upb_array_resize(*p.arr, f, oldlen + 1); + p = _upb_array_getptr(*p.arr, f, oldlen); + } + return p; +} + +static void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr p = upb_msg_getappendptr(msg, f); + if (upb_isstring(f)) { + // We do: + // - upb_string_recycle(), upb_string_substr() instead of + // - upb_string_unref(), upb_string_getref() + // because we can conveniently cache these upb_string objects in the + // upb_msg, whereas the upb_src who is sending us these strings may not + // have a good way of caching them. This saves the upb_src from allocating + // new upb_strings all the time to give us. 
+ // + // If you were using this to copy one upb_msg to another this would + // allocate string objects whereas a upb_string_getref could have avoided + // those allocations completely; if this is an issue, we could make it an + // option of the upb_msgpopulator which behavior is desired. + upb_string *src = upb_value_getstr(val); + upb_string_recycle(p.str); + upb_string_substr(*p.str, src, 0, upb_string_len(src)); + } else { + upb_value_write(p, val, f->type); + } + upb_msg_sethas(msg, f); +} + +upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) { + upb_valueptr p = upb_msg_getappendptr(msg, f); + if (upb_isarray(f) || !upb_msg_has(msg, f)) { + upb_msg_recycle(p.msg, msgdef); + upb_msg_sethas(msg, f); + } + return *p.msg; +} + + +/* upb_msgpopulator ***********************************************************/ + +void upb_msgpopulator_init(upb_msgpopulator *p) { + upb_status_init(&p->status); +} + +void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md) { + p->top = p->stack; + p->limit = p->stack + sizeof(p->stack) / sizeof(p->stack[0]); + p->top->msg = m; + p->top->msgdef = md; +} + +void upb_msgpopulator_uninit(upb_msgpopulator *p) { + upb_status_uninit(&p->status); +} + +static upb_flow_t upb_msgpopulator_value(void *_p, upb_fielddef *f, upb_value val) { + upb_msgpopulator *p = _p; + upb_msg_appendval(p->top->msg, f, val); + return UPB_CONTINUE; +} + +static upb_flow_t upb_msgpopulator_startsubmsg(void *_p, upb_fielddef *f, + upb_handlers *delegate_to) { + upb_msgpopulator *p = _p; + (void)delegate_to; + upb_msg *oldmsg = p->top->msg; + if (++p->top == p->limit) { + upb_seterr(&p->status, UPB_ERROR, "Exceeded maximum nesting"); + return UPB_BREAK; + } + upb_msgdef *msgdef = upb_downcast_msgdef(f->def); + p->top->msgdef = msgdef; + p->top->msg = upb_msg_appendmsg(oldmsg, f, msgdef); + return UPB_CONTINUE; +} + +static upb_flow_t upb_msgpopulator_endsubmsg(void *_p) { + upb_msgpopulator *p = _p; + --p->top; + return UPB_CONTINUE; +} +
+void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h) { + static upb_handlerset handlerset = { + NULL, // startmsg + NULL, // endmsg + &upb_msgpopulator_value, + &upb_msgpopulator_startsubmsg, + &upb_msgpopulator_endsubmsg, + }; + upb_register_handlerset(h, &handlerset); + upb_set_handler_closure(h, p, &p->status); +} diff --git a/src/upb_msg.h b/src/upb_msg.h new file mode 100644 index 0000000..8a3c63f --- /dev/null +++ b/src/upb_msg.h @@ -0,0 +1,232 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. + * + * Data structure for storing a message of protobuf data. Unlike Google's + * protobuf, upb_msg and upb_array are reference counted instead of having + * exclusive ownership of their fields. This is a better match for dynamic + * languages where statements like a.b = other_b are normal. + * + * upb's parsers and serializers could also be used to populate and serialize + * other kinds of message objects (even one generated by Google's protobuf). + */ + +#ifndef UPB_MSG_H +#define UPB_MSG_H + +#include "upb.h" +#include "upb_def.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// A pointer to a .proto value. The owner must have an out-of-band way of +// knowing the type, so it knows which union member to use. +typedef union { + double *_double; + float *_float; + int32_t *int32; + int64_t *int64; + uint8_t *uint8; + uint32_t *uint32; + uint64_t *uint64; + bool *_bool; + upb_string **str; + upb_msg **msg; + upb_array **arr; + void *_void; +} upb_valueptr; + +INLINE upb_valueptr upb_value_addrof(upb_value *val) { + upb_valueptr ptr = {&val->val._double}; + return ptr; +} + +// Reads or writes a upb_value from an address represented by a upb_value_ptr. 
+// We need to know the value type to perform this operation, because we need to +// know how much memory to copy (and for big-endian machines, we need to know +// where in the upb_value the data goes). +// +// For little endian-machines where we didn't mind overreading, we could make +// upb_value_read simply use memcpy(). +INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { + upb_value val; + +#ifdef NDEBUG +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break; +#else +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.val.member_name = *ptr.member_name; val.type = upb_types[ft].inmemory_type; break; +#endif + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + case UPB_VALUETYPE_ARRAY: + val.val.arr = *ptr.arr; +#ifndef NDEBUG + val.type = UPB_VALUETYPE_ARRAY; +#endif + break; + default: assert(false); + } + return val; + +#undef CASE +} + +INLINE void upb_value_write(upb_valueptr ptr, upb_value val, + upb_fieldtype_t ft) { + if (ft == UPB_VALUETYPE_ARRAY) { + assert(val.type == UPB_VALUETYPE_ARRAY); + } else { + assert(val.type == upb_types[ft].inmemory_type); + } +#define CASE(t, member_name) \ + case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, 
msg) + CASE(GROUP, msg) + case UPB_VALUETYPE_ARRAY: + *ptr.arr = val.val.arr; + break; + default: assert(false); + } + +#undef CASE +} + +/* upb_array ******************************************************************/ + +typedef uint32_t upb_arraylen_t; +struct _upb_array { + upb_atomic_refcount_t refcount; + // "len" and "size" are measured in elements, not bytes. + upb_arraylen_t len; + upb_arraylen_t size; + char *ptr; +}; + +void _upb_array_free(upb_array *a, upb_fielddef *f); +INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, + uint32_t elem) { + upb_valueptr p; + p._void = &a->ptr[elem * upb_types[f->type].size]; + return p; +} + +upb_array *upb_array_new(void); + +INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { + if (a && upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); +} + +void upb_array_recycle(upb_array **arr, upb_fielddef *f); + +INLINE uint32_t upb_array_len(upb_array *a) { + return a->len; +} + +INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f, + upb_arraylen_t i) { + assert(i < upb_array_len(arr)); + return upb_value_read(_upb_array_getptr(arr, f, i), f->type); +} + +/* upb_msg ********************************************************************/ + +struct _upb_msg { + upb_atomic_refcount_t refcount; + uint8_t data[4]; // We allocate the appropriate amount per message. +}; + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md); + +INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { + upb_valueptr p; + p._void = &msg->data[f->byte_offset]; + return p; +} + +// Creates a new msg of the given type. +upb_msg *upb_msg_new(upb_msgdef *md); + +// Unrefs the given message. +INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { + if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); +} + +void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef); + +// Tests whether the given field is explicitly set, or whether it will return a +// default. 
+INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { + return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0; +} + +INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { + return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); +} + +// Unsets all field values back to their defaults. +INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { + memset(msg->data, 0, md->set_flags_bytes); +} + +typedef struct { + upb_msg *msg; + upb_msgdef *msgdef; +} upb_msgpopulator_frame; + +typedef struct { + upb_msgpopulator_frame stack[UPB_MAX_NESTING], *top, *limit; + upb_status status; +} upb_msgpopulator; + +void upb_msgpopulator_init(upb_msgpopulator *p); +void upb_msgpopulator_uninit(upb_msgpopulator *p); +void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md); +void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_stdio.c b/src/upb_stdio.c new file mode 100644 index 0000000..8857677 --- /dev/null +++ b/src/upb_stdio.c @@ -0,0 +1,104 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_stdio.h" + +#include +#include +#include "upb_string.h" + +// We can make this configurable if necessary. +#define BLOCK_SIZE 4096 + +struct upb_stdio { + upb_bytesrc bytesrc; + upb_bytesink bytesink; + FILE *file; +}; + +void upb_stdio_reset(upb_stdio *stdio, FILE* file) { + stdio->file = file; +} + + +/* upb_bytesrc methods ********************************************************/ + +static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, + upb_strlen_t count, upb_status *status) { + upb_stdio *stdio = (upb_stdio*)src; + assert(count > 0); + size_t read = fread(buf, 1, count, stdio->file); + if(read < (size_t)count) { + // Error or EOF. 
+ if(feof(stdio->file)) { + upb_seterr(status, UPB_EOF, ""); + return read; + } else if(ferror(stdio->file)) { + upb_seterr(status, UPB_ERROR, "Error reading from stdio stream."); + return -1; + } + } + return read; +} + +static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + upb_strlen_t read = upb_stdio_read( + src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status); + if (read <= 0) return false; + upb_string_getrwbuf(str, read); + return true; +} + + +/* upb_bytesink methods *******************************************************/ + +upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); + upb_strlen_t len = upb_string_len(str); + upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); + if(written < len) { + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; + } + return written; +} + +upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, + const char *fmt, va_list args) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); + upb_strlen_t written = vfprintf(stdio->file, fmt, args); + if (written < 0) { + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; + } + return written; +} + +upb_stdio *upb_stdio_new() { + static upb_bytesrc_vtbl bytesrc_vtbl = { + upb_stdio_read, + upb_stdio_getstr, + }; + + static upb_bytesink_vtbl bytesink_vtbl = { + NULL, + upb_stdio_putstr, + upb_stdio_vprintf + }; + + upb_stdio *stdio = malloc(sizeof(*stdio)); + upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); + upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); + return stdio; +} + +void upb_stdio_free(upb_stdio *stdio) { + free(stdio); +} + +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; } +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; } diff 
--git a/src/upb_stdio.h b/src/upb_stdio.h new file mode 100644 index 0000000..fd71fdd --- /dev/null +++ b/src/upb_stdio.h @@ -0,0 +1,42 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file provides upb_bytesrc and upb_bytesink implementations for + * ANSI C stdio. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include <stdio.h> +#include "upb_stream.h" + +#ifndef UPB_STDIO_H_ +#define UPB_STDIO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_stdio; +typedef struct upb_stdio upb_stdio; + +// Creation/deletion. +upb_stdio *upb_stdio_new(); +void upb_stdio_free(upb_stdio *stdio); + +// Reset/initialize the object for use. The src or sink will call +// fread()/fwrite()/etc. on the given FILE*. +void upb_stdio_reset(upb_stdio *stdio, FILE* file); + +// Gets a bytesrc or bytesink for the given stdio. The returned pointer is +// invalidated by upb_stdio_reset above. It is perfectly valid to get both +// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading +// and writing. +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio); +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_stream.h b/src/upb_stream.h new file mode 100644 index 0000000..3f7c843 --- /dev/null +++ b/src/upb_stream.h @@ -0,0 +1,276 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file defines four general-purpose streaming data interfaces. + * + * - upb_handlers: represents a set of callbacks, very much like in XML's SAX + * API, that a client can register to do a streaming tree traversal over a + * stream of structured protobuf data, without knowing where that data is + * coming from. There is only one upb_handlers type (it is not a virtual + * base class), but the object lets you register any set of handlers.
+ * + * The upb_handlers interface supports delegation: when entering a submessage, + * you can delegate to another set of upb_handlers instead of handling the + * submessage yourself. This allows upb_handlers objects to *compose* -- you + * can implement a set of upb_handlers without knowing or caring whether this + * is the top-level message or not. + * + * The other interfaces are the C equivalent of "virtual base classes" that + * anyone can implement: + * + * - upb_src: an interface that represents a source of streaming protobuf data. + * It lets you register a set of upb_handlers, and then call upb_src_run(), + * which pulls the protobuf data from somewhere and then calls the handlers. + * + * - upb_bytesrc: a pull interface for streams of bytes, basically an + * abstraction of read()/fread(), but it avoids copies where possible. + * + * - upb_bytesink: push interface for streams of bytes, basically an + * abstraction of write()/fwrite(), but it avoids copies where possible. + * + * All of the encoders and decoders are based on these generic interfaces, + * which lets you write streaming algorithms that do not depend on a specific + * serialization format; for example, you can write a pretty printer that works + * with input that came from protobuf binary format, protobuf text format, or + * even an in-memory upb_msg -- the pretty printer will not know the + * difference. + * + * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. + * + */ + +#ifndef UPB_STREAM_H +#define UPB_STREAM_H + +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Forward-declare. We can't include upb_def.h; it would be circular. +struct _upb_fielddef; + +/* upb_handlers ***************************************************************/ + +// upb_handlers define the interface by which a upb_src passes data to a +// upb_sink. + +// Constants that a handler returns to indicate to its caller whether it should +// continue or not. 
+typedef enum { + // Caller should continue sending values to the sink. + UPB_CONTINUE, + + // Stop processing for now; check status for details. If no status was set, + // a generic error will be returned. If the error is resumable, it is not + // (yet) defined where processing will resume -- waiting for real-world + // examples of resumable decoders and resume-requiring clients. upb_src + // implementations that are not capable of resuming will override the return + // status to be non-resumable if a resumable status was set by the handlers. + UPB_BREAK, + + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIPSUBMSG, + + // When returned from a startsubmsg handler, indicates that the submessage + // should be handled by a different set of handlers, which have been + // registered on the provided upb_handlers object. This allows upb_handlers + // objects to compose; a set of upb_handlers need not know whether it is the + // top-level message or a sub-message. May not be returned from any other + // callback. + UPB_DELEGATE, +} upb_flow_t; + +// upb_handlers +struct _upb_handlers; +typedef struct _upb_handlers upb_handlers; + +typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_endmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_value_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_value val); +typedef upb_flow_t (*upb_startsubmsg_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_handlers *delegate_to); +typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, + upb_field_number_t fieldnum, + upb_value val); + +// An empty set of handlers, for convenient copy/paste: +// +// static upb_flow_t startmsg(void *closure) { +// // Called when the top-level message begins. 
+// return UPB_CONTINUE; +// } +// +// static upb_flow_t endmsg(void *closure) { +// // Called when the top-level message ends. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { +// // Called for every value in the stream. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t startsubmsg(void *closure, upb_fielddef *f, +// upb_handlers *delegate_to) { +// // Called when a submessage begins; can delegate by returning UPB_DELEGATE. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t endsubmsg(void *closure) { +// // Called when a submessage ends. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, +// upb_value val) { +// // Called when an unknown value is encountered. +// return UPB_CONTINUE; +// } +// +// // Any handlers you don't need can be set to NULL. +// static upb_handlerset handlers = { +// startmsg, +// endmsg, +// value, +// startsubmsg, +// endsubmsg, +// unknownval, +// }; +typedef struct { + upb_startmsg_handler_t startmsg; + upb_endmsg_handler_t endmsg; + upb_value_handler_t value; + upb_startsubmsg_handler_t startsubmsg; + upb_endsubmsg_handler_t endsubmsg; + upb_unknownval_handler_t unknownval; +} upb_handlerset; + +// Functions to register handlers on a upb_handlers object. +INLINE void upb_handlers_init(upb_handlers *h); +INLINE void upb_handlers_uninit(upb_handlers *h); +INLINE void upb_handlers_reset(upb_handlers *h); +INLINE bool upb_handlers_isempty(upb_handlers *h); +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); + +// TODO: for clients that want to increase efficiency by preventing bytesrcs +// from automatically being converted to strings in the value callback. +// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); + +// The closure will be passed to every handler.
The status will be read by the +// upb_src immediately after a handler has returned UPB_BREAK and used as the +// overall upb_src status; it will not be referenced at any other time. +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, + upb_status *status); + + +/* upb_src ********************************************************************/ + +struct _upb_src; +typedef struct _upb_src upb_src; + +// upb_src_sethandlers() must be called once and only once before upb_src_run() +// is called. This sets up the callbacks that will handle the parse. A +// upb_src that is fully initialized except for the call to +// upb_src_sethandlers() is called "prepared" -- this is useful for library +// functions that want to consume the output of a generic upb_src. +// Calling sethandlers() multiple times is an error and will trigger an abort(). +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); + +// Runs the src, calling the callbacks that were registered with +// upb_src_sethandlers(), and returning the status of the operation in +// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a +// non-blocking socket) or a resumable error; in both cases upb_src_run can be +// called again later. TRYAGAIN could come from either the src (input buffers +// are empty) or the handlers (output buffers are full). +INLINE void upb_src_run(upb_src *src, upb_status *status); + + +// A convenience object that a upb_src can use to invoke handlers. It +// transparently handles delegation so that the upb_src needs only follow the +// protocol as if delegation did not exist. 
+struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h, + bool supports_skip); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val); + +/* upb_bytesrc ****************************************************************/ + +// Reads up to "count" bytes into "buf", returning the total number of bytes +// read. If negative, indicates error and puts details in "status". +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count, upb_status *status); + +// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure +// that "str" is created or just recycled. Returns "false" if no data was +// returned, either due to error or EOF (check status for details). +// +// In comparison to upb_bytesrc_read(), this call can possibly alias existing +// string data (which avoids a copy). On the other hand, if the data was *not* +// already in an existing string, this copies it into a upb_string, and if the +// data needs to be put in a specific range of memory (because eg. you need to +// put it into a different kind of string object) then upb_bytesrc_read() could +// save you a copy. +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_status *status); + +// A convenience function for getting all the remaining data in a upb_bytesrc +// as a upb_string. Returns false and sets "status" if the operation fails.
+INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status); +INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, + upb_status *status) { + return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); +} + + +/* upb_bytesink ***************************************************************/ + +struct _upb_bytesink; +typedef struct _upb_bytesink upb_bytesink; + +// TODO: Figure out how buffering should be handled. Should the caller buffer +// data and only call these functions when a buffer is full? Seems most +// efficient, but then buffering has to be configured in the caller, which +// could be anything, which makes it hard to have a standard interface for +// controlling buffering. +// +// The downside of having the bytesink buffer is efficiency: the caller is +// making more (virtual) function calls, and the caller can't arrange to have +// a big contiguous buffer. The bytesink can do this, but will have to copy +// to make the data contiguous. + +// Returns the number of bytes written. +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, + const char *fmt, ...); + +// Puts the given string, returning the number of bytes written, or a negative +// value on error (check "status" for details). Ownership of the string is +// *not* passed; a callee wanting a reference must call upb_string_getref(). +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status); + +#include "upb_stream_vtbl.h" + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_stream_vtbl.h b/src/upb_stream_vtbl.h new file mode 100644 index 0000000..e1f9cb8 --- /dev/null +++ b/src/upb_stream_vtbl.h @@ -0,0 +1,295 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * vtable declarations for types that are implementing any of the src or sink + * interfaces.
Only components that are implementing these interfaces need + * to worry about this file. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_SRCSINK_VTBL_H_ +#define UPB_SRCSINK_VTBL_H_ + +#include +#include "upb_stream.h" +#include "upb_string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Typedefs for function pointers to all of the virtual functions. + +// upb_src +typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers); +typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); + +// upb_bytesrc. +typedef upb_strlen_t (*upb_bytesrc_read_fptr)( + upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); +typedef bool (*upb_bytesrc_getstr_fptr)( + upb_bytesrc *src, upb_string *str, upb_status *status); + +// upb_bytesink. +typedef upb_strlen_t (*upb_bytesink_write_fptr)( + upb_bytesink *bytesink, void *buf, upb_strlen_t count); +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str, upb_status *status); +typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( + upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); + +// Vtables for the above interfaces. +typedef struct { + upb_bytesrc_read_fptr read; + upb_bytesrc_getstr_fptr getstr; +} upb_bytesrc_vtbl; + +typedef struct { + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; + upb_bytesink_vprintf_fptr vprintf; +} upb_bytesink_vtbl; + +typedef struct { + upb_src_sethandlers_fptr sethandlers; + upb_src_run_fptr run; +} upb_src_vtbl; + + +// "Base Class" definitions; components that implement these interfaces should +// contain one of these structures. 
+ +struct _upb_bytesrc { + upb_bytesrc_vtbl *vtbl; +}; + +struct _upb_bytesink { + upb_bytesink_vtbl *vtbl; +}; + +struct _upb_src { + upb_src_vtbl *vtbl; +}; + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { + s->vtbl = vtbl; +} + +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { + s->vtbl = vtbl; +} + +INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) { + s->vtbl = vtbl; +} + +// Implementation of virtual function dispatch. + +// upb_src +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) { + src->vtbl->sethandlers(src, handlers); +} + +INLINE void upb_src_run(upb_src *src, upb_status *status) { + src->vtbl->run(src, status); +} + +// upb_bytesrc +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count, upb_status *status) { + return src->vtbl->read(src, buf, count, status); +} + +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + return src->vtbl->getstr(src, str, status); +} + +INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + // We start with a getstr, because that could possibly alias data instead of + // copying. + if (!upb_bytesrc_getstr(src, str, status)) return false; + // Trade-off between number of read calls and amount of overallocation. + const size_t bufsize = 4096; + do { + upb_strlen_t len = upb_string_len(str); + char *buf = upb_string_getrwbuf(str, len + bufsize); + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize, status); + if (read < 0) return false; + // Resize to proper size. 
+ upb_string_getrwbuf(str, len + read); + } while (status->code != UPB_EOF); + return true; +} + + +// upb_bytesink +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count) { + return sink->vtbl->write(sink, buf, count); +} + +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { + return sink->vtbl->putstr(sink, str, status); +} + +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); + va_end(args); + return ret; +} + +// upb_handlers +struct _upb_handlers { + upb_handlerset *set; + void *closure; + upb_status *status; // We don't own this. +}; + +INLINE void upb_handlers_init(upb_handlers *h) { + (void)h; +} +INLINE void upb_handlers_uninit(upb_handlers *h) { + (void)h; +} + +INLINE void upb_handlers_reset(upb_handlers *h) { + h->set = NULL; + h->closure = NULL; +} + +INLINE bool upb_handlers_isempty(upb_handlers *h) { + return !h->set && !h->closure; +} + +INLINE upb_flow_t upb_nop(void *closure) { + (void)closure; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) { + (void)closure; + (void)f; + (void)val; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f, + upb_handlers *delegate_to) { + (void)closure; + (void)f; + (void)delegate_to; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)closure; + (void)fieldnum; + (void)val; + return UPB_CONTINUE; +} + +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + if (!set->startmsg) set->startmsg = &upb_nop; + if (!set->endmsg) set->endmsg = &upb_nop; + if (!set->value) set->value = &upb_value_nop; + if (!set->startsubmsg) set->startsubmsg =
&upb_startsubmsg_nop; + if (!set->endsubmsg) set->endsubmsg = &upb_nop; + if (!set->unknownval) set->unknownval = &upb_unknownval_nop; + h->set = set; +} + +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, + upb_status *status) { + h->closure = closure; + h->status = status; +} + +// upb_dispatcher +typedef struct { + upb_handlers handlers; + int depth; +} upb_dispatcher_frame; + +struct _upb_dispatcher { + upb_dispatcher_frame stack[UPB_MAX_NESTING], *top, *limit; + bool supports_skip; +}; + +INLINE void upb_dispatcher_init(upb_dispatcher *d) { + d->limit = d->stack + sizeof(d->stack); +} + +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h, + bool supports_skip) { + d->top = d->stack; + d->top->depth = 1; // Never want to trigger end-of-delegation. + d->top->handlers = *h; + d->supports_skip = supports_skip; +} + +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + return d->top->handlers.set->startmsg(d->top->handlers.closure); +} + +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + return d->top->handlers.set->endmsg(d->top->handlers.closure); +} + +// TODO: several edge cases to fix: +// - delegated start returns UPB_BREAK, should replay the start on resume. +// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg. 
+// Dispatches a start-submessage event for field "f".
+//
+// The current frame's startsubmsg handler is called with a freshly reset
+// upb_handlers.  If it returns UPB_DELEGATE, a new frame holding those
+// handlers is pushed (depth 0) and that frame's startmsg handler is called;
+// its result becomes the return value.  The depth of whichever frame is now
+// on top is incremented when the result is UPB_CONTINUE or when the
+// dispatcher was reset with supports_skip == false.
+//
+// NOTE(review): nothing here compares d->top against d->limit before pushing;
+// presumably the caller bounds nesting -- confirm.
+INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
+                                           struct _upb_fielddef *f) {
+  upb_handlers handlers;
+  upb_handlers_init(&handlers);
+  upb_handlers_reset(&handlers);
+  upb_flow_t ret = d->top->handlers.set->startsubmsg(d->top->handlers.closure, f, &handlers);
+  // A handler must fill in "handlers" if and only if it asks to delegate.
+  assert((ret == UPB_DELEGATE) == !upb_handlers_isempty(&handlers));
+  if (ret == UPB_DELEGATE) {
+    ++d->top;
+    d->top->handlers = handlers;
+    d->top->depth = 0;
+    ret = d->top->handlers.set->startmsg(d->top->handlers.closure);
+  }
+  if (ret == UPB_CONTINUE || !d->supports_skip) ++d->top->depth;
+  upb_handlers_uninit(&handlers);
+  return ret;
+}
+
+// Dispatches an end-submessage event.  The top frame's depth is decremented;
+// when it reaches zero the frame was a delegate, so its endmsg handler runs
+// first (a result other than UPB_CONTINUE is returned immediately, before the
+// frame is popped) and the frame is popped.  The endsubmsg handler of the
+// frame that is on top afterwards is then called.
+INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) {
+  upb_flow_t ret;
+  if (--d->top->depth == 0) {
+    ret = d->top->handlers.set->endmsg(d->top->handlers.closure);
+    if (ret != UPB_CONTINUE) return ret;
+    --d->top;
+    assert(d->top >= d->stack);
+  }
+  return d->top->handlers.set->endsubmsg(d->top->handlers.closure);
+}
+
+// Forwards a value for field "f" to the top frame's value handler.
+INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d,
+                                     struct _upb_fielddef *f,
+                                     upb_value val) {
+  return d->top->handlers.set->value(d->top->handlers.closure, f, val);
+}
+
+// Forwards a value with field number "fieldnum" to the top frame's unknownval
+// handler.
+INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d,
+                                          upb_field_number_t fieldnum,
+                                          upb_value val) {
+  return d->top->handlers.set->unknownval(d->top->handlers.closure,
+                                          fieldnum, val);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/src/upb_string.c b/src/upb_string.c
new file mode 100644
index 0000000..30ed88f
--- /dev/null
+++ b/src/upb_string.c
@@ -0,0 +1,143 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
+ */ + +#include "upb_string.h" + +#include +#ifdef __GLIBC__ +#include +#elif defined(__APPLE__) +#include +#endif + +static uint32_t upb_round_up_pow2(uint32_t v) { + // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +upb_string *upb_string_new() { + upb_string *str = malloc(sizeof(*str)); + str->ptr = NULL; + str->cached_mem = NULL; + str->len = 0; +#ifndef UPB_HAVE_MSIZE + str->size = 0; +#endif + str->src = NULL; + upb_atomic_refcount_init(&str->refcount, 1); + return str; +} + +uint32_t upb_string_size(upb_string *str) { +#ifdef __GLIBC__ + return malloc_usable_size(str->cached_mem); +#elif defined(__APPLE__) + return malloc_size(str->cached_mem); +#else + return str->size; +#endif +} + +void _upb_string_free(upb_string *str) { + free(str->cached_mem); + _upb_string_release(str); + free(str); +} + +char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { + // assert(str->ptr == NULL); + upb_strlen_t size = upb_string_size(str); + if (size < len) { + size = upb_round_up_pow2(len); + str->cached_mem = realloc(str->cached_mem, size); +#ifndef UPB_HAVE_MSIZE + str->size = size; +#endif + } + str->len = len; + str->ptr = str->cached_mem; + return str->cached_mem; +} + +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len) { + if(str->ptr) *(char*)0 = 0; + assert(str->ptr == NULL); + str->src = upb_string_getref(target_str); + str->ptr = upb_string_getrobuf(target_str) + start; + str->len = len; +} + +void upb_string_vprintf(upb_string *str, const char *format, va_list args) { + // Try once without reallocating. We have to va_copy because we might have + // to call vsnprintf again. 
+ uint32_t size = UPB_MAX(upb_string_size(str), 16); + char *buf = upb_string_getrwbuf(str, size); + va_list args_copy; + va_copy(args_copy, args); + uint32_t true_size = vsnprintf(buf, size, format, args_copy); + va_end(args_copy); + + if (true_size >= size) { + // Need to reallocate. We reallocate even if the sizes were equal, + // because snprintf excludes the terminating NULL from its count. + // We don't care about the terminating NULL, but snprintf might + // bail out of printing even other characters if it doesn't have + // enough space to write the NULL also. + upb_string_recycle(&str); + buf = upb_string_getrwbuf(str, true_size + 1); + vsnprintf(buf, true_size + 1, format, args); + } + str->len = true_size; +} + +upb_string *upb_string_asprintf(const char *format, ...) { + upb_string *str = upb_string_new(); + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); + return str; +} + +upb_string *upb_strdup(upb_string *s) { + upb_string *str = upb_string_new(); + upb_strcpy(str, s); + return str; +} + +void upb_strcat(upb_string *s, upb_string *append) { + uint32_t old_size = upb_string_len(s); + uint32_t append_size = upb_string_len(append); + uint32_t new_size = old_size + append_size; + char *buf = upb_string_getrwbuf(s, new_size); + memcpy(buf + old_size, upb_string_getrobuf(append), append_size); +} + +upb_string *upb_strreadfile(const char *filename) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + if(fseek(f, 0, SEEK_END) != 0) goto error; + long size = ftell(f); + if(size < 0) goto error; + if(fseek(f, 0, SEEK_SET) != 0) goto error; + upb_string *s = upb_string_new(); + char *buf = upb_string_getrwbuf(s, size); + if(fread(buf, size, 1, f) != 1) goto error; + fclose(f); + return s; + +error: + fclose(f); + return NULL; +} diff --git a/src/upb_string.h b/src/upb_string.h new file mode 100644 index 0000000..0694a23 --- /dev/null +++ b/src/upb_string.h @@ -0,0 +1,360 @@ +/* + * upb - a minimalist 
implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
+ *
+ * This file defines a simple string type which is length-delimited instead
+ * of NULL-terminated, and which has useful sharing semantics.
+ *
+ * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free()
+ * wherever possible, while keeping both CPU and memory overhead low.
+ * Throughout upb there are situations where one wants to reference all or part
+ * of another string without copying.  upb_string provides APIs for doing this,
+ * and allows the referenced string to be kept alive for as long as anyone is
+ * referencing it.
+ *
+ * Characteristics of upb_string:
+ * - strings are reference-counted.
+ * - strings are immutable (can be mutated only when first created or recycled).
+ * - if a string has no other referents, it can be "recycled" into a new string
+ *   without having to reallocate the upb_string.
+ * - strings can be substrings of other strings (owning a ref on the source
+ *   string).
+ *
+ * Reference-counted strings have recently fallen out of favor because of the
+ * performance impacts of doing thread-safe reference counting with atomic
+ * operations.  We side-step this issue by not performing atomic operations
+ * unless the string has been marked thread-safe.  Time will tell whether this
+ * scheme is easy and convenient enough to be practical.
+ *
+ * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched
+ * idiom that we go with it instead of making our pointers uint8_t*.
+ *
+ * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE
+ * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED!  What this means is that if
+ * you are logically passing a reference to a upb_string to another thread
+ * (which implies that the other thread must eventually call unref or recycle),
+ * you have two options:
+ *
+ * - create a copy of the string that will be used in the other thread only.
+ * - call upb_string_get_synchronized_ref(), which will make getref, unref, and
+ *   recycle thread-safe for this upb_string.
+ */
+
+#ifndef UPB_STRING_H
+#define UPB_STRING_H
+
+#include <assert.h>
+#include <stdarg.h>
+#include <string.h>
+#include "upb_atomic.h"
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// All members of this struct are private, and may only be read/written through
+// the associated functions.
+struct _upb_string {
+  // The string's refcount.
+  upb_atomic_refcount_t refcount;
+
+  // The pointer to our currently active data.  This may be memory we own
+  // or a pointer into memory we don't own.
+  const char *ptr;
+
+  // If non-NULL, this is a block of memory we own.  We keep this cached even
+  // if "ptr" is currently aliasing memory we don't own.
+  char *cached_mem;
+
+  // The effective length of the string (the bytes at ptr).
+  int32_t len;
+#ifndef UPB_HAVE_MSIZE
+  // How many bytes are allocated in cached_mem.
+  //
+  // Many platforms have a function that can tell you the size of a block
+  // that was previously malloc'd.  In this case we can avoid storing the
+  // size explicitly.
+  uint32_t size;
+#endif
+
+  // Used if this is a slice of another string, NULL otherwise.  We own a ref
+  // on src.
+  struct _upb_string *src;
+};
+
+// Internal-only initializer for upb_string instances.  The initializers are
+// positional and must stay in the same order as the members above; the extra
+// 0 in the second form initializes "size".
+#ifdef UPB_HAVE_MSIZE
+#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, NULL}
+#else
+#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, 0, NULL}
+#endif
+
+// Special pseudo-refcounts for static/stack-allocated strings, respectively.
+#define _UPB_STRING_REFCOUNT_STATIC -1
+#define _UPB_STRING_REFCOUNT_STACK -2
+
+// Returns a newly-created, empty, non-finalized string.  When the string is no
+// longer needed, it should be unref'd, never freed directly.
+upb_string *upb_string_new();
+
+// Internal-only; clients should call upb_string_unref().
+void _upb_string_free(upb_string *str);
+
+// Releases a ref on the given string, which may free the memory.  "str"
+// can be NULL, in which case this is a no-op.  Strings whose refcount is not
+// positive (the static/stack pseudo-refcounts) are left alone.  WARNING: NOT
+// THREAD_SAFE UNLESS THE STRING IS SYNCHRONIZED.
+INLINE void upb_string_unref(upb_string *str) {
+  if (str && upb_atomic_read(&str->refcount) > 0 &&
+      upb_atomic_unref(&str->refcount)) {
+    _upb_string_free(str);
+  }
+}
+
+// Internal-only: drops the ref held on the source string, if "str" is
+// currently a slice of one.  Declared INLINE like the other helpers in this
+// header, so files that include the header without calling it do not each get
+// an unused static function.
+INLINE void _upb_string_release(upb_string *str) {
+  if(str->src) {
+    upb_string_unref(str->src);
+    str->src = NULL;
+  }
+}
+
+upb_string *upb_strdup(upb_string *s);  // Forward-declare.
+
+// Returns a string with the same contents as "str".  The caller owns a ref on
+// the returned string, which may or may not be the same object as "str".
+// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED!
+INLINE upb_string *upb_string_getref(upb_string *str) {
+  int refcount = upb_atomic_read(&str->refcount);
+  // A stack string cannot be shared, so hand back a copy instead.
+  if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str);
+  // We don't ref the special <0 refcount for static strings.
+  if (refcount > 0) upb_atomic_ref(&str->refcount);
+  return str;
+}
+
+// Returns the length of the string.
+INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
+
+// Use to read the bytes of the string.  The caller *must* call
+// upb_string_endread() after the data has been read.  The window between
+// upb_string_getrobuf() and upb_string_endread() should be kept as short as
+// possible, because any pending upb_string_detach() may be blocked until
+// upb_string_endread is called().  No other functions may be called on the
+// string during this window except upb_string_len().
+INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
+INLINE void upb_string_endread(upb_string *str) { (void)str; }
+
+// Convenience method for getting the end of the string.  Calls
+// upb_string_getrobuf() so inherits the caveats of calling that function.
+INLINE const char *upb_string_getbufend(upb_string *str) {
+  return upb_string_getrobuf(str) + upb_string_len(str);
+}
+
+// Attempts to recycle the string "str" so it may be reused and have different
+// data written to it.  After the function returns, "str" points to a writable
+// string, which is either the original string if it had no other references
+// or a newly created string if it did have other references.
+//
+// As a special case, passing a pointer to NULL will allocate a new string.
+// This is convenient for the pattern:
+//
+//   upb_string *str = NULL;
+//   while (x) {
+//     if (y) {
+//       upb_string_recycle(&str);
+//       upb_src_getstr(str);
+//     }
+//   }
+INLINE void upb_string_recycle(upb_string **_str) {
+  upb_string *str = *_str;
+  if(str && upb_atomic_only(&str->refcount)) {
+    // Sole owner: reset in place, keeping the cached buffer for reuse.
+    str->ptr = NULL;
+    str->len = 0;
+    _upb_string_release(str);
+  } else {
+    // NULL or shared: give up our ref (a no-op for NULL) and start afresh.
+    upb_string_unref(str);
+    *_str = upb_string_new();
+  }
+}
+
+
+// The options for setting the contents of a string.  These may only be called
+// when a string is first created or recycled; once other functions have been
+// called on the string, these functions are not allowed until the string is
+// recycled.
+
+// Gets a pointer suitable for writing to the string, which is guaranteed to
+// have at least "len" bytes of data available.  The size of the string will
+// become "len".
+char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
+
+// Replaces the contents of str with the contents of the given printf.
+void upb_string_vprintf(upb_string *str, const char *format, va_list args);
+INLINE void upb_string_printf(upb_string *str, const char *format, ...) {
+  va_list args;
+  va_start(args, format);
+  upb_string_vprintf(str, format, args);
+  va_end(args);
+}
+
+// Sets the contents of "str" to be the given substring of "target_str", to
+// which the caller must own a ref.
+void upb_string_substr(upb_string *str, upb_string *target_str,
+                       upb_strlen_t start, upb_strlen_t len);
+
+// Sketch of an API for allowing upb_strings to reference external, unowned
+// data.  Waiting for a clear use case before actually implementing it.
+//
+// Makes the string "str" a reference to the given string data.  The caller
+// guarantees that the given string data will not change or be deleted until a
+// matching call to upb_string_detach(), which may block until any concurrent
+// readers have finished reading.  upb_string_detach() preserves the contents
+// of the string by copying the referenced data if there are any other
+// referents.
+// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
+// void upb_string_detach(upb_string *str);
+
+// Allows using upb_strings in printf, ie:
+//   upb_string *str = UPB_STRLIT("Hello, World!\n");
+//   printf("String is: " UPB_STRFMT, UPB_STRARG(str));
+#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str)
+#define UPB_STRFMT "%.*s"
+
+// Macros for constructing upb_string objects statically or on the stack.  These
+// can be used like:
+//
+//   upb_string static_str = UPB_STATIC_STRING("Foo");
+//
+//   int main() {
+//     upb_string stack_str = UPB_STACK_STRING("Foo");
+//     // Now:
+//     //   upb_streql(&static_str, &stack_str) == true
+//     //   upb_streql(&static_str, UPB_STRLIT("Foo")) == true
+//   }
+//
+// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays,
+// but you must not change the underlying data once you've passed the string on:
+//
+//   void foo() {
+//     char data[] = "ABC123";
+//     upb_string stack_str = UPB_STACK_STRING(data);
+//     bar(&stack_str);
+//     data[0] = 'B';  // NOT ALLOWED!!
+//   }
+//
+// The plain forms take the length from sizeof(str)-1, so they need an array
+// or string literal; use the _LEN forms to pass the length explicitly.
+//
+// TODO: should the stack business just be like attach/detach?  The latter seems
+// more flexible, though it does require a stack allocation.  Maybe put this off
+// until there is a clear use case.
+#define UPB_STATIC_STRING(str) \
+  _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC)
+#define UPB_STATIC_STRING_LEN(str, len) \
+  _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC)
+#define UPB_STACK_STRING(str) \
+  _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK)
+#define UPB_STACK_STRING_LEN(str, len) \
+  _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK)
+
+// A convenient way of specifying upb_strings as literals, like:
+//
+//   upb_streql(UPB_STRLIT("expected"), other_str);
+//
+// However, this requires either C99 compound initializers or C++.
+// Must ONLY be called with a string literal as its argument!
+//#ifdef __cplusplus
+//namespace upb {
+//class String : public upb_string {
+//  // This constructor must ONLY be called with a string literal.
+//  String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {}
+//};
+//}
+//#define UPB_STRLIT(str) upb::String(str)
+//#endif
+#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
+
+/* upb_string library functions ***********************************************/
+
+// Named like their <string.h> counterparts, these are all safe against buffer
+// overflow.  For the most part these only use the public upb_string interface.
+
+// More efficient than upb_strcmp if all you need is to test equality.
+// Strings of different length compare unequal without their bytes being read.
+INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
+  upb_strlen_t len = upb_string_len(s1);
+  if(len != upb_string_len(s2)) {
+    return false;
+  } else {
+    bool ret =
+        memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0;
+    upb_string_endread(s1);
+    upb_string_endread(s2);
+    return ret;
+  }
+}
+
+// Like strcmp().
+int upb_strcmp(upb_string *s1, upb_string *s2);
+
+// Compare a upb_string with memory or a NULL-terminated C string.
+INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) {
+  // Lengths must agree before the bytes are worth comparing.
+  if (len != upb_string_len(str)) return false;
+  return memcmp(upb_string_getrobuf(str), buf, len) == 0;
+}
+
+INLINE bool upb_streqlc(upb_string *str, const void *buf) {
+  // Measures "buf" first and compares second; could be made one-pass.
+  upb_strlen_t buflen = strlen((const char*)buf);
+  return upb_streqllen(str, buf, buflen);
+}
+
+// Sets "dest" to a copy of the "len" bytes at "src".
+INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
+  char *out = upb_string_getrwbuf(dest, len);
+  memcpy(out, src, len);
+}
+
+// Sets "dest" to a copy of the contents of the string "src".
+INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
+  const char *bytes = upb_string_getrobuf(src);
+  upb_strlen_t nbytes = upb_string_len(src);
+  upb_strcpylen(dest, bytes, nbytes);
+  upb_string_endread(src);
+}
+
+// Sets "dest" to a copy of the NULL-terminated C string "src".
+INLINE void upb_strcpyc(upb_string *dest, const void *src) {
+  // "src" is walked twice (once to measure, once to copy); the alternative
+  // would be to grow "dest" repeatedly, which seems worse.
+  upb_strlen_t nbytes = strlen((const char*)src);
+  upb_strcpylen(dest, src, nbytes);
+}
+
+// Returns a new string whose contents are a copy of s.
+upb_string *upb_strdup(upb_string *s);
+
+// Returns a new string holding a copy of the "len" bytes at "src".
+INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
+  upb_string *copy = upb_string_new();
+  upb_strcpylen(copy, src, len);
+  return copy;
+}
+
+// Returns a new string holding a copy of the NULL-terminated C string "src".
+INLINE upb_string *upb_strdupc(const char *src) {
+  upb_strlen_t nbytes = strlen(src);
+  return upb_strduplen(src, nbytes);
+}
+
+// Appends 'append' to 's' in-place, resizing s if necessary.
+void upb_strcat(upb_string *s, upb_string *append);
+
+// Returns a new string that refers to "len" bytes of "s" starting at "offset".
+INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) {
+  upb_string *slice = upb_string_new();
+  upb_string_substr(slice, s, offset, len);
+  return slice;
+}
+
+// Reads an entire file into a newly-allocated string.
+upb_string *upb_strreadfile(const char *filename); + +// Returns a new string with the contents of the given printf. +upb_string *upb_string_asprintf(const char *format, ...); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_strstream.c b/src/upb_strstream.c new file mode 100644 index 0000000..a7967d4 --- /dev/null +++ b/src/upb_strstream.c @@ -0,0 +1,65 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_strstream.h" + +#include +#include "upb_string.h" + +static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, + upb_strlen_t count, upb_status *status) { + upb_stringsrc *src = (upb_stringsrc*)_src; + if (src->offset == upb_string_len(src->str)) { + status->code = UPB_EOF; + return -1; + } else { + upb_strlen_t to_read = UPB_MIN(count, upb_string_len(src->str) - src->offset); + memcpy(buf, upb_string_getrobuf(src->str) + src->offset, to_read); + src->offset += to_read; + return to_read; + } +} + +static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str, + upb_status *status) { + upb_stringsrc *src = (upb_stringsrc*)_src; + if (src->offset == upb_string_len(src->str)) { + status->code = UPB_EOF; + return false; + } else { + upb_strlen_t len = upb_string_len(src->str) - src->offset; + upb_string_substr(str, src->str, src->offset, len); + src->offset += len; + assert(src->offset == upb_string_len(src->str)); + return true; + } +} + +void upb_stringsrc_init(upb_stringsrc *s) { + static upb_bytesrc_vtbl bytesrc_vtbl = { + upb_stringsrc_read, + upb_stringsrc_getstr, + }; + s->str = NULL; + upb_bytesrc_init(&s->bytesrc, &bytesrc_vtbl); +} + +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { + if (str != s->str) { + upb_string_unref(s->str); + s->str = upb_string_getref(str); + } + s->offset = 0; +} + +void upb_stringsrc_uninit(upb_stringsrc *s) { + upb_string_unref(s->str); +} + + +upb_bytesrc 
*upb_stringsrc_bytesrc(upb_stringsrc *s) { + return &s->bytesrc; +} diff --git a/src/upb_strstream.h b/src/upb_strstream.h new file mode 100644 index 0000000..1a8792b --- /dev/null +++ b/src/upb_strstream.h @@ -0,0 +1,65 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file contains upb_bytesrc and upb_bytesink implementations for + * upb_string. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_STRSTREAM_H +#define UPB_STRSTREAM_H + +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_stringsrc **************************************************************/ + +struct _upb_stringsrc { + upb_bytesrc bytesrc; + upb_string *str; + upb_strlen_t offset; +}; +typedef struct _upb_stringsrc upb_stringsrc; + +// Create/free a stringsrc. +void upb_stringsrc_init(upb_stringsrc *s); +void upb_stringsrc_uninit(upb_stringsrc *s); + +// Resets the stringsrc to a state where it will vend the given string. The +// stringsrc will take a reference on the string, so the caller need not ensure +// that it outlives the stringsrc. A stringsrc can be reset multiple times. +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); + +// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); + + +/* upb_stringsink *************************************************************/ + +struct upb_stringsink; +typedef struct upb_stringsink upb_stringsink; + +// Create/free a stringsrc. +upb_stringsink *upb_stringsink_new(); +void upb_stringsink_free(upb_stringsink *s); + +// Gets a string containing the data that has been written to this stringsink. +// The caller does *not* own any references to this string. +upb_string *upb_stringsink_getstring(upb_stringsink *s); + +// Clears the internal string of accumulated data, resetting it to empty. 
+void upb_stringsink_reset(upb_stringsink *s); + +// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. +upb_bytesink *upb_stringsrc_bytesink(); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_table.c b/src/upb_table.c new file mode 100644 index 0000000..a6e0a56 --- /dev/null +++ b/src/upb_table.c @@ -0,0 +1,411 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_table.h" +#include "upb_string.h" + +#include +#include +#include + +static const upb_inttable_key_t EMPTYENT = 0; +static const double MAX_LOAD = 0.85; + +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); + +/* We use 1-based indexes into the table so that 0 can be "NULL". */ +static upb_inttable_entry *intent(upb_inttable *t, int32_t i) { + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); +} +static upb_strtable_entry *strent(upb_strtable *t, int32_t i) { + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); +} + +void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) +{ + t->count = 0; + t->entry_size = entry_size; + t->size_lg2 = 0; + while(size >>= 1) t->size_lg2++; + size_t bytes = upb_table_size(t) * t->entry_size; + t->mask = upb_table_size(t) - 1; + t->entries = malloc(bytes); + memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */ +} + +void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); +} + +void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); +} + +void upb_table_free(upb_table *t) { free(t->entries); } +void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); } +void upb_strtable_free(upb_strtable *t) { + // Free refs from the strtable. 
+ upb_strtable_entry *e = upb_strtable_begin(t); + for(; e; e = upb_strtable_next(t, e)) { + upb_string_unref(e->key); + } + upb_table_free(&t->t); +} + +static uint32_t strtable_bucket(upb_strtable *t, upb_string *key) +{ + uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0); + return (hash & (upb_strtable_size(t)-1)) + 1; +} + +void *upb_strtable_lookup(upb_strtable *t, upb_string *key) +{ + uint32_t bucket = strtable_bucket(t, key); + upb_strtable_entry *e; + do { + e = strent(t, bucket); + if(e->key && upb_streql(e->key, key)) return e; + } while((bucket = e->next) != UPB_END_OF_CHAIN); + return NULL; +} + +static uint32_t empty_intbucket(upb_inttable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 1; i <= upb_inttable_size(table); i++) { + upb_inttable_entry *e = intent(table, i); + if(e->key == EMPTYENT) return i; + } + assert(false); + return 0; +} + +/* The insert routines have a lot more code duplication between int/string + * variants than I would like, but there's just a bit too much that varies to + * parameterize them. */ +static void intinsert(upb_inttable *t, upb_inttable_entry *e) +{ + assert(upb_inttable_lookup(t, e->key) == NULL); + t->t.count++; + uint32_t bucket = upb_inttable_bucket(t, e->key); + upb_inttable_entry *table_e = intent(t, bucket); + if(table_e->key != EMPTYENT) { /* Collision. */ + if(bucket == upb_inttable_bucket(t, table_e->key)) { + /* Existing element is in its main posisiton. Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_intbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = intent(t, table_e->next); + table_e->next = empty_bucket; + table_e = intent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. 
*/ + uint32_t empty_bucket = empty_intbucket(t); + uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key); + memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + upb_inttable_entry *evictee_e = intent(t, evictee_bucket); + while(1) { + assert(evictee_e->key != UPB_EMPTY_ENTRY); + assert(evictee_e->next != UPB_END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + evictee_e = intent(t, evictee_e->next); + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; + assert(upb_inttable_lookup(t, e->key) == table_e); +} + +void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e) +{ + assert(e->key != 0); + if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + upb_inttable new_table; + upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size); + new_table.t.count = t->t.count; + upb_inttable_entry *old_e; + for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e)) + intinsert(&new_table, old_e); + upb_inttable_free(t); + *t = new_table; + } + intinsert(t, e); +} + +static uint32_t empty_strbucket(upb_strtable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { + upb_strtable_entry *e = strent(table, i); + if(!e->key) return i; + } + assert(false); + return 0; +} + +static void strinsert(upb_strtable *t, upb_strtable_entry *e) +{ + assert(upb_strtable_lookup(t, e->key) == NULL); + e->key = upb_string_getref(e->key); + t->t.count++; + uint32_t bucket = strtable_bucket(t, e->key); + upb_strtable_entry *table_e = strent(t, bucket); + if(table_e->key) { /* Collision. */ + if(bucket == strtable_bucket(t, table_e->key)) { + /* Existing element is in its main posisiton. 
Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_strbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = strent(t, table_e->next); + table_e->next = empty_bucket; + table_e = strent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. */ + uint32_t empty_bucket = empty_strbucket(t); + uint32_t evictee_bucket = strtable_bucket(t, table_e->key); + memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + upb_strtable_entry *evictee_e = strent(t, evictee_bucket); + while(1) { + assert(evictee_e->key); + assert(evictee_e->next != UPB_END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + evictee_e = strent(t, evictee_e->next); + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; + assert(upb_strtable_lookup(t, e->key) == table_e); +} + +void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e) +{ + if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. 
*/ + upb_strtable new_table; + upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); + upb_strtable_entry *old_e; + for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e)) + strinsert(&new_table, old_e); + upb_strtable_free(t); + *t = new_table; + } + strinsert(t, e); +} + +void *upb_inttable_begin(upb_inttable *t) { + return upb_inttable_next(t, intent(t, 0)); +} + +void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) { + upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1); + do { + cur = (void*)((char*)cur + t->t.entry_size); + if(cur == end) return NULL; + } while(cur->key == UPB_EMPTY_ENTRY); + return cur; +} + +void *upb_strtable_begin(upb_strtable *t) { + return upb_strtable_next(t, strent(t, 0)); +} + +void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { + upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1); + do { + cur = (void*)((char*)cur + t->t.entry_size); + if(cur == end) return NULL; + } while(cur->key == NULL); + return cur; +} + +#ifdef UPB_UNALIGNED_READS_OK +//----------------------------------------------------------------------------- +// MurmurHash2, by Austin Appleby (released as public domain). +// Reformatted and C99-ified by Joshua Haberman. +// Note - This code makes a few assumptions about how your machine behaves - +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t +// And it has a few limitations - +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. 
+ const uint32_t m = 0x5bd1e995; + const int32_t r = 24; + + // Initialize the hash to a 'random' value + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + const uint8_t * data = (const uint8_t *)key; + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +#else // !UPB_UNALIGNED_READS_OK + +//----------------------------------------------------------------------------- +// MurmurHashAligned2, by Austin Appleby +// Same algorithm as MurmurHash2, but only does aligned reads - should be safer +// on certain platforms. +// Performance will be lower than MurmurHash2 + +#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + +static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) +{ + const uint32_t m = 0x5bd1e995; + const int32_t r = 24; + const uint8_t * data = (const uint8_t *)key; + uint32_t h = seed ^ len; + uint8_t align = (uintptr_t)data & 3; + + if(align && (len >= 4)) { + // Pre-load the temp registers + uint32_t t = 0, d = 0; + + switch(align) { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + int32_t sl = 8 * (4-align); + int32_t sr = 8 * align; + + // Mix + + while(len >= 4) { + d = *(uint32_t *)data; + t = (t >> sr) | (d << sl); + + uint32_t k = t; + + MIX(h,k,m); + + t = d; + + data += 4; + len -= 4; + } + + // Handle leftover data in temp registers + + d = 0; + + if(len >= align) { + switch(align) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + } + + uint32_t k 
= (t >> sr) | (d << sl); + MIX(h,k,m); + + data += align; + len -= align; + + //---------- + // Handle tail bytes + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + } else { + switch(len) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h ^= (t >> sr) | (d << sl); h *= m; + } + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } else { + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + MIX(h,k,m); + + data += 4; + len -= 4; + } + + //---------- + // Handle tail bytes + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } +} +#undef MIX + +#endif // UPB_UNALIGNED_READS_OK diff --git a/src/upb_table.h b/src/upb_table.h new file mode 100644 index 0000000..20dae92 --- /dev/null +++ b/src/upb_table.h @@ -0,0 +1,133 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file defines very fast int->struct (inttable) and string->struct + * (strtable) hash tables. The struct can be of any size, and it is stored + * in the table itself, for cache-friendly performance. + * + * The table uses internal chaining with Brent's variation (inspired by the + * Lua implementation of hash tables). The hash function for strings is + * Austin Appleby's "MurmurHash." + */ + +#ifndef UPB_TABLE_H_ +#define UPB_TABLE_H_ + +#include +#include "upb.h" +#include "upb_string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Note: the key cannot be zero! Zero is used by the implementation. */ +typedef uint32_t upb_inttable_key_t; + +#define UPB_END_OF_CHAIN (uint32_t)0 +#define UPB_EMPTY_ENTRY (uint32_t)0 + +typedef struct { + upb_inttable_key_t key; + uint32_t next; /* Internal chaining. 
*/ +} upb_inttable_entry; + +// TODO: consider storing the hash in the entry. This would avoid the need to +// rehash on table resizes, but more importantly could possibly improve lookup +// performance by letting us compare hashes before comparing lengths or the +// strings themselves. +typedef struct { + upb_string *key; // We own a ref. + uint32_t next; // Internal chaining. +} upb_strtable_entry; + +typedef struct { + void *entries; + uint32_t count; /* How many elements are currently in the table? */ + uint16_t entry_size; /* How big is each entry? */ + uint8_t size_lg2; /* The table is 2^size_lg2 in size. */ + uint32_t mask; +} upb_table; + +typedef struct { + upb_table t; +} upb_strtable; + +typedef struct { + upb_table t; +} upb_inttable; + +/* Initialize and free a table, respectively. Specify the initial size + * with 'size' (the size will be increased as necessary). Entry size + * specifies how many bytes each entry in the table is. */ +void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t entry_size); +void upb_inttable_free(upb_inttable *table); +void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); +void upb_strtable_free(upb_strtable *table); + +INLINE uint32_t upb_table_size(upb_table *t) { return 1 << t->size_lg2; } +INLINE uint32_t upb_inttable_size(upb_inttable *t) { + return upb_table_size(&t->t); +} +INLINE uint32_t upb_strtable_size(upb_strtable *t) { + return upb_table_size(&t->t); +} + +INLINE uint32_t upb_table_count(upb_table *t) { return t->count; } +INLINE uint32_t upb_inttable_count(upb_inttable *t) { + return upb_table_count(&t->t); +} +INLINE uint32_t upb_strtable_count(upb_strtable *t) { + return upb_table_count(&t->t); +} + +/* Inserts the given key into the hashtable with the given value. The key must + * not already exist in the hash table. The data will be copied from e into + * the hashtable (the amount of data copied comes from entry_size when the + * table was constructed). 
Therefore the data at val may be freed once the + * call returns. */ +void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e); +void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e); + +INLINE uint32_t upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) { + return (k & t->t.mask) + 1; /* Identity hash for ints. */ +} + +/* Looks up key in this table. Inlined because this is in the critical path of + * decoding. We have the caller specify the entry_size because fixing this as + * a literal (instead of reading table->entry_size) gives the compiler more + * ability to optimize. */ +INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key, + uint32_t entry_size) { + assert(key != 0); + uint32_t bucket = upb_inttable_bucket(t, key); + upb_inttable_entry *e; + do { + e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size); + if(e->key == key) return e; + } while((bucket = e->next) != UPB_END_OF_CHAIN); + return NULL; /* Not found. */ +} + +INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { + return upb_inttable_fastlookup(t, key, t->t.entry_size); +} + +void *upb_strtable_lookup(upb_strtable *t, upb_string *key); + +/* Provides iteration over the table. The order in which the entries are + * returned is undefined. Insertions invalidate iterators. The _next + * functions return NULL when the end has been reached. */ +void *upb_inttable_begin(upb_inttable *t); +void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur); + +void *upb_strtable_begin(upb_strtable *t); +void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TABLE_H_ */ diff --git a/src/upb_textprinter.c b/src/upb_textprinter.c new file mode 100644 index 0000000..894a1ea --- /dev/null +++ b/src/upb_textprinter.c @@ -0,0 +1,143 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
+ */ + +#include "upb_textprinter.h" + +#include +#include +#include "upb_def.h" +#include "upb_string.h" + +struct _upb_textprinter { + upb_bytesink *bytesink; + int indent_depth; + bool single_line; + upb_status status; +}; + +#define CHECK(x) if ((x) < 0) goto err; + +static int upb_textprinter_indent(upb_textprinter *p) +{ + if(!p->single_line) + for(int i = 0; i < p->indent_depth; i++) + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + return 0; +err: + return -1; +} + +static int upb_textprinter_endfield(upb_textprinter *p) { + if(p->single_line) { + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + } else { + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status)); + } + return 0; +err: + return -1; +} + +static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, + upb_value val) { + upb_textprinter *p = _p; + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); +#define CASE(fmtstr, member) \ + CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; + switch(f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", double); + case UPB_TYPE(FLOAT): + CASE("%0.f", float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + CASE("%" PRIu32, uint32); + case UPB_TYPE(ENUM): { + upb_enumdef *enum_def = upb_downcast_enumdef(f->def); + upb_string *enum_label = + upb_enumdef_iton(enum_def, upb_value_getint32(val)); + if (enum_label) { + // We found a corresponding string for this enum. Otherwise we fall + // through to the int32 code path. 
+ CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status)); + break; + } + } + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(BOOL): + CASE("%hhu", bool); + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + // TODO: escaping. + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); + CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); + break; + } + upb_textprinter_endfield(p); + return UPB_CONTINUE; +err: + return UPB_BREAK; +} + +static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, + upb_handlers *delegate_to) { + (void)delegate_to; + upb_textprinter *p = _p; + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT " {", UPB_STRARG(f->name))); + if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); + p->indent_depth++; + return UPB_CONTINUE; +err: + return UPB_BREAK; +} + +static upb_flow_t upb_textprinter_endsubmsg(void *_p) +{ + upb_textprinter *p = _p; + p->indent_depth--; + upb_textprinter_indent(p); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status); + upb_textprinter_endfield(p); + return UPB_CONTINUE; +} + +upb_textprinter *upb_textprinter_new() { + upb_textprinter *p = malloc(sizeof(*p)); + return p; +} + +void upb_textprinter_free(upb_textprinter *p) { + free(p); +} + +void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, + upb_bytesink *sink, bool single_line) { + static upb_handlerset handlerset = { + NULL, // startmsg + NULL, // endmsg + upb_textprinter_value, + upb_textprinter_startsubmsg, + upb_textprinter_endsubmsg, + }; + p->bytesink = sink; + p->single_line = single_line; + p->indent_depth = 0; + upb_register_handlerset(handlers, &handlerset); + upb_set_handler_closure(handlers, p, &p->status); +} diff --git a/src/upb_textprinter.h 
b/src/upb_textprinter.h new file mode 100644 index 0000000..a880626 --- /dev/null +++ b/src/upb_textprinter.h @@ -0,0 +1,29 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_TEXT_H_ +#define UPB_TEXT_H_ + +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct _upb_textprinter; +typedef struct _upb_textprinter upb_textprinter; + +upb_textprinter *upb_textprinter_new(); +void upb_textprinter_free(upb_textprinter *p); +void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, + upb_bytesink *sink, bool single_line); +void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TEXT_H_ */ diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c deleted file mode 100644 index 4a43c4b..0000000 --- a/stream/upb_decoder.c +++ /dev/null @@ -1,441 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_decoder.h" - -#include -#include -#include -#include "upb_def.h" - -/* Pure Decoding **************************************************************/ - -// The key fast-path varint-decoding routine. Here we can assume we have at -// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of -// possibilities for optimization/experimentation here. - -#ifdef USE_SSE_VARINT_DECODING -#include - -// This works, but is empirically slower than the branchy version below. Why? -// Most varints are very short. Next step: use branches for 1/2-byte varints, -// but use the SSE version for 3-10 byte varints. 
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) { - const char *p = *ptr; - __m128i val128 = _mm_loadu_si128((void*)p); - unsigned int continuation_bits = _mm_movemask_epi8(val128); - unsigned int bsr_val = ~continuation_bits; - int varint_length = __builtin_ffs(bsr_val); - if (varint_length > 10) { - upb_seterr(s, UPB_ERROR, "Unterminated varint"); - return false; - } - - uint16_t twob; - memcpy(&twob, p, 2); - twob &= 0x7f7f; - twob = ((twob & 0xff00) >> 1) | (twob & 0xff); - - uint64_t eightb; - memcpy(&eightb, p + 2, 8); - eightb &= 0x7f7f7f7f7f7f7f7f; - eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff); - eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff); - eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff); - - uint64_t all_bits = twob | (eightb << 14); - int varint_bits = varint_length * 7; - uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1; - *val = all_bits & mask; - *ptr = p + varint_length; - return true; -} - -#else - -INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) { - const char *p = *ptr; - uint32_t low, high = 0; - uint32_t b; - b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done; - - upb_seterr(s, UPB_ERROR, "Unterminated varint"); - return false; - 
-done: - *val = ((uint64_t)high << 32) | low; - *ptr = p; - return true; -} - -#endif - - -/* Decoding/Buffering of individual values ************************************/ - -// Performs zig-zag decoding, which is used by sint32 and sint64. -INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } - -typedef struct { - // Our current position in the data buffer. - const char *ptr; - - // End of this submessage, relative to *ptr. - const char *submsg_end; - - // Number of bytes available at ptr. - size_t len; - - // Msgdef for the current level. - upb_msgdef *msgdef; -} upb_dstate; - -// Constant used to signal that the submessage is a group and therefore we -// don't know its end offset. This cannot be the offset of a real submessage -// end because it takes at least one byte to begin a submessage. -#define UPB_GROUP_END_OFFSET 0 -#define UPB_MAX_VARINT_ENCODED_SIZE 10 - -INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { - s->ptr += len; - s->len -= len; -} - -INLINE void upb_dstate_setmsgend(upb_decoder *d, upb_dstate *s) { - s->submsg_end = (d->top->end_offset == UPB_GROUP_END_OFFSET) ? - (void*)UINTPTR_MAX : - upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); -} - -static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s); - -// Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting the dstate appropriately. -static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, - upb_dstate *s) { - while (1) { - size_t to_copy = UPB_MIN(bytes_wanted, s->len); - memcpy(data, s->ptr, to_copy); - upb_dstate_advance(s, to_copy); - bytes_wanted -= to_copy; - if (bytes_wanted == 0) { - upb_dstate_setmsgend(d, s); - return true; - } - - // Get next buffer. 
- if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); - upb_string_recycle(&d->buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - s->ptr = upb_string_getrobuf(d->buf); - s->len = upb_string_len(d->buf); - } -} - -// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous -// bytes available in our current buffer. We don't inline this because we -// accept that it will be slow and we don't want to pay for two copies of it. -static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, - upb_value *val) { - char byte = 0x80; - uint64_t val64 = 0; - int bitpos; - for(bitpos = 0; - bitpos < 70 && (byte & 0x80) && upb_getbuf(d, &byte, 1, s); - bitpos += 7) - val64 |= ((uint64_t)byte & 0x7F) << bitpos; - - if(bitpos == 70) { - upb_seterr(d->status, UPB_ERROR, - "Varint was unterminated after 10 bytes.\n"); - return false; - } else if (d->status->code == UPB_EOF && bitpos == 0) { - // Regular EOF. - return false; - } else if (d->status->code == UPB_EOF && (byte & 0x80)) { - upb_seterr(d->status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - return false; - } else { - // Success. - upb_value_setraw(val, val64); - return true; - } -} - -typedef struct { - upb_wire_type_t wire_type; - upb_field_number_t field_number; -} upb_tag; - -INLINE bool upb_decode_tag(upb_decoder *d, upb_dstate *s, upb_tag *tag) { - const char *p = s->ptr; - uint32_t tag_int; - upb_value val; - // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). - if (s->len < 2) goto slow; // unlikely. - tag_int = *p & 0x7f; - if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order - tag_int |= (*p & 0x7f) << 7; - if ((*(p++) & 0x80) == 0) goto done; // likely -slow: - // Decode a full varint starting over from ptr. - if (!upb_decode_varint_slow(d, s, &val)) return false; - tag_int = upb_value_getint64(val); - p = s->ptr; // Trick the next line into not overwriting us. 
-done: - upb_dstate_advance(s, p - s->ptr); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return true; -} - -INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { - if (s->len >= 16) { - // Common (fast) case. - uint64_t val64; - const char *p = s->ptr; - if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; - upb_dstate_advance(s, p - s->ptr); - upb_value_setraw(val, val64); - return true; - } else { - return upb_decode_varint_slow(d, s, val); - } -} - -INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, - upb_dstate *s, upb_value *val) { - static const char table[] = {0, 8, 0, 0, 0, 4}; - size_t bytes = table[wt]; - if (s->len >= bytes) { - // Common (fast) case. - memcpy(val, s->ptr, bytes); - upb_dstate_advance(s, bytes); - } else { - if (!upb_getbuf(d, val, bytes, s)) return false; - } - return true; -} - -// "val" initially holds the length of the string, this is replaced by the -// contents of the string. -INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, - upb_dstate *s) { - upb_string_recycle(str); - uint32_t strlen = upb_value_getint32(*val); - if (s->len >= strlen) { - // Common (fast) case. - upb_string_substr(*str, d->buf, s->ptr - upb_string_getrobuf(d->buf), strlen); - upb_dstate_advance(s, strlen); - } else { - if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) - return false; - } - upb_value_setstr(val, *str); - return true; -} - - -/* The main decoding loop *****************************************************/ - -extern upb_wire_type_t upb_expected_wire_types[]; -// Returns true if wt is the correct on-the-wire type for ft. -INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { - // This doesn't currently support packed arrays. 
- return upb_types[ft].native_wire_type == wt; -} - -static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, - upb_value submsg_len, upb_fieldtype_t type) { - d->top++; - if(d->top >= d->limit) { - upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); - return UPB_ERROR; - } - d->top->end_offset = (type == UPB_TYPE(GROUP)) ? - UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + - upb_value_getint32(submsg_len); - d->top->msgdef = upb_downcast_msgdef(f->def); - upb_dstate_setmsgend(d, s); - return upb_dispatch_startsubmsg(&d->dispatcher, f); -} - -static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s) { - d->top--; - upb_dstate_setmsgend(d, s); - return upb_dispatch_endsubmsg(&d->dispatcher); -} - -void upb_decoder_run(upb_src *src, upb_status *status) { - upb_decoder *d = (upb_decoder*)src; - d->status = status; - // We put our dstate on the stack so the compiler knows they can't be changed - // by external code (like when we dispatch a callback). We must be sure not - // to let its address escape this source file. - upb_dstate state = {NULL, (void*)0x1, 0, d->top->msgdef}; - -// TODO: handle UPB_SKIPSUBMSG -#define CHECK_FLOW(expr) if ((expr) == UPB_BREAK) { assert(!upb_ok(status)); goto err; } -#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; } - - CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); - - // Main loop: executed once per tag/field pair. - while(1) { - // Check for end-of-submessage. - while (state.ptr >= state.submsg_end) { - if (state.ptr > state.submsg_end) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); - goto err; - } - CHECK_FLOW(upb_pop(d, &state)); - } - - // Parse/handle tag. - upb_tag tag; - if (!upb_decode_tag(d, &state, &tag)) { - if (status->code == UPB_EOF && d->top == d->stack) { - // Normal end-of-file. 
- upb_clearerr(status); - CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); - return; - } else { - if (status->code == UPB_EOF) { - upb_seterr(status, UPB_ERROR, - "Input ended in the middle of a submessage."); - } - goto err; - } - } - - // Decode wire data. Hopefully this branch will predict pretty well - // since most types will read a varint here. - upb_value val; - switch (tag.wire_type) { - case UPB_WIRE_TYPE_START_GROUP: - break; // Nothing to do now, below we will push appropriately. - case UPB_WIRE_TYPE_END_GROUP: - if(d->top->end_offset != UPB_GROUP_END_OFFSET) { - upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); - goto err; - } - CHECK_FLOW(upb_pop(d, &state)); - continue; // We have no value to dispatch. - case UPB_WIRE_TYPE_VARINT: - case UPB_WIRE_TYPE_DELIMITED: - // For the delimited case we are parsing the length. - CHECK(upb_decode_varint(d, &state, &val)); - break; - case UPB_WIRE_TYPE_32BIT: - case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_fixed(d, tag.wire_type, &state, &val)); - break; - } - - // Look up field by tag number. - upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, tag.field_number); - - if (!f) { - if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) - CHECK(upb_decode_string(d, &val, &d->tmp, &state)); - CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, tag.field_number, val)); - } else if (!upb_check_type(tag.wire_type, f->type)) { - // TODO: put more details in this error msg. - upb_seterr(status, UPB_ERROR, "Field had incorrect type, name: " UPB_STRFMT, UPB_STRARG(f->name)); - upb_printerr(status); - *(int*)0 = 0; - goto err; - } - - // Perform any further massaging of the data now that we have the fielddef. - // Now we can distinguish strings from submessages, and we know about - // zig-zag-encoded types. - // TODO: handle packed encoding. - // TODO: if we were being paranoid, we could check for 32-bit-varint types - // that the top 32 bits all match the highest bit of the low 32 bits. 
- // If this is not true we are losing data. But the main protobuf library - // doesn't check this, and it would slow us down, so pass for now. - switch (f->type) { - case UPB_TYPE(MESSAGE): - case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, &state, f, val, f->type)); - continue; // We have no value to dispatch. - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, &val, &d->tmp, &state)); - break; - case UPB_TYPE(SINT32): - upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); - break; - case UPB_TYPE(SINT64): - upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); - break; - default: -#ifndef NDEBUG - val.type = upb_types[f->type].inmemory_type; -#endif - break; // Other types need no further processing at this point. - } - CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); - } - -err: - if (upb_ok(status)) { - upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); - } -} - -void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { - upb_decoder *d = (upb_decoder*)src; - upb_dispatcher_reset(&d->dispatcher, handlers, false); - d->top = d->stack; - d->buf_stream_offset = 0; - d->top->msgdef = d->toplevel_msgdef; - // The top-level message is not delimited (we can keep receiving data for it - // indefinitely), so we treat it like a group. - d->top->end_offset = 0; -} - -void upb_decoder_init(upb_decoder *d, upb_msgdef *msgdef) { - static upb_src_vtbl vtbl = { - &upb_decoder_sethandlers, - &upb_decoder_run, - }; - upb_src_init(&d->src, &vtbl); - upb_dispatcher_init(&d->dispatcher); - d->toplevel_msgdef = msgdef; - d->limit = &d->stack[UPB_MAX_NESTING]; - d->buf = NULL; - d->tmp = NULL; -} - -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { - d->bytesrc = bytesrc; - d->top = &d->stack[0]; - d->top->msgdef = d->toplevel_msgdef; - // Never want to end top-level message, so treat it like a group. 
- d->top->end_offset = UPB_GROUP_END_OFFSET; -} - -void upb_decoder_uninit(upb_decoder *d) { - upb_string_unref(d->buf); - upb_string_unref(d->tmp); -} - -upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; } diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h deleted file mode 100644 index 1c62753..0000000 --- a/stream/upb_decoder.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * upb_decoder implements a high performance, streaming decoder for protobuf - * data that works by implementing upb_src and getting its data from a - * upb_bytesrc. - * - * The decoder does not currently support non-blocking I/O, in the sense that - * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the - * decoder when data becomes available again. Support for this could be added, - * but it would add complexity and perhaps cost efficiency also. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_DECODER_H_ -#define UPB_DECODER_H_ - -#include -#include -#include "upb_def.h" -#include "upb_stream.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_decoder *****************************************************************/ - -// The decoder keeps a stack with one entry per level of recursion. -// upb_decoder_frame is one frame of that stack. -typedef struct { - upb_msgdef *msgdef; - size_t end_offset; // For groups, 0. -} upb_decoder_frame; - -struct _upb_decoder { - // Immutable state of the decoder. - upb_src src; - upb_dispatcher dispatcher; - upb_bytesrc *bytesrc; - upb_msgdef *toplevel_msgdef; - upb_decoder_frame stack[UPB_MAX_NESTING]; - - // Mutable state of the decoder. - - // Where we will store any errors that occur. - upb_status *status; - - // Stack entries store the offset where the submsg ends (for groups, 0). - upb_decoder_frame *top, *limit; - - // Current input buffer. - upb_string *buf; - - // Temporary string for passing to callbacks. 
- upb_string *tmp; - - // The offset within the overall stream represented by the *beginning* of buf. - size_t buf_stream_offset; -}; - -// A upb_decoder decodes the binary protocol buffer format, writing the data it -// decodes to a upb_sink. -struct _upb_decoder; -typedef struct _upb_decoder upb_decoder; - -// Allocates and frees a upb_decoder, respectively. -void upb_decoder_init(upb_decoder *d, upb_msgdef *md); -void upb_decoder_uninit(upb_decoder *d); - -// Resets the internal state of an already-allocated decoder. This puts it in a -// state where it has not seen any data, and expects the next data to be from -// the beginning of a new protobuf. Parsers must be reset before they can be -// used. A decoder can be reset multiple times. -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); - -// Returns a upb_src pointer by which the decoder can be used. The returned -// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). -upb_src *upb_decoder_src(upb_decoder *d); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DECODER_H_ */ diff --git a/stream/upb_encoder.c b/stream/upb_encoder.c deleted file mode 100644 index 304a423..0000000 --- a/stream/upb_encoder.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_encoder.h" - -#include -#include "descriptor.h" - -/* Functions for calculating sizes of wire values. ****************************/ - -static size_t upb_v_uint64_t_size(uint64_t val) { -#ifdef __GNUC__ - int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. -#else - int high_bit = 0; - uint64_t tmp = val; - while(tmp >>= 1) high_bit++; -#endif - return val == 0 ? 1 : high_bit / 7 + 1; -} - -static size_t upb_v_int32_t_size(int32_t val) { - // v_uint32's are sign-extended to maintain wire compatibility with int64s. 
- return upb_v_uint64_t_size((int64_t)val); -} -static size_t upb_v_uint32_t_size(uint32_t val) { - return upb_v_uint64_t_size(val); -} -static size_t upb_f_uint64_t_size(uint64_t val) { - (void)val; // Length is independent of value. - return sizeof(uint64_t); -} -static size_t upb_f_uint32_t_size(uint32_t val) { - (void)val; // Length is independent of value. - return sizeof(uint32_t); -} - - -/* Functions to write wire values. ********************************************/ - -// Since we know in advance the longest that the value could be, we always make -// sure that our buffer is long enough. This saves us from having to perform -// bounds checks. - -// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT). -static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val) -{ - do { - uint8_t byte = val & 0x7f; - val >>= 7; - if(val) byte |= 0x80; - *buf++ = byte; - } while(val); - return buf; -} - -// Puts an unsigned 32-bit varint, verbatim. Never uses the high 64 bits. -static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val) -{ - return upb_put_v_uint64_t(buf, val); -} - -// Puts a signed 32-bit varint, first sign-extending to 64-bits. We do this to -// maintain wire-compatibility with 64-bit signed integers. -static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val) -{ - return upb_put_v_uint64_t(buf, (int64_t)val); -} - -static void upb_put32(uint8_t *buf, uint32_t val) { - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24); -} - -// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val) -{ - uint8_t *uint32_end = buf + sizeof(uint32_t); -#if UPB_UNALIGNED_READS_OK - *(uint32_t*)buf = val; -#else - upb_put32(buf, val); -#endif - return uint32_end; -} - -// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). 
-static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val) -{ - uint8_t *uint64_end = buf + sizeof(uint64_t); -#if UPB_UNALIGNED_READS_OK - *(uint64_t*)buf = val; -#else - upb_put32(buf, (uint32_t)val); - upb_put32(buf, (uint32_t)(val >> 32)); -#endif - return uint64_end; -} - -/* Functions to write and calculate sizes for .proto values. ******************/ - -// Performs zig-zag encoding, which is used by sint32 and sint64. -static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } -static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } - -/* Use macros to define a set of two functions for each .proto type: - * - * // Converts and writes a .proto value into buf. "end" indicates the end - * // of the current available buffer (if the buffer does not contain enough - * // space UPB_STATUS_NEED_MORE_DATA is returned). On success, *outbuf will - * // point one past the data that was written. - * uint8_t *upb_put_INT32(uint8_t *buf, int32_t val); - * - * // Returns the number of bytes required to encode val. - * size_t upb_get_INT32_size(int32_t val); - * - * // Given a .proto value s (source) convert it to a wire value. 
- * uint32_t upb_vtowv_INT32(int32_t s); - */ - -#define VTOWV(type, wire_t, val_t) \ - static wire_t upb_vtowv_ ## type(val_t s) - -#define PUT(type, v_or_f, wire_t, val_t, member_name) \ - static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \ - wire_t tmp = upb_vtowv_ ## type(val); \ - return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \ - } - -#define T(type, v_or_f, wire_t, val_t, member_name) \ - static size_t upb_get_ ## type ## _size(val_t val) { \ - return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \ - } \ - VTOWV(type, wire_t, val_t); /* prototype for PUT below */ \ - PUT(type, v_or_f, wire_t, val_t, member_name) \ - VTOWV(type, wire_t, val_t) - -T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; } -T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; } -T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } -T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } -T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); } -T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); } -T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } -T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } -T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; } -T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; } -T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; } -T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; } -T(DOUBLE, f, uint64_t, double, _double) { - upb_value v; - v._double = s; - return v.uint64; -} -T(FLOAT, f, uint32_t, float, _float) { - upb_value v; - v._float = s; - return v.uint32; -} -#undef VTOWV -#undef PUT -#undef T - -static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name); - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - 
CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: assert(false); return buf; - } -#undef CASE -} - -static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: assert(false); return 0; - } -#undef CASE -} - -static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num, - upb_wire_type_t wt) -{ - return upb_put_UINT32(buf, wt | (num << 3)); -} - -static uint32_t _upb_get_tag_size(upb_field_number_t num) -{ - return upb_get_UINT32_size(num << 3); -} - - -/* upb_sizebuilder ************************************************************/ - -struct upb_sizebuilder { - // Accumulating size for the current level. - uint32_t size; - - // Stack of sizes for our current nesting. - uint32_t stack[UPB_MAX_NESTING], *top; - - // Vector of sizes. - uint32_t *sizes; - int sizes_len; - int sizes_size; - - upb_status status; -}; - -// upb_sink callbacks. -static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f, - upb_value val, - upb_status *status) -{ - (void)status; - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - uint32_t size = 0; - size += _upb_get_tag_size(f->number); - size += _upb_get_value_size(f->type, val); - sb->size += size; - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - (void)status; - (void)str; // String data itself is not used. 
- upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(start >= 0) { - uint32_t size = 0; - size += _upb_get_tag_size(f->number); - size += upb_get_UINT32_size(end - start); - sb->size += size; - } - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - (void)status; - (void)f; // Unused (we calculate tag size and delimiter in endcb). - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(f->type == UPB_TYPE(MESSAGE)) { - *sb->top = sb->size; - sb->top++; - sb->size = 0; - } else { - assert(f->type == UPB_TYPE(GROUP)); - sb->size += _upb_get_tag_size(f->number); - } - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - (void)status; - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(f->type == UPB_TYPE(MESSAGE)) { - sb->top--; - if(sb->sizes_len == sb->sizes_size) { - sb->sizes_size *= 2; - sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes)); - } - uint32_t child_size = sb->size; - uint32_t parent_size = *sb->top; - sb->sizes[sb->sizes_len++] = child_size; - // The size according to the parent includes the tag size and delimiter of - // the submessage. - parent_size += upb_get_UINT32_size(child_size); - parent_size += _upb_get_tag_size(f->number); - // Include size accumulated in parent before child began. - sb->size = child_size + parent_size; - } else { - assert(f->type == UPB_TYPE(GROUP)); - // As an optimization, we could just add this number twice in startcb, to - // avoid having to recalculate it. 
- sb->size += _upb_get_tag_size(f->number); - } - return UPB_SINK_CONTINUE; -} - -upb_sink_callbacks _upb_sizebuilder_sink_vtbl = { - _upb_sizebuilder_valuecb, - _upb_sizebuilder_strcb, - _upb_sizebuilder_startcb, - _upb_sizebuilder_endcb -}; - - -/* upb_sink callbacks *********************************************************/ - -struct upb_encoder { - upb_sink base; - //upb_bytesink *bytesink; - uint32_t *sizes; - int size_offset; -}; - - -// Within one callback we may need to encode up to two separate values. -#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2) - -static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf, - size_t len, upb_status *status) -{ - // TODO: conjure a upb_strptr that points to buf. - //upb_strptr ptr; - (void)s; - (void)buf; - (void)status; - size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr); - if(written < len) { - // TODO: mark to skip "written" bytes next time. - return UPB_SINK_STOP; - } else { - return UPB_SINK_CONTINUE; - } -} - -static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f, - upb_value val, upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - upb_wire_type_t wt = upb_types[f->type].expected_wire_type; - // TODO: handle packed encoding. - ptr = _upb_put_tag(ptr, f->number, wt); - ptr = upb_encode_value(ptr, f->type, val); - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(start >= 0) { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); - ptr = upb_put_UINT32(ptr, end - start); - } - // TODO: properly handle partially consumed strings and partially supplied - // strings. 
- _upb_encoder_push_buf(s, buf, ptr - buf, status); - return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status); -} - -static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(f->type == UPB_TYPE(GROUP)) { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP); - } else { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); - ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]); - } - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE; - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP); - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -upb_sink_callbacks _upb_encoder_sink_vtbl = { - _upb_encoder_valuecb, - _upb_encoder_strcb, - _upb_encoder_startcb, - _upb_encoder_endcb -}; - diff --git a/stream/upb_encoder.h b/stream/upb_encoder.h deleted file mode 100644 index e879b0b..0000000 --- a/stream/upb_encoder.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Implements a upb_sink that writes protobuf data to the binary wire format. - * - * For messages that have any submessages, the encoder needs a buffer - * containing the submessage sizes, so they can be properly written at the - * front of each message. Note that groups do *not* have this requirement. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. 
- */ - -#ifndef UPB_ENCODER_H_ -#define UPB_ENCODER_H_ - -#include "upb.h" -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_encoder ****************************************************************/ - -// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol -// buffer binary wire format. -struct upb_encoder; -typedef struct upb_encoder upb_encoder; - -upb_encoder *upb_encoder_new(upb_msgdef *md); -void upb_encoder_free(upb_encoder *e); - -// Resets the given upb_encoder such that is is ready to begin encoding, -// outputting data to "bytesink" (which must live until the encoder is -// reset or destroyed). -void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink); - -// Returns the upb_sink to which data can be written. The sink is invalidated -// when the encoder is reset or destroyed. Note that if the client wants to -// encode any length-delimited submessages it must first call -// upb_encoder_buildsizes() below. -upb_sink *upb_encoder_sink(upb_encoder *e); - -// Call prior to pushing any data with embedded submessages. "src" must yield -// exactly the same data as what will next be encoded, but in reverse order. -// The encoder iterates over this data in order to determine the sizes of the -// submessages. If any errors are returned by the upb_src, the status will -// be saved in *status. If the client is sure that the upb_src will not throw -// any errors, "status" may be NULL. -void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_ENCODER_H_ */ diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c deleted file mode 100644 index 8857677..0000000 --- a/stream/upb_stdio.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
- */ - -#include "upb_stdio.h" - -#include -#include -#include "upb_string.h" - -// We can make this configurable if necessary. -#define BLOCK_SIZE 4096 - -struct upb_stdio { - upb_bytesrc bytesrc; - upb_bytesink bytesink; - FILE *file; -}; - -void upb_stdio_reset(upb_stdio *stdio, FILE* file) { - stdio->file = file; -} - - -/* upb_bytesrc methods ********************************************************/ - -static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, - upb_strlen_t count, upb_status *status) { - upb_stdio *stdio = (upb_stdio*)src; - assert(count > 0); - size_t read = fread(buf, 1, count, stdio->file); - if(read < (size_t)count) { - // Error or EOF. - if(feof(stdio->file)) { - upb_seterr(status, UPB_EOF, ""); - return read; - } else if(ferror(stdio->file)) { - upb_seterr(status, UPB_ERROR, "Error reading from stdio stream."); - return -1; - } - } - return read; -} - -static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, - upb_status *status) { - upb_strlen_t read = upb_stdio_read( - src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status); - if (read <= 0) return false; - upb_string_getrwbuf(str, read); - return true; -} - - -/* upb_bytesink methods *******************************************************/ - -upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { - upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); - upb_strlen_t len = upb_string_len(str); - upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); - if(written < len) { - upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); - return -1; - } - return written; -} - -upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, - const char *fmt, va_list args) { - upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); - upb_strlen_t written = vfprintf(stdio->file, fmt, args); - if (written < 0) { - upb_seterr(status, UPB_ERROR, "Error writing to 
stdio stream."); - return -1; - } - return written; -} - -upb_stdio *upb_stdio_new() { - static upb_bytesrc_vtbl bytesrc_vtbl = { - upb_stdio_read, - upb_stdio_getstr, - }; - - static upb_bytesink_vtbl bytesink_vtbl = { - NULL, - upb_stdio_putstr, - upb_stdio_vprintf - }; - - upb_stdio *stdio = malloc(sizeof(*stdio)); - upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); - upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); - return stdio; -} - -void upb_stdio_free(upb_stdio *stdio) { - free(stdio); -} - -upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; } -upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; } diff --git a/stream/upb_stdio.h b/stream/upb_stdio.h deleted file mode 100644 index fd71fdd..0000000 --- a/stream/upb_stdio.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file provides upb_bytesrc and upb_bytesink implementations for - * ANSI C stdio. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#include -#include "upb_stream.h" - -#ifndef UPB_STDIO_H_ -#define UPB_STDIO_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -struct upb_stdio; -typedef struct upb_stdio upb_stdio; - -// Creation/deletion. -upb_stdio *upb_stdio_new(); -void upb_stdio_free(upb_stdio *stdio); - -// Reset/initialize the object for use. The src or sink will call -// fread()/fwrite()/etc. on the given FILE*. -void upb_stdio_reset(upb_stdio *stdio, FILE* file); - -// Gets a bytesrc or bytesink for the given stdio. The returned pointer is -// invalidated by upb_stdio_reset above. It is perfectly valid to get both -// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading -// and writing. 
-upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio); -upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/stream/upb_strstream.c b/stream/upb_strstream.c deleted file mode 100644 index a7967d4..0000000 --- a/stream/upb_strstream.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_strstream.h" - -#include -#include "upb_string.h" - -static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, - upb_strlen_t count, upb_status *status) { - upb_stringsrc *src = (upb_stringsrc*)_src; - if (src->offset == upb_string_len(src->str)) { - status->code = UPB_EOF; - return -1; - } else { - upb_strlen_t to_read = UPB_MIN(count, upb_string_len(src->str) - src->offset); - memcpy(buf, upb_string_getrobuf(src->str) + src->offset, to_read); - src->offset += to_read; - return to_read; - } -} - -static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str, - upb_status *status) { - upb_stringsrc *src = (upb_stringsrc*)_src; - if (src->offset == upb_string_len(src->str)) { - status->code = UPB_EOF; - return false; - } else { - upb_strlen_t len = upb_string_len(src->str) - src->offset; - upb_string_substr(str, src->str, src->offset, len); - src->offset += len; - assert(src->offset == upb_string_len(src->str)); - return true; - } -} - -void upb_stringsrc_init(upb_stringsrc *s) { - static upb_bytesrc_vtbl bytesrc_vtbl = { - upb_stringsrc_read, - upb_stringsrc_getstr, - }; - s->str = NULL; - upb_bytesrc_init(&s->bytesrc, &bytesrc_vtbl); -} - -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { - if (str != s->str) { - upb_string_unref(s->str); - s->str = upb_string_getref(str); - } - s->offset = 0; -} - -void upb_stringsrc_uninit(upb_stringsrc *s) { - upb_string_unref(s->str); -} - - -upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { - return &s->bytesrc; -} 
diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h deleted file mode 100644 index 1a8792b..0000000 --- a/stream/upb_strstream.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file contains upb_bytesrc and upb_bytesink implementations for - * upb_string. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_STRSTREAM_H -#define UPB_STRSTREAM_H - -#include "upb_stream.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_stringsrc **************************************************************/ - -struct _upb_stringsrc { - upb_bytesrc bytesrc; - upb_string *str; - upb_strlen_t offset; -}; -typedef struct _upb_stringsrc upb_stringsrc; - -// Create/free a stringsrc. -void upb_stringsrc_init(upb_stringsrc *s); -void upb_stringsrc_uninit(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// stringsrc will take a reference on the string, so the caller need not ensure -// that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); - -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. -upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); - - -/* upb_stringsink *************************************************************/ - -struct upb_stringsink; -typedef struct upb_stringsink upb_stringsink; - -// Create/free a stringsrc. -upb_stringsink *upb_stringsink_new(); -void upb_stringsink_free(upb_stringsink *s); - -// Gets a string containing the data that has been written to this stringsink. -// The caller does *not* own any references to this string. -upb_string *upb_stringsink_getstring(upb_stringsink *s); - -// Clears the internal string of accumulated data, resetting it to empty. -void upb_stringsink_reset(upb_stringsink *s); - -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
-upb_bytesink *upb_stringsrc_bytesink(); - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c deleted file mode 100644 index 894a1ea..0000000 --- a/stream/upb_textprinter.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_textprinter.h" - -#include -#include -#include "upb_def.h" -#include "upb_string.h" - -struct _upb_textprinter { - upb_bytesink *bytesink; - int indent_depth; - bool single_line; - upb_status status; -}; - -#define CHECK(x) if ((x) < 0) goto err; - -static int upb_textprinter_indent(upb_textprinter *p) -{ - if(!p->single_line) - for(int i = 0; i < p->indent_depth; i++) - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); - return 0; -err: - return -1; -} - -static int upb_textprinter_endfield(upb_textprinter *p) { - if(p->single_line) { - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); - } else { - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status)); - } - return 0; -err: - return -1; -} - -static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, - upb_value val) { - upb_textprinter *p = _p; - upb_textprinter_indent(p); - CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); -#define CASE(fmtstr, member) \ - CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; - switch(f->type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", double); - case UPB_TYPE(FLOAT): - CASE("%0.f", float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - CASE("%" PRIu32, uint32); - case UPB_TYPE(ENUM): { - upb_enumdef *enum_def = 
upb_downcast_enumdef(f->def); - upb_string *enum_label = - upb_enumdef_iton(enum_def, upb_value_getint32(val)); - if (enum_label) { - // We found a corresponding string for this enum. Otherwise we fall - // through to the int32 code path. - CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status)); - break; - } - } - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(BOOL): - CASE("%hhu", bool); - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - // TODO: escaping. - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); - CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); - break; - } - upb_textprinter_endfield(p); - return UPB_CONTINUE; -err: - return UPB_BREAK; -} - -static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, - upb_handlers *delegate_to) { - (void)delegate_to; - upb_textprinter *p = _p; - upb_textprinter_indent(p); - CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT " {", UPB_STRARG(f->name))); - if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); - p->indent_depth++; - return UPB_CONTINUE; -err: - return UPB_BREAK; -} - -static upb_flow_t upb_textprinter_endsubmsg(void *_p) -{ - upb_textprinter *p = _p; - p->indent_depth--; - upb_textprinter_indent(p); - upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status); - upb_textprinter_endfield(p); - return UPB_CONTINUE; -} - -upb_textprinter *upb_textprinter_new() { - upb_textprinter *p = malloc(sizeof(*p)); - return p; -} - -void upb_textprinter_free(upb_textprinter *p) { - free(p); -} - -void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, - upb_bytesink *sink, bool single_line) { - static upb_handlerset handlerset = { - NULL, // startmsg - NULL, // endmsg - upb_textprinter_value, - upb_textprinter_startsubmsg, - upb_textprinter_endsubmsg, 
- }; - p->bytesink = sink; - p->single_line = single_line; - p->indent_depth = 0; - upb_register_handlerset(handlers, &handlerset); - upb_set_handler_closure(handlers, p, &p->status); -} diff --git a/stream/upb_textprinter.h b/stream/upb_textprinter.h deleted file mode 100644 index a880626..0000000 --- a/stream/upb_textprinter.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_TEXT_H_ -#define UPB_TEXT_H_ - -#include "upb_stream.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct _upb_textprinter; -typedef struct _upb_textprinter upb_textprinter; - -upb_textprinter *upb_textprinter_new(); -void upb_textprinter_free(upb_textprinter *p); -void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, - upb_bytesink *sink, bool single_line); -void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TEXT_H_ */ -- cgit v1.2.3