From 462b26c1cc041a8fa26deb62cf12f1f351a5b2f6 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 8 Jul 2009 12:06:47 -0700 Subject: Directory restructuring. --- src/upb.h | 136 ++++++++++++++++++ src/upb_context.c | 304 +++++++++++++++++++++++++++++++++++++++ src/upb_context.h | 106 ++++++++++++++ src/upb_enum.c | 31 ++++ src/upb_enum.h | 41 ++++++ src/upb_inlinedefs.c | 19 +++ src/upb_msg.c | 379 ++++++++++++++++++++++++++++++++++++++++++++++++ src/upb_msg.h | 369 +++++++++++++++++++++++++++++++++++++++++++++++ src/upb_parse.c | 383 +++++++++++++++++++++++++++++++++++++++++++++++++ src/upb_parse.h | 158 ++++++++++++++++++++ src/upb_string.c | 24 ++++ src/upb_string.h | 82 +++++++++++ src/upb_table.c | 398 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/upb_table.h | 123 ++++++++++++++++ 14 files changed, 2553 insertions(+) create mode 100644 src/upb.h create mode 100644 src/upb_context.c create mode 100644 src/upb_context.h create mode 100644 src/upb_enum.c create mode 100644 src/upb_enum.h create mode 100644 src/upb_inlinedefs.c create mode 100644 src/upb_msg.c create mode 100644 src/upb_msg.h create mode 100644 src/upb_parse.c create mode 100644 src/upb_parse.h create mode 100644 src/upb_string.c create mode 100644 src/upb_string.h create mode 100644 src/upb_table.c create mode 100644 src/upb_table.h (limited to 'src') diff --git a/src/upb.h b/src/upb.h new file mode 100644 index 0000000..64f2c54 --- /dev/null +++ b/src/upb.h @@ -0,0 +1,136 @@ +/* + * upb - a minimalist implementation of protocol buffers. + + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file contains shared definitions that are widely used across upb. + */ + +#ifndef UPB_H_ +#define UPB_H_ + +#include +#include +#include /* for size_t. */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* inline if possible, emit standalone code if required. */ +#ifndef INLINE +#define INLINE static inline +#endif + +/* The maximum that any submessages can be nested. Matches proto2's limit. */ +#define UPB_MAX_NESTING 64 + +/* The maximum number of fields that any one .proto type can have. */ +#define UPB_MAX_FIELDS (1<<16) + +/* Nested type names are separated by periods. */ +#define UPB_SYMBOL_SEPARATOR '.' +#define UPB_SYMBOL_MAX_LENGTH 256 + +#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) + +INLINE uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; } + +/* Value type as defined in a .proto file. The values of this are defined by + * google_protobuf_FieldDescriptorProto_Type (from descriptor.proto). + * Note that descriptor.proto reserves "0" for errors, and we use it to + * represent exceptional circumstances. */ +typedef uint8_t upb_field_type_t; + +/* Label (optional, repeated, required) as defined in a .proto file. The values + * of this are defined by google.protobuf.FieldDescriptorProto.Label (from + * descriptor.proto). */ +typedef uint8_t upb_label_t; + +struct upb_type_info { + uint8_t align; + uint8_t size; + uint8_t expected_wire_type; +}; + +/* Contains information for all .proto types. Indexed by upb_field_type_t. */ +extern struct upb_type_info upb_type_info[]; + +/* A pointer to a .proto value. The owner must have an out-of-band way of + * knowing the type, so it knows which union member to use. */ +union upb_value { + double _double; + float _float; + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + bool _bool; + struct upb_string **string; + struct upb_array **array; + void *message; +}; + +union upb_value_ptr { + double *_double; + float *_float; + int32_t *int32; + int64_t *int64; + uint32_t *uint32; + uint64_t *uint64; + bool *_bool; + struct upb_string **string; + struct upb_array **array; + void **message; + void *_void; +}; + +union upb_symbol_ref { + struct upb_msg *msg; + struct upb_enum *_enum; + struct upb_svc *svc; +}; + +/* The number of a field, eg. "optional string foo = 3". */ +typedef int32_t upb_field_number_t; + +/* Status codes used as a return value. Codes >0 are not fatal and can be + * resumed. */ +typedef enum upb_status { + UPB_STATUS_OK = 0, + + // The input byte stream ended in the middle of a record. + UPB_STATUS_NEED_MORE_DATA = 1, + + // The user value callback opted to stop parsing. + UPB_STATUS_USER_CANCELLED = 2, + + // A varint did not terminate before hitting 64 bits. + UPB_ERROR_UNTERMINATED_VARINT = -1, + + // A submessage or packed array ended in the middle of data. + UPB_ERROR_BAD_SUBMESSAGE_END = -2, + + // Input was nested more than UPB_MAX_NESTING deep. + UPB_ERROR_STACK_OVERFLOW = -3, + + // The input data caused the pb's offset (a size_t) to overflow. + UPB_ERROR_OVERFLOW = -4, + + // An "end group" tag was encountered in an inappropriate place. + UPB_ERROR_SPURIOUS_END_GROUP = -5, + + UPB_ERROR_ILLEGAL = -6 +} upb_status_t; + +#define UPB_CHECK(func) do { \ + upb_status_t status = func; \ + if(status != UPB_STATUS_OK) return status; \ + } while (0) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_H_ */ diff --git a/src/upb_context.c b/src/upb_context.c new file mode 100644 index 0000000..d857d16 --- /dev/null +++ b/src/upb_context.c @@ -0,0 +1,304 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include +#include +#include "descriptor.h" +#include "upb_context.h" +#include "upb_enum.h" +#include "upb_msg.h" + +/* Search for a character in a string, in reverse. */ +static int memrchr(char *data, char c, size_t len) +{ + int off = len-1; + while(off > 0 && data[off] != c) --off; + return off; +} + +bool addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, + google_protobuf_FileDescriptorProto *fd); + +bool upb_context_init(struct upb_context *c) +{ + upb_strtable_init(&c->symtab, 16, sizeof(struct upb_symtab_entry)); + upb_strtable_init(&c->psymtab, 16, sizeof(struct upb_symtab_entry)); + /* Add all the types in descriptor.proto so we can parse descriptors. */ + if(!addfd(&c->psymtab, &c->symtab, &google_protobuf_filedescriptor)) { + assert(false); + return false; /* Indicates that upb is buggy or corrupt. */ + } + struct upb_string name = UPB_STRLIT("google.protobuf.FileDescriptorSet"); + struct upb_symtab_entry *e = upb_strtable_lookup(&c->psymtab, &name); + assert(e); + c->fds_msg = e->ref.msg; + c->fds_size = 16; + c->fds_len = 0; + c->fds = malloc(sizeof(*c->fds)); + return true; +} + +static void free_symtab(struct upb_strtable *t) +{ + struct upb_symtab_entry *e = upb_strtable_begin(t); + for(; e; e = upb_strtable_next(t, &e->e)) { + switch(e->type) { + case UPB_SYM_MESSAGE: upb_msg_free(e->ref.msg); break; + case UPB_SYM_ENUM: upb_enum_free(e->ref._enum); break; + default: break; /* TODO */ + } + free(e->ref.msg); /* The pointer is the same for all. */ + free(e->e.key.ptr); + } + upb_strtable_free(t); +} + +void upb_context_free(struct upb_context *c) +{ + free_symtab(&c->symtab); + for(size_t i = 0; i < c->fds_len; i++) + upb_msgdata_free(c->fds[i], c->fds_msg, true); + free_symtab(&c->psymtab); + free(c->fds); +} + +struct upb_symtab_entry *upb_context_lookup(struct upb_context *c, + struct upb_string *symbol) +{ + return upb_strtable_lookup(&c->symtab, symbol); +} + +/* Given a symbol and the base symbol inside which it is defined, find the + * symbol's definition in t. */ +static struct upb_symtab_entry *resolve(struct upb_strtable *t, + struct upb_string *base, + struct upb_string *symbol) +{ + if(base->byte_len + symbol->byte_len + 1 >= UPB_SYMBOL_MAX_LENGTH || + symbol->byte_len == 0) return NULL; + + if(symbol->ptr[0] == UPB_SYMBOL_SEPARATOR) { + /* Symbols starting with '.' are absolute, so we do a single lookup. */ + struct upb_string sym_str = {.ptr = symbol->ptr+1, + .byte_len = symbol->byte_len-1}; + return upb_strtable_lookup(t, &sym_str); + } else { + /* Remove components from base until we find an entry or run out. */ + char sym[UPB_SYMBOL_MAX_LENGTH+1]; + struct upb_string sym_str = {.ptr = sym}; + int baselen = base->byte_len; + while(1) { + /* sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol */ + memcpy(sym, base->ptr, baselen); + sym[baselen] = UPB_SYMBOL_SEPARATOR; + memcpy(sym + baselen + 1, symbol->ptr, symbol->byte_len); + sym_str.byte_len = baselen + symbol->byte_len + 1; + + struct upb_symtab_entry *e = upb_strtable_lookup(t, &sym_str); + if (e) return e; + else if(baselen == 0) return NULL; /* No more scopes to try. */ + + baselen = memrchr(base->ptr, UPB_SYMBOL_SEPARATOR, baselen); + } + } +} + +/* Tries to resolve a symbol in two different tables. */ +union upb_symbol_ref resolve2(struct upb_strtable *t1, struct upb_strtable *t2, + struct upb_string *base, struct upb_string *sym, + enum upb_symbol_type expected_type) { + union upb_symbol_ref nullref = {.msg = NULL}; + struct upb_symtab_entry *e = resolve(t1, base, sym); + if(e == NULL) e = resolve(t2, base, sym); + + if(e && e->type == expected_type) return e->ref; + else return nullref; +} + + +struct upb_symtab_entry *upb_context_resolve(struct upb_context *c, + struct upb_string *base, + struct upb_string *symbol) { + return resolve(&c->symtab, base, symbol); +} + +/* Joins strings together, for example: + * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" + * join("", "Baz") -> "Baz" + * Caller owns the returned string and must free it. */ +static struct upb_string join(struct upb_string *base, struct upb_string *name) { + size_t len = base->byte_len + name->byte_len; + if(base->byte_len > 0) len++; /* For the separator. */ + struct upb_string joined = {.byte_len=len, .ptr=malloc(len)}; + if(base->byte_len > 0) { + /* nested_base = base + '.' + d->name */ + memcpy(joined.ptr, base->ptr, base->byte_len); + joined.ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR; + memcpy(&joined.ptr[base->byte_len+1], name->ptr, name->byte_len); + } else { + memcpy(joined.ptr, name->ptr, name->byte_len); + } + return joined; +} + +static bool insert_enum(struct upb_strtable *t, + google_protobuf_EnumDescriptorProto *ed, + struct upb_string *base) +{ + // TODO: re-enable when compiler sets this flag + //if(!ed->set_flags.has.name) return false; + + /* We own this and must free it on destruct. */ + struct upb_string fqname = join(base, ed->name); + + /* Redefinition within a FileDescriptorProto is not allowed. */ + if(upb_strtable_lookup(t, &fqname)) { + free(fqname.ptr); + return false; + } + + struct upb_symtab_entry e; + e.e.key = fqname; + e.type = UPB_SYM_ENUM; + e.ref._enum = malloc(sizeof(*e.ref._enum)); + upb_enum_init(e.ref._enum, ed); + upb_strtable_insert(t, &e.e); + + return true; +} + +static bool insert_message(struct upb_strtable *t, + google_protobuf_DescriptorProto *d, + struct upb_string *base) +{ + /* TODO: re-enable when compiler sets this flag. */ + //if(!d->set_flags.has.name) return false; + + /* We own this and must free it on destruct. */ + struct upb_string fqname = join(base, d->name); + + /* Redefinition within a FileDescriptorProto is not allowed. */ + if(upb_strtable_lookup(t, d->name)) { + free(fqname.ptr); + return false; + } + + struct upb_symtab_entry e; + e.e.key = fqname; + e.type = UPB_SYM_MESSAGE; + e.ref.msg = malloc(sizeof(*e.ref.msg)); + if(!upb_msg_init(e.ref.msg, d)) { + free(fqname.ptr); + return false; + } + upb_strtable_insert(t, &e.e); + + /* Add nested messages and enums. */ + //if(d->set_flags.has.nested_type) (re-enable when protoc sets) + if(d->nested_type) + for(unsigned int i = 0; i < d->nested_type->len; i++) + if(!insert_message(t, d->nested_type->elements[i], &fqname)) + return false; + + //if(d->set_flags.has.enum_type) + if(d->enum_type) + for(unsigned int i = 0; i < d->enum_type->len; i++) + if(!insert_enum(t, d->enum_type->elements[i], &fqname)) + return false; + + return true; +} + +bool addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, + google_protobuf_FileDescriptorProto *fd) +{ + struct upb_string package = {.byte_len=0}; + if(fd->set_flags.has.package) package = *fd->package; + + if(fd->set_flags.has.message_type) + for(unsigned int i = 0; i < fd->message_type->len; i++) + if(!insert_message(addto, fd->message_type->elements[i], &package)) + return false; + + if(fd->set_flags.has.enum_type) + for(unsigned int i = 0; i < fd->enum_type->len; i++) + if(!insert_enum(addto, fd->enum_type->elements[i], &package)) + return false; + + /* TODO: handle extensions and services. */ + + /* Attempt to resolve all references. */ + struct upb_symtab_entry *e; + for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { + if(upb_strtable_lookup(existingdefs, &e->e.key)) + return false; /* Redefinition prohibited. */ + if(e->type == UPB_SYM_MESSAGE) { + struct upb_msg *m = e->ref.msg; + for(unsigned int i = 0; i < m->num_fields; i++) { + struct upb_msg_field *f = &m->fields[i]; + google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; + union upb_symbol_ref ref; + if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE || + fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) + ref = resolve2(existingdefs, addto, &e->e.key, fd->type_name, + UPB_SYM_MESSAGE); + else if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM) + ref = resolve2(existingdefs, addto, &e->e.key, fd->type_name, + UPB_SYM_ENUM); + else + continue; /* No resolving necessary. */ + if(!ref.msg) return false; /* Ref. to undefined symbol. */ + upb_msg_ref(m, f, ref); + } + } + } + return true; +} + +bool upb_context_addfd(struct upb_context *c, + google_protobuf_FileDescriptorProto *fd) +{ + struct upb_strtable tmp; + if(!addfd(&tmp, &c->symtab, fd)) { + free_symtab(&tmp); + return false; + } + upb_strtable_free(&tmp); + return true; +} + +bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) { + google_protobuf_FileDescriptorSet *fds = + upb_alloc_and_parse(c->fds_msg, fds_str, true); + if(!fds) return false; + + if(fds->set_flags.has.file) { + /* Insert new symbols into a temporary table until we have verified that + * the descriptor is valid. */ + struct upb_strtable tmp; + upb_strtable_init(&tmp, 0, sizeof(struct upb_symtab_entry)); + for(uint32_t i = 0; i < fds->file->len; i++) { + if(!addfd(&tmp, &c->symtab, fds->file->elements[i])) { + printf("Not added successfully!\n"); + free_symtab(&tmp); + return false; + } + } + /* Everything was successfully added, copy from the tmp symtable. */ + struct upb_symtab_entry *e; + for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) + upb_strtable_insert(&c->symtab, &e->e); + upb_strtable_free(&tmp); + } + + /* We own fds now, need to keep a ref so we can free it later. */ + if(c->fds_size == c->fds_len) { + c->fds_size *= 2; + c->fds = realloc(c->fds, c->fds_size); + } + c->fds[c->fds_len++] = fds; + return true; +} diff --git a/src/upb_context.h b/src/upb_context.h new file mode 100644 index 0000000..4ddaed6 --- /dev/null +++ b/src/upb_context.h @@ -0,0 +1,106 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * A context represents a namespace of proto definitions, sort of like an + * interpreter's symbol table. It is empty when first constructed. Clients + * add definitions to the context by supplying unserialized or serialized + * descriptors (as defined in descriptor.proto). + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_CONTEXT_H_ +#define UPB_CONTEXT_H_ + +#include "upb.h" +#include "upb_table.h" + +struct google_protobuf_FileDescriptorProto; + +#ifdef __cplusplus +extern "C" { +#endif + +/* Definitions. ***************************************************************/ + +/* The symbol table maps names to various kinds of symbols. */ +enum upb_symbol_type { + UPB_SYM_MESSAGE, + UPB_SYM_ENUM, + UPB_SYM_SERVICE, + UPB_SYM_EXTENSION +}; + +struct upb_symtab_entry { + struct upb_strtable_entry e; + enum upb_symbol_type type; + union upb_symbol_ref ref; +}; + +struct upb_context { + struct upb_strtable symtab; /* The context's symbol table. */ + struct upb_strtable psymtab; /* Private symbols, for internal use. */ + struct upb_msg *fds_msg; /* This is in psymtab, ptr here for convenience. */ + + /* A list of the FileDescriptorProtos we own (from having parsed them + * ourselves) and must free on destruction. */ + size_t fds_size, fds_len; + struct google_protobuf_FileDescriptorSet **fds; +}; + +/* Initializes and frees a upb_context, respectively. */ +bool upb_context_init(struct upb_context *c); +void upb_context_free(struct upb_context *c); + +/* Looking up symbols. ********************************************************/ + +/* Resolves the given symbol using the rules described in descriptor.proto, + * namely: + * + * If the name starts with a '.', it is fully-qualified. Otherwise, C++-like + * scoping rules are used to find the type (i.e. first the nested types + * within this message are searched, then within the parent, on up to the + * root namespace). + * + * Returns NULL if the symbol has not been defined. */ +struct upb_symtab_entry *upb_context_resolve(struct upb_context *c, + struct upb_string *base, + struct upb_string *symbol); + +/* Find an entry in the symbol table with this exact name. Returns NULL if no + * such symbol name exists. */ +struct upb_symtab_entry *upb_context_lookup(struct upb_context *c, + struct upb_string *symbol); + +INLINE struct upb_symtab_entry *upb_context_symbegin(struct upb_context *c) { + return (struct upb_symtab_entry*)upb_strtable_begin(&c->symtab); +} + +INLINE struct upb_symtab_entry *upb_context_symnext( + struct upb_context *c, struct upb_symtab_entry *cur) { + return (struct upb_symtab_entry*)upb_strtable_next(&c->symtab, &cur->e); +} + +/* Adding symbols. ************************************************************/ + +/* Adds the definitions in the given file descriptor to this context. All + * types that are referenced from fd must have previously been defined (or be + * defined in fd). fd may not attempt to define any names that are already + * defined in this context. + * + * Caller retains ownership of fd, but the context will contain references to + * it, so it must outlive the context. + * + * upb_context_addfd only returns true or false; it does not give any hint + * about what happened in the case of failure. This is because the descriptor + * is expected to have been validated at the time it was parsed/generated. */ +bool upb_context_addfd(struct upb_context *c, + struct google_protobuf_FileDescriptorProto *fd); + +bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_PARSE_H_ */ diff --git a/src/upb_enum.c b/src/upb_enum.c new file mode 100644 index 0000000..b599c9b --- /dev/null +++ b/src/upb_enum.c @@ -0,0 +1,31 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "descriptor.h" +#include "upb_enum.h" + +void upb_enum_init(struct upb_enum *e, + struct google_protobuf_EnumDescriptorProto *ed) { + int num_values = ed->set_flags.has.value ? ed->value->len : 0; + e->descriptor = ed; + upb_strtable_init(&e->nametoint, num_values, sizeof(struct upb_enum_ntoi_entry)); + upb_inttable_init(&e->inttoname, num_values, sizeof(struct upb_enum_iton_entry)); + + for(int i = 0; i < num_values; i++) { + google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i]; + struct upb_enum_ntoi_entry ntoi_entry = {.e = {.key = *value->name}, + .value = value->number}; + struct upb_enum_iton_entry iton_entry = {.e = {.key = value->number}, + .string = value->name}; + upb_strtable_insert(&e->nametoint, &ntoi_entry.e); + upb_inttable_insert(&e->inttoname, &iton_entry.e); + } +} + +void upb_enum_free(struct upb_enum *e) { + upb_strtable_free(&e->nametoint); + upb_inttable_free(&e->inttoname); +} diff --git a/src/upb_enum.h b/src/upb_enum.h new file mode 100644 index 0000000..9fea3a4 --- /dev/null +++ b/src/upb_enum.h @@ -0,0 +1,41 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * upb_enum is a simple object that allows run-time reflection over the values + * defined within an enum. */ + +#ifndef UPB_ENUM_H_ +#define UPB_ENUM_H_ + +#include +#include "upb_table.h" + +/* Forward declaration from descriptor.h. */ +struct google_protobuf_EnumDescriptorProto; +struct google_protobuf_EnumValueDescriptorProto; + +struct upb_enum { + struct google_protobuf_EnumDescriptorProto *descriptor; + struct upb_strtable nametoint; + struct upb_inttable inttoname; +}; + +struct upb_enum_ntoi_entry { + struct upb_strtable_entry e; + uint32_t value; +}; + +struct upb_enum_iton_entry { + struct upb_inttable_entry e; + struct upb_string *string; +}; + +/* Initializes and frees an enum, respectively. Caller retains ownership of + * ed, but it must outlive e. */ +void upb_enum_init(struct upb_enum *e, + struct google_protobuf_EnumDescriptorProto *ed); +void upb_enum_free(struct upb_enum *e); + +#endif /* UPB_ENUM_H_ */ diff --git a/src/upb_inlinedefs.c b/src/upb_inlinedefs.c new file mode 100644 index 0000000..60863c4 --- /dev/null +++ b/src/upb_inlinedefs.c @@ -0,0 +1,19 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file, if compiled, will contain standalone (non-inlined) versions of + * all inline functions defined in header files. We don't generally use this + * file since we use "static inline" for inline functions (which will put a + * standalone version of the function in any .o file that needs it, but + * compiling this file and dumping the object file will let us inspect how + * inline functions are compiled, so we keep it around. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#define INLINE +#include "upb_context.h" +#include "upb_enum.h" +#include "upb_msg.h" +#include "upb_parse.h" +#include "upb_table.h" diff --git a/src/upb_msg.c b/src/upb_msg.c new file mode 100644 index 0000000..47a8662 --- /dev/null +++ b/src/upb_msg.c @@ -0,0 +1,379 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + */ + +#include +#include +#include "descriptor.h" +#include "upb_msg.h" +#include "upb_parse.h" + +/* Rounds p up to the next multiple of t. */ +#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) + +static int div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; +} + +/* Callback for sorting fields. */ +static int compare_fields(const void *e1, const void *e2) { + const google_protobuf_FieldDescriptorProto *fd1 = *(void**)e1; + const google_protobuf_FieldDescriptorProto *fd2 = *(void**)e2; + /* Required fields go before non-required. */ + bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; + if(req1 != req2) { + return req2 - req1; + } else { + /* Within required and non-required field lists, list in number order. + * TODO: consider ordering by data size to reduce padding. */ + return fd1->number - fd2->number; + } +} + +bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d) +{ + /* TODO: more complete validation. + * TODO: re-enable this check when we properly set this flag. */ + //if(!d->set_flags.has.field) return false; + + upb_inttable_init(&m->fields_by_num, d->field->len, + sizeof(struct upb_fieldsbynum_entry)); + upb_strtable_init(&m->fields_by_name, d->field->len, + sizeof(struct upb_fieldsbyname_entry)); + + m->descriptor = d; + m->num_fields = d->field->len; + m->set_flags_bytes = div_round_up(m->num_fields, 8); + /* These are incremented in the loop. */ + m->num_required_fields = 0; + m->size = m->set_flags_bytes; + + m->fields = malloc(sizeof(*m->fields) * m->num_fields); + m->field_descriptors = malloc(sizeof(*m->field_descriptors) * m->num_fields); + for(unsigned int i = 0; i < m->num_fields; i++) { + /* We count on the caller to keep this pointer alive. */ + m->field_descriptors[i] = d->field->elements[i]; + } + /* TODO: re-enable proper sorting once the compiler is sorted out. */ + //qsort(m->field_descriptors, m->num_fields, sizeof(void*), compare_fields); + + size_t max_align = 0; + for(unsigned int i = 0; i < m->num_fields; i++) { + struct upb_msg_field *f = &m->fields[i]; + google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; + struct upb_type_info *type_info = &upb_type_info[fd->type]; + + /* General alignment rules are: each member must be at an address that is a + * multiple of that type's alignment. Also, the size of the structure as + * a whole must be a multiple of the greatest alignment of any member. */ + f->field_index = i; + f->byte_offset = ALIGN_UP(m->size, type_info->align); + f->type = fd->type; + f->label = fd->label; + m->size = f->byte_offset + type_info->size; + max_align = max(max_align, type_info->align); + if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) + m->num_required_fields++; + + /* Insert into the tables. Note that f->ref will be uninitialized, even in + * the tables' copies of *f, which is why we must update them separately + * in upb_msg_ref() below. */ + struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f}; + struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f}; + upb_inttable_insert(&m->fields_by_num, &nument.e); + upb_strtable_insert(&m->fields_by_name, &strent.e); + } + + m->size = ALIGN_UP(m->size, max_align); + return true; +} + +void upb_msg_free(struct upb_msg *m) +{ + upb_inttable_free(&m->fields_by_num); + upb_strtable_free(&m->fields_by_name); + free(m->fields); + free(m->field_descriptors); +} + +void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, + union upb_symbol_ref ref) { + struct google_protobuf_FieldDescriptorProto *d = + upb_msg_field_descriptor(f, m); + struct upb_fieldsbynum_entry *int_e = upb_inttable_lookup( + &m->fields_by_num, d->number, sizeof(struct upb_fieldsbynum_entry)); + struct upb_fieldsbyname_entry *str_e = + upb_strtable_lookup(&m->fields_by_name, d->name); + assert(int_e && str_e); + f->ref = ref; + int_e->f.ref = ref; + str_e->f.ref = ref; +} + +/* Memory management *********************************************************/ + +/* Our memory management scheme is as follows: + * + * All pointers to dynamic memory (strings, arrays, and submessages) are + * expected to be good pointers if they are non-zero, *regardless* of whether + * that field's bit is set! That way we can reuse the memory even if the field + * is unset and then set later. */ + +/* For our memory-managed strings and arrays we store extra information + * (compared to a plain upb_string or upb_array). But the data starts with + * a upb_string and upb_array, so we can overlay onto the regular types. */ +struct mm_upb_string { + struct upb_string s; + /* Track the allocated size, so we know when we need to reallocate. */ + uint32_t size; + /* Our allocated data. Stored separately so that clients can point s.ptr to + * a referenced string, but we can reuse this data later. */ + char *data; +}; + +struct mm_upb_array { + struct upb_array a; + /* Track the allocated size, so we know when we need to reallocate. */ + uint32_t size; +}; + +static uint32_t round_up_to_pow2(uint32_t v) +{ + /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +void *upb_msgdata_new(struct upb_msg *m) +{ + void *msg = malloc(m->size); + memset(msg, 0, m->size); /* Clear all pointers, values, and set bits. */ + return msg; +} + +static void free_value(union upb_value_ptr p, struct upb_msg_field *f, + bool free_submsgs) +{ + switch(f->type) { + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: { + struct mm_upb_string *mm_str = (void*)*p.string; + if(mm_str) { + free(mm_str->data); + free(mm_str); + } + break; + } + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP: + if(free_submsgs) upb_msgdata_free(*p.message, f->ref.msg, free_submsgs); + break; + default: break; /* For non-dynamic types, do nothing. */ + } +} + +void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs) +{ + if(!data) return; /* A very free-like thing to do. */ + for(unsigned int i = 0; i < m->num_fields; i++) { + struct upb_msg_field *f = &m->fields[i]; + union upb_value_ptr p = upb_msg_getptr(data, f); + if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { + if(*p.array) { + for(uint32_t j = 0; j < (*p.array)->len; j++) + free_value(upb_array_getelementptr(*p.array, j, f->type), + f, free_submsgs); + free((*p.array)->elements._void); + free(*p.array); + } + } else { + free_value(p, f, free_submsgs); + } + } + free(data); +} + +void upb_msg_reuse_str(struct upb_string **str, uint32_t size) +{ + if(!*str) { + *str = malloc(sizeof(struct mm_upb_string)); + memset(*str, 0, sizeof(struct mm_upb_string)); + } + struct mm_upb_string *s = (void*)*str; + if(s->size < size) { + size = max(16, round_up_to_pow2(size)); + s->data = realloc(s->data, size); + s->size = size; + } + s->s.ptr = s->data; +} + +void upb_msg_reuse_array(struct upb_array **arr, uint32_t size, upb_field_type_t t) +{ + if(!*arr) { + *arr = malloc(sizeof(struct mm_upb_array)); + memset(*arr, 0, sizeof(struct mm_upb_array)); + } + struct mm_upb_array *a = (void*)*arr; + if(a->size < size) { + size = max(16, round_up_to_pow2(size)); + size_t type_size = upb_type_info[t].size; + a->a.elements._void = realloc(a->a.elements._void, size * type_size); + /* Zero any newly initialized memory. */ + memset(UPB_INDEX(a->a.elements._void, a->size, type_size), 0, + (size - a->size) * type_size); + a->size = size; + } +} + +void upb_msg_reuse_strref(struct upb_string **str) { upb_msg_reuse_str(str, 0); } + +void upb_msg_reuse_submsg(void **msg, struct upb_msg *m) +{ + if(!*msg) *msg = upb_msgdata_new(m); + else upb_msg_clear(*msg, m); /* Clears set bits, leaves pointers. */ +} + +/* Serialization/Deserialization. ********************************************/ + +/* We use this as our "user_data" for each frame of the parsing stack. */ +struct parse_frame_data { + struct upb_msg *m; + void *data; +}; + +static void set_frame_data(struct upb_parse_state *s, struct upb_msg *m, + void *data) +{ + struct parse_frame_data *frame = (void*)&s->top->user_data; + frame->m = m; + frame->data = data; +} + +static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag, + void **user_field_desc) +{ + struct parse_frame_data *frame = (void*)&s->top->user_data; + struct upb_msg_field *f = upb_msg_fieldbynum(frame->m, tag->field_number); + if(!f || !upb_check_type(tag->wire_type, f->type)) + return 0; /* Skip unknown or fields of the wrong type. */ + *user_field_desc = f; + return f->type; +} + +/* Returns a pointer to where the next value for field "f" should be stored, + * taking into account whether f is an array that may need to be reallocatd. */ +static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f) +{ + union upb_value_ptr p = upb_msg_getptr(data, f); + if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { + size_t len = upb_msg_is_set(data, f) ? (*p.array)->len : 0; + upb_msg_reuse_array(p.array, len+1, f->type); + (*p.array)->len = len + 1; + assert(p._void); + p = upb_array_getelementptr(*p.array, len, f->type); + assert(p._void); + } + upb_msg_set(data, f); + assert(p._void); + return p; +} + +static upb_status_t value_cb(struct upb_parse_state *s, void **buf, void *end, + void *user_field_desc) +{ + struct parse_frame_data *frame = (void*)&s->top->user_data; + struct upb_msg_field *f = user_field_desc; + union upb_value_ptr p = get_value_ptr(frame->data, f); + UPB_CHECK(upb_parse_value(buf, end, f->type, p)); + return UPB_STATUS_OK; +} + +static upb_status_t str_cb(struct upb_parse_state *_s, struct upb_string *str, + void *user_field_desc) +{ + struct upb_msg_parse_state *s = (void*)_s; + struct parse_frame_data *frame = (void*)&s->s.top->user_data; + struct upb_msg_field *f = user_field_desc; + union upb_value_ptr p = get_value_ptr(frame->data, f); + if(s->byref) { + upb_msg_reuse_strref(p.string); + **p.string = *str; + } else { + upb_msg_reuse_str(p.string, str->byte_len); + upb_strcpy(*p.string, str); + } + return UPB_STATUS_OK; +} + +static void submsg_start_cb(struct upb_parse_state *_s, void *user_field_desc) +{ + struct upb_msg_parse_state *s = (void*)_s; + struct upb_msg_field *f = user_field_desc; + struct parse_frame_data *frame = (void*)&s->s.top->user_data; + // TODO: find a non-hacky way to get a pointer to the old frame. + struct parse_frame_data *oldframe = (void*)((char*)s->s.top - s->s.udata_size); + union upb_value_ptr p = get_value_ptr(oldframe->data, f); + assert(f->ref.msg); + upb_msg_reuse_submsg(p.message, f->ref.msg); + set_frame_data(&s->s, f->ref.msg, *p.message); + if(!s->merge) upb_msg_clear(frame->data, f->ref.msg); +} + +void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg, + struct upb_msg *m, bool merge, bool byref) +{ + upb_parse_reset(&s->s); + s->merge = merge; + s->byref = byref; + if(!merge && msg == NULL) msg = upb_msgdata_new(m); + upb_msg_clear(msg, m); + set_frame_data(&s->s, m, msg); + s->s.tag_cb = tag_cb; + s->s.value_cb = value_cb; + s->s.str_cb = str_cb; + s->s.submsg_start_cb = submsg_start_cb; +} + +void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg, + struct upb_msg *m, bool merge, bool byref) +{ + upb_parse_init(&s->s, sizeof(struct parse_frame_data)); + upb_msg_parse_reset(s, msg, m, merge, byref); +} + +void upb_msg_parse_free(struct upb_msg_parse_state *s) +{ + upb_parse_free(&s->s); +} + +upb_status_t upb_msg_parse(struct upb_msg_parse_state *s, + void *data, size_t len, size_t *read) +{ + return upb_parse(&s->s, data, len, read); +} + +void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *str, bool byref) +{ + struct upb_msg_parse_state s; + void *msg = upb_msgdata_new(m); + upb_msg_parse_init(&s, msg, m, false, byref); + size_t read; + upb_status_t status = upb_msg_parse(&s, str->ptr, str->byte_len, &read); + upb_msg_parse_free(&s); + if(status == UPB_STATUS_OK && read == str->byte_len) { + return msg; + } else { + upb_msg_free(msg); + return NULL; + } +} diff --git a/src/upb_msg.h b/src/upb_msg.h new file mode 100644 index 0000000..8910505 --- /dev/null +++ b/src/upb_msg.h @@ -0,0 +1,369 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * A upb_msg provides a full description of a message as defined in a .proto + * file. It supports many features and operations for dealing with proto + * messages: + * - reflection over .proto types at runtime (list fields, get names, etc). + * - an in-memory byte-level format for efficiently storing and accessing msgs. + * - serializing and deserializing from the in-memory format to a protobuf. + * - optional memory management for handling strings, arrays, and submessages. + * + * Throughout this file, the following convention is used: + * - "struct upb_msg *m" describes a message type (name, list of fields, etc). + * - "void *data" is an actual message stored using the in-memory format. + * + * The in-memory format is very much like a C struct that you can define at + * run-time, but also supports reflection. Like C structs it supports + * offset-based access, as opposed to the much slower name-based lookup. The + * format stores both the values themselves and bits describing whether each + * field is set or not. For example: + * + * parsed message Foo { + * optional bool a = 1; + * repeated uint32 b = 2; + * optional Bar c = 3; + * } + * + * The in-memory layout for this message on a 32-bit machine will be something + * like: + * + * Foo + * +------------------------+ + * | set_flags a:1, b:1, c:1| + * +------------------------+ + * | bool a (1 byte) | + * +------------------------+ + * | padding (3 bytes) | + * +------------------------+ upb_array + * | upb_array* b (4 bytes) | ----> +----------------------------+ + * +------------------------+ | uint32* elements (4 bytes) | ---+ + * | Bar* c (4 bytes) | +----------------------------+ | + * +------------------------+ | uint32 size (4 bytes) | | + * +----------------------------+ | + * | + * -----------------------------------------------------------------+ + * | + * V + * uint32 array + * +----+----+----+----+----+----+ + * | e1 | e2 | e3 | e4 | e5 | e6 | + * +----+----+----+----+----+----+ + * + * And the corresponding C structure (as emitted by the proto compiler) would be: + * + * struct Foo { + * union { + * uint8_t bytes[1]; + * struct { + * bool a:1; + * bool b:1; + * bool c:1; + * } has; + * } set_flags; + * bool a; + * upb_uint32_array *b; + * Bar *c; + * } + * + * Because the C struct emitted by the upb compiler uses exactly the same + * byte-level format as the reflection interface, you can access the same hunk + * of memory either way. The C struct provides maximum performance and static + * type safety; upb_msg provides flexibility. + * + * The in-memory format has no interoperability guarantees whatsoever, except + * that a single version of upb will interoperate with itself. Don't even + * think about persisting the in-memory format or sending it anywhere. That's + * what serialized protobufs are for! The in-memory format is just that -- an + * in-memory representation that allows for fast access. + * + * The in-memory format is carefully designed to *not* mandate any particular + * memory management scheme. This should make it easier to integrate with + * existing memory management schemes, or to perform advanced techniques like + * reference counting, garbage collection, and string references. Different + * clients can read each others messages regardless of what memory management + * scheme each is using. + * + * A memory management scheme is provided for convenience, and it is used by + * default by the stock message parser. Clients can substitute their own + * memory management scheme into this parser without any loss of generality + * or performance. + */ + +#ifndef UPB_MSG_H_ +#define UPB_MSG_H_ + +#include +#include + +#include "upb.h" +#include "upb_table.h" +#include "upb_parse.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward declarations from descriptor.h. */ +struct google_protobuf_DescriptorProto; +struct google_protobuf_FieldDescriptorProto; + +/* Message definition. ********************************************************/ + +/* Structure that describes a single field in a message. This structure is very + * consciously designed to fit into 12/16 bytes (32/64 bit, respectively), + * because copies of this struct are in the hash table that is read in the + * critical path of parsing. Minimizing the size of this struct increases + * cache-friendliness. */ +struct upb_msg_field { + union upb_symbol_ref ref; + uint32_t byte_offset; /* Where to find the data. */ + uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ + upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ + upb_label_t label; +}; + +/* Structure that describes a single .proto message type. */ +struct upb_msg { + struct google_protobuf_DescriptorProto *descriptor; + size_t size; + uint32_t num_fields; + uint32_t set_flags_bytes; + uint32_t num_required_fields; /* Required fields have the lowest set bytemasks. */ + struct upb_inttable fields_by_num; + struct upb_strtable fields_by_name; + struct upb_msg_field *fields; + struct google_protobuf_FieldDescriptorProto **field_descriptors; +}; + +/* The num->field and name->field maps in upb_msg allow fast lookup of fields + * by number or name. These lookups are in the critical path of parsing and + * field lookup, so they must be as fast as possible. To make these more + * cache-friendly, we put the data in the table by value. */ + +struct upb_fieldsbynum_entry { + struct upb_inttable_entry e; + struct upb_msg_field f; +}; + +struct upb_fieldsbyname_entry { + struct upb_strtable_entry e; + struct upb_msg_field f; +}; + +/* Can be used to retrieve a field descriptor given the upb_msg_field ref. */ +INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( + struct upb_msg_field *f, struct upb_msg *m) { + return m->field_descriptors[f->field_index]; +} + +/* Initializes/frees a upb_msg. Caller retains ownership of d, but the msg + * will contain references to it, so it must outlive the msg. Note that init + * does not resolve upb_msg_field.ref -- the caller should do that + * post-initialization by calling upb_msg_ref() below. */ +bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d); +void upb_msg_free(struct upb_msg *m); + +/* Clients use this function on a previously initialized upb_msg to resolve the + * "ref" field in the upb_msg_field. Since messages can refer to each other in + * mutually-recursive ways, this step must be separated from initialization. */ +void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref); + +/* Looks up a field by name or number. While these are written to be as fast + * as possible, it will still be faster to cache the results of this lookup if + * possible. These return NULL if no such field is found. */ +INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m, + uint32_t number) { + struct upb_fieldsbynum_entry *e = + (struct upb_fieldsbynum_entry*)upb_inttable_lookup( + &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry)); + return e ? &e->f : NULL; +} +INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m, + struct upb_string *name) { + struct upb_fieldsbyname_entry *e = + (struct upb_fieldsbyname_entry*)upb_strtable_lookup( + &m->fields_by_name, name); + return e ? &e->f : NULL; +} + +/* "Set" flag reading and writing. *******************************************/ + +INLINE size_t upb_isset_offset(uint32_t field_index) { + return field_index / 8; +} + +INLINE uint8_t upb_isset_mask(uint32_t field_index) { + return 1 << (field_index % 8); +} + +/* Functions for reading and writing the "set" flags in the msg. Note that + * these do not perform memory management associated with any dynamic memory + * these fields may be referencing. These *only* set and test the flags. */ +INLINE void upb_msg_set(void *s, struct upb_msg_field *f) +{ + ((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index); +} + +INLINE void upb_msg_unset(void *s, struct upb_msg_field *f) +{ + ((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index); +} + +INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f) +{ + return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index); +} + +INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m) +{ + int num_fields = m->num_required_fields; + int i = 0; + while(num_fields > 8) { + if(((uint8_t*)s)[i++] != 0xFF) return false; + num_fields -= 8; + } + if(((uint8_t*)s)[i] != (1 << num_fields) - 1) return false; + return true; +} + +INLINE void upb_msg_clear(void *s, struct upb_msg *m) +{ + memset(s, 0, m->set_flags_bytes); +} + +/* Scalar (non-array) data access. ********************************************/ + +/* Returns a pointer to a specific field in a message. */ +INLINE union upb_value_ptr upb_msg_getptr(void *data, struct upb_msg_field *f) { + union upb_value_ptr p; + p._void = ((char*)data + f->byte_offset); + return p; +} + +/* Arrays. ********************************************************************/ + +/* Represents an array (a repeated field) of any type. The interpretation of + * the data in the array depends on the type. */ +struct upb_array { + union upb_value_ptr elements; + uint32_t len; /* Measured in elements. */ +}; + +/* Returns a pointer to an array element. */ +INLINE union upb_value_ptr upb_array_getelementptr( + struct upb_array *arr, uint32_t n, upb_field_type_t type) +{ + union upb_value_ptr ptr; + ptr._void = (void*)((char*)arr->elements._void + n*upb_type_info[type].size); + return ptr; +} + +/* These are all overlays on upb_array, pointers between them can be cast. */ +#define UPB_DEFINE_ARRAY_TYPE(name, type) \ + struct name ## _array { \ + type *elements; \ + uint32_t len; \ + }; + +UPB_DEFINE_ARRAY_TYPE(upb_double, double) +UPB_DEFINE_ARRAY_TYPE(upb_float, float) +UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t) +UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t) +UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t) +UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t) +UPB_DEFINE_ARRAY_TYPE(upb_bool, bool) +UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*) + +/* Defines an array of a specific message type. */ +#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array +#define UPB_DEFINE_MSG_ARRAY(msg_type) \ + UPB_MSG_ARRAY(msg_type) { \ + msg_type **elements; \ + uint32_t len; \ + }; + +/* Memory management *********************************************************/ + +/* One important note about these memory management routines: they must be used + * completely or not at all (for each message). In other words, you can't + * allocate your own message and then free it with upb_msgdata_free. As + * another example, you can't point a field to your own string and then call + * upb_msg_reuse_str. */ + +/* Allocates and frees message data, respectively. Newly allocated data is + * initialized to empty. Freeing a message always frees string data, but + * the client can decide whether or not submessages should be deleted. */ +void *upb_msgdata_new(struct upb_msg *m); +void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs); + +/* Given a pointer to the appropriate field of the message or array, these + * functions will lazily allocate memory for a string, array, or submessage. + * If the previously allocated memory is big enough, it will reuse it without + * re-allocating. See upb_msg.c for example usage. */ + +/* Reuse a string of at least the given size. */ +void upb_msg_reuse_str(struct upb_string **str, uint32_t size); +/* Like the previous, but assumes that the string will be by reference, so + * doesn't allocate memory for the string itself. */ +void upb_msg_reuse_strref(struct upb_string **str); + +/* Reuse an array of at least the given size, with the given type. */ +void upb_msg_reuse_array(struct upb_array **arr, uint32_t size, + upb_field_type_t t); + +/* Reuse a submessage of the given type. */ +void upb_msg_reuse_submsg(void **msg, struct upb_msg *m); + +/* Serialization/Deserialization. ********************************************/ + +/* This is all just a layer on top of the stream-oriented facility in + * upb_parse.h. */ + +struct upb_msg_parse_state { + struct upb_parse_state s; + bool merge; + bool byref; + struct upb_msg *m; +}; + +/* Initializes/frees a message parser. The parser will write the data to the + * message data "data", which the caller must have previously allocated (the + * parser will allocate submsgs, strings, and arrays as needed, however). + * + * "Merge" controls whether the parser will append to data instead of + * overwriting. Merging concatenates arrays and merges submessages instead + * of clearing both. + * + * "Byref" controls whether the new message data copies or references strings + * it encounters. If byref == true, then all strings supplied to upb_msg_parse + * must remain unchanged and must outlive data. */ +void upb_msg_parse_init(struct upb_msg_parse_state *s, void *data, + struct upb_msg *m, bool merge, bool byref); +void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *data, + struct upb_msg *m, bool merge, bool byref); +void upb_msg_parse_free(struct upb_msg_parse_state *s); + +/* Parses a protobuf fragment, writing the data to the message that was passed + * to upb_msg_parse_init. This function can be called multiple times as more + * data becomes available. */ +upb_status_t upb_msg_parse(struct upb_msg_parse_state *s, + void *data, size_t len, size_t *read); + +/* Parses the protobuf in s (which is expected to be complete) and allocates + * new message data to hold it. This is an alternative to the streaming API + * above. "byref" works as in upb_msg_parse_init(). */ +void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref); + + +/* Text dump *****************************************************************/ + +void upb_msg_print(void *data, struct upb_msg *m, FILE *stream); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_MSG_H_ */ diff --git a/src/upb_parse.c b/src/upb_parse.c new file mode 100644 index 0000000..3a011a6 --- /dev/null +++ b/src/upb_parse.c @@ -0,0 +1,383 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_parse.h" + +#include +#include +#include +#include +#include "descriptor.h" + +/* Lowest-level functions -- these read integers from the input buffer. */ + +static void *check_end(uint8_t *buf, void *end, size_t maxlen, + upb_status_t *bound_error) +{ + void *maxend = buf + maxlen; + if(end < maxend) { + *bound_error = UPB_STATUS_NEED_MORE_DATA; + return end; + } else { + *bound_error = UPB_ERROR_UNTERMINATED_VARINT; + return maxend; + } +} + +static upb_status_t get_v_uint64_t(void *restrict *buf, void *end, + uint64_t *restrict val) +{ + uint8_t *b = *buf; + upb_status_t bound_error; + end = check_end(b, end, 10, &bound_error); /* 2**64 is a 10-byte varint. */ + uint8_t last = 0x80; + *val = 0; + for(int bitpos = 0; b < (uint8_t*)end && (last & 0x80); b++, bitpos += 7) + *val |= ((uint64_t)((last = *b) & 0x7F)) << bitpos; + + if(last & 0x80) return bound_error; + *buf = b; + return UPB_STATUS_OK; +} + +static upb_status_t skip_v_uint64_t(void **buf, void *end) +{ + uint8_t *b = *buf; + upb_status_t bound_error; + end = check_end(b, end, 10, &bound_error); /* 2**64 is a 10-byte varint. */ + uint8_t last = 0x80; + for(; b < (uint8_t*)end && (last & 0x80); b++) + last = *b; + + if(last & 0x80) return bound_error; + *buf = b; + return UPB_STATUS_OK; +} + +static upb_status_t get_v_uint32_t(void *restrict *buf, void *end, + uint32_t *restrict val) +{ + uint8_t *b = *buf, *dend; + upb_status_t bound_error; + dend = check_end(b, end, 5, &bound_error); /* 2**32 is a 5-byte varint. */ + end = check_end(b, end, 10, &bound_error); /* May have to discard bytes. */ + uint8_t last = 0x80; + *val = 0; + for(int bitpos = 0; b < dend && (last & 0x80); b++, bitpos += 7) + *val |= ((uint32_t)((last = *b) & 0x7F)) << bitpos; + + /* Discard high bytes until varint ends. */ + for(; b < (uint8_t*)end && (last & 0x80); b++) + last = *b; + + if(last & 0x80) + return bound_error; + *buf = b; + return UPB_STATUS_OK; +} + +static upb_status_t get_f_uint32_t(void *restrict *buf, void *end, + uint32_t *restrict val) +{ + uint8_t *b = *buf; + void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t); + if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; +#if UPB_UNALIGNED_READS_OK + *val = *(uint32_t*)b; +#else +#define SHL(val, bits) ((uint32_t)val << bits) + *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24); +#undef SHL +#endif + *buf = uint32_end; + return UPB_STATUS_OK; +} + +static upb_status_t get_f_uint64_t(void *restrict *buf, void *end, + uint64_t *restrict val) +{ + void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t); + if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; +#if UPB_UNALIGNED_READS_OK + *val = *(uint64_t*)*buf; + *buf = uint64_end; +#else + uint32_t lo32, hi32; + get_f_uint32_t(buf, &lo32, end); + get_f_uint32_t(buf, &hi32, end); + *val = lo32 | ((uint64_t)hi32 << 32); +#endif + return UPB_STATUS_OK; +} + +static upb_status_t skip_f_uint32_t(void **buf, void *end) +{ + void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t); + if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; + *buf = uint32_end; + return UPB_STATUS_OK; +} + +static upb_status_t skip_f_uint64_t(void **buf, void *end) +{ + void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t); + if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; + *buf = uint64_end; + return UPB_STATUS_OK; +} + +static int32_t zz_decode_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } + +/* Functions for reading wire values and converting them to values. These + * are generated with macros because they follow a higly consistent pattern. */ + +#define WVTOV(type, wire_t, val_t) \ + static void wvtov_ ## type(wire_t s, val_t *d) + +#define GET(type, v_or_f, wire_t, val_t, member_name) \ + static upb_status_t get_ ## type(void **buf, void *end, val_t *d) { \ + wire_t tmp; \ + UPB_CHECK(get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp)); \ + wvtov_ ## type(tmp, d); \ + return UPB_STATUS_OK; \ + } + +#define T(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ + GET(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t) + +T(DOUBLE, f, uint64_t, double, _double) { memcpy(d, &s, sizeof(double)); } +T(FLOAT, f, uint32_t, float, _float) { memcpy(d, &s, sizeof(float)); } +T(INT32, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } +T(INT64, v, uint64_t, int64_t, int64) { *d = (int64_t)s; } +T(UINT32, v, uint32_t, uint32_t, uint32) { *d = s; } +T(UINT64, v, uint64_t, uint64_t, uint64) { *d = s; } +T(SINT32, v, uint32_t, int32_t, int32) { *d = zz_decode_32(s); } +T(SINT64, v, uint64_t, int64_t, int64) { *d = zz_decode_64(s); } +T(FIXED32, f, uint32_t, uint32_t, uint32) { *d = s; } +T(FIXED64, f, uint64_t, uint64_t, uint64) { *d = s; } +T(SFIXED32, f, uint32_t, int32_t, int32) { *d = (int32_t)s; } +T(SFIXED64, f, uint64_t, int64_t, int64) { *d = (int64_t)s; } +T(BOOL, v, uint32_t, bool, _bool) { *d = (bool)s; } +T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } +#undef WVTOV +#undef GET +#undef T + +#define alignof(t) offsetof(struct { char c; t x; }, x) + +/* May want to move this to upb.c if enough other things warrant it. */ +struct upb_type_info upb_type_info[] = { + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = {alignof(double), sizeof(double), UPB_WIRE_TYPE_64BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = {alignof(float), sizeof(float), UPB_WIRE_TYPE_32BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = {alignof(bool), sizeof(bool), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_DELIMITED}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_START_GROUP}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32]= {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_32BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_64BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED}, +}; + +static upb_status_t parse_tag(void **buf, void *end, struct upb_tag *tag) +{ + uint32_t tag_int; + UPB_CHECK(get_v_uint32_t(buf, end, &tag_int)); + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return UPB_STATUS_OK; +} + +upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt, + union upb_wire_value *wv) +{ + switch(wt) { + case UPB_WIRE_TYPE_VARINT: UPB_CHECK(get_v_uint64_t(buf, end, &wv->varint)); break; + case UPB_WIRE_TYPE_64BIT: UPB_CHECK(get_f_uint64_t(buf, end, &wv->_64bit)); break; + case UPB_WIRE_TYPE_32BIT: UPB_CHECK(get_f_uint32_t(buf, end, &wv->_32bit)); break; + default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */ + } + return UPB_STATUS_OK; +} + +static upb_status_t skip_wire_value(void **buf, void *end, upb_wire_type_t wt) +{ + switch(wt) { + case UPB_WIRE_TYPE_VARINT: UPB_CHECK(skip_v_uint64_t(buf, end)); break; + case UPB_WIRE_TYPE_64BIT: UPB_CHECK(skip_f_uint64_t(buf, end)); break; + case UPB_WIRE_TYPE_32BIT: UPB_CHECK(skip_f_uint32_t(buf, end)); break; + case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */ + case UPB_WIRE_TYPE_END_GROUP: break; + default: return UPB_ERROR_ILLEGAL; + } + return UPB_STATUS_OK; +} + +upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft, + union upb_value_ptr v) +{ +#define CASE(t, member_name) \ + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ + return get_ ## t(buf, end, v.member_name); + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + default: return UPB_ERROR_ILLEGAL; + } +#undef CASE +} + +void upb_parse_reset(struct upb_parse_state *state) +{ + state->offset = 0; + state->top = state->stack; + /* The top-level message is not delimited (we can keep receiving data for + * it indefinitely). */ + state->top->end_offset = SIZE_MAX; +} + +void upb_parse_init(struct upb_parse_state *state, size_t udata_size) +{ + memset(state, 0, sizeof(struct upb_parse_state)); /* Clear all callbacks. */ + size_t stack_bytes = (sizeof(*state->stack) + udata_size) * UPB_MAX_NESTING; + state->stack = malloc(stack_bytes); + state->limit = (struct upb_parse_stack_frame*)((char*)state->stack + stack_bytes); + state->udata_size = udata_size; + upb_parse_reset(state); +} + +void upb_parse_free(struct upb_parse_state *state) +{ + free(state->stack); +} + +static void pop_stack_frame(struct upb_parse_state *s) +{ + if(s->submsg_end_cb) s->submsg_end_cb(s); + s->top--; + s->top = (struct upb_parse_stack_frame*)((char*)s->top - s->udata_size); +} + +static upb_status_t push_stack_frame(struct upb_parse_state *s, size_t end, + void *user_field_desc) +{ + s->top++; + s->top = (struct upb_parse_stack_frame*)((char*)s->top + s->udata_size); + if(s->top > s->limit) return UPB_ERROR_STACK_OVERFLOW; + s->top->end_offset = end; + if(s->submsg_start_cb) s->submsg_start_cb(s, user_field_desc); + return UPB_STATUS_OK; +} + +static upb_status_t parse_delimited(struct upb_parse_state *s, + struct upb_tag *tag, + void **buf, void *end, + size_t base_offset) +{ + int32_t delim_len; + void *user_field_desc; + void *bufstart = *buf; + + /* Whether we are parsing or skipping the field, we always need to parse + * the length. */ + UPB_CHECK(get_INT32(buf, end, &delim_len)); + upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc); + if(*buf < bufstart) return UPB_ERROR_OVERFLOW; + if(*buf > end && ft != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { + /* Streaming submessages is ok, but for other delimited types (string, + * bytes, and packed arrays) we require that all the delimited data is + * available. This could be relaxed if desired. */ + return UPB_STATUS_NEED_MORE_DATA; + } + + if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { + base_offset += ((char*)*buf - (char*)bufstart); + UPB_CHECK(push_stack_frame(s, base_offset + delim_len, user_field_desc)); + } else { + void *delim_end = (char*)*buf + delim_len; + if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || + ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) { + struct upb_string str = {.ptr = *buf, .byte_len = delim_len}; + s->str_cb(s, &str, user_field_desc); + *buf = delim_end; + } else { + /* Packed Array. */ + while(*buf < delim_end) + UPB_CHECK(s->value_cb(s, buf, end, user_field_desc)); + } + } + return UPB_STATUS_OK; +} + +static upb_status_t parse_nondelimited(struct upb_parse_state *s, + struct upb_tag *tag, + void **buf, void *end) +{ + /* Simple value or begin group. */ + void *user_field_desc; + upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc); + if(ft == 0) { + UPB_CHECK(skip_wire_value(buf, end, tag->wire_type)); + } else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) { + /* No length specified, an "end group" tag will mark the end. */ + UPB_CHECK(push_stack_frame(s, UINT32_MAX, user_field_desc)); + } else { + UPB_CHECK(s->value_cb(s, buf, end, user_field_desc)); + } + return UPB_STATUS_OK; +} + +upb_status_t upb_parse(struct upb_parse_state *restrict s, void *buf, size_t len, + size_t *restrict read) +{ + void *end = (char*)buf + len; + *read = 0; + while(buf < end) { + struct upb_tag tag; + void *bufstart = buf; + UPB_CHECK(parse_tag(&buf, end, &tag)); + if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) { + if(s->top->end_offset != UINT32_MAX) + return UPB_ERROR_SPURIOUS_END_GROUP; + pop_stack_frame(s); + } else if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { + parse_delimited(s, &tag, &buf, end, s->offset + (char*)buf - (char*)bufstart); + } else { + parse_nondelimited(s, &tag, &buf, end); + } + size_t bytes_read = ((char*)buf - (char*)bufstart); + *read += bytes_read; + s->offset += bytes_read; + while(s->offset >= s->top->end_offset) { + if(s->offset != s->top->end_offset) return UPB_ERROR_BAD_SUBMESSAGE_END; + pop_stack_frame(s); + } + } + return UPB_STATUS_OK; +} diff --git a/src/upb_parse.h b/src/upb_parse.h new file mode 100644 index 0000000..d9db85c --- /dev/null +++ b/src/upb_parse.h @@ -0,0 +1,158 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * upb_parse implements a high performance, callback-based, stream-oriented + * parser (comparable to the SAX model in XML parsers). For parsing protobufs + * into in-memory messages (a more DOM-like model), see the routines in + * upb_msg.h, which are layered on top of this parser. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_PARSE_H_ +#define UPB_PARSE_H_ + +#include +#include +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Definitions. ***************************************************************/ + +/* A list of types as they are encoded on-the-wire. */ +enum upb_wire_type { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5 +}; +typedef uint8_t upb_wire_type_t; + +/* A value as it is encoded on-the-wire, except delimited, which is handled + * separately. */ +union upb_wire_value { + uint64_t varint; + uint64_t _64bit; + uint32_t _32bit; +}; + +/* A tag occurs before each value on-the-wire. */ +struct upb_tag { + upb_field_number_t field_number; + upb_wire_type_t wire_type; +}; + +/* High-level parsing interface. **********************************************/ + +/* The general scheme is that the client registers callbacks that will be + * called at the appropriate times. These callbacks provide the client with + * data and let the client make decisions (like whether to parse or to skip + * a value). + * + * After initializing the parse state, the client can repeatedly call upb_parse + * as data becomes available. The parser is fully streaming-capable, so the + * data need not all be available at the same time. */ + +struct upb_parse_state; + +/* Initialize and free (respectively) the given parse state, which must have + * been previously allocated. udata_size specifies how much space will be + * available at parse_stack_frame.user_data in each frame for user data. */ +void upb_parse_init(struct upb_parse_state *state, size_t udata_size); +void upb_parse_reset(struct upb_parse_state *state); +void upb_parse_free(struct upb_parse_state *state); + +/* The callback that is called immediately after a tag has been parsed. The + * client should determine whether it wants to parse or skip the corresponding + * value. If it wants to parse it, it must discover and return the correct + * .proto type (the tag only contains the wire type) and check that the wire + * type is appropriate for the .proto type. To skip the value (which means + * skipping all submessages, in the case of a submessage), the callback should + * return zero. */ +typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s, + struct upb_tag *tag, + void **user_field_desc); + +/* The callback that is called when a regular value (ie. not a string or + * submessage) is encountered which the client has opted to parse (by not + * returning 0 from the tag_cb). The client must parse the value and update + * buf accordingly, returning success or failure. + * + * Note that this callback can be called several times in a row for a single + * call to tag_cb in the case of packed arrays. */ +typedef upb_status_t (*upb_value_cb)(struct upb_parse_state *s, + void **buf, void *end, + void *user_field_desc); + +/* The callback that is called when a string is parsed. */ +typedef upb_status_t (*upb_str_cb)(struct upb_parse_state *s, + struct upb_string *str, + void *user_field_desc); + +/* Callbacks that are called when a submessage begins and ends, respectively. + * Both are called with the submessage's stack frame at the top of the stack. */ +typedef void (*upb_submsg_start_cb)(struct upb_parse_state *s, + void *user_field_desc); +typedef void (*upb_submsg_end_cb)(struct upb_parse_state *s); + +/* Each stack frame (one for each level of submessages/groups) has this format, + * where user_data has as many bytes allocated as specified when initialized. */ +struct upb_parse_stack_frame { + size_t end_offset; /* 0 indicates that this is a group. */ + char user_data[]; +}; + +struct upb_parse_state { + size_t offset; + struct upb_parse_stack_frame *stack, *top, *limit; + size_t udata_size; /* How many bytes the user gets in each frame. */ + upb_tag_cb tag_cb; + upb_value_cb value_cb; + upb_str_cb str_cb; + upb_submsg_start_cb submsg_start_cb; + upb_submsg_end_cb submsg_end_cb; +}; + +/* Parses up to len bytes of protobuf data out of buf, calling cb as needed. + * The function returns how many bytes were consumed from buf. Data is parsed + * until no more data can be read from buf, or the callback sets *done=true, + * or an error occured. Sets *read to the number of bytes consumed. */ +upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len, + size_t *read); + +extern upb_wire_type_t upb_expected_wire_types[]; +/* Returns true if wt is the correct on-the-wire type for ft. */ +INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { + if(ft == 10) { // GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) + return wt == UPB_WIRE_TYPE_START_GROUP; + } else { + /* With packed arrays, anything can be delimited (except groups). */ + return wt == UPB_WIRE_TYPE_DELIMITED || + upb_type_info[ft].expected_wire_type == wt; + } +} + +/* Data-consuming functions (to be called from value cb). *********************/ + +/* Parses and converts a value from the character data starting at buf. The + * caller must have previously checked that the wire type is appropriate for + * this field type. */ +upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft, + union upb_value_ptr v); + +/* Parses a wire value with the given type (which must have been obtained from + * a tag that was just parsed) and adds the number of bytes that were consumed + * to *offset. */ +upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt, + union upb_wire_value *wv); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_PARSE_H_ */ diff --git a/src/upb_string.c b/src/upb_string.c new file mode 100644 index 0000000..733bafe --- /dev/null +++ b/src/upb_string.c @@ -0,0 +1,24 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_string.h" + +bool upb_strreadfile(const char *filename, struct upb_string *data) { + FILE *f = fopen(filename, "rb"); + if(!f) return false; + if(fseek(f, 0, SEEK_END) != 0) return false; + long size = ftell(f); + if(size < 0) return false; + if(fseek(f, 0, SEEK_SET) != 0) return false; + data->ptr = (char*)malloc(size); + data->byte_len = size; + if(fread(data->ptr, size, 1, f) != 1) { + free(data->ptr); + return false; + } + fclose(f); + return true; +} diff --git a/src/upb_string.h b/src/upb_string.h new file mode 100644 index 0000000..7a05811 --- /dev/null +++ b/src/upb_string.h @@ -0,0 +1,82 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + + * Defines a delimited (as opposed to null-terminated) string type and some + * library functions for manipulating them. + * + * There are two primary reasons upb uses delimited strings. One is that they + * can be more efficient for some operations because they do not have to scan + * the string to find its length. For example, streql can start by just + * comparing the lengths (very efficient) and scan the strings themselves only + * if the lengths are equal. + * + * More importantly, using delimited strings makes it possible for strings to + * reference substrings of other strings. For example, if I am parsing a + * protobuf I can create a string that references the original protobuf's + * string data. With NULL-termination I would be forced to write a NULL + * into the middle of the protobuf's data, which is less than ideal and in + * some cases not practical or possible. + */ + +#ifndef UPB_STRING_H_ +#define UPB_STRING_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include "upb.h" + +struct upb_string { + /* We expect the data to be 8-bit clean (uint8_t), but char* is such an + * ingrained convention that we follow it. */ + char *ptr; + uint32_t byte_len; +}; + +INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) { + return s1->byte_len == s2->byte_len && + memcmp(s1->ptr, s2->ptr, s1->byte_len) == 0; +} + +INLINE void upb_strcpy(struct upb_string *dest, struct upb_string *src) { + memcpy(dest->ptr, src->ptr, dest->byte_len); + dest->byte_len = src->byte_len; +} + +INLINE struct upb_string upb_strdup(struct upb_string s) { + struct upb_string copy; + copy.ptr = (char*)malloc(s.byte_len); + copy.byte_len = s.byte_len; + memcpy(copy.ptr, s.ptr, s.byte_len); + return copy; +} + +INLINE void upb_strfree(struct upb_string s) { + free(s.ptr); +} + +/* Reads an entire file into a newly-allocated string. */ +bool upb_strreadfile(const char *filename, struct upb_string *data); + +/* Allows defining upb_strings as literals, ie: + * struct upb_string str = UPB_STRLIT("Hello, World!\n"); + * Doesn't work with C++ due to lack of struct initializer syntax. + */ +#define UPB_STRLIT(strlit) {.ptr=strlit, .byte_len=sizeof(strlit)-1} + +/* Allows using upb_strings in printf, ie: + * struct upb_string str = UPB_STRLIT("Hello, World!\n"); + * printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ +#define UPB_STRARG(str) (str).byte_len, (str).ptr +#define UPB_STRFMT "%.*s" + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_H_ */ diff --git a/src/upb_table.c b/src/upb_table.c new file mode 100644 index 0000000..3bbc7f7 --- /dev/null +++ b/src/upb_table.c @@ -0,0 +1,398 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_table.h" + +#include +#include +#include + +static const upb_inttable_key_t EMPTYENT = 0; +static const double MAX_LOAD = 0.85; + +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); + +/* We use 1-based indexes into the table so that 0 can be "NULL". */ +static struct upb_inttable_entry *intent(struct upb_inttable *t, int32_t i) { + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); +} +static struct upb_strtable_entry *strent(struct upb_strtable *t, int32_t i) { + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); +} + +void upb_table_init(struct upb_table *t, uint32_t size, uint16_t entry_size) +{ + t->count = 0; + t->entry_size = entry_size; + t->size_lg2 = 1; + while(size >>= 1) t->size_lg2++; + size_t bytes = upb_table_size(t) * t->entry_size; + t->mask = upb_table_size(t) - 1; + t->entries = malloc(bytes); + memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */ +} + +void upb_inttable_init(struct upb_inttable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); +} + +void upb_strtable_init(struct upb_strtable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); +} + +void upb_table_free(struct upb_table *t) { free(t->entries); } +void upb_inttable_free(struct upb_inttable *t) { upb_table_free(&t->t); } +void upb_strtable_free(struct upb_strtable *t) { upb_table_free(&t->t); } + +static uint32_t strtable_bucket(struct upb_strtable *t, struct upb_string *key) +{ + uint32_t hash = MurmurHash2(key->ptr, key->byte_len, 0); + return (hash & (upb_strtable_size(t)-1)) + 1; +} + +void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key) +{ + uint32_t bucket = strtable_bucket(t, key); + struct upb_strtable_entry *e; + do { + e = strent(t, bucket); + if(upb_streql(&e->key, key)) return e; + } while((bucket = e->next) != UPB_END_OF_CHAIN); + return NULL; +} + +static uint32_t empty_intbucket(struct upb_inttable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 1; i <= upb_inttable_size(table); i++) { + struct upb_inttable_entry *e = intent(table, i); + if(e->key == EMPTYENT) return i; + } + assert(false); + return 0; +} + +/* The insert routines have a lot more code duplication between int/string + * variants than I would like, but there's just a bit too much that varies to + * parameterize them. */ +static void intinsert(struct upb_inttable *t, struct upb_inttable_entry *e) +{ + assert(upb_inttable_lookup(t, e->key, t->t.entry_size) == NULL); + uint32_t bucket = upb_inttable_bucket(t, e->key); + struct upb_inttable_entry *table_e = intent(t, bucket); + if(table_e->key != EMPTYENT) { /* Collision. */ + if(bucket == upb_inttable_bucket(t, table_e->key)) { + /* Existing element is in its main posisiton. Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_intbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = intent(t, table_e->next); + table_e->next = empty_bucket; + table_e = intent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. */ + uint32_t empty_bucket = empty_intbucket(t); + uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key); + memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + struct upb_inttable_entry *evictee_e = intent(t, evictee_bucket); + while(1) { + assert(evictee_e->key != UPB_EMPTY_ENTRY); + assert(evictee_e->next != UPB_END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; + assert(upb_inttable_lookup(t, e->key, t->t.entry_size) == table_e); +} + +void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e) +{ + assert(e->key != 0); + if((double)++t->t.count / upb_inttable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + struct upb_inttable new_table; + upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size); + struct upb_inttable_entry *old_e; + for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e)) + intinsert(&new_table, old_e); + upb_inttable_free(t); + *t = new_table; + } + intinsert(t, e); +} + +static uint32_t empty_strbucket(struct upb_strtable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { + struct upb_strtable_entry *e = strent(table, i); + if(e->key.byte_len == 0) return i; + } + assert(false); + return 0; +} + +static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e) +{ + assert(upb_strtable_lookup(t, &e->key) == NULL); + uint32_t bucket = strtable_bucket(t, &e->key); + struct upb_strtable_entry *table_e = strent(t, bucket); + if(table_e->key.byte_len != 0) { /* Collision. */ + if(bucket == strtable_bucket(t, &table_e->key)) { + /* Existing element is in its main posisiton. Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_strbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = strent(t, table_e->next); + table_e->next = empty_bucket; + table_e = strent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. */ + uint32_t empty_bucket = empty_strbucket(t); + uint32_t evictee_bucket = strtable_bucket(t, &table_e->key); + memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + struct upb_strtable_entry *evictee_e = strent(t, evictee_bucket); + while(1) { + assert(evictee_e->key.byte_len != 0); + assert(evictee_e->next != UPB_END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + evictee_e = strent(t, evictee_e->next); + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; + assert(upb_strtable_lookup(t, &e->key) == table_e); +} + +void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e) +{ + if((double)++t->t.count / upb_strtable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + struct upb_strtable new_table; + upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); + struct upb_strtable_entry *old_e; + for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e)) + strinsert(&new_table, old_e); + upb_strtable_free(t); + *t = new_table; + } + strinsert(t, e); +} + +void *upb_inttable_begin(struct upb_inttable *t) { + return upb_inttable_next(t, intent(t, 0)); +} + +void *upb_inttable_next(struct upb_inttable *t, struct upb_inttable_entry *cur) { + struct upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1); + do { + cur = (void*)((char*)cur + t->t.entry_size); + if(cur == end) return NULL; + } while(cur->key == UPB_EMPTY_ENTRY); + return cur; +} + +void *upb_strtable_begin(struct upb_strtable *t) { + return upb_strtable_next(t, strent(t, 0)); +} + +void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur) { + struct upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1); + do { + cur = (void*)((char*)cur + t->t.entry_size); + if(cur == end) return NULL; + } while(cur->key.byte_len == 0); + return cur; +} + +#ifdef UPB_UNALIGNED_READS_OK +//----------------------------------------------------------------------------- +// MurmurHash2, by Austin Appleby (released as public domain). +// Reformatted and C99-ified by Joshua Haberman. +// Note - This code makes a few assumptions about how your machine behaves - +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t +// And it has a few limitations - +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + const uint32_t m = 0x5bd1e995; + const int32_t r = 24; + + // Initialize the hash to a 'random' value + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + const uint8_t * data = (const uint8_t *)key; + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +#else // !UPB_UNALIGNED_READS_OK + +//----------------------------------------------------------------------------- +// MurmurHashAligned2, by Austin Appleby +// Same algorithm as MurmurHash2, but only does aligned reads - should be safer +// on certain platforms. +// Performance will be lower than MurmurHash2 + +#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + +static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) +{ + const uint32_t m = 0x5bd1e995; + const int32_t r = 24; + const uint8_t * data = (const uint8_t *)key; + uint32_t h = seed ^ len; + uint8_t align = (uintptr_t)data & 3; + + if(align && (len >= 4)) { + // Pre-load the temp registers + uint32_t t = 0, d = 0; + + switch(align) { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + int32_t sl = 8 * (4-align); + int32_t sr = 8 * align; + + // Mix + + while(len >= 4) { + d = *(uint32_t *)data; + t = (t >> sr) | (d << sl); + + uint32_t k = t; + + MIX(h,k,m); + + t = d; + + data += 4; + len -= 4; + } + + // Handle leftover data in temp registers + + d = 0; + + if(len >= align) { + switch(align) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + } + + uint32_t k = (t >> sr) | (d << sl); + MIX(h,k,m); + + data += align; + len -= align; + + //---------- + // Handle tail bytes + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + } else { + switch(len) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h ^= (t >> sr) | (d << sl); h *= m; + } + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } else { + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + MIX(h,k,m); + + data += 4; + len -= 4; + } + + //---------- + // Handle tail bytes + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } +} +#undef MIX + +#endif // UPB_UNALIGNED_READS_OK diff --git a/src/upb_table.h b/src/upb_table.h new file mode 100644 index 0000000..094ed48 --- /dev/null +++ b/src/upb_table.h @@ -0,0 +1,123 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file defines very fast int->struct (inttable) and string->struct + * (strtable) hash tables. The struct can be of any size, and it is stored + * in the table itself, for cache-friendly performance. + * + * The table uses internal chaining with Brent's variation (inspired by the + * Lua implementation of hash tables). The hash function for strings is + * Austin Appleby's "MurmurHash." + */ + +#ifndef UPB_TABLE_H_ +#define UPB_TABLE_H_ + +#include +#include "upb.h" +#include "upb_string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Note: the key cannot be zero! Zero is used by the implementation. */ +typedef uint32_t upb_inttable_key_t; + +#define UPB_END_OF_CHAIN (uint32_t)0 +#define UPB_EMPTY_ENTRY (uint32_t)0 + +struct upb_inttable_entry { + upb_inttable_key_t key; + uint32_t next; /* Internal chaining. */ +}; + +/* TODO: consider storing the hash in the entry. This would avoid the need to + * rehash on table resizes, but more importantly could possibly improve lookup + * performance by letting us compare hashes before comparing lengths or the + * strings themselves. */ +struct upb_strtable_entry { + struct upb_string key; + uint32_t next; /* Internal chaining. */ +}; + +struct upb_table { + void *entries; + uint32_t count; /* How many elements are currently in the table? */ + uint16_t entry_size; /* How big is each entry? */ + uint8_t size_lg2; /* The table is 2^size_lg2 in size. */ + uint32_t mask; +}; + +struct upb_strtable { + struct upb_table t; +}; + +struct upb_inttable { + struct upb_table t; +}; + +/* Initialize and free a table, respectively. Specify the initial size + * with 'size' (the size will be increased as necessary). Entry size + * specifies how many bytes each entry in the table is. */ +void upb_inttable_init(struct upb_inttable *table, + uint32_t size, uint16_t entry_size); +void upb_inttable_free(struct upb_inttable *table); +void upb_strtable_init(struct upb_strtable *table, + uint32_t size, uint16_t entry_size); +void upb_strtable_free(struct upb_strtable *table); + +INLINE uint32_t upb_table_size(struct upb_table *t) { return 1 << t->size_lg2; } +INLINE uint32_t upb_inttable_size(struct upb_inttable *t) { + return upb_table_size(&t->t); +} +INLINE uint32_t upb_strtable_size(struct upb_strtable *t) { + return upb_table_size(&t->t); +} + +/* Inserts the given key into the hashtable with the given value. The key must + * not already exist in the hash table. The data will be copied from e into + * the hashtable (the amount of data copied comes from entry_size when the + * table was constructed). Therefore the data at val may be freed once the + * call returns. */ +void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e); +void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e); + +INLINE uint32_t upb_inttable_bucket(struct upb_inttable *t, upb_inttable_key_t k) { + return (k & t->t.mask) + 1; /* Identity hash for ints. */ +} + +/* Looks up key in this table. Inlined because this is in the critical path + * of parsing. We have the caller specify the entry_size because fixing + * this as a literal (instead of reading table->entry_size) gives the + * compiler more ability to optimize. */ +INLINE void *upb_inttable_lookup(struct upb_inttable *t, + uint32_t key, uint32_t entry_size) { + assert(key != 0); + uint32_t bucket = upb_inttable_bucket(t, key); + struct upb_inttable_entry *e; + do { + e = (struct upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size); + if(e->key == key) return e; + } while((bucket = e->next) != UPB_END_OF_CHAIN); + return NULL; /* Not found. */ +} + +void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key); + +/* Provides iteration over the table. The order in which the entries are + * returned is undefined. Insertions invalidate iterators. The _next + * functions return NULL when the end has been reached. */ +void *upb_inttable_begin(struct upb_inttable *t); +void *upb_inttable_next(struct upb_inttable *t, struct upb_inttable_entry *cur); + +void *upb_strtable_begin(struct upb_strtable *t); +void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TABLE_H_ */ -- cgit v1.2.3