From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:20:33 -0700 Subject: Split src/ into core/ and stream/. --- core/upb_table.c | 411 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 411 insertions(+) create mode 100644 core/upb_table.c (limited to 'core/upb_table.c') diff --git a/core/upb_table.c b/core/upb_table.c new file mode 100644 index 0000000..b91776c --- /dev/null +++ b/core/upb_table.c @@ -0,0 +1,411 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_table.h" +#include "upb_string.h" + +#include +#include +#include + +static const upb_inttable_key_t EMPTYENT = 0; +static const double MAX_LOAD = 0.85; + +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); + +/* We use 1-based indexes into the table so that 0 can be "NULL". */ +static upb_inttable_entry *intent(upb_inttable *t, int32_t i) { + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); +} +static upb_strtable_entry *strent(upb_strtable *t, int32_t i) { + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); +} + +void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) +{ + t->count = 0; + t->entry_size = entry_size; + t->size_lg2 = 1; + while(size >>= 1) t->size_lg2++; + size_t bytes = upb_table_size(t) * t->entry_size; + t->mask = upb_table_size(t) - 1; + t->entries = malloc(bytes); + memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */ +} + +void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); +} + +void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); +} + +void upb_table_free(upb_table *t) { free(t->entries); } +void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); } +void upb_strtable_free(upb_strtable *t) { + // Free refs from the strtable. + upb_strtable_entry *e = upb_strtable_begin(t); + for(; e; e = upb_strtable_next(t, e)) { + upb_string_unref(e->key); + } + upb_table_free(&t->t); +} + +static uint32_t strtable_bucket(upb_strtable *t, upb_string *key) +{ + uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0); + return (hash & (upb_strtable_size(t)-1)) + 1; +} + +void *upb_strtable_lookup(upb_strtable *t, upb_string *key) +{ + uint32_t bucket = strtable_bucket(t, key); + upb_strtable_entry *e; + do { + e = strent(t, bucket); + if(e->key && upb_streql(e->key, key)) return e; + } while((bucket = e->next) != UPB_END_OF_CHAIN); + return NULL; +} + +static uint32_t empty_intbucket(upb_inttable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 1; i <= upb_inttable_size(table); i++) { + upb_inttable_entry *e = intent(table, i); + if(e->key == EMPTYENT) return i; + } + assert(false); + return 0; +} + +/* The insert routines have a lot more code duplication between int/string + * variants than I would like, but there's just a bit too much that varies to + * parameterize them. */ +static void intinsert(upb_inttable *t, upb_inttable_entry *e) +{ + assert(upb_inttable_lookup(t, e->key) == NULL); + t->t.count++; + uint32_t bucket = upb_inttable_bucket(t, e->key); + upb_inttable_entry *table_e = intent(t, bucket); + if(table_e->key != EMPTYENT) { /* Collision. */ + if(bucket == upb_inttable_bucket(t, table_e->key)) { + /* Existing element is in its main posisiton. Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_intbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = intent(t, table_e->next); + table_e->next = empty_bucket; + table_e = intent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. */ + uint32_t empty_bucket = empty_intbucket(t); + uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key); + memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + upb_inttable_entry *evictee_e = intent(t, evictee_bucket); + while(1) { + assert(evictee_e->key != UPB_EMPTY_ENTRY); + assert(evictee_e->next != UPB_END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + evictee_e = intent(t, evictee_e->next); + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; + assert(upb_inttable_lookup(t, e->key) == table_e); +} + +void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e) +{ + assert(e->key != 0); + if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + upb_inttable new_table; + upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size); + new_table.t.count = t->t.count; + upb_inttable_entry *old_e; + for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e)) + intinsert(&new_table, old_e); + upb_inttable_free(t); + *t = new_table; + } + intinsert(t, e); +} + +static uint32_t empty_strbucket(upb_strtable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { + upb_strtable_entry *e = strent(table, i); + if(!e->key) return i; + } + assert(false); + return 0; +} + +static void strinsert(upb_strtable *t, upb_strtable_entry *e) +{ + assert(upb_strtable_lookup(t, e->key) == NULL); + e->key = upb_string_getref(e->key); + t->t.count++; + uint32_t bucket = strtable_bucket(t, e->key); + upb_strtable_entry *table_e = strent(t, bucket); + if(table_e->key) { /* Collision. */ + if(bucket == strtable_bucket(t, table_e->key)) { + /* Existing element is in its main posisiton. Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_strbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = strent(t, table_e->next); + table_e->next = empty_bucket; + table_e = strent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. */ + uint32_t empty_bucket = empty_strbucket(t); + uint32_t evictee_bucket = strtable_bucket(t, table_e->key); + memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + upb_strtable_entry *evictee_e = strent(t, evictee_bucket); + while(1) { + assert(!upb_string_isnull(evictee_e->key)); + assert(evictee_e->next != UPB_END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + evictee_e = strent(t, evictee_e->next); + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; + assert(upb_strtable_lookup(t, e->key) == table_e); +} + +void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e) +{ + if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + upb_strtable new_table; + upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); + upb_strtable_entry *old_e; + for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e)) + strinsert(&new_table, old_e); + upb_strtable_free(t); + *t = new_table; + } + strinsert(t, e); +} + +void *upb_inttable_begin(upb_inttable *t) { + return upb_inttable_next(t, intent(t, 0)); +} + +void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) { + upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1); + do { + cur = (void*)((char*)cur + t->t.entry_size); + if(cur == end) return NULL; + } while(cur->key == UPB_EMPTY_ENTRY); + return cur; +} + +void *upb_strtable_begin(upb_strtable *t) { + return upb_strtable_next(t, strent(t, 0)); +} + +void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { + upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1); + do { + cur = (void*)((char*)cur + t->t.entry_size); + if(cur == end) return NULL; + } while(cur->key == NULL); + return cur; +} + +#ifdef UPB_UNALIGNED_READS_OK +//----------------------------------------------------------------------------- +// MurmurHash2, by Austin Appleby (released as public domain). +// Reformatted and C99-ified by Joshua Haberman. +// Note - This code makes a few assumptions about how your machine behaves - +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t +// And it has a few limitations - +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + const uint32_t m = 0x5bd1e995; + const int32_t r = 24; + + // Initialize the hash to a 'random' value + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + const uint8_t * data = (const uint8_t *)key; + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +#else // !UPB_UNALIGNED_READS_OK + +//----------------------------------------------------------------------------- +// MurmurHashAligned2, by Austin Appleby +// Same algorithm as MurmurHash2, but only does aligned reads - should be safer +// on certain platforms. +// Performance will be lower than MurmurHash2 + +#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + +static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) +{ + const uint32_t m = 0x5bd1e995; + const int32_t r = 24; + const uint8_t * data = (const uint8_t *)key; + uint32_t h = seed ^ len; + uint8_t align = (uintptr_t)data & 3; + + if(align && (len >= 4)) { + // Pre-load the temp registers + uint32_t t = 0, d = 0; + + switch(align) { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + int32_t sl = 8 * (4-align); + int32_t sr = 8 * align; + + // Mix + + while(len >= 4) { + d = *(uint32_t *)data; + t = (t >> sr) | (d << sl); + + uint32_t k = t; + + MIX(h,k,m); + + t = d; + + data += 4; + len -= 4; + } + + // Handle leftover data in temp registers + + d = 0; + + if(len >= align) { + switch(align) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + } + + uint32_t k = (t >> sr) | (d << sl); + MIX(h,k,m); + + data += align; + len -= align; + + //---------- + // Handle tail bytes + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + } else { + switch(len) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h ^= (t >> sr) | (d << sl); h *= m; + } + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } else { + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + MIX(h,k,m); + + data += 4; + len -= 4; + } + + //---------- + // Handle tail bytes + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } +} +#undef MIX + +#endif // UPB_UNALIGNED_READS_OK -- cgit v1.2.3 From db6c7387bc1df49deac41155a173e33017a75ed8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 18:11:24 -0700 Subject: Incremental progress towards getting upb_def to bootstrap. --- Makefile | 3 +- core/upb.c | 9 ++--- core/upb.h | 7 +++- core/upb_def.c | 102 ++++++++++++++++++++++--------------------------- core/upb_def.h | 62 +++++++++++++++++++++--------- core/upb_stream_vtbl.h | 1 + core/upb_table.c | 2 +- tests/test_string.c | 3 ++ 8 files changed, 105 insertions(+), 84 deletions(-) (limited to 'core/upb_table.c') diff --git a/Makefile b/Makefile index 568dcad..2b2a269 100644 --- a/Makefile +++ b/Makefile @@ -48,12 +48,13 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ descriptor/descriptor.c +$(SRC): perf-cppflags # Parts of core that are yet to be converted. OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC diff --git a/core/upb.c b/core/upb.c index a98512d..9ed5617 100644 --- a/core/upb.c +++ b/core/upb.c @@ -44,12 +44,11 @@ void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...) { if(upb_ok(status)) { // The first error is the most interesting. - status->str = upb_string_new(); - char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN); status->code = code; + status->str = upb_string_tryrecycle(status->str); va_list args; va_start(args, msg); - vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args); + upb_string_vprintf(status->str, msg, args); va_end(args); } } @@ -57,10 +56,10 @@ void upb_seterr(upb_status *status, enum upb_status_code code, void upb_copyerr(upb_status *to, upb_status *from) { to->code = from->code; - to->str = upb_string_getref(from->str); + if(from->str) to->str = upb_string_getref(from->str); } -void upb_reset(upb_status *status) { +void upb_status_reset(upb_status *status) { status->code = UPB_STATUS_OK; upb_string_unref(status->str); status->str = NULL; diff --git a/core/upb.h b/core/upb.h index 230e638..630d9e1 100644 --- a/core/upb.h +++ b/core/upb.h @@ -195,7 +195,12 @@ INLINE bool upb_ok(upb_status *status) { return status->code == UPB_STATUS_OK; } -void upb_reset(upb_status *status); +INLINE void upb_status_init(upb_status *status) { + status->code = UPB_STATUS_OK; + status->str = NULL; +} + +void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); diff --git a/core/upb_def.c b/core/upb_def.c index cc4fd80..0f48559 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -155,8 +155,9 @@ static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, } else { open_defs[num_open_defs++] = m; } - for(int i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter iter = upb_msg_begin(m); + for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { + upb_fielddef *f = upb_msg_iter_field(iter); upb_def *def = f->def; if(upb_issubmsg(f) && def->is_cyclic) { upb_msgdef *sub_m = upb_downcast_msgdef(def); @@ -230,16 +231,6 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ -typedef struct { - upb_strtable_entry e; - uint32_t value; -} ntoi_ent; - -typedef struct { - upb_inttable_entry e; - upb_string *string; -} iton_ent; - static void upb_enumdef_free(upb_enumdef *e) { upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -271,8 +262,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; } - ntoi_ent ntoi_ent = {{name, 0}, number}; - iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{name, 0}, number}; + upb_iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the @@ -291,11 +282,14 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + e->base.fqname = upb_string_tryrecycle(e->base.fqname); + CHECKSRC(upb_src_getstr(src, e->base.fqname)); case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: CHECK(upb_addenum_val(src, e, status)); break; @@ -304,37 +298,25 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) break; } } + assert(e->base.fqname); upb_deflist_push(defs, UPB_UPCAST(e)); return true; +src_err: + upb_copyerr(status, upb_src_status(src)); err: upb_enumdef_free(e); return false; } -static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { - iter->state = ent; - iter->name = ent->e.key; - iter->val = ent->value; -} - -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) { +upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. - ntoi_ent *ent = upb_strtable_begin(&e->ntoi); - iter->e = e; - fill_iter(iter, ent); + return upb_inttable_begin(&e->iton); } -void upb_enum_next(upb_enum_iter *iter) { - ntoi_ent *ent = iter->state; - assert(ent); - ent = upb_strtable_next(&iter->e->ntoi, &ent->e); - iter->state = ent; - if(ent) fill_iter(iter, ent); -} - -bool upb_enum_done(upb_enum_iter *iter) { - return iter->state == NULL; +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { + assert(iter); + return upb_inttable_next(&e->iton, &iter->e); } @@ -346,7 +328,7 @@ static void upb_fielddef_free(upb_fielddef *f) { static void upb_fielddef_uninit(upb_fielddef *f) { upb_string_unref(f->name); - if(upb_hasdef(f) && f->owned) { + if(f->owned) { upb_def_unref(f->def); } } @@ -354,6 +336,8 @@ static void upb_fielddef_uninit(upb_fielddef *f) { static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) { upb_fielddef *f = malloc(sizeof(*f)); + f->number = -1; + f->name = NULL; f->def = NULL; f->owned = false; upb_fielddef *parsed_f; @@ -388,6 +372,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) } CHECKSRC(upb_src_eof(src)); // TODO: verify that all required fields were present. + assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. @@ -461,9 +446,9 @@ err: static void upb_msgdef_free(upb_msgdef *m) { - for (upb_field_count_t i = 0; i < m->num_fields; i++) - upb_fielddef_uninit(&m->fields[i]); - free(m->fields); + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_fielddef_uninit(upb_msg_iter_field(i)); upb_strtable_free(&m->ntof); upb_inttable_free(&m->itof); upb_def_uninit(&m->base); @@ -479,6 +464,13 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { upb_def_ref(def); } +upb_msg_iter upb_msg_begin(upb_msgdef *m) { + return upb_inttable_begin(&m->itof); +} + +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { + return upb_inttable_next(&m->itof, &iter->e); +} /* symtab internal ***********************************************************/ @@ -601,8 +593,9 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) } else { UPB_UPCAST(m)->search_depth = ++depth; bool cycle_found = false; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_issubmsg(f)) continue; upb_def *sub_def = f->def; upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); @@ -632,8 +625,9 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // Type names are resolved relative to the message in which they appear. upb_string *base = e->e.key; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_hasdef(f)) continue; // No resolving necessary. upb_string *name = upb_downcast_unresolveddef(f->def)->name; @@ -873,7 +867,6 @@ typedef struct { upb_wire_type_t wire_type; upb_strlen_t delimited_len; upb_strlen_t stack[UPB_MAX_NESTING], *top; - upb_string *str; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -929,6 +922,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) return &d->field; } +static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { + upb_string_substr(str, d->input, d->offset, d->delimited_len); + d->offset += d->delimited_len; + return true; +} + static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) { switch(d->wire_type) { @@ -950,11 +949,6 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) return true; } -static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { - upb_string_substr(str, d->input, d->offset, d->delimited_len); - return true; -} - static bool upb_baredecoder_skipval(upb_baredecoder *d) { upb_value val; @@ -986,7 +980,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); - d->str = upb_string_new(); d->top = &d->stack[0]; upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; @@ -995,7 +988,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); - upb_string_unref(d->str); free(d); } @@ -1004,11 +996,8 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) return &d->src; } -upb_symtab *upb_get_descriptor_symtab() +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { - // TODO: implement sharing of symtabs, so that successive calls to this - // function will return the same symtab. - upb_symtab *symtab = upb_symtab_new(); // TODO: allow upb_strings to be static or on the stack. upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); @@ -1017,5 +1006,4 @@ upb_symtab *upb_get_descriptor_symtab() assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); - return symtab; } diff --git a/core/upb_def.h b/core/upb_def.h index 5c8c11e..82d8520 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -135,11 +135,6 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; - size_t size; - upb_field_count_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; // Required fields have the lowest set bytemasks. - upb_fielddef *fields; // We have exclusive ownership of these. // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -170,6 +165,21 @@ INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +// Iteration over fields. The order is undefined. +// upb_msg_iter i; +// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// // ... +// } +typedef upb_itof_ent *upb_msg_iter; + +upb_msg_iter upb_msg_begin(upb_msgdef *m); +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter); +INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; } + +INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { + return iter->f; +} + /* upb_enumdef ****************************************************************/ typedef struct _upb_enumdef { @@ -178,6 +188,16 @@ typedef struct _upb_enumdef { upb_inttable iton; } upb_enumdef; +typedef struct { + upb_strtable_entry e; + uint32_t value; +} upb_ntoi_ent; + +typedef struct { + upb_inttable_entry e; + upb_string *string; +} upb_iton_ent; + typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. @@ -186,18 +206,22 @@ upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. // upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { // // ... // } -typedef struct { - upb_enumdef *e; - void *state; // Internal iteration state. - upb_string *name; - upb_enumval_t val; -} upb_enum_iter; -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); -void upb_enum_next(upb_enum_iter *iter); -bool upb_enum_done(upb_enum_iter *iter); +typedef upb_iton_ent *upb_enum_iter; + +upb_enum_iter upb_enum_begin(upb_enumdef *e); +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); +INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; } + +INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { + return iter->string; +} +INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { + return iter->e.key; +} + /* upb_symtab *****************************************************************/ @@ -252,10 +276,10 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // more useful? Maybe it should be an option. void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); -// Returns a symtab that defines google.protobuf.DescriptorProto and all other -// types that are defined in descriptor.proto. This allows you to load other -// proto types. The caller owns a ref on the returned symtab. -upb_symtab *upb_get_descriptor_symtab(); +// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab. +// This is necessary for bootstrapping, since these are the upb_defs that +// specify other defs and allow them to be loaded. +void upb_symtab_add_descriptorproto(upb_symtab *s); /* upb_def casts **************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 52172d2..ba2670e 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -88,6 +88,7 @@ struct upb_bytesrc { INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; + upb_status_init(&s->status); #ifndef DEBUG // TODO: initialize debug-mode checking. #endif diff --git a/core/upb_table.c b/core/upb_table.c index b91776c..b860204 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -179,7 +179,7 @@ static void strinsert(upb_strtable *t, upb_strtable_entry *e) memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ upb_strtable_entry *evictee_e = strent(t, evictee_bucket); while(1) { - assert(!upb_string_isnull(evictee_e->key)); + assert(evictee_e->key); assert(evictee_e->next != UPB_END_OF_CHAIN); if(evictee_e->next == bucket) { evictee_e->next = empty_bucket; diff --git a/tests/test_string.c b/tests/test_string.c index 5e6e2a9..5869b70 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -66,4 +66,7 @@ int main() { upb_string_unref(str); upb_string_unref(str2); + + // Unref of NULL is harmless. + upb_string_unref(NULL); } -- cgit v1.2.3 From 21ee24a7300dbdabef707457d2407b4f9187603b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 18:59:01 -0700 Subject: Updated Lua extension to handle fielddefs. --- core/upb_def.c | 1 + core/upb_def.h | 22 ++++++++++------- core/upb_table.c | 2 +- lang_ext/lua/upb.c | 71 +++++++++++++++++++++++++++++++++++++++++++----------- tests/test_def.c | 2 ++ 5 files changed, 74 insertions(+), 24 deletions(-) (limited to 'core/upb_table.c') diff --git a/core/upb_def.c b/core/upb_def.c index 1feaf9d..e40e1f0 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -355,6 +355,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->name = NULL; f->def = NULL; f->owned = false; + f->msgdef = m; upb_fielddef *parsed_f; int32_t tmp; while((parsed_f = upb_src_getdef(src))) { diff --git a/core/upb_def.h b/core/upb_def.h index ae9e0fa..5c19a7a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -87,23 +87,27 @@ INLINE void upb_def_unref(upb_def *def) { // is either a field of a upb_msgdef or contained inside a upb_extensiondef. // It is also reference-counted. typedef struct _upb_fielddef { - upb_atomic_refcount_t refcount; - upb_string *name; - upb_field_number_t number; - upb_field_type_t type; - upb_label_t label; upb_value default_value; + upb_string *name; + + struct _upb_msgdef *msgdef; + // For the case of an enum or a submessage, points to the def for that type. upb_def *def; - // True if we own a ref on "def" (above). This is true unless this edge is - // part of a cycle. - bool owned; + upb_atomic_refcount_t refcount; + uint32_t byte_offset; // Where in a upb_msg to find the data. // These are set only when this fielddef is part of a msgdef. - uint32_t byte_offset; // Where in a upb_msg to find the data. upb_field_count_t field_index; // Indicates set bit. + + upb_field_number_t number; + upb_field_type_t type; + upb_label_t label; + // True if we own a ref on "def" (above). This is true unless this edge is + // part of a cycle. + bool owned; } upb_fielddef; // A variety of tests about the type of a field. diff --git a/core/upb_table.c b/core/upb_table.c index b860204..a6e0a56 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -28,7 +28,7 @@ void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) { t->count = 0; t->entry_size = entry_size; - t->size_lg2 = 1; + t->size_lg2 = 0; while(size >>= 1) t->size_lg2++; size_t bytes = upb_table_size(t) * t->entry_size; t->mask = upb_table_size(t) - 1; diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index bfc1355..a16a187 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -15,10 +15,14 @@ // We cache all the lua objects (userdata) we vend in a weak table, indexed by // the C pointer of the object they are caching. -typedef void (*lupb_unref)(void *cobj); +typedef void (*lupb_cb)(void *cobj); + +static void lupb_nop(void *foo) { + (void)foo; +} static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, - lupb_unref unref) { + lupb_cb ref, lupb_cb unref) { // Lookup our cache in the registry (we don't put our objects in the registry // directly because we need our cache to be a weak table). lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache"); @@ -40,6 +44,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, lua_pushlightuserdata(L, cobj); lua_pushvalue(L, -2); lua_rawset(L, -4); + ref(cobj); } else { unref(cobj); } @@ -73,29 +78,21 @@ static void lupb_def_getorcreate(lua_State *L, upb_def *def) { luaL_error(L, "unknown deftype %d", def->type); type_name = NULL; // Placate the compiler. } - return lupb_cache_getorcreate(L, def, type_name, lupb_def_unref); + return lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref); } +// msgdef + static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { return luaL_checkudata(L, narg, "upb.msgdef"); } -static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { - return luaL_checkudata(L, narg, "upb.enumdef"); -} - static int lupb_msgdef_gc(lua_State *L) { lupb_def *ldef = lupb_msgdef_check(L, 1); upb_def_unref(ldef->def); return 0; } -static int lupb_enumdef_gc(lua_State *L) { - lupb_def *ldef = lupb_enumdef_check(L, 1); - upb_def_unref(ldef->def); - return 0; -} - static const struct luaL_Reg lupb_msgdef_mm[] = { {"__gc", lupb_msgdef_gc}, {NULL, NULL} @@ -105,6 +102,18 @@ static const struct luaL_Reg lupb_msgdef_m[] = { {NULL, NULL} }; +// enumdef + +static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.enumdef"); +} + +static int lupb_enumdef_gc(lua_State *L) { + lupb_def *ldef = lupb_enumdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + static const struct luaL_Reg lupb_enumdef_mm[] = { {"__gc", lupb_enumdef_gc}, {NULL, NULL} @@ -115,6 +124,40 @@ static const struct luaL_Reg lupb_enumdef_m[] = { }; +/* lupb_fielddef **************************************************************/ + +typedef struct { + upb_fielddef *field; +} lupb_fielddef; + +static void lupb_fielddef_ref(void *cobj) { + upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef)); +} + +static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) { + lupb_cache_getorcreate(L, f, "upb.fielddef", lupb_fielddef_ref, lupb_nop); +} + +static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.fielddef"); +} + +static int lupb_fielddef_gc(lua_State *L) { + lupb_fielddef *lfielddef = lupb_fielddef_check(L, 1); + upb_def_unref(UPB_UPCAST(lfielddef->field->msgdef)); + return 0; +} + +static const struct luaL_Reg lupb_fielddef_mm[] = { + {"__gc", lupb_fielddef_gc}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_fielddef_m[] = { + {NULL, NULL} +}; + + /* lupb_symtab ****************************************************************/ typedef struct { @@ -193,7 +236,7 @@ static const struct luaL_Reg lupb_symtab_mm[] = { static int lupb_symtab_new(lua_State *L) { upb_symtab *s = upb_symtab_new(); - lupb_cache_getorcreate(L, s, "upb.symtab", lupb_symtab_unref); + lupb_cache_getorcreate(L, s, "upb.symtab", lupb_nop, lupb_symtab_unref); return 1; } diff --git a/tests/test_def.c b/tests/test_def.c index e6f95d7..732835d 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -14,6 +14,8 @@ int main() { } free(defs); + printf("Size: %zd\n", sizeof(upb_ntof_ent)); + upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); upb_def *fds = upb_symtab_lookup(s, str); assert(fds != NULL); -- cgit v1.2.3