From 992a03be55faf83d794b9ec5e8c4ca7e78c08a9b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Jun 2010 15:55:02 -0700 Subject: More decoder work, first attempts at compiling it. --- src/upb.c | 28 ----------------- src/upb.h | 26 ---------------- src/upb_decoder.c | 90 +++++++++++++++++++++++++++++-------------------------- src/upb_def.h | 18 +++++------ src/upb_srcsink.h | 16 ++++++++-- src/upb_string.h | 4 +-- src/upb_table.c | 2 +- src/upb_table.h | 5 ++-- 8 files changed, 77 insertions(+), 112 deletions(-) (limited to 'src') diff --git a/src/upb.c b/src/upb.c index 5d145e5..938c72d 100644 --- a/src/upb.c +++ b/src/upb.c @@ -10,34 +10,6 @@ #include "upb.h" -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(proto_type, wire_type, ctype) \ - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \ - {alignof(ctype), sizeof(ctype), wire_type, #ctype}, - -// With packed fields, any type expecting 32-bit, 64-bit or varint can instead -// receive delimited. -upb_type_info upb_types[] = { - TYPE_INFO(DOUBLE, (1<> 3; } -INLINE upb_wiretype_t upb_key_wiretype(upb_key key) { return key & 0x07; } - /* Polymorphic values of .proto types *****************************************/ // INTERNAL-ONLY: never refer to these types with a tag ("union", "struct"). diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 5b9e962..916f0db 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -9,11 +9,10 @@ #include #include #include -#include "upb_def.h" /* Functions to read wire values. *********************************************/ -const int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); +int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); // Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that >=10 // bytes are available at buf. 
Returns the number of bytes consumed, or 11 if @@ -22,13 +21,9 @@ INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) { // We inline this common case (1-byte varints), if that fails we dispatch to // the full (non-inlined) version. - int8_t ret = 1; *val = *buf & 0x7f; - if(*buf & 0x80) { - // Varint is >1 byte. - ret += upb_get_v_uint64_full(buf + 1, val); - } - return ret; + if((*buf & 0x80) == 0) return 1; + return upb_get_v_uint64_full(buf + 1, val); } // Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit @@ -36,7 +31,7 @@ INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) INLINE uint8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val) { uint64_t val64; - int8_t ret = upb_get_v_uint64(buf, end, &val64, status); + int8_t ret = upb_get_v_uint64(buf, &val64); *val = (uint32_t)val64; // Discard the high bits. return ret; } @@ -56,7 +51,7 @@ INLINE void upb_get_f_uint32(const uint8_t *buf, uint32_t *val) // Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller // promises that 8 bytes are available at buf. -INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val) +INLINE void upb_get_f_uint64(const uint8_t *buf, uint64_t *val) { #if UPB_UNALIGNED_READS_OK *val = *(uint64_t*)buf; @@ -71,32 +66,27 @@ INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val) // Skips a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that 10 // bytes are available at "buf". Returns the number of bytes that were // skipped. -INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf) +INLINE const uint8_t upb_skip_v_uint64(const uint8_t *buf) { const uint8_t *const maxend = buf + 10; uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) + for(; buf < maxend && (last & 0x80); buf++) last = *buf; - if(buf > maxend) return -1; - return buf; + return } -// Parses a 64-bit varint that is known to be >= 2 bytes (the inline version -// handles 1 and 2 byte varints). 
-const uint8_t upb_get_v_uint64_full(const uint8_t *buf uint64_t *val) +// Parses remaining bytes of a 64-bit varint that has already had its first byte +// parsed. +const uint8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val) { - const uint8_t *const maxend = buf + 9; - uint8_t last = 0x80; - int bitpos; + uint8_t bytes = 0; - for(bitpos = 0; buf < (uint8_t*)maxend && (last & 0x80); buf++, bitpos += 7) - *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; + // bitpos starts at 7 because our caller already read one byte. + for(int bitpos = 7; bytes < 10 && (*buf & 0x80); buf++, bitpos += 7) + *val |= (uint64_t)(*buf & 0x7F) << bitpos; - if(buf >= maxend) { - return -11; - } - return buf; + return bytes; } // Performs zig-zag decoding, which is used by sint32 and sint64. @@ -136,6 +126,12 @@ struct upb_decoder { // The overall stream offset of the end of "buf". If "buf" is NULL, it is as // if "buf" was the empty string. uint32_t buf_endoffset; + + // Fielddef for the key we just read. + upb_fielddef *field; + + // Wire type of the key we just read. + upb_wire_type_t wire_type; }; @@ -187,7 +183,7 @@ static void upb_decoder_advancebuf(upb_decoder *d) } } -static void upb_decoder_pullnextbuf(upb_decoder *d) +static bool upb_decoder_pullnextbuf(upb_decoder *d) { if(!d->nextbuf) { d->nextbuf = upb_bytesrc_get(d->bytesrc); @@ -200,21 +196,28 @@ static void upb_decoder_pullnextbuf(upb_decoder *d) return true; } -static void upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) +static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) { d->buf_bytesleft -= bytes; while(d->buf_bytesleft <= 0 && !upb_bytesrc_eof(d->bytesrc)) { - upb_decoder_pullnextbuf(d); + if(!upb_decoder_pullnextbuf(d)) return false; upb_decoder_advancebuf(d); } + return true; } -static void upb_decoder_skipgroup(upb_decoder *d) +static bool upb_decoder_skipgroup(upb_decoder *d) { - // This will be mututally recursive if the group has sub-groups. 
If we - // wanted to handle EAGAIN in the future, this approach would not work; - // we would need to track the group depth explicitly. - while(upb_decoder_getdef(d)) upb_decoder_skipval(d); + // This will be mutually recursive with upb_decoder_skipval() if the group + // has sub-groups. If we wanted to handle EAGAIN in the future, this + // approach would not work; we would need to track the group depth + // explicitly. + while(upb_decoder_getdef(d)) { + if(!upb_decoder_skipval(d)) return false; + } + // If we are at the end of the group like we want to be, then + // upb_decoder_getdef() returned NULL because of eof, not error. + return upb_ok(&d->status); } static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes) @@ -266,14 +269,16 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) again: uint32_t key; + upb_wire_type_t wire_type; if(!upb_decoder_get_v_uint32(d, &key)) { return NULL; + wire_type = key & 0x7; - if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) { + if(wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL; - } else if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) { + } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) { if(isgroup(d->top->submsg_end)) { d->eof = true; } else { @@ -285,14 +290,14 @@ again: } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, upb_fieldnum_from_key(key)); + upb_fielddef *f = upb_msg_itof(d->top->msgdef, key >> 3); if (!f) { // Unknown field. If/when the upb_src interface supports reporting // unknown fields we will implement that here. upb_decoder_skipval(d); goto again; - } else if (!upb_check_type(upb_wiretype_from_key(key), f->type)) { + } else if (!upb_check_type(wire_type, f->type)) { // This is a recoverable error condition. We skip the value but also // return NULL and report the error. 
upb_decoder_skipval(d); @@ -301,6 +306,7 @@ again: return NULL; } d->field = f; + d->wire_type = wire_type; return f; } @@ -379,14 +385,14 @@ bool upb_decoder_skipval(upb_decoder *d) { case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64(buf, end, status); case UPB_WIRE_TYPE_64BIT: - return upb_skip_bytes(8); + return upb_decoder_skipbytes(8); case UPB_WIRE_TYPE_32BIT: - return upb_skip_bytes(4); + return upb_decoder_skipbytes(4); case UPB_WIRE_TYPE_START_GROUP: - return upb_skip_groups(1); + return upb_decoder_skipgroup(); case UPB_WIRE_TYPE_DELIMITED: // Works for both string/bytes *and* submessages. - return upb_skip_bytes(d->delimited_len); + return upb_decoder_skipbytes(d->delimited_len); default: // Including UPB_WIRE_TYPE_END_GROUP. assert(false); diff --git a/src/upb_def.h b/src/upb_def.h index dd9dc07..a571730 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -53,7 +53,7 @@ enum upb_def_type { typedef int8_t upb_def_type_t; typedef struct { - upb_strptr fqname; // Fully qualified. + upb_string *fqname; // Fully qualified. upb_atomic_refcount_t refcount; upb_def_type_t type; @@ -90,7 +90,7 @@ typedef struct _upb_fielddef { upb_field_type_t type; upb_label_t label; upb_field_number_t number; - upb_strptr name; + upb_string *name; upb_value default_value; // These are set only when this fielddef is part of a msgdef. @@ -163,7 +163,7 @@ INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { return e ? e->f : NULL; } -INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_strptr name) { +INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); return e ? e->f : NULL; } @@ -179,8 +179,8 @@ typedef struct _upb_enumdef { typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. 
-bool upb_enumdef_ntoi(upb_enumdef *e, upb_strptr name, upb_enumval_t *num); -upb_strptr upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); +bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. // upb_enum_iter i; @@ -190,7 +190,7 @@ upb_strptr upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); typedef struct { upb_enumdef *e; void *state; // Internal iteration state. - upb_strptr name; + upb_string *name; upb_enumval_t val; } upb_enum_iter; void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); @@ -232,11 +232,11 @@ INLINE void upb_symtab_unref(upb_symtab *s) { // // If a def is found, the caller owns one ref on the returned def. Otherwise // returns NULL. -upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol); +upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); // Find an entry in the symbol table with this exact name. If a def is found, // the caller owns one ref on the returned def. Otherwise returns NULL. -upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym); +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); // Gets an array of pointers to all currently active defs in this symtab. The // caller owns the returned array (which is of length *count) as well as a ref @@ -249,7 +249,7 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // defined in desc). desc may not attempt to define any names that are already // defined in this symtab. Caller retains ownership of desc. status indicates // whether the operation was successful or not, and the error message (if any). 
-void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status); +void upb_symtab_add_desc(upb_symtab *s, upb_string *desc, upb_status *status); /* upb_def casts **************************************************************/ diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 3a57cc8..4a3d1e3 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -28,6 +28,9 @@ extern "C" { // TODO: decide how to handle unknown fields. +struct upb_src; +typedef struct upb_src upb_src; + // Retrieves the fielddef for the next field in the stream. Returns NULL on // error or end-of-stream. upb_fielddef *upb_src_getdef(upb_src *src); @@ -53,6 +56,9 @@ upb_status *upb_src_status(upb_src *src); /* upb_sink *******************************************************************/ +struct upb_sink; +typedef struct upb_sink upb_sink; + // Puts the given fielddef into the stream. bool upb_sink_putdef(upb_sink *sink, upb_fielddef *def); @@ -70,6 +76,9 @@ upb_status *upb_sink_status(upb_sink *sink); /* upb_bytesrc ****************************************************************/ +struct upb_bytesrc; +typedef struct upb_bytesrc upb_bytesrc; + // Returns the next string in the stream. NULL is returned on error or eof. // The string must be at least "minlen" bytes long unless the stream is eof. // @@ -89,6 +98,9 @@ upb_status *upb_bytesrc_status(upb_src *src); /* upb_bytesink ***************************************************************/ +struct upb_bytesink; +typedef struct upb_bytesink upb_bytesink; + // Puts the given string. Returns the number of bytes that were actually, // consumed, which may be fewer than were in the string, or <0 on error. int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); @@ -137,14 +149,14 @@ typedef struct { // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. 
-typedef struct { +struct upb_src { upb_src_vtable *vtbl; upb_status status; bool eof; #ifndef NDEBUG int state; // For debug-mode checking of API usage. #endif -} upb_src; +}; INLINE void upb_sink_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; diff --git a/src/upb_string.h b/src/upb_string.h index c0d14d5..2c0303d 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -44,7 +44,7 @@ typedef struct _upb_string { // Used if this is a slice of another string. struct _upb_string *src; // Used if this string is referencing external unowned memory. - upb_stomic_refcount_t reader_count; + upb_atomic_refcount_t reader_count; } extra; } upb_string; @@ -126,7 +126,7 @@ INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { // Replaces the contents of "dest" with the contents of "src". INLINE void upb_strcpy(upb_string *dest, upb_string *src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_strlen(src)); + upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src)); upb_string_endread(src); } diff --git a/src/upb_table.c b/src/upb_table.c index a477121..51a9f21 100644 --- a/src/upb_table.c +++ b/src/upb_table.c @@ -5,7 +5,7 @@ */ #include "upb_table.h" -#include "upb_data.h" +#include "upb_string.h" #include #include diff --git a/src/upb_table.h b/src/upb_table.h index 122aed3..20dae92 100644 --- a/src/upb_table.h +++ b/src/upb_table.h @@ -17,6 +17,7 @@ #include #include "upb.h" +#include "upb_string.h" #ifdef __cplusplus extern "C" { @@ -38,7 +39,7 @@ typedef struct { // performance by letting us compare hashes before comparing lengths or the // strings themselves. typedef struct { - upb_strptr key; // We own a frozen ref. + upb_string *key; // We own a ref. uint32_t next; // Internal chaining. 
} upb_strtable_entry; @@ -114,7 +115,7 @@ INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { return upb_inttable_fastlookup(t, key, t->t.entry_size); } -void *upb_strtable_lookup(upb_strtable *t, upb_strptr key); +void *upb_strtable_lookup(upb_strtable *t, upb_string *key); /* Provides iteration over the table. The order in which the entries are * returned is undefined. Insertions invalidate iterators. The _next -- cgit v1.2.3