From b5f5ee867e6c91b77490dc8894236f17a47bde00 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 23 Nov 2011 16:19:22 -0800 Subject: Refinement of upb_bytesrc interface. Added a upb_byteregion that tracks a region of the input buffer; decoders use this instead of using a upb_bytesrc directly. upb_byteregion is also used as the way of passing a string to a upb_handlers callback. This symmetry makes decoders compose better; if you want to take a parsed string and decode it as something else, you can take the string directly from the callback and feed it as input to another parser. A commented-out version of a pinning interface is present; I decline to actually implement it (and accept its extra complexity) until/unless it is clear that it is actually a win. But it is included as a proof-of-concept, to show that it fits well with the existing interface. --- benchmarks/parsestream.upb.c | 3 +- benchmarks/parsetoproto2.upb.cc | 24 ++- benchmarks/parsetostruct.upb.c | 8 +- tests/test_cpp.cc | 1 + tests/test_decoder.c | 2 +- tests/test_varint.c | 1 + tests/tests.c | 8 +- upb/bytestream.c | 161 ++++++++++------- upb/bytestream.h | 378 +++++++++++++++++++++++++++------------- upb/def.c | 30 ++-- upb/descriptor.c | 15 +- upb/handlers.h | 10 +- upb/msg.c | 11 +- upb/pb/decoder.c | 157 ++++++++--------- upb/pb/decoder.h | 38 ++-- upb/pb/decoder_x64.dasc | 47 +++-- upb/pb/glue.c | 13 +- upb/pb/textprinter.c | 13 +- upb/pb/varint.h | 2 +- upb/table.h | 1 + upb/upb.c | 6 +- upb/upb.h | 42 +++-- 22 files changed, 575 insertions(+), 396 deletions(-) diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c index 4d13e9d..19d8ccf 100644 --- a/benchmarks/parsestream.upb.c +++ b/benchmarks/parsestream.upb.c @@ -76,8 +76,7 @@ static size_t run(int i) (void)i; upb_status status = UPB_STATUS_INIT; upb_stringsrc_reset(&stringsrc, input_str, input_len); - upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc), - 0, UPB_NONDELIMITED, NULL); + upb_decoder_reset(&decoder, upb_stringsrc_allbytes(&stringsrc), NULL); upb_decoder_decode(&decoder, &status); if(!upb_ok(&status)) goto err; return input_len; diff --git a/benchmarks/parsetoproto2.upb.cc b/benchmarks/parsetoproto2.upb.cc index 75cd10c..03a1039 100644 --- a/benchmarks/parsetoproto2.upb.cc +++ b/benchmarks/parsetoproto2.upb.cc @@ -24,6 +24,7 @@ #include #undef private +char *str; static size_t len; MESSAGE_CIDENT msg[NUM_MESSAGES]; MESSAGE_CIDENT msg2; @@ -53,9 +54,13 @@ upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) { const upb_fielddef *f = upb_value_getfielddef(fval); std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1); if (*str == f->default_ptr) *str = new std::string; - const upb_strref *ref = upb_value_getstrref(val); + const upb_byteregion *ref = upb_value_getbyteregion(val); + uint32_t len; + (*str)->assign( + upb_byteregion_getptr(ref, upb_byteregion_startofs(ref), &len), + upb_byteregion_len(ref)); + assert(len == upb_byteregion_len(ref)); // XXX: only supports contiguous strings atm. - (*str)->assign(ref->ptr, ref->len); return UPB_CONTINUE; } @@ -64,9 +69,13 @@ upb_flow_t proto2_append_str(void *_r, upb_value fval, upb_value val) { typedef google::protobuf::RepeatedPtrField R; (void)fval; R *r = (R*)_r; - const upb_strref *ref = upb_value_getstrref(val); + const upb_byteregion *ref = upb_value_getbyteregion(val); // XXX: only supports contiguous strings atm. - r->Add()->assign(ref->ptr, ref->len); + uint32_t len; + r->Add()->assign( + upb_byteregion_getptr(ref, upb_byteregion_startofs(ref), &len), + upb_byteregion_len(ref)); + assert(len == upb_byteregion_len(ref)); return UPB_CONTINUE; } @@ -265,7 +274,7 @@ static bool initialize() upb_symtab_unref(s); // Read the message data itself. - char *str = upb_readfile(MESSAGE_FILE, &len); + str = upb_readfile(MESSAGE_FILE, &len); if(str == NULL) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return false; @@ -275,7 +284,6 @@ static bool initialize() msg2.ParseFromArray(str, len); upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str, len); upb_handlers *h = upb_handlers_new(); upb_accessors_reghandlers(h, def); if (!JIT) h->should_jit = false; @@ -296,8 +304,8 @@ static size_t run(int i) (void)i; upb_status status = UPB_STATUS_INIT; msg[i % NUM_MESSAGES].Clear(); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), - 0, UPB_NONDELIMITED, &msg[i % NUM_MESSAGES]); + upb_stringsrc_reset(&strsrc, str, len); + upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &msg[i % NUM_MESSAGES]); upb_decoder_decode(&d, &status); if(!upb_ok(&status)) goto err; return len; diff --git a/benchmarks/parsetostruct.upb.c b/benchmarks/parsetostruct.upb.c index 5e7aa35..4eeafbb 100644 --- a/benchmarks/parsetostruct.upb.c +++ b/benchmarks/parsetostruct.upb.c @@ -8,6 +8,7 @@ #include "upb/pb/glue.h" static const upb_msgdef *def; +char *str; static size_t len; static void *msg[NUM_MESSAGES]; static upb_stringsrc strsrc; @@ -33,7 +34,7 @@ static bool initialize() upb_symtab_unref(s); // Read the message data itself. - char *str = upb_readfile(MESSAGE_FILE, &len); + str = upb_readfile(MESSAGE_FILE, &len); if(str == NULL) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return false; @@ -43,7 +44,6 @@ static bool initialize() msg[i] = upb_stdmsg_new(def); upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str, len); upb_handlers *h = upb_handlers_new(); upb_accessors_reghandlers(h, def); if (!JIT) h->should_jit = false; @@ -70,8 +70,8 @@ static size_t run(int i) upb_status status = UPB_STATUS_INIT; i %= NUM_MESSAGES; upb_msg_clear(msg[i], def); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), - 0, UPB_NONDELIMITED, msg[i]); + upb_stringsrc_reset(&strsrc, str, len); + upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg[i]); upb_decoder_decode(&d, &status); if(!upb_ok(&status)) goto err; return len; diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc index 11542bf..ecf27bf 100644 --- a/tests/test_cpp.cc +++ b/tests/test_cpp.cc @@ -7,6 +7,7 @@ * Tests for C++ wrappers. */ +#include #include #include "upb/def.hpp" #include "upb/pb/glue.hpp" diff --git a/tests/test_decoder.c b/tests/test_decoder.c index 1994501..84a90cd 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -52,7 +52,7 @@ int main(int argc, char *argv[]) { upb_decoder d; upb_decoder_init(&d, handlers); - upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UPB_NONDELIMITED, p); + upb_decoder_reset(&d, upb_stdio_allbytes(&in), p); upb_status_clear(&status); upb_decoder_decode(&d, &status); diff --git a/tests/test_varint.c b/tests/test_varint.c index 7dce6ab..4c076b3 100644 --- a/tests/test_varint.c +++ b/tests/test_varint.c @@ -4,6 +4,7 @@ * Copyright (c) 2011 Google Inc. See LICENSE for details. */ +#include #include "upb/pb/varint.h" #include "upb_test.h" diff --git a/tests/tests.c b/tests/tests.c index e8c335b..83fb3ef 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -16,15 +16,16 @@ static upb_symtab *load_test_proto() { ASSERT(s); upb_status status = UPB_STATUS_INIT; if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) { - fprintf(stderr, "Error loading descriptor file: %s\n", upb_status_getstr(&status)); + fprintf(stderr, "Error loading descriptor file: %s\n", + upb_status_getstr(&status)); exit(1); } upb_status_uninit(&status); return s; } -static upb_flow_t upb_test_onvalue(void *closure, upb_value fval, upb_value val) { - (void)closure; +static upb_flow_t upb_test_onvalue(void *c, upb_value fval, upb_value val) { + (void)c; (void)fval; (void)val; return UPB_CONTINUE; @@ -56,6 +57,7 @@ static void test_upb_symtab() { upb_symtab_unref(s); const upb_msgdef *m = upb_downcast_msgdef_const(def); upb_msg_iter i = upb_msg_begin(m); + ASSERT(!upb_msg_done(i)); upb_fielddef *f = upb_msg_iter_field(i); ASSERT(upb_hassubdef(f)); upb_def *def2 = f->def; diff --git a/upb/bytestream.c b/upb/bytestream.c index 41a84b0..135f269 100644 --- a/upb/bytestream.c +++ b/upb/bytestream.c @@ -14,29 +14,33 @@ // We can make this configurable if necessary. #define BUF_SIZE 32768 -char *upb_strref_dup(const struct _upb_strref *r) { - char *ret = (char*)malloc(r->len + 1); - upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret); - ret[r->len] = '\0'; +char *upb_byteregion_strdup(const struct _upb_byteregion *r) { + char *ret = malloc(upb_byteregion_len(r) + 1); + upb_byteregion_copyall(r, ret); + ret[upb_byteregion_len(r)] = '\0'; return ret; } -upb_strref *upb_strref_new(const char *str) { - return upb_strref_newl(str, strlen(str)); +upb_byteregion *upb_byteregion_new(const void *str) { + return upb_byteregion_newl(str, strlen(str)); } -upb_strref *upb_strref_newl(const void *str, size_t len) { - upb_strref *s = malloc(sizeof(*s)); - s->bytesrc = NULL; - s->ptr = malloc(len); - memcpy((void*)s->ptr, str, len); - return s; +upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) { + upb_stringsrc *src = malloc(sizeof(*src)); + upb_stringsrc_init(src); + char *ptr = malloc(len + 1); + memcpy(ptr, str, len); + ptr[len] = '\0'; + upb_stringsrc_reset(src, ptr, len); + return upb_stringsrc_allbytes(src); } -void upb_strref_free(upb_strref *ref) { - if (!ref) return; - free((char*)ref->ptr); - free(ref); +void upb_byteregion_free(upb_byteregion *r) { + if (!r) return; + uint32_t len; + free((char*)upb_byteregion_getptr(r, 0, &len)); + upb_stringsrc_uninit((upb_stringsrc*)r->bytesrc); + free(r->bytesrc); } void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) { @@ -48,6 +52,31 @@ void upb_bytesink_uninit(upb_bytesink *sink) { upb_status_uninit(&sink->status); } +void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src, + uint64_t ofs, uint64_t len) { + assert(ofs >= upb_byteregion_startofs(src)); + assert(len <= upb_byteregion_remaining(src, ofs)); + r->bytesrc = src->bytesrc; + r->toplevel = false; + r->start = ofs; + r->discard = ofs; + r->end = ofs + len; + r->fetch = UPB_MIN(src->fetch, r->end); +} + +bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s) { + uint64_t fetchable = upb_byteregion_remaining(r, r->fetch); + if (fetchable == 0) { + upb_status_seteof(s); + return false; + } + uint64_t num = upb_bytesrc_fetch(r->bytesrc, r->fetch, s); + if (num == 0) return false; + r->fetch += UPB_MIN(num, fetchable); + return true; +} + + /* upb_stdio ******************************************************************/ int upb_stdio_cmpbuf(const void *_key, const void *_elem) { @@ -86,63 +115,54 @@ static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) { return s->bufs[s->nbuf-num_reused]; } -size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) { +void upb_stdio_discard(void *src, uint64_t ofs) { + (void)src; + (void)ofs; +} + +uint32_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) { (void)ofs; upb_stdio *stdio = (upb_stdio*)src; upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio); - size_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file); - if(read < (size_t)BUF_SIZE) { + uint32_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file); + buf->len = read; + if(read < (uint32_t)BUF_SIZE) { // Error or EOF. - if(feof(stdio->file)) return 0; + if(feof(stdio->file)) { + upb_status_seteof(s); + return read; + } if(ferror(stdio->file)) { upb_status_fromerrno(s); - return -1; + return 0; } assert(false); } - buf->len = read; return buf->ofs + buf->len; } -void upb_stdio_read(const void *src, uint64_t src_ofs, size_t len, char *dst) { - upb_stdio_buf *buf = upb_stdio_findbuf(src, src_ofs); - src_ofs -= buf->ofs; - memcpy(dst, &buf->data[src_ofs], BUF_SIZE - src_ofs); - len -= (BUF_SIZE - src_ofs); - dst += (BUF_SIZE - src_ofs); +void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) { + upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); + ofs -= buf->ofs; + memcpy(dst, buf->data + ofs, BUF_SIZE - ofs); + len -= (BUF_SIZE - ofs); + dst += (BUF_SIZE - ofs); while (len > 0) { ++buf; - size_t bytes = UPB_MIN(len, BUF_SIZE); + uint32_t bytes = UPB_MIN(len, BUF_SIZE); memcpy(dst, buf->data, bytes); len -= bytes; dst += bytes; } } -const char *upb_stdio_getptr(void *src, uint64_t ofs, size_t *len) { +const char *upb_stdio_getptr(const void *src, uint64_t ofs, uint32_t *len) { upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); ofs -= buf->ofs; *len = BUF_SIZE - ofs; return &buf->data[ofs]; } -void upb_stdio_refregion(void *src, uint64_t ofs, size_t len) { - upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); - len -= (BUF_SIZE - ofs); - ++buf->refcount; - while (len > 0) { - len -= BUF_SIZE; - ++buf; - ++buf->refcount; - } -} - -void upb_stdio_unrefregion(void *src, uint64_t ofs, size_t len) { - (void)src; - (void)ofs; - (void)len; -} - #if 0 upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); @@ -154,7 +174,6 @@ upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *s } return written; } -#endif uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, const char *fmt, va_list args) { @@ -166,16 +185,14 @@ uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, } return written; } +#endif void upb_stdio_init(upb_stdio *stdio) { static upb_bytesrc_vtbl bytesrc_vtbl = { - upb_stdio_fetch, - upb_stdio_read, - upb_stdio_getptr, - upb_stdio_refregion, - upb_stdio_unrefregion, - NULL, - NULL + &upb_stdio_fetch, + &upb_stdio_discard, + &upb_stdio_read, + &upb_stdio_getptr, }; upb_bytesrc_init(&stdio->src, &bytesrc_vtbl); @@ -209,26 +226,32 @@ void upb_stdio_uninit(upb_stdio *stdio) { stdio->file = NULL; } -upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; } +upb_byteregion* upb_stdio_allbytes(upb_stdio *stdio) { return &stdio->byteregion; } upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } /* upb_stringsrc **************************************************************/ -size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) { +uint32_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) { upb_stringsrc *src = _src; - (void)s; // No errors can occur. + upb_status_seteof(s); return src->len - ofs; } -void upb_stringsrc_read(const void *_src, uint64_t src_ofs, - size_t len, char *dst) { +void upb_stringsrc_read(const void *_src, uint64_t ofs, + uint32_t len, char *dst) { const upb_stringsrc *src = _src; - memcpy(dst, src->str + src_ofs, len); + assert(ofs + len <= src->len); + memcpy(dst, src->str + ofs, len); } -const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) { - upb_stringsrc *src = _src; +void upb_stringsrc_discard(void *src, uint64_t ofs) { + (void)src; + (void)ofs; +} + +const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, uint32_t *len) { + const upb_stringsrc *src = _s; *len = src->len - ofs; return src->str + ofs; } @@ -236,17 +259,23 @@ const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) { void upb_stringsrc_init(upb_stringsrc *s) { static upb_bytesrc_vtbl vtbl = { &upb_stringsrc_fetch, + &upb_stringsrc_discard, &upb_stringsrc_read, &upb_stringsrc_getptr, - NULL, NULL, NULL, NULL }; upb_bytesrc_init(&s->bytesrc, &vtbl); s->str = NULL; + s->byteregion.bytesrc = &s->bytesrc; + s->byteregion.toplevel = true; } -void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) { +void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) { s->str = str; s->len = len; + s->byteregion.start = 0; + s->byteregion.discard = 0; + s->byteregion.fetch = 0; + s->byteregion.end = len; } void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; } @@ -262,7 +291,7 @@ void upb_stringsink_uninit(upb_stringsink *s) { free(s->str); } -void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) { +void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t size) { free(s->str); s->str = str; s->len = 0; diff --git a/upb/bytestream.h b/upb/bytestream.h index cbaef48..3b339f1 100644 --- a/upb/bytestream.h +++ b/upb/bytestream.h @@ -4,19 +4,73 @@ * Copyright (c) 2011 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * This file contains upb_bytesrc and upb_bytesink, which are abstractions of - * stdio (fread()/fwrite()/etc) that provide useful buffering/sharing - * semantics. They are virtual base classes so concrete implementations - * can get the data from a fd, a string, a cord, etc. + * This file defines three core interfaces: + * - upb_bytesink: for writing streams of data. + * - upb_bytesrc: for reading streams of data. + * - upb_byteregion: for reading from a specific region of a upb_bytesrc; + * should be used by decoders instead of using upb_bytesrc directly. * - * Byte streams are NOT thread-safe! (Like f{read,write}_unlocked()) - * This may change (in particular, bytesrc objects may be better thread-safe). + * These interfaces are used by streaming encoders and decoders: for example, a + * protobuf parser gets its input from a upb_byteregion. They are virtual base + * classes so concrete implementations can get the data from a fd, a FILE*, a + * string, etc. */ +// A upb_byteregion represents a region of data from a bytesrc. +// +// Parsers get data from this interface instead of a bytesrc because we often +// want to parse only a specific region of the input. For example, if we parse +// a string from our input but know that the string represents a protobuf, we +// can pass its upb_byteregion to an appropriate protobuf parser. +// +// Since the bytes may be coming from a file or network socket, bytes must be +// fetched before they can be read (though in some cases this fetch may be a +// no-op). "fetch" is the only operation on a byteregion that could fail or +// block, because it is the only operation that actually performs I/O. +// +// Bytes can be discarded when they are no longer needed. Parsers should +// always discard bytes they no longer need, both so the buffers can be freed +// when possible and to give better visibility into what bytes the parser is +// still using. +// +// start discard read fetch end +// ofs ofs ofs ofs ofs +// | |--->discard() | |--->fetch() | +// V V V V V +// +-------------+-------------------------+-----------------+-----------------+ +// | discarded | | | fetchable | +// +-------------+-------------------------+-----------------+-----------------+ +// | <------------- loaded ------------------> | +// | <- available -> | +// | <---------- remaining ----------> | +// +// Note that the start offset may be something other than zero! A byteregion +// is a view into an underlying bytesrc stream, and the region may start +// somewhere other than the beginning of that stream. +// +// The region can be either delimited or nondelimited. A non-delimited region +// will keep returning data until the underlying data source returns EOF. A +// delimited region will return EOF at a predetermined offset. +// +// end +// ofs +// | +// V +// +-----------------------+ +// | delimited region | <-- hard EOF, even if data source has more data. +// +-----------------------+ +// +// +------------------------ +// | nondelimited region Z <-- won't return EOF until data source hits EOF. +// +------------------------ + + #ifndef UPB_BYTESTREAM_H #define UPB_BYTESTREAM_H #include +#include +#include #include #include #include "upb.h" @@ -29,25 +83,22 @@ extern "C" { /* upb_bytesrc ****************************************************************/ // A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as -// they become available, and to preserve some trailing amount of data, which -// is useful for lazy parsing (among other things). If there is a submessage -// that we want to parse later we can take a reference on that region of the -// input buffer. This will guarantee that the bytesrc keeps the submessage -// data around for later use, without requiring a copy out of the input -// buffers. -typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*); -typedef void upb_bytesrc_read_func(const void*, uint64_t, size_t, char*); -typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*); -typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t); -typedef void upb_bytesrc_ref_func(void*); +// they become available, and to preserve some trailing amount of data before +// it is discarded. Consumers should not use upb_bytesrc directly, but rather +// should use a upb_byteregion (which allows access to a region of a bytesrc). +// +// upb_bytesrc is a virtual base class with implementations that get data from +// eg. a string, a cord, a file descriptor, a FILE*, etc. + +typedef uint32_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*); +typedef void upb_bytesrc_discard_func(void*, uint64_t); +typedef void upb_bytesrc_copy_func(const void*, uint64_t, uint32_t, char*); +typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, uint32_t*); typedef struct _upb_bytesrc_vtbl { upb_bytesrc_fetch_func *fetch; - upb_bytesrc_read_func *read; + upb_bytesrc_discard_func *discard; + upb_bytesrc_copy_func *copy; upb_bytesrc_getptr_func *getptr; - upb_bytesrc_refregion_func *refregion; - upb_bytesrc_refregion_func *unrefregion; - upb_bytesrc_ref_func *ref; - upb_bytesrc_ref_func *unref; } upb_bytesrc_vtbl; typedef struct { @@ -59,114 +110,198 @@ INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) { } // Fetches at least one byte starting at ofs, returning the actual number of -// bytes fetched (or 0 on error: see "s" for details). A successful return -// gives caller a ref on the fetched region. -// -// If "ofs" may be greater or equal than the end of the already-fetched region. -// It may also be less than the end of the already-fetch region *if* either of -// the following is true: -// -// * the region is ref'd (this implies that the data is still in-memory) -// * the bytesrc is seekable (this implies that the data can be fetched again). -INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) { +// bytes fetched (or 0 on EOF or error: see *s for details). Some bytesrc's +// may set EOF on *s after a successful read if no further data is available, +// but not all bytesrc's support this. It is valid for bytes to be fetched +// multiple times, as long as the bytes have not been previously discarded. +INLINE uint32_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, + upb_status *s) { return src->vtbl->fetch(src, ofs, s); } -// Copies "len" bytes of data from offset src_ofs to "dst", which must be at -// least "len" bytes long. The caller must own a ref on the given region. -INLINE void upb_bytesrc_read(const upb_bytesrc *src, uint64_t src_ofs, - size_t len, char *dst) { - src->vtbl->read(src, src_ofs, len, dst); +// Discards all data prior to ofs (except data that is pinned, if pinning +// support is added -- see TODO below). +INLINE void upb_bytesrc_discard(upb_bytesrc *src, uint64_t ofs) { + src->vtbl->discard(src, ofs); +} + +// Copies "len" bytes of data from ofs to "dst", which must be at least "len" +// bytes long. The given region must not be discarded. +INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len, + char *dst) { + src->vtbl->copy(src, ofs, len, dst); } // Returns a pointer to the bytesrc's internal buffer, storing in *len how much -// data is available. The caller must own refs on the given region. The -// returned buffer is valid for as long as the region remains ref'd. -// -// TODO: if more data is available than the caller has ref'd is it ok for the -// caller to read *len bytes? -INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs, - size_t *len) { +// data is available. The given offset must not be discarded. The returned +// buffer is valid for as long as its bytes are not discarded (in the case that +// part of the returned buffer is discarded, only the non-discarded bytes +// remain valid). +INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs, + uint32_t *len) { return src->vtbl->getptr(src, ofs, len); } -// Gives the caller a ref on the given region. The caller must know that the -// given region is already ref'd (for example, inside a upb_handlers callback -// that receives a upb_strref, the region is guaranteed to be ref'd -- this -// function allows that handler to take its own ref). -INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) { - src->vtbl->refregion(src, ofs, len); -} +// TODO: Add if/when there is a demonstrated need: +// +// // When the caller pins a region (which must not be already discarded), it +// // is guaranteed that the region will not be discarded (nor will the bytesrc +// // be destroyed) until the region is unpinned. However, not all bytesrc's +// // support pinning; a false return indicates that a pin was not possible. +// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, uint32_t len) { +// return src->vtbl->refregion(src, ofs, len); +// } +// +// // Releases some number of pinned bytes from the beginning of a pinned +// // region (which may be fewer than the total number of bytes pinned). +// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, uint32_t len, +// uint32_t bytes_to_release) { +// src->vtbl->unpin(src, ofs, len); +// } +// +// Adding pinning support would also involve adding a "pin_ofs" parameter to +// upb_bytesrc_fetch, so that the fetch can extend an already-pinned region. -// Releases a ref on the given region, which the caller must have previously -// ref'd. -INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) { - src->vtbl->unrefregion(src, ofs, len); -} -// Attempts to ref the bytesrc itself, returning false if this bytesrc is -// not ref-able. -INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) { - if (src->vtbl->ref) { - src->vtbl->ref(src); - return true; - } else { - return false; - } -} +/* upb_byteregion *************************************************************/ -// Unref's the bytesrc itself. May only be called when upb_bytesrc_tryref() -// has previously returned true. -INLINE void upb_bytesrc_unref(upb_bytesrc *src) { - assert(src->vtbl->unref); - src->vtbl->unref(src); -} +#define UPB_NONDELIMITED (0xffffffffffffffffULL) +typedef struct _upb_byteregion { + uint64_t start; + uint64_t discard; + uint64_t fetch; + uint64_t end; // UPB_NONDELIMITED if nondelimited. + upb_bytesrc *bytesrc; + bool toplevel; // If true, discards hit the underlying byteregion. +} upb_byteregion; + +// Initializes a byteregion. Its initial value will be empty. No methods may +// be called on an empty byteregion except upb_byteregion_reset(). +void upb_byteregion_init(upb_byteregion *r); +void upb_byteregion_uninit(upb_byteregion *r); + +// Accessors for the regions bounds -- the meaning of these is described in the +// diagram above. +INLINE uint64_t upb_byteregion_startofs(const upb_byteregion *r) { + return r->start; +} +INLINE uint64_t upb_byteregion_discardofs(const upb_byteregion *r) { + return r->discard; +} +INLINE uint64_t upb_byteregion_fetchofs(const upb_byteregion *r) { + return r->fetch; +} +INLINE uint64_t upb_byteregion_endofs(const upb_byteregion *r) { + return r->end; +} -/* upb_strref *****************************************************************/ +// Returns how many bytes are fetched and available for reading starting +// from offset "o". +INLINE uint64_t upb_byteregion_available(const upb_byteregion *r, uint64_t o) { + assert(o >= upb_byteregion_discardofs(r)); + assert(o <= r->fetch); // Could relax this. + return r->fetch - o; +} -// The structure we pass to upb_handlers for a string value. -typedef struct _upb_strref { - // Pointer to the string data. NULL if the string spans multiple input - // buffers (in which case upb_bytesrc_getptr() must be called to obtain - // the actual pointers). - const char *ptr; +// Returns the total number of bytes remaining after offset "o", or +// UPB_NONDELIMITED if the byteregion is non-delimited. +INLINE uint64_t upb_byteregion_remaining(const upb_byteregion *r, uint64_t o) { + return r->end == UPB_NONDELIMITED ? UPB_NONDELIMITED : r->end - o; +} - // Total length of the string. - uint32_t len; +INLINE uint64_t upb_byteregion_len(const upb_byteregion *r) { + return upb_byteregion_remaining(r, r->start); +} - // Offset in the bytesrc that represents the beginning of this string. - uint32_t stream_offset; +// Sets the value of this byteregion to be a subset of the given byteregion's +// data. The caller is responsible for releasing this region before the src +// region is released (unless the region is first pinned, if pinning support is +// added. see below). +void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src, + uint64_t ofs, uint64_t len); +void upb_byteregion_release(upb_byteregion *r); + +// Attempts to fetch more data, extending the fetched range of this byteregion. +// Returns true if the fetched region was extended by at least one byte, false +// on EOF or error (see *s for details). +bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s); + +// Fetches all remaining data for "r", returning false if the operation failed +// (see "*s" for details). May only be used on delimited byteregions. +INLINE bool upb_byteregion_fetchall(upb_byteregion *r, upb_status *s) { + assert(upb_byteregion_len(r) != UPB_NONDELIMITED); + while (upb_byteregion_fetch(r, s)) ; // Empty body. + return upb_eof(s); +} - // Bytesrc from which this string data comes. May be NULL if ptr is set. If - // non-NULL, the bytesrc is only guaranteed to be alive from inside the - // callback; however if the handler knows more about its type and how to - // prolong its life, it may do so. - upb_bytesrc *bytesrc; +// Discards bytes from the byteregion up until ofs (which must be greater or +// equal to upb_byteregion_discardofs()). It is valid to discard bytes that +// have not been fetched (such bytes will never be fetched) but it is an error +// to discard past the end of a delimited byteregion. +INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) { + assert(ofs >= upb_byteregion_discardofs(r)); + assert(ofs <= upb_byteregion_endofs(r)); + r->discard = ofs; + if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs); +} - // Possibly add optional members here like start_line, start_column, etc. -} upb_strref; +// Copies "len" bytes of data into "dst", starting at ofs. The specified +// region must be available. +INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs, + uint32_t len, char *dst) { + assert(ofs >= upb_byteregion_discardofs(r)); + assert(len <= upb_byteregion_available(r, ofs)); + upb_bytesrc_copy(r->bytesrc, ofs, len, dst); +} -// Copies the contents of the strref into a newly-allocated, NULL-terminated -// string. -char *upb_strref_dup(const struct _upb_strref *r); +// Copies all bytes from the byteregion into dst. Requires that the entire +// byteregion is fetched and that none has been discarded. +INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) { + assert(r->start == r->discard && r->end == r->fetch); + upb_byteregion_copy(r, r->start, upb_byteregion_len(r), dst); +} -INLINE void upb_strref_read(const struct _upb_strref *r, char *buf) { - if (r->ptr) { - memcpy(buf, r->ptr, r->len); - } else { - assert(r->bytesrc); - upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, buf); - } +// Returns a pointer to the internal buffer for the byteregion starting at +// offset "ofs." Stores the number of bytes available in this buffer in *len. +// The returned buffer is invalidated when the byteregion is reset or released, +// or when the bytes are discarded. If the byteregion is not currently pinned, +// the pointer is only valid for the lifetime of the parent byteregion. +INLINE const char *upb_byteregion_getptr(const upb_byteregion *r, + uint64_t ofs, uint32_t *len) { + assert(ofs >= upb_byteregion_discardofs(r)); + const char *ret = upb_bytesrc_getptr(r->bytesrc, ofs, len); + *len = UPB_MIN(*len, upb_byteregion_available(r, ofs)); + return ret; } -// Dynamically allocates a upb_strref object whose contents are the given -// string. The given string data is copied into the strref, which makes these -// functions unsuitable for tight loops (in those cases a strref should be made -// to point to existing string data). -upb_strref *upb_strref_new(const char *str); -upb_strref *upb_strref_newl(const void *str, size_t len); -void upb_strref_free(upb_strref *ref); +// TODO: add if/when there is a demonstrated need. +// +// // Pins this byteregion's bytes in memory, allowing it to outlive its parent +// // byteregion. Normally a byteregion may only be used while its parent is +// // still valid, but a pinned byteregion may continue to be used until it is +// // reset or released. A byteregion must be fully fetched to be pinned +// // (this implies that the byteregion must be delimited). +// // +// // In some cases this operation may cause the input data to be copied. +// // +// // void upb_byteregion_pin(upb_byteregion *r); + +// Convenience functions for creating and destroying a byteregion with a simple +// string as its data. These are relatively inefficient compared with creating +// your own bytesrc (they call malloc() and copy the string data) so should not +// be used on any critical path. +// +// The string data in the returned region is guaranteed to be contiguous and +// NULL-terminated. +upb_byteregion *upb_byteregion_new(const void *str); +upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len); +// May *only* be called on a byteregion created with upb_byteregion_new[l]()! +void upb_byteregion_free(upb_byteregion *r); + +// Copies the contents of the byteregion into a newly-allocated, NULL-terminated +// string. Requires that the byteregion is fully fetched. +char *upb_byteregion_strdup(const upb_byteregion *r); /* upb_bytesink ***************************************************************/ @@ -279,6 +414,7 @@ typedef struct { bool should_close; upb_stdio_buf **bufs; uint32_t nbuf, szbuf; + upb_byteregion byteregion; } upb_stdio; void upb_stdio_init(upb_stdio *stdio); @@ -297,7 +433,7 @@ void upb_stdio_reset(upb_stdio *stdio, FILE *file); void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, upb_status *s); -upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio); +upb_byteregion *upb_stdio_allbytes(upb_stdio *stdio); upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); @@ -305,24 +441,26 @@ upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); // bytesrc/bytesink for a simple contiguous string. -struct _upb_stringsrc { +typedef struct { upb_bytesrc bytesrc; const char *str; - size_t len; -}; -typedef struct _upb_stringsrc upb_stringsrc; + uint32_t len; + upb_byteregion byteregion; +} upb_stringsrc; // Create/free a stringsrc. void upb_stringsrc_init(upb_stringsrc *s); void upb_stringsrc_uninit(upb_stringsrc *s); // Resets the stringsrc to a state where it will vend the given string. The -// stringsrc will take a reference on the string, so the caller need not ensure -// that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len); +// string data must be valid until the stringsrc is reset again or destroyed. +void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len); -// Returns the upb_bytesrc* for this stringsrc. -upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); +// Returns the top-level upb_byteregion* for this stringsrc. Invalidated when +// the stringsrc is reset. +INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) { + return &s->byteregion; +} /* upb_stringsink *************************************************************/ @@ -330,7 +468,7 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); struct _upb_stringsink { upb_bytesink bytesink; char *str; - size_t len, size; + uint32_t len, size; }; typedef struct _upb_stringsink upb_stringsink; @@ -340,12 +478,12 @@ void upb_stringsink_uninit(upb_stringsink *s); // Resets the sink's string to "str", which the sink takes ownership of. // "str" may be NULL, which will make the sink allocate a new string. -void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size); +void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t len); // Releases ownership of the returned string (which is "len" bytes long) and // resets the internal string to be empty again (as if reset were called with // NULL). -const char *upb_stringsink_release(upb_stringsink *s, size_t *len); +const char *upb_stringsink_release(upb_stringsink *s, uint32_t *len); // Returns the upb_bytesink* for this stringsrc. Invalidated by reset above. upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s); diff --git a/upb/def.c b/upb/def.c index 82fa6ac..13418c6 100644 --- a/upb/def.c +++ b/upb/def.c @@ -251,7 +251,8 @@ static void upb_fielddef_init_default(upb_fielddef *f) { case UPB_TYPE(FIXED32): upb_value_setuint32(&f->defaultval, 0); break; case UPB_TYPE(BOOL): upb_value_setbool(&f->defaultval, false); break; case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): upb_value_setstrref(&f->defaultval, upb_strref_new("")); break; + case UPB_TYPE(BYTES): + upb_value_setbyteregion(&f->defaultval, upb_byteregion_new("")); break; case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break; } @@ -260,7 +261,7 @@ static void upb_fielddef_init_default(upb_fielddef *f) { static void upb_fielddef_uninit_default(upb_fielddef *f) { if (upb_isstring(f) || f->default_is_symbolic) { - upb_strref_free((upb_strref*)upb_value_getstrref(f->defaultval)); + upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); } } @@ -324,24 +325,29 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { f->def = def; if (f->type == UPB_TYPE(ENUM) && f->default_is_symbolic) { // Resolve the enum's default from a string to an integer. - upb_strref *str = (upb_strref*)upb_value_getstrref(f->defaultval); - assert(str); // Should point to either a real default or the empty string. + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + assert(bytes); // Points to either a real default or the empty string. upb_enumdef *e = upb_downcast_enumdef(f->def); int32_t val = 0; // Could do a sanity check that the default value does not have embedded // NULLs. - if (str->ptr[0] == '\0') { + if (upb_byteregion_len(bytes) == 0) { upb_value_setint32(&f->defaultval, e->defaultval); } else { - bool success = upb_enumdef_ntoi(e, str->ptr, &val); + uint32_t len; + // ptr is guaranteed to be NULL-terminated because the byteregion was + // created with upb_byteregion_newl(). + const char *ptr = upb_byteregion_getptr(bytes, 0, &len); + assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. + bool success = upb_enumdef_ntoi(e, ptr, &val); if (!success) { upb_status_seterrf( - s, "Default enum value (%s) is not a member of the enum", str); + s, "Default enum value (%s) is not a member of the enum", ptr); return false; } upb_value_setint32(&f->defaultval, val); } - upb_strref_free(str); + upb_byteregion_free(bytes); } return true; } @@ -381,10 +387,10 @@ void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { assert(upb_isstring(f) || f->type == UPB_TYPE(ENUM)); - const upb_strref *ref = upb_value_getstrref(f->defaultval); - assert(ref); - upb_strref_free((upb_strref*)ref); - upb_value_setstrref(&f->defaultval, upb_strref_newl(str, len)); + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + assert(bytes); + upb_byteregion_free(bytes); + upb_value_setbyteregion(&f->defaultval, upb_byteregion_newl(str, len)); f->default_is_symbolic = true; } diff --git a/upb/descriptor.c b/upb/descriptor.c index 39ed6da..0c589f2 100644 --- a/upb/descriptor.c +++ b/upb/descriptor.c @@ -123,7 +123,8 @@ static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r, upb_value val) { (void)fval; upb_descreader *r = _r; - upb_descreader_setscopename(r, upb_strref_dup(upb_value_getstrref(val))); + upb_descreader_setscopename( + r, upb_byteregion_strdup(upb_value_getbyteregion(val))); return UPB_CONTINUE; } @@ -180,7 +181,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r, (void)fval; upb_descreader *r = _r; free(r->name); - r->name = upb_strref_dup(upb_value_getstrref(val)); + r->name = upb_byteregion_strdup(upb_value_getbyteregion(val)); r->saw_name = true; return UPB_CONTINUE; } @@ -259,7 +260,7 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r, upb_descreader *r = _r; upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); free(e->base.fqname); - e->base.fqname = upb_strref_dup(upb_value_getstrref(val)); + e->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val)); return UPB_CONTINUE; } @@ -423,7 +424,7 @@ static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) { (void)fval; upb_descreader *r = _r; - char *name = upb_strref_dup(upb_value_getstrref(val)); + char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); upb_fielddef_setname(r->f, name); free(name); return UPB_CONTINUE; @@ -433,7 +434,7 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval, upb_value val) { (void)fval; upb_descreader *r = _r; - char *name = upb_strref_dup(upb_value_getstrref(val)); + char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); upb_fielddef_settypename(r->f, name); free(name); return UPB_CONTINUE; @@ -446,7 +447,7 @@ static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval, // Have to convert from string to the correct type, but we might not know the // type yet. free(r->default_string); - r->default_string = upb_strref_dup(upb_value_getstrref(val)); + r->default_string = upb_byteregion_strdup(upb_value_getbyteregion(val)); return UPB_CONTINUE; } @@ -499,7 +500,7 @@ static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) { assert(val.type == UPB_TYPE(STRING)); upb_msgdef *m = upb_descreader_top(r); free(m->base.fqname); - m->base.fqname = upb_strref_dup(upb_value_getstrref(val)); + m->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val)); upb_descreader_setscopename(r, strdup(m->base.fqname)); return UPB_CONTINUE; } diff --git a/upb/handlers.h b/upb/handlers.h index 2945d8c..e17a726 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -324,13 +324,13 @@ typedef struct { uint16_t fieldindex; bool is_sequence; // frame represents seq or submsg? (f might be both). - bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX (strings aren't pushed) + bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX + // (strings aren't pushed). } upb_dispatcher_frame; -// Called when some of the input needs to be skipped. All frames from -// top to bottom, inclusive, should be skipped. -typedef void upb_skip_handler(void *, upb_dispatcher_frame *top, - upb_dispatcher_frame *bottom); +// Called when some of the input needs to be skipped. All frames from the +// current top to "bottom", inclusive, should be skipped. +typedef void upb_skip_handler(void *, upb_dispatcher_frame *bottom); typedef void upb_exit_handler(void *); typedef struct { diff --git a/upb/msg.c b/upb/msg.c index 87bb61b..78309cf 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -151,13 +151,14 @@ static void _upb_stdmsg_setstr(void *_dst, upb_value src) { *dstp = dst; } dst->len = 0; - const upb_strref *ref = upb_value_getstrref(src); - if (ref->len > dst->size) { - dst->size = ref->len; + const upb_byteregion *bytes = upb_value_getbyteregion(src); + uint32_t len = upb_byteregion_len(bytes); + if (len > dst->size) { + dst->size = len; dst->ptr = realloc(dst->ptr, dst->size); } - dst->len = ref->len; - upb_bytesrc_read(ref->bytesrc, ref->stream_offset, ref->len, dst->ptr); + dst->len = len; + upb_byteregion_copyall(bytes, dst->ptr); } upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) { diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 5844377..ae54e47 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -45,27 +45,29 @@ static void upb_decoder_abort(upb_decoder *d, const char *msg) { /* Buffering ******************************************************************/ -// We operate on one buffer at a time, which may be a subset of the bytesrc -// region we have ref'd. When data for the buffer is completely gone we pull -// the next one. When we've committed our progress we release our ref on any -// previous buffers' regions. - -static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; } -static void upb_decoder_advance(upb_decoder *d, size_t len) { - assert((size_t)(d->end - d->ptr) >= len); +// We operate on one buffer at a time, which may be a subset of the currently +// loaded byteregion data. When data for the buffer is completely gone we pull +// the next one. When we've committed our progress we discard any previous +// buffers' regions. + +static uint32_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; } +static void upb_decoder_advance(upb_decoder *d, uint32_t len) { + assert(upb_decoder_bufleft(d) >= len); d->ptr += len; } -size_t upb_decoder_offset(upb_decoder *d) { - size_t offset = d->bufstart_ofs; - if (d->ptr) offset += (d->ptr - d->buf); - return offset; +uint64_t upb_decoder_offset(upb_decoder *d) { + return d->bufstart_ofs + (d->ptr - d->buf); +} + +uint64_t upb_decoder_bufendofs(upb_decoder *d) { + return d->bufstart_ofs + (d->end - d->buf); } static void upb_decoder_setmsgend(upb_decoder *d) { upb_dispatcher_frame *f = d->dispatcher.top; - size_t delimlen = f->end_ofs - d->bufstart_ofs; - size_t buflen = d->end - d->buf; + uint32_t delimlen = f->end_ofs - d->bufstart_ofs; + uint32_t buflen = d->end - d->buf; d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ? d->buf + delimlen : NULL; // NULL if not in this buf. d->top_is_packed = f->is_packed; @@ -73,24 +75,25 @@ static void upb_decoder_setmsgend(upb_decoder *d) { static bool upb_trypullbuf(upb_decoder *d) { assert(upb_decoder_bufleft(d) == 0); - if (d->bufend_ofs == d->refend_ofs) { - size_t read = upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status); - if (read <= 0) { - d->ptr = NULL; - d->end = NULL; - if (read == 0) return false; // EOF - upb_decoder_exit(d); // Non-EOF error. - } - d->refend_ofs += read; + d->bufstart_ofs = upb_decoder_offset(d); + d->buf = NULL; + d->ptr = NULL; + d->end = NULL; + if (upb_byteregion_available(d->input, upb_decoder_offset(d)) == 0 && + !upb_byteregion_fetch(d->input, d->status)) { + if (upb_eof(d->status)) return false; + upb_decoder_exit(d); // Non-EOF error. } - d->bufstart_ofs = d->bufend_ofs; - size_t len; - d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len); + uint32_t len; + d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len); assert(len > 0); - d->bufend_ofs = d->bufstart_ofs + len; d->ptr = d->buf; d->end = d->buf + len; #ifdef UPB_USE_JIT_X64 + // If we start parsing a value, we can parse up to 20 bytes without + // having to bounds-check anything (2 10-byte varints). Since the + // JIT bounds-checks only *between* values (and for strings), the + // JIT bails if there are not 20 bytes available. d->jit_end = d->end - 20; #endif upb_decoder_setmsgend(d); @@ -101,16 +104,21 @@ static void upb_pullbuf(upb_decoder *d) { if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF"); } -void upb_decoder_commit(upb_decoder *d) { - d->completed_ptr = d->ptr; - if (d->refstart_ofs < d->bufstart_ofs) { - // Drop our ref on the previous buf's region. - upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs); - upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs); - d->refstart_ofs = d->bufstart_ofs; +void upb_decoder_skipto(upb_decoder *d, uint64_t ofs) { + if (ofs < upb_decoder_bufendofs(d)) { + upb_decoder_advance(d, ofs - upb_decoder_offset(d)); + } else { + d->buf = NULL; + d->ptr = NULL; + d->end = NULL; + d->bufstart_ofs = ofs; } } +void upb_decoder_checkpoint(upb_decoder *d) { + upb_byteregion_discard(d->input, upb_decoder_offset(d)); +} + /* Decoding of wire types *****************************************************/ @@ -151,11 +159,12 @@ done: return ret; } +// Returns true on success or false if we've hit a valid EOF. FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { - if (upb_decoder_bufleft(d) == 0 && upb_dispatcher_islegalend(&d->dispatcher)) { - // Check for our two successful end-of-message conditions - // (user-specified EOM and bytesrc EOF). - if (d->bufend_ofs == d->end_ofs || !upb_trypullbuf(d)) return false; + if (upb_decoder_bufleft(d) == 0 && + upb_dispatcher_islegalend(&d->dispatcher) && + !upb_trypullbuf(d)) { + return false; } *val = upb_decode_varint32(d); return true; @@ -212,26 +221,15 @@ FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { return u64; // TODO: proper byte swapping } -INLINE upb_strref *upb_decode_string(upb_decoder *d) { +INLINE upb_byteregion *upb_decode_string(upb_decoder *d) { uint32_t strlen = upb_decode_varint32(d); - d->strref.stream_offset = upb_decoder_offset(d); - d->strref.len = strlen; - if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d); - if (upb_decoder_bufleft(d) >= strlen) { - // Fast case. - d->strref.ptr = d->ptr; - upb_decoder_advance(d, strlen); - } else { - // Slow case. - while (1) { - size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen); - upb_decoder_advance(d, consume); - strlen -= consume; - if (strlen == 0) break; - upb_pullbuf(d); - } - } - return &d->strref; + uint64_t offset = upb_decoder_offset(d); + upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen); + // Could make it an option on the callback whether we fetchall() first or not. + upb_byteregion_fetchall(&d->str_byteregion, d->status); + if (!upb_ok(d->status)) upb_decoder_exit(d); + upb_decoder_skipto(d, offset + strlen); + return &d->str_byteregion; } INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) { @@ -272,7 +270,7 @@ T(DOUBLE, fixed64, double, upb_asdouble) T(FLOAT, fixed32, float, upb_asfloat) T(SINT32, varint, int32, upb_zzdec_32) T(SINT64, varint, int64, upb_zzdec_64) -T(STRING, string, strref, upb_strref*) +T(STRING, string, byteregion, upb_byteregion*) static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) { upb_push(d, f, UPB_NONDELIMITED); @@ -352,10 +350,10 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { case UPB_WIRE_TYPE_DELIMITED: upb_decoder_advance(d, upb_decode_varint32(d)); break; default: - upb_decoder_abort(d, "Invavlid wire type"); + upb_decoder_abort(d, "Invalid wire type"); } // TODO: deliver to unknown field callback. - upb_decoder_commit(d); + upb_decoder_checkpoint(d); upb_decoder_checkdelim(d); } } @@ -380,24 +378,18 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) { return; } f->decode(d, f); - upb_decoder_commit(d); + upb_decoder_checkpoint(d); } } -static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top, - upb_dispatcher_frame *bottom) { - (void)top; - (void)bottom; - (void)_d; -#if 0 +static void upb_decoder_skip(void *_d, upb_dispatcher_frame *f) { upb_decoder *d = _d; - // TODO - if (bottom->end_offset == UPB_NONDELIMITED) { - // TODO: support skipping groups. - abort(); + if (f->end_ofs != UPB_NONDELIMITED) { + upb_decoder_skipto(d, d->dispatcher.top->end_ofs); + } else { + // TODO: how to support skipping groups? Dispatcher could drop callbacks, + // or it could be special-cased inside the decoder. } - d->ptr = d->buf.ptr + bottom->end_offset; -#endif } void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { @@ -423,24 +415,19 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { } } -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs, - uint64_t end_ofs, void *closure) { +void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure) { upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure); - f->end_ofs = end_ofs; - d->end_ofs = end_ofs; - d->refstart_ofs = start_ofs; - d->refend_ofs = start_ofs; - d->bufstart_ofs = start_ofs; - d->bufend_ofs = start_ofs; - d->bytesrc = bytesrc; + f->end_ofs = UPB_NONDELIMITED; + d->input = input; + d->bufstart_ofs = upb_byteregion_startofs(input); d->buf = NULL; d->ptr = NULL; - d->end = NULL; // Force a buffer pull. + d->end = NULL; // Force a buffer pull. + d->delim_end = NULL; // But don't let end-of-message get triggered. + d->str_byteregion.bytesrc = input->bytesrc; #ifdef UPB_USE_JIT_X64 d->jit_end = NULL; #endif - d->delim_end = NULL; // But don't let end-of-message get triggered. - d->strref.bytesrc = bytesrc; } void upb_decoder_uninit(upb_decoder *d) { diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 2232c52..c35bec4 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -5,7 +5,7 @@ * Author: Josh Haberman * * upb_decoder implements a high performance, streaming decoder for protobuf - * data that works by getting its input data from a upb_bytesrc and calling + * data that works by getting its input data from a upb_byteregion and calling * into a upb_handlers. */ @@ -26,24 +26,14 @@ extern "C" { struct dasm_State; typedef struct _upb_decoder { - upb_bytesrc *bytesrc; // Source of our serialized data. - upb_dispatcher dispatcher; // Dispatcher to which we push parsed data. - upb_status *status; // Where we will store any errors that occur. - upb_strref strref; // For passing string data to callbacks. - - // Offsets for the bytesrc region we currently have ref'd. - uint64_t refstart_ofs, refend_ofs; + upb_byteregion *input; // Input data (serialized). + upb_dispatcher dispatcher; // Dispatcher to which we push parsed data. + upb_status *status; // Where we will store any errors that occur. + upb_byteregion str_byteregion; // For passing string data to callbacks. // Current input buffer and its stream offset. const char *buf, *ptr, *end; - uint64_t bufstart_ofs, bufend_ofs; - - // Stream offset for the end of the top-level message, if any. - uint64_t end_ofs; - - // Buf offset as of which we've delivered calbacks; needed for rollback if - // a callback returns UPB_BREAK. - const char *completed_ptr; + uint64_t bufstart_ofs; // End of the delimited region, relative to ptr, or NULL if not in this buf. const char *delim_end; @@ -65,10 +55,6 @@ typedef struct _upb_decoder { sigjmp_buf exitjmp; } upb_decoder; -// Used for frames that have no specific end offset: groups, repeated primitive -// fields inside groups, and the top-level message. -#define UPB_NONDELIMITED UINT64_MAX - // Initializes/uninitializes a decoder for calling into the given handlers // or to write into the given msgdef, given its accessors). Takes a ref // on the handlers. @@ -77,13 +63,13 @@ void upb_decoder_uninit(upb_decoder *d); // Resets the internal state of an already-allocated decoder. This puts it in a // state where it has not seen any data, and expects the next data to be from -// the beginning of a new protobuf. Parsers must be reset before they can be -// used. A decoder can be reset multiple times. -// -// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message. -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs, - uint64_t end_ofs, void *closure); +// the beginning of a new protobuf. Decoders must be reset before they can be +// used. A decoder can be reset multiple times. "input" must live until the +// decoder is reset again (or destroyed). +void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure); +// Decodes serialized data (calling handlers as the data is parsed) until error +// or EOF (see *status for details). void upb_decoder_decode(upb_decoder *d, upb_status *status); #ifdef __cplusplus diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc index 72c4aa1..75e5b6b 100644 --- a/upb/pb/decoder_x64.dasc +++ b/upb/pb/decoder_x64.dasc @@ -129,7 +129,7 @@ void upb_reg_jit_gdb(upb_decoder *d) { |.define PTR, rbx |.define CLOSURE, r12 |.type FRAME, upb_dispatcher_frame, r13 -|.type STRREF, upb_strref, r14 +|.type BYTEREGION,upb_byteregion, r14 |.type DECODER, upb_decoder, r15 |.type STDARRAY, upb_stdarray | @@ -365,23 +365,26 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m, // robust checks. | mov ecx, dword [PTR + tag_size] | decode_loaded_varint tag_size + | mov rdi, DECODER->effective_end + | sub rdi, rax + | cmp ARG3_64, rdi // if (len > d->effective_end - str) + | ja ->exit_jit // Can't deliver, whole string not in buf. + + // Update PTR to point past end of string. | mov rdi, rax | add rdi, ARG3_64 - | mov STRREF->len, ARG3_32 - | mov STRREF->ptr, rax - | sub rax, DECODER->buf - | add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs - | mov STRREF->stream_offset, eax - | mov ARG3_64, STRREF - | cmp rdi, DECODER->effective_end - | ja ->exit_jit // Can't deliver, whole string not in buf. | mov PTR, rdi - break; - case UPB_TYPE_ENDGROUP: // A pseudo-type. - | add PTR, tag_size - | jmp =>m->jit_endofmsg_pclabel - return; + // Populate BYTEREGION appropriately. + | sub rax, DECODER->buf + | add rax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs + | mov BYTEREGION->start, rax + | mov BYTEREGION->discard, rax + | add rax, ARG3_64 + | mov BYTEREGION->end, rax + | mov BYTEREGION->fetch, rax // Fast path ensures whole string is loaded + | mov ARG3_64, BYTEREGION + break; // Will dispatch callbacks and call submessage in a second. case UPB_TYPE(MESSAGE): @@ -471,7 +474,6 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) { | callp f->endsubmsg } | popframe upb_fhandlers_getmsg(f) - } else { | mov ARG1_64, CLOSURE // Test for callbacks we can specialize. @@ -522,8 +524,8 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) { } // PTR should point to the beginning of the tag. -static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag, - upb_mhandlers *m, +static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, + uint32_t next_tag, upb_mhandlers *m, upb_fhandlers *f, upb_fhandlers *next_f) { // PC-label for the dispatch table. // We check the wire type (which must be loaded in edx) because the @@ -546,7 +548,14 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta |1: // Label for repeating this field. - upb_decoder_jit_decodefield(d, m, f->type, upb_value_size(tag)); + int tag_size = upb_value_size(tag); + if (f->type == UPB_TYPE_ENDGROUP) { + | add PTR, tag_size + | jmp =>m->jit_endofmsg_pclabel + return; + } + + upb_decoder_jit_decodefield(d, m, f->type, tag_size); upb_decoder_jit_callcb(d, f); // Epilogue: load next tag, check for repeated field. @@ -673,7 +682,7 @@ static void upb_decoder_jit(upb_decoder *d) { | sub rsp, 8 | mov DECODER, ARG1_64 | mov FRAME, DECODER:ARG1_64->dispatcher.top - | lea STRREF, DECODER:ARG1_64->strref + | lea BYTEREGION, DECODER:ARG1_64->str_byteregion | mov CLOSURE, FRAME->closure | mov PTR, DECODER->ptr diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 37b86d9..3176355 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -23,7 +23,7 @@ void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md, upb_accessors_reghandlers(h, md); upb_decoder_init(&d, h); upb_handlers_unref(h); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg); + upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg); upb_decoder_decode(&d, status); upb_stringsrc_uninit(&strsrc); @@ -84,16 +84,19 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, upb_handlers_unref(h); upb_descreader r; upb_descreader_init(&r); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r); + upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &r); upb_decoder_decode(&d, status); + upb_stringsrc_uninit(&strsrc); + upb_decoder_uninit(&d); + if (!upb_ok(status)) { + upb_descreader_uninit(&r); + return NULL; + } upb_def **defs = upb_descreader_getdefs(&r, n); upb_def **defscopy = malloc(sizeof(upb_def*) * (*n)); memcpy(defscopy, defs, sizeof(upb_def*) * (*n)); - upb_descreader_uninit(&r); - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); // Set default accessors and layouts on all messages. for(int i = 0; i < *n; i++) { diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 4056b8f..3f68f90 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -35,15 +35,16 @@ err: return -1; } -static int upb_textprinter_putescaped(upb_textprinter *p, const upb_strref *strref, +static int upb_textprinter_putescaped(upb_textprinter *p, + const upb_byteregion *bytes, bool preserve_utf8) { // Based on CEscapeInternal() from Google's protobuf release. // TODO; we could read directly from a bytesrc's buffer instead. - // TODO; we could write strrefs to the sink when possible. + // TODO; we could write byteregions to the sink when possible. char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); - char *buf = malloc(strref->len), *src = buf; - char *end = src + strref->len; - upb_bytesrc_read(strref->bytesrc, strref->stream_offset, strref->len, buf); + char *buf = malloc(upb_byteregion_len(bytes)), *src = buf; + char *end = src + upb_byteregion_len(bytes); + upb_byteregion_copyall(bytes, buf); // I think hex is prettier and more useful, but proto2 uses octal; should // investigate whether it can parse hex also. @@ -142,7 +143,7 @@ static upb_flow_t upb_textprinter_putstr(void *_p, upb_value fval, uint64_t start_ofs = upb_bytesink_getoffset(p->sink); const upb_fielddef *f = upb_value_getfielddef(fval); CHECK(upb_bytesink_putc(p->sink, '"')); - CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val), + CHECK(upb_textprinter_putescaped(p, upb_value_getbyteregion(val), f->type == UPB_TYPE(STRING))); CHECK(upb_bytesink_putc(p->sink, '"')); return UPB_CONTINUE; diff --git a/upb/pb/varint.h b/upb/pb/varint.h index 1bbd193..19977e9 100644 --- a/upb/pb/varint.h +++ b/upb/pb/varint.h @@ -113,7 +113,7 @@ INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { /* Encoding *******************************************************************/ -INLINE size_t upb_value_size(uint64_t val) { +INLINE int upb_value_size(uint64_t val) { #ifdef __GNUC__ int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. #else diff --git a/upb/table.h b/upb/table.h index f410457..0786a1a 100644 --- a/upb/table.h +++ b/upb/table.h @@ -20,6 +20,7 @@ #define UPB_TABLE_H_ #include +#include #include "upb.h" #ifdef __cplusplus diff --git a/upb/upb.c b/upb/upb.c index a7c4ea0..5002e10 100644 --- a/upb/upb.c +++ b/upb/upb.c @@ -71,8 +71,9 @@ void upb_status_seterrliteral(upb_status *status, const char *msg) { status->space = NULL; } -void upb_status_copy(upb_status *to, upb_status *from) { +void upb_status_copy(upb_status *to, const upb_status *from) { to->status = from->status; + to->eof = from->eof; to->code = from->code; to->space = from->space; if (from->str == from->buf) { @@ -100,6 +101,7 @@ const char *upb_status_getstr(const upb_status *_status) { void upb_status_clear(upb_status *status) { status->status = UPB_OK; + status->eof = false; status->code = 0; status->space = NULL; status->str = NULL; @@ -124,7 +126,7 @@ void upb_status_fromerrno(upb_status *status) { upb_errorspace upb_posix_errorspace = {"POSIX", NULL}; // TODO -int upb_vrprintf(char **buf, size_t *size, size_t ofs, +int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs, const char *fmt, va_list args) { // Try once without reallocating. We have to va_copy because we might have // to call vsnprintf again. diff --git a/upb/upb.h b/upb/upb.h index 8c78d9a..e43418f 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -12,7 +12,6 @@ #include #include -#include // only for size_t. #include #include #include "descriptor_const.h" @@ -40,7 +39,7 @@ INLINE void nop_printf(const char *fmt, ...) { (void)fmt; } #endif // Rounds val up to the next multiple of align. -INLINE size_t upb_align_up(size_t val, size_t align) { +INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) { return val % align == 0 ? val : val + align - (val % align); } @@ -124,7 +123,7 @@ extern const upb_type_info upb_types[]; /* upb_value ******************************************************************/ -struct _upb_strref; +struct _upb_byteregion; struct _upb_fielddef; // Special constants for the upb_value.type field. These must not conflict @@ -144,7 +143,7 @@ typedef struct { int64_t int64; uint32_t uint32; bool _bool; - const struct _upb_strref *strref; + struct _upb_byteregion *byteregion; const struct _upb_fielddef *fielddef; void *_void; } val; @@ -194,11 +193,13 @@ UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR); +UPB_VALUE_ACCESSORS(byteregion, byteregion, struct _upb_byteregion*, + UPB_TYPE(STRING)); -// upb_fielddef and upb_strref should never be modified from a callback +// upb_fielddef should never be modified from a callback // (ie. when they're getting passed through a upb_value). -UPB_VALUE_ACCESSORS(strref, strref, const struct _upb_strref*, UPB_TYPE(STRING)); -UPB_VALUE_ACCESSORS(fielddef, fielddef, const struct _upb_fielddef*, UPB_VALUETYPE_FIELDDEF); +UPB_VALUE_ACCESSORS(fielddef, fielddef, const struct _upb_fielddef*, + UPB_VALUETYPE_FIELDDEF); extern upb_value UPB_NO_VALUE; @@ -215,43 +216,46 @@ typedef struct { const char *name; // Writes a NULL-terminated string to "buf" containing an error message for // the given error code, returning false if the message was too large to fit. - bool (*code_to_string)(int code, char *buf, size_t len); + bool (*code_to_string)(int code, char *buf, uint32_t len); } upb_errorspace; -// TODO: consider adding error space and code, to let ie. errno be stored -// as a proper code, or application-specific error codes. typedef struct { char status; + bool eof; int code; // Can be set to a more specific code (defined by error space). upb_errorspace *space; const char *str; // NULL when no message is present. NULL-terminated. char *buf; // Owned by the status. - size_t bufsize; + uint32_t bufsize; } upb_status; -#define UPB_STATUS_INIT {UPB_OK, 0, NULL, NULL, NULL, 0} +#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0} void upb_status_init(upb_status *status); void upb_status_uninit(upb_status *status); -INLINE bool upb_ok(upb_status *status) { return status->code == UPB_OK; } +INLINE bool upb_ok(const upb_status *status) { return status->code == UPB_OK; } +INLINE bool upb_eof(const upb_status *status) { return status->eof; } void upb_status_clear(upb_status *status); void upb_status_seterrliteral(upb_status *status, const char *msg); void upb_status_seterrf(upb_status *s, const char *msg, ...); void upb_status_setcode(upb_status *s, upb_errorspace *space, int code); +INLINE void upb_status_seteof(upb_status *s) { s->eof = true; } // The returned string is invalidated by any other call into the status. const char *upb_status_getstr(const upb_status *s); -void upb_status_copy(upb_status *to, upb_status *from); +void upb_status_copy(upb_status *to, const upb_status *from); extern upb_errorspace upb_posix_errorspace; void upb_status_fromerrno(upb_status *status); -// Like vaprintf, but uses *buf (which can be NULL) as a starting point and -// reallocates it only if the new value will not fit. "size" is updated to -// reflect the allocated size of the buffer. Returns false on memory alloc -// failure. -int upb_vrprintf(char **buf, size_t *size, size_t ofs, +// Like vasprintf (which allocates a string large enough for the result), but +// uses *buf (which can be NULL) as a starting point and reallocates it only if +// the new value will not fit. "size" is updated to reflect the allocated size +// of the buffer. Starts writing at the given offset into the string; bytes +// preceding this offset are unaffected. Returns the new length of the string, +// or -1 on memory allocation failure. +int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs, const char *fmt, va_list args); #ifdef __cplusplus -- cgit v1.2.3