summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joshua Haberman <jhaberman@gmail.com> 2015-05-08 17:30:22 -0700
committer Joshua Haberman <jhaberman@gmail.com> 2015-05-08 17:30:22 -0700
commit ccc0fd0dbbcebb43f4d85d7df1439e1fc7993bf8 (patch)
tree da3cbc97eed1eb70af5e0f3a687ff37ad239d119
parent bd7ea8c6f1854aa37b7792c6f23334ffc0fd94ff (diff)
parent 838009ba2b8ea1e99061c66e0fbd9cb53a96ec20 (diff)
Merge pull request #18 from haberman/google-internal
Sync from Google-internal development.
-rw-r--r-- Makefile 1
-rw-r--r-- tests/bindings/googlepb/test_vs_proto2.cc 9
-rw-r--r-- tests/json/test_json.cc 12
-rw-r--r-- tests/pb/test_decoder.cc 51
-rw-r--r-- tests/test_cpp.cc 1
-rw-r--r-- upb/bindings/googlepb/proto2.cc 9
-rw-r--r-- upb/bindings/lua/upb.c 2
-rw-r--r-- upb/bindings/lua/upb/pb.c 21
-rw-r--r-- upb/descriptor/reader.c 117
-rw-r--r-- upb/descriptor/reader.h 78
-rw-r--r-- upb/env.c 261
-rw-r--r-- upb/env.h 256
-rw-r--r-- upb/handlers-inl.h 36
-rw-r--r-- upb/handlers.c 9
-rw-r--r-- upb/handlers.h 2
-rw-r--r-- upb/json/parser.c 196
-rw-r--r-- upb/json/parser.h 100
-rw-r--r-- upb/json/parser.rl 124
-rw-r--r-- upb/json/printer.c 51
-rw-r--r-- upb/json/printer.h 70
-rw-r--r-- upb/pb/compile_decoder.c 1
-rw-r--r-- upb/pb/compile_decoder_x64.dasc 10
-rw-r--r-- upb/pb/compile_decoder_x64.h 24
-rw-r--r-- upb/pb/decoder.c 178
-rw-r--r-- upb/pb/decoder.h 173
-rw-r--r-- upb/pb/decoder.int.h 92
-rw-r--r-- upb/pb/encoder.c 144
-rw-r--r-- upb/pb/encoder.h 116
-rw-r--r-- upb/pb/glue.c 18
-rw-r--r-- upb/pb/textprinter.c 51
-rw-r--r-- upb/pb/textprinter.h 38
-rw-r--r-- upb/sink.h 21
-rw-r--r-- upb/upb.h 9
33 files changed, 1490 insertions, 791 deletions
diff --git a/Makefile b/Makefile
index 5887fbe..e75fe54 100644
--- a/Makefile
+++ b/Makefile
@@ -131,6 +131,7 @@ make_objs_cc = $$(patsubst upb/$$(pc).cc,obj/upb/$$(pc).$(1),$$($$(call to_srcs,
upb_SRCS = \
upb/def.c \
+ upb/env.c \
upb/handlers.c \
upb/refcounted.c \
upb/shim/shim.c \
diff --git a/tests/bindings/googlepb/test_vs_proto2.cc b/tests/bindings/googlepb/test_vs_proto2.cc
index 49dfe49..8e68791 100644
--- a/tests/bindings/googlepb/test_vs_proto2.cc
+++ b/tests/bindings/googlepb/test_vs_proto2.cc
@@ -77,13 +77,14 @@ void parse_and_compare(google::protobuf::Message *msg1,
cache.GetDecoderMethod(upb::pb::DecoderMethodOptions(protomsg_handlers)));
upb::Status status;
- upb::pb::Decoder decoder(decoder_method.get(), &status);
+ upb::Environment env;
+ env.ReportErrorsTo(&status);
upb::Sink protomsg_sink(protomsg_handlers, msg2);
-
- decoder.ResetOutput(&protomsg_sink);
+ upb::pb::Decoder* decoder =
+ upb::pb::Decoder::Create(&env, decoder_method.get(), &protomsg_sink);
msg2->Clear();
- bool ok = upb::BufferSource::PutBuffer(str, len, decoder.input());
+ bool ok = upb::BufferSource::PutBuffer(str, len, decoder->input());
if (!ok) {
fprintf(stderr, "error parsing: %s\n", status.error_message());
print_diff(*msg1, *msg2);
diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc
index 828e603..cb60bad 100644
--- a/tests/json/test_json.cc
+++ b/tests/json/test_json.cc
@@ -292,14 +292,14 @@ void test_json_roundtrip_message(const char* json_src,
const upb::Handlers* serialize_handlers,
int seam) {
upb::Status st;
- upb::json::Parser parser(&st);
- upb::json::Printer printer(serialize_handlers);
+ upb::Environment env;
+ env.ReportErrorsTo(&st);
StringSink data_sink;
+ upb::json::Printer* printer =
+ upb::json::Printer::Create(&env, serialize_handlers, data_sink.Sink());
+ upb::json::Parser* parser = upb::json::Parser::Create(&env, printer->input());
- parser.ResetOutput(printer.input());
- printer.ResetOutput(data_sink.Sink());
-
- upb::BytesSink* input = parser.input();
+ upb::BytesSink* input = parser->input();
void *sub;
size_t len = strlen(json_src);
size_t ofs = 0;
diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc
index 3aea777..98926a6 100644
--- a/tests/pb/test_decoder.cc
+++ b/tests/pb/test_decoder.cc
@@ -64,6 +64,8 @@
(float)completed * 100 / total); \
}
+#define MAX_NESTING 64
+
uint32_t filter_hash = 0;
double completed;
double total;
@@ -210,7 +212,7 @@ string submsg(uint32_t fn, const string& buf) {
// using the closure depth to test that the stack of closures is properly
// handled.
-int closures[UPB_DECODER_MAX_NESTING];
+int closures[MAX_NESTING];
string output;
void indentbuf(string *buf, int depth) {
@@ -508,6 +510,15 @@ upb::reffed_ptr<const upb::Handlers> NewHandlers(TestMode mode) {
const upb::Handlers *global_handlers;
const upb::pb::DecoderMethod *global_method;
+// Test helper: creates a decoder for "method" inside "env" that emits to
+// "sink". Asserts that creation succeeded, then sets the decoder's maximum
+// nesting to MAX_NESTING, which is also the size of the closures[] array.
+upb::pb::Decoder* CreateDecoder(upb::Environment* env,
+ const upb::pb::DecoderMethod* method,
+ upb::Sink* sink) {
+ upb::pb::Decoder *ret = upb::pb::Decoder::Create(env, method, sink);
+ ASSERT(ret != NULL);
+ ret->set_max_nesting(MAX_NESTING);
+ return ret;
+}
+
uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
size_t seam2) {
uint32_t hash = MurmurHash2(proto.c_str(), proto.size(), 0);
@@ -545,19 +556,21 @@ static bool parse(upb::pb::Decoder* decoder, void* subc, const char* buf,
#define LINE(x) x "\n"
void run_decoder(const string& proto, const string* expected_output) {
upb::Status status;
- upb::pb::Decoder decoder(global_method, &status);
upb::Sink sink(global_handlers, &closures[0]);
- decoder.ResetOutput(&sink);
for (size_t i = 0; i < proto.size(); i++) {
for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) {
+ // TODO(haberman): hoist this again once the environment supports reset.
+ upb::Environment env;
+ env.ReportErrorsTo(&status);
+ upb::pb::Decoder *decoder = CreateDecoder(&env, global_method, &sink);
+
testhash = Hash(proto, expected_output, i, j);
if (filter_hash && testhash != filter_hash) continue;
if (test_mode != COUNT_ONLY) {
- decoder.Reset();
output.clear();
status.Clear();
size_t ofs = 0;
- upb::BytesSink* input = decoder.input();
+ upb::BytesSink* input = decoder->input();
void *sub;
if (filter_hash) {
@@ -576,9 +589,9 @@ void run_decoder(const string& proto, const string* expected_output) {
}
bool ok = input->Start(proto.size(), &sub) &&
- parse(&decoder, sub, proto.c_str(), 0, i, &ofs, &status) &&
- parse(&decoder, sub, proto.c_str(), i, j, &ofs, &status) &&
- parse(&decoder, sub, proto.c_str(), j, proto.size(), &ofs,
+ parse(decoder, sub, proto.c_str(), 0, i, &ofs, &status) &&
+ parse(decoder, sub, proto.c_str(), i, j, &ofs, &status) &&
+ parse(decoder, sub, proto.c_str(), j, proto.size(), &ofs,
&status) &&
ofs == proto.size();
@@ -852,7 +865,7 @@ void test_invalid() {
// Test exceeding the resource limit of stack depth.
string buf;
- for (int i = 0; i <= UPB_DECODER_MAX_NESTING; i++) {
+ for (int i = 0; i <= MAX_NESTING; i++) {
buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
}
assert_does_not_parse(buf);
@@ -871,11 +884,12 @@ void test_valid() {
if (!filter_hash || filter_hash == testhash) {
testhash = emptyhash;
upb::Status status;
- upb::pb::Decoder decoder(global_method, &status);
+ upb::Environment env;
+ env.ReportErrorsTo(&status);
upb::Sink sink(global_handlers, &closures[0]);
- decoder.ResetOutput(&sink);
+ upb::pb::Decoder* decoder = CreateDecoder(&env, global_method, &sink);
output.clear();
- bool ok = upb::BufferSource::PutBuffer("", 0, decoder.input());
+ bool ok = upb::BufferSource::PutBuffer("", 0, decoder->input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
@@ -1076,7 +1090,7 @@ void test_valid() {
// Staying within the stack limit should work properly.
string buf;
string textbuf;
- int total = UPB_DECODER_MAX_NESTING - 1;
+ int total = MAX_NESTING - 1;
for (int i = 0; i < total; i++) {
buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
indentbuf(&textbuf, i);
@@ -1135,11 +1149,12 @@ upb::reffed_ptr<const upb::pb::DecoderMethod> method =
{ NULL, 0 },
};
for (int i = 0; testdata[i].data; i++) {
+ upb::Environment env;
upb::Status status;
- upb::pb::Decoder decoder(method.get(), &status);
- upb::Sink sink(global_handlers, &closures[0]);
- decoder.ResetOutput(&sink);
- upb::BytesSink* input = decoder.input();
+ env.ReportErrorsTo(&status);
+ upb::Sink sink(method->dest_handlers(), &closures[0]);
+ upb::pb::Decoder* decoder = CreateDecoder(&env, method.get(), &sink);
+ upb::BytesSink* input = decoder->input();
void* subc;
ASSERT(input->Start(0, &subc));
size_t ofs = 0;
@@ -1182,7 +1197,7 @@ extern "C" {
int run_tests(int argc, char *argv[]) {
if (argc > 1)
filter_hash = strtol(argv[1], NULL, 16);
- for (int i = 0; i < UPB_DECODER_MAX_NESTING; i++) {
+ for (int i = 0; i < MAX_NESTING; i++) {
closures[i] = i;
}
diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc
index 71f354c..c2de6c3 100644
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@@ -15,6 +15,7 @@
#include <sstream>
#include "upb/def.h"
+#include "upb/env.h"
#include "upb/descriptor/reader.h"
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
diff --git a/upb/bindings/googlepb/proto2.cc b/upb/bindings/googlepb/proto2.cc
index 3911172..87c13b6 100644
--- a/upb/bindings/googlepb/proto2.cc
+++ b/upb/bindings/googlepb/proto2.cc
@@ -946,14 +946,14 @@ case goog::FieldDescriptor::cpptype: \
public:
typedef goog::Message Type;
#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
- static ::proto2::Arena* GetArena(Type* t) {
+ static goog::Arena* GetArena(Type* t) {
return t->GetArena();
}
static void* GetMaybeArenaPointer(Type* t) {
return t->GetMaybeArenaPointer();
}
static inline Type* NewFromPrototype(
- const Type* prototype, ::proto2::Arena* arena = NULL) {
+ const Type* prototype, goog::Arena* arena = NULL) {
return prototype->New(arena);
}
static void Delete(Type* t, goog::Arena* arena = NULL) {
@@ -1277,6 +1277,11 @@ case goog::FieldDescriptor::cpptype: \
return lazy_field_.SetAllocated(static_cast<proto2::Message*>(message));
}
+ // Forwards to lazy_field_.UnsafeArenaSetAllocated(), after downcasting
+ // "message" from proto2::MessageLite to proto2::Message.
+ virtual void UnsafeArenaSetAllocatedMessage(proto2::MessageLite* message) {
+ return lazy_field_.UnsafeArenaSetAllocated(
+ static_cast<proto2::Message*>(message));
+ }
+
virtual proto2::MessageLite* ReleaseMessage(
const proto2::MessageLite& prototype) {
return lazy_field_.ReleaseByPrototype(
diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c
index 5ad0235..b35af24 100644
--- a/upb/bindings/lua/upb.c
+++ b/upb/bindings/lua/upb.c
@@ -1358,7 +1358,7 @@ static size_t align_up(size_t val, size_t align) {
// If we always read/write as a consistent type to each value, this shouldn't
// violate aliasing.
-#define DEREF(msg, ofs, type) *(type*)(&msg->data[ofs])
+// Expands to an lvalue of "type" located "ofs" bytes past the end of the
+// lupb_msg header of "msg". The arguments and the whole expansion are
+// parenthesized so that expression arguments (e.g. "base + i") and operators
+// applied to the result bind as the caller expects.
+#define DEREF(msg, ofs, type) \
+  (*(type*)((char*)(msg) + sizeof(lupb_msg) + (ofs)))
lupb_msg *lupb_msg_check(lua_State *L, int narg) {
lupb_msg *msg = luaL_checkudata(L, narg, LUPB_MSG);
diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c
index c9f1f47..920648f 100644
--- a/upb/bindings/lua/upb/pb.c
+++ b/upb/bindings/lua/upb/pb.c
@@ -61,19 +61,20 @@ static int lupb_pbdecodermethod_parse(lua_State *L) {
// Handlers need this.
lua_getuservalue(L, -1);
- upb_pbdecoder decoder;
upb_status status = UPB_STATUS_INIT;
- upb_pbdecoder_init(&decoder, method, &status);
+ upb_env env;
+ upb_env_init(&env);
+ upb_env_reporterrorsto(&env, &status);
upb_sink sink;
upb_sink_reset(&sink, handlers, msg);
- upb_pbdecoder_resetoutput(&decoder, &sink);
- upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(&decoder));
- // TODO: Our need to call uninit isn't longjmp-safe; what if the decode
- // triggers a Lua error? uninit is only needed if the decoder
- // dynamically-allocated a growing stack -- ditch this feature and live with
- // the compile-time limit? Or have a custom allocation function that
- // allocates Lua GC-rooted memory?
- upb_pbdecoder_uninit(&decoder);
+ upb_pbdecoder *decoder = upb_pbdecoder_create(&env, method, &sink);
+ upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(decoder));
+
+ // TODO: This won't get called in the error case, which longjmp's across us.
+ // This will cause the memory to leak. To remedy this, we should make the
+ // upb_env wrapped in a userdata that guarantees this will get called.
+ upb_env_uninit(&env);
+
lupb_checkstatus(L, &status);
lua_pop(L, 1); // Uservalue.
diff --git a/upb/descriptor/reader.c b/upb/descriptor/reader.c
index 1baad81..0b289c0 100644
--- a/upb/descriptor/reader.c
+++ b/upb/descriptor/reader.c
@@ -20,6 +20,54 @@
#include "upb/sink.h"
#include "upb/descriptor/descriptor.upb.h"
+// upb_deflist is an internal-only dynamic array for storing a growing list of
+// upb_defs.
+typedef struct {
+ upb_def **defs; // Array of defs; returned to callers by upb_descreader_getdefs().
+ size_t len; // Number of defs; reported through *n by upb_descreader_getdefs().
+ size_t size; // presumably the allocated capacity of "defs" -- TODO confirm.
+ bool owned; // NOTE(review): ownership flag; its exact meaning is not visible here.
+} upb_deflist;
+
+// We keep a stack of all the messages scopes we are currently in, as well as
+// the top-level file scope. This is necessary to correctly qualify the
+// definitions that are contained inside. "name" tracks the name of the
+// message or package (a bare name -- not qualified by any enclosing scopes).
+typedef struct {
+ // Heap-allocated; descreader_cleanup() free()s it for every frame that is
+ // still on the stack.
+ char *name;
+ // Index of the first def that is under this scope. For msgdefs, the
+ // msgdef itself is at start-1.
+ int start;
+} upb_descreader_frame;
+
+// The maximum number of nested declarations that are allowed, ie.
+// message Foo {
+// message Bar {
+// message Baz {
+// }
+// }
+// }
+//
+// This is a resource limit that affects how big our runtime stack can grow.
+// TODO: make this a runtime-settable property of the Reader instance.
+#define UPB_MAX_MESSAGE_NESTING 64
+
+// State of one descriptor reader. Instances are allocated from a upb_env by
+// upb_descreader_create() and torn down by descreader_cleanup().
+struct upb_descreader {
+ upb_sink sink; // Input sink; returned by upb_descreader_input().
+ upb_deflist defs; // Defs built so far; exposed by upb_descreader_getdefs().
+ upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
+ int stack_len; // Number of frames of "stack" currently in use.
+
+ // NOTE(review): the fields below look like scratch state for the item
+ // currently being parsed; their exact use is outside this hunk.
+ uint32_t number;
+ char *name; // Heap string (or NULL); freed by descreader_cleanup().
+ bool saw_number;
+ bool saw_name;
+
+ char *default_string; // Heap string (or NULL); freed by descreader_cleanup().
+
+ upb_fielddef *f;
+};
+
static char *upb_strndup(const char *buf, size_t n) {
char *ret = malloc(n + 1);
if (!ret) return NULL;
@@ -99,36 +147,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
/* upb_descreader ************************************************************/
-void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers,
- upb_status *status) {
- UPB_UNUSED(status);
- upb_deflist_init(&r->defs);
- upb_sink_reset(upb_descreader_input(r), handlers, r);
- r->stack_len = 0;
- r->name = NULL;
- r->default_string = NULL;
-}
-
-void upb_descreader_uninit(upb_descreader *r) {
- free(r->name);
- upb_deflist_uninit(&r->defs);
- free(r->default_string);
- while (r->stack_len > 0) {
- upb_descreader_frame *f = &r->stack[--r->stack_len];
- free(f->name);
- }
-}
-
-upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
- *n = r->defs.len;
- upb_deflist_donaterefs(&r->defs, owner);
- return r->defs.defs;
-}
-
-upb_sink *upb_descreader_input(upb_descreader *r) {
- return &r->sink;
-}
-
static upb_msgdef *upb_descreader_top(upb_descreader *r) {
assert(r->stack_len > 1);
int index = r->stack[r->stack_len-1].start - 1;
@@ -568,6 +586,45 @@ static void reghandlers(const void *closure, upb_handlers *h) {
#undef D
+// Cleanup callback that upb_descreader_create() registers with the upb_env.
+// "_r" is the upb_descreader. Releases the heap resources the reader owns:
+// its two scratch strings, its def list, and the name of every frame that is
+// still on the scope stack.
+void descreader_cleanup(void *_r) {
+  upb_descreader *reader = _r;
+
+  free(reader->name);
+  upb_deflist_uninit(&reader->defs);
+  free(reader->default_string);
+
+  // Pop frames from the top of the stack down, freeing each frame's name.
+  while (reader->stack_len > 0) {
+    reader->stack_len--;
+    free(reader->stack[reader->stack_len].name);
+  }
+}
+
+
+/* Public API ****************************************************************/
+
+// Allocates a descriptor reader from "e", registers descreader_cleanup() so
+// the reader is torn down when the env is uninitialized, and binds the
+// reader's input sink to handlers "h" with the reader itself as the closure.
+// Returns NULL if either the allocation or the cleanup registration fails.
+upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
+  upb_descreader *reader = upb_env_malloc(e, sizeof(*reader));
+  if (reader == NULL) return NULL;
+  if (!upb_env_addcleanup(e, descreader_cleanup, reader)) return NULL;
+
+  // Everything descreader_cleanup() looks at must be in a defined state.
+  reader->stack_len = 0;
+  reader->name = NULL;
+  reader->default_string = NULL;
+  upb_deflist_init(&reader->defs);
+
+  upb_sink_reset(upb_descreader_input(reader), h, reader);
+
+  return reader;
+}
+
+// Stores the number of defs the reader holds in *n, donates the reader's refs
+// on them to "owner" (via upb_deflist_donaterefs()), and returns the reader's
+// def array.
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
+ *n = r->defs.len;
+ upb_deflist_donaterefs(&r->defs, owner);
+ return r->defs.defs;
+}
+
+// Returns the reader's input sink (the "sink" member embedded in the reader).
+upb_sink *upb_descreader_input(upb_descreader *r) {
+ return &r->sink;
+}
+
const upb_handlers *upb_descreader_newhandlers(const void *owner) {
const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
const upb_handlers *h = upb_handlers_newfrozen(
diff --git a/upb/descriptor/reader.h b/upb/descriptor/reader.h
index 700fd65..bcd4b06 100644
--- a/upb/descriptor/reader.h
+++ b/upb/descriptor/reader.h
@@ -11,6 +11,7 @@
#ifndef UPB_DESCRIPTOR_H
#define UPB_DESCRIPTOR_H
+#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@@ -23,45 +24,11 @@ class Reader;
UPB_DECLARE_TYPE(upb::descriptor::Reader, upb_descreader);
-// Internal-only structs used by Reader.
-
-// upb_deflist is an internal-only dynamic array for storing a growing list of
-// upb_defs.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- upb_def **defs;
- size_t len;
- size_t size;
- bool owned;
-} upb_deflist;
-
-// We keep a stack of all the messages scopes we are currently in, as well as
-// the top-level file scope. This is necessary to correctly qualify the
-// definitions that are contained inside. "name" tracks the name of the
-// message or package (a bare name -- not qualified by any enclosing scopes).
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- char *name;
- // Index of the first def that is under this scope. For msgdefs, the
- // msgdef itself is at start-1.
- int start;
-} upb_descreader_frame;
-
-// The maximum number of nested declarations that are allowed, ie.
-// message Foo {
-// message Bar {
-// message Baz {
-// }
-// }
-// }
-//
-// This is a resource limit that affects how big our runtime stack can grow.
-// TODO: make this a runtime-settable property of the Reader instance.
-#define UPB_MAX_MESSAGE_NESTING 64
+#ifdef __cplusplus
// Class that receives descriptor data according to the descriptor.proto schema
// and use it to build upb::Defs corresponding to that schema.
-UPB_DEFINE_CLASS0(upb::descriptor::Reader,
+class upb::descriptor::Reader {
public:
// These handlers must have come from NewHandlers() and must outlive the
// Reader.
@@ -71,11 +38,7 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader,
// to build/memory-manage the handlers at runtime at all). Unfortunately this
// is a bit tricky to implement for Handlers, but necessary to simplify this
// interface.
- Reader(const Handlers* handlers, Status* status);
- ~Reader();
-
- // Resets the reader's state and discards any defs it may have built.
- void Reset();
+ static Reader* Create(Environment* env, const Handlers* handlers);
// The reader's input; this is where descriptor.proto data should be sent.
Sink* input();
@@ -91,45 +54,30 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader,
// Builds and returns handlers for the reader, owned by "owner."
static Handlers* NewHandlers(const void* owner);
-,
-UPB_DEFINE_STRUCT0(upb_descreader,
- upb_sink sink;
- upb_deflist defs;
- upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
- int stack_len;
- uint32_t number;
- char *name;
- bool saw_number;
- bool saw_name;
+ private:
+ UPB_DISALLOW_POD_OPS(Reader, upb::descriptor::Reader);
+};
- char *default_string;
-
- upb_fielddef *f;
-));
+#endif
-UPB_BEGIN_EXTERN_C // {
+UPB_BEGIN_EXTERN_C
// C API.
-void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers,
- upb_status *status);
-void upb_descreader_uninit(upb_descreader *r);
-void upb_descreader_reset(upb_descreader *r);
+upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h);
upb_sink *upb_descreader_input(upb_descreader *r);
upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n);
const upb_handlers *upb_descreader_newhandlers(const void *owner);
-UPB_END_EXTERN_C // }
+UPB_END_EXTERN_C
#ifdef __cplusplus
// C++ implementation details. /////////////////////////////////////////////////
namespace upb {
namespace descriptor {
-inline Reader::Reader(const Handlers *h, Status *s) {
- upb_descreader_init(this, h, s);
+// C++ wrapper: forwards to upb_descreader_create(), which returns NULL on
+// allocation failure.
+inline Reader* Reader::Create(Environment* e, const Handlers *h) {
+ return upb_descreader_create(e, h);
+}
-inline Reader::~Reader() { upb_descreader_uninit(this); }
-inline void Reader::Reset() { upb_descreader_reset(this); }
inline Sink* Reader::input() { return upb_descreader_input(this); }
inline upb::Def** Reader::GetDefs(void* owner, int* n) {
return upb_descreader_getdefs(this, owner, n);
diff --git a/upb/env.c b/upb/env.c
new file mode 100644
index 0000000..7fa3334
--- /dev/null
+++ b/upb/env.c
@@ -0,0 +1,261 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2014 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/env.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+// One registered cleanup callback. Entries form a singly-linked list rooted
+// at upb_env.cleanup_head and are themselves allocated with upb_env_malloc().
+typedef struct cleanup_ent {
+ upb_cleanup_func *cleanup; // Function to call from upb_env_uninit().
+ void *ud; // Userdata passed to "cleanup".
+ struct cleanup_ent *next; // Next (earlier-registered) entry, or NULL.
+} cleanup_ent;
+
+static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
+
+/* Default allocator **********************************************************/
+
+// Just use realloc, keeping all allocated blocks in a linked list to destroy at
+// the end.
+
+// Header that default_alloc() places in front of every block it hands out.
+// The pointer returned to the caller is the address of data[].
+typedef struct mem_block {
+ // List is doubly-linked, because in cases where realloc() moves an existing
+ // block, we need to be able to remove the old pointer from the list
+ // efficiently.
+ struct mem_block *prev, *next;
+#ifndef NDEBUG
+ size_t size; // Doesn't include mem_block structure.
+#endif
+ char data[];
+} mem_block;
+
+// Userdata for default_alloc(): the head of the list of live blocks.
+// It is overlaid on the pointer-sized "default_alloc_ud" slot of upb_env and
+// upb_seededalloc, so it must stay exactly one pointer in size.
+typedef struct {
+ mem_block *head;
+} default_alloc_ud;
+
+// Default allocation function: a wrapper around realloc() that threads every
+// live block onto the list in "_ud" (a default_alloc_ud) so that
+// default_alloc_cleanup() can free them all at once.
+//
+// "ptr" is NULL for a fresh allocation, or a pointer previously returned by
+// this function for a resize. "oldsize" is only checked (in debug builds),
+// since realloc() preserves the old contents by itself. Returns NULL on
+// failure, in which case an existing block is left untouched and on the list.
+static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
+  UPB_UNUSED(oldsize);
+  default_alloc_ud *ud = _ud;
+
+  // Step back from the user pointer to the header that precedes it.
+  mem_block *from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
+
+#ifndef NDEBUG
+  if (from) {
+    assert(oldsize <= from->size);
+  }
+#endif
+
+  // Refuse requests whose total size (payload + header) would wrap around.
+  if (size > (size_t)-1 - sizeof(mem_block)) return NULL;
+
+  mem_block *block = realloc(from, size + sizeof(mem_block));
+  if (!block) return NULL;
+
+#ifndef NDEBUG
+  block->size = size;
+#endif
+
+  if (from) {
+    if (block != from) {
+      // The block was moved, so every pointer to it must be updated to its
+      // new location: the links in the next and prev blocks, or -- when the
+      // block has no prev, i.e. it is the first block -- the list head.
+      // Leaving the head stale would make default_alloc_cleanup() walk and
+      // free an already-released address.
+      if (block->next) block->next->prev = block;
+      if (block->prev) {
+        block->prev->next = block;
+      } else {
+        ud->head = block;
+      }
+    }
+  } else {
+    // Insert at head of linked list.
+    block->prev = NULL;
+    block->next = ud->head;
+    if (block->next) block->next->prev = block;
+    ud->head = block;
+  }
+
+  return &block->data;
+}
+
+// Frees every block on the default allocator's list. "_ud" is the
+// default_alloc_ud. The list head is left as-is, so the allocator state must
+// not be used again afterwards.
+static void default_alloc_cleanup(void *_ud) {
+  default_alloc_ud *ud = _ud;
+  mem_block *next;
+
+  for (mem_block *cur = ud->head; cur != NULL; cur = next) {
+    next = cur->next;  // Read the link before the block is released.
+    free(cur);
+  }
+}
+
+
+/* Standard error functions ***************************************************/
+
+// Default error callback: prints the status message to stderr. Returns false,
+// i.e. tells the reporter not to attempt recovery. "ud" is unused.
+static bool default_err(void *ud, const upb_status *status) {
+ UPB_UNUSED(ud);
+ fprintf(stderr, "upb error: %s\n", upb_status_errmsg(status));
+ return false;
+}
+
+static bool write_err_to(void *ud, const upb_status *status) {
+ upb_status *copy_to = ud;
+ upb_status_copy(copy_to, status);
+ return false;
+}
+
+
+/* upb_env ********************************************************************/
+
+// Initializes "e" to an empty environment: no cleanups registered, nothing
+// allocated, no error seen, the realloc()-backed default allocator and the
+// print-to-stderr default error callback installed.
+void upb_env_init(upb_env *e) {
+  // The default allocator's state (just a list head) lives inline in the
+  // env, overlaid on the pointer-sized default_alloc_ud slot.
+  default_alloc_ud *alloc_state = (default_alloc_ud*)&e->default_alloc_ud;
+  alloc_state->head = NULL;
+
+  e->cleanup_head = NULL;
+  e->bytes_allocated = 0;
+  e->ok_ = true;
+
+  // Set default functions.
+  upb_env_seterrorfunc(e, default_err, NULL);
+  upb_env_setallocfunc(e, default_alloc, alloc_state);
+}
+
+// Tears down "e": runs every registered cleanup function (most recently
+// registered first, since entries are pushed at the head of the list), then
+// releases all memory if the default allocator is still installed.
+void upb_env_uninit(upb_env *e) {
+  for (cleanup_ent *ent = e->cleanup_head; ent != NULL; ent = ent->next) {
+    ent->cleanup(ent->ud);
+  }
+
+  // Must do this after running cleanup functions, because this will delete
+  // the memory we store our cleanup entries in!
+  if (e->alloc != default_alloc) return;
+  default_alloc_cleanup(e->alloc_ud);
+}
+
+// Installs "alloc" as the env's allocation function, with "ud" as the
+// userdata passed to it on every call. Nothing already allocated is migrated.
+UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
+ void *ud) {
+ e->alloc = alloc;
+ e->alloc_ud = ud;
+}
+
+// Installs "func" as the env's error callback, with "ud" as the userdata
+// passed to it by upb_env_reporterror().
+UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
+ void *ud) {
+ e->err = func;
+ e->err_ud = ud;
+}
+
+// Routes errors reported on "e" into "status": installs write_err_to() as the
+// error callback with "status" as its userdata. "status" must stay valid for
+// as long as errors may be reported.
+void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
+  upb_env_seterrorfunc(e, write_err_to, status);
+}
+
+// Returns the env's ok flag: true after upb_env_init(), false once
+// upb_env_reporterror() has been called.
+bool upb_env_ok(const upb_env *e) {
+ return e->ok_;
+}
+
+// Marks the env as not ok and passes "status" to the installed error
+// callback. Returns the callback's result (true means the caller should try
+// to recover). The ok flag is cleared even if the callback returns true.
+bool upb_env_reporterror(upb_env *e, const upb_status *status) {
+ e->ok_ = false;
+ return e->err(e->err_ud, status);
+}
+
+// Registers "func" to be called with "ud" by upb_env_uninit(). The list node
+// is allocated from the env itself. Returns false if that allocation fails,
+// in which case nothing is registered.
+bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
+  cleanup_ent *node = upb_env_malloc(e, sizeof(*node));
+  if (node == NULL) return false;
+
+  // Push at the front, so cleanups run in reverse order of registration.
+  node->next = e->cleanup_head;
+  node->ud = ud;
+  node->cleanup = func;
+  e->cleanup_head = node;
+
+  return true;
+}
+
+// Allocates "size" bytes through the env's allocation function (called with
+// ptr == NULL and oldsize == 0) and returns whatever that function returns.
+// "size" is added to bytes_allocated before the allocation is attempted, so
+// the counter also includes requests that fail.
+// NOTE(review): a failed allocation does not clear the env's ok flag here.
+void *upb_env_malloc(upb_env *e, size_t size) {
+ e->bytes_allocated += size;
+ if (e->alloc == seeded_alloc) {
+ // This is equivalent to the next branch, but allows inlining for a
+ // measurable perf benefit.
+ return seeded_alloc(e->alloc_ud, NULL, 0, size);
+ } else {
+ return e->alloc(e->alloc_ud, NULL, 0, size);
+ }
+}
+
+// Resizes "ptr" to "size" bytes through the env's allocation function,
+// preserving the first "oldsize" bytes. Requires oldsize <= size. Returns
+// NULL if the allocation function fails. bytes_allocated is not updated.
+void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
+  assert(oldsize <= size);
+  char *ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
+
+#ifndef NDEBUG
+  // Overwrite non-preserved memory to ensure callers are passing the oldsize
+  // that they truly require. Skipped when the allocation failed: there is no
+  // buffer to scribble on, and the caller must see the NULL instead of a
+  // crash inside memset().
+  if (ret) memset(ret + oldsize, 0xff, size - oldsize);
+#endif
+
+  return ret;
+}
+
+// Returns the running total of sizes requested through upb_env_malloc().
+// upb_env_realloc() does not contribute to it.
+size_t upb_env_bytesallocated(const upb_env *e) {
+ return e->bytes_allocated;
+}
+
+
+/* upb_seededalloc ************************************************************/
+
+// Be conservative and choose 16 in case anyone is using SSE.
+static const size_t maxalign = 16;
+
+// Rounds "size" up to the next multiple of maxalign (a size that is already
+// a multiple is returned unchanged).
+static size_t align_up(size_t size) {
+  size_t padded = size + maxalign - 1;
+  return padded - (padded % maxalign);
+}
+
+// Allocation function of upb_seededalloc ("ud" is the upb_seededalloc).
+// Fresh allocations (oldsize == 0) are carved off the seed region while they
+// fit; every other request goes to the fallback allocator. Sizes are rounded
+// up to maxalign. Returns NULL if the fallback allocator fails.
+UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
+                                          size_t size) {
+  upb_seededalloc *a = ud;
+  size = align_up(size);
+
+  assert(a->mem_limit >= a->mem_ptr);
+
+  if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
+    // Fast path: we can satisfy from the initial allocation.
+    void *ret = a->mem_ptr;
+    a->mem_ptr += size;
+    return ret;
+  }
+
+  // Slow path: fallback to other allocator.
+  a->need_cleanup = true;
+
+  char *chptr = ptr;
+  if (chptr != NULL && chptr >= a->mem_base && chptr < a->mem_limit) {
+    // "ptr" lives in the seed region, which the fallback allocator does not
+    // own and must never be asked to resize (the default one would hand it to
+    // realloc()). Get a fresh block from the fallback allocator and copy the
+    // preserved bytes over; the seed bytes are simply abandoned.
+    assert(oldsize <= (size_t)(a->mem_limit - chptr));
+    void *ret = a->alloc(a->alloc_ud, NULL, 0, size);
+    if (ret) memcpy(ret, ptr, oldsize);
+    return ret;
+  }
+
+  return a->alloc(a->alloc_ud, ptr, oldsize, size);
+}
+
+// Initializes "a" to allocate from the "len"-byte region at "mem" first, with
+// the realloc()-backed default allocator installed as the fallback.
+void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
+  char *region = mem;
+
+  a->mem_base = region;
+  a->mem_ptr = region;
+  a->mem_limit = region + len;
+  a->returned_allocfunc = false;
+  a->need_cleanup = false;
+
+  // The fallback allocator's state (a list head) lives inline in "a".
+  default_alloc_ud *fallback_state = (default_alloc_ud*)&a->default_alloc_ud;
+  fallback_state->head = NULL;
+
+  upb_seededalloc_setfallbackalloc(a, default_alloc, fallback_state);
+}
+
+// Releases whatever the fallback allocator handed out, but only if the
+// fallback was ever used and it is still the built-in default allocator. A
+// custom fallback allocator is responsible for its own memory.
+void upb_seededalloc_uninit(upb_seededalloc *a) {
+  if (!a->need_cleanup) return;
+  if (a->alloc != default_alloc) return;
+  default_alloc_cleanup(a->alloc_ud);
+}
+
+// Installs "alloc" (with userdata "ud") as the allocator used once the seed
+// region cannot satisfy a request. Asserts that
+// upb_seededalloc_getallocfunc() has not been called yet.
+UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
+ upb_alloc_func *alloc,
+ void *ud) {
+ assert(!a->returned_allocfunc);
+ a->alloc = alloc;
+ a->alloc_ud = ud;
+}
+
+// Returns the allocation function to pair with "a" as userdata (always
+// seeded_alloc). Also records that it was handed out, after which
+// upb_seededalloc_setfallbackalloc() asserts if called.
+upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
+ a->returned_allocfunc = true;
+ return seeded_alloc;
+}
diff --git a/upb/env.h b/upb/env.h
new file mode 100644
index 0000000..78dda20
--- /dev/null
+++ b/upb/env.h
@@ -0,0 +1,256 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2014 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A upb::Environment provides a means for injecting malloc and an
+ * error-reporting callback into encoders/decoders. This allows them to be
+ * independent of nearly all assumptions about their actual environment.
+ *
+ * It is also a container for allocating the encoders/decoders themselves that
+ * insulates clients from knowing their actual size. This provides ABI
+ * compatibility even if the size of the objects change. And this allows the
+ * structure definitions to be in the .c files instead of the .h files, making
+ * the .h files smaller and more readable.
+ */
+
+#include "upb/upb.h"
+
+#ifndef UPB_ENV_H_
+#define UPB_ENV_H_
+
+#ifdef __cplusplus
+namespace upb {
+class Environment;
+class SeededAllocator;
+}
+#endif
+
+UPB_DECLARE_TYPE(upb::Environment, upb_env);
+UPB_DECLARE_TYPE(upb::SeededAllocator, upb_seededalloc);
+
+// Allocation callback. Called with ptr == NULL and oldsize == 0 for a fresh
+// allocation of "size" bytes, or with a previously returned "ptr" to resize
+// it while preserving its first "oldsize" bytes. Returns NULL on failure.
+typedef void *upb_alloc_func(void *ud, void *ptr, size_t oldsize, size_t size);
+// Cleanup callback; called with its userdata when the env is uninitialized.
+typedef void upb_cleanup_func(void *ud);
+// Error callback; returns true if the caller should try to recover.
+typedef bool upb_error_func(void *ud, const upb_status *status);
+
+// An environment is *not* thread-safe.
+UPB_DEFINE_CLASS0(upb::Environment,
+ public:
+ Environment();
+ ~Environment();
+
+ // Set a custom memory allocation function for the environment. May ONLY
+ // be called before any calls to Malloc()/Realloc()/AddCleanup() below.
+ // If this is not called, the system realloc() function will be used.
+ // The given user pointer "ud" will be passed to the allocation function.
+ //
+ // The allocation function will not receive corresponding "free" calls. It
+ // must ensure that the memory is valid for the lifetime of the Environment,
+ // but it may be reclaimed any time thereafter. The likely usage is that
+ // "ud" points to a stateful allocator, and that the allocator frees all
+ // memory, arena-style, when it is destroyed. In this case the allocator must
+ // outlive the Environment. Another possibility is that the allocation
+ // function returns GC-able memory that is guaranteed to be GC-rooted for the
+ // life of the Environment.
+ void SetAllocationFunction(upb_alloc_func* alloc, void* ud);
+
+ // Convenience wrapper: installs allocator->GetAllocationFunction() as the
+ // allocation function, with the allocator itself as the user pointer.
+ template<class T>
+ void SetAllocator(T* allocator) {
+ SetAllocationFunction(allocator->GetAllocationFunction(), allocator);
+ }
+
+ // Set a custom error reporting function.
+ void SetErrorFunction(upb_error_func* func, void* ud);
+
+ // Set the error reporting function to simply copy the reported status into
+ // the given status and tell the reporter not to attempt recovery (the
+ // callback returns false). It does not call abort().
+ void ReportErrorsTo(Status* status);
+
+ // Returns true if all allocations and AddCleanup() calls have succeeded,
+ // and no errors were reported with ReportError() (except ones that recovered
+ // successfully).
+ // NOTE(review): the implementation in env.c only clears this flag inside
+ // upb_env_reporterror(), and does so whether or not the callback asks to
+ // recover; failed allocations do not affect it. Confirm intended contract.
+ bool ok() const;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Functions for use by encoders/decoders.
+
+ // Reports an error to this environment's callback, returning true if
+ // the caller should try to recover.
+ bool ReportError(const Status* status);
+
+ // Allocate memory. Uses the environment's allocation function.
+ //
+ // There is no need to free(). All memory is reclaimed automatically, and is
+ // guaranteed to remain valid for the lifetime of the Environment.
+ void* Malloc(size_t size);
+
+ // Reallocate memory. Preserves "oldsize" bytes from the existing buffer.
+ // Requires: oldsize <= existing_size.
+ //
+ // TODO(haberman): should we also enforce that oldsize <= size?
+ // (upb_env_realloc() in env.c already assert()s oldsize <= size.)
+ void* Realloc(void* ptr, size_t oldsize, size_t size);
+
+ // Add a cleanup function to run when the environment is destroyed.
+ // Returns false on out-of-memory.
+ //
+ // The first call to AddCleanup() after SetAllocationFunction() is guaranteed
+ // to return true -- this makes it possible to robustly set a cleanup handler
+ // for a custom allocation function.
+ // NOTE(review): upb_env_addcleanup() in env.c has no special case for the
+ // first call; it returns false whenever the allocation function returns
+ // NULL. Confirm how this guarantee is meant to be provided.
+ bool AddCleanup(upb_cleanup_func* func, void* ud);
+
+ // Total number of bytes that have been allocated. It is undefined what
+ // Realloc() does to this counter.
+ size_t BytesAllocated() const;
+
+ private:
+ UPB_DISALLOW_COPY_AND_ASSIGN(Environment);
+,
+UPB_DEFINE_STRUCT0(upb_env,
+ bool ok_;
+ size_t bytes_allocated;
+
+ // Alloc function.
+ upb_alloc_func *alloc;
+ void *alloc_ud;
+
+ // Error-reporting function.
+ upb_error_func *err;
+ void *err_ud;
+
+ // Userdata for default alloc func.
+ void *default_alloc_ud;
+
+ // Cleanup entries. Pointer to a cleanup_ent, defined in env.c
+ void *cleanup_head;
+
+ // For future expansion, since the size of this struct is exposed to users.
+ void *future1;
+ void *future2;
+));
+
+UPB_BEGIN_EXTERN_C
+
+void upb_env_init(upb_env *e);
+void upb_env_uninit(upb_env *e);
+void upb_env_setallocfunc(upb_env *e, upb_alloc_func *func, void *ud);
+void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud);
+void upb_env_reporterrorsto(upb_env *e, upb_status *status);
+bool upb_env_ok(const upb_env *e);
+bool upb_env_reporterror(upb_env *e, const upb_status *status);
+void *upb_env_malloc(upb_env *e, size_t size);
+void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size);
+bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud);
+size_t upb_env_bytesallocated(const upb_env *e);
+
+UPB_END_EXTERN_C
+
+// An allocator that allocates from an initial memory region (likely the stack)
+// before falling back to another allocator.
+UPB_DEFINE_CLASS0(upb::SeededAllocator,
+ public:
+ // "mem" points to the initial region and "len" is its size in bytes. The
+ // allocator hands out pieces of this region, so the region must stay valid
+ // for as long as memory obtained from the allocator is in use.
+ SeededAllocator(void *mem, size_t len);
+ ~SeededAllocator();
+
+ // Set a custom fallback memory allocation function for the allocator, to use
+ // once the initial region runs out.
+ //
+ // May ONLY be called before GetAllocationFunction(). If this is not
+ // called, the system realloc() will be the fallback allocator.
+ void SetFallbackAllocator(upb_alloc_func *alloc, void *ud);
+
+ // Gets the allocation function for this allocator.
+ // Pass it to Environment::SetAllocationFunction() together with a pointer
+ // to this allocator as the user pointer (Environment::SetAllocator() does
+ // exactly that).
+ upb_alloc_func* GetAllocationFunction();
+
+ private:
+ UPB_DISALLOW_COPY_AND_ASSIGN(SeededAllocator);
+,
+UPB_DEFINE_STRUCT0(upb_seededalloc,
+ // Fallback alloc function.
+ upb_alloc_func *alloc;
+ // NOTE(review): never read or written by env.c as of this change.
+ upb_cleanup_func *alloc_cleanup;
+ void *alloc_ud;
+ // Set once the fallback allocator has been called at least once.
+ bool need_cleanup;
+ // Set once the allocation function has been handed out.
+ bool returned_allocfunc;
+
+ // Userdata for default alloc func.
+ void *default_alloc_ud;
+
+ // Pointers for the initial memory region.
+ // mem_base is its start, mem_ptr the next free byte, mem_limit its end.
+ char *mem_base;
+ char *mem_ptr;
+ char *mem_limit;
+
+ // For future expansion, since the size of this struct is exposed to users.
+ void *future1;
+ void *future2;
+));
+
+UPB_BEGIN_EXTERN_C
+
+void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len);
+void upb_seededalloc_uninit(upb_seededalloc *a);
+void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, upb_alloc_func *func,
+ void *ud);
+upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+// Inline C++ method bodies. Each one forwards to the C function of the
+// corresponding name (upb_env_* / upb_seededalloc_*), passing "this" as the
+// C struct; the constructors and destructors map to the init/uninit pairs.
+namespace upb {
+
+inline Environment::Environment() {
+ upb_env_init(this);
+}
+inline Environment::~Environment() {
+ upb_env_uninit(this);
+}
+inline void Environment::SetAllocationFunction(upb_alloc_func *alloc,
+ void *ud) {
+ upb_env_setallocfunc(this, alloc, ud);
+}
+inline void Environment::SetErrorFunction(upb_error_func *func, void *ud) {
+ upb_env_seterrorfunc(this, func, ud);
+}
+inline void Environment::ReportErrorsTo(Status* status) {
+ upb_env_reporterrorsto(this, status);
+}
+inline bool Environment::ok() const {
+ return upb_env_ok(this);
+}
+inline bool Environment::ReportError(const Status* status) {
+ return upb_env_reporterror(this, status);
+}
+inline void *Environment::Malloc(size_t size) {
+ return upb_env_malloc(this, size);
+}
+inline void *Environment::Realloc(void *ptr, size_t oldsize, size_t size) {
+ return upb_env_realloc(this, ptr, oldsize, size);
+}
+inline bool Environment::AddCleanup(upb_cleanup_func *func, void *ud) {
+ return upb_env_addcleanup(this, func, ud);
+}
+inline size_t Environment::BytesAllocated() const {
+ return upb_env_bytesallocated(this);
+}
+
+inline SeededAllocator::SeededAllocator(void *mem, size_t len) {
+ upb_seededalloc_init(this, mem, len);
+}
+inline SeededAllocator::~SeededAllocator() {
+ upb_seededalloc_uninit(this);
+}
+inline void SeededAllocator::SetFallbackAllocator(upb_alloc_func *alloc,
+ void *ud) {
+ upb_seededalloc_setfallbackalloc(this, alloc, ud);
+}
+inline upb_alloc_func *SeededAllocator::GetAllocationFunction() {
+ return upb_seededalloc_getallocfunc(this);
+}
+
+} // namespace upb
+
+#endif // __cplusplus
+
+#endif // UPB_ENV_H_
diff --git a/upb/handlers-inl.h b/upb/handlers-inl.h
index 87a755f..70ddf91 100644
--- a/upb/handlers-inl.h
+++ b/upb/handlers-inl.h
@@ -147,12 +147,17 @@ template <class T> struct disable_if_same<T, T> {};
template <class T> void DeletePointer(void *p) { delete static_cast<T>(p); }
template <class T1, class T2>
-struct FirstUnlessVoid {
+struct FirstUnlessVoidOrBool {
typedef T1 value;
};
template <class T2>
-struct FirstUnlessVoid<void, T2> {
+struct FirstUnlessVoidOrBool<void, T2> {
+ typedef T2 value;
+};
+
+template <class T2>
+struct FirstUnlessVoidOrBool<bool, T2> {
typedef T2 value;
};
@@ -534,10 +539,14 @@ inline MethodSig4<R, C, P1, P2, P3, P4> MatchFunc(R (C::*f)(P1, P2, P3, P4)) {
//
// 1. If the function returns void, make it return the expected type and with
// a value that always indicates success.
-// 2. If the function is expected to return void* but doesn't, wrap it so it
-// does (either by returning the closure param if the wrapped function
-// returns void or by casting a different pointer type to void* for
-// return).
+// 2. If the function returns bool, make it return the expected type with a
+// value that indicates success or failure.
+//
+// The "expected type" for return is:
+// 1. void* for start handlers. If the closure parameter has a different type
+// we will cast it to void* for the return in the success case.
+// 2. size_t for string buffer handlers.
+// 3. bool for everything else.
// Template parameters are FuncN type and desired return type.
template <class F, class R, class Enable = void>
@@ -926,10 +935,13 @@ inline Handler<T>::Handler(F func)
attr_.SetClosureType(UniquePtrForType<typename F::FuncInfo::Closure>());
// We use the closure type (from the first parameter) if the return type is
- // void. This is all nonsense for non START* handlers, but it doesn't matter
- // because in that case the value will be ignored.
- typedef typename FirstUnlessVoid<typename F::FuncInfo::Return,
- typename F::FuncInfo::Closure>::value
+ // void or bool, since these are the two cases we wrap to return the closure's
+ // type anyway.
+ //
+ // This is all nonsense for non START* handlers, but it doesn't matter because
+ // in that case the value will be ignored.
+ typedef typename FirstUnlessVoidOrBool<typename F::FuncInfo::Return,
+ typename F::FuncInfo::Closure>::value
EffectiveReturn;
attr_.SetReturnClosureType(UniquePtrForType<EffectiveReturn>());
}
@@ -1124,9 +1136,7 @@ inline BytesHandler::BytesHandler() {
upb_byteshandler_init(this);
}
-inline BytesHandler::~BytesHandler() {
- upb_byteshandler_uninit(this);
-}
+inline BytesHandler::~BytesHandler() {}
} // namespace upb
diff --git a/upb/handlers.c b/upb/handlers.c
index fe368e5..5a253f1 100644
--- a/upb/handlers.c
+++ b/upb/handlers.c
@@ -176,7 +176,14 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
if (closure_type && *context_closure_type &&
closure_type != *context_closure_type) {
// TODO(haberman): better message for debugging.
- upb_status_seterrmsg(&h->status_, "closure type does not match");
+ if (f) {
+ upb_status_seterrf(&h->status_,
+ "closure type does not match for field %s",
+ upb_fielddef_name(f));
+ } else {
+ upb_status_seterrmsg(
+ &h->status_, "closure type does not match for message-level handler");
+ }
return false;
}
diff --git a/upb/handlers.h b/upb/handlers.h
index 2267d98..1b8864a 100644
--- a/upb/handlers.h
+++ b/upb/handlers.h
@@ -755,10 +755,8 @@ UPB_DEFINE_STRUCT0(upb_byteshandler,
));
void upb_byteshandler_init(upb_byteshandler *h);
-void upb_byteshandler_uninit(upb_byteshandler *h);
// Caller must ensure that "d" outlives the handlers.
-// TODO(haberman): support handlerfree function for the data.
// TODO(haberman): should this have a "freeze" operation? It's not necessary
// for memory management, but could be useful to force immutability and provide
// a convenient moment to verify that all registration succeeded.
diff --git a/upb/json/parser.c b/upb/json/parser.c
index 08cd13d..f4f5628 100644
--- a/upb/json/parser.c
+++ b/upb/json/parser.c
@@ -33,6 +33,71 @@
#include "upb/json/parser.h"
+#define UPB_JSON_MAX_DEPTH 64
+
+typedef struct {  // Element type of upb_json_parser::stack (one entry per open JSON scope).
+ upb_sink sink;  // Output sink for this frame; the bottom frame is bound to the caller's sink at creation.
+
+ // The current message in which we're parsing, and the field whose value we're
+ // expecting next.
+ const upb_msgdef *m;
+ const upb_fielddef *f;
+
+ // We are in a repeated-field context, ready to emit mapentries as
+ // submessages. This flag alters the start-of-object (open-brace) behavior to
+ // begin a sequence of mapentry messages rather than a single submessage.
+ bool is_map;
+
+ // We are in a map-entry message context. This flag is set when parsing the
+ // value field of a single map entry and indicates to all value-field parsers
+ // (subobjects, strings, numbers, and bools) that the map-entry submessage
+ // should end as soon as the value is parsed.
+ bool is_mapentry;
+
+ // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
+ // message's map field that we're currently parsing. This differs from |f|
+ // because |f| is the field in the *current* message (i.e., the map-entry
+ // message itself), not the parent's field that leads to this map.
+ const upb_fielddef *mapfield;
+} upb_jsonparser_frame;
+
+struct upb_json_parser {  // All state for one parser; allocated from an upb_env by upb_json_parser_create().
+ upb_env *env;  // Allocator for the parser itself and for the accumulate buffer.
+ upb_byteshandler input_handler_;
+ upb_bytessink input_;
+
+ // Stack to track the JSON scopes we are in.
+ upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
+ upb_jsonparser_frame *top;
+ upb_jsonparser_frame *limit;  // stack + UPB_JSON_MAX_DEPTH, i.e. one past the last frame.
+
+ upb_status *status;  // Passed to upb_status_seterr*() when an error is reported.
+
+ // Ragel's internal parsing stack for the parsing state machine.
+ int current_state;  // Holds Ragel's "cs" variable.
+ int parser_stack[UPB_JSON_MAX_DEPTH];
+ int parser_top;  // Holds Ragel's "top" variable.
+
+ // The handle for the current buffer.
+ const upb_bufhandle *handle;
+
+ // Accumulate buffer. See details in parser.rl.
+ const char *accumulated;
+ size_t accumulated_len;
+ char *accumulate_buf;
+ size_t accumulate_buf_size;
+
+ // Multi-part text data. See details in parser.rl.
+ int multipart_state;
+ upb_selector_t string_selector;
+
+ // Input capture. See details in parser.rl.
+ const char *capture;
+
+ // Intermediate result of parsing a unicode escape sequence.
+ uint32_t digit;
+};
+
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
@@ -235,12 +300,13 @@ static void accumulate_clear(upb_json_parser *p) {
// Used internally by accumulate_append().
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
- size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
+ size_t old_size = p->accumulate_buf_size;
+ size_t new_size = UPB_MAX(old_size, 128);
while (new_size < need) {
new_size = saturating_multiply(new_size, 2);
}
- void *mem = realloc(p->accumulate_buf, new_size);
+ void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
if (!mem) {
upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
return false;
@@ -989,11 +1055,11 @@ static void end_object(upb_json_parser *p) {
// final state once, when the closing '"' is seen.
-#line 1085 "upb/json/parser.rl"
+#line 1151 "upb/json/parser.rl"
-#line 997 "upb/json/parser.c"
+#line 1063 "upb/json/parser.c"
static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1,
5, 1, 6, 1, 7, 1, 8, 1,
@@ -1142,7 +1208,7 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1;
-#line 1088 "upb/json/parser.rl"
+#line 1154 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
@@ -1162,7 +1228,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
capture_resume(parser, buf);
-#line 1166 "upb/json/parser.c"
+#line 1232 "upb/json/parser.c"
{
int _klen;
unsigned int _trans;
@@ -1237,118 +1303,118 @@ _match:
switch ( *_acts++ )
{
case 0:
-#line 1000 "upb/json/parser.rl"
+#line 1066 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 1:
-#line 1001 "upb/json/parser.rl"
+#line 1067 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
break;
case 2:
-#line 1005 "upb/json/parser.rl"
+#line 1071 "upb/json/parser.rl"
{ start_text(parser, p); }
break;
case 3:
-#line 1006 "upb/json/parser.rl"
+#line 1072 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p)); }
break;
case 4:
-#line 1012 "upb/json/parser.rl"
+#line 1078 "upb/json/parser.rl"
{ start_hex(parser); }
break;
case 5:
-#line 1013 "upb/json/parser.rl"
+#line 1079 "upb/json/parser.rl"
{ hexdigit(parser, p); }
break;
case 6:
-#line 1014 "upb/json/parser.rl"
+#line 1080 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_hex(parser)); }
break;
case 7:
-#line 1020 "upb/json/parser.rl"
+#line 1086 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(escape(parser, p)); }
break;
case 8:
-#line 1026 "upb/json/parser.rl"
+#line 1092 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 9:
-#line 1029 "upb/json/parser.rl"
+#line 1095 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} }
break;
case 10:
-#line 1031 "upb/json/parser.rl"
+#line 1097 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
break;
case 11:
-#line 1036 "upb/json/parser.rl"
+#line 1102 "upb/json/parser.rl"
{ start_member(parser); }
break;
case 12:
-#line 1037 "upb/json/parser.rl"
+#line 1103 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_membername(parser)); }
break;
case 13:
-#line 1040 "upb/json/parser.rl"
+#line 1106 "upb/json/parser.rl"
{ end_member(parser); }
break;
case 14:
-#line 1046 "upb/json/parser.rl"
+#line 1112 "upb/json/parser.rl"
{ start_object(parser); }
break;
case 15:
-#line 1049 "upb/json/parser.rl"
+#line 1115 "upb/json/parser.rl"
{ end_object(parser); }
break;
case 16:
-#line 1055 "upb/json/parser.rl"
+#line 1121 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); }
break;
case 17:
-#line 1059 "upb/json/parser.rl"
+#line 1125 "upb/json/parser.rl"
{ end_array(parser); }
break;
case 18:
-#line 1064 "upb/json/parser.rl"
+#line 1130 "upb/json/parser.rl"
{ start_number(parser, p); }
break;
case 19:
-#line 1065 "upb/json/parser.rl"
+#line 1131 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_number(parser, p)); }
break;
case 20:
-#line 1067 "upb/json/parser.rl"
+#line 1133 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); }
break;
case 21:
-#line 1068 "upb/json/parser.rl"
+#line 1134 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_stringval(parser)); }
break;
case 22:
-#line 1070 "upb/json/parser.rl"
+#line 1136 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
break;
case 23:
-#line 1072 "upb/json/parser.rl"
+#line 1138 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
break;
case 24:
-#line 1074 "upb/json/parser.rl"
+#line 1140 "upb/json/parser.rl"
{ /* null value */ }
break;
case 25:
-#line 1076 "upb/json/parser.rl"
+#line 1142 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); }
break;
case 26:
-#line 1077 "upb/json/parser.rl"
+#line 1143 "upb/json/parser.rl"
{ end_subobject(parser); }
break;
case 27:
-#line 1082 "upb/json/parser.rl"
+#line 1148 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
-#line 1352 "upb/json/parser.c"
+#line 1418 "upb/json/parser.c"
}
}
@@ -1361,7 +1427,7 @@ _again:
_out: {}
}
-#line 1107 "upb/json/parser.rl"
+#line 1173 "upb/json/parser.rl"
if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@@ -1390,26 +1456,7 @@ bool end(void *closure, const void *hd) {
return true;
}
-
-/* Public API *****************************************************************/
-
-void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
- p->limit = p->stack + UPB_JSON_MAX_DEPTH;
- p->accumulate_buf = NULL;
- p->accumulate_buf_size = 0;
- upb_byteshandler_init(&p->input_handler_);
- upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
- upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
- upb_bytessink_reset(&p->input_, &p->input_handler_, p);
- p->status = status;
-}
-
-void upb_json_parser_uninit(upb_json_parser *p) {
- upb_byteshandler_uninit(&p->input_handler_);
- free(p->accumulate_buf);
-}
-
-void upb_json_parser_reset(upb_json_parser *p) {
+static void json_parser_reset(upb_json_parser *p) {
p->top = p->stack;
p->top->f = NULL;
p->top->is_map = false;
@@ -1419,25 +1466,48 @@ void upb_json_parser_reset(upb_json_parser *p) {
int top;
// Emit Ragel initialization of the parser.
-#line 1423 "upb/json/parser.c"
+#line 1470 "upb/json/parser.c"
{
cs = json_start;
top = 0;
}
-#line 1164 "upb/json/parser.rl"
+#line 1211 "upb/json/parser.rl"
p->current_state = cs;
p->parser_top = top;
accumulate_clear(p);
p->multipart_state = MULTIPART_INACTIVE;
p->capture = NULL;
+ p->accumulated = NULL;
}
-void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
- upb_json_parser_reset(p);
- upb_sink_reset(&p->top->sink, sink->handlers, sink->closure);
- p->top->m = upb_handlers_msgdef(sink->handlers);
- p->accumulated = NULL;
+
+/* Public API *****************************************************************/
+
+// Allocates a JSON parser from |env| and binds it to |output|.
+//
+// The parser struct comes from upb_env_malloc(); its input byteshandler is
+// wired to parse()/end(), and the bottom stack frame is pointed at |output|'s
+// handlers and closure. Returns NULL if the allocation fails.
+upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
+#ifndef NDEBUG
+ // Debug-only baseline for the preallocation-hint check at the end.
+ const size_t size_before = upb_env_bytesallocated(env);
+#endif
+ upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
+ // This function returns a pointer, so failure is NULL rather than "false".
+ if (!p) return NULL;
+
+ p->env = env;
+ // No upb_status is passed to this function, yet error paths such as
+ // accumulate_realloc() hand p->status to upb_status_seterrmsg(). Start from
+ // NULL instead of an indeterminate pointer left by upb_env_malloc().
+ // NOTE(review): assumes the upb_status_seterr*() setters accept NULL -- confirm.
+ p->status = NULL;
+ p->limit = p->stack + UPB_JSON_MAX_DEPTH;
+ p->accumulate_buf = NULL;
+ p->accumulate_buf_size = 0;
+ upb_byteshandler_init(&p->input_handler_);
+ upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
+ upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
+ upb_bytessink_reset(&p->input_, &p->input_handler_, p);
+
+ // json_parser_reset() sets p->top, which the two statements below rely on.
+ json_parser_reset(p);
+ upb_sink_reset(&p->top->sink, output->handlers, output->closure);
+ p->top->m = upb_handlers_msgdef(output->handlers);
+
+ // If this fails, uncomment and increase the value in parser.h.
+ // fprintf(stderr, "%zu\n", upb_env_bytesallocated(env) - size_before);
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
+ return p;
+}
upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
diff --git a/upb/json/parser.h b/upb/json/parser.h
index c693edf..b932adf 100644
--- a/upb/json/parser.h
+++ b/upb/json/parser.h
@@ -11,6 +11,7 @@
#ifndef UPB_JSON_PARSER_H_
#define UPB_JSON_PARSER_H_
+#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@@ -23,96 +24,32 @@ class Parser;
UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser);
-// Internal-only struct used by the parser. A parser frame corresponds
-// one-to-one with a handler (sink) frame.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- upb_sink sink;
- // The current message in which we're parsing, and the field whose value we're
- // expecting next.
- const upb_msgdef *m;
- const upb_fielddef *f;
-
- // We are in a repeated-field context, ready to emit mapentries as
- // submessages. This flag alters the start-of-object (open-brace) behavior to
- // begin a sequence of mapentry messages rather than a single submessage.
- bool is_map;
- // We are in a map-entry message context. This flag is set when parsing the
- // value field of a single map entry and indicates to all value-field parsers
- // (subobjects, strings, numbers, and bools) that the map-entry submessage
- // should end as soon as the value is parsed.
- bool is_mapentry;
- // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
- // message's map field that we're currently parsing. This differs from |f|
- // because |f| is the field in the *current* message (i.e., the map-entry
- // message itself), not the parent's field that leads to this map.
- const upb_fielddef *mapfield;
-} upb_jsonparser_frame;
-
-
/* upb::json::Parser **********************************************************/
-#define UPB_JSON_MAX_DEPTH 64
+// Preallocation hint: parser won't allocate more bytes than this when first
+// constructed. This hint may be an overestimate for some build configurations.
+// But if the parser library is upgraded without recompiling the application,
+// it may be an underestimate.
+#define UPB_JSON_PARSER_SIZE 3568
+
+#ifdef __cplusplus
// Parses an incoming BytesStream, pushing the results to the destination sink.
-UPB_DEFINE_CLASS0(upb::json::Parser,
+class upb::json::Parser {
public:
- Parser(Status* status);
- ~Parser();
+ static Parser* Create(Environment* env, Sink* output);
- // Resets the state of the printer, so that it will expect to begin a new
- // document.
- void Reset();
-
- // Resets the output pointer which will serve as our closure. Implies
- // Reset().
- void ResetOutput(Sink* output);
-
- // The input to the printer.
BytesSink* input();
-,
-UPB_DEFINE_STRUCT0(upb_json_parser,
- upb_byteshandler input_handler_;
- upb_bytessink input_;
-
- // Stack to track the JSON scopes we are in.
- upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
- upb_jsonparser_frame *top;
- upb_jsonparser_frame *limit;
- upb_status *status;
+ private:
+ UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser);
+};
- // Ragel's internal parsing stack for the parsing state machine.
- int current_state;
- int parser_stack[UPB_JSON_MAX_DEPTH];
- int parser_top;
-
- // The handle for the current buffer.
- const upb_bufhandle *handle;
-
- // Accumulate buffer. See details in parser.rl.
- const char *accumulated;
- size_t accumulated_len;
- char *accumulate_buf;
- size_t accumulate_buf_size;
-
- // Multi-part text data. See details in parser.rl.
- int multipart_state;
- upb_selector_t string_selector;
-
- // Input capture. See details in parser.rl.
- const char *capture;
-
- // Intermediate result of parsing a unicode escape sequence.
- uint32_t digit;
-));
+#endif
UPB_BEGIN_EXTERN_C
-void upb_json_parser_init(upb_json_parser *p, upb_status *status);
-void upb_json_parser_uninit(upb_json_parser *p);
-void upb_json_parser_reset(upb_json_parser *p);
-void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *output);
+upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output);
upb_bytessink *upb_json_parser_input(upb_json_parser *p);
UPB_END_EXTERN_C
@@ -121,11 +58,8 @@ UPB_END_EXTERN_C
namespace upb {
namespace json {
-inline Parser::Parser(Status* status) { upb_json_parser_init(this, status); }
-inline Parser::~Parser() { upb_json_parser_uninit(this); }
-inline void Parser::Reset() { upb_json_parser_reset(this); }
-inline void Parser::ResetOutput(Sink* output) {
- upb_json_parser_resetoutput(this, output);
+inline Parser* Parser::Create(Environment* env, Sink* output) {  // Forwards to the C constructor and returns its result unchanged.
+ return upb_json_parser_create(env, output);
 }
inline BytesSink* Parser::input() {
return upb_json_parser_input(this);
diff --git a/upb/json/parser.rl b/upb/json/parser.rl
index b171617..81d1514 100644
--- a/upb/json/parser.rl
+++ b/upb/json/parser.rl
@@ -31,6 +31,71 @@
#include "upb/json/parser.h"
+#define UPB_JSON_MAX_DEPTH 64
+
+typedef struct {  // Element type of upb_json_parser::stack (one entry per open JSON scope).
+ upb_sink sink;  // Output sink for this frame; the bottom frame is bound to the caller's sink at creation.
+
+ // The current message in which we're parsing, and the field whose value we're
+ // expecting next.
+ const upb_msgdef *m;
+ const upb_fielddef *f;
+
+ // We are in a repeated-field context, ready to emit mapentries as
+ // submessages. This flag alters the start-of-object (open-brace) behavior to
+ // begin a sequence of mapentry messages rather than a single submessage.
+ bool is_map;
+
+ // We are in a map-entry message context. This flag is set when parsing the
+ // value field of a single map entry and indicates to all value-field parsers
+ // (subobjects, strings, numbers, and bools) that the map-entry submessage
+ // should end as soon as the value is parsed.
+ bool is_mapentry;
+
+ // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
+ // message's map field that we're currently parsing. This differs from |f|
+ // because |f| is the field in the *current* message (i.e., the map-entry
+ // message itself), not the parent's field that leads to this map.
+ const upb_fielddef *mapfield;
+} upb_jsonparser_frame;
+
+struct upb_json_parser {  // All state for one parser; allocated from an upb_env by upb_json_parser_create().
+ upb_env *env;  // Allocator for the parser itself and for the accumulate buffer.
+ upb_byteshandler input_handler_;
+ upb_bytessink input_;
+
+ // Stack to track the JSON scopes we are in.
+ upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
+ upb_jsonparser_frame *top;
+ upb_jsonparser_frame *limit;  // stack + UPB_JSON_MAX_DEPTH, i.e. one past the last frame.
+
+ upb_status *status;  // Passed to upb_status_seterr*() when an error is reported.
+
+ // Ragel's internal parsing stack for the parsing state machine.
+ int current_state;  // Holds Ragel's "cs" variable.
+ int parser_stack[UPB_JSON_MAX_DEPTH];
+ int parser_top;  // Holds Ragel's "top" variable.
+
+ // The handle for the current buffer.
+ const upb_bufhandle *handle;
+
+ // Accumulate buffer. See details in parser.rl.
+ const char *accumulated;
+ size_t accumulated_len;
+ char *accumulate_buf;
+ size_t accumulate_buf_size;
+
+ // Multi-part text data. See details in parser.rl.
+ int multipart_state;
+ upb_selector_t string_selector;
+
+ // Input capture. See details in parser.rl.
+ const char *capture;
+
+ // Intermediate result of parsing a unicode escape sequence.
+ uint32_t digit;
+};
+
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
@@ -233,12 +298,13 @@ static void accumulate_clear(upb_json_parser *p) {
// Used internally by accumulate_append().
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
- size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
+ size_t old_size = p->accumulate_buf_size;
+ size_t new_size = UPB_MAX(old_size, 128);
while (new_size < need) {
new_size = saturating_multiply(new_size, 2);
}
- void *mem = realloc(p->accumulate_buf, new_size);
+ void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
if (!mem) {
upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
return false;
@@ -1132,26 +1198,7 @@ bool end(void *closure, const void *hd) {
return true;
}
-
-/* Public API *****************************************************************/
-
-void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
- p->limit = p->stack + UPB_JSON_MAX_DEPTH;
- p->accumulate_buf = NULL;
- p->accumulate_buf_size = 0;
- upb_byteshandler_init(&p->input_handler_);
- upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
- upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
- upb_bytessink_reset(&p->input_, &p->input_handler_, p);
- p->status = status;
-}
-
-void upb_json_parser_uninit(upb_json_parser *p) {
- upb_byteshandler_uninit(&p->input_handler_);
- free(p->accumulate_buf);
-}
-
-void upb_json_parser_reset(upb_json_parser *p) {
+static void json_parser_reset(upb_json_parser *p) {
p->top = p->stack;
p->top->f = NULL;
p->top->is_map = false;
@@ -1166,13 +1213,36 @@ void upb_json_parser_reset(upb_json_parser *p) {
accumulate_clear(p);
p->multipart_state = MULTIPART_INACTIVE;
p->capture = NULL;
+ p->accumulated = NULL;
}
-void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
- upb_json_parser_reset(p);
- upb_sink_reset(&p->top->sink, sink->handlers, sink->closure);
- p->top->m = upb_handlers_msgdef(sink->handlers);
- p->accumulated = NULL;
+
+/* Public API *****************************************************************/
+
+// Allocates a JSON parser from |env| and binds it to |output|.
+//
+// The parser struct comes from upb_env_malloc(); its input byteshandler is
+// wired to parse()/end(), and the bottom stack frame is pointed at |output|'s
+// handlers and closure. Returns NULL if the allocation fails.
+upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
+#ifndef NDEBUG
+ // Debug-only baseline for the preallocation-hint check at the end.
+ const size_t size_before = upb_env_bytesallocated(env);
+#endif
+ upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
+ // This function returns a pointer, so failure is NULL rather than "false".
+ if (!p) return NULL;
+
+ p->env = env;
+ // No upb_status is passed to this function, yet error paths such as
+ // accumulate_realloc() hand p->status to upb_status_seterrmsg(). Start from
+ // NULL instead of an indeterminate pointer left by upb_env_malloc().
+ // NOTE(review): assumes the upb_status_seterr*() setters accept NULL -- confirm.
+ p->status = NULL;
+ p->limit = p->stack + UPB_JSON_MAX_DEPTH;
+ p->accumulate_buf = NULL;
+ p->accumulate_buf_size = 0;
+ upb_byteshandler_init(&p->input_handler_);
+ upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
+ upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
+ upb_bytessink_reset(&p->input_, &p->input_handler_, p);
+
+ // json_parser_reset() sets p->top, which the two statements below rely on.
+ json_parser_reset(p);
+ upb_sink_reset(&p->top->sink, output->handlers, output->closure);
+ p->top->m = upb_handlers_msgdef(output->handlers);
+
+ // If this fails, uncomment and increase the value in parser.h.
+ // fprintf(stderr, "%zu\n", upb_env_bytesallocated(env) - size_before);
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
+ return p;
+}
upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
diff --git a/upb/json/printer.c b/upb/json/printer.c
index c7267e0..539f83a 100644
--- a/upb/json/printer.c
+++ b/upb/json/printer.c
@@ -15,6 +15,27 @@
#include <string.h>
#include <stdint.h>
+struct upb_json_printer {  // State for one printer; allocated by upb_json_printer_create().
+ upb_sink input_;  // Reset at creation with the printer itself as closure.
+ // BytesSink closure.
+ void *subc_;
+ upb_bytessink *output_;  // Destination passed to upb_json_printer_create().
+
+ // We track the depth so that we know when to emit startstr/endstr on the
+ // output.
+ int depth_;  // Set to 0 by json_printer_reset().
+
+ // Have we emitted the first element? This state is necessary to emit commas
+ // without leaving a trailing comma in arrays/maps. We keep this state per
+ // frame depth.
+ //
+ // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
+ // We count frames (contexts in which we separate elements by commas) as both
+ // repeated fields and messages (maps), and the worst case is a
+ // message->repeated field->submessage->repeated field->... nesting.
+ bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
+};
+
// StringPiece; a pointer plus a length.
typedef struct {
const char *ptr;
@@ -731,25 +752,29 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
#undef TYPE
}
-/* Public API *****************************************************************/
-
-void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h) {
- p->output_ = NULL;
+static void json_printer_reset(upb_json_printer *p) {
p->depth_ = 0;
- upb_sink_reset(&p->input_, h, p);
}
-void upb_json_printer_uninit(upb_json_printer *p) {
- UPB_UNUSED(p);
-}
-void upb_json_printer_reset(upb_json_printer *p) {
- p->depth_ = 0;
-}
+/* Public API *****************************************************************/
+
+upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
+ upb_bytessink *output) {  // Returns NULL if allocating the printer from |e| fails.
+#ifndef NDEBUG
+ size_t size_before = upb_env_bytesallocated(e);  // Debug-only baseline for the size assert below.
+#endif
+
+ upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
+ if (!p) return NULL;
-void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output) {
- upb_json_printer_reset(p);
 p->output_ = output;
+ json_printer_reset(p);
+ upb_sink_reset(&p->input_, h, p);  // The printer is the closure for its own input sink.
+
+ // If this fails, increase the value in printer.h.
+ assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
+ return p;
 }
upb_sink *upb_json_printer_input(upb_json_printer *p) {
diff --git a/upb/json/printer.h b/upb/json/printer.h
index fbc206d..c73cb79 100644
--- a/upb/json/printer.h
+++ b/upb/json/printer.h
@@ -11,6 +11,7 @@
#ifndef UPB_JSON_TYPED_PRINTER_H_
#define UPB_JSON_TYPED_PRINTER_H_
+#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@@ -26,71 +27,48 @@ UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer);
/* upb::json::Printer *********************************************************/
-// Prints an incoming stream of data to a BytesSink in JSON format.
-UPB_DEFINE_CLASS0(upb::json::Printer,
- public:
- Printer(const upb::Handlers* handlers);
- ~Printer();
+#define UPB_JSON_PRINTER_SIZE 168
- // Resets the state of the printer, so that it will expect to begin a new
- // document.
- void Reset();
+#ifdef __cplusplus
- // Resets the output pointer which will serve as our closure. Implies
- // Reset().
- void ResetOutput(BytesSink* output);
+// Prints an incoming stream of data to a BytesSink in JSON format.
+class upb::json::Printer {
+ public:
+ static Printer* Create(Environment* env, const upb::Handlers* handlers,
+ BytesSink* output);
// The input to the printer.
Sink* input();
// Returns handlers for printing according to the specified schema.
static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* md);
-,
-UPB_DEFINE_STRUCT0(upb_json_printer,
- upb_sink input_;
- // BytesSink closure.
- void *subc_;
- upb_bytessink *output_;
-
- // We track the depth so that we know when to emit startstr/endstr on the
- // output.
- int depth_;
- // Have we emitted the first element? This state is necessary to emit commas
- // without leaving a trailing comma in arrays/maps. We keep this state per
- // frame depth.
- //
- // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
- // We count frames (contexts in which we separate elements by commas) as both
- // repeated fields and messages (maps), and the worst case is a
- // message->repeated field->submessage->repeated field->... nesting.
- bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
-));
-
-UPB_BEGIN_EXTERN_C // {
-// Native C API.
+ static const size_t kSize = UPB_JSON_PRINTER_SIZE;
-void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h);
-void upb_json_printer_uninit(upb_json_printer *p);
-void upb_json_printer_reset(upb_json_printer *p);
-void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output);
+ private:
+ UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer);
+};
+
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+// Native C API.
+upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
+ upb_bytessink *output);
upb_sink *upb_json_printer_input(upb_json_printer *p);
const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
const void *owner);
-UPB_END_EXTERN_C // }
+UPB_END_EXTERN_C
#ifdef __cplusplus
namespace upb {
namespace json {
-inline Printer::Printer(const upb::Handlers* handlers) {
- upb_json_printer_init(this, handlers);
-}
-inline Printer::~Printer() { upb_json_printer_uninit(this); }
-inline void Printer::Reset() { upb_json_printer_reset(this); }
-inline void Printer::ResetOutput(BytesSink* output) {
- upb_json_printer_resetoutput(this, output);
+inline Printer* Printer::Create(Environment* env, const upb::Handlers* handlers,
+ BytesSink* output) {  // Forwards to the C constructor; its result (NULL on allocation failure) is returned as-is.
+ return upb_json_printer_create(env, handlers, output);
 }
inline Sink* Printer::input() { return upb_json_printer_input(this); }
inline reffed_ptr<const Handlers> Printer::NewHandlers(
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c
index 377cbb4..a17332b 100644
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@@ -64,7 +64,6 @@ mgroup *newgroup(const void *owner) {
static void freemethod(upb_refcounted *r) {
upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
- upb_byteshandler_uninit(&method->input_handler_);
if (method->dest_handlers_) {
upb_handlers_unref(method->dest_handlers_, method);
diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc
index 3181cab..e72e4e3 100644
--- a/upb/pb/compile_decoder_x64.dasc
+++ b/upb/pb/compile_decoder_x64.dasc
@@ -242,7 +242,7 @@ static void emit_static_asm(jitcompiler *jc) {
|
|2:
| // Resume decoder.
- | lea ARG2_64, DECODER->callstack
+ | mov ARG2_64, DECODER->callstack
| sub rsp, ARG3_64
| mov ARG1_64, rsp
| callp memcpy // Restore stack.
@@ -255,7 +255,7 @@ static void emit_static_asm(jitcompiler *jc) {
asmlabel(jc, "exitjit");
|->exitjit:
| // Save the stack into DECODER->callstack.
- | lea ARG1_64, DECODER->callstack
+ | mov ARG1_64, DECODER->callstack
| mov ARG2_64, rsp
| mov ARG3_64, DECODER->saved_rsp
| sub ARG3_64, rsp
@@ -300,11 +300,11 @@ static void emit_static_asm(jitcompiler *jc) {
| sub rcx, rdx
| jb ->err // Len is greater than enclosing message.
| mov FRAME->end_ofs, rcx
+ | cmp FRAME, DECODER->limit
+ | je >3 // Stack overflow
| add FRAME, sizeof(upb_pbdecoder_frame)
| mov DELIMEND, PTR
| add DELIMEND, rdx
- | cmp FRAME, DECODER->limit
- | je >3 // Stack overflow
| mov dword FRAME->groupnum, 0
| test rcx, rcx
| jz >2
@@ -1071,9 +1071,9 @@ static void jitbytecode(jitcompiler *jc) {
| // code with the packed code-path. If this is changed later, this
| // store can be removed.
| mov qword FRAME->end_ofs, 0
- | add FRAME, sizeof(upb_pbdecoder_frame)
| cmp FRAME, DECODER->limit
| je ->err
+ | add FRAME, sizeof(upb_pbdecoder_frame)
| mov dword FRAME->groupnum, arg
break;
case OP_PUSHLENDELIM:
diff --git a/upb/pb/compile_decoder_x64.h b/upb/pb/compile_decoder_x64.h
index ef4459d..9527361 100644
--- a/upb/pb/compile_decoder_x64.h
+++ b/upb/pb/compile_decoder_x64.h
@@ -28,8 +28,8 @@ static const unsigned char upb_jit_actionlist[2162] = {
73,139,159,233,77,139,167,233,77,139,174,233,73,139,174,233,73,43,175,233,
73,3,175,233,73,139,151,233,72,133,210,15,133,244,248,252,255,208,73,139,
135,233,73,199,135,233,0,0,0,0,248,1,255,91,65,92,65,93,65,94,65,95,93,195,
- 248,2,73,141,183,233,72,41,212,72,137,231,72,184,237,237,65,84,73,137,228,
- 72,129,228,239,252,255,208,76,137,228,65,92,195,255,248,11,73,141,191,233,
+ 248,2,73,139,183,233,72,41,212,72,137,231,72,184,237,237,65,84,73,137,228,
+ 72,129,228,239,252,255,208,76,137,228,65,92,195,255,248,11,73,139,191,233,
72,137,230,73,139,151,233,72,41,226,73,137,151,233,137,195,72,184,237,237,
65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,137,216,73,139,
167,233,91,65,92,65,93,65,94,65,95,93,195,255,248,12,73,57,159,233,15,132,
@@ -40,7 +40,7 @@ static const unsigned char upb_jit_actionlist[2162] = {
255,76,57,227,15,132,244,253,255,76,137,225,72,41,217,72,131,252,249,1,15,
130,244,253,255,15,182,19,132,210,15,137,244,254,248,7,232,244,14,248,8,72,
131,195,1,72,137,252,233,72,41,217,72,41,209,15,130,244,15,73,137,142,233,
- 73,129,198,239,72,137,221,72,1,213,77,59,183,233,15,132,244,249,65,199,134,
+ 77,59,183,233,15,132,244,249,73,129,198,239,72,137,221,72,1,213,65,199,134,
233,0,0,0,0,72,133,201,15,132,244,248,77,139,167,233,72,57,252,235,15,135,
244,248,76,57,229,15,135,244,248,255,73,137,252,236,248,2,195,248,3,73,139,
159,233,76,137,252,255,255,72,190,237,237,255,190,237,255,49,252,246,255,
@@ -122,8 +122,8 @@ static const unsigned char upb_jit_actionlist[2162] = {
1,248,2,255,72,137,218,76,137,225,72,41,217,77,139,135,233,72,184,237,237,
65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,72,1,195,255,
76,57,227,15,132,244,249,232,244,29,248,3,255,76,137,227,255,72,57,252,235,
- 15,133,244,1,248,4,255,77,137,174,233,73,199,134,233,0,0,0,0,73,129,198,239,
- 77,59,183,233,15,132,244,15,65,199,134,233,237,255,232,244,13,255,73,129,
+ 15,133,244,1,248,4,255,77,137,174,233,73,199,134,233,0,0,0,0,77,59,183,233,
+ 15,132,244,15,73,129,198,239,65,199,134,233,237,255,232,244,13,255,73,129,
252,238,239,77,139,174,233,255,77,139,167,233,73,3,174,233,73,59,175,233,
15,130,244,247,76,57,229,15,135,244,247,73,137,252,236,248,1,255,72,57,221,
15,132,245,255,232,245,255,248,9,72,131,196,8,195,255
@@ -419,7 +419,7 @@ static void emit_static_asm(jitcompiler *jc) {
//|
//|2:
//| // Resume decoder.
- //| lea ARG2_64, DECODER->callstack
+ //| mov ARG2_64, DECODER->callstack
//| sub rsp, ARG3_64
//| mov ARG1_64, rsp
//| callp memcpy // Restore stack.
@@ -434,7 +434,7 @@ static void emit_static_asm(jitcompiler *jc) {
asmlabel(jc, "exitjit");
//|->exitjit:
//| // Save the stack into DECODER->callstack.
- //| lea ARG1_64, DECODER->callstack
+ //| mov ARG1_64, DECODER->callstack
//| mov ARG2_64, rsp
//| mov ARG3_64, DECODER->saved_rsp
//| sub ARG3_64, rsp
@@ -490,11 +490,11 @@ static void emit_static_asm(jitcompiler *jc) {
//| sub rcx, rdx
//| jb ->err // Len is greater than enclosing message.
//| mov FRAME->end_ofs, rcx
+ //| cmp FRAME, DECODER->limit
+ //| je >3 // Stack overflow
//| add FRAME, sizeof(upb_pbdecoder_frame)
//| mov DELIMEND, PTR
//| add DELIMEND, rdx
- //| cmp FRAME, DECODER->limit
- //| je >3 // Stack overflow
//| mov dword FRAME->groupnum, 0
//| test rcx, rcx
//| jz >2
@@ -504,7 +504,7 @@ static void emit_static_asm(jitcompiler *jc) {
//| cmp DELIMEND, DATAEND
//| ja >2
//| mov DATAEND, DELIMEND // If DELIMEND >= PTR && DELIMEND < DATAEND
- dasm_put(Dst, 337, Dt1(->end_ofs), sizeof(upb_pbdecoder_frame), Dt2(->limit), Dt1(->groupnum), Dt2(->end));
+ dasm_put(Dst, 337, Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), Dt2(->end));
# 317 "upb/pb/compile_decoder_x64.dasc"
//|2:
//| ret
@@ -1609,11 +1609,11 @@ static void jitbytecode(jitcompiler *jc) {
//| // code with the packed code-path. If this is changed later, this
//| // store can be removed.
//| mov qword FRAME->end_ofs, 0
- //| add FRAME, sizeof(upb_pbdecoder_frame)
//| cmp FRAME, DECODER->limit
//| je ->err
+ //| add FRAME, sizeof(upb_pbdecoder_frame)
//| mov dword FRAME->groupnum, arg
- dasm_put(Dst, 2070, Dt1(->sink.closure), Dt1(->end_ofs), sizeof(upb_pbdecoder_frame), Dt2(->limit), Dt1(->groupnum), arg);
+ dasm_put(Dst, 2070, Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), arg);
# 1078 "upb/pb/compile_decoder_x64.dasc"
break;
case OP_PUSHLENDELIM:
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 522b02e..a780666 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -19,10 +19,7 @@
*/
#include <inttypes.h>
-#include <setjmp.h>
-#include <stdarg.h>
#include <stddef.h>
-#include <stdlib.h>
#include "upb/pb/decoder.int.h"
#include "upb/pb/varint.int.h"
@@ -70,18 +67,17 @@ static bool consumes_input(opcode op) {
static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
-// It's unfortunate that we have to micro-manage the compiler this way,
-// especially since this tuning is necessarily specific to one hardware
-// configuration. But emperically on a Core i7, performance increases 30-50%
-// with these annotations. Every instance where these appear, gcc 4.2.1 made
-// the wrong decision and degraded performance in benchmarks.
-#define FORCEINLINE static inline __attribute__((always_inline))
-#define NOINLINE __attribute__((noinline))
+// It's unfortunate that we have to micro-manage the compiler with
+// UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
+// specific to one hardware configuration. But empirically on a Core i7,
+// performance increases 30-50% with these annotations. Every instance where
+// these appear, gcc 4.2.1 made the wrong decision and degraded performance in
+// benchmarks.
static void seterr(upb_pbdecoder *d, const char *msg) {
- // TODO(haberman): encapsulate this access to pipeline->status, but not sure
- // exactly what that interface should look like.
- upb_status_seterrmsg(d->status, msg);
+ upb_status status = UPB_STATUS_INIT;
+ upb_status_seterrmsg(&status, msg);
+ upb_env_reporterror(d->env, &status);
}
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
@@ -249,7 +245,8 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
// Copies the next "bytes" bytes into "buf" and advances the stream.
// Requires that this many bytes are available in the current buffer.
-FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
+UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
+ size_t bytes) {
assert(bytes <= curbufleft(d));
memcpy(buf, d->ptr, bytes);
advance(d, bytes);
@@ -258,8 +255,8 @@ FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
// Slow path for getting the next "bytes" bytes, regardless of whether they are
// available in the current buffer or not. Returns a status code as described
// in decoder.int.h.
-static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
- size_t bytes) {
+UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
+ size_t bytes) {
const size_t avail = curbufleft(d);
consumebytes(d, buf, avail);
bytes -= avail;
@@ -280,7 +277,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
// Gets the next "bytes" bytes, regardless of whether they are available in the
// current buffer or not. Returns a status code as described in decoder.int.h.
-FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
+UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
+ size_t bytes) {
if (curbufleft(d) >= bytes) {
// Buffer has enough data to satisfy.
consumebytes(d, buf, bytes);
@@ -290,8 +288,8 @@ FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
}
}
-static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
- size_t bytes) {
+UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
+ size_t bytes) {
size_t ret = curbufleft(d);
memcpy(buf, d->ptr, ret);
if (in_residual_buf(d, d->ptr)) {
@@ -302,7 +300,8 @@ static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
return ret;
}
-FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
+UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
+ size_t bytes) {
if (curbufleft(d) >= bytes) {
memcpy(buf, d->ptr, bytes);
return bytes;
@@ -316,8 +315,8 @@ FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
// Slow path for decoding a varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
-NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
- uint64_t *u64) {
+UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
+ uint64_t *u64) {
*u64 = 0;
uint8_t byte = 0x80;
int bitpos;
@@ -335,7 +334,7 @@ NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
// Decodes a varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
-FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
+UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
*u64 = *d->ptr;
advance(d, 1);
@@ -358,7 +357,7 @@ FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
// Decodes a 32-bit varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
-FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
+UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
uint64_t u64;
int32_t ret = decode_varint(d, &u64);
if (ret >= 0) return ret;
@@ -377,14 +376,14 @@ FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
// Decodes a fixed32 from the current buffer position.
// Returns a status code as described in decoder.int.h.
// TODO: proper byte swapping for big-endian machines.
-FORCEINLINE int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
+UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
return getbytes(d, u32, 4);
}
// Decodes a fixed64 from the current buffer position.
// Returns a status code as described in decoder.int.h.
// TODO: proper byte swapping for big-endian machines.
-FORCEINLINE int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
+UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
return getbytes(d, u64, 8);
}
@@ -408,7 +407,7 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
if (end > fr->end_ofs) {
seterr(d, "Submessage end extends past enclosing submessage.");
return false;
- } else if ((fr + 1) == d->limit) {
+ } else if (fr == d->limit) {
seterr(d, kPbDecoderStackOverflow);
return false;
}
@@ -435,8 +434,8 @@ static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
// Pops a frame from the decoder stack.
static void decoder_pop(upb_pbdecoder *d) { d->top--; }
-NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
- uint64_t expected) {
+UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
+ uint64_t expected) {
uint64_t data = 0;
size_t bytes = upb_value_size(expected);
size_t read = peekbytes(d, &data, bytes);
@@ -814,7 +813,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
upb_pbdecoder *d = closure;
UPB_UNUSED(size_hint);
+ d->top->end_ofs = UINT64_MAX;
+ d->bufstart_ofs = 0;
d->call_len = 1;
+ d->callstack[0] = &halt;
d->pc = pc;
return d;
}
@@ -823,6 +825,8 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
UPB_UNUSED(hd);
UPB_UNUSED(size_hint);
upb_pbdecoder *d = closure;
+ d->top->end_ofs = UINT64_MAX;
+ d->bufstart_ofs = 0;
d->call_len = 0;
return d;
}
@@ -879,55 +883,117 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
return true;
}
-void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m,
- upb_status *s) {
- d->limit = &d->stack[UPB_DECODER_MAX_NESTING];
- upb_bytessink_reset(&d->input_, &m->input_handler_, d);
- d->method_ = m;
- d->callstack[0] = &halt;
- d->status = s;
- upb_pbdecoder_reset(d);
-}
-
void upb_pbdecoder_reset(upb_pbdecoder *d) {
d->top = d->stack;
- d->top->end_ofs = UINT64_MAX;
d->top->groupnum = 0;
- d->bufstart_ofs = 0;
d->ptr = d->residual;
d->buf = d->residual;
d->end = d->residual;
d->residual_end = d->residual;
- d->call_len = 1;
}
-uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
- return offset(d);
+static size_t stacksize(upb_pbdecoder *d, size_t entries) {
+ UPB_UNUSED(d);
+ return entries * sizeof(upb_pbdecoder_frame);
}
-// Not currently required, but to support outgrowing the static stack we need
-// this.
-void upb_pbdecoder_uninit(upb_pbdecoder *d) {
+static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
UPB_UNUSED(d);
-}
-const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
- return d->method_;
+#ifdef UPB_USE_JIT_X64
+ if (d->method_->is_native_) {
+ // Each native stack frame needs two pointers, plus we need a few frames for
+ // the enter/exit trampolines.
+ size_t ret = entries * sizeof(void*) * 2;
+ ret += sizeof(void*) * 10;
+ return ret;
+ }
+#endif
+
+ return entries * sizeof(uint32_t*);
}
-bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink* sink) {
- // TODO(haberman): do we need to test whether the decoder is already on the
- // stack (like calling this from within a callback)? Should we support
- // rebinding the output at all?
+upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
+ upb_sink *sink) {
+ const size_t default_max_nesting = 64;
+#ifndef NDEBUG
+ size_t size_before = upb_env_bytesallocated(e);
+#endif
+
+ upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
+ if (!d) return NULL;
+
+ d->method_ = m;
+ d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
+ d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
+ if (!d->stack || !d->callstack) {
+ return NULL;
+ }
+
+ d->env = e;
+ d->limit = d->stack + default_max_nesting - 1;
+ d->stack_size = default_max_nesting;
+
+ upb_pbdecoder_reset(d);
+ upb_bytessink_reset(&d->input_, &m->input_handler_, d);
+
assert(sink);
if (d->method_->dest_handlers_) {
if (sink->handlers != d->method_->dest_handlers_)
- return false;
+ return NULL;
}
upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
- return true;
+
+ // If this fails, increase the value in decoder.h.
+ assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
+ return d;
+}
+
+uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
+ return offset(d);
+}
+
+const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
+ return d->method_;
}
upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
return &d->input_;
}
+
+size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
+ return d->stack_size;
+}
+
+// Sets the maximum nesting depth, i.e. the number of frames the decoder stack
+// may hold.  If "max" exceeds the current capacity, the stack and callstack
+// are grown through the decoder's env.
+//
+// Returns false if "max" leaves no room for the frames already in use, or if
+// a reallocation fails.  On failure the previous limit stays in force.
+bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
+  assert(d->top >= d->stack);
+
+  // Index of the current frame.  The stack may move when it is reallocated,
+  // so positions are remembered as indexes and the pointers rebuilt afterwards.
+  const size_t depth = (size_t)(d->top - d->stack);
+
+  if (max <= depth) {
+    // The limit frame is stack[max - 1] and the push check compares the
+    // current frame for equality with it, so the limit must never fall below
+    // the current frame.  This also rejects max == 0.
+    return false;
+  }
+
+  if (max > d->stack_size) {
+    // Need to reallocate stack and callstack to accommodate.
+    const size_t limit_idx = (size_t)(d->limit - d->stack);
+    size_t old_size = stacksize(d, d->stack_size);
+    size_t new_size = stacksize(d, max);
+    void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
+    if (!p) {
+      return false;
+    }
+
+    // Rebase every pointer into the stack right away, so that top and limit
+    // stay valid even if the callstack reallocation below fails.
+    d->stack = p;
+    d->top = d->stack + depth;
+    d->limit = d->stack + limit_idx;
+
+    old_size = callstacksize(d, d->stack_size);
+    new_size = callstacksize(d, max);
+    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
+    if (!p) {
+      return false;
+    }
+    d->callstack = p;
+
+    d->stack_size = max;
+  }
+
+  d->limit = d->stack + max - 1;
+  return true;
+}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index e93f3bd..d37718c 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -18,7 +18,7 @@
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
-#include "upb/table.int.h"
+#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@@ -37,44 +37,6 @@ UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder);
UPB_DECLARE_TYPE(upb::pb::DecoderMethod, upb_pbdecodermethod);
UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts);
-// The maximum that any submessages can be nested. Matches proto2's limit.
-// This specifies the size of the decoder's statically-sized array and therefore
-// setting it high will cause the upb::pb::Decoder object to be larger.
-//
-// If necessary we can add a runtime-settable property to Decoder that allow
-// this to be larger than the compile-time setting, but this would add
-// complexity, particularly since we would have to decide how/if to give users
-// the ability to set a custom memory allocation function.
-#define UPB_DECODER_MAX_NESTING 64
-
-// Internal-only struct used by the decoder.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- // Space optimization note: we store two pointers here that the JIT
- // doesn't need at all; the upb_handlers* inside the sink and
- // the dispatch table pointer. We can optimze so that the JIT uses
- // smaller stack frames than the interpreter. The only thing we need
- // to guarantee is that the fallback routines can find end_ofs.
- upb_sink sink;
-
- // The absolute stream offset of the end-of-frame delimiter.
- // Non-delimited frames (groups and non-packed repeated fields) reuse the
- // delimiter of their parent, even though the frame may not end there.
- //
- // NOTE: the JIT stores a slightly different value here for non-top frames.
- // It stores the value relative to the end of the enclosed message. But the
- // top frame is still stored the same way, which is important for ensuring
- // that calls from the JIT into C work correctly.
- uint64_t end_ofs;
- const uint32_t *base;
-
- // 0 indicates a length-delimited field.
- // A positive number indicates a known group.
- // A negative number indicates an unknown group.
- int32_t groupnum;
- upb_inttable *dispatch; // Not used by the JIT.
-} upb_pbdecoder_frame;
-
// The parameters one uses to construct a DecoderMethod.
// TODO(haberman): move allowjit here? Seems more convenient for users.
UPB_DEFINE_CLASS0(upb::pb::DecoderMethodOptions,
@@ -152,22 +114,31 @@ UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted,
upb_inttable dispatch;
));
+// Preallocation hint: decoder won't allocate more bytes than this when first
+// constructed. This hint may be an overestimate for some build configurations.
+// But if the decoder library is upgraded without recompiling the application,
+// it may be an underestimate.
+#define UPB_PB_DECODER_SIZE 4400
+
+#ifdef __cplusplus
+
// A Decoder receives binary protobuf data on its input sink and pushes the
// decoded data to its output sink.
-UPB_DEFINE_CLASS0(upb::pb::Decoder,
+class upb::pb::Decoder {
public:
// Constructs a decoder instance for the given method, which must outlive this
// decoder. Any errors during parsing will be set on the given status, which
// must also outlive this decoder.
- Decoder(const DecoderMethod* method, Status* status);
- ~Decoder();
+ //
+ // The sink must match the given method.
+ static Decoder* Create(Environment* env, const DecoderMethod* method,
+ Sink* output);
// Returns the DecoderMethod this decoder is parsing from.
- // TODO(haberman): Do users need to be able to rebind this?
const DecoderMethod* method() const;
- // Resets the state of the decoder.
- void Reset();
+ // The sink on which this decoder receives input.
+ BytesSink* input();
// Returns number of bytes successfully parsed.
//
@@ -178,76 +149,25 @@ UPB_DEFINE_CLASS0(upb::pb::Decoder,
// callback.
uint64_t BytesParsed() const;
- // Resets the output sink of the Decoder.
- // The given sink must match method()->dest_handlers().
+ // Gets/sets the parsing nesting limit. If the total number of nested
+ // submessages and repeated fields hits this limit, parsing will fail. This
+ // is a resource limit that controls the amount of memory used by the parsing
+ // stack.
//
- // This must be called at least once before the decoder can be used. It may
- // only be called with the decoder is in a state where it was just created or
- // reset with pipeline.Reset(). The given sink must be from the same pipeline
- // as this decoder.
- bool ResetOutput(Sink* sink);
-
- // The sink on which this decoder receives input.
- BytesSink* input();
-
- private:
- UPB_DISALLOW_COPY_AND_ASSIGN(Decoder);
-,
-UPB_DEFINE_STRUCT0(upb_pbdecoder, UPB_QUOTE(
- // Our input sink.
- upb_bytessink input_;
-
- // The decoder method we are parsing with (owned).
- const upb_pbdecodermethod *method_;
-
- size_t call_len;
- const uint32_t *pc, *last;
+ // Setting the limit will fail if the parser is currently suspended at a depth
+ // greater than this, or if memory allocation of the stack fails.
+ size_t max_nesting() const;
+ bool set_max_nesting(size_t max);
- // Current input buffer and its stream offset.
- const char *buf, *ptr, *end, *checkpoint;
-
- // End of the delimited region, relative to ptr, or NULL if not in this buf.
- const char *delim_end;
-
- // End of the delimited region, relative to ptr, or end if not in this buf.
- const char *data_end;
-
- // Overall stream offset of "buf."
- uint64_t bufstart_ofs;
-
- // Buffer for residual bytes not parsed from the previous buffer.
- // The maximum number of residual bytes we require is 12; a five-byte
- // unknown tag plus an eight-byte value, less one because the value
- // is only a partial value.
- char residual[12];
- char *residual_end;
-
- // Stores the user buffer passed to our decode function.
- const char *buf_param;
- size_t size_param;
- const upb_bufhandle *handle;
+ void Reset();
-#ifdef UPB_USE_JIT_X64
- // Used momentarily by the generated code to store a value while a user
- // function is called.
- uint32_t tmp_len;
+ static const size_t kSize = UPB_PB_DECODER_SIZE;
- const void *saved_rsp;
-#endif
+ private:
+ UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder);
+};
- upb_status *status;
-
- // Our internal stack.
- upb_pbdecoder_frame *top, *limit;
- upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING];
-#ifdef UPB_USE_JIT_X64
- // Each native stack frame needs two pointers, plus we need a few frames for
- // the enter/exit trampolines.
- const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10];
-#else
- const uint32_t *callstack[UPB_DECODER_MAX_NESTING];
-#endif
-)));
+#endif // __cplusplus
// A class for caching protobuf processing code, whether bytecode for the
// interpreted decoder or machine code for the JIT.
@@ -296,14 +216,15 @@ UPB_DEFINE_STRUCT0(upb_pbcodecache,
UPB_BEGIN_EXTERN_C // {
-void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method,
- upb_status *status);
-void upb_pbdecoder_uninit(upb_pbdecoder *d);
-void upb_pbdecoder_reset(upb_pbdecoder *d);
+upb_pbdecoder *upb_pbdecoder_create(upb_env *e,
+ const upb_pbdecodermethod *method,
+ upb_sink *output);
const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d);
-bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink);
upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d);
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d);
+size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d);
+bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max);
+void upb_pbdecoder_reset(upb_pbdecoder *d);
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
const upb_handlers *h);
@@ -338,27 +259,27 @@ namespace upb {
namespace pb {
-inline Decoder::Decoder(const DecoderMethod* m, Status* s) {
- upb_pbdecoder_init(this, m, s);
-}
-inline Decoder::~Decoder() {
- upb_pbdecoder_uninit(this);
+// static
+inline Decoder* Decoder::Create(Environment* env, const DecoderMethod* m,
+ Sink* sink) {
+ return upb_pbdecoder_create(env, m, sink);
}
inline const DecoderMethod* Decoder::method() const {
return upb_pbdecoder_method(this);
}
-inline void Decoder::Reset() {
- upb_pbdecoder_reset(this);
+inline BytesSink* Decoder::input() {
+ return upb_pbdecoder_input(this);
}
inline uint64_t Decoder::BytesParsed() const {
return upb_pbdecoder_bytesparsed(this);
}
-inline bool Decoder::ResetOutput(Sink* sink) {
- return upb_pbdecoder_resetoutput(this, sink);
+inline size_t Decoder::max_nesting() const {
+ return upb_pbdecoder_maxnesting(this);
}
-inline BytesSink* Decoder::input() {
- return upb_pbdecoder_input(this);
+inline bool Decoder::set_max_nesting(size_t max) {
+ return upb_pbdecoder_setmaxnesting(this, max);
}
+inline void Decoder::Reset() { upb_pbdecoder_reset(this); }
inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) {
upb_pbdecodermethodopts_init(this, h);
diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h
index 302701e..5522be7 100644
--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@@ -13,8 +13,9 @@
#include <stdlib.h>
#include "upb/def.h"
#include "upb/handlers.h"
-#include "upb/sink.h"
#include "upb/pb/decoder.h"
+#include "upb/sink.h"
+#include "upb/table.int.h"
// Opcode definitions. The canonical meaning of each opcode is its
// implementation in the interpreter (the JIT is written to match this).
@@ -112,6 +113,95 @@ typedef struct {
#endif
} mgroup;
+// The maximum that any submessages can be nested. Matches proto2's limit.
+// This specifies the size of the decoder's statically-sized array and therefore
+// setting it high will cause the upb::pb::Decoder object to be larger.
+//
+// If necessary we can add a runtime-settable property to Decoder that allows
+// this to be larger than the compile-time setting, but this would add
+// complexity, particularly since we would have to decide how/if to give users
+// the ability to set a custom memory allocation function.
+#define UPB_DECODER_MAX_NESTING 64
+
+// Internal-only struct used by the decoder.
+typedef struct {
+ // Space optimization note: we store two pointers here that the JIT
+ // doesn't need at all; the upb_handlers* inside the sink and
+ // the dispatch table pointer. We can optimize so that the JIT uses
+ // smaller stack frames than the interpreter. The only thing we need
+ // to guarantee is that the fallback routines can find end_ofs.
+ upb_sink sink;
+
+ // The absolute stream offset of the end-of-frame delimiter.
+ // Non-delimited frames (groups and non-packed repeated fields) reuse the
+ // delimiter of their parent, even though the frame may not end there.
+ //
+ // NOTE: the JIT stores a slightly different value here for non-top frames.
+ // It stores the value relative to the end of the enclosed message. But the
+ // top frame is still stored the same way, which is important for ensuring
+ // that calls from the JIT into C work correctly.
+ uint64_t end_ofs;
+ const uint32_t *base;
+
+ // 0 indicates a length-delimited field.
+ // A positive number indicates a known group.
+ // A negative number indicates an unknown group.
+ int32_t groupnum;
+ upb_inttable *dispatch; // Not used by the JIT.
+} upb_pbdecoder_frame;
+
+struct upb_pbdecoder {
+ upb_env *env;
+
+ // Our input sink.
+ upb_bytessink input_;
+
+ // The decoder method we are parsing with (owned).
+ const upb_pbdecodermethod *method_;
+
+ size_t call_len;
+ const uint32_t *pc, *last;
+
+ // Current input buffer and its stream offset.
+ const char *buf, *ptr, *end, *checkpoint;
+
+ // End of the delimited region, relative to ptr, or NULL if not in this buf.
+ const char *delim_end;
+
+ // End of the delimited region, relative to ptr, or end if not in this buf.
+ const char *data_end;
+
+ // Overall stream offset of "buf."
+ uint64_t bufstart_ofs;
+
+ // Buffer for residual bytes not parsed from the previous buffer.
+ // The maximum number of residual bytes we require is 12; a five-byte
+ // unknown tag plus an eight-byte value, less one because the value
+ // is only a partial value.
+ char residual[12];
+ char *residual_end;
+
+ // Stores the user buffer passed to our decode function.
+ const char *buf_param;
+ size_t size_param;
+ const upb_bufhandle *handle;
+
+ // Our internal stack.
+ upb_pbdecoder_frame *stack, *top, *limit;
+ const uint32_t **callstack;
+ size_t stack_size;
+
+ upb_status *status;
+
+#ifdef UPB_USE_JIT_X64
+ // Used momentarily by the generated code to store a value while a user
+ // function is called.
+ uint32_t tmp_len;
+
+ const void *saved_rsp;
+#endif
+};
+
// Decoder entry points; used as handlers.
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);
diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c
index 2534a4d..ca5ebc1 100644
--- a/upb/pb/encoder.c
+++ b/upb/pb/encoder.c
@@ -62,6 +62,68 @@
#include <stdlib.h>
+// The output buffer is divided into segments; a segment is a string of data
+// that is "ready to go" -- it does not need any varint lengths inserted into
+// the middle. The seams between segments are where varints will be inserted
+// once they are known.
+//
+// We also use the concept of a "run", which is a range of encoded bytes that
+// occur at a single submessage level. Every segment contains one or more runs.
+//
+// A segment can span messages. Consider:
+//
+// .--Submessage lengths---------.
+// | | |
+// | V V
+// V | |--------------- | |-----------------
+// Submessages: | |-----------------------------------------------
+// Top-level msg: ------------------------------------------------------------
+//
+// Segments: ----- ------------------- -----------------
+// Runs: *---- *--------------*--- *----------------
+// (* marks the start)
+//
+// Note that the top-level message is not in any segment because it does not
+// have any length preceding it.
+//
+// A segment is only interrupted when another length needs to be inserted. So
+// observe how the second segment spans both the inner submessage and part of
+// the next enclosing message.
+typedef struct {
+ uint32_t msglen; // The length to varint-encode before this segment.
+ uint32_t seglen; // Length of the segment.
+} upb_pb_encoder_segment;
+
+struct upb_pb_encoder {
+ upb_env *env;
+
+ // Our input and output.
+ upb_sink input_;
+ upb_bytessink *output_;
+
+ // The "subclosure" -- used as the inner closure as part of the bytessink
+ // protocol.
+ void *subc;
+
+ // The output buffer and limit, and our current write position. "buf" is
+ // grown with upb_env_realloc() (see reserve()) when we need more space than
+ // it currently holds.
+ char *buf, *ptr, *limit;
+
+ // The beginning of the current run, or undefined if we are at the top level.
+ char *runbegin;
+
+ // The list of segments we are accumulating.
+ upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
+
+ // The stack of enclosing submessages. Each entry in the stack points to the
+ // segment where this submessage's length is being accumulated.
+ int *stack, *top, *stacklimit;
+
+ // Depth of startmsg/endmsg calls.
+ int depth;
+};
+
/* low-level buffering ********************************************************/
// Low-level functions for interacting with the output buffer.
@@ -80,24 +142,22 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
// e->ptr. Returns false if the bytes could not be allocated.
static bool reserve(upb_pb_encoder *e, size_t bytes) {
if ((size_t)(e->limit - e->ptr) < bytes) {
+ // Grow buffer.
size_t needed = bytes + (e->ptr - e->buf);
size_t old_size = e->limit - e->buf;
+
size_t new_size = old_size;
+
while (new_size < needed) {
new_size *= 2;
}
- char *realloc_from = (e->buf == e->initbuf) ? NULL : e->buf;
- char *new_buf = realloc(realloc_from, new_size);
+ char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
if (new_buf == NULL) {
return false;
}
- if (realloc_from == NULL) {
- memcpy(new_buf, e->initbuf, old_size);
- }
-
e->ptr = new_buf + (e->ptr - e->buf);
e->runbegin = new_buf + (e->runbegin - e->buf);
e->limit = new_buf + new_size;
@@ -166,21 +226,17 @@ static bool start_delim(upb_pb_encoder *e) {
}
if (++e->segptr == e->seglimit) {
- upb_pb_encoder_segment *realloc_from =
- (e->segbuf == e->seginitbuf) ? NULL : e->segbuf;
+ // Grow segment buffer.
size_t old_size =
(e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
size_t new_size = old_size * 2;
- upb_pb_encoder_segment *new_buf = realloc(realloc_from, new_size);
+ upb_pb_encoder_segment *new_buf =
+ upb_env_realloc(e->env, e->segbuf, old_size, new_size);
if (new_buf == NULL) {
return false;
}
- if (realloc_from == NULL) {
- memcpy(new_buf, e->seginitbuf, old_size);
- }
-
e->segptr = new_buf + (e->segptr - e->segbuf);
e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
e->segbuf = new_buf;
@@ -451,6 +507,12 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
}
}
+void upb_pb_encoder_reset(upb_pb_encoder *e) {
+ e->segptr = NULL;
+ e->top = NULL;
+ e->depth = 0;
+}
+
/* public API *****************************************************************/
@@ -459,40 +521,42 @@ const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
}
-#define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0]))
-
-void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h) {
- e->output_ = NULL;
- e->subc = NULL;
- e->buf = e->initbuf;
- e->ptr = e->buf;
- e->limit = e->buf + ARRAYSIZE(e->initbuf);
- e->segbuf = e->seginitbuf;
- e->seglimit = e->segbuf + ARRAYSIZE(e->seginitbuf);
- e->stacklimit = e->stack + ARRAYSIZE(e->stack);
- upb_sink_reset(&e->input_, h, e);
-}
-
-void upb_pb_encoder_uninit(upb_pb_encoder *e) {
- if (e->buf != e->initbuf) {
- free(e->buf);
+upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
+ upb_bytessink *output) {
+ const size_t initial_bufsize = 256;
+ const size_t initial_segbufsize = 16;
+ // TODO(haberman): make this configurable.
+ const size_t stack_size = 64;
+#ifndef NDEBUG
+ const size_t size_before = upb_env_bytesallocated(env);
+#endif
+
+ upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
+ if (!e) return NULL;
+
+ e->buf = upb_env_malloc(env, initial_bufsize);
+ e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
+ e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
+
+ if (!e->buf || !e->segbuf || !e->stack) {
+ return NULL;
}
- if (e->segbuf != e->seginitbuf) {
- free(e->segbuf);
- }
-}
+ e->limit = e->buf + initial_bufsize;
+ e->seglimit = e->segbuf + initial_segbufsize;
+ e->stacklimit = e->stack + stack_size;
-void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output) {
upb_pb_encoder_reset(e);
+ upb_sink_reset(&e->input_, h, e);
+
+ e->env = env;
e->output_ = output;
e->subc = output->closure;
-}
+ e->ptr = e->buf;
-void upb_pb_encoder_reset(upb_pb_encoder *e) {
- e->segptr = NULL;
- e->top = NULL;
- e->depth = 0;
+ // If this fails, increase the value in encoder.h.
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
+ return e;
}
upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h
index 2df5797..edff95b 100644
--- a/upb/pb/encoder.h
+++ b/upb/pb/encoder.h
@@ -15,6 +15,7 @@
#ifndef UPB_ENCODER_H_
#define UPB_ENCODER_H_
+#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@@ -31,101 +32,42 @@ UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder);
/* upb::pb::Encoder ***********************************************************/
-// The output buffer is divided into segments; a segment is a string of data
-// that is "ready to go" -- it does not need any varint lengths inserted into
-// the middle. The seams between segments are where varints will be inserted
-// once they are known.
-//
-// We also use the concept of a "run", which is a range of encoded bytes that
-// occur at a single submessage level. Every segment contains one or more runs.
-//
-// A segment can span messages. Consider:
-//
-// .--Submessage lengths---------.
-// | | |
-// | V V
-// V | |--------------- | |-----------------
-// Submessages: | |-----------------------------------------------
-// Top-level msg: ------------------------------------------------------------
-//
-// Segments: ----- ------------------- -----------------
-// Runs: *---- *--------------*--- *----------------
-// (* marks the start)
-//
-// Note that the top-level menssage is not in any segment because it does not
-// have any length preceding it.
-//
-// A segment is only interrupted when another length needs to be inserted. So
-// observe how the second segment spans both the inner submessage and part of
-// the next enclosing message.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- uint32_t msglen; // The length to varint-encode before this segment.
- uint32_t seglen; // Length of the segment.
-} upb_pb_encoder_segment;
-
-UPB_DEFINE_CLASS0(upb::pb::Encoder,
- public:
- Encoder(const upb::Handlers* handlers);
- ~Encoder();
-
- static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* msg);
+// Preallocation hint: encoder won't allocate more bytes than this when first
+// constructed. This hint may be an overestimate for some build configurations.
+// But if the encoder library is upgraded without recompiling the application,
+// it may be an underestimate.
+#define UPB_PB_ENCODER_SIZE 768
- // Resets the state of the printer, so that it will expect to begin a new
- // document.
- void Reset();
+#ifdef __cplusplus
- // Resets the output pointer which will serve as our closure.
- void ResetOutput(BytesSink* output);
+class upb::pb::Encoder {
+ public:
+ // Creates a new encoder in the given environment. The Handlers must have
+ // come from NewHandlers() below.
+ static Encoder* Create(Environment* env, const Handlers* handlers,
+ BytesSink* output);
// The input to the encoder.
Sink* input();
- private:
- UPB_DISALLOW_COPY_AND_ASSIGN(Encoder);
-,
-UPB_DEFINE_STRUCT0(upb_pb_encoder, UPB_QUOTE(
- // Our input and output.
- upb_sink input_;
- upb_bytessink *output_;
-
- // The "subclosure" -- used as the inner closure as part of the bytessink
- // protocol.
- void *subc;
-
- // The output buffer and limit, and our current write position. "buf"
- // initially points to "initbuf", but is dynamically allocated if we need to
- // grow beyond the initial size.
- char *buf, *ptr, *limit;
+ // Creates a new set of handlers for this MessageDef.
+ static reffed_ptr<const Handlers> NewHandlers(const MessageDef* msg);
- // The beginning of the current run, or undefined if we are at the top level.
- char *runbegin;
+ static const size_t kSize = UPB_PB_ENCODER_SIZE;
- // The list of segments we are accumulating.
- upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
-
- // The stack of enclosing submessages. Each entry in the stack points to the
- // segment where this submessage's length is being accumulated.
- int stack[UPB_PBENCODER_MAX_NESTING], *top, *stacklimit;
-
- // Depth of startmsg/endmsg calls.
- int depth;
+ private:
+ UPB_DISALLOW_POD_OPS(Encoder, upb::pb::Encoder);
+};
- // Initial buffers for the output buffer and segment buffer. If we outgrow
- // these we will dynamically allocate bigger ones.
- char initbuf[256];
- upb_pb_encoder_segment seginitbuf[32];
-)));
+#endif
UPB_BEGIN_EXTERN_C
const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
const void *owner);
-void upb_pb_encoder_reset(upb_pb_encoder *e);
upb_sink *upb_pb_encoder_input(upb_pb_encoder *p);
-void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h);
-void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output);
-void upb_pb_encoder_uninit(upb_pb_encoder *e);
+upb_pb_encoder* upb_pb_encoder_create(upb_env* e, const upb_handlers* h,
+ upb_bytessink* output);
UPB_END_EXTERN_C
@@ -133,17 +75,9 @@ UPB_END_EXTERN_C
namespace upb {
namespace pb {
-inline Encoder::Encoder(const upb::Handlers* handlers) {
- upb_pb_encoder_init(this, handlers);
-}
-inline Encoder::~Encoder() {
- upb_pb_encoder_uninit(this);
-}
-inline void Encoder::Reset() {
- upb_pb_encoder_reset(this);
-}
-inline void Encoder::ResetOutput(BytesSink* output) {
- upb_pb_encoder_resetoutput(this, output);
+inline Encoder* Encoder::Create(Environment* env, const Handlers* handlers,
+ BytesSink* output) {
+ return upb_pb_encoder_create(env, handlers, output);
}
inline Sink* Encoder::input() {
return upb_pb_encoder_input(this);
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index fde2dd1..1259dac 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -22,26 +22,26 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
const upb_pbdecodermethod *decoder_m =
upb_pbdecodermethod_new(&opts, &decoder_m);
- upb_pbdecoder decoder;
- upb_descreader reader;
+ upb_env env;
+ upb_env_init(&env);
+ upb_env_reporterrorsto(&env, status);
- upb_pbdecoder_init(&decoder, decoder_m, status);
- upb_descreader_init(&reader, reader_h, status);
- upb_pbdecoder_resetoutput(&decoder, upb_descreader_input(&reader));
+ upb_descreader *reader = upb_descreader_create(&env, reader_h);
+ upb_pbdecoder *decoder =
+ upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
// Push input data.
- bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(&decoder));
+ bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
upb_def **ret = NULL;
if (!ok) goto cleanup;
- upb_def **defs = upb_descreader_getdefs(&reader, owner, n);
+ upb_def **defs = upb_descreader_getdefs(reader, owner, n);
ret = malloc(sizeof(upb_def*) * (*n));
memcpy(ret, defs, sizeof(upb_def*) * (*n));
cleanup:
- upb_pbdecoder_uninit(&decoder);
- upb_descreader_uninit(&reader);
+ upb_env_uninit(&env);
upb_handlers_unref(reader_h, &reader_h);
upb_pbdecodermethod_unref(decoder_m, &decoder_m);
return ret;
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
index 45c5e43..07f951d 100644
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@@ -19,6 +19,14 @@
#include "upb/sink.h"
+struct upb_textprinter {
+ upb_sink input_;
+ upb_bytessink *output_;
+ int indent_depth_;
+ bool single_line_;
+ void *subc;
+};
+
#define CHECK(x) if ((x) < 0) goto err;
static const char *shortname(const char *longname) {
@@ -236,24 +244,6 @@ err:
return false;
}
-
-/* Public API *****************************************************************/
-
-void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h) {
- p->single_line_ = false;
- p->indent_depth_ = 0;
- upb_sink_reset(&p->input_, h, p);
-}
-
-void upb_textprinter_uninit(upb_textprinter *p) {
- UPB_UNUSED(p);
-}
-
-void upb_textprinter_reset(upb_textprinter *p, bool single_line) {
- p->single_line_ = single_line;
- p->indent_depth_ = 0;
-}
-
static void onmreg(const void *c, upb_handlers *h) {
UPB_UNUSED(c);
const upb_msgdef *m = upb_handlers_msgdef(h);
@@ -313,6 +303,26 @@ static void onmreg(const void *c, upb_handlers *h) {
}
}
+static void textprinter_reset(upb_textprinter *p, bool single_line) {
+ p->single_line_ = single_line;
+ p->indent_depth_ = 0;
+}
+
+
+/* Public API *****************************************************************/
+
+upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
+ upb_bytessink *output) {
+ upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
+ if (!p) return NULL;
+
+ p->output_ = output;
+ upb_sink_reset(&p->input_, h, p);
+ textprinter_reset(p, false);
+
+ return p;
+}
+
const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
const void *owner) {
return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
@@ -320,11 +330,6 @@ const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
-bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output) {
- p->output_ = output;
- return true;
-}
-
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
p->single_line_ = single_line;
}
diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h
index 97e01f7..3ba3403 100644
--- a/upb/pb/textprinter.h
+++ b/upb/pb/textprinter.h
@@ -8,6 +8,7 @@
#ifndef UPB_TEXT_H_
#define UPB_TEXT_H_
+#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@@ -20,58 +21,51 @@ class TextPrinter;
UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter);
-UPB_DEFINE_CLASS0(upb::pb::TextPrinter,
+#ifdef __cplusplus
+
+class upb::pb::TextPrinter {
public:
// The given handlers must have come from NewHandlers(). It must outlive the
// TextPrinter.
- explicit TextPrinter(const upb::Handlers* handlers);
+ static TextPrinter *Create(Environment *env, const upb::Handlers *handlers,
+ BytesSink *output);
void SetSingleLineMode(bool single_line);
- bool ResetOutput(BytesSink* output);
Sink* input();
// If handler caching becomes a requirement we can add a code cache as in
// decoder.h
static reffed_ptr<const Handlers> NewHandlers(const MessageDef* md);
+};
- private:
-,
-UPB_DEFINE_STRUCT0(upb_textprinter,
- upb_sink input_;
- upb_bytessink *output_;
- int indent_depth_;
- bool single_line_;
- void *subc;
-));
+#endif
-UPB_BEGIN_EXTERN_C // {
+UPB_BEGIN_EXTERN_C
// C API.
-void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h);
-void upb_textprinter_uninit(upb_textprinter *p);
-bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output);
+upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
+ upb_bytessink *output);
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line);
upb_sink *upb_textprinter_input(upb_textprinter *p);
const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
const void *owner);
-UPB_END_EXTERN_C // }
+UPB_END_EXTERN_C
#ifdef __cplusplus
namespace upb {
namespace pb {
-inline TextPrinter::TextPrinter(const upb::Handlers* handlers) {
- upb_textprinter_init(this, handlers);
+inline TextPrinter *TextPrinter::Create(Environment *env,
+ const upb::Handlers *handlers,
+ BytesSink *output) {
+ return upb_textprinter_create(env, handlers, output);
}
inline void TextPrinter::SetSingleLineMode(bool single_line) {
upb_textprinter_setsingleline(this, single_line);
}
-inline bool TextPrinter::ResetOutput(BytesSink* output) {
- return upb_textprinter_resetoutput(this, output);
-}
inline Sink* TextPrinter::input() {
return upb_textprinter_input(this);
}
diff --git a/upb/sink.h b/upb/sink.h
index 479aaff..1ada31a 100644
--- a/upb/sink.h
+++ b/upb/sink.h
@@ -34,27 +34,6 @@ UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc);
UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink);
UPB_DECLARE_TYPE(upb::Sink, upb_sink);
-// Internal-only struct for the sink.
-struct upb_sinkframe {
- UPB_PRIVATE_FOR_CPP
- const upb_handlers *h;
- void *closure;
-
- // For any frames besides the top, this is the END* callback that will run
- // when the subframe is popped (for example, for a "sequence" frame the frame
- // above it will be a UPB_HANDLER_ENDSEQ handler). But this is only
- // necessary for assertion checking inside upb_sink and can be omitted if the
- // sink has only one caller.
- //
- // TODO(haberman): have a mechanism for ensuring that a sink only has one
- // caller.
- upb_selector_t selector;
-};
-
-// The maximum nesting depth that upb::Sink will allow. Matches proto2's limit.
-// TODO: make this a runtime-settable property of Sink.
-#define UPB_SINK_MAX_NESTING 64
-
// A upb::Sink is an object that binds a upb::Handlers object to some runtime
// state. It represents an endpoint to which data can be sent.
//
diff --git a/upb/upb.h b/upb/upb.h
index 3744ea6..13efaed 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -25,6 +25,15 @@
#define UPB_INLINE static inline
#endif
+// For use in C/C++ source files (not headers), forces inlining within the file.
+#ifdef __GNUC__
+#define UPB_FORCEINLINE inline __attribute__((always_inline))
+#define UPB_NOINLINE __attribute__((noinline))
+#else
+#define UPB_FORCEINLINE
+#define UPB_NOINLINE
+#endif
+
#if __STDC_VERSION__ >= 199901L
#define UPB_C99
#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback