summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 2
-rw-r--r-- tests/json/test_json.cc 244
-rw-r--r-- upb/bindings/lua/upb.c 2
-rw-r--r-- upb/def.c 9
-rw-r--r-- upb/def.h 14
-rw-r--r-- upb/json/parser.c 139
-rw-r--r-- upb/json/parser.rl 69
-rw-r--r-- upb/json/printer.c 122
8 files changed, 504 insertions, 97 deletions
diff --git a/Makefile b/Makefile
index b2c4036..732f498 100644
--- a/Makefile
+++ b/Makefile
@@ -235,6 +235,7 @@ C_TESTS = \
CC_TESTS = \
tests/pb/test_decoder \
+ tests/json/test_json \
tests/test_cpp \
tests/test_table \
@@ -264,6 +265,7 @@ tests/test_handlers: LIBS = lib/libupb.descriptor.a lib/libupb.a
tests/pb/test_decoder: LIBS = lib/libupb.pb.a lib/libupb.a
tests/test_cpp: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a
tests/test_table: LIBS = lib/libupb.a
+tests/json/test_json: LIBS = lib/libupb.a lib/libupb.json.a
tests/test_def: tests/test.proto.pb
diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc
new file mode 100644
index 0000000..1444081
--- /dev/null
+++ b/tests/json/test_json.cc
@@ -0,0 +1,244 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2014 Google Inc. See LICENSE for details.
+ *
+ * A set of tests for JSON parsing and serialization.
+ */
+
+#include "tests/upb_test.h"
+#include "upb/handlers.h"
+#include "upb/symtab.h"
+#include "upb/json/printer.h"
+#include "upb/json/parser.h"
+#include "upb/upb.h"
+
+#include <string>
+
+// Readability macros for the test-case table below: each entry is written as
+// a TEST("...") / EXPECT("...") pair, or as TEST("...") / EXPECT_SAME.
+#define TEST(x) x  // Expands to its argument unchanged.
+#define EXPECT_SAME NULL  // Marker: the expected output equals the input.
+#define EXPECT(x) x  // Expands to its argument unchanged.
+#define TEST_SENTINEL { NULL, NULL }  // Table terminator (input == NULL).
+
+struct TestCase {
+ const char* input;  // JSON text handed to the parser.
+ const char* expected;  // Expected printer output, or EXPECT_SAME.
+};
+
+static TestCase kTestRoundtripMessages[] = {
+ // One message mixing int32, string, submessage, bool and repeated submessage.
+ {
+ TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\","
+ "\"optional_msg\":{\"foo\":42},"
+ "\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1},"
+ "{\"foo\":2}]}"),
+ EXPECT_SAME
+ },
+ // Test special string escapes; the U+FFFF element is compiler-expanded UTF-8.
+ {
+ TEST("{\"repeated_string\":[\"\\b\",\"\\r\",\"\\n\",\"\\f\",\"\\t\","
+ "\"\uFFFF\"]}"),
+ EXPECT_SAME
+ },
+ // Test enum symbolic names.
+ {
+ // The common case: parse and print the symbolic name.
+ TEST("{\"optional_enum\":\"A\"}"),
+ EXPECT_SAME
+ },
+ {
+ // Unknown enum value: will be printed as an integer.
+ TEST("{\"optional_enum\":42}"),
+ EXPECT_SAME
+ },
+ {
+ // Known enum value: we're happy to parse an integer but we will re-emit the
+ // symbolic name.
+ TEST("{\"optional_enum\":1}"),
+ EXPECT("{\"optional_enum\":\"B\"}")
+ },
+ // UTF-8 tests at the 1/2/3-byte boundaries: escapes -> literal UTF-8 output.
+ {
+ // Note double escape on \uXXXX: we want the escape to be processed by the
+ // JSON parser, not by the C++ compiler!
+ TEST("{\"optional_string\":\"\\u007F\"}"),
+ EXPECT("{\"optional_string\":\"\x7F\"}")
+ },
+ {
+ TEST("{\"optional_string\":\"\\u0080\"}"),
+ EXPECT("{\"optional_string\":\"\xC2\x80\"}")
+ },
+ {
+ TEST("{\"optional_string\":\"\\u07FF\"}"),
+ EXPECT("{\"optional_string\":\"\xDF\xBF\"}")
+ },
+ {
+ TEST("{\"optional_string\":\"\\u0800\"}"),
+ EXPECT("{\"optional_string\":\"\xE0\xA0\x80\"}")
+ },
+ {
+ TEST("{\"optional_string\":\"\\uFFFF\"}"),
+ EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}")
+ },
+ TEST_SENTINEL
+};
+
+// Creates a FieldDef from the given attributes and adds it to |message|.
+// The status filled in by the individual calls is not inspected.
+static void AddField(upb::MessageDef* message, int number, const char* name,
+                     upb_fieldtype_t type, bool is_repeated,
+                     const upb::Def* subdef = NULL) {
+  upb::reffed_ptr<upb::FieldDef> fielddef(upb::FieldDef::New());
+  upb::Status status;
+  fielddef->set_name(name, &status);
+  fielddef->set_type(type);
+  if (is_repeated) fielddef->set_label(UPB_LABEL_REPEATED);
+  else fielddef->set_label(UPB_LABEL_OPTIONAL);
+  fielddef->set_number(number, &status);
+  if (subdef != NULL) {
+    fielddef->set_subdef(subdef, &status);
+  }
+  message->AddField(fielddef, &status);
+}
+
+// Builds SubMessage, MyEnum and TestMessage, adds all three to |symtab| and
+// returns the TestMessage def as looked up from the symtab.  The status
+// filled in by the def-building calls is not checked here.
+static const upb::MessageDef* BuildTestMessage(
+    upb::reffed_ptr<upb::SymbolTable> symtab) {
+  upb::Status st;
+  // Create SubMessage.
+  upb::reffed_ptr<upb::MessageDef> submsg(upb::MessageDef::New());
+  submsg->set_full_name("SubMessage", &st);
+  AddField(submsg.get(), 1, "foo", UPB_TYPE_INT32, false);
+
+  // Create MyEnum.
+  upb::reffed_ptr<upb::EnumDef> myenum(upb::EnumDef::New());
+  myenum->set_full_name("MyEnum", &st);
+  myenum->AddValue("A", 0, &st);
+  myenum->AddValue("B", 1, &st);
+  myenum->AddValue("C", 2, &st);
+
+  // Create TestMessage.
+  upb::reffed_ptr<upb::MessageDef> md(upb::MessageDef::New());
+  md->set_full_name("TestMessage", &st);
+  AddField(md.get(), 1, "optional_int32", UPB_TYPE_INT32, false);
+  AddField(md.get(), 2, "optional_int64", UPB_TYPE_INT64, false);
+  AddField(md.get(), 3, "optional_uint32", UPB_TYPE_UINT32, false);
+  AddField(md.get(), 4, "optional_uint64", UPB_TYPE_UINT64, false);
+  AddField(md.get(), 5, "optional_string", UPB_TYPE_STRING, false);
+  AddField(md.get(), 6, "optional_bytes", UPB_TYPE_BYTES, false);
+  AddField(md.get(), 7, "optional_bool" , UPB_TYPE_BOOL, false);
+  AddField(md.get(), 8, "optional_msg" , UPB_TYPE_MESSAGE, false,
+           upb::upcast(submsg.get()));
+  AddField(md.get(), 9, "optional_enum", UPB_TYPE_ENUM, false,
+           upb::upcast(myenum.get()));
+
+  AddField(md.get(), 11, "repeated_int32", UPB_TYPE_INT32, true);
+  AddField(md.get(), 12, "repeated_int64", UPB_TYPE_INT64, true);
+  AddField(md.get(), 13, "repeated_uint32", UPB_TYPE_UINT32, true);
+  AddField(md.get(), 14, "repeated_uint64", UPB_TYPE_UINT64, true);
+  AddField(md.get(), 15, "repeated_string", UPB_TYPE_STRING, true);
+  AddField(md.get(), 16, "repeated_bytes", UPB_TYPE_BYTES, true);
+  AddField(md.get(), 17, "repeated_bool" , UPB_TYPE_BOOL, true);
+  AddField(md.get(), 18, "repeated_msg" , UPB_TYPE_MESSAGE, true,
+           upb::upcast(submsg.get()));
+  AddField(md.get(), 19, "repeated_enum", UPB_TYPE_ENUM, true,
+           upb::upcast(myenum.get()));
+
+  // Add all three defs to our symtab.
+  upb::Def* defs[3] = {
+    upb::upcast(submsg.ReleaseTo(&defs)),
+    upb::upcast(myenum.ReleaseTo(&defs)),
+    upb::upcast(md.ReleaseTo(&defs)),
+  };
+  symtab->Add(defs, 3, &defs, &st);
+  // Return TestMessage.
+  return symtab->LookupMessage("TestMessage");
+}
+
+// Collects every chunk written to its upb_bytessink into a std::string.
+class StringSink {
+ public:
+  StringSink() {
+    upb_byteshandler_init(&byteshandler_);
+    upb_byteshandler_setstring(&byteshandler_, &str_handler, NULL);
+    upb_bytessink_reset(&bytessink_, &byteshandler_, &s_);
+  }
+  ~StringSink() { }
+
+  upb_bytessink* Sink() { return &bytessink_; }
+  const std::string& Data() const { return s_; }
+
+ private:
+  // String handler: appends |len| bytes of |data| to the std::string passed
+  // as the closure and reports all of them as consumed.
+  static size_t str_handler(void* _closure, const void* hd,
+                            const char* data, size_t len,
+                            const upb_bufhandle* handle) {
+    UPB_UNUSED(hd);
+    UPB_UNUSED(handle);
+    std::string* s = static_cast<std::string*>(_closure);
+    s->append(data, len);
+    return len;
+  }
+
+  upb_byteshandler byteshandler_;
+  upb_bytessink bytessink_;
+  std::string s_;
+};
+
+// Starts with a message in JSON format, parses and directly serializes again,
+// and compares the result.
+void test_json_roundtrip() {
+  upb::reffed_ptr<upb::SymbolTable> symtab(upb::SymbolTable::New());
+  const upb::MessageDef* md = BuildTestMessage(symtab.get());
+  upb::reffed_ptr<const upb::Handlers> serialize_handlers(
+      upb::json::Printer::NewHandlers(md));
+
+  for (const TestCase* test_case = kTestRoundtripMessages;
+       test_case->input != NULL; test_case++) {
+
+    const char *json_src = test_case->input;
+    const char *json_expected = test_case->expected;
+    if (json_expected == EXPECT_SAME) {
+      json_expected = json_src;
+    }
+
+    upb::Status st;
+    upb::json::Parser parser(&st);
+    upb::json::Printer printer(serialize_handlers.get());
+    StringSink data_sink;
+
+    parser.ResetOutput(printer.input());
+    printer.ResetOutput(data_sink.Sink());
+
+    bool ok = upb::BufferSource::PutBuffer(json_src, strlen(json_src),
+                                           parser.input());
+    if (!ok) {
+      fprintf(stderr, "upb parse error: %s\n", st.error_message());
+    }
+    ASSERT(ok);
+
+    // Compare the complete strings so that empty or truncated printer output
+    // cannot pass as a mere prefix of the expected text.
+    if (data_sink.Data() != json_expected) {
+      fprintf(stderr,
+              "JSON parse/serialize roundtrip result differs:\n"
+              "Original:\n%s\nParsed/Serialized:\n%s\n",
+              json_src, data_sink.Data().c_str());
+      abort();
+    }
+  }
+}
+
+extern "C" {  // Give run_tests C linkage.
+int run_tests(int argc, char *argv[]) {
+ UPB_UNUSED(argc);  // Command-line arguments are not used by this test.
+ UPB_UNUSED(argv);
+ test_json_roundtrip();
+ return 0;  // A roundtrip mismatch calls abort() before reaching this point.
+}
+}
diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c
index 2bd78af..17fc0a8 100644
--- a/upb/bindings/lua/upb.c
+++ b/upb/bindings/lua/upb.c
@@ -1032,7 +1032,7 @@ static int lupb_enumdef_value(lua_State *L) {
} else if (type == LUA_TSTRING) {
const char *key = lua_tostring(L, 2);
int32_t num;
- if (upb_enumdef_ntoi(e, key, &num)) {
+ if (upb_enumdef_ntoiz(e, key, &num)) {
lua_pushinteger(L, num);
} else {
lua_pushnil(L);
diff --git a/upb/def.c b/upb/def.c
index fde2ee8..aa05618 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -457,7 +457,7 @@ bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
if (!upb_isident(name, strlen(name), false, status)) {
return false;
}
- if (upb_enumdef_ntoi(e, name, NULL)) {
+ if (upb_enumdef_ntoiz(e, name, NULL)) {
upb_status_seterrf(status, "name '%s' is already defined", name);
return false;
}
@@ -505,9 +505,10 @@ void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
-bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) {
+bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
+ size_t len, int32_t *num) {
upb_value v;
- if (!upb_strtable_lookup(&def->ntoi, name, &v)) {
+ if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
return false;
}
if (num) *num = upb_value_getint32(v);
@@ -595,7 +596,7 @@ static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
if (f->defaultval.bytes) {
// Default was explicitly set as a str; try to lookup corresponding int.
str_t *s = f->defaultval.bytes;
- if (upb_enumdef_ntoi(e, s->str, val)) {
+ if (upb_enumdef_ntoiz(e, s->str, val)) {
return true;
}
} else {
diff --git a/upb/def.h b/upb/def.h
index 2699fbf..cfa140a 100644
--- a/upb/def.h
+++ b/upb/def.h
@@ -943,7 +943,17 @@ bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s);
int upb_enumdef_numvals(const upb_enumdef *e);
bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
upb_status *status);
-bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num);
+
+// Enum lookups:
+// - ntoi: look up a name with specified length.
+// - ntoiz: look up a name provided as a null-terminated string.
+// - iton: look up an integer, returning the name as a null-terminated string.
+bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, size_t len,
+ int32_t *num);
+UPB_INLINE bool upb_enumdef_ntoiz(const upb_enumdef *e,
+ const char *name, int32_t *num) {
+ return upb_enumdef_ntoi(e, name, strlen(name), num);
+}
const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num);
// upb_enum_iter i;
@@ -1352,7 +1362,7 @@ inline bool EnumDef::AddValue(const std::string& name, int32_t num,
return upb_enumdef_addval(this, upb_safecstr(name), num, status);
}
inline bool EnumDef::FindValueByName(const char* name, int32_t *num) const {
- return upb_enumdef_ntoi(this, name, num);
+ return upb_enumdef_ntoiz(this, name, num);
}
inline const char* EnumDef::FindValueByNumber(int32_t num) const {
return upb_enumdef_iton(this, num);
diff --git a/upb/json/parser.c b/upb/json/parser.c
index 2687713..78fc6c0 100644
--- a/upb/json/parser.c
+++ b/upb/json/parser.c
@@ -288,7 +288,7 @@ badpadding:
return false;
}
-static bool end_text(upb_json_parser *p, const char *ptr) {
+static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
assert(!p->accumulated); // TODO: handle this case.
p->accumulated = p->text_begin;
p->accumulated_len = ptr - p->text_begin;
@@ -302,6 +302,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
}
p->accumulated = NULL;
+ } else if (p->top->f &&
+ upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
+ !is_num) {
+
+ // Enum case: resolve enum symbolic name to integer value.
+ const upb_enumdef *enumdef =
+ (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
+
+ int32_t int_val = 0;
+ if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
+ &int_val)) {
+ upb_selector_t sel = getsel(p);
+ upb_sink_putint32(&p->top->sink, sel, int_val);
+ } else {
+ upb_status_seterrmsg(p->status, "Enum value name unknown");
+ return false;
+ }
+ p->accumulated = NULL;
}
return true;
@@ -310,29 +328,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
static bool start_stringval(upb_json_parser *p) {
assert(p->top->f);
- if (!upb_fielddef_isstring(p->top->f)) {
+ if (upb_fielddef_isstring(p->top->f)) {
+ if (!check_stack(p)) return false;
+
+ // Start a new parser frame: parser frames correspond one-to-one with
+ // handler frames, and string events occur in a sub-frame.
+ upb_jsonparser_frame *inner = p->top + 1;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+ upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
+ inner->m = p->top->m;
+ inner->f = p->top->f;
+ p->top = inner;
+
+ return true;
+ } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
+ // Do nothing -- symbolic enum names in quotes remain in the
+ // current parser frame.
+ return true;
+ } else {
upb_status_seterrf(p->status,
- "String specified for non-string field: %s",
+ "String specified for non-string/non-enum field: %s",
upb_fielddef_name(p->top->f));
return false;
}
- if (!check_stack(p)) return false;
-
- upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow.
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
- upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
- inner->m = p->top->m;
- inner->f = p->top->f;
- p->top = inner;
-
- return true;
}
static void end_stringval(upb_json_parser *p) {
- p->top--;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
- upb_sink_endstr(&p->top->sink, sel);
+ if (upb_fielddef_isstring(p->top->f)) {
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+ upb_sink_endstr(&p->top->sink, sel);
+ p->top--;
+ }
}
static void start_number(upb_json_parser *p, const char *ptr) {
@@ -341,7 +368,7 @@ static void start_number(upb_json_parser *p, const char *ptr) {
}
static void end_number(upb_json_parser *p, const char *ptr) {
- end_text(p, ptr);
+ end_text(p, ptr, true);
const char *myend = p->accumulated + p->accumulated_len;
char *end;
@@ -450,15 +477,15 @@ static void hex(upb_json_parser *p, const char *end) {
// emit the codepoint as UTF-8.
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
int length = 0;
- if (codepoint < 0x7F) {
+ if (codepoint <= 0x7F) {
utf8[0] = codepoint;
length = 1;
- } else if (codepoint < 0x07FF) {
+ } else if (codepoint <= 0x07FF) {
utf8[1] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[0] = (codepoint & 0x1F) | 0xC0;
length = 2;
- } else /* codepoint < 0xFFFF */ {
+ } else /* codepoint <= 0xFFFF */ {
utf8[2] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[1] = (codepoint & 0x3F) | 0x80;
@@ -478,11 +505,11 @@ static void hex(upb_json_parser *p, const char *end) {
// What follows is the Ragel parser itself. The language is specified in Ragel
// and the actions call our C functions above.
-#line 568 "upb/json/parser.rl"
+#line 595 "upb/json/parser.rl"
-#line 486 "upb/json/parser.c"
+#line 513 "upb/json/parser.c"
static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1,
4, 1, 5, 1, 6, 1, 7, 1,
@@ -635,7 +662,7 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1;
-#line 571 "upb/json/parser.rl"
+#line 598 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
@@ -652,7 +679,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const char *pe = buf + size;
-#line 656 "upb/json/parser.c"
+#line 683 "upb/json/parser.c"
{
int _klen;
unsigned int _trans;
@@ -727,114 +754,114 @@ _match:
switch ( *_acts++ )
{
case 0:
-#line 489 "upb/json/parser.rl"
+#line 516 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 1:
-#line 490 "upb/json/parser.rl"
+#line 517 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
break;
case 2:
-#line 494 "upb/json/parser.rl"
+#line 521 "upb/json/parser.rl"
{ start_text(parser, p); }
break;
case 3:
-#line 495 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(end_text(parser, p)); }
+#line 522 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(end_text(parser, p, false)); }
break;
case 4:
-#line 501 "upb/json/parser.rl"
+#line 528 "upb/json/parser.rl"
{ start_hex(parser, p); }
break;
case 5:
-#line 502 "upb/json/parser.rl"
+#line 529 "upb/json/parser.rl"
{ hex(parser, p); }
break;
case 6:
-#line 508 "upb/json/parser.rl"
+#line 535 "upb/json/parser.rl"
{ escape(parser, p); }
break;
case 7:
-#line 511 "upb/json/parser.rl"
+#line 538 "upb/json/parser.rl"
{ {cs = stack[--top]; goto _again;} }
break;
case 8:
-#line 512 "upb/json/parser.rl"
+#line 539 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} }
break;
case 9:
-#line 514 "upb/json/parser.rl"
+#line 541 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
break;
case 10:
-#line 519 "upb/json/parser.rl"
+#line 546 "upb/json/parser.rl"
{ start_member(parser); }
break;
case 11:
-#line 520 "upb/json/parser.rl"
+#line 547 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_member(parser)); }
break;
case 12:
-#line 523 "upb/json/parser.rl"
+#line 550 "upb/json/parser.rl"
{ clear_member(parser); }
break;
case 13:
-#line 529 "upb/json/parser.rl"
+#line 556 "upb/json/parser.rl"
{ start_object(parser); }
break;
case 14:
-#line 532 "upb/json/parser.rl"
+#line 559 "upb/json/parser.rl"
{ end_object(parser); }
break;
case 15:
-#line 538 "upb/json/parser.rl"
+#line 565 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); }
break;
case 16:
-#line 542 "upb/json/parser.rl"
+#line 569 "upb/json/parser.rl"
{ end_array(parser); }
break;
case 17:
-#line 547 "upb/json/parser.rl"
+#line 574 "upb/json/parser.rl"
{ start_number(parser, p); }
break;
case 18:
-#line 548 "upb/json/parser.rl"
+#line 575 "upb/json/parser.rl"
{ end_number(parser, p); }
break;
case 19:
-#line 550 "upb/json/parser.rl"
+#line 577 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); }
break;
case 20:
-#line 551 "upb/json/parser.rl"
+#line 578 "upb/json/parser.rl"
{ end_stringval(parser); }
break;
case 21:
-#line 553 "upb/json/parser.rl"
+#line 580 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(putbool(parser, true)); }
break;
case 22:
-#line 555 "upb/json/parser.rl"
+#line 582 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(putbool(parser, false)); }
break;
case 23:
-#line 557 "upb/json/parser.rl"
+#line 584 "upb/json/parser.rl"
{ /* null value */ }
break;
case 24:
-#line 559 "upb/json/parser.rl"
+#line 586 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); }
break;
case 25:
-#line 560 "upb/json/parser.rl"
+#line 587 "upb/json/parser.rl"
{ end_subobject(parser); }
break;
case 26:
-#line 565 "upb/json/parser.rl"
+#line 592 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
-#line 838 "upb/json/parser.c"
+#line 865 "upb/json/parser.c"
}
}
@@ -847,7 +874,7 @@ _again:
_out: {}
}
-#line 587 "upb/json/parser.rl"
+#line 614 "upb/json/parser.rl"
if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@@ -888,13 +915,13 @@ void upb_json_parser_reset(upb_json_parser *p) {
int top;
// Emit Ragel initialization of the parser.
-#line 892 "upb/json/parser.c"
+#line 919 "upb/json/parser.c"
{
cs = json_start;
top = 0;
}
-#line 627 "upb/json/parser.rl"
+#line 654 "upb/json/parser.rl"
p->current_state = cs;
p->parser_top = top;
p->text_begin = NULL;
diff --git a/upb/json/parser.rl b/upb/json/parser.rl
index 92a1566..8ceca77 100644
--- a/upb/json/parser.rl
+++ b/upb/json/parser.rl
@@ -286,7 +286,7 @@ badpadding:
return false;
}
-static bool end_text(upb_json_parser *p, const char *ptr) {
+static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
assert(!p->accumulated); // TODO: handle this case.
p->accumulated = p->text_begin;
p->accumulated_len = ptr - p->text_begin;
@@ -300,6 +300,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
}
p->accumulated = NULL;
+ } else if (p->top->f &&
+ upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
+ !is_num) {
+
+ // Enum case: resolve enum symbolic name to integer value.
+ const upb_enumdef *enumdef =
+ (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
+
+ int32_t int_val = 0;
+ if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
+ &int_val)) {
+ upb_selector_t sel = getsel(p);
+ upb_sink_putint32(&p->top->sink, sel, int_val);
+ } else {
+ upb_status_seterrmsg(p->status, "Enum value name unknown");
+ return false;
+ }
+ p->accumulated = NULL;
}
return true;
@@ -308,29 +326,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
static bool start_stringval(upb_json_parser *p) {
assert(p->top->f);
- if (!upb_fielddef_isstring(p->top->f)) {
+ if (upb_fielddef_isstring(p->top->f)) {
+ if (!check_stack(p)) return false;
+
+ // Start a new parser frame: parser frames correspond one-to-one with
+ // handler frames, and string events occur in a sub-frame.
+ upb_jsonparser_frame *inner = p->top + 1;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+ upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
+ inner->m = p->top->m;
+ inner->f = p->top->f;
+ p->top = inner;
+
+ return true;
+ } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
+ // Do nothing -- symbolic enum names in quotes remain in the
+ // current parser frame.
+ return true;
+ } else {
upb_status_seterrf(p->status,
- "String specified for non-string field: %s",
+ "String specified for non-string/non-enum field: %s",
upb_fielddef_name(p->top->f));
return false;
}
- if (!check_stack(p)) return false;
-
- upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow.
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
- upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
- inner->m = p->top->m;
- inner->f = p->top->f;
- p->top = inner;
-
- return true;
}
static void end_stringval(upb_json_parser *p) {
- p->top--;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
- upb_sink_endstr(&p->top->sink, sel);
+ if (upb_fielddef_isstring(p->top->f)) {
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+ upb_sink_endstr(&p->top->sink, sel);
+ p->top--;
+ }
}
static void start_number(upb_json_parser *p, const char *ptr) {
@@ -339,7 +366,7 @@ static void start_number(upb_json_parser *p, const char *ptr) {
}
static void end_number(upb_json_parser *p, const char *ptr) {
- end_text(p, ptr);
+ end_text(p, ptr, true);
const char *myend = p->accumulated + p->accumulated_len;
char *end;
@@ -448,15 +475,15 @@ static void hex(upb_json_parser *p, const char *end) {
// emit the codepoint as UTF-8.
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
int length = 0;
- if (codepoint < 0x7F) {
+ if (codepoint <= 0x7F) {
utf8[0] = codepoint;
length = 1;
- } else if (codepoint < 0x07FF) {
+ } else if (codepoint <= 0x07FF) {
utf8[1] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[0] = (codepoint & 0x1F) | 0xC0;
length = 2;
- } else /* codepoint < 0xFFFF */ {
+ } else /* codepoint <= 0xFFFF */ {
utf8[2] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[1] = (codepoint & 0x3F) | 0x80;
@@ -492,7 +519,7 @@ static void hex(upb_json_parser *p, const char *end) {
text =
/[^\\"]/+
>{ start_text(parser, p); }
- %{ CHECK_RETURN_TOP(end_text(parser, p)); }
+ %{ CHECK_RETURN_TOP(end_text(parser, p, false)); }
;
unicode_char =
diff --git a/upb/json/printer.c b/upb/json/printer.c
index 44e6f83..28f3e4a 100644
--- a/upb/json/printer.c
+++ b/upb/json/printer.c
@@ -69,10 +69,10 @@ static inline char* json_nice_escape(char c) {
}
}
-// Write a properly quoted and escaped string.
+// Write a properly escaped string chunk. The surrounding quotes are *not*
+// printed; this is so that the caller has the option of emitting the string
+// content in chunks.
static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
- print_data(p, "\"", 1);
-
const char* unescaped_run = NULL;
for (unsigned int i = 0; i < len; i++) {
char c = buf[i];
@@ -112,8 +112,6 @@ static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
if (unescaped_run) {
print_data(p, unescaped_run, &buf[len] - unescaped_run);
}
-
- print_data(p, "\"", 1);
}
#define CHKLENGTH(x) if (!(x)) return -1;
@@ -158,8 +156,9 @@ static bool putkey(void *closure, const void *handler_data) {
upb_json_printer *p = closure;
const strpc *key = handler_data;
print_comma(p);
+ print_data(p, "\"", 1);
putstring(p, key->ptr, key->len);
- print_data(p, ":", 1);
+ print_data(p, "\":", 2);
return true;
}
@@ -200,6 +199,47 @@ TYPE_HANDLERS(uint64_t, fmt_uint64);
#undef TYPE_HANDLERS
+typedef struct {
+ void *keyname;
+ const upb_enumdef *enumdef;
+} EnumHandlerData;
+
+static bool scalar_enum(void *closure, const void *handler_data,
+ int32_t val) {
+ const EnumHandlerData *hd = handler_data;
+ upb_json_printer *p = closure;
+ CHK(putkey(closure, hd->keyname));
+
+ const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val);
+ if (symbolic_name) {
+ print_data(p, "\"", 1);
+ putstring(p, symbolic_name, strlen(symbolic_name));
+ print_data(p, "\"", 1);
+ } else {
+ putint32_t(closure, NULL, val);
+ }
+
+ return true;
+}
+
+static bool repeated_enum(void *closure, const void *handler_data,
+ int32_t val) {
+ const EnumHandlerData *hd = handler_data;
+ upb_json_printer *p = closure;
+ print_comma(p);
+
+ const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val);
+ if (symbolic_name) {
+ print_data(p, "\"", 1);
+ putstring(p, symbolic_name, strlen(symbolic_name));
+ print_data(p, "\"", 1);
+ } else {
+ putint32_t(closure, NULL, val);
+ }
+
+ return true;
+}
+
static void *scalar_startsubmsg(void *closure, const void *handler_data) {
return putkey(closure, handler_data) ? closure : UPB_BREAK;
}
@@ -310,27 +350,60 @@ static size_t putbytes(void *closure, const void *handler_data, const char *str,
}
size_t bytes = to - data;
+ print_data(p, "\"", 1);
putstring(p, data, bytes);
+ print_data(p, "\"", 1);
return len;
}
+static void *scalar_startstr(void *closure, const void *handler_data,
+ size_t size_hint) {
+ UPB_UNUSED(handler_data);
+ UPB_UNUSED(size_hint);
+ upb_json_printer *p = closure;
+ CHK(putkey(closure, handler_data));
+ print_data(p, "\"", 1);
+ return p;
+}
+
static size_t scalar_str(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
- CHK(putkey(closure, handler_data));
CHK(putstr(closure, handler_data, str, len, handle));
return len;
}
+static bool scalar_endstr(void *closure, const void *handler_data) {
+ UPB_UNUSED(handler_data);
+ upb_json_printer *p = closure;
+ print_data(p, "\"", 1);
+ return true;
+}
+
+static void *repeated_startstr(void *closure, const void *handler_data,
+ size_t size_hint) {
+ UPB_UNUSED(handler_data);
+ UPB_UNUSED(size_hint);
+ upb_json_printer *p = closure;
+ print_comma(p);
+ print_data(p, "\"", 1);
+ return p;
+}
+
static size_t repeated_str(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
- upb_json_printer *p = closure;
- print_comma(p);
CHK(putstr(closure, handler_data, str, len, handle));
return len;
}
+static bool repeated_endstr(void *closure, const void *handler_data) {
+ UPB_UNUSED(handler_data);
+ upb_json_printer *p = closure;
+ print_data(p, "\"", 1);
+ return true;
+}
+
static size_t scalar_bytes(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
@@ -381,21 +454,44 @@ void sethandlers(const void *closure, upb_handlers *h) {
TYPE(UPB_TYPE_FLOAT, float, float);
TYPE(UPB_TYPE_DOUBLE, double, double);
TYPE(UPB_TYPE_BOOL, bool, bool);
- TYPE(UPB_TYPE_ENUM, int32, int32_t);
TYPE(UPB_TYPE_INT32, int32, int32_t);
TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
TYPE(UPB_TYPE_INT64, int64, int64_t);
TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
+ case UPB_TYPE_ENUM: {
+ // For now, we always emit symbolic names for enums. We may want an
+ // option later to control this behavior, but we will wait for a real
+ // need first.
+ EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
+ hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
+ hd->keyname = newstrpc(h, f);
+ upb_handlers_addcleanup(h, hd, free);
+ upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&enum_attr, hd);
+
+ if (upb_fielddef_isseq(f)) {
+ upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
+ } else {
+ upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
+ }
+
+ upb_handlerattr_uninit(&enum_attr);
+ break;
+ }
case UPB_TYPE_STRING:
- // XXX: this doesn't support strings that span buffers yet.
if (upb_fielddef_isseq(f)) {
+ upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
upb_handlers_setstring(h, f, repeated_str, &empty_attr);
+ upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
} else {
- upb_handlers_setstring(h, f, scalar_str, &name_attr);
+ upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
+ upb_handlers_setstring(h, f, scalar_str, &empty_attr);
+ upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
}
break;
case UPB_TYPE_BYTES:
- // XXX: this doesn't support strings that span buffers yet.
+ // XXX: this doesn't support strings that span buffers yet. The base64
+ // encoder will need to be made resumable for this to work properly.
if (upb_fielddef_isseq(f)) {
upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
} else {
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback