From f9afc3e55bbc289df41606d493377318c6645817 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Wed, 13 Jan 2016 18:04:48 -0800 Subject: Changed JSON parser/printer to correctly camelCase names. --- upb/def.c | 30 +++++++ upb/def.h | 28 ++++++ upb/descriptor/descriptor.pb | Bin 5012 -> 5012 bytes upb/json/parser.c | 206 +++++++++++++++++++++++++++++++++---------- upb/json/parser.h | 61 +++++++++++-- upb/json/parser.rl | 134 +++++++++++++++++++++++++--- upb/json/printer.c | 16 +++- 7 files changed, 405 insertions(+), 70 deletions(-) (limited to 'upb') diff --git a/upb/def.c b/upb/def.c index 0c694f8..a1dc192 100644 --- a/upb/def.c +++ b/upb/def.c @@ -1,6 +1,7 @@ #include "upb/def.h" +#include #include #include #include "upb/structdefs.int.h" @@ -721,6 +722,35 @@ const char *upb_fielddef_name(const upb_fielddef *f) { return upb_def_fullname(upb_fielddef_upcast(f)); } +bool upb_fielddef_getjsonname(const upb_fielddef *f, char *buf) { + const char *name = upb_fielddef_name(f); + size_t i, j; + bool ucase_next = false; + + if (!name) return false; + + /* Implement the transformation as described in the spec: + * 1. upper case all letters after an underscore. + * 2. remove all underscores. + */ + for (i = 0, j = 0; name[i]; i++) { + if (name[i] == '_') { + ucase_next = true; + continue; + } + + if (ucase_next) { + buf[j++] = toupper(name[i]); + ucase_next = false; + } else { + buf[j++] = name[i]; + } + } + + buf[j] = '\0'; + return true; +} + const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { return f->msg_is_symbolic ? NULL : f->msg.def; } diff --git a/upb/def.h b/upb/def.h index 2c29cb5..d19ab18 100644 --- a/upb/def.h +++ b/upb/def.h @@ -307,6 +307,12 @@ class upb::FieldDef { uint32_t number() const; /* Returns 0 if uninitialized. */ bool is_extension() const; + /* Get the JSON name for this field. This will copy the JSON name into the + * given buffer, which must have size of at least "strlen(name()) + 1". + * The string will be NULL-terminated. Returns false if uninitialized. + */ + bool GetJsonName(char* buf) const; + /* For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false, * indicates whether this field should have lazy parsing handlers that yield * the unparsed string for the submessage. @@ -472,6 +478,16 @@ class upb::FieldDef { bool set_name(const char* name, upb::Status* s); bool set_name(const std::string& name, upb::Status* s); + /* Sets the JSON name to the given string. */ + /* TODO(haberman): implement. Right now only default json_name (camelCase) + * is supported. */ + bool set_json_name(const char* json_name, upb::Status* s); + bool set_json_name(const std::string& name, upb::Status* s); + + /* Clears the JSON name. This will make it revert to its default, which is + * a camelCased version of the regular field name. */ + void clear_json_name(); + void set_integer_format(IntegerFormat format); bool set_tag_delimited(bool tag_delimited, upb::Status* s); @@ -536,6 +552,7 @@ const char *upb_fielddef_name(const upb_fielddef *f); bool upb_fielddef_isextension(const upb_fielddef *f); bool upb_fielddef_lazy(const upb_fielddef *f); bool upb_fielddef_packed(const upb_fielddef *f); +bool upb_fielddef_getjsonname(const upb_fielddef *f, char *buf); const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f); const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f); upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f); @@ -570,6 +587,8 @@ void upb_fielddef_setdescriptortype(upb_fielddef *f, int type); void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label); bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s); bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s); +bool upb_fielddef_setjsonname(upb_fielddef *f, const char *name, upb_status *s); +bool upb_fielddef_clearjsonname(upb_fielddef *f); bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name, upb_status *s); void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension); @@ -1346,6 +1365,15 @@ inline bool FieldDef::set_name(const char *name, Status* s) { inline bool FieldDef::set_name(const std::string& name, Status* s) { return upb_fielddef_setname(this, upb_safecstr(name), s); } +inline bool FieldDef::set_json_name(const char *name, Status* s) { + return upb_fielddef_setjsonname(this, name, s); +} +inline bool FieldDef::set_json_name(const std::string& name, Status* s) { + return upb_fielddef_setjsonname(this, upb_safecstr(name), s); +} +inline void FieldDef::clear_json_name() { + upb_fielddef_clearjsonname(this); +} inline bool FieldDef::set_containing_type_name(const char *name, Status* s) { return upb_fielddef_setcontainingtypename(this, name, s); } diff --git a/upb/descriptor/descriptor.pb b/upb/descriptor/descriptor.pb index d79c9cf..3b07163 100644 Binary files a/upb/descriptor/descriptor.pb and b/upb/descriptor/descriptor.pb differ diff --git a/upb/json/parser.c b/upb/json/parser.c index f9771f9..8a50b3a 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -40,6 +40,9 @@ typedef struct { const upb_msgdef *m; const upb_fielddef *f; + /* The table mapping json name to fielddef for this message. */ + upb_strtable *name_table; + /* We are in a repeated-field context, ready to emit mapentries as * submessages. This flag alters the start-of-object (open-brace) behavior to * begin a sequence of mapentry messages rather than a single submessage. */ @@ -60,7 +63,7 @@ typedef struct { struct upb_json_parser { upb_env *env; - upb_byteshandler input_handler_; + const upb_json_parsermethod *method; upb_bytessink input_; /* Stack to track the JSON scopes we are in. */ @@ -95,6 +98,19 @@ struct upb_json_parser { uint32_t digit; }; +struct upb_json_parsermethod { + upb_refcounted base; + + upb_byteshandler input_handler_; + + /* Mainly for the purposes of refcounting, so all the fielddefs we point + * to stay alive. */ + const upb_msgdef *msg; + + /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef) */ + upb_inttable name_tables; +}; + #define PARSER_CHECK_RETURN(x) if (!(x)) return false /* Used to signal that a capture has been suspended. */ @@ -123,6 +139,13 @@ static bool check_stack(upb_json_parser *p) { return true; } +static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) { + upb_value v; + bool ok = upb_inttable_lookupptr(&p->method->name_tables, frame->m, &v); + UPB_ASSERT_VAR(ok, ok); + frame->name_table = upb_value_getptr(v); +} + /* There are GCC/Clang built-ins for overflow checking which we could start * using if there was any performance benefit to it. */ @@ -719,6 +742,7 @@ static bool start_stringval(upb_json_parser *p) { upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->name_table = NULL; inner->is_map = false; inner->is_mapentry = false; p->top = inner; @@ -905,6 +929,7 @@ static bool handle_mapentry(upb_json_parser *p) { sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); inner->m = mapentrymsg; + inner->name_table = NULL; inner->mapfield = mapfield; inner->is_map = false; @@ -941,20 +966,20 @@ static bool end_membername(upb_json_parser *p) { } else { size_t len; const char *buf = accumulate_getptr(p, &len); - const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + upb_value v; - if (!f) { + if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) { + p->top->f = upb_value_getconstptr(v); + multipart_end(p); + + return true; + } else { /* TODO(haberman): Ignore unknown fields if requested/configured to do * so. */ upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf); upb_env_reporterror(p->env, &p->status); return false; } - - p->top->f = f; - multipart_end(p); - - return true; } } @@ -996,6 +1021,7 @@ static bool start_subobject(upb_json_parser *p) { sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->name_table = NULL; inner->mapfield = p->top->f; inner->f = NULL; inner->is_map = true; @@ -1016,6 +1042,7 @@ static bool start_subobject(upb_json_parser *p) { sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); inner->m = upb_fielddef_msgsubdef(p->top->f); + set_name_table(p, inner); inner->f = NULL; inner->is_map = false; inner->is_mapentry = false; @@ -1065,6 +1092,7 @@ static bool start_array(upb_json_parser *p) { sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; + inner->name_table = NULL; inner->f = p->top->f; inner->is_map = false; inner->is_mapentry = false; @@ -1122,11 +1150,11 @@ static void end_object(upb_json_parser *p) { * final state once, when the closing '"' is seen. */ -#line 1218 "upb/json/parser.rl" +#line 1246 "upb/json/parser.rl" -#line 1130 "upb/json/parser.c" +#line 1158 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -1275,7 +1303,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 1221 "upb/json/parser.rl" +#line 1249 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -1297,7 +1325,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 1301 "upb/json/parser.c" +#line 1329 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -1372,118 +1400,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 1133 "upb/json/parser.rl" +#line 1161 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 1134 "upb/json/parser.rl" +#line 1162 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 1138 "upb/json/parser.rl" +#line 1166 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 1139 "upb/json/parser.rl" +#line 1167 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 1145 "upb/json/parser.rl" +#line 1173 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 1146 "upb/json/parser.rl" +#line 1174 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 1147 "upb/json/parser.rl" +#line 1175 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 1153 "upb/json/parser.rl" +#line 1181 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 1159 "upb/json/parser.rl" +#line 1187 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 1162 "upb/json/parser.rl" +#line 1190 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 1164 "upb/json/parser.rl" +#line 1192 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 1169 "upb/json/parser.rl" +#line 1197 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 1170 "upb/json/parser.rl" +#line 1198 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 13: -#line 1173 "upb/json/parser.rl" +#line 1201 "upb/json/parser.rl" { end_member(parser); } break; case 14: -#line 1179 "upb/json/parser.rl" +#line 1207 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 1182 "upb/json/parser.rl" +#line 1210 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 1188 "upb/json/parser.rl" +#line 1216 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 1192 "upb/json/parser.rl" +#line 1220 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 1197 "upb/json/parser.rl" +#line 1225 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 1198 "upb/json/parser.rl" +#line 1226 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 1200 "upb/json/parser.rl" +#line 1228 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 1201 "upb/json/parser.rl" +#line 1229 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 1203 "upb/json/parser.rl" +#line 1231 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 1205 "upb/json/parser.rl" +#line 1233 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 1207 "upb/json/parser.rl" +#line 1235 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 1209 "upb/json/parser.rl" +#line 1237 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 1210 "upb/json/parser.rl" +#line 1238 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 1215 "upb/json/parser.rl" +#line 1243 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1487 "upb/json/parser.c" +#line 1515 "upb/json/parser.c" } } @@ -1496,7 +1524,7 @@ _again: _out: {} } -#line 1242 "upb/json/parser.rl" +#line 1270 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(&parser->status, "Parse error at %s\n", p); @@ -1537,13 +1565,13 @@ static void json_parser_reset(upb_json_parser *p) { /* Emit Ragel initialization of the parser. */ -#line 1541 "upb/json/parser.c" +#line 1569 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 1282 "upb/json/parser.rl" +#line 1310 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); @@ -1553,10 +1581,65 @@ static void json_parser_reset(upb_json_parser *p) { upb_status_clear(&p->status); } +static void visit_json_parsermethod(const upb_refcounted *r, + upb_refcounted_visit *visit, + void *closure) { + const upb_json_parsermethod *method = (upb_json_parsermethod*)r; + visit(r, upb_msgdef_upcast2(method->msg), closure); +} + +static void free_json_parsermethod(upb_refcounted *r) { + upb_json_parsermethod *method = (upb_json_parsermethod*)r; + + upb_inttable_iter i; + upb_inttable_begin(&i, &method->name_tables); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + upb_value val = upb_inttable_iter_value(&i); + upb_strtable *t = upb_value_getptr(val); + upb_strtable_uninit(t); + free(t); + } + + upb_inttable_uninit(&method->name_tables); + + free(r); +} + +static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { + upb_msg_field_iter i; + upb_strtable *t; + + if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) { + return; + } + + /* TODO(haberman): handle malloc failure. */ + t = malloc(sizeof(*t)); + upb_strtable_init(t, UPB_CTYPE_CONSTPTR); + upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(t)); + + for(upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + /* It would be nice to stack-allocate this, but protobufs do not limit the + * length of fields to any reasonable limit. */ + char *buf = malloc(strlen(upb_fielddef_name(f)) + 1); + upb_fielddef_getjsonname(f, buf); + upb_strtable_insert(t, buf, upb_value_constptr(f)); + free(buf); + + if (upb_fielddef_issubmsg(f)) { + add_jsonname_table(m, upb_fielddef_msgsubdef(f)); + } + } +} /* Public API *****************************************************************/ -upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { +upb_json_parser *upb_json_parser_create(upb_env *env, + const upb_json_parsermethod *method, + upb_sink *output) { #ifndef NDEBUG const size_t size_before = upb_env_bytesallocated(env); #endif @@ -1564,17 +1647,16 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { if (!p) return false; p->env = env; + p->method = method; p->limit = p->stack + UPB_JSON_MAX_DEPTH; p->accumulate_buf = NULL; p->accumulate_buf_size = 0; - upb_byteshandler_init(&p->input_handler_); - upb_byteshandler_setstring(&p->input_handler_, parse, NULL); - upb_byteshandler_setendstr(&p->input_handler_, end, NULL); - upb_bytessink_reset(&p->input_, &p->input_handler_, p); + upb_bytessink_reset(&p->input_, &method->input_handler_, p); json_parser_reset(p); upb_sink_reset(&p->top->sink, output->handlers, output->closure); p->top->m = upb_handlers_msgdef(output->handlers); + set_name_table(p, p->top); /* If this fails, uncomment and increase the value in parser.h. */ /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */ @@ -1585,3 +1667,29 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { upb_bytessink *upb_json_parser_input(upb_json_parser *p) { return &p->input_; } + +upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md, + const void* owner) { + static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod, + free_json_parsermethod}; + upb_json_parsermethod *ret = malloc(sizeof(*ret)); + upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner); + + ret->msg = md; + upb_ref2(md, ret); + + upb_byteshandler_init(&ret->input_handler_); + upb_byteshandler_setstring(&ret->input_handler_, parse, ret); + upb_byteshandler_setendstr(&ret->input_handler_, end, ret); + + upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR); + + add_jsonname_table(ret, md); + + return ret; +} + +const upb_byteshandler *upb_json_parsermethod_inputhandler( + const upb_json_parsermethod *m) { + return &m->input_handler_; +} diff --git a/upb/json/parser.h b/upb/json/parser.h index e8f34fe..1d67dc4 100644 --- a/upb/json/parser.h +++ b/upb/json/parser.h @@ -15,11 +15,14 @@ namespace upb { namespace json { class Parser; +class ParserMethod; } /* namespace json */ } /* namespace upb */ #endif UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser) +UPB_DECLARE_DERIVED_TYPE(upb::json::ParserMethod, upb::RefCounted, + upb_json_parsermethod, upb_refcounted) /* upb::json::Parser **********************************************************/ @@ -27,7 +30,7 @@ UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser) * constructed. This hint may be an overestimate for some build configurations. * But if the parser library is upgraded without recompiling the application, * it may be an underestimate. */ -#define UPB_JSON_PARSER_SIZE 3704 +#define UPB_JSON_PARSER_SIZE 4104 #ifdef __cplusplus @@ -35,7 +38,8 @@ UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser) * sink. */ class upb::json::Parser { public: - static Parser* Create(Environment* env, Sink* output); + static Parser* Create(Environment* env, const ParserMethod* method, + Sink* output); BytesSink* input(); @@ -43,25 +47,72 @@ class upb::json::Parser { UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser) }; +class upb::json::ParserMethod { + public: + /* Include base methods from upb::ReferenceCounted. */ + UPB_REFCOUNTED_CPPMETHODS + + /* Returns handlers for parsing according to the specified schema. */ + static reffed_ptr New(const upb::MessageDef* md); + + /* The destination handlers that are statically bound to this method. + * This method is only capable of outputting to a sink that uses these + * handlers. */ + const Handlers* dest_handlers() const; + + /* The input handlers for this decoder method. */ + const BytesHandler* input_handler() const; + + private: + UPB_DISALLOW_POD_OPS(ParserMethod, upb::json::ParserMethod) +}; + #endif UPB_BEGIN_EXTERN_C -upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output); +upb_json_parser* upb_json_parser_create(upb_env* e, + const upb_json_parsermethod* m, + upb_sink* output); upb_bytessink *upb_json_parser_input(upb_json_parser *p); +upb_json_parsermethod* upb_json_parsermethod_new(const upb_msgdef* md, + const void* owner); +const upb_handlers *upb_json_parsermethod_desthandlers( + const upb_json_parsermethod *m); +const upb_byteshandler *upb_json_parsermethod_inputhandler( + const upb_json_parsermethod *m); + +/* Include refcounted methods like upb_json_parsermethod_ref(). */ +UPB_REFCOUNTED_CMETHODS(upb_json_parsermethod, upb_json_parsermethod_upcast) + UPB_END_EXTERN_C #ifdef __cplusplus namespace upb { namespace json { -inline Parser* Parser::Create(Environment* env, Sink* output) { - return upb_json_parser_create(env, output); +inline Parser* Parser::Create(Environment* env, const ParserMethod* method, + Sink* output) { + return upb_json_parser_create(env, method, output); } inline BytesSink* Parser::input() { return upb_json_parser_input(this); } + +inline const Handlers* ParserMethod::dest_handlers() const { + return upb_json_parsermethod_desthandlers(this); +} +inline const BytesHandler* ParserMethod::input_handler() const { + return upb_json_parsermethod_inputhandler(this); +} +/* static */ +inline reffed_ptr ParserMethod::New( + const MessageDef* md) { + const upb_json_parsermethod *m = upb_json_parsermethod_new(md, &m); + return reffed_ptr(m, &m); +} + } /* namespace json */ } /* namespace upb */ diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 40b7724..b85fd21 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -38,6 +38,9 @@ typedef struct { const upb_msgdef *m; const upb_fielddef *f; + /* The table mapping json name to fielddef for this message. */ + upb_strtable *name_table; + /* We are in a repeated-field context, ready to emit mapentries as * submessages. This flag alters the start-of-object (open-brace) behavior to * begin a sequence of mapentry messages rather than a single submessage. */ @@ -58,7 +61,7 @@ typedef struct { struct upb_json_parser { upb_env *env; - upb_byteshandler input_handler_; + const upb_json_parsermethod *method; upb_bytessink input_; /* Stack to track the JSON scopes we are in. */ @@ -93,6 +96,19 @@ struct upb_json_parser { uint32_t digit; }; +struct upb_json_parsermethod { + upb_refcounted base; + + upb_byteshandler input_handler_; + + /* Mainly for the purposes of refcounting, so all the fielddefs we point + * to stay alive. */ + const upb_msgdef *msg; + + /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef) */ + upb_inttable name_tables; +}; + #define PARSER_CHECK_RETURN(x) if (!(x)) return false /* Used to signal that a capture has been suspended. */ @@ -121,6 +137,13 @@ static bool check_stack(upb_json_parser *p) { return true; } +static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) { + upb_value v; + bool ok = upb_inttable_lookupptr(&p->method->name_tables, frame->m, &v); + UPB_ASSERT_VAR(ok, ok); + frame->name_table = upb_value_getptr(v); +} + /* There are GCC/Clang built-ins for overflow checking which we could start * using if there was any performance benefit to it. */ @@ -717,6 +740,7 @@ static bool start_stringval(upb_json_parser *p) { upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->name_table = NULL; inner->is_map = false; inner->is_mapentry = false; p->top = inner; @@ -903,6 +927,7 @@ static bool handle_mapentry(upb_json_parser *p) { sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); inner->m = mapentrymsg; + inner->name_table = NULL; inner->mapfield = mapfield; inner->is_map = false; @@ -939,20 +964,20 @@ static bool end_membername(upb_json_parser *p) { } else { size_t len; const char *buf = accumulate_getptr(p, &len); - const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + upb_value v; - if (!f) { + if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) { + p->top->f = upb_value_getconstptr(v); + multipart_end(p); + + return true; + } else { /* TODO(haberman): Ignore unknown fields if requested/configured to do * so. */ upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf); upb_env_reporterror(p->env, &p->status); return false; } - - p->top->f = f; - multipart_end(p); - - return true; } } @@ -994,6 +1019,7 @@ static bool start_subobject(upb_json_parser *p) { sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->name_table = NULL; inner->mapfield = p->top->f; inner->f = NULL; inner->is_map = true; @@ -1014,6 +1040,7 @@ static bool start_subobject(upb_json_parser *p) { sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); inner->m = upb_fielddef_msgsubdef(p->top->f); + set_name_table(p, inner); inner->f = NULL; inner->is_map = false; inner->is_mapentry = false; @@ -1063,6 +1090,7 @@ static bool start_array(upb_json_parser *p) { sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; + inner->name_table = NULL; inner->f = p->top->f; inner->is_map = false; inner->is_mapentry = false; @@ -1288,10 +1316,65 @@ static void json_parser_reset(upb_json_parser *p) { upb_status_clear(&p->status); } +static void visit_json_parsermethod(const upb_refcounted *r, + upb_refcounted_visit *visit, + void *closure) { + const upb_json_parsermethod *method = (upb_json_parsermethod*)r; + visit(r, upb_msgdef_upcast2(method->msg), closure); +} + +static void free_json_parsermethod(upb_refcounted *r) { + upb_json_parsermethod *method = (upb_json_parsermethod*)r; + + upb_inttable_iter i; + upb_inttable_begin(&i, &method->name_tables); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + upb_value val = upb_inttable_iter_value(&i); + upb_strtable *t = upb_value_getptr(val); + upb_strtable_uninit(t); + free(t); + } + + upb_inttable_uninit(&method->name_tables); + + free(r); +} + +static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { + upb_msg_field_iter i; + upb_strtable *t; + + if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) { + return; + } + + /* TODO(haberman): handle malloc failure. */ + t = malloc(sizeof(*t)); + upb_strtable_init(t, UPB_CTYPE_CONSTPTR); + upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(t)); + + for(upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + /* It would be nice to stack-allocate this, but protobufs do not limit the + * length of fields to any reasonable limit. */ + char *buf = malloc(strlen(upb_fielddef_name(f)) + 1); + upb_fielddef_getjsonname(f, buf); + upb_strtable_insert(t, buf, upb_value_constptr(f)); + free(buf); + + if (upb_fielddef_issubmsg(f)) { + add_jsonname_table(m, upb_fielddef_msgsubdef(f)); + } + } +} /* Public API *****************************************************************/ -upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { +upb_json_parser *upb_json_parser_create(upb_env *env, + const upb_json_parsermethod *method, + upb_sink *output) { #ifndef NDEBUG const size_t size_before = upb_env_bytesallocated(env); #endif @@ -1299,17 +1382,16 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { if (!p) return false; p->env = env; + p->method = method; p->limit = p->stack + UPB_JSON_MAX_DEPTH; p->accumulate_buf = NULL; p->accumulate_buf_size = 0; - upb_byteshandler_init(&p->input_handler_); - upb_byteshandler_setstring(&p->input_handler_, parse, NULL); - upb_byteshandler_setendstr(&p->input_handler_, end, NULL); - upb_bytessink_reset(&p->input_, &p->input_handler_, p); + upb_bytessink_reset(&p->input_, &method->input_handler_, p); json_parser_reset(p); upb_sink_reset(&p->top->sink, output->handlers, output->closure); p->top->m = upb_handlers_msgdef(output->handlers); + set_name_table(p, p->top); /* If this fails, uncomment and increase the value in parser.h. */ /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */ @@ -1320,3 +1402,29 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { upb_bytessink *upb_json_parser_input(upb_json_parser *p) { return &p->input_; } + +upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md, + const void* owner) { + static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod, + free_json_parsermethod}; + upb_json_parsermethod *ret = malloc(sizeof(*ret)); + upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner); + + ret->msg = md; + upb_ref2(md, ret); + + upb_byteshandler_init(&ret->input_handler_); + upb_byteshandler_setstring(&ret->input_handler_, parse, ret); + upb_byteshandler_setendstr(&ret->input_handler_, end, ret); + + upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR); + + add_jsonname_table(ret, md); + + return ret; +} + +const upb_byteshandler *upb_json_parsermethod_inputhandler( + const upb_json_parsermethod *m) { + return &m->input_handler_; +} diff --git a/upb/json/printer.c b/upb/json/printer.c index 4a6f1ad..5d54abf 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -33,15 +33,25 @@ struct upb_json_printer { /* StringPiece; a pointer plus a length. */ typedef struct { - const char *ptr; + char *ptr; size_t len; } strpc; +void freestrpc(void *ptr) { + strpc *pc = ptr; + free(pc->ptr); + free(pc); +} + +/* Convert fielddef name to JSON name and return as a string piece. */ strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) { + /* TODO(haberman): handle malloc failure. */ strpc *ret = malloc(sizeof(*ret)); - ret->ptr = upb_fielddef_name(f); + ret->ptr = malloc(strlen(upb_fielddef_name(f)) + 1); + upb_fielddef_getjsonname(f, ret->ptr); ret->len = strlen(ret->ptr); - upb_handlers_addcleanup(h, ret, free); + + upb_handlers_addcleanup(h, ret, freestrpc); return ret; } -- cgit v1.2.3 From 458da2563fa674bbcfd32ade2254b3915f751491 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 16 Feb 2016 23:13:53 -0800 Subject: Addressed code review comments. --- upb/def.c | 28 +++++++++++++++++++--------- upb/def.h | 28 +++++++++++++++++++++++----- upb/json/parser.c | 18 +++++++++++++----- upb/json/parser.rl | 18 +++++++++++++----- upb/json/printer.c | 7 ++++--- 5 files changed, 72 insertions(+), 27 deletions(-) (limited to 'upb') diff --git a/upb/def.c b/upb/def.c index a1dc192..abb5b05 100644 --- a/upb/def.c +++ b/upb/def.c @@ -722,33 +722,43 @@ const char *upb_fielddef_name(const upb_fielddef *f) { return upb_def_fullname(upb_fielddef_upcast(f)); } -bool upb_fielddef_getjsonname(const upb_fielddef *f, char *buf) { +size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) { const char *name = upb_fielddef_name(f); - size_t i, j; + size_t src, dst = 0; bool ucase_next = false; - if (!name) return false; +#define WRITE(byte) \ + ++dst; \ + if (dst < len) buf[dst - 1] = byte; \ + else if (dst == len) buf[dst - 1] = '\0' + + if (!name) { + WRITE('\0'); + return 0; + } /* Implement the transformation as described in the spec: * 1. upper case all letters after an underscore. * 2. remove all underscores. */ - for (i = 0, j = 0; name[i]; i++) { - if (name[i] == '_') { + for (src = 0; name[src]; src++) { + if (name[src] == '_') { ucase_next = true; continue; } if (ucase_next) { - buf[j++] = toupper(name[i]); + WRITE(toupper(name[src])); ucase_next = false; } else { - buf[j++] = name[i]; + WRITE(name[src]); } } - buf[j] = '\0'; - return true; + WRITE('\0'); + return dst; + +#undef WRITE } const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { diff --git a/upb/def.h b/upb/def.h index d19ab18..8365574 100644 --- a/upb/def.h +++ b/upb/def.h @@ -307,11 +307,26 @@ class upb::FieldDef { uint32_t number() const; /* Returns 0 if uninitialized. */ bool is_extension() const; - /* Get the JSON name for this field. This will copy the JSON name into the - * given buffer, which must have size of at least "strlen(name()) + 1". - * The string will be NULL-terminated. Returns false if uninitialized. + /* Copies the JSON name for this field into the given buffer. Returns the + * actual size of the JSON name, including the NULL terminator. If the + * return value is 0, the JSON name is unset. If the return value is + * greater than len, the JSON name was truncated. The buffer is always + * NULL-terminated if len > 0. + * + * The JSON name always defaults to a camelCased version of the regular + * name. However if the regular name is unset, the JSON name will be unset + * also. */ - bool GetJsonName(char* buf) const; + size_t GetJsonName(char* buf, size_t len) const; + + /* Convenience version of the above function which copies the JSON name + * into the given string, returning false if the name is not set. */ + template + bool GetJsonName(T* str) { + str->resize(GetJsonName(NULL, 0)); + GetJsonName(&(*str)[0], str->size()); + return str->size() > 0; + } /* For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false, * indicates whether this field should have lazy parsing handlers that yield @@ -552,7 +567,7 @@ const char *upb_fielddef_name(const upb_fielddef *f); bool upb_fielddef_isextension(const upb_fielddef *f); bool upb_fielddef_lazy(const upb_fielddef *f); bool upb_fielddef_packed(const upb_fielddef *f); -bool upb_fielddef_getjsonname(const upb_fielddef *f, char *buf); +size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len); const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f); const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f); upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f); @@ -1335,6 +1350,9 @@ inline const char* FieldDef::name() const { return upb_fielddef_name(this); } inline bool FieldDef::is_extension() const { return upb_fielddef_isextension(this); } +inline size_t FieldDef::GetJsonName(char* buf, size_t len) const { + return upb_fielddef_getjsonname(this, buf, len); +} inline bool FieldDef::lazy() const { return upb_fielddef_lazy(this); } diff --git a/upb/json/parser.c b/upb/json/parser.c index 8a50b3a..3731478 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -1609,6 +1609,11 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { upb_msg_field_iter i; upb_strtable *t; + /* It would be nice to stack-allocate this, but protobufs do not limit the + * length of fields to any reasonable limit. */ + char *buf = NULL; + size_t len = 0; + if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) { return; } @@ -1622,17 +1627,20 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - /* It would be nice to stack-allocate this, but protobufs do not limit the - * length of fields to any reasonable limit. */ - char *buf = malloc(strlen(upb_fielddef_name(f)) + 1); - upb_fielddef_getjsonname(f, buf); + size_t field_len = upb_fielddef_getjsonname(f, buf, len); + if (field_len > len) { + buf = realloc(buf, field_len); + len = field_len; + upb_fielddef_getjsonname(f, buf, len); + } upb_strtable_insert(t, buf, upb_value_constptr(f)); - free(buf); if (upb_fielddef_issubmsg(f)) { add_jsonname_table(m, upb_fielddef_msgsubdef(f)); } } + + free(buf); } /* Public API *****************************************************************/ diff --git a/upb/json/parser.rl b/upb/json/parser.rl index b85fd21..4ccca6c 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -1344,6 +1344,11 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { upb_msg_field_iter i; upb_strtable *t; + /* It would be nice to stack-allocate this, but protobufs do not limit the + * length of fields to any reasonable limit. */ + char *buf = NULL; + size_t len = 0; + if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) { return; } @@ -1357,17 +1362,20 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - /* It would be nice to stack-allocate this, but protobufs do not limit the - * length of fields to any reasonable limit. */ - char *buf = malloc(strlen(upb_fielddef_name(f)) + 1); - upb_fielddef_getjsonname(f, buf); + size_t field_len = upb_fielddef_getjsonname(f, buf, len); + if (field_len > len) { + buf = realloc(buf, field_len); + len = field_len; + upb_fielddef_getjsonname(f, buf, len); + } upb_strtable_insert(t, buf, upb_value_constptr(f)); - free(buf); if (upb_fielddef_issubmsg(f)) { add_jsonname_table(m, upb_fielddef_msgsubdef(f)); } } + + free(buf); } /* Public API *****************************************************************/ diff --git a/upb/json/printer.c b/upb/json/printer.c index 5d54abf..c3d9bb4 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -47,9 +47,10 @@ void freestrpc(void *ptr) { strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) { /* TODO(haberman): handle malloc failure. */ strpc *ret = malloc(sizeof(*ret)); - ret->ptr = malloc(strlen(upb_fielddef_name(f)) + 1); - upb_fielddef_getjsonname(f, ret->ptr); - ret->len = strlen(ret->ptr); + ret->len = upb_fielddef_getjsonname(f, NULL, 0); + ret->ptr = malloc(ret->len); + upb_fielddef_getjsonname(f, ret->ptr, ret->len); + ret->len--; /* NULL */ upb_handlers_addcleanup(h, ret, freestrpc); return ret; -- cgit v1.2.3 From 41506406506332675016916280693a8d94e1b3ac Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Wed, 17 Feb 2016 12:37:32 -0800 Subject: Added assertions for getjsonname() return. --- upb/json/parser.rl | 4 +++- upb/json/printer.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'upb') diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 4ccca6c..fbc2d88 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -1364,9 +1364,11 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { const upb_fielddef *f = upb_msg_iter_field(&i); size_t field_len = upb_fielddef_getjsonname(f, buf, len); if (field_len > len) { + size_t len2; buf = realloc(buf, field_len); len = field_len; - upb_fielddef_getjsonname(f, buf, len); + len2 = upb_fielddef_getjsonname(f, buf, len); + UPB_ASSERT_VAR(len2, len == len2); } upb_strtable_insert(t, buf, upb_value_constptr(f)); diff --git a/upb/json/printer.c b/upb/json/printer.c index c3d9bb4..0db7b80 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -47,9 +47,11 @@ void freestrpc(void *ptr) { strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) { /* TODO(haberman): handle malloc failure. */ strpc *ret = malloc(sizeof(*ret)); + size_t len; ret->len = upb_fielddef_getjsonname(f, NULL, 0); ret->ptr = malloc(ret->len); - upb_fielddef_getjsonname(f, ret->ptr, ret->len); + len = upb_fielddef_getjsonname(f, ret->ptr, ret->len); + UPB_ASSERT_VAR(len, len == ret->len); ret->len--; /* NULL */ upb_handlers_addcleanup(h, ret, freestrpc); -- cgit v1.2.3