From 2ccebb74c309c7ea4c4589b35893cdd6c996ac4b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 21 May 2011 15:50:08 -0700 Subject: Add proof-of-concept C++ wrapper header. --- benchmarks/parsestream.upb_table.c | 2 +- lang_ext/cpp/upb/handlers.hpp | 142 +++++++++++++++++++++++++++++++++++++ src/upb_def.c | 50 ++++++------- src/upb_handlers.c | 89 +++++++++++------------ src/upb_handlers.h | 15 ++-- 5 files changed, 222 insertions(+), 76 deletions(-) create mode 100644 lang_ext/cpp/upb/handlers.hpp diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c index 00eb1bb..089d956 100644 --- a/benchmarks/parsestream.upb_table.c +++ b/benchmarks/parsestream.upb_table.c @@ -63,7 +63,7 @@ static bool initialize() upb_handlers_init(&handlers); // Cause all messages to be read, but do nothing when they are. - upb_handlerset hset = {NULL, NULL, value, startsubmsg, NULL}; + upb_handlerset hset = {NULL, NULL, value, startsubmsg, NULL, NULL, NULL}; upb_handlers_reghandlerset(&handlers, def, &hset); upb_decoder_init(&decoder, &handlers); upb_stringsrc_init(&stringsrc); diff --git a/lang_ext/cpp/upb/handlers.hpp b/lang_ext/cpp/upb/handlers.hpp new file mode 100644 index 0000000..b083f15 --- /dev/null +++ b/lang_ext/cpp/upb/handlers.hpp @@ -0,0 +1,142 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Note! This file is a proof-of-concept for C++ wrappers and does not + * yet build. + * + * upb::Handlers is a generic visitor-like interface for iterating over a + * stream of protobuf data. You can register function pointers that will be + * called for each message and/or field as the data is being parsed or iterated + * over, without having to know the source format that we are parsing from. + * This decouples the parsing logic from the processing logic. 
+ */ + +#ifndef UPB_HANDLERS_HPP +#define UPB_HANDLERS_HPP + +#include "upb_handlers.h" + +namespace upb { + +typedef upb_flow_t Flow; + +class FieldHandlers : public upb_fhandlers { + public: + typedef upb_value_handler ValueHandler; + typedef upb_startfield_handler StartFieldHandler; + typedef upb_endfield_handler EndFieldHandler; + + // The FieldHandlers will live at least as long as the upb::Handlers to + // which it belongs, but can be Ref'd/Unref'd to make it live longer (which + // will prolong the life of the underlying upb::Handlers also). + void Ref() { upb_fhandlers_ref(this); } + void Unref() { upb_fhandlers_unref(this); } + + // Functions to set this field's handlers. + // These return "this" so they can be conveniently chained, eg. + // message_handlers->NewFieldHandlers(...) + // ->SetStartSequenceHandler(&StartSequence) + // ->SetEndSequenceHandler(&EndSequence) + // ->SetValueHandler(&Value); + FieldHandlers* SetValueHandler(ValueHandler* h) { + upb_fhandlers_setvalue(this, h); return this; + } + FieldHandlers* SetStartSequenceHandler(StartFieldHandler* h) { + upb_fhandlers_setstartseq(this, h); return this; + } + FieldHandlers* SetEndSequenceHandler(EndFieldHandler* h) { + upb_fhandlers_setendseq(this, h); return this; + } + FieldHandlers* SetStartSubmessageHandler(StartFieldHandler* h) { + upb_fhandlers_setstartsubmsg(this, h); return this; + } + FieldHandlers* SetEndSubmessageHandler(EndFieldHandler* h) { + upb_fhandlers_setendsubmsg(this, h); return this; + } + + // Get/Set the field's bound value, which will be passed to its handlers. + Value GetBoundValue() { return upb_fhandlers_getfval(this); } + FieldHandlers* SetBoundValue(Value val) { + upb_fhandlers_setfval(this, val); return this; + } + + private: + FieldHandlers(); // Only created by upb::Handlers. + ~FieldHandlers(); // Only destroyed by refcounting. 
+}; + + +class MessageHandlers : public upb_mhandlers { + public: + typedef upb_startmsg_handler StartMessageHandler; + typedef upb_endmsg_handler EndMessageHandler; + + // The MessageHandlers will live at least as long as the upb::Handlers to + // which it belongs, but can be Ref'd/Unref'd to make it live longer (which + // will prolong the life of the underlying upb::Handlers also). + void Ref() { upb_mhandlers_ref(this); } + void Unref() { upb_mhandlers_unref(this); } + + // Functions to set this message's handlers. + // These return "this" so they can be conveniently chained, eg. + // handlers->NewMessageHandlers() + // ->SetStartMessageHandler(&StartMessage) + // ->SetEndMessageHandler(&EndMessage); + MessageHandlers* SetStartMessageHandler(StartMessageHandler* h) { + upb_mhandlers_setstartmsg(this, h); return this; + } + MessageHandlers* SetEndMessageHandler(EndMessageHandler* h) { + upb_mhandlers_setendmsg(this, h); return this; + } + + // Functions to create new FieldHandlers for this message. + FieldHandlers* NewFieldHandlers(uint32_t fieldnum, upb_fieldtype_t type, + bool repeated) { + return static_cast<FieldHandlers*>(upb_mhandlers_newfhandlers(this, fieldnum, type, repeated)); + } + FieldHandlers* NewFieldHandlers(FieldDef* f) { // NOTE(review): C signature assumed; confirm. + return static_cast<FieldHandlers*>(upb_mhandlers_newfhandlers_fordef(this, f)); + } + + // Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the + // given submessage must not have any fields with this field number. + FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, upb_fieldtype_t type, + bool repeated, + MessageHandlers* subm) { + return static_cast<FieldHandlers*>(upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm)); + } + + FieldHandlers* NewFieldHandlersForSubmessage(FieldDef* f, + MessageHandlers* subm) { // NOTE(review): C signature assumed; confirm. + return static_cast<FieldHandlers*>(upb_mhandlers_newsubmsgfhandlers_fordef(this, f, subm)); + } + + + private: + MessageHandlers(); // Only created by upb::Handlers. + ~MessageHandlers(); // Only destroyed by refcounting. +}; + +class Handlers : public upb_handlers { + public: + // Creates a new Handlers instance. 
+ static Handlers* New() { return static_cast<Handlers*>(upb_handlers_new()); } + + void Ref() { upb_handlers_ref(this); } + void Unref() { upb_handlers_unref(this); } + + // Returns a new MessageHandlers object. The first such message that is + // obtained will be the top-level message for this Handlers object. + MessageHandlers* NewMessageHandlers() { return static_cast<MessageHandlers*>(upb_handlers_newmhandlers(this)); } + + private: + Handlers(); // Only created by Handlers::New(). + ~Handlers(); // Only destroyed by refcounting. +}; + +} // namespace upb + +#endif diff --git a/src/upb_def.c b/src/upb_def.c index 413621a..a6fe041 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -346,20 +346,20 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_package(void *_b, static upb_mhandlers *upb_defbuilder_register_FileDescriptorProto( upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmsg(h); + upb_mhandlers *m = upb_handlers_newmhandlers(h); upb_mhandlers_setstartmsg(m, &upb_defbuilder_FileDescriptorProto_startmsg); upb_mhandlers_setendmsg(m, &upb_defbuilder_FileDescriptorProto_endmsg); #define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM #define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE upb_fhandlers *f = - upb_mhandlers_newfield(m, FNUM(PACKAGE), FTYPE(PACKAGE), false); + upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false); upb_fhandlers_setvalue(f, &upb_defbuilder_FileDescriptorProto_package); - upb_mhandlers_newsubmsgfield(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true, - upb_msgdef_register_DescriptorProto(h)); - upb_mhandlers_newsubmsgfield(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); + upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true, + upb_msgdef_register_DescriptorProto(h)); + upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, + upb_enumdef_register_EnumDescriptorProto(h)); // TODO: services, extensions return 
m; } @@ -379,13 +379,13 @@ static void upb_defbuilder_FileDescriptorSet_onendmsg(void *_b, } static upb_mhandlers *upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmsg(h); + upb_mhandlers *m = upb_handlers_newmhandlers(h); upb_mhandlers_setendmsg(m, upb_defbuilder_FileDescriptorSet_onendmsg); #define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM #define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE - upb_mhandlers_newsubmsgfield(m, FNUM(FILE), FTYPE(FILE), true, - upb_defbuilder_register_FileDescriptorProto(h)); + upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true, + upb_defbuilder_register_FileDescriptorProto(h)); return m; } #undef FNUM @@ -494,17 +494,17 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b, static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto( upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmsg(h); + upb_mhandlers *m = upb_handlers_newmhandlers(h); upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg); upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg); #define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM #define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE upb_fhandlers *f; - f = upb_mhandlers_newfield(m, FNUM(NAME), FTYPE(NAME), false); + f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name); - f = upb_mhandlers_newfield(m, FNUM(NUMBER), FTYPE(NUMBER), false); + f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false); upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number); return m; } @@ -547,16 +547,17 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_b, } static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) { - upb_mhandlers *m = 
upb_handlers_newmsg(h); + upb_mhandlers *m = upb_handlers_newmhandlers(h); upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg); upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg); #define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM #define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = upb_mhandlers_newfield(m, FNUM(NAME), FTYPE(NAME), false); + upb_fhandlers *f = + upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name); - upb_mhandlers_newsubmsgfield(m, FNUM(VALUE), FTYPE(VALUE), true, + upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true, upb_enumdef_register_EnumValueDescriptorProto(h)); return m; } @@ -824,13 +825,13 @@ static upb_flow_t upb_fielddef_ondefaultval(void *_b, upb_value fval, static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto( upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmsg(h); + upb_mhandlers *m = upb_handlers_newmhandlers(h); upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg); upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg); #define FIELD(name, handler) \ upb_fhandlers_setvalue( \ - upb_mhandlers_newfield(m, \ + upb_mhandlers_newfhandlers(m, \ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \ false), \ @@ -960,22 +961,23 @@ static upb_flow_t upb_msgdef_onname(void *_b, upb_value fval, upb_value val) { } static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmsg(h); + upb_mhandlers *m = upb_handlers_newmhandlers(h); upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg); upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg); #define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM #define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = upb_mhandlers_newfield(m, 
FNUM(NAME), FTYPE(NAME), false); + upb_fhandlers *f = + upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); upb_fhandlers_setvalue(f, &upb_msgdef_onname); - upb_mhandlers_newsubmsgfield(m, FNUM(FIELD), FTYPE(FIELD), true, - upb_fielddef_register_FieldDescriptorProto(h)); - upb_mhandlers_newsubmsgfield(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); + upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true, + upb_fielddef_register_FieldDescriptorProto(h)); + upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, + upb_enumdef_register_EnumDescriptorProto(h)); // DescriptorProto is self-recursive, so we must link the definition. - upb_mhandlers_newsubmsgfield( + upb_mhandlers_newfhandlers_subm( m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m); // TODO: extensions. diff --git a/src/upb_handlers.c b/src/upb_handlers.c index 7be43ce..f4664a0 100644 --- a/src/upb_handlers.c +++ b/src/upb_handlers.c @@ -50,9 +50,9 @@ static upb_mhandlers *upb_mhandlers_new() { return m; } -static upb_fhandlers *_upb_mhandlers_newfield(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, - bool repeated) { +static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, + upb_fieldtype_t type, + bool repeated) { uint32_t tag = n << 3 | upb_types[type].native_wire_type; upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, tag); if (f) abort(); @@ -69,25 +69,56 @@ static upb_fhandlers *_upb_mhandlers_newfield(upb_mhandlers *m, uint32_t n, return f; } -upb_fhandlers *upb_mhandlers_newfield(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated) { +upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, + upb_fieldtype_t type, bool repeated) { assert(type != UPB_TYPE(MESSAGE)); assert(type != UPB_TYPE(GROUP)); - return _upb_mhandlers_newfield(m, n, type, repeated); + return _upb_mhandlers_newfhandlers(m, n, type, repeated); } -upb_fhandlers 
*upb_mhandlers_newsubmsgfield(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated, - upb_mhandlers *subm) { +upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, + upb_fieldtype_t type, + bool repeated, + upb_mhandlers *subm) { assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)); assert(subm); - upb_fhandlers *f = _upb_mhandlers_newfield(m, n, type, repeated); + upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated); f->submsg = subm; if (type == UPB_TYPE(GROUP)) - _upb_mhandlers_newfield(subm, n, UPB_TYPE_ENDGROUP, false); + _upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false); return f; } + +/* upb_handlers ***************************************************************/ + +void upb_handlers_init(upb_handlers *h) { + h->msgs_len = 0; + h->msgs_size = 4; + h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); + h->should_jit = true; +} + +void upb_handlers_uninit(upb_handlers *h) { + for (int i = 0; i < h->msgs_len; i++) { + upb_mhandlers *mh = h->msgs[i]; + upb_inttable_free(&mh->fieldtab); + free(mh->tablearray); + free(mh); + } + free(h->msgs); +} + +upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) { + if (h->msgs_len == h->msgs_size) { + h->msgs_size *= 2; + h->msgs = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs)); + } + upb_mhandlers *mh = upb_mhandlers_new(); + h->msgs[h->msgs_len++] = mh; + return mh; +} + typedef struct { upb_strtable_entry e; upb_mhandlers *mh; @@ -97,7 +128,7 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m, upb_onmsgreg *msgreg_cb, upb_onfieldreg *fieldreg_cb, void *closure, upb_strtable *mtab) { - upb_mhandlers *mh = upb_handlers_newmsg(h); + upb_mhandlers *mh = upb_handlers_newmhandlers(h); upb_mtab_ent e = {{m->base.fqname, 0}, mh}; upb_strtable_insert(mtab, &e.e); if (msgreg_cb) msgreg_cb(closure, mh, m); @@ -115,10 +146,10 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m, sub_mh = upb_regmsg_dfs(h, 
upb_downcast_msgdef(f->def), msgreg_cb, fieldreg_cb, closure, mtab); } - fh = upb_mhandlers_newsubmsgfield( + fh = upb_mhandlers_newfhandlers_subm( mh, f->number, f->type, upb_isarray(f), sub_mh); } else { - fh = upb_mhandlers_newfield(mh, f->number, f->type, upb_isarray(f)); + fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isarray(f)); } if (fieldreg_cb) fieldreg_cb(closure, fh, f); } @@ -138,36 +169,6 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, upb_msgdef *m, } -/* upb_handlers ***************************************************************/ - -void upb_handlers_init(upb_handlers *h) { - h->msgs_len = 0; - h->msgs_size = 4; - h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); - h->should_jit = true; -} - -void upb_handlers_uninit(upb_handlers *h) { - for (int i = 0; i < h->msgs_len; i++) { - upb_mhandlers *mh = h->msgs[i]; - upb_inttable_free(&mh->fieldtab); - free(mh->tablearray); - free(mh); - } - free(h->msgs); -} - -upb_mhandlers *upb_handlers_newmsg(upb_handlers *h) { - if (h->msgs_len == h->msgs_size) { - h->msgs_size *= 2; - h->msgs = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs)); - } - upb_mhandlers *mh = upb_mhandlers_new(); - h->msgs[h->msgs_len++] = mh; - return mh; -} - - /* upb_dispatcher *************************************************************/ static upb_fhandlers toplevel_f = { diff --git a/src/upb_handlers.h b/src/upb_handlers.h index 30908e8..6479f7a 100644 --- a/src/upb_handlers.h +++ b/src/upb_handlers.h @@ -187,19 +187,20 @@ INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) { // Appends a new message to the graph of handlers and returns it. This message // can be obtained later at index upb_handlers_msgcount()-1. All handlers will // be initialized to no-op handlers. 
-upb_mhandlers *upb_handlers_newmsg(upb_handlers *h); -upb_mhandlers *upb_handlers_getmsg(upb_handlers *h, int index); +upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h); +upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index); // Creates a new field with the given name and number. There must not be an // existing field with either this name or number or abort() will be called. // TODO: this should take a name also. -upb_fhandlers *upb_mhandlers_newfield(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated); +upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, + upb_fieldtype_t type, bool repeated); // Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the // given submessage must not have any fields with this field number. -upb_fhandlers *upb_mhandlers_newsubmsgfield(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated, - upb_mhandlers *subm); +upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, + upb_fieldtype_t type, + bool repeated, + upb_mhandlers *subm); // upb_mhandlers accessors. #define UPB_MHANDLERS_ACCESSORS(name, type) \ -- cgit v1.2.3