#include "ruby.h"

#include <assert.h>
#include <stdint.h>
#include <string.h>

#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
#include "upb/pb/glue.h"
#include "upb/shim/shim.h"
#include "upb/symtab.h"

static VALUE cMessageDef;
static VALUE cMessage;

// Wrapper around a upb_msgdef.
typedef struct {
  // The msgdef for this message, and a DecoderMethod to parse protobufs and
  // fill a message.
  //
  // We own refs on both of these.
  const upb_msgdef *md;
  const upb_pbdecodermethod *fill_method;

  // Total byte size of a message instance, as computed by assign_offsets().
  size_t size;

  // Byte offset of each field inside a message, indexed by
  // upb_fielddef_index().  Allocated with ALLOC_N, released with xfree.
  uint32_t *field_offsets;
} rb_msgdef;

// Ruby message object.
// This will be sized according to what fields are actually present.
typedef struct {
  union u {
    // The Ruby MessageDef object describing this message's layout.
    VALUE rbmsgdef;
    // Raw view of the whole allocation; field storage is addressed by byte
    // offset from the start of this array.
    char data[1];
  } data;
} rb_msg;

// Accesses the field of type `type` stored `ofs` bytes into `msg`.
#define DEREF(msg, ofs, type) (*(type *)(&(msg)->data.data[(ofs)]))

// Free function for the Ruby object that wraps a upb_symtab: drops the
// untracked ref taken when the symtab was created.
static void symtab_free(void *md) {
  upb_symtab_unref(md, UPB_UNTRACKED_REF);
}

// Raises a Ruby RuntimeError carrying the status message if `s` is not ok;
// otherwise does nothing.
void rupb_checkstatus(upb_status *s) {
  if (!upb_ok(s)) {
    rb_raise(rb_eRuntimeError, "%s", upb_status_errmsg(s));
  }
}

/* handlers *******************************************************************/

// These are handlers for populating a Ruby protobuf message when parsing.

// String handler: `closure` is the rb_msg being filled and `hd` points to the
// field's byte offset (see newhandlerdata()).  Stores a new Ruby String built
// from `str`/`len` into the field slot and returns `len`.
//
// XXX: each call overwrites the slot, so a string delivered in several chunks
// is not reassembled.
static size_t strhandler(void *closure, const void *hd, const char *str,
                         size_t len, const upb_bufhandle *handle) {
  rb_msg *msg = closure;
  const size_t *ofs = hd;
  (void)handle;
  DEREF(msg, *ofs, VALUE) = rb_str_new(str, len);
  return len;
}

// Allocates handler data holding the field offset `ofs` and registers it with
// `h` so it is released together with the handlers.
static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) {
  size_t *hd_ofs = ALLOC(size_t);
  *hd_ofs = ofs;
  // The memory comes from Ruby's allocator (ALLOC), so it must be released
  // with xfree rather than free.
  upb_handlers_addcleanup(h, hd_ofs, xfree);
  return hd_ofs;
}

// Raises a Ruby RuntimeError if `md` has a field this binding cannot store
// yet (repeated fields or submessages).  Intended to run before any upb
// object is under construction, so the raise does not unwind through upb.
static void check_supported(const upb_msgdef *md) {
  upb_msg_iter i;
  for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    if (upb_fielddef_isseq(f)) {
      rb_raise(rb_eRuntimeError, "Doesn't support repeated fields yet.");
    }
    if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) {
      rb_raise(rb_eRuntimeError, "Doesn't support submessages yet.");
    }
  }
}

// Callback for upb_handlers_newfrozen(): installs one handler per field of
// the message described by `closure` (an rb_msgdef whose field_offsets have
// already been assigned).
static void add_handlers_for_message(const void *closure, upb_handlers *h) {
  // XXX: Doesn't support submessages properly yet.
  const rb_msgdef *rmd = closure;
  upb_msg_iter i;
  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);

    // make_msgdef() rejects unsupported messages up front via
    // check_supported(); the raises below are kept as a defensive fallback.
    if (upb_fielddef_isseq(f)) {
      rb_raise(rb_eRuntimeError, "Doesn't support repeated fields yet.");
    }

    size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];

    switch (upb_fielddef_type(f)) {
      case UPB_TYPE_BOOL:
      case UPB_TYPE_INT32:
      case UPB_TYPE_UINT32:
      case UPB_TYPE_ENUM:
      case UPB_TYPE_FLOAT:
      case UPB_TYPE_INT64:
      case UPB_TYPE_UINT64:
      case UPB_TYPE_DOUBLE:
        // Primitive values are written straight into the message by the shim.
        upb_shim_set(h, f, ofs, -1);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES: {
        upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
        upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs));
        // XXX: doesn't currently handle split buffers.
        upb_handlers_setstring(h, f, strhandler, &attr);
        upb_handlerattr_uninit(&attr);
        break;
      }
      case UPB_TYPE_MESSAGE:
        rb_raise(rb_eRuntimeError, "Doesn't support submessages yet.");
        break;
    }
  }
}

// Creates upb handlers for populating a message.
static const upb_handlers *new_fill_handlers(const rb_msgdef *rmd,
                                             const void *owner) {
  return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, rmd);
}

// General alignment rules are that each type needs to be stored at an address
// that is a multiple of its size.
//
// Returns `val` rounded up to the next multiple of `align` (which must be
// non-zero).
static size_t align_up(size_t val, size_t align) {
  return val % align == 0 ? val : val + align - (val % align);
}

// Byte size to store each upb type.
static size_t rupb_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_FLOAT:
      return 4;
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
    case UPB_TYPE_MESSAGE:
      return sizeof(VALUE);
  }
  assert(0 && "unhandled upb field type");
  // Not expected to be reached; keeps the function well-defined when
  // assertions are compiled out.
  return 0;
}

/* msg ************************************************************************/

// Free function for Message objects.  The storage was obtained with ALLOC_N
// in msg_new(), so it is released with xfree.
static void msg_free(void *msg) {
  xfree(msg);
}

// Invoked by the Ruby GC whenever it is doing a mark-and-sweep.
static void msg_mark(void *p) {
  rb_msg *msg = p;

  // Keep our MessageDef alive: its layout table is read below and on every
  // later mark of this message.
  rb_gc_mark(msg->data.rbmsgdef);

  rb_msgdef *rmd;
  Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd);

  // We need to mark all references to other Ruby values: strings, arrays, and
  // submessages that we point to.  Only strings are implemented so far.
  upb_msg_iter i;
  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    if (upb_fielddef_isstring(f)) {
      size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];
      rb_gc_mark(DEREF(msg, ofs, VALUE));
    }
  }
}

// Allocates a zero-filled message laid out according to `msgdef` (a Ruby
// MessageDef object) and returns it wrapped as a Ruby Message.
VALUE msg_new(VALUE msgdef) {
  const rb_msgdef *rmd;
  Data_Get_Struct(msgdef, rb_msgdef, rmd);

  rb_msg *msg = (rb_msg *)ALLOC_N(char, rmd->size);
  memset(msg, 0, rmd->size);
  msg->data.rbmsgdef = msgdef;

  return Data_Wrap_Struct(cMessage, msg_mark, msg_free, msg);
}

/* msgdef *********************************************************************/

// Free function for MessageDef objects: drops the refs we own and releases
// the offset table and the wrapper struct itself.  Tolerates a partially
// initialised struct (Data_Make_Struct zero-fills it), which can be seen if
// construction raised part-way through.
static void msgdef_free(void *_rmd) {
  rb_msgdef *rmd = _rmd;
  if (rmd->md) {
    upb_msgdef_unref(rmd->md, &rmd->md);
  }
  if (rmd->fill_method) {
    upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method);
  }
  xfree(rmd->field_offsets);
  xfree(rmd);
}

// Builds a decoder method that parses binary protobufs into messages laid out
// according to `rmd`.  The returned method is ref'd on behalf of `owner`.
// rmd->field_offsets must already be assigned, because the handlers capture
// the offsets while they are being built.
const upb_pbdecodermethod *new_fillmsg_decodermethod(const rb_msgdef *rmd,
                                                     const void *owner) {
  const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers);
  upb_pbdecodermethodopts opts;
  upb_pbdecodermethodopts_init(&opts, fill_handlers);

  const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner);
  upb_handlers_unref(fill_handlers, &fill_handlers);
  return ret;
}

// Calculates offsets for each field.
//
// This lets us pack protos like structs instead of storing them like
// dictionaries.  This speeds up parsing a lot and also saves memory
// (unless messages are very sparse).
static void assign_offsets(rb_msgdef *rmd) {
  size_t ofs = sizeof(rb_msg);  // Msg starts with msgdef pointer.
  upb_msg_iter i;
  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    size_t field_size = rupb_sizeof(upb_fielddef_type(f));
    ofs = align_up(ofs, field_size);  // Align field properly.
    rmd->field_offsets[upb_fielddef_index(f)] = ofs;
    ofs += field_size;
  }
  rmd->size = ofs;
}

// Constructs a new Ruby wrapper object around the given msgdef.
//
// Raises RuntimeError if the message uses repeated fields or submessages.
static VALUE make_msgdef(const upb_msgdef *md) {
  // Reject unsupported messages before anything is allocated or ref'd.
  check_supported(md);

  rb_msgdef *rmd;
  VALUE ret = Data_Make_Struct(cMessageDef, rb_msgdef, NULL, msgdef_free, rmd);

  upb_msgdef_ref(md, &rmd->md);
  rmd->md = md;
  rmd->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md));

  // Offsets must be known before the handlers are built: the handlers read
  // rmd->field_offsets while being constructed.
  assign_offsets(rmd);
  rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method);

  return ret;
}

// Loads a descriptor and constructs a MessageDef to the named message.
//
// `descriptor` and `message_name` must be Ruby Strings (TypeError otherwise).
// Raises RuntimeError if the descriptor cannot be loaded or the message name
// is not found in it.
static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) {
  (void)klass;
  Check_Type(descriptor, T_STRING);
  Check_Type(message_name, T_STRING);

  upb_symtab *symtab = upb_symtab_new(UPB_UNTRACKED_REF);

  // Wrap the symtab in a Ruby object so it gets GC'd.
  // In a real wrapper we would wrap this object more fully (ie. expose its
  // methods to Ruby callers).
  VALUE symtab_obj = Data_Wrap_Struct(rb_cObject, NULL, symtab_free, symtab);

  upb_status status = UPB_STATUS_INIT;
  upb_load_descriptor_into_symtab(symtab, RSTRING_PTR(descriptor),
                                  RSTRING_LEN(descriptor), &status);

  if (!upb_ok(&status)) {
    rb_raise(rb_eRuntimeError, "Error loading descriptor: %s",
             upb_status_errmsg(&status));
  }

  // StringValueCStr ensures the name is usable as a NUL-terminated C string.
  const char *name = StringValueCStr(message_name);
  const upb_msgdef *m = upb_symtab_lookupmsg(symtab, name);

  if (!m) {
    rb_raise(rb_eRuntimeError, "Message name '%s' not found", name);
  }

  VALUE ret = make_msgdef(m);

  // Keep the wrapper (and therefore the symtab) reachable until we are done
  // using the symtab and the msgdef looked up from it.
  RB_GC_GUARD(symtab_obj);
  return ret;
}

// MessageDef#parse: decodes `binary_protobuf` (a Ruby String) into a new
// Message described by `self` and returns it.  Raises TypeError if the
// argument is not a String and RuntimeError if decoding reports an error.
static VALUE msgdef_parse(VALUE self, VALUE binary_protobuf) {
  Check_Type(binary_protobuf, T_STRING);

  const rb_msgdef *rmd;
  Data_Get_Struct(self, rb_msgdef, rmd);

  VALUE msg = msg_new(self);
  rb_msg *msgp;
  Data_Get_Struct(msg, rb_msg, msgp);

  const upb_handlers *h = upb_pbdecodermethod_desthandlers(rmd->fill_method);
  upb_pbdecoder decoder;
  upb_sink sink;
  upb_status status = UPB_STATUS_INIT;

  upb_pbdecoder_init(&decoder, rmd->fill_method, &status);
  upb_sink_reset(&sink, h, msgp);
  upb_pbdecoder_resetoutput(&decoder, &sink);

  // Failures are reported through `status`, checked below.
  (void)upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf),
                          RSTRING_LEN(binary_protobuf),
                          upb_pbdecoder_input(&decoder));

  // TODO(haberman): make uninit optional if custom allocator for parsing
  // returns GC-rooted memory.  That will make decoding longjmp-safe (required
  // if parsing triggers any VM errors like OOM or errors in user handlers).
  upb_pbdecoder_uninit(&decoder);
  rupb_checkstatus(&status);

  return msg;
}

// Extension entry point: defines Upb::MessageDef (with .load and #parse) and
// Upb::Message.
void Init_upb(void) {
  VALUE upb = rb_define_module("Upb");

  cMessageDef = rb_define_class_under(upb, "MessageDef", rb_cObject);
  rb_define_singleton_method(cMessageDef, "load", msgdef_load, 2);
  rb_define_method(cMessageDef, "parse", msgdef_parse, 1);

  cMessage = rb_define_class_under(upb, "Message", rb_cObject);
}