From 8405e5e38948bc61cad7586e62c566232e1de6d9 Mon Sep 17 00:00:00 2001
From: Josh Haberman
Date: Fri, 1 Aug 2014 15:32:59 -0700
Subject: Beginnings of a prototype Ruby extension.

---
 upb/bindings/ruby/README     |   2 +
 upb/bindings/ruby/extconf.rb |   7 +
 upb/bindings/ruby/upb.c      | 300 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 309 insertions(+)
 create mode 100644 upb/bindings/ruby/README
 create mode 100644 upb/bindings/ruby/extconf.rb
 create mode 100644 upb/bindings/ruby/upb.c

(limited to 'upb')

diff --git a/upb/bindings/ruby/README b/upb/bindings/ruby/README
new file mode 100644
index 0000000..50fd746
--- /dev/null
+++ b/upb/bindings/ruby/README
@@ -0,0 +1,2 @@
+This is PROTOTYPE code -- all interfaces are experimental
+and will almost certainly change.
diff --git a/upb/bindings/ruby/extconf.rb b/upb/bindings/ruby/extconf.rb
new file mode 100644
index 0000000..67fddba
--- /dev/null
+++ b/upb/bindings/ruby/extconf.rb
@@ -0,0 +1,7 @@
+#!/usr/bin/ruby
+
+require 'mkmf'
+find_header("upb/upb.h", "../../..") or raise "Can't find upb headers"
+find_library("upb_pic", "upb_msgdef_new", "../..") or raise "Can't find upb lib"
+$CFLAGS += " -Wall"
+create_makefile("upb")
diff --git a/upb/bindings/ruby/upb.c b/upb/bindings/ruby/upb.c
new file mode 100644
index 0000000..fabcc46
--- /dev/null
+++ b/upb/bindings/ruby/upb.c
@@ -0,0 +1,300 @@
+
+#include "ruby.h"
+#include "upb/def.h"
+#include "upb/handlers.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/glue.h"
+#include "upb/shim/shim.h"
+#include "upb/symtab.h"
+
+static VALUE cMessageDef;  // Ruby class Upb::MessageDef (set in Init_upb).
+static VALUE cMessage;     // Ruby class Upb::Message (set in Init_upb).
+
+// Wrapper around a upb_msgdef; the C struct wrapped by Upb::MessageDef objects.
+typedef struct {
+  // The msgdef for this message, and a DecoderMethod to parse protobufs and
+  // fill a message.
+  //
+  // We own refs on both of these (they are released in msgdef_free).
+  const upb_msgdef *md;
+  const upb_pbdecodermethod *fill_method;
+
+  size_t size;              // Bytes allocated per message (see assign_offsets).
+  uint32_t *field_offsets;  // Byte offset per field, by upb_fielddef_index().
+} rb_msgdef;
+
+// Ruby message object.
+// This will be sized according to what fields are actually present.
+//
+// A message is a single allocation of rb_msgdef.size bytes (see msg_new).
+// The leading sizeof(VALUE) bytes hold the owning MessageDef object; field
+// values live at the byte offsets recorded in rb_msgdef.field_offsets, which
+// are measured from the start of the struct and addressed through data.data.
+typedef struct {
+  union u {
+    VALUE rbmsgdef;
+    char data[1];
+  } data;
+} rb_msg;
+
+// Yields, as an lvalue, the object of type |type| stored |ofs| bytes into
+// |msg|.  Every argument and the whole expansion are parenthesized so the
+// macro stays correct when given arbitrary expressions.
+#define DEREF(msg, ofs, type) (*(type*)(&(msg)->data.data[(ofs)]))
+
+// Free function for the Ruby object that wraps a upb_symtab: drops the
+// untracked ref taken when the symtab was created.
+static void symtab_free(void *symtab) {
+  upb_symtab_unref(symtab, UPB_UNTRACKED_REF);
+}
+
+// Raises a Ruby RuntimeError carrying the status message if |s| is not ok.
+//
+// PROTOTYPE: also writes a diagnostic line to stderr on both the error and
+// the success path.
+void rupb_checkstatus(upb_status *s) {
+  if (!upb_ok(s)) {
+    fprintf(stderr, "YO, error! %s", upb_status_errmsg(s));
+    rb_raise(rb_eRuntimeError, "%s", upb_status_errmsg(s));
+  } else {
+    fprintf(stderr, "A-OK!");
+  }
+}
+
+/* handlers *******************************************************************/
+
+// These are handlers for populating a Ruby protobuf message when parsing.
+
+// String handler: wraps the |len| bytes at |str| in a new Ruby String and
+// stores it in the message slot whose offset is carried in the handler data.
+//
+//   closure: the rb_msg being filled.
+//   hd:      pointer to a size_t field offset (see newhandlerdata).
+//
+// Returns |len|, i.e. reports every byte as consumed.  Each call overwrites
+// the slot, so a string delivered in several chunks keeps only the last one.
+static size_t strhandler(void *closure, const void *hd, const char *str,
+                         size_t len, const upb_bufhandle *handle) {
+  rb_msg *msg = closure;
+  const size_t *ofs = hd;
+  DEREF(msg, *ofs, VALUE) = rb_str_new(str, len);
+  return len;
+}
+
+// Allocates handler data holding the field offset |ofs| and registers it
+// with |h| so that it is released together with the handlers.
+static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) {
+  size_t *hd_ofs = ALLOC(size_t);
+  *hd_ofs = ofs;
+  // ALLOC() hands out memory from Ruby's allocator, so the matching release
+  // function is xfree(), not libc free().
+  upb_handlers_addcleanup(h, hd_ofs, xfree);
+  return hd_ofs;
+}
+
+// Callback for upb_handlers_newfrozen(): registers, for every field of the
+// message described by |closure| (an rb_msgdef), a handler that writes the
+// parsed value at that field's offset inside an rb_msg.
+//
+// The offsets are read from rmd->field_offsets while this callback runs, so
+// they must already be assigned when the handlers are created.
+//
+// Raises RuntimeError for repeated fields and for submessage fields.
+static void add_handlers_for_message(const void *closure, upb_handlers *h) {
+  // XXX: Doesn't support submessages properly yet.
+  const rb_msgdef *rmd = closure;
+  upb_msg_iter i;
+  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+
+    if (upb_fielddef_isseq(f)) {
+      rb_raise(rb_eRuntimeError, "Doesn't support repeated fields yet.");
+    }
+
+    size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];
+
+    switch (upb_fielddef_type(f)) {
+      case UPB_TYPE_BOOL:
+      case UPB_TYPE_INT32:
+      case UPB_TYPE_UINT32:
+      case UPB_TYPE_ENUM:
+      case UPB_TYPE_FLOAT:
+      case UPB_TYPE_INT64:
+      case UPB_TYPE_UINT64:
+      case UPB_TYPE_DOUBLE:
+        // Primitive values are written straight into the message by the shim.
+        // NOTE(review): -1 is passed as the hasbit argument, presumably
+        // meaning "no hasbit" -- confirm against upb/shim/shim.h.
+        upb_shim_set(h, f, ofs, -1);
+        break;
+      case UPB_TYPE_STRING:
+      case UPB_TYPE_BYTES: {
+        upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+        upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs));
+        // XXX: doesn't currently handle split buffers.
+        upb_handlers_setstring(h, f, strhandler, &attr);
+        upb_handlerattr_uninit(&attr);
+        break;
+      }
+      case UPB_TYPE_MESSAGE:
+        rb_raise(rb_eRuntimeError, "Doesn't support submessages yet.");
+        break;
+    }
+  }
+}
+
+// Creates upb handlers for populating a message.
+//
+// The returned handlers are frozen and carry a ref owned by |owner|.
+static const upb_handlers *new_fill_handlers(const rb_msgdef *rmd,
+                                             const void *owner) {
+  return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, rmd);
+}
+
+// General alignment rules are that each type needs to be stored at an address
+// that is a multiple of its size.
+//
+// Returns |val| rounded up to the next multiple of |align|.  |align| must be
+// nonzero.
+static size_t align_up(size_t val, size_t align) {
+  return val % align == 0 ? val : val + align - (val % align);
+}
+
+// Byte size to store each upb type.
+static size_t rupb_sizeof(upb_fieldtype_t type) {
+  switch (type) {
+    case UPB_TYPE_BOOL:
+      return 1;
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_FLOAT:
+      return 4;
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT64:
+    case UPB_TYPE_DOUBLE:
+      return 8;
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES:
+    case UPB_TYPE_MESSAGE:
+      // Stored as a reference to a Ruby object.
+      return sizeof(VALUE);
+  }
+  assert(false);
+  // Not reached for valid types.  The explicit return keeps the function
+  // well-defined when asserts are compiled out, and a nonzero size keeps
+  // align_up() from dividing by zero.
+  return 1;
+}
+
+/* msg ************************************************************************/
+
+// Free function for Upb::Message objects.  The message was allocated with
+// ALLOC_N() in msg_new, so it is released with xfree().
+static void msg_free(void *msg) {
+  xfree(msg);
+}
+
+// Invoked by the Ruby GC whenever it is doing a mark-and-sweep.
+static void msg_mark(void *p) {
+  rb_msg *msg = p;
+  rb_msgdef *rmd;
+
+  // The message depends on its MessageDef (the field offsets are read right
+  // below), so the MessageDef must stay alive as long as the message does.
+  rb_gc_mark(msg->data.rbmsgdef);
+  Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd);
+
+  // We need to mark all references to other Ruby values: strings, arrays, and
+  // submessages that we point to.  Only strings are implemented so far.
+  upb_msg_iter i;
+  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    if (upb_fielddef_isstring(f)) {
+      size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];
+      rb_gc_mark(DEREF(msg, ofs, VALUE));
+    }
+  }
+}
+
+// Allocates a zero-filled message laid out according to |msgdef| (a
+// Upb::MessageDef object) and returns it wrapped as a Upb::Message.
+VALUE msg_new(VALUE msgdef) {
+  const rb_msgdef *rmd;
+  Data_Get_Struct(msgdef, rb_msgdef, rmd);
+
+  rb_msg *msg = (rb_msg*)ALLOC_N(char, rmd->size);
+  memset(msg, 0, rmd->size);
+  msg->data.rbmsgdef = msgdef;
+
+  VALUE ret = Data_Wrap_Struct(cMessage, msg_mark, msg_free, msg);
+  return ret;
+}
+
+/* msgdef *********************************************************************/
+
+// Free function for Upb::MessageDef objects.
+//
+// Releases the refs we own, the offset table, and the rb_msgdef itself (it
+// was allocated by Data_Make_Struct).  The struct starts out zeroed, so the
+// NULL checks make this safe for an object whose construction was interrupted
+// by an exception.
+static void msgdef_free(void *_rmd) {
+  rb_msgdef *rmd = _rmd;
+  if (rmd->md) {
+    upb_msgdef_unref(rmd->md, &rmd->md);
+  }
+  if (rmd->fill_method) {
+    upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method);
+  }
+  xfree(rmd->field_offsets);
+  xfree(rmd);
+}
+
+// Builds the decoder method that parses a binary protobuf into an rb_msg
+// laid out according to |rmd|.  The returned method carries a ref owned by
+// |owner|; the intermediate handlers ref is dropped before returning.
+const upb_pbdecodermethod *new_fillmsg_decodermethod(const rb_msgdef *rmd,
+                                                     const void *owner) {
+  const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers);
+  upb_pbdecodermethodopts opts;
+  upb_pbdecodermethodopts_init(&opts, fill_handlers);
+
+  const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner);
+  upb_handlers_unref(fill_handlers, &fill_handlers);
+  return ret;
+}
+
+// Calculates offsets for each field.
+//
+// This lets us pack protos like structs instead of storing them like
+// dictionaries.  This speeds up parsing a lot and also saves memory
+// (unless messages are very sparse).
+//
+// Fills rmd->field_offsets (which must already be allocated) and sets
+// rmd->size to the total number of bytes a message needs.
+static void assign_offsets(rb_msgdef *rmd) {
+  // A message starts with the VALUE of its MessageDef (rb_msg.data.rbmsgdef);
+  // field storage begins right after it.
+  size_t ofs = sizeof(VALUE);
+  upb_msg_iter i;
+  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    size_t field_size = rupb_sizeof(upb_fielddef_type(f));
+    ofs = align_up(ofs, field_size);  // Align field properly.
+    rmd->field_offsets[upb_fielddef_index(f)] = ofs;
+    ofs += field_size;
+  }
+  rmd->size = ofs;
+}
+
+// Constructs a new Ruby wrapper object around the given msgdef.
+static VALUE make_msgdef(const upb_msgdef *md) {
+  rb_msgdef *rmd;
+  VALUE ret = Data_Make_Struct(cMessageDef, rb_msgdef, NULL, msgdef_free, rmd);
+
+  upb_msgdef_ref(md, &rmd->md);
+  rmd->md = md;
+
+  rmd->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md));
+
+  // The offsets must be assigned before the decoder method is built:
+  // creating the handlers reads rmd->field_offsets and bakes the values in.
+  assign_offsets(rmd);
+  rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method);
+
+  return ret;
+}
+
+// Loads a descriptor and constructs a MessageDef to the named message.
+//
+//   descriptor:   String holding the serialized descriptor to load.
+//   message_name: String naming the message to look up.
+//
+// Raises TypeError if either argument is not a String (or convertible to
+// one), ArgumentError if message_name contains a NUL byte, and RuntimeError
+// if the descriptor fails to load or the message is not found.
+static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) {
+  // Validate the arguments before handing their raw buffers to upb.
+  StringValue(descriptor);
+  const char *name = StringValueCStr(message_name);
+
+  upb_symtab *symtab = upb_symtab_new(UPB_UNTRACKED_REF);
+
+  // Wrap the symtab in a Ruby object so it gets GC'd.
+  // In a real wrapper we would wrap this object more fully (ie. expose its
+  // methods to Ruby callers).
+  VALUE symtab_holder =
+      Data_Wrap_Struct(rb_cObject, NULL, symtab_free, symtab);
+
+  upb_status status = UPB_STATUS_INIT;
+  upb_load_descriptor_into_symtab(
+      symtab, RSTRING_PTR(descriptor), RSTRING_LEN(descriptor), &status);
+
+  if (!upb_ok(&status)) {
+    rb_raise(rb_eRuntimeError,
+             "Error loading descriptor: %s", upb_status_errmsg(&status));
+  }
+
+  const upb_msgdef *m = upb_symtab_lookupmsg(symtab, name);
+
+  if (!m) {
+    rb_raise(rb_eRuntimeError, "Message name '%s' not found", name);
+  }
+
+  VALUE ret = make_msgdef(m);
+
+  // Keep the wrapper (and with it the symtab) and the name string visible to
+  // the GC until we are done using the raw pointers taken from them.
+  RB_GC_GUARD(symtab_holder);
+  RB_GC_GUARD(message_name);
+  return ret;
+}
+
+// Parses |binary_protobuf| (a String) into a new Upb::Message laid out
+// according to this MessageDef and returns it.
+//
+// Raises TypeError if the argument is not a String (or convertible to one)
+// and RuntimeError (via rupb_checkstatus) if the decoder reported an error.
+//
+// PROTOTYPE: traces the input and the number of bytes consumed to stderr.
+static VALUE msgdef_parse(VALUE self, VALUE binary_protobuf) {
+  const rb_msgdef *rmd;
+  Data_Get_Struct(self, rb_msgdef, rmd);
+
+  StringValue(binary_protobuf);
+
+  VALUE msg = msg_new(self);
+  rb_msg *msgp;
+  Data_Get_Struct(msg, rb_msg, msgp);
+
+  const upb_handlers *h = upb_pbdecodermethod_desthandlers(rmd->fill_method);
+  upb_pbdecoder decoder;
+  upb_sink sink;
+  upb_status status = UPB_STATUS_INIT;
+
+  upb_pbdecoder_init(&decoder, rmd->fill_method, &status);
+  upb_sink_reset(&sink, h, msgp);
+  upb_pbdecoder_resetoutput(&decoder, &sink);
+
+  const char *buf = RSTRING_PTR(binary_protobuf);
+  size_t len = RSTRING_LEN(binary_protobuf);
+
+  // The payload is binary, so bound the read by its length instead of
+  // relying on a terminating NUL.
+  fprintf(stderr, "STR: %.*s\n", (int)len, buf);
+  fprintf(stderr, "LEN: %d\n", (int)len);
+  size_t n = upb_bufsrc_putbuf(buf, len, upb_pbdecoder_input(&decoder));
+  fprintf(stderr, "n: %d\n", (int)n);
+  // TODO(haberman): make uninit optional if custom allocator for parsing
+  // returns GC-rooted memory.  That will make decoding longjmp-safe (required
+  // if parsing triggers any VM errors like OOM or errors in user handlers).
+  upb_pbdecoder_uninit(&decoder);
+  rupb_checkstatus(&status);
+
+  RB_GC_GUARD(binary_protobuf);
+  return msg;
+}
+
+// Extension entry point, run by Ruby when the "upb" library is loaded.
+// Defines module Upb with classes MessageDef (singleton method "load",
+// instance method "parse") and Message.
+void Init_upb(void) {
+  VALUE upb = rb_define_module("Upb");
+
+  cMessageDef = rb_define_class_under(upb, "MessageDef", rb_cObject);
+  rb_define_singleton_method(cMessageDef, "load", msgdef_load, 2);
+  rb_define_method(cMessageDef, "parse", msgdef_parse, 1);
+
+  cMessage = rb_define_class_under(upb, "Message", rb_cObject);
+}
-- 
cgit v1.2.3