summaryrefslogtreecommitdiff
path: root/upb
diff options
context:
space:
mode:
authorJosh Haberman <jhaberman@gmail.com>2014-08-01 15:32:59 -0700
committerJosh Haberman <jhaberman@gmail.com>2014-08-01 15:32:59 -0700
commit8405e5e38948bc61cad7586e62c566232e1de6d9 (patch)
tree0d214505c99c4b74a1e1655bce95877e267dde82 /upb
parent43f2455cbda22c513f2885d760ca2766b23e100f (diff)
Beginnings of a prototype Ruby extension.
Diffstat (limited to 'upb')
-rw-r--r--upb/bindings/ruby/README2
-rw-r--r--upb/bindings/ruby/extconf.rb7
-rw-r--r--upb/bindings/ruby/upb.c300
3 files changed, 309 insertions, 0 deletions
diff --git a/upb/bindings/ruby/README b/upb/bindings/ruby/README
new file mode 100644
index 0000000..50fd746
--- /dev/null
+++ b/upb/bindings/ruby/README
@@ -0,0 +1,2 @@
+This is PROTOTYPE code -- all interfaces are experimental
+and will almost certainly change.
diff --git a/upb/bindings/ruby/extconf.rb b/upb/bindings/ruby/extconf.rb
new file mode 100644
index 0000000..67fddba
--- /dev/null
+++ b/upb/bindings/ruby/extconf.rb
@@ -0,0 +1,7 @@
+#!/usr/bin/ruby
+
+require 'mkmf'
+find_header("upb/upb.h", "../../..") or raise "Can't find upb headers"
+find_library("upb_pic", "upb_msgdef_new", "../..") or raise "Can't find upb lib"
+$CFLAGS += " -Wall"
+create_makefile("upb")
diff --git a/upb/bindings/ruby/upb.c b/upb/bindings/ruby/upb.c
new file mode 100644
index 0000000..fabcc46
--- /dev/null
+++ b/upb/bindings/ruby/upb.c
@@ -0,0 +1,300 @@
+
+#include "ruby.h"
+#include "upb/def.h"
+#include "upb/handlers.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/glue.h"
+#include "upb/shim/shim.h"
+#include "upb/symtab.h"
+
+static VALUE cMessageDef;
+static VALUE cMessage;
+
+// Wrapper around a upb_msgdef.
+typedef struct {
+ // The msgdef for this message, and a DecoderMethod to parse protobufs and
+ // fill a message.
+ //
+ // We own refs on both of these.
+ const upb_msgdef *md;
+ const upb_pbdecodermethod *fill_method;
+
+ size_t size;
+ uint32_t *field_offsets;
+} rb_msgdef;
+
+// Ruby message object.
+// This will be sized according to what fields are actually present.
+typedef struct {
+ union u {
+ VALUE rbmsgdef;
+ char data[1];
+ } data;
+} rb_msg;
+
+#define DEREF(msg, ofs, type) *(type*)(&msg->data.data[ofs])
+
+static void symtab_free(void *md) {
+ upb_symtab_unref(md, UPB_UNTRACKED_REF);
+}
+
+void rupb_checkstatus(upb_status *s) {
+ if (!upb_ok(s)) {
+ fprintf(stderr, "YO, error! %s", upb_status_errmsg(s));
+ rb_raise(rb_eRuntimeError, "%s", upb_status_errmsg(s));
+ } else {
+ fprintf(stderr, "A-OK!");
+ }
+}
+
+/* handlers *******************************************************************/
+
+// These are handlers for populating a Ruby protobuf message when parsing.
+
+static size_t strhandler(void *closure, const void *hd, const char *str,
+ size_t len, const upb_bufhandle *handle) {
+ rb_msg *msg = closure;
+ const size_t *ofs = hd;
+ DEREF(msg, *ofs, VALUE) = rb_str_new(str, len);
+ return len;
+}
+
+static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) {
+ size_t *hd_ofs = ALLOC(size_t);
+ *hd_ofs = ofs;
+ upb_handlers_addcleanup(h, hd_ofs, free);
+ return hd_ofs;
+}
+
+static void add_handlers_for_message(const void *closure, upb_handlers *h) {
+ // XXX: Doesn't support submessages properly yet.
+ const rb_msgdef *rmd = closure;
+ upb_msg_iter i;
+ for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+ upb_fielddef *f = upb_msg_iter_field(&i);
+
+ if (upb_fielddef_isseq(f)) {
+ rb_raise(rb_eRuntimeError, "Doesn't support repeated fields yet.");
+ }
+
+ size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];
+
+ switch (upb_fielddef_type(f)) {
+ case UPB_TYPE_BOOL:
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_ENUM:
+ case UPB_TYPE_FLOAT:
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT64:
+ case UPB_TYPE_DOUBLE:
+ upb_shim_set(h, f, ofs, -1);
+ break;
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES: {
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, ofs));
+ // XXX: does't currently handle split buffers.
+ upb_handlers_setstring(h, f, strhandler, &attr);
+ upb_handlerattr_uninit(&attr);
+ break;
+ }
+ case UPB_TYPE_MESSAGE:
+ rb_raise(rb_eRuntimeError, "Doesn't support submessages yet.");
+ break;
+ }
+ }
+}
+
+// Creates upb handlers for populating a message.
+static const upb_handlers *new_fill_handlers(const rb_msgdef *rmd,
+ const void *owner) {
+ return upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, rmd);
+}
+
+// General alignment rules are that each type needs to be stored at an address
+// that is a multiple of its size.
+static size_t align_up(size_t val, size_t align) {
+ return val % align == 0 ? val : val + align - (val % align);
+}
+
+// Byte size to store each upb type.
+static size_t rupb_sizeof(upb_fieldtype_t type) {
+ switch (type) {
+ case UPB_TYPE_BOOL:
+ return 1;
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_ENUM:
+ case UPB_TYPE_FLOAT:
+ return 4;
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT64:
+ case UPB_TYPE_DOUBLE:
+ return 8;
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES:
+ case UPB_TYPE_MESSAGE:
+ return sizeof(VALUE);
+ }
+ assert(false);
+}
+
+/* msg ************************************************************************/
+
+static void msg_free(void *msg) {
+ free(msg);
+}
+
+// Invoked by the Ruby GC whenever it is doing a mark-and-sweep.
+static void msg_mark(void *p) {
+ rb_msg *msg = p;
+ rb_msgdef *rmd;
+ Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd);
+
+ // We need to mark all references to other Ruby values: strings, arrays, and
+ // submessages that we point to. Only strings are implemented so far.
+ upb_msg_iter i;
+ for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+ upb_fielddef *f = upb_msg_iter_field(&i);
+ if (upb_fielddef_isstring(f)) {
+ size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];
+ rb_gc_mark(DEREF(msg, ofs, VALUE));
+ }
+ }
+}
+
+VALUE msg_new(VALUE msgdef) {
+ const rb_msgdef *rmd;
+ Data_Get_Struct(msgdef, rb_msgdef, rmd);
+
+ rb_msg *msg = (rb_msg*)ALLOC_N(char, rmd->size);
+ memset(msg, 0, rmd->size);
+ msg->data.rbmsgdef = msgdef;
+
+ VALUE ret = Data_Wrap_Struct(cMessage, msg_mark, msg_free, msg);
+ return ret;
+}
+
+/* msgdef *********************************************************************/
+
+static void msgdef_free(void *_rmd) {
+ rb_msgdef *rmd = _rmd;
+ upb_msgdef_unref(rmd->md, &rmd->md);
+ upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method);
+ free(rmd->field_offsets);
+}
+
+const upb_pbdecodermethod *new_fillmsg_decodermethod(const rb_msgdef *rmd,
+ const void *owner) {
+ const upb_handlers *fill_handlers = new_fill_handlers(rmd, &fill_handlers);
+ upb_pbdecodermethodopts opts;
+ upb_pbdecodermethodopts_init(&opts, fill_handlers);
+
+ const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner);
+ upb_handlers_unref(fill_handlers, &fill_handlers);
+ return ret;
+}
+
+// Calculates offsets for each field.
+//
+// This lets us pack protos like structs instead of storing them like
+// dictionaries. This speeds up a parsing a lot and also saves memory
+// (unless messages are very sparse).
+static void assign_offsets(rb_msgdef *rmd) {
+ size_t ofs = sizeof(rb_msgdef); // Msg starts with msgdef pointer.
+ upb_msg_iter i;
+ for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
+ upb_fielddef *f = upb_msg_iter_field(&i);
+ size_t field_size = rupb_sizeof(upb_fielddef_type(f));
+ ofs = align_up(ofs, field_size); // Align field properly.
+ rmd->field_offsets[upb_fielddef_index(f)] = ofs;
+ ofs += field_size;
+ }
+ rmd->size = ofs;
+}
+
+// Constructs a new Ruby wrapper object around the given msgdef.
+static VALUE make_msgdef(const upb_msgdef *md) {
+ rb_msgdef *rmd;
+ VALUE ret = Data_Make_Struct(cMessageDef, rb_msgdef, NULL, msgdef_free, rmd);
+
+ upb_msgdef_ref(md, &rmd->md);
+
+ rmd->md = md;
+ rmd->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md));
+ rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method);
+
+ assign_offsets(rmd);
+
+ return ret;
+}
+
+// Loads a descriptor and constructs a MessageDef to the named message.
+static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) {
+ upb_symtab *symtab = upb_symtab_new(UPB_UNTRACKED_REF);
+
+ // Wrap the symtab in a Ruby object so it gets GC'd.
+ // In a real wrapper we would wrap this object more fully (ie. expose its
+ // methods to Ruby callers).
+ Data_Wrap_Struct(rb_cObject, NULL, symtab_free, symtab);
+
+ upb_status status = UPB_STATUS_INIT;
+ upb_load_descriptor_into_symtab(
+ symtab, RSTRING_PTR(descriptor), RSTRING_LEN(descriptor), &status);
+
+ if (!upb_ok(&status)) {
+ rb_raise(rb_eRuntimeError,
+ "Error loading descriptor: %s", upb_status_errmsg(&status));
+ }
+
+ const char *name = RSTRING_PTR(message_name);
+ const upb_msgdef *m = upb_symtab_lookupmsg(symtab, name);
+
+ if (!m) {
+ rb_raise(rb_eRuntimeError, "Message name '%s' not found", name);
+ }
+
+ return make_msgdef(m);
+}
+
+static VALUE msgdef_parse(VALUE self, VALUE binary_protobuf) {
+ const rb_msgdef *rmd;
+ Data_Get_Struct(self, rb_msgdef, rmd);
+
+ VALUE msg = msg_new(self);
+ rb_msg *msgp;
+ Data_Get_Struct(msg, rb_msg, msgp);
+
+ const upb_handlers *h = upb_pbdecodermethod_desthandlers(rmd->fill_method);
+ upb_pbdecoder decoder;
+ upb_sink sink;
+ upb_status status = UPB_STATUS_INIT;
+
+ upb_pbdecoder_init(&decoder, rmd->fill_method, &status);
+ upb_sink_reset(&sink, h, msgp);
+ upb_pbdecoder_resetoutput(&decoder, &sink);
+ fprintf(stderr, "STR: %s\n", RSTRING_PTR(binary_protobuf));
+ fprintf(stderr, "LEN: %d\n", (int)RSTRING_LEN(binary_protobuf));
+ size_t n = upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf),
+ RSTRING_LEN(binary_protobuf),
+ upb_pbdecoder_input(&decoder));
+ fprintf(stderr, "n: %d\n", (int)n);
+ // TODO(haberman): make uninit optional if custom allocator for parsing
+ // returns GC-rooted memory. That will make decoding longjmp-safe (required
+ // if parsing triggers any VM errors like OOM or errors in user handlers).
+ upb_pbdecoder_uninit(&decoder);
+ rupb_checkstatus(&status);
+
+ return msg;
+}
+
+void Init_upb() {
+ VALUE upb = rb_define_module("Upb");
+
+ cMessageDef = rb_define_class_under(upb, "MessageDef", rb_cObject);
+ rb_define_singleton_method(cMessageDef, "load", msgdef_load, 2);
+ rb_define_method(cMessageDef, "parse", msgdef_parse, 1);
+
+ cMessage = rb_define_class_under(upb, "Message", rb_cObject);
+}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback