From 040f7e6ba2e2282b80f332a031b77d7d34b4fc85 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 Aug 2009 21:44:22 -0700 Subject: Significant memory-management refactoring and Python extension. --- Makefile | 38 +- descriptor/descriptor.h | 79 +--- descriptor/descriptor_const.h | 53 +++ lang_ext/python/cext.c | 16 + lang_ext/python/cext.h | 48 +++ lang_ext/python/definition.c | 164 ++++---- lang_ext/python/definition.h | 20 +- lang_ext/python/pb.c | 919 ++++++++++++++++++++++++++++++++++++++++++ lang_ext/python/setup.py | 18 +- src/upb.h | 93 ++++- src/upb_array.h | 89 +--- src/upb_context.c | 16 +- src/upb_enum.h | 9 - src/upb_inlinedefs.c | 1 + src/upb_mm.c | 208 ++++++++++ src/upb_mm.h | 168 ++++++++ src/upb_msg.c | 213 +++------- src/upb_msg.h | 152 +++---- src/upb_parse.c | 2 +- src/upb_parse.h | 10 - src/upb_string.c | 23 +- src/upb_string.h | 59 +-- src/upb_struct.h | 119 ++++++ src/upb_text.c | 54 ++- tools/upbc.c | 124 ++++-- 25 files changed, 2082 insertions(+), 613 deletions(-) create mode 100644 descriptor/descriptor_const.h create mode 100644 lang_ext/python/cext.c create mode 100644 lang_ext/python/cext.h create mode 100644 lang_ext/python/pb.c create mode 100644 src/upb_mm.c create mode 100644 src/upb_mm.h create mode 100644 src/upb_struct.h diff --git a/Makefile b/Makefile index 2d2c6f8..0aaae32 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,11 @@ # +# This Makefile builds the upb library as well as associated tests, tools, and +# language extensions. +# +# It does not use autoconf/automake/libtool because I can't stomach all the +# cruft. If you're not compiling for gcc, you may have to change some of the +# options. +# # Summary of compiler flags you may want to use: # # * -DNDEBUG: makes binary smaller and faster by removing sanity checks. 
@@ -25,19 +32,28 @@ CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && ca LDLIBS=-lpthread LIBUPB=src/libupb.a -ALL=deps $(OBJ) $(LIBUPB) tests/test_table tests/tests tools/upbc +LIBUPB_PIC=src/libupb_pic.a +LIBUPB_SHARED=src/libupb.so +ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) $(LIBUPB_SHARED) tests/test_table tests/tests tools/upbc all: $(ALL) clean: - rm -rf $(call rwildcard,,*.o) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* + rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* rm -rf descriptor/descriptor.proto.pb + cd lang_ext/python && python setup.py clean --all # The core library (src/libupb.a) -OBJ=src/upb_parse.o src/upb_table.o src/upb_msg.o src/upb_enum.o src/upb_context.o \ - src/upb_string.o src/upb_text.o src/upb_serialize.o descriptor/descriptor.o -SRC=$(call rwildcard,,*.c) -HEADERS=$(call rwildcard,,*.h) -$(LIBUPB): $(OBJ) - ar rcs $(LIBUPB) $(OBJ) +SRC=src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c src/upb_enum.c src/upb_context.c \ + src/upb_string.c src/upb_text.c src/upb_serialize.c descriptor/descriptor.c +STATICOBJ=$(patsubst %.c,%.o,$(SRC)) +SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC)) +# building shared objects is like building static ones, except -fPIC is added. +%.lo : %.c ; $(CC) -fPIC $(CPPFLAGS) $(CFLAGS) -c -o $@ $< +$(LIBUPB): $(STATICOBJ) + ar rcs $(LIBUPB) $(STATICOBJ) +$(LIBUPB_PIC): $(SHAREDOBJ) + ar rcs $(LIBUPB_PIC) $(SHAREDOBJ) +$(LIBUPB_SHARED): $(SHAREDOBJ) + $(CC) -shared -o $(LIBUPB_SHARED) $(SHAREDOBJ) # Regenerating the auto-generated files in descriptor/. 
descriptor/descriptor.proto.pb: descriptor/descriptor.proto @@ -47,6 +63,10 @@ descriptor/descriptor.proto.pb: descriptor/descriptor.proto descriptorgen: descriptor/descriptor.proto.pb tools/upbc ./tools/upbc -i upb_file_descriptor_set -o descriptor/descriptor descriptor/descriptor.proto.pb +# Language extensions. +python: $(LIBUPB_PIC) + cd lang_ext/python && python setup.py build + # Tests test: tests/tests ./tests/tests @@ -136,5 +156,5 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \ benchmarks/google_messages.pb.cc -lprotobuf -lpthread -include deps -deps: $(SRC) $(HEADERS) gen-deps.sh Makefile +deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) @./gen-deps.sh $(SRC) diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h index 7096023..403d9df 100644 --- a/descriptor/descriptor.h +++ b/descriptor/descriptor.h @@ -3,9 +3,7 @@ #ifndef DESCRIPTOR_DESCRIPTOR_H #define DESCRIPTOR_DESCRIPTOR_H -#include - -#include +#include #ifdef __cplusplus extern "C" { @@ -14,45 +12,6 @@ extern "C" { struct google_protobuf_FileDescriptorSet; extern struct google_protobuf_FileDescriptorSet *upb_file_descriptor_set; -/* Enums. 
*/ - -typedef enum google_protobuf_FieldOptions_CType { - GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1, - GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2 -} google_protobuf_FieldOptions_CType; - -typedef enum google_protobuf_FieldDescriptorProto_Type { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18 -} google_protobuf_FieldDescriptorProto_Type; - -typedef enum google_protobuf_FieldDescriptorProto_Label { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3 -} google_protobuf_FieldDescriptorProto_Label; - -typedef enum google_protobuf_FileOptions_OptimizeMode { - GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1, - GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2 -} google_protobuf_FileOptions_OptimizeMode; - /* Forward declarations of all message types. * So they can refer to each other in possibly-recursive ways. */ @@ -131,8 +90,8 @@ typedef struct google_protobuf_MethodOptions /* The message definitions themselves. 
*/ struct google_protobuf_UninterpretedOption_NamePart { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -146,8 +105,8 @@ struct google_protobuf_UninterpretedOption_NamePart { UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption_NamePart) struct google_protobuf_DescriptorProto { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -171,8 +130,8 @@ struct google_protobuf_DescriptorProto { UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto) struct google_protobuf_EnumDescriptorProto { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -188,8 +147,8 @@ struct google_protobuf_EnumDescriptorProto { UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumDescriptorProto) struct google_protobuf_UninterpretedOption { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -211,8 +170,8 @@ struct google_protobuf_UninterpretedOption { UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption) struct google_protobuf_FileDescriptorProto { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -238,8 +197,8 @@ struct google_protobuf_FileDescriptorProto { UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorProto) struct google_protobuf_MethodDescriptorProto { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -257,8 +216,8 @@ struct google_protobuf_MethodDescriptorProto { UPB_DEFINE_MSG_ARRAY(google_protobuf_MethodDescriptorProto) struct google_protobuf_EnumValueOptions { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -270,8 +229,8 @@ struct google_protobuf_EnumValueOptions { UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueOptions) struct google_protobuf_EnumValueDescriptorProto { + struct upb_mmhead mmhead; struct upb_msgdef *def; 
- void *gptr; union { uint8_t bytes[1]; struct { @@ -287,8 +246,8 @@ struct google_protobuf_EnumValueDescriptorProto { UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueDescriptorProto) struct google_protobuf_ServiceDescriptorProto { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -304,8 +263,8 @@ struct google_protobuf_ServiceDescriptorProto { UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceDescriptorProto) struct google_protobuf_FileDescriptorSet { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -317,8 +276,8 @@ struct google_protobuf_FileDescriptorSet { UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorSet) struct google_protobuf_DescriptorProto_ExtensionRange { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -332,8 +291,8 @@ struct google_protobuf_DescriptorProto_ExtensionRange { UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto_ExtensionRange) struct google_protobuf_FieldOptions { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -353,8 +312,8 @@ struct google_protobuf_FieldOptions { UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldOptions) struct google_protobuf_FileOptions { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -374,8 +333,8 @@ struct google_protobuf_FileOptions { UPB_DEFINE_MSG_ARRAY(google_protobuf_FileOptions) struct google_protobuf_MessageOptions { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -389,8 +348,8 @@ struct google_protobuf_MessageOptions { UPB_DEFINE_MSG_ARRAY(google_protobuf_MessageOptions) struct google_protobuf_EnumOptions { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -402,8 +361,8 @@ struct google_protobuf_EnumOptions { 
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumOptions) struct google_protobuf_FieldDescriptorProto { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -429,8 +388,8 @@ struct google_protobuf_FieldDescriptorProto { UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldDescriptorProto) struct google_protobuf_ServiceOptions { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { @@ -442,8 +401,8 @@ struct google_protobuf_ServiceOptions { UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceOptions) struct google_protobuf_MethodOptions { + struct upb_mmhead mmhead; struct upb_msgdef *def; - void *gptr; union { uint8_t bytes[1]; struct { diff --git a/descriptor/descriptor_const.h b/descriptor/descriptor_const.h new file mode 100644 index 0000000..2423e97 --- /dev/null +++ b/descriptor/descriptor_const.h @@ -0,0 +1,53 @@ +/* This file was generated by upbc (the upb compiler). Do not edit. */ + +#ifndef DESCRIPTOR_DESCRIPTOR_C +#define DESCRIPTOR_DESCRIPTOR_C + +#ifdef __cplusplus +extern "C" { +#endif + +/* Enums. 
*/ + +typedef enum google_protobuf_FieldOptions_CType { + GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1, + GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2 +} google_protobuf_FieldOptions_CType; + +typedef enum google_protobuf_FieldDescriptorProto_Type { + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18 +} google_protobuf_FieldDescriptorProto_Type; + +typedef enum google_protobuf_FieldDescriptorProto_Label { + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3 +} google_protobuf_FieldDescriptorProto_Label; + +typedef enum google_protobuf_FileOptions_OptimizeMode { + GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1, + GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2 +} google_protobuf_FileOptions_OptimizeMode; + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* DESCRIPTOR_DESCRIPTOR_C */ diff --git a/lang_ext/python/cext.c b/lang_ext/python/cext.c new file mode 100644 index 0000000..5336f2d --- /dev/null +++ 
b/lang_ext/python/cext.c @@ -0,0 +1,16 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + */ + +#include "cext.h" + +PyMODINIT_FUNC +initcext(void) +{ + PyObject *mod = Py_InitModule("upb.cext", NULL); + initdefinition(); + initpb(); +} diff --git a/lang_ext/python/cext.h b/lang_ext/python/cext.h new file mode 100644 index 0000000..e0e7832 --- /dev/null +++ b/lang_ext/python/cext.h @@ -0,0 +1,48 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + */ + +#ifndef UPB_PYTHON_CEXT_H_ +#define UPB_PYTHON_CEXT_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_HEAD + struct upb_context *context; + PyObject *created_defs; +} PyUpb_Context; + +typedef struct { + PyObject_HEAD + struct upb_msgdef *def; + PyUpb_Context *context; +} PyUpb_MsgDef; + +extern PyTypeObject PyUpb_MsgDefType; + +/* What format string should be passed to PyArg_ParseTuple to get just a raw + * string of bytes and a length. 
*/ +#if PY_MAJOR_VERSION >= 3 +#define BYTES_FORMAT "y#" +#else +#define BYTES_FORMAT "s#" +#endif + +#define RETURN_BOOL(val) if(val) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; } + +extern PyMODINIT_FUNC initdefinition(void); +extern PyMODINIT_FUNC initpb(void); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/lang_ext/python/definition.c b/lang_ext/python/definition.c index 6a788ab..cc1089d 100644 --- a/lang_ext/python/definition.c +++ b/lang_ext/python/definition.c @@ -24,21 +24,29 @@ #include "upb_context.h" #include "upb_msg.h" -#if PY_MAJOR_VERSION > 3 -const char *bytes_format = "y#"; -#else -const char *bytes_format = "s#"; -#endif +static PyTypeObject PyUpb_ContextType; +static struct upb_strtable msgdefs; +static struct upb_strtable contexts; +struct msgtab_entry { + struct upb_strtable_entry e; + PyUpb_MsgDef *msgdef; +}; -/* upb.def.MessageDefinition **************************************************/ +struct contexttab_entry { + struct upb_strtable_entry e; + PyUpb_Context *context; +}; -typedef struct { - PyObject_HEAD - struct upb_msgdef *def; -} PyUpb_MsgDef; +#define CheckContext(obj) \ + (void*)obj; do { \ + if(!PyObject_TypeCheck(obj, &PyUpb_ContextType)) { \ + PyErr_SetString(PyExc_TypeError, "Must be a upb.Context"); \ + return NULL; \ + } \ + } while(0) -PyTypeObject PyUpb_MsgDefType; /* forward decl. */ +/* upb.def.MessageDefinition **************************************************/ /* Not implemented yet, but these methods will expose information about the * message definition (the upb_msgdef). 
*/ @@ -46,18 +54,10 @@ static PyMethodDef msgdef_methods[] = { {NULL, NULL} }; -static PyObject *msgdef_new(struct upb_msgdef *m) -{ - PyUpb_MsgDef *md_obj = (void*)PyType_GenericAlloc(&PyUpb_MsgDefType, 0); - md_obj->def = m; - upb_msgdef_ref(md_obj->def); - return (void*)md_obj; -} - static void msgdef_dealloc(PyObject *obj) { PyUpb_MsgDef *md_obj = (void*)obj; - upb_msgdef_unref(md_obj->def); + Py_DECREF(md_obj->context); obj->ob_type->tp_free(obj); } @@ -106,27 +106,11 @@ PyTypeObject PyUpb_MsgDefType = { /* upb.Context ****************************************************************/ -typedef struct { - PyObject_HEAD - struct upb_context *context; - PyObject *created_defs; -} PyUpb_Context; - -static PyTypeObject PyUpb_ContextType; /* forward decl. */ - -#define CheckContext(obj) \ - (void*)obj; do { \ - if(!PyObject_TypeCheck(obj, &PyUpb_ContextType)) { \ - PyErr_SetString(PyExc_TypeError, "Must be a upb.Context"); \ - return NULL; \ - } \ - } while(0) - static PyObject *context_parsefds(PyObject *_context, PyObject *args) { PyUpb_Context *context = CheckContext(_context); struct upb_string str; - if(!PyArg_ParseTuple(args, bytes_format, &str.ptr, &str.byte_len)) + if(!PyArg_ParseTuple(args, BYTES_FORMAT, &str.ptr, &str.byte_len)) return NULL; str.byte_size = 0; /* We don't own that mem. */ @@ -138,35 +122,56 @@ static PyObject *context_parsefds(PyObject *_context, PyObject *args) Py_RETURN_NONE; } -static PyObject *get_or_create_def(PyUpb_Context *context, - struct upb_symtab_entry *e) +static PyObject *get_or_create_def(struct upb_symtab_entry *e) { - /* Check out internal dictionary of Python classes we have already created - * (keyed by the address of the obj we are referencing). 
*/ -#if PY_MAJOR_VERSION > 3 - PyObject *str = PyBytes_FromStringAndSize((char*)&e->ref, sizeof(void*)); -#else - PyObject *str = PyString_FromStringAndSize((char*)&e->ref, sizeof(void*)); -#endif - /* Would use PyDict_GetItemStringAndSize() if it existed, but only - * PyDict_GetItemString() exists, and pointers could have NULL bytes. */ - PyObject *def = PyDict_GetItem(context->created_defs, str); - if(!def) { - switch(e->type) { - case UPB_SYM_MESSAGE: - def = msgdef_new(e->ref.msg); - break; - case UPB_SYM_ENUM: - case UPB_SYM_SERVICE: - case UPB_SYM_EXTENSION: - default: - def = NULL; - break; - } - if(def) PyDict_SetItem(context->created_defs, str, def); + switch(e->type) { + case UPB_SYM_MESSAGE: return (PyObject*)get_or_create_msgdef(e->ref.msg); + case UPB_SYM_ENUM: + case UPB_SYM_SERVICE: + case UPB_SYM_EXTENSION: + default: fprintf(stderr, "upb.pb, not implemented.\n"); abort(); return NULL; } - Py_DECREF(str); - return def; +} + +static PyUpb_Context *get_or_create_context(struct upb_context *context) +{ + PyUpb_Context *pycontext = NULL; + struct upb_string str = {.ptr = (char*)&context, .byte_len = sizeof(void*)}; + struct contexttab_entry *e = upb_strtable_lookup(&contexts, &str); + if(!e) { + pycontext = (void*)PyUpb_ContextType.tp_alloc(&PyUpb_ContextType, 0); + pycontext->context = context; + struct contexttab_entry new_e = { + .e = {.key = {.ptr = (char*)&pycontext->context, .byte_len = sizeof(void*)}}, + .context = pycontext + }; + upb_strtable_insert(&contexts, &new_e.e); + } else { + pycontext = e->context; + Py_INCREF(pycontext); + } + return pycontext; +} + +PyUpb_MsgDef *get_or_create_msgdef(struct upb_msgdef *def) +{ + PyUpb_MsgDef *pydef = NULL; + struct upb_string str = {.ptr = (char*)&def, .byte_len = sizeof(void*)}; + struct msgtab_entry *e = upb_strtable_lookup(&msgdefs, &str); + if(!e) { + pydef = (void*)PyUpb_MsgDefType.tp_alloc(&PyUpb_MsgDefType, 0); + pydef->def = def; + pydef->context = get_or_create_context(def->context); + 
struct msgtab_entry new_e = { + .e = {.key = {.ptr = (char*)&pydef->def, .byte_len = sizeof(void*)}}, + .msgdef = pydef + }; + upb_strtable_insert(&msgdefs, &new_e.e); + } else { + pydef = e->msgdef; + Py_INCREF(pydef); + } + return pydef; } static PyObject *context_lookup(PyObject *self, PyObject *args) @@ -179,7 +184,7 @@ static PyObject *context_lookup(PyObject *self, PyObject *args) struct upb_symtab_entry e; if(upb_context_lookup(context->context, &str, &e)) { - return get_or_create_def(context, &e); + return get_or_create_def(&e); } else { Py_RETURN_NONE; } @@ -197,12 +202,13 @@ static PyObject *context_resolve(PyObject *self, PyObject *args) struct upb_symtab_entry e; if(upb_context_resolve(context->context, &base, &str, &e)) { - return get_or_create_def(context, &e); + return get_or_create_def(&e); } else { Py_RETURN_NONE; } } +/* Callback for upb_context_enumerate below. */ static void add_string(void *udata, struct upb_symtab_entry *entry) { PyObject *list = udata; @@ -244,7 +250,11 @@ static PyObject *context_new(PyTypeObject *subtype, { PyUpb_Context *obj = (void*)subtype->tp_alloc(subtype, 0); obj->context = upb_context_new(); - obj->created_defs = PyDict_New(); + struct contexttab_entry e = { + .e = {.key = {.ptr = (char*)&obj->context, .byte_len = sizeof(void*)}}, + .context = obj + }; + upb_strtable_insert(&contexts, &e.e); return (void*)obj; } @@ -252,7 +262,9 @@ static void context_dealloc(PyObject *obj) { PyUpb_Context *c = (void*)obj; upb_context_unref(c->context); - Py_DECREF(c->created_defs); + /* TODO: once strtable supports delete. 
*/ + //struct upb_string ptrstr = {.ptr = (char*)&c->context, .byte_len = sizeof(void*)}; + //upb_strtable_delete(&contexts, &ptrstr); obj->ob_type->tp_free(obj); } @@ -299,17 +311,25 @@ static PyTypeObject PyUpb_ContextType = { 0, /* tp_free */ }; -PyMethodDef methods[] = { +static PyMethodDef methods[] = { + {NULL, NULL} }; PyMODINIT_FUNC initdefinition(void) { if(PyType_Ready(&PyUpb_ContextType) < 0) return; - Py_INCREF(&PyUpb_ContextType); /* TODO: necessary? */ if(PyType_Ready(&PyUpb_MsgDefType) < 0) return; - Py_INCREF(&PyUpb_MsgDefType); /* TODO: necessary? */ - PyObject *mod = Py_InitModule("upb.definition", methods); + /* PyModule_AddObject steals a reference. These objects are statically + * allocated and must not be deleted, so we increment their refcount. */ + Py_INCREF(&PyUpb_ContextType); + Py_INCREF(&PyUpb_MsgDefType); + + PyObject *mod = Py_InitModule("upb.cext.definition", methods); PyModule_AddObject(mod, "Context", (PyObject*)&PyUpb_ContextType); + PyModule_AddObject(mod, "MessageDefinition", (PyObject*)&PyUpb_MsgDefType); + + upb_strtable_init(&contexts, 8, sizeof(struct contexttab_entry)); + upb_strtable_init(&msgdefs, 16, sizeof(struct msgtab_entry)); } diff --git a/lang_ext/python/definition.h b/lang_ext/python/definition.h index 8731b8a..040019d 100644 --- a/lang_ext/python/definition.h +++ b/lang_ext/python/definition.h @@ -16,16 +16,30 @@ extern "C" { #endif +typedef struct { + PyObject_HEAD + struct upb_context *context; +} PyUpb_Context; + typedef struct { PyObject_HEAD struct upb_msgdef *def; -} PyUpb_MessageDefinition; + PyUpb_Context *context; +} PyUpb_MsgDef; -extern PyTypeObject PyUpb_MessageDefinitionType; +extern PyTypeObject PyUpb_MsgDefType; /* What format string should be passed to PyArg_ParseTuple to get just a raw * string of bytes and a length. 
*/ -extern const char *bytes_format; +#if PY_MAJOR_VERSION >= 3 +#define BYTES_FORMAT "y#" +#else +#define BYTES_FORMAT "s#" +#endif + +PyUpb_MsgDef *get_or_create_msgdef(struct upb_msgdef *def); + +#define RETURN_BOOL(val) if(val) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; } #ifdef __cplusplus } /* extern "C" */ diff --git a/lang_ext/python/pb.c b/lang_ext/python/pb.c new file mode 100644 index 0000000..6f016b4 --- /dev/null +++ b/lang_ext/python/pb.c @@ -0,0 +1,919 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file implements an interface to Python that is compatible + * (as much as possible) with proto1 (the first implementation of + * protocol buffers, which is only released internally to Google). + * + * The key interface we must support is ProtocolMessage. Each message + * type has its own Python class that supports the ProtocolMessage + * interface (obj.Clear(), obj.IsInitialized(), etc) as well as + * message-specific accessors (obj.foo(), obj.set_foo(), + * obj.clear_foo(), etc). + * + * We represent these message types as + * upb.pb.MessageType objects. In other words, these instances + * are both instances of upb.pb.MessageType *and* classes of + * type MyProtoType. + */ + +#include +#include +#include "upb_mm.h" +#include "definition.h" + +/* Opcodes that describe all of the operations you can perform on a field of a + * protobuf from Python. For example, foo.has_bar() uses opcode OP_HAS. */ +typedef enum { + /* For non-repeated fields. */ + OP_HAS, + /* For non-repeated fields that are not submessages. */ + OP_SET, + /* For non-repeated message fields. */ + OP_MUTABLE, + + /* For repeated fields. */ + OP_SIZE, OP_LIST, OP_ADD, + + /* For all types of fields. 
*/ + OP_GET, OP_CLEAR +} PyUpb_PbBoundFieldOpCode; + +const char *opcode_names[] = { + "OP_HAS", "OP_SET", "OP_MUTABLE", "OP_SIZE", "OP_LIST", "OP_ADD", "OP_GET", "OP_CLEAR" +}; + +/* Structures for the Python objects we define. */ +typedef struct { + PyObject_HEAD; + PyUpb_MsgDef *def; +} PyUpb_PbMsgCreator; + +typedef struct { + PyObject_HEAD; + struct upb_mm_ref ref; + PyUpb_MsgDef *def; +} PyUpb_PbMsg; + +typedef struct { + PyObject_HEAD; + PyUpb_PbMsg *msg; + struct upb_msg_fielddef *f; + PyUpb_PbBoundFieldOpCode code; +} PyUpb_PbBoundFieldOp; + +static PyTypeObject PyUpb_PbMsgCreatorType; +static PyTypeObject PyUpb_PbMsgType; +static PyTypeObject PyUpb_PbBoundFieldOpType; + +#define Check_MsgCreator(obj) \ + (void*)obj; do { \ + if(!PyObject_TypeCheck(obj, &PyUpb_PbMsgCreatorType)) { \ + PyErr_SetString(PyExc_TypeError, "must be a MessageCreator"); \ + return NULL; \ + } \ + } while(0) + +#define Check_Message(obj) \ + (void*)obj; do { \ + if(!PyObject_TypeCheck(obj, &PyUpb_PbMsgType)) { \ + PyErr_SetString(PyExc_TypeError, "must be a Message"); \ + return NULL; \ + } \ + } while(0) + +#define Check_BoundFieldOp(obj) \ + (void*)obj; do { \ + if(!PyObject_TypeCheck(obj, &PyUpb_PbBoundFieldOpType)) { \ + PyErr_SetString(PyExc_TypeError, "must be a BoundFieldOp"); \ + return NULL; \ + } \ + } while(0) + +#define EXPECT_NO_ARGS if(!PyArg_ParseTuple(args, "")) return NULL; +#define MMREF_TO_PYOBJ(mmref) (PyObject*)((char*)(mmref)-offsetof(PyUpb_PbMsg, ref)) + +static struct upb_mm_ref *NewPyRef(struct upb_mm_ref *fromref, + union upb_mmptr p, upb_mm_ptrtype type) +{ + (void)fromref; /* Don't care. */ + struct upb_mm_ref *ref = NULL; + switch(type) { + case UPB_MM_MSG_REF: { + PyUpb_PbMsg *msg = (void*)PyUpb_PbMsgType.tp_alloc(&PyUpb_PbMsgType, 0); + msg->def = get_or_create_msgdef(p.msg->def); /* gets a ref. */ + ref = &msg->ref; + break; + } + case UPB_MM_STR_REF: { + } + case UPB_MM_ARR_REF: { + } + default: assert(false); abort(); break; /* Shouldn't happen. 
*/ + } + return ref; +} + +struct upb_mm pymm = {NewPyRef}; + +/* upb.pb.BoundFieldOp ********************************************************/ + +static PyObject *upb_to_py(union upb_value_ptr p, upb_field_type_t type) +{ + switch(type) { + default: + PyErr_SetString(PyExc_RuntimeError, "internal: unexpected type"); + return NULL; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE: + return PyFloat_FromDouble(*p._double); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT: + return PyFloat_FromDouble(*p._float); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64: + return PyLong_FromLongLong(*p.int64); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64: + return PyLong_FromUnsignedLongLong(*p.uint64); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM: +#if PY_MAJOR_VERSION >= 3 + return PyLong_FromLong(*p.int32); +#else + return PyInt_FromLong(*p.int32); +#endif + + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32: + return PyLong_FromUnsignedLong(*p.uint32); /* unsigned conversion: a uint32 can exceed LONG_MAX where long is 32 bits */ + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL: + RETURN_BOOL(*p._bool); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: + /* Py3k will distinguish between these two. 
*/ + return PyString_FromStringAndSize((*p.str)->ptr, (*p.str)->byte_len); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: { + union upb_mmptr mmptr = upb_mmptr_read(p, UPB_MM_MSG_REF); + bool created; + struct upb_mm_ref *ref = upb_mm_getref(mmptr, UPB_MM_MSG_REF, &pymm, &created); + PyObject *obj = MMREF_TO_PYOBJ(ref); + if(!created) Py_INCREF(obj); + return obj; + } + } +} + +static long convert_to_long(PyObject *val, long lobound, long hibound, bool *ok) +{ + PyObject *o = PyNumber_Int(val); + if(!o) { + PyErr_SetString(PyExc_OverflowError, "could not convert to long"); + *ok = false; + return -1; + } + long longval = PyInt_AS_LONG(o); Py_DECREF(o); /* PyNumber_Int returned a new reference; release it once the value is read */ + if(longval > hibound || longval < lobound) { + PyErr_SetString(PyExc_OverflowError, "value outside type bounds"); + *ok = false; + return -1; + } + *ok = true; + return longval; +} + +static void set_upbscalarfield(union upb_value_ptr p, PyObject *val, + upb_field_type_t type) +{ + switch(type) { + default: + PyErr_SetString(PyExc_RuntimeError, "internal error"); + return; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE: { + PyObject *o = PyNumber_Float(val); + if(!o) { + PyErr_SetString(PyExc_ValueError, "could not convert to double"); + return; + } + *p._double = PyFloat_AS_DOUBLE(o); Py_DECREF(o); /* drop the temporary float */ + return; + } + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT: { + PyObject *o = PyNumber_Float(val); + if(!o) { + PyErr_SetString(PyExc_ValueError, "could not convert to float"); + return; + } + *p._float = PyFloat_AS_DOUBLE(o); Py_DECREF(o); /* drop the temporary float */ + return; + } + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64: { +#if LONG_MAX >= INT64_MAX + bool ok; + long longval = convert_to_long(val, INT64_MIN, INT64_MAX, &ok); + if(ok) *p.int64 = longval; /* 64-bit field: store through the int64 member so all 8 bytes are written */ + return; +#else + PyObject *o = PyNumber_Long(val); + if(!o) { + PyErr_SetString(PyExc_ValueError, "could not convert to 
int64"); + return; + } + *p.int64 = PyLong_AsLongLong(o); Py_DECREF(o); /* PyNumber_Long returned a new reference */ + return; +#endif + } + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64: { + PyObject *o = PyNumber_Long(val); + if(!o) { + PyErr_SetString(PyExc_ValueError, "could not convert to uint64"); + return; + } + *p.uint64 = PyLong_AsUnsignedLongLong(o); Py_DECREF(o); /* PyNumber_Long returned a new reference */ + return; + } + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM: { + bool ok; + long longval = convert_to_long(val, INT32_MIN, INT32_MAX, &ok); + if(ok) *p.int32 = longval; + return; + } + + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32: { +#if LONG_MAX >= UINT32_MAX + bool ok; + long longval = convert_to_long(val, 0, UINT32_MAX, &ok); + if(ok) *p.uint32 = longval; /* unsigned field: store through the uint32 member so values above INT32_MAX convert with defined behavior */ + return; +#else + PyObject *o = PyNumber_Long(val); + if(!o) { + PyErr_SetString(PyExc_ValueError, "could not convert to uint32"); + return; + } + *p.uint32 = PyLong_AsUnsignedLong(o); Py_DECREF(o); /* PyNumber_Long returned a new reference */ + return; +#endif + } + + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL: + if(!PyBool_Check(val)) { + PyErr_SetString(PyExc_ValueError, "should be true or false"); + return; + } + if(val == Py_True) *p._bool = true; + else if(val == Py_False) *p._bool = false; + else PyErr_SetString(PyExc_RuntimeError, "not true or false?"); + return; + + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: { + size_t len = PyString_GET_SIZE(val); + upb_string_resize(*p.str, len); + memcpy((*p.str)->ptr, PyString_AS_STRING(val), len); + return; + } + } +} + +static bool check_py_type(PyObject *obj, upb_field_type_t type) +{ + /* TODO */ + return true; +} + +PyObject* fieldop_call(PyObject *callable, PyObject *args, PyObject *kw) +{ + PyUpb_PbBoundFieldOp *op = 
Check_BoundFieldOp(callable);
+  /* Dispatches a bound field operation (obj.has_foo(), obj.set_foo(v),
+   * obj.add_foo(v), ...) on the message the op was created for.  Returns a
+   * new reference, or NULL with a Python exception set. */
+  PyUpb_PbMsg *pymsg = op->msg;
+  struct upb_mm_ref *msgref = &(pymsg->ref);
+  struct upb_msg *msg = pymsg->ref.p.msg;
+  struct upb_msg_fielddef *f = op->f;
+  union upb_value_ptr p = upb_msg_getptr(msg, f);
+  switch(op->code) {
+    case OP_HAS:
+      /* obj.has_foo() */
+      EXPECT_NO_ARGS;
+      RETURN_BOOL(upb_msg_isset(msg, f));
+    case OP_SET: {
+      PyObject *val;
+      if(upb_isarray(f)) {
+        /* obj.set_repeatedfoo(i, val) */
+        int i;
+        if(!PyArg_ParseTuple(args, "iO", &i, &val)) return NULL;
+        if(!upb_msg_isset(msg, f) || i < 0 ||
+           (upb_arraylen_t)i >= (*p.arr)->len) {
+          PyErr_SetString(PyExc_IndexError, "assignment to invalid index");
+          return NULL;
+        }
+        p = upb_array_getelementptr(*p.arr, i, f->type);
+      } else {
+        /* obj.set_foo(val) */
+        if(!PyArg_ParseTuple(args, "O", &val)) return NULL;
+      }
+      /* NOTE(review): the non-repeated path never marks the field as set
+       * (upb_msg_set) -- confirm whether that is intended. */
+      set_upbscalarfield(p, val, f->type);
+      if(PyErr_Occurred()) return NULL;
+      Py_RETURN_NONE;
+    }
+    case OP_MUTABLE: {
+      /* obj.mutable_scalarmsg() */
+      EXPECT_NO_ARGS;
+      bool created;
+      PyObject *obj = MMREF_TO_PYOBJ(upb_mm_getfieldref(msgref, f, &created));
+      if(!created) Py_INCREF(obj);
+      return obj;
+    }
+
+    /* For repeated fields. */
+    case OP_SIZE: {
+      /* obj.repeatedfoo_size() */
+      EXPECT_NO_ARGS;
+      long len =
+          upb_msg_isset(msg, f) ? (*upb_msg_getptr(msg, f).arr)->len : 0;
+      return PyInt_FromLong(len);
+    }
+    case OP_LIST:
+      /* obj.repeatedfoo_list() -- not implemented yet.  Fail explicitly
+       * instead of falling through into OP_ADD and appending an element. */
+      PyErr_SetString(PyExc_NotImplementedError,
+                      "listing repeated fields is not implemented");
+      return NULL;
+    case OP_ADD: {
+      /* Parse/Verify the args. */
+      PyObject *val = NULL;
+      if(upb_issubmsg(f)) {
+        /* obj.add_submsgfoo() # returns the new submsg */
+        EXPECT_NO_ARGS;
+      } else {
+        /* obj.add_scalarfoo(val) */
+        if(!PyArg_ParseTuple(args, "O", &val)) return NULL;
+        if(!check_py_type(val, f->type)) return NULL;
+      }
+
+      /* NOTE(review): this dereferences *p.arr without checking
+       * upb_msg_isset(msg, f) -- confirm the array always exists here. */
+      upb_arraylen_t len = (*p.arr)->len;
+      /* Grow first: resizing may reallocate the element storage, which would
+       * invalidate an element pointer computed beforehand. */
+      upb_array_resize(*p.arr, len + 1);
+      union upb_value_ptr elem_p = upb_array_getelementptr(*p.arr, len, f->type);
+
+      if(upb_issubmsg(f)) {
+        /* string or submsg. */
+        bool created;
+        upb_mm_ptrtype type = upb_elem_ptrtype(f);
+        union upb_mmptr mmptr = upb_mmptr_read(elem_p, type);
+        struct upb_mm_ref *valref = upb_mm_getref(mmptr, type, &pymm, &created);
+        assert(created);
+        PyObject *obj = MMREF_TO_PYOBJ(valref);
+        return obj;
+      } else {
+        set_upbscalarfield(elem_p, val, f->type);
+        if(PyErr_Occurred()) {
+          /* Drop the element we just appended; it was never filled in. */
+          upb_array_resize(*p.arr, len);
+          return NULL;
+        }
+        Py_RETURN_NONE;
+      }
+    }
+
+    /* For all fields. */
+    case OP_GET: {
+      if(upb_isarray(f)) {
+        /* obj.repeatedfoo(i) */
+        int i;
+        if(!PyArg_ParseTuple(args, "i", &i)) return NULL;
+        if(!upb_msg_isset(msg, f) || i < 0 ||
+           (upb_arraylen_t)i >= (*p.arr)->len) {
+          PyErr_SetString(PyExc_IndexError, "get from invalid index");
+          return NULL;
+        }
+        p = upb_array_getelementptr(*p.arr, i, f->type);
+      } else {
+        /* obj.foo() */
+        EXPECT_NO_ARGS;
+      }
+      return upb_to_py(p, f->type);
+    }
+    case OP_CLEAR:
+      /* obj.clear_foo() */
+      EXPECT_NO_ARGS;
+      /* upb_mm_msgclear() asserts that the field is memory-managed, so plain
+       * scalar fields only have their "set" flag cleared. */
+      if(upb_field_ismm(f)) upb_mm_msgclear(msgref, f);
+      else upb_msg_unset(msg, f);
+      Py_RETURN_NONE;
+
+    default:
+      PyErr_SetString(PyExc_RuntimeError, "invalid bound field opcode.");
+      return NULL;
+  }
+}
+
+/* Releases the reference the op holds on its message, then frees the op. */
+static void fieldop_dealloc(PyObject *obj)
+{
+  PyUpb_PbBoundFieldOp *op = (void*)obj;
+  Py_DECREF(op->msg);
+  obj->ob_type->tp_free(obj);
+}
+
+static PyObject *fieldop_repr(PyObject *obj)
+{
+  PyUpb_PbBoundFieldOp *op = Check_BoundFieldOp(obj);
+  struct upb_string *name = op->msg->def->def->descriptor->name;
+  /* Need to get a NULL-terminated copy of name since PyString_FromFormat
+   * doesn't support ptr+len.
*/ + PyObject *nameobj = PyString_FromStringAndSize(name->ptr, name->byte_len); + struct google_protobuf_FieldDescriptorProto *fd = + upb_msg_field_descriptor(op->f, op->msg->def->def); + PyObject *fieldnameobj = PyString_FromStringAndSize(fd->name->ptr, fd->name->byte_len); + PyObject *ret = + PyString_FromFormat("", + PyString_AS_STRING(fieldnameobj), + opcode_names[op->code], PyString_AS_STRING(nameobj)); + Py_DECREF(nameobj); + Py_DECREF(fieldnameobj); + return ret; +} + +static PyTypeObject PyUpb_PbBoundFieldOpType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "upb.pb.BoundFieldOp", /* tp_name */ + sizeof(PyUpb_PbBoundFieldOp), /* tp_basicsize */ + 0, /* tp_itemsize */ + fieldop_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + fieldop_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + fieldop_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* Can't be created from Python. 
*/ /* tp_new */ + 0, /* tp_free */ +}; + +/* upb.pb.Message *************************************************************/ + +#define Check_SameProtoType(obj1, obj2) \ + do { \ + if(self->ob_type != other->ob_type) { \ + PyErr_SetString(PyExc_TypeError, "other must be of the same type"); \ + return NULL; \ + } \ + } while(0); + +static PyObject *msg_clear(PyObject *self, PyObject *args) +{ + (void)args; + PyUpb_PbMsg *msg = Check_Message(self); + upb_mm_msgclear_all(&msg->ref); + Py_RETURN_NONE; +} + +//static PyObject *msg_encode(PyObject *self, PyObject *args) +//{ +// (void)args; +// PyUpb_PbMsg *msg = Check_Message(self); +// struct upb_msgsizes *sizes = upb_msgsizes_new(); +// struct upb_msg *upb_msg = msg->ref.p.msg; +// upb_msgsizes_read(sizes, upb_msg); +// +// size_t size = upb_msgsizes_totalsize(sizes); +// PyObject *str = PyString_FromStringAndSize(NULL, size); +// if(!str) return NULL; +// char *strbuf = PyString_AS_STRING(str); +// +// bool success = upb_msg_serialize_all(upb_msg, sizes, strbuf); +// upb_msgsizes_free(sizes); +// if(success) { +// return str; +// } else { +// /* TODO: better error than TypeError. 
*/ +// PyErr_SetString(PyExc_TypeError, "Error serializing protobuf."); +// return NULL; +// } +//} + +static PyObject *msg_equals(PyObject *self, PyObject *other) +{ + PyUpb_PbMsg *msg1 = Check_Message(self); + PyUpb_PbMsg *msg2 = Check_Message(other); + Check_SameProtoType(msg1, msg2); + RETURN_BOOL(upb_msg_eql(msg1->ref.p.msg, msg2->ref.p.msg, true)) +} + +static PyObject *msg_isinitialized(PyObject *self, PyObject *args) +{ + (void)args; + PyUpb_PbMsg *msg = Check_Message(self); + RETURN_BOOL(upb_msg_all_required_fields_set(msg->ref.p.msg)) +} + +static PyObject *msg_parsefromstring(PyObject *self, PyObject *args) +{ + PyUpb_PbMsg *msg = Check_Message(self); + char *strdata; + size_t strlen; + if(!PyArg_ParseTuple(args, BYTES_FORMAT, &strdata, &strlen)) + return NULL; + + if(upb_msg_parsestr(msg->ref.p.msg, strdata, strlen) != UPB_STATUS_OK) { + /* TODO: better error than TypeError. */ + PyErr_SetString(PyExc_TypeError, "error parsing protobuf"); + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject *msg_mergefromstring(PyObject *self, PyObject *args) +{ + PyUpb_PbMsg *msg = Check_Message(self); + char *strdata; + size_t strlen; + if(!PyArg_ParseTuple(args, BYTES_FORMAT, &strdata, &strlen)) + return NULL; + + if(upb_msg_parsestr(msg->ref.p.msg, strdata, strlen) != UPB_STATUS_OK) { + /* TODO: better error than TypeError. */ + PyErr_SetString(PyExc_TypeError, "error parsing protobuf"); + return NULL; + } + Py_RETURN_NONE; +} + +/* Commented-out methods are TODO. */ +static PyMethodDef msg_methods[] = { + {"Clear", msg_clear, METH_NOARGS, + "Erases all data from the ProtocolMessage, reseting fields to their defaults" + }, + //{"CopyFrom", msg_copyfrom, METH_O, + // "Copies data from another ProtocolMessage." + //}, + //{"Encode", msg_encode, METH_NOARGS, + // "Returns a string representing the ProtocolMessage." + //}, + {"Equals", msg_equals, METH_O, + "Returns true if the given ProtocolMessage has the same type and value." 
+ }, + {"IsInitialized", msg_isinitialized, METH_NOARGS, + "Returns true iff all required fields have been set." + }, + //{"Merge", msg_merge, METH_O, + // "Merges data from the given Decoder." + //}, + //{"MergeFrom", msg_mergefrom, METH_O, + // "Merges data from another ProtocolMessage of the same type." + //}, + {"MergeFromString", msg_mergefromstring, METH_VARARGS, + "Merges data from the given string. Raises an exception if this does not " + "result in the ProtocolMessage being initialized." + }, + //{"Output", msg_output, METH_O, + // "Writes the ProtocolMessage to the given encoder." + //}, + //{"OutputUnchecked", msg_output, METH_O, + // "Writes the ProtocolMessage to the given encoder, without checking " + // "initialization" + //}, + //{"Parse", msg_parse, METH_O, + // "Parses data from the given Decoder." + //}, + //{"ParseASCII", msg_parseascii, METH_VARARGS, + // "Parses a string generated by ToASCII. Raises a ValueError if unknown " + // "fields are encountered." + //}, + //{"ParseASCIIIgnoreUnknown", msg_parseascii, METH_VARARGS, + // "Parses a string generated by ToASCII. Ignores unknown fields." + //}, + {"ParseFromString", msg_parsefromstring, METH_VARARGS, + "Parses data from the given string. Raises an exception if this does not " + "result in the ProtocolMessage being initialized." + }, + //{"ToASCII", msg_toascii, METH_NOARGS, + // "Returns the ProtocolMessage as a human-readable ASCII string." + //}, + //{"ToCompactASCII", msg_tocompactascii, METH_NOARGS, + // "Returns the ProtocolMessage as a human-readable ASCII string that uses " + // "tag numbers instead of field names." + //}, + //{"ToShortASCII", msg_toshortascii, METH_NOARGS, + // "Returns the ProtocolMessage as a human-readable ASCII string, all on one + // "line." + //}, + //{"TryMerge", msg_trymerge, METH_O, + // "Merges data from the given decoder. 
+ //} + {NULL, NULL} +}; + +static bool starts_with(struct upb_string *str, struct upb_string *prefix, + struct upb_string *out_str) +{ + if(str->byte_len < prefix->byte_len) return false; + if(memcmp(str->ptr, prefix->ptr, prefix->byte_len) == 0) { + out_str->ptr = str->ptr + prefix->byte_len; + out_str->byte_len = str->byte_len - prefix->byte_len; + return true; + } else { + return false; + } +} + +static bool ends_with(struct upb_string *str, struct upb_string *suffix, + struct upb_string *out_str) +{ + if(str->byte_len < suffix->byte_len) return false; + if(memcmp(str->ptr + str->byte_len - suffix->byte_len, suffix->ptr, suffix->byte_len) == 0) { + out_str->ptr = str->ptr; + out_str->byte_len = str->byte_len - suffix->byte_len; + return true; + } else { + return false; + } +} + +PyObject *PyUpb_NewPbBoundFieldOp(PyUpb_PbMsg *msgobj, struct upb_msg_fielddef *f, + PyUpb_PbBoundFieldOpCode code) +{ + /* Type check that this operation on a field of this type makes sense. */ + if(upb_isarray(f)) { + switch(code) { + case OP_HAS: + case OP_SET: + case OP_MUTABLE: + return NULL; + default: break; + } + } else { + if(upb_issubmsg(f)) { + switch(code) { + case OP_SET: + case OP_SIZE: + case OP_LIST: + case OP_ADD: + return NULL; + default: break; + } + } else { + switch(code) { + case OP_MUTABLE: + case OP_SIZE: + case OP_LIST: + case OP_ADD: + return NULL; + default: break; + } + } + } + + PyUpb_PbBoundFieldOp *op = + (void*)PyUpb_PbBoundFieldOpType.tp_alloc(&PyUpb_PbBoundFieldOpType, 0); + op->msg = msgobj; + op->f = f; + op->code = code; + Py_INCREF(op->msg); + return (PyObject*)op; +} + +PyObject* msg_getattro(PyObject *obj, PyObject *attr_name) +{ + /* Each protobuf field results in a set of four methods for a scalar or five + * methods for an array. To avoid putting 4f entries in our type dict, we + * dynamically scan the method to see if it is of these forms, and if so, + * look it up in the hash table that upb already keeps. 
+ * + * If these repeated comparisons showed up as being a hot spot in a profile, + * there are several ways this dispatch could be optimized. */ + static struct upb_string set = {.ptr = "set_", .byte_len = 4}; + static struct upb_string has = {.ptr = "has_", .byte_len = 4}; + static struct upb_string clear = {.ptr = "clear_", .byte_len = 6}; + static struct upb_string size = {.ptr = "_size", .byte_len = 5}; + static struct upb_string mutable = {.ptr = "mutable_", .byte_len = 8}; + static struct upb_string add = {.ptr = "add_", .byte_len = 4}; + static struct upb_string list = {.ptr = "_list", .byte_len = 5}; + + struct upb_string str; + Py_ssize_t len; + PyString_AsStringAndSize(attr_name, &str.ptr, &len); + if(len > UINT32_MAX) { + PyErr_SetString(PyExc_TypeError, + "Wow, that's a long attribute name you've got there."); + return NULL; + } + str.byte_len = (uint32_t)len; + PyUpb_PbMsg *msgobj = Check_Message(obj); + struct upb_msgdef *def = msgobj->ref.p.msg->def; + + /* This can be a field reference iff the first letter is lowercase, because + * generic methods (eg. IsInitialized()) all start with uppercase. */ + if(islower(str.ptr[0])) { + PyUpb_PbBoundFieldOpCode opcode; + struct upb_string field_name; + if(starts_with(&str, &has, &field_name)) + opcode = OP_HAS; + else if(starts_with(&str, &set, &field_name)) + opcode = OP_SET; + else if(starts_with(&str, &mutable, &field_name)) + opcode = OP_MUTABLE; + else if(ends_with(&str, &size, &field_name)) + opcode = OP_SIZE; + else if(ends_with(&str, &list, &field_name)) + opcode = OP_LIST; + else if(starts_with(&str, &add, &field_name)) + opcode = OP_ADD; + else if(starts_with(&str, &clear, &field_name)) + opcode = OP_CLEAR; + else { + /* Could be a plain field reference (eg. obj.field(i)). 
*/ + opcode = OP_GET; + field_name = str; + } + struct upb_msg_fielddef *f = upb_msg_fieldbyname(def, &field_name); + if(f) { + PyObject *op = PyUpb_NewPbBoundFieldOp(msgobj, f, opcode); + if(op) return op; + } + } + + /* Fall back on regular attribute lookup. */ + return PyObject_GenericGetAttr(obj, attr_name); +} + +static void msg_dealloc(PyObject *obj) +{ + PyUpb_PbMsg *msg = (void*)obj; + upb_mm_release(&msg->ref); + Py_DECREF(msg->def); + obj->ob_type->tp_free(obj); +} + +static PyTypeObject PyUpb_PbMsgType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "upb.pb.Message", /* tp_name */ + sizeof(PyUpb_PbMsg), /* tp_basicsize */ + 0, /* tp_itemsize */ + msg_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr (TODO) */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + msg_getattro, /* tp_getattro */ + 0, /* Not allowed. */ /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse (TODO) */ + 0, /* tp_clear (TODO) */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + msg_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* Can't be created from Python. 
*/ /* tp_new */ + 0, /* tp_free */ +}; + +/* upb.pb.MessageCreator ******************************************************/ + +static PyObject *creator_call(PyObject *callable, PyObject *args, PyObject *kw) +{ + PyUpb_PbMsgCreator *creator = Check_MsgCreator(callable); + return MMREF_TO_PYOBJ(upb_mm_newmsg_ref(creator->def->def, &pymm)); +} + +static PyObject *creator_repr(PyObject *obj) +{ + PyUpb_PbMsgCreator *creator = Check_MsgCreator(obj); + struct upb_string *name = creator->def->def->descriptor->name; + /* Need to get a NULL-terminated copy of name since PyString_FromFormat + * doesn't support ptr+len. */ + PyObject *nameobj = PyString_FromStringAndSize(name->ptr, name->byte_len); + PyObject *ret = PyString_FromFormat("", + PyString_AS_STRING(nameobj)); + Py_DECREF(nameobj); + return ret; +} + +static void creator_dealloc(PyObject *obj) +{ + PyUpb_PbMsgCreator *creator = (void*)obj; + Py_DECREF(creator->def); + obj->ob_type->tp_free(obj); +} + +static PyObject *creator_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyUpb_PbMsgCreator *creator = (void*)type->tp_alloc(type, 0); + PyUpb_MsgDef *def; + if(!PyArg_ParseTuple(args, "O!", &PyUpb_MsgDefType, &def)) return NULL; + creator->def = def; + Py_INCREF(creator->def); + return (PyObject*)creator; +} + +static PyTypeObject PyUpb_PbMsgCreatorType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "upb.pb.MessageCreator", /* tp_name */ + sizeof(PyUpb_PbMsgCreator), /* tp_basicsize */ + 0, /* tp_itemsize */ + creator_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + creator_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + creator_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* 
tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + creator_new, /* tp_new */ + 0, /* tp_free */ +}; + +/* upb.pb module **************************************************************/ + +static PyMethodDef methods[] = { + {NULL, NULL} +}; + +PyMODINIT_FUNC +initpb(void) +{ + if(PyType_Ready(&PyUpb_PbBoundFieldOpType) < 0) return; + if(PyType_Ready(&PyUpb_PbMsgType) < 0) return; + if(PyType_Ready(&PyUpb_PbMsgCreatorType) < 0) return; + + /* PyModule_AddObject steals a reference. These objects are statically + * allocated and must not be deleted, so we increment their refcount. */ + Py_INCREF(&PyUpb_PbBoundFieldOpType); + Py_INCREF(&PyUpb_PbMsgType); + Py_INCREF(&PyUpb_PbMsgCreatorType); + + PyObject *mod = Py_InitModule("upb.cext.pb", methods); + PyModule_AddObject(mod, "BoundFieldOp", (PyObject*)&PyUpb_PbBoundFieldOpType); + PyModule_AddObject(mod, "Message", (PyObject*)&PyUpb_PbMsgType); + PyModule_AddObject(mod, "MessageCreator", (PyObject*)&PyUpb_PbMsgCreatorType); +} diff --git a/lang_ext/python/setup.py b/lang_ext/python/setup.py index 66862f3..53cbef1 100644 --- a/lang_ext/python/setup.py +++ b/lang_ext/python/setup.py @@ -1,11 +1,15 @@ from distutils.core import setup, Extension + setup(name='upb', version='0.1', - ext_modules=[Extension('upb.definition', ['definition.c'], - include_dirs=['../../src', '../../descriptor'], - define_macros=[("UPB_USE_PTHREADS", 1), - ("UPB_UNALIGNED_READS_OK", 1)], - library_dirs=['../../src'], - libraries=['upb_pic'] - )], + ext_modules=[ + Extension('upb.cext', ['definition.c', 'pb.c', 'cext.c'], + include_dirs=['../../src', '../../descriptor'], + define_macros=[("UPB_USE_PTHREADS", 1), + ("UPB_UNALIGNED_READS_OK", 1)], + library_dirs=['../../src'], + libraries=['upb_pic'], + ), + ], + 
packages=['upb'] ) diff --git a/src/upb.h b/src/upb.h index 27bf5fc..af026f5 100644 --- a/src/upb.h +++ b/src/upb.h @@ -12,7 +12,7 @@ #include #include #include /* for size_t. */ -#include "upb_string.h" +#include "descriptor_const.h" #ifdef __cplusplus extern "C" { @@ -23,6 +23,9 @@ extern "C" { #define INLINE static inline #endif +#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) +#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) + /* The maximum that any submessages can be nested. Matches proto2's limit. */ #define UPB_MAX_NESTING 64 @@ -55,12 +58,22 @@ typedef uint8_t upb_wire_type_t; * errors, and we use it to represent exceptional circumstances. */ typedef uint8_t upb_field_type_t; +INLINE bool upb_issubmsgtype(upb_field_type_t type) { + return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP || + type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE; +} + +INLINE bool upb_isstringtype(upb_field_type_t type) { + return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || + type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES; +} + /* Information about a given value type (upb_field_type_t). */ struct upb_type_info { uint8_t align; uint8_t size; upb_wire_type_t expected_wire_type; - struct upb_string ctype; + char *ctype; }; /* Contains information for all .proto types. Indexed by upb_field_type_t. */ @@ -90,6 +103,10 @@ struct upb_tag { /* Polymorphic values of .proto types *****************************************/ +struct upb_string; +struct upb_array; +struct upb_msg; + /* A single .proto value. The owner must have an out-of-band way of knowing * the type, so that it knows which union member to use. 
*/ union upb_value { @@ -121,15 +138,83 @@ union upb_value_ptr { void *_void; }; +/* Unfortunately there is no way to define this so that it can be used as a + * generic expression, a la: + * foo(UPB_VALUE_ADDROF(bar)); + * ...you have to use it as the initializer of a upb_value_ptr: + * union upb_value_ptr p = UPB_VALUE_ADDROF(bar); + * foo(p); + */ +#define UPB_VALUE_ADDROF(val) {(void*)&val._double} + /* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need * to know the field type to perform this operation, because we need to know * how much memory to copy. */ -INLINE union upb_value upb_deref(union upb_value_ptr ptr, upb_field_type_t t) { +INLINE union upb_value upb_value_read(union upb_value_ptr ptr, + upb_field_type_t ft) { union upb_value val; - memcpy(&val, ptr._void, upb_type_info[t].size); +#define CASE(t, member_name) \ + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ + val.member_name = *ptr.member_name; \ + break; + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } +#undef CASE return val; } +/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need + * to know the field type to perform this operation, because we need to know + * how much memory to copy. 
*/ +INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val, + upb_field_type_t ft) { +#define CASE(t, member_name) \ + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ + *ptr.member_name = val.member_name; \ + break; + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } +#undef CASE +} + union upb_symbol_ref { struct upb_msgdef *msg; struct upb_enum *_enum; diff --git a/src/upb_array.h b/src/upb_array.h index 370f6eb..732c4aa 100644 --- a/src/upb_array.h +++ b/src/upb_array.h @@ -23,7 +23,7 @@ #define UPB_ARRAY_H_ #include -#include "upb.h" +#include "upb_msg.h" /* Because we use upb_msg_fielddef */ #ifdef __cplusplus extern "C" { @@ -31,41 +31,6 @@ extern "C" { struct upb_string; -/* upb_arrays can be at most 2**32 elements long. */ -typedef uint32_t upb_arraylen_t; - -/* Represents an array (a repeated field) of any type. The interpretation of - * the data in the array depends on the type. */ -struct upb_array { - union upb_value_ptr elements; - upb_arraylen_t len; /* Number of elements in "elements". */ - upb_arraylen_t size; /* Memory we own (0 if by reference). 
*/ - void *gptr; -}; - -INLINE void upb_array_init(struct upb_array *arr) -{ - arr->elements._void = NULL; - arr->len = 0; - arr->size = 0; -} - -INLINE void upb_array_uninit(struct upb_array *arr) -{ - if(arr->size) free(arr->elements._void); -} - -INLINE struct upb_array *upb_array_new(void) { - struct upb_array *arr = malloc(sizeof(*arr)); - upb_array_init(arr); - return arr; -} - -INLINE void upb_array_free(struct upb_array *arr) { - upb_array_uninit(arr); - free(arr); -} - /* Returns a pointer to an array element. Does not perform a bounds check! */ INLINE union upb_value_ptr upb_array_getelementptr( struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type) @@ -75,10 +40,17 @@ INLINE union upb_value_ptr upb_array_getelementptr( return ptr; } -INLINE union upb_value upb_array_getelement( - struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type) +/* Allocation/Deallocation/Resizing. ******************************************/ + +INLINE struct upb_array *upb_array_new(struct upb_msg_fielddef *f) { - return upb_deref(upb_array_getelementptr(arr, n, type), type); + struct upb_array *arr = malloc(sizeof(*arr)); + upb_mmhead_init(&arr->mmhead); + arr->elements._void = NULL; + arr->len = 0; + arr->size = 0; + arr->fielddef = f; + return arr; } INLINE uint32_t upb_round_up_to_pow2(uint32_t v) @@ -94,13 +66,10 @@ INLINE uint32_t upb_round_up_to_pow2(uint32_t v) return v; } -/* Resizes array to be "len" elements long and ensures we have write access - * to the array (reallocating if necessary). Returns true iff we were - * referencing memory for the array and dropped the reference. */ -INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen, - upb_field_type_t type) +/* Resizes array to be "len" elements long (reallocating if necessary). 
*/ +INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen) { - size_t type_size = upb_type_info[type].size; + size_t type_size = upb_type_info[arr->fielddef->type].size; bool dropped = false; bool ref = arr->size == 0; /* Ref'ing external memory. */ void *data = arr->elements._void; @@ -114,39 +83,11 @@ INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen, memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size); dropped = true; } + /* TODO: fill with defaults. */ arr->len = newlen; return dropped; } -/* These are all overlays on upb_array, pointers between them can be cast. */ -#define UPB_DEFINE_ARRAY_TYPE(name, type) \ - struct name ## _array { \ - struct upb_fielddef *f; \ - void *gptr; \ - type *elements; \ - upb_arraylen_t len; \ - upb_arraylen_t size; \ - }; - -UPB_DEFINE_ARRAY_TYPE(upb_double, double) -UPB_DEFINE_ARRAY_TYPE(upb_float, float) -UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t) -UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t) -UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t) -UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t) -UPB_DEFINE_ARRAY_TYPE(upb_bool, bool) -UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*) -UPB_DEFINE_ARRAY_TYPE(upb_msg, void*) - -/* Defines an array of a specific message type (an overlay of upb_array). */ -#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array -#define UPB_DEFINE_MSG_ARRAY(msg_type) \ - UPB_MSG_ARRAY(msg_type) { \ - msg_type **elements; \ - upb_arraylen_t len; \ - upb_arraylen_t size; \ - }; - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_context.c b/src/upb_context.c index 12ad8c7..0d64c3e 100644 --- a/src/upb_context.c +++ b/src/upb_context.c @@ -10,6 +10,7 @@ #include "upb_context.h" #include "upb_enum.h" #include "upb_msg.h" +#include "upb_mm.h" /* Search for a character in a string, in reverse. 
*/
 static int my_memrchr(char *data, char c, size_t len)
@@ -66,7 +67,7 @@ static void free_context(struct upb_context *c)
 {
   free_symtab(&c->symtab);
   for(size_t i = 0; i < c->fds_len; i++)
-    upb_msg_free((struct upb_msg*)c->fds[i]);
+    upb_msg_unref((struct upb_msg*)c->fds[i]);
   free_symtab(&c->psymtab);
   free(c->fds);
 }
@@ -77,9 +78,9 @@ void upb_context_unref(struct upb_context *c)
     upb_rwlock_wrlock(&c->lock);
     free_context(c);
     upb_rwlock_unlock(&c->lock);
+    upb_rwlock_destroy(&c->lock);  /* The lock lives inside c: destroy it before freeing c. */
+    free(c);
   }
-  free(c);
-  upb_rwlock_destroy(&c->lock);
 }
 
 bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol,
@@ -325,10 +326,9 @@ bool upb_context_addfds(struct upb_context *c,
 }
 
 bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
-  google_protobuf_FileDescriptorSet *fds =
-      (google_protobuf_FileDescriptorSet*)upb_msg_parsenew(c->fds_msg, fds_str);
-  if(!fds) return false;
-  if(!upb_context_addfds(c, fds)) return false;
+  struct upb_msg *fds = upb_msg_new(c->fds_msg);
+  if(upb_msg_parsestr(fds, fds_str->ptr, fds_str->byte_len) != UPB_STATUS_OK) { upb_msg_unref(fds); return false; }  /* Don't leak the unparsed msg. */
+  if(!upb_context_addfds(c, (google_protobuf_FileDescriptorSet*)fds)) return false;  /* NOTE(review): fds looks leaked here too -- confirm ownership on failure. */
 
   {
     /* We own fds now, need to keep a ref so we can free it later.
*/ @@ -337,7 +337,7 @@ bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) { c->fds_size *= 2; c->fds = realloc(c->fds, c->fds_size); } - c->fds[c->fds_len++] = fds; + c->fds[c->fds_len++] = (google_protobuf_FileDescriptorSet*)fds; upb_rwlock_unlock(&c->lock); } return true; diff --git a/src/upb_enum.h b/src/upb_enum.h index e43a203..9acc075 100644 --- a/src/upb_enum.h +++ b/src/upb_enum.h @@ -33,15 +33,6 @@ struct upb_enum_iton_entry { struct upb_string *string; }; -INLINE void upb_enum_ref(struct upb_enum *e) { - if(upb_atomic_ref(&e->refcount)) upb_context_ref(e->context); -} - -INLINE void upb_enum_unref(struct upb_enum *e) { - if(upb_atomic_unref(&e->refcount)) upb_context_unref(e->context); -} - - /* Initializes and frees an enum, respectively. Caller retains ownership of * ed, but it must outlive e. */ void upb_enum_init(struct upb_enum *e, diff --git a/src/upb_inlinedefs.c b/src/upb_inlinedefs.c index dae5c01..7a55e06 100644 --- a/src/upb_inlinedefs.c +++ b/src/upb_inlinedefs.c @@ -15,6 +15,7 @@ #include "upb_array.h" #include "upb_context.h" #include "upb_enum.h" +#include "upb_mm.h" #include "upb_msg.h" #include "upb_parse.h" #include "upb_serialize.h" diff --git a/src/upb_mm.c b/src/upb_mm.c new file mode 100644 index 0000000..853d572 --- /dev/null +++ b/src/upb_mm.c @@ -0,0 +1,208 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
+ */ + +#include "upb_mm.h" +#include "upb_string.h" +#include "upb_array.h" +#include "upb_msg.h" + +void upb_msg_destroy(struct upb_msg *msg) { + uint32_t i; + for(i = 0; i < msg->def->num_fields; i++) { + struct upb_msg_fielddef *f = &msg->def->fields[i]; + if(!upb_msg_isset(msg, f) || !upb_field_ismm(f)) continue; + upb_mm_ptrtype type = upb_field_ptrtype(f); + union upb_mmptr mmptr = upb_mmptr_read(upb_msg_getptr(msg, f), type); + upb_mm_unref(mmptr, type); + } + free(msg); +} + +void upb_array_destroy(struct upb_array *arr) +{ + if(upb_elem_ismm(arr->fielddef)) { + upb_arraylen_t i; + /* Unref elements. */ + for(i = 0; i < arr->len; i++) { + union upb_value_ptr p = upb_array_getelementptr(arr, i, arr->fielddef->type); + upb_mm_ptrtype type = upb_elem_ptrtype(arr->fielddef); + union upb_mmptr mmptr = upb_mmptr_read(p, type); + upb_mm_unref(mmptr, type); + } + } + if(arr->size != 0) free(arr->elements._void); + free(arr); +} + +static union upb_mmptr upb_mm_newptr(upb_mm_ptrtype type, + struct upb_msg_fielddef *f) +{ + union upb_mmptr p = {NULL}; + switch(type) { + case UPB_MM_MSG_REF: p.msg = upb_msg_new(f->ref.msg); + case UPB_MM_STR_REF: p.str = upb_string_new(); + case UPB_MM_ARR_REF: p.arr = upb_array_new(f); + default: assert(false); break; + } + return p; +} + +static struct upb_mm_ref *find_or_create_ref(struct upb_mm_ref *fromref, + struct upb_mm *mm, + union upb_mmptr p, upb_mm_ptrtype type, + bool *created) +{ + struct upb_mmhead *head = upb_mmhead_addr(p, type); + struct upb_mm_ref **ref = &head->refs; + while(*ref && (*ref)->mm <= mm) { + if((*ref)->mm == mm) { + return *ref; + *created = false; + } + ref = &((*ref)->next); + } + *created = true; + struct upb_mm_ref *newref = mm->newref_cb(fromref, p, type); + newref->p = p; + newref->type = type; + newref->mm = mm; + newref->next = *ref; + *ref = newref; + return newref; +} + +struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type, + struct upb_mm *mm, bool *created) +{ + return 
find_or_create_ref(NULL, mm, p, type, created); +} + +struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm) +{ + struct upb_msg *msg = upb_msg_new(def); + union upb_mmptr mmptr = {.msg = msg}; + bool created; + struct upb_mm_ref *ref = find_or_create_ref(NULL, mm, mmptr, UPB_MM_MSG_REF, &created); + upb_mm_unref(mmptr, UPB_MM_MSG_REF); /* Shouldn't have any counted refs. */ + assert(created); + return ref; +} + +struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref, + struct upb_msg_fielddef *f, + bool *refcreated) +{ + assert(upb_field_ismm(f)); + upb_mm_ptrtype ptrtype = upb_field_ptrtype(f); + struct upb_msg *msg = msgref->p.msg; + union upb_mmptr val; + union upb_value_ptr p = upb_msg_getptr(msg, f); + + /* Create the upb value if it doesn't already exist. */ + if(!upb_msg_isset(msg, f)) { + upb_msg_set(msg, f); + val = upb_mm_newptr(ptrtype, f); + upb_mmptr_write(p, val, ptrtype); + } else { + val = upb_mmptr_read(p, ptrtype); + } + + return find_or_create_ref(msgref, msgref->mm, val, ptrtype, refcreated); +} + +struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i, + bool *refcreated) +{ + struct upb_array *arr = arrref->p.arr; + struct upb_msg_fielddef *f = arr->fielddef; + assert(upb_elem_ismm(f)); + assert(i < arr->len); + union upb_value_ptr p = upb_array_getelementptr(arr, i, f->type); + upb_mm_ptrtype type = upb_elem_ptrtype(f); + union upb_mmptr val = upb_mmptr_read(p, type); + return find_or_create_ref(arrref, arrref->mm, val, type, refcreated); +} + +void upb_mm_release(struct upb_mm_ref *ref) +{ + struct upb_mm_ref **ref_head = (void*)ref->p.msg; + struct upb_mm_ref **ref_elem = ref_head; + struct upb_mm *mm = ref->mm; + while(true) { + assert(*ref_elem); /* Client asserts r->mm is in the list. */ + if((*ref_elem)->mm == mm) break; + ref_elem = &(*ref_elem)->next; /* Not this mm's ref; keep walking the list. */ + } + /* ref_elem now points at the link that holds this mm's ref. */ + *ref_elem = (*ref_elem)->next; /* Remove from the list. */ + + if(upb_mmhead_norefs(&ref->p.msg->mmhead)) { + /* Destroy the dynamic object.
*/ + switch(ref->type) { + case UPB_MM_MSG_REF: + upb_msg_destroy(ref->p.msg); + break; + case UPB_MM_ARR_REF: + upb_array_destroy(ref->p.arr); + break; + case UPB_MM_STR_REF: + upb_string_destroy(ref->p.str); + break; + default: assert(false); break; + } + } +} + +void upb_mm_msg_set(struct upb_mm_ref *from_msg_ref, struct upb_mm_ref *to_ref, + struct upb_msg_fielddef *f) +{ + assert(upb_field_ismm(f)); + union upb_mmptr fromval = from_msg_ref->p; + union upb_mmptr toval = to_ref->p; + union upb_value_ptr field_p = upb_msg_getptr(fromval.msg, f); + upb_mm_ptrtype type = upb_field_ptrtype(f); + if(upb_msg_isset(fromval.msg, f)) { + union upb_mmptr existingval = upb_mmptr_read(field_p, type); + if(existingval.msg == toval.msg) + return; /* Setting to its existing value, do nothing. */ + upb_mm_unref(existingval, type); + } + upb_msg_set(fromval.msg, f); + upb_mmptr_write(field_p, toval, type); + upb_mm_ref(toval, type); +} + +void upb_mm_msgclear(struct upb_mm_ref *from_msg_ref, struct upb_msg_fielddef *f) +{ + assert(upb_field_ismm(f)); + union upb_mmptr fromval = from_msg_ref->p; + upb_mm_ptrtype type = upb_field_ptrtype(f); + if(upb_msg_isset(fromval.msg, f)) { + union upb_value_ptr field_p = upb_msg_getptr(fromval.msg, f); + union upb_mmptr existingval = upb_mmptr_read(field_p, type); + upb_msg_unset(fromval.msg, f); + upb_mm_unref(existingval, type); + } +} + +void upb_mm_msgclear_all(struct upb_mm_ref *from) +{ + struct upb_msgdef *def = from->p.msg->def; + for(uint32_t i = 0; i < def->num_fields; i++) { + struct upb_msg_fielddef *f = &def->fields[i]; + if(!upb_field_ismm(f)) continue; + upb_mm_msgclear(from, f); + } +} + +void upb_mm_arr_set(struct upb_mm_ref *from, struct upb_mm_ref *to, + upb_arraylen_t i, upb_field_type_t type) +{ + (void)from; + (void)to; + (void)i; + (void)type; +} diff --git a/src/upb_mm.h b/src/upb_mm.h new file mode 100644 index 0000000..88cb043 --- /dev/null +++ b/src/upb_mm.h @@ -0,0 +1,168 @@ +/* + * upb - a minimalist 
implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * A parsed protobuf is represented in memory as a tree. The three kinds of + * nodes in this tree are messages, arrays, and strings. This file defines + * a memory-management scheme for making sure that these nodes are collected + * at the right times. + * + * The basic strategy is reference-counting, but with a twist. Since any + * dynamic language that wishes to reference these nodes will need its own, + * language-specific structure, we provide two different kinds of references: + * + * - counted references. these are references that are tracked with only a + * reference count. They are used for two separate purposes: + * 1. for references within the tree, from one node to another. + * 2. for external references into the tree, where the referrer does not need + * a separate message structure. + * - listed references. these are references that have their own separate + * data record. these separate records are kept in a linked list. + */ + +#ifndef UPB_MM_H_ +#define UPB_MM_H_ + +#include "upb.h" +#include "upb_string.h" +#include "upb_array.h" +#include "upb_msg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Structure definitions. *****************************************************/ + +typedef int16_t upb_mm_id; + +struct upb_msg; +struct upb_array; +struct upb_string; +struct upb_msg_fielddef; + +struct upb_mm_ref; +/* Info about a mm. */ +struct upb_mm { + /* fromref is set iff this call is from getfieldref or getelemref. */ + struct upb_mm_ref *(*newref_cb)(struct upb_mm_ref *fromref, + union upb_mmptr p, upb_mm_ptrtype type); +}; + +struct upb_mm_ref { + union upb_mmptr p; + /* This is slightly wasteful, because the mm-specific ref will probably also + * contain the information about what kind of ref this is, in a different + * form. */ + upb_mm_ptrtype type; + struct upb_mm *mm; /* TODO: There are ways to shrink this.
*/ + struct upb_mm_ref *next; /* Linked list for refs to the same value. */ +}; + +/* Functions for working with listed references. *****************************/ + +/* Create a new top-level message and create a single ref for it. */ +struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm); + +/* Given a pointer to an existing msg, array, or string, find a ref for this + * mm, creating one if necessary. 'created' indicates whether the returned + * reference was just created. */ +struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type, + struct upb_mm *mm, bool *created); + +/* f must be ismm == true. The msg field may or may not be set (will be + * created if it doesn't exist). If a ref already exists for the given field, + * returns it, otherwise calls the given callback to create one. 'refcreated' + * indicates whether a new reference was created. */ +struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref, + struct upb_msg_fielddef *f, + bool *refcreated); +/* i must be < the array's len. */ +struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i, + bool *refcreated); + +/* Remove this ref from the list for this msg. + * If that was the last reference, deletes the msg itself. */ +void upb_mm_release(struct upb_mm_ref *ref); + +void upb_mm_msgset(struct upb_mm_ref *msg, struct upb_mm_ref *to, + struct upb_msg_fielddef *f); +void upb_mm_msgclear(struct upb_mm_ref *from, struct upb_msg_fielddef *f); +void upb_mm_msgclear_all(struct upb_mm_ref *from); + +void upb_mm_arrset(struct upb_mm_ref *from, struct upb_mm_ref *to, uint32_t i); + +/* Defined iff upb_field_ismm(f). */ +INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f); +/* Defined iff upb_elem_ismm(f).
*/ +INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f); + +INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type); + +/* These methods are all a bit silly, since all branches of the case compile + * to the same thing (which the compiler will recognize), but we do it this way + * for full union correctness. */ +INLINE union upb_mmptr upb_mmptr_read(union upb_value_ptr p, upb_mm_ptrtype t) +{ + union upb_mmptr val; + switch(t) { + case UPB_MM_MSG_REF: val.msg = *p.msg; break; + case UPB_MM_STR_REF: val.str = *p.str; break; + case UPB_MM_ARR_REF: val.arr = *p.arr; break; + default: assert(false); val.msg = *p.msg; break; /* Shouldn't happen. */ + } + return val; +} + +INLINE void upb_mmptr_write(union upb_value_ptr p, union upb_mmptr val, + upb_mm_ptrtype t) +{ + switch(t) { + case UPB_MM_MSG_REF: *p.msg = val.msg; break; + case UPB_MM_STR_REF: *p.str = val.str; break; + case UPB_MM_ARR_REF: *p.arr = val.arr; break; + default: assert(false); val.msg = *p.msg; break; /* Shouldn't happen. */ + } +} + +void upb_array_destroy(struct upb_array *arr); +void upb_msg_destroy(struct upb_msg *msg); + +INLINE void upb_msg_unref(struct upb_msg *msg) { + if(upb_mmhead_unref(&msg->mmhead)) upb_msg_destroy(msg); +} + +INLINE void upb_array_unref(struct upb_array *arr) { + if(upb_mmhead_unref(&arr->mmhead)) upb_array_destroy(arr); +} + +INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type) +{ + switch(type) { + case UPB_MM_MSG_REF: upb_msg_unref(p.msg); break; + case UPB_MM_STR_REF: upb_string_unref(p.str); break; + case UPB_MM_ARR_REF: upb_array_unref(p.arr); + } +} + +static struct upb_mmhead *upb_mmhead_addr(union upb_mmptr p, upb_mm_ptrtype t) +{ + switch(t) { + case UPB_MM_MSG_REF: return &((*p.msg).mmhead); + case UPB_MM_STR_REF: return &((*p.str).mmhead); + case UPB_MM_ARR_REF: return &((*p.arr).mmhead); + default: assert(false); return &((*p.msg).mmhead); /* Shouldn't happen. 
*/ + } +} + +INLINE void upb_mm_ref(union upb_mmptr p, upb_mm_ptrtype type) +{ + upb_mmhead_ref(upb_mmhead_addr(p, type)); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_MM_MSG_H_ */ diff --git a/src/upb_msg.c b/src/upb_msg.c index 45f889d..80602dd 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -6,8 +6,9 @@ #include #include -#include "descriptor.h" #include "upb_msg.h" +#include "descriptor.h" +#include "upb_mm.h" #include "upb_parse.h" #include "upb_serialize.h" #include "upb_text.h" @@ -47,7 +48,6 @@ bool upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, /* TODO: more complete validation. */ if(!d->set_flags.has.field) return false; - upb_atomic_refcount_init(&m->refcount, 0); upb_inttable_init(&m->fields_by_num, d->field->len, sizeof(struct upb_fieldsbynum_entry)); upb_strtable_init(&m->fields_by_name, d->field->len, @@ -123,113 +123,43 @@ void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f, str_e->f.ref = ref; } -/* Simple, one-shot parsing ***************************************************/ - -static void *upb_msg_new(struct upb_msgdef *md) -{ - size_t size = md->size + (sizeof(void*) * 2); - struct upb_msg *msg = malloc(size); - memset(msg, 0, size); - msg->def = md; - return msg; -} +/* Parsing. ******************************************************************/ -/* Allocation callbacks. */ -struct upb_array *getarray_cb( - void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f) -{ - (void)from_gptr; - (void)existingval; /* Don't care -- always zero. */ - (void)f; - return upb_array_new(); -} +struct upb_msg_parser_frame { + struct upb_msg *msg; +}; -static struct upb_string *getstring_cb( - void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f, - bool byref) -{ - (void)from_gptr; - (void)existingval; /* Don't care -- always zero. 
*/ - (void)f; - (void)byref; - return upb_strnew(); -} +struct upb_msg_parser { + struct upb_stream_parser s; + bool merge; + bool byref; + struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top; +}; -static struct upb_msg *getmsg_cb( - void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f) -{ - (void)from_gptr; - (void)existingval; /* Don't care -- always zero. */ - return upb_msg_new(f->ref.msg); -} +void upb_msg_parser_reset(struct upb_msg_parser *p, + struct upb_msg *msg, bool byref); -struct upb_msg *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s) -{ - struct upb_msg_parser mp; - struct upb_msg *msg = upb_msg_new(md); - upb_msg_parser_reset(&mp, msg, false); - mp.getarray_cb = getarray_cb; - mp.getstring_cb = getstring_cb; - mp.getmsg_cb = getmsg_cb; - size_t read; - upb_status_t status = upb_msg_parser_parse(&mp, s->ptr, s->byte_len, &read); - if(status == UPB_STATUS_OK && read == s->byte_len) { - return msg; - } else { - upb_msg_free(msg); - return NULL; - } -} +/* Parses protocol buffer data out of data which has length of len. The data + * need not be a complete protocol buffer. The number of bytes parsed is + * returned in *read, and the next call to upb_msg_parse must supply data that + * is *read bytes past data in the logical stream. */ +upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p, + void *data, size_t len, size_t *read); -/* For simple, one-shot parsing we assume that a dynamic field exists (and - * needs to be freed) iff its set bit is set. */ -static void free_value(union upb_value_ptr p, struct upb_msg_fielddef *f) -{ - if(upb_isstring(f)) { - free((*p.str)->ptr); - free(*p.str); - } else if(upb_issubmsg(f)) { - upb_msg_free(*p.msg); - } -} -void upb_msg_free(struct upb_msg *msg) -{ - if(!msg) return; /* A very free-like thing to do. 
*/ - struct upb_msgdef *m = msg->def; - for(unsigned int i = 0; i < m->num_fields; i++) { - struct upb_msg_fielddef *f = &m->fields[i]; - if(!upb_msg_isset(msg, f)) continue; - union upb_value_ptr p = upb_msg_getptr(msg, f); - if(upb_isarray(f)) { - assert(*p.arr); - for(upb_arraylen_t j = 0; j < (*p.arr)->len; j++) - free_value(upb_array_getelementptr(*p.arr, j, f->type), f); - upb_array_free(*p.arr); - } else { - free_value(p, f); - } - } - free(msg); -} - -/* Parsing. ******************************************************************/ /* Helper function that returns a pointer to where the next value for field "f" * should be stored, taking into account whether f is an array that may need to * be allocated or resized. */ static union upb_value_ptr get_value_ptr(struct upb_msg *msg, - struct upb_msg_fielddef *f, - void **gptr, - upb_msg_getandref_array_cb_t getarray_cb) + struct upb_msg_fielddef *f) { union upb_value_ptr p = upb_msg_getptr(msg, f); if(upb_isarray(f)) { bool isset = upb_msg_isset(msg, f); size_t len = isset ? 
(*p.arr)->len : 0; - if(!isset) *p.arr = getarray_cb(*gptr, *p.arr, f); - upb_array_resize(*p.arr, len+1, f->type); - *gptr = (*p.arr)->gptr; + if(!isset) *p.arr = upb_array_new(f); + upb_array_resize(*p.arr, len+1); p = upb_array_getelementptr(*p.arr, len, f->type); } return p; @@ -255,8 +185,7 @@ static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end, struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = user_field_desc; struct upb_msg *msg = mp->top->msg; - void *gptr = upb_msg_gptr(msg); - union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb); + union upb_value_ptr p = get_value_ptr(msg, f); upb_msg_set(msg, f); UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf)); return UPB_STATUS_OK; @@ -269,21 +198,20 @@ static void str_cb(void *udata, uint8_t *str, struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = udesc; struct upb_msg *msg = mp->top->msg; - void *gptr = upb_msg_gptr(msg); - union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb); + union upb_value_ptr p = get_value_ptr(msg, f); upb_msg_set(msg, f); if(avail_len != total_len) abort(); /* TODO: support streaming. 
*/ - bool byref = avail_len == total_len && mp->byref; - *p.str = mp->getstring_cb(gptr, *p.str, f, byref); - if(byref) { - upb_strdrop(*p.str); - (*p.str)->ptr = (char*)str; - (*p.str)->byte_len = avail_len; - } else { - upb_stralloc(*p.str, total_len); + //bool byref = avail_len == total_len && mp->byref; + *p.str = upb_string_new(); + //if(byref) { + // upb_strdrop(*p.str); + // (*p.str)->ptr = (char*)str; + // (*p.str)->byte_len = avail_len; + //} else { + upb_string_resize(*p.str, total_len); memcpy((*p.str)->ptr, str, avail_len); (*p.str)->byte_len = avail_len; - } + //} } static void submsg_start_cb(void *udata, void *user_field_desc) @@ -291,22 +219,39 @@ static void submsg_start_cb(void *udata, void *user_field_desc) struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = user_field_desc; struct upb_msg *oldmsg = mp->top->msg; - void *gptr = upb_msg_gptr(oldmsg); - union upb_value_ptr p = get_value_ptr(oldmsg, f, &gptr, mp->getarray_cb); + union upb_value_ptr p = get_value_ptr(oldmsg, f); + struct upb_msg **submsg = p.msg; + //if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) { + // /* We can reuse the existing submsg. */ + //} else { + *submsg = upb_msg_new(f->ref.msg); + //} + upb_msg_clear(*submsg); upb_msg_set(oldmsg, f); - *p.msg = mp->getmsg_cb(gptr, *p.msg, f); mp->top++; - mp->top->msg = *p.msg; + mp->top->msg = *submsg; } static void submsg_end_cb(void *udata) { struct upb_msg_parser *mp = udata; + struct upb_msg *msg = mp->top->msg; + /* TODO: free any remaining dynamic storage that was not reused. */ + (void)msg; mp->top--; } /* Externally-visible functions for the msg parser. 
*/ +upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len) +{ + struct upb_msg_parser mp; + upb_msg_parser_reset(&mp, msg, false); + size_t read; + upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read); + return ret; +} + void upb_msg_parser_reset(struct upb_msg_parser *s, struct upb_msg *msg, bool byref) { upb_stream_parser_reset(&s->s, s); @@ -592,51 +537,3 @@ bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive) } return true; } - - -static void printval(struct upb_text_printer *printer, union upb_value_ptr p, - struct upb_msg_fielddef *f, - google_protobuf_FieldDescriptorProto *fd, - FILE *stream); - -static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg, - FILE *stream) -{ - struct upb_msgdef *m = msg->def; - for(uint32_t i = 0; i < m->num_fields; i++) { - struct upb_msg_fielddef *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m); - if(!upb_msg_isset(msg, f)) continue; - union upb_value_ptr p = upb_msg_getptr(msg, f); - if(upb_isarray(f)) { - struct upb_array *arr = *p.arr; - for(uint32_t j = 0; j < arr->len; j++) { - union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type); - printval(printer, elem_p, f, fd, stream); - } - } else { - printval(printer, p, f, fd, stream); - } - } -} - -static void printval(struct upb_text_printer *printer, union upb_value_ptr p, - struct upb_msg_fielddef *f, - google_protobuf_FieldDescriptorProto *fd, - FILE *stream) -{ - if(upb_issubmsg(f)) { - upb_text_push(printer, fd->name, stream); - printmsg(printer, *p.msg, stream); - upb_text_pop(printer, stream); - } else { - upb_text_printfield(printer, fd->name, f->type, upb_deref(p, f->type), stream); - } -} - -void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream) -{ - struct upb_text_printer printer; - upb_text_printer_init(&printer, single_line); - printmsg(&printer, msg, stream); -} diff --git a/src/upb_msg.h b/src/upb_msg.h index 
9dc1827..abec479 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -52,10 +52,10 @@ #include #include +#include +#include "descriptor.h" #include "upb.h" -#include "upb_atomic.h" -#include "upb_context.h" #include "upb_parse.h" #include "upb_table.h" @@ -66,10 +66,11 @@ extern "C" { /* Message definition. ********************************************************/ struct upb_msg_fielddef; +struct upb_context; /* Structure that describes a single .proto message type. */ struct upb_msgdef { - upb_atomic_refcount_t refcount; struct upb_context *context; + struct upb_msg *default_msg; /* Message with all default values set. */ struct google_protobuf_DescriptorProto *descriptor; struct upb_string fqname; /* Fully qualified. */ size_t size; @@ -82,7 +83,6 @@ struct upb_msgdef { struct google_protobuf_FieldDescriptorProto **field_descriptors; }; - /* Structure that describes a single field in a message. This structure is very * consciously designed to fit into 12/16 bytes (32/64 bit, respectively), * because copies of this struct are in the hash table that is read in the @@ -96,14 +96,6 @@ struct upb_msg_fielddef { upb_label_t label; }; -INLINE void upb_msgdef_ref(struct upb_msgdef *m) { - if(upb_atomic_ref(&m->refcount)) upb_context_ref(m->context); -} - -INLINE void upb_msgdef_unref(struct upb_msgdef *m) { - if(upb_atomic_unref(&m->refcount)) upb_context_unref(m->context); -} - INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) { return upb_issubmsgtype(f->type); } @@ -114,6 +106,29 @@ INLINE bool upb_isarray(struct upb_msg_fielddef *f) { return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED; } +INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) { + return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); +} + +INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) { + return upb_isstring(f) || upb_issubmsg(f); +} + +/* Defined iff upb_field_ismm(f). 
*/ +INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) { + if(upb_isarray(f)) return UPB_MM_ARR_REF; + else if(upb_isstring(f)) return UPB_MM_STR_REF; + else if(upb_issubmsg(f)) return UPB_MM_MSG_REF; + else return -1; +} + +/* Defined iff upb_elem_ismm(f). */ +INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) { + if(upb_isstring(f)) return UPB_MM_STR_REF; + else if(upb_issubmsg(f)) return UPB_MM_MSG_REF; + else return -1; +} + /* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( struct upb_msg_fielddef *f, struct upb_msgdef *m) { @@ -122,14 +137,15 @@ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( /* Message structure. *********************************************************/ -struct upb_msg { - struct upb_msgdef *def; - void *gptr; /* Generic pointer for use by subclasses. */ - uint8_t data[1]; -}; - -INLINE void *upb_msg_gptr(struct upb_msg *msg) { - return msg->gptr; +/* Constructs a new msg corresponding to the given msgdef, and having one + * counted reference. */ +INLINE struct upb_msg *upb_msg_new(struct upb_msgdef *md) { + size_t size = md->size + offsetof(struct upb_msg, data); + struct upb_msg *msg = malloc(size); + memset(msg, 0, size); + upb_mmhead_init(&msg->mmhead); + msg->def = md; + return msg; } /* Field access. **************************************************************/ @@ -146,12 +162,6 @@ INLINE union upb_value_ptr upb_msg_getptr(struct upb_msg *msg, return p; } -/* Returns a a specific field in a message. */ -INLINE union upb_value upb_msg_get(struct upb_msg *msg, - struct upb_msg_fielddef *f) { - return upb_deref(upb_msg_getptr(msg, f), f->type); -} - /* "Set" flag reading and writing. 
*******************************************/ /* All upb code and code using upb should guarantee that the set flags are @@ -244,85 +254,10 @@ INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, } -/* Simple, one-shot parsing ***************************************************/ - -/* A simple interface for parsing into a newly-allocated message. This - * interface should only be used when the message will be read-only with - * respect to memory management (eg. won't add or remove internal references to - * dynamic memory). For more flexible (but also more complicated) interfaces, - * see below and in upb_mm_msg.h. */ - -/* Parses the protobuf in s (which is expected to be complete) and allocates - * new message data to hold it. If byref is set, strings in the returned - * upb_msg will reference s instead of copying from it, but this requires that - * s will live for as long as the returned message does. */ -struct upb_msg *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s); - -/* This function should be used to free messages that were parsed with - * upb_msg_parsenew. It will free the message appropriately (including all - * submessages). */ -void upb_msg_free(struct upb_msg *msg); - - -/* Parsing with (re)allocation callbacks. *************************************/ - -/* This interface parses protocol buffers into upb_msgs, but allows the client - * to supply allocation callbacks whenever the parser needs to obtain a string, - * array, or submsg (a "dynamic field"). If the parser sees that a dynamic - * field is already present (its "set bit" is set) it will use that, resizing - * it if necessary in the case of an array. Otherwise it will call the - * allocation callback to obtain one. - * - * This may seem trivial (since nearly all clients will use malloc and free for - * memory management), but the allocation callback can be used for more than - * just allocation. 
If we are parsing data into an existing upb_msg, the - * allocation callback can examine any existing memory that is allocated for - * the dynamic field and determine whether it can reuse it. It can also - * perform memory management like refing the new field. - * - * This parser is layered on top of the event-based parser in upb_parse.h. The - * parser is upb_mm_msg.h is layered on top of this parser. - * - * This parser is fully streaming-capable. */ - -/* Should return an initialized array. */ -typedef struct upb_array *(*upb_msg_getandref_array_cb_t)( - void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f); - -/* Callback to allocate a string. If byref is true, the client should assume - * that the string will be referencing the input data. */ -typedef struct upb_string *(*upb_msg_getandref_string_cb_t)( - void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f, - bool byref); - -/* Should return a cleared message. */ -typedef struct upb_msg *(*upb_msg_getandref_msg_cb_t)( - void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f); - -struct upb_msg_parser_frame { - struct upb_msg *msg; -}; - -struct upb_msg_parser { - struct upb_stream_parser s; - bool merge; - bool byref; - struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top; - upb_msg_getandref_array_cb_t getarray_cb; - upb_msg_getandref_string_cb_t getstring_cb; - upb_msg_getandref_msg_cb_t getmsg_cb; -}; - -void upb_msg_parser_reset(struct upb_msg_parser *p, - struct upb_msg *msg, bool byref); - -/* Parses protocol buffer data out of data which has length of len. The data - * need not be a complete protocol buffer. The number of bytes parsed is - * returned in *read, and the next call to upb_msg_parse must supply data that - * is *read bytes past data in the logical stream. 
*/ -upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p, - void *data, size_t len, size_t *read); +/* Parsing ********************************************************************/ +/* TODO: a stream parser. */ +upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len); /* Serialization *************************************************************/ @@ -336,7 +271,7 @@ upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p, struct upb_msgsizes; /* Initialize/free a upb_msgsizes for the given message. */ -void upb_msgsizes_init(struct upb_msgsizes *sizes); +struct upb_msgsizes *upb_msgsizes_new(void); void upb_msgsizes_free(struct upb_msgsizes *sizes); /* Given a previously initialized sizes, recurse over the message and store its @@ -366,6 +301,10 @@ void upb_msg_serialize_init(struct upb_msg_serialize_state *s, upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s, void *buf, size_t len, size_t *written); +upb_status_t upb_msg_serialize_all(struct upb_msg *msg, + struct upb_msgsizes *sizes, + void *buf); + /* Text dump *****************************************************************/ bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive); @@ -397,7 +336,8 @@ void upb_msgdef_free(struct upb_msgdef *m); /* Sort the given field descriptors in-place, according to what we think is an * optimal ordering of fields. This can change from upb release to upb * release. */ -void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num); +void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, + size_t num); /* Clients use this function on a previously initialized upb_msgdef to resolve * the "ref" field in the upb_msg_fielddef. Since messages can refer to each diff --git a/src/upb_parse.c b/src/upb_parse.c index b7f3832..7c1ad66 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -11,7 +11,7 @@ /* May want to move this to upb.c if enough other things warrant it. 
*/ #define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, UPB_STRLIT(#ctype)}, +#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype}, struct upb_type_info upb_type_info[] = { TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE, UPB_WIRE_TYPE_64BIT, double) TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT, UPB_WIRE_TYPE_32BIT, float) diff --git a/src/upb_parse.h b/src/upb_parse.h index de4cb2c..1454dd5 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -21,16 +21,6 @@ extern "C" { #endif -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP || - type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE; -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || - type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES; -} - /* High-level parsing interface. 
**********************************************/ /* The general scheme is that the client registers callbacks that will be diff --git a/src/upb_string.c b/src/upb_string.c index 7754936..54df4f1 100644 --- a/src/upb_string.c +++ b/src/upb_string.c @@ -7,19 +7,20 @@ #include #include "upb_string.h" -bool upb_strreadfile(const char *filename, struct upb_string *data) { +struct upb_string *upb_strreadfile(const char *filename) { FILE *f = fopen(filename, "rb"); if(!f) return false; - if(fseek(f, 0, SEEK_END) != 0) return false; + if(fseek(f, 0, SEEK_END) != 0) goto error; long size = ftell(f); - if(size < 0) return false; - if(fseek(f, 0, SEEK_SET) != 0) return false; - data->byte_len = size; - upb_stralloc(data, data->byte_len); - if(fread(data->ptr, size, 1, f) != 1) { - free(data->ptr); - return false; - } + if(size < 0) goto error; + if(fseek(f, 0, SEEK_SET) != 0) goto error; + struct upb_string *s = upb_string_new(); + upb_string_resize(s, size); + if(fread(s->ptr, size, 1, f) != 1) goto error; fclose(f); - return true; + return s; + +error: + fclose(f); + return NULL; } diff --git a/src/upb_string.h b/src/upb_string.h index 9740a0b..aa62575 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -32,48 +32,35 @@ extern "C" { #include #include -/* inline if possible, emit standalone code if required. */ -#ifndef INLINE -#define INLINE static inline -#endif - -#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) -#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) +#include "upb_struct.h" -struct upb_string { - /* We expect the data to be 8-bit clean (uint8_t), but char* is such an - * ingrained convention that we follow it. */ - char *ptr; - uint32_t byte_len; - uint32_t byte_size; /* How many bytes of ptr we own. */ -}; +/* Allocation/Deallocation/Resizing. 
******************************************/ -INLINE void upb_strinit(struct upb_string *str) +INLINE struct upb_string *upb_string_new(void) { + struct upb_string *str = (struct upb_string*)malloc(sizeof(*str)); + upb_mmhead_init(&str->mmhead); str->ptr = NULL; str->byte_len = 0; str->byte_size = 0; + return str; } -INLINE void upb_struninit(struct upb_string *str) -{ - if(str->byte_size) free(str->ptr); -} - -INLINE struct upb_string *upb_strnew(void) +/* For internal use only. */ +INLINE void upb_string_destroy(struct upb_string *str) { - struct upb_string *str = (struct upb_string*)malloc(sizeof(*str)); - upb_strinit(str); - return str; + if(str->byte_size != 0) free(str->ptr); + free(str); } -INLINE void upb_strfree(struct upb_string *str) +INLINE void upb_string_unref(struct upb_string *str) { - upb_struninit(str); - free(str); + if(upb_mmhead_unref(&str->mmhead)) upb_string_destroy(str); } -INLINE void upb_stralloc(struct upb_string *str, uint32_t size) +/* Resizes the string to size, reallocating if necessary. Does not preserve + * existing data. */ +INLINE void upb_string_resize(struct upb_string *str, uint32_t size) { if(str->byte_size < size) { /* Need to resize. */ @@ -81,12 +68,10 @@ INLINE void upb_stralloc(struct upb_string *str, uint32_t size) void *oldptr = str->byte_size == 0 ? NULL : str->ptr; str->ptr = (char*)realloc(oldptr, str->byte_size); } + str->byte_len = size; } -INLINE void upb_strdrop(struct upb_string *str) -{ - upb_struninit(str); -} +/* Library functions. 
*********************************************************/ INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) { return s1->byte_len == s2->byte_len && @@ -101,26 +86,26 @@ INLINE int upb_strcmp(struct upb_string *s1, struct upb_string *s2) { INLINE void upb_strcpy(struct upb_string *dest, struct upb_string *src) { dest->byte_len = src->byte_len; - upb_stralloc(dest, dest->byte_len); + upb_string_resize(dest, dest->byte_len); memcpy(dest->ptr, src->ptr, src->byte_len); } INLINE struct upb_string *upb_strdup(struct upb_string *s) { - struct upb_string *copy = upb_strnew(); + struct upb_string *copy = upb_string_new(); upb_strcpy(copy, s); return copy; } INLINE struct upb_string *upb_strdupc(char *s) { - struct upb_string *copy = upb_strnew(); + struct upb_string *copy = upb_string_new(); copy->byte_len = strlen(s); - upb_stralloc(copy, copy->byte_len); + upb_string_resize(copy, copy->byte_len); memcpy(copy->ptr, s, copy->byte_len); return copy; } /* Reads an entire file into a newly-allocated string. */ -bool upb_strreadfile(const char *filename, struct upb_string *data); +struct upb_string *upb_strreadfile(const char *filename); /* Allows defining upb_strings as literals, ie: * struct upb_string str = UPB_STRLIT("Hello, World!\n"); diff --git a/src/upb_struct.h b/src/upb_struct.h new file mode 100644 index 0000000..9c1bb2e --- /dev/null +++ b/src/upb_struct.h @@ -0,0 +1,119 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file defines the in-memory format for messages, arrays, and strings + * (which are the three dynamically-allocated structures that make up all + * protobufs). */ + +#ifndef UPB_STRUCT_H +#define UPB_STRUCT_H + +#include "upb.h" + +/* mmhead -- this is a "base class" for strings, arrays, and messages ********/ + +struct upb_mm_ref; +struct upb_mmhead { + struct upb_mm_ref *refs; /* Head of linked list. 
*/ + uint32_t refcount; +}; + +INLINE void upb_mmhead_init(struct upb_mmhead *head) { + head->refs = NULL; + head->refcount = 1; +} + +INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) { + return head->refcount == 0 && head->refs == NULL; +} + +INLINE bool upb_mmhead_unref(struct upb_mmhead *head) { + head->refcount--; + return upb_mmhead_norefs(head); +} + +INLINE void upb_mmhead_ref(struct upb_mmhead *head) { + head->refcount++; +} + +/* Structures for msg, string, and array. *************************************/ + +/* These are all self describing. */ + +struct upb_msgdef; +struct upb_msg_fielddef; + +struct upb_msg { + struct upb_mmhead mmhead; + struct upb_msgdef *def; + uint8_t data[1]; +}; + +typedef uint32_t upb_arraylen_t; /* can be at most 2**32 elements long. */ +struct upb_array { + struct upb_mmhead mmhead; + struct upb_msg_fielddef *fielddef; /* Defines the type of the array. */ + union upb_value_ptr elements; + upb_arraylen_t len; /* Number of elements in "elements". */ + upb_arraylen_t size; /* Memory we own (0 if by reference). */ +}; + +struct upb_string { + struct upb_mmhead mmhead; + /* We expect the data to be 8-bit clean (uint8_t), but char* is such an + * ingrained convention that we follow it. */ + char *ptr; + uint32_t byte_len; + uint32_t byte_size; /* How many bytes of ptr we own, 0 if we reference. */ +}; + +/* Type-specific overlays on upb_array. 
***************************************/ + +#define UPB_DEFINE_ARRAY_TYPE(name, type) \ + struct name ## _array { \ + struct upb_mmhead mmhead; \ + struct upb_msg_fielddef *fielddef; \ + type elements; \ + upb_arraylen_t len; \ + upb_arraylen_t size; \ + }; + +UPB_DEFINE_ARRAY_TYPE(upb_double, double) +UPB_DEFINE_ARRAY_TYPE(upb_float, float) +UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t) +UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t) +UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t) +UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t) +UPB_DEFINE_ARRAY_TYPE(upb_bool, bool) +UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*) +UPB_DEFINE_ARRAY_TYPE(upb_msg, void*) + +/* Defines an array of a specific message type (an overlay of upb_array). */ +#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array +#define UPB_DEFINE_MSG_ARRAY(msg_type) \ + UPB_MSG_ARRAY(msg_type) { \ + struct upb_mmhead mmhead; \ + struct upb_msg_fielddef *fielddef; \ + msg_type **elements; \ + upb_arraylen_t len; \ + upb_arraylen_t size; \ + }; + +/* mmptr -- a pointer which polymorphically points to one of the above. 
*******/ + +union upb_mmptr { + struct upb_msg *msg; + struct upb_array *arr; + struct upb_string *str; +}; + +enum { + UPB_MM_MSG_REF, + UPB_MM_STR_REF, + UPB_MM_ARR_REF +}; +typedef uint8_t upb_mm_ptrtype; + +#endif diff --git a/src/upb_text.c b/src/upb_text.c index c9aad7e..6d43152 100644 --- a/src/upb_text.c +++ b/src/upb_text.c @@ -5,8 +5,11 @@ */ #include -#include "upb_text.h" #include "descriptor.h" +#include "upb_text.h" +#include "upb_string.h" +#include "upb_msg.h" +#include "upb_array.h" void upb_text_printval(upb_field_type_t type, union upb_value val, FILE *file) { @@ -78,3 +81,52 @@ void upb_text_pop(struct upb_text_printer *p, print_indent(p, stream); fprintf(stream, "}\n"); } + +static void printval(struct upb_text_printer *printer, union upb_value_ptr p, + struct upb_msg_fielddef *f, + google_protobuf_FieldDescriptorProto *fd, + FILE *stream); + +static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg, + FILE *stream) +{ + struct upb_msgdef *m = msg->def; + for(uint32_t i = 0; i < m->num_fields; i++) { + struct upb_msg_fielddef *f = &m->fields[i]; + google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m); + if(!upb_msg_isset(msg, f)) continue; + union upb_value_ptr p = upb_msg_getptr(msg, f); + if(upb_isarray(f)) { + struct upb_array *arr = *p.arr; + for(uint32_t j = 0; j < arr->len; j++) { + union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type); + printval(printer, elem_p, f, fd, stream); + } + } else { + printval(printer, p, f, fd, stream); + } + } +} + +static void printval(struct upb_text_printer *printer, union upb_value_ptr p, + struct upb_msg_fielddef *f, + google_protobuf_FieldDescriptorProto *fd, + FILE *stream) +{ + if(upb_issubmsg(f)) { + upb_text_push(printer, fd->name, stream); + printmsg(printer, *p.msg, stream); + upb_text_pop(printer, stream); + } else { + upb_text_printfield(printer, fd->name, f->type, upb_value_read(p, f->type), stream); + } +} + + +void upb_msg_print(struct 
upb_msg *msg, bool single_line, FILE *stream) +{ + struct upb_text_printer printer; + upb_text_printer_init(&printer, single_line); + printmsg(&printer, msg, stream); +} + diff --git a/tools/upbc.c b/tools/upbc.c index a324971..885a259 100644 --- a/tools/upbc.c +++ b/tools/upbc.c @@ -15,6 +15,8 @@ #include "upb_enum.h" #include "upb_msg.h" #include "upb_text.h" +#include "upb_array.h" +#include "upb_mm.h" /* These are in-place string transformations that do not change the length of * the string (and thus never need to re-allocate). */ @@ -54,32 +56,24 @@ void *strtable_to_array(struct upb_strtable *t, int *size) return array; } -/* The .h file defines structs for the types defined in the .proto file. It - * also defines constants for the enum values. - * - * Assumes that d has been validated. */ -static void write_h(struct upb_symtab_entry *entries[], int num_entries, - char *outfile_name, char *descriptor_cident, FILE *stream) +/* The _const.h file defines the constants (enums) defined in the .proto + * file. */ +static void write_const_h(struct upb_symtab_entry *entries[], int num_entries, + char *outfile_name, FILE *stream) { /* Header file prologue. */ struct upb_string *include_guard_name = upb_strdupc(outfile_name); to_preproc(include_guard_name); + /* A bit cheesy, but will do the job. */ + include_guard_name->ptr[include_guard_name->byte_len-1] = 'C'; fputs("/* This file was generated by upbc (the upb compiler). " "Do not edit. 
*/\n\n", stream), fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name)); fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name)); - fputs("#include \n\n", stream); - fputs("#include \n\n", stream); fputs("#ifdef __cplusplus\n", stream); fputs("extern \"C\" {\n", stream); fputs("#endif\n\n", stream); - if(descriptor_cident) { - fputs("struct google_protobuf_FileDescriptorSet;\n", stream); - fprintf(stream, "extern struct google_protobuf_FileDescriptorSet *%s;\n\n", - descriptor_cident); - } - /* Enums. */ fprintf(stream, "/* Enums. */\n\n"); for(int i = 0; i < num_entries; i++) { /* Foreach enum */ @@ -109,12 +103,45 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries, UPB_STRARG(enum_val_prefix), UPB_STRARG(value_name), v->number); if(j != ed->value->len-1) fputc(',', stream); fputc('\n', stream); - upb_strfree(value_name); + upb_string_unref(value_name); } } fprintf(stream, "} " UPB_STRFMT ";\n\n", UPB_STRARG(enum_name)); - upb_strfree(enum_name); - upb_strfree(enum_val_prefix); + upb_string_unref(enum_name); + upb_string_unref(enum_val_prefix); + } + + /* Epilogue. */ + fputs("#ifdef __cplusplus\n", stream); + fputs("} /* extern \"C\" */\n", stream); + fputs("#endif\n\n", stream); + fprintf(stream, "#endif /* " UPB_STRFMT " */\n", UPB_STRARG(include_guard_name)); + upb_string_unref(include_guard_name); +} + +/* The .h file defines structs for the types defined in the .proto file. It + * also defines constants for the enum values. + * + * Assumes that d has been validated. */ +static void write_h(struct upb_symtab_entry *entries[], int num_entries, + char *outfile_name, char *descriptor_cident, FILE *stream) +{ + /* Header file prologue. */ + struct upb_string *include_guard_name = upb_strdupc(outfile_name); + to_preproc(include_guard_name); + fputs("/* This file was generated by upbc (the upb compiler). " + "Do not edit. 
*/\n\n", stream), + fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name)); + fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name)); + fputs("#include \n\n", stream); + fputs("#ifdef __cplusplus\n", stream); + fputs("extern \"C\" {\n", stream); + fputs("#endif\n\n", stream); + + if(descriptor_cident) { + fputs("struct google_protobuf_FileDescriptorSet;\n", stream); + fprintf(stream, "extern struct google_protobuf_FileDescriptorSet *%s;\n\n", + descriptor_cident); } /* Forward declarations. */ @@ -131,7 +158,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries, fprintf(stream, "struct " UPB_STRFMT ";\n", UPB_STRARG(msg_name)); fprintf(stream, "typedef struct " UPB_STRFMT "\n " UPB_STRFMT ";\n\n", UPB_STRARG(msg_name), UPB_STRARG(msg_name)); - upb_strfree(msg_name); + upb_string_unref(msg_name); } /* Message Declarations. */ @@ -144,8 +171,8 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries, struct upb_string *msg_name = upb_strdup(&entry->e.key); to_cident(msg_name); fprintf(stream, "struct " UPB_STRFMT " {\n", UPB_STRARG(msg_name)); + fputs(" struct upb_mmhead mmhead;\n", stream); fputs(" struct upb_msgdef *def;\n", stream); - fputs(" void *gptr;\n", stream); fputs(" union {\n", stream); fprintf(stream, " uint8_t bytes[%" PRIu32 "];\n", m->set_flags_bytes); fputs(" struct {\n", stream); @@ -179,7 +206,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries, fprintf(stream, " " UPB_STRFMT "* " UPB_STRFMT ";\n", UPB_STRARG(type_name), UPB_STRARG(fd->name)); } - upb_strfree(type_name); + upb_string_unref(type_name); } else if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { static char* c_types[] = { "", "struct upb_double_array*", "struct upb_float_array*", @@ -208,7 +235,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries, fputs("};\n", stream); fprintf(stream, "UPB_DEFINE_MSG_ARRAY(" UPB_STRFMT ")\n\n", 
UPB_STRARG(msg_name)); - upb_strfree(msg_name); + upb_string_unref(msg_name); } /* Epilogue. */ @@ -216,7 +243,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries, fputs("} /* extern \"C\" */\n", stream); fputs("#endif\n\n", stream); fprintf(stream, "#endif /* " UPB_STRFMT " */\n", UPB_STRARG(include_guard_name)); - upb_strfree(include_guard_name); + upb_string_unref(include_guard_name); } /* Format of table entries that we use when analyzing data structures for @@ -300,24 +327,28 @@ static void add_strings_from_msg(void *data, struct upb_msgdef *m, struct typetable_entry *get_or_insert_typeentry(struct upb_strtable *t, struct upb_msg_fielddef *f) { - struct upb_string type_name = upb_issubmsg(f) ? f->ref.msg->fqname : - upb_type_info[f->type].ctype; - struct typetable_entry *type_e = upb_strtable_lookup(t, &type_name); + struct upb_string *type_name = upb_issubmsg(f) ? upb_strdup(&f->ref.msg->fqname) : + upb_strdupc(upb_type_info[f->type].ctype); + struct typetable_entry *type_e = upb_strtable_lookup(t, type_name); if(type_e == NULL) { struct typetable_entry new_type_e = { - .e = {.key = type_name}, .field = f, .cident = upb_strdup(&type_name), + .e = {.key = *type_name}, .field = f, .cident = upb_strdup(type_name), .values = NULL, .values_size = 0, .values_len = 0, .arrays = NULL, .arrays_size = 0, .arrays_len = 0 }; to_cident(new_type_e.cident); + assert(upb_strtable_lookup(t, type_name) == NULL); + assert(upb_strtable_lookup(t, &new_type_e.e.key) == NULL); upb_strtable_insert(t, &new_type_e.e); - type_e = upb_strtable_lookup(t, &type_name); + type_e = upb_strtable_lookup(t, type_name); assert(type_e); + } else { + upb_string_unref(type_name); } return type_e; } -static void add_value(union upb_value value, struct upb_msg_fielddef *f, +static void add_value(union upb_value_ptr p, struct upb_msg_fielddef *f, struct upb_strtable *t) { struct typetable_entry *type_e = get_or_insert_typeentry(t, f); @@ -325,7 +356,7 @@ static void 
add_value(union upb_value value, struct upb_msg_fielddef *f, type_e->values_size = UPB_MAX(type_e->values_size * 2, 4); type_e->values = realloc(type_e->values, sizeof(*type_e->values) * type_e->values_size); } - type_e->values[type_e->values_len++] = value; + type_e->values[type_e->values_len++] = upb_value_read(p, f->type); } static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t) @@ -353,7 +384,7 @@ static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t /* Add the individual values in the array. */ for(uint32_t j = 0; j < arr->len; j++) - add_value(upb_array_getelement(arr, j, f->type), f, t); + add_value(upb_array_getelementptr(arr, j, f->type), f, t); /* Add submsgs. We must do this separately so that the msgs in this * array are contiguous (and don't have submsgs of the same type @@ -362,7 +393,7 @@ static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t add_submsgs(*upb_array_getelementptr(arr, j, f->type).msg, f->ref.msg, t); } else { if(!upb_issubmsg(f)) continue; - add_value(upb_deref(p, f->type), f, t); + add_value(p, f, t); add_submsgs(*p.msg, f->ref.msg, t); } } @@ -445,7 +476,8 @@ static void write_message_c(void *data, struct upb_msgdef *m, .type = GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE, .ref = {.msg = m} }; - add_value(val, &fake_field, &types); + union upb_value_ptr p = UPB_VALUE_ADDROF(val); + add_value(p, &fake_field, &types); add_submsgs(data, m, &types); /* Emit foward declarations for all msgs of all types, and define arrays. */ @@ -503,7 +535,7 @@ static void write_message_c(void *data, struct upb_msgdef *m, for(unsigned int j = 0; j < m->num_fields; j++) { struct upb_msg_fielddef *f = &m->fields[j]; google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j]; - union upb_value val = upb_msg_get(msgdata, f); + union upb_value val = upb_value_read(upb_msg_getptr(msgdata, f), f->type); fprintf(stream, " ." 
UPB_STRFMT " = ", UPB_STRARG(fd->name)); if(!upb_msg_isset(msgdata, f)) { fputs("0, /* Not set. */", stream); @@ -571,7 +603,7 @@ static void write_message_c(void *data, struct upb_msgdef *m, /* Free tables. */ for(e = upb_strtable_begin(&types); e; e = upb_strtable_next(&types, &e->e)) { - upb_strfree(e->cident); + upb_string_unref(e->cident); free(e->values); free(e->arrays); } @@ -641,16 +673,17 @@ int main(int argc, char *argv[]) if(!outfile_base) outfile_base = input_file; /* Read input file. */ - struct upb_string *descriptor = upb_strnew(); - if(!upb_strreadfile(input_file, descriptor)) + struct upb_string *descriptor = upb_strreadfile(input_file); + if(!descriptor) error("Couldn't read input file."); /* Parse input file. */ struct upb_context *c = upb_context_new(); - struct upb_msg *fds_msg = upb_msg_parsenew(c->fds_msg, descriptor); - google_protobuf_FileDescriptorSet *fds = (void*)fds_msg; - if(!fds) + struct upb_msg *fds_msg = upb_msg_new(c->fds_msg); + if(upb_msg_parsestr(fds_msg, descriptor->ptr, descriptor->byte_len) != UPB_STATUS_OK) error("Failed to parse input file descriptor."); + //upb_msg_print(fds_msg, false, stderr); + google_protobuf_FileDescriptorSet *fds = (void*)fds_msg; if(!upb_context_addfds(c, fds)) error("Failed to resolve symbols in descriptor.\n"); @@ -666,17 +699,21 @@ int main(int argc, char *argv[]) /* Emit output files. 
*/ const int maxsize = 256; - char h_filename[maxsize], c_filename[maxsize]; + char h_filename[maxsize], h_const_filename[maxsize], c_filename[maxsize]; if(snprintf(h_filename, maxsize, "%s.h", outfile_base) >= maxsize || - snprintf(c_filename, maxsize, "%s.c", outfile_base) >= maxsize) + snprintf(c_filename, maxsize, "%s.c", outfile_base) >= maxsize || + snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize) error("File base too long.\n"); FILE *h_file = fopen(h_filename, "w"); if(!h_file) error("Failed to open .h output file"); + FILE *h_const_file = fopen(h_const_filename, "w"); + if(!h_const_file) error("Failed to open _const.h output file"); int symcount; struct upb_symtab_entry **entries = strtable_to_array(&c->symtab, &symcount); write_h(entries, symcount, h_filename, cident, h_file); + write_const_h(entries, symcount, h_filename, h_const_file); free(entries); if(cident) { FILE *c_file = fopen(c_filename, "w"); @@ -684,10 +721,11 @@ int main(int argc, char *argv[]) write_message_c(fds, c->fds_msg, cident, h_filename, argc, argv, input_file, c_file); fclose(c_file); } - upb_msg_free(fds_msg); + upb_msg_unref(fds_msg); upb_context_unref(c); - upb_strfree(descriptor); + upb_string_unref(descriptor); fclose(h_file); + fclose(h_const_file); return 0; } -- cgit v1.2.3