summaryrefslogtreecommitdiff
path: root/pbstruct.h
diff options
context:
space:
mode:
Diffstat (limited to 'pbstruct.h')
-rw-r--r--pbstruct.h143
1 files changed, 93 insertions, 50 deletions
diff --git a/pbstruct.h b/pbstruct.h
index be6222d..227d7bb 100644
--- a/pbstruct.h
+++ b/pbstruct.h
@@ -4,30 +4,71 @@
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* pbstruct is an efficient, compact, self-describing format for storing
- * pb messages in-memory. It can be used when parsing a protobuf into
- * a structure is desired, but both the parsing and emitting modules can
- * be used without ever using pbstruct.
+ * protobufs in-memory. In many ways it is a dynamic implementation of C
+ * structures, which nonetheless is efficient because it does offset-based (as
+ * opposed to name-based) access. It can be used when representing a message in
+ * memory is desired, but both the parsing and emitting modules can be used
+ * without ever using pbstruct.
+ *
+ * There is one fundamental choice pbstruct is forced to make: whether to
+ * include substructures by value or by reference. Supporting both doesn't
+ * seem worth the complexity. The tradeoffs are:
+ *
+ * +value:
+ * - fewer malloc/free calls
+ * - better locality
+ * - one less deref to access data
+ * - simpler to delete/GC (since the data can't nest arbitrarily)
+ * +ref:
+ * - you pay only sizeof(void*) for unused fields, not sizeof(struct)
+ * - you can keep substructs around while deleting the enclosing struct
+ * - copies of the surrounding struct are cheaper (including array reallocs)
+ *
+ * My view of these tradeoffs is that while by-value semantics could yield
+ * small performance gains in the best case, and are somewhat simpler, they have
+ * much worse degenerate cases. For example, consider a protobuf like this one,
+ * which is only meant to contain one of several possible large structures:
+ *
+ * message {
+ * optional LargeMessage1 m1 = 1;
+ * optional LargeMessage2 m2 = 2;
+ * optional LargeMessage3 m3 = 3;
+ * // ...
+ * }
+ *
+ * This proto will take N times more memory if structures are by value than if
+ * they are by reference. To avoid such bad cases, submessages are by
+ * reference.
*/
+#ifndef PBSTRUCT_H_
+#define PBSTRUCT_H_
+
#include <stdbool.h>
#include <stdint.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef enum pbstruct_type {
- PBSTRUCT_TYPE_DOUBLE = 1,
- PBSTRUCT_TYPE_FLOAT = 2,
- PBSTRUCT_TYPE_INT32 = 3,
- PBSTRUCT_TYPE_UINT32 = 4,
- PBSTRUCT_TYPE_INT64 = 5,
- PBSTRUCT_TYPE_UINT64 = 6,
- PBSTRUCT_TYPE_BOOL = 7,
-
- /* For these types, the main struct contains a pointer to the real data. */
- PBSTRUCT_TYPE_BYTES = 8,
- PBSTRUCT_TYPE_STRING = 9,
- PBSTRUCT_TYPE_SUBSTRUCT = 10,
-
- /* The array types are just like the primitive types, except the main struct
- * contains a pointer to an array of the primitive type. */
+ PBSTRUCT_TYPE_DOUBLE = 0,
+ PBSTRUCT_TYPE_FLOAT = 1,
+ PBSTRUCT_TYPE_INT32 = 2,
+ PBSTRUCT_TYPE_UINT32 = 3,
+ PBSTRUCT_TYPE_INT64 = 4,
+ PBSTRUCT_TYPE_UINT64 = 5,
+ PBSTRUCT_TYPE_BOOL = 6,
+ /* Main struct contains a pointer to a pbstruct. */
+ PBSTRUCT_TYPE_SUBSTRUCT = 7,
+
+ /* Main struct contains a pointer to a pbstruct_delimited. */
+ PBSTRUCT_DELIMITED = 8, /* Not itself a real type. */
+ PBSTRUCT_TYPE_BYTES = PBSTRUCT_DELIMITED | 1,
+ PBSTRUCT_TYPE_STRING = PBSTRUCT_DELIMITED | 2,
+
+ /* The main struct contains a pointer to a pbstruct_array, for which each
+ * element is identical to the non-array form. */
PBSTRUCT_ARRAY = 16, /* Not itself a real type. */
PBSTRUCT_TYPE_DOUBLE_ARRAY = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_DOUBLE,
PBSTRUCT_TYPE_FLOAT_ARRAY = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_FLOAT,
@@ -50,51 +91,53 @@ struct pbstruct_field {
};
struct pbstruct_delimited {
- size_t len;
- char data[];
+ size_t len; /* Measured in bytes. */
+ uint8_t data[];
+};
+
+struct pbstruct_array {
+ size_t len; /* Measured in elements. */
+ uint8_t data[];
};
struct pbstruct_definition {
char *name;
size_t struct_size;
int num_fields;
- int num_required_fields;
+ int num_required_fields; /* Required fields have the lowest set bytemasks. */
struct pbstruct_field fields[];
};
struct pbstruct {
struct pbstruct_definition *definition;
- uint8_t data[];
+ uint8_t data[]; /* layout is described by definition. */
};
-struct pbstruct* pbstruct_new(struct pbstruct_definition *definition);
-/* Deletes any sub-structs also. */
-struct pbstruct* pbstruct_delete(struct pbstruct_definition *definition);
-
-inline bool pbstruct_is_set(struct pbstruct *s, struct pbstruct_field *f) {
- return s->data[f->isset_byte_offset] & f->isset_byte_mask;
-}
+/* Initializes everything to unset. */
+void pbstruct_init(struct pbstruct *s, struct pbstruct_definition *definition);
+bool pbstruct_is_set(struct pbstruct *s, struct pbstruct_field *f);
+/* Doesn't check whether the field is holding allocated memory, and will leak
+ * the memory if it is. */
+void pbstruct_unset(struct pbstruct *s, struct pbstruct_field *f);
+void pbstruct_set(struct pbstruct *s, struct pbstruct_field *f);
/* These do no existence checks or type checks. */
-#define DEFINE_GETTERS(ctype, name) \
- inline ctype pbstruct_get_ ## name(struct pbstruct *s, struct pbstruct_field *f) { \
- /* TODO: make sure the compiler knows this is an aligned access. */ \
- return *(ctype*)(s->data + f->byte_offset); \
- } \
- inline ctype *pbstruct_get_ ## name ## _array(struct pbstruct *s, \
- struct pbstruct_field *f) { \
- /* TODO: make sure the compiler knows this is an aligned access. */ \
- return *(ctype**)(s->data + f->byte_offset); \
- }
-
-DEFINE_GETTERS(double, double)
-DEFINE_GETTERS(float, float)
-DEFINE_GETTERS(int32_t, int32)
-DEFINE_GETTERS(int64_t, int64)
-DEFINE_GETTERS(uint32_t, uint32)
-DEFINE_GETTERS(uint64_t, uint64)
-DEFINE_GETTERS(bool, bool)
-DEFINE_GETTERS(struct pbstruct_delimited*, bytes)
-DEFINE_GETTERS(struct pbstruct_delimited*, string)
-DEFINE_GETTERS(struct pbstruct*, substruct)
+#define DECLARE_GETTERS(ctype, name) \
+ ctype *pbstruct_get_ ## name(struct pbstruct *s, struct pbstruct_field *f);
+
+DECLARE_GETTERS(double, double)
+DECLARE_GETTERS(float, float)
+DECLARE_GETTERS(int32_t, int32)
+DECLARE_GETTERS(int64_t, int64)
+DECLARE_GETTERS(uint32_t, uint32)
+DECLARE_GETTERS(uint64_t, uint64)
+DECLARE_GETTERS(bool, bool)
+DECLARE_GETTERS(struct pbstruct_delimited*, bytes)
+DECLARE_GETTERS(struct pbstruct_delimited*, string)
+DECLARE_GETTERS(struct pbstruct*, substruct)
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+#endif /* PBSTRUCT_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback