From e5debfa1c99757ad08bccd834b9596a4f5e97adb Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 28 Dec 2009 16:41:33 -0800 Subject: More incremental work; ported some of upbc. --- src/upb.c | 4 +-- src/upb.h | 18 +++++------ src/upb_data.h | 99 +++++++++++++++++++++++++++++++++++++-------------------- src/upb_def.c | 4 +-- src/upb_def.h | 2 +- src/upb_parse.c | 4 +-- src/upb_table.h | 2 +- src/upb_text.h | 4 +-- 8 files changed, 83 insertions(+), 54 deletions(-) (limited to 'src') diff --git a/src/upb.c b/src/upb.c index e58d272..2412be3 100644 --- a/src/upb.c +++ b/src/upb.c @@ -32,8 +32,8 @@ struct upb_type_info upb_type_info[] = { TYPE_INFO(SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t) TYPE_INFO(SINT32, UPB_WIRE_TYPE_VARINT, int32_t) TYPE_INFO(SINT64, UPB_WIRE_TYPE_VARINT, int64_t) - TYPE_INFO(STRING, UPB_WIRE_TYPE_DELIMITED, struct upb_string*) - TYPE_INFO(BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*) + TYPE_INFO(STRING, UPB_WIRE_TYPE_DELIMITED, union upb_string*) + TYPE_INFO(BYTES, UPB_WIRE_TYPE_DELIMITED, union upb_string*) }; void upb_seterr(struct upb_status *status, enum upb_status_code code, diff --git a/src/upb.h b/src/upb.h index 237281f..aeef349 100644 --- a/src/upb.h +++ b/src/upb.h @@ -122,9 +122,9 @@ struct upb_tag { /* Polymorphic values of .proto types *****************************************/ -struct upb_string; -struct upb_array; -struct upb_msg; +union upb_string; +union upb_array; +union upb_msg; // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. @@ -136,9 +136,9 @@ union upb_value { uint32_t uint32; uint64_t uint64; bool _bool; - struct upb_string *str; - struct upb_array *arr; - struct upb_msg *msg; + union upb_string *str; + union upb_array *arr; + union upb_msg *msg; }; // A pointer to a .proto value. The owner must have an out-of-band way of @@ -151,9 +151,9 @@ union upb_value_ptr { uint32_t *uint32; uint64_t *uint64; bool *_bool; - struct upb_string **str; - struct upb_array **arr; - struct upb_msg **msg; + union upb_string **str; + union upb_array **arr; + union upb_msg **msg; void *_void; }; diff --git a/src/upb_data.h b/src/upb_data.h index 48b5d9d..ea59ac7 100644 --- a/src/upb_data.h +++ b/src/upb_data.h @@ -8,7 +8,15 @@ * protobufs). * * The members of all structs should be considered private. Access should - * only happen through the provided functions. */ + * only happen through the provided functions. + * + * Unlike Google's protobuf, messages contain *pointers* to strings and arrays + * instead of including them by value. This makes unused strings and arrays + * use less memory, and lets the strings and arrays have multiple possible + * representations (for example, a string could be a slice). It also gives + * us more flexibility wrt refcounting. The cost is that when a field *is* + * being used, the net memory usage is one pointer more than if we had + * included the thing directly. */ #ifndef UPB_DATA_H #define UPB_DATA_H @@ -189,7 +197,7 @@ typedef struct { uint32_t byte_size; } upb_refcounted_string; -typedef union { +typedef union upb_string { upb_norefcount_string norefcount; upb_string_common common; upb_refcounted_string refcounted; @@ -197,12 +205,12 @@ typedef union { // Returns a newly constructed, refcounted string which starts out empty. // Caller owns one ref on it. The returned string will not be frozen. -upb_string *upb_string_new(); +upb_string *upb_string_new(void); // Creates a new string which is a duplicate of the given string. If // refcounted is true, the new string is refcounted, otherwise the caller // has exlusive ownership of it. -INLINE upb_string *upb_strdup(upb_string *s, bool refcounted); +INLINE upb_string *upb_strdup(upb_string *s); // INTERNAL-ONLY: // Frees the given string, alone with any memory the string owned. @@ -213,7 +221,7 @@ void _upb_string_free(upb_string *s); // were incompatible with src's. INLINE upb_string *upb_string_getref(upb_string *s, int ref_flags) { if(_upb_data_incref(&s->common.base, ref_flags)) return s; - return upb_strdup(s, true); + return upb_strdup(s); } // The caller releases a ref on src, which it must previously have owned a ref @@ -227,6 +235,8 @@ INLINE void upb_string_unref(upb_string *s) { // not be frozen otherwise the program will assert-fail or abort(). char *upb_string_getrwbuf(upb_string *s, upb_strlen_t byte_len); +void upb_string_resize(upb_string *s, upb_strlen_t len); + INLINE void upb_string_clear(upb_string *s) { upb_string_getrwbuf(s, 0); } @@ -270,12 +280,20 @@ INLINE void upb_strcpy(upb_string *dest, upb_string *src) { memcpy(upb_string_getrwbuf(dest, src_len), upb_string_getrobuf(src), src_len); } -INLINE upb_string *upb_strdup(upb_string *s, bool refcounted) { - upb_string *copy = upb_string_new(refcounted); +INLINE upb_string *upb_strdup(upb_string *s) { + upb_string *copy = upb_string_new(); upb_strcpy(copy, s); return copy; } +INLINE upb_string *upb_strdupc(const char *src) { + upb_string *copy = upb_string_new(); + upb_strlen_t len = strlen(src); + char *buf = upb_string_getrwbuf(copy, len); + memcpy(buf, src, len); + return copy; +} + // Appends 'append' to 's' in-place, resizing s if necessary. INLINE void upb_strcat(upb_string *s, upb_string *append) { upb_strlen_t s_len = upb_strlen(s); @@ -290,7 +308,7 @@ INLINE void upb_strcat(upb_string *s, upb_string *append) { // the original string data instead of copying it. Both now and in the future, // the caller owns a ref on whatever is returned. INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { - upb_string *slice = upb_string_new(true); + upb_string *slice = upb_string_new(); len = UPB_MIN((upb_strlen_t)len, upb_strlen(s) - (upb_strlen_t)offset); memcpy(upb_string_getrwbuf(slice, len), upb_string_getrobuf(s) + offset, len); return slice; @@ -304,7 +322,7 @@ upb_string *upb_strreadfile(const char *filename); // must not dynamically allocate this type. typedef upb_string upb_static_string; #define UPB_STRLIT_LEN(str, len) {0 | UPB_DATA_FROZEN, len, str} -#define UPB_STRLIT(str) {{{0 | UPB_DATA_FROZEN}, sizeof(str), str}} +#define UPB_STRLIT(str) {{0 | UPB_DATA_FROZEN, sizeof(str), str}} // Allows using upb_strings in printf, ie: // upb_string str = UPB_STRLIT("Hello, World!\n"); @@ -316,33 +334,50 @@ typedef upb_string upb_static_string; typedef uint32_t upb_arraylen_t; -// The members of this struct are private. Access should only be through the -// associated functions. +// The comments attached to upb_string above also apply here. +typedef struct { + upb_data base; + upb_arraylen_t len; + union upb_value_ptr elements; +} upb_array_common; + +typedef struct { + uint32_t size_and_flags; + upb_arraylen_t len; + union upb_value_ptr elements; +} upb_norefcount_array; + typedef struct { - unsigned int size:29; // How many bytes we own, 0 if we don't own. - bool is_heap_allocated:1; - bool is_frozen:1; - bool has_refcount:1; + upb_data base; upb_arraylen_t len; - union upb_value_ptr *elements; + union upb_value_ptr elements; + upb_arraylen_t size; +} upb_refcounted_array; + +typedef union upb_array { + upb_norefcount_array norefcount; + upb_array_common common; + upb_refcounted_array refcounted; } upb_array; +// This type can be used either to perform read-only access on an array, +// or to statically define a non-reference-counted static array. #define UPB_DEFINE_MSG_ARRAY(type) \ -typedef struct type ## array { \ - unsigned int size:29; \ - bool is_heap_allocated:1; \ - bool is_frozen:1;\ - bool has_refcount:1;\ +typedef struct type ## _array { \ + upb_data base; \ upb_arraylen_t len;\ type **elements; \ -} type ## array; \ +} type ## _array; \ -#define UPB_MSG_ARRAY(type) struct type ## array +#define UPB_MSG_ARRAY(type) struct type ## _array -// Constructs a newly-allocated array, which starts out empty. Caller owns one -// ref on it. +// Constructs a newly-allocated, reference-counted array which starts out +// empty. Caller owns one ref on it. upb_array *upb_array_new(void); +union upb_value upb_array_get(upb_array *a, struct upb_fielddef *f, int elem); + +#if 0 // Returns an array to which caller owns a ref, and contains the same contents // as src. The returned value may be a copy of src, if the requested flags // were incompatible with src's. @@ -358,12 +393,6 @@ INLINE void upb_array_unref(upb_array *a, struct upb_fielddef *f); INLINE void upb_array_set(upb_array *a, struct upb_fielddef *f, int elem, union upb_value val); -// Note that the caller does *not* own a ref on the returned value. -INLINE union upb_value upb_array_get(upb_array *a, struct upb_fielddef *f, - int elem); -INLINE union upb_value upb_array_getmutable(upb_array *a, - struct upb_fielddef *f, int elem, - union upb_value val); // Note that array_append will attempt to take a reference on the given value, // so to avoid a copy use append_default and get. @@ -371,15 +400,16 @@ INLINE void upb_array_append(upb_array *a, struct upb_fielddef *f, union upb_value val); INLINE void upb_array_append_default(upb_array *a, struct upb_fielddef *f, union upb_value val); +#endif // Returns the current number of elements in the array. INLINE size_t upb_array_len(upb_array *a) { - return a->len; + return a->common.len; } /* upb_msg ********************************************************************/ -typedef struct { +typedef union upb_msg { uint8_t data[1]; } upb_msg; @@ -390,13 +420,12 @@ void upb_msg_unref(upb_msg *msg, struct upb_msgdef *md); // Tests whether the given field is explicitly set, or whether it will return // a default. -bool upb_msg_isset(upb_msg *msg, struct upb_fielddef *f); +bool upb_msg_has(upb_msg *msg, struct upb_fielddef *f); // Returns the current value if set, or the default value if not set, of the // specified field. The mutable version will first replace the value with a // mutable copy if it is not already mutable. union upb_value upb_msg_get(upb_msg *msg, struct upb_fielddef *f); -union upb_value upb_msg_getmutable(upb_msg *msg, struct upb_fielddef *f); // Sets the given field to the given value. The msg will take a ref on val, // and will drop a ref on whatever was there before. diff --git a/src/upb_def.c b/src/upb_def.c index 5f52f31..be5c181 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -437,7 +437,7 @@ static struct symtab_ent *resolve(struct upb_strtable *t, return e; } else { // Remove components from base until we find an entry or run out. - upb_string *sym_str = upb_string_new(true); + upb_string *sym_str = upb_string_new(); int baselen = upb_strlen(base); while(1) { // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol @@ -461,7 +461,7 @@ static struct symtab_ent *resolve(struct upb_strtable *t, * join("", "Baz") -> "Baz" * Caller owns a ref on the returned string. */ static upb_string *join(upb_string *base, upb_string *name) { - upb_string *joined = upb_strdup(base, true); + upb_string *joined = upb_strdup(base); upb_strlen_t len = upb_strlen(joined); if(len > 0) { upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; diff --git a/src/upb_def.h b/src/upb_def.h index e8c7548..b013c40 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -173,7 +173,7 @@ struct google_protobuf_DescriptorProto; struct upb_msgdef { struct upb_def base; upb_atomic_refcount_t cycle_refcount; - struct upb_msg *default_msg; // Message with all default values set. + upb_msg *default_msg; // Message with all default values set. size_t size; upb_field_count_t num_fields; uint32_t set_flags_bytes; diff --git a/src/upb_parse.c b/src/upb_parse.c index 2948022..b9cad5a 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -272,8 +272,8 @@ static uint8_t *skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt, } } -uint8_t *upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, - union upb_value_ptr v, struct upb_status *status) +static uint8_t *upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, + union upb_value_ptr v, struct upb_status *status) { #define CASE(t, member_name) \ case UPB_TYPE(t): return upb_get_ ## t(buf, end, v.member_name, status); diff --git a/src/upb_table.h b/src/upb_table.h index 9a49b8b..31ff7d2 100644 --- a/src/upb_table.h +++ b/src/upb_table.h @@ -39,7 +39,7 @@ struct upb_inttable_entry { // performance by letting us compare hashes before comparing lengths or the // strings themselves. struct upb_strtable_entry { - upb_string *key; // We own a frozen ref. + upb_string *key; // We own a frozen ref. uint32_t next; // Internal chaining. }; diff --git a/src/upb_text.h b/src/upb_text.h index 32d7278..6b2f4eb 100644 --- a/src/upb_text.h +++ b/src/upb_text.h @@ -23,10 +23,10 @@ INLINE void upb_text_printer_init(struct upb_text_printer *p, bool single_line) p->single_line = single_line; } void upb_text_printval(upb_field_type_t type, union upb_value p, FILE *file); -void upb_text_printfield(struct upb_text_printer *p, struct upb_string *name, +void upb_text_printfield(struct upb_text_printer *p, upb_string *name, upb_field_type_t valtype, union upb_value val, FILE *stream); -void upb_text_push(struct upb_text_printer *p, struct upb_string *submsg_type, +void upb_text_push(struct upb_text_printer *p, upb_string *submsg_type, FILE *stream); void upb_text_pop(struct upb_text_printer *p, FILE *stream); -- cgit v1.2.3