From f49f7f94c10de1ab39bd045e4ff6bb281541b51f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 22 Dec 2009 17:08:14 -0800 Subject: More progress, upb_data and upb_def compile (but are incomplete). --- src/upb_data.h | 149 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 109 insertions(+), 40 deletions(-) (limited to 'src/upb_data.h') diff --git a/src/upb_data.h b/src/upb_data.h index 01661ff..7b0a97b 100644 --- a/src/upb_data.h +++ b/src/upb_data.h @@ -5,24 +5,31 @@ * * This file defines the in-memory format for messages, arrays, and strings * (which are the three dynamically-allocated structures that make up all - * protobufs). */ + * protobufs). + * + * The members of all structs should be considered private. Access should + * only happen through the provided functions. */ #ifndef UPB_DATA_H #define UPB_DATA_H +#include #include #include "upb.h" -#include "upb_def.h" +#include "upb_atomic.h" + +struct upb_msgdef; +struct upb_fielddef; /* upb_data *******************************************************************/ // The "base class" of strings, arrays, and messages. Contains a few flags and // possibly a reference count. None of the functions for upb_data are public, // but some of the constants are. -typedef upb_atomic_t upb_data; +typedef upb_atomic_refcount_t upb_data; // The flags in upb_data. -enum upb_data_flag { +typedef enum { // Set if the object itself was allocated with malloc() and should be freed // with free(). This flag would be false if the object was allocated on the // stack or is data from the static segment of an object file. Note that this @@ -40,48 +47,56 @@ enum upb_data_flag { // Set if the object has an embedded refcount. UPB_DATA_REFCOUNTED = (1<<2) -}; +} upb_data_flag; #define REFCOUNT_MASK 0xFFFFFFF8 #define REFCOUNT_SHIFT 3 #define REFCOUNT_ONE (1<v & flag; } -INLINE uint32_t _upb_data_read_refcount(upb_data *d) { +// INTERNAL-ONLY +INLINE void upb_data_setflag(upb_data *d, upb_data_flag flag) { + d->v |= flag; +} + +INLINE uint32_t upb_data_getrefcount(upb_data *d) { + int data; if(upb_data_hasflag(d, UPB_DATA_FROZEN)) data = upb_atomic_read(d); else - data = data->val; + data = d->v; return (data & REFCOUNT_MASK) >> REFCOUNT_SHIFT; } // Returns true if the given data has only one owner. -INLINE bool _upb_data_only(upb_data *data) { +INLINE bool upb_data_only(upb_data *data) { return !upb_data_hasflag(data, UPB_DATA_REFCOUNTED) || - upb_data_read_refcount(data) == 1; + upb_data_getrefcount(data) == 1; } // Specifies the type of ref that is requested based on the kind of access the // caller needs to the object. -enum upb_ref_flags { +typedef enum { // Use when the client plans to perform read-only access to the object, and // only in one thread at a time. This imposes the least requirements on the // object; it can be either frozen or not. As a result, requesting a // reference of this type never performs a copy unless the object has no // refcount. + // + // A ref of this type can always be explicitly converted to frozen or + // unfrozen later. UPB_REF_THREADUNSAFE_READONLY = 0, // Use when the client plans to perform read-only access, but from multiple // threads concurrently. This will force the object to eagerly perform any // parsing that may have been lazily deferred, and will force a copy if the - // object is not current frozen and there are any other referents (who expect - // the object to stay writable). + // object is not current frozen. // // Asking for a reference of this type is equivalent to: // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); @@ -92,23 +107,23 @@ enum upb_ref_flags { // Use when the client plans to perform read/write access. As a result, the // reference will not be thread-safe for concurrent reading *or* writing; the // object must be externally synchronized if it is being accessed from more - // than one thread. This will force a copy if the object is not currently - // frozen and there are any other referents (who expect the object to stay - // safe for unsynchronized reads). + // than one thread. This will force a copy if the object is currently frozen. // // Asking for a reference of this type is equivalent to: // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); // x = thaw(x); // ...except it is more efficient. UPB_REF_MUTABLE = 2 -} +} upb_reftype; +// INTERNAL-ONLY FUNCTION: // Attempts to increment the reference on d with the given type of ref. If // this is not possible, returns false. -INLINE bool upb_data_incref(upb_data *d, upb_reftype reftype) { +INLINE bool _upb_data_incref(upb_data *d, upb_reftype reftype) { if((reftype == UPB_REF_FROZEN && !upb_data_hasflag(d, UPB_DATA_FROZEN)) || (reftype == UPB_REF_MUTABLE && upb_data_hasflag(d, UPB_DATA_FROZEN)) || - !upb_data_hasflag(d, UPB_DATA_REFCOUNTED)) { + (upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED) && + !upb_data_hasflag(d, UPB_DATA_REFCOUNTED))) { return false; } // Increment the ref. Only need to use atomic ops if the ref is frozen. @@ -117,25 +132,40 @@ INLINE bool upb_data_incref(upb_data *d, upb_reftype reftype) { return true; } -INLINE bool upb_data_decref(upb_data *d) { - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) { - int32_t old_val = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE); - return old_val & REFCOUNT_MASK == REFCOUNT_ONE; +// INTERNAL-ONLY FUNCTION: +// Releases a reference on d, returning true if the object should be deleted. +INLINE bool _upb_data_unref(upb_data *d) { + if(upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED)) { + // A heap-allocated object without a refcount should never be decref'd. + // Its owner owns it exlusively and should free it directly. + assert(upb_data_hasflag(d, UPB_DATA_REFCOUNTED)); + if(upb_data_hasflag(d, UPB_DATA_FROZEN)) { + int32_t old_val = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE); + return (old_val & REFCOUNT_MASK) == REFCOUNT_ONE; + } else { + d->v -= REFCOUNT_ONE; + return (d->v & REFCOUNT_MASK) == 0; + } } else { - *d -= REFCOUNT_ONE; - return *d & REFCOUNT_MASK == 0; + // Non heap-allocated data never should be deleted. + return false; } } -typedef uint8_t upb_flags_t; struct upb_mmhead {}; /* upb_string *****************************************************************/ typedef uint32_t upb_strlen_t; -// The members of this struct are private. Access should only be through the -// associated functions. +// We have several different representations for string, depending on whether +// it has a refcount (and likely in the future, depending on whether it is a +// slice of another string). We could just have one representation with +// members that are sometimes unused, but this is wasteful in memory. The +// flags that are always part of the first word tell us which representation +// to use. +// +// upb_string_common is the members that are common to all representations. typedef struct { upb_data base; upb_strlen_t byte_len; @@ -144,12 +174,14 @@ typedef struct { char *ptr; } upb_string_common; +// Used for a string without a refcount. typedef struct { uint32_t byte_size_and_flags; upb_strlen_t byte_len; char *ptr; } upb_norefcount_string; +// Used for a string with a refcount. typedef struct { upb_data base; upb_strlen_t byte_len; @@ -159,28 +191,35 @@ typedef struct { typedef union { upb_string_common common; - // Should only be accessed if flags indicate the string has *no* refcount. upb_norefcount_string norefcount; - // Should only be accessed if flags indicate the string *has* a refcount. upb_refcounted_string refcounted; } upb_string; -// Returns a newly constructed string, which starts out empty. Caller owns one -// ref on it. -upb_string *upb_string_new(void); +// Returns a newly constructed, refcounted string which starts out empty. +// Caller owns one ref on it. The returned string will not be frozen. +upb_string *upb_string_new(); + +// Creates a new string which is a duplicate of the given string. If +// refcounted is true, the new string is refcounted, otherwise the caller +// has exlusive ownership of it. +INLINE upb_string *upb_strdup(upb_string *s, bool refcounted); + +// INTERNAL-ONLY: +// Frees the given string, alone with any memory the string owned. +void _upb_string_free(upb_string *s); // Returns a string to which caller owns a ref, and contains the same contents // as src. The returned value may be a copy of src, if the requested flags // were incompatible with src's. -INLINE upb_string *upb_string_getref(upb_string *s, upb_flags_t ref_flags) { - if(upb_data_incref(&s->common.base, ref_flags)) return s; - return upb_strdup(s); +INLINE upb_string *upb_string_getref(upb_string *s, int ref_flags) { + if(_upb_data_incref(&s->common.base, ref_flags)) return s; + return upb_strdup(s, true); } // The caller releases a ref on src, which it must previously have owned a ref // on. INLINE void upb_string_unref(upb_string *s) { - if(upb_data_unref(&s->common.base)) _upb_string_free(s); + if(_upb_data_unref(&s->common.base)) _upb_string_free(s); } // Returns a buffer to which the caller may write. The string is resized to @@ -188,6 +227,10 @@ INLINE void upb_string_unref(upb_string *s) { // not be frozen otherwise the program will assert-fail or abort(). char *upb_string_getrwbuf(upb_string *s, upb_strlen_t byte_len); +INLINE void upb_string_clear(upb_string *s) { + upb_string_getrwbuf(s, 0); +} + // Returns a buffer that the caller may use to read the current contents of // the string. The number of bytes available is upb_strlen(s). INLINE const char *upb_string_getrobuf(upb_string *s) { @@ -227,6 +270,32 @@ INLINE void upb_strcpy(upb_string *dest, upb_string *src) { memcpy(upb_string_getrwbuf(dest, src_len), upb_string_getrobuf(src), src_len); } +INLINE upb_string *upb_strdup(upb_string *s, bool refcounted) { + upb_string *copy = upb_string_new(refcounted); + upb_strcpy(copy, s); + return copy; +} + +// Appends 'append' to 's' in-place, resizing s if necessary. +INLINE void upb_strcat(upb_string *s, upb_string *append) { + upb_strlen_t s_len = upb_strlen(s); + upb_strlen_t append_len = upb_strlen(append); + upb_strlen_t newlen = s_len + append_len; + memcpy(upb_string_getrwbuf(s, newlen) + s_len, + upb_string_getrobuf(append), append_len); +} + +// Returns a string that is a substring of the given string. Currently this +// returns a copy, but in the future this may return an object that references +// the original string data instead of copying it. Both now and in the future, +// the caller owns a ref on whatever is returned. +INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { + upb_string *slice = upb_string_new(true); + len = UPB_MIN((upb_strlen_t)len, upb_strlen(s) - (upb_strlen_t)offset); + memcpy(upb_string_getrwbuf(slice, len), upb_string_getrobuf(s) + offset, len); + return slice; +} + // Reads an entire file into a newly-allocated string (caller owns one ref). upb_string *upb_strreadfile(const char *filename); @@ -234,12 +303,12 @@ upb_string *upb_strreadfile(const char *filename); // Initialize with the given macro, which must resolve to a const char*. You // must not dynamically allocate this type. typedef upb_string upb_static_string; -#define UPB_STRLIT(str) {sizeof(str), false, true, false, 0, str} +#define UPB_STRLIT(str) {{{0 | UPB_DATA_FROZEN}, sizeof(str), str}} // Allows using upb_strings in printf, ie: // upb_string str = UPB_STRLIT("Hello, World!\n"); // printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) (str)->byte_len, (str)->ptr +#define UPB_STRARG(str) (str)->common.byte_len, (str)->common.ptr #define UPB_STRFMT "%.*s" /* upb_array ******************************************************************/ @@ -276,7 +345,7 @@ upb_array *upb_array_new(void); // Returns an array to which caller owns a ref, and contains the same contents // as src. The returned value may be a copy of src, if the requested flags // were incompatible with src's. -INLINE upb_array *upb_array_getref(upb_array *src, upb_flags_t flags); +INLINE upb_array *upb_array_getref(upb_array *src, int ref_flags); // The caller releases a ref on the given array, which it must previously have // owned a ref on. -- cgit v1.2.3