diff options
Diffstat (limited to 'upb')
45 files changed, 7927 insertions, 3507 deletions
diff --git a/upb/bytestream.c b/upb/bytestream.c index a242df4..8473f33 100644 --- a/upb/bytestream.c +++ b/upb/bytestream.c @@ -7,11 +7,13 @@ #include "upb/bytestream.h" -#include <stddef.h> #include <stdlib.h> #include <string.h> -char *upb_byteregion_strdup(const struct _upb_byteregion *r) { + +/* upb_byteregion *************************************************************/ + +char *upb_byteregion_strdup(const upb_byteregion *r) { char *ret = malloc(upb_byteregion_len(r) + 1); upb_byteregion_copyall(r, ret); ret[upb_byteregion_len(r)] = '\0'; diff --git a/upb/bytestream.h b/upb/bytestream.h index bdfcd73..41f767a 100644 --- a/upb/bytestream.h +++ b/upb/bytestream.h @@ -73,16 +73,18 @@ #ifndef UPB_BYTESTREAM_H #define UPB_BYTESTREAM_H -#include <errno.h> -#include <stdarg.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #include "upb.h" #ifdef __cplusplus +namespace upb { +class ByteRegion; +class StringSource; +} // namespace upb +typedef upb::StringSource upb_stringsrc; extern "C" { +#else +struct upb_stringsrc; +typedef struct upb_stringsrc upb_stringsrc; #endif typedef enum { @@ -185,22 +187,91 @@ INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs, #define UPB_NONDELIMITED (0xffffffffffffffffULL) -typedef struct _upb_byteregion { +#ifdef __cplusplus +} // extern "C" + +class upb::ByteRegion { + public: + static const uint64_t kNondelimited = UPB_NONDELIMITED; + typedef upb_bytesuccess_t ByteSuccess; + + // Accessors for the regions bounds -- the meaning of these is described in + // the diagram above. + uint64_t start_ofs() const; + uint64_t discard_ofs() const; + uint64_t fetch_ofs() const; + uint64_t end_ofs() const; + + // Returns how many bytes are fetched and available for reading starting from + // offset "offset". + uint64_t BytesAvailable(uint64_t offset) const; + + // Returns the total number of bytes remaining after offset "offset", or + // kNondelimited if the byteregion is non-delimited. 
+ uint64_t BytesRemaining(uint64_t offset) const; + + uint64_t Length() const; + + // Sets the value of this byteregion to be a subset of the given byteregion's + // data. The caller is responsible for releasing this region before the src + // region is released (unless the region is first pinned, if pinning support + // is added. see below). + void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len); + void Release(); + + // Attempts to fetch more data, extending the fetched range of this + // byteregion. Returns true if the fetched region was extended by at least + // one byte, false on EOF or error (see *s for details). + ByteSuccess Fetch(); + + // Fetches all remaining data, returning false if the operation failed (see + // *s for details). May only be used on delimited byteregions. + ByteSuccess FetchAll(); + + // Discards bytes from the byteregion up until ofs (which must be greater or + // equal to discard_ofs()). It is valid to discard bytes that have not been + // fetched (such bytes will never be fetched) but it is an error to discard + // past the end of a delimited byteregion. + void Discard(uint64_t ofs); + + // Copies "len" bytes of data into "dst", starting at ofs. The specified + // region must be available. + void Copy(uint64_t ofs, size_t len, char *dst) const; + + // Copies all bytes from the byteregion into dst. Requires that the entire + // byteregion is fetched and that none has been discarded. + void CopyAll(char *dst) const; + + // Returns a pointer to the internal buffer for the byteregion starting at + // offset "ofs." Stores the number of bytes available in this buffer in *len. + // The returned buffer is invalidated when the byteregion is reset or + // released, or when the bytes are discarded. If the byteregion is not + // currently pinned, the pointer is only valid for the lifetime of the parent + // byteregion. 
+ const char *GetPtr(uint64_t ofs, size_t *len) const; + + // Copies the contents of the byteregion into a newly-allocated, + // NULL-terminated string. Requires that the byteregion is fully fetched. + char *StrDup() const; + + template <typename T> void AssignToString(T* str); + +#else +struct upb_byteregion { +#endif uint64_t start; uint64_t discard; uint64_t fetch; uint64_t end; // UPB_NONDELIMITED if nondelimited. upb_bytesrc *bytesrc; bool toplevel; // If true, discards hit the underlying bytesrc. -} upb_byteregion; +}; -// Initializes a byteregion. Its initial value will be empty. No methods may -// be called on an empty byteregion except upb_byteregion_reset(). -void upb_byteregion_init(upb_byteregion *r); -void upb_byteregion_uninit(upb_byteregion *r); +#ifdef __cplusplus +extern "C" { +#endif -// Accessors for the regions bounds -- the meaning of these is described in the -// diagram above. +// Native C API. INLINE uint64_t upb_byteregion_startofs(const upb_byteregion *r) { return r->start; } @@ -213,17 +284,11 @@ INLINE uint64_t upb_byteregion_fetchofs(const upb_byteregion *r) { INLINE uint64_t upb_byteregion_endofs(const upb_byteregion *r) { return r->end; } - -// Returns how many bytes are fetched and available for reading starting -// from offset "o". INLINE uint64_t upb_byteregion_available(const upb_byteregion *r, uint64_t o) { assert(o >= upb_byteregion_discardofs(r)); assert(o <= r->fetch); // Could relax this. return r->fetch - o; } - -// Returns the total number of bytes remaining after offset "o", or -// UPB_NONDELIMITED if the byteregion is non-delimited. INLINE uint64_t upb_byteregion_remaining(const upb_byteregion *r, uint64_t o) { return r->end == UPB_NONDELIMITED ? 
UPB_NONDELIMITED : r->end - o; } @@ -231,22 +296,10 @@ INLINE uint64_t upb_byteregion_remaining(const upb_byteregion *r, uint64_t o) { INLINE uint64_t upb_byteregion_len(const upb_byteregion *r) { return upb_byteregion_remaining(r, r->start); } - -// Sets the value of this byteregion to be a subset of the given byteregion's -// data. The caller is responsible for releasing this region before the src -// region is released (unless the region is first pinned, if pinning support is -// added. see below). void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src, uint64_t ofs, uint64_t len); void upb_byteregion_release(upb_byteregion *r); - -// Attempts to fetch more data, extending the fetched range of this byteregion. -// Returns true if the fetched region was extended by at least one byte, false -// on EOF or error (see *s for details). upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r); - -// Fetches all remaining data for "r", returning the success of the operation -// May only be used on delimited byteregions. INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) { assert(upb_byteregion_len(r) != UPB_NONDELIMITED); upb_bytesuccess_t ret; @@ -255,11 +308,6 @@ INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) { } while (ret == UPB_BYTE_OK); return ret == UPB_BYTE_EOF ? UPB_BYTE_OK : ret; } - -// Discards bytes from the byteregion up until ofs (which must be greater or -// equal to upb_byteregion_discardofs()). It is valid to discard bytes that -// have not been fetched (such bytes will never be fetched) but it is an error -// to discard past the end of a delimited byteregion. 
INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) { assert(ofs >= upb_byteregion_discardofs(r)); assert(ofs <= upb_byteregion_endofs(r)); @@ -267,28 +315,16 @@ INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) { if (ofs > r->fetch) r->fetch = ofs; if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs); } - -// Copies "len" bytes of data into "dst", starting at ofs. The specified -// region must be available. INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs, size_t len, char *dst) { assert(ofs >= upb_byteregion_discardofs(r)); assert(len <= upb_byteregion_available(r, ofs)); upb_bytesrc_copy(r->bytesrc, ofs, len, dst); } - -// Copies all bytes from the byteregion into dst. Requires that the entire -// byteregion is fetched and that none has been discarded. INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) { assert(r->start == r->discard && r->end == r->fetch); upb_byteregion_copy(r, r->start, upb_byteregion_len(r), dst); } - -// Returns a pointer to the internal buffer for the byteregion starting at -// offset "ofs." Stores the number of bytes available in this buffer in *len. -// The returned buffer is invalidated when the byteregion is reset or released, -// or when the bytes are discarded. If the byteregion is not currently pinned, -// the pointer is only valid for the lifetime of the parent byteregion. INLINE const char *upb_byteregion_getptr(const upb_byteregion *r, uint64_t ofs, size_t *len) { assert(ofs >= upb_byteregion_discardofs(r)); @@ -354,9 +390,7 @@ INLINE int upb_bytesink_write(upb_bytesink *s, const void *buf, int len) { return s->vtbl->write(s, buf, len); } -INLINE int upb_bytesink_writestr(upb_bytesink *sink, const char *str) { - return upb_bytesink_write(sink, str, strlen(str)); -} +#define upb_bytesink_writestr(s, buf) upb_bytesink_write(s, buf, strlen(buf)) // Returns the number of bytes written or -1 on error. 
INLINE int upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...) { @@ -413,27 +447,47 @@ INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) { // bytesrc/bytesink for a simple contiguous string. -typedef struct { +#ifdef __cplusplus +} // extern "C" + +class upb::StringSource { + public: + StringSource(); + template <typename T> explicit StringSource(const T& str); + StringSource(const char *data, size_t len); + ~StringSource(); + + // Resets the stringsrc to a state where it will vend the given string. The + // string data must be valid until the stringsrc is reset again or destroyed. + void Reset(const char* data, size_t len); + template <typename T> void Reset(const T& str); + + // Returns the top-level upb_byteregion* for this stringsrc. Invalidated + // when the stringsrc is reset. + ByteRegion* AllBytes(); + + upb_bytesrc* ByteSource(); + +#else +struct upb_stringsrc { +#endif upb_bytesrc bytesrc; const char *str; size_t len; upb_byteregion byteregion; -} upb_stringsrc; +}; -// Create/free a stringsrc. +#ifdef __cplusplus +extern "C" { +#endif + +// Native C API. void upb_stringsrc_init(upb_stringsrc *s); void upb_stringsrc_uninit(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// string data must be valid until the stringsrc is reset again or destroyed. void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len); - INLINE upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { return &s->bytesrc; } - -// Returns the top-level upb_byteregion* for this stringsrc. Invalidated when -// the stringsrc is reset. 
INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) { return &s->byteregion; } @@ -465,7 +519,111 @@ const char *upb_stringsink_release(upb_stringsink *s, size_t *len); upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s); #ifdef __cplusplus -} /* extern "C" */ +} // extern "C" + +namespace upb { + +inline uint64_t ByteRegion::start_ofs() const { + return upb_byteregion_startofs(this); +} +inline uint64_t ByteRegion::discard_ofs() const { + return upb_byteregion_discardofs(this); +} +inline uint64_t ByteRegion::fetch_ofs() const { + return upb_byteregion_fetchofs(this); +} +inline uint64_t ByteRegion::end_ofs() const { + return upb_byteregion_endofs(this); +} +inline uint64_t ByteRegion::BytesAvailable(uint64_t offset) const { + return upb_byteregion_available(this, offset); +} +inline uint64_t ByteRegion::BytesRemaining(uint64_t offset) const { + return upb_byteregion_remaining(this, offset); +} +inline uint64_t ByteRegion::Length() const { + return upb_byteregion_len(this); +} +inline void ByteRegion::Reset( + const upb_byteregion *src, uint64_t ofs, uint64_t len) { + upb_byteregion_reset(this, src, ofs, len); +} +inline void ByteRegion::Release() { + upb_byteregion_release(this); +} +inline ByteRegion::ByteSuccess ByteRegion::Fetch() { + return upb_byteregion_fetch(this); +} +inline ByteRegion::ByteSuccess ByteRegion::FetchAll() { + return upb_byteregion_fetchall(this); +} +inline void ByteRegion::Discard(uint64_t ofs) { + upb_byteregion_discard(this, ofs); +} +inline void ByteRegion::Copy(uint64_t ofs, size_t len, char *dst) const { + upb_byteregion_copy(this, ofs, len, dst); +} +inline void ByteRegion::CopyAll(char *dst) const { + upb_byteregion_copyall(this, dst); +} +inline const char *ByteRegion::GetPtr(uint64_t ofs, size_t *len) const { + return upb_byteregion_getptr(this, ofs, len); +} +inline char *ByteRegion::StrDup() const { + return upb_byteregion_strdup(this); +} +template <typename T> void ByteRegion::AssignToString(T* str) { + 
uint64_t ofs = start_ofs(); + size_t len; + const char *ptr = GetPtr(ofs, &len); + // Empirically calling reserve() here is counterproductive and slows down + // benchmarks. If the parsing is happening in a tight loop that is reusing + // the string object, there is probably enough data reserved already and + // the reserve() call is extra overhead. + str->assign(ptr, len); + ofs += len; + while (ofs < end_ofs()) { + ptr = GetPtr(ofs, &len); + str->append(ptr, len); + ofs += len; + } +} + +template <> inline ByteRegion* GetValue<ByteRegion*>(Value v) { + return static_cast<ByteRegion*>(upb_value_getbyteregion(v)); +} + +template <> inline Value MakeValue<ByteRegion*>(ByteRegion* v) { + return upb_value_byteregion(v); +} + +inline StringSource::StringSource() { upb_stringsrc_init(this); } +template <typename T> StringSource::StringSource(const T& str) { + upb_stringsrc_init(this); + Reset(str); +} +inline StringSource::StringSource(const char *data, size_t len) { + upb_stringsrc_init(this); + Reset(data, len); +} +inline StringSource::~StringSource() { + upb_stringsrc_uninit(this); +} +inline void StringSource::Reset(const char* data, size_t len) { + upb_stringsrc_reset(this, data, len); +} +template <typename T> void StringSource::Reset(const T& str) { + upb_stringsrc_reset(this, str.c_str(), str.size()); +} +inline ByteRegion* StringSource::AllBytes() { + return upb_stringsrc_allbytes(this); +} +inline upb_bytesrc* StringSource::ByteSource() { + return upb_stringsrc_bytesrc(this); +} + +} // namespace upb + #endif #endif @@ -5,11 +5,12 @@ * Author: Josh Haberman <jhaberman@gmail.com> */ +#include "upb/def.h" + #include <stdlib.h> -#include <stddef.h> #include <string.h> #include "upb/bytestream.h" -#include "upb/def.h" +#include "upb/handlers.h" // isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. 
static bool upb_isbetween(char c, char low, char high) { @@ -44,204 +45,175 @@ static bool upb_isident(const char *str, size_t len, bool full) { /* upb_def ********************************************************************/ -static void upb_msgdef_free(upb_msgdef *m); -static void upb_fielddef_free(upb_fielddef *f); -static void upb_enumdef_free(upb_enumdef *e); +upb_deftype_t upb_def_type(const upb_def *d) { return d->type; } -bool upb_def_ismutable(const upb_def *def) { return !def->is_finalized; } -bool upb_def_isfinalized(const upb_def *def) { return def->is_finalized; } +const char *upb_def_fullname(const upb_def *d) { return d->fullname; } bool upb_def_setfullname(upb_def *def, const char *fullname) { - assert(upb_def_ismutable(def)); + assert(!upb_def_isfrozen(def)); if (!upb_isident(fullname, strlen(fullname), true)) return false; - free(def->fullname); - def->fullname = strdup(fullname); + free((void*)def->fullname); + def->fullname = upb_strdup(fullname); return true; } -void upb_def_ref(const upb_def *_def, const void *owner) { - upb_def *def = (upb_def*)_def; - upb_refcount_ref(&def->refcount, owner); -} - -void upb_def_unref(const upb_def *_def, const void *owner) { - upb_def *def = (upb_def*)_def; - if (!def) return; - if (!upb_refcount_unref(&def->refcount, owner)) return; - upb_def *base = def; - // Free all defs in the SCC. 
- do { - upb_def *next = (upb_def*)def->refcount.next; - switch (def->type) { - case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; - case UPB_DEF_FIELD: upb_fielddef_free(upb_downcast_fielddef(def)); break; - case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; - default: - assert(false); - } - def = next; - } while(def != base); -} - -void upb_def_donateref(const upb_def *_def, const void *from, const void *to) { - upb_def *def = (upb_def*)_def; - upb_refcount_donateref(&def->refcount, from, to); -} - upb_def *upb_def_dup(const upb_def *def, const void *o) { switch (def->type) { case UPB_DEF_MSG: - return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o)); + return upb_upcast(upb_msgdef_dup(upb_downcast_msgdef(def), o)); case UPB_DEF_FIELD: - return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o)); + return upb_upcast(upb_fielddef_dup(upb_downcast_fielddef(def), o)); case UPB_DEF_ENUM: - return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o)); + return upb_upcast(upb_enumdef_dup(upb_downcast_enumdef(def), o)); default: assert(false); return NULL; } } -static bool upb_def_init(upb_def *def, upb_deftype_t type, const void *owner) { +bool upb_def_isfrozen(const upb_def *def) { + return upb_refcounted_isfrozen(upb_upcast(def)); +} + +void upb_def_ref(const upb_def *def, const void *owner) { + upb_refcounted_ref(upb_upcast(def), owner); +} + +void upb_def_unref(const upb_def *def, const void *owner) { + upb_refcounted_unref(upb_upcast(def), owner); +} + +void upb_def_donateref(const upb_def *def, const void *from, const void *to) { + upb_refcounted_donateref(upb_upcast(def), from, to); +} + +void upb_def_checkref(const upb_def *def, const void *owner) { + upb_refcounted_checkref(upb_upcast(def), owner); +} + +static bool upb_def_init(upb_def *def, upb_deftype_t type, + const struct upb_refcounted_vtbl *vtbl, + const void *owner) { + if (!upb_refcounted_init(upb_upcast(def), vtbl, owner)) 
return false; def->type = type; - def->is_finalized = false; def->fullname = NULL; - return upb_refcount_init(&def->refcount, owner); + def->came_from_user = false; + return true; } static void upb_def_uninit(upb_def *def) { - upb_refcount_uninit(&def->refcount); - free(def->fullname); + free((void*)def->fullname); } -static void upb_def_getsuccessors(upb_refcount *refcount, void *closure) { - upb_def *def = (upb_def*)refcount; - switch (def->type) { - case UPB_DEF_MSG: { - upb_msgdef *m = upb_downcast_msgdef(def); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - upb_refcount_visit(refcount, &f->base.refcount, closure); - } - break; - } - case UPB_DEF_FIELD: { - upb_fielddef *f = upb_downcast_fielddef(def); - assert(f->msgdef); - upb_refcount_visit(refcount, &f->msgdef->base.refcount, closure); - upb_def *subdef = f->sub.def; - if (subdef) - upb_refcount_visit(refcount, &subdef->refcount, closure); - break; - } - case UPB_DEF_ENUM: - case UPB_DEF_SERVICE: - case UPB_DEF_ANY: - break; - } +static const char *msgdef_name(const upb_msgdef *m) { + const char *name = upb_def_fullname(upb_upcast(m)); + return name ? 
name : "(anonymous)"; } -static bool upb_validate_field(const upb_fielddef *f, upb_status *s) { - if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == -1) { +static bool upb_validate_field(upb_fielddef *f, upb_status *s) { + if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { upb_status_seterrliteral(s, "fielddef must have name and number set"); return false; } - if (upb_hassubdef(f)) { + if (upb_fielddef_hassubdef(f)) { if (f->subdef_is_symbolic) { upb_status_seterrf(s, - "field %s has not been resolved", upb_fielddef_name(f)); + "field '%s' has not been resolved", upb_fielddef_name(f)); return false; - } else if (upb_fielddef_subdef(f) == NULL) { + } + + const upb_def *subdef = upb_fielddef_subdef(f); + if (subdef == NULL) { + upb_status_seterrf(s, + "field %s.%s is missing required subdef", + msgdef_name(f->msgdef), upb_fielddef_name(f)); + return false; + } else if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) { upb_status_seterrf(s, - "field is %s missing required subdef", upb_fielddef_name(f)); + "subdef of field %s.%s is not frozen or being frozen", + msgdef_name(f->msgdef), upb_fielddef_name(f)); return false; - } else if (!upb_def_isfinalized(upb_fielddef_subdef(f))) { + } else if (upb_fielddef_default_is_symbolic(f)) { upb_status_seterrf(s, - "field %s subtype is not being finalized", upb_fielddef_name(f)); + "enum field %s.%s has not been resolved", + msgdef_name(f->msgdef), upb_fielddef_name(f)); return false; } } return true; } -bool upb_finalize(upb_def *const*defs, int n, upb_status *s) { - if (n >= UINT16_MAX - 1) { - upb_status_seterrliteral(s, "too many defs (max is 64k at a time)"); - return false; - } - +bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) { // First perform validation, in two passes so we can check that we have a // transitive closure without needing to search. 
for (int i = 0; i < n; i++) { upb_def *def = defs[i]; - if (upb_def_isfinalized(def)) { + if (upb_def_isfrozen(def)) { // Could relax this requirement if it's annoying. - upb_status_seterrliteral(s, "def is already finalized"); + upb_status_seterrliteral(s, "def is already frozen"); goto err; } else if (def->type == UPB_DEF_FIELD) { - upb_status_seterrliteral(s, "standalone fielddefs can not be finalized"); + upb_status_seterrliteral(s, "standalone fielddefs can not be frozen"); goto err; } else { // Set now to detect transitive closure in the second pass. - def->is_finalized = true; + def->came_from_user = true; } } for (int i = 0; i < n; i++) { - upb_msgdef *m = upb_dyncast_msgdef(defs[i]); - if (!m) continue; - upb_inttable_compact(&m->itof); - upb_msg_iter j; - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { - upb_fielddef *f = upb_msg_iter_field(&j); - assert(f->msgdef == m); - if (!upb_validate_field(f, s)) goto err; - } - } - - // Validation all passed, now find strongly-connected components so that - // our refcounting works with cycles. - upb_refcount_findscc((upb_refcount**)defs, n, &upb_def_getsuccessors); - - // Now that ref cycles have been removed it is safe to have each fielddef - // take a ref on its subdef (if any), but only if it's a member of another - // SCC. - for (int i = 0; i < n; i++) { - upb_msgdef *m = upb_dyncast_msgdef(defs[i]); - if (!m) continue; - upb_msg_iter j; - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { - upb_fielddef *f = upb_msg_iter_field(&j); - f->base.is_finalized = true; - // Release the ref taken in upb_msgdef_addfields(). - upb_fielddef_unref(f, m); - if (!upb_hassubdef(f)) continue; - assert(upb_fielddef_subdef(f)); - if (!upb_refcount_merged(&f->base.refcount, &f->sub.def->refcount)) { - // Subdef is part of a different strongly-connected component. 
- upb_def_ref(f->sub.def, &f->sub.def); - f->subdef_is_owned = true; + upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]); + upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]); + if (m) { + upb_inttable_compact(&m->itof); + upb_msg_iter j; + uint32_t selector = 0; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + assert(f->msgdef == m); + if (!upb_validate_field(f, s)) goto err; + f->selector_base = selector + upb_handlers_selectorbaseoffset(f); + selector += upb_handlers_selectorcount(f); } + m->selector_count = selector; + } else if (e) { + upb_inttable_compact(&e->iton); } } - return true; + // Validation all passed; freeze the defs. + return upb_refcounted_freeze((upb_refcounted*const*)defs, n, s); err: for (int i = 0; i < n; i++) { - defs[i]->is_finalized = false; + defs[i]->came_from_user = false; } + assert(!upb_ok(s)); return false; } /* upb_enumdef ****************************************************************/ +static void upb_enumdef_free(upb_refcounted *r) { + upb_enumdef *e = (upb_enumdef*)r; + upb_inttable_iter i; + upb_inttable_begin(&i, &e->iton); + for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) { + // To clean up the upb_strdup() from upb_enumdef_addval(). 
+ free(upb_value_getcstr(upb_inttable_iter_value(&i))); + } + upb_strtable_uninit(&e->ntoi); + upb_inttable_uninit(&e->iton); + upb_def_uninit(upb_upcast(e)); + free(e); +} + upb_enumdef *upb_enumdef_new(const void *owner) { + static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free}; upb_enumdef *e = malloc(sizeof(*e)); if (!e) return NULL; - if (!upb_def_init(&e->base, UPB_DEF_ENUM, owner)) goto err2; - if (!upb_strtable_init(&e->ntoi)) goto err2; - if (!upb_inttable_init(&e->iton)) goto err1; + if (!upb_def_init(upb_upcast(e), UPB_DEF_ENUM, &vtbl, owner)) goto err2; + if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2; + if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1; return e; err1: @@ -251,26 +223,13 @@ err2: return NULL; } -static void upb_enumdef_free(upb_enumdef *e) { - upb_inttable_iter i; - upb_inttable_begin(&i, &e->iton); - for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) { - // To clean up the strdup() from upb_enumdef_addval(). - free(upb_value_getptr(upb_inttable_iter_value(&i))); - } - upb_strtable_uninit(&e->ntoi); - upb_inttable_uninit(&e->iton); - upb_def_uninit(&e->base); - free(e); -} - upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) { upb_enumdef *new_e = upb_enumdef_new(owner); if (!new_e) return NULL; upb_enum_iter i; for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { bool success = upb_enumdef_addval( - new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i)); + new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL); if (!success) { upb_enumdef_unref(new_e, owner); return NULL; @@ -279,23 +238,69 @@ upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) { return new_e; } -bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num) { - if (!upb_isident(name, strlen(name), false)) return false; - if (upb_enumdef_ntoi(e, name, NULL)) +bool upb_enumdef_isfrozen(const upb_enumdef *e) { + return upb_def_isfrozen(upb_upcast(e)); +} + 
+void upb_enumdef_ref(const upb_enumdef *e, const void *owner) { + upb_def_ref(upb_upcast(e), owner); +} + +void upb_enumdef_unref(const upb_enumdef *e, const void *owner) { + upb_def_unref(upb_upcast(e), owner); +} + +void upb_enumdef_donateref( + const upb_enumdef *e, const void *from, const void *to) { + upb_def_donateref(upb_upcast(e), from, to); +} + +void upb_enumdef_checkref(const upb_enumdef *e, const void *owner) { + upb_def_checkref(upb_upcast(e), owner); +} + +const char *upb_enumdef_fullname(const upb_enumdef *e) { + return upb_def_fullname(upb_upcast(e)); +} + +bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname) { + return upb_def_setfullname(upb_upcast(e), fullname); +} + +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, + upb_status *status) { + if (!upb_isident(name, strlen(name), false)) { + upb_status_seterrf(status, "name '%s' is not a valid identifier", name); return false; - if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) + } + if (upb_enumdef_ntoi(e, name, NULL)) { + upb_status_seterrf(status, "name '%s' is already defined", name); return false; + } + if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) { + upb_status_seterrliteral(status, "out of memory"); + return false; + } if (!upb_inttable_lookup(&e->iton, num) && - !upb_inttable_insert(&e->iton, num, upb_value_ptr(strdup(name)))) + !upb_inttable_insert(&e->iton, num, upb_value_cstr(upb_strdup(name)))) { + upb_status_seterrliteral(status, "out of memory"); + upb_strtable_remove(&e->ntoi, name, NULL); return false; + } return true; } +int32_t upb_enumdef_default(const upb_enumdef *e) { return e->defaultval; } + void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) { - assert(upb_def_ismutable(UPB_UPCAST(e))); + assert(!upb_enumdef_isfrozen(e)); e->defaultval = val; } +int upb_enumdef_numvals(const upb_enumdef *e) { + return upb_strtable_count(&e->ntoi); +} + void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { // 
We iterate over the ntoi table, to account for duplicate numbers. upb_strtable_begin(i, &e->ntoi); @@ -313,7 +318,15 @@ bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { const upb_value *v = upb_inttable_lookup32(&def->iton, num); - return v ? upb_value_getptr(*v) : NULL; + return v ? upb_value_getcstr(*v) : NULL; +} + +const char *upb_enum_iter_name(upb_enum_iter *iter) { + return upb_strtable_iter_key(iter); +} + +int32_t upb_enum_iter_number(upb_enum_iter *iter) { + return upb_value_getint32(upb_strtable_iter_value(iter)); } @@ -324,9 +337,7 @@ const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { {alignof(ctype), sizeof(ctype), UPB_CTYPE_ ## inmemory_type} const upb_typeinfo upb_types[UPB_NUM_TYPES] = { - // END_GROUP is not real, but used to signify the pseudo-field that - // ends a group from within the group. - TYPE_INFO(void*, PTR), // ENDGROUP + TYPE_INFO(void*, PTR), // (unused) TYPE_INFO(double, DOUBLE), // DOUBLE TYPE_INFO(float, FLOAT), // FLOAT TYPE_INFO(int64_t, INT64), // INT64 @@ -340,7 +351,7 @@ const upb_typeinfo upb_types[UPB_NUM_TYPES] = { TYPE_INFO(void*, PTR), // MESSAGE TYPE_INFO(void*, BYTEREGION), // BYTES TYPE_INFO(uint32_t, UINT32), // UINT32 - TYPE_INFO(uint32_t, INT32), // ENUM + TYPE_INFO(int32_t, INT32), // ENUM TYPE_INFO(int32_t, INT32), // SFIXED32 TYPE_INFO(int64_t, INT64), // SFIXED64 TYPE_INFO(int32_t, INT32), // SINT32 @@ -349,10 +360,36 @@ const upb_typeinfo upb_types[UPB_NUM_TYPES] = { static void upb_fielddef_init_default(upb_fielddef *f); +static void upb_fielddef_uninit_default(upb_fielddef *f) { + if (f->default_is_string) + upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); +} + +static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_fielddef *f = (const upb_fielddef*)r; + if (f->msgdef) { + visit(r, upb_upcast2(f->msgdef), closure); + } + if 
(!f->subdef_is_symbolic && f->sub.def) { + visit(r, upb_upcast(f->sub.def), closure); + } +} + +static void freefield(upb_refcounted *r) { + upb_fielddef *f = (upb_fielddef*)r; + upb_fielddef_uninit_default(f); + if (f->subdef_is_symbolic) + free(f->sub.name); + upb_def_uninit(upb_upcast(f)); + free(f); +} + upb_fielddef *upb_fielddef_new(const void *owner) { + static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield}; upb_fielddef *f = malloc(sizeof(*f)); if (!f) return NULL; - if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, owner)) { + if (!upb_def_init(upb_upcast(f), UPB_DEF_FIELD, &vtbl, owner)) { free(f); return NULL; } @@ -360,35 +397,18 @@ upb_fielddef *upb_fielddef_new(const void *owner) { f->sub.def = NULL; f->subdef_is_symbolic = false; f->subdef_is_owned = false; - f->label = UPB_LABEL(OPTIONAL); - f->hasbit = -1; - f->offset = 0; - f->accessor = NULL; - upb_value_setfielddef(&f->fval, f); + f->label_ = UPB_LABEL(OPTIONAL); // These are initialized to be invalid; the user must set them explicitly. // Could relax this later if it's convenient and non-confusing to have a // defaults for them. 
- f->type = UPB_TYPE_NONE; - f->number = 0; + f->type_ = UPB_TYPE_NONE; + f->number_ = 0; upb_fielddef_init_default(f); return f; } -static void upb_fielddef_uninit_default(upb_fielddef *f) { - if (f->default_is_string) - upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); -} - -static void upb_fielddef_free(upb_fielddef *f) { - if (f->subdef_is_owned) - upb_def_unref(f->sub.def, &f->sub.def); - upb_fielddef_uninit_default(f); - upb_def_uninit(UPB_UPCAST(f)); - free(f); -} - upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { upb_fielddef *newf = upb_fielddef_new(owner); if (!newf) return NULL; @@ -396,10 +416,6 @@ upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { upb_fielddef_setlabel(newf, upb_fielddef_label(f)); upb_fielddef_setnumber(newf, upb_fielddef_number(f)); upb_fielddef_setname(newf, upb_fielddef_name(f)); - upb_fielddef_sethasbit(newf, upb_fielddef_hasbit(f)); - upb_fielddef_setoffset(newf, upb_fielddef_offset(f)); - upb_fielddef_setaccessor(newf, upb_fielddef_accessor(f)); - upb_fielddef_setfval(newf, upb_fielddef_fval(f)); if (f->default_is_string) { upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); size_t len; @@ -424,13 +440,64 @@ upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { } strcpy(newname, "."); strcat(newname, f->sub.def->fullname); - upb_fielddef_setsubtypename(newf, newname); + upb_fielddef_setsubdefname(newf, newname); free(newname); } return newf; } +bool upb_fielddef_isfrozen(const upb_fielddef *f) { + return upb_def_isfrozen(upb_upcast(f)); +} + +void upb_fielddef_ref(const upb_fielddef *f, const void *owner) { + upb_def_ref(upb_upcast(f), owner); +} + +void upb_fielddef_unref(const upb_fielddef *f, const void *owner) { + upb_def_unref(upb_upcast(f), owner); +} + +void upb_fielddef_donateref( + const upb_fielddef *f, const void *from, const void *to) { + upb_def_donateref(upb_upcast(f), from, to); +} + +void 
upb_fielddef_checkref(const upb_fielddef *f, const void *owner) { + upb_def_checkref(upb_upcast(f), owner); +} + +upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { + return f->type_; +} + +upb_label_t upb_fielddef_label(const upb_fielddef *f) { + return f->label_; +} + +uint32_t upb_fielddef_number(const upb_fielddef *f) { return f->number_; } + +const char *upb_fielddef_name(const upb_fielddef *f) { + return upb_def_fullname(upb_upcast(f)); +} + +const upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) { + return f->msgdef; +} + +upb_msgdef *upb_fielddef_msgdef_mutable(upb_fielddef *f) { + return (upb_msgdef*)f->msgdef; +} + +bool upb_fielddef_setname(upb_fielddef *f, const char *name) { + return upb_def_setfullname(upb_upcast(f), name); +} + +upb_value upb_fielddef_default(const upb_fielddef *f) { + return f->defaultval; +} + static void upb_fielddef_init_default(upb_fielddef *f) { f->default_is_string = false; switch (upb_fielddef_type(f)) { @@ -455,13 +522,12 @@ static void upb_fielddef_init_default(upb_fielddef *f) { break; case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break; - case UPB_TYPE_ENDGROUP: assert(false); case UPB_TYPE_NONE: break; } } const upb_def *upb_fielddef_subdef(const upb_fielddef *f) { - if (upb_hassubdef(f) && upb_fielddef_isfinalized(f)) { + if (upb_fielddef_hassubdef(f) && upb_fielddef_isfrozen(f)) { assert(f->sub.def); return f->sub.def; } else { @@ -473,65 +539,34 @@ upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) { return (upb_def*)upb_fielddef_subdef(f); } -const char *upb_fielddef_subtypename(upb_fielddef *f) { - assert(upb_fielddef_ismutable(f)); +const char *upb_fielddef_subdefname(const upb_fielddef *f) { + assert(!upb_fielddef_isfrozen(f)); return f->subdef_is_symbolic ? f->sub.name : NULL; } -// Could expose this to clients if a client wants to call it independently -// of upb_resolve() for whatever reason. 
-static bool upb_fielddef_resolvedefault(upb_fielddef *f, upb_status *s) { - if (!f->default_is_string) return true; - // Resolve the enum's default from a string to an integer. - upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); - assert(bytes); // Points to either a real default or the empty string. - upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef_mutable(f)); - int32_t val = 0; - if (upb_byteregion_len(bytes) == 0) { - upb_value_setint32(&f->defaultval, e->defaultval); - } else { - size_t len; - // ptr is guaranteed to be NULL-terminated because the byteregion was - // created with upb_byteregion_newl(). - const char *ptr = upb_byteregion_getptr( - bytes, upb_byteregion_startofs(bytes), &len); - assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. - bool success = upb_enumdef_ntoi(e, ptr, &val); - if (!success) { - upb_status_seterrf( - s, "Default enum value (%s) is not a member of the enum", ptr); - return false; - } - upb_value_setint32(&f->defaultval, val); - } - f->default_is_string = false; - upb_byteregion_free(bytes); - return true; -} - -bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number) { +bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number) { assert(f->msgdef == NULL); - f->number = number; + f->number_ = number; return true; } bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) { - assert(upb_fielddef_ismutable(f)); + assert(!upb_fielddef_isfrozen(f)); upb_fielddef_uninit_default(f); - f->type = type; + f->type_ = type; upb_fielddef_init_default(f); return true; } bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) { - assert(upb_fielddef_ismutable(f)); - f->label = label; + assert(!upb_fielddef_isfrozen(f)); + f->label_ = label; return true; } void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { - assert(upb_fielddef_ismutable(f)); - assert(!upb_isstring(f) && !upb_issubmsg(f)); + assert(!upb_fielddef_isfrozen(f)); + assert(!upb_fielddef_isstring(f) 
&& !upb_fielddef_issubmsg(f)); if (f->default_is_string) { upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); assert(bytes); @@ -542,20 +577,21 @@ void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { } bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { - assert(upb_isstring(f) || f->type == UPB_TYPE(ENUM)); + assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE(ENUM)); + if (f->type_ == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false; + if (f->default_is_string) { upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); assert(bytes); upb_byteregion_free(bytes); } else { - assert(f->type == UPB_TYPE(ENUM)); + assert(f->type_ == UPB_TYPE(ENUM)); } - if (f->type == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false; + upb_byteregion *r = upb_byteregion_newl(str, len); upb_value_setbyteregion(&f->defaultval, r); upb_bytesuccess_t ret = upb_byteregion_fetch(r); - (void)ret; - assert(ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK)); + UPB_ASSERT_VAR(ret, ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK)); assert(upb_byteregion_available(r, 0) == upb_byteregion_len(r)); f->default_is_string = true; return true; @@ -565,77 +601,127 @@ void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0); } -void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) { - assert(upb_fielddef_ismutable(f)); - // TODO: we need an ownership/freeing mechanism for dynamically-allocated - // fvals. One possibility is to let the user supply a free() function - // and call it when the fval is no longer referenced. Would have to - // ensure that no common use cases need cycles. - // - // For now the fval has no ownership; the caller must simply guarantee - // somehow that it outlives any handlers/plan. 
- f->fval = fval; +bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { + return f->default_is_string && f->type_ == UPB_TYPE_ENUM; } -void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit) { - assert(upb_fielddef_ismutable(f)); - f->hasbit = hasbit; -} +bool upb_fielddef_resolvedefault(upb_fielddef *f) { + if (!upb_fielddef_default_is_symbolic(f)) return true; -void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset) { - assert(upb_fielddef_ismutable(f)); - f->offset = offset; -} - -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *tbl) { - assert(upb_fielddef_ismutable(f)); - f->accessor = tbl; + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + const upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef(f)); + assert(bytes); // Points to either a real default or the empty string. + assert(e); + if (upb_byteregion_len(bytes) == 0) { + // The "default default" for an enum is the first defined value. + upb_value_setint32(&f->defaultval, e->defaultval); + } else { + size_t len; + int32_t val = 0; + // ptr is guaranteed to be NULL-terminated because the byteregion was + // created with upb_byteregion_newl(). 
+ const char *ptr = upb_byteregion_getptr( + bytes, upb_byteregion_startofs(bytes), &len); + assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk + if (!upb_enumdef_ntoi(e, ptr, &val)) { + return false; + } + upb_value_setint32(&f->defaultval, val); + } + f->default_is_string = false; + upb_byteregion_free(bytes); + return true; } -static bool upb_subtype_typecheck(upb_fielddef *f, const upb_def *subdef) { - if (f->type == UPB_TYPE(MESSAGE) || f->type == UPB_TYPE(GROUP)) - return upb_dyncast_msgdef_const(subdef) != NULL; - else if (f->type == UPB_TYPE(ENUM)) - return upb_dyncast_enumdef_const(subdef) != NULL; +static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef) { + if (f->type_ == UPB_TYPE(MESSAGE) || f->type_ == UPB_TYPE(GROUP)) + return upb_dyncast_msgdef(subdef) != NULL; + else if (f->type_ == UPB_TYPE(ENUM)) + return upb_dyncast_enumdef(subdef) != NULL; else { assert(false); return false; } } -bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef) { - assert(upb_fielddef_ismutable(f)); - assert(upb_hassubdef(f)); - assert(subdef); - if (!upb_subtype_typecheck(f, subdef)) return false; - if (f->subdef_is_symbolic) free(f->sub.name); +static void release_subdef(upb_fielddef *f) { + if (f->subdef_is_symbolic) { + free(f->sub.name); + } else if (f->sub.def) { + upb_unref2(f->sub.def, f); + } +} + +bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef) { + assert(!upb_fielddef_isfrozen(f)); + assert(upb_fielddef_hassubdef(f)); + if (subdef && !upb_subdef_typecheck(f, subdef)) return false; + release_subdef(f); f->sub.def = subdef; f->subdef_is_symbolic = false; + if (f->sub.def) upb_ref2(f->sub.def, f); return true; } -bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name) { - assert(upb_fielddef_ismutable(f)); - assert(upb_hassubdef(f)); - if (f->subdef_is_symbolic) free(f->sub.name); - f->sub.name = strdup(name); +bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name) { + 
assert(!upb_fielddef_isfrozen(f)); + assert(upb_fielddef_hassubdef(f)); + release_subdef(f); + f->sub.name = upb_strdup(name); f->subdef_is_symbolic = true; return true; } +bool upb_fielddef_issubmsg(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_GROUP || + upb_fielddef_type(f) == UPB_TYPE_MESSAGE; +} + +bool upb_fielddef_isstring(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_STRING || + upb_fielddef_type(f) == UPB_TYPE_BYTES; +} + +bool upb_fielddef_isseq(const upb_fielddef *f) { + return upb_fielddef_label(f) == UPB_LABEL_REPEATED; +} + +bool upb_fielddef_isprimitive(const upb_fielddef *f) { + return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f); +} + +bool upb_fielddef_hassubdef(const upb_fielddef *f) { + return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE(ENUM); +} + /* upb_msgdef *****************************************************************/ +static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_msgdef *m = (const upb_msgdef*)r; + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + visit(r, upb_upcast2(f), closure); + } +} + +static void freemsg(upb_refcounted *r) { + upb_msgdef *m = (upb_msgdef*)r; + upb_strtable_uninit(&m->ntof); + upb_inttable_uninit(&m->itof); + upb_def_uninit(upb_upcast(m)); + free(m); +} + upb_msgdef *upb_msgdef_new(const void *owner) { + static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg}; upb_msgdef *m = malloc(sizeof(*m)); if (!m) return NULL; - if (!upb_def_init(&m->base, UPB_DEF_MSG, owner)) goto err2; - if (!upb_inttable_init(&m->itof)) goto err2; - if (!upb_strtable_init(&m->ntof)) goto err1; - m->size = 0; - m->hasbit_bytes = 0; - m->extstart = 0; - m->extend = 0; + if (!upb_def_init(upb_upcast(m), UPB_DEF_MSG, &vtbl, owner)) goto err2; + if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2; + if 
(!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1; return m; err1: @@ -645,20 +731,10 @@ err2: return NULL; } -static void upb_msgdef_free(upb_msgdef *m) { - upb_strtable_uninit(&m->ntof); - upb_inttable_uninit(&m->itof); - upb_def_uninit(&m->base); - free(m); -} - upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { upb_msgdef *newm = upb_msgdef_new(owner); if (!newm) return NULL; - upb_msgdef_setsize(newm, upb_msgdef_size(m)); - upb_msgdef_sethasbit_bytes(newm, upb_msgdef_hasbit_bytes(m)); - upb_msgdef_setextrange(newm, upb_msgdef_extstart(m), upb_msgdef_extend(m)); - upb_def_setfullname(UPB_UPCAST(newm), upb_def_fullname(UPB_UPCAST(m))); + upb_def_setfullname(upb_upcast(newm), upb_def_fullname(upb_upcast(m))); upb_msg_iter i; for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f); @@ -670,26 +746,33 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { return newm; } -void upb_msgdef_setsize(upb_msgdef *m, uint16_t size) { - assert(upb_def_ismutable(UPB_UPCAST(m))); - m->size = size; +bool upb_msgdef_isfrozen(const upb_msgdef *m) { + return upb_def_isfrozen(upb_upcast(m)); } -void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes) { - assert(upb_def_ismutable(UPB_UPCAST(m))); - m->hasbit_bytes = bytes; +void upb_msgdef_ref(const upb_msgdef *m, const void *owner) { + upb_def_ref(upb_upcast(m), owner); } -bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end) { - assert(upb_def_ismutable(UPB_UPCAST(m))); - if (start == 0 && end == 0) { - // Clearing the extension range -- ok to fall through. 
- } else if (start >= end || start < 1 || end > UPB_MAX_FIELDNUMBER) { - return false; - } - m->extstart = start; - m->extend = start; - return true; +void upb_msgdef_unref(const upb_msgdef *m, const void *owner) { + upb_def_unref(upb_upcast(m), owner); +} + +void upb_msgdef_donateref( + const upb_msgdef *m, const void *from, const void *to) { + upb_def_donateref(upb_upcast(m), from, to); +} + +void upb_msgdef_checkref(const upb_msgdef *m, const void *owner) { + upb_def_checkref(upb_upcast(m), owner); +} + +const char *upb_msgdef_fullname(const upb_msgdef *m) { + return upb_def_fullname(upb_upcast(m)); +} + +bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname) { + return upb_def_setfullname(upb_upcast(m), fullname); } bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, @@ -697,6 +780,8 @@ bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, // Check constraints for all fields before performing any action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; + // TODO(haberman): handle the case where two fields of the input duplicate + // name or number. 
if (f->msgdef != NULL || upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0 || upb_msgdef_itof(m, upb_fielddef_number(f)) || @@ -710,306 +795,48 @@ bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, f->msgdef = m; upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); - upb_fielddef_ref(f, m); + upb_ref2(f, m); + upb_ref2(m, f); if (ref_donor) upb_fielddef_unref(f, ref_donor); } return true; } -void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { - upb_inttable_begin(iter, &m->itof); +bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, + const void *ref_donor) { + return upb_msgdef_addfields(m, &f, 1, ref_donor); } -void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } - - -/* upb_symtab *****************************************************************/ - -upb_symtab *upb_symtab_new(const void *owner) { - upb_symtab *s = malloc(sizeof(*s)); - upb_refcount_init(&s->refcount, owner); - upb_strtable_init(&s->symtab); - return s; +const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { + const upb_value *val = upb_inttable_lookup32(&m->itof, i); + return val ? (const upb_fielddef*)upb_value_getptr(*val) : NULL; } -void upb_symtab_ref(const upb_symtab *s, const void *owner) { - upb_refcount_ref(&s->refcount, owner); +const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) { + const upb_value *val = upb_strtable_lookup(&m->ntof, name); + return val ? 
(upb_fielddef*)upb_value_getptr(*val) : NULL; } -void upb_symtab_unref(const upb_symtab *s, const void *owner) { - if(s && upb_refcount_unref(&s->refcount, owner)) { - upb_symtab *destroying = (upb_symtab*)s; - upb_strtable_iter i; - upb_strtable_begin(&i, &destroying->symtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) - upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s); - upb_strtable_uninit(&destroying->symtab); - upb_refcount_uninit(&destroying->refcount); - free(destroying); - } +upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) { + return (upb_fielddef*)upb_msgdef_itof(m, i); } -void upb_symtab_donateref( - const upb_symtab *s, const void *from, const void *to) { - upb_refcount_donateref(&s->refcount, from, to); -} - -const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, - upb_deftype_t type, const void *owner) { - int total = upb_strtable_count(&s->symtab); - // We may only use part of this, depending on how many symbols are of the - // correct type. - const upb_def **defs = malloc(sizeof(*defs) * total); - upb_strtable_iter iter; - upb_strtable_begin(&iter, &s->symtab); - int i = 0; - for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); - assert(def); - if(type == UPB_DEF_ANY || def->type == type) - defs[i++] = def; - } - *count = i; - if (owner) - for(i = 0; i < *count; i++) upb_def_ref(defs[i], owner); - return defs; -} - -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, - const void *owner) { - const upb_value *v = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = v ? upb_value_getptr(*v) : NULL; - if (ret) upb_def_ref(ret, owner); - return ret; -} - -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, - const void *owner) { - const upb_value *v = upb_strtable_lookup(&s->symtab, sym); - upb_def *def = v ? 
upb_value_getptr(*v) : NULL; - upb_msgdef *ret = NULL; - if(def && def->type == UPB_DEF_MSG) { - ret = upb_downcast_msgdef(def); - upb_def_ref(def, owner); - } - return ret; -} - -// Given a symbol and the base symbol inside which it is defined, find the -// symbol's definition in t. -static upb_def *upb_resolvename(const upb_strtable *t, - const char *base, const char *sym) { - if(strlen(sym) == 0) return NULL; - if(sym[0] == UPB_SYMBOL_SEPARATOR) { - // Symbols starting with '.' are absolute, so we do a single lookup. - // Slice to omit the leading '.' - const upb_value *v = upb_strtable_lookup(t, sym + 1); - return v ? upb_value_getptr(*v) : NULL; - } else { - // Remove components from base until we find an entry or run out. - // TODO: This branch is totally broken, but currently not used. - (void)base; - assert(false); - return NULL; - } +upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, const char *name) { + return (upb_fielddef*)upb_msgdef_ntof(m, name); } -const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym, const void *owner) { - upb_def *ret = upb_resolvename(&s->symtab, base, sym); - if (ret) upb_def_ref(ret, owner); - return ret; -} - -// Adds dups of any existing def that can reach a def with the same name as one -// of "defs." This is to provide a consistent output graph as documented in -// the header file. We use a modified depth-first traversal that traverses -// each SCC (which we already computed) as if it were a single node. This -// allows us to traverse the possibly-cyclic graph as if it were a DAG and to -// easily dup the correct set of nodes with O(n) time. -// -// Returns true if defs that can reach "def" need to be duplicated into deftab. 
-static bool upb_resolve_dfs(const upb_def *def, upb_strtable *deftab, - const void *new_owner, upb_inttable *seen, - upb_status *s) { - // Memoize results of this function for efficiency (since we're traversing a - // DAG this is not needed to limit the depth of the search). - upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def); - if (v) return upb_value_getbool(*v); - - // Visit submessages for all messages in the SCC. - bool need_dup = false; - const upb_def *base = def; - do { - assert(upb_def_isfinalized(def)); - if (def->type == UPB_DEF_FIELD) continue; - upb_value *v = upb_strtable_lookup(deftab, upb_def_fullname(def)); - if (v) { - upb_def *add_def = upb_value_getptr(*v); - if (add_def->refcount.next && add_def->refcount.next != &def->refcount) { - upb_status_seterrf(s, "conflicting existing defs for name: '%s'", - upb_def_fullname(def)); - return false; - } - need_dup = true; - } - const upb_msgdef *m = upb_dyncast_msgdef_const(def); - if (m) { - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - if (!upb_hassubdef(f)) continue; - // |= to avoid short-circuit; we need its side-effects. - need_dup |= upb_resolve_dfs( - upb_fielddef_subdef_mutable(f), deftab, new_owner, seen, s); - if (!upb_ok(s)) return false; - } - } - } while ((def = (upb_def*)def->refcount.next) != base); - - if (need_dup) { - // Dup any defs that don't already have entries in deftab. - def = base; - do { - if (def->type == UPB_DEF_FIELD) continue; - const char *name = upb_def_fullname(def); - if (upb_strtable_lookup(deftab, name) == NULL) { - upb_def *newdef = upb_def_dup(def, new_owner); - if (!newdef) goto oom; - // We temporarily use this field to track who we were dup'd from. 
- newdef->refcount.next = (upb_refcount*)def; - if (!upb_strtable_insert(deftab, name, upb_value_ptr(newdef))) - goto oom; - } - } while ((def = (upb_def*)def->refcount.next) != base); - } - - upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup)); - return need_dup; - -oom: - upb_status_seterrliteral(s, "out of memory"); - return false; +int upb_msgdef_numfields(const upb_msgdef *m) { + return upb_strtable_count(&m->ntof); } -bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, - upb_status *status) { - upb_def **add_defs = NULL; - upb_strtable addtab; - if (!upb_strtable_init(&addtab)) { - upb_status_seterrliteral(status, "out of memory"); - return false; - } - - // Add new defs to table. - for (int i = 0; i < n; i++) { - upb_def *def = defs[i]; - assert(upb_def_ismutable(def)); - const char *fullname = upb_def_fullname(def); - if (!fullname) { - upb_status_seterrliteral( - status, "Anonymous defs cannot be added to a symtab"); - goto err; - } - if (upb_strtable_lookup(&addtab, fullname) != NULL) { - upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); - goto err; - } - if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) - goto oom_err; - // We temporarily use this field to indicate that we came from the user's - // list rather than being dup'd. - def->refcount.next = NULL; - } - - // Add dups of any existing def that can reach a def with the same name as - // one of "defs." - upb_inttable seen; - if (!upb_inttable_init(&seen)) goto oom_err; - upb_strtable_iter i; - upb_strtable_begin(&i, &s->symtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); - upb_resolve_dfs(def, &addtab, ref_donor, &seen, status); - if (!upb_ok(status)) goto err; - } - upb_inttable_uninit(&seen); - - // Now using the table, resolve symbolic references. 
- upb_strtable_begin(&i, &addtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); - upb_msgdef *m = upb_dyncast_msgdef(def); - if (!m) continue; - // Type names are resolved relative to the message in which they appear. - const char *base = upb_def_fullname(UPB_UPCAST(m)); - - upb_msg_iter j; - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { - upb_fielddef *f = upb_msg_iter_field(&j); - const char *name = upb_fielddef_subtypename(f); - if (name) { - upb_def *subdef = upb_resolvename(&addtab, base, name); - if (subdef == NULL) { - upb_status_seterrf( - status, "couldn't resolve name '%s' in message '%s'", name, base); - goto err; - } else if (!upb_fielddef_setsubdef(f, subdef)) { - upb_status_seterrf( - status, "def '%s' had the wrong type for field '%s'", - upb_def_fullname(subdef), upb_fielddef_name(f)); - goto err; - } - } - - if (upb_fielddef_type(f) == UPB_TYPE(ENUM) && upb_fielddef_subdef(f) && - !upb_fielddef_resolvedefault(f, status)) - goto err; - } - } - - // We need an array of the defs in addtab, for passing to upb_finalize. - add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab)); - if (add_defs == NULL) goto oom_err; - upb_strtable_begin(&i, &addtab); - for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) - add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i)); - - // Restore the next pointer that we stole. 
- for (int i = 0; i < n; i++) - add_defs[i]->refcount.next = &add_defs[i]->refcount; +void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { + upb_inttable_begin(iter, &m->itof); +} - if (!upb_finalize(add_defs, n, status)) goto err; - upb_strtable_uninit(&addtab); +void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } - for (int i = 0; i < n; i++) { - upb_def *def = add_defs[i]; - const char *name = upb_def_fullname(def); - upb_def_donateref(def, ref_donor, s); - upb_value *v = upb_strtable_lookup(&s->symtab, name); - if(v) { - upb_def_unref(upb_value_getptr(*v), s); - upb_value_setptr(v, def); - } else { - upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); - } - } - free(add_defs); - return true; +bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); } -oom_err: - upb_status_seterrliteral(status, "out of memory"); -err: { - // Need to unref any defs we dup'd (we can distinguish them from defs that - // the user passed in by their def->refcount.next pointers). - upb_strtable_iter i; - upb_strtable_begin(&i, &addtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); - if (def->refcount.next) upb_def_unref(def, s); - } - } - upb_strtable_uninit(&addtab); - free(add_defs); - return false; +upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) { + return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); } @@ -12,47 +12,48 @@ * - upb_enumdef: describes an enum. * (TODO: definitions of services). * - * Defs go through two distinct phases of life: + * Like upb_refcounted objects, defs are mutable only until frozen, and are + * only thread-safe once frozen. * - * 1. MUTABLE: when first created, the properties of the def can be set freely - * (for example a message's name, its list of fields, the name/number of - * fields, etc). 
During this phase the def is *not* thread-safe, and may - * not be used for any purpose except to set its properties (it can't be - * used to parse anything, create any messages in memory, etc). - * - * 2. FINALIZED: the upb_def_finalize() operation finalizes a set of defs, - * which makes them thread-safe and immutable. Finalized defs may only be - * accessed through a CONST POINTER. If you want to modify an existing - * immutable def, copy it with upb_*_dup() and modify and finalize the copy. - * - * The refcounting of defs works properly no matter what state the def is in. - * Once the def is finalized it is guaranteed that any def reachable from a - * live def is also live (so a ref on the base of a message tree keeps the - * whole tree alive). - * - * You can test for which stage of life a def is in by calling - * upb_def_ismutable(). This is particularly useful for dynamic language - * bindings, which must properly guarantee that the dynamic language cannot - * break the rules laid out above. - * - * It would be possible to make the defs thread-safe during stage 1 by using - * mutexes internally and changing any methods returning pointers to return - * copies instead. This could be important if we are integrating with a VM or - * interpreter that does not naturally serialize access to wrapped objects (for - * example, in the case of Python this is not necessary because of the GIL). + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. 
*/ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ -#include "upb/refcount.h" -#include "upb/table.h" - #ifdef __cplusplus -extern "C" { +#include <cstring> +#include <string> +#include <vector> + +namespace upb { +class Def; +class EnumDef; +class FieldDef; +class MessageDef; +} + +typedef upb::Def upb_def; +typedef upb::EnumDef upb_enumdef; +typedef upb::FieldDef upb_fielddef; +typedef upb::MessageDef upb_msgdef; +#else +struct upb_def; +struct upb_enumdef; +struct upb_fielddef; +struct upb_msgdef; + +typedef struct upb_def upb_def; +typedef struct upb_enumdef upb_enumdef; +typedef struct upb_fielddef upb_fielddef; +typedef struct upb_msgdef upb_msgdef; #endif -/* upb_def: base class for defs **********************************************/ +#include "upb/refcounted.h" + + +/* upb::Def: base class for defs *********************************************/ // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. @@ -65,64 +66,97 @@ typedef enum { UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*() } upb_deftype_t; -typedef struct _upb_def { - upb_refcount refcount; - char *fullname; - upb_deftype_t type; - bool is_finalized; -} upb_def; +#ifdef __cplusplus -#define UPB_UPCAST(ptr) (&(ptr)->base) +class upb::Def { + public: + typedef upb_deftype_t Type; + + Def* Dup(const void *owner) const; + + // Though not declared as such in C++, upb::RefCounted is the base of + // Def and we can upcast to it. + RefCounted* Upcast(); + const RefCounted* Upcast() const; + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + Type def_type() const; + + // "fullname" is the def's fully-qualified name (eg. foo.bar.Message). + const char *full_name() const; + + // The def must be mutable. Caller retains ownership of fullname. 
Defs are + // not required to have a name; if a def has no name when it is frozen, it + // will remain an anonymous def. + bool set_full_name(const char *fullname); + bool set_full_name(const std::string& fullname); + + // Freezes the given defs; this validates all constraints and marks the defs + // as frozen (read-only). "defs" may not contain any fielddefs, but fields + // of any msgdefs will be frozen. + // + // Symbolic references to sub-types and enum defaults must have already been + // resolved. Any mutable defs reachable from any of "defs" must also be in + // the list; more formally, "defs" must be a transitive closure of mutable + // defs. + // + // After this operation succeeds, the finalized defs must only be accessed + // through a const pointer! + static bool Freeze(Def *const*defs, int n, Status *status); + static bool Freeze(const std::vector<Def*>& defs, Status *status); + + private: + UPB_DISALLOW_POD_OPS(Def); + +#else +struct upb_def { +#endif + upb_refcounted base; + const char *fullname; + upb_deftype_t type:8; + // Used as a flag during the def's mutable stage. Must be false unless + // it is currently being used by a function on the stack. This allows + // us to easily determine which defs were passed into the function's + // current invocation. + bool came_from_user; +}; + +#define UPB_DEF_INIT(name, type) {UPB_REFCOUNT_INIT, name, type, false} + +// Native C API. +#ifdef __cplusplus +extern "C" { +#endif +upb_def *upb_def_dup(const upb_def *def, const void *owner); -// Call to ref/unref a def. These are thread-safe. If the def is finalized, -// it is guaranteed that any def reachable from a live def is also live. +// From upb_refcounted. 
+bool upb_def_isfrozen(const upb_def *def); void upb_def_ref(const upb_def *def, const void *owner); void upb_def_unref(const upb_def *def, const void *owner); void upb_def_donateref(const upb_def *def, const void *from, const void *to); +void upb_def_checkref(const upb_def *def, const void *owner); -upb_def *upb_def_dup(const upb_def *def, const void *owner); - -// A def is mutable until it has been finalized. -bool upb_def_ismutable(const upb_def *def); -bool upb_def_isfinalized(const upb_def *def); - -// "fullname" is the def's fully-qualified name (eg. foo.bar.Message). -INLINE const char *upb_def_fullname(const upb_def *d) { return d->fullname; } - -// The def must be mutable. Caller retains ownership of fullname. Defs are -// not required to have a name; if a def has no name when it is finalized, it -// will remain an anonymous def. +upb_deftype_t upb_def_type(const upb_def *d); +const char *upb_def_fullname(const upb_def *d); bool upb_def_setfullname(upb_def *def, const char *fullname); +bool upb_def_freeze(upb_def *const*defs, int n, upb_status *status); +#ifdef __cplusplus +} // extern "C" +#endif + -// Finalizes the given defs; this validates all constraints and marks the defs -// as finalized (read-only). This will also cause fielddefs to take refs on -// their subdefs so that any reachable def will be kept alive (but this is -// done in a way that correctly handles circular references). -// -// On success, a new list is returned containing the finalized defs and -// ownership of the "defs" list passes to the function. On failure NULL is -// returned and the caller retains ownership of "defs." -// -// Symbolic references to sub-types or enum defaults must have already been -// resolved. "defs" must contain the transitive closure of any mutable defs -// reachable from the any def in the list. In other words, there may not be a -// mutable def which is reachable from one of "defs" that does not appear -// elsewhere in "defs." 
"defs" may not contain fielddefs, but any fielddefs -// reachable from the given msgdefs will be finalized. -// -// n is currently limited to 64k defs, if more are required break them into -// batches of 64k (or we could raise this limit, at the cost of a bigger -// upb_def structure or complexity in upb_finalize()). -bool upb_finalize(upb_def *const*defs, int n, upb_status *status); - - -/* upb_fielddef ***************************************************************/ +/* upb::FieldDef **************************************************************/ // We choose these to match descriptor.proto. Clients may use UPB_TYPE() and // UPB_LABEL() instead of referencing these directly. typedef enum { UPB_TYPE_NONE = -1, // Internal-only, may be removed. - UPB_TYPE_ENDGROUP = 0, // Internal-only, may be removed. UPB_TYPE_DOUBLE = 1, UPB_TYPE_FLOAT = 2, UPB_TYPE_INT64 = 3, @@ -164,426 +198,485 @@ typedef struct { extern const upb_typeinfo upb_types[UPB_NUM_TYPES]; +#ifdef __cplusplus + // A upb_fielddef describes a single field in a message. It is most often // found as a part of a upb_msgdef, but can also stand alone to represent // an extension. -typedef struct _upb_fielddef { +class upb::FieldDef { + public: + typedef upb_fieldtype_t Type; + typedef upb_label_t Label; + + // Returns NULL if memory allocation failed. + static FieldDef* New(const void *owner); + + // Duplicates the given field, returning NULL if memory allocation failed. + // When a fielddef is duplicated, the subdef (if any) is made symbolic if it + // wasn't already. If the subdef is set but has no name (which is possible + // since msgdefs are not required to have a name) the new fielddef's subdef + // will be unset. + FieldDef* Dup(const void *owner) const; + + // Though not declared as such in C++, upb::Def is the base of FieldDef and + // we can upcast to it. + Def* Upcast(); + const Def* Upcast() const; + + // Functionality from upb::RefCounted. 
+ bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Functionality from upb::Def. + const char *full_name() const; + bool set_full_name(const char *fullname); + bool set_full_name(const std::string& fullname); + + Type type() const; // Returns UPB_TYPE_NONE if uninitialized. + Label label() const; // Defaults to UPB_LABEL_OPTIONAL. + uint32_t number() const; // Returns 0 if uninitialized. + const MessageDef* message_def() const; + + // "number" and "name" must be set before the fielddef is added to a msgdef. + // For the moment we do not allow these to be set once the fielddef is added + // to a msgdef -- this could be relaxed in the future. + bool set_number(uint32_t number); + bool set_type(upb_fieldtype_t type); + bool set_label(upb_label_t label); + + // These are the same as full_name()/set_full_name(), but since fielddefs + // most often use simple, non-qualified names, we provide this accessor + // also. Generally only extensions will want to think of this name as + // fully-qualified. + bool set_name(const char *name); + bool set_name(const std::string& name); + const char *name() const; + + bool IsSubMessage() const; + bool IsString() const; + bool IsSequence() const; + bool IsPrimitive() const; + + // Returns the default value for this fielddef, which may either be something + // the client set explicitly or the "default default" (0 for numbers, empty + // for strings). The field's type indicates the type of the returned value, + // except for enum fields that are still mutable. + // + // For enums the default can be set either numerically or symbolically -- the + // upb_fielddef_default_is_symbolic() function below will indicate which it + // is. For string defaults, the value will be a upb_byteregion which is + // invalidated by any other non-const call on this object.
Once the fielddef + // is frozen, symbolic enum defaults are resolved, so frozen enum fielddefs + // always have a default of type int32. + Value default_value() const; + + // Sets default value for the field. For numeric types, use + // upb_fielddef_setdefault(), and "value" must match the type of the field. + // For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may + // use either, since the default may be set either numerically or + // symbolically. + // + // NOTE: May only be called for fields whose type has already been set. + // Also, will be reset to default if the field's type is set again. + void set_default_value(Value value); + bool set_default_string(const void *str, size_t len); + bool set_default_string(const std::string& str); + void set_default_cstr(const char *str); + + // The results of this function are only meaningful for mutable enum fields, + // which can have a default specified either as an integer or as a string. + // If this returns true, the default returned from upb_fielddef_default() is + // a string, otherwise it is an integer. + bool IsDefaultSymbolic() const; + + // If this is an enum field with a symbolic default, resolves the default and + // returns true if resolution was successful or if this field didn't need to + // be resolved (because it is not an enum with a symbolic default). + bool ResolveDefault(); + + // Submessage and enum fields must reference a "subdef", which is the + // upb_msgdef or upb_enumdef that defines their type. Note that when the + // fielddef is mutable it may not have a subdef *yet*, but this function + // still returns true to indicate that the field's type requires a subdef. + bool HasSubDef() const; + + // Returns the enum or submessage def or symbolic name for this field, if + // any. Requires that upb_hassubdef(f). Returns NULL if the subdef has not + // been set or if you ask for a subdef when the subdef is currently set + // symbolically (or vice-versa). 
To access the subdef's name for a linked + // fielddef, use upb_def_fullname(upb_fielddef_subdef(f)). + // + // Caller does *not* own a ref on the returned def or string. + // upb_fielddef_subdefname() is non-const because frozen defs will never + // have a symbolic reference (they must be resolved before the msgdef can be + // frozen). + const Def* subdef() const; + const char* subdef_name() const; + + // Before a fielddef is frozen, its subdef may be set either directly (with a + // upb::Def*) or symbolically. Symbolic refs must be resolved before the + // containing msgdef can be frozen (see upb_resolve() above). The client is + // responsible for making sure that "subdef" lives until this fielddef is + // frozen or deleted. + // + // Both methods require that upb_hassubdef(f) (so the type must be set prior + // to calling these methods). Returns false if this is not the case, or if + // the given subdef is not of the correct type. The subdef is reset if the + // field's type is changed. The subdef can be set to NULL to clear it. + bool set_subdef(const Def* subdef); + bool set_subdef_name(const char* name); + bool set_subdef_name(const std::string& name); + + private: + UPB_DISALLOW_POD_OPS(FieldDef); + +#else +struct upb_fielddef { +#endif upb_def base; - struct _upb_msgdef *msgdef; + const upb_msgdef *msgdef; union { + const upb_def *def; // If !subdef_is_symbolic. char *name; // If subdef_is_symbolic. - upb_def *def; // If !subdef_is_symbolic. } sub; // The msgdef or enumdef for this field, if upb_hassubdef(f). bool subdef_is_symbolic; bool default_is_string; bool subdef_is_owned; - upb_fieldtype_t type; - upb_label_t label; - int16_t hasbit; - uint16_t offset; - int32_t number; + upb_fieldtype_t type_; + upb_label_t label_; + uint32_t number_; upb_value defaultval; // Only for non-repeated scalars and strings. - upb_value fval; - struct _upb_accessor_vtbl *accessor; - const void *prototype; -} upb_fielddef; - -// Returns NULL if memory allocation failed.
+ uint32_t selector_base; // Used to index into a upb::Handlers table. +}; + +// This will only work for static initialization because of the subdef_is_owned +// initialization. Theoretically the other _INIT() macros could possibly work +// for non-static initialization, but this has not been tested. +#define UPB_FIELDDEF_INIT(label, type, name, num, msgdef, subdef, \ + selector_base, defaultval) \ + {UPB_DEF_INIT(name, UPB_DEF_FIELD), msgdef, {subdef}, false, \ + type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, \ + false, /* subdef_is_owned: not used since fielddef is not freed. */ \ + type, label, num, defaultval, selector_base} + +// Native C API. +#ifdef __cplusplus +extern "C" { +#endif upb_fielddef *upb_fielddef_new(const void *owner); - -INLINE void upb_fielddef_ref(upb_fielddef *f, const void *owner) { - upb_def_ref(UPB_UPCAST(f), owner); -} -INLINE void upb_fielddef_unref(upb_fielddef *f, const void *owner) { - upb_def_unref(UPB_UPCAST(f), owner); -} - -// Duplicates the given field, returning NULL if memory allocation failed. -// When a fielddef is duplicated, the subdef (if any) is made symbolic if it -// wasn't already. If the subdef is set but has no name (which is possible -// since msgdefs are not required to have a name) the new fielddef's subdef -// will be unset. upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner); -INLINE bool upb_fielddef_ismutable(const upb_fielddef *f) { - return upb_def_ismutable(UPB_UPCAST(f)); -} -INLINE bool upb_fielddef_isfinalized(const upb_fielddef *f) { - return !upb_fielddef_ismutable(f); -} - -// Simple accessors.
/////////////////////////////////////////////////////////// - -INLINE upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { - return f->type; -} -INLINE upb_label_t upb_fielddef_label(const upb_fielddef *f) { - return f->label; -} -INLINE int32_t upb_fielddef_number(const upb_fielddef *f) { return f->number; } -INLINE uint16_t upb_fielddef_offset(const upb_fielddef *f) { return f->offset; } -INLINE int16_t upb_fielddef_hasbit(const upb_fielddef *f) { return f->hasbit; } -INLINE const char *upb_fielddef_name(const upb_fielddef *f) { - return upb_def_fullname(UPB_UPCAST(f)); -} -INLINE upb_value upb_fielddef_fval(const upb_fielddef *f) { return f->fval; } -INLINE struct _upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) { - return f->msgdef; -} -INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(const upb_fielddef *f) { - return f->accessor; -} - +// From upb_refcounted. +bool upb_fielddef_isfrozen(const upb_fielddef *f); +void upb_fielddef_ref(const upb_fielddef *f, const void *owner); +void upb_fielddef_unref(const upb_fielddef *f, const void *owner); +void upb_fielddef_donateref( + const upb_fielddef *f, const void *from, const void *to); +void upb_fielddef_checkref(const upb_fielddef *f, const void *owner); + +// From upb_def. 
+const char *upb_fielddef_fullname(const upb_fielddef *f); +bool upb_fielddef_setfullname(upb_fielddef *f, const char *fullname); + +upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f); +upb_label_t upb_fielddef_label(const upb_fielddef *f); +uint32_t upb_fielddef_number(const upb_fielddef *f); +const char *upb_fielddef_name(const upb_fielddef *f); +const upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f); +upb_msgdef *upb_fielddef_msgdef_mutable(upb_fielddef *f); bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type); bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label); -void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit); -void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset); -// TODO(haberman): need a way of keeping the fval alive even if some handlers -// outlast the fielddef. -void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); - -// "Number" and "fullname" must be set before the fielddef is added to a msgdef. -// For the moment we do not allow these to be set once the fielddef is added to -// a msgdef -- this could be relaxed in the future. -bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number); -INLINE bool upb_fielddef_setname(upb_fielddef *f, const char *name) { - return upb_def_setfullname(UPB_UPCAST(f), name); -} - -// Field type tests. 
/////////////////////////////////////////////////////////// - -INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} -INLINE bool upb_isstringtype(upb_fieldtype_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} -INLINE bool upb_isprimitivetype(upb_fieldtype_t type) { - return !upb_issubmsgtype(type) && !upb_isstringtype(type); -} -INLINE bool upb_issubmsg(const upb_fielddef *f) { - return upb_issubmsgtype(f->type); -} -INLINE bool upb_isstring(const upb_fielddef *f) { - return upb_isstringtype(f->type); -} -INLINE bool upb_isseq(const upb_fielddef *f) { - return f->label == UPB_LABEL(REPEATED); -} - -// Default value. ////////////////////////////////////////////////////////////// - -// Returns the default value for this fielddef, which may either be something -// the client set explicitly or the "default default" (0 for numbers, empty for -// strings). The field's type indicates the type of the returned value, except -// for enum fields that are still mutable. -// -// For enums the default can be set either numerically or symbolically -- the -// upb_fielddef_default_is_symbolic() function below will indicate which it is. -// For string defaults, the value will be a upb_byteregion which is invalidated -// by any other non-const call on this object. Once the fielddef is finalized, -// symbolic enum defaults are resolved, so finalized enum fielddefs always have -// a default of type int32. -INLINE upb_value upb_fielddef_default(const upb_fielddef *f) { - return f->defaultval; -} -// Sets default value for the field. For numeric types, use -// upb_fielddef_setdefault(), and "value" must match the type of the field. -// For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may -// use either, since the default may be set either numerically or symbolically. -// -// NOTE: May only be called for fields whose type has already been set. 
-// Also, will be reset to default if the field's type is set again. +bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number); +bool upb_fielddef_setname(upb_fielddef *f, const char *name); +bool upb_fielddef_issubmsg(const upb_fielddef *f); +bool upb_fielddef_isstring(const upb_fielddef *f); +bool upb_fielddef_isseq(const upb_fielddef *f); +bool upb_fielddef_isprimitive(const upb_fielddef *f); +upb_value upb_fielddef_default(const upb_fielddef *f); void upb_fielddef_setdefault(upb_fielddef *f, upb_value value); bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len); void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str); - -// The results of this function are only meaningful for mutable enum fields, -// which can have a default specified either as an integer or as a string. If -// this returns true, the default returned from upb_fielddef_default() is a -// string, otherwise it is an integer. -INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { - assert(f->type == UPB_TYPE(ENUM)); - return f->default_is_string; -} - -// Subdef. ///////////////////////////////////////////////////////////////////// - -// Submessage and enum fields must reference a "subdef", which is the -// upb_msgdef or upb_enumdef that defines their type. Note that when the -// fielddef is mutable it may not have a subdef *yet*, but this function still -// returns true to indicate that the field's type requires a subdef. -INLINE bool upb_hassubdef(const upb_fielddef *f) { - return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); -} - -// Before a fielddef is finalized, its subdef may be set either directly (with -// a upb_def*) or symbolically. Symbolic refs must be resolved before the -// containing msgdef can be finalized (see upb_resolve() above). The client is -// responsible for making sure that "subdef" lives until this fielddef is -// finalized or deleted. 
-// -// Both methods require that upb_hassubdef(f) (so the type must be set prior -// to calling these methods). Returns false if this is not the case, or if -// the given subdef is not of the correct type. The subtype is reset if the -// field's type is changed. -bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef); -bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name); - -// Returns the enum or submessage def or symbolic name for this field, if any. -// Requires that upb_hassubdef(f). Returns NULL if the subdef has not been set -// or if you ask for a subtype name when the subtype is currently set -// symbolically (or vice-versa). To access the subtype's name for a linked -// fielddef, use upb_def_fullname(upb_fielddef_subdef(f)). -// -// Caller does *not* own a ref on the returned def or string. -// upb_fielddef_subtypename() is non-const because finalized defs will never -// have a symbolic reference (they must be resolved before the msgdef can be -// finalized). -upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f); +bool upb_fielddef_default_is_symbolic(const upb_fielddef *f); +bool upb_fielddef_resolvedefault(upb_fielddef *f); +bool upb_fielddef_hassubdef(const upb_fielddef *f); +bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef); +bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name); const upb_def *upb_fielddef_subdef(const upb_fielddef *f); -const char *upb_fielddef_subtypename(upb_fielddef *f); +const char *upb_fielddef_subdefname(const upb_fielddef *f); +#ifdef __cplusplus +} // extern "C" +#endif -/* upb_msgdef *****************************************************************/ +/* upb::MessageDef ************************************************************/ + +typedef upb_inttable_iter upb_msg_iter; + +#ifdef __cplusplus // Structure that describes a single .proto message type. -typedef struct _upb_msgdef { +class upb::MessageDef { + public: + // Returns NULL if memory allocation failed. 
+ static MessageDef* New(const void *owner); + + // Though not declared as such in C++, upb::Def is the base of MessageDef and + // we can upcast to it. + Def* Upcast(); + const Def* Upcast() const; + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Functionality from upb::Def. + const char *full_name() const; + bool set_full_name(const char *fullname); + bool set_full_name(const std::string& fullname); + + // The number of fields that belong to the MessageDef. + int field_count() const; + + // Adds a set of fields (upb_fielddef objects) to a msgdef. Requires that + // the msgdef and all the fielddefs are mutable. The fielddef's name and + // number must be set, and the message may not already contain any field with + // this name or number, and this fielddef may not be part of another message. + // In error cases false is returned and the msgdef is unchanged. On success, + // the caller donates a ref from ref_donor (if non-NULL). + bool AddField(upb_fielddef *f, const void *ref_donor); + + // These return NULL if the field is not found. + FieldDef* FindFieldByNumber(uint32_t number); + FieldDef* FieldFieldByName(const char *name); + const FieldDef* FindFieldByNumber(uint32_t number) const; + const FieldDef* FieldFieldByName(const char *name) const; + + // Returns a new msgdef that is a copy of the given msgdef (and a copy of all + // the fields) but with any references to submessages broken and replaced + // with just the name of the submessage. Returns NULL if memory allocation + // failed. + // + // TODO(haberman): which is more useful, keeping fields resolved or + // unresolving them? If there's no obvious answer, Should this functionality + // just be moved into symtab.c? + MessageDef* Dup(const void *owner) const; + + // Iteration over fields. 
The order is undefined. + class Iterator { + public: + explicit Iterator(MessageDef* md); + + FieldDef* field(); + bool Done(); + void Next(); + + private: + upb_msg_iter iter_; + }; + + // For iterating over the fields of a const MessageDef. + class ConstIterator { + public: + explicit ConstIterator(const MessageDef* md); + + const FieldDef* field(); + bool Done(); + void Next(); + + private: + upb_msg_iter iter_; + }; + + private: + UPB_DISALLOW_POD_OPS(MessageDef); + +#else +struct upb_msgdef { +#endif upb_def base; + size_t selector_count; // Tables for looking up fields by number and name. upb_inttable itof; // int to field upb_strtable ntof; // name to field - // The following fields may be modified while mutable. - uint16_t size; - uint8_t hasbit_bytes; - // The range of tag numbers used to store extensions. - uint32_t extstart, extend; - // Used for proto2 integration. - const void *prototype; -} upb_msgdef; + // TODO(haberman): proper extension ranges (there can be multiple). +}; + +#define UPB_MSGDEF_INIT(name, itof, ntof, selector_count) \ + {UPB_DEF_INIT(name, UPB_DEF_MSG), selector_count, itof, ntof} +#ifdef __cplusplus +extern "C" { +#endif // Returns NULL if memory allocation failed. upb_msgdef *upb_msgdef_new(const void *owner); -INLINE void upb_msgdef_ref(const upb_msgdef *md, const void *owner) { - upb_def_ref(UPB_UPCAST(md), owner); -} -INLINE void upb_msgdef_unref(const upb_msgdef *md, const void *owner) { - upb_def_unref(UPB_UPCAST(md), owner); -} +// From upb_refcounted. 
+bool upb_msgdef_isfrozen(const upb_msgdef *m); +void upb_msgdef_ref(const upb_msgdef *m, const void *owner); +void upb_msgdef_unref(const upb_msgdef *m, const void *owner); +void upb_msgdef_donateref( + const upb_msgdef *m, const void *from, const void *to); +void upb_msgdef_checkref(const upb_msgdef *m, const void *owner); -// Returns a new msgdef that is a copy of the given msgdef (and a copy of all -// the fields) but with any references to submessages broken and replaced with -// just the name of the submessage. Returns NULL if memory allocation failed. -// This can be put back into another symtab and the names will be re-resolved -// in the new context. -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); +// From upb_def. +const char *upb_msgdef_fullname(const upb_msgdef *m); +bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname); -// Read accessors. May be called at any time. -INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; } -INLINE uint8_t upb_msgdef_hasbit_bytes(const upb_msgdef *m) { - return m->hasbit_bytes; -} -INLINE uint32_t upb_msgdef_extstart(const upb_msgdef *m) { return m->extstart; } -INLINE uint32_t upb_msgdef_extend(const upb_msgdef *m) { return m->extend; } - -// Write accessors. May only be called before the msgdef is in a symtab. -void upb_msgdef_setsize(upb_msgdef *m, uint16_t size); -void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes); -bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end); - -// Adds a set of fields (upb_fielddef objects) to a msgdef. Requires that the -// msgdef and all the fielddefs are mutable. The fielddef's name and number -// must be set, and the message may not already contain any field with this -// name or number, and this fielddef may not be part of another message. In -// error cases false is returned and the msgdef is unchanged. 
-// -// On success, the msgdef takes a ref on the fielddef so the caller needn't -// worry about continuing to keep it alive (however the reverse is not true; -// refs on the fielddef will *not* keep the msgdef alive). If ref_donor is -// non-NULL, caller passes a ref on the fielddef from ref_donor to the msgdef, -// otherwise caller retains its reference(s) on the defs in f. +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); bool upb_msgdef_addfields( upb_msgdef *m, upb_fielddef *const *f, int n, const void *ref_donor); -INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, - const void *ref_donor) { - return upb_msgdef_addfields(m, &f, 1, ref_donor); -} - -// Looks up a field by name or number. While these are written to be as fast -// as possible, it will still be faster to cache the results of this lookup if -// possible. These return NULL if no such field is found. -INLINE upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { - const upb_value *val = upb_inttable_lookup32(&m->itof, i); - return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL; -} - -INLINE upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) { - const upb_value *val = upb_strtable_lookup(&m->ntof, name); - return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL; -} - -INLINE int upb_msgdef_numfields(const upb_msgdef *m) { - return upb_strtable_count(&m->ntof); -} - -// Iteration over fields. The order is undefined. -// TODO: the iteration should be in field order. -// Iterators are invalidated when a field is added or removed. -// upb_msg_iter i; -// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { -// upb_fielddef *f = upb_msg_iter_field(&i); -// // ... 
-// } -typedef upb_inttable_iter upb_msg_iter; - +bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor); +const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i); +const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name); +upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i); +upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, const char *name); +int upb_msgdef_numfields(const upb_msgdef *m); + +// upb_msg_iter i; +// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { +// upb_fielddef *f = upb_msg_iter_field(&i); +// // ... +// } void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m); void upb_msg_next(upb_msg_iter *iter); -INLINE bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); } +bool upb_msg_done(upb_msg_iter *iter); +upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter); +#ifdef __cplusplus +} // extern "C" +#endif -// Iterator accessor. -INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) { - return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); -} +/* upb::EnumDef ***************************************************************/ -/* upb_enumdef ****************************************************************/ +typedef upb_strtable_iter upb_enum_iter; -typedef struct _upb_enumdef { +#ifdef __cplusplus + +class upb::EnumDef { + public: + // Returns NULL if memory allocation failed. + static EnumDef* New(const void *owner); + + // Though not declared as such in C++, upb::Def is the base of EnumDef and we + // can upcast to it. + Def* Upcast(); + const Def* Upcast() const; + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Functionality from upb::Def.
+ const char *full_name() const; + bool set_full_name(const char *fullname); + bool set_full_name(const std::string& fullname); + + // The value that is used as the default when no field default is specified. + int32_t default_value() const; + void set_default_value(int32_t val); + + // Returns the number of values currently defined in the enum. Note that + // multiple names can refer to the same number, so this may be greater than + // the total number of unique numbers. + int value_count() const; + + // Adds a single name/number pair to the enum. Fails if this name has + // already been used by another value. + bool AddValue(const char* name, int32_t num, Status* status); + bool AddValue(const std::string& name, int32_t num, Status* status); + + // Lookups from name to integer, returning true if found. + bool FindValueByName(const char* name, int32_t* num) const; + + // Finds the name corresponding to the given number, or NULL if none was + // found. If more than one name corresponds to this number, returns the + // first one that was added. + const char* FindValueByNumber(int32_t num) const; + + // Returns a new EnumDef with all the same values. The new EnumDef will be + // owned by the given owner. + EnumDef* Dup(const void *owner) const; + + // Iteration over name/value pairs. The order is undefined. + // Adding an enum val invalidates any iterators. + class Iterator { + public: + explicit Iterator(const EnumDef*); + + int32_t number(); + const char* name(); + bool Done(); + void Next(); + + private: + upb_enum_iter iter_; + }; + + private: + UPB_DISALLOW_POD_OPS(EnumDef); + +#else +struct upb_enumdef { +#endif upb_def base; upb_strtable ntoi; upb_inttable iton; int32_t defaultval; -} upb_enumdef; +}; -// Returns NULL if memory allocation failed. +#define UPB_ENUMDEF_INIT(name, ntoi, iton, defaultval) \ + {UPB_DEF_INIT(name, UPB_DEF_ENUM), ntoi, iton, defaultval} + +// Native C API. 
+#ifdef __cplusplus +extern "C" { +#endif upb_enumdef *upb_enumdef_new(const void *owner); -INLINE void upb_enumdef_ref(const upb_enumdef *e, const void *owner) { - upb_def_ref(&e->base, owner); -} -INLINE void upb_enumdef_unref(const upb_enumdef *e, const void *owner) { - upb_def_unref(&e->base, owner); -} upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner); -INLINE int32_t upb_enumdef_default(const upb_enumdef *e) { - return e->defaultval; -} - -// May only be set if upb_def_ismutable(e). -void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); - -// Returns the number of values currently defined in the enum. Note that -// multiple names can refer to the same number, so this may be greater than the -// total number of unique numbers. -INLINE int upb_enumdef_numvals(const upb_enumdef *e) { - return upb_strtable_count(&e->ntoi); -} +// From upb_refcounted. +void upb_enumdef_unref(const upb_enumdef *e, const void *owner); +bool upb_enumdef_isfrozen(const upb_enumdef *e); +void upb_enumdef_ref(const upb_enumdef *e, const void *owner); +void upb_enumdef_donateref( + const upb_enumdef *m, const void *from, const void *to); +void upb_enumdef_checkref(const upb_enumdef *e, const void *owner); -// Adds a value to the enumdef. Requires that no existing val has this name, -// but duplicate numbers are allowed. May only be called if the enumdef is -// mutable. Returns false if the existing name is used, or if "name" is not a -// valid label, or on memory allocation failure (we may want to distinguish -// these failure cases in the future). -bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num); +// From upb_def. +const char *upb_enumdef_fullname(const upb_enumdef *e); +bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname); -// Lookups from name to integer, returning true if found. 
+int32_t upb_enumdef_default(const upb_enumdef *e); +void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); +int upb_enumdef_numvals(const upb_enumdef *e); +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, + upb_status *status); bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num); - -// Finds the name corresponding to the given number, or NULL if none was found. -// If more than one name corresponds to this number, returns the first one that -// was added. const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num); -// Iteration over name/value pairs. The order is undefined. -// Adding an enum val invalidates any iterators. -// upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { -// // ... -// } -typedef upb_strtable_iter upb_enum_iter; - +// upb_enum_iter i; +// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// // ... +// } void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e); void upb_enum_next(upb_enum_iter *iter); bool upb_enum_done(upb_enum_iter *iter); - -// Iterator accessors. -INLINE const char *upb_enum_iter_name(upb_enum_iter *iter) { - return upb_strtable_iter_key(iter); -} -INLINE int32_t upb_enum_iter_number(upb_enum_iter *iter) { - return upb_value_getint32(upb_strtable_iter_value(iter)); -} - - -/* upb_symtab *****************************************************************/ - -// A symtab (symbol table) stores a name->def map of upb_defs. Clients could -// always create such tables themselves, but upb_symtab has logic for resolving -// symbolic references, which is nontrivial. 
-typedef struct { - upb_refcount refcount; - upb_strtable symtab; -} upb_symtab; - -upb_symtab *upb_symtab_new(const void *owner); -void upb_symtab_ref(const upb_symtab *s, const void *owner); -void upb_symtab_unref(const upb_symtab *s, const void *owner); -void upb_symtab_donateref( - const upb_symtab *s, const void *from, const void *to); - -// Resolves the given symbol using the rules described in descriptor.proto, -// namely: -// -// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like -// scoping rules are used to find the type (i.e. first the nested types -// within this message are searched, then within the parent, on up to the -// root namespace). -// -// If a def is found, the caller owns one ref on the returned def, owned by -// owner. Otherwise returns NULL. -const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym, const void *owner); - -// Finds an entry in the symbol table with this exact name. If a def is found, -// the caller owns one ref on the returned def, owned by owner. Otherwise -// returns NULL. -const upb_def *upb_symtab_lookup( - const upb_symtab *s, const char *sym, const void *owner); -const upb_msgdef *upb_symtab_lookupmsg( - const upb_symtab *s, const char *sym, const void *owner); - -// Gets an array of pointers to all currently active defs in this symtab. The -// caller owns the returned array (which is of length *count) as well as a ref -// to each symbol inside (owned by owner). If type is UPB_DEF_ANY then defs of -// all types are returned, otherwise only defs of the required type are -// returned. -const upb_def **upb_symtab_getdefs( - const upb_symtab *s, int *n, upb_deftype_t type, const void *owner); - -// Adds the given defs to the symtab, resolving all symbols (including enum -// default values) and finalizing the defs. Only one def per name may be in -// the list, but defs can replace existing defs in the symtab. 
All defs must -// have a name -- anonymous defs are not allowed. Anonymous defs can still be -// finalized by calling upb_def_finalize() directly. -// -// Any existing defs that can reach defs that are being replaced will -// themselves be replaced also, so that the resulting set of defs is fully -// consistent. -// -// This logic implemented in this method is a convenience; ultimately it calls -// some combination of upb_fielddef_setsubdef(), upb_def_dup(), and -// upb_finalize(), any of which the client could call themself. However, since -// the logic for doing so is nontrivial, we provide it here. -// -// The entire operation either succeeds or fails. If the operation fails, the -// symtab is unchanged, false is returned, and status indicates the error. The -// caller passes a ref on all defs to the symtab (even if the operation fails). -bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, - upb_status *status); +const char *upb_enum_iter_name(upb_enum_iter *iter); +int32_t upb_enum_iter_number(upb_enum_iter *iter); +#ifdef __cplusplus +} // extern "C" +#endif /* upb_def casts **************************************************************/ @@ -592,31 +685,349 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, // Downcasts, for when some wants to assert that a def is of a particular type. // These are only checked if we are building debug. #define UPB_DEF_CASTS(lower, upper) \ - struct _upb_ ## lower; /* Forward-declare. 
*/ \ - INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ - if(def->type != UPB_DEF_ ## upper) return NULL; \ - return (struct _upb_ ## lower*)def; \ + INLINE const upb_ ## lower *upb_dyncast_ ## lower(const upb_def *def) { \ + if (upb_def_type(def) != UPB_DEF_ ## upper) return NULL; \ + return (upb_ ## lower*)def; \ } \ - INLINE const struct _upb_ ## lower *upb_dyncast_ ## lower ## _const(const upb_def *def) { \ - if(def->type != UPB_DEF_ ## upper) return NULL; \ - return (const struct _upb_ ## lower*)def; \ + INLINE const upb_ ## lower *upb_downcast_ ## lower(const upb_def *def) { \ + assert(upb_def_type(def) == UPB_DEF_ ## upper); \ + return (const upb_ ## lower*)def; \ } \ - INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ - assert(def->type == UPB_DEF_ ## upper); \ - return (struct _upb_ ## lower*)def; \ + INLINE upb_ ## lower *upb_dyncast_ ## lower ## _mutable(upb_def *def) { \ + return (upb_ ## lower*)upb_dyncast_ ## lower(def); \ } \ - INLINE const struct _upb_ ## lower *upb_downcast_ ## lower ## _const(const upb_def *def) { \ - assert(def->type == UPB_DEF_ ## upper); \ - return (const struct _upb_ ## lower*)def; \ + INLINE upb_ ## lower *upb_downcast_ ## lower ## _mutable(upb_def *def) { \ + return (upb_ ## lower*)upb_downcast_ ## lower(def); \ } UPB_DEF_CASTS(msgdef, MSG); UPB_DEF_CASTS(fielddef, FIELD); UPB_DEF_CASTS(enumdef, ENUM); -UPB_DEF_CASTS(svcdef, SERVICE); #undef UPB_DEF_CASTS #ifdef __cplusplus -} /* extern "C" */ + +INLINE const char *upb_safecstr(const std::string& str) { + assert(str.size() == std::strlen(str.c_str())); + return str.c_str(); +} + +// Inline C++ wrappers. 
+namespace upb { + +inline Def* Def::Dup(const void *owner) const { + return upb_def_dup(this, owner); +} +inline RefCounted* Def::Upcast() { + return upb_upcast(this); +} +inline const RefCounted* Def::Upcast() const { + return upb_upcast(this); +} +inline bool Def::IsFrozen() const { + return upb_def_isfrozen(this); +} +inline void Def::Ref(const void* owner) const { + upb_def_ref(this, owner); +} +inline void Def::Unref(const void* owner) const { + upb_def_unref(this, owner); +} +inline void Def::DonateRef(const void *from, const void *to) const { + upb_def_donateref(this, from, to); +} +inline void Def::CheckRef(const void *owner) const { + upb_def_checkref(this, owner); +} +inline Def::Type Def::def_type() const { + return upb_def_type(this); +} +inline const char *Def::full_name() const { + return upb_def_fullname(this); +} +inline bool Def::set_full_name(const char *fullname) { + return upb_def_setfullname(this, fullname); +} +inline bool Def::set_full_name(const std::string& fullname) { + return upb_def_setfullname(this, upb_safecstr(fullname)); +} +inline bool Def::Freeze(Def *const*defs, int n, Status *status) { + return upb_def_freeze(defs, n, status); +} +inline bool Def::Freeze(const std::vector<Def*>& defs, Status *status) { + return upb_def_freeze((Def*const*)&defs[0], defs.size(), status); +} + +inline FieldDef* FieldDef::New(const void *owner) { + return upb_fielddef_new(owner); +} +inline FieldDef* FieldDef::Dup(const void *owner) const { + return upb_fielddef_dup(this, owner); +} +inline Def* FieldDef::Upcast() { + return upb_upcast(this); +} +inline const Def* FieldDef::Upcast() const { + return upb_upcast(this); +} +inline bool FieldDef::IsFrozen() const { + return upb_fielddef_isfrozen(this); +} +inline void FieldDef::Ref(const void* owner) const { + upb_fielddef_ref(this, owner); +} +inline void FieldDef::Unref(const void* owner) const { + upb_fielddef_unref(this, owner); +} +inline void FieldDef::DonateRef(const void *from, const void *to) 
const { + upb_fielddef_donateref(this, from, to); +} +inline void FieldDef::CheckRef(const void *owner) const { + upb_fielddef_checkref(this, owner); +} +inline const char *FieldDef::full_name() const { + return upb_fielddef_fullname(this); +} +inline bool FieldDef::set_full_name(const char *fullname) { + return upb_fielddef_setfullname(this, fullname); +} +inline bool FieldDef::set_full_name(const std::string& fullname) { + return upb_fielddef_setfullname(this, upb_safecstr(fullname)); +} +inline FieldDef::Type FieldDef::type() const { + return upb_fielddef_type(this); +} +inline FieldDef::Label FieldDef::label() const { + return upb_fielddef_label(this); +} +inline uint32_t FieldDef::number() const { + return upb_fielddef_number(this); +} +inline const char *FieldDef::name() const { + return upb_fielddef_name(this); +} +inline const MessageDef* FieldDef::message_def() const { + return upb_fielddef_msgdef(this); +} +inline bool FieldDef::set_number(uint32_t number) { + return upb_fielddef_setnumber(this, number); +} +inline bool FieldDef::set_name(const char *name) { + return upb_fielddef_setname(this, name); +} +inline bool FieldDef::set_name(const std::string& name) { + return upb_fielddef_setname(this, upb_safecstr(name)); +} +inline bool FieldDef::set_type(upb_fieldtype_t type) { + return upb_fielddef_settype(this, type); +} +inline bool FieldDef::set_label(upb_label_t label) { + return upb_fielddef_setlabel(this, label); +} +inline bool FieldDef::IsSubMessage() const { + return upb_fielddef_issubmsg(this); +} +inline bool FieldDef::IsString() const { + return upb_fielddef_isstring(this); +} +inline bool FieldDef::IsSequence() const { + return upb_fielddef_isseq(this); +} +inline Value FieldDef::default_value() const { + return upb_fielddef_default(this); +} +inline void FieldDef::set_default_value(Value value) { + upb_fielddef_setdefault(this, value); +} +inline bool FieldDef::set_default_string(const void *str, size_t len) { + return 
upb_fielddef_setdefaultstr(this, str, len); +} +inline bool FieldDef::set_default_string(const std::string& str) { + return upb_fielddef_setdefaultstr(this, str.c_str(), str.size()); +} +inline void FieldDef::set_default_cstr(const char *str) { + return upb_fielddef_setdefaultcstr(this, str); +} +inline bool FieldDef::IsDefaultSymbolic() const { + return upb_fielddef_default_is_symbolic(this); +} +inline bool FieldDef::ResolveDefault() { + return upb_fielddef_resolvedefault(this); +} +inline bool FieldDef::HasSubDef() const { + return upb_fielddef_hassubdef(this); +} +inline const Def* FieldDef::subdef() const { + return upb_fielddef_subdef(this); +} +inline const char* FieldDef::subdef_name() const { + return upb_fielddef_subdefname(this); +} +inline bool FieldDef::set_subdef(const Def* subdef) { + return upb_fielddef_setsubdef(this, subdef); +} +inline bool FieldDef::set_subdef_name(const char* name) { + return upb_fielddef_setsubdefname(this, name); +} +inline bool FieldDef::set_subdef_name(const std::string& name) { + return upb_fielddef_setsubdefname(this, upb_safecstr(name)); +} + +inline MessageDef* MessageDef::New(const void *owner) { + return upb_msgdef_new(owner); +} +inline Def* MessageDef::Upcast() { + return upb_upcast(this); +} +inline const Def* MessageDef::Upcast() const { + return upb_upcast(this); +} +inline bool MessageDef::IsFrozen() const { + return upb_msgdef_isfrozen(this); +} +inline void MessageDef::Ref(const void* owner) const { + return upb_msgdef_ref(this, owner); +} +inline void MessageDef::Unref(const void* owner) const { + return upb_msgdef_unref(this, owner); +} +inline void MessageDef::DonateRef(const void *from, const void *to) const { + return upb_msgdef_donateref(this, from, to); +} +inline void MessageDef::CheckRef(const void *owner) const { + return upb_msgdef_checkref(this, owner); +} +inline const char *MessageDef::full_name() const { + return upb_msgdef_fullname(this); +} +inline bool MessageDef::set_full_name(const char 
*fullname) { + return upb_msgdef_setfullname(this, fullname); +} +inline bool MessageDef::set_full_name(const std::string& fullname) { + return upb_msgdef_setfullname(this, upb_safecstr(fullname)); +} +inline int MessageDef::field_count() const { + return upb_msgdef_numfields(this); +} +inline bool MessageDef::AddField(upb_fielddef *f, const void *ref_donor) { + return upb_msgdef_addfield(this, f, ref_donor); +} +inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) { + return upb_msgdef_itof_mutable(this, number); +} +inline FieldDef* MessageDef::FieldFieldByName(const char *name) { + return upb_msgdef_ntof_mutable(this, name); +} +inline const FieldDef* MessageDef::FindFieldByNumber(uint32_t number) const { + return upb_msgdef_itof(this, number); +} +inline const FieldDef* MessageDef::FieldFieldByName(const char *name) const { + return upb_msgdef_ntof(this, name); +} +inline MessageDef* MessageDef::Dup(const void *owner) const { + return upb_msgdef_dup(this, owner); +} + +inline MessageDef::Iterator::Iterator(MessageDef* md) { + upb_msg_begin(&iter_, md); +} +inline FieldDef* MessageDef::Iterator::field() { + return upb_msg_iter_field(&iter_); +} +inline bool MessageDef::Iterator::Done() { + return upb_msg_done(&iter_); +} +inline void MessageDef::Iterator::Next() { + return upb_msg_next(&iter_); +} + +inline MessageDef::ConstIterator::ConstIterator(const MessageDef* md) { + upb_msg_begin(&iter_, md); +} +inline const FieldDef* MessageDef::ConstIterator::field() { + return upb_msg_iter_field(&iter_); +} +inline bool MessageDef::ConstIterator::Done() { + return upb_msg_done(&iter_); +} +inline void MessageDef::ConstIterator::Next() { + return upb_msg_next(&iter_); +} + +inline EnumDef* EnumDef::New(const void *owner) { + return upb_enumdef_new(owner); +} +inline Def* EnumDef::Upcast() { + return upb_upcast(this); +} +inline const Def* EnumDef::Upcast() const { + return upb_upcast(this); +} +inline bool EnumDef::IsFrozen() const { + return 
upb_enumdef_isfrozen(this); +} +inline void EnumDef::Ref(const void* owner) const { + return upb_enumdef_ref(this, owner); +} +inline void EnumDef::Unref(const void* owner) const { + return upb_enumdef_unref(this, owner); +} +inline void EnumDef::DonateRef(const void *from, const void *to) const { + return upb_enumdef_donateref(this, from, to); +} +inline void EnumDef::CheckRef(const void *owner) const { + return upb_enumdef_checkref(this, owner); +} +inline const char *EnumDef::full_name() const { + return upb_enumdef_fullname(this); +} +inline bool EnumDef::set_full_name(const char *fullname) { + return upb_enumdef_setfullname(this, fullname); +} +inline bool EnumDef::set_full_name(const std::string& fullname) { + return upb_enumdef_setfullname(this, upb_safecstr(fullname)); +} +inline int32_t EnumDef::default_value() const { + return upb_enumdef_default(this); +} +inline void EnumDef::set_default_value(int32_t val) { + upb_enumdef_setdefault(this, val); +} +inline int EnumDef::value_count() const { + return upb_enumdef_numvals(this); +} +inline bool EnumDef::AddValue(const char* name, int32_t num, Status* status) { + return upb_enumdef_addval(this, name, num, status); +} +inline bool EnumDef::AddValue( + const std::string& name, int32_t num, Status* status) { + return upb_enumdef_addval(this, upb_safecstr(name), num, status); +} +inline bool EnumDef::FindValueByName(const char* name, int32_t* num) const { + return upb_enumdef_ntoi(this, name, num); +} +inline const char* EnumDef::FindValueByNumber(int32_t num) const { + return upb_enumdef_iton(this, num); +} +inline EnumDef* EnumDef::Dup(const void *owner) const { + return upb_enumdef_dup(this, owner); +} + +inline EnumDef::Iterator::Iterator(const EnumDef* e) { + upb_enum_begin(&iter_, e); +} +inline int32_t EnumDef::Iterator::number() { + return upb_enum_iter_number(&iter_); +} +inline const char* EnumDef::Iterator::name() { + return upb_enum_iter_name(&iter_); +} +inline bool EnumDef::Iterator::Done() { + 
return upb_enum_done(&iter_); +} +inline void EnumDef::Iterator::Next() { + return upb_enum_next(&iter_); +} +} // namespace upb #endif #endif /* UPB_DEF_H_ */ diff --git a/upb/descriptor.proto b/upb/descriptor/descriptor.proto index 233f879..233f879 100644 --- a/upb/descriptor.proto +++ b/upb/descriptor/descriptor.proto diff --git a/upb/descriptor/descriptor.upb.c b/upb/descriptor/descriptor.upb.c new file mode 100755 index 0000000..9a64c5b --- /dev/null +++ b/upb/descriptor/descriptor.upb.c @@ -0,0 +1,483 @@ +// This file was generated by upbc (the upb compiler). +// Do not edit -- your changes will be discarded when the file is +// regenerated. + +#include "upb/def.h" + +const upb_msgdef google_protobuf_msgs[20]; +const upb_fielddef google_protobuf_fields[73]; +const upb_enumdef google_protobuf_enums[4]; +const upb_tabent google_protobuf_strentries[192]; +const upb_tabent google_protobuf_intentries[66]; +const upb_value google_protobuf_arrays[97]; + +const upb_msgdef google_protobuf_msgs[20] = { + UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[0], &google_protobuf_arrays[0], 6, 5), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[0]), 31), + UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[6], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[16]), 2), + UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[10], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[20]), 11), + UPB_MSGDEF_INIT("google.protobuf.EnumOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[4], &google_protobuf_arrays[14], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[24]), 5), + UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[15], 4, 3), 
UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[28]), 7), + UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[6], &google_protobuf_arrays[19], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[32]), 5), + UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[8], &google_protobuf_arrays[20], 6, 5), UPB_STRTABLE_INIT(8, 15, 9, 4, &google_protobuf_strentries[36]), 18), + UPB_MSGDEF_INIT("google.protobuf.FieldOptions", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[12], &google_protobuf_arrays[26], 5, 3), UPB_STRTABLE_INIT(5, 7, 9, 3, &google_protobuf_strentries[52]), 11), + UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", UPB_INTTABLE_INIT(4, 7, 9, 3, &google_protobuf_intentries[16], &google_protobuf_arrays[31], 6, 5), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[60]), 37), + UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[37], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[76]), 5), + UPB_MSGDEF_INIT("google.protobuf.FileOptions", UPB_INTTABLE_INIT(8, 15, 9, 4, &google_protobuf_intentries[24], &google_protobuf_arrays[40], 6, 1), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[80]), 17), + UPB_MSGDEF_INIT("google.protobuf.MessageOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[40], &google_protobuf_arrays[46], 4, 2), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[96]), 7), + UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[50], 5, 4), UPB_STRTABLE_INIT(4, 7, 9, 3, &google_protobuf_strentries[100]), 12), + UPB_MSGDEF_INIT("google.protobuf.MethodOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[42], &google_protobuf_arrays[55], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, 
&google_protobuf_strentries[108]), 5), + UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[56], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[112]), 11), + UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[44], &google_protobuf_arrays[60], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[116]), 5), + UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[61], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[120]), 5), + UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[64], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[124]), 6), + UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[46], &google_protobuf_arrays[68], 6, 4), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[128]), 17), + UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[74], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[144]), 4), +}; + +const upb_fielddef google_protobuf_fields[73] = { + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "aggregate_value", 8, &google_protobuf_msgs[18], NULL, 10, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "cc_generic_services", 16, &google_protobuf_msgs[10], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "ctype", 1, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_enums[2]), 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "default_value", 7, &google_protobuf_msgs[6], NULL, 15, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, "dependency", 3, 
&google_protobuf_msgs[8], NULL, 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "deprecated", 3, &google_protobuf_msgs[7], NULL, 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, "double_value", 6, &google_protobuf_msgs[18], NULL, 13, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "end", 2, &google_protobuf_msgs[1], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "enum_type", 4, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[2]), 15, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "enum_type", 5, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[2]), 18, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "experimental_map_key", 9, &google_protobuf_msgs[7], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "extendee", 2, &google_protobuf_msgs[6], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension", 7, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[6]), 34, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension", 6, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 25, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension_range", 5, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[1]), 20, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "field", 2, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "file", 1, &google_protobuf_msgs[9], upb_upcast(&google_protobuf_msgs[8]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "identifier_value", 3, &google_protobuf_msgs[18], NULL, 5, UPB_VALUE_INIT_NONE), + 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "input_type", 2, &google_protobuf_msgs[12], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, "is_extension", 2, &google_protobuf_msgs[19], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_generate_equals_and_hash", 20, &google_protobuf_msgs[10], NULL, 6, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_generic_services", 17, &google_protobuf_msgs[10], NULL, 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_multiple_files", 10, &google_protobuf_msgs[10], NULL, 16, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "java_outer_classname", 8, &google_protobuf_msgs[10], NULL, 12, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "java_package", 1, &google_protobuf_msgs[10], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "label", 4, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[0]), 7, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "location", 1, &google_protobuf_msgs[16], upb_upcast(&google_protobuf_msgs[17]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "message_set_wire_format", 1, &google_protobuf_msgs[11], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "message_type", 4, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[0]), 13, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "method", 2, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[12]), 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[12], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[4], NULL, 0, 
UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[14], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[2], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[6], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "name", 2, &google_protobuf_msgs[18], upb_upcast(&google_protobuf_msgs[19]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[0], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[8], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, "name_part", 1, &google_protobuf_msgs[19], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, "negative_int_value", 5, &google_protobuf_msgs[18], NULL, 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "nested_type", 3, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[0]), 10, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "no_standard_descriptor_accessor", 2, &google_protobuf_msgs[11], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "number", 2, &google_protobuf_msgs[4], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "number", 3, &google_protobuf_msgs[6], NULL, 6, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "optimize_for", 9, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_enums[3]), 15, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 4, &google_protobuf_msgs[12], upb_upcast(&google_protobuf_msgs[13]), 9, UPB_VALUE_INIT_NONE), + 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[15]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 8, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[10]), 21, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[3]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 7, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[11]), 28, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 8, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_msgs[7]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[4], upb_upcast(&google_protobuf_msgs[5]), 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "output_type", 3, &google_protobuf_msgs[12], NULL, 6, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "package", 2, &google_protobuf_msgs[8], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "packed", 2, &google_protobuf_msgs[7], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, "path", 1, &google_protobuf_msgs[17], NULL, 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, "positive_int_value", 4, &google_protobuf_msgs[18], NULL, 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "py_generic_services", 18, &google_protobuf_msgs[10], NULL, 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "service", 6, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[14]), 29, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 
"source_code_info", 9, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[16]), 24, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, "span", 2, &google_protobuf_msgs[17], NULL, 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "start", 1, &google_protobuf_msgs[1], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, "string_value", 7, &google_protobuf_msgs[18], NULL, 14, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "type", 5, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[1]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "type_name", 6, &google_protobuf_msgs[6], NULL, 12, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[15], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[11], upb_upcast(&google_protobuf_msgs[18]), 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[13], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_msgs[18]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_msgs[18]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[3], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[5], 
upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "value", 2, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[4]), 5, UPB_VALUE_INIT_NONE), +}; + +const upb_enumdef google_protobuf_enums[4] = { + UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[148]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[78], 4, 3), 0), + UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, 1, 5, &google_protobuf_strentries[152]), UPB_INTTABLE_INIT(12, 15, 8, 4, &google_protobuf_intentries[50], &google_protobuf_arrays[82], 7, 6), 0), + UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[184]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[89], 4, 3), 0), + UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, 1, 2, &google_protobuf_strentries[188]), UPB_INTTABLE_INIT(0, 0, 8, 0, NULL, &google_protobuf_arrays[93], 4, 3), 0), +}; + +const upb_tabent google_protobuf_strentries[192] = { + {UPB_TABKEY_STR("extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[13]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[36]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("field"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[15]), NULL}, + {UPB_TABKEY_STR("extension_range"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[14]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("nested_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[40]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + 
{UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[49]), NULL}, + {UPB_TABKEY_STR("enum_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[8]), &google_protobuf_strentries[14]}, + {UPB_TABKEY_STR("start"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[61]), NULL}, + {UPB_TABKEY_STR("end"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[7]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[72]), NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[48]), NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[33]), &google_protobuf_strentries[22]}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[70]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("number"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[42]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[51]), NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[31]), &google_protobuf_strentries[30]}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[71]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("label"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[25]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[34]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("number"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[43]), &google_protobuf_strentries[49]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("type_name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[64]), NULL}, + {UPB_TABKEY_STR("extendee"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[11]), NULL}, + {UPB_TABKEY_STR("type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[63]), &google_protobuf_strentries[48]}, + {UPB_TABKEY_STR("default_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[3]), NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[50]), NULL}, + {UPB_TABKEY_STR("experimental_map_key"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[10]), &google_protobuf_strentries[58]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("ctype"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[2]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("deprecated"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[5]), NULL}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[69]), NULL}, + {UPB_TABKEY_STR("packed"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[54]), NULL}, + {UPB_TABKEY_STR("extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[12]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[37]), NULL}, + {UPB_TABKEY_STR("service"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[58]), NULL}, + {UPB_TABKEY_NONE, 
UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("source_code_info"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[59]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("dependency"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[4]), NULL}, + {UPB_TABKEY_STR("message_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[28]), NULL}, + {UPB_TABKEY_STR("package"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[53]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[47]), NULL}, + {UPB_TABKEY_STR("enum_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[9]), &google_protobuf_strentries[74]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("file"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[16]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[68]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("cc_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[1]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("java_multiple_files"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[22]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("java_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[21]), &google_protobuf_strentries[94]}, + {UPB_TABKEY_STR("java_generate_equals_and_hash"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[20]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("java_package"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[24]), NULL}, + {UPB_TABKEY_STR("optimize_for"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[44]), NULL}, + {UPB_TABKEY_STR("py_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[57]), NULL}, + {UPB_TABKEY_STR("java_outer_classname"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[23]), NULL}, + {UPB_TABKEY_STR("message_set_wire_format"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[27]), &google_protobuf_strentries[98]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[66]), NULL}, + {UPB_TABKEY_STR("no_standard_descriptor_accessor"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[41]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[30]), NULL}, + {UPB_TABKEY_STR("input_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[18]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("output_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[52]), NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[45]), NULL}, + {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[67]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[46]), &google_protobuf_strentries[114]}, + {UPB_TABKEY_STR("method"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[29]), NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[32]), &google_protobuf_strentries[113]}, + {UPB_TABKEY_STR("uninterpreted_option"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[65]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("location"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[26]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("span"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[60]), NULL}, + {UPB_TABKEY_STR("path"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[55]), &google_protobuf_strentries[126]}, + {UPB_TABKEY_STR("double_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[6]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[35]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("negative_int_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[39]), NULL}, + {UPB_TABKEY_STR("aggregate_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[0]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("positive_int_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[56]), NULL}, + {UPB_TABKEY_STR("identifier_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[17]), NULL}, + {UPB_TABKEY_STR("string_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[62]), &google_protobuf_strentries[142]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + 
{UPB_TABKEY_STR("is_extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[19]), NULL}, + {UPB_TABKEY_STR("name_part"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[38]), NULL}, + {UPB_TABKEY_STR("LABEL_REQUIRED"), UPB_VALUE_INIT_INT32(2), &google_protobuf_strentries[150]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("LABEL_REPEATED"), UPB_VALUE_INIT_INT32(3), NULL}, + {UPB_TABKEY_STR("LABEL_OPTIONAL"), UPB_VALUE_INIT_INT32(1), NULL}, + {UPB_TABKEY_STR("TYPE_FIXED64"), UPB_VALUE_INIT_INT32(6), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_STRING"), UPB_VALUE_INIT_INT32(9), NULL}, + {UPB_TABKEY_STR("TYPE_FLOAT"), UPB_VALUE_INIT_INT32(2), &google_protobuf_strentries[181]}, + {UPB_TABKEY_STR("TYPE_DOUBLE"), UPB_VALUE_INIT_INT32(1), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_INT32"), UPB_VALUE_INIT_INT32(5), NULL}, + {UPB_TABKEY_STR("TYPE_SFIXED32"), UPB_VALUE_INIT_INT32(15), NULL}, + {UPB_TABKEY_STR("TYPE_FIXED32"), UPB_VALUE_INIT_INT32(7), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_MESSAGE"), UPB_VALUE_INIT_INT32(11), &google_protobuf_strentries[182]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_INT64"), UPB_VALUE_INIT_INT32(3), &google_protobuf_strentries[179]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_ENUM"), UPB_VALUE_INIT_INT32(14), NULL}, + {UPB_TABKEY_STR("TYPE_UINT32"), UPB_VALUE_INIT_INT32(13), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_UINT64"), UPB_VALUE_INIT_INT32(4), 
&google_protobuf_strentries[178]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("TYPE_SFIXED64"), UPB_VALUE_INIT_INT32(16), NULL}, + {UPB_TABKEY_STR("TYPE_BYTES"), UPB_VALUE_INIT_INT32(12), NULL}, + {UPB_TABKEY_STR("TYPE_SINT64"), UPB_VALUE_INIT_INT32(18), NULL}, + {UPB_TABKEY_STR("TYPE_BOOL"), UPB_VALUE_INIT_INT32(8), NULL}, + {UPB_TABKEY_STR("TYPE_GROUP"), UPB_VALUE_INIT_INT32(10), NULL}, + {UPB_TABKEY_STR("TYPE_SINT32"), UPB_VALUE_INIT_INT32(17), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("CORD"), UPB_VALUE_INIT_INT32(1), NULL}, + {UPB_TABKEY_STR("STRING"), UPB_VALUE_INIT_INT32(0), &google_protobuf_strentries[185]}, + {UPB_TABKEY_STR("STRING_PIECE"), UPB_VALUE_INIT_INT32(2), NULL}, + {UPB_TABKEY_STR("CODE_SIZE"), UPB_VALUE_INIT_INT32(2), NULL}, + {UPB_TABKEY_STR("SPEED"), UPB_VALUE_INIT_INT32(1), &google_protobuf_strentries[191]}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("LITE_RUNTIME"), UPB_VALUE_INIT_INT32(3), NULL}, +}; + +const upb_tabent google_protobuf_intentries[66] = { + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[13]), NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[49]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[70]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[71]), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[50]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[64]), NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[3]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(9), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[10]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[69]), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[47]), NULL}, + {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[59]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[58]), NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[12]), NULL}, + {UPB_TABKEY_NUM(16), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[1]), NULL}, + {UPB_TABKEY_NUM(17), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[21]), NULL}, + {UPB_TABKEY_NUM(18), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[57]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(20), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[20]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[68]), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[23]), NULL}, + {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[44]), NULL}, + {UPB_TABKEY_NUM(10), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[22]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[66]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[67]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[65]), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[0]), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[6]), NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[62]), NULL}, + {UPB_TABKEY_NUM(16), UPB_VALUE_INIT_CONSTPTR("TYPE_SFIXED64"), NULL}, + {UPB_TABKEY_NUM(17), UPB_VALUE_INIT_CONSTPTR("TYPE_SINT32"), NULL}, + {UPB_TABKEY_NUM(18), UPB_VALUE_INIT_CONSTPTR("TYPE_SINT64"), NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR("TYPE_FIXED32"), NULL}, + {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR("TYPE_BOOL"), NULL}, + {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR("TYPE_STRING"), NULL}, + {UPB_TABKEY_NUM(10), UPB_VALUE_INIT_CONSTPTR("TYPE_GROUP"), NULL}, + {UPB_TABKEY_NUM(11), UPB_VALUE_INIT_CONSTPTR("TYPE_MESSAGE"), NULL}, + {UPB_TABKEY_NUM(12), UPB_VALUE_INIT_CONSTPTR("TYPE_BYTES"), NULL}, + {UPB_TABKEY_NUM(13), UPB_VALUE_INIT_CONSTPTR("TYPE_UINT32"), NULL}, + {UPB_TABKEY_NUM(14), UPB_VALUE_INIT_CONSTPTR("TYPE_ENUM"), NULL}, + {UPB_TABKEY_NUM(15), UPB_VALUE_INIT_CONSTPTR("TYPE_SFIXED32"), NULL}, +}; + +const upb_value google_protobuf_arrays[97] = { + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[36]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[15]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[40]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[8]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[14]), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[61]), + 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[7]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[33]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[72]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[48]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[31]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[42]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[51]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[34]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[11]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[43]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[25]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[63]), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[2]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[54]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[5]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[37]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[53]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[4]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[28]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[9]), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[16]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[24]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[27]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[41]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[30]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[18]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[52]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[45]), + 
UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[32]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[29]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[46]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[26]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[55]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[60]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[35]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[17]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[56]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[39]), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[38]), + UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[19]), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR("LABEL_OPTIONAL"), + UPB_VALUE_INIT_CONSTPTR("LABEL_REQUIRED"), + UPB_VALUE_INIT_CONSTPTR("LABEL_REPEATED"), + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR("TYPE_DOUBLE"), + UPB_VALUE_INIT_CONSTPTR("TYPE_FLOAT"), + UPB_VALUE_INIT_CONSTPTR("TYPE_INT64"), + UPB_VALUE_INIT_CONSTPTR("TYPE_UINT64"), + UPB_VALUE_INIT_CONSTPTR("TYPE_INT32"), + UPB_VALUE_INIT_CONSTPTR("TYPE_FIXED64"), + UPB_VALUE_INIT_CONSTPTR("STRING"), + UPB_VALUE_INIT_CONSTPTR("CORD"), + UPB_VALUE_INIT_CONSTPTR("STRING_PIECE"), + UPB_ARRAY_EMPTYENT, + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR("SPEED"), + UPB_VALUE_INIT_CONSTPTR("CODE_SIZE"), + UPB_VALUE_INIT_CONSTPTR("LITE_RUNTIME"), +}; + diff --git a/upb/descriptor/descriptor.upb.h b/upb/descriptor/descriptor.upb.h new file mode 100755 index 0000000..4903ae5 --- /dev/null +++ b/upb/descriptor/descriptor.upb.h @@ -0,0 +1,90 @@ +// This file was generated by upbc (the upb compiler). +// Do not edit -- your changes will be discarded when the file is +// regenerated. 
+ +#ifndef GOOGLE_PROTOBUF_UPB_H_ +#define GOOGLE_PROTOBUF_UPB_H_ + +#include "upb/def.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Enums + +typedef enum { + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17, +} google_protobuf_FieldDescriptorProto_Type; + +typedef enum { + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1, +} google_protobuf_FieldDescriptorProto_Label; + +typedef enum { + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1, + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0, + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2, +} google_protobuf_FieldOptions_CType; + +typedef enum { + GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2, + GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1, + GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3, +} 
google_protobuf_FileOptions_OptimizeMode; + +// Do not refer to these forward declarations; use the constants +// below. +extern const upb_msgdef google_protobuf_msgs[20]; +extern const upb_fielddef google_protobuf_fields[73]; +extern const upb_enumdef google_protobuf_enums[4]; + +// Constants for references to defs. +// We hide these behind macros to decouple users from the +// details of how we have statically defined them (ie. whether +// each def has its own symbol or lives in an array of defs). +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO &google_protobuf_msgs[0] +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE &google_protobuf_msgs[1] +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO &google_protobuf_msgs[2] +#define GOOGLE_PROTOBUF_ENUMOPTIONS &google_protobuf_msgs[3] +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO &google_protobuf_msgs[4] +#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS &google_protobuf_msgs[5] +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO &google_protobuf_msgs[6] +#define GOOGLE_PROTOBUF_FIELDOPTIONS &google_protobuf_msgs[7] +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO &google_protobuf_msgs[8] +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET &google_protobuf_msgs[9] +#define GOOGLE_PROTOBUF_FILEOPTIONS &google_protobuf_msgs[10] +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS &google_protobuf_msgs[11] +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO &google_protobuf_msgs[12] +#define GOOGLE_PROTOBUF_METHODOPTIONS &google_protobuf_msgs[13] +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO &google_protobuf_msgs[14] +#define GOOGLE_PROTOBUF_SERVICEOPTIONS &google_protobuf_msgs[15] +#define GOOGLE_PROTOBUF_SOURCECODEINFO &google_protobuf_msgs[16] +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION &google_protobuf_msgs[17] +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION &google_protobuf_msgs[18] +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART &google_protobuf_msgs[19] + +#ifdef __cplusplus +}; // extern "C" +#endif + +#endif // GOOGLE_PROTOBUF_UPB_H_ diff --git 
a/upb/descriptor/descriptor_const.h b/upb/descriptor/descriptor_const.h deleted file mode 100644 index 52ca803..0000000 --- a/upb/descriptor/descriptor_const.h +++ /dev/null @@ -1,349 +0,0 @@ -/* This file was generated by upbc (the upb compiler). Do not edit. */ - -#ifndef UPB_DESCRIPTOR_CONST_H -#define UPB_DESCRIPTOR_CONST_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* Enums. */ - -typedef enum google_protobuf_FieldDescriptorProto_Type { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17 -} google_protobuf_FieldDescriptorProto_Type; - -typedef enum google_protobuf_FieldDescriptorProto_Label { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1 -} google_protobuf_FieldDescriptorProto_Label; - -typedef enum google_protobuf_FieldOptions_CType { - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 
1, - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0, - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2 -} google_protobuf_FieldOptions_CType; - -typedef enum google_protobuf_FileOptions_OptimizeMode { - GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2, - GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1, - GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3 -} google_protobuf_FileOptions_OptimizeMode; - -/* Constants for field names and numbers. */ - -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1 -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path" -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNUM 2 -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNAME "span" -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNUM 2 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNUM 3 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNAME "identifier_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNUM 4 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNAME "positive_int_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDTYPE 4 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value" -#define 
GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNUM 7 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNAME "package" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNUM 3 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNAME "dependency" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM 4 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNAME "message_type" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNUM 9 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info" -#define 
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNAME "input_type" -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNUM 3 -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNAME "output_type" -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNUM 4 -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1 -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location" -#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDTYPE 11 
- -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNUM 1 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNAME "start" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNUM 2 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNUM 2 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNAME "packed" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNUM 3 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNAME "deprecated" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNUM 9 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNAME "experimental_map_key" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define 
GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16 -#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services" -#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNUM 17 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNAME "java_generic_services" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNUM 18 -#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNAME "py_generic_services" -#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNUM 20 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNAME "java_generate_equals_and_hash" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9 -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for" -#define 
GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3 -#define 
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNAME "extendee" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM 3 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNAME "number" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM 4 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNAME "label" -#define 
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM 7 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8 - -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define 
GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8 - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DESCRIPTOR_CONST_H */ diff --git a/upb/descriptor/reader.c b/upb/descriptor/reader.c index 8177560..16f3b24 100644 --- a/upb/descriptor/reader.c +++ b/upb/descriptor/reader.c @@ -3,21 +3,38 @@ * * Copyright (c) 2008-2009 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> + * + * XXX: The routines in this file that consume a string do not currently + * support having the string span buffers. In the future, as upb_sink and + * its buffering/sharing functionality evolve there should be an easy and + * idiomatic way of correctly handling this case. For now, we accept this + * limitation since we currently only parse descriptors from single strings. 
*/ -#include <stdlib.h> +#include "upb/descriptor/reader.h" + #include <errno.h> +#include <stdlib.h> +#include <string.h> +#include "upb/bytestream.h" #include "upb/def.h" -#include "upb/descriptor/descriptor_const.h" -#include "upb/descriptor/reader.h" +#include "upb/descriptor/descriptor.upb.h" + +static char *upb_strndup(const char *buf, size_t n) { + char *ret = malloc(n + 1); + if (!ret) return NULL; + memcpy(ret, buf, n); + ret[n] = '\0'; + return ret; +} // Returns a newly allocated string that joins input strings together, for example: // join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" // join("", "Baz") -> "Baz" -// Caller owns a ref on the returned string. */ +// Caller owns a ref on the returned string. static char *upb_join(const char *base, const char *name) { if (!base || strlen(base) == 0) { - return strdup(name); + return upb_strdup(name); } else { char *ret = malloc(strlen(base) + strlen(name) + 2); ret[0] = '\0'; @@ -74,10 +91,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { } } -// Forward declares for top-level file descriptors. -static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h); -static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h); - void upb_descreader_init(upb_descreader *r) { upb_deflist_init(&r->defs); upb_status_init(&r->status); @@ -107,7 +120,7 @@ static upb_msgdef *upb_descreader_top(upb_descreader *r) { if (r->stack_len <= 1) return NULL; int index = r->stack[r->stack_len-1].start - 1; assert(index >= 0); - return upb_downcast_msgdef(r->defs.defs[index]); + return upb_downcast_msgdef_mutable(r->defs.defs[index]); } static upb_def *upb_descreader_last(upb_descreader *r) { @@ -136,144 +149,80 @@ void upb_descreader_setscopename(upb_descreader *r, char *str) { } // Handlers for google.protobuf.FileDescriptorProto. 
-static upb_flow_t upb_descreader_FileDescriptorProto_startmsg(void *_r) { +static bool file_startmsg(void *_r) { upb_descreader *r = _r; upb_descreader_startcontainer(r); - return UPB_CONTINUE; + return true; } -static void upb_descreader_FileDescriptorProto_endmsg(void *_r, - upb_status *status) { - (void)status; +static void file_endmsg(void *_r, upb_status *status) { + UPB_UNUSED(status); upb_descreader *r = _r; upb_descreader_endcontainer(r); } -static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r, - upb_value fval, - upb_value val) { - (void)fval; +static size_t file_onpackage(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; - upb_descreader_setscopename( - r, upb_byteregion_strdup(upb_value_getbyteregion(val))); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_descreader_register_FileDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_descreader_FileDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_descreader_FileDescriptorProto_endmsg); - -#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM -#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false); - upb_fhandlers_setvalue(f, &upb_descreader_FileDescriptorProto_package); - - upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true, - upb_msgdef_register_DescriptorProto(h)); - upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); - // TODO: services, extensions - return m; -} -#undef FNUM -#undef FTYPE - -static upb_mhandlers *upb_descreader_register_FileDescriptorSet(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - -#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM -#define 
FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE - upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true, - upb_descreader_register_FileDescriptorProto(h)); - return m; -} -#undef FNUM -#undef FTYPE - -upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h) { - h->should_jit = false; - return upb_descreader_register_FileDescriptorSet(h); + // XXX: see comment at the top of the file. + upb_descreader_setscopename(r, upb_strndup(buf, n)); + return n; } -// google.protobuf.EnumValueDescriptorProto. -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_r) { +// Handlers for google.protobuf.EnumValueDescriptorProto. +static bool enumval_startmsg(void *_r) { upb_descreader *r = _r; r->saw_number = false; r->saw_name = false; - return UPB_CONTINUE; + return true; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r, - upb_value fval, - upb_value val) { - (void)fval; +static size_t enumval_onname(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; + // XXX: see comment at the top of the file. 
free(r->name); - r->name = upb_byteregion_strdup(upb_value_getbyteregion(val)); + r->name = upb_strndup(buf, n); r->saw_name = true; - return UPB_CONTINUE; + return n; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_r, - upb_value fval, - upb_value val) { - (void)fval; +static bool enumval_onnumber(void *_r, void *fval, int32_t val) { + UPB_UNUSED(fval); upb_descreader *r = _r; - r->number = upb_value_getint32(val); + r->number = val; r->saw_number = true; - return UPB_CONTINUE; + return true; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r, - upb_status *status) { +static void enumval_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; if(!r->saw_number || !r->saw_name) { upb_status_seterrliteral(status, "Enum value missing name or number."); return; } - upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); + upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r)); if (upb_enumdef_numvals(e) == 0) { // The default value of an enum (in the absence of an explicit default) is // its first listed value. 
upb_enumdef_setdefault(e, r->number); } - upb_enumdef_addval(e, r->name, r->number); + upb_enumdef_addval(e, r->name, r->number, status); free(r->name); r->name = NULL; } -static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f; - f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name); - - f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number); - return m; -} -#undef FNUM -#undef FTYPE -// google.protobuf.EnumDescriptorProto. -static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) { +// Handlers for google.protobuf.EnumDescriptorProto. 
+static bool enum_startmsg(void *_r) { upb_descreader *r = _r; - upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs))); - return UPB_CONTINUE; + upb_deflist_push(&r->defs, upb_upcast(upb_enumdef_new(&r->defs))); + return true; } -static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) { +static void enum_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; - upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); + upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r)); if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) { upb_status_seterrliteral(status, "Enum had no name."); return; @@ -284,46 +233,28 @@ static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) } } -static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r, - upb_value fval, - upb_value val) { - (void)fval; +static size_t enum_onname(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; - char *fullname = upb_byteregion_strdup(upb_value_getbyteregion(val)); + // XXX: see comment at the top of the file. 
+ char *fullname = upb_strndup(buf, n); upb_def_setfullname(upb_descreader_last(r), fullname); free(fullname); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg); - upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name); - - upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true, - upb_enumdef_register_EnumValueDescriptorProto(h)); - return m; + return n; } -#undef FNUM -#undef FTYPE -static upb_flow_t upb_fielddef_startmsg(void *_r) { +// Handlers for google.protobuf.FieldDescriptorProto +static bool field_startmsg(void *_r) { upb_descreader *r = _r; r->f = upb_fielddef_new(&r->defs); free(r->default_string); r->default_string = NULL; - return UPB_CONTINUE; + return true; } // Converts the default value in string "str" into "d". Passes a ref on str. // Returns true on success. -static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) { +static bool parse_default(char *str, upb_value *d, int type) { bool success = true; if (str) { switch(type) { @@ -397,29 +328,24 @@ static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) { return success; } -static void upb_fielddef_endmsg(void *_r, upb_status *status) { +static void field_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_fielddef *f = r->f; // TODO: verify that all required fields were present. 
- assert(f->number != -1 && upb_fielddef_name(f) != NULL); - assert((upb_fielddef_subtypename(f) != NULL) == upb_hassubdef(f)); - - // Field was successfully read, add it as a field of the msgdef. - upb_msgdef *m = upb_descreader_top(r); - upb_msgdef_addfield(m, f, &r->defs); - r->f = NULL; + assert(upb_fielddef_number(f) != 0 && upb_fielddef_name(f) != NULL); + assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f)); if (r->default_string) { - if (upb_issubmsg(f)) { + if (upb_fielddef_issubmsg(f)) { upb_status_seterrliteral(status, "Submessages cannot have defaults."); return; } - if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) { + if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE(ENUM)) { upb_fielddef_setdefaultcstr(f, r->default_string); } else { upb_value val; upb_value_setptr(&val, NULL); // Silence inaccurate compiler warnings. - if (!upb_fielddef_parsedefault(r->default_string, &val, f->type)) { + if (!parse_default(r->default_string, &val, upb_fielddef_type(f))) { // We don't worry too much about giving a great error message since the // compiler should have ensured this was correct. 
upb_status_seterrliteral(status, "Error converting default value."); @@ -430,132 +356,147 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) { } } -static upb_flow_t upb_fielddef_ontype(void *_r, upb_value fval, upb_value val) { - (void)fval; +static bool field_ontype(void *_r, void *fval, int32_t val) { + UPB_UNUSED(fval); upb_descreader *r = _r; - upb_fielddef_settype(r->f, upb_value_getint32(val)); - return UPB_CONTINUE; + upb_fielddef_settype(r->f, val); + return true; } -static upb_flow_t upb_fielddef_onlabel(void *_r, upb_value fval, upb_value val) { - (void)fval; +static bool field_onlabel(void *_r, void *fval, int32_t val) { + UPB_UNUSED(fval); upb_descreader *r = _r; - upb_fielddef_setlabel(r->f, upb_value_getint32(val)); - return UPB_CONTINUE; + upb_fielddef_setlabel(r->f, val); + return true; } -static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) { - (void)fval; +static bool field_onnumber(void *_r, void *fval, int32_t val) { + UPB_UNUSED(fval); upb_descreader *r = _r; - upb_fielddef_setnumber(r->f, upb_value_getint32(val)); - return UPB_CONTINUE; + upb_fielddef_setnumber(r->f, val); + return true; } -static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) { - (void)fval; +static size_t field_onname(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; - char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); + // XXX: see comment at the top of the file. 
+ char *name = upb_strndup(buf, n); upb_fielddef_setname(r->f, name); free(name); - return UPB_CONTINUE; + return n; } -static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval, - upb_value val) { - (void)fval; +static size_t field_ontypename(void *_r, void *fval, const char *buf, + size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; - char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); - upb_fielddef_setsubtypename(r->f, name); + // XXX: see comment at the top of the file. + char *name = upb_strndup(buf, n); + upb_fielddef_setsubdefname(r->f, name); free(name); - return UPB_CONTINUE; + return n; } -static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval, - upb_value val) { - (void)fval; +static size_t field_ondefaultval(void *_r, void *fval, const char *buf, + size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; // Have to convert from string to the correct type, but we might not know the - // type yet. + // type yet, so we save it as a string until the end of the field. + // XXX: see comment at the top of the file. 
free(r->default_string); - r->default_string = upb_byteregion_strdup(upb_value_getbyteregion(val)); - return UPB_CONTINUE; -} - -static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto( - upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg); - upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg); - -#define FIELD(name, handler) \ - upb_fhandlers_setvalue( \ - upb_mhandlers_newfhandlers(m, \ - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \ - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \ - false), \ - handler); - FIELD(TYPE, &upb_fielddef_ontype); - FIELD(LABEL, &upb_fielddef_onlabel); - FIELD(NUMBER, &upb_fielddef_onnumber); - FIELD(NAME, &upb_fielddef_onname); - FIELD(TYPE_NAME, &upb_fielddef_ontypename); - FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval); - return m; -} -#undef FNUM -#undef FTYPE - - -// google.protobuf.DescriptorProto. -static upb_flow_t upb_msgdef_startmsg(void *_r) { + r->default_string = upb_strndup(buf, n); + return n; +} + +// Handlers for google.protobuf.DescriptorProto (representing a message). 
+static bool msg_startmsg(void *_r) { upb_descreader *r = _r; - upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new(&r->defs))); + upb_deflist_push(&r->defs, upb_upcast(upb_msgdef_new(&r->defs))); upb_descreader_startcontainer(r); - return UPB_CONTINUE; + return true; } -static void upb_msgdef_endmsg(void *_r, upb_status *status) { +static void msg_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_msgdef *m = upb_descreader_top(r); - if(!upb_def_fullname(UPB_UPCAST(m))) { + if(!upb_def_fullname(upb_upcast(m))) { upb_status_seterrliteral(status, "Encountered message with no name."); return; } upb_descreader_endcontainer(r); } -static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) { - (void)fval; +static size_t msg_onname(void *_r, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); upb_descreader *r = _r; upb_msgdef *m = upb_descreader_top(r); - char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); - upb_def_setfullname(UPB_UPCAST(m), name); + // XXX: see comment at the top of the file. + char *name = upb_strndup(buf, n); + upb_def_setfullname(upb_upcast(m), name); upb_descreader_setscopename(r, name); // Passes ownership of name. 
- return UPB_CONTINUE; + return n; } -static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) { - upb_mhandlers *m = upb_handlers_newmhandlers(h); - upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg); - upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg); - -#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM -#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE - upb_fhandlers *f = - upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false); - upb_fhandlers_setvalue(f, &upb_msgdef_onname); - - upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true, - upb_fielddef_register_FieldDescriptorProto(h)); - upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true, - upb_enumdef_register_EnumDescriptorProto(h)); +static bool msg_onendfield(void *_r, void *fval) { + UPB_UNUSED(fval); + upb_descreader *r = _r; + upb_msgdef *m = upb_descreader_top(r); + upb_msgdef_addfield(m, r->f, &r->defs); + r->f = NULL; + return true; +} - // DescriptorProto is self-recursive, so we must link the definition. - upb_mhandlers_newfhandlers_subm( - m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m); +static bool discardfield(void *_r, void *fval) { + UPB_UNUSED(fval); + upb_descreader *r = _r; + // Discard extension field so we don't leak it. 
+ upb_fielddef_unref(r->f, &r->defs); + r->f = NULL; + return true; +} + +static void reghandlers(void *closure, upb_handlers *h) { + UPB_UNUSED(closure); + const upb_msgdef *m = upb_handlers_msgdef(h); + + if (m == GOOGLE_PROTOBUF_DESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &msg_startmsg); + upb_handlers_setendmsg(h, &msg_endmsg); + upb_handlers_setstring_n(h, "name", &msg_onname, NULL, NULL); + upb_handlers_setendsubmsg_n(h, "field", &msg_onendfield, NULL, NULL); + // TODO: support extensions + upb_handlers_setendsubmsg_n(h, "extension", &discardfield, NULL, NULL); + } else if (m == GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &file_startmsg); + upb_handlers_setendmsg(h, &file_endmsg); + upb_handlers_setstring_n(h, "package", &file_onpackage, NULL, NULL); + // TODO: support extensions + upb_handlers_setendsubmsg_n(h, "extension", &discardfield, NULL, NULL); + } else if (m == GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &enumval_startmsg); + upb_handlers_setendmsg(h, &enumval_endmsg); + upb_handlers_setstring_n(h, "name", &enumval_onname, NULL, NULL); + upb_handlers_setint32_n(h, "number", &enumval_onnumber, NULL, NULL); + } else if (m == GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &enum_startmsg); + upb_handlers_setendmsg(h, &enum_endmsg); + upb_handlers_setstring_n(h, "name", &enum_onname, NULL, NULL); + } else if (m == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO) { + upb_handlers_setstartmsg(h, &field_startmsg); + upb_handlers_setendmsg(h, &field_endmsg); + upb_handlers_setint32_n (h, "type", &field_ontype, NULL, NULL); + upb_handlers_setint32_n (h, "label", &field_onlabel, NULL, NULL); + upb_handlers_setint32_n (h, "number", &field_onnumber, NULL, NULL); + upb_handlers_setstring_n(h, "name", &field_onname, NULL, NULL); + upb_handlers_setstring_n(h, "type_name", &field_ontypename, NULL, NULL); + upb_handlers_setstring_n( + h, "default_value", &field_ondefaultval, NULL, NULL); + } +} - 
// TODO: extensions. - return m; +const upb_handlers *upb_descreader_newhandlers(const void *owner) { + return upb_handlers_newfrozen( + GOOGLE_PROTOBUF_FILEDESCRIPTORSET, owner, reghandlers, NULL); } -#undef FNUM -#undef FTYPE diff --git a/upb/descriptor/reader.h b/upb/descriptor/reader.h index 0e1bfa0..4312682 100644 --- a/upb/descriptor/reader.h +++ b/upb/descriptor/reader.h @@ -67,7 +67,7 @@ void upb_descreader_uninit(upb_descreader *r); // Registers handlers that will build the defs. Pass the descreader as the // closure. -upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h); +const upb_handlers *upb_descreader_newhandlers(const void *owner); // Gets the array of defs that have been parsed and removes them from the // descreader. Ownership of the defs is passed to the caller using the given diff --git a/upb/google/README b/upb/google/README new file mode 100644 index 0000000..a237583 --- /dev/null +++ b/upb/google/README @@ -0,0 +1,16 @@ +This directory contains code to interoperate with Google's official +Protocol Buffers release. Since it doesn't really have a name +besides "protobuf," calling this directory "google" seems like the +least confusing option. + +We support writing into protobuf's generated classes (and hopefully +reading too, before long). We support both the open source protobuf +release and the Google-internal version of the same code. The two +live in different namespaces, and the internal version supports some +features that are not supported in the open-source release. Also, the +internal version includes the legacy "proto1" classes which we must +support; thankfully this is mostly relegated to its own separate file. + +Our functionality requires the full google::protobuf::Message +interface; we rely on reflection so we know what fields to read/write +and where to put them, so we can't support MessageLite. 
diff --git a/upb/google/bridge.cc b/upb/google/bridge.cc new file mode 100644 index 0000000..4d64ab8 --- /dev/null +++ b/upb/google/bridge.cc @@ -0,0 +1,260 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// IMPORTANT NOTE! This file is compiled TWICE, once with UPB_GOOGLE3 defined +// and once without! This allows us to provide functionality against proto2 +// and protobuf opensource both in a single binary without the two conflicting. +// However we must be careful not to violate the ODR. + +#include "upb/google/bridge.h" + +#include <map> +#include <string> +#include "upb/def.h" +#include "upb/google/proto1.h" +#include "upb/google/proto2.h" +#include "upb/handlers.h" + +namespace upb { +namespace proto2_bridge_google3 { class Defs; } +namespace proto2_bridge_opensource { class Defs; } +} // namespace upb + +#ifdef UPB_GOOGLE3 +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/message.h" +#include "net/proto2/proto/descriptor.pb.h" +namespace goog = ::proto2; +namespace me = ::upb::proto2_bridge_google3; +#else +#include "google/protobuf/descriptor.h" +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.pb.h" +namespace goog = ::google::protobuf; +namespace me = ::upb::proto2_bridge_opensource; +#endif + +class me::Defs { + public: + void OnMessage(Handlers* h) { + const upb::MessageDef* md = h->message_def(); + const goog::Message& m = *message_map_[md]; + const goog::Descriptor* d = m.GetDescriptor(); + for (upb::MessageDef::ConstIterator i(md); !i.Done(); i.Next()) { + const upb::FieldDef* upb_f = i.field(); + const goog::FieldDescriptor* proto2_f = + d->FindFieldByNumber(upb_f->number()); + if (!upb::google::TrySetWriteHandlers(proto2_f, m, upb_f, h) +#ifdef UPB_GOOGLE3 + && !upb::google::TrySetProto1WriteHandlers(proto2_f, m, upb_f, h) +#endif + ) { + // Unsupported reflection 
class. + // + // Should we fall back to using the public Reflection interface in this + // case? It's unclear whether it's supported behavior for users to + // create their own Reflection classes. + assert(false); + } + } + } + + static void StaticOnMessage(void *closure, upb::Handlers* handlers) { + me::Defs* defs = static_cast<me::Defs*>(closure); + defs->OnMessage(handlers); + } + + void AddSymbol(const std::string& name, upb::Def* def) { + assert(symbol_map_.find(name) == symbol_map_.end()); + symbol_map_[name] = def; + } + + void AddMessage(const goog::Message* m, upb::MessageDef* md) { + assert(message_map_.find(md) == message_map_.end()); + message_map_[md] = m; + AddSymbol(m->GetDescriptor()->full_name(), md->Upcast()); + } + + upb::Def* FindSymbol(const std::string& name) { + SymbolMap::iterator iter = symbol_map_.find(name); + return iter != symbol_map_.end() ? iter->second : NULL; + } + + void Flatten(std::vector<upb::Def*>* defs) { + SymbolMap::iterator iter; + for (iter = symbol_map_.begin(); iter != symbol_map_.end(); ++iter) { + defs->push_back(iter->second); + } + } + + private: + // Maps a new upb::MessageDef* to a corresponding proto2 Message* whose + // derived class is of the correct type according to the message the user + // gave us. + typedef std::map<const upb::MessageDef*, const goog::Message*> MessageMap; + MessageMap message_map_; + + // Maps a type name to a upb Def we have constructed to represent it. + typedef std::map<std::string, upb::Def*> SymbolMap; + SymbolMap symbol_map_; +}; + +namespace upb { +namespace google { + +// For submessage fields, stores a pointer to an instance of the submessage in +// *subm (but it is *not* guaranteed to be a prototype). 
+FieldDef* AddFieldDef(const goog::Message& m, const goog::FieldDescriptor* f, + upb::MessageDef* md, const goog::Message** subm) { + // To parse weak submessages effectively, we need to represent them in the + // upb::Def schema even though they are not reflected in the proto2 + // descriptors (weak fields are represented as FieldDescriptor::TYPE_BYTES). + const goog::Message* weak_prototype = NULL; +#ifdef UPB_GOOGLE3 + weak_prototype = upb::google::GetProto1WeakPrototype(m, f); +#endif + + upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f); + upb_f->set_number(f->number()); + upb_f->set_name(f->name()); + upb_f->set_label(static_cast<upb::FieldDef::Label>(f->label())); + upb_f->set_type(weak_prototype ? + UPB_TYPE_MESSAGE : static_cast<upb::FieldDef::Type>(f->type())); + + if (weak_prototype) { + upb_f->set_subdef_name(weak_prototype->GetDescriptor()->full_name()); + } else if (upb_f->IsSubMessage()) { + upb_f->set_subdef_name(f->message_type()->full_name()); + } else if (upb_f->type() == UPB_TYPE(ENUM)) { + // We set the enum default numerically. + upb_f->set_default_value( + MakeValue(static_cast<int32_t>(f->default_value_enum()->number()))); + upb_f->set_subdef_name(f->enum_type()->full_name()); + } else { + // Set field default for primitive types. Need to switch on the upb type + // rather than the proto2 type, because upb_f->type() may have been changed + // from BYTES to MESSAGE for a weak field. 
+ switch (upb_types[upb_f->type()].inmemory_type) { + case UPB_CTYPE_INT32: + upb_f->set_default_value(MakeValue(f->default_value_int32())); + break; + case UPB_CTYPE_INT64: + upb_f->set_default_value( + MakeValue(static_cast<int64_t>(f->default_value_int64()))); + break; + case UPB_CTYPE_UINT32: + upb_f->set_default_value(MakeValue(f->default_value_uint32())); + break; + case UPB_CTYPE_UINT64: + upb_f->set_default_value( + MakeValue(static_cast<uint64_t>(f->default_value_uint64()))); + break; + case UPB_CTYPE_DOUBLE: + upb_f->set_default_value(MakeValue(f->default_value_double())); + break; + case UPB_CTYPE_FLOAT: + upb_f->set_default_value(MakeValue(f->default_value_float())); + break; + case UPB_CTYPE_BOOL: + upb_f->set_default_value(MakeValue(f->default_value_bool())); + break; + case UPB_CTYPE_BYTEREGION: + upb_f->set_default_string(f->default_value_string()); + break; + } + } + bool ok = md->AddField(upb_f, &upb_f); + UPB_ASSERT_VAR(ok, ok); + + if (weak_prototype) { + *subm = weak_prototype; + } else if (f->cpp_type() == goog::FieldDescriptor::CPPTYPE_MESSAGE) { + *subm = upb::google::GetFieldPrototype(m, f); +#ifdef UPB_GOOGLE3 + if (!*subm) + *subm = upb::google::GetProto1FieldPrototype(m, f); +#endif + assert(*subm); + } + + return upb_f; +} + +upb::EnumDef* NewEnumDef(const goog::EnumDescriptor* desc, void *owner) { + upb::EnumDef* e = upb::EnumDef::New(owner); + e->set_full_name(desc->full_name()); + for (int i = 0; i < desc->value_count(); i++) { + const goog::EnumValueDescriptor* val = desc->value(i); + bool success = e->AddValue(val->name(), val->number(), NULL); + UPB_ASSERT_VAR(success, success); + } + return e; +} + +static upb::MessageDef* NewMessageDef(const goog::Message& m, void *owner, + me::Defs* defs) { + upb::MessageDef* md = upb::MessageDef::New(owner); + md->set_full_name(m.GetDescriptor()->full_name()); + + // Must do this before processing submessages to prevent infinite recursion. 
+ defs->AddMessage(&m, md); + + const goog::Descriptor* d = m.GetDescriptor(); + for (int i = 0; i < d->field_count(); i++) { + const goog::FieldDescriptor* proto2_f = d->field(i); + +#ifdef UPB_GOOGLE3 + // Skip lazy fields for now since we can't properly handle them. + if (proto2_f->options().lazy()) continue; +#endif + // Extensions not supported yet. + if (proto2_f->is_extension()) continue; + + const goog::Message* subm_prototype = NULL; // Set only for submessages. + upb::FieldDef* f = AddFieldDef(m, proto2_f, md, &subm_prototype); + + if (!f->HasSubDef()) continue; + + upb::Def* subdef = defs->FindSymbol(f->subdef_name()); + if (!subdef) { + if (f->type() == UPB_TYPE(ENUM)) { + subdef = NewEnumDef(proto2_f->enum_type(), owner)->Upcast(); + defs->AddSymbol(subdef->full_name(), subdef); + } else { + assert(f->IsSubMessage()); + assert(subm_prototype); + subdef = NewMessageDef(*subm_prototype, owner, defs)->Upcast(); + } + } + f->set_subdef(subdef); + } + + return md; +} + +const upb::Handlers* NewWriteHandlers(const goog::Message& m, void *owner) { + me::Defs defs; + const upb::MessageDef* md = NewMessageDef(m, owner, &defs); + + std::vector<upb::Def*> defs_vec; + defs.Flatten(&defs_vec); + Status status; + bool success = Def::Freeze(defs_vec, &status); + UPB_ASSERT_VAR(success, success); + + const upb::Handlers* ret = + upb::Handlers::NewFrozen(md, owner, me::Defs::StaticOnMessage, &defs); + + // Unref all defs, since they're now ref'd by the handlers. + for (int i = 0; i < static_cast<int>(defs_vec.size()); i++) { + defs_vec[i]->Unref(owner); + } + + return ret; +} + +} // namespace google +} // namespace upb diff --git a/upb/google/bridge.h b/upb/google/bridge.h new file mode 100644 index 0000000..8a2256f --- /dev/null +++ b/upb/google/bridge.h @@ -0,0 +1,76 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman <jhaberman@gmail.com> +// +// This file contains functionality for constructing upb Defs and Handlers +// corresponding to proto2 messages. Using this functionality, you can use upb +// to dynamically generate parsing code that can behave exactly like proto2's +// generated parsing code. Alternatively, you can configure things to +// read/write only a subset of the fields for higher performance when only some +// fields are needed. +// +// Example usage (FIX XXX): +// +// // Build a def that will have all fields and parse just like proto2 would. +// const upb::MessageDef* md = upb::proto2_bridge::NewMessageDef(&MyProto()); +// +// // JIT the parser; should only be done once ahead-of-time. +// upb::Handlers* handlers = upb::NewHandlersForMessage(md); +// upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers); +// handlers->Unref(); +// +// // The actual parsing. +// MyProto proto; +// upb::Decoder decoder; +// upb::StringSource source(buf, len); +// decoder.ResetPlan(plan, 0); +// decoder.ResetInput(source.AllBytes(), &proto); +// CHECK(decoder.Decode() == UPB_OK) << decoder.status(); +// +// To parse only one field and skip all others: +// +// const upb::MessageDef* md = +// upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype()); +// upb::proto2_bridge::AddFieldDef( +// MyProto::descriptor()->FindFieldByName("my_field"), md); +// upb::Freeze(md); +// +// // Now continue with "JIT the parser" from above. +// +// Note that there is currently no support for +// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate +// DescriptorPool and MessageFactory for extensions. Since this is a property +// of the input in proto2, it's difficult to build a plan ahead-of-time that +// can properly support this. If it's an important use case, the caller should +// probably build a upb plan explicitly. 
+ +#ifndef UPB_GOOGLE_BRIDGE_H_ +#define UPB_GOOGLE_BRIDGE_H_ + +namespace google { +namespace protobuf { class Message; } +} // namespace google + +namespace proto2 { class Message; } + +namespace upb { + +class Handlers; + +namespace google { + +// Returns a upb::Handlers object that can be used to populate a proto2::Message +// object of the same type as "m." +// +// TODO(haberman): Add handler caching functionality so that we don't use +// O(n^2) memory in the worst case when incrementally building handlers. +const upb::Handlers* NewWriteHandlers(const proto2::Message& m, void *owner); +const upb::Handlers* NewWriteHandlers(const ::google::protobuf::Message& m, + void *owner); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_BRIDGE_H_ diff --git a/upb/google/cord.h b/upb/google/cord.h new file mode 100644 index 0000000..c579c0c --- /dev/null +++ b/upb/google/cord.h @@ -0,0 +1,48 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// Functionality for interoperating with Cord. Only needed inside Google. + +#ifndef UPB_GOOGLE_CORD_H +#define UPB_GOOGLE_CORD_H + +#include "strings/cord.h" +#include "upb/bytestream.h" + +namespace upb { + +namespace proto2_bridge_google3 { class FieldAccessor; } +namespace proto2_bridge_opensource { class FieldAccessor; } + +namespace google { + +class P2R_Handlers; + +class CordSupport { + private: + UPB_DISALLOW_POD_OPS(CordSupport); + + inline static void AssignToCord(const upb::ByteRegion* r, Cord* cord) { + // TODO(haberman): ref source data if source is a cord. 
+ cord->Clear(); + uint64_t ofs = r->start_ofs(); + while (ofs < r->end_ofs()) { + size_t len; + const char *buf = r->GetPtr(ofs, &len); + cord->Append(StringPiece(buf, len)); + ofs += len; + } + } + + friend class ::upb::proto2_bridge_google3::FieldAccessor; + friend class ::upb::proto2_bridge_opensource::FieldAccessor; + friend class P2R_Handlers; +}; + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_CORD_H diff --git a/upb/google/proto1.cc b/upb/google/proto1.cc new file mode 100644 index 0000000..bb9ff75 --- /dev/null +++ b/upb/google/proto1.cc @@ -0,0 +1,502 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// This set of handlers can write into a proto2::Message whose reflection class +// is _pi::Proto2Reflection (ie. proto1 messages; while slightly confusing, the +// name "Proto2Reflection" indicates that it is a reflection class implementing +// the proto2 reflection interface, but is used for proto1 generated messages). +// +// Like FieldAccessor this depends on breaking encapsulation, and will need to +// be changed if and when the details of _pi::Proto2Reflection change. +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ + +#include "upb/google/proto1.h" + +// TODO(haberman): friend upb so that this isn't required. +#define protected public +#include "net/proto2/public/repeated_field.h" +#undef protected + +// TODO(haberman): friend upb so that this isn't required. 
+#define private public +#include "net/proto/proto2_reflection.h" +#undef private + +#include "net/proto/internal_layout.h" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/google/cord.h" +#include "upb/handlers.h" + +template<class T> static T* GetPointer(void *message, size_t offset) { + return reinterpret_cast<T*>(static_cast<char*>(message) + offset); +} + +namespace upb { +namespace google { + +class P2R_Handlers { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + const proto2::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast<const _pi::Proto2Reflection*>(base_r); + if (!r) return false; + // Extensions not supported yet. 
+ if (proto2_f->is_extension()) return false; + + switch (r->GetFieldLayout(proto2_f)->crep) { +#define PRIMITIVE(name, type_name) \ + case _pi::CREP_REQUIRED_ ## name: \ + case _pi::CREP_OPTIONAL_ ## name: \ + case _pi::CREP_REPEATED_ ## name: \ + SetPrimitiveHandlers<type_name>(proto2_f, r, upb_f, h); return true; + PRIMITIVE(DOUBLE, double); + PRIMITIVE(FLOAT, float); + PRIMITIVE(INT64, int64_t); + PRIMITIVE(UINT64, uint64_t); + PRIMITIVE(INT32, int32_t); + PRIMITIVE(FIXED64, uint64_t); + PRIMITIVE(FIXED32, uint32_t); + PRIMITIVE(BOOL, bool); +#undef PRIMITIVE + case _pi::CREP_REQUIRED_STRING: + case _pi::CREP_OPTIONAL_STRING: + case _pi::CREP_REPEATED_STRING: + SetStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: + SetOutOfLineStringHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_CORD: + case _pi::CREP_OPTIONAL_CORD: + case _pi::CREP_REPEATED_CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case _pi::CREP_REQUIRED_GROUP: + case _pi::CREP_REQUIRED_FOREIGN: + case _pi::CREP_REQUIRED_FOREIGN_PROTO2: + SetRequiredMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_GROUP: + case _pi::CREP_REPEATED_GROUP: + case _pi::CREP_OPTIONAL_FOREIGN: + case _pi::CREP_REPEATED_FOREIGN: + case _pi::CREP_OPTIONAL_FOREIGN_PROTO2: + case _pi::CREP_REPEATED_FOREIGN_PROTO2: + SetMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + case _pi::CREP_OPTIONAL_FOREIGN_WEAK: + case _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2: + SetWeakMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + default: assert(false); return false; + } + } + + // If the field "f" in the message "m" is a weak field, returns the prototype + // of the submessage (which may be a specific type or may be OpaqueMessage). + // Otherwise returns NULL. 
+ static const proto2::Message* GetWeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const _pi::Proto2Reflection* r = + dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection()); + if (!r) return NULL; + + const _pi::Field* field = r->GetFieldLayout(f); + if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) { + return static_cast<const proto2::Message*>( + field->weak_layout()->default_instance); + } else if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2) { + return field->proto2_weak_default_instance(); + } else { + return NULL; + } + } + + // If "m" is a message that uses Proto2Reflection, returns the prototype of + // the submessage (which may be OpaqueMessage for a weak field that is not + // linked in). Otherwise returns NULL. + static const proto2::Message* GetFieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + // See file comment re: dynamic_cast. + const proto2::Message* ret = GetWeakPrototype(m, f); + if (ret) { + return ret; + } else if (dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection())) { + // Since proto1 has no dynamic message, it must be from the generated + // factory. 
+ assert(f->cpp_type() == proto2::FieldDescriptor::CPPTYPE_MESSAGE); + ret = proto2::MessageFactory::generated_factory()->GetPrototype( + f->message_type()); + assert(ret); + return ret; + } else { + return NULL; + } + } + + private: + class FieldOffset { + public: + FieldOffset( + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : offset_(GetOffset(f, r)), + is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template<class T> T* GetFieldPointer(void* message) const { + return GetPointer<T>(message, offset_); + } + + void SetHasbit(void* message) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer<uint8_t>(message, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. + int32_t hasbyte_; + int8_t mask_; + }; + + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + + static int16_t GetHasbit(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + assert(!f->is_repeated()); + return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index; + } + + static uint16_t GetOffset(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + return r->GetFieldLayout(f)->offset; + } + + // StartSequence ///////////////////////////////////////////////////////////// + + static void SetStartSequenceHandler( + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(f->IsSequence()); + h->SetStartSequenceHandler( + f, &PushOffset, new FieldOffset(proto2_f, r), + &upb::DeletePointer<FieldOffset>); + } + + static void* PushOffset(void *m, void *fval) { + const FieldOffset* offset = 
static_cast<FieldOffset*>(fval); + return offset->GetFieldPointer<void>(m); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template <typename T> static void SetPrimitiveHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetValueHandler<T>(f, &Append<T>, NULL, NULL); + } else { + upb::SetStoreValueHandler<T>( + f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h); + } + } + + template <typename T> + static bool Append(void *_r, void *fval, T val) { + UPB_UNUSED(fval); + // Proto1's ProtoArray class derives from proto2::RepeatedField. + proto2::RepeatedField<T>* r = static_cast<proto2::RepeatedField<T>*>(_r); + r->Add(val); + return true; + } + + // String //////////////////////////////////////////////////////////////////// + + static void SetStringHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedString, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartString, new FieldOffset(proto2_f, r), + &upb::DeletePointer<FieldOffset>); + } + } + + static void* StartString(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* info = static_cast<const FieldOffset*>(fval); + info->SetHasbit(m); + string* str = info->GetFieldPointer<string>(m); + str->clear(); + // reserve() here appears to hurt performance rather than help. 
+ return str; + } + + static size_t OnStringBuf(void *_s, void *fval, const char *buf, size_t n) { + string* s = static_cast<string*>(_s); + s->append(buf, n); + return n; + } + + static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(fval); + proto2::RepeatedPtrField<string>* r = + static_cast<proto2::RepeatedPtrField<string>*>(_r); + string* str = r->Add(); + // reserve() here appears to hurt performance rather than help. + return str; + } + + // Out-of-line string //////////////////////////////////////////////////////// + + static void SetOutOfLineStringHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + // This type is only used for non-repeated string fields. + assert(!f->IsSequence()); + h->SetStartStringHandler( + f, &StartOutOfLineString, new FieldOffset(proto2_f, r), + &upb::DeletePointer<FieldOffset>); + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + } + + static void* StartOutOfLineString(void *m, void *fval, size_t size_hint) { + const FieldOffset* info = static_cast<const FieldOffset*>(fval); + info->SetHasbit(m); + string **str = info->GetFieldPointer<string*>(m); + if (*str == &::ProtocolMessage::___empty_internal_proto_string_) + *str = new string(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. 
+ return *str; + } + + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers( + const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnCordBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartCord, new FieldOffset(proto2_f, r), + &upb::DeletePointer<FieldOffset>); // Must match the new'd FieldOffset. + } + } + + static void* StartCord(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + const FieldOffset* offset = static_cast<const FieldOffset*>(fval); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer<Cord>(m); + field->Clear(); + return field; + } + + static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + Cord* c = static_cast<Cord*>(_c); + c->Append(StringPiece(buf, n)); + return n; // All n bytes were appended. + } + + static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + proto2::RepeatedField<Cord>* r = + static_cast<proto2::RepeatedField<Cord>*>(_r); + return r->Add(); + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData( + const proto2::Message& prototype, + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : FieldOffset(f, r) { + prototype_ = GetWeakPrototype(prototype, f); + if (!prototype_) + prototype_ = GetFieldPrototype(prototype, f); + } + + const proto2::Message* prototype() const { return prototype_; } + + private: + const proto2::Message* prototype_; + }; + + static void SetStartSubMessageHandler( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + 
upb::Handlers::StartFieldHandler* handler, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStartSubMessageHandler( + f, handler, + new SubMessageHandlerData(m, proto2_f, r), + &upb::DeletePointer<SubMessageHandlerData>); + } + + static void SetRequiredMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + h->SetStartSubMessageHandler( + f, &StartRequiredSubMessage, new FieldOffset(proto2_f, r), + &upb::DeletePointer<FieldOffset>); + } + } + + static void* StartRequiredSubMessage(void *m, void *fval) { + const FieldOffset* offset = static_cast<FieldOffset*>(fval); + offset->SetHasbit(m); + return offset->GetFieldPointer<void>(m); + } + + static void SetMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + SetStartSubMessageHandler(proto2_f, m, r, &StartSubMessage, f, h); + } + } + + static void SetWeakMessageHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); + } else { + SetStartSubMessageHandler(proto2_f, m, r, &StartWeakSubMessage, f, h); + } + } + + static void* StartSubMessage(void *m, void *fval) { + const SubMessageHandlerData* info = + static_cast<const SubMessageHandlerData*>(fval); + info->SetHasbit(m); + proto2::Message **subm = 
info->GetFieldPointer<proto2::Message*>(m); + if (*subm == info->prototype()) *subm = (*subm)->New(); + return *subm; + } + + static void* StartWeakSubMessage(void *m, void *fval) { + const SubMessageHandlerData* info = + static_cast<const SubMessageHandlerData*>(fval); + info->SetHasbit(m); + proto2::Message **subm = info->GetFieldPointer<proto2::Message*>(m); + if (*subm == NULL) { + *subm = info->prototype()->New(); + } + return *subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through + // its base class RepeatedPtrFieldBase*. + static void* StartRepeatedSubMessage(void* _r, void *fval) { + const SubMessageHandlerData* info = + static_cast<const SubMessageHandlerData*>(fval); + proto2::internal::RepeatedPtrFieldBase *r = + static_cast<proto2::internal::RepeatedPtrFieldBase*>(_r); + void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>(); + if (!submsg) { + submsg = info->prototype()->New(); + r->AddAllocated<RepeatedMessageTypeHandler>(submsg); + } + return submsg; + } +}; + +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return P2R_Handlers::TrySet(proto2_f, m, upb_f, h); +} + +const proto2::Message* GetProto1WeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetWeakPrototype(m, f); +} + +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f) { + return P2R_Handlers::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/google/proto1.h b/upb/google/proto1.h new file mode 100644 index 0000000..f35fb13 --- /dev/null +++ 
b/upb/google/proto1.h @@ -0,0 +1,53 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// Support for registering field handlers that can write into a legacy proto1 +// message. This functionality is only needed inside Google. +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO1_H_ +#define UPB_GOOGLE_PROTO1_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." Returns true if this was successful +// (this will fail if "prototype" is not a proto1 message, or if we can't +// handle it for some reason). +bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the given field in "m", if it is weak. The returned +// message could be the linked-in message type or OpaqueMessage, if the weak +// message is *not* linked in. Otherwise returns NULL. +const proto2::Message* GetProto1WeakPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); + +// Returns a prototype for the given non-weak field in "m". +const proto2::Message* GetProto1FieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO1_H_ diff --git a/upb/google/proto2.cc b/upb/google/proto2.cc new file mode 100644 index 0000000..264530c --- /dev/null +++ b/upb/google/proto2.cc @@ -0,0 +1,632 @@ +// +// upb - a minimalist implementation of protocol buffers. 
+// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// Note that we have received an exception from c-style-arbiters regarding +// dynamic_cast<> in this file: +// https://groups.google.com/a/google.com/d/msg/c-style/7Zp_XCX0e7s/I6dpzno4l-MJ +// +// IMPORTANT NOTE! This file is compiled TWICE, once with UPB_GOOGLE3 defined +// and once without! This allows us to provide functionality against proto2 +// and protobuf opensource both in a single binary without the two conflicting. +// However we must be careful not to violate the ODR. + +#include "upb/google/proto2.h" + +#include "upb/google/proto1.h" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/handlers.h" + +namespace upb { +namespace proto2_bridge_google3 { class FieldAccessor; } +namespace proto2_bridge_opensource { class FieldAccessor; } +} // namespace upb + +// BEGIN DOUBLE COMPILATION TRICKERY. ////////////////////////////////////////// + +#ifdef UPB_GOOGLE3 + +// TODO(haberman): friend upb so that this isn't required. +#define protected public +#include "net/proto2/public/repeated_field.h" +#undef protected + +#define private public +#include "net/proto2/public/generated_message_reflection.h" +#undef private + +#include "net/proto2/proto/descriptor.pb.h" +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/lazy_field.h" +#include "net/proto2/public/message.h" +#include "net/proto2/public/string_piece_field_support.h" +#include "upb/google/cord.h" + +namespace goog = ::proto2; +namespace me = ::upb::proto2_bridge_google3; + +#else + +// TODO(haberman): friend upb so that this isn't required.
+#define protected public +#include "google/protobuf/repeated_field.h" +#undef protected + +#define private public +#include "google/protobuf/generated_message_reflection.h" +#undef private + +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/message.h" + +namespace goog = ::google::protobuf; +namespace me = ::upb::proto2_bridge_opensource; + +#endif // ifdef UPB_GOOGLE3 + +// END DOUBLE COMPILATION TRICKERY. //////////////////////////////////////////// + +// Have to define this manually since older versions of proto2 didn't define +// an enum value for STRING. +#define UPB_CTYPE_STRING 0 + +template<class T> static T* GetPointer(void *message, size_t offset) { + return reinterpret_cast<T*>(static_cast<char*>(message) + offset); +} + +// This class contains handlers that can write into a proto2 class whose +// reflection class is GeneratedMessageReflection. (Despite the name, even +// DynamicMessage uses GeneratedMessageReflection, so this covers all proto2 +// messages generated by the compiler.) To do this it must break the +// encapsulation of GeneratedMessageReflection and therefore depends on +// internal interfaces that are not guaranteed to be stable. This class will +// need to be updated if any non-backward-compatible changes are made to +// GeneratedMessageReflection. +// +// TODO(haberman): change class name? In retrospect, "FieldAccessor" isn't the +// best (something more specific like GeneratedMessageReflectionHandlers or +// GMR_Handlers would be better) but we're depending on a "friend" declaration +// in proto2 that already specifies "FieldAccessor." No versions of proto2 have +// been released that include the "friend FieldAccessor" declaration, so there's +// still time to change this. 
On the other hand, perhaps it's simpler to just +// rely on "#define private public" since it may be a long time before new +// versions of proto2 open source are pervasive enough that we can remove this +// anyway. +class me::FieldAccessor { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const upb::FieldDef* upb_f, upb::Handlers* h) { + const goog::Reflection* base_r = m.GetReflection(); + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast<const goog::internal::GeneratedMessageReflection*>(base_r); + if (!r) return false; + // Extensions not supported yet. + if (proto2_f->is_extension()) return false; + + switch (proto2_f->cpp_type()) { +#define PRIMITIVE_TYPE(cpptype, cident) \ + case goog::FieldDescriptor::cpptype: \ + SetPrimitiveHandlers<cident>(proto2_f, r, upb_f, h); return true; + PRIMITIVE_TYPE(CPPTYPE_INT32, int32_t); + PRIMITIVE_TYPE(CPPTYPE_INT64, int64_t); + PRIMITIVE_TYPE(CPPTYPE_UINT32, uint32_t); + PRIMITIVE_TYPE(CPPTYPE_UINT64, uint64_t); + PRIMITIVE_TYPE(CPPTYPE_DOUBLE, double); + PRIMITIVE_TYPE(CPPTYPE_FLOAT, float); + PRIMITIVE_TYPE(CPPTYPE_BOOL, bool); +#undef PRIMITIVE_TYPE + case goog::FieldDescriptor::CPPTYPE_ENUM: + SetEnumHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldDescriptor::CPPTYPE_STRING: { + // Old versions of the open-source protobuf release erroneously default + // to Cord even though that has never been supported in the open-source + // release. + int32_t ctype = proto2_f->options().has_ctype() ? 
+ proto2_f->options().ctype() : UPB_CTYPE_STRING; + switch (ctype) { +#ifdef UPB_GOOGLE3 + case goog::FieldOptions::STRING: + SetStringHandlers<string>(proto2_f, m, r, upb_f, h); + return true; + case goog::FieldOptions::CORD: + SetCordHandlers(proto2_f, r, upb_f, h); + return true; + case goog::FieldOptions::STRING_PIECE: + SetStringPieceHandlers(proto2_f, r, upb_f, h); + return true; +#else + case UPB_CTYPE_STRING: + SetStringHandlers<std::string>(proto2_f, m, r, upb_f, h); + return true; +#endif + default: + return false; + } + } + case goog::FieldDescriptor::CPPTYPE_MESSAGE: +#ifdef UPB_GOOGLE3 + if (proto2_f->options().lazy()) { + return false; // Not yet implemented. + } else { + SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + return true; + } +#else + SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + return true; +#endif + default: + return false; + } + } + + static const goog::Message* GetFieldPrototype( + const goog::Message& m, + const goog::FieldDescriptor* f) { + // We assume that all submessages (and extensions) will be constructed + // using the same MessageFactory as this message. This doesn't cover the + // case of CodedInputStream::SetExtensionRegistry(). + // See file comment re: dynamic_cast. + const goog::internal::GeneratedMessageReflection* r = + dynamic_cast<const goog::internal::GeneratedMessageReflection*>( + m.GetReflection()); + if (!r) return NULL; + return r->message_factory_->GetPrototype(f->message_type()); + } + + private: + static upb_selector_t GetSelector(const upb::FieldDef* f, + upb::Handlers::Type type) { + upb::Handlers::Selector selector; + bool ok = upb::Handlers::GetSelector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; + } + + static int64_t GetHasbit( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + // proto2 does not store hasbits for repeated fields. 
+ assert(!f->is_repeated()); + return (r->has_bits_offset_ * 8) + f->index(); + } + + static uint16_t GetOffset( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) { + return r->offsets_[f->index()]; + } + + class FieldOffset { + public: + FieldOffset( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(GetOffset(f, r)), + is_repeated_(f->is_repeated()) { + if (!is_repeated_) { + int64_t hasbit = GetHasbit(f, r); + hasbyte_ = hasbit / 8; + mask_ = 1 << (hasbit % 8); + } + } + + template<class T> T* GetFieldPointer(void *message) const { + return GetPointer<T>(message, offset_); + } + + void SetHasbit(void* m) const { + assert(!is_repeated_); + uint8_t* byte = GetPointer<uint8_t>(m, hasbyte_); + *byte |= mask_; + } + + private: + const size_t offset_; + bool is_repeated_; + + // Only for non-repeated fields. + int32_t hasbyte_; + int8_t mask_; + }; + + // StartSequence ///////////////////////////////////////////////////////////// + + static void SetStartSequenceHandler( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(f->IsSequence()); + h->SetStartSequenceHandler( + f, &PushOffset, new FieldOffset(proto2_f, r), + &upb::DeletePointer<FieldOffset>); + } + + static void* PushOffset(void *m, void *fval) { + const FieldOffset* offset = static_cast<FieldOffset*>(fval); + return offset->GetFieldPointer<void>(m); + } + + // Primitive Value (numeric, bool) /////////////////////////////////////////// + + template <typename T> static void SetPrimitiveHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetValueHandler<T>(f, &AppendPrimitive<T>, NULL, NULL); + } else { + upb::SetStoreValueHandler<T>( + f, 
GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h); + } + } + + template <typename T> + static bool AppendPrimitive(void *_r, void *fval, T val) { + UPB_UNUSED(fval); + goog::RepeatedField<T>* r = static_cast<goog::RepeatedField<T>*>(_r); + r->Add(val); + return true; + } + + // Enum ////////////////////////////////////////////////////////////////////// + + class EnumHandlerData : public FieldOffset { + public: + EnumHandlerData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f) + : FieldOffset(proto2_f, r), + field_number_(f->number()), + unknown_fields_offset_(r->unknown_fields_offset_), + enum_(upb_downcast_enumdef(f->subdef())) { + } + + bool IsValidValue(int32_t val) const { + return enum_->FindValueByNumber(val) != NULL; + } + + int32_t field_number() const { return field_number_; } + + goog::UnknownFieldSet* mutable_unknown_fields(goog::Message* m) const { + return GetPointer<goog::UnknownFieldSet>(m, unknown_fields_offset_); + } + + private: + int32_t field_number_; + size_t unknown_fields_offset_; + const upb::EnumDef* enum_; + }; + + static void SetEnumHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + EnumHandlerData* data = new EnumHandlerData(proto2_f, r, f); + if (f->IsSequence()) { + h->SetInt32Handler( + f, &AppendEnum, data, &upb::DeletePointer<EnumHandlerData>); + } else { + h->SetInt32Handler( + f, &SetEnum, data, &upb::DeletePointer<EnumHandlerData>); + } + } + + static bool SetEnum(void *_m, void *fval, int32_t val) { + goog::Message* m = static_cast<goog::Message*>(_m); + const EnumHandlerData* data = static_cast<const EnumHandlerData*>(fval); + if (data->IsValidValue(val)) { + int32_t* message_val = data->GetFieldPointer<int32_t>(m); + *message_val = val; + data->SetHasbit(m); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + 
return true; + } + + static bool AppendEnum(void *_m, void *fval, int32_t val) { + // Closure is the enclosing message. We can't use the RepeatedField<> as + // the closure because we need to go back to the message for unrecognized + // enum values, which go into the unknown field set. + goog::Message* m = static_cast<goog::Message*>(_m); + const EnumHandlerData* data = static_cast<const EnumHandlerData*>(fval); + if (data->IsValidValue(val)) { + goog::RepeatedField<int32_t>* r = + data->GetFieldPointer<goog::RepeatedField<int32_t> >(m); + r->Add(val); + } else { + data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); + } + return true; + } + + // String //////////////////////////////////////////////////////////////////// + + // For scalar (non-repeated) string fields. + template<class T> + class StringHandlerData : public FieldOffset { + public: + StringHandlerData(const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message& prototype) + : FieldOffset(proto2_f, r) { + // "prototype" isn't guaranteed to be empty, so we create a copy to get + // the default string instance. 
+ goog::Message* empty = prototype.New(); + prototype_ = &r->GetStringReference(*empty, proto2_f, NULL); + delete empty; + } + + const T* prototype() const { return prototype_; } + + T** GetStringPointer(void *message) const { + return GetFieldPointer<T*>(message); + } + + private: + const T* prototype_; + }; + + template <typename T> static void SetStringHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + h->SetStringHandler(f, &OnStringBuf<T>, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedString<T>, NULL, NULL); + } else { + StringHandlerData<T>* data = new StringHandlerData<T>(proto2_f, r, m); + h->SetStartStringHandler( + f, &StartString<T>, data, &upb::DeletePointer<StringHandlerData<T> >); + } + } + + // This needs to be templated because google3 string is not std::string. + template <typename T> static void* StartString( + void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const StringHandlerData<T>* data = + static_cast<const StringHandlerData<T>*>(fval); + T** str = data->GetStringPointer(m); + data->SetHasbit(m); + // If it points to the default instance, we must create a new instance. + if (*str == data->prototype()) *str = new T(); + (*str)->clear(); + // reserve() here appears to hurt performance rather than help. 
+ return *str; + } + + template <typename T> static size_t OnStringBuf( + void *_str, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + T* str = static_cast<T*>(_str); + str->append(buf, n); + return n; + } + + + template <typename T> + static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + goog::RepeatedPtrField<T>* r = static_cast<goog::RepeatedPtrField<T>*>(_r); + T* str = r->Add(); + str->clear(); + // reserve() here appears to hurt performance rather than help. + return str; + } + + // SubMessage //////////////////////////////////////////////////////////////// + + class SubMessageHandlerData : public FieldOffset { + public: + SubMessageHandlerData( + const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message* prototype) + : FieldOffset(f, r), + prototype_(prototype) { + } + + const goog::Message* prototype() const { return prototype_; } + + private: + const goog::Message* const prototype_; + }; + + static void SetSubMessageHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + SubMessageHandlerData* data = + new SubMessageHandlerData(proto2_f, r, GetFieldPrototype(m, proto2_f)); + upb::Handlers::Free* free = &upb::DeletePointer<SubMessageHandlerData>; + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartSubMessageHandler(f, &StartRepeatedSubMessage, data, free); + } else { + h->SetStartSubMessageHandler(f, &StartSubMessage, data, free); + } + } + + static void* StartSubMessage(void *m, void *fval) { + const SubMessageHandlerData* data = + static_cast<const SubMessageHandlerData*>(fval); + data->SetHasbit(m); + goog::Message **subm = data->GetFieldPointer<goog::Message*>(m); + if (*subm == NULL || *subm == data->prototype()) { + *subm = data->prototype()->New(); + } + return 
*subm; + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through + // its base class RepeatedPtrFieldBase*. + static void* StartRepeatedSubMessage(void* _r, void *fval) { + const SubMessageHandlerData* data = + static_cast<const SubMessageHandlerData*>(fval); + goog::internal::RepeatedPtrFieldBase *r = + static_cast<goog::internal::RepeatedPtrFieldBase*>(_r); + void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>(); + if (!submsg) { + submsg = data->prototype()->New(); + r->AddAllocated<RepeatedMessageTypeHandler>(submsg); + } + return submsg; + } + + // TODO(haberman): handle Extensions, Unknown Fields. + +#ifdef UPB_GOOGLE3 + // Handlers for types/features only included in internal proto2 release: + // Cord, StringPiece, LazyField, and MessageSet. + // TODO(haberman): LazyField, MessageSet. 
+ + // Cord ////////////////////////////////////////////////////////////////////// + + static void SetCordHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnCordBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartCord, new FieldOffset(proto2_f, r), + &upb::DeletePointer<FieldOffset*>); + } + } + + static void* StartCord(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* offset = static_cast<const FieldOffset*>(fval); + offset->SetHasbit(m); + Cord* field = offset->GetFieldPointer<Cord>(m); + field->Clear(); + return field; + } + + static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) { + UPB_UNUSED(fval); + Cord* c = static_cast<Cord*>(_c); + c->Append(StringPiece(buf, n)); + return n; + } + + static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + proto2::RepeatedField<Cord>* r = + static_cast<proto2::RepeatedField<Cord>*>(_r); + return r->Add(); + } + + // StringPiece /////////////////////////////////////////////////////////////// + + static void SetStringPieceHandlers( + const proto2::FieldDescriptor* proto2_f, + const proto2::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + h->SetStringHandler(f, &OnStringPieceBuf, NULL, NULL); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetStartStringHandler(f, &StartRepeatedStringPiece, NULL, NULL); + } else { + h->SetStartStringHandler( + f, &StartStringPiece, new FieldOffset(proto2_f, r), + &upb::DeletePointer<FieldOffset*>); + } + } + + static size_t OnStringPieceBuf(void *_f, void *fval, + const char *buf, size_t len) { + UPB_UNUSED(fval); + // 
TODO(haberman): alias if possible and enabled on the input stream. + // TODO(haberman): add a method to StringPieceField that lets us avoid + // this copy/malloc/free. + proto2::internal::StringPieceField* field = + static_cast<proto2::internal::StringPieceField*>(_f); + size_t new_len = field->size() + len; + char *data = new char[new_len]; + memcpy(data, field->data(), field->size()); + memcpy(data + field->size(), buf, len); + field->CopyFrom(StringPiece(data, new_len)); + delete[] data; + return len; + } + + static void* StartStringPiece(void *m, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + const FieldOffset* offset = static_cast<const FieldOffset*>(fval); + offset->SetHasbit(m); + proto2::internal::StringPieceField* field = + offset->GetFieldPointer<proto2::internal::StringPieceField>(m); + field->Clear(); + return field; + } + + static void* StartRepeatedStringPiece(void* _r, void *fval, + size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); + typedef proto2::RepeatedPtrField<proto2::internal::StringPieceField> + RepeatedStringPiece; + RepeatedStringPiece* r = static_cast<RepeatedStringPiece*>(_r); + proto2::internal::StringPieceField* field = r->Add(); + field->Clear(); + return field; + } + +#endif // UPB_GOOGLE3 +}; + +namespace upb { +namespace google { + +bool TrySetWriteHandlers(const goog::FieldDescriptor* proto2_f, + const goog::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h) { + return me::FieldAccessor::TrySet(proto2_f, prototype, upb_f, h); +} + +const goog::Message* GetFieldPrototype( + const goog::Message& m, + const goog::FieldDescriptor* f) { + return me::FieldAccessor::GetFieldPrototype(m, f); +} + +} // namespace google +} // namespace upb diff --git a/upb/google/proto2.h b/upb/google/proto2.h new file mode 100644 index 0000000..f2662ea --- /dev/null +++ b/upb/google/proto2.h @@ -0,0 +1,62 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. 
See LICENSE for details. +// Author: Josh Haberman <jhaberman@gmail.com> +// +// Support for registering field handlers that can write into a proto2 +// message that uses GeneratedMessageReflection (which includes all messages +// generated by the proto2 compiler as well as DynamicMessage). +// +// This is a low-level interface; the high-level interface in google.h is +// more user-friendly. + +#ifndef UPB_GOOGLE_PROTO2_H_ +#define UPB_GOOGLE_PROTO2_H_ + +namespace proto2 { +class FieldDescriptor; +class Message; +} + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Message; +} +} + +namespace upb { +class FieldDef; +class Handlers; +} + +namespace upb { +namespace google { + +// Sets field handlers in the given Handlers object for writing to a single +// field (as described by "proto2_f" and "upb_f") into a message constructed +// by the same factory as "prototype." Returns true if this was successful +// (this will fail if "prototype" is not a proto2 message, or if we can't +// handle it for some reason). +bool TrySetWriteHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); +bool TrySetWriteHandlers(const ::google::protobuf::FieldDescriptor* proto2_f, + const ::google::protobuf::Message& prototype, + const upb::FieldDef* upb_f, upb::Handlers* h); + +// Returns a prototype for the submessage field "f" of "m", looked up in the +// MessageFactory of "m"'s reflection object. Returns NULL if "m" does not +// use GeneratedMessageReflection.
+const proto2::Message* GetFieldPrototype( + const proto2::Message& m, + const proto2::FieldDescriptor* f); +const ::google::protobuf::Message* GetFieldPrototype( + const ::google::protobuf::Message& m, + const ::google::protobuf::FieldDescriptor* f); + +} // namespace google +} // namespace upb + +#endif // UPB_GOOGLE_PROTO2_H_ diff --git a/upb/handlers.c b/upb/handlers.c index 8350f64..8263c9a 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -1,292 +1,385 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Copyright (c) 2011-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> */ -#include <stdlib.h> #include "upb/handlers.h" +#include <stdlib.h> +#include <string.h> + +// Defined for the sole purpose of having a unique pointer value for +// UPB_NO_CLOSURE. +char _upb_noclosure; + +typedef struct { + upb_func *handler; + + // Could put either or both of these in a separate table to save memory when + // they are sparse. + void *data; + upb_handlerfree *cleanup; + + // TODO(haberman): this is wasteful; only the first "fieldhandler" of a + // submessage field needs this. To reduce memory footprint we should either: + // - put the subhandlers in a separate "fieldhandler", stored as part of + // a union with one of the above fields. + // - count selector offsets by individual pointers instead of by whole + // fieldhandlers. 
+ const upb_handlers *subhandlers; +} fieldhandler; + +static const fieldhandler *getfh( + const upb_handlers *h, upb_selector_t selector) { + assert(selector < upb_handlers_msgdef(h)->selector_count); + fieldhandler* fhbase = (void*)&h->fh_base; + return &fhbase[selector]; +} -/* upb_mhandlers **************************************************************/ +static fieldhandler *getfh_mutable(upb_handlers *h, upb_selector_t selector) { + return (fieldhandler*)getfh(h, selector); +} -static upb_mhandlers *upb_mhandlers_new(void) { - upb_mhandlers *m = malloc(sizeof(*m)); - upb_inttable_init(&m->fieldtab); - m->startmsg = NULL; - m->endmsg = NULL; - m->is_group = false; -#ifdef UPB_USE_JIT_X64 - m->tablearray = NULL; -#endif - return m; +bool upb_handlers_isfrozen(const upb_handlers *h) { + return upb_refcounted_isfrozen(upb_upcast(h)); } -static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, - bool repeated) { - const upb_value *v = upb_inttable_lookup(&m->fieldtab, n); - // TODO: design/refine the API for changing the set of fields or modifying - // existing handlers. - if (v) return NULL; - upb_fhandlers new_f = {type, repeated, 0, - n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL, -#ifdef UPB_USE_JIT_X64 - 0, 0, 0, -#endif - }; - upb_fhandlers *ptr = malloc(sizeof(*ptr)); - memcpy(ptr, &new_f, sizeof(upb_fhandlers)); - upb_inttable_insert(&m->fieldtab, n, upb_value_ptr(ptr)); - return ptr; +uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { + return upb_fielddef_isseq(f) ? 
2 : 0; } -upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated) { - assert(type != UPB_TYPE(MESSAGE)); - assert(type != UPB_TYPE(GROUP)); - return _upb_mhandlers_newfhandlers(m, n, type, repeated); +uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { + uint32_t ret = 1; + if (upb_fielddef_isstring(f)) ret += 2; // STARTSTR/ENDSTR + if (upb_fielddef_isseq(f)) ret += 2; // STARTSEQ/ENDSEQ + if (upb_fielddef_issubmsg(f)) ret += 2; // STARTSUBMSG/ENDSUBMSG + return ret; } -upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, - bool repeated, - upb_mhandlers *subm) { - assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)); - assert(subm); - upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated); - if (!f) return NULL; - f->submsg = subm; - if (type == UPB_TYPE(GROUP)) - _upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false); - return f; +upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_SINT32: + case UPB_TYPE_SFIXED32: + case UPB_TYPE_ENUM: + return UPB_HANDLER_INT32; + case UPB_TYPE_INT64: + case UPB_TYPE_SINT64: + case UPB_TYPE_SFIXED64: + return UPB_HANDLER_INT64; + case UPB_TYPE_UINT32: + case UPB_TYPE_FIXED32: + return UPB_HANDLER_UINT32; + case UPB_TYPE_UINT64: + case UPB_TYPE_FIXED64: + return UPB_HANDLER_UINT64; + case UPB_TYPE_FLOAT: + return UPB_HANDLER_FLOAT; + case UPB_TYPE_DOUBLE: + return UPB_HANDLER_DOUBLE; + case UPB_TYPE_BOOL: + return UPB_HANDLER_BOOL; + default: assert(false); return -1; // Invalid input. + } } -upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n) { - const upb_value *v = upb_inttable_lookup(&m->fieldtab, n); - return v ? 
upb_value_getptr(*v) : NULL; +bool upb_getselector( + const upb_fielddef *f, upb_handlertype_t type, upb_selector_t *s) { + // If the type checks in this function are a hot-spot, we can introduce a + // separate function that calculates the selector assuming that the type + // is correct (may even want to make it inline for the upb_sink fast-path. + switch (type) { + case UPB_HANDLER_INT32: + case UPB_HANDLER_INT64: + case UPB_HANDLER_UINT32: + case UPB_HANDLER_UINT64: + case UPB_HANDLER_FLOAT: + case UPB_HANDLER_DOUBLE: + case UPB_HANDLER_BOOL: + if (!upb_fielddef_isprimitive(f) || + upb_handlers_getprimitivehandlertype(f) != type) + return false; + *s = f->selector_base; + break; + case UPB_HANDLER_STARTSTR: + if (!upb_fielddef_isstring(f)) return false; + *s = f->selector_base; + break; + case UPB_HANDLER_STRING: + if (!upb_fielddef_isstring(f)) return false; + *s = f->selector_base + 1; + break; + case UPB_HANDLER_ENDSTR: + if (!upb_fielddef_isstring(f)) return false; + *s = f->selector_base + 2; + break; + case UPB_HANDLER_STARTSEQ: + if (!upb_fielddef_isseq(f)) return false; + *s = f->selector_base - 2; + break; + case UPB_HANDLER_ENDSEQ: + if (!upb_fielddef_isseq(f)) return false; + *s = f->selector_base - 1; + break; + case UPB_HANDLER_STARTSUBMSG: + if (!upb_fielddef_issubmsg(f)) return false; + *s = f->selector_base + 1; + break; + case UPB_HANDLER_ENDSUBMSG: + if (!upb_fielddef_issubmsg(f)) return false; + *s = f->selector_base + 2; + break; + } + assert(*s < upb_fielddef_msgdef(f)->selector_count); + return true; } +void upb_handlers_ref(const upb_handlers *h, const void *owner) { + upb_refcounted_ref(upb_upcast(h), owner); +} -/* upb_handlers ***************************************************************/ +void upb_handlers_unref(const upb_handlers *h, const void *owner) { + upb_refcounted_unref(upb_upcast(h), owner); +} -upb_handlers *upb_handlers_new() { - upb_handlers *h = malloc(sizeof(*h)); - h->refcount = 1; - h->msgs_len = 0; - h->msgs_size = 
4; - h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); - h->should_jit = true; - return h; +void upb_handlers_donateref( + const upb_handlers *h, const void *from, const void *to) { + upb_refcounted_donateref(upb_upcast(h), from, to); } -void upb_handlers_ref(upb_handlers *h) { h->refcount++; } - -void upb_handlers_unref(upb_handlers *h) { - if (--h->refcount == 0) { - for (int i = 0; i < h->msgs_len; i++) { - upb_mhandlers *mh = h->msgs[i]; - upb_inttable_iter j; - upb_inttable_begin(&j, &mh->fieldtab); - for(; !upb_inttable_done(&j); upb_inttable_next(&j)) { - free(upb_value_getptr(upb_inttable_iter_value(&j))); - } - upb_inttable_uninit(&mh->fieldtab); -#ifdef UPB_USE_JIT_X64 - free(mh->tablearray); -#endif - free(mh); - } - free(h->msgs); - free(h); - } +void upb_handlers_checkref(const upb_handlers *h, const void *owner) { + upb_refcounted_checkref(upb_upcast(h), owner); +} + +static void do_cleanup(upb_handlers* h, const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t selector; + if (!upb_getselector(f, type, &selector)) return; + fieldhandler *fh = getfh_mutable(h, selector); + if (fh->cleanup) fh->cleanup(fh->data); + fh->cleanup = NULL; + fh->data = NULL; } -upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) { - if (h->msgs_len == h->msgs_size) { - h->msgs_size *= 2; - h->msgs = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs)); +static void freehandlers(upb_refcounted *r) { + upb_handlers *h = (upb_handlers*)r; + upb_msg_iter i; + for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + for (upb_handlertype_t type = 0; type < UPB_HANDLER_MAX; type++) + do_cleanup(h, f, type); } - upb_mhandlers *mh = upb_mhandlers_new(); - h->msgs[h->msgs_len++] = mh; - return mh; + upb_msgdef_unref(h->msg, h); + free(h); } -static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, const upb_msgdef *m, - upb_onmsgreg *msgreg_cb, - upb_onfieldreg *fieldreg_cb, - void *closure, upb_strtable *mtab) { 
- upb_mhandlers *mh = upb_handlers_newmhandlers(h); - upb_strtable_insert(mtab, upb_def_fullname(UPB_UPCAST(m)), upb_value_ptr(mh)); - if (msgreg_cb) msgreg_cb(closure, mh, m); +static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_handlers *h = (const upb_handlers*)r; upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); - upb_fhandlers *fh; - if (upb_issubmsg(f)) { - upb_mhandlers *sub_mh; - const upb_value *subm_ent; - // The table lookup is necessary to break the DFS for type cycles. - const char *subname = upb_def_fullname(upb_fielddef_subdef(f)); - if ((subm_ent = upb_strtable_lookup(mtab, subname)) != NULL) { - sub_mh = upb_value_getptr(*subm_ent); - } else { - sub_mh = upb_regmsg_dfs( - h, upb_downcast_msgdef_const(upb_fielddef_subdef(f)), - msgreg_cb, fieldreg_cb, closure, mtab); - } - fh = upb_mhandlers_newfhandlers_subm( - mh, f->number, f->type, upb_isseq(f), sub_mh); - } else { - fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isseq(f)); - } - if (fieldreg_cb) fieldreg_cb(closure, fh, f); + if (!upb_fielddef_issubmsg(f)) continue; + const upb_handlers *sub = upb_handlers_getsubhandlers(h, f); + if (sub) visit(r, upb_upcast(sub), closure); } - return mh; } -upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, - upb_onmsgreg *msgreg_cb, - upb_onfieldreg *fieldreg_cb, - void *closure) { - upb_strtable mtab; - upb_strtable_init(&mtab); - upb_mhandlers *ret = - upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &mtab); - upb_strtable_uninit(&mtab); - return ret; +upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) { + assert(upb_msgdef_isfrozen(md)); + static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers}; + size_t fhandlers_size = sizeof(fieldhandler) * md->selector_count; + 
upb_handlers *h = calloc(sizeof(*h) - sizeof(void*) + fhandlers_size, 1); + if (!h) return NULL; + h->msg = md; + upb_msgdef_ref(h->msg, h); + if (!upb_refcounted_init(upb_upcast(h), &vtbl, owner)) goto oom; + + // calloc() above initialized all handlers to NULL. + return h; + +oom: + freehandlers(upb_upcast(h)); + return NULL; } +bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { + // TODO: verify we have a transitive closure. + return upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s); +} + +const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } -/* upb_dispatcher *************************************************************/ - -void upb_dispatcher_init(upb_dispatcher *d, upb_status *status, - upb_exit_handler UPB_NORETURN *exit, - void *srcclosure) { - d->stack[0].f = NULL; // Should never be read. - d->limit = &d->stack[UPB_MAX_NESTING]; - d->exitjmp = exit; - d->srcclosure = srcclosure; - d->top_is_implicit = false; - d->msgent = NULL; - d->top = NULL; - d->toplevel_msgent = NULL; - d->status = status; +void upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handler *handler) { + assert(!upb_handlers_isfrozen(h)); + h->startmsg = handler; } -upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure, - upb_mhandlers *top) { - d->msgent = top; - d->toplevel_msgent = top; - d->top = d->stack; - d->top->closure = closure; - d->top->is_sequence = false; - d->top->is_packed = false; - return d->top; +upb_startmsg_handler *upb_handlers_getstartmsg(const upb_handlers *h) { + return h->startmsg; } -void upb_dispatcher_uninit(upb_dispatcher *d) { - (void)d; +void upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handler *handler) { + assert(!upb_handlers_isfrozen(h)); + h->endmsg = handler; } -void upb_dispatch_startmsg(upb_dispatcher *d) { - upb_flow_t flow = UPB_CONTINUE; - if (d->msgent->startmsg) d->msgent->startmsg(d->top->closure); - if (flow != UPB_CONTINUE) 
_upb_dispatcher_abortjmp(d); +upb_endmsg_handler *upb_handlers_getendmsg(const upb_handlers *h) { + return h->endmsg; } -void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) { - assert(d->top == d->stack); - if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status); - // TODO: should we avoid this copy by passing client's status obj to cbs? - upb_status_copy(status, d->status); +// For now we stuff the subhandlers pointer into the fieldhandlers* +// corresponding to the UPB_HANDLER_STARTSUBMSG handler. +static const upb_handlers **subhandlersptr(upb_handlers *h, + const upb_fielddef *f) { + assert(upb_fielddef_issubmsg(f)); + upb_selector_t selector; + bool ok = upb_getselector(f, UPB_HANDLER_STARTSUBMSG, &selector); + UPB_ASSERT_VAR(ok, ok); + return &getfh_mutable(h, selector)->subhandlers; } -upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, - upb_fhandlers *f) { - if (d->top + 1 >= d->limit) { - upb_status_seterrliteral(d->status, "Nesting too deep."); - _upb_dispatcher_abortjmp(d); +bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, + const upb_handlers *sub) { + assert(!upb_handlers_isfrozen(h)); + if (!upb_fielddef_issubmsg(f)) return false; + if (sub != NULL && + upb_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) { + return false; } + const upb_handlers **stored = subhandlersptr(h, f); + const upb_handlers *old = *stored; + if (old) upb_unref2(old, h); + *stored = sub; + if (sub) upb_ref2(sub, h); + return true; +} - upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure); - if (f->startseq) sflow = f->startseq(d->top->closure, f->fval); - _upb_dispatcher_sethas(d->top->closure, f->hasbit); - if (sflow.flow != UPB_CONTINUE) { - _upb_dispatcher_abortjmp(d); - } +const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, + const upb_fielddef *f) { + const upb_handlers **stored = subhandlersptr((upb_handlers*)h, f); + return *stored; +} - ++d->top; - d->top->f = f; - 
d->top->is_sequence = true; - d->top->is_packed = false; - d->top->closure = sflow.closure; - return d->top; +#define SETTER(name, handlerctype, handlertype) \ + bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \ + handlerctype val, void *data, \ + upb_handlerfree *cleanup) { \ + assert(!upb_handlers_isfrozen(h)); \ + if (upb_handlers_msgdef(h) != upb_fielddef_msgdef(f)) return false; \ + upb_selector_t selector; \ + bool ok = upb_getselector(f, handlertype, &selector); \ + if (!ok) return false; \ + do_cleanup(h, f, handlertype); \ + fieldhandler *fh = getfh_mutable(h, selector); \ + fh->handler = (upb_func*)val; \ + fh->data = (upb_func*)data; \ + fh->cleanup = (upb_func*)cleanup; \ + return true; \ + } \ + +SETTER(int32, upb_int32_handler*, UPB_HANDLER_INT32); +SETTER(int64, upb_int64_handler*, UPB_HANDLER_INT64); +SETTER(uint32, upb_uint32_handler*, UPB_HANDLER_UINT32); +SETTER(uint64, upb_uint64_handler*, UPB_HANDLER_UINT64); +SETTER(float, upb_float_handler*, UPB_HANDLER_FLOAT); +SETTER(double, upb_double_handler*, UPB_HANDLER_DOUBLE); +SETTER(bool, upb_bool_handler*, UPB_HANDLER_BOOL); +SETTER(startstr, upb_startstr_handler*, UPB_HANDLER_STARTSTR); +SETTER(string, upb_string_handler*, UPB_HANDLER_STRING); +SETTER(endstr, upb_endfield_handler*, UPB_HANDLER_ENDSTR); +SETTER(startseq, upb_startfield_handler*, UPB_HANDLER_STARTSEQ); +SETTER(startsubmsg, upb_startfield_handler*, UPB_HANDLER_STARTSUBMSG); +SETTER(endsubmsg, upb_endfield_handler*, UPB_HANDLER_ENDSUBMSG); +SETTER(endseq, upb_endfield_handler*, UPB_HANDLER_ENDSEQ); +#undef SETTER + +upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s) { + return getfh(h, s)->handler; } -upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) { - assert(d->top > d->stack); - assert(d->top->is_sequence); - upb_fhandlers *f = d->top->f; - --d->top; - upb_flow_t flow = UPB_CONTINUE; - if (f->endseq) flow = f->endseq(d->top->closure, f->fval); - if (flow != UPB_CONTINUE) { - 
_upb_dispatcher_abortjmp(d); - } - d->msgent = d->top->f ? d->top->f->submsg : d->toplevel_msgent; - return d->top; +void *upb_handlers_gethandlerdata(const upb_handlers *h, upb_selector_t s) { + return getfh(h, s)->data; } -upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, - upb_fhandlers *f) { - if (d->top + 1 >= d->limit) { - upb_status_seterrliteral(d->status, "Nesting too deep."); - _upb_dispatcher_abortjmp(d); - } +typedef struct { + upb_inttable tab; // maps upb_msgdef* -> upb_handlers*. + upb_handlers_callback *callback; + void *closure; +} dfs_state; - upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure); - if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval); - _upb_dispatcher_sethas(d->top->closure, f->hasbit); - if (sflow.flow != UPB_CONTINUE) { - _upb_dispatcher_abortjmp(d); - } +static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, + dfs_state *s) { + upb_handlers *h = upb_handlers_new(m, owner); + if (!h) return NULL; + if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom; - ++d->top; - d->top->f = f; - d->top->is_sequence = false; - d->top->is_packed = false; - d->top->closure = sflow.closure; - d->msgent = f->submsg; - upb_dispatch_startmsg(d); - return d->top; -} + s->callback(s->closure, h); -upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) { - assert(d->top > d->stack); - assert(!d->top->is_sequence); - upb_fhandlers *f = d->top->f; - if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status); - d->msgent = d->top->f->msg; - --d->top; - upb_flow_t flow = UPB_CONTINUE; - if (f->endsubmsg) f->endsubmsg(d->top->closure, f->fval); - if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d); - return d->top; -} + // For each submessage field, get or create a handlers object and set it as + // the subhandlers. 
+ upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (!upb_fielddef_issubmsg(f)) continue; -bool upb_dispatcher_stackempty(upb_dispatcher *d) { - return d->top == d->stack; -} -bool upb_dispatcher_islegalend(upb_dispatcher *d) { - if (d->top == d->stack) return true; - if (d->top - 1 == d->stack && - d->top->is_sequence && !d->top->is_packed) return true; - return false; + const upb_msgdef *subdef = upb_downcast_msgdef(upb_fielddef_subdef(f)); + const upb_value *subm_ent = upb_inttable_lookupptr(&s->tab, subdef); + if (subm_ent) { + upb_handlers_setsubhandlers(h, f, upb_value_getptr(*subm_ent)); + } else { + upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s); + if (!sub_mh) goto oom; + upb_handlers_setsubhandlers(h, f, sub_mh); + upb_handlers_unref(sub_mh, &sub_mh); + } + } + return h; + +oom: + upb_handlers_unref(h, owner); + return NULL; } -void _upb_dispatcher_abortjmp(upb_dispatcher *d) { - d->exitjmp(d->srcclosure); - assert(false); // Never returns. 
+const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, + const void *owner, + upb_handlers_callback *callback, + void *closure) { + dfs_state state; + state.callback = callback; + state.closure = closure; + if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL; + + upb_handlers *ret = newformsg(m, owner, &state); + if (!ret) return NULL; + upb_refcounted *r = upb_upcast(ret); + upb_status status = UPB_STATUS_INIT; + bool ok = upb_refcounted_freeze(&r, 1, &status); + UPB_ASSERT_VAR(ok, ok); + upb_status_uninit(&status); + + upb_inttable_uninit(&state.tab); + return ret; } + +#define STDMSG_WRITER(type, ctype) \ + bool upb_stdmsg_set ## type (void *_m, void *fval, ctype val) { \ + assert(_m != NULL); \ + const upb_stdmsg_fval *f = fval; \ + uint8_t *m = _m; \ + if (f->hasbit > 0) \ + *(uint8_t*)&m[f->hasbit / 8] |= 1 << (f->hasbit % 8); \ + *(ctype*)&m[f->offset] = val; \ + return true; \ + } \ + +STDMSG_WRITER(double, double) +STDMSG_WRITER(float, float) +STDMSG_WRITER(int32, int32_t) +STDMSG_WRITER(int64, int64_t) +STDMSG_WRITER(uint32, uint32_t) +STDMSG_WRITER(uint64, uint64_t) +STDMSG_WRITER(bool, bool) +#undef STDMSG_WRITER diff --git a/upb/handlers.h b/upb/handlers.h index 6d8f9f2..094702e 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -1,399 +1,689 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. + * Copyright (c) 2010-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * upb_handlers is a generic visitor-like interface for iterating over a stream - * of protobuf data. You can register function pointers that will be called - * for each message and/or field as the data is being parsed or iterated over, - * without having to know the source format that we are parsing from. This - * decouples the parsing logic from the processing logic. + * A upb_handlers is like a virtual table for a upb_msgdef. 
Each field of the + * message can have associated functions that will be called when we are + * parsing or visiting a stream of data. This is similar to how handlers work + * in SAX (the Simple API for XML). * - * TODO: should we allow handlers to longjmp()? Would be necessary to eg. let - * a Lua handler "yield" from the current coroutine. I *think* everything - * would "just work" with our current decoder. + * The handlers have no idea where the data is coming from, so a single set of + * handlers could be used with two completely different data sources (for + * example, a parser and a visitor over in-memory objects). This decoupling is + * the most important feature of upb, because it allows parsers and serializers + * to be highly reusable. + * + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. */ #ifndef UPB_HANDLERS_H #define UPB_HANDLERS_H -#include "upb/upb.h" #include "upb/def.h" -#include "upb/bytestream.h" #ifdef __cplusplus -extern "C" { +namespace upb { class Handlers; } +typedef upb::Handlers upb_handlers; +#else +struct upb_handlers; +typedef struct upb_handlers upb_handlers; #endif -/* Handlers protocol definition ***********************************************/ - -// A upb_handlers object represents a graph of handlers. Each message can have -// a set of handlers as well as a set of fields which themselves have handlers. -// Fields that represent submessages or groups are linked to other message -// handlers, so the overall set of handlers can form a graph structure (which -// may be cyclic). -// -// The upb_mhandlers (message handlers) object can have the following handlers: -// -// static upb_flow_t startmsg(void *closure) { -// // Called when the message begins. "closure" was supplied by our caller. 
-// return UPB_CONTINUE; -// } -// -// static void endmsg(void *closure, upb_status *status) { -// // Called when processing of this message ends, whether in success or -// // failure. "status" indicates the final status of processing, and can -// / also be modified in-place to update the final status. -// // -// // Since this callback is guaranteed to always be called eventually, it -// // can be used to free any resources that were allocated during processing. -// } -// -// TODO: unknown field handler. -// -// The upb_fhandlers (field handlers) object can have the following handlers: -// -// static upb_flow_t value(void *closure, upb_value fval, upb_value val) { -// // Called when the field's value is encountered. "fval" contains -// // whatever value was bound to this field at registration type -// // (for upb_register_all(), this will be the field's upb_fielddef*). -// return UPB_CONTINUE; -// } -// -// static upb_sflow_t startsubmsg(void *closure, upb_value fval) { -// // Called when a submessage begins. The second element of the return -// // value is the closure for the submessage. -// return UPB_CONTINUE_WITH(closure); -// } -// -// static upb_flow_t endsubmsg(void *closure, upb_value fval) { -// // Called when a submessage ends. -// return UPB_CONTINUE; -// } -// -// static upb_sflow_t startseq(void *closure, upb_value fval) { -// // Called when a sequence (repeated field) begins. The second element -// // of the return value is the closure for the sequence. -// return UPB_CONTINUE_WITH(closure); -// } -// -// static upb_flow_t endseq(void *closure, upb_value fval) { -// // Called when a sequence ends. -// return UPB_CONTINUE; -// } -// -// All handlers except the endmsg handler return a value from this enum, to -// control whether parsing will continue or not. +// All the different types of handlers that can be registered. +// Only needed for the advanced functions in upb::Handlers. typedef enum { - // Data source should continue calling callbacks. 
- UPB_CONTINUE = 0, + UPB_HANDLER_INT32, + UPB_HANDLER_INT64, + UPB_HANDLER_UINT32, + UPB_HANDLER_UINT64, + UPB_HANDLER_FLOAT, + UPB_HANDLER_DOUBLE, + UPB_HANDLER_BOOL, + UPB_HANDLER_STARTSTR, + UPB_HANDLER_STRING, + UPB_HANDLER_ENDSTR, + UPB_HANDLER_STARTSUBMSG, + UPB_HANDLER_ENDSUBMSG, + UPB_HANDLER_STARTSEQ, + UPB_HANDLER_ENDSEQ, +} upb_handlertype_t; + +#define UPB_HANDLER_MAX (UPB_HANDLER_ENDSEQ+1) + +#define UPB_BREAK NULL + +// A convenient definition for when no closure is needed. +extern char _upb_noclosure; +#define UPB_NO_CLOSURE &_upb_noclosure + +// A selector refers to a specific field handler in the Handlers object +// (for example: the STARTSUBMSG handler for field "field15"). +typedef uint32_t upb_selector_t; - // Halt processing permanently (in a non-resumable way). The endmsg handlers - // for any currently open messages will be called which can supply a more - // specific status message. No further input data will be consumed. - UPB_BREAK = -1, +#ifdef __cplusplus - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). In other words, it is - // like a UPB_BREAK that applies only to the current level. +// A upb::Handlers object represents the set of handlers associated with a +// message in the graph of messages. You can think of it as a big virtual +// table with functions corresponding to all the events that can fire while +// parsing or visiting a message of a specific type. +// +// Any handlers that are not set behave as if they had successfully consumed +// the value. For start* handlers that return a void* closure, an unset handler +// will propagate the existing closure. 
+class upb::Handlers { + public: + typedef upb_selector_t Selector; + typedef upb_handlertype_t Type; + + typedef bool StartMessageHandler(void* closure); + typedef void EndMessageHandler(void* closure, Status* status); + typedef void* StartFieldHandler(void* closure, void* data); + typedef bool EndFieldHandler(void *closure, void *data); + typedef void* StartStringHandler(void *c, void *d, size_t size_hint); + typedef size_t StringHandler(void *c, void *d, const char *buf, size_t len); + + template <class T> struct Value { + typedef bool Handler(void* closure, void* data, T val); + }; + + typedef Value<int32_t>::Handler Int32Handler; + typedef Value<int64_t>::Handler Int64Handler; + typedef Value<uint32_t>::Handler Uint32Handler; + typedef Value<uint64_t>::Handler Uint64Handler; + typedef Value<float>::Handler FloatHandler; + typedef Value<double>::Handler DoubleHandler; + typedef Value<bool>::Handler BoolHandler; + + // Any function pointer can be converted to this and converted back to its + // correct type. + typedef void GenericFunction(); + + // For freeing handler data. + typedef void Free(void *data); + + typedef void HandlersCallback(void *closure, upb_handlers *h); + + // Returns a new handlers object for the given frozen msgdef. A single ref + // will belong to the given owner. + // Returns NULL if memory allocation failed. + static Handlers* New(const MessageDef* m, const void *owner); + + // Convenience function for registering a graph of handlers that mirrors the + // graph of msgdefs for some message. For "m" and all its children a new set + // of handlers will be created and the given callback will be invoked, + // allowing the client to register handlers for this message. Note that any + // subhandlers set by the callback will be overwritten. + static const Handlers* NewFrozen(const MessageDef *m, const void *owner, + HandlersCallback *callback, void *closure); + + // Functionality from upb::RefCounted. 
+ bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Freezes the given set of handlers. You may not freeze a handler without + // also freezing any handlers they point to. In the future we may want to + // require that all fields of the submessage have had subhandlers set for + // them. + static bool Freeze(Handlers*const* handlers, int n, Status* s); + + // Returns the msgdef associated with this handlers object. + const MessageDef* message_def() const; + + // Sets the startmsg handler for the message, which is defined as follows: + // + // bool startmsg(void *closure) { + // // Called when the message begins. Returns true if processing should + // // continue. + // return true; + // } + void SetStartMessageHandler(StartMessageHandler *handler); + StartMessageHandler *GetStartMessageHandler() const; + + // Sets the endmsg handler for the message, which is defined as follows: + // + // void endmsg(void *closure, upb_status *status) { + // // Called when processing of this message ends, whether in success or + // // failure. "status" indicates the final status of processing, and + // // can also be modified in-place to update the final status. + // } + void SetEndMessageHandler(EndMessageHandler *handler); + EndMessageHandler *GetEndMessageHandler() const; + + // Sets the value handler for the given field, which is defined as follows + // (this is for an int32 field; other field types will pass their native + // C/C++ type for "val"): + // + // bool value(void *closure, void *d, int32_t val) { + // // Called when the field's value is encountered. "d" contains + // // whatever data was bound to this field when it was registered. + // // Returns true if processing should continue. + // return true; + // } + // + // The value type must exactly match f->type(). 
+ // For example, SetInt32Handler() may only be used for fields of type + // UPB_TYPE_INT32, UPB_TYPE_SINT32, UPB_TYPE_SFIXED32, and UPB_TYPE_ENUM. + // + // "d" is the data that will be bound to this callback and passed to it. + // If "fr" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or has the wrong + // type for this handler. + bool SetInt32Handler (const FieldDef* f, Int32Handler* h, void* d, Free* fr); + bool SetInt64Handler (const FieldDef* f, Int64Handler* h, void* d, Free* fr); + bool SetUint32Handler(const FieldDef* f, Uint32Handler* h, void* d, Free* fr); + bool SetUint64Handler(const FieldDef* f, Uint64Handler* h, void* d, Free* fr); + bool SetFloatHandler (const FieldDef* f, FloatHandler* h, void* d, Free* fr); + bool SetDoubleHandler(const FieldDef* f, DoubleHandler* h, void* d, Free* fr); + bool SetBoolHandler (const FieldDef* f, BoolHandler* h, void* d, Free* fr); + + // Sets handlers for a string field, which are defined as follows: + // + // void* startstr(void *closure, void *data, size_t size_hint) { + // // Called when a string value begins. The return value indicates the + // // closure for the string. "size_hint" indicates the size of the + // // string if it is known, however if the string is length-delimited + // // and the end-of-string is not available size_hint will be zero. + // // This case is indistinguishable from the case where the size is + // // known to be zero. + // // + // // TODO(haberman): is it important to distinguish these cases? + // // If we had ssize_t as a type we could make -1 "unknown", but + // // ssize_t is POSIX (not ANSI) and therefore less portable. + // // In practice I suspect it won't be important to distinguish. + // return closure; + // } // - // If you UPB_SKIPSUBMSG from a startmsg handler, the endmsg handler will - // be called to perform cleanup and return a status. 
Returning - // UPB_SKIPSUBMSG from a startsubmsg handler will *not* call the startmsg, - // endmsg, or endsubmsg handlers. + // size_t str(void *closure, void *data, const char *str, size_t len) { + // // Called for each buffer of string data; the multiple physical buffers + // // are all part of the same logical string. The return value indicates + // // how many bytes were consumed. If this number is less than "len", + // // this will also indicate that processing should be halted for now, + // // like returning false or UPB_BREAK from any other callback. If this + // // number is greater than "len", the excess bytes will be skipped over + // // and not passed to the callback. + // return len; + // } // - // If UPB_SKIPSUBMSG is called from the top-level message, no further input - // data will be consumed. - UPB_SKIPSUBMSG = -2, + // bool endstr(void *closure, void *data) { + // // Called when a string value ends. + // return true; + // } + bool SetStartStringHandler(const FieldDef* f, StartStringHandler* h, + void* d, Free* fr); + bool SetStringHandler(const FieldDef* f, StringHandler* h, void* d, Free* fr); + bool SetEndStringHandler(const FieldDef* f, EndFieldHandler* h, + void* d, Free* fr); + + // A setter that is templated on the type of the value. + template<class T> bool SetValueHandler( + const FieldDef* f, typename Value<T>::Handler* h, void* d, Free* fr); + + // Sets the startseq handler, which is defined as follows: + // + // void *startseq(void *closure, void *data) { + // // Called when a sequence (repeated field) begins. The returned + // // pointer indicates the closure for the sequence (or UPB_BREAK + // // to interrupt processing). + // return closure; + // } + // + // Returns "false" if "f" does not belong to this message or is not a + // repeated field. + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. 
+ bool SetStartSequenceHandler(const FieldDef* f, StartFieldHandler *handler, + void* data, Free* cleanup); - // TODO: Add UPB_SUSPEND, for resumable producers/consumers. -} upb_flow_t; + // Sets the startsubmsg handler for the given field, which is defined as + // follows: + // + // void *startsubmsg(void *closure, void *data) { + // // Called when a submessage begins. The returned pointer indicates the + // // closure for the submessage (or UPB_BREAK to interrupt processing). + // return closure; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // submessage/group field. + bool SetStartSubMessageHandler(const FieldDef* f, StartFieldHandler *handler, + void* data, Free* cleanup); -// The startsubmsg handler needs to also pass a closure to the submsg. -typedef struct { - upb_flow_t flow; - void *closure; -} upb_sflow_t; + // Sets the endsubmsg handler for the given field, which is defined as + // follows: + // + // bool endsubmsg(void *closure, void *data) { + // // Called when a submessage ends. Returns true to continue processing. + // return true; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // submessage/group field. 
+ bool SetEndSubMessageHandler(const FieldDef* f, EndFieldHandler *handler, + void* data, Free* cleanup); -INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) { - upb_sflow_t ret = {flow, closure}; - return ret; -} -#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c) -#define UPB_SBREAK UPB_SFLOW(UPB_BREAK, NULL) + // Sets the endseq handler for the given field, which is defined as + // follows: + // + // bool endseq(void *closure, void *data) { + // // Called when a sequence ends. Returns true to continue processing. + // return true; + // } + // + // "data" is the data that will be bound to this callback and passed to it. + // If "cleanup" is non-NULL it will be run when the data is no longer needed. + // + // Returns "false" if "f" does not belong to this message or is not a + // repeated field. + bool SetEndSequenceHandler(const FieldDef* f, EndFieldHandler *handler, + void* data, Free* cleanup); + + // Sets or gets the object that specifies handlers for the given field, which + // must be a submessage or group. Returns NULL if no handlers are set. + bool SetSubHandlers(const FieldDef* f, const Handlers* sub); + const Handlers* GetSubHandlers(const FieldDef* f) const; + + // NOTE: The remaining functions in this class are mostly of interest to + // byte-code/JIT compilers (or upb internals); most users will not need them. + // These functions also require more care, since passing a selector that + // does not match the type of these handlers yields undefined behavior. + + // A selector refers to a specific field handler in the Handlers object + // (for example: the STARTSUBMSG handler for field "field15"). + // On success, returns true and stores the selector in "s". + // If the FieldDef or Type are invalid, returns false. + // The returned selector is ONLY valid for Handlers whose MessageDef + // contains this FieldDef. + static bool GetSelector(const FieldDef* f, Type type, Selector* s); + + // Returns the function pointer for this handler. 
It is the client's + // responsibility to cast to the correct function type before calling it. + GenericFunction* GetHandler(Selector selector); + + // Returns the handler data that was registered with this handler. + void* GetHandlerData(Selector selector); + + // Gets the byte offset from a Handlers* where the given handler can be found. + // Useful for JITs that want to read the pointer in their fast path. + static size_t GetHandlerOffset(Selector selector); + + // Could add any of the following functions as-needed, with some minor + // implementation changes: + // + // const FieldDef* GetFieldDef(Selector selector); + // static bool IsSequence(Selector selector); + // Selector GetEndSelector(Selector start_selector); -// Typedefs for all of the handler functions defined above. -typedef upb_flow_t (upb_startmsg_handler)(void *c); -typedef void (upb_endmsg_handler)(void *c, upb_status *status); -typedef upb_flow_t (upb_value_handler)(void *c, upb_value fval, upb_value val); -typedef upb_sflow_t (upb_startfield_handler)(void *closure, upb_value fval); -typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval); + private: + UPB_DISALLOW_POD_OPS(Handlers); +#else +struct upb_handlers { +#endif + upb_refcounted base; + const upb_msgdef *msg; + bool (*startmsg)(void*); + void (*endmsg)(void*, upb_status*); + void *fh_base[1]; // Start of dynamically-sized field handler array. +}; -/* upb_fhandlers **************************************************************/ +// Native C API. 
+#ifdef __cplusplus +extern "C" { +#endif +typedef bool upb_startmsg_handler(void *c); +typedef void upb_endmsg_handler(void *c, upb_status *status); +typedef void* upb_startfield_handler(void *closure, void *d); +typedef bool upb_endfield_handler(void *closure, void *d); +typedef void upb_handlers_callback(void *closure, upb_handlers *h); +typedef void upb_handlerfree(void *d); +typedef void upb_func(); + +typedef bool upb_int32_handler(void *c, void *d, int32_t val); +typedef bool upb_int64_handler(void *c, void *d, int64_t val); +typedef bool upb_uint32_handler(void *c, void *d, uint32_t val); +typedef bool upb_uint64_handler(void *c, void *d, uint64_t val); +typedef bool upb_float_handler(void *c, void *d, float val); +typedef bool upb_double_handler(void *c, void *d, double val); +typedef bool upb_bool_handler(void *c, void *d, bool val); +typedef void* upb_startstr_handler(void *closure, void *d, size_t size_hint); +typedef size_t upb_string_handler(void *c, void *d, const char *buf, size_t n); + +upb_handlers *upb_handlers_new(const upb_msgdef *m, const void *owner); +const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, + const void *owner, + upb_handlers_callback *callback, + void *closure); + +// From upb_refcounted. 
+void upb_handlers_unref(const upb_handlers *h, const void *owner); +bool upb_handlers_isfrozen(const upb_handlers *h); +void upb_handlers_ref(const upb_handlers *h, const void *owner); +void upb_handlers_donateref( + const upb_handlers *h, const void *from, const void *to); +void upb_handlers_checkref(const upb_handlers *h, const void *owner); + +bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s); +const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h); +void upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handler *handler); +upb_startmsg_handler *upb_handlers_getstartmsg(const upb_handlers *h); +void upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handler *handler); +upb_endmsg_handler *upb_handlers_getendmsg(const upb_handlers *h); +bool upb_handlers_setint32( + upb_handlers *h, const upb_fielddef *f, upb_int32_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setint64( + upb_handlers *h, const upb_fielddef *f, upb_int64_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setuint32( + upb_handlers *h, const upb_fielddef *f, upb_uint32_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setuint64( + upb_handlers *h, const upb_fielddef *f, upb_uint64_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setfloat( + upb_handlers *h, const upb_fielddef *f, upb_float_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setdouble( + upb_handlers *h, const upb_fielddef *f, upb_double_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setbool( + upb_handlers *h, const upb_fielddef *f, upb_bool_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartstr( + upb_handlers *h, const upb_fielddef *f, upb_startstr_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstring( + upb_handlers *h, const upb_fielddef *f, upb_string_handler *handler, + void *d, upb_handlerfree *fr); +bool 
upb_handlers_setendstr( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartseq( + upb_handlers *h, const upb_fielddef *f, upb_startfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setstartsubmsg( + upb_handlers *h, const upb_fielddef *f, upb_startfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setendsubmsg( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setendseq( + upb_handlers *h, const upb_fielddef *f, upb_endfield_handler *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setsubhandlers( + upb_handlers *h, const upb_fielddef *f, const upb_handlers *sub); +const upb_handlers *upb_handlers_getsubhandlers( + const upb_handlers *h, const upb_fielddef *f); +upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f); +bool upb_getselector( + const upb_fielddef *f, upb_handlertype_t type, upb_selector_t *s); +upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s); +void *upb_handlers_gethandlerdata(const upb_handlers *h, upb_selector_t s); +size_t upb_gethandleroffset(upb_selector_t s); + +// Internal-only. +uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f); +uint32_t upb_handlers_selectorcount(const upb_fielddef *f); +#ifdef __cplusplus +} // extern "C" +#endif -// A upb_fhandlers object represents the set of handlers associated with one -// specific message field. +// Convenience versions of the above that first look up the field by name. 
+#define DEFINE_NAME_SETTER(slot, type) \ + INLINE void upb_handlers_set ## slot ## _n( \ + upb_handlers *h, const char *name, type val, \ + void *d, upb_handlerfree *fr) { \ + upb_handlers_set ## slot(h, upb_msgdef_ntof( \ + upb_handlers_msgdef(h), name), val, d, fr); \ + } +DEFINE_NAME_SETTER(int32, upb_int32_handler*); +DEFINE_NAME_SETTER(int64, upb_int64_handler*); +DEFINE_NAME_SETTER(uint32, upb_uint32_handler*); +DEFINE_NAME_SETTER(uint64, upb_uint64_handler*); +DEFINE_NAME_SETTER(float, upb_float_handler*); +DEFINE_NAME_SETTER(double, upb_double_handler*); +DEFINE_NAME_SETTER(bool, upb_bool_handler*); +DEFINE_NAME_SETTER(startstr, upb_startstr_handler*); +DEFINE_NAME_SETTER(string, upb_string_handler*); +DEFINE_NAME_SETTER(endstr, upb_endfield_handler*); +DEFINE_NAME_SETTER(startseq, upb_startfield_handler*); +DEFINE_NAME_SETTER(startsubmsg, upb_startfield_handler*); +DEFINE_NAME_SETTER(endsubmsg, upb_endfield_handler*); +DEFINE_NAME_SETTER(endseq, upb_endfield_handler*); +#undef DEFINE_NAME_SETTER + +// Value writers for every in-memory type: write the data to a known offset +// from the closure "c." These depend on the fval being a pointer to a +// structure that is (or begins with) the upb_stdmsg_fval type. // -// TODO: remove upb_decoder-specific fields from this, and instead have -// upb_decoderplan make a deep copy of the whole graph with its own fields -// added. -struct _upb_decoder; -struct _upb_mhandlers; -typedef struct _upb_fieldent { - upb_fieldtype_t type; - bool repeated; - uint32_t refcount; - uint32_t number; +// TODO(haberman): These are hacky; remove them and replace with an API that +// lets you set a simple "writer" handler in a way that can generate +// specialized code right then. 
+ +typedef struct upb_stdmsg_fval { +#ifdef __cplusplus + upb_stdmsg_fval(size_t offset_, int32_t hasbit_) + : offset(offset_), + hasbit(hasbit_) { + } +#endif + size_t offset; int32_t hasbit; - struct _upb_mhandlers *msg; - struct _upb_mhandlers *submsg; // Set iff upb_issubmsgtype(type) == true. - upb_value fval; - upb_value_handler *value; - upb_startfield_handler *startsubmsg; - upb_endfield_handler *endsubmsg; - upb_startfield_handler *startseq; - upb_endfield_handler *endseq; -#ifdef UPB_USE_JIT_X64 - uint32_t jit_pclabel; - uint32_t jit_pclabel_notypecheck; - uint32_t jit_submsg_done_pclabel; +} upb_stdmsg_fval; + +#ifdef __cplusplus +extern "C" { #endif -} upb_fhandlers; - -// fhandlers are created as part of a upb_handlers instance, but can be ref'd -// and unref'd to prolong the life of the handlers. -void upb_fhandlers_ref(upb_fhandlers *m); -void upb_fhandlers_unref(upb_fhandlers *m); - -// upb_fhandlers accessors -#define UPB_FHANDLERS_ACCESSORS(name, type) \ - INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \ - INLINE type upb_fhandlers_get ## name(const upb_fhandlers *f) { return f->name; } -// TODO(haberman): need a way of keeping the fval alive even if a plan outlasts -// the handlers. -UPB_FHANDLERS_ACCESSORS(fval, upb_value) -UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*) -UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*) -UPB_FHANDLERS_ACCESSORS(endsubmsg, upb_endfield_handler*) -UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*) -UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*) -UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*) -UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*) -// If set to >= 0, the hasbit will automatically be set when the corresponding -// field is parsed (when a JIT is enabled, this can be significantly more -// efficient than setting the hasbit yourself inside the callback). 
For values -// it is undefined whether the hasbit is set before or after the callback is -// called. For seq and submsg, the hasbit is set *after* the start handler is -// called, but before any of the handlers for the submsg or sequence. -UPB_FHANDLERS_ACCESSORS(hasbit, int32_t) - - -/* upb_mhandlers **************************************************************/ - -// A upb_mhandlers object represents the set of handlers associated with a -// message in the graph of messages. - -typedef struct _upb_mhandlers { - uint32_t refcount; - upb_startmsg_handler *startmsg; - upb_endmsg_handler *endmsg; - upb_inttable fieldtab; // Maps field number -> upb_fhandlers. - bool is_group; -#ifdef UPB_USE_JIT_X64 - // Used inside the JIT to track labels (jmp targets) in the generated code. - uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message. - uint32_t jit_afterstartmsg_pclabel; // After calling the startmsg handler. - uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end? - uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message. - uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup. - uint32_t jit_unknownfield_pclabel; // Parsed an unknown field. - uint32_t max_field_number; - // Currently keyed on field number. Could also try keying it - // on encoded or decoded tag, or on encoded field number. - void **tablearray; - // Pointer to the JIT code for parsing this message. 
- void *jit_func; +bool upb_stdmsg_setint32(void *c, void *d, int32_t val); +bool upb_stdmsg_setint64(void *c, void *d, int64_t val); +bool upb_stdmsg_setuint32(void *c, void *d, uint32_t val); +bool upb_stdmsg_setuint64(void *c, void *d, uint64_t val); +bool upb_stdmsg_setfloat(void *c, void *d, float val); +bool upb_stdmsg_setdouble(void *c, void *d, double val); +bool upb_stdmsg_setbool(void *c, void *d, bool val); +#ifdef __cplusplus +} // extern "C" #endif -} upb_mhandlers; - -// mhandlers are created as part of a upb_handlers instance, but can be ref'd -// and unref'd to prolong the life of the handlers. -void upb_mhandlers_ref(upb_mhandlers *m); -void upb_mhandlers_unref(upb_mhandlers *m); - -// Creates a new field with the given name and number. There must not be an -// existing field with either this name or number or abort() will be called. -// TODO: this should take a name also. -upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, bool repeated); -// Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the -// given submessage must not have any fields with this field number. -upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, - upb_fieldtype_t type, - bool repeated, - upb_mhandlers *subm); - -// upb_mhandlers accessors. -#define UPB_MHANDLERS_ACCESSORS(name, type) \ - INLINE void upb_mhandlers_set ## name(upb_mhandlers *m, type v){m->name = v;} \ - INLINE type upb_mhandlers_get ## name(upb_mhandlers *m) { return m->name; } -UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*); -UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*); - -// Returns fhandlers for the given field, or NULL if none. -upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n); - - -/* upb_handlers ***************************************************************/ - -struct _upb_handlers { - uint32_t refcount; - upb_mhandlers **msgs; // Array of msgdefs, [0]=toplevel. 
- int msgs_len, msgs_size; - bool should_jit; -}; -typedef struct _upb_handlers upb_handlers; - -upb_handlers *upb_handlers_new(void); -void upb_handlers_ref(upb_handlers *h); -void upb_handlers_unref(upb_handlers *h); - -// Appends a new message to the graph of handlers and returns it. This message -// can be obtained later at index upb_handlers_msgcount()-1. All handlers will -// be initialized to no-op handlers. -upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h); -upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index); - -// Convenience function for registering handlers for all messages and -// fields in a msgdef and all its children. For every registered message -// "msgreg_cb" will be called with the newly-created mhandlers, and likewise -// with "fieldreg_cb" -// -// See upb_handlers_reghandlerset() below for an example. -typedef void upb_onmsgreg( - void *closure, upb_mhandlers *mh, const upb_msgdef *m); -typedef void upb_onfieldreg( - void *closure, upb_fhandlers *fh, const upb_fielddef *f); -upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, - upb_onmsgreg *msgreg_cb, - upb_onfieldreg *fieldreg_cb, - void *closure); - -// Convenience function for registering a set of handlers for all messages and -// fields in a msgdef and its children, with the fval bound to the upb_fielddef. -// Any of the handlers may be NULL, in which case no callback will be set and -// the nop callback will be used. 
-typedef struct { - upb_startmsg_handler *startmsg; - upb_endmsg_handler *endmsg; - upb_value_handler *value; - upb_startfield_handler *startsubmsg; - upb_endfield_handler *endsubmsg; - upb_startfield_handler *startseq; - upb_endfield_handler *endseq; -} upb_handlerset; - -INLINE void upb_onmreg_hset(void *c, upb_mhandlers *mh, const upb_msgdef *m) { - (void)m; - upb_handlerset *hs = (upb_handlerset*)c; - if (hs->startmsg) upb_mhandlers_setstartmsg(mh, hs->startmsg); - if (hs->endmsg) upb_mhandlers_setendmsg(mh, hs->endmsg); -} -INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) { - upb_handlerset *hs = (upb_handlerset*)c; - if (hs->value) upb_fhandlers_setvalue(fh, hs->value); - if (hs->startsubmsg) upb_fhandlers_setstartsubmsg(fh, hs->startsubmsg); - if (hs->endsubmsg) upb_fhandlers_setendsubmsg(fh, hs->endsubmsg); - if (hs->startseq) upb_fhandlers_setstartseq(fh, hs->startseq); - if (hs->endseq) upb_fhandlers_setendseq(fh, hs->endseq); - upb_value val; - upb_value_setfielddef(&val, f); - upb_fhandlers_setfval(fh, val); -} -INLINE upb_mhandlers *upb_handlers_reghandlerset( - upb_handlers *h, const upb_msgdef *m, upb_handlerset *hs) { - return upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs); -} - - -/* upb_dispatcher *************************************************************/ - -// WARNING: upb_dispatcher should be considered INTERNAL-ONLY. The interface -// between it and upb_decoder is somewhat tightly coupled and may change. -// -// upb_dispatcher can be used by sources of data to invoke the appropriate -// handlers on a upb_handlers object. Besides maintaining the runtime stack of -// closures and handlers, the dispatcher checks the return status of user -// callbacks and properly handles statuses other than UPB_CONTINUE, invoking -// "skip" or "exit" handlers on the underlying data source as appropriate. 
- -typedef struct { - upb_fhandlers *f; - void *closure; - uint64_t end_ofs; - bool is_sequence; // frame represents seq or submsg? (f might be both). - bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX - // (strings aren't pushed). -} upb_dispatcher_frame; - -typedef void upb_exit_handler(void *); - -typedef struct { - upb_dispatcher_frame *top, *limit; - - // Msg and dispatch table for the current level. - upb_mhandlers *msgent; - upb_mhandlers *toplevel_msgent; - upb_exit_handler UPB_NORETURN *exitjmp; - void *srcclosure; - bool top_is_implicit; - - // Stack. - upb_status *status; - upb_dispatcher_frame stack[UPB_MAX_NESTING]; -} upb_dispatcher; - -// Caller retains ownership of the status object. -void upb_dispatcher_init(upb_dispatcher *d, upb_status *status, - upb_exit_handler UPB_NORETURN *exit, void *closure); -upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure, - upb_mhandlers *top_msg); -void upb_dispatcher_uninit(upb_dispatcher *d); - -// Tests whether the message could legally end here (either the stack is empty -// or the only open stack frame is implicit). -bool upb_dispatcher_islegalend(upb_dispatcher *d); - -// Unwinds one or more stack frames based on the given flow constant that was -// just returned from a handler. Calls end handlers as appropriate. -void _upb_dispatcher_abortjmp(upb_dispatcher *d) UPB_NORETURN; - -INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) { - char *p = (char*)_p; - if (hasbit >= 0) p[(uint32_t)hasbit / 8] |= (1 << ((uint32_t)hasbit % 8)); -} - -// Dispatch functions -- call the user handler and handle errors. 
-INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f, - upb_value val) { - upb_flow_t flow = UPB_CONTINUE; - if (f->value) flow = f->value(d->top->closure, f->fval, val); - _upb_dispatcher_sethas(d->top->closure, f->hasbit); - if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d); -} -void upb_dispatch_startmsg(upb_dispatcher *d); -void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); -upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, - upb_fhandlers *f); -upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d); -upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, - upb_fhandlers *f); -upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d); #ifdef __cplusplus -} /* extern "C" */ + +namespace upb { + +// C++ Wrappers +inline Handlers* Handlers::New(const MessageDef* m, const void *owner) { + return upb_handlers_new(m, owner); +} +inline const Handlers* Handlers::NewFrozen( + const MessageDef *m, const void *owner, + upb_handlers_callback *callback, void *closure) { + return upb_handlers_newfrozen(m, owner, callback, closure); +} +inline bool Handlers::IsFrozen() const { + return upb_handlers_isfrozen(this); +} +inline void Handlers::Ref(const void* owner) const { + upb_handlers_ref(this, owner); +} +inline void Handlers::Unref(const void* owner) const { + upb_handlers_unref(this, owner); +} +inline void Handlers::DonateRef(const void *from, const void *to) const { + upb_handlers_donateref(this, from, to); +} +inline void Handlers::CheckRef(const void *owner) const { + upb_handlers_checkref(this, owner); +} +inline bool Handlers::Freeze(Handlers*const* handlers, int n, Status* s) { + return upb_handlers_freeze(handlers, n, s); +} +inline const MessageDef* Handlers::message_def() const { + return upb_handlers_msgdef(this); +} +inline void Handlers::SetStartMessageHandler( + Handlers::StartMessageHandler *handler) { + upb_handlers_setstartmsg(this, handler); +} +inline void 
Handlers::SetEndMessageHandler( + Handlers::EndMessageHandler *handler) { + upb_handlers_setendmsg(this, handler); +} +inline bool Handlers::SetInt32Handler( + const FieldDef *f, Handlers::Int32Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint32(this, f, handler, d, fr); +} +inline bool Handlers::SetInt64Handler( + const FieldDef *f, Handlers::Int64Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint64(this, f, handler, d, fr); +} +inline bool Handlers::SetUint32Handler( + const FieldDef *f, Handlers::Uint32Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint32(this, f, handler, d, fr); +} +inline bool Handlers::SetUint64Handler( + const FieldDef *f, Handlers::Uint64Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint64(this, f, handler, d, fr); +} +inline bool Handlers::SetFloatHandler( + const FieldDef *f, Handlers::FloatHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setfloat(this, f, handler, d, fr); +} +inline bool Handlers::SetDoubleHandler( + const FieldDef *f, Handlers::DoubleHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setdouble(this, f, handler, d, fr); +} +inline bool Handlers::SetBoolHandler( + const FieldDef *f, Handlers::BoolHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setbool(this, f, handler, d, fr); +} +inline bool Handlers::SetStartStringHandler( + const FieldDef* f, Handlers::StartStringHandler* handler, + void* d, Handlers::Free* fr) { + return upb_handlers_setstartstr(this, f, handler, d, fr); +} +inline bool Handlers::SetEndStringHandler( + const FieldDef* f, Handlers::EndFieldHandler* handler, + void* d, Handlers::Free* fr) { + return upb_handlers_setendstr(this, f, handler, d, fr); +} +inline bool Handlers::SetStringHandler( + const FieldDef *f, Handlers::StringHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstring(this, f, 
handler, d, fr); +} +inline bool Handlers::SetStartSequenceHandler( + const FieldDef* f, Handlers::StartFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstartseq(this, f, handler, d, fr); +} +inline bool Handlers::SetStartSubMessageHandler( + const FieldDef* f, Handlers::StartFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstartsubmsg(this, f, handler, d, fr); +} +inline bool Handlers::SetEndSubMessageHandler( + const FieldDef* f, Handlers::EndFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setendsubmsg(this, f, handler, d, fr); +} +inline bool Handlers::SetEndSequenceHandler( + const FieldDef* f, Handlers::EndFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setendseq(this, f, handler, d, fr); +} +inline bool Handlers::SetSubHandlers( + const FieldDef* f, const Handlers* sub) { + return upb_handlers_setsubhandlers(this, f, sub); +} +inline Handlers::StartMessageHandler *Handlers::GetStartMessageHandler() const { + return upb_handlers_getstartmsg(this); +} +inline Handlers::EndMessageHandler *Handlers::GetEndMessageHandler() const { + return upb_handlers_getendmsg(this); +} +inline const Handlers* Handlers::GetSubHandlers( + const FieldDef* f) const { + return upb_handlers_getsubhandlers(this, f); +} +inline bool Handlers::GetSelector( + const FieldDef* f, Handlers::Type type, Handlers::Selector* s) { + return upb_getselector(f, type, s); +} +inline Handlers::GenericFunction* Handlers::GetHandler( + Handlers::Selector selector) { + return upb_handlers_gethandler(this, selector); +} +inline void* Handlers::GetHandlerData(Handlers::Selector selector) { + return upb_handlers_gethandlerdata(this, selector); +} +inline size_t Handlers::GetHandlerOffset(Handlers::Selector selector) { + return upb_gethandleroffset(selector); +} + +#define SET_VALUE_HANDLER(type, ctype) \ + template<> \ + inline bool Handlers::SetValueHandler<ctype>( \ + const 
FieldDef* f, \ + typename Handlers::Value<ctype>::Handler* handler, \ + void* data, Handlers::Free* cleanup) { \ + return upb_handlers_set ## type(this, f, handler, data, cleanup); \ + } +SET_VALUE_HANDLER(double, double); +SET_VALUE_HANDLER(float, float); +SET_VALUE_HANDLER(uint64, uint64_t); +SET_VALUE_HANDLER(uint32, uint32_t); +SET_VALUE_HANDLER(int64, int64_t); +SET_VALUE_HANDLER(int32, int32_t); +SET_VALUE_HANDLER(bool, bool); +#undef SET_VALUE_HANDLER + +template <class T> void DeletePointer(void *p) { delete static_cast<T*>(p); } + +template <class T> +void SetStoreValueHandler( + const FieldDef* f, size_t offset, int32_t hasbit, Handlers* h); + +// A handy templated function that will retrieve a value handler for a given +// C++ type. +#define SET_STORE_VALUE_HANDLER(type, ctype) \ + template <> \ + inline void SetStoreValueHandler<ctype>(const FieldDef* f, size_t offset, \ + int32_t hasbit, Handlers* h) { \ + h->SetValueHandler<ctype>( \ + f, upb_stdmsg_set ## type, new upb_stdmsg_fval(offset, hasbit), \ + &upb::DeletePointer<upb_stdmsg_fval>); \ + } + +SET_STORE_VALUE_HANDLER(double, double); +SET_STORE_VALUE_HANDLER(float, float); +SET_STORE_VALUE_HANDLER(uint64, uint64_t); +SET_STORE_VALUE_HANDLER(uint32, uint32_t); +SET_STORE_VALUE_HANDLER(int64, int64_t); +SET_STORE_VALUE_HANDLER(int32, int32_t); +SET_STORE_VALUE_HANDLER(bool, bool); +#undef GET_VALUE_HANDLER + +} // namespace upb #endif #endif diff --git a/upb/msg.c b/upb/msg.c deleted file mode 100644 index c671b7b..0000000 --- a/upb/msg.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Google Inc. See LICENSE for details. 
- * Author: Josh Haberman <jhaberman@gmail.com> - * - */ - -#include "upb/upb.h" -#include "upb/msg.h" - -#define UPB_ACCESSOR(type, ctype) \ - upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \ - upb_value val) { \ - assert(_m != NULL); \ - const upb_fielddef *f = upb_value_getfielddef(fval); \ - uint8_t *m = _m; \ - /* Hasbit is set automatically by the handlers. */ \ - *(ctype*)&m[f->offset] = upb_value_get ## type(val); \ - return UPB_CONTINUE; \ - } \ - -UPB_ACCESSOR(double, double) -UPB_ACCESSOR(float, float) -UPB_ACCESSOR(int32, int32_t) -UPB_ACCESSOR(int64, int64_t) -UPB_ACCESSOR(uint32, uint32_t) -UPB_ACCESSOR(uint64, uint64_t) -UPB_ACCESSOR(bool, bool) -UPB_ACCESSOR(ptr, void*) -#undef UPB_ACCESSORS - -static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, - const upb_fielddef *f) { - (void)c; - if (f->accessor) { - upb_fhandlers_setfval(fh, f->fval); - if (upb_isseq(f)) { - upb_fhandlers_setstartseq(fh, f->accessor->startseq); - upb_fhandlers_setvalue(fh, f->accessor->append); - upb_fhandlers_setstartsubmsg(fh, f->accessor->appendsubmsg); - } else { - upb_fhandlers_setvalue(fh, f->accessor->set); - upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg); - upb_fhandlers_sethasbit(fh, f->hasbit); - } - } -} - -upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m) { - return upb_handlers_regmsgdef(h, m, NULL, &upb_accessors_onfreg, NULL); -} diff --git a/upb/msg.h b/upb/msg.h deleted file mode 100644 index 7aaaf2a..0000000 --- a/upb/msg.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. - * Author: Josh Haberman <jhaberman@gmail.com> - * - * Routines for reading and writing message data to an in-memory structure, - * similar to a C struct. - * - * upb does not define one single message object that everyone must use. 
- * Rather it defines an abstract interface for reading and writing members - * of a message object, and all of the parsers and serializers use this - * abstract interface. This allows upb's parsers and serializers to be used - * regardless of what memory management scheme or synchronization model the - * application is using. - * - * A standard set of accessors is provided for doing simple reads and writes at - * a known offset into the message. These accessors should be used when - * possible, because they are specially optimized -- for example, the JIT can - * recognize them and emit specialized code instead of having to call the - * function at all. The application can substitute its own accessors when the - * standard accessors are not suitable. - */ - -#ifndef UPB_MSG_H -#define UPB_MSG_H - -#include <stdlib.h> -#include "upb/def.h" -#include "upb/handlers.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -/* upb_accessor ***************************************************************/ - -// A upb_accessor is a table of function pointers for doing reads and writes -// for one specific upb_fielddef. Each field has a separate accessor, which -// lives in the fielddef. - -typedef bool upb_has_reader(const void *m, upb_value fval); -typedef upb_value upb_value_reader(const void *m, upb_value fval); - -typedef const void *upb_seqbegin_handler(const void *s); -typedef const void *upb_seqnext_handler(const void *s, const void *iter); -typedef upb_value upb_seqget_handler(const void *iter); -INLINE bool upb_seq_done(const void *iter) { return iter == NULL; } - -typedef struct _upb_accessor_vtbl { - // Writers. These take an fval as a parameter because the callbacks are used - // as upb_handlers, but the fval is always the fielddef for that field. - upb_startfield_handler *startsubmsg; // Non-repeated submsg fields. - upb_value_handler *set; // Non-repeated scalar fields. - upb_startfield_handler *startseq; // Repeated fields only. 
- upb_startfield_handler *appendsubmsg; // Repeated submsg fields. - upb_value_handler *append; // Repeated scalar fields. - - // TODO: expect to also need endsubmsg and endseq. - - // Readers. - upb_has_reader *has; - upb_value_reader *getseq; - upb_value_reader *get; - upb_seqbegin_handler *seqbegin; - upb_seqnext_handler *seqnext; - upb_seqget_handler *seqget; -} upb_accessor_vtbl; - -// Registers handlers for writing into a message of the given type using -// whatever accessors it has defined. -upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m); - -INLINE void upb_msg_clearbit(void *msg, const upb_fielddef *f) { - ((char*)msg)[f->hasbit / 8] &= ~(1 << (f->hasbit % 8)); -} - -/* upb_msg/upb_seq ************************************************************/ - -// These accessor functions are simply convenience methods for reading or -// writing to a message through its accessors. - -INLINE bool upb_msg_has(const void *m, const upb_fielddef *f) { - return f->accessor && f->accessor->has(m, f->fval); -} - -// May only be called for fields that have accessors. -INLINE upb_value upb_msg_get(const void *m, const upb_fielddef *f) { - assert(f->accessor && !upb_isseq(f)); - return f->accessor->get(m, f->fval); -} - -// May only be called for fields that have accessors. 
-INLINE upb_value upb_msg_getseq(const void *m, const upb_fielddef *f) { - assert(f->accessor && upb_isseq(f)); - return f->accessor->getseq(m, f->fval); -} - -INLINE void upb_msg_set(void *m, const upb_fielddef *f, upb_value val) { - assert(f->accessor); - f->accessor->set(m, f->fval, val); -} - -INLINE const void *upb_seq_begin(const void *s, const upb_fielddef *f) { - assert(f->accessor); - return f->accessor->seqbegin(s); -} -INLINE const void *upb_seq_next(const void *s, const void *iter, - const upb_fielddef *f) { - assert(f->accessor); - assert(!upb_seq_done(iter)); - return f->accessor->seqnext(s, iter); -} -INLINE upb_value upb_seq_get(const void *iter, const upb_fielddef *f) { - assert(f->accessor); - assert(!upb_seq_done(iter)); - return f->accessor->seqget(iter); -} - -INLINE bool upb_msg_has_named(const void *m, const upb_msgdef *md, - const char *field_name) { - const upb_fielddef *f = upb_msgdef_ntof(md, field_name); - return f && upb_msg_has(m, f); -} - -INLINE bool upb_msg_get_named(const void *m, const upb_msgdef *md, - const char *field_name, upb_value *val) { - const upb_fielddef *f = upb_msgdef_ntof(md, field_name); - if (!f) return false; - *val = upb_msg_get(m, f); - return true; -} - -// Value writers for every in-memory type: write the data to a known offset -// from the closure "c." -// -// TODO(haberman): instead of having standard writer functions, should we have -// a bool in the accessor that says "write raw value to the field's offset"? 
-upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setptr(void *c, upb_value fval, upb_value val); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 30f7c65..065c495 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -5,17 +5,13 @@ * Author: Josh Haberman <jhaberman@gmail.com> */ +#include <inttypes.h> #include <stddef.h> #include <stdlib.h> #include "upb/bytestream.h" -#include "upb/msg.h" #include "upb/pb/decoder.h" #include "upb/pb/varint.h" -#ifndef UINT32_MAX -#define UINT32_MAX 0xffffffff -#endif - typedef struct { uint8_t native_wire_type; bool is_numeric; @@ -62,11 +58,12 @@ static const upb_decoder_typeinfo upb_decoder_types[] = { #include "upb/pb/decoder_x64.h" #endif -upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) { +upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit) { + UPB_UNUSED(allowjit); upb_decoderplan *p = malloc(sizeof(*p)); + assert(upb_handlers_isfrozen(h)); p->handlers = h; - upb_handlers_ref(h); - h->should_jit = allowjit; + upb_handlers_ref(h, p); #ifdef UPB_USE_JIT_X64 p->jit_code = NULL; if (allowjit) upb_decoderplan_makejit(p); @@ -76,7 +73,7 @@ upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) { void upb_decoderplan_unref(upb_decoderplan *p) { // TODO: make truly refcounted. 
- upb_handlers_unref(p->handlers); + upb_handlers_unref(p->handlers, p); #ifdef UPB_USE_JIT_X64 if (p->jit_code) upb_decoderplan_freejit(p); #endif @@ -100,8 +97,8 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p) { // configuration. But emperically on a Core i7, performance increases 30-50% // with these annotations. Every instance where these appear, gcc 4.2.1 made // the wrong decision and degraded performance in benchmarks. -#define FORCEINLINE static __attribute__((__always_inline__)) -#define NOINLINE static __attribute__((__noinline__)) +#define FORCEINLINE static inline __attribute__((always_inline)) +#define NOINLINE static __attribute__((noinline)) UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) { // Resumable decoder would back out to completed_ptr (and possibly get a @@ -141,14 +138,23 @@ uint64_t upb_decoder_bufendofs(upb_decoder *d) { return d->bufstart_ofs + (d->end - d->buf); } +static bool upb_decoder_islegalend(upb_decoder *d) { + if (d->top == d->stack) return true; + if (d->top - 1 == d->stack && + d->top->is_sequence && !d->top->is_packed) return true; + return false; +} + +// Calculates derived values that we cache for speed. These reflect a +// combination of the current buffer and the stack, so must be called whenever +// either is updated. static void upb_decoder_setmsgend(upb_decoder *d) { - upb_dispatcher_frame *f = d->dispatcher.top; + upb_decoder_frame *f = d->top; size_t delimlen = f->end_ofs - d->bufstart_ofs; size_t buflen = d->end - d->buf; d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ? d->buf + delimlen : NULL; // NULL if not in this buf. 
d->top_is_packed = f->is_packed; - d->dispatch_table = &d->dispatcher.msgent->fieldtab; } static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) { @@ -201,11 +207,11 @@ static void upb_pullbuf(upb_decoder *d) { if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF"); } -void upb_decoder_checkpoint(upb_decoder *d) { +static void upb_decoder_checkpoint(upb_decoder *d) { upb_byteregion_discard(d->input, upb_decoder_offset(d)); } -void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) { +static void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) { if (ofs <= upb_decoder_bufendofs(d)) { upb_decoder_advance(d, ofs - upb_decoder_offset(d)); } else { @@ -214,7 +220,7 @@ void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) { upb_decoder_checkpoint(d); } -void upb_decoder_discard(upb_decoder *d, size_t bytes) { +static void upb_decoder_discard(upb_decoder *d, size_t bytes) { upb_decoder_discardto(d, upb_decoder_offset(d) + bytes); } @@ -259,7 +265,7 @@ done: // Returns true on success or false if we've hit a valid EOF. FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { if (upb_decoder_bufleft(d) == 0 && - upb_dispatcher_islegalend(&d->dispatcher) && + upb_decoder_islegalend(d) && !upb_trypullbuf(d)) { return false; } @@ -319,21 +325,45 @@ FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { return u64; // TODO: proper byte swapping for big-endian machines. } -INLINE upb_byteregion *upb_decode_string(upb_decoder *d) { - uint32_t strlen = upb_decode_varint32(d); - uint64_t offset = upb_decoder_offset(d); - if (offset + strlen > upb_byteregion_endofs(d->input)) - upb_decoder_abortjmp(d, "Unexpected EOF"); - upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen); - // Could make it an option on the callback whether we fetchall() first or not. 
- if (upb_byteregion_fetchall(&d->str_byteregion) != UPB_BYTE_OK) - upb_decoder_abortjmp(d, "Couldn't fetchall() on string."); - upb_decoder_discardto(d, offset + strlen); - return &d->str_byteregion; +INLINE void upb_push_msg(upb_decoder *d, const upb_fielddef *f, uint64_t end) { + upb_decoder_frame *fr = d->top + 1; + if (!upb_sink_startsubmsg(&d->sink, f) || fr > d->limit) { + upb_decoder_abortjmp(d, "Nesting too deep."); + } + fr->f = f; + fr->is_sequence = false; + fr->is_packed = false; + fr->end_ofs = end; + fr->group_fieldnum = end == UPB_NONDELIMITED ? + (int32_t)upb_fielddef_number(f) : -1; + d->top = fr; + upb_decoder_setmsgend(d); } -INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) { - upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end; +INLINE void upb_push_seq(upb_decoder *d, const upb_fielddef *f, bool packed, + uint64_t end_ofs) { + upb_decoder_frame *fr = d->top + 1; + if (!upb_sink_startseq(&d->sink, f) || fr > d->limit) { + upb_decoder_abortjmp(d, "Nesting too deep."); + } + fr->f = f; + fr->is_sequence = true; + fr->group_fieldnum = -1; + fr->is_packed = packed; + fr->end_ofs = end_ofs; + d->top = fr; + upb_decoder_setmsgend(d); +} + +INLINE void upb_pop_submsg(upb_decoder *d) { + upb_sink_endsubmsg(&d->sink, d->top->f); + d->top--; + upb_decoder_setmsgend(d); +} + +INLINE void upb_pop_seq(upb_decoder *d) { + upb_sink_endseq(&d->sink, d->top->f); + d->top--; upb_decoder_setmsgend(d); } @@ -344,13 +374,14 @@ INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) { // properly sign-extended. We could detect this and error about the data loss, // but proto2 does not do this, so we pass. 
-#define T(type, wt, valtype, convfunc) \ - INLINE void upb_decode_ ## type(upb_decoder *d, upb_fhandlers *f) { \ - upb_value val; \ - upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \ - upb_dispatch_value(&d->dispatcher, f, val); \ +#define T(type, wt, name, convfunc) \ + INLINE void upb_decode_ ## type(upb_decoder *d, const upb_fielddef *f) { \ + upb_sink_put ## name(&d->sink, f, (convfunc)(upb_decode_ ## wt(d))); \ } \ +static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } +static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } + T(INT32, varint, int32, int32_t) T(INT64, varint, int64, int64_t) T(UINT32, varint, uint32, uint32_t) @@ -361,43 +392,44 @@ T(SFIXED32, fixed32, int32, int32_t) T(SFIXED64, fixed64, int64, int64_t) T(BOOL, varint, bool, bool) T(ENUM, varint, int32, int32_t) +T(DOUBLE, fixed64, double, upb_asdouble) +T(FLOAT, fixed32, float, upb_asfloat) T(SINT32, varint, int32, upb_zzdec_32) T(SINT64, varint, int64, upb_zzdec_64) -T(STRING, string, byteregion, upb_byteregion*) - #undef T -INLINE void upb_decode_DOUBLE(upb_decoder *d, upb_fhandlers *f) { - upb_value val; - double dbl; - uint64_t wireval = upb_decode_fixed64(d); - memcpy(&dbl, &wireval, 8); - upb_value_setdouble(&val, dbl); - upb_dispatch_value(&d->dispatcher, f, val); -} - -INLINE void upb_decode_FLOAT(upb_decoder *d, upb_fhandlers *f) { - upb_value val; - float flt; - uint64_t wireval = upb_decode_fixed32(d); - memcpy(&flt, &wireval, 4); - upb_value_setfloat(&val, flt); - upb_dispatch_value(&d->dispatcher, f, val); -} - -static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) { +static void upb_decode_GROUP(upb_decoder *d, const upb_fielddef *f) { upb_push_msg(d, f, UPB_NONDELIMITED); } -static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) { - (void)f; - upb_dispatch_endsubmsg(&d->dispatcher); - upb_decoder_setmsgend(d); -} -static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { + +static void 
upb_decode_MESSAGE(upb_decoder *d, const upb_fielddef *f) { uint32_t len = upb_decode_varint32(d); upb_push_msg(d, f, upb_decoder_offset(d) + len); } +static void upb_decode_STRING(upb_decoder *d, const upb_fielddef *f) { + uint32_t strlen = upb_decode_varint32(d); + uint64_t offset = upb_decoder_offset(d); + uint64_t end = offset + strlen; + if (end > upb_byteregion_endofs(d->input)) + upb_decoder_abortjmp(d, "Unexpected EOF"); + upb_sink_startstr(&d->sink, f, strlen); + while (strlen > 0) { + if (upb_byteregion_available(d->input, offset) == 0) + upb_pullbuf(d); + size_t len; + const char *ptr = upb_byteregion_getptr(d->input, offset, &len); + len = UPB_MIN(len, strlen); + len = upb_sink_putstring(&d->sink, f, ptr, len); + if (len > strlen) + upb_decoder_abortjmp(d, "Skipped too many bytes."); + offset += len; + strlen -= len; + upb_decoder_discardto(d, offset); + } + upb_sink_endstr(&d->sink, f); +} + /* The main decoding loop *****************************************************/ @@ -410,33 +442,33 @@ static void upb_decoder_checkdelim(upb_decoder *d) { // handler). while (d->delim_end != NULL && d->ptr >= d->delim_end) { if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end"); - if (d->dispatcher.top->is_sequence) { - upb_dispatch_endseq(&d->dispatcher); + if (d->top->is_sequence) { + upb_pop_seq(d); } else { - upb_dispatch_endsubmsg(&d->dispatcher); + upb_pop_submsg(d); } - upb_decoder_setmsgend(d); } } -INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { +INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) { while (1) { uint32_t tag; if (!upb_trydecode_varint32(d, &tag)) return NULL; uint8_t wire_type = tag & 0x7; - uint32_t fieldnum = tag >> 3; - const upb_value *val = upb_inttable_lookup32(d->dispatch_table, fieldnum); - upb_fhandlers *f = val ? 
upb_value_getptr(*val) : NULL; - bool is_packed = false; + uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL; + const upb_handlers *h = upb_sink_tophandlers(&d->sink); + f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum); + bool packed = false; if (f) { // Wire type check. - if (wire_type == upb_decoder_types[f->type].native_wire_type) { + upb_fieldtype_t type = upb_fielddef_type(f); + if (wire_type == upb_decoder_types[type].native_wire_type) { // Wire type is ok. } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED && - upb_decoder_types[f->type].is_numeric)) { + upb_decoder_types[type].is_numeric)) { // Wire type is ok (and packed). - is_packed = true; + packed = true; } else { f = NULL; } @@ -445,29 +477,24 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { // There are no explicit "startseq" or "endseq" markers in protobuf // streams, so we have to infer them by noticing when a repeated field // starts or ends. - upb_dispatcher_frame *fr = d->dispatcher.top; + upb_decoder_frame *fr = d->top; if (fr->is_sequence && fr->f != f) { - upb_dispatch_endseq(&d->dispatcher); - upb_decoder_setmsgend(d); - fr = d->dispatcher.top; + upb_pop_seq(d); + fr = d->top; } - if (f && f->repeated && !fr->is_sequence) { - upb_dispatcher_frame *fr2 = upb_dispatch_startseq(&d->dispatcher, f); - if (is_packed) { - // Packed primitive field. + + if (f && upb_fielddef_isseq(f) && !fr->is_sequence) { + if (packed) { uint32_t len = upb_decode_varint32(d); - fr2->end_ofs = upb_decoder_offset(d) + len; - fr2->is_packed = true; + upb_push_seq(d, f, true, upb_decoder_offset(d) + len); } else { - // Non-packed field -- this tag pertains to only a single message. - fr2->end_ofs = fr->end_ofs; + upb_push_seq(d, f, false, fr->end_ofs); } - upb_decoder_setmsgend(d); } if (f) return f; - // Unknown field. + // Unknown field or ENDGROUP. 
if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) upb_decoder_abortjmp(d, "Invalid field number"); switch (wire_type) { @@ -479,7 +506,12 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { case UPB_WIRE_TYPE_START_GROUP: upb_decoder_abortjmp(d, "Can't handle unknown groups yet"); case UPB_WIRE_TYPE_END_GROUP: - upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag"); + if (fieldnum != fr->group_fieldnum) + upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag"); + upb_sink_endsubmsg(&d->sink, fr->f); + d->top--; + upb_decoder_setmsgend(d); + break; default: upb_decoder_abortjmp(d, "Invalid wire type"); } @@ -495,30 +527,30 @@ upb_success_t upb_decoder_decode(upb_decoder *d) { assert(!upb_ok(&d->status)); return UPB_ERROR; } - upb_dispatch_startmsg(&d->dispatcher); + upb_sink_startmsg(&d->sink); // Prime the buf so we can hit the JIT immediately. upb_trypullbuf(d); - upb_fhandlers *f = d->dispatcher.top->f; + const upb_fielddef *f = d->top->f; while(1) { - upb_decoder_checkdelim(d); #ifdef UPB_USE_JIT_X64 upb_decoder_enterjit(d); upb_decoder_checkpoint(d); + upb_decoder_setmsgend(d); #endif + upb_decoder_checkdelim(d); if (!d->top_is_packed) f = upb_decode_tag(d); if (!f) { // Sucessful EOF. We may need to dispatch a top-level implicit frame. 
- if (d->dispatcher.top->is_sequence) { - assert(d->dispatcher.top == d->dispatcher.stack + 1); - upb_dispatch_endseq(&d->dispatcher); + if (d->top->is_sequence) { + assert(d->sink.top == d->sink.stack + 1); + upb_pop_seq(d); } - assert(d->dispatcher.top == d->dispatcher.stack); - upb_dispatch_endmsg(&d->dispatcher, &d->status); + assert(d->top == d->stack); + upb_sink_endmsg(&d->sink, &d->status); return UPB_OK; } - switch (f->type) { - case UPB_TYPE_ENDGROUP: upb_endgroup(d, f); break; + switch (upb_fielddef_type(f)) { case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break; case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break; case UPB_TYPE(INT64): upb_decode_INT64(d, f); break; @@ -545,28 +577,29 @@ upb_success_t upb_decoder_decode(upb_decoder *d) { void upb_decoder_init(upb_decoder *d) { upb_status_init(&d->status); - upb_dispatcher_init(&d->dispatcher, &d->status, &upb_decoder_exitjmp2, d); d->plan = NULL; d->input = NULL; + d->limit = &d->stack[UPB_MAX_NESTING]; } -void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) { - assert(msg_offset >= 0); - assert(msg_offset < p->handlers->msgs_len); +void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p) { d->plan = p; - d->msg_offset = msg_offset; d->input = NULL; + upb_sink_init(&d->sink, p->handlers); } void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, - void *closure) { + void *c) { assert(d->plan); - upb_dispatcher_frame *f = - upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]); upb_status_clear(&d->status); - f->end_ofs = UPB_NONDELIMITED; + upb_sink_reset(&d->sink, c); d->input = input; - d->str_byteregion.bytesrc = input->bytesrc; + + d->top = d->stack; + d->top->is_sequence = false; + d->top->is_packed = false; + d->top->group_fieldnum = UINT32_MAX; + d->top->end_ofs = UPB_NONDELIMITED; // Protect against assert in skiptonewbuf(). 
d->bufstart_ofs = 0; @@ -576,6 +609,5 @@ void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, } void upb_decoder_uninit(upb_decoder *d) { - upb_dispatcher_uninit(&d->dispatcher); upb_status_uninit(&d->status); } diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index df65468..690ebb9 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -13,9 +13,8 @@ #define UPB_DECODER_H_ #include <setjmp.h> -#include <stdbool.h> -#include <stdint.h> -#include "upb/handlers.h" +#include "upb/bytestream.h" +#include "upb/sink.h" #ifdef __cplusplus extern "C" { @@ -34,9 +33,12 @@ extern "C" { struct _upb_decoderplan; typedef struct _upb_decoderplan upb_decoderplan; -// TODO: add parameter for a list of other decoder plans that we can share -// generated code with. -upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit); +// TODO(haberman): +// - add support for letting any message in the plan be at the top level. +// - make this object a handlers instead (when bytesrc/bytesink are merged +// into handlers). +// - add support for sharing code with previously-built plans/handlers. +upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit); void upb_decoderplan_unref(upb_decoderplan *p); // Returns true if the plan contains JIT-ted code. This may not be the same as @@ -49,15 +51,28 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p); struct dasm_State; +typedef struct { + const upb_fielddef *f; + uint64_t end_ofs; + uint32_t group_fieldnum; // UINT32_MAX for non-groups. + bool is_sequence; // frame represents seq or submsg? (f might be both). + bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX + // (strings aren't pushed). +} upb_decoder_frame; + typedef struct _upb_decoder { upb_decoderplan *plan; - int msg_offset; // Which message from the plan is top-level. upb_byteregion *input; // Input data (serialized), not owned. - upb_dispatcher dispatcher; // Dispatcher to which we push parsed data. 
upb_status status; // Where we store errors that occur. - upb_byteregion str_byteregion; // For passing string data to callbacks. - upb_inttable *dispatch_table; + // Where we push parsed data. + // TODO(haberman): make this a pointer and make upb_decoder_resetinput() take + // one of these instead of a void*. + upb_sink sink; + + // Our internal stack. + upb_decoder_frame *top, *limit; + upb_decoder_frame stack[UPB_MAX_NESTING]; // Current input buffer and its stream offset. const char *buf, *ptr, *end; @@ -70,7 +85,11 @@ typedef struct _upb_decoder { #ifdef UPB_USE_JIT_X64 // For JIT, which doesn't do bounds checks in the middle of parsing a field. - const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end) + const char *jit_end, *effective_end; // == MIN(jit_end, delim_end) + + // Used momentarily by the generated code to store a value while a user + // function is called. + uint32_t tmp_len; #endif // For exiting the decoder on error. @@ -88,7 +107,7 @@ void upb_decoder_uninit(upb_decoder *d); // must live until the decoder is destroyed or reset to a different plan. // // Must be called before upb_decoder_resetinput() or upb_decoder_decode(). -void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset); +void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p); // Resets the input of an already-allocated decoder. This puts it in a state // where it has not seen any data, and expects the next data to be from the @@ -111,7 +130,8 @@ INLINE const upb_status *upb_decoder_status(upb_decoder *d) { // Implementation details struct _upb_decoderplan { - upb_handlers *handlers; // owns reference. + // The top-level handlers that this plan calls into. We own a ref. + const upb_handlers *handlers; #ifdef UPB_USE_JIT_X64 // JIT-generated machine code (else NULL). @@ -119,8 +139,23 @@ struct _upb_decoderplan { size_t jit_size; char *debug_info; + // For storing upb_jitmsginfo, which contains per-msg runtime data needed + // by the JIT. 
+ // Maps upb_handlers* -> upb_jitmsginfo. + upb_inttable msginfo; + + // The following members are used only while the JIT is being built. + // This pointer is allocated by dasm_init() and freed by dasm_free(). struct dasm_State *dynasm; + + // For storing pclabel bases while we are building the JIT. + // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base + upb_inttable pclabels; + + // This is not the same as len(pclabels) because the table only contains base + // offsets for each def, but each def can have many pclabels. + uint32_t pclabel_count; #endif }; diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc index f58e403..cd09cfe 100644 --- a/upb/pb/decoder_x64.dasc +++ b/upb/pb/decoder_x64.dasc @@ -12,6 +12,7 @@ |// function) we must respect alignment rules. All x86-64 systems require |// 16-byte stack alignment. +#include <stdio.h> #include <sys/mman.h> #include "dynasm/dasm_x86.h" @@ -28,6 +29,44 @@ #define MAP_32BIT 0 #endif +// These are used to track jump targets for messages and fields. +enum { + STARTMSG = 0, + AFTER_STARTMSG = 1, + ENDOFBUF = 2, + ENDOFMSG = 3, + DYNDISPATCH = 4, + TOTAL_MSG_PCLABELS = 5, +}; + +enum { + FIELD = 0, + FIELD_NO_TYPECHECK = 1, + TOTAL_FIELD_PCLABELS = 2, +}; + +typedef struct { + uint32_t max_field_number; + // Currently keyed on field number. Could also try keying it + // on encoded or decoded tag, or on encoded field number. + void **tablearray; + // Pointer to the JIT code for parsing this message. 
+ void *jit_func; +} upb_jitmsginfo; + +static uint32_t upb_getpclabel(upb_decoderplan *plan, const void *obj, int n) { + const upb_value *v = upb_inttable_lookupptr(&plan->pclabels, obj); + assert(v); + return upb_value_getuint32(*v) + n; +} + +static upb_jitmsginfo *upb_getmsginfo(upb_decoderplan *plan, + const upb_handlers *h) { + const upb_value *v = upb_inttable_lookupptr(&plan->msginfo, h); + assert(v); + return upb_value_getptr(*v); +} + // To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code // at runtime. GDB 7.x+ has defined an interface for doing this, and these // structure/function defintions are copied out of gdb/jit.h @@ -66,7 +105,9 @@ typedef struct { gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL}; -void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); } +void __attribute__((noinline)) __jit_debug_register_code() { + __asm__ __volatile__(""); +} void upb_reg_jit_gdb(upb_decoderplan *plan) { // Create debug info. @@ -120,7 +161,8 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |.define ARG3_32, edx |.define ARG3_64, rdx |.define ARG4_64, rcx -|.define ARG5_32, r8d +|.define XMMARG1, xmm0 + | |// Register allocation / type map. |// ALL of the code in this file uses these register allocations. @@ -128,13 +170,15 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |// conventions, but of course when calling to user callbacks we must. |.define PTR, rbx // Writing this to DECODER->ptr commits our progress. |.define CLOSURE, r12 -|.type FRAME, upb_dispatcher_frame, r13 -|.type BYTEREGION,upb_byteregion, r14 +|.type SINKFRAME, upb_sink_frame, r13 +|.type FRAME, upb_decoder_frame, r14 |.type DECODER, upb_decoder, r15 -|.type STDARRAY, upb_stdarray | |.macro callp, addr || upb_assert_notnull(addr); +|// TODO(haberman): fix this. 
I believe the predicate we should actually be +|// testing is whether the jump distance is greater than INT32_MAX, not the +|// absolute address of the target. || if ((uintptr_t)addr < 0xffffffff) { | call &addr || } else { @@ -143,14 +187,22 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } || } |.endmacro | -|// Checks PTR for end-of-buffer. -|.macro check_eob, m +|// Checkpoints our progress by writing PTR to DECODER, and +|// checks for end-of-buffer. +|.macro checkpoint, h +| mov DECODER->ptr, PTR | cmp PTR, DECODER->effective_end -|| if (m->is_group) { - | jae ->exit_jit -|| } else { - | jae =>m->jit_endofbuf_pclabel -|| } +| jae =>upb_getpclabel(plan, h, ENDOFBUF) +|.endmacro +| +|.macro check_bool_ret +| test al, al +| jz ->exit_jit +|.endmacro +| +|.macro check_ptr_ret +| test rax, rax +| jz ->exit_jit |.endmacro | |// Decodes varint from [PTR + offset] -> ARG3. @@ -172,8 +224,7 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | mov ARG1_64, rax | mov ARG2_32, ARG3_32 | callp upb_vdecode_max8_fast -| test rax, rax -| jz ->exit_jit // >10-byte varint. +| check_ptr_ret // Check for unterminated, >10-byte varint. |9: |.endmacro | @@ -187,74 +238,103 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |// Could specialize this by avoiding the value masking: could just key the |// table on the raw (length-masked) varint to save 3-4 cycles of latency. |// Currently only support tables where all entries are in the array part. -|.macro dyndispatch_, m -|=>m->jit_dyndispatch_pclabel: +|.macro dyndispatch_, h +|=>upb_getpclabel(plan, h, DYNDISPATCH): | decode_loaded_varint, 0 | mov ecx, edx | shr ecx, 3 -| and edx, 0x7 // For the type check that will happen later. -| cmp ecx, m->max_field_number // Bounds-check the field. 
-| ja ->exit_jit // In the future; could be unknown label -|| if ((uintptr_t)m->tablearray < 0xffffffff) { +| and edx, 0x7 // Note: this value is used in the FIELD pclabel below. +| cmp edx, UPB_WIRE_TYPE_END_GROUP +| je >1 +|| upb_jitmsginfo *mi = upb_getmsginfo(plan, h); +| cmp ecx, mi->max_field_number // Bounds-check the field. +| ja ->exit_jit // In the future; could be unknown label +|| if ((uintptr_t)mi->tablearray < 0xffffffff) { | // TODO: support hybrid array/hash tables. -| mov rax, qword [rcx*8 + m->tablearray] +| mov rax, qword [rcx*8 + mi->tablearray] || } else { -| mov64 rax, (uintptr_t)m->tablearray +| mov64 rax, (uintptr_t)mi->tablearray | mov rax, qword [rax + rcx*8] || } | jmp rax // Dispatch: unpredictable jump. +|1: +|// End group. +| cmp ecx, FRAME->group_fieldnum +| jne ->exit_jit // Unexpected END_GROUP tag. +| mov PTR, rax // rax came from decode_loaded_varint +| mov DECODER->ptr, PTR +| jmp =>upb_getpclabel(plan, h, ENDOFMSG) |.endmacro | |.if 1 | // Replicated dispatch: larger code, but better branch prediction. | .define dyndispatch, dyndispatch_ |.else -| .macro dyndispatch, m -| jmp =>m->jit_dyndispatch_pclabel +| // Single dispatch: smaller code, could be faster because of reduced +| // icache usage. We keep this around to allow for easy comparison between +| // the two. +| .macro dyndispatch, h +| jmp =>upb_getpclabel(plan, h, DYNDISPATCH) | .endmacro |.endif | |// Push a stack frame (not the CPU stack, the upb_decoder stack). -|.macro pushframe, f, end_offset_, is_sequence_ -| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing. -| cmp rax, qword DECODER->dispatcher.limit +|.macro pushframe, h, field, end_offset_, endtype +|// Decoder Frame. +| lea rax, [FRAME + sizeof(upb_decoder_frame)] // rax for short addressing +| cmp rax, DECODER->limit | jae ->exit_jit // Frame stack overflow. 
-| mov64 r8, (uintptr_t)f -| mov qword FRAME:rax->f, r8 +| mov64 r10, (uintptr_t)field +| mov FRAME:rax->f, r10 | mov qword FRAME:rax->end_ofs, end_offset_ -| mov byte FRAME:rax->is_sequence, is_sequence_ -| mov DECODER->dispatcher.top, rax +| mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ) +| mov byte FRAME:rax->is_packed, 0 +|| if (upb_fielddef_type(field) == UPB_TYPE_GROUP && +|| endtype == UPB_HANDLER_ENDSUBMSG) { +| mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field) +|| } else { +| mov dword FRAME:rax->group_fieldnum, 0xffffffff +|| } +| mov DECODER->top, rax | mov FRAME, rax +|// Sink Frame. +| lea rcx, [SINKFRAME + sizeof(upb_sink_frame)] // rcx for short addressing +| cmp rcx, DECODER->sink.limit +| jae ->exit_jit // Frame stack overflow. +| mov dword SINKFRAME:rcx->end, getselector(field, endtype) +|| if (upb_fielddef_issubmsg(field)) { +| mov64 r9, (uintptr_t)upb_handlers_getsubhandlers(h, field) +|| } else { +| mov64 r9, (uintptr_t)h +|| } +| mov SINKFRAME:rcx->h, r9 +| mov DECODER->sink.top, rcx +| mov SINKFRAME, rcx |.endmacro | -|.macro popframe, m -| sub FRAME, sizeof(upb_dispatcher_frame) -| mov DECODER->dispatcher.top, FRAME -| setmsgend m -| mov CLOSURE, FRAME->closure +|.macro popframe +| sub FRAME, sizeof(upb_decoder_frame) +| mov DECODER->top, FRAME +| sub SINKFRAME, sizeof(upb_sink_frame) +| mov DECODER->sink.top, SINKFRAME +| setmsgend +| mov CLOSURE, SINKFRAME->closure |.endmacro | -|.macro setmsgend, m -| mov rsi, DECODER->jit_end -|| if (m->is_group) { -| mov64 rax, 0xffffffffffffffff -| mov qword DECODER->delim_end, rax -| mov DECODER->effective_end, rsi -|| } else { -| // Could store a correctly-biased version in the frame, at the cost of -| // a larger stack. 
-| mov eax, dword FRAME->end_ofs -| add rax, qword DECODER->buf -| mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs -| cmp rax, rsi -| jb >8 -| mov rax, rsi // effective_end = min(d->delim_end, d->jit_end) +|.macro setmsgend +| mov rsi, DECODER->jit_end +| mov rax, qword FRAME->end_ofs // Will be UINT64_MAX for groups. +| sub rax, qword DECODER->bufstart_ofs +| add rax, qword DECODER->buf // rax = d->buf + f->end_ofs - d->bufstart_ofs +| jc >8 // If the addition overflowed, use jit_end +| cmp rax, rsi +| ja >8 // If jit_end is less, use jit_end +| mov rsi, rax // Use frame end. |8: -| mov DECODER->effective_end, rax -|| } +| mov DECODER->effective_end, rsi |.endmacro | -|// rax contains the tag, compare it against "tag", but since it is a varint +|// rcx contains the tag, compare it against "tag", but since it is a varint |// we must only compare as many bytes as actually have data. |.macro checktag, tag || switch (upb_value_size(tag)) { @@ -279,22 +359,6 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } || } |.endmacro | -|// TODO: optimize for 0 (xor) and 32-bits. -|.macro loadfval, f -||#ifndef NDEBUG -||// Since upb_value carries type information in debug mode -||// only, we need to pass the arguments slightly differently. 
-| mov ARG3_32, f->fval.type -||#endif -|| if (f->fval.val.uint64 == 0) { -| xor ARG2_32, ARG2_32 -|| } else if (f->fval.val.uint64 < 0xffffffff) { -| mov ARG2_32, f->fval.val.uint64 -|| } else { -| mov64 ARG2_64, f->fval.val.uint64 -|| } -|.endmacro -| |.macro sethas, reg, hasbit || if (hasbit >= 0) { | or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8)) @@ -304,14 +368,37 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } #include <stdlib.h> #include "upb/pb/varint.h" -#include "upb/msg.h" + +static upb_selector_t getselector(const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t selector; + bool ok = upb_getselector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; +} + +static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f, + upb_handlertype_t type) { + return upb_handlers_gethandler(h, getselector(f, type)); +} + +static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f, + upb_handlertype_t type) { + return (uintptr_t)upb_handlers_gethandlerdata(h, getselector(f, type)); +} // Decodes the next val into ARG3, advances PTR. static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, - uint8_t type, size_t tag_size) { + uint8_t type, size_t tag_size, + const upb_handlers *h, + const upb_fielddef *f) { // Decode the value into arg 3 for the callback. 
switch (type) { case UPB_TYPE(DOUBLE): + | movsd XMMARG1, qword [PTR + tag_size] + | add PTR, 8 + tag_size + break; + case UPB_TYPE(FIXED64): case UPB_TYPE(SFIXED64): | mov ARG3_64, qword [PTR + tag_size] @@ -319,6 +406,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, break; case UPB_TYPE(FLOAT): + | movss XMMARG1, dword [PTR + tag_size] + | add PTR, 4 + tag_size + break; + case UPB_TYPE(FIXED32): case UPB_TYPE(SFIXED32): | mov ARG3_32, dword [PTR + tag_size] @@ -362,7 +453,7 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, break; case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): + case UPB_TYPE(BYTES): { // We only handle the case where the entire string is in our current // buf, which sidesteps any security problems. The C path has more // robust checks. @@ -372,22 +463,42 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, | sub rdi, rax | cmp ARG3_64, rdi // if (len > d->end - str) | ja ->exit_jit // Can't deliver, whole string not in buf. + | mov PTR, rax + + upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR); + if (handler) { + | mov DECODER->tmp_len, ARG3_64 + | mov ARG1_64, CLOSURE + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSTR) + | callp handler + | check_ptr_ret + | mov ARG1_64, rax // sub-closure + | mov ARG4_64, DECODER->tmp_len + } else { + | mov ARG1_64, CLOSURE + | mov ARG4_64, ARG3_64 + } + + handler = gethandler(h, f, UPB_HANDLER_STRING); + if (handler) { + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STRING) + | mov ARG3_64, PTR + | callp handler + // TODO: properly handle returns other than "n" (the whole string). + | add PTR, rax + } else { + | add PTR, ARG4_64 + } - // Update PTR to point past end of string. - | mov rdi, rax - | add rdi, ARG3_64 - | mov PTR, rdi - - // Populate BYTEREGION appropriately. 
- | sub rax, DECODER->buf - | add rax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs - | mov BYTEREGION->start, rax - | mov BYTEREGION->discard, rax - | add rax, ARG3_64 - | mov BYTEREGION->end, rax - | mov BYTEREGION->fetch, rax // Fast path ensures whole string is loaded - | mov ARG3_64, BYTEREGION + handler = gethandler(h, f, UPB_HANDLER_ENDSTR); + if (handler) { + | mov ARG1_64, CLOSURE + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSTR) + | callp handler + | check_bool_ret + } break; + } // Will dispatch callbacks and call submessage in a second. case UPB_TYPE(MESSAGE): @@ -402,85 +513,85 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, } static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, - upb_fhandlers *f) { + const upb_handlers *h, + const upb_fielddef *f) { // Call callbacks. Specializing the append accessors didn't yield a speed // increase in benchmarks. - if (upb_issubmsgtype(f->type)) { - if (f->type == UPB_TYPE(MESSAGE)) { + if (upb_fielddef_issubmsg(f)) { + if (upb_fielddef_type(f) == UPB_TYPE(MESSAGE)) { | mov rsi, PTR | sub rsi, DECODER->buf | add rsi, ARG3_64 // = (d->ptr - d->buf) + delim_len } else { - assert(f->type == UPB_TYPE(GROUP)); + assert(upb_fielddef_type(f) == UPB_TYPE(GROUP)); | mov rsi, UPB_NONDELIMITED } - | pushframe f, rsi, false + | pushframe h, f, rsi, UPB_HANDLER_ENDSUBMSG // Call startsubmsg handler (if any). 
- if (f->startsubmsg) { + upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG); + if (startsubmsg) { // upb_sflow_t startsubmsg(void *closure, upb_value fval) | mov ARG1_64, CLOSURE - | loadfval f - | callp f->startsubmsg - | sethas CLOSURE, f->hasbit - | mov CLOSURE, rdx - } else { - | sethas CLOSURE, f->hasbit + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSUBMSG); + | callp startsubmsg + | check_ptr_ret + | mov CLOSURE, rax } - | mov qword FRAME->closure, CLOSURE - // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK - | mov DECODER->ptr, PTR + | mov qword SINKFRAME->closure, CLOSURE - const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f); - | call =>sub_m->jit_startmsg_pclabel; - | popframe upb_fhandlers_getmsg(f) + // TODO: have to decide what to do with NULLs subhandlers (or whether to + // disallow them and require a full handlers tree to match the def tree). + const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f); + assert(sub_h); + | call =>upb_getpclabel(plan, sub_h, STARTMSG) + | popframe // Call endsubmsg handler (if any). - if (f->endsubmsg) { + upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG); + if (endsubmsg) { // upb_flow_t endsubmsg(void *closure, upb_value fval); | mov ARG1_64, CLOSURE - | loadfval f - | callp f->endsubmsg + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSUBMSG); + | callp endsubmsg + | check_bool_ret } - // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK - | mov DECODER->ptr, PTR - } else { + } else if (!upb_fielddef_isstring(f)) { | mov ARG1_64, CLOSURE + upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f); + upb_func *handler = gethandler(h, f, handlertype); + const upb_stdmsg_fval *fv = (void*)gethandlerdata(h, f, handlertype); // Test for callbacks we can specialize. // Can't switch() on function pointers. 
- if (f->value == &upb_stdmsg_setint64 || - f->value == &upb_stdmsg_setuint64 || - f->value == &upb_stdmsg_setptr || - f->value == &upb_stdmsg_setdouble) { - const upb_fielddef *fd = upb_value_getfielddef(f->fval); - | mov [ARG1_64 + fd->offset], ARG3_64 - } else if (f->value == &upb_stdmsg_setint32 || - f->value == &upb_stdmsg_setuint32 || - f->value == &upb_stdmsg_setfloat) { - const upb_fielddef *fd = upb_value_getfielddef(f->fval); - | mov [ARG1_64 + fd->offset], ARG3_32 - } else if (f->value == &upb_stdmsg_setbool) { - const upb_fielddef *fd = upb_value_getfielddef(f->fval); - | mov [ARG1_64 + fd->offset], ARG3_8 - } else if (f->value) { + if (handler == (void*)&upb_stdmsg_setint64 || + handler == (void*)&upb_stdmsg_setuint64) { + | mov [ARG1_64 + fv->offset], ARG3_64 + | sethas CLOSURE, fv->hasbit + } else if (handler == (void*)&upb_stdmsg_setdouble) { + | movsd qword [ARG1_64 + fv->offset], XMMARG1 + | sethas CLOSURE, fv->hasbit + } else if (handler == (void*)&upb_stdmsg_setint32 || + handler == (void*)&upb_stdmsg_setuint32) { + | mov [ARG1_64 + fv->offset], ARG3_32 + | sethas CLOSURE, fv->hasbit + } else if (handler == (void*)&upb_stdmsg_setfloat) { + | movss dword [ARG1_64 + fv->offset], XMMARG1 + | sethas CLOSURE, fv->hasbit + } else if (handler == (void*)&upb_stdmsg_setbool) { + | mov [ARG1_64 + fv->offset], ARG3_8 + | sethas CLOSURE, fv->hasbit + } else if (handler) { // Load closure and fval into arg registers. - ||#ifndef NDEBUG - ||// Since upb_value carries type information in debug mode - ||// only, we need to pass the arguments slightly differently. 
- | mov ARG4_64, ARG3_64 - | mov ARG5_32, upb_types[f->type].inmemory_type - ||#endif - | loadfval f - | callp f->value + | mov64 ARG2_64, gethandlerdata(h, f, handlertype); + | callp handler + | check_bool_ret } - | sethas CLOSURE, f->hasbit - // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK - | mov DECODER->ptr, PTR } } -static uint64_t upb_get_encoded_tag(upb_fhandlers *f) { - uint32_t tag = (f->number << 3) | upb_decoder_types[f->type].native_wire_type; +static uint64_t upb_get_encoded_tag(const upb_fielddef *f) { + uint32_t tag = (upb_fielddef_number(f) << 3) | + upb_decoder_types[upb_fielddef_type(f)].native_wire_type; uint64_t encoded_tag = upb_vencode32(tag); // No tag should be greater than 5 bytes. assert(encoded_tag <= 0xffffffffff); @@ -488,118 +599,121 @@ static uint64_t upb_get_encoded_tag(upb_fhandlers *f) { } // PTR should point to the beginning of the tag. -static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m, - upb_fhandlers *f, upb_fhandlers *next_f) { +static void upb_decoderplan_jit_field(upb_decoderplan *plan, + const upb_handlers *h, + const upb_fielddef *f, + const upb_fielddef *next_f) { uint64_t tag = upb_get_encoded_tag(f); uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0; + int tag_size = upb_value_size(tag); // PC-label for the dispatch table. // We check the wire type (which must be loaded in edx) because the // table is keyed on field number, not type. - |=>f->jit_pclabel: + |=>upb_getpclabel(plan, f, FIELD): | cmp edx, (tag & 0x7) | jne ->exit_jit // In the future: could be an unknown field or packed. 
- |=>f->jit_pclabel_notypecheck: - if (f->repeated) { + |=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK): + if (upb_fielddef_isseq(f)) { | mov rsi, FRAME->end_ofs - | pushframe f, rsi, true - if (f->startseq) { + | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ + upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ); + if (startseq) { | mov ARG1_64, CLOSURE - | loadfval f - | callp f->startseq - | sethas CLOSURE, f->hasbit - | mov CLOSURE, rdx - } else { - | sethas CLOSURE, f->hasbit + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSEQ); + | callp startseq + | check_ptr_ret + | mov CLOSURE, rax } - | mov qword FRAME->closure, CLOSURE + | mov qword SINKFRAME->closure, CLOSURE } |1: // Label for repeating this field. - int tag_size = upb_value_size(tag); - if (f->type == UPB_TYPE_ENDGROUP) { - | add PTR, tag_size - | jmp =>m->jit_endofmsg_pclabel - return; - } - - upb_decoderplan_jit_decodefield(plan, f->type, tag_size); - upb_decoderplan_jit_callcb(plan, f); + upb_decoderplan_jit_decodefield(plan, upb_fielddef_type(f), tag_size, h, f); + upb_decoderplan_jit_callcb(plan, h, f); // Epilogue: load next tag, check for repeated field. - | check_eob m + | checkpoint h | mov rcx, qword [PTR] - if (f->repeated) { + if (upb_fielddef_isseq(f)) { | checktag tag | je <1 - if (f->endseq) { + upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ); + if (endseq) { | mov ARG1_64, CLOSURE - | loadfval f - | callp f->endseq + | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSEQ); + | callp endseq } - | popframe m + | popframe + // Load next tag again (popframe clobbered it). + | mov rcx, qword [PTR] } + if (next_tag != 0) { | checktag next_tag - | je =>next_f->jit_pclabel_notypecheck + | je =>upb_getpclabel(plan, next_f, FIELD_NO_TYPECHECK) } // Fall back to dynamic dispatch. - | dyndispatch m - |1: + | dyndispatch h } static int upb_compare_uint32(const void *a, const void *b) { - // TODO: always put ENDGROUP at the end. 
return *(uint32_t*)a - *(uint32_t*)b; } -static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { - |=>m->jit_afterstartmsg_pclabel: +static void upb_decoderplan_jit_msg(upb_decoderplan *plan, + const upb_handlers *h) { + |=>upb_getpclabel(plan, h, AFTER_STARTMSG): // There was a call to get here, so we need to align the stack. | sub rsp, 8 | jmp >1 - |=>m->jit_startmsg_pclabel: + |=>upb_getpclabel(plan, h, STARTMSG): // There was a call to get here, so we need to align the stack. | sub rsp, 8 // Call startmsg handler (if any): - if (m->startmsg) { + upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h); + if (startmsg) { // upb_flow_t startmsg(void *closure); - | mov ARG1_64, FRAME->closure - | callp m->startmsg - // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK + | mov ARG1_64, SINKFRAME->closure + | callp startmsg + | check_bool_ret } |1: - | setmsgend m - | check_eob m + | setmsgend + | checkpoint h | mov ecx, dword [PTR] - | dyndispatch_ m + | dyndispatch_ h // --------- New code section (does not fall through) ------------------------ // Emit code for parsing each field (dynamic dispatch contains pointers to // all of these). - // Create an ordering over the fields (inttable ordering is undefined). - int num_keys = upb_inttable_count(&m->fieldtab); + // Create an ordering over the fields in field number order. + // Parsing will theoretically be fastest if we emit code in the same + // order as field numbers are seen on-the-wire because of an optimization + // in the generated code that skips dynamic dispatch if the next field is + // as expected. 
+ const upb_msgdef *md = upb_handlers_msgdef(h); + int num_keys = upb_msgdef_numfields(md); uint32_t *keys = malloc(num_keys * sizeof(*keys)); int idx = 0; - upb_inttable_iter i; - upb_inttable_begin(&i, &m->fieldtab); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - keys[idx++] = upb_inttable_iter_key(&i); + upb_msg_iter i; + for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + keys[idx++] = upb_fielddef_number(upb_msg_iter_field(&i)); } qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32); for(int i = 0; i < num_keys; i++) { - upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]); - upb_fhandlers *next_f = - (i + 1 < num_keys) ? upb_mhandlers_lookup(m, keys[i + 1]) : NULL; - upb_decoderplan_jit_field(plan, m, f, next_f); + const upb_fielddef *f = upb_msgdef_itof(md, keys[i]); + const upb_fielddef *next_f = + (i + 1 < num_keys) ? upb_msgdef_itof(md, keys[i + 1]) : NULL; + upb_decoderplan_jit_field(plan, h, f, next_f); } free(keys); @@ -607,27 +721,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { // --------- New code section (does not fall through) ------------------------ // End-of-buf / end-of-message. - if (!m->is_group) { - // This case doesn't exist for groups, because there eob really means - // eob, so that case just exits the jit directly. - |=>m->jit_endofbuf_pclabel: - | cmp PTR, DECODER->delim_end - | jb ->exit_jit // We are at eob, but not end-of-submsg. - } + // We hit a buffer limit; either we hit jit_end or end-of-submessage. 
+ |=>upb_getpclabel(plan, h, ENDOFBUF): + | cmp PTR, DECODER->jit_end + | jae ->exit_jit - |=>m->jit_endofmsg_pclabel: + |=>upb_getpclabel(plan, h, ENDOFMSG): // We are at end-of-submsg: call endmsg handler (if any): - if (m->endmsg) { + upb_endmsg_handler *endmsg = upb_handlers_getendmsg(h); + if (endmsg) { // void endmsg(void *closure, upb_status *status) { - | mov ARG1_64, FRAME->closure - | lea ARG2_64, DECODER->dispatcher.status - | callp m->endmsg - } - - if (m->is_group) { - // Advance past the "end group" tag. - // TODO: Handle UPB_BREAK - | mov DECODER->ptr, PTR + | mov ARG1_64, SINKFRAME->closure + | lea ARG2_64, DECODER->sink.status + | callp endmsg } // Counter previous alignment. @@ -657,9 +763,9 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) { // Align stack. | sub rsp, 8 | mov DECODER, ARG1_64 - | mov FRAME, DECODER:ARG1_64->dispatcher.top - | lea BYTEREGION, DECODER:ARG1_64->str_byteregion - | mov CLOSURE, FRAME->closure + | mov FRAME, DECODER:ARG1_64->top + | mov SINKFRAME, DECODER:ARG1_64->sink.top + | mov CLOSURE, SINKFRAME->closure | mov PTR, DECODER->ptr // TODO: push return addresses for re-entry (will be necessary for multiple @@ -680,54 +786,65 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) { | leave | ret - upb_handlers *h = plan->handlers; - for (int i = 0; i < h->msgs_len; i++) - upb_decoderplan_jit_msg(plan, h->msgs[i]); -} - -static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f, - uint32_t *pclabel_count) { - f->jit_pclabel = (*pclabel_count)++; - f->jit_pclabel_notypecheck = (*pclabel_count)++; -} - -static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m, - uint32_t *pclabel_count) { - m->jit_startmsg_pclabel = (*pclabel_count)++; - m->jit_afterstartmsg_pclabel = (*pclabel_count)++; - m->jit_endofbuf_pclabel = (*pclabel_count)++; - m->jit_endofmsg_pclabel = (*pclabel_count)++; - m->jit_dyndispatch_pclabel = (*pclabel_count)++; - m->jit_unknownfield_pclabel = (*pclabel_count)++; - 
m->max_field_number = 0; upb_inttable_iter i; - upb_inttable_begin(&i, &m->fieldtab); + upb_inttable_begin(&i, &plan->msginfo); for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - uint32_t key = upb_inttable_iter_key(&i); - m->max_field_number = UPB_MAX(m->max_field_number, key); - upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i)); - upb_decoderplan_jit_assignfieldlabs(f, pclabel_count); + const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); + upb_decoderplan_jit_msg(plan, h); + } +} + +static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan, + const upb_handlers *h) { + // Limit the DFS. + if (upb_inttable_lookupptr(&plan->pclabels, h)) return; + + upb_inttable_insertptr(&plan->pclabels, h, + upb_value_uint32(plan->pclabel_count)); + plan->pclabel_count += TOTAL_MSG_PCLABELS; + + upb_jitmsginfo *info = malloc(sizeof(*info)); + info->max_field_number = 0; + upb_inttable_insertptr(&plan->msginfo, h, upb_value_ptr(info)); + + upb_msg_iter i; + upb_msg_begin(&i, upb_handlers_msgdef(h)); + for(; !upb_msg_done(&i); upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + info->max_field_number = + UPB_MAX(info->max_field_number, upb_fielddef_number(f)); + upb_inttable_insertptr(&plan->pclabels, f, + upb_value_uint32(plan->pclabel_count)); + plan->pclabel_count += TOTAL_FIELD_PCLABELS; + + // Discover the whole graph of handlers depth-first. We will probably + // revise this later to be more explicit about the list of handlers that + // the plan should include. + if (upb_fielddef_issubmsg(f)) { + const upb_handlers *subh = upb_handlers_getsubhandlers(h, f); + if (subh) upb_decoderplan_jit_assignpclabels(plan, subh); + } } // TODO: support large field numbers by either using a hash table or // generating code for a binary search. For now large field numbers // will just fall back to the table decoder. 
- m->max_field_number = UPB_MIN(m->max_field_number, 16000); - m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*)); + info->max_field_number = UPB_MIN(info->max_field_number, 16000); + info->tablearray = malloc((info->max_field_number + 1) * sizeof(void*)); } static void upb_decoderplan_makejit(upb_decoderplan *plan) { + upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR); plan->debug_info = NULL; // Assign pclabels. - uint32_t pclabel_count = 0; - upb_handlers *h = plan->handlers; - for (int i = 0; i < h->msgs_len; i++) - upb_decoderplan_jit_assignmsglabs(h->msgs[i], &pclabel_count); + plan->pclabel_count = 0; + upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32); + upb_decoderplan_jit_assignpclabels(plan, plan->handlers); void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals)); dasm_init(plan, 1); dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX); - dasm_growpc(plan, pclabel_count); + dasm_growpc(plan, plan->pclabel_count); dasm_setup(plan, upb_jit_actionlist); upb_decoderplan_jit(plan); @@ -744,38 +861,53 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) { dasm_encode(plan, plan->jit_code); // Create dispatch tables. - for (int i = 0; i < h->msgs_len; i++) { - upb_mhandlers *m = h->msgs[i]; + upb_inttable_iter i; + upb_inttable_begin(&i, &plan->msginfo); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); + upb_jitmsginfo *mi = upb_getmsginfo(plan, h); // We jump to after the startmsg handler since it is called before entering // the JIT (either by upb_decoder or by a previous call to the JIT). 
- m->jit_func = - plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel); - for (uint32_t j = 0; j <= m->max_field_number; j++) { - upb_fhandlers *f = upb_mhandlers_lookup(m, j); + mi->jit_func = plan->jit_code + + dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG)); + for (uint32_t j = 0; j <= mi->max_field_number; j++) { + const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), j); if (f) { - m->tablearray[j] = - plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel); + mi->tablearray[j] = plan->jit_code + + dasm_getpclabel(plan, upb_getpclabel(plan, f, FIELD)); } else { // TODO: extend the JIT to handle unknown fields. // For the moment we exit the JIT for any unknown field. - m->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit]; + mi->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit]; } } } + upb_inttable_uninit(&plan->pclabels); + dasm_free(plan); free(globals); mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ); +#ifndef NDEBUG // View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code // Or: ndisasm -b 64 /tmp/machine-code FILE *f = fopen("/tmp/machine-code", "wb"); fwrite(plan->jit_code, plan->jit_size, 1, f); fclose(f); +#endif } static void upb_decoderplan_freejit(upb_decoderplan *plan) { + upb_inttable_iter i; + upb_inttable_begin(&i, &plan->msginfo); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + upb_jitmsginfo *mi = upb_value_getptr(upb_inttable_iter_value(&i)); + free(mi->tablearray); + free(mi); + } + upb_inttable_uninit(&plan->msginfo); munmap(plan->jit_code, plan->jit_size); free(plan->debug_info); // TODO: unregister @@ -783,7 +915,7 @@ static void upb_decoderplan_freejit(upb_decoderplan *plan) { static void upb_decoder_enterjit(upb_decoder *d) { if (d->plan->jit_code && - d->dispatcher.top == d->dispatcher.stack && + d->sink.top == d->sink.stack && d->ptr && d->ptr < d->jit_end) { #ifndef NDEBUG register uint64_t rbx asm ("rbx") = 11; @@ -795,7 +927,9 @@ static void 
upb_decoder_enterjit(upb_decoder *d) { // Decodes as many fields as possible, updating d->ptr appropriately, // before falling through to the slow(er) path. void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code; - upb_jit_decode(d, d->plan->handlers->msgs[d->msg_offset]->jit_func); + upb_jitmsginfo *mi = upb_getmsginfo(d->plan, d->plan->handlers); + assert(mi); + upb_jit_decode(d, mi->jit_func); assert(d->ptr <= d->end); // Test that callee-save registers were properly restored. diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 40b901d..4e69c0c 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -5,10 +5,14 @@ * Author: Josh Haberman <jhaberman@gmail.com> */ +#include "upb/pb/glue.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> #include "upb/bytestream.h" #include "upb/descriptor/reader.h" #include "upb/pb/decoder.h" -#include "upb/pb/glue.h" upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, void *owner, upb_status *status) { @@ -16,16 +20,14 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, upb_stringsrc_init(&strsrc); upb_stringsrc_reset(&strsrc, str, len); - upb_handlers *h = upb_handlers_new(); - upb_descreader_reghandlers(h); - + const upb_handlers *h = upb_descreader_newhandlers(&h); upb_decoderplan *p = upb_decoderplan_new(h, false); upb_decoder d; upb_decoder_init(&d); - upb_handlers_unref(h); + upb_handlers_unref(h, &h); upb_descreader r; upb_descreader_init(&r); - upb_decoder_resetplan(&d, p, 0); + upb_decoder_resetplan(&d, p); upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r); upb_success_t ret = upb_decoder_decode(&d); diff --git a/upb/pb/glue.h b/upb/pb/glue.h index 6179d8d..4bbc975 100644 --- a/upb/pb/glue.h +++ b/upb/pb/glue.h @@ -27,8 +27,7 @@ #define UPB_GLUE_H #include <stdbool.h> -#include "upb/upb.h" -#include "upb/def.h" +#include "upb/symtab.h" #ifdef __cplusplus extern "C" { @@ -55,6 +54,29 @@ char *upb_readfile(const char *filename, 
size_t *len); #ifdef __cplusplus } /* extern "C" */ + +namespace upb { + +// All routines that load descriptors expect the descriptor to be a +// FileDescriptorSet. +inline bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname, + Status* status) { + return upb_load_descriptor_file_into_symtab(s, fname, status); +} + +inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str, + size_t len, Status* status) { + return upb_load_descriptor_into_symtab(s, str, len, status); +} + +// Templated so it can accept both string and std::string. +template <typename T> +bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) { + return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status); +} + +} // namespace upb + #endif #endif diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 2fe3452..3770afc 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -5,11 +5,14 @@ * Author: Josh Haberman <jhaberman@gmail.com> */ +#include "upb/pb/textprinter.h" + #include <ctype.h> #include <float.h> #include <inttypes.h> +#include <stdio.h> #include <stdlib.h> -#include "upb/pb/textprinter.h" +#include <string.h> struct _upb_textprinter { upb_bytesink *sink; @@ -20,7 +23,7 @@ struct _upb_textprinter { #define CHECK(x) if ((x) < 0) goto err; -static int upb_textprinter_indent(upb_textprinter *p) { +static int indent(upb_textprinter *p) { if (!p->single_line) CHECK(upb_bytesink_putrepeated(p->sink, ' ', p->indent_depth*2)); return 0; @@ -28,37 +31,32 @@ err: return -1; } -static int upb_textprinter_endfield(upb_textprinter *p) { +static int endfield(upb_textprinter *p) { CHECK(upb_bytesink_putc(p->sink, p->single_line ? 
' ' : '\n')); return 0; err: return -1; } -static int upb_textprinter_putescaped(upb_textprinter *p, - const upb_byteregion *bytes, - bool preserve_utf8) { +static int putescaped(upb_textprinter *p, const char *buf, size_t len, + bool preserve_utf8) { // Based on CEscapeInternal() from Google's protobuf release. - // TODO; we could read directly from a bytesrc's buffer instead. - // TODO; we could write byteregions to the sink when possible. - char dstbuf[512], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); - char *buf = malloc(upb_byteregion_len(bytes)), *src = buf; - char *end = src + upb_byteregion_len(bytes); - upb_byteregion_copyall(bytes, buf); + char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); + const char *end = buf + len; // I think hex is prettier and more useful, but proto2 uses octal; should // investigate whether it can parse hex also. const bool use_hex = false; bool last_hex_escape = false; // true if last output char was \xNN - for (; src < end; src++) { + for (; buf < end; buf++) { if (dstend - dst < 4) { CHECK(upb_bytesink_write(p->sink, dstbuf, dst - dstbuf)); dst = dstbuf; } bool is_hex_escape = false; - switch (*src) { + switch (*buf) { case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break; case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break; case '\t': *(dst++) = '\\'; *(dst++) = 't'; break; @@ -66,123 +64,123 @@ static int upb_textprinter_putescaped(upb_textprinter *p, case '\'': *(dst++) = '\\'; *(dst++) = '\''; break; case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break; default: - // Note that if we emit \xNN and the src character after that is a hex + // Note that if we emit \xNN and the buf character after that is a hex // digit then that digit must be escaped too to prevent it being // interpreted as part of the character code by C. - if ((!preserve_utf8 || (uint8_t)*src < 0x80) && - (!isprint(*src) || (last_hex_escape && isxdigit(*src)))) { - sprintf(dst, (use_hex ? 
"\\x%02x" : "\\%03o"), (uint8_t)*src); + if ((!preserve_utf8 || (uint8_t)*buf < 0x80) && + (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) { + sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf); is_hex_escape = use_hex; dst += 4; } else { - *(dst++) = *src; break; + *(dst++) = *buf; break; } } last_hex_escape = is_hex_escape; } // Flush remaining data. CHECK(upb_bytesink_write(p->sink, dst, dst - dstbuf)); - free(buf); return 0; err: - free(buf); return -1; } -#define TYPE(member, fmt) \ - static upb_flow_t upb_textprinter_put ## member(void *_p, upb_value fval, \ - upb_value val) { \ +#define TYPE(name, ctype, fmt) \ + static bool put ## name(void *_p, void *fval, ctype val) { \ upb_textprinter *p = _p; \ - const upb_fielddef *f = upb_value_getfielddef(fval); \ - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); \ - CHECK(upb_textprinter_indent(p)); \ + const upb_fielddef *f = fval; \ + CHECK(indent(p)); \ CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f))); \ CHECK(upb_bytesink_writestr(p->sink, ": ")); \ - CHECK(upb_bytesink_printf(p->sink, fmt, upb_value_get ## member(val))); \ - CHECK(upb_textprinter_endfield(p)); \ - return UPB_CONTINUE; \ + CHECK(upb_bytesink_printf(p->sink, fmt, val)); \ + CHECK(endfield(p)); \ + return true; \ err: \ - upb_bytesink_rewind(p->sink, start_ofs); \ - return UPB_BREAK; \ + return false; \ } #define STRINGIFY_HELPER(x) #x #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x) -TYPE(double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g") -TYPE(float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") -TYPE(int64, "%" PRId64) -TYPE(uint64, "%" PRIu64) -TYPE(int32, "%" PRId32) -TYPE(uint32, "%" PRIu32); -TYPE(bool, "%hhu"); +TYPE(int32, int32_t, "%" PRId32) +TYPE(int64, int64_t, "%" PRId64) +TYPE(uint32, uint32_t, "%" PRIu32); +TYPE(uint64, uint64_t, "%" PRIu64) +TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") +TYPE(double, double, "%." 
STRINGIFY_MACROVAL(DBL_DIG) "g") +TYPE(bool, bool, "%hhu"); // Output a symbolic value from the enum if found, else just print as int32. -static upb_flow_t upb_textprinter_putenum(void *_p, upb_value fval, - upb_value val) { +static bool putenum(void *_p, void *fval, int32_t val) { upb_textprinter *p = _p; - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); - const upb_fielddef *f = upb_value_getfielddef(fval); - const upb_enumdef *enum_def = - upb_downcast_enumdef_const(upb_fielddef_subdef(f)); - const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val)); + const upb_fielddef *f = fval; + const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f)); + const char *label = upb_enumdef_iton(enum_def, val); if (label) { CHECK(upb_bytesink_writestr(p->sink, label)); } else { - CHECK(upb_textprinter_putint32(_p, fval, val)); + CHECK(putint32(_p, fval, val)); } - return UPB_CONTINUE; + return true; err: - upb_bytesink_rewind(p->sink, start_ofs); - return UPB_BREAK; + return false; } -static upb_flow_t upb_textprinter_putstr(void *_p, upb_value fval, - upb_value val) { +static void *startstr(void *_p, void *fval, size_t size_hint) { + UPB_UNUSED(size_hint); + UPB_UNUSED(fval); upb_textprinter *p = _p; - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); - const upb_fielddef *f = upb_value_getfielddef(fval); - CHECK(upb_bytesink_putc(p->sink, '"')); - CHECK(upb_textprinter_putescaped(p, upb_value_getbyteregion(val), - f->type == UPB_TYPE(STRING))); CHECK(upb_bytesink_putc(p->sink, '"')); - return UPB_CONTINUE; + return p; err: - upb_bytesink_rewind(p->sink, start_ofs); return UPB_BREAK; } -static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) { +static bool endstr(void *_p, void *fval) { + UPB_UNUSED(fval); + upb_textprinter *p = _p; + CHECK(upb_bytesink_putc(p->sink, '"')); + return true; +err: + return false; +} + +static size_t putstr(void *_p, void *fval, const char *buf, size_t len) { upb_textprinter *p = _p; - 
uint64_t start_ofs = upb_bytesink_getoffset(p->sink); - const upb_fielddef *f = upb_value_getfielddef(fval); - CHECK(upb_textprinter_indent(p)); + const upb_fielddef *f = fval; + CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE(STRING))); + return len; +err: + return 0; +} + +static void *startsubmsg(void *_p, void *fval) { + upb_textprinter *p = _p; + const upb_fielddef *f = fval; + CHECK(indent(p)); CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f))); if (!p->single_line) CHECK(upb_bytesink_putc(p->sink, '\n')); p->indent_depth++; - return UPB_CONTINUE_WITH(_p); + return _p; err: - upb_bytesink_rewind(p->sink, start_ofs); - return UPB_SBREAK; + return UPB_BREAK; } -static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) { - (void)fval; +static bool endsubmsg(void *_p, void *fval) { + UPB_UNUSED(fval); upb_textprinter *p = _p; - uint64_t start_ofs = upb_bytesink_getoffset(p->sink); p->indent_depth--; - CHECK(upb_textprinter_indent(p)); + CHECK(indent(p)); CHECK(upb_bytesink_putc(p->sink, '}')); - CHECK(upb_textprinter_endfield(p)); - return UPB_CONTINUE; + CHECK(endfield(p)); + return true; err: - upb_bytesink_rewind(p->sink, start_ofs); - return UPB_BREAK; + return false; } -upb_textprinter *upb_textprinter_new(void) { +upb_textprinter *upb_textprinter_new() { upb_textprinter *p = malloc(sizeof(*p)); return p; } @@ -196,22 +194,61 @@ void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, p->indent_depth = 0; } -static void upb_textprinter_onfreg(void *c, upb_fhandlers *fh, const upb_fielddef *f) { +static void onmreg(void *c, upb_handlers *h) { (void)c; - upb_fhandlers_setstartsubmsg(fh, &upb_textprinter_startsubmsg); - upb_fhandlers_setendsubmsg(fh, &upb_textprinter_endsubmsg); -#define F(type) &upb_textprinter_put ## type - static upb_value_handler *fptrs[] = {NULL, F(double), F(float), F(int64), - F(uint64), F(int32), F(uint64), F(uint32), F(bool), F(str), - NULL, NULL, F(str), F(uint32), F(enum), F(int32), 
- F(int64), F(int32), F(int64)}; - upb_fhandlers_setvalue(fh, fptrs[f->type]); - upb_value fval; - upb_value_setfielddef(&fval, f); - upb_fhandlers_setfval(fh, fval); + const upb_msgdef *m = upb_handlers_msgdef(h); + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_SINT32: + case UPB_TYPE_SFIXED32: + upb_handlers_setint32(h, f, putint32, f, NULL); + break; + case UPB_TYPE_SINT64: + case UPB_TYPE_SFIXED64: + case UPB_TYPE_INT64: + upb_handlers_setint64(h, f, putint64, f, NULL); + break; + case UPB_TYPE_UINT32: + case UPB_TYPE_FIXED32: + upb_handlers_setuint32(h, f, putuint32, f, NULL); + break; + case UPB_TYPE_UINT64: + case UPB_TYPE_FIXED64: + upb_handlers_setuint64(h, f, putuint64, f, NULL); + break; + case UPB_TYPE_FLOAT: + upb_handlers_setfloat(h, f, putfloat, f, NULL); + break; + case UPB_TYPE_DOUBLE: + upb_handlers_setdouble(h, f, putdouble, f, NULL); + break; + case UPB_TYPE_BOOL: + upb_handlers_setbool(h, f, putbool, f, NULL); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + upb_handlers_setstartstr(h, f, startstr, f, NULL); + upb_handlers_setstring(h, f, putstr, f, NULL); + upb_handlers_setendstr(h, f, endstr, f, NULL); + break; + case UPB_TYPE_GROUP: + case UPB_TYPE_MESSAGE: + upb_handlers_setstartsubmsg(h, f, &startsubmsg, f, NULL); + upb_handlers_setendsubmsg(h, f, &endsubmsg, f, NULL); + break; + case UPB_TYPE_ENUM: + upb_handlers_setint32(h, f, putenum, f, NULL); + default: + assert(false); + break; + } + } } -upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, const upb_msgdef *m) { - return upb_handlers_regmsgdef( - h, m, NULL, &upb_textprinter_onfreg, NULL); +const upb_handlers *upb_textprinter_newhandlers(const void *owner, + const upb_msgdef *m) { + return upb_handlers_newfrozen(m, owner, &onmreg, NULL); } diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h index 
174148e..6d111d2 100644 --- a/upb/pb/textprinter.h +++ b/upb/pb/textprinter.h @@ -18,11 +18,12 @@ extern "C" { struct _upb_textprinter; typedef struct _upb_textprinter upb_textprinter; -upb_textprinter *upb_textprinter_new(void); +upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, bool single_line); -upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, const upb_msgdef *m); +const upb_handlers *upb_textprinter_newhandlers(const void *owner, + const upb_msgdef *m); #ifdef __cplusplus } /* extern "C" */ diff --git a/upb/pb/varint.c b/upb/pb/varint.c index 45caec1..d6d6161 100644 --- a/upb/pb/varint.c +++ b/upb/pb/varint.c @@ -7,16 +7,64 @@ #include "upb/pb/varint.h" +// A basic branch-based decoder, uses 32-bit values to get good performance +// on 32-bit architectures (but performs well on 64-bits also). +// This scheme comes from the original Google Protobuf implementation (proto2). +upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) { + upb_decoderet err = {NULL, 0}; + const char *p = r.p; + uint32_t low = (uint32_t)r.val; + uint32_t high = 0; + uint32_t b; + b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7fU) << 28; + high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done; + return err; + +done: + r.val = ((uint64_t)high << 32) | low; + r.p = p; + return r; +} + +// Like the previous, but uses 64-bit values. 
+upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) { + const char *p = r.p; + uint64_t val = r.val; + uint64_t b; + upb_decoderet err = {NULL, 0}; + b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done; + b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done; + return err; + +done: + r.val = val; + r.p = p; + return r; +} + // Given an encoded varint v, returns an integer with a single bit set that // indicates the end of the varint. Subtracting one from this value will // yield a mask that leaves only bits that are part of the varint. Returns // 0 if the varint is unterminated. -INLINE uint64_t upb_get_vstopbit(uint64_t v) { +static uint64_t upb_get_vstopbit(uint64_t v) { uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL; return ~cbits & (cbits+1); } -INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; } +// A branchless decoder. Credit to Pascal Massimino for the bit-twiddling. upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) { uint64_t b; memcpy(&b, r.p, sizeof(b)); @@ -35,14 +83,15 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) { return my_r; } +// A branchless decoder. Credit to Daniel Wright for the bit-twiddling. 
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { uint64_t b; memcpy(&b, r.p, sizeof(b)); uint64_t stop_bit = upb_get_vstopbit(b); b &= (stop_bit - 1); - b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f); - b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff); - b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff); + b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL); + b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL); + b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL); if (stop_bit == 0) { // Error: unterminated varint. upb_decoderet err_r = {(void*)0, 0}; diff --git a/upb/pb/varint.h b/upb/pb/varint.h index c0e0134..c4d67ba 100644 --- a/upb/pb/varint.h +++ b/upb/pb/varint.h @@ -49,71 +49,32 @@ typedef struct { uint64_t val; } upb_decoderet; -// A basic branch-based decoder, uses 32-bit values to get good performance -// on 32-bit architectures (but performs well on 64-bits also). -INLINE upb_decoderet upb_vdecode_branch32(const char *p) { - upb_decoderet r = {NULL, 0}; - uint32_t low, high = 0; - uint32_t b; - b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done; - return r; - -done: - r.val = ((uint64_t)high << 32) | low; - r.p = p; - return r; -} - -// Like the previous, but uses 64-bit values. 
-INLINE upb_decoderet upb_vdecode_branch64(const char *p) { - uint64_t val; - uint64_t b; - upb_decoderet r = {NULL, 0}; - b = *(p++); val = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done; - return r; - -done: - r.val = val; - r.p = p; - return r; -} - -// Decodes a varint of at most 8 bytes without branching (except for error). +// Four functions for decoding a varint of at most eight bytes. They are all +// functionally identical, but are implemented in different ways and likely have +// different performance profiles. We keep them around for performance testing. +// +// Note that these functions may not read byte-by-byte, so they must not be used +// unless there are at least eight bytes left in the buffer! +upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r); +upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r); upb_decoderet upb_vdecode_max8_wright(upb_decoderet r); - -// Another implementation of the previous. upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r); // Template for a function that checks the first two bytes with branching -// and dispatches 2-10 bytes with a separate function. 
-#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ -INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ - uint8_t *p = (uint8_t*)_p; \ - if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \ - upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)}; \ - if ((*(p + 1) & 0x80) == 0) return r; \ - return decode_max8_function(r); \ +// and dispatches 2-10 bytes with a separate function. Note that this may read +// up to 10 bytes, so it must not be used unless there are at least ten bytes +// left in the buffer! +#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ +INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ + uint8_t *p = (uint8_t*)_p; \ + if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \ + upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)}; \ + if ((*(p + 1) & 0x80) == 0) return r; \ + return decode_max8_function(r); \ } +UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32); +UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64); UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright); UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); #undef UPB_VARINT_DECODER_CHECK2 @@ -121,11 +82,10 @@ UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); // Our canonical functions for decoding varints, based on the currently // favored best-performing implementations. INLINE upb_decoderet upb_vdecode_fast(const char *p) { - // Use nobranch2 on 64-bit, branch32 on 32-bit. 
if (sizeof(long) == 8) return upb_vdecode_check2_massimino(p); else - return upb_vdecode_branch32(p); + return upb_vdecode_check2_branch32(p); } INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { @@ -154,9 +114,9 @@ INLINE size_t upb_vencode64(uint64_t val, char *buf) { if (val == 0) { buf[0] = 0; return 1; } size_t i = 0; while (val) { - uint8_t byte = val & 0x7f; + uint8_t byte = val & 0x7fU; val >>= 7; - if (val) byte |= 0x80; + if (val) byte |= 0x80U; buf[i++] = byte; } return i; @@ -169,7 +129,7 @@ INLINE uint64_t upb_vencode32(uint32_t val) { uint64_t ret = 0; assert(bytes <= 5); memcpy(&ret, buf, bytes); - assert(ret <= 0xffffffffff); + assert(ret <= 0xffffffffffU); return ret; } diff --git a/upb/refcount.c b/upb/refcount.c deleted file mode 100644 index d729a2a..0000000 --- a/upb/refcount.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2012 Google Inc. See LICENSE for details. - * Author: Josh Haberman <jhaberman@gmail.com> - */ - -#include <stdlib.h> -#include "upb/refcount.h" - -// TODO(haberman): require client to define these if ref debugging is on. 
-#ifndef UPB_LOCK -#define UPB_LOCK -#endif - -#ifndef UPB_UNLOCK -#define UPB_UNLOCK -#endif - -/* arch-specific atomic primitives *******************************************/ - -#ifdef UPB_THREAD_UNSAFE ////////////////////////////////////////////////////// - -INLINE void upb_atomic_inc(uint32_t *a) { (*a)++; } -INLINE bool upb_atomic_dec(uint32_t *a) { return --(*a) == 0; } - -#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 /////////////////// - -INLINE void upb_atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } -INLINE bool upb_atomic_dec(uint32_t *a) { - return __sync_sub_and_fetch(a, 1) == 0; -} - -#elif defined(WIN32) /////////////////////////////////////////////////////////// - -#include <Windows.h> - -INLINE void upb_atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); } -INLINE bool upb_atomic_dec(upb_atomic_t *a) { - return InterlockedDecrement(&a->val) == 0; -} - -#else -#error Atomic primitives not defined for your platform/CPU. \ - Implement them or compile with UPB_THREAD_UNSAFE. -#endif - -// Reserved index values. -#define UPB_INDEX_UNDEFINED UINT16_MAX -#define UPB_INDEX_NOT_IN_STACK (UINT16_MAX - 1) - -static void upb_refcount_merge(upb_refcount *r, upb_refcount *from) { - if (upb_refcount_merged(r, from)) return; - *r->count += *from->count; - free(from->count); - upb_refcount *base = from; - - // Set all refcount pointers in the "from" chain to the merged refcount. - do { from->count = r->count; } while ((from = from->next) != base); - - // Merge the two circularly linked lists by swapping their next pointers. 
- upb_refcount *tmp = r->next; - r->next = base->next; - base->next = tmp; -} - -// Tarjan's algorithm, see: -// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm - -typedef struct { - int index; - upb_refcount **stack; - int stack_len; - upb_getsuccessors *func; -} upb_tarjan_state; - -static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state); - -void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *_state) { - upb_tarjan_state *state = _state; - if (subobj->index == UPB_INDEX_UNDEFINED) { - // Subdef has not yet been visited; recurse on it. - upb_refcount_dofindscc(subobj, state); - obj->lowlink = UPB_MIN(obj->lowlink, subobj->lowlink); - } else if (subobj->index != UPB_INDEX_NOT_IN_STACK) { - // Subdef is in the stack and hence in the current SCC. - obj->lowlink = UPB_MIN(obj->lowlink, subobj->index); - } -} - -static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state) { - obj->index = state->index; - obj->lowlink = state->index; - state->index++; - state->stack[state->stack_len++] = obj; - - state->func(obj, state); // Visit successors. - - if (obj->lowlink == obj->index) { - upb_refcount *scc_obj; - while ((scc_obj = state->stack[--state->stack_len]) != obj) { - upb_refcount_merge(obj, scc_obj); - scc_obj->index = UPB_INDEX_NOT_IN_STACK; - } - obj->index = UPB_INDEX_NOT_IN_STACK; - } -} - -bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) { - // TODO(haberman): allocate less memory. We can't use n as a bound because - // it doesn't include fielddefs. Could either use a dynamically-resizing - // array or think of some other way. 
- upb_tarjan_state state = {0, malloc(UINT16_MAX * sizeof(void*)), 0, func}; - if (state.stack == NULL) return false; - for (int i = 0; i < n; i++) - if (refs[i]->index == UPB_INDEX_UNDEFINED) - upb_refcount_dofindscc(refs[i], &state); - free(state.stack); - return true; -} - -#ifdef UPB_DEBUG_REFS -static void upb_refcount_track(const upb_refcount *r, const void *owner) { - // Caller must not already own a ref. - assert(upb_inttable_lookup(r->refs, (uintptr_t)owner) == NULL); - - // If a ref is leaked we want to blame the leak on the whoever leaked the - // ref, not on who originally allocated the refcounted object. We accomplish - // this as follows. When a ref is taken in DEBUG_REFS mode, we malloc() some - // memory and arrange setup pointers like so: - // - // upb_refcount - // +----------+ +---------+ - // | count |<-+ | - // +----------+ +----------+ - // | table |---X-->| malloc'd | - // +----------+ | memory | - // +----------+ - // - // Since the "malloc'd memory" is allocated inside of "ref" and free'd in - // unref, it will cause a leak if not unref'd. And since the leaked memory - // points to the object itself, the object will be considered "indirectly - // lost" by tools like Valgrind and not shown unless requested (which is good - // because the object's creator may not be responsible for the leak). But we - // have to hide the pointer marked "X" above from Valgrind, otherwise the - // malloc'd memory will appear to be indirectly leaked and the object itself - // will still be considered the primary leak. We hide this pointer from - // Valgrind (et all) by doing a bitwise not on it. 
- const upb_refcount **target = malloc(sizeof(void*)); - uintptr_t obfuscated = ~(uintptr_t)target; - *target = r; - upb_inttable_insert(r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated)); -} - -static void upb_refcount_untrack(const upb_refcount *r, const void *owner) { - upb_value v; - bool success = upb_inttable_remove(r->refs, (uintptr_t)owner, &v); - assert(success); - if (success) { - // Must un-obfuscate the pointer (see above). - free((void*)(~upb_value_getuint64(v))); - } -} -#endif - - -/* upb_refcount **************************************************************/ - -bool upb_refcount_init(upb_refcount *r, const void *owner) { - (void)owner; - r->count = malloc(sizeof(uint32_t)); - if (!r->count) return false; - // Initializing this here means upb_refcount_findscc() can only run once for - // each refcount; may need to revise this to be more flexible. - r->index = UPB_INDEX_UNDEFINED; - r->next = r; -#ifdef UPB_DEBUG_REFS - // We don't detect malloc() failures for UPB_DEBUG_REFS. 
- r->refs = malloc(sizeof(*r->refs)); - upb_inttable_init(r->refs); - *r->count = 0; - upb_refcount_ref(r, owner); -#else - *r->count = 1; -#endif - return true; -} - -void upb_refcount_uninit(upb_refcount *r) { - (void)r; -#ifdef UPB_DEBUG_REFS - assert(upb_inttable_count(r->refs) == 0); - upb_inttable_uninit(r->refs); - free(r->refs); -#endif -} - -// Thread-safe operations ////////////////////////////////////////////////////// - -void upb_refcount_ref(const upb_refcount *r, const void *owner) { - (void)owner; - upb_atomic_inc(r->count); -#ifdef UPB_DEBUG_REFS - UPB_LOCK; - upb_refcount_track(r, owner); - UPB_UNLOCK; -#endif -} - -bool upb_refcount_unref(const upb_refcount *r, const void *owner) { - (void)owner; - bool ret = upb_atomic_dec(r->count); -#ifdef UPB_DEBUG_REFS - UPB_LOCK; - upb_refcount_untrack(r, owner); - UPB_UNLOCK; -#endif - if (ret) free(r->count); - return ret; -} - -void upb_refcount_donateref( - const upb_refcount *r, const void *from, const void *to) { - (void)r; (void)from; (void)to; - assert(from != to); -#ifdef UPB_DEBUG_REFS - UPB_LOCK; - upb_refcount_track(r, to); - upb_refcount_untrack(r, from); - UPB_UNLOCK; -#endif -} - -bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) { - return r->count == r2->count; -} diff --git a/upb/refcount.h b/upb/refcount.h deleted file mode 100644 index 91ad3b8..0000000 --- a/upb/refcount.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Google Inc. See LICENSE for details. - * Author: Josh Haberman <jhaberman@gmail.com> - * - * A thread-safe refcount that can optionally track references for debugging - * purposes. It helps avoid circular references by allowing a - * strongly-connected component in the graph to share a refcount. - * - * This interface is internal to upb. 
- */ - -#ifndef UPB_REFCOUNT_H_ -#define UPB_REFCOUNT_H_ - -#include <stdbool.h> -#include <stdint.h> -#include "upb/table.h" - -#ifndef NDEBUG -#define UPB_DEBUG_REFS -#endif - -typedef struct _upb_refcount { - uint32_t *count; - struct _upb_refcount *next; // Circularly-linked list of this SCC. - uint16_t index; // For SCC algorithm. - uint16_t lowlink; // For SCC algorithm. -#ifdef UPB_DEBUG_REFS - // Make this a pointer so that we can modify it inside of const methods - // without ugly casts. - upb_inttable *refs; -#endif -} upb_refcount; - -// NON THREAD SAFE operations ////////////////////////////////////////////////// - -// Initializes the refcount with a single ref for the given owner. Returns -// NULL if memory could not be allocated. -bool upb_refcount_init(upb_refcount *r, const void *owner); - -// Uninitializes the refcount. May only be called after unref() returns true. -void upb_refcount_uninit(upb_refcount *r); - -// Finds strongly-connected components among some set of objects and merges all -// refcounts that share a SCC. The given function will be called when the -// algorithm needs to visit children of a particular object; the function -// should call upb_refcount_visit() once for each child obj. -// -// Returns false if memory allocation failed. -typedef void upb_getsuccessors(upb_refcount *obj, void*); -bool upb_refcount_findscc(upb_refcount **objs, int n, upb_getsuccessors *func); -void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure); - -// Thread-safe operations ////////////////////////////////////////////////////// - -// Increases the ref count, the new ref is owned by "owner" which must not -// already own a ref. Circular reference chains are not allowed. -void upb_refcount_ref(const upb_refcount *r, const void *owner); - -// Release a ref owned by owner, returns true if that was the last ref. 
-bool upb_refcount_unref(const upb_refcount *r, const void *owner); - -// Moves an existing ref from ref_donor to new_owner, without changing the -// overall ref count. -void upb_refcount_donateref( - const upb_refcount *r, const void *from, const void *to); - -// Returns true if these two objects share a refcount. -bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2); - -#endif // UPB_REFCOUNT_H_ diff --git a/upb/refcounted.c b/upb/refcounted.c new file mode 100644 index 0000000..54ad735 --- /dev/null +++ b/upb/refcounted.c @@ -0,0 +1,776 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * Our key invariants are: + * 1. reference cycles never span groups + * 2. for ref2(to, from), we increment to's count iff group(from) != group(to) + * + * The previous two are how we avoid leaking cycles. Other important + * invariants are: + * 3. for mutable objects "from" and "to", if there exists a ref2(to, from) + * this implies group(from) == group(to). (In practice, what we implement + * is even stronger; "from" and "to" will share a group if there has *ever* + * been a ref2(to, from), but all that is necessary for correctness is the + * weaker one). + * 4. mutable and immutable objects are never in the same group. 
+ */ + +#include "upb/refcounted.h" + +#include <setjmp.h> +#include <stdlib.h> + +uint32_t static_refcount = 1; + +/* arch-specific atomic primitives *******************************************/ + +#ifdef UPB_THREAD_UNSAFE ////////////////////////////////////////////////////// + +static void atomic_inc(uint32_t *a) { (*a)++; } +static bool atomic_dec(uint32_t *a) { return --(*a) == 0; } + +#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 /////////////////// + +static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } +static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; } + +#elif defined(WIN32) /////////////////////////////////////////////////////////// + +#include <Windows.h> + +static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); } +static bool atomic_dec(upb_atomic_t *a) { + return InterlockedDecrement(&a->val) == 0; +} + +#else +#error Atomic primitives not defined for your platform/CPU. \ + Implement them or compile with UPB_THREAD_UNSAFE. +#endif + + +/* Reference tracking (debug only) ********************************************/ + +#ifdef UPB_DEBUG_REFS + +#ifdef UPB_THREAD_UNSAFE + +static void upb_lock() {} +static void upb_unlock() {} + +#else + +// User must define functions that lock/unlock a global mutex and link this +// file against them. +void upb_lock(); +void upb_unlock(); + +#endif + +// UPB_DEBUG_REFS mode counts on being able to malloc() memory in some +// code-paths that can normally never fail, like upb_refcounted_ref(). Since +// we have no way to propagate out-of-memory errors back to the user, and since +// these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail. +#define CHECK_OOM(predicate) assert(predicate) + +typedef struct { + const upb_refcounted *obj; // Object we are taking a ref on. + int count; // How many refs there are (duplicates only allowed for ref2). 
+ bool is_ref2; +} trackedref; + +trackedref *trackedref_new(const upb_refcounted *obj, bool is_ref2) { + trackedref *ret = malloc(sizeof(*ret)); + CHECK_OOM(ret); + ret->obj = obj; + ret->count = 1; + ret->is_ref2 = is_ref2; + return ret; +} + +// A reversible function for obfuscating a uintptr_t. +// This depends on sizeof(uintptr_t) <= sizeof(uint64_t), so would fail +// on 128-bit machines. +static uintptr_t obfuscate(const void *x) { return ~(uintptr_t)x; } + +static upb_value obfuscate_v(const void *x) { + return upb_value_uint64(obfuscate(x)); +} + +static const void *unobfuscate_v(upb_value x) { + return (void*)~upb_value_getuint64(x); +} + +// +// Stores tracked references according to the following scheme: +// (upb_inttable)reftracks = { +// (void*)owner -> (upb_inttable*) = { +// obfuscate((upb_refcounted*)obj) -> obfuscate((trackedref*)is_ref2) +// } +// } +// +// obfuscate() is a function that hides the link from the heap checker, so +// that it is not followed for the purposes of deciding what has "indirectly +// leaked." Even though we have a pointer to the trackedref*, we want it to +// appear leaked if it is not freed. +// +// This scheme gives us the following desirable properties: +// +// 1. We can easily determine whether an (owner->obj) ref already exists +// and error out if a duplicate ref is taken. +// +// 2. Because the trackedref is allocated with malloc() at the point that +// the ref is taken, that memory will be leaked if the ref is not released. +// Because the malloc'd memory points to the refcounted object, the object +// itself will only be considered "indirectly leaked" by smart memory +// checkers like Valgrind. This will correctly blame the ref leaker +// instead of the innocent code that allocated the object to begin with. +// +// 3. We can easily enumerate all of the ref2 refs for a given owner, which +// allows us to double-check that the object's visit() function is +// correctly implemented. 
+// +static upb_inttable reftracks = UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR); + +static upb_inttable *trygettab(const void *p) { + const upb_value *v = upb_inttable_lookupptr(&reftracks, p); + return v ? upb_value_getptr(*v) : NULL; +} + +// Gets or creates the tracking table for the given owner. +static upb_inttable *gettab(const void *p) { + upb_inttable *tab = trygettab(p); + if (tab == NULL) { + tab = malloc(sizeof(*tab)); + CHECK_OOM(tab); + upb_inttable_init(tab, UPB_CTYPE_UINT64); + upb_inttable_insertptr(&reftracks, p, upb_value_ptr(tab)); + } + return tab; +} + +static void track(const upb_refcounted *r, const void *owner, bool ref2) { + upb_lock(); + upb_inttable *refs = gettab(owner); + const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); + if (v) { + trackedref *ref = (trackedref*)unobfuscate_v(*v); + // Since we allow multiple ref2's for the same to/from pair without + // allocating separate memory for each one, we lose the fine-grained + // tracking behavior we get with regular refs. Since ref2s only happen + // inside upb, we'll accept this limitation until/unless there is a really + // difficult upb-internal bug that can't be figured out without it. + assert(ref2); + assert(ref->is_ref2); + ref->count++; + } else { + trackedref *ref = trackedref_new(r, ref2); + bool ok = upb_inttable_insert(refs, obfuscate(r), obfuscate_v(ref)); + CHECK_OOM(ok); + } + upb_unlock(); +} + +static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { + upb_lock(); + upb_inttable *refs = gettab(owner); + const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); + // This assert will fail if an owner attempts to release a ref it didn't have. 
+ assert(v); + trackedref *ref = (trackedref*)unobfuscate_v(*v); + assert(ref->is_ref2 == ref2); + if (--ref->count == 0) { + free(ref); + upb_inttable_remove(refs, obfuscate(r), NULL); + if (upb_inttable_count(refs) == 0) { + upb_inttable_uninit(refs); + free(refs); + upb_inttable_removeptr(&reftracks, owner, NULL); + } + } + upb_unlock(); +} + +static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { + upb_lock(); + upb_inttable *refs = gettab(owner); + const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); + assert(v); + trackedref *ref = (trackedref*)unobfuscate_v(*v); + assert(ref->obj == r); + assert(ref->is_ref2 == ref2); + upb_unlock(); +} + +// Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that +// originate from the given owner. +static void getref2s(const upb_refcounted *owner, upb_inttable *tab) { + upb_lock(); + upb_inttable *refs = trygettab(owner); + if (refs) { + upb_inttable_iter i; + upb_inttable_begin(&i, refs); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + trackedref *ref = (trackedref*)unobfuscate_v(upb_inttable_iter_value(&i)); + if (ref->is_ref2) { + upb_value count = upb_value_int32(ref->count); + bool ok = upb_inttable_insertptr(tab, ref->obj, count); + CHECK_OOM(ok); + } + } + } + upb_unlock(); +} + +typedef struct { + upb_inttable ref2; + const upb_refcounted *obj; +} check_state; + +static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj, + void *closure) { + check_state *s = closure; + assert(obj == s->obj); + assert(subobj); + upb_inttable *ref2 = &s->ref2; + upb_value v; + bool removed = upb_inttable_removeptr(ref2, subobj, &v); + // The following assertion will fail if the visit() function visits a subobj + // that it did not have a ref2 on, or visits the same subobj too many times. 
+ assert(removed); + int32_t newcount = upb_value_getint32(v) - 1; + if (newcount > 0) { + upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount)); + } +} + +static void visit(const upb_refcounted *r, upb_refcounted_visit *v, + void *closure) { + // In DEBUG_REFS mode we know what existing ref2 refs there are, so we know + // exactly the set of nodes that visit() should visit. So we verify visit()'s + // correctness here. + check_state state; + state.obj = r; + bool ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32); + CHECK_OOM(ok); + getref2s(r, &state.ref2); + + // This should visit any children in the ref2 table. + if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state); + + // This assertion will fail if the visit() function missed any children. + assert(upb_inttable_count(&state.ref2) == 0); + upb_inttable_uninit(&state.ref2); + if (r->vtbl->visit) r->vtbl->visit(r, v, closure); +} + +#else + +static void track(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); +} + +static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); +} + +static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); +} + +static void visit(const upb_refcounted *r, upb_refcounted_visit *v, + void *closure) { + if (r->vtbl->visit) r->vtbl->visit(r, v, closure); +} + +#endif // UPB_DEBUG_REFS + + +/* freeze() *******************************************************************/ + +// The freeze() operation is by far the most complicated part of this scheme. +// We compute strongly-connected components and then mutate the graph such that +// we preserve the invariants documented at the top of this file. And we must +// handle out-of-memory errors gracefully (without leaving the graph +// inconsistent), which adds to the fun. 
+ +// The state used by the freeze operation (shared across many functions). +typedef struct { + int depth; + int maxdepth; + uint64_t index; + // Maps upb_refcounted* -> attributes (color, etc). attr layout varies by + // color. + upb_inttable objattr; + upb_inttable stack; // stack of upb_refcounted* for Tarjan's algorithm. + upb_inttable groups; // array of uint32_t*, malloc'd refcounts for new groups + upb_status *status; + jmp_buf err; +} tarjan; + +static void release_ref2(const upb_refcounted *obj, + const upb_refcounted *subobj, + void *closure); + +// Node attributes ///////////////////////////////////////////////////////////// + +// After our analysis phase all nodes will be either GRAY or WHITE. + +typedef enum { + BLACK = 0, // Object has not been seen. + GRAY, // Object has been found via a refgroup but may not be reachable. + GREEN, // Object is reachable and is currently on the Tarjan stack. + WHITE, // Object is reachable and has been assigned a group (SCC). +} color_t; + +UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); } +UPB_NORETURN static void oom(tarjan *t) { + upb_status_seterrliteral(t->status, "out of memory"); + err(t); +} + +uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) { + const upb_value *v = upb_inttable_lookupptr(&t->objattr, r); + return v ? upb_value_getuint64(*v) : 0; +} + +uint64_t getattr(const tarjan *t, const upb_refcounted *r) { + const upb_value *v = upb_inttable_lookupptr(&t->objattr, r); + assert(v); + return upb_value_getuint64(*v); +} + +void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) { + upb_inttable_removeptr(&t->objattr, r, NULL); + upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr)); +} + +static color_t color(tarjan *t, const upb_refcounted *r) { + return trygetattr(t, r) & 0x3; // Color is always stored in the low 2 bits. 
+} + +static void set_gray(tarjan *t, const upb_refcounted *r) { + assert(color(t, r) == BLACK); + setattr(t, r, GRAY); +} + +// Pushes an obj onto the Tarjan stack and sets it to GREEN. +static void push(tarjan *t, const upb_refcounted *r) { + assert(color(t, r) == BLACK || color(t, r) == GRAY); + // This defines the attr layout for the GREEN state. "index" and "lowlink" + // get 31 bits, which is plenty (limit of 2B objects frozen at a time). + setattr(t, r, GREEN | (t->index << 2) | (t->index << 33)); + if (++t->index == 0x80000000) { + upb_status_seterrliteral(t->status, "too many objects to freeze"); + err(t); + } + upb_inttable_push(&t->stack, upb_value_ptr((void*)r)); +} + +// Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its +// SCC group. +static upb_refcounted *pop(tarjan *t) { + upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack)); + assert(color(t, r) == GREEN); + // This defines the attr layout for nodes in the WHITE state. + // Top of group stack is [group, NULL]; we point at group. + setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8); + return r; +} + +static void newgroup(tarjan *t) { + uint32_t *group = malloc(sizeof(*group)); + if (!group) oom(t); + // Push group and empty group leader (we'll fill in leader later). 
+ if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) || + !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) { + free(group); + oom(t); + } + *group = 0; +} + +static uint32_t idx(tarjan *t, const upb_refcounted *r) { + assert(color(t, r) == GREEN); + return (getattr(t, r) >> 2) & 0x7FFFFFFF; +} + +static uint32_t lowlink(tarjan *t, const upb_refcounted *r) { + if (color(t, r) == GREEN) { + return getattr(t, r) >> 33; + } else { + return UINT32_MAX; + } +} + +static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) { + assert(color(t, r) == GREEN); + setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF)); +} + +uint32_t *group(tarjan *t, upb_refcounted *r) { + assert(color(t, r) == WHITE); + uint64_t groupnum = getattr(t, r) >> 8; + const upb_value *v = upb_inttable_lookup(&t->groups, groupnum); + assert(v); + return upb_value_getptr(*v); +} + +// If the group leader for this object's group has not previously been set, +// the given object is assigned to be its leader. 
+static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) { + assert(color(t, r) == WHITE); + uint64_t leader_slot = (getattr(t, r) >> 8) + 1; + const upb_value *v = upb_inttable_lookup(&t->groups, leader_slot); + assert(v); + if (upb_value_getptr(*v)) { + return upb_value_getptr(*v); + } else { + upb_inttable_remove(&t->groups, leader_slot, NULL); + upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r)); + return r; + } +} + + +// Tarjan's algorithm ////////////////////////////////////////////////////////// + +// See: +// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm +static void do_tarjan(const upb_refcounted *obj, tarjan *t); + +static void tarjan_visit(const upb_refcounted *obj, + const upb_refcounted *subobj, + void *closure) { + tarjan *t = closure; + if (++t->depth > t->maxdepth) { + upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth); + err(t); + } else if (subobj->is_frozen || color(t, subobj) == WHITE) { + // Do nothing: we don't want to visit or color already-frozen nodes, + // and WHITE nodes have already been assigned a SCC. + } else if (color(t, subobj) < GREEN) { + // Subdef has not yet been visited; recurse on it. + do_tarjan(subobj, t); + set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj))); + } else if (color(t, subobj) == GREEN) { + // Subdef is in the stack and hence in the current SCC. + set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj))); + } + --t->depth; +} + +static void do_tarjan(const upb_refcounted *obj, tarjan *t) { + if (color(t, obj) == BLACK) { + // We haven't seen this object's group; mark the whole group GRAY. 
// Freezes the objects reachable from roots[0..n).
//
// Phase one runs do_tarjan() from every root whose color is below GREEN.
// Phase two mutates the objects in three passes: WHITE objects are moved into
// new groups and marked frozen, cross-group refs are taken via crossref(), and
// finally objects whose group count is zero are freed.
//
// Returns true on success.  Returns false if one of the three work tables
// cannot be initialized, or if control comes back through setjmp(t.err)
// (presumably via err(t) in the traversal -- confirm against err()).
//
// NOTE(review): the err1/err2/err3 paths return false without writing
// anything to "s"; callers that inspect the status will see no message.
static bool freeze(upb_refcounted *const*roots, int n, upb_status *s) {
  // Read at err4 after a possible longjmp, hence volatile.
  volatile bool ret = false;

  // We run in two passes so that we can allocate all memory before performing
  // any mutation of the input -- this allows us to leave the input unchanged
  // in the case of memory allocation failure.
  tarjan t;
  t.index = 0;
  t.depth = 0;
  t.maxdepth = UPB_MAX_TYPE_DEPTH * 2;  // May want to make this a parameter.
  t.status = s;
  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
  // A non-zero return means we arrived here by a jump to t.err; "ret" is still
  // false at that point, so err4 releases everything stored in t.groups.
  if (setjmp(t.err) != 0) goto err4;


  for (int i = 0; i < n; i++) {
    if (color(&t, roots[i]) < GREEN) {
      do_tarjan(roots[i], &t);
    }
  }

  // If we've made it this far, no further errors are possible so it's safe to
  // mutate the objects without risk of leaving them in an inconsistent state.
  ret = true;

  // The transformation that follows requires care.  The preconditions are:
  // - all objects in attr map are WHITE or GRAY, and are in mutable groups
  //   (groups of all mutable objs)
  // - no ref2(to, from) refs have incremented count(to) if both "to" and
  //   "from" are in our attr map (this follows from invariants (2) and (3))

  // Pass 1: we remove WHITE objects from their mutable groups, and add them to
  // new groups according to the SCC's we computed.  These new groups will
  // consist of only frozen objects.  None will be immediately collectible,
  // because WHITE objects are by definition reachable from one of "roots",
  // which the caller must own refs on.
  upb_inttable_iter i;
  upb_inttable_begin(&i, &t.objattr);
  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
    // Since removal from a singly-linked list requires access to the object's
    // predecessor, we consider obj->next instead of obj for moving.  With the
    // while() loop we guarantee that we will visit every node's predecessor.
    // Proof:
    //  1. every node's predecessor is in our attr map.
    //  2. though the loop body may change a node's predecessor, it will only
    //     change it to be the node we are currently operating on, so with a
    //     while() loop we guarantee ourselves the chance to remove each node.
    while (color(&t, obj->next) == WHITE &&
           group(&t, obj->next) != obj->next->group) {
      // Remove from old group.
      upb_refcounted *move = obj->next;
      if (obj == move) {
        // Removing the last object from a group.
        assert(*obj->group == obj->individual_count);
        free(obj->group);
      } else {
        obj->next = move->next;
        // This may decrease to zero; we'll collect GRAY objects (if any) that
        // remain in the group in the third pass.
        assert(*move->group >= move->individual_count);
        *move->group -= move->individual_count;
      }

      // Add to new group.
      upb_refcounted *leader = groupleader(&t, move);
      if (move == leader) {
        // First object added to new group is its leader.
        move->group = group(&t, move);
        move->next = move;
        *move->group = move->individual_count;
      } else {
        // Group already has at least one object in it.
        assert(leader->group == group(&t, move));
        move->group = group(&t, move);
        move->next = leader->next;
        leader->next = move;
        *move->group += move->individual_count;
      }

      move->is_frozen = true;
    }
  }

  // Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
  // increment count(to) if group(obj) != group(to) (which could now be the
  // case if "to" was just frozen).
  upb_inttable_begin(&i, &t.objattr);
  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
    visit(obj, crossref, &t);
  }

  // Pass 3: GRAY objects are collected if their group's refcount dropped to
  // zero when we removed its white nodes.  This can happen if they had only
  // been kept alive by virtue of sharing a group with an object that was just
  // frozen.
  //
  // It is important that we do this last, since the GRAY object's free()
  // function could call unref2() on just-frozen objects, which will decrement
  // refs that were added in pass 2.
  upb_inttable_begin(&i, &t.objattr);
  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
    // A NULL group marks an object whose group was already torn down by an
    // earlier iteration of this loop; it only remains to free the object.
    if (obj->group == NULL || *obj->group == 0) {
      if (obj->group) {
        // We eagerly free() the group's count (since we can't easily determine
        // the group's remaining size it's the easiest way to ensure it gets
        // done).
        free(obj->group);

        // Visit to release ref2's (done in a separate pass since release_ref2
        // depends on o->group being unmodified so it can test merged()).
        upb_refcounted *o = obj;
        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);

        // Mark "group" fields as NULL so we know to free the objects later in
        // this loop, but also don't try to delete the group twice.
        o = obj;
        do { o->group = NULL; } while ((o = o->next) != obj);
      }
      obj->vtbl->free(obj);
    }
  }

  // Cleanup.  The success path falls through here too; the labels unwind the
  // tables in reverse order of initialization.
err4:
  if (!ret) {
    // Nothing was mutated, so the pointers stored in t.groups were never
    // handed to any object and must be released here.
    upb_inttable_begin(&i, &t.groups);
    for(; !upb_inttable_done(&i); upb_inttable_next(&i))
      free(upb_value_getptr(upb_inttable_iter_value(&i)));
  }
  upb_inttable_uninit(&t.groups);
err3:
  upb_inttable_uninit(&t.stack);
err2:
  upb_inttable_uninit(&t.objattr);
err1:
  return ret;
}
+ upb_refcounted *tmp = r->next; + r->next = base->next; + base->next = tmp; +} + +static void unref(const upb_refcounted *r); + +static void release_ref2(const upb_refcounted *obj, + const upb_refcounted *subobj, + void *closure) { + UPB_UNUSED(closure); + if (!merged(obj, subobj)) { + assert(subobj->is_frozen); + unref(subobj); + } + untrack(subobj, obj, true); +} + +static void unref(const upb_refcounted *r) { + if (atomic_dec(r->group)) { + free(r->group); + + // In two passes, since release_ref2 needs a guarantee that any subobjs + // are alive. + const upb_refcounted *o = r; + do { visit(o, release_ref2, NULL); } while((o = o->next) != r); + + o = r; + do { + const upb_refcounted *next = o->next; + assert(o->is_frozen || o->individual_count == 0); + o->vtbl->free((upb_refcounted*)o); + o = next; + } while(o != r); + } +} + + +/* Public interface ***********************************************************/ + +bool upb_refcounted_init(upb_refcounted *r, + const struct upb_refcounted_vtbl *vtbl, + const void *owner) { + r->next = r; + r->vtbl = vtbl; + r->individual_count = 0; + r->is_frozen = false; + r->group = malloc(sizeof(*r->group)); + if (!r->group) return false; + *r->group = 0; + upb_refcounted_ref(r, owner); + return true; +} + +bool upb_refcounted_isfrozen(const upb_refcounted *r) { + return r->is_frozen; +} + +void upb_refcounted_ref(const upb_refcounted *r, const void *owner) { + if (!r->is_frozen) + ((upb_refcounted*)r)->individual_count++; + atomic_inc(r->group); + track(r, owner, false); +} + +void upb_refcounted_unref(const upb_refcounted *r, const void *owner) { + if (!r->is_frozen) + ((upb_refcounted*)r)->individual_count--; + unref(r); + untrack(r, owner, false); +} + +void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) { + assert(!from->is_frozen); // Non-const pointer implies this. 
+ if (r->is_frozen) { + atomic_inc(r->group); + } else { + merge((upb_refcounted*)r, from); + } + track(r, from, true); +} + +void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) { + assert(!from->is_frozen); // Non-const pointer implies this. + if (r->is_frozen) { + unref(r); + } else { + assert(merged(r, from)); + } + untrack(r, from, true); +} + +void upb_refcounted_donateref( + const upb_refcounted *r, const void *from, const void *to) { + assert(from != to); + assert(to != NULL); + upb_refcounted_ref(r, to); + if (from != NULL) + upb_refcounted_unref(r, from); +} + +void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) { + checkref(r, owner, false); +} + +bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s) { + for (int i = 0; i < n; i++) { + assert(!roots[i]->is_frozen); + } + return freeze(roots, n, s); +} diff --git a/upb/refcounted.h b/upb/refcounted.h new file mode 100644 index 0000000..19993ca --- /dev/null +++ b/upb/refcounted.h @@ -0,0 +1,180 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009-2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * A refcounting scheme that supports circular refs. It accomplishes this by + * partitioning the set of objects into groups such that no cycle spans groups; + * we can then reference-count the group as a whole and ignore refs within the + * group. When objects are mutable, these groups are computed very + * conservatively; we group any objects that have ever had a link between them. + * When objects are frozen, we compute strongly-connected components which + * allows us to be precise and only group objects that are actually cyclic. + * + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. 
// One definition serves both languages: C++ sees a class with member-function
// wrappers, C sees a plain struct.  The data members after the #endif are
// shared by both.
#ifdef __cplusplus

class upb::RefCounted {
 public:
  // Returns true if the given object is frozen.
  bool IsFrozen() const;

  // Increases the ref count; the new ref is owned by "owner", which must not
  // already own a ref (and should not itself be a refcounted object if the ref
  // could possibly be circular; see below).
  // Thread-safe iff "this" is frozen.
  void Ref(const void *owner) const;

  // Releases a ref that was acquired from upb_refcounted_ref() and collects
  // any objects it can.
  void Unref(const void *owner) const;

  // Moves an existing ref from "from" to "to", without changing the overall
  // ref count.  DonateRef(foo, NULL, owner) is the same as Ref(foo, owner),
  // but "to" may not be NULL.
  void DonateRef(const void *from, const void *to) const;

  // Verifies that a ref to the given object is currently held by the given
  // owner.  Only effective in UPB_DEBUG_REFS builds.
  void CheckRef(const void *owner) const;

 private:
  UPB_DISALLOW_POD_OPS(RefCounted);
#else
struct upb_refcounted {
#endif
  // A single reference count shared by all objects in the group.
  uint32_t *group;

  // A singly-linked list of all objects in the group.  The list is circular:
  // upb_refcounted_init() points a new object at itself.
  upb_refcounted *next;

  // Table of function pointers for this type.
  const struct upb_refcounted_vtbl *vtbl;

  // Maintained only when mutable, this tracks the number of refs (but not
  // ref2's) to this object.  *group should be the sum of all individual_count
  // in the group.
  uint32_t individual_count;

  // Reported by upb_refcounted_isfrozen(); false for a newly initialized
  // object.
  bool is_frozen;
};
+void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from); + +#define upb_ref2(r, from) \ + upb_refcounted_ref2((const upb_refcounted*)r, (upb_refcounted*)from) +#define upb_unref2(r, from) \ + upb_refcounted_unref2((const upb_refcounted*)r, (upb_refcounted*)from) + +// Freezes all mutable object reachable by ref2() refs from the given roots. +// This will split refcounting groups into precise SCC groups, so that +// refcounting of frozen objects can be more aggressive. If memory allocation +// fails or if more than 2**31 mutable objects are reachable from "roots", +// false is returned and the objects are unchanged. +// +// After this operation succeeds, the objects are frozen/const, and may not be +// used through non-const pointers. In particular, they may not be passed as +// the second parameter of upb_refcounted_{ref,unref}2(). On the upside, all +// operations on frozen refcounteds are threadsafe, and objects will be freed +// at the precise moment that they become unreachable. +// +// Caller must own refs on each object in the "roots" list. +bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s); + +// Shared by all compiled-in refcounted objects. +extern uint32_t static_refcount; + +#define UPB_REFCOUNT_INIT {&static_refcount, NULL, NULL, 0, true} + +#ifdef __cplusplus +} /* extern "C" */ + +// C++ Wrappers. 
+namespace upb { +inline bool RefCounted::IsFrozen() const { + return upb_refcounted_isfrozen(this); +} +inline void RefCounted::Ref(const void *owner) const { + upb_refcounted_ref(this, owner); +} +inline void RefCounted::Unref(const void *owner) const { + upb_refcounted_unref(this, owner); +} +inline void RefCounted::DonateRef(const void *from, const void *to) const { + upb_refcounted_donateref(this, from, to); +} +inline void RefCounted::CheckRef(const void *owner) const { + upb_refcounted_checkref(this, owner); +} +} // namespace upb +#endif + +#endif // UPB_REFCOUNT_H_ diff --git a/upb/sink.c b/upb/sink.c new file mode 100644 index 0000000..d829fa9 --- /dev/null +++ b/upb/sink.c @@ -0,0 +1,205 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2011-2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + */ + +#include "upb/sink.h" + +static bool chkstack(upb_sink *s) { + if (s->top + 1 >= s->limit) { + upb_status_seterrliteral(&s->status, "Nesting too deep."); + return false; + } else { + return true; + } +} + +static upb_selector_t getselector(const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t selector; + bool ok = upb_getselector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; +} + +void upb_sink_init(upb_sink *s, const upb_handlers *h) { + s->limit = &s->stack[UPB_MAX_NESTING]; + s->top = NULL; + s->stack[0].h = h; + upb_status_init(&s->status); +} + +void upb_sink_reset(upb_sink *s, void *closure) { + s->top = s->stack; + s->top->closure = closure; +} + +void upb_sink_uninit(upb_sink *s) { + upb_status_uninit(&s->status); +} + +bool upb_sink_startmsg(upb_sink *s) { + const upb_handlers *h = s->top->h; + upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h); + return startmsg ? 
startmsg(s->top->closure) : true; +} + +void upb_sink_endmsg(upb_sink *s, upb_status *status) { + UPB_UNUSED(status); + assert(s->top == s->stack); + upb_endmsg_handler *endmsg = upb_handlers_getendmsg(s->top->h); + if (endmsg) endmsg(s->top->closure, &s->status); +} + +#define PUTVAL(type, ctype, htype) \ + bool upb_sink_put ## type(upb_sink *s, const upb_fielddef *f, ctype val) { \ + upb_selector_t selector; \ + if (!upb_getselector(f, UPB_HANDLER_ ## htype, &selector)) return false; \ + upb_ ## type ## _handler *handler = (upb_ ## type ## _handler*) \ + upb_handlers_gethandler(s->top->h, selector); \ + if (handler) { \ + void *data = upb_handlers_gethandlerdata(s->top->h, selector); \ + if (!handler(s->top->closure, data, val)) return false; \ + } \ + return true; \ + } + +PUTVAL(int32, int32_t, INT32); +PUTVAL(int64, int64_t, INT64); +PUTVAL(uint32, uint32_t, UINT32); +PUTVAL(uint64, uint64_t, UINT64); +PUTVAL(float, float, FLOAT); +PUTVAL(double, double, DOUBLE); +PUTVAL(bool, bool, BOOL); +#undef PUTVAL + +size_t upb_sink_putstring(upb_sink *s, const upb_fielddef *f, + const char *buf, size_t n) { + upb_selector_t selector; + if (!upb_getselector(f, UPB_HANDLER_STRING, &selector)) return false; + upb_string_handler *handler = (upb_string_handler*) + upb_handlers_gethandler(s->top->h, selector); + if (handler) { + void *data = upb_handlers_gethandlerdata(s->top->h, selector); \ + return handler(s->top->closure, data, buf, n); + } + return n; +} + +bool upb_sink_startseq(upb_sink *s, const upb_fielddef *f) { + assert(upb_fielddef_isseq(f)); + if (!chkstack(s)) return false; + + void *subc = s->top->closure; + const upb_handlers *h = s->top->h; + upb_selector_t selector; + if (!upb_getselector(f, UPB_HANDLER_STARTSEQ, &selector)) return false; + upb_startfield_handler *startseq = + (upb_startfield_handler*)upb_handlers_gethandler(h, selector); + if (startseq) { + subc = startseq(s->top->closure, upb_handlers_gethandlerdata(h, selector)); + if (!subc) return 
false; + } + + ++s->top; + s->top->end = getselector(f, UPB_HANDLER_ENDSEQ); + s->top->h = h; + s->top->closure = subc; + return true; +} + +bool upb_sink_endseq(upb_sink *s, const upb_fielddef *f) { + upb_selector_t selector = s->top->end; + assert(selector == getselector(f, UPB_HANDLER_ENDSEQ)); + --s->top; + + const upb_handlers *h = s->top->h; + upb_endfield_handler *endseq = + (upb_endfield_handler*)upb_handlers_gethandler(h, selector); + return endseq ? + endseq(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : + true; +} + +bool upb_sink_startstr(upb_sink *s, const upb_fielddef *f, size_t size_hint) { + assert(upb_fielddef_isstring(f)); + if (!chkstack(s)) return false; + + void *subc = s->top->closure; + const upb_handlers *h = s->top->h; + upb_selector_t selector; + if (!upb_getselector(f, UPB_HANDLER_STARTSTR, &selector)) return false; + upb_startstr_handler *startstr = + (upb_startstr_handler*)upb_handlers_gethandler(h, selector); + if (startstr) { + subc = startstr( + s->top->closure, upb_handlers_gethandlerdata(h, selector), size_hint); + if (!subc) return false; + } + + ++s->top; + s->top->end = getselector(f, UPB_HANDLER_ENDSTR); + s->top->h = h; + s->top->closure = subc; + return true; +} + +bool upb_sink_endstr(upb_sink *s, const upb_fielddef *f) { + upb_selector_t selector = s->top->end; + assert(selector == getselector(f, UPB_HANDLER_ENDSTR)); + --s->top; + + const upb_handlers *h = s->top->h; + upb_endfield_handler *endstr = + (upb_endfield_handler*)upb_handlers_gethandler(h, selector); + return endstr ? 
+ endstr(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : + true; +} + +bool upb_sink_startsubmsg(upb_sink *s, const upb_fielddef *f) { + assert(upb_fielddef_issubmsg(f)); + if (!chkstack(s)) return false; + + const upb_handlers *h = s->top->h; + upb_selector_t selector; + if (!upb_getselector(f, UPB_HANDLER_STARTSUBMSG, &selector)) return false; + upb_startfield_handler *startsubmsg = + (upb_startfield_handler*)upb_handlers_gethandler(h, selector); + void *subc = s->top->closure; + + if (startsubmsg) { + void *data = upb_handlers_gethandlerdata(h, selector); + subc = startsubmsg(s->top->closure, data); + if (!subc) return false; + } + + ++s->top; + s->top->end = getselector(f, UPB_HANDLER_ENDSUBMSG); + s->top->h = upb_handlers_getsubhandlers(h, f); + s->top->closure = subc; + upb_sink_startmsg(s); + return true; +} + +bool upb_sink_endsubmsg(upb_sink *s, const upb_fielddef *f) { + upb_selector_t selector = s->top->end; + assert(selector == getselector(f, UPB_HANDLER_ENDSUBMSG)); + + upb_endmsg_handler *endmsg = upb_handlers_getendmsg(s->top->h); + if (endmsg) endmsg(s->top->closure, &s->status); + --s->top; + + const upb_handlers *h = s->top->h; + upb_endfield_handler *endfield = + (upb_endfield_handler*)upb_handlers_gethandler(h, selector); + return endfield ? + endfield(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : + true; +} + +const upb_handlers *upb_sink_tophandlers(upb_sink *s) { + return s->top->h; +} diff --git a/upb/sink.h b/upb/sink.h new file mode 100644 index 0000000..2c0f037 --- /dev/null +++ b/upb/sink.h @@ -0,0 +1,82 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010-2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman <jhaberman@gmail.com> + * + * A upb_sink is an object that binds a upb_handlers object to some runtime + * state. It is the object that can actually receive data via the upb_handlers + * interface. 
+ * + * Unlike upb_def and upb_handlers, upb_sink is never frozen, immutable, or + * thread-safe. You can create as many of them as you want, but each one may + * only be used in a single thread at a time. + * + * If we compare with class-based OOP, you can think of a upb_def as an + * abstract base class, a upb_handlers as a concrete derived class, and a + * upb_sink as an object (class instance). + */ + +#ifndef UPB_SINK_H +#define UPB_SINK_H + +#include "upb/handlers.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* upb_sink *******************************************************************/ + +typedef struct { + upb_selector_t end; // From the enclosing message (unused at top-level). + const upb_handlers *h; + void *closure; +} upb_sink_frame; + +typedef struct { + upb_sink_frame *top, *limit; + upb_sink_frame stack[UPB_MAX_NESTING]; + upb_status status; +} upb_sink; + +// Caller retains ownership of the handlers object. +void upb_sink_init(upb_sink *s, const upb_handlers *h); + +// Resets the state of the sink so that it is ready to accept new input. +// Any state from previously received data is discarded. "Closure" will be +// used as the top-level closure. +void upb_sink_reset(upb_sink *s, void *closure); + +void upb_sink_uninit(upb_sink *s); + +// Returns the handlers at the top of the stack. +const upb_handlers *upb_sink_tophandlers(upb_sink *s); + +// Functions for pushing data into the sink. +// These return false if processing should stop (either due to error or just + // to suspend).
+bool upb_sink_startmsg(upb_sink *s); +void upb_sink_endmsg(upb_sink *s, upb_status *status); +bool upb_sink_putint32(upb_sink *s, const upb_fielddef *f, int32_t val); +bool upb_sink_putint64(upb_sink *s, const upb_fielddef *f, int64_t val); +bool upb_sink_putuint32(upb_sink *s, const upb_fielddef *f, uint32_t val); +bool upb_sink_putuint64(upb_sink *s, const upb_fielddef *f, uint64_t val); +bool upb_sink_putfloat(upb_sink *s, const upb_fielddef *f, float val); +bool upb_sink_putdouble(upb_sink *s, const upb_fielddef *f, double val); +bool upb_sink_putbool(upb_sink *s, const upb_fielddef *f, bool val); +bool upb_sink_startstr(upb_sink *s, const upb_fielddef *f, size_t size_hint); +size_t upb_sink_putstring(upb_sink *s, const upb_fielddef *f, const char *buf, + size_t len); +bool upb_sink_endstr(upb_sink *s, const upb_fielddef *f); +bool upb_sink_startsubmsg(upb_sink *s, const upb_fielddef *f); +bool upb_sink_endsubmsg(upb_sink *s, const upb_fielddef *f); +bool upb_sink_startseq(upb_sink *s, const upb_fielddef *f); +bool upb_sink_endseq(upb_sink *s, const upb_fielddef *f); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/upb/stdc/error.c b/upb/stdc/error.c index 313866c..85c9ca6 100644 --- a/upb/stdc/error.c +++ b/upb/stdc/error.c @@ -9,7 +9,6 @@ #include "upb/stdc/error.h" -#include <errno.h> #include <string.h> void upb_status_fromerrno(upb_status *status, int code) { diff --git a/upb/stdc/io.c b/upb/stdc/io.c index 1abed32..5d36aa5 100644 --- a/upb/stdc/io.c +++ b/upb/stdc/io.c @@ -7,6 +7,9 @@ #include "upb/stdc/io.h" +#include <errno.h> +#include <stdlib.h> +#include <string.h> #include "upb/stdc/error.h" // We can make this configurable if necessary. diff --git a/upb/symtab.c b/upb/symtab.c new file mode 100644 index 0000000..cd82bdd --- /dev/null +++ b/upb/symtab.c @@ -0,0 +1,326 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2012 Google Inc. See LICENSE for details. 
+ * Author: Josh Haberman <jhaberman@gmail.com> + */ + +#include "upb/symtab.h" + +#include <stdlib.h> +#include <string.h> + +#include "upb/bytestream.h" + +bool upb_symtab_isfrozen(const upb_symtab *s) { + return upb_refcounted_isfrozen(upb_upcast(s)); +} + +void upb_symtab_ref(const upb_symtab *s, const void *owner) { + upb_refcounted_ref(upb_upcast(s), owner); +} + +void upb_symtab_unref(const upb_symtab *s, const void *owner) { + upb_refcounted_unref(upb_upcast(s), owner); +} + +void upb_symtab_donateref( + const upb_symtab *s, const void *from, const void *to) { + upb_refcounted_donateref(upb_upcast(s), from, to); +} + +void upb_symtab_checkref(const upb_symtab *s, const void *owner) { + upb_refcounted_checkref(upb_upcast(s), owner); +} + +static void upb_symtab_free(upb_refcounted *r) { + upb_symtab *s = (upb_symtab*)r; + upb_strtable_iter i; + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_def_unref(def, s); + } + upb_strtable_uninit(&s->symtab); + free(s); +} + +static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free}; + +upb_symtab *upb_symtab_new(const void *owner) { + upb_symtab *s = malloc(sizeof(*s)); + upb_refcounted_init(upb_upcast(s), &vtbl, owner); + upb_strtable_init(&s->symtab, UPB_CTYPE_PTR); + return s; +} + +const upb_def **upb_symtab_getdefs(const upb_symtab *s, upb_deftype_t type, + const void *owner, int *n) { + int total = upb_strtable_count(&s->symtab); + // We may only use part of this, depending on how many symbols are of the + // correct type. 
+ const upb_def **defs = malloc(sizeof(*defs) * total); + upb_strtable_iter iter; + upb_strtable_begin(&iter, &s->symtab); + int i = 0; + for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); + assert(def); + if(type == UPB_DEF_ANY || def->type == type) + defs[i++] = def; + } + *n = i; + if (owner) + for(i = 0; i < *n; i++) upb_def_ref(defs[i], owner); + return defs; +} + +const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, + const void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = v ? upb_value_getptr(*v) : NULL; + if (ret) upb_def_ref(ret, owner); + return ret; +} + +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, + const void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *def = v ? upb_value_getptr(*v) : NULL; + upb_msgdef *ret = NULL; + if(def && def->type == UPB_DEF_MSG) { + ret = upb_downcast_msgdef_mutable(def); + upb_def_ref(def, owner); + } + return ret; +} + +// Given a symbol and the base symbol inside which it is defined, find the +// symbol's definition in t. +static upb_def *upb_resolvename(const upb_strtable *t, + const char *base, const char *sym) { + if(strlen(sym) == 0) return NULL; + if(sym[0] == UPB_SYMBOL_SEPARATOR) { + // Symbols starting with '.' are absolute, so we do a single lookup. + // Slice to omit the leading '.' + const upb_value *v = upb_strtable_lookup(t, sym + 1); + return v ? upb_value_getptr(*v) : NULL; + } else { + // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. 
+ (void)base; + assert(false); + return NULL; + } +} + +const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, + const char *sym, const void *owner) { + upb_def *ret = upb_resolvename(&s->symtab, base, sym); + if (ret) upb_def_ref(ret, owner); + return ret; +} + +// Searches def and its children to find defs that have the same name as any +// def in "addtab." Returns true if any where found, and as a side-effect adds +// duplicates of these defs into addtab. +// +// We use a modified depth-first traversal that traverses each SCC (which we +// already computed) as if it were a single node. This allows us to traverse +// the possibly-cyclic graph as if it were a DAG and to dup the correct set of +// nodes with O(n) time. +static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, + const void *new_owner, upb_inttable *seen, + upb_status *s) { + // Memoize results of this function for efficiency (since we're traversing a + // DAG this is not needed to limit the depth of the search). + const upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def); + if (v) return upb_value_getbool(*v); + + // Visit submessages for all messages in the SCC. + bool need_dup = false; + const upb_def *base = def; + do { + assert(upb_def_isfrozen(def)); + if (def->type == UPB_DEF_FIELD) continue; + const upb_value *v = upb_strtable_lookup(addtab, upb_def_fullname(def)); + if (v) { + // Because we memoize we should not visit a node after we have dup'd it. + assert(((upb_def*)upb_value_getptr(*v))->came_from_user); + need_dup = true; + } + const upb_msgdef *m = upb_dyncast_msgdef(def); + if (m) { + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (!upb_fielddef_hassubdef(f)) continue; + // |= to avoid short-circuit; we need its side-effects. 
+ need_dup |= upb_resolve_dfs( + upb_fielddef_subdef(f), addtab, new_owner, seen, s); + if (!upb_ok(s)) return false; + } + } + } while ((def = (upb_def*)def->base.next) != base); + + if (need_dup) { + // Dup any defs that don't already have entries in addtab. + def = base; + do { + if (def->type == UPB_DEF_FIELD) continue; + const char *name = upb_def_fullname(def); + if (upb_strtable_lookup(addtab, name) == NULL) { + upb_def *newdef = upb_def_dup(def, new_owner); + if (!newdef) goto oom; + newdef->came_from_user = false; + if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef))) + goto oom; + } + } while ((def = (upb_def*)def->base.next) != base); + } + + upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup)); + return need_dup; + +oom: + upb_status_seterrliteral(s, "out of memory"); + return false; +} + +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status) { + upb_def **add_defs = NULL; + upb_strtable addtab; + if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) { + upb_status_seterrliteral(status, "out of memory"); + return false; + } + + // Add new defs to table. + for (int i = 0; i < n; i++) { + upb_def *def = defs[i]; + if (upb_def_isfrozen(def)) { + upb_status_seterrliteral(status, "added defs must be mutable"); + goto err; + } + assert(!upb_def_isfrozen(def)); + const char *fullname = upb_def_fullname(def); + if (!fullname) { + upb_status_seterrliteral( + status, "Anonymous defs cannot be added to a symtab"); + goto err; + } + if (upb_strtable_lookup(&addtab, fullname) != NULL) { + upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); + goto err; + } + // We need this to back out properly, because if there is a failure we need + // to donate the ref back to the caller. 
+ def->came_from_user = true; + upb_def_donateref(def, ref_donor, s); + if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) + goto oom_err; + } + + // Add dups of any existing def that can reach a def with the same name as + // one of "defs." + upb_inttable seen; + if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err; + upb_strtable_iter i; + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_resolve_dfs(def, &addtab, s, &seen, status); + if (!upb_ok(status)) goto err; + } + upb_inttable_uninit(&seen); + + // Now using the table, resolve symbolic references. + upb_strtable_begin(&i, &addtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_msgdef *m = upb_dyncast_msgdef_mutable(def); + if (!m) continue; + // Type names are resolved relative to the message in which they appear. + const char *base = upb_def_fullname(upb_upcast(m)); + + upb_msg_iter j; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + const char *name = upb_fielddef_subdefname(f); + if (name) { + upb_def *subdef = upb_resolvename(&addtab, base, name); + if (subdef == NULL) { + upb_status_seterrf( + status, "couldn't resolve name '%s' in message '%s'", name, base); + goto err; + } else if (!upb_fielddef_setsubdef(f, subdef)) { + upb_status_seterrf( + status, "def '%s' had the wrong type for field '%s'", + upb_def_fullname(subdef), upb_fielddef_name(f)); + goto err; + } + } + + if (!upb_fielddef_resolvedefault(f)) { + upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); + size_t len; + const char *ptr = upb_byteregion_getptr(r, 0, &len); + upb_status_seterrf(status, "couldn't resolve enum default '%s'", ptr); + goto err; + } + } + } + + // We need an array of the defs in addtab, for passing to upb_def_freeze. 
+ add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab)); + if (add_defs == NULL) goto oom_err; + upb_strtable_begin(&i, &addtab); + for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) { + add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i)); + } + + if (!upb_def_freeze(add_defs, n, status)) goto err; + + // This must be delayed until all errors have been detected, since error + // recovery code uses this table to cleanup defs. + upb_strtable_uninit(&addtab); + + // TODO(haberman) we don't properly handle errors after this point (like + // OOM in upb_strtable_insert() below). + for (int i = 0; i < n; i++) { + upb_def *def = add_defs[i]; + const char *name = upb_def_fullname(def); + upb_value v; + if (upb_strtable_remove(&s->symtab, name, &v)) { + const upb_def *def = upb_value_getptr(v); + upb_def_unref(def, s); + } + bool success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); + UPB_ASSERT_VAR(success, success == true); + } + free(add_defs); + return true; + +oom_err: + upb_status_seterrliteral(status, "out of memory"); +err: { + // For defs the user passed in, we need to donate the refs back. For defs + // we dup'd, we need to just unref them. + upb_strtable_iter i; + upb_strtable_begin(&i, &addtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + if (def->came_from_user) { + upb_def_donateref(def, s, ref_donor); + } else { + upb_def_unref(def, s); + } + def->came_from_user = false; + } + } + upb_strtable_uninit(&addtab); + free(add_defs); + assert(!upb_ok(status)); + return false; +} diff --git a/upb/symtab.h b/upb/symtab.h new file mode 100644 index 0000000..883324a --- /dev/null +++ b/upb/symtab.h @@ -0,0 +1,200 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009-2012 Google Inc. See LICENSE for details. 
+ * Author: Josh Haberman <jhaberman@gmail.com> + * + * A symtab (symbol table) stores a name->def map of upb_defs. Clients could + * always create such tables themselves, but upb_symtab has logic for resolving + * symbolic references, and in particular, for keeping a whole set of consistent + * defs when replacing some subset of those defs. This logic is nontrivial. + * + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. + */ + +#ifndef UPB_SYMTAB_H_ +#define UPB_SYMTAB_H_ + +#ifdef __cplusplus +#include <vector> + +namespace upb { class SymbolTable; } +typedef upb::SymbolTable upb_symtab; +#else +struct upb_symtab; +typedef struct upb_symtab upb_symtab; +#endif + +#include "upb/def.h" + +#ifdef __cplusplus + +class upb::SymbolTable { + public: + // Returns a new symbol table with a single ref owned by "owner." + // Returns NULL if memory allocation failed. + static SymbolTable* New(const void* owner); + + // Though not declared as such in C++, upb::RefCounted is the base of + // SymbolTable and we can upcast to it. + RefCounted* Upcast(); + const RefCounted* Upcast() const; + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void *from, const void *to) const; + void CheckRef(const void *owner) const; + + // Resolves the given symbol using the rules described in descriptor.proto, + // namely: + // + // If the name starts with a '.', it is fully-qualified. Otherwise, + // C++-like scoping rules are used to find the type (i.e. first the nested + // types within this message are searched, then within the parent, on up + // to the root namespace). + // + // If a def is found, the caller owns one ref on the returned def, owned by + // owner. Otherwise returns NULL. 
+ const Def* Resolve(const char* base, const char* sym, + const void* owner) const; + + // Finds an entry in the symbol table with this exact name. If a def is + // found, the caller owns one ref on the returned def, owned by owner. + // Otherwise returns NULL. + const Def* Lookup(const char *sym, const void *owner) const; + const MessageDef* LookupMessage(const char *sym, const void *owner) const; + + // Gets an array of pointers to all currently active defs in this symtab. + // The caller owns the returned array (which is of length *n) as well as a + // ref to each symbol inside (owned by owner). If type is UPB_DEF_ANY then + // defs of all types are returned, otherwise only defs of the required type + // are returned. + const Def** GetDefs(upb_deftype_t type, const void *owner, int *n) const; + + // Adds the given mutable defs to the symtab, resolving all symbols + // (including enum default values) and finalizing the defs. Only one def per + // name may be in the list, but defs can replace existing defs in the symtab. + // All defs must have a name -- anonymous defs are not allowed. Anonymous + // defs can still be frozen by calling upb_def_freeze() directly. + // + // Any existing defs that can reach defs that are being replaced will + // themselves be replaced also, so that the resulting set of defs is fully + // consistent. + // + // The logic implemented in this method is a convenience; ultimately it + // calls some combination of upb_fielddef_setsubdef(), upb_def_dup(), and + // upb_def_freeze(), any of which clients could call themselves. However, + // since the logic for doing so is nontrivial, we provide it here. + // + // The entire operation either succeeds or fails. If the operation fails, + // the symtab is unchanged, false is returned, and status indicates the + // error. On success ref_donor's ref on each def passes to the symtab; on + // failure those refs are donated back to ref_donor. 
+ // + // TODO(haberman): currently failure will leave the symtab unchanged, but may + // leave the defs themselves partially resolved. Does this matter? If so we + // could do a prepass that ensures that all symbols are resolvable and bail + // if not, so we don't mutate anything until we know the operation will + // succeed. + // + // TODO(haberman): since the defs must be mutable, refining a frozen def + // requires making mutable copies of the entire tree. This is wasteful if + // only a few messages are changing. We may want to add a way of adding a + // tree of frozen defs to the symtab (perhaps an alternate constructor where + // you pass the root of the tree?) + bool Add(Def*const* defs, int n, void* ref_donor, upb_status* status); + + bool Add(const std::vector<Def*>& defs, void *owner, Status* status) { + return Add((Def*const*)&defs[0], defs.size(), owner, status); + } + + private: + UPB_DISALLOW_POD_OPS(SymbolTable); + +#else +struct upb_symtab { +#endif + upb_refcounted base; + upb_strtable symtab; +}; + +// Native C API. +#ifdef __cplusplus +extern "C" { +#endif +// From upb_refcounted. 
+bool upb_symtab_isfrozen(const upb_symtab *s); +void upb_symtab_ref(const upb_symtab *s, const void *owner); +void upb_symtab_unref(const upb_symtab *s, const void *owner); +void upb_symtab_donateref( + const upb_symtab *s, const void *from, const void *to); +void upb_symtab_checkref(const upb_symtab *s, const void *owner); + +upb_symtab *upb_symtab_new(const void *owner); +const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, + const char *sym, const void *owner); +const upb_def *upb_symtab_lookup( + const upb_symtab *s, const char *sym, const void *owner); +const upb_msgdef *upb_symtab_lookupmsg( + const upb_symtab *s, const char *sym, const void *owner); +const upb_def **upb_symtab_getdefs( + const upb_symtab *s, upb_deftype_t type, const void *owner, int *n); +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status); + +#ifdef __cplusplus +} /* extern "C" */ + +// C++ inline wrappers. +namespace upb { +inline SymbolTable* SymbolTable::New(const void* owner) { + return upb_symtab_new(owner); +} + +inline RefCounted* SymbolTable::Upcast() { return upb_upcast(this); } +inline const RefCounted* SymbolTable::Upcast() const { + return upb_upcast(this); +} +inline bool SymbolTable::IsFrozen() const { + return upb_symtab_isfrozen(this); +} +inline void SymbolTable::Ref(const void *owner) const { + upb_symtab_ref(this, owner); +} +inline void SymbolTable::Unref(const void *owner) const { + upb_symtab_unref(this, owner); +} +inline void SymbolTable::DonateRef(const void *from, const void *to) const { + upb_symtab_donateref(this, from, to); +} +inline void SymbolTable::CheckRef(const void *owner) const { + upb_symtab_checkref(this, owner); +} + +inline const Def* SymbolTable::Resolve( + const char* base, const char* sym, const void* owner) const { + return upb_symtab_resolve(this, base, sym, owner); +} +inline const Def* SymbolTable::Lookup( + const char *sym, const void *owner) const { + return 
upb_symtab_lookup(this, sym, owner); +} +inline const MessageDef* SymbolTable::LookupMessage( + const char *sym, const void *owner) const { + return upb_symtab_lookupmsg(this, sym, owner); +} +inline const Def** SymbolTable::GetDefs( + upb_deftype_t type, const void *owner, int *n) const { + return upb_symtab_getdefs(this, type, owner, n); +} +inline bool SymbolTable::Add( + Def*const* defs, int n, void* ref_donor, upb_status* status) { + return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status); +} +} // namespace upb +#endif + +#endif /* UPB_SYMTAB_H_ */ diff --git a/upb/table.c b/upb/table.c index 1cf944a..21457a0 100644 --- a/upb/table.c +++ b/upb/table.c @@ -5,14 +5,10 @@ * Author: Josh Haberman <jhaberman@gmail.com> * * Implementation is heavily inspired by Lua's ltable.c. - * - * TODO: for table iteration we use (array - 1) in several places; is this - * undefined behavior? If so find a better solution. */ #include "upb/table.h" -#include <assert.h> #include <stdlib.h> #include <string.h> @@ -35,47 +31,56 @@ int upb_log2(uint64_t v) { return UPB_MIN(UPB_MAXARRSIZE, ret); } +char *upb_strdup(const char *s) { + size_t n = strlen(s) + 1; + char *p = malloc(n); + if (p) memcpy(p, s, n); + return p; +} + static upb_tabkey upb_strkey(const char *str) { upb_tabkey k; k.str = (char*)str; return k; } -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); -typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key); +typedef const upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key); typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2); /* Base table (shared code) ***************************************************/ -static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; } - static bool upb_table_isfull(upb_table *t) { return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD; } -static bool upb_table_init(upb_table *t, uint8_t size_lg2) { +static bool upb_table_init(upb_table *t, 
upb_ctype_t type, uint8_t size_lg2) { t->count = 0; + t->type = type; t->size_lg2 = size_lg2; + t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0; size_t bytes = upb_table_size(t) * sizeof(upb_tabent); - t->mask = upb_table_size(t) - 1; - t->entries = malloc(bytes); - if (!t->entries) return false; - memset(t->entries, 0, bytes); + if (bytes > 0) { + t->entries = malloc(bytes); + if (!t->entries) return false; + memset((void*)t->entries, 0, bytes); + } else { + t->entries = NULL; + } return true; } -static void upb_table_uninit(upb_table *t) { free(t->entries); } - -static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; } +static void upb_table_uninit(upb_table *t) { free((void*)t->entries); } -static upb_tabent *upb_table_emptyent(const upb_table *t) { - upb_tabent *e = t->entries + upb_table_size(t); +static upb_tabent *upb_table_emptyent(upb_table *t) { + upb_tabent *e = (upb_tabent*)t->entries + upb_table_size(t); while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); } } -static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, - upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { - upb_tabent *e = hash(t, key); +static const upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, + upb_hashfunc_t *hash, + upb_eqlfunc_t *eql) { + if (t->size_lg2 == 0) return NULL; + const upb_tabent *e = hash(t, key); if (upb_tabent_isempty(e)) return NULL; while (1) { if (eql(e->key, key)) return &e->val; @@ -86,14 +91,19 @@ static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, // The given key must not already exist in the table. 
static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { - (void)eql; assert(upb_table_lookup(t, key, hash, eql) == NULL); + assert(val.type == t->type); t->count++; - upb_tabent *mainpos_e = hash(t, key); + upb_tabent *mainpos_e = (upb_tabent*)hash(t, key); upb_tabent *our_e = mainpos_e; - if (!upb_tabent_isempty(mainpos_e)) { // Collision. + if (upb_tabent_isempty(mainpos_e)) { + // Our main position is empty; use it. + our_e->next = NULL; + } else { + // Collision. upb_tabent *new_e = upb_table_emptyent(t); - upb_tabent *chain = hash(t, mainpos_e->key); // Head of collider's chain. + // Head of collider's chain. + upb_tabent *chain = (upb_tabent*)hash(t, mainpos_e->key); if (chain == mainpos_e) { // Existing ent is in its main posisiton (it has the same hash as us, and // is the head of our chain). Insert to new ent and append to this chain. @@ -105,7 +115,10 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, // chain). This implies that no existing ent in the table has our hash. // Evict it (updating its chain) and use its ent for head of our chain. *new_e = *mainpos_e; // copies next. - while (chain->next != mainpos_e) chain = chain->next; + while (chain->next != mainpos_e) { + chain = (upb_tabent*)chain->next; + assert(chain); + } chain->next = new_e; our_e = mainpos_e; our_e->next = NULL; @@ -117,27 +130,35 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, } static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, + upb_tabkey *removed, upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { - upb_tabent *chain = hash(t, key); + upb_tabent *chain = (upb_tabent*)hash(t, key); + if (upb_tabent_isempty(chain)) return false; if (eql(chain->key, key)) { + // Element to remove is at the head of its chain. 
t->count--; if (val) *val = chain->val; if (chain->next) { - upb_tabent *move = chain->next; + upb_tabent *move = (upb_tabent*)chain->next; *chain = *move; + *removed = move->key; move->key.num = 0; // Make the slot empty. } else { + *removed = chain->key; chain->key.num = 0; // Make the slot empty. } return true; } else { + // Element to remove is either in a non-head position or not in the table. while (chain->next && !eql(chain->next->key, key)) - chain = chain->next; + chain = (upb_tabent*)chain->next; if (chain->next) { // Found element to remove. if (val) *val = chain->next->val; - chain->next->key.num = 0; - chain->next = chain->next->next; + upb_tabent *remove = (upb_tabent*)chain->next; + *removed = remove->key; + remove->key.num = 0; + chain->next = remove->next; t->count--; return true; } else { @@ -146,13 +167,16 @@ static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, } } -static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) { - upb_tabent *end = t->entries + upb_table_size(t); +static const upb_tabent *upb_table_next(const upb_table *t, + const upb_tabent *e) { + const upb_tabent *end = t->entries + upb_table_size(t); do { if (++e == end) return NULL; } while(e->key.num == 0); return e; } -static upb_tabent *upb_table_begin(const upb_table *t) { +// TODO: is calculating t->entries - 1 undefined behavior? If so find a better +// solution. +static const upb_tabent *upb_table_begin(const upb_table *t) { return upb_table_next(t, t->entries - 1); } @@ -161,7 +185,7 @@ static upb_tabent *upb_table_begin(const upb_table *t) { // A simple "subclass" of upb_table that only adds a hash function for strings. -static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) { +static const upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) { // Could avoid the strlen() by using a hash function that terminates on NULL. 
return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask); } @@ -170,11 +194,13 @@ static bool upb_streql(upb_tabkey k1, upb_tabkey k2) { return strcmp(k1.str, k2.str) == 0; } -bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); } +bool upb_strtable_init(upb_strtable *t, upb_ctype_t type) { + return upb_table_init(&t->t, type, 2); +} void upb_strtable_uninit(upb_strtable *t) { for (size_t i = 0; i < upb_table_size(&t->t); i++) - free(t->t.entries[i].key.str); + free((void*)t->t.entries[i].key.str); upb_table_uninit(&t->t); } @@ -182,7 +208,8 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { if (upb_table_isfull(&t->t)) { // Need to resize. New table of double the size, add old elements to it. upb_strtable new_table; - if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false; + if (!upb_table_init(&new_table.t, t->t.type, t->t.size_lg2 + 1)) + return false; upb_strtable_iter i; upb_strtable_begin(&i, t); for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { @@ -192,15 +219,23 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { upb_strtable_uninit(t); *t = new_table; } - if ((k = strdup(k)) == NULL) return false; + if ((k = upb_strdup(k)) == NULL) return false; upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql); return true; } -upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) { +const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) { return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql); } +bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) { + upb_tabkey removed; + bool found = upb_table_remove( + &t->t, upb_strkey(key), val, &removed, &upb_strhash, &upb_streql); + if (found) free((void*)removed.str); + return found; +} + void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { i->t = t; i->e = upb_table_begin(&t->t); @@ -224,8 +259,9 @@ size_t 
upb_inttable_count(const upb_inttable *t) { return t->t.count + t->array_count; } -bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) { - if (!upb_table_init(&t->t, hsize_lg2)) return false; +bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type, + size_t asize, int hsize_lg2) { + if (!upb_table_init(&t->t, type, hsize_lg2)) return false; // Always make the array part at least 1 long, so that we know key 0 // won't be in the hash part, which simplifies things. t->array_size = UPB_MAX(1, asize); @@ -236,17 +272,32 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) { upb_table_uninit(&t->t); return false; } - memset(t->array, 0xff, array_bytes); + memset((void*)t->array, 0xff, array_bytes); return true; } -bool upb_inttable_init(upb_inttable *t) { - return upb_inttable_sizedinit(t, 0, 4); +bool upb_inttable_init(upb_inttable *t, upb_ctype_t type) { + return upb_inttable_sizedinit(t, type, 0, 4); } void upb_inttable_uninit(upb_inttable *t) { upb_table_uninit(&t->t); - free(t->array); + free((void*)t->array); +} + +static void upb_inttable_check(upb_inttable *t) { + UPB_UNUSED(t); +#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG) + // This check is very expensive (makes inserts/deletes O(N)). + size_t count = 0; + upb_inttable_iter i; + upb_inttable_begin(&i, t); + for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) { + const upb_value *v = upb_inttable_lookup(t, upb_inttable_iter_key(&i)); + assert(v); + } + assert(count == upb_inttable_count(t)); +#endif } bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { @@ -254,45 +305,78 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { if (key < t->array_size) { assert(!upb_arrhas(t->array[key])); t->array_count++; - t->array[key] = val; + ((upb_value*)t->array)[key] = val; } else { if (upb_table_isfull(&t->t)) { // Need to resize the hash part, but we re-use the array part. 
upb_table new_table; - if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false; - upb_tabent *e; + if (!upb_table_init(&new_table, t->t.type, t->t.size_lg2 + 1)) + return false; + const upb_tabent *e; for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e)) upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql); + + assert(t->t.count == new_table.count); + upb_table_uninit(&t->t); t->t = new_table; } upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); } + upb_inttable_check(t); return true; } -upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) { +const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) { if (key < t->array_size) { - upb_value *v = &t->array[key]; + const upb_value *v = &t->array[key]; return upb_arrhas(*v) ? v : NULL; } return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql); } bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { + bool success; if (key < t->array_size) { if (upb_arrhas(t->array[key])) { t->array_count--; if (val) *val = t->array[key]; - t->array[key] = upb_value_uint64(-1); - return true; + ((upb_value*)t->array)[key] = upb_value_uint64(-1); + success = true; } else { - return false; + success = false; } } else { - return upb_table_remove( - &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); + upb_tabkey removed; + success = upb_table_remove( + &t->t, upb_intkey(key), val, &removed, &upb_inthash, &upb_inteql); } + upb_inttable_check(t); + return success; +} + +bool upb_inttable_push(upb_inttable *t, upb_value val) { + return upb_inttable_insert(t, upb_inttable_count(t), val); +} + +upb_value upb_inttable_pop(upb_inttable *t) { + upb_value val; + bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val); + UPB_ASSERT_VAR(ok, ok); + return val; +} + +bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) { + return upb_inttable_insert(t, (uintptr_t)key, val); +} + 
+const upb_value *upb_inttable_lookupptr(const upb_inttable *t, + const void *key) { + return upb_inttable_lookup(t, (uintptr_t)key); +} + +bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { + return upb_inttable_remove(t, (uintptr_t)key, val); } void upb_inttable_compact(upb_inttable *t) { @@ -301,7 +385,10 @@ void upb_inttable_compact(upb_inttable *t) { upb_inttable_iter i; for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) counts[upb_log2(upb_inttable_iter_key(&i))]++; - int count = upb_inttable_count(t); + // Int part must always be at least 1 entry large to catch lookups of key 0. + // Key 0 must always be in the array part because "0" in the hash part + // denotes an empty entry. + int count = UPB_MAX(upb_inttable_count(t), 1); int size; for (size = UPB_MAXARRSIZE; size > 1; size--) { count -= counts[size]; @@ -311,7 +398,8 @@ void upb_inttable_compact(upb_inttable *t) { // Insert all elements into new, perfectly-sized table. upb_inttable new_table; int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD; - upb_inttable_sizedinit(&new_table, size, upb_log2(hashsize) + 1); + + upb_inttable_sizedinit(&new_table, t->t.type, size, upb_log2(hashsize)); for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) upb_inttable_insert( &new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i)); @@ -352,7 +440,7 @@ void upb_inttable_next(upb_inttable_iter *iter) { // 1. It will not work incrementally. // 2. It will not produce the same results on little-endian and big-endian // machines. -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { +uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. 
const uint32_t m = 0x5bd1e995; @@ -403,7 +491,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } -static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) { +uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) { const uint32_t m = 0x5bd1e995; const int32_t r = 24; const uint8_t * data = (const uint8_t *)key; diff --git a/upb/table.h b/upb/table.h index f6bff66..80f6813 100644 --- a/upb/table.h +++ b/upb/table.h @@ -17,13 +17,16 @@ * * This header is internal to upb; its interface should not be considered * public or stable. + * + * The table must be homogenous (all values of the same type). We currently + * enforce this on insert but store the full upb_value (with type) anyway. + * This is required with the current interface because lookups vend a pointer + * to the table's internal storage. */ #ifndef UPB_TABLE_H_ #define UPB_TABLE_H_ -#include <stddef.h> -#include <stdint.h> #include "upb.h" #ifdef __cplusplus @@ -32,45 +35,80 @@ extern "C" { typedef union { uintptr_t num; - char *str; // We own, nullz. + const char *str; // We own, nullz. } upb_tabkey; +#define UPB_TABKEY_NUM(n) {n} +#ifdef UPB_C99 +#define UPB_TABKEY_STR(s) {.str = s} +#endif +// TODO(haberman): C++ +#define UPB_TABKEY_NONE {0} + typedef struct _upb_tabent { upb_tabkey key; + // Storing a upb_value here wastes a bit of memory in debug mode because + // we are storing the type for each value even though we enforce that all + // values are the same. But since this only affects debug mode, we don't + // worry too much about it. The same applies to upb_inttable.array below. upb_value val; - struct _upb_tabent *next; // Internal chaining. + // Internal chaining. This is const so we can create static initializers for + // tables. We cast away const sometimes, but *only* when the containing + // upb_table is known to be non-const. 
This requires a bit of care, but + // the subtlety is confined to table.c. + const struct _upb_tabent *next; } upb_tabent; typedef struct { - upb_tabent *entries; // Hash table. size_t count; // Number of entries in the hash part. size_t mask; // Mask to turn hash value -> bucket. + upb_ctype_t type; // Type of all values. uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries. + const upb_tabent *entries; // Hash table. } upb_table; typedef struct { upb_table t; } upb_strtable; +#define UPB_STRTABLE_INIT(count, mask, type, size_lg2, entries) \ + {{count, mask, type, size_lg2, entries}} + typedef struct { - upb_table t; // For entries that don't fit in the array part. - upb_value *array; // Array part of the table. - size_t array_size; // Array part size. - size_t array_count; // Array part number of elements. + upb_table t; // For entries that don't fit in the array part. + const upb_value *array; // Array part of the table. + size_t array_size; // Array part size. + size_t array_count; // Array part number of elements. } upb_inttable; -INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; } +#define UPB_INTTABLE_INIT(count, mask, type, size_lg2, ent, a, asize, acount) \ + {{count, mask, type, size_lg2, ent}, a, asize, acount} -INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { - return t->entries + ((uint32_t)key.num & t->mask); +#define UPB_EMPTY_INTTABLE_INIT(type) \ + UPB_INTTABLE_INIT(0, 0, type, 0, NULL, NULL, 0, 0) + +#define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(-1) + +INLINE size_t upb_table_size(const upb_table *t) { + if (t->size_lg2 == 0) + return 0; + else + return 1 << t->size_lg2; } +// Internal-only functions, in .h file only out of necessity. 
+INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; } +INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; } +INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { + return t->entries + ((uint32_t)key.num & t->mask); +} INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; } +uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); // Initialize and uninitialize a table, respectively. If memory allocation // failed, false is returned that the table is uninitialized. -bool upb_inttable_init(upb_inttable *table); -bool upb_strtable_init(upb_strtable *table); +bool upb_inttable_init(upb_inttable *table, upb_ctype_t type); +bool upb_strtable_init(upb_strtable *table, upb_ctype_t type); void upb_inttable_uninit(upb_inttable *table); void upb_strtable_uninit(upb_strtable *table); @@ -90,14 +128,24 @@ bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); // Looks up key in this table, returning a pointer to the table's internal copy // of the user's inserted data, or NULL if this key is not in the table. The -// user is free to modify the given upb_value, which will be reflected in any -// future lookups of this key. The returned pointer is invalidated by inserts. -upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key); -upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key); +// returned pointer is invalidated by inserts. +const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key); +const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key); // Removes an item from the table. Returns true if the remove was successful, // and stores the removed item in *val if non-NULL. bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); +bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val); + +// Handy routines for treating an inttable like a stack. 
May not be mixed with +// other insert/remove calls. +bool upb_inttable_push(upb_inttable *t, upb_value val); +upb_value upb_inttable_pop(upb_inttable *t); + +// Convenience routines for inttables with pointer keys. +bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val); +bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val); +const upb_value *upb_inttable_lookupptr(const upb_inttable *t, const void *key); // Optimizes the table for the current set of entries, for both memory use and // lookup time. Client should call this after all entries have been inserted; @@ -105,12 +153,15 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); void upb_inttable_compact(upb_inttable *t); // A special-case inlinable version of the lookup routine for 32-bit integers. -INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) { +INLINE const upb_value *upb_inttable_lookup32(const upb_inttable *t, + uint32_t key) { if (key < t->array_size) { - upb_value *v = &t->array[key]; + const upb_value *v = &t->array[key]; return upb_arrhas(*v) ? v : NULL; } - for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { + const upb_tabent *e; + if (t->t.entries == NULL) return NULL; + for (e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { if ((uint32_t)e->key.num == key) return &e->val; if (e->next == NULL) return NULL; } @@ -124,12 +175,12 @@ INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) { // upb_strtable_begin(&i, t); // for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { // const char *key = upb_strtable_iter_key(&i); -// const myval *val = upb_strtable_iter_value(&i); +// const upb_value val = upb_strtable_iter_value(&i); // // ... 
// } typedef struct { const upb_strtable *t; - upb_tabent *e; + const upb_tabent *e; } upb_strtable_iter; void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t); @@ -149,13 +200,15 @@ INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) { // upb_inttable_iter i; // upb_inttable_begin(&i, t); // for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { +// uintptr_t key = upb_inttable_iter_key(&i); +// upb_value val = upb_inttable_iter_value(&i); // // ... // } typedef struct { const upb_inttable *t; union { - upb_tabent *ent; // For hash iteration. - upb_value *val; // For array iteration. + const upb_tabent *ent; // For hash iteration. + const upb_value *val; // For array iteration. } ptr; uintptr_t arrkey; bool array_part; @@ -29,24 +29,31 @@ void upb_status_uninit(upb_status *status) { free(status->buf); } -void upb_status_seterrf(upb_status *s, const char *msg, ...) { - s->code = UPB_ERROR; +bool upb_ok(const upb_status *status) { return !status->error; } +bool upb_eof(const upb_status *status) { return status->eof_; } + +void upb_status_seterrf(upb_status *status, const char *msg, ...) 
{ + if (!status) return; + status->error = true; + status->space = NULL; va_list args; va_start(args, msg); - upb_vrprintf(&s->buf, &s->bufsize, 0, msg, args); + upb_vrprintf(&status->buf, &status->bufsize, 0, msg, args); va_end(args); - s->str = s->buf; + status->str = status->buf; } void upb_status_seterrliteral(upb_status *status, const char *msg) { + if (!status) return; status->error = true; status->str = msg; status->space = NULL; } void upb_status_copy(upb_status *to, const upb_status *from) { + if (!to) return; to->error = from->error; - to->eof = from->eof; + to->eof_ = from->eof_; to->code = from->code; to->space = from->space; if (from->str == from->buf) { @@ -78,19 +85,26 @@ const char *upb_status_getstr(const upb_status *_status) { } void upb_status_clear(upb_status *status) { + if (!status) return; status->error = false; - status->eof = false; + status->eof_ = false; status->code = 0; status->space = NULL; status->str = NULL; } void upb_status_setcode(upb_status *status, upb_errorspace *space, int code) { + if (!status) return; status->code = code; status->space = space; status->str = NULL; } +void upb_status_seteof(upb_status *status) { + if (!status) return; + status->eof_ = true; +} + int upb_vrprintf(char **buf, size_t *size, size_t ofs, const char *fmt, va_list args) { // Try once without reallocating. We have to va_copy because we might have @@ -5,6 +5,9 @@ * Author: Josh Haberman <jhaberman@gmail.com> * * This file contains shared definitions that are widely used across upb. + * + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. 
*/ #ifndef UPB_H_ @@ -25,6 +28,28 @@ extern "C" { #define INLINE static inline #endif +#if __STDC_VERSION__ >= 199901L +#define UPB_C99 +#endif + +#if (defined(__cplusplus) && __cplusplus >= 201103L) || defined(__GXX_EXPERIMENTAL_CXX0X__) +#define UPB_CXX11 +#endif + +#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11) +#define UPB_DISALLOW_POD_OPS(class_name) \ + class_name() = delete; \ + ~class_name() = delete; \ + class_name(const class_name&) = delete; \ + void operator=(const class_name&) = delete; +#else +#define UPB_DISALLOW_POD_OPS(class_name) \ + class_name(); \ + ~class_name(); \ + class_name(const class_name&); \ + void operator=(const class_name&); +#endif + #ifdef __GNUC__ #define UPB_NORETURN __attribute__((__noreturn__)) #else @@ -32,12 +57,33 @@ extern "C" { #endif #ifndef UINT16_MAX -#define UINT16_MAX 65535 +#define UINT16_MAX 0xffff +#endif + +#ifndef UINT32_MAX +#define UINT32_MAX 0xffffffff #endif #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) +// For our C-based inheritance, sometimes it's necessary to upcast an object to +// its base class. We try to minimize the need for this by replicating base +// class functions in the derived class -- the derived class functions simply +// forward to the base class implementations. This strategy simplifies the C++ +// API since we can't use real C++ inheritance. +#define upb_upcast(obj) (&(obj)->base) +#define upb_upcast2(obj) upb_upcast(upb_upcast(obj)) + +char *upb_strdup(const char *s); + +#define UPB_UNUSED(var) (void)var + +// For asserting something about a variable when the variable is not used for +// anything else. This prevents "unused variable" warnings when compiling in +// debug mode. +#define UPB_ASSERT_VAR(var, predicate) UPB_UNUSED(var); assert(predicate) + // The maximum that any submessages can be nested. Matches proto2's limit. 
// At the moment this specifies the size of several statically-sized arrays // and therefore setting it high will cause more memory to be used. Will @@ -45,19 +91,7 @@ extern "C" { // TODO: make this a runtime-settable property of upb_handlers. #define UPB_MAX_NESTING 64 -// The maximum number of fields that any one .proto type can have. Note that -// this is very different than the max field number. It is hard to imagine a -// scenario where more than 2k fields (each with its own name and field number) -// makes sense. The .proto file to describe it would be 2000 lines long and -// contain 2000 unique names. -// -// With this limit we can store a has-bit offset in 8 bits (2**8 * 8 = 2048) -// and we can store a value offset in 16 bits, since the maximum message -// size is 16,640 bytes (2**8 has-bits + 2048 * 8-byte value). Note that -// strings and arrays are not counted in this, only the *pointer* to them is. -// An individual string or array is unaffected by this 16k byte limit. -#define UPB_MAX_FIELDS (2048) - +// Inherent limit of protobuf wire format and schema definition. #define UPB_MAX_FIELDNUMBER ((1 << 29) - 1) // Nested type names are separated by periods. @@ -81,7 +115,99 @@ extern "C" { #define UPB_MAX_TYPE_DEPTH 64 -/* upb_value ******************************************************************/ +/* upb::Status ****************************************************************/ + +#ifdef __cplusplus +namespace upb { class Status; } +typedef upb::Status upb_status; +#else +struct upb_status; +typedef struct upb_status upb_status; +#endif + +typedef enum { + UPB_OK, // The operation completed successfully. + UPB_SUSPENDED, // The operation was suspended and may be resumed later. + UPB_ERROR, // An error occurred. +} upb_success_t; + +typedef struct { + const char *name; + // Writes a NULL-terminated string to "buf" containing an error message for + // the given error code, returning false if the message was too large to fit. 
+ bool (*code_to_string)(int code, char *buf, size_t len); +} upb_errorspace; + +#ifdef __cplusplus + +class upb::Status { + public: + typedef upb_success_t Success; + + Status(); + ~Status(); + + bool ok(); + bool eof(); + + const char *GetString() const; + void SetEof(); + void SetErrorLiteral(const char* msg); + void Clear(); + + private: +#else +struct upb_status { +#endif + bool error; + bool eof_; + + // Specific status code defined by some error space (optional). + int code; + upb_errorspace *space; + + // Error message (optional). + const char *str; // NULL when no message is present. NULL-terminated. + char *buf; // Owned by the status. + size_t bufsize; +}; + +#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0} + +void upb_status_init(upb_status *status); +void upb_status_uninit(upb_status *status); + +bool upb_ok(const upb_status *status); +bool upb_eof(const upb_status *status); + +// Any of the functions that write to a status object allow status to be NULL, +// to support use cases where the function's caller does not care about the +// status message. +void upb_status_clear(upb_status *status); +void upb_status_seterrliteral(upb_status *status, const char *msg); +void upb_status_seterrf(upb_status *status, const char *msg, ...); +void upb_status_setcode(upb_status *status, upb_errorspace *space, int code); +void upb_status_seteof(upb_status *status); +// The returned string is invalidated by any other call into the status. +const char *upb_status_getstr(const upb_status *status); +void upb_status_copy(upb_status *to, const upb_status *from); + +// Like vasprintf (which allocates a string large enough for the result), but +// uses *buf (which can be NULL) as a starting point and reallocates it only if +// the new value will not fit. "size" is updated to reflect the allocated size +// of the buffer. Starts writing at the given offset into the string; bytes +// preceding this offset are unaffected. 
Returns the new length of the string, +// or -1 on memory allocation failure. +int upb_vrprintf(char **buf, size_t *size, size_t ofs, + const char *fmt, va_list args); + + +/* upb::Value *****************************************************************/ + +// TODO(haberman): upb::Value is gross and should be retired from the public +// interface (we *may* still want to keep it for internal use). upb::Handlers +// and upb::Def should replace their use of Value with one function for each C +// type. // Clients should not need to access these enum values; they are used internally // to do typechecks of upb_value accesses. @@ -93,13 +219,19 @@ typedef enum { UPB_CTYPE_DOUBLE = 5, UPB_CTYPE_FLOAT = 6, UPB_CTYPE_BOOL = 7, - UPB_CTYPE_PTR = 8, - UPB_CTYPE_BYTEREGION = 9, - UPB_CTYPE_FIELDDEF = 10, + UPB_CTYPE_CSTR = 8, + UPB_CTYPE_PTR = 9, + UPB_CTYPE_BYTEREGION = 10, + UPB_CTYPE_FIELDDEF = 11, } upb_ctype_t; -struct _upb_byteregion; -struct _upb_fielddef; +#ifdef __cplusplus +namespace upb { class ByteRegion; } +typedef upb::ByteRegion upb_byteregion; +#else +struct upb_byteregion; +typedef struct upb_byteregion upb_byteregion; +#endif // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
@@ -112,9 +244,10 @@ typedef struct { double _double; float _float; bool _bool; - void *_void; - struct _upb_byteregion *byteregion; - const struct _upb_fielddef *fielddef; + char *cstr; + void *ptr; + const void *constptr; + upb_byteregion *byteregion; } val; #ifndef NDEBUG @@ -124,12 +257,32 @@ typedef struct { #endif } upb_value; +#ifdef UPB_C99 +#define UPB_VAL_INIT(v, member) {.member = v} +#endif +// TODO(haberman): C++ + #ifdef NDEBUG #define SET_TYPE(dest, val) +#define UPB_VALUE_INIT(v, member, type) {UPB_VAL_INIT(v, member)} #else #define SET_TYPE(dest, val) dest = val +#define UPB_VALUE_INIT(v, member, type) {UPB_VAL_INIT(v, member), type} #endif +#define UPB_VALUE_INIT_INT32(v) UPB_VALUE_INIT(v, int32, UPB_CTYPE_INT32) +#define UPB_VALUE_INIT_INT64(v) UPB_VALUE_INIT(v, int64, UPB_CTYPE_INT64) +#define UPB_VALUE_INIT_UINT32(v) UPB_VALUE_INIT(v, uint32, UPB_CTYPE_UINT32) +#define UPB_VALUE_INIT_UINT64(v) UPB_VALUE_INIT(v, uint64, UPB_CTYPE_UINT64) +#define UPB_VALUE_INIT_DOUBLE(v) UPB_VALUE_INIT(v, _double, UPB_CTYPE_DOUBLE) +#define UPB_VALUE_INIT_FLOAT(v) UPB_VALUE_INIT(v, _float, UPB_CTYPE_FLOAT) +#define UPB_VALUE_INIT_BOOL(v) UPB_VALUE_INIT(v, _bool, UPB_CTYPE_BOOL) +#define UPB_VALUE_INIT_CSTR(v) UPB_VALUE_INIT(v, cstr, UPB_CTYPE_CSTR) +#define UPB_VALUE_INIT_PTR(v) UPB_VALUE_INIT(v, ptr, UPB_CTYPE_PTR) +#define UPB_VALUE_INIT_CONSTPTR(v) UPB_VALUE_INIT(v, constptr, UPB_CTYPE_PTR) +// Non-existent type, all reads will fail. +#define UPB_VALUE_INIT_NONE UPB_VALUE_INIT(NULL, ptr, -1) + // For each value type, define the following set of functions: // // // Get/set an int32 from a upb_value. 
@@ -174,12 +327,9 @@ ALL(int64, int64, int64_t, UPB_CTYPE_INT64); ALL(uint32, uint32, uint32_t, UPB_CTYPE_UINT32); ALL(uint64, uint64, uint64_t, UPB_CTYPE_UINT64); ALL(bool, _bool, bool, UPB_CTYPE_BOOL); -ALL(ptr, _void, void*, UPB_CTYPE_PTR); -ALL(byteregion, byteregion, struct _upb_byteregion*, UPB_CTYPE_BYTEREGION); - -// upb_fielddef should never be modified from a callback -// (ie. when they're getting passed through a upb_value). -ALL(fielddef, fielddef, const struct _upb_fielddef*, UPB_CTYPE_FIELDDEF); +ALL(cstr, cstr, char*, UPB_CTYPE_CSTR); +ALL(ptr, ptr, void*, UPB_CTYPE_PTR); +ALL(byteregion, byteregion, upb_byteregion*, UPB_CTYPE_BYTEREGION); #ifdef __KERNEL__ // Linux kernel modules are compiled without SSE and therefore are incapable @@ -199,64 +349,55 @@ ALL(float, _float, float, UPB_CTYPE_FLOAT); extern upb_value UPB_NO_VALUE; +#ifdef __cplusplus +} // extern "C" -/* upb_status *****************************************************************/ - -typedef enum { - UPB_OK, // The operation completed successfully. - UPB_SUSPENDED, // The operation was suspended and may be resumed later. - UPB_ERROR, // An error occurred. -} upb_success_t; - -typedef struct { - const char *name; - // Writes a NULL-terminated string to "buf" containing an error message for - // the given error code, returning false if the message was too large to fit. - bool (*code_to_string)(int code, char *buf, size_t len); -} upb_errorspace; - -typedef struct { - bool error; - bool eof; - - // Specific status code defined by some error space (optional). - int code; - upb_errorspace *space; - - // Error message (optional). - const char *str; // NULL when no message is present. NULL-terminated. - char *buf; // Owned by the status. 
- size_t bufsize; -} upb_status; - -#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0} +namespace upb { -void upb_status_init(upb_status *status); -void upb_status_uninit(upb_status *status); +typedef upb_value Value; -INLINE bool upb_ok(const upb_status *status) { return !status->error; } -INLINE bool upb_eof(const upb_status *status) { return status->eof; } +template <typename T> T GetValue(Value v); +template <typename T> Value MakeValue(T v); -void upb_status_clear(upb_status *status); -void upb_status_seterrliteral(upb_status *status, const char *msg); -void upb_status_seterrf(upb_status *s, const char *msg, ...); -void upb_status_setcode(upb_status *s, upb_errorspace *space, int code); -INLINE void upb_status_seteof(upb_status *s) { s->eof = true; } -// The returned string is invalidated by any other call into the status. -const char *upb_status_getstr(const upb_status *s); -void upb_status_copy(upb_status *to, const upb_status *from); +#define UPB_VALUE_ACCESSORS(type, ctype) \ + template <> inline ctype GetValue<ctype>(Value v) { \ + return upb_value_get ## type(v); \ + } \ + template <> inline Value MakeValue<ctype>(ctype v) { \ + return upb_value_ ## type(v); \ + } -// Like vasprintf (which allocates a string large enough for the result), but -// uses *buf (which can be NULL) as a starting point and reallocates it only if -// the new value will not fit. "size" is updated to reflect the allocated size -// of the buffer. Starts writing at the given offset into the string; bytes -// preceding this offset are unaffected. Returns the new length of the string, -// or -1 on memory allocation failure. 
-int upb_vrprintf(char **buf, size_t *size, size_t ofs, - const char *fmt, va_list args); +UPB_VALUE_ACCESSORS(double, double); +UPB_VALUE_ACCESSORS(float, float); +UPB_VALUE_ACCESSORS(int32, int32_t); +UPB_VALUE_ACCESSORS(int64, int64_t); +UPB_VALUE_ACCESSORS(uint32, uint32_t); +UPB_VALUE_ACCESSORS(uint64, uint64_t); +UPB_VALUE_ACCESSORS(bool, bool); + +#undef UPB_VALUE_ACCESSORS + +template <typename T> inline T* GetPtrValue(Value v) { + return static_cast<T*>(upb_value_getptr(v)); +} +template <typename T> inline Value MakePtrValue(T* v) { + return upb_value_ptr(static_cast<void*>(v)); +} + +// C++ Wrappers +inline Status::Status() { upb_status_init(this); } +inline Status::~Status() { upb_status_uninit(this); } +inline bool Status::ok() { return upb_ok(this); } +inline bool Status::eof() { return upb_eof(this); } +inline const char *Status::GetString() const { return upb_status_getstr(this); } +inline void Status::SetEof() { upb_status_seteof(this); } +inline void Status::SetErrorLiteral(const char* msg) { + upb_status_seterrliteral(this, msg); +} +inline void Status::Clear() { upb_status_clear(this); } + +} // namespace upb -#ifdef __cplusplus -} /* extern "C" */ #endif #endif /* UPB_H_ */ |