summaryrefslogtreecommitdiff
path: root/upb
diff options
context:
space:
mode:
Diffstat (limited to 'upb')
-rw-r--r--upb/bytestream.c95
-rw-r--r--upb/bytestream.h97
-rw-r--r--upb/def.c2
-rw-r--r--upb/handlers.c140
-rw-r--r--upb/handlers.h69
-rw-r--r--upb/msg.c6
-rw-r--r--upb/pb/decoder.c361
-rw-r--r--upb/pb/decoder.h102
-rw-r--r--upb/pb/decoder_x64.dasc322
-rw-r--r--upb/pb/glue.c33
-rw-r--r--upb/pb/glue.h4
-rw-r--r--upb/pb/varint.h40
-rw-r--r--upb/table.h9
-rw-r--r--upb/upb.c101
-rw-r--r--upb/upb.h37
15 files changed, 839 insertions, 579 deletions
diff --git a/upb/bytestream.c b/upb/bytestream.c
index 135f269..8feb678 100644
--- a/upb/bytestream.c
+++ b/upb/bytestream.c
@@ -25,7 +25,7 @@ upb_byteregion *upb_byteregion_new(const void *str) {
return upb_byteregion_newl(str, strlen(str));
}
-upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) {
+upb_byteregion *upb_byteregion_newl(const void *str, size_t len) {
upb_stringsrc *src = malloc(sizeof(*src));
upb_stringsrc_init(src);
char *ptr = malloc(len + 1);
@@ -37,7 +37,7 @@ upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) {
void upb_byteregion_free(upb_byteregion *r) {
if (!r) return;
- uint32_t len;
+ size_t len;
free((char*)upb_byteregion_getptr(r, 0, &len));
upb_stringsrc_uninit((upb_stringsrc*)r->bytesrc);
free(r->bytesrc);
@@ -64,16 +64,14 @@ void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src,
r->fetch = UPB_MIN(src->fetch, r->end);
}
-bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s) {
+// Attempts to extend the fetched range of "r" using its underlying bytesrc.
+// Returns UPB_BYTE_EOF if nothing remains to be fetched in the region,
+// UPB_BYTE_OK if the fetch offset was advanced, and otherwise the non-OK
+// code reported by upb_bytesrc_fetch().
+upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r) {
uint64_t fetchable = upb_byteregion_remaining(r, r->fetch);
- if (fetchable == 0) {
- upb_status_seteof(s);
- return false;
- }
- uint64_t num = upb_bytesrc_fetch(r->bytesrc, r->fetch, s);
- if (num == 0) return false;
- r->fetch += UPB_MIN(num, fetchable);
- return true;
+ if (fetchable == 0) return UPB_BYTE_EOF;
+ size_t fetched;
+ upb_bytesuccess_t ret = upb_bytesrc_fetch(r->bytesrc, r->fetch, &fetched);
+ // Propagate the bytesrc's code unchanged: "false" is not a
+ // upb_bytesuccess_t value and would hide the failure from callers such as
+ // upb_byteregion_fetchall(), which loops while the result is UPB_BYTE_OK.
+ if (ret != UPB_BYTE_OK) return ret;
+ // Never advance past the end of a delimited region.
+ r->fetch += UPB_MIN(fetched, fetchable);
+ return UPB_BYTE_OK;
}
@@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) {
static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) {
upb_stdio_buf **reuse = NULL; // XXX
- uint32_t num_reused = 0, num_inuse = 0;
+ int num_reused = 0, num_inuse = 0;
// Could sweep only a subset of bufs if this was a hotspot.
- for (uint32_t i = 0; i < s->nbuf; i++) {
+ for (int i = 0; i < s->nbuf; i++) {
upb_stdio_buf *buf = s->bufs[i];
if (buf->refcount > 0) {
s->bufs[num_inuse++] = buf;
@@ -120,28 +118,37 @@ void upb_stdio_discard(void *src, uint64_t ofs) {
(void)ofs;
}
-uint32_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
+upb_bytesuccess_t upb_stdio_fetch(void *src, uint64_t ofs, size_t *bytes_read) {
(void)ofs;
upb_stdio *stdio = (upb_stdio*)src;
upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio);
- uint32_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
- buf->len = read;
- if(read < (uint32_t)BUF_SIZE) {
+retry:
+ *bytes_read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
+ buf->len = *bytes_read;
+ if (*bytes_read < (size_t)BUF_SIZE) {
// Error or EOF.
- if(feof(stdio->file)) {
- upb_status_seteof(s);
- return read;
+ if (feof(stdio->file)) {
+ upb_status_seteof(&stdio->src.status);
+ return UPB_BYTE_EOF;
}
- if(ferror(stdio->file)) {
- upb_status_fromerrno(s);
- return 0;
+ if (ferror(stdio->file)) {
+#ifdef EINTR
+ // If we encounter a client who doesn't want to retry EINTR, we can easily
+ // add a boolean property of the stdio that controls this behavior.
+ if (errno == EINTR) {
+ clearerr(stdio->file);
+ goto retry;
+ }
+#endif
+ upb_status_fromerrno(&stdio->src.status);
+ return upb_errno_is_wouldblock() ? UPB_BYTE_WOULDBLOCK : UPB_BYTE_ERROR;
}
assert(false);
}
- return buf->ofs + buf->len;
+ return UPB_BYTE_OK;
}
-void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) {
+void upb_stdio_copy(const void *src, uint64_t ofs, size_t len, char *dst) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
ofs -= buf->ofs;
memcpy(dst, buf->data + ofs, BUF_SIZE - ofs);
@@ -149,14 +156,14 @@ void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) {
dst += (BUF_SIZE - ofs);
while (len > 0) {
++buf;
- uint32_t bytes = UPB_MIN(len, BUF_SIZE);
+ size_t bytes = UPB_MIN(len, BUF_SIZE);
memcpy(dst, buf->data, bytes);
len -= bytes;
dst += bytes;
}
}
-const char *upb_stdio_getptr(const void *src, uint64_t ofs, uint32_t *len) {
+const char *upb_stdio_getptr(const void *src, uint64_t ofs, size_t *len) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
ofs -= buf->ofs;
*len = BUF_SIZE - ofs;
@@ -168,7 +175,7 @@ upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *s
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
upb_strlen_t len = upb_string_len(str);
upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
- if(written < len) {
+ if (written < len) {
upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
}
@@ -191,7 +198,7 @@ void upb_stdio_init(upb_stdio *stdio) {
static upb_bytesrc_vtbl bytesrc_vtbl = {
&upb_stdio_fetch,
&upb_stdio_discard,
- &upb_stdio_read,
+ &upb_stdio_copy,
&upb_stdio_getptr,
};
upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
@@ -226,20 +233,25 @@ void upb_stdio_uninit(upb_stdio *stdio) {
stdio->file = NULL;
}
-upb_byteregion* upb_stdio_allbytes(upb_stdio *stdio) { return &stdio->byteregion; }
+upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
/* upb_stringsrc **************************************************************/
-uint32_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
+// Fetch callback for upb_stringsrc. The whole string is already in memory,
+// so a successful fetch reports every byte from "ofs" to the end of the
+// string in *read. A fetch at exactly the end of the string sets EOF on the
+// bytesrc status and returns UPB_BYTE_EOF without writing *read.
+upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
upb_stringsrc *src = _src;
- upb_status_seteof(s);
- return src->len - ofs;
+ // ofs == src->len is a legal request (handled as EOF just below); only
+ // offsets past the end of the string indicate a caller bug.
+ assert(ofs <= src->len);
+ if (ofs == src->len) {
+ upb_status_seteof(&src->bytesrc.status);
+ return UPB_BYTE_EOF;
+ }
+ *read = src->len - ofs;
+ return UPB_BYTE_OK;
}
-void upb_stringsrc_read(const void *_src, uint64_t ofs,
- uint32_t len, char *dst) {
+void upb_stringsrc_copy(const void *_src, uint64_t ofs,
+ size_t len, char *dst) {
const upb_stringsrc *src = _src;
assert(ofs + len <= src->len);
memcpy(dst, src->str + ofs, len);
@@ -250,7 +262,7 @@ void upb_stringsrc_discard(void *src, uint64_t ofs) {
(void)ofs;
}
-const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, uint32_t *len) {
+const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
const upb_stringsrc *src = _s;
*len = src->len - ofs;
return src->str + ofs;
@@ -260,7 +272,7 @@ void upb_stringsrc_init(upb_stringsrc *s) {
static upb_bytesrc_vtbl vtbl = {
&upb_stringsrc_fetch,
&upb_stringsrc_discard,
- &upb_stringsrc_read,
+ &upb_stringsrc_copy,
&upb_stringsrc_getptr,
};
upb_bytesrc_init(&s->bytesrc, &vtbl);
@@ -269,7 +281,7 @@ void upb_stringsrc_init(upb_stringsrc *s) {
s->byteregion.toplevel = true;
}
-void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) {
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
s->str = str;
s->len = len;
s->byteregion.start = 0;
@@ -280,18 +292,13 @@ void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) {
void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
-upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
- return &s->bytesrc;
-}
-
-
/* upb_stringsink *************************************************************/
void upb_stringsink_uninit(upb_stringsink *s) {
free(s->str);
}
-void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t size) {
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
free(s->str);
s->str = str;
s->len = 0;
diff --git a/upb/bytestream.h b/upb/bytestream.h
index 3b339f1..409ae80 100644
--- a/upb/bytestream.h
+++ b/upb/bytestream.h
@@ -63,11 +63,17 @@
// +------------------------
// | nondelimited region Z <-- won't return EOF until data source hits EOF.
// +------------------------
+//
+// TODO: if 64-bit math for stream offsets is a performance issue on
+// non-64-bit machines, we could introduce a upb_off_t typedef that can be
+// defined as a 32-bit type for applications that don't need to handle
+// streams longer than 4GB.
#ifndef UPB_BYTESTREAM_H
#define UPB_BYTESTREAM_H
+#include <errno.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
@@ -79,6 +85,12 @@
extern "C" {
#endif
+typedef enum {
+ UPB_BYTE_OK = UPB_OK,
+ UPB_BYTE_WOULDBLOCK = UPB_SUSPENDED,
+ UPB_BYTE_ERROR = UPB_ERROR,
+ UPB_BYTE_EOF
+} upb_bytesuccess_t;
/* upb_bytesrc ****************************************************************/
@@ -90,10 +102,10 @@ extern "C" {
// upb_bytesrc is a virtual base class with implementations that get data from
// eg. a string, a cord, a file descriptor, a FILE*, etc.
-typedef uint32_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
+typedef upb_bytesuccess_t upb_bytesrc_fetch_func(void*, uint64_t, size_t*);
typedef void upb_bytesrc_discard_func(void*, uint64_t);
-typedef void upb_bytesrc_copy_func(const void*, uint64_t, uint32_t, char*);
-typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, uint32_t*);
+typedef void upb_bytesrc_copy_func(const void*, uint64_t, size_t, char*);
+typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, size_t*);
typedef struct _upb_bytesrc_vtbl {
upb_bytesrc_fetch_func *fetch;
upb_bytesrc_discard_func *discard;
@@ -102,21 +114,27 @@ typedef struct _upb_bytesrc_vtbl {
} upb_bytesrc_vtbl;
typedef struct {
- upb_bytesrc_vtbl *vtbl;
+ const upb_bytesrc_vtbl *vtbl;
+ upb_status status;
} upb_bytesrc;
-INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
+INLINE void upb_bytesrc_init(upb_bytesrc *src, const upb_bytesrc_vtbl *vtbl) {
src->vtbl = vtbl;
+ upb_status_init(&src->status);
+}
+
+INLINE void upb_bytesrc_uninit(upb_bytesrc *src) {
+ upb_status_uninit(&src->status);
}
-// Fetches at least one byte starting at ofs, returning the actual number of
-// bytes fetched (or 0 on EOF or error: see *s for details). Some bytesrc's
-// may set EOF on *s after a successful read if no further data is available,
-// but not all bytesrc's support this. It is valid for bytes to be fetched
-// multiple times, as long as the bytes have not been previously discarded.
-INLINE uint32_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs,
- upb_status *s) {
- return src->vtbl->fetch(src, ofs, s);
+// Fetches at least one byte starting at ofs, returning the success or failure
+// of the operation. If UPB_BYTE_OK is returned, *read indicates the number of
+// bytes successfully fetched; any error or EOF status will be reflected in
+// upb_bytesrc_status(). It is valid for bytes to be fetched multiple times,
+// as long as the bytes have not been previously discarded.
+INLINE upb_bytesuccess_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs,
+ size_t *read) {
+ return src->vtbl->fetch(src, ofs, read);
}
// Discards all data prior to ofs (except data that is pinned, if pinning
@@ -127,7 +145,7 @@ INLINE void upb_bytesrc_discard(upb_bytesrc *src, uint64_t ofs) {
// Copies "len" bytes of data from ofs to "dst", which must be at least "len"
// bytes long. The given region must not be discarded.
-INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len,
+INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, size_t len,
char *dst) {
src->vtbl->copy(src, ofs, len, dst);
}
@@ -138,7 +156,7 @@ INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len,
// part of the returned buffer is discarded, only the non-discarded bytes
// remain valid).
INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,
- uint32_t *len) {
+ size_t *len) {
return src->vtbl->getptr(src, ofs, len);
}
@@ -148,14 +166,14 @@ INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,
// // is guaranteed that the region will not be discarded (nor will the bytesrc
// // be destroyed) until the region is unpinned. However, not all bytesrc's
// // support pinning; a false return indicates that a pin was not possible.
-// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, uint32_t len) {
+// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, size_t len) {
// return src->vtbl->refregion(src, ofs, len);
// }
//
// // Releases some number of pinned bytes from the beginning of a pinned
// // region (which may be fewer than the total number of bytes pinned).
-// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, uint32_t len,
-// uint32_t bytes_to_release) {
+// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, size_t len,
+// size_t bytes_to_release) {
// src->vtbl->unpin(src, ofs, len);
// }
//
@@ -173,7 +191,7 @@ typedef struct _upb_byteregion {
uint64_t fetch;
uint64_t end; // UPB_NONDELIMITED if nondelimited.
upb_bytesrc *bytesrc;
- bool toplevel; // If true, discards hit the underlying byteregion.
+ bool toplevel; // If true, discards hit the underlying bytesrc.
} upb_byteregion;
// Initializes a byteregion. Its initial value will be empty. No methods may
@@ -225,14 +243,17 @@ void upb_byteregion_release(upb_byteregion *r);
// Attempts to fetch more data, extending the fetched range of this byteregion.
// Returns UPB_BYTE_OK if the fetched region was extended by at least one byte,
// or a non-OK upb_bytesuccess_t (e.g. UPB_BYTE_EOF) otherwise.
-bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s);
+upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r);
-// Fetches all remaining data for "r", returning false if the operation failed
-// (see "*s" for details). May only be used on delimited byteregions.
-INLINE bool upb_byteregion_fetchall(upb_byteregion *r, upb_status *s) {
+// Fetches all remaining data for "r", returning UPB_BYTE_OK once EOF is reached
+// or the first non-OK, non-EOF result otherwise. Delimited byteregions only.
+INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) {
assert(upb_byteregion_len(r) != UPB_NONDELIMITED);
- while (upb_byteregion_fetch(r, s)) ; // Empty body.
- return upb_eof(s);
+ upb_bytesuccess_t ret;
+ do {
+ ret = upb_byteregion_fetch(r);
+ } while (ret == UPB_BYTE_OK);
+ return ret == UPB_BYTE_EOF ? UPB_BYTE_OK : ret;
}
// Discards bytes from the byteregion up until ofs (which must be greater or
@@ -243,13 +264,14 @@ INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) {
assert(ofs >= upb_byteregion_discardofs(r));
assert(ofs <= upb_byteregion_endofs(r));
r->discard = ofs;
+ if (ofs > r->fetch) r->fetch = ofs;
if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs);
}
// Copies "len" bytes of data into "dst", starting at ofs. The specified
// region must be available.
INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs,
- uint32_t len, char *dst) {
+ size_t len, char *dst) {
assert(ofs >= upb_byteregion_discardofs(r));
assert(len <= upb_byteregion_available(r, ofs));
upb_bytesrc_copy(r->bytesrc, ofs, len, dst);
@@ -268,7 +290,7 @@ INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) {
// or when the bytes are discarded. If the byteregion is not currently pinned,
// the pointer is only valid for the lifetime of the parent byteregion.
INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
- uint64_t ofs, uint32_t *len) {
+ uint64_t ofs, size_t *len) {
assert(ofs >= upb_byteregion_discardofs(r));
const char *ret = upb_bytesrc_getptr(r->bytesrc, ofs, len);
*len = UPB_MIN(*len, upb_byteregion_available(r, ofs));
@@ -295,7 +317,7 @@ INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
// The string data in the returned region is guaranteed to be contiguous and
// NULL-terminated.
upb_byteregion *upb_byteregion_new(const void *str);
-upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len);
+upb_byteregion *upb_byteregion_newl(const void *str, size_t len);
// May *only* be called on a byteregion created with upb_byteregion_new[l]()!
void upb_byteregion_free(upb_byteregion *r);
@@ -399,7 +421,7 @@ INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) {
typedef struct {
uint64_t ofs;
- uint32_t len;
+ size_t len;
uint32_t refcount;
char data[];
} upb_stdio_buf;
@@ -414,7 +436,6 @@ typedef struct {
bool should_close;
upb_stdio_buf **bufs;
uint32_t nbuf, szbuf;
- upb_byteregion byteregion;
} upb_stdio;
void upb_stdio_init(upb_stdio *stdio);
@@ -433,7 +454,7 @@ void upb_stdio_reset(upb_stdio *stdio, FILE *file);
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
upb_status *s);
-upb_byteregion *upb_stdio_allbytes(upb_stdio *stdio);
+upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
@@ -444,7 +465,7 @@ upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
typedef struct {
upb_bytesrc bytesrc;
const char *str;
- uint32_t len;
+ size_t len;
upb_byteregion byteregion;
} upb_stringsrc;
@@ -454,7 +475,11 @@ void upb_stringsrc_uninit(upb_stringsrc *s);
// Resets the stringsrc to a state where it will vend the given string. The
// string data must be valid until the stringsrc is reset again or destroyed.
-void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len);
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
+
+INLINE upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
+ return &s->bytesrc;
+}
// Returns the top-level upb_byteregion* for this stringsrc. Invalidated when
// the stringsrc is reset.
@@ -468,7 +493,7 @@ INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) {
struct _upb_stringsink {
upb_bytesink bytesink;
char *str;
- uint32_t len, size;
+ size_t len, size;
};
typedef struct _upb_stringsink upb_stringsink;
@@ -478,12 +503,12 @@ void upb_stringsink_uninit(upb_stringsink *s);
// Resets the sink's string to "str", which the sink takes ownership of.
// "str" may be NULL, which will make the sink allocate a new string.
-void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t len);
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t len);
// Releases ownership of the returned string (which is "len" bytes long) and
// resets the internal string to be empty again (as if reset were called with
// NULL).
-const char *upb_stringsink_release(upb_stringsink *s, uint32_t *len);
+const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
// Returns the upb_bytesink* for this stringsink. Invalidated by reset above.
upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s);
diff --git a/upb/def.c b/upb/def.c
index 13418c6..246e9bb 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -334,7 +334,7 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
if (upb_byteregion_len(bytes) == 0) {
upb_value_setint32(&f->defaultval, e->defaultval);
} else {
- uint32_t len;
+ size_t len;
// ptr is guaranteed to be NULL-terminated because the byteregion was
// created with upb_byteregion_newl().
const char *ptr = upb_byteregion_getptr(bytes, 0, &len);
diff --git a/upb/handlers.c b/upb/handlers.c
index 0af09ef..d1b68ad 100644
--- a/upb/handlers.c
+++ b/upb/handlers.c
@@ -13,7 +13,7 @@
static upb_mhandlers *upb_mhandlers_new() {
upb_mhandlers *m = malloc(sizeof(*m));
- upb_inttable_init(&m->fieldtab, 8, sizeof(upb_fhandlers));
+ upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent));
m->startmsg = NULL;
m->endmsg = NULL;
m->is_group = false;
@@ -26,21 +26,21 @@ static upb_mhandlers *upb_mhandlers_new() {
static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
upb_fieldtype_t type,
bool repeated) {
- uint32_t tag = n << 3 | upb_types[type].native_wire_type;
- upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, tag);
- if (f) abort();
- upb_fhandlers new_f = {false, type, repeated,
- repeated && upb_isprimitivetype(type), UPB_ATOMIC_INIT(0),
+ upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n);
+ // TODO: design/refine the API for changing the set of fields or modifying
+ // existing handlers.
+ if (e) return NULL;
+ upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0),
n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
#ifdef UPB_USE_JIT_X64
0, 0, 0,
#endif
NULL};
- upb_inttable_insert(&m->fieldtab, tag, &new_f);
- f = upb_inttable_lookup(&m->fieldtab, tag);
- assert(f);
- assert(f->type == type);
- return f;
+ upb_fhandlers *ptr = malloc(sizeof(*ptr));
+ memcpy(ptr, &new_f, sizeof(upb_fhandlers));
+ upb_itofhandlers_ent ent = {false, ptr};
+ upb_inttable_insert(&m->fieldtab, n, &ent);
+ return ptr;
}
upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
@@ -57,6 +57,7 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP));
assert(subm);
upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated);
+ if (!f) return NULL;
f->submsg = subm;
if (type == UPB_TYPE(GROUP))
_upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false);
@@ -82,6 +83,12 @@ void upb_handlers_unref(upb_handlers *h) {
if (upb_atomic_unref(&h->refcount)) {
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *mh = h->msgs[i];
+ for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab);
+ !upb_inttable_done(j);
+ j = upb_inttable_next(&mh->fieldtab, j)) {
+ upb_itofhandlers_ent *e = upb_inttable_iter_value(j);
+ free(e->f);
+ }
upb_inttable_free(&mh->fieldtab);
#ifdef UPB_USE_JIT_X64
free(mh->tablearray);
@@ -154,41 +161,24 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
/* upb_dispatcher *************************************************************/
-static upb_fhandlers toplevel_f = {
- false, UPB_TYPE(GROUP), false, false, UPB_ATOMIC_INIT(0), 0,
- -1, NULL, NULL, // submsg
-#ifdef NDEBUG
- {{0}},
-#else
- {{0}, -1},
-#endif
- NULL, NULL, NULL, NULL, NULL,
-#ifdef UPB_USE_JIT_X64
- 0, 0, 0,
-#endif
- NULL};
-
-void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
- upb_skip_handler *skip, upb_exit_handler *exit,
+void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
+ upb_exit_handler UPB_NORETURN *exit,
void *srcclosure) {
- d->handlers = h;
- upb_handlers_ref(h);
- for (int i = 0; i < h->msgs_len; i++) {
- upb_mhandlers *m = h->msgs[i];
- upb_inttable_compact(&m->fieldtab);
- }
- d->stack[0].f = &toplevel_f;
+ d->stack[0].f = NULL; // Should never be read.
d->limit = &d->stack[UPB_MAX_NESTING];
- d->skip = skip;
- d->exit = exit;
+ d->exitjmp = exit;
d->srcclosure = srcclosure;
d->top_is_implicit = false;
- upb_status_init(&d->status);
+ d->msgent = NULL;
+ d->top = NULL;
+ d->toplevel_msgent = NULL;
+ d->status = status;
}
-upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
- d->msgent = d->handlers->msgs[0];
- d->dispatch_table = &d->msgent->fieldtab;
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure,
+ upb_mhandlers *top) {
+ d->msgent = top;
+ d->toplevel_msgent = top;
d->top = d->stack;
d->top->closure = closure;
d->top->is_sequence = false;
@@ -197,46 +187,32 @@ upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
}
void upb_dispatcher_uninit(upb_dispatcher *d) {
- upb_handlers_unref(d->handlers);
- upb_status_uninit(&d->status);
}
void upb_dispatch_startmsg(upb_dispatcher *d) {
upb_flow_t flow = UPB_CONTINUE;
if (d->msgent->startmsg) d->msgent->startmsg(d->top->closure);
- if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+ if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
}
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
assert(d->top == d->stack);
- if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
+ if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
// TODO: should we avoid this copy by passing client's status obj to cbs?
- upb_status_copy(status, &d->status);
-}
-
-void indent(upb_dispatcher *d) {
- for (int i = 0; i < (d->top - d->stack); i++) fprintf(stderr, " ");
-}
-
-void indentm1(upb_dispatcher *d) {
- for (int i = 0; i < (d->top - d->stack - 1); i++) fprintf(stderr, " ");
+ upb_status_copy(status, d->status);
}
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_fhandlers *f) {
- //indent(d);
- //fprintf(stderr, "START SEQ: %d\n", f->number);
- if((d->top+1) >= d->limit) {
- upb_status_seterrliteral(&d->status, "Nesting too deep.");
- _upb_dispatcher_unwind(d, UPB_BREAK);
- return d->top; // Dummy.
+ if (d->top + 1 >= d->limit) {
+ upb_status_seterrliteral(d->status, "Nesting too deep.");
+ _upb_dispatcher_abortjmp(d);
}
upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
if (f->startseq) sflow = f->startseq(d->top->closure, f->fval);
if (sflow.flow != UPB_CONTINUE) {
- _upb_dispatcher_unwind(d, sflow.flow);
- return d->top; // Dummy.
+ _upb_dispatcher_abortjmp(d);
}
++d->top;
@@ -248,8 +224,6 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
}
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
- //indentm1(d);
- //fprintf(stderr, "END SEQ\n");
assert(d->top > d->stack);
assert(d->top->is_sequence);
upb_fhandlers *f = d->top->f;
@@ -257,30 +231,23 @@ upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
upb_flow_t flow = UPB_CONTINUE;
if (f->endseq) flow = f->endseq(d->top->closure, f->fval);
if (flow != UPB_CONTINUE) {
- printf("YO, UNWINDING!\n");
- _upb_dispatcher_unwind(d, flow);
- return d->top; // Dummy.
+ _upb_dispatcher_abortjmp(d);
}
- d->msgent = d->top->f->submsg ? d->top->f->submsg : d->handlers->msgs[0];
- d->dispatch_table = &d->msgent->fieldtab;
+ d->msgent = d->top->f ? d->top->f->submsg : d->toplevel_msgent;
return d->top;
}
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_fhandlers *f) {
- //indent(d);
- //fprintf(stderr, "START SUBMSG: %d\n", f->number);
- if((d->top+1) >= d->limit) {
- upb_status_seterrliteral(&d->status, "Nesting too deep.");
- _upb_dispatcher_unwind(d, UPB_BREAK);
- return d->top; // Dummy.
+ if (d->top + 1 >= d->limit) {
+ upb_status_seterrliteral(d->status, "Nesting too deep.");
+ _upb_dispatcher_abortjmp(d);
}
upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval);
if (sflow.flow != UPB_CONTINUE) {
- _upb_dispatcher_unwind(d, sflow.flow);
- return d->top; // Dummy.
+ _upb_dispatcher_abortjmp(d);
}
++d->top;
@@ -289,24 +256,20 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
d->top->is_packed = false;
d->top->closure = sflow.closure;
d->msgent = f->submsg;
- d->dispatch_table = &d->msgent->fieldtab;
upb_dispatch_startmsg(d);
return d->top;
}
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
- //indentm1(d);
- //fprintf(stderr, "END SUBMSG\n");
assert(d->top > d->stack);
assert(!d->top->is_sequence);
upb_fhandlers *f = d->top->f;
- if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
+ if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
d->msgent = d->top->f->msg;
- d->dispatch_table = &d->msgent->fieldtab;
--d->top;
upb_flow_t flow = UPB_CONTINUE;
if (f->endsubmsg) f->endsubmsg(d->top->closure, f->fval);
- if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+ if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
return d->top;
}
@@ -320,14 +283,7 @@ bool upb_dispatcher_islegalend(upb_dispatcher *d) {
return false;
}
-void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) {
- upb_dispatcher_frame *frame = d->top;
- while (1) {
- frame->f->submsg->endmsg(frame->closure, &d->status);
- frame->f->endsubmsg(frame->closure, frame->f->fval);
- --frame;
- if (frame < d->stack) { d->exit(d->srcclosure); return; }
- d->top = frame;
- if (flow == UPB_SKIPSUBMSG) return;
- }
+void _upb_dispatcher_abortjmp(upb_dispatcher *d) {
+ d->exitjmp(d->srcclosure);
+ assert(false); // Never returns.
}
diff --git a/upb/handlers.h b/upb/handlers.h
index e17a726..9ed02c1 100644
--- a/upb/handlers.h
+++ b/upb/handlers.h
@@ -132,13 +132,15 @@ typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval);
// A upb_fhandlers object represents the set of handlers associated with one
// specific message field.
+//
+// TODO: remove upb_decoder-specific fields from this, and instead have
+// upb_decoderplan make a deep copy of the whole graph with its own fields
+// added.
struct _upb_decoder;
struct _upb_mhandlers;
typedef struct _upb_fieldent {
- bool junk;
upb_fieldtype_t type;
bool repeated;
- bool is_repeated_primitive;
upb_atomic_t refcount;
uint32_t number;
int32_t valuehasbit;
@@ -158,6 +160,11 @@ typedef struct _upb_fieldent {
void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
} upb_fhandlers;
+typedef struct {
+ bool junk; // Stolen by table impl; see table.h for details.
+ upb_fhandlers *f;
+} upb_itofhandlers_ent;
+
// fhandlers are created as part of a upb_handlers instance, but can be ref'd
// and unref'd to prolong the life of the handlers.
void upb_fhandlers_ref(upb_fhandlers *m);
@@ -194,16 +201,18 @@ typedef struct _upb_mhandlers {
upb_inttable fieldtab; // Maps field number -> upb_fhandlers.
bool is_group;
#ifdef UPB_USE_JIT_X64
- uint32_t jit_startmsg_pclabel;
- uint32_t jit_endofbuf_pclabel;
- uint32_t jit_endofmsg_pclabel;
- uint32_t jit_dyndispatch_pclabel;
- uint32_t jit_unknownfield_pclabel;
- int32_t jit_parent_field_done_pclabel;
+ // Used inside the JIT to track labels (jmp targets) in the generated code.
+ uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message.
+ uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end?
+ uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message.
+ uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup.
+ uint32_t jit_unknownfield_pclabel; // Parsed an unknown field.
uint32_t max_field_number;
// Currently keyed on field number. Could also try keying it
// on encoded or decoded tag, or on encoded field number.
void **tablearray;
+ // Pointer to the JIT code for parsing this message.
+ void *jit_func;
#endif
} upb_mhandlers;
@@ -316,62 +325,47 @@ INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgd
typedef struct {
upb_fhandlers *f;
void *closure;
-
- // Members to use as the data source requires.
- void *srcclosure;
uint64_t end_ofs;
- uint16_t msgindex;
- uint16_t fieldindex;
-
bool is_sequence; // frame represents seq or submsg? (f might be both).
bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX
// (strings aren't pushed).
} upb_dispatcher_frame;
-// Called when some of the input needs to be skipped. All frames from the
-// current top to "bottom", inclusive, should be skipped.
-typedef void upb_skip_handler(void *, upb_dispatcher_frame *bottom);
typedef void upb_exit_handler(void *);
typedef struct {
upb_dispatcher_frame *top, *limit;
- upb_handlers *handlers;
-
// Message handlers for the current level and for the top-level message.
upb_mhandlers *msgent;
- upb_inttable *dispatch_table;
- upb_skip_handler *skip;
- upb_exit_handler *exit;
+ upb_mhandlers *toplevel_msgent;
+ upb_exit_handler UPB_NORETURN *exitjmp;
void *srcclosure;
bool top_is_implicit;
// Stack.
- upb_status status;
+ upb_status *status;
upb_dispatcher_frame stack[UPB_MAX_NESTING];
} upb_dispatcher;
-void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
- upb_skip_handler *skip, upb_exit_handler *exit,
- void *closure);
-upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure);
+// Caller retains ownership of the status object.
+void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
+ upb_exit_handler UPB_NORETURN *exit, void *closure);
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure,
+ upb_mhandlers *top_msg);
void upb_dispatcher_uninit(upb_dispatcher *d);
// Tests whether the message could legally end here (either the stack is empty
// or the only open stack frame is implicit).
bool upb_dispatcher_islegalend(upb_dispatcher *d);
-// Looks up a field by number for the current message.
-INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {
- return (upb_fhandlers*)upb_inttable_fastlookup(
- d->dispatch_table, n, sizeof(upb_fhandlers));
-}
-
-void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow);
+// Unwinds one or more stack frames based on the given flow constant that was
+// just returned from a handler. Calls end handlers as appropriate.
+void _upb_dispatcher_abortjmp(upb_dispatcher *d) UPB_NORETURN;
INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) {
char *p = (char*)_p;
- if (hasbit >= 0) p[hasbit / 8] |= (1 << (hasbit % 8));
+ if (hasbit >= 0) p[(uint32_t)hasbit / 8] |= (1 << ((uint32_t)hasbit % 8));
}
// Dispatch functions -- call the user handler and handle errors.
@@ -380,11 +374,12 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
upb_flow_t flow = UPB_CONTINUE;
if (f->value) flow = f->value(d->top->closure, f->fval, val);
_upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
- if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+ if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
}
void upb_dispatch_startmsg(upb_dispatcher *d);
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
-upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
+ upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);
diff --git a/upb/msg.c b/upb/msg.c
index 78309cf..77521e5 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -86,14 +86,16 @@ void upb_stdmsg_sethas(void *_m, upb_value fval) {
assert(_m != NULL);
char *m = _m;
const upb_fielddef *f = upb_value_getfielddef(fval);
- if (f->hasbit >= 0) m[f->hasbit / 8] |= (1 << (f->hasbit % 8));
+ if (f->hasbit >= 0)
+ m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8));
}
bool upb_stdmsg_has(const void *_m, upb_value fval) {
assert(_m != NULL);
const char *m = _m;
const upb_fielddef *f = upb_value_getfielddef(fval);
- return f->hasbit < 0 || (m[f->hasbit / 8] & (1 << (f->hasbit % 8)));
+ return f->hasbit < 0 ||
+ (m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8)));
}
#define UPB_ACCESSORS(type, ctype) \
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index ae54e47..1b5fc17 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -13,14 +13,95 @@
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"
+/* upb_decoderplan ************************************************************/
+
#ifdef UPB_USE_JIT_X64
-#define Dst_DECL upb_decoder *d
-#define Dst_REF (d->dynasm)
-#define Dst (d)
+// These defines are necessary for DynASM codegen.
+// See dynasm/dasm_proto.h for more info.
+#define Dst_DECL upb_decoderplan *plan
+#define Dst_REF (plan->dynasm)
+#define Dst (plan)
+
+// In debug mode, make DynASM do internal checks (must be defined before any
+// dasm header is included).
+#ifndef NDEBUG
+#define DASM_CHECKS
+#endif
+
#include "dynasm/dasm_proto.h"
#include "upb/pb/decoder_x64.h"
#endif
+typedef struct {
+ upb_fhandlers base;
+ void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
+#ifdef UPB_USE_JIT_X64
+ uint32_t jit_pclabel;
+ uint32_t jit_pclabel_notypecheck;
+#endif
+} upb_dplanfield;
+
+typedef struct {
+ upb_mhandlers base;
+#ifdef UPB_USE_JIT_X64
+ uint32_t jit_startmsg_pclabel;
+ uint32_t jit_endofbuf_pclabel;
+ uint32_t jit_endofmsg_pclabel;
+ uint32_t jit_dyndispatch_pclabel;
+ uint32_t jit_unknownfield_pclabel;
+ int32_t jit_parent_field_done_pclabel;
+ uint32_t max_field_number;
+ // Currently keyed on field number. Could also try keying it
+ // on encoded or decoded tag, or on encoded field number.
+ void **tablearray;
+#endif
+} upb_dplanmsg;
+
+static void *upb_decoderplan_fptrs[];
+
+void upb_decoderplan_initfhandlers(upb_fhandlers *f) {
+ f->decode = upb_decoderplan_fptrs[f->type];
+}
+
+upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
+ upb_decoderplan *p = malloc(sizeof(*p));
+ p->handlers = h;
+ upb_handlers_ref(h);
+ h->should_jit = allowjit;
+#ifdef UPB_USE_JIT_X64
+ p->jit_code = NULL;
+ if (allowjit) upb_decoderplan_makejit(p);
+#endif
+ // Set function pointers for each field's decode function.
+ for (int i = 0; i < h->msgs_len; i++) {
+ upb_mhandlers *m = h->msgs[i];
+ for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
+ !upb_inttable_done(i);
+ i = upb_inttable_next(&m->fieldtab, i)) {
+ upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
+ upb_fhandlers *f = e->f;
+ upb_decoderplan_initfhandlers(f);
+ }
+ }
+ return p;
+}
+
+void upb_decoderplan_unref(upb_decoderplan *p) {
+ // TODO: make truly refcounted.
+ upb_handlers_unref(p->handlers);
+#ifdef UPB_USE_JIT_X64
+ if (p->jit_code) upb_decoderplan_freejit(p);
+#endif
+ free(p);
+}
+
+bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
+ return p->jit_code != NULL;
+}
+
+
+/* upb_decoder ****************************************************************/
+
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration. But emperically on a Core i7, performance increases 30-50%
@@ -29,18 +110,17 @@
#define FORCEINLINE static __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))
-static void upb_decoder_exit(upb_decoder *d) {
+UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) {
// Resumable decoder would back out to completed_ptr (and possibly get a
// previous buffer).
siglongjmp(d->exitjmp, 1);
}
-static void upb_decoder_exit2(void *_d) {
- upb_decoder *d = _d;
- upb_decoder_exit(d);
+UPB_NORETURN static void upb_decoder_exitjmp2(void *d) {
+ upb_decoder_exitjmp(d);
}
-static void upb_decoder_abort(upb_decoder *d, const char *msg) {
- upb_status_seterrliteral(d->status, msg);
- upb_decoder_exit(d);
+UPB_NORETURN static void upb_decoder_abortjmp(upb_decoder *d, const char *msg) {
+ upb_status_seterrliteral(&d->status, msg);
+ upb_decoder_exitjmp(d);
}
/* Buffering ******************************************************************/
@@ -50,8 +130,12 @@ static void upb_decoder_abort(upb_decoder *d, const char *msg) {
// the next one. When we've committed our progress we discard any previous
// buffers' regions.
-static uint32_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
-static void upb_decoder_advance(upb_decoder *d, uint32_t len) {
+static size_t upb_decoder_bufleft(upb_decoder *d) {
+ assert(d->end >= d->ptr);
+ return d->end - d->ptr;
+}
+
+static void upb_decoder_advance(upb_decoder *d, size_t len) {
assert(upb_decoder_bufleft(d) >= len);
d->ptr += len;
}
@@ -66,29 +150,49 @@ uint64_t upb_decoder_bufendofs(upb_decoder *d) {
static void upb_decoder_setmsgend(upb_decoder *d) {
upb_dispatcher_frame *f = d->dispatcher.top;
- uint32_t delimlen = f->end_ofs - d->bufstart_ofs;
- uint32_t buflen = d->end - d->buf;
+ size_t delimlen = f->end_ofs - d->bufstart_ofs;
+ size_t buflen = d->end - d->buf;
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf.
d->top_is_packed = f->is_packed;
+ d->dispatch_table = &d->dispatcher.msgent->fieldtab;
}
-static bool upb_trypullbuf(upb_decoder *d) {
- assert(upb_decoder_bufleft(d) == 0);
- d->bufstart_ofs = upb_decoder_offset(d);
+static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) {
+ assert(ofs >= upb_decoder_offset(d));
+ if (ofs > upb_byteregion_endofs(d->input))
+ upb_decoder_abortjmp(d, "Unexpected EOF");
d->buf = NULL;
d->ptr = NULL;
d->end = NULL;
- if (upb_byteregion_available(d->input, upb_decoder_offset(d)) == 0 &&
- !upb_byteregion_fetch(d->input, d->status)) {
- if (upb_eof(d->status)) return false;
- upb_decoder_exit(d); // Non-EOF error.
+ d->delim_end = NULL;
+#ifdef UPB_USE_JIT_X64
+ d->jit_end = NULL;
+#endif
+ d->bufstart_ofs = ofs;
+}
+
+static bool upb_trypullbuf(upb_decoder *d) {
+ assert(upb_decoder_bufleft(d) == 0);
+ upb_decoder_skiptonewbuf(d, upb_decoder_offset(d));
+ if (upb_byteregion_available(d->input, d->bufstart_ofs) == 0) {
+ switch (upb_byteregion_fetch(d->input)) {
+ case UPB_BYTE_OK:
+ assert(upb_byteregion_available(d->input, d->bufstart_ofs) > 0);
+ break;
+ case UPB_BYTE_EOF: return false;
+ case UPB_BYTE_ERROR: upb_decoder_abortjmp(d, "I/O error in input");
+ // Decoder resuming is not yet supported.
+ case UPB_BYTE_WOULDBLOCK:
+ upb_decoder_abortjmp(d, "Input returned WOULDBLOCK");
+ }
}
- uint32_t len;
+ size_t len;
d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len);
assert(len > 0);
d->ptr = d->buf;
d->end = d->buf + len;
+ upb_decoder_setmsgend(d);
#ifdef UPB_USE_JIT_X64
// If we start parsing a value, we can parse up to 20 bytes without
// having to bounds-check anything (2 10-byte varints). Since the
@@ -96,27 +200,29 @@ static bool upb_trypullbuf(upb_decoder *d) {
// JIT bails if there are not 20 bytes available.
d->jit_end = d->end - 20;
#endif
- upb_decoder_setmsgend(d);
+ assert(upb_decoder_bufleft(d) > 0);
return true;
}
static void upb_pullbuf(upb_decoder *d) {
- if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
+ if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF");
}
-void upb_decoder_skipto(upb_decoder *d, uint64_t ofs) {
- if (ofs < upb_decoder_bufendofs(d)) {
+void upb_decoder_checkpoint(upb_decoder *d) {
+ upb_byteregion_discard(d->input, upb_decoder_offset(d));
+}
+
+void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
+ if (ofs <= upb_decoder_bufendofs(d)) {
upb_decoder_advance(d, ofs - upb_decoder_offset(d));
} else {
- d->buf = NULL;
- d->ptr = NULL;
- d->end = NULL;
- d->bufstart_ofs = ofs;
+ upb_decoder_skiptonewbuf(d, ofs);
}
+ upb_decoder_checkpoint(d);
}
-void upb_decoder_checkpoint(upb_decoder *d) {
- upb_byteregion_discard(d->input, upb_decoder_offset(d));
+void upb_decoder_discard(upb_decoder *d, size_t bytes) {
+ upb_decoder_discardto(d, upb_decoder_offset(d) + bytes);
}
@@ -126,15 +232,13 @@ NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
- const char *ptr = d->ptr;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
- if (upb_decoder_bufleft(d) == 0) {
- upb_pullbuf(d);
- ptr = d->ptr;
- }
- u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
+ if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+ u64 |= ((uint64_t)(byte = *d->ptr) & 0x7F) << bitpos;
+ upb_decoder_advance(d, 1);
}
- if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
+ if(bitpos == 70 && (byte & 0x80))
+ upb_decoder_abortjmp(d, "Unterminated varint");
return u64;
}
@@ -151,7 +255,7 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
u64 = upb_decode_varint_slow(d);
- if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
+ if (u64 > UINT32_MAX) upb_decoder_abortjmp(d, "Unterminated 32-bit varint");
ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop.
done:
@@ -174,7 +278,7 @@ FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
if (upb_decoder_bufleft(d) >= 10) {
// Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
- if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
+ if (r.p == NULL) upb_decoder_abortjmp(d, "Unterminated varint");
upb_decoder_advance(d, r.p - d->ptr);
return r.val;
} else if (upb_decoder_bufleft(d) > 0) {
@@ -200,11 +304,12 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
} else {
// Slow case.
size_t read = 0;
- while (read < bytes) {
- size_t avail = upb_decoder_bufleft(d);
+ while (1) {
+ size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
memcpy(buf + read, d->ptr, avail);
upb_decoder_advance(d, avail);
read += avail;
+ if (read == bytes) break;
upb_pullbuf(d);
}
}
@@ -213,26 +318,28 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
uint32_t u32;
upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
- return u32; // TODO: proper byte swapping
+ return u32; // TODO: proper byte swapping for big-endian machines.
}
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
uint64_t u64;
upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
- return u64; // TODO: proper byte swapping
+ return u64; // TODO: proper byte swapping for big-endian machines.
}
INLINE upb_byteregion *upb_decode_string(upb_decoder *d) {
uint32_t strlen = upb_decode_varint32(d);
uint64_t offset = upb_decoder_offset(d);
+ if (offset + strlen > upb_byteregion_endofs(d->input))
+ upb_decoder_abortjmp(d, "Unexpected EOF");
upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen);
// Could make it an option on the callback whether we fetchall() first or not.
- upb_byteregion_fetchall(&d->str_byteregion, d->status);
- if (!upb_ok(d->status)) upb_decoder_exit(d);
- upb_decoder_skipto(d, offset + strlen);
+ if (upb_byteregion_fetchall(&d->str_byteregion) != UPB_BYTE_OK)
+ upb_decoder_abortjmp(d, "Couldn't fetchall() on string.");
+ upb_decoder_discardto(d, offset + strlen);
return &d->str_byteregion;
}
-INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
+INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
@@ -253,8 +360,6 @@ INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; }
-static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
-static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
T(INT32, varint, int32, int32_t)
T(INT64, varint, int64, int64_t)
@@ -271,9 +376,10 @@ T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, byteregion, upb_byteregion*)
+#undef T
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
- upb_push(d, f, UPB_NONDELIMITED);
+ upb_push_msg(d, f, UPB_NONDELIMITED);
}
static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
(void)f;
@@ -281,15 +387,30 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d);
}
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
- upb_push(d, f, upb_decode_varint32(d) + upb_decoder_offset(d));
+ uint32_t len = upb_decode_varint32(d);
+ upb_push_msg(d, f, upb_decoder_offset(d) + len);
}
+#define F(type) &upb_decode_ ## type
+static void *upb_decoderplan_fptrs[] = {
+ &upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
+ F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
+ F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
+ F(SFIXED64), F(SINT32), F(SINT64)};
+#undef F
+
/* The main decoding loop *****************************************************/
static void upb_decoder_checkdelim(upb_decoder *d) {
+ // TODO: This doesn't work for the case that no buffer is currently loaded
+ // (i.e. d->buf == NULL) because delim_end is NULL even if we are at
+ // end-of-delim. Need to add a test that exercises this by putting a buffer
+ // seam in the middle of the final delimited value in a proto that we skip
+ // for some reason (like because it's unknown and we have no unknown field
+ // handler).
while (d->delim_end != NULL && d->ptr >= d->delim_end) {
- if (d->ptr > d->delim_end) upb_decoder_abort(d, "Bad submessage end");
+ if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end");
if (d->dispatcher.top->is_sequence) {
upb_dispatch_endseq(&d->dispatcher);
} else {
@@ -299,33 +420,36 @@ static void upb_decoder_checkdelim(upb_decoder *d) {
}
}
-static void upb_decoder_enterjit(upb_decoder *d) {
- (void)d;
-#ifdef UPB_USE_JIT_X64
- if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
- // Decodes as many fields as possible, updating d->ptr appropriately,
- // before falling through to the slow(er) path.
- void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
- upb_jit_decode(d);
- }
-#endif
-}
-
INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag;
if (!upb_trydecode_varint32(d, &tag)) return NULL;
uint8_t wire_type = tag & 0x7;
- upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
+ uint32_t fieldnum = tag >> 3;
+ upb_itofhandlers_ent *e = upb_inttable_fastlookup(
+ d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent));
+ upb_fhandlers *f = e ? e->f : NULL;
+
+ if (f) {
+ // Wire type check.
+ if (wire_type == upb_types[f->type].native_wire_type ||
+ (wire_type == UPB_WIRE_TYPE_DELIMITED &&
+ upb_types[f->type].is_numeric)) {
+ // Wire type is ok.
+ } else {
+ f = NULL;
+ }
+ }
// There are no explicit "startseq" or "endseq" markers in protobuf
// streams, so we have to infer them by noticing when a repeated field
// starts or ends.
- if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
+ upb_dispatcher_frame *fr = d->dispatcher.top;
+ if (fr->is_sequence && fr->f != f) {
upb_dispatch_endseq(&d->dispatcher);
upb_decoder_setmsgend(d);
}
- if (f && f->repeated && d->dispatcher.top->f != f) {
+ if (f && f->repeated && (!fr->is_sequence || fr->f != f)) {
uint64_t old_end = d->dispatcher.top->end_ofs;
upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
@@ -334,7 +458,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
fr->end_ofs = old_end;
} else {
// Packed primitive field.
- fr->end_ofs = upb_decoder_offset(d) + upb_decode_varint(d);
+ uint32_t len = upb_decode_varint32(d);
+ fr->end_ofs = upb_decoder_offset(d) + len;
fr->is_packed = true;
}
upb_decoder_setmsgend(d);
@@ -343,14 +468,20 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
if (f) return f;
// Unknown field.
+ if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
+ upb_decoder_abortjmp(d, "Invalid field number");
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
- case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
- case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
+ case UPB_WIRE_TYPE_32BIT: upb_decoder_discard(d, 4); break;
+ case UPB_WIRE_TYPE_64BIT: upb_decoder_discard(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
- upb_decoder_advance(d, upb_decode_varint32(d)); break;
+ upb_decoder_discard(d, upb_decode_varint32(d)); break;
+ case UPB_WIRE_TYPE_START_GROUP:
+ upb_decoder_abortjmp(d, "Can't handle unknown groups yet");
+ case UPB_WIRE_TYPE_END_GROUP:
+ upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag");
default:
- upb_decoder_abort(d, "Invalid wire type");
+ upb_decoder_abortjmp(d, "Invalid wire type");
}
// TODO: deliver to unknown field callback.
upb_decoder_checkpoint(d);
@@ -358,16 +489,22 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
}
}
-void upb_decoder_decode(upb_decoder *d, upb_status *status) {
- if (sigsetjmp(d->exitjmp, 0)) { assert(!upb_ok(status)); return; }
- d->status = status;
+upb_success_t upb_decoder_decode(upb_decoder *d) {
+ assert(d->input);
+ if (sigsetjmp(d->exitjmp, 0)) {
+ assert(!upb_ok(&d->status));
+ return UPB_ERROR;
+ }
upb_dispatch_startmsg(&d->dispatcher);
// Prime the buf so we can hit the JIT immediately.
upb_trypullbuf(d);
upb_fhandlers *f = d->dispatcher.top->f;
- while(1) { // Main loop: executed once per tag/field pair.
+ while(1) {
upb_decoder_checkdelim(d);
+#ifdef UPB_USE_JIT_X64
upb_decoder_enterjit(d);
+ upb_decoder_checkpoint(d);
+#endif
if (!d->top_is_packed) f = upb_decode_tag(d);
if (!f) {
// Sucessful EOF. We may need to dispatch a top-level implicit frame.
@@ -375,64 +512,46 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
assert(d->dispatcher.top->is_sequence);
upb_dispatch_endseq(&d->dispatcher);
}
- return;
+ return UPB_OK;
}
f->decode(d, f);
upb_decoder_checkpoint(d);
}
}
-static void upb_decoder_skip(void *_d, upb_dispatcher_frame *f) {
- upb_decoder *d = _d;
- if (f->end_ofs != UPB_NONDELIMITED) {
- upb_decoder_skipto(d, d->dispatcher.top->end_ofs);
- } else {
- // TODO: how to support skipping groups? Dispatcher could drop callbacks,
- // or it could be special-cased inside the decoder.
- }
+void upb_decoder_init(upb_decoder *d) {
+ upb_status_init(&d->status);
+ upb_dispatcher_init(&d->dispatcher, &d->status, &upb_decoder_exitjmp2, d);
+ d->plan = NULL;
+ d->input = NULL;
}
-void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
- upb_dispatcher_init(
- &d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
-#ifdef UPB_USE_JIT_X64
- d->jit_code = NULL;
- if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
-#endif
- // Set function pointers for each field's decode function.
- for (int i = 0; i < handlers->msgs_len; i++) {
- upb_mhandlers *m = handlers->msgs[i];
- for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- upb_fhandlers *f = upb_inttable_iter_value(i);
-#define F(type) &upb_decode_ ## type
- static void *fptrs[] = {&upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
- F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
- F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
- F(SFIXED64), F(SINT32), F(SINT64)};
- f->decode = fptrs[f->type];
- }
- }
+void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) {
+ assert(msg_offset >= 0);
+ assert(msg_offset < p->handlers->msgs_len);
+ d->plan = p;
+ d->msg_offset = msg_offset;
+ d->input = NULL;
}
-void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure) {
- upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
+void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
+ void *closure) {
+ assert(d->plan);
+ upb_dispatcher_frame *f =
+ upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]);
+ upb_status_clear(&d->status);
f->end_ofs = UPB_NONDELIMITED;
d->input = input;
- d->bufstart_ofs = upb_byteregion_startofs(input);
- d->buf = NULL;
- d->ptr = NULL;
- d->end = NULL; // Force a buffer pull.
- d->delim_end = NULL; // But don't let end-of-message get triggered.
d->str_byteregion.bytesrc = input->bytesrc;
-#ifdef UPB_USE_JIT_X64
- d->jit_end = NULL;
-#endif
+
+ // Protect against assert in skiptonewbuf().
+ d->bufstart_ofs = 0;
+ d->ptr = NULL;
+ d->buf = NULL;
+ upb_decoder_skiptonewbuf(d, upb_byteregion_startofs(input));
}
void upb_decoder_uninit(upb_decoder *d) {
-#ifdef UPB_USE_JIT_X64
- if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
-#endif
upb_dispatcher_uninit(&d->dispatcher);
+ upb_status_uninit(&d->status);
}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index c35bec4..13e5774 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -21,15 +21,43 @@
extern "C" {
#endif
-/* upb_decoder *****************************************************************/
+/* upb_decoderplan ************************************************************/
+
+// A decoderplan contains whatever data structures and generated (JIT-ted) code
+// are necessary to decode protobuf data of a specific type to a specific set
+// of handlers. By generating the plan ahead of time, we avoid having to
+// redo this work every time we decode.
+//
+// A decoderplan is threadsafe, meaning that it can be used concurrently by
+// different upb_decoders in different threads. However, the upb_decoders are
+// *not* thread-safe.
+struct _upb_decoderplan;
+typedef struct _upb_decoderplan upb_decoderplan;
+
+// TODO: add parameter for a list of other decoder plans that we can share
+// generated code with.
+upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit);
+void upb_decoderplan_unref(upb_decoderplan *p);
+
+// Returns true if the plan contains JIT-ted code. This may not be the same as
+// the "allowjit" parameter to the constructor if support for JIT-ting was not
+// compiled in.
+bool upb_decoderplan_hasjitcode(upb_decoderplan *p);
+
+
+/* upb_decoder ****************************************************************/
struct dasm_State;
typedef struct _upb_decoder {
- upb_byteregion *input; // Input data (serialized).
- upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
- upb_status *status; // Where we will store any errors that occur.
- upb_byteregion str_byteregion; // For passing string data to callbacks.
+ upb_decoderplan *plan;
+ int msg_offset; // Which message from the plan is top-level.
+ upb_byteregion *input; // Input data (serialized), not owned.
+ upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
+ upb_status status; // Where we store errors that occur.
+ upb_byteregion str_byteregion; // For passing string data to callbacks.
+
+ upb_inttable *dispatch_table;
// Current input buffer and its stream offset.
const char *buf, *ptr, *end;
@@ -37,40 +65,64 @@ typedef struct _upb_decoder {
// End of the delimited region, relative to ptr, or NULL if not in this buf.
const char *delim_end;
+ // True if the top stack frame represents a packed field.
bool top_is_packed;
#ifdef UPB_USE_JIT_X64
// For JIT, which doesn't do bounds checks in the middle of parsing a field.
const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end)
-
- // JIT-generated machine code (else NULL).
- char *jit_code;
- size_t jit_size;
- char *debug_info;
-
- struct dasm_State *dynasm;
#endif
// For exiting the decoder on error.
sigjmp_buf exitjmp;
} upb_decoder;
-// Initializes/uninitializes a decoder for calling into the given handlers
-// or to write into the given msgdef, given its accessors). Takes a ref
-// on the handlers.
-void upb_decoder_init(upb_decoder *d, upb_handlers *h);
+void upb_decoder_init(upb_decoder *d);
void upb_decoder_uninit(upb_decoder *d);
-// Resets the internal state of an already-allocated decoder. This puts it in a
-// state where it has not seen any data, and expects the next data to be from
-// the beginning of a new protobuf. Decoders must be reset before they can be
-// used. A decoder can be reset multiple times. "input" must live until the
-// decoder is reset again (or destroyed).
-void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure);
+// Resets the plan that the decoder will parse from. "msg_offset" indicates
+// which message from the plan will be used as the top-level message.
+//
+// This will also reset the decoder's input to be uninitialized --
+// upb_decoder_resetinput() must be called before parsing can occur. The plan
+// must live until the decoder is destroyed or reset to a different plan.
+//
+// Must be called before upb_decoder_resetinput() or upb_decoder_decode().
+void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset);
+
+// Resets the input of an already-allocated decoder. This puts it in a state
+// where it has not seen any data, and expects the next data to be from the
+// beginning of a new protobuf. Decoders must have their input reset before
+// they can be used. A decoder can have its input reset multiple times.
+// "input" must live until the decoder is destroyed or has its input reset
+// again. "c" is the closure that will be passed to the handlers.
+//
+// Must be called before upb_decoder_decode().
+void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *c);
+
+// Decodes serialized data (calling handlers as the data is parsed), returning
+// the success of the operation (call upb_decoder_status() for details).
+upb_success_t upb_decoder_decode(upb_decoder *d);
+
+INLINE const upb_status *upb_decoder_status(upb_decoder *d) {
+ return &d->status;
+}
+
+// Implementation details
+
+struct _upb_decoderplan {
+ upb_handlers *handlers; // owns reference.
+
+#ifdef UPB_USE_JIT_X64
+ // JIT-generated machine code (else NULL).
+ char *jit_code;
+ size_t jit_size;
+ char *debug_info;
-// Decodes serialized data (calling handlers as the data is parsed) until error
-// or EOF (see *status for details).
-void upb_decoder_decode(upb_decoder *d, upb_status *status);
+ // This pointer is allocated by dasm_init() and freed by dasm_free().
+ struct dasm_State *dynasm;
+#endif
+};
#ifdef __cplusplus
} /* extern "C" */
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
index 75e5b6b..807191b 100644
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@@ -4,20 +4,15 @@
|// Copyright (c) 2011 Google Inc. See LICENSE for details.
|// Author: Josh Haberman <jhaberman@gmail.com>
|//
-|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
-|// generates code specialized to parsing the specific message and
-|// calling specific handlers.
+|// JIT compiler for upb_decoder on x86. Given a upb_decoderplan object (which
+|// contains an embedded set of upb_handlers), generates code specialized to
+|// parsing the specific message and calling specific handlers.
|//
|// Since the JIT can call other functions (the JIT'ted code is not a leaf
|// function) we must respect alignment rules. On OS X, this means aligning
|// the stack to 16 bytes.
-#define UPB_NONE -1
-#define UPB_MULTIPLE -2
-#define UPB_TOPLEVEL_ONE -3
-
#include <sys/mman.h>
-#include "dynasm/dasm_proto.h"
#include "dynasm/dasm_x86.h"
#ifndef MAP_ANONYMOUS
@@ -73,15 +68,15 @@ gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
-void upb_reg_jit_gdb(upb_decoder *d) {
+void upb_reg_jit_gdb(upb_decoderplan *plan) {
// Create debug info.
size_t elf_len = sizeof(upb_jit_debug_elf_file);
- d->debug_info = malloc(elf_len);
- memcpy(d->debug_info, upb_jit_debug_elf_file, elf_len);
- uint64_t *p = (void*)d->debug_info;
- for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) {
- if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
- if (*p == 0x321) { *p = d->jit_size; }
+ plan->debug_info = malloc(elf_len);
+ memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len);
+ uint64_t *p = (void*)plan->debug_info;
+ for (; (void*)(p+1) <= (void*)plan->debug_info + elf_len; ++p) {
+ if (*p == 0x12345678) { *p = (uintptr_t)plan->jit_code; }
+ if (*p == 0x321) { *p = plan->jit_size; }
}
// Register the JIT-ted code with GDB.
@@ -89,7 +84,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
e->next_entry = __jit_debug_descriptor.first_entry;
e->prev_entry = NULL;
if (e->next_entry) e->next_entry->prev_entry = e;
- e->symfile_addr = d->debug_info;
+ e->symfile_addr = plan->debug_info;
e->symfile_size = elf_len;
__jit_debug_descriptor.first_entry = e;
__jit_debug_descriptor.relevant_entry = e;
@@ -99,12 +94,17 @@ void upb_reg_jit_gdb(upb_decoder *d) {
#else
-void upb_reg_jit_gdb(upb_decoder *d) {
- (void)d;
+void upb_reg_jit_gdb(upb_decoderplan *plan) {
+ (void)plan;
}
#endif
+// Has to be a separate function, otherwise GCC will complain about
+// expressions like (&foo != NULL) because they will never evaluate
+// to false.
+static void upb_assert_notnull(void *addr) { assert(addr != NULL); }
+
|.arch x64
|.actionlist upb_jit_actionlist
|.globals UPB_JIT_GLOBAL_
@@ -126,7 +126,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|// ALL of the code in this file uses these register allocations.
|// When we "call" within this file, we do not use regular calling
|// conventions, but of course when calling to user callbacks we must.
-|.define PTR, rbx
+|.define PTR, rbx // Writing this to DECODER->ptr commits our progress.
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
|.type BYTEREGION,upb_byteregion, r14
@@ -134,6 +134,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.type STDARRAY, upb_stdarray
|
|.macro callp, addr
+|| upb_assert_notnull(addr);
|| if ((uintptr_t)addr < 0xffffffff) {
| call &addr
|| } else {
@@ -191,11 +192,12 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| decode_loaded_varint, 0
| mov ecx, edx
| shr ecx, 3
-| and edx, 0x7
+| and edx, 0x7 // For the type check that will happen later.
| cmp ecx, m->max_field_number // Bounds-check the field.
| ja ->exit_jit // In the future; could be unknown label
|| if ((uintptr_t)m->tablearray < 0xffffffff) {
-| mov rax, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables.
+| // TODO: support hybrid array/hash tables.
+| mov rax, qword [rcx*8 + m->tablearray]
|| } else {
| mov64 rax, (uintptr_t)m->tablearray
| mov rax, qword [rax + rcx*8]
@@ -217,8 +219,9 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing.
| cmp rax, qword DECODER->dispatcher.limit
| jae ->exit_jit // Frame stack overflow.
-| mov qword FRAME:rax->f, f
-| mov dword FRAME:rax->end_ofs, end_offset_
+| mov64 r8, (uintptr_t)f
+| mov qword FRAME:rax->f, r8
+| mov qword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, is_sequence_
| mov DECODER->dispatcher.top, rax
| mov FRAME, rax
@@ -294,7 +297,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|
|.macro sethas, reg, hasbit
|| if (hasbit >= 0) {
-| or byte [reg + (hasbit / 8)], (1 << (hasbit % 8))
+| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
|| }
|.endmacro
@@ -304,8 +307,9 @@ void upb_reg_jit_gdb(upb_decoder *d) {
#include "upb/msg.h"
// Decodes the next val into ARG3, advances PTR.
-static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
- uint8_t type, size_t tag_size) {
+static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
+ upb_mhandlers *m,
+ uint8_t type, size_t tag_size) {
// Decode the value into arg 3 for the callback.
switch (type) {
case UPB_TYPE(DOUBLE):
@@ -365,9 +369,9 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
// robust checks.
| mov ecx, dword [PTR + tag_size]
| decode_loaded_varint tag_size
- | mov rdi, DECODER->effective_end
+ | mov rdi, DECODER->end
| sub rdi, rax
- | cmp ARG3_64, rdi // if (len > d->effective_end - str)
+ | cmp ARG3_64, rdi // if (len > d->end - str)
| ja ->exit_jit // Can't deliver, whole string not in buf.
// Update PTR to point past end of string.
@@ -401,8 +405,8 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
#if 0
// These appear not to speed things up, but keeping around for
// further experimentation.
-static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
- upb_fhandlers *f) {
+static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size,
+ upb_fhandlers *f) {
| mov eax, STDARRAY:ARG1_64->len
| cmp eax, STDARRAY:ARG1_64->size
| jne >2
@@ -434,18 +438,19 @@ static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
}
#endif
-static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
+static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
+ upb_fhandlers *f) {
// Call callbacks.
if (upb_issubmsgtype(f->type)) {
if (f->type == UPB_TYPE(MESSAGE)) {
| mov rsi, PTR
| sub rsi, DECODER->buf
- | add esi, ARG3_32 // = (d->ptr - d->buf) + delim_len
+ | add rsi, ARG3_64 // = (d->ptr - d->buf) + delim_len
} else {
assert(f->type == UPB_TYPE(GROUP));
- | mov esi, UPB_NONDELIMITED
+ | mov rsi, UPB_NONDELIMITED
}
- | pushframe f, esi, false
+ | pushframe f, rsi, false
// Call startsubmsg handler (if any).
if (f->startsubmsg) {
@@ -456,15 +461,11 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| mov CLOSURE, rdx
}
| mov qword FRAME->closure, CLOSURE
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+ | mov DECODER->ptr, PTR
const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
- if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
- | jmp =>sub_m->jit_startmsg_pclabel;
- } else {
- | call =>sub_m->jit_startmsg_pclabel;
- }
-
- |=>f->jit_submsg_done_pclabel:
+ | call =>sub_m->jit_startmsg_pclabel;
// Call endsubmsg handler (if any).
if (f->endsubmsg) {
@@ -474,6 +475,8 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| callp f->endsubmsg
}
| popframe upb_fhandlers_getmsg(f)
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+ | mov DECODER->ptr, PTR
} else {
| mov ARG1_64, CLOSURE
// Test for callbacks we can specialize.
@@ -499,15 +502,15 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
f->value == &upb_stdmsg_setuint64_r ||
f->value == &upb_stdmsg_setptr_r ||
f->value == &upb_stdmsg_setdouble_r) {
- upb_decoder_jit_doappend(d, 8, f);
+ upb_decoderplan_jit_doappend(plan, 8, f);
} else if (f->value == &upb_stdmsg_setint32_r ||
f->value == &upb_stdmsg_setuint32_r ||
f->value == &upb_stdmsg_setfloat_r) {
- upb_decoder_jit_doappend(d, 4, f);
+ upb_decoderplan_jit_doappend(plan, 4, f);
} else if (f->value == &upb_stdmsg_setbool_r) {
- upb_decoder_jit_doappend(d, 1, f);
+ upb_decoderplan_jit_doappend(plan, 1, f);
#endif
- } else {
+ } else if (f->value) {
// Load closure and fval into arg registers.
||#ifndef NDEBUG
||// Since upb_value carries type information in debug mode
@@ -519,14 +522,15 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| callp f->value
}
| sethas CLOSURE, f->valuehasbit
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+ | mov DECODER->ptr, PTR
}
- // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
}
// PTR should point to the beginning of the tag.
-static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
- uint32_t next_tag, upb_mhandlers *m,
- upb_fhandlers *f, upb_fhandlers *next_f) {
+static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
+ uint64_t next_tag, upb_mhandlers *m,
+ upb_fhandlers *f, upb_fhandlers *next_f) {
// PC-label for the dispatch table.
// We check the wire type (which must be loaded in edx) because the
// table is keyed on field number, not type.
@@ -535,8 +539,8 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
| jne ->exit_jit // In the future: could be an unknown field or packed.
|=>f->jit_pclabel_notypecheck:
if (f->repeated) {
- | mov esi, FRAME->end_ofs
- | pushframe f, esi, true
+ | mov rsi, FRAME->end_ofs
+ | pushframe f, rsi, true
if (f->startseq) {
| mov ARG1_64, CLOSURE
| loadfval f
@@ -555,8 +559,8 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
return;
}
- upb_decoder_jit_decodefield(d, m, f->type, tag_size);
- upb_decoder_jit_callcb(d, f);
+ upb_decoderplan_jit_decodefield(plan, m, f->type, tag_size);
+ upb_decoderplan_jit_callcb(plan, f);
// Epilogue: load next tag, check for repeated field.
| check_eob m
@@ -586,13 +590,11 @@ static int upb_compare_uint32(const void *a, const void *b) {
return *(uint32_t*)a - *(uint32_t*)b;
}
-static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
+static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
|=>m->jit_startmsg_pclabel:
+ // There was a call to get here, so we need to align the stack.
+ | sub rsp, 8
- if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
- // There was a call to get here, so we need to align the stack.
- | sub rsp, 8
- }
// Call startmsg handler (if any):
if (m->startmsg) {
// upb_flow_t startmsg(void *closure);
@@ -615,23 +617,30 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
int num_keys = upb_inttable_count(&m->fieldtab);
uint32_t *keys = malloc(num_keys * sizeof(*keys));
int idx = 0;
- for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+ for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
+ !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
keys[idx++] = upb_inttable_iter_key(i);
}
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
upb_fhandlers *last_f = NULL;
- uint32_t last_tag = 0;
+ uint64_t last_encoded_tag = 0;
for(int i = 0; i < num_keys; i++) {
- uint32_t key = keys[i];
- upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
- uint32_t tag = upb_vencode32(key);
- if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
- last_tag = tag;
+ uint32_t fieldnum = keys[i];
+ upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum);
+ upb_fhandlers *f = e->f;
+ assert(f->number == fieldnum);
+ uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type;
+ uint64_t encoded_tag = upb_vencode32(tag);
+ // No tag should be greater than 5 bytes.
+ assert(encoded_tag <= 0xffffffffff);
+ if (last_f) upb_decoderplan_jit_field(
+ plan, last_encoded_tag, encoded_tag, m, last_f, f);
+ last_encoded_tag = encoded_tag;
last_f = f;
}
- upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
+ upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL);
free(keys);
@@ -655,22 +664,29 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
| callp m->endmsg
}
- if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
- // Counter previous alignment.
- | add rsp, 8
- | ret
- } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
- | jmp ->exit_jit
- } else {
- | jmp =>m->jit_parent_field_done_pclabel
+ if (m->is_group) {
+ // Advance past the "end group" tag.
+ // TODO: Handle UPB_BREAK
+ | mov DECODER->ptr, PTR
}
+ // Counter previous alignment.
+ | add rsp, 8
+ | ret
}
-static const char *dbgfmt =
- "JIT encountered unknown field! wt=%d, fn=%d\n";
-
-static void upb_decoder_jit(upb_decoder *d) {
+static void upb_decoderplan_jit(upb_decoderplan *plan) {
+ // The JIT prologue/epilogue trampoline that is generated in this function
+ // does not depend on the handlers, so it will never vary. Ideally we would
+ // put it in an object file and just link it into upb so we could have only a
+ // single copy of it instead of one copy for each decoderplan. But our
+ // options for doing that are undesirable: GCC inline assembly is
+ // complicated, not portable to other compilers, and comes with subtle
+ // caveats about incorrect things that the optimizer might do if you, e.g.,
+ // execute non-local jumps. Putting this code in a .s file would force us to
+ // calculate the structure offsets ourselves instead of symbolically
+ // (ie. [r15 + 0xcd] instead of DECODER->ptr). So we tolerate a bit of
+ // unnecessary duplication/redundancy.
| push rbp
| mov rbp, rsp
| push r15
@@ -686,18 +702,14 @@ static void upb_decoder_jit(upb_decoder *d) {
| mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr
- upb_handlers *h = d->dispatcher.handlers;
- if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
- | call =>h->msgs[0]->jit_startmsg_pclabel
- | jmp ->exit_jit
- }
-
// TODO: push return addresses for re-entry (will be necessary for multiple
// buffer support).
- for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);
+ | call ARG2_64
|->exit_jit:
- | mov DECODER->ptr, PTR
+ // Restore stack pointer to where it was before any "call" instructions
+ // inside our generated code.
+ | lea rsp, [rbp - 48]
// Counter previous alignment.
| add rsp, 8
| pop rbx
@@ -707,122 +719,128 @@ static void upb_decoder_jit(upb_decoder *d) {
| pop r15
| leave
| ret
- |=>0:
- | mov rdi, stderr
- | mov rsi, dbgfmt
- | callp fprintf
- | callp abort
+
+ upb_handlers *h = plan->handlers;
+ for (int i = 0; i < h->msgs_len; i++)
+ upb_decoderplan_jit_msg(plan, h->msgs[i]);
}
-void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,
- uint32_t *pclabel_count) {
+static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
+ uint32_t *pclabel_count) {
f->jit_pclabel = (*pclabel_count)++;
f->jit_pclabel_notypecheck = (*pclabel_count)++;
- f->jit_submsg_done_pclabel = (*pclabel_count)++;
}
-void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {
+static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
+ uint32_t *pclabel_count) {
m->jit_startmsg_pclabel = (*pclabel_count)++;
m->jit_endofbuf_pclabel = (*pclabel_count)++;
m->jit_endofmsg_pclabel = (*pclabel_count)++;
m->jit_dyndispatch_pclabel = (*pclabel_count)++;
m->jit_unknownfield_pclabel = (*pclabel_count)++;
- m->jit_parent_field_done_pclabel = UPB_NONE;
m->max_field_number = 0;
upb_inttable_iter i;
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
uint32_t key = upb_inttable_iter_key(i);
m->max_field_number = UPB_MAX(m->max_field_number, key);
- upb_fhandlers *f = upb_inttable_iter_value(i);
- upb_decoder_jit_assignfieldlabs(f, pclabel_count);
+ upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
+ upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count);
}
- // XXX: Won't work for large field numbers; will need to use a upb_table.
+ // TODO: support large field numbers by either using a hash table or
+ // generating code for a binary search. For now large field numbers
+ // will just fall back to the table decoder.
+ m->max_field_number = UPB_MIN(m->max_field_number, 16000);
m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
}
-// Second pass: for messages that have only one parent, link them to the field
-// from which they are called.
-void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
- upb_inttable_iter i;
- for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- upb_fhandlers *f = upb_inttable_iter_value(i);
- if (upb_issubmsgtype(f->type)) {
- upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
- if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) {
- sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;
- } else {
- sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE;
- }
- }
- }
-}
-
-void upb_decoder_makejit(upb_decoder *d) {
- d->debug_info = NULL;
+static void upb_decoderplan_makejit(upb_decoderplan *plan) {
+ plan->debug_info = NULL;
// Assign pclabels.
- uint32_t pclabel_count = 1;
- upb_handlers *h = d->dispatcher.handlers;
+ uint32_t pclabel_count = 0;
+ upb_handlers *h = plan->handlers;
for (int i = 0; i < h->msgs_len; i++)
- upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
- for (int i = 0; i < h->msgs_len; i++)
- upb_decoder_jit_assignmsglabs2(h->msgs[i]);
-
- if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
- h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
- }
+ upb_decoderplan_jit_assignmsglabs(h->msgs[i], &pclabel_count);
void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
- dasm_init(d, 1);
- dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
- dasm_growpc(d, pclabel_count);
- dasm_setup(d, upb_jit_actionlist);
+ dasm_init(plan, 1);
+ dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX);
+ dasm_growpc(plan, pclabel_count);
+ dasm_setup(plan, upb_jit_actionlist);
- upb_decoder_jit(d);
+ upb_decoderplan_jit(plan);
- dasm_link(d, &d->jit_size);
+ int dasm_status = dasm_link(plan, &plan->jit_size);
+ (void)dasm_status;
+ assert(dasm_status == DASM_S_OK);
- d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
- MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ plan->jit_code = mmap(NULL, plan->jit_size, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
- upb_reg_jit_gdb(d);
+ upb_reg_jit_gdb(plan);
- dasm_encode(d, d->jit_code);
+ dasm_encode(plan, plan->jit_code);
// Create dispatch tables.
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *m = h->msgs[i];
+ m->jit_func =
+ plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel);
for (uint32_t j = 0; j <= m->max_field_number; j++) {
- upb_fhandlers *f = NULL;
- for (int k = 0; k < 8; k++) {
- f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
- if (f) break;
- }
+ upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j);
+ upb_fhandlers *f = e ? e->f : NULL;
if (f) {
- m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
+ m->tablearray[j] =
+ plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
} else {
- // Don't handle unknown fields yet.
- m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
+ // TODO: extend the JIT to handle unknown fields.
+ // For the moment we exit the JIT for any unknown field.
+ m->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
}
}
}
- dasm_free(d);
+ dasm_free(plan);
free(globals);
- mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ);
+ mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ);
// View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code
// Or: ndisasm -b 64 /tmp/machine-code
FILE *f = fopen("/tmp/machine-code", "wb");
- fwrite(d->jit_code, d->jit_size, 1, f);
+ fwrite(plan->jit_code, plan->jit_size, 1, f);
fclose(f);
}
-void upb_decoder_freejit(upb_decoder *d) {
- munmap(d->jit_code, d->jit_size);
- free(d->debug_info);
+static void upb_decoderplan_freejit(upb_decoderplan *plan) {
+ munmap(plan->jit_code, plan->jit_size);
+ free(plan->debug_info);
// TODO: unregister
}
+
+// Calls into the plan's JIT-compiled code, if there is any and the decoder is
+// in a state where the JIT can be entered.  The generated entry point is
+// passed the decoder and the jit_func of the message at d->msg_offset.
+static void upb_decoder_enterjit(upb_decoder *d) {
+ // Enter only when JIT code exists, dispatcher.top equals dispatcher.stack
+ // (presumably: no nested frames are open -- confirm), and d->ptr is
+ // non-NULL and still below d->jit_end.
+ if (d->plan->jit_code &&
+ d->dispatcher.top == d->dispatcher.stack &&
+ d->ptr && d->ptr < d->jit_end) {
+#ifndef NDEBUG
+ // Debug builds only: pin sentinel values in these registers so that the
+ // asserts after the call can detect JIT code that fails to restore them.
+ register uint64_t rbx asm ("rbx") = 11;
+ register uint64_t r12 asm ("r12") = 12;
+ register uint64_t r13 asm ("r13") = 13;
+ register uint64_t r14 asm ("r14") = 14;
+ register uint64_t r15 asm ("r15") = 15;
+#endif
+ // Decodes as many fields as possible, updating d->ptr appropriately,
+ // before falling through to the slow(er) path.
+ void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code;
+ upb_jit_decode(d, d->plan->handlers->msgs[d->msg_offset]->jit_func);
+ assert(d->ptr <= d->end);
+
+ // Test that callee-save registers were properly restored.
+ assert(rbx == 11);
+ assert(r12 == 12);
+ assert(r13 == 13);
+ assert(r14 == 14);
+ assert(r15 == 15);
+ }
+}
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 3176355..4949fe3 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -12,8 +12,8 @@
#include "upb/pb/glue.h"
#include "upb/pb/textprinter.h"
-void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
- upb_status *status) {
+bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
+ bool allow_jit, upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str, len);
@@ -21,13 +21,21 @@ void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
upb_decoder d;
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, md);
- upb_decoder_init(&d, h);
+ upb_decoderplan *p = upb_decoderplan_new(h, allow_jit);
+ upb_decoder_init(&d);
upb_handlers_unref(h);
- upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg);
- upb_decoder_decode(&d, status);
+ upb_decoder_resetplan(&d, p, 0);
+ upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg);
+ upb_success_t ret = upb_decoder_decode(&d);
+ // stringsrc and the handlers registered by upb_accessors_reghandlers()
+ // should not suspend.
+ assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d)));
+ if (status) upb_status_copy(status, upb_decoder_status(&d));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
+ upb_decoderplan_unref(p);
+ return ret == UPB_OK;
}
void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
@@ -35,7 +43,7 @@ void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
size_t len;
char *data = upb_readfile(fname, &len);
if (!data) goto err;
- upb_strtomsg(data, len, msg, md, s);
+ upb_strtomsg(data, len, msg, md, false, s);
if (!upb_ok(s)) goto err;
return msg;
@@ -69,7 +77,6 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
}
#endif
-// TODO: read->load.
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
upb_status *status) {
upb_stringsrc strsrc;
@@ -79,17 +86,21 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
upb_handlers *h = upb_handlers_new();
upb_descreader_reghandlers(h);
+ upb_decoderplan *p = upb_decoderplan_new(h, false);
upb_decoder d;
- upb_decoder_init(&d, h);
+ upb_decoder_init(&d);
upb_handlers_unref(h);
upb_descreader r;
upb_descreader_init(&r);
- upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &r);
+ upb_decoder_resetplan(&d, p, 0);
+ upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r);
- upb_decoder_decode(&d, status);
+ upb_success_t ret = upb_decoder_decode(&d);
+ if (status) upb_status_copy(status, upb_decoder_status(&d));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
- if (!upb_ok(status)) {
+ upb_decoderplan_unref(p);
+ if (ret != UPB_OK) {
upb_descreader_uninit(&r);
return NULL;
}
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
index 38e8d8e..ff8c85e 100644
--- a/upb/pb/glue.h
+++ b/upb/pb/glue.h
@@ -36,8 +36,8 @@ extern "C" {
// Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s".
-void upb_strtomsg(const char *str, size_t len, void *msg,
- const upb_msgdef *md, upb_status *s);
+bool upb_strtomsg(const char *str, size_t len, void *msg,
+ const upb_msgdef *md, bool allow_jit, upb_status *s);
// Parses the given file into a new message of the given type. Caller owns
// the returned message (or NULL if an error occurred).
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
index 19977e9..815a7a1 100644
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@@ -19,6 +19,18 @@
extern "C" {
#endif
+// The maximum number of bytes that it takes to encode a 64-bit varint.
+// Note that with a better encoding this could be 9 (TODO: write up a
+// wiki document about this).
+#define UPB_PB_VARINT_MAX_LEN 10
+
+/* Zig-zag encoding/decoding **************************************************/
+
+// Zig-zag decoding: maps 0, 1, 2, 3, ... back to 0, -1, 1, -2, ...
+INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
+INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
+// Zig-zag encoding: maps 0, -1, 1, -2, ... to 0, 1, 2, 3, ...
+// The left shift is performed on the unsigned representation because
+// left-shifting a negative signed value is undefined behavior in C.  The
+// (n >> 31) / (n >> 63) term still relies on the compiler implementing signed
+// right shift arithmetically (all-ones for negative n).
+INLINE uint32_t upb_zzenc_32(int32_t n) {
+  return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31);
+}
+INLINE uint64_t upb_zzenc_64(int64_t n) {
+  return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63);
+}
+
/* Decoding *******************************************************************/
// All decoding functions return this struct by value.
@@ -56,7 +68,7 @@ done:
INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
uint64_t val;
uint64_t b;
- upb_decoderet r = {(void*)0, 0};
+ upb_decoderet r = {NULL, 0};
b = *(p++); val = (b & 0x7f) ; if(!(b & 0x80)) goto done;
b = *(p++); val |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
@@ -124,17 +136,33 @@ INLINE int upb_value_size(uint64_t val) {
return val == 0 ? 1 : high_bit / 8 + 1;
}
+// Writes "val" as a varint into buf and returns the number of bytes written.
+// buf must have room for at least UPB_PB_VARINT_MAX_LEN bytes.
+//
+// TODO: benchmark and optimize if necessary.
+INLINE size_t upb_vencode64(uint64_t val, char *buf) {
+  size_t len = 0;
+  // Emit 7 bits per byte, low-order group first.  Using do/while means that
+  // zero still produces its single 0x00 byte.
+  do {
+    uint8_t group = val & 0x7f;
+    val >>= 7;
+    if (val != 0) group |= 0x80;  // Continuation bit: more groups follow.
+    buf[len++] = group;
+  } while (val != 0);
+  return len;
+}
+
// Encodes a 32-bit varint, *not* sign-extended.
INLINE uint64_t upb_vencode32(uint32_t val) {
+ char buf[UPB_PB_VARINT_MAX_LEN];
+ size_t bytes = upb_vencode64(val, buf);
uint64_t ret = 0;
- for (int bitpos = 0; val; bitpos+=8, val >>=7) {
- if (bitpos > 0) ret |= (1 << (bitpos-1));
- ret |= (val & 0x7f) << bitpos;
- }
+ assert(bytes <= 5);
+ memcpy(&ret, buf, bytes);
+ assert(ret <= 0xffffffffff);
return ret;
}
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/upb/table.h b/upb/table.h
index 0786a1a..0c0a785 100644
--- a/upb/table.h
+++ b/upb/table.h
@@ -127,6 +127,8 @@ INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) {
// We have the caller specify the entry_size because fixing this as a literal
// (instead of reading table->entry_size) gives the compiler more ability to
// optimize.
+//
+// Note: All returned pointers are invalidated by inserts!
INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
size_t entry_size, size_t value_size) {
upb_inttable_value *arrval =
@@ -203,8 +205,11 @@ typedef struct {
} upb_inttable_iter;
upb_inttable_iter upb_inttable_begin(const upb_inttable *t);
-upb_inttable_iter upb_inttable_next(const upb_inttable *t, upb_inttable_iter iter);
-INLINE bool upb_inttable_done(upb_inttable_iter iter) { return iter.value == NULL; }
+upb_inttable_iter upb_inttable_next(const upb_inttable *t,
+ upb_inttable_iter iter);
+INLINE bool upb_inttable_done(upb_inttable_iter iter) {
+ return iter.value == NULL;
+}
INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
return iter.key;
}
diff --git a/upb/upb.c b/upb/upb.c
index 5002e10..a3e07e4 100644
--- a/upb/upb.c
+++ b/upb/upb.c
@@ -15,29 +15,32 @@
#include "upb/bytestream.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, inmemory_type) \
- {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
+#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \
+ {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \
+ #ctype, is_numeric},
const upb_type_info upb_types[] = {
- TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE) // ENDGROUP (fake)
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64) // UINT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // INT32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64) // FIXED64
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32) // FIXED32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL) // BOOL
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // STRING
- TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE) // GROUP
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE) // MESSAGE
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // BYTES
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32) // UINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32) // ENUM
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32) // SFIXED32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64) // SFIXED64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // SINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // SINT64
+ // END_GROUP is not real, but used to signify the pseudo-field that
+ // ends a group from within the group.
+ TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE, false) // ENDGROUP
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE, true) // DOUBLE
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT, true) // FLOAT
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // INT64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64, true) // UINT64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // INT32
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64, true) // FIXED64
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32, true) // FIXED32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL, true) // BOOL
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // STRING
+ TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE, false) // GROUP
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE, false) // MESSAGE
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // BYTES
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32, true) // UINT32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32, true) // ENUM
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32, true) // SFIXED32
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64, true) // SFIXED64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // SINT32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // SINT64
};
#ifdef NDEBUG
@@ -66,13 +69,13 @@ void upb_status_seterrf(upb_status *s, const char *msg, ...) {
}
void upb_status_seterrliteral(upb_status *status, const char *msg) {
- status->code = UPB_ERROR;
+ status->error = true;
status->str = msg;
status->space = NULL;
}
void upb_status_copy(upb_status *to, const upb_status *from) {
- to->status = from->status;
+ to->error = from->error;
to->eof = from->eof;
to->code = from->code;
to->space = from->space;
@@ -92,15 +95,20 @@ const char *upb_status_getstr(const upb_status *_status) {
// Function is logically const but can modify internal state to materialize
// the string.
upb_status *status = (upb_status*)_status;
- if (status->str == NULL && status->space && status->space->code_to_string) {
- status->space->code_to_string(status->code, status->buf, status->bufsize);
- status->str = status->buf;
+ if (status->str == NULL && status->space) {
+ if (status->space->code_to_string) {
+ status->space->code_to_string(status->code, status->buf, status->bufsize);
+ status->str = status->buf;
+ } else {
+ upb_status_seterrf(status, "No message, error space=%s, code=%d\n",
+ status->space->name, status->code);
+ }
}
return status->str;
}
void upb_status_clear(upb_status *status) {
- status->status = UPB_OK;
+ status->error = false;
status->eof = false;
status->code = 0;
status->space = NULL;
@@ -114,19 +122,38 @@ void upb_status_setcode(upb_status *status, upb_errorspace *space, int code) {
}
void upb_status_fromerrno(upb_status *status) {
- if (errno == 0) {
- status->status = UPB_OK;
- } else if (errno == EAGAIN || errno == EWOULDBLOCK) {
- status->status = UPB_WOULDBLOCK;
- } else {
- status->status = UPB_ERROR;
+ if (errno != 0 && !upb_errno_is_wouldblock()) {
+ status->error = true;
+ upb_status_setcode(status, &upb_posix_errorspace, errno);
+ }
+}
+
+// Returns true if errno currently holds one of the "operation would block"
+// codes (EAGAIN and/or EWOULDBLOCK, whichever this platform defines).
+bool upb_errno_is_wouldblock() {
+#ifdef EAGAIN
+  if (errno == EAGAIN) return true;
+#endif
+#ifdef EWOULDBLOCK
+  if (errno == EWOULDBLOCK) return true;
+#endif
+  return false;
+}
+
+// code_to_string callback for the POSIX error space: writes the message for
+// errno value "code" into buf (len bytes, NULL-terminated).  Returns false if
+// the message was too large to fit, true otherwise.
+bool upb_posix_codetostr(int code, char *buf, size_t len) {
+  // XSI strerror_r() reports failure either as -1 with errno set (older
+  // glibc) or by returning the positive error number directly; accept both.
+  int rc = strerror_r(code, buf, len);
+  if (rc != 0) {
+    int err = (rc == -1) ? errno : rc;
+    if (err == EINVAL) {
+      // snprintf() returns the length the full message needs, so it fit only
+      // if that length is strictly less than len.
+      int n = snprintf(buf, len, "Invalid POSIX error number %d\n", code);
+      return n >= 0 && (size_t)n < len;
+    } else if (err == ERANGE) {
+      return false;
+    }
+    assert(false);
}
- upb_status_setcode(status, &upb_posix_errorspace, errno);
+ return true;
}
-upb_errorspace upb_posix_errorspace = {"POSIX", NULL}; // TODO
+upb_errorspace upb_posix_errorspace = {"POSIX", &upb_posix_codetostr};
-int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args) {
// Try once without reallocating. We have to va_copy because we might have
// to call vsnprintf again.
@@ -141,7 +168,7 @@ int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
// Need to print again, because some characters were truncated. vsnprintf
// will not write the entire string unless you give it space to store the
// NULL terminator also.
- while (*size < (ofs + true_len + 1)) *size = UPB_MAX(*size * 2, 2);
+ *size = (ofs + true_len + 1);
char *newbuf = realloc(*buf, *size);
if (!newbuf) return -1;
vsnprintf(newbuf + ofs, true_len + 1, fmt, args);
diff --git a/upb/upb.h b/upb/upb.h
index e43418f..d11c7cb 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -10,10 +10,12 @@
#ifndef UPB_H_
#define UPB_H_
-#include <stdbool.h>
-#include <stdint.h>
#include <assert.h>
#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
#include "descriptor_const.h"
#include "atomic.h"
@@ -26,6 +28,12 @@ extern "C" {
#define INLINE static inline
#endif
+#ifdef __GNUC__
+#define UPB_NORETURN __attribute__((__noreturn__))
+#else
+#define UPB_NORETURN
+#endif
+
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
@@ -115,6 +123,7 @@ typedef struct {
uint8_t native_wire_type;
uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
const char *ctype;
+ bool is_numeric; // Only numeric types can be packed.
} upb_type_info;
// A static array of info about all of the field types, indexed by type number.
@@ -176,6 +185,7 @@ typedef struct {
return val.val.membername; \
} \
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
+ memset(val, 0, sizeof(*val)); \
SET_TYPE(val->type, proto_type); \
val->val.membername = cval; \
} \
@@ -206,27 +216,31 @@ extern upb_value UPB_NO_VALUE;
/* upb_status *****************************************************************/
-enum {
+typedef enum {
UPB_OK, // The operation completed successfully.
- UPB_WOULDBLOCK, // Stream is nonblocking and the operation would block.
+ UPB_SUSPENDED, // The operation was suspended and may be resumed later.
UPB_ERROR, // An error occurred.
-};
+} upb_success_t;
typedef struct {
const char *name;
// Writes a NUL-terminated string to "buf" containing an error message for
// the given error code, returning false if the message was too large to fit.
- bool (*code_to_string)(int code, char *buf, uint32_t len);
+ bool (*code_to_string)(int code, char *buf, size_t len);
} upb_errorspace;
typedef struct {
- char status;
+ bool error;
bool eof;
- int code; // Can be set to a more specific code (defined by error space).
+
+ // Specific status code defined by some error space (optional).
+ int code;
upb_errorspace *space;
+
+ // Error message (optional).
const char *str; // NULL when no message is present. NULL-terminated.
char *buf; // Owned by the status.
- uint32_t bufsize;
+ size_t bufsize;
} upb_status;
#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0}
@@ -234,7 +248,7 @@ typedef struct {
void upb_status_init(upb_status *status);
void upb_status_uninit(upb_status *status);
-INLINE bool upb_ok(const upb_status *status) { return status->code == UPB_OK; }
+INLINE bool upb_ok(const upb_status *status) { return !status->error; }
INLINE bool upb_eof(const upb_status *status) { return status->eof; }
void upb_status_clear(upb_status *status);
@@ -248,6 +262,7 @@ void upb_status_copy(upb_status *to, const upb_status *from);
extern upb_errorspace upb_posix_errorspace;
void upb_status_fromerrno(upb_status *status);
+bool upb_errno_is_wouldblock();
// Like vasprintf (which allocates a string large enough for the result), but
// uses *buf (which can be NULL) as a starting point and reallocates it only if
@@ -255,7 +270,7 @@ void upb_status_fromerrno(upb_status *status);
// of the buffer. Starts writing at the given offset into the string; bytes
// preceding this offset are unaffected. Returns the new length of the string,
// or -1 on memory allocation failure.
-int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args);
#ifdef __cplusplus
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback