summary | refs | log | tree | commit | diff
path: root/upb/pb
diff options
context:
space:
mode:
Diffstat (limited to 'upb/pb')
-rw-r--r-- upb/pb/decoder.c 914
-rw-r--r-- upb/pb/decoder.h 207
-rw-r--r-- upb/pb/decoder_x64.dasc 429
-rw-r--r-- upb/pb/glue.c 46
-rw-r--r-- upb/pb/textprinter.c 121
-rw-r--r-- upb/pb/textprinter.h 4
-rw-r--r-- upb/pb/varint.h 24
7 files changed, 975 insertions, 770 deletions
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 065c495..2bfc717 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -6,12 +6,101 @@
*/
#include <inttypes.h>
+#include <setjmp.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb/bytestream.h"
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"
+#define UPB_NONDELIMITED (0xffffffffffffffffULL)
+
+/* upb_pbdecoder ****************************************************************/
+
+struct dasm_State;
+
+typedef struct {
+ const upb_fielddef *f;
+ uint64_t end_ofs;
+ uint32_t group_fieldnum; // UINT32_MAX for non-groups.
+ bool is_sequence; // frame represents seq or submsg/str? (f might be both).
+ bool is_packed; // true for packed primitive sequences.
+} frame;
+
+struct upb_pbdecoder {
+ // Where we push parsed data (not owned).
+ upb_sink *sink;
+
+ // Current input buffer and its stream offset.
+ const char *buf, *ptr, *end, *checkpoint;
+ uint64_t bufstart_ofs;
+
+ // Buffer for residual bytes not parsed from the previous buffer.
+ char residual[16];
+ char *residual_end;
+
+ // Stores the user buffer passed to our decode function.
+ const char *buf_param;
+ size_t size_param;
+
+ // Equal to size_param while we are in the residual buf, 0 otherwise.
+ size_t userbuf_remaining;
+
+ // Used to temporarily store the return value before calling longjmp().
+ size_t ret;
+
+ // End of the delimited region, relative to ptr, or NULL if not in this buf.
+ const char *delim_end;
+
+#ifdef UPB_USE_JIT_X64
+ // For JIT, which doesn't do bounds checks in the middle of parsing a field.
+ const char *jit_end, *effective_end; // == MIN(jit_end, delim_end)
+
+ // Used momentarily by the generated code to store a value while a user
+ // function is called.
+ uint32_t tmp_len;
+
+ const void *saved_rbp;
+#endif
+
+ // Our internal stack.
+ frame *top, *limit;
+ frame stack[UPB_MAX_NESTING];
+
+ // For exiting the decoder on error.
+ jmp_buf exitjmp;
+};
+
+typedef struct {
+ // The top-level handlers that this plan calls into. We own a ref.
+ const upb_handlers *dest_handlers;
+
+#ifdef UPB_USE_JIT_X64
+ // JIT-generated machine code (else NULL).
+ char *jit_code;
+ size_t jit_size;
+ char *debug_info;
+
+ // For storing upb_jitmsginfo, which contains per-msg runtime data needed
+ // by the JIT.
+ // Maps upb_handlers* -> upb_jitmsginfo.
+ upb_inttable msginfo;
+
+ // The following members are used only while the JIT is being built.
+
+ // This pointer is allocated by dasm_init() and freed by dasm_free().
+ struct dasm_State *dynasm;
+
+ // For storing pclabel bases while we are building the JIT.
+ // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base
+ upb_inttable pclabels;
+
+ // This is not the same as len(pclabels) because the table only contains base
+ // offsets for each def, but each def can have many pclabels.
+ uint32_t pclabel_count;
+#endif
+} decoderplan;
+
typedef struct {
uint8_t native_wire_type;
bool is_numeric;
@@ -39,12 +128,21 @@ static const upb_decoder_typeinfo upb_decoder_types[] = {
{UPB_WIRE_TYPE_VARINT, true}, // SINT64
};
-/* upb_decoderplan ************************************************************/
+static upb_selector_t getselector(const upb_fielddef *f,  // Returns the selector that upb_getselector() yields for (f, type).
+ upb_handlertype_t type) {
+ upb_selector_t selector;
+ bool ok = upb_getselector(f, type, &selector);
+ UPB_ASSERT_VAR(ok, ok);  // Lookup failure is not reported to the caller; ok is only handed to UPB_ASSERT_VAR.
+ return selector;
+}
+
+
+/* decoderplan ****************************************************************/
#ifdef UPB_USE_JIT_X64
// These defines are necessary for DynASM codegen.
// See dynasm/dasm_proto.h for more info.
-#define Dst_DECL upb_decoderplan *plan
+#define Dst_DECL decoderplan *plan
#define Dst_REF (plan->dynasm)
#define Dst (plan)
@@ -58,39 +156,49 @@ static const upb_decoder_typeinfo upb_decoder_types[] = {
#include "upb/pb/decoder_x64.h"
#endif
-upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit) {
- UPB_UNUSED(allowjit);
- upb_decoderplan *p = malloc(sizeof(*p));
- assert(upb_handlers_isfrozen(h));
- p->handlers = h;
- upb_handlers_ref(h, p);
-#ifdef UPB_USE_JIT_X64
- p->jit_code = NULL;
- if (allowjit) upb_decoderplan_makejit(p);
-#endif
- return p;
-}
-
-void upb_decoderplan_unref(upb_decoderplan *p) {
- // TODO: make truly refcounted.
- upb_handlers_unref(p->handlers, p);
+void freeplan(void *_p) {  // Destroys a decoderplan passed as an untyped pointer.
+ decoderplan *p = _p;
+ upb_handlers_unref(p->dest_handlers, p);  // Drops the ref the plan holds on its destination handlers.
#ifdef UPB_USE_JIT_X64
if (p->jit_code) upb_decoderplan_freejit(p);  // Only when JIT code was actually generated.
#endif
free(p);
}
-bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
+static decoderplan *getdecoderplan(const upb_handlers *h) {  // Returns the plan attached to h, or NULL if none can be found.
+ if (upb_handlers_frametype(h) != upb_pbdecoder_getframetype())  // Frame type differs from the pbdecoder's.
+ return NULL;
+ upb_selector_t sel;
+ if (!upb_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STRING, &sel))  // Selector of the bytestream string handler.
+ return NULL;
+ return upb_handlers_gethandlerdata(h, sel);  // The plan is read back as that handler's data.
+}
+
+bool upb_pbdecoder_isdecoder(const upb_handlers *h) {  // True iff getdecoderplan() finds a plan on h.
+ return getdecoderplan(h) != NULL;
+}
+
+bool upb_pbdecoder_hasjitcode(const upb_handlers *h) {
#ifdef UPB_USE_JIT_X64
+ decoderplan *p = getdecoderplan(h);
+ if (!p) return false;
return p->jit_code != NULL;
#else
- (void)p;
+ UPB_UNUSED(h);
return false;
#endif
}
+const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) {  // Returns the plan's dest_handlers for h.
+ decoderplan *p = getdecoderplan(h);
+ if (!p) return NULL;  // h carries no decoder plan.
+ return p->dest_handlers;
+}
+
+
+/* upb_pbdecoder ****************************************************************/
-/* upb_decoder ****************************************************************/
+static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
@@ -100,98 +208,73 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
#define FORCEINLINE static inline __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))
-UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) {
- // Resumable decoder would back out to completed_ptr (and possibly get a
- // previous buffer).
- _longjmp(d->exitjmp, 1);
+static upb_status *decoder_status(upb_pbdecoder *d) {
+ // TODO(haberman): encapsulate this access to pipeline->status, but not sure
+ // exactly what that interface should look like.
+ return &d->sink->pipeline_->status_;
}
-UPB_NORETURN static void upb_decoder_exitjmp2(void *d) {
- upb_decoder_exitjmp(d);
+
+UPB_NORETURN static void exitjmp(upb_pbdecoder *d) {
+ _longjmp(d->exitjmp, 1);
}
-UPB_NORETURN static void upb_decoder_abortjmp(upb_decoder *d, const char *msg) {
- upb_status_seterrliteral(&d->status, msg);
- upb_decoder_exitjmp(d);
+
+UPB_NORETURN static void abortjmp(upb_pbdecoder *d, const char *msg) {  // Records msg as the error status and leaves the decoder via exitjmp().
+ d->ret = in_residual_buf(d, d->checkpoint) ? 0 : (d->checkpoint - d->buf);  // Stored return value: distance from d->buf to the checkpoint, or 0 when the checkpoint lies in the residual buffer.
+ upb_status_seterrliteral(decoder_status(d), msg);
+ exitjmp(d);  // Does not return (longjmp).
}
/* Buffering ******************************************************************/
-// We operate on one buffer at a time, which may be a subset of the currently
-// loaded byteregion data. When data for the buffer is completely gone we pull
-// the next one. When we've committed our progress we discard any previous
-// buffers' regions.
+// We operate on one buffer at a time, which is either the user's buffer passed
+// to our "decode" callback or some residual bytes from the previous buffer.
-static size_t upb_decoder_bufleft(upb_decoder *d) {
+// How many bytes can be safely read from d->ptr.
+static size_t bufleft(upb_pbdecoder *d) {
assert(d->end >= d->ptr);
return d->end - d->ptr;
}
-static void upb_decoder_advance(upb_decoder *d, size_t len) {
- assert(upb_decoder_bufleft(d) >= len);
+// Overall offset of d->ptr.
+uint64_t offset(const upb_pbdecoder *d) {
+ return d->bufstart_ofs + (d->ptr - d->buf);
+}
+
+// Advances d->ptr.
+static void advance(upb_pbdecoder *d, size_t len) {
+ assert(bufleft(d) >= len);
d->ptr += len;
}
-uint64_t upb_decoder_offset(upb_decoder *d) {
- return d->bufstart_ofs + (d->ptr - d->buf);
+// Commits d->ptr progress; should be called when an entire atomic value
+// (ie tag+value) has been successfully consumed.
+static void checkpoint(upb_pbdecoder *d) {
+ d->checkpoint = d->ptr;
}
-uint64_t upb_decoder_bufendofs(upb_decoder *d) {
- return d->bufstart_ofs + (d->end - d->buf);
+static bool in_buf(const char *p, const char *buf, const char *end) {
+ return p >= buf && p <= end;
}
-static bool upb_decoder_islegalend(upb_decoder *d) {
- if (d->top == d->stack) return true;
- if (d->top - 1 == d->stack &&
- d->top->is_sequence && !d->top->is_packed) return true;
- return false;
+static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
+ return in_buf(p, d->residual, d->residual_end);
}
-// Calculates derived values that we cache for speed. These reflect a
-// combination of the current buffer and the stack, so must be called whenever
-// either is updated.
-static void upb_decoder_setmsgend(upb_decoder *d) {
- upb_decoder_frame *f = d->top;
+// Calculates the delim_end value, which represents a combination of the
+// current buffer and the stack, so must be called whenever either is updated.
+static void set_delim_end(upb_pbdecoder *d) {
+ frame *f = d->top;
size_t delimlen = f->end_ofs - d->bufstart_ofs;
size_t buflen = d->end - d->buf;
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf.
- d->top_is_packed = f->is_packed;
-}
-
-static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) {
- assert(ofs >= upb_decoder_offset(d));
- if (ofs > upb_byteregion_endofs(d->input))
- upb_decoder_abortjmp(d, "Unexpected EOF");
- d->buf = NULL;
- d->ptr = NULL;
- d->end = NULL;
- d->delim_end = NULL;
-#ifdef UPB_USE_JIT_X64
- d->jit_end = NULL;
-#endif
- d->bufstart_ofs = ofs;
}
-static bool upb_trypullbuf(upb_decoder *d) {
- assert(upb_decoder_bufleft(d) == 0);
- upb_decoder_skiptonewbuf(d, upb_decoder_offset(d));
- if (upb_byteregion_available(d->input, d->bufstart_ofs) == 0) {
- switch (upb_byteregion_fetch(d->input)) {
- case UPB_BYTE_OK:
- assert(upb_byteregion_available(d->input, d->bufstart_ofs) > 0);
- break;
- case UPB_BYTE_EOF: return false;
- case UPB_BYTE_ERROR: upb_decoder_abortjmp(d, "I/O error in input");
- // Decoder resuming is not yet supported.
- case UPB_BYTE_WOULDBLOCK:
- upb_decoder_abortjmp(d, "Input returned WOULDBLOCK");
- }
- }
- size_t len;
- d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len);
- assert(len > 0);
- d->ptr = d->buf;
- d->end = d->buf + len;
- upb_decoder_setmsgend(d);
+static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
+ d->ptr = buf;
+ d->buf = buf;
+ d->end = end;
+ set_delim_end(d);
#ifdef UPB_USE_JIT_X64
// If we start parsing a value, we can parse up to 20 bytes without
// having to bounds-check anything (2 10-byte varints). Since the
@@ -199,172 +282,232 @@ static bool upb_trypullbuf(upb_decoder *d) {
// JIT bails if there are not 20 bytes available.
d->jit_end = d->end - 20;
#endif
- assert(upb_decoder_bufleft(d) > 0);
- return true;
}
-static void upb_pullbuf(upb_decoder *d) {
- if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF");
+static void suspendjmp(upb_pbdecoder *d) {  // Leaves the decoder without setting an error status.
+ switchtobuf(d, d->residual, d->residual_end);  // Make the residual buffer the current buffer first.
+ exitjmp(d);  // longjmp back to the setjmp point; d->ret must already hold the return value.
+}
+
+static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {  // Moves on from a fully consumed buffer to [buf, buf + len).
+ assert(len >= 0);  // NOTE(review): len is a size_t, so this assertion is always true.
+ assert(d->ptr == d->end);  // The current buffer must be completely consumed.
+ d->bufstart_ofs += (d->ptr - d->buf);  // Stream offset of the new buffer = old offset + bytes consumed.
+ switchtobuf(d, buf, buf + len);
+}
+
+static void skip(upb_pbdecoder *d, size_t bytes) {  // Skips `bytes` bytes of input, suspending the decoder if they are not all available yet.
+ size_t avail = bufleft(d);
+ size_t total_avail = avail + d->userbuf_remaining;  // userbuf_remaining is nonzero only while in the residual buf.
+ if (avail >= bytes) {
+ // Skipped data is all in current buffer.
+ advance(d, bytes);
+ } else if (total_avail >= bytes) {
+ // Skipped data is all in residual buf and param buffer.
+ assert(in_residual_buf(d, d->ptr));
+ advance(d, avail);  // Exhaust the residual buffer first.
+ advancetobuf(d, d->buf_param, d->size_param);
+ d->userbuf_remaining = 0;  // The user buffer is now the current buffer.
+ advance(d, bytes - avail);
+ } else {
+ // Skipped data extends beyond currently available buffers.
+ // TODO: we need to do a checkdelim() equivalent that pops any frames that
+ // we just skipped past.
+ d->bufstart_ofs = offset(d) + bytes;  // Stream offset just past the skipped region.
+ d->residual_end = d->residual;  // Empty the residual buffer.
+ d->ret += bytes - total_avail;  // Add the not-yet-available skipped bytes to the stored return value.
+ suspendjmp(d);  // Does not return.
+ }
+}
+
+static void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
+ assert(bytes <= bufleft(d));
+ memcpy(buf, d->ptr, bytes);
+ advance(d, bytes);
}
-static void upb_decoder_checkpoint(upb_decoder *d) {
- upb_byteregion_discard(d->input, upb_decoder_offset(d));
+NOINLINE void getbytes_slow(upb_pbdecoder *d, void *buf, size_t bytes) {  // Slow path of getbytes(): the read crosses a buffer seam or must suspend.
+ const size_t avail = bufleft(d);
+ if (avail + d->userbuf_remaining >= bytes) {
+ // Remaining residual buffer and param buffer together can satisfy.
+ // (We are only called from getbytes() which has already verified that
+ // the current buffer alone cannot satisfy).
+ assert(in_residual_buf(d, d->ptr));
+ consumebytes(d, buf, avail);
+ advancetobuf(d, d->buf_param, d->size_param);
+ consumebytes(d, (char *)buf + avail, bytes - avail);  // Cast: arithmetic on void* is a GNU extension, not ISO C.
+ d->userbuf_remaining = 0;
+ } else {
+ // There is not enough remaining data, save residual bytes (if any)
+ // starting at the last committed checkpoint and exit.
+ if (in_buf(d->checkpoint, d->buf_param, d->buf_param + d->size_param)) {
+ // Checkpoint was in user buf; old residual bytes not needed.
+ d->ptr = d->checkpoint;
+ size_t save = bufleft(d);
+ assert(save <= sizeof(d->residual));
+ memcpy(d->residual, d->ptr, save);
+ d->residual_end = d->residual + save;
+ d->bufstart_ofs = offset(d);
+ } else {
+ // Checkpoint was in residual buf; append user byte(s) to residual buf.
+ assert(d->checkpoint == d->residual);
+ assert((d->residual_end - d->residual) + d->size_param <=
+ sizeof(d->residual));
+ if (!in_residual_buf(d, d->ptr)) {
+ d->bufstart_ofs -= (d->residual_end - d->residual);  // Rewind the offset to the start of the residual bytes.
+ }
+ memcpy(d->residual_end, d->buf_param, d->size_param);
+ d->residual_end += d->size_param;
+ }
+ suspendjmp(d);  // Does not return.
+ }
}
-static void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
- if (ofs <= upb_decoder_bufendofs(d)) {
- upb_decoder_advance(d, ofs - upb_decoder_offset(d));
+FORCEINLINE void getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
+ if (bufleft(d) >= bytes) {
+ // Buffer has enough data to satisfy.
+ consumebytes(d, buf, bytes);
} else {
- upb_decoder_skiptonewbuf(d, ofs);
+ getbytes_slow(d, buf, bytes);
}
- upb_decoder_checkpoint(d);
}
-static void upb_decoder_discard(upb_decoder *d, size_t bytes) {
- upb_decoder_discardto(d, upb_decoder_offset(d) + bytes);
+FORCEINLINE uint8_t getbyte(upb_pbdecoder *d) {  // Reads and consumes one input byte via getbytes().
+ uint8_t byte;
+ getbytes(d, &byte, 1);  // May not return if the byte is unavailable (getbytes_slow() can suspend).
+ return byte;
}
/* Decoding of wire types *****************************************************/
-NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
+NOINLINE uint64_t decode_varint_slow(upb_pbdecoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
- if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
- u64 |= ((uint64_t)(byte = *d->ptr) & 0x7F) << bitpos;
- upb_decoder_advance(d, 1);
+ u64 |= ((uint64_t)((byte = getbyte(d)) & 0x7F)) << bitpos;
}
if(bitpos == 70 && (byte & 0x80))
- upb_decoder_abortjmp(d, "Unterminated varint");
+ abortjmp(d, "Unterminated varint");
return u64;
}
+NOINLINE uint32_t decode_v32_slow(upb_pbdecoder *d) {  // Decodes a varint byte-by-byte and requires it to fit in 32 bits.
+ uint64_t u64 = decode_varint_slow(d);
+ if (u64 > UINT32_MAX) abortjmp(d, "Unterminated 32-bit varint");  // Values above UINT32_MAX abort the decode.
+ return (uint32_t)u64;
+}
+
// For tags and delimited lengths, which must be <=32bit and are usually small.
-FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
- const char *p = d->ptr;
- uint32_t ret;
- uint64_t u64;
+FORCEINLINE uint32_t decode_v32(upb_pbdecoder *d) {
// Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
- if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely.
- ret = *p & 0x7f;
- if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order
- ret |= (*p & 0x7f) << 7;
- if ((*(p++) & 0x80) == 0) goto done; // likely
-slow:
- u64 = upb_decode_varint_slow(d);
- if (u64 > UINT32_MAX) upb_decoder_abortjmp(d, "Unterminated 32-bit varint");
- ret = (uint32_t)u64;
- p = d->ptr; // Turn the next line into a nop.
-done:
- upb_decoder_advance(d, p - d->ptr);
- return ret;
-}
-
-// Returns true on success or false if we've hit a valid EOF.
-FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
- if (upb_decoder_bufleft(d) == 0 &&
- upb_decoder_islegalend(d) &&
- !upb_trypullbuf(d)) {
- return false;
+ if (bufleft(d) >= 2) {
+ uint32_t ret = d->ptr[0] & 0x7f;
+ if ((d->ptr[0] & 0x80) == 0) {
+ advance(d, 1);
+ return ret;
+ }
+ ret |= (d->ptr[1] & 0x7f) << 7;
+ if ((d->ptr[1] & 0x80) == 0) {
+ advance(d, 2);
+ return ret;
+ }
}
- *val = upb_decode_varint32(d);
- return true;
+ return decode_v32_slow(d);
}
-FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
- if (upb_decoder_bufleft(d) >= 10) {
+FORCEINLINE uint64_t decode_varint(upb_pbdecoder *d) {
+ if (bufleft(d) >= 10) {
// Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
- if (r.p == NULL) upb_decoder_abortjmp(d, "Unterminated varint");
- upb_decoder_advance(d, r.p - d->ptr);
+ if (r.p == NULL) abortjmp(d, "Unterminated varint");
+ advance(d, r.p - d->ptr);
return r.val;
- } else if (upb_decoder_bufleft(d) > 0) {
- // Intermediate case -- worth it?
- char tmpbuf[10];
- memset(tmpbuf, 0x80, 10);
- memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d));
- upb_decoderet r = upb_vdecode_fast(tmpbuf);
- if (r.p != NULL) {
- upb_decoder_advance(d, r.p - tmpbuf);
- return r.val;
- }
- }
- // Slow case -- varint spans buffer seam.
- return upb_decode_varint_slow(d);
-}
-
-FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
- if (upb_decoder_bufleft(d) >= bytes) {
- // Fast case.
- memcpy(buf, d->ptr, bytes);
- upb_decoder_advance(d, bytes);
} else {
- // Slow case.
- size_t read = 0;
- while (1) {
- size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
- memcpy(buf + read, d->ptr, avail);
- upb_decoder_advance(d, avail);
- read += avail;
- if (read == bytes) break;
- upb_pullbuf(d);
- }
+ // Slow case -- varint spans buffer seam.
+ return decode_varint_slow(d);
}
}
-FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
+FORCEINLINE uint32_t decode_fixed32(upb_pbdecoder *d) {
uint32_t u32;
- upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
+ getbytes(d, &u32, 4);
return u32; // TODO: proper byte swapping for big-endian machines.
}
-FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
+
+FORCEINLINE uint64_t decode_fixed64(upb_pbdecoder *d) {
uint64_t u64;
- upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
+ getbytes(d, &u64, 8);
return u64; // TODO: proper byte swapping for big-endian machines.
}
-INLINE void upb_push_msg(upb_decoder *d, const upb_fielddef *f, uint64_t end) {
- upb_decoder_frame *fr = d->top + 1;
- if (!upb_sink_startsubmsg(&d->sink, f) || fr > d->limit) {
- upb_decoder_abortjmp(d, "Nesting too deep.");
- }
+static void push(upb_pbdecoder *d, const upb_fielddef *f, bool is_sequence,
+ bool is_packed, int32_t group_fieldnum, uint64_t end) {
+ frame *fr = d->top + 1;
+ if (fr >= d->limit) abortjmp(d, "Nesting too deep.");
fr->f = f;
- fr->is_sequence = false;
- fr->is_packed = false;
+ fr->is_sequence = is_sequence;
+ fr->is_packed = is_packed;
fr->end_ofs = end;
- fr->group_fieldnum = end == UPB_NONDELIMITED ?
- (int32_t)upb_fielddef_number(f) : -1;
+ fr->group_fieldnum = group_fieldnum;
d->top = fr;
- upb_decoder_setmsgend(d);
+ set_delim_end(d);
}
-INLINE void upb_push_seq(upb_decoder *d, const upb_fielddef *f, bool packed,
- uint64_t end_ofs) {
- upb_decoder_frame *fr = d->top + 1;
- if (!upb_sink_startseq(&d->sink, f) || fr > d->limit) {
- upb_decoder_abortjmp(d, "Nesting too deep.");
- }
- fr->f = f;
- fr->is_sequence = true;
- fr->group_fieldnum = -1;
- fr->is_packed = packed;
- fr->end_ofs = end_ofs;
- d->top = fr;
- upb_decoder_setmsgend(d);
+static void push_msg(upb_pbdecoder *d, const upb_fielddef *f, uint64_t end) {  // Starts a submessage on the sink and pushes a decoder frame for it.
+ if (!upb_sink_startsubmsg(d->sink, getselector(f, UPB_HANDLER_STARTSUBMSG)))
+ abortjmp(d, "startsubmsg failed.");
+ int32_t group_fieldnum = (end == UPB_NONDELIMITED) ?  // A non-delimited end marks a group; remember its field number.
+ (int32_t)upb_fielddef_number(f) : -1;
+ push(d, f, false, false, group_fieldnum, end);
+}
+
+static void push_seq(upb_pbdecoder *d, const upb_fielddef *f, bool packed,  // Starts a sequence on the sink and pushes a decoder frame for it.
+ uint64_t end_ofs) {
+ if (!upb_sink_startseq(d->sink, getselector(f, UPB_HANDLER_STARTSEQ)))
+ abortjmp(d, "startseq failed.");
+ push(d, f, true, packed, -1, end_ofs);  // is_sequence = true; -1 = not a group.
+}
+
+static void push_str(upb_pbdecoder *d, const upb_fielddef *f, size_t len,  // Starts a string of length len on the sink and pushes a decoder frame for it.
+ uint64_t end) {
+ if (!upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len))
+ abortjmp(d, "startstr failed.");  // Message now names the call that actually failed.
+ push(d, f, false, false, -1, end);  // Not a sequence, not packed; -1 = not a group.
+}
-INLINE void upb_pop_submsg(upb_decoder *d) {
- upb_sink_endsubmsg(&d->sink, d->top->f);
+static void pop_submsg(upb_pbdecoder *d) {
+ upb_sink_endsubmsg(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSUBMSG));
d->top--;
- upb_decoder_setmsgend(d);
+ set_delim_end(d);
}
-INLINE void upb_pop_seq(upb_decoder *d) {
- upb_sink_endseq(&d->sink, d->top->f);
+static void pop_seq(upb_pbdecoder *d) {
+ upb_sink_endseq(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSEQ));
d->top--;
- upb_decoder_setmsgend(d);
+ set_delim_end(d);
+}
+
+static void pop_string(upb_pbdecoder *d) {  // Ends the string of the top frame on the sink and pops that frame.
+ upb_sink_endstr(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSTR));
+ d->top--;
+ set_delim_end(d);  // delim_end depends on the stack top, so recompute it.
+}
+
+static void checkdelim(upb_pbdecoder *d) {  // Pops every frame whose delimited region ends at or before d->ptr.
+ while (d->delim_end && d->ptr >= d->delim_end) {  // delim_end is NULL when the region does not end in this buffer.
+ // TODO(haberman): not sure what to do about this; if we detect this error
+ // we can possibly violate the promise that errors are always signaled by a
+ // short "parsed byte" count (because all bytes might have been successfully
+ // parsed prior to detecting this error).
+ // if (d->ptr > d->delim_end) abortjmp(d, "Bad submessage end");
+ if (d->top->is_sequence) {
+ pop_seq(d);
+ } else {
+ pop_submsg(d);
+ }
+ }
+}
@@ -374,95 +517,79 @@ INLINE void upb_pop_seq(upb_decoder *d) {
// properly sign-extended. We could detect this and error about the data loss,
// but proto2 does not do this, so we pass.
-#define T(type, wt, name, convfunc) \
- INLINE void upb_decode_ ## type(upb_decoder *d, const upb_fielddef *f) { \
- upb_sink_put ## name(&d->sink, f, (convfunc)(upb_decode_ ## wt(d))); \
+#define T(type, sel, wt, name, convfunc) \
+ static void decode_ ## type(upb_pbdecoder *d, const upb_fielddef *f) { \
+ upb_sink_put ## name(d->sink, getselector(f, UPB_HANDLER_ ## sel), \
+ (convfunc)(decode_ ## wt(d))); \
} \
static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; }
-T(INT32, varint, int32, int32_t)
-T(INT64, varint, int64, int64_t)
-T(UINT32, varint, uint32, uint32_t)
-T(UINT64, varint, uint64, uint64_t)
-T(FIXED32, fixed32, uint32, uint32_t)
-T(FIXED64, fixed64, uint64, uint64_t)
-T(SFIXED32, fixed32, int32, int32_t)
-T(SFIXED64, fixed64, int64, int64_t)
-T(BOOL, varint, bool, bool)
-T(ENUM, varint, int32, int32_t)
-T(DOUBLE, fixed64, double, upb_asdouble)
-T(FLOAT, fixed32, float, upb_asfloat)
-T(SINT32, varint, int32, upb_zzdec_32)
-T(SINT64, varint, int64, upb_zzdec_64)
+T(INT32, INT32, varint, int32, int32_t)
+T(INT64, INT64, varint, int64, int64_t)
+T(UINT32, UINT32, varint, uint32, uint32_t)
+T(UINT64, UINT64, varint, uint64, uint64_t)
+T(FIXED32, UINT32, fixed32, uint32, uint32_t)
+T(FIXED64, UINT64, fixed64, uint64, uint64_t)
+T(SFIXED32, INT32, fixed32, int32, int32_t)
+T(SFIXED64, INT64, fixed64, int64, int64_t)
+T(BOOL, BOOL, varint, bool, bool)
+T(ENUM, INT32, varint, int32, int32_t)
+T(DOUBLE, DOUBLE, fixed64, double, upb_asdouble)
+T(FLOAT, FLOAT, fixed32, float, upb_asfloat)
+T(SINT32, INT32, varint, int32, upb_zzdec_32)
+T(SINT64, INT64, varint, int64, upb_zzdec_64)
#undef T
-static void upb_decode_GROUP(upb_decoder *d, const upb_fielddef *f) {
- upb_push_msg(d, f, UPB_NONDELIMITED);
-}
-
-static void upb_decode_MESSAGE(upb_decoder *d, const upb_fielddef *f) {
- uint32_t len = upb_decode_varint32(d);
- upb_push_msg(d, f, upb_decoder_offset(d) + len);
-}
-
-static void upb_decode_STRING(upb_decoder *d, const upb_fielddef *f) {
- uint32_t strlen = upb_decode_varint32(d);
- uint64_t offset = upb_decoder_offset(d);
- uint64_t end = offset + strlen;
- if (end > upb_byteregion_endofs(d->input))
- upb_decoder_abortjmp(d, "Unexpected EOF");
- upb_sink_startstr(&d->sink, f, strlen);
- while (strlen > 0) {
- if (upb_byteregion_available(d->input, offset) == 0)
- upb_pullbuf(d);
- size_t len;
- const char *ptr = upb_byteregion_getptr(d->input, offset, &len);
- len = UPB_MIN(len, strlen);
- len = upb_sink_putstring(&d->sink, f, ptr, len);
- if (len > strlen)
- upb_decoder_abortjmp(d, "Skipped too many bytes.");
- offset += len;
- strlen -= len;
- upb_decoder_discardto(d, offset);
- }
- upb_sink_endstr(&d->sink, f);
+static void decode_GROUP(upb_pbdecoder *d, const upb_fielddef *f) {
+ push_msg(d, f, UPB_NONDELIMITED);
}
+static void decode_MESSAGE(upb_pbdecoder *d, const upb_fielddef *f) {
+ uint32_t len = decode_v32(d);
+ push_msg(d, f, offset(d) + len);
+}
-/* The main decoding loop *****************************************************/
-
-static void upb_decoder_checkdelim(upb_decoder *d) {
- // TODO: This doesn't work for the case that no buffer is currently loaded
- // (ie. d->buf == NULL) because delim_end is NULL even if we are at
- // end-of-delim. Need to add a test that exercises this by putting a buffer
- // seam in the middle of the final delimited value in a proto that we skip
- // for some reason (like because it's unknown and we have no unknown field
- // handler).
- while (d->delim_end != NULL && d->ptr >= d->delim_end) {
- if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end");
- if (d->top->is_sequence) {
- upb_pop_seq(d);
- } else {
- upb_pop_submsg(d);
+static void decode_STRING(upb_pbdecoder *d, const upb_fielddef *f) {
+ uint32_t strlen = decode_v32(d);
+ if (strlen <= bufleft(d)) {
+ upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), strlen);
+ if (strlen)
+ upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING),
+ d->ptr, strlen);
+ upb_sink_endstr(d->sink, getselector(f, UPB_HANDLER_ENDSTR));
+ advance(d, strlen);
+ } else {
+ // Buffer ends in the middle of the string; need to push a decoder frame
+ // for it.
+ push_str(d, f, strlen, offset(d) + strlen);
+ if (bufleft(d)) {
+ upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING),
+ d->ptr, bufleft(d));
+ advance(d, bufleft(d));
}
+ d->bufstart_ofs = offset(d);
+ d->residual_end = d->residual;
+ suspendjmp(d);
}
}
-INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) {
+
+/* The main decoding loop *****************************************************/
+
+static const upb_fielddef *decode_tag(upb_pbdecoder *d) {
while (1) {
- uint32_t tag;
- if (!upb_trydecode_varint32(d, &tag)) return NULL;
+ uint32_t tag = decode_v32(d);
uint8_t wire_type = tag & 0x7;
uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL;
- const upb_handlers *h = upb_sink_tophandlers(&d->sink);
+ const upb_handlers *h = upb_sinkframe_handlers(upb_sink_top(d->sink));
f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum);
bool packed = false;
if (f) {
// Wire type check.
- upb_fieldtype_t type = upb_fielddef_type(f);
+ upb_descriptortype_t type = upb_fielddef_descriptortype(f);
if (wire_type == upb_decoder_types[type].native_wire_type) {
// Wire type is ok.
} else if ((wire_type == UPB_WIRE_TYPE_DELIMITED &&
@@ -477,18 +604,19 @@ INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) {
// There are no explicit "startseq" or "endseq" markers in protobuf
// streams, so we have to infer them by noticing when a repeated field
// starts or ends.
- upb_decoder_frame *fr = d->top;
+ frame *fr = d->top;
if (fr->is_sequence && fr->f != f) {
- upb_pop_seq(d);
+ pop_seq(d);
fr = d->top;
}
if (f && upb_fielddef_isseq(f) && !fr->is_sequence) {
if (packed) {
- uint32_t len = upb_decode_varint32(d);
- upb_push_seq(d, f, true, upb_decoder_offset(d) + len);
+ uint32_t len = decode_v32(d);
+ push_seq(d, f, true, offset(d) + len);
+ checkpoint(d);
} else {
- upb_push_seq(d, f, false, fr->end_ofs);
+ push_seq(d, f, false, fr->end_ofs);
}
}
@@ -496,118 +624,202 @@ INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) {
// Unknown field or ENDGROUP.
if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
- upb_decoder_abortjmp(d, "Invalid field number");
+ abortjmp(d, "Invalid field number");
switch (wire_type) {
- case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
- case UPB_WIRE_TYPE_32BIT: upb_decoder_discard(d, 4); break;
- case UPB_WIRE_TYPE_64BIT: upb_decoder_discard(d, 8); break;
- case UPB_WIRE_TYPE_DELIMITED:
- upb_decoder_discard(d, upb_decode_varint32(d)); break;
+ case UPB_WIRE_TYPE_VARINT: decode_varint(d); break;
+ case UPB_WIRE_TYPE_32BIT: skip(d, 4); break;
+ case UPB_WIRE_TYPE_64BIT: skip(d, 8); break;
+ case UPB_WIRE_TYPE_DELIMITED: skip(d, decode_v32(d)); break;
case UPB_WIRE_TYPE_START_GROUP:
- upb_decoder_abortjmp(d, "Can't handle unknown groups yet");
+ abortjmp(d, "Can't handle unknown groups yet");
case UPB_WIRE_TYPE_END_GROUP:
if (fieldnum != fr->group_fieldnum)
- upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag");
- upb_sink_endsubmsg(&d->sink, fr->f);
- d->top--;
- upb_decoder_setmsgend(d);
+ abortjmp(d, "Unmatched ENDGROUP tag");
+ pop_submsg(d);
break;
default:
- upb_decoder_abortjmp(d, "Invalid wire type");
+ abortjmp(d, "Invalid wire type");
}
// TODO: deliver to unknown field callback.
- upb_decoder_checkpoint(d);
- upb_decoder_checkdelim(d);
+ checkpoint(d);
+ checkdelim(d);
}
}
-upb_success_t upb_decoder_decode(upb_decoder *d) {
- assert(d->input);
+void *start(const upb_sinkframe *fr, size_t size_hint) {  // Starts the top-level message on the decoder's output sink; size_hint is ignored.
+ UPB_UNUSED(size_hint);
+ upb_pbdecoder *d = upb_sinkframe_userdata(fr);  // The decoder is the frame's user data.
+ assert(d);
+ assert(d->sink);
+ upb_sink_startmsg(d->sink);
+ return d;
+}
+
+bool end(const upb_sinkframe *fr) {  // Finishes decoding; returns false and sets an error status if input ended mid-value or mid-frame.
+ upb_pbdecoder *d = upb_sinkframe_userdata(fr);
+
+ if (d->residual_end > d->residual) {
+ // We have preserved bytes.
+ upb_status_seterrliteral(decoder_status(d), "Unexpected EOF");
+ return false;
+ }
+
+ // We may need to dispatch a top-level implicit frame.
+ if (d->top == d->stack + 1 &&
+ d->top->is_sequence &&
+ !d->top->is_packed) {
+ assert(upb_sinkframe_depth(upb_sink_top(d->sink)) == 1);
+ pop_seq(d);
+ }
+ if (d->top != d->stack) {  // Any frame still on the stack means a submessage, string or packed sequence is unfinished.
+ upb_status_seterrliteral(
+ decoder_status(d), "Ended inside delimited field.");
+ return false;
+ }
+ upb_sink_endmsg(d->sink);
+ return true;
+}
+
+size_t decode(const upb_sinkframe *fr, const char *buf, size_t size) {
+ upb_pbdecoder *d = upb_sinkframe_userdata(fr);
+ decoderplan *plan = upb_sinkframe_handlerdata(fr);
+ UPB_UNUSED(plan);
+ assert(upb_sinkframe_handlers(upb_sink_top(d->sink)) == plan->dest_handlers);
+
+ if (size == 0) return 0;
+ // Assume we'll consume the whole buffer unless this is overwritten.
+ d->ret = size;
+
if (_setjmp(d->exitjmp)) {
- assert(!upb_ok(&d->status));
- return UPB_ERROR;
+ // Hit end-of-buffer or error.
+ return d->ret;
+ }
+
+ d->buf_param = buf;
+ d->size_param = size;
+ if (d->residual_end > d->residual) {
+ // We have residual bytes from the last buffer.
+ d->userbuf_remaining = size;
+ } else {
+ d->userbuf_remaining = 0;
+ advancetobuf(d, buf, size);
+
+ if (d->top != d->stack &&
+ upb_fielddef_isstring(d->top->f) &&
+ !d->top->is_sequence) {
+ // Last buffer ended in the middle of a string; deliver more of it.
+ size_t len = d->top->end_ofs - offset(d);
+ if (size >= len) {
+ upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING),
+ d->ptr, len);
+ advance(d, len);
+ pop_string(d);
+ } else {
+ upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING),
+ d->ptr, size);
+ advance(d, size);
+ d->residual_end = d->residual;
+ advancetobuf(d, d->residual, 0);
+ return size;
+ }
+ }
}
- upb_sink_startmsg(&d->sink);
- // Prime the buf so we can hit the JIT immediately.
- upb_trypullbuf(d);
+ checkpoint(d);
+
const upb_fielddef *f = d->top->f;
while(1) {
#ifdef UPB_USE_JIT_X64
- upb_decoder_enterjit(d);
- upb_decoder_checkpoint(d);
- upb_decoder_setmsgend(d);
+ upb_decoder_enterjit(d, plan);
+ checkpoint(d);
+ set_delim_end(d); // JIT doesn't keep this current.
#endif
- upb_decoder_checkdelim(d);
- if (!d->top_is_packed) f = upb_decode_tag(d);
- if (!f) {
- // Sucessful EOF. We may need to dispatch a top-level implicit frame.
- if (d->top->is_sequence) {
- assert(d->sink.top == d->sink.stack + 1);
- upb_pop_seq(d);
- }
- assert(d->top == d->stack);
- upb_sink_endmsg(&d->sink, &d->status);
- return UPB_OK;
+ checkdelim(d);
+ if (!d->top->is_packed) {
+ f = decode_tag(d);
}
- switch (upb_fielddef_type(f)) {
- case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break;
- case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break;
- case UPB_TYPE(INT64): upb_decode_INT64(d, f); break;
- case UPB_TYPE(UINT64): upb_decode_UINT64(d, f); break;
- case UPB_TYPE(INT32): upb_decode_INT32(d, f); break;
- case UPB_TYPE(FIXED64): upb_decode_FIXED64(d, f); break;
- case UPB_TYPE(FIXED32): upb_decode_FIXED32(d, f); break;
- case UPB_TYPE(BOOL): upb_decode_BOOL(d, f); break;
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES): upb_decode_STRING(d, f); break;
- case UPB_TYPE(GROUP): upb_decode_GROUP(d, f); break;
- case UPB_TYPE(MESSAGE): upb_decode_MESSAGE(d, f); break;
- case UPB_TYPE(UINT32): upb_decode_UINT32(d, f); break;
- case UPB_TYPE(ENUM): upb_decode_ENUM(d, f); break;
- case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break;
- case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break;
- case UPB_TYPE(SINT32): upb_decode_SINT32(d, f); break;
- case UPB_TYPE(SINT64): upb_decode_SINT64(d, f); break;
- case UPB_TYPE_NONE: assert(false); break;
+ switch (upb_fielddef_descriptortype(f)) {
+ case UPB_DESCRIPTOR_TYPE_DOUBLE: decode_DOUBLE(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_FLOAT: decode_FLOAT(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_INT64: decode_INT64(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_UINT64: decode_UINT64(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_INT32: decode_INT32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_FIXED64: decode_FIXED64(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_FIXED32: decode_FIXED32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_BOOL: decode_BOOL(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_STRING: UPB_FALLTHROUGH_INTENDED;
+ case UPB_DESCRIPTOR_TYPE_BYTES: decode_STRING(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_GROUP: decode_GROUP(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_MESSAGE: decode_MESSAGE(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_UINT32: decode_UINT32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_ENUM: decode_ENUM(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_SFIXED32: decode_SFIXED32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_SFIXED64: decode_SFIXED64(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_SINT32: decode_SINT32(d, f); break;
+ case UPB_DESCRIPTOR_TYPE_SINT64: decode_SINT64(d, f); break;
}
- upb_decoder_checkpoint(d);
+ checkpoint(d);
}
}
-void upb_decoder_init(upb_decoder *d) {
- upb_status_init(&d->status);
- d->plan = NULL;
- d->input = NULL;
+void init(void *_d) {
+ upb_pbdecoder *d = _d;
d->limit = &d->stack[UPB_MAX_NESTING];
+ d->sink = NULL;
+ // reset() must be called before decoding; this is guaranteed by assert() in
+ // start().
}
-void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p) {
- d->plan = p;
- d->input = NULL;
- upb_sink_init(&d->sink, p->handlers);
-}
-
-void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
- void *c) {
- assert(d->plan);
- upb_status_clear(&d->status);
- upb_sink_reset(&d->sink, c);
- d->input = input;
-
+void reset(void *_d) {
+ upb_pbdecoder *d = _d;
d->top = d->stack;
d->top->is_sequence = false;
d->top->is_packed = false;
d->top->group_fieldnum = UINT32_MAX;
d->top->end_ofs = UPB_NONDELIMITED;
-
- // Protect against assert in skiptonewbuf().
d->bufstart_ofs = 0;
- d->ptr = NULL;
- d->buf = NULL;
- upb_decoder_skiptonewbuf(d, upb_byteregion_startofs(input));
+ d->ptr = d->residual;
+ d->buf = d->residual;
+ d->end = d->residual;
+ d->residual_end = d->residual;
}
-void upb_decoder_uninit(upb_decoder *d) {
- upb_status_uninit(&d->status);
+bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) {
+ // TODO(haberman): typecheck the sink, and test whether the decoder is in the
+ // middle of decoding. Return false if either assumption is violated.
+ d->sink = sink;
+ reset(d);
+ return true;
+}
+
+const upb_frametype upb_pbdecoder_frametype = {
+ sizeof(upb_pbdecoder),
+ init,
+ NULL,
+ reset,
+};
+
+const upb_frametype *upb_pbdecoder_getframetype() {
+ return &upb_pbdecoder_frametype;
+}
+
+const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest,
+ bool allowjit,
+ const void *owner) {
+ UPB_UNUSED(allowjit);
+ decoderplan *p = malloc(sizeof(*p));
+ assert(upb_handlers_isfrozen(dest));
+ p->dest_handlers = dest;
+ upb_handlers_ref(dest, p);
+#ifdef UPB_USE_JIT_X64
+ p->jit_code = NULL;
+ if (allowjit) upb_decoderplan_makejit(p);
+#endif
+
+ upb_handlers *h = upb_handlers_new(
+ UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner);
+ upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, start, NULL, NULL);
+ upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, decode, p, freeplan);
+ upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, end, NULL, NULL);
+ return h;
}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index 690ebb9..4307434 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -4,163 +4,96 @@
* Copyright (c) 2009-2010 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * upb_decoder implements a high performance, streaming decoder for protobuf
- * data that works by getting its input data from a upb_byteregion and calling
- * into a upb_handlers.
+ * upb::Decoder implements a high performance, streaming decoder for protobuf
+ * data that works by parsing input data one buffer at a time and calling into
+ * a upb::Handlers.
*/
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
-#include <setjmp.h>
-#include "upb/bytestream.h"
#include "upb/sink.h"
#ifdef __cplusplus
-extern "C" {
-#endif
+namespace upb {
+namespace pb {
-/* upb_decoderplan ************************************************************/
-
-// A decoderplan contains whatever data structures and generated (JIT-ted) code
-// are necessary to decode protobuf data of a specific type to a specific set
-// of handlers. By generating the plan ahead of time, we avoid having to
-// redo this work every time we decode.
-//
-// A decoderplan is threadsafe, meaning that it can be used concurrently by
-// different upb_decoders in different threads. However, the upb_decoders are
-// *not* thread-safe.
-struct _upb_decoderplan;
-typedef struct _upb_decoderplan upb_decoderplan;
-
-// TODO(haberman):
-// - add support for letting any message in the plan be at the top level.
-// - make this object a handlers instead (when bytesrc/bytesink are merged
-// into handlers).
-// - add support for sharing code with previously-built plans/handlers.
-upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit);
-void upb_decoderplan_unref(upb_decoderplan *p);
-
-// Returns true if the plan contains JIT-ted code. This may not be the same as
-// the "allowjit" parameter to the constructor if support for JIT-ting was not
-// compiled in.
-bool upb_decoderplan_hasjitcode(upb_decoderplan *p);
-
-
-/* upb_decoder ****************************************************************/
-
-struct dasm_State;
-
-typedef struct {
- const upb_fielddef *f;
- uint64_t end_ofs;
- uint32_t group_fieldnum; // UINT32_MAX for non-groups.
- bool is_sequence; // frame represents seq or submsg? (f might be both).
- bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX
- // (strings aren't pushed).
-} upb_decoder_frame;
-
-typedef struct _upb_decoder {
- upb_decoderplan *plan;
- upb_byteregion *input; // Input data (serialized), not owned.
- upb_status status; // Where we store errors that occur.
-
- // Where we push parsed data.
- // TODO(haberman): make this a pointer and make upb_decoder_resetinput() take
- // one of these instead of a void*.
- upb_sink sink;
-
- // Our internal stack.
- upb_decoder_frame *top, *limit;
- upb_decoder_frame stack[UPB_MAX_NESTING];
-
- // Current input buffer and its stream offset.
- const char *buf, *ptr, *end;
- uint64_t bufstart_ofs;
-
- // End of the delimited region, relative to ptr, or NULL if not in this buf.
- const char *delim_end;
- // True if the top stack frame represents a packed field.
- bool top_is_packed;
-
-#ifdef UPB_USE_JIT_X64
- // For JIT, which doesn't do bounds checks in the middle of parsing a field.
- const char *jit_end, *effective_end; // == MIN(jit_end, delim_end)
-
- // Used momentarily by the generated code to store a value while a user
- // function is called.
- uint32_t tmp_len;
-#endif
-
- // For exiting the decoder on error.
- jmp_buf exitjmp;
-} upb_decoder;
-
-void upb_decoder_init(upb_decoder *d);
-void upb_decoder_uninit(upb_decoder *d);
-
-// Resets the plan that the decoder will parse from. "msg_offset" indicates
-// which message from the plan will be used as the top-level message.
-//
-// This will also reset the decoder's input to be uninitialized --
-// upb_decoder_resetinput() must be called before parsing can occur. The plan
-// must live until the decoder is destroyed or reset to a different plan.
-//
-// Must be called before upb_decoder_resetinput() or upb_decoder_decode().
-void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p);
-
-// Resets the input of an already-allocated decoder. This puts it in a state
-// where it has not seen any data, and expects the next data to be from the
-// beginning of a new protobuf. Decoders must have their input reset before
-// they can be used. A decoder can have its input reset multiple times.
-// "input" must live until the decoder is destroyed or has it input reset
-// again. "c" is the closure that will be passed to the handlers.
-//
-// Must be called before upb_decoder_decode().
-void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *c);
-
-// Decodes serialized data (calling handlers as the data is parsed), returning
-// the success of the operation (call upb_decoder_status() for details).
-upb_success_t upb_decoder_decode(upb_decoder *d);
-
-INLINE const upb_status *upb_decoder_status(upb_decoder *d) {
- return &d->status;
-}
+// Frame type that encapsulates decoder state.
+class Decoder;
-// Implementation details
+// Resets the sink of the Decoder. This must be called at least once before
+// the decoder can be used. It may only be called when the decoder is in a
+// state where it was just created or reset. The given sink must be from the
+// same pipeline as this decoder.
+inline bool ResetDecoderSink(Decoder* d, Sink* sink);
-struct _upb_decoderplan {
- // The top-level handlers that this plan calls into. We own a ref.
- const upb_handlers *handlers;
+// Gets the handlers suitable for parsing protobuf data according to the given
+// destination handlers. The protobuf schema to parse is taken from dest.
+inline const upb::Handlers *GetDecoderHandlers(const upb::Handlers *dest,
+ bool allowjit,
+ const void *owner);
-#ifdef UPB_USE_JIT_X64
- // JIT-generated machine code (else NULL).
- char *jit_code;
- size_t jit_size;
- char *debug_info;
+// Returns true if these handlers represent a upb::pb::Decoder.
+bool IsDecoder(const upb::Handlers *h);
- // For storing upb_jitmsginfo, which contains per-msg runtime data needed
- // by the JIT.
- // Maps upb_handlers* -> upb_jitmsginfo.
- upb_inttable msginfo;
+// Returns true if IsDecoder(h) and the given handlers have JIT code.
+inline bool HasJitCode(const upb::Handlers* h);
- // The following members are used only while the JIT is being built.
+// Returns the destination handlers if IsDecoder(h), otherwise returns NULL.
+const upb::Handlers* GetDestHandlers(const upb::Handlers* h);
- // This pointer is allocated by dasm_init() and freed by dasm_free().
- struct dasm_State *dynasm;
+} // namespace pb
+} // namespace upb
- // For storing pclabel bases while we are building the JIT.
- // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base
- upb_inttable pclabels;
+typedef upb::pb::Decoder upb_pbdecoder;
- // This is not the same as len(pclabels) because the table only contains base
- // offsets for each def, but each def can have many pclabels.
- uint32_t pclabel_count;
+extern "C" {
+#else
+struct upb_pbdecoder;
+typedef struct upb_pbdecoder upb_pbdecoder;
#endif
-};
+
+// C API.
+const upb_frametype *upb_pbdecoder_getframetype();
+bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink *sink);
+const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest,
+ bool allowjit,
+ const void *owner);
+bool upb_pbdecoder_isdecoder(const upb_handlers *h);
+bool upb_pbdecoder_hasjitcode(const upb_handlers *h);
+const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h);
+
+// C++ implementation details. /////////////////////////////////////////////////
#ifdef __cplusplus
-} /* extern "C" */
+} // extern "C"
+
+namespace upb {
+
+template<> inline const FrameType* GetFrameType<upb::pb::Decoder>() {
+ return upb_pbdecoder_getframetype();
+}
+
+namespace pb {
+inline bool ResetDecoderSink(Decoder* r, Sink* sink) {
+ return upb_pbdecoder_resetsink(r, sink);
+}
+inline const upb::Handlers* GetDecoderHandlers(const upb::Handlers* dest,
+ bool allowjit,
+ const void* owner) {
+ return upb_pbdecoder_gethandlers(dest, allowjit, owner);
+}
+inline bool IsDecoder(const upb::Handlers* h) {
+ return upb_pbdecoder_isdecoder(h);
+}
+inline bool HasJitCode(const upb::Handlers* h) {
+ return upb_pbdecoder_hasjitcode(h);
+}
+inline const upb::Handlers* GetDestHandlers(const upb::Handlers* h) {
+ return upb_pbdecoder_getdesthandlers(h);
+}
+} // namespace pb
+} // namespace upb
#endif
#endif /* UPB_DECODER_H_ */
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
index cd09cfe..7d4c537 100644
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@@ -4,7 +4,7 @@
|// Copyright (c) 2011 Google Inc. See LICENSE for details.
|// Author: Josh Haberman <jhaberman@gmail.com>
|//
-|// JIT compiler for upb_decoder on x86. Given a upb_decoderplan object (which
+|// JIT compiler for upb_pbdecoder on x86. Given a decoderplan object (which
|// contains an embedded set of upb_handlers), generates code specialized to
|// parsing the specific message and calling specific handlers.
|//
@@ -54,17 +54,19 @@ typedef struct {
void *jit_func;
} upb_jitmsginfo;
-static uint32_t upb_getpclabel(upb_decoderplan *plan, const void *obj, int n) {
- const upb_value *v = upb_inttable_lookupptr(&plan->pclabels, obj);
- assert(v);
- return upb_value_getuint32(*v) + n;
+static uint32_t upb_getpclabel(decoderplan *plan, const void *obj, int n) {
+ upb_value v;
+ bool found = upb_inttable_lookupptr(&plan->pclabels, obj, &v);
+ UPB_ASSERT_VAR(found, found);
+ return upb_value_getuint32(v) + n;
}
-static upb_jitmsginfo *upb_getmsginfo(upb_decoderplan *plan,
+static upb_jitmsginfo *upb_getmsginfo(decoderplan *plan,
const upb_handlers *h) {
- const upb_value *v = upb_inttable_lookupptr(&plan->msginfo, h);
- assert(v);
- return upb_value_getptr(*v);
+ upb_value v;
+ bool found = upb_inttable_lookupptr(&plan->msginfo, h, &v);
+ UPB_ASSERT_VAR(found, found);
+ return upb_value_getptr(v);
}
// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
@@ -109,7 +111,7 @@ void __attribute__((noinline)) __jit_debug_register_code() {
__asm__ __volatile__("");
}
-void upb_reg_jit_gdb(upb_decoderplan *plan) {
+void upb_reg_jit_gdb(decoderplan *plan) {
// Create debug info.
size_t elf_len = sizeof(upb_jit_debug_elf_file);
plan->debug_info = malloc(elf_len);
@@ -135,7 +137,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) {
#else
-void upb_reg_jit_gdb(upb_decoderplan *plan) {
+void upb_reg_jit_gdb(decoderplan *plan) {
(void)plan;
}
@@ -154,10 +156,9 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
|// Calling conventions. Note -- this will need to be changed for
|// Windows, which uses a different calling convention!
|.define ARG1_64, rdi
-|.define ARG2_8, sil
+|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi.
|.define ARG2_32, esi
|.define ARG2_64, rsi
-|.define ARG3_8, dl
|.define ARG3_32, edx
|.define ARG3_64, rdx
|.define ARG4_64, rcx
@@ -170,9 +171,10 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
|// conventions, but of course when calling to user callbacks we must.
|.define PTR, rbx // Writing this to DECODER->ptr commits our progress.
|.define CLOSURE, r12
-|.type SINKFRAME, upb_sink_frame, r13
-|.type FRAME, upb_decoder_frame, r14
-|.type DECODER, upb_decoder, r15
+|.type SINKFRAME, upb_sinkframe, r13
+|.type FRAME, frame, r14
+|.type DECODER, upb_pbdecoder, r15
+|.type SINK, upb_sink
|
|.macro callp, addr
|| upb_assert_notnull(addr);
@@ -187,6 +189,21 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
|| }
|.endmacro
|
+|.macro load_handler_data, h, f, type
+||{
+|| uintptr_t data = (uintptr_t)gethandlerdata(h, f, type);
+|| if (data > 0xffffffff) {
+| mov64 rax, data
+| mov SINKFRAME->u.handler_data, rax
+|| } else if (data > 0x7fffffff) {
+| mov eax, data
+| mov SINKFRAME->u.handler_data, rax
+|| } else {
+| mov qword SINKFRAME->u.handler_data, data
+|| }
+|| }
+|.endmacro
+|
|// Checkpoints our progress by writing PTR to DECODER, and
|// checks for end-of-buffer.
|.macro checkpoint, h
@@ -205,25 +222,33 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
| jz ->exit_jit
|.endmacro
|
-|// Decodes varint from [PTR + offset] -> ARG3.
-|// Saves new pointer as rax.
+|// Decodes varint into ARG2.
+|// Inputs:
+|// - ecx: first 4 bytes of varint
+|// - offset: offset from PTR where varint begins
+|// Outputs:
+|// - ARG2: contains decoded varint
+|// - rax: new PTR
|.macro decode_loaded_varint, offset
| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
| lea rax, [PTR + offset + 1]
-| mov ARG3_32, ecx
-| and ARG3_32, 0x7f
+| mov ARG2_32, ecx
+| and ARG2_32, 0x7f
| test cl, cl
| jns >9
| lea rax, [PTR + offset + 2]
-| movzx esi, ch
-| and esi, 0x7f
-| shl esi, 7
-| or ARG3_32, esi
+| movzx edx, ch
+| and edx, 0x7f
+| shl edx, 7
+| or ARG2_32, edx
| test cx, cx
| jns >9
| mov ARG1_64, rax
-| mov ARG2_32, ARG3_32
+|// XXX: I don't think this handles 64-bit values correctly.
+|// Test with UINT64_MAX
| callp upb_vdecode_max8_fast
+|// rax return from function will contain new pointer
+| mov ARG2_64, rdx
| check_ptr_ret // Check for unterminated, >10-byte varint.
|9:
|.endmacro
@@ -234,17 +259,22 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
| mov PTR, rax
|.endmacro
|
-|// Decode the tag -> edx.
+|// Table-based field dispatch.
+|// Inputs:
+|// - ecx: first 4 bytes of tag
+|// Outputs:
+|// - ecx: field number
+|// - esi: wire type
|// Could specialize this by avoiding the value masking: could just key the
|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
|// Currently only support tables where all entries are in the array part.
|.macro dyndispatch_, h
|=>upb_getpclabel(plan, h, DYNDISPATCH):
| decode_loaded_varint, 0
-| mov ecx, edx
+| mov ecx, esi
| shr ecx, 3
-| and edx, 0x7 // Note: this value is used in the FIELD pclabel below.
-| cmp edx, UPB_WIRE_TYPE_END_GROUP
+| and esi, 0x7 // Note: this value is used in the FIELD pclabel below.
+| cmp esi, UPB_WIRE_TYPE_END_GROUP
| je >1
|| upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
| cmp ecx, mi->max_field_number // Bounds-check the field.
@@ -278,10 +308,31 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
| .endmacro
|.endif
|
-|// Push a stack frame (not the CPU stack, the upb_decoder stack).
-|.macro pushframe, h, field, end_offset_, endtype
+|.macro pushsinkframe, handlers, field, endtype
+| mov rax, DECODER->sink
+| mov dword SINKFRAME->u.selector, getselector(field, endtype)
+| lea rcx, [SINKFRAME + sizeof(upb_sinkframe)] // rcx for short addressing
+| cmp rcx, SINK:rax->limit
+| jae ->exit_jit // Frame stack overflow.
+| mov64 r9, (uintptr_t)handlers
+| mov SINKFRAME:rcx->h, r9
+| mov SINKFRAME:rcx->closure, CLOSURE
+| mov SINK:rax->top_, rcx
+| mov SINKFRAME:rcx->sink_, rax
+| mov SINKFRAME, rcx
+|.endmacro
+|
+|.macro popsinkframe
+| sub SINKFRAME, sizeof(upb_sinkframe)
+| mov rax, DECODER->sink
+| mov SINK:rax->top_, SINKFRAME
+| mov CLOSURE, SINKFRAME->closure
+|.endmacro
+|
+|// Push a stack frame (not the CPU stack, the upb_pbdecoder stack).
+|.macro pushframe, handlers, field, end_offset_, endtype
|// Decoder Frame.
-| lea rax, [FRAME + sizeof(upb_decoder_frame)] // rax for short addressing
+| lea rax, [FRAME + sizeof(frame)] // rax for short addressing
| cmp rax, DECODER->limit
| jae ->exit_jit // Frame stack overflow.
| mov64 r10, (uintptr_t)field
@@ -289,36 +340,21 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
| mov qword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ)
| mov byte FRAME:rax->is_packed, 0
-|| if (upb_fielddef_type(field) == UPB_TYPE_GROUP &&
-|| endtype == UPB_HANDLER_ENDSUBMSG) {
+|| if (upb_fielddef_istagdelim(field) && endtype == UPB_HANDLER_ENDSUBMSG) {
| mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field)
|| } else {
| mov dword FRAME:rax->group_fieldnum, 0xffffffff
|| }
| mov DECODER->top, rax
| mov FRAME, rax
-|// Sink Frame.
-| lea rcx, [SINKFRAME + sizeof(upb_sink_frame)] // rcx for short addressing
-| cmp rcx, DECODER->sink.limit
-| jae ->exit_jit // Frame stack overflow.
-| mov dword SINKFRAME:rcx->end, getselector(field, endtype)
-|| if (upb_fielddef_issubmsg(field)) {
-| mov64 r9, (uintptr_t)upb_handlers_getsubhandlers(h, field)
-|| } else {
-| mov64 r9, (uintptr_t)h
-|| }
-| mov SINKFRAME:rcx->h, r9
-| mov DECODER->sink.top, rcx
-| mov SINKFRAME, rcx
+| pushsinkframe handlers, field, endtype
|.endmacro
|
|.macro popframe
-| sub FRAME, sizeof(upb_decoder_frame)
+| sub FRAME, sizeof(frame)
| mov DECODER->top, FRAME
-| sub SINKFRAME, sizeof(upb_sink_frame)
-| mov DECODER->sink.top, SINKFRAME
+| popsinkframe
| setmsgend
-| mov CLOSURE, SINKFRAME->closure
|.endmacro
|
|.macro setmsgend
@@ -369,14 +405,6 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
#include <stdlib.h>
#include "upb/pb/varint.h"
-static upb_selector_t getselector(const upb_fielddef *f,
- upb_handlertype_t type) {
- upb_selector_t selector;
- bool ok = upb_getselector(f, type, &selector);
- UPB_ASSERT_VAR(ok, ok);
- return selector;
-}
-
static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f,
upb_handlertype_t type) {
return upb_handlers_gethandler(h, getselector(f, type));
@@ -387,73 +415,74 @@ static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f,
return (uintptr_t)upb_handlers_gethandlerdata(h, getselector(f, type));
}
-// Decodes the next val into ARG3, advances PTR.
-static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
- uint8_t type, size_t tag_size,
+// Decodes the next val into ARG2 (XMMARG1 for float/double), advances PTR.
+static void upb_decoderplan_jit_decodefield(decoderplan *plan,
+ size_t tag_size,
const upb_handlers *h,
const upb_fielddef *f) {
// Decode the value into arg 3 for the callback.
- switch (type) {
- case UPB_TYPE(DOUBLE):
+ switch (upb_fielddef_descriptortype(f)) {
+ case UPB_DESCRIPTOR_TYPE_DOUBLE:
| movsd XMMARG1, qword [PTR + tag_size]
| add PTR, 8 + tag_size
break;
- case UPB_TYPE(FIXED64):
- case UPB_TYPE(SFIXED64):
- | mov ARG3_64, qword [PTR + tag_size]
+ case UPB_DESCRIPTOR_TYPE_FIXED64:
+ case UPB_DESCRIPTOR_TYPE_SFIXED64:
+ | mov ARG2_64, qword [PTR + tag_size]
| add PTR, 8 + tag_size
break;
- case UPB_TYPE(FLOAT):
+ case UPB_DESCRIPTOR_TYPE_FLOAT:
| movss XMMARG1, dword [PTR + tag_size]
| add PTR, 4 + tag_size
break;
- case UPB_TYPE(FIXED32):
- case UPB_TYPE(SFIXED32):
- | mov ARG3_32, dword [PTR + tag_size]
+ case UPB_DESCRIPTOR_TYPE_FIXED32:
+ case UPB_DESCRIPTOR_TYPE_SFIXED32:
+ | mov ARG2_32, dword [PTR + tag_size]
| add PTR, 4 + tag_size
break;
- case UPB_TYPE(BOOL):
+ case UPB_DESCRIPTOR_TYPE_BOOL:
// Can't assume it's one byte long, because bool must be wire-compatible
// with all of the varint integer types.
| decode_varint tag_size
- | test ARG3_64, ARG3_64
- | setne ARG3_8 // Other bytes left with val, should be ok.
+ | test ARG2_64, ARG2_64
+ | setne al
+ | movzx ARG2_32, al
break;
- case UPB_TYPE(INT64):
- case UPB_TYPE(UINT64):
- case UPB_TYPE(INT32):
- case UPB_TYPE(UINT32):
- case UPB_TYPE(ENUM):
+ case UPB_DESCRIPTOR_TYPE_INT64:
+ case UPB_DESCRIPTOR_TYPE_UINT64:
+ case UPB_DESCRIPTOR_TYPE_INT32:
+ case UPB_DESCRIPTOR_TYPE_UINT32:
+ case UPB_DESCRIPTOR_TYPE_ENUM:
| decode_varint tag_size
break;
- case UPB_TYPE(SINT64):
+ case UPB_DESCRIPTOR_TYPE_SINT64:
// 64-bit zig-zag decoding.
| decode_varint tag_size
- | mov rax, ARG3_64
- | shr ARG3_64, 1
+ | mov rax, ARG2_64
+ | shr ARG2_64, 1
| and rax, 1
| neg rax
- | xor ARG3_64, rax
+ | xor ARG2_64, rax
break;
- case UPB_TYPE(SINT32):
+ case UPB_DESCRIPTOR_TYPE_SINT32:
// 32-bit zig-zag decoding.
| decode_varint tag_size
- | mov eax, ARG3_32
- | shr ARG3_32, 1
+ | mov eax, ARG2_32
+ | shr ARG2_32, 1
| and eax, 1
| neg eax
- | xor ARG3_32, eax
+ | xor ARG2_32, eax
break;
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES): {
+ case UPB_DESCRIPTOR_TYPE_STRING:
+ case UPB_DESCRIPTOR_TYPE_BYTES: {
// We only handle the case where the entire string is in our current
// buf, which sidesteps any security problems. The C path has more
// robust checks.
@@ -461,39 +490,46 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
| decode_loaded_varint tag_size
| mov rdi, DECODER->end
| sub rdi, rax
- | cmp ARG3_64, rdi // if (len > d->end - str)
+ | cmp ARG2_64, rdi // if (len > d->end - str)
| ja ->exit_jit // Can't deliver, whole string not in buf.
| mov PTR, rax
upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR);
if (handler) {
- | mov DECODER->tmp_len, ARG3_64
- | mov ARG1_64, CLOSURE
- | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSTR)
+ | mov DECODER->tmp_len, ARG2_32
+ | mov ARG1_64, SINKFRAME
+ | load_handler_data h, f, UPB_HANDLER_STARTSTR
| callp handler
| check_ptr_ret
- | mov ARG1_64, rax // sub-closure
- | mov ARG4_64, DECODER->tmp_len
+ | mov CLOSURE, rax
+ | mov ARG3_32, DECODER->tmp_len
} else {
- | mov ARG1_64, CLOSURE
- | mov ARG4_64, ARG3_64
+ | mov ARG3_64, ARG2_64
}
handler = gethandler(h, f, UPB_HANDLER_STRING);
if (handler) {
- | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STRING)
- | mov ARG3_64, PTR
+ // TODO: push a real frame so we can resume into the string.
+ // (but maybe do this only if the string breaks).
+ | pushsinkframe h, f, UPB_HANDLER_ENDSTR
+
+ // size_t str(const upb_sinkframe *frame, const char *buf, size_t len)
+ | mov ARG1_64, SINKFRAME
+ | load_handler_data h, f, UPB_HANDLER_STRING
+ | mov ARG2_64, PTR
| callp handler
// TODO: properly handle returns other than "n" (the whole string).
| add PTR, rax
+ | popsinkframe
} else {
- | add PTR, ARG4_64
+ | add PTR, ARG3_64
}
handler = gethandler(h, f, UPB_HANDLER_ENDSTR);
if (handler) {
- | mov ARG1_64, CLOSURE
- | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSTR)
+ // bool endstr(const upb_sinkframe *frame);
+ | mov ARG1_64, SINKFRAME
+ | load_handler_data h, f, UPB_HANDLER_ENDSTR
| callp handler
| check_bool_ret
}
@@ -501,10 +537,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
}
// Will dispatch callbacks and call submessage in a second.
- case UPB_TYPE(MESSAGE):
+ case UPB_DESCRIPTOR_TYPE_MESSAGE:
| decode_varint tag_size
break;
- case UPB_TYPE(GROUP):
+ case UPB_DESCRIPTOR_TYPE_GROUP:
| add PTR, tag_size
break;
@@ -512,52 +548,58 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
}
}
-static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
+static void upb_decoderplan_jit_callcb(decoderplan *plan,
const upb_handlers *h,
const upb_fielddef *f) {
// Call callbacks. Specializing the append accessors didn't yield a speed
// increase in benchmarks.
if (upb_fielddef_issubmsg(f)) {
- if (upb_fielddef_type(f) == UPB_TYPE(MESSAGE)) {
- | mov rsi, PTR
- | sub rsi, DECODER->buf
- | add rsi, ARG3_64 // = (d->ptr - d->buf) + delim_len
- } else {
- assert(upb_fielddef_type(f) == UPB_TYPE(GROUP));
- | mov rsi, UPB_NONDELIMITED
- }
- | pushframe h, f, rsi, UPB_HANDLER_ENDSUBMSG
-
// Call startsubmsg handler (if any).
upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG);
if (startsubmsg) {
- // upb_sflow_t startsubmsg(void *closure, upb_value fval)
- | mov ARG1_64, CLOSURE
- | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSUBMSG);
+ // upb_sflow_t startsubmsg(const upb_sinkframe *frame)
+ | mov DECODER->tmp_len, ARG2_32
+ | mov ARG1_64, SINKFRAME
+ | load_handler_data h, f, UPB_HANDLER_STARTSUBMSG
| callp startsubmsg
| check_ptr_ret
| mov CLOSURE, rax
}
- | mov qword SINKFRAME->closure, CLOSURE
- // TODO: have to decide what to do with NULLs subhandlers (or whether to
- // disallow them and require a full handlers tree to match the def tree).
const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f);
- assert(sub_h);
- | call =>upb_getpclabel(plan, sub_h, STARTMSG)
- | popframe
+ if (sub_h) {
+ if (upb_fielddef_istagdelim(f)) {
+ | mov rdx, UPB_NONDELIMITED
+ } else {
+ | mov esi, DECODER->tmp_len
+ | mov rdx, PTR
+ | sub rdx, DECODER->buf
+ | add rdx, DECODER->bufstart_ofs
+ | add rdx, rsi // = d->bufstart_ofs + (d->ptr - d->buf) + delim_len
+ }
+ | pushframe sub_h, f, rdx, UPB_HANDLER_ENDSUBMSG
+ | call =>upb_getpclabel(plan, sub_h, STARTMSG)
+ | popframe
+ } else {
+ if (upb_fielddef_istagdelim(f)) {
+ // Groups with no handlers not supported yet.
+ assert(false);
+ } else {
+ | mov esi, DECODER->tmp_len
+ | add PTR, rsi
+ }
+ }
// Call endsubmsg handler (if any).
upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG);
if (endsubmsg) {
// upb_flow_t endsubmsg(void *closure, upb_value fval);
- | mov ARG1_64, CLOSURE
- | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSUBMSG);
+ | mov ARG1_64, SINKFRAME
+ | load_handler_data h, f, UPB_HANDLER_ENDSUBMSG
| callp endsubmsg
| check_bool_ret
}
} else if (!upb_fielddef_isstring(f)) {
- | mov ARG1_64, CLOSURE
upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f);
upb_func *handler = gethandler(h, f, handlertype);
const upb_stdmsg_fval *fv = (void*)gethandlerdata(h, f, handlertype);
@@ -565,24 +607,25 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
// Can't switch() on function pointers.
if (handler == (void*)&upb_stdmsg_setint64 ||
handler == (void*)&upb_stdmsg_setuint64) {
- | mov [ARG1_64 + fv->offset], ARG3_64
+ | mov [CLOSURE + fv->offset], ARG2_64
| sethas CLOSURE, fv->hasbit
} else if (handler == (void*)&upb_stdmsg_setdouble) {
- | movsd qword [ARG1_64 + fv->offset], XMMARG1
+ | movsd qword [CLOSURE + fv->offset], XMMARG1
| sethas CLOSURE, fv->hasbit
} else if (handler == (void*)&upb_stdmsg_setint32 ||
handler == (void*)&upb_stdmsg_setuint32) {
- | mov [ARG1_64 + fv->offset], ARG3_32
+ | mov [CLOSURE + fv->offset], ARG2_32
| sethas CLOSURE, fv->hasbit
} else if (handler == (void*)&upb_stdmsg_setfloat) {
- | movss dword [ARG1_64 + fv->offset], XMMARG1
+ | movss dword [CLOSURE + fv->offset], XMMARG1
| sethas CLOSURE, fv->hasbit
} else if (handler == (void*)&upb_stdmsg_setbool) {
- | mov [ARG1_64 + fv->offset], ARG3_8
+ | mov [CLOSURE + fv->offset], ARG2_8
| sethas CLOSURE, fv->hasbit
} else if (handler) {
- // Load closure and fval into arg registers.
- | mov64 ARG2_64, gethandlerdata(h, f, handlertype);
+ // bool value(const upb_sinkframe* frame, ctype val)
+ | mov ARG1_64, SINKFRAME
+ | load_handler_data h, f, handlertype
| callp handler
| check_bool_ret
}
@@ -591,15 +634,27 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
static uint64_t upb_get_encoded_tag(const upb_fielddef *f) {
uint32_t tag = (upb_fielddef_number(f) << 3) |
- upb_decoder_types[upb_fielddef_type(f)].native_wire_type;
+ upb_decoder_types[upb_fielddef_descriptortype(f)].native_wire_type;
uint64_t encoded_tag = upb_vencode32(tag);
// No tag should be greater than 5 bytes.
assert(encoded_tag <= 0xffffffffff);
return encoded_tag;
}
+static void upb_decoderplan_jit_endseq(decoderplan *plan,
+ const upb_handlers *h,
+ const upb_fielddef *f) {  // Emits the end-of-sequence code for field f.
+ | popframe  // Emitted unconditionally, before any handler call.
+ upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ);
+ if (endseq) {  // Only emit the call when an ENDSEQ handler is registered.
+ | mov ARG1_64, SINKFRAME  // First argument: the current sink frame.
+ | load_handler_data h, f, UPB_HANDLER_ENDSEQ
+ | callp endseq  // No check_bool_ret follows: the result is not inspected.
+ }
+}
+
// PTR should point to the beginning of the tag.
-static void upb_decoderplan_jit_field(upb_decoderplan *plan,
+static void upb_decoderplan_jit_field(decoderplan *plan,
const upb_handlers *h,
const upb_fielddef *f,
const upb_fielddef *next_f) {
@@ -608,45 +663,51 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan,
int tag_size = upb_value_size(tag);
// PC-label for the dispatch table.
- // We check the wire type (which must be loaded in edx) because the
+ // We check the wire type (which must be loaded in esi) because the
// table is keyed on field number, not type.
|=>upb_getpclabel(plan, f, FIELD):
- | cmp edx, (tag & 0x7)
+ | cmp esi, (tag & 0x7)
| jne ->exit_jit // In the future: could be an unknown field or packed.
|=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK):
if (upb_fielddef_isseq(f)) {
- | mov rsi, FRAME->end_ofs
- | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ
upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ);
if (startseq) {
- | mov ARG1_64, CLOSURE
- | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSEQ);
- | callp startseq
+ | mov ARG1_64, SINKFRAME
+ | load_handler_data h, f, UPB_HANDLER_STARTSEQ
+ | callp startseq
| check_ptr_ret
- | mov CLOSURE, rax
+ | mov CLOSURE, rax
}
- | mov qword SINKFRAME->closure, CLOSURE
+ | mov rsi, FRAME->end_ofs
+ | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ
}
|1: // Label for repeating this field.
- upb_decoderplan_jit_decodefield(plan, upb_fielddef_type(f), tag_size, h, f);
+ upb_decoderplan_jit_decodefield(plan, tag_size, h, f);
upb_decoderplan_jit_callcb(plan, h, f);
- // Epilogue: load next tag, check for repeated field.
- | checkpoint h
- | mov rcx, qword [PTR]
+ // This is kind of gross; future redesign should take into account how to
+ // make this work nicely. The difficult part is that the sequence can be
+ // broken either by end-of-message or by seeing a different field; in both
+ // cases we need to call the endseq handler, but what we do after that
+ // depends on which case triggered the end-of-sequence.
+ | mov DECODER->ptr, PTR
+ | cmp PTR, DECODER->jit_end
+ | jae ->exit_jit
+ | cmp PTR, DECODER->effective_end
+ | jb >2
+ if (upb_fielddef_isseq(f)) {
+ upb_decoderplan_jit_endseq(plan, h, f);
+ }
+ | jmp =>upb_getpclabel(plan, h, ENDOFMSG)
+ |2:
+ | mov rcx, qword [PTR]
if (upb_fielddef_isseq(f)) {
| checktag tag
| je <1
- upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ);
- if (endseq) {
- | mov ARG1_64, CLOSURE
- | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSEQ);
- | callp endseq
- }
- | popframe
- // Load next tag again (popframe clobbered it).
+ upb_decoderplan_jit_endseq(plan, h, f);
+ // Load next tag again (popframe/endseq clobbered it).
| mov rcx, qword [PTR]
}
@@ -663,22 +724,22 @@ static int upb_compare_uint32(const void *a, const void *b) {
return *(uint32_t*)a - *(uint32_t*)b;
}
-static void upb_decoderplan_jit_msg(upb_decoderplan *plan,
+static void upb_decoderplan_jit_msg(decoderplan *plan,
const upb_handlers *h) {
|=>upb_getpclabel(plan, h, AFTER_STARTMSG):
- // There was a call to get here, so we need to align the stack.
- | sub rsp, 8
+ | push rbp
+ | mov rbp, rsp
| jmp >1
|=>upb_getpclabel(plan, h, STARTMSG):
- // There was a call to get here, so we need to align the stack.
- | sub rsp, 8
+ | push rbp
+ | mov rbp, rsp
// Call startmsg handler (if any):
upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h);
if (startmsg) {
// upb_flow_t startmsg(void *closure);
- | mov ARG1_64, SINKFRAME->closure
+ | mov ARG1_64, SINKFRAME
| callp startmsg
| check_bool_ret
}
@@ -731,17 +792,18 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan,
upb_endmsg_handler *endmsg = upb_handlers_getendmsg(h);
if (endmsg) {
// void endmsg(void *closure, upb_status *status) {
- | mov ARG1_64, SINKFRAME->closure
- | lea ARG2_64, DECODER->sink.status
+ | mov ARG1_64, SINKFRAME
+ | mov ARG2_64, DECODER->sink
+ | mov ARG2_64, SINK:ARG2_64->pipeline_
+ | add ARG2_64, offsetof(upb_pipeline, status_)
| callp endmsg
}
- // Counter previous alignment.
- | add rsp, 8
+ | leave
| ret
}
-static void upb_decoderplan_jit(upb_decoderplan *plan) {
+static void upb_decoderplan_jit(decoderplan *plan) {
// The JIT prologue/epilogue trampoline that is generated in this function
// does not depend on the handlers, so it will never vary. Ideally we would
// put it in an object file and just link it into upb so we could have only a
@@ -763,18 +825,18 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) {
// Align stack.
| sub rsp, 8
| mov DECODER, ARG1_64
+ | mov DECODER->saved_rbp, rbp
| mov FRAME, DECODER:ARG1_64->top
- | mov SINKFRAME, DECODER:ARG1_64->sink.top
+ | mov rax, DECODER:ARG1_64->sink
+ | mov SINKFRAME, SINK:rax->top_
| mov CLOSURE, SINKFRAME->closure
| mov PTR, DECODER->ptr
// TODO: push return addresses for re-entry (will be necessary for multiple
// buffer support).
| call ARG2_64
-
|->exit_jit:
- // Restore stack pointer to where it was before any "call" instructions
- // inside our generated code.
+ | mov rbp, DECODER->saved_rbp
| lea rsp, [rbp - 48]
// Counter previous alignment.
| add rsp, 8
@@ -794,10 +856,10 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) {
}
}
-static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan,
+static void upb_decoderplan_jit_assignpclabels(decoderplan *plan,
const upb_handlers *h) {
// Limit the DFS.
- if (upb_inttable_lookupptr(&plan->pclabels, h)) return;
+ if (upb_inttable_lookupptr(&plan->pclabels, h, NULL)) return;
upb_inttable_insertptr(&plan->pclabels, h,
upb_value_uint32(plan->pclabel_count));
@@ -832,14 +894,14 @@ static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan,
info->tablearray = malloc((info->max_field_number + 1) * sizeof(void*));
}
-static void upb_decoderplan_makejit(upb_decoderplan *plan) {
+static void upb_decoderplan_makejit(decoderplan *plan) {
upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR);
plan->debug_info = NULL;
// Assign pclabels.
plan->pclabel_count = 0;
upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32);
- upb_decoderplan_jit_assignpclabels(plan, plan->handlers);
+ upb_decoderplan_jit_assignpclabels(plan, plan->dest_handlers);
void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
dasm_init(plan, 1);
@@ -867,7 +929,7 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) {
const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i);
upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
// We jump to after the startmsg handler since it is called before entering
- // the JIT (either by upb_decoder or by a previous call to the JIT).
+ // the JIT (either by upb_pbdecoder or by a previous call to the JIT).
mi->jit_func = plan->jit_code +
dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG));
for (uint32_t j = 0; j <= mi->max_field_number; j++) {
@@ -899,7 +961,7 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) {
#endif
}
-static void upb_decoderplan_freejit(upb_decoderplan *plan) {
+static void upb_decoderplan_freejit(decoderplan *plan) {
upb_inttable_iter i;
upb_inttable_begin(&i, &plan->msginfo);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
@@ -913,9 +975,10 @@ static void upb_decoderplan_freejit(upb_decoderplan *plan) {
// TODO: unregister
}
-static void upb_decoder_enterjit(upb_decoder *d) {
- if (d->plan->jit_code &&
- d->sink.top == d->sink.stack &&
+static void upb_decoder_enterjit(upb_pbdecoder *d, decoderplan *plan) {
+ if (plan->jit_code &&
+ d->top == d->stack &&
+ d->sink->top_ == d->sink->stack &&
d->ptr && d->ptr < d->jit_end) {
#ifndef NDEBUG
register uint64_t rbx asm ("rbx") = 11;
@@ -926,8 +989,8 @@ static void upb_decoder_enterjit(upb_decoder *d) {
#endif
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
- void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code;
- upb_jitmsginfo *mi = upb_getmsginfo(d->plan, d->plan->handlers);
+ void (*upb_jit_decode)(upb_pbdecoder *d, void*) = (void*)plan->jit_code;
+ upb_jitmsginfo *mi = upb_getmsginfo(plan, plan->dest_handlers);
assert(mi);
upb_jit_decode(d, mi->jit_func);
assert(d->ptr <= d->end);
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 4e69c0c..bcde039 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -16,33 +16,37 @@
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
void *owner, upb_status *status) {
- upb_stringsrc strsrc;
- upb_stringsrc_init(&strsrc);
- upb_stringsrc_reset(&strsrc, str, len);
+ // Create handlers.
+ const upb_handlers *reader_h = upb_descreader_gethandlers(&reader_h);
+ const upb_handlers *decoder_h =
+ upb_pbdecoder_gethandlers(reader_h, false, &decoder_h);
- const upb_handlers *h = upb_descreader_newhandlers(&h);
- upb_decoderplan *p = upb_decoderplan_new(h, false);
- upb_decoder d;
- upb_decoder_init(&d);
- upb_handlers_unref(h, &h);
- upb_descreader r;
- upb_descreader_init(&r);
- upb_decoder_resetplan(&d, p);
- upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r);
+ // Create pipeline.
+ upb_pipeline pipeline;
+ upb_pipeline_init(&pipeline, NULL, 0, upb_realloc, NULL);
+ upb_pipeline_donateref(&pipeline, reader_h, &reader_h);
+ upb_pipeline_donateref(&pipeline, decoder_h, &decoder_h);
- upb_success_t ret = upb_decoder_decode(&d);
- if (status) upb_status_copy(status, upb_decoder_status(&d));
- upb_stringsrc_uninit(&strsrc);
- upb_decoder_uninit(&d);
- upb_decoderplan_unref(p);
- if (ret != UPB_OK) {
- upb_descreader_uninit(&r);
+ // Create sinks.
+ upb_sink *reader_sink = upb_pipeline_newsink(&pipeline, reader_h);
+ upb_sink *decoder_sink = upb_pipeline_newsink(&pipeline, decoder_h);
+ upb_pbdecoder *d = upb_sinkframe_userdata(upb_sink_base(decoder_sink));
+ upb_pbdecoder_resetsink(d, reader_sink);
+
+ // Push input data.
+ bool ok = upb_bytestream_putstr(decoder_sink, str, len);
+
+ if (status) upb_status_copy(status, upb_pipeline_status(&pipeline));
+ if (!ok) {
+ upb_pipeline_uninit(&pipeline);
return NULL;
}
- upb_def **defs = upb_descreader_getdefs(&r, owner, n);
+
+ upb_descreader *r = upb_sinkframe_userdata(upb_sink_base(reader_sink));
+ upb_def **defs = upb_descreader_getdefs(r, owner, n);
upb_def **defscopy = malloc(sizeof(upb_def*) * (*n));
memcpy(defscopy, defs, sizeof(upb_def*) * (*n));
- upb_descreader_uninit(&r);
+ upb_pipeline_uninit(&pipeline);
return defscopy;
}
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
index 3770afc..91c1e2d 100644
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@@ -14,8 +14,9 @@
#include <stdlib.h>
#include <string.h>
+#include "upb/sink.h"
+
struct _upb_textprinter {
- upb_bytesink *sink;
int indent_depth;
bool single_line;
upb_status status;
@@ -24,18 +25,17 @@ struct _upb_textprinter {
#define CHECK(x) if ((x) < 0) goto err;
static int indent(upb_textprinter *p) {
+ int i;
if (!p->single_line)
- CHECK(upb_bytesink_putrepeated(p->sink, ' ', p->indent_depth*2));
+ for (i = 0; i < p->indent_depth * 2; i++)
+ putchar(' ');
return 0;
-err:
return -1;
}
static int endfield(upb_textprinter *p) {
- CHECK(upb_bytesink_putc(p->sink, p->single_line ? ' ' : '\n'));
+ putchar(p->single_line ? ' ' : '\n');
return 0;
-err:
- return -1;
}
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
@@ -51,7 +51,7 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len,
for (; buf < end; buf++) {
if (dstend - dst < 4) {
- CHECK(upb_bytesink_write(p->sink, dstbuf, dst - dstbuf));
+ fwrite(dstbuf, dst - dstbuf, 1, stdout);
dst = dstbuf;
}
@@ -79,24 +79,35 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len,
last_hex_escape = is_hex_escape;
}
// Flush remaining data.
- CHECK(upb_bytesink_write(p->sink, dst, dst - dstbuf));
+ // Flush from the start of the staging buffer: dst is the write cursor, so
+ // starting at dst would emit whatever follows the escaped data instead.
+ fwrite(dstbuf, dst - dstbuf, 1, stdout);
return 0;
-err:
- return -1;
}
#define TYPE(name, ctype, fmt) \
- static bool put ## name(void *_p, void *fval, ctype val) { \
- upb_textprinter *p = _p; \
- const upb_fielddef *f = fval; \
- CHECK(indent(p)); \
- CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f))); \
- CHECK(upb_bytesink_writestr(p->sink, ": ")); \
- CHECK(upb_bytesink_printf(p->sink, fmt, val)); \
- CHECK(endfield(p)); \
- return true; \
- err: \
- return false; \
+ /* Defines put<name>(): prints "fieldname: value" using printf format fmt. */ \
+ static bool put ## name(const upb_sinkframe *frame, ctype val) { \
+ upb_textprinter *p = upb_sinkframe_userdata(frame); \
+ const upb_fielddef *f = upb_sinkframe_handlerdata(frame); \
+ CHECK(indent(p)); \
+ /* fputs, not puts: puts() would append a newline after each piece. */ \
+ fputs(upb_fielddef_name(f), stdout); \
+ fputs(": ", stdout); \
+ printf(fmt, val); \
+ CHECK(endfield(p)); \
+ return true; \
+ err: \
+ return false; \
+}
+
+// Prints a bool field as "name: true" or "name: false", with the same
+// indentation and field terminator as the other scalar printers.
+static bool putbool(const upb_sinkframe *frame, bool val) {
+ upb_textprinter *p = upb_sinkframe_userdata(frame);
+ const upb_fielddef *f = upb_sinkframe_handlerdata(frame);
+ CHECK(indent(p));
+ // fputs rather than puts: puts() appends a newline after every piece, which
+ // would split one field across three lines.
+ fputs(upb_fielddef_name(f), stdout);
+ fputs(": ", stdout);
+ fputs(val ? "true" : "false", stdout);
+ CHECK(endfield(p));
+ return true;
+err:
+ return false;
}
#define STRINGIFY_HELPER(x) #x
@@ -108,72 +119,61 @@ TYPE(uint32, uint32_t, "%" PRIu32);
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
-TYPE(bool, bool, "%hhu");
// Output a symbolic value from the enum if found, else just print as int32.
-static bool putenum(void *_p, void *fval, int32_t val) {
-
- upb_textprinter *p = _p;
- const upb_fielddef *f = fval;
+static bool putenum(const upb_sinkframe *frame, int32_t val) {
+ upb_textprinter *p = upb_sinkframe_userdata(frame);
+ const upb_fielddef *f = upb_sinkframe_handlerdata(frame);
const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
const char *label = upb_enumdef_iton(enum_def, val);
if (label) {
- CHECK(upb_bytesink_writestr(p->sink, label));
+ // Emit "name: LABEL" with the same framing as the numeric printers. fputs
+ // is used because puts() would append a newline after each piece.
+ CHECK(indent(p));
+ fputs(upb_fielddef_name(f), stdout);
+ fputs(": ", stdout);
+ fputs(label, stdout);
+ CHECK(endfield(p));
} else {
- CHECK(putint32(_p, fval, val));
+ // putint32 returns bool, so CHECK's "< 0" test could never fire on it.
+ if (!putint32(frame, val)) goto err;
}
return true;
err:
return false;
}
-static void *startstr(void *_p, void *fval, size_t size_hint) {
+static void *startstr(const upb_sinkframe *frame, size_t size_hint) {
UPB_UNUSED(size_hint);
- UPB_UNUSED(fval);
- upb_textprinter *p = _p;
- CHECK(upb_bytesink_putc(p->sink, '"'));
+ upb_textprinter *p = upb_sinkframe_userdata(frame);
+ putchar('"');
return p;
-err:
- return UPB_BREAK;
}
-static bool endstr(void *_p, void *fval) {
- UPB_UNUSED(fval);
- upb_textprinter *p = _p;
- CHECK(upb_bytesink_putc(p->sink, '"'));
+static bool endstr(const upb_sinkframe *frame) {
+ putchar('"');
return true;
-err:
- return false;
}
-static size_t putstr(void *_p, void *fval, const char *buf, size_t len) {
- upb_textprinter *p = _p;
- const upb_fielddef *f = fval;
- CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE(STRING)));
+static size_t putstr(const upb_sinkframe *frame, const char *buf, size_t len) {
+ upb_textprinter *p = upb_sinkframe_userdata(frame);
+ const upb_fielddef *f = upb_sinkframe_handlerdata(frame);
+ CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
return len;
err:
return 0;
}
-static void *startsubmsg(void *_p, void *fval) {
- upb_textprinter *p = _p;
- const upb_fielddef *f = fval;
+static void *startsubmsg(const upb_sinkframe *frame) {
+ upb_textprinter *p = upb_sinkframe_userdata(frame);
+ const upb_fielddef *f = upb_sinkframe_handlerdata(frame);
CHECK(indent(p));
- CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f)));
+ printf("%s {", upb_fielddef_name(f));
if (!p->single_line)
- CHECK(upb_bytesink_putc(p->sink, '\n'));
+ putchar('\n');
p->indent_depth++;
- return _p;
+ return p;
err:
return UPB_BREAK;
}
-static bool endsubmsg(void *_p, void *fval) {
- UPB_UNUSED(fval);
- upb_textprinter *p = _p;
+static bool endsubmsg(const upb_sinkframe *frame) {
+ upb_textprinter *p = upb_sinkframe_userdata(frame);
p->indent_depth--;
CHECK(indent(p));
- CHECK(upb_bytesink_putc(p->sink, '}'));
+ putchar('}');
CHECK(endfield(p));
return true;
err:
@@ -187,9 +187,7 @@ upb_textprinter *upb_textprinter_new() {
void upb_textprinter_free(upb_textprinter *p) { free(p); }
-void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
- bool single_line) {
- p->sink = sink;
+void upb_textprinter_reset(upb_textprinter *p, bool single_line) {
p->single_line = single_line;
p->indent_depth = 0;
}
@@ -202,21 +200,15 @@ static void onmreg(void *c, upb_handlers *h) {
upb_fielddef *f = upb_msg_iter_field(&i);
switch (upb_fielddef_type(f)) {
case UPB_TYPE_INT32:
- case UPB_TYPE_SINT32:
- case UPB_TYPE_SFIXED32:
upb_handlers_setint32(h, f, putint32, f, NULL);
break;
- case UPB_TYPE_SINT64:
- case UPB_TYPE_SFIXED64:
case UPB_TYPE_INT64:
upb_handlers_setint64(h, f, putint64, f, NULL);
break;
case UPB_TYPE_UINT32:
- case UPB_TYPE_FIXED32:
upb_handlers_setuint32(h, f, putuint32, f, NULL);
break;
case UPB_TYPE_UINT64:
- case UPB_TYPE_FIXED64:
upb_handlers_setuint64(h, f, putuint64, f, NULL);
break;
case UPB_TYPE_FLOAT:
@@ -234,7 +226,6 @@ static void onmreg(void *c, upb_handlers *h) {
upb_handlers_setstring(h, f, putstr, f, NULL);
upb_handlers_setendstr(h, f, endstr, f, NULL);
break;
- case UPB_TYPE_GROUP:
case UPB_TYPE_MESSAGE:
upb_handlers_setstartsubmsg(h, f, &startsubmsg, f, NULL);
upb_handlers_setendsubmsg(h, f, &endsubmsg, f, NULL);
@@ -250,5 +241,5 @@ static void onmreg(void *c, upb_handlers *h) {
const upb_handlers *upb_textprinter_newhandlers(const void *owner,
const upb_msgdef *m) {
- return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
+ return upb_handlers_newfrozen(m, NULL, owner, &onmreg, NULL);
}
diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h
index 6d111d2..7b653e7 100644
--- a/upb/pb/textprinter.h
+++ b/upb/pb/textprinter.h
@@ -8,7 +8,6 @@
#ifndef UPB_TEXT_H_
#define UPB_TEXT_H_
-#include "upb/bytestream.h"
#include "upb/handlers.h"
#ifdef __cplusplus
@@ -20,8 +19,7 @@ typedef struct _upb_textprinter upb_textprinter;
upb_textprinter *upb_textprinter_new();
void upb_textprinter_free(upb_textprinter *p);
-void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
- bool single_line);
+void upb_textprinter_reset(upb_textprinter *p, bool single_line);
const upb_handlers *upb_textprinter_newhandlers(const void *owner,
const upb_msgdef *m);
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
index c4d67ba..d33872d 100644
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@@ -36,10 +36,14 @@ typedef enum {
/* Zig-zag encoding/decoding **************************************************/
-INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
-INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
-INLINE uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }
-INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
+UPB_INLINE int32_t upb_zzdec_32(uint32_t n) {
+ return (n >> 1) ^ -(int32_t)(n & 1);
+}
+UPB_INLINE int64_t upb_zzdec_64(uint64_t n) {
+ return (n >> 1) ^ -(int64_t)(n & 1);
+}
+// Zig-zag encodes n. The left shift is done on the unsigned value because
+// left-shifting a negative signed integer is undefined behavior in C; the
+// resulting bit pattern is the same.
+UPB_INLINE uint32_t upb_zzenc_32(int32_t n) {
+ return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31);
+}
+// Zig-zag encodes n. The left shift is done on the unsigned value because
+// left-shifting a negative signed integer is undefined behavior in C; the
+// resulting bit pattern is the same.
+UPB_INLINE uint64_t upb_zzenc_64(int64_t n) {
+ return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63);
+}
/* Decoding *******************************************************************/
@@ -65,7 +69,7 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
// up to 10 bytes, so it must not be used unless there are at least ten bytes
// left in the buffer!
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
-INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \
+UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \
uint8_t *p = (uint8_t*)_p; \
if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \
upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)}; \
@@ -81,21 +85,21 @@ UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
// Our canonical functions for decoding varints, based on the currently
// favored best-performing implementations.
-INLINE upb_decoderet upb_vdecode_fast(const char *p) {
+UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) {
if (sizeof(long) == 8)
return upb_vdecode_check2_massimino(p);
else
return upb_vdecode_check2_branch32(p);
}
-INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
+UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
return upb_vdecode_max8_massimino(r);
}
/* Encoding *******************************************************************/
-INLINE int upb_value_size(uint64_t val) {
+UPB_INLINE int upb_value_size(uint64_t val) {
#ifdef __GNUC__
int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0.
#else
@@ -110,7 +114,7 @@ INLINE int upb_value_size(uint64_t val) {
// bytes long), returning how many bytes were used.
//
// TODO: benchmark and optimize if necessary.
-INLINE size_t upb_vencode64(uint64_t val, char *buf) {
+UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) {
if (val == 0) { buf[0] = 0; return 1; }
size_t i = 0;
while (val) {
@@ -123,7 +127,7 @@ INLINE size_t upb_vencode64(uint64_t val, char *buf) {
}
// Encodes a 32-bit varint, *not* sign-extended.
-INLINE uint64_t upb_vencode32(uint32_t val) {
+UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
char buf[UPB_PB_VARINT_MAX_LEN];
size_t bytes = upb_vencode64(val, buf);
uint64_t ret = 0;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback