From cfdb9907cb87d15eaab72ceefbfa42fd7a4c3127 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Sat, 11 May 2013 16:45:38 -0700 Subject: Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. --- upb/pb/decoder.c | 914 +++++++++++++++++++++++++++++------------------- upb/pb/decoder.h | 207 ++++------- upb/pb/decoder_x64.dasc | 429 +++++++++++++---------- upb/pb/glue.c | 46 +-- upb/pb/textprinter.c | 121 +++---- upb/pb/textprinter.h | 4 +- upb/pb/varint.h | 24 +- 7 files changed, 975 insertions(+), 770 deletions(-) (limited to 'upb/pb') diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 065c495..2bfc717 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -6,12 +6,101 @@ */ #include +#include #include #include #include "upb/bytestream.h" #include "upb/pb/decoder.h" #include "upb/pb/varint.h" +#define UPB_NONDELIMITED (0xffffffffffffffffULL) + +/* upb_pbdecoder ****************************************************************/ + +struct dasm_State; + +typedef struct { + const upb_fielddef *f; + uint64_t end_ofs; + uint32_t group_fieldnum; // UINT32_MAX for non-groups. + bool is_sequence; // frame represents seq or submsg/str? (f might be both). + bool is_packed; // true for packed primitive sequences. +} frame; + +struct upb_pbdecoder { + // Where we push parsed data (not owned). + upb_sink *sink; + + // Current input buffer and its stream offset. + const char *buf, *ptr, *end, *checkpoint; + uint64_t bufstart_ofs; + + // Buffer for residual bytes not parsed from the previous buffer. + char residual[16]; + char *residual_end; + + // Stores the user buffer passed to our decode function. + const char *buf_param; + size_t size_param; + + // Equal to size_param while we are in the residual buf, 0 otherwise. + size_t userbuf_remaining; + + // Used to temporarily store the return value before calling longjmp(). + size_t ret; + + // End of the delimited region, relative to ptr, or NULL if not in this buf. + const char *delim_end; + +#ifdef UPB_USE_JIT_X64 + // For JIT, which doesn't do bounds checks in the middle of parsing a field. + const char *jit_end, *effective_end; // == MIN(jit_end, delim_end) + + // Used momentarily by the generated code to store a value while a user + // function is called. + uint32_t tmp_len; + + const void *saved_rbp; +#endif + + // Our internal stack. + frame *top, *limit; + frame stack[UPB_MAX_NESTING]; + + // For exiting the decoder on error. + jmp_buf exitjmp; +}; + +typedef struct { + // The top-level handlers that this plan calls into. We own a ref. + const upb_handlers *dest_handlers; + +#ifdef UPB_USE_JIT_X64 + // JIT-generated machine code (else NULL). + char *jit_code; + size_t jit_size; + char *debug_info; + + // For storing upb_jitmsginfo, which contains per-msg runtime data needed + // by the JIT. + // Maps upb_handlers* -> upb_jitmsginfo. + upb_inttable msginfo; + + // The following members are used only while the JIT is being built. + + // This pointer is allocated by dasm_init() and freed by dasm_free(). + struct dasm_State *dynasm; + + // For storing pclabel bases while we are building the JIT. + // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base + upb_inttable pclabels; + + // This is not the same as len(pclabels) because the table only contains base + // offsets for each def, but each def can have many pclabels. + uint32_t pclabel_count; +#endif +} decoderplan; + typedef struct { uint8_t native_wire_type; bool is_numeric; @@ -39,12 +128,21 @@ static const upb_decoder_typeinfo upb_decoder_types[] = { {UPB_WIRE_TYPE_VARINT, true}, // SINT64 }; -/* upb_decoderplan ************************************************************/ +static upb_selector_t getselector(const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t selector; + bool ok = upb_getselector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; +} + + +/* decoderplan ****************************************************************/ #ifdef UPB_USE_JIT_X64 // These defines are necessary for DynASM codegen. // See dynasm/dasm_proto.h for more info. -#define Dst_DECL upb_decoderplan *plan +#define Dst_DECL decoderplan *plan #define Dst_REF (plan->dynasm) #define Dst (plan) @@ -58,39 +156,49 @@ static const upb_decoder_typeinfo upb_decoder_types[] = { #include "upb/pb/decoder_x64.h" #endif -upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit) { - UPB_UNUSED(allowjit); - upb_decoderplan *p = malloc(sizeof(*p)); - assert(upb_handlers_isfrozen(h)); - p->handlers = h; - upb_handlers_ref(h, p); -#ifdef UPB_USE_JIT_X64 - p->jit_code = NULL; - if (allowjit) upb_decoderplan_makejit(p); -#endif - return p; -} - -void upb_decoderplan_unref(upb_decoderplan *p) { - // TODO: make truly refcounted. - upb_handlers_unref(p->handlers, p); +void freeplan(void *_p) { + decoderplan *p = _p; + upb_handlers_unref(p->dest_handlers, p); #ifdef UPB_USE_JIT_X64 if (p->jit_code) upb_decoderplan_freejit(p); #endif free(p); } -bool upb_decoderplan_hasjitcode(upb_decoderplan *p) { +static decoderplan *getdecoderplan(const upb_handlers *h) { + if (upb_handlers_frametype(h) != upb_pbdecoder_getframetype()) + return NULL; + upb_selector_t sel; + if (!upb_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STRING, &sel)) + return NULL; + return upb_handlers_gethandlerdata(h, sel); +} + +bool upb_pbdecoder_isdecoder(const upb_handlers *h) { + return getdecoderplan(h) != NULL; +} + +bool upb_pbdecoder_hasjitcode(const upb_handlers *h) { #ifdef UPB_USE_JIT_X64 + decoderplan *p = getdecoderplan(h); + if (!p) return false; return p->jit_code != NULL; #else - (void)p; + UPB_UNUSED(h); return false; #endif } +const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) { + decoderplan *p = getdecoderplan(h); + if (!p) return NULL; + return p->dest_handlers; +} + + +/* upb_pbdecoder ****************************************************************/ -/* upb_decoder ****************************************************************/ +static bool in_residual_buf(const upb_pbdecoder *d, const char *p); // It's unfortunate that we have to micro-manage the compiler this way, // especially since this tuning is necessarily specific to one hardware @@ -100,98 +208,73 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p) { #define FORCEINLINE static inline __attribute__((always_inline)) #define NOINLINE static __attribute__((noinline)) -UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) { - // Resumable decoder would back out to completed_ptr (and possibly get a - // previous buffer). - _longjmp(d->exitjmp, 1); +static upb_status *decoder_status(upb_pbdecoder *d) { + // TODO(haberman): encapsulate this access to pipeline->status, but not sure + // exactly what that interface should look like. + return &d->sink->pipeline_->status_; } -UPB_NORETURN static void upb_decoder_exitjmp2(void *d) { - upb_decoder_exitjmp(d); + +UPB_NORETURN static void exitjmp(upb_pbdecoder *d) { + _longjmp(d->exitjmp, 1); } -UPB_NORETURN static void upb_decoder_abortjmp(upb_decoder *d, const char *msg) { - upb_status_seterrliteral(&d->status, msg); - upb_decoder_exitjmp(d); + +UPB_NORETURN static void abortjmp(upb_pbdecoder *d, const char *msg) { + d->ret = in_residual_buf(d, d->checkpoint) ? 0 : (d->checkpoint - d->buf); + upb_status_seterrliteral(decoder_status(d), msg); + exitjmp(d); } /* Buffering ******************************************************************/ -// We operate on one buffer at a time, which may be a subset of the currently -// loaded byteregion data. When data for the buffer is completely gone we pull -// the next one. When we've committed our progress we discard any previous -// buffers' regions. +// We operate on one buffer at a time, which is either the user's buffer passed +// to our "decode" callback or some residual bytes from the previous buffer. -static size_t upb_decoder_bufleft(upb_decoder *d) { +// How many bytes can be safely read from d->ptr. +static size_t bufleft(upb_pbdecoder *d) { assert(d->end >= d->ptr); return d->end - d->ptr; } -static void upb_decoder_advance(upb_decoder *d, size_t len) { - assert(upb_decoder_bufleft(d) >= len); +// Overall offset of d->ptr. +uint64_t offset(const upb_pbdecoder *d) { + return d->bufstart_ofs + (d->ptr - d->buf); +} + +// Advances d->ptr. +static void advance(upb_pbdecoder *d, size_t len) { + assert(bufleft(d) >= len); d->ptr += len; } -uint64_t upb_decoder_offset(upb_decoder *d) { - return d->bufstart_ofs + (d->ptr - d->buf); +// Commits d->ptr progress; should be called when an entire atomic value +// (ie tag+value) has been successfully consumed. +static void checkpoint(upb_pbdecoder *d) { + d->checkpoint = d->ptr; } -uint64_t upb_decoder_bufendofs(upb_decoder *d) { - return d->bufstart_ofs + (d->end - d->buf); +static bool in_buf(const char *p, const char *buf, const char *end) { + return p >= buf && p <= end; } -static bool upb_decoder_islegalend(upb_decoder *d) { - if (d->top == d->stack) return true; - if (d->top - 1 == d->stack && - d->top->is_sequence && !d->top->is_packed) return true; - return false; +static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { + return in_buf(p, d->residual, d->residual_end); } -// Calculates derived values that we cache for speed. These reflect a -// combination of the current buffer and the stack, so must be called whenever -// either is updated. -static void upb_decoder_setmsgend(upb_decoder *d) { - upb_decoder_frame *f = d->top; +// Calculates the delim_end value, which represents a combination of the +// current buffer and the stack, so must be called whenever either is updated. +static void set_delim_end(upb_pbdecoder *d) { + frame *f = d->top; size_t delimlen = f->end_ofs - d->bufstart_ofs; size_t buflen = d->end - d->buf; d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ? d->buf + delimlen : NULL; // NULL if not in this buf. - d->top_is_packed = f->is_packed; -} - -static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) { - assert(ofs >= upb_decoder_offset(d)); - if (ofs > upb_byteregion_endofs(d->input)) - upb_decoder_abortjmp(d, "Unexpected EOF"); - d->buf = NULL; - d->ptr = NULL; - d->end = NULL; - d->delim_end = NULL; -#ifdef UPB_USE_JIT_X64 - d->jit_end = NULL; -#endif - d->bufstart_ofs = ofs; } -static bool upb_trypullbuf(upb_decoder *d) { - assert(upb_decoder_bufleft(d) == 0); - upb_decoder_skiptonewbuf(d, upb_decoder_offset(d)); - if (upb_byteregion_available(d->input, d->bufstart_ofs) == 0) { - switch (upb_byteregion_fetch(d->input)) { - case UPB_BYTE_OK: - assert(upb_byteregion_available(d->input, d->bufstart_ofs) > 0); - break; - case UPB_BYTE_EOF: return false; - case UPB_BYTE_ERROR: upb_decoder_abortjmp(d, "I/O error in input"); - // Decoder resuming is not yet supported. - case UPB_BYTE_WOULDBLOCK: - upb_decoder_abortjmp(d, "Input returned WOULDBLOCK"); - } - } - size_t len; - d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len); - assert(len > 0); - d->ptr = d->buf; - d->end = d->buf + len; - upb_decoder_setmsgend(d); +static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) { + d->ptr = buf; + d->buf = buf; + d->end = end; + set_delim_end(d); #ifdef UPB_USE_JIT_X64 // If we start parsing a value, we can parse up to 20 bytes without // having to bounds-check anything (2 10-byte varints). Since the @@ -199,172 +282,232 @@ static bool upb_trypullbuf(upb_decoder *d) { // JIT bails if there are not 20 bytes available. d->jit_end = d->end - 20; #endif - assert(upb_decoder_bufleft(d) > 0); - return true; } -static void upb_pullbuf(upb_decoder *d) { - if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF"); +static void suspendjmp(upb_pbdecoder *d) { + switchtobuf(d, d->residual, d->residual_end); + exitjmp(d); +} + +static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) { + assert(len >= 0); + assert(d->ptr == d->end); + d->bufstart_ofs += (d->ptr - d->buf); + switchtobuf(d, buf, buf + len); +} + +static void skip(upb_pbdecoder *d, size_t bytes) { + size_t avail = bufleft(d); + size_t total_avail = avail + d->userbuf_remaining; + if (avail >= bytes) { + // Skipped data is all in current buffer. + advance(d, bytes); + } else if (total_avail >= bytes) { + // Skipped data is all in residual buf and param buffer. + assert(in_residual_buf(d, d->ptr)); + advance(d, avail); + advancetobuf(d, d->buf_param, d->size_param); + d->userbuf_remaining = 0; + advance(d, bytes - avail); + } else { + // Skipped data extends beyond currently available buffers. + // TODO: we need to do a checkdelim() equivalent that pops any frames that + // we just skipped past. + d->bufstart_ofs = offset(d) + bytes; + d->residual_end = d->residual; + d->ret += bytes - total_avail; + suspendjmp(d); + } +} + +static void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { + assert(bytes <= bufleft(d)); + memcpy(buf, d->ptr, bytes); + advance(d, bytes); } -static void upb_decoder_checkpoint(upb_decoder *d) { - upb_byteregion_discard(d->input, upb_decoder_offset(d)); +NOINLINE void getbytes_slow(upb_pbdecoder *d, void *buf, size_t bytes) { + const size_t avail = bufleft(d); + if (avail + d->userbuf_remaining >= bytes) { + // Remaining residual buffer and param buffer together can satisfy. + // (We are only called from getbytes() which has already verified that + // the current buffer alone cannot satisfy). + assert(in_residual_buf(d, d->ptr)); + consumebytes(d, buf, avail); + advancetobuf(d, d->buf_param, d->size_param); + consumebytes(d, buf + avail, bytes - avail); + d->userbuf_remaining = 0; + } else { + // There is not enough remaining data, save residual bytes (if any) + // starting at the last committed checkpoint and exit. + if (in_buf(d->checkpoint, d->buf_param, d->buf_param + d->size_param)) { + // Checkpoint was in user buf; old residual bytes not needed. + d->ptr = d->checkpoint; + size_t save = bufleft(d); + assert(save <= sizeof(d->residual)); + memcpy(d->residual, d->ptr, save); + d->residual_end = d->residual + save; + d->bufstart_ofs = offset(d); + } else { + // Checkpoint was in residual buf; append user byte(s) to residual buf. + assert(d->checkpoint == d->residual); + assert((d->residual_end - d->residual) + d->size_param <= + sizeof(d->residual)); + if (!in_residual_buf(d, d->ptr)) { + d->bufstart_ofs -= (d->residual_end - d->residual); + } + memcpy(d->residual_end, d->buf_param, d->size_param); + d->residual_end += d->size_param; + } + suspendjmp(d); + } } -static void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) { - if (ofs <= upb_decoder_bufendofs(d)) { - upb_decoder_advance(d, ofs - upb_decoder_offset(d)); +FORCEINLINE void getbytes(upb_pbdecoder *d, void *buf, size_t bytes) { + if (bufleft(d) >= bytes) { + // Buffer has enough data to satisfy. + consumebytes(d, buf, bytes); } else { - upb_decoder_skiptonewbuf(d, ofs); + getbytes_slow(d, buf, bytes); } - upb_decoder_checkpoint(d); } -static void upb_decoder_discard(upb_decoder *d, size_t bytes) { - upb_decoder_discardto(d, upb_decoder_offset(d) + bytes); +FORCEINLINE uint8_t getbyte(upb_pbdecoder *d) { + uint8_t byte; + getbytes(d, &byte, 1); + return byte; } /* Decoding of wire types *****************************************************/ -NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) { +NOINLINE uint64_t decode_varint_slow(upb_pbdecoder *d) { uint8_t byte = 0x80; uint64_t u64 = 0; int bitpos; for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d); - u64 |= ((uint64_t)(byte = *d->ptr) & 0x7F) << bitpos; - upb_decoder_advance(d, 1); + u64 |= ((uint64_t)((byte = getbyte(d)) & 0x7F)) << bitpos; } if(bitpos == 70 && (byte & 0x80)) - upb_decoder_abortjmp(d, "Unterminated varint"); + abortjmp(d, "Unterminated varint"); return u64; } +NOINLINE uint32_t decode_v32_slow(upb_pbdecoder *d) { + uint64_t u64 = decode_varint_slow(d); + if (u64 > UINT32_MAX) abortjmp(d, "Unterminated 32-bit varint"); + return (uint32_t)u64; +} + // For tags and delimited lengths, which must be <=32bit and are usually small. -FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) { - const char *p = d->ptr; - uint32_t ret; - uint64_t u64; +FORCEINLINE uint32_t decode_v32(upb_pbdecoder *d) { // Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048). - if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely. - ret = *p & 0x7f; - if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order - ret |= (*p & 0x7f) << 7; - if ((*(p++) & 0x80) == 0) goto done; // likely -slow: - u64 = upb_decode_varint_slow(d); - if (u64 > UINT32_MAX) upb_decoder_abortjmp(d, "Unterminated 32-bit varint"); - ret = (uint32_t)u64; - p = d->ptr; // Turn the next line into a nop. -done: - upb_decoder_advance(d, p - d->ptr); - return ret; -} - -// Returns true on success or false if we've hit a valid EOF. -FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { - if (upb_decoder_bufleft(d) == 0 && - upb_decoder_islegalend(d) && - !upb_trypullbuf(d)) { - return false; + if (bufleft(d) >= 2) { + uint32_t ret = d->ptr[0] & 0x7f; + if ((d->ptr[0] & 0x80) == 0) { + advance(d, 1); + return ret; + } + ret |= (d->ptr[1] & 0x7f) << 7; + if ((d->ptr[1] & 0x80) == 0) { + advance(d, 2); + return ret; + } } - *val = upb_decode_varint32(d); - return true; + return decode_v32_slow(d); } -FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) { - if (upb_decoder_bufleft(d) >= 10) { +FORCEINLINE uint64_t decode_varint(upb_pbdecoder *d) { + if (bufleft(d) >= 10) { // Fast case. upb_decoderet r = upb_vdecode_fast(d->ptr); - if (r.p == NULL) upb_decoder_abortjmp(d, "Unterminated varint"); - upb_decoder_advance(d, r.p - d->ptr); + if (r.p == NULL) abortjmp(d, "Unterminated varint"); + advance(d, r.p - d->ptr); return r.val; - } else if (upb_decoder_bufleft(d) > 0) { - // Intermediate case -- worth it? - char tmpbuf[10]; - memset(tmpbuf, 0x80, 10); - memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d)); - upb_decoderet r = upb_vdecode_fast(tmpbuf); - if (r.p != NULL) { - upb_decoder_advance(d, r.p - tmpbuf); - return r.val; - } - } - // Slow case -- varint spans buffer seam. - return upb_decode_varint_slow(d); -} - -FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) { - if (upb_decoder_bufleft(d) >= bytes) { - // Fast case. - memcpy(buf, d->ptr, bytes); - upb_decoder_advance(d, bytes); } else { - // Slow case. - size_t read = 0; - while (1) { - size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read); - memcpy(buf + read, d->ptr, avail); - upb_decoder_advance(d, avail); - read += avail; - if (read == bytes) break; - upb_pullbuf(d); - } + // Slow case -- varint spans buffer seam. + return decode_varint_slow(d); } } -FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) { +FORCEINLINE uint32_t decode_fixed32(upb_pbdecoder *d) { uint32_t u32; - upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t)); + getbytes(d, &u32, 4); return u32; // TODO: proper byte swapping for big-endian machines. } -FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { + +FORCEINLINE uint64_t decode_fixed64(upb_pbdecoder *d) { uint64_t u64; - upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t)); + getbytes(d, &u64, 8); return u64; // TODO: proper byte swapping for big-endian machines. } -INLINE void upb_push_msg(upb_decoder *d, const upb_fielddef *f, uint64_t end) { - upb_decoder_frame *fr = d->top + 1; - if (!upb_sink_startsubmsg(&d->sink, f) || fr > d->limit) { - upb_decoder_abortjmp(d, "Nesting too deep."); - } +static void push(upb_pbdecoder *d, const upb_fielddef *f, bool is_sequence, + bool is_packed, int32_t group_fieldnum, uint64_t end) { + frame *fr = d->top + 1; + if (fr >= d->limit) abortjmp(d, "Nesting too deep."); fr->f = f; - fr->is_sequence = false; - fr->is_packed = false; + fr->is_sequence = is_sequence; + fr->is_packed = is_packed; fr->end_ofs = end; - fr->group_fieldnum = end == UPB_NONDELIMITED ? - (int32_t)upb_fielddef_number(f) : -1; + fr->group_fieldnum = group_fieldnum; d->top = fr; - upb_decoder_setmsgend(d); + set_delim_end(d); } -INLINE void upb_push_seq(upb_decoder *d, const upb_fielddef *f, bool packed, - uint64_t end_ofs) { - upb_decoder_frame *fr = d->top + 1; - if (!upb_sink_startseq(&d->sink, f) || fr > d->limit) { - upb_decoder_abortjmp(d, "Nesting too deep."); - } - fr->f = f; - fr->is_sequence = true; - fr->group_fieldnum = -1; - fr->is_packed = packed; - fr->end_ofs = end_ofs; - d->top = fr; - upb_decoder_setmsgend(d); +static void push_msg(upb_pbdecoder *d, const upb_fielddef *f, uint64_t end) { + if (!upb_sink_startsubmsg(d->sink, getselector(f, UPB_HANDLER_STARTSUBMSG))) + abortjmp(d, "startsubmsg failed."); + int32_t group_fieldnum = (end == UPB_NONDELIMITED) ? + (int32_t)upb_fielddef_number(f) : -1; + push(d, f, false, false, group_fieldnum, end); +} + +static void push_seq(upb_pbdecoder *d, const upb_fielddef *f, bool packed, + uint64_t end_ofs) { + if (!upb_sink_startseq(d->sink, getselector(f, UPB_HANDLER_STARTSEQ))) + abortjmp(d, "startseq failed."); + push(d, f, true, packed, -1, end_ofs); +} + +static void push_str(upb_pbdecoder *d, const upb_fielddef *f, size_t len, + uint64_t end) { + if (!upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len)) + abortjmp(d, "startseq failed."); + push(d, f, false, false, -1, end); } -INLINE void upb_pop_submsg(upb_decoder *d) { - upb_sink_endsubmsg(&d->sink, d->top->f); +static void pop_submsg(upb_pbdecoder *d) { + upb_sink_endsubmsg(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSUBMSG)); d->top--; - upb_decoder_setmsgend(d); + set_delim_end(d); } -INLINE void upb_pop_seq(upb_decoder *d) { - upb_sink_endseq(&d->sink, d->top->f); +static void pop_seq(upb_pbdecoder *d) { + upb_sink_endseq(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSEQ)); d->top--; - upb_decoder_setmsgend(d); + set_delim_end(d); +} + +static void pop_string(upb_pbdecoder *d) { + upb_sink_endstr(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSTR)); + d->top--; + set_delim_end(d); +} + +static void checkdelim(upb_pbdecoder *d) { + while (d->delim_end && d->ptr >= d->delim_end) { + // TODO(haberman): not sure what to do about this; if we detect this error + // we can possibly violate the promise that errors are always signaled by a + // short "parsed byte" count (because all bytes might have been successfully + // parsed prior to detecting this error). + // if (d->ptr > d->delim_end) abortjmp(d, "Bad submessage end"); + if (d->top->is_sequence) { + pop_seq(d); + } else { + pop_submsg(d); + } + } } @@ -374,95 +517,79 @@ INLINE void upb_pop_seq(upb_decoder *d) { // properly sign-extended. We could detect this and error about the data loss, // but proto2 does not do this, so we pass. -#define T(type, wt, name, convfunc) \ - INLINE void upb_decode_ ## type(upb_decoder *d, const upb_fielddef *f) { \ - upb_sink_put ## name(&d->sink, f, (convfunc)(upb_decode_ ## wt(d))); \ +#define T(type, sel, wt, name, convfunc) \ + static void decode_ ## type(upb_pbdecoder *d, const upb_fielddef *f) { \ + upb_sink_put ## name(d->sink, getselector(f, UPB_HANDLER_ ## sel), \ + (convfunc)(decode_ ## wt(d))); \ } \ static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } -T(INT32, varint, int32, int32_t) -T(INT64, varint, int64, int64_t) -T(UINT32, varint, uint32, uint32_t) -T(UINT64, varint, uint64, uint64_t) -T(FIXED32, fixed32, uint32, uint32_t) -T(FIXED64, fixed64, uint64, uint64_t) -T(SFIXED32, fixed32, int32, int32_t) -T(SFIXED64, fixed64, int64, int64_t) -T(BOOL, varint, bool, bool) -T(ENUM, varint, int32, int32_t) -T(DOUBLE, fixed64, double, upb_asdouble) -T(FLOAT, fixed32, float, upb_asfloat) -T(SINT32, varint, int32, upb_zzdec_32) -T(SINT64, varint, int64, upb_zzdec_64) +T(INT32, INT32, varint, int32, int32_t) +T(INT64, INT64, varint, int64, int64_t) +T(UINT32, UINT32, varint, uint32, uint32_t) +T(UINT64, UINT64, varint, uint64, uint64_t) +T(FIXED32, UINT32, fixed32, uint32, uint32_t) +T(FIXED64, UINT64, fixed64, uint64, uint64_t) +T(SFIXED32, INT32, fixed32, int32, int32_t) +T(SFIXED64, INT64, fixed64, int64, int64_t) +T(BOOL, BOOL, varint, bool, bool) +T(ENUM, INT32, varint, int32, int32_t) +T(DOUBLE, DOUBLE, fixed64, double, upb_asdouble) +T(FLOAT, FLOAT, fixed32, float, upb_asfloat) +T(SINT32, INT32, varint, int32, upb_zzdec_32) +T(SINT64, INT64, varint, int64, upb_zzdec_64) #undef T -static void upb_decode_GROUP(upb_decoder *d, const upb_fielddef *f) { - upb_push_msg(d, f, UPB_NONDELIMITED); -} - -static void upb_decode_MESSAGE(upb_decoder *d, const upb_fielddef *f) { - uint32_t len = upb_decode_varint32(d); - upb_push_msg(d, f, upb_decoder_offset(d) + len); -} - -static void upb_decode_STRING(upb_decoder *d, const upb_fielddef *f) { - uint32_t strlen = upb_decode_varint32(d); - uint64_t offset = upb_decoder_offset(d); - uint64_t end = offset + strlen; - if (end > upb_byteregion_endofs(d->input)) - upb_decoder_abortjmp(d, "Unexpected EOF"); - upb_sink_startstr(&d->sink, f, strlen); - while (strlen > 0) { - if (upb_byteregion_available(d->input, offset) == 0) - upb_pullbuf(d); - size_t len; - const char *ptr = upb_byteregion_getptr(d->input, offset, &len); - len = UPB_MIN(len, strlen); - len = upb_sink_putstring(&d->sink, f, ptr, len); - if (len > strlen) - upb_decoder_abortjmp(d, "Skipped too many bytes."); - offset += len; - strlen -= len; - upb_decoder_discardto(d, offset); - } - upb_sink_endstr(&d->sink, f); +static void decode_GROUP(upb_pbdecoder *d, const upb_fielddef *f) { + push_msg(d, f, UPB_NONDELIMITED); } +static void decode_MESSAGE(upb_pbdecoder *d, const upb_fielddef *f) { + uint32_t len = decode_v32(d); + push_msg(d, f, offset(d) + len); +} -/* The main decoding loop *****************************************************/ - -static void upb_decoder_checkdelim(upb_decoder *d) { - // TODO: This doesn't work for the case that no buffer is currently loaded - // (ie. d->buf == NULL) because delim_end is NULL even if we are at - // end-of-delim. Need to add a test that exercises this by putting a buffer - // seam in the middle of the final delimited value in a proto that we skip - // for some reason (like because it's unknown and we have no unknown field - // handler). - while (d->delim_end != NULL && d->ptr >= d->delim_end) { - if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end"); - if (d->top->is_sequence) { - upb_pop_seq(d); - } else { - upb_pop_submsg(d); +static void decode_STRING(upb_pbdecoder *d, const upb_fielddef *f) { + uint32_t strlen = decode_v32(d); + if (strlen <= bufleft(d)) { + upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), strlen); + if (strlen) + upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING), + d->ptr, strlen); + upb_sink_endstr(d->sink, getselector(f, UPB_HANDLER_ENDSTR)); + advance(d, strlen); + } else { + // Buffer ends in the middle of the string; need to push a decoder frame + // for it. + push_str(d, f, strlen, offset(d) + strlen); + if (bufleft(d)) { + upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING), + d->ptr, bufleft(d)); + advance(d, bufleft(d)); } + d->bufstart_ofs = offset(d); + d->residual_end = d->residual; + suspendjmp(d); } } -INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) { + +/* The main decoding loop *****************************************************/ + +static const upb_fielddef *decode_tag(upb_pbdecoder *d) { while (1) { - uint32_t tag; - if (!upb_trydecode_varint32(d, &tag)) return NULL; + uint32_t tag = decode_v32(d); uint8_t wire_type = tag & 0x7; uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL; - const upb_handlers *h = upb_sink_tophandlers(&d->sink); + const upb_handlers *h = upb_sinkframe_handlers(upb_sink_top(d->sink)); f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum); bool packed = false; if (f) { // Wire type check. - upb_fieldtype_t type = upb_fielddef_type(f); + upb_descriptortype_t type = upb_fielddef_descriptortype(f); if (wire_type == upb_decoder_types[type].native_wire_type) { // Wire type is ok. } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED && @@ -477,18 +604,19 @@ INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) { // There are no explicit "startseq" or "endseq" markers in protobuf // streams, so we have to infer them by noticing when a repeated field // starts or ends. - upb_decoder_frame *fr = d->top; + frame *fr = d->top; if (fr->is_sequence && fr->f != f) { - upb_pop_seq(d); + pop_seq(d); fr = d->top; } if (f && upb_fielddef_isseq(f) && !fr->is_sequence) { if (packed) { - uint32_t len = upb_decode_varint32(d); - upb_push_seq(d, f, true, upb_decoder_offset(d) + len); + uint32_t len = decode_v32(d); + push_seq(d, f, true, offset(d) + len); + checkpoint(d); } else { - upb_push_seq(d, f, false, fr->end_ofs); + push_seq(d, f, false, fr->end_ofs); } } @@ -496,118 +624,202 @@ INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) { // Unknown field or ENDGROUP. if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) - upb_decoder_abortjmp(d, "Invalid field number"); + abortjmp(d, "Invalid field number"); switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break; - case UPB_WIRE_TYPE_32BIT: upb_decoder_discard(d, 4); break; - case UPB_WIRE_TYPE_64BIT: upb_decoder_discard(d, 8); break; - case UPB_WIRE_TYPE_DELIMITED: - upb_decoder_discard(d, upb_decode_varint32(d)); break; + case UPB_WIRE_TYPE_VARINT: decode_varint(d); break; + case UPB_WIRE_TYPE_32BIT: skip(d, 4); break; + case UPB_WIRE_TYPE_64BIT: skip(d, 8); break; + case UPB_WIRE_TYPE_DELIMITED: skip(d, decode_v32(d)); break; case UPB_WIRE_TYPE_START_GROUP: - upb_decoder_abortjmp(d, "Can't handle unknown groups yet"); + abortjmp(d, "Can't handle unknown groups yet"); case UPB_WIRE_TYPE_END_GROUP: if (fieldnum != fr->group_fieldnum) - upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag"); - upb_sink_endsubmsg(&d->sink, fr->f); - d->top--; - upb_decoder_setmsgend(d); + abortjmp(d, "Unmatched ENDGROUP tag"); + pop_submsg(d); break; default: - upb_decoder_abortjmp(d, "Invalid wire type"); + abortjmp(d, "Invalid wire type"); } // TODO: deliver to unknown field callback. - upb_decoder_checkpoint(d); - upb_decoder_checkdelim(d); + checkpoint(d); + checkdelim(d); } } -upb_success_t upb_decoder_decode(upb_decoder *d) { - assert(d->input); +void *start(const upb_sinkframe *fr, size_t size_hint) { + UPB_UNUSED(size_hint); + upb_pbdecoder *d = upb_sinkframe_userdata(fr); + assert(d); + assert(d->sink); + upb_sink_startmsg(d->sink); + return d; +} + +bool end(const upb_sinkframe *fr) { + upb_pbdecoder *d = upb_sinkframe_userdata(fr); + + if (d->residual_end > d->residual) { + // We have preserved bytes. + upb_status_seterrliteral(decoder_status(d), "Unexpected EOF"); + return false; + } + + // We may need to dispatch a top-level implicit frame. + if (d->top == d->stack + 1 && + d->top->is_sequence && + !d->top->is_packed) { + assert(upb_sinkframe_depth(upb_sink_top(d->sink)) == 1); + pop_seq(d); + } + if (d->top != d->stack) { + upb_status_seterrliteral( + decoder_status(d), "Ended inside delimited field."); + return false; + } + upb_sink_endmsg(d->sink); + return true; +} + +size_t decode(const upb_sinkframe *fr, const char *buf, size_t size) { + upb_pbdecoder *d = upb_sinkframe_userdata(fr); + decoderplan *plan = upb_sinkframe_handlerdata(fr); + UPB_UNUSED(plan); + assert(upb_sinkframe_handlers(upb_sink_top(d->sink)) == plan->dest_handlers); + + if (size == 0) return 0; + // Assume we'll consume the whole buffer unless this is overwritten. + d->ret = size; + if (_setjmp(d->exitjmp)) { - assert(!upb_ok(&d->status)); - return UPB_ERROR; + // Hit end-of-buffer or error. + return d->ret; + } + + d->buf_param = buf; + d->size_param = size; + if (d->residual_end > d->residual) { + // We have residual bytes from the last buffer. + d->userbuf_remaining = size; + } else { + d->userbuf_remaining = 0; + advancetobuf(d, buf, size); + + if (d->top != d->stack && + upb_fielddef_isstring(d->top->f) && + !d->top->is_sequence) { + // Last buffer ended in the middle of a string; deliver more of it. + size_t len = d->top->end_ofs - offset(d); + if (size >= len) { + upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING), + d->ptr, len); + advance(d, len); + pop_string(d); + } else { + upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING), + d->ptr, size); + advance(d, size); + d->residual_end = d->residual; + advancetobuf(d, d->residual, 0); + return size; + } + } } - upb_sink_startmsg(&d->sink); - // Prime the buf so we can hit the JIT immediately. - upb_trypullbuf(d); + checkpoint(d); + const upb_fielddef *f = d->top->f; while(1) { #ifdef UPB_USE_JIT_X64 - upb_decoder_enterjit(d); - upb_decoder_checkpoint(d); - upb_decoder_setmsgend(d); + upb_decoder_enterjit(d, plan); + checkpoint(d); + set_delim_end(d); // JIT doesn't keep this current. #endif - upb_decoder_checkdelim(d); - if (!d->top_is_packed) f = upb_decode_tag(d); - if (!f) { - // Sucessful EOF. We may need to dispatch a top-level implicit frame. - if (d->top->is_sequence) { - assert(d->sink.top == d->sink.stack + 1); - upb_pop_seq(d); - } - assert(d->top == d->stack); - upb_sink_endmsg(&d->sink, &d->status); - return UPB_OK; + checkdelim(d); + if (!d->top->is_packed) { + f = decode_tag(d); } - switch (upb_fielddef_type(f)) { - case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break; - case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break; - case UPB_TYPE(INT64): upb_decode_INT64(d, f); break; - case UPB_TYPE(UINT64): upb_decode_UINT64(d, f); break; - case UPB_TYPE(INT32): upb_decode_INT32(d, f); break; - case UPB_TYPE(FIXED64): upb_decode_FIXED64(d, f); break; - case UPB_TYPE(FIXED32): upb_decode_FIXED32(d, f); break; - case UPB_TYPE(BOOL): upb_decode_BOOL(d, f); break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): upb_decode_STRING(d, f); break; - case UPB_TYPE(GROUP): upb_decode_GROUP(d, f); break; - case UPB_TYPE(MESSAGE): upb_decode_MESSAGE(d, f); break; - case UPB_TYPE(UINT32): upb_decode_UINT32(d, f); break; - case UPB_TYPE(ENUM): upb_decode_ENUM(d, f); break; - case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break; - case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break; - case UPB_TYPE(SINT32): upb_decode_SINT32(d, f); break; - case UPB_TYPE(SINT64): upb_decode_SINT64(d, f); break; - case UPB_TYPE_NONE: assert(false); break; + switch (upb_fielddef_descriptortype(f)) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: decode_DOUBLE(d, f); break; + case UPB_DESCRIPTOR_TYPE_FLOAT: decode_FLOAT(d, f); break; + case UPB_DESCRIPTOR_TYPE_INT64: decode_INT64(d, f); break; + case UPB_DESCRIPTOR_TYPE_UINT64: decode_UINT64(d, f); break; + case UPB_DESCRIPTOR_TYPE_INT32: decode_INT32(d, f); break; + case UPB_DESCRIPTOR_TYPE_FIXED64: decode_FIXED64(d, f); break; + case UPB_DESCRIPTOR_TYPE_FIXED32: decode_FIXED32(d, f); break; + case UPB_DESCRIPTOR_TYPE_BOOL: decode_BOOL(d, f); break; + case UPB_DESCRIPTOR_TYPE_STRING: UPB_FALLTHROUGH_INTENDED; + case UPB_DESCRIPTOR_TYPE_BYTES: decode_STRING(d, f); break; + case UPB_DESCRIPTOR_TYPE_GROUP: decode_GROUP(d, f); break; + case UPB_DESCRIPTOR_TYPE_MESSAGE: decode_MESSAGE(d, f); break; + case UPB_DESCRIPTOR_TYPE_UINT32: decode_UINT32(d, f); break; + case UPB_DESCRIPTOR_TYPE_ENUM: decode_ENUM(d, f); break; + case UPB_DESCRIPTOR_TYPE_SFIXED32: decode_SFIXED32(d, f); break; + case UPB_DESCRIPTOR_TYPE_SFIXED64: decode_SFIXED64(d, f); break; + case UPB_DESCRIPTOR_TYPE_SINT32: decode_SINT32(d, f); break; + case UPB_DESCRIPTOR_TYPE_SINT64: decode_SINT64(d, f); break; } - upb_decoder_checkpoint(d); + checkpoint(d); } } -void upb_decoder_init(upb_decoder *d) { - upb_status_init(&d->status); - d->plan = NULL; - d->input = NULL; +void init(void *_d) { + upb_pbdecoder *d = _d; d->limit = &d->stack[UPB_MAX_NESTING]; + d->sink = NULL; + // reset() must be called before decoding; this is guaranteed by assert() in + // start(). } -void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p) { - d->plan = p; - d->input = NULL; - upb_sink_init(&d->sink, p->handlers); -} - -void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, - void *c) { - assert(d->plan); - upb_status_clear(&d->status); - upb_sink_reset(&d->sink, c); - d->input = input; - +void reset(void *_d) { + upb_pbdecoder *d = _d; d->top = d->stack; d->top->is_sequence = false; d->top->is_packed = false; d->top->group_fieldnum = UINT32_MAX; d->top->end_ofs = UPB_NONDELIMITED; - - // Protect against assert in skiptonewbuf(). d->bufstart_ofs = 0; - d->ptr = NULL; - d->buf = NULL; - upb_decoder_skiptonewbuf(d, upb_byteregion_startofs(input)); + d->ptr = d->residual; + d->buf = d->residual; + d->end = d->residual; + d->residual_end = d->residual; } -void upb_decoder_uninit(upb_decoder *d) { - upb_status_uninit(&d->status); +bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) { + // TODO(haberman): typecheck the sink, and test whether the decoder is in the + // middle of decoding. Return false if either assumption is violated. + d->sink = sink; + reset(d); + return true; +} + +const upb_frametype upb_pbdecoder_frametype = { + sizeof(upb_pbdecoder), + init, + NULL, + reset, +}; + +const upb_frametype *upb_pbdecoder_getframetype() { + return &upb_pbdecoder_frametype; +} + +const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, + bool allowjit, + const void *owner) { + UPB_UNUSED(allowjit); + decoderplan *p = malloc(sizeof(*p)); + assert(upb_handlers_isfrozen(dest)); + p->dest_handlers = dest; + upb_handlers_ref(dest, p); +#ifdef UPB_USE_JIT_X64 + p->jit_code = NULL; + if (allowjit) upb_decoderplan_makejit(p); +#endif + + upb_handlers *h = upb_handlers_new( + UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner); + upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, start, NULL, NULL); + upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, decode, p, freeplan); + upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, end, NULL, NULL); + return h; } diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 690ebb9..4307434 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -4,163 +4,96 @@ * Copyright (c) 2009-2010 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * upb_decoder implements a high performance, streaming decoder for protobuf - * data that works by getting its input data from a upb_byteregion and calling - * into a upb_handlers. + * upb::Decoder implements a high performance, streaming decoder for protobuf + * data that works by parsing input data one buffer at a time and calling into + * a upb::Handlers. */ #ifndef UPB_DECODER_H_ #define UPB_DECODER_H_ -#include -#include "upb/bytestream.h" #include "upb/sink.h" #ifdef __cplusplus -extern "C" { -#endif +namespace upb { +namespace pb { -/* upb_decoderplan ************************************************************/ - -// A decoderplan contains whatever data structures and generated (JIT-ted) code -// are necessary to decode protobuf data of a specific type to a specific set -// of handlers. By generating the plan ahead of time, we avoid having to -// redo this work every time we decode. -// -// A decoderplan is threadsafe, meaning that it can be used concurrently by -// different upb_decoders in different threads. However, the upb_decoders are -// *not* thread-safe. -struct _upb_decoderplan; -typedef struct _upb_decoderplan upb_decoderplan; - -// TODO(haberman): -// - add support for letting any message in the plan be at the top level. -// - make this object a handlers instead (when bytesrc/bytesink are merged -// into handlers). -// - add support for sharing code with previously-built plans/handlers. -upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit); -void upb_decoderplan_unref(upb_decoderplan *p); - -// Returns true if the plan contains JIT-ted code. This may not be the same as -// the "allowjit" parameter to the constructor if support for JIT-ting was not -// compiled in. -bool upb_decoderplan_hasjitcode(upb_decoderplan *p); - - -/* upb_decoder ****************************************************************/ - -struct dasm_State; - -typedef struct { - const upb_fielddef *f; - uint64_t end_ofs; - uint32_t group_fieldnum; // UINT32_MAX for non-groups. - bool is_sequence; // frame represents seq or submsg? (f might be both). - bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX - // (strings aren't pushed). -} upb_decoder_frame; - -typedef struct _upb_decoder { - upb_decoderplan *plan; - upb_byteregion *input; // Input data (serialized), not owned. - upb_status status; // Where we store errors that occur. - - // Where we push parsed data. - // TODO(haberman): make this a pointer and make upb_decoder_resetinput() take - // one of these instead of a void*. - upb_sink sink; - - // Our internal stack. - upb_decoder_frame *top, *limit; - upb_decoder_frame stack[UPB_MAX_NESTING]; - - // Current input buffer and its stream offset. - const char *buf, *ptr, *end; - uint64_t bufstart_ofs; - - // End of the delimited region, relative to ptr, or NULL if not in this buf. - const char *delim_end; - // True if the top stack frame represents a packed field. - bool top_is_packed; - -#ifdef UPB_USE_JIT_X64 - // For JIT, which doesn't do bounds checks in the middle of parsing a field. - const char *jit_end, *effective_end; // == MIN(jit_end, delim_end) - - // Used momentarily by the generated code to store a value while a user - // function is called. - uint32_t tmp_len; -#endif - - // For exiting the decoder on error. - jmp_buf exitjmp; -} upb_decoder; - -void upb_decoder_init(upb_decoder *d); -void upb_decoder_uninit(upb_decoder *d); - -// Resets the plan that the decoder will parse from. "msg_offset" indicates -// which message from the plan will be used as the top-level message. -// -// This will also reset the decoder's input to be uninitialized -- -// upb_decoder_resetinput() must be called before parsing can occur. The plan -// must live until the decoder is destroyed or reset to a different plan. -// -// Must be called before upb_decoder_resetinput() or upb_decoder_decode(). -void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p); - -// Resets the input of an already-allocated decoder. This puts it in a state -// where it has not seen any data, and expects the next data to be from the -// beginning of a new protobuf. Decoders must have their input reset before -// they can be used. A decoder can have its input reset multiple times. -// "input" must live until the decoder is destroyed or has it input reset -// again. "c" is the closure that will be passed to the handlers. -// -// Must be called before upb_decoder_decode(). -void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *c); - -// Decodes serialized data (calling handlers as the data is parsed), returning -// the success of the operation (call upb_decoder_status() for details). -upb_success_t upb_decoder_decode(upb_decoder *d); - -INLINE const upb_status *upb_decoder_status(upb_decoder *d) { - return &d->status; -} +// Frame type that encapsulates decoder state. +class Decoder; -// Implementation details +// Resets the sink of the Decoder. This must be called at least once before +// the decoder can be used. It may only be called with the decoder is in a +// state where it was just created or reset. The given sink must be from the +// same pipeline as this decoder. +inline bool ResetDecoderSink(Decoder* d, Sink* sink); -struct _upb_decoderplan { - // The top-level handlers that this plan calls into. We own a ref. - const upb_handlers *handlers; +// Gets the handlers suitable for parsing protobuf data according to the given +// destination handlers. The protobuf schema to parse is taken from dest. +inline const upb::Handlers *GetDecoderHandlers(const upb::Handlers *dest, + bool allowjit, + const void *owner); -#ifdef UPB_USE_JIT_X64 - // JIT-generated machine code (else NULL). - char *jit_code; - size_t jit_size; - char *debug_info; +// Returns true if these handlers represent a upb::pb::Decoder. +bool IsDecoder(const upb::Handlers *h); - // For storing upb_jitmsginfo, which contains per-msg runtime data needed - // by the JIT. - // Maps upb_handlers* -> upb_jitmsginfo. - upb_inttable msginfo; +// Returns true if IsDecoder(h) and the given handlers have JIT code. +inline bool HasJitCode(const upb::Handlers* h); - // The following members are used only while the JIT is being built. +// Returns the destination handlers if IsDecoder(h), otherwise returns NULL. +const upb::Handlers* GetDestHandlers(const upb::Handlers* h); - // This pointer is allocated by dasm_init() and freed by dasm_free(). - struct dasm_State *dynasm; +} // namespace pb +} // namespace upb - // For storing pclabel bases while we are building the JIT. - // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base - upb_inttable pclabels; +typedef upb::pb::Decoder upb_pbdecoder; - // This is not the same as len(pclabels) because the table only contains base - // offsets for each def, but each def can have many pclabels. - uint32_t pclabel_count; +extern "C" { +#else +struct upb_pbdecoder; +typedef struct upb_pbdecoder upb_pbdecoder; #endif -}; + +// C API. +const upb_frametype *upb_pbdecoder_getframetype(); +bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink *sink); +const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, + bool allowjit, + const void *owner); +bool upb_pbdecoder_isdecoder(const upb_handlers *h); +bool upb_pbdecoder_hasjitcode(const upb_handlers *h); +const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h); + +// C++ implementation details. ///////////////////////////////////////////////// #ifdef __cplusplus -} /* extern "C" */ +} // extern "C" + +namespace upb { + +template<> inline const FrameType* GetFrameType() { + return upb_pbdecoder_getframetype(); +} + +namespace pb { +inline bool ResetDecoderSink(Decoder* r, Sink* sink) { + return upb_pbdecoder_resetsink(r, sink); +} +inline const upb::Handlers* GetDecoderHandlers(const upb::Handlers* dest, + bool allowjit, + const void* owner) { + return upb_pbdecoder_gethandlers(dest, allowjit, owner); +} +inline bool IsDecoder(const upb::Handlers* h) { + return upb_pbdecoder_isdecoder(h); +} +inline bool HasJitCode(const upb::Handlers* h) { + return upb_pbdecoder_hasjitcode(h); +} +inline const upb::Handlers* GetDestHandlers(const upb::Handlers* h) { + return upb_pbdecoder_getdesthandlers(h); +} +} // namespace pb +} // namespace upb #endif #endif /* UPB_DECODER_H_ */ diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc index cd09cfe..7d4c537 100644 --- a/upb/pb/decoder_x64.dasc +++ b/upb/pb/decoder_x64.dasc @@ -4,7 +4,7 @@ |// Copyright (c) 2011 Google Inc. See LICENSE for details. |// Author: Josh Haberman |// -|// JIT compiler for upb_decoder on x86. Given a upb_decoderplan object (which +|// JIT compiler for upb_pbdecoder on x86. Given a decoderplan object (which |// contains an embedded set of upb_handlers), generates code specialized to |// parsing the specific message and calling specific handlers. |// @@ -54,17 +54,19 @@ typedef struct { void *jit_func; } upb_jitmsginfo; -static uint32_t upb_getpclabel(upb_decoderplan *plan, const void *obj, int n) { - const upb_value *v = upb_inttable_lookupptr(&plan->pclabels, obj); - assert(v); - return upb_value_getuint32(*v) + n; +static uint32_t upb_getpclabel(decoderplan *plan, const void *obj, int n) { + upb_value v; + bool found = upb_inttable_lookupptr(&plan->pclabels, obj, &v); + UPB_ASSERT_VAR(found, found); + return upb_value_getuint32(v) + n; } -static upb_jitmsginfo *upb_getmsginfo(upb_decoderplan *plan, +static upb_jitmsginfo *upb_getmsginfo(decoderplan *plan, const upb_handlers *h) { - const upb_value *v = upb_inttable_lookupptr(&plan->msginfo, h); - assert(v); - return upb_value_getptr(*v); + upb_value v; + bool found = upb_inttable_lookupptr(&plan->msginfo, h, &v); + UPB_ASSERT_VAR(found, found); + return upb_value_getptr(v); } // To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code @@ -109,7 +111,7 @@ void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); } -void upb_reg_jit_gdb(upb_decoderplan *plan) { +void upb_reg_jit_gdb(decoderplan *plan) { // Create debug info. size_t elf_len = sizeof(upb_jit_debug_elf_file); plan->debug_info = malloc(elf_len); @@ -135,7 +137,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) { #else -void upb_reg_jit_gdb(upb_decoderplan *plan) { +void upb_reg_jit_gdb(decoderplan *plan) { (void)plan; } @@ -154,10 +156,9 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |// Calling conventions. Note -- this will need to be changed for |// Windows, which uses a different calling convention! |.define ARG1_64, rdi -|.define ARG2_8, sil +|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi. |.define ARG2_32, esi |.define ARG2_64, rsi -|.define ARG3_8, dl |.define ARG3_32, edx |.define ARG3_64, rdx |.define ARG4_64, rcx @@ -170,9 +171,10 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |// conventions, but of course when calling to user callbacks we must. |.define PTR, rbx // Writing this to DECODER->ptr commits our progress. |.define CLOSURE, r12 -|.type SINKFRAME, upb_sink_frame, r13 -|.type FRAME, upb_decoder_frame, r14 -|.type DECODER, upb_decoder, r15 +|.type SINKFRAME, upb_sinkframe, r13 +|.type FRAME, frame, r14 +|.type DECODER, upb_pbdecoder, r15 +|.type SINK, upb_sink | |.macro callp, addr || upb_assert_notnull(addr); @@ -187,6 +189,21 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } || } |.endmacro | +|.macro load_handler_data, h, f, type +||{ +|| uintptr_t data = (uintptr_t)gethandlerdata(h, f, type); +|| if (data > 0xffffffff) { +| mov64 rax, data +| mov SINKFRAME->u.handler_data, rax +|| } else if (data > 0x7fffffff) { +| mov eax, data +| mov SINKFRAME->u.handler_data, rax +|| } else { +| mov qword SINKFRAME->u.handler_data, data +|| } +|| } +|.endmacro +| |// Checkpoints our progress by writing PTR to DECODER, and |// checks for end-of-buffer. |.macro checkpoint, h @@ -205,25 +222,33 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | jz ->exit_jit |.endmacro | -|// Decodes varint from [PTR + offset] -> ARG3. -|// Saves new pointer as rax. +|// Decodes varint into ARG2. +|// Inputs: +|// - ecx: first 4 bytes of varint +|// - offset: offset from PTR where varint begins +|// Outputs: +|// - ARG2: contains decoded varint +|// - rax: new PTR |.macro decode_loaded_varint, offset | // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder. | lea rax, [PTR + offset + 1] -| mov ARG3_32, ecx -| and ARG3_32, 0x7f +| mov ARG2_32, ecx +| and ARG2_32, 0x7f | test cl, cl | jns >9 | lea rax, [PTR + offset + 2] -| movzx esi, ch -| and esi, 0x7f -| shl esi, 7 -| or ARG3_32, esi +| movzx edx, ch +| and edx, 0x7f +| shl edx, 7 +| or ARG2_32, edx | test cx, cx | jns >9 | mov ARG1_64, rax -| mov ARG2_32, ARG3_32 +|// XXX: I don't think this handles 64-bit values correctly. +|// Test with UINT64_MAX | callp upb_vdecode_max8_fast +|// rax return from function will contain new pointer +| mov ARG2_64, rdx | check_ptr_ret // Check for unterminated, >10-byte varint. |9: |.endmacro @@ -234,17 +259,22 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | mov PTR, rax |.endmacro | -|// Decode the tag -> edx. +|// Table-based field dispatch. +|// Inputs: +|// - ecx: first 4 bytes of tag +|// Outputs: +|// - edx: field number +|// - esi: wire type |// Could specialize this by avoiding the value masking: could just key the |// table on the raw (length-masked) varint to save 3-4 cycles of latency. |// Currently only support tables where all entries are in the array part. |.macro dyndispatch_, h |=>upb_getpclabel(plan, h, DYNDISPATCH): | decode_loaded_varint, 0 -| mov ecx, edx +| mov ecx, esi | shr ecx, 3 -| and edx, 0x7 // Note: this value is used in the FIELD pclabel below. -| cmp edx, UPB_WIRE_TYPE_END_GROUP +| and esi, 0x7 // Note: this value is used in the FIELD pclabel below. +| cmp esi, UPB_WIRE_TYPE_END_GROUP | je >1 || upb_jitmsginfo *mi = upb_getmsginfo(plan, h); | cmp ecx, mi->max_field_number // Bounds-check the field. @@ -278,10 +308,31 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | .endmacro |.endif | -|// Push a stack frame (not the CPU stack, the upb_decoder stack). -|.macro pushframe, h, field, end_offset_, endtype +|.macro pushsinkframe, handlers, field, endtype +| mov rax, DECODER->sink +| mov dword SINKFRAME->u.selector, getselector(field, endtype) +| lea rcx, [SINKFRAME + sizeof(upb_sinkframe)] // rcx for short addressing +| cmp rcx, SINK:rax->limit +| jae ->exit_jit // Frame stack overflow. +| mov64 r9, (uintptr_t)handlers +| mov SINKFRAME:rcx->h, r9 +| mov SINKFRAME:rcx->closure, CLOSURE +| mov SINK:rax->top_, rcx +| mov SINKFRAME:rcx->sink_, rax +| mov SINKFRAME, rcx +|.endmacro +| +|.macro popsinkframe +| sub SINKFRAME, sizeof(upb_sinkframe) +| mov rax, DECODER->sink +| mov SINK:rax->top_, SINKFRAME +| mov CLOSURE, SINKFRAME->closure +|.endmacro +| +|// Push a stack frame (not the CPU stack, the upb_pbdecoder stack). +|.macro pushframe, handlers, field, end_offset_, endtype |// Decoder Frame. -| lea rax, [FRAME + sizeof(upb_decoder_frame)] // rax for short addressing +| lea rax, [FRAME + sizeof(frame)] // rax for short addressing | cmp rax, DECODER->limit | jae ->exit_jit // Frame stack overflow. | mov64 r10, (uintptr_t)field @@ -289,36 +340,21 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | mov qword FRAME:rax->end_ofs, end_offset_ | mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ) | mov byte FRAME:rax->is_packed, 0 -|| if (upb_fielddef_type(field) == UPB_TYPE_GROUP && -|| endtype == UPB_HANDLER_ENDSUBMSG) { +|| if (upb_fielddef_istagdelim(field) && endtype == UPB_HANDLER_ENDSUBMSG) { | mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field) || } else { | mov dword FRAME:rax->group_fieldnum, 0xffffffff || } | mov DECODER->top, rax | mov FRAME, rax -|// Sink Frame. -| lea rcx, [SINKFRAME + sizeof(upb_sink_frame)] // rcx for short addressing -| cmp rcx, DECODER->sink.limit -| jae ->exit_jit // Frame stack overflow. -| mov dword SINKFRAME:rcx->end, getselector(field, endtype) -|| if (upb_fielddef_issubmsg(field)) { -| mov64 r9, (uintptr_t)upb_handlers_getsubhandlers(h, field) -|| } else { -| mov64 r9, (uintptr_t)h -|| } -| mov SINKFRAME:rcx->h, r9 -| mov DECODER->sink.top, rcx -| mov SINKFRAME, rcx +| pushsinkframe handlers, field, endtype |.endmacro | |.macro popframe -| sub FRAME, sizeof(upb_decoder_frame) +| sub FRAME, sizeof(frame) | mov DECODER->top, FRAME -| sub SINKFRAME, sizeof(upb_sink_frame) -| mov DECODER->sink.top, SINKFRAME +| popsinkframe | setmsgend -| mov CLOSURE, SINKFRAME->closure |.endmacro | |.macro setmsgend @@ -369,14 +405,6 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } #include #include "upb/pb/varint.h" -static upb_selector_t getselector(const upb_fielddef *f, - upb_handlertype_t type) { - upb_selector_t selector; - bool ok = upb_getselector(f, type, &selector); - UPB_ASSERT_VAR(ok, ok); - return selector; -} - static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type) { return upb_handlers_gethandler(h, getselector(f, type)); @@ -387,73 +415,74 @@ static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f, return (uintptr_t)upb_handlers_gethandlerdata(h, getselector(f, type)); } -// Decodes the next val into ARG3, advances PTR. -static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, - uint8_t type, size_t tag_size, +// Decodes the next val into ARG2, advances PTR. +static void upb_decoderplan_jit_decodefield(decoderplan *plan, + size_t tag_size, const upb_handlers *h, const upb_fielddef *f) { // Decode the value into arg 3 for the callback. - switch (type) { - case UPB_TYPE(DOUBLE): + switch (upb_fielddef_descriptortype(f)) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: | movsd XMMARG1, qword [PTR + tag_size] | add PTR, 8 + tag_size break; - case UPB_TYPE(FIXED64): - case UPB_TYPE(SFIXED64): - | mov ARG3_64, qword [PTR + tag_size] + case UPB_DESCRIPTOR_TYPE_FIXED64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + | mov ARG2_64, qword [PTR + tag_size] | add PTR, 8 + tag_size break; - case UPB_TYPE(FLOAT): + case UPB_DESCRIPTOR_TYPE_FLOAT: | movss XMMARG1, dword [PTR + tag_size] | add PTR, 4 + tag_size break; - case UPB_TYPE(FIXED32): - case UPB_TYPE(SFIXED32): - | mov ARG3_32, dword [PTR + tag_size] + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + | mov ARG2_32, dword [PTR + tag_size] | add PTR, 4 + tag_size break; - case UPB_TYPE(BOOL): + case UPB_DESCRIPTOR_TYPE_BOOL: // Can't assume it's one byte long, because bool must be wire-compatible // with all of the varint integer types. | decode_varint tag_size - | test ARG3_64, ARG3_64 - | setne ARG3_8 // Other bytes left with val, should be ok. + | test ARG2_64, ARG2_64 + | setne al + | movzx ARG2_32, al break; - case UPB_TYPE(INT64): - case UPB_TYPE(UINT64): - case UPB_TYPE(INT32): - case UPB_TYPE(UINT32): - case UPB_TYPE(ENUM): + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_ENUM: | decode_varint tag_size break; - case UPB_TYPE(SINT64): + case UPB_DESCRIPTOR_TYPE_SINT64: // 64-bit zig-zag decoding. | decode_varint tag_size - | mov rax, ARG3_64 - | shr ARG3_64, 1 + | mov rax, ARG2_64 + | shr ARG2_64, 1 | and rax, 1 | neg rax - | xor ARG3_64, rax + | xor ARG2_64, rax break; - case UPB_TYPE(SINT32): + case UPB_DESCRIPTOR_TYPE_SINT32: // 32-bit zig-zag decoding. | decode_varint tag_size - | mov eax, ARG3_32 - | shr ARG3_32, 1 + | mov eax, ARG2_32 + | shr ARG2_32, 1 | and eax, 1 | neg eax - | xor ARG3_32, eax + | xor ARG2_32, eax break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): { + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { // We only handle the case where the entire string is in our current // buf, which sidesteps any security problems. The C path has more // robust checks. @@ -461,39 +490,46 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, | decode_loaded_varint tag_size | mov rdi, DECODER->end | sub rdi, rax - | cmp ARG3_64, rdi // if (len > d->end - str) + | cmp ARG2_64, rdi // if (len > d->end - str) | ja ->exit_jit // Can't deliver, whole string not in buf. | mov PTR, rax upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR); if (handler) { - | mov DECODER->tmp_len, ARG3_64 - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSTR) + | mov DECODER->tmp_len, ARG2_32 + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_STARTSTR | callp handler | check_ptr_ret - | mov ARG1_64, rax // sub-closure - | mov ARG4_64, DECODER->tmp_len + | mov CLOSURE, rax + | mov ARG3_32, DECODER->tmp_len } else { - | mov ARG1_64, CLOSURE - | mov ARG4_64, ARG3_64 + | mov ARG3_64, ARG2_64 } handler = gethandler(h, f, UPB_HANDLER_STRING); if (handler) { - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STRING) - | mov ARG3_64, PTR + // TODO: push a real frame so we can resume into the string. + // (but maybe do this only if the string breaks). + | pushsinkframe h, f, UPB_HANDLER_ENDSTR + + // size_t str(const upb_sinkframe *frame, const char *buf, size_t len) + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_STRING + | mov ARG2_64, PTR | callp handler // TODO: properly handle returns other than "n" (the whole string). | add PTR, rax + | popsinkframe } else { - | add PTR, ARG4_64 + | add PTR, ARG3_64 } handler = gethandler(h, f, UPB_HANDLER_ENDSTR); if (handler) { - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSTR) + // bool endstr(const upb_sinkframe *frame); + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_ENDSTR | callp handler | check_bool_ret } @@ -501,10 +537,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, } // Will dispatch callbacks and call submessage in a second. - case UPB_TYPE(MESSAGE): + case UPB_DESCRIPTOR_TYPE_MESSAGE: | decode_varint tag_size break; - case UPB_TYPE(GROUP): + case UPB_DESCRIPTOR_TYPE_GROUP: | add PTR, tag_size break; @@ -512,52 +548,58 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, } } -static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, +static void upb_decoderplan_jit_callcb(decoderplan *plan, const upb_handlers *h, const upb_fielddef *f) { // Call callbacks. Specializing the append accessors didn't yield a speed // increase in benchmarks. if (upb_fielddef_issubmsg(f)) { - if (upb_fielddef_type(f) == UPB_TYPE(MESSAGE)) { - | mov rsi, PTR - | sub rsi, DECODER->buf - | add rsi, ARG3_64 // = (d->ptr - d->buf) + delim_len - } else { - assert(upb_fielddef_type(f) == UPB_TYPE(GROUP)); - | mov rsi, UPB_NONDELIMITED - } - | pushframe h, f, rsi, UPB_HANDLER_ENDSUBMSG - // Call startsubmsg handler (if any). upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG); if (startsubmsg) { - // upb_sflow_t startsubmsg(void *closure, upb_value fval) - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSUBMSG); + // upb_sflow_t startsubmsg(const upb_sinkframe *frame) + | mov DECODER->tmp_len, ARG2_32 + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_STARTSUBMSG | callp startsubmsg | check_ptr_ret | mov CLOSURE, rax } - | mov qword SINKFRAME->closure, CLOSURE - // TODO: have to decide what to do with NULLs subhandlers (or whether to - // disallow them and require a full handlers tree to match the def tree). const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f); - assert(sub_h); - | call =>upb_getpclabel(plan, sub_h, STARTMSG) - | popframe + if (sub_h) { + if (upb_fielddef_istagdelim(f)) { + | mov rdx, UPB_NONDELIMITED + } else { + | mov esi, DECODER->tmp_len + | mov rdx, PTR + | sub rdx, DECODER->buf + | add rdx, DECODER->bufstart_ofs + | add rdx, rsi // = d->bufstart_ofs + (d->ptr - d->buf) + delim_len + } + | pushframe sub_h, f, rdx, UPB_HANDLER_ENDSUBMSG + | call =>upb_getpclabel(plan, sub_h, STARTMSG) + | popframe + } else { + if (upb_fielddef_istagdelim(f)) { + // Groups with no handlers not supported yet. + assert(false); + } else { + | mov esi, DECODER->tmp_len + | add PTR, rsi + } + } // Call endsubmsg handler (if any). upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG); if (endsubmsg) { // upb_flow_t endsubmsg(void *closure, upb_value fval); - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSUBMSG); + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_ENDSUBMSG | callp endsubmsg | check_bool_ret } } else if (!upb_fielddef_isstring(f)) { - | mov ARG1_64, CLOSURE upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f); upb_func *handler = gethandler(h, f, handlertype); const upb_stdmsg_fval *fv = (void*)gethandlerdata(h, f, handlertype); @@ -565,24 +607,25 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, // Can't switch() on function pointers. if (handler == (void*)&upb_stdmsg_setint64 || handler == (void*)&upb_stdmsg_setuint64) { - | mov [ARG1_64 + fv->offset], ARG3_64 + | mov [CLOSURE + fv->offset], ARG2_64 | sethas CLOSURE, fv->hasbit } else if (handler == (void*)&upb_stdmsg_setdouble) { - | movsd qword [ARG1_64 + fv->offset], XMMARG1 + | movsd qword [CLOSURE + fv->offset], XMMARG1 | sethas CLOSURE, fv->hasbit } else if (handler == (void*)&upb_stdmsg_setint32 || handler == (void*)&upb_stdmsg_setuint32) { - | mov [ARG1_64 + fv->offset], ARG3_32 + | mov [CLOSURE + fv->offset], ARG2_32 | sethas CLOSURE, fv->hasbit } else if (handler == (void*)&upb_stdmsg_setfloat) { - | movss dword [ARG1_64 + fv->offset], XMMARG1 + | movss dword [CLOSURE + fv->offset], XMMARG1 | sethas CLOSURE, fv->hasbit } else if (handler == (void*)&upb_stdmsg_setbool) { - | mov [ARG1_64 + fv->offset], ARG3_8 + | mov [CLOSURE + fv->offset], ARG2_8 | sethas CLOSURE, fv->hasbit } else if (handler) { - // Load closure and fval into arg registers. - | mov64 ARG2_64, gethandlerdata(h, f, handlertype); + // bool value(const upb_sinkframe* frame, ctype val) + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, handlertype | callp handler | check_bool_ret } @@ -591,15 +634,27 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, static uint64_t upb_get_encoded_tag(const upb_fielddef *f) { uint32_t tag = (upb_fielddef_number(f) << 3) | - upb_decoder_types[upb_fielddef_type(f)].native_wire_type; + upb_decoder_types[upb_fielddef_descriptortype(f)].native_wire_type; uint64_t encoded_tag = upb_vencode32(tag); // No tag should be greater than 5 bytes. assert(encoded_tag <= 0xffffffffff); return encoded_tag; } +static void upb_decoderplan_jit_endseq(decoderplan *plan, + const upb_handlers *h, + const upb_fielddef *f) { + | popframe + upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ); + if (endseq) { + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_ENDSEQ + | callp endseq + } +} + // PTR should point to the beginning of the tag. -static void upb_decoderplan_jit_field(upb_decoderplan *plan, +static void upb_decoderplan_jit_field(decoderplan *plan, const upb_handlers *h, const upb_fielddef *f, const upb_fielddef *next_f) { @@ -608,45 +663,51 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan, int tag_size = upb_value_size(tag); // PC-label for the dispatch table. - // We check the wire type (which must be loaded in edx) because the + // We check the wire type (which must be loaded in edi) because the // table is keyed on field number, not type. |=>upb_getpclabel(plan, f, FIELD): - | cmp edx, (tag & 0x7) + | cmp esi, (tag & 0x7) | jne ->exit_jit // In the future: could be an unknown field or packed. |=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK): if (upb_fielddef_isseq(f)) { - | mov rsi, FRAME->end_ofs - | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ); if (startseq) { - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSEQ); - | callp startseq + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_STARTSEQ + | callp startseq | check_ptr_ret - | mov CLOSURE, rax + | mov CLOSURE, rax } - | mov qword SINKFRAME->closure, CLOSURE + | mov rsi, FRAME->end_ofs + | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ } |1: // Label for repeating this field. - upb_decoderplan_jit_decodefield(plan, upb_fielddef_type(f), tag_size, h, f); + upb_decoderplan_jit_decodefield(plan, tag_size, h, f); upb_decoderplan_jit_callcb(plan, h, f); - // Epilogue: load next tag, check for repeated field. - | checkpoint h - | mov rcx, qword [PTR] + // This is kind of gross; future redesign should take into account how to + // make this work nicely. The difficult part is that the sequence can be + // broken either by end-of-message or by seeing a different field; in both + // cases we need to call the endseq handler, but what we do after that + // depends on which case triggered the end-of-sequence. + | mov DECODER->ptr, PTR + | cmp PTR, DECODER->jit_end + | jae ->exit_jit + | cmp PTR, DECODER->effective_end + | jb >2 + if (upb_fielddef_isseq(f)) { + upb_decoderplan_jit_endseq(plan, h, f); + } + | jmp =>upb_getpclabel(plan, h, ENDOFMSG) + |2: + | mov rcx, qword [PTR] if (upb_fielddef_isseq(f)) { | checktag tag | je <1 - upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ); - if (endseq) { - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSEQ); - | callp endseq - } - | popframe - // Load next tag again (popframe clobbered it). + upb_decoderplan_jit_endseq(plan, h, f); + // Load next tag again (popframe/endseq clobbered it). | mov rcx, qword [PTR] } @@ -663,22 +724,22 @@ static int upb_compare_uint32(const void *a, const void *b) { return *(uint32_t*)a - *(uint32_t*)b; } -static void upb_decoderplan_jit_msg(upb_decoderplan *plan, +static void upb_decoderplan_jit_msg(decoderplan *plan, const upb_handlers *h) { |=>upb_getpclabel(plan, h, AFTER_STARTMSG): - // There was a call to get here, so we need to align the stack. - | sub rsp, 8 + | push rbp + | mov rbp, rsp | jmp >1 |=>upb_getpclabel(plan, h, STARTMSG): - // There was a call to get here, so we need to align the stack. - | sub rsp, 8 + | push rbp + | mov rbp, rsp // Call startmsg handler (if any): upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h); if (startmsg) { // upb_flow_t startmsg(void *closure); - | mov ARG1_64, SINKFRAME->closure + | mov ARG1_64, SINKFRAME | callp startmsg | check_bool_ret } @@ -731,17 +792,18 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_endmsg_handler *endmsg = upb_handlers_getendmsg(h); if (endmsg) { // void endmsg(void *closure, upb_status *status) { - | mov ARG1_64, SINKFRAME->closure - | lea ARG2_64, DECODER->sink.status + | mov ARG1_64, SINKFRAME + | mov ARG2_64, DECODER->sink + | mov ARG2_64, SINK:ARG2_64->pipeline_ + | add ARG2_64, offsetof(upb_pipeline, status_) | callp endmsg } - // Counter previous alignment. - | add rsp, 8 + | leave | ret } -static void upb_decoderplan_jit(upb_decoderplan *plan) { +static void upb_decoderplan_jit(decoderplan *plan) { // The JIT prologue/epilogue trampoline that is generated in this function // does not depend on the handlers, so it will never vary. Ideally we would // put it in an object file and just link it into upb so we could have only a @@ -763,18 +825,18 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) { // Align stack. | sub rsp, 8 | mov DECODER, ARG1_64 + | mov DECODER->saved_rbp, rbp | mov FRAME, DECODER:ARG1_64->top - | mov SINKFRAME, DECODER:ARG1_64->sink.top + | mov rax, DECODER:ARG1_64->sink + | mov SINKFRAME, SINK:rax->top_ | mov CLOSURE, SINKFRAME->closure | mov PTR, DECODER->ptr // TODO: push return addresses for re-entry (will be necessary for multiple // buffer support). | call ARG2_64 - |->exit_jit: - // Restore stack pointer to where it was before any "call" instructions - // inside our generated code. + | mov rbp, DECODER->saved_rbp | lea rsp, [rbp - 48] // Counter previous alignment. | add rsp, 8 @@ -794,10 +856,10 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) { } } -static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan, +static void upb_decoderplan_jit_assignpclabels(decoderplan *plan, const upb_handlers *h) { // Limit the DFS. - if (upb_inttable_lookupptr(&plan->pclabels, h)) return; + if (upb_inttable_lookupptr(&plan->pclabels, h, NULL)) return; upb_inttable_insertptr(&plan->pclabels, h, upb_value_uint32(plan->pclabel_count)); @@ -832,14 +894,14 @@ static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan, info->tablearray = malloc((info->max_field_number + 1) * sizeof(void*)); } -static void upb_decoderplan_makejit(upb_decoderplan *plan) { +static void upb_decoderplan_makejit(decoderplan *plan) { upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR); plan->debug_info = NULL; // Assign pclabels. plan->pclabel_count = 0; upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32); - upb_decoderplan_jit_assignpclabels(plan, plan->handlers); + upb_decoderplan_jit_assignpclabels(plan, plan->dest_handlers); void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals)); dasm_init(plan, 1); @@ -867,7 +929,7 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) { const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); upb_jitmsginfo *mi = upb_getmsginfo(plan, h); // We jump to after the startmsg handler since it is called before entering - // the JIT (either by upb_decoder or by a previous call to the JIT). + // the JIT (either by upb_pbdecoder or by a previous call to the JIT). mi->jit_func = plan->jit_code + dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG)); for (uint32_t j = 0; j <= mi->max_field_number; j++) { @@ -899,7 +961,7 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) { #endif } -static void upb_decoderplan_freejit(upb_decoderplan *plan) { +static void upb_decoderplan_freejit(decoderplan *plan) { upb_inttable_iter i; upb_inttable_begin(&i, &plan->msginfo); for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { @@ -913,9 +975,10 @@ static void upb_decoderplan_freejit(upb_decoderplan *plan) { // TODO: unregister } -static void upb_decoder_enterjit(upb_decoder *d) { - if (d->plan->jit_code && - d->sink.top == d->sink.stack && +static void upb_decoder_enterjit(upb_pbdecoder *d, decoderplan *plan) { + if (plan->jit_code && + d->top == d->stack && + d->sink->top_ == d->sink->stack && d->ptr && d->ptr < d->jit_end) { #ifndef NDEBUG register uint64_t rbx asm ("rbx") = 11; @@ -926,8 +989,8 @@ static void upb_decoder_enterjit(upb_decoder *d) { #endif // Decodes as many fields as possible, updating d->ptr appropriately, // before falling through to the slow(er) path. - void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code; - upb_jitmsginfo *mi = upb_getmsginfo(d->plan, d->plan->handlers); + void (*upb_jit_decode)(upb_pbdecoder *d, void*) = (void*)plan->jit_code; + upb_jitmsginfo *mi = upb_getmsginfo(plan, plan->dest_handlers); assert(mi); upb_jit_decode(d, mi->jit_func); assert(d->ptr <= d->end); diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 4e69c0c..bcde039 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -16,33 +16,37 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, void *owner, upb_status *status) { - upb_stringsrc strsrc; - upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str, len); + // Create handlers. + const upb_handlers *reader_h = upb_descreader_gethandlers(&reader_h); + const upb_handlers *decoder_h = + upb_pbdecoder_gethandlers(reader_h, false, &decoder_h); - const upb_handlers *h = upb_descreader_newhandlers(&h); - upb_decoderplan *p = upb_decoderplan_new(h, false); - upb_decoder d; - upb_decoder_init(&d); - upb_handlers_unref(h, &h); - upb_descreader r; - upb_descreader_init(&r); - upb_decoder_resetplan(&d, p); - upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r); + // Create pipeline. + upb_pipeline pipeline; + upb_pipeline_init(&pipeline, NULL, 0, upb_realloc, NULL); + upb_pipeline_donateref(&pipeline, reader_h, &reader_h); + upb_pipeline_donateref(&pipeline, decoder_h, &decoder_h); - upb_success_t ret = upb_decoder_decode(&d); - if (status) upb_status_copy(status, upb_decoder_status(&d)); - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_decoderplan_unref(p); - if (ret != UPB_OK) { - upb_descreader_uninit(&r); + // Create sinks. + upb_sink *reader_sink = upb_pipeline_newsink(&pipeline, reader_h); + upb_sink *decoder_sink = upb_pipeline_newsink(&pipeline, decoder_h); + upb_pbdecoder *d = upb_sinkframe_userdata(upb_sink_base(decoder_sink)); + upb_pbdecoder_resetsink(d, reader_sink); + + // Push input data. + bool ok = upb_bytestream_putstr(decoder_sink, str, len); + + if (status) upb_status_copy(status, upb_pipeline_status(&pipeline)); + if (!ok) { + upb_pipeline_uninit(&pipeline); return NULL; } - upb_def **defs = upb_descreader_getdefs(&r, owner, n); + + upb_descreader *r = upb_sinkframe_userdata(upb_sink_base(reader_sink)); + upb_def **defs = upb_descreader_getdefs(r, owner, n); upb_def **defscopy = malloc(sizeof(upb_def*) * (*n)); memcpy(defscopy, defs, sizeof(upb_def*) * (*n)); - upb_descreader_uninit(&r); + upb_pipeline_uninit(&pipeline); return defscopy; } diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 3770afc..91c1e2d 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -14,8 +14,9 @@ #include #include +#include "upb/sink.h" + struct _upb_textprinter { - upb_bytesink *sink; int indent_depth; bool single_line; upb_status status; @@ -24,18 +25,17 @@ struct _upb_textprinter { #define CHECK(x) if ((x) < 0) goto err; static int indent(upb_textprinter *p) { + int i; if (!p->single_line) - CHECK(upb_bytesink_putrepeated(p->sink, ' ', p->indent_depth*2)); + for (i = 0; i < p->indent_depth * 2; i++) + putchar(' '); return 0; -err: return -1; } static int endfield(upb_textprinter *p) { - CHECK(upb_bytesink_putc(p->sink, p->single_line ? ' ' : '\n')); + putchar(p->single_line ? ' ' : '\n'); return 0; -err: - return -1; } static int putescaped(upb_textprinter *p, const char *buf, size_t len, @@ -51,7 +51,7 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len, for (; buf < end; buf++) { if (dstend - dst < 4) { - CHECK(upb_bytesink_write(p->sink, dstbuf, dst - dstbuf)); + fwrite(dstbuf, dst - dstbuf, 1, stdout); dst = dstbuf; } @@ -79,24 +79,35 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len, last_hex_escape = is_hex_escape; } // Flush remaining data. - CHECK(upb_bytesink_write(p->sink, dst, dst - dstbuf)); + fwrite(dst, dst - dstbuf, 1, stdout); return 0; -err: - return -1; } #define TYPE(name, ctype, fmt) \ - static bool put ## name(void *_p, void *fval, ctype val) { \ - upb_textprinter *p = _p; \ - const upb_fielddef *f = fval; \ - CHECK(indent(p)); \ - CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f))); \ - CHECK(upb_bytesink_writestr(p->sink, ": ")); \ - CHECK(upb_bytesink_printf(p->sink, fmt, val)); \ - CHECK(endfield(p)); \ - return true; \ - err: \ - return false; \ + static bool put ## name(const upb_sinkframe *frame, ctype val) { \ + upb_textprinter *p = upb_sinkframe_userdata(frame); \ + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); \ + CHECK(indent(p)); \ + puts(upb_fielddef_name(f)); \ + puts(": "); \ + printf(fmt, val); \ + CHECK(endfield(p)); \ + return true; \ + err: \ + return false; \ +} + +static bool putbool(const upb_sinkframe *frame, bool val) { + upb_textprinter *p = upb_sinkframe_userdata(frame); + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); + CHECK(indent(p)); + puts(upb_fielddef_name(f)); + puts(": "); + puts(val ? "true" : "false"); + CHECK(endfield(p)); + return true; +err: + return false; } #define STRINGIFY_HELPER(x) #x @@ -108,72 +119,61 @@ TYPE(uint32, uint32_t, "%" PRIu32); TYPE(uint64, uint64_t, "%" PRIu64) TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g") -TYPE(bool, bool, "%hhu"); // Output a symbolic value from the enum if found, else just print as int32. -static bool putenum(void *_p, void *fval, int32_t val) { - - upb_textprinter *p = _p; - const upb_fielddef *f = fval; +static bool putenum(const upb_sinkframe *frame, int32_t val) { + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f)); const char *label = upb_enumdef_iton(enum_def, val); if (label) { - CHECK(upb_bytesink_writestr(p->sink, label)); + puts(label); } else { - CHECK(putint32(_p, fval, val)); + CHECK(putint32(frame, val)); } return true; err: return false; } -static void *startstr(void *_p, void *fval, size_t size_hint) { +static void *startstr(const upb_sinkframe *frame, size_t size_hint) { UPB_UNUSED(size_hint); - UPB_UNUSED(fval); - upb_textprinter *p = _p; - CHECK(upb_bytesink_putc(p->sink, '"')); + upb_textprinter *p = upb_sinkframe_userdata(frame); + putchar('"'); return p; -err: - return UPB_BREAK; } -static bool endstr(void *_p, void *fval) { - UPB_UNUSED(fval); - upb_textprinter *p = _p; - CHECK(upb_bytesink_putc(p->sink, '"')); +static bool endstr(const upb_sinkframe *frame) { + putchar('"'); return true; -err: - return false; } -static size_t putstr(void *_p, void *fval, const char *buf, size_t len) { - upb_textprinter *p = _p; - const upb_fielddef *f = fval; - CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE(STRING))); +static size_t putstr(const upb_sinkframe *frame, const char *buf, size_t len) { + upb_textprinter *p = upb_sinkframe_userdata(frame); + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); + CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING)); return len; err: return 0; } -static void *startsubmsg(void *_p, void *fval) { - upb_textprinter *p = _p; - const upb_fielddef *f = fval; +static void *startsubmsg(const upb_sinkframe *frame) { + upb_textprinter *p = upb_sinkframe_userdata(frame); + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); CHECK(indent(p)); - CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f))); + printf("%s {", upb_fielddef_name(f)); if (!p->single_line) - CHECK(upb_bytesink_putc(p->sink, '\n')); + putchar('\n'); p->indent_depth++; - return _p; + return p; err: return UPB_BREAK; } -static bool endsubmsg(void *_p, void *fval) { - UPB_UNUSED(fval); - upb_textprinter *p = _p; +static bool endsubmsg(const upb_sinkframe *frame) { + upb_textprinter *p = upb_sinkframe_userdata(frame); p->indent_depth--; CHECK(indent(p)); - CHECK(upb_bytesink_putc(p->sink, '}')); + putchar('}'); CHECK(endfield(p)); return true; err: @@ -187,9 +187,7 @@ upb_textprinter *upb_textprinter_new() { void upb_textprinter_free(upb_textprinter *p) { free(p); } -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line) { - p->sink = sink; +void upb_textprinter_reset(upb_textprinter *p, bool single_line) { p->single_line = single_line; p->indent_depth = 0; } @@ -202,21 +200,15 @@ static void onmreg(void *c, upb_handlers *h) { upb_fielddef *f = upb_msg_iter_field(&i); switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: - case UPB_TYPE_SINT32: - case UPB_TYPE_SFIXED32: upb_handlers_setint32(h, f, putint32, f, NULL); break; - case UPB_TYPE_SINT64: - case UPB_TYPE_SFIXED64: case UPB_TYPE_INT64: upb_handlers_setint64(h, f, putint64, f, NULL); break; case UPB_TYPE_UINT32: - case UPB_TYPE_FIXED32: upb_handlers_setuint32(h, f, putuint32, f, NULL); break; case UPB_TYPE_UINT64: - case UPB_TYPE_FIXED64: upb_handlers_setuint64(h, f, putuint64, f, NULL); break; case UPB_TYPE_FLOAT: @@ -234,7 +226,6 @@ static void onmreg(void *c, upb_handlers *h) { upb_handlers_setstring(h, f, putstr, f, NULL); upb_handlers_setendstr(h, f, endstr, f, NULL); break; - case UPB_TYPE_GROUP: case UPB_TYPE_MESSAGE: upb_handlers_setstartsubmsg(h, f, &startsubmsg, f, NULL); upb_handlers_setendsubmsg(h, f, &endsubmsg, f, NULL); @@ -250,5 +241,5 @@ static void onmreg(void *c, upb_handlers *h) { const upb_handlers *upb_textprinter_newhandlers(const void *owner, const upb_msgdef *m) { - return upb_handlers_newfrozen(m, owner, &onmreg, NULL); + return upb_handlers_newfrozen(m, NULL, owner, &onmreg, NULL); } diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h index 6d111d2..7b653e7 100644 --- a/upb/pb/textprinter.h +++ b/upb/pb/textprinter.h @@ -8,7 +8,6 @@ #ifndef UPB_TEXT_H_ #define UPB_TEXT_H_ -#include "upb/bytestream.h" #include "upb/handlers.h" #ifdef __cplusplus @@ -20,8 +19,7 @@ typedef struct _upb_textprinter upb_textprinter; upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line); +void upb_textprinter_reset(upb_textprinter *p, bool single_line); const upb_handlers *upb_textprinter_newhandlers(const void *owner, const upb_msgdef *m); diff --git a/upb/pb/varint.h b/upb/pb/varint.h index c4d67ba..d33872d 100644 --- a/upb/pb/varint.h +++ b/upb/pb/varint.h @@ -36,10 +36,14 @@ typedef enum { /* Zig-zag encoding/decoding **************************************************/ -INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -INLINE uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } -INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } +UPB_INLINE int32_t upb_zzdec_32(uint32_t n) { + return (n >> 1) ^ -(int32_t)(n & 1); +} +UPB_INLINE int64_t upb_zzdec_64(uint64_t n) { + return (n >> 1) ^ -(int64_t)(n & 1); +} +UPB_INLINE uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } +UPB_INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } /* Decoding *******************************************************************/ @@ -65,7 +69,7 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r); // up to 10 bytes, so it must not be used unless there are at least ten bytes // left in the buffer! #define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ -INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ +UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ uint8_t *p = (uint8_t*)_p; \ if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \ upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)}; \ @@ -81,21 +85,21 @@ UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); // Our canonical functions for decoding varints, based on the currently // favored best-performing implementations. -INLINE upb_decoderet upb_vdecode_fast(const char *p) { +UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) { if (sizeof(long) == 8) return upb_vdecode_check2_massimino(p); else return upb_vdecode_check2_branch32(p); } -INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { +UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { return upb_vdecode_max8_massimino(r); } /* Encoding *******************************************************************/ -INLINE int upb_value_size(uint64_t val) { +UPB_INLINE int upb_value_size(uint64_t val) { #ifdef __GNUC__ int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. #else @@ -110,7 +114,7 @@ INLINE int upb_value_size(uint64_t val) { // bytes long), returning how many bytes were used. // // TODO: benchmark and optimize if necessary. -INLINE size_t upb_vencode64(uint64_t val, char *buf) { +UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) { if (val == 0) { buf[0] = 0; return 1; } size_t i = 0; while (val) { @@ -123,7 +127,7 @@ INLINE size_t upb_vencode64(uint64_t val, char *buf) { } // Encodes a 32-bit varint, *not* sign-extended. -INLINE uint64_t upb_vencode32(uint32_t val) { +UPB_INLINE uint64_t upb_vencode32(uint32_t val) { char buf[UPB_PB_VARINT_MAX_LEN]; size_t bytes = upb_vencode64(val, buf); uint64_t ret = 0; -- cgit v1.2.3