From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:20:33 -0700 Subject: Split src/ into core/ and stream/. --- stream/upb_byteio.h | 43 ++++ stream/upb_decoder.c | 577 +++++++++++++++++++++++++++++++++++++++++++++++++++ stream/upb_decoder.h | 53 +++++ stream/upb_encoder.c | 420 +++++++++++++++++++++++++++++++++++++ stream/upb_encoder.h | 56 +++++ stream/upb_text.c | 121 +++++++++++ stream/upb_text.h | 36 ++++ 7 files changed, 1306 insertions(+) create mode 100644 stream/upb_byteio.h create mode 100644 stream/upb_decoder.c create mode 100644 stream/upb_decoder.h create mode 100644 stream/upb_encoder.c create mode 100644 stream/upb_encoder.h create mode 100644 stream/upb_text.c create mode 100644 stream/upb_text.h (limited to 'stream') diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h new file mode 100644 index 0000000..69a28b3 --- /dev/null +++ b/stream/upb_byteio.h @@ -0,0 +1,43 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file contains upb_bytesrc and upb_bytesink implementations for common + * interfaces like strings, UNIX fds, and FILE*. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_BYTEIO_H +#define UPB_BYTEIO_H + +#include "upb_srcsink.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_stringsrc **************************************************************/ + +struct upb_stringsrc; +typedef struct upb_stringsrc upb_stringsrc; + +// Create/free a stringsrc. +upb_stringsrc *upb_stringsrc_new(); +void upb_stringsrc_free(upb_stringsrc *s); + +// Resets the stringsrc to a state where it will vend the given string. The +// stringsrc will take a reference on the string, so the caller need not ensure +// that it outlives the stringsrc. A stringsrc can be reset multiple times. +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); + +// Returns the upb_bytesrc* for this stringsrc. 
Invalidated by reset above. +upb_bytesrc *upb_stringsrc_bytesrc(); + + +/* upb_fdsrc ******************************************************************/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c new file mode 100644 index 0000000..e3fdc49 --- /dev/null +++ b/stream/upb_decoder.c @@ -0,0 +1,577 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_decoder.h" + +#include +#include +#include + +#define UPB_GROUP_END_OFFSET UINT32_MAX + +// Returns true if the give wire type and field type combination is valid, +// taking into account both packed and non-packed encodings. +static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { + return (1 << wt) & upb_types[ft].allowed_wire_types; +} + +// Performs zig-zag decoding, which is used by sint32 and sint64. +static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } + + +/* upb_decoder ****************************************************************/ + +// The decoder keeps a stack with one entry per level of recursion. +// upb_decoder_frame is one frame of that stack. +typedef struct { + upb_msgdef *msgdef; + upb_fielddef *field; + upb_strlen_t end_offset; // For groups, -1. +} upb_decoder_frame; + +struct upb_decoder { + upb_src src; // upb_decoder is a upb_src. + + upb_msgdef *toplevel_msgdef; + upb_bytesrc *bytesrc; + + // The buffer of input data. NULL is equivalent to the empty string. + upb_string *buf; + + // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain. + uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; + + // The number of bytes we have yet to consume from "buf" or tmpbuf. This is + // always >= 0 unless we were just reset or are eof. 
+ int32_t buf_bytesleft; + + // The offset within "buf" from where we are currently reading. This can be + // <0 if we are reading some residual bytes from the previous buffer, which + // are stored in tmpbuf and combined with bytes from "buf". + int32_t buf_offset; + + // The overall stream offset of the beginning of "buf". + uint32_t buf_stream_offset; + + // Fielddef for the key we just read. + upb_fielddef *field; + + // Wire type of the key we just read. + upb_wire_type_t wire_type; + + // Delimited length of the string field we are reading. + upb_strlen_t delimited_len; + + upb_strlen_t packed_end_offset; + + // We keep a stack of messages we have recursed into. + upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; +}; + + +/* upb_decoder buffering. *****************************************************/ + +static upb_strlen_t upb_decoder_offset(upb_decoder *d) +{ + return d->buf_stream_offset - d->buf_offset; +} + +static bool upb_decoder_nextbuf(upb_decoder *d) +{ + assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE); + + // Copy residual bytes to temporary buffer. + if(d->buf_bytesleft > 0) { + memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset, + d->buf_bytesleft); + } + + // Recycle old buffer. + if(d->buf) { + d->buf = upb_string_tryrecycle(d->buf); + d->buf_offset -= upb_string_len(d->buf); + d->buf_stream_offset += upb_string_len(d->buf); + } + + // Pull next buffer. + if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { + d->buf_bytesleft += upb_string_len(d->buf); + return true; + } else { + // Error or EOF. + if(!upb_bytesrc_eof(d->bytesrc)) { + // Error from bytesrc. + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return false; + } else if(d->buf_bytesleft == 0) { + // EOF from bytesrc and we don't have any residual bytes left. + d->src.eof = true; + return false; + } else { + // No more data left from the bytesrc, but we still have residual bytes. 
+ return true; + } + } +} + +static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) +{ + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { + // GCC is currently complaining about use of an uninitialized value if we + // don't set this now. I think this is incorrect, but leaving this in + // to suppress the warning for now. + *bytes = 0; + if(!upb_decoder_nextbuf(d)) return NULL; + } + + assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE); + + if(d->buf_offset >= 0) { + // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE + // contiguous bytes, so we can read directly out of it. + *bytes = d->buf_bytesleft; + return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; + } else { + // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we + // have so far. + upb_strlen_t len = -d->buf_offset; + if(d->buf) { + upb_strlen_t to_copy = + UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf)); + memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy); + len += to_copy; + } + // Pad the buffer out to UPB_MAX_ENCODED_SIZE. + memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len); + *bytes = len; + return d->tmpbuf; + } +} + +// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE +// bytes long. This buffer contains the next bytes in the stream (even if +// those bytes span multiple buffers). *bytes is set to the number of actual +// stream bytes that are available in the returned buffer. If +// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. +// +// After the data has been read, upb_decoder_consume() should be called to +// indicate how many bytes were consumed. +static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) +{ + if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) { + // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE + // contiguous bytes, so we can read directly out of it. 
+ *bytes = d->buf_bytesleft; + return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; + } else { + return upb_decoder_getbuf_full(d, bytes); + } +} + +static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) +{ + assert(bytes <= UPB_MAX_ENCODED_SIZE); + d->buf_offset += bytes; + d->buf_bytesleft -= bytes; + if(d->buf_offset < 0) { + // We still have residual bytes we have not consumed. + memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); + } + assert(d->buf_bytesleft >= 0); + return true; +} + +static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) +{ + d->buf_offset += bytes; + d->buf_bytesleft -= bytes; + while(d->buf_bytesleft < 0) { + if(!upb_decoder_nextbuf(d)) return false; + } + return true; +} + + +/* Functions to read wire values. *********************************************/ + +// Parses remining bytes of a 64-bit varint that has already had its first byte +// parsed. +INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high) +{ + upb_strlen_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + const uint8_t *start = buf; + if(!buf) return false; + + *high = 0; + uint32_t b; + b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + if(bytes_available >= 10) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " + "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); + } 
else { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle " + "of a varint, stream offset: %u", upb_decoder_offset(d)); + } + return false; + +done: + return upb_decoder_consume(d, buf - start); +} + +// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit +// varint is not a true wire type. +static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val) +{ + uint32_t high; + if(!upb_decoder_readv64(d, val, &high)) return false; + + // We expect the high bits to be zero, except that signed 32-bit values are + // first sign-extended to be wire-compatible with 64 bits, in which case we + // expect the high bits to be all one. + // + // We could perform a slightly more sophisticated check by having the caller + // indicate whether a signed or unsigned value is being read. We could check + // that the high bits are all zeros for unsigned, and properly sign-extended + // for signed. + if(high != 0 && ~high != 0) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but " + "the high bits contained data we should not truncate: " + "%ux, stream offset: %u", high, upb_decoder_offset(d)); + return false; + } + return true; +} + +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller +// promises that 4 bytes are available at buf. +static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val) +{ + upb_strlen_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(!buf) return false; + if(bytes_available < 4) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Stream ended in the middle of a 32-bit value"); + return false; + } + memcpy(val, buf, 4); + // TODO: byte swap if big-endian. + return upb_decoder_consume(d, 4); +} + +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller +// promises that 8 bytes are available at buf. 
+static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val) +{ + upb_strlen_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(!buf) return false; + if(bytes_available < 8) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Stream ended in the middle of a 64-bit value"); + return false; + } + memcpy(val, buf, 8); + // TODO: byte swap if big-endian. + return upb_decoder_consume(d, 8); +} + +// Returns the length of a varint (wire type: UPB_WIRE_TYPE_VARINT), allowing +// it to be easily skipped. Caller promises that 10 bytes are available at +// "buf". The function will return a maximum of 11 bytes before quitting. +static uint8_t upb_decoder_skipv64(upb_decoder *d) +{ + uint32_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(!buf) return false; + uint8_t i; + for(i = 0; i < 10 && buf[i] & 0x80; i++) + ; // empty loop body. + if(i > 10) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint."); + return false; + } + return upb_decoder_consume(d, i); +} + + +/* upb_src implementation for upb_decoder. ************************************/ + +bool upb_decoder_skipval(upb_decoder *d); + +upb_fielddef *upb_decoder_getdef(upb_decoder *d) +{ + // Detect end-of-submessage. + if(upb_decoder_offset(d) >= d->top->end_offset) { + d->src.eof = true; + return NULL; + } + + // Handles the packed field case. + if(d->field) return d->field; + + uint32_t key = 0; +again: + if(!upb_decoder_readv32(d, &key)) return NULL; + upb_wire_type_t wire_type = key & 0x7; + int32_t field_number = key >> 3; + + if(wire_type == UPB_WIRE_TYPE_DELIMITED) { + // For delimited wire values we parse the length now, since we need it in + // all cases. 
+ if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL; + } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) { + if(d->top->end_offset == UPB_GROUP_END_OFFSET) { + d->src.eof = true; + } else { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current " + "message is not a group, byte offset: %zd", + upb_decoder_offset(d)); + } + return NULL; + } + + // Look up field by tag number. + upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number); + + if (!f) { + // Unknown field. If/when the upb_src interface supports reporting + // unknown fields we will implement that here. + upb_decoder_skipval(d); + goto again; + } else if (!upb_check_type(wire_type, f->type)) { + // This is a recoverable error condition. We skip the value but also + // return NULL and report the error. + upb_decoder_skipval(d); + // TODO: better error message. + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n"); + return NULL; + } + d->field = f; + d->wire_type = wire_type; + return f; +} + +bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) +{ + switch(upb_types[d->field->type].native_wire_type) { + case UPB_WIRE_TYPE_VARINT: { + uint32_t low, high; + if(!upb_decoder_readv64(d, &low, &high)) return false; + uint64_t u64 = ((uint64_t)high << 32) | low; + if(d->field->type == UPB_TYPE(SINT64)) + *val.int64 = upb_zzdec_64(u64); + else + *val.uint64 = u64; + break; + } + case UPB_WIRE_TYPE_32BIT_VARINT: { + uint32_t u32; + if(!upb_decoder_readv32(d, &u32)) return false; + if(d->field->type == UPB_TYPE(SINT32)) + *val.int32 = upb_zzdec_32(u32); + else + *val.uint32 = u32; + break; + } + case UPB_WIRE_TYPE_64BIT: + if(!upb_decoder_readf64(d, val.uint64)) return false; + break; + case UPB_WIRE_TYPE_32BIT: + if(!upb_decoder_readf32(d, val.uint32)) return false; + break; + default: + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Attempted to call getval on a group."); + return false; + } + // For a packed field where we have not reached the end, we leave 
the field + // in the decoder so we will return it again without parsing a key. + if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || + upb_decoder_offset(d) >= d->packed_end_offset) { + d->field = NULL; + } + return true; +} + +bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { + // A string, bytes, or a length-delimited submessage. The latter isn't + // technically a string, but can be gotten as one to perform lazy parsing. + const int32_t total_len = d->delimited_len; + if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { + // The entire string is inside our current buffer, so we can just + // return a substring of the buffer without copying. + upb_string_substr(str, d->buf, + upb_string_len(d->buf) - d->buf_bytesleft, + total_len); + upb_decoder_skipbytes(d, total_len); + } else { + // The string spans buffers, so we must copy from the residual buffer + // (if any bytes are there), then the buffer, and finally from the bytesrc. + uint8_t *ptr = (uint8_t*)upb_string_getrwbuf( + str, UPB_MIN(total_len, d->buf_bytesleft)); + int32_t len = 0; + if(d->buf_offset < 0) { + // Residual bytes we need to copy from tmpbuf. + memcpy(ptr, d->tmpbuf, -d->buf_offset); + len += -d->buf_offset; + } + if(d->buf) { + // Bytes from the buffer. + memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset, + upb_string_len(str) - len); + } + upb_decoder_skipbytes(d, upb_string_len(str)); + if(len < total_len) { + // Bytes from the bytesrc. + if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) { + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return false; + } + // Have to advance this since the buffering layer of the decoder will + // never see these bytes. 
+ d->buf_stream_offset += total_len - len; + } + } + d->field = NULL; + return true; +} + +static bool upb_decoder_skipgroup(upb_decoder *d); + +bool upb_decoder_startmsg(upb_decoder *d) { + d->top->field = d->field; + if(++d->top >= d->limit) { + upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED, + "Nesting exceeded maximum (%d levels)\n", + UPB_MAX_NESTING); + return false; + } + upb_decoder_frame *frame = d->top; + frame->msgdef = upb_downcast_msgdef(d->field->def); + if(d->field->type == UPB_TYPE(GROUP)) { + frame->end_offset = UPB_GROUP_END_OFFSET; + } else { + frame->end_offset = upb_decoder_offset(d) + d->delimited_len; + } + return true; +} + +bool upb_decoder_endmsg(upb_decoder *d) { + if(d->top > d->stack) { + --d->top; + if(!d->src.eof) { + if(d->top->field->type == UPB_TYPE(GROUP)) + upb_decoder_skipgroup(d); + else + upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); + } + d->src.eof = false; + return true; + } else { + return false; + } +} + +bool upb_decoder_skipval(upb_decoder *d) { + upb_strlen_t bytes_to_skip; + switch(d->wire_type) { + case UPB_WIRE_TYPE_VARINT: { + return upb_decoder_skipv64(d); + } + case UPB_WIRE_TYPE_START_GROUP: + if(!upb_decoder_startmsg(d)) return false; + if(!upb_decoder_skipgroup(d)) return false; + if(!upb_decoder_endmsg(d)) return false; + return true; + default: + // Including UPB_WIRE_TYPE_END_GROUP. + assert(false); + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); + return false; + case UPB_WIRE_TYPE_64BIT: + bytes_to_skip = 8; + break; + case UPB_WIRE_TYPE_32BIT: + bytes_to_skip = 4; + break; + case UPB_WIRE_TYPE_DELIMITED: + // Works for both string/bytes *and* submessages. + bytes_to_skip = d->delimited_len; + break; + } + return upb_decoder_skipbytes(d, bytes_to_skip); +} + +static bool upb_decoder_skipgroup(upb_decoder *d) +{ + // This will be mututally recursive with upb_decoder_skipval() if the group + // has sub-groups. 
If we wanted to handle EAGAIN in the future, this + // approach would not work; we would need to track the group depth + // explicitly. + while(upb_decoder_getdef(d)) { + if(!upb_decoder_skipval(d)) return false; + } + // If we are at the end of the group like we want to be, then + // upb_decoder_getdef() returned NULL because of eof, not error. + if(!&d->src.eof) return false; + return true; +} + +upb_src_vtable upb_decoder_src_vtbl = { + (upb_src_getdef_fptr)&upb_decoder_getdef, + (upb_src_getval_fptr)&upb_decoder_getval, + (upb_src_skipval_fptr)&upb_decoder_skipval, + (upb_src_startmsg_fptr)&upb_decoder_startmsg, + (upb_src_endmsg_fptr)&upb_decoder_endmsg, +}; + + +/* upb_decoder construction/destruction. **************************************/ + +upb_decoder *upb_decoder_new(upb_msgdef *msgdef) +{ + upb_decoder *d = malloc(sizeof(*d)); + d->toplevel_msgdef = msgdef; + d->limit = &d->stack[UPB_MAX_NESTING]; + d->buf = NULL; + upb_src_init(&d->src, &upb_decoder_src_vtbl); + return d; +} + +void upb_decoder_free(upb_decoder *d) +{ + upb_string_unref(d->buf); + free(d); +} + +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) +{ + upb_string_unref(d->buf); + d->top = d->stack; + d->top->msgdef = d->toplevel_msgdef; + // The top-level message is not delimited (we can keep receiving data for it + // indefinitely), so we set the end offset as high as possible, but not equal + // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. + d->top->end_offset = UINT32_MAX - 1; + d->bytesrc = bytesrc; + d->buf = NULL; + d->buf_bytesleft = 0; + d->buf_stream_offset = 0; + d->buf_offset = 0; +} diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h new file mode 100644 index 0000000..dde61fc --- /dev/null +++ b/stream/upb_decoder.h @@ -0,0 +1,53 @@ +/* + * upb - a minimalist implementation of protocol buffers. 
+ * + * upb_decoder implements a high performance, streaming decoder for protobuf + * data that works by implementing upb_src and getting its data from a + * upb_bytesrc. + * + * The decoder does not currently support non-blocking I/O, in the sense that + * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the + * decoder when data becomes available again. Support for this could be added, + * but it would add complexity and perhaps cost efficiency also. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_DECODER_H_ +#define UPB_DECODER_H_ + +#include +#include +#include "upb_def.h" +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_decoder *****************************************************************/ + +// A upb_decoder decodes the binary protocol buffer format, writing the data it +// decodes to a upb_sink. +struct upb_decoder; +typedef struct upb_decoder upb_decoder; + +// Allocates and frees a upb_decoder, respectively. +upb_decoder *upb_decoder_new(upb_msgdef *md); +void upb_decoder_free(upb_decoder *d); + +// Resets the internal state of an already-allocated decoder. This puts it in a +// state where it has not seen any data, and expects the next data to be from +// the beginning of a new protobuf. Parsers must be reset before they can be +// used. A decoder can be reset multiple times. +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); + +// Returns a upb_src pointer by which the decoder can be used. The returned +// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). +upb_src *upb_decoder_getsrc(upb_decoder *d); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_DECODER_H_ */ diff --git a/stream/upb_encoder.c b/stream/upb_encoder.c new file mode 100644 index 0000000..304a423 --- /dev/null +++ b/stream/upb_encoder.c @@ -0,0 +1,420 @@ +/* + * upb - a minimalist implementation of protocol buffers. 
/* Functions for calculating sizes of wire values. ****************************/

// Returns the number of bytes needed to encode "val" as a varint (1..10).
static size_t upb_v_uint64_t_size(uint64_t val) {
  // Zero still takes one byte.  Handle it first: __builtin_clzll(0) is
  // undefined, so it must never be evaluated for val == 0.
  if(val == 0) return 1;
#ifdef __GNUC__
  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of the top set bit.
#else
  int high_bit = 0;
  uint64_t tmp = val;
  while(tmp >>= 1) high_bit++;
#endif
  // Each varint byte carries 7 payload bits.
  return (size_t)high_bit / 7 + 1;
}

// Size of a signed 32-bit varint.  Negative values are sign-extended to 64
// bits to maintain wire compatibility with int64s, so they take 10 bytes.
static size_t upb_v_int32_t_size(int32_t val) {
  return upb_v_uint64_t_size((int64_t)val);
}

// Size of an unsigned 32-bit varint (at most 5 bytes).
static size_t upb_v_uint32_t_size(uint32_t val) {
  return upb_v_uint64_t_size(val);
}

// Size of a fixed 64-bit value.
static size_t upb_f_uint64_t_size(uint64_t val) {
  (void)val;  // Length is independent of value.
  return sizeof(uint64_t);
}

// Size of a fixed 32-bit value.
static size_t upb_f_uint32_t_size(uint32_t val) {
  (void)val;  // Length is independent of value.
  return sizeof(uint32_t);
}


/* Functions to write wire values. ********************************************/

// Since we know in advance the longest that the value could be, we always make
// sure that our buffer is long enough.  This saves us from having to perform
// bounds checks.

// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT).  Returns a pointer one past
// the last byte written.
static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
{
  do {
    uint8_t byte = val & 0x7f;
    val >>= 7;
    if(val) byte |= 0x80;  // More bytes follow.
    *buf++ = byte;
  } while(val);
  return buf;
}

// Puts an unsigned 32-bit varint, verbatim.  Never uses the high 32 bits.
static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
{
  return upb_put_v_uint64_t(buf, val);
}

// Puts a signed 32-bit varint, first sign-extending to 64-bits.  We do this to
// maintain wire-compatibility with 64-bit signed integers.
static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
{
  return upb_put_v_uint64_t(buf, (int64_t)val);
}
// Stores "val" at buf[0..3] in little-endian byte order.
static void upb_put32(uint8_t *buf, uint32_t val) {
  buf[0] = val & 0xff;
  buf[1] = (val >> 8) & 0xff;
  buf[2] = (val >> 16) & 0xff;
  buf[3] = (val >> 24);
}

// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
// Returns a pointer one past the last byte written.
static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
{
  uint8_t *uint32_end = buf + sizeof(uint32_t);
#if UPB_UNALIGNED_READS_OK
  // memcpy gives the same native-order store as a cast-and-assign, without
  // the alignment and aliasing problems of writing through (uint32_t*)buf.
  memcpy(buf, &val, sizeof(val));
#else
  upb_put32(buf, val);
#endif
  return uint32_end;
}

// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
// Returns a pointer one past the last byte written.
static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
{
  uint8_t *uint64_end = buf + sizeof(uint64_t);
#if UPB_UNALIGNED_READS_OK
  memcpy(buf, &val, sizeof(val));
#else
  // Low word first, then the high word in the *following* four bytes.
  // Writing both words at "buf" would overwrite the low word and leave
  // bytes 4..7 unset.
  upb_put32(buf, (uint32_t)val);
  upb_put32(buf + sizeof(uint32_t), (uint32_t)(val >> 32));
#endif
  return uint64_end;
}

/* Functions to write and calculate sizes for .proto values. ******************/

// Performs zig-zag encoding, which is used by sint32 and sint64: 0, -1, 1, -2
// map to 0, 1, 2, 3, and so on.  The shift is done on the unsigned
// representation because left-shifting a negative signed value is undefined
// behavior, and the sign mask is spelled out rather than relying on ">>" of a
// negative value being an arithmetic shift.
static uint32_t upb_zzenc_32(int32_t n) {
  uint32_t sign_mask = n < 0 ? UINT32_MAX : 0;
  return ((uint32_t)n << 1) ^ sign_mask;
}
static uint64_t upb_zzenc_64(int64_t n) {
  uint64_t sign_mask = n < 0 ? UINT64_MAX : 0;
  return ((uint64_t)n << 1) ^ sign_mask;
}
+ * uint32_t upb_vtowv_INT32(int32_t s); + */ + +#define VTOWV(type, wire_t, val_t) \ + static wire_t upb_vtowv_ ## type(val_t s) + +#define PUT(type, v_or_f, wire_t, val_t, member_name) \ + static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \ + wire_t tmp = upb_vtowv_ ## type(val); \ + return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \ + } + +#define T(type, v_or_f, wire_t, val_t, member_name) \ + static size_t upb_get_ ## type ## _size(val_t val) { \ + return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \ + } \ + VTOWV(type, wire_t, val_t); /* prototype for PUT below */ \ + PUT(type, v_or_f, wire_t, val_t, member_name) \ + VTOWV(type, wire_t, val_t) + +T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; } +T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; } +T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } +T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } +T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); } +T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); } +T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } +T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } +T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; } +T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; } +T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; } +T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; } +T(DOUBLE, f, uint64_t, double, _double) { + upb_value v; + v._double = s; + return v.uint64; +} +T(FLOAT, f, uint32_t, float, _float) { + upb_value v; + v._float = s; + return v.uint32; +} +#undef VTOWV +#undef PUT +#undef T + +static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) +{ +#define CASE(t, member_name) \ + case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name); + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + 
CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + default: assert(false); return buf; + } +#undef CASE +} + +static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v) +{ +#define CASE(t, member_name) \ + case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + default: assert(false); return 0; + } +#undef CASE +} + +static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num, + upb_wire_type_t wt) +{ + return upb_put_UINT32(buf, wt | (num << 3)); +} + +static uint32_t _upb_get_tag_size(upb_field_number_t num) +{ + return upb_get_UINT32_size(num << 3); +} + + +/* upb_sizebuilder ************************************************************/ + +struct upb_sizebuilder { + // Accumulating size for the current level. + uint32_t size; + + // Stack of sizes for our current nesting. + uint32_t stack[UPB_MAX_NESTING], *top; + + // Vector of sizes. + uint32_t *sizes; + int sizes_len; + int sizes_size; + + upb_status status; +}; + +// upb_sink callbacks. +static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f, + upb_value val, + upb_status *status) +{ + (void)status; + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + uint32_t size = 0; + size += _upb_get_tag_size(f->number); + size += _upb_get_value_size(f->type, val); + sb->size += size; + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end, + upb_status *status) +{ + (void)status; + (void)str; // String data itself is not used. 
+ upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(start >= 0) { + uint32_t size = 0; + size += _upb_get_tag_size(f->number); + size += upb_get_UINT32_size(end - start); + sb->size += size; + } + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + (void)status; + (void)f; // Unused (we calculate tag size and delimiter in endcb). + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(f->type == UPB_TYPE(MESSAGE)) { + *sb->top = sb->size; + sb->top++; + sb->size = 0; + } else { + assert(f->type == UPB_TYPE(GROUP)); + sb->size += _upb_get_tag_size(f->number); + } + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + (void)status; + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(f->type == UPB_TYPE(MESSAGE)) { + sb->top--; + if(sb->sizes_len == sb->sizes_size) { + sb->sizes_size *= 2; + sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes)); + } + uint32_t child_size = sb->size; + uint32_t parent_size = *sb->top; + sb->sizes[sb->sizes_len++] = child_size; + // The size according to the parent includes the tag size and delimiter of + // the submessage. + parent_size += upb_get_UINT32_size(child_size); + parent_size += _upb_get_tag_size(f->number); + // Include size accumulated in parent before child began. + sb->size = child_size + parent_size; + } else { + assert(f->type == UPB_TYPE(GROUP)); + // As an optimization, we could just add this number twice in startcb, to + // avoid having to recalculate it. 
+ sb->size += _upb_get_tag_size(f->number); + } + return UPB_SINK_CONTINUE; +} + +upb_sink_callbacks _upb_sizebuilder_sink_vtbl = { + _upb_sizebuilder_valuecb, + _upb_sizebuilder_strcb, + _upb_sizebuilder_startcb, + _upb_sizebuilder_endcb +}; + + +/* upb_sink callbacks *********************************************************/ + +struct upb_encoder { + upb_sink base; + //upb_bytesink *bytesink; + uint32_t *sizes; + int size_offset; +}; + + +// Within one callback we may need to encode up to two separate values. +#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2) + +static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf, + size_t len, upb_status *status) +{ + // TODO: conjure a upb_strptr that points to buf. + //upb_strptr ptr; + (void)s; + (void)buf; + (void)status; + size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr); + if(written < len) { + // TODO: mark to skip "written" bytes next time. + return UPB_SINK_STOP; + } else { + return UPB_SINK_CONTINUE; + } +} + +static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f, + upb_value val, upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + upb_wire_type_t wt = upb_types[f->type].expected_wire_type; + // TODO: handle packed encoding. + ptr = _upb_put_tag(ptr, f->number, wt); + ptr = upb_encode_value(ptr, f->type, val); + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(start >= 0) { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); + ptr = upb_put_UINT32(ptr, end - start); + } + // TODO: properly handle partially consumed strings and partially supplied + // strings. 
+ _upb_encoder_push_buf(s, buf, ptr - buf, status); + return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status); +} + +static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(f->type == UPB_TYPE(GROUP)) { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP); + } else { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); + ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]); + } + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE; + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP); + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +upb_sink_callbacks _upb_encoder_sink_vtbl = { + _upb_encoder_valuecb, + _upb_encoder_strcb, + _upb_encoder_startcb, + _upb_encoder_endcb +}; + diff --git a/stream/upb_encoder.h b/stream/upb_encoder.h new file mode 100644 index 0000000..e879b0b --- /dev/null +++ b/stream/upb_encoder.h @@ -0,0 +1,56 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Implements a upb_sink that writes protobuf data to the binary wire format. + * + * For messages that have any submessages, the encoder needs a buffer + * containing the submessage sizes, so they can be properly written at the + * front of each message. Note that groups do *not* have this requirement. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. 
+ */ + +#ifndef UPB_ENCODER_H_ +#define UPB_ENCODER_H_ + +#include "upb.h" +#include "upb_srcsink.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_encoder ****************************************************************/ + +// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol +// buffer binary wire format. +struct upb_encoder; +typedef struct upb_encoder upb_encoder; + +upb_encoder *upb_encoder_new(upb_msgdef *md); +void upb_encoder_free(upb_encoder *e); + +// Resets the given upb_encoder such that it is ready to begin encoding, +// outputting data to "bytesink" (which must live until the encoder is +// reset or destroyed). +void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink); + +// Returns the upb_sink to which data can be written. The sink is invalidated +// when the encoder is reset or destroyed. Note that if the client wants to +// encode any length-delimited submessages it must first call +// upb_encoder_buildsizes() below. +upb_sink *upb_encoder_sink(upb_encoder *e); + +// Call prior to pushing any data with embedded submessages. "src" must yield +// exactly the same data as what will next be encoded, but in reverse order. +// The encoder iterates over this data in order to determine the sizes of the +// submessages. If any errors are returned by the upb_src, the status will +// be saved in *status. If the client is sure that the upb_src will not throw +// any errors, "status" may be NULL. +void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_ENCODER_H_ */ diff --git a/stream/upb_text.c b/stream/upb_text.c new file mode 100644 index 0000000..8662269 --- /dev/null +++ b/stream/upb_text.c @@ -0,0 +1,121 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
+ */ + +#include +#include "descriptor.h" +#include "upb_text.h" +#include "upb_data.h" + +void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file) +{ +#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break; + switch(type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): + CASE("%" PRIu32, uint32); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + /* TODO: escaping. */ + fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break; + } +} + +static void print_indent(upb_text_printer *p, FILE *stream) +{ + if(!p->single_line) + for(int i = 0; i < p->indent_depth; i++) + fprintf(stream, " "); +} + +void upb_text_printfield(upb_text_printer *p, upb_strptr name, + upb_field_type_t valtype, upb_value val, + FILE *stream) +{ + print_indent(p, stream); + fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name)); + upb_text_printval(valtype, val, stream); + if(p->single_line) + fputc(' ', stream); + else + fputc('\n', stream); +} + +void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream) +{ + print_indent(p, stream); + fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type)); + if(!p->single_line) fputc('\n', stream); + p->indent_depth++; +} + +void upb_text_pop(upb_text_printer *p, FILE *stream) +{ + p->indent_depth--; + print_indent(p, stream); + fprintf(stream, "}\n"); +} + +static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, + FILE *stream); + +static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md, + FILE *stream) +{ + 
for(upb_field_count_t i = 0; i < md->num_fields; i++) { + upb_fielddef *f = &md->fields[i]; + if(!upb_msg_has(msg, f)) continue; + upb_value v = upb_msg_get(msg, f); + if(upb_isarray(f)) { + upb_arrayptr arr = v.arr; + for(uint32_t j = 0; j < upb_array_len(arr); j++) { + upb_value elem = upb_array_get(arr, f, j); + printval(printer, elem, f, stream); + } + } else { + printval(printer, v, f, stream); + } + } +} + +static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, + FILE *stream) +{ + if(upb_issubmsg(f)) { + upb_text_push(printer, f->name, stream); + printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream); + upb_text_pop(printer, stream); + } else { + upb_text_printfield(printer, f->name, f->type, v, stream); + } +} + + +void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line, + FILE *stream) +{ + upb_text_printer printer; + upb_text_printer_init(&printer, single_line); + printmsg(&printer, msg, md, stream); +} + diff --git a/stream/upb_text.h b/stream/upb_text.h new file mode 100644 index 0000000..d89c9d6 --- /dev/null +++ b/stream/upb_text.h @@ -0,0 +1,36 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
+ */ + +#ifndef UPB_TEXT_H_ +#define UPB_TEXT_H_ + +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int indent_depth; + bool single_line; +} upb_text_printer; + +INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) { + p->indent_depth = 0; + p->single_line = single_line; +} +void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file); +void upb_text_printfield(upb_text_printer *p, upb_strptr name, + upb_field_type_t valtype, upb_value val, FILE *stream); +void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, + FILE *stream); +void upb_text_pop(upb_text_printer *p, FILE *stream); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TEXT_H_ */ -- cgit v1.2.3 From 67b16cbe5c55d00d7e576cdf479392f3a0e927a5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 14:37:02 -0700 Subject: Basic test_def links and passes no-op test! --- Makefile | 8 +++++--- core/upb_def.c | 42 ++++++++++++++++++++--------------------- core/upb_def.h | 6 +----- core/upb_stream_vtbl.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++- stream/upb_decoder.c | 1 + 5 files changed, 78 insertions(+), 30 deletions(-) (limited to 'stream') diff --git a/Makefile b/Makefile index 2abe0c7..568dcad 100644 --- a/Makefile +++ b/Makefile @@ -102,14 +102,16 @@ VALGRIND=valgrind --leak-check=full --error-exitcode=1 #VALGRIND= test: tests @echo Running all tests under valgrind. + @set -e # Abort on error. # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table @for test in tests/*; do \ if [ -x ./$$test ] ; then \ - echo $(VALGRIND) ./$$test: \\c; \ - $(VALGRIND) ./$$test; \ + echo !!! $(VALGRIND) ./$$test; \ + $(VALGRIND) ./$$test || exit 1; \ fi \ - done; + done; \ + echo "All tests passed!" 
tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2: \ diff --git a/core/upb_def.c b/core/upb_def.c index 1f57c70..cc4fd80 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -764,7 +764,6 @@ static void upb_free_symtab(upb_strtable *t) void _upb_symtab_free(upb_symtab *s) { upb_free_symtab(&s->symtab); - upb_free_symtab(&s->psymtab); upb_rwlock_destroy(&s->lock); free(s); } @@ -932,30 +931,30 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) { - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - d->str = upb_string_tryrecycle(d->str); - upb_string_substr(d->str, d->input, d->offset, d->delimited_len); - } else { - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: - *val.uint64 = upb_baredecoder_readv64(d); - break; - case UPB_WIRE_TYPE_32BIT_VARINT: - *val.uint32 = upb_baredecoder_readv32(d); - break; - case UPB_WIRE_TYPE_64BIT: - *val.uint64 = upb_baredecoder_readf64(d); - break; - case UPB_WIRE_TYPE_32BIT: - *val.uint32 = upb_baredecoder_readf32(d); - break; - default: - assert(false); - } + switch(d->wire_type) { + case UPB_WIRE_TYPE_VARINT: + *val.uint64 = upb_baredecoder_readv64(d); + break; + case UPB_WIRE_TYPE_32BIT_VARINT: + *val.uint32 = upb_baredecoder_readv32(d); + break; + case UPB_WIRE_TYPE_64BIT: + *val.uint64 = upb_baredecoder_readf64(d); + break; + case UPB_WIRE_TYPE_32BIT: + *val.uint32 = upb_baredecoder_readf32(d); + break; + default: + assert(false); } return true; } +static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { + upb_string_substr(str, d->input, d->offset, d->delimited_len); + return true; +} + static bool upb_baredecoder_skipval(upb_baredecoder *d) { upb_value val; @@ -977,6 +976,7 @@ static bool upb_baredecoder_endmsg(upb_baredecoder *d) static upb_src_vtable upb_baredecoder_src_vtbl = { (upb_src_getdef_fptr)&upb_baredecoder_getdef, (upb_src_getval_fptr)&upb_baredecoder_getval, + 
(upb_src_getstr_fptr)&upb_baredecoder_getstr, (upb_src_skipval_fptr)&upb_baredecoder_skipval, (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, diff --git a/core/upb_def.h b/core/upb_def.h index c297e83..5c8c11e 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -207,11 +207,7 @@ bool upb_enum_done(upb_enum_iter *iter); typedef struct { upb_atomic_refcount_t refcount; upb_rwlock_t lock; // Protects all members except the refcount. - upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. - - // Our symbol tables; we own refs to the defs therein. - upb_strtable symtab; // The main symbol table. - upb_strtable psymtab; // Private symbols, for internal use. + upb_strtable symtab; // The symbol table. } upb_symtab; // Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 0ec45d2..52172d2 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -27,28 +27,35 @@ struct upb_bytesink; typedef struct upb_bytesink upb_bytesink; // Typedefs for function pointers to all of the virtual functions. -typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); + +// upb_src. +typedef struct _upb_fielddef *(*upb_src_getdef_fptr)(upb_src *src); typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); +typedef bool (*upb_src_getstr_fptr)(upb_src *src, upb_string *str); typedef bool (*upb_src_skipval_fptr)(upb_src *src); typedef bool (*upb_src_startmsg_fptr)(upb_src *src); typedef bool (*upb_src_endmsg_fptr)(upb_src *src); +// upb_sink. typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); +// upb_bytesrc. 
typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); typedef bool (*upb_bytesrc_append_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t len); +// upb_bytesink. typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); // Vtables for the above interfaces. typedef struct { upb_src_getdef_fptr getdef; upb_src_getval_fptr getval; + upb_src_getstr_fptr getstr; upb_src_skipval_fptr skipval; upb_src_startmsg_fptr startmsg; upb_src_endmsg_fptr endmsg; @@ -86,6 +93,48 @@ INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { #endif } +// Implementation of virtual function dispatch. +INLINE struct _upb_fielddef *upb_src_getdef(upb_src *src) { + return src->vtbl->getdef(src); +} +INLINE bool upb_src_getval(upb_src *src, upb_valueptr val) { + return src->vtbl->getval(src, val); +} +INLINE bool upb_src_getstr(upb_src *src, upb_string *str) { + return src->vtbl->getstr(src, str); +} +INLINE bool upb_src_skipval(upb_src *src) { return src->vtbl->skipval(src); } +INLINE bool upb_src_startmsg(upb_src *src) { return src->vtbl->startmsg(src); } +INLINE bool upb_src_endmsg(upb_src *src) { return src->vtbl->endmsg(src); } + +// Implementation of type-specific upb_src accessors. If we encounter a upb_src +// where these can be implemented directly in a measurably more efficient way, +// we can make these part of the vtable also. +// +// For <64-bit types we have to use a temporary to accommodate baredecoder, +// which does not know the actual width of the type. +INLINE bool upb_src_getbool(upb_src *src, bool *_bool) { + upb_value val; + bool ret = upb_src_getval(src, upb_value_addrof(&val)); + *_bool = val._bool; + return ret; +} + +INLINE bool upb_src_getint32(upb_src *src, int32_t *i32) { + upb_value val; + bool ret = upb_src_getval(src, upb_value_addrof(&val)); + *i32 = val.int32; + return ret; +} + +// TODO. 
+bool upb_src_getint32(upb_src *src, int32_t *val); +bool upb_src_getint64(upb_src *src, int64_t *val); +bool upb_src_getuint32(upb_src *src, uint32_t *val); +bool upb_src_getuint64(upb_src *src, uint64_t *val); +bool upb_src_getfloat(upb_src *src, float *val); +bool upb_src_getdouble(upb_src *src, double *val); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index e3fdc49..52fc72b 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -536,6 +536,7 @@ static bool upb_decoder_skipgroup(upb_decoder *d) upb_src_vtable upb_decoder_src_vtbl = { (upb_src_getdef_fptr)&upb_decoder_getdef, (upb_src_getval_fptr)&upb_decoder_getval, + (upb_src_getstr_fptr)&upb_decoder_getstr, (upb_src_skipval_fptr)&upb_decoder_skipval, (upb_src_startmsg_fptr)&upb_decoder_startmsg, (upb_src_endmsg_fptr)&upb_decoder_endmsg, -- cgit v1.2.3 From fcfc37e7d41f87bc9ff5ecfb64e0aebb3457c633 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 11 Jul 2010 16:58:44 -0700 Subject: Reduce decoder memory usage. The "field" entry was only being used to determine whether we were inside a group, but the "end_offset" member contains enough information to tell us that. --- Makefile | 2 +- stream/upb_decoder.c | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'stream') diff --git a/Makefile b/Makefile index 2b2a269..c37df72 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 52fc72b..c06660f 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -29,8 +29,7 @@ static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } // upb_decoder_frame is one frame of that stack. typedef struct { upb_msgdef *msgdef; - upb_fielddef *field; - upb_strlen_t end_offset; // For groups, -1. + upb_strlen_t end_offset; // For groups, UPB_GROUP_END_OFFSET. } upb_decoder_frame; struct upb_decoder { @@ -57,9 +56,6 @@ struct upb_decoder { // The overall stream offset of the beginning of "buf". uint32_t buf_stream_offset; - // Fielddef for the key we just read. - upb_fielddef *field; - // Wire type of the key we just read. upb_wire_type_t wire_type; @@ -68,6 +64,9 @@ struct upb_decoder { upb_strlen_t packed_end_offset; + // Fielddef for the key we just read. + upb_fielddef *field; + // We keep a stack of messages we have recursed into. 
upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; }; @@ -455,7 +454,6 @@ bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { static bool upb_decoder_skipgroup(upb_decoder *d); bool upb_decoder_startmsg(upb_decoder *d) { - d->top->field = d->field; if(++d->top >= d->limit) { upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED, "Nesting exceeded maximum (%d levels)\n", @@ -476,7 +474,7 @@ bool upb_decoder_endmsg(upb_decoder *d) { if(d->top > d->stack) { --d->top; if(!d->src.eof) { - if(d->top->field->type == UPB_TYPE(GROUP)) + if(d->top->end_offset == UPB_GROUP_END_OFFSET) upb_decoder_skipgroup(d); else upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); -- cgit v1.2.3 From 57ad204ceaef0943bba11bdc5d4d98f2d179a22f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 12 Jul 2010 01:04:14 -0700 Subject: Implemented upb_stdio (upb_bytesrc/upb_bytesink). --- stream/upb_stdio.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ stream/upb_stdio.h | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 stream/upb_stdio.c create mode 100644 stream/upb_stdio.h (limited to 'stream') diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c new file mode 100644 index 0000000..7cbca91 --- /dev/null +++ b/stream/upb_stdio.c @@ -0,0 +1,61 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_stdio.h" + +// We can make this configurable if necessary. +#define BLOCK_SIZE 4096 + +struct upb_stdio { + upb_bytesrc bytesrc; + upb_bytesink bytesink; + FILE *file; +} + +static bool upb_stdio_read(upb_stdio *stdio, upb_string *str, + int offset, int bytes_to_read) { + char *buf = upb_string_getrwbuf(offset + bytes_to_read) + offset; + size_t read = fread(buf, 1, bytes_to_read, stdio->file); + if(read < bytes_to_read) { + // Error or EOF. 
+ stdio->bytesrc.eof = feof(stdio->file); + if(ferror(stdio->file)) { + upb_seterr(&stdio->bytesrc.status, UPB_STATUS_ERROR, + "Error reading from stdio stream."); + return false; + } + // Resize to actual read size. + upb_string_getrwbuf(str, offset + read); + } + return true; +} + +bool upb_stdio_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen) { + // We ignore "minlen" since the stdio interfaces always return a full read + // unless they are at EOF. + (void)minlen; + return upb_stdio_read((upb_stdio*)src, str, 0, BLOCK_SIZE); +} + +bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { + return upb_stdio_read((upb_stdio*)src, str, upb_string_len(str), len); +} + +int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { + upb_stdio *stdio = (upb_stdio*)sink - offsetof(upb_stdio, bytesink); + upb_strlen_t len = upb_string_len(str); + size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); + if(written < len) { + // Error or EOF. + stdio->bytesink.eof = feof(stdio->file); + if(ferror(stdio->file)) { + upb_seterr(&stdio->bytesink.status, UPB_STATUS_ERROR, + "Error writing to stdio stream."); + return 0; + } + } + return written; +} diff --git a/stream/upb_stdio.h b/stream/upb_stdio.h new file mode 100644 index 0000000..3c29fcb --- /dev/null +++ b/stream/upb_stdio.h @@ -0,0 +1,42 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file provides upb_bytesrc and upb_bytesink implementations for + * ANSI C stdio. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include +#include "upb_stream.h" + +#ifndef UPB_STDIO_H_ +#define UPB_STDIO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_stdio; +typedef struct upb_stdio upb_stdio; + +// Creation/deletion. +upb_stdio_ *upb_stdio__new(); +void upb_stdio_free(upb_stdio *stdio); + +// Reset/initialize the object for use. The src or sink will call +// fread()/fwrite()/etc. on the given FILE*. 
+void upb_stdio_reset(upb_stdio *stdio, FILE* file); + +// Gets a bytesrc or bytesink for the given stdio. The returned pointer is +// invalidated by upb_stdio_reset above. It is perfectly valid to get both +// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading +// and writing. +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio); +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From 5b5e26144ddcfcbbee8b6df843a7d169d4cf1eea Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 13 Jul 2010 20:44:27 -0700 Subject: Work on upb_textprinter. --- stream/upb_text.c | 54 +++++++++++++----------------------------------------- 1 file changed, 13 insertions(+), 41 deletions(-) (limited to 'stream') diff --git a/stream/upb_text.c b/stream/upb_text.c index 8662269..4a25ecd 100644 --- a/stream/upb_text.c +++ b/stream/upb_text.c @@ -9,9 +9,9 @@ #include "upb_text.h" #include "upb_data.h" -void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file) -{ -#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break; +bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { + upb_string *p->str = upb_string_tryrecycle(p->str); +#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; switch(type) { case UPB_TYPE(DOUBLE): CASE("%0.f", _double); @@ -34,18 +34,22 @@ void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file) CASE("%" PRIu32, uint32); case UPB_TYPE(BOOL): CASE("%hhu", _bool); - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - /* TODO: escaping. */ - fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break; } + return upb_bytesink_put(p->str); +} + +bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { + upb_bytesink_put(UPB_STRLIT("\"")); + // TODO: escaping. 
+ upb_bytesink_put(str); + upb_bytesink_put(UPB_STRLIT("\"")); } static void print_indent(upb_text_printer *p, FILE *stream) { if(!p->single_line) for(int i = 0; i < p->indent_depth; i++) - fprintf(stream, " "); + upb_bytesink_put(UPB_STRLIT(" ")); } void upb_text_printfield(upb_text_printer *p, upb_strptr name, @@ -61,7 +65,7 @@ void upb_text_printfield(upb_text_printer *p, upb_strptr name, fputc('\n', stream); } -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream) +void upb_textprinter_startmsg(upb_textprinter *p) { print_indent(p, stream); fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type)); @@ -76,28 +80,6 @@ void upb_text_pop(upb_text_printer *p, FILE *stream) fprintf(stream, "}\n"); } -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream); - -static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md, - FILE *stream) -{ - for(upb_field_count_t i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[i]; - if(!upb_msg_has(msg, f)) continue; - upb_value v = upb_msg_get(msg, f); - if(upb_isarray(f)) { - upb_arrayptr arr = v.arr; - for(uint32_t j = 0; j < upb_array_len(arr); j++) { - upb_value elem = upb_array_get(arr, f, j); - printval(printer, elem, f, stream); - } - } else { - printval(printer, v, f, stream); - } - } -} - static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, FILE *stream) { @@ -109,13 +91,3 @@ static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, upb_text_printfield(printer, f->name, f->type, v, stream); } } - - -void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line, - FILE *stream) -{ - upb_text_printer printer; - upb_text_printer_init(&printer, single_line); - printmsg(&printer, msg, md, stream); -} - -- cgit v1.2.3 From 87b2c69c15716b96a294f5918878fb8b7b9a0b40 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 12:56:04 -0700 Subject: Fleshed out upb_stdio and 
upb_textprinter. test_decoder now compiles and links! But it doesn't work yet. --- Makefile | 7 +-- core/upb_stream.h | 5 +- core/upb_stream_vtbl.h | 110 +++++++++++++++++++++++++++++++++++---- stream/upb_decoder.c | 4 ++ stream/upb_decoder.h | 2 +- stream/upb_stdio.c | 37 +++++++++++-- stream/upb_stdio.h | 2 +- stream/upb_text.c | 93 --------------------------------- stream/upb_text.h | 36 ------------- stream/upb_textprinter.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++ stream/upb_textprinter.h | 30 +++++++++++ 11 files changed, 310 insertions(+), 147 deletions(-) delete mode 100644 stream/upb_text.c delete mode 100644 stream/upb_text.h create mode 100644 stream/upb_textprinter.c create mode 100644 stream/upb_textprinter.h (limited to 'stream') diff --git a/Makefile b/Makefile index 166ca3a..10ef96d 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $ CC=gcc CXX=g++ CFLAGS=-std=c99 -INCLUDE=-Idescriptor -Icore -Itests -I. +INCLUDE=-Idescriptor -Icore -Itests -Istream -I. CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread @@ -47,7 +47,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c \ + core/upb_stream.c stream/upb_stdio.c stream/upb_textprinter.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. 
@@ -90,7 +90,8 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_def + tests/test_def \ + tests/test_decoder tests: $(TESTS) OTHER_TESTS=tests/tests \ diff --git a/core/upb_stream.h b/core/upb_stream.h index 9147e45..b7400c5 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -111,7 +111,10 @@ bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); bool upb_sink_putval(upb_sink *sink, upb_value val); bool upb_sink_putstr(upb_sink *sink, upb_string *str); -// Ends a submessage. +// Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a +// client could have a submessage already serialized, and therefore put it +// as a string instead of its individual elements. +bool upb_sink_startmsg(upb_sink *sink); bool upb_sink_endmsg(upb_sink *sink); // Returns the current error status for the stream. diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ba2670e..96f6cfe 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -5,6 +5,21 @@ * interfaces. Only components that are implementing these interfaces need * to worry about this file. * + * This is tedious; this is the place in upb where I most wish I had a C++ + * feature. In C++ the compiler would generate this all for me. If there's + * any consolation, it's that I have a bit of flexibility you don't have in + * C++: I could, with preprocessor magic alone "de-virtualize" this interface + * for a particular source file. Say I had a C file that called a upb_src, + * but didn't want to pay the virtual function overhead. I could define: + * + * #define upb_src_getdef(src) upb_decoder_getdef((upb_decoder*)src) + * #define upb_src_stargmsg(src) upb_decoder_startmsg(upb_decoder*)src) + * // etc. 
+ * + * The source file is compatible with the regular upb_src interface, but here + * we bind it to a particular upb_src (upb_decoder), which could lead to + * improved performance at a loss of flexibility for this one upb_src client. + * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. */ @@ -39,12 +54,13 @@ typedef bool (*upb_src_endmsg_fptr)(upb_src *src); // upb_sink. typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); +typedef bool (*upb_sink_putstr_fptr)(upb_sink *sink, upb_string *str); typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); // upb_bytesrc. -typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); -typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); +typedef bool (*upb_bytesrc_get_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); typedef bool (*upb_bytesrc_append_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t len); @@ -61,12 +77,23 @@ typedef struct { upb_src_endmsg_fptr endmsg; } upb_src_vtable; +typedef struct { + upb_sink_putdef_fptr putdef; + upb_sink_putval_fptr putval; + upb_sink_putstr_fptr putstr; + upb_sink_startmsg_fptr startmsg; + upb_sink_endmsg_fptr endmsg; +} upb_sink_vtable; + typedef struct { upb_bytesrc_get_fptr get; upb_bytesrc_append_fptr append; - upb_bytesrc_recycle_fptr recycle; } upb_bytesrc_vtable; +typedef struct { + upb_bytesink_put_fptr put; +} upb_bytesink_vtable; + // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. @@ -74,9 +101,12 @@ struct upb_src { upb_src_vtable *vtbl; upb_status status; bool eof; -#ifndef NDEBUG - int state; // For debug-mode checking of API usage. 
-#endif +}; + +struct upb_sink { + upb_sink_vtable *vtbl; + upb_status status; + bool eof; }; struct upb_bytesrc { @@ -85,13 +115,34 @@ struct upb_bytesrc { bool eof; }; +struct upb_bytesink { + upb_bytesink_vtable *vtbl; + upb_status status; + bool eof; +}; + INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); -#ifndef DEBUG - // TODO: initialize debug-mode checking. -#endif +} + +INLINE void upb_sink_init(upb_sink *s, upb_sink_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); +} + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); +} + +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); } // Implementation of virtual function dispatch. @@ -136,6 +187,47 @@ bool upb_src_getuint64(upb_src *src, uint64_t *val); bool upb_src_getfloat(upb_src *src, float *val); bool upb_src_getdouble(upb_src *src, double *val); +// upb_bytesrc +INLINE bool upb_bytesrc_get( + upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t minlen) { + return bytesrc->vtbl->get(bytesrc, str, minlen); +} + +INLINE bool upb_bytesrc_append( + upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t len) { + return bytesrc->vtbl->append(bytesrc, str, len); +} + +// upb_sink +INLINE bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def) { + return sink->vtbl->putdef(sink, def); +} +INLINE bool upb_sink_putval(upb_sink *sink, upb_value val) { + return sink->vtbl->putval(sink, val); +} +INLINE bool upb_sink_putstr(upb_sink *sink, upb_string *str) { + return sink->vtbl->putstr(sink, str); +} +INLINE bool upb_sink_startmsg(upb_sink *sink) { + return sink->vtbl->startmsg(sink); +} +INLINE bool upb_sink_endmsg(upb_sink *sink) { + return sink->vtbl->endmsg(sink); +} + +INLINE upb_status *upb_sink_status(upb_sink *sink) { return 
&sink->status; } + +// upb_bytesink +INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { + return sink->vtbl->put(sink, str); +} +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { + return &sink->status; +} + +// upb_bytesink + + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index c06660f..9a3f6b0 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -574,3 +574,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) d->buf_stream_offset = 0; d->buf_offset = 0; } + +upb_src *upb_decoder_src(upb_decoder *d) { + return &d->src; +} diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h index dde61fc..6ba4d77 100644 --- a/stream/upb_decoder.h +++ b/stream/upb_decoder.h @@ -44,7 +44,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); // Returns a upb_src pointer by which the decoder can be used. The returned // upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). -upb_src *upb_decoder_getsrc(upb_decoder *d); +upb_src *upb_decoder_src(upb_decoder *d); #ifdef __cplusplus } /* extern "C" */ diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 7cbca91..89a6621 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -6,6 +6,10 @@ #include "upb_stdio.h" +#include +#include +#include "upb_string.h" + // We can make this configurable if necessary. #define BLOCK_SIZE 4096 @@ -13,11 +17,15 @@ struct upb_stdio { upb_bytesrc bytesrc; upb_bytesink bytesink; FILE *file; +}; + +void upb_stdio_reset(upb_stdio *stdio, FILE* file) { + stdio->file = file; } static bool upb_stdio_read(upb_stdio *stdio, upb_string *str, - int offset, int bytes_to_read) { - char *buf = upb_string_getrwbuf(offset + bytes_to_read) + offset; + int offset, size_t bytes_to_read) { + char *buf = upb_string_getrwbuf(str, offset + bytes_to_read) + offset; size_t read = fread(buf, 1, bytes_to_read, stdio->file); if(read < bytes_to_read) { // Error or EOF. 
@@ -44,7 +52,7 @@ bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { return upb_stdio_read((upb_stdio*)src, str, upb_string_len(str), len); } -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { +int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { upb_stdio *stdio = (upb_stdio*)sink - offsetof(upb_stdio, bytesink); upb_strlen_t len = upb_string_len(str); size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); @@ -59,3 +67,26 @@ int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { } return written; } + +static upb_bytesrc_vtable upb_stdio_bytesrc_vtbl = { + (upb_bytesrc_get_fptr)upb_stdio_get, + (upb_bytesrc_append_fptr)upb_stdio_append, +}; + +static upb_bytesink_vtable upb_stdio_bytesink_vtbl = { + upb_stdio_put +}; + +upb_stdio *upb_stdio_new() { + upb_stdio *stdio = malloc(sizeof(*stdio)); + upb_bytesrc_init(&stdio->bytesrc, &upb_stdio_bytesrc_vtbl); + upb_bytesink_init(&stdio->bytesink, &upb_stdio_bytesink_vtbl); + return stdio; +} + +void upb_stdio_free(upb_stdio *stdio) { + free(stdio); +} + +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; } +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; } diff --git a/stream/upb_stdio.h b/stream/upb_stdio.h index 3c29fcb..fd71fdd 100644 --- a/stream/upb_stdio.h +++ b/stream/upb_stdio.h @@ -21,7 +21,7 @@ struct upb_stdio; typedef struct upb_stdio upb_stdio; // Creation/deletion. -upb_stdio_ *upb_stdio__new(); +upb_stdio *upb_stdio_new(); void upb_stdio_free(upb_stdio *stdio); // Reset/initialize the object for use. The src or sink will call diff --git a/stream/upb_text.c b/stream/upb_text.c deleted file mode 100644 index 4a25ecd..0000000 --- a/stream/upb_text.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- */ - -#include -#include "descriptor.h" -#include "upb_text.h" -#include "upb_data.h" - -bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { - upb_string *p->str = upb_string_tryrecycle(p->str); -#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; - switch(type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); - } - return upb_bytesink_put(p->str); -} - -bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { - upb_bytesink_put(UPB_STRLIT("\"")); - // TODO: escaping. 
- upb_bytesink_put(str); - upb_bytesink_put(UPB_STRLIT("\"")); -} - -static void print_indent(upb_text_printer *p, FILE *stream) -{ - if(!p->single_line) - for(int i = 0; i < p->indent_depth; i++) - upb_bytesink_put(UPB_STRLIT(" ")); -} - -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, - FILE *stream) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name)); - upb_text_printval(valtype, val, stream); - if(p->single_line) - fputc(' ', stream); - else - fputc('\n', stream); -} - -void upb_textprinter_startmsg(upb_textprinter *p) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type)); - if(!p->single_line) fputc('\n', stream); - p->indent_depth++; -} - -void upb_text_pop(upb_text_printer *p, FILE *stream) -{ - p->indent_depth--; - print_indent(p, stream); - fprintf(stream, "}\n"); -} - -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream) -{ - if(upb_issubmsg(f)) { - upb_text_push(printer, f->name, stream); - printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream); - upb_text_pop(printer, stream); - } else { - upb_text_printfield(printer, f->name, f->type, v, stream); - } -} diff --git a/stream/upb_text.h b/stream/upb_text.h deleted file mode 100644 index d89c9d6..0000000 --- a/stream/upb_text.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- */ - -#ifndef UPB_TEXT_H_ -#define UPB_TEXT_H_ - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - int indent_depth; - bool single_line; -} upb_text_printer; - -INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) { - p->indent_depth = 0; - p->single_line = single_line; -} -void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file); -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, FILE *stream); -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, - FILE *stream); -void upb_text_pop(upb_text_printer *p, FILE *stream); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TEXT_H_ */ diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c new file mode 100644 index 0000000..0f0357a --- /dev/null +++ b/stream/upb_textprinter.c @@ -0,0 +1,131 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
+ */ + +#include "upb_textprinter.h" + +#include +#include +#include "upb_def.h" +#include "upb_string.h" + +struct _upb_textprinter { + upb_sink sink; + upb_bytesink *bytesink; + upb_string *str; + int indent_depth; + bool single_line; + upb_fielddef *f; +}; + +static void upb_textprinter_endfield(upb_textprinter *p) +{ + if(p->single_line) + upb_bytesink_put(p->bytesink, UPB_STRLIT(' ')); + else + upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); +} + +static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { + p->str = upb_string_tryrecycle(p->str); +#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; + switch(p->f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): + CASE("%" PRIu32, uint32); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + } + upb_bytesink_put(p->bytesink, p->str); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { + upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + // TODO: escaping. 
+ upb_bytesink_put(p->bytesink, str); + upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static void upb_textprinter_indent(upb_textprinter *p) +{ + if(!p->single_line) + for(int i = 0; i < p->indent_depth; i++) + upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); +} + +static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) +{ + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, f->name); + upb_bytesink_put(p->bytesink, UPB_STRLIT(":")); + p->f = f; + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_startmsg(upb_textprinter *p) +{ + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, p->f->def->fqname); + upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); + if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); + p->indent_depth++; + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_endmsg(upb_textprinter *p) +{ + p->indent_depth--; + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, UPB_STRLIT("}")); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +upb_sink_vtable upb_textprinter_vtbl = { + (upb_sink_putdef_fptr)upb_textprinter_putdef, + (upb_sink_putval_fptr)upb_textprinter_putval, + (upb_sink_putstr_fptr)upb_textprinter_putstr, + (upb_sink_startmsg_fptr)upb_textprinter_startmsg, + (upb_sink_endmsg_fptr)upb_textprinter_endmsg, +}; + +upb_textprinter *upb_textprinter_new() { + upb_textprinter *p = malloc(sizeof(*p)); + upb_sink_init(&p->sink, &upb_textprinter_vtbl); + return p; +} + +void upb_textprinter_free(upb_textprinter *p) { + free(p); +} + +void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, + bool single_line) { + p->bytesink = sink; + p->single_line = single_line; + p->indent_depth = 0; +} + +upb_sink *upb_textprinter_sink(upb_textprinter *p) { return &p->sink; } diff --git a/stream/upb_textprinter.h 
b/stream/upb_textprinter.h new file mode 100644 index 0000000..7e35412 --- /dev/null +++ b/stream/upb_textprinter.h @@ -0,0 +1,30 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_TEXT_H_ +#define UPB_TEXT_H_ + +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct _upb_textprinter; +typedef struct _upb_textprinter upb_textprinter; + +upb_textprinter *upb_textprinter_new(); +void upb_textprinter_free(upb_textprinter *p); +void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, + bool single_line); + +upb_sink *upb_textprinter_sink(upb_textprinter *p); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TEXT_H_ */ -- cgit v1.2.3 From 60ae9be4380937c3cd39fb72df04fd1723e741e6 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 13:39:38 -0700 Subject: Bugfixes to upb_stdio and upb_decoder. --- stream/upb_decoder.c | 3 ++- stream/upb_stdio.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'stream') diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 9a3f6b0..64057c5 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -91,10 +91,10 @@ static bool upb_decoder_nextbuf(upb_decoder *d) // Recycle old buffer. if(d->buf) { - d->buf = upb_string_tryrecycle(d->buf); d->buf_offset -= upb_string_len(d->buf); d->buf_stream_offset += upb_string_len(d->buf); } + d->buf = upb_string_tryrecycle(d->buf); // Pull next buffer. if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { @@ -569,6 +569,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. 
d->top->end_offset = UINT32_MAX - 1; d->bytesrc = bytesrc; + d->field = NULL; d->buf = NULL; d->buf_bytesleft = 0; d->buf_stream_offset = 0; diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 89a6621..820399b 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -53,7 +53,7 @@ bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { } int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { - upb_stdio *stdio = (upb_stdio*)sink - offsetof(upb_stdio, bytesink); + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { -- cgit v1.2.3 From af9d691a344746b15fb1df2e454273b637d20433 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 15:05:57 -0700 Subject: Added Xcode project. --- core/upb_string.c | 2 +- stream/upb_textprinter.c | 2 +- upb.xcodeproj/project.pbxproj | 497 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 499 insertions(+), 2 deletions(-) create mode 100644 upb.xcodeproj/project.pbxproj (limited to 'stream') diff --git a/core/upb_string.c b/core/upb_string.c index ca3c669..93686f5 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -71,7 +71,7 @@ upb_string *upb_string_tryrecycle(upb_string *str) { } char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->ptr == NULL); + // assert(str->ptr == NULL); uint32_t size = upb_string_size(str); if (size < len) { size = upb_round_up_pow2(len); diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 0f0357a..11ad6a8 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -7,7 +7,7 @@ #include "upb_textprinter.h" #include -#include +#include #include "upb_def.h" #include "upb_string.h" diff --git a/upb.xcodeproj/project.pbxproj b/upb.xcodeproj/project.pbxproj new file mode 100644 index 0000000..8b4eb4e --- /dev/null +++ 
b/upb.xcodeproj/project.pbxproj @@ -0,0 +1,497 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 420E6F1C11F258AE001DA8FE /* test_decoder.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */; }; + 420E6F3B11F259B3001DA8FE /* liblibupbcore.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */; }; + 420E6F3C11F259B3001DA8FE /* liblibupbstream.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */; }; + 42BD1D6E11F2500D0076AD28 /* upb.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3211F24E4C0076AD28 /* upb.c */; }; + 42BD1D7011F2500D0076AD28 /* upb_def.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2911F24E4C0076AD28 /* upb_def.c */; }; + 42BD1D7211F2500D0076AD28 /* upb_stream.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */; }; + 42BD1D7311F2500D0076AD28 /* upb_string.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2E11F24E4C0076AD28 /* upb_string.c */; }; + 42BD1D7411F2500D0076AD28 /* upb_table.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3011F24E4C0076AD28 /* upb_table.c */; }; + 42BD1D7611F250B90076AD28 /* upb_decoder.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */; }; + 42BD1D7711F250B90076AD28 /* upb_stdio.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4011F24EA30076AD28 /* upb_stdio.c */; }; + 42BD1D7811F250B90076AD28 /* upb_textprinter.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */; }; + 42BD1D9011F251820076AD28 /* descriptor_const.h in Headers */ = {isa = PBXBuildFile; fileRef = 42BD1D8D11F251820076AD28 /* descriptor_const.h */; }; + 42BD1D9111F251820076AD28 /* descriptor.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D8E11F251820076AD28 /* descriptor.c */; 
}; + 42BD1D9211F251820076AD28 /* descriptor.h in Headers */ = {isa = PBXBuildFile; fileRef = 42BD1D8F11F251820076AD28 /* descriptor.h */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 420E6F3311F2598D001DA8FE /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 42BD1D5B11F24F920076AD28 /* upbcore */; + remoteInfo = upbcore; + }; + 420E6F3511F2598D001DA8FE /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 42BD1D6311F24FBA0076AD28 /* upbstream */; + remoteInfo = upbstream; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXFileReference section */ + 420E6F1811F2589F001DA8FE /* test_decoder */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = test_decoder; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D2811F24E4C0076AD28 /* upb_atomic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_atomic.h; path = core/upb_atomic.h; sourceTree = "<group>"; }; + 42BD1D2911F24E4C0076AD28 /* upb_def.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_def.c; path = core/upb_def.c; sourceTree = "<group>"; }; + 42BD1D2A11F24E4C0076AD28 /* upb_def.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_def.h; path = core/upb_def.h; sourceTree = "<group>"; }; + 42BD1D2B11F24E4C0076AD28 /* upb_stream_vtbl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stream_vtbl.h; path = core/upb_stream_vtbl.h; sourceTree = "<group>"; }; + 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_stream.c; path = core/upb_stream.c; sourceTree =
"<group>"; }; + 42BD1D2D11F24E4C0076AD28 /* upb_stream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stream.h; path = core/upb_stream.h; sourceTree = "<group>"; }; + 42BD1D2E11F24E4C0076AD28 /* upb_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_string.c; path = core/upb_string.c; sourceTree = "<group>"; }; + 42BD1D2F11F24E4C0076AD28 /* upb_string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_string.h; path = core/upb_string.h; sourceTree = "<group>"; }; + 42BD1D3011F24E4C0076AD28 /* upb_table.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_table.c; path = core/upb_table.c; sourceTree = "<group>"; }; + 42BD1D3111F24E4C0076AD28 /* upb_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_table.h; path = core/upb_table.h; sourceTree = "<group>"; }; + 42BD1D3211F24E4C0076AD28 /* upb.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb.c; path = core/upb.c; sourceTree = "<group>"; }; + 42BD1D3311F24E4C0076AD28 /* upb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb.h; path = core/upb.h; sourceTree = "<group>"; }; + 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_decoder.c; path = stream/upb_decoder.c; sourceTree = "<group>"; }; + 42BD1D3F11F24EA30076AD28 /* upb_decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_decoder.h; path = stream/upb_decoder.h; sourceTree = "<group>"; }; + 42BD1D4011F24EA30076AD28 /* upb_stdio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_stdio.c; path = stream/upb_stdio.c; sourceTree = "<group>"; }; + 42BD1D4111F24EA30076AD28 /* upb_stdio.h */ = {isa = PBXFileReference; fileEncoding = 4;
lastKnownFileType = sourcecode.c.h; name = upb_stdio.h; path = stream/upb_stdio.h; sourceTree = "<group>"; }; + 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_textprinter.c; path = stream/upb_textprinter.c; sourceTree = "<group>"; }; + 42BD1D4311F24EA30076AD28 /* upb_textprinter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_textprinter.h; path = stream/upb_textprinter.h; sourceTree = "<group>"; }; + 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_decoder.c; path = tests/test_decoder.c; sourceTree = "<group>"; }; + 42BD1D5011F24F3E0076AD28 /* test_def.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_def.c; path = tests/test_def.c; sourceTree = "<group>"; }; + 42BD1D5111F24F3E0076AD28 /* test_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_string.c; path = tests/test_string.c; sourceTree = "<group>"; }; + 42BD1D5211F24F3E0076AD28 /* test_table.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = test_table.cc; path = tests/test_table.cc; sourceTree = "<group>"; }; + 42BD1D5311F24F3E0076AD28 /* test_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = test_util.h; path = tests/test_util.h; sourceTree = "<group>"; }; + 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibupbcore.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibupbstream.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D8D11F251820076AD28 /* descriptor_const.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType
= sourcecode.c.h; name = descriptor_const.h; path = descriptor/descriptor_const.h; sourceTree = "<group>"; }; + 42BD1D8E11F251820076AD28 /* descriptor.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = descriptor.c; path = descriptor/descriptor.c; sourceTree = "<group>"; }; + 42BD1D8F11F251820076AD28 /* descriptor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = descriptor.h; path = descriptor/descriptor.h; sourceTree = "<group>"; }; + C6A0FF2C0290799A04C91782 /* upb.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = upb.1; sourceTree = "<group>"; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 420E6F1611F2589F001DA8FE /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 420E6F3B11F259B3001DA8FE /* liblibupbcore.a in Frameworks */, + 420E6F3C11F259B3001DA8FE /* liblibupbstream.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D5A11F24F920076AD28 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6211F24FBA0076AD28 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* upb */ = { + isa = PBXGroup; + children = ( + 08FB7795FE84155DC02AAC07 /* Source */, + C6A0FF2B0290797F04C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = upb; + sourceTree = "<group>"; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 42BD1D8A11F251670076AD28 /* descriptor */, + 42BD1D4711F24EB20076AD28 /* tests */, + 42BD1D3B11F24E810076AD28 /* stream */, + 42BD1D3A11F24E5F0076AD28 /* core */, + ); + name = Source; + sourceTree = "<group>"; + }; +
1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */, + 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */, + 420E6F1811F2589F001DA8FE /* test_decoder */, + ); + name = Products; + sourceTree = "<group>"; + }; + 42BD1D3A11F24E5F0076AD28 /* core */ = { + isa = PBXGroup; + children = ( + 42BD1D2811F24E4C0076AD28 /* upb_atomic.h */, + 42BD1D2911F24E4C0076AD28 /* upb_def.c */, + 42BD1D2A11F24E4C0076AD28 /* upb_def.h */, + 42BD1D2B11F24E4C0076AD28 /* upb_stream_vtbl.h */, + 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */, + 42BD1D2D11F24E4C0076AD28 /* upb_stream.h */, + 42BD1D2E11F24E4C0076AD28 /* upb_string.c */, + 42BD1D2F11F24E4C0076AD28 /* upb_string.h */, + 42BD1D3011F24E4C0076AD28 /* upb_table.c */, + 42BD1D3111F24E4C0076AD28 /* upb_table.h */, + 42BD1D3211F24E4C0076AD28 /* upb.c */, + 42BD1D3311F24E4C0076AD28 /* upb.h */, + ); + name = core; + sourceTree = "<group>"; + }; + 42BD1D3B11F24E810076AD28 /* stream */ = { + isa = PBXGroup; + children = ( + 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */, + 42BD1D3F11F24EA30076AD28 /* upb_decoder.h */, + 42BD1D4011F24EA30076AD28 /* upb_stdio.c */, + 42BD1D4111F24EA30076AD28 /* upb_stdio.h */, + 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */, + 42BD1D4311F24EA30076AD28 /* upb_textprinter.h */, + ); + name = stream; + sourceTree = "<group>"; + }; + 42BD1D4711F24EB20076AD28 /* tests */ = { + isa = PBXGroup; + children = ( + 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */, + 42BD1D5011F24F3E0076AD28 /* test_def.c */, + 42BD1D5111F24F3E0076AD28 /* test_string.c */, + 42BD1D5211F24F3E0076AD28 /* test_table.cc */, + 42BD1D5311F24F3E0076AD28 /* test_util.h */, + ); + name = tests; + sourceTree = "<group>"; + }; + 42BD1D8A11F251670076AD28 /* descriptor */ = { + isa = PBXGroup; + children = ( + 42BD1D8D11F251820076AD28 /* descriptor_const.h */, + 42BD1D8E11F251820076AD28 /* descriptor.c */, + 42BD1D8F11F251820076AD28 /* descriptor.h */, + ); + name = descriptor; + sourceTree = "<group>"; + }; +
C6A0FF2B0290797F04C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + C6A0FF2C0290799A04C91782 /* upb.1 */, + ); + name = Documentation; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 42BD1D5811F24F920076AD28 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 42BD1D9011F251820076AD28 /* descriptor_const.h in Headers */, + 42BD1D9211F251820076AD28 /* descriptor.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6011F24FBA0076AD28 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 420E6F1711F2589F001DA8FE /* test_decoder */ = { + isa = PBXNativeTarget; + buildConfigurationList = 420E6F1F11F258CC001DA8FE /* Build configuration list for PBXNativeTarget "test_decoder" */; + buildPhases = ( + 420E6F1511F2589F001DA8FE /* Sources */, + 420E6F1611F2589F001DA8FE /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + 420E6F3411F2598D001DA8FE /* PBXTargetDependency */, + 420E6F3611F2598D001DA8FE /* PBXTargetDependency */, + ); + name = test_decoder; + productName = test_decoder; + productReference = 420E6F1811F2589F001DA8FE /* test_decoder */; + productType = "com.apple.product-type.tool"; + }; + 42BD1D5B11F24F920076AD28 /* upbcore */ = { + isa = PBXNativeTarget; + buildConfigurationList = 42BD1D5F11F24FB10076AD28 /* Build configuration list for PBXNativeTarget "upbcore" */; + buildPhases = ( + 42BD1D5811F24F920076AD28 /* Headers */, + 42BD1D5911F24F920076AD28 /* Sources */, + 42BD1D5A11F24F920076AD28 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = upbcore; + productName = libupbcore; + productReference = 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */; + productType = "com.apple.product-type.library.static"; + }; + 
42BD1D6311F24FBA0076AD28 /* upbstream */ = { + isa = PBXNativeTarget; + buildConfigurationList = 42BD1D6911F24FED0076AD28 /* Build configuration list for PBXNativeTarget "upbstream" */; + buildPhases = ( + 42BD1D6011F24FBA0076AD28 /* Headers */, + 42BD1D6111F24FBA0076AD28 /* Sources */, + 42BD1D6211F24FBA0076AD28 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = upbstream; + productName = libupbstream; + productReference = 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */; + productType = "com.apple.product-type.library.static"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "upb" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* upb */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 42BD1D5B11F24F920076AD28 /* upbcore */, + 42BD1D6311F24FBA0076AD28 /* upbstream */, + 420E6F1711F2589F001DA8FE /* test_decoder */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 420E6F1511F2589F001DA8FE /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 420E6F1C11F258AE001DA8FE /* test_decoder.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D5911F24F920076AD28 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 42BD1D6E11F2500D0076AD28 /* upb.c in Sources */, + 42BD1D7011F2500D0076AD28 /* upb_def.c in Sources */, + 42BD1D7211F2500D0076AD28 /* upb_stream.c in Sources */, + 42BD1D7311F2500D0076AD28 /* upb_string.c in Sources */, + 42BD1D7411F2500D0076AD28 /* upb_table.c in Sources */, + 42BD1D9111F251820076AD28 /* descriptor.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6111F24FBA0076AD28 /* Sources */ = { + isa = 
PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 42BD1D7611F250B90076AD28 /* upb_decoder.c in Sources */, + 42BD1D7711F250B90076AD28 /* upb_stdio.c in Sources */, + 42BD1D7811F250B90076AD28 /* upb_textprinter.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 420E6F3411F2598D001DA8FE /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 42BD1D5B11F24F920076AD28 /* upbcore */; + targetProxy = 420E6F3311F2598D001DA8FE /* PBXContainerItemProxy */; + }; + 420E6F3611F2598D001DA8FE /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 42BD1D6311F24FBA0076AD28 /* upbstream */; + targetProxy = 420E6F3511F2598D001DA8FE /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 1DEB928A08733DD80010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = UPB_THREAD_UNSAFE; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Debug; + }; + 1DEB928B08733DD80010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_PREPROCESSOR_DEFINITIONS = UPB_THREAD_UNSAFE; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Release; + }; + 420E6F1A11F258A0001DA8FE /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + PREBINDING = NO; + 
PRODUCT_NAME = test_decoder; + }; + name = Debug; + }; + 420E6F1B11F258A0001DA8FE /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + PREBINDING = NO; + PRODUCT_NAME = test_decoder; + ZERO_LINK = NO; + }; + name = Release; + }; + 42BD1D5D11F24F930076AD28 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbcore; + }; + name = Debug; + }; + 42BD1D5E11F24F930076AD28 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbcore; + ZERO_LINK = NO; + }; + name = Release; + }; + 42BD1D6511F24FBA0076AD28 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbstream; + }; + name = Debug; + }; + 42BD1D6611F24FBA0076AD28 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbstream; + ZERO_LINK = NO; + }; + name = Release; + }; +/* End 
XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "upb" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB928A08733DD80010E9CD /* Debug */, + 1DEB928B08733DD80010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 420E6F1F11F258CC001DA8FE /* Build configuration list for PBXNativeTarget "test_decoder" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 420E6F1A11F258A0001DA8FE /* Debug */, + 420E6F1B11F258A0001DA8FE /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 42BD1D5F11F24FB10076AD28 /* Build configuration list for PBXNativeTarget "upbcore" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 42BD1D5D11F24F930076AD28 /* Debug */, + 42BD1D5E11F24F930076AD28 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 42BD1D6911F24FED0076AD28 /* Build configuration list for PBXNativeTarget "upbstream" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 42BD1D6511F24FBA0076AD28 /* Debug */, + 42BD1D6611F24FBA0076AD28 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} -- cgit v1.2.3 From b77db146466a113bbfb9e56472bda1975f7a25a5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 15:13:05 -0700 Subject: Fixed broken submsg support in upb_streamdata. 
--- core/upb_stream.c | 38 ++++++++++++++++++++++---------------- stream/upb_textprinter.c | 2 +- 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'stream') diff --git a/core/upb_stream.c b/core/upb_stream.c index bda11de..e63ba00 100644 --- a/core/upb_stream.c +++ b/core/upb_stream.c @@ -14,24 +14,30 @@ void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { upb_fielddef *f; upb_string *str = NULL; - while((f = upb_src_getdef(src)) != NULL) { - CHECKSINK(upb_sink_putdef(sink, f)); - if(upb_issubmsg(f)) { - // We always recurse into submessages, but the putdef above already told - // the sink that. - } else if(upb_isstring(f)) { - str = upb_string_tryrecycle(str); - CHECKSRC(upb_src_getstr(src, str)); - CHECKSINK(upb_sink_putstr(sink, str)); - } else { - // Primitive type. - upb_value val; - CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); - CHECKSINK(upb_sink_putval(sink, val)); + int depth = 0; + while(1) { + while((f = upb_src_getdef(src)) != NULL) { + CHECKSINK(upb_sink_putdef(sink, f)); + if(upb_issubmsg(f)) { + upb_src_startmsg(src); + upb_sink_startmsg(sink); + ++depth; + } else if(upb_isstring(f)) { + str = upb_string_tryrecycle(str); + CHECKSRC(upb_src_getstr(src, str)); + CHECKSINK(upb_sink_putstr(sink, str)); + } else { + // Primitive type. + upb_value val; + CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); + CHECKSINK(upb_sink_putval(sink, val)); + } } + // If we're not EOF now, the loop terminated due to an error. + CHECKSRC(upb_src_eof(src)); + if (depth == 0) break; + --depth; } - // If we're not EOF now, the loop terminated due to an error. 
- CHECKSRC(upb_src_eof(src)); return; src_err: diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 11ad6a8..201edba 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -79,7 +79,7 @@ static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) { upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, f->name); - upb_bytesink_put(p->bytesink, UPB_STRLIT(":")); + upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); p->f = f; return upb_ok(upb_bytesink_status(p->bytesink)); } -- cgit v1.2.3 From 0fcfeab521b01160875e863575dd5b63952b1593 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 18:30:53 -0700 Subject: Bugfixes, test_decoder successfully stream-decodes a stream! --- core/upb_stream.c | 5 +++++ stream/upb_decoder.c | 39 ++++++++++++++++++--------------------- stream/upb_textprinter.c | 9 +++++---- 3 files changed, 28 insertions(+), 25 deletions(-) (limited to 'stream') diff --git a/core/upb_stream.c b/core/upb_stream.c index e63ba00..0d47392 100644 --- a/core/upb_stream.c +++ b/core/upb_stream.c @@ -37,14 +37,19 @@ void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { CHECKSRC(upb_src_eof(src)); if (depth == 0) break; --depth; + upb_src_endmsg(src); + upb_sink_endmsg(sink); } + upb_string_unref(str); return; src_err: + upb_string_unref(str); upb_copyerr(status, upb_src_status(src)); return; sink_err: + upb_string_unref(str); upb_copyerr(status, upb_sink_status(sink)); return; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 64057c5..949ce2d 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -76,7 +76,7 @@ struct upb_decoder { static upb_strlen_t upb_decoder_offset(upb_decoder *d) { - return d->buf_stream_offset - d->buf_offset; + return d->buf_stream_offset + d->buf_offset; } static bool upb_decoder_nextbuf(upb_decoder *d) @@ -101,34 +101,30 @@ static bool upb_decoder_nextbuf(upb_decoder *d) d->buf_bytesleft += upb_string_len(d->buf); return true; } 
else { - // Error or EOF. - if(!upb_bytesrc_eof(d->bytesrc)) { - // Error from bytesrc. - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } else if(d->buf_bytesleft == 0) { - // EOF from bytesrc and we don't have any residual bytes left. - d->src.eof = true; - return false; - } else { - // No more data left from the bytesrc, but we still have residual bytes. - return true; - } + return false; } } static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) { + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE && !upb_bytesrc_eof(d->bytesrc)) + upb_decoder_nextbuf(d); + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { - // GCC is currently complaining about use of an uninitialized value if we - // don't set this now. I think this is incorrect, but leaving this in - // to suppress the warning for now. - *bytes = 0; - if(!upb_decoder_nextbuf(d)) return NULL; + if(upb_bytesrc_eof(d->bytesrc) && d->buf_bytesleft > 0) { + // We're working through the last few bytes of the buffer. + } else if(upb_bytesrc_eof(d->bytesrc)) { + // End of stream, no more bytes left. + assert(d->buf_bytesleft == 0); + d->src.eof = true; + return NULL; + } else { + // We are short of bytes even though the bytesrc isn't EOF; must be error. + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return NULL; + } } - assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE); - if(d->buf_offset >= 0) { // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE // contiguous bytes, so we can read directly out of it. 
@@ -467,6 +463,7 @@ bool upb_decoder_startmsg(upb_decoder *d) { } else { frame->end_offset = upb_decoder_offset(d) + d->delimited_len; } + d->field = NULL; return true; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 201edba..75668a3 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -23,9 +23,9 @@ struct _upb_textprinter { static void upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) - upb_bytesink_put(p->bytesink, UPB_STRLIT(' ')); + upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); else - upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); + upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); } static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { @@ -86,10 +86,9 @@ static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) static bool upb_textprinter_startmsg(upb_textprinter *p) { - upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, p->f->def->fqname); upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); - if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); + if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); p->indent_depth++; return upb_ok(upb_bytesink_status(p->bytesink)); } @@ -114,10 +113,12 @@ upb_sink_vtable upb_textprinter_vtbl = { upb_textprinter *upb_textprinter_new() { upb_textprinter *p = malloc(sizeof(*p)); upb_sink_init(&p->sink, &upb_textprinter_vtbl); + p->str = NULL; return p; } void upb_textprinter_free(upb_textprinter *p) { + upb_string_unref(p->str); free(p); } -- cgit v1.2.3 From 4b6c8b6b2317436ab77b38e17b49a7c7b03bf3f4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 19:00:40 -0700 Subject: Fixed bugs in textoutput. Text output from descriptor.proto is now identical to protoc! 
--- core/upb_def.c | 6 +++++ core/upb_def.h | 1 + stream/upb_textprinter.c | 62 +++++++++++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 27 deletions(-) (limited to 'stream') diff --git a/core/upb_def.c b/core/upb_def.c index c0d72db..fd00895 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -331,6 +331,12 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { return upb_inttable_next(&e->iton, &iter->e); } +upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { + upb_iton_ent *e = + (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); + return e ? e->string : NULL; +} + /* upb_fielddef ***************************************************************/ diff --git a/core/upb_def.h b/core/upb_def.h index 82d8520..9cdc54d 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -202,6 +202,7 @@ typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +// Caller does not own a ref on the returned string. upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 75668a3..2d2e237 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -29,38 +29,48 @@ static void upb_textprinter_endfield(upb_textprinter *p) } static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { - p->str = upb_string_tryrecycle(p->str); + upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); + upb_enumdef *enum_def; + upb_string *enum_label; + if(p->f->type == UPB_TYPE(ENUM) && + (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && + (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { + // This is an enum value for which we found a corresponding string. 
+ upb_bytesink_put(p->bytesink, enum_label); + } else { + p->str = upb_string_tryrecycle(p->str); #define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; - switch(p->f->type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); + switch(p->f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): + CASE("%" PRIu32, uint32); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + } + upb_bytesink_put(p->bytesink, p->str); } - upb_bytesink_put(p->bytesink, p->str); upb_textprinter_endfield(p); return upb_ok(upb_bytesink_status(p->bytesink)); } static bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { - upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); // TODO: escaping. 
upb_bytesink_put(p->bytesink, str); upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); @@ -79,14 +89,12 @@ static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) { upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, f->name); - upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); p->f = f; return upb_ok(upb_bytesink_status(p->bytesink)); } static bool upb_textprinter_startmsg(upb_textprinter *p) { - upb_bytesink_put(p->bytesink, p->f->def->fqname); upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); p->indent_depth++; -- cgit v1.2.3 From 672f4617e2ab7923806c6d6a44d16e128e16b3a4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 22:36:31 -0700 Subject: Lua support for fielddefs and getting their properties. --- core/upb_def.h | 4 +-- lang_ext/lua/upb.c | 79 +++++++++++++++++++++++++++++++++++++++++++++------- stream/upb_decoder.c | 2 +- 3 files changed, 72 insertions(+), 13 deletions(-) (limited to 'stream') diff --git a/core/upb_def.h b/core/upb_def.h index 5c19a7a..3294a8d 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -158,13 +158,13 @@ typedef struct { // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. -INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { +INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) { upb_itof_ent *e = (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); return e ? e->f : NULL; } -INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { +INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); return e ? 
e->f : NULL; } diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index a8165c7..5ab07ba 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -10,6 +10,10 @@ #include "lauxlib.h" #include "upb_def.h" +void lupb_pushstring(lua_State *L, upb_string *str) { + lua_pushlstring(L, upb_string_getrobuf(str), upb_string_len(str)); +} + /* object cache ***************************************************************/ // We cache all the lua objects (userdata) we vend in a weak table, indexed by @@ -38,6 +42,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, void **obj = lua_newuserdata(L, sizeof(void*)); *obj = cobj; luaL_getmetatable(L, type); + assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. lua_setmetatable(L, -2); // Set it in the cache. @@ -83,22 +88,53 @@ static void lupb_def_getorcreate(lua_State *L, upb_def *def) { // msgdef -static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { - return luaL_checkudata(L, narg, "upb.msgdef"); +static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) { + lupb_def *ldef = luaL_checkudata(L, narg, "upb.msgdef"); + return upb_downcast_msgdef(ldef->def); } static int lupb_msgdef_gc(lua_State *L) { - lupb_def *ldef = lupb_msgdef_check(L, 1); + lupb_def *ldef = luaL_checkudata(L, 1, "upb.msgdef"); upb_def_unref(ldef->def); return 0; } +static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f); + +static int lupb_msgdef_fieldbyname(lua_State *L) { + upb_msgdef *m = lupb_msgdef_check(L, 1); + size_t len; + const char *name = luaL_checklstring(L, 2, &len); + upb_string namestr = UPB_STACK_STRING_LEN(name, len); + upb_fielddef *f = upb_msgdef_ntof(m, &namestr); + if (f) { + lupb_fielddef_getorcreate(L, f); + } else { + lua_pushnil(L); + } + return 1; +} + +static int lupb_msgdef_fieldbynum(lua_State *L) { + upb_msgdef *m = lupb_msgdef_check(L, 1); + int num = luaL_checkint(L, 2); + upb_fielddef *f = upb_msgdef_itof(m, num); + if (f) { + 
lupb_fielddef_getorcreate(L, f); + } else { + lua_pushnil(L); + } + return 1; +} + static const struct luaL_Reg lupb_msgdef_mm[] = { {"__gc", lupb_msgdef_gc}, {NULL, NULL} }; static const struct luaL_Reg lupb_msgdef_m[] = { + {"fieldbyname", lupb_msgdef_fieldbyname}, + {"fieldbynum", lupb_msgdef_fieldbynum}, {NULL, NULL} }; @@ -142,6 +178,29 @@ static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { return luaL_checkudata(L, narg, "upb.fielddef"); } +static int lupb_fielddef_index(lua_State *L) { + lupb_fielddef *f = lupb_fielddef_check(L, 1); + const char *str = luaL_checkstring(L, 2); + if (strcmp(str, "name") == 0) { + lupb_pushstring(L, f->field->name); + } else if (strcmp(str, "number") == 0) { + lua_pushinteger(L, f->field->number); + } else if (strcmp(str, "type") == 0) { + lua_pushinteger(L, f->field->type); + } else if (strcmp(str, "label") == 0) { + lua_pushinteger(L, f->field->label); + } else if (strcmp(str, "def") == 0) { + upb_def_ref(f->field->def); + lupb_def_getorcreate(L, f->field->def); + } else if (strcmp(str, "msgdef") == 0) { + upb_def_ref(UPB_UPCAST(f->field->msgdef)); + lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef)); + } else { + lua_pushnil(L); + } + return 1; +} + static int lupb_fielddef_gc(lua_State *L) { lupb_fielddef *lfielddef = lupb_fielddef_check(L, 1); upb_def_unref(UPB_UPCAST(lfielddef->field->msgdef)); @@ -150,10 +209,7 @@ static int lupb_fielddef_gc(lua_State *L) { static const struct luaL_Reg lupb_fielddef_mm[] = { {"__gc", lupb_fielddef_gc}, - {NULL, NULL} -}; - -static const struct luaL_Reg lupb_fielddef_m[] = { + {"__index", lupb_fielddef_index}, {NULL, NULL} }; @@ -206,7 +262,7 @@ static int lupb_symtab_getdefs(lua_State *L) { for (int i = 0; i < count; i++) { upb_def *def = defs[i]; upb_string *name = def->fqname; - lua_pushlstring(L, upb_string_getrobuf(name), upb_string_len(name)); + lupb_pushstring(L, name); lupb_def_getorcreate(L, def); // Add it to our return table. 
lua_settable(L, -3); @@ -255,14 +311,17 @@ static void lupb_register_type(lua_State *L, const char *name, luaL_newmetatable(L, name); luaL_register(L, NULL, mm); lua_createtable(L, 0, 0); - luaL_register(L, NULL, m); - lua_setfield(L, -2, "__index"); + if (m) { + luaL_register(L, NULL, m); + lua_setfield(L, -2, "__index"); + } lua_pop(L, 1); // The mt. } int luaopen_upb(lua_State *L) { lupb_register_type(L, "upb.msgdef", lupb_msgdef_m, lupb_msgdef_mm); lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm); + lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm); lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm); // Create our object cache. TODO: need to make this table weak! diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 949ce2d..74ef5c5 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -342,7 +342,7 @@ again: } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number); + upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, field_number); if (!f) { // Unknown field. If/when the upb_src interface supports reporting -- cgit v1.2.3 From 678799082b9775e601a09af9aa68e59fc1c64f6f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:23:52 -0700 Subject: Stream decoding benchmark. 
--- Makefile | 24 ++++++-- benchmarks/parsestream.upb_table.c | 113 +++++++++++++++++++++++++++++++++++++ core/upb_stream.h | 3 +- core/upb_string.c | 18 ++++++ stream/upb_byteio.h | 43 -------------- 5 files changed, 152 insertions(+), 49 deletions(-) create mode 100644 benchmarks/parsestream.upb_table.c delete mode 100644 stream/upb_byteio.h (limited to 'stream') diff --git a/Makefile b/Makefile index 749c5a7..203bed6 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_textprinter.c \ + core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -154,10 +154,10 @@ tests/tests: core/libupb.a tools/upbc: core/libupb.a # Benchmarks -UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage1.upb_table_byref \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byref +#UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table \ +# benchmarks/b.parsetostruct_googlemessage2.upb_table +UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \ + benchmarks/b.parsestream_googlemessage2.upb_table BENCHMARKS=$(UPB_BENCHMARKS) \ benchmarks/b.parsetostruct_googlemessage1.proto2_table \ @@ -204,6 +204,20 @@ benchmarks/b.parsetostruct_googlemessage2.upb_table_byref: \ -DMESSAGE_FILE=\"google_message2.dat\" \ -DBYREF=true $(LIBUPB) +benchmarks/b.parsestream_googlemessage1.upb_table \ +benchmarks/b.parsestream_googlemessage2.upb_table: \ + benchmarks/parsestream.upb_table.c $(LIBUPB) benchmarks/google_messages.proto.pb + $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage1.upb_table $< \ + 
-DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \ + -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ + -DMESSAGE_FILE=\"google_message1.dat\" \ + $(LIBUPB) + $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage2.upb_table $< \ + -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \ + -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ + -DMESSAGE_FILE=\"google_message2.dat\" \ + $(LIBUPB) + benchmarks/b.parsetostruct_googlemessage1.proto2_table \ benchmarks/b.parsetostruct_googlemessage2.proto2_table: \ benchmarks/parsetostruct.proto2_table.cc benchmarks/google_messages.pb.cc diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c new file mode 100644 index 0000000..c6acad9 --- /dev/null +++ b/benchmarks/parsestream.upb_table.c @@ -0,0 +1,113 @@ + +#include "main.c" + +#include "upb_def.h" +#include "upb_decoder.h" +#include "upb_strstream.h" + +static upb_stringsrc *stringsrc; +static upb_string *input_str; +static upb_string *tmp_str; +static upb_msgdef *def; +static upb_decoder *decoder; + +static bool initialize() +{ + // Initialize upb state, decode descriptor. 
+ upb_status status = UPB_STATUS_INIT; + upb_symtab *s = upb_symtab_new(); + upb_symtab_add_descriptorproto(s); + upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds_str == NULL) { + fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"), + upb_printerr(&status); + return false; + } + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds_str); + upb_def *fds_def = upb_symtab_lookup( + s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds_def)); + upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_symtab_addfds(s, upb_decoder_src(d), &status); + + if(!upb_ok(&status)) { + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":"); + upb_printerr(&status); + return false; + } + + upb_string_unref(fds_str); + upb_decoder_free(d); + upb_stringsrc_free(ssrc); + upb_def_unref(fds_def); + + def = upb_downcast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME))); + if(!def) { + fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", + UPB_STRARG(UPB_STRLIT(MESSAGE_NAME))); + return false; + } + upb_symtab_unref(s); + + // Read the message data itself. 
+ input_str = upb_strreadfile(MESSAGE_FILE); + if(input_str == NULL) { + fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); + return false; + } + tmp_str = NULL; + decoder = upb_decoder_new(def); + stringsrc = upb_stringsrc_new(); + return true; +} + +static void cleanup() +{ + upb_string_unref(input_str); + upb_string_unref(tmp_str); + upb_def_unref(UPB_UPCAST(def)); + upb_decoder_free(decoder); + upb_stringsrc_free(stringsrc); +} + +static size_t run(int i) +{ + (void)i; + upb_status status = UPB_STATUS_INIT; + upb_stringsrc_reset(stringsrc, input_str); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(stringsrc)); + upb_src *src = upb_decoder_src(decoder); + upb_fielddef *f; + upb_string *str = NULL; + int depth = 0; + while(1) { + while((f = upb_src_getdef(src)) != NULL) { + if(upb_issubmsg(f)) { + upb_src_startmsg(src); + ++depth; + } else if(upb_isstring(f)) { + tmp_str = upb_string_tryrecycle(str); + upb_src_getstr(src, tmp_str); + } else { + // Primitive type. + upb_value val; + upb_src_getval(src, upb_value_addrof(&val)); + } + } + // If we're not EOF now, the loop terminated due to an error. + if (!upb_src_eof(src)) goto err; + if (depth == 0) break; + --depth; + upb_src_endmsg(src); + } + if(!upb_ok(&status)) goto err; + return upb_string_len(input_str); + +err: + fprintf(stderr, "Decode error"); + upb_printerr(&status); + return 0; +} diff --git a/core/upb_stream.h b/core/upb_stream.h index b7400c5..861bd1c 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -128,7 +128,8 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. +// data in "str" with more data. The call fails if fewer than len bytes are +// available in the stream. 
bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. diff --git a/core/upb_string.c b/core/upb_string.c index 93686f5..847a3ee 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -131,3 +131,21 @@ upb_string *upb_strdup(upb_string *s) { upb_strcpy(str, s); return str; } + +upb_string *upb_strreadfile(const char *filename) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + if(fseek(f, 0, SEEK_END) != 0) goto error; + long size = ftell(f); + if(size < 0) goto error; + if(fseek(f, 0, SEEK_SET) != 0) goto error; + upb_string *s = upb_string_new(); + char *buf = upb_string_getrwbuf(s, size); + if(fread(buf, size, 1, f) != 1) goto error; + fclose(f); + return s; + +error: + fclose(f); + return NULL; +} diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h deleted file mode 100644 index 69a28b3..0000000 --- a/stream/upb_byteio.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file contains upb_bytesrc and upb_bytesink implementations for common - * interfaces like strings, UNIX fds, and FILE*. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_BYTEIO_H -#define UPB_BYTEIO_H - -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_stringsrc **************************************************************/ - -struct upb_stringsrc; -typedef struct upb_stringsrc upb_stringsrc; - -// Create/free a stringsrc. -upb_stringsrc *upb_stringsrc_new(); -void upb_stringsrc_free(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// stringsrc will take a reference on the string, so the caller need not ensure -// that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); - -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
-upb_bytesrc *upb_stringsrc_bytesrc(); - - -/* upb_fdsrc ******************************************************************/ - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif -- cgit v1.2.3 From e30260bb0af98fa1d6d829fa9ad2fbd95d7dff95 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:25:57 -0700 Subject: upb_stringsrc: upb_bytesrc for strings. --- stream/upb_strstream.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 stream/upb_strstream.c (limited to 'stream') diff --git a/stream/upb_strstream.c b/stream/upb_strstream.c new file mode 100644 index 0000000..65f33d9 --- /dev/null +++ b/stream/upb_strstream.c @@ -0,0 +1,62 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_strstream.h" + +#include +#include "upb_string.h" + +struct upb_stringsrc { + upb_bytesrc bytesrc; + upb_string *str; +}; + +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { + if (str != s->str) { + if (s->str) upb_string_unref(s->str); + s->str = upb_string_getref(str); + } + s->bytesrc.eof = false; +} + +void upb_stringsrc_free(upb_stringsrc *s) { + if (s->str) upb_string_unref(s->str); + free(s); +} + +static bool upb_stringsrc_get(upb_stringsrc *src, upb_string *str, + upb_strlen_t minlen) { + // We ignore "minlen" since we always return the entire string. + (void)minlen; + upb_string_substr(str, src->str, 0, upb_string_len(src->str)); + src->bytesrc.eof = true; + return true; +} + +static bool upb_stringsrc_append(upb_stringsrc *src, upb_string *str, + upb_strlen_t len) { + // Unimplemented; since we return the string via "get" all in one go, + // this method probably isn't very useful. 
+ (void)src; + (void)str; + (void)len; + return false; +} + +static upb_bytesrc_vtable upb_stringsrc_vtbl = { + (upb_bytesrc_get_fptr)upb_stringsrc_get, + (upb_bytesrc_append_fptr)upb_stringsrc_append, +}; + +upb_stringsrc *upb_stringsrc_new() { + upb_stringsrc *s = malloc(sizeof(*s)); + upb_bytesrc_init(&s->bytesrc, &upb_stringsrc_vtbl); + return s; +} + +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { + return &s->bytesrc; +} -- cgit v1.2.3 From 851c6a6915b55842809c8622d0fb941bc911be37 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:27:29 -0700 Subject: strstream header file also. --- stream/upb_strstream.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 stream/upb_strstream.h (limited to 'stream') diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h new file mode 100644 index 0000000..fa9bace --- /dev/null +++ b/stream/upb_strstream.h @@ -0,0 +1,61 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file contains upb_bytesrc and upb_bytesink implementations for + * upb_string. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_STRSTREAM_H +#define UPB_STRSTREAM_H + +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_stringsrc **************************************************************/ + +struct upb_stringsrc; +typedef struct upb_stringsrc upb_stringsrc; + +// Create/free a stringsrc. +upb_stringsrc *upb_stringsrc_new(); +void upb_stringsrc_free(upb_stringsrc *s); + +// Resets the stringsrc to a state where it will vend the given string. The +// stringsrc will take a reference on the string, so the caller need not ensure +// that it outlives the stringsrc. A stringsrc can be reset multiple times. +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); + +// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
+upb_bytesrc *upb_stringsrc_bytesrc(); + + +/* upb_stringsink *************************************************************/ + +struct upb_stringsink; +typedef struct upb_stringsink upb_stringsink; + +// Create/free a stringsrc. +upb_stringsink *upb_stringsink_new(); +void upb_stringsink_free(upb_stringsink *s); + +// Gets a string containing the data that has been written to this stringsink. +// The caller does *not* own any references to this string. +upb_string *upb_stringsink_getstring(upb_stringsink *s); + +// Clears the internal string of accumulated data, resetting it to empty. +void upb_stringsink_reset(upb_stringsink *s); + +// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. +upb_bytesink *upb_stringsrc_bytesink(); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From 372c8f0487a666c3fb36edc18accba0fba9a2680 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 17:01:45 -0700 Subject: Fixes to benchmark. --- benchmarks/parsestream.upb_table.c | 8 ++++++-- core/upb_def.c | 2 ++ stream/upb_decoder.c | 12 +++++++++--- stream/upb_strstream.c | 1 + 4 files changed, 18 insertions(+), 5 deletions(-) (limited to 'stream') diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c index c6acad9..1e18119 100644 --- a/benchmarks/parsestream.upb_table.c +++ b/benchmarks/parsestream.upb_table.c @@ -17,6 +17,12 @@ static bool initialize() upb_status status = UPB_STATUS_INIT; upb_symtab *s = upb_symtab_new(); upb_symtab_add_descriptorproto(s); + upb_def *fds_def = upb_symtab_lookup( + s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + if (!fds_def) { + fprintf(stderr, "Couldn't load FileDescriptorSet def"); + } + upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); if(fds_str == NULL) { fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"), @@ -26,8 +32,6 @@ static bool initialize() upb_stringsrc *ssrc = upb_stringsrc_new(); upb_stringsrc_reset(ssrc, 
fds_str); - upb_def *fds_def = upb_symtab_lookup( - s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds_def)); upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); diff --git a/core/upb_def.c b/core/upb_def.c index e40e1f0..e117455 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -384,6 +384,8 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->owned = true; break; } + default: + upb_src_skipval(src); } } CHECKSRC(upb_src_eof(src)); diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 74ef5c5..46cfb3f 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -318,7 +318,9 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) } // Handles the packed field case. - if(d->field) return d->field; + if(d->field) { + return d->field; + } uint32_t key = 0; again: @@ -457,12 +459,15 @@ bool upb_decoder_startmsg(upb_decoder *d) { return false; } upb_decoder_frame *frame = d->top; - frame->msgdef = upb_downcast_msgdef(d->field->def); if(d->field->type == UPB_TYPE(GROUP)) { frame->end_offset = UPB_GROUP_END_OFFSET; - } else { + } else if (d->field->type == UPB_TYPE(MESSAGE)) { frame->end_offset = upb_decoder_offset(d) + d->delimited_len; + } else { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Tried to startmsg a non-msg field."); } + frame->msgdef = upb_downcast_msgdef(d->field->def); d->field = NULL; return true; } @@ -485,6 +490,7 @@ bool upb_decoder_endmsg(upb_decoder *d) { bool upb_decoder_skipval(upb_decoder *d) { upb_strlen_t bytes_to_skip; + d->field = NULL; switch(d->wire_type) { case UPB_WIRE_TYPE_VARINT: { return upb_decoder_skipv64(d); diff --git a/stream/upb_strstream.c b/stream/upb_strstream.c index 65f33d9..7ed761b 100644 --- a/stream/upb_strstream.c +++ b/stream/upb_strstream.c @@ -53,6 +53,7 @@ static upb_bytesrc_vtable upb_stringsrc_vtbl = { upb_stringsrc *upb_stringsrc_new() { upb_stringsrc *s = malloc(sizeof(*s)); + s->str = NULL; 
upb_bytesrc_init(&s->bytesrc, &upb_stringsrc_vtbl); return s; } -- cgit v1.2.3 From 2a617bf12c8e1f7f689e3767bf7e4582d76c4f39 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 18:18:09 -0700 Subject: Optimizations and bugfix to benchmark. --- benchmarks/parsestream.upb_table.c | 5 ++--- stream/upb_decoder.c | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'stream') diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c index 1e18119..16979b0 100644 --- a/benchmarks/parsestream.upb_table.c +++ b/benchmarks/parsestream.upb_table.c @@ -85,15 +85,14 @@ static size_t run(int i) upb_decoder_reset(decoder, upb_stringsrc_bytesrc(stringsrc)); upb_src *src = upb_decoder_src(decoder); upb_fielddef *f; - upb_string *str = NULL; int depth = 0; while(1) { - while((f = upb_src_getdef(src)) != NULL) { + while(!upb_src_eof(src) && (f = upb_src_getdef(src)) != NULL) { if(upb_issubmsg(f)) { upb_src_startmsg(src); ++depth; } else if(upb_isstring(f)) { - tmp_str = upb_string_tryrecycle(str); + tmp_str = upb_string_tryrecycle(tmp_str); upb_src_getstr(src, tmp_str); } else { // Primitive type. diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 46cfb3f..7591f78 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -177,6 +177,12 @@ static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); } assert(d->buf_bytesleft >= 0); + + // Detect end-of-submessage. + if(upb_decoder_offset(d) >= d->top->end_offset) { + d->src.eof = true; + } + return true; } @@ -187,6 +193,12 @@ static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) while(d->buf_bytesleft < 0) { if(!upb_decoder_nextbuf(d)) return false; } + + // Detect end-of-submessage. 
+ if(upb_decoder_offset(d) >= d->top->end_offset) { + d->src.eof = true; + } + return true; } @@ -311,12 +323,7 @@ bool upb_decoder_skipval(upb_decoder *d); upb_fielddef *upb_decoder_getdef(upb_decoder *d) { - // Detect end-of-submessage. - if(upb_decoder_offset(d) >= d->top->end_offset) { - d->src.eof = true; - return NULL; - } - + if (d->src.eof) return NULL; // Handles the packed field case. if(d->field) { return d->field; @@ -481,7 +488,8 @@ bool upb_decoder_endmsg(upb_decoder *d) { else upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); } - d->src.eof = false; + // Detect end-of-submessage. + d->src.eof = upb_decoder_offset(d) >= d->top->end_offset; return true; } else { return false; @@ -571,6 +579,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) // indefinitely), so we set the end offset as high as possible, but not equal // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. d->top->end_offset = UINT32_MAX - 1; + d->src.eof = false; d->bytesrc = bytesrc; d->field = NULL; d->buf = NULL; -- cgit v1.2.3 From a9e998159c5ac8c4f2644b5ed0eda2e8ff1f8706 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 2 Aug 2010 10:25:24 -0700 Subject: Fleshed out upb_msg: test_vs_proto2 compiles but fails. 
--- Makefile | 10 ++-- core/upb.h | 98 ++++++++++++++++++++++++++++++++++---- core/upb_atomic.h | 4 ++ core/upb_def.c | 65 ++++++++++++++++++++++++- core/upb_def.h | 28 +++++++++-- core/upb_msg.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ core/upb_msg.h | 114 ++++++++++++++++++++++++++++++++++++++++---- stream/upb_decoder.c | 8 ++-- stream/upb_strstream.h | 2 +- tests/test_vs_proto2.cc | 54 ++++++++++++--------- 10 files changed, 452 insertions(+), 54 deletions(-) create mode 100644 core/upb_msg.c (limited to 'stream') diff --git a/Makefile b/Makefile index 203bed6..131b3c0 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ + core/upb_msg.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -101,14 +102,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ tests/test_def \ - tests/test_decoder -tests: $(TESTS) - -OTHER_TESTS=tests/tests \ - tests/test_table \ + tests/test_decoder \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ $(TESTS): core/libupb.a VALGRIND=valgrind --leak-check=full --error-exitcode=1 diff --git a/core/upb.h b/core/upb.h index b605fd9..7ee0469 100644 --- a/core/upb.h +++ b/core/upb.h @@ -80,24 +80,16 @@ enum upb_wire_type { typedef uint8_t upb_wire_type_t; -// Value type as defined in a .proto file. eg. string, int32, etc. The +// Type of a field as defined in a .proto file. eg. string, int32, etc. The // integers that represent this are defined by descriptor.proto. Note that // descriptor.proto reserves "0" for errors, and we use it to represent // exceptional circumstances. 
-typedef uint8_t upb_field_type_t; +typedef uint8_t upb_fieldtype_t; // For referencing the type constants tersely. #define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type #define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} - // Info for a given field type. typedef struct { uint8_t align; @@ -129,6 +121,10 @@ typedef union { struct _upb_string; typedef struct _upb_string upb_string; +struct _upb_array; +typedef struct _upb_array upb_array; +struct _upb_msg; +typedef struct _upb_msg upb_msg; typedef uint32_t upb_strlen_t; @@ -142,6 +138,11 @@ typedef union { uint32_t uint32; uint64_t uint64; bool _bool; + upb_string *str; + upb_msg *msg; + upb_array *arr; + upb_atomic_refcount_t *refcount; + void *_void; } upb_value; // A pointer to a .proto value. The owner must have an out-of-band way of @@ -155,13 +156,90 @@ typedef union { uint32_t *uint32; uint64_t *uint64; bool *_bool; + upb_string **str; + upb_msg **msg; + upb_array **arr; + void *_void; } upb_valueptr; +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 + INLINE upb_valueptr upb_value_addrof(upb_value *val) { upb_valueptr ptr = {&val->_double}; return ptr; } +// Converts upb_value_ptr -> upb_value by reading from the pointer. We need to +// know the value type to perform this operation, because we need to know how +// much memory to copy. 
+INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { + upb_value val; + +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.member_name = *ptr.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + return val; + +#undef CASE +} + +// Writes a upb_value to a upb_value_ptr location. We need to know the value +// type to perform this operation, because we need to know how much memory to +// copy. +INLINE void upb_value_write(upb_valueptr ptr, upb_value val, + upb_fieldtype_t ft) { +#define CASE(t, member_name) \ + case UPB_TYPE(t): *ptr.member_name = val.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + +#undef CASE +} + // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { diff --git a/core/upb_atomic.h b/core/upb_atomic.h index 01fc8a2..1cd848b 100644 --- a/core/upb_atomic.h +++ b/core/upb_atomic.h @@ -127,6 +127,10 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { Implement them or compile with UPB_THREAD_UNSAFE. #endif +INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) { + return upb_atomic_read(a) == 1; +} + /* Reader/Writer lock. 
********************************************************/ #ifdef UPB_THREAD_UNSAFE diff --git a/core/upb_def.c b/core/upb_def.c index e117455..1c8fbdc 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -12,6 +12,16 @@ #define CHECKSRC(x) if(!(x)) goto src_err #define CHECK(x) if(!(x)) goto err +/* Rounds p up to the next multiple of t. */ +static size_t upb_align_up(size_t val, size_t align) { + return val % align == 0 ? val : val + align - (val % align); +} + +static int upb_div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; +} + // A little dynamic array for storing a growing list of upb_defs. typedef struct { upb_def **defs; @@ -409,6 +419,19 @@ src_err: /* upb_msgdef *****************************************************************/ +static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { + // Sort by data size (ascending) to reduce padding. + size_t size1 = upb_types[f1->type].size; + size_t size2 = upb_types[f2->type].size; + if (size1 != size2) return size1 - size2; + // Otherwise return in number order (just so we get a reproduceable order. + return f1->number - f2->number; +} + +static int upb_compare_fields(const void *f1, const void *f2) { + return upb_compare_typed_fields(*(void**)f1, *(void**)f2); +} + // Processes a google.protobuf.DescriptorProto, adding defs to "defs." 
static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) { @@ -418,7 +441,6 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); int32_t start_count = defs->len; - upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { @@ -451,6 +473,45 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); goto err; } + + + // Create an ordering over the fields. + upb_field_count_t n = upb_msgdef_numfields(m); + upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); + upb_field_count_t field = 0; + upb_msg_iter i; + for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + sorted_fields[field++]= upb_msg_iter_field(i); + } + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); + + // Assign offsets in the msg. + m->set_flags_bytes = upb_div_round_up(n, 8); + m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes; + + size_t max_align = 0; + for (int i = 0; i < n; i++) { + upb_fielddef *f = sorted_fields[i]; + upb_type_info *type_info = &upb_types[f->type]; + + // This identifies the set bit. When we implement is_initialized (a + // general check about whether all required bits are set) we will probably + // want to use a different ordering that puts all the required bits + // together. + f->field_index = i; + + // General alignment rules are: each member must be at an address that is a + // multiple of that type's alignment. Also, the size of the structure as a + // whole must be a multiple of the greatest alignment of any member. + size_t offset = upb_align_up(m->size, type_info->align); + // Offsets are relative to the end of the refcount. 
+ f->byte_offset = offset - sizeof(upb_atomic_refcount_t); + m->size = offset + type_info->size; + max_align = UPB_MAX(max_align, type_info->align); + } + + if (max_align > 0) m->size = upb_align_up(m->size, max_align); + upb_deflist_qualify(defs, m->base.fqname, start_count); upb_deflist_push(defs, UPB_UPCAST(m)); return true; @@ -664,7 +725,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, } // Check the type of the found def. - upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(found->def->type != expected) { upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); return false; diff --git a/core/upb_def.h b/core/upb_def.h index 3294a8d..9eb961a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -103,7 +103,7 @@ typedef struct _upb_fielddef { upb_field_count_t field_index; // Indicates set bit. upb_field_number_t number; - upb_field_type_t type; + upb_fieldtype_t type; upb_label_t label; // True if we own a ref on "def" (above). This is true unless this edge is // part of a cycle. @@ -112,10 +112,10 @@ typedef struct _upb_fielddef { // A variety of tests about the type of a field. 
INLINE bool upb_issubmsg(upb_fielddef *f) { - return upb_issubmsgtype(f->type); + return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE); } INLINE bool upb_isstring(upb_fielddef *f) { - return upb_isstringtype(f->type); + return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES); } INLINE bool upb_isarray(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); @@ -125,6 +125,19 @@ INLINE bool upb_hasdef(upb_fielddef *f) { return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } +INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) { + if (upb_isarray(f)) { + return UPB_VALUETYPE_ARRAY; + } else { + return f->type; + } +} + +INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) { + assert(upb_isarray(f)); + return f->type; +} + INLINE bool upb_field_ismm(upb_fielddef *f) { return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); } @@ -139,6 +152,8 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; + uint32_t size; + uint32_t set_flags_bytes; // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -169,9 +184,14 @@ INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) { + return upb_strtable_count(&m->ntof); +} + // Iteration over fields. The order is undefined. // upb_msg_iter i; -// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { +// upb_fielddef *f = upb_msg_iter_field(i); // // ... // } typedef upb_itof_ent *upb_msg_iter; diff --git a/core/upb_msg.c b/core/upb_msg.c new file mode 100644 index 0000000..75f7a35 --- /dev/null +++ b/core/upb_msg.c @@ -0,0 +1,123 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
+ * + * Data structure for storing a message of protobuf data. + */ + +#include "upb_msg.h" + +void _upb_elem_free(upb_value v, upb_fielddef *f) { + switch(f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + _upb_msg_free(v.msg, upb_downcast_msgdef(f->def)); + break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + _upb_string_free(v.str); + break; + default: + abort(); + } +} + +void _upb_field_free(upb_value v, upb_fielddef *f) { + if (upb_isarray(f)) { + _upb_array_free(v.arr, f); + } else { + _upb_elem_free(v, f); + } +} + +upb_msg *upb_msg_new(upb_msgdef *md) { + upb_msg *msg = malloc(md->size); + // Clear all set bits and cached pointers. + memset(msg, 0, md->size); + upb_atomic_refcount_init(&msg->refcount, 1); + return msg; +} + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { + // Need to release refs on all sub-objects. + upb_msg_iter i; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) _upb_field_unref(upb_value_read(p, type), f); + } + free(msg); +} + +upb_array *upb_array_new(void) { + upb_array *arr = malloc(sizeof(*arr)); + upb_atomic_refcount_init(&arr->refcount, 1); + arr->size = 0; + arr->len = 0; + arr->elements._void = NULL; + return arr; +} + +void _upb_array_free(upb_array *arr, upb_fielddef *f) { + if (upb_elem_ismm(f)) { + // Need to release refs on sub-objects. 
+ upb_valuetype_t type = upb_elem_valuetype(f); + for (upb_arraylen_t i = 0; i < arr->size; i++) { + upb_valueptr p = _upb_array_getptr(arr, f, i); + _upb_elem_unref(upb_value_read(p, type), f); + } + } + if (arr->elements._void) free(arr->elements._void); + free(arr); +} + +upb_value upb_field_new(upb_fielddef *f, upb_valuetype_t type) { + upb_value v; + switch(type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + v.msg = upb_msg_new(upb_downcast_msgdef(f->def)); + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + v.str = upb_string_new(); + case UPB_VALUETYPE_ARRAY: + v.arr = upb_array_new(); + default: + abort(); + } + return v; +} + +static void upb_field_recycle(upb_value val) { + (void)val; +} + +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, + upb_valuetype_t type) { + if (val._void == NULL || !upb_atomic_only(val.refcount)) { + if (val._void != NULL) upb_atomic_unref(val.refcount); + val = upb_field_new(f, type); + upb_value_write(p, val, type); + } else { + upb_field_recycle(val); + } + return val; +} + +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} + +void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} diff --git a/core/upb_msg.h b/core/upb_msg.h index 5215bd9..2db67c0 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -9,14 +9,39 @@ #ifndef UPB_MSG_H #define UPB_MSG_H +#include "upb.h" +#include "upb_def.h" +#include + #ifdef __cplusplus extern "C" { #endif -typedef struct { +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value v, upb_fielddef *f, + upb_valuetype_t type); + +INLINE void _upb_value_ref(upb_value v) { upb_atomic_ref(v.refcount); } + +void _upb_field_free(upb_value v, upb_fielddef *f); +void _upb_elem_free(upb_value v, upb_fielddef *f); +INLINE void _upb_field_unref(upb_value v, upb_fielddef *f) { + 
assert(upb_field_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_field_free(v, f); +} +INLINE void _upb_elem_unref(upb_value v, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_elem_free(v, f); +} + +/* upb_array ******************************************************************/ + +typedef uint32_t upb_arraylen_t; +struct _upb_array { upb_atomic_refcount_t refcount; - uint32_t len; - uint32_t size; + upb_arraylen_t len; + upb_arraylen_t size; upb_valueptr elements; }; @@ -31,29 +56,70 @@ INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); } +INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, + uint32_t elem) { + upb_valueptr p; + p._void = &a->elements.uint8[elem * upb_types[f->type].size]; + return p; +} + INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { assert(elem < upb_array_len(a)); return upb_value_read(_upb_array_getptr(a, f, elem), f->type); } // For string or submessages, will release a ref on the previously set value. +// and take a ref on the new value. The array must already be at least "elem" +// long; to append use append_mutable. INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem, upb_value val) { + assert(elem < upb_array_len(a)); + upb_valueptr p = _upb_array_getptr(a, f, elem); + if (upb_elem_ismm(f)) { + _upb_elem_unref(upb_value_read(p, f->type), f); + _upb_value_ref(val); + } + upb_value_write(p, val, f->type); } -// Append an element with the default value, returning it. For strings or -// submessages, this will try to reuse previously allocated memory. 
-INLINE upb_value upb_array_append_mutable(upb_array *a, upb_fielddef *f) { +INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) { + if (a->len == a->size) { + a->len *= 2; + a->elements._void = realloc(a->elements._void, + a->len * upb_types[f->type].size); + } } -typedef struct { +// Append an element to an array of string or submsg with the default value, +// returning it. This will try to reuse previously allocated memory. +INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + upb_array_resize(a, f); + upb_valueptr p = _upb_array_getptr(a, f, a->len++); + upb_valuetype_t type = upb_elem_valuetype(f); + upb_value val = upb_value_read(p, type); + val = upb_field_tryrecycle(p, val, f, type); + return val; +} + + +/* upb_msg ********************************************************************/ + +struct _upb_msg { upb_atomic_refcount_t refcount; uint8_t data[4]; // We allocate the appropriate amount per message. -} upb_msg; +}; // Creates a new msg of the given type. upb_msg *upb_msg_new(upb_msgdef *md); +// Returns a pointer to the given field. +INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { + upb_valueptr p; + p._void = &msg->data[f->byte_offset]; + return p; +} + void _upb_msg_free(upb_msg *msg, upb_msgdef *md); INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); @@ -65,6 +131,10 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; } +INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { + msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); +} + // Returns the current value of the given field if set, or the default value if // not set. 
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { @@ -79,12 +149,29 @@ INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { // Otherwise sets it and returns an empty instance, attempting to reuse any // previously allocated memory. INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f) { + assert(upb_field_ismm(f)); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + upb_value val = upb_value_read(p, type); + if (!upb_msg_has(msg, f)) { + upb_msg_sethas(msg, f); + val = upb_field_tryrecycle(p, val, f, type); + } + return val; } // Sets the current value of the field. If this is a string, array, or // submessage field, releases a ref on the value (if any) that was previously // set. INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) { + _upb_field_unref(upb_value_read(p, type), f); + _upb_value_ref(val); + } + upb_msg_sethas(msg, f); + upb_value_write(p, val, upb_field_valuetype(f)); } // Unsets all field values back to their defaults. @@ -92,6 +179,17 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } +// A convenience function for decoding an entire protobuf all at once, without +// having to worry about setting up the appropriate objects. +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + +// A convenience function for encoding an entire protobuf all at once. If an +// error occurs, the null string is returned and the status object contains +// the error. 
+void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 7591f78..c35212e 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -14,8 +14,10 @@ // Returns true if the give wire type and field type combination is valid, // taking into account both packed and non-packed encodings. -static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - return (1 << wt) & upb_types[ft].allowed_wire_types; +static bool upb_check_type(upb_wire_type_t wt, upb_fielddef *f) { + // TODO: need to take into account the label; only repeated fields are + // allowed to use packed encoding. + return (1 << wt) & upb_types[f->type].allowed_wire_types; } // Performs zig-zag decoding, which is used by sint32 and sint64. @@ -358,7 +360,7 @@ again: // unknown fields we will implement that here. upb_decoder_skipval(d); goto again; - } else if (!upb_check_type(wire_type, f->type)) { + } else if (!upb_check_type(wire_type, f)) { // This is a recoverable error condition. We skip the value but also // return NULL and report the error. upb_decoder_skipval(d); diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h index fa9bace..d01d21f 100644 --- a/stream/upb_strstream.h +++ b/stream/upb_strstream.h @@ -31,7 +31,7 @@ void upb_stringsrc_free(upb_stringsrc *s); void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); // Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
-upb_bytesrc *upb_stringsrc_bytesrc(); +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); /* upb_stringsink *************************************************************/ diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 9083788..9446b8f 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -4,9 +4,10 @@ #include #include #include -#include "upb_data.h" +#include "upb_msg.h" #include "upb_def.h" #include "upb_decoder.h" +#include "upb_strstream.h" int num_assertions = 0; #define ASSERT(expr) do { \ @@ -25,7 +26,7 @@ void compare_arrays(const google::protobuf::Reflection *r, upb_msg *upb_msg, upb_fielddef *upb_f) { ASSERT(upb_msg_has(upb_msg, upb_f)); - upb_arrayptr arr = upb_msg_get(upb_msg, upb_f).arr; + upb_array *arr = upb_msg_get(upb_msg, upb_f).arr; ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f)); for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) { upb_value v = upb_array_get(arr, upb_f, i); @@ -63,7 +64,7 @@ void compare_arrays(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -116,7 +117,7 @@ void compare_values(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetString(proto2_msg, proto2_f); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -133,9 +134,10 @@ void compare(const google::protobuf::Message& proto2_msg, const google::protobuf::Reflection *r = proto2_msg.GetReflection(); const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor(); - ASSERT((upb_field_count_t)d->field_count() == upb_md->num_fields); - for(upb_field_count_t 
i = 0; i < upb_md->num_fields; i++) { - upb_fielddef *upb_f = &upb_md->fields[i]; + ASSERT((upb_field_count_t)d->field_count() == upb_msgdef_numfields(upb_md)); + upb_msg_iter i; + for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) { + upb_fielddef *upb_f = upb_msg_iter_field(i); const google::protobuf::FieldDescriptor *proto2_f = d->FindFieldByNumber(upb_f->number); // Make sure the definitions are equal. @@ -143,7 +145,7 @@ void compare(const google::protobuf::Message& proto2_msg, ASSERT(proto2_f); ASSERT(upb_f->number == proto2_f->number()); ASSERT(std::string(upb_string_getrobuf(upb_f->name), - upb_strlen(upb_f->name)) == + upb_string_len(upb_f->name)) == proto2_f->name()); ASSERT(upb_f->type == proto2_f->type()); ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated()); @@ -166,10 +168,10 @@ void compare(const google::protobuf::Message& proto2_msg, void parse_and_compare(MESSAGE_CIDENT *proto2_msg, upb_msg *upb_msg, upb_msgdef *upb_md, - upb_strptr str) + upb_string *str) { // Parse to both proto2 and upb. - ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_strlen(str))); + ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str))); upb_status status = UPB_STATUS_INIT; upb_msg_decodestr(upb_msg, upb_md, str, &status); ASSERT(upb_ok(&status)); @@ -194,22 +196,32 @@ int main(int argc, char *argv[]) // Initialize upb state, parse descriptor. 
upb_status status = UPB_STATUS_INIT; - upb_symtab *c = upb_symtab_new(); - upb_strptr fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); - if(upb_string_isnull(fds)) { + upb_symtab *symtab = upb_symtab_new(); + upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds == NULL) { fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); return 1; } - upb_symtab_add_desc(c, fds, &status); + upb_symtab_add_descriptorproto(symtab); + upb_def *fds_msgdef = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds); + upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef)); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc)); + upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status); if(!upb_ok(&status)) { - fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": %s.\n", - status.msg); + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); + upb_printerr(&status); return 1; } upb_string_unref(fds); + upb_decoder_free(decoder); + upb_stringsrc_free(ssrc); - upb_strptr proto_name = upb_strdupc(MESSAGE_NAME); - upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(c, proto_name)); + upb_string *proto_name = upb_strdupc(MESSAGE_NAME); + upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name)); if(!def) { fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", UPB_STRARG(proto_name)); @@ -218,8 +230,8 @@ int main(int argc, char *argv[]) upb_string_unref(proto_name); // Read the message data itself. 
- upb_strptr str = upb_strreadfile(MESSAGE_FILE); - if(upb_string_isnull(str)) { + upb_string *str = upb_strreadfile(MESSAGE_FILE); + if(str == NULL) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return 1; } @@ -234,7 +246,7 @@ int main(int argc, char *argv[]) upb_msg_unref(upb_msg, def); upb_def_unref(UPB_UPCAST(def)); upb_string_unref(str); - upb_symtab_unref(c); + upb_symtab_unref(symtab); return 0; } -- cgit v1.2.3 From 5511aa16b02fd5fc1688b87f06ee09f4c8649f06 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 22 Jan 2011 23:31:33 -0800 Subject: Begin porting old decoder to new interfaces. Doesn't build yet. --- stream/upb_decoder.c | 815 +++++++++++++++++++-------------------------------- 1 file changed, 306 insertions(+), 509 deletions(-) (limited to 'stream') diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index c35212e..b820b08 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -9,587 +9,384 @@ #include #include #include +#include "upb_def.h" -#define UPB_GROUP_END_OFFSET UINT32_MAX - -// Returns true if the give wire type and field type combination is valid, -// taking into account both packed and non-packed encodings. -static bool upb_check_type(upb_wire_type_t wt, upb_fielddef *f) { - // TODO: need to take into account the label; only repeated fields are - // allowed to use packed encoding. - return (1 << wt) & upb_types[f->type].allowed_wire_types; -} - -// Performs zig-zag decoding, which is used by sint32 and sint64. -static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } - - -/* upb_decoder ****************************************************************/ - -// The decoder keeps a stack with one entry per level of recursion. -// upb_decoder_frame is one frame of that stack. -typedef struct { - upb_msgdef *msgdef; - upb_strlen_t end_offset; // For groups, UPB_GROUP_END_OFFSET. 
-} upb_decoder_frame; - -struct upb_decoder { - upb_src src; // upb_decoder is a upb_src. - - upb_msgdef *toplevel_msgdef; - upb_bytesrc *bytesrc; - - // The buffer of input data. NULL is equivalent to the empty string. - upb_string *buf; - - // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain. - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; - - // The number of bytes we have yet to consume from "buf" or tmpbuf. This is - // always >= 0 unless we were just reset or are eof. - int32_t buf_bytesleft; - - // The offset within "buf" from where we are currently reading. This can be - // <0 if we are reading some residual bytes from the previous buffer, which - // are stored in tmpbuf and combined with bytes from "buf". - int32_t buf_offset; - - // The overall stream offset of the beginning of "buf". - uint32_t buf_stream_offset; - - // Wire type of the key we just read. - upb_wire_type_t wire_type; - - // Delimited length of the string field we are reading. - upb_strlen_t delimited_len; - - upb_strlen_t packed_end_offset; - - // Fielddef for the key we just read. - upb_fielddef *field; - - // We keep a stack of messages we have recursed into. - upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; -}; +/* Functions to read wire values. *********************************************/ +// These functions are internal to the decode, but might be moved into an +// internal header file if we at some point in the future opt to do code +// generation, because the generated code would want to inline these functions. +// The same applies to the functions to read .proto values below. -/* upb_decoder buffering. *****************************************************/ +const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, + uint64_t *val, upb_status *status); -static upb_strlen_t upb_decoder_offset(upb_decoder *d) +// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). 
+INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end, + uint64_t *val, upb_status *status) { - return d->buf_stream_offset + d->buf_offset; -} - -static bool upb_decoder_nextbuf(upb_decoder *d) -{ - assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE); - - // Copy residual bytes to temporary buffer. - if(d->buf_bytesleft > 0) { - memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset, - d->buf_bytesleft); - } - - // Recycle old buffer. - if(d->buf) { - d->buf_offset -= upb_string_len(d->buf); - d->buf_stream_offset += upb_string_len(d->buf); - } - d->buf = upb_string_tryrecycle(d->buf); - - // Pull next buffer. - if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { - d->buf_bytesleft += upb_string_len(d->buf); - return true; + // We inline this common case (1-byte varints), if that fails we dispatch to + // the full (non-inlined) version. + if((*buf & 0x80) == 0) { + *val = *buf & 0x7f; + return buf + 1; } else { - return false; + return upb_get_v_uint64_t_full(buf, end, val, status); } } -static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) +// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit +// varint is not a true wire type. +INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, + uint32_t *val, upb_status *status) { - if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE && !upb_bytesrc_eof(d->bytesrc)) - upb_decoder_nextbuf(d); - - if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { - if(upb_bytesrc_eof(d->bytesrc) && d->buf_bytesleft > 0) { - // We're working through the last few bytes of the buffer. - } else if(upb_bytesrc_eof(d->bytesrc)) { - // End of stream, no more bytes left. - assert(d->buf_bytesleft == 0); - d->src.eof = true; - return NULL; - } else { - // We are short of bytes even though the bytesrc isn't EOF; must be error. 
- upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return NULL; - } - } - - if(d->buf_offset >= 0) { - // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE - // contiguous bytes, so we can read directly out of it. - *bytes = d->buf_bytesleft; - return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; - } else { - // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we - // have so far. - upb_strlen_t len = -d->buf_offset; - if(d->buf) { - upb_strlen_t to_copy = - UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf)); - memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy); - len += to_copy; - } - // Pad the buffer out to UPB_MAX_ENCODED_SIZE. - memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len); - *bytes = len; - return d->tmpbuf; - } + uint64_t val64; + const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); + *val = (uint32_t)val64; // Discard the high bits. + return ret; } -// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE -// bytes long. This buffer contains the next bytes in the stream (even if -// those bytes span multiple buffers). *bytes is set to the number of actual -// stream bytes that are available in the returned buffer. If -// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. -// -// After the data has been read, upb_decoder_consume() should be called to -// indicate how many bytes were consumed. -static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). +INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, + uint32_t *val, upb_status *status) { - if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) { - // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE - // contiguous bytes, so we can read directly out of it. 
- *bytes = d->buf_bytesleft; - return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; - } else { - return upb_decoder_getbuf_full(d, bytes); + const uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; } + memcpy(val, buf, sizeof(uint32_t)); + return uint32_end; } -static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). +INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, + uint64_t *val, upb_status *status) { - assert(bytes <= UPB_MAX_ENCODED_SIZE); - d->buf_offset += bytes; - d->buf_bytesleft -= bytes; - if(d->buf_offset < 0) { - // We still have residual bytes we have not consumed. - memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); - } - assert(d->buf_bytesleft >= 0); - - // Detect end-of-submessage. - if(upb_decoder_offset(d) >= d->top->end_offset) { - d->src.eof = true; + const uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; } - - return true; + memcpy(val, buf, sizeof(uint64_t)); + return uint64_end; } -static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) +INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, + const uint8_t *end, + upb_status *status) { - d->buf_offset += bytes; - d->buf_bytesleft -= bytes; - while(d->buf_bytesleft < 0) { - if(!upb_decoder_nextbuf(d)) return false; - } - - // Detect end-of-submessage. 
- if(upb_decoder_offset(d) >= d->top->end_offset) { - d->src.eof = true; + const uint8_t *const maxend = buf + 10; + uint8_t last = 0x80; + for(; buf < (uint8_t*)end && (last & 0x80); buf++) + last = *buf; + + if(buf >= end && buf <= maxend && (last & 0x80)) { + status->code = UPB_STATUS_NEED_MORE_DATA; + buf = end; + } else if(buf > maxend) { + status->code = UPB_ERROR_UNTERMINATED_VARINT; + buf = end; } - - return true; + return buf; } - -/* Functions to read wire values. *********************************************/ - -// Parses remining bytes of a 64-bit varint that has already had its first byte -// parsed. -INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high) +INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, + const uint8_t *end, + upb_status *status) { - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - const uint8_t *start = buf; - if(!buf) return false; - - *high = 0; - uint32_t b; - b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; - - if(bytes_available >= 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " - "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); - } else { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle " - "of a varint, stream offset: %u", upb_decoder_offset(d)); + const uint8_t *uint32_end = buf + 
sizeof(uint32_t); + if(uint32_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; } - return false; - -done: - return upb_decoder_consume(d, buf - start); + return uint32_end; } -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val) +INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, + const uint8_t *end, + upb_status *status) { - uint32_t high; - if(!upb_decoder_readv64(d, val, &high)) return false; - - // We expect the high bits to be zero, except that signed 32-bit values are - // first sign-extended to be wire-compatible with 64 bits, in which case we - // expect the high bits to be all one. - // - // We could perform a slightly more sophisticated check by having the caller - // indicate whether a signed or unsigned value is being read. We could check - // that the high bits are all zeros for unsigned, and properly sign-extended - // for signed. - if(high != 0 && ~high != 0) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but " - "the high bits contained data we should not truncate: " - "%ux, stream offset: %u", high, upb_decoder_offset(d)); - return false; + const uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; } - return true; + return uint64_end; } -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller -// promises that 4 bytes are available at buf. -static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val) -{ - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - if(bytes_available < 4) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Stream ended in the middle of a 32-bit value"); - return false; - } - memcpy(val, buf, 4); - // TODO: byte swap if big-endian. 
- return upb_decoder_consume(d, 4); -} +/* Functions to read .proto values. *******************************************/ -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller -// promises that 8 bytes are available at buf. -static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val) -{ - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - if(bytes_available < 8) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Stream ended in the middle of a 64-bit value"); - return false; - } - memcpy(val, buf, 8); - // TODO: byte swap if big-endian. - return upb_decoder_consume(d, 8); -} +// Performs zig-zag decoding, which is used by sint32 and sint64. +INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// Returns the length of a varint (wire type: UPB_WIRE_TYPE_VARINT), allowing -// it to be easily skipped. Caller promises that 10 bytes are available at -// "buf". The function will return a maximum of 11 bytes before quitting. -static uint8_t upb_decoder_skipv64(upb_decoder *d) +// Parses a tag, places the result in *tag. +INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, + upb_tag *tag, upb_status *status) { - uint32_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - uint8_t i; - for(i = 0; i < 10 && buf[i] & 0x80; i++) - ; // empty loop body. - if(i > 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint."); - return false; - } - return upb_decoder_consume(d, i); + uint32_t tag_int; + const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return ret; } +// The decoder keeps a stack with one entry per level of recursion. 
+// upb_decoder_frame is one frame of that stack. +typedef struct { + upb_msgdef *msgdef; + upb_fielddef *field; + size_t end_offset; // For groups, 0. +} upb_decoder_frame; -/* upb_src implementation for upb_decoder. ************************************/ +struct upb_decoder { + // Immutable state of the decoder. + upb_src src; + upb_dispatcher dispatcher; + upb_msgdef *toplevel_msgdef; + upb_decoder_frame stack[UPB_MAX_NESTING]; -bool upb_decoder_skipval(upb_decoder *d); + // Mutable state of the decoder. -upb_fielddef *upb_decoder_getdef(upb_decoder *d) -{ - if (d->src.eof) return NULL; - // Handles the packed field case. - if(d->field) { - return d->field; - } + // Where we will store any errors that occur. + upb_status *status; + + // Stack entries store the offset where the submsg ends (for groups, 0). + upb_decoder_frame *top, *limit; + + // Current input buffer. + upb_string *buf; + + // The offset within the overall stream represented by the *beginning* of buf. + upb_strlen_t buf_stream_offset; + + // Our current offset *within* buf. Will be negative if we are buffering + // from previous buffers in tmpbuf. + upb_strlen_t buf_offset; + + // Holds any bytes we have from previous buffers. The number of bytes we + // have encoded here is -buf_offset, if buf_offset<0, 0 otherwise. + uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; +}; + +upb_flow_t upb_decode_varint(upb_decoder *d, ptrs *p, + uint32_t *low, uint32_t *high) { + if (p->end - p->ptr > UPB_MAX_ENCODED_SIZE) { + // Fast path; we know we have a complete varint in our existing buffer. 
+ *high = 0; + uint32_t b; + uint8_t *ptr = p->ptr; + b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + if(bytes_available >= 10) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " + "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); + return false; + } - uint32_t key = 0; -again: - if(!upb_decoder_readv32(d, &key)) return NULL; - upb_wire_type_t wire_type = key & 0x7; - int32_t field_number = key >> 3; - - if(wire_type == UPB_WIRE_TYPE_DELIMITED) { - // For delimited wire values we parse the length now, since we need it in - // all cases. - if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL; - } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) { - if(d->top->end_offset == UPB_GROUP_END_OFFSET) { - d->src.eof = true; + done: + p->ptr = ptr; + } else { + // Slow path: we may have to combine one or more buffers to get a whole + // varint worth of data. 
+ uint8_t buf[UPB_MAX_ENCODED_SIZE]; + uint8_t *p = buf, *end = buf + sizeof(buf); + for(ing bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) + *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; + + if(d->status->code == UPB_EOF && (last & 0x80)) { + upb_seterr(status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + } else if(buf == maxend) { + upb_seterr(status, UPB_ERROR, + "Varint was unterminated after 10 bytes.\n"); } else { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - upb_decoder_offset(d)); + // Success. + return; } - return NULL; + ungetbytes(d, buf, p - buf); } +} - // Look up field by tag number. - upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, field_number); - - if (!f) { - // Unknown field. If/when the upb_src interface supports reporting - // unknown fields we will implement that here. - upb_decoder_skipval(d); - goto again; - } else if (!upb_check_type(wire_type, f)) { - // This is a recoverable error condition. We skip the value but also - // return NULL and report the error. - upb_decoder_skipval(d); - // TODO: better error message. - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n"); - return NULL; - } - d->field = f; - d->wire_type = wire_type; - return f; +static const void *get_msgend(upb_decoder *d) +{ + if(d->top->end_offset > 0) + return upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); + else + return (void*)UINTPTR_MAX; // group. 
} -bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) +static bool isgroup(const void *submsg_end) { - switch(upb_types[d->field->type].native_wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint32_t low, high; - if(!upb_decoder_readv64(d, &low, &high)) return false; - uint64_t u64 = ((uint64_t)high << 32) | low; - if(d->field->type == UPB_TYPE(SINT64)) - *val.int64 = upb_zzdec_64(u64); - else - *val.uint64 = u64; - break; - } - case UPB_WIRE_TYPE_32BIT_VARINT: { - uint32_t u32; - if(!upb_decoder_readv32(d, &u32)) return false; - if(d->field->type == UPB_TYPE(SINT32)) - *val.int32 = upb_zzdec_32(u32); - else - *val.uint32 = u32; - break; - } - case UPB_WIRE_TYPE_64BIT: - if(!upb_decoder_readf64(d, val.uint64)) return false; - break; - case UPB_WIRE_TYPE_32BIT: - if(!upb_decoder_readf32(d, val.uint32)) return false; - break; - default: - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Attempted to call getval on a group."); - return false; - } - // For a packed field where we have not reached the end, we leave the field - // in the decoder so we will return it again without parsing a key. - if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || - upb_decoder_offset(d) >= d->packed_end_offset) { - d->field = NULL; - } - return true; + return submsg_end == (void*)UINTPTR_MAX; } -bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { - // A string, bytes, or a length-delimited submessage. The latter isn't - // technically a string, but can be gotten as one to perform lazy parsing. - const int32_t total_len = d->delimited_len; - if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { - // The entire string is inside our current buffer, so we can just - // return a substring of the buffer without copying. 
- upb_string_substr(str, d->buf, - upb_string_len(d->buf) - d->buf_bytesleft, - total_len); - upb_decoder_skipbytes(d, total_len); - } else { - // The string spans buffers, so we must copy from the residual buffer - // (if any bytes are there), then the buffer, and finally from the bytesrc. - uint8_t *ptr = (uint8_t*)upb_string_getrwbuf( - str, UPB_MIN(total_len, d->buf_bytesleft)); - int32_t len = 0; - if(d->buf_offset < 0) { - // Residual bytes we need to copy from tmpbuf. - memcpy(ptr, d->tmpbuf, -d->buf_offset); - len += -d->buf_offset; - } - if(d->buf) { - // Bytes from the buffer. - memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset, - upb_string_len(str) - len); - } - upb_decoder_skipbytes(d, upb_string_len(str)); - if(len < total_len) { - // Bytes from the bytesrc. - if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) { - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } - // Have to advance this since the buffering layer of the decoder will - // never see these bytes. - d->buf_stream_offset += total_len - len; - } - } - d->field = NULL; - return true; +extern upb_wire_type_t upb_expected_wire_types[]; +// Returns true if wt is the correct on-the-wire type for ft. +INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { + // This doesn't currently support packed arrays. + return upb_types[ft].expected_wire_type == wt; } -static bool upb_decoder_skipgroup(upb_decoder *d); -bool upb_decoder_startmsg(upb_decoder *d) { - if(++d->top >= d->limit) { - upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED, +// Pushes a new stack frame for a submessage with the given len (which will +// be zero if the submessage is a group). 
+static const uint8_t *push(upb_decoder *d, const uint8_t *start, + uint32_t submsg_len, upb_fielddef *f, + upb_status *status) +{ + d->top->field = f; + d->top++; + if(d->top >= d->limit) { + upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, "Nesting exceeded maximum (%d levels)\n", UPB_MAX_NESTING); - return false; + return NULL; } upb_decoder_frame *frame = d->top; - if(d->field->type == UPB_TYPE(GROUP)) { - frame->end_offset = UPB_GROUP_END_OFFSET; - } else if (d->field->type == UPB_TYPE(MESSAGE)) { - frame->end_offset = upb_decoder_offset(d) + d->delimited_len; - } else { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Tried to startmsg a non-msg field."); - } - frame->msgdef = upb_downcast_msgdef(d->field->def); - d->field = NULL; - return true; -} - -bool upb_decoder_endmsg(upb_decoder *d) { - if(d->top > d->stack) { - --d->top; - if(!d->src.eof) { - if(d->top->end_offset == UPB_GROUP_END_OFFSET) - upb_decoder_skipgroup(d); - else - upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); - } - // Detect end-of-submessage. - d->src.eof = upb_decoder_offset(d) >= d->top->end_offset; - return true; - } else { - return false; - } -} + frame->end_offset = d->completed_offset + submsg_len; + frame->msgdef = upb_downcast_msgdef(f->def); -bool upb_decoder_skipval(upb_decoder *d) { - upb_strlen_t bytes_to_skip; - d->field = NULL; - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: { - return upb_decoder_skipv64(d); - } - case UPB_WIRE_TYPE_START_GROUP: - if(!upb_decoder_startmsg(d)) return false; - if(!upb_decoder_skipgroup(d)) return false; - if(!upb_decoder_endmsg(d)) return false; - return true; - default: - // Including UPB_WIRE_TYPE_END_GROUP. 
- assert(false); - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); - return false; - case UPB_WIRE_TYPE_64BIT: - bytes_to_skip = 8; - break; - case UPB_WIRE_TYPE_32BIT: - bytes_to_skip = 4; - break; - case UPB_WIRE_TYPE_DELIMITED: - // Works for both string/bytes *and* submessages. - bytes_to_skip = d->delimited_len; - break; - } - return upb_decoder_skipbytes(d, bytes_to_skip); + upb_dispatch_startsubmsg(&d->dispatcher, f); + return get_msgend(d); } -static bool upb_decoder_skipgroup(upb_decoder *d) +// Pops a stack frame, returning a pointer for where the next submsg should +// end (or a pointer that is out of range for a group). +static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) { - // This will be mututally recursive with upb_decoder_skipval() if the group - // has sub-groups. If we wanted to handle EAGAIN in the future, this - // approach would not work; we would need to track the group depth - // explicitly. - while(upb_decoder_getdef(d)) { - if(!upb_decoder_skipval(d)) return false; - } - // If we are at the end of the group like we want to be, then - // upb_decoder_getdef() returned NULL because of eof, not error. - if(!&d->src.eof) return false; - return true; + d->top--; + upb_dispatch_endsubmsg(&d->dispatcher); + return get_msgend(d); } -upb_src_vtable upb_decoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_decoder_getdef, - (upb_src_getval_fptr)&upb_decoder_getval, - (upb_src_getstr_fptr)&upb_decoder_getstr, - (upb_src_skipval_fptr)&upb_decoder_skipval, - (upb_src_startmsg_fptr)&upb_decoder_startmsg, - (upb_src_endmsg_fptr)&upb_decoder_endmsg, -}; +void upb_decoder_run(upb_src *src, upb_status *status) { + // buf is our current offset, moves from start to end. 
+ const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str) + d->buf_offset; + const uint8_t *end = (uint8_t*)upb_string_getrobuf(str) + upb_string_len(str); + const uint8_t *submsg_end = get_msgend(d, start); + upb_msgdef *msgdef = d->top->msgdef; + upb_string *str = NULL; + + // Main loop: executed once per tag/field pair. + while(1) { + // Parse/handle tag. + upb_tag tag; + CHECK(decode_tag(d, &buf, &end, &tag)); + + // Decode wire data. Hopefully this branch will predict pretty well + // since most types will read a varint here. + upb_value val; + switch (tag.wire_type) { + case UPB_WIRE_TYPE_END_GROUP: + if(!isgroup(submsg_end)) { + upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current " + "message is not a group, byte offset: %zd", + d->completed_offset + (completed - start)); + goto err; + } + submsg_end = pop(d, start, status, &msgdef); + completed = buf; + goto check_msgend; + case UPB_WIRE_TYPE_VARINT: + case UPB_WIRE_TYPE_DELIMITED: + // For the delimited case we are parsing the length. + CHECK(upb_decode_varint(d, &buf, &end, &val)); + break; + case UPB_WIRE_TYPE_32BIT: + CHECK(upb_decode_32bit(d, &buf, &end, &val)); + break; + case UPB_WIRE_TYPE_64BIT: + CHECK(upb_decode_64bit(d, &buf, &end, &val)); + break; + } + // Look up field by tag number. + upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); -/* upb_decoder construction/destruction. **************************************/ + if (!f) { + // Unknown field. + } else if (!upb_check_type(tag.wire_type, f->type)) { + // Field has incorrect type. + } -upb_decoder *upb_decoder_new(upb_msgdef *msgdef) -{ - upb_decoder *d = malloc(sizeof(*d)); - d->toplevel_msgdef = msgdef; - d->limit = &d->stack[UPB_MAX_NESTING]; - d->buf = NULL; - upb_src_init(&d->src, &upb_decoder_src_vtbl); - return d; -} + // Perform any further massaging of the data now that we have the fielddef. + // Now we can distinguish strings from submessages, and we know about + // zig-zag-encoded types. 
+ // TODO: handle packed encoding. + switch (f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + CHECK(push(d, start, upb_value_getint32(val), f, status, &msgdef)); + goto check_msgend; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + CHECK(upb_decode_string(d, str, upb_value_getint32(val))); + upb_value_setstr(&val, str); + break; + case UPB_TYPE(SINT32): + upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); + break; + case UPB_TYPE(SINT64): + upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); + break; + default: + // Other types need no further processing at this point. + } + CHECK(upb_dispatch_value(d->sink, f, val, status)); + +check_msgend: + while(buf >= submsg_end) { + if(buf > submsg_end) { + upb_seterr(status, UPB_ERROR, "Expected submsg end offset " + "did not lie on a tag/value boundary."); + goto err; + } + submsg_end = pop(d, start, status, &msgdef); + } + completed = buf; + } -void upb_decoder_free(upb_decoder *d) -{ - upb_string_unref(d->buf); - free(d); +err: + read = (char*)completed - (char*)start; + d->completed_offset += read; + return read; } -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) -{ - upb_string_unref(d->buf); +void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_decoder *d = (upb_decoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers); d->top = d->stack; + d->completed_offset = 0; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it - // indefinitely), so we set the end offset as high as possible, but not equal - // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. - d->top->end_offset = UINT32_MAX - 1; - d->src.eof = false; - d->bytesrc = bytesrc; - d->field = NULL; - d->buf = NULL; - d->buf_bytesleft = 0; - d->buf_stream_offset = 0; - d->buf_offset = 0; + // indefinitely), so we treat it like a group. 
+ d->top->end_offset = 0; } -upb_src *upb_decoder_src(upb_decoder *d) { - return &d->src; +upb_decoder *upb_decoder_new(upb_msgdef *msgdef) { + static upb_src_vtbl vtbl = { + &upb_decoder_sethandlers, + &upb_decoder_run, + }; + upb_decoder *d = malloc(sizeof(*d)); + upb_src_init(&d->src, &vtbl); + upb_dispatcher_init(&d->dispatcher); + d->toplevel_msgdef = msgdef; + d->limit = &d->stack[UPB_MAX_NESTING]; + return d; +} + +void upb_decoder_free(upb_decoder *d) { + free(d); } -- cgit v1.2.3 From 58a70b55c62cfefcbe7a55a2fd41ee6b87c7256f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 16:29:10 -0800 Subject: Decoder code structure is mostly in-place. --- core/upb_stream.h | 20 ++- core/upb_string.h | 57 ++++++-- stream/upb_decoder.c | 363 ++++++++++++++++++++++----------------------------- 3 files changed, 212 insertions(+), 228 deletions(-) (limited to 'stream') diff --git a/core/upb_stream.h b/core/upb_stream.h index cf01a5f..54fd930 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -171,14 +171,18 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If <0, indicates error (check upb_bytesrc_status for details). +// read. If 0, indicates error and puts details in "status". INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count); + upb_strlen_t count, upb_status *status); // Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing -// existing string data (which avoids a copy). +// existing string data (which avoids a copy). On the other hand, if +// the data was *not* already in an existing string, this copies it into +// a upb_string, and if the data needs to be put in a specific range of +// memory (because eg. you need to put it into a different kind of string +// object) then upb_bytesrc_get() could be better. 
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_strlen_t count); + upb_status *status); // A convenience function for getting all the remaining data in a upb_bytesrc // as a upb_string. Returns false and sets "status" if the operation fails. @@ -189,14 +193,6 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); } -// Returns the current error status for the stream. -// Note! The "eof" flag works like feof() in C; it cannot report end-of-file -// until a read has failed due to eof. It cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); -INLINE bool upb_bytesrc_eof(upb_bytesrc *src); - /* upb_bytesink ***************************************************************/ diff --git a/core/upb_string.h b/core/upb_string.h index 1f4b20c..04c0ae9 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -3,26 +3,39 @@ * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. * - * This file defines a simple string type. The overriding goal of upb_string - * is to avoid memcpy(), malloc(), and free() wheverever possible, while - * keeping both CPU and memory overhead low. Throughout upb there are - * situations where one wants to reference all or part of another string - * without copying. upb_string provides APIs for doing this. + * This file defines a simple string type which is length-delimited instead + * of NULL-terminated, and which has useful sharing semantics. + * + * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() + * wheverever possible, while keeping both CPU and memory overhead low. + * Throughout upb there are situations where one wants to reference all or part + * of another string without copying. 
upb_string provides APIs for doing this. * * Characteristics of upb_string: * - strings are reference-counted. - * - strings are logically immutable. + * - strings are immutable (can be mutated only when first created or recycled). * - if a string has no other referents, it can be "recycled" into a new string * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings are not thread-safe by default, but can be made so by calling a - * function. This is not the default because it causes extra CPU overhead. * * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations * unless the string has been marked thread-safe. + * + * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched + * idiom that we go with it instead of making our pointers uint8_t*. + * + * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE + * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if + * you are logically passing a reference to a upb_string to another thread + * (which implies that the other thread must eventually call unref of recycle), + * you have two options: + * + * - create a copy of the string that will be used in the other thread only. + * - call upb_string_get_synchronized_ref(), which will make getref, unref, and + * recycle thread-safe for this upb_string. */ #ifndef UPB_STRING_H @@ -83,10 +96,12 @@ struct _upb_string { // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); +// Internal-only; clients should call upb_string_unref(). void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" -// can be NULL, in which case this is a no-op. +// can be NULL, in which case this is a no-op. 
WARNING: NOT THREAD_SAFE +// UNLESS THE STRING IS SYNCHRONIZED. INLINE void upb_string_unref(upb_string *str) { if (str && upb_atomic_read(&str->refcount) > 0 && upb_atomic_unref(&str->refcount)) { @@ -98,6 +113,7 @@ upb_string *upb_strdup(upb_string *s); // Forward-declare. // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. +// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED! INLINE upb_string *upb_string_getref(upb_string *str) { int refcount = upb_atomic_read(&str->refcount); if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); @@ -163,8 +179,11 @@ void upb_string_substr(upb_string *str, upb_string *target_str, // data. Waiting for a clear use case before actually implementing it. // // Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until -// a matching call to upb_string_detach(). +// guarantees that the given string data will not change or be deleted until a +// matching call to upb_string_detach(), which may block until any concurrent +// readers have finished reading. upb_string_detach() preserves the contents +// of the string by copying the referenced data if there are any other +// referents. // void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); // void upb_string_detach(upb_string *str); @@ -207,6 +226,22 @@ void upb_string_substr(upb_string *str, upb_string *target_str, _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) #define UPB_STACK_STRING_LEN(str, len) \ _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) + +// A convenient way of specifying upb_strings as literals, like: +// +// upb_streql(UPB_STRLIT("expected"), other_str); +// +// However, this requires either C99 compound initializers or C++. +// Must ONLY be called with a string literal as its argument! 
+//#ifdef __cplusplus +//namespace upb { +//class String : public upb_string { +// // This constructor must ONLY be called with a string literal. +// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {} +//}; +//} +//#define UPB_STRLIT(str) upb::String(str) +//#endif #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) /* upb_string library functions ***********************************************/ diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b820b08..fbd7eba 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -11,127 +11,39 @@ #include #include "upb_def.h" -/* Functions to read wire values. *********************************************/ - -// These functions are internal to the decode, but might be moved into an -// internal header file if we at some point in the future opt to do code -// generation, because the generated code would want to inline these functions. -// The same applies to the functions to read .proto values below. - -const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status); - -// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). -INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - // We inline this common case (1-byte varints), if that fails we dispatch to - // the full (non-inlined) version. - if((*buf & 0x80) == 0) { - *val = *buf & 0x7f; - return buf + 1; - } else { - return upb_get_v_uint64_t_full(buf, end, val, status); - } +/* Pure Decoding **************************************************************/ + +// The key fast-path varint-decoding routine. There are a lot of possibilities +// for optimization/experimentation here. 
+INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, + upb_status *status) { + *high = 0; + uint32_t b; + uint8_t *ptr = p->ptr; + b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + upb_seterr(status, UPB_ERROR, "Unterminated varint"); + return false; +done: + return true; } -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - uint64_t val64; - const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); - *val = (uint32_t)val64; // Discard the high bits. - return ret; -} -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint32_t)); - return uint32_end; -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). 
-INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint64_t)); - return uint64_end; -} - -INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) - last = *buf; - - if(buf >= end && buf <= maxend && (last & 0x80)) { - status->code = UPB_STATUS_NEED_MORE_DATA; - buf = end; - } else if(buf > maxend) { - status->code = UPB_ERROR_UNTERMINATED_VARINT; - buf = end; - } - return buf; -} - -INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint32_end; -} - -INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint64_end; -} - -/* Functions to read .proto values. *******************************************/ +/* Decoding/Buffering of individual values ************************************/ // Performs zig-zag decoding, which is used by sint32 and sint64. INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// Parses a tag, places the result in *tag. 
-INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - // The decoder keeps a stack with one entry per level of recursion. // upb_decoder_frame is one frame of that stack. typedef struct { @@ -144,6 +56,7 @@ struct upb_decoder { // Immutable state of the decoder. upb_src src; upb_dispatcher dispatcher; + upb_bytesrc *bytesrc; upb_msgdef *toplevel_msgdef; upb_decoder_frame stack[UPB_MAX_NESTING]; @@ -158,66 +71,108 @@ struct upb_decoder { // Current input buffer. upb_string *buf; + // Our current offset *within* buf. + upb_strlen_t buf_offset; + // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; +}; - // Our current offset *within* buf. Will be negative if we are buffering - // from previous buffers in tmpbuf. - upb_strlen_t buf_offset; +// Called only from the slow path, this function copies the next "len" bytes +// from the stream to "data", adjusting "buf" and "end" appropriately. +INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, + uint8_t **buf, uint8_t **end) { + while (len > 0) { + memcpy(data, *buf, *end-*buf); + len -= (*end-*buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *buf = upb_string_getrobuf(d->buf); + *end = *buf + upb_string_len(d->buf); + } +} - // Holds any bytes we have from previous buffers. The number of bytes we - // have encoded here is -buf_offset, if buf_offset<0, 0 otherwise. - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; -}; +// We use this path when we don't have UPB_MAX_ENCODED_SIZE contiguous bytes +// available in our current buffer. We don't inline this because we accept +// that it will be slow and we don't want to pay for two copies of it. 
+static bool upb_decode_varint_slow(upb_decoder *d) { + uint8_t buf[UPB_MAX_ENCODED_SIZE]; + uint8_t *p = buf, *end = buf + sizeof(buf); + for(int bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) + *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; + + if(d->status->code == UPB_EOF && (last & 0x80)) { + upb_seterr(status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + } else if(buf == maxend) { + upb_seterr(status, UPB_ERROR, + "Varint was unterminated after 10 bytes.\n"); + } else { + // Success. + return; + } +} -upb_flow_t upb_decode_varint(upb_decoder *d, ptrs *p, - uint32_t *low, uint32_t *high) { - if (p->end - p->ptr > UPB_MAX_ENCODED_SIZE) { - // Fast path; we know we have a complete varint in our existing buffer. - *high = 0; - uint32_t b; - uint8_t *ptr = p->ptr; - b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; - - if(bytes_available >= 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " - "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); - return false; - } +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, + const uint8_t **end, upb_tag *tag) { + const uint8_t *buf = *_buf, *end = *_end; + uint32_t tag_int; + // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). + if (end - buf < 2) goto slow; // unlikely. 
+ tag_int = *buf & 0x7f; + if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*buf & 0x7f) << 7; + if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. +slow: + if (!upb_decode_varint_slow(d, _buf, _end)) return false; + buf = *_buf; // Trick the next line into not overwriting us. +done: + *_buf = buf; + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return true; +} + +INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, + uint32_t *low, uint32_t *high) { + if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + return upb_decode_varint_fast(d); + else + return upb_decode_varint_slow(d); +} - done: - p->ptr = ptr; +INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, + uint8_t **buf, uint8_t **end, upb_value *val) { + static const char table = {0, 8, 0, 0, 0, 4}; + size_t bytes = table[wt]; + if (*end - *buf >= bytes) { + // Common (fast) case. + memcpy(&val, *buf, bytes); + *buf += bytes; } else { - // Slow path: we may have to combine one or more buffers to get a whole - // varint worth of data. - uint8_t buf[UPB_MAX_ENCODED_SIZE]; - uint8_t *p = buf, *end = buf + sizeof(buf); - for(ing bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) - *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; - - if(d->status->code == UPB_EOF && (last & 0x80)) { - upb_seterr(status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - } else if(buf == maxend) { - upb_seterr(status, UPB_ERROR, - "Varint was unterminated after 10 bytes.\n"); - } else { - // Success. - return; - } - ungetbytes(d, buf, p - buf); + if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + } + return true; +} + +// "val" initially holds the length of the string, this is replaced by the +// contents of the string. 
+INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { + upb_string_recycle(str); + upb_strlen_t len = upb_valu_getint32(*val); + if (*end - *buf >= len) { + // Common (fast) case. + upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); + *buf += len; + } else { + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + return false; } + return true; } + +/* The main decoding loop *****************************************************/ + static const void *get_msgend(upb_decoder *d) { if(d->top->end_offset > 0) @@ -238,36 +193,29 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } - -// Pushes a new stack frame for a submessage with the given len (which will -// be zero if the submessage is a group). -static const uint8_t *push(upb_decoder *d, const uint8_t *start, +static bool upb_push(upb_decoder *d, const uint8_t *start, uint32_t submsg_len, upb_fielddef *f, upb_status *status) { d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, - "Nesting exceeded maximum (%d levels)\n", - UPB_MAX_NESTING); - return NULL; + upb_seterr(status, UPB_ERROR, "Nesting too deep."); + return false; } - upb_decoder_frame *frame = d->top; - frame->end_offset = d->completed_offset + submsg_len; - frame->msgdef = upb_downcast_msgdef(f->def); - - upb_dispatch_startsubmsg(&d->dispatcher, f); - return get_msgend(d); + d->top->end_offset = d->completed_offset + submsg_len; + d->top->msgdef = upb_downcast_msgdef(f->def); + *submsg_end = get_msgend(d); + if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; + return true; } -// Pops a stack frame, returning a pointer for where the next submsg should -// end (or a pointer that is out of range for a group). 
-static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) +static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) { d->top--; upb_dispatch_endsubmsg(&d->dispatcher); - return get_msgend(d); + *submsg_end = get_msgend(d); + return true; } void upb_decoder_run(upb_src *src, upb_status *status) { @@ -278,11 +226,13 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_msgdef *msgdef = d->top->msgdef; upb_string *str = NULL; + upb_dispatch_startmsg(&d->dispatcher); + // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &buf, &end, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. @@ -290,24 +240,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: if(!isgroup(submsg_end)) { - upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - d->completed_offset + (completed - start)); + upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - submsg_end = pop(d, start, status, &msgdef); - completed = buf; - goto check_msgend; + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); + goto check_msgend; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. CHECK(upb_decode_varint(d, &buf, &end, &val)); break; case UPB_WIRE_TYPE_32BIT: - CHECK(upb_decode_32bit(d, &buf, &end, &val)); - break; case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_64bit(d, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); break; } @@ -315,24 +260,31 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); if (!f) { - // Unknown field. 
+ if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) + CHECK(upb_decode_string(d, &val, &str)); + CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { - // Field has incorrect type. + // TODO: put more details in this error msg. + upb_seterr(status, UPB_ERROR, "Field had incorrect type."); + goto err; } // Perform any further massaging of the data now that we have the fielddef. // Now we can distinguish strings from submessages, and we know about // zig-zag-encoded types. // TODO: handle packed encoding. + // TODO: if we were being paranoid, we could check for 32-bit-varint types + // that the top 32 bits all match the highest bit of the low 32 bits. + // If this is not true we are losing data. But the main protobuf library + // doesn't check this, and it would slow us down, so pass for now. switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; + CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + goto check_msgend; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, str, upb_value_getint32(val))); - upb_value_setstr(&val, str); + CHECK(upb_decode_string(d, &val, &str)); break; case UPB_TYPE(SINT32): upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); @@ -341,26 +293,27 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); break; default: - // Other types need no further processing at this point. + break; // Other types need no further processing at this point. 
} CHECK(upb_dispatch_value(d->sink, f, val, status)); check_msgend: while(buf >= submsg_end) { if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Expected submsg end offset " - "did not lie on a tag/value boundary."); + upb_seterr(status, UPB_ERROR, "Bad submessage end.") goto err; } - submsg_end = pop(d, start, status, &msgdef); + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); } - completed = buf; } + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + return; + err: - read = (char*)completed - (char*)start; - d->completed_offset += read; - return read; + if (upb_ok(status)) { + upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); + } } void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { -- cgit v1.2.3 From fe659c8c93c464fcbcfb5739935a2e4341d01fd4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 18:59:31 -0800 Subject: Getting closer to a decoder that could actually compile and work. --- core/upb_stream.h | 7 +- core/upb_string.h | 6 ++ stream/upb_decoder.c | 207 +++++++++++++++++++++++++++------------------------ 3 files changed, 119 insertions(+), 101 deletions(-) (limited to 'stream') diff --git a/core/upb_stream.h b/core/upb_stream.h index 54fd930..bf312a8 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -40,8 +40,11 @@ typedef enum { UPB_CONTINUE, // Stop processing for now; check status for details. If no status was set, - // a generic error will be returned. If the error is resumable, processing - // will resume by delivering this callback again. + // a generic error will be returned. If the error is resumable, it is not + // (yet) defined where processing will resume -- waiting for real-world + // examples of resumable decoders and resume-requiring clients. upb_src + // implementations that are not capable of resuming will override the return + // status to be non-resumable if a resumable status was set by the handlers. 
UPB_BREAK, // Skips to the end of the current submessage (or if we are at the top diff --git a/core/upb_string.h b/core/upb_string.h index 04c0ae9..1a7e06b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -134,6 +134,12 @@ INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } +// Convenience method for getting the end of the string. Calls +// upb_string_getrobuf() so inherits the caveats of calling that function. +INLINE const char *upb_string_getbufend(upb_string *str) { + return upb_string_getrobuf(str) + upb_string_len(str); +} + // Attempts to recycle the string "str" so it may be reused and have different // data written to it. After the function returns, "str" points to a writable // string, which is either the original string if it had no other references diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index fbd7eba..9a17451 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -13,23 +13,24 @@ /* Pure Decoding **************************************************************/ -// The key fast-path varint-decoding routine. There are a lot of possibilities -// for optimization/experimentation here. -INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, +// The key fast-path varint-decoding routine. Here we can assume we have at +// least UPB_MAX_ENCODED_SIZE bytes available. There are a lot of +// possibilities for optimization/experimentation here. 
+INLINE bool upb_decode_varint_fast(uint8_t **ptr, uint64_t &val, upb_status *status) { *high = 0; uint32_t b; uint8_t *ptr = p->ptr; - b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 28; + b = *(*ptr++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 28; *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; @@ -71,23 +72,51 @@ struct upb_decoder { // Current input buffer. upb_string *buf; - // Our current offset *within* buf. - upb_strlen_t buf_offset; - // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; }; // Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting "buf" and "end" appropriately. 
-INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, - uint8_t **buf, uint8_t **end) { - while (len > 0) { - memcpy(data, *buf, *end-*buf); - len -= (*end-*buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - *buf = upb_string_getrobuf(d->buf); - *end = *buf + upb_string_len(d->buf); +// from the stream to "data", adjusting "buf" and "len" appropriately. +static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, + uint8_t **ptr, size_t *len) { + while (1) { + memcpy(data, *ptr, *len); + bytes_wanted -= *len; + *ptr += *len; + if (bytes_wanted == 0) return true; + + // Did "len" indicate end-of-submessage or end-of-buffer? + size_t buf_offset = d->buf ? (*ptr - upb_string_getrobuf(d->buf)) : 0; + if (d->top->end_offset > 0 && + d->top->end_offset == d->buf_stream_offset + buf_offset) { + // End-of-submessage. + if (bytes_wanted > 0) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end.") + return false; + } + if (upb_pop(d) != UPB_CONTINUE) return false; + } else { + // End-of-buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *ptr = upb_string_getrobuf(d->buf); + } + + // Wait for end-of-submessage or end-of-buffer, whichever comes first. + size_t offset_in_buf = *ptr - upb_string_getrobuf(d->buf); + size_t buf_remaining = upb_string_getbufend(d->buf) - *ptr; + size_t submsg_remaining = + d->top->end_offset - d->buf_stream_offset - offset_in_buf; + if (d->top->end_offset == UPB_GROUP_END_OFFSET || + buf_remaining > submsg_remaining) { + *len = buf_remaining; + } else { + // Check that non of our subtraction overflowed. 
+ assert(d->top->end_offset > d->buf_stream_offset); + assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); + *len = submsg_remaining; + } } } @@ -112,21 +141,21 @@ static bool upb_decode_varint_slow(upb_decoder *d) { } } -INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, - const uint8_t **end, upb_tag *tag) { - const uint8_t *buf = *_buf, *end = *_end; +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_ptr, + const uint8_t **len, upb_tag *tag) { + const uint8_t *ptr = *_ptr, *len = *_end; uint32_t tag_int; // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). - if (end - buf < 2) goto slow; // unlikely. - tag_int = *buf & 0x7f; - if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order - tag_int |= (*buf & 0x7f) << 7; - if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. + if (len - ptr < 2) goto slow; // unlikely. + tag_int = *ptr & 0x7f; + if ((*(ptr++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*ptr & 0x7f) << 7; + if ((*(ptr++) & 0x80) != 0) goto slow; // unlikely. slow: - if (!upb_decode_varint_slow(d, _buf, _end)) return false; - buf = *_buf; // Trick the next line into not overwriting us. + if (!upb_decode_varint_slow(d, _ptr, _end)) return false; + ptr = *_ptr; // Trick the next line into not overwriting us. 
done: - *_buf = buf; + *_ptr = ptr; tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; return true; @@ -134,22 +163,22 @@ done: INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, uint32_t *low, uint32_t *high) { - if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + if (p->len - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) return upb_decode_varint_fast(d); else return upb_decode_varint_slow(d); } INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, - uint8_t **buf, uint8_t **end, upb_value *val) { + uint8_t **ptr, uint8_t **len, upb_value *val) { static const char table = {0, 8, 0, 0, 0, 4}; size_t bytes = table[wt]; - if (*end - *buf >= bytes) { + if (*len - *ptr >= bytes) { // Common (fast) case. - memcpy(&val, *buf, bytes); - *buf += bytes; + memcpy(&val, *ptr, bytes); + *ptr += bytes; } else { - if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + if (!upb_getptr(d, &val, bytes, ptr, len)) return false; } return true; } @@ -159,12 +188,12 @@ INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { upb_string_recycle(str); upb_strlen_t len = upb_valu_getint32(*val); - if (*end - *buf >= len) { + if (*len - *ptr >= len) { // Common (fast) case. 
- upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); - *buf += len; + upb_string_substr(*str, d->buf, *ptr - upb_string_getrobuf(d->buf), len); + *ptr += len; } else { - if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, ptr, len)) return false; } return true; @@ -173,19 +202,6 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) /* The main decoding loop *****************************************************/ -static const void *get_msgend(upb_decoder *d) -{ - if(d->top->end_offset > 0) - return upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); - else - return (void*)UINTPTR_MAX; // group. -} - -static bool isgroup(const void *submsg_end) -{ - return submsg_end == (void*)UINTPTR_MAX; -} - extern upb_wire_type_t upb_expected_wire_types[]; // Returns true if wt is the correct on-the-wire type for ft. INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { @@ -193,76 +209,78 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } -static bool upb_push(upb_decoder *d, const uint8_t *start, - uint32_t submsg_len, upb_fielddef *f, - upb_status *status) -{ +static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, + upb_strlen_t submsg_len, upb_field_type_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { upb_seterr(status, UPB_ERROR, "Nesting too deep."); - return false; + return UPB_ERROR; } - d->top->end_offset = d->completed_offset + submsg_len; + d->top->end_offset = type == UPB_TYPE(GROUP) ? 
+ UPB_GROUP_END_OFFSET : d->completed_offset + submsg_len; d->top->msgdef = upb_downcast_msgdef(f->def); - *submsg_end = get_msgend(d); - if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; - return true; + return upb_dispatch_startsubmsg(&d->dispatcher, f); } -static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) -{ +static upb_flow_t upb_pop(upb_decoder *d) { d->top--; - upb_dispatch_endsubmsg(&d->dispatcher); - *submsg_end = get_msgend(d); - return true; + return upb_dispatch_endsubmsg(&d->dispatcher); } void upb_decoder_run(upb_src *src, upb_status *status) { - // buf is our current offset, moves from start to end. - const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str) + d->buf_offset; - const uint8_t *end = (uint8_t*)upb_string_getrobuf(str) + upb_string_len(str); - const uint8_t *submsg_end = get_msgend(d, start); - upb_msgdef *msgdef = d->top->msgdef; + // We use stack variables for our frequently used vars so the compiler knows + // they can't be changed by external code (like when we dispatch a callback). + + // Our current position in the data buffer. + uint8_t *ptr = NULL; + // Number of bytes available at ptr, until either end-of-buf or + // end-of-submessage (whichever is smaller). + size_t len = 0; + upb_string *str = NULL; - upb_dispatch_startmsg(&d->dispatcher); +// TODO: handle UPB_SKIPSUBMSG +#define CHECK_FLOW(expr) if ((expr) != UPB_CONTINUE) goto err +#define CHECK(expr) if (!expr) goto err; + + CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &ptr, &len, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. 
upb_value val; switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: - if(!isgroup(submsg_end)) { + if(d->top->end_offset != UPB_GROUP_END_OFFSET) upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); - goto check_msgend; // We have no value to dispatch. + CHECK_FLOW(upb_pop(d)); + continue; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. - CHECK(upb_decode_varint(d, &buf, &end, &val)); + CHECK(upb_decode_varint(d, &ptr, &len, &val)); break; case UPB_WIRE_TYPE_32BIT: case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &ptr, &len, &val)); break; } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); + upb_fielddef *f = upb_msg_itof(d->top->msgdef, tag.field_number); if (!f) { if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) CHECK(upb_decode_string(d, &val, &str)); - CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); + CHECK_FLOW(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { // TODO: put more details in this error msg. upb_seterr(status, UPB_ERROR, "Field had incorrect type."); @@ -280,8 +298,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; // We have no value to dispatch. + CHECK_FLOW(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): CHECK(upb_decode_string(d, &val, &str)); @@ -295,19 +313,10 @@ void upb_decoder_run(upb_src *src, upb_status *status) { default: break; // Other types need no further processing at this point. 
} - CHECK(upb_dispatch_value(d->sink, f, val, status)); - -check_msgend: - while(buf >= submsg_end) { - if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Bad submessage end.") - goto err; - } - CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); - } + CHECK_FLOW(upb_dispatch_value(d->sink, f, val, status)); } - CHECK(upb_dispatch_endmsg(&d->dispatcher)); + CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); return; err: -- cgit v1.2.3 From 93381f1411def0dba5677b71cd4df859d99777f3 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 Jan 2011 21:15:44 -0800 Subject: Decoder compiles again! But probably doesn't work. --- Makefile | 6 +- core/upb.c | 2 +- core/upb.h | 2 +- core/upb_def.c | 2 +- core/upb_stream_vtbl.h | 26 +++--- stream/upb_decoder.c | 241 +++++++++++++++++++++++++++++-------------------- 6 files changed, 160 insertions(+), 119 deletions(-) (limited to 'stream') diff --git a/Makefile b/Makefile index 04779c0..46cb836 100644 --- a/Makefile +++ b/Makefile @@ -62,8 +62,8 @@ SRC=core/upb.c \ core/upb_string.c \ descriptor/descriptor.c \ core/upb_def.c \ + stream/upb_decoder.c \ # core/upb_msg.c \ -# stream/upb_decoder.c \ # stream/upb_stdio.c \ # stream/upb_strstream.c \ # stream/upb_textprinter.c @@ -74,9 +74,9 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< core/upb_def.lo: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC + $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a diff --git a/core/upb.c b/core/upb.c index da2a0f0..ff2d47e 100644 --- a/core/upb.c +++ b/core/upb.c @@ -18,7 +18,7 @@ (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \ #ctype}, -upb_type_info upb_types[] = { +const upb_type_info upb_types[] = { {0, 0, 0, 0, ""}, // There is no type 0. TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1) // DOUBLE TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1) // FLOAT diff --git a/core/upb.h b/core/upb.h index d394a08..7b228a0 100644 --- a/core/upb.h +++ b/core/upb.h @@ -101,7 +101,7 @@ typedef struct { } upb_type_info; // A static array of info about all of the field types, indexed by type number. -extern upb_type_info upb_types[]; +extern const upb_type_info upb_types[]; // The number of a field, eg. "optional string foo = 3". typedef int32_t upb_field_number_t; diff --git a/core/upb_def.c b/core/upb_def.c index c21843e..2eda89f 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -717,7 +717,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { size_t max_align = 0; for (int i = 0; i < n; i++) { upb_fielddef *f = sorted_fields[i]; - upb_type_info *type_info = &upb_types[f->type]; + const upb_type_info *type_info = &upb_types[f->type]; // This identifies the set bit. When we implement is_initialized (a // general check about whether all required bits are set) we will probably diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index fd71b2d..ddefba9 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -27,9 +27,9 @@ typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); // upb_bytesrc. 
typedef upb_strlen_t (*upb_bytesrc_read_fptr)( - upb_bytesrc *src, void *buf, upb_strlen_t count); + upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); typedef bool (*upb_bytesrc_getstr_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t count); + upb_bytesrc *src, upb_string *str, upb_status *status); // upb_bytesink. typedef upb_strlen_t (*upb_bytesink_write_fptr)( @@ -102,35 +102,31 @@ INLINE void upb_src_run(upb_src *src, upb_status *status) { // upb_bytesrc INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count) { - return src->vtbl->read(src, buf, count); + upb_strlen_t count, upb_status *status) { + return src->vtbl->read(src, buf, count, status); } INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_strlen_t count) { - return src->vtbl->getstr(src, str, count); + upb_status *status) { + return src->vtbl->getstr(src, str, status); } INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, upb_status *status) { // We start with a getstr, because that could possibly alias data instead of // copying. - if (!upb_bytesrc_getstr(src, str, UPB_STRLEN_MAX)) goto error; + if (!upb_bytesrc_getstr(src, str, status)) return false; // Trade-off between number of read calls and amount of overallocation. const size_t bufsize = 4096; - while (!upb_bytesrc_eof(src)) { + do { upb_strlen_t len = upb_string_len(str); char *buf = upb_string_getrwbuf(str, len + bufsize); - upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize); - if (read < 0) goto error; + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize, status); + if (read < 0) return false; // Resize to proper size. 
upb_string_getrwbuf(str, len + read); - } + } while (!status->code != UPB_EOF); return true; - -error: - upb_copyerr(status, upb_bytesrc_status(src)); - return false; } INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 9a17451..b4b32ff 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -14,27 +14,27 @@ /* Pure Decoding **************************************************************/ // The key fast-path varint-decoding routine. Here we can assume we have at -// least UPB_MAX_ENCODED_SIZE bytes available. There are a lot of +// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of // possibilities for optimization/experimentation here. -INLINE bool upb_decode_varint_fast(uint8_t **ptr, uint64_t &val, +INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *status) { - *high = 0; + uint32_t low, high = 0; uint32_t b; - uint8_t *ptr = p->ptr; - b = *(*ptr++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(*ptr++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 28; + high = (b & 0x7f) >> 3; if(!(b & 
0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; done: + *val = ((uint64_t)high << 32) | low; return true; } @@ -50,7 +50,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - size_t end_offset; // For groups, 0. + ssize_t end_offset; // For groups, 0. } upb_decoder_frame; struct upb_decoder { @@ -76,23 +76,50 @@ struct upb_decoder { upb_strlen_t buf_stream_offset; }; +typedef struct { + // Our current position in the data buffer. + const char *ptr; + + // Number of bytes available at ptr, until either end-of-buf or + // end-of-submessage (whichever is smaller). + size_t len; + + // Msgdef for the current level. + upb_msgdef *msgdef; +} upb_dstate; + +INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { + s->ptr += len; + s->len -= len; +} + +static upb_flow_t upb_pop(upb_decoder *d); + +// Constant used to signal that the submessage is a group and therefore we +// don't know its end offset. This cannot be the offset of a real submessage +// end because it takes at least one byte to begin a submessage. +#define UPB_GROUP_END_OFFSET -1 +#define UPB_MAX_VARINT_ENCODED_SIZE 10 + // Called only from the slow path, this function copies the next "len" bytes // from the stream to "data", adjusting "buf" and "len" appropriately. 
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, - uint8_t **ptr, size_t *len) { + upb_dstate *s) { while (1) { - memcpy(data, *ptr, *len); - bytes_wanted -= *len; - *ptr += *len; + size_t to_copy = UPB_MIN(bytes_wanted, s->len); + memcpy(data, s->ptr, to_copy); + upb_dstate_advance(s, to_copy); + bytes_wanted -= to_copy; if (bytes_wanted == 0) return true; // Did "len" indicate end-of-submessage or end-of-buffer? - size_t buf_offset = d->buf ? (*ptr - upb_string_getrobuf(d->buf)) : 0; + ssize_t buf_offset = + d->buf ? ((const char*)s->ptr - upb_string_getrobuf(d->buf)) : 0; if (d->top->end_offset > 0 && d->top->end_offset == d->buf_stream_offset + buf_offset) { // End-of-submessage. if (bytes_wanted > 0) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end.") + upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); return false; } if (upb_pop(d) != UPB_CONTINUE) return false; @@ -100,100 +127,121 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, // End-of-buffer. if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - *ptr = upb_string_getrobuf(d->buf); + s->ptr = upb_string_getrobuf(d->buf); } // Wait for end-of-submessage or end-of-buffer, whichever comes first. - size_t offset_in_buf = *ptr - upb_string_getrobuf(d->buf); - size_t buf_remaining = upb_string_getbufend(d->buf) - *ptr; - size_t submsg_remaining = + ssize_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); + ssize_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; + ssize_t submsg_remaining = d->top->end_offset - d->buf_stream_offset - offset_in_buf; if (d->top->end_offset == UPB_GROUP_END_OFFSET || buf_remaining > submsg_remaining) { - *len = buf_remaining; + s->len = buf_remaining; } else { // Check that non of our subtraction overflowed. 
assert(d->top->end_offset > d->buf_stream_offset); assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); - *len = submsg_remaining; + s->len = submsg_remaining; } } } -// We use this path when we don't have UPB_MAX_ENCODED_SIZE contiguous bytes -// available in our current buffer. We don't inline this because we accept -// that it will be slow and we don't want to pay for two copies of it. -static bool upb_decode_varint_slow(upb_decoder *d) { - uint8_t buf[UPB_MAX_ENCODED_SIZE]; - uint8_t *p = buf, *end = buf + sizeof(buf); - for(int bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) - *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; - - if(d->status->code == UPB_EOF && (last & 0x80)) { - upb_seterr(status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - } else if(buf == maxend) { - upb_seterr(status, UPB_ERROR, +// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous +// bytes available in our current buffer. We don't inline this because we +// accept that it will be slow and we don't want to pay for two copies of it. +static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, + upb_value *val) { + char byte = 0x80; + uint64_t val64 = 0; + int bitpos; + for(bitpos = 0; + bitpos < 70 && (byte & 0x80) && upb_getbuf(d, &byte, 1, s); + bitpos += 7) + val64 |= ((uint64_t)byte & 0x7F) << bitpos; + + if(bitpos == 70) { + upb_seterr(d->status, UPB_ERROR, "Varint was unterminated after 10 bytes.\n"); + return false; + } else if (d->status->code == UPB_EOF && (byte & 0x80)) { + upb_seterr(d->status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + return false; } else { // Success. 
- return; + upb_value_setint64(val, val64); + return true; } } -INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_ptr, - const uint8_t **len, upb_tag *tag) { - const uint8_t *ptr = *_ptr, *len = *_end; +typedef struct { + upb_wire_type_t wire_type; + upb_field_number_t field_number; +} upb_tag; + +INLINE bool upb_decode_tag(upb_decoder *d, upb_dstate *s, upb_tag *tag) { + const char *p = s->ptr; uint32_t tag_int; + upb_value val; // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). - if (len - ptr < 2) goto slow; // unlikely. - tag_int = *ptr & 0x7f; - if ((*(ptr++) & 0x80) == 0) goto done; // predictable if fields are in order - tag_int |= (*ptr & 0x7f) << 7; - if ((*(ptr++) & 0x80) != 0) goto slow; // unlikely. + if (s->len < 2) goto slow; // unlikely. + tag_int = *p & 0x7f; + if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*p & 0x7f) << 7; + if ((*(p++) & 0x80) == 0) goto done; // likely slow: - if (!upb_decode_varint_slow(d, _ptr, _end)) return false; - ptr = *_ptr; // Trick the next line into not overwriting us. + // Decode a full varint starting over from ptr. + if (!upb_decode_varint_slow(d, s, &val)) return false; + tag_int = upb_value_getint64(val); + p = s->ptr; // Trick the next line into not overwriting us. done: - *_ptr = ptr; + upb_dstate_advance(s, p - s->ptr); tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; return true; } -INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, - uint32_t *low, uint32_t *high) { - if (p->len - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) - return upb_decode_varint_fast(d); - else - return upb_decode_varint_slow(d); +INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { + if (s->len >= UPB_MAX_VARINT_ENCODED_SIZE) { + // Common (fast) case. 
+ uint64_t val64; + const char *p = s->ptr; + if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; + upb_dstate_advance(s, p - s->ptr); + upb_value_setint64(val, val64); + return true; + } else { + return upb_decode_varint_slow(d, s, val); + } } INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, - uint8_t **ptr, uint8_t **len, upb_value *val) { - static const char table = {0, 8, 0, 0, 0, 4}; + upb_dstate *s, upb_value *val) { + static const char table[] = {0, 8, 0, 0, 0, 4}; size_t bytes = table[wt]; - if (*len - *ptr >= bytes) { + if (s->len >= bytes) { // Common (fast) case. - memcpy(&val, *ptr, bytes); - *ptr += bytes; + memcpy(&val, s->ptr, bytes); + upb_dstate_advance(s, bytes); } else { - if (!upb_getptr(d, &val, bytes, ptr, len)) return false; + if (!upb_getbuf(d, &val, bytes, s)) return false; } return true; } // "val" initially holds the length of the string, this is replaced by the // contents of the string. -INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { +INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, + upb_dstate *s) { upb_string_recycle(str); - upb_strlen_t len = upb_valu_getint32(*val); - if (*len - *ptr >= len) { + uint32_t strlen = upb_value_getint32(*val); + if (s->len >= strlen) { // Common (fast) case. - upb_string_substr(*str, d->buf, *ptr - upb_string_getrobuf(d->buf), len); - *ptr += len; + upb_string_substr(*str, d->buf, s->ptr - upb_string_getrobuf(d->buf), strlen); + upb_dstate_advance(s, strlen); } else { - if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, ptr, len)) + if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) return false; } return true; @@ -204,21 +252,22 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) extern upb_wire_type_t upb_expected_wire_types[]; // Returns true if wt is the correct on-the-wire type for ft. 
-INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { +INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { // This doesn't currently support packed arrays. - return upb_types[ft].expected_wire_type == wt; + return upb_types[ft].native_wire_type == wt; } -static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, - upb_strlen_t submsg_len, upb_field_type_t type) { +static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, + upb_strlen_t submsg_len, upb_fieldtype_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR, "Nesting too deep."); + upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); return UPB_ERROR; } - d->top->end_offset = type == UPB_TYPE(GROUP) ? - UPB_GROUP_END_OFFSET : d->completed_offset + submsg_len; + d->top->end_offset = (type == UPB_TYPE(GROUP)) ? + UPB_GROUP_END_OFFSET : + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + submsg_len; d->top->msgdef = upb_downcast_msgdef(f->def); return upb_dispatch_startsubmsg(&d->dispatcher, f); } @@ -229,15 +278,11 @@ static upb_flow_t upb_pop(upb_decoder *d) { } void upb_decoder_run(upb_src *src, upb_status *status) { - // We use stack variables for our frequently used vars so the compiler knows - // they can't be changed by external code (like when we dispatch a callback). - - // Our current position in the data buffer. - uint8_t *ptr = NULL; - // Number of bytes available at ptr, until either end-of-buf or - // end-of-submessage (whichever is smaller). - size_t len = 0; - + upb_decoder *d = (upb_decoder*)src; + // We put our dstate on the stack so the compiler knows they can't be changed + // by external code (like when we dispatch a callback). We must be sure not + // to let its address escape this source file. 
+ upb_dstate state = {NULL, 0, d->top->msgdef}; upb_string *str = NULL; // TODO: handle UPB_SKIPSUBMSG @@ -250,14 +295,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &ptr, &len, &tag)); + CHECK(upb_decode_tag(d, &state, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. upb_value val; switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: - if(d->top->end_offset != UPB_GROUP_END_OFFSET) + if(d->top->end_offset != UPB_GROUP_END_OFFSET) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } @@ -266,21 +311,21 @@ void upb_decoder_run(upb_src *src, upb_status *status) { case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. - CHECK(upb_decode_varint(d, &ptr, &len, &val)); + CHECK(upb_decode_varint(d, &state, &val)); break; case UPB_WIRE_TYPE_32BIT: case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_fixed(d, tag.wire_type, &ptr, &len, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &state, &val)); break; } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, tag.field_number); + upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, tag.field_number); if (!f) { if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) - CHECK(upb_decode_string(d, &val, &str)); - CHECK_FLOW(upb_dispatch_unknownval(d, tag.field_number, val)); + CHECK(upb_decode_string(d, &val, &str, &state)); + CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { // TODO: put more details in this error msg. 
upb_seterr(status, UPB_ERROR, "Field had incorrect type."); @@ -298,11 +343,11 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + CHECK_FLOW(upb_push(d, &state, f, upb_value_getint32(val), f->type)); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, &val, &str)); + CHECK(upb_decode_string(d, &val, &str, &state)); break; case UPB_TYPE(SINT32): upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); @@ -313,7 +358,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { default: break; // Other types need no further processing at this point. } - CHECK_FLOW(upb_dispatch_value(d->sink, f, val, status)); + CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); } CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); @@ -329,7 +374,7 @@ void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { upb_decoder *d = (upb_decoder*)src; upb_dispatcher_reset(&d->dispatcher, handlers); d->top = d->stack; - d->completed_offset = 0; + d->buf_stream_offset = 0; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it // indefinitely), so we treat it like a group. -- cgit v1.2.3 From 2c24cbb108bbda296f01e7628028b1dcb2b9516b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 25 Jan 2011 10:07:47 -0800 Subject: More work on decoder and stdio bytesrc/bytesink. 
--- Makefile | 6 ++--- core/upb.c | 17 +++++-------- core/upb_stream.h | 16 +++++++----- stream/upb_decoder.c | 14 +++++++--- stream/upb_stdio.c | 66 +++++++++++++++++++++++------------------------- stream/upb_textprinter.c | 1 - stream/upb_textprinter.h | 3 +-- 7 files changed, 63 insertions(+), 60 deletions(-) (limited to 'stream') diff --git a/Makefile b/Makefile index 46cb836..1dfd79d 100644 --- a/Makefile +++ b/Makefile @@ -63,10 +63,10 @@ SRC=core/upb.c \ descriptor/descriptor.c \ core/upb_def.c \ stream/upb_decoder.c \ + stream/upb_stdio.c \ + stream/upb_textprinter.c # core/upb_msg.c \ -# stream/upb_stdio.c \ # stream/upb_strstream.c \ -# stream/upb_textprinter.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -114,7 +114,7 @@ TESTS=tests/test_string \ tests/test_table \ tests/test_def \ tests/test_stream \ -# tests/test_decoder \ + tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ # tests/test.proto.pb diff --git a/core/upb.c b/core/upb.c index ff2d47e..525c8a8 100644 --- a/core/upb.c +++ b/core/upb.c @@ -41,16 +41,13 @@ const upb_type_info upb_types[] = { }; void upb_seterr(upb_status *status, enum upb_status_code code, - const char *msg, ...) -{ - if(upb_ok(status)) { // The first error is the most interesting. - status->code = code; - upb_string_recycle(&status->str); - va_list args; - va_start(args, msg); - upb_string_vprintf(status->str, msg, args); - va_end(args); - } + const char *msg, ...) 
{ + status->code = code; + upb_string_recycle(&status->str); + va_list args; + va_start(args, msg); + upb_string_vprintf(status->str, msg, args); + va_end(args); } void upb_copyerr(upb_status *to, upb_status *from) diff --git a/core/upb_stream.h b/core/upb_stream.h index bf312a8..d0045cc 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -178,12 +178,16 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -// Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing -// existing string data (which avoids a copy). On the other hand, if -// the data was *not* already in an existing string, this copies it into -// a upb_string, and if the data needs to be put in a specific range of -// memory (because eg. you need to put it into a different kind of string -// object) then upb_bytesrc_get() could be better. +// Like upb_bytesrc_read(), but modifies "str" in-place. "str" MUST be newly +// created or just recycled. Returns "false" if no data was returned, either +// due to error or EOF (check status for details). +// +// In comparison to upb_bytesrc_read(), this call can possibly alias existing +// string data (which avoids a copy). On the other hand, if the data was *not* +// already in an existing string, this copies it into a upb_string, and if the +// data needs to be put in a specific range of memory (because eg. you need to +// put it into a different kind of string object) then upb_bytesrc_get() could +// be better. INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, upb_status *status); diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b4b32ff..e60915f 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -126,6 +126,7 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, } else { // End-of-buffer. 
if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + upb_string_recycle(&d->buf); if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; s->ptr = upb_string_getrobuf(d->buf); } @@ -295,7 +296,15 @@ void upb_decoder_run(upb_src *src, upb_status *status) { while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &state, &tag)); + if (!upb_decode_tag(d, &state, &tag)) { + if (status->code == UPB_EOF && d->top == d->stack) { + // Normal end-of-file. + CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + return; + } else { + goto err; + } + } // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. @@ -361,9 +370,6 @@ void upb_decoder_run(upb_src *src, upb_status *status) { CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); } - CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); - return; - err: if (upb_ok(status)) { upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 820399b..7923664 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -23,44 +23,42 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file) { stdio->file = file; } -static bool upb_stdio_read(upb_stdio *stdio, upb_string *str, - int offset, size_t bytes_to_read) { - char *buf = upb_string_getrwbuf(str, offset + bytes_to_read) + offset; - size_t read = fread(buf, 1, bytes_to_read, stdio->file); - if(read < bytes_to_read) { +static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, + upb_strlen_t count, upb_status *status) { + upb_stdio *stdio = (upb_stdio*)src; + assert(count > 0); + size_t read = fread(buf, 1, count, stdio->file); + if(read < (size_t)count) { // Error or EOF. 
- stdio->bytesrc.eof = feof(stdio->file); - if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesrc.status, UPB_STATUS_ERROR, - "Error reading from stdio stream."); - return false; + if(feof(stdio->file)) { + upb_seterr(status, UPB_EOF, ""); + return read; + } else if(ferror(stdio->file)) { + upb_seterr(status, UPB_ERROR, "Error reading from stdio stream."); + return -1; } - // Resize to actual read size. - upb_string_getrwbuf(str, offset + read); } - return true; + return read; } -bool upb_stdio_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen) { - // We ignore "minlen" since the stdio interfaces always return a full read - // unless they are at EOF. - (void)minlen; - return upb_stdio_read((upb_stdio*)src, str, 0, BLOCK_SIZE); -} - -bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { - return upb_stdio_read((upb_stdio*)src, str, upb_string_len(str), len); +static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + upb_strlen_t read = upb_stdio_read( + src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status); + if (read <= 0) return false; + upb_string_getrwbuf(str, read); + return true; } int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); - size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); + upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { // Error or EOF. 
stdio->bytesink.eof = feof(stdio->file); if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesink.status, UPB_STATUS_ERROR, + upb_seterr(&stdio->bytesink.status, UPB_ERROR, "Error writing to stdio stream."); return 0; } @@ -68,19 +66,19 @@ int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { return written; } -static upb_bytesrc_vtable upb_stdio_bytesrc_vtbl = { - (upb_bytesrc_get_fptr)upb_stdio_get, - (upb_bytesrc_append_fptr)upb_stdio_append, -}; +upb_stdio *upb_stdio_new() { + static upb_bytesrc_vtbl bytesrc_vtbl = { + upb_stdio_read, + upb_stdio_getstr, + }; -static upb_bytesink_vtable upb_stdio_bytesink_vtbl = { - upb_stdio_put -}; + //static upb_bytesink_vtbl bytesink_vtbl = { + // upb_stdio_put + //}; -upb_stdio *upb_stdio_new() { upb_stdio *stdio = malloc(sizeof(*stdio)); - upb_bytesrc_init(&stdio->bytesrc, &upb_stdio_bytesrc_vtbl); - upb_bytesink_init(&stdio->bytesink, &upb_stdio_bytesink_vtbl); + upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); + //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); return stdio; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 2d2e237..3a77ab1 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -12,7 +12,6 @@ #include "upb_string.h" struct _upb_textprinter { - upb_sink sink; upb_bytesink *bytesink; upb_string *str; int indent_depth; diff --git a/stream/upb_textprinter.h b/stream/upb_textprinter.h index 7e35412..b40d9fa 100644 --- a/stream/upb_textprinter.h +++ b/stream/upb_textprinter.h @@ -20,8 +20,7 @@ upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, bool single_line); - -upb_sink *upb_textprinter_sink(upb_textprinter *p); +void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3 From 5af1ade5435807da065197c0c558947b34628d58 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 28 Jan 2011 
10:11:25 -0800 Subject: More work on textprinter. --- stream/upb_textprinter.c | 143 +++++++++++++++++++++-------------------------- 1 file changed, 64 insertions(+), 79 deletions(-) (limited to 'stream') diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 3a77ab1..2209173 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -13,119 +13,104 @@ struct _upb_textprinter { upb_bytesink *bytesink; - upb_string *str; int indent_depth; bool single_line; upb_fielddef *f; }; -static void upb_textprinter_endfield(upb_textprinter *p) +static void upb_textprinter_indent(upb_textprinter *p) { + if(!p->single_line) + for(int i = 0; i < p->indent_depth; i++) + upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); +} + +static void upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); else upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); } -static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { - upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); - upb_enumdef *enum_def; - upb_string *enum_label; - if(p->f->type == UPB_TYPE(ENUM) && - (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && - (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { - // This is an enum value for which we found a corresponding string. 
- upb_bytesink_put(p->bytesink, enum_label); - } else { - p->str = upb_string_tryrecycle(p->str); -#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; - switch(p->f->type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); +static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, + upb_value val) { + upb_textprinter *p = _p; + upb_textprinter_indent(p); + upb_bytesink_printf(p->bytesink, UPB_STRFMT ": ", UPB_STRARG(f->name)); +#define CASE(fmtstr, member) upb_bytesink_printf(p->bytesink, fmtstr, val.member); break; + switch(p->f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + CASE("%" PRIu32, uint32); + case UPB_TYPE(ENUM): { + upb_enumdef *enum_def; + upb_string *enum_label; + (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && + (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { + // This is an enum value for which we found a corresponding string. 
+ upb_bytesink_put(p->bytesink, enum_label); + CASE("%" PRIu32, uint32); } - upb_bytesink_put(p->bytesink, p->str); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); + upb_bytesink_put(p->bytesink, str); + upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + break; } upb_textprinter_endfield(p); - return upb_ok(upb_bytesink_status(p->bytesink)); -} - -static bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { - upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); - // TODO: escaping. - upb_bytesink_put(p->bytesink, str); - upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); - upb_textprinter_endfield(p); - return upb_ok(upb_bytesink_status(p->bytesink)); -} - -static void upb_textprinter_indent(upb_textprinter *p) -{ - if(!p->single_line) - for(int i = 0; i < p->indent_depth; i++) - upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); + return UPB_CONTINUE; } -static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) -{ - upb_textprinter_indent(p); - upb_bytesink_put(p->bytesink, f->name); - p->f = f; - return upb_ok(upb_bytesink_status(p->bytesink)); -} - -static bool upb_textprinter_startmsg(upb_textprinter *p) -{ +static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f) { + upb_textprinter *p = _p; + p->indent_depth++; upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); - p->indent_depth++; - return upb_ok(upb_bytesink_status(p->bytesink)); + return UPB_CONTINUE; } -static bool upb_textprinter_endmsg(upb_textprinter *p) +static upb_flow_t upb_textprinter_endsubmsg(void *_p) { + upb_textprinter *p = _p; p->indent_depth--; upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, UPB_STRLIT("}")); upb_textprinter_endfield(p); - return upb_ok(upb_bytesink_status(p->bytesink)); + return UPB_CONTINUE; } -upb_sink_vtable upb_textprinter_vtbl = { - 
(upb_sink_putdef_fptr)upb_textprinter_putdef, - (upb_sink_putval_fptr)upb_textprinter_putval, - (upb_sink_putstr_fptr)upb_textprinter_putstr, - (upb_sink_startmsg_fptr)upb_textprinter_startmsg, - (upb_sink_endmsg_fptr)upb_textprinter_endmsg, -}; - upb_textprinter *upb_textprinter_new() { + static upb_handlerset handlers = { + NULL, // startmsg + NULL, // endmsg + upb_textprinter_putval, + upb_textprinter_startsubmsg, + upb_textprinter_endsubmsg, + }; upb_textprinter *p = malloc(sizeof(*p)); - upb_sink_init(&p->sink, &upb_textprinter_vtbl); - p->str = NULL; + upb_byte_init(&p->sink, &upb_textprinter_vtbl); return p; } void upb_textprinter_free(upb_textprinter *p) { - upb_string_unref(p->str); free(p); } -- cgit v1.2.3 From d98db7cb567f17a3bb56e2af8499d2e3aef03b3b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 12:07:09 -0800 Subject: Textprinter is compiling again. --- core/upb_stream.h | 22 ++++++--- core/upb_stream_vtbl.h | 21 ++++++--- stream/upb_textprinter.c | 114 ++++++++++++++++++++++++++++------------------- stream/upb_textprinter.h | 4 +- 4 files changed, 103 insertions(+), 58 deletions(-) (limited to 'stream') diff --git a/core/upb_stream.h b/core/upb_stream.h index 09e4025..aa23549 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -245,16 +245,26 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -INLINE bool upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...); +// TODO: Figure out how buffering should be handled. Should the caller buffer +// data and only call these functions when a buffer is full? Seems most +// efficient, but then buffering has to be configured in the caller, which +// could be anything, which makes it hard to have a standard interface for +// controlling buffering. 
+// +// The downside of having the bytesink buffer is efficiency: the caller is +// making more (virtual) function calls, and the caller can't arrange to have +// a big contiguous buffer. The bytesink can do this, but will have to copy +// to make the data contiguous. + +// Returns the number of bytes written. +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, + const char *fmt, ...); // Puts the given string, returning true if the operation was successful, otherwise // check "status" for details. Ownership of the string is *not* passed; if // the callee wants a reference he must call upb_string_getref() on it. -INLINE bool upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, - upb_status *status); - -// Returns the current error status for the stream. -INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status); #include "upb_stream_vtbl.h" diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ef655fd..8e8971f 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -34,8 +34,10 @@ typedef bool (*upb_bytesrc_getstr_fptr)( // upb_bytesink. typedef upb_strlen_t (*upb_bytesink_write_fptr)( upb_bytesink *bytesink, void *buf, upb_strlen_t count); -typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( - upb_bytesink *bytesink, upb_string *str); +typedef bool (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str, upb_status *status); +typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( + upb_status *status, const char *fmt, va_list args); // Vtables for the above interfaces. 
typedef struct { @@ -44,8 +46,9 @@ typedef struct { } upb_bytesrc_vtbl; typedef struct { - upb_bytesink_write_fptr write; - upb_bytesink_putstr_fptr putstr; + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; + upb_bytesink_vprintf_fptr vprintf; } upb_bytesink_vtbl; typedef struct { @@ -140,13 +143,21 @@ INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, } INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { - return sink->vtbl->putstr(sink, str); + return sink->vtbl->putstr(sink, str, status); } INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { return &sink->status; } +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + upb_strlen_t ret = sink->vtbl->vprintf(status, fmt, args); + va_end(args); + return ret; +} + // upb_handlers struct _upb_handlers { upb_handlerset *set; diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 2209173..7025494 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -15,34 +15,51 @@ struct _upb_textprinter { upb_bytesink *bytesink; int indent_depth; bool single_line; - upb_fielddef *f; + upb_status status; }; -static void upb_textprinter_indent(upb_textprinter *p) +#define CHECK(x) if ((x) < 0) goto err; + +static int upb_textprinter_indent(upb_textprinter *p) { if(!p->single_line) for(int i = 0; i < p->indent_depth; i++) - upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + return 0; +err: + return -1; } -static void upb_textprinter_endfield(upb_textprinter *p) { - if(p->single_line) - upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); - else - upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); +static int upb_textprinter_startfield(upb_textprinter *p, upb_fielddef *f) { + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, 
UPB_STRFMT ": ", UPB_STRARG(f->name))); + return 0; +err: + return -1; +} + +static int upb_textprinter_endfield(upb_textprinter *p) { + if(p->single_line) { + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + } else { + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status)); + } + return 0; +err: + return -1; } static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, upb_value val) { upb_textprinter *p = _p; - upb_textprinter_indent(p); - upb_bytesink_printf(p->bytesink, UPB_STRFMT ": ", UPB_STRARG(f->name)); -#define CASE(fmtstr, member) upb_bytesink_printf(p->bytesink, fmtstr, val.member); break; - switch(p->f->type) { + upb_textprinter_startfield(p, f); +#define CASE(fmtstr, member) \ + CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; + switch(f->type) { case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); + CASE("%0.f", double); case UPB_TYPE(FLOAT): - CASE("%0.f", _float) + CASE("%0.f", float) case UPB_TYPE(INT64): case UPB_TYPE(SFIXED64): case UPB_TYPE(SINT64): @@ -50,40 +67,48 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, case UPB_TYPE(UINT64): case UPB_TYPE(FIXED64): CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) case UPB_TYPE(UINT32): case UPB_TYPE(FIXED32): CASE("%" PRIu32, uint32); case UPB_TYPE(ENUM): { - upb_enumdef *enum_def; - upb_string *enum_label; - (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && - (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { - // This is an enum value for which we found a corresponding string. - upb_bytesink_put(p->bytesink, enum_label); - CASE("%" PRIu32, uint32); + upb_enumdef *enum_def = upb_downcast_enumdef(f->def); + upb_string *enum_label = + upb_enumdef_iton(enum_def, upb_value_getint32(val)); + if (enum_label) { + // We found a corresponding string for this enum. 
Otherwise we fall + // through to the int32 code path. + CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status)); + break; + } } + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) case UPB_TYPE(BOOL): - CASE("%hhu", _bool); + CASE("%hhu", bool); case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); - upb_bytesink_put(p->bytesink, str); - upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + // TODO: escaping. + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(": \""), &p->status)); + CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); break; } upb_textprinter_endfield(p); return UPB_CONTINUE; +err: + return UPB_BREAK; } -static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f) { +static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, + upb_handlers *delegate_to) { + (void)delegate_to; upb_textprinter *p = _p; + upb_textprinter_startfield(p, f); p->indent_depth++; - upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); - if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" {"), &p->status); + if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); return UPB_CONTINUE; } @@ -92,21 +117,13 @@ static upb_flow_t upb_textprinter_endsubmsg(void *_p) upb_textprinter *p = _p; p->indent_depth--; upb_textprinter_indent(p); - upb_bytesink_put(p->bytesink, UPB_STRLIT("}")); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status); upb_textprinter_endfield(p); return UPB_CONTINUE; } upb_textprinter *upb_textprinter_new() { - static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg - upb_textprinter_putval, - upb_textprinter_startsubmsg, - upb_textprinter_endsubmsg, - }; upb_textprinter *p = malloc(sizeof(*p)); - upb_byte_init(&p->sink, 
&upb_textprinter_vtbl); return p; } @@ -114,11 +131,18 @@ void upb_textprinter_free(upb_textprinter *p) { free(p); } -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line) { +void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, + upb_bytesink *sink, bool single_line) { + static upb_handlerset handlerset = { + NULL, // startmsg + NULL, // endmsg + upb_textprinter_value, + upb_textprinter_startsubmsg, + upb_textprinter_endsubmsg, + }; p->bytesink = sink; p->single_line = single_line; p->indent_depth = 0; + upb_register_handlerset(handlers, &handlerset); + upb_set_handler_closure(handlers, p, &p->status); } - -upb_sink *upb_textprinter_sink(upb_textprinter *p) { return &p->sink; } diff --git a/stream/upb_textprinter.h b/stream/upb_textprinter.h index b40d9fa..a880626 100644 --- a/stream/upb_textprinter.h +++ b/stream/upb_textprinter.h @@ -18,8 +18,8 @@ typedef struct _upb_textprinter upb_textprinter; upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line); +void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, + upb_bytesink *sink, bool single_line); void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h); #ifdef __cplusplus -- cgit v1.2.3 From 93099cccd1e6428d6be45553c7dd7746bbd65e93 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 20:16:34 -0800 Subject: upb_strstream compiles again. That covers all source files except upb_msg! 
--- Makefile | 4 ++-- stream/upb_strstream.c | 50 +++++++++++++++++++++++++++++--------------------- 2 files changed, 31 insertions(+), 23 deletions(-) (limited to 'stream') diff --git a/Makefile b/Makefile index 1dfd79d..26e036e 100644 --- a/Makefile +++ b/Makefile @@ -64,9 +64,9 @@ SRC=core/upb.c \ core/upb_def.c \ stream/upb_decoder.c \ stream/upb_stdio.c \ - stream/upb_textprinter.c + stream/upb_textprinter.c \ + stream/upb_strstream.c \ # core/upb_msg.c \ -# stream/upb_strstream.c \ $(SRC): perf-cppflags # Parts of core that are yet to be converted. diff --git a/stream/upb_strstream.c b/stream/upb_strstream.c index 7ed761b..d3fd4e0 100644 --- a/stream/upb_strstream.c +++ b/stream/upb_strstream.c @@ -12,6 +12,7 @@ struct upb_stringsrc { upb_bytesrc bytesrc; upb_string *str; + upb_strlen_t offset; }; void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { @@ -27,34 +28,41 @@ void upb_stringsrc_free(upb_stringsrc *s) { free(s); } -static bool upb_stringsrc_get(upb_stringsrc *src, upb_string *str, - upb_strlen_t minlen) { - // We ignore "minlen" since we always return the entire string. - (void)minlen; - upb_string_substr(str, src->str, 0, upb_string_len(src->str)); - src->bytesrc.eof = true; - return true; +static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, + upb_strlen_t count, upb_status *status) { + upb_stringsrc *src = (upb_stringsrc*)_src; + if (src->offset == upb_string_len(src->str)) { + upb_seterr(status, UPB_EOF, ""); + return -1; + } else { + upb_strlen_t to_read = UPB_MIN(count, upb_string_len(src->str) - src->offset); + memcpy(buf, upb_string_getrobuf(src->str) + src->offset, to_read); + src->offset += to_read; + return to_read; + } } -static bool upb_stringsrc_append(upb_stringsrc *src, upb_string *str, - upb_strlen_t len) { - // Unimplemented; since we return the string via "get" all in one go, - // this method probably isn't very useful. 
- (void)src; - (void)str; - (void)len; - return false; +static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str, + upb_status *status) { + upb_stringsrc *src = (upb_stringsrc*)_src; + if (src->offset == upb_string_len(str)) { + upb_seterr(status, UPB_EOF, ""); + return false; + } else { + upb_string_substr(str, src->str, 0, upb_string_len(src->str)); + return true; + } } -static upb_bytesrc_vtable upb_stringsrc_vtbl = { - (upb_bytesrc_get_fptr)upb_stringsrc_get, - (upb_bytesrc_append_fptr)upb_stringsrc_append, -}; - upb_stringsrc *upb_stringsrc_new() { + static upb_bytesrc_vtbl bytesrc_vtbl = { + upb_stringsrc_read, + upb_stringsrc_getstr, + }; + upb_stringsrc *s = malloc(sizeof(*s)); s->str = NULL; - upb_bytesrc_init(&s->bytesrc, &upb_stringsrc_vtbl); + upb_bytesrc_init(&s->bytesrc, &bytesrc_vtbl); return s; } -- cgit v1.2.3 From 02a8cdfff29d6a17836847490a06dfe535855d52 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 23:22:33 -0800 Subject: Fixes to decoder, stdio, textprinter. --- core/upb_stream_vtbl.h | 6 ++--- stream/upb_decoder.c | 68 ++++++++++++++++++++++++++++++++---------------- stream/upb_stdio.c | 38 ++++++++++++++++++--------- stream/upb_textprinter.c | 4 +-- tests/test_decoder.c | 10 +++++-- 5 files changed, 85 insertions(+), 41 deletions(-) (limited to 'stream') diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 8e8971f..a6990bc 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -34,10 +34,10 @@ typedef bool (*upb_bytesrc_getstr_fptr)( // upb_bytesink. 
typedef upb_strlen_t (*upb_bytesink_write_fptr)( upb_bytesink *bytesink, void *buf, upb_strlen_t count); -typedef bool (*upb_bytesink_putstr_fptr)( +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( upb_bytesink *bytesink, upb_string *str, upb_status *status); typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( - upb_status *status, const char *fmt, va_list args); + upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); // Vtables for the above interfaces. typedef struct { @@ -153,7 +153,7 @@ INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { va_list args; va_start(args, fmt); - upb_strlen_t ret = sink->vtbl->vprintf(status, fmt, args); + upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); va_end(args); return ret; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index e60915f..a7a2c76 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -18,22 +18,24 @@ // possibilities for optimization/experimentation here. 
INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *status) { + const char *p = *ptr; uint32_t low, high = 0; uint32_t b; - b = *(*ptr++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 28; + high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; done: + *ptr = p; *val = ((uint64_t)high << 32) | low; return true; } @@ -50,7 +52,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - ssize_t end_offset; // For groups, 0. + size_t end_offset; // For groups, 0. } upb_decoder_frame; struct upb_decoder { @@ -73,7 +75,7 @@ struct upb_decoder { upb_string *buf; // The offset within the overall stream represented by the *beginning* of buf. 
- upb_strlen_t buf_stream_offset; + size_t buf_stream_offset; }; typedef struct { @@ -98,7 +100,7 @@ static upb_flow_t upb_pop(upb_decoder *d); // Constant used to signal that the submessage is a group and therefore we // don't know its end offset. This cannot be the offset of a real submessage // end because it takes at least one byte to begin a submessage. -#define UPB_GROUP_END_OFFSET -1 +#define UPB_GROUP_END_OFFSET 0 #define UPB_MAX_VARINT_ENCODED_SIZE 10 // Called only from the slow path, this function copies the next "len" bytes @@ -132,12 +134,12 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, } // Wait for end-of-submessage or end-of-buffer, whichever comes first. - ssize_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); - ssize_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; - ssize_t submsg_remaining = + size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); + size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; + size_t submsg_remaining = d->top->end_offset - d->buf_stream_offset - offset_in_buf; if (d->top->end_offset == UPB_GROUP_END_OFFSET || - buf_remaining > submsg_remaining) { + buf_remaining < submsg_remaining) { s->len = buf_remaining; } else { // Check that non of our subtraction overflowed. @@ -165,13 +167,16 @@ static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, upb_seterr(d->status, UPB_ERROR, "Varint was unterminated after 10 bytes.\n"); return false; + } else if (d->status->code == UPB_EOF && bitpos == 0) { + // Regular EOF. + return false; } else if (d->status->code == UPB_EOF && (byte & 0x80)) { upb_seterr(d->status, UPB_ERROR, "Provided data ended in the middle of a varint.\n"); return false; } else { // Success. 
- upb_value_setint64(val, val64); + upb_value_setraw(val, val64); return true; } } @@ -210,7 +215,7 @@ INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { const char *p = s->ptr; if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; upb_dstate_advance(s, p - s->ptr); - upb_value_setint64(val, val64); + upb_value_setraw(val, val64); return true; } else { return upb_decode_varint_slow(d, s, val); @@ -245,6 +250,7 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) return false; } + upb_value_setstr(val, *str); return true; } @@ -259,7 +265,7 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { } static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, - upb_strlen_t submsg_len, upb_fieldtype_t type) { + upb_value submsg_len, upb_fieldtype_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { @@ -268,7 +274,7 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, } d->top->end_offset = (type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + submsg_len; + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len); d->top->msgdef = upb_downcast_msgdef(f->def); return upb_dispatch_startsubmsg(&d->dispatcher, f); } @@ -280,6 +286,7 @@ static upb_flow_t upb_pop(upb_decoder *d) { void upb_decoder_run(upb_src *src, upb_status *status) { upb_decoder *d = (upb_decoder*)src; + d->status = status; // We put our dstate on the stack so the compiler knows they can't be changed // by external code (like when we dispatch a callback). We must be sure not // to let its address escape this source file. 
@@ -299,9 +306,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { if (!upb_decode_tag(d, &state, &tag)) { if (status->code == UPB_EOF && d->top == d->stack) { // Normal end-of-file. + upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); return; } else { + if (status->code == UPB_EOF) { + upb_seterr(status, UPB_ERROR, + "Input ended in the middle of a submessage."); + } goto err; } } @@ -352,7 +364,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, &state, f, upb_value_getint32(val), f->type)); + CHECK_FLOW(upb_push(d, &state, f, val, f->type)); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): @@ -397,9 +409,21 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) { upb_dispatcher_init(&d->dispatcher); d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; + d->buf = NULL; return d; } +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { + d->bytesrc = bytesrc; + d->top = &d->stack[0]; + d->top->msgdef = d->toplevel_msgdef; + d->top->end_offset = SIZE_MAX; // never want to end top-level message. 
+ upb_string_unref(d->buf); + d->buf = NULL; +} + void upb_decoder_free(upb_decoder *d) { free(d); } + +upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; } diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 7923664..8857677 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -23,6 +23,9 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file) { stdio->file = file; } + +/* upb_bytesrc methods ********************************************************/ + static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status) { upb_stdio *stdio = (upb_stdio*)src; @@ -50,18 +53,27 @@ static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, return true; } -int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { + +/* upb_bytesink methods *******************************************************/ + +upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { - // Error or EOF. 
- stdio->bytesink.eof = feof(stdio->file); - if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesink.status, UPB_ERROR, - "Error writing to stdio stream."); - return 0; - } + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; + } + return written; +} + +upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, + const char *fmt, va_list args) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); + upb_strlen_t written = vfprintf(stdio->file, fmt, args); + if (written < 0) { + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; } return written; } @@ -72,13 +84,15 @@ upb_stdio *upb_stdio_new() { upb_stdio_getstr, }; - //static upb_bytesink_vtbl bytesink_vtbl = { - // upb_stdio_put - //}; + static upb_bytesink_vtbl bytesink_vtbl = { + NULL, + upb_stdio_putstr, + upb_stdio_vprintf + }; upb_stdio *stdio = malloc(sizeof(*stdio)); upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); - //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); + upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); return stdio; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 7025494..531da12 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -90,7 +90,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): // TODO: escaping. 
- CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(": \""), &p->status)); + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); break; @@ -107,7 +107,7 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, upb_textprinter *p = _p; upb_textprinter_startfield(p, f); p->indent_depth++; - upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" {"), &p->status); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); return UPB_CONTINUE; } diff --git a/tests/test_decoder.c b/tests/test_decoder.c index 0e6f19c..ed5a77e 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -16,13 +16,19 @@ int main() { upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds)); upb_decoder_reset(d, upb_stdio_bytesrc(in)); upb_textprinter *p = upb_textprinter_new(); - upb_textprinter_reset(p, upb_stdio_bytesink(out), false); + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false); + upb_src *src = upb_decoder_src(d); + upb_src_sethandlers(src, &handlers); upb_status status = UPB_STATUS_INIT; - upb_streamdata(upb_decoder_src(d), upb_textprinter_sink(p), &status); + upb_src_run(src, &status); + upb_printerr(&status); assert(upb_ok(&status)); + upb_stdio_free(in); upb_stdio_free(out); upb_decoder_free(d); -- cgit v1.2.3 From 9aa7e559d634a3ecf087ee376f82704e2290f478 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 30 Jan 2011 16:28:37 -0800 Subject: Fixes to decoder and textprinter: it works (for some input)! A protobuf -> text stream for descriptor.proto now outputs the same text as proto2. 
--- stream/upb_decoder.c | 98 ++++++++++++++++++++++++------------------------ stream/upb_textprinter.c | 19 ++++------ tests/test_decoder.c | 8 +++- 3 files changed, 62 insertions(+), 63 deletions(-) (limited to 'stream') diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index a7a2c76..3a279a1 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -51,7 +51,6 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } // upb_decoder_frame is one frame of that stack. typedef struct { upb_msgdef *msgdef; - upb_fielddef *field; size_t end_offset; // For groups, 0. } upb_decoder_frame; @@ -82,29 +81,37 @@ typedef struct { // Our current position in the data buffer. const char *ptr; - // Number of bytes available at ptr, until either end-of-buf or - // end-of-submessage (whichever is smaller). + // End of this submessage, relative to *ptr. + const char *submsg_end; + + // Number of bytes available at ptr. size_t len; // Msgdef for the current level. upb_msgdef *msgdef; } upb_dstate; +// Constant used to signal that the submessage is a group and therefore we +// don't know its end offset. This cannot be the offset of a real submessage +// end because it takes at least one byte to begin a submessage. +#define UPB_GROUP_END_OFFSET 0 +#define UPB_MAX_VARINT_ENCODED_SIZE 10 + INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { s->ptr += len; s->len -= len; } -static upb_flow_t upb_pop(upb_decoder *d); +INLINE void upb_dstate_setmsgend(upb_decoder *d, upb_dstate *s) { + s->submsg_end = (d->top->end_offset == UPB_GROUP_END_OFFSET) ? + (void*)UINTPTR_MAX : + upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); +} -// Constant used to signal that the submessage is a group and therefore we -// don't know its end offset. This cannot be the offset of a real submessage -// end because it takes at least one byte to begin a submessage. 
-#define UPB_GROUP_END_OFFSET 0 -#define UPB_MAX_VARINT_ENCODED_SIZE 10 +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s); // Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting "buf" and "len" appropriately. +// from the stream to "data", adjusting the dstate appropriately. static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, upb_dstate *s) { while (1) { @@ -112,41 +119,17 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, memcpy(data, s->ptr, to_copy); upb_dstate_advance(s, to_copy); bytes_wanted -= to_copy; - if (bytes_wanted == 0) return true; - - // Did "len" indicate end-of-submessage or end-of-buffer? - ssize_t buf_offset = - d->buf ? ((const char*)s->ptr - upb_string_getrobuf(d->buf)) : 0; - if (d->top->end_offset > 0 && - d->top->end_offset == d->buf_stream_offset + buf_offset) { - // End-of-submessage. - if (bytes_wanted > 0) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); - return false; - } - if (upb_pop(d) != UPB_CONTINUE) return false; - } else { - // End-of-buffer. - if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); - upb_string_recycle(&d->buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - s->ptr = upb_string_getrobuf(d->buf); + if (bytes_wanted == 0) { + upb_dstate_setmsgend(d, s); + return true; } - // Wait for end-of-submessage or end-of-buffer, whichever comes first. - size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); - size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; - size_t submsg_remaining = - d->top->end_offset - d->buf_stream_offset - offset_in_buf; - if (d->top->end_offset == UPB_GROUP_END_OFFSET || - buf_remaining < submsg_remaining) { - s->len = buf_remaining; - } else { - // Check that non of our subtraction overflowed. 
- assert(d->top->end_offset > d->buf_stream_offset); - assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); - s->len = submsg_remaining; - } + // Get next buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + upb_string_recycle(&d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + s->ptr = upb_string_getrobuf(d->buf); + s->len = upb_string_len(d->buf); } } @@ -266,7 +249,6 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, upb_value submsg_len, upb_fieldtype_t type) { - d->top->field = f; d->top++; if(d->top >= d->limit) { upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); @@ -274,13 +256,16 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, } d->top->end_offset = (type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len); + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + + upb_value_getint32(submsg_len); d->top->msgdef = upb_downcast_msgdef(f->def); + upb_dstate_setmsgend(d, s); return upb_dispatch_startsubmsg(&d->dispatcher, f); } -static upb_flow_t upb_pop(upb_decoder *d) { +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s) { d->top--; + upb_dstate_setmsgend(d, s); return upb_dispatch_endsubmsg(&d->dispatcher); } @@ -290,7 +275,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // We put our dstate on the stack so the compiler knows they can't be changed // by external code (like when we dispatch a callback). We must be sure not // to let its address escape this source file. 
- upb_dstate state = {NULL, 0, d->top->msgdef}; + upb_dstate state = {NULL, (void*)0x1, 0, d->top->msgdef}; upb_string *str = NULL; // TODO: handle UPB_SKIPSUBMSG @@ -301,6 +286,15 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Main loop: executed once per tag/field pair. while(1) { + // Check for end-of-submessage. + while (state.ptr >= state.submsg_end) { + if (state.ptr > state.submsg_end) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); + goto err; + } + CHECK_FLOW(upb_pop(d, &state)); + } + // Parse/handle tag. upb_tag tag; if (!upb_decode_tag(d, &state, &tag)) { @@ -308,6 +302,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Normal end-of-file. upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + upb_string_unref(str); return; } else { if (status->code == UPB_EOF) { @@ -322,12 +317,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // since most types will read a varint here. upb_value val; switch (tag.wire_type) { + case UPB_WIRE_TYPE_START_GROUP: + break; // Nothing to do now, below we will push appropriately. case UPB_WIRE_TYPE_END_GROUP: if(d->top->end_offset != UPB_GROUP_END_OFFSET) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - CHECK_FLOW(upb_pop(d)); + CHECK_FLOW(upb_pop(d, &state)); continue; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: @@ -383,6 +380,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { } err: + upb_string_unref(str); if (upb_ok(status)) { upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); } @@ -417,12 +415,14 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { d->bytesrc = bytesrc; d->top = &d->stack[0]; d->top->msgdef = d->toplevel_msgdef; - d->top->end_offset = SIZE_MAX; // never want to end top-level message. + // Never want to end top-level message, so treat it like a group. 
+ d->top->end_offset = UPB_GROUP_END_OFFSET; upb_string_unref(d->buf); d->buf = NULL; } void upb_decoder_free(upb_decoder *d) { + upb_string_unref(d->buf); free(d); } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 531da12..894a1ea 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -30,14 +30,6 @@ err: return -1; } -static int upb_textprinter_startfield(upb_textprinter *p, upb_fielddef *f) { - upb_textprinter_indent(p); - CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); - return 0; -err: - return -1; -} - static int upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) { CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); @@ -52,7 +44,8 @@ err: static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, upb_value val) { upb_textprinter *p = _p; - upb_textprinter_startfield(p, f); + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); #define CASE(fmtstr, member) \ CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; switch(f->type) { @@ -105,11 +98,13 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, upb_handlers *delegate_to) { (void)delegate_to; upb_textprinter *p = _p; - upb_textprinter_startfield(p, f); - p->indent_depth++; - upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status); + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT " {", UPB_STRARG(f->name))); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); + p->indent_depth++; return UPB_CONTINUE; +err: + return UPB_BREAK; } static upb_flow_t upb_textprinter_endsubmsg(void *_p) diff --git a/tests/test_decoder.c b/tests/test_decoder.c index ed5a77e..f48472d 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -25,14 +25,18 @@ int main() { upb_status status = 
UPB_STATUS_INIT; upb_src_run(src, &status); - upb_printerr(&status); assert(upb_ok(&status)); - + upb_status_uninit(&status); upb_stdio_free(in); upb_stdio_free(out); upb_decoder_free(d); upb_textprinter_free(p); upb_def_unref(fds); upb_symtab_unref(symtab); + + // Prevent C library from holding buffers open, so Valgrind doesn't see + // memory leaks. + fclose(stdin); + fclose(stdout); } -- cgit v1.2.3