From 1aafd4111b9b6d08d2d0937b0f396a4caa9ea04d Mon Sep 17 00:00:00 2001
From: Josh Haberman
Date: Sat, 8 Jul 2017 00:00:05 -0700
Subject: A good start on upb_encode and upb_decode.

---
 upb/decode.c      | 322 +++++++++++++++++++++++++++
 upb/decode.h      |  17 ++
 upb/encode.c      | 615 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 upb/encode.h      |  17 ++
 upb/msg.c         |  10 +-
 upb/msg.h         |   1 +
 upb/structs.int.h |  18 ++
 upb/upb.h         |   7 +
 8 files changed, 998 insertions(+), 9 deletions(-)
 create mode 100644 upb/decode.c
 create mode 100644 upb/decode.h
 create mode 100644 upb/encode.c
 create mode 100644 upb/encode.h
 create mode 100644 upb/structs.int.h

(limited to 'upb')

diff --git a/upb/decode.c b/upb/decode.c
new file mode 100644
index 0000000..3b2ea54
--- /dev/null
+++ b/upb/decode.c
@@ -0,0 +1,322 @@
+
+#include "upb/decode.h"
+
+#include <string.h>
+
+/* Protobuf wire types: the low three bits of every field tag. */
+typedef enum {
+  UPB_WIRE_TYPE_VARINT      = 0,
+  UPB_WIRE_TYPE_64BIT       = 1,
+  UPB_WIRE_TYPE_DELIMITED   = 2,
+  UPB_WIRE_TYPE_START_GROUP = 3,
+  UPB_WIRE_TYPE_END_GROUP   = 4,
+  UPB_WIRE_TYPE_32BIT       = 5
+} upb_wiretype_t;
+
+/* Reports "msg" as an error on "env". */
+static void upb_decode_seterr(upb_env *env, const char *msg) {
+  upb_status status = UPB_STATUS_INIT;
+  upb_status_seterrmsg(&status, msg);
+  upb_env_reporterror(env, &status);
+}
+
+/* Decodes a varint from [*ptr, limit) into *val.  On success *ptr is advanced
+ * past it.  Fails (leaving *ptr alone) if the input ends mid-varint or the
+ * varint is longer than ten bytes. */
+static bool upb_decode_varint(const char **ptr, const char *limit,
+                              uint64_t *val) {
+  uint8_t byte = 0x80;
+  int bitpos = 0;
+  const char *p = *ptr;
+  *val = 0;
+
+  while (byte & 0x80) {
+    if (bitpos == 70 || p == limit) {
+      return false;
+    }
+
+    byte = *p;
+    *val |= (uint64_t)(byte & 0x7F) << bitpos;
+    p++;
+    bitpos += 7;
+  }
+
+  *ptr = p;
+  return true;
+}
+
+/* Like upb_decode_varint(), but also fails if the value needs more than 32
+ * bits.  In that case *ptr has already been advanced past the varint. */
+static bool upb_decode_varint32(const char **ptr, const char *limit,
+                                uint32_t *val) {
+  uint64_t u64;
+  if (!upb_decode_varint(ptr, limit, &u64) || u64 > UINT32_MAX) {
+    return false;
+  } else {
+    *val = u64;
+    return true;
+  }
+}
+
+/* Returns the layout entry for "field_number", or NULL if "l" has none. */
+static const upb_msglayout_fieldinit_v1 *upb_find_field(
+    const upb_msglayout_msginit_v1 *l, uint32_t field_number) {
+  /* Lots of optimization opportunities here. */
+  int i;
+  for (i = 0; i < l->field_count; i++) {
+    if (l->fields[i].number == field_number) {
+      return &l->fields[i];
+    }
+  }
+
+  return NULL;  /* Unknown field. */
+}
+
+/* Reads 8 raw bytes from [*ptr, limit) into *val, advancing *ptr. */
+static bool upb_decode_64bit(const char **ptr, const char *limit,
+                             uint64_t *val) {
+  if (limit - *ptr < 8) {
+    return false;
+  } else {
+    memcpy(val, *ptr, 8);
+    *ptr += 8;
+    return true;
+  }
+}
+
+/* Reads 4 raw bytes from [*ptr, limit) into *val, advancing *ptr. */
+static bool upb_decode_32bit(const char **ptr, const char *limit,
+                             uint32_t *val) {
+  if (limit - *ptr < 4) {
+    return false;
+  } else {
+    memcpy(val, *ptr, 4);
+    *ptr += 4;
+    return true;
+  }
+}
+
+/* Zig-zag decoding: 0, 1, 2, 3, ... become 0, -1, 1, -2, ... */
+static int32_t upb_zzdec_32(uint32_t n) {
+  return (n >> 1) ^ -(int32_t)(n & 1);
+}
+
+static int64_t upb_zzdec_64(uint64_t n) {
+  return (n >> 1) ^ -(int64_t)(n & 1);
+}
+
+/* Reads a varint length followed by that many bytes.  On success *val refers
+ * to those bytes in place and *ptr is advanced past them. */
+static bool upb_decode_string(const char **ptr, const char *limit,
+                              upb_stringview *val) {
+  uint32_t len;
+
+  if (!upb_decode_varint32(ptr, limit, &len) ||
+      (size_t)(limit - *ptr) < len) {
+    return false;
+  }
+
+  *val = upb_stringview_make(*ptr, len);
+  *ptr += len;
+  return true;
+}
+
+/* Stores "val" at byte offset "ofs" inside "msg". */
+static void upb_set32(void *msg, size_t ofs, uint32_t val) {
+  memcpy((char*)msg + ofs, &val, sizeof(val));
+}
+
+/* Called for field data that cannot be stored.  For now the bytes are just
+ * skipped: *ptr is moved to "limit" and nothing is recorded in "msg". */
+bool upb_append_unknown(const char **ptr, const char *start, const char *limit,
+                        char *msg) {
+  UPB_UNUSED(start);
+  UPB_UNUSED(msg);
+  *ptr = limit;
+  return true;
+}
+
+/* Decodes one field (tag and value) from [*ptr, limit) into "msg", whose
+ * layout is "l".  Unknown fields, repeated fields, and values whose wire type
+ * does not fit the declared field type are skipped.  Returns false on
+ * malformed input, reporting the problem on "env". */
+bool upb_decode_field(const char **ptr, const char *limit, char *msg,
+                      const upb_msglayout_msginit_v1 *l, upb_env *env) {
+  uint32_t tag;
+  uint32_t wire_type;
+  uint32_t field_number;
+  const char *p = *ptr;
+  const char *field_start = p;
+  const upb_msglayout_fieldinit_v1 *f;
+
+  if (!upb_decode_varint32(&p, limit, &tag)) {
+    upb_decode_seterr(env, "Error decoding tag.\n");
+    return false;
+  }
+
+  wire_type = tag & 0x7;
+  field_number = tag >> 3;
+
+  if (field_number == 0) {
+    upb_decode_seterr(env, "Invalid field number 0.\n");
+    return false;
+  }
+
+  f = upb_find_field(l, field_number);
+
+  if (f && f->label == UPB_LABEL_REPEATED) {
+    /* TODO(haberman): append to the array.  Until then treat the field as
+     * unknown so we do not write a scalar over the array pointer. */
+    f = NULL;
+  }
+
+  switch (wire_type) {
+    case UPB_WIRE_TYPE_VARINT: {
+      uint64_t val;
+      if (!upb_decode_varint(&p, limit, &val)) {
+        upb_decode_seterr(env, "Error decoding varint value.\n");
+        return false;
+      }
+
+      if (!f) {
+        return upb_append_unknown(ptr, field_start, p, msg);
+      }
+
+      switch (f->type) {
+        case UPB_DESCRIPTOR_TYPE_INT64:
+        case UPB_DESCRIPTOR_TYPE_UINT64:
+          memcpy(msg + f->offset, &val, sizeof(val));
+          break;
+        case UPB_DESCRIPTOR_TYPE_INT32:
+        case UPB_DESCRIPTOR_TYPE_UINT32:
+        case UPB_DESCRIPTOR_TYPE_ENUM: {
+          uint32_t val32 = val;
+          memcpy(msg + f->offset, &val32, sizeof(val32));
+          break;
+        }
+        case UPB_DESCRIPTOR_TYPE_BOOL: {
+          bool valbool = val != 0;
+          memcpy(msg + f->offset, &valbool, sizeof(valbool));
+          break;
+        }
+        case UPB_DESCRIPTOR_TYPE_SINT32: {
+          int32_t decoded = upb_zzdec_32(val);
+          memcpy(msg + f->offset, &decoded, sizeof(decoded));
+          break;
+        }
+        case UPB_DESCRIPTOR_TYPE_SINT64: {
+          int64_t decoded = upb_zzdec_64(val);
+          memcpy(msg + f->offset, &decoded, sizeof(decoded));
+          break;
+        }
+        default:
+          return upb_append_unknown(ptr, field_start, p, msg);
+      }
+
+      break;
+    }
+    case UPB_WIRE_TYPE_64BIT: {
+      uint64_t val;
+      if (!upb_decode_64bit(&p, limit, &val)) {
+        upb_decode_seterr(env, "Error decoding 64bit value.\n");
+        return false;
+      }
+
+      if (!f) {
+        return upb_append_unknown(ptr, field_start, p, msg);
+      }
+
+      switch (f->type) {
+        case UPB_DESCRIPTOR_TYPE_DOUBLE:
+        case UPB_DESCRIPTOR_TYPE_FIXED64:
+        case UPB_DESCRIPTOR_TYPE_SFIXED64:
+          /* TODO(haberman): byte-swap for big endian. */
+          memcpy(msg + f->offset, &val, sizeof(val));
+          break;
+        default:
+          return upb_append_unknown(ptr, field_start, p, msg);
+      }
+
+      break;
+    }
+    case UPB_WIRE_TYPE_32BIT: {
+      uint32_t val;
+      if (!upb_decode_32bit(&p, limit, &val)) {
+        upb_decode_seterr(env, "Error decoding 32bit value.\n");
+        return false;
+      }
+
+      if (!f) {
+        return upb_append_unknown(ptr, field_start, p, msg);
+      }
+
+      switch (f->type) {
+        case UPB_DESCRIPTOR_TYPE_FLOAT:
+        case UPB_DESCRIPTOR_TYPE_FIXED32:
+        case UPB_DESCRIPTOR_TYPE_SFIXED32:
+          /* TODO(haberman): byte-swap for big endian. */
+          memcpy(msg + f->offset, &val, sizeof(val));
+          break;
+        default:
+          return upb_append_unknown(ptr, field_start, p, msg);
+      }
+
+      break;
+    }
+    case UPB_WIRE_TYPE_DELIMITED: {
+      upb_stringview val;
+      if (!upb_decode_string(&p, limit, &val)) {
+        upb_decode_seterr(env, "Error decoding delimited value.\n");
+        return false;
+      }
+
+      if (!f) {
+        return upb_append_unknown(ptr, field_start, p, msg);
+      }
+
+      switch (f->type) {
+        case UPB_DESCRIPTOR_TYPE_STRING:
+        case UPB_DESCRIPTOR_TYPE_BYTES:
+          memcpy(msg + f->offset, &val, sizeof(val));
+          break;
+        default:
+          /* TODO(haberman): decode submessages. */
+          return upb_append_unknown(ptr, field_start, p, msg);
+      }
+
+      break;
+    }
+    default:
+      /* Groups and the reserved wire types 6 and 7 are not supported.  Bail
+       * out here: past the switch "f" is dereferenced and may be NULL. */
+      upb_decode_seterr(env, "Unsupported wire type.\n");
+      return false;
+  }
+
+  if (f->oneof_index != UPB_NOT_IN_ONEOF) {
+    upb_set32(msg, l->oneofs[f->oneof_index].case_offset, f->number);
+  } else if (l->is_proto2 && f->hasbit != UPB_NO_HASBIT) {
+    /* Record presence where the encoder's has-bit check will look for it. */
+    msg[f->hasbit / 8] |= 1 << (f->hasbit % 8);
+  }
+
+  *ptr = p;
+  return true;
+}
+
+/* Parses the serialized message "buf" into "msg", whose layout is "l".
+ * Returns false if the input is malformed. */
+bool upb_decode(upb_stringview buf, void *msg_void,
+                const upb_msglayout_msginit_v1 *l, upb_env *env) {
+  char *msg = msg_void;
+  const char *ptr = buf.data;
+  const char *limit = ptr + buf.size;
+
+  while (ptr < limit) {
+    if (!upb_decode_field(&ptr, limit, msg, l, env)) {
+      return false;
+    }
+  }
+
+  return true;
+}
diff --git a/upb/decode.h b/upb/decode.h
new file mode 100644
index 0000000..2a9e39e
--- /dev/null
+++ b/upb/decode.h
@@ -0,0 +1,17 @@
+/*
+** upb_decode: parsing into a upb_msg using a upb_msglayout.
+*/
+
+#ifndef UPB_DECODE_H_
+#define UPB_DECODE_H_
+
+#include "upb/msg.h"
+
+UPB_BEGIN_EXTERN_C
+
+bool upb_decode(upb_stringview buf, void *msg,
+                const upb_msglayout_msginit_v1 *l, upb_env *env);
+
+UPB_END_EXTERN_C
+
+#endif  /* UPB_DECODE_H_ */
diff --git a/upb/encode.c b/upb/encode.c
new file mode 100644
index 0000000..30f2da7
--- /dev/null
+++ b/upb/encode.c
@@ -0,0 +1,615 @@
+
+#include "upb/encode.h"
+#include "upb/structs.int.h"
+
+#include <string.h>
+
+#define UPB_PB_VARINT_MAX_LEN 10
+
+/* Initial capacities of the buffers allocated by upb_encode().  Each must be
+ * at least one element, because the buffers grow by doubling. */
+#define UPB_ENCODE_INITIAL_BUFSIZE 256
+#define UPB_ENCODE_INITIAL_SEGMENTS 16
+#define UPB_ENCODE_MAX_DEPTH 64
+
+/* Writes "val" as a varint at "buf", which must have room for
+ * UPB_PB_VARINT_MAX_LEN bytes.  Returns the number of bytes written. */
+static size_t upb_encode_varint(uint64_t val, char *buf) {
+  size_t i;
+  if (val == 0) { buf[0] = 0; return 1; }
+  i = 0;
+  while (val) {
+    uint8_t byte = val & 0x7fU;
+    val >>= 7;
+    if (val) byte |= 0x80U;
+    buf[i++] = byte;
+  }
+  return i;
+}
+
+/* Returns the number of bytes the varint encoding of "val" takes. */
+static size_t upb_varint_size(uint64_t val) {
+  char buf[UPB_PB_VARINT_MAX_LEN];
+  return upb_encode_varint(val, buf);
+}
+
+/* Zig-zag encoding.  The shift is done on the unsigned value because
+ * left-shifting a negative signed integer is undefined behavior. */
+static uint32_t upb_zzenc_32(int32_t n) {
+  return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31);
+}
+
+static uint64_t upb_zzenc_64(int64_t n) {
+  return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63);
+}
+
+typedef enum {
+  UPB_WIRE_TYPE_VARINT      = 0,
+  UPB_WIRE_TYPE_64BIT       = 1,
+  UPB_WIRE_TYPE_DELIMITED   = 2,
+  UPB_WIRE_TYPE_START_GROUP = 3,
+  UPB_WIRE_TYPE_END_GROUP   = 4,
+  UPB_WIRE_TYPE_32BIT       = 5
+} upb_wiretype_t;
+
+/* Index is descriptor type. */
+const uint8_t upb_native_wiretypes[] = {
+  UPB_WIRE_TYPE_END_GROUP,   /* ENDGROUP */
+  UPB_WIRE_TYPE_64BIT,       /* DOUBLE */
+  UPB_WIRE_TYPE_32BIT,       /* FLOAT */
+  UPB_WIRE_TYPE_VARINT,      /* INT64 */
+  UPB_WIRE_TYPE_VARINT,      /* UINT64 */
+  UPB_WIRE_TYPE_VARINT,      /* INT32 */
+  UPB_WIRE_TYPE_64BIT,       /* FIXED64 */
+  UPB_WIRE_TYPE_32BIT,       /* FIXED32 */
+  UPB_WIRE_TYPE_VARINT,      /* BOOL */
+  UPB_WIRE_TYPE_DELIMITED,   /* STRING */
+  UPB_WIRE_TYPE_START_GROUP, /* GROUP */
+  UPB_WIRE_TYPE_DELIMITED,   /* MESSAGE */
+  UPB_WIRE_TYPE_DELIMITED,   /* BYTES */
+  UPB_WIRE_TYPE_VARINT,      /* UINT32 */
+  UPB_WIRE_TYPE_VARINT,      /* ENUM */
+  UPB_WIRE_TYPE_32BIT,       /* SFIXED32 */
+  UPB_WIRE_TYPE_64BIT,       /* SFIXED64 */
+  UPB_WIRE_TYPE_VARINT,      /* SINT32 */
+  UPB_WIRE_TYPE_VARINT,      /* SINT64 */
+};
+
+/* The output buffer is divided into segments; a segment is a string of data
+ * that is "ready to go" -- it does not need any varint lengths inserted into
+ * the middle.  The seams between segments are where varints will be inserted
+ * once they are known.
+ *
+ * We also use the concept of a "run", which is a range of encoded bytes that
+ * occur at a single submessage level.  Every segment contains one or more runs.
+ *
+ * A segment can span messages.  Consider:
+ *
+ *                  .--Submessage lengths---------.
+ *                  |       |                     |
+ *                  |       V                     V
+ *                  V      | |---------------    | |-----------------
+ * Submessages:    | |-----------------------------------------------
+ * Top-level msg: ------------------------------------------------------------
+ *
+ * Segments:          -----   -------------------   -----------------
+ * Runs:              *----   *--------------*---   *----------------
+ * (* marks the start)
+ *
+ * Note that the top-level message is not in any segment because it does not
+ * have any length preceding it.
+ *
+ * A segment is only interrupted when another length needs to be inserted.  So
+ * observe how the second segment spans both the inner submessage and part of
+ * the next enclosing message. */
+
+typedef struct {
+  uint32_t msglen;  /* The length to varint-encode before this segment. */
+  uint32_t seglen;  /* Length of the segment. */
+} upb_segment;
+
+typedef struct {
+  upb_env *env;
+  char *buf, *ptr, *limit;
+
+  /* The beginning of the current run, or undefined if we are at the top
+   * level. */
+  char *runbegin;
+
+  /* The list of segments we are accumulating. */
+  upb_segment *segbuf, *segptr, *seglimit;
+
+  /* The stack of enclosing submessages.  Each entry in the stack points to the
+   * segment where this submessage's length is being accumulated. */
+  int *stack, *top, *stacklimit;
+} upb_encstate;
+
+/* Returns the segment that is accumulating the length of the innermost open
+ * submessage.  Must only be called while buffering (e->top != NULL). */
+static upb_segment *upb_encode_top(upb_encstate *e) {
+  return &e->segbuf[*e->top];
+}
+
+/* Grows the output buffer so that "bytes" more bytes fit at e->ptr, and
+ * rebases the pointers into it.  Returns false if allocation fails. */
+static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
+  char *new_buf;
+  /* Take offsets first: the old pointers are dead once the buffer moves. */
+  size_t ptr_ofs = e->ptr - e->buf;
+  size_t run_ofs = e->runbegin - e->buf;
+  size_t needed = bytes + ptr_ofs;
+  size_t old_size = e->limit - e->buf;
+
+  /* Doubling zero would loop forever, so start from at least one byte. */
+  size_t new_size = old_size ? old_size : 1;
+
+  while (new_size < needed) {
+    new_size *= 2;
+  }
+
+  new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
+
+  if (new_buf == NULL) {
+    return false;
+  }
+
+  e->ptr = new_buf + ptr_ofs;
+  e->runbegin = new_buf + run_ofs;
+  e->limit = new_buf + new_size;
+  e->buf = new_buf;
+  return true;
+}
+
+/* Call to ensure that at least "bytes" bytes are available for writing at
+ * e->ptr.  Returns false if the bytes could not be allocated. */
+static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
+  if (UPB_LIKELY((size_t)(e->limit - e->ptr) >= bytes)) {
+    return true;
+  }
+
+  return upb_encode_growbuffer(e, bytes);
+}
+
+/* Call when "bytes" bytes have been written at e->ptr.  The caller *must* have
+ * previously called reserve() with at least this many bytes. */
+static void upb_encode_advance(upb_encstate *e, size_t bytes) {
+  UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
+  e->ptr += bytes;
+}
+
+/* Writes the given bytes to the buffer, handling reserve/advance. */
+static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
+  if (!upb_encode_reserve(e, len)) {
+    return false;
+  }
+
+  memcpy(e->ptr, data, len);
+  upb_encode_advance(e, len);
+  return true;
+}
+
+/* Finish the current run by adding the run totals to the segment and message
+ * length. */
+static void upb_encode_accumulate(upb_encstate *e) {
+  size_t run_len;
+  UPB_ASSERT(e->ptr >= e->runbegin);
+  run_len = e->ptr - e->runbegin;
+  e->segptr->seglen += run_len;
+  upb_encode_top(e)->msglen += run_len;
+  e->runbegin = e->ptr;
+}
+
+/* Call to indicate the start of delimited region for which the full length is
+ * not yet known.  The length will be inserted at the current position once it
+ * is known (and subsequent data moved if necessary). */
+static bool upb_encode_startdelim(upb_encstate *e) {
+  if (e->top) {
+    /* We are already buffering, advance to the next segment and push it on the
+     * stack. */
+    upb_encode_accumulate(e);
+
+    if (++e->top == e->stacklimit) {
+      /* TODO(haberman): grow stack? */
+      return false;
+    }
+
+    if (++e->segptr == e->seglimit) {
+      /* Grow segment buffer.  segptr == seglimit here, so the index of the
+       * new segment equals the old element count. */
+      size_t count = e->seglimit - e->segbuf;
+      size_t old_size = count * sizeof(upb_segment);
+      size_t new_size = old_size * 2;
+      upb_segment *new_buf =
+          upb_env_realloc(e->env, e->segbuf, old_size, new_size);
+
+      if (new_buf == NULL) {
+        return false;
+      }
+
+      e->segptr = new_buf + count;
+      e->seglimit = new_buf + count * 2;
+      e->segbuf = new_buf;
+    }
+  } else {
+    /* We were previously at the top level, start buffering. */
+    e->segptr = e->segbuf;
+    e->top = e->stack;
+    e->runbegin = e->ptr;
+  }
+
+  *e->top = e->segptr - e->segbuf;
+  e->segptr->seglen = 0;
+  e->segptr->msglen = 0;
+
+  return true;
+}
+
+/* Call to indicate the end of a delimited region.  We now know the length of
+ * the delimited region.  If we are not nested inside any other delimited
+ * regions, we can now emit all of the buffered data we accumulated. */
+static bool upb_encode_enddelim(upb_encstate *e) {
+  size_t msglen;
+  upb_encode_accumulate(e);
+  msglen = upb_encode_top(e)->msglen;
+
+  if (e->top == e->stack) {
+    /* All lengths are now available.  The buffered segments lie back to back
+     * ending at e->ptr; slide each one up to open a gap in front of it and
+     * write its length there.  Going from the last segment to the first means
+     * every byte is moved only once. */
+    char buf[UPB_PB_VARINT_MAX_LEN];
+    upb_segment *s;
+    char *src;
+    char *dst;
+    size_t extra = 0;
+
+    for (s = e->segbuf; s <= e->segptr; s++) {
+      extra += upb_varint_size(s->msglen);
+    }
+
+    if (!upb_encode_reserve(e, extra)) {
+      return false;
+    }
+
+    src = e->ptr;
+    dst = e->ptr + extra;
+
+    for (s = e->segptr; ; s--) {
+      size_t lenbytes = upb_encode_varint(s->msglen, buf);
+      src -= s->seglen;
+      dst -= s->seglen;
+      memmove(dst, src, s->seglen);
+      dst -= lenbytes;
+      memcpy(dst, buf, lenbytes);
+      if (s == e->segbuf) {
+        break;
+      }
+    }
+
+    UPB_ASSERT(dst == src);
+    upb_encode_advance(e, extra);
+    e->top = NULL;
+  } else {
+    /* Need to keep buffering; propagate length info into enclosing
+     * submessages. */
+    --e->top;
+    upb_encode_top(e)->msglen += msglen + upb_varint_size(msglen);
+  }
+
+  return true;
+}
+
+/* encoding of wire types *****************************************************/
+
+static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
+  /* TODO(haberman): byte-swap for big endian. */
+  return upb_put_bytes(e, &val, sizeof(uint64_t));
+}
+
+static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
+  /* TODO(haberman): byte-swap for big endian. */
+  return upb_put_bytes(e, &val, sizeof(uint32_t));
+}
+
+static bool upb_put_varint(upb_encstate *e, uint64_t val) {
+  if (!upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)) {
+    return false;
+  }
+
+  upb_encode_advance(e, upb_encode_varint(val, e->ptr));
+  return true;
+}
+
+static bool upb_put_double(upb_encstate *e, double d) {
+  uint64_t u64;
+  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
+  memcpy(&u64, &d, sizeof(uint64_t));
+  return upb_put_fixed64(e, u64);
+}
+
+static bool upb_put_float(upb_encstate *e, float d) {
+  uint32_t u32;
+  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
+  memcpy(&u32, &d, sizeof(uint32_t));
+  return upb_put_fixed32(e, u32);
+}
+
+/* Returns the field number currently stored in the given oneof's case slot. */
+static uint32_t upb_readcase(const char *msg, const upb_msglayout_msginit_v1 *m,
+                             int oneof_index) {
+  uint32_t ret;
+  memcpy(&ret, msg + m->oneofs[oneof_index].case_offset, sizeof(ret));
+  return ret;
+}
+
+/* Returns true if the has-bit of "f" is set in "msg". */
+static bool upb_readhasbit(const char *msg,
+                           const upb_msglayout_fieldinit_v1 *f) {
+  UPB_ASSERT(f->hasbit != UPB_NO_HASBIT);
+  return (msg[f->hasbit / 8] & (1 << (f->hasbit % 8))) != 0;
+}
+
+/* Writes a field tag.  The shift is unsigned because the largest field
+ * numbers do not fit in a signed int once shifted left by three. */
+static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
+  uint32_t tag = ((uint32_t)field_number << 3) | (uint32_t)wire_type;
+  return upb_put_varint(e, tag);
+}
+
+/* Writes the array as one length-prefixed run of raw "size"-byte elements. */
+static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
+                               size_t size) {
+  size_t bytes = arr->len * size;
+  return upb_put_varint(e, bytes) && upb_put_bytes(e, arr->data, bytes);
+}
+
+bool upb_encode_message(upb_encstate *e, const char *msg,
+                        const upb_msglayout_msginit_v1 *m);
+
+/* Encodes the repeated field "f"; "field_mem" is the address of the field's
+ * upb_array pointer.  A NULL or empty array encodes nothing.  Strings and
+ * submessages are written as one tagged value per element; all other types
+ * are written as a single packed value. */
+static bool upb_encode_array(upb_encstate *e, const char *field_mem,
+                             const upb_msglayout_msginit_v1 *m,
+                             const upb_msglayout_fieldinit_v1 *f) {
+  const upb_array *arr = *(const upb_array**)field_mem;
+
+  if (arr == NULL || arr->len == 0) {
+    return true;
+  }
+
+  /* Types that cannot be packed: every element gets its own tag. */
+  switch (f->type) {
+    case UPB_DESCRIPTOR_TYPE_STRING:
+    case UPB_DESCRIPTOR_TYPE_BYTES: {
+      const upb_stringview *data = arr->data;
+      const upb_stringview *limit = data + arr->len;
+      for (; data < limit; data++) {
+        if (!upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED) ||
+            !upb_put_varint(e, data->size) ||
+            !upb_put_bytes(e, data->data, data->size)) {
+          return false;
+        }
+      }
+      return true;
+    }
+    case UPB_DESCRIPTOR_TYPE_GROUP:
+    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
+      void **data = arr->data;
+      void **limit = data + arr->len;
+      const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index];
+      for (; data < limit; data++) {
+        if (!upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED) ||
+            !upb_encode_startdelim(e) ||
+            !upb_encode_message(e, *data, subm) ||
+            !upb_encode_enddelim(e)) {
+          return false;
+        }
+      }
+      return true;
+    }
+    default:
+      break;
+  }
+
+  /* We encode all primitive arrays as packed, regardless of what was specified
+   * in the .proto file.  Could special case 1-sized arrays. */
+  if (!upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)) {
+    return false;
+  }
+
+#define VARINT_CASE(ctype, encode) { \
+  const ctype *data = arr->data; \
+  const ctype *limit = data + arr->len; \
+  if (!upb_encode_startdelim(e)) { \
+    return false; \
+  } \
+  for (; data < limit; data++) { \
+    if (!upb_put_varint(e, encode)) { \
+      return false; \
+    } \
+  } \
+  return upb_encode_enddelim(e); \
+}
+
+  switch (f->type) {
+    case UPB_DESCRIPTOR_TYPE_DOUBLE:
+      return upb_put_fixedarray(e, arr, sizeof(double));
+    case UPB_DESCRIPTOR_TYPE_FLOAT:
+      return upb_put_fixedarray(e, arr, sizeof(float));
+    case UPB_DESCRIPTOR_TYPE_SFIXED64:
+    case UPB_DESCRIPTOR_TYPE_FIXED64:
+      return upb_put_fixedarray(e, arr, sizeof(uint64_t));
+    case UPB_DESCRIPTOR_TYPE_FIXED32:
+    case UPB_DESCRIPTOR_TYPE_SFIXED32:
+      return upb_put_fixedarray(e, arr, sizeof(uint32_t));
+    case UPB_DESCRIPTOR_TYPE_INT64:
+    case UPB_DESCRIPTOR_TYPE_UINT64:
+      VARINT_CASE(uint64_t, *data);
+    case UPB_DESCRIPTOR_TYPE_UINT32:
+      VARINT_CASE(uint32_t, *data);
+    case UPB_DESCRIPTOR_TYPE_INT32:
+    case UPB_DESCRIPTOR_TYPE_ENUM:
+      /* Negative values are sign-extended to 64 bits on the wire. */
+      VARINT_CASE(int32_t, (int64_t)*data);
+    case UPB_DESCRIPTOR_TYPE_BOOL:
+      VARINT_CASE(bool, *data);
+    case UPB_DESCRIPTOR_TYPE_SINT32:
+      VARINT_CASE(int32_t, upb_zzenc_32(*data));
+    case UPB_DESCRIPTOR_TYPE_SINT64:
+      VARINT_CASE(int64_t, upb_zzenc_64(*data));
+    default:
+      break;
+  }
+
+  UPB_UNREACHABLE();
+#undef VARINT_CASE
+}
+
+/* Encodes the non-repeated field "f", whose value is stored at "field_mem".
+ * If "is_proto3" is true, zero and empty values are considered unset and
+ * nothing is written for them. */
+static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem,
+                                   const upb_msglayout_msginit_v1 *m,
+                                   const upb_msglayout_fieldinit_v1 *f,
+                                   bool is_proto3) {
+#define CASE(ctype, type, wire_type, encodeval) { \
+  ctype val = *(ctype*)field_mem; \
+  if (is_proto3 && val == 0) { \
+    return true; \
+  } \
+  return upb_put_tag(e, f->number, wire_type) && \
+         upb_put_ ## type(e, encodeval); \
+}
+
+  switch (f->type) {
+    case UPB_DESCRIPTOR_TYPE_DOUBLE:
+      CASE(double, double, UPB_WIRE_TYPE_64BIT, val)
+    case UPB_DESCRIPTOR_TYPE_FLOAT:
+      CASE(float, float, UPB_WIRE_TYPE_32BIT, val)
+    case UPB_DESCRIPTOR_TYPE_INT64:
+    case UPB_DESCRIPTOR_TYPE_UINT64:
+      CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val)
+    case UPB_DESCRIPTOR_TYPE_UINT32:
+      CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val)
+    case UPB_DESCRIPTOR_TYPE_INT32:
+    case UPB_DESCRIPTOR_TYPE_ENUM:
+      /* Negative values are sign-extended to 64 bits on the wire. */
+      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val)
+    case UPB_DESCRIPTOR_TYPE_SFIXED64:
+    case UPB_DESCRIPTOR_TYPE_FIXED64:
+      CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val)
+    case UPB_DESCRIPTOR_TYPE_FIXED32:
+    case UPB_DESCRIPTOR_TYPE_SFIXED32:
+      CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val)
+    case UPB_DESCRIPTOR_TYPE_BOOL:
+      CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val)
+    case UPB_DESCRIPTOR_TYPE_SINT32:
+      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_32(val))
+    case UPB_DESCRIPTOR_TYPE_SINT64:
+      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_64(val))
+    case UPB_DESCRIPTOR_TYPE_STRING:
+    case UPB_DESCRIPTOR_TYPE_BYTES: {
+      upb_stringview view = *(upb_stringview*)field_mem;
+      if (is_proto3 && view.size == 0) {
+        return true;
+      }
+      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED) &&
+             upb_put_varint(e, view.size) &&
+             upb_put_bytes(e, view.data, view.size);
+    }
+    case UPB_DESCRIPTOR_TYPE_GROUP:
+    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
+      void *submsg = *(void**)field_mem;
+      if (submsg == NULL) {
+        /* An absent submessage.  Never recurse into a NULL pointer, even if
+         * a proto2 has-bit claims the field is present. */
+        return true;
+      }
+      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED) &&
+             upb_encode_startdelim(e) &&
+             upb_encode_message(e, submsg, m->submsgs[f->submsg_index]) &&
+             upb_encode_enddelim(e);
+    }
+  }
+#undef CASE
+  UPB_UNREACHABLE();
+}
+
+/* Returns true if the non-repeated field "f" should be considered for
+ * encoding: its oneof case is selected, or (proto2) its has-bit is set. */
+bool upb_encode_hasscalarfield(const char *msg,
+                               const upb_msglayout_msginit_v1 *m,
+                               const upb_msglayout_fieldinit_v1 *f) {
+  if (f->oneof_index != UPB_NOT_IN_ONEOF) {
+    return upb_readcase(msg, m, f->oneof_index) == f->number;
+  } else if (m->is_proto2) {
+    return upb_readhasbit(msg, f);
+  } else {
+    /* For proto3, we'll test for the field being empty later. */
+    return true;
+  }
+}
+
+/* Encodes every field of "msg", whose layout is "m", in layout order.
+ * Returns false as soon as any field fails to encode. */
+bool upb_encode_message(upb_encstate* e, const char *msg,
+                        const upb_msglayout_msginit_v1 *m) {
+  int i;
+  for (i = 0; i < m->field_count; i++) {
+    const upb_msglayout_fieldinit_v1 *f = &m->fields[i];
+
+    if (f->label == UPB_LABEL_REPEATED) {
+      if (!upb_encode_array(e, msg + f->offset, m, f)) {
+        return false;
+      }
+    } else {
+      /* A oneof member is present whenever its case is selected, even if it
+       * holds zero, so only ordinary proto3 fields skip default values. */
+      bool skip_defaults =
+          !m->is_proto2 && f->oneof_index == UPB_NOT_IN_ONEOF;
+
+      if (upb_encode_hasscalarfield(msg, m, f) &&
+          !upb_encode_scalarfield(e, msg + f->offset, m, f, skip_defaults)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+/* Serializes "msg", whose layout is "m", into a buffer allocated from "env".
+ * Returns the buffer and stores its length in *size, or returns NULL if
+ * memory runs out or submessages nest too deeply for the fixed-size stack. */
+char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m,
+                 upb_env *env, size_t *size) {
+  upb_encstate e;
+
+  /* Reallocating from NULL is how a fresh block is requested here. */
+  e.env = env;
+  e.buf = upb_env_realloc(env, NULL, 0, UPB_ENCODE_INITIAL_BUFSIZE);
+  e.segbuf = upb_env_realloc(
+      env, NULL, 0, UPB_ENCODE_INITIAL_SEGMENTS * sizeof(upb_segment));
+  e.stack = upb_env_realloc(env, NULL, 0, UPB_ENCODE_MAX_DEPTH * sizeof(int));
+
+  if (e.buf == NULL || e.segbuf == NULL || e.stack == NULL) {
+    return NULL;
+  }
+
+  e.ptr = e.buf;
+  e.runbegin = e.buf;
+  e.limit = e.buf + UPB_ENCODE_INITIAL_BUFSIZE;
+  e.segptr = e.segbuf;
+  e.seglimit = e.segbuf + UPB_ENCODE_INITIAL_SEGMENTS;
+  e.top = NULL;  /* Not buffering: encoding starts at the top level. */
+  e.stacklimit = e.stack + UPB_ENCODE_MAX_DEPTH;
+
+  if (!upb_encode_message(&e, msg, m)) {
+    return NULL;
+  }
+
+  *size = e.ptr - e.buf;
+  return e.buf;
+}
diff --git a/upb/encode.h b/upb/encode.h
new file mode 100644
index 0000000..83908d4
--- /dev/null
+++ b/upb/encode.h
@@ -0,0 +1,17 @@
+/*
+** upb_encode: serializing a upb_msg using a upb_msglayout.
+*/
+
+#ifndef UPB_ENCODE_H_
+#define UPB_ENCODE_H_
+
+#include "upb/msg.h"
+
+UPB_BEGIN_EXTERN_C
+
+char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *l,
+                 upb_env *env, size_t *size);
+
+UPB_END_EXTERN_C
+
+#endif  /* UPB_ENCODE_H_ */
diff --git a/upb/msg.c b/upb/msg.c
index ef39dc0..9d29a39 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -1,5 +1,6 @@
 
 #include "upb/msg.h"
+#include "upb/structs.int.h"
 
 static bool is_power_of_two(size_t val) {
   return (val & (val - 1)) == 0;
@@ -791,15 +792,6 @@ void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val,
 
 /** upb_array *****************************************************************/
 
-struct upb_array {
-  upb_fieldtype_t type;
-  uint8_t element_size;
-  void *data;   /* Each element is element_size. */
-  size_t len;   /* Measured in elements. */
-  size_t size;   /* Measured in elements. */
-  upb_alloc *alloc;
-};
-
 #define DEREF_ARR(arr, i, type) ((type*)arr->data)[i]
 
 size_t upb_array_sizeof(upb_fieldtype_t type) {
diff --git a/upb/msg.h b/upb/msg.h
index ee1e2fb..8024828 100644
--- a/upb/msg.h
+++ b/upb/msg.h
@@ -386,6 +386,7 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
 /** Interfaces for generated code *********************************************/
 
 #define UPB_NOT_IN_ONEOF UINT16_MAX
+#define UPB_NO_HASBIT UINT16_MAX
 
 typedef struct {
   uint32_t number;
diff --git a/upb/structs.int.h b/upb/structs.int.h
new file mode 100644
index 0000000..242155b
--- /dev/null
+++ b/upb/structs.int.h
@@ -0,0 +1,18 @@
+/*
+** structs.int.h: structure definitions that are internal to upb.
+*/
+
+#ifndef UPB_STRUCTS_H_
+#define UPB_STRUCTS_H_
+
+struct upb_array {
+  upb_fieldtype_t type;
+  uint8_t element_size;
+  void *data;   /* Each element is element_size. */
+  size_t len;   /* Measured in elements. */
+  size_t size;   /* Measured in elements. */
+  upb_alloc *alloc;
+};
+
+#endif  /* UPB_STRUCTS_H_ */
+
diff --git a/upb/upb.h b/upb/upb.h
index a2b79ca..19cd02c 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -34,6 +34,13 @@ template <int N> class InlinedEnvironment;
 #define UPB_INLINE static
 #endif
 
+/* Hints to the compiler about likely/unlikely branches. */
+#if defined(__GNUC__) || defined(__clang__)
+#define UPB_LIKELY(x) __builtin_expect((x),1)
+#else
+#define UPB_LIKELY(x) (x)
+#endif
+
 /* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler
  * doesn't provide these preprocessor symbols. */
 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-- 
cgit v1.2.3