diff options
Diffstat (limited to 'upb/encode.c')
-rw-r--r-- | upb/encode.c | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/upb/encode.c b/upb/encode.c new file mode 100644 index 0000000..2fe1cc3 --- /dev/null +++ b/upb/encode.c @@ -0,0 +1,381 @@ + +#include "upb/encode.h" +#include "upb/structs.int.h" + +#define UPB_PB_VARINT_MAX_LEN 10 +#define CHK(x) do { if (!(x)) { return false; } } while(0) + +static size_t upb_encode_varint(uint64_t val, char *buf) { + size_t i; + if (val == 0) { buf[0] = 0; return 1; } + i = 0; + while (val) { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } + return i; +} + +static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } +static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } + +typedef enum { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5 +} upb_wiretype_t; + +/* Index is descriptor type. */ +const uint8_t upb_native_wiretypes[] = { + UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ + UPB_WIRE_TYPE_64BIT, /* DOUBLE */ + UPB_WIRE_TYPE_32BIT, /* FLOAT */ + UPB_WIRE_TYPE_VARINT, /* INT64 */ + UPB_WIRE_TYPE_VARINT, /* UINT64 */ + UPB_WIRE_TYPE_VARINT, /* INT32 */ + UPB_WIRE_TYPE_64BIT, /* FIXED64 */ + UPB_WIRE_TYPE_32BIT, /* FIXED32 */ + UPB_WIRE_TYPE_VARINT, /* BOOL */ + UPB_WIRE_TYPE_DELIMITED, /* STRING */ + UPB_WIRE_TYPE_START_GROUP, /* GROUP */ + UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */ + UPB_WIRE_TYPE_DELIMITED, /* BYTES */ + UPB_WIRE_TYPE_VARINT, /* UINT32 */ + UPB_WIRE_TYPE_VARINT, /* ENUM */ + UPB_WIRE_TYPE_32BIT, /* SFIXED32 */ + UPB_WIRE_TYPE_64BIT, /* SFIXED64 */ + UPB_WIRE_TYPE_VARINT, /* SINT32 */ + UPB_WIRE_TYPE_VARINT, /* SINT64 */ +}; + +typedef struct { + upb_env *env; + char *buf, *ptr, *limit; +} upb_encstate; + +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; +} + +static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size); + CHK(new_buf); + + /* We want previous data at the end, realloc() put it at the beginning. */ + memmove(e->limit - old_size, e->buf, old_size); + + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; + return true; +} + +/* Call to ensure that at least "bytes" bytes are available for writing at + * e->ptr. Returns false if the bytes could not be allocated. */ +static bool upb_encode_reserve(upb_encstate *e, size_t bytes) { + CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) || + upb_encode_growbuffer(e, bytes)); + + e->ptr -= bytes; + return true; +} + +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) { + CHK(upb_encode_reserve(e, len)); + memcpy(e->ptr, data, len); + return true; +} + +static bool upb_put_fixed64(upb_encstate *e, uint64_t val) { + /* TODO(haberman): byte-swap for big endian. */ + return upb_put_bytes(e, &val, sizeof(uint64_t)); +} + +static bool upb_put_fixed32(upb_encstate *e, uint32_t val) { + /* TODO(haberman): byte-swap for big endian. */ + return upb_put_bytes(e, &val, sizeof(uint32_t)); +} + +static bool upb_put_varint(upb_encstate *e, uint64_t val) { + size_t len; + char *start; + CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)); + len = upb_encode_varint(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; + return true; +} + +static bool upb_put_double(upb_encstate *e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + return upb_put_fixed64(e, u64); +} + +static bool upb_put_float(upb_encstate *e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + return upb_put_fixed32(e, u32); +} + +static uint32_t upb_readcase(const char *msg, const upb_msglayout_msginit_v1 *m, + int oneof_index) { + uint32_t ret; + memcpy(&ret, msg + m->oneofs[oneof_index].case_offset, sizeof(ret)); + return ret; +} + +static bool upb_readhasbit(const char *msg, + const upb_msglayout_fieldinit_v1 *f) { + UPB_ASSERT(f->hasbit != UPB_NO_HASBIT); + return msg[f->hasbit / 8] & (1 << (f->hasbit % 8)); +} + +static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) { + return upb_put_varint(e, (field_number << 3) | wire_type); +} + +static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr, + size_t size) { + size_t bytes = arr->len * size; + return upb_put_bytes(e, arr->data, bytes) && upb_put_varint(e, bytes); +} + +bool upb_encode_message(upb_encstate *e, const char *msg, + const upb_msglayout_msginit_v1 *m, + size_t *size); + +static bool upb_encode_array(upb_encstate *e, const char *field_mem, + const upb_msglayout_msginit_v1 *m, + const upb_msglayout_fieldinit_v1 *f) { + const upb_array *arr = *(const upb_array**)field_mem; + + if (arr->len == 0) { + return true; + } + +#define VARINT_CASE(ctype, encode) do { \ + uint64_t *start = arr->data; \ + uint64_t *ptr = start + arr->len; \ + char *buf_ptr = e->ptr; \ + do { \ + ptr--; \ + CHK(upb_put_varint(e, encode)); \ + } while (ptr != start); \ + CHK(upb_put_varint(e, buf_ptr - e->ptr)); \ + break; \ +} while(0) + + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + CHK(upb_put_fixedarray(e, arr, sizeof(double))); + break; + case UPB_DESCRIPTOR_TYPE_FLOAT: + CHK(upb_put_fixedarray(e, arr, sizeof(float))); + break; + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t))); + break; + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t))); + break; + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + VARINT_CASE(uint64_t, *ptr); + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_ENUM: + VARINT_CASE(uint32_t, *ptr); + case UPB_DESCRIPTOR_TYPE_BOOL: + VARINT_CASE(bool, *ptr); + case UPB_DESCRIPTOR_TYPE_SINT32: + VARINT_CASE(int32_t, upb_zzenc_32(*ptr)); + case UPB_DESCRIPTOR_TYPE_SINT64: + VARINT_CASE(int64_t, upb_zzenc_64(*ptr)); + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { + upb_stringview *start = arr->data; + upb_stringview *ptr = start + arr->len; + do { + ptr--; + CHK(upb_put_bytes(e, ptr->data, ptr->size) && + upb_put_varint(e, ptr->size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); + } while (ptr != start); + return true; + } + case UPB_DESCRIPTOR_TYPE_GROUP: { + void **start = arr->data; + void **ptr = start + arr->len; + const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; + do { + size_t size; + ptr--; + CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && + upb_encode_message(e, *ptr, subm, &size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP)); + } while (ptr != start); + return true; + } + case UPB_DESCRIPTOR_TYPE_MESSAGE: { + void **start = arr->data; + void **ptr = start + arr->len; + const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; + do { + size_t size; + ptr--; + CHK(upb_encode_message(e, *ptr, subm, &size) && + upb_put_varint(e, size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); + } while (ptr != start); + return true; + } + } +#undef VARINT_CASE + + /* We encode all primitive arrays as packed, regardless of what was specified + * in the .proto file. Could special case 1-sized arrays. */ + CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); + return true; +} + +static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, + const upb_msglayout_msginit_v1 *m, + const upb_msglayout_fieldinit_v1 *f, + bool is_proto3) { +#define CASE(ctype, type, wire_type, encodeval) do { \ + ctype val = *(ctype*)field_mem; \ + if (is_proto3 && val == 0) { \ + return true; \ + } \ + return upb_put_ ## type(e, encodeval) && \ + upb_put_tag(e, f->number, wire_type); \ +} while(0) + + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + CASE(double, double, UPB_WIRE_TYPE_64BIT, val); + case UPB_DESCRIPTOR_TYPE_FLOAT: + CASE(float, float, UPB_WIRE_TYPE_32BIT, val); + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_ENUM: + CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val); + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val); + case UPB_DESCRIPTOR_TYPE_BOOL: + CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_SINT32: + CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_32(val)); + case UPB_DESCRIPTOR_TYPE_SINT64: + CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_64(val)); + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { + upb_stringview view = *(upb_stringview*)field_mem; + if (is_proto3 && view.size == 0) { + return true; + } + return upb_put_bytes(e, view.data, view.size) && + upb_put_varint(e, view.size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + } + case UPB_DESCRIPTOR_TYPE_GROUP: { + size_t size; + void *submsg = *(void**)field_mem; + const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; + if (is_proto3 && submsg == NULL) { + return true; + } + return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && + upb_encode_message(e, submsg, subm, &size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP); + } + case UPB_DESCRIPTOR_TYPE_MESSAGE: { + size_t size; + void *submsg = *(void**)field_mem; + const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; + if (is_proto3 && submsg == NULL) { + return true; + } + return upb_encode_message(e, submsg, subm, &size) && + upb_put_varint(e, size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + } + } +#undef CASE + UPB_UNREACHABLE(); +} + +bool upb_encode_hasscalarfield(const char *msg, + const upb_msglayout_msginit_v1 *m, + const upb_msglayout_fieldinit_v1 *f) { + if (f->oneof_index != UPB_NOT_IN_ONEOF) { + return upb_readcase(msg, m, f->oneof_index) == f->number; + } else if (m->is_proto2) { + return upb_readhasbit(msg, f); + } else { + /* For proto3, we'll test for the field being empty later. */ + return true; + } +} + +bool upb_encode_message(upb_encstate* e, const char *msg, + const upb_msglayout_msginit_v1 *m, + size_t *size) { + int i; + char *buf_end = e->ptr; + for (i = m->field_count - 1; i >= 0; i--) { + const upb_msglayout_fieldinit_v1 *f = &m->fields[i]; + + if (f->label == UPB_LABEL_REPEATED) { + CHK(upb_encode_array(e, msg, m, f)); + } else { + if (upb_encode_hasscalarfield(msg, m, f)) { + CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, !m->is_proto2)); + } + } + } + + *size = buf_end - e->ptr; + return true; +} + +char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m, + upb_env *env, size_t *size) { + upb_encstate e; + e.env = env; + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + + if (!upb_encode_message(&e, msg, m, size)) { + return false; + } + + *size = e.limit - e.ptr; + return e.ptr; +} |