From 946dcf4a5d9e35046eb4c2bd4c31e2d4633199e0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 3 Aug 2009 17:22:03 -0700 Subject: Fix tests (and src) such that all tests pass again. --- Makefile | 1 + README | 26 +++++++++++ src/upb_parse.c | 47 ++++--------------- src/upb_parse.h | 32 +++++++++++++ tests/tests.c | 137 +++++++++++++++++++++++++++++++++++++++++--------------- 5 files changed, 169 insertions(+), 74 deletions(-) diff --git a/Makefile b/Makefile index 0b0d9da..2a1e5b9 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ clean: src/libupb.a: $(OBJ) ar rcs src/libupb.a $(OBJ) tests/test_table: src/libupb.a +tests/tests: src/libupb.a tools/upbc: src/libupb.a benchmark/benchmark: src/libupb.a benchmark/google_messages.pb.h benchmark/google_messages.pb.o benchmark/benchmark.o $(CXX) $(CPPFLAGS) -o benchmark/benchmark benchmark/google_messages.pb.o benchmark/benchmark.cc src/libupb.a -lm -lprotobuf -lpthread diff --git a/README b/README index 10e905a..e869667 100644 --- a/README +++ b/README @@ -4,6 +4,32 @@ upb - a minimalist implementation of protocol buffers. - For API documentation, see the header files. - To build type "make". + +ROADMAP OF THE SOURCE +===================== + +benchmark/ + Benchmarks of upb and other protocol buffer implementations. +descriptor/ + Files that describe the format of Protocol Buffer "descriptors", which are + protocol buffers that describe the format of other protocol buffers. These + are used extensively inside upb. +labs/ + Code that is not part of upb, but contains efficiency-related experiments + about alternate ways of implementing things. When possible, these are + benchmarked by the tests in benchmark/. We also test these with the tests + in tests/, to ensure that the alternate implementations are actually correct. +src/ + The core source directory. builds into src/libupb.a. +tests/ + Unit tests. +tools/ + Command-line tools like the upb compiler. + + +CONTACT +======= + Author: Joshua Haberman (joshua@reverberate.org, haberman@google.com) See LICENSE for copyright information. diff --git a/src/upb_parse.c b/src/upb_parse.c index a8fa3a6..1411bbd 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -35,50 +35,21 @@ struct upb_type_info upb_type_info[] = { /* This is called by the inline version of the function if the varint turns out * to be >= 2 bytes. */ -upb_status_t upb_get_v_uint64_t_full(uint8_t *restrict buf, uint8_t *end, - uint64_t *restrict val, +upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, uint8_t **outbuf) { + uint8_t *const *maxend = buf + 10; uint8_t last = 0x80; *val = 0; - for(int bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7) + int bitpos; + for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7) *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; - if(last & 0x80) return UPB_STATUS_NEED_MORE_DATA; + if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA; + if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT; *outbuf = buf; return UPB_STATUS_OK; } -static upb_status_t skip_v_uint64_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf) -{ - /* TODO: fix and optimize. */ - uint8_t last = 0x80; - for(; buf < end && (last & 0x80); buf++) { - last = *buf; - } - - if(last & 0x80) { - return UPB_ERROR_UNTERMINATED_VARINT; - } - *outbuf = buf; - return UPB_STATUS_OK; -} - -static upb_status_t skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf) -{ - uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; - *outbuf = uint32_end; - return UPB_STATUS_OK; -} - -static upb_status_t skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf) -{ - uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; - *outbuf = uint64_end; - return UPB_STATUS_OK; -} - upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt, union upb_wire_value *wv, uint8_t **outbuf) { @@ -94,9 +65,9 @@ static upb_status_t skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t uint8_t **outbuf) { switch(wt) { - case UPB_WIRE_TYPE_VARINT: return skip_v_uint64_t(buf, end, outbuf); - case UPB_WIRE_TYPE_64BIT: return skip_f_uint64_t(buf, end, outbuf); - case UPB_WIRE_TYPE_32BIT: return skip_f_uint32_t(buf, end, outbuf); + case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64_t(buf, end, outbuf); + case UPB_WIRE_TYPE_64BIT: return upb_skip_f_uint64_t(buf, end, outbuf); + case UPB_WIRE_TYPE_32BIT: return upb_skip_f_uint32_t(buf, end, outbuf); case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */ case UPB_WIRE_TYPE_END_GROUP: return UPB_STATUS_OK; default: return UPB_ERROR_ILLEGAL; diff --git a/src/upb_parse.h b/src/upb_parse.h index e727c11..f0ec5e2 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -208,6 +208,38 @@ INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end, return UPB_STATUS_OK; } +INLINE upb_status_t upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end, + uint8_t **outbuf) +{ + uint8_t *const maxend = buf + 10; + uint8_t last = 0x80; + for(; buf < (uint8_t*)end && (last & 0x80); buf++) + last = *buf; + if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA; + if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT; + *outbuf = buf; + return UPB_STATUS_OK; +} + +INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t + **outbuf) +{ + uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; + *outbuf = uint32_end; + return UPB_STATUS_OK; +} + +INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t + **outbuf) +{ + uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; + *outbuf = uint64_end; + return UPB_STATUS_OK; +} + + /* Functions to read .proto values. *******************************************/ /* These functions read the appropriate wire value for a given .proto type diff --git a/tests/tests.c b/tests/tests.c index 497910f..2a769a9 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -3,14 +3,8 @@ #include #include #include -#include "descriptor.c" -#include "upb_context.c" -#include "upb_enum.c" -#include "upb_msg.c" -#include "upb_parse.c" -#include "upb_serialize.c" -#include "upb_table.c" -#include "upb_text.c" +#include "upb_parse.h" +#include "upb_context.h" int num_assertions = 0; #define ASSERT(expr) do { \ @@ -25,10 +19,15 @@ static void test_get_v_uint64_t() uint8_t name[] = bytes; \ uint8_t *name ## _buf = name; \ uint64_t name ## _val = 0; \ - status = upb_get_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _val, &name ## _buf); \ + status = upb_get_v_uint64_t(name, name + sizeof(name) - 1, &name ## _val, &name ## _buf); \ ASSERT(status == UPB_STATUS_OK); \ ASSERT(name ## _val == val); \ ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \ + /* Test NEED_MORE_DATA. */ \ + if(sizeof(name) > 2) { \ + status = upb_get_v_uint64_t(name, name + sizeof(name) - 2, &name ## _val, &name ## _buf); \ + ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \ + } \ } TEST(zero, "\x00", 0ULL); @@ -42,18 +41,37 @@ static void test_get_v_uint64_t() TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0x3fdf9f1e1c181ULL); TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0x303fdf9f1e1c181ULL); TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL); +#undef TEST - uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01}; - uint8_t *elevenbyte_buf = elevenbyte; - uint64_t elevenbyte_val = 0; - upb_status_t status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_val, &elevenbyte_buf); + uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01}; + uint8_t *twelvebyte_buf = twelvebyte; + uint64_t twelvebyte_val = 0; + upb_status_t status; + /* A varint that terminates before hitting the end of the provided buffer, + * but in too many bytes (11 instead of 10). */ + status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_val, &twelvebyte_buf); ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT); - status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_val, &elevenbyte_buf); - /* Byte 10 is 0x80, so we know it's unterminated. */ + + /* A varint that terminates simultaneously with the end of the provided + * buffer, but in too many bytes (11 instead of 10). */ + status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_val, &twelvebyte_buf); ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT); - status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_val, &elevenbyte_buf); + + /* A varint whose buffer ends on exactly the byte where the varint must + * terminate, but the final byte does not terminate. The absolutely most + * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know + * by this point that the varint does not properly terminate. But we also + * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not + * compromise overall correctness -- clients who supply more data later will + * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no + * more data to supply will (rightly) conclude that their protobuf is corrupt. + */ + status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_val, &twelvebyte_buf); + ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT || + status == UPB_STATUS_NEED_MORE_DATA); + + status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_val, &twelvebyte_buf); ASSERT(status == UPB_STATUS_NEED_MORE_DATA); -#undef TEST } static void test_get_v_uint32_t() @@ -63,10 +81,15 @@ static void test_get_v_uint32_t() uint8_t name[] = bytes; \ uint8_t *name ## _buf = name; \ uint32_t name ## _val = 0; \ - status = upb_get_v_uint32_t(name ## _buf, name + sizeof(name), &name ## _val, &name ## _buf); \ + status = upb_get_v_uint32_t(name, name + sizeof(name), &name ## _val, &name ## _buf); \ ASSERT(status == UPB_STATUS_OK); \ ASSERT(name ## _val == val); \ ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \ + /* Test NEED_MORE_DATA. */ \ + if(sizeof(name) > 2) { \ + status = upb_get_v_uint32_t(name, name + sizeof(name) - 2, &name ## _val, &name ## _buf); \ + ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \ + } \ } TEST(zero, "\x00", 0UL); @@ -81,18 +104,37 @@ static void test_get_v_uint32_t() TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0xf1e1c181UL); TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0xf1e1c181UL); TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0xf1e1c181UL); +#undef TEST - uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01}; - uint8_t *elevenbyte_buf = elevenbyte; - uint64_t elevenbyte_val = 0; - upb_status_t status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_val, &elevenbyte_buf); + uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01}; + uint8_t *twelvebyte_buf = twelvebyte; + uint32_t twelvebyte_val = 0; + upb_status_t status; + /* A varint that terminates before hitting the end of the provided buffer, + * but in too many bytes (11 instead of 10). */ + status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_val, &twelvebyte_buf); ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT); - status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_val, &elevenbyte_buf); - /* Byte 10 is 0x80, so we know it's unterminated. */ + + /* A varint that terminates simultaneously with the end of the provided + * buffer, but in too many bytes (11 instead of 10). */ + status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_val, &twelvebyte_buf); ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT); - status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_val, &elevenbyte_buf); + + /* A varint whose buffer ends on exactly the byte where the varint must + * terminate, but the final byte does not terminate. The absolutely most + * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know + * by this point that the varint does not properly terminate. But we also + * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not + * compromise overall correctness -- clients who supply more data later will + * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no + * more data to supply will (rightly) conclude that their protobuf is corrupt. + */ + status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_val, &twelvebyte_buf); + ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT || + status == UPB_STATUS_NEED_MORE_DATA); + + status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_val, &twelvebyte_buf); ASSERT(status == UPB_STATUS_NEED_MORE_DATA); -#undef TEST } static void test_skip_v_uint64_t() @@ -101,9 +143,14 @@ static void test_skip_v_uint64_t() upb_status_t status; \ uint8_t name[] = bytes; \ uint8_t *name ## _buf = name; \ - status = skip_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _buf); \ + status = upb_skip_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _buf); \ ASSERT(status == UPB_STATUS_OK); \ ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \ + /* Test NEED_MORE_DATA. */ \ + if(sizeof(name) > 2) { \ + status = upb_skip_v_uint64_t(name, name + sizeof(name) - 2, &name ## _buf); \ + ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \ + } \ } TEST(zero, "\x00"); @@ -117,18 +164,36 @@ static void test_skip_v_uint64_t() TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01"); TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03"); TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07"); +#undef TEST - uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01}; - uint8_t *elevenbyte_buf = elevenbyte; - upb_status_t status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_buf); - printf("%d\n", status); + uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01}; + uint8_t *twelvebyte_buf = twelvebyte; + upb_status_t status; + /* A varint that terminates before hitting the end of the provided buffer, + * but in too many bytes (11 instead of 10). */ + status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_buf); ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT); - status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_buf); - /* Byte 10 is 0x80, so we know it's unterminated. */ + + /* A varint that terminates simultaneously with the end of the provided + * buffer, but in too many bytes (11 instead of 10). */ + status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_buf); ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT); - status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_buf); + + /* A varint whose buffer ends on exactly the byte where the varint must + * terminate, but the final byte does not terminate. The absolutely most + * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know + * by this point that the varint does not properly terminate. But we also + * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not + * compromise overall correctness -- clients who supply more data later will + * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no + * more data to supply will (rightly) conclude that their protobuf is corrupt. + */ + status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_buf); + ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT || + status == UPB_STATUS_NEED_MORE_DATA); + + status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_buf); ASSERT(status == UPB_STATUS_NEED_MORE_DATA); -#undef TEST } static void test_get_f_uint32_t() @@ -150,7 +215,7 @@ static void test_get_f_uint32_t() uint8_t threeb[] = {0x00, 0x00, 0x00}; uint8_t *threeb_buf = threeb; uint32_t threeb_val; - upb_status_t status = upb_get_f_uint32_t(threeb_buf, threeb + sizeof(threeb), &threeb_val, &threeb_buf); + upb_status_t status = upb_get_f_uint32_t(threeb, threeb + sizeof(threeb), &threeb_val, &threeb_buf); ASSERT(status == UPB_STATUS_NEED_MORE_DATA); #undef TEST -- cgit v1.2.3