summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile1
-rw-r--r--README26
-rw-r--r--src/upb_parse.c47
-rw-r--r--src/upb_parse.h32
-rw-r--r--tests/tests.c137
5 files changed, 169 insertions, 74 deletions
diff --git a/Makefile b/Makefile
index 0b0d9da..2a1e5b9 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,7 @@ clean:
src/libupb.a: $(OBJ)
ar rcs src/libupb.a $(OBJ)
tests/test_table: src/libupb.a
+tests/tests: src/libupb.a
tools/upbc: src/libupb.a
benchmark/benchmark: src/libupb.a benchmark/google_messages.pb.h benchmark/google_messages.pb.o benchmark/benchmark.o
$(CXX) $(CPPFLAGS) -o benchmark/benchmark benchmark/google_messages.pb.o benchmark/benchmark.cc src/libupb.a -lm -lprotobuf -lpthread
diff --git a/README b/README
index 10e905a..e869667 100644
--- a/README
+++ b/README
@@ -4,6 +4,32 @@ upb - a minimalist implementation of protocol buffers.
- For API documentation, see the header files.
- To build type "make".
+
+ROADMAP OF THE SOURCE
+=====================
+
+benchmark/
+ Benchmarks of upb and other protocol buffer implementations.
+descriptor/
+ Files that describe the format of Protocol Buffer "descriptors", which are
+ protocol buffers that describe the format of other protocol buffers. These
+ are used extensively inside upb.
+labs/
+ Code that is not part of upb, but contains efficiency-related experiments
+ about alternate ways of implementing things. When possible, these are
+ benchmarked by the tests in benchmark/. We also test these with the tests
+ in tests/, to ensure that the alternate implementations are actually correct.
+src/
+ The core source directory. builds into src/libupb.a.
+tests/
+ Unit tests.
+tools/
+ Command-line tools like the upb compiler.
+
+
+CONTACT
+=======
+
Author: Joshua Haberman (joshua@reverberate.org, haberman@google.com)
See LICENSE for copyright information.
diff --git a/src/upb_parse.c b/src/upb_parse.c
index a8fa3a6..1411bbd 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -35,50 +35,21 @@ struct upb_type_info upb_type_info[] = {
/* This is called by the inline version of the function if the varint turns out
* to be >= 2 bytes. */
-upb_status_t upb_get_v_uint64_t_full(uint8_t *restrict buf, uint8_t *end,
- uint64_t *restrict val,
+upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t **outbuf)
{
+ uint8_t *const *maxend = buf + 10;
uint8_t last = 0x80;
*val = 0;
- for(int bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
+ int bitpos;
+ for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
*val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
- if(last & 0x80) return UPB_STATUS_NEED_MORE_DATA;
+ if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA;
+ if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT;
*outbuf = buf;
return UPB_STATUS_OK;
}
-static upb_status_t skip_v_uint64_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf)
-{
- /* TODO: fix and optimize. */
- uint8_t last = 0x80;
- for(; buf < end && (last & 0x80); buf++) {
- last = *buf;
- }
-
- if(last & 0x80) {
- return UPB_ERROR_UNTERMINATED_VARINT;
- }
- *outbuf = buf;
- return UPB_STATUS_OK;
-}
-
-static upb_status_t skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf)
-{
- uint8_t *uint32_end = buf + sizeof(uint32_t);
- if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA;
- *outbuf = uint32_end;
- return UPB_STATUS_OK;
-}
-
-static upb_status_t skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf)
-{
- uint8_t *uint64_end = buf + sizeof(uint64_t);
- if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA;
- *outbuf = uint64_end;
- return UPB_STATUS_OK;
-}
-
upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
union upb_wire_value *wv, uint8_t **outbuf)
{
@@ -94,9 +65,9 @@ static upb_status_t skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t
uint8_t **outbuf)
{
switch(wt) {
- case UPB_WIRE_TYPE_VARINT: return skip_v_uint64_t(buf, end, outbuf);
- case UPB_WIRE_TYPE_64BIT: return skip_f_uint64_t(buf, end, outbuf);
- case UPB_WIRE_TYPE_32BIT: return skip_f_uint32_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_64BIT: return upb_skip_f_uint64_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_32BIT: return upb_skip_f_uint32_t(buf, end, outbuf);
case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
case UPB_WIRE_TYPE_END_GROUP: return UPB_STATUS_OK;
default: return UPB_ERROR_ILLEGAL;
diff --git a/src/upb_parse.h b/src/upb_parse.h
index e727c11..f0ec5e2 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -208,6 +208,38 @@ INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end,
return UPB_STATUS_OK;
}
+INLINE upb_status_t upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end,
+ uint8_t **outbuf)
+{
+ uint8_t *const maxend = buf + 10;
+ uint8_t last = 0x80;
+ for(; buf < (uint8_t*)end && (last & 0x80); buf++)
+ last = *buf;
+ if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA;
+ if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT;
+ *outbuf = buf;
+ return UPB_STATUS_OK;
+}
+
+INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t
+ **outbuf)
+{
+ uint8_t *uint32_end = buf + sizeof(uint32_t);
+ if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA;
+ *outbuf = uint32_end;
+ return UPB_STATUS_OK;
+}
+
+INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t
+ **outbuf)
+{
+ uint8_t *uint64_end = buf + sizeof(uint64_t);
+ if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA;
+ *outbuf = uint64_end;
+ return UPB_STATUS_OK;
+}
+
+
/* Functions to read .proto values. *******************************************/
/* These functions read the appropriate wire value for a given .proto type
diff --git a/tests/tests.c b/tests/tests.c
index 497910f..2a769a9 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -3,14 +3,8 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
-#include "descriptor.c"
-#include "upb_context.c"
-#include "upb_enum.c"
-#include "upb_msg.c"
-#include "upb_parse.c"
-#include "upb_serialize.c"
-#include "upb_table.c"
-#include "upb_text.c"
+#include "upb_parse.h"
+#include "upb_context.h"
int num_assertions = 0;
#define ASSERT(expr) do { \
@@ -25,10 +19,15 @@ static void test_get_v_uint64_t()
uint8_t name[] = bytes; \
uint8_t *name ## _buf = name; \
uint64_t name ## _val = 0; \
- status = upb_get_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _val, &name ## _buf); \
+ status = upb_get_v_uint64_t(name, name + sizeof(name) - 1, &name ## _val, &name ## _buf); \
ASSERT(status == UPB_STATUS_OK); \
ASSERT(name ## _val == val); \
ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
+ /* Test NEED_MORE_DATA. */ \
+ if(sizeof(name) > 2) { \
+ status = upb_get_v_uint64_t(name, name + sizeof(name) - 2, &name ## _val, &name ## _buf); \
+ ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \
+ } \
}
TEST(zero, "\x00", 0ULL);
@@ -42,18 +41,37 @@ static void test_get_v_uint64_t()
TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0x3fdf9f1e1c181ULL);
TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0x303fdf9f1e1c181ULL);
TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
+#undef TEST
- uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01};
- uint8_t *elevenbyte_buf = elevenbyte;
- uint64_t elevenbyte_val = 0;
- upb_status_t status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_val, &elevenbyte_buf);
+ uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+ uint8_t *twelvebyte_buf = twelvebyte;
+ uint64_t twelvebyte_val = 0;
+ upb_status_t status;
+ /* A varint that terminates before hitting the end of the provided buffer,
+ * but in too many bytes (11 instead of 10). */
+ status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_val, &twelvebyte_buf);
ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
- status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_val, &elevenbyte_buf);
- /* Byte 10 is 0x80, so we know it's unterminated. */
+
+ /* A varint that terminates simultaneously with the end of the provided
+ * buffer, but in too many bytes (11 instead of 10). */
+ status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_val, &twelvebyte_buf);
ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
- status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_val, &elevenbyte_buf);
+
+ /* A varint whose buffer ends on exactly the byte where the varint must
+ * terminate, but the final byte does not terminate. The absolutely most
+ * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
+ * by this point that the varint does not properly terminate. But we also
+ * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
+ * compromise overall correctness -- clients who supply more data later will
+ * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
+ * more data to supply will (rightly) conclude that their protobuf is corrupt.
+ */
+ status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_val, &twelvebyte_buf);
+ ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT ||
+ status == UPB_STATUS_NEED_MORE_DATA);
+
+ status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_val, &twelvebyte_buf);
ASSERT(status == UPB_STATUS_NEED_MORE_DATA);
-#undef TEST
}
static void test_get_v_uint32_t()
@@ -63,10 +81,15 @@ static void test_get_v_uint32_t()
uint8_t name[] = bytes; \
uint8_t *name ## _buf = name; \
uint32_t name ## _val = 0; \
- status = upb_get_v_uint32_t(name ## _buf, name + sizeof(name), &name ## _val, &name ## _buf); \
+ status = upb_get_v_uint32_t(name, name + sizeof(name), &name ## _val, &name ## _buf); \
ASSERT(status == UPB_STATUS_OK); \
ASSERT(name ## _val == val); \
ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
+ /* Test NEED_MORE_DATA. */ \
+ if(sizeof(name) > 2) { \
+ status = upb_get_v_uint32_t(name, name + sizeof(name) - 2, &name ## _val, &name ## _buf); \
+ ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \
+ } \
}
TEST(zero, "\x00", 0UL);
@@ -81,18 +104,37 @@ static void test_get_v_uint32_t()
TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0xf1e1c181UL);
TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0xf1e1c181UL);
TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0xf1e1c181UL);
+#undef TEST
- uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01};
- uint8_t *elevenbyte_buf = elevenbyte;
- uint64_t elevenbyte_val = 0;
- upb_status_t status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_val, &elevenbyte_buf);
+ uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+ uint8_t *twelvebyte_buf = twelvebyte;
+ uint32_t twelvebyte_val = 0;
+ upb_status_t status;
+ /* A varint that terminates before hitting the end of the provided buffer,
+ * but in too many bytes (11 instead of 10). */
+ status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_val, &twelvebyte_buf);
ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
- status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_val, &elevenbyte_buf);
- /* Byte 10 is 0x80, so we know it's unterminated. */
+
+ /* A varint that terminates simultaneously with the end of the provided
+ * buffer, but in too many bytes (11 instead of 10). */
+ status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_val, &twelvebyte_buf);
ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
- status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_val, &elevenbyte_buf);
+
+ /* A varint whose buffer ends on exactly the byte where the varint must
+ * terminate, but the final byte does not terminate. The absolutely most
+ * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
+ * by this point that the varint does not properly terminate. But we also
+ * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
+ * compromise overall correctness -- clients who supply more data later will
+ * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
+ * more data to supply will (rightly) conclude that their protobuf is corrupt.
+ */
+ status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_val, &twelvebyte_buf);
+ ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT ||
+ status == UPB_STATUS_NEED_MORE_DATA);
+
+ status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_val, &twelvebyte_buf);
ASSERT(status == UPB_STATUS_NEED_MORE_DATA);
-#undef TEST
}
static void test_skip_v_uint64_t()
@@ -101,9 +143,14 @@ static void test_skip_v_uint64_t()
upb_status_t status; \
uint8_t name[] = bytes; \
uint8_t *name ## _buf = name; \
- status = skip_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _buf); \
+ status = upb_skip_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _buf); \
ASSERT(status == UPB_STATUS_OK); \
ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
+ /* Test NEED_MORE_DATA. */ \
+ if(sizeof(name) > 2) { \
+ status = upb_skip_v_uint64_t(name, name + sizeof(name) - 2, &name ## _buf); \
+ ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \
+ } \
}
TEST(zero, "\x00");
@@ -117,18 +164,36 @@ static void test_skip_v_uint64_t()
TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01");
TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03");
TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07");
+#undef TEST
- uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01};
- uint8_t *elevenbyte_buf = elevenbyte;
- upb_status_t status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_buf);
- printf("%d\n", status);
+ uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+ uint8_t *twelvebyte_buf = twelvebyte;
+ upb_status_t status;
+ /* A varint that terminates before hitting the end of the provided buffer,
+ * but in too many bytes (11 instead of 10). */
+ status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_buf);
ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
- status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_buf);
- /* Byte 10 is 0x80, so we know it's unterminated. */
+
+ /* A varint that terminates simultaneously with the end of the provided
+ * buffer, but in too many bytes (11 instead of 10). */
+ status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_buf);
ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
- status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_buf);
+
+ /* A varint whose buffer ends on exactly the byte where the varint must
+ * terminate, but the final byte does not terminate. The absolutely most
+ * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
+ * by this point that the varint does not properly terminate. But we also
+ * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
+ * compromise overall correctness -- clients who supply more data later will
+ * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
+ * more data to supply will (rightly) conclude that their protobuf is corrupt.
+ */
+ status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_buf);
+ ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT ||
+ status == UPB_STATUS_NEED_MORE_DATA);
+
+ status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_buf);
ASSERT(status == UPB_STATUS_NEED_MORE_DATA);
-#undef TEST
}
static void test_get_f_uint32_t()
@@ -150,7 +215,7 @@ static void test_get_f_uint32_t()
uint8_t threeb[] = {0x00, 0x00, 0x00};
uint8_t *threeb_buf = threeb;
uint32_t threeb_val;
- upb_status_t status = upb_get_f_uint32_t(threeb_buf, threeb + sizeof(threeb), &threeb_val, &threeb_buf);
+ upb_status_t status = upb_get_f_uint32_t(threeb, threeb + sizeof(threeb), &threeb_val, &threeb_buf);
ASSERT(status == UPB_STATUS_NEED_MORE_DATA);
#undef TEST
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback