From 919fea438a5ac5366684cfa26d2bb3d17519cb60 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Mon, 18 May 2015 10:55:20 -0700 Subject: Ported upb to C89, for greater portability. A large part of this change contains surface-level porting, like moving variable declarations to the top of the block. However there are a few more substantial things too: - moved internal-only struct definitions to a separate file (structdefs.int.h), for greater encapsulation and ABI compatibility. - removed the UPB_UPCAST macro, since it requires access to the internal-only struct definitions. Replaced uses with calls to inline, type-safe casting functions. - removed the UPB_DEFINE_CLASS/UPB_DEFINE_STRUCT macros. Class and struct definitions are now more explicit -- you get to see the actual class/struct keywords in the source. The casting convenience functions have been moved into UPB_DECLARE_DERIVED_TYPE() and UPB_DECLARE_DERIVED_TYPE2(). - the new way that we duplicate base methods in derived types is also more convenient and requires less duplication. It is also less greppable, but hopefully that is not too big a problem. Compiler flags (-std=c89 -pedantic) should help to rigorously enforce that the code is free of C99-isms. A few functions are not available in C89 (strtoll). There are temporary, hacky solutions in place. --- upb/pb/varint.int.h | 86 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 35 deletions(-) (limited to 'upb/pb/varint.int.h') diff --git a/upb/pb/varint.int.h b/upb/pb/varint.int.h index 8498acd..a394a75 100644 --- a/upb/pb/varint.int.h +++ b/upb/pb/varint.int.h @@ -20,25 +20,25 @@ extern "C" { #endif -// A list of types as they are encoded on-the-wire. +/* A list of types as they are encoded on-the-wire. */ typedef enum { UPB_WIRE_TYPE_VARINT = 0, UPB_WIRE_TYPE_64BIT = 1, UPB_WIRE_TYPE_DELIMITED = 2, UPB_WIRE_TYPE_START_GROUP = 3, UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5, + UPB_WIRE_TYPE_32BIT = 5 } upb_wiretype_t; #define UPB_MAX_WIRE_TYPE 5 -// The maximum number of bytes that it takes to encode a 64-bit varint. -// Note that with a better encoding this could be 9 (TODO: write up a -// wiki document about this). +/* The maximum number of bytes that it takes to encode a 64-bit varint. + * Note that with a better encoding this could be 9 (TODO: write up a + * wiki document about this). */ #define UPB_PB_VARINT_MAX_LEN 10 -// Array of the "native" (ie. non-packed-repeated) wire type for the given a -// descriptor type (upb_descriptortype_t). +/* Array of the "native" (ie. non-packed-repeated) wire type for the given a + * descriptor type (upb_descriptortype_t). */ extern const uint8_t upb_pb_native_wire_types[]; /* Zig-zag encoding/decoding **************************************************/ @@ -54,44 +54,59 @@ UPB_INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } /* Decoding *******************************************************************/ -// All decoding functions return this struct by value. +/* All decoding functions return this struct by value. */ typedef struct { - const char *p; // NULL if the varint was unterminated. + const char *p; /* NULL if the varint was unterminated. */ uint64_t val; } upb_decoderet; -// Four functions for decoding a varint of at most eight bytes. They are all -// functionally identical, but are implemented in different ways and likely have -// different performance profiles. We keep them around for performance testing. -// -// Note that these functions may not read byte-by-byte, so they must not be used -// unless there are at least eight bytes left in the buffer! +UPB_INLINE upb_decoderet upb_decoderet_make(const char *p, uint64_t val) { + upb_decoderet ret; + ret.p = p; + ret.val = val; + return ret; +} + +/* Four functions for decoding a varint of at most eight bytes. They are all + * functionally identical, but are implemented in different ways and likely have + * different performance profiles. We keep them around for performance testing. + * + * Note that these functions may not read byte-by-byte, so they must not be used + * unless there are at least eight bytes left in the buffer! */ upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r); upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r); upb_decoderet upb_vdecode_max8_wright(upb_decoderet r); upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r); -// Template for a function that checks the first two bytes with branching -// and dispatches 2-10 bytes with a separate function. Note that this may read -// up to 10 bytes, so it must not be used unless there are at least ten bytes -// left in the buffer! +/* Template for a function that checks the first two bytes with branching + * and dispatches 2-10 bytes with a separate function. Note that this may read + * up to 10 bytes, so it must not be used unless there are at least ten bytes + * left in the buffer! */ #define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ uint8_t *p = (uint8_t*)_p; \ - if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \ - upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)}; \ - if ((*(p + 1) & 0x80) == 0) return r; \ + upb_decoderet r; \ + if ((*p & 0x80) == 0) { \ + /* Common case: one-byte varint. */ \ + return upb_decoderet_make(_p + 1, *p & 0x7fU); \ + } \ + r = upb_decoderet_make(_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)); \ + if ((*(p + 1) & 0x80) == 0) { \ + /* Two-byte varint. */ \ + return r; \ + } \ + /* Longer varint, fallback to out-of-line function. */ \ return decode_max8_function(r); \ } -UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32); -UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64); -UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright); -UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); +UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32) +UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64) +UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright) +UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino) #undef UPB_VARINT_DECODER_CHECK2 -// Our canonical functions for decoding varints, based on the currently -// favored best-performing implementations. +/* Our canonical functions for decoding varints, based on the currently + * favored best-performing implementations. */ UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) { if (sizeof(long) == 8) return upb_vdecode_check2_branch64(p); @@ -108,7 +123,7 @@ UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { UPB_INLINE int upb_value_size(uint64_t val) { #ifdef __GNUC__ - int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. + int high_bit = 63 - __builtin_clzll(val); /* 0-based, undef if val == 0. */ #else int high_bit = 0; uint64_t tmp = val; @@ -117,13 +132,14 @@ UPB_INLINE int upb_value_size(uint64_t val) { return val == 0 ? 1 : high_bit / 8 + 1; } -// Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN -// bytes long), returning how many bytes were used. -// -// TODO: benchmark and optimize if necessary. +/* Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN + * bytes long), returning how many bytes were used. + * + * TODO: benchmark and optimize if necessary. */ UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) { + size_t i; if (val == 0) { buf[0] = 0; return 1; } - size_t i = 0; + i = 0; while (val) { uint8_t byte = val & 0x7fU; val >>= 7; @@ -138,7 +154,7 @@ UPB_INLINE size_t upb_varint_size(uint64_t val) { return upb_vencode64(val, buf); } -// Encodes a 32-bit varint, *not* sign-extended. +/* Encodes a 32-bit varint, *not* sign-extended. */ UPB_INLINE uint64_t upb_vencode32(uint32_t val) { char buf[UPB_PB_VARINT_MAX_LEN]; size_t bytes = upb_vencode64(val, buf); -- cgit v1.2.3