summary | refs | log | tree | commit | diff
path: root/upb/json/parser.rl
diff options
context:
space:
mode:
Diffstat (limited to 'upb/json/parser.rl')
-rw-r--r-- upb/json/parser.rl 28
1 files changed, 24 insertions, 4 deletions
diff --git a/upb/json/parser.rl b/upb/json/parser.rl
index 75860e5..92a1566 100644
--- a/upb/json/parser.rl
+++ b/upb/json/parser.rl
@@ -438,17 +438,37 @@ static void start_hex(upb_json_parser *p, const char *ptr) {
}
static void hex(upb_json_parser *p, const char *end) {
- UPB_UNUSED(end);
const char *start = p->text_begin;
- assert(end - start == 4);
+ UPB_ASSERT_VAR(end, end - start == 4);
uint16_t codepoint =
(hexdigit(start[0]) << 12) |
(hexdigit(start[1]) << 8) |
(hexdigit(start[2]) << 4) |
hexdigit(start[3]);
- // TODO(haberman): convert to UTF-8 and emit (though if it is a high surrogate
+ // Emit the codepoint as UTF-8; range bounds are inclusive to avoid overlong forms.
+ char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
+ int length = 0;
+ if (codepoint <= 0x7F) { // U+0000..U+007F: one byte, 0xxxxxxx
+ utf8[0] = codepoint;
+ length = 1;
+ } else if (codepoint <= 0x07FF) { // U+0080..U+07FF: two bytes, 110xxxxx 10xxxxxx
+ utf8[1] = (codepoint & 0x3F) | 0x80;
+ codepoint >>= 6;
+ utf8[0] = (codepoint & 0x1F) | 0xC0;
+ length = 2;
+ } else /* codepoint <= 0xFFFF: three bytes, 1110xxxx 10xxxxxx 10xxxxxx */ {
+ utf8[2] = (codepoint & 0x3F) | 0x80;
+ codepoint >>= 6;
+ utf8[1] = (codepoint & 0x3F) | 0x80;
+ codepoint >>= 6;
+ utf8[0] = (codepoint & 0x0F) | 0xE0;
+ length = 3;
+ }
+ // TODO(haberman): Handle high surrogates (if codepoint is a high surrogate
// we have to wait for the next escape to get the full code point).
- UPB_UNUSED(codepoint);
+
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
+ upb_sink_putstring(&p->top->sink, sel, utf8, length, NULL);
}
#define CHECK_RETURN_TOP(x) if (!(x)) goto error
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback