From a75a305c77acd6800b81204f387f7a437a62fe6b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 25 Feb 2011 18:31:22 -0800 Subject: Implemented upb_stringsink, upb_msgtotext, and exposed the latter to Lua. --- src/upb_glue.c | 24 +++++++++++++++++ src/upb_glue.h | 15 +++++++++-- src/upb_msg.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/upb_msg.h | 5 ++++ src/upb_string.c | 25 ++++++++++-------- src/upb_string.h | 12 ++++++--- src/upb_strstream.c | 56 ++++++++++++++++++++++++++++++++++++++-- src/upb_strstream.h | 26 ++++++++++--------- src/upb_table.h | 2 +- src/upb_textprinter.c | 6 ++--- 10 files changed, 207 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/upb_glue.c b/src/upb_glue.c index a3d4e7d..86022d8 100644 --- a/src/upb_glue.c +++ b/src/upb_glue.c @@ -8,6 +8,7 @@ #include "upb_msg.h" #include "upb_decoder.h" #include "upb_strstream.h" +#include "upb_textprinter.h" void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, upb_status *status) { @@ -37,6 +38,29 @@ void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, upb_handlers_uninit(&h); } +void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md, + bool single_line) { + upb_stringsink strsink; + upb_stringsink_init(&strsink); + upb_stringsink_reset(&strsink, str); + + upb_textprinter *p = upb_textprinter_new(); + upb_handlers h; + upb_handlers_init(&h); + upb_textprinter_reset(p, &h, upb_stringsink_bytesink(&strsink), single_line); + + upb_status status = UPB_STATUS_INIT; + upb_msg_runhandlers(msg, md, &h, &status); + // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be + // capable of returning an error. 
+ assert(upb_ok(&status)); + upb_status_uninit(&status); + + upb_stringsink_uninit(&strsink); + upb_textprinter_free(p); + upb_handlers_uninit(&h); +} + void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) { upb_stringsrc strsrc; upb_stringsrc_init(&strsrc); diff --git a/src/upb_glue.h b/src/upb_glue.h index ca32436..afb7b59 100644 --- a/src/upb_glue.h +++ b/src/upb_glue.h @@ -4,8 +4,8 @@ * upb's core components like upb_decoder and upb_msg are carefully designed to * avoid depending on each other for maximum orthogonality. In other words, * you can use a upb_decoder to decode into *any* kind of structure; upb_msg is - * just one such structure. You can use upb_decoder without having to link in - * upb_msg. + * just one such structure. A upb_msg can be serialized/deserialized into any + * format, protobuf binary format is just one such format. * * However, for convenience we provide functions here for doing common * operations like deserializing protobuf binary format into a upb_msg. The @@ -13,12 +13,20 @@ * which could be undesirable if you're trying to use a trimmed-down build of * upb. * + * While these routines are convenient, they do not reuse any encoding/decoding + * state. For example, if a decoder is JIT-based, it will be re-JITted every + * time these functions are called. For this reason, if you are parsing lots + * of data and efficiency is an issue, these may not be the best functions to + * use (though they are useful for prototyping, before optimizing). + * * Copyright (c) 2011 Joshua Haberman. See LICENSE for details. 
*/ #ifndef UPB_GLUE_H #define UPB_GLUE_H +#include <stdbool.h> + #ifdef __cplusplus extern "C" { #endif @@ -36,6 +44,9 @@ struct _upb_symtab; void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg, struct _upb_msgdef *md, struct _upb_status *s); +void upb_msgtotext(struct _upb_string *str, struct _upb_msg *msg, + struct _upb_msgdef *md, bool single_line); + void upb_parsedesc(struct _upb_symtab *symtab, struct _upb_string *str, struct _upb_status *status); diff --git a/src/upb_msg.c b/src/upb_msg.c index 89913dd..a3fd825 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -180,6 +180,77 @@ upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { } } +static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md, + upb_dispatcher *d, upb_status *s); + +static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f, + upb_dispatcher *d, upb_status *s) { +#define CHECK_FLOW(x) do { \ + flow = x; if (flow != UPB_CONTINUE) return flow; \ + } while(0) + +// For when a SKIP can be implemented just through an early return.
+#define CHECK_FLOW_LOCAL(x) do { \ + flow = x; \ + if (flow != UPB_CONTINUE) { \ + if (flow == UPB_SKIPSUBMSG) flow = UPB_CONTINUE; \ + goto end; \ + } \ +} while (0) + upb_flow_t flow; + if (upb_issubmsg(f)) { + upb_msg *msg = upb_value_getmsg(val); + CHECK_FLOW_LOCAL(upb_dispatch_startsubmsg(d, f)); + CHECK_FLOW_LOCAL(upb_msg_dispatch(msg, upb_downcast_msgdef(f->def), d, s)); + CHECK_FLOW(upb_dispatch_endsubmsg(d, f)); + } else { + CHECK_FLOW(upb_dispatch_value(d, f, val)); + } + +end: + return flow; +} + +static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md, + upb_dispatcher *d, upb_status *s) { + upb_msg_iter i; + upb_flow_t flow; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if (!upb_msg_has(msg, f)) continue; + upb_value val = upb_msg_get(msg, f); + if (upb_isarray(f)) { + upb_array *arr = upb_value_getarr(val); + for (uint32_t j = 0; j < upb_array_len(arr); ++j) { + CHECK_FLOW_LOCAL(upb_msg_pushval(upb_array_get(arr, f, j), f, d, s)); + } + } else { + CHECK_FLOW_LOCAL(upb_msg_pushval(val, f, d, s)); + } + } + return UPB_CONTINUE; + +end: + // Need to copy/massage the error. 
+ upb_copyerr(s, d->top->handlers.status); + if (upb_ok(s)) { + upb_seterr(s, UPB_ERROR, "Callback returned UPB_BREAK"); + } + return flow; +#undef CHECK_FLOW +#undef CHECK_FLOW_LOCAL +} + +void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h, + upb_status *status) { + upb_dispatcher d; + upb_dispatcher_init(&d); + upb_dispatcher_reset(&d, h, true); + + if (upb_dispatch_startmsg(&d) != UPB_CONTINUE) return; + if (upb_msg_dispatch(msg, md, &d, status) != UPB_CONTINUE) return; + if (upb_dispatch_endmsg(&d) != UPB_CONTINUE) return; +} static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) { upb_valueptr p = _upb_msg_getptr(msg, f); diff --git a/src/upb_msg.h b/src/upb_msg.h index 1e0a176..fd5750f 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -274,6 +274,11 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } +// A non-resumable upb_src that pushes the current contents of the message to +// the given handlers. +void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h, + upb_status *status); + typedef struct { upb_msg *msg; upb_msgdef *msgdef; diff --git a/src/upb_string.c b/src/upb_string.c index 29ce7d4..cbc3414 100644 --- a/src/upb_string.c +++ b/src/upb_string.c @@ -83,27 +83,30 @@ void upb_string_substr(upb_string *str, upb_string *target_str, str->len = len; } -void upb_string_vprintf(upb_string *str, const char *format, va_list args) { +size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format, + va_list args) { // Try once without reallocating. We have to va_copy because we might have // to call vsnprintf again. 
+ uint32_t size = UPB_MAX(upb_string_size(str) - offset, 16); + char *buf = upb_string_getrwbuf(str, offset + size) + offset; va_list args_copy; va_copy(args_copy, args); uint32_t true_size = vsnprintf(buf, size, format, args_copy); va_end(args_copy); + // Resize to be the correct size. if (true_size >= size) { - // Need to reallocate. We reallocate even if the sizes were equal, - // because snprintf excludes the terminating NULL from its count. - // We don't care about the terminating NULL, but snprintf might - // bail out of printing even other characters if it doesn't have - // enough space to write the NULL also. - upb_string_recycle(&str); - buf = upb_string_getrwbuf(str, true_size + 1); + // Need to print again, because some characters were truncated. vsnprintf + // always reserves the last byte of the buffer for the NUL terminator (as + // the C standard specifies), so it will not write the entire string unless + // you give it space to store the terminator also. So we can't give it space + // for the string itself and let NUL get truncated (after all, we don't care + // about it): we *must* give it space for NUL. + buf = upb_string_getrwbuf(str, offset + true_size + 1) + offset; vsnprintf(buf, true_size + 1, format, args); } - str->len = true_size; + str->len = offset + true_size; + return true_size; } upb_string *upb_string_asprintf(const char *format, ...) { diff --git a/src/upb_string.h b/src/upb_string.h index efafa44..38c3d55 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -192,12 +192,18 @@ INLINE void upb_string_recycle(upb_string **_str) { char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); // Replaces the contents of str with the contents of the given printf. -void upb_string_vprintf(upb_string *str, const char *format, va_list args); -INLINE void upb_string_printf(upb_string *str, const char *format, ...) 
{ +size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format, + va_list args); +INLINE size_t upb_string_vprintf(upb_string *str, const char *format, + va_list args) { + return upb_string_vprintf_at(str, 0, format, args); +} +INLINE size_t upb_string_printf(upb_string *str, const char *format, ...) { va_list args; va_start(args, format); - upb_string_vprintf(str, format, args); + size_t written = upb_string_vprintf(str, format, args); va_end(args); + return written; } // Sets the contents of "str" to be the given substring of "target_str", to diff --git a/src/upb_strstream.c b/src/upb_strstream.c index a7967d4..c957648 100644 --- a/src/upb_strstream.c +++ b/src/upb_strstream.c @@ -9,6 +9,9 @@ #include #include "upb_string.h" + +/* upb_stringsrc **************************************************************/ + static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, upb_strlen_t count, upb_status *status) { upb_stringsrc *src = (upb_stringsrc*)_src; @@ -39,12 +42,12 @@ static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str, } void upb_stringsrc_init(upb_stringsrc *s) { - static upb_bytesrc_vtbl bytesrc_vtbl = { + static upb_bytesrc_vtbl vtbl = { upb_stringsrc_read, upb_stringsrc_getstr, }; + upb_bytesrc_init(&s->bytesrc, &vtbl); s->str = NULL; - upb_bytesrc_init(&s->bytesrc, &bytesrc_vtbl); } void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { @@ -63,3 +66,52 @@ void upb_stringsrc_uninit(upb_stringsrc *s) { upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { return &s->bytesrc; } + + +/* upb_stringsink *************************************************************/ + +void upb_stringsink_uninit(upb_stringsink *s) { + upb_string_unref(s->str); +} + +// Resets the stringsink to a state where it will append to the given string. +// The string must be newly created or recycled. The stringsink will take a +// reference on the string, so the caller need not ensure that it outlives the +// stringsink. 
A stringsink can be reset multiple times. +void upb_stringsink_reset(upb_stringsink *s, upb_string *str) { + if (str != s->str) { + upb_string_unref(s->str); + s->str = upb_string_getref(str); + } + // Resize to 0. + upb_string_getrwbuf(s->str, 0); +} + +upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) { + return &s->bytesink; +} + +static upb_strlen_t upb_stringsink_vprintf(upb_bytesink *_sink, upb_status *s, + const char *fmt, va_list args) { + (void)s; // No errors can occur. + upb_stringsink *sink = (upb_stringsink*)_sink; + return upb_string_vprintf_at(sink->str, upb_string_len(sink->str), fmt, args); +} + +static upb_strlen_t upb_stringsink_putstr(upb_bytesink *_sink, upb_string *str, + upb_status *s) { + (void)s; // No errors can occur. + upb_stringsink *sink = (upb_stringsink*)_sink; + upb_strcat(sink->str, str); + return upb_string_len(str); +} + +void upb_stringsink_init(upb_stringsink *s) { + static upb_bytesink_vtbl vtbl = { + NULL, + upb_stringsink_putstr, + upb_stringsink_vprintf + }; + upb_bytesink_init(&s->bytesink, &vtbl); + s->str = NULL; +} diff --git a/src/upb_strstream.h b/src/upb_strstream.h index 1a8792b..cd8ff01 100644 --- a/src/upb_strstream.h +++ b/src/upb_strstream.h @@ -40,22 +40,24 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); /* upb_stringsink *************************************************************/ -struct upb_stringsink; -typedef struct upb_stringsink upb_stringsink; +struct _upb_stringsink { + upb_bytesink bytesink; + upb_string *str; +}; +typedef struct _upb_stringsink upb_stringsink; // Create/free a stringsrc. -upb_stringsink *upb_stringsink_new(); -void upb_stringsink_free(upb_stringsink *s); +void upb_stringsink_init(upb_stringsink *s); +void upb_stringsink_uninit(upb_stringsink *s); -// Gets a string containing the data that has been written to this stringsink. -// The caller does *not* own any references to this string. 
-upb_string *upb_stringsink_getstring(upb_stringsink *s); +// Resets the stringsink to a state where it will append to the given string. +// The string must be newly created or recycled. The stringsink will take a +// reference on the string, so the caller need not ensure that it outlives the +// stringsink. A stringsink can be reset multiple times. +void upb_stringsink_reset(upb_stringsink *s, upb_string *str); -// Clears the internal string of accumulated data, resetting it to empty. -void upb_stringsink_reset(upb_stringsink *s); - -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. -upb_bytesink *upb_stringsrc_bytesink(); +// Returns the upb_bytesink* for this stringsink. Invalidated by reset above. +upb_bytesink *upb_stringsink_bytesink(); #ifdef __cplusplus diff --git a/src/upb_table.h b/src/upb_table.h index c658a6e..a799653 100644 --- a/src/upb_table.h +++ b/src/upb_table.h @@ -133,7 +133,7 @@ INLINE void *_upb_inttable_fastlookup(upb_inttable *t, uint32_t key, while (1) { //DEBUGPRINTF("%d, %d, %d\n", e->val.has_entry, e->hdr.key, key); if (e->hdr.key == key) { - DEBUGPRINTF("returning val from hash part\n"); + //DEBUGPRINTF("returning val from hash part\n"); return &e->val; } if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL; diff --git a/src/upb_textprinter.c b/src/upb_textprinter.c index c0fb944..7e99ebd 100644 --- a/src/upb_textprinter.c +++ b/src/upb_textprinter.c @@ -20,8 +20,7 @@ struct _upb_textprinter { #define CHECK(x) if ((x) < 0) goto err; -static int upb_textprinter_indent(upb_textprinter *p) -{ +static int upb_textprinter_indent(upb_textprinter *p) { if(!p->single_line) for(int i = 0; i < p->indent_depth; i++) CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); @@ -107,8 +106,7 @@ err: return UPB_BREAK; } -static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_fielddef *f) -{ +static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_fielddef *f) { (void)f; upb_textprinter *p = _p; 
p->indent_depth--; -- cgit v1.2.3