From b8481e0e55aebad1d9ffa0f3845609f929bca02f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 2 Jul 2009 18:31:32 -0700 Subject: A bit more work on generalizing parsing. --- upb.h | 1 + upb_context.c | 2 +- upb_context.h | 4 +-- upb_msg.c | 104 ++++++++++++++++++++++++++++++++++------------------------ upb_msg.h | 26 ++++++++++++--- upb_parse.c | 6 ++-- upb_parse.h | 6 ++-- 7 files changed, 91 insertions(+), 58 deletions(-) diff --git a/upb.h b/upb.h index f5f19d3..563314c 100644 --- a/upb.h +++ b/upb.h @@ -112,6 +112,7 @@ union upb_value_ptr { bool *_bool; struct upb_string **string; struct upb_array **array; + void **message; void *_void; }; diff --git a/upb_context.c b/upb_context.c index 9fd6cef..e124583 100644 --- a/upb_context.c +++ b/upb_context.c @@ -232,7 +232,7 @@ error: } bool upb_context_parsefd(struct upb_context *c, struct upb_string *fd_str) { - google_protobuf_FileDescriptorProto *fd = upb_msg_parse(c->fd_msg, fd_str); + google_protobuf_FileDescriptorProto *fd = upb_alloc_and_parse(c->fd_msg, fd_str); if(!fd) return false; if(!upb_context_addfd(c, fd)) return false; c->fd[c->fd_len++] = fd; /* Need to keep a ref since we own it. */ diff --git a/upb_context.h b/upb_context.h index e15b4b8..cd62e30 100644 --- a/upb_context.h +++ b/upb_context.h @@ -70,8 +70,8 @@ INLINE struct upb_symtab_entry *upb_context_symbegin(struct upb_context *c) { } INLINE struct upb_symtab_entry *upb_context_symnext( - struct upb_context *c, struct upb_inttable_entry *cur) { - return upb_strtable_next(&c->symtab, cur); + struct upb_context *c, struct upb_symtab_entry *cur) { + return upb_strtable_next(&c->symtab, &cur->e); } /* Adding symbols. ************************************************************/ diff --git a/upb_msg.c b/upb_msg.c index bf55764..1c79f57 100644 --- a/upb_msg.c +++ b/upb_msg.c @@ -113,7 +113,7 @@ struct mm_upb_array { char *data; }; -uint32_t round_up_to_pow2(uint32_t v) +static uint32_t round_up_to_pow2(uint32_t v) { #ifdef __GNUC__ return (1U<<31) >> (__builtin_clz(v-1)+1); @@ -168,6 +168,14 @@ struct parse_frame_data { void *data; }; +static void set_frame_data(struct upb_parse_state *s, struct upb_msg *m, + void *data) +{ + struct parse_frame_data *frame = (void*)s->top->user_data; + frame->m = m; + frame->data = data; +} + static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag, void **user_field_desc) { @@ -179,32 +187,36 @@ static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag, return f->type; } +static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f) +{ + union upb_value_ptr p = upb_msg_get_ptr(data, f); + if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { + size_t len = upb_msg_is_set(data, f) ? (*p.array)->len : 0; + upb_msg_reuse_array(p.array, len, f->type); + (*p.array)->len = len + 1; + p = upb_array_getelementptr(*p.array, len, f->type); + } + return p; +} + static upb_status_t value_cb(struct upb_parse_state *s, void **buf, void *end, - upb_field_type_t type, void *user_field_desc) + void *user_field_desc) { struct parse_frame_data *frame = (void*)s->top->user_data; struct upb_msg_field *f = user_field_desc; - union upb_value_ptr p = upb_msg_get_ptr(frame->data, f); - if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { - upb_msg_reuse_array(p.array, (*p.array)->len, type); - p = upb_array_getelementptr(*p.array, (*p.array)->len++, type); - } - UPB_CHECK(upb_parse_value(buf, end, type, p)); + union upb_value_ptr p = get_value_ptr(frame->data, f); + UPB_CHECK(upb_parse_value(buf, end, f->type, p)); return UPB_STATUS_OK; } -static upb_status_t str_cb(struct upb_parse_state *s, struct upb_string *str, - upb_field_type_t type, void *user_field_desc) +static upb_status_t str_cb(struct upb_parse_state *_s, struct upb_string *str, + void *user_field_desc) { - struct parse_frame_data *frame = (void*)s->top->user_data; + struct upb_msg_parse_state *s = (void*)_s; + struct parse_frame_data *frame = (void*)s->s.top->user_data; struct upb_msg_field *f = user_field_desc; - union upb_value_ptr p = upb_msg_get_ptr(frame->data, f); - if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { - upb_msg_reuse_array(p.array, (*p.array)->len, type); - p = upb_array_getelementptr(*p.array, (*p.array)->len++, type); - } - bool byref = false; - if(byref) { + union upb_value_ptr p = get_value_ptr(frame->data, f); + if(s->byref) { upb_msg_reuse_strref(p.string); **p.string = *str; } else { @@ -214,33 +226,39 @@ static upb_status_t str_cb(struct upb_parse_state *s, struct upb_string *str, return UPB_STATUS_OK; } -static void set_frame_data(struct upb_parse_state *s, struct upb_msg *m, - void *data) +static void submsg_start_cb(struct upb_parse_state *_s, void *user_field_desc) { - struct parse_frame_data *frame = (void*)s->top->user_data; - frame->m = m; - frame->data = data; + struct upb_msg_parse_state *s = (void*)_s; + struct upb_msg_field *f = user_field_desc; + struct parse_frame_data *frame = (void*)s->s.top->user_data; + union upb_value_ptr p = upb_msg_get_ptr(frame->data, f); + upb_msg_reuse_submsg(*p.message, f->ref.msg); + if(!s->merge) upb_msg_clear(frame->data, f->ref.msg); + set_frame_data(&s->s, f->ref.msg, *p.message); +} + + +void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg, + struct upb_msg *m, bool merge, bool byref) +{ + upb_parse_init(&s->s, sizeof(struct parse_frame_data)); + s->merge = merge; + s->byref = byref; + if(!merge && msg == NULL) msg = upb_msg_new(m); + set_frame_data(&s->s, m, msg); + s->s.tag_cb = tag_cb; + s->s.value_cb = value_cb; + s->s.str_cb = str_cb; + s->s.submsg_start_cb = submsg_start_cb; } -static void submsg_start_cb(struct upb_parse_state *s, void *user_field_desc) +void upb_msg_parse_free(struct upb_msg_parse_state *s) { - struct upb_msg_field *f = user_field_desc; - struct parse_frame_data *frame = (void*)s->top->user_data; - void **submsg = upb_msg_get_submsg_ptr(frame->data, f); - upb_msg_reuse_submsg(submsg, f->ref.msg); - set_frame_data(s, f->ref.msg, *submsg); -} - -upb_status_t upb_msg_merge(void *data, struct upb_msg *m, struct upb_string *str) -{ - struct upb_parse_state s; - upb_parse_state_init(&s, sizeof(struct parse_frame_data)); - set_frame_data(&s, m, data); - s.tag_cb = tag_cb; - s.value_cb = value_cb; - s.str_cb = str_cb; - s.submsg_start_cb = submsg_start_cb; - size_t read; - UPB_CHECK(upb_parse(&s, str->ptr, str->byte_len, &read)); - return UPB_STATUS_OK; + upb_parse_free(&s->s); +} + +upb_status_t upb_msg_parse(struct upb_msg_parse_state *s, + void *data, size_t len, size_t *read) +{ + return upb_parse(&s->s, data, len, read); } diff --git a/upb_msg.h b/upb_msg.h index 605ca94..48b411f 100644 --- a/upb_msg.h +++ b/upb_msg.h @@ -46,6 +46,7 @@ #include "upb.h" #include "upb_table.h" +#include "upb_parse.h" #ifdef __cplusplus extern "C" { @@ -57,13 +58,14 @@ struct google_protobuf_FieldDescriptorProto; /* Message definition. ********************************************************/ -/* Structure that describes a single field in a message. */ +/* Structure that describes a single field in a message. This structure is very + * consciously designed to fit into 12/16 bytes (32/64 bit, respectively). */ struct upb_msg_field { + union upb_symbol_ref ref; uint32_t byte_offset; /* Where to find the data. */ uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */ upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */ upb_label_t label; - union upb_symbol_ref ref; }; /* Structure that describes a single .proto message type. */ @@ -155,8 +157,8 @@ struct upb_array { uint32_t len; \ }; -union upb_value_ptr upb_array_getelementptr(struct upb_array *arr, uint32_t n, - upb_field_type_t type) +INLINE union upb_value_ptr upb_array_getelementptr( + struct upb_array *arr, uint32_t n, upb_field_type_t type) { union upb_value_ptr ptr = { ._void = ((char*)arr->elements._void + n*upb_type_info[type].size) @@ -226,7 +228,21 @@ INLINE union upb_value_ptr upb_msg_get_ptr( /* Memory management *********************************************************/ void *upb_msg_new(struct upb_msg *m); -//void upb_msg_free(void *msg, struct upb_msg *m, bool free_submsgs); + +struct upb_msg_parse_state { + struct upb_parse_state s; + bool merge; + bool byref; + struct upb_msg *m; +}; + +void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg, + struct upb_msg *m, bool merge, bool byref); +void upb_msg_parse_free(struct upb_msg_parse_state *s); +upb_status_t upb_msg_parse(struct upb_msg_parse_state *s, + void *data, size_t len, size_t *read); + +void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s); /* Note! These two may not be use on a upb_string* that was initialized by * means other than these functions. */ diff --git a/upb_parse.c b/upb_parse.c index 94eb19a..559e227 100644 --- a/upb_parse.c +++ b/upb_parse.c @@ -308,12 +308,12 @@ static upb_status_t parse_delimited(struct upb_parse_state *s, if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) { struct upb_string str = {.ptr = *buf, .byte_len = delim_len}; - s->str_cb(s, &str, ft, user_field_desc); + s->str_cb(s, &str, user_field_desc); *buf = delim_end; } else { /* Packed Array. */ while(*buf < delim_end) - UPB_CHECK(s->value_cb(s, buf, end, ft, user_field_desc)); + UPB_CHECK(s->value_cb(s, buf, end, user_field_desc)); } } return UPB_STATUS_OK; @@ -332,7 +332,7 @@ static upb_status_t parse_nondelimited(struct upb_parse_state *s, /* No length specified, an "end group" tag will mark the end. */ UPB_CHECK(push_stack_frame(s, 0, user_field_desc)); } else { - UPB_CHECK(s->value_cb(s, buf, end, ft, user_field_desc)); + UPB_CHECK(s->value_cb(s, buf, end, user_field_desc)); } return UPB_STATUS_OK; } diff --git a/upb_parse.h b/upb_parse.h index 14520f3..83104f1 100644 --- a/upb_parse.h +++ b/upb_parse.h @@ -25,8 +25,8 @@ struct upb_parse_state; /* Initialize and free (respectively) the given parse state, which must have * been previously allocated. udata_size specifies how much space will be * available at parse_stack_frame.user_data in each frame for user data. */ -void upb_parse_state_init(struct upb_parse_state *state, size_t udata_size); -void upb_parse_state_free(struct upb_parse_state *state); +void upb_parse_init(struct upb_parse_state *state, size_t udata_size); +void upb_parse_free(struct upb_parse_state *state); /* The callback that is called immediately after a tag has been parsed. The * client should determine whether it wants to parse or skip the corresponding @@ -48,13 +48,11 @@ typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s, * call to tag_cb in the case of packed arrays. */ typedef upb_status_t (*upb_value_cb)(struct upb_parse_state *s, void **buf, void *end, - upb_field_type_t type, void *user_field_desc); /* The callback that is called when a string is parsed. */ typedef upb_status_t (*upb_str_cb)(struct upb_parse_state *s, struct upb_string *str, - upb_field_type_t type, void *user_field_desc); /* Callbacks that are called when a submessage begins and ends, respectively. -- cgit v1.2.3