From 2282d2489bd8db3cd4ddbe0dd813732bffcf6452 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 12 Aug 2009 13:47:24 -0700 Subject: Refactoring: unify upb_msg. The cost is that a upb_msg will now always have an overhead of 2*sizeof(void*). This is comparable to proto2 overhead. The benefit is that upb_msg is now self-describing, and read-only algorithms can now operate on a upb_msg regardless of the memory-management scheme. Also, upb_array and upb_string now know inherently if they own their associated memory, and upb_array has a generic pointer for memory management purposes like upb_msg does. --- src/upb.h | 6 +-- src/upb_array.h | 34 +++++++----- src/upb_context.c | 4 +- src/upb_msg.c | 158 +++++++++++++++++++++++++++--------------------------- src/upb_msg.h | 95 +++++++++++++++++--------------- src/upb_string.c | 2 +- src/upb_string.h | 78 +++++++++++++++++++-------- src/upb_text.c | 6 +-- src/upb_text.h | 4 +- 9 files changed, 220 insertions(+), 167 deletions(-) (limited to 'src') diff --git a/src/upb.h b/src/upb.h index 83917af..27bf5fc 100644 --- a/src/upb.h +++ b/src/upb.h @@ -35,8 +35,6 @@ extern "C" { #define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) -INLINE uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; } - /* Fundamental types and type constants. **************************************/ /* A list of types as they are encoded on-the-wire. */ @@ -104,7 +102,7 @@ union upb_value { bool _bool; struct upb_string *str; struct upb_array *arr; - void *msg; + struct upb_msg *msg; }; /* A pointer to a .proto value. The owner must have an out-of-band way of @@ -119,7 +117,7 @@ union upb_value_ptr { bool *_bool; struct upb_string **str; struct upb_array **arr; - void **msg; + struct upb_msg **msg; void *_void; }; diff --git a/src/upb_array.h b/src/upb_array.h index d48aa17..65d44bb 100644 --- a/src/upb_array.h +++ b/src/upb_array.h @@ -38,22 +38,32 @@ typedef uint32_t upb_arraylen_t; * the data in the array depends on the type. */ struct upb_array { union upb_value_ptr elements; - void *mem; upb_arraylen_t len; /* Number of elements in "elements". */ - upb_arraylen_t size; /* Memory allocated in "mem" (measured in elements) */ + upb_arraylen_t size; /* Memory we own (0 if by reference). */ + void *gptr; }; INLINE void upb_array_init(struct upb_array *arr) { arr->elements._void = NULL; - arr->mem = NULL; arr->len = 0; arr->size = 0; } -INLINE void upb_array_free(struct upb_array *arr) +INLINE void upb_array_uninit(struct upb_array *arr) { - free(arr->mem); + if(arr->size) free(arr->elements._void); +} + +INLINE struct upb_array *upb_array_new() { + struct upb_array *arr = malloc(sizeof(*arr)); + upb_array_init(arr); + return arr; +} + +INLINE void upb_array_free(struct upb_array *arr) { + upb_array_uninit(arr); + free(arr); } /* Returns a pointer to an array element. Does not perform a bounds check! */ @@ -92,18 +102,18 @@ INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen, { size_t type_size = upb_type_info[type].size; bool dropped = false; - bool ref = arr->elements._void != arr->mem; /* Ref'ing external memory. */ + bool ref = arr->size == 0; /* Ref'ing external memory. */ + void *data = arr->elements._void; if(arr->size < newlen) { /* Need to resize. */ - arr->size = max(4, upb_round_up_to_pow2(newlen)); - arr->mem = realloc(arr->mem, arr->size * type_size); + arr->size = UPB_MAX(4, upb_round_up_to_pow2(newlen)); + arr->elements._void = realloc(ref ? NULL : data, arr->size * type_size); } if(ref) { /* Need to take referenced data and copy it to memory we own. */ - memcpy(arr->mem, arr->elements._void, UPB_MIN(arr->len, newlen) * type_size); + memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size); dropped = true; } - arr->elements._void = arr->mem; arr->len = newlen; return dropped; } @@ -111,8 +121,9 @@ INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen, /* These are all overlays on upb_array, pointers between them can be cast. */ #define UPB_DEFINE_ARRAY_TYPE(name, type) \ struct name ## _array { \ + struct upb_fielddef *f; \ + void *gptr; \ type *elements; \ - type *mem; \ upb_arraylen_t len; \ upb_arraylen_t size; \ }; @@ -132,7 +143,6 @@ UPB_DEFINE_ARRAY_TYPE(upb_msg, void*) #define UPB_DEFINE_MSG_ARRAY(msg_type) \ UPB_MSG_ARRAY(msg_type) { \ msg_type **elements; \ - msg_type **mem; \ upb_arraylen_t len; \ upb_arraylen_t size; \ }; diff --git a/src/upb_context.c b/src/upb_context.c index 5e1833e..252cf17 100644 --- a/src/upb_context.c +++ b/src/upb_context.c @@ -60,7 +60,7 @@ void upb_context_free(struct upb_context *c) { free_symtab(&c->symtab); for(size_t i = 0; i < c->fds_len; i++) - upb_msg_free(c->fds[i], c->fds_msg); + upb_msg_free((struct upb_msg*)c->fds[i]); free_symtab(&c->psymtab); free(c->fds); } @@ -280,7 +280,7 @@ bool upb_context_addfds(struct upb_context *c, bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) { google_protobuf_FileDescriptorSet *fds = - upb_msg_parsenew(c->fds_msg, fds_str); + (google_protobuf_FileDescriptorSet*)upb_msg_parsenew(c->fds_msg, fds_str); if(!fds) return false; if(!upb_context_addfds(c, fds)) return false; diff --git a/src/upb_msg.c b/src/upb_msg.c index 16c24c0..dd627eb 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -82,7 +82,7 @@ bool upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d, f->type = fd->type; f->label = fd->label; m->size = f->byte_offset + type_info->size; - max_align = max(max_align, type_info->align); + max_align = UPB_MAX(max_align, type_info->align); if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) m->num_required_fields++; @@ -123,59 +123,49 @@ void upb_msgdef_ref(struct upb_msgdef *m, struct upb_msg_fielddef *f, /* Simple, one-shot parsing ***************************************************/ -void *upb_msg_new(struct upb_msgdef *md) +static void *upb_msg_new(struct upb_msgdef *md) { - void *msg = malloc(md->size); - memset(msg, 0, md->size); + size_t size = md->size + (sizeof(void*) * 2); + struct upb_msg *msg = malloc(size); + memset(msg, 0, size); + msg->def = md; return msg; } /* Allocation callbacks. */ -static struct upb_array *getarray_cb(void *msg, struct upb_msgdef *md, - struct upb_array *existingval, - struct upb_msg_fielddef *f, - upb_arraylen_t len) +struct upb_array *getarray_cb( + void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f) { - (void)msg; - (void)md; + (void)from_gptr; (void)existingval; /* Don't care -- always zero. */ - (void)len; - struct upb_array *arr = existingval; - if(!arr) { - arr = malloc(sizeof(*arr)); - upb_array_init(arr); - } - upb_array_resize(arr, len, f->type); - return arr; + (void)f; + return upb_array_new(); } -static struct upb_string *getstring_cb(void *msg, struct upb_msgdef *md, - struct upb_string *existingval, - struct upb_msg_fielddef *f, size_t len) +static struct upb_string *getstring_cb( + void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f, + bool byref) { - (void)msg; - (void)md; + (void)from_gptr; (void)existingval; /* Don't care -- always zero. */ (void)f; - struct upb_string *str = malloc(sizeof(*str)); - str->ptr = malloc(len); - return str; + (void)byref; + return upb_strnew(); } -static void *getmsg_cb(void *msg, struct upb_msgdef *md, - void *existingval, struct upb_msg_fielddef *f) +static struct upb_msg *getmsg_cb( + void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f) { - (void)msg; - (void)md; + (void)from_gptr; (void)existingval; /* Don't care -- always zero. */ return upb_msg_new(f->ref.msg); } -void *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s) +struct upb_msg *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s) { struct upb_msg_parser mp; - void *msg = upb_msg_new(md); - upb_msg_parser_reset(&mp, msg, md, false); + struct upb_msg *msg = upb_msg_new(md); + upb_msg_parser_reset(&mp, msg, false); mp.getarray_cb = getarray_cb; mp.getstring_cb = getstring_cb; mp.getmsg_cb = getmsg_cb; @@ -184,7 +174,7 @@ void *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s) if(status == UPB_STATUS_OK && read == s->byte_len) { return msg; } else { - upb_msg_free(msg, md); + upb_msg_free(msg); return NULL; } } @@ -197,28 +187,28 @@ static void free_value(union upb_value_ptr p, struct upb_msg_fielddef *f) free((*p.str)->ptr); free(*p.str); } else if(upb_issubmsg(f)) { - upb_msg_free(*p.msg, f->ref.msg); + upb_msg_free(*p.msg); } } -void upb_msg_free(void *data, struct upb_msgdef *m) +void upb_msg_free(struct upb_msg *msg) { - if(!data) return; /* A very free-like thing to do. */ + if(!msg) return; /* A very free-like thing to do. */ + struct upb_msgdef *m = msg->def; for(unsigned int i = 0; i < m->num_fields; i++) { struct upb_msg_fielddef *f = &m->fields[i]; - if(!upb_msg_isset(data, f)) continue; - union upb_value_ptr p = upb_msg_getptr(data, f); + if(!upb_msg_isset(msg, f)) continue; + union upb_value_ptr p = upb_msg_getptr(msg, f); if(upb_isarray(f)) { assert(*p.arr); for(upb_arraylen_t j = 0; j < (*p.arr)->len; j++) free_value(upb_array_getelementptr(*p.arr, j, f->type), f); - free((*p.arr)->elements._void); - free(*p.arr); + upb_array_free(*p.arr); } else { free_value(p, f); } } - free(data); + free(msg); } /* Parsing. ******************************************************************/ @@ -226,14 +216,18 @@ void upb_msg_free(void *data, struct upb_msgdef *m) /* Helper function that returns a pointer to where the next value for field "f" * should be stored, taking into account whether f is an array that may need to * be allocated or resized. */ -static union upb_value_ptr get_value_ptr(void *data, struct upb_msgdef *m, +static union upb_value_ptr get_value_ptr(struct upb_msg *msg, struct upb_msg_fielddef *f, - upb_msg_getarray_cb_t getarray_cb) + void **gptr, + upb_msg_getandref_array_cb_t getarray_cb) { - union upb_value_ptr p = upb_msg_getptr(data, f); + union upb_value_ptr p = upb_msg_getptr(msg, f); if(upb_isarray(f)) { - size_t len = upb_msg_isset(data, f) ? (*p.arr)->len : 0; - *p.arr = getarray_cb(data, m, *p.arr, f, len + 1); + bool isset = upb_msg_isset(msg, f); + size_t len = isset ? (*p.arr)->len : 0; + if(!isset) *p.arr = getarray_cb(*gptr, *p.arr, f); + upb_array_resize(*p.arr, len+1, f->type); + *gptr = (*p.arr)->gptr; p = upb_array_getelementptr(*p.arr, len, f->type); } return p; @@ -245,7 +239,8 @@ static upb_field_type_t tag_cb(void *udata, struct upb_tag *tag, void **user_field_desc) { struct upb_msg_parser *mp = udata; - struct upb_msg_fielddef *f = upb_msg_fieldbynum(mp->top->m, tag->field_number); + struct upb_msg_fielddef *f = + upb_msg_fieldbynum(mp->top->msg->def, tag->field_number); if(!f || !upb_check_type(tag->wire_type, f->type)) return 0; /* Skip unknown or fields of the wrong type. */ *user_field_desc = f; @@ -257,8 +252,9 @@ static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end, { struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = user_field_desc; - void *msg = mp->top->msg; - union upb_value_ptr p = get_value_ptr(msg, mp->top->m, f, mp->getarray_cb); + struct upb_msg *msg = mp->top->msg; + void *gptr = upb_msg_gptr(msg); + union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb); upb_msg_set(msg, f); UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf)); return UPB_STATUS_OK; @@ -270,16 +266,19 @@ static void str_cb(void *udata, uint8_t *str, { struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = udesc; - void *msg = mp->top->msg; - union upb_value_ptr p = get_value_ptr(msg, mp->top->m, f, mp->getarray_cb); + struct upb_msg *msg = mp->top->msg; + void *gptr = upb_msg_gptr(msg); + union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb); upb_msg_set(msg, f); if(avail_len != total_len) abort(); /* TODO: support streaming. */ - if(avail_len == total_len && mp->byref) { - *p.str = mp->getstring_cb(msg, mp->top->m, *p.str, f, 0); + bool byref = avail_len == total_len && mp->byref; + *p.str = mp->getstring_cb(gptr, *p.str, f, byref); + if(byref) { + upb_strdrop(*p.str); (*p.str)->ptr = (char*)str; (*p.str)->byte_len = avail_len; } else { - *p.str = mp->getstring_cb(msg, mp->top->m, *p.str, f, total_len); + upb_stralloc(*p.str, total_len); memcpy((*p.str)->ptr, str, avail_len); (*p.str)->byte_len = avail_len; } @@ -289,13 +288,12 @@ static void submsg_start_cb(void *udata, void *user_field_desc) { struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = user_field_desc; - struct upb_msgdef *oldmsgdef = mp->top->m; - void *oldmsg = mp->top->msg; - union upb_value_ptr p = get_value_ptr(oldmsg, oldmsgdef, f, mp->getarray_cb); + struct upb_msg *oldmsg = mp->top->msg; + void *gptr = upb_msg_gptr(oldmsg); + union upb_value_ptr p = get_value_ptr(oldmsg, f, &gptr, mp->getarray_cb); upb_msg_set(oldmsg, f); - *p.msg = mp->getmsg_cb(oldmsg, oldmsgdef, *p.msg, f); + *p.msg = mp->getmsg_cb(gptr, *p.msg, f); mp->top++; - mp->top->m = f->ref.msg; mp->top->msg = *p.msg; } @@ -307,13 +305,11 @@ static void submsg_end_cb(void *udata) /* Externally-visible functions for the msg parser. */ -void upb_msg_parser_reset(struct upb_msg_parser *s, void *msg, - struct upb_msgdef *m, bool byref) +void upb_msg_parser_reset(struct upb_msg_parser *s, struct upb_msg *msg, bool byref) { upb_stream_parser_reset(&s->s, s); s->byref = byref; s->top = s->stack; - s->top->m = m; s->top->msg = msg; s->s.tag_cb = tag_cb; s->s.value_cb = value_cb; @@ -551,8 +547,7 @@ bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2, if(upb_issubmsg(f)) { if(!recursive) return true; for(uint32_t i = 0; i < arr1->len; i++) - if(!upb_msg_eql(arr1->elements.msg[i], arr2->elements.msg[i], - f->ref.msg, recursive)) + if(!upb_msg_eql(arr1->elements.msg[i], arr2->elements.msg[i], recursive)) return false; } else if(upb_isstring(f)) { for(uint32_t i = 0; i < arr1->len; i++) @@ -566,11 +561,13 @@ bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2, return true; } -bool upb_msg_eql(void *data1, void *data2, struct upb_msgdef *m, bool recursive) +bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive) { /* Must have the same fields set. TODO: is this wrong? Should we also * consider absent defaults equal to explicitly set defaults? */ - if(memcmp(data1, data2, m->set_flags_bytes) != 0) + if(msg1->def != msg2->def) return false; + struct upb_msgdef *m = msg1->def; + if(memcmp(msg1->data, msg2->data, msg1->def->set_flags_bytes) != 0) return false; /* Possible optimization: create a mask of the bytes in the messages that @@ -579,13 +576,16 @@ bool upb_msg_eql(void *data1, void *data2, struct upb_msgdef *m, bool recursive) for(uint32_t i = 0; i < m->num_fields; i++) { struct upb_msg_fielddef *f = &m->fields[i]; - if(!upb_msg_isset(data1, f)) continue; - union upb_value_ptr p1 = upb_msg_getptr(data1, f); - union upb_value_ptr p2 = upb_msg_getptr(data2, f); + bool msg1set = upb_msg_isset(msg1, f); + bool msg2set = upb_msg_isset(msg2, f); + if(msg1set != msg2set) return false; + if(!msg1set) continue; + union upb_value_ptr p1 = upb_msg_getptr(msg1, f); + union upb_value_ptr p2 = upb_msg_getptr(msg2, f); if(upb_isarray(f)) { if(!upb_array_eql(*p1.arr, *p2.arr, f, recursive)) return false; } else if(upb_issubmsg(f)) { - if(recursive && !upb_msg_eql(p1.msg, p2.msg, f->ref.msg, recursive)) + if(recursive && !upb_msg_eql(*p1.msg, *p2.msg, recursive)) return false; } else if(!upb_value_eql(p1, p2, f->type)) { return false; @@ -600,9 +600,10 @@ static void printval(struct upb_text_printer *printer, union upb_value_ptr p, google_protobuf_FieldDescriptorProto *fd, FILE *stream); -static void printmsg(struct upb_text_printer *printer, void *msg, - struct upb_msgdef *m, FILE *stream) +static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg, + FILE *stream) { + struct upb_msgdef *m = msg->def; for(uint32_t i = 0; i < m->num_fields; i++) { struct upb_msg_fielddef *f = &m->fields[i]; google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m); @@ -626,18 +627,17 @@ static void printval(struct upb_text_printer *printer, union upb_value_ptr p, FILE *stream) { if(upb_issubmsg(f)) { - upb_text_push(printer, *fd->name, stream); - printmsg(printer, *p.msg, f->ref.msg, stream); + upb_text_push(printer, fd->name, stream); + printmsg(printer, *p.msg, stream); upb_text_pop(printer, stream); } else { - upb_text_printfield(printer, *fd->name, f->type, upb_deref(p, f->type), stream); + upb_text_printfield(printer, fd->name, f->type, upb_deref(p, f->type), stream); } } -void upb_msg_print(void *data, struct upb_msgdef *m, bool single_line, - FILE *stream) +void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream) { struct upb_text_printer printer; upb_text_printer_init(&printer, single_line); - printmsg(&printer, data, m, stream); + printmsg(&printer, msg, stream); } diff --git a/src/upb_msg.h b/src/upb_msg.h index 6a0568a..e2b6903 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -108,6 +108,18 @@ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( return m->field_descriptors[f->field_index]; } +/* Message structure. *********************************************************/ + +struct upb_msg { + struct upb_msgdef *def; + void *gptr; /* Generic pointer for use by subclasses. */ + uint8_t data[1]; +}; + +INLINE void *upb_msg_gptr(struct upb_msg *msg) { + return msg->gptr; +} + /* Field access. **************************************************************/ /* Note that these only provide access to fields that are directly in the msg @@ -115,14 +127,16 @@ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor( * necessary to dereference the returned values. */ /* Returns a pointer to a specific field in a message. */ -INLINE union upb_value_ptr upb_msg_getptr(void *msg, struct upb_msg_fielddef *f) { +INLINE union upb_value_ptr upb_msg_getptr(struct upb_msg *msg, + struct upb_msg_fielddef *f) { union upb_value_ptr p; - p._void = ((char*)msg + f->byte_offset); + p._void = &msg->data[f->byte_offset]; return p; } /* Returns a a specific field in a message. */ -INLINE union upb_value upb_msg_get(void *msg, struct upb_msg_fielddef *f) { +INLINE union upb_value upb_msg_get(struct upb_msg *msg, + struct upb_msg_fielddef *f) { return upb_deref(upb_msg_getptr(msg, f), f->type); } @@ -145,40 +159,40 @@ INLINE uint8_t upb_isset_mask(uint32_t field_index) { } /* Returns true if the given field is set, false otherwise. */ -INLINE void upb_msg_set(void *msg, struct upb_msg_fielddef *f) +INLINE void upb_msg_set(struct upb_msg *msg, struct upb_msg_fielddef *f) { - ((char*)msg)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index); + msg->data[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index); } /* Clears the set bit for this field in the given message. */ -INLINE void upb_msg_unset(void *msg, struct upb_msg_fielddef *f) +INLINE void upb_msg_unset(struct upb_msg *msg, struct upb_msg_fielddef *f) { - ((char*)msg)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index); + msg->data[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index); } /* Tests whether the given field is set. */ -INLINE bool upb_msg_isset(void *msg, struct upb_msg_fielddef *f) +INLINE bool upb_msg_isset(struct upb_msg *msg, struct upb_msg_fielddef *f) { - return ((char*)msg)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index); + return msg->data[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index); } /* Returns true if *all* required fields are set, false otherwise. */ -INLINE bool upb_msg_all_required_fields_set(void *msg, struct upb_msgdef *m) +INLINE bool upb_msg_all_required_fields_set(struct upb_msg *msg, struct upb_msgdef *m) { int num_fields = m->num_required_fields; int i = 0; while(num_fields > 8) { - if(((uint8_t*)msg)[i++] != 0xFF) return false; + if(msg->data[i++] != 0xFF) return false; num_fields -= 8; } - if(((uint8_t*)msg)[i] != (1 << num_fields) - 1) return false; + if(msg->data[i] != (1 << num_fields) - 1) return false; return true; } /* Clears the set bit for all fields. */ -INLINE void upb_msg_clear(void *msg, struct upb_msgdef *m) +INLINE void upb_msg_clear(struct upb_msg *msg) { - memset(msg, 0, m->set_flags_bytes); + memset(msg->data, 0, msg->def->set_flags_bytes); } /* Number->field and name->field lookup. *************************************/ @@ -230,12 +244,12 @@ INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m, * new message data to hold it. If byref is set, strings in the returned * upb_msg will reference s instead of copying from it, but this requires that * s will live for as long as the returned message does. */ -void *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s); +struct upb_msg *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s); /* This function should be used to free messages that were parsed with * upb_msg_parsenew. It will free the message appropriately (including all * submessages). */ -void upb_msg_free(void *msg, struct upb_msgdef *m); +void upb_msg_free(struct upb_msg *msg); /* Parsing with (re)allocation callbacks. *************************************/ @@ -243,56 +257,52 @@ void upb_msg_free(void *msg, struct upb_msgdef *m); /* This interface parses protocol buffers into upb_msgs, but allows the client * to supply allocation callbacks whenever the parser needs to obtain a string, * array, or submsg (a "dynamic field"). If the parser sees that a dynamic - * field is already present (its "set bit" is set) it will use that, otherwise - * it will call the allocation callback to obtain one. + * field is already present (its "set bit" is set) it will use that, resizing + * it if necessary in the case of an array. Otherwise it will call the + * allocation callback to obtain one. * * This may seem trivial (since nearly all clients will use malloc and free for * memory management), but the allocation callback can be used for more than * just allocation. If we are parsing data into an existing upb_msg, the * allocation callback can examine any existing memory that is allocated for * the dynamic field and determine whether it can reuse it. It can also - * perform memory management like unrefing the existing field or refing the new. + * perform memory management like refing the new field. * * This parser is layered on top of the event-based parser in upb_parse.h. The * parser is upb_mm_msg.h is layered on top of this parser. * * This parser is fully streaming-capable. */ -typedef struct upb_array *(*upb_msg_getarray_cb_t)( - void *msg, struct upb_msgdef *m, - struct upb_array *existingval, struct upb_msg_fielddef *f, - upb_arraylen_t size); +/* Should return an initialized array. */ +typedef struct upb_array *(*upb_msg_getandref_array_cb_t)( + void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f); -/* Callback to allocate a string of size >=len. If len==0 then the client can - * assume that the parser intends to reference the memory instead of copying - * it. */ -typedef struct upb_string *(*upb_msg_getstring_cb_t)( - void *msg, struct upb_msgdef *m, - struct upb_string *existingval, struct upb_msg_fielddef *f, size_t len); +/* Callback to allocate a string. If byref is true, the client should assume + * that the string will be referencing the input data. */ +typedef struct upb_string *(*upb_msg_getandref_string_cb_t)( + void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f, + bool byref); -typedef void *(*upb_msg_getmsg_cb_t)( - void *msg, struct upb_msgdef *m, - void *existingval, struct upb_msg_fielddef *f); +/* Should return a cleared message. */ +typedef struct upb_msg *(*upb_msg_getandref_msg_cb_t)( + void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f); struct upb_msg_parser_frame { - struct upb_msgdef *m; - void *msg; + struct upb_msg *msg; }; struct upb_msg_parser { struct upb_stream_parser s; bool merge; bool byref; - struct upb_msg *m; struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top; - upb_msg_getarray_cb_t getarray_cb; - upb_msg_getstring_cb_t getstring_cb; - upb_msg_getmsg_cb_t getmsg_cb; + upb_msg_getandref_array_cb_t getarray_cb; + upb_msg_getandref_string_cb_t getstring_cb; + upb_msg_getandref_msg_cb_t getmsg_cb; }; void upb_msg_parser_reset(struct upb_msg_parser *p, - void *msg, struct upb_msgdef *m, - bool byref); + struct upb_msg *msg, bool byref); /* Parses protocol buffer data out of data which has length of len. The data * need not be a complete protocol buffer. The number of bytes parsed is @@ -347,9 +357,8 @@ upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s, /* Text dump *****************************************************************/ -bool upb_msg_eql(void *data1, void *data2, struct upb_msgdef *m, bool recursive); -void upb_msg_print(void *data, struct upb_msgdef *m, bool single_line, - FILE *stream); +bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive); +void upb_msg_print(struct upb_msg *data, bool single_line, FILE *stream); /* Internal functions. ********************************************************/ diff --git a/src/upb_string.c b/src/upb_string.c index bb40a62..7754936 100644 --- a/src/upb_string.c +++ b/src/upb_string.c @@ -14,8 +14,8 @@ bool upb_strreadfile(const char *filename, struct upb_string *data) { long size = ftell(f); if(size < 0) return false; if(fseek(f, 0, SEEK_SET) != 0) return false; - data->ptr = (char*)malloc(size); data->byte_len = size; + upb_stralloc(data, data->byte_len); if(fread(data->ptr, size, 1, f) != 1) { free(data->ptr); return false; diff --git a/src/upb_string.h b/src/upb_string.h index 528c8c8..7b63f1c 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -37,6 +37,7 @@ extern "C" { #define INLINE static inline #endif +#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) struct upb_string { @@ -44,45 +45,80 @@ struct upb_string { * ingrained convention that we follow it. */ char *ptr; uint32_t byte_len; + uint32_t byte_size; /* How many bytes of ptr we own. */ }; +INLINE void upb_strinit(struct upb_string *str) +{ + str->ptr = NULL; + str->byte_len = 0; + str->byte_size = 0; +} + +INLINE void upb_struninit(struct upb_string *str) +{ + if(str->byte_size) free(str->ptr); +} + +INLINE struct upb_string *upb_strnew() +{ + struct upb_string *str = (struct upb_string*)malloc(sizeof(*str)); + upb_strinit(str); + return str; +} + +INLINE void upb_strfree(struct upb_string *str) +{ + upb_struninit(str); + free(str); +} + +INLINE void upb_stralloc(struct upb_string *str, uint32_t size) +{ + if(str->byte_size < size) { + /* Need to resize. */ + str->byte_size = size; + void *oldptr = str->byte_size == 0 ? NULL : str->ptr; + str->ptr = (char*)realloc(oldptr, str->byte_size); + } +} + +INLINE void upb_strdrop(struct upb_string *str) +{ + upb_struninit(str); +} + INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) { return s1->byte_len == s2->byte_len && memcmp(s1->ptr, s2->ptr, s1->byte_len) == 0; } -INLINE int upb_strcmp(struct upb_string s1, struct upb_string s2) { - size_t common_length = UPB_MIN(s1.byte_len, s2.byte_len); - int common_diff = memcmp(s1.ptr, s2.ptr, common_length); - if(common_diff == 0) return s1.byte_len - s2.byte_len; - else return common_diff; +INLINE int upb_strcmp(struct upb_string *s1, struct upb_string *s2) { + size_t common_length = UPB_MIN(s1->byte_len, s2->byte_len); + int common_diff = memcmp(s1->ptr, s2->ptr, common_length); + return common_diff == 0 ? (int)s1->byte_len - (int)s2->byte_len : common_diff; } INLINE void upb_strcpy(struct upb_string *dest, struct upb_string *src) { - memcpy(dest->ptr, src->ptr, src->byte_len); dest->byte_len = src->byte_len; + upb_stralloc(dest, dest->byte_len); + memcpy(dest->ptr, src->ptr, src->byte_len); } -INLINE struct upb_string upb_strdup(struct upb_string s) { - struct upb_string copy; - copy.ptr = (char*)malloc(s.byte_len); - copy.byte_len = s.byte_len; - memcpy(copy.ptr, s.ptr, s.byte_len); +INLINE struct upb_string *upb_strdup(struct upb_string *s) { + struct upb_string *copy = upb_strnew(); + upb_strcpy(copy, s); return copy; } -INLINE struct upb_string upb_strdupc(char *s) { - struct upb_string copy; - copy.byte_len = strlen(s); - copy.ptr = (char*)malloc(copy.byte_len); - memcpy(copy.ptr, s, copy.byte_len); +INLINE struct upb_string *upb_strdupc(char *s) { + struct upb_string *copy = upb_strnew(); + copy->byte_len = strlen(s); + upb_stralloc(copy, copy->byte_len); + memcpy(copy->ptr, s, copy->byte_len); return copy; } -INLINE void upb_strfree(struct upb_string s) { - free(s.ptr); -} - /* Reads an entire file into a newly-allocated string. */ bool upb_strreadfile(const char *filename, struct upb_string *data); @@ -95,7 +131,7 @@ bool upb_strreadfile(const char *filename, struct upb_string *data); /* Allows using upb_strings in printf, ie: * struct upb_string str = UPB_STRLIT("Hello, World!\n"); * printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) (str).byte_len, (str).ptr +#define UPB_STRARG(str) (str)->byte_len, (str)->ptr #define UPB_STRFMT "%.*s" #ifdef __cplusplus diff --git a/src/upb_text.c b/src/upb_text.c index 6e17777..c9aad7e 100644 --- a/src/upb_text.c +++ b/src/upb_text.c @@ -36,7 +36,7 @@ void upb_text_printval(upb_field_type_t type, union upb_value val, FILE *file) case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: /* TODO: escaping. */ - fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(*val.str)); break; + fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break; } } @@ -48,7 +48,7 @@ static void print_indent(struct upb_text_printer *p, FILE *stream) } void upb_text_printfield(struct upb_text_printer *p, - struct upb_string name, + struct upb_string *name, upb_field_type_t valtype, union upb_value val, FILE *stream) { @@ -62,7 +62,7 @@ void upb_text_printfield(struct upb_text_printer *p, } void upb_text_push(struct upb_text_printer *p, - struct upb_string submsg_type, + struct upb_string *submsg_type, FILE *stream) { print_indent(p, stream); diff --git a/src/upb_text.h b/src/upb_text.h index f35f8d8..32d7278 100644 --- a/src/upb_text.h +++ b/src/upb_text.h @@ -23,10 +23,10 @@ INLINE void upb_text_printer_init(struct upb_text_printer *p, bool single_line) p->single_line = single_line; } void upb_text_printval(upb_field_type_t type, union upb_value p, FILE *file); -void upb_text_printfield(struct upb_text_printer *p, struct upb_string name, +void upb_text_printfield(struct upb_text_printer *p, struct upb_string *name, upb_field_type_t valtype, union upb_value val, FILE *stream); -void upb_text_push(struct upb_text_printer *p, struct upb_string submsg_type, +void upb_text_push(struct upb_text_printer *p, struct upb_string *submsg_type, FILE *stream); void upb_text_pop(struct upb_text_printer *p, FILE *stream); -- cgit v1.2.3