From c2419764856e5666bfa9e3c1b87de29ec93babe1 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 21 Dec 2009 10:48:01 -0800 Subject: In the midst of a major refactoring. --- src/upb_msg.c | 422 ---------------------------------------------------------- 1 file changed, 422 deletions(-) delete mode 100644 src/upb_msg.c (limited to 'src/upb_msg.c') diff --git a/src/upb_msg.c b/src/upb_msg.c deleted file mode 100644 index dd6b72e..0000000 --- a/src/upb_msg.c +++ /dev/null @@ -1,422 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include -#include -#include "upb_msg.h" -#include "descriptor.h" -#include "upb_mm.h" -#include "upb_parse.h" -#include "upb_serialize.h" -#include "upb_text.h" - -/* Parsing. ******************************************************************/ - -struct upb_msgparser_frame { - struct upb_msg *msg; -}; - -struct upb_msgparser { - struct upb_cbparser *s; - bool merge; - bool byref; - struct upb_msgparser_frame stack[UPB_MAX_NESTING], *top; -}; - -/* Helper function that returns a pointer to where the next value for field "f" - * should be stored, taking into account whether f is an array that may need to - * be allocated or resized. */ -static union upb_value_ptr get_value_ptr(struct upb_msg *msg, - struct upb_fielddef *f) -{ - union upb_value_ptr p = upb_msg_getptr(msg, f); - if(upb_isarray(f)) { - if(!upb_msg_isset(msg, f)) { - if(!*p.arr || !upb_mmhead_only(&((*p.arr)->mmhead))) { - if(*p.arr) - upb_array_unref(*p.arr); - *p.arr = upb_array_new(f); - } - upb_array_truncate(*p.arr); - upb_msg_set(msg, f); - } - p = upb_array_append(*p.arr); - } - return p; -} - -/* Callbacks for the stream parser. */ - -static bool value_cb(void *udata, struct upb_msgdef *msgdef, - struct upb_fielddef *f, union upb_value val) -{ - (void)msgdef; - struct upb_msgparser *mp = udata; - struct upb_msg *msg = mp->top->msg; - union upb_value_ptr p = get_value_ptr(msg, f); - upb_msg_set(msg, f); - upb_value_write(p, val, f->type); - return true; -} - -static bool str_cb(void *udata, struct upb_msgdef *msgdef, - struct upb_fielddef *f, uint8_t *str, size_t avail_len, - size_t total_len) -{ - (void)msgdef; - struct upb_msgparser *mp = udata; - struct upb_msg *msg = mp->top->msg; - union upb_value_ptr p = get_value_ptr(msg, f); - upb_msg_set(msg, f); - if(avail_len != total_len) abort(); /* TODO: support streaming. */ - //bool byref = avail_len == total_len && mp->byref; - if(!*p.str || !upb_mmhead_only(&((*p.str)->mmhead))) { - if(*p.str) - upb_string_unref(*p.str); - *p.str = upb_string_new(); - } - //if(byref) { - // upb_strdrop(*p.str); - // (*p.str)->ptr = (char*)str; - // (*p.str)->byte_len = avail_len; - //} else { - upb_string_resize(*p.str, total_len); - memcpy((*p.str)->ptr, str, avail_len); - (*p.str)->byte_len = avail_len; - //} - return true; -} - -static void start_cb(void *udata, struct upb_fielddef *f) -{ - struct upb_msgparser *mp = udata; - struct upb_msg *oldmsg = mp->top->msg; - union upb_value_ptr p = get_value_ptr(oldmsg, f); - - if(upb_isarray(f) || !upb_msg_isset(oldmsg, f)) { - if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) { - if(*p.msg) - upb_msg_unref(*p.msg); - *p.msg = upb_msg_new(upb_downcast_msgdef(f->def)); - } - upb_msg_clear(*p.msg); - upb_msg_set(oldmsg, f); - } - - mp->top++; - mp->top->msg = *p.msg; -} - -static void end_cb(void *udata) -{ - struct upb_msgparser *mp = udata; - mp->top--; -} - -/* Externally-visible functions for the msg parser. */ - -struct upb_msgparser *upb_msgparser_new(struct upb_msgdef *def) -{ - struct upb_msgparser *mp = malloc(sizeof(struct upb_msgparser)); - mp->s = upb_cbparser_new(def, value_cb, str_cb, start_cb, end_cb); - return mp; -} - -void upb_msgparser_reset(struct upb_msgparser *s, struct upb_msg *msg, bool byref) -{ - upb_cbparser_reset(s->s, s); - s->byref = byref; - s->top = s->stack; - s->top->msg = msg; -} - -void upb_msgparser_free(struct upb_msgparser *s) -{ - upb_cbparser_free(s->s); - free(s); -} - -void upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len, - struct upb_status *status) -{ - struct upb_msgparser *mp = upb_msgparser_new(msg->def); - upb_msgparser_reset(mp, msg, false); - upb_msg_clear(msg); - upb_msgparser_parse(mp, buf, len, status); - upb_msgparser_free(mp); -} - -size_t upb_msgparser_parse(struct upb_msgparser *s, void *data, size_t len, - struct upb_status *status) -{ - return upb_cbparser_parse(s->s, data, len, status); -} - -/* Serialization. ************************************************************/ - -/* We store the message sizes linearly in post-order (size of parent after sizes - * of children) for a right-to-left traversal of the message tree. Iterating - * over this in reverse gives us a pre-order (size of parent before sizes of - * children) left-to-right traversal, which is what we want for parsing. */ -struct upb_msgsizes { - int len; - int size; - size_t *sizes; -}; - -/* Declared below -- this and get_valuesize are mutually recursive. */ -static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m); - -/* Returns a size of a value as it will be serialized. Does *not* include - * the size of the tag -- that is already accounted for. */ -static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p, - struct upb_fielddef *f) -{ - switch(f->type) { - default: assert(false); return 0; /* Internal corruption. */ - case UPB_TYPE(MESSAGE): { - size_t submsg_size = get_msgsize(sizes, *p.msg); - return upb_get_INT32_size(submsg_size) + submsg_size; - } - case UPB_TYPE(GROUP): { - size_t endgrp_tag_size = upb_get_tag_size(f->number); - return endgrp_tag_size + get_msgsize(sizes, *p.msg); - } -#define CASE(type, member) \ - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type: \ - return upb_get_ ## type ## _size(*p.member); - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) -#undef CASE - } -} - -/* This is mostly just a pure recursive function to calculate the size of a - * message. However it also stores the results of each level of the recursion - * in sizes, because we need all of this intermediate information later. */ -static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m) -{ - size_t size = 0; - /* We iterate over fields and arrays in reverse order. */ - for(int32_t i = m->def->num_fields - 1; i >= 0; i--) { - struct upb_fielddef *f = &m->def->fields[i]; - if(!upb_msg_isset(m, f)) continue; - union upb_value_ptr p = upb_msg_getptr(m, f); - if(upb_isarray(f)) { - for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) { - union upb_value_ptr elem = upb_array_getelementptr(*p.arr, j); - /* TODO: for packed arrays tag size goes outside the loop. */ - size += upb_get_tag_size(f->number); - size += get_valuesize(sizes, elem, f); - } - } else { - size += upb_get_tag_size(f->number); - size += get_valuesize(sizes, p, f); - } - } - /* Resize the 'sizes' array if necessary. */ - assert(sizes->len <= sizes->size); - if(sizes->len == sizes->size) { - sizes->size *= 2; - sizes->sizes = realloc(sizes->sizes, sizes->size * sizeof(size_t)); - } - /* Add our size (already added our children, so post-order). */ - sizes->sizes[sizes->len++] = size; - return size; -} - -void upb_msgsizes_read(struct upb_msgsizes *sizes, struct upb_msg *m) -{ - get_msgsize(sizes, m); -} - -/* Initialize/free a upb_msg_sizes for the given message. */ -void upb_msgsizes_init(struct upb_msgsizes *sizes) -{ - sizes->len = 0; - sizes->size = 0; - sizes->sizes = NULL; -} - -void upb_msgsizes_free(struct upb_msgsizes *sizes) -{ - free(sizes->sizes); -} - -size_t upb_msgsizes_totalsize(struct upb_msgsizes *sizes) -{ - return sizes->sizes[sizes->len-1]; -} - -struct upb_msg_serialize_state { - struct { - int field_iter; - int elem_iter; - struct upb_msgdef *m; - void *msg; - } stack[UPB_MAX_NESTING], *top, *limit; -}; - -void upb_msg_serialize_alloc(struct upb_msg_serialize_state *s) -{ - (void)s; -} - -void upb_msg_serialize_free(struct upb_msg_serialize_state *s) -{ - (void)s; -} - -void upb_msg_serialize_init(struct upb_msg_serialize_state *s, struct upb_msg *m, - struct upb_msgsizes *sizes) -{ - (void)s; - (void)m; - (void)sizes; -} - -#if 0 -static uint8_t *serialize_tag(uint8_t *buf, uint8_t *end, - struct upb_fielddef *f, - struct upb_status *status) -{ - /* TODO: need to have the field number also. */ - return upb_put_UINT32(buf, end, f->type, status); -} - -/* Serializes the next set of bytes into buf (which has size len). Returns - * UPB_STATUS_OK if serialization is complete, or UPB_STATUS_NEED_MORE_DATA - * if there is more data from the message left to be serialized. - * - * The number of bytes written to buf is returned in *read. This will be - * equal to len unless we finished serializing. */ -size_t upb_msg_serialize(struct upb_msg_serialize_state *s, - void *_buf, size_t len, struct upb_status *status) -{ - uint8_t *buf = _buf; - uint8_t *end = buf + len; - uint8_t *const start = buf; - int i = s->top->field_iter; - //int j = s->top->elem_iter; - void *msg = s->top->msg; - struct upb_msgdef *m = s->top->m; - - while(buf < end) { - struct upb_fielddef *f = &m->fields[i]; - //union upb_value_ptr p = upb_msg_getptr(msg, f); - buf = serialize_tag(buf, end, f, status); - if(f->type == UPB_TYPE(MESSAGE)) { - } else if(f->type == UPB_TYPE(GROUP)) { - } else if(upb_isstring(f)) { - } else { - //upb_serialize_value(buf, end, f->type, p, status); - } - } - return buf - start; -} -#endif - - -/* Comparison. ***************************************************************/ - -bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2, - upb_field_type_t type) -{ -#define CMP(type) return *p1.type == *p2.type; - switch(type) { - case UPB_TYPE(DOUBLE): - CMP(_double) - case UPB_TYPE(FLOAT): - CMP(_float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CMP(int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CMP(uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CMP(int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CMP(uint32); - case UPB_TYPE(BOOL): - CMP(_bool); - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - return upb_streql(*p1.str, *p2.str); - default: return false; - } -} - -bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2, - struct upb_fielddef *f, bool recursive) -{ - if(arr1->len != arr2->len) return false; - if(upb_issubmsg(f)) { - if(!recursive) return true; - for(uint32_t i = 0; i < arr1->len; i++) - if(!upb_msg_eql(arr1->elements.msg[i], arr2->elements.msg[i], recursive)) - return false; - } else if(upb_isstring(f)) { - for(uint32_t i = 0; i < arr1->len; i++) - if(!upb_streql(arr1->elements.str[i], arr2->elements.str[i])) - return false; - } else { - /* For primitive types we can compare the memory directly. */ - return memcmp(arr1->elements._void, arr2->elements._void, - arr1->len * upb_type_info[f->type].size) == 0; - } - return true; -} - -bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive) -{ - /* Must have the same fields set. TODO: is this wrong? Should we also - * consider absent defaults equal to explicitly set defaults? */ - if(msg1->def != msg2->def) return false; - struct upb_msgdef *m = msg1->def; - if(memcmp(msg1->data, msg2->data, msg1->def->set_flags_bytes) != 0) - return false; - - /* Possible optimization: create a mask of the bytes in the messages that - * contain only primitive values (not strings, arrays, submessages, or - * padding) and memcmp the masked messages. */ - - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - struct upb_fielddef *f = &m->fields[i]; - bool msg1set = upb_msg_isset(msg1, f); - bool msg2set = upb_msg_isset(msg2, f); - if(msg1set != msg2set) return false; - if(!msg1set) continue; - union upb_value_ptr p1 = upb_msg_getptr(msg1, f); - union upb_value_ptr p2 = upb_msg_getptr(msg2, f); - if(upb_isarray(f)) { - if(!upb_array_eql(*p1.arr, *p2.arr, f, recursive)) return false; - } else if(upb_issubmsg(f)) { - if(recursive && !upb_msg_eql(*p1.msg, *p2.msg, recursive)) - return false; - } else if(!upb_value_eql(p1, p2, f->type)) { - return false; - } - } - return true; -} -- cgit v1.2.3