/* * upb - a minimalist implementation of protocol buffers. * * upbc is the upb compiler. * * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. */ #include #include #include "descriptor.h" #include "upb_context.h" #include "upb_enum.h" /* These are in-place string transformations that do not change the length of * the string (and thus never need to re-allocate). */ static void to_cident(struct upb_string str) { for(uint32_t i = 0; i < str.byte_len; i++) if(str.ptr[i] == '.' || str.ptr[i] == '/') str.ptr[i] = '_'; } static void to_preproc(struct upb_string str) { to_cident(str); for(uint32_t i = 0; i < str.byte_len; i++) str.ptr[i] = toupper(str.ptr[i]); } static int memrchr(char *data, char c, size_t len) { int off = len-1; while(off > 0 && data[off] != c) --off; return off; } void *strtable_to_array(struct upb_strtable *t, int *size) { *size = t->t.count; void **array = malloc(*size * sizeof(void*)); struct upb_symtab_entry *e; int i = 0; for(e = upb_strtable_begin(t); e && i < *size; e = upb_strtable_next(t, &e->e)) array[i++] = e; assert(i == *size && e == NULL); return array; } /* The .h file defines structs for the types defined in the .proto file. It * also defines constants for the enum values. * * Assumes that d has been validated. */ static void write_h(struct upb_symtab_entry *entries[], int num_entries, char *outfile_name, FILE *stream) { /* Header file prologue. */ struct upb_string include_guard_name = upb_strdupc(outfile_name); to_preproc(include_guard_name); fputs("/* This file was generated by upbc (the upb compiler). " "Do not edit. */\n\n", stream), fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name)); fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name)); fputs("#include \n\n", stream); fputs("#ifdef __cplusplus\n", stream); fputs("extern \"C\" {\n", stream); fputs("#endif\n\n", stream); /* Enums. */ fprintf(stream, "/* Enums. */\n\n"); for(int i = 0; i < num_entries; i++) { /* Foreach enum */ if(entries[i]->type != UPB_SYM_ENUM) continue; struct upb_symtab_entry *entry = entries[i]; struct upb_enum *e = entry->ref._enum; google_protobuf_EnumDescriptorProto *ed = e->descriptor; /* We use entry->e.key (the fully qualified name) instead of ed->name. */ struct upb_string enum_name = upb_strdup(entry->e.key); to_cident(enum_name); struct upb_string enum_val_prefix = upb_strdup(entry->e.key); enum_val_prefix.byte_len = memrchr(enum_val_prefix.ptr, UPB_SYMBOL_SEPARATOR, enum_val_prefix.byte_len); enum_val_prefix.byte_len++; to_preproc(enum_val_prefix); fprintf(stream, "typedef enum " UPB_STRFMT " {\n", UPB_STRARG(enum_name)); if(ed->set_flags.has.value) { for(uint32_t j = 0; j < ed->value->len; j++) { /* Foreach enum value. */ google_protobuf_EnumValueDescriptorProto *v = ed->value->elements[j]; struct upb_string value_name = upb_strdup(*v->name); to_preproc(value_name); /* " GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13," */ fprintf(stream, " " UPB_STRFMT UPB_STRFMT " = %" PRIu32, UPB_STRARG(enum_val_prefix), UPB_STRARG(value_name), v->number); if(j != ed->value->len-1) fputc(',', stream); fputc('\n', stream); upb_strfree(value_name); } } fprintf(stream, "} " UPB_STRFMT ";\n\n", UPB_STRARG(enum_name)); upb_strfree(enum_name); upb_strfree(enum_val_prefix); } /* Forward declarations. */ fputs("/* Forward declarations of all message types.\n", stream); fputs(" * So they can refer to each other in ", stream); fputs("possibly-recursive ways. */\n\n", stream); for(int i = 0; i < num_entries; i++) { /* Foreach message */ if(entries[i]->type != UPB_SYM_MESSAGE) continue; struct upb_symtab_entry *entry = entries[i]; /* We use entry->e.key (the fully qualified name). */ struct upb_string msg_name = upb_strdup(entry->e.key); to_cident(msg_name); fprintf(stream, "struct " UPB_STRFMT ";\n", UPB_STRARG(msg_name)); fprintf(stream, "typedef struct " UPB_STRFMT "\n " UPB_STRFMT ";\n\n", UPB_STRARG(msg_name), UPB_STRARG(msg_name)); upb_strfree(msg_name); } /* Message Declarations. */ fputs("/* The message definitions themselves. */\n\n", stream); for(int i = 0; i < num_entries; i++) { /* Foreach message */ if(entries[i]->type != UPB_SYM_MESSAGE) continue; struct upb_symtab_entry *entry = entries[i]; struct upb_msg *m = entry->ref.msg; /* We use entry->e.key (the fully qualified name). */ struct upb_string msg_name = upb_strdup(entry->e.key); to_cident(msg_name); fprintf(stream, "struct " UPB_STRFMT " {\n", UPB_STRARG(msg_name)); fputs(" union {\n", stream); fprintf(stream, " uint8_t bytes[%" PRIu32 "];\n", m->set_flags_bytes); fputs(" struct {\n", stream); for(uint32_t j = 0; j < m->num_fields; j++) { static char* labels[] = {"", "optional", "required", "repeated"}; struct google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j]; fprintf(stream, " bool " UPB_STRFMT ":1; /* = %" PRIu32 ", %s. */\n", UPB_STRARG(*fd->name), fd->number, labels[fd->label]); } fputs(" } has;\n", stream); fputs(" } set_flags;\n", stream); for(uint32_t j = 0; j < m->num_fields; j++) { struct upb_msg_field *f = &m->fields[j]; struct google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j]; if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP || f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { /* Submessages get special treatment, since we have to use the message * name directly. */ struct upb_string type_name_ref = *fd->type_name; if(type_name_ref.ptr[0] == UPB_SYMBOL_SEPARATOR) { /* Omit leading '.'. */ type_name_ref.ptr++; type_name_ref.byte_len--; } struct upb_string type_name = upb_strdup(type_name_ref); to_cident(type_name); if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { fprintf(stream, " UPB_MSG_ARRAY(" UPB_STRFMT ")* " UPB_STRFMT ";\n", UPB_STRARG(type_name), UPB_STRARG(*fd->name)); } else { fprintf(stream, " " UPB_STRFMT "* " UPB_STRFMT ";\n", UPB_STRARG(type_name), UPB_STRARG(*fd->name)); } upb_strfree(type_name); } else if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { static char* c_types[] = { "", "struct upb_double_array*", "struct upb_float_array*", "struct upb_int64_array*", "struct upb_uint64_array*", "struct upb_int32_array*", "struct upb_uint64_array*", "struct upb_uint32_array*", "struct upb_bool_array*", "struct upb_string_array*", "", "", "struct upb_string_array*", "struct upb_uint32_array*", "struct upb_uint32_array*", "struct upb_int32_array*", "struct upb_int64_array*", "struct upb_int32_array*", "struct upb_int64_array*" }; fprintf(stream, " %s " UPB_STRFMT ";\n", c_types[fd->type], UPB_STRARG(*fd->name)); } else { static char* c_types[] = { "", "double", "float", "int64_t", "uint64_t", "int32_t", "uint64_t", "uint32_t", "bool", "struct upb_string*", "", "", "struct upb_string*", "uint32_t", "uint32_t", "int32_t", "int64_t", "int32_t", "int64_t" }; fprintf(stream, " %s " UPB_STRFMT ";\n", c_types[fd->type], UPB_STRARG(*fd->name)); } } fputs("};\n", stream); fprintf(stream, "UPB_DEFINE_MSG_ARRAY(" UPB_STRFMT ")\n\n", UPB_STRARG(msg_name)); upb_strfree(msg_name); } /* Epilogue. */ fputs("#ifdef __cplusplus\n", stream); fputs("} /* extern \"C\" */\n", stream); fputs("#endif\n\n", stream); fprintf(stream, "#endif /* " UPB_STRFMT " */\n", UPB_STRARG(include_guard_name)); upb_strfree(include_guard_name); } struct strtable_entry { struct upb_strtable_entry e; int offset; int num; }; int compare_entries(const void *_e1, const void *_e2) { struct strtable_entry *const*e1 = _e1, *const*e2 = _e2; return upb_strcmp((*e1)->e.key, (*e2)->e.key); } /* The .c file defines the descriptor as data (in C structs). * * Assumes that d has been validated. */ static void write_c(struct upb_symtab_entry *entries[], int num_entries, char *hfile_name, char *outfile_name, FILE *stream) { (void)outfile_name; fputs("/* This file was generated by upbc (the upb compiler). " "Do not edit. */\n\n", stream), fprintf(stream, "#include \"%s\"\n\n", hfile_name); /* Gather all strings into a giant string. Use a hash */ struct upb_strtable t; #define ADDSTR(msg, field) \ if(msg->set_flags.has.field) { \ struct strtable_entry e = {.e = {.key = *msg->field}}; \ if(upb_strtable_lookup(&t, &e.e.key) == NULL) upb_strtable_insert(&t, &e.e); \ } upb_strtable_init(&t, 16, sizeof(struct strtable_entry)); for(int i = 0; i < num_entries; i++) { struct strtable_entry e = {.e = {.key = entries[i]->e.key}}; if(upb_strtable_lookup(&t, &e.e.key) == NULL) upb_strtable_insert(&t, &e.e); switch(entries[i]->type) { case UPB_SYM_MESSAGE: { struct upb_msg *m = entries[i]->ref.msg; ADDSTR(m->descriptor, name); for(uint32_t i = 0; i < m->num_fields; i++) { google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; ADDSTR(fd, name); ADDSTR(fd, type_name); //ADDSTR(fd, extendee); //ADDSTR(fd, default_value); /* Neglect fd->options, doubtful that they're needed. */ } } case UPB_SYM_ENUM: { google_protobuf_EnumDescriptorProto *ed = entries[i]->ref._enum->descriptor; ADDSTR(ed, name); if(ed->set_flags.has.value) { for(uint32_t i = 0; i < ed->value->len; i++) { ADDSTR(ed->value->elements[i], name); /* Neglect ed->value[i]->options, doubtful that they're needed. */ } } } case UPB_SYM_SERVICE: case UPB_SYM_EXTENSION: break; /* TODO */ } } int size; struct strtable_entry **str_entries = strtable_to_array(&t, &size); /* Sort for nice size and reproduceability. */ qsort(str_entries, size, sizeof(void*), compare_entries); /* Emit strings. */ fputs("static char strdata[] =\n \"", stream); int col = 2; int offset = 0; for(int i = 0; i < size; i++) { struct upb_string *s = &str_entries[i]->e.key; str_entries[i]->offset = offset; str_entries[i]->num = i; for(uint32_t j = 0; j < s->byte_len; j++) { if(++col == 80) { fputs("\"\n \"", stream); col = 3; } fputc(s->ptr[j], stream); } offset += s->byte_len; } fputs("\";\n\n", stream); fputs("static struct upb_string strings[] = {\n", stream); for(int i = 0; i < size; i++) { struct strtable_entry *e = str_entries[i]; fprintf(stream, " {.ptr = &strdata[%d], .byte_len=%d},\n", e->offset, e->e.key.byte_len); } fputs("};\n\n", stream); /* Emit fields. */ fputs("static google_protobuf_FieldDescriptorProto fields[] = {\n", stream); int total = 0; for(int i = 0; i < num_entries; i++) { if(entries[i]->type != UPB_SYM_MESSAGE) continue; struct upb_msg *m = entries[i]->ref.msg; for(uint32_t j = 0; j < m->num_fields; j++) { struct google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j]; struct strtable_entry *e = upb_strtable_lookup(&t, fd->name); fprintf(stream, " {.set_flags = {.bytes={0x%02hhx}}, .name=&strings[%3d], .number=%d, .label=%d, .type=%2d, .type_name=NULL},\n", fd->set_flags.bytes[0], e->num, fd->number, fd->label, fd->type); total ++; } } fputs("};\n\n", stream); fputs("static google_protobuf_FieldDescriptorProto *field_pointers[] = {\n", stream); for(int i = 0; i < total; i++) { fprintf(stream, " &fields[%d],\n", i); } fputs("};\n\n", stream); offset = 0; fputs("static UPB_MSG_ARRAY(google_protobuf_FieldDescriptorProto) field_arrays[] = {\n", stream); for(int i = 0; i < num_entries; i++) { if(entries[i]->type != UPB_SYM_MESSAGE) continue; struct upb_msg *m = entries[i]->ref.msg; fprintf(stream, " {.elements=&field_pointers[%d], .len=%d},\n", offset, m->num_fields); offset += m->num_fields; } fputs("};\n\n", stream); /* Emit messages. */ fputs("static google_protobuf_DescriptorProto messages[] = {\n", stream); offset = 0; for(int i = 0; i < num_entries; i++) { if(entries[i]->type != UPB_SYM_MESSAGE) continue; struct upb_msg *m = entries[i]->ref.msg; struct google_protobuf_DescriptorProto *d = m->descriptor; struct strtable_entry *e = upb_strtable_lookup(&t, d->name); fprintf(stream, " {.set_flags = {.bytes={0x%02hhx}}, .name=&strings[%3d], .field=&field_arrays[%d]},\n", d->set_flags.bytes[0], e->num, offset); } fputs("};\n\n", stream); fputs("static google_protobuf_DescriptorProto *message_pointers[] = {\n", stream); offset = 0; for(int i = 0; i < num_entries; i++) { if(entries[i]->type != UPB_SYM_MESSAGE) continue; fprintf(stream, " &messages[%d],\n", offset); } fputs("};\n\n", stream); } const char usage[] = "upbc -- upb compiler.\n" "upb v0.1 http://blog.reverberate.org/upb/\n" "\n" "Usage: upbc [options] input-file\n" "\n" " -o OUTFILE-BASE Write to OUTFILE-BASE.h and OUTFILE-BASE.c instead\n" " of using the input file as a basename.\n" ; void usage_err(char *err) { fprintf(stderr, "upbc: %s\n\n", err); fputs(usage, stderr); exit(1); } void error(char *err) { fprintf(stderr, "upbc: %s\n\n", err); exit(1); } int main(int argc, char *argv[]) { /* Parse arguments. */ char *outfile_base = NULL, *input_file = NULL; for(int i = 1; i < argc; i++) { if(strcmp(argv[i], "-o") == 0) { if(++i == argc) usage_err("-o must be followed by a FILE-BASE."); else if(outfile_base) usage_err("-o was specified multiple times."); outfile_base = argv[i]; } else { if(input_file) usage_err("You can only specify one input file."); input_file = argv[i]; } } if(!input_file) usage_err("You must specify an input file."); if(!outfile_base) outfile_base = input_file; /* Read input file. */ struct upb_string descriptor; if(!upb_strreadfile(input_file, &descriptor)) error("Couldn't read input file."); /* Parse input file. */ struct upb_context c; upb_context_init(&c); if(!upb_context_parsefds(&c, &descriptor)) error("Failed to parse input file descriptor."); /* Emit output files. */ const int maxsize = 256; char h_filename[maxsize], c_filename[maxsize]; if(snprintf(h_filename, maxsize, "%s.h", outfile_base) >= maxsize || snprintf(c_filename, maxsize, "%s.c", outfile_base) >= maxsize) error("File base too long.\n"); FILE *h_file = fopen(h_filename, "w"), *c_file = fopen(c_filename, "w"); if(!h_file || !c_file) error("Failed to open output file(s)"); int symcount; struct upb_symtab_entry **entries = strtable_to_array(&c.symtab, &symcount); write_h(entries, symcount, h_filename, h_file); write_c(entries, symcount, h_filename, c_filename, c_file); upb_context_free(&c); upb_strfree(descriptor); fclose(h_file); fclose(c_file); return 0; }