From 73bac1f2b007d5b534f28d15eaa5327718fb3165 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 8 Jul 2009 19:38:39 -0700 Subject: More work on the compiler. --- tools/upbc.c | 212 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 186 insertions(+), 26 deletions(-) diff --git a/tools/upbc.c b/tools/upbc.c index 4177576..51029d0 100644 --- a/tools/upbc.c +++ b/tools/upbc.c @@ -35,16 +35,30 @@ static int memrchr(char *data, char c, size_t len) return off; } +void *strtable_to_array(struct upb_strtable *t, int *size) +{ + *size = t->t.count; + void **array = malloc(*size * sizeof(void*)); + struct upb_symtab_entry *e; + int i = 0; + for(e = upb_strtable_begin(t); e && i < *size; e = upb_strtable_next(t, &e->e)) + array[i++] = e; + assert(i == *size && e == NULL); + return array; +} + /* The .h file defines structs for the types defined in the .proto file. It * also defines constants for the enum values. * * Assumes that d has been validated. */ -static void write_header(struct upb_symtab_entry entries[], int num_entries, - struct upb_string outfile_name, FILE *stream) +static void write_h(struct upb_symtab_entry *entries[], int num_entries, + char *outfile_name, FILE *stream) { /* Header file prologue. */ - struct upb_string include_guard_name = upb_strdup(outfile_name); + struct upb_string include_guard_name = upb_strdupc(outfile_name); to_preproc(include_guard_name); + fputs("/* This file was generated by upbc (the upb compiler). " + "Do not edit. */\n\n", stream), fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name)); fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name)); fputs("#include \n\n", stream); @@ -55,8 +69,8 @@ static void write_header(struct upb_symtab_entry entries[], int num_entries, /* Enums. */ fprintf(stream, "/* Enums. */\n\n"); for(int i = 0; i < num_entries; i++) { /* Foreach enum */ - if(entries[i].type != UPB_SYM_ENUM) continue; - struct upb_symtab_entry *entry = &entries[i]; + if(entries[i]->type != UPB_SYM_ENUM) continue; + struct upb_symtab_entry *entry = entries[i]; struct upb_enum *e = entry->ref._enum; google_protobuf_EnumDescriptorProto *ed = e->descriptor; /* We use entry->e.key (the fully qualified name) instead of ed->name. */ @@ -95,13 +109,13 @@ static void write_header(struct upb_symtab_entry entries[], int num_entries, fputs("possibly-recursive ways. */\n\n", stream); for(int i = 0; i < num_entries; i++) { /* Foreach message */ - if(entries[i].type != UPB_SYM_MESSAGE) continue; - struct upb_symtab_entry *entry = &entries[i]; + if(entries[i]->type != UPB_SYM_MESSAGE) continue; + struct upb_symtab_entry *entry = entries[i]; /* We use entry->e.key (the fully qualified name). */ struct upb_string msg_name = upb_strdup(entry->e.key); to_cident(msg_name); fprintf(stream, "struct " UPB_STRFMT ";\n", UPB_STRARG(msg_name)); - fprintf(stream, "typedef struct " UPB_STRFMT "\n " UPB_STRFMT ";\n\n", + fprintf(stream, "typedef struct " UPB_STRFMT "\n " UPB_STRFMT ";\n\n", UPB_STRARG(msg_name), UPB_STRARG(msg_name)); upb_strfree(msg_name); } @@ -109,8 +123,8 @@ static void write_header(struct upb_symtab_entry entries[], int num_entries, /* Message Declarations. */ fputs("/* The message definitions themselves. */\n\n", stream); for(int i = 0; i < num_entries; i++) { /* Foreach message */ - if(entries[i].type != UPB_SYM_MESSAGE) continue; - struct upb_symtab_entry *entry = &entries[i]; + if(entries[i]->type != UPB_SYM_MESSAGE) continue; + struct upb_symtab_entry *entry = entries[i]; struct upb_msg *m = entry->ref.msg; /* We use entry->e.key (the fully qualified name). */ struct upb_string msg_name = upb_strdup(entry->e.key); @@ -189,24 +203,170 @@ static void write_header(struct upb_symtab_entry entries[], int num_entries, upb_strfree(include_guard_name); } -int main() +struct strtable_entry { + struct upb_strtable_entry e; + size_t offset; + int num; +}; + +int compare_entries(const void *_e1, const void *_e2) +{ + struct strtable_entry *const*e1 = _e1, *const*e2 = _e2; + return upb_strcmp((*e1)->e.key, (*e2)->e.key); +} + +/* The .c file defines the descriptor as data (in C structs). + * + * Assumes that d has been validated. */ +static void write_c(struct upb_symtab_entry *entries[], int num_entries, + char *hfile_name, char *outfile_name, FILE *stream) { + fputs("/* This file was generated by upbc (the upb compiler). " + "Do not edit. */\n\n", stream), + fprintf(stream, "#include \"%s\"\n\n", hfile_name); + + /* Gather all strings into a giant string. Use a hash */ + struct upb_strtable t; + +#define ADDSTR(msg, field) \ + if(msg->set_flags.has.field) { \ + struct strtable_entry e = {.e = {.key = *msg->field}}; \ + if(upb_strtable_lookup(&t, &e.e.key) == NULL) upb_strtable_insert(&t, &e.e); \ + } + + upb_strtable_init(&t, 16, sizeof(struct strtable_entry)); + for(int i = 0; i < num_entries; i++) { + //addident(entries[i].key); + struct strtable_entry e = {.e = {.key = entries[i]->e.key}}; + if(upb_strtable_lookup(&t, &e.e.key) == NULL) upb_strtable_insert(&t, &e.e); + switch(entries[i]->type) { + case UPB_SYM_MESSAGE: { + struct upb_msg *m = entries[i]->ref.msg; + ADDSTR(m->descriptor, name); + for(uint32_t i = 0; i < m->num_fields; i++) { + google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; + ADDSTR(fd, name); + ADDSTR(fd, type_name); + //ADDSTR(fd, extendee); + //ADDSTR(fd, default_value); + /* Neglect fd->options, doubtful that they're needed. */ + } + } + + case UPB_SYM_ENUM: { + google_protobuf_EnumDescriptorProto *ed = entries[i]->ref._enum->descriptor; + ADDSTR(ed, name); + if(ed->set_flags.has.value) { + for(uint32_t i = 0; i < ed->value->len; i++) { + ADDSTR(ed->value->elements[i], name); + /* Neglect ed->value[i]->options, doubtful that they're needed. */ + } + } + } + + case UPB_SYM_SERVICE: + case UPB_SYM_EXTENSION: break; /* TODO */ + } + } + + int size; + struct strtable_entry **str_entries = strtable_to_array(&t, &size); + /* Sort for nice size and reproduceability. */ + qsort(str_entries, size, sizeof(void*), compare_entries); + + /* Emit strings. */ + fputs("static const char strings[] =\n \"", stream); + int col = 2; + int offset = 0; + for(int i = 0; i < size; i++) { + struct upb_string *s = &str_entries[i]->e.key; + str_entries[i]->offset = offset; + str_entries[i]->num = i; + for(uint32_t j = 0; j < s->byte_len; j++) { + if(++col == 80) { + fputs("\"\n \"", stream); + col = 3; + } + fputc(s->ptr[j], stream); + } + offset += s->byte_len; + } + fputs("\"\n\n", stream); +} + +const char usage[] = + "upbc -- upb compiler.\n" + "upb v0.1 http://blog.reverberate.org/upb/\n" + "\n" + "Usage: upbc [options] input-file\n" + "\n" + " -o OUTFILE-BASE Write to OUTFILE-BASE.h and OUTFILE-BASE.c instead\n" + " of using the input file as a basename.\n" +; + +void usage_err(char *err) +{ + fprintf(stderr, "upbc: %s\n\n", err); + fputs(usage, stderr); + exit(1); +} + +void error(char *err) +{ + fprintf(stderr, "upbc: %s\n\n", err); + exit(1); +} + +int main(int argc, char *argv[]) +{ + /* Parse arguments. */ + char *outfile_base = NULL, *input_file = NULL; + for(int i = 1; i < argc; i++) { + if(strcmp(argv[i], "-o") == 0) { + if(++i == argc) + usage_err("-o must be followed by a FILE-BASE."); + else if(outfile_base) + usage_err("-o was specified multiple times."); + outfile_base = argv[i]; + } else { + if(input_file) + usage_err("You can only specify one input file."); + input_file = argv[i]; + } + } + if(!input_file) usage_err("You must specify an input file."); + if(!outfile_base) outfile_base = input_file; + + /* Read input file. */ + struct upb_string descriptor; + if(!upb_strreadfile(input_file, &descriptor)) + error("Couldn't read input file."); + + /* Parse input file. */ struct upb_context c; upb_context_init(&c); - struct upb_string fds; - assert(upb_strreadfile("/tmp/descriptor.proto.bin", &fds)); - assert(upb_context_parsefds(&c, &fds)); - struct upb_strtable *t = &c.symtab; - int symcount = t->t.count; - struct upb_symtab_entry entries[symcount]; - struct upb_symtab_entry *e = upb_strtable_begin(t); - int i = 0; - for(; e && i < symcount; e = upb_strtable_next(t, &e->e), i++) - entries[i] = *e; - assert(e == NULL && i == symcount); - struct upb_string name = UPB_STRLIT("descriptor.proto"); - write_header(entries, symcount, name, stdout); + if(!upb_context_parsefds(&c, &descriptor)) + error("Failed to parse input file descriptor."); + + /* Emit output files. */ + const int maxsize = 256; + char h_filename[maxsize], c_filename[maxsize]; + if(snprintf(h_filename, maxsize, "%s.h", outfile_base) >= maxsize || + snprintf(c_filename, maxsize, "%s.c", outfile_base) >= maxsize) + error("File base too long.\n"); + + FILE *h_file = fopen(h_filename, "w"), *c_file = fopen(c_filename, "w"); + if(!h_file || !c_file) + error("Failed to open output file(s)"); + + int symcount; + struct upb_symtab_entry **entries = strtable_to_array(&c.symtab, &symcount); + write_h(entries, symcount, h_filename, h_file); + write_c(entries, symcount, h_filename, c_filename, c_file); upb_context_free(&c); - upb_strfree(fds); -} + upb_strfree(descriptor); + fclose(h_file); + fclose(c_file); + return 0; +} -- cgit v1.2.3