summaryrefslogtreecommitdiff
path: root/tools/upbc.c
blob: 51029d0fd8ab4d026375c969f404f9d5c1013635 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * upbc is the upb compiler.
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
 */

#include <ctype.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include "descriptor.h"
#include "upb_context.h"
#include "upb_enum.h"

/* These are in-place string transformations that do not change the length of
 * the string (and thus never need to re-allocate). */

/* Turns "str" into something usable as a C identifier by overwriting every
 * '.' and '/' byte with '_'.  The struct is passed by value, but the bytes it
 * points at are modified in place. */
static void to_cident(struct upb_string str)
{
  uint32_t pos = 0;
  while(pos < str.byte_len) {
    switch(str.ptr[pos]) {
      case '.':
      case '/':
        str.ptr[pos] = '_';
        break;
      default:
        break;
    }
    pos++;
  }
}

/* Turns "str" into a preprocessor-style symbol, in place: first applies
 * to_cident() (so '.' and '/' become '_'), then upper-cases every byte.
 * The length of the string is unchanged. */
static void to_preproc(struct upb_string str)
{
  to_cident(str);
  for(uint32_t i = 0; i < str.byte_len; i++) {
    /* <ctype.h> functions require a value representable as unsigned char (or
     * EOF); passing a negative plain char is undefined behavior. */
    str.ptr[i] = toupper((unsigned char)str.ptr[i]);
  }
}

/* Searches the first "len" bytes of "data" backwards for the byte "c".
 *
 * Returns the index of the last occurrence of "c", or -1 if "c" does not
 * occur (including when len == 0).  Every index down to and including 0 is
 * examined, so a match at index 0 is distinguishable from "not found". */
static int memrchr(char *data, char c, size_t len)
{
  int off = (int)len - 1;
  while(off >= 0 && data[off] != c) --off;
  return off;
}

/* Collects a pointer to every entry of table "t" into a newly malloc'd array.
 *
 * On return *size holds the number of entries (t->t.count).  The array holds
 * the entry pointers handed out by upb_strtable_begin()/upb_strtable_next(),
 * not copies.  The caller owns the returned array and should free() it.
 *
 * Exits the program if the array cannot be allocated.
 *
 * NOTE(review): the iterator is typed as struct upb_symtab_entry even though
 * this is also called on tables holding other entry types; this presumably
 * relies on every entry type starting with a struct upb_strtable_entry "e"
 * member -- confirm. */
void *strtable_to_array(struct upb_strtable *t, int *size)
{
  *size = t->t.count;
  void **array = malloc(*size * sizeof(void*));
  /* malloc(0) may legitimately return NULL, so only treat NULL as a failure
   * when we actually asked for space. */
  if(array == NULL && *size > 0) {
    fprintf(stderr, "upbc: out of memory.\n\n");
    exit(1);
  }
  struct upb_symtab_entry *e;
  int i = 0;
  for(e = upb_strtable_begin(t); e && i < *size; e = upb_strtable_next(t, &e->e))
    array[i++] = e;
  /* The iteration must yield exactly t->t.count entries. */
  assert(i == *size && e == NULL);
  return array;
}

/* The .h file defines structs for the types defined in the .proto file.  It
 * also defines constants for the enum values.
 *
 * entries:      symbol table entries to emit; only UPB_SYM_ENUM and
 *               UPB_SYM_MESSAGE entries produce output here.
 * num_entries:  number of elements in "entries".
 * outfile_name: name of the header being written; used only to derive the
 *               include guard.
 * stream:       where the generated header text is written.
 *
 * Assumes that the descriptors behind "entries" have been validated. */
static void write_h(struct upb_symtab_entry *entries[], int num_entries,
                    char *outfile_name, FILE *stream)
{
  /* Header file prologue.  The include guard is the output file name run
   * through to_preproc(). */
  struct upb_string include_guard_name = upb_strdupc(outfile_name);
  to_preproc(include_guard_name);
  fputs("/* This file was generated by upbc (the upb compiler).  "
        "Do not edit. */\n\n", stream);
  fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name));
  fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name));
  fputs("#include <upb_msg.h>\n\n", stream);
  fputs("#ifdef __cplusplus\n", stream);
  fputs("extern \"C\" {\n", stream);
  fputs("#endif\n\n", stream);

  /* Enums. */
  fprintf(stream, "/* Enums. */\n\n");
  for(int i = 0; i < num_entries; i++) {  /* Foreach enum */
    if(entries[i]->type != UPB_SYM_ENUM) continue;
    struct upb_symtab_entry *entry = entries[i];
    struct upb_enum *e = entry->ref._enum;
    google_protobuf_EnumDescriptorProto *ed = e->descriptor;
    /* We use entry->e.key (the fully qualified name) instead of ed->name. */
    struct upb_string enum_name = upb_strdup(entry->e.key);
    to_cident(enum_name);

    /* Enum values are prefixed with the enclosing scope of the enum: a copy
     * of the fully qualified name is cut off just past the position that
     * memrchr() reports for the last separator, then run through
     * to_preproc(). */
    struct upb_string enum_val_prefix = upb_strdup(entry->e.key);
    enum_val_prefix.byte_len = memrchr(enum_val_prefix.ptr,
                                       UPB_SYMBOL_SEPARATOR,
                                       enum_val_prefix.byte_len);
    enum_val_prefix.byte_len++;
    to_preproc(enum_val_prefix);

    fprintf(stream, "typedef enum " UPB_STRFMT " {\n", UPB_STRARG(enum_name));
    if(ed->set_flags.has.value) {
      for(uint32_t j = 0; j < ed->value->len; j++) {  /* Foreach enum value. */
        google_protobuf_EnumValueDescriptorProto *v = ed->value->elements[j];
        struct upb_string value_name = upb_strdup(*v->name);
        to_preproc(value_name);
        /* "  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13," */
        fprintf(stream, "  " UPB_STRFMT UPB_STRFMT " = %" PRIu32,
                UPB_STRARG(enum_val_prefix), UPB_STRARG(value_name), v->number);
        /* Every value but the last is followed by a comma. */
        if(j != ed->value->len-1) fputc(',', stream);
        fputc('\n', stream);
        upb_strfree(value_name);
      }
    }
    fprintf(stream, "} " UPB_STRFMT ";\n\n", UPB_STRARG(enum_name));
    upb_strfree(enum_name);
    upb_strfree(enum_val_prefix);
  }

  /* Forward declarations. */
  fputs("/* Forward declarations of all message types.\n", stream);
  fputs(" * So they can refer to each other in ", stream);
  fputs("possibly-recursive ways. */\n\n", stream);

  for(int i = 0; i < num_entries; i++) {  /* Foreach message */
    if(entries[i]->type != UPB_SYM_MESSAGE) continue;
    struct upb_symtab_entry *entry = entries[i];
    /* We use entry->e.key (the fully qualified name). */
    struct upb_string msg_name = upb_strdup(entry->e.key);
    to_cident(msg_name);
    fprintf(stream, "struct " UPB_STRFMT ";\n", UPB_STRARG(msg_name));
    fprintf(stream, "typedef struct " UPB_STRFMT "\n    " UPB_STRFMT ";\n\n",
            UPB_STRARG(msg_name), UPB_STRARG(msg_name));
    upb_strfree(msg_name);
  }

  /* Message Declarations. */
  fputs("/* The message definitions themselves. */\n\n", stream);
  for(int i = 0; i < num_entries; i++) {  /* Foreach message */
    if(entries[i]->type != UPB_SYM_MESSAGE) continue;
    struct upb_symtab_entry *entry = entries[i];
    struct upb_msg *m = entry->ref.msg;
    /* We use entry->e.key (the fully qualified name). */
    struct upb_string msg_name = upb_strdup(entry->e.key);
    to_cident(msg_name);
    fprintf(stream, "struct " UPB_STRFMT " {\n", UPB_STRARG(msg_name));

    /* The "set_flags" union: a byte array overlaid with one 1-bit "has" flag
     * per field.  Each flag is annotated with the field number and label. */
    fputs("  union {\n", stream);
    fprintf(stream, "    uint8_t bytes[%" PRIu32 "];\n", m->set_flags_bytes);
    fputs("    struct {\n", stream);
    for(uint32_t j = 0; j < m->num_fields; j++) {
      /* Indexed by fd->label. */
      static const char *const labels[] = {
        "", "optional", "required", "repeated"
      };
      struct google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
      fprintf(stream, "      bool " UPB_STRFMT ":1;  /* = %" PRIu32 ", %s. */\n",
              UPB_STRARG(*fd->name), fd->number, labels[fd->label]);
    }
    fputs("    } has;\n", stream);
    fputs("  } set_flags;\n", stream);

    /* One struct member per field. */
    for(uint32_t j = 0; j < m->num_fields; j++) {
      struct upb_msg_field *f = &m->fields[j];
      struct google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
      if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP ||
         f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
        /* Submessages get special treatment, since we have to use the message
         * name directly. */
        struct upb_string type_name_ref = *fd->type_name;
        if(type_name_ref.ptr[0] == UPB_SYMBOL_SEPARATOR) {
          /* Omit leading '.'. */
          type_name_ref.ptr++;
          type_name_ref.byte_len--;
        }
        struct upb_string type_name = upb_strdup(type_name_ref);
        to_cident(type_name);
        if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
          fprintf(stream, "  UPB_MSG_ARRAY(" UPB_STRFMT ")* " UPB_STRFMT ";\n",
                  UPB_STRARG(type_name), UPB_STRARG(*fd->name));
        } else {
          fprintf(stream, "  " UPB_STRFMT "* " UPB_STRFMT ";\n",
                  UPB_STRARG(type_name), UPB_STRARG(*fd->name));
        }
        upb_strfree(type_name);
      } else if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
        /* Array types for repeated scalar fields, indexed by fd->type.  The
         * empty slots are index 0 and the two submessage types handled
         * above. */
        static const char *const c_types[] = {
          "", "struct upb_double_array*", "struct upb_float_array*",
          "struct upb_int64_array*", "struct upb_uint64_array*",
          "struct upb_int32_array*", "struct upb_uint64_array*",
          "struct upb_uint32_array*", "struct upb_bool_array*",
          "struct upb_string_array*", "", "",
          "struct upb_string_array*", "struct upb_uint32_array*",
          "struct upb_uint32_array*", "struct upb_int32_array*",
          "struct upb_int64_array*", "struct upb_int32_array*",
          "struct upb_int64_array*"
        };
        fprintf(stream, "  %s " UPB_STRFMT ";\n",
                c_types[fd->type], UPB_STRARG(*fd->name));
      } else {
        /* C types for singular scalar fields, indexed by fd->type. */
        static const char *const c_types[] = {
          "", "double", "float", "int64_t", "uint64_t", "int32_t", "uint64_t",
          "uint32_t", "bool", "struct upb_string*", "", "",
          "struct upb_string*", "uint32_t", "uint32_t", "int32_t", "int64_t",
          "int32_t", "int64_t"
        };
        fprintf(stream, "  %s " UPB_STRFMT ";\n",
                c_types[fd->type], UPB_STRARG(*fd->name));
      }
    }
    fputs("};\n", stream);
    fprintf(stream, "UPB_DEFINE_MSG_ARRAY(" UPB_STRFMT ")\n\n",
            UPB_STRARG(msg_name));
    upb_strfree(msg_name);
  }

  /* Epilogue. */
  fputs("#ifdef __cplusplus\n", stream);
  fputs("}  /* extern \"C\" */\n", stream);
  fputs("#endif\n\n", stream);
  fprintf(stream, "#endif  /* " UPB_STRFMT " */\n", UPB_STRARG(include_guard_name));
  upb_strfree(include_guard_name);
}

/* Entry type for the string table that write_c() builds: one entry per
 * distinct string gathered from the symbols and their descriptors. */
struct strtable_entry {
  struct upb_strtable_entry e;  /* Table bookkeeping; e.key is the string. */
  size_t offset;  /* Set by write_c(): sum of the byte lengths of the strings
                   * sorted before this one, i.e. where it starts in the
                   * emitted "strings" array. */
  int num;        /* Set by write_c(): index of this string in sorted order. */
};

/* qsort() comparator for an array of "struct strtable_entry *".  Each argument
 * points at one array element (i.e. at a pointer to an entry); the entries
 * are ordered by upb_strcmp() on their keys. */
int compare_entries(const void *_e1, const void *_e2)
{
  struct strtable_entry *lhs = *(struct strtable_entry *const *)_e1;
  struct strtable_entry *rhs = *(struct strtable_entry *const *)_e2;
  return upb_strcmp(lhs->e.key, rhs->e.key);
}

/* The .c file defines the descriptor as data (in C structs).
 *
 * Assumes that d has been validated. */
static void write_c(struct upb_symtab_entry *entries[], int num_entries,
                    char *hfile_name, char *outfile_name, FILE *stream)
{
  fputs("/* This file was generated by upbc (the upb compiler).  "
        "Do not edit. */\n\n", stream),
  fprintf(stream, "#include \"%s\"\n\n", hfile_name);

  /* Gather all strings into a giant string.  Use a hash */
  struct upb_strtable t;

#define ADDSTR(msg, field) \
  if(msg->set_flags.has.field) { \
    struct strtable_entry e = {.e = {.key = *msg->field}}; \
    if(upb_strtable_lookup(&t, &e.e.key) == NULL) upb_strtable_insert(&t, &e.e); \
  }

  upb_strtable_init(&t, 16, sizeof(struct strtable_entry));
  for(int i = 0; i < num_entries; i++) {
    //addident(entries[i].key);
    struct strtable_entry e = {.e = {.key = entries[i]->e.key}};
    if(upb_strtable_lookup(&t, &e.e.key) == NULL) upb_strtable_insert(&t, &e.e);
    switch(entries[i]->type) {
      case UPB_SYM_MESSAGE: {
        struct upb_msg *m = entries[i]->ref.msg;
        ADDSTR(m->descriptor, name);
        for(uint32_t i = 0; i < m->num_fields; i++) {
          google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i];
          ADDSTR(fd, name);
          ADDSTR(fd, type_name);
          //ADDSTR(fd, extendee);
          //ADDSTR(fd, default_value);
          /* Neglect fd->options, doubtful that they're needed. */
        }
      }

      case UPB_SYM_ENUM: {
        google_protobuf_EnumDescriptorProto *ed = entries[i]->ref._enum->descriptor;
        ADDSTR(ed, name);
        if(ed->set_flags.has.value) {
          for(uint32_t i = 0; i < ed->value->len; i++) {
            ADDSTR(ed->value->elements[i], name);
            /* Neglect ed->value[i]->options, doubtful that they're needed. */
          }
        }
      }

      case UPB_SYM_SERVICE:
      case UPB_SYM_EXTENSION: break;  /* TODO */
    }
  }

  int size;
  struct strtable_entry **str_entries = strtable_to_array(&t, &size);
  /* Sort for nice size and reproduceability. */
  qsort(str_entries, size, sizeof(void*), compare_entries);

  /* Emit strings. */
  fputs("static const char strings[] =\n  \"", stream);
  int col = 2;
  int offset = 0;
  for(int i = 0; i < size; i++) {
    struct upb_string *s = &str_entries[i]->e.key;
    str_entries[i]->offset = offset;
    str_entries[i]->num = i;
    for(uint32_t j = 0; j < s->byte_len; j++) {
      if(++col == 80) {
        fputs("\"\n  \"", stream);
        col = 3;
      }
      fputc(s->ptr[j], stream);
    }
    offset += s->byte_len;
  }
  fputs("\"\n\n", stream);
}

/* Help text describing the command line; usage_err() prints it to stderr
 * after the error message. */
const char usage[] =
  "upbc -- upb compiler.\n"
  "upb v0.1  http://blog.reverberate.org/upb/\n"
  "\n"
  "Usage: upbc [options] input-file\n"
  "\n"
  "  -o OUTFILE-BASE    Write to OUTFILE-BASE.h and OUTFILE-BASE.c instead\n"
  "                     of using the input file as a basename.\n"
;

/* Reports a command-line mistake: writes "upbc: <err>" and a blank line to
 * stderr, then the usage text, and terminates the program with exit status 1.
 * Does not return. */
void usage_err(char *err)
{
  fputs("upbc: ", stderr);
  fputs(err, stderr);
  fputs("\n\n", stderr);
  fputs(usage, stderr);
  exit(1);
}

/* Reports a fatal error: writes "upbc: <err>" followed by a blank line to
 * stderr and terminates the program with exit status 1.  Does not return. */
void error(char *err)
{
  fputs("upbc: ", stderr);
  fputs(err, stderr);
  fputs("\n\n", stderr);
  exit(1);
}

/* Entry point: upbc [-o OUTFILE-BASE] input-file
 *
 * Reads the input file, parses it with upb_context_parsefds(), and writes
 * OUTFILE-BASE.h and OUTFILE-BASE.c (the base defaults to the input file
 * name).  Returns 0 on success; every failure is reported on stderr and
 * terminates the program with exit status 1. */
int main(int argc, char *argv[])
{
  /* Parse arguments. */
  char *outfile_base = NULL, *input_file = NULL;
  for(int i = 1; i < argc; i++) {
    if(strcmp(argv[i], "-o") == 0) {
      /* usage_err() does not return, so argv[i] below is always valid. */
      if(++i == argc)
        usage_err("-o must be followed by a FILE-BASE.");
      else if(outfile_base)
        usage_err("-o was specified multiple times.");
      outfile_base = argv[i];
    } else {
      if(input_file)
        usage_err("You can only specify one input file.");
      input_file = argv[i];
    }
  }
  if(!input_file) usage_err("You must specify an input file.");
  if(!outfile_base) outfile_base = input_file;

  /* Read input file. */
  struct upb_string descriptor;
  if(!upb_strreadfile(input_file, &descriptor))
    error("Couldn't read input file.");

  /* Parse input file. */
  struct upb_context c;
  upb_context_init(&c);
  if(!upb_context_parsefds(&c, &descriptor))
    error("Failed to parse input file descriptor.");

  /* Emit output files.  A true constant is used for the buffer size so the
   * buffers are ordinary arrays rather than variable-length arrays. */
  enum { MAXSIZE = 256 };
  char h_filename[MAXSIZE], c_filename[MAXSIZE];
  int h_len = snprintf(h_filename, MAXSIZE, "%s.h", outfile_base);
  int c_len = snprintf(c_filename, MAXSIZE, "%s.c", outfile_base);
  /* snprintf returns the length it wanted to write (or a negative value on
   * error); anything outside [0, MAXSIZE) means the name did not fit. */
  if(h_len < 0 || h_len >= MAXSIZE || c_len < 0 || c_len >= MAXSIZE)
    error("File base too long.\n");

  FILE *h_file = fopen(h_filename, "w"), *c_file = fopen(c_filename, "w");
  if(!h_file || !c_file)
    error("Failed to open output file(s)");

  int symcount;
  struct upb_symtab_entry **entries = strtable_to_array(&c.symtab, &symcount);
  write_h(entries, symcount, h_filename, h_file);
  write_c(entries, symcount, h_filename, c_filename, c_file);
  free(entries);  /* Only the pointer array is ours; c owns the entries. */
  upb_context_free(&c);
  upb_strfree(descriptor);

  /* fclose() flushes buffered output, so a failure here (or an earlier write
   * error recorded on the stream) means the generated files are incomplete. */
  int write_failed = ferror(h_file) || ferror(c_file);
  if(fclose(h_file) != 0) write_failed = 1;
  if(fclose(c_file) != 0) write_failed = 1;
  if(write_failed)
    error("Failed to write output file(s)");

  return 0;
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback