%{
// Scanner specification for libdietc. Each rule below matches one kind of
// input line and passes the matched text (yytext) to a handler defined in the
// user-code section at the bottom of this file:
//   - typedef / struct / union lines  -> declare_* / predeclare_* /
//                                        define_aggregate
//   - extern / static object lines    -> declare_object
//   - a line ending in "{"            -> start_function
//   - tab-indented lines              -> add_instruction (and declare_object
//                                        when the line declares a local)
//   - a line ending in "}"            -> end_function
// Lines starting with '#', lines containing "= ", and bare newlines are
// consumed without action; anything else is reported on stderr as UNKNOWN.
//
// NOTE(review): the angle-bracket header names were missing from this file as
// received; the list below is reconstructed from the library calls made in
// the user-code section -- confirm against the build.
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include "libdietc.h"

static void declare_pointer(char *);
static void declare_array(char *);
static void declare_function(char *);
static void predeclare_struct(char *);
static void predeclare_union(char *);
static void define_aggregate(char *);
static void declare_basic(char *);
static void declare_object(char *);
static void start_function(char *);
static void add_instruction(char *);
static void end_function(void);
%}

%%

^[#].*$ {}
"typedef Type_"[0-9]*" * Type_"[0-9]*" ;" { declare_pointer(yytext); }
"typedef Type_"[0-9]*" Type_"[0-9]*" [ "[^ ]*" ] ;" { declare_array(yytext); }
"typedef Type_"[0-9]*" ( "[^)\n]*") ;" { declare_function(yytext); }
"typedef struct Struct_"[0-9]*" Type_"[0-9]*" ;" { predeclare_struct(yytext); }
"struct Struct_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); }
"typedef union Union_"[0-9]*" Type_"[0-9]*" ;" { predeclare_union(yytext); }
"union Union_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); }
"typedef ".*" ;" { declare_basic(yytext); }
"extern Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); }
"static Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); }
^[^=\n\t]*"= ".*$ { /* definition */ }
^[^{\n]*"{"$ { start_function(yytext); }
\tType_[0-9]*" "[a-zA-Z0-9_]*" ;" { declare_object(yytext); add_instruction(yytext); }
\t[^\n]*$ { add_instruction(yytext); }
"}"$ { end_function(); }
\n { }
(.|[ \t]) { fprintf(stderr, "UNKNOWN: %s\n", yytext); }

%%

// Program being assembled by the current libdietc_parse() call.
static struct program PROGRAM;
// Function whose body is currently being scanned; copied into PROGRAM by
// end_function().
static struct function FUNCTION;
// Link where add_instruction() stores the next instruction node.
static struct instruction **NEXT_INSTRUCTION;

// skips to the next number in the string, reads past it, and then returns the
// read number. @ptr is left pointing to the char immediately after the number.
static int next_number(char **ptr) { int value = 0; while (**ptr && !isdigit(**ptr)) (*ptr)++; for (; isdigit(**ptr); (*ptr)++) value = (value * 10) + (**ptr - '0'); return value; } static void add_type(struct type type) { if (PROGRAM.n_types <= type.id) { PROGRAM.n_types = type.id + 1; PROGRAM.id2type = realloc(PROGRAM.id2type, PROGRAM.n_types * sizeof(PROGRAM.id2type[0])); } PROGRAM.id2type[type.id] = malloc(sizeof(type)); *(PROGRAM.id2type[type.id]) = type; } static struct type *typedup(struct type *type) { struct type *dup = malloc(sizeof(*dup)); *dup = *type; return dup; } static void declare_pointer(char *line) { struct type type = {strdup(line), TYPE_POINTER, 0}; type.base = PROGRAM.id2type[next_number(&line)]; type.id = next_number(&line); add_type(type); } static void declare_array(char *line) { struct type type = {strdup(line), TYPE_ARRAY, 0}; type.base = PROGRAM.id2type[next_number(&line)]; type.id = next_number(&line); type.length = next_number(&line); if (*line == '\0') type.length = -1; add_type(type); } static void declare_function(char *line) { struct type type = {strdup(line), TYPE_FUNCTION, 0}; type.return_type = PROGRAM.id2type[next_number(&line)]; type.id = next_number(&line); if (strstr(line, "...")) type.is_variadic = 1; struct type **insert_arg = &(type.params); while (1) { int arg_id = next_number(&line); if (*line == '\0') break; *insert_arg = typedup(PROGRAM.id2type[arg_id]); insert_arg = &((*insert_arg)->next_param); } *insert_arg = 0; add_type(type); } static void predeclare_struct(char *line) { struct type type = {strdup(line), TYPE_STRUCT, next_number(&line), 0}; add_type(type); } static void predeclare_union(char *line) { struct type type = {strdup(line), TYPE_UNION, next_number(&line), 0}; add_type(type); } // struct Struct_[0-9]* [{] [^}]* [}] ; // union Union_[0-9]* [{] [^}]* [}] ; static void define_aggregate(char *line) { struct type *type = PROGRAM.id2type[next_number(&line)]; line = strchr(line, 'T'); struct type **insert_at = 
&(type->members); while (line && *line == 'T') { struct type *field_type = typedup(PROGRAM.id2type[next_number(&line)]); field_type->field_name = strdup(line + 1); *strchrnul(field_type->field_name, ' ') = '\0'; *insert_at = field_type; insert_at = &(field_type->next_member); line = strchrnul(strchrnul(line, ','), 'T'); } *insert_at = 0; } static void declare_basic(char *line) { char *basic = strdup(line + strlen("typedef ")); char *basic_end = basic; while (strncmp(basic_end + 1, "Type_", strlen(basic_end))) basic_end++; // now basic_end is the space right before Type_ *basic_end = '\0'; line += (basic_end - basic); struct type type = {strdup(line), TYPE_BASIC, next_number(&line), 0}; type.basic = basic; add_type(type); } char *libdietc_tokdup(char *str) { return strndup(str, strchrnul(str, ' ') - str); } static unsigned long hash(char *str) { // djb2 unsigned long hash = 6997; for (; *str; str++) hash = hash * 33 ^ (*str); return hash; } // ... Type_# name ; static void declare_object(char *line) { // MAYBE REHASH if (PROGRAM.n_objects >= PROGRAM.cap_objects / 2) { unsigned long old_cap = PROGRAM.cap_objects; struct object **old_objects = PROGRAM.objects; PROGRAM.cap_objects = 4 * (PROGRAM.n_objects + 1); PROGRAM.objects = calloc(PROGRAM.cap_objects, sizeof(void*)); for (unsigned long i = 0; i < old_cap; i++) { if (!old_objects[i]) continue; unsigned long h = hash(old_objects[i]->name) % old_cap; while (PROGRAM.objects[h]) h = (h + 1) % old_cap; PROGRAM.objects[h] = old_objects[i]; } free(old_objects); } // INSERT struct object *object = malloc(sizeof(*object)); object->type = PROGRAM.id2type[next_number(&line)]; object->name = libdietc_tokdup(line + 1); PROGRAM.n_objects++; unsigned long h = hash(object->name) % PROGRAM.cap_objects; while (PROGRAM.objects[h]) h = (h + 1) % PROGRAM.cap_objects; PROGRAM.objects[h] = object; } static void start_function(char *line) { FUNCTION = (struct function){0}; FUNCTION.start_line = strdup(line); NEXT_INSTRUCTION = 
&(FUNCTION.instructions); } static void add_instruction(char *line) { *NEXT_INSTRUCTION = calloc(sizeof(**NEXT_INSTRUCTION), 1); (*NEXT_INSTRUCTION)->line = strdup(line); NEXT_INSTRUCTION = &((*NEXT_INSTRUCTION)->next); } static void end_function() { struct function **insert = &(PROGRAM.functions); while (*insert) insert = &((*insert)->next); *insert = calloc(1, sizeof(**insert)); **insert = FUNCTION; } struct program libdietc_parse(char *filename) { PROGRAM = (struct program){0}; FILE *f = fopen(filename, "r"); assert(f); struct stat statinfo; stat(filename, &statinfo); int filesize = statinfo.st_size; // read whole file PROGRAM.original = malloc((statinfo.st_size + 1) * sizeof(char)); fread(PROGRAM.original, statinfo.st_size, 1, f); PROGRAM.original[statinfo.st_size] = '\0'; // identify end of the preamble char *end_preamble = strstr(PROGRAM.original, "{\n"); if (!end_preamble) { PROGRAM.preamble = strdup(PROGRAM.original); PROGRAM.functions = NULL; } else { while (*end_preamble != '\n') end_preamble--; PROGRAM.preamble = PROGRAM.original; PROGRAM.original = strdup(PROGRAM.original); *(end_preamble + 1) = '\0'; } // parse the file rewind(f); YY_BUFFER_STATE flex_buf = yy_create_buffer(f, YY_BUF_SIZE); yy_switch_to_buffer(flex_buf); yylex(); return PROGRAM; } void libdietc_print(struct program program) { fputs(program.preamble, stdout); for (struct function *f = program.functions; f; f = f->next) { printf("%s\n", f->start_line); for (struct instruction *i = f->instructions; i; i = i->next) printf("%s\n", i->line); printf("}\n"); } }