From 4de60a709d3af497781b7467f2c6fe7e09b39595 Mon Sep 17 00:00:00 2001 From: Matthew Sotoudeh Date: Sun, 30 Jul 2023 14:38:43 -0700 Subject: basic libdietc for writing passes in C --- c/libdietc.l | 243 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 c/libdietc.l (limited to 'c/libdietc.l') diff --git a/c/libdietc.l b/c/libdietc.l new file mode 100644 index 0000000..cd167b2 --- /dev/null +++ b/c/libdietc.l @@ -0,0 +1,243 @@ +%{ +#include +#include +#include +#include "libdietc.h" + +static void declare_pointer(char *); +static void declare_array(char *); +static void declare_function(char *); +static void predeclare_struct(char *); +static void predeclare_union(char *); +static void define_aggregate(char *); +static void declare_basic(char *); +static void declare_object(char *); +static void start_function(char *); +static void add_instruction(char *); +static void end_function(); +%} +%% + +^[#].*$ {} +"typedef Type_"[0-9]*" * Type_"[0-9]*" ;" { declare_pointer(yytext); } +"typedef Type_"[0-9]*" Type_"[0-9]*" [ "[^ ]*" ] ;" { declare_array(yytext); } +"typedef Type_"[0-9]*" ( "[^)\n]*") ;" { declare_function(yytext); } +"typedef struct Struct_"[0-9]*" Type_"[0-9]*" ;" { predeclare_struct(yytext); } +"struct Struct_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); } +"typedef union Union_"[0-9]*" Type_"[0-9]*" ;" { predeclare_union(yytext); } +"union Union_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); } +"typedef ".*" ;" { declare_basic(yytext); } +"extern Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); } +"static Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); } +^[^=\n\t]*"= ".*$ { /* definition */ } +^[^{\n]*"{"$ { start_function(yytext); } +\tType_[0-9]*" "[a-zA-Z0-9_]*" ;" { declare_object(yytext); add_instruction(yytext); } +\t[^\n]*$ { add_instruction(yytext); } +"}"$ { end_function(); } +\n { } +(.|[ \t]) { fprintf(stderr, "UNKNOWN: %s\n", yytext); } + +%% +static struct program PROGRAM; +static struct function FUNCTION; +static struct instruction **NEXT_INSTRUCTION; + +// skips to the next number in the string, reads past it, and then returns the +// read number. @ptr is left pointing to the char immediately after the number. +static int next_number(char **ptr) { + int value = 0; + while (**ptr && !isdigit(**ptr)) (*ptr)++; + for (; isdigit(**ptr); (*ptr)++) value = (value * 10) + (**ptr - '0'); + return value; +} + +static void add_type(struct type type) { + if (PROGRAM.n_types <= type.id) { + PROGRAM.n_types = type.id + 1; + PROGRAM.id2type = realloc(PROGRAM.id2type, + PROGRAM.n_types * sizeof(PROGRAM.id2type[0])); + } + PROGRAM.id2type[type.id] = malloc(sizeof(type)); + *(PROGRAM.id2type[type.id]) = type; +} + +static struct type *typedup(struct type *type) { + struct type *dup = malloc(sizeof(*dup)); + *dup = *type; + return dup; +} + +static void declare_pointer(char *line) { + struct type type = {strdup(line), TYPE_POINTER, 0}; + type.base = PROGRAM.id2type[next_number(&line)]; + type.id = next_number(&line); + add_type(type); +} + +static void declare_array(char *line) { + struct type type = {strdup(line), TYPE_ARRAY, 0}; + type.base = PROGRAM.id2type[next_number(&line)]; + type.id = next_number(&line); + type.length = next_number(&line); + if (*line == '\0') type.length = -1; + add_type(type); +} + +static void declare_function(char *line) { + struct type type = {strdup(line), TYPE_FUNCTION, 0}; + type.return_type = PROGRAM.id2type[next_number(&line)]; + type.id = next_number(&line); + if (strstr(line, "...")) type.is_variadic = 1; + struct type **insert_arg = &(type.params); + while (1) { + int arg_id = next_number(&line); + if (*line == '\0') break; + *insert_arg = typedup(PROGRAM.id2type[arg_id]); + insert_arg = &((*insert_arg)->next_param); + } + *insert_arg = 0; + add_type(type); +} + +static void predeclare_struct(char *line) { + struct type type = {strdup(line), TYPE_STRUCT, next_number(&line), 0}; + add_type(type); +} + +static void predeclare_union(char *line) { + struct type type = {strdup(line), TYPE_UNION, next_number(&line), 0}; + add_type(type); +} + +// struct Struct_[0-9]* [{] [^}]* [}] ; +// union Union_[0-9]* [{] [^}]* [}] ; +static void define_aggregate(char *line) { + struct type *type = PROGRAM.id2type[next_number(&line)]; + line = strchr(line, 'T'); + struct type **insert_at = &(type->members); + while (line && *line == 'T') { + struct type *field_type = typedup(PROGRAM.id2type[next_number(&line)]); + + field_type->field_name = strdup(line + 1); + *strchrnul(field_type->field_name, ' ') = '\0'; + + *insert_at = field_type; + insert_at = &(field_type->next_member); + + line = strchrnul(strchrnul(line, ','), 'T'); + } + *insert_at = 0; +} + +static void declare_basic(char *line) { + char *basic = strdup(line + strlen("typedef ")); + char *basic_end = basic; + while (strncmp(basic_end + 1, "Type_", strlen(basic_end))) basic_end++; + // now basic_end is the space right before Type_ + *basic_end = '\0'; + line += (basic_end - basic); + struct type type = {strdup(line), TYPE_BASIC, next_number(&line), 0}; + type.basic = basic; + add_type(type); +} + +char *libdietc_tokdup(char *str) { + return strndup(str, strchrnul(str, ' ') - str); +} + +static unsigned long hash(char *str) { // djb2 + unsigned long hash = 6997; + for (; *str; str++) hash = hash * 33 ^ (*str); + return hash; +} + +// ... Type_# name ; +static void declare_object(char *line) { + // MAYBE REHASH + if (PROGRAM.n_objects >= PROGRAM.cap_objects / 2) { + unsigned long old_cap = PROGRAM.cap_objects; + struct object **old_objects = PROGRAM.objects; + PROGRAM.cap_objects = 4 * (PROGRAM.n_objects + 1); + PROGRAM.objects = calloc(PROGRAM.cap_objects, sizeof(void*)); + for (unsigned long i = 0; i < old_cap; i++) { + if (!old_objects[i]) continue; + unsigned long h = hash(old_objects[i]->name) % old_cap; + while (PROGRAM.objects[h]) h = (h + 1) % old_cap; + PROGRAM.objects[h] = old_objects[i]; + } + free(old_objects); + } + // INSERT + struct object *object = malloc(sizeof(*object)); + object->type = PROGRAM.id2type[next_number(&line)]; + object->name = libdietc_tokdup(line + 1); + PROGRAM.n_objects++; + unsigned long h = hash(object->name) % PROGRAM.cap_objects; + while (PROGRAM.objects[h]) h = (h + 1) % PROGRAM.cap_objects; + PROGRAM.objects[h] = object; +} + +static void start_function(char *line) { + FUNCTION = (struct function){0}; + FUNCTION.start_line = strdup(line); + NEXT_INSTRUCTION = &(FUNCTION.instructions); +} + +static void add_instruction(char *line) { + *NEXT_INSTRUCTION = calloc(sizeof(**NEXT_INSTRUCTION), 1); + (*NEXT_INSTRUCTION)->line = strdup(line); + NEXT_INSTRUCTION = &((*NEXT_INSTRUCTION)->next); +} + +static void end_function() { + struct function **insert = &(PROGRAM.functions); + while (*insert) insert = &((*insert)->next); + *insert = calloc(1, sizeof(**insert)); + **insert = FUNCTION; +} + +struct program libdietc_parse(char *filename) { + PROGRAM = (struct program){0}; + + FILE *f = fopen(filename, "r"); + assert(f); + + struct stat statinfo; + stat(filename, &statinfo); + int filesize = statinfo.st_size; + + // read whole file + PROGRAM.original = malloc((statinfo.st_size + 1) * sizeof(char)); + fread(PROGRAM.original, statinfo.st_size, 1, f); + PROGRAM.original[statinfo.st_size] = '\0'; + + // identify end of the preamble + char *end_preamble = strstr(PROGRAM.original, "{\n"); + if (!end_preamble) { + PROGRAM.preamble = strdup(PROGRAM.original); + PROGRAM.functions = NULL; + } else { + while (*end_preamble != '\n') end_preamble--; + PROGRAM.preamble = PROGRAM.original; + PROGRAM.original = strdup(PROGRAM.original); + *(end_preamble + 1) = '\0'; + } + + // parse the file + rewind(f); + YY_BUFFER_STATE flex_buf = yy_create_buffer(f, YY_BUF_SIZE); + yy_switch_to_buffer(flex_buf); + yylex(); + + return PROGRAM; +} + +void libdietc_print(struct program program) { + fputs(program.preamble, stdout); + for (struct function *f = program.functions; f; f = f->next) { + printf("%s\n", f->start_line); + for (struct instruction *i = f->instructions; i; i = i->next) + printf("%s\n", i->line); + printf("}\n"); + } +} -- cgit v1.2.3