summary refs log tree commit diff
path: root/c/libdietc.l
diff options
context:
space:
mode:
author Matthew Sotoudeh <matthew@masot.net> 2023-07-30 14:38:43 -0700
committer Matthew Sotoudeh <matthew@masot.net> 2023-07-30 14:38:43 -0700
commit 4de60a709d3af497781b7467f2c6fe7e09b39595 (patch)
tree a9c4b70c6e87f37f9b69d77153c7cda10a60e043 /c/libdietc.l
parent 678b0d4a2ec15db0735d08ebafec55605ce0a688 (diff)
basic libdietc for writing passes in C
Diffstat (limited to 'c/libdietc.l')
-rw-r--r-- c/libdietc.l 243
1 files changed, 243 insertions, 0 deletions
diff --git a/c/libdietc.l b/c/libdietc.l
new file mode 100644
index 0000000..cd167b2
--- /dev/null
+++ b/c/libdietc.l
@@ -0,0 +1,243 @@
+%{
+#include <assert.h>
+#include <ctype.h>
+#include <sys/stat.h>
+#include "libdietc.h"
+
+// Handlers called from the scanner rules in the rules section; they are
+// defined after the second %%. Every char * argument is the matched text
+// (yytext) of the rule that fired.
+static void declare_pointer(char *);
+static void declare_array(char *);
+static void declare_function(char *);
+static void predeclare_struct(char *);
+static void predeclare_union(char *);
+static void define_aggregate(char *);
+static void declare_basic(char *);
+static void declare_object(char *);
+static void start_function(char *);
+static void add_instruction(char *);
+static void end_function();
+%}
+%%
+
+ /* Scanner rules. Each rule hands the matched text (yytext) to one of the
+  * handlers defined after the second %%. Rule order matters: flex takes the
+  * longest match and, on a tie, the rule listed first, so the specific
+  * typedef forms must stay above the generic typedef rule that calls
+  * declare_basic(). Characters no rule recognises are reported on stderr by
+  * the final rule. This comment is indented on purpose: text starting in
+  * the first column of the rules section is read as a pattern. */
+^[#].*$ {}
+"typedef Type_"[0-9]*" * Type_"[0-9]*" ;" { declare_pointer(yytext); }
+"typedef Type_"[0-9]*" Type_"[0-9]*" [ "[^ ]*" ] ;" { declare_array(yytext); }
+"typedef Type_"[0-9]*" ( "[^)\n]*") ;" { declare_function(yytext); }
+"typedef struct Struct_"[0-9]*" Type_"[0-9]*" ;" { predeclare_struct(yytext); }
+"struct Struct_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); }
+"typedef union Union_"[0-9]*" Type_"[0-9]*" ;" { predeclare_union(yytext); }
+"union Union_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); }
+"typedef ".*" ;" { declare_basic(yytext); }
+"extern Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); }
+"static Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); }
+^[^=\n\t]*"= ".*$ { /* definition */ }
+^[^{\n]*"{"$ { start_function(yytext); }
+\tType_[0-9]*" "[a-zA-Z0-9_]*" ;" { declare_object(yytext); add_instruction(yytext); }
+\t[^\n]*$ { add_instruction(yytext); }
+"}"$ { end_function(); }
+\n { }
+(.|[ \t]) { fprintf(stderr, "UNKNOWN: %s\n", yytext); }
+
+%%
+// Parse state shared by the handlers below.
+// PROGRAM accumulates the result that libdietc_parse() returns.
+static struct program PROGRAM;
+// Function currently being collected: reset by start_function() and copied
+// onto the end of PROGRAM.functions by end_function().
+static struct function FUNCTION;
+// Address of the link where add_instruction() stores the next instruction.
+static struct instruction **NEXT_INSTRUCTION;
+
+// Skips forward in *ptr to the next run of decimal digits, parses it, and
+// returns its value. On return *ptr points at the character immediately after
+// the digits. If no digits remain, returns 0 and leaves *ptr on the
+// terminating NUL.
+static int next_number(char **ptr) {
+    int value = 0;
+    // <ctype.h> classifiers are only defined for unsigned char values (and
+    // EOF); a negative plain char would be undefined behavior, hence the casts.
+    while (**ptr && !isdigit((unsigned char)**ptr)) (*ptr)++;
+    for (; isdigit((unsigned char)**ptr); (*ptr)++)
+        value = (value * 10) + (**ptr - '0');
+    return value;
+}
+
+// Stores a heap copy of @type in PROGRAM.id2type[type.id], growing the table
+// when type.id lies past its current end. Slots created by growing are set to
+// NULL so ids that were never declared do not read as indeterminate pointers.
+// An entry already stored at the same id is replaced but not freed, because
+// other types may still point at it.
+static void add_type(struct type type) {
+    if (PROGRAM.n_types <= type.id) {
+        size_t old_count = (size_t)PROGRAM.n_types;
+        size_t new_count = (size_t)type.id + 1;
+        // Go through a temporary so the old table is not lost on failure.
+        void *grown = realloc(PROGRAM.id2type,
+                              new_count * sizeof(PROGRAM.id2type[0]));
+        assert(grown);
+        PROGRAM.id2type = grown;
+        for (size_t i = old_count; i < new_count; i++)
+            PROGRAM.id2type[i] = NULL;
+        PROGRAM.n_types = type.id + 1;
+    }
+    struct type *stored = malloc(sizeof(*stored));
+    assert(stored);
+    *stored = type;
+    PROGRAM.id2type[type.id] = stored;
+}
+
+// Returns a freshly malloc'd shallow copy of *type; pointer members are
+// shared with the original rather than duplicated.
+static struct type *typedup(struct type *type) {
+    struct type *copy = malloc(sizeof(struct type));
+    copy[0] = type[0];
+    return copy;
+}
+
+// Registers a pointer type from a matched "typedef Type_<base> * Type_<id> ;"
+// line: the first number selects the pointee in PROGRAM.id2type, the second
+// is the id of the new type.
+static void declare_pointer(char *line) {
+    char *cursor = line;
+    struct type pointer = {strdup(line), TYPE_POINTER, 0};
+    int base_id = next_number(&cursor);
+    pointer.base = PROGRAM.id2type[base_id];
+    pointer.id = next_number(&cursor);
+    add_type(pointer);
+}
+
+// Registers an array type from a matched
+// "typedef Type_<base> Type_<id> [ <length> ] ;" line. If no third number is
+// found (the scan runs to the end of the string) the length is stored as -1.
+static void declare_array(char *line) {
+    char *cursor = line;
+    struct type array = {strdup(line), TYPE_ARRAY, 0};
+    array.base = PROGRAM.id2type[next_number(&cursor)];
+    array.id = next_number(&cursor);
+    int parsed_length = next_number(&cursor);
+    // Reaching the NUL means next_number() found no digits for the length.
+    array.length = (*cursor == '\0') ? -1 : parsed_length;
+    add_type(array);
+}
+
+// Registers a function type from a matched typedef line. The first number is
+// the return type's id and the second is the new type's id. Every later
+// number names a parameter type, which is copied with typedup() and chained
+// through next_param. A "..." after the ids marks the type as variadic.
+static void declare_function(char *line) {
+    struct type fn = {strdup(line), TYPE_FUNCTION, 0};
+    char *cursor = line;
+    fn.return_type = PROGRAM.id2type[next_number(&cursor)];
+    fn.id = next_number(&cursor);
+    if (strstr(cursor, "...")) fn.is_variadic = 1;
+
+    struct type **tail = &(fn.params);
+    // next_number() stops on the NUL once no digits remain, ending the loop.
+    for (int arg_id = next_number(&cursor); *cursor != '\0';
+         arg_id = next_number(&cursor)) {
+        struct type *param = typedup(PROGRAM.id2type[arg_id]);
+        *tail = param;
+        tail = &(param->next_param);
+    }
+    *tail = 0;
+    add_type(fn);
+}
+
+// Registers a placeholder struct type from a matched
+// "typedef struct Struct_<n> Type_<m> ;" line. The first number on the line
+// is passed as the third positional initializer, exactly as before.
+static void predeclare_struct(char *line) {
+    // Copy the line BEFORE next_number() advances the pointer. Expressions in
+    // one initializer list are indeterminately sequenced, so doing both there
+    // could store a copy that starts partway through the line.
+    char *decl = strdup(line);
+    int id = next_number(&line);
+    struct type type = {decl, TYPE_STRUCT, id, 0};
+    add_type(type);
+}
+
+// Registers a placeholder union type from a matched
+// "typedef union Union_<n> Type_<m> ;" line. The first number on the line is
+// passed as the third positional initializer, exactly as before.
+static void predeclare_union(char *line) {
+    // Copy the line BEFORE next_number() advances the pointer. Expressions in
+    // one initializer list are indeterminately sequenced, so doing both there
+    // could store a copy that starts partway through the line.
+    char *decl = strdup(line);
+    int id = next_number(&line);
+    struct type type = {decl, TYPE_UNION, id, 0};
+    add_type(type);
+}
+
+// struct Struct_[0-9]* [{] [^}]* [}] ;
+// union Union_[0-9]* [{] [^}]* [}] ;
+// Fills in the member list of an already-registered aggregate. The first
+// number on the line selects the aggregate in PROGRAM.id2type. Each member is
+// a typedup() copy of its type, carries the member's name in field_name, and
+// is chained through next_member.
+static void define_aggregate(char *line) {
+    struct type *aggregate = PROGRAM.id2type[next_number(&line)];
+    struct type **tail = &(aggregate->members);
+
+    // Each pass starts on the 'T' of a member's "Type_<id>"; the step skips
+    // to the first 'T' at or after the next ',' (or to the NUL if none).
+    for (line = strchr(line, 'T'); line && *line == 'T';
+         line = strchrnul(strchrnul(line, ','), 'T')) {
+        struct type *member = typedup(PROGRAM.id2type[next_number(&line)]);
+
+        // The name starts one character after the type id and ends at the
+        // next space.
+        char *name = strdup(line + 1);
+        *strchrnul(name, ' ') = '\0';
+        member->field_name = name;
+
+        *tail = member;
+        tail = &(member->next_member);
+    }
+    *tail = 0;
+}
+
+// Registers a basic type from a matched "typedef <c type> Type_<id> ;" line.
+// type.basic receives just the C type text ("unsigned long", ...) and the id
+// is parsed from the text after it, so digits inside the C type name cannot
+// be mistaken for the id. The first initializer is a copy of the whole line,
+// as in the sibling declare_* handlers.
+// NOTE(review): the old code stored an accidental tail of the line in that
+// first member; confirm no consumer relied on it.
+static void declare_basic(char *line) {
+    const size_t prefix_len = strlen("typedef ");
+    char *decl = strdup(line);
+    char *basic = strdup(line + prefix_len);
+
+    // Find the LAST " Type_": the C type itself may contain spaces.
+    char *name = NULL;
+    for (char *hit = strstr(basic, " Type_"); hit;
+         hit = strstr(hit + 1, " Type_"))
+        name = hit;
+    if (!name) {
+        // Not a "... Type_<id> ;" typedef: report it like other unknown
+        // input instead of registering a type under a bogus id.
+        fprintf(stderr, "UNKNOWN: %s\n", line);
+        free(decl);
+        free(basic);
+        return;
+    }
+    *name = '\0';  // basic now ends right before " Type_<id> ;"
+
+    // Same position in the original line, where the id is parsed from.
+    char *cursor = line + prefix_len + (name - basic);
+    int id = next_number(&cursor);
+
+    struct type type = {decl, TYPE_BASIC, id, 0};
+    type.basic = basic;
+    add_type(type);
+}
+
+// Returns a newly allocated copy of the leading token of @str: everything up
+// to, but not including, the first space or the end of the string.
+char *libdietc_tokdup(char *str) {
+    size_t token_len = 0;
+    while (str[token_len] != '\0' && str[token_len] != ' ') token_len++;
+    return strndup(str, token_len);
+}
+
+// String hash in the djb2 style: starts from 6997 and, for each character,
+// multiplies the running value by 33 and XORs the character in.
+static unsigned long hash(char *str) {
+    unsigned long acc = 6997;
+    while (*str) {
+        acc = (acc * 33) ^ (*str);
+        str++;
+    }
+    return acc;
+}
+
+// ... Type_# name ;
+// Records an object declaration in PROGRAM.objects, an open-addressed hash
+// table keyed by object name with linear probing. The first number on the
+// line selects the object's type in PROGRAM.id2type; the name is the token
+// that follows it.
+static void declare_object(char *line) {
+    // MAYBE REHASH: grow once the table is at least half full.
+    if (PROGRAM.n_objects >= PROGRAM.cap_objects / 2) {
+        unsigned long old_cap = PROGRAM.cap_objects;
+        struct object **old_objects = PROGRAM.objects;
+        PROGRAM.cap_objects = 4 * (PROGRAM.n_objects + 1);
+        PROGRAM.objects = calloc(PROGRAM.cap_objects,
+                                 sizeof(PROGRAM.objects[0]));
+        for (unsigned long i = 0; i < old_cap; i++) {
+            if (!old_objects[i]) continue;
+            // Slots must be computed modulo the NEW capacity, like the
+            // insert below. Using the old capacity left rehashed entries
+            // where a probe starting at hash % cap_objects may not find them.
+            unsigned long h = hash(old_objects[i]->name) % PROGRAM.cap_objects;
+            while (PROGRAM.objects[h]) h = (h + 1) % PROGRAM.cap_objects;
+            PROGRAM.objects[h] = old_objects[i];
+        }
+        free(old_objects);
+    }
+    // INSERT
+    struct object *object = malloc(sizeof(*object));
+    object->type = PROGRAM.id2type[next_number(&line)];
+    // line now sits on the space after the type id; the name follows it.
+    object->name = libdietc_tokdup(line + 1);
+    PROGRAM.n_objects++;
+    unsigned long h = hash(object->name) % PROGRAM.cap_objects;
+    while (PROGRAM.objects[h]) h = (h + 1) % PROGRAM.cap_objects;
+    PROGRAM.objects[h] = object;
+}
+
+// Starts collecting a new function: clears FUNCTION, keeps a copy of the
+// matched header line, and points NEXT_INSTRUCTION at the head of the new
+// instruction list.
+static void start_function(char *line) {
+    struct function fresh = {0};
+    fresh.start_line = strdup(line);
+    FUNCTION = fresh;
+    NEXT_INSTRUCTION = &(FUNCTION.instructions);
+}
+
+// Appends a zero-initialised instruction holding a copy of @line at the
+// position NEXT_INSTRUCTION designates, then advances NEXT_INSTRUCTION to the
+// new node's next link.
+static void add_instruction(char *line) {
+    struct instruction *node = calloc(1, sizeof(*node));
+    node->line = strdup(line);
+    *NEXT_INSTRUCTION = node;
+    NEXT_INSTRUCTION = &(node->next);
+}
+
+// Finishes the function being collected: copies FUNCTION into a heap node
+// and links that node at the end of PROGRAM.functions.
+static void end_function() {
+    struct function *finished = calloc(1, sizeof(*finished));
+    *finished = FUNCTION;
+
+    struct function **tail = &(PROGRAM.functions);
+    while (*tail) tail = &((*tail)->next);
+    *tail = finished;
+}
+
+// Parses the file @filename and returns the resulting program.
+//
+// The returned program holds:
+//   - original: a copy of the whole file text;
+//   - preamble: the text before the line that contains the first "{\n"
+//     (the whole text when the file has no such line);
+//   - the types, objects and functions recorded by the scanner handlers.
+//
+// Failing to open, stat or allocate aborts via assert(), matching the
+// existing error policy of this function.
+struct program libdietc_parse(char *filename) {
+    PROGRAM = (struct program){0};
+
+    FILE *f = fopen(filename, "r");
+    assert(f);
+
+    struct stat statinfo;
+    int stat_rc = stat(filename, &statinfo);
+    assert(stat_rc == 0);
+    (void)stat_rc;  // keep NDEBUG builds free of unused-variable warnings
+    size_t filesize = (size_t)statinfo.st_size;
+
+    // read whole file; terminate at the number of bytes actually read so a
+    // short read never leaves uninitialized bytes inside the string
+    PROGRAM.original = malloc(filesize + 1);
+    assert(PROGRAM.original);
+    size_t n_read = fread(PROGRAM.original, 1, filesize, f);
+    PROGRAM.original[n_read] = '\0';
+
+    // identify end of the preamble
+    char *first_body = strstr(PROGRAM.original, "{\n");
+    if (!first_body) {
+        PROGRAM.preamble = strdup(PROGRAM.original);
+        PROGRAM.functions = NULL;
+    } else {
+        // Walk back to the start of the line holding the "{", stopping at
+        // the start of the buffer when that line is the first line.
+        char *cut = first_body;
+        while (cut > PROGRAM.original && *(cut - 1) != '\n') cut--;
+        // The buffer just read becomes the preamble; a full copy is kept as
+        // the original BEFORE the preamble is truncated.
+        PROGRAM.preamble = PROGRAM.original;
+        PROGRAM.original = strdup(PROGRAM.original);
+        *cut = '\0';
+    }
+
+    // parse the file
+    rewind(f);
+    YY_BUFFER_STATE flex_buf = yy_create_buffer(f, YY_BUF_SIZE);
+    yy_switch_to_buffer(flex_buf);
+    yylex();
+
+    // release the scanner buffer and the file now that scanning is done
+    yy_delete_buffer(flex_buf);
+    fclose(f);
+
+    return PROGRAM;
+}
+
+// Writes @program to stdout: the preamble verbatim, then each function as
+// its header line, its instruction lines, and a closing "}" line.
+void libdietc_print(struct program program) {
+    fputs(program.preamble, stdout);
+
+    struct function *fn = program.functions;
+    while (fn) {
+        printf("%s\n", fn->start_line);
+        struct instruction *insn = fn->instructions;
+        while (insn) {
+            printf("%s\n", insn->line);
+            insn = insn->next;
+        }
+        printf("}\n");
+        fn = fn->next;
+    }
+}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback