summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Sotoudeh <matthew@masot.net>2023-07-30 14:38:43 -0700
committerMatthew Sotoudeh <matthew@masot.net>2023-07-30 14:38:43 -0700
commit4de60a709d3af497781b7467f2c6fe7e09b39595 (patch)
treea9c4b70c6e87f37f9b69d77153c7cda10a60e043
parent678b0d4a2ec15db0735d08ebafec55605ce0a688 (diff)
basic libdietc for writing passes in C
-rw-r--r--c/.gitignore4
-rw-r--r--c/Makefile8
-rw-r--r--c/README.txt18
-rw-r--r--c/examples/zero_init/.gitignore2
-rw-r--r--c/examples/zero_init/Makefile14
-rw-r--r--c/examples/zero_init/dietpass.c25
-rw-r--r--c/examples/zero_init/test.c20
-rw-r--r--c/libdietc.h83
-rw-r--r--c/libdietc.l243
9 files changed, 417 insertions, 0 deletions
diff --git a/c/.gitignore b/c/.gitignore
new file mode 100644
index 0000000..4dd9f43
--- /dev/null
+++ b/c/.gitignore
@@ -0,0 +1,4 @@
+.*.sw*
+libdietc.c
+*.o
+dietpass
diff --git a/c/Makefile b/c/Makefile
new file mode 100644
index 0000000..3908b05
--- /dev/null
+++ b/c/Makefile
@@ -0,0 +1,8 @@
+CFLAGS += -O3 -g
+
+libdietc.o: libdietc.c
+
+clean:
+ rm -rf libdietc.o libdietc.c
+
+.PHONY: clean
diff --git a/c/README.txt b/c/README.txt
new file mode 100644
index 0000000..50450aa
--- /dev/null
+++ b/c/README.txt
@@ -0,0 +1,18 @@
+C library for building dietcc passes
+
+Example of using a pass:
+
+ The examples/zero_init pass zero-initializes all local variables.
+
+ $ which dietcc
+ [make sure it's on your path!]
+ $ cd examples/zero_init
+ $ make -B
+ $ ./default
+ Foo return value: 0
+ Foo return value: 32765
+ Foo return value: 32765
+ $ ./instrumented
+ Foo return value: 0
+ Foo return value: 0
+ Foo return value: 0
diff --git a/c/examples/zero_init/.gitignore b/c/examples/zero_init/.gitignore
new file mode 100644
index 0000000..b74bd7f
--- /dev/null
+++ b/c/examples/zero_init/.gitignore
@@ -0,0 +1,2 @@
+default
+instrumented
diff --git a/c/examples/zero_init/Makefile b/c/examples/zero_init/Makefile
new file mode 100644
index 0000000..79b5692
--- /dev/null
+++ b/c/examples/zero_init/Makefile
@@ -0,0 +1,14 @@
+all: default instrumented
+
+default: test.c
+ gcc -O0 $^ -o $@
+
+instrumented: test.c dietpass
+ dietcc -O0 test.c -o $@ --dietc-pass $(PWD)/dietpass
+
+dietpass: dietpass.c ../../libdietc.l ../../libdietc.h
+ make -C ../..
+ gcc -O3 dietpass.c ../../libdietc.o -I../../ -lfl -o $@
+
+clean:
+ rm -f default instrumented dietpass
diff --git a/c/examples/zero_init/dietpass.c b/c/examples/zero_init/dietpass.c
new file mode 100644
index 0000000..e145a91
--- /dev/null
+++ b/c/examples/zero_init/dietpass.c
@@ -0,0 +1,25 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libdietc.h>
+
+int main(int argc, char **argv) {
+ assert(argc == 2);
+ struct program program = libdietc_parse(argv[1]);
+
+ for (struct function *f = program.functions; f; f = f->next) {
+ for (struct instruction *i = f->instructions; i; i = i->next) {
+ if (strncmp(i->line, "\tType_", strlen("\tType_"))) continue;
+ char *name = libdietc_tokdup(strchrnul(i->line, ' ') + 1);
+ struct instruction *n = calloc(1, sizeof(*n));
+ n->line = malloc(strlen(name) + 512);
+ sprintf(n->line, "memset(&%s, 0, sizeof(%s));", name, name);
+ n->next = i->next;
+ i->next = n;
+ i = n;
+ }
+ }
+ libdietc_print(program);
+ return 0;
+}
diff --git a/c/examples/zero_init/test.c b/c/examples/zero_init/test.c
new file mode 100644
index 0000000..5db109a
--- /dev/null
+++ b/c/examples/zero_init/test.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+
+int set_xyz() {
+ int xyz = 1;
+ return xyz;
+}
+
+int foo() {
+ int xyz;
+ return xyz;
+}
+
+int main() {
+ printf("Foo return value: %d\n", foo());
+ set_xyz();
+ printf("Foo return value: %d\n", foo());
+ set_xyz();
+ printf("Foo return value: %d\n", foo());
+ return 0;
+}
diff --git a/c/libdietc.h b/c/libdietc.h
new file mode 100644
index 0000000..14b7ad2
--- /dev/null
+++ b/c/libdietc.h
@@ -0,0 +1,83 @@
+#pragma once
+
+static char *strchrnul(char *s, char c) {
+ while (*s && *s != c) s++;
+ return s;
+}
+
+enum TYPE {
+ TYPE_BASIC,
+ TYPE_POINTER,
+ TYPE_ARRAY,
+ TYPE_STRUCT,
+ TYPE_UNION,
+ TYPE_FUNCTION,
+};
+
+struct type {
+ char *typedef_line;
+ enum TYPE kind;
+ unsigned long id;
+
+ // basic type
+ char *basic;
+
+ // pointer & array types
+ struct type *base;
+ unsigned long length;
+
+ // struct & union types
+ struct type *members;
+ struct type *next_member;
+ char *field_name;
+
+ // function types
+ struct type *return_type;
+ struct type *params;
+ struct type *next_param;
+ unsigned long is_variadic;
+};
+
+struct instruction {
+ char *line;
+ struct instruction *next;
+};
+
+struct function {
+ char *start_line;
+ struct type *type;
+ struct instruction *instructions;
+ struct function *next;
+};
+
+struct object {
+ char *name;
+ struct type *type;
+};
+
+struct program {
+ char *original;
+
+ // all lines before the function definitions
+ char *preamble;
+
+ // id -> type table
+ struct type **id2type;
+ unsigned long n_types;
+
+ // hash map of objects
+ struct object **objects;
+ unsigned long cap_objects;
+ unsigned long n_objects;
+
+ struct function *functions;
+};
+
+struct identifiers {
+ char *token;
+ struct identifier *next;
+};
+
+char *libdietc_tokdup(char *tok);
+struct program libdietc_parse(char *filename);
+void libdietc_print(struct program program);
diff --git a/c/libdietc.l b/c/libdietc.l
new file mode 100644
index 0000000..cd167b2
--- /dev/null
+++ b/c/libdietc.l
@@ -0,0 +1,243 @@
+%{
+#include <assert.h>
+#include <ctype.h>
+#include <sys/stat.h>
+#include "libdietc.h"
+
+static void declare_pointer(char *);
+static void declare_array(char *);
+static void declare_function(char *);
+static void predeclare_struct(char *);
+static void predeclare_union(char *);
+static void define_aggregate(char *);
+static void declare_basic(char *);
+static void declare_object(char *);
+static void start_function(char *);
+static void add_instruction(char *);
+static void end_function();
+%}
+%%
+
+^[#].*$ {}
+"typedef Type_"[0-9]*" * Type_"[0-9]*" ;" { declare_pointer(yytext); }
+"typedef Type_"[0-9]*" Type_"[0-9]*" [ "[^ ]*" ] ;" { declare_array(yytext); }
+"typedef Type_"[0-9]*" ( "[^)\n]*") ;" { declare_function(yytext); }
+"typedef struct Struct_"[0-9]*" Type_"[0-9]*" ;" { predeclare_struct(yytext); }
+"struct Struct_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); }
+"typedef union Union_"[0-9]*" Type_"[0-9]*" ;" { predeclare_union(yytext); }
+"union Union_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); }
+"typedef ".*" ;" { declare_basic(yytext); }
+"extern Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); }
+"static Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); }
+^[^=\n\t]*"= ".*$ { /* definition */ }
+^[^{\n]*"{"$ { start_function(yytext); }
+\tType_[0-9]*" "[a-zA-Z0-9_]*" ;" { declare_object(yytext); add_instruction(yytext); }
+\t[^\n]*$ { add_instruction(yytext); }
+"}"$ { end_function(); }
+\n { }
+(.|[ \t]) { fprintf(stderr, "UNKNOWN: %s\n", yytext); }
+
+%%
+static struct program PROGRAM;
+static struct function FUNCTION;
+static struct instruction **NEXT_INSTRUCTION;
+
+// skips to the next number in the string, reads past it, and then returns the
+// read number. @ptr is left pointing to the char immediately after the number.
+static int next_number(char **ptr) {
+ int value = 0;
+ while (**ptr && !isdigit(**ptr)) (*ptr)++;
+ for (; isdigit(**ptr); (*ptr)++) value = (value * 10) + (**ptr - '0');
+ return value;
+}
+
+static void add_type(struct type type) {
+ if (PROGRAM.n_types <= type.id) {
+ PROGRAM.n_types = type.id + 1;
+ PROGRAM.id2type = realloc(PROGRAM.id2type,
+ PROGRAM.n_types * sizeof(PROGRAM.id2type[0]));
+ }
+ PROGRAM.id2type[type.id] = malloc(sizeof(type));
+ *(PROGRAM.id2type[type.id]) = type;
+}
+
+static struct type *typedup(struct type *type) {
+ struct type *dup = malloc(sizeof(*dup));
+ *dup = *type;
+ return dup;
+}
+
+static void declare_pointer(char *line) {
+ struct type type = {strdup(line), TYPE_POINTER, 0};
+ type.base = PROGRAM.id2type[next_number(&line)];
+ type.id = next_number(&line);
+ add_type(type);
+}
+
+static void declare_array(char *line) {
+ struct type type = {strdup(line), TYPE_ARRAY, 0};
+ type.base = PROGRAM.id2type[next_number(&line)];
+ type.id = next_number(&line);
+ type.length = next_number(&line);
+ if (*line == '\0') type.length = -1;
+ add_type(type);
+}
+
+static void declare_function(char *line) {
+ struct type type = {strdup(line), TYPE_FUNCTION, 0};
+ type.return_type = PROGRAM.id2type[next_number(&line)];
+ type.id = next_number(&line);
+ if (strstr(line, "...")) type.is_variadic = 1;
+ struct type **insert_arg = &(type.params);
+ while (1) {
+ int arg_id = next_number(&line);
+ if (*line == '\0') break;
+ *insert_arg = typedup(PROGRAM.id2type[arg_id]);
+ insert_arg = &((*insert_arg)->next_param);
+ }
+ *insert_arg = 0;
+ add_type(type);
+}
+
+static void predeclare_struct(char *line) {
+ struct type type = {strdup(line), TYPE_STRUCT, next_number(&line), 0};
+ add_type(type);
+}
+
+static void predeclare_union(char *line) {
+ struct type type = {strdup(line), TYPE_UNION, next_number(&line), 0};
+ add_type(type);
+}
+
+// struct Struct_[0-9]* [{] [^}]* [}] ;
+// union Union_[0-9]* [{] [^}]* [}] ;
+static void define_aggregate(char *line) {
+ struct type *type = PROGRAM.id2type[next_number(&line)];
+ line = strchr(line, 'T');
+ struct type **insert_at = &(type->members);
+ while (line && *line == 'T') {
+ struct type *field_type = typedup(PROGRAM.id2type[next_number(&line)]);
+
+ field_type->field_name = strdup(line + 1);
+ *strchrnul(field_type->field_name, ' ') = '\0';
+
+ *insert_at = field_type;
+ insert_at = &(field_type->next_member);
+
+ line = strchrnul(strchrnul(line, ','), 'T');
+ }
+ *insert_at = 0;
+}
+
+static void declare_basic(char *line) {
+ char *basic = strdup(line + strlen("typedef "));
+ char *basic_end = basic;
+ while (strncmp(basic_end + 1, "Type_", strlen(basic_end))) basic_end++;
+ // now basic_end is the space right before Type_
+ *basic_end = '\0';
+ line += (basic_end - basic);
+ struct type type = {strdup(line), TYPE_BASIC, next_number(&line), 0};
+ type.basic = basic;
+ add_type(type);
+}
+
+char *libdietc_tokdup(char *str) {
+ return strndup(str, strchrnul(str, ' ') - str);
+}
+
+static unsigned long hash(char *str) { // djb2
+ unsigned long hash = 6997;
+ for (; *str; str++) hash = hash * 33 ^ (*str);
+ return hash;
+}
+
+// ... Type_# name ;
+static void declare_object(char *line) {
+ // MAYBE REHASH
+ if (PROGRAM.n_objects >= PROGRAM.cap_objects / 2) {
+ unsigned long old_cap = PROGRAM.cap_objects;
+ struct object **old_objects = PROGRAM.objects;
+ PROGRAM.cap_objects = 4 * (PROGRAM.n_objects + 1);
+ PROGRAM.objects = calloc(PROGRAM.cap_objects, sizeof(void*));
+ for (unsigned long i = 0; i < old_cap; i++) {
+ if (!old_objects[i]) continue;
+ unsigned long h = hash(old_objects[i]->name) % old_cap;
+ while (PROGRAM.objects[h]) h = (h + 1) % old_cap;
+ PROGRAM.objects[h] = old_objects[i];
+ }
+ free(old_objects);
+ }
+ // INSERT
+ struct object *object = malloc(sizeof(*object));
+ object->type = PROGRAM.id2type[next_number(&line)];
+ object->name = libdietc_tokdup(line + 1);
+ PROGRAM.n_objects++;
+ unsigned long h = hash(object->name) % PROGRAM.cap_objects;
+ while (PROGRAM.objects[h]) h = (h + 1) % PROGRAM.cap_objects;
+ PROGRAM.objects[h] = object;
+}
+
+static void start_function(char *line) {
+ FUNCTION = (struct function){0};
+ FUNCTION.start_line = strdup(line);
+ NEXT_INSTRUCTION = &(FUNCTION.instructions);
+}
+
+static void add_instruction(char *line) {
+ *NEXT_INSTRUCTION = calloc(sizeof(**NEXT_INSTRUCTION), 1);
+ (*NEXT_INSTRUCTION)->line = strdup(line);
+ NEXT_INSTRUCTION = &((*NEXT_INSTRUCTION)->next);
+}
+
+static void end_function() {
+ struct function **insert = &(PROGRAM.functions);
+ while (*insert) insert = &((*insert)->next);
+ *insert = calloc(1, sizeof(**insert));
+ **insert = FUNCTION;
+}
+
+struct program libdietc_parse(char *filename) {
+ PROGRAM = (struct program){0};
+
+ FILE *f = fopen(filename, "r");
+ assert(f);
+
+ struct stat statinfo;
+ stat(filename, &statinfo);
+ int filesize = statinfo.st_size;
+
+ // read whole file
+ PROGRAM.original = malloc((statinfo.st_size + 1) * sizeof(char));
+ fread(PROGRAM.original, statinfo.st_size, 1, f);
+ PROGRAM.original[statinfo.st_size] = '\0';
+
+ // identify end of the preamble
+ char *end_preamble = strstr(PROGRAM.original, "{\n");
+ if (!end_preamble) {
+ PROGRAM.preamble = strdup(PROGRAM.original);
+ PROGRAM.functions = NULL;
+ } else {
+ while (*end_preamble != '\n') end_preamble--;
+ PROGRAM.preamble = PROGRAM.original;
+ PROGRAM.original = strdup(PROGRAM.original);
+ *(end_preamble + 1) = '\0';
+ }
+
+ // parse the file
+ rewind(f);
+ YY_BUFFER_STATE flex_buf = yy_create_buffer(f, YY_BUF_SIZE);
+ yy_switch_to_buffer(flex_buf);
+ yylex();
+
+ return PROGRAM;
+}
+
+void libdietc_print(struct program program) {
+ fputs(program.preamble, stdout);
+ for (struct function *f = program.functions; f; f = f->next) {
+ printf("%s\n", f->start_line);
+ for (struct instruction *i = f->instructions; i; i = i->next)
+ printf("%s\n", i->line);
+ printf("}\n");
+ }
+}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback