From 4de60a709d3af497781b7467f2c6fe7e09b39595 Mon Sep 17 00:00:00 2001 From: Matthew Sotoudeh Date: Sun, 30 Jul 2023 14:38:43 -0700 Subject: basic libdietc for writing passes in C --- c/.gitignore | 4 + c/Makefile | 8 ++ c/README.txt | 18 +++ c/examples/zero_init/.gitignore | 2 + c/examples/zero_init/Makefile | 14 +++ c/examples/zero_init/dietpass.c | 25 +++++ c/examples/zero_init/test.c | 20 ++++ c/libdietc.h | 83 ++++++++++++++ c/libdietc.l | 243 ++++++++++++++++++++++++++++++++++++++++ 9 files changed, 417 insertions(+) create mode 100644 c/.gitignore create mode 100644 c/Makefile create mode 100644 c/README.txt create mode 100644 c/examples/zero_init/.gitignore create mode 100644 c/examples/zero_init/Makefile create mode 100644 c/examples/zero_init/dietpass.c create mode 100644 c/examples/zero_init/test.c create mode 100644 c/libdietc.h create mode 100644 c/libdietc.l (limited to 'c') diff --git a/c/.gitignore b/c/.gitignore new file mode 100644 index 0000000..4dd9f43 --- /dev/null +++ b/c/.gitignore @@ -0,0 +1,4 @@ +.*.sw* +libdietc.c +*.o +dietpass diff --git a/c/Makefile b/c/Makefile new file mode 100644 index 0000000..3908b05 --- /dev/null +++ b/c/Makefile @@ -0,0 +1,8 @@ +CFLAGS += -O3 -g + +libdietc.o: libdietc.c + +clean: + rm -rf libdietc.o libdietc.c + +.PHONY: clean diff --git a/c/README.txt b/c/README.txt new file mode 100644 index 0000000..50450aa --- /dev/null +++ b/c/README.txt @@ -0,0 +1,18 @@ +C library for building dietcc passes + +Example of using a pass: + + The examples/zero_init pass zero-initializes all local variables. + + $ which dietcc + [make sure it's on your path!] + $ cd examples/zero_init + $ make -B + $ ./default + Foo return value: 0 + Foo return value: 32765 + Foo return value: 32765 + $ ./instrumented + Foo return value: 0 + Foo return value: 0 + Foo return value: 0 diff --git a/c/examples/zero_init/.gitignore b/c/examples/zero_init/.gitignore new file mode 100644 index 0000000..b74bd7f --- /dev/null +++ b/c/examples/zero_init/.gitignore @@ -0,0 +1,2 @@ +default +instrumented diff --git a/c/examples/zero_init/Makefile b/c/examples/zero_init/Makefile new file mode 100644 index 0000000..79b5692 --- /dev/null +++ b/c/examples/zero_init/Makefile @@ -0,0 +1,14 @@ +all: default instrumented + +default: test.c + gcc -O0 $^ -o $@ + +instrumented: test.c dietpass + dietcc -O0 test.c -o $@ --dietc-pass $(PWD)/dietpass + +dietpass: dietpass.c ../../libdietc.l ../../libdietc.h + make -C ../.. + gcc -O3 dietpass.c ../../libdietc.o -I../../ -lfl -o $@ + +clean: + rm -f default instrumented dietpass diff --git a/c/examples/zero_init/dietpass.c b/c/examples/zero_init/dietpass.c new file mode 100644 index 0000000..e145a91 --- /dev/null +++ b/c/examples/zero_init/dietpass.c @@ -0,0 +1,25 @@ +#include +#include +#include +#include +#include + +int main(int argc, char **argv) { + assert(argc == 2); + struct program program = libdietc_parse(argv[1]); + + for (struct function *f = program.functions; f; f = f->next) { + for (struct instruction *i = f->instructions; i; i = i->next) { + if (strncmp(i->line, "\tType_", strlen("\tType_"))) continue; + char *name = libdietc_tokdup(strchrnul(i->line, ' ') + 1); + struct instruction *n = calloc(1, sizeof(*n)); + n->line = malloc(strlen(name) + 512); + sprintf(n->line, "memset(&%s, 0, sizeof(%s));", name, name); + n->next = i->next; + i->next = n; + i = n; + } + } + libdietc_print(program); + return 0; +} diff --git a/c/examples/zero_init/test.c b/c/examples/zero_init/test.c new file mode 100644 index 0000000..5db109a --- /dev/null +++ b/c/examples/zero_init/test.c @@ -0,0 +1,20 @@ +#include + +int set_xyz() { + int xyz = 1; + return xyz; +} + +int foo() { + int xyz; + return xyz; +} + +int main() { + printf("Foo return value: %d\n", foo()); + set_xyz(); + printf("Foo return value: %d\n", foo()); + set_xyz(); + printf("Foo return value: %d\n", foo()); + return 0; +} diff --git a/c/libdietc.h b/c/libdietc.h new file mode 100644 index 0000000..14b7ad2 --- /dev/null +++ b/c/libdietc.h @@ -0,0 +1,83 @@ +#pragma once + +static char *strchrnul(char *s, char c) { + while (*s && *s != c) s++; + return s; +} + +enum TYPE { + TYPE_BASIC, + TYPE_POINTER, + TYPE_ARRAY, + TYPE_STRUCT, + TYPE_UNION, + TYPE_FUNCTION, +}; + +struct type { + char *typedef_line; + enum TYPE kind; + unsigned long id; + + // basic type + char *basic; + + // pointer & array types + struct type *base; + unsigned long length; + + // struct & union types + struct type *members; + struct type *next_member; + char *field_name; + + // function types + struct type *return_type; + struct type *params; + struct type *next_param; + unsigned long is_variadic; +}; + +struct instruction { + char *line; + struct instruction *next; +}; + +struct function { + char *start_line; + struct type *type; + struct instruction *instructions; + struct function *next; +}; + +struct object { + char *name; + struct type *type; +}; + +struct program { + char *original; + + // all lines before the function definitions + char *preamble; + + // id -> type table + struct type **id2type; + unsigned long n_types; + + // hash map of objects + struct object **objects; + unsigned long cap_objects; + unsigned long n_objects; + + struct function *functions; +}; + +struct identifiers { + char *token; + struct identifier *next; +}; + +char *libdietc_tokdup(char *tok); +struct program libdietc_parse(char *filename); +void libdietc_print(struct program program); diff --git a/c/libdietc.l b/c/libdietc.l new file mode 100644 index 0000000..cd167b2 --- /dev/null +++ b/c/libdietc.l @@ -0,0 +1,243 @@ +%{ +#include +#include +#include +#include "libdietc.h" + +static void declare_pointer(char *); +static void declare_array(char *); +static void declare_function(char *); +static void predeclare_struct(char *); +static void predeclare_union(char *); +static void define_aggregate(char *); +static void declare_basic(char *); +static void declare_object(char *); +static void start_function(char *); +static void add_instruction(char *); +static void end_function(); +%} +%% + +^[#].*$ {} +"typedef Type_"[0-9]*" * Type_"[0-9]*" ;" { declare_pointer(yytext); } +"typedef Type_"[0-9]*" Type_"[0-9]*" [ "[^ ]*" ] ;" { declare_array(yytext); } +"typedef Type_"[0-9]*" ( "[^)\n]*") ;" { declare_function(yytext); } +"typedef struct Struct_"[0-9]*" Type_"[0-9]*" ;" { predeclare_struct(yytext); } +"struct Struct_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); } +"typedef union Union_"[0-9]*" Type_"[0-9]*" ;" { predeclare_union(yytext); } +"union Union_"[0-9]*" { "[^}\n]*"} ;" { define_aggregate(yytext); } +"typedef ".*" ;" { declare_basic(yytext); } +"extern Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); } +"static Type_"[0-9]*" "[^ \n]*" ;" { declare_object(yytext); } +^[^=\n\t]*"= ".*$ { /* definition */ } +^[^{\n]*"{"$ { start_function(yytext); } +\tType_[0-9]*" "[a-zA-Z0-9_]*" ;" { declare_object(yytext); add_instruction(yytext); } +\t[^\n]*$ { add_instruction(yytext); } +"}"$ { end_function(); } +\n { } +(.|[ \t]) { fprintf(stderr, "UNKNOWN: %s\n", yytext); } + +%% +static struct program PROGRAM; +static struct function FUNCTION; +static struct instruction **NEXT_INSTRUCTION; + +// skips to the next number in the string, reads past it, and then returns the +// read number. @ptr is left pointing to the char immediately after the number. +static int next_number(char **ptr) { + int value = 0; + while (**ptr && !isdigit(**ptr)) (*ptr)++; + for (; isdigit(**ptr); (*ptr)++) value = (value * 10) + (**ptr - '0'); + return value; +} + +static void add_type(struct type type) { + if (PROGRAM.n_types <= type.id) { + PROGRAM.n_types = type.id + 1; + PROGRAM.id2type = realloc(PROGRAM.id2type, + PROGRAM.n_types * sizeof(PROGRAM.id2type[0])); + } + PROGRAM.id2type[type.id] = malloc(sizeof(type)); + *(PROGRAM.id2type[type.id]) = type; +} + +static struct type *typedup(struct type *type) { + struct type *dup = malloc(sizeof(*dup)); + *dup = *type; + return dup; +} + +static void declare_pointer(char *line) { + struct type type = {strdup(line), TYPE_POINTER, 0}; + type.base = PROGRAM.id2type[next_number(&line)]; + type.id = next_number(&line); + add_type(type); +} + +static void declare_array(char *line) { + struct type type = {strdup(line), TYPE_ARRAY, 0}; + type.base = PROGRAM.id2type[next_number(&line)]; + type.id = next_number(&line); + type.length = next_number(&line); + if (*line == '\0') type.length = -1; + add_type(type); +} + +static void declare_function(char *line) { + struct type type = {strdup(line), TYPE_FUNCTION, 0}; + type.return_type = PROGRAM.id2type[next_number(&line)]; + type.id = next_number(&line); + if (strstr(line, "...")) type.is_variadic = 1; + struct type **insert_arg = &(type.params); + while (1) { + int arg_id = next_number(&line); + if (*line == '\0') break; + *insert_arg = typedup(PROGRAM.id2type[arg_id]); + insert_arg = &((*insert_arg)->next_param); + } + *insert_arg = 0; + add_type(type); +} + +static void predeclare_struct(char *line) { + struct type type = {strdup(line), TYPE_STRUCT, next_number(&line), 0}; + add_type(type); +} + +static void predeclare_union(char *line) { + struct type type = {strdup(line), TYPE_UNION, next_number(&line), 0}; + add_type(type); +} + +// struct Struct_[0-9]* [{] [^}]* [}] ; +// union Union_[0-9]* [{] [^}]* [}] ; +static void define_aggregate(char *line) { + struct type *type = PROGRAM.id2type[next_number(&line)]; + line = strchr(line, 'T'); + struct type **insert_at = &(type->members); + while (line && *line == 'T') { + struct type *field_type = typedup(PROGRAM.id2type[next_number(&line)]); + + field_type->field_name = strdup(line + 1); + *strchrnul(field_type->field_name, ' ') = '\0'; + + *insert_at = field_type; + insert_at = &(field_type->next_member); + + line = strchrnul(strchrnul(line, ','), 'T'); + } + *insert_at = 0; +} + +static void declare_basic(char *line) { + char *basic = strdup(line + strlen("typedef ")); + char *basic_end = basic; + while (strncmp(basic_end + 1, "Type_", strlen(basic_end))) basic_end++; + // now basic_end is the space right before Type_ + *basic_end = '\0'; + line += (basic_end - basic); + struct type type = {strdup(line), TYPE_BASIC, next_number(&line), 0}; + type.basic = basic; + add_type(type); +} + +char *libdietc_tokdup(char *str) { + return strndup(str, strchrnul(str, ' ') - str); +} + +static unsigned long hash(char *str) { // djb2 + unsigned long hash = 6997; + for (; *str; str++) hash = hash * 33 ^ (*str); + return hash; +} + +// ... Type_# name ; +static void declare_object(char *line) { + // MAYBE REHASH + if (PROGRAM.n_objects >= PROGRAM.cap_objects / 2) { + unsigned long old_cap = PROGRAM.cap_objects; + struct object **old_objects = PROGRAM.objects; + PROGRAM.cap_objects = 4 * (PROGRAM.n_objects + 1); + PROGRAM.objects = calloc(PROGRAM.cap_objects, sizeof(void*)); + for (unsigned long i = 0; i < old_cap; i++) { + if (!old_objects[i]) continue; + unsigned long h = hash(old_objects[i]->name) % old_cap; + while (PROGRAM.objects[h]) h = (h + 1) % old_cap; + PROGRAM.objects[h] = old_objects[i]; + } + free(old_objects); + } + // INSERT + struct object *object = malloc(sizeof(*object)); + object->type = PROGRAM.id2type[next_number(&line)]; + object->name = libdietc_tokdup(line + 1); + PROGRAM.n_objects++; + unsigned long h = hash(object->name) % PROGRAM.cap_objects; + while (PROGRAM.objects[h]) h = (h + 1) % PROGRAM.cap_objects; + PROGRAM.objects[h] = object; +} + +static void start_function(char *line) { + FUNCTION = (struct function){0}; + FUNCTION.start_line = strdup(line); + NEXT_INSTRUCTION = &(FUNCTION.instructions); +} + +static void add_instruction(char *line) { + *NEXT_INSTRUCTION = calloc(sizeof(**NEXT_INSTRUCTION), 1); + (*NEXT_INSTRUCTION)->line = strdup(line); + NEXT_INSTRUCTION = &((*NEXT_INSTRUCTION)->next); +} + +static void end_function() { + struct function **insert = &(PROGRAM.functions); + while (*insert) insert = &((*insert)->next); + *insert = calloc(1, sizeof(**insert)); + **insert = FUNCTION; +} + +struct program libdietc_parse(char *filename) { + PROGRAM = (struct program){0}; + + FILE *f = fopen(filename, "r"); + assert(f); + + struct stat statinfo; + stat(filename, &statinfo); + int filesize = statinfo.st_size; + + // read whole file + PROGRAM.original = malloc((statinfo.st_size + 1) * sizeof(char)); + fread(PROGRAM.original, statinfo.st_size, 1, f); + PROGRAM.original[statinfo.st_size] = '\0'; + + // identify end of the preamble + char *end_preamble = strstr(PROGRAM.original, "{\n"); + if (!end_preamble) { + PROGRAM.preamble = strdup(PROGRAM.original); + PROGRAM.functions = NULL; + } else { + while (*end_preamble != '\n') end_preamble--; + PROGRAM.preamble = PROGRAM.original; + PROGRAM.original = strdup(PROGRAM.original); + *(end_preamble + 1) = '\0'; + } + + // parse the file + rewind(f); + YY_BUFFER_STATE flex_buf = yy_create_buffer(f, YY_BUF_SIZE); + yy_switch_to_buffer(flex_buf); + yylex(); + + return PROGRAM; +} + +void libdietc_print(struct program program) { + fputs(program.preamble, stdout); + for (struct function *f = program.functions; f; f = f->next) { + printf("%s\n", f->start_line); + for (struct instruction *i = f->instructions; i; i = i->next) + printf("%s\n", i->line); + printf("}\n"); + } +} -- cgit v1.2.3