From 135b17bada960c4585d5a124fedb7e4e9d849078 Mon Sep 17 00:00:00 2001 From: Matthew Sotoudeh Date: Mon, 31 Jul 2023 13:34:45 -0700 Subject: simple refcounting pass in C --- c/examples/refcounting/.gitignore | 1 + c/examples/refcounting/Makefile | 12 +++ c/examples/refcounting/dietpass.c | 196 ++++++++++++++++++++++++++++++++++++++ c/examples/refcounting/runtime | 1 + c/examples/refcounting/test.c | 1 + c/libdietc.h | 13 ++- c/libdietc.l | 24 ++++- 7 files changed, 243 insertions(+), 5 deletions(-) create mode 100644 c/examples/refcounting/.gitignore create mode 100644 c/examples/refcounting/Makefile create mode 100644 c/examples/refcounting/dietpass.c create mode 120000 c/examples/refcounting/runtime create mode 120000 c/examples/refcounting/test.c (limited to 'c') diff --git a/c/examples/refcounting/.gitignore b/c/examples/refcounting/.gitignore new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/c/examples/refcounting/.gitignore @@ -0,0 +1 @@ +test diff --git a/c/examples/refcounting/Makefile b/c/examples/refcounting/Makefile new file mode 100644 index 0000000..b48783c --- /dev/null +++ b/c/examples/refcounting/Makefile @@ -0,0 +1,12 @@ +test: test.c runtime/refcounting.o dietpass + dietcc -I $(PWD)/runtime -o $@ test.c runtime/refcounting.o --dietc-pass $(PWD)/dietpass + +dietpass: dietpass.c ../../libdietc.l ../../libdietc.h + make -C ../.. + gcc -g -O3 dietpass.c ../../libdietc.o -I../../ -lfl -o $@ + +runtime/refcounting.o: runtime/refcounting.c + gcc -c -o $@ $^ + +clean: + rm -f runtime/refcounting.o test dietpass diff --git a/c/examples/refcounting/dietpass.c b/c/examples/refcounting/dietpass.c new file mode 100644 index 0000000..33e2531 --- /dev/null +++ b/c/examples/refcounting/dietpass.c @@ -0,0 +1,196 @@ +#include +#include +#include +#include +#include + +static int startswith(char *s, char *p); +static struct instruction *instruction_before(struct instruction *i, char *line); +static struct instruction *instruction_after(struct instruction *i, char *line); +static char **pointer_expressions(char *name, struct type *type); +static void rewrite(struct instruction *i, char *old, char *new); + +int main(int argc, char **argv) { + assert(argc == 2); + struct program program = libdietc_parse(argv[1]); + unsigned long count = 0; + + for (struct function *f = program.functions; f; f = f->next) { + // (1) rewrite calloc, free, ... calls to refcount_* + for (struct instruction *i = f->instructions; i; i = i->next) { + rewrite(i, " calloc ", " refcount_calloc "); + rewrite(i, " free ", " refcount_free "); + rewrite(i, " malloc ", " refcount_malloc "); + rewrite(i, " realloc ", " refcount_realloc "); + } + // identify the instruction right before the return (we'll insert + // refcount(0, ...)s here). + struct instruction *before_return = f->instructions; + while (!startswith(before_return->next->line, "\treturn ")) + before_return = before_return->next; + struct instruction *return_instruction = before_return->next; + char *return_expr = libdietc_tokdup(libdietc_nth_token(return_instruction->line, 1)); + // now the instructions. + for (struct instruction *i = f->instructions; i; i = i->next) { + if (startswith(i->line, "\tType_")) { + // (2a) initialize all pointer expressions to 0 + char *type_name = libdietc_nth_token(i->line, 0); + char *name = libdietc_tokdup(libdietc_nth_token(i->line, 1)); + assert(name); + unsigned long type_id = 0; + sscanf(type_name, "Type_%ld", &type_id); + char **ptr_exprs = pointer_expressions(name, program.id2type[type_id]); + for (char **expr = ptr_exprs; *expr; expr++) { + char *line = calloc(strlen(*expr) + 128, sizeof(char)); + sprintf(line, "\t%s = 0;", *expr); + i = instruction_after(i, line); + } + // is this the return expression? + if (!strcmp(name, return_expr)) { // yes + for (char **expr = ptr_exprs; *expr; expr++) { + char *line = calloc(strlen(*expr) + 128, sizeof(char)); + sprintf(line, "\trefcount_returning(%s);", *expr); + instruction_before(return_instruction, line); + } + } else { // no + for (char **expr = ptr_exprs; *expr; expr++) { + char *line = calloc(strlen(*expr) + 128, sizeof(char)); + sprintf(line, "\trefcount_write(0, %s);", *expr); + instruction_after(before_return, line); + } + } + } else if (strstr(i->line, " = ")) { + // (2b) whenever a pointer is written to, call refcount_write + struct type *lhs_type = libdietc_lhs_type(program, i); + assert(lhs_type); + char **ptr_exprs = NULL; + if (startswith(i->line, "\t* ")) { + // \t* tmp = ... -> (* tmp ) + char *name = strdup(i->line); + *name = '('; + *strchr(name, '=') = ')'; + *(strchr(name, ')') + 1) = '\0'; + assert(name); + ptr_exprs = pointer_expressions(name, lhs_type); + } else { + // \ttmp = ... -> tmp + char *name = strdup(i->line); + name[0] = ' '; + *strchr(name + 1, ' ') = '\0'; + assert(strlen(name)); + ptr_exprs = pointer_expressions(name, lhs_type); + } + // *before* the instruction, need to insert backups + unsigned long backup_count = count; + for (char **e = ptr_exprs; *e; e++) { + char *line = calloc(strlen(*e) + 128, sizeof(char)); + assert(strlen(*e)); + sprintf(line, "\tvoid *backup_%lu = (%s);", count++, *e); + instruction_before(i, line); + } + // *then* execute the instruction + // *then* call refcount_write + for (char **e = ptr_exprs; *e; e++) { + char *line = calloc(strlen(*e) + 128, sizeof(char)); + sprintf(line, "\trefcount_write(%s, backup_%lu);", *e, backup_count++); + i = instruction_after(i, line); + } + } + } + } + printf("#include \n"); + libdietc_print(program); + return 0; +} + +static int startswith(char *s, char *p) { + return !strncmp(s, p, strlen(p)); +} + +static struct instruction *instruction_before(struct instruction *i, char *line) { + struct instruction *new = calloc(1, sizeof(*new)); + *(i->pprev) = new; + new->pprev = i->pprev; + new->line = line; + new->next = i; + i->pprev = &(new->next); + return new; +} + +static struct instruction *instruction_after(struct instruction *i, char *line) { + struct instruction *new = calloc(1, sizeof(*new)); + new->line = line; + new->next = i->next; + i->next->pprev = &(new->next); + new->pprev = &(i->next); + i->next = new; + return new; +} + +static char **empty_list() { + return calloc(1, sizeof(char *)); +} +static char **append_list(char **list, char *line) { + assert(line); + assert(strlen(line)); + unsigned long n = 0; + for (char **data = list; *data; data++) n++; + char **res = calloc(n + 2, sizeof(*list)); + for (unsigned long i = 0; i < n; i++) { + res[i] = strdup(list[i]); + assert(list[i] && strlen(list[i])); + } + res[n] = strdup(line); + return res; +} +static char **pointer_expressions(char *name, struct type *type) { + assert(strlen(name)); + switch (type->kind) { + case TYPE_POINTER: + return append_list(empty_list(), name); + case TYPE_ARRAY: { + char **list = empty_list(); + for (unsigned long i = 0; i < type->length; i++) { + char *line = calloc(strlen(name) + 128, sizeof(char)); + sprintf(line, "((%s)[%ld])", name, i); + list = append_list(list, line); + } + return list; + } + case TYPE_STRUCT: + case TYPE_UNION: { + char **list = empty_list(); + for (struct type *m = type->members; m; m = m->next_member) { + char *subname = calloc(strlen(name) + strlen(m->field_name) + 4, sizeof(char)); + sprintf(subname, "%s.%s", name, m->field_name); + char **sublist = pointer_expressions(subname, m); + for (char **subline = sublist; *subline; subline++) + list = append_list(list, *subline); + } + return list; + } + case TYPE_BASIC: + case TYPE_FUNCTION: + return empty_list(); + default: assert(0); break; + } +} + +static void rewrite(struct instruction *i, char *old, char *new) { + if (!strstr(i->line, old)) return; + + char *rewritten = calloc((strlen(new) * strlen(i->line)) + 1, sizeof(char)); + unsigned long new_len = 0; + for (char *s = i->line; *s;) { + if (!strncmp(s, old, strlen(old))) { + strcpy(rewritten + new_len, new); + new_len += strlen(new); + s += strlen(old); + } else { + rewritten[new_len++] = *s; + s++; + } + } + rewritten[new_len] = '\0'; + i->line = rewritten; +} diff --git a/c/examples/refcounting/runtime b/c/examples/refcounting/runtime new file mode 120000 index 0000000..0c389ea --- /dev/null +++ b/c/examples/refcounting/runtime @@ -0,0 +1 @@ +../../../python/examples/refcounting/runtime \ No newline at end of file diff --git a/c/examples/refcounting/test.c b/c/examples/refcounting/test.c new file mode 120000 index 0000000..f54831f --- /dev/null +++ b/c/examples/refcounting/test.c @@ -0,0 +1 @@ +../../../python/examples/refcounting/test.c \ No newline at end of file diff --git a/c/libdietc.h b/c/libdietc.h index e63af8d..4dea137 100644 --- a/c/libdietc.h +++ b/c/libdietc.h @@ -38,9 +38,19 @@ struct type { unsigned long is_variadic; }; +enum INSTRUCTION_TYPE { + INSTRUCTION_DECLARE, + INSTRUCTION_DEREF_ASSIGN, + INSTRUCTION_ASSIGN_DEREF, + INSTRUCTION_CAST, + INSTRUCTION_COPY, + INSTRUCTION_OTHER, +}; + struct instruction { char *line; - struct instruction *next; + enum INSTRUCTION_TYPE kind; + struct instruction *next, **pprev; }; struct function { @@ -83,3 +93,4 @@ struct program libdietc_parse(char *filename); void libdietc_print(struct program program); char *libdietc_nth_token(char *string, int n); struct type *libdietc_object_type(struct program program, char *name); +struct type *libdietc_lhs_type(struct program program, struct instruction *instruction); diff --git a/c/libdietc.l b/c/libdietc.l index 7133803..51c8e91 100644 --- a/c/libdietc.l +++ b/c/libdietc.l @@ -13,7 +13,7 @@ static void define_aggregate(char *); static void declare_basic(char *); static void declare_object(char *); static void start_function(char *); -static void add_instruction(char *); +static void add_instruction(char *, enum INSTRUCTION_TYPE); static void end_function(); %} %% @@ -33,9 +33,13 @@ static void end_function(); ^[^{\n]*"{"$ { start_function(yytext); } ^\tType_[0-9]*" "[a-zA-Z0-9_]*" ;"$ { declare_object(yytext); - add_instruction(yytext); + add_instruction(yytext, INSTRUCTION_DECLARE); } -\t[^\n]*$ { add_instruction(yytext); } +\t[*]" "[a-zA-Z_0-9]*" = "[a-zA-Z_0-9]*" ;"$ { add_instruction(yytext, INSTRUCTION_DEREF_ASSIGN); } +\t[a-zA-Z_0-9]*" = "[*]" "[a-zA-Z_0-9]*" ;"$ { add_instruction(yytext, INSTRUCTION_ASSIGN_DEREF); } +\t[a-zA-Z_0-9]*" = "[a-zA-Z_0-9]*" ;"$ { add_instruction(yytext, INSTRUCTION_COPY); } +\t[a-zA-Z_0-9]*" = ( Type_"[0-9]*" ) "[a-zA-Z_0-9]*" ;"$ { add_instruction(yytext, INSTRUCTION_CAST); } +\t[^\n]*$ { add_instruction(yytext, INSTRUCTION_OTHER); } "}"$ { end_function(); } \n { } (.|[ \t]) { fprintf(stderr, "UNKNOWN: %s\n", yytext); } @@ -188,9 +192,11 @@ static void start_function(char *line) { NEXT_INSTRUCTION = &(FUNCTION.instructions); } -static void add_instruction(char *line) { +static void add_instruction(char *line, enum INSTRUCTION_TYPE kind) { *NEXT_INSTRUCTION = calloc(sizeof(**NEXT_INSTRUCTION), 1); (*NEXT_INSTRUCTION)->line = strdup(line); + (*NEXT_INSTRUCTION)->kind = kind; + (*NEXT_INSTRUCTION)->pprev = NEXT_INSTRUCTION; NEXT_INSTRUCTION = &((*NEXT_INSTRUCTION)->next); } @@ -264,3 +270,13 @@ struct type *libdietc_object_type(struct program program, char *name) { free(name_tok); return program.objects[h] ? program.objects[h]->type : 0; } + +struct type *libdietc_lhs_type(struct program program, struct instruction *instruction) { + char *name = 0; + if (instruction->line[1] == '*') { + name = libdietc_tokdup(libdietc_nth_token(instruction->line, 1)); + return libdietc_object_type(program, name)->base; + } + name = libdietc_tokdup(libdietc_nth_token(instruction->line, 0)); + return libdietc_object_type(program, name); +} -- cgit v1.2.3