diff options
-rw-r--r-- | .lvimrc | 2 | ||||
-rw-r--r-- | chibicc.h | 3 | ||||
-rw-r--r-- | codegen.c | 270 | ||||
-rw-r--r-- | docs/CHIBICC_MODS.txt | 1 | ||||
-rw-r--r-- | main.c | 1 | ||||
-rwxr-xr-x | scripts/dietcc | 13 | ||||
-rw-r--r-- | tests/void_cast.c | 5 | ||||
-rw-r--r-- | type_helpers.c | 40 | ||||
-rw-r--r-- | typegen.c | 304 |
9 files changed, 262 insertions, 377 deletions
@@ -0,0 +1,2 @@ +set shiftwidth=2 +set softtabstop=2 @@ -421,7 +421,8 @@ void typegen(Obj *prog, FILE *out); // int definitely_same_type(Type *type1, Type *type2); -unsigned long hash_type(Type *type); +void hash_insert(Type *type); +Type *hash_lookup(Type *type); // // unicode.c @@ -8,25 +8,60 @@ int is_bad_number(double x) { return (x != x) || (x == 1./0.) || (-x == 1./0.); } -static FILE *output_file; -static int RETURN_TMP; - -static void printnoln(char *fmt, ...); -static void println(char *fmt, ...); -static void print_tok(Token *tok); - -FILE *BUFFER; -int DO_BUFFER; - -static void flush_buffer() { - rewind(BUFFER); +/////////// PRINTING +static FILE *CURR_BUFFER, + // the typedefs at the very beginning + *TYPE_BUFFER, + // everything after the typedefs + *MAIN_BUFFER, + // code inside a function (after all declarations, which go into + // the main buffer immediately) + *CODE_BUFFER; + +FILE *BUFFER_STACK[8]; +int N_BUFFER_STACK; +static void push_buffer(FILE *which) { + assert(N_BUFFER_STACK < 8); + BUFFER_STACK[N_BUFFER_STACK++] = which; + CURR_BUFFER = which; +} +static void pop_buffer(FILE *which) { + assert(N_BUFFER_STACK--); + assert(BUFFER_STACK[N_BUFFER_STACK] == which); + CURR_BUFFER = BUFFER_STACK[N_BUFFER_STACK - 1]; +} +static void flush_buffer(FILE *to_buffer, FILE *from_buffer) { + rewind(from_buffer); int c; - while ((c = fgetc(BUFFER)) != EOF) - fputc(c, output_file); - rewind(BUFFER); - ftruncate(fileno(BUFFER), 0); + while ((c = fgetc(from_buffer)) != EOF) + fputc(c, to_buffer); + rewind(from_buffer); + ftruncate(fileno(from_buffer), 0); +} +__attribute__((format(printf, 1, 2))) +static void printnoln(char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(CURR_BUFFER, fmt, ap); + va_end(ap); +} +__attribute__((format(printf, 1, 2))) +static void println(char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(CURR_BUFFER, fmt, ap); + va_end(ap); + fputc('\n', CURR_BUFFER); +} +/////////// END PRINTING + +static int count(void) { + static int i = 1; + return i++; } +static int RETURN_TMP; + void print_label(char *label) { printnoln("_l"); for (; *label; label++) { @@ -57,52 +92,147 @@ void print_obj(Obj *obj) { } } +///////// TYPES +const void typedecl(Type *type) { + if (type->id) return; + Type *hashed = hash_lookup(type); + if (hashed && !(hashed->typedecling)) { + assert(hashed->id); + type->id = hashed->id; + if (hashed->pointer_type) type->pointer_type = hashed->pointer_type; + if (hashed->return_ty) type->return_ty = hashed->return_ty; + if (hashed->params) type->params = hashed->params; + return; + } + type->typedecling = 1; + type->id = count(); + hash_insert(type); + switch (type->kind) { + case TY_FLOAT: + println("typedef float Type_%d ;", type->id); + break; + case TY_DOUBLE: + println("typedef double Type_%d ;", type->id); + break; + case TY_LDOUBLE: + println("typedef long double Type_%d ;", type->id); + break; + case TY_INT: + if (type->is_unsigned) + println("typedef unsigned int Type_%d ;", type->id); + else + println("typedef int Type_%d ;", type->id); + break; + case TY_LONG: + if (type->is_unsigned) + println("typedef unsigned long Type_%d ;", type->id); + else + println("typedef long Type_%d ;", type->id); + break; + case TY_SHORT: + if (type->is_unsigned) + println("typedef unsigned short Type_%d ;", type->id); + else + println("typedef short Type_%d ;", type->id); + break; + case TY_VOID: + println("typedef void Type_%d ;", type->id); + break; + case TY_BOOL: + println("typedef _Bool Type_%d ;", type->id); + break; + case TY_CHAR: + println("typedef char Type_%d ;", type->id); + break; + case TY_PTR: + typedecl(type->base); + println("typedef Type_%d * Type_%d ;", type->base->id, type->id); + break; + case TY_FUNC: { + typedecl(type->return_ty); + for (Type *p = type->params; p; p = p->next) + typedecl(p); + printnoln("typedef Type_%d Type_%d ( ", type->return_ty->id, type->id); + + int i = 0; + for (Type *p = type->params; p; p = p->next, i++) + if (i) printnoln(", Type_%d ", p->id); + else printnoln("Type_%d ", p->id); + + if (type->is_variadic) { + if (i) printnoln(", ... "); + // else printnoln("... "); + } + println(") ;"); + break; + } + case TY_ARRAY: + typedecl(type->base); + if (type->array_len == -1) + println("typedef Type_%d Type_%d [ ] ;", type->base->id, type->id); + else + println("typedef Type_%d Type_%d [ %d ] ;", type->base->id, type->id, + type->array_len); + break; + case TY_STRUCT: + case TY_UNION: + if (type->kind == TY_STRUCT) + println("typedef struct Struct_%d Type_%d ;", type->id, type->id); + else + println("typedef union Union_%d Type_%d ;", type->id, type->id); + + for (Member *m = type->members; m; m = m->next) + typedecl(m->ty); + + if (type->kind == TY_STRUCT) printnoln("struct Struct_%d { ", type->id); + else printnoln("union Union_%d { ", type->id); + for (Member *m = type->members; m; m = m->next) { + printnoln("Type_%d ", m->ty->id); + if (m->name) + print_tok(m->name); + else if (m->ty->kind == TY_STRUCT || m->ty->kind == TY_UNION) + printnoln("___dietc_f%d", m->idx); + else + printnoln("__field_%d", count()); + printnoln(" ; "); + } + println("} ;"); + break; + case TY_ENUM: + println("typedef enum Enum_%d { EN_%d } Type_%d ;", type->id, type->id, type->id); + break; + case TY_VLA: + assert(!"unimplemented vla?"); + break; + default: + assert(!"unimplemented?"); + break; + } + type->typedecling = 0; +} +void ensure_pointer_to(Type *type) { + if (type->pointer_type) return; + type->pointer_type = pointer_to(type); +} +//////////// END TYPES + const void print_type(Type *type) { if (type->kind == TY_FUNC) { - assert(type->pointer_type); + ensure_pointer_to(type); print_type(type->pointer_type); } else { + push_buffer(TYPE_BUFFER); + typedecl(type); + pop_buffer(TYPE_BUFFER); assert(type->id); printnoln("Type_%d", type->id); } } -static int depth; static Obj *current_fn; static void gen_expr(Node *node, int to_tmp); static void gen_stmt(Node *node); -__attribute__((format(printf, 1, 2))) -static void println(char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - if (DO_BUFFER) - vfprintf(BUFFER, fmt, ap); - else - vfprintf(output_file, fmt, ap); - va_end(ap); - if (DO_BUFFER) - fprintf(BUFFER, "\n"); - else - fprintf(output_file, "\n"); -} - -__attribute__((format(printf, 1, 2))) -static void printnoln(char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - if (DO_BUFFER) - vfprintf(BUFFER, fmt, ap); - else - vfprintf(output_file, fmt, ap); - va_end(ap); -} - -static int count(void) { - static int i = 1; - return i++; -} - // Round up `n` to the nearest multiple of `align`. For instance, // align_to(5, 8) returns 8 and align_to(11, 8) returns 16. int align_to(int n, int align) { @@ -110,15 +240,16 @@ int align_to(int n, int align) { } void decltmp(Type *type, int c) { - DO_BUFFER = 0; + // Write directly to the main buffer so it decls before the code. + push_buffer(MAIN_BUFFER); printnoln("\t"); print_type(type); println(" t%d ;", c); - DO_BUFFER = 1; + pop_buffer(MAIN_BUFFER); } void decltmpptr(Type *type, int c) { - assert(type->pointer_type); + ensure_pointer_to(type); decltmp(type->pointer_type, c); } @@ -268,6 +399,7 @@ static void gen_expr(Node *node, int to_tmp) { int c = count(); if (node->ty->kind == TY_VOID) { gen_expr(node->lhs, c); + push_buffer(TYPE_BUFFER); typedecl(node->ty); pop_buffer(TYPE_BUFFER); println("\t( Type_%d ) t%d ;", node->ty->id, c); } else if (node->lhs->ty->kind == TY_UNION) { // union *tuptr = &union @@ -531,16 +663,11 @@ static void assign_lvar_offsets(Obj *prog) { if (!fn->is_function) continue; - // If a function has many parameters, some parameters are - // inevitably passed by stack rather than by register. - // The first passed-by-stack parameter resides at RBP+16. - int c = 1; - for (Obj *var = fn->params; var; var = var->next) - var->offset = c++; + var->offset = count(); for (Obj *var = fn->locals; var; var = var->next) if (!(var->offset)) - var->offset = c++; + var->offset = count(); } } @@ -663,6 +790,9 @@ void emit_constant(int pos, char *data, Relocation **rel, Type *type) { static void emit_data(Obj *prog) { for (Obj *var = prog; var; var = var->next) { + push_buffer(TYPE_BUFFER); + typedecl(var->ty); + pop_buffer(TYPE_BUFFER); if (var->is_static && var->is_function) { printnoln("static Type_%d ", var->ty->id); print_obj(var); println(" ;"); @@ -732,7 +862,6 @@ static void emit_text(Obj *prog) { } for (Obj *var = fn->locals; var; var = var->next) { - if (!(var->ty->id)) continue; if (var->is_param) continue; if (var == fn->va_area) continue; if (var == fn->alloca_bottom) continue; @@ -744,11 +873,10 @@ static void emit_text(Obj *prog) { } // Emit code - DO_BUFFER = 1; + push_buffer(CODE_BUFFER); gen_stmt(fn->body); - assert(depth == 0); - flush_buffer(); - DO_BUFFER = 0; + pop_buffer(CODE_BUFFER); + flush_buffer(MAIN_BUFFER, CODE_BUFFER); println("\t_L_RETURN :"); if (fn->ty->return_ty->kind != TY_VOID) { @@ -762,10 +890,18 @@ static void emit_text(Obj *prog) { } void codegen(Obj *prog, FILE *out) { - output_file = out; - - BUFFER = tmpfile(); + TYPE_BUFFER = tmpfile(); + MAIN_BUFFER = tmpfile(); + CODE_BUFFER = tmpfile(); assign_lvar_offsets(prog); + + // Everything defaults to writing to main + push_buffer(MAIN_BUFFER); emit_data(prog); emit_text(prog); + pop_buffer(MAIN_BUFFER); + + fprintf(out, "#include \"%s/scripts/dietc_helpers.h\"\n", DIETC_ROOT); + flush_buffer(out, TYPE_BUFFER); + flush_buffer(out, MAIN_BUFFER); } diff --git a/docs/CHIBICC_MODS.txt b/docs/CHIBICC_MODS.txt index ab733ba..13d314a 100644 --- a/docs/CHIBICC_MODS.txt +++ b/docs/CHIBICC_MODS.txt @@ -3,7 +3,6 @@ we're attempting to stay as close to mainline chibicc as possible at a high level, the major changes made are: - strip out most of proprocess.c, as we're OK with using GCC's preprocessor (still need preprocess.c, as it handles parsing of literals) -- add a typegen.c, which runs before codegen & outputs the typedefs - replace codegen.c to output C instead of assembly to support these, ended up making some changes to the rest of the files. these @@ -54,7 +54,6 @@ static void cc1(char *base_file) { FILE *output_buf = open_memstream(&buf, &buflen); // Traverse the AST to emit assembly. - typegen(prog, output_buf); codegen(prog, output_buf); fclose(output_buf); diff --git a/scripts/dietcc b/scripts/dietcc index 110a647..540d168 100755 --- a/scripts/dietcc +++ b/scripts/dietcc @@ -127,6 +127,16 @@ def final_cleanup_pass(dietc, meta): return dietc +def make_external_pass(command): + def pass_(dietc, meta): + with tempfile.TemporaryDirectory() as tmpdir: + t_file = open(f"{tmpdir}/file.c", "wb") + t_file.write(dietc) + t_file.flush() + return check_result(subprocess.run(f"{command} {t_file.name}", + shell=True, capture_output=True)) + return pass_ + PASSES = [preprocess_pass, strip_after_preprocess_pass, dietc_pass, @@ -136,11 +146,10 @@ def main(): args = list(map(shlex.quote, sys.argv[1:])) args.insert(0, f"-I{dietc_dir}/scripts/stdincludes") # first, parse out any DietC passes - dietc_passes = [] while "--dietc-pass" in args: i = args.index("--dietc-pass") args.pop(i) - dietc_passes.append(args.pop(i)) + PASSES.insert(-1, make_external_pass(args.pop(i))) # then process all of the C files out_dir = tempfile.TemporaryDirectory() diff --git a/tests/void_cast.c b/tests/void_cast.c new file mode 100644 index 0000000..d559b2a --- /dev/null +++ b/tests/void_cast.c @@ -0,0 +1,5 @@ +int main() { + int x; + (void)x; + return 0; +} diff --git a/type_helpers.c b/type_helpers.c index 0f6b022..cfa2241 100644 --- a/type_helpers.c +++ b/type_helpers.c @@ -1,7 +1,7 @@ #include <stddef.h> #include "chibicc.h" -unsigned long hash_type(Type *type) { +static unsigned long hash_type(Type *type) { unsigned long hash = 6997; if (!type) return hash; if (type->hashing) return hash; @@ -76,3 +76,41 @@ disequal: type2->hashing = 0; return 0; } + +static Type **HASH_MAP = 0; +static int CAP_HASH_MAP = 0; +static int N_HASH_MAP = 0; +Type *hash_lookup(Type *type) { + if (!CAP_HASH_MAP) return 0; + + size_t idx = hash_type(type) % CAP_HASH_MAP; + while (HASH_MAP[idx]) { + if (definitely_same_type(HASH_MAP[idx], type)) { + return HASH_MAP[idx]; + } + if (++idx == CAP_HASH_MAP) idx = 0; + } + return 0; +} + +void hash_insert(Type *type) { + if ((N_HASH_MAP + 1) >= (CAP_HASH_MAP / 2)) { + // RESIZE AND REHASH + int old_cap = CAP_HASH_MAP; + Type **old_hash_map = HASH_MAP; + + CAP_HASH_MAP = (CAP_HASH_MAP + 1) * 4; + N_HASH_MAP = 0; + HASH_MAP = calloc(CAP_HASH_MAP, sizeof(HASH_MAP[0])); + for (int i = 0; i < old_cap; i++) + if (old_hash_map[i]) + hash_insert(old_hash_map[i]); + if (old_hash_map) free(old_hash_map); + } + + N_HASH_MAP++; + size_t idx = hash_type(type) % CAP_HASH_MAP; + while (HASH_MAP[idx]) + if (++idx == CAP_HASH_MAP) idx = 0; + HASH_MAP[idx] = type; +} diff --git a/typegen.c b/typegen.c deleted file mode 100644 index 75b3cff..0000000 --- a/typegen.c +++ /dev/null @@ -1,304 +0,0 @@ -#include "chibicc.h" - -static Type **HASH_MAP = 0; -static int CAP_HASH_MAP = 0; -static int N_HASH_MAP = 0; -Type *hash_lookup(Type *type) { - if (!CAP_HASH_MAP) return 0; - - size_t idx = hash_type(type) % CAP_HASH_MAP; - while (HASH_MAP[idx]) { - if (definitely_same_type(HASH_MAP[idx], type)) { - return HASH_MAP[idx]; - } - if (++idx == CAP_HASH_MAP) idx = 0; - } - return 0; -} - -void hash_insert(Type *type) { - if ((N_HASH_MAP + 1) >= (CAP_HASH_MAP / 2)) { - // RESIZE AND REHASH - int old_cap = CAP_HASH_MAP; - Type **old_hash_map = HASH_MAP; - - CAP_HASH_MAP = (CAP_HASH_MAP + 1) * 4; - N_HASH_MAP = 0; - HASH_MAP = calloc(CAP_HASH_MAP, sizeof(HASH_MAP[0])); - for (int i = 0; i < old_cap; i++) { - if (old_hash_map[i]) - hash_insert(old_hash_map[i]); - } - if (old_hash_map) free(old_hash_map); - } - - N_HASH_MAP++; - size_t idx = hash_type(type) % CAP_HASH_MAP; - while (HASH_MAP[idx]) { - if (++idx == CAP_HASH_MAP) idx = 0; - } - HASH_MAP[idx] = type; -} - -static int count(void) { - static int i = 1; - return i++; -} - -static void printnoln(char *fmt, ...); -static void println(char *fmt, ...); - -static void print_tok(Token *tok) { - if (tok->str) printnoln("%s", tok->str); - else { - assert(tok->loc); - for (int i = 0; i < tok->len; i++) - printnoln("%c", tok->loc[i]); - } -} - -const void addrdecl(Type *type); -const void typedecl(Type *type) { - if (type->id) return; - if (type->kind == TY_ARRAY) - addrdecl(type->base); - Type *hashed = hash_lookup(type); - if (hashed && !(hashed->typedecling)) { - assert(hashed->id); - type->id = hashed->id; - if (hashed->pointer_type) - type->pointer_type = hashed->pointer_type; - if (hashed->return_ty) - type->return_ty = hashed->return_ty; - if (hashed->params) - type->params = hashed->params; - return; - } - type->typedecling = 1; - type->id = count(); - hash_insert(type); - switch (type->kind) { - case TY_FLOAT: - println("typedef float Type_%d ;", type->id); - break; - case TY_DOUBLE: - println("typedef double Type_%d ;", type->id); - break; - case TY_LDOUBLE: - println("typedef long double Type_%d ;", type->id); - break; - case TY_INT: - if (type->is_unsigned) - println("typedef unsigned int Type_%d ;", type->id); - else - println("typedef int Type_%d ;", type->id); - break; - case TY_LONG: - if (type->is_unsigned) - println("typedef unsigned long Type_%d ;", type->id); - else - println("typedef long Type_%d ;", type->id); - break; - case TY_SHORT: - if (type->is_unsigned) - println("typedef unsigned short Type_%d ;", type->id); - else - println("typedef short Type_%d ;", type->id); - break; - case TY_VOID: - println("typedef void Type_%d ;", type->id); - break; - case TY_BOOL: - println("typedef _Bool Type_%d ;", type->id); - break; - case TY_CHAR: - println("typedef char Type_%d ;", type->id); - break; - case TY_PTR: - typedecl(type->base); - println("typedef Type_%d * Type_%d ;", type->base->id, type->id); - break; - case TY_FUNC: { - typedecl(type->return_ty); - for (Type *p = type->params; p; p = p->next) - typedecl(p); - printnoln("typedef Type_%d Type_%d ( ", type->return_ty->id, type->id); - - int i = 0; - for (Type *p = type->params; p; p = p->next, i++) - if (i) printnoln(", Type_%d ", p->id); - else printnoln("Type_%d ", p->id); - - if (type->is_variadic) { - if (i) printnoln(", ... "); - // else printnoln("... "); - } - println(") ;"); - addrdecl(type); - break; - } - case TY_ARRAY: - typedecl(type->base); - if (type->array_len == -1) - println("typedef Type_%d Type_%d [ ] ;", type->base->id, type->id); - else - println("typedef Type_%d Type_%d [ %d ] ;", type->base->id, type->id, - type->array_len); - break; - case TY_STRUCT: - case TY_UNION: - if (type->kind == TY_STRUCT) - println("typedef struct Struct_%d Type_%d ;", type->id, type->id); - else - println("typedef union Union_%d Type_%d ;", type->id, type->id); - - for (Member *m = type->members; m; m = m->next) - typedecl(m->ty); - - if (type->kind == TY_STRUCT) printnoln("struct Struct_%d { ", type->id); - else printnoln("union Union_%d { ", type->id); - for (Member *m = type->members; m; m = m->next) { - printnoln("Type_%d ", m->ty->id); - if (m->name) - print_tok(m->name); - else if (m->ty->kind == TY_STRUCT || m->ty->kind == TY_UNION) - printnoln("___dietc_f%d", m->idx); - else - printnoln("__field_%d", count()); - printnoln(" ; "); - } - println("} ;"); - break; - case TY_ENUM: - println("typedef enum Enum_%d { EN_%d } Type_%d ;", type->id, type->id, type->id); - break; - case TY_VLA: - assert(!"unimplemented vla?"); - break; - default: - assert(!"unimplemented?"); - break; - } - type->typedecling = 0; -} - -static FILE *output_file; -static Obj *current_fn; - -__attribute__((format(printf, 1, 2))) -static void println(char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - vfprintf(output_file, fmt, ap); - va_end(ap); - fprintf(output_file, "\n"); -} - -__attribute__((format(printf, 1, 2))) -static void printnoln(char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - vfprintf(output_file, fmt, ap); - va_end(ap); -} - -const void addrdecl(Type *type) { - if (type->pointer_type) return; - type->pointer_type = pointer_to(type); - typedecl(type->pointer_type); -} - -static void gen_addr_decls(Node *node) { - switch (node->kind) { - case ND_VAR: - addrdecl(node->ty); - return; - case ND_COMMA: - gen_addr_decls(node->rhs); - return; - case ND_MEMBER: - gen_addr_decls(node->lhs); - addrdecl(node->ty); - return; - } -} - -// Generate code for a given node. -static void gen_typedecls(Node *node) { - if (!node) return; - if (node->ty) typedecl(node->ty); - gen_typedecls(node->lhs); - gen_typedecls(node->rhs); - gen_typedecls(node->cond); - gen_typedecls(node->then); - gen_typedecls(node->els); - gen_typedecls(node->init); - gen_typedecls(node->inc); - for (Node *n = node->body; n; n = n->next) - gen_typedecls(n); - for (Node *arg = node->args; arg; arg = arg->next) - gen_typedecls(arg); - if (node->kind == ND_SWITCH) - for (Node *n = node->case_next; n; n = n->case_next) - gen_typedecls(n); - - switch (node->kind) { - case ND_VAR: - if (node->ty->kind == TY_ARRAY) - addrdecl(node->ty->base); - break; - case ND_CAST: - if (node->lhs->ty->kind == TY_UNION) - gen_addr_decls(node->lhs); - addrdecl(node->ty); - break; - case ND_MEMBER: - gen_addr_decls(node); - if (node->ty->kind == TY_ARRAY) - addrdecl(node->ty->base); - break; - case ND_ADDR: - case ND_ASSIGN: - gen_addr_decls(node->lhs); - break; - } -} - -static void typedecl_data(Obj *prog) { - for (Obj *var = prog; var; var = var->next) { - // if (var->is_function || !var->is_definition) - // continue; - typedecl(var->ty); - } -} - -static void typedecl_text(Obj *prog) { - for (Obj *fn = prog; fn; fn = fn->next) { - if (!fn->is_function || !fn->is_definition) - continue; - - // No code is emitted for "static inline" functions - // if no one is referencing them. - if (!fn->is_live) - continue; - - current_fn = fn; - - typedecl(fn->ty); - gen_typedecls(fn->body); - - // ensure function params & locals are typegened - for (Obj *var = fn->locals; var; var = var->next) { - typedecl(var->ty); - } - } -} - -void typegen(Obj *prog, FILE *out) { - output_file = out; - - printnoln("#include \"%s/scripts/dietc_helpers.h\"\n", DIETC_ROOT); - typedecl(ty_int); - typedecl_data(prog); - typedecl_text(prog); -} |