aboutsummaryrefslogtreecommitdiff
path: root/compiler/codegen.cup
diff options
context:
space:
mode:
authorMustafa Quraish <[email protected]>2022-02-07 03:02:39 -0500
committerMustafa Quraish <[email protected]>2022-02-07 03:18:08 -0500
commit3817688851fae07b1d6a13ba2ce1906fc9811f8f (patch)
treebb936b224cada39dc7ede856d9f15a4000950526 /compiler/codegen.cup
parentAdd missing files to self-hosted directory (diff)
downloadcup-3817688851fae07b1d6a13ba2ce1906fc9811f8f.tar.xz
cup-3817688851fae07b1d6a13ba2ce1906fc9811f8f.zip
[cup] Self-hosting is now possible! Make some tweaks to match C output
A bit of a chonky commit, but this ports over (well, almost) everything that remained from the C implementation to the self-hosted compiler. The only things that really remain right now are (1) defer support and (2) support for constants in local scopes. These were used rarely enough that, for now, their uses have been removed, but I'll implement them again later. Not sure how useful (2) is though.
Diffstat (limited to 'compiler/codegen.cup')
-rw-r--r--compiler/codegen.cup124
1 files changed, 89 insertions, 35 deletions
diff --git a/compiler/codegen.cup b/compiler/codegen.cup
index 3d52397..c18500f 100644
--- a/compiler/codegen.cup
+++ b/compiler/codegen.cup
@@ -3,7 +3,7 @@ import "compiler/ast.cup"
import "std/file.cup"
let gen_out_file: File*;
-let gen_label_counter = 0;
+let gen_label_counter = -1; // So the labels start at 0
let gen_string_literals = vector_new();
@@ -49,6 +49,24 @@ fn generate_syscall(num: int) {
emit_asm(" syscall\n");
}
+fn subregister_for_type(typ: Type*): char* { // Picks the name of the rax sub-register matching size_for_type(typ)
+ let n = size_for_type(typ);
+ if (n == 1) return "al"; // low 8 bits of rax
+ if (n == 2) return "ax"; // low 16 bits of rax
+ if (n == 4) return "eax"; // low 32 bits of rax
+ if (n == 8) return "rax"; // the full 64-bit register
+ die2(here, "Unsupported type size"); // Reached for any other size; die2 presumably does not return -- TODO confirm
+}
+
+fn specifier_for_type(typ: Type*): char* { // Picks the assembly operand-size keyword matching size_for_type(typ)
+ let n = size_for_type(typ);
+ if (n == 1) return "byte"; // size 1
+ if (n == 2) return "word"; // size 2
+ if (n == 4) return "dword"; // size 4
+ if (n == 8) return "qword"; // size 8
+ die2(here, "Unsupported type size"); // Reached for any other size; die2 presumably does not return -- TODO confirm
+}
+
fn generate_expr_into_rax(node: Node*);
fn generate_lvalue_into_rax(node: Node*) {
@@ -60,6 +78,13 @@ fn generate_lvalue_into_rax(node: Node*) {
let offset = node.d.variable.offset;
emit_asm(" mov rax, global_vars\n");
emit_asm(" add rax, "); emit_num(offset); emit_asm("\n");
+ } else if (node.typ == AST_MEMBER) {
+ let offset = node.d.member.offset;
+ if (node.d.member.is_ptr)
+ generate_expr_into_rax(node.d.member.obj);
+ else
+ generate_lvalue_into_rax(node.d.member.obj);
+ emit_asm(" add rax, "); emit_num(offset); emit_asm("\n");
} else if (node.typ == AST_DEREF) {
generate_expr_into_rax(node.d.unary);
} else {
@@ -102,12 +127,12 @@ fn generate_expr_into_rax(node: Node*) {
let label = ++gen_label_counter;
generate_expr_into_rax(node.d.conditional.cond);
emit_asm(" cmp rax, 0\n");
- emit_asm(" je .cond_els"); emit_num(label); emit_asm("\n");
+ emit_asm(" je .cond_else_"); emit_num(label); emit_asm("\n");
generate_expr_into_rax(node.d.conditional.then);
- emit_asm(" jmp .cond"); emit_num(label); emit_asm("\n");
- emit_asm(".cond_els"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .cond_end_"); emit_num(label); emit_asm("\n");
+ emit_asm(".cond_else_"); emit_num(label); emit_asm(":\n");
generate_expr_into_rax(node.d.conditional.els);
- emit_asm(".cond"); emit_num(label); emit_asm(":\n");
+ emit_asm(".cond_end_"); emit_num(label); emit_asm(":\n");
} else if (node.typ == AST_PLUS) {
generate_expr_into_rax(node.d.binary.rhs);
@@ -242,41 +267,46 @@ fn generate_expr_into_rax(node: Node*) {
generate_expr_into_rax(node.d.binary.lhs,);
// If left is true, we can short-circuit
emit_asm(" cmp rax, 0\n");
- emit_asm(" je .or_r"); emit_num(label); emit_asm("\n");
+ emit_asm(" je .or_right_"); emit_num(label); emit_asm("\n");
emit_asm(" mov rax, 1\n");
- emit_asm(" jmp .or_e"); emit_num(label); emit_asm("\n");
- emit_asm(".or_r"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .or_end_"); emit_num(label); emit_asm("\n");
+ emit_asm(".or_right_"); emit_num(label); emit_asm(":\n");
generate_expr_into_rax(node.d.binary.rhs);
// Booleanize the result
emit_asm(" cmp rax, 0\n");
emit_asm(" setne al\n");
- emit_asm(".or_e"); emit_num(label); emit_asm(":\n");
+ emit_asm(".or_end_"); emit_num(label); emit_asm(":\n");
} else if (node.typ == AST_AND) {
let label = ++gen_label_counter;
generate_expr_into_rax(node.d.binary.lhs);
// If left is false, we can short-circuit
emit_asm(" cmp rax, 0\n");
- emit_asm(" jne .and_r"); emit_num(label); emit_asm("\n");
+ emit_asm(" jne .and_right_"); emit_num(label); emit_asm("\n");
emit_asm(" mov rax, 0\n");
- emit_asm(" jmp .and_e"); emit_num(label); emit_asm("\n");
- emit_asm(".and_r"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .and_end_"); emit_num(label); emit_asm("\n");
+ emit_asm(".and_right_"); emit_num(label); emit_asm(":\n");
generate_expr_into_rax(node.d.binary.rhs);
// Booleanize the result
emit_asm(" cmp rax, 0\n");
emit_asm(" setne al\n");
- emit_asm(".and_e"); emit_num(label); emit_asm(":\n");
+ emit_asm(".and_end_"); emit_num(label); emit_asm(":\n");
} else if (is_lvalue(node.typ)) {
generate_lvalue_into_rax(node);
- emit_asm(" mov rax, [rax]\n");
+ if (size_for_type(node.etyp) == 8) {
+ emit_asm(" mov rax, [rax]\n");
+ } else {
+ emit_asm3(" movsx rax, ", specifier_for_type(node.etyp), " [rax]\n");
+ }
} else if (node.typ == AST_ASSIGN) {
- generate_lvalue_into_rax(node.d.assign.lhs);
+ let var = node.d.assign.lhs;
+ generate_lvalue_into_rax(var);
emit_asm(" push rax\n");
generate_expr_into_rax(node.d.assign.rhs);
emit_asm(" pop rbx\n");
- emit_asm(" mov [rbx], rax\n");
+ emit_asm3(" mov [rbx], ", subregister_for_type(var.etyp), "\n");
} else if (node.typ == AST_FUNCCALL) {
generate_function_call(node);
@@ -290,7 +320,16 @@ fn generate_block(node: Node*);
fn generate_statement(node: Node*) {
if (node.typ == AST_RETURN) {
- generate_expr_into_rax(node.d.unary);
+ if (node.d.unary) {
+ generate_expr_into_rax(node.d.unary);
+ } else {
+ emit_asm(" xor rax, rax\n"); // Default to 0
+ }
+
+ emit_asm(" push rax\n");
+ // TODO: Undo the defer stack here, this is for consistency with the C implementation for now.
+ emit_asm(" pop rax\n");
+
emit_asm(" mov rsp, rbp\n");
emit_asm(" pop rbp\n");
emit_asm(" ret\n");
@@ -305,40 +344,48 @@ fn generate_statement(node: Node*) {
} else if (node.typ == AST_IF) {
let label = ++gen_label_counter;
generate_expr_into_rax(node.d.conditional.cond);
- emit_asm(" cmp rax, 0\n");
- emit_asm(" je .els"); emit_num(label); emit_asm("\n");
- generate_statement(node.d.conditional.then);
- emit_asm(" jmp .if"); emit_num(label); emit_asm("\n");
- emit_asm(".els"); emit_num(label); emit_asm(":\n");
- if (node.d.conditional.els)
+ // If we don't have an `else` clause, we can simplify
+ if (node.d.conditional.els == null) {
+ emit_asm(" cmp rax, 0\n");
+ emit_asm(" je .if_end_"); emit_num(label); emit_asm("\n");
+ generate_statement(node.d.conditional.then);
+ emit_asm(".if_end_"); emit_num(label); emit_asm(":\n");
+ } else {
+ emit_asm(" cmp rax, 0\n");
+ emit_asm(" je .if_else_"); emit_num(label); emit_asm("\n");
+ generate_statement(node.d.conditional.then);
+ emit_asm(" jmp .if_end_"); emit_num(label); emit_asm("\n");
+ emit_asm(".if_else_"); emit_num(label); emit_asm(":\n");
generate_statement(node.d.conditional.els);
- emit_asm(".if"); emit_num(label); emit_asm(":\n");
-
+ emit_asm(".if_end_"); emit_num(label); emit_asm(":\n");
+ }
} else if (node.typ == AST_WHILE) {
let label = ++gen_label_counter;
- emit_asm(".loop_s"); emit_num(label); emit_asm(":\n");
+ emit_asm(".loop_start_"); emit_num(label); emit_asm(":\n");
+ emit_asm(".loop_continue_"); emit_num(label); emit_asm(":\n");
generate_expr_into_rax(node.d.looop.cond);
emit_asm(" cmp rax, 0\n");
- emit_asm(" je .loop_e"); emit_num(label); emit_asm("\n");
+ emit_asm(" je .loop_end_"); emit_num(label); emit_asm("\n");
generate_statement(node.d.looop.body);
- emit_asm(" jmp .loop_s"); emit_num(label); emit_asm("\n");
- emit_asm(".loop_e"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .loop_start_"); emit_num(label); emit_asm("\n");
+ emit_asm(".loop_end_"); emit_num(label); emit_asm(":\n");
} else if (node.typ == AST_FOR) {
let label = ++gen_label_counter;
if (node.d.looop.init)
generate_statement(node.d.looop.init);
- emit_asm(".loop_s"); emit_num(label); emit_asm(":\n");
+ emit_asm(".loop_start_"); emit_num(label); emit_asm(":\n");
if (node.d.looop.cond) {
generate_expr_into_rax(node.d.looop.cond);
emit_asm(" cmp rax, 0\n");
- emit_asm(" je .loop_e"); emit_num(label); emit_asm("\n");
+ emit_asm(" je .loop_end_"); emit_num(label); emit_asm("\n");
}
generate_statement(node.d.looop.body);
+ emit_asm(".loop_continue_"); emit_num(label); emit_asm(":\n");
if (node.d.looop.step)
generate_statement(node.d.looop.step);
- emit_asm(" jmp .loop_s"); emit_num(label); emit_asm("\n");
- emit_asm(".loop_e"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .loop_start_"); emit_num(label); emit_asm("\n");
+ emit_asm(".loop_end_"); emit_num(label); emit_asm(":\n");
} else {
// Default to a simple expression statement
@@ -354,6 +401,10 @@ fn generate_block(node: Node*) {
}
fn generate_function(node: Node*) {
+ // Skip declarations
+ if (node.d.func.body == null)
+ return;
+
emit_asm3("global func_", node.d.func.name, "\n");
emit_asm3("func_", node.d.func.name, ":\n");
emit_asm(" push rbp\n");
@@ -364,6 +415,8 @@ fn generate_function(node: Node*) {
emit_asm(" mov rsp, rbp\n");
emit_asm(" pop rbp\n");
+ // Return 0 by default if we don't have a return statement
+ emit_asm(" mov qword rax, 0\n");
emit_asm(" ret\n");
}
@@ -409,11 +462,12 @@ fn generate_program(ast: Node*, file: File*) {
for (let i = 0; i < n; ++i) {
let node: Node* = ast.d.block.children.data[i];
if (node.typ == AST_VARDECL && node.d.var_decl.init) {
+ let expr = node.d.var_decl.init;
generate_expr_into_rax(node.d.var_decl.init);
let offset = node.d.var_decl.var.offset;
emit_asm(" mov rbx, global_vars\n");
emit_asm(" add rbx, "); emit_num(offset); emit_asm("\n");
- emit_asm(" mov [rbx], rax\n");
+ emit_asm3(" mov [rbx], ", subregister_for_type(expr.etyp), "\n");
}
}
@@ -436,6 +490,6 @@ fn generate_program(ast: Node*, file: File*) {
emit_asm_char('`');
emit_asm(gen_string_literals.data[i]);
emit_asm_char('`');
- emit_asm("\n");
+ emit_asm(", 0\n");
}
} \ No newline at end of file