aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/ast.cup28
-rw-r--r--compiler/codegen.cup124
-rw-r--r--compiler/lexer.cup15
-rw-r--r--compiler/main.cup81
-rw-r--r--compiler/parser.cup196
-rw-r--r--compiler/types.cup10
-rwxr-xr-xrun.sh10
-rwxr-xr-xrun.sh29
-rw-r--r--src/builtins.c4
-rw-r--r--src/generator.c1
-rw-r--r--std/vector.cup2
11 files changed, 390 insertions, 90 deletions
diff --git a/compiler/ast.cup b/compiler/ast.cup
index b452241..2eb96f0 100644
--- a/compiler/ast.cup
+++ b/compiler/ast.cup
@@ -75,6 +75,7 @@ struct Node {
body: Node *;
max_locals_size: int;
args: Vector *; // Vector<Variable>
+ is_defined: int;
};
block: struct {
@@ -462,4 +463,31 @@ fn type_check_binary(node: Node*, token: Token*): Node*
node.etyp = type_new(TYPE_INT);
}
return node;
+}
+
+// FIXME: These should be in `types.cup` ideally, but `Variable` is not defined
+// there and we can't forward-declare types.
+
+// Append a field called `name` of type `typ` to the struct/union `compound`.
+//
+// For a struct the field is placed at the current size rounded up to
+// min(field size, 8), and the struct grows to end right after the field.
+// For a union every field sits at offset 0 and the union's size becomes the
+// largest field size seen so far.
+//
+// Returns the offset assigned to the new field. Dies if `compound` is neither
+// a struct nor a union.
+fn compound_push_field(compound: Type*, name: char*, typ: Type*): int {
+    let is_struct = compound.typ == TYPE_STRUCT;
+    if (!is_struct && compound.typ != TYPE_UNION)
+        die("compound_push_field: not a compound type");
+
+    let field_size = size_for_type(typ);
+    let alignment = min(field_size, 8);
+
+    let offset = 0;
+    if (is_struct) {
+        // Structs lay fields out one after another, aligned.
+        offset = align_up(compound.size, alignment);
+        compound.size = offset + field_size;
+    } else {
+        // Union members overlap; only the overall size can change.
+        compound.size = max(field_size, compound.size);
+    }
+
+    vector_push(compound.fields, variable_new(name, typ, offset));
+    return offset;
+}
+
+// Look up the field called `name` in the compound type `typ`.
+// Scans `typ.fields` front to back and returns the first entry whose name
+// matches according to `streq`, or null when there is none.
+fn compound_find_field(typ: Type*, name: char*): Variable* {
+    let fields = typ.fields;
+    for (let idx = 0; idx < fields.size; idx = idx + 1) {
+        let candidate: Variable* = fields.data[idx];
+        if (streq(candidate.name, name))
+            return candidate;
+    }
+    return null;
} \ No newline at end of file
diff --git a/compiler/codegen.cup b/compiler/codegen.cup
index 3d52397..c18500f 100644
--- a/compiler/codegen.cup
+++ b/compiler/codegen.cup
@@ -3,7 +3,7 @@ import "compiler/ast.cup"
import "std/file.cup"
let gen_out_file: File*;
-let gen_label_counter = 0;
+let gen_label_counter = -1; // So the labels start at 0
let gen_string_literals = vector_new();
@@ -49,6 +49,24 @@ fn generate_syscall(num: int) {
emit_asm(" syscall\n");
}
+// Name the part of the `rax` register that is exactly as wide as `typ`.
+// Sizes 1, 2, 4 and 8 (as reported by `size_for_type`) give "al", "ax",
+// "eax" and "rax"; any other size is reported through `die2`.
+fn subregister_for_type(typ: Type*): char* {
+    let size = size_for_type(typ);
+    if (size == 8) {
+        return "rax";
+    } else if (size == 4) {
+        return "eax";
+    } else if (size == 2) {
+        return "ax";
+    } else if (size == 1) {
+        return "al";
+    }
+    die2(here, "Unsupported type size");
+}
+
+// Name the operand-size keyword to put in front of a memory operand of type
+// `typ` in the emitted assembly. Sizes 1, 2, 4 and 8 (as reported by
+// `size_for_type`) give "byte", "word", "dword" and "qword"; any other size
+// is reported through `die2`.
+fn specifier_for_type(typ: Type*): char* {
+    let size = size_for_type(typ);
+    if (size == 8) {
+        return "qword";
+    } else if (size == 4) {
+        return "dword";
+    } else if (size == 2) {
+        return "word";
+    } else if (size == 1) {
+        return "byte";
+    }
+    die2(here, "Unsupported type size");
+}
+
fn generate_expr_into_rax(node: Node*);
fn generate_lvalue_into_rax(node: Node*) {
@@ -60,6 +78,13 @@ fn generate_lvalue_into_rax(node: Node*) {
let offset = node.d.variable.offset;
emit_asm(" mov rax, global_vars\n");
emit_asm(" add rax, "); emit_num(offset); emit_asm("\n");
+ } else if (node.typ == AST_MEMBER) {
+ let offset = node.d.member.offset;
+ if (node.d.member.is_ptr)
+ generate_expr_into_rax(node.d.member.obj);
+ else
+ generate_lvalue_into_rax(node.d.member.obj);
+ emit_asm(" add rax, "); emit_num(offset); emit_asm("\n");
} else if (node.typ == AST_DEREF) {
generate_expr_into_rax(node.d.unary);
} else {
@@ -102,12 +127,12 @@ fn generate_expr_into_rax(node: Node*) {
let label = ++gen_label_counter;
generate_expr_into_rax(node.d.conditional.cond);
emit_asm(" cmp rax, 0\n");
- emit_asm(" je .cond_els"); emit_num(label); emit_asm("\n");
+ emit_asm(" je .cond_else_"); emit_num(label); emit_asm("\n");
generate_expr_into_rax(node.d.conditional.then);
- emit_asm(" jmp .cond"); emit_num(label); emit_asm("\n");
- emit_asm(".cond_els"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .cond_end_"); emit_num(label); emit_asm("\n");
+ emit_asm(".cond_else_"); emit_num(label); emit_asm(":\n");
generate_expr_into_rax(node.d.conditional.els);
- emit_asm(".cond"); emit_num(label); emit_asm(":\n");
+ emit_asm(".cond_end_"); emit_num(label); emit_asm(":\n");
} else if (node.typ == AST_PLUS) {
generate_expr_into_rax(node.d.binary.rhs);
@@ -242,41 +267,46 @@ fn generate_expr_into_rax(node: Node*) {
generate_expr_into_rax(node.d.binary.lhs,);
// If left is true, we can short-circuit
emit_asm(" cmp rax, 0\n");
- emit_asm(" je .or_r"); emit_num(label); emit_asm("\n");
+ emit_asm(" je .or_right_"); emit_num(label); emit_asm("\n");
emit_asm(" mov rax, 1\n");
- emit_asm(" jmp .or_e"); emit_num(label); emit_asm("\n");
- emit_asm(".or_r"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .or_end_"); emit_num(label); emit_asm("\n");
+ emit_asm(".or_right_"); emit_num(label); emit_asm(":\n");
generate_expr_into_rax(node.d.binary.rhs);
// Booleanize the result
emit_asm(" cmp rax, 0\n");
emit_asm(" setne al\n");
- emit_asm(".or_e"); emit_num(label); emit_asm(":\n");
+ emit_asm(".or_end_"); emit_num(label); emit_asm(":\n");
} else if (node.typ == AST_AND) {
let label = ++gen_label_counter;
generate_expr_into_rax(node.d.binary.lhs);
// If left is false, we can short-circuit
emit_asm(" cmp rax, 0\n");
- emit_asm(" jne .and_r"); emit_num(label); emit_asm("\n");
+ emit_asm(" jne .and_right_"); emit_num(label); emit_asm("\n");
emit_asm(" mov rax, 0\n");
- emit_asm(" jmp .and_e"); emit_num(label); emit_asm("\n");
- emit_asm(".and_r"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .and_end_"); emit_num(label); emit_asm("\n");
+ emit_asm(".and_right_"); emit_num(label); emit_asm(":\n");
generate_expr_into_rax(node.d.binary.rhs);
// Booleanize the result
emit_asm(" cmp rax, 0\n");
emit_asm(" setne al\n");
- emit_asm(".and_e"); emit_num(label); emit_asm(":\n");
+ emit_asm(".and_end_"); emit_num(label); emit_asm(":\n");
} else if (is_lvalue(node.typ)) {
generate_lvalue_into_rax(node);
- emit_asm(" mov rax, [rax]\n");
+ if (size_for_type(node.etyp) == 8) {
+ emit_asm(" mov rax, [rax]\n");
+ } else {
+ emit_asm3(" movsx rax, ", specifier_for_type(node.etyp), " [rax]\n");
+ }
} else if (node.typ == AST_ASSIGN) {
- generate_lvalue_into_rax(node.d.assign.lhs);
+ let var = node.d.assign.lhs;
+ generate_lvalue_into_rax(var);
emit_asm(" push rax\n");
generate_expr_into_rax(node.d.assign.rhs);
emit_asm(" pop rbx\n");
- emit_asm(" mov [rbx], rax\n");
+ emit_asm3(" mov [rbx], ", subregister_for_type(var.etyp), "\n");
} else if (node.typ == AST_FUNCCALL) {
generate_function_call(node);
@@ -290,7 +320,16 @@ fn generate_block(node: Node*);
fn generate_statement(node: Node*) {
if (node.typ == AST_RETURN) {
- generate_expr_into_rax(node.d.unary);
+ if (node.d.unary) {
+ generate_expr_into_rax(node.d.unary);
+ } else {
+ emit_asm(" xor rax, rax\n"); // Default to 0
+ }
+
+ emit_asm(" push rax\n");
+ // TODO: Undo the defer stack here, this is for consistency with the C implementation for now.
+ emit_asm(" pop rax\n");
+
emit_asm(" mov rsp, rbp\n");
emit_asm(" pop rbp\n");
emit_asm(" ret\n");
@@ -305,40 +344,48 @@ fn generate_statement(node: Node*) {
} else if (node.typ == AST_IF) {
let label = ++gen_label_counter;
generate_expr_into_rax(node.d.conditional.cond);
- emit_asm(" cmp rax, 0\n");
- emit_asm(" je .els"); emit_num(label); emit_asm("\n");
- generate_statement(node.d.conditional.then);
- emit_asm(" jmp .if"); emit_num(label); emit_asm("\n");
- emit_asm(".els"); emit_num(label); emit_asm(":\n");
- if (node.d.conditional.els)
+ // If we don't have an `else` clause, we can simplify
+ if (node.d.conditional.els == null) {
+ emit_asm(" cmp rax, 0\n");
+ emit_asm(" je .if_end_"); emit_num(label); emit_asm("\n");
+ generate_statement(node.d.conditional.then);
+ emit_asm(".if_end_"); emit_num(label); emit_asm(":\n");
+ } else {
+ emit_asm(" cmp rax, 0\n");
+ emit_asm(" je .if_else_"); emit_num(label); emit_asm("\n");
+ generate_statement(node.d.conditional.then);
+ emit_asm(" jmp .if_end_"); emit_num(label); emit_asm("\n");
+ emit_asm(".if_else_"); emit_num(label); emit_asm(":\n");
generate_statement(node.d.conditional.els);
- emit_asm(".if"); emit_num(label); emit_asm(":\n");
-
+ emit_asm(".if_end_"); emit_num(label); emit_asm(":\n");
+ }
} else if (node.typ == AST_WHILE) {
let label = ++gen_label_counter;
- emit_asm(".loop_s"); emit_num(label); emit_asm(":\n");
+ emit_asm(".loop_start_"); emit_num(label); emit_asm(":\n");
+ emit_asm(".loop_continue_"); emit_num(label); emit_asm(":\n");
generate_expr_into_rax(node.d.looop.cond);
emit_asm(" cmp rax, 0\n");
- emit_asm(" je .loop_e"); emit_num(label); emit_asm("\n");
+ emit_asm(" je .loop_end_"); emit_num(label); emit_asm("\n");
generate_statement(node.d.looop.body);
- emit_asm(" jmp .loop_s"); emit_num(label); emit_asm("\n");
- emit_asm(".loop_e"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .loop_start_"); emit_num(label); emit_asm("\n");
+ emit_asm(".loop_end_"); emit_num(label); emit_asm(":\n");
} else if (node.typ == AST_FOR) {
let label = ++gen_label_counter;
if (node.d.looop.init)
generate_statement(node.d.looop.init);
- emit_asm(".loop_s"); emit_num(label); emit_asm(":\n");
+ emit_asm(".loop_start_"); emit_num(label); emit_asm(":\n");
if (node.d.looop.cond) {
generate_expr_into_rax(node.d.looop.cond);
emit_asm(" cmp rax, 0\n");
- emit_asm(" je .loop_e"); emit_num(label); emit_asm("\n");
+ emit_asm(" je .loop_end_"); emit_num(label); emit_asm("\n");
}
generate_statement(node.d.looop.body);
+ emit_asm(".loop_continue_"); emit_num(label); emit_asm(":\n");
if (node.d.looop.step)
generate_statement(node.d.looop.step);
- emit_asm(" jmp .loop_s"); emit_num(label); emit_asm("\n");
- emit_asm(".loop_e"); emit_num(label); emit_asm(":\n");
+ emit_asm(" jmp .loop_start_"); emit_num(label); emit_asm("\n");
+ emit_asm(".loop_end_"); emit_num(label); emit_asm(":\n");
} else {
// Default to a simple expression statement
@@ -354,6 +401,10 @@ fn generate_block(node: Node*) {
}
fn generate_function(node: Node*) {
+ // Skip declarations
+ if (node.d.func.body == null)
+ return;
+
emit_asm3("global func_", node.d.func.name, "\n");
emit_asm3("func_", node.d.func.name, ":\n");
emit_asm(" push rbp\n");
@@ -364,6 +415,8 @@ fn generate_function(node: Node*) {
emit_asm(" mov rsp, rbp\n");
emit_asm(" pop rbp\n");
+ // Return 0 by default if we don't have a return statement
+ emit_asm(" mov qword rax, 0\n");
emit_asm(" ret\n");
}
@@ -409,11 +462,12 @@ fn generate_program(ast: Node*, file: File*) {
for (let i = 0; i < n; ++i) {
let node: Node* = ast.d.block.children.data[i];
if (node.typ == AST_VARDECL && node.d.var_decl.init) {
+ let expr = node.d.var_decl.init;
generate_expr_into_rax(node.d.var_decl.init);
let offset = node.d.var_decl.var.offset;
emit_asm(" mov rbx, global_vars\n");
emit_asm(" add rbx, "); emit_num(offset); emit_asm("\n");
- emit_asm(" mov [rbx], rax\n");
+ emit_asm3(" mov [rbx], ", subregister_for_type(expr.etyp), "\n");
}
}
@@ -436,6 +490,6 @@ fn generate_program(ast: Node*, file: File*) {
emit_asm_char('`');
emit_asm(gen_string_literals.data[i]);
emit_asm_char('`');
- emit_asm("\n");
+ emit_asm(", 0\n");
}
} \ No newline at end of file
diff --git a/compiler/lexer.cup b/compiler/lexer.cup
index 50b06e2..9e967f8 100644
--- a/compiler/lexer.cup
+++ b/compiler/lexer.cup
@@ -19,16 +19,11 @@ fn lexer_new(filename: char*, src: char*, len: int): Lexer* {
}
fn lexer_new_open_file(filename: char*): Lexer* {
- let input_file = fopen(filename, 'r');
- defer fclose(input_file);
-
- // using `fmap` here doesn't work on linux, for some reason.
- let file_size = fsize(input_file);
- let src: char* = malloc(file_size+1);
- fread(input_file, src, file_size);
- src[file_size] = '\0';
-
- return lexer_new(filename, src, file_size);
+ let file = fopen(filename, 'r');
+ let size = 0;
+ let source = fread_to_string(file, &size);
+ fclose(file);
+ return lexer_new(filename, source, size);
}
fn lexer_loc(lexer: Lexer*, loc: Location*) {
diff --git a/compiler/main.cup b/compiler/main.cup
index fa7db31..7a4ae0c 100644
--- a/compiler/main.cup
+++ b/compiler/main.cup
@@ -5,23 +5,80 @@ import "compiler/builtins.cup"
import "compiler/parser.cup"
import "compiler/codegen.cup"
+// Print the command-line help text, then end the process.
+//   name:   program name shown in the usage line (callers pass argv[0])
+//   status: exit code handed to `exit`
+fn print_usage_and_exit(name: char*, status: int) {
+    // Usage line, written in three pieces around the program name.
+    puts("Usage: ");
+    puts(name);
+    putsln(" [options] <file>");
+
+    // One line per supported option.
+    putsln("Options:");
+    putsln(" -c <code> Code to compile");
+    putsln(" -h Show this help");
+    putsln(" -o <file> Output file");
+    putsln(" -d Dump AST to stdout");
+
+    putsln("Output file will be named 'build/host.nasm' by default");
+    exit(status);
+}
+
+// FIXME: Allow variables and functions to be static to a file?
+// We could avoid the `m_` prefixes then.
+const MAX_STDIN_SOURCE_LEN = 4096;
+let m_filename: char*;
+let m_outut_filename = "build/host.nasm";
+let m_source: char*;
+let m_source_len = 0;
+let m_dump_ast = false;
+
+// Parse the command line into the file-level `m_*` variables.
+//
+// Recognised arguments:
+//   -c <code>  use <code> itself as the source (file name is reported as "CLI")
+//   -h         print the usage text and exit with status 0
+//   -d         set `m_dump_ast`
+//   -o <file>  set `m_outut_filename`
+//   -          read the source from stdin (must be shorter than
+//              MAX_STDIN_SOURCE_LEN bytes)
+//   <file>     read the whole of <file> as the source
+//
+// A second positional argument, a flag that is missing its value, or a
+// command line without any source prints the usage text and exits with
+// status 1.
+fn parse_cli_args(argc: int, argv: char**) {
+    for (let i = 1; i < argc; ++i) {
+        if (streq(argv[i], "-c")) {
+            // `-c` takes a value; refuse to read past the end of argv.
+            if (i + 1 >= argc)
+                print_usage_and_exit(argv[0], 1);
+            ++i;
+            m_source = argv[i];
+            m_source_len = strlen(m_source);
+            m_filename = "CLI";
+        } else if (streq(argv[i], "-h")) {
+            print_usage_and_exit(argv[0], 0);
+        } else if (streq(argv[i], "-d")) {
+            m_dump_ast = true;
+        } else if (streq(argv[i], "-o")) {
+            // Same guard as `-c`: the output path has to be present.
+            if (i + 1 >= argc)
+                print_usage_and_exit(argv[0], 1);
+            ++i;
+            m_outut_filename = argv[i];
+        } else if (m_filename == null) {
+            if (streq(argv[i], "-")) {
+                m_filename = "stdin";
+                m_source = malloc(MAX_STDIN_SOURCE_LEN);
+                m_source_len = read(0, m_source, MAX_STDIN_SOURCE_LEN);
+                // A failed read must not be used as an index below.
+                if (m_source_len < 0) {
+                    putsln("Failed to read source from stdin");
+                    exit(1);
+                }
+                // A completely filled buffer leaves no room for the
+                // terminator, so reject it *before* writing the terminator.
+                if (m_source_len == MAX_STDIN_SOURCE_LEN) {
+                    putsln("Source too long to use through stdin");
+                    exit(1);
+                }
+                m_source[m_source_len] = 0;
+            } else {
+                m_filename = argv[i];
+                let file = fopen(m_filename, 'r');
+                m_source = fread_to_string(file, &m_source_len);
+                fclose(file);
+            }
+        } else {
+            // Only one source is accepted.
+            print_usage_and_exit(argv[0], 1);
+        }
+    }
+    if (m_filename == null) {
+        print_usage_and_exit(argv[0], 1);
+    }
+}
+
fn main(argc: int, argv: char **): int {
- if (argc != 2)
- die("Usage: cupcc <input_file>");
+ parse_cli_args(argc, argv);
- let lexer = lexer_new_open_file(argv[1]);
+ let lexer = lexer_new(m_filename, m_source, m_source_len);
let ast = parse_program(lexer);
- dump_ast(ast, 0);
-
- let out_file = fopen("build/host.nasm", 'w');
- defer fclose(out_file);
+ if (m_dump_ast)
+ dump_ast(ast, 0);
+ let out_file = fopen(m_outut_filename, 'w');
generate_program(ast, out_file);
+ fclose(out_file); // TODO: Defer on self-host
- puts("---------------------------\n");
-
- puts("Total amount of memory used by malloc: ");
- putu(__malloc_buf_pos);
- putsln("\nDone.");
+ // puts("---------------------------\n");
+ // puts("Total amount of memory used by malloc: ");
+ // putu(__malloc_buf_pos);
+ // putsln("\nDone.");
} \ No newline at end of file
diff --git a/compiler/parser.cup b/compiler/parser.cup
index 8f02c57..d57885e 100644
--- a/compiler/parser.cup
+++ b/compiler/parser.cup
@@ -425,8 +425,32 @@ fn parse_factor(lexer: Lexer*): Node* {
lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACKET);
} else if (token.typ == TOKEN_DOT) {
- die_loc(here, &token.loc, "Member access not implemented");
+ lexer_next_assert(lexer, &token, TOKEN_DOT);
+ if (!is_struct_or_structptr(expr.etyp)) {
+ putsln(create_type_string(expr.etyp));
+ die_loc(here, &token.loc, "Cannot access member of non-struct type");
+ }
+
+ let is_ptr = expr.etyp.typ == TYPE_PTR;
+ let struct_type = is_ptr ? expr.etyp.ptr : expr.etyp;
+
+ lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
+ let name = token.value.as_string;
+ let field = compound_find_field(struct_type, name);
+
+ if (field == null) {
+ puts("Struct type: "); putsln(create_type_string(struct_type));
+ puts("Field name: "); putsln(name);
+ die_loc(here, &token.loc, "Invalid field name for struct");
+ }
+
+ let member = node_new(AST_MEMBER);
+ member.etyp = field.typ;
+ member.d.member.obj = expr;
+ member.d.member.offset = field.offset;
+ member.d.member.is_ptr = (expr.etyp.typ == TYPE_PTR);
+ expr = decay_array_to_pointer(member, &token);
} else {
running = false;
}
@@ -686,7 +710,16 @@ fn parse_var_declaration(lexer: Lexer*): Node* {
if (token.typ == TOKEN_ASSIGN) {
lexer_next(lexer, &token);
decl.init = parse_expression(lexer);
- decl.var.typ = decl.init.etyp;
+
+ if (missing_type) {
+ decl.var.typ = decl.init.etyp;
+ } else if (!is_convertible(decl.var.typ, decl.init.etyp)) {
+ puts("- Variable type: "); putsln(create_type_string(decl.var.typ));
+ puts("- Value type: "); putsln(create_type_string(decl.init.etyp));
+ die_loc2(here, &token.loc, "Type mismatch for variable declaration: ", decl.var.name);
+ }
+
+ node.etyp = decl.init.etyp;
} else if (missing_type) {
die_loc(here, &token.loc, "Expected ':' or '=' after variable declaration");
}
@@ -849,7 +882,7 @@ fn parse_statement(lexer: Lexer*): Node* {
node = parse_for_loop(lexer);
} else if (token.typ == TOKEN_DEFER) {
- die("defer is not implemented yet");
+ die_loc(here, &token.loc, "defer is not implemented yet");
} else if (token.typ == TOKEN_LET) {
node = parse_var_declaration(lexer);
lexer_next_assert(lexer, &token, TOKEN_SEMICOLON);
@@ -882,6 +915,114 @@ fn parse_block(lexer: Lexer*): Node* {
return block;
}
+// FIXME: Make this a real type
+//
+// Parse `enum Name { A, B, C }` (a trailing comma is accepted).
+// Every listed identifier is registered through `constant_push`, numbered
+// 0, 1, 2, ... in order of appearance. Dies if an identifier already exists
+// or if entries are not separated by commas.
+fn parse_enum_declaration(lexer: Lexer*) {
+    // TODO: This is all a hack to automatically number some constants.
+    // It does not behave like a type, and cannot be used as one.
+    // Fix this in the future.
+    let token: Token;
+    lexer_next_assert(lexer, &token, TOKEN_ENUM);
+    lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); // Use this!
+    lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE);
+
+    let next_value = 0;
+    lexer_peek(lexer, &token);
+    while (token.typ != TOKEN_CLOSE_BRACE) {
+        lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
+        if (identifier_exists(&token))
+            die_loc(here, &token.loc, "Identifier already exists, enums just behave like numbered constants.");
+
+        constant_push(token.value.as_string, next_value);
+        next_value = next_value + 1;
+
+        // After an entry only `,` or `}` may follow.
+        lexer_peek(lexer, &token);
+        if (token.typ != TOKEN_COMMA && token.typ != TOKEN_CLOSE_BRACE)
+            die_loc(here, &token.loc, "Expected a comma or a closing brace.");
+
+        if (token.typ == TOKEN_COMMA) {
+            // Consume the comma and look at what starts the next entry.
+            lexer_next(lexer, &token);
+            lexer_peek(lexer, &token);
+        }
+    }
+    lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE);
+}
+
+// Parse a `struct` or `union` declaration of the form
+// `struct Name { field: type; ... }` and return the newly built type.
+//
+// top_level: when non-zero the declaration must carry a name; otherwise the
+// name is optional and an unnamed compound is called "<anonymous>".
+//
+// A named compound is pushed onto `p_compound_type_stack` before its fields
+// are parsed. Field types may themselves be nested struct/union declarations,
+// which are parsed recursively with `top_level` set to false.
+//
+// FIXME: This should just be part of `parse_type()`, and we should be allowed
+// to parse a type without a name. Probably also need to handle conversions
+// between structs with similar embedded types.
+fn parse_struct_union_declaration(lexer: Lexer*, top_level: int): Type* {
+ let token: Token;
+ lexer_next(lexer, &token);
+
+ if (token.typ != TOKEN_STRUCT && token.typ != TOKEN_UNION)
+ die_loc(here, &token.loc, "Expected STRUCT or UNION in parse_struct_union_declaration");
+
+ let compound = type_new(token.typ == TOKEN_STRUCT ? TYPE_STRUCT : TYPE_UNION);
+ compound.fields = vector_new();
+
+ lexer_peek(lexer, &token);
+
+ // For nested temporary structs we don't need a name
+ if (token.typ != TOKEN_IDENTIFIER && top_level)
+ die_loc(here, &token.loc, "You need to specify a name for the struct defined globally.");
+
+ // But if they do provide one, we'll add it to the list of defined structs so
+ // that it can be referenced internally.
+ if (token.typ == TOKEN_IDENTIFIER) {
+ compound.struct_name = token.value.as_string;
+ vector_push(p_compound_type_stack, compound);
+ lexer_next(lexer, &token);
+ } else {
+ compound.struct_name = "<anonymous>";
+ }
+
+ lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE);
+
+ // Each iteration parses one `name: type;` field.
+ lexer_peek(lexer, &token);
+ while (token.typ != TOKEN_CLOSE_BRACE) {
+ // TODO: Allow no-name fields
+ lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
+ let name = token.value.as_string;
+
+ lexer_next_assert(lexer, &token, TOKEN_COLON);
+ lexer_peek(lexer, &token);
+
+ // We want to allow nested temporary structs.
+ let typ: Type*;
+ if (token.typ == TOKEN_STRUCT || token.typ == TOKEN_UNION) {
+ // Nested structs live in their own "namespace", can't be accessed
+ // from outside, so we will pop them off the stack once done.
+ let prev_compound_count = p_compound_type_stack.size;
+ typ = parse_struct_union_declaration(lexer, false);
+ p_compound_type_stack.size = prev_compound_count;
+
+ } else {
+ typ = parse_type(lexer);
+ }
+
+ // Records the field and updates the compound's size.
+ compound_push_field(compound, name, typ);
+ lexer_next_assert(lexer, &token, TOKEN_SEMICOLON);
+ lexer_peek(lexer, &token);
+ }
+ lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE);
+
+ // printf("Defined %s: %s, size: %lld\n",
+ // compound.type == TYPE_UNION ? "union":"struct",
+ // compound.struct_name,
+ // compound.fields.size
+ // );
+ // for (int i = 0; i < compound.fields.num_fields; i++) {
+ // printf("\t%s: %s (offset: %lld, size: %lld)\n",
+ // compound.fields.name[i],
+ // type_to_str(compound.fields.typ[i]),
+ // compound.fields.offset[i],
+ // size_for_type(compound.fields.typ[i])
+ // );
+ // }
+
+ return compound;
+}
+
fn parse_function(lexer: Lexer*): Node* {
let token: Token;
@@ -889,28 +1030,53 @@ fn parse_function(lexer: Lexer*): Node* {
lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
// TODO: Check if identifier exists
- let node = node_new(AST_FUNC);
- node.d.func.name = token.value.as_string;
+ let func = node_new(AST_FUNC);
+ let dfunc = func;
+ func.d.func.name = token.value.as_string;
- vector_push(p_all_functions, node);
- p_current_function = node;
+ // If the identifier exists, there's 3 possible cases:
+ // 1. It's another variable / struct, which is an error.
+ // 2. It's a function that's been defined, which is an error.
+ // 3. It's a function that's been declared (but not defined), which is OK
+ if (identifier_exists(&token)) {
+ dfunc = find_function_definition(&token);
+ // Case 1
+ if (dfunc == null)
+ die_loc(here, &token.loc, "Function name already exists as an identifier");
+ // Case 2
+ if (dfunc.d.func.is_defined)
+ die_loc(here, &token.loc, "Function already defined earlier");
+
+ // Case 3 (No error, just set the current function correctly)
+ p_current_function = func;
+ } else {
+ // We don't have a declaration yet, push this.
+ vector_push(p_all_functions, func);
+ p_current_function = func;
+ }
lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
- parse_function_params(lexer, node);
+ parse_function_params(lexer, func);
lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN);
lexer_peek(lexer, &token);
if (token.typ == TOKEN_COLON) {
lexer_next(lexer, &token);
- node.etyp = parse_type(lexer);
+ func.etyp = parse_type(lexer);
} else {
- node.etyp = type_new(TYPE_VOID);
+ func.etyp = type_new(TYPE_VOID);
}
- node.d.func.body = parse_block(lexer);
+ lexer_peek(lexer, &token);
+ if (token.typ == TOKEN_OPEN_BRACE) {
+ func.d.func.body = parse_block(lexer);
+ func.d.func.is_defined = true;
+ } else {
+ func.d.func.is_defined = false;
+ }
p_current_function = null;
- return node;
+ return func;
}
let p_opened_files = vector_new();
@@ -925,7 +1091,7 @@ fn parser_open_new_file(path: char*) {
}
for (let i = 0; i < p_opened_files.size; i = i + 1) {
if (streq(p_opened_files.data[i], path)) {
- puts("Already opened file: "); puts(path); putsln(": Ignoring.");
+ // Already opened this file, ignore
return;
}
}
@@ -958,6 +1124,10 @@ fn parse_program(lexer: Lexer*): Node* {
let path = token.value.as_string;
parser_open_new_file(path);
lexer = vector_top(p_lexer_stack);
+ } else if (token.typ == TOKEN_STRUCT || token.typ == TOKEN_UNION) {
+ parse_struct_union_declaration(lexer, true);
+ } else if (token.typ == TOKEN_ENUM) {
+ parse_enum_declaration(lexer);
} else {
die_loc2(here, &token.loc, "unexpected token in parse_program: ", token_type_to_string(token.typ));
}
diff --git a/compiler/types.cup b/compiler/types.cup
index 10a2d6f..9b0d822 100644
--- a/compiler/types.cup
+++ b/compiler/types.cup
@@ -1,4 +1,5 @@
import "std/common.cup"
+import "std/vector.cup"
enum BaseType {
TYPE_VOID,
@@ -19,11 +20,7 @@ struct Type {
struct_name: char*;
size: int;
array_size: int;
- fields: struct {
- names: char**;
- types: Type**;
- num_fields: int;
- };
+ fields: Vector*; // Vector<Variable*>;
};
fn size_for_base_type(type: int): int {
@@ -86,7 +83,8 @@ fn create_type_string(typ: Type *): char* {
else if (typ.typ == TYPE_CHAR) strcat(buf, "char");
else if (typ.typ == TYPE_VOID) strcat(buf, "void");
else if (typ.typ == TYPE_ANY) strcat(buf, "any");
- else die("type_to_string: unknown type");
+ else if (typ.typ == TYPE_STRUCT) strcat(buf, typ.struct_name);
+ else die("create_type_string: unknown type");
return buf;
}
diff --git a/run.sh b/run.sh
index 19360df..1c24981 100755
--- a/run.sh
+++ b/run.sh
@@ -13,13 +13,13 @@ then
exit 1
fi
-set -xe
+set -e
-make
+make -s
build/cupcc "$@"
-make build/output.out
+make build/output.out -s
set +e
+set -x
-build/output.out
-echo "Exit status: $?" \ No newline at end of file
+build/output.out \ No newline at end of file
diff --git a/run.sh2 b/run.sh2
index 20ba136..72317e6 100755
--- a/run.sh2
+++ b/run.sh2
@@ -12,16 +12,15 @@ then
exit 1
fi
-set -xe
+set -e
build/cupcc compiler/main.cup -o build/cup.nasm
-make build/cup.out
+make build/cup.out -s
build/cup.out "$@"
-make build/host.out
+make build/host.out -s
set +e
+set -x
build/host.out
-
-echo "Exit status: $?" \ No newline at end of file
diff --git a/src/builtins.c b/src/builtins.c
index 58e907e..3caf753 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -102,7 +102,7 @@ static void generate_custom_builtins(FILE *out)
" mov rdi, [rsp+8]\n"
" mov r9, -3689348814741910323\n"
" sub rsp, 40\n"
- " mov BYTE [rsp+31], 10\n"
+ " mov byte [rsp+31], 10\n"
" lea rcx, [rsp+30]\n"
" mov qword rbx, 0\n"
".L2:\n"
@@ -116,7 +116,7 @@ static void generate_custom_builtins(FILE *out)
" add rsi, rsi\n"
" sub rax, rsi\n"
" add eax, 48\n"
- " mov BYTE [rcx], al\n"
+ " mov byte [rcx], al\n"
" mov rax, rdi\n"
" mov rdi, rdx\n"
" mov rdx, rcx\n"
diff --git a/src/generator.c b/src/generator.c
index 59a193e..c52817c 100644
--- a/src/generator.c
+++ b/src/generator.c
@@ -317,7 +317,6 @@ void generate_expr_into_rax(Node *expr, FILE *out)
generate_expr_into_rax(expr->conditional.do_then, out);
fprintf(out, " jmp .cond_end_%d\n", cur_label);
fprintf(out, ".cond_else_%d:\n", cur_label);
- generate_expr_into_rax(expr->binary.right, out);
// Booleanize the result
generate_expr_into_rax(expr->conditional.do_else, out);
fprintf(out, ".cond_end_%d:\n", cur_label);
diff --git a/std/vector.cup b/std/vector.cup
index 6953e9b..1b7104c 100644
--- a/std/vector.cup
+++ b/std/vector.cup
@@ -15,7 +15,7 @@ fn vector_new_sized(capacity: int): Vector* {
}
fn vector_new(): Vector* {
- const initial_default_capacity = 8;
+ let initial_default_capacity = 8;
return vector_new_sized(initial_default_capacity);
}