diff options
| author | Mustafa Quraish <[email protected]> | 2022-02-07 03:02:39 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-02-07 03:18:08 -0500 |
| commit | 3817688851fae07b1d6a13ba2ce1906fc9811f8f (patch) | |
| tree | bb936b224cada39dc7ede856d9f15a4000950526 /compiler | |
| parent | Add missing files to self-hosted directory (diff) | |
| download | cup-3817688851fae07b1d6a13ba2ce1906fc9811f8f.tar.xz cup-3817688851fae07b1d6a13ba2ce1906fc9811f8f.zip | |
[cup] Self-hosting is now possible! Make some tweaks to match C output
A bit of a chonky commit, but this ports (well, almost) everything
that remained in the C implementation over to the self-hosted
compiler.
The only things that really remain right now are (1) defer support
and (2) support for constants in local scopes. These were used rarely
enough that, for now, their uses have been removed, but I'll implement
them back later. Not sure how useful (2) is though.
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/ast.cup | 28 | ||||
| -rw-r--r-- | compiler/codegen.cup | 124 | ||||
| -rw-r--r-- | compiler/lexer.cup | 15 | ||||
| -rw-r--r-- | compiler/main.cup | 81 | ||||
| -rw-r--r-- | compiler/parser.cup | 196 | ||||
| -rw-r--r-- | compiler/types.cup | 10 |
6 files changed, 378 insertions, 76 deletions
diff --git a/compiler/ast.cup b/compiler/ast.cup index b452241..2eb96f0 100644 --- a/compiler/ast.cup +++ b/compiler/ast.cup @@ -75,6 +75,7 @@ struct Node { body: Node *; max_locals_size: int; args: Vector *; // Vector<Variable> + is_defined: int; }; block: struct { @@ -462,4 +463,31 @@ fn type_check_binary(node: Node*, token: Token*): Node* node.etyp = type_new(TYPE_INT); } return node; +} + +// FIXME: These should be in `types.cup` ideally, but `Variable` is not defined +// there and we can't forward-declare types. +fn compound_push_field(compound: Type*, name: char*, typ: Type*): int { + if (compound.typ != TYPE_STRUCT && compound.typ != TYPE_UNION) + die("compound_push_field: not a compound type"); + + let is_union = compound.typ == TYPE_UNION; + + let field_size = size_for_type(typ); + let offset_factor = min(field_size, 8); + let offset = is_union ? 0 : align_up(compound.size, offset_factor); + compound.size = is_union ? max(field_size, compound.size) : offset + field_size; + + vector_push(compound.fields, variable_new(name, typ, offset)); + return offset; +} + +fn compound_find_field(typ: Type*, name: char*): Variable* { + for (let i = 0; i < typ.fields.size; ++i) { + let field: Variable* = typ.fields.data[i]; + if (streq(field.name, name)) { + return field; + } + } + return null; }
\ No newline at end of file diff --git a/compiler/codegen.cup b/compiler/codegen.cup index 3d52397..c18500f 100644 --- a/compiler/codegen.cup +++ b/compiler/codegen.cup @@ -3,7 +3,7 @@ import "compiler/ast.cup" import "std/file.cup" let gen_out_file: File*; -let gen_label_counter = 0; +let gen_label_counter = -1; // So the labels start at 0 let gen_string_literals = vector_new(); @@ -49,6 +49,24 @@ fn generate_syscall(num: int) { emit_asm(" syscall\n"); } +fn subregister_for_type(typ: Type*): char* { + let n = size_for_type(typ); + if (n == 1) return "al"; + if (n == 2) return "ax"; + if (n == 4) return "eax"; + if (n == 8) return "rax"; + die2(here, "Unsupported type size"); +} + +fn specifier_for_type(typ: Type*): char* { + let n = size_for_type(typ); + if (n == 1) return "byte"; + if (n == 2) return "word"; + if (n == 4) return "dword"; + if (n == 8) return "qword"; + die2(here, "Unsupported type size"); +} + fn generate_expr_into_rax(node: Node*); fn generate_lvalue_into_rax(node: Node*) { @@ -60,6 +78,13 @@ fn generate_lvalue_into_rax(node: Node*) { let offset = node.d.variable.offset; emit_asm(" mov rax, global_vars\n"); emit_asm(" add rax, "); emit_num(offset); emit_asm("\n"); + } else if (node.typ == AST_MEMBER) { + let offset = node.d.member.offset; + if (node.d.member.is_ptr) + generate_expr_into_rax(node.d.member.obj); + else + generate_lvalue_into_rax(node.d.member.obj); + emit_asm(" add rax, "); emit_num(offset); emit_asm("\n"); } else if (node.typ == AST_DEREF) { generate_expr_into_rax(node.d.unary); } else { @@ -102,12 +127,12 @@ fn generate_expr_into_rax(node: Node*) { let label = ++gen_label_counter; generate_expr_into_rax(node.d.conditional.cond); emit_asm(" cmp rax, 0\n"); - emit_asm(" je .cond_els"); emit_num(label); emit_asm("\n"); + emit_asm(" je .cond_else_"); emit_num(label); emit_asm("\n"); generate_expr_into_rax(node.d.conditional.then); - emit_asm(" jmp .cond"); emit_num(label); emit_asm("\n"); - emit_asm(".cond_els"); emit_num(label); 
emit_asm(":\n"); + emit_asm(" jmp .cond_end_"); emit_num(label); emit_asm("\n"); + emit_asm(".cond_else_"); emit_num(label); emit_asm(":\n"); generate_expr_into_rax(node.d.conditional.els); - emit_asm(".cond"); emit_num(label); emit_asm(":\n"); + emit_asm(".cond_end_"); emit_num(label); emit_asm(":\n"); } else if (node.typ == AST_PLUS) { generate_expr_into_rax(node.d.binary.rhs); @@ -242,41 +267,46 @@ fn generate_expr_into_rax(node: Node*) { generate_expr_into_rax(node.d.binary.lhs,); // If left is true, we can short-circuit emit_asm(" cmp rax, 0\n"); - emit_asm(" je .or_r"); emit_num(label); emit_asm("\n"); + emit_asm(" je .or_right_"); emit_num(label); emit_asm("\n"); emit_asm(" mov rax, 1\n"); - emit_asm(" jmp .or_e"); emit_num(label); emit_asm("\n"); - emit_asm(".or_r"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .or_end_"); emit_num(label); emit_asm("\n"); + emit_asm(".or_right_"); emit_num(label); emit_asm(":\n"); generate_expr_into_rax(node.d.binary.rhs); // Booleanize the result emit_asm(" cmp rax, 0\n"); emit_asm(" setne al\n"); - emit_asm(".or_e"); emit_num(label); emit_asm(":\n"); + emit_asm(".or_end_"); emit_num(label); emit_asm(":\n"); } else if (node.typ == AST_AND) { let label = ++gen_label_counter; generate_expr_into_rax(node.d.binary.lhs); // If left is false, we can short-circuit emit_asm(" cmp rax, 0\n"); - emit_asm(" jne .and_r"); emit_num(label); emit_asm("\n"); + emit_asm(" jne .and_right_"); emit_num(label); emit_asm("\n"); emit_asm(" mov rax, 0\n"); - emit_asm(" jmp .and_e"); emit_num(label); emit_asm("\n"); - emit_asm(".and_r"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .and_end_"); emit_num(label); emit_asm("\n"); + emit_asm(".and_right_"); emit_num(label); emit_asm(":\n"); generate_expr_into_rax(node.d.binary.rhs); // Booleanize the result emit_asm(" cmp rax, 0\n"); emit_asm(" setne al\n"); - emit_asm(".and_e"); emit_num(label); emit_asm(":\n"); + emit_asm(".and_end_"); emit_num(label); emit_asm(":\n"); } else if 
(is_lvalue(node.typ)) { generate_lvalue_into_rax(node); - emit_asm(" mov rax, [rax]\n"); + if (size_for_type(node.etyp) == 8) { + emit_asm(" mov rax, [rax]\n"); + } else { + emit_asm3(" movsx rax, ", specifier_for_type(node.etyp), " [rax]\n"); + } } else if (node.typ == AST_ASSIGN) { - generate_lvalue_into_rax(node.d.assign.lhs); + let var = node.d.assign.lhs; + generate_lvalue_into_rax(var); emit_asm(" push rax\n"); generate_expr_into_rax(node.d.assign.rhs); emit_asm(" pop rbx\n"); - emit_asm(" mov [rbx], rax\n"); + emit_asm3(" mov [rbx], ", subregister_for_type(var.etyp), "\n"); } else if (node.typ == AST_FUNCCALL) { generate_function_call(node); @@ -290,7 +320,16 @@ fn generate_block(node: Node*); fn generate_statement(node: Node*) { if (node.typ == AST_RETURN) { - generate_expr_into_rax(node.d.unary); + if (node.d.unary) { + generate_expr_into_rax(node.d.unary); + } else { + emit_asm(" xor rax, rax\n"); // Default to 0 + } + + emit_asm(" push rax\n"); + // TODO: Undo the defer stack here, this is for consistency with the C implementation for now. 
+ emit_asm(" pop rax\n"); + emit_asm(" mov rsp, rbp\n"); emit_asm(" pop rbp\n"); emit_asm(" ret\n"); @@ -305,40 +344,48 @@ fn generate_statement(node: Node*) { } else if (node.typ == AST_IF) { let label = ++gen_label_counter; generate_expr_into_rax(node.d.conditional.cond); - emit_asm(" cmp rax, 0\n"); - emit_asm(" je .els"); emit_num(label); emit_asm("\n"); - generate_statement(node.d.conditional.then); - emit_asm(" jmp .if"); emit_num(label); emit_asm("\n"); - emit_asm(".els"); emit_num(label); emit_asm(":\n"); - if (node.d.conditional.els) + // If we don't have an `else` clause, we can simplify + if (node.d.conditional.els == null) { + emit_asm(" cmp rax, 0\n"); + emit_asm(" je .if_end_"); emit_num(label); emit_asm("\n"); + generate_statement(node.d.conditional.then); + emit_asm(".if_end_"); emit_num(label); emit_asm(":\n"); + } else { + emit_asm(" cmp rax, 0\n"); + emit_asm(" je .if_else_"); emit_num(label); emit_asm("\n"); + generate_statement(node.d.conditional.then); + emit_asm(" jmp .if_end_"); emit_num(label); emit_asm("\n"); + emit_asm(".if_else_"); emit_num(label); emit_asm(":\n"); generate_statement(node.d.conditional.els); - emit_asm(".if"); emit_num(label); emit_asm(":\n"); - + emit_asm(".if_end_"); emit_num(label); emit_asm(":\n"); + } } else if (node.typ == AST_WHILE) { let label = ++gen_label_counter; - emit_asm(".loop_s"); emit_num(label); emit_asm(":\n"); + emit_asm(".loop_start_"); emit_num(label); emit_asm(":\n"); + emit_asm(".loop_continue_"); emit_num(label); emit_asm(":\n"); generate_expr_into_rax(node.d.looop.cond); emit_asm(" cmp rax, 0\n"); - emit_asm(" je .loop_e"); emit_num(label); emit_asm("\n"); + emit_asm(" je .loop_end_"); emit_num(label); emit_asm("\n"); generate_statement(node.d.looop.body); - emit_asm(" jmp .loop_s"); emit_num(label); emit_asm("\n"); - emit_asm(".loop_e"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .loop_start_"); emit_num(label); emit_asm("\n"); + emit_asm(".loop_end_"); emit_num(label); emit_asm(":\n"); 
} else if (node.typ == AST_FOR) { let label = ++gen_label_counter; if (node.d.looop.init) generate_statement(node.d.looop.init); - emit_asm(".loop_s"); emit_num(label); emit_asm(":\n"); + emit_asm(".loop_start_"); emit_num(label); emit_asm(":\n"); if (node.d.looop.cond) { generate_expr_into_rax(node.d.looop.cond); emit_asm(" cmp rax, 0\n"); - emit_asm(" je .loop_e"); emit_num(label); emit_asm("\n"); + emit_asm(" je .loop_end_"); emit_num(label); emit_asm("\n"); } generate_statement(node.d.looop.body); + emit_asm(".loop_continue_"); emit_num(label); emit_asm(":\n"); if (node.d.looop.step) generate_statement(node.d.looop.step); - emit_asm(" jmp .loop_s"); emit_num(label); emit_asm("\n"); - emit_asm(".loop_e"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .loop_start_"); emit_num(label); emit_asm("\n"); + emit_asm(".loop_end_"); emit_num(label); emit_asm(":\n"); } else { // Default to a simple expression statement @@ -354,6 +401,10 @@ fn generate_block(node: Node*) { } fn generate_function(node: Node*) { + // Skip declarations + if (node.d.func.body == null) + return; + emit_asm3("global func_", node.d.func.name, "\n"); emit_asm3("func_", node.d.func.name, ":\n"); emit_asm(" push rbp\n"); @@ -364,6 +415,8 @@ fn generate_function(node: Node*) { emit_asm(" mov rsp, rbp\n"); emit_asm(" pop rbp\n"); + // Return 0 by default if we don't have a return statement + emit_asm(" mov qword rax, 0\n"); emit_asm(" ret\n"); } @@ -409,11 +462,12 @@ fn generate_program(ast: Node*, file: File*) { for (let i = 0; i < n; ++i) { let node: Node* = ast.d.block.children.data[i]; if (node.typ == AST_VARDECL && node.d.var_decl.init) { + let expr = node.d.var_decl.init; generate_expr_into_rax(node.d.var_decl.init); let offset = node.d.var_decl.var.offset; emit_asm(" mov rbx, global_vars\n"); emit_asm(" add rbx, "); emit_num(offset); emit_asm("\n"); - emit_asm(" mov [rbx], rax\n"); + emit_asm3(" mov [rbx], ", subregister_for_type(expr.etyp), "\n"); } } @@ -436,6 +490,6 @@ fn 
generate_program(ast: Node*, file: File*) { emit_asm_char('`'); emit_asm(gen_string_literals.data[i]); emit_asm_char('`'); - emit_asm("\n"); + emit_asm(", 0\n"); } }
\ No newline at end of file diff --git a/compiler/lexer.cup b/compiler/lexer.cup index 50b06e2..9e967f8 100644 --- a/compiler/lexer.cup +++ b/compiler/lexer.cup @@ -19,16 +19,11 @@ fn lexer_new(filename: char*, src: char*, len: int): Lexer* { } fn lexer_new_open_file(filename: char*): Lexer* { - let input_file = fopen(filename, 'r'); - defer fclose(input_file); - - // using `fmap` here doesn't work on linux, for some reason. - let file_size = fsize(input_file); - let src: char* = malloc(file_size+1); - fread(input_file, src, file_size); - src[file_size] = '\0'; - - return lexer_new(filename, src, file_size); + let file = fopen(filename, 'r'); + let size = 0; + let source = fread_to_string(file, &size); + fclose(file); + return lexer_new(filename, source, size); } fn lexer_loc(lexer: Lexer*, loc: Location*) { diff --git a/compiler/main.cup b/compiler/main.cup index fa7db31..7a4ae0c 100644 --- a/compiler/main.cup +++ b/compiler/main.cup @@ -5,23 +5,80 @@ import "compiler/builtins.cup" import "compiler/parser.cup" import "compiler/codegen.cup" +fn print_usage_and_exit(name: char*, status: int) { + puts("Usage: "); puts(name); putsln(" [options] <file>"); + putsln("Options:"); + putsln(" -c <code> Code to compile"); + putsln(" -h Show this help"); + putsln(" -o <file> Output file"); + putsln(" -d Dump AST to stdout"); + putsln("Output file will be named 'build/host.nasm' by default"); + exit(status); +} + +// FIXME: Allow variables functions to be static to a file? +// we can avoid the prefixes then. 
+const MAX_STDIN_SOURCE_LEN = 4096; +let m_filename: char*; +let m_outut_filename = "build/host.nasm"; +let m_source: char*; +let m_source_len = 0; +let m_dump_ast = false; + +fn parse_cli_args(argc: int, argv: char**) { + for (let i = 1; i < argc; ++i) { + if (streq(argv[i], "-c")) { + m_source_len = strlen(argv[i+1]); + m_source = argv[i+1]; + ++i; + m_filename = "CLI"; + } else if (streq(argv[i], "-h")) { + print_usage_and_exit(argv[0], 0); + } else if (streq(argv[i], "-d")) { + m_dump_ast = true; + } else if (streq(argv[i], "-o")) { + ++i; + m_outut_filename = argv[i]; + } else if (m_filename == null) { + if (streq(argv[i], "-")) { + m_filename = "stdin"; + m_source = malloc(MAX_STDIN_SOURCE_LEN); + m_source_len = read(0, m_source, MAX_STDIN_SOURCE_LEN); + m_source[m_source_len] = 0; + if (m_source_len == MAX_STDIN_SOURCE_LEN) { + putsln("Source too long to use through stdin"); + exit(1); + } + } else { + m_filename = argv[i]; + let file = fopen(m_filename, 'r'); + m_source = fread_to_string(file, &m_source_len); + fclose(file); + } + } else { + print_usage_and_exit(argv[0], 1); + } + } + if (m_filename == null) { + print_usage_and_exit(argv[0], 1); + } +} + fn main(argc: int, argv: char **): int { - if (argc != 2) - die("Usage: cupcc <input_file>"); + parse_cli_args(argc, argv); - let lexer = lexer_new_open_file(argv[1]); + let lexer = lexer_new(m_filename, m_source, m_source_len); let ast = parse_program(lexer); - dump_ast(ast, 0); - - let out_file = fopen("build/host.nasm", 'w'); - defer fclose(out_file); + if (m_dump_ast) + dump_ast(ast, 0); + let out_file = fopen(m_outut_filename, 'w'); generate_program(ast, out_file); + fclose(out_file); // TODO: Defer on self-host - puts("---------------------------\n"); - - puts("Total amount of memory used by malloc: "); - putu(__malloc_buf_pos); - putsln("\nDone."); + // puts("---------------------------\n"); + // puts("Total amount of memory used by malloc: "); + // putu(__malloc_buf_pos); + // putsln("\nDone."); }
\ No newline at end of file diff --git a/compiler/parser.cup b/compiler/parser.cup index 8f02c57..d57885e 100644 --- a/compiler/parser.cup +++ b/compiler/parser.cup @@ -425,8 +425,32 @@ fn parse_factor(lexer: Lexer*): Node* { lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACKET); } else if (token.typ == TOKEN_DOT) { - die_loc(here, &token.loc, "Member access not implemented"); + lexer_next_assert(lexer, &token, TOKEN_DOT); + if (!is_struct_or_structptr(expr.etyp)) { + putsln(create_type_string(expr.etyp)); + die_loc(here, &token.loc, "Cannot access member of non-struct type"); + } + + let is_ptr = expr.etyp.typ == TYPE_PTR; + let struct_type = is_ptr ? expr.etyp.ptr : expr.etyp; + + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + let name = token.value.as_string; + let field = compound_find_field(struct_type, name); + + if (field == null) { + puts("Struct type: "); putsln(create_type_string(struct_type)); + puts("Field name: "); putsln(name); + die_loc(here, &token.loc, "Invalid field name for struct"); + } + + let member = node_new(AST_MEMBER); + member.etyp = field.typ; + member.d.member.obj = expr; + member.d.member.offset = field.offset; + member.d.member.is_ptr = (expr.etyp.typ == TYPE_PTR); + expr = decay_array_to_pointer(member, &token); } else { running = false; } @@ -686,7 +710,16 @@ fn parse_var_declaration(lexer: Lexer*): Node* { if (token.typ == TOKEN_ASSIGN) { lexer_next(lexer, &token); decl.init = parse_expression(lexer); - decl.var.typ = decl.init.etyp; + + if (missing_type) { + decl.var.typ = decl.init.etyp; + } else if (!is_convertible(decl.var.typ, decl.init.etyp)) { + puts("- Variable type: "); putsln(create_type_string(decl.var.typ)); + puts("- Value type: "); putsln(create_type_string(decl.init.etyp)); + die_loc2(here, &token.loc, "Type mismatch for variable declaration: ", decl.var.name); + } + + node.etyp = decl.init.etyp; } else if (missing_type) { die_loc(here, &token.loc, "Expected ':' or '=' after variable declaration"); } @@ -849,7 
+882,7 @@ fn parse_statement(lexer: Lexer*): Node* { node = parse_for_loop(lexer); } else if (token.typ == TOKEN_DEFER) { - die("defer is not implemented yet"); + die_loc(here, &token.loc, "defer is not implemented yet"); } else if (token.typ == TOKEN_LET) { node = parse_var_declaration(lexer); lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); @@ -882,6 +915,114 @@ fn parse_block(lexer: Lexer*): Node* { return block; } +// FIXME: Make this a real type +fn parse_enum_declaration(lexer: Lexer*) { + let token: Token; + // TODO: This is all a hack to automatically number + // Some constants. It does not behave like a type, + // and cannot be used as one. Fix this in the future. + lexer_next_assert(lexer, &token, TOKEN_ENUM); + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); // Use this! + lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE); + + let enum_count = 0; + lexer_peek(lexer, &token); + while (token.typ != TOKEN_CLOSE_BRACE) { + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + + if (identifier_exists(&token)) + die_loc(here, &token.loc, "Identifier already exists, enums just behave like numbered constants."); + + constant_push(token.value.as_string, enum_count); + ++enum_count; + + lexer_peek(lexer, &token); + if (token.typ == TOKEN_COMMA) { + lexer_next(lexer, &token); + lexer_peek(lexer, &token); + } else if (token.typ != TOKEN_CLOSE_BRACE) { + die_loc(here, &token.loc, "Expected a comma or a closing brace."); + } + } + lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); +} + +// FIXME: This should just be part of `parse_type()`, and we should be allowed +// to parse a type without a name. Probably also need to handle converstions +// between structs with similar embedded types. 
+fn parse_struct_union_declaration(lexer: Lexer*, top_level: int): Type* { + let token: Token; + lexer_next(lexer, &token); + + if (token.typ != TOKEN_STRUCT && token.typ != TOKEN_UNION) + die_loc(here, &token.loc, "Expected STRUCT or UNION in parse_struct_union_declaration"); + + let compound = type_new(token.typ == TOKEN_STRUCT ? TYPE_STRUCT : TYPE_UNION); + compound.fields = vector_new(); + + lexer_peek(lexer, &token); + + // For nested temporary structs we don't need a name + if (token.typ != TOKEN_IDENTIFIER && top_level) + die_loc(here, &token.loc, "You need to specify a name for the struct defined globally."); + + // But if they do provide one, we'll add it to the list of defined structs so they + // it can referenced internally. + if (token.typ == TOKEN_IDENTIFIER) { + compound.struct_name = token.value.as_string; + vector_push(p_compound_type_stack, compound); + lexer_next(lexer, &token); + } else { + compound.struct_name = "<anonymous>"; + } + + lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE); + + lexer_peek(lexer, &token); + while (token.typ != TOKEN_CLOSE_BRACE) { + // TODO: Allow no-name fields + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + let name = token.value.as_string; + + lexer_next_assert(lexer, &token, TOKEN_COLON); + lexer_peek(lexer, &token); + + // We want to allow nested temporary structs. + let typ: Type*; + if (token.typ == TOKEN_STRUCT || token.typ == TOKEN_UNION) { + // Nested structs live in their own "namespace", can't be accessed + // from outside, so we will pop them off the stack once done. 
+ let prev_compound_count = p_compound_type_stack.size; + typ = parse_struct_union_declaration(lexer, false); + p_compound_type_stack.size = prev_compound_count; + + } else { + typ = parse_type(lexer); + } + + compound_push_field(compound, name, typ); + lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); + lexer_peek(lexer, &token); + } + lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); + + // printf("Defined %s: %s, size: %lld\n", + // compound.type == TYPE_UNION ? "union":"struct", + // compound.struct_name, + // compound.fields.size + // ); + // for (int i = 0; i < compound.fields.num_fields; i++) { + // printf("\t%s: %s (offset: %lld, size: %lld)\n", + // compound.fields.name[i], + // type_to_str(compound.fields.typ[i]), + // compound.fields.offset[i], + // size_for_type(compound.fields.typ[i]) + // ); + // } + + return compound; +} + fn parse_function(lexer: Lexer*): Node* { let token: Token; @@ -889,28 +1030,53 @@ fn parse_function(lexer: Lexer*): Node* { lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); // TODO: Check if identifier exists - let node = node_new(AST_FUNC); - node.d.func.name = token.value.as_string; + let func = node_new(AST_FUNC); + let dfunc = func; + func.d.func.name = token.value.as_string; - vector_push(p_all_functions, node); - p_current_function = node; + // If the identifier exists, there's 3 possible cases: + // 1. It's another variable / struct, which is an error. + // 2. It's a function that's been defined, which is an error. + // 3. 
It's a function that's been declared (but not defined), which is OK + if (identifier_exists(&token)) { + dfunc = find_function_definition(&token); + // Case 1 + if (dfunc == null) + die_loc(here, &token.loc, "Function name already exists as an identifier"); + // Case 2 + if (dfunc.d.func.is_defined) + die_loc(here, &token.loc, "Function already defined earlier"); + + // Case 3 (No error, just set the current function correctly) + p_current_function = func; + } else { + // We don't have a declaration yet, push this. + vector_push(p_all_functions, func); + p_current_function = func; + } lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); - parse_function_params(lexer, node); + parse_function_params(lexer, func); lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); lexer_peek(lexer, &token); if (token.typ == TOKEN_COLON) { lexer_next(lexer, &token); - node.etyp = parse_type(lexer); + func.etyp = parse_type(lexer); } else { - node.etyp = type_new(TYPE_VOID); + func.etyp = type_new(TYPE_VOID); } - node.d.func.body = parse_block(lexer); + lexer_peek(lexer, &token); + if (token.typ == TOKEN_OPEN_BRACE) { + func.d.func.body = parse_block(lexer); + func.d.func.is_defined = true; + } else { + func.d.func.is_defined = false; + } p_current_function = null; - return node; + return func; } let p_opened_files = vector_new(); @@ -925,7 +1091,7 @@ fn parser_open_new_file(path: char*) { } for (let i = 0; i < p_opened_files.size; i = i + 1) { if (streq(p_opened_files.data[i], path)) { - puts("Already opened file: "); puts(path); putsln(": Ignoring."); + // Already opened this file, ignore return; } } @@ -958,6 +1124,10 @@ fn parse_program(lexer: Lexer*): Node* { let path = token.value.as_string; parser_open_new_file(path); lexer = vector_top(p_lexer_stack); + } else if (token.typ == TOKEN_STRUCT || token.typ == TOKEN_UNION) { + parse_struct_union_declaration(lexer, true); + } else if (token.typ == TOKEN_ENUM) { + parse_enum_declaration(lexer); } else { die_loc2(here, &token.loc, 
"unexpected token in parse_program: ", token_type_to_string(token.typ)); } diff --git a/compiler/types.cup b/compiler/types.cup index 10a2d6f..9b0d822 100644 --- a/compiler/types.cup +++ b/compiler/types.cup @@ -1,4 +1,5 @@ import "std/common.cup" +import "std/vector.cup" enum BaseType { TYPE_VOID, @@ -19,11 +20,7 @@ struct Type { struct_name: char*; size: int; array_size: int; - fields: struct { - names: char**; - types: Type**; - num_fields: int; - }; + fields: Vector*; // Vector<Variable*>; }; fn size_for_base_type(type: int): int { @@ -86,7 +83,8 @@ fn create_type_string(typ: Type *): char* { else if (typ.typ == TYPE_CHAR) strcat(buf, "char"); else if (typ.typ == TYPE_VOID) strcat(buf, "void"); else if (typ.typ == TYPE_ANY) strcat(buf, "any"); - else die("type_to_string: unknown type"); + else if (typ.typ == TYPE_STRUCT) strcat(buf, typ.struct_name); + else die("create_type_string: unknown type"); return buf; } |