diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/ast.cup | 28 | ||||
| -rw-r--r-- | compiler/codegen.cup | 124 | ||||
| -rw-r--r-- | compiler/lexer.cup | 15 | ||||
| -rw-r--r-- | compiler/main.cup | 81 | ||||
| -rw-r--r-- | compiler/parser.cup | 196 | ||||
| -rw-r--r-- | compiler/types.cup | 10 |
6 files changed, 378 insertions, 76 deletions
diff --git a/compiler/ast.cup b/compiler/ast.cup index b452241..2eb96f0 100644 --- a/compiler/ast.cup +++ b/compiler/ast.cup @@ -75,6 +75,7 @@ struct Node { body: Node *; max_locals_size: int; args: Vector *; // Vector<Variable> + is_defined: int; }; block: struct { @@ -462,4 +463,31 @@ fn type_check_binary(node: Node*, token: Token*): Node* node.etyp = type_new(TYPE_INT); } return node; +} + +// FIXME: These should be in `types.cup` ideally, but `Variable` is not defined +// there and we can't forward-declare types. +fn compound_push_field(compound: Type*, name: char*, typ: Type*): int { + if (compound.typ != TYPE_STRUCT && compound.typ != TYPE_UNION) + die("compound_push_field: not a compound type"); + + let is_union = compound.typ == TYPE_UNION; + + let field_size = size_for_type(typ); + let offset_factor = min(field_size, 8); + let offset = is_union ? 0 : align_up(compound.size, offset_factor); + compound.size = is_union ? max(field_size, compound.size) : offset + field_size; + + vector_push(compound.fields, variable_new(name, typ, offset)); + return offset; +} + +fn compound_find_field(typ: Type*, name: char*): Variable* { + for (let i = 0; i < typ.fields.size; ++i) { + let field: Variable* = typ.fields.data[i]; + if (streq(field.name, name)) { + return field; + } + } + return null; }
\ No newline at end of file diff --git a/compiler/codegen.cup b/compiler/codegen.cup index 3d52397..c18500f 100644 --- a/compiler/codegen.cup +++ b/compiler/codegen.cup @@ -3,7 +3,7 @@ import "compiler/ast.cup" import "std/file.cup" let gen_out_file: File*; -let gen_label_counter = 0; +let gen_label_counter = -1; // So the labels start at 0 let gen_string_literals = vector_new(); @@ -49,6 +49,24 @@ fn generate_syscall(num: int) { emit_asm(" syscall\n"); } +fn subregister_for_type(typ: Type*): char* { + let n = size_for_type(typ); + if (n == 1) return "al"; + if (n == 2) return "ax"; + if (n == 4) return "eax"; + if (n == 8) return "rax"; + die2(here, "Unsupported type size"); +} + +fn specifier_for_type(typ: Type*): char* { + let n = size_for_type(typ); + if (n == 1) return "byte"; + if (n == 2) return "word"; + if (n == 4) return "dword"; + if (n == 8) return "qword"; + die2(here, "Unsupported type size"); +} + fn generate_expr_into_rax(node: Node*); fn generate_lvalue_into_rax(node: Node*) { @@ -60,6 +78,13 @@ fn generate_lvalue_into_rax(node: Node*) { let offset = node.d.variable.offset; emit_asm(" mov rax, global_vars\n"); emit_asm(" add rax, "); emit_num(offset); emit_asm("\n"); + } else if (node.typ == AST_MEMBER) { + let offset = node.d.member.offset; + if (node.d.member.is_ptr) + generate_expr_into_rax(node.d.member.obj); + else + generate_lvalue_into_rax(node.d.member.obj); + emit_asm(" add rax, "); emit_num(offset); emit_asm("\n"); } else if (node.typ == AST_DEREF) { generate_expr_into_rax(node.d.unary); } else { @@ -102,12 +127,12 @@ fn generate_expr_into_rax(node: Node*) { let label = ++gen_label_counter; generate_expr_into_rax(node.d.conditional.cond); emit_asm(" cmp rax, 0\n"); - emit_asm(" je .cond_els"); emit_num(label); emit_asm("\n"); + emit_asm(" je .cond_else_"); emit_num(label); emit_asm("\n"); generate_expr_into_rax(node.d.conditional.then); - emit_asm(" jmp .cond"); emit_num(label); emit_asm("\n"); - emit_asm(".cond_els"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .cond_end_"); emit_num(label); emit_asm("\n"); + emit_asm(".cond_else_"); emit_num(label); emit_asm(":\n"); generate_expr_into_rax(node.d.conditional.els); - emit_asm(".cond"); emit_num(label); emit_asm(":\n"); + emit_asm(".cond_end_"); emit_num(label); emit_asm(":\n"); } else if (node.typ == AST_PLUS) { generate_expr_into_rax(node.d.binary.rhs); @@ -242,41 +267,46 @@ fn generate_expr_into_rax(node: Node*) { generate_expr_into_rax(node.d.binary.lhs,); // If left is true, we can short-circuit emit_asm(" cmp rax, 0\n"); - emit_asm(" je .or_r"); emit_num(label); emit_asm("\n"); + emit_asm(" je .or_right_"); emit_num(label); emit_asm("\n"); emit_asm(" mov rax, 1\n"); - emit_asm(" jmp .or_e"); emit_num(label); emit_asm("\n"); - emit_asm(".or_r"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .or_end_"); emit_num(label); emit_asm("\n"); + emit_asm(".or_right_"); emit_num(label); emit_asm(":\n"); generate_expr_into_rax(node.d.binary.rhs); // Booleanize the result emit_asm(" cmp rax, 0\n"); emit_asm(" setne al\n"); - emit_asm(".or_e"); emit_num(label); emit_asm(":\n"); + emit_asm(".or_end_"); emit_num(label); emit_asm(":\n"); } else if (node.typ == AST_AND) { let label = ++gen_label_counter; generate_expr_into_rax(node.d.binary.lhs); // If left is false, we can short-circuit emit_asm(" cmp rax, 0\n"); - emit_asm(" jne .and_r"); emit_num(label); emit_asm("\n"); + emit_asm(" jne .and_right_"); emit_num(label); emit_asm("\n"); emit_asm(" mov rax, 0\n"); - emit_asm(" jmp .and_e"); emit_num(label); emit_asm("\n"); - emit_asm(".and_r"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .and_end_"); emit_num(label); emit_asm("\n"); + emit_asm(".and_right_"); emit_num(label); emit_asm(":\n"); generate_expr_into_rax(node.d.binary.rhs); // Booleanize the result emit_asm(" cmp rax, 0\n"); emit_asm(" setne al\n"); - emit_asm(".and_e"); emit_num(label); emit_asm(":\n"); + emit_asm(".and_end_"); emit_num(label); emit_asm(":\n"); } else if (is_lvalue(node.typ)) { generate_lvalue_into_rax(node); - emit_asm(" mov rax, [rax]\n"); + if (size_for_type(node.etyp) == 8) { + emit_asm(" mov rax, [rax]\n"); + } else { + emit_asm3(" movsx rax, ", specifier_for_type(node.etyp), " [rax]\n"); + } } else if (node.typ == AST_ASSIGN) { - generate_lvalue_into_rax(node.d.assign.lhs); + let var = node.d.assign.lhs; + generate_lvalue_into_rax(var); emit_asm(" push rax\n"); generate_expr_into_rax(node.d.assign.rhs); emit_asm(" pop rbx\n"); - emit_asm(" mov [rbx], rax\n"); + emit_asm3(" mov [rbx], ", subregister_for_type(var.etyp), "\n"); } else if (node.typ == AST_FUNCCALL) { generate_function_call(node); @@ -290,7 +320,16 @@ fn generate_block(node: Node*); fn generate_statement(node: Node*) { if (node.typ == AST_RETURN) { - generate_expr_into_rax(node.d.unary); + if (node.d.unary) { + generate_expr_into_rax(node.d.unary); + } else { + emit_asm(" xor rax, rax\n"); // Default to 0 + } + + emit_asm(" push rax\n"); + // TODO: Undo the defer stack here, this is for consistency with the C implementation for now. + emit_asm(" pop rax\n"); + emit_asm(" mov rsp, rbp\n"); emit_asm(" pop rbp\n"); emit_asm(" ret\n"); @@ -305,40 +344,48 @@ fn generate_statement(node: Node*) { } else if (node.typ == AST_IF) { let label = ++gen_label_counter; generate_expr_into_rax(node.d.conditional.cond); - emit_asm(" cmp rax, 0\n"); - emit_asm(" je .els"); emit_num(label); emit_asm("\n"); - generate_statement(node.d.conditional.then); - emit_asm(" jmp .if"); emit_num(label); emit_asm("\n"); - emit_asm(".els"); emit_num(label); emit_asm(":\n"); - if (node.d.conditional.els) + // If we don't have an `else` clause, we can simplify + if (node.d.conditional.els == null) { + emit_asm(" cmp rax, 0\n"); + emit_asm(" je .if_end_"); emit_num(label); emit_asm("\n"); + generate_statement(node.d.conditional.then); + emit_asm(".if_end_"); emit_num(label); emit_asm(":\n"); + } else { + emit_asm(" cmp rax, 0\n"); + emit_asm(" je .if_else_"); emit_num(label); emit_asm("\n"); + generate_statement(node.d.conditional.then); + emit_asm(" jmp .if_end_"); emit_num(label); emit_asm("\n"); + emit_asm(".if_else_"); emit_num(label); emit_asm(":\n"); generate_statement(node.d.conditional.els); - emit_asm(".if"); emit_num(label); emit_asm(":\n"); - + emit_asm(".if_end_"); emit_num(label); emit_asm(":\n"); + } } else if (node.typ == AST_WHILE) { let label = ++gen_label_counter; - emit_asm(".loop_s"); emit_num(label); emit_asm(":\n"); + emit_asm(".loop_start_"); emit_num(label); emit_asm(":\n"); + emit_asm(".loop_continue_"); emit_num(label); emit_asm(":\n"); generate_expr_into_rax(node.d.looop.cond); emit_asm(" cmp rax, 0\n"); - emit_asm(" je .loop_e"); emit_num(label); emit_asm("\n"); + emit_asm(" je .loop_end_"); emit_num(label); emit_asm("\n"); generate_statement(node.d.looop.body); - emit_asm(" jmp .loop_s"); emit_num(label); emit_asm("\n"); - emit_asm(".loop_e"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .loop_start_"); emit_num(label); emit_asm("\n"); + emit_asm(".loop_end_"); emit_num(label); emit_asm(":\n"); } else if (node.typ == AST_FOR) { let label = ++gen_label_counter; if (node.d.looop.init) generate_statement(node.d.looop.init); - emit_asm(".loop_s"); emit_num(label); emit_asm(":\n"); + emit_asm(".loop_start_"); emit_num(label); emit_asm(":\n"); if (node.d.looop.cond) { generate_expr_into_rax(node.d.looop.cond); emit_asm(" cmp rax, 0\n"); - emit_asm(" je .loop_e"); emit_num(label); emit_asm("\n"); + emit_asm(" je .loop_end_"); emit_num(label); emit_asm("\n"); } generate_statement(node.d.looop.body); + emit_asm(".loop_continue_"); emit_num(label); emit_asm(":\n"); if (node.d.looop.step) generate_statement(node.d.looop.step); - emit_asm(" jmp .loop_s"); emit_num(label); emit_asm("\n"); - emit_asm(".loop_e"); emit_num(label); emit_asm(":\n"); + emit_asm(" jmp .loop_start_"); emit_num(label); emit_asm("\n"); + emit_asm(".loop_end_"); emit_num(label); emit_asm(":\n"); } else { // Default to a simple expression statement @@ -354,6 +401,10 @@ fn generate_block(node: Node*) { } fn generate_function(node: Node*) { + // Skip declarations + if (node.d.func.body == null) + return; + emit_asm3("global func_", node.d.func.name, "\n"); emit_asm3("func_", node.d.func.name, ":\n"); emit_asm(" push rbp\n"); @@ -364,6 +415,8 @@ fn generate_function(node: Node*) { emit_asm(" mov rsp, rbp\n"); emit_asm(" pop rbp\n"); + // Return 0 by default if we don't have a return statement + emit_asm(" mov qword rax, 0\n"); emit_asm(" ret\n"); } @@ -409,11 +462,12 @@ fn generate_program(ast: Node*, file: File*) { for (let i = 0; i < n; ++i) { let node: Node* = ast.d.block.children.data[i]; if (node.typ == AST_VARDECL && node.d.var_decl.init) { + let expr = node.d.var_decl.init; generate_expr_into_rax(node.d.var_decl.init); let offset = node.d.var_decl.var.offset; emit_asm(" mov rbx, global_vars\n"); emit_asm(" add rbx, "); emit_num(offset); emit_asm("\n"); - emit_asm(" mov [rbx], rax\n"); + emit_asm3(" mov [rbx], ", subregister_for_type(expr.etyp), "\n"); } } @@ -436,6 +490,6 @@ fn generate_program(ast: Node*, file: File*) { emit_asm_char('`'); emit_asm(gen_string_literals.data[i]); emit_asm_char('`'); - emit_asm("\n"); + emit_asm(", 0\n"); } }
\ No newline at end of file diff --git a/compiler/lexer.cup b/compiler/lexer.cup index 50b06e2..9e967f8 100644 --- a/compiler/lexer.cup +++ b/compiler/lexer.cup @@ -19,16 +19,11 @@ fn lexer_new(filename: char*, src: char*, len: int): Lexer* { } fn lexer_new_open_file(filename: char*): Lexer* { - let input_file = fopen(filename, 'r'); - defer fclose(input_file); - - // using `fmap` here doesn't work on linux, for some reason. - let file_size = fsize(input_file); - let src: char* = malloc(file_size+1); - fread(input_file, src, file_size); - src[file_size] = '\0'; - - return lexer_new(filename, src, file_size); + let file = fopen(filename, 'r'); + let size = 0; + let source = fread_to_string(file, &size); + fclose(file); + return lexer_new(filename, source, size); } fn lexer_loc(lexer: Lexer*, loc: Location*) { diff --git a/compiler/main.cup b/compiler/main.cup index fa7db31..7a4ae0c 100644 --- a/compiler/main.cup +++ b/compiler/main.cup @@ -5,23 +5,80 @@ import "compiler/builtins.cup" import "compiler/parser.cup" import "compiler/codegen.cup" +fn print_usage_and_exit(name: char*, status: int) { + puts("Usage: "); puts(name); putsln(" [options] <file>"); + putsln("Options:"); + putsln(" -c <code> Code to compile"); + putsln(" -h Show this help"); + putsln(" -o <file> Output file"); + putsln(" -d Dump AST to stdout"); + putsln("Output file will be named 'build/host.nasm' by default"); + exit(status); +} + +// FIXME: Allow variables functions to be static to a file? +// we can avoid the prefixes then. +const MAX_STDIN_SOURCE_LEN = 4096; +let m_filename: char*; +let m_outut_filename = "build/host.nasm"; +let m_source: char*; +let m_source_len = 0; +let m_dump_ast = false; + +fn parse_cli_args(argc: int, argv: char**) { + for (let i = 1; i < argc; ++i) { + if (streq(argv[i], "-c")) { + m_source_len = strlen(argv[i+1]); + m_source = argv[i+1]; + ++i; + m_filename = "CLI"; + } else if (streq(argv[i], "-h")) { + print_usage_and_exit(argv[0], 0); + } else if (streq(argv[i], "-d")) { + m_dump_ast = true; + } else if (streq(argv[i], "-o")) { + ++i; + m_outut_filename = argv[i]; + } else if (m_filename == null) { + if (streq(argv[i], "-")) { + m_filename = "stdin"; + m_source = malloc(MAX_STDIN_SOURCE_LEN); + m_source_len = read(0, m_source, MAX_STDIN_SOURCE_LEN); + m_source[m_source_len] = 0; + if (m_source_len == MAX_STDIN_SOURCE_LEN) { + putsln("Source too long to use through stdin"); + exit(1); + } + } else { + m_filename = argv[i]; + let file = fopen(m_filename, 'r'); + m_source = fread_to_string(file, &m_source_len); + fclose(file); + } + } else { + print_usage_and_exit(argv[0], 1); + } + } + if (m_filename == null) { + print_usage_and_exit(argv[0], 1); + } +} + fn main(argc: int, argv: char **): int { - if (argc != 2) - die("Usage: cupcc <input_file>"); + parse_cli_args(argc, argv); - let lexer = lexer_new_open_file(argv[1]); + let lexer = lexer_new(m_filename, m_source, m_source_len); let ast = parse_program(lexer); - dump_ast(ast, 0); - - let out_file = fopen("build/host.nasm", 'w'); - defer fclose(out_file); + if (m_dump_ast) + dump_ast(ast, 0); + let out_file = fopen(m_outut_filename, 'w'); generate_program(ast, out_file); + fclose(out_file); // TODO: Defer on self-host - puts("---------------------------\n"); - - puts("Total amount of memory used by malloc: "); - putu(__malloc_buf_pos); - putsln("\nDone."); + // puts("---------------------------\n"); + // puts("Total amount of memory used by malloc: "); + // putu(__malloc_buf_pos); + // putsln("\nDone."); }
\ No newline at end of file diff --git a/compiler/parser.cup b/compiler/parser.cup index 8f02c57..d57885e 100644 --- a/compiler/parser.cup +++ b/compiler/parser.cup @@ -425,8 +425,32 @@ fn parse_factor(lexer: Lexer*): Node* { lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACKET); } else if (token.typ == TOKEN_DOT) { - die_loc(here, &token.loc, "Member access not implemented"); + lexer_next_assert(lexer, &token, TOKEN_DOT); + if (!is_struct_or_structptr(expr.etyp)) { + putsln(create_type_string(expr.etyp)); + die_loc(here, &token.loc, "Cannot access member of non-struct type"); + } + + let is_ptr = expr.etyp.typ == TYPE_PTR; + let struct_type = is_ptr ? expr.etyp.ptr : expr.etyp; + + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + let name = token.value.as_string; + let field = compound_find_field(struct_type, name); + + if (field == null) { + puts("Struct type: "); putsln(create_type_string(struct_type)); + puts("Field name: "); putsln(name); + die_loc(here, &token.loc, "Invalid field name for struct"); + } + + let member = node_new(AST_MEMBER); + member.etyp = field.typ; + member.d.member.obj = expr; + member.d.member.offset = field.offset; + member.d.member.is_ptr = (expr.etyp.typ == TYPE_PTR); + expr = decay_array_to_pointer(member, &token); } else { running = false; } @@ -686,7 +710,16 @@ fn parse_var_declaration(lexer: Lexer*): Node* { if (token.typ == TOKEN_ASSIGN) { lexer_next(lexer, &token); decl.init = parse_expression(lexer); - decl.var.typ = decl.init.etyp; + + if (missing_type) { + decl.var.typ = decl.init.etyp; + } else if (!is_convertible(decl.var.typ, decl.init.etyp)) { + puts("- Variable type: "); putsln(create_type_string(decl.var.typ)); + puts("- Value type: "); putsln(create_type_string(decl.init.etyp)); + die_loc2(here, &token.loc, "Type mismatch for variable declaration: ", decl.var.name); + } + + node.etyp = decl.init.etyp; } else if (missing_type) { die_loc(here, &token.loc, "Expected ':' or '=' after variable declaration"); } @@ -849,7 +882,7 @@ fn parse_statement(lexer: Lexer*): Node* { node = parse_for_loop(lexer); } else if (token.typ == TOKEN_DEFER) { - die("defer is not implemented yet"); + die_loc(here, &token.loc, "defer is not implemented yet"); } else if (token.typ == TOKEN_LET) { node = parse_var_declaration(lexer); lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); @@ -882,6 +915,114 @@ fn parse_block(lexer: Lexer*): Node* { return block; } +// FIXME: Make this a real type +fn parse_enum_declaration(lexer: Lexer*) { + let token: Token; + // TODO: This is all a hack to automatically number + // Some constants. It does not behave like a type, + // and cannot be used as one. Fix this in the future. + lexer_next_assert(lexer, &token, TOKEN_ENUM); + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); // Use this! + lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE); + + let enum_count = 0; + lexer_peek(lexer, &token); + while (token.typ != TOKEN_CLOSE_BRACE) { + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + + if (identifier_exists(&token)) + die_loc(here, &token.loc, "Identifier already exists, enums just behave like numbered constants."); + + constant_push(token.value.as_string, enum_count); + ++enum_count; + + lexer_peek(lexer, &token); + if (token.typ == TOKEN_COMMA) { + lexer_next(lexer, &token); + lexer_peek(lexer, &token); + } else if (token.typ != TOKEN_CLOSE_BRACE) { + die_loc(here, &token.loc, "Expected a comma or a closing brace."); + } + } + lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); +} + +// FIXME: This should just be part of `parse_type()`, and we should be allowed +// to parse a type without a name. Probably also need to handle converstions +// between structs with similar embedded types. +fn parse_struct_union_declaration(lexer: Lexer*, top_level: int): Type* { + let token: Token; + lexer_next(lexer, &token); + + if (token.typ != TOKEN_STRUCT && token.typ != TOKEN_UNION) + die_loc(here, &token.loc, "Expected STRUCT or UNION in parse_struct_union_declaration"); + + let compound = type_new(token.typ == TOKEN_STRUCT ? TYPE_STRUCT : TYPE_UNION); + compound.fields = vector_new(); + + lexer_peek(lexer, &token); + + // For nested temporary structs we don't need a name + if (token.typ != TOKEN_IDENTIFIER && top_level) + die_loc(here, &token.loc, "You need to specify a name for the struct defined globally."); + + // But if they do provide one, we'll add it to the list of defined structs so they + // it can referenced internally. + if (token.typ == TOKEN_IDENTIFIER) { + compound.struct_name = token.value.as_string; + vector_push(p_compound_type_stack, compound); + lexer_next(lexer, &token); + } else { + compound.struct_name = "<anonymous>"; + } + + lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE); + + lexer_peek(lexer, &token); + while (token.typ != TOKEN_CLOSE_BRACE) { + // TODO: Allow no-name fields + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + let name = token.value.as_string; + + lexer_next_assert(lexer, &token, TOKEN_COLON); + lexer_peek(lexer, &token); + + // We want to allow nested temporary structs. + let typ: Type*; + if (token.typ == TOKEN_STRUCT || token.typ == TOKEN_UNION) { + // Nested structs live in their own "namespace", can't be accessed + // from outside, so we will pop them off the stack once done. + let prev_compound_count = p_compound_type_stack.size; + typ = parse_struct_union_declaration(lexer, false); + p_compound_type_stack.size = prev_compound_count; + + } else { + typ = parse_type(lexer); + } + + compound_push_field(compound, name, typ); + lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); + lexer_peek(lexer, &token); + } + lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); + + // printf("Defined %s: %s, size: %lld\n", + // compound.type == TYPE_UNION ? "union":"struct", + // compound.struct_name, + // compound.fields.size + // ); + // for (int i = 0; i < compound.fields.num_fields; i++) { + // printf("\t%s: %s (offset: %lld, size: %lld)\n", + // compound.fields.name[i], + // type_to_str(compound.fields.typ[i]), + // compound.fields.offset[i], + // size_for_type(compound.fields.typ[i]) + // ); + // } + + return compound; +} + fn parse_function(lexer: Lexer*): Node* { let token: Token; @@ -889,28 +1030,53 @@ fn parse_function(lexer: Lexer*): Node* { lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); // TODO: Check if identifier exists - let node = node_new(AST_FUNC); - node.d.func.name = token.value.as_string; + let func = node_new(AST_FUNC); + let dfunc = func; + func.d.func.name = token.value.as_string; - vector_push(p_all_functions, node); - p_current_function = node; + // If the identifier exists, there's 3 possible cases: + // 1. It's another variable / struct, which is an error. + // 2. It's a function that's been defined, which is an error. + // 3. It's a function that's been declared (but not defined), which is OK + if (identifier_exists(&token)) { + dfunc = find_function_definition(&token); + // Case 1 + if (dfunc == null) + die_loc(here, &token.loc, "Function name already exists as an identifier"); + // Case 2 + if (dfunc.d.func.is_defined) + die_loc(here, &token.loc, "Function already defined earlier"); + + // Case 3 (No error, just set the current function correctly) + p_current_function = func; + } else { + // We don't have a declaration yet, push this. + vector_push(p_all_functions, func); + p_current_function = func; + } lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); - parse_function_params(lexer, node); + parse_function_params(lexer, func); lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); lexer_peek(lexer, &token); if (token.typ == TOKEN_COLON) { lexer_next(lexer, &token); - node.etyp = parse_type(lexer); + func.etyp = parse_type(lexer); } else { - node.etyp = type_new(TYPE_VOID); + func.etyp = type_new(TYPE_VOID); } - node.d.func.body = parse_block(lexer); + lexer_peek(lexer, &token); + if (token.typ == TOKEN_OPEN_BRACE) { + func.d.func.body = parse_block(lexer); + func.d.func.is_defined = true; + } else { + func.d.func.is_defined = false; + } p_current_function = null; - return node; + return func; } let p_opened_files = vector_new(); @@ -925,7 +1091,7 @@ fn parser_open_new_file(path: char*) { } for (let i = 0; i < p_opened_files.size; i = i + 1) { if (streq(p_opened_files.data[i], path)) { - puts("Already opened file: "); puts(path); putsln(": Ignoring."); + // Already opened this file, ignore return; } } @@ -958,6 +1124,10 @@ fn parse_program(lexer: Lexer*): Node* { let path = token.value.as_string; parser_open_new_file(path); lexer = vector_top(p_lexer_stack); + } else if (token.typ == TOKEN_STRUCT || token.typ == TOKEN_UNION) { + parse_struct_union_declaration(lexer, true); + } else if (token.typ == TOKEN_ENUM) { + parse_enum_declaration(lexer); } else { die_loc2(here, &token.loc, "unexpected token in parse_program: ", token_type_to_string(token.typ)); } diff --git a/compiler/types.cup b/compiler/types.cup index 10a2d6f..9b0d822 100644 --- a/compiler/types.cup +++ b/compiler/types.cup @@ -1,4 +1,5 @@ import "std/common.cup" +import "std/vector.cup" enum BaseType { TYPE_VOID, @@ -19,11 +20,7 @@ struct Type { struct_name: char*; size: int; array_size: int; - fields: struct { - names: char**; - types: Type**; - num_fields: int; - }; + fields: Vector*; // Vector<Variable*>; }; fn size_for_base_type(type: int): int { @@ -86,7 +83,8 @@ fn create_type_string(typ: Type *): char* { else if (typ.typ == TYPE_CHAR) strcat(buf, "char"); else if (typ.typ == TYPE_VOID) strcat(buf, "void"); else if (typ.typ == TYPE_ANY) strcat(buf, "any"); - else die("type_to_string: unknown type"); + else if (typ.typ == TYPE_STRUCT) strcat(buf, typ.struct_name); + else die("create_type_string: unknown type"); return buf; } |