diff options
| author | Mustafa Quraish <[email protected]> | 2022-02-05 19:05:40 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-02-05 19:05:40 -0500 |
| commit | 8164626888793c0e706a8dd9f65a2bedb4110b55 (patch) | |
| tree | 235a1381f3cebb5447f9baa807e470945ba04e81 | |
| parent | Add implementation of self-hosted compiler so far (diff) | |
| download | cup-8164626888793c0e706a8dd9f65a2bedb4110b55.tar.xz cup-8164626888793c0e706a8dd9f65a2bedb4110b55.zip | |
[compiler.cup] Add support for lexically scoped local variables
| -rw-r--r-- | compiler/codegen.cup | 37 | ||||
| -rw-r--r-- | compiler/lexer.cup | 5 | ||||
| -rw-r--r-- | compiler/parser.cup | 112 | ||||
| -rw-r--r-- | compiler/types.cup | 12 | ||||
| -rwxr-xr-x | run.sh2 | 8 | ||||
| -rw-r--r-- | std/common.cup | 16 | ||||
| -rw-r--r-- | std/vector.cup | 7 |
7 files changed, 177 insertions, 20 deletions
diff --git a/compiler/codegen.cup b/compiler/codegen.cup index 41eea33..70a83c8 100644 --- a/compiler/codegen.cup +++ b/compiler/codegen.cup @@ -34,6 +34,16 @@ fn generate_syscall(num: int) { emit_asm(" syscall\n"); } +fn generate_lvalue_into_rax(node: Node*) { + if (node.typ == AST_LOCAL_VAR) { + let offset = node.d.variable.offset; + emit_asm(" mov rax, rbp\n"); + emit_asm(" sub rax, "); emit_num(offset); emit_asm("\n"); + } else { + die2("Unsupported type in generate_lvalue_into_rax: ", node_type_to_string(node.typ)); + } +} + fn generate_expr_into_rax(node: Node*) { if (node.typ == AST_LITERAL) { if (node.etyp.typ == TYPE_INT) { @@ -76,15 +86,42 @@ fn generate_expr_into_rax(node: Node*) { generate_expr_into_rax(node.d.binary.lhs); emit_asm(" pop rbx\n"); emit_asm(" imul rbx\n"); + } else if (is_lvalue(node.typ)) { + generate_lvalue_into_rax(node); + emit_asm(" mov rax, [rax]\n"); + + } else if (node.typ == AST_ASSIGN) { + putsln("..."); + generate_lvalue_into_rax(node.d.assign.lhs); + emit_asm(" push rax\n"); + generate_expr_into_rax(node.d.assign.rhs); + emit_asm(" pop rbx\n"); + emit_asm(" mov [rbx], rax\n"); + + } else { + die2("Unsupported node type in generate_expr_into_rax: ", node_type_to_string(node.typ)); } } +fn generate_block(node: Node*); + fn generate_statement(node: Node*) { if (node.typ == AST_RETURN) { generate_expr_into_rax(node.d.unary); emit_asm(" mov rsp, rbp\n"); emit_asm(" pop rbp\n"); emit_asm(" ret\n"); + } else if (node.typ == AST_VARDECL) { + if (node.d.var_decl.init) { + generate_expr_into_rax(node.d.var_decl.init); + let offset = node.d.var_decl.var.offset; + emit_asm(" mov [rbp-"); emit_num(offset); emit_asm("], rax\n"); + } + } else if (node.typ == AST_BLOCK) { + generate_block(node); + } else { + // Default to a simple expression statement + generate_expr_into_rax(node); } } diff --git a/compiler/lexer.cup b/compiler/lexer.cup index ff22d8f..c0062a2 100644 --- a/compiler/lexer.cup +++ b/compiler/lexer.cup @@ -85,12 +85,7 @@ fn lexer_make_token(lexer: Lexer*, token: Token*, typ: int, inc: int) { fn lexer_next(lexer: Lexer*, token: Token*) { while (lexer.pos < lexer.len) { - putsln("101.1"); - print(lexer.pos); - print(lexer.len); let c = lexer.src[lexer.pos]; - putc(c); - putc('\n'); if (c == '\n') { ++lexer.line; lexer.col = 0; ++lexer.pos; } else if (is_space(c)) { lexer_advance(lexer, 1); } diff --git a/compiler/parser.cup b/compiler/parser.cup index 48e4514..a910865 100644 --- a/compiler/parser.cup +++ b/compiler/parser.cup @@ -4,10 +4,36 @@ import "compiler/lexer.cup" // p_ prefix for parser global variables. let p_all_functions = vector_new(); +let p_current_function: Node* = null; let p_block_stack = vector_new(); let p_cur_stack_offset = 0; +fn block_stack_push(block: Node*) { + vector_push(p_block_stack, block); +} + +fn block_stack_pop() { + let block: Node* = vector_pop(p_block_stack); + p_cur_stack_offset = p_cur_stack_offset - block.d.block.locals_size; +} + +fn find_local_variable(token: Token*): Variable* { + if (p_current_function == null) return null; + + for (let i = p_block_stack.size - 1; i >= 0; --i) { + let block: Node* = p_block_stack.data[i]; + for (let j = 0; j < block.d.block.locals.size; ++j) { + let var: Variable* = block.d.block.locals.data[j]; + if (streq(token.value.as_string, var.name)) { + return var; + } + } + } + + return null; +} + fn parse_literal(lexer: Lexer*): Node* { let token: Token; lexer_next(lexer, &token); @@ -60,7 +86,23 @@ fn parse_type(lexer: Lexer*): Type* { return typ; } -// pragma region expressions +fn parse_identifier(lexer: Lexer*): Node* { + let token: Token; + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + + let expr: Node*; + let var = find_local_variable(&token); + if (var != null) { + expr = node_new(AST_LOCAL_VAR); + expr.d.variable = var; + expr.etyp = var.typ; + return expr; + } + + die_loc2(&token.loc, "Unknown identifier in parse_identifier: ", token.value.as_string); +} + + fn parse_expression(lexer: Lexer*): Node*; fn parse_factor(lexer: Lexer*): Node* { @@ -91,6 +133,9 @@ fn parse_factor(lexer: Lexer*): Node* { } else if (is_literal_token(token.typ)) { expr = parse_literal(lexer); + } else if (token.typ == TOKEN_IDENTIFIER) { + expr = parse_identifier(lexer); + } else { die_loc2(&token.loc, ": Unexpected token found in parse_factor: ", token_type_to_string(token.typ)); } @@ -274,7 +319,40 @@ fn parse_conditional_exp(lexer: Lexer*): Node* { } fn parse_expression(lexer: Lexer*): Node* { - return parse_conditional_exp(lexer); + let lhs = parse_conditional_exp(lexer); + putsln(node_type_to_string(lhs.typ)); + if (is_lvalue(lhs.typ)) { + let token: Token; + lexer_peek(lexer, &token); + if (token.typ == TOKEN_ASSIGN) { + lexer_next(lexer, &token); + let node = node_new(AST_ASSIGN); + node.d.assign.lhs = lhs; + node.d.assign.rhs = parse_expression(lexer); + lhs = node; + } + } + return lhs; +} + +fn add_variable_to_current_block(var: Variable*) { + // Set offset for variable + let cur_block: Node* = vector_top(p_block_stack); + let var_size = align_up(size_for_type(var.typ), 8); + + // Add to the block + // FIXME: Use a map here + vector_push(cur_block.d.block.locals, var); + + assert(p_current_function != null); + // Update current stack offset (w.r.t function stack frame) and block size + p_cur_stack_offset = p_cur_stack_offset + var_size; + cur_block.d.block.locals_size = cur_block.d.block.locals_size + var_size; + var.offset = p_cur_stack_offset; + + // Update function's max locals size + let max_offset = max(p_current_function.d.func.max_locals_size, p_cur_stack_offset); + p_current_function.d.func.max_locals_size = max_offset; } fn parse_var_declaration(lexer: Lexer*): Node* { @@ -283,24 +361,25 @@ fn parse_var_declaration(lexer: Lexer*): Node* { lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); // TODO: check if identifier is already defined let node = node_new(AST_VARDECL); - node.d.var_decl.var.name = token.value.as_string; + let decl = &node.d.var_decl; + decl.var.name = token.value.as_string; lexer_peek(lexer, &token); let has_type = false; - if (token.typ == TOKEN_COLON) { - lexer_next(lexer, &token); - has_type = true; - node.d.var_decl.var.typ = parse_type(lexer); - lexer_peek(lexer, &token); - } - + // TODO: implicit types when type system is better + lexer_next_assert(lexer, &token, TOKEN_COLON); + decl.var.typ = parse_type(lexer); + + lexer_peek(lexer, &token); if (token.typ == TOKEN_ASSIGN) { lexer_next(lexer, &token); - node.d.var_decl.init = parse_expression(lexer); + decl.init = parse_expression(lexer); } else if (!has_type) { die_loc(&token.loc, "Expected ':' or '=' after variable declaration"); } + add_variable_to_current_block(&decl.var); + return node; } @@ -374,6 +453,9 @@ fn parse_block(lexer: Lexer*): Node* { let block = node_new(AST_BLOCK); block.d.block.children = vector_new(); + block.d.block.locals = vector_new(); + + block_stack_push(block); lexer_peek(lexer, &token); while (token.typ != TOKEN_CLOSE_BRACE) { @@ -381,6 +463,8 @@ fn parse_block(lexer: Lexer*): Node* { lexer_peek(lexer, &token); } lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); + + block_stack_pop(); return block; } @@ -389,9 +473,13 @@ fn parse_function(lexer: Lexer*): Node* { lexer_next_assert(lexer, &token, TOKEN_FN); lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + // TODO: Check if identifier exists let node = node_new(AST_FUNC); node.d.func.name = token.value.as_string; + + vector_push(p_all_functions, node); + p_current_function = node; lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); parse_function_params(lexer, node); @@ -406,6 +494,8 @@ fn parse_function(lexer: Lexer*): Node* { } node.d.func.body = parse_block(lexer); + + p_current_function = null; return node; } diff --git a/compiler/types.cup b/compiler/types.cup index f3c7b38..c2afdec 100644 --- a/compiler/types.cup +++ b/compiler/types.cup @@ -34,6 +34,18 @@ fn size_for_base_type(type: int): int { return 0; } +fn size_for_type(typ: Type*): int { + if (typ.typ == TYPE_INT) return 8; + if (typ.typ == TYPE_PTR) return 8; + if (typ.typ == TYPE_CHAR) return 1; + if (typ.typ == TYPE_ARRAY) return typ.array_size * size_for_type(typ.ptr); + if (typ.typ == TYPE_STRUCT) return typ.size; + if (typ.typ == TYPE_UNION) return typ.size; + if (typ.typ == TYPE_VOID) return 0; + if (typ.typ == TYPE_ANY) return 8; + die("Unknown type in size_for_type"); +} + let _type_int: Type* = null; let _type_char: Type* = null; let _type_void: Type* = null; @@ -1,15 +1,15 @@ #!/bin/bash # This script does the following: -# 1. Builds the project -# 2. Compiles selected file -# 3. Assembles executable from compiled asm +# 1. Builds compiler written in C first +# 2. Builds the compiler written in CUP with the C compiler +# 3. Compiles the file specified through CLI with CUP compiler # 4. Runs the executable # 5. Echoes the output of the executable if [ -z "$1" ] then - echo "Usage: $0 <arguments to cupcc>" + echo "Usage: $0 <arguments to build/cup.out>" exit 1 fi diff --git a/std/common.cup b/std/common.cup index abd7e82..c2d008e 100644 --- a/std/common.cup +++ b/std/common.cup @@ -175,6 +175,18 @@ fn die(msg: char *) { exit(1); } +fn die2(msg1: char *, msg2: char *) { + puts(msg1); + putsln(msg2); + exit(1); +} + +// TODO: file/line numbers would be helpful +fn assert(cond: int) { + if (!cond) + die("assertion failed"); +} + /////////////////////////////////////////////////////////////////////////////// // Math @@ -201,6 +213,10 @@ fn factorial(n: int): int { return res; } +fn align_up(val: int, algn: int): int { + return 8; +} + /////////////////////////////////////////////////////////////////////////////// // Memory diff --git a/std/vector.cup b/std/vector.cup index eb76155..6b42fdc 100644 --- a/std/vector.cup +++ b/std/vector.cup @@ -37,4 +37,11 @@ fn vector_pop(vec: Vector*): void* { vec.size = vec.size - 1; return vec.data[vec.size]; +} + +fn vector_top(vec: Vector*): void* { + if (vec.size == 0) + die("Vector is empty, nothing to return."); + + return vec.data[vec.size - 1]; }
\ No newline at end of file |