aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMustafa Quraish <[email protected]>2022-02-05 19:05:40 -0500
committerMustafa Quraish <[email protected]>2022-02-05 19:05:40 -0500
commit8164626888793c0e706a8dd9f65a2bedb4110b55 (patch)
tree235a1381f3cebb5447f9baa807e470945ba04e81
parentAdd implementation of self-hosted compiler so far (diff)
downloadcup-8164626888793c0e706a8dd9f65a2bedb4110b55.tar.xz
cup-8164626888793c0e706a8dd9f65a2bedb4110b55.zip
[compiler.cup] Add support for lexically scoped local variables
-rw-r--r--compiler/codegen.cup37
-rw-r--r--compiler/lexer.cup5
-rw-r--r--compiler/parser.cup112
-rw-r--r--compiler/types.cup12
-rwxr-xr-xrun.sh28
-rw-r--r--std/common.cup16
-rw-r--r--std/vector.cup7
7 files changed, 177 insertions, 20 deletions
diff --git a/compiler/codegen.cup b/compiler/codegen.cup
index 41eea33..70a83c8 100644
--- a/compiler/codegen.cup
+++ b/compiler/codegen.cup
@@ -34,6 +34,16 @@ fn generate_syscall(num: int) {
emit_asm(" syscall\n");
}
+fn generate_lvalue_into_rax(node: Node*) {
+ if (node.typ == AST_LOCAL_VAR) {
+ let offset = node.d.variable.offset;
+ emit_asm(" mov rax, rbp\n");
+ emit_asm(" sub rax, "); emit_num(offset); emit_asm("\n");
+ } else {
+ die2("Unsupported type in generate_lvalue_into_rax: ", node_type_to_string(node.typ));
+ }
+}
+
fn generate_expr_into_rax(node: Node*) {
if (node.typ == AST_LITERAL) {
if (node.etyp.typ == TYPE_INT) {
@@ -76,15 +86,42 @@ fn generate_expr_into_rax(node: Node*) {
generate_expr_into_rax(node.d.binary.lhs);
emit_asm(" pop rbx\n");
emit_asm(" imul rbx\n");
+ } else if (is_lvalue(node.typ)) {
+ generate_lvalue_into_rax(node);
+ emit_asm(" mov rax, [rax]\n");
+
+ } else if (node.typ == AST_ASSIGN) {
+ putsln("...");
+ generate_lvalue_into_rax(node.d.assign.lhs);
+ emit_asm(" push rax\n");
+ generate_expr_into_rax(node.d.assign.rhs);
+ emit_asm(" pop rbx\n");
+ emit_asm(" mov [rbx], rax\n");
+
+ } else {
+ die2("Unsupported node type in generate_expr_into_rax: ", node_type_to_string(node.typ));
}
}
+fn generate_block(node: Node*);
+
fn generate_statement(node: Node*) {
if (node.typ == AST_RETURN) {
generate_expr_into_rax(node.d.unary);
emit_asm(" mov rsp, rbp\n");
emit_asm(" pop rbp\n");
emit_asm(" ret\n");
+ } else if (node.typ == AST_VARDECL) {
+ if (node.d.var_decl.init) {
+ generate_expr_into_rax(node.d.var_decl.init);
+ let offset = node.d.var_decl.var.offset;
+ emit_asm(" mov [rbp-"); emit_num(offset); emit_asm("], rax\n");
+ }
+ } else if (node.typ == AST_BLOCK) {
+ generate_block(node);
+ } else {
+ // Default to a simple expression statement
+ generate_expr_into_rax(node);
}
}
diff --git a/compiler/lexer.cup b/compiler/lexer.cup
index ff22d8f..c0062a2 100644
--- a/compiler/lexer.cup
+++ b/compiler/lexer.cup
@@ -85,12 +85,7 @@ fn lexer_make_token(lexer: Lexer*, token: Token*, typ: int, inc: int) {
fn lexer_next(lexer: Lexer*, token: Token*) {
while (lexer.pos < lexer.len) {
- putsln("101.1");
- print(lexer.pos);
- print(lexer.len);
let c = lexer.src[lexer.pos];
- putc(c);
- putc('\n');
if (c == '\n') { ++lexer.line; lexer.col = 0; ++lexer.pos; }
else if (is_space(c)) { lexer_advance(lexer, 1); }
diff --git a/compiler/parser.cup b/compiler/parser.cup
index 48e4514..a910865 100644
--- a/compiler/parser.cup
+++ b/compiler/parser.cup
@@ -4,10 +4,36 @@ import "compiler/lexer.cup"
// p_ prefix for parser global variables.
let p_all_functions = vector_new();
+let p_current_function: Node* = null;
let p_block_stack = vector_new();
let p_cur_stack_offset = 0;
+fn block_stack_push(block: Node*) {
+ vector_push(p_block_stack, block);
+}
+
+fn block_stack_pop() {
+ let block: Node* = vector_pop(p_block_stack);
+ p_cur_stack_offset = p_cur_stack_offset - block.d.block.locals_size;
+}
+
+fn find_local_variable(token: Token*): Variable* {
+ if (p_current_function == null) return null;
+
+ for (let i = p_block_stack.size - 1; i >= 0; --i) {
+ let block: Node* = p_block_stack.data[i];
+ for (let j = 0; j < block.d.block.locals.size; ++j) {
+ let var: Variable* = block.d.block.locals.data[j];
+ if (streq(token.value.as_string, var.name)) {
+ return var;
+ }
+ }
+ }
+
+ return null;
+}
+
fn parse_literal(lexer: Lexer*): Node* {
let token: Token;
lexer_next(lexer, &token);
@@ -60,7 +86,23 @@ fn parse_type(lexer: Lexer*): Type* {
return typ;
}
-// pragma region expressions
+fn parse_identifier(lexer: Lexer*): Node* {
+ let token: Token;
+ lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
+
+ let expr: Node*;
+ let var = find_local_variable(&token);
+ if (var != null) {
+ expr = node_new(AST_LOCAL_VAR);
+ expr.d.variable = var;
+ expr.etyp = var.typ;
+ return expr;
+ }
+
+ die_loc2(&token.loc, "Unknown identifier in parse_identifier: ", token.value.as_string);
+}
+
+
fn parse_expression(lexer: Lexer*): Node*;
fn parse_factor(lexer: Lexer*): Node* {
@@ -91,6 +133,9 @@ fn parse_factor(lexer: Lexer*): Node* {
} else if (is_literal_token(token.typ)) {
expr = parse_literal(lexer);
+ } else if (token.typ == TOKEN_IDENTIFIER) {
+ expr = parse_identifier(lexer);
+
} else {
die_loc2(&token.loc, ": Unexpected token found in parse_factor: ", token_type_to_string(token.typ));
}
@@ -274,7 +319,40 @@ fn parse_conditional_exp(lexer: Lexer*): Node* {
}
fn parse_expression(lexer: Lexer*): Node* {
- return parse_conditional_exp(lexer);
+ let lhs = parse_conditional_exp(lexer);
+ putsln(node_type_to_string(lhs.typ));
+ if (is_lvalue(lhs.typ)) {
+ let token: Token;
+ lexer_peek(lexer, &token);
+ if (token.typ == TOKEN_ASSIGN) {
+ lexer_next(lexer, &token);
+ let node = node_new(AST_ASSIGN);
+ node.d.assign.lhs = lhs;
+ node.d.assign.rhs = parse_expression(lexer);
+ lhs = node;
+ }
+ }
+ return lhs;
+}
+
+fn add_variable_to_current_block(var: Variable*) {
+ // Set offset for variable
+ let cur_block: Node* = vector_top(p_block_stack);
+ let var_size = align_up(size_for_type(var.typ), 8);
+
+ // Add to the block
+ // FIXME: Use a map here
+ vector_push(cur_block.d.block.locals, var);
+
+ assert(p_current_function != null);
+ // Update current stack offset (w.r.t function stack frame) and block size
+ p_cur_stack_offset = p_cur_stack_offset + var_size;
+ cur_block.d.block.locals_size = cur_block.d.block.locals_size + var_size;
+ var.offset = p_cur_stack_offset;
+
+ // Update function's max locals size
+ let max_offset = max(p_current_function.d.func.max_locals_size, p_cur_stack_offset);
+ p_current_function.d.func.max_locals_size = max_offset;
}
fn parse_var_declaration(lexer: Lexer*): Node* {
@@ -283,24 +361,25 @@ fn parse_var_declaration(lexer: Lexer*): Node* {
lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
// TODO: check if identifier is already defined
let node = node_new(AST_VARDECL);
- node.d.var_decl.var.name = token.value.as_string;
+ let decl = &node.d.var_decl;
+ decl.var.name = token.value.as_string;
lexer_peek(lexer, &token);
let has_type = false;
- if (token.typ == TOKEN_COLON) {
- lexer_next(lexer, &token);
- has_type = true;
- node.d.var_decl.var.typ = parse_type(lexer);
- lexer_peek(lexer, &token);
- }
-
+ // TODO: implicit types when type system is better
+ lexer_next_assert(lexer, &token, TOKEN_COLON);
+ decl.var.typ = parse_type(lexer);
+
+ lexer_peek(lexer, &token);
if (token.typ == TOKEN_ASSIGN) {
lexer_next(lexer, &token);
- node.d.var_decl.init = parse_expression(lexer);
+ decl.init = parse_expression(lexer);
} else if (!has_type) {
die_loc(&token.loc, "Expected ':' or '=' after variable declaration");
}
+ add_variable_to_current_block(&decl.var);
+
return node;
}
@@ -374,6 +453,9 @@ fn parse_block(lexer: Lexer*): Node* {
let block = node_new(AST_BLOCK);
block.d.block.children = vector_new();
+ block.d.block.locals = vector_new();
+
+ block_stack_push(block);
lexer_peek(lexer, &token);
while (token.typ != TOKEN_CLOSE_BRACE) {
@@ -381,6 +463,8 @@ fn parse_block(lexer: Lexer*): Node* {
lexer_peek(lexer, &token);
}
lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE);
+
+ block_stack_pop();
return block;
}
@@ -389,9 +473,13 @@ fn parse_function(lexer: Lexer*): Node* {
lexer_next_assert(lexer, &token, TOKEN_FN);
lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
+
// TODO: Check if identifier exists
let node = node_new(AST_FUNC);
node.d.func.name = token.value.as_string;
+
+ vector_push(p_all_functions, node);
+ p_current_function = node;
lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
parse_function_params(lexer, node);
@@ -406,6 +494,8 @@ fn parse_function(lexer: Lexer*): Node* {
}
node.d.func.body = parse_block(lexer);
+
+ p_current_function = null;
return node;
}
diff --git a/compiler/types.cup b/compiler/types.cup
index f3c7b38..c2afdec 100644
--- a/compiler/types.cup
+++ b/compiler/types.cup
@@ -34,6 +34,18 @@ fn size_for_base_type(type: int): int {
return 0;
}
+fn size_for_type(typ: Type*): int {
+ if (typ.typ == TYPE_INT) return 8;
+ if (typ.typ == TYPE_PTR) return 8;
+ if (typ.typ == TYPE_CHAR) return 1;
+ if (typ.typ == TYPE_ARRAY) return typ.array_size * size_for_type(typ.ptr);
+ if (typ.typ == TYPE_STRUCT) return typ.size;
+ if (typ.typ == TYPE_UNION) return typ.size;
+ if (typ.typ == TYPE_VOID) return 0;
+ if (typ.typ == TYPE_ANY) return 8;
+ die("Unknown type in size_for_type");
+}
+
let _type_int: Type* = null;
let _type_char: Type* = null;
let _type_void: Type* = null;
diff --git a/run.sh2 b/run.sh2
index 0e6ee64..9259dd4 100755
--- a/run.sh2
+++ b/run.sh2
@@ -1,15 +1,15 @@
#!/bin/bash
# This script does the following:
-# 1. Builds the project
-# 2. Compiles selected file
-# 3. Assembles executable from compiled asm
+# 1. Builds compiler written in C first
+# 2. Builds the compiler written in CUP with the C compiler
+# 3. Compiles the file specified through CLI with CUP compiler
# 4. Runs the executable
# 5. Echoes the output of the executable
if [ -z "$1" ]
then
- echo "Usage: $0 <arguments to cupcc>"
+ echo "Usage: $0 <arguments to build/cup.out>"
exit 1
fi
diff --git a/std/common.cup b/std/common.cup
index abd7e82..c2d008e 100644
--- a/std/common.cup
+++ b/std/common.cup
@@ -175,6 +175,18 @@ fn die(msg: char *) {
exit(1);
}
+fn die2(msg1: char *, msg2: char *) {
+ puts(msg1);
+ putsln(msg2);
+ exit(1);
+}
+
+// TODO: file/line numbers would be helpful
+fn assert(cond: int) {
+ if (!cond)
+ die("assertion failed");
+}
+
///////////////////////////////////////////////////////////////////////////////
// Math
@@ -201,6 +213,10 @@ fn factorial(n: int): int {
return res;
}
+fn align_up(val: int, algn: int): int {
+ return 8;
+}
+
///////////////////////////////////////////////////////////////////////////////
// Memory
diff --git a/std/vector.cup b/std/vector.cup
index eb76155..6b42fdc 100644
--- a/std/vector.cup
+++ b/std/vector.cup
@@ -37,4 +37,11 @@ fn vector_pop(vec: Vector*): void* {
vec.size = vec.size - 1;
return vec.data[vec.size];
+}
+
+fn vector_top(vec: Vector*): void* {
+ if (vec.size == 0)
+ die("Vector is empty, nothing to return.");
+
+ return vec.data[vec.size - 1];
} \ No newline at end of file