aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMustafa Quraish <[email protected]>2022-01-29 17:37:00 -0500
committerMustafa Quraish <[email protected]>2022-01-29 17:37:00 -0500
commita9683512b29abd53814bf834e23f752a8999a679 (patch)
tree033d76dbf7b1249354d2967b9de53fd6af20b7cb
parentAdd i64{max,min} helper functions (diff)
downloadcup-a9683512b29abd53814bf834e23f752a8999a679.tar.xz
cup-a9683512b29abd53814bf834e23f752a8999a679.zip
Implement blocks (lexically scoped) and conditionals
-rw-r--r--cup/ast.c8
-rw-r--r--cup/ast.h19
-rw-r--r--cup/generator.c47
-rw-r--r--cup/lexer.c2
-rw-r--r--cup/parser.c156
-rwxr-xr-xtest.sh5
-rw-r--r--tests/common.sh20
-rwxr-xr-xtests/compound.sh161
-rwxr-xr-xtests/conditions.sh65
-rwxr-xr-xtests/variables.sh43
10 files changed, 487 insertions, 39 deletions
diff --git a/cup/ast.c b/cup/ast.c
index 473af90..76ace8a 100644
--- a/cup/ast.c
+++ b/cup/ast.c
@@ -135,14 +135,6 @@ void dump_func(Node *node, int depth)
printf(" -> ");
print_type_to_file(stdout, node->func.return_type);
}
- if (node->func.num_locals > 0) {
- printf("\n locals: \n");
- for (int i = 0; i < node->func.num_locals; i++) {
- printf(" - `%s`, offset: %lld (", node->func.locals[i]->name, node->func.locals[i]->offset);
- print_type_to_file(stdout, node->func.locals[i]->type);
- printf(")\n");
- }
- }
do_print_ast(node->func.body, depth + 1);
}
diff --git a/cup/ast.h b/cup/ast.h
index efbaf84..46557da 100644
--- a/cup/ast.h
+++ b/cup/ast.h
@@ -24,6 +24,8 @@
F(OP_GEQ, ">=") \
F(OP_ASSIGN, "=") \
F(AST_LITERAL, "literal") \
+ F(AST_CONDITIONAL, "conditional expression") \
+ F(AST_IF, "if statement") \
F(AST_VARDECL, "variable decl") \
F(AST_VAR, "variable") \
F(AST_RETURN, "return") \
@@ -82,10 +84,9 @@ typedef struct ast_node {
Type return_type;
Node *body;
- Variable **locals;
- int num_locals;
-
- int cur_stack_offset;
+ // TODO: Should we just dynamically allocate space on the
+ // stack for each block instead of storing this?
+ i64 max_locals_size;
// TODO: Arguments / etc?
} func;
@@ -93,6 +94,10 @@ typedef struct ast_node {
struct {
Node **children;
int num_children;
+
+ Variable **locals;
+ int num_locals;
+ i64 locals_size;
} block;
struct {
@@ -112,6 +117,12 @@ typedef struct ast_node {
Node *value;
} assign;
+ struct {
+ Node *cond;
+ Node *do_then;
+ Node *do_else;
+ } conditional;
+
Variable *variable;
};
} Node;
diff --git a/cup/generator.c b/cup/generator.c
index 7403fac..9e93164 100644
--- a/cup/generator.c
+++ b/cup/generator.c
@@ -167,12 +167,28 @@ void generate_expr_into_rax(Node *expr, FILE *out)
fprintf(out, ".and_end_%d:\n", label_counter);
label_counter++;
+ } else if (expr->type == AST_CONDITIONAL) {
+ generate_expr_into_rax(expr->conditional.cond, out);
+ // If left is false, we can short-circuit
+ fprintf(out, " cmp rax, 0\n");
+ fprintf(out, " je .cond_else_%d\n", label_counter);
+ generate_expr_into_rax(expr->conditional.do_then, out);
+ fprintf(out, " jmp .cond_end_%d\n", label_counter);
+ fprintf(out, ".cond_else_%d:\n", label_counter);
+ generate_expr_into_rax(expr->binary.right, out);
+ // Booleanize the result
+ generate_expr_into_rax(expr->conditional.do_else, out);
+ fprintf(out, ".cond_end_%d:\n", label_counter);
+ label_counter++;
+
} else {
fprintf(stderr, "Unsupported expression type in generate_expr: `%s`\n", node_type_to_str(expr->type));
exit(1);
}
}
+void generate_block(Node *block, FILE *out);
+
void generate_statement(Node *stmt, FILE *out)
{
if (stmt->type == AST_RETURN) {
@@ -186,7 +202,35 @@ void generate_statement(Node *stmt, FILE *out)
generate_expr_into_rax(stmt->var_decl.value, out);
i64 offset = stmt->var_decl.var.offset;
fprintf(out, " mov [rbp-%lld], rax\n", offset);
+ } else {
+ // Initialize to 0
+ i64 offset = stmt->var_decl.var.offset;
+ // TODO: Use correct size for the type
+ fprintf(out, " mov qword [rbp-%lld], 0\n", offset);
}
+ } else if (stmt->type == AST_IF) {
+ assert(stmt->conditional.cond);
+ assert(stmt->conditional.do_then);
+ generate_expr_into_rax(stmt->conditional.cond, out);
+
+ // If we don't have an `else` clause, we can simplify
+ if (!stmt->conditional.do_else) {
+ fprintf(out, " cmp rax, 0\n");
+ fprintf(out, " je .if_end_%d\n", label_counter);
+ generate_statement(stmt->conditional.do_then, out);
+ fprintf(out, ".if_end_%d:\n", label_counter);
+ } else {
+ fprintf(out, " cmp rax, 0\n");
+ fprintf(out, " je .if_else_%d\n", label_counter);
+ generate_statement(stmt->conditional.do_then, out);
+ fprintf(out, " jmp .if_end_%d\n", label_counter);
+ fprintf(out, ".if_else_%d:\n", label_counter);
+ generate_statement(stmt->conditional.do_else, out);
+ fprintf(out, ".if_end_%d:\n", label_counter);
+ }
+ label_counter++;
+ } else if (stmt->type == AST_BLOCK) {
+ generate_block(stmt, out);
} else {
// Once again, default to an expression here...
generate_expr_into_rax(stmt, out);
@@ -208,7 +252,8 @@ void generate_function_header(Node *func, FILE *out)
// TODO: Only do this if we have local variables
fprintf(out, " push rbp\n");
fprintf(out, " mov rbp, rsp\n");
- fprintf(out, " sub rsp, %d\n", func->func.cur_stack_offset);
+ // FIXME: Also account for arguments
+ fprintf(out, " sub rsp, %lld\n", func->func.max_locals_size);
}
void generate_function(Node *func, FILE *out)
diff --git a/cup/lexer.c b/cup/lexer.c
index 88df9e0..32138cc 100644
--- a/cup/lexer.c
+++ b/cup/lexer.c
@@ -93,6 +93,8 @@ Token Lexer_next(Lexer *lexer)
case ';': return Lexer_make_token(lexer, TOKEN_SEMICOLON, 1);
case ':': return Lexer_make_token(lexer, TOKEN_COLON, 1);
case '~': return Lexer_make_token(lexer, TOKEN_TILDE, 1);
+ case '?': return Lexer_make_token(lexer, TOKEN_QUESTION, 1);
+ case ',': return Lexer_make_token(lexer, TOKEN_COMMA, 1);
case '&': {
if (peek(lexer, 1) == '&')
diff --git a/cup/parser.c b/cup/parser.c
index a47116b..b3113ea 100644
--- a/cup/parser.c
+++ b/cup/parser.c
@@ -6,6 +6,12 @@
static Node *current_function = NULL;
+#define BLOCK_STACK_SIZE 64
+static Node *block_stack[BLOCK_STACK_SIZE];
+static i64 block_stack_count = 0;
+static i64 cur_stack_offset = 0;
+
+
Token do_assert_token(Token token, TokenType type, char *filename, int line)
{
if (token.type != type) {
@@ -48,17 +54,76 @@ NodeType binary_token_to_op(TokenType type)
}
}
+void block_stack_push(Node *block)
+{
+ assert(block_stack_count < BLOCK_STACK_SIZE);
+ assert(current_function);
+ block_stack[block_stack_count++] = block;
+}
+
+void block_stack_pop()
+{
+ assert(block_stack_count > 0);
+ assert(current_function);
+ Node *block = block_stack[--block_stack_count];
+ cur_stack_offset -= block->block.locals_size;
+ assert(cur_stack_offset >= 0);
+}
+
+void dump_block_stack()
+{
+ for (i64 i = 0; i < block_stack_count; i++) {
+ Node *block = block_stack[i];
+ for (int i = 0; i < block->block.num_locals; i++) {
+ printf("%s: offset: %lld\n", block->block.locals[i]->name, block->block.locals[i]->offset);
+ }
+ printf("\n");
+ }
+}
+
Variable *find_local_variable(Token *token)
{
assert_token(*token, TOKEN_IDENTIFIER);
- for (int i = 0; i < current_function->func.num_locals; i++) {
- if (strcmp(current_function->func.locals[i]->name, token->value.as_string) == 0) {
- return current_function->func.locals[i];
+ for (i64 i = block_stack_count - 1; i >= 0; --i) {
+ Node *block = block_stack[i];
+ for (int i = 0; i < block->block.num_locals; i++) {
+ if (strcmp(block->block.locals[i]->name, token->value.as_string) == 0) {
+ return block->block.locals[i];
+ }
}
}
return NULL;
}
+// TODO: rename this, it's ugly
+void add_variable_to_current_block(Variable *var)
+{
+ // Set offset for variable
+ Node *cur_block = block_stack[block_stack_count - 1];
+ var->offset = cur_stack_offset;
+
+ int new_len = (cur_block->block.num_locals + 1);
+ int var_size = 8; // TODO: Compute sizes based on different types
+
+ // Add to the block
+ // FIXME: Use a map here
+ cur_block->block.locals = realloc(cur_block->block.locals, sizeof(Variable *) * new_len);
+ cur_block->block.locals[cur_block->block.num_locals] = var;
+ cur_block->block.num_locals++;
+
+ assert(current_function);
+ // Update current stack offset (w.r.t function stack frame) and block size
+ cur_stack_offset += var_size;
+ block_stack[block_stack_count-1]->block.locals_size += var_size;
+
+ // Update function's max locals size
+ i64 max_offset = i64max(current_function->func.max_locals_size, cur_stack_offset);
+ current_function->func.max_locals_size = max_offset;
+
+ assert(cur_stack_offset >= 0);
+ assert(block_stack_count > 0);
+}
+
void Node_add_child(Node *parent, Node *child)
{
// TODO, use a vector
@@ -112,23 +177,20 @@ Node *parse_var_declaration(Lexer *lexer)
die_location(token.loc, "Variable declaration outside of function");
token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
+ // NOTE: We don't allow shadowing of variables in the any blocks,
+ // this is by design since it's a common mistake.
if (find_local_variable(&token) != NULL)
die_location(token.loc, "Variable `%s` already declared", token.value.as_string);
Node *node = Node_new(AST_VARDECL);
node->var_decl.var.name = token.value.as_string;
- assert_token(Lexer_next(lexer), TOKEN_COLON);
- node->var_decl.var.type = parse_type(lexer);
- // Set offset for variable
- node->var_decl.var.offset = current_function->func.cur_stack_offset;
+ token = Lexer_next(lexer);
+ if (token.type != TOKEN_COLON)
+ die_location(token.loc, "Missing type specifier for variable `%s`", node->var_decl.var.name);
+ node->var_decl.var.type = parse_type(lexer);
- int new_len = (current_function->func.num_locals + 1);
- int var_size = 8; // TODO: Compute sizes based on different types
- current_function->func.locals = realloc(current_function->func.locals, sizeof(Variable *) * new_len);
- current_function->func.locals[current_function->func.num_locals] = &node->var_decl.var;
- current_function->func.num_locals++;
- current_function->func.cur_stack_offset += var_size;
+ add_variable_to_current_block(&node->var_decl.var);
token = Lexer_next(lexer);
if (token.type == TOKEN_ASSIGN) {
@@ -210,9 +272,27 @@ Node *parse_logical_and(Lexer *lexer) { BINOP_PARSER(parse_equality, is_logical_
bool is_logical_or_token(TokenType type) { return type == TOKEN_OR; }
Node *parse_logical_or(Lexer *lexer) { BINOP_PARSER(parse_logical_and, is_logical_or_token); }
+Node *parse_conditional_exp(Lexer *lexer)
+{
+ Node *expr = parse_logical_or(lexer);
+ Token token = Lexer_peek(lexer);
+ if (token.type == TOKEN_QUESTION) {
+ Lexer_next(lexer);
+ Node *then_expr = parse_expression(lexer);
+ assert_token(Lexer_next(lexer), TOKEN_COLON);
+ Node *else_expr = parse_expression(lexer);
+ Node *conditional = Node_new(AST_CONDITIONAL);
+ conditional->conditional.cond = expr;
+ conditional->conditional.do_then = then_expr;
+ conditional->conditional.do_else = else_expr;
+ expr = conditional;
+ }
+ return expr;
+}
+
Node *parse_expression(Lexer *lexer)
{
- Node *node = parse_logical_or(lexer);
+ Node *node = parse_conditional_exp(lexer);
// FIXME: This is a hack to handle assignment expressions
// and can probably be done properly.
if (node->type == AST_VAR) {
@@ -228,6 +308,8 @@ Node *parse_expression(Lexer *lexer)
return node;
}
+Node *parse_block(Lexer *lexer);
+
Node *parse_statement(Lexer *lexer)
{
Node *node;
@@ -238,8 +320,22 @@ Node *parse_statement(Lexer *lexer)
node = Node_new(AST_RETURN);
node->unary_expr = parse_expression(lexer);
assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
- } else if (token.type == TOKEN_LET) {
- return parse_var_declaration(lexer);
+ } else if (token.type == TOKEN_IF) {
+ Lexer_next(lexer);
+ node = Node_new(AST_IF);
+ assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
+ node->conditional.cond = parse_expression(lexer);
+ assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
+ // TODO: Allow blocks in here once implemented, currently
+ // we can onle have a single statement in the if/else
+ node->conditional.do_then = parse_statement(lexer);
+ token = Lexer_peek(lexer);
+ if (token.type == TOKEN_ELSE) {
+ Lexer_next(lexer);
+ node->conditional.do_else = parse_statement(lexer);
+ }
+ } else if (token.type == TOKEN_OPEN_BRACE) {
+ node = parse_block(lexer);
} else {
// Default to trying to handle it as an expression
node = parse_expression(lexer);
@@ -251,14 +347,28 @@ Node *parse_statement(Lexer *lexer)
Node *parse_block(Lexer *lexer)
{
+ assert_token(Lexer_next(lexer), TOKEN_OPEN_BRACE);
+
Node *block = Node_new(AST_BLOCK);
- block->block.num_children = 0;
- Token token;
+ Token token = Lexer_peek(lexer);
+
+ block_stack_push(block);
- while ((token = Lexer_peek(lexer)).type != TOKEN_CLOSE_BRACE) {
- Node_add_child(block, parse_statement(lexer));
+ while (token.type != TOKEN_CLOSE_BRACE) {
+ Node *block_item;
+ if (token.type == TOKEN_LET) {
+ block_item = parse_var_declaration(lexer);
+ } else {
+ // Default to a statement
+ block_item = parse_statement(lexer);
+ }
+ Node_add_child(block, block_item);
+ token = Lexer_peek(lexer);
}
+ block_stack_pop();
+
+ assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACE);
return block;
}
@@ -286,9 +396,11 @@ Node *parse_func(Lexer *lexer)
// No return type, void fn.
func->func.return_type = (Type){.type = TYPE_NONE};
}
- assert_token(Lexer_next(lexer), TOKEN_OPEN_BRACE);
+
+ // Make sure there's no funny business with the stack offet
+ assert(cur_stack_offset == 0);
func->func.body = parse_block(lexer);
- assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACE);
+ assert(cur_stack_offset == 0);
return func;
}
diff --git a/test.sh b/test.sh
index d665a3d..091a2bb 100755
--- a/test.sh
+++ b/test.sh
@@ -2,10 +2,11 @@
set -e
./compile.sh
-set +e
for i in `ls tests/*.sh | grep -v common.sh`
do
echo "Running $i"
bash $i
-done \ No newline at end of file
+done
+
+set +e \ No newline at end of file
diff --git a/tests/common.sh b/tests/common.sh
index dd64ade..80e0bea 100644
--- a/tests/common.sh
+++ b/tests/common.sh
@@ -13,6 +13,7 @@ function assert_exit_status() {
set +e
./a.out
res=$?
+ set -e
if [ $res -ne $2 ]
then
echo ""
@@ -22,11 +23,28 @@ function assert_exit_status() {
echo "$1"
exit 1
fi
- set -e
echo -n "."
}
function assert_exit_status_stdin() {
code=$(</dev/stdin)
assert_exit_status "$code" $1
+}
+
+function assert_compile_failure_stdin() {
+ code=$(</dev/stdin)
+ set +e
+ ./cupcc -c "$code" >/dev/null 2>&1
+ res=$?
+ set -e
+ if [ $res -eq 0 ]
+ then
+ echo ""
+ echo "----------------------------------------------"
+ echo "Test failed: expected compilation, got success"
+ echo "----------------------------------------------"
+ echo "$code"
+ exit 1
+ fi
+ echo -n "."
} \ No newline at end of file
diff --git a/tests/compound.sh b/tests/compound.sh
new file mode 100755
index 0000000..cb49fa0
--- /dev/null
+++ b/tests/compound.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+
+# Test compound scopes
+
+. tests/common.sh
+
+set -e
+
+echo -n "- Nested Blocks: "
+assert_exit_status_stdin 3 <<EOF
+fn main() {
+ let x: int = 1;
+ {
+ let y: int = 3;
+ x = y;
+ }
+ return x;
+}
+EOF
+
+assert_exit_status_stdin 15 <<EOF
+fn main(): int {
+ let x: int;
+ {
+ let y: int = 10;
+ {
+ let z: int = 5;
+ x = y + z;
+ }
+ }
+ return x;
+}
+EOF
+
+assert_exit_status_stdin 8 <<EOF
+fn main(): int {
+ let x: int = 0;
+ {
+ let y: int = 3;
+ x = x + y;
+ }
+ {
+ let y: int = 5;
+ x = x + y;
+ }
+ return x;
+}
+EOF
+
+assert_compile_failure_stdin <<EOF
+fn main(): int {
+ let x: int;
+ {
+ let y: int = 10;
+ {
+ let z: int = 5;
+ x = y + z;
+ }
+ y = z;
+ }
+ return x;
+}
+EOF
+
+assert_compile_failure_stdin <<EOF
+fn main(): int {
+ let x: int;
+ {
+ let y: int = 10;
+ {
+ let z: int = 5;
+ x = y + z;
+ }
+ }
+ x = z;
+ return x;
+}
+EOF
+
+assert_compile_failure_stdin <<EOF
+fn main(): int {
+ let x: int;
+ {
+ let y: int = 10;
+ {
+ let z: int = 5;
+ x = y + z;
+ }
+ }
+ x = y;
+ return x;
+}
+EOF
+
+# We do not allow shadowing variables on purpose
+assert_compile_failure_stdin <<EOF
+fn main(): int {
+ let x: int;
+ {
+ let y: int = 10;
+ {
+ let z: int = 5;
+ x = y + z;
+ let x: int = 10;
+ }
+ }
+ return x;
+}
+EOF
+echo " OK"
+
+echo -n "- Conditionals w/ blocks: "
+assert_exit_status_stdin 3 <<EOF
+fn main() {
+ let x: int = 1;
+ if (x == 1) {
+ let y: int = 3;
+ x = y;
+ }
+ return x;
+}
+EOF
+
+assert_exit_status_stdin 1 <<EOF
+fn main() {
+ let x: int = 1;
+ if (x != 1) {
+ let y: int = 3;
+ x = y;
+ }
+ return x;
+}
+EOF
+
+assert_exit_status_stdin 5 <<EOF
+fn main() {
+ let x: int = 1;
+ if (x != 1) {
+ let y: int = 3;
+ x = y;
+ } else {
+ let y: int = 5;
+ x = y;
+ }
+ return x;
+}
+EOF
+
+assert_compile_failure_stdin <<EOF
+fn main() {
+ let x: int = 1;
+ if (x != 1) {
+ let y: int = 3;
+ x = y;
+ }
+ x = y; // Invalid
+ return x;
+}
+EOF
+
+echo " OK" \ No newline at end of file
diff --git a/tests/conditions.sh b/tests/conditions.sh
new file mode 100755
index 0000000..be80d96
--- /dev/null
+++ b/tests/conditions.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. tests/common.sh
+
+set -e
+
+echo -n "- Conditionals: "
+assert_exit_status 'fn main() { return 1 ? 5 : 10; }' 5
+assert_exit_status 'fn main() { return 0 ? 5 : 10; }' 10
+assert_exit_status 'fn main() { return 1 < 2 ? 10 : 20; }' 10
+
+assert_exit_status_stdin 5 <<EOF
+fn main() {
+ let flag: int = 1;
+ let a: int;
+ flag ? a = 5 : a = 10;
+ return a;
+}
+EOF
+
+assert_exit_status_stdin 10 <<EOF
+fn main() {
+ let flag: int = 0;
+ let a: int;
+ flag ? a = 5 : a = 10;
+ return a;
+}
+EOF
+echo " OK"
+
+echo -n "- If statement: "
+assert_exit_status_stdin 10 <<EOF
+fn main() {
+ if (5 < 20) return 10;
+ return 3;
+}
+EOF
+
+assert_exit_status_stdin 3 <<EOF
+fn main() {
+ if (5 > 20) return 10;
+ return 3;
+}
+EOF
+
+assert_exit_status_stdin 20 <<EOF
+fn main() {
+ let x: int;
+ if (0)
+ x = 3;
+ else
+ x = 20;
+ return x;
+}
+EOF
+
+assert_exit_status_stdin 3 <<EOF
+fn main() {
+ let x: int;
+ if (1)
+ x = 3;
+ return x;
+}
+EOF
+echo " OK" \ No newline at end of file
diff --git a/tests/variables.sh b/tests/variables.sh
index 0c88784..7b44fe7 100755
--- a/tests/variables.sh
+++ b/tests/variables.sh
@@ -29,7 +29,6 @@ EOF
echo " OK"
echo -n "- Multiple variable: "
-
assert_exit_status_stdin 2 <<EOF
fn main() {
let x: int = 1;
@@ -59,4 +58,46 @@ fn main() {
}
EOF
+assert_exit_status_stdin 18 <<EOF
+fn main() {
+ let x: int = 5;
+ let y: int;
+ let z: int = (y = x + 3) + 2;
+ return z + y;
+}
+EOF
+echo " OK"
+
+echo -n "- Short-circuiting assignments: "
+assert_exit_status_stdin 5 <<EOF
+fn main() {
+ let x: int = 5;
+ let y: int = (1 || (x = 10));
+ return x;
+}
+EOF
+
+assert_exit_status_stdin 10 <<EOF
+fn main() {
+ let x: int = 5;
+ let y: int = (0 || (x = 10));
+ return x;
+}
+EOF
+
+assert_exit_status_stdin 5 <<EOF
+fn main() {
+ let x: int = 5;
+ let y: int = (0 && (x = 10));
+ return x;
+}
+EOF
+
+assert_exit_status_stdin 10 <<EOF
+fn main() {
+ let x: int = 5;
+ let y: int = (1 && (x = 10));
+ return x;
+}
+EOF
echo " OK"