diff options
| author | Mustafa Quraish <[email protected]> | 2022-01-29 17:37:00 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-01-29 17:37:00 -0500 |
| commit | a9683512b29abd53814bf834e23f752a8999a679 (patch) | |
| tree | 033d76dbf7b1249354d2967b9de53fd6af20b7cb | |
| parent | Add i64{max,min} helper functions (diff) | |
| download | cup-a9683512b29abd53814bf834e23f752a8999a679.tar.xz cup-a9683512b29abd53814bf834e23f752a8999a679.zip | |
Implement blocks (lexically scoped) and conditionals
| -rw-r--r-- | cup/ast.c | 8 | ||||
| -rw-r--r-- | cup/ast.h | 19 | ||||
| -rw-r--r-- | cup/generator.c | 47 | ||||
| -rw-r--r-- | cup/lexer.c | 2 | ||||
| -rw-r--r-- | cup/parser.c | 156 | ||||
| -rwxr-xr-x | test.sh | 5 | ||||
| -rw-r--r-- | tests/common.sh | 20 | ||||
| -rwxr-xr-x | tests/compound.sh | 161 | ||||
| -rwxr-xr-x | tests/conditions.sh | 65 | ||||
| -rwxr-xr-x | tests/variables.sh | 43 |
10 files changed, 487 insertions, 39 deletions
@@ -135,14 +135,6 @@ void dump_func(Node *node, int depth) printf(" -> "); print_type_to_file(stdout, node->func.return_type); } - if (node->func.num_locals > 0) { - printf("\n locals: \n"); - for (int i = 0; i < node->func.num_locals; i++) { - printf(" - `%s`, offset: %lld (", node->func.locals[i]->name, node->func.locals[i]->offset); - print_type_to_file(stdout, node->func.locals[i]->type); - printf(")\n"); - } - } do_print_ast(node->func.body, depth + 1); } @@ -24,6 +24,8 @@ F(OP_GEQ, ">=") \ F(OP_ASSIGN, "=") \ F(AST_LITERAL, "literal") \ + F(AST_CONDITIONAL, "conditional expression") \ + F(AST_IF, "if statement") \ F(AST_VARDECL, "variable decl") \ F(AST_VAR, "variable") \ F(AST_RETURN, "return") \ @@ -82,10 +84,9 @@ typedef struct ast_node { Type return_type; Node *body; - Variable **locals; - int num_locals; - - int cur_stack_offset; + // TODO: Should we just dynamically allocate space on the + // stack for each block instead of storing this? + i64 max_locals_size; // TODO: Arguments / etc? } func; @@ -93,6 +94,10 @@ typedef struct ast_node { struct { Node **children; int num_children; + + Variable **locals; + int num_locals; + i64 locals_size; } block; struct { @@ -112,6 +117,12 @@ typedef struct ast_node { Node *value; } assign; + struct { + Node *cond; + Node *do_then; + Node *do_else; + } conditional; + Variable *variable; }; } Node; diff --git a/cup/generator.c b/cup/generator.c index 7403fac..9e93164 100644 --- a/cup/generator.c +++ b/cup/generator.c @@ -167,12 +167,28 @@ void generate_expr_into_rax(Node *expr, FILE *out) fprintf(out, ".and_end_%d:\n", label_counter); label_counter++; + } else if (expr->type == AST_CONDITIONAL) { + generate_expr_into_rax(expr->conditional.cond, out); + // If left is false, we can short-circuit + fprintf(out, " cmp rax, 0\n"); + fprintf(out, " je .cond_else_%d\n", label_counter); + generate_expr_into_rax(expr->conditional.do_then, out); + fprintf(out, " jmp .cond_end_%d\n", label_counter); + fprintf(out, ".cond_else_%d:\n", label_counter); + generate_expr_into_rax(expr->binary.right, out); + // Booleanize the result + generate_expr_into_rax(expr->conditional.do_else, out); + fprintf(out, ".cond_end_%d:\n", label_counter); + label_counter++; + } else { fprintf(stderr, "Unsupported expression type in generate_expr: `%s`\n", node_type_to_str(expr->type)); exit(1); } } +void generate_block(Node *block, FILE *out); + void generate_statement(Node *stmt, FILE *out) { if (stmt->type == AST_RETURN) { @@ -186,7 +202,35 @@ void generate_statement(Node *stmt, FILE *out) generate_expr_into_rax(stmt->var_decl.value, out); i64 offset = stmt->var_decl.var.offset; fprintf(out, " mov [rbp-%lld], rax\n", offset); + } else { + // Initialize to 0 + i64 offset = stmt->var_decl.var.offset; + // TODO: Use correct size for the type + fprintf(out, " mov qword [rbp-%lld], 0\n", offset); } + } else if (stmt->type == AST_IF) { + assert(stmt->conditional.cond); + assert(stmt->conditional.do_then); + generate_expr_into_rax(stmt->conditional.cond, out); + + // If we don't have an `else` clause, we can simplify + if (!stmt->conditional.do_else) { + fprintf(out, " cmp rax, 0\n"); + fprintf(out, " je .if_end_%d\n", label_counter); + generate_statement(stmt->conditional.do_then, out); + fprintf(out, ".if_end_%d:\n", label_counter); + } else { + fprintf(out, " cmp rax, 0\n"); + fprintf(out, " je .if_else_%d\n", label_counter); + generate_statement(stmt->conditional.do_then, out); + fprintf(out, " jmp .if_end_%d\n", label_counter); + fprintf(out, ".if_else_%d:\n", label_counter); + generate_statement(stmt->conditional.do_else, out); + fprintf(out, ".if_end_%d:\n", label_counter); + } + label_counter++; + } else if (stmt->type == AST_BLOCK) { + generate_block(stmt, out); } else { // Once again, default to an expression here... generate_expr_into_rax(stmt, out); @@ -208,7 +252,8 @@ void generate_function_header(Node *func, FILE *out) // TODO: Only do this if we have local variables fprintf(out, " push rbp\n"); fprintf(out, " mov rbp, rsp\n"); - fprintf(out, " sub rsp, %d\n", func->func.cur_stack_offset); + // FIXME: Also account for arguments + fprintf(out, " sub rsp, %lld\n", func->func.max_locals_size); } void generate_function(Node *func, FILE *out) diff --git a/cup/lexer.c b/cup/lexer.c index 88df9e0..32138cc 100644 --- a/cup/lexer.c +++ b/cup/lexer.c @@ -93,6 +93,8 @@ Token Lexer_next(Lexer *lexer) case ';': return Lexer_make_token(lexer, TOKEN_SEMICOLON, 1); case ':': return Lexer_make_token(lexer, TOKEN_COLON, 1); case '~': return Lexer_make_token(lexer, TOKEN_TILDE, 1); + case '?': return Lexer_make_token(lexer, TOKEN_QUESTION, 1); + case ',': return Lexer_make_token(lexer, TOKEN_COMMA, 1); case '&': { if (peek(lexer, 1) == '&') diff --git a/cup/parser.c b/cup/parser.c index a47116b..b3113ea 100644 --- a/cup/parser.c +++ b/cup/parser.c @@ -6,6 +6,12 @@ static Node *current_function = NULL; +#define BLOCK_STACK_SIZE 64 +static Node *block_stack[BLOCK_STACK_SIZE]; +static i64 block_stack_count = 0; +static i64 cur_stack_offset = 0; + + Token do_assert_token(Token token, TokenType type, char *filename, int line) { if (token.type != type) { @@ -48,17 +54,76 @@ NodeType binary_token_to_op(TokenType type) } } +void block_stack_push(Node *block) +{ + assert(block_stack_count < BLOCK_STACK_SIZE); + assert(current_function); + block_stack[block_stack_count++] = block; +} + +void block_stack_pop() +{ + assert(block_stack_count > 0); + assert(current_function); + Node *block = block_stack[--block_stack_count]; + cur_stack_offset -= block->block.locals_size; + assert(cur_stack_offset >= 0); +} + +void dump_block_stack() +{ + for (i64 i = 0; i < block_stack_count; i++) { + Node *block = block_stack[i]; + for (int i = 0; i < block->block.num_locals; i++) { + printf("%s: offset: %lld\n", block->block.locals[i]->name, block->block.locals[i]->offset); + } + printf("\n"); + } +} + Variable *find_local_variable(Token *token) { assert_token(*token, TOKEN_IDENTIFIER); - for (int i = 0; i < current_function->func.num_locals; i++) { - if (strcmp(current_function->func.locals[i]->name, token->value.as_string) == 0) { - return current_function->func.locals[i]; + for (i64 i = block_stack_count - 1; i >= 0; --i) { + Node *block = block_stack[i]; + for (int i = 0; i < block->block.num_locals; i++) { + if (strcmp(block->block.locals[i]->name, token->value.as_string) == 0) { + return block->block.locals[i]; + } } } return NULL; } +// TODO: rename this, it's ugly +void add_variable_to_current_block(Variable *var) +{ + // Set offset for variable + Node *cur_block = block_stack[block_stack_count - 1]; + var->offset = cur_stack_offset; + + int new_len = (cur_block->block.num_locals + 1); + int var_size = 8; // TODO: Compute sizes based on different types + + // Add to the block + // FIXME: Use a map here + cur_block->block.locals = realloc(cur_block->block.locals, sizeof(Variable *) * new_len); + cur_block->block.locals[cur_block->block.num_locals] = var; + cur_block->block.num_locals++; + + assert(current_function); + // Update current stack offset (w.r.t function stack frame) and block size + cur_stack_offset += var_size; + block_stack[block_stack_count-1]->block.locals_size += var_size; + + // Update function's max locals size + i64 max_offset = i64max(current_function->func.max_locals_size, cur_stack_offset); + current_function->func.max_locals_size = max_offset; + + assert(cur_stack_offset >= 0); + assert(block_stack_count > 0); +} + void Node_add_child(Node *parent, Node *child) { // TODO, use a vector @@ -112,23 +177,20 @@ Node *parse_var_declaration(Lexer *lexer) die_location(token.loc, "Variable declaration outside of function"); token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER); + // NOTE: We don't allow shadowing of variables in the any blocks, + // this is by design since it's a common mistake. if (find_local_variable(&token) != NULL) die_location(token.loc, "Variable `%s` already declared", token.value.as_string); Node *node = Node_new(AST_VARDECL); node->var_decl.var.name = token.value.as_string; - assert_token(Lexer_next(lexer), TOKEN_COLON); - node->var_decl.var.type = parse_type(lexer); - // Set offset for variable - node->var_decl.var.offset = current_function->func.cur_stack_offset; + token = Lexer_next(lexer); + if (token.type != TOKEN_COLON) + die_location(token.loc, "Missing type specifier for variable `%s`", node->var_decl.var.name); + node->var_decl.var.type = parse_type(lexer); - int new_len = (current_function->func.num_locals + 1); - int var_size = 8; // TODO: Compute sizes based on different types - current_function->func.locals = realloc(current_function->func.locals, sizeof(Variable *) * new_len); - current_function->func.locals[current_function->func.num_locals] = &node->var_decl.var; - current_function->func.num_locals++; - current_function->func.cur_stack_offset += var_size; + add_variable_to_current_block(&node->var_decl.var); token = Lexer_next(lexer); if (token.type == TOKEN_ASSIGN) { @@ -210,9 +272,27 @@ Node *parse_logical_and(Lexer *lexer) { BINOP_PARSER(parse_equality, is_logical_ bool is_logical_or_token(TokenType type) { return type == TOKEN_OR; } Node *parse_logical_or(Lexer *lexer) { BINOP_PARSER(parse_logical_and, is_logical_or_token); } +Node *parse_conditional_exp(Lexer *lexer) +{ + Node *expr = parse_logical_or(lexer); + Token token = Lexer_peek(lexer); + if (token.type == TOKEN_QUESTION) { + Lexer_next(lexer); + Node *then_expr = parse_expression(lexer); + assert_token(Lexer_next(lexer), TOKEN_COLON); + Node *else_expr = parse_expression(lexer); + Node *conditional = Node_new(AST_CONDITIONAL); + conditional->conditional.cond = expr; + conditional->conditional.do_then = then_expr; + conditional->conditional.do_else = else_expr; + expr = conditional; + } + return expr; +} + Node *parse_expression(Lexer *lexer) { - Node *node = parse_logical_or(lexer); + Node *node = parse_conditional_exp(lexer); // FIXME: This is a hack to handle assignment expressions // and can probably be done properly. if (node->type == AST_VAR) { @@ -228,6 +308,8 @@ Node *parse_expression(Lexer *lexer) return node; } +Node *parse_block(Lexer *lexer); + Node *parse_statement(Lexer *lexer) { Node *node; @@ -238,8 +320,22 @@ Node *parse_statement(Lexer *lexer) node = Node_new(AST_RETURN); node->unary_expr = parse_expression(lexer); assert_token(Lexer_next(lexer), TOKEN_SEMICOLON); - } else if (token.type == TOKEN_LET) { - return parse_var_declaration(lexer); + } else if (token.type == TOKEN_IF) { + Lexer_next(lexer); + node = Node_new(AST_IF); + assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN); + node->conditional.cond = parse_expression(lexer); + assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN); + // TODO: Allow blocks in here once implemented, currently + // we can onle have a single statement in the if/else + node->conditional.do_then = parse_statement(lexer); + token = Lexer_peek(lexer); + if (token.type == TOKEN_ELSE) { + Lexer_next(lexer); + node->conditional.do_else = parse_statement(lexer); + } + } else if (token.type == TOKEN_OPEN_BRACE) { + node = parse_block(lexer); } else { // Default to trying to handle it as an expression node = parse_expression(lexer); @@ -251,14 +347,28 @@ Node *parse_statement(Lexer *lexer) Node *parse_block(Lexer *lexer) { + assert_token(Lexer_next(lexer), TOKEN_OPEN_BRACE); + Node *block = Node_new(AST_BLOCK); - block->block.num_children = 0; - Token token; + Token token = Lexer_peek(lexer); + + block_stack_push(block); - while ((token = Lexer_peek(lexer)).type != TOKEN_CLOSE_BRACE) { - Node_add_child(block, parse_statement(lexer)); + while (token.type != TOKEN_CLOSE_BRACE) { + Node *block_item; + if (token.type == TOKEN_LET) { + block_item = parse_var_declaration(lexer); + } else { + // Default to a statement + block_item = parse_statement(lexer); + } + Node_add_child(block, block_item); + token = Lexer_peek(lexer); } + block_stack_pop(); + + assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACE); return block; } @@ -286,9 +396,11 @@ Node *parse_func(Lexer *lexer) // No return type, void fn. func->func.return_type = (Type){.type = TYPE_NONE}; } - assert_token(Lexer_next(lexer), TOKEN_OPEN_BRACE); + + // Make sure there's no funny business with the stack offet + assert(cur_stack_offset == 0); func->func.body = parse_block(lexer); - assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACE); + assert(cur_stack_offset == 0); return func; } @@ -2,10 +2,11 @@ set -e ./compile.sh -set +e for i in `ls tests/*.sh | grep -v common.sh` do echo "Running $i" bash $i -done
\ No newline at end of file +done + +set +e
\ No newline at end of file diff --git a/tests/common.sh b/tests/common.sh index dd64ade..80e0bea 100644 --- a/tests/common.sh +++ b/tests/common.sh @@ -13,6 +13,7 @@ function assert_exit_status() { set +e ./a.out res=$? + set -e if [ $res -ne $2 ] then echo "" @@ -22,11 +23,28 @@ function assert_exit_status() { echo "$1" exit 1 fi - set -e echo -n "." } function assert_exit_status_stdin() { code=$(</dev/stdin) assert_exit_status "$code" $1 +} + +function assert_compile_failure_stdin() { + code=$(</dev/stdin) + set +e + ./cupcc -c "$code" >/dev/null 2>&1 + res=$? + set -e + if [ $res -eq 0 ] + then + echo "" + echo "----------------------------------------------" + echo "Test failed: expected compilation, got success" + echo "----------------------------------------------" + echo "$code" + exit 1 + fi + echo -n "." }
\ No newline at end of file diff --git a/tests/compound.sh b/tests/compound.sh new file mode 100755 index 0000000..cb49fa0 --- /dev/null +++ b/tests/compound.sh @@ -0,0 +1,161 @@ +#!/bin/bash + +# Test compound scopes + +. tests/common.sh + +set -e + +echo -n "- Nested Blocks: " +assert_exit_status_stdin 3 <<EOF +fn main() { + let x: int = 1; + { + let y: int = 3; + x = y; + } + return x; +} +EOF + +assert_exit_status_stdin 15 <<EOF +fn main(): int { + let x: int; + { + let y: int = 10; + { + let z: int = 5; + x = y + z; + } + } + return x; +} +EOF + +assert_exit_status_stdin 8 <<EOF +fn main(): int { + let x: int = 0; + { + let y: int = 3; + x = x + y; + } + { + let y: int = 5; + x = x + y; + } + return x; +} +EOF + +assert_compile_failure_stdin <<EOF +fn main(): int { + let x: int; + { + let y: int = 10; + { + let z: int = 5; + x = y + z; + } + y = z; + } + return x; +} +EOF + +assert_compile_failure_stdin <<EOF +fn main(): int { + let x: int; + { + let y: int = 10; + { + let z: int = 5; + x = y + z; + } + } + x = z; + return x; +} +EOF + +assert_compile_failure_stdin <<EOF +fn main(): int { + let x: int; + { + let y: int = 10; + { + let z: int = 5; + x = y + z; + } + } + x = y; + return x; +} +EOF + +# We do not allow shadowing variables on purpose +assert_compile_failure_stdin <<EOF +fn main(): int { + let x: int; + { + let y: int = 10; + { + let z: int = 5; + x = y + z; + let x: int = 10; + } + } + return x; +} +EOF +echo " OK" + +echo -n "- Conditionals w/ blocks: " +assert_exit_status_stdin 3 <<EOF +fn main() { + let x: int = 1; + if (x == 1) { + let y: int = 3; + x = y; + } + return x; +} +EOF + +assert_exit_status_stdin 1 <<EOF +fn main() { + let x: int = 1; + if (x != 1) { + let y: int = 3; + x = y; + } + return x; +} +EOF + +assert_exit_status_stdin 5 <<EOF +fn main() { + let x: int = 1; + if (x != 1) { + let y: int = 3; + x = y; + } else { + let y: int = 5; + x = y; + } + return x; +} +EOF + +assert_compile_failure_stdin <<EOF +fn main() { + let x: int = 1; + if (x != 1) { + let y: int = 3; + x = y; + } + x = y; // Invalid + return x; +} +EOF + +echo " OK"
\ No newline at end of file diff --git a/tests/conditions.sh b/tests/conditions.sh new file mode 100755 index 0000000..be80d96 --- /dev/null +++ b/tests/conditions.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +. tests/common.sh + +set -e + +echo -n "- Conditionals: " +assert_exit_status 'fn main() { return 1 ? 5 : 10; }' 5 +assert_exit_status 'fn main() { return 0 ? 5 : 10; }' 10 +assert_exit_status 'fn main() { return 1 < 2 ? 10 : 20; }' 10 + +assert_exit_status_stdin 5 <<EOF +fn main() { + let flag: int = 1; + let a: int; + flag ? a = 5 : a = 10; + return a; +} +EOF + +assert_exit_status_stdin 10 <<EOF +fn main() { + let flag: int = 0; + let a: int; + flag ? a = 5 : a = 10; + return a; +} +EOF +echo " OK" + +echo -n "- If statement: " +assert_exit_status_stdin 10 <<EOF +fn main() { + if (5 < 20) return 10; + return 3; +} +EOF + +assert_exit_status_stdin 3 <<EOF +fn main() { + if (5 > 20) return 10; + return 3; +} +EOF + +assert_exit_status_stdin 20 <<EOF +fn main() { + let x: int; + if (0) + x = 3; + else + x = 20; + return x; +} +EOF + +assert_exit_status_stdin 3 <<EOF +fn main() { + let x: int; + if (1) + x = 3; + return x; +} +EOF +echo " OK"
\ No newline at end of file diff --git a/tests/variables.sh b/tests/variables.sh index 0c88784..7b44fe7 100755 --- a/tests/variables.sh +++ b/tests/variables.sh @@ -29,7 +29,6 @@ EOF echo " OK" echo -n "- Multiple variable: " - assert_exit_status_stdin 2 <<EOF fn main() { let x: int = 1; @@ -59,4 +58,46 @@ fn main() { } EOF +assert_exit_status_stdin 18 <<EOF +fn main() { + let x: int = 5; + let y: int; + let z: int = (y = x + 3) + 2; + return z + y; +} +EOF +echo " OK" + +echo -n "- Short-circuiting assignments: " +assert_exit_status_stdin 5 <<EOF +fn main() { + let x: int = 5; + let y: int = (1 || (x = 10)); + return x; +} +EOF + +assert_exit_status_stdin 10 <<EOF +fn main() { + let x: int = 5; + let y: int = (0 || (x = 10)); + return x; +} +EOF + +assert_exit_status_stdin 5 <<EOF +fn main() { + let x: int = 5; + let y: int = (0 && (x = 10)); + return x; +} +EOF + +assert_exit_status_stdin 10 <<EOF +fn main() { + let x: int = 5; + let y: int = (1 && (x = 10)); + return x; +} +EOF echo " OK" |