diff options
| author | Mustafa Quraish <[email protected]> | 2022-01-31 21:20:16 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-01-31 21:20:16 -0500 |
| commit | 18aafac383e3180a8d57e1ed88470d94b1477a06 (patch) | |
| tree | 4212ed36cd4d2a3b28603147a5d037674126cbaf | |
| parent | Add .gitattributes to try to highlight code as Rust (diff) | |
| download | cup-18aafac383e3180a8d57e1ed88470d94b1477a06.tar.xz cup-18aafac383e3180a8d57e1ed88470d94b1477a06.zip | |
Global variables now supported! + some fixes to OP_ASSIGN
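Previously, an assignment did not get its own AST node: the parser rewrote
the variable node in place into an OP_ASSIGN node, and this was causing all
sorts of funky errors. This commit also fixes that by allocating a separate
OP_ASSIGN node that points at the variable node, and we can now use global
variables :^)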
| -rw-r--r-- | src/ast.c | 2 | ||||
| -rw-r--r-- | src/ast.h | 5 | ||||
| -rw-r--r-- | src/generator.c | 56 | ||||
| -rw-r--r-- | src/parser.c | 72 | ||||
| -rwxr-xr-x | tests/variables.sh | 85 |
5 files changed, 157 insertions, 63 deletions
@@ -109,7 +109,7 @@ static void do_print_ast(Node *node, int depth) printf("%s\n", node_type_to_str(node->type)); do_print_ast(node->binary.left, depth + 1); do_print_ast(node->binary.right, depth + 1); - } else if (node->type == AST_VAR) { + } else if (node->type == AST_LOCAL_VAR) { assert(node->variable && node->variable->name); printf("%s\n", node->variable->name); } else if (node->type == AST_VARDECL) { @@ -30,7 +30,8 @@ F(AST_WHILE, "while statement") \ F(AST_FOR, "for statement") \ F(AST_VARDECL, "variable decl") \ - F(AST_VAR, "variable") \ + F(AST_LOCAL_VAR, "local variable") \ + F(AST_GLOBAL_VAR, "global variable") \ F(AST_RETURN, "return") \ F(AST_FUNC, "func") \ F(AST_BUILTIN, "builtin") \ @@ -120,7 +121,7 @@ typedef struct ast_node { } var_decl; struct { - Variable *var; + Node *var; Node *value; } assign; diff --git a/src/generator.c b/src/generator.c index b141dce..c7a50bc 100644 --- a/src/generator.c +++ b/src/generator.c @@ -51,17 +51,33 @@ void generate_expr_into_rax(Node *expr, FILE *out) } else if (expr->type == AST_FUNCCALL) { generate_func_call(expr, out); - } else if (expr->type == AST_VAR) { + } else if (expr->type == AST_LOCAL_VAR) { i64 offset = expr->variable->offset; if (offset > 0) fprintf(out, " mov rax, [rbp-%lld]\n", offset); else fprintf(out, " mov rax, [rbp+%lld]\n", -offset); + } else if (expr->type == AST_GLOBAL_VAR) { + i64 offset = expr->variable->offset; + fprintf(out, " mov rax, global_vars\n"); + fprintf(out, " add rax, %lld\n", offset); + fprintf(out, " mov rax, [rax]\n"); + } else if (expr->type == OP_ASSIGN) { - i64 offset = expr->assign.var->offset; + Node *var = expr->assign.var; + i64 offset = var->variable->offset; generate_expr_into_rax(expr->assign.value, out); - fprintf(out, " mov [rbp-%lld], rax\n", offset); + + if (var->type == AST_LOCAL_VAR) { + fprintf(out, " mov [rbp-%lld], rax\n", offset); + } else if (var->type == AST_GLOBAL_VAR) { + fprintf(out, " mov rbx, global_vars\n"); + fprintf(out, " mov [rbx+%lld], 
rax\n", offset); + } else { + fprintf(stderr, "Unhandled assignment type: %s\n", node_type_to_str(var->type)); + exit(1); + } } else if (expr->type == OP_NEG) { generate_expr_into_rax(expr->unary_expr, out); @@ -342,10 +358,13 @@ void generate_asm(Node *root, FILE *out) { assert(root->type == AST_PROGRAM); for (int i = 0; i < root->block.num_children; i++) { - if (root->block.children[i]->type == AST_FUNC) { - generate_function(root->block.children[i], out); + Node *child = root->block.children[i]; + if (child->type == AST_FUNC) { + generate_function(child, out); + } else if (child->type == AST_VARDECL) { + // Do nothing, we don't need to generate global variables } else { - fprintf(stderr, "Unsupported node type in generate_asm: %s\n", node_type_to_str(root->block.children[i]->type)); + fprintf(stderr, "Unsupported node type in generate_asm: %s\n", node_type_to_str(child->type)); exit(1); } } @@ -363,8 +382,12 @@ void generate_asm(Node *root, FILE *out) fprintf(out, " mov rdi, rax\n"); make_syscall(SYS_exit, out); + // TODO: Don't generate code for functions that cannot get called. // TODO: Add implementations of some primitives? generate_builtins(out); + + fprintf(out, "section .bss\n"); + fprintf(out, " global_vars: resb %lld\n", root->block.locals_size); } void generate_builtins(FILE *out) @@ -379,14 +402,6 @@ void generate_builtins(FILE *out) " mov BYTE [rsp+31], 10\n" " lea rcx, [rsp+30]\n" " mov qword rbx, 0\n" - - // Check if < 0, and set rbx=0, negate value - // " cmp rdi, 0\n" - // " jge .L2\n" - // " mov qword rbx, 1\n" - // " neg rdi\n" - // " sub rcx, 1\n" - ".L2:\n" " mov rax, rdi\n" " lea r8, [rsp+32]\n" @@ -405,19 +420,6 @@ void generate_builtins(FILE *out) " sub rcx, 1\n" " cmp rax, 9\n" " ja .L2\n" - - // If rbx=1, then we need to add a minus sign, not sure how - // the above code works so there's probably a nicer way. 
- // " cmp rbx, 0\n" - // " je .end_neg_sign\n" - // " add eax, 48\n" - // " mov BYTE [rcx], 45\n" - // " mov rax, rdi\n" - // " mov rdi, rdx\n" - // " mov rdx, rcx\n" - // " sub rcx, 1\n" - // ".end_neg_sign:\n" - " lea rax, [rsp+32]\n" " mov edi, 1\n" " sub rdx, rax\n" diff --git a/src/parser.c b/src/parser.c index 4dc891d..a0ec7a1 100644 --- a/src/parser.c +++ b/src/parser.c @@ -15,11 +15,16 @@ static Node *block_stack[BLOCK_STACK_SIZE]; static i64 block_stack_count = 0; static i64 cur_stack_offset = 0; +// TODO: Probably use a vector here +#define GLOBAL_VARS_SIZE 1024 +static Variable *global_vars[GLOBAL_VARS_SIZE]; +static i64 global_vars_count = 0; +static i64 global_vars_offset = 0; + #define LEXER_STACK_SIZE 64 static Lexer *lexer_stack[LEXER_STACK_SIZE]; static i64 lexer_stack_count = 0; - Token do_assert_token(Token token, TokenType type, char *filename, int line) { if (token.type != type) { @@ -133,6 +138,9 @@ Node *find_builtin_function(Token *token) Variable *find_local_variable(Token *token) { + if (current_function == NULL) + return NULL; + assert_token(*token, TOKEN_IDENTIFIER); for (i64 i = block_stack_count - 1; i >= 0; --i) { Node *block = block_stack[i]; @@ -151,6 +159,16 @@ Variable *find_local_variable(Token *token) return NULL; } +Variable *find_global_variable(Token *token) +{ + for (int i = 0; i < global_vars_count; i++) { + if (strcmp(global_vars[i]->name, token->value.as_string) == 0) { + return global_vars[i]; + } + } + return NULL; +} + Node *find_function_definition(Token *token) { assert_token(*token, TOKEN_IDENTIFIER); @@ -163,6 +181,15 @@ Node *find_function_definition(Token *token) return NULL; } +void add_global_variable(Variable *var) +{ + var->offset = global_vars_offset; + // TODO: Compute based on type + int var_size = 8; + global_vars_offset += var_size; + global_vars[global_vars_count++] = var; +} + // TODO: rename this, it's ugly void add_variable_to_current_block(Variable *var) { @@ -224,16 +251,16 @@ Node 
*parse_expression(Lexer *); Node *parse_var_declaration(Lexer *lexer) { + bool is_global = (current_function == NULL); Token token = assert_token(Lexer_next(lexer), TOKEN_LET); - // TODO: Reuse this for globals? Or maybe just make a new function? - if (!current_function || current_function->type != AST_FUNC) - die_location(token.loc, "Variable declaration outside of function"); token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER); // NOTE: We don't allow shadowing of variables in the any blocks, // this is by design since it's a common mistake. if (find_local_variable(&token) != NULL) - die_location(token.loc, "Variable `%s` already declared", token.value.as_string); + die_location(token.loc, "Variable `%s` already declared in function", token.value.as_string); + if (find_global_variable(&token) != NULL) + die_location(token.loc, "Variable `%s` already declared globally", token.value.as_string); Node *node = Node_new(AST_VARDECL); node->var_decl.var.name = token.value.as_string; @@ -243,10 +270,16 @@ Node *parse_var_declaration(Lexer *lexer) die_location(token.loc, "Missing type specifier for variable `%s`", node->var_decl.var.name); node->var_decl.var.type = parse_type(lexer); - add_variable_to_current_block(&node->var_decl.var); + if (is_global) { + add_global_variable(&node->var_decl.var); + } else { + add_variable_to_current_block(&node->var_decl.var); + } token = Lexer_next(lexer); if (token.type == TOKEN_ASSIGN) { + if (is_global) + die_location(token.loc, "Cannot initialize global variable `%s` outside function", node->var_decl.var.name); node->var_decl.value = parse_expression(lexer); assert_token(Lexer_next(lexer), TOKEN_SEMICOLON); } else { @@ -317,11 +350,19 @@ Node *parse_factor(Lexer *lexer) Variable *var = find_local_variable(&token); if (var != NULL) { Lexer_next(lexer); - expr = Node_new(AST_VAR); + expr = Node_new(AST_LOCAL_VAR); expr->variable = var; return expr; } + Variable *gvar = find_global_variable(&token); + if (gvar != NULL) { + 
Lexer_next(lexer); + expr = Node_new(AST_GLOBAL_VAR); + expr->variable = gvar; + return expr; + } + Node *func = find_function_definition(&token); if (func != NULL) { return parse_function_call_args(lexer, func); @@ -396,14 +437,14 @@ Node *parse_expression(Lexer *lexer) Node *node = parse_conditional_exp(lexer); // FIXME: This is a hack to handle assignment expressions // and can probably be done properly. - if (node->type == AST_VAR) { + if (node->type == AST_LOCAL_VAR || node->type == AST_GLOBAL_VAR) { Token token = Lexer_peek(lexer); if (token.type == TOKEN_ASSIGN) { Lexer_next(lexer); - Variable *var = node->variable; - node->type = OP_ASSIGN; - node->assign.var = var; - node->assign.value = parse_expression(lexer); + Node *assign = Node_new(OP_ASSIGN); + assign->assign.var = node; + assign->assign.value = parse_expression(lexer); + node = assign; } } return node; @@ -577,6 +618,9 @@ Node *parse_func(Lexer *lexer) assert(block_stack_count == 0); assert(cur_stack_offset == 0); + // Reset current function + current_function = NULL; + return func; } @@ -607,6 +651,9 @@ Node *parse_program(Lexer *lexer) if (token.type == TOKEN_FN) { Node *func = parse_func(lexer); Node_add_child(program, func); + } else if (token.type == TOKEN_LET) { + Node *var_decl = parse_var_declaration(lexer); + Node_add_child(program, var_decl); } else if (token.type == TOKEN_IMPORT) { // TODO: Handle circular imports // TODO: Handle complex import graphs (#pragma once) @@ -629,5 +676,6 @@ Node *parse_program(Lexer *lexer) token = Lexer_peek(lexer); } } + program->block.locals_size = global_vars_offset; return program; }
\ No newline at end of file diff --git a/tests/variables.sh b/tests/variables.sh index 2d08c76..ffa66c3 100755 --- a/tests/variables.sh +++ b/tests/variables.sh @@ -10,19 +10,19 @@ assert_exit_status 'fn main() { let x: int = 45; return x; }' 45 assert_exit_status 'fn main() { let x: int = 45; return x+x; }' 90 assert_exit_status_stdin 5 <<EOF -fn main() { +fn main() { let x: int; x = 3; x = 5; - return x; + return x; } EOF assert_exit_status_stdin 5 <<EOF -fn main() { +fn main() { let x: int = 3; x = x + x - 1; - return x; + return x; } EOF @@ -30,36 +30,36 @@ echo " OK" echo -n "- Multiple variable: " assert_exit_status_stdin 2 <<EOF -fn main() { +fn main() { let x: int = 1; let y: int = x + x; - return y; + return y; } EOF assert_exit_status_stdin 23 <<EOF -fn main() { +fn main() { let x: int = 1; let y: int = x + x; let z: int = y + y; let w: int = z + z; let r: int = w + w; - return r + x + y + z; + return r + x + y + z; } EOF assert_exit_status_stdin 2 <<EOF -fn main() { +fn main() { let x: int = 1; let y: int = x + x; y = y + x; x = (x + x) * y; - return x / y; + return x / y; } EOF assert_exit_status_stdin 18 <<EOF -fn main() { +fn main() { let x: int = 5; let y: int; let z: int = (y = x + 3) + 2; @@ -68,15 +68,58 @@ fn main() { EOF echo " OK" +echo -n "- Global variables: " +assert_exit_status_stdin 18 <<EOF +let g: int; +fn main() { + g = 18; + return g; +} +EOF + +assert_exit_status_stdin 18 <<EOF +let g: int; +let h: int; +fn main() { + g = 18; + h = g + g; + return h - g; +} +EOF + +assert_exit_status_stdin 18 <<EOF +let g: int; +let h: int; + +fn test() { + g = 18; + h = g + g; +} + +fn main() { + test(); + return h - g; +} +EOF + +assert_compile_failure_stdin <<EOF +let g: int = 0; + +fn main() { + return g; +} +EOF +echo " OK" + echo -n "- Nested Blocks: " assert_exit_status_stdin 3 <<EOF -fn main() { +fn main() { let x: int = 1; { let y: int = 3; x = y; } - return x; + return x; } EOF @@ -173,29 +216,29 @@ echo " OK" echo -n "- Conditionals w/ 
blocks: " assert_exit_status_stdin 3 <<EOF -fn main() { +fn main() { let x: int = 1; if (x == 1) { let y: int = 3; x = y; } - return x; + return x; } EOF assert_exit_status_stdin 1 <<EOF -fn main() { +fn main() { let x: int = 1; if (x != 1) { let y: int = 3; x = y; } - return x; + return x; } EOF assert_exit_status_stdin 5 <<EOF -fn main() { +fn main() { let x: int = 1; if (x != 1) { let y: int = 3; @@ -204,19 +247,19 @@ fn main() { let y: int = 5; x = y; } - return x; + return x; } EOF assert_compile_failure_stdin <<EOF -fn main() { +fn main() { let x: int = 1; if (x != 1) { let y: int = 3; x = y; } x = y; // Invalid - return x; + return x; } EOF |