diff options
| author | Mustafa Quraish <[email protected]> | 2022-01-31 01:26:07 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-01-31 01:26:07 -0500 |
| commit | cb0d77c3dc83d6c7cb39642e8708b12a29b7b8c7 (patch) | |
| tree | 904a6c70509da3f9274bb2df8e214f2b9e6fd9ac /src | |
| parent | Put tokens in their own macro to allow looping over them (diff) | |
| download | cup-cb0d77c3dc83d6c7cb39642e8708b12a29b7b8c7.tar.xz cup-cb0d77c3dc83d6c7cb39642e8708b12a29b7b8c7.zip | |
Add basic builtin-function support
This isn't really super extendible for now, but it's a start and gives
us the `print` builtin which allows us to finally actually print out
values to the screen, so we can move away from testing with exit codes
eventually.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.h | 1 | ||||
| -rw-r--r-- | src/generator.c | 101 | ||||
| -rw-r--r-- | src/parser.c | 91 |
3 files changed, 158 insertions, 35 deletions
@@ -33,6 +33,7 @@ F(AST_VAR, "variable") \ F(AST_RETURN, "return") \ F(AST_FUNC, "func") \ + F(AST_BUILTIN, "builtin") \ F(AST_PROGRAM, "program") \ F(AST_BLOCK, "block statements") diff --git a/src/generator.c b/src/generator.c index 6a95d0c..5b1f630 100644 --- a/src/generator.c +++ b/src/generator.c @@ -12,6 +12,14 @@ static int label_counter = 0; static Node *current_function = NULL; +void make_syscall(i64 syscall_no, FILE *out) { +#if __APPLE__ + syscall_no += 0x2000000; +#endif + fprintf(out, " mov rax, %lld\n", syscall_no); + fprintf(out, " syscall\n"); +} + void generate_expr_into_rax(Node *expr, FILE *out); void generate_func_call(Node *node, FILE *out) @@ -319,8 +327,17 @@ void generate_function(Node *func, FILE *out) current_function = func; generate_function_header(func, out); generate_block(func->func.body, out); + // TODO: This is a hack, we should make sure a function contains a return statement + // if it says it's going to return something + fprintf(out, " mov rsp, rbp\n"); + fprintf(out, " pop rbp\n"); + // Return 0 by default if we don't have a return statement + fprintf(out, " mov qword rax, 0\n"); + fprintf(out, " ret\n"); } +void generate_builtins(FILE *out); + void generate_asm(Node *root, FILE *out) { assert(root->type == AST_PROGRAM); @@ -343,13 +360,85 @@ void generate_asm(Node *root, FILE *out) #endif fprintf(out, " call main\n"); -#if __APPLE__ - fprintf(out, " ret\n"); -#else fprintf(out, " mov rdi, rax\n"); - fprintf(out, " mov rax, %d\n", SYS_exit); - fprintf(out, " syscall\n"); -#endif + make_syscall(SYS_exit, out); // TODO: Add implementations of some primitives? + generate_builtins(out); +} + +void generate_builtins(FILE *out) +{ + // Stolen shamelessly from tsoding/porth: + // https://gitlab.com/tsoding/porth + fprintf(out, + "print:\n" + " mov rdi, [rsp+8]\n" + " mov r9, -3689348814741910323\n" + " sub rsp, 40\n" + " mov BYTE [rsp+31], 10\n" + " lea rcx, [rsp+30]\n" + " mov qword rbx, 0\n" + + // Check if < 0, and set rbx=0, negate value + // " cmp rdi, 0\n" + // " jge .L2\n" + // " mov qword rbx, 1\n" + // " neg rdi\n" + // " sub rcx, 1\n" + + ".L2:\n" + " mov rax, rdi\n" + " lea r8, [rsp+32]\n" + " mul r9\n" + " mov rax, rdi\n" + " sub r8, rcx\n" + " shr rdx, 3\n" + " lea rsi, [rdx+rdx*4]\n" + " add rsi, rsi\n" + " sub rax, rsi\n" + " add eax, 48\n" + " mov BYTE [rcx], al\n" + " mov rax, rdi\n" + " mov rdi, rdx\n" + " mov rdx, rcx\n" + " sub rcx, 1\n" + " cmp rax, 9\n" + " ja .L2\n" + + // If rbx=1, then we need to add a minus sign, not sure how + // the above code works so there's probably a nicer way. + // " cmp rbx, 0\n" + // " je .end_neg_sign\n" + // " add eax, 48\n" + // " mov BYTE [rcx], 45\n" + // " mov rax, rdi\n" + // " mov rdi, rdx\n" + // " mov rdx, rcx\n" + // " sub rcx, 1\n" + // ".end_neg_sign:\n" + + " lea rax, [rsp+32]\n" + " mov edi, 1\n" + " sub rdx, rax\n" + " xor eax, eax\n" + " lea rsi, [rsp+32+rdx]\n" + " mov rdx, r8\n" + ); + make_syscall(SYS_write, out); + fprintf(out, " add rsp, 40\n"); + fprintf(out, " ret\n"); + + ///////////////////////////////////////////////////////////////// + + // Print out a single character + fprintf(out, + "putc:\n" + " mov rdi, 1\n" // stdout + " mov rsi, rsp\n" + " add rsi, 8\n" + " mov rdx, 1\n" // 1 byte + ); + make_syscall(SYS_write, out); + fprintf(out, " ret\n"); }
\ No newline at end of file diff --git a/src/parser.c b/src/parser.c index cfb5d97..2c11656 100644 --- a/src/parser.c +++ b/src/parser.c @@ -33,6 +33,21 @@ Token do_assert_token(Token token, TokenType type, char *filename, int line) * Some helpers */ +void Node_add_child(Node *parent, Node *child) +{ + // TODO, use a vector + parent->block.children = realloc(parent->block.children, sizeof(Node *) * (parent->block.num_children + 1)); + parent->block.children[parent->block.num_children] = child; + parent->block.num_children++; +} + +Node *Node_new(NodeType type) +{ + Node *self = calloc(sizeof(Node), 1); + self->type = type; + return self; +} + NodeType binary_token_to_op(TokenType type) { switch (type) @@ -53,7 +68,7 @@ NodeType binary_token_to_op(TokenType type) case TOKEN_LEQ: return OP_LEQ; case TOKEN_GT: return OP_GT; case TOKEN_GEQ: return OP_GEQ; - + default: assert(false && "binary_token_to_op called with invalid token type"); } } @@ -85,6 +100,33 @@ void dump_block_stack() } } +Node *builtin_print; +Node *builtin_putc; + +void initialize_builtins() +{ + builtin_print = Node_new(AST_BUILTIN); + builtin_print->func.name = "print"; + builtin_print->func.return_type = (Type){TYPE_INT,0}; + builtin_print->func.num_args = 1; + builtin_print->func.args = (Variable *)calloc(sizeof(Variable), 1); + builtin_print->func.args[0] = (Variable){"val", (Type){TYPE_INT,0}, 0}; + + builtin_putc = Node_new(AST_BUILTIN); + builtin_putc->func.name = "putc"; + builtin_putc->func.return_type = (Type){TYPE_INT,0}; + builtin_putc->func.num_args = 1; + builtin_putc->func.args = (Variable *)calloc(sizeof(Variable), 2); + builtin_putc->func.args[0] = (Variable){"arg", (Type){TYPE_INT,0}, 0}; +} + +Node *find_builtin_function(Token *token) +{ + if (strcmp(token->value.as_string, "putc") == 0) { return builtin_putc; } + if (strcmp(token->value.as_string, "print") == 0) { return builtin_print; } + return NULL; +} + Variable *find_local_variable(Token *token) { assert_token(*token, TOKEN_IDENTIFIER); @@ -126,7 +168,7 @@ void add_variable_to_current_block(Variable *var) int new_len = (cur_block->block.num_locals + 1); int var_size = 8; // TODO: Compute sizes based on different types - + // Add to the block // FIXME: Use a map here cur_block->block.locals = realloc(cur_block->block.locals, sizeof(Variable *) * new_len); @@ -137,7 +179,7 @@ void add_variable_to_current_block(Variable *var) // Update current stack offset (w.r.t function stack frame) and block size cur_stack_offset += var_size; block_stack[block_stack_count-1]->block.locals_size += var_size; - + // Update function's max locals size i64 max_offset = i64max(current_function->func.max_locals_size, cur_stack_offset); current_function->func.max_locals_size = max_offset; @@ -146,21 +188,6 @@ void add_variable_to_current_block(Variable *var) assert(block_stack_count > 0); } -void Node_add_child(Node *parent, Node *child) -{ - // TODO, use a vector - parent->block.children = realloc(parent->block.children, sizeof(Node *) * (parent->block.num_children + 1)); - parent->block.children[parent->block.num_children] = child; - parent->block.num_children++; -} - -Node *Node_new(NodeType type) -{ - Node *self = calloc(sizeof(Node), 1); - self->type = type; - return self; -} - Type parse_type(Lexer *lexer) { Type type = {0}; @@ -197,13 +224,13 @@ Node *parse_var_declaration(Lexer *lexer) // TODO: Reuse this for globals? Or maybe just make a new function? if (!current_function || current_function->type != AST_FUNC) die_location(token.loc, "Variable declaration outside of function"); - + token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER); - // NOTE: We don't allow shadowing of variables in the any blocks, + // NOTE: We don't allow shadowing of variables in the any blocks, // this is by design since it's a common mistake. if (find_local_variable(&token) != NULL) die_location(token.loc, "Variable `%s` already declared", token.value.as_string); - + Node *node = Node_new(AST_VARDECL); node->var_decl.var.name = token.value.as_string; @@ -239,7 +266,7 @@ Node *parse_function_call_args(Lexer *lexer, Node *func) int new_size = call->call.num_args + 1; call->call.args = realloc(call->call.args, sizeof(Node *) * new_size); call->call.args[call->call.num_args++] = arg; - + if (new_size > func->func.num_args) die_location(identifier.loc, "Too many arguments to function `%s`", func->func.name); @@ -280,22 +307,27 @@ Node *parse_factor(Lexer *lexer) assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN); } else if (token.type == TOKEN_INTLIT) { expr = parse_literal(lexer); - } else if (token.type == TOKEN_IDENTIFIER) { + } else if (token.type == TOKEN_IDENTIFIER) { // TODO: Check for global variables when added - Variable *var = find_local_variable(&token); + Variable *var = find_local_variable(&token); if (var != NULL) { Lexer_next(lexer); expr = Node_new(AST_VAR); expr->variable = var; return expr; } - + Node *func = find_function_definition(&token); if (func != NULL) { return parse_function_call_args(lexer, func); } + Node *builtin = find_builtin_function(&token); + if (builtin != NULL) { + return parse_function_call_args(lexer, builtin); + } + die_location(token.loc, "Unknown identifier `%s`", token.value.as_string); expr = NULL; } else { @@ -410,7 +442,7 @@ Node *parse_statement(Lexer *lexer) Lexer_next(lexer); node = Node_new(AST_FOR); assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN); - + // All of the expressions in the for loop are optional // TODO: Allow this to be a declaration, need to inject @@ -482,7 +514,7 @@ void parse_func_args(Lexer *lexer, Node *func) token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER); // TODO: Check for shadowing with globals assert_token(Lexer_next(lexer), TOKEN_COLON); - Type type = parse_type(lexer); + Type type = parse_type(lexer); i64 new_count = func->func.num_args + 1; func->func.args = realloc(func->func.args, sizeof(Variable) * new_count); @@ -499,10 +531,10 @@ void parse_func_args(Lexer *lexer, Node *func) assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN); // Set the offsets for the arguments - + // IMPORTANT: We want to skip the saved ret_addr+old_rbp that we // pushed on the stack. Each of these is 8 bytes. - int offset = -16; + int offset = -16; for (int i = 0; i < func->func.num_args; i++) { Variable *var = &func->func.args[i]; var->offset = offset; @@ -546,6 +578,7 @@ Node *parse_func(Lexer *lexer) Node *parse_program(Lexer *lexer) { + initialize_builtins(); Node *program = Node_new(AST_PROGRAM); Token token; while ((token = Lexer_peek(lexer)).type != TOKEN_EOF) { |