aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMustafa Quraish <[email protected]>2022-01-31 01:26:07 -0500
committerMustafa Quraish <[email protected]>2022-01-31 01:26:07 -0500
commitcb0d77c3dc83d6c7cb39642e8708b12a29b7b8c7 (patch)
tree904a6c70509da3f9274bb2df8e214f2b9e6fd9ac /src
parentPut tokens in their own macro to allow looping over them (diff)
downloadcup-cb0d77c3dc83d6c7cb39642e8708b12a29b7b8c7.tar.xz
cup-cb0d77c3dc83d6c7cb39642e8708b12a29b7b8c7.zip
Add basic builtin-function support
This isn't really super extendible for now, but it's a start and gives us the `print` builtin which allows us to finally actually print out values to the screen, so we can move away from testing with exit codes eventually.
Diffstat (limited to 'src')
-rw-r--r--src/ast.h1
-rw-r--r--src/generator.c101
-rw-r--r--src/parser.c91
3 files changed, 158 insertions, 35 deletions
diff --git a/src/ast.h b/src/ast.h
index 22a7283..0413f79 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -33,6 +33,7 @@
F(AST_VAR, "variable") \
F(AST_RETURN, "return") \
F(AST_FUNC, "func") \
+ F(AST_BUILTIN, "builtin") \
F(AST_PROGRAM, "program") \
F(AST_BLOCK, "block statements")
diff --git a/src/generator.c b/src/generator.c
index 6a95d0c..5b1f630 100644
--- a/src/generator.c
+++ b/src/generator.c
@@ -12,6 +12,14 @@
static int label_counter = 0;
static Node *current_function = NULL;
+void make_syscall(i64 syscall_no, FILE *out) {
+#if __APPLE__
+ syscall_no += 0x2000000;
+#endif
+ fprintf(out, " mov rax, %lld\n", syscall_no);
+ fprintf(out, " syscall\n");
+}
+
void generate_expr_into_rax(Node *expr, FILE *out);
void generate_func_call(Node *node, FILE *out)
@@ -319,8 +327,17 @@ void generate_function(Node *func, FILE *out)
current_function = func;
generate_function_header(func, out);
generate_block(func->func.body, out);
+ // TODO: This is a hack, we should make sure a function contains a return statement
+ // if it says it's going to return something
+ fprintf(out, " mov rsp, rbp\n");
+ fprintf(out, " pop rbp\n");
+ // Return 0 by default if we don't have a return statement
+ fprintf(out, " mov qword rax, 0\n");
+ fprintf(out, " ret\n");
}
+void generate_builtins(FILE *out);
+
void generate_asm(Node *root, FILE *out)
{
assert(root->type == AST_PROGRAM);
@@ -343,13 +360,85 @@ void generate_asm(Node *root, FILE *out)
#endif
fprintf(out, " call main\n");
-#if __APPLE__
- fprintf(out, " ret\n");
-#else
fprintf(out, " mov rdi, rax\n");
- fprintf(out, " mov rax, %d\n", SYS_exit);
- fprintf(out, " syscall\n");
-#endif
+ make_syscall(SYS_exit, out);
// TODO: Add implementations of some primitives?
+ generate_builtins(out);
+}
+
+void generate_builtins(FILE *out)
+{
+ // Stolen shamelessly from tsoding/porth:
+ // https://gitlab.com/tsoding/porth
+ fprintf(out,
+ "print:\n"
+ " mov rdi, [rsp+8]\n"
+ " mov r9, -3689348814741910323\n"
+ " sub rsp, 40\n"
+ " mov BYTE [rsp+31], 10\n"
+ " lea rcx, [rsp+30]\n"
+ " mov qword rbx, 0\n"
+
+ // Check if < 0, and set rbx=0, negate value
+ // " cmp rdi, 0\n"
+ // " jge .L2\n"
+ // " mov qword rbx, 1\n"
+ // " neg rdi\n"
+ // " sub rcx, 1\n"
+
+ ".L2:\n"
+ " mov rax, rdi\n"
+ " lea r8, [rsp+32]\n"
+ " mul r9\n"
+ " mov rax, rdi\n"
+ " sub r8, rcx\n"
+ " shr rdx, 3\n"
+ " lea rsi, [rdx+rdx*4]\n"
+ " add rsi, rsi\n"
+ " sub rax, rsi\n"
+ " add eax, 48\n"
+ " mov BYTE [rcx], al\n"
+ " mov rax, rdi\n"
+ " mov rdi, rdx\n"
+ " mov rdx, rcx\n"
+ " sub rcx, 1\n"
+ " cmp rax, 9\n"
+ " ja .L2\n"
+
+ // If rbx=1, then we need to add a minus sign, not sure how
+ // the above code works so there's probably a nicer way.
+ // " cmp rbx, 0\n"
+ // " je .end_neg_sign\n"
+ // " add eax, 48\n"
+ // " mov BYTE [rcx], 45\n"
+ // " mov rax, rdi\n"
+ // " mov rdi, rdx\n"
+ // " mov rdx, rcx\n"
+ // " sub rcx, 1\n"
+ // ".end_neg_sign:\n"
+
+ " lea rax, [rsp+32]\n"
+ " mov edi, 1\n"
+ " sub rdx, rax\n"
+ " xor eax, eax\n"
+ " lea rsi, [rsp+32+rdx]\n"
+ " mov rdx, r8\n"
+ );
+ make_syscall(SYS_write, out);
+ fprintf(out, " add rsp, 40\n");
+ fprintf(out, " ret\n");
+
+ /////////////////////////////////////////////////////////////////
+
+ // Print out a single character
+ fprintf(out,
+ "putc:\n"
+ " mov rdi, 1\n" // stdout
+ " mov rsi, rsp\n"
+ " add rsi, 8\n"
+ " mov rdx, 1\n" // 1 byte
+ );
+ make_syscall(SYS_write, out);
+ fprintf(out, " ret\n");
} \ No newline at end of file
diff --git a/src/parser.c b/src/parser.c
index cfb5d97..2c11656 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -33,6 +33,21 @@ Token do_assert_token(Token token, TokenType type, char *filename, int line)
* Some helpers
*/
+void Node_add_child(Node *parent, Node *child)
+{
+ // TODO, use a vector
+ parent->block.children = realloc(parent->block.children, sizeof(Node *) * (parent->block.num_children + 1));
+ parent->block.children[parent->block.num_children] = child;
+ parent->block.num_children++;
+}
+
+Node *Node_new(NodeType type)
+{
+ Node *self = calloc(sizeof(Node), 1);
+ self->type = type;
+ return self;
+}
+
NodeType binary_token_to_op(TokenType type)
{
switch (type)
@@ -53,7 +68,7 @@ NodeType binary_token_to_op(TokenType type)
case TOKEN_LEQ: return OP_LEQ;
case TOKEN_GT: return OP_GT;
case TOKEN_GEQ: return OP_GEQ;
-
+
default: assert(false && "binary_token_to_op called with invalid token type");
}
}
@@ -85,6 +100,33 @@ void dump_block_stack()
}
}
+Node *builtin_print;
+Node *builtin_putc;
+
+void initialize_builtins()
+{
+ builtin_print = Node_new(AST_BUILTIN);
+ builtin_print->func.name = "print";
+ builtin_print->func.return_type = (Type){TYPE_INT,0};
+ builtin_print->func.num_args = 1;
+ builtin_print->func.args = (Variable *)calloc(sizeof(Variable), 1);
+ builtin_print->func.args[0] = (Variable){"val", (Type){TYPE_INT,0}, 0};
+
+ builtin_putc = Node_new(AST_BUILTIN);
+ builtin_putc->func.name = "putc";
+ builtin_putc->func.return_type = (Type){TYPE_INT,0};
+ builtin_putc->func.num_args = 1;
+ builtin_putc->func.args = (Variable *)calloc(sizeof(Variable), 2);
+ builtin_putc->func.args[0] = (Variable){"arg", (Type){TYPE_INT,0}, 0};
+}
+
+Node *find_builtin_function(Token *token)
+{
+ if (strcmp(token->value.as_string, "putc") == 0) { return builtin_putc; }
+ if (strcmp(token->value.as_string, "print") == 0) { return builtin_print; }
+ return NULL;
+}
+
Variable *find_local_variable(Token *token)
{
assert_token(*token, TOKEN_IDENTIFIER);
@@ -126,7 +168,7 @@ void add_variable_to_current_block(Variable *var)
int new_len = (cur_block->block.num_locals + 1);
int var_size = 8; // TODO: Compute sizes based on different types
-
+
// Add to the block
// FIXME: Use a map here
cur_block->block.locals = realloc(cur_block->block.locals, sizeof(Variable *) * new_len);
@@ -137,7 +179,7 @@ void add_variable_to_current_block(Variable *var)
// Update current stack offset (w.r.t function stack frame) and block size
cur_stack_offset += var_size;
block_stack[block_stack_count-1]->block.locals_size += var_size;
-
+
// Update function's max locals size
i64 max_offset = i64max(current_function->func.max_locals_size, cur_stack_offset);
current_function->func.max_locals_size = max_offset;
@@ -146,21 +188,6 @@ void add_variable_to_current_block(Variable *var)
assert(block_stack_count > 0);
}
-void Node_add_child(Node *parent, Node *child)
-{
- // TODO, use a vector
- parent->block.children = realloc(parent->block.children, sizeof(Node *) * (parent->block.num_children + 1));
- parent->block.children[parent->block.num_children] = child;
- parent->block.num_children++;
-}
-
-Node *Node_new(NodeType type)
-{
- Node *self = calloc(sizeof(Node), 1);
- self->type = type;
- return self;
-}
-
Type parse_type(Lexer *lexer)
{
Type type = {0};
@@ -197,13 +224,13 @@ Node *parse_var_declaration(Lexer *lexer)
// TODO: Reuse this for globals? Or maybe just make a new function?
if (!current_function || current_function->type != AST_FUNC)
die_location(token.loc, "Variable declaration outside of function");
-
+
token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
- // NOTE: We don't allow shadowing of variables in the any blocks,
+ // NOTE: We don't allow shadowing of variables in the any blocks,
// this is by design since it's a common mistake.
if (find_local_variable(&token) != NULL)
die_location(token.loc, "Variable `%s` already declared", token.value.as_string);
-
+
Node *node = Node_new(AST_VARDECL);
node->var_decl.var.name = token.value.as_string;
@@ -239,7 +266,7 @@ Node *parse_function_call_args(Lexer *lexer, Node *func)
int new_size = call->call.num_args + 1;
call->call.args = realloc(call->call.args, sizeof(Node *) * new_size);
call->call.args[call->call.num_args++] = arg;
-
+
if (new_size > func->func.num_args)
die_location(identifier.loc, "Too many arguments to function `%s`", func->func.name);
@@ -280,22 +307,27 @@ Node *parse_factor(Lexer *lexer)
assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
} else if (token.type == TOKEN_INTLIT) {
expr = parse_literal(lexer);
- } else if (token.type == TOKEN_IDENTIFIER) {
+ } else if (token.type == TOKEN_IDENTIFIER) {
// TODO: Check for global variables when added
- Variable *var = find_local_variable(&token);
+ Variable *var = find_local_variable(&token);
if (var != NULL) {
Lexer_next(lexer);
expr = Node_new(AST_VAR);
expr->variable = var;
return expr;
}
-
+
Node *func = find_function_definition(&token);
if (func != NULL) {
return parse_function_call_args(lexer, func);
}
+ Node *builtin = find_builtin_function(&token);
+ if (builtin != NULL) {
+ return parse_function_call_args(lexer, builtin);
+ }
+
die_location(token.loc, "Unknown identifier `%s`", token.value.as_string);
expr = NULL;
} else {
@@ -410,7 +442,7 @@ Node *parse_statement(Lexer *lexer)
Lexer_next(lexer);
node = Node_new(AST_FOR);
assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
-
+
// All of the expressions in the for loop are optional
// TODO: Allow this to be a declaration, need to inject
@@ -482,7 +514,7 @@ void parse_func_args(Lexer *lexer, Node *func)
token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
// TODO: Check for shadowing with globals
assert_token(Lexer_next(lexer), TOKEN_COLON);
- Type type = parse_type(lexer);
+ Type type = parse_type(lexer);
i64 new_count = func->func.num_args + 1;
func->func.args = realloc(func->func.args, sizeof(Variable) * new_count);
@@ -499,10 +531,10 @@ void parse_func_args(Lexer *lexer, Node *func)
assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
// Set the offsets for the arguments
-
+
// IMPORTANT: We want to skip the saved ret_addr+old_rbp that we
// pushed on the stack. Each of these is 8 bytes.
- int offset = -16;
+ int offset = -16;
for (int i = 0; i < func->func.num_args; i++) {
Variable *var = &func->func.args[i];
var->offset = offset;
@@ -546,6 +578,7 @@ Node *parse_func(Lexer *lexer)
Node *parse_program(Lexer *lexer)
{
+ initialize_builtins();
Node *program = Node_new(AST_PROGRAM);
Token token;
while ((token = Lexer_peek(lexer)).type != TOKEN_EOF) {