From 1a165b3fcdabf7462a8f86eab5a6274f92ce9653 Mon Sep 17 00:00:00 2001 From: Mustafa Quraish Date: Thu, 3 Feb 2022 04:38:35 -0500 Subject: Add helper to create builtins for syscalls + implement `read()` This was possible, but very tedious to do by hand before. Now we automate it based on the number of arguments. Note that currently we can't just add `syscall3()` etc as builtins because the actual numbers for the system calls vary from one system to another, and we want to maintain support for macOS and Linux (at least for now). --- src/builtins.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++------- src/builtins.h | 5 ++- src/generator.c | 58 +----------------------- src/generator.h | 1 + src/types.c | 1 + 5 files changed, 125 insertions(+), 74 deletions(-) (limited to 'src') diff --git a/src/builtins.c b/src/builtins.c index 3922d54..2883c1d 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1,19 +1,54 @@ #include "builtins.h" #include "ast.h" #include "utils.h" +#include "generator.h" #include #include #include +#include +#include +#define MAX_CUSTOM_BUILTIN_COUNT 128 +static Node *custom_builtins[MAX_CUSTOM_BUILTIN_COUNT]; +static i64 custom_builtins_count = 0; -#define MAX_BUILTINS_COUNT 128 -static Node *all_builtins[MAX_BUILTINS_COUNT]; -static i64 builtins_count = 0; +#define MAX_SYSCALL_BUILTIN_COUNT 256 +static Node *syscall_builtins[MAX_SYSCALL_BUILTIN_COUNT]; +static i64 syscall_builtins_count = 0; static void push_builtin(Node *node) { - assert(builtins_count < MAX_BUILTINS_COUNT); - all_builtins[builtins_count++] = node; + assert(custom_builtins_count < MAX_CUSTOM_BUILTIN_COUNT); + custom_builtins[custom_builtins_count++] = node; +} + +static void make_syscall(int num_args, i64 syscall_num, char *name, Type *return_type, ...) +{ + Node *node = Node_new(AST_BUILTIN); + node->func.name = name; + node->func.return_type = return_type; + node->func.num_args = num_args; + node->func.args = calloc(sizeof(Variable), num_args); + + // This is a hack to get around the fact that we can't pass a variable + // Luckily, we don't actually use this field for the builtins anyway. + node->func.max_locals_size = syscall_num; + + va_list ap; + va_start(ap, return_type); + for (i64 i = 0; i < num_args; i++) + { + Type *typ = va_arg(ap, Type *); + if (!typ) { + fprintf(stderr, "Error: Builtin %s has no type for argument %lld\n", name, i); + exit(1); + } + node->func.args[i] = (Variable){"arg", typ, 0}; + } + va_end(ap); + + assert(syscall_builtins_count < MAX_SYSCALL_BUILTIN_COUNT); + syscall_builtins[syscall_builtins_count++] = node; } void initialize_builtins() @@ -28,22 +63,87 @@ void initialize_builtins() node->func.args[0] = (Variable){"val", type_new(TYPE_ANY), 0}; push_builtin(node); - node = Node_new(AST_BUILTIN); - node->func.name = "write"; - node->func.return_type = type_new(TYPE_INT); - node->func.num_args = 3; - node->func.args = (Variable *)calloc(sizeof(Variable), 3); - node->func.args[0] = (Variable){"fd", type_new(TYPE_INT), 0}; - node->func.args[1] = (Variable){"buf", type_new_ptr(TYPE_CHAR), 0}; - node->func.args[2] = (Variable){"size", type_new(TYPE_INT), 0}; - push_builtin(node); + make_syscall(3, SYS_write, "write", type_new(TYPE_INT), + type_new(TYPE_INT), type_new_ptr(TYPE_CHAR), type_new(TYPE_INT) // Args + ); + make_syscall(3, SYS_read, "read", type_new(TYPE_INT), + type_new(TYPE_INT), type_new_ptr(TYPE_CHAR), type_new(TYPE_INT) // Args + ); } Node *find_builtin_function(Token *token) { - for (i64 i = 0; i < builtins_count; i++) { - if (strcmp(all_builtins[i]->func.name, token->value.as_string) == 0) - return all_builtins[i]; + for (i64 i = 0; i < custom_builtins_count; i++) { + if (strcmp(custom_builtins[i]->func.name, token->value.as_string) == 0) + return custom_builtins[i]; + } + for (i64 i = 0; i < syscall_builtins_count; i++) { + if (strcmp(syscall_builtins[i]->func.name, token->value.as_string) == 0) + return syscall_builtins[i]; } return NULL; +} + +char *x86_64_syscall_regs[10] = { + "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13" +}; + +static void generate_syscall_builtins(FILE *out) +{ + for (i64 i = 0; i < syscall_builtins_count; i++) { + Node *node = syscall_builtins[i]; + fprintf(out, "func_%s:\n", node->func.name); + for (i64 i = 0; i < node->func.num_args; i++) + fprintf(out, " mov %s, [rsp+%lld]\n", x86_64_syscall_regs[i], (i+1) * 8); + generate_syscall(node->func.max_locals_size, out); + fprintf(out, " ret\n"); + } +} + +static void generate_custom_builtins(FILE *out) +{ + // Stolen shamelessly from tsoding/porth: + // https://gitlab.com/tsoding/porth + fprintf(out, + "func_print:\n" + " mov rdi, [rsp+8]\n" + " mov r9, -3689348814741910323\n" + " sub rsp, 40\n" + " mov BYTE [rsp+31], 10\n" + " lea rcx, [rsp+30]\n" + " mov qword rbx, 0\n" + ".L2:\n" + " mov rax, rdi\n" + " lea r8, [rsp+32]\n" + " mul r9\n" + " mov rax, rdi\n" + " sub r8, rcx\n" + " shr rdx, 3\n" + " lea rsi, [rdx+rdx*4]\n" + " add rsi, rsi\n" + " sub rax, rsi\n" + " add eax, 48\n" + " mov BYTE [rcx], al\n" + " mov rax, rdi\n" + " mov rdi, rdx\n" + " mov rdx, rcx\n" + " sub rcx, 1\n" + " cmp rax, 9\n" + " ja .L2\n" + " lea rax, [rsp+32]\n" + " mov edi, 1\n" + " sub rdx, rax\n" + " xor eax, eax\n" + " lea rsi, [rsp+32+rdx]\n" + " mov rdx, r8\n" + ); + generate_syscall(SYS_write, out); + fprintf(out, " add rsp, 40\n"); + fprintf(out, " ret\n"); +} + +void generate_builtins(FILE *out) +{ + generate_custom_builtins(out); + generate_syscall_builtins(out); } \ No newline at end of file diff --git a/src/builtins.h b/src/builtins.h index 38c1cb4..b81d8d4 100644 --- a/src/builtins.h +++ b/src/builtins.h @@ -3,4 +3,7 @@ #include "ast.h" void initialize_builtins(); -Node *find_builtin_function(Token *token); \ No newline at end of file +Node *find_builtin_function(Token *token); + +// Codegen +void generate_builtins(FILE *out); diff --git a/src/generator.c b/src/generator.c index aace3bc..829bf73 100644 --- a/src/generator.c +++ b/src/generator.c @@ -16,7 +16,7 @@ static Node *current_function = NULL; static Node *defer_stack[DEFER_STACK_SIZE]; static i64 defer_stack_count = 0; -void make_syscall(i64 syscall_no, FILE *out) { +void generate_syscall(i64 syscall_no, FILE *out) { #if __APPLE__ syscall_no += 0x2000000; #endif @@ -457,7 +457,7 @@ void generate_asm(Node *root, FILE *out) fprintf(out, " call func_main\n"); fprintf(out, " mov rdi, rax\n"); - make_syscall(SYS_exit, out); + generate_syscall(SYS_exit, out); // TODO: Don't generate code for functions that cannot get called. // TODO: Add implementations of some primitives? @@ -471,58 +471,4 @@ void generate_asm(Node *root, FILE *out) for (i64 i = 0; i < num_string_literals; i++) { fprintf(out, " global_string_%lld: db `%s`, 0\n", i, all_string_literals[i]); } -} - -void generate_builtins(FILE *out) -{ - // Stolen shamelessly from tsoding/porth: - // https://gitlab.com/tsoding/porth - fprintf(out, - "func_print:\n" - " mov rdi, [rsp+8]\n" - " mov r9, -3689348814741910323\n" - " sub rsp, 40\n" - " mov BYTE [rsp+31], 10\n" - " lea rcx, [rsp+30]\n" - " mov qword rbx, 0\n" - ".L2:\n" - " mov rax, rdi\n" - " lea r8, [rsp+32]\n" - " mul r9\n" - " mov rax, rdi\n" - " sub r8, rcx\n" - " shr rdx, 3\n" - " lea rsi, [rdx+rdx*4]\n" - " add rsi, rsi\n" - " sub rax, rsi\n" - " add eax, 48\n" - " mov BYTE [rcx], al\n" - " mov rax, rdi\n" - " mov rdi, rdx\n" - " mov rdx, rcx\n" - " sub rcx, 1\n" - " cmp rax, 9\n" - " ja .L2\n" - " lea rax, [rsp+32]\n" - " mov edi, 1\n" - " sub rdx, rax\n" - " xor eax, eax\n" - " lea rsi, [rsp+32+rdx]\n" - " mov rdx, r8\n" - ); - make_syscall(SYS_write, out); - fprintf(out, " add rsp, 40\n"); - fprintf(out, " ret\n"); - - ///////////////////////////////////////////////////////////////// - - // Write syscall - fprintf(out, - "func_write:\n" - " mov rdi, [rsp+8]\n" // stdout - " mov rsi, [rsp+16]\n" - " mov rdx, [rsp+24]\n" // 1 byte - ); - make_syscall(SYS_write, out); - fprintf(out, " ret\n"); } \ No newline at end of file diff --git a/src/generator.h b/src/generator.h index 602cf3a..a03b9d4 100644 --- a/src/generator.h +++ b/src/generator.h @@ -3,4 +3,5 @@ #include "ast.h" #include +void generate_syscall(i64 syscall_no, FILE *out); void generate_asm(Node *root, FILE *out); \ No newline at end of file diff --git a/src/types.c b/src/types.c index 7a055fe..6c7c0e5 100644 --- a/src/types.c +++ b/src/types.c @@ -95,6 +95,7 @@ static char *data_type_to_str(DataType type) case TYPE_PTR: return "*"; case TYPE_ARRAY: return "array"; case TYPE_CHAR: return "char"; + case TYPE_ANY: return "<@>"; default: assert(false && "Unreachable"); } } -- cgit v1.2.3