diff options
| author | Mustafa Quraish <[email protected]> | 2022-02-04 06:16:18 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-02-05 08:56:15 -0500 |
| commit | 8fd0a145947011f113abaab245e35f1adfb6eb48 (patch) | |
| tree | 957bcf38c33d697c2fe5dd997bc294ae28adcaec | |
| parent | Update README.md (diff) | |
| download | cup-8fd0a145947011f113abaab245e35f1adfb6eb48.tar.xz cup-8fd0a145947011f113abaab245e35f1adfb6eb48.zip | |
Allow `builtins.c` to inject constants into the program; use them for syscalls
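We can now directly expose the `syscallN()` APIs to the program and
define the `open()`, `write()`, etc. syscall wrappers in the stdlib. The
syscall numbers (`SYS_*`) and POSIX flags (e.g. `O_RDONLY`) they need are
injected as constants by `builtins.c`. This simplifies the implementation
quite a bit :^)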
| -rw-r--r-- | src/builtins.c | 105 | ||||
| -rw-r--r-- | src/generator.c | 6 | ||||
| -rw-r--r-- | src/parser.c | 41 | ||||
| -rw-r--r-- | src/parser.h | 1 | ||||
| -rw-r--r-- | src/utils.c | 9 | ||||
| -rw-r--r-- | src/utils.h | 2 | ||||
| -rw-r--r-- | std/common.cup | 130 |
7 files changed, 204 insertions, 90 deletions
diff --git a/src/builtins.c b/src/builtins.c index 45f203d..97ac9ee 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -2,8 +2,10 @@ #include "ast.h" #include "utils.h" #include "generator.h" +#include "parser.h" #include <string.h> #include <stdlib.h> +#include <fcntl.h> #include <assert.h> #include <stdarg.h> #include <sys/syscall.h> @@ -12,47 +14,32 @@ static Node *custom_builtins[MAX_CUSTOM_BUILTIN_COUNT]; static i64 custom_builtins_count = 0; -#define MAX_SYSCALL_BUILTIN_COUNT 256 -static Node *syscall_builtins[MAX_SYSCALL_BUILTIN_COUNT]; -static i64 syscall_builtins_count = 0; - static void push_builtin(Node *node) { assert(custom_builtins_count < MAX_CUSTOM_BUILTIN_COUNT); custom_builtins[custom_builtins_count++] = node; } -static void make_syscall(int num_args, i64 syscall_num, char *name, Type *return_type, ...) -{ - Node *node = Node_new(AST_BUILTIN); +static void push_syscall_builtin(char *name, int num_args) { + Node *node; + node = Node_new(AST_BUILTIN); node->func.name = name; - node->func.return_type = return_type; - node->func.num_args = num_args; - node->func.args = calloc(sizeof(Variable), num_args); - - // This is a hack to get around the fact that we can't pass a variable - // Luckily, we don't actually use this field for the builtins anyway. 
- node->func.max_locals_size = syscall_num; - - va_list ap; - va_start(ap, return_type); - for (i64 i = 0; i < num_args; i++) - { - Type *typ = va_arg(ap, Type *); - if (!typ) { - fprintf(stderr, "Error: Builtin %s has no type for argument %lld\n", name, i); - exit(1); - } - node->func.args[i] = (Variable){"arg", typ, 0}; + node->func.return_type = type_new(TYPE_INT); + node->func.num_args = num_args+1; + node->func.args = (Variable *)calloc(sizeof(Variable), num_args+1); + node->func.args[0] = (Variable){"syscall_num", type_new(TYPE_INT), 0}; + for (int i = 0; i < num_args; i++) { + node->func.args[i+1].type = type_new(TYPE_ANY); } - va_end(ap); - - assert(syscall_builtins_count < MAX_SYSCALL_BUILTIN_COUNT); - syscall_builtins[syscall_builtins_count++] = node; + push_builtin(node); } +void push_posix_constants(); + void initialize_builtins() { + push_posix_constants(); + Node *node; // FIXME: The `TYPE_ANY` is a hack node = Node_new(AST_BUILTIN); @@ -63,15 +50,14 @@ void initialize_builtins() node->func.args[0] = (Variable){"val", type_new(TYPE_ANY), 0}; push_builtin(node); - make_syscall(3, SYS_write, "write", type_new(TYPE_INT), - type_new(TYPE_INT), type_new_ptr(TYPE_CHAR), type_new(TYPE_INT) // Args - ); - make_syscall(3, SYS_read, "read", type_new(TYPE_INT), - type_new(TYPE_INT), type_new_ptr(TYPE_CHAR), type_new(TYPE_INT) // Args - ); - make_syscall(1, SYS_exit, "exit", type_new(TYPE_NONE), - type_new(TYPE_INT) // Args - ); + push_syscall_builtin("syscall0", 0); + push_syscall_builtin("syscall1", 1); + push_syscall_builtin("syscall2", 2); + push_syscall_builtin("syscall3", 3); + push_syscall_builtin("syscall4", 4); + push_syscall_builtin("syscall5", 5); + push_syscall_builtin("syscall6", 6); + push_syscall_builtin("syscall7", 7); } Node *find_builtin_function(Token *token) @@ -80,25 +66,20 @@ Node *find_builtin_function(Token *token) if (strcmp(custom_builtins[i]->func.name, token->value.as_string) == 0) return custom_builtins[i]; } - for (i64 i = 0; i < 
syscall_builtins_count; i++) { - if (strcmp(syscall_builtins[i]->func.name, token->value.as_string) == 0) - return syscall_builtins[i]; - } return NULL; } char *x86_64_syscall_regs[10] = { - "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13" + "rax", "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12" }; static void generate_syscall_builtins(FILE *out) { - for (i64 i = 0; i < syscall_builtins_count; i++) { - Node *node = syscall_builtins[i]; - fprintf(out, "func_%s:\n", node->func.name); - for (i64 i = 0; i < node->func.num_args; i++) + for (i64 sysc_args = 0; sysc_args < 7; sysc_args++) { + fprintf(out, "func_syscall%lld:\n", sysc_args); + for (i64 i = 0; i < sysc_args+1; i++) fprintf(out, " mov %s, [rsp+%lld]\n", x86_64_syscall_regs[i], (i+1) * 8); - generate_syscall(node->func.max_locals_size, out); + fprintf(out, " syscall\n"); fprintf(out, " ret\n"); } } @@ -149,4 +130,32 @@ void generate_builtins(FILE *out) { generate_custom_builtins(out); generate_syscall_builtins(out); +} + + +#define PUSH_SYS_(name) push_constant("SYS_" #name, get_syscall_num(SYS_##name)) +#define PUSH(name) push_constant(#name, name) + +void push_posix_constants() +{ + PUSH_SYS_(read); + PUSH_SYS_(write); + PUSH_SYS_(exit); + PUSH_SYS_(open); + PUSH_SYS_(openat); + PUSH_SYS_(close); + PUSH_SYS_(fork); + PUSH_SYS_(wait4); + + PUSH(SEEK_SET); + PUSH(SEEK_CUR); + PUSH(SEEK_END); + + PUSH(O_RDONLY); + PUSH(O_WRONLY); + PUSH(O_RDWR); + PUSH(O_CREAT); + PUSH(O_EXCL); + PUSH(O_TRUNC); + PUSH(AT_FDCWD); }
\ No newline at end of file diff --git a/src/generator.c b/src/generator.c index 2fb89b7..5da0748 100644 --- a/src/generator.c +++ b/src/generator.c @@ -3,6 +3,7 @@ */ #include "generator.h" +#include "utils.h" #include <stdlib.h> #include <string.h> #include <assert.h> @@ -17,10 +18,7 @@ static Node *defer_stack[DEFER_STACK_SIZE]; static i64 defer_stack_count = 0; void generate_syscall(i64 syscall_no, FILE *out) { -#if __APPLE__ - syscall_no += 0x2000000; -#endif - fprintf(out, " mov rax, %lld\n", syscall_no); + fprintf(out, " mov rax, %lld\n", get_syscall_num(syscall_no)); fprintf(out, " syscall\n"); } diff --git a/src/parser.c b/src/parser.c index 235a240..267947c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -171,8 +171,11 @@ bool identifier_exists(Token *token) { return false; } -void push_constant(Node *node) { +void push_constant(char *name, i64 value) { assert(constants_count < TOTAL_CONSTANTS_SIZE); + Node *node = Node_new(AST_CONSTANT); + node->constant.name = name; + node->constant.int_literal = Node_from_int_literal(value); all_constants[constants_count++] = node; } @@ -246,8 +249,20 @@ Type *parse_type(Lexer *lexer) Lexer_next(lexer); Type *arr = type_new(TYPE_ARRAY); arr->ptr = type; - // TODO: Contant integer expression support? 
- arr->array_size = assert_token(Lexer_next(lexer), TOKEN_INTLIT).value.as_int; + + token = Lexer_next(lexer); + if (token.type == TOKEN_INTLIT) { + arr->array_size = token.value.as_int; + } else if (token.type == TOKEN_IDENTIFIER) { + Node *constant = find_constant(&token); + if (!constant) + die_location(token.loc, "Could not find constant `%s`", token.value.as_string); + arr->array_size = constant->constant.int_literal->literal.as_int; + } else { + die_location(token.loc, "Expected a constant expression for array size"); + } + + assert_token(Lexer_peek(lexer), TOKEN_CLOSE_BRACKET); Lexer_next(lexer); type = arr; @@ -305,7 +320,7 @@ i64 eval_constexp(Node *expr) } -Node *parse_constant_declaration(Lexer *lexer) +void parse_constant_declaration(Lexer *lexer) { Token token = assert_token(Lexer_next(lexer), TOKEN_CONST); @@ -326,14 +341,9 @@ Node *parse_constant_declaration(Lexer *lexer) assert_token(token, TOKEN_ASSIGN); Node *expr = parse_expression(lexer); i64 value = eval_constexp(expr); - - Node *node = Node_new(AST_CONSTANT); - node->constant.name = constant_name; - node->constant.int_literal = Node_from_int_literal(value); - push_constant(node); + push_constant(constant_name, value); assert_token(Lexer_next(lexer), TOKEN_SEMICOLON); - return node; } Node *parse_var_declaration(Lexer *lexer) @@ -956,13 +966,13 @@ Type *parse_struct_union_declaration(Lexer *lexer, bool is_global) { if (!is_global) defined_structs_count = prev_struct_count; - // printf("Defined %s: %s, size: %lld\n", - // struct_type->type == TYPE_UNION ? "union":"struct", + // printf("Defined %s: %s, size: %lld\n", + // struct_type->type == TYPE_UNION ? 
"union":"struct", // struct_type->struct_name, // struct_type->fields.size // ); // for (int i = 0; i < struct_type->fields.num_fields; i++) { - // printf("\t%s: %s (offset: %lld, size: %lld)\n", + // printf("\t%s: %s (offset: %lld, size: %lld)\n", // struct_type->fields.name[i], // type_to_str(struct_type->fields.type[i]), // struct_type->fields.offset[i], @@ -990,10 +1000,7 @@ void parse_enum_declaration(Lexer *lexer) if (identifier_exists(&token)) die_location(token.loc, "Identifier already exists, enums just behave like numbered constants."); - Node *node = Node_new(AST_CONSTANT); - node->constant.name = token.value.as_string; - node->constant.int_literal = Node_from_int_literal(enum_count++); - push_constant(node); + push_constant(token.value.as_string, enum_count++); token = Lexer_peek(lexer); if (token.type == TOKEN_COMMA) { diff --git a/src/parser.h b/src/parser.h index 7f7dacb..dd72686 100644 --- a/src/parser.h +++ b/src/parser.h @@ -3,5 +3,6 @@ #include "ast.h" #include "lexer.h" +void push_constant(char *name, i64 value); Node *parse_program(Lexer *lexer); void print_ast(Node *node);
\ No newline at end of file diff --git a/src/utils.c b/src/utils.c index 939021a..050c654 100644 --- a/src/utils.c +++ b/src/utils.c @@ -29,6 +29,15 @@ void _die_location(char *file, int line, Location loc, const char *fmt, ...) i64 i64max(i64 a, i64 b) { return a > b ? a : b; } i64 i64min(i64 a, i64 b) { return a < b ? a : b; } + +i64 get_syscall_num(i64 orig_syscall) { +#if __APPLE__ + return orig_syscall + 0x2000000; +#else + return orig_syscall; +#endif +} + i64 align_up(i64 val, i64 align) { return (val + align - 1) & ~(align - 1); diff --git a/src/utils.h b/src/utils.h index 7718c1c..fbd3aac 100644 --- a/src/utils.h +++ b/src/utils.h @@ -11,5 +11,7 @@ i64 i64min(i64 a, i64 b); // Assumes alignment is a power of 2 i64 align_up(i64 val, i64 align); +i64 get_syscall_num(i64 orig_syscall); + #define die_location(loc, ...) _die_location(__FILE__, __LINE__, loc, __VA_ARGS__)
\ No newline at end of file diff --git a/std/common.cup b/std/common.cup index 004eaa9..24e24b7 100644 --- a/std/common.cup +++ b/std/common.cup @@ -1,26 +1,40 @@ -fn min(a: int, b: int): int { - return a < b ? a : b; +const true = 1; +const false = 0; + +/////////////////////////////////////////////////////////////////////////////// +// Syscalls + +fn write(fd: int, s: char*, n: int): int { + return syscall3(SYS_write, fd, s, n); } -fn max(a: int, b: int): int { - return a > b ? a : b; +fn exit(status: int): int { + return syscall1(SYS_exit, status); } -fn sign(a: int): int { - return a > 0 ? 1 : a == 0 ? 0 : -1; +fn read(fd: int, s: char*, n: int): int { + return syscall3(SYS_read, fd, s, n); } -fn abs(a: int): int { - return a * sign(a); +fn open(path: char*, flags: int, mode: int): int { + return syscall3(SYS_open, path, flags, mode); } -fn factorial(n: int): int { - let res: int = 1; - for (;n > 0; n = n - 1) - res = res * n; - return res; +fn openat(fd: int, path: char*, flags: int, mode: int): int { + return syscall4(SYS_openat, fd, path, flags, mode); } +fn fork(): int { + return syscall0(SYS_fork); +} + +fn wait(status: int*): int { + return syscall4(SYS_wait4, -1, status, 0, 0); +} + + +/////////////////////////////////////////////////////////////////////////////// +// Strings fn strlen(s: char *): int { let count: int = 0; @@ -62,6 +76,47 @@ fn streq(s1: char *, s2: char *): int { return *s1 == *s2; } +fn strrev(s: char *) { + let len: int = strlen(s); + let i: int = 0; + let j: int = len - 1; + while (i < j) { + let tmp: char = s[i]; + s[i] = s[j]; + s[j] = tmp; + i = i + 1; + j = j - 1; + } +} + +fn atoi_end(s: char *, end: char**): int { + let i: int = 0; + let sign: int = 1; + if (*s == '-') { + sign = -1; + s = s + 1; + } + while (*s >= '0' && *s <= '9') { + i = i * 10 + (*s - '0'); + s = s + 1; + } + *end = s; + return i * sign; +} + +fn atoi(s: char *): int { + let tmp: char*; + return atoi_end(s, &tmp); +} + + 
+/////////////////////////////////////////////////////////////////////////////// +// I/O + +const stdin = 0; +const stdout = 1; +const stderr = 2; + fn putc(c: char) { write(0, &c, 1); } @@ -76,15 +131,48 @@ fn putsln(c: char *) { putc('\n'); } -fn strrev(s: char *) { - let len: int = strlen(s); +fn putu(n: int) { + let buf: char[32]; let i: int = 0; - let j: int = len - 1; - while (i < j) { - let tmp: char = s[i]; - s[i] = s[j]; - s[j] = tmp; + while (n > 0) { + buf[i] = (n % 10) + '0'; + n = n / 10; i = i + 1; - j = j - 1; } + if (i == 0) + buf[i] = '0'; + else + strrev(buf); + write(0, buf, i); +} + +fn die(msg: char *) { + putsln(msg); + exit(1); +} + +/////////////////////////////////////////////////////////////////////////////// +// Math + +fn min(a: int, b: int): int { + return a < b ? a : b; +} + +fn max(a: int, b: int): int { + return a > b ? a : b; +} + +fn sign(a: int): int { + return a > 0 ? 1 : a == 0 ? 0 : -1; +} + +fn abs(a: int): int { + return a * sign(a); +} + +fn factorial(n: int): int { + let res: int = 1; + for (;n > 0; n = n - 1) + res = res * n; + return res; } |