aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mustafa Quraish <[email protected]> 2022-02-04 06:16:18 -0500
committer: Mustafa Quraish <[email protected]> 2022-02-05 08:56:15 -0500
commit: 8fd0a145947011f113abaab245e35f1adfb6eb48 (patch)
tree: 957bcf38c33d697c2fe5dd997bc294ae28adcaec
parent: Update README.md (diff)
download: cup-8fd0a145947011f113abaab245e35f1adfb6eb48.tar.xz
cup-8fd0a145947011f113abaab245e35f1adfb6eb48.zip
Allow `builtins.c` to inject constants into program, use for syscalls
We can now directly expose the `syscallN()` APIs to the program and define the `open()`, `write()` etc syscalls in the stdlib. This simplifies the implementation a decent bunch :^)
-rw-r--r-- src/builtins.c 105
-rw-r--r-- src/generator.c 6
-rw-r--r-- src/parser.c 41
-rw-r--r-- src/parser.h 1
-rw-r--r-- src/utils.c 9
-rw-r--r-- src/utils.h 2
-rw-r--r-- std/common.cup 130
7 files changed, 204 insertions, 90 deletions
diff --git a/src/builtins.c b/src/builtins.c
index 45f203d..97ac9ee 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -2,8 +2,10 @@
#include "ast.h"
#include "utils.h"
#include "generator.h"
+#include "parser.h"
#include <string.h>
#include <stdlib.h>
+#include <fcntl.h>
#include <assert.h>
#include <stdarg.h>
#include <sys/syscall.h>
@@ -12,47 +14,32 @@
static Node *custom_builtins[MAX_CUSTOM_BUILTIN_COUNT];
static i64 custom_builtins_count = 0;
-#define MAX_SYSCALL_BUILTIN_COUNT 256
-static Node *syscall_builtins[MAX_SYSCALL_BUILTIN_COUNT];
-static i64 syscall_builtins_count = 0;
-
// Stores `node` in the next free slot of `custom_builtins`.
// Asserts that the table still has room before writing.
static void push_builtin(Node *node)
{
assert(custom_builtins_count < MAX_CUSTOM_BUILTIN_COUNT);
const i64 slot = custom_builtins_count;
custom_builtins[slot] = node;
custom_builtins_count = slot + 1;
}
-static void make_syscall(int num_args, i64 syscall_num, char *name, Type *return_type, ...)
-{
- Node *node = Node_new(AST_BUILTIN);
+// Registers a builtin called `name` that takes a leading `syscall_num` (int)
+// followed by `num_args` arguments of type TYPE_ANY, and returns an int.
+// The node is stored through `push_builtin`, so it is found by the same
+// lookup as every other custom builtin.
+static void push_syscall_builtin(char *name, int num_args) {
+ Node *node;
+ node = Node_new(AST_BUILTIN);
node->func.name = name;
- node->func.return_type = return_type;
- node->func.num_args = num_args;
- node->func.args = calloc(sizeof(Variable), num_args);
-
- // This is a hack to get around the fact that we can't pass a variable
- // Luckily, we don't actually use this field for the builtins anyway.
- node->func.max_locals_size = syscall_num;
-
- va_list ap;
- va_start(ap, return_type);
- for (i64 i = 0; i < num_args; i++)
- {
- Type *typ = va_arg(ap, Type *);
- if (!typ) {
- fprintf(stderr, "Error: Builtin %s has no type for argument %lld\n", name, i);
- exit(1);
- }
- node->func.args[i] = (Variable){"arg", typ, 0};
+ node->func.return_type = type_new(TYPE_INT);
+ node->func.num_args = num_args+1;
+ // calloc takes (element count, element size), in that order.
+ node->func.args = (Variable *)calloc(num_args+1, sizeof(Variable));
+ if (!node->func.args) {
+ fprintf(stderr, "Error: Out of memory while creating builtin %s\n", name);
+ exit(1);
+ }
+ node->func.args[0] = (Variable){"syscall_num", type_new(TYPE_INT), 0};
+ for (int i = 0; i < num_args; i++) {
+ // Give every slot a non-NULL name, as the removed make_syscall did.
+ node->func.args[i+1] = (Variable){"arg", type_new(TYPE_ANY), 0};
}
- va_end(ap);
-
- assert(syscall_builtins_count < MAX_SYSCALL_BUILTIN_COUNT);
- syscall_builtins[syscall_builtins_count++] = node;
+ push_builtin(node);
}
+void push_posix_constants();
+
void initialize_builtins()
{
+ push_posix_constants();
+
Node *node;
// FIXME: The `TYPE_ANY` is a hack
node = Node_new(AST_BUILTIN);
@@ -63,15 +50,14 @@ void initialize_builtins()
node->func.args[0] = (Variable){"val", type_new(TYPE_ANY), 0};
push_builtin(node);
- make_syscall(3, SYS_write, "write", type_new(TYPE_INT),
- type_new(TYPE_INT), type_new_ptr(TYPE_CHAR), type_new(TYPE_INT) // Args
- );
- make_syscall(3, SYS_read, "read", type_new(TYPE_INT),
- type_new(TYPE_INT), type_new_ptr(TYPE_CHAR), type_new(TYPE_INT) // Args
- );
- make_syscall(1, SYS_exit, "exit", type_new(TYPE_NONE),
- type_new(TYPE_INT) // Args
- );
+ push_syscall_builtin("syscall0", 0);
+ push_syscall_builtin("syscall1", 1);
+ push_syscall_builtin("syscall2", 2);
+ push_syscall_builtin("syscall3", 3);
+ push_syscall_builtin("syscall4", 4);
+ push_syscall_builtin("syscall5", 5);
+ push_syscall_builtin("syscall6", 6);
+ push_syscall_builtin("syscall7", 7);
}
Node *find_builtin_function(Token *token)
@@ -80,25 +66,20 @@ Node *find_builtin_function(Token *token)
if (strcmp(custom_builtins[i]->func.name, token->value.as_string) == 0)
return custom_builtins[i];
}
- for (i64 i = 0; i < syscall_builtins_count; i++) {
- if (strcmp(syscall_builtins[i]->func.name, token->value.as_string) == 0)
- return syscall_builtins[i];
- }
return NULL;
}
char *x86_64_syscall_regs[10] = {
- "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13"
+ "rax", "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12"
};
+// Writes one assembly stub `func_syscallN` to `out` for N = 0..6. Each stub
+// copies N+1 stack slots ([rsp+8], [rsp+16], ...) into the first N+1 entries
+// of `x86_64_syscall_regs`, executes `syscall`, and returns.
+// NOTE(review): `initialize_builtins` also registers a `syscall7` builtin, but
+// this loop stops at `syscall6`, so no `func_syscall7` label is emitted.
+// Confirm whether `syscall7` should exist at all.
static void generate_syscall_builtins(FILE *out)
{
- for (i64 i = 0; i < syscall_builtins_count; i++) {
- Node *node = syscall_builtins[i];
- fprintf(out, "func_%s:\n", node->func.name);
- for (i64 i = 0; i < node->func.num_args; i++)
+ for (i64 sysc_args = 0; sysc_args < 7; sysc_args++) {
+ fprintf(out, "func_syscall%lld:\n", sysc_args);
+ for (i64 i = 0; i < sysc_args+1; i++)
fprintf(out, " mov %s, [rsp+%lld]\n", x86_64_syscall_regs[i], (i+1) * 8);
- generate_syscall(node->func.max_locals_size, out);
+ fprintf(out, " syscall\n");
fprintf(out, " ret\n");
}
}
@@ -149,4 +130,32 @@ void generate_builtins(FILE *out)
{
generate_custom_builtins(out);
generate_syscall_builtins(out);
+}
+
+
+// Exposes host values to compiled programs as named constants: syscall
+// numbers (already passed through `get_syscall_num`) under `SYS_<name>`, plus
+// the seek, open-flag and AT_FDCWD values under their C names. Entries are
+// pushed in table order.
+void push_posix_constants()
+{
+ const struct { char *name; i64 value; } table[] = {
+ {"SYS_read", get_syscall_num(SYS_read)},
+ {"SYS_write", get_syscall_num(SYS_write)},
+ {"SYS_exit", get_syscall_num(SYS_exit)},
+ {"SYS_open", get_syscall_num(SYS_open)},
+ {"SYS_openat", get_syscall_num(SYS_openat)},
+ {"SYS_close", get_syscall_num(SYS_close)},
+ {"SYS_fork", get_syscall_num(SYS_fork)},
+ {"SYS_wait4", get_syscall_num(SYS_wait4)},
+
+ {"SEEK_SET", SEEK_SET},
+ {"SEEK_CUR", SEEK_CUR},
+ {"SEEK_END", SEEK_END},
+
+ {"O_RDONLY", O_RDONLY},
+ {"O_WRONLY", O_WRONLY},
+ {"O_RDWR", O_RDWR},
+ {"O_CREAT", O_CREAT},
+ {"O_EXCL", O_EXCL},
+ {"O_TRUNC", O_TRUNC},
+ {"AT_FDCWD", AT_FDCWD},
+ };
+ const i64 count = sizeof(table) / sizeof(table[0]);
+ for (i64 i = 0; i < count; i++)
+ push_constant(table[i].name, table[i].value);
} \ No newline at end of file
diff --git a/src/generator.c b/src/generator.c
index 2fb89b7..5da0748 100644
--- a/src/generator.c
+++ b/src/generator.c
@@ -3,6 +3,7 @@
*/
#include "generator.h"
+#include "utils.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
@@ -17,10 +18,7 @@ static Node *defer_stack[DEFER_STACK_SIZE];
static i64 defer_stack_count = 0;
+// Writes a `mov rax, <n>` / `syscall` pair to `out`, where <n> is
+// `syscall_no` after translation by `get_syscall_num`.
void generate_syscall(i64 syscall_no, FILE *out) {
-#if __APPLE__
- syscall_no += 0x2000000;
-#endif
- fprintf(out, " mov rax, %lld\n", syscall_no);
+ fprintf(out, " mov rax, %lld\n", get_syscall_num(syscall_no));
fprintf(out, " syscall\n");
}
diff --git a/src/parser.c b/src/parser.c
index 235a240..267947c 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -171,8 +171,11 @@ bool identifier_exists(Token *token) {
return false;
}
+// Creates an AST_CONSTANT node called `name` whose value is the integer
+// literal `value`, and appends it to `all_constants`. `name` is stored by
+// pointer, not copied. Asserts that the table still has room.
-void push_constant(Node *node) {
+void push_constant(char *name, i64 value) {
assert(constants_count < TOTAL_CONSTANTS_SIZE);
+ Node *node = Node_new(AST_CONSTANT);
+ node->constant.name = name;
+ node->constant.int_literal = Node_from_int_literal(value);
all_constants[constants_count++] = node;
}
@@ -246,8 +249,20 @@ Type *parse_type(Lexer *lexer)
Lexer_next(lexer);
Type *arr = type_new(TYPE_ARRAY);
arr->ptr = type;
- // TODO: Contant integer expression support?
- arr->array_size = assert_token(Lexer_next(lexer), TOKEN_INTLIT).value.as_int;
+
+ token = Lexer_next(lexer);
+ if (token.type == TOKEN_INTLIT) {
+ arr->array_size = token.value.as_int;
+ } else if (token.type == TOKEN_IDENTIFIER) {
+ Node *constant = find_constant(&token);
+ if (!constant)
+ die_location(token.loc, "Could not find constant `%s`", token.value.as_string);
+ arr->array_size = constant->constant.int_literal->literal.as_int;
+ } else {
+ die_location(token.loc, "Expected a constant expression for array size");
+ }
+
+
assert_token(Lexer_peek(lexer), TOKEN_CLOSE_BRACKET);
Lexer_next(lexer);
type = arr;
@@ -305,7 +320,7 @@ i64 eval_constexp(Node *expr)
}
-Node *parse_constant_declaration(Lexer *lexer)
+void parse_constant_declaration(Lexer *lexer)
{
Token token = assert_token(Lexer_next(lexer), TOKEN_CONST);
@@ -326,14 +341,9 @@ Node *parse_constant_declaration(Lexer *lexer)
assert_token(token, TOKEN_ASSIGN);
Node *expr = parse_expression(lexer);
i64 value = eval_constexp(expr);
-
- Node *node = Node_new(AST_CONSTANT);
- node->constant.name = constant_name;
- node->constant.int_literal = Node_from_int_literal(value);
- push_constant(node);
+ push_constant(constant_name, value);
assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
- return node;
}
Node *parse_var_declaration(Lexer *lexer)
@@ -956,13 +966,13 @@ Type *parse_struct_union_declaration(Lexer *lexer, bool is_global) {
if (!is_global)
defined_structs_count = prev_struct_count;
- // printf("Defined %s: %s, size: %lld\n",
- // struct_type->type == TYPE_UNION ? "union":"struct",
+ // printf("Defined %s: %s, size: %lld\n",
+ // struct_type->type == TYPE_UNION ? "union":"struct",
// struct_type->struct_name,
// struct_type->fields.size
// );
// for (int i = 0; i < struct_type->fields.num_fields; i++) {
- // printf("\t%s: %s (offset: %lld, size: %lld)\n",
+ // printf("\t%s: %s (offset: %lld, size: %lld)\n",
// struct_type->fields.name[i],
// type_to_str(struct_type->fields.type[i]),
// struct_type->fields.offset[i],
@@ -990,10 +1000,7 @@ void parse_enum_declaration(Lexer *lexer)
if (identifier_exists(&token))
die_location(token.loc, "Identifier already exists, enums just behave like numbered constants.");
- Node *node = Node_new(AST_CONSTANT);
- node->constant.name = token.value.as_string;
- node->constant.int_literal = Node_from_int_literal(enum_count++);
- push_constant(node);
+ push_constant(token.value.as_string, enum_count++);
token = Lexer_peek(lexer);
if (token.type == TOKEN_COMMA) {
diff --git a/src/parser.h b/src/parser.h
index 7f7dacb..dd72686 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -3,5 +3,6 @@
#include "ast.h"
#include "lexer.h"
+void push_constant(char *name, i64 value);
Node *parse_program(Lexer *lexer);
void print_ast(Node *node); \ No newline at end of file
diff --git a/src/utils.c b/src/utils.c
index 939021a..050c654 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -29,6 +29,15 @@ void _die_location(char *file, int line, Location loc, const char *fmt, ...)
// Returns the larger of `a` and `b` (returns `b` when they are equal).
i64 i64max(i64 a, i64 b) {
if (a > b)
return a;
return b;
}
// Returns the smaller of `a` and `b` (returns `b` when they are equal).
i64 i64min(i64 a, i64 b) {
if (a < b)
return a;
return b;
}
+
+// Translates a syscall number from <sys/syscall.h> into the value to load
+// into rax. When built with __APPLE__ set, 0x2000000 is added; otherwise the
+// number is returned unchanged.
+i64 get_syscall_num(i64 orig_syscall) {
+ i64 class_bits = 0;
+#if __APPLE__
+ class_bits = 0x2000000;
+#endif
+ return orig_syscall + class_bits;
+}
+
i64 align_up(i64 val, i64 align)
{
return (val + align - 1) & ~(align - 1);
diff --git a/src/utils.h b/src/utils.h
index 7718c1c..fbd3aac 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -11,5 +11,7 @@ i64 i64min(i64 a, i64 b);
// Assumes alignment is a power of 2
i64 align_up(i64 val, i64 align);
+i64 get_syscall_num(i64 orig_syscall);
+
#define die_location(loc, ...) _die_location(__FILE__, __LINE__, loc, __VA_ARGS__) \ No newline at end of file
diff --git a/std/common.cup b/std/common.cup
index 004eaa9..24e24b7 100644
--- a/std/common.cup
+++ b/std/common.cup
@@ -1,26 +1,40 @@
-fn min(a: int, b: int): int {
- return a < b ? a : b;
+// Boolean constants; conditions in this file are plain ints.
+const true = 1;
+const false = 0;
+
+///////////////////////////////////////////////////////////////////////////////
+// Syscalls
+
+// Invokes SYS_write with (fd, s, n) and returns the raw syscall result.
+fn write(fd: int, s: char*, n: int): int {
+ return syscall3(SYS_write, fd, s, n);
}
-fn max(a: int, b: int): int {
- return a > b ? a : b;
+// Invokes SYS_exit with `status`.
+fn exit(status: int): int {
+ return syscall1(SYS_exit, status);
}
-fn sign(a: int): int {
- return a > 0 ? 1 : a == 0 ? 0 : -1;
+// Invokes SYS_read with (fd, s, n) and returns the raw syscall result.
+fn read(fd: int, s: char*, n: int): int {
+ return syscall3(SYS_read, fd, s, n);
}
-fn abs(a: int): int {
- return a * sign(a);
+// Invokes SYS_open with (path, flags, mode) and returns the raw syscall result.
+fn open(path: char*, flags: int, mode: int): int {
+ return syscall3(SYS_open, path, flags, mode);
}
-fn factorial(n: int): int {
- let res: int = 1;
- for (;n > 0; n = n - 1)
- res = res * n;
- return res;
+// Invokes SYS_openat with (fd, path, flags, mode) and returns the raw result.
+fn openat(fd: int, path: char*, flags: int, mode: int): int {
+ return syscall4(SYS_openat, fd, path, flags, mode);
}
+// Invokes SYS_fork with no arguments and returns the raw syscall result.
+fn fork(): int {
+ return syscall0(SYS_fork);
+}
+
+// Invokes SYS_wait4 with (-1, status, 0, 0) and returns the raw result.
+// NOTE(review): presumably -1 means "any child", as in POSIX wait4; confirm.
+fn wait(status: int*): int {
+ return syscall4(SYS_wait4, -1, status, 0, 0);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Strings
fn strlen(s: char *): int {
let count: int = 0;
@@ -62,6 +76,47 @@ fn streq(s1: char *, s2: char *): int {
return *s1 == *s2;
}
+// Reverses the string `s` in place. Its length is taken from `strlen(s)`.
+fn strrev(s: char *) {
+ let lo: int = 0;
+ let hi: int = strlen(s) - 1;
+ while (lo < hi) {
+ let saved: char = s[hi];
+ s[hi] = s[lo];
+ s[lo] = saved;
+ hi = hi - 1;
+ lo = lo + 1;
+ }
+}
+
+// Parses an optional leading '-' followed by decimal digits from `s`.
+// Stores a pointer to the first unconsumed character in `*end` and returns
+// the parsed value (0 when no digits are present).
+fn atoi_end(s: char *, end: char**): int {
+ let factor: int = 1;
+ let value: int = 0;
+ if (*s == '-') {
+ s = s + 1;
+ factor = -1;
+ }
+ while (*s >= '0' && *s <= '9') {
+ value = value * 10 + (*s - '0');
+ s = s + 1;
+ }
+ *end = s;
+ return value * factor;
+}
+
+// Same as `atoi_end`, for callers that do not need the end pointer.
+fn atoi(s: char *): int {
+ let unused_end: char*;
+ return atoi_end(s, &unused_end);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// I/O
+
+// File descriptor numbers of the three standard streams.
+const stdin = 0;
+const stdout = 1;
+const stderr = 2;
+
// Writes the single character `c` to standard output.
// Previously this wrote to descriptor 0 (stdin), which only appears to work
// when stdin happens to be a read/write terminal.
fn putc(c: char) {
write(stdout, &c, 1);
}
@@ -76,15 +131,48 @@ fn putsln(c: char *) {
putc('\n');
}
-fn strrev(s: char *) {
- let len: int = strlen(s);
+// Prints `n` in decimal to standard output. Values <= 0 print as "0".
+// Digits are produced least-significant first, so the buffer is terminated
+// and reversed before it is written.
+fn putu(n: int) {
+ let buf: char[32];
let i: int = 0;
- let j: int = len - 1;
- while (i < j) {
- let tmp: char = s[i];
- s[i] = s[j];
- s[j] = tmp;
+ while (n > 0) {
+ buf[i] = (n % 10) + '0';
+ n = n / 10;
i = i + 1;
- j = j - 1;
}
+ if (i == 0) {
+ // No digits were produced: emit a single '0' and count it.
+ buf[i] = '0';
+ i = i + 1;
+ }
+ // strrev measures the string with strlen, so it needs a terminator.
+ buf[i] = 0;
+ strrev(buf);
+ write(stdout, buf, i);
+}
+
+// Prints `msg` followed by a newline via `putsln`, then calls `exit(1)`.
+fn die(msg: char *) {
+ putsln(msg);
+ exit(1);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Math
+
+// Returns the smaller of `a` and `b`.
+fn min(a: int, b: int): int {
+ if (a < b)
+ return a;
+ return b;
+}
+
+// Returns the larger of `a` and `b`.
+fn max(a: int, b: int): int {
+ if (a > b)
+ return a;
+ return b;
+}
+
+// Returns 1 for positive `a`, 0 for zero, and -1 for negative `a`.
+fn sign(a: int): int {
+ if (a > 0)
+ return 1;
+ if (a == 0)
+ return 0;
+ return -1;
+}
+
+// Returns `a` multiplied by its own sign, i.e. its absolute value.
+fn abs(a: int): int {
+ let s: int = sign(a);
+ return s * a;
+}
+
+// Returns n * (n-1) * ... * 1; returns 1 when `n` is 0 or negative.
+fn factorial(n: int): int {
+ let product: int = 1;
+ while (n > 0) {
+ product = product * n;
+ n = n - 1;
+ }
+ return product;
}