aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Mustafa Quraish <[email protected]> 2022-02-04 06:16:18 -0500
committer Mustafa Quraish <[email protected]> 2022-02-05 08:56:15 -0500
commit 8fd0a145947011f113abaab245e35f1adfb6eb48 (patch)
tree 957bcf38c33d697c2fe5dd997bc294ae28adcaec /src
parent Update README.md (diff)
download cup-8fd0a145947011f113abaab245e35f1adfb6eb48.tar.xz
cup-8fd0a145947011f113abaab245e35f1adfb6eb48.zip
Allow `builtins.c` to inject constants into program, use for syscalls
We can now directly expose the `syscallN()` APIs to the program and define the `open()`, `write()`, etc. syscalls in the stdlib. This simplifies the implementation a decent bunch :^)
Diffstat (limited to 'src')
-rw-r--r-- src/builtins.c 105
-rw-r--r-- src/generator.c 6
-rw-r--r-- src/parser.c 41
-rw-r--r-- src/parser.h 1
-rw-r--r-- src/utils.c 9
-rw-r--r-- src/utils.h 2
6 files changed, 95 insertions, 69 deletions
diff --git a/src/builtins.c b/src/builtins.c
index 45f203d..97ac9ee 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -2,8 +2,10 @@
#include "ast.h"
#include "utils.h"
#include "generator.h"
+#include "parser.h"
#include <string.h>
#include <stdlib.h>
+#include <fcntl.h>
#include <assert.h>
#include <stdarg.h>
#include <sys/syscall.h>
@@ -12,47 +14,32 @@
static Node *custom_builtins[MAX_CUSTOM_BUILTIN_COUNT];
static i64 custom_builtins_count = 0;
-#define MAX_SYSCALL_BUILTIN_COUNT 256
-static Node *syscall_builtins[MAX_SYSCALL_BUILTIN_COUNT];
-static i64 syscall_builtins_count = 0;
-
/* Appends `node` to the custom_builtins table; asserts that a free slot exists. */
static void push_builtin(Node *node)
{
    i64 slot = custom_builtins_count;
    assert(slot < MAX_CUSTOM_BUILTIN_COUNT);
    custom_builtins[slot] = node;
    custom_builtins_count = slot + 1;
}
-static void make_syscall(int num_args, i64 syscall_num, char *name, Type *return_type, ...)
-{
- Node *node = Node_new(AST_BUILTIN);
+// Registers the builtin function `name` as a raw syscall wrapper.
+//
+// The builtin returns TYPE_INT and takes `num_args + 1` parameters: the
+// syscall number (TYPE_INT) followed by `num_args` arguments of TYPE_ANY.
+// Every parameter gets a name, so no Variable is left with a NULL `name`.
+static void push_syscall_builtin(char *name, int num_args) {
+ Node *node;
+ node = Node_new(AST_BUILTIN);
node->func.name = name;
- node->func.return_type = return_type;
- node->func.num_args = num_args;
- node->func.args = calloc(sizeof(Variable), num_args);
-
- // This is a hack to get around the fact that we can't pass a variable
- // Luckily, we don't actually use this field for the builtins anyway.
- node->func.max_locals_size = syscall_num;
-
- va_list ap;
- va_start(ap, return_type);
- for (i64 i = 0; i < num_args; i++)
- {
- Type *typ = va_arg(ap, Type *);
- if (!typ) {
- fprintf(stderr, "Error: Builtin %s has no type for argument %lld\n", name, i);
- exit(1);
- }
- node->func.args[i] = (Variable){"arg", typ, 0};
+ node->func.return_type = type_new(TYPE_INT);
+ node->func.num_args = num_args+1;
+ // calloc takes (element count, element size), in that order.
+ node->func.args = (Variable *)calloc(num_args+1, sizeof(Variable));
+ node->func.args[0] = (Variable){"syscall_num", type_new(TYPE_INT), 0};
+ for (int i = 0; i < num_args; i++) {
+ node->func.args[i+1] = (Variable){"arg", type_new(TYPE_ANY), 0};
}
- va_end(ap);
-
- assert(syscall_builtins_count < MAX_SYSCALL_BUILTIN_COUNT);
- syscall_builtins[syscall_builtins_count++] = node;
+ push_builtin(node);
}
+void push_posix_constants();
+
+// Registers everything the compiler provides to programs up front: the
+// injected constants first, then the builtin functions.
void initialize_builtins()
{
+ push_posix_constants();
+
Node *node;
// FIXME: The `TYPE_ANY` is a hack
node = Node_new(AST_BUILTIN);
@@ -63,15 +50,14 @@ void initialize_builtins()
node->func.args[0] = (Variable){"val", type_new(TYPE_ANY), 0};
push_builtin(node);
- make_syscall(3, SYS_write, "write", type_new(TYPE_INT),
- type_new(TYPE_INT), type_new_ptr(TYPE_CHAR), type_new(TYPE_INT) // Args
- );
- make_syscall(3, SYS_read, "read", type_new(TYPE_INT),
- type_new(TYPE_INT), type_new_ptr(TYPE_CHAR), type_new(TYPE_INT) // Args
- );
- make_syscall(1, SYS_exit, "exit", type_new(TYPE_NONE),
- type_new(TYPE_INT) // Args
- );
+ // Only syscall0..syscall6 are registered: generate_syscall_builtins() emits
+ // code for exactly func_syscall0..func_syscall6, so registering a syscall7
+ // would let programs call a label that is never generated.
+ push_syscall_builtin("syscall0", 0);
+ push_syscall_builtin("syscall1", 1);
+ push_syscall_builtin("syscall2", 2);
+ push_syscall_builtin("syscall3", 3);
+ push_syscall_builtin("syscall4", 4);
+ push_syscall_builtin("syscall5", 5);
+ push_syscall_builtin("syscall6", 6);
}
Node *find_builtin_function(Token *token)
@@ -80,25 +66,20 @@ Node *find_builtin_function(Token *token)
if (strcmp(custom_builtins[i]->func.name, token->value.as_string) == 0)
return custom_builtins[i];
}
- for (i64 i = 0; i < syscall_builtins_count; i++) {
- if (strcmp(syscall_builtins[i]->func.name, token->value.as_string) == 0)
- return syscall_builtins[i];
- }
return NULL;
}
+// Registers loaded, in order, by the generated syscallN wrappers: the syscall
+// number goes in rax, followed by up to six arguments in rdi, rsi, rdx, r10,
+// r8 and r9. The 4th argument is r10 rather than rcx (as it would be for an
+// ordinary function call) because the `syscall` instruction overwrites rcx
+// and r11. There are no further argument registers, so the remaining slots
+// of the array are left NULL.
char *x86_64_syscall_regs[10] = {
- "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13"
+ "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9"
};
+// Writes the assembly for the raw syscall wrappers func_syscall0 through
+// func_syscall6 to `out`. Each wrapper loads its values from the stack into
+// registers, executes `syscall`, and returns.
static void generate_syscall_builtins(FILE *out)
{
- for (i64 i = 0; i < syscall_builtins_count; i++) {
- Node *node = syscall_builtins[i];
- fprintf(out, "func_%s:\n", node->func.name);
- for (i64 i = 0; i < node->func.num_args; i++)
+ for (i64 sysc_args = 0; sysc_args < 7; sysc_args++) {
+ fprintf(out, "func_syscall%lld:\n", sysc_args);
+ // sysc_args+1 values are loaded (the syscall number plus its arguments):
+ // value i comes from [rsp+(i+1)*8] and goes to x86_64_syscall_regs[i].
+ for (i64 i = 0; i < sysc_args+1; i++)
fprintf(out, " mov %s, [rsp+%lld]\n", x86_64_syscall_regs[i], (i+1) * 8);
- generate_syscall(node->func.max_locals_size, out);
+ fprintf(out, " syscall\n");
fprintf(out, " ret\n");
}
}
@@ -149,4 +130,32 @@ void generate_builtins(FILE *out)
{
generate_custom_builtins(out);
generate_syscall_builtins(out);
+}
+
+
+// PUSH_SYS_(name): registers a constant called "SYS_<name>" whose value is
+// get_syscall_num(SYS_<name>), using the host's SYS_<name> macro.
+#define PUSH_SYS_(name) push_constant("SYS_" #name, get_syscall_num(SYS_##name))
+// PUSH(name): registers a constant with the same spelling and value as the
+// host macro `name`.
+#define PUSH(name) push_constant(#name, name)
+
+// Injects host-defined constants into the program via push_constant():
+// syscall numbers, seek modes, open() flags and AT_FDCWD.
+void push_posix_constants()
+{
+ // Syscall numbers, passed through get_syscall_num().
+ PUSH_SYS_(read);
+ PUSH_SYS_(write);
+ PUSH_SYS_(exit);
+ PUSH_SYS_(open);
+ PUSH_SYS_(openat);
+ PUSH_SYS_(close);
+ PUSH_SYS_(fork);
+ PUSH_SYS_(wait4);
+
+ // Seek origins.
+ PUSH(SEEK_SET);
+ PUSH(SEEK_CUR);
+ PUSH(SEEK_END);
+
+ // File open flags and the "current directory" descriptor value.
+ PUSH(O_RDONLY);
+ PUSH(O_WRONLY);
+ PUSH(O_RDWR);
+ PUSH(O_CREAT);
+ PUSH(O_EXCL);
+ PUSH(O_TRUNC);
+ PUSH(AT_FDCWD);
+} \ No newline at end of file
diff --git a/src/generator.c b/src/generator.c
index 2fb89b7..5da0748 100644
--- a/src/generator.c
+++ b/src/generator.c
@@ -3,6 +3,7 @@
*/
#include "generator.h"
+#include "utils.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
@@ -17,10 +18,7 @@ static Node *defer_stack[DEFER_STACK_SIZE];
static i64 defer_stack_count = 0;
+// Writes to `out` the instructions that load rax with
+// get_syscall_num(syscall_no) and then execute `syscall`.
void generate_syscall(i64 syscall_no, FILE *out) {
-#if __APPLE__
- syscall_no += 0x2000000;
-#endif
- fprintf(out, " mov rax, %lld\n", syscall_no);
+ fprintf(out, " mov rax, %lld\n", get_syscall_num(syscall_no));
fprintf(out, " syscall\n");
}
diff --git a/src/parser.c b/src/parser.c
index 235a240..267947c 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -171,8 +171,11 @@ bool identifier_exists(Token *token) {
return false;
}
-void push_constant(Node *node) {
+// Creates an AST_CONSTANT node called `name` whose value is an int-literal
+// node built from `value`, and appends it to all_constants.
+// The `name` pointer is stored as-is, not copied. Asserts that the table
+// still has room before creating the node.
+void push_constant(char *name, i64 value) {
assert(constants_count < TOTAL_CONSTANTS_SIZE);
+ Node *node = Node_new(AST_CONSTANT);
+ node->constant.name = name;
+ node->constant.int_literal = Node_from_int_literal(value);
all_constants[constants_count++] = node;
}
@@ -246,8 +249,20 @@ Type *parse_type(Lexer *lexer)
Lexer_next(lexer);
Type *arr = type_new(TYPE_ARRAY);
arr->ptr = type;
- // TODO: Contant integer expression support?
- arr->array_size = assert_token(Lexer_next(lexer), TOKEN_INTLIT).value.as_int;
+
+ token = Lexer_next(lexer);
+ if (token.type == TOKEN_INTLIT) {
+ arr->array_size = token.value.as_int;
+ } else if (token.type == TOKEN_IDENTIFIER) {
+ Node *constant = find_constant(&token);
+ if (!constant)
+ die_location(token.loc, "Could not find constant `%s`", token.value.as_string);
+ arr->array_size = constant->constant.int_literal->literal.as_int;
+ } else {
+ die_location(token.loc, "Expected a constant expression for array size");
+ }
+
+
assert_token(Lexer_peek(lexer), TOKEN_CLOSE_BRACKET);
Lexer_next(lexer);
type = arr;
@@ -305,7 +320,7 @@ i64 eval_constexp(Node *expr)
}
-Node *parse_constant_declaration(Lexer *lexer)
+void parse_constant_declaration(Lexer *lexer)
{
Token token = assert_token(Lexer_next(lexer), TOKEN_CONST);
@@ -326,14 +341,9 @@ Node *parse_constant_declaration(Lexer *lexer)
assert_token(token, TOKEN_ASSIGN);
Node *expr = parse_expression(lexer);
i64 value = eval_constexp(expr);
-
- Node *node = Node_new(AST_CONSTANT);
- node->constant.name = constant_name;
- node->constant.int_literal = Node_from_int_literal(value);
- push_constant(node);
+ push_constant(constant_name, value);
assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
- return node;
}
Node *parse_var_declaration(Lexer *lexer)
@@ -956,13 +966,13 @@ Type *parse_struct_union_declaration(Lexer *lexer, bool is_global) {
if (!is_global)
defined_structs_count = prev_struct_count;
- // printf("Defined %s: %s, size: %lld\n",
- // struct_type->type == TYPE_UNION ? "union":"struct",
+ // printf("Defined %s: %s, size: %lld\n",
+ // struct_type->type == TYPE_UNION ? "union":"struct",
// struct_type->struct_name,
// struct_type->fields.size
// );
// for (int i = 0; i < struct_type->fields.num_fields; i++) {
- // printf("\t%s: %s (offset: %lld, size: %lld)\n",
+ // printf("\t%s: %s (offset: %lld, size: %lld)\n",
// struct_type->fields.name[i],
// type_to_str(struct_type->fields.type[i]),
// struct_type->fields.offset[i],
@@ -990,10 +1000,7 @@ void parse_enum_declaration(Lexer *lexer)
if (identifier_exists(&token))
die_location(token.loc, "Identifier already exists, enums just behave like numbered constants.");
- Node *node = Node_new(AST_CONSTANT);
- node->constant.name = token.value.as_string;
- node->constant.int_literal = Node_from_int_literal(enum_count++);
- push_constant(node);
+ push_constant(token.value.as_string, enum_count++);
token = Lexer_peek(lexer);
if (token.type == TOKEN_COMMA) {
diff --git a/src/parser.h b/src/parser.h
index 7f7dacb..dd72686 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -3,5 +3,6 @@
#include "ast.h"
#include "lexer.h"
+void push_constant(char *name, i64 value);
Node *parse_program(Lexer *lexer);
void print_ast(Node *node); \ No newline at end of file
diff --git a/src/utils.c b/src/utils.c
index 939021a..050c654 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -29,6 +29,15 @@ void _die_location(char *file, int line, Location loc, const char *fmt, ...)
/* Returns the larger of `a` and `b` (returns `b` when they are equal). */
i64 i64max(i64 a, i64 b)
{
    if (a > b)
        return a;
    return b;
}
/* Returns the smaller of `a` and `b` (returns `b` when they are equal). */
i64 i64min(i64 a, i64 b)
{
    if (a < b)
        return a;
    return b;
}
+
+// Converts a syscall number into the value the generated code should use.
+// When compiled with __APPLE__ set, 0x2000000 is added to the number;
+// otherwise it is returned unchanged.
+i64 get_syscall_num(i64 orig_syscall) {
+ i64 offset = 0;
+#if __APPLE__
+ offset = 0x2000000;
+#endif
+ return orig_syscall + offset;
+}
+
i64 align_up(i64 val, i64 align)
{
return (val + align - 1) & ~(align - 1);
diff --git a/src/utils.h b/src/utils.h
index 7718c1c..fbd3aac 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -11,5 +11,7 @@ i64 i64min(i64 a, i64 b);
// Assumes alignment is a power of 2
i64 align_up(i64 val, i64 align);
+i64 get_syscall_num(i64 orig_syscall);
+
#define die_location(loc, ...) _die_location(__FILE__, __LINE__, loc, __VA_ARGS__) \ No newline at end of file