aboutsummaryrefslogtreecommitdiff
path: root/cup
diff options
context:
space:
mode:
authorMustafa Quraish <[email protected]>2022-01-30 01:19:54 -0500
committerMustafa Quraish <[email protected]>2022-01-30 01:19:54 -0500
commit5b87ec6ef2b84f1d319e5376bcf9eedea9829d79 (patch)
treef1afa80f034a1cd73192510046978435252e1160 /cup
parentRemove return-0 example (diff)
downloadarchived-cup-5b87ec6ef2b84f1d319e5376bcf9eedea9829d79.tar.xz
archived-cup-5b87ec6ef2b84f1d319e5376bcf9eedea9829d79.zip
Rename `cup` directory to `src`
Diffstat (limited to 'cup')
-rw-r--r--cup/ast.c161
-rw-r--r--cup/ast.h150
-rw-r--r--cup/common.h6
-rw-r--r--cup/generator.c355
-rw-r--r--cup/generator.h6
-rw-r--r--cup/lexer.c238
-rw-r--r--cup/lexer.h19
-rw-r--r--cup/main.c82
-rw-r--r--cup/parser.c562
-rw-r--r--cup/parser.h7
-rw-r--r--cup/tokens.c70
-rw-r--r--cup/tokens.h83
-rw-r--r--cup/utils.c30
-rw-r--r--cup/utils.h12
14 files changed, 0 insertions, 1781 deletions
diff --git a/cup/ast.c b/cup/ast.c
deleted file mode 100644
index 6e33de1..0000000
--- a/cup/ast.c
+++ /dev/null
@@ -1,161 +0,0 @@
-#include "ast.h"
-#include <assert.h>
-#include <stdio.h>
-
-char *data_type_to_str(DataType type)
-{
- switch (type)
- {
- case TYPE_NONE: return "void";
- case TYPE_INT: return "int";
- default: assert(false && "Unreachable");
- }
-}
-
-void print_type_to_file(FILE *out, Type type)
-{
- fprintf(out, "%s", data_type_to_str(type.type));
- for (int i = 0; i < type.indirection; i++) {
- fprintf(out, "*");
- }
-}
-
-char *node_type_to_str(NodeType type)
-{
- switch (type)
- {
- #define ENUM_TOKEN(t, name) case t: return name;
- ENUM_AST_TYPES(ENUM_TOKEN)
- #undef ENUM_TOKEN
- default: assert(false && "Unreachable");
- }
-}
-
-bool is_binary_op(NodeType type)
-{
- switch (type)
- {
- case OP_PLUS:
- case OP_MINUS:
- case OP_MUL:
- case OP_DIV:
- case OP_MOD:
- case OP_LSHIFT:
- case OP_RSHIFT:
- case OP_AND:
- case OP_OR:
- case OP_XOR:
- case OP_EQ:
- case OP_NEQ:
- case OP_LT:
- case OP_LEQ:
- case OP_GT:
- case OP_GEQ:
- return true;
- default: return false;
- }
-}
-
-bool is_unary_op(NodeType type)
-{
- switch (type)
- {
- case OP_NEG:
- case OP_NOT:
- case OP_BWINV:
- return true;
- default: return false;
- }
-}
-
-bool is_expression(NodeType type)
-{
- if (is_unary_op(type) || is_binary_op(type))
- return true;
- return type == AST_LITERAL;
-}
-
-void dump_func(Node *, int);
-
-static void do_print_ast(Node *node, int depth)
-{
- for (int i = 0; i < depth; i++) {
- printf(" ");
- }
- if (node->type == AST_PROGRAM) {
- for (int i = 0; i < node->block.num_children; i++) {
- do_print_ast(node->block.children[i], depth);
- }
- } else if (node->type == AST_BLOCK) {
- printf("{\n");
- for (int i = 0; i < node->block.num_children; i++) {
- do_print_ast(node->block.children[i], depth + 1);
- }
- for (int i = 0; i < depth; i++) {
- printf(" ");
- }
- printf("}\n");
- } else if (node->type == AST_FUNC) {
- dump_func(node, depth);
- } else if (node->type == AST_LITERAL) {
- printf("%d\n", node->literal.as_int);
- } else if (node->type == AST_RETURN) {
- printf("return\n");
- do_print_ast(node->unary_expr, depth + 1);
- } else if (is_unary_op(node->type)) {
- printf("%s\n", node_type_to_str(node->type));
- do_print_ast(node->unary_expr, depth + 1);
- } else if (is_binary_op(node->type)) {
- printf("%s\n", node_type_to_str(node->type));
- do_print_ast(node->binary.left, depth + 1);
- do_print_ast(node->binary.right, depth + 1);
- } else if (node->type == AST_VAR) {
- assert(node->variable && node->variable->name);
- printf("%s\n", node->variable->name);
- } else if (node->type == AST_VARDECL) {
- printf("var %s (", node->var_decl.var.name);
- print_type_to_file(stdout, node->var_decl.var.type);
- printf(")");
- if (node->var_decl.value != NULL) {
- printf(" = \n");
- do_print_ast(node->var_decl.value, depth + 1);
- } else {
- printf("\n");
- }
- } else if (node->type == AST_FUNCCALL) {
- printf("CALL %s(\n", node->call.func->func.name);
- for (int i = 0; i < node->call.num_args; i++) {
- do_print_ast(node->call.args[i], depth + 1);
- }
- for (int i = 0; i < depth; i++) {
- printf(" ");
- }
- printf(")\n");
- } else {
- printf("{{ %s }}\n", node_type_to_str(node->type));
- }
-}
-
-void dump_func(Node *node, int depth)
-{
- printf("fn %s(", node->func.name);
- for (int i = 0; i < node->func.num_args; i++) {
- if (i > 0) printf(", ");
- printf("%s: ", node->func.args[i].name);
- print_type_to_file(stdout, node->func.args[i].type);
- printf("[[%lld]]", node->func.args[i].offset);
- }
- printf(")");
- if (node->func.return_type.type != TYPE_NONE) {
- // FIXME: Print return type properly
- printf(" -> ");
- print_type_to_file(stdout, node->func.return_type);
- }
-
- do_print_ast(node->func.body, depth + 1);
-}
-
-void print_ast(Node *node)
-{
- do_print_ast(node, 0);
-} \ No newline at end of file
diff --git a/cup/ast.h b/cup/ast.h
deleted file mode 100644
index 22a7283..0000000
--- a/cup/ast.h
+++ /dev/null
@@ -1,150 +0,0 @@
-#pragma once
-
-#include "common.h"
-
-#define ENUM_AST_TYPES(F) \
- F(OP_NEG, "neg") \
- F(OP_NOT, "!") \
- F(OP_BWINV, "~") \
- F(OP_PLUS, "+") \
- F(OP_MINUS, "-") \
- F(OP_MUL, "*") \
- F(OP_DIV, "/") \
- F(OP_MOD, "%") \
- F(OP_LSHIFT, "<<") \
- F(OP_RSHIFT, ">>") \
- F(OP_AND, "&&") \
- F(OP_OR, "||") \
- F(OP_XOR, "^") \
- F(OP_EQ, "==") \
- F(OP_NEQ, "!=") \
- F(OP_LT, "<") \
- F(OP_LEQ, "<=") \
- F(OP_GT, ">") \
- F(OP_GEQ, ">=") \
- F(OP_ASSIGN, "=") \
- F(AST_LITERAL, "literal") \
- F(AST_FUNCCALL, "Function call") \
- F(AST_CONDITIONAL, "conditional expression") \
- F(AST_IF, "if statement") \
- F(AST_WHILE, "while statement") \
- F(AST_FOR, "for statement") \
- F(AST_VARDECL, "variable decl") \
- F(AST_VAR, "variable") \
- F(AST_RETURN, "return") \
- F(AST_FUNC, "func") \
- F(AST_PROGRAM, "program") \
- F(AST_BLOCK, "block statements")
-
-typedef enum {
-#define DEFINE_ENUM(name, str) name,
- ENUM_AST_TYPES(DEFINE_ENUM)
-#undef DEFINE_ENUM
-} NodeType;
-
-char *node_type_to_str(NodeType type);
-
-bool is_binary_op(NodeType type);
-bool is_unary_op(NodeType type);
-bool is_expression(NodeType type);
-
-typedef enum {
- TYPE_NONE,
- TYPE_INT,
-} DataType;
-
-char *data_type_to_str(DataType type);
-
-typedef struct {
- DataType type;
- // 0 = value, 1 = pointer, 2 = double pointer, ...
- int indirection;
-} Type;
-
-typedef struct {
- char *name;
- Type type;
- i64 offset;
-} Variable;
-
-typedef struct ast_node Node;
-typedef struct ast_node {
- NodeType type;
-
- union {
- // Binary expr
- struct {
- Node *left;
- Node *right;
- } binary;
-
- // Unary expr
- Node *unary_expr;
-
- // Function definition
- struct {
- char *name;
- Type return_type;
- Node *body;
-
- // TODO: Should we just dynamically allocate space on the
- // stack for each block instead of storing this?
- i64 max_locals_size;
-
- // TODO: Arguments / etc?
- Variable *args;
- int num_args;
- } func;
-
- // Block of statements
- struct {
- Node **children;
- int num_children;
-
- Variable **locals;
- int num_locals;
- i64 locals_size;
- } block;
-
- struct {
- Type type;
- union {
- int as_int;
- };
- } literal;
-
- struct {
- Variable var;
- Node *value;
- } var_decl;
-
- struct {
- Variable *var;
- Node *value;
- } assign;
-
- struct {
- Node *cond;
- Node *do_then;
- Node *do_else;
- } conditional;
-
- // Used for all loops
- struct {
- Node *cond;
- Node *init;
- Node *step;
- Node *body;
- } loop;
-
- Variable *variable;
-
- struct {
- Node *func;
- Node **args;
- int num_args;
- } call;
- };
-} Node;
-
-void print_ast(Node *node); \ No newline at end of file
diff --git a/cup/common.h b/cup/common.h
deleted file mode 100644
index 20d057f..0000000
--- a/cup/common.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-#include <stdbool.h>
-#include <stdint.h>
-
-typedef long long int i64;
diff --git a/cup/generator.c b/cup/generator.c
deleted file mode 100644
index 6a95d0c..0000000
--- a/cup/generator.c
+++ /dev/null
@@ -1,355 +0,0 @@
-/**
- * This file generates x86-64 assembly code in NASM format from the parsed AST.
- */
-
-#include "generator.h"
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#include <sys/syscall.h>
-
-static int label_counter = 0;
-static Node *current_function = NULL;
-
-void generate_expr_into_rax(Node *expr, FILE *out);
-
-void generate_func_call(Node *node, FILE *out)
-{
- assert(node->type == AST_FUNCCALL);
- // FIXME: This seems like a big hack
- i64 total_size = 0;
- for (int i = node->call.num_args - 1; i >= 0; i--) {
- Node *arg = node->call.args[i];
- generate_expr_into_rax(arg, out);
- fprintf(out, " push rax\n");
- // TODO: Compute this for different types
- // TODO: Also make sure of padding and stuff?
- total_size += 8;
- }
- fprintf(out, " call %s\n", node->call.func->func.name);
- fprintf(out, " add rsp, %lld\n", total_size);
-}
-
-// The evaluated expression is stored into `rax`
-void generate_expr_into_rax(Node *expr, FILE *out)
-{
- // TODO: Different sized output for different types?
- if (expr->type == AST_LITERAL) {
- // TODO: More literal types
- assert(expr->literal.type.type == TYPE_INT);
- fprintf(out, " mov rax, %d\n", expr->literal.as_int);
-
- } else if (expr->type == AST_FUNCCALL) {
- generate_func_call(expr, out);
-
- } else if (expr->type == AST_VAR) {
- i64 offset = expr->variable->offset;
- if (offset > 0)
- fprintf(out, " mov rax, [rbp-%lld]\n", offset);
- else
- fprintf(out, " mov rax, [rbp+%lld]\n", -offset);
-
- } else if (expr->type == OP_ASSIGN) {
- i64 offset = expr->assign.var->offset;
- generate_expr_into_rax(expr->assign.value, out);
- fprintf(out, " mov [rbp-%lld], rax\n", offset);
-
- } else if (expr->type == OP_NEG) {
- generate_expr_into_rax(expr->unary_expr, out);
- fprintf(out, " neg rax\n");
-
- } else if (expr->type == OP_NOT) {
- generate_expr_into_rax(expr->unary_expr, out);
- // Booleanize
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " sete al\n");
- fprintf(out, " movzx rax, al\n");
-
- } else if (expr->type == OP_BWINV) {
- generate_expr_into_rax(expr->unary_expr, out);
- fprintf(out, " not rax\n");
-
- } else if (expr->type == OP_PLUS) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " add rax, rbx\n");
-
- } else if (expr->type == OP_MINUS) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " sub rax, rbx\n");
-
- } else if (expr->type == OP_DIV) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " cqo\n");
- fprintf(out, " idiv rbx\n");
-
- } else if (expr->type == OP_MUL) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " imul rbx\n");
-
- // TODO: Compress these, there's barely any differences
- } else if (expr->type == OP_EQ) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " cmp rax, rbx\n");
- fprintf(out, " sete al\n");
- fprintf(out, " movzx rax, al\n");
-
- } else if (expr->type == OP_NEQ) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " cmp rax, rbx\n");
- fprintf(out, " setne al\n");
- fprintf(out, " movzx rax, al\n");
-
- } else if (expr->type == OP_LT) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " cmp rax, rbx\n");
- fprintf(out, " setl al\n");
- fprintf(out, " movzx rax, al\n");
-
- } else if (expr->type == OP_LEQ) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " cmp rax, rbx\n");
- fprintf(out, " setle al\n");
- fprintf(out, " movzx rax, al\n");
-
- } else if (expr->type == OP_GT) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " cmp rax, rbx\n");
- fprintf(out, " setg al\n");
- fprintf(out, " movzx rax, al\n");
-
- } else if (expr->type == OP_GEQ) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " cmp rax, rbx\n");
- fprintf(out, " setge al\n");
- fprintf(out, " movzx rax, al\n");
-
- } else if (expr->type == OP_GEQ) {
- generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " push rax\n");
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " pop rbx\n");
- fprintf(out, " cmp rax, rbx\n");
- fprintf(out, " setge al\n");
- fprintf(out, " movzx rax, al\n");
-
- // Note: These are different because of short-circuit evaluation!
- } else if (expr->type == OP_OR) {
- generate_expr_into_rax(expr->binary.left, out);
- // If left is true, we can short-circuit
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " je .or_right_%d\n", label_counter);
- fprintf(out, " mov rax, 1\n");
- fprintf(out, " jmp .or_end_%d\n", label_counter);
- fprintf(out, ".or_right_%d:\n", label_counter);
- generate_expr_into_rax(expr->binary.right, out);
- // Booleanize the result
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " setne al\n");
- fprintf(out, ".or_end_%d:\n", label_counter);
- label_counter++;
-
- } else if (expr->type == OP_AND) {
- generate_expr_into_rax(expr->binary.left, out);
- // If left is false, we can short-circuit
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " jne .and_right_%d\n", label_counter);
- fprintf(out, " mov rax, 0\n");
- fprintf(out, " jmp .and_end_%d\n", label_counter);
- fprintf(out, ".and_right_%d:\n", label_counter);
- generate_expr_into_rax(expr->binary.right, out);
- // Booleanize the result
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " setne al\n");
- fprintf(out, ".and_end_%d:\n", label_counter);
- label_counter++;
-
- } else if (expr->type == AST_CONDITIONAL) {
- generate_expr_into_rax(expr->conditional.cond, out);
- // If left is false, we can short-circuit
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " je .cond_else_%d\n", label_counter);
- generate_expr_into_rax(expr->conditional.do_then, out);
- fprintf(out, " jmp .cond_end_%d\n", label_counter);
- fprintf(out, ".cond_else_%d:\n", label_counter);
- generate_expr_into_rax(expr->binary.right, out);
- // Booleanize the result
- generate_expr_into_rax(expr->conditional.do_else, out);
- fprintf(out, ".cond_end_%d:\n", label_counter);
- label_counter++;
-
- } else {
- fprintf(stderr, "Unsupported expression type in generate_expr: `%s`\n", node_type_to_str(expr->type));
- exit(1);
- }
-}
-
-void generate_block(Node *block, FILE *out);
-
-void generate_statement(Node *stmt, FILE *out)
-{
- if (stmt->type == AST_RETURN) {
- generate_expr_into_rax(stmt->unary_expr, out);
- // TODO: Only do this if we have local variables
- fprintf(out, " mov rsp, rbp\n");
- fprintf(out, " pop rbp\n");
- fprintf(out, " ret\n");
- } else if (stmt->type == AST_VARDECL) {
- if (stmt->var_decl.value) {
- generate_expr_into_rax(stmt->var_decl.value, out);
- i64 offset = stmt->var_decl.var.offset;
- fprintf(out, " mov [rbp-%lld], rax\n", offset);
- } else {
- // Initialize to 0
- i64 offset = stmt->var_decl.var.offset;
- // TODO: Use correct size for the type
- fprintf(out, " mov qword [rbp-%lld], 0\n", offset);
- }
- } else if (stmt->type == AST_IF) {
- assert(stmt->conditional.cond);
- assert(stmt->conditional.do_then);
- int cur_label = label_counter++;
-
- generate_expr_into_rax(stmt->conditional.cond, out);
- // If we don't have an `else` clause, we can simplify
- if (!stmt->conditional.do_else) {
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " je .if_end_%d\n", cur_label);
- generate_statement(stmt->conditional.do_then, out);
- fprintf(out, ".if_end_%d:\n", cur_label);
- } else {
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " je .if_else_%d\n", cur_label);
- generate_statement(stmt->conditional.do_then, out);
- fprintf(out, " jmp .if_end_%d\n", cur_label);
- fprintf(out, ".if_else_%d:\n", cur_label);
- generate_statement(stmt->conditional.do_else, out);
- fprintf(out, ".if_end_%d:\n", cur_label);
- }
- } else if (stmt->type == AST_WHILE) {
- int cur_label = label_counter++;
- fprintf(out, ".loop_start_%d:\n", cur_label);
- fprintf(out, ".loop_continue_%d:\n", cur_label);
- generate_expr_into_rax(stmt->loop.cond, out);
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " je .loop_end_%d\n", cur_label);
- generate_statement(stmt->loop.body, out);
- fprintf(out, " jmp .loop_start_%d\n", cur_label);
- fprintf(out, ".loop_end_%d:\n", cur_label);
-
- } else if (stmt->type == AST_FOR) {
- int cur_label = label_counter++;
- if (stmt->loop.init) {
- generate_statement(stmt->loop.init, out);
- }
- fprintf(out, ".loop_start_%d:\n", cur_label);
- if (stmt->loop.cond) {
- generate_expr_into_rax(stmt->loop.cond, out);
- fprintf(out, " cmp rax, 0\n");
- fprintf(out, " je .loop_end_%d\n", cur_label);
- }
- generate_statement(stmt->loop.body, out);
- fprintf(out, ".loop_continue_%d:\n", cur_label);
- if (stmt->loop.step) {
- generate_expr_into_rax(stmt->loop.step, out);
- }
- fprintf(out, " jmp .loop_start_%d\n", cur_label);
- fprintf(out, ".loop_end_%d:\n", cur_label);
-
- } else if (stmt->type == AST_BLOCK) {
- generate_block(stmt, out);
- } else {
- // Once again, default to an expression here...
- generate_expr_into_rax(stmt, out);
- }
-}
-
-void generate_block(Node *block, FILE *out)
-{
- assert(block->type == AST_BLOCK);
- for (int i = 0; i < block->block.num_children; i++)
- generate_statement(block->block.children[i], out);
-}
-
-void generate_function_header(Node *func, FILE *out)
-{
- assert(func->type == AST_FUNC);
- fprintf(out, "global %s\n", func->func.name);
- fprintf(out, "%s:\n", func->func.name);
- // TODO: Only do this if we have local variables
- fprintf(out, " push rbp\n");
- fprintf(out, " mov rbp, rsp\n");
- // FIXME: Also account for arguments
- fprintf(out, " sub rsp, %lld\n", func->func.max_locals_size);
-}
-
-void generate_function(Node *func, FILE *out)
-{
- assert(func->type == AST_FUNC);
- current_function = func;
- generate_function_header(func, out);
- generate_block(func->func.body, out);
-}
-
-void generate_asm(Node *root, FILE *out)
-{
- assert(root->type == AST_PROGRAM);
- for (int i = 0; i < root->block.num_children; i++) {
- if (root->block.children[i]->type == AST_FUNC) {
- generate_function(root->block.children[i], out);
- } else {
- fprintf(stderr, "Unsupported node type in generate_asm: %s\n", node_type_to_str(root->block.children[i]->type));
- exit(1);
- }
- }
-
- // Call `main` from `_main` and return
-#if __APPLE__
- fprintf(out, "global _main\n");
- fprintf(out, "_main:\n");
-#else
- fprintf(out, "global _start\n");
- fprintf(out, "_start:\n");
-#endif
- fprintf(out, " call main\n");
-
-#if __APPLE__
- fprintf(out, " ret\n");
-#else
- fprintf(out, " mov rdi, rax\n");
- fprintf(out, " mov rax, %d\n", SYS_exit);
- fprintf(out, " syscall\n");
-#endif
-
- // TODO: Add implementations of some primitives?
-} \ No newline at end of file
diff --git a/cup/generator.h b/cup/generator.h
deleted file mode 100644
index 602cf3a..0000000
--- a/cup/generator.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-#include "ast.h"
-#include <stdio.h>
-
-void generate_asm(Node *root, FILE *out); \ No newline at end of file
diff --git a/cup/lexer.c b/cup/lexer.c
deleted file mode 100644
index ea095ba..0000000
--- a/cup/lexer.c
+++ /dev/null
@@ -1,238 +0,0 @@
-#include "lexer.h"
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "utils.h"
-
-Lexer Lexer_new(char *filename, char *src, i64 len)
-{
- Lexer self = {0};
- self.src = src;
- self.len = len;
- self.filename = filename;
- return self;
-}
-
-static Location Lexer_loc(Lexer *lexer)
-{
- Location loc = {0};
- loc.filename = lexer->filename;
- loc.line = lexer->line;
- loc.col = lexer->col;
- return loc;
-}
-
-static void Lexer_skip_whitespace(Lexer *lexer)
-{
- while (lexer->pos < lexer->len && isspace(lexer->src[lexer->pos])) {
- if (lexer->src[lexer->pos] == '\n') {
- lexer->line++;
- lexer->col = 0;
- } else {
- lexer->col++;
- }
- lexer->pos++;
- }
-}
-
-bool Lexer_has_more(Lexer *lexer)
-{
- Lexer_skip_whitespace(lexer);
- return lexer->pos < lexer->len;
-}
-
-static bool Lexer_starts_with(Lexer *lexer, char *str)
-{
- i64 len = strlen(str);
- if (lexer->len - lexer->pos < len)
- return false;
- for (i64 i = 0; i < len; i++)
- if (lexer->src[lexer->pos + i] != str[i])
- return false;
- i64 end_pos = lexer->pos + len;
- if (end_pos == lexer->len)
- return true;
- char end_char = lexer->src[end_pos];
- return !(isdigit(end_char) || isalpha(end_char) || end_char == '_');
-}
-
-static void advance(Lexer *lexer, i64 amount)
-{
- lexer->pos += amount;
- lexer->col += amount;
-}
-
-static char peek(Lexer *lexer, int amount)
-{
- if (lexer->pos + amount >= lexer->len)
- return '\0';
- return lexer->src[lexer->pos + amount];
-}
-
-static Token Lexer_make_token(Lexer *lexer, TokenType type, int inc_amount)
-{
- Token token = {0};
- token.type = type;
- token.loc = Lexer_loc(lexer);
- advance(lexer, inc_amount);
- return token;
-}
-
-#define LEX_KEYWORD(str, token_type) \
- if (Lexer_starts_with(lexer, str)) return Lexer_make_token(lexer, token_type, strlen(str));
-
-Token Lexer_next(Lexer *lexer)
-{
- while (lexer->pos < lexer->len) {
- switch (lexer->src[lexer->pos])
- {
- case ' ': case '\t': case '\r': advance(lexer, 1); continue;
- case '\n': lexer->line++; lexer->col = 0; lexer->pos++; continue;
- case '(': return Lexer_make_token(lexer, TOKEN_OPEN_PAREN, 1);
- case ')': return Lexer_make_token(lexer, TOKEN_CLOSE_PAREN, 1);
- case '{': return Lexer_make_token(lexer, TOKEN_OPEN_BRACE, 1);
- case '}': return Lexer_make_token(lexer, TOKEN_CLOSE_BRACE, 1);
- case ';': return Lexer_make_token(lexer, TOKEN_SEMICOLON, 1);
- case ':': return Lexer_make_token(lexer, TOKEN_COLON, 1);
- case '~': return Lexer_make_token(lexer, TOKEN_TILDE, 1);
- case '?': return Lexer_make_token(lexer, TOKEN_QUESTION, 1);
- case ',': return Lexer_make_token(lexer, TOKEN_COMMA, 1);
-
- case '&': {
- if (peek(lexer, 1) == '&')
- return Lexer_make_token(lexer, TOKEN_AND, 2);
- return Lexer_make_token(lexer, TOKEN_AMPERSAND, 1);
- }
-
- case '!': {
- if (peek(lexer, 1) == '=')
- return Lexer_make_token(lexer, TOKEN_NEQ, 2);
- return Lexer_make_token(lexer, TOKEN_EXCLAMATION, 1);
- }
-
- case '<': {
- if (peek(lexer, 1) == '=')
- return Lexer_make_token(lexer, TOKEN_LEQ, 2);
- return Lexer_make_token(lexer, TOKEN_LT, 1);
- }
-
- case '>': {
- if (peek(lexer, 1) == '=')
- return Lexer_make_token(lexer, TOKEN_GEQ, 2);
- return Lexer_make_token(lexer, TOKEN_GT, 1);
- }
-
- case '=': {
- if (peek(lexer, 1) == '=')
- return Lexer_make_token(lexer, TOKEN_EQ, 2);
- return Lexer_make_token(lexer, TOKEN_ASSIGN, 1);
- }
-
- case '|': {
- if (peek(lexer, 1) == '|')
- return Lexer_make_token(lexer, TOKEN_OR, 2);
- return Lexer_make_token(lexer, TOKEN_BAR, 1);
- }
-
-
- case '+': {
- if (peek(lexer, 1) == '+')
- return Lexer_make_token(lexer, TOKEN_PLUSPLUS, 2);
- if (peek(lexer, 1) == '=')
- return Lexer_make_token(lexer, TOKEN_PLUSEQUALS, 2);
- return Lexer_make_token(lexer, TOKEN_PLUS, 1);
- }
-
- case '-': {
- if (peek(lexer, 1) == '-')
- return Lexer_make_token(lexer, TOKEN_MINUSMINUS, 2);
- if (peek(lexer, 1) == '=')
- return Lexer_make_token(lexer, TOKEN_MINUSEQUALS, 2);
- return Lexer_make_token(lexer, TOKEN_MINUS, 1);
- }
-
- case '/': {
- if (peek(lexer, 1) == '/') {
- lexer->pos += 2; // skip the '//'
- while (lexer->pos < lexer->len && lexer->src[lexer->pos] != '\n')
- lexer->pos++;
- continue;
- }
- return Lexer_make_token(lexer, TOKEN_SLASH, 1);
- }
-
- case '*': return Lexer_make_token(lexer, TOKEN_STAR, 1);
- case '%': return Lexer_make_token(lexer, TOKEN_PERCENT, 1);
-
-
- default: {
- // Handle keywords explicitly
- LEX_KEYWORD("fn", TOKEN_FN);
- LEX_KEYWORD("if", TOKEN_IF);
- LEX_KEYWORD("int", TOKEN_INT);
- LEX_KEYWORD("let", TOKEN_LET);
- LEX_KEYWORD("for", TOKEN_FOR);
- LEX_KEYWORD("else", TOKEN_ELSE);
- LEX_KEYWORD("while", TOKEN_WHILE);
- LEX_KEYWORD("return", TOKEN_RETURN);
-
- if (isdigit(lexer->src[lexer->pos])) {
- // TODO: Parse hex and octal numbers
- i64 pos = lexer->pos;
- while (pos < lexer->len && isdigit(lexer->src[pos]))
- pos++;
- Token token = Token_from_int(atoi(lexer->src + lexer->pos), Lexer_loc(lexer));
- advance(lexer, pos - lexer->pos);
- return token;
- }
-
- if (isalpha(lexer->src[lexer->pos]) || lexer->src[lexer->pos] == '_') {
- i64 pos = lexer->pos;
- while (pos < lexer->len && (isalnum(lexer->src[pos]) || lexer->src[pos] == '_'))
- pos++;
- int str_len = pos - lexer->pos;
- char *str = calloc(str_len + 1, 1);
- strncpy(str, lexer->src + lexer->pos, str_len);
- Token token = Token_from_identifier(str, Lexer_loc(lexer));
- advance(lexer, str_len);
- return token;
- }
-
- // TODO: Handle escapes
- if (lexer->src[lexer->pos] == '"') {
- i64 pos = lexer->pos + 1;
- while (pos < lexer->len && lexer->src[pos] != '"')
- pos++;
- if (pos == lexer->len) {
- die_location(Lexer_loc(lexer), ": ERROR: Reached end-of-file while parsing string literal beginning here.\n");
- }
- // Careful with indexing here, because we want to skip opening and closing quotes
- char *str = calloc(pos - lexer->pos, 1);
- strncpy(str, lexer->src + lexer->pos + 1, pos - lexer->pos - 1);
- Token token = Token_from_identifier(str, Lexer_loc(lexer));
- advance(lexer, pos - lexer->pos + 1);
- return token;
- }
-
-
- die_location(Lexer_loc(lexer), ": ERROR: Unexpected character '%c'\n", lexer->src[lexer->pos]);
- advance(lexer, 1);
- }
- }
- }
-
- return Token_from_type(TOKEN_EOF, Lexer_loc(lexer));
-}
-
-Token Lexer_peek(Lexer *lexer)
-{
- i64 pos = lexer->pos;
- i64 col = lexer->col;
- i64 line = lexer->line;
- Token token = Lexer_next(lexer);
- lexer->pos = pos;
- lexer->col = col;
- lexer->line = line;
- return token;
-} \ No newline at end of file
diff --git a/cup/lexer.h b/cup/lexer.h
deleted file mode 100644
index f710d3d..0000000
--- a/cup/lexer.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#pragma once
-
-#include "tokens.h"
-#include <stdbool.h>
-
-typedef struct {
- char *src;
- i64 len;
- i64 pos;
-
- char *filename;
- i64 line;
- i64 col;
-} Lexer;
-
-Lexer Lexer_new(char *filename, char *src, i64 len);
-
-Token Lexer_next(Lexer *lexer);
-Token Lexer_peek(Lexer *lexer); \ No newline at end of file
diff --git a/cup/main.c b/cup/main.c
deleted file mode 100644
index 15460ff..0000000
--- a/cup/main.c
+++ /dev/null
@@ -1,82 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "lexer.h"
-#include "parser.h"
-#include "generator.h"
-#include "unistd.h"
-
-char *filename = NULL;
-char *source = NULL;
-i64 source_len = 0;
-bool dump_ast = false;
-
-#define MAX_STDIN_SOURCE_LEN 2048
-
-void usage(int exit_status)
-{
- printf("Usage: cup [options] <file>\n");
- printf("Options:\n");
- printf(" -c <code> Code to compile\n");
- printf(" -h Show this help\n");
- printf(" -d Dump AST to stdout\n");
- printf("Output file will be named `output.nasm`\n");
- exit(exit_status);
-}
-
-void parse_args(int argc, char **argv)
-{
- for (int i = 1; i < argc; i++) {
- if (strcmp(argv[i], "-c") == 0) {
- source_len = strlen(argv[i+1]);
- source = calloc(source_len + 1, 1);
- strcpy(source, argv[i+1]);
- i++;
- filename = "CLI";
- } else if (strcmp(argv[i], "-h") == 0) {
- usage(0);
- } else if (strcmp(argv[i], "-d") == 0) {
- dump_ast = true;
- } else if (filename == NULL) {
- if (strcmp(argv[i], "-") == 0) {
- filename = "stdin";
- source = calloc(MAX_STDIN_SOURCE_LEN, 1);
- source_len = fread(source, 1, MAX_STDIN_SOURCE_LEN, stdin);
- if (source_len == MAX_STDIN_SOURCE_LEN) {
- fprintf(stderr, "Source too long to use through stdin\n");
- exit(1);
- }
- } else {
- filename = argv[i];
- // Read entire file into memory
- FILE *fp = fopen(filename, "r");
- fseek(fp, 0, SEEK_END);
- source_len = ftell(fp);
- fseek(fp, 0, SEEK_SET);
- source = malloc(source_len + 1);
- fread(source, source_len, 1, fp);
- source[source_len] = 0;
- fclose(fp);
- }
- } else {
- usage(1);
- }
- }
-}
-
-int main(int argc, char**argv) {
- parse_args(argc, argv);
-
- Lexer lexer = Lexer_new(filename, source, source_len);
- Node *ast = parse_program(&lexer);
-
- if (dump_ast)
- print_ast(ast);
-
- FILE *f = fopen("output.nasm", "w");
- generate_asm(ast, f);
-
-
- free(source);
- return 0;
-}
diff --git a/cup/parser.c b/cup/parser.c
deleted file mode 100644
index cfb5d97..0000000
--- a/cup/parser.c
+++ /dev/null
@@ -1,562 +0,0 @@
-#include "parser.h"
-#include "utils.h"
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#define MAX_FUNCTION_COUNT 1024
-static Node *all_functions[MAX_FUNCTION_COUNT];
-static i64 function_count = 0;
-
-static Node *current_function = NULL;
-
-#define BLOCK_STACK_SIZE 64
-static Node *block_stack[BLOCK_STACK_SIZE];
-static i64 block_stack_count = 0;
-static i64 cur_stack_offset = 0;
-
-
-Token do_assert_token(Token token, TokenType type, char *filename, int line)
-{
- if (token.type != type) {
- Location_print(stderr, token.loc);
- fprintf(stderr, ": Expected token of type `%s` but got `%s`\n", token_type_to_str(type), token_type_to_str(token.type));
- fprintf(stderr, "Relevant location in compiler: %s:%d\n", filename, line);
- exit(1);
- }
- return token;
-}
-
-#define assert_token(token, type) do_assert_token(token, type, __FILE__, __LINE__)
-
-/******
- * Some helpers
- */
-
-NodeType binary_token_to_op(TokenType type)
-{
- switch (type)
- {
- case TOKEN_PLUS: return OP_PLUS;
- case TOKEN_MINUS: return OP_MINUS;
- case TOKEN_STAR: return OP_MUL;
- case TOKEN_SLASH: return OP_DIV;
- case TOKEN_PERCENT: return OP_MOD;
- case TOKEN_LSHIFT: return OP_LSHIFT;
- case TOKEN_RSHIFT: return OP_RSHIFT;
- case TOKEN_AND: return OP_AND;
- case TOKEN_OR: return OP_OR;
- case TOKEN_XOR: return OP_XOR;
- case TOKEN_EQ: return OP_EQ;
- case TOKEN_NEQ: return OP_NEQ;
- case TOKEN_LT: return OP_LT;
- case TOKEN_LEQ: return OP_LEQ;
- case TOKEN_GT: return OP_GT;
- case TOKEN_GEQ: return OP_GEQ;
-
- default: assert(false && "binary_token_to_op called with invalid token type");
- }
-}
-
-void block_stack_push(Node *block)
-{
- assert(block_stack_count < BLOCK_STACK_SIZE);
- assert(current_function);
- block_stack[block_stack_count++] = block;
-}
-
-void block_stack_pop()
-{
- assert(block_stack_count > 0);
- assert(current_function);
- Node *block = block_stack[--block_stack_count];
- cur_stack_offset -= block->block.locals_size;
- assert(cur_stack_offset >= 0);
-}
-
-void dump_block_stack()
-{
- for (i64 i = 0; i < block_stack_count; i++) {
- Node *block = block_stack[i];
- for (int i = 0; i < block->block.num_locals; i++) {
- printf("%s: offset: %lld\n", block->block.locals[i]->name, block->block.locals[i]->offset);
- }
- printf("\n");
- }
-}
-
-Variable *find_local_variable(Token *token)
-{
- assert_token(*token, TOKEN_IDENTIFIER);
- for (i64 i = block_stack_count - 1; i >= 0; --i) {
- Node *block = block_stack[i];
- for (int i = 0; i < block->block.num_locals; i++) {
- if (strcmp(block->block.locals[i]->name, token->value.as_string) == 0) {
- return block->block.locals[i];
- }
- }
- }
- Node *func = current_function;
- for (int i = 0; i < func->func.num_args; i++) {
- if (strcmp(func->func.args[i].name, token->value.as_string) == 0) {
- return &func->func.args[i];
- }
- }
- return NULL;
-}
-
-Node *find_function_definition(Token *token)
-{
- assert_token(*token, TOKEN_IDENTIFIER);
- for (i64 i = 0; i < function_count; i++) {
- Node *function = all_functions[i];
- if (strcmp(function->func.name, token->value.as_string) == 0) {
- return function;
- }
- }
- return NULL;
-}
-
-// TODO: rename this, it's ugly
-void add_variable_to_current_block(Variable *var)
-{
- // Set offset for variable
- Node *cur_block = block_stack[block_stack_count - 1];
- var->offset = cur_stack_offset;
-
- int new_len = (cur_block->block.num_locals + 1);
- int var_size = 8; // TODO: Compute sizes based on different types
-
- // Add to the block
- // FIXME: Use a map here
- cur_block->block.locals = realloc(cur_block->block.locals, sizeof(Variable *) * new_len);
- cur_block->block.locals[cur_block->block.num_locals] = var;
- cur_block->block.num_locals++;
-
- assert(current_function);
- // Update current stack offset (w.r.t function stack frame) and block size
- cur_stack_offset += var_size;
- block_stack[block_stack_count-1]->block.locals_size += var_size;
-
- // Update function's max locals size
- i64 max_offset = i64max(current_function->func.max_locals_size, cur_stack_offset);
- current_function->func.max_locals_size = max_offset;
-
- assert(cur_stack_offset >= 0);
- assert(block_stack_count > 0);
-}
-
-void Node_add_child(Node *parent, Node *child)
-{
- // TODO, use a vector
- parent->block.children = realloc(parent->block.children, sizeof(Node *) * (parent->block.num_children + 1));
- parent->block.children[parent->block.num_children] = child;
- parent->block.num_children++;
-}
-
-Node *Node_new(NodeType type)
-{
- Node *self = calloc(sizeof(Node), 1);
- self->type = type;
- return self;
-}
-
-Type parse_type(Lexer *lexer)
-{
- Type type = {0};
- Token token = Lexer_peek(lexer);
- if (token.type == TOKEN_INT) {
- type.type = TYPE_INT;
- Lexer_next(lexer);
- } else {
- type.type = TYPE_NONE;
- }
-
- while (Lexer_peek(lexer).type == TOKEN_AMPERSAND) {
- Lexer_next(lexer);
- type.indirection++;
- }
-
- return type;
-}
-
-Node *parse_literal(Lexer *lexer)
-{
- Node *node = Node_new(AST_LITERAL);
- Token token = assert_token(Lexer_next(lexer), TOKEN_INTLIT);
- node->literal.type = (Type) {.type = TYPE_INT};
- node->literal.as_int = token.value.as_int;
- return node;
-}
-
-Node *parse_expression(Lexer *);
-
-Node *parse_var_declaration(Lexer *lexer)
-{
- Token token = assert_token(Lexer_next(lexer), TOKEN_LET);
- // TODO: Reuse this for globals? Or maybe just make a new function?
- if (!current_function || current_function->type != AST_FUNC)
- die_location(token.loc, "Variable declaration outside of function");
-
- token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
- // NOTE: We don't allow shadowing of variables in the any blocks,
- // this is by design since it's a common mistake.
- if (find_local_variable(&token) != NULL)
- die_location(token.loc, "Variable `%s` already declared", token.value.as_string);
-
- Node *node = Node_new(AST_VARDECL);
- node->var_decl.var.name = token.value.as_string;
-
- token = Lexer_next(lexer);
- if (token.type != TOKEN_COLON)
- die_location(token.loc, "Missing type specifier for variable `%s`", node->var_decl.var.name);
- node->var_decl.var.type = parse_type(lexer);
-
- add_variable_to_current_block(&node->var_decl.var);
-
- token = Lexer_next(lexer);
- if (token.type == TOKEN_ASSIGN) {
- node->var_decl.value = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
- } else {
- assert_token(token, TOKEN_SEMICOLON);
- }
-
- return node;
-}
-
-Node *parse_function_call_args(Lexer *lexer, Node *func)
-{
- Token identifier = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
- Node *call = Node_new(AST_FUNCCALL);
- call->call.func = func;
- assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
- Token token = Lexer_peek(lexer);
-
- while (token.type != TOKEN_CLOSE_PAREN) {
- Node *arg = parse_expression(lexer);
-
- int new_size = call->call.num_args + 1;
- call->call.args = realloc(call->call.args, sizeof(Node *) * new_size);
- call->call.args[call->call.num_args++] = arg;
-
- if (new_size > func->func.num_args)
- die_location(identifier.loc, "Too many arguments to function `%s`", func->func.name);
-
- token = Lexer_peek(lexer);
- if (token.type == TOKEN_COMMA) {
- Lexer_next(lexer);
- token = Lexer_peek(lexer);
- }
- }
-
- if (call->call.num_args != func->func.num_args)
- die_location(identifier.loc, "Too few arguments to function `%s`", func->func.name);
-
- assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
- return call;
-}
-
-Node *parse_factor(Lexer *lexer)
-{
- // TODO: Parse more complicated things
- Token token = Lexer_peek(lexer);
- Node *expr;
- if (token.type == TOKEN_MINUS) {
- Lexer_next(lexer);
- expr = Node_new(OP_NEG);
- expr->unary_expr = parse_factor(lexer);
- } else if (token.type == TOKEN_TILDE) {
- Lexer_next(lexer);
- expr = Node_new(OP_BWINV);
- expr->unary_expr = parse_factor(lexer);
- } else if (token.type == TOKEN_EXCLAMATION) {
- Lexer_next(lexer);
- expr = Node_new(OP_NOT);
- expr->unary_expr = parse_factor(lexer);
- } else if (token.type == TOKEN_OPEN_PAREN) {
- Lexer_next(lexer);
- expr = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
- } else if (token.type == TOKEN_INTLIT) {
- expr = parse_literal(lexer);
- } else if (token.type == TOKEN_IDENTIFIER) {
- // TODO: Check for global variables when added
-
- Variable *var = find_local_variable(&token);
- if (var != NULL) {
- Lexer_next(lexer);
- expr = Node_new(AST_VAR);
- expr->variable = var;
- return expr;
- }
-
- Node *func = find_function_definition(&token);
- if (func != NULL) {
- return parse_function_call_args(lexer, func);
- }
-
- die_location(token.loc, "Unknown identifier `%s`", token.value.as_string);
- expr = NULL;
- } else {
- die_location(token.loc, ": Expected token found in parse_factor: `%s`", token_type_to_str(token.type));
- exit(1);
- }
- return expr;
-}
-
-#define BINOP_PARSER(next_parser, predicate) \
- Node *expr = next_parser(lexer); \
- Token token = Lexer_peek(lexer); \
- while (predicate(token.type)) { \
- Lexer_next(lexer); \
- Node *op = Node_new(binary_token_to_op(token.type)); \
- Node *right = next_parser(lexer); \
- op->binary.left = expr; \
- op->binary.right = right; \
- expr = op; \
- token = Lexer_peek(lexer); \
- } \
- return expr;
-
-bool is_term_token(TokenType type) { return type == TOKEN_STAR || type == TOKEN_SLASH || type == TOKEN_PERCENT; }
-Node *parse_term(Lexer *lexer) { BINOP_PARSER(parse_factor, is_term_token); }
-
-bool is_additive_token(TokenType type) { return type == TOKEN_PLUS || type == TOKEN_MINUS; }
-Node *parse_additive(Lexer *lexer) { BINOP_PARSER(parse_term, is_additive_token); }
-
-bool is_relational_token(TokenType type) { return type == TOKEN_LT || type == TOKEN_LEQ || type == TOKEN_GT || type == TOKEN_GEQ; }
-Node *parse_relational(Lexer *lexer) { BINOP_PARSER(parse_additive, is_relational_token); }
-
-bool is_equality_token(TokenType type) { return type == TOKEN_EQ || type == TOKEN_NEQ; }
-Node *parse_equality(Lexer *lexer) { BINOP_PARSER(parse_relational, is_equality_token); }
-
-bool is_logical_and_token(TokenType type) { return type == TOKEN_AND; }
-Node *parse_logical_and(Lexer *lexer) { BINOP_PARSER(parse_equality, is_logical_and_token); }
-
-bool is_logical_or_token(TokenType type) { return type == TOKEN_OR; }
-Node *parse_logical_or(Lexer *lexer) { BINOP_PARSER(parse_logical_and, is_logical_or_token); }
-
-Node *parse_conditional_exp(Lexer *lexer)
-{
- Node *expr = parse_logical_or(lexer);
- Token token = Lexer_peek(lexer);
- if (token.type == TOKEN_QUESTION) {
- Lexer_next(lexer);
- Node *then_expr = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_COLON);
- Node *else_expr = parse_expression(lexer);
- Node *conditional = Node_new(AST_CONDITIONAL);
- conditional->conditional.cond = expr;
- conditional->conditional.do_then = then_expr;
- conditional->conditional.do_else = else_expr;
- expr = conditional;
- }
- return expr;
-}
-
-Node *parse_expression(Lexer *lexer)
-{
- Node *node = parse_conditional_exp(lexer);
- // FIXME: This is a hack to handle assignment expressions
- // and can probably be done properly.
- if (node->type == AST_VAR) {
- Token token = Lexer_peek(lexer);
- if (token.type == TOKEN_ASSIGN) {
- Lexer_next(lexer);
- Variable *var = node->variable;
- node->type = OP_ASSIGN;
- node->assign.var = var;
- node->assign.value = parse_expression(lexer);
- }
- }
- return node;
-}
-
-Node *parse_block(Lexer *lexer);
-
-Node *parse_statement(Lexer *lexer)
-{
- Node *node;
- Token token = Lexer_peek(lexer);
-
- if (token.type == TOKEN_RETURN) {
- assert_token(Lexer_next(lexer), TOKEN_RETURN);
- node = Node_new(AST_RETURN);
- node->unary_expr = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
- } else if (token.type == TOKEN_IF) {
- Lexer_next(lexer);
- node = Node_new(AST_IF);
- assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
- node->conditional.cond = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
- // TODO: Allow blocks in here once implemented, currently
- // we can onle have a single statement in the if/else
- node->conditional.do_then = parse_statement(lexer);
- token = Lexer_peek(lexer);
- if (token.type == TOKEN_ELSE) {
- Lexer_next(lexer);
- node->conditional.do_else = parse_statement(lexer);
- }
- } else if (token.type == TOKEN_WHILE) {
- Lexer_next(lexer);
- node = Node_new(AST_WHILE);
- assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
- node->loop.cond = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
- node->loop.body = parse_statement(lexer);
- } else if (token.type == TOKEN_FOR) {
- Lexer_next(lexer);
- node = Node_new(AST_FOR);
- assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
-
- // All of the expressions in the for loop are optional
-
- // TODO: Allow this to be a declaration, need to inject
- // the variable into the symbol table for the block
- if (Lexer_peek(lexer).type != TOKEN_SEMICOLON)
- node->loop.init = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
-
- if (Lexer_peek(lexer).type != TOKEN_SEMICOLON)
- node->loop.cond = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
-
- if (Lexer_peek(lexer).type != TOKEN_CLOSE_PAREN)
- node->loop.step = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
-
- node->loop.body = parse_statement(lexer);
- } else if (token.type == TOKEN_OPEN_BRACE) {
- node = parse_block(lexer);
- } else {
- // Default to trying to handle it as an expression
- node = parse_expression(lexer);
- assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
- }
-
- return node;
-}
-
-Node *parse_block(Lexer *lexer)
-{
- assert_token(Lexer_next(lexer), TOKEN_OPEN_BRACE);
-
- Node *block = Node_new(AST_BLOCK);
- Token token = Lexer_peek(lexer);
-
- block_stack_push(block);
-
- while (token.type != TOKEN_CLOSE_BRACE) {
- Node *block_item;
- if (token.type == TOKEN_LET) {
- block_item = parse_var_declaration(lexer);
- } else {
- // Default to a statement
- block_item = parse_statement(lexer);
- }
- Node_add_child(block, block_item);
- token = Lexer_peek(lexer);
- }
-
- block_stack_pop();
-
- assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACE);
- return block;
-}
-
-void push_new_function(Node *func)
-{
- assert(func->type == AST_FUNC);
- assert(function_count < MAX_FUNCTION_COUNT);
- all_functions[function_count++] = func;
- current_function = func;
-}
-
-void parse_func_args(Lexer *lexer, Node *func)
-{
- assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
- Token token = Lexer_peek(lexer);
- while (token.type != TOKEN_CLOSE_PAREN) {
- token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
- // TODO: Check for shadowing with globals
- assert_token(Lexer_next(lexer), TOKEN_COLON);
- Type type = parse_type(lexer);
-
- i64 new_count = func->func.num_args + 1;
- func->func.args = realloc(func->func.args, sizeof(Variable) * new_count);
- Variable *var = &func->func.args[func->func.num_args++];
- var->name = token.value.as_string;
- var->type = type;
-
- token = Lexer_peek(lexer);
- if (token.type == TOKEN_COMMA) {
- Lexer_next(lexer);
- token = Lexer_peek(lexer);
- }
- }
- assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
-
- // Set the offsets for the arguments
-
- // IMPORTANT: We want to skip the saved ret_addr+old_rbp that we
- // pushed on the stack. Each of these is 8 bytes.
- int offset = -16;
- for (int i = 0; i < func->func.num_args; i++) {
- Variable *var = &func->func.args[i];
- var->offset = offset;
- // TODO: Compute this for different types
- int var_size = 8;
- offset -= var_size;
- }
-}
-
-Node *parse_func(Lexer *lexer)
-{
- Token token;
- token = assert_token(Lexer_next(lexer), TOKEN_FN);
-
- Node *func = Node_new(AST_FUNC);
- push_new_function(func);
-
- token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
- func->func.name = token.value.as_string;
- parse_func_args(lexer, func);
-
- token = Lexer_peek(lexer);
- if (token.type == TOKEN_COLON) {
- // TODO: Parse all return types
- assert_token(Lexer_next(lexer), TOKEN_COLON);
- func->func.return_type = parse_type(lexer);
- } else {
- // No return type, void fn.
- func->func.return_type = (Type){.type = TYPE_NONE};
- }
-
- // Make sure there's no funny business with the stack offset
- assert(cur_stack_offset == 0);
- assert(block_stack_count == 0);
- func->func.body = parse_block(lexer);
- assert(block_stack_count == 0);
- assert(cur_stack_offset == 0);
-
- return func;
-}
-
-Node *parse_program(Lexer *lexer)
-{
- Node *program = Node_new(AST_PROGRAM);
- Token token;
- while ((token = Lexer_peek(lexer)).type != TOKEN_EOF) {
- if (token.type == TOKEN_FN) {
- Node *func = parse_func(lexer);
- Node_add_child(program, func);
- } else {
- die_location(token.loc, "Unexpected token in parse_program: `%s`\n", token_type_to_str(token.type));
- exit(1);
- break;
- }
- }
- return program;
-} \ No newline at end of file
diff --git a/cup/parser.h b/cup/parser.h
deleted file mode 100644
index 7f7dacb..0000000
--- a/cup/parser.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#pragma once
-
-#include "ast.h"
-#include "lexer.h"
-
-Node *parse_program(Lexer *lexer);
-void print_ast(Node *node); \ No newline at end of file
diff --git a/cup/tokens.c b/cup/tokens.c
deleted file mode 100644
index e562e16..0000000
--- a/cup/tokens.c
+++ /dev/null
@@ -1,70 +0,0 @@
-#include "tokens.h"
-#include <assert.h>
-#include <stdio.h>
-
-Token Token_from_type(TokenType type, Location loc)
-{
- Token token = {0};
- token.type = type;
- token.loc = loc;
- return token;
-}
-
-Token Token_from_int(i64 value, Location loc)
-{
- Token token = {0};
- token.type = TOKEN_INTLIT;
- token.value.as_int = value;
- token.loc = loc;
- return token;
-}
-
-Token Token_from_string(char *value, Location loc)
-{
- Token token = {0};
- token.type = TOKEN_STRINGLIT;
- token.value.as_string = value;
- token.loc = loc;
- return token;
-}
-
-Token Token_from_identifier(char *value, Location loc)
-{
- Token token = {0};
- token.type = TOKEN_IDENTIFIER;
- token.value.as_string = value;
- token.loc = loc;
- return token;
-}
-
-void Location_print(FILE *f, Location loc)
-{
- fprintf(f, "%s:%d:%d", loc.filename, loc.line+1, loc.col+1);
-}
-
-char *token_type_to_str(TokenType type)
-{
- // Otherwise, just print the token type
- switch (type)
- {
- #define ENUM_TOKEN(name, str) case name: return str;
- ENUM_TOKENS(ENUM_TOKEN)
- #undef ENUM_TOKEN
- default: assert(false && "Unreachable");
- }
-}
-
-
-void Token_print(FILE *f, Token *token)
-{
- // Handle the different literals manually
- switch (token->type)
- {
- case TOKEN_INTLIT: fprintf(f, "%lld", token->value.as_int); return;
- case TOKEN_STRINGLIT: fprintf(f, "\"%s\"", token->value.as_string); return;
- case TOKEN_IDENTIFIER: fprintf(f, "%s", token->value.as_string); return;
- default: break;
- }
-
- fprintf(f, "%s", token_type_to_str(token->type));
-} \ No newline at end of file
diff --git a/cup/tokens.h b/cup/tokens.h
deleted file mode 100644
index 44f5e6e..0000000
--- a/cup/tokens.h
+++ /dev/null
@@ -1,83 +0,0 @@
-#pragma once
-
-#include "common.h"
-#include <stdio.h>
-
-#define ENUM_TOKENS(F) \
- F(TOKEN_AMPERSAND, "&") \
- F(TOKEN_AND, "&&") \
- F(TOKEN_ASSIGN, "=") \
- F(TOKEN_BAR, "|") \
- F(TOKEN_CLOSE_BRACE, "}") \
- F(TOKEN_CLOSE_PAREN, ")") \
- F(TOKEN_COLON, ":") \
- F(TOKEN_COMMA, ",") \
- F(TOKEN_ELSE, "else") \
- F(TOKEN_EOF, "EOF") \
- F(TOKEN_EQ, "==") \
- F(TOKEN_EXCLAMATION, "!") \
- F(TOKEN_FN, "fn") \
- F(TOKEN_FOR, "for") \
- F(TOKEN_GEQ, ">=") \
- F(TOKEN_GT, ">") \
- F(TOKEN_IDENTIFIER, "identifier") \
- F(TOKEN_IF, "if") \
- F(TOKEN_INT, "int") \
- F(TOKEN_INTLIT, "integer literal") \
- F(TOKEN_LEQ, "<=") \
- F(TOKEN_LSHIFT, "<<") \
- F(TOKEN_LET, "let") \
- F(TOKEN_LT, "<") \
- F(TOKEN_MINUS, "-") \
- F(TOKEN_MINUSEQUALS, "-=") \
- F(TOKEN_MINUSMINUS, "--") \
- F(TOKEN_NEQ, "!=") \
- F(TOKEN_OPEN_BRACE, "{") \
- F(TOKEN_OPEN_PAREN, "(") \
- F(TOKEN_OR, "||") \
- F(TOKEN_PERCENT, "%") \
- F(TOKEN_PLUS, "+") \
- F(TOKEN_PLUSEQUALS, "+=") \
- F(TOKEN_PLUSPLUS, "++") \
- F(TOKEN_QUESTION, "?") \
- F(TOKEN_RETURN, "return") \
- F(TOKEN_RSHIFT, ">>") \
- F(TOKEN_SEMICOLON, ";") \
- F(TOKEN_SLASH, "/") \
- F(TOKEN_STAR, "*") \
- F(TOKEN_STRINGLIT, "string literal") \
- F(TOKEN_TILDE, "~") \
- F(TOKEN_WHILE, "while") \
- F(TOKEN_XOR, "^")
-
-typedef enum {
-#define ENUM_TOKEN(name, str) name,
- ENUM_TOKENS(ENUM_TOKEN)
-#undef ENUM_TOKEN
-} TokenType;
-
-typedef struct {
- char *filename;
- int line, col;
-} Location;
-
-void Location_print(FILE *f, Location loc);
-
-typedef struct {
- TokenType type;
- Location loc;
- union tokens {
- i64 as_int;
- char *as_string;
- char as_char;
- } value;
-} Token;
-
-char *token_type_to_str(TokenType type);
-
-Token Token_from_type(TokenType type, Location loc);
-Token Token_from_int(i64 value, Location loc);
-Token Token_from_string(char *value, Location loc);
-Token Token_from_identifier(char *value, Location loc);
-
-void Token_print(FILE *f, Token *token); \ No newline at end of file
diff --git a/cup/utils.c b/cup/utils.c
deleted file mode 100644
index 2335981..0000000
--- a/cup/utils.c
+++ /dev/null
@@ -1,30 +0,0 @@
-#include "utils.h"
-
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-void die(const char *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
- exit(1);
-}
-
-void _die_location(char *file, int line, Location loc, const char *fmt, ...)
-{
- Location_print(stderr, loc);
- fprintf(stderr, ": ");
- va_list args;
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
- fprintf(stderr, "\n");
- fprintf(stderr, "NOTE: Error occurred in %s:%d\n", file, line);
- exit(1);
-}
-
-i64 i64max(i64 a, i64 b) { return a > b ? a : b; }
-i64 i64min(i64 a, i64 b) { return a < b ? a : b; } \ No newline at end of file
diff --git a/cup/utils.h b/cup/utils.h
deleted file mode 100644
index dfc018c..0000000
--- a/cup/utils.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-#include "common.h"
-#include "tokens.h"
-
-void die(const char *fmt, ...);
-void _die_location(char *file, int line, Location loc, const char *fmt, ...);
-
-i64 i64max(i64 a, i64 b);
-i64 i64min(i64 a, i64 b);
-
-#define die_location(loc, ...) _die_location(__FILE__, __LINE__, loc, __VA_ARGS__) \ No newline at end of file