diff options
| author | Mustafa Quraish <[email protected]> | 2022-02-05 08:23:14 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-02-05 08:56:15 -0500 |
| commit | aeaf92127d1c090f9281616e49ad10dda414bd45 (patch) | |
| tree | f85127c08b0caa13b95b3fb80e2996d3b5186434 | |
| parent | Remove old test which disallowed initializing globals (diff) | |
| download | cup-aeaf92127d1c090f9281616e49ad10dda414bd45.tar.xz cup-aeaf92127d1c090f9281616e49ad10dda414bd45.zip | |
Add implementation of self-hosted compiler so far
There's also a `run.sh2` script which does the following:
- Compiles the C compiler `build/cupcc`
- Compiles the self-hosted compiler `build/cup.out` (with `cupcc`)
- Compiles the specified file on CLI with `build/cup.out`
- Runs this executable and shows the output
| -rw-r--r-- | compiler/README.md | 3 | ||||
| -rw-r--r-- | compiler/ast.cup | 317 | ||||
| -rw-r--r-- | compiler/codegen.cup | 151 | ||||
| -rw-r--r-- | compiler/lexer.cup | 288 | ||||
| -rw-r--r-- | compiler/main.cup | 34 | ||||
| -rw-r--r-- | compiler/parser.cup | 433 | ||||
| -rw-r--r-- | compiler/tokens.cup | 238 | ||||
| -rw-r--r-- | compiler/types.cup | 82 | ||||
| -rwxr-xr-x | run.sh2 | 28 |
9 files changed, 1574 insertions, 0 deletions
diff --git a/compiler/README.md b/compiler/README.md new file mode 100644 index 0000000..0e58d43 --- /dev/null +++ b/compiler/README.md @@ -0,0 +1,3 @@ +# CUP Compiler in CUP + +This is the beginnings of a CUP compiler written in itself.
\ No newline at end of file diff --git a/compiler/ast.cup b/compiler/ast.cup new file mode 100644 index 0000000..689f7fb --- /dev/null +++ b/compiler/ast.cup @@ -0,0 +1,317 @@ +import "std/vector.cup" +import "compiler/types.cup" + +enum NodeType { + // Unary + AST_NEG, + AST_NOT, + AST_BWINV, + AST_ADDROF, + AST_DEREF, + // Binary + AST_PLUS, + AST_MINUS, + AST_MUL, + AST_DIV, + AST_MOD, + AST_LSHIFT, + AST_RSHIFT, + AST_AND, + AST_BWAND, + AST_OR, + AST_BWOR, + AST_XOR, + // Comparison + AST_EQ, + AST_NEQ, + AST_LT, + AST_LEQ, + AST_GT, + AST_GEQ, + // Misc. + AST_ASSIGN, + AST_MEMBER, + // AST types + AST_LITERAL, + AST_CONSTANT, + AST_FUNCCALL, + AST_CONDITIONAL, + AST_IF, + AST_WHILE, + AST_DEFER, + AST_FOR, + AST_VARDECL, + AST_LOCAL_VAR, + AST_GLOBAL_VAR, + AST_RETURN, + AST_FUNC, + AST_BUILTIN, + AST_PROGRAM, + AST_BLOCK, +}; + +struct Variable { + name: char *; + typ: Type *; + offset: int; +}; + +struct Node { + typ: int; // NodeType + etyp: Type*; // Expression type + + // TODO: Anonymous union members so we can do `Node.binary`, etc. 
+ d: union { + binary: struct { + lhs: Node *; + rhs: Node *; + }; + + unary: Node *; + + func: struct { + name: char *; + body: Node *; + max_locals_size: int; + args: Vector *; // Vector<Variable> + }; + + block: struct { + children: Vector *; // Vector<Node *> + locals: Vector *; // Vector<Variable> + locals_size: int; + }; + + literal: union { + as_int: int; + as_char: char; + as_string: char *; + }; + + var_decl: struct { + var: Variable; + init: Node *; + }; + + assign: struct { + lhs: Node *; + rhs: Node *; + }; + + conditional: struct { + cond: Node *; + then: Node *; + els: Node *; + }; + + // `loop` is keyword in rust, syntax highlighting breaks + looop: struct { + cond: Node *; + body: Node *; + // for loop: + init: Node *; + step: Node *; + }; + + variable: Variable *; + + call: struct { + func: Node *; + args: Vector *; // Vector<Node *> + }; + + member: struct { + obj: Node *; + offset: int; + is_ptr: int; + }; + + constant: struct { + name: char *; + value: Node *; // Must be int literal + }; + }; +}; + +let node_counter = 0; + +fn node_new(typ: int): Node* { + let node: Node* = malloc(sizeof(Node)); + ++node_counter; + node.typ = typ; + return node; +} + +fn node_from_int_literal(val: int): Node* { + let node: Node* = node_new(AST_LITERAL); + node.etyp = type_new(TYPE_INT); + node.d.literal.as_int = val; + return node; +} + +fn block_add_child(block: Node*, child: Node*) { + if (block.d.block.children == null) + block.d.block.children = vector_new(); + vector_push(block.d.block.children, child); +} + +// TODO: Careful here, the input type here is the same as `type_to_string` +fn node_type_to_string(typ: int): char* { + if (typ == AST_NEG) return "AST_NEG"; + if (typ == AST_NOT) return "AST_NOT"; + if (typ == AST_BWINV) return "AST_BWINV"; + if (typ == AST_ADDROF) return "AST_ADDROF"; + if (typ == AST_DEREF) return "AST_DEREF"; + if (typ == AST_PLUS) return "AST_PLUS"; + if (typ == AST_MINUS) return "AST_MINUS"; + if (typ == AST_MUL) return 
"AST_MUL"; + if (typ == AST_DIV) return "AST_DIV"; + if (typ == AST_MOD) return "AST_MOD"; + if (typ == AST_LSHIFT) return "AST_LSHIFT"; + if (typ == AST_RSHIFT) return "AST_RSHIFT"; + if (typ == AST_AND) return "AST_AND"; + if (typ == AST_BWAND) return "AST_BWAND"; + if (typ == AST_OR) return "AST_OR"; + if (typ == AST_BWOR) return "AST_BWOR"; + if (typ == AST_XOR) return "AST_XOR"; + if (typ == AST_EQ) return "AST_EQ"; + if (typ == AST_NEQ) return "AST_NEQ"; + if (typ == AST_LT) return "AST_LT"; + if (typ == AST_LEQ) return "AST_LEQ"; + if (typ == AST_GT) return "AST_GT"; + if (typ == AST_GEQ) return "AST_GEQ"; + if (typ == AST_ASSIGN) return "AST_ASSIGN"; + if (typ == AST_MEMBER) return "AST_MEMBER"; + if (typ == AST_LITERAL) return "AST_LITERAL"; + if (typ == AST_CONSTANT) return "AST_CONSTANT"; + if (typ == AST_FUNCCALL) return "AST_FUNCCALL"; + if (typ == AST_CONDITIONAL) return "AST_CONDITIONAL"; + if (typ == AST_IF) return "AST_IF"; + if (typ == AST_WHILE) return "AST_WHILE"; + if (typ == AST_DEFER) return "AST_DEFER"; + if (typ == AST_FOR) return "AST_FOR"; + if (typ == AST_VARDECL) return "AST_VARDECL"; + if (typ == AST_LOCAL_VAR) return "AST_LOCAL_VAR"; + if (typ == AST_GLOBAL_VAR) return "AST_GLOBAL_VAR"; + if (typ == AST_RETURN) return "AST_RETURN"; + if (typ == AST_FUNC) return "AST_FUNC"; + if (typ == AST_BUILTIN) return "AST_BUILTIN"; + if (typ == AST_PROGRAM) return "AST_PROGRAM"; + if (typ == AST_BLOCK) return "AST_BLOCK"; + + puts("Unknown node type in node_type_to_string: "); + putu(typ); putc('\n'); + exit(1); +} + +fn is_binary_op(typ: int): int { + if (typ == AST_PLUS) return true; + if (typ == AST_MINUS) return true; + if (typ == AST_MUL) return true; + if (typ == AST_DIV) return true; + if (typ == AST_MOD) return true; + if (typ == AST_LSHIFT) return true; + if (typ == AST_RSHIFT) return true; + if (typ == AST_AND) return true; + if (typ == AST_BWAND) return true; + if (typ == AST_OR) return true; + if (typ == AST_BWOR) return true; + if 
(typ == AST_XOR) return true; + if (typ == AST_EQ) return true; + if (typ == AST_NEQ) return true; + if (typ == AST_LT) return true; + if (typ == AST_LEQ) return true; + if (typ == AST_GT) return true; + if (typ == AST_GEQ) return true; + return false; +} + +fn is_unary_op(typ: int): int { + if (typ == AST_NEG) return true; + if (typ == AST_NOT) return true; + if (typ == AST_BWINV) return true; + if (typ == AST_ADDROF) return true; + if (typ == AST_DEREF) return true; + return false; +} + +fn is_lvalue(typ: int): int { + if (typ == AST_LOCAL_VAR) return true; + if (typ == AST_GLOBAL_VAR) return true; + if (typ == AST_MEMBER) return true; + if (typ == AST_DEREF) return true; + return false; +} + +fn binary_token_to_op(token_typ: int): int +{ + if (token_typ == TOKEN_PLUS) return AST_PLUS; + if (token_typ == TOKEN_MINUS) return AST_MINUS; + if (token_typ == TOKEN_STAR) return AST_MUL; + if (token_typ == TOKEN_SLASH) return AST_DIV; + if (token_typ == TOKEN_PERCENT) return AST_MOD; + if (token_typ == TOKEN_LSHIFT) return AST_LSHIFT; + if (token_typ == TOKEN_RSHIFT) return AST_RSHIFT; + if (token_typ == TOKEN_AND) return AST_AND; + if (token_typ == TOKEN_OR) return AST_OR; + if (token_typ == TOKEN_XOR) return AST_XOR; + if (token_typ == TOKEN_EQ) return AST_EQ; + if (token_typ == TOKEN_NEQ) return AST_NEQ; + if (token_typ == TOKEN_LT) return AST_LT; + if (token_typ == TOKEN_LEQ) return AST_LEQ; + if (token_typ == TOKEN_GT) return AST_GT; + if (token_typ == TOKEN_GEQ) return AST_GEQ; + if (token_typ == TOKEN_AMPERSAND) return AST_BWAND; + if (token_typ == TOKEN_BAR) return AST_BWOR; + if (token_typ == TOKEN_CARET) return AST_XOR; + + puts("Unknown token in binary_token_to_op: "); + putsln(token_type_to_string(token_typ)); + exit(1); +} + +fn dump_ast(node: Node*, depth: int) { + for (let i = 0; i < 2*depth; ++i) + putc(' '); + if (node.typ == AST_PROGRAM || node.typ == AST_BLOCK) { + putsln(node_type_to_string(node.typ)); + for (let i = 0; i < 
node.d.block.children.size; ++i) { + dump_ast(node.d.block.children.data[i], depth + 1); + } + } else if (is_binary_op(node.typ)) { + putsln(node_type_to_string(node.typ)); + dump_ast(node.d.binary.lhs, depth + 1); + dump_ast(node.d.binary.rhs, depth + 1); + } else if (is_unary_op(node.typ) || node.typ == AST_RETURN) { + putsln(node_type_to_string(node.typ)); + dump_ast(node.d.unary, depth + 1); + + } else if (node.typ == AST_LITERAL) { + if (node.etyp.typ == TYPE_INT) { + putu(node.d.literal.as_int); putc('\n'); + } else if (node.etyp.typ == TYPE_PTR) { + putc('"'); puts(node.d.literal.as_string); putc('"'); putc('\n'); + } else if (node.etyp.typ == TYPE_CHAR) { + putc('\''); putc(node.d.literal.as_char); putc('\''); putc('\n'); + } else { + die("Unknown literal type in dump_ast"); + } + } else if (node.typ == AST_FUNC) { + puts("func "); puts(node.d.func.name); puts("()\n"); + dump_ast(node.d.func.body, depth + 1); + } else if (node.typ == AST_VARDECL) { + puts("let "); puts(node.d.var_decl.var.name); + if (node.d.var_decl.var.typ == TYPE_PTR) { + puts(": "); + puts(create_type_string(node.d.var_decl.var.typ)); + } + if (node.d.var_decl.init) { + puts(" =\n"); + dump_ast(node.d.var_decl.init, depth + 1); + } else { + putc('\n'); + } + } else { + putsln(node_type_to_string(node.typ)); + } +}
\ No newline at end of file diff --git a/compiler/codegen.cup b/compiler/codegen.cup new file mode 100644 index 0000000..41eea33 --- /dev/null +++ b/compiler/codegen.cup @@ -0,0 +1,151 @@ +import "compiler/ast.cup" +import "std/file.cup" + +let gen_out_file: File*; + +fn emit_asm4(msg1: char*, msg2: char*, msg3: char*, msg4: char*) { + fwrite(gen_out_file, msg1, strlen(msg1)); + fwrite(gen_out_file, msg2, strlen(msg2)); + fwrite(gen_out_file, msg3, strlen(msg3)); + fwrite(gen_out_file, msg4, strlen(msg4)); +} + +fn emit_asm3(msg1: char*, msg2: char*, msg3: char*) { + fwrite(gen_out_file, msg1, strlen(msg1)); + fwrite(gen_out_file, msg2, strlen(msg2)); + fwrite(gen_out_file, msg3, strlen(msg3)); +} + +fn emit_asm2(msg1: char*, msg2: char*) { + fwrite(gen_out_file, msg1, strlen(msg1)); + fwrite(gen_out_file, msg2, strlen(msg2)); +} + +fn emit_asm(msg: char*) { + fwrite(gen_out_file, msg, strlen(msg)); +} + +fn emit_num(num: int) { + fputu(gen_out_file, num); +} + +fn generate_syscall(num: int) { + emit_asm(" mov rax, "); emit_num(num); emit_asm("\n"); + emit_asm(" syscall\n"); +} + +fn generate_expr_into_rax(node: Node*) { + if (node.typ == AST_LITERAL) { + if (node.etyp.typ == TYPE_INT) { + emit_asm(" mov rax, "); emit_num(node.d.literal.as_int); emit_asm("\n"); + } else { + die("Unsupported literal type in generate_expr_into_rax"); + } + } else if (node.typ == AST_PLUS) { + generate_expr_into_rax(node.d.binary.rhs); + emit_asm(" push rax\n"); + generate_expr_into_rax(node.d.binary.lhs); + emit_asm(" pop rbx\n"); + emit_asm(" add rax, rbx\n"); + } else if (node.typ == AST_MINUS) { + generate_expr_into_rax(node.d.binary.rhs); + emit_asm(" push rax\n"); + generate_expr_into_rax(node.d.binary.lhs); + emit_asm(" pop rbx\n"); + emit_asm(" sub rax, rbx\n"); + } else if (node.typ == AST_DIV) { + generate_expr_into_rax(node.d.binary.rhs); + emit_asm(" push rax\n"); + generate_expr_into_rax(node.d.binary.lhs); + emit_asm(" pop rbx\n"); + emit_asm(" cqo\n"); + emit_asm(" idiv 
rbx\n"); + + } else if (node.typ == AST_MOD) { + generate_expr_into_rax(node.d.binary.rhs); + emit_asm(" push rax\n"); + generate_expr_into_rax(node.d.binary.lhs); + emit_asm(" pop rbx\n"); + emit_asm(" cqo\n"); + emit_asm(" idiv rbx\n"); + emit_asm(" mov rax, rdx\n"); + + } else if (node.typ == AST_MUL) { + generate_expr_into_rax(node.d.binary.rhs); + emit_asm(" push rax\n"); + generate_expr_into_rax(node.d.binary.lhs); + emit_asm(" pop rbx\n"); + emit_asm(" imul rbx\n"); + } +} + +fn generate_statement(node: Node*) { + if (node.typ == AST_RETURN) { + generate_expr_into_rax(node.d.unary); + emit_asm(" mov rsp, rbp\n"); + emit_asm(" pop rbp\n"); + emit_asm(" ret\n"); + } +} + +fn generate_block(node: Node*) { + let n = node.d.block.children.size; + for (let i = 0; i < n; ++i) { + generate_statement(node.d.block.children.data[i]); + } +} + +fn generate_function(node: Node*) { + emit_asm3("global func_", node.d.func.name, "\n"); + emit_asm3("func_", node.d.func.name, ":\n"); + emit_asm(" push rbp\n"); + emit_asm(" mov rbp, rsp\n"); + emit_asm(" sub rsp, "); emit_num(node.d.func.max_locals_size); emit_asm("\n"); + + generate_block(node.d.func.body); + + emit_asm(" mov rsp, rbp\n"); + emit_asm(" pop rbp\n"); + emit_asm(" ret\n"); +} + +fn generate_program(ast: Node*, file: File*) { + gen_out_file = file; + + let n = ast.d.block.children.size; + for (let i = 0; i < n; ++i) { + let node: Node* = ast.d.block.children.data[i]; + if (node.typ == AST_FUNC) { + generate_function(node); + } else { + die("Unknown node type in generate_program"); + } + } + + if (OS_IS_MACOS) { + emit_asm("global _main\n"); + emit_asm("_main:\n"); + // Push argv + emit_asm(" mov rax, rsi\n"); + emit_asm(" push rax\n"); + // Push argc + emit_asm(" mov rax, rdi\n"); + emit_asm(" push rax\n"); + } else { + emit_asm("global _start\n"); + emit_asm("_start:\n"); + + emit_asm(" mov rbp, rsp\n"); + // // Push argv + emit_asm(" mov rax, rbp\n"); + emit_asm(" add rax, 8\n"); + emit_asm(" push rax\n"); + // 
Push argc + emit_asm(" mov rax, [rbp]\n"); + emit_asm(" push rax\n"); + } + + emit_asm(" call func_main\n"); + emit_asm(" mov rdi, rax\n"); + generate_syscall(SYS_exit); +}
\ No newline at end of file diff --git a/compiler/lexer.cup b/compiler/lexer.cup new file mode 100644 index 0000000..ff22d8f --- /dev/null +++ b/compiler/lexer.cup @@ -0,0 +1,288 @@ +import "compiler/tokens.cup" + +struct Lexer { + src: char*; + len: int; + pos: int; + + filename: char*; + line: int; + col: int; +}; + +fn lexer_new(filename: char*, src: char*, len: int): Lexer* { + let lexer: Lexer* = malloc(sizeof(Lexer)); + lexer.filename = filename; + lexer.src = src; + lexer.len = len; + return lexer; +} + +fn lexer_loc(lexer: Lexer*, loc: Location*) { + loc.filename = lexer.filename; + loc.line = lexer.line; + loc.col = lexer.col; +} + +fn is_space(c: char): int { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} + +fn is_digit(c: char): int { + return c >= '0' && c <= '9'; +} + +fn is_alpha(c: char): int { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} + +fn is_alnum(c: char): int { + return is_digit(c) || is_alpha(c); +} + +fn lexer_skip_whitespace(lexer: Lexer*) { + while (lexer.pos < lexer.len && is_space(lexer.src[lexer.pos])) { + if (lexer.src[lexer.pos] == '\n') { + lexer.line = lexer.pos + 1; + lexer.col = 0; + } else { + lexer.col = lexer.col + 1; + } + lexer.pos = lexer.pos + 1; + } +} + +fn lexer_starts_with(lexer: Lexer*, str: char*): int { + let len = strlen(str); + if (lexer.len - lexer.pos < len) + return 0; + for (let i = 0; i < len; ++i) + if (lexer.src[lexer.pos + i] != str[i]) + return 0; + let end_pos = lexer.pos + len; + if (end_pos == lexer.len) + return len; + let end_char = lexer.src[end_pos]; + return !(is_digit(end_char) || is_alpha(end_char)); +} + +fn lexer_advance(lexer: Lexer*, n: int) { + lexer.pos = lexer.pos + n; + lexer.col = lexer.col + n; +} + +fn lexer_peek_char(lexer: Lexer*, n: int): char { + if (lexer.pos + n >= lexer.len) + return 0; + return lexer.src[lexer.pos + n]; +} + +fn lexer_make_token(lexer: Lexer*, token: Token*, typ: int, inc: int) { + lexer_loc(lexer, &token.loc); + 
lexer_advance(lexer, inc); + token.typ = typ; +} + +fn lexer_next(lexer: Lexer*, token: Token*) { + while (lexer.pos < lexer.len) { + putsln("101.1"); + print(lexer.pos); + print(lexer.len); + let c = lexer.src[lexer.pos]; + putc(c); + putc('\n'); + + if (c == '\n') { ++lexer.line; lexer.col = 0; ++lexer.pos; } + else if (is_space(c)) { lexer_advance(lexer, 1); } + else if (c == '(') { return lexer_make_token(lexer, token, TOKEN_OPEN_PAREN, 1); } + else if (c == ')') { return lexer_make_token(lexer, token, TOKEN_CLOSE_PAREN, 1); } + else if (c == '{') { return lexer_make_token(lexer, token, TOKEN_OPEN_BRACE, 1); } + else if (c == '}') { return lexer_make_token(lexer, token, TOKEN_CLOSE_BRACE, 1); } + else if (c == '[') { return lexer_make_token(lexer, token, TOKEN_OPEN_BRACKET, 1); } + else if (c == ']') { return lexer_make_token(lexer, token, TOKEN_CLOSE_BRACKET, 1); } + else if (c == ';') { return lexer_make_token(lexer, token, TOKEN_SEMICOLON, 1); } + else if (c == ':') { return lexer_make_token(lexer, token, TOKEN_COLON, 1); } + else if (c == '~') { return lexer_make_token(lexer, token, TOKEN_TILDE, 1); } + else if (c == '?') { return lexer_make_token(lexer, token, TOKEN_QUESTION, 1); } + else if (c == '^') { return lexer_make_token(lexer, token, TOKEN_CARET, 1); } + else if (c == '.') { return lexer_make_token(lexer, token, TOKEN_DOT, 1); } + else if (c == ',') { return lexer_make_token(lexer, token, TOKEN_COMMA, 1); } + else if (c == '*') { return lexer_make_token(lexer, token, TOKEN_STAR, 1); } + else if (c == '%') { return lexer_make_token(lexer, token, TOKEN_PERCENT, 1); } + + else if (c == '/' && lexer_peek_char(lexer, 1) == '/') { + lexer.pos = lexer.pos + 2; // skip the '//' + while (lexer.pos < lexer.len && lexer.src[lexer.pos] != '\n') + ++lexer.pos; + // Implicit `continue` + } + + // This needs to go after the comment check. 
+ else if (c == '/') { + return lexer_make_token(lexer, token, TOKEN_SLASH, 1); + } + + else if (c == '&') { + if (lexer_peek_char(lexer, 1) == '&') + return lexer_make_token(lexer, token, TOKEN_AND, 2); + return lexer_make_token(lexer, token, TOKEN_AMPERSAND, 1); + } + + else if (c == '!') { + if (lexer_peek_char(lexer, 1) == '=') + return lexer_make_token(lexer, token, TOKEN_NEQ, 2); + return lexer_make_token(lexer, token, TOKEN_EXCLAMATION, 1); + } + + else if (c == '<') { + if (lexer_peek_char(lexer, 1) == '=') + return lexer_make_token(lexer, token, TOKEN_LEQ, 2); + return lexer_make_token(lexer, token, TOKEN_LT, 1); + } + + else if (c == '>') { + if (lexer_peek_char(lexer, 1) == '=') + return lexer_make_token(lexer, token, TOKEN_GEQ, 2); + return lexer_make_token(lexer, token, TOKEN_GT, 1); + } + + else if (c == '=') { + if (lexer_peek_char(lexer, 1) == '=') + return lexer_make_token(lexer, token, TOKEN_EQ, 2); + return lexer_make_token(lexer, token, TOKEN_ASSIGN, 1); + } + + else if (c == '|') { + if (lexer_peek_char(lexer, 1) == '|') + return lexer_make_token(lexer, token, TOKEN_OR, 2); + return lexer_make_token(lexer, token, TOKEN_BAR, 1); + } + + + else if (c == '+') { + if (lexer_peek_char(lexer, 1) == '+') + return lexer_make_token(lexer, token, TOKEN_PLUSPLUS, 2); + if (lexer_peek_char(lexer, 1) == '=') + return lexer_make_token(lexer, token, TOKEN_PLUSEQUALS, 2); + return lexer_make_token(lexer, token, TOKEN_PLUS, 1); + } + + else if (c == '-') { + if (lexer_peek_char(lexer, 1) == '-') + return lexer_make_token(lexer, token, TOKEN_MINUSMINUS, 2); + if (lexer_peek_char(lexer, 1) == '=') + return lexer_make_token(lexer, token, TOKEN_MINUSEQUALS, 2); + return lexer_make_token(lexer, token, TOKEN_MINUS, 1); + } + + else { + // Parse the keywords... 
+ for (let i = TOKEN__KEYWORD_BEGIN+1; i < TOKEN__KEYWORD_END; ++i) { + let str = keyword_to_string(i); + if (lexer_starts_with(lexer, str)) { + return lexer_make_token(lexer, token, i, strlen(str)); + } + } + + // Parse numbers: + if (is_digit(c)) { + // TODO: Parse hex and octal numbers + let pos = lexer.pos; + while (pos < lexer.len && is_digit(lexer.src[pos])) + ++pos; + let loc: Location; + lexer_loc(lexer, &loc); + token_from_int(token, atoi(lexer.src + lexer.pos), &loc); + lexer_advance(lexer, pos - lexer.pos); + return; + } + + // Parse identifiers: + if (is_alpha(lexer.src[lexer.pos])) { + let pos = lexer.pos; + while (pos < lexer.len && is_alnum(lexer.src[pos])) + ++pos; + let str_len = pos - lexer.pos; + let str: char* = malloc(str_len + 1); + memcpy(str, lexer.src + lexer.pos, str_len); + str[str_len] = '\0'; + let loc: Location; + lexer_loc(lexer, &loc); + token_from_identifier(token, str, &loc); + lexer_advance(lexer, str_len); + return; + } + + if (c == '"') { + let pos = lexer.pos + 1; + while (pos < lexer.len && lexer.src[pos] != '"') + ++pos; + + let loc: Location; + lexer_loc(lexer, &loc); + + if (pos == lexer.len) + die_loc(&loc, "EOF while parsing string literal"); + + // Careful with indexing here, because we want to skip opening and closing quotes + let str_len = pos - lexer.pos - 1; + let str: char* = malloc(str_len + 1); + memcpy(str, lexer.src + lexer.pos + 1, str_len); + str[str_len] = '\0'; + token_from_string(token, str, &loc); + lexer_advance(lexer, pos - lexer.pos + 1); + return; + } + + if (c == '\'') { + let pos = lexer.pos + 1; + // TODO: Handle malformed / incomplete literals + // TODO: Handle escapes + c = lexer.src[pos]; + if (c == '\\') { + ++pos; + c = lexer.src[pos]; + if (c == 'n') { c = '\n'; } + else if (c == 't') { c = '\t'; } + else if (c == 'n') { c = '\n'; } + else if (c == 'r') { c = '\r'; } + else if (c == 't') { c = '\t'; } + else if (c == '0') { c = '\0'; } + else { } + // TODO: Handle octal and hex escapes + } + + 
let loc: Location; + lexer_loc(lexer, &loc); + token_from_char(token, c, &loc); + lexer_advance(lexer, pos - lexer.pos + 2); + return; + } + + puts("Unknown character in lexer_next: '"); putc(c); putsln("'"); + die("Exiting"); + } + } + return lexer_make_token(lexer, token, TOKEN_EOF, 0); +} + +fn lexer_next_assert(lexer: Lexer*, token: Token*, expected: int) { + lexer_next(lexer, token); + if (token.typ != expected) { + location_print(&token.loc); + puts(": Expected "); puts(token_type_to_string(expected)); + puts(" but got "); puts(token_type_to_string(token.typ)); + putc('\n'); + exit(1); + } +} + +fn lexer_peek(lexer: Lexer*, token: Token*) { + let pos = lexer.pos; + let col = lexer.col; + let line = lexer.line; + lexer_next(lexer, token); + lexer.pos = pos; + lexer.col = col; + lexer.line = line; +}
\ No newline at end of file diff --git a/compiler/main.cup b/compiler/main.cup new file mode 100644 index 0000000..a0a3476 --- /dev/null +++ b/compiler/main.cup @@ -0,0 +1,34 @@ +import "std/file.cup" +import "compiler/lexer.cup" +import "compiler/parser.cup" +import "compiler/codegen.cup" + +fn main(argc: int, argv: char **): int { + if (argc != 2) + die("Usage: cupcc <input_file>"); + + let input_file = fopen(argv[1], 'r'); + defer fclose(input_file); + + // using `fmap` here doesn't work on linux, for some reason. + let file_size = fsize(input_file); + let src: char* = malloc(file_size+1); + fread(input_file, src, file_size); + src[file_size] = '\0'; + + let lexer = lexer_new(argv[1], src, file_size); + let ast = parse_program(lexer); + + dump_ast(ast, 0); + + let out_file = fopen("build/host.nasm", 'w'); + defer fclose(out_file); + + generate_program(ast, out_file); + + puts("---------------------------\n"); + + puts("Total amount of memory used by malloc: "); + putu(__malloc_buf_pos); + putsln("\nDone."); +}
\ No newline at end of file diff --git a/compiler/parser.cup b/compiler/parser.cup new file mode 100644 index 0000000..48e4514 --- /dev/null +++ b/compiler/parser.cup @@ -0,0 +1,433 @@ +import "compiler/ast.cup" +import "compiler/lexer.cup" + +// p_ prefix for parser global variables. + +let p_all_functions = vector_new(); + +let p_block_stack = vector_new(); +let p_cur_stack_offset = 0; + +fn parse_literal(lexer: Lexer*): Node* { + let token: Token; + lexer_next(lexer, &token); + let node = node_new(AST_LITERAL); + + if (token.typ == TOKEN_INTLIT) { + node.d.literal.as_int = token.value.as_int; + node.etyp = type_new(TYPE_INT); + } else if (token.typ == TOKEN_STRINGLIT) { + node.d.literal.as_string = token.value.as_string; + node.etyp = type_new_ptr(TYPE_CHAR); + } else if (token.typ == TOKEN_CHARLIT) { + node.d.literal.as_char = token.value.as_char; + node.etyp = type_new(TYPE_CHAR); + } else { + die_loc2(&token.loc, "Unexpected token in parse_literal: ", token_type_to_string(token.typ)); + } + return node; +} + +fn parse_type(lexer: Lexer*): Type* { + let token: Token; + let typ: Type *; + lexer_peek(lexer, &token); + if (token.typ == TOKEN_INT) { + lexer_next(lexer, &token); + typ = type_new(TYPE_INT); + } else if (token.typ == TOKEN_CHAR) { + lexer_next(lexer, &token); + typ = type_new(TYPE_CHAR); + } else if (token.typ == TOKEN_VOID) { + lexer_next(lexer, &token); + typ = type_new(TYPE_VOID); + } + + let running = true; + while (running) { + lexer_peek(lexer, &token); + if (token.typ == TOKEN_STAR) { + lexer_next(lexer, &token); + let ptr = type_new(TYPE_PTR); + ptr.ptr = typ; + typ = ptr; + } else if (token.typ == TOKEN_OPEN_BRACKET) { + die("Array types not yet implemented"); + } else { + running = false; + } + } + return typ; +} + +// pragma region expressions +fn parse_expression(lexer: Lexer*): Node*; + +fn parse_factor(lexer: Lexer*): Node* { + let token: Token; + let expr: Node*; + lexer_peek(lexer, &token); + + if (token.typ == TOKEN_MINUS) { + 
lexer_next(lexer, &token); + expr = node_new(AST_NEG); + expr.d.unary = parse_factor(lexer); + + } else if (token.typ == TOKEN_TILDE) { + lexer_next(lexer, &token); + expr = node_new(AST_BWINV); + expr.d.unary = parse_factor(lexer); + + } else if (token.typ == TOKEN_EXCLAMATION) { + lexer_next(lexer, &token); + expr = node_new(AST_NOT); + expr.d.unary = parse_factor(lexer); + + } else if (token.typ == TOKEN_OPEN_PAREN) { + lexer_next(lexer, &token); + expr = parse_expression(lexer); + lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); + + } else if (is_literal_token(token.typ)) { + expr = parse_literal(lexer); + + } else { + die_loc2(&token.loc, ": Unexpected token found in parse_factor: ", token_type_to_string(token.typ)); + } + return expr; +} + +// This is absolutely terrible, but I'm not sure how to do it better without macros... +fn parse_term(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_factor(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_STAR || token.typ == TOKEN_SLASH || token.typ == TOKEN_PERCENT) { + lexer_next(lexer, &token); + let op = node_new(binary_token_to_op(token.typ)); + let rhs = parse_factor(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_additive(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_term(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_PLUS || token.typ == TOKEN_MINUS) { + lexer_next(lexer, &token); + let op = node_new(binary_token_to_op(token.typ)); + let rhs = parse_term(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_relational(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_additive(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_LT || token.typ == TOKEN_LEQ || + token.typ == TOKEN_GT || token.typ == TOKEN_GEQ) { + lexer_next(lexer, &token); + let op = 
node_new(binary_token_to_op(token.typ)); + let rhs = parse_additive(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_equality(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_relational(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_EQ || token.typ == TOKEN_NEQ) { + lexer_next(lexer, &token); + let op = node_new(binary_token_to_op(token.typ)); + let rhs = parse_relational(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_and(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_equality(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_AMPERSAND) { + lexer_next(lexer, &token); + let op = node_new(binary_token_to_op(token.typ)); + let rhs = parse_equality(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_exclusive_or(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_and(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_CARET) { + lexer_next(lexer, &token); + let op = node_new(binary_token_to_op(token.typ)); + let rhs = parse_and(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_inclusive_or(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_exclusive_or(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_BAR) { + lexer_next(lexer, &token); + let op = node_new(binary_token_to_op(token.typ)); + let rhs = parse_exclusive_or(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_logical_and(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_inclusive_or(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_AND) { + lexer_next(lexer, 
&token); + let op = node_new(binary_token_to_op(token.typ)); + let rhs = parse_inclusive_or(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_logical_or(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_logical_and(lexer); + lexer_peek(lexer, &token); + while (token.typ == TOKEN_OR) { + lexer_next(lexer, &token); + let op = node_new(binary_token_to_op(token.typ)); + let rhs = parse_logical_and(lexer); + op.d.binary.lhs = lhs; + op.d.binary.rhs = rhs; + lhs = op; + lexer_peek(lexer, &token); + } + return lhs; +} + +fn parse_conditional_exp(lexer: Lexer*): Node* { + let token: Token; + + let lhs = parse_logical_or(lexer); + lexer_peek(lexer, &token); + if (token.typ == TOKEN_QUESTION) { + lexer_next(lexer, &token); + let then = parse_expression(lexer); + lexer_next_assert(lexer, &token, TOKEN_COLON); + let els = parse_expression(lexer); + + let cond = node_new(AST_CONDITIONAL); + cond.d.conditional.cond = lhs; + cond.d.conditional.then = then; + cond.d.conditional.els = els; + + lhs = cond; + } + return lhs; +} + +fn parse_expression(lexer: Lexer*): Node* { + return parse_conditional_exp(lexer); +} + +fn parse_var_declaration(lexer: Lexer*): Node* { + let token: Token; + lexer_next_assert(lexer, &token, TOKEN_LET); + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + // TODO: check if identifier is already defined + let node = node_new(AST_VARDECL); + node.d.var_decl.var.name = token.value.as_string; + + lexer_peek(lexer, &token); + let has_type = false; + if (token.typ == TOKEN_COLON) { + lexer_next(lexer, &token); + has_type = true; + node.d.var_decl.var.typ = parse_type(lexer); + lexer_peek(lexer, &token); + } + + if (token.typ == TOKEN_ASSIGN) { + lexer_next(lexer, &token); + node.d.var_decl.init = parse_expression(lexer); + } else if (!has_type) { + die_loc(&token.loc, "Expected ':' or '=' after variable declaration"); + } + + return node; +} + +fn 
parse_function_params(lexer: Lexer*, func: Node*) { + let token: Token; + lexer_peek(lexer, &token); + // TODO: Actually parse params + while (token.typ != TOKEN_CLOSE_PAREN) { + lexer_next(lexer, &token); + } +} + +fn parse_block(lexer: Lexer*): Node*; + + +fn parse_statement(lexer: Lexer*): Node* { + let node: Node*; + let token: Token; + + lexer_peek(lexer, &token); + if (token.typ == TOKEN_OPEN_BRACE) { + node = parse_block(lexer); + + } else if (token.typ == TOKEN_RETURN) { + lexer_next(lexer, &token); + node = node_new(AST_RETURN); + + lexer_peek(lexer, &token); + if (token.typ != TOKEN_SEMICOLON) { + node.d.unary = parse_expression(lexer); + } else { + node.d.unary = null; // empty return statment + } + lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); + + } else if (token.typ == TOKEN_IF) { + lexer_next(lexer, &token); + + node = node_new(AST_IF); + + lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); + node.d.conditional.cond = parse_expression(lexer); + lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); + node.d.conditional.then = parse_statement(lexer); + + lexer_peek(lexer, &token); + if (token.typ == TOKEN_ELSE) { + lexer_next(lexer, &token); + node.d.conditional.els = parse_statement(lexer); + } + } else if (token.typ == TOKEN_WHILE) { + die("while is not implemented yet"); + } else if (token.typ == TOKEN_FOR) { + die("for is not implemented yet"); + } else if (token.typ == TOKEN_DEFER) { + die("defer is not implemented yet"); + } else if (token.typ == TOKEN_LET) { + node = parse_var_declaration(lexer); + lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); + } else { + // Default to expression statement + node = parse_expression(lexer); + lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); + } + return node; +} + +fn parse_block(lexer: Lexer*): Node* { + let token: Token; + lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE); + + let block = node_new(AST_BLOCK); + block.d.block.children = vector_new(); + + lexer_peek(lexer, &token); + while 
(token.typ != TOKEN_CLOSE_BRACE) { + block_add_child(block, parse_statement(lexer)); + lexer_peek(lexer, &token); + } + lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); + return block; +} + +fn parse_function(lexer: Lexer*): Node* { + let token: Token; + + lexer_next_assert(lexer, &token, TOKEN_FN); + lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); + // TODO: Check if identifier exists + let node = node_new(AST_FUNC); + node.d.func.name = token.value.as_string; + + lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); + parse_function_params(lexer, node); + lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); + + lexer_peek(lexer, &token); + if (token.typ == TOKEN_COLON) { + lexer_next(lexer, &token); + node.etyp = parse_type(lexer); + } else { + node.etyp = type_new(TYPE_VOID); + } + + node.d.func.body = parse_block(lexer); + return node; +} + +fn parse_program(lexer: Lexer*): Node* { + let node = node_new(AST_PROGRAM); + node.d.block.children = vector_new(); + + let token: Token; + lexer_peek(lexer, &token); + + while (token.typ != TOKEN_EOF) { + if (token.typ == TOKEN_FN) { + block_add_child(node, parse_function(lexer)); + } else if (token.typ == TOKEN_LET) { + block_add_child(node, parse_var_declaration(lexer)); + } else if (token.typ == TOKEN_SEMICOLON) { + lexer_next(lexer, &token); + } else { + die_loc2(&token.loc, "unexpected token in parse_program", token_type_to_string(token.typ)); + } + + lexer_peek(lexer, &token); + } + return node; +}
\ No newline at end of file diff --git a/compiler/tokens.cup b/compiler/tokens.cup new file mode 100644 index 0000000..e991610 --- /dev/null +++ b/compiler/tokens.cup @@ -0,0 +1,238 @@ +import "std/common.cup" + +enum TokenType { + TOKEN_AMPERSAND, + TOKEN_AND, + TOKEN_ASSIGN, + TOKEN_BAR, + TOKEN_CARET, + TOKEN_CHARLIT, + TOKEN_CLOSE_BRACE, + TOKEN_CLOSE_BRACKET, + TOKEN_CLOSE_PAREN, + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_DOT, + TOKEN_EOF, + TOKEN_EQ, + TOKEN_EXCLAMATION, + TOKEN_GEQ, + TOKEN_GT, + TOKEN_IDENTIFIER, + TOKEN_INTLIT, + TOKEN_LEQ, + TOKEN_LSHIFT, + TOKEN_LT, + TOKEN_MINUS, + TOKEN_MINUSEQUALS, + TOKEN_MINUSMINUS, + TOKEN_NEQ, + TOKEN_OPEN_BRACE, + TOKEN_OPEN_BRACKET, + TOKEN_OPEN_PAREN, + TOKEN_OR, + TOKEN_PERCENT, + TOKEN_PLUS, + TOKEN_PLUSEQUALS, + TOKEN_PLUSPLUS, + TOKEN_QUESTION, + TOKEN_RSHIFT, + TOKEN_SEMICOLON, + TOKEN_SLASH, + TOKEN_STAR, + TOKEN_STRINGLIT, + TOKEN_TILDE, + TOKEN_XOR, + + // Keywords go below: + TOKEN__KEYWORD_BEGIN, + TOKEN_CHAR, + TOKEN_CONST, + TOKEN_ENUM, + TOKEN_ELSE, + TOKEN_DEFER, + TOKEN_FN, + TOKEN_FOR, + TOKEN_IF, + TOKEN_INT, + TOKEN_LET, + TOKEN_RETURN, + TOKEN_STRUCT, + TOKEN_UNION, + TOKEN_VOID, + TOKEN_WHILE, + TOKEN_IMPORT, + TOKEN__KEYWORD_END, +}; + +struct Location { + filename: char*; + line: int; + col: int; +}; + +struct Token { + typ: int; + loc: Location; + value: union { + as_int: int; + as_string: char*; + as_char: char; + }; +}; + +fn token_type_to_string(typ: int): char* { + if (typ == TOKEN_AMPERSAND) return "TOKEN_AMPERSAND"; + if (typ == TOKEN_AND) return "TOKEN_AND"; + if (typ == TOKEN_ASSIGN) return "TOKEN_ASSIGN"; + if (typ == TOKEN_BAR) return "TOKEN_BAR"; + if (typ == TOKEN_CARET) return "TOKEN_CARET"; + if (typ == TOKEN_CHARLIT) return "TOKEN_CHARLIT"; + if (typ == TOKEN_CLOSE_BRACE) return "TOKEN_CLOSE_BRACE"; + if (typ == TOKEN_CLOSE_BRACKET) return "TOKEN_CLOSE_BRACKET"; + if (typ == TOKEN_CLOSE_PAREN) return "TOKEN_CLOSE_PAREN"; + if (typ == TOKEN_COLON) return "TOKEN_COLON"; + if (typ 
== TOKEN_COMMA) return "TOKEN_COMMA"; + if (typ == TOKEN_DOT) return "TOKEN_DOT"; + if (typ == TOKEN_EOF) return "TOKEN_EOF"; + if (typ == TOKEN_EQ) return "TOKEN_EQ"; + if (typ == TOKEN_EXCLAMATION) return "TOKEN_EXCLAMATION"; + if (typ == TOKEN_GEQ) return "TOKEN_GEQ"; + if (typ == TOKEN_GT) return "TOKEN_GT"; + if (typ == TOKEN_IDENTIFIER) return "TOKEN_IDENTIFIER"; + if (typ == TOKEN_INTLIT) return "TOKEN_INTLIT"; + if (typ == TOKEN_LEQ) return "TOKEN_LEQ"; + if (typ == TOKEN_LSHIFT) return "TOKEN_LSHIFT"; + if (typ == TOKEN_LT) return "TOKEN_LT"; + if (typ == TOKEN_MINUS) return "TOKEN_MINUS"; + if (typ == TOKEN_MINUSEQUALS) return "TOKEN_MINUSEQUALS"; + if (typ == TOKEN_MINUSMINUS) return "TOKEN_MINUSMINUS"; + if (typ == TOKEN_NEQ) return "TOKEN_NEQ"; + if (typ == TOKEN_OPEN_BRACE) return "TOKEN_OPEN_BRACE"; + if (typ == TOKEN_OPEN_BRACKET) return "TOKEN_OPEN_BRACKET"; + if (typ == TOKEN_OPEN_PAREN) return "TOKEN_OPEN_PAREN"; + if (typ == TOKEN_OR) return "TOKEN_OR"; + if (typ == TOKEN_PERCENT) return "TOKEN_PERCENT"; + if (typ == TOKEN_PLUS) return "TOKEN_PLUS"; + if (typ == TOKEN_PLUSEQUALS) return "TOKEN_PLUSEQUALS"; + if (typ == TOKEN_PLUSPLUS) return "TOKEN_PLUSPLUS"; + if (typ == TOKEN_QUESTION) return "TOKEN_QUESTION"; + if (typ == TOKEN_RSHIFT) return "TOKEN_RSHIFT"; + if (typ == TOKEN_SEMICOLON) return "TOKEN_SEMICOLON"; + if (typ == TOKEN_SLASH) return "TOKEN_SLASH"; + if (typ == TOKEN_STAR) return "TOKEN_STAR"; + if (typ == TOKEN_STRINGLIT) return "TOKEN_STRINGLIT"; + if (typ == TOKEN_TILDE) return "TOKEN_TILDE"; + if (typ == TOKEN_XOR) return "TOKEN_XOR"; + if (typ == TOKEN_CHAR) return "TOKEN_CHAR"; + if (typ == TOKEN_CONST) return "TOKEN_CONST"; + if (typ == TOKEN_ENUM) return "TOKEN_ENUM"; + if (typ == TOKEN_ELSE) return "TOKEN_ELSE"; + if (typ == TOKEN_DEFER) return "TOKEN_DEFER"; + if (typ == TOKEN_FN) return "TOKEN_FN"; + if (typ == TOKEN_FOR) return "TOKEN_FOR"; + if (typ == TOKEN_IF) return "TOKEN_IF"; + if (typ == TOKEN_INT) return 
"TOKEN_INT"; + if (typ == TOKEN_LET) return "TOKEN_LET"; + if (typ == TOKEN_RETURN) return "TOKEN_RETURN"; + if (typ == TOKEN_STRUCT) return "TOKEN_STRUCT"; + if (typ == TOKEN_UNION) return "TOKEN_UNION"; + if (typ == TOKEN_VOID) return "TOKEN_VOID"; + if (typ == TOKEN_WHILE) return "TOKEN_WHILE"; + if (typ == TOKEN_IMPORT) return "TOKEN_IMPORT"; + + putsln("\nUnknown token type in token_type_to_string: "); print(typ); + exit(1); +} + +fn keyword_to_string(typ: int): char* { + if (typ == TOKEN_CHAR) return "char"; + if (typ == TOKEN_CONST) return "const"; + if (typ == TOKEN_ENUM) return "enum"; + if (typ == TOKEN_ELSE) return "else"; + if (typ == TOKEN_DEFER) return "defer"; + if (typ == TOKEN_FN) return "fn"; + if (typ == TOKEN_FOR) return "for"; + if (typ == TOKEN_IF) return "if"; + if (typ == TOKEN_INT) return "int"; + if (typ == TOKEN_LET) return "let"; + if (typ == TOKEN_RETURN) return "return"; + if (typ == TOKEN_STRUCT) return "struct"; + if (typ == TOKEN_UNION) return "union"; + if (typ == TOKEN_VOID) return "void"; + if (typ == TOKEN_WHILE) return "while"; + if (typ == TOKEN_IMPORT) return "import"; + + puts("Unknown keyword in keyword_to_string: "); + putsln(token_type_to_string(typ)); + exit(1); +} + +fn location_init(loc: Location*, filename: char*, line: int, col: int) { + loc.filename = filename; + loc.line = line; + loc.col = col; +} + +fn location_print(loc: Location *) { + puts(loc.filename); + putc(':'); + putu(loc.line + 1); + putc(':'); + putu(loc.col + 1); +} + +fn die_loc2(loc: Location*, msg1: char *, msg2: char *) { + location_print(loc); + puts(": "); + puts(msg1); + putsln(msg2); + exit(1); +} + +fn die_loc(loc: Location*, msg: char *) { + die_loc2(loc, msg, ""); +} + +fn token_from_type(token: Token*, typ: int, loc: Location *) { + token.typ = typ; +} + +fn token_from_int(token: Token*, val: int, loc: Location *) { + token.typ = TOKEN_INTLIT; + token.value.as_int = val; + token.loc.filename = loc.filename; + token.loc.line = loc.line; + 
token.loc.col = loc.col; +} + +fn token_from_string(token: Token*, str: char *, loc: Location *) { + token.typ = TOKEN_STRINGLIT; + token.value.as_string = str; + token.loc.filename = loc.filename; + token.loc.line = loc.line; + token.loc.col = loc.col; +} + +fn token_from_char(token: Token*, c: char, loc: Location *) { + token.typ = TOKEN_CHARLIT; + token.value.as_char = c; + token.loc.filename = loc.filename; + token.loc.line = loc.line; + token.loc.col = loc.col; +} + +fn token_from_identifier(token: Token*, str: char *, loc: Location *) { + token.typ = TOKEN_IDENTIFIER; + token.value.as_string = str; + token.loc.filename = loc.filename; + token.loc.line = loc.line; + token.loc.col = loc.col; +} + +fn is_literal_token(typ: int): int { + if (typ == TOKEN_INTLIT) return true; + if (typ == TOKEN_CHARLIT) return true; + if (typ == TOKEN_STRINGLIT) return true; + return false; +}
\ No newline at end of file diff --git a/compiler/types.cup b/compiler/types.cup new file mode 100644 index 0000000..f3c7b38 --- /dev/null +++ b/compiler/types.cup @@ -0,0 +1,82 @@ +import "std/common.cup" + +enum BaseType { + TYPE_VOID, + TYPE_ANY, + + TYPE_PTR, + TYPE_ARRAY, + TYPE_STRUCT, + TYPE_UNION, + + TYPE_INT, + TYPE_CHAR, +}; + +struct Type { + typ: int; + ptr: Type*; + struct_name: char*; + size: int; + array_size: int; + fields: struct { + names: char**; + types: Type**; + num_fields: int; + }; +}; + +fn size_for_base_type(type: int): int { + if (type == TYPE_INT) return 8; + if (type == TYPE_PTR) return 8; + if (type == TYPE_CHAR) return 1; + // Need to be initialized explicitly for compound types + return 0; +} + +let _type_int: Type* = null; +let _type_char: Type* = null; +let _type_void: Type* = null; +let _type_any: Type* = null; + +fn type_new(typ: int): Type* { + if (_type_int == null) { _type_int = malloc(sizeof(Type)); _type_int.typ = TYPE_INT; _type_int.size = 8; } + if (_type_char == null) { _type_char = malloc(sizeof(Type)); _type_char.typ = TYPE_CHAR; _type_char.size = 1; } + if (_type_void == null) { _type_void = malloc(sizeof(Type)); _type_void.typ = TYPE_VOID; _type_void.size = 0; } + if (_type_any == null) { _type_any = malloc(sizeof(Type)); _type_any.typ = TYPE_ANY; _type_any.size = 8; } + + if (typ == TYPE_INT) return _type_int; + if (typ == TYPE_CHAR) return _type_char; + if (typ == TYPE_VOID) return _type_void; + if (typ == TYPE_ANY) return _type_any; + + putsln("Allocating Type*"); + + let t: Type* = malloc(sizeof(Type)); + t.typ = typ; + t.size = size_for_base_type(typ); + return t; +} + +fn type_new_ptr(typ: int): Type* { + let ptr = type_new(TYPE_PTR); + ptr.ptr = type_new(typ); + return ptr; +} + +// This is named differently because it performs an allocation +fn create_type_string(typ: Type *): char* { + let buf: char* = malloc(32); + while (typ.typ == TYPE_PTR || typ.typ == TYPE_ARRAY) { + strcat(buf, typ.typ == TYPE_PTR ? 
"*" : "[]"); + typ = typ.ptr; + } + + if (typ.typ == TYPE_INT) strcat(buf, "int"); + else if (typ.typ == TYPE_CHAR) strcat(buf, "char"); + else if (typ.typ == TYPE_VOID) strcat(buf, "void"); + else if (typ.typ == TYPE_ANY) strcat(buf, "any"); + else die("type_to_string: unknown type"); + + return buf; +}
\ No newline at end of file @@ -0,0 +1,28 @@ +#!/bin/bash + +# This script does the following: +# 1. Builds the project +# 2. Compiles selected file +# 3. Assembles executable from compiled asm +# 4. Runs the executable +# 5. Echoes the output of the executable + +if [ -z "$1" ] +then + echo "Usage: $0 <arguments to cupcc>" + exit 1 +fi + +set -xe + +make +build/cupcc compiler/main.cup -o build/cup.nasm +make build/cup.out +build/cup.out "$@" +make build/host.out + +set +e + +build/host.out + +echo "Exit status: $?"
\ No newline at end of file |