From 5b87ec6ef2b84f1d319e5376bcf9eedea9829d79 Mon Sep 17 00:00:00 2001
From: Mustafa Quraish <mustafaq9@gmail.com>
Date: Sun, 30 Jan 2022 01:19:54 -0500
Subject: Rename `cup` directory to `src`

---
 src/ast.c       | 161 ++++++++++++++++
 src/ast.h       | 150 +++++++++++++++
 src/common.h    |   6 +
 src/generator.c | 355 +++++++++++++++++++++++++++++++++++
 src/generator.h |   6 +
 src/lexer.c     | 238 ++++++++++++++++++++++++
 src/lexer.h     |  19 ++
 src/main.c      |  82 +++++++++
 src/parser.c    | 562 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parser.h    |   7 +
 src/tokens.c    |  70 +++++++
 src/tokens.h    |  83 +++++++++
 src/utils.c     |  30 +++
 src/utils.h     |  12 ++
 14 files changed, 1781 insertions(+)
 create mode 100644 src/ast.c
 create mode 100644 src/ast.h
 create mode 100644 src/common.h
 create mode 100644 src/generator.c
 create mode 100644 src/generator.h
 create mode 100644 src/lexer.c
 create mode 100644 src/lexer.h
 create mode 100644 src/main.c
 create mode 100644 src/parser.c
 create mode 100644 src/parser.h
 create mode 100644 src/tokens.c
 create mode 100644 src/tokens.h
 create mode 100644 src/utils.c
 create mode 100644 src/utils.h

(limited to 'src')

diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..6e33de1
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,161 @@
+#include "ast.h"
+#include <assert.h>
+#include <stdio.h>
+
+char *data_type_to_str(DataType type)
+{
+    switch (type)
+    {
+    case TYPE_NONE: return "void";
+    case TYPE_INT: return "int";
+    default: assert(false && "Unreachable");
+    }
+}
+
+void print_type_to_file(FILE *out, Type type)
+{
+    fprintf(out, "%s", data_type_to_str(type.type));
+    for (int i = 0; i < type.indirection; i++) {
+        fprintf(out, "*");
+    }
+}
+
+char *node_type_to_str(NodeType type)
+{
+    switch (type)
+    {
+    #define ENUM_TOKEN(t, name) case t: return name;
+        ENUM_AST_TYPES(ENUM_TOKEN)
+    #undef ENUM_TOKEN
+    default: assert(false && "Unreachable");
+    }
+}
+
+bool is_binary_op(NodeType type)
+{
+    switch (type)
+    {
+    case OP_PLUS:
+    case OP_MINUS:
+    case OP_MUL:
+    case OP_DIV:
+    case OP_MOD:
+    case OP_LSHIFT:
+    case OP_RSHIFT:
+    case OP_AND:
+    case OP_OR:
+    case OP_XOR:
+    case OP_EQ:
+    case OP_NEQ:
+    case OP_LT:
+    case OP_LEQ:
+    case OP_GT:
+    case OP_GEQ: 
+        return true;
+    default: return false;
+    }
+}
+
+bool is_unary_op(NodeType type)
+{
+    switch (type)
+    {
+    case OP_NEG:
+    case OP_NOT:
+    case OP_BWINV:
+        return true;
+    default: return false;
+    }
+}
+
+bool is_expression(NodeType type)
+{
+    if (is_unary_op(type) || is_binary_op(type))
+        return true;
+    return type == AST_LITERAL;
+}
+
+void dump_func(Node *, int);
+
+static void do_print_ast(Node *node, int depth)
+{
+    for (int i = 0; i < depth; i++) {
+        printf("  ");
+    }
+    if (node->type == AST_PROGRAM) {
+        for (int i = 0; i < node->block.num_children; i++) {
+            do_print_ast(node->block.children[i], depth);
+        }
+    } else if (node->type == AST_BLOCK) {
+        printf("{\n");
+        for (int i = 0; i < node->block.num_children; i++) {
+            do_print_ast(node->block.children[i], depth + 1);
+        }
+        for (int i = 0; i < depth; i++) {
+            printf("  ");
+        }
+        printf("}\n");
+    } else if (node->type == AST_FUNC) {
+        dump_func(node, depth);
+    } else if (node->type == AST_LITERAL) {
+        printf("%d\n", node->literal.as_int);
+    } else if (node->type == AST_RETURN) {
+        printf("return\n");
+        do_print_ast(node->unary_expr, depth + 1);
+    } else if (is_unary_op(node->type)) {
+        printf("%s\n", node_type_to_str(node->type));
+        do_print_ast(node->unary_expr, depth + 1);
+    } else if (is_binary_op(node->type)) {
+        printf("%s\n", node_type_to_str(node->type));
+        do_print_ast(node->binary.left, depth + 1);
+        do_print_ast(node->binary.right, depth + 1);
+    } else if (node->type == AST_VAR) {
+        assert(node->variable && node->variable->name);
+        printf("%s\n", node->variable->name);
+    } else if (node->type == AST_VARDECL) {
+        printf("var %s (", node->var_decl.var.name);
+        print_type_to_file(stdout, node->var_decl.var.type);
+        printf(")");
+        if (node->var_decl.value != NULL) {
+            printf(" = \n");
+            do_print_ast(node->var_decl.value, depth + 1);
+        } else {
+            printf("\n");
+        }
+    } else if (node->type == AST_FUNCCALL) {
+        printf("CALL %s(\n", node->call.func->func.name);
+        for (int i = 0; i < node->call.num_args; i++) {
+            do_print_ast(node->call.args[i], depth + 1);
+        }
+        for (int i = 0; i < depth; i++) {
+            printf("  ");
+        }
+        printf(")\n");
+    } else {
+        printf("{{ %s }}\n", node_type_to_str(node->type));
+    }
+}
+
+void dump_func(Node *node, int depth)
+{
+    printf("fn %s(", node->func.name);
+    for (int i = 0; i < node->func.num_args; i++) {
+        if (i > 0) printf(", ");
+        printf("%s: ", node->func.args[i].name);
+        print_type_to_file(stdout, node->func.args[i].type);
+        printf("[[%lld]]", node->func.args[i].offset);
+    }
+    printf(")");
+    if (node->func.return_type.type != TYPE_NONE) {
+        // FIXME: Print return type properly
+        printf(" -> ");
+        print_type_to_file(stdout, node->func.return_type);
+    }
+
+    do_print_ast(node->func.body, depth + 1);
+}
+
+void print_ast(Node *node)
+{
+    do_print_ast(node, 0);
+}
\ No newline at end of file
diff --git a/src/ast.h b/src/ast.h
new file mode 100644
index 0000000..22a7283
--- /dev/null
+++ b/src/ast.h
@@ -0,0 +1,150 @@
+#pragma once
+
+#include "common.h"
+
+#define ENUM_AST_TYPES(F)                                                      \
+  F(OP_NEG, "neg")                                                             \
+  F(OP_NOT, "!")                                                               \
+  F(OP_BWINV, "~")                                                             \
+  F(OP_PLUS, "+")                                                              \
+  F(OP_MINUS, "-")                                                             \
+  F(OP_MUL, "*")                                                               \
+  F(OP_DIV, "/")                                                               \
+  F(OP_MOD, "%")                                                               \
+  F(OP_LSHIFT, "<<")                                                           \
+  F(OP_RSHIFT, ">>")                                                           \
+  F(OP_AND, "&&")                                                              \
+  F(OP_OR, "||")                                                               \
+  F(OP_XOR, "^")                                                               \
+  F(OP_EQ, "==")                                                               \
+  F(OP_NEQ, "!=")                                                              \
+  F(OP_LT, "<")                                                                \
+  F(OP_LEQ, "<=")                                                              \
+  F(OP_GT, ">")                                                                \
+  F(OP_GEQ, ">=")                                                              \
+  F(OP_ASSIGN, "=")                                                            \
+  F(AST_LITERAL, "literal")                                                    \
+  F(AST_FUNCCALL, "Function call")                                             \
+  F(AST_CONDITIONAL, "conditional expression")                                 \
+  F(AST_IF, "if statement")                                                    \
+  F(AST_WHILE, "while statement")                                              \
+  F(AST_FOR, "for statement")                                                  \
+  F(AST_VARDECL, "variable decl")                                              \
+  F(AST_VAR, "variable")                                                       \
+  F(AST_RETURN, "return")                                                      \
+  F(AST_FUNC, "func")                                                          \
+  F(AST_PROGRAM, "program")                                                    \
+  F(AST_BLOCK, "block statements")
+
+typedef enum {
+#define DEFINE_ENUM(name, str) name,
+  ENUM_AST_TYPES(DEFINE_ENUM)
+#undef DEFINE_ENUM
+} NodeType;
+
+char *node_type_to_str(NodeType type);
+
+bool is_binary_op(NodeType type);
+bool is_unary_op(NodeType type);
+bool is_expression(NodeType type);
+
+typedef enum {
+    TYPE_NONE,
+    TYPE_INT,
+} DataType;
+
+char *data_type_to_str(DataType type);
+
+typedef struct {
+    DataType type;
+    // 0 = value, 1 = pointer, 2 = double pointer, ...
+    int indirection;
+} Type;
+
+typedef struct {
+    char *name;
+    Type type;
+    i64 offset;
+} Variable;
+
+typedef struct ast_node Node;
+typedef struct ast_node {
+    NodeType type;
+
+    union {
+        // Binary expr
+        struct {
+            Node *left;
+            Node *right;
+        } binary;
+
+        // Unary expr
+        Node *unary_expr;
+
+        // Function definition
+        struct {
+            char *name;
+            Type return_type;
+            Node *body;
+
+            // TODO: Should we just dynamically allocate space on the
+            //       stack for each block instead of storing this?
+            i64 max_locals_size;
+
+            // TODO: Arguments / etc?
+            Variable *args;
+            int num_args;
+        } func;
+
+        // Block of statements
+        struct {
+            Node **children;
+            int num_children;
+
+            Variable **locals;
+            int num_locals;
+            i64 locals_size;
+        } block;
+
+        struct {
+            Type type;
+            union {
+                int as_int;
+            };
+        } literal;
+
+        struct {
+            Variable var;
+            Node *value;
+        } var_decl;
+
+        struct {
+            Variable *var;
+            Node *value;
+        } assign;
+
+        struct {
+            Node *cond;
+            Node *do_then;
+            Node *do_else;
+        } conditional;
+
+        // Used for all loops
+        struct {
+            Node *cond;
+            Node *init;
+            Node *step;
+            Node *body;
+        } loop;
+
+        Variable *variable;
+
+        struct {
+            Node *func;
+            Node **args;
+            int num_args;
+        } call;
+    };
+} Node;
+
+void print_ast(Node *node);
\ No newline at end of file
diff --git a/src/common.h b/src/common.h
new file mode 100644
index 0000000..20d057f
--- /dev/null
+++ b/src/common.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+typedef long long int i64;
diff --git a/src/generator.c b/src/generator.c
new file mode 100644
index 0000000..6a95d0c
--- /dev/null
+++ b/src/generator.c
@@ -0,0 +1,355 @@
+/**
+ * This file generates x86-64 assembly code in NASM format from the parsed AST.
+ */
+
+#include "generator.h"
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <sys/syscall.h>
+
+static int label_counter = 0;
+static Node *current_function = NULL;
+
+void generate_expr_into_rax(Node *expr, FILE *out);
+
+void generate_func_call(Node *node, FILE *out)
+{
+    assert(node->type == AST_FUNCCALL);
+    // FIXME: This seems like a big hack
+    i64 total_size = 0;
+    for (int i = node->call.num_args - 1; i >= 0; i--) {
+        Node *arg = node->call.args[i];
+        generate_expr_into_rax(arg, out);
+        fprintf(out, "    push rax\n");
+        // TODO: Compute this for different types
+        // TODO: Also make sure of padding and stuff?
+        total_size += 8;
+    }
+    fprintf(out, "    call %s\n", node->call.func->func.name);
+    fprintf(out, "    add rsp, %lld\n", total_size);
+}
+
+// The evaluated expression is stored into `rax`
+void generate_expr_into_rax(Node *expr, FILE *out)
+{
+    // TODO: Different sized output for different types?
+    if (expr->type == AST_LITERAL) {
+        // TODO: More literal types
+        assert(expr->literal.type.type == TYPE_INT);
+        fprintf(out, "    mov rax, %d\n", expr->literal.as_int);
+
+    } else if (expr->type == AST_FUNCCALL) {
+        generate_func_call(expr, out);
+
+    } else if (expr->type == AST_VAR) {
+        i64 offset = expr->variable->offset;
+        if (offset > 0)
+            fprintf(out, "    mov rax, [rbp-%lld]\n", offset);
+        else
+            fprintf(out, "    mov rax, [rbp+%lld]\n", -offset);
+
+    } else if (expr->type == OP_ASSIGN) {
+        i64 offset = expr->assign.var->offset;
+        generate_expr_into_rax(expr->assign.value, out);
+        fprintf(out, "    mov [rbp-%lld], rax\n", offset);
+
+    } else if (expr->type == OP_NEG) {
+        generate_expr_into_rax(expr->unary_expr, out);
+        fprintf(out, "    neg rax\n");
+
+    } else if (expr->type == OP_NOT) {
+        generate_expr_into_rax(expr->unary_expr, out);
+        // Booleanize
+        fprintf(out, "    cmp rax, 0\n");
+        fprintf(out, "    sete al\n");
+        fprintf(out, "    movzx rax, al\n");
+
+    } else if (expr->type == OP_BWINV) {
+        generate_expr_into_rax(expr->unary_expr, out);
+        fprintf(out, "    not rax\n");
+
+    } else if (expr->type == OP_PLUS) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    add rax, rbx\n");
+
+    } else if (expr->type == OP_MINUS) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    sub rax, rbx\n");
+
+    } else if (expr->type == OP_DIV) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    cqo\n");
+        fprintf(out, "    idiv rbx\n");
+
+    } else if (expr->type == OP_MUL) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    imul rbx\n");
+
+    // TODO: Compress these, there's barely any differences
+    } else if (expr->type == OP_EQ) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    cmp rax, rbx\n");
+        fprintf(out, "    sete al\n");
+        fprintf(out, "    movzx rax, al\n");
+
+    } else if (expr->type == OP_NEQ) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    cmp rax, rbx\n");
+        fprintf(out, "    setne al\n");
+        fprintf(out, "    movzx rax, al\n");
+
+    } else if (expr->type == OP_LT) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    cmp rax, rbx\n");
+        fprintf(out, "    setl al\n");
+        fprintf(out, "    movzx rax, al\n");
+
+    }  else if (expr->type == OP_LEQ) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    cmp rax, rbx\n");
+        fprintf(out, "    setle al\n");
+        fprintf(out, "    movzx rax, al\n");
+
+    }  else if (expr->type == OP_GT) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    cmp rax, rbx\n");
+        fprintf(out, "    setg al\n");
+        fprintf(out, "    movzx rax, al\n");
+
+    }  else if (expr->type == OP_GEQ) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    cmp rax, rbx\n");
+        fprintf(out, "    setge al\n");
+        fprintf(out, "    movzx rax, al\n");
+
+    } else if (expr->type == OP_GEQ) {
+        generate_expr_into_rax(expr->binary.right, out);
+        fprintf(out, "    push rax\n");
+        generate_expr_into_rax(expr->binary.left, out);
+        fprintf(out, "    pop rbx\n");
+        fprintf(out, "    cmp rax, rbx\n");
+        fprintf(out, "    setge al\n");
+        fprintf(out, "    movzx rax, al\n");
+
+    // Note: These are different because of short-circuit evaluation!
+    } else if (expr->type == OP_OR) {
+        generate_expr_into_rax(expr->binary.left, out);
+        // If left is true, we can short-circuit
+        fprintf(out, "    cmp rax, 0\n");
+        fprintf(out, "    je .or_right_%d\n", label_counter);
+        fprintf(out, "    mov rax, 1\n");
+        fprintf(out, "    jmp .or_end_%d\n", label_counter);
+        fprintf(out, ".or_right_%d:\n", label_counter);
+        generate_expr_into_rax(expr->binary.right, out);
+        // Booleanize the result
+        fprintf(out, "    cmp rax, 0\n");
+        fprintf(out, "    setne al\n");
+        fprintf(out, ".or_end_%d:\n", label_counter);
+        label_counter++;
+
+    } else if (expr->type == OP_AND) {
+        generate_expr_into_rax(expr->binary.left, out);
+        // If left is false, we can short-circuit
+        fprintf(out, "    cmp rax, 0\n");
+        fprintf(out, "    jne .and_right_%d\n", label_counter);
+        fprintf(out, "    mov rax, 0\n");
+        fprintf(out, "    jmp .and_end_%d\n", label_counter);
+        fprintf(out, ".and_right_%d:\n", label_counter);
+        generate_expr_into_rax(expr->binary.right, out);
+        // Booleanize the result
+        fprintf(out, "    cmp rax, 0\n");
+        fprintf(out, "    setne al\n");
+        fprintf(out, ".and_end_%d:\n", label_counter);
+        label_counter++;
+
+    }  else if (expr->type == AST_CONDITIONAL) {
+        generate_expr_into_rax(expr->conditional.cond, out);
+        // If left is false, we can short-circuit
+        fprintf(out, "    cmp rax, 0\n");
+        fprintf(out, "    je .cond_else_%d\n", label_counter);
+        generate_expr_into_rax(expr->conditional.do_then, out);
+        fprintf(out, "    jmp .cond_end_%d\n", label_counter);
+        fprintf(out, ".cond_else_%d:\n", label_counter);
+        generate_expr_into_rax(expr->binary.right, out);
+        // Booleanize the result
+        generate_expr_into_rax(expr->conditional.do_else, out);
+        fprintf(out, ".cond_end_%d:\n", label_counter);
+        label_counter++;
+
+    } else {
+        fprintf(stderr, "Unsupported expression type in generate_expr: `%s`\n", node_type_to_str(expr->type));
+        exit(1);
+    }
+}
+
+void generate_block(Node *block, FILE *out);
+
+void generate_statement(Node *stmt, FILE *out)
+{
+    if (stmt->type == AST_RETURN) {
+        generate_expr_into_rax(stmt->unary_expr, out);
+        // TODO: Only do this if we have local variables
+        fprintf(out, "    mov rsp, rbp\n");
+        fprintf(out, "    pop rbp\n");
+        fprintf(out, "    ret\n");
+    } else if (stmt->type == AST_VARDECL) {
+        if (stmt->var_decl.value) {
+            generate_expr_into_rax(stmt->var_decl.value, out);
+            i64 offset = stmt->var_decl.var.offset;
+            fprintf(out, "    mov [rbp-%lld], rax\n", offset);
+        } else {
+            // Initialize to 0
+            i64 offset = stmt->var_decl.var.offset;
+            // TODO: Use correct size for the type
+            fprintf(out, "    mov qword [rbp-%lld], 0\n", offset);
+        }
+    } else if (stmt->type == AST_IF) {
+        assert(stmt->conditional.cond);
+        assert(stmt->conditional.do_then);
+        int cur_label = label_counter++;
+
+        generate_expr_into_rax(stmt->conditional.cond, out);
+        // If we don't have an `else` clause, we can simplify
+        if (!stmt->conditional.do_else) {
+            fprintf(out, "    cmp rax, 0\n");
+            fprintf(out, "    je .if_end_%d\n", cur_label);
+            generate_statement(stmt->conditional.do_then, out);
+            fprintf(out, ".if_end_%d:\n", cur_label);
+        } else {
+            fprintf(out, "    cmp rax, 0\n");
+            fprintf(out, "    je .if_else_%d\n", cur_label);
+            generate_statement(stmt->conditional.do_then, out);
+            fprintf(out, "    jmp .if_end_%d\n", cur_label);
+            fprintf(out, ".if_else_%d:\n", cur_label);
+            generate_statement(stmt->conditional.do_else, out);
+            fprintf(out, ".if_end_%d:\n", cur_label);
+        }
+    } else if (stmt->type == AST_WHILE) {
+        int cur_label = label_counter++;
+        fprintf(out, ".loop_start_%d:\n", cur_label);
+        fprintf(out, ".loop_continue_%d:\n", cur_label);
+        generate_expr_into_rax(stmt->loop.cond, out);
+        fprintf(out, "    cmp rax, 0\n");
+        fprintf(out, "    je .loop_end_%d\n", cur_label);
+        generate_statement(stmt->loop.body, out);
+        fprintf(out, "    jmp .loop_start_%d\n", cur_label);
+        fprintf(out, ".loop_end_%d:\n", cur_label);
+
+    }  else if (stmt->type == AST_FOR) {
+        int cur_label = label_counter++;
+        if (stmt->loop.init) {
+            generate_statement(stmt->loop.init, out);
+        }
+        fprintf(out, ".loop_start_%d:\n", cur_label);
+        if (stmt->loop.cond) {
+            generate_expr_into_rax(stmt->loop.cond, out);
+            fprintf(out, "    cmp rax, 0\n");
+            fprintf(out, "    je .loop_end_%d\n", cur_label);
+        }
+        generate_statement(stmt->loop.body, out);
+        fprintf(out, ".loop_continue_%d:\n", cur_label);
+        if (stmt->loop.step) {
+            generate_expr_into_rax(stmt->loop.step, out);
+        }
+        fprintf(out, "    jmp .loop_start_%d\n", cur_label);
+        fprintf(out, ".loop_end_%d:\n", cur_label);
+
+    } else if (stmt->type == AST_BLOCK) {
+        generate_block(stmt, out);
+    } else {
+        // Once again, default to an expression here...
+        generate_expr_into_rax(stmt, out);
+    }
+}
+
+void generate_block(Node *block, FILE *out)
+{
+    assert(block->type == AST_BLOCK);
+    for (int i = 0; i < block->block.num_children; i++)
+        generate_statement(block->block.children[i], out);
+}
+
+void generate_function_header(Node *func, FILE *out)
+{
+    assert(func->type == AST_FUNC);
+    fprintf(out, "global %s\n", func->func.name);
+    fprintf(out, "%s:\n", func->func.name);
+    // TODO: Only do this if we have local variables
+    fprintf(out, "    push rbp\n");
+    fprintf(out, "    mov rbp, rsp\n");
+    // FIXME: Also account for arguments
+    fprintf(out, "    sub rsp, %lld\n", func->func.max_locals_size);
+}
+
+void generate_function(Node *func, FILE *out)
+{
+    assert(func->type == AST_FUNC);
+    current_function = func;
+    generate_function_header(func, out);
+    generate_block(func->func.body, out);
+}
+
+void generate_asm(Node *root, FILE *out)
+{
+    assert(root->type == AST_PROGRAM);
+    for (int i = 0; i < root->block.num_children; i++) {
+        if (root->block.children[i]->type == AST_FUNC) {
+            generate_function(root->block.children[i], out);
+        } else {
+            fprintf(stderr, "Unsupported node type in generate_asm: %s\n", node_type_to_str(root->block.children[i]->type));
+            exit(1);
+        }
+    }
+
+    // Call `main` from `_main` and return
+#if __APPLE__
+    fprintf(out, "global _main\n");
+    fprintf(out, "_main:\n");
+#else
+    fprintf(out, "global _start\n");
+    fprintf(out, "_start:\n");
+#endif
+    fprintf(out, "    call main\n");
+
+#if __APPLE__
+    fprintf(out, "    ret\n");
+#else
+    fprintf(out, "    mov rdi, rax\n");
+    fprintf(out, "    mov rax, %d\n", SYS_exit);
+    fprintf(out, "    syscall\n");
+#endif
+
+    // TODO: Add implementations of some primitives?
+}
\ No newline at end of file
diff --git a/src/generator.h b/src/generator.h
new file mode 100644
index 0000000..602cf3a
--- /dev/null
+++ b/src/generator.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include "ast.h"
+#include <stdio.h>
+
+void generate_asm(Node *root, FILE *out);
\ No newline at end of file
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..ea095ba
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,238 @@
+#include "lexer.h"
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+Lexer Lexer_new(char *filename, char *src, i64 len)
+{
+    Lexer self = {0};
+    self.src = src;
+    self.len = len;
+    self.filename = filename;
+    return self;
+}
+
+static Location Lexer_loc(Lexer *lexer)
+{
+    Location loc = {0};
+    loc.filename = lexer->filename;
+    loc.line = lexer->line;
+    loc.col = lexer->col;
+    return loc;
+}
+
+static void Lexer_skip_whitespace(Lexer *lexer)
+{
+    while (lexer->pos < lexer->len && isspace(lexer->src[lexer->pos])) {
+        if (lexer->src[lexer->pos] == '\n') {
+            lexer->line++;
+            lexer->col = 0;
+        } else {
+            lexer->col++;
+        }
+        lexer->pos++;
+    }
+}
+
+bool Lexer_has_more(Lexer *lexer)
+{
+    Lexer_skip_whitespace(lexer);
+    return lexer->pos < lexer->len;
+}
+
+static bool Lexer_starts_with(Lexer *lexer, char *str)
+{
+    i64 len = strlen(str);
+    if (lexer->len - lexer->pos < len)
+        return false;
+    for (i64 i = 0; i < len; i++)
+        if (lexer->src[lexer->pos + i] != str[i])
+            return false;
+    i64 end_pos = lexer->pos + len;
+    if (end_pos == lexer->len)
+        return true;
+    char end_char = lexer->src[end_pos];
+    return !(isdigit(end_char) || isalpha(end_char) || end_char == '_');
+}
+
+static void advance(Lexer *lexer, i64 amount)
+{
+    lexer->pos += amount;
+    lexer->col += amount;
+}
+
+static char peek(Lexer *lexer, int amount)
+{
+    if (lexer->pos + amount >= lexer->len)
+        return '\0';
+    return lexer->src[lexer->pos + amount];
+}
+
+static Token Lexer_make_token(Lexer *lexer, TokenType type, int inc_amount)
+{
+    Token token = {0};
+    token.type = type;
+    token.loc = Lexer_loc(lexer);
+    advance(lexer, inc_amount);
+    return token;
+}
+
+#define LEX_KEYWORD(str, token_type) \
+    if (Lexer_starts_with(lexer, str)) return Lexer_make_token(lexer, token_type, strlen(str));
+
+Token Lexer_next(Lexer *lexer)
+{
+    while (lexer->pos < lexer->len) {
+        switch (lexer->src[lexer->pos])
+        {
+        case ' ': case '\t': case '\r': advance(lexer, 1); continue;
+        case '\n': lexer->line++; lexer->col = 0; lexer->pos++; continue;
+        case '(': return Lexer_make_token(lexer, TOKEN_OPEN_PAREN, 1);
+        case ')': return Lexer_make_token(lexer, TOKEN_CLOSE_PAREN, 1);
+        case '{': return Lexer_make_token(lexer, TOKEN_OPEN_BRACE, 1);
+        case '}': return Lexer_make_token(lexer, TOKEN_CLOSE_BRACE, 1);
+        case ';': return Lexer_make_token(lexer, TOKEN_SEMICOLON, 1);
+        case ':': return Lexer_make_token(lexer, TOKEN_COLON, 1);
+        case '~': return Lexer_make_token(lexer, TOKEN_TILDE, 1);
+        case '?': return Lexer_make_token(lexer, TOKEN_QUESTION, 1);
+        case ',': return Lexer_make_token(lexer, TOKEN_COMMA, 1);
+        
+        case '&': {
+            if (peek(lexer, 1) == '&')
+                return Lexer_make_token(lexer, TOKEN_AND, 2);
+            return Lexer_make_token(lexer, TOKEN_AMPERSAND, 1);
+        }
+        
+        case '!': {
+            if (peek(lexer, 1) == '=')
+                return Lexer_make_token(lexer, TOKEN_NEQ, 2);
+            return Lexer_make_token(lexer, TOKEN_EXCLAMATION, 1);
+        }
+        
+        case '<': {
+            if (peek(lexer, 1) == '=')
+                return Lexer_make_token(lexer, TOKEN_LEQ, 2);
+            return Lexer_make_token(lexer, TOKEN_LT, 1);
+        }
+
+        case '>': {
+            if (peek(lexer, 1) == '=')
+                return Lexer_make_token(lexer, TOKEN_GEQ, 2);
+            return Lexer_make_token(lexer, TOKEN_GT, 1);
+        }
+
+        case '=': {
+            if (peek(lexer, 1) == '=')
+                return Lexer_make_token(lexer, TOKEN_EQ, 2);
+            return Lexer_make_token(lexer, TOKEN_ASSIGN, 1);
+        }
+
+        case '|': {
+            if (peek(lexer, 1) == '|')
+                return Lexer_make_token(lexer, TOKEN_OR, 2);
+            return Lexer_make_token(lexer, TOKEN_BAR, 1);
+        }
+
+
+        case '+': {
+            if (peek(lexer, 1) == '+')
+                return Lexer_make_token(lexer, TOKEN_PLUSPLUS, 2);
+            if (peek(lexer, 1) == '=')
+                return Lexer_make_token(lexer, TOKEN_PLUSEQUALS, 2);
+            return Lexer_make_token(lexer, TOKEN_PLUS, 1);
+        }
+
+        case '-': {
+            if (peek(lexer, 1) == '-')
+                return Lexer_make_token(lexer, TOKEN_MINUSMINUS, 2);
+            if (peek(lexer, 1) == '=')
+                return Lexer_make_token(lexer, TOKEN_MINUSEQUALS, 2);
+            return Lexer_make_token(lexer, TOKEN_MINUS, 1);
+        }
+        
+        case '/': {
+            if (peek(lexer, 1) == '/') {
+                lexer->pos += 2;    // skip the '//'
+                while (lexer->pos < lexer->len && lexer->src[lexer->pos] != '\n')
+                    lexer->pos++;
+                continue;
+            }
+            return Lexer_make_token(lexer, TOKEN_SLASH, 1);
+        }
+
+        case '*': return Lexer_make_token(lexer, TOKEN_STAR, 1);
+        case '%': return Lexer_make_token(lexer, TOKEN_PERCENT, 1);
+
+
+        default: {
+            // Handle keywords explicitly
+            LEX_KEYWORD("fn", TOKEN_FN);
+            LEX_KEYWORD("if", TOKEN_IF);
+            LEX_KEYWORD("int", TOKEN_INT);
+            LEX_KEYWORD("let", TOKEN_LET);
+            LEX_KEYWORD("for", TOKEN_FOR);
+            LEX_KEYWORD("else", TOKEN_ELSE);
+            LEX_KEYWORD("while", TOKEN_WHILE);
+            LEX_KEYWORD("return", TOKEN_RETURN);
+
+            if (isdigit(lexer->src[lexer->pos])) {
+                // TODO: Parse hex and octal numbers
+                i64 pos = lexer->pos;
+                while (pos < lexer->len && isdigit(lexer->src[pos])) 
+                    pos++;
+                Token token = Token_from_int(atoi(lexer->src + lexer->pos), Lexer_loc(lexer));
+                advance(lexer, pos - lexer->pos);
+                return token;
+            }
+
+            if (isalpha(lexer->src[lexer->pos]) || lexer->src[lexer->pos] == '_') {
+                i64 pos = lexer->pos;
+                while (pos < lexer->len && (isalnum(lexer->src[pos]) || lexer->src[pos] == '_'))
+                    pos++;
+                int str_len = pos - lexer->pos;
+                char *str = calloc(str_len + 1, 1);
+                strncpy(str, lexer->src + lexer->pos, str_len);
+                Token token = Token_from_identifier(str, Lexer_loc(lexer));
+                advance(lexer, str_len);
+                return token;
+            }
+
+            // TODO: Handle escapes
+            if (lexer->src[lexer->pos] == '"') {
+                i64 pos = lexer->pos + 1;
+                while (pos < lexer->len && lexer->src[pos] != '"')
+                    pos++;
+                if (pos == lexer->len) {
+                    die_location(Lexer_loc(lexer), ": ERROR: Reached end-of-file while parsing string literal beginning here.\n");
+                }
+                // Careful with indexing here, because we want to skip opening and closing quotes
+                char *str = calloc(pos - lexer->pos, 1);
+                strncpy(str, lexer->src + lexer->pos + 1, pos - lexer->pos - 1);
+                Token token = Token_from_identifier(str, Lexer_loc(lexer));
+                advance(lexer, pos - lexer->pos + 1);
+                return token;
+            }
+
+
+            die_location(Lexer_loc(lexer), ": ERROR: Unexpected character '%c'\n", lexer->src[lexer->pos]);
+            advance(lexer, 1);
+        }
+        }
+    }
+
+    return Token_from_type(TOKEN_EOF, Lexer_loc(lexer));
+}
+
+Token Lexer_peek(Lexer *lexer)
+{
+    i64 pos = lexer->pos;
+    i64 col = lexer->col;
+    i64 line = lexer->line;
+    Token token = Lexer_next(lexer);
+    lexer->pos = pos;
+    lexer->col = col;
+    lexer->line = line;
+    return token;
+}
\ No newline at end of file
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..f710d3d
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "tokens.h"
+#include <stdbool.h>
+
+typedef struct {
+    char *src;
+    i64 len;
+    i64 pos;
+
+    char *filename;
+    i64 line;
+    i64 col;
+} Lexer;
+
+Lexer Lexer_new(char *filename, char *src, i64 len);
+
+Token Lexer_next(Lexer *lexer);
+Token Lexer_peek(Lexer *lexer);
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..15460ff
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,82 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+#include "parser.h"
+#include "generator.h"
+#include "unistd.h"
+
+char *filename = NULL;
+char *source = NULL;
+i64 source_len = 0;
+bool dump_ast = false;
+
+#define MAX_STDIN_SOURCE_LEN 2048
+
+void usage(int exit_status)
+{
+    printf("Usage: cup [options] <file>\n");
+    printf("Options:\n");
+    printf("  -c <code>  Code to compile\n");
+    printf("  -h         Show this help\n");
+    printf("  -d         Dump AST to stdout\n");
+    printf("Output file will be named `output.nasm`\n");
+    exit(exit_status);
+}
+
+void parse_args(int argc, char **argv)
+{
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-c") == 0) {
+            source_len = strlen(argv[i+1]);
+            source = calloc(source_len + 1, 1);
+            strcpy(source, argv[i+1]);
+            i++;
+            filename = "CLI";
+        } else if (strcmp(argv[i], "-h") == 0) {
+            usage(0);
+        } else if (strcmp(argv[i], "-d") == 0) {
+            dump_ast = true;
+        } else if (filename == NULL) {
+            if (strcmp(argv[i], "-") == 0) {
+                filename = "stdin";
+                source = calloc(MAX_STDIN_SOURCE_LEN, 1);
+                source_len = fread(source, 1, MAX_STDIN_SOURCE_LEN, stdin);
+                if (source_len == MAX_STDIN_SOURCE_LEN) {
+                    fprintf(stderr, "Source too long to use through stdin\n");
+                    exit(1);
+                }
+            } else {
+                filename = argv[i];
+                // Read entire file into memory
+                FILE *fp = fopen(filename, "r");
+                fseek(fp, 0, SEEK_END);
+                source_len = ftell(fp);
+                fseek(fp, 0, SEEK_SET);
+                source = malloc(source_len + 1);
+                fread(source, source_len, 1, fp);
+                source[source_len] = 0;
+                fclose(fp);
+            }
+        } else {
+            usage(1);
+        }
+    }
+}
+
+int main(int argc, char**argv) {
+    parse_args(argc, argv);
+
+    Lexer lexer = Lexer_new(filename, source, source_len);
+    Node *ast = parse_program(&lexer);
+
+    if (dump_ast)
+        print_ast(ast);
+
+    FILE *f = fopen("output.nasm", "w");
+    generate_asm(ast, f);
+
+
+    free(source);
+    return 0;
+}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..cfb5d97
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,562 @@
+#include "parser.h"
+#include "utils.h"
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#define MAX_FUNCTION_COUNT 1024
+static Node *all_functions[MAX_FUNCTION_COUNT];
+static i64 function_count = 0;
+
+static Node *current_function = NULL;
+
+#define BLOCK_STACK_SIZE 64
+static Node *block_stack[BLOCK_STACK_SIZE];
+static i64 block_stack_count = 0;
+static i64 cur_stack_offset = 0;
+
+
+Token do_assert_token(Token token, TokenType type, char *filename, int line)
+{
+    if (token.type != type) {
+        Location_print(stderr, token.loc);
+        fprintf(stderr, ": Expected token of type `%s` but got `%s`\n", token_type_to_str(type), token_type_to_str(token.type));
+        fprintf(stderr, "Relevant location in compiler: %s:%d\n", filename, line);
+        exit(1);
+    }
+    return token;
+}
+
+#define assert_token(token, type) do_assert_token(token, type, __FILE__, __LINE__)
+
+/******
+ * Some helpers
+ */
+
+NodeType binary_token_to_op(TokenType type)
+{
+    switch (type)
+    {
+        case TOKEN_PLUS:    return OP_PLUS;
+        case TOKEN_MINUS:   return OP_MINUS;
+        case TOKEN_STAR:    return OP_MUL;
+        case TOKEN_SLASH:   return OP_DIV;
+        case TOKEN_PERCENT: return OP_MOD;
+        case TOKEN_LSHIFT:  return OP_LSHIFT;
+        case TOKEN_RSHIFT:  return OP_RSHIFT;
+        case TOKEN_AND:     return OP_AND;
+        case TOKEN_OR:      return OP_OR;
+        case TOKEN_XOR:     return OP_XOR;
+        case TOKEN_EQ:      return OP_EQ;
+        case TOKEN_NEQ:     return OP_NEQ;
+        case TOKEN_LT:      return OP_LT;
+        case TOKEN_LEQ:     return OP_LEQ;
+        case TOKEN_GT:      return OP_GT;
+        case TOKEN_GEQ:     return OP_GEQ;
+        
+        default: assert(false && "binary_token_to_op called with invalid token type");
+    }
+}
+
+void block_stack_push(Node *block)
+{
+    assert(block_stack_count < BLOCK_STACK_SIZE);
+    assert(current_function);
+    block_stack[block_stack_count++] = block;
+}
+
+void block_stack_pop()
+{
+    assert(block_stack_count > 0);
+    assert(current_function);
+    Node *block = block_stack[--block_stack_count];
+    cur_stack_offset -= block->block.locals_size;
+    assert(cur_stack_offset >= 0);
+}
+
+void dump_block_stack()
+{
+    for (i64 i = 0; i < block_stack_count; i++) {
+        Node *block = block_stack[i];
+        for (int i = 0; i < block->block.num_locals; i++) {
+            printf("%s: offset: %lld\n", block->block.locals[i]->name, block->block.locals[i]->offset);
+        }
+        printf("\n");
+    }
+}
+
+Variable *find_local_variable(Token *token)
+{
+    assert_token(*token, TOKEN_IDENTIFIER);
+    for (i64 i = block_stack_count - 1; i >= 0; --i) {
+        Node *block = block_stack[i];
+        for (int i = 0; i < block->block.num_locals; i++) {
+            if (strcmp(block->block.locals[i]->name, token->value.as_string) == 0) {
+                return block->block.locals[i];
+            }
+        }
+    }
+    Node *func = current_function;
+    for (int i = 0; i < func->func.num_args; i++) {
+        if (strcmp(func->func.args[i].name, token->value.as_string) == 0) {
+            return &func->func.args[i];
+        }
+    }
+    return NULL;
+}
+
+Node *find_function_definition(Token *token)
+{
+    assert_token(*token, TOKEN_IDENTIFIER);
+    for (i64 i = 0; i < function_count; i++) {
+        Node *function = all_functions[i];
+        if (strcmp(function->func.name, token->value.as_string) == 0) {
+            return function;
+        }
+    }
+    return NULL;
+}
+
+// TODO: rename this, it's ugly
+void add_variable_to_current_block(Variable *var)
+{
+    // Set offset for variable
+    Node *cur_block = block_stack[block_stack_count - 1];
+    var->offset = cur_stack_offset;
+
+    int new_len = (cur_block->block.num_locals + 1);
+    int var_size = 8; // TODO: Compute sizes based on different types
+    
+    // Add to the block
+    // FIXME: Use a map here
+    cur_block->block.locals = realloc(cur_block->block.locals, sizeof(Variable *) * new_len);
+    cur_block->block.locals[cur_block->block.num_locals] = var;
+    cur_block->block.num_locals++;
+
+    assert(current_function);
+    // Update current stack offset (w.r.t function stack frame) and block size
+    cur_stack_offset += var_size;
+    block_stack[block_stack_count-1]->block.locals_size += var_size;
+    
+    // Update function's max locals size
+    i64 max_offset = i64max(current_function->func.max_locals_size, cur_stack_offset);
+    current_function->func.max_locals_size = max_offset;
+
+    assert(cur_stack_offset >= 0);
+    assert(block_stack_count > 0);
+}
+
+void Node_add_child(Node *parent, Node *child)
+{
+    // TODO, use a vector
+    parent->block.children = realloc(parent->block.children, sizeof(Node *) * (parent->block.num_children + 1));
+    parent->block.children[parent->block.num_children] = child;
+    parent->block.num_children++;
+}
+
+Node *Node_new(NodeType type)
+{
+    Node *self = calloc(sizeof(Node), 1);
+    self->type = type;
+    return self;
+}
+
+Type parse_type(Lexer *lexer)
+{
+    Type type = {0};
+    Token token = Lexer_peek(lexer);
+    if (token.type == TOKEN_INT) {
+        type.type = TYPE_INT;
+        Lexer_next(lexer);
+    } else {
+        type.type = TYPE_NONE;
+    }
+
+    while (Lexer_peek(lexer).type == TOKEN_AMPERSAND) {
+        Lexer_next(lexer);
+        type.indirection++;
+    }
+
+    return type;
+}
+
+Node *parse_literal(Lexer *lexer)
+{
+    Node *node = Node_new(AST_LITERAL);
+    Token token = assert_token(Lexer_next(lexer), TOKEN_INTLIT);
+    node->literal.type = (Type) {.type = TYPE_INT};
+    node->literal.as_int = token.value.as_int;
+    return node;
+}
+
+Node *parse_expression(Lexer *);
+
+Node *parse_var_declaration(Lexer *lexer)
+{
+    Token token = assert_token(Lexer_next(lexer), TOKEN_LET);
+    // TODO: Reuse this for globals? Or maybe just make a new function?
+    if (!current_function || current_function->type != AST_FUNC)
+        die_location(token.loc, "Variable declaration outside of function");
+    
+    token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
+    // NOTE: We don't allow shadowing of variables in the any blocks, 
+    //       this is by design since it's a common mistake.
+    if (find_local_variable(&token) != NULL)
+        die_location(token.loc, "Variable `%s` already declared", token.value.as_string);
+    
+    Node *node = Node_new(AST_VARDECL);
+    node->var_decl.var.name = token.value.as_string;
+
+    token = Lexer_next(lexer);
+    if (token.type != TOKEN_COLON)
+        die_location(token.loc, "Missing type specifier for variable `%s`", node->var_decl.var.name);
+    node->var_decl.var.type = parse_type(lexer);
+
+    add_variable_to_current_block(&node->var_decl.var);
+
+    token = Lexer_next(lexer);
+    if (token.type == TOKEN_ASSIGN) {
+        node->var_decl.value = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
+    } else {
+        assert_token(token, TOKEN_SEMICOLON);
+    }
+
+    return node;
+}
+
+Node *parse_function_call_args(Lexer *lexer, Node *func)
+{
+    Token identifier = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
+    Node *call = Node_new(AST_FUNCCALL);
+    call->call.func = func;
+    assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
+    Token token = Lexer_peek(lexer);
+
+    while (token.type != TOKEN_CLOSE_PAREN) {
+        Node *arg = parse_expression(lexer);
+
+        int new_size = call->call.num_args + 1;
+        call->call.args = realloc(call->call.args, sizeof(Node *) * new_size);
+        call->call.args[call->call.num_args++] = arg;
+        
+        if (new_size > func->func.num_args)
+            die_location(identifier.loc, "Too many arguments to function `%s`", func->func.name);
+
+        token = Lexer_peek(lexer);
+        if (token.type == TOKEN_COMMA) {
+            Lexer_next(lexer);
+            token = Lexer_peek(lexer);
+        }
+    }
+
+    if (call->call.num_args != func->func.num_args)
+        die_location(identifier.loc, "Too few arguments to function `%s`", func->func.name);
+
+    assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
+    return call;
+}
+
+Node *parse_factor(Lexer *lexer)
+{
+    // TODO: Parse more complicated things
+    Token token = Lexer_peek(lexer);
+    Node *expr;
+    if (token.type == TOKEN_MINUS) {
+        Lexer_next(lexer);
+        expr = Node_new(OP_NEG);
+        expr->unary_expr = parse_factor(lexer);
+    } else if (token.type == TOKEN_TILDE) {
+        Lexer_next(lexer);
+        expr = Node_new(OP_BWINV);
+        expr->unary_expr = parse_factor(lexer);
+    } else if (token.type == TOKEN_EXCLAMATION) {
+        Lexer_next(lexer);
+        expr = Node_new(OP_NOT);
+        expr->unary_expr = parse_factor(lexer);
+    } else if (token.type == TOKEN_OPEN_PAREN) {
+        Lexer_next(lexer);
+        expr = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
+    } else if (token.type == TOKEN_INTLIT) {
+        expr = parse_literal(lexer);
+    } else if (token.type == TOKEN_IDENTIFIER) {   
+        // TODO: Check for global variables when added
+
+        Variable *var = find_local_variable(&token); 
+        if (var != NULL) {
+            Lexer_next(lexer);
+            expr = Node_new(AST_VAR);
+            expr->variable = var;
+            return expr;
+        }
+        
+        Node *func = find_function_definition(&token);
+        if (func != NULL) {
+            return parse_function_call_args(lexer, func);
+        }
+
+        die_location(token.loc, "Unknown identifier `%s`", token.value.as_string);
+        expr = NULL;
+    } else {
+        die_location(token.loc, ": Expected token found in parse_factor: `%s`", token_type_to_str(token.type));
+        exit(1);
+    }
+    return expr;
+}
+
+#define BINOP_PARSER(next_parser, predicate)                                   \
+  Node *expr = next_parser(lexer);                                             \
+  Token token = Lexer_peek(lexer);                                             \
+  while (predicate(token.type)) {                                              \
+    Lexer_next(lexer);                                                         \
+    Node *op = Node_new(binary_token_to_op(token.type));                       \
+    Node *right = next_parser(lexer);                                          \
+    op->binary.left = expr;                                                    \
+    op->binary.right = right;                                                  \
+    expr = op;                                                                 \
+    token = Lexer_peek(lexer);                                                 \
+  }                                                                            \
+  return expr;
+
+bool is_term_token(TokenType type) { return type == TOKEN_STAR ||  type == TOKEN_SLASH ||  type == TOKEN_PERCENT; }
+Node *parse_term(Lexer *lexer) { BINOP_PARSER(parse_factor, is_term_token); }
+
+bool is_additive_token(TokenType type) { return type == TOKEN_PLUS || type == TOKEN_MINUS; }
+Node *parse_additive(Lexer *lexer) { BINOP_PARSER(parse_term, is_additive_token); }
+
+bool is_relational_token(TokenType type) { return type == TOKEN_LT || type == TOKEN_LEQ || type == TOKEN_GT || type == TOKEN_GEQ; }
+Node *parse_relational(Lexer *lexer) { BINOP_PARSER(parse_additive, is_relational_token); }
+
+bool is_equality_token(TokenType type) { return type == TOKEN_EQ || type == TOKEN_NEQ; }
+Node *parse_equality(Lexer *lexer) { BINOP_PARSER(parse_relational, is_equality_token); }
+
+bool is_logical_and_token(TokenType type) { return type == TOKEN_AND; }
+Node *parse_logical_and(Lexer *lexer) { BINOP_PARSER(parse_equality, is_logical_and_token); }
+
+bool is_logical_or_token(TokenType type) { return type == TOKEN_OR; }
+Node *parse_logical_or(Lexer *lexer) { BINOP_PARSER(parse_logical_and, is_logical_or_token); }
+
+Node *parse_conditional_exp(Lexer *lexer)
+{
+    Node *expr = parse_logical_or(lexer);
+    Token token = Lexer_peek(lexer);
+    if (token.type == TOKEN_QUESTION) {
+        Lexer_next(lexer);
+        Node *then_expr = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_COLON);
+        Node *else_expr = parse_expression(lexer);
+        Node *conditional = Node_new(AST_CONDITIONAL);
+        conditional->conditional.cond = expr;
+        conditional->conditional.do_then = then_expr;
+        conditional->conditional.do_else = else_expr;
+        expr = conditional;
+    }
+    return expr;
+}
+
+Node *parse_expression(Lexer *lexer)
+{
+    Node *node = parse_conditional_exp(lexer);
+    // FIXME: This is a hack to handle assignment expressions
+    //        and can probably be done properly.
+    if (node->type == AST_VAR) {
+        Token token = Lexer_peek(lexer);
+        if (token.type == TOKEN_ASSIGN) {
+            Lexer_next(lexer);
+            Variable *var = node->variable;
+            node->type = OP_ASSIGN;
+            node->assign.var = var;
+            node->assign.value = parse_expression(lexer);
+        }
+    }
+    return node;
+}
+
+Node *parse_block(Lexer *lexer);
+
+Node *parse_statement(Lexer *lexer)
+{
+    Node *node;
+    Token token = Lexer_peek(lexer);
+
+    if (token.type == TOKEN_RETURN) {
+        assert_token(Lexer_next(lexer), TOKEN_RETURN);
+        node = Node_new(AST_RETURN);
+        node->unary_expr = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
+    } else if (token.type == TOKEN_IF) {
+        Lexer_next(lexer);
+        node = Node_new(AST_IF);
+        assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
+        node->conditional.cond = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
+        // TODO: Allow blocks in here once implemented, currently
+        //       we can onle have a single statement in the if/else
+        node->conditional.do_then = parse_statement(lexer);
+        token = Lexer_peek(lexer);
+        if (token.type == TOKEN_ELSE) {
+            Lexer_next(lexer);
+            node->conditional.do_else = parse_statement(lexer);
+        }
+    } else if (token.type == TOKEN_WHILE) {
+        Lexer_next(lexer);
+        node = Node_new(AST_WHILE);
+        assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
+        node->loop.cond = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
+        node->loop.body = parse_statement(lexer);
+    }  else if (token.type == TOKEN_FOR) {
+        Lexer_next(lexer);
+        node = Node_new(AST_FOR);
+        assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
+        
+        // All of the expressions in the for loop are optional
+
+        // TODO: Allow this to be a declaration, need to inject
+        //       the variable into the symbol table for the block
+        if (Lexer_peek(lexer).type != TOKEN_SEMICOLON)
+            node->loop.init = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
+
+        if (Lexer_peek(lexer).type != TOKEN_SEMICOLON)
+            node->loop.cond = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
+
+        if (Lexer_peek(lexer).type != TOKEN_CLOSE_PAREN)
+            node->loop.step = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
+
+        node->loop.body = parse_statement(lexer);
+    } else if (token.type == TOKEN_OPEN_BRACE) {
+        node = parse_block(lexer);
+    } else {
+        // Default to trying to handle it as an expression
+        node = parse_expression(lexer);
+        assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
+    }
+
+    return node;
+}
+
+Node *parse_block(Lexer *lexer)
+{
+    assert_token(Lexer_next(lexer), TOKEN_OPEN_BRACE);
+
+    Node *block = Node_new(AST_BLOCK);
+    Token token = Lexer_peek(lexer);
+
+    block_stack_push(block);
+
+    while (token.type != TOKEN_CLOSE_BRACE) {
+        Node *block_item;
+        if (token.type == TOKEN_LET) {
+            block_item = parse_var_declaration(lexer);
+        } else {
+            // Default to a statement
+            block_item = parse_statement(lexer);
+        }
+        Node_add_child(block, block_item);
+        token = Lexer_peek(lexer);
+    }
+
+    block_stack_pop();
+
+    assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACE);
+    return block;
+}
+
+void push_new_function(Node *func)
+{
+    assert(func->type == AST_FUNC);
+    assert(function_count < MAX_FUNCTION_COUNT);
+    all_functions[function_count++] = func;
+    current_function = func;
+}
+
+void parse_func_args(Lexer *lexer, Node *func)
+{
+    assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
+    Token token = Lexer_peek(lexer);
+    while (token.type != TOKEN_CLOSE_PAREN) {
+        token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
+        // TODO: Check for shadowing with globals
+        assert_token(Lexer_next(lexer), TOKEN_COLON);
+        Type type = parse_type(lexer);    
+
+        i64 new_count = func->func.num_args + 1;
+        func->func.args = realloc(func->func.args, sizeof(Variable) * new_count);
+        Variable *var = &func->func.args[func->func.num_args++];
+        var->name = token.value.as_string;
+        var->type = type;
+
+        token = Lexer_peek(lexer);
+        if (token.type == TOKEN_COMMA) {
+            Lexer_next(lexer);
+            token = Lexer_peek(lexer);
+        }
+    }
+    assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
+
+    // Set the offsets for the arguments
+    
+    // IMPORTANT: We want to skip the saved ret_addr+old_rbp that we
+    //            pushed on the stack. Each of these is 8 bytes.
+    int offset = -16; 
+    for (int i = 0; i < func->func.num_args; i++) {
+        Variable *var = &func->func.args[i];
+        var->offset = offset;
+        // TODO: Compute this for different types
+        int var_size = 8;
+        offset -= var_size;
+    }
+}
+
+Node *parse_func(Lexer *lexer)
+{
+    Token token;
+    token = assert_token(Lexer_next(lexer), TOKEN_FN);
+
+    Node *func = Node_new(AST_FUNC);
+    push_new_function(func);
+
+    token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
+    func->func.name = token.value.as_string;
+    parse_func_args(lexer, func);
+
+    token = Lexer_peek(lexer);
+    if (token.type == TOKEN_COLON) {
+        // TODO: Parse all return types
+        assert_token(Lexer_next(lexer), TOKEN_COLON);
+        func->func.return_type = parse_type(lexer);
+    } else {
+        // No return type, void fn.
+        func->func.return_type = (Type){.type = TYPE_NONE};
+    }
+
+    // Make sure there's no funny business with the stack offset
+    assert(cur_stack_offset == 0);
+    assert(block_stack_count == 0);
+    func->func.body = parse_block(lexer);
+    assert(block_stack_count == 0);
+    assert(cur_stack_offset == 0);
+
+    return func;
+}
+
+Node *parse_program(Lexer *lexer)
+{
+    Node *program = Node_new(AST_PROGRAM);
+    Token token;
+    while ((token = Lexer_peek(lexer)).type != TOKEN_EOF) {
+        if (token.type == TOKEN_FN) {
+            Node *func = parse_func(lexer);
+            Node_add_child(program, func);
+        } else {
+            die_location(token.loc, "Unexpected token in parse_program: `%s`\n", token_type_to_str(token.type));
+            exit(1);
+            break;
+        }
+    }
+    return program;
+}
\ No newline at end of file
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..7f7dacb
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include "ast.h"
+#include "lexer.h"
+
+Node *parse_program(Lexer *lexer);
+void print_ast(Node *node);
\ No newline at end of file
diff --git a/src/tokens.c b/src/tokens.c
new file mode 100644
index 0000000..e562e16
--- /dev/null
+++ b/src/tokens.c
@@ -0,0 +1,70 @@
+#include "tokens.h"
+#include <assert.h>
+#include <stdio.h>
+
+Token Token_from_type(TokenType type, Location loc)
+{
+    Token token = {0};
+    token.type = type;
+    token.loc = loc;
+    return token;
+}
+
+Token Token_from_int(i64 value, Location loc)
+{
+    Token token = {0};
+    token.type = TOKEN_INTLIT;
+    token.value.as_int = value;
+    token.loc = loc;
+    return token;
+}
+
+Token Token_from_string(char *value, Location loc)
+{
+    Token token = {0};
+    token.type = TOKEN_STRINGLIT;
+    token.value.as_string = value;
+    token.loc = loc;
+    return token;
+}
+
+Token Token_from_identifier(char *value, Location loc)
+{
+    Token token = {0};
+    token.type = TOKEN_IDENTIFIER;
+    token.value.as_string = value;
+    token.loc = loc;
+    return token;
+}
+
+void Location_print(FILE *f, Location loc)
+{
+    fprintf(f, "%s:%d:%d", loc.filename, loc.line+1, loc.col+1);
+}
+
+char *token_type_to_str(TokenType type)
+{
+    // Otherwise, just print the token type
+    switch (type)
+    {
+    #define ENUM_TOKEN(name, str) case name: return str;
+        ENUM_TOKENS(ENUM_TOKEN)
+    #undef ENUM_TOKEN
+    default: assert(false && "Unreachable");
+    }
+}
+
+
+void Token_print(FILE *f, Token *token)
+{
+    // Handle the different literals manually
+    switch (token->type)
+    {
+        case TOKEN_INTLIT:      fprintf(f, "%lld", token->value.as_int); return;
+        case TOKEN_STRINGLIT:   fprintf(f, "\"%s\"", token->value.as_string); return;
+        case TOKEN_IDENTIFIER:  fprintf(f, "%s", token->value.as_string); return;
+        default: break;
+    }
+
+    fprintf(f, "%s", token_type_to_str(token->type));
+}
\ No newline at end of file
diff --git a/src/tokens.h b/src/tokens.h
new file mode 100644
index 0000000..44f5e6e
--- /dev/null
+++ b/src/tokens.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include "common.h"
+#include <stdio.h>
+
+#define ENUM_TOKENS(F)                                                         \
+  F(TOKEN_AMPERSAND, "&")                                                      \
+  F(TOKEN_AND, "&&")                                                           \
+  F(TOKEN_ASSIGN, "=")                                                         \
+  F(TOKEN_BAR, "|")                                                            \
+  F(TOKEN_CLOSE_BRACE, "}")                                                    \
+  F(TOKEN_CLOSE_PAREN, ")")                                                    \
+  F(TOKEN_COLON, ":")                                                          \
+  F(TOKEN_COMMA, ",")                                                          \
+  F(TOKEN_ELSE, "else")                                                        \
+  F(TOKEN_EOF, "EOF")                                                          \
+  F(TOKEN_EQ, "==")                                                            \
+  F(TOKEN_EXCLAMATION, "!")                                                    \
+  F(TOKEN_FN, "fn")                                                            \
+  F(TOKEN_FOR, "for")                                                          \
+  F(TOKEN_GEQ, ">=")                                                           \
+  F(TOKEN_GT, ">")                                                             \
+  F(TOKEN_IDENTIFIER, "identifier")                                            \
+  F(TOKEN_IF, "if")                                                            \
+  F(TOKEN_INT, "int")                                                          \
+  F(TOKEN_INTLIT, "integer literal")                                           \
+  F(TOKEN_LEQ, "<=")                                                           \
+  F(TOKEN_LSHIFT, "<<")                                                        \
+  F(TOKEN_LET, "let")                                                          \
+  F(TOKEN_LT, "<")                                                             \
+  F(TOKEN_MINUS, "-")                                                          \
+  F(TOKEN_MINUSEQUALS, "-=")                                                   \
+  F(TOKEN_MINUSMINUS, "--")                                                    \
+  F(TOKEN_NEQ, "!=")                                                           \
+  F(TOKEN_OPEN_BRACE, "{")                                                     \
+  F(TOKEN_OPEN_PAREN, "(")                                                     \
+  F(TOKEN_OR, "||")                                                            \
+  F(TOKEN_PERCENT, "%")                                                        \
+  F(TOKEN_PLUS, "+")                                                           \
+  F(TOKEN_PLUSEQUALS, "+=")                                                    \
+  F(TOKEN_PLUSPLUS, "++")                                                      \
+  F(TOKEN_QUESTION, "?")                                                       \
+  F(TOKEN_RETURN, "return")                                                    \
+  F(TOKEN_RSHIFT, ">>")                                                        \
+  F(TOKEN_SEMICOLON, ";")                                                      \
+  F(TOKEN_SLASH, "/")                                                          \
+  F(TOKEN_STAR, "*")                                                           \
+  F(TOKEN_STRINGLIT, "string literal")                                         \
+  F(TOKEN_TILDE, "~")                                                          \
+  F(TOKEN_WHILE, "while")                                                      \
+  F(TOKEN_XOR, "^")
+
+typedef enum {
+#define ENUM_TOKEN(name, str) name,
+  ENUM_TOKENS(ENUM_TOKEN)
+#undef ENUM_TOKEN
+} TokenType;
+
+typedef struct {
+  char *filename;
+  int line, col;
+} Location;
+
+void Location_print(FILE *f, Location loc);
+
+typedef struct {
+  TokenType type;
+  Location loc;
+  union tokens {
+    i64 as_int;
+    char *as_string;
+    char as_char;
+  } value;
+} Token;
+
+char *token_type_to_str(TokenType type);
+
+Token Token_from_type(TokenType type, Location loc);
+Token Token_from_int(i64 value, Location loc);
+Token Token_from_string(char *value, Location loc);
+Token Token_from_identifier(char *value, Location loc);
+
+void Token_print(FILE *f, Token *token);
\ No newline at end of file
diff --git a/src/utils.c b/src/utils.c
new file mode 100644
index 0000000..2335981
--- /dev/null
+++ b/src/utils.c
@@ -0,0 +1,30 @@
+#include "utils.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void die(const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    exit(1);
+}
+
+void _die_location(char *file, int line, Location loc, const char *fmt, ...)
+{
+    Location_print(stderr, loc);
+    fprintf(stderr, ": ");
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "NOTE: Error occurred in %s:%d\n", file, line);
+    exit(1);
+}
+
+i64 i64max(i64 a, i64 b) { return a > b ? a : b; }
+i64 i64min(i64 a, i64 b) { return a < b ? a : b; }
\ No newline at end of file
diff --git a/src/utils.h b/src/utils.h
new file mode 100644
index 0000000..dfc018c
--- /dev/null
+++ b/src/utils.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "common.h"
+#include "tokens.h"
+
+void die(const char *fmt, ...);
+void _die_location(char *file, int line, Location loc, const char *fmt, ...);
+
+i64 i64max(i64 a, i64 b);
+i64 i64min(i64 a, i64 b);
+
+#define die_location(loc, ...) _die_location(__FILE__, __LINE__, loc, __VA_ARGS__)
\ No newline at end of file
-- 
cgit v1.2.3