import "std/vector.cup" import "compiler/types.cup" enum NodeType { // Unary AST_NEG, AST_NOT, AST_BWINV, AST_ADDROF, AST_DEREF, // Binary AST_PLUS, AST_MINUS, AST_MUL, AST_DIV, AST_MOD, AST_LSHIFT, AST_RSHIFT, AST_AND, AST_BWAND, AST_OR, AST_BWOR, AST_XOR, // Comparison AST_EQ, AST_NEQ, AST_LT, AST_LEQ, AST_GT, AST_GEQ, // Misc. AST_ASSIGN, AST_MEMBER, // AST types AST_LITERAL, AST_CONSTANT, AST_FUNCCALL, AST_CONDITIONAL, AST_IF, AST_WHILE, AST_DEFER, AST_FOR, AST_VARDECL, AST_LOCAL_VAR, AST_GLOBAL_VAR, AST_RETURN, AST_FUNC, AST_BUILTIN, AST_PROGRAM, AST_BLOCK, }; struct Variable { name: char *; typ: Type *; offset: int; }; struct Node { typ: int; // NodeType etyp: Type*; // Expression type // TODO: Anonymous union members so we can do `Node.binary`, etc. d: union { binary: struct { lhs: Node *; rhs: Node *; }; unary: Node *; func: struct { name: char *; body: Node *; max_locals_size: int; args: Vector *; // Vector }; block: struct { children: Vector *; // Vector locals: Vector *; // Vector locals_size: int; }; literal: union { as_int: int; as_char: char; as_string: char *; }; var_decl: struct { var: Variable; init: Node *; }; assign: struct { lhs: Node *; rhs: Node *; }; conditional: struct { cond: Node *; then: Node *; els: Node *; }; // `loop` is keyword in rust, syntax highlighting breaks looop: struct { cond: Node *; body: Node *; // for loop: init: Node *; step: Node *; }; variable: Variable *; call: struct { func: Node *; args: Vector *; // Vector }; member: struct { obj: Node *; offset: int; is_ptr: int; }; constant: struct { name: char *; value: Node *; // Must be int literal }; }; }; let node_counter = 0; fn node_new(typ: int): Node* { let node: Node* = malloc(sizeof(Node)); ++node_counter; node.typ = typ; return node; } fn node_from_int_literal(val: int): Node* { let node: Node* = node_new(AST_LITERAL); node.etyp = type_new(TYPE_INT); node.d.literal.as_int = val; return node; } fn block_add_child(block: Node*, child: Node*) { if (block.d.block.children == null) block.d.block.children = vector_new(); vector_push(block.d.block.children, child); } // TODO: Careful here, the input type here is the same as `type_to_string` fn node_type_to_string(typ: int): char* { if (typ == AST_NEG) return "AST_NEG"; if (typ == AST_NOT) return "AST_NOT"; if (typ == AST_BWINV) return "AST_BWINV"; if (typ == AST_ADDROF) return "AST_ADDROF"; if (typ == AST_DEREF) return "AST_DEREF"; if (typ == AST_PLUS) return "AST_PLUS"; if (typ == AST_MINUS) return "AST_MINUS"; if (typ == AST_MUL) return "AST_MUL"; if (typ == AST_DIV) return "AST_DIV"; if (typ == AST_MOD) return "AST_MOD"; if (typ == AST_LSHIFT) return "AST_LSHIFT"; if (typ == AST_RSHIFT) return "AST_RSHIFT"; if (typ == AST_AND) return "AST_AND"; if (typ == AST_BWAND) return "AST_BWAND"; if (typ == AST_OR) return "AST_OR"; if (typ == AST_BWOR) return "AST_BWOR"; if (typ == AST_XOR) return "AST_XOR"; if (typ == AST_EQ) return "AST_EQ"; if (typ == AST_NEQ) return "AST_NEQ"; if (typ == AST_LT) return "AST_LT"; if (typ == AST_LEQ) return "AST_LEQ"; if (typ == AST_GT) return "AST_GT"; if (typ == AST_GEQ) return "AST_GEQ"; if (typ == AST_ASSIGN) return "AST_ASSIGN"; if (typ == AST_MEMBER) return "AST_MEMBER"; if (typ == AST_LITERAL) return "AST_LITERAL"; if (typ == AST_CONSTANT) return "AST_CONSTANT"; if (typ == AST_FUNCCALL) return "AST_FUNCCALL"; if (typ == AST_CONDITIONAL) return "AST_CONDITIONAL"; if (typ == AST_IF) return "AST_IF"; if (typ == AST_WHILE) return "AST_WHILE"; if (typ == AST_DEFER) return "AST_DEFER"; if (typ == AST_FOR) return "AST_FOR"; if (typ == AST_VARDECL) return "AST_VARDECL"; if (typ == AST_LOCAL_VAR) return "AST_LOCAL_VAR"; if (typ == AST_GLOBAL_VAR) return "AST_GLOBAL_VAR"; if (typ == AST_RETURN) return "AST_RETURN"; if (typ == AST_FUNC) return "AST_FUNC"; if (typ == AST_BUILTIN) return "AST_BUILTIN"; if (typ == AST_PROGRAM) return "AST_PROGRAM"; if (typ == AST_BLOCK) return "AST_BLOCK"; puts("Unknown node type in node_type_to_string: "); putu(typ); putc('\n'); exit(1); } fn is_binary_op(typ: int): int { if (typ == AST_PLUS) return true; if (typ == AST_MINUS) return true; if (typ == AST_MUL) return true; if (typ == AST_DIV) return true; if (typ == AST_MOD) return true; if (typ == AST_LSHIFT) return true; if (typ == AST_RSHIFT) return true; if (typ == AST_AND) return true; if (typ == AST_BWAND) return true; if (typ == AST_OR) return true; if (typ == AST_BWOR) return true; if (typ == AST_XOR) return true; if (typ == AST_EQ) return true; if (typ == AST_NEQ) return true; if (typ == AST_LT) return true; if (typ == AST_LEQ) return true; if (typ == AST_GT) return true; if (typ == AST_GEQ) return true; return false; } fn is_unary_op(typ: int): int { if (typ == AST_NEG) return true; if (typ == AST_NOT) return true; if (typ == AST_BWINV) return true; if (typ == AST_ADDROF) return true; if (typ == AST_DEREF) return true; return false; } fn is_lvalue(typ: int): int { if (typ == AST_LOCAL_VAR) return true; if (typ == AST_GLOBAL_VAR) return true; if (typ == AST_MEMBER) return true; if (typ == AST_DEREF) return true; return false; } fn binary_token_to_op(token_typ: int): int { if (token_typ == TOKEN_PLUS) return AST_PLUS; if (token_typ == TOKEN_MINUS) return AST_MINUS; if (token_typ == TOKEN_STAR) return AST_MUL; if (token_typ == TOKEN_SLASH) return AST_DIV; if (token_typ == TOKEN_PERCENT) return AST_MOD; if (token_typ == TOKEN_LSHIFT) return AST_LSHIFT; if (token_typ == TOKEN_RSHIFT) return AST_RSHIFT; if (token_typ == TOKEN_AND) return AST_AND; if (token_typ == TOKEN_OR) return AST_OR; if (token_typ == TOKEN_XOR) return AST_XOR; if (token_typ == TOKEN_EQ) return AST_EQ; if (token_typ == TOKEN_NEQ) return AST_NEQ; if (token_typ == TOKEN_LT) return AST_LT; if (token_typ == TOKEN_LEQ) return AST_LEQ; if (token_typ == TOKEN_GT) return AST_GT; if (token_typ == TOKEN_GEQ) return AST_GEQ; if (token_typ == TOKEN_AMPERSAND) return AST_BWAND; if (token_typ == TOKEN_BAR) return AST_BWOR; if (token_typ == TOKEN_CARET) return AST_XOR; puts("Unknown token in binary_token_to_op: "); putsln(token_type_to_string(token_typ)); exit(1); } fn dump_ast(node: Node*, depth: int) { for (let i = 0; i < 2*depth; ++i) putc(' '); if (node.typ == AST_PROGRAM || node.typ == AST_BLOCK) { putsln(node_type_to_string(node.typ)); for (let i = 0; i < node.d.block.children.size; ++i) { dump_ast(node.d.block.children.data[i], depth + 1); } } else if (is_binary_op(node.typ)) { putsln(node_type_to_string(node.typ)); dump_ast(node.d.binary.lhs, depth + 1); dump_ast(node.d.binary.rhs, depth + 1); } else if (is_unary_op(node.typ) || node.typ == AST_RETURN) { putsln(node_type_to_string(node.typ)); dump_ast(node.d.unary, depth + 1); } else if (node.typ == AST_CONDITIONAL || node.typ == AST_IF) { putsln(node_type_to_string(node.typ)); dump_ast(node.d.conditional.cond, depth + 1); dump_ast(node.d.conditional.then, depth + 1); if (node.d.conditional.els) dump_ast(node.d.conditional.els, depth + 1); } else if (node.typ == AST_LITERAL) { if (node.etyp.typ == TYPE_INT) { putu(node.d.literal.as_int); putc('\n'); } else if (node.etyp.typ == TYPE_PTR) { putc('"'); puts(node.d.literal.as_string); putc('"'); putc('\n'); } else if (node.etyp.typ == TYPE_CHAR) { putc('\''); putc(node.d.literal.as_char); putc('\''); putc('\n'); } else { die("Unknown literal type in dump_ast"); } } else if (node.typ == AST_FUNC) { puts("func "); puts(node.d.func.name); puts("()\n"); dump_ast(node.d.func.body, depth + 1); } else if (node.typ == AST_VARDECL) { puts("let "); puts(node.d.var_decl.var.name); if (node.d.var_decl.var.typ == TYPE_PTR) { puts(": "); puts(create_type_string(node.d.var_decl.var.typ)); } if (node.d.var_decl.init) { puts(" =\n"); dump_ast(node.d.var_decl.init, depth + 1); } else { putc('\n'); } } else if (node.typ == AST_ASSIGN) { putsln(node_type_to_string(node.typ)); dump_ast(node.d.assign.lhs, depth + 1); dump_ast(node.d.assign.rhs, depth + 1); } else if (node.typ == AST_LOCAL_VAR || node.typ == AST_GLOBAL_VAR) { putsln(node.d.variable.name); } else { putsln(node_type_to_string(node.typ)); } }