import "compiler/tokens.cup" import "compiler/types.cup" import "std/vector.cup" enum NodeType { // Unary AST_NEG, AST_NOT, AST_BWINV, AST_ADDROF, AST_DEREF, // Binary AST_PLUS, AST_MINUS, AST_MUL, AST_DIV, AST_MOD, AST_LSHIFT, AST_RSHIFT, AST_AND, AST_BWAND, AST_OR, AST_BWOR, AST_XOR, // Comparison AST_EQ, AST_NEQ, AST_LT, AST_LEQ, AST_GT, AST_GEQ, // Misc. AST_ASSIGN, AST_MEMBER, // AST types AST_LITERAL, AST_CONSTANT, AST_FUNCCALL, AST_CONDITIONAL, AST_IF, AST_WHILE, AST_DEFER, AST_FOR, AST_VARDECL, AST_LOCAL_VAR, AST_GLOBAL_VAR, AST_RETURN, AST_FUNC, AST_BUILTIN, AST_PROGRAM, AST_BLOCK, }; struct Variable { name: char *; typ: Type *; offset: int; }; struct Node { typ: int; // NodeType etyp: Type*; // Expression type // TODO: Anonymous union members so we can do `Node.binary`, etc. d: union { binary: struct { lhs: Node *; rhs: Node *; }; unary: Node *; func: struct { name: char *; body: Node *; max_locals_size: int; args: Vector *; // Vector is_defined: int; }; block: struct { children: Vector *; // Vector locals: Vector *; // Vector locals_size: int; }; literal: union { as_int: int; as_char: char; as_string: char *; }; var_decl: struct { var: Variable; init: Node *; }; assign: struct { lhs: Node *; rhs: Node *; }; conditional: struct { cond: Node *; then: Node *; els: Node *; }; // `loop` is keyword in rust, syntax highlighting breaks looop: struct { cond: Node *; body: Node *; // for loop: init: Node *; step: Node *; }; variable: Variable *; call: struct { func: Node *; args: Vector *; // Vector }; member: struct { obj: Node *; offset: int; is_ptr: int; }; constant: struct { name: char *; value: Node *; // Must be int literal }; }; }; let node_counter = 0; fn node_new(typ: int): Node* { let node: Node* = malloc(sizeof(Node)); ++node_counter; node.typ = typ; return node; } fn node_from_int_literal(val: int): Node* { let node: Node* = node_new(AST_LITERAL); node.etyp = type_new(TYPE_INT); node.d.literal.as_int = val; return node; } fn variable_new(name: char*, typ: Type*, offset: int): Variable* { let v: Variable* = malloc(sizeof(Variable)); v.name = name; v.typ = typ; v.offset = offset; return v; } fn block_add_child(block: Node*, child: Node*) { if (block.d.block.children == null) block.d.block.children = vector_new(); vector_push(block.d.block.children, child); } // TODO: Careful here, the input type here is the same as `type_to_string` fn node_type_to_string(typ: int): char* { if (typ == AST_NEG) return "AST_NEG"; if (typ == AST_NOT) return "AST_NOT"; if (typ == AST_BWINV) return "AST_BWINV"; if (typ == AST_ADDROF) return "AST_ADDROF"; if (typ == AST_DEREF) return "AST_DEREF"; if (typ == AST_PLUS) return "AST_PLUS"; if (typ == AST_MINUS) return "AST_MINUS"; if (typ == AST_MUL) return "AST_MUL"; if (typ == AST_DIV) return "AST_DIV"; if (typ == AST_MOD) return "AST_MOD"; if (typ == AST_LSHIFT) return "AST_LSHIFT"; if (typ == AST_RSHIFT) return "AST_RSHIFT"; if (typ == AST_AND) return "AST_AND"; if (typ == AST_BWAND) return "AST_BWAND"; if (typ == AST_OR) return "AST_OR"; if (typ == AST_BWOR) return "AST_BWOR"; if (typ == AST_XOR) return "AST_XOR"; if (typ == AST_EQ) return "AST_EQ"; if (typ == AST_NEQ) return "AST_NEQ"; if (typ == AST_LT) return "AST_LT"; if (typ == AST_LEQ) return "AST_LEQ"; if (typ == AST_GT) return "AST_GT"; if (typ == AST_GEQ) return "AST_GEQ"; if (typ == AST_ASSIGN) return "AST_ASSIGN"; if (typ == AST_MEMBER) return "AST_MEMBER"; if (typ == AST_LITERAL) return "AST_LITERAL"; if (typ == AST_CONSTANT) return "AST_CONSTANT"; if (typ == AST_FUNCCALL) return "AST_FUNCCALL"; if (typ == AST_CONDITIONAL) return "AST_CONDITIONAL"; if (typ == AST_IF) return "AST_IF"; if (typ == AST_WHILE) return "AST_WHILE"; if (typ == AST_DEFER) return "AST_DEFER"; if (typ == AST_FOR) return "AST_FOR"; if (typ == AST_VARDECL) return "AST_VARDECL"; if (typ == AST_LOCAL_VAR) return "AST_LOCAL_VAR"; if (typ == AST_GLOBAL_VAR) return "AST_GLOBAL_VAR"; if (typ == AST_RETURN) return "AST_RETURN"; if (typ == AST_FUNC) return "AST_FUNC"; if (typ == AST_BUILTIN) return "AST_BUILTIN"; if (typ == AST_PROGRAM) return "AST_PROGRAM"; if (typ == AST_BLOCK) return "AST_BLOCK"; puts("Unknown node type in node_type_to_string: "); putu(typ); putc('\n'); exit(1); } fn is_binary_op(typ: int): int { if (typ == AST_PLUS) return true; if (typ == AST_MINUS) return true; if (typ == AST_MUL) return true; if (typ == AST_DIV) return true; if (typ == AST_MOD) return true; if (typ == AST_LSHIFT) return true; if (typ == AST_RSHIFT) return true; if (typ == AST_AND) return true; if (typ == AST_BWAND) return true; if (typ == AST_OR) return true; if (typ == AST_BWOR) return true; if (typ == AST_XOR) return true; if (typ == AST_EQ) return true; if (typ == AST_NEQ) return true; if (typ == AST_LT) return true; if (typ == AST_LEQ) return true; if (typ == AST_GT) return true; if (typ == AST_GEQ) return true; return false; } fn is_unary_op(typ: int): int { if (typ == AST_NEG) return true; if (typ == AST_NOT) return true; if (typ == AST_BWINV) return true; if (typ == AST_ADDROF) return true; if (typ == AST_DEREF) return true; return false; } fn is_lvalue(typ: int): int { if (typ == AST_LOCAL_VAR) return true; if (typ == AST_GLOBAL_VAR) return true; if (typ == AST_MEMBER) return true; if (typ == AST_DEREF) return true; return false; } fn binary_token_to_op(token_typ: int): int { if (token_typ == TOKEN_PLUS) return AST_PLUS; if (token_typ == TOKEN_MINUS) return AST_MINUS; if (token_typ == TOKEN_STAR) return AST_MUL; if (token_typ == TOKEN_SLASH) return AST_DIV; if (token_typ == TOKEN_PERCENT) return AST_MOD; if (token_typ == TOKEN_LSHIFT) return AST_LSHIFT; if (token_typ == TOKEN_RSHIFT) return AST_RSHIFT; if (token_typ == TOKEN_AND) return AST_AND; if (token_typ == TOKEN_OR) return AST_OR; if (token_typ == TOKEN_XOR) return AST_XOR; if (token_typ == TOKEN_EQ) return AST_EQ; if (token_typ == TOKEN_NEQ) return AST_NEQ; if (token_typ == TOKEN_LT) return AST_LT; if (token_typ == TOKEN_LEQ) return AST_LEQ; if (token_typ == TOKEN_GT) return AST_GT; if (token_typ == TOKEN_GEQ) return AST_GEQ; if (token_typ == TOKEN_AMPERSAND) return AST_BWAND; if (token_typ == TOKEN_BAR) return AST_BWOR; if (token_typ == TOKEN_CARET) return AST_XOR; puts("Unknown token in binary_token_to_op: "); putsln(token_type_to_string(token_typ)); exit(1); } fn dump_ast(node: Node*, depth: int) { for (let i = 0; i < 2*depth; ++i) putc(' '); if (node.typ == AST_PROGRAM || node.typ == AST_BLOCK) { putsln(node_type_to_string(node.typ)); for (let i = 0; i < node.d.block.children.size; ++i) { dump_ast(node.d.block.children.data[i], depth + 1); } } else if (is_binary_op(node.typ)) { putsln(node_type_to_string(node.typ)); dump_ast(node.d.binary.lhs, depth + 1); dump_ast(node.d.binary.rhs, depth + 1); } else if (is_unary_op(node.typ) || node.typ == AST_RETURN) { putsln(node_type_to_string(node.typ)); dump_ast(node.d.unary, depth + 1); } else if (node.typ == AST_CONDITIONAL || node.typ == AST_IF) { putsln(node_type_to_string(node.typ)); dump_ast(node.d.conditional.cond, depth + 1); dump_ast(node.d.conditional.then, depth + 1); if (node.d.conditional.els) dump_ast(node.d.conditional.els, depth + 1); } else if (node.typ == AST_LITERAL) { if (node.etyp.typ == TYPE_INT) { putu(node.d.literal.as_int); putc('\n'); } else if (node.etyp.typ == TYPE_PTR) { putc('"'); puts(node.d.literal.as_string); putc('"'); putc('\n'); } else if (node.etyp.typ == TYPE_CHAR) { putc('\''); putc(node.d.literal.as_char); putc('\''); putc('\n'); } else { die("Unknown literal type in dump_ast"); } } else if (node.typ == AST_FUNC) { puts("func "); puts(node.d.func.name); puts("()\n"); dump_ast(node.d.func.body, depth + 1); } else if (node.typ == AST_VARDECL) { puts("let "); puts(node.d.var_decl.var.name); if (node.d.var_decl.var.typ == TYPE_PTR) { puts(": "); puts(create_type_string(node.d.var_decl.var.typ)); } if (node.d.var_decl.init) { puts(" =\n"); dump_ast(node.d.var_decl.init, depth + 1); } else { putc('\n'); } } else if (node.typ == AST_ASSIGN) { putsln(node_type_to_string(node.typ)); dump_ast(node.d.assign.lhs, depth + 1); dump_ast(node.d.assign.rhs, depth + 1); } else if (node.typ == AST_LOCAL_VAR || node.typ == AST_GLOBAL_VAR) { putsln(node.d.variable.name); } else { putsln(node_type_to_string(node.typ)); } } // Defined below. fn type_check_unary(node: Node*, token: Token*): Node*; fn decay_array_to_pointer(node: Node*, token: Token*): Node* { // We can only take the address of an lvalue, so we need to ensure that if (is_lvalue(node.typ) && node.etyp.typ == TYPE_ARRAY) { let address = node_new(AST_ADDROF); address.d.unary = node; address = type_check_unary(address, token); node = address; } return node; } fn type_check_unary(node: Node*, token: Token*): Node* { let old_type = node.d.unary.etyp; if (node.typ != AST_ADDROF && old_type.typ == TYPE_STRUCT) die_loc(here, &token.loc, "Performing invalid unary operation on struct type"); if (node.typ == AST_NOT) { node.etyp = type_new(TYPE_INT); } else if (node.typ == AST_ADDROF) { let ptr = type_new(TYPE_PTR); // The address of an array is a pointer to the first element ptr.ptr = old_type.typ == TYPE_ARRAY ? old_type.ptr : old_type; node.etyp = ptr; } else if (node.typ == AST_DEREF) { if (old_type.typ != TYPE_PTR) die_loc(here, &token.loc, "Cannot dereference non-pointer type"); node.etyp = old_type.ptr; // If the dereferenced type is an array, we need to decay it to a // pointer to the first element. node = decay_array_to_pointer(node, token); } else if (node.typ == AST_NEG) { if (old_type.typ != TYPE_INT) die_loc(here, &token.loc, "Cannot negate non-integer type"); node.etyp = type_new(TYPE_INT); } else { // Default to not changing the type node.etyp = old_type; } return node; } fn type_check_binary(node: Node*, token: Token*): Node* { let lhs = node.d.binary.lhs.etyp; let rhs = node.d.binary.rhs.etyp; if (lhs.typ == TYPE_STRUCT || rhs.typ == TYPE_STRUCT) die_loc(here, &token.loc, "Performing invalid binary operation on struct type"); if (node.typ == AST_PLUS) { if (is_int_type(lhs) && is_int_type(rhs)) { node.etyp = type_new(TYPE_INT); } else if (lhs.typ == TYPE_PTR && is_int_type(rhs)) { node.etyp = lhs; // Pointer arithmetic! if (size_for_type(lhs.ptr) != 1) { let mul = node_new(AST_MUL); mul.d.binary.lhs = node.d.binary.rhs; mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr)); node.d.binary.rhs = mul; } } else if (is_int_type(lhs) && rhs.typ == TYPE_PTR) { node.etyp = rhs; // Pointer arithmetic! if (size_for_type(rhs.ptr) != 1) { let mul = node_new(AST_MUL); mul.d.binary.lhs = node.d.binary.lhs; mul.d.binary.rhs = node_from_int_literal(size_for_type(rhs.ptr)); node.d.binary.lhs = mul; } } else { die_loc(here, &token.loc, "Cannot add non-integer types"); } } else if (node.typ == AST_MINUS) { if (is_int_type(lhs) && is_int_type(rhs)) { node.etyp = type_new(TYPE_INT); } else if (lhs.typ == TYPE_PTR && is_int_type(rhs)) { node.etyp = lhs; // Pointer arithmetic! if (size_for_type(lhs.ptr) != 1) { let mul = node_new(AST_MUL); mul.d.binary.lhs = node.d.binary.rhs; mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr)); node.d.binary.rhs = mul; } } else if (is_int_type(lhs) && rhs.typ == TYPE_PTR) { node.etyp = rhs; // Pointer arithmetic! if (size_for_type(rhs.ptr) != 1) { let mul = node_new(AST_MUL); mul.d.binary.lhs = node.d.binary.lhs; mul.d.binary.rhs = node_from_int_literal(size_for_type(rhs.ptr)); node.d.binary.lhs = mul; } } else if (lhs.typ == TYPE_PTR && rhs.typ == TYPE_PTR) { node.etyp = type_new(TYPE_INT); if (!types_equal(lhs.ptr, rhs.ptr)) die_loc(here, &token.loc, "Cannot subtract pointers of different types"); // Pointer arithmetic! (Divide by size of element) if (size_for_type(lhs.ptr) != 1) { let mul = node_new(AST_MUL); mul.d.binary.lhs = node.d.binary.lhs; mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr)); node.d.binary.lhs = mul; } } else { die_loc(here, &token.loc, "Cannot subtract non-integer types"); } } else if (node.typ == AST_MUL || node.typ == AST_DIV || node.typ == AST_MOD) { if (is_int_type(lhs) && is_int_type(rhs)) { node.etyp = lhs; } else { die_loc2(here, &token.loc, "Cannot do operation non-integer types:", node_type_to_string(node.typ)); } } else { // FIXME: This isn't very correct, but it's probably good enough for now node.etyp = type_new(TYPE_INT); } return node; } // FIXME: These should be in `types.cup` ideally, but `Variable` is not defined // there and we can't forward-declare types. fn compound_push_field(compound: Type*, name: char*, typ: Type*): int { if (compound.typ != TYPE_STRUCT && compound.typ != TYPE_UNION) die("compound_push_field: not a compound type"); let is_union = compound.typ == TYPE_UNION; let field_size = size_for_type(typ); let offset_factor = min(field_size, 8); let offset = is_union ? 0 : align_up(compound.size, offset_factor); compound.size = is_union ? max(field_size, compound.size) : offset + field_size; vector_push(compound.fields, variable_new(name, typ, offset)); return offset; } fn compound_find_field(typ: Type*, name: char*): Variable* { for (let i = 0; i < typ.fields.size; ++i) { let field: Variable* = typ.fields.data[i]; if (streq(field.name, name)) { return field; } } return null; }