import "compiler/ast.cup" import "compiler/lexer.cup" // p_ prefix for parser global variables. let p_all_functions = vector_new(); let p_current_function: Node* = null; let p_block_stack = vector_new(); let p_cur_stack_offset = 0; let p_global_variables = vector_new(); let p_global_offset = 0; let p_lexer_stack = vector_new(); let p_constant_stack = vector_new(); let p_compound_type_stack = vector_new(); fn block_stack_push(block: Node*) { vector_push(p_block_stack, block); } fn block_stack_pop() { let block: Node* = vector_pop(p_block_stack); p_cur_stack_offset = p_cur_stack_offset - block.d.block.locals_size; } fn constant_push(name: char*, value: int) { let node = node_new(AST_CONSTANT); node.d.constant.name = name; node.d.constant.value = node_from_int_literal(value); vector_push(p_constant_stack, node); } fn find_local_variable(token: Token*): Variable* { if (p_current_function == null) return null; for (let i = p_block_stack.size - 1; i >= 0; --i) { let block: Node* = p_block_stack.data[i]; for (let j = 0; j < block.d.block.locals.size; ++j) { let var: Variable* = block.d.block.locals.data[j]; if (streq(token.value.as_string, var.name)) { return var; } } } let args = p_current_function.d.func.args; for (let i = 0; i < args.size; ++i) { let var: Variable* = args.data[i]; if (streq(token.value.as_string, var.name)) { return var; } } return null; } fn find_compound_type(token: Token*): Type* { for (let i = p_compound_type_stack.size - 1; i >= 0; --i) { let typ: Type* = p_compound_type_stack.data[i]; if (streq(token.value.as_string, typ.struct_name)) { return typ; } } return null; } fn find_global_variable(token: Token*): Variable* { for (let i = 0; i < p_global_variables.size; ++i) { let var: Variable* = p_global_variables.data[i]; if (streq(token.value.as_string, var.name)) { return var; } } return null; } fn find_function_definition(token: Token*): Node* { for (let i = 0; i < p_all_functions.size; ++i) { let func: Node* = p_all_functions.data[i]; if (streq(token.value.as_string, func.d.func.name)) { return func; } } return null; } fn find_constant(token: Token*): Node* { for (let i = 0; i < p_constant_stack.size; ++i) { let constant: Node* = p_constant_stack.data[i]; if (streq(token.value.as_string, constant.d.constant.name)) { return constant; } } return null; } fn identifier_exists(token: Token*): int { if (find_local_variable(token)) return true; if (find_global_variable(token)) return true; if (find_function_definition(token)) return true; if (find_builtin_function(token)) return true; if (find_compound_type(token)) return true; if (find_constant(token)) return true; return false; } fn eval_constexp(node: Node*): int { if (node.typ == AST_LITERAL) { if (node.etyp.typ != TYPE_INT) die("Constant expressions can only contain integer literals/constants."); return node.d.literal.as_int; } else if (is_binary_op(node.typ)) { let left = eval_constexp(node.d.binary.lhs); let right = eval_constexp(node.d.binary.rhs); if (node.typ == AST_PLUS) return left + right; else if (node.typ == AST_MINUS) return left - right; else if (node.typ == AST_BWOR) return left | right; else if (node.typ == AST_BWAND) return left & right; else if (node.typ == AST_XOR) return left ^ right; else if (node.typ == AST_MUL) return left * right; else if (node.typ == AST_DIV) return left / right; else if (node.typ == AST_MOD) return left % right; else if (node.typ == AST_MUL) return left * right; else if (node.typ == AST_MUL) return left * right; else { die("Unsupported binary operator in constant expression."); } } else if (node.typ == AST_NEG) return -eval_constexp(node.d.unary); else if (node.typ == AST_BWINV) return ~eval_constexp(node.d.unary); else if (node.typ == AST_NOT) return !eval_constexp(node.d.unary); else { die("Unsupported constant expression."); } return 0; } fn parse_expression(lexer: Lexer*): Node*; fn parse_constant_declaration(lexer: Lexer*): Node* { let token: Token; lexer_next_assert(lexer, &token, TOKEN_CONST); lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); if (identifier_exists(&token)) die_loc2(here, &token.loc, "Identifier already exists: ", token.value.as_string); let constant_name = token.value.as_string; lexer_peek(lexer, &token); // All constants are implicitly `int`, but we'll allow it for consistency if (token.typ == TOKEN_COLON) { lexer_next(lexer, &token); if (token.typ != TOKEN_INT) die_loc(here, &token.loc, "Expected 'int' type for constant"); lexer_peek(lexer, &token); } lexer_next_assert(lexer, &token, TOKEN_ASSIGN); let expr = parse_expression(lexer); let value = eval_constexp(expr); constant_push(constant_name, value); lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); } fn parse_literal(lexer: Lexer*): Node* { let token: Token; lexer_next(lexer, &token); let node = node_new(AST_LITERAL); if (token.typ == TOKEN_INTLIT) { node.d.literal.as_int = token.value.as_int; node.etyp = type_new(TYPE_INT); } else if (token.typ == TOKEN_STRINGLIT) { node.d.literal.as_string = token.value.as_string; node.etyp = type_new_ptr(TYPE_CHAR); } else if (token.typ == TOKEN_CHARLIT) { node.d.literal.as_char = token.value.as_char; node.etyp = type_new(TYPE_CHAR); } else { die_loc2(here, &token.loc, "Unexpected token in parse_literal: ", token_type_to_string(token.typ)); } return node; } fn parse_type(lexer: Lexer*): Type* { let token: Token; let typ: Type *; lexer_peek(lexer, &token); if (token.typ == TOKEN_INT) { lexer_next(lexer, &token); typ = type_new(TYPE_INT); } else if (token.typ == TOKEN_CHAR) { lexer_next(lexer, &token); typ = type_new(TYPE_CHAR); } else if (token.typ == TOKEN_VOID) { lexer_next(lexer, &token); typ = type_new(TYPE_VOID); } else { lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); typ = find_compound_type(&token); if (!typ) die_loc2(here, &token.loc, "Unknown token in parse_type: ", token.value.as_string); } let running = true; while (running) { lexer_peek(lexer, &token); if (token.typ == TOKEN_STAR) { lexer_next(lexer, &token); let ptr = type_new(TYPE_PTR); ptr.ptr = typ; typ = ptr; } else if (token.typ == TOKEN_OPEN_BRACKET) { lexer_next(lexer, &token); let arr = type_new(TYPE_ARRAY); arr.ptr = typ; lexer_next(lexer, &token); if (token.typ == TOKEN_INTLIT) { arr.array_size = token.value.as_int; } else if (token.typ == TOKEN_IDENTIFIER) { let constant = find_constant(&token); if (!constant) die_loc2(here, &token.loc, "Could not find constant: ", token.value.as_string); // FIXME: This is a complete mess. let value = constant.d.constant.value; arr.array_size = value.d.literal.as_int; } else { die_loc(here, &token.loc, "Expected a constant expression for array size"); } lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACKET); typ = arr; } else { running = false; } } return typ; } fn parse_function_call_args(lexer: Lexer*, func: Node*): Node* { let token: Token; let node = node_new(AST_FUNCCALL); node.d.call.func = func; node.d.call.args = vector_new(); node.etyp = func.etyp; lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); lexer_peek(lexer, &token); while (token.typ != TOKEN_CLOSE_PAREN) { // TODO: Check types let arg = parse_expression(lexer); vector_push(node.d.call.args, arg); lexer_peek(lexer, &token); if (token.typ == TOKEN_COMMA) { lexer_next(lexer, &token); lexer_peek(lexer, &token); } } lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); if (node.d.call.args.size != func.d.func.args.size) die_loc2(here, &token.loc, "Function call argument count mismatch: ", func.d.func.name); return node; } fn parse_identifier(lexer: Lexer*): Node* { let token: Token; lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); let node: Node*; let var = find_local_variable(&token); if (var != null) { node = node_new(AST_LOCAL_VAR); node.d.variable = var; node.etyp = var.typ; return decay_array_to_pointer(node, &token); } var = find_global_variable(&token); if (var != null) { node = node_new(AST_GLOBAL_VAR); node.d.variable = var; node.etyp = var.typ; return decay_array_to_pointer(node, &token); } let func = find_function_definition(&token); if (func != null) { return parse_function_call_args(lexer, func); } func = find_builtin_function(&token); if (func != null) { return parse_function_call_args(lexer, func); } let constant = find_constant(&token); if (constant != null) { // TODO: Make sure this is an integer return constant.d.constant.value; } die_loc2(here, &token.loc, "Unknown identifier in parse_identifier: ", token.value.as_string); return null; } fn parse_factor(lexer: Lexer*): Node* { let token: Token; let expr: Node*; lexer_peek(lexer, &token); if (token.typ == TOKEN_MINUS) { lexer_next(lexer, &token); expr = node_new(AST_NEG); expr.d.unary = parse_factor(lexer); expr = type_check_unary(expr, &token); } else if (token.typ == TOKEN_TILDE) { lexer_next(lexer, &token); expr = node_new(AST_BWINV); expr.d.unary = parse_factor(lexer); expr = type_check_unary(expr, &token); } else if (token.typ == TOKEN_PLUSPLUS) { lexer_next(lexer, &token); expr = node_new(AST_ASSIGN); expr.d.assign.lhs = parse_factor(lexer); if (!is_lvalue(expr.d.assign.lhs.typ)) die_loc(here, &token.loc, "Cannot increment non-lvalue"); let plus = node_new(AST_PLUS); plus.d.binary.lhs = expr.d.assign.lhs; plus.d.binary.rhs = node_from_int_literal(1); expr.d.assign.rhs = type_check_binary(plus, &token); expr.etyp = expr.d.assign.lhs.etyp; // --x is changed to (x = x - 1) } else if (token.typ == TOKEN_MINUSMINUS) { lexer_next(lexer, &token); expr = node_new(AST_ASSIGN); expr.d.assign.lhs = parse_factor(lexer); if (!is_lvalue(expr.d.assign.lhs.typ)) die_loc(here, &token.loc, "Cannot decrement non-lvalue"); let minus = node_new(AST_MINUS); minus.d.binary.lhs = expr.d.assign.lhs; minus.d.binary.rhs = node_from_int_literal(1); expr.d.assign.rhs = type_check_binary(minus, &token); expr.etyp = expr.d.assign.lhs.etyp; // FIXME: This should probably go somewhere else more appropriate } else if (token.typ == TOKEN_SIZEOF) { lexer_next(lexer, &token); lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); let typ = parse_type(lexer); lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); expr = node_from_int_literal(size_for_type(typ)); } else if (token.typ == TOKEN_EXCLAMATION) { lexer_next(lexer, &token); expr = node_new(AST_NOT); expr.d.unary = parse_factor(lexer); expr = type_check_unary(expr, &token); } else if (token.typ == TOKEN_OPEN_PAREN) { lexer_next(lexer, &token); expr = parse_expression(lexer); lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); } else if (is_literal_token(token.typ)) { expr = parse_literal(lexer); } else if (token.typ == TOKEN_IDENTIFIER) { expr = parse_identifier(lexer); } else if (token.typ == TOKEN_AMPERSAND) { lexer_next(lexer, &token); expr = node_new(AST_ADDROF); expr.d.unary = parse_factor(lexer); if (!is_lvalue(expr.d.unary.typ)) die_loc(here, &token.loc, "Cannot take address of non-lvalue"); expr = type_check_unary(expr, &token); } else if (token.typ == TOKEN_STAR) { lexer_next(lexer, &token); let subexp = parse_factor(lexer); if (subexp.etyp.typ != TYPE_PTR) die_loc(here, &token.loc, "Cannot dereference non-pointer type"); expr = node_new(AST_DEREF); expr.d.unary = subexp; expr = type_check_unary(expr, &token); } else { die_loc2(here, &token.loc, ": Unexpected token found in parse_factor: ", token_type_to_string(token.typ)); } let running = true; while (running) { lexer_peek(lexer, &token); if (token.typ == TOKEN_OPEN_BRACKET) { if (expr.etyp.typ != TYPE_PTR) die_loc(here, &token.loc, "Cannot index non-pointer/array type"); lexer_next(lexer, &token); let index = parse_expression(lexer); let offset = node_new(AST_PLUS); offset.d.binary.lhs = expr; offset.d.binary.rhs = index; offset = type_check_binary(offset, &token); expr = node_new(AST_DEREF); expr.d.unary = offset; expr = type_check_unary(expr, &token); lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACKET); } else if (token.typ == TOKEN_DOT) { lexer_next_assert(lexer, &token, TOKEN_DOT); if (!is_struct_or_structptr(expr.etyp)) { putsln(create_type_string(expr.etyp)); die_loc(here, &token.loc, "Cannot access member of non-struct type"); } let is_ptr = expr.etyp.typ == TYPE_PTR; let struct_type = is_ptr ? expr.etyp.ptr : expr.etyp; lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); let name = token.value.as_string; let field = compound_find_field(struct_type, name); if (field == null) { puts("Struct type: "); putsln(create_type_string(struct_type)); puts("Field name: "); putsln(name); die_loc(here, &token.loc, "Invalid field name for struct"); } let member = node_new(AST_MEMBER); member.etyp = field.typ; member.d.member.obj = expr; member.d.member.offset = field.offset; member.d.member.is_ptr = (expr.etyp.typ == TYPE_PTR); expr = decay_array_to_pointer(member, &token); } else { running = false; } } return expr; } // This is absolutely terrible, but I'm not sure how to do it better without macros... fn parse_term(lexer: Lexer*): Node* { let token: Token; let lhs = parse_factor(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_STAR || token.typ == TOKEN_SLASH || token.typ == TOKEN_PERCENT) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_factor(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_additive(lexer: Lexer*): Node* { let token: Token; let lhs = parse_term(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_PLUS || token.typ == TOKEN_MINUS) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_term(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_relational(lexer: Lexer*): Node* { let token: Token; let lhs = parse_additive(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_LT || token.typ == TOKEN_LEQ || token.typ == TOKEN_GT || token.typ == TOKEN_GEQ) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_additive(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_equality(lexer: Lexer*): Node* { let token: Token; let lhs = parse_relational(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_EQ || token.typ == TOKEN_NEQ) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_relational(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_and(lexer: Lexer*): Node* { let token: Token; let lhs = parse_equality(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_AMPERSAND) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_equality(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_exclusive_or(lexer: Lexer*): Node* { let token: Token; let lhs = parse_and(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_CARET) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_and(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_inclusive_or(lexer: Lexer*): Node* { let token: Token; let lhs = parse_exclusive_or(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_BAR) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_exclusive_or(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_logical_and(lexer: Lexer*): Node* { let token: Token; let lhs = parse_inclusive_or(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_AND) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_inclusive_or(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_logical_or(lexer: Lexer*): Node* { let token: Token; let lhs = parse_logical_and(lexer); lexer_peek(lexer, &token); while (token.typ == TOKEN_OR) { lexer_next(lexer, &token); let op = node_new(binary_token_to_op(token.typ)); let rhs = parse_logical_and(lexer); op.d.binary.lhs = lhs; op.d.binary.rhs = rhs; lhs = type_check_binary(op, &token); lexer_peek(lexer, &token); } return lhs; } fn parse_conditional_exp(lexer: Lexer*): Node* { let token: Token; let lhs = parse_logical_or(lexer); lexer_peek(lexer, &token); if (token.typ == TOKEN_QUESTION) { lexer_next(lexer, &token); let then = parse_expression(lexer); lexer_next_assert(lexer, &token, TOKEN_COLON); let els = parse_expression(lexer); let cond = node_new(AST_CONDITIONAL); cond.d.conditional.cond = lhs; cond.d.conditional.then = then; cond.d.conditional.els = els; if (!types_equal(then.etyp, els.etyp)) die_loc(here, &token.loc, "THEN and ELSE branches of conditional expression have different types"); lhs = cond; lhs.etyp = then.etyp; } return lhs; } fn parse_expression(lexer: Lexer*): Node* { let lhs = parse_conditional_exp(lexer); if (is_lvalue(lhs.typ)) { let token: Token; lexer_peek(lexer, &token); if (token.typ == TOKEN_ASSIGN) { lexer_next(lexer, &token); let node = node_new(AST_ASSIGN); node.d.assign.lhs = lhs; node.d.assign.rhs = parse_expression(lexer); lhs = node; } } return lhs; } fn add_variable_to_current_block(var: Variable*) { // Set offset for variable let cur_block: Node* = vector_top(p_block_stack); let var_size = align_up(size_for_type(var.typ), 8); // Add to the block // FIXME: Use a map here vector_push(cur_block.d.block.locals, var); assert(p_current_function != null); // Update current stack offset (w.r.t function stack frame) and block size p_cur_stack_offset = p_cur_stack_offset + var_size; cur_block.d.block.locals_size = cur_block.d.block.locals_size + var_size; var.offset = p_cur_stack_offset; // Update function's max locals size let max_offset = max(p_current_function.d.func.max_locals_size, p_cur_stack_offset); p_current_function.d.func.max_locals_size = max_offset; } fn add_global_variable(var: Variable*) { var.offset = p_global_offset; let var_size = align_up(size_for_type(var.typ), 8); p_global_offset = p_global_offset + var_size; vector_push(p_global_variables, var); } fn parse_var_declaration(lexer: Lexer*): Node* { let token: Token; lexer_next_assert(lexer, &token, TOKEN_LET); lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); if (identifier_exists(&token)) die_loc2(here, &token.loc, "Identifier already defined: %s", token.value.as_string); let is_global = (p_current_function == null); // TODO: check if identifier is already defined let node = node_new(AST_VARDECL); let decl = &node.d.var_decl; decl.var.name = token.value.as_string; lexer_peek(lexer, &token); let missing_type = true; if (token.typ == TOKEN_COLON) { lexer_next(lexer, &token); decl.var.typ = parse_type(lexer); missing_type = false; lexer_peek(lexer, &token); } if (token.typ == TOKEN_ASSIGN) { lexer_next(lexer, &token); decl.init = parse_expression(lexer); if (missing_type) { decl.var.typ = decl.init.etyp; } else if (!is_convertible(decl.var.typ, decl.init.etyp)) { puts("- Variable type: "); putsln(create_type_string(decl.var.typ)); puts("- Value type: "); putsln(create_type_string(decl.init.etyp)); die_loc2(here, &token.loc, "Type mismatch for variable declaration: ", decl.var.name); } node.etyp = decl.init.etyp; } else if (missing_type) { die_loc(here, &token.loc, "Expected ':' or '=' after variable declaration"); } if (is_global) { add_global_variable(&decl.var); } else { add_variable_to_current_block(&decl.var); } if (0) { putsln("------------------"); location_print(&token.loc); putsln(" : Variable declared"); puts("Name: "); putsln(decl.var.name); puts("Type: "); putsln(create_type_string(decl.var.typ)); puts("Offset: "); putu(abs(decl.var.offset)); putsln("\n------------------"); } return node; } fn parse_function_params(lexer: Lexer*, func: Node*) { let token: Token; lexer_peek(lexer, &token); func.d.func.args = vector_new(); // TODO: Actually parse params while (token.typ != TOKEN_CLOSE_PAREN) { lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); let name = token.value.as_string; if (identifier_exists(&token)) die_loc2(here, &token.loc, "Identifier already defined: ", name); lexer_next_assert(lexer, &token, TOKEN_COLON); let typ = parse_type(lexer); let var: Variable* = malloc(sizeof(Variable)); var.name = name; var.typ = typ; vector_push(func.d.func.args, var); lexer_peek(lexer, &token); if (token.typ == TOKEN_COMMA) { lexer_next(lexer, &token); lexer_peek(lexer, &token); } } // Set the offsets for the arguments // IMPORTANT: We want to skip the saved ret_addr+old_rbp that we // pushed on the stack. Each of these is 8 bytes. let offset = -16; let n = func.d.func.args.size; for (let i = 0; i < n; ++i) { let var: Variable* = func.d.func.args.data[i]; var.offset = offset; // TODO: (Here and other uses of `size_for_type`): // Should we only align to max(8, type.size) instead? let var_size = align_up(size_for_type(var.typ), 8); offset = offset - var_size; } } fn parse_block(lexer: Lexer*): Node*; fn parse_statement(lexer: Lexer*): Node*; fn parse_for_loop(lexer: Lexer*): Node* { let token: Token; lexer_next_assert(lexer, &token, TOKEN_FOR); let looop = node_new(AST_FOR); lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); // NOTE: We're going to put the for loop in it's own block // so that any declarations in the init of the loop // can only be referenced within the loop. let node = node_new(AST_BLOCK); node.d.block.children = vector_new_sized(1); node.d.block.locals = vector_new_sized(1); block_add_child(node, looop); block_stack_push(node); // All of the expressions in the for loop are optional lexer_peek(lexer, &token); if (token.typ == TOKEN_LET) looop.d.looop.init = parse_var_declaration(lexer); else if (token.typ != TOKEN_SEMICOLON) looop.d.looop.init = parse_expression(lexer); lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); lexer_peek(lexer, &token); if (token.typ != TOKEN_SEMICOLON) looop.d.looop.cond = parse_expression(lexer); lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); lexer_peek(lexer, &token); if (token.typ != TOKEN_CLOSE_PAREN) looop.d.looop.step = parse_expression(lexer); lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); looop.d.looop.body = parse_statement(lexer); block_stack_pop(); return node; } fn parse_statement(lexer: Lexer*): Node* { let node: Node*; let token: Token; lexer_peek(lexer, &token); if (token.typ == TOKEN_OPEN_BRACE) { node = parse_block(lexer); } else if (token.typ == TOKEN_RETURN) { lexer_next(lexer, &token); node = node_new(AST_RETURN); lexer_peek(lexer, &token); if (token.typ != TOKEN_SEMICOLON) { node.d.unary = parse_expression(lexer); } else { node.d.unary = null; // empty return statment } lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); } else if (token.typ == TOKEN_IF) { lexer_next(lexer, &token); node = node_new(AST_IF); lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); node.d.conditional.cond = parse_expression(lexer); lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); node.d.conditional.then = parse_statement(lexer); lexer_peek(lexer, &token); if (token.typ == TOKEN_ELSE) { lexer_next(lexer, &token); node.d.conditional.els = parse_statement(lexer); } } else if (token.typ == TOKEN_WHILE) { lexer_next(lexer, &token); node = node_new(AST_WHILE); lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); node.d.looop.cond = parse_expression(lexer); lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); node.d.looop.body = parse_statement(lexer); } else if (token.typ == TOKEN_FOR) { node = parse_for_loop(lexer); } else if (token.typ == TOKEN_DEFER) { die_loc(here, &token.loc, "defer is not implemented yet"); } else if (token.typ == TOKEN_LET) { node = parse_var_declaration(lexer); lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); } else { // Default to expression statement node = parse_expression(lexer); lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); } return node; } fn parse_block(lexer: Lexer*): Node* { let token: Token; lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE); let block = node_new(AST_BLOCK); block.d.block.children = vector_new(); block.d.block.locals = vector_new(); block_stack_push(block); lexer_peek(lexer, &token); while (token.typ != TOKEN_CLOSE_BRACE) { block_add_child(block, parse_statement(lexer)); lexer_peek(lexer, &token); } lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); block_stack_pop(); return block; } // FIXME: Make this a real type fn parse_enum_declaration(lexer: Lexer*) { let token: Token; // TODO: This is all a hack to automatically number // Some constants. It does not behave like a type, // and cannot be used as one. Fix this in the future. lexer_next_assert(lexer, &token, TOKEN_ENUM); lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); // Use this! lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE); let enum_count = 0; lexer_peek(lexer, &token); while (token.typ != TOKEN_CLOSE_BRACE) { lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); if (identifier_exists(&token)) die_loc(here, &token.loc, "Identifier already exists, enums just behave like numbered constants."); constant_push(token.value.as_string, enum_count); ++enum_count; lexer_peek(lexer, &token); if (token.typ == TOKEN_COMMA) { lexer_next(lexer, &token); lexer_peek(lexer, &token); } else if (token.typ != TOKEN_CLOSE_BRACE) { die_loc(here, &token.loc, "Expected a comma or a closing brace."); } } lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); } // FIXME: This should just be part of `parse_type()`, and we should be allowed // to parse a type without a name. Probably also need to handle converstions // between structs with similar embedded types. fn parse_struct_union_declaration(lexer: Lexer*, top_level: int): Type* { let token: Token; lexer_next(lexer, &token); if (token.typ != TOKEN_STRUCT && token.typ != TOKEN_UNION) die_loc(here, &token.loc, "Expected STRUCT or UNION in parse_struct_union_declaration"); let compound = type_new(token.typ == TOKEN_STRUCT ? TYPE_STRUCT : TYPE_UNION); compound.fields = vector_new(); lexer_peek(lexer, &token); // For nested temporary structs we don't need a name if (token.typ != TOKEN_IDENTIFIER && top_level) die_loc(here, &token.loc, "You need to specify a name for the struct defined globally."); // But if they do provide one, we'll add it to the list of defined structs so they // it can referenced internally. if (token.typ == TOKEN_IDENTIFIER) { compound.struct_name = token.value.as_string; vector_push(p_compound_type_stack, compound); lexer_next(lexer, &token); } else { compound.struct_name = ""; } lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE); lexer_peek(lexer, &token); while (token.typ != TOKEN_CLOSE_BRACE) { // TODO: Allow no-name fields lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); let name = token.value.as_string; lexer_next_assert(lexer, &token, TOKEN_COLON); lexer_peek(lexer, &token); // We want to allow nested temporary structs. let typ: Type*; if (token.typ == TOKEN_STRUCT || token.typ == TOKEN_UNION) { // Nested structs live in their own "namespace", can't be accessed // from outside, so we will pop them off the stack once done. let prev_compound_count = p_compound_type_stack.size; typ = parse_struct_union_declaration(lexer, false); p_compound_type_stack.size = prev_compound_count; } else { typ = parse_type(lexer); } compound_push_field(compound, name, typ); lexer_next_assert(lexer, &token, TOKEN_SEMICOLON); lexer_peek(lexer, &token); } lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE); // printf("Defined %s: %s, size: %lld\n", // compound.type == TYPE_UNION ? "union":"struct", // compound.struct_name, // compound.fields.size // ); // for (int i = 0; i < compound.fields.num_fields; i++) { // printf("\t%s: %s (offset: %lld, size: %lld)\n", // compound.fields.name[i], // type_to_str(compound.fields.typ[i]), // compound.fields.offset[i], // size_for_type(compound.fields.typ[i]) // ); // } return compound; } fn parse_function(lexer: Lexer*): Node* { let token: Token; lexer_next_assert(lexer, &token, TOKEN_FN); lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER); // TODO: Check if identifier exists let func = node_new(AST_FUNC); let dfunc = func; func.d.func.name = token.value.as_string; // If the identifier exists, there's 3 possible cases: // 1. It's another variable / struct, which is an error. // 2. It's a function that's been defined, which is an error. // 3. It's a function that's been declared (but not defined), which is OK if (identifier_exists(&token)) { dfunc = find_function_definition(&token); // Case 1 if (dfunc == null) die_loc(here, &token.loc, "Function name already exists as an identifier"); // Case 2 if (dfunc.d.func.is_defined) die_loc(here, &token.loc, "Function already defined earlier"); // Case 3 (No error, just set the current function correctly) p_current_function = func; } else { // We don't have a declaration yet, push this. vector_push(p_all_functions, func); p_current_function = func; } lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN); parse_function_params(lexer, func); lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN); lexer_peek(lexer, &token); if (token.typ == TOKEN_COLON) { lexer_next(lexer, &token); func.etyp = parse_type(lexer); } else { func.etyp = type_new(TYPE_VOID); } lexer_peek(lexer, &token); if (token.typ == TOKEN_OPEN_BRACE) { func.d.func.body = parse_block(lexer); func.d.func.is_defined = true; } else { func.d.func.is_defined = false; } p_current_function = null; return func; } let p_opened_files = vector_new(); fn parser_open_new_file(path: char*) { for (let i = 0; i < p_lexer_stack.size; i = i + 1) { let lex: Lexer* = p_lexer_stack.data[i]; if (streq(lex.filename, path)) { puts("Found a circular import dependency in: "); puts(path); putsln(": Exiting."); exit(1); } } for (let i = 0; i < p_opened_files.size; i = i + 1) { if (streq(p_opened_files.data[i], path)) { // Already opened this file, ignore return; } } vector_push(p_opened_files, path); vector_push(p_lexer_stack, lexer_new_open_file(path)); } fn parse_program(lexer: Lexer*): Node* { initialize_builtins(); let node = node_new(AST_PROGRAM); node.d.block.children = vector_new(); let token: Token; lexer_peek(lexer, &token); vector_push(p_lexer_stack, lexer); while (token.typ != TOKEN_EOF) { if (token.typ == TOKEN_FN) { block_add_child(node, parse_function(lexer)); } else if (token.typ == TOKEN_LET) { block_add_child(node, parse_var_declaration(lexer)); } else if (token.typ == TOKEN_SEMICOLON) { lexer_next(lexer, &token); } else if (token.typ == TOKEN_CONST) { parse_constant_declaration(lexer); } else if (token.typ == TOKEN_IMPORT) { lexer_next(lexer, &token); lexer_next_assert(lexer, &token, TOKEN_STRINGLIT); let path = token.value.as_string; parser_open_new_file(path); lexer = vector_top(p_lexer_stack); } else if (token.typ == TOKEN_STRUCT || token.typ == TOKEN_UNION) { parse_struct_union_declaration(lexer, true); } else if (token.typ == TOKEN_ENUM) { parse_enum_declaration(lexer); } else { die_loc2(here, &token.loc, "unexpected token in parse_program: ", token_type_to_string(token.typ)); } lexer_peek(lexer, &token); while (token.typ == TOKEN_EOF && p_lexer_stack.size > 1) { vector_pop(p_lexer_stack); lexer = vector_top(p_lexer_stack); lexer_peek(lexer, &token); } } return node; }