import "compiler/ast.cup"
import "compiler/lexer.cup"

// p_ prefix for parser global variables.
let p_all_functions = vector_new();            // vector<Node*> of all AST_FUNC nodes parsed so far
let p_current_function: Node* = null;          // function currently being parsed, null at file scope
let p_block_stack = vector_new();              // vector<Node*> of enclosing AST_BLOCK nodes, innermost last
let p_cur_stack_offset = 0;                    // running size of locals in the current frame

// Push a block onto the lexical-scope stack so its locals become visible.
fn block_stack_push(block: Node*) {
    vector_push(p_block_stack, block);
}

// Pop the innermost block and give back the frame space its locals used.
fn block_stack_pop() {
    let block: Node* = vector_pop(p_block_stack);
    p_cur_stack_offset = p_cur_stack_offset - block.d.block.locals_size;
}

// Resolve `token`'s identifier against the locals of each enclosing block
// (searched innermost-first so shadowing works), then against the current
// function's parameters. Returns null at file scope or when not found.
fn find_local_variable(token: Token*): Variable* {
    if (p_current_function == null)
        return null;
    for (let i = p_block_stack.size - 1; i >= 0; --i) {
        let block: Node* = p_block_stack.data[i];
        for (let j = 0; j < block.d.block.locals.size; ++j) {
            let var: Variable* = block.d.block.locals.data[j];
            if (streq(token.value.as_string, var.name)) {
                return var;
            }
        }
    }
    let args = p_current_function.d.func.args;
    for (let i = 0; i < args.size; ++i) {
        let var: Variable* = args.data[i];
        if (streq(token.value.as_string, var.name)) {
            return var;
        }
    }
    return null;
}

// Linear scan over every user-defined function parsed so far.
// Returns the AST_FUNC node, or null when no definition matches.
fn find_function_definition(token: Token*): Node* {
    for (let i = 0; i < p_all_functions.size; ++i) {
        let func: Node* = p_all_functions.data[i];
        if (streq(token.value.as_string, func.d.func.name)) {
            return func;
        }
    }
    return null;
}

// True when the identifier names a visible local/parameter, a user-defined
// function, or a builtin function.
fn identifier_exists(token: Token*): int {
    if (find_local_variable(token)) return true;
    if (find_function_definition(token)) return true;
    if (find_builtin_function(token)) return true;
    return false;
}

// Consume one int/string/char literal token and wrap it in an AST_LITERAL
// node with the matching expression type. Dies on any other token.
fn parse_literal(lexer: Lexer*): Node* {
    let token: Token;
    lexer_next(lexer, &token);
    let node = node_new(AST_LITERAL);
    if (token.typ == TOKEN_INTLIT) {
        node.d.literal.as_int = token.value.as_int;
        node.etyp = type_new(TYPE_INT);
    } else if (token.typ == TOKEN_STRINGLIT) {
        node.d.literal.as_string = token.value.as_string;
        node.etyp = type_new_ptr(TYPE_CHAR); // string literal is a char*
    } else if (token.typ == TOKEN_CHARLIT) {
        node.d.literal.as_char = token.value.as_char;
        node.etyp = type_new(TYPE_CHAR);
    } else {
        die_loc2(&token.loc, "Unexpected token in parse_literal: ",
                 token_type_to_string(token.typ));
    }
    return node;
}

// Parse a type: one base type (int/char/void) followed by any number of
// `*` pointer suffixes. Array types are not implemented yet.
fn parse_type(lexer: Lexer*): Type* {
    let token: Token;
    let typ: Type *;
    lexer_peek(lexer, &token);
    if (token.typ == TOKEN_INT) {
        lexer_next(lexer, &token);
        typ = type_new(TYPE_INT);
    } else if (token.typ == TOKEN_CHAR) {
        lexer_next(lexer, &token);
        typ = type_new(TYPE_CHAR);
    } else if (token.typ == TOKEN_VOID) {
        lexer_next(lexer, &token);
        typ = type_new(TYPE_VOID);
    } else {
        // BUG FIX: previously there was no else branch here, so an
        // unknown token fell through and `typ` was returned uninitialized.
        die_loc2(&token.loc, "Expected a type in parse_type, but got: ",
                 token_type_to_string(token.typ));
    }
    let running = true;
    while (running) {
        lexer_peek(lexer, &token);
        if (token.typ == TOKEN_STAR) {
            lexer_next(lexer, &token);
            let ptr = type_new(TYPE_PTR);
            ptr.ptr = typ; // wrap the type parsed so far
            typ = ptr;
        } else if (token.typ == TOKEN_OPEN_BRACKET) {
            die("Array types not yet implemented");
        } else {
            running = false;
        }
    }
    return typ;
}

// Forward declaration: the expression grammar is mutually recursive.
fn parse_expression(lexer: Lexer*): Node*;

// Parse `( expr, expr, ... )` after a function name and build an
// AST_FUNCCALL node targeting `func`. Dies if the argument count does not
// match the callee's parameter count.
fn parse_function_call_args(lexer: Lexer*, func: Node*): Node* {
    let token: Token;
    let node = node_new(AST_FUNCCALL);
    node.d.call.func = func;
    node.d.call.args = vector_new();
    lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
    lexer_peek(lexer, &token);
    while (token.typ != TOKEN_CLOSE_PAREN) {
        // TODO: Check types
        let arg = parse_expression(lexer);
        vector_push(node.d.call.args, arg);
        lexer_peek(lexer, &token);
        if (token.typ == TOKEN_COMMA) {
            lexer_next(lexer, &token);
            lexer_peek(lexer, &token);
        }
    }
    lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN);
    if (node.d.call.args.size != func.d.func.args.size)
        die_loc2(&token.loc, "Function call argument count mismatch: ",
                 func.d.func.name);
    return node;
}

// An identifier is either a variable reference or a function call.
// Variables win over functions (a local cannot currently be called).
fn parse_identifier(lexer: Lexer*): Node* {
    let token: Token;
    lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
    let node: Node*;
    let var = find_local_variable(&token);
    if (var != null) {
        node = node_new(AST_LOCAL_VAR);
        node.d.variable = var;
        node.etyp = var.typ;
        return node;
    }
    let func = find_function_definition(&token);
    if (func != null) {
        return parse_function_call_args(lexer, func);
    }
    func = find_builtin_function(&token);
    if (func != null) {
        return parse_function_call_args(lexer, func);
    }
    die_loc2(&token.loc, "Unknown identifier in parse_identifier: ",
             token.value.as_string);
}

// Lowest level of the expression grammar: unary operators (- ~ !),
// parenthesized expressions, literals, and identifiers/calls.
fn parse_factor(lexer: Lexer*): Node* {
    let token: Token;
    let expr: Node*;
    lexer_peek(lexer, &token);
    if (token.typ == TOKEN_MINUS) {
        lexer_next(lexer, &token);
        expr = node_new(AST_NEG);
        expr.d.unary = parse_factor(lexer);
    } else if (token.typ == TOKEN_TILDE) {
        lexer_next(lexer, &token);
        expr = node_new(AST_BWINV);
        expr.d.unary = parse_factor(lexer);
    } else if (token.typ == TOKEN_EXCLAMATION) {
        lexer_next(lexer, &token);
        expr = node_new(AST_NOT);
        expr.d.unary = parse_factor(lexer);
    } else if (token.typ == TOKEN_OPEN_PAREN) {
        lexer_next(lexer, &token);
        expr = parse_expression(lexer);
        lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN);
    } else if (is_literal_token(token.typ)) {
        expr = parse_literal(lexer);
    } else if (token.typ == TOKEN_IDENTIFIER) {
        expr = parse_identifier(lexer);
    } else {
        die_loc2(&token.loc, ": Unexpected token found in parse_factor: ",
                 token_type_to_string(token.typ));
    }
    return expr;
}

// This is absolutely terrible, but I'm not sure how to do it better without macros...
// --- Binary-operator precedence ladder -------------------------------------
// Each level parses its operands one level down and left-folds repeated
// operators into a left-associative chain of binary AST nodes.

// * / %
fn parse_term(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_factor(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_STAR || token.typ == TOKEN_SLASH || token.typ == TOKEN_PERCENT) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_factor(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// + -
fn parse_additive(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_term(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_PLUS || token.typ == TOKEN_MINUS) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_term(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// < <= > >=
fn parse_relational(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_additive(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_LT || token.typ == TOKEN_LEQ || token.typ == TOKEN_GT || token.typ == TOKEN_GEQ) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_additive(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// == !=
fn parse_equality(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_relational(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_EQ || token.typ == TOKEN_NEQ) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_relational(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// & (bitwise and)
fn parse_and(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_equality(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_AMPERSAND) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_equality(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// ^ (bitwise xor)
fn parse_exclusive_or(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_and(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_CARET) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_and(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// | (bitwise or)
fn parse_inclusive_or(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_exclusive_or(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_BAR) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_exclusive_or(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// && (logical and)
fn parse_logical_and(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_inclusive_or(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_AND) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_inclusive_or(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// || (logical or)
fn parse_logical_or(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_logical_and(lexer);
    lexer_peek(lexer, &token);
    while (token.typ == TOKEN_OR) {
        lexer_next(lexer, &token);
        let op = node_new(binary_token_to_op(token.typ));
        let rhs = parse_logical_and(lexer);
        op.d.binary.lhs = lhs;
        op.d.binary.rhs = rhs;
        lhs = op;
        lexer_peek(lexer, &token);
    }
    return lhs;
}

// cond ? then : else
fn parse_conditional_exp(lexer: Lexer*): Node* {
    let token: Token;
    let lhs = parse_logical_or(lexer);
    lexer_peek(lexer, &token);
    if (token.typ == TOKEN_QUESTION) {
        lexer_next(lexer, &token);
        let then = parse_expression(lexer);
        lexer_next_assert(lexer, &token, TOKEN_COLON);
        let els = parse_expression(lexer);
        let cond = node_new(AST_CONDITIONAL);
        cond.d.conditional.cond = lhs;
        cond.d.conditional.then = then;
        cond.d.conditional.els = els;
        lhs = cond;
    }
    return lhs;
}

// Top of the expression grammar. An assignment is only recognized when the
// left side is an lvalue; `is_lvalue` is checked against the AST node kind
// (lhs.typ), not the expression type (lhs.etyp).
fn parse_expression(lexer: Lexer*): Node* {
    let lhs = parse_conditional_exp(lexer);
    if (is_lvalue(lhs.typ)) {
        let token: Token;
        lexer_peek(lexer, &token);
        if (token.typ == TOKEN_ASSIGN) {
            lexer_next(lexer, &token);
            let node = node_new(AST_ASSIGN);
            node.d.assign.lhs = lhs;
            node.d.assign.rhs = parse_expression(lexer); // right-associative
            lhs = node;
        }
    }
    return lhs;
}

// Register `var` with the innermost block: assign it a stack offset within
// the current function's frame and grow the block's locals bookkeeping.
fn add_variable_to_current_block(var: Variable*) {
    // Set offset for variable
    let cur_block: Node* = vector_top(p_block_stack);
    let var_size = align_up(size_for_type(var.typ), 8);
    // Add to the block
    // FIXME: Use a map here
    vector_push(cur_block.d.block.locals, var);
    assert(p_current_function != null);
    // Update current stack offset (w.r.t function stack frame) and block size
    p_cur_stack_offset = p_cur_stack_offset + var_size;
    cur_block.d.block.locals_size = cur_block.d.block.locals_size + var_size;
    var.offset = p_cur_stack_offset;
    // Update function's max locals size
    let max_offset = max(p_current_function.d.func.max_locals_size, p_cur_stack_offset);
    p_current_function.d.func.max_locals_size = max_offset;
}

// Parse `let name: type [= init]` and register the variable in the current
// block. The AST_VARDECL node carries both the Variable and optional init.
fn parse_var_declaration(lexer: Lexer*): Node* {
    let token: Token;
    lexer_next_assert(lexer, &token, TOKEN_LET);
    lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
    // TODO: check if identifier is already defined
    let node = node_new(AST_VARDECL);
    let decl = &node.d.var_decl;
    decl.var.name = token.value.as_string;
    lexer_peek(lexer, &token);
    let has_type = false;
    // TODO: implicit types when type system is better
    lexer_next_assert(lexer, &token, TOKEN_COLON);
    decl.var.typ = parse_type(lexer);
    // BUG FIX: `has_type` was never set to true, so a declaration without an
    // initializer (e.g. `let token: Token;`, used throughout this very file)
    // always hit the die() below despite having an explicit type.
    has_type = true;
    lexer_peek(lexer, &token);
    if (token.typ == TOKEN_ASSIGN) {
        lexer_next(lexer, &token);
        decl.init = parse_expression(lexer);
    } else if (!has_type) {
        die_loc(&token.loc, "Expected ':' or '=' after variable declaration");
    }
    add_variable_to_current_block(&decl.var);
    return node;
}

// Parse the `name: type, ...` parameter list (caller consumes the parens)
// and assign each argument its stack offset. Arguments live above the saved
// rbp/return address, hence the negative offsets starting at -16.
fn parse_function_params(lexer: Lexer*, func: Node*) {
    let token: Token;
    lexer_peek(lexer, &token);
    func.d.func.args = vector_new();
    // TODO: Actually parse params
    while (token.typ != TOKEN_CLOSE_PAREN) {
        lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
        let name = token.value.as_string;
        if (identifier_exists(&token))
            die_loc2(&token.loc, "Identifier already defined: ", name);
        lexer_next_assert(lexer, &token, TOKEN_COLON);
        let typ = parse_type(lexer);
        let var: Variable* = malloc(sizeof(Variable));
        var.name = name;
        var.typ = typ;
        vector_push(func.d.func.args, var);
        lexer_peek(lexer, &token);
        if (token.typ == TOKEN_COMMA) {
            lexer_next(lexer, &token);
            lexer_peek(lexer, &token);
        }
    }
    // Set the offsets for the arguments
    // IMPORTANT: We want to skip the saved ret_addr+old_rbp that we
    //            pushed on the stack. Each of these is 8 bytes.
    let offset = -16;
    let n = func.d.func.args.size;
    for (let i = 0; i < n; ++i) {
        let var: Variable* = func.d.func.args.data[i];
        var.offset = offset;
        // TODO: (Here and other uses of `size_for_type`):
        //       Should we only align to max(8, type->size) instead?
        let var_size = align_up(size_for_type(var.typ), 8);
        offset = offset - var_size;
    }
}

// Forward declarations: statements and blocks are mutually recursive.
fn parse_block(lexer: Lexer*): Node*;
fn parse_statement(lexer: Lexer*): Node*;

// Parse `for (init; cond; step) body`. All three header expressions are
// optional. The loop is wrapped in a synthetic AST_BLOCK so variables
// declared in `init` are scoped to the loop only.
fn parse_for_loop(lexer: Lexer*): Node* {
    let token: Token;
    lexer_next_assert(lexer, &token, TOKEN_FOR);
    let looop = node_new(AST_FOR);
    lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
    // NOTE: We're going to put the for loop in its own block
    //       so that any declarations in the init of the loop
    //       can only be referenced within the loop.
    let node = node_new(AST_BLOCK);
    node.d.block.children = vector_new_sized(1);
    node.d.block.locals = vector_new_sized(1);
    block_add_child(node, looop);
    block_stack_push(node);
    // All of the expressions in the for loop are optional
    lexer_peek(lexer, &token);
    if (token.typ == TOKEN_LET)
        looop.d.looop.init = parse_var_declaration(lexer);
    else if (token.typ != TOKEN_SEMICOLON)
        looop.d.looop.init = parse_expression(lexer);
    lexer_next_assert(lexer, &token, TOKEN_SEMICOLON);
    lexer_peek(lexer, &token);
    if (token.typ != TOKEN_SEMICOLON)
        looop.d.looop.cond = parse_expression(lexer);
    lexer_next_assert(lexer, &token, TOKEN_SEMICOLON);
    lexer_peek(lexer, &token);
    if (token.typ != TOKEN_CLOSE_PAREN)
        looop.d.looop.step = parse_expression(lexer);
    lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN);
    looop.d.looop.body = parse_statement(lexer);
    block_stack_pop();
    return node;
}

// Parse one statement: block, return, if/else, while, for, let-declaration,
// or a bare expression statement.
fn parse_statement(lexer: Lexer*): Node* {
    let node: Node*;
    let token: Token;
    lexer_peek(lexer, &token);
    if (token.typ == TOKEN_OPEN_BRACE) {
        node = parse_block(lexer);
    } else if (token.typ == TOKEN_RETURN) {
        lexer_next(lexer, &token);
        node = node_new(AST_RETURN);
        lexer_peek(lexer, &token);
        if (token.typ != TOKEN_SEMICOLON) {
            node.d.unary = parse_expression(lexer);
        } else {
            node.d.unary = null; // empty return statement
        }
        lexer_next_assert(lexer, &token, TOKEN_SEMICOLON);
    } else if (token.typ == TOKEN_IF) {
        lexer_next(lexer, &token);
        node = node_new(AST_IF);
        lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
        node.d.conditional.cond = parse_expression(lexer);
        lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN);
        node.d.conditional.then = parse_statement(lexer);
        lexer_peek(lexer, &token);
        if (token.typ == TOKEN_ELSE) {
            lexer_next(lexer, &token);
            node.d.conditional.els = parse_statement(lexer);
        }
    } else if (token.typ == TOKEN_WHILE) {
        lexer_next(lexer, &token);
        node = node_new(AST_WHILE);
        lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
        node.d.looop.cond = parse_expression(lexer);
        lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN);
        node.d.looop.body = parse_statement(lexer);
    } else if (token.typ == TOKEN_FOR) {
        node = parse_for_loop(lexer);
    } else if (token.typ == TOKEN_DEFER) {
        die("defer is not implemented yet");
    } else if (token.typ == TOKEN_LET) {
        node = parse_var_declaration(lexer);
        lexer_next_assert(lexer, &token, TOKEN_SEMICOLON);
    } else {
        // Default to expression statement
        node = parse_expression(lexer);
        lexer_next_assert(lexer, &token, TOKEN_SEMICOLON);
    }
    return node;
}

// Parse `{ statement* }` into an AST_BLOCK, keeping the block on the scope
// stack while its children are parsed so their declarations resolve.
fn parse_block(lexer: Lexer*): Node* {
    let token: Token;
    lexer_next_assert(lexer, &token, TOKEN_OPEN_BRACE);
    let block = node_new(AST_BLOCK);
    block.d.block.children = vector_new();
    block.d.block.locals = vector_new();
    block_stack_push(block);
    lexer_peek(lexer, &token);
    while (token.typ != TOKEN_CLOSE_BRACE) {
        block_add_child(block, parse_statement(lexer));
        lexer_peek(lexer, &token);
    }
    lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACE);
    block_stack_pop();
    return block;
}

// Parse `fn name(params) [: type] { ... }`. The function is registered in
// p_all_functions before its body is parsed so recursive calls resolve.
// A missing return type defaults to void.
fn parse_function(lexer: Lexer*): Node* {
    let token: Token;
    lexer_next_assert(lexer, &token, TOKEN_FN);
    lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
    // TODO: Check if identifier exists
    let node = node_new(AST_FUNC);
    node.d.func.name = token.value.as_string;
    vector_push(p_all_functions, node);
    p_current_function = node;
    lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
    parse_function_params(lexer, node);
    lexer_next_assert(lexer, &token, TOKEN_CLOSE_PAREN);
    lexer_peek(lexer, &token);
    if (token.typ == TOKEN_COLON) {
        lexer_next(lexer, &token);
        node.etyp = parse_type(lexer);
    } else {
        node.etyp = type_new(TYPE_VOID);
    }
    node.d.func.body = parse_block(lexer);
    p_current_function = null;
    return node;
}

// Entry point: parse a whole translation unit (functions and top-level
// declarations) until EOF into an AST_PROGRAM node.
fn parse_program(lexer: Lexer*): Node* {
    initialize_builtins();
    let node = node_new(AST_PROGRAM);
    node.d.block.children = vector_new();
    let token: Token;
    lexer_peek(lexer, &token);
    while (token.typ != TOKEN_EOF) {
        if (token.typ == TOKEN_FN) {
            block_add_child(node, parse_function(lexer));
        } else if (token.typ == TOKEN_LET) {
            // NOTE(review): top-level `let` reaches add_variable_to_current_block
            // with an empty block stack and p_current_function == null, which
            // trips vector_top/assert — confirm whether globals are supported yet.
            block_add_child(node, parse_var_declaration(lexer));
        } else if (token.typ == TOKEN_SEMICOLON) {
            lexer_next(lexer, &token); // skip stray semicolons
        } else {
            die_loc2(&token.loc, "unexpected token in parse_program",
                     token_type_to_string(token.typ));
        }
        lexer_peek(lexer, &token);
    }
    return node;
}