diff options
| -rw-r--r-- | src/ast.c | 74 | ||||
| -rw-r--r-- | src/ast.h | 23 | ||||
| -rw-r--r-- | src/generator.c | 4 | ||||
| -rw-r--r-- | src/parser.c | 141 |
4 files changed, 143 insertions, 99 deletions
@@ -1,6 +1,48 @@ #include "ast.h" #include <assert.h> #include <stdio.h> +#include <stdlib.h> + +void Node_add_child(Node *parent, Node *child) +{ + // TODO, use a vector + parent->block.children = realloc(parent->block.children, sizeof(Node *) * (parent->block.num_children + 1)); + parent->block.children[parent->block.num_children] = child; + parent->block.num_children++; +} + +Node *Node_new(NodeType type) +{ + Node *self = calloc(sizeof(Node), 1); + self->type = type; + return self; +} + +NodeType binary_token_to_op(TokenType type) +{ + switch (type) + { + case TOKEN_PLUS: return OP_PLUS; + case TOKEN_MINUS: return OP_MINUS; + case TOKEN_STAR: return OP_MUL; + case TOKEN_SLASH: return OP_DIV; + case TOKEN_PERCENT: return OP_MOD; + case TOKEN_LSHIFT: return OP_LSHIFT; + case TOKEN_RSHIFT: return OP_RSHIFT; + case TOKEN_AND: return OP_AND; + case TOKEN_OR: return OP_OR; + case TOKEN_XOR: return OP_XOR; + case TOKEN_EQ: return OP_EQ; + case TOKEN_NEQ: return OP_NEQ; + case TOKEN_LT: return OP_LT; + case TOKEN_LEQ: return OP_LEQ; + case TOKEN_GT: return OP_GT; + case TOKEN_GEQ: return OP_GEQ; + + default: assert(false && "binary_token_to_op called with invalid token type"); + } +} + char *data_type_to_str(DataType type) { @@ -8,16 +50,23 @@ char *data_type_to_str(DataType type) { case TYPE_NONE: return "void"; case TYPE_INT: return "int"; + case TYPE_PTR: return "*"; default: assert(false && "Unreachable"); } } -void print_type_to_file(FILE *out, Type type) +Type *type_new(DataType type) { - fprintf(out, "%s", data_type_to_str(type.type)); - for (int i = 0; i < type.indirection; i++) { - fprintf(out, "*"); - } + Type *t = calloc(sizeof(Type), 1); + t->type = type; + return t; +} + +void print_type_to_file(FILE *out, Type *type) +{ + if (type->type == TYPE_PTR) + print_type_to_file(out, type->ptr); + fprintf(out, "%s", data_type_to_str(type->type)); } char *node_type_to_str(NodeType type) @@ -50,7 +99,7 @@ bool is_binary_op(NodeType type) case OP_LT: case OP_LEQ: case OP_GT: - case OP_GEQ: + case OP_GEQ: return true; default: return false; } @@ -75,6 +124,17 @@ bool is_expression(NodeType type) return type == AST_LITERAL; } +bool is_lvalue(NodeType type) +{ + switch (type) + { + case AST_LOCAL_VAR: + case AST_GLOBAL_VAR: + return true; + default: return false; + } +} + void dump_func(Node *, int); static void do_print_ast(Node *node, int depth) @@ -146,7 +206,7 @@ void dump_func(Node *node, int depth) printf("[[%lld]]", node->func.args[i].offset); } printf(")"); - if (node->func.return_type.type != TYPE_NONE) { + if (node->func.return_type->type != TYPE_NONE) { // FIXME: Print return type properly printf(" -> "); print_type_to_file(stdout, node->func.return_type); @@ -1,6 +1,7 @@ #pragma once #include "common.h" +#include "tokens.h" #define ENUM_AST_TYPES(F) \ F(OP_NEG, "neg") \ @@ -8,6 +9,7 @@ F(OP_BWINV, "~") \ F(OP_PLUS, "+") \ F(OP_MINUS, "-") \ + F(OP_ADDROF, "&") \ F(OP_MUL, "*") \ F(OP_DIV, "/") \ F(OP_MOD, "%") \ @@ -45,28 +47,34 @@ typedef enum { #undef DEFINE_ENUM } NodeType; +NodeType binary_token_to_op(TokenType type); + char *node_type_to_str(NodeType type); bool is_binary_op(NodeType type); bool is_unary_op(NodeType type); bool is_expression(NodeType type); +bool is_lvalue(NodeType type); + typedef enum { TYPE_NONE, TYPE_INT, + TYPE_PTR, } DataType; char *data_type_to_str(DataType type); -typedef struct { +typedef struct data_type_node { DataType type; - // 0 = value, 1 = pointer, 2 = double pointer, ... - int indirection; + struct data_type_node *ptr; } Type; +Type *type_new(DataType type); + typedef struct { char *name; - Type type; + Type *type; i64 offset; } Variable; @@ -87,7 +95,7 @@ typedef struct ast_node { // Function definition struct { char *name; - Type return_type; + Type *return_type; Node *body; // TODO: Should we just dynamically allocate space on the @@ -110,7 +118,7 @@ typedef struct ast_node { } block; struct { - Type type; + Type *type; union { int as_int; }; @@ -150,4 +158,7 @@ typedef struct ast_node { }; } Node; +void Node_add_child(Node *parent, Node *child); +Node *Node_new(NodeType type); + void print_ast(Node *node);
\ No newline at end of file diff --git a/src/generator.c b/src/generator.c index 7de9d32..d3e63de 100644 --- a/src/generator.c +++ b/src/generator.c @@ -49,7 +49,7 @@ void generate_expr_into_rax(Node *expr, FILE *out) // TODO: Different sized output for different types? if (expr->type == AST_LITERAL) { // TODO: More literal types - assert(expr->literal.type.type == TYPE_INT); + assert(expr->literal.type->type == TYPE_INT); fprintf(out, " mov rax, %d\n", expr->literal.as_int); } else if (expr->type == AST_FUNCCALL) { @@ -340,7 +340,7 @@ void generate_block(Node *block, FILE *out) assert(block->type == AST_BLOCK); for (int i = 0; i < block->block.num_children; i++) generate_statement(block->block.children[i], out); - + assert(defer_stack_count - cur_defer_pos >= 0); while (defer_stack_count > cur_defer_pos) { Node *deferred = defer_stack[--defer_stack_count]; diff --git a/src/parser.c b/src/parser.c index 0cd0e5f..67cbfbc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -42,46 +42,6 @@ Token do_assert_token(Token token, TokenType type, char *filename, int line) * Some helpers */ -void Node_add_child(Node *parent, Node *child) -{ - // TODO, use a vector - parent->block.children = realloc(parent->block.children, sizeof(Node *) * (parent->block.num_children + 1)); - parent->block.children[parent->block.num_children] = child; - parent->block.num_children++; -} - -Node *Node_new(NodeType type) -{ - Node *self = calloc(sizeof(Node), 1); - self->type = type; - return self; -} - -NodeType binary_token_to_op(TokenType type) -{ - switch (type) - { - case TOKEN_PLUS: return OP_PLUS; - case TOKEN_MINUS: return OP_MINUS; - case TOKEN_STAR: return OP_MUL; - case TOKEN_SLASH: return OP_DIV; - case TOKEN_PERCENT: return OP_MOD; - case TOKEN_LSHIFT: return OP_LSHIFT; - case TOKEN_RSHIFT: return OP_RSHIFT; - case TOKEN_AND: return OP_AND; - case TOKEN_OR: return OP_OR; - case TOKEN_XOR: return OP_XOR; - case TOKEN_EQ: return OP_EQ; - case TOKEN_NEQ: return OP_NEQ; - case TOKEN_LT: return OP_LT; - case TOKEN_LEQ: return OP_LEQ; - case TOKEN_GT: return OP_GT; - case TOKEN_GEQ: return OP_GEQ; - - default: assert(false && "binary_token_to_op called with invalid token type"); - } -} - void block_stack_push(Node *block) { assert(block_stack_count < BLOCK_STACK_SIZE); @@ -116,17 +76,17 @@ void initialize_builtins() { builtin_print = Node_new(AST_BUILTIN); builtin_print->func.name = "print"; - builtin_print->func.return_type = (Type){TYPE_INT,0}; + builtin_print->func.return_type = type_new(TYPE_INT); builtin_print->func.num_args = 1; builtin_print->func.args = (Variable *)calloc(sizeof(Variable), 1); - builtin_print->func.args[0] = (Variable){"val", (Type){TYPE_INT,0}, 0}; + builtin_print->func.args[0] = (Variable){"val", type_new(TYPE_INT), 0}; builtin_putc = Node_new(AST_BUILTIN); builtin_putc->func.name = "putc"; - builtin_putc->func.return_type = (Type){TYPE_INT,0}; + builtin_putc->func.return_type = type_new(TYPE_INT); builtin_putc->func.num_args = 1; builtin_putc->func.args = (Variable *)calloc(sizeof(Variable), 2); - builtin_putc->func.args[0] = (Variable){"arg", (Type){TYPE_INT,0}, 0}; + builtin_putc->func.args[0] = (Variable){"arg", type_new(TYPE_INT), 0}; } Node *find_builtin_function(Token *token) @@ -219,20 +179,22 @@ void add_variable_to_current_block(Variable *var) assert(block_stack_count > 0); } -Type parse_type(Lexer *lexer) +Type *parse_type(Lexer *lexer) { - Type type = {0}; + Type *type; Token token = Lexer_peek(lexer); if (token.type == TOKEN_INT) { - type.type = TYPE_INT; + type = type_new(TYPE_INT); Lexer_next(lexer); } else { - type.type = TYPE_NONE; + type = type_new(TYPE_NONE); } while (Lexer_peek(lexer).type == TOKEN_AMPERSAND) { Lexer_next(lexer); - type.indirection++; + Type *ptr = type_new(TYPE_PTR); + ptr->ptr = type; + type = ptr; } return type; @@ -242,7 +204,7 @@ Node *parse_literal(Lexer *lexer) { Node *node = Node_new(AST_LITERAL); Token token = assert_token(Lexer_next(lexer), TOKEN_INTLIT); - node->literal.type = (Type) {.type = TYPE_INT}; + node->literal.type = type_new(TYPE_INT); node->literal.as_int = token.value.as_int; return node; } @@ -321,6 +283,42 @@ Node *parse_function_call_args(Lexer *lexer, Node *func) return call; } +Node *parse_identifier(Lexer *lexer) +{ + Token token = assert_token(Lexer_peek(lexer), TOKEN_IDENTIFIER); + + // TODO: Check for global variables when added + Node *expr; + Variable *var = find_local_variable(&token); + if (var != NULL) { + Lexer_next(lexer); + expr = Node_new(AST_LOCAL_VAR); + expr->variable = var; + return expr; + } + + Variable *gvar = find_global_variable(&token); + if (gvar != NULL) { + Lexer_next(lexer); + expr = Node_new(AST_GLOBAL_VAR); + expr->variable = gvar; + return expr; + } + + Node *func = find_function_definition(&token); + if (func != NULL) { + return parse_function_call_args(lexer, func); + } + + Node *builtin = find_builtin_function(&token); + if (builtin != NULL) { + return parse_function_call_args(lexer, builtin); + } + + die_location(token.loc, "Unknown identifier `%s`", token.value.as_string); + return NULL; +} + Node *parse_factor(Lexer *lexer) { // TODO: Parse more complicated things @@ -345,36 +343,11 @@ Node *parse_factor(Lexer *lexer) } else if (token.type == TOKEN_INTLIT) { expr = parse_literal(lexer); } else if (token.type == TOKEN_IDENTIFIER) { - // TODO: Check for global variables when added - - Variable *var = find_local_variable(&token); - if (var != NULL) { - Lexer_next(lexer); - expr = Node_new(AST_LOCAL_VAR); - expr->variable = var; - return expr; - } - - Variable *gvar = find_global_variable(&token); - if (gvar != NULL) { - Lexer_next(lexer); - expr = Node_new(AST_GLOBAL_VAR); - expr->variable = gvar; - return expr; - } - - Node *func = find_function_definition(&token); - if (func != NULL) { - return parse_function_call_args(lexer, func); - } - - Node *builtin = find_builtin_function(&token); - if (builtin != NULL) { - return parse_function_call_args(lexer, builtin); - } - - die_location(token.loc, "Unknown identifier `%s`", token.value.as_string); - expr = NULL; + expr = parse_identifier(lexer); + } else if (token.type == TOKEN_AMPERSAND) { + Lexer_next(lexer); + expr = Node_new(OP_ADDROF); + expr->unary_expr = parse_factor(lexer); } else { die_location(token.loc, ": Expected token found in parse_factor: `%s`", token_type_to_str(token.type)); exit(1); @@ -563,7 +536,7 @@ void parse_func_args(Lexer *lexer, Node *func) token = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER); // TODO: Check for shadowing with globals assert_token(Lexer_next(lexer), TOKEN_COLON); - Type type = parse_type(lexer); + Type *type = parse_type(lexer); i64 new_count = func->func.num_args + 1; func->func.args = realloc(func->func.args, sizeof(Variable) * new_count); @@ -612,7 +585,7 @@ Node *parse_func(Lexer *lexer) func->func.return_type = parse_type(lexer); } else { // No return type, void fn. - func->func.return_type = (Type){.type = TYPE_NONE}; + func->func.return_type = type_new(TYPE_NONE); } // Make sure there's no funny business with the stack offset @@ -647,9 +620,9 @@ Node *parse_program(Lexer *lexer) { initialize_builtins(); Node *program = Node_new(AST_PROGRAM); - + push_new_lexer(lexer); - + Token token = Lexer_peek(lexer); while (token.type != TOKEN_EOF) { if (token.type == TOKEN_FN) { |