aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/ast.c 6
-rw-r--r-- src/lexer.c 2
-rw-r--r-- src/parser.c 69
-rw-r--r-- src/tokens.h 2
-rw-r--r-- src/types.c 20
-rw-r--r-- src/types.h 6
6 files changed, 95 insertions, 10 deletions
diff --git a/src/ast.c b/src/ast.c
index 3eaaf61..f75e531 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -129,6 +129,12 @@ static void do_print_ast(Node *node, int depth)
for (int i = 0; i < node->block.num_children; i++) {
do_print_ast(node->block.children[i], depth);
}
+ } else if (node->type == OP_DEREF) {
+ printf("DEREF\n");
+ do_print_ast(node->unary_expr, depth + 1);
+ } else if (node->type == OP_ADDROF) {
+ printf("ADDROF\n");
+ do_print_ast(node->unary_expr, depth + 1);
} else if (node->type == AST_BLOCK) {
printf("{\n");
for (int i = 0; i < node->block.num_children; i++) {
diff --git a/src/lexer.c b/src/lexer.c
index af5a14c..3e71343 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -109,6 +109,8 @@ Token Lexer_next(Lexer *lexer)
case ')': return Lexer_make_token(lexer, TOKEN_CLOSE_PAREN, 1);
case '{': return Lexer_make_token(lexer, TOKEN_OPEN_BRACE, 1);
case '}': return Lexer_make_token(lexer, TOKEN_CLOSE_BRACE, 1);
+ case '[': return Lexer_make_token(lexer, TOKEN_OPEN_BRACKET, 1);
+ case ']': return Lexer_make_token(lexer, TOKEN_CLOSE_BRACKET, 1);
case ';': return Lexer_make_token(lexer, TOKEN_SEMICOLON, 1);
case ':': return Lexer_make_token(lexer, TOKEN_COLON, 1);
case '~': return Lexer_make_token(lexer, TOKEN_TILDE, 1);
diff --git a/src/parser.c b/src/parser.c
index fe0802f..479767d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -190,11 +190,26 @@ Type *parse_type(Lexer *lexer)
type = type_new(TYPE_NONE);
}
- while (Lexer_peek(lexer).type == TOKEN_STAR) {
- Lexer_next(lexer);
- Type *ptr = type_new(TYPE_PTR);
- ptr->ptr = type;
- type = ptr;
+ for (;;) {
+ // FIXME: It doesn't really make sense to allow stuff like "int[3]*"
+ token = Lexer_peek(lexer);
+ if (token.type == TOKEN_STAR) {
+ Lexer_next(lexer);
+ Type *ptr = type_new(TYPE_PTR);
+ ptr->ptr = type;
+ type = ptr;
+ } else if (token.type == TOKEN_OPEN_BRACKET) {
+ Lexer_next(lexer);
+ Type *arr = type_new(TYPE_ARRAY);
+ arr->ptr = type;
+ // TODO: Constant integer expression support?
+ arr->array_size = assert_token(Lexer_next(lexer), TOKEN_INTLIT).value.as_int;
+ assert_token(Lexer_peek(lexer), TOKEN_CLOSE_BRACKET);
+ Lexer_next(lexer);
+ type = arr;
+ } else {
+ break;
+ }
}
return type;
@@ -245,8 +260,11 @@ Node *parse_var_declaration(Lexer *lexer)
die_location(token.loc, "Cannot initialize global variable `%s` outside function", node->var_decl.var.name);
node->var_decl.value = parse_expression(lexer);
- if (!type_equals(node->var_decl.var.type, node->var_decl.value->expr_type))
+ if (!type_equals(node->var_decl.var.type, node->var_decl.value->expr_type)) {
+ fprintf(stderr, "- Variable type: %s\n", type_to_str(node->var_decl.var.type));
+ fprintf(stderr, "- Value type: %s\n", type_to_str(node->var_decl.value->expr_type));
die_location(token.loc, "Type mismatch for variable declaration `%s` initalizer", node->var_decl.var.name);
+ }
assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
} else {
@@ -282,6 +300,8 @@ Node *parse_function_call_args(Lexer *lexer, Node *func)
die_location(identifier.loc, "Function `%s` expects %d arguments, got %d", func->func.name, func->func.num_args, call->call.num_args);
for (int i = 0; i < call->call.num_args; i++) {
if (!type_equals(func->func.args[i].type, call->call.args[i]->expr_type)) {
+ fprintf(stderr, "- Function argument %d: %s\n", i, type_to_str(func->func.args[i].type));
+ fprintf(stderr, "- Provided argument %d: %s\n", i, type_to_str(call->call.args[i]->expr_type));
die_location(identifier.loc, "Type mismatch for argument %d in function call `%s`", i, func->func.name);
}
}
@@ -303,6 +323,7 @@ Node *parse_identifier(Lexer *lexer)
expr = Node_new(AST_LOCAL_VAR);
expr->variable = var;
expr->expr_type = var->type;
+ expr = decay_array_to_pointer(expr, &token);
return expr;
}
@@ -312,6 +333,7 @@ Node *parse_identifier(Lexer *lexer)
expr = Node_new(AST_GLOBAL_VAR);
expr->variable = gvar;
expr->expr_type = gvar->type;
+ expr = decay_array_to_pointer(expr, &token);
return expr;
}
@@ -375,8 +397,35 @@ Node *parse_factor(Lexer *lexer)
expr->unary_expr = parse_factor(lexer);
expr = handle_unary_expr_types(expr, &token);
} else {
- die_location(token.loc, ": Expected token found in parse_factor: `%s`", token_type_to_str(token.type));
+ die_location(token.loc, ": Unexpected token found in parse_factor: `%s`", token_type_to_str(token.type));
+ }
+
+ // TODO: This is a bit hacky, ideally we do this in a way that better follows the
+ // grammar rules.
+ for (;;) {
+ token = Lexer_peek(lexer);
+ // Convert indexing into pointer arithmetic + dereferencing
+ if (token.type == TOKEN_OPEN_BRACKET) {
+ // if (expr->expr_type->type != TYPE_PTR)
+ // die_location(token.loc, "Cannot index non-pointer type");
+ Lexer_next(lexer);
+
+ Node *index = parse_expression(lexer);
+ assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACKET);
+
+ Node *offset = Node_new(OP_PLUS);
+ offset->binary.left = expr;
+ offset->binary.right = index;
+ offset = handle_binary_expr_types(offset, &token);
+
+ expr = Node_new(OP_DEREF);
+ expr->unary_expr = offset;
+ expr = handle_unary_expr_types(expr, &token);
+ } else {
+ break;
+ }
}
+
return expr;
}
@@ -450,8 +499,12 @@ Node *parse_expression(Lexer *lexer)
assign->assign.var = node;
assign->assign.value = parse_expression(lexer);
- if (!type_equals(node->expr_type, assign->assign.value->expr_type))
+
+ if (!type_equals(node->expr_type, assign->assign.value->expr_type)) {
+ fprintf(stderr, "- Variable type: %s\n", type_to_str(assign->assign.var->expr_type));
+ fprintf(stderr, "- Value type: %s\n", type_to_str(assign->assign.value->expr_type));
die_location(token.loc, "Type mismatch in assignment expression");
+ }
node = assign;
node->expr_type = node->assign.var->expr_type;
diff --git a/src/tokens.h b/src/tokens.h
index 42f4647..f076b89 100644
--- a/src/tokens.h
+++ b/src/tokens.h
@@ -9,6 +9,7 @@
F(TOKEN_ASSIGN, "=") \
F(TOKEN_BAR, "|") \
F(TOKEN_CLOSE_BRACE, "}") \
+ F(TOKEN_CLOSE_BRACKET, "]") \
F(TOKEN_CLOSE_PAREN, ")") \
F(TOKEN_COLON, ":") \
F(TOKEN_COMMA, ",") \
@@ -27,6 +28,7 @@
F(TOKEN_MINUSMINUS, "--") \
F(TOKEN_NEQ, "!=") \
F(TOKEN_OPEN_BRACE, "{") \
+ F(TOKEN_OPEN_BRACKET, "[") \
F(TOKEN_OPEN_PAREN, "(") \
F(TOKEN_OR, "||") \
F(TOKEN_PERCENT, "%") \
diff --git a/src/types.c b/src/types.c
index bf855de..8876b22 100644
--- a/src/types.c
+++ b/src/types.c
@@ -21,6 +21,7 @@ i64 size_for_type(Type *type)
{
case TYPE_INT: return 8;
case TYPE_PTR: return 8;
+ case TYPE_ARRAY: return type->array_size * size_for_type(type->ptr);
default: assert(false && "Unreachable type");
}
}
@@ -44,6 +45,7 @@ static char *data_type_to_str(DataType type)
case TYPE_NONE: return "void";
case TYPE_INT: return "int";
case TYPE_PTR: return "*";
+ case TYPE_ARRAY: return "array";
default: assert(false && "Unreachable");
}
}
@@ -76,12 +78,16 @@ Node *handle_unary_expr_types(Node *node, Token *token)
node->expr_type = type_new(TYPE_INT);
} else if (node->type == OP_ADDROF) {
Type *ptr = type_new(TYPE_PTR);
- ptr->ptr = old_type;
+ // The address of an array is a pointer to the first element
+ ptr->ptr = old_type->type == TYPE_ARRAY ? old_type->ptr : old_type;
node->expr_type = ptr;
} else if (node->type == OP_DEREF) {
if (old_type->type != TYPE_PTR)
die_location(token->loc, "Cannot dereference non-pointer type");
node->expr_type = old_type->ptr;
+ // If the dereferenced type is an array, we need to decay it to a
+ // pointer to the first element.
+ node = decay_array_to_pointer(node, token);
} else if (node->type == OP_NEG) {
if (old_type->type != TYPE_INT)
die_location(token->loc, "Cannot negate non-integer type");
@@ -184,3 +190,15 @@ Node *handle_binary_expr_types(Node *node, Token *token)
}
return node;
}
+
+Node *decay_array_to_pointer(Node *node, Token *token)
+{
+ // We can only take the address of an lvalue, so only array-typed lvalue nodes are decayed.
+ if (is_lvalue(node->type) && node->expr_type->type == TYPE_ARRAY) {
+ Node *address = Node_new(OP_ADDROF);
+ address->unary_expr = node;
+ address = handle_unary_expr_types(address, token);
+ node = address;
+ }
+ return node;
+} \ No newline at end of file
diff --git a/src/types.h b/src/types.h
index 5f6f45a..8bbf4e3 100644
--- a/src/types.h
+++ b/src/types.h
@@ -7,11 +7,13 @@ typedef enum {
TYPE_NONE,
TYPE_INT,
TYPE_PTR,
+ TYPE_ARRAY,
} DataType;
typedef struct data_type_node {
DataType type;
struct data_type_node *ptr;
+ i64 array_size;
} Type;
Type *type_new(DataType type);
@@ -23,4 +25,6 @@ char *type_to_str(Type *type);
typedef struct ast_node Node;
Node *handle_unary_expr_types(Node *, Token *);
-Node *handle_binary_expr_types(Node *, Token *); \ No newline at end of file
+Node *handle_binary_expr_types(Node *, Token *);
+
+Node *decay_array_to_pointer(Node *, Token *); \ No newline at end of file