diff options
| author | Mustafa Quraish <[email protected]> | 2022-02-02 19:22:15 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-02-02 19:22:15 -0500 |
| commit | e2dbc82213a6e5da74de2220eeeab78da41fb519 (patch) | |
| tree | 7f6d9a40fde00ec761e100a92bd93f555725d56c | |
| parent | Move type-related stuff to a separate file (diff) | |
| download | cup-e2dbc82213a6e5da74de2220eeeab78da41fb519.tar.xz cup-e2dbc82213a6e5da74de2220eeeab78da41fb519.zip | |
Add initial support for arrays (also no testing)

Usual disclaimer at this point: this is a quick-and-dirty implementation
that hasn't been tested beyond basic sanity checks. Arrays automatically
decay into pointers when their identifier is accessed.

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/ast.c | 6 |
| -rw-r--r-- | src/lexer.c | 2 |
| -rw-r--r-- | src/parser.c | 69 |
| -rw-r--r-- | src/tokens.h | 2 |
| -rw-r--r-- | src/types.c | 20 |
| -rw-r--r-- | src/types.h | 6 |
6 files changed, 95 insertions, 10 deletions
@@ -129,6 +129,12 @@ static void do_print_ast(Node *node, int depth) for (int i = 0; i < node->block.num_children; i++) { do_print_ast(node->block.children[i], depth); } + } else if (node->type == OP_DEREF) { + printf("DEREF\n"); + do_print_ast(node->unary_expr, depth + 1); + } else if (node->type == OP_ADDROF) { + printf("ADDROF\n"); + do_print_ast(node->unary_expr, depth + 1); } else if (node->type == AST_BLOCK) { printf("{\n"); for (int i = 0; i < node->block.num_children; i++) { diff --git a/src/lexer.c b/src/lexer.c index af5a14c..3e71343 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -109,6 +109,8 @@ Token Lexer_next(Lexer *lexer) case ')': return Lexer_make_token(lexer, TOKEN_CLOSE_PAREN, 1); case '{': return Lexer_make_token(lexer, TOKEN_OPEN_BRACE, 1); case '}': return Lexer_make_token(lexer, TOKEN_CLOSE_BRACE, 1); + case '[': return Lexer_make_token(lexer, TOKEN_OPEN_BRACKET, 1); + case ']': return Lexer_make_token(lexer, TOKEN_CLOSE_BRACKET, 1); case ';': return Lexer_make_token(lexer, TOKEN_SEMICOLON, 1); case ':': return Lexer_make_token(lexer, TOKEN_COLON, 1); case '~': return Lexer_make_token(lexer, TOKEN_TILDE, 1); diff --git a/src/parser.c b/src/parser.c index fe0802f..479767d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -190,11 +190,26 @@ Type *parse_type(Lexer *lexer) type = type_new(TYPE_NONE); } - while (Lexer_peek(lexer).type == TOKEN_STAR) { - Lexer_next(lexer); - Type *ptr = type_new(TYPE_PTR); - ptr->ptr = type; - type = ptr; + for (;;) { + // FIXME: It doesn't really make sense to allow stuff like "int[3]*" + token = Lexer_peek(lexer); + if (token.type == TOKEN_STAR) { + Lexer_next(lexer); + Type *ptr = type_new(TYPE_PTR); + ptr->ptr = type; + type = ptr; + } else if (token.type == TOKEN_OPEN_BRACKET) { + Lexer_next(lexer); + Type *arr = type_new(TYPE_ARRAY); + arr->ptr = type; + // TODO: Contant integer expression support? 
+ arr->array_size = assert_token(Lexer_next(lexer), TOKEN_INTLIT).value.as_int; + assert_token(Lexer_peek(lexer), TOKEN_CLOSE_BRACKET); + Lexer_next(lexer); + type = arr; + } else { + break; + } } return type; @@ -245,8 +260,11 @@ Node *parse_var_declaration(Lexer *lexer) die_location(token.loc, "Cannot initialize global variable `%s` outside function", node->var_decl.var.name); node->var_decl.value = parse_expression(lexer); - if (!type_equals(node->var_decl.var.type, node->var_decl.value->expr_type)) + if (!type_equals(node->var_decl.var.type, node->var_decl.value->expr_type)) { + fprintf(stderr, "- Variable type: %s\n", type_to_str(node->var_decl.var.type)); + fprintf(stderr, "- Value type: %s\n", type_to_str(node->var_decl.value->expr_type)); die_location(token.loc, "Type mismatch for variable declaration `%s` initalizer", node->var_decl.var.name); + } assert_token(Lexer_next(lexer), TOKEN_SEMICOLON); } else { @@ -282,6 +300,8 @@ Node *parse_function_call_args(Lexer *lexer, Node *func) die_location(identifier.loc, "Function `%s` expects %d arguments, got %d", func->func.name, func->func.num_args, call->call.num_args); for (int i = 0; i < call->call.num_args; i++) { if (!type_equals(func->func.args[i].type, call->call.args[i]->expr_type)) { + fprintf(stderr, "- Function argument %d: %s\n", i, type_to_str(func->func.args[i].type)); + fprintf(stderr, "- Provided argument %d: %s\n", i, type_to_str(call->call.args[i]->expr_type)); die_location(identifier.loc, "Type mismatch for argument %d in function call `%s`", i, func->func.name); } } @@ -303,6 +323,7 @@ Node *parse_identifier(Lexer *lexer) expr = Node_new(AST_LOCAL_VAR); expr->variable = var; expr->expr_type = var->type; + expr = decay_array_to_pointer(expr, &token); return expr; } @@ -312,6 +333,7 @@ Node *parse_identifier(Lexer *lexer) expr = Node_new(AST_GLOBAL_VAR); expr->variable = gvar; expr->expr_type = gvar->type; + expr = decay_array_to_pointer(expr, &token); return expr; } @@ -375,8 +397,35 @@ Node 
*parse_factor(Lexer *lexer) expr->unary_expr = parse_factor(lexer); expr = handle_unary_expr_types(expr, &token); } else { - die_location(token.loc, ": Expected token found in parse_factor: `%s`", token_type_to_str(token.type)); + die_location(token.loc, ": Unexpected token found in parse_factor: `%s`", token_type_to_str(token.type)); + } + + // TODO: This is a bit hacky, ideally we do this in a way that better follows the + // grammar rules. + for (;;) { + token = Lexer_peek(lexer); + // Convert indexing into pointer arithmetic + dereferencing + if (token.type == TOKEN_OPEN_BRACKET) { + // if (expr->expr_type->type != TYPE_PTR) + // die_location(token.loc, "Cannot index non-pointer type"); + Lexer_next(lexer); + + Node *index = parse_expression(lexer); + assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACKET); + + Node *offset = Node_new(OP_PLUS); + offset->binary.left = expr; + offset->binary.right = index; + offset = handle_binary_expr_types(offset, &token); + + expr = Node_new(OP_DEREF); + expr->unary_expr = offset; + expr = handle_unary_expr_types(expr, &token); + } else { + break; + } } + return expr; } @@ -450,8 +499,12 @@ Node *parse_expression(Lexer *lexer) assign->assign.var = node; assign->assign.value = parse_expression(lexer); - if (!type_equals(node->expr_type, assign->assign.value->expr_type)) + + if (!type_equals(node->expr_type, assign->assign.value->expr_type)) { + fprintf(stderr, "- Variable type: %s\n", type_to_str(assign->assign.var->expr_type)); + fprintf(stderr, "- Value type: %s\n", type_to_str(assign->assign.value->expr_type)); die_location(token.loc, "Type mismatch in assignment expression"); + } node = assign; node->expr_type = node->assign.var->expr_type; diff --git a/src/tokens.h b/src/tokens.h index 42f4647..f076b89 100644 --- a/src/tokens.h +++ b/src/tokens.h @@ -9,6 +9,7 @@ F(TOKEN_ASSIGN, "=") \ F(TOKEN_BAR, "|") \ F(TOKEN_CLOSE_BRACE, "}") \ + F(TOKEN_CLOSE_BRACKET, "]") \ F(TOKEN_CLOSE_PAREN, ")") \ F(TOKEN_COLON, ":") \ 
F(TOKEN_COMMA, ",") \ @@ -27,6 +28,7 @@ F(TOKEN_MINUSMINUS, "--") \ F(TOKEN_NEQ, "!=") \ F(TOKEN_OPEN_BRACE, "{") \ + F(TOKEN_OPEN_BRACKET, "[") \ F(TOKEN_OPEN_PAREN, "(") \ F(TOKEN_OR, "||") \ F(TOKEN_PERCENT, "%") \ diff --git a/src/types.c b/src/types.c index bf855de..8876b22 100644 --- a/src/types.c +++ b/src/types.c @@ -21,6 +21,7 @@ i64 size_for_type(Type *type) { case TYPE_INT: return 8; case TYPE_PTR: return 8; + case TYPE_ARRAY: return type->array_size * size_for_type(type->ptr); default: assert(false && "Unreachable type"); } } @@ -44,6 +45,7 @@ static char *data_type_to_str(DataType type) case TYPE_NONE: return "void"; case TYPE_INT: return "int"; case TYPE_PTR: return "*"; + case TYPE_ARRAY: return "array"; default: assert(false && "Unreachable"); } } @@ -76,12 +78,16 @@ Node *handle_unary_expr_types(Node *node, Token *token) node->expr_type = type_new(TYPE_INT); } else if (node->type == OP_ADDROF) { Type *ptr = type_new(TYPE_PTR); - ptr->ptr = old_type; + // The address of an array is a pointer to the first element + ptr->ptr = old_type->type == TYPE_ARRAY ? old_type->ptr : old_type; node->expr_type = ptr; } else if (node->type == OP_DEREF) { if (old_type->type != TYPE_PTR) die_location(token->loc, "Cannot dereference non-pointer type"); node->expr_type = old_type->ptr; + // If the dereferenced type is an array, we need to decay it to a + // pointer to the first element. 
+ node = decay_array_to_pointer(node, token); } else if (node->type == OP_NEG) { if (old_type->type != TYPE_INT) die_location(token->loc, "Cannot negate non-integer type"); @@ -184,3 +190,15 @@ Node *handle_binary_expr_types(Node *node, Token *token) } return node; } + +Node *decay_array_to_pointer(Node *node, Token *token) +{ + // We can only take the address of an lvalue, so we need to ensure that + if (is_lvalue(node->type) && node->expr_type->type == TYPE_ARRAY) { + Node *address = Node_new(OP_ADDROF); + address->unary_expr = node; + address = handle_unary_expr_types(address, token); + node = address; + } + return node; +}
\ No newline at end of file diff --git a/src/types.h b/src/types.h index 5f6f45a..8bbf4e3 100644 --- a/src/types.h +++ b/src/types.h @@ -7,11 +7,13 @@ typedef enum { TYPE_NONE, TYPE_INT, TYPE_PTR, + TYPE_ARRAY, } DataType; typedef struct data_type_node { DataType type; struct data_type_node *ptr; + i64 array_size; } Type; Type *type_new(DataType type); @@ -23,4 +25,6 @@ char *type_to_str(Type *type); typedef struct ast_node Node; Node *handle_unary_expr_types(Node *, Token *); -Node *handle_binary_expr_types(Node *, Token *);
\ No newline at end of file +Node *handle_binary_expr_types(Node *, Token *); + +Node *decay_array_to_pointer(Node *, Token *);
\ No newline at end of file |