1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
|
#include "parser.h"
#include "utils.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
// Function node that is currently being parsed. parse_func() points this at
// the node it creates, and parse_var_declaration() reads it to register the
// declared variable as a local of that function. It starts out as NULL and is
// not cleared again when parse_func() returns.
static Node *current_function = NULL;
/*
 * Check that `token` is of the expected `type`.
 *
 * When the types match, `token` is handed back unchanged so the call can be
 * used inline. On a mismatch the token's source location, the expected and
 * actual token type names, and the compiler location (`filename`:`line`) of
 * the failing check are written to stderr and the process exits with status 1.
 */
Token do_assert_token(Token token, TokenType type, char *filename, int line)
{
    if (token.type == type) {
        return token;
    }

    Location_print(stderr, token.loc);
    fprintf(stderr, ": Expected token of type `%s` but got `%s`\n", token_type_to_str(type), token_type_to_str(token.type));
    fprintf(stderr, "Relevant location in compiler: %s:%d\n", filename, line);
    exit(1);
}

/* Wrapper around do_assert_token() that passes the call site's file and line. */
#define assert_token(token, type) do_assert_token(token, type, __FILE__, __LINE__)
/******
* Some helpers
*/
/*
 * Map the token type of a binary operator to the AST node type of that
 * operator.
 *
 * Returns the matching OP_* value. A token type without a mapping is a bug in
 * the caller: it is reported and the process is terminated, so the function
 * never returns in that case.
 */
NodeType binary_token_to_op(TokenType type)
{
    switch (type)
    {
    case TOKEN_PLUS: return OP_PLUS;
    case TOKEN_MINUS: return OP_MINUS;
    case TOKEN_STAR: return OP_MUL;
    case TOKEN_SLASH: return OP_DIV;
    case TOKEN_PERCENT: return OP_MOD;
    case TOKEN_LSHIFT: return OP_LSHIFT;
    case TOKEN_RSHIFT: return OP_RSHIFT;
    case TOKEN_AND: return OP_AND;
    case TOKEN_OR: return OP_OR;
    case TOKEN_XOR: return OP_XOR;
    case TOKEN_EQ: return OP_EQ;
    case TOKEN_NEQ: return OP_NEQ;
    case TOKEN_LT: return OP_LT;
    case TOKEN_LEQ: return OP_LEQ;
    case TOKEN_GT: return OP_GT;
    case TOKEN_GEQ: return OP_GEQ;
    default: break;
    }

    // assert() expands to nothing when NDEBUG is defined. Without the
    // unconditional exit below, a release build would run off the end of this
    // non-void function, which is undefined behaviour.
    assert(false && "binary_token_to_op called with invalid token type");
    fprintf(stderr, "binary_token_to_op called with invalid token type\n");
    exit(1);
}
/*
 * Append `child` to the end of `parent`'s child array.
 *
 * The array is grown by exactly one slot on every call. If it cannot be
 * grown, the failure is reported on stderr and the process exits with
 * status 1.
 */
void Node_add_child(Node *parent, Node *child)
{
    // TODO, use a vector
    size_t new_count = (size_t)parent->block.num_children + 1;

    // Keep the result in a temporary: on failure realloc() returns NULL and
    // leaves the old array untouched, so it must not overwrite the old pointer
    // (or be indexed) before it has been checked.
    void *grown = realloc(parent->block.children, sizeof(Node *) * new_count);
    if (grown == NULL) {
        fprintf(stderr, "Out of memory while adding a child node\n");
        exit(1);
    }

    parent->block.children = grown;
    parent->block.children[parent->block.num_children] = child;
    parent->block.num_children++;
}
/*
 * Allocate a zero-initialised AST node and set its `type` field.
 *
 * Returns the new node; it is never NULL. If the allocation fails, the
 * failure is reported on stderr and the process exits with status 1.
 */
Node *Node_new(NodeType type)
{
    // calloc() takes (count, element size); sizing from the pointee keeps the
    // allocation tied to the variable's type.
    Node *self = calloc(1, sizeof *self);
    if (self == NULL) {
        fprintf(stderr, "Out of memory while allocating an AST node\n");
        exit(1);
    }

    self->type = type;
    return self;
}
/*
 * Parse a type annotation from the lexer.
 *
 * If the next token is TOKEN_INT it is consumed and the base type becomes
 * TYPE_INT; otherwise nothing is consumed and the base type is TYPE_NONE.
 * Every TOKEN_AMPERSAND that follows is consumed and adds one level of
 * indirection to the result.
 */
Type parse_type(Lexer *lexer)
{
    Type parsed = {0};

    if (Lexer_peek(lexer).type != TOKEN_INT) {
        parsed.type = TYPE_NONE;
    } else {
        parsed.type = TYPE_INT;
        Lexer_next(lexer);
    }

    // One level of indirection per trailing ampersand.
    for (; Lexer_peek(lexer).type == TOKEN_AMPERSAND; parsed.indirection++) {
        Lexer_next(lexer);
    }

    return parsed;
}
/*
 * Parse an integer literal.
 *
 * Consumes the next token, which must be TOKEN_INTLIT (checked with
 * assert_token), and returns an AST_LITERAL node of type TYPE_INT that
 * carries the token's integer value.
 */
Node *parse_literal(Lexer *lexer)
{
    Node *lit_node = Node_new(AST_LITERAL);
    Token int_tok = assert_token(Lexer_next(lexer), TOKEN_INTLIT);

    lit_node->literal.type = (Type) {.type = TYPE_INT};
    lit_node->literal.as_int = int_tok.value.as_int;

    return lit_node;
}
// Forward declaration: parse_expression() is defined further down, but
// parse_var_declaration() and parse_factor() call it before that point.
Node *parse_expression(Lexer *);
/*
 * Parse a variable declaration of the form
 *
 *     let <identifier> : <type> = <expression> ;
 *
 * and register the variable as a local of the function that is currently
 * being parsed (`current_function`).
 *
 * Returns the AST_VARDECL node holding the name, type and initialiser. Every
 * expected token is checked with assert_token. A declaration that appears
 * while no function is being parsed is reported through die_location, and the
 * process exits. The process also exits if the locals array cannot be grown.
 */
Node *parse_var_declaration(Lexer *lexer)
{
    Token token = assert_token(Lexer_next(lexer), TOKEN_LET);

    Node *node = Node_new(AST_VARDECL);
    node->var.name = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER).value.as_string;
    assert_token(Lexer_next(lexer), TOKEN_COLON);
    node->var.type = parse_type(lexer);
    assert_token(Lexer_next(lexer), TOKEN_ASSIGN);
    node->var.value = parse_expression(lexer);
    assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);

    // Add variable to current function
    if (!current_function || current_function->type != AST_FUNC) {
        die_location(token.loc, "Variable declaration outside of function");
        // Matches the other die_location call sites in this file and makes
        // sure current_function is never dereferenced past this point.
        exit(1);
    }

    int new_len = (current_function->func.num_locals + 1);
    int var_size = 8; // TODO: Compute sizes based on different types

    // Check the result before storing it: on failure realloc() returns NULL
    // and leaves the old array untouched.
    void *grown = realloc(current_function->func.locals, sizeof(Variable) * (size_t)new_len);
    if (grown == NULL) {
        fprintf(stderr, "Out of memory while adding a local variable\n");
        exit(1);
    }
    current_function->func.locals = grown;

    // The new local lives at the current stack offset; the offset then moves
    // past it.
    current_function->func.locals[current_function->func.num_locals] = (Variable) {
        .name = node->var.name,
        .type = node->var.type,
        .offset = current_function->func.cur_stack_offset,
    };
    current_function->func.num_locals++;
    current_function->func.cur_stack_offset += var_size;

    return node;
}
/*
 * Parse a factor, the tightest-binding level of the expression grammar:
 *
 *   - a unary operator (`-`, `~` or `!`) applied to another factor,
 *   - a parenthesised expression, or
 *   - an integer literal.
 *
 * Returns the node for the parsed factor. Any other leading token is reported
 * through die_location and the process exits.
 */
Node *parse_factor(Lexer *lexer)
{
    // TODO: Parse more complicated things
    Token token = Lexer_peek(lexer);
    Node *expr = NULL;

    if (token.type == TOKEN_MINUS) {
        Lexer_next(lexer);
        expr = Node_new(OP_NEG);
        expr->unary_expr = parse_factor(lexer);
    } else if (token.type == TOKEN_TILDE) {
        Lexer_next(lexer);
        expr = Node_new(OP_BWINV);
        expr->unary_expr = parse_factor(lexer);
    } else if (token.type == TOKEN_EXCLAMATION) {
        Lexer_next(lexer);
        expr = Node_new(OP_NOT);
        expr->unary_expr = parse_factor(lexer);
    } else if (token.type == TOKEN_OPEN_PAREN) {
        // The parenthesised expression itself becomes the factor; no extra
        // node is created for the parentheses.
        Lexer_next(lexer);
        expr = parse_expression(lexer);
        assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);
    } else if (token.type == TOKEN_INTLIT) {
        // parse_literal consumes the literal token itself.
        expr = parse_literal(lexer);
    } else {
        die_location(token.loc, ": Unexpected token found in parse_factor: `%s`", token_type_to_str(token.type));
        exit(1);
    }

    return expr;
}
/*
 * Function body for one left-associative binary-operator precedence level.
 *
 * Expects a `lexer` variable in the enclosing function. It parses one operand
 * with `next_parser`; then, for as long as `predicate` accepts the type of the
 * upcoming token, it consumes that operator, parses another operand and folds
 * both into a new operator node whose left child is everything parsed so far.
 * The macro expands to a complete body ending in `return`.
 */
#define BINOP_PARSER(next_parser, predicate)                                  \
    Node *lhs = next_parser(lexer);                                           \
    for (Token op_tok = Lexer_peek(lexer);                                    \
         predicate(op_tok.type);                                              \
         op_tok = Lexer_peek(lexer)) {                                        \
        Lexer_next(lexer);                                                    \
        Node *joined = Node_new(binary_token_to_op(op_tok.type));             \
        joined->binary.left = lhs;                                            \
        joined->binary.right = next_parser(lexer);                            \
        lhs = joined;                                                         \
    }                                                                         \
    return lhs;
/*
 * Precedence ladder for binary operators, tightest-binding level first.
 * Each level pairs a predicate that recognises its operator tokens with a
 * parser whose operands come from the level above it.
 */

/* Multiplicative operators: `*`, `/`, `%`. */
bool is_term_token(TokenType type)
{
    switch (type) {
    case TOKEN_STAR:
    case TOKEN_SLASH:
    case TOKEN_PERCENT:
        return true;
    default:
        return false;
    }
}

Node *parse_term(Lexer *lexer)
{
    BINOP_PARSER(parse_factor, is_term_token);
}

/* Additive operators: `+`, `-`. */
bool is_additive_token(TokenType type)
{
    switch (type) {
    case TOKEN_PLUS:
    case TOKEN_MINUS:
        return true;
    default:
        return false;
    }
}

Node *parse_additive(Lexer *lexer)
{
    BINOP_PARSER(parse_term, is_additive_token);
}

/* Relational operators: TOKEN_LT, TOKEN_LEQ, TOKEN_GT, TOKEN_GEQ. */
bool is_relational_token(TokenType type)
{
    switch (type) {
    case TOKEN_LT:
    case TOKEN_LEQ:
    case TOKEN_GT:
    case TOKEN_GEQ:
        return true;
    default:
        return false;
    }
}

Node *parse_relational(Lexer *lexer)
{
    BINOP_PARSER(parse_additive, is_relational_token);
}

/* Equality operators: TOKEN_EQ, TOKEN_NEQ. */
bool is_equality_token(TokenType type)
{
    switch (type) {
    case TOKEN_EQ:
    case TOKEN_NEQ:
        return true;
    default:
        return false;
    }
}

Node *parse_equality(Lexer *lexer)
{
    BINOP_PARSER(parse_relational, is_equality_token);
}

/* TOKEN_AND binds tighter than TOKEN_OR. */
bool is_logical_and_token(TokenType type)
{
    return type == TOKEN_AND;
}

Node *parse_logical_and(Lexer *lexer)
{
    BINOP_PARSER(parse_equality, is_logical_and_token);
}

/* TOKEN_OR is the loosest-binding level, so it is the expression entry point. */
bool is_logical_or_token(TokenType type)
{
    return type == TOKEN_OR;
}

Node *parse_expression(Lexer *lexer)
{
    BINOP_PARSER(parse_logical_and, is_logical_or_token);
}
/*
 * Parse a single statement.
 *
 * Two forms are recognised, chosen by the upcoming token:
 *   - `return <expression> ;` yields an AST_RETURN node whose operand is the
 *     expression;
 *   - a variable declaration starting with TOKEN_LET is delegated to
 *     parse_var_declaration().
 * Any other token is reported through die_location and the process exits.
 */
Node *parse_statement(Lexer *lexer)
{
    Token upcoming = Lexer_peek(lexer);

    switch (upcoming.type) {
    case TOKEN_LET:
        return parse_var_declaration(lexer);

    case TOKEN_RETURN: {
        assert_token(Lexer_next(lexer), TOKEN_RETURN);
        Node *ret = Node_new(AST_RETURN);
        ret->unary_expr = parse_expression(lexer);
        assert_token(Lexer_next(lexer), TOKEN_SEMICOLON);
        return ret;
    }

    default:
        die_location(upcoming.loc, ": Unexpected token in parse_statement: %s\n", token_type_to_str(upcoming.type));
        exit(1);
    }
}
/*
 * Parse statements until the upcoming token is TOKEN_CLOSE_BRACE and collect
 * them, in order, as children of a new AST_BLOCK node.
 *
 * The closing brace is only peeked at, not consumed; the caller is expected
 * to consume it.
 */
Node *parse_block(Lexer *lexer)
{
    Node *block = Node_new(AST_BLOCK);
    block->block.num_children = 0;

    for (;;) {
        if (Lexer_peek(lexer).type == TOKEN_CLOSE_BRACE) {
            break;
        }
        Node *statement = parse_statement(lexer);
        Node_add_child(block, statement);
    }

    return block;
}
/*
 * Parse a function definition of the form
 *
 *     fn <identifier> ( ) [ : <type> ] { <statements> }
 *
 * Returns the AST_FUNC node holding the name, return type and body. The node
 * is installed as `current_function` before the rest of the definition is
 * parsed, so variable declarations in the body are registered on it. When the
 * return type annotation is absent, the return type is TYPE_NONE.
 */
Node *parse_func(Lexer *lexer)
{
    assert_token(Lexer_next(lexer), TOKEN_FN);

    Node *func = Node_new(AST_FUNC);
    current_function = func;

    Token name_tok = assert_token(Lexer_next(lexer), TOKEN_IDENTIFIER);
    func->func.name = name_tok.value.as_string;

    assert_token(Lexer_next(lexer), TOKEN_OPEN_PAREN);
    // TODO: Parse parameters
    assert_token(Lexer_next(lexer), TOKEN_CLOSE_PAREN);

    if (Lexer_peek(lexer).type != TOKEN_COLON) {
        // No return type, void fn.
        func->func.return_type = (Type){.type = TYPE_NONE};
    } else {
        // TODO: Parse all return types
        assert_token(Lexer_next(lexer), TOKEN_COLON);
        func->func.return_type = parse_type(lexer);
    }

    assert_token(Lexer_next(lexer), TOKEN_OPEN_BRACE);
    func->func.body = parse_block(lexer);
    assert_token(Lexer_next(lexer), TOKEN_CLOSE_BRACE);

    return func;
}
/*
 * Parse a whole program: a sequence of function definitions running up to
 * TOKEN_EOF.
 *
 * Returns an AST_PROGRAM node whose children are the parsed functions in
 * source order. Any top-level token other than TOKEN_FN is reported through
 * die_location and the process exits.
 */
Node *parse_program(Lexer *lexer)
{
    Node *program = Node_new(AST_PROGRAM);

    for (Token upcoming = Lexer_peek(lexer);
         upcoming.type != TOKEN_EOF;
         upcoming = Lexer_peek(lexer)) {
        if (upcoming.type != TOKEN_FN) {
            die_location(upcoming.loc, "Unexpected token in parse_program: `%s`\n", token_type_to_str(upcoming.type));
            exit(1);
        }
        Node_add_child(program, parse_func(lexer));
    }

    return program;
}
|