aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/ast.cup126
-rw-r--r--compiler/codegen.cup5
-rw-r--r--compiler/parser.cup103
-rw-r--r--compiler/types.cup44
4 files changed, 252 insertions, 26 deletions
diff --git a/compiler/ast.cup b/compiler/ast.cup
index 2ba677b..b452241 100644
--- a/compiler/ast.cup
+++ b/compiler/ast.cup
@@ -336,4 +336,130 @@ fn dump_ast(node: Node*, depth: int) {
} else {
putsln(node_type_to_string(node.typ));
}
+}
+
+// Defined below.
+fn type_check_unary(node: Node*, token: Token*): Node*;
+
+fn decay_array_to_pointer(node: Node*, token: Token*): Node* {
+ // We can only take the address of an lvalue, so we need to ensure that
+ if (is_lvalue(node.typ) && node.etyp.typ == TYPE_ARRAY) {
+ let address = node_new(AST_ADDROF);
+ address.d.unary = node;
+ address = type_check_unary(address, token);
+ node = address;
+ }
+ return node;
+}
+
+fn type_check_unary(node: Node*, token: Token*): Node* {
+ let old_type = node.d.unary.etyp;
+
+ if (node.typ != AST_ADDROF && old_type.typ == TYPE_STRUCT)
+ die_loc(here, &token.loc, "Performing invalid unary operation on struct type");
+
+ if (node.typ == AST_NOT) {
+ node.etyp = type_new(TYPE_INT);
+ } else if (node.typ == AST_ADDROF) {
+ let ptr = type_new(TYPE_PTR);
+ // The address of an array is a pointer to the first element
+ ptr.ptr = old_type.typ == TYPE_ARRAY ? old_type.ptr : old_type;
+ node.etyp = ptr;
+ } else if (node.typ == AST_DEREF) {
+ if (old_type.typ != TYPE_PTR)
+ die_loc(here, &token.loc, "Cannot dereference non-pointer type");
+ node.etyp = old_type.ptr;
+ // If the dereferenced type is an array, we need to decay it to a
+ // pointer to the first element.
+ node = decay_array_to_pointer(node, token);
+ } else if (node.typ == AST_NEG) {
+ if (old_type.typ != TYPE_INT)
+ die_loc(here, &token.loc, "Cannot negate non-integer type");
+ node.etyp = type_new(TYPE_INT);
+ } else {
+ // Default to not changing the type
+ node.etyp = old_type;
+ }
+ return node;
+}
+
+fn type_check_binary(node: Node*, token: Token*): Node*
+{
+ let lhs = node.d.binary.lhs.etyp;
+ let rhs = node.d.binary.rhs.etyp;
+
+ if (lhs.typ == TYPE_STRUCT || rhs.typ == TYPE_STRUCT)
+ die_loc(here, &token.loc, "Performing invalid binary operation on struct type");
+
+ if (node.typ == AST_PLUS) {
+ if (is_int_type(lhs) && is_int_type(rhs)) {
+ node.etyp = type_new(TYPE_INT);
+ } else if (lhs.typ == TYPE_PTR && is_int_type(rhs)) {
+ node.etyp = lhs;
+ // Pointer arithmetic!
+ if (size_for_type(lhs.ptr) != 1) {
+ let mul = node_new(AST_MUL);
+ mul.d.binary.lhs = node.d.binary.rhs;
+ mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr));
+ node.d.binary.rhs = mul;
+ }
+ } else if (is_int_type(lhs) && rhs.typ == TYPE_PTR) {
+ node.etyp = rhs;
+ // Pointer arithmetic!
+ if (size_for_type(rhs.ptr) != 1) {
+ let mul = node_new(AST_MUL);
+ mul.d.binary.lhs = node.d.binary.lhs;
+ mul.d.binary.rhs = node_from_int_literal(size_for_type(rhs.ptr));
+ node.d.binary.lhs = mul;
+ }
+ } else {
+ die_loc(here, &token.loc, "Cannot add non-integer types");
+ }
+ } else if (node.typ == AST_MINUS) {
+ if (is_int_type(lhs) && is_int_type(rhs)) {
+ node.etyp = type_new(TYPE_INT);
+ } else if (lhs.typ == TYPE_PTR && is_int_type(rhs)) {
+ node.etyp = lhs;
+ // Pointer arithmetic!
+ if (size_for_type(lhs.ptr) != 1) {
+ let mul = node_new(AST_MUL);
+ mul.d.binary.lhs = node.d.binary.rhs;
+ mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr));
+ node.d.binary.rhs = mul;
+ }
+ } else if (is_int_type(lhs) && rhs.typ == TYPE_PTR) {
+ node.etyp = rhs;
+ // Pointer arithmetic!
+ if (size_for_type(rhs.ptr) != 1) {
+ let mul = node_new(AST_MUL);
+ mul.d.binary.lhs = node.d.binary.lhs;
+ mul.d.binary.rhs = node_from_int_literal(size_for_type(rhs.ptr));
+ node.d.binary.lhs = mul;
+ }
+ } else if (lhs.typ == TYPE_PTR && rhs.typ == TYPE_PTR) {
+ node.etyp = type_new(TYPE_INT);
+ if (!types_equal(lhs.ptr, rhs.ptr))
+ die_loc(here, &token.loc, "Cannot subtract pointers of different types");
+
+ // Pointer arithmetic! (Divide by size of element)
+ if (size_for_type(lhs.ptr) != 1) {
+ let mul = node_new(AST_MUL);
+ mul.d.binary.lhs = node.d.binary.lhs;
+ mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr));
+ node.d.binary.lhs = mul;
+ }
+ } else {
+ die_loc(here, &token.loc, "Cannot subtract non-integer types");
+ }
+ } else if (node.typ == AST_MUL || node.typ == AST_DIV || node.typ == AST_MOD) {
+ if (is_int_type(lhs) && is_int_type(rhs)) {
+ node.etyp = lhs;
+ } else {
+ die_loc2(here, &token.loc, "Cannot do operation non-integer types:", node_type_to_string(node.typ));
+ }
+ } else {
+ // FIXME: This isn't very correct, but it's probably good enough for now
+ node.etyp = type_new(TYPE_INT);
+ }
+ return node;
} \ No newline at end of file
diff --git a/compiler/codegen.cup b/compiler/codegen.cup
index 68c3321..d0cf8ab 100644
--- a/compiler/codegen.cup
+++ b/compiler/codegen.cup
@@ -52,6 +52,8 @@ fn generate_lvalue_into_rax(node: Node*) {
let offset = node.d.variable.offset;
emit_asm(" mov rax, global_vars\n");
emit_asm(" add rax, "); emit_num(offset); emit_asm("\n");
+ } else if (node.typ == AST_DEREF) {
+ generate_expr_into_rax(node.d.unary);
} else {
die2("Unsupported type in generate_lvalue_into_rax: ", node_type_to_string(node.typ));
}
@@ -78,6 +80,9 @@ fn generate_expr_into_rax(node: Node*) {
} else {
die("Unsupported literal type in generate_expr_into_rax");
}
+ } else if (node.typ == AST_ADDROF) {
+ generate_lvalue_into_rax(node.d.unary);
+
} else if (node.typ == AST_CONDITIONAL) {
let label = ++gen_label_counter;
generate_expr_into_rax(node.d.conditional.cond);
diff --git a/compiler/parser.cup b/compiler/parser.cup
index a6a4f7b..d13a309 100644
--- a/compiler/parser.cup
+++ b/compiler/parser.cup
@@ -246,6 +246,7 @@ fn parse_function_call_args(lexer: Lexer*, func: Node*): Node* {
let node = node_new(AST_FUNCCALL);
node.d.call.func = func;
node.d.call.args = vector_new();
+ node.etyp = func.etyp;
lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
lexer_peek(lexer, &token);
@@ -280,7 +281,7 @@ fn parse_identifier(lexer: Lexer*): Node* {
node = node_new(AST_LOCAL_VAR);
node.d.variable = var;
node.etyp = var.typ;
- return node;
+ return decay_array_to_pointer(node, &token);
}
var = find_global_variable(&token);
@@ -288,7 +289,7 @@ fn parse_identifier(lexer: Lexer*): Node* {
node = node_new(AST_GLOBAL_VAR);
node.d.variable = var;
node.etyp = var.typ;
- return node;
+ return decay_array_to_pointer(node, &token);
}
let func = find_function_definition(&token);
@@ -320,11 +321,13 @@ fn parse_factor(lexer: Lexer*): Node* {
lexer_next(lexer, &token);
expr = node_new(AST_NEG);
expr.d.unary = parse_factor(lexer);
+ expr = type_check_unary(expr, &token);
} else if (token.typ == TOKEN_TILDE) {
lexer_next(lexer, &token);
expr = node_new(AST_BWINV);
expr.d.unary = parse_factor(lexer);
+ expr = type_check_unary(expr, &token);
} else if (token.typ == TOKEN_PLUSPLUS) {
lexer_next(lexer, &token);
@@ -336,7 +339,8 @@ fn parse_factor(lexer: Lexer*): Node* {
let plus = node_new(AST_PLUS);
plus.d.binary.lhs = expr.d.assign.lhs;
plus.d.binary.rhs = node_from_int_literal(1);
- expr.d.assign.rhs = plus;
+ expr.d.assign.rhs = type_check_binary(plus, &token);
+ expr.etyp = expr.d.assign.lhs.etyp;
// --x is changed to (x = x - 1)
} else if (token.typ == TOKEN_MINUSMINUS) {
@@ -349,7 +353,8 @@ fn parse_factor(lexer: Lexer*): Node* {
let minus = node_new(AST_MINUS);
minus.d.binary.lhs = expr.d.assign.lhs;
minus.d.binary.rhs = node_from_int_literal(1);
- expr.d.assign.rhs = minus;
+ expr.d.assign.rhs = type_check_binary(minus, &token);
+ expr.etyp = expr.d.assign.lhs.etyp;
// FIXME: This should probably go somewhere else more appropriate
} else if (token.typ == TOKEN_SIZEOF) {
@@ -363,6 +368,7 @@ fn parse_factor(lexer: Lexer*): Node* {
lexer_next(lexer, &token);
expr = node_new(AST_NOT);
expr.d.unary = parse_factor(lexer);
+ expr = type_check_unary(expr, &token);
} else if (token.typ == TOKEN_OPEN_PAREN) {
lexer_next(lexer, &token);
@@ -376,16 +382,55 @@ fn parse_factor(lexer: Lexer*): Node* {
expr = parse_identifier(lexer);
} else if (token.typ == TOKEN_AMPERSAND) {
- die("TOKEN_AMPERSAND not implemented in parse_factor");
+ lexer_next(lexer, &token);
+ expr = node_new(AST_ADDROF);
+ expr.d.unary = parse_factor(lexer);
+ if (!is_lvalue(expr.d.unary.typ))
+ die_loc(here, &token.loc, "Cannot take address of non-lvalue");
+ expr = type_check_unary(expr, &token);
} else if (token.typ == TOKEN_STAR) {
- die("TOKEN_STAR not implemented in parse_factor");
+ lexer_next(lexer, &token);
+ let subexp = parse_factor(lexer);
+ if (subexp.etyp.typ != TYPE_PTR)
+ die_loc(here, &token.loc, "Cannot dereference non-pointer type");
+
+ expr = node_new(AST_DEREF);
+ expr.d.unary = subexp;
+ expr = type_check_unary(expr, &token);
} else {
die_loc2(here, &token.loc, ": Unexpected token found in parse_factor: ", token_type_to_string(token.typ));
}
- // TODO: Handle postfix operators here.
+ let running = true;
+ while (running) {
+ lexer_peek(lexer, &token);
+ if (token.typ == TOKEN_OPEN_BRACKET) {
+ if (expr.etyp.typ != TYPE_PTR)
+ die_loc(here, &token.loc, "Cannot index non-pointer/array type");
+ lexer_next(lexer, &token);
+
+ let index = parse_expression(lexer);
+
+ let offset = node_new(AST_PLUS);
+ offset.d.binary.lhs = expr;
+ offset.d.binary.rhs = index;
+ offset = type_check_binary(offset, &token);
+
+ expr = node_new(AST_DEREF);
+ expr.d.unary = offset;
+ expr = type_check_unary(expr, &token);
+
+ lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACKET);
+
+ } else if (token.typ == TOKEN_DOT) {
+ die_loc(here, &token.loc, "Member access not implemented");
+
+ } else {
+ running = false;
+ }
+ }
return expr;
}
@@ -402,7 +447,7 @@ fn parse_term(lexer: Lexer*): Node* {
let rhs = parse_factor(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -419,7 +464,7 @@ fn parse_additive(lexer: Lexer*): Node* {
let rhs = parse_term(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -437,7 +482,7 @@ fn parse_relational(lexer: Lexer*): Node* {
let rhs = parse_additive(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -454,7 +499,7 @@ fn parse_equality(lexer: Lexer*): Node* {
let rhs = parse_relational(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -471,7 +516,7 @@ fn parse_and(lexer: Lexer*): Node* {
let rhs = parse_equality(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -488,7 +533,7 @@ fn parse_exclusive_or(lexer: Lexer*): Node* {
let rhs = parse_and(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -505,7 +550,7 @@ fn parse_inclusive_or(lexer: Lexer*): Node* {
let rhs = parse_exclusive_or(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -522,7 +567,7 @@ fn parse_logical_and(lexer: Lexer*): Node* {
let rhs = parse_inclusive_or(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -539,7 +584,7 @@ fn parse_logical_or(lexer: Lexer*): Node* {
let rhs = parse_logical_and(lexer);
op.d.binary.lhs = lhs;
op.d.binary.rhs = rhs;
- lhs = op;
+ lhs = type_check_binary(op, &token);
lexer_peek(lexer, &token);
}
return lhs;
@@ -561,7 +606,11 @@ fn parse_conditional_exp(lexer: Lexer*): Node* {
cond.d.conditional.then = then;
cond.d.conditional.els = els;
+ if (!types_equal(then.etyp, els.etyp))
+ die_loc(here, &token.loc, "THEN and ELSE branches of conditional expression have different types");
+
lhs = cond;
+ lhs.etyp = then.etyp;
}
return lhs;
}
@@ -615,6 +664,9 @@ fn parse_var_declaration(lexer: Lexer*): Node* {
lexer_next_assert(lexer, &token, TOKEN_LET);
lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
+ if (identifier_exists(&token))
+ die_loc2(here, &token.loc, "Identifier already defined: %s", token.value.as_string);
+
let is_global = (p_current_function == null);
// TODO: check if identifier is already defined
@@ -623,18 +675,19 @@ fn parse_var_declaration(lexer: Lexer*): Node* {
decl.var.name = token.value.as_string;
lexer_peek(lexer, &token);
- // TODO: implicit types when type system is better
- lexer_next_assert(lexer, &token, TOKEN_COLON);
- decl.var.typ = parse_type(lexer);
+ let missing_type = true;
+ if (token.typ == TOKEN_COLON) {
+ lexer_next(lexer, &token);
+ decl.var.typ = parse_type(lexer);
+ missing_type = false;
+ lexer_peek(lexer, &token);
+ }
- lexer_peek(lexer, &token);
if (token.typ == TOKEN_ASSIGN) {
lexer_next(lexer, &token);
decl.init = parse_expression(lexer);
- }
-
- // FIXME: This will need to be enabled at some point
- if (false) {
+ decl.var.typ = decl.init.etyp;
+ } else if (missing_type) {
die_loc(here, &token.loc, "Expected ':' or '=' after variable declaration");
}
@@ -893,7 +946,7 @@ fn parse_program(lexer: Lexer*): Node* {
parser_open_new_file(path);
lexer = vector_top(p_lexer_stack);
} else {
- die_loc2(here, &token.loc, "unexpected token in parse_program", token_type_to_string(token.typ));
+ die_loc2(here, &token.loc, "unexpected token in parse_program: ", token_type_to_string(token.typ));
}
lexer_peek(lexer, &token);
diff --git a/compiler/types.cup b/compiler/types.cup
index e6cb233..2addd95 100644
--- a/compiler/types.cup
+++ b/compiler/types.cup
@@ -89,4 +89,46 @@ fn create_type_string(typ: Type *): char* {
else die("type_to_string: unknown type");
return buf;
-} \ No newline at end of file
+}
+
+fn is_int_type(typ: Type*): int {
+ if (typ.typ == TYPE_INT) return true;
+ if (typ.typ == TYPE_CHAR) return true;
+ return false;
+}
+
+
+fn types_equal(a: Type*, b: Type*): int {
+ if (a == null && b == null)
+ return true;
+ if (a == null || b == null)
+ return false;
+ if (a.typ == TYPE_ANY || b.typ == TYPE_ANY)
+ return true;
+ return a.typ == b.typ && types_equal(a.ptr, b.ptr);
+}
+
+fn is_convertible(from: Type*, to: Type*): int {
+ if (from.typ == TYPE_ANY || to.typ == TYPE_ANY)
+ return true;
+
+ // Allow converstions to and from void* to any other pointer type
+ if (from.typ == TYPE_PTR && to.typ == TYPE_PTR)
+ if (from.ptr.typ == TYPE_VOID || to.ptr.typ == TYPE_VOID)
+ return true;
+
+ // TODO: Should we rause a warning if the target type is narrower
+ // than the source type?
+ if (is_int_type(from) && is_int_type(to))
+ return true;
+ return types_equal(from, to);
+}
+
+fn is_struct_or_structptr(typ: Type*): int {
+ if (typ.typ == TYPE_STRUCT || typ.typ == TYPE_UNION)
+ return true;
+ if (typ.typ == TYPE_PTR)
+ if (typ.ptr.typ == TYPE_STRUCT || typ.ptr.typ == TYPE_UNION)
+ return true;
+ return false;
+}