[cup] Port over all the type-checking/pointer arithmetic stuff

There's still some work to be done when checking function call args, etc, but the general mechanism of actually propagating the type back up the AST is now here from the C compiler. ... Not to say that was very good, but it's passable enough for now. More improvements to come in the future!
author: Mustafa Quraish <[email protected]> 2022-02-06 23:13:05 -0500
committer: Mustafa Quraish <[email protected]> 2022-02-07 03:18:08 -0500
commit: 502067ade0e38e4b84134d729dee3e34c8d2890e (patch)
tree: 605f404331e479f830595a8c6ec635be718fbf17
parent: [cup] Add ability to import files (diff)
download: cup-502067ade0e38e4b84134d729dee3e34c8d2890e.tar.xz
cup-502067ade0e38e4b84134d729dee3e34c8d2890e.zip
4 files changed, 252 insertions, 26 deletions
diff --git a/compiler/ast.cup b/compiler/ast.cup
index 2ba677b..b452241 100644
--- a/compiler/ast.cup
+++ b/compiler/ast.cup
@@ -336,4 +336,130 @@ fn dump_ast(node: Node*, depth: int) {
     } else {
         putsln(node_type_to_string(node.typ));
     }
+}
+
+// Defined below.
+fn type_check_unary(node: Node*, token: Token*): Node*;
+
+fn decay_array_to_pointer(node: Node*, token: Token*): Node* {
+    // We can only take the address of an lvalue, so we need to ensure that
+    if (is_lvalue(node.typ) && node.etyp.typ == TYPE_ARRAY) {
+        let address = node_new(AST_ADDROF);
+        address.d.unary = node;
+        address = type_check_unary(address, token);
+        node = address;
+    }
+    return node;
+}
+
+fn type_check_unary(node: Node*, token: Token*): Node* {
+    let old_type = node.d.unary.etyp;
+
+    if (node.typ != AST_ADDROF && old_type.typ == TYPE_STRUCT)
+        die_loc(here, &token.loc, "Performing invalid unary operation on struct type");
+
+    if (node.typ == AST_NOT) {
+        node.etyp = type_new(TYPE_INT);
+    } else if (node.typ == AST_ADDROF) {
+        let ptr = type_new(TYPE_PTR);
+        // The address of an array is a pointer to the first element
+        ptr.ptr = old_type.typ == TYPE_ARRAY ? old_type.ptr : old_type;
+        node.etyp = ptr;
+    } else if (node.typ == AST_DEREF) {
+        if (old_type.typ != TYPE_PTR)
+            die_loc(here, &token.loc, "Cannot dereference non-pointer type");
+        node.etyp = old_type.ptr;
+        // If the dereferenced type is an array, we need to decay it to a
+        // pointer to the first element.
+        node = decay_array_to_pointer(node, token);
+    } else if (node.typ == AST_NEG) {
+        if (old_type.typ != TYPE_INT)
+            die_loc(here, &token.loc, "Cannot negate non-integer type");
+        node.etyp = type_new(TYPE_INT);
+    } else {
+        // Default to not changing the type
+        node.etyp = old_type;
+    }
+    return node;
+}
+
+fn type_check_binary(node: Node*, token: Token*): Node*
+{
+    let lhs = node.d.binary.lhs.etyp;
+    let rhs = node.d.binary.rhs.etyp;
+
+    if (lhs.typ == TYPE_STRUCT || rhs.typ == TYPE_STRUCT)
+        die_loc(here, &token.loc, "Performing invalid binary operation on struct type");
+
+    if (node.typ == AST_PLUS) {
+        if (is_int_type(lhs) && is_int_type(rhs)) {
+            node.etyp = type_new(TYPE_INT);
+        } else if (lhs.typ == TYPE_PTR && is_int_type(rhs)) {
+            node.etyp = lhs;
+            // Pointer arithmetic!
+            if (size_for_type(lhs.ptr) != 1) {
+                let mul = node_new(AST_MUL);
+                mul.d.binary.lhs = node.d.binary.rhs;
+                mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr));
+                node.d.binary.rhs = mul;
+            }
+        } else if (is_int_type(lhs) && rhs.typ == TYPE_PTR) {
+            node.etyp = rhs;
+            // Pointer arithmetic!
+            if (size_for_type(rhs.ptr) != 1) {
+                let mul = node_new(AST_MUL);
+                mul.d.binary.lhs = node.d.binary.lhs;
+                mul.d.binary.rhs = node_from_int_literal(size_for_type(rhs.ptr));
+                node.d.binary.lhs = mul;
+            }
+        } else {
+            die_loc(here, &token.loc, "Cannot add non-integer types");
+        }
+    } else if (node.typ == AST_MINUS) {
+        if (is_int_type(lhs) && is_int_type(rhs)) {
+            node.etyp = type_new(TYPE_INT);
+        } else if (lhs.typ == TYPE_PTR && is_int_type(rhs)) {
+            node.etyp = lhs;
+            // Pointer arithmetic!
+            if (size_for_type(lhs.ptr) != 1) {
+                let mul = node_new(AST_MUL);
+                mul.d.binary.lhs = node.d.binary.rhs;
+                mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr));
+                node.d.binary.rhs = mul;
+            }
+        } else if (is_int_type(lhs) && rhs.typ == TYPE_PTR) {
+            node.etyp = rhs;
+            // Pointer arithmetic!
+            if (size_for_type(rhs.ptr) != 1) {
+                let mul = node_new(AST_MUL);
+                mul.d.binary.lhs = node.d.binary.lhs;
+                mul.d.binary.rhs = node_from_int_literal(size_for_type(rhs.ptr));
+                node.d.binary.lhs = mul;
+            }
+        } else if (lhs.typ == TYPE_PTR && rhs.typ == TYPE_PTR) {
+            node.etyp = type_new(TYPE_INT);
+            if (!types_equal(lhs.ptr, rhs.ptr))
+                die_loc(here, &token.loc, "Cannot subtract pointers of different types");
+
+            // Pointer arithmetic! (Divide by size of element)
+            if (size_for_type(lhs.ptr) != 1) {
+                let mul = node_new(AST_MUL);
+                mul.d.binary.lhs = node.d.binary.lhs;
+                mul.d.binary.rhs = node_from_int_literal(size_for_type(lhs.ptr));
+                node.d.binary.lhs = mul;
+            }
+        } else {
+            die_loc(here, &token.loc, "Cannot subtract non-integer types");
+        }
+    } else if (node.typ == AST_MUL || node.typ == AST_DIV || node.typ == AST_MOD) {
+        if (is_int_type(lhs) && is_int_type(rhs)) {
+            node.etyp = lhs;
+        } else {
+            die_loc2(here, &token.loc, "Cannot do operation non-integer types:", node_type_to_string(node.typ));
+        }
+    } else {
+        // FIXME: This isn't very correct, but it's probably good enough for now
+        node.etyp = type_new(TYPE_INT);
+    }
+    return node;
 }
 \ No newline at end of file
diff --git a/compiler/codegen.cup b/compiler/codegen.cup
index 68c3321..d0cf8ab 100644
--- a/compiler/codegen.cup
+++ b/compiler/codegen.cup
@@ -52,6 +52,8 @@ fn generate_lvalue_into_rax(node: Node*) {
         let offset = node.d.variable.offset;
         emit_asm("    mov rax, global_vars\n");
         emit_asm("    add rax, "); emit_num(offset); emit_asm("\n");
+    } else if (node.typ == AST_DEREF) {
+        generate_expr_into_rax(node.d.unary);
     } else {
         die2("Unsupported type in generate_lvalue_into_rax: ", node_type_to_string(node.typ));
     }
@@ -78,6 +80,9 @@ fn generate_expr_into_rax(node: Node*) {
         } else {
             die("Unsupported literal type in generate_expr_into_rax");
         }
+    } else if (node.typ == AST_ADDROF) {
+        generate_lvalue_into_rax(node.d.unary);
+
     } else if (node.typ == AST_CONDITIONAL) {
         let label = ++gen_label_counter;
         generate_expr_into_rax(node.d.conditional.cond);
diff --git a/compiler/parser.cup b/compiler/parser.cup
index a6a4f7b..d13a309 100644
--- a/compiler/parser.cup
+++ b/compiler/parser.cup
@@ -246,6 +246,7 @@ fn parse_function_call_args(lexer: Lexer*, func: Node*): Node* {
     let node = node_new(AST_FUNCCALL);
     node.d.call.func = func;
     node.d.call.args = vector_new();
+    node.etyp = func.etyp;
     
     lexer_next_assert(lexer, &token, TOKEN_OPEN_PAREN);
     lexer_peek(lexer, &token);
@@ -280,7 +281,7 @@ fn parse_identifier(lexer: Lexer*): Node* {
         node = node_new(AST_LOCAL_VAR);
         node.d.variable = var;
         node.etyp = var.typ;
-        return node;
+        return decay_array_to_pointer(node, &token);
     }
 
     var = find_global_variable(&token);
@@ -288,7 +289,7 @@ fn parse_identifier(lexer: Lexer*): Node* {
         node = node_new(AST_GLOBAL_VAR);
         node.d.variable = var;
         node.etyp = var.typ;
-        return node;
+        return decay_array_to_pointer(node, &token);
     }
 
     let func = find_function_definition(&token);
@@ -320,11 +321,13 @@ fn parse_factor(lexer: Lexer*): Node* {
         lexer_next(lexer, &token);
         expr = node_new(AST_NEG);
         expr.d.unary = parse_factor(lexer);
+        expr = type_check_unary(expr, &token);
 
     } else if (token.typ == TOKEN_TILDE) {
         lexer_next(lexer, &token);
         expr = node_new(AST_BWINV);
         expr.d.unary = parse_factor(lexer);
+        expr = type_check_unary(expr, &token);
 
     } else if (token.typ == TOKEN_PLUSPLUS) {
         lexer_next(lexer, &token);
@@ -336,7 +339,8 @@ fn parse_factor(lexer: Lexer*): Node* {
         let plus = node_new(AST_PLUS);
         plus.d.binary.lhs = expr.d.assign.lhs;
         plus.d.binary.rhs = node_from_int_literal(1);
-        expr.d.assign.rhs = plus;
+        expr.d.assign.rhs = type_check_binary(plus, &token);
+        expr.etyp = expr.d.assign.lhs.etyp;
 
     // --x is changed to (x = x - 1)
     } else if (token.typ == TOKEN_MINUSMINUS) {
@@ -349,7 +353,8 @@ fn parse_factor(lexer: Lexer*): Node* {
         let minus = node_new(AST_MINUS);
         minus.d.binary.lhs = expr.d.assign.lhs;
         minus.d.binary.rhs = node_from_int_literal(1);
-        expr.d.assign.rhs = minus;
+        expr.d.assign.rhs = type_check_binary(minus, &token);
+        expr.etyp = expr.d.assign.lhs.etyp;
 
     // FIXME: This should probably go somewhere else more appropriate
     } else if (token.typ == TOKEN_SIZEOF) {
@@ -363,6 +368,7 @@ fn parse_factor(lexer: Lexer*): Node* {
         lexer_next(lexer, &token);
         expr = node_new(AST_NOT);
         expr.d.unary = parse_factor(lexer);
+        expr = type_check_unary(expr, &token);
 
     } else if (token.typ == TOKEN_OPEN_PAREN) {
         lexer_next(lexer, &token);
@@ -376,16 +382,55 @@ fn parse_factor(lexer: Lexer*): Node* {
         expr = parse_identifier(lexer);
 
     } else if (token.typ == TOKEN_AMPERSAND) {
-        die("TOKEN_AMPERSAND not implemented in parse_factor");
+        lexer_next(lexer, &token);
+        expr = node_new(AST_ADDROF);
+        expr.d.unary = parse_factor(lexer);
+        if (!is_lvalue(expr.d.unary.typ))
+            die_loc(here, &token.loc, "Cannot take address of non-lvalue");
+        expr = type_check_unary(expr, &token);
 
     } else if (token.typ == TOKEN_STAR) {
-        die("TOKEN_STAR not implemented in parse_factor");
+        lexer_next(lexer, &token);
+        let subexp = parse_factor(lexer);
+        if (subexp.etyp.typ != TYPE_PTR)
+            die_loc(here, &token.loc, "Cannot dereference non-pointer type");
+
+        expr = node_new(AST_DEREF);
+        expr.d.unary = subexp;
+        expr = type_check_unary(expr, &token);
 
     } else {
         die_loc2(here, &token.loc, ": Unexpected token found in parse_factor: ", token_type_to_string(token.typ));
     }
 
-    // TODO: Handle postfix operators here.
+    let running = true;
+    while (running) {
+        lexer_peek(lexer, &token);
+        if (token.typ == TOKEN_OPEN_BRACKET) {
+            if (expr.etyp.typ != TYPE_PTR)
+                die_loc(here, &token.loc, "Cannot index non-pointer/array type");
+            lexer_next(lexer, &token);
+
+            let index = parse_expression(lexer);
+
+            let offset = node_new(AST_PLUS);
+            offset.d.binary.lhs = expr;
+            offset.d.binary.rhs = index;
+            offset = type_check_binary(offset, &token);
+            
+            expr = node_new(AST_DEREF);
+            expr.d.unary = offset;
+            expr = type_check_unary(expr, &token);
+
+            lexer_next_assert(lexer, &token, TOKEN_CLOSE_BRACKET);
+            
+        } else if (token.typ == TOKEN_DOT) {
+            die_loc(here, &token.loc, "Member access not implemented");
+
+        } else {
+            running = false;
+        }
+    }
 
     return expr;
 }
@@ -402,7 +447,7 @@ fn parse_term(lexer: Lexer*): Node* {
         let rhs = parse_factor(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -419,7 +464,7 @@ fn parse_additive(lexer: Lexer*): Node* {
         let rhs = parse_term(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -437,7 +482,7 @@ fn parse_relational(lexer: Lexer*): Node* {
         let rhs = parse_additive(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -454,7 +499,7 @@ fn parse_equality(lexer: Lexer*): Node* {
         let rhs = parse_relational(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -471,7 +516,7 @@ fn parse_and(lexer: Lexer*): Node* {
         let rhs = parse_equality(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -488,7 +533,7 @@ fn parse_exclusive_or(lexer: Lexer*): Node* {
         let rhs = parse_and(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -505,7 +550,7 @@ fn parse_inclusive_or(lexer: Lexer*): Node* {
         let rhs = parse_exclusive_or(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -522,7 +567,7 @@ fn parse_logical_and(lexer: Lexer*): Node* {
         let rhs = parse_inclusive_or(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -539,7 +584,7 @@ fn parse_logical_or(lexer: Lexer*): Node* {
         let rhs = parse_logical_and(lexer);
         op.d.binary.lhs = lhs;
         op.d.binary.rhs = rhs;
-        lhs = op;
+        lhs = type_check_binary(op, &token);
         lexer_peek(lexer, &token);
     }
     return lhs;
@@ -561,7 +606,11 @@ fn parse_conditional_exp(lexer: Lexer*): Node* {
         cond.d.conditional.then = then;
         cond.d.conditional.els = els;
 
+        if (!types_equal(then.etyp, els.etyp))
+            die_loc(here, &token.loc, "THEN and ELSE branches of conditional expression have different types");
+
         lhs = cond;
+        lhs.etyp = then.etyp;
     }
     return lhs;
 }
@@ -615,6 +664,9 @@ fn parse_var_declaration(lexer: Lexer*): Node* {
     lexer_next_assert(lexer, &token, TOKEN_LET);
     lexer_next_assert(lexer, &token, TOKEN_IDENTIFIER);
 
+    if (identifier_exists(&token))
+        die_loc2(here, &token.loc, "Identifier already defined: %s", token.value.as_string);
+
     let is_global = (p_current_function == null);
 
     // TODO: check if identifier is already defined
@@ -623,18 +675,19 @@ fn parse_var_declaration(lexer: Lexer*): Node* {
     decl.var.name = token.value.as_string;
 
     lexer_peek(lexer, &token);
-    // TODO: implicit types when type system is better
-    lexer_next_assert(lexer, &token, TOKEN_COLON);
-    decl.var.typ = parse_type(lexer);
+    let missing_type = true;
+    if (token.typ == TOKEN_COLON) {
+        lexer_next(lexer, &token);
+        decl.var.typ = parse_type(lexer);
+        missing_type = false;
+        lexer_peek(lexer, &token);
+    }
     
-    lexer_peek(lexer, &token);
     if (token.typ == TOKEN_ASSIGN) {
         lexer_next(lexer, &token);
         decl.init = parse_expression(lexer);
-    } 
-    
-    // FIXME: This will need to be enabled at some point
-    if (false) {
+        decl.var.typ = decl.init.etyp;
+    } else if (missing_type) {
         die_loc(here, &token.loc, "Expected ':' or '=' after variable declaration");
     }
 
@@ -893,7 +946,7 @@ fn parse_program(lexer: Lexer*): Node* {
             parser_open_new_file(path);
             lexer = vector_top(p_lexer_stack);
         } else {
-            die_loc2(here, &token.loc, "unexpected token in parse_program", token_type_to_string(token.typ));
+            die_loc2(here, &token.loc, "unexpected token in parse_program: ", token_type_to_string(token.typ));
         }
 
         lexer_peek(lexer, &token);
diff --git a/compiler/types.cup b/compiler/types.cup
index e6cb233..2addd95 100644
--- a/compiler/types.cup
+++ b/compiler/types.cup
@@ -89,4 +89,46 @@ fn create_type_string(typ: Type *): char* {
     else  die("type_to_string: unknown type");
 
     return buf;
-}
-\ No newline at end of file
+}
+
+fn is_int_type(typ: Type*): int {
+    if (typ.typ == TYPE_INT) return true;
+    if (typ.typ == TYPE_CHAR) return true;
+    return false;
+}
+
+
+fn types_equal(a: Type*, b: Type*): int {
+    if (a == null && b == null)
+        return true;
+    if (a == null || b == null)
+        return false;
+    if (a.typ == TYPE_ANY || b.typ == TYPE_ANY)
+        return true;
+    return a.typ == b.typ && types_equal(a.ptr, b.ptr);
+}
+
+fn is_convertible(from: Type*, to: Type*): int {
+    if (from.typ == TYPE_ANY || to.typ == TYPE_ANY)
+        return true;
+
+    // Allow converstions to and from void* to any other pointer type
+    if (from.typ == TYPE_PTR && to.typ == TYPE_PTR)
+        if (from.ptr.typ == TYPE_VOID || to.ptr.typ == TYPE_VOID)
+            return true;
+
+    // TODO: Should we rause a warning if the target type is narrower
+    //       than the source type?
+    if (is_int_type(from) && is_int_type(to))
+        return true;
+    return types_equal(from, to);
+}
+
+fn is_struct_or_structptr(typ: Type*): int {
+    if (typ.typ == TYPE_STRUCT || typ.typ == TYPE_UNION)
+        return true;
+    if (typ.typ == TYPE_PTR)
+        if (typ.ptr.typ == TYPE_STRUCT || typ.ptr.typ == TYPE_UNION)
+            return true;
+    return false;
+}
author	Mustafa Quraish <[email protected]>	2022-02-06 23:13:05 -0500
committer	Mustafa Quraish <[email protected]>	2022-02-07 03:18:08 -0500
commit	502067ade0e38e4b84134d729dee3e34c8d2890e (patch)
tree	605f404331e479f830595a8c6ec635be718fbf17
parent	[cup] Add ability to import files (diff)
download	cup-502067ade0e38e4b84134d729dee3e34c8d2890e.tar.xz cup-502067ade0e38e4b84134d729dee3e34c8d2890e.zip