diff options
| author | Mustafa Quraish <[email protected]> | 2022-01-29 00:20:09 -0500 |
|---|---|---|
| committer | Mustafa Quraish <[email protected]> | 2022-01-29 00:42:58 -0500 |
| commit | 08056428cec5d1e63e471590ac09d82dcbfcab93 (patch) | |
| tree | e71c34fb2a47780a6e01d8fa0a2af4b3bd3e7130 | |
| parent | Add some arithmetic binary operations into lex+parse+generation (diff) | |
| download | cup-08056428cec5d1e63e471590ac09d82dcbfcab93.tar.xz cup-08056428cec5d1e63e471590ac09d82dcbfcab93.zip | |
Add relational and logical operators + refactor binop parser
We now support OR and AND with short circuiting! (Yet to be tested
since we don't yet have local variables to play with).
The binop parser took a bit of an overhaul factoring out the common
code so that it's easier to describe the operator precendence
relationships without being overly repetitive.
| -rw-r--r-- | cup/generator.c | 120 | ||||
| -rw-r--r-- | cup/lexer.c | 23 | ||||
| -rw-r--r-- | cup/parser.c | 66 | ||||
| -rw-r--r-- | examples/comparisons.cup | 3 | ||||
| -rwxr-xr-x | test.sh | 2 | ||||
| -rw-r--r-- | tests/basics.sh | 35 |
6 files changed, 198 insertions, 51 deletions
diff --git a/cup/generator.c b/cup/generator.c index 6018e47..7bddac3 100644 --- a/cup/generator.c +++ b/cup/generator.c @@ -7,10 +7,12 @@ #include <string.h> #include <assert.h> +static int label_counter = 0; // The evaluated expression is stored into `rax` void generate_expr_into_rax(Node *expr, FILE *out) { + // TODO: Different sized output for different types? if (expr->type == AST_LITERAL) { // TODO: More literal types assert(expr->literal.type.type == TYPE_INT); @@ -22,6 +24,7 @@ void generate_expr_into_rax(Node *expr, FILE *out) } else if (expr->type == OP_NOT) { generate_expr_into_rax(expr->unary_expr, out); + // Booleanize fprintf(out, " cmp rax, 0\n"); fprintf(out, " sete al\n"); fprintf(out, " movzx rax, al\n"); @@ -31,38 +34,131 @@ void generate_expr_into_rax(Node *expr, FILE *out) fprintf(out, " not rax\n"); } else if (expr->type == OP_PLUS) { - generate_expr_into_rax(expr->binary.left, out); - fprintf(out, " push rax\n"); generate_expr_into_rax(expr->binary.right, out); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); fprintf(out, " pop rbx\n"); fprintf(out, " add rax, rbx\n"); } else if (expr->type == OP_MINUS) { - generate_expr_into_rax(expr->binary.left, out); - fprintf(out, " push rax\n"); generate_expr_into_rax(expr->binary.right, out); - fprintf(out, " mov rbx, rax\n"); - fprintf(out, " pop rax\n"); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); fprintf(out, " sub rax, rbx\n"); } else if (expr->type == OP_DIV) { - generate_expr_into_rax(expr->binary.left, out); - fprintf(out, " push rax\n"); generate_expr_into_rax(expr->binary.right, out); - fprintf(out, " mov rbx, rax\n"); - fprintf(out, " pop rax\n"); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); fprintf(out, " cqo\n"); fprintf(out, " idiv rbx\n"); } else if (expr->type == OP_MUL) { + generate_expr_into_rax(expr->binary.right, out); + fprintf(out, " push rax\n"); generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); + fprintf(out, " imul rbx\n"); + + // TODO: Compress these, there's barely any differences + } else if (expr->type == OP_EQ) { + generate_expr_into_rax(expr->binary.right, out); fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); + fprintf(out, " cmp rax, rbx\n"); + fprintf(out, " sete al\n"); + fprintf(out, " movzx rax, al\n"); + + } else if (expr->type == OP_NEQ) { generate_expr_into_rax(expr->binary.right, out); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); fprintf(out, " pop rbx\n"); - fprintf(out, " imul rbx\n"); + fprintf(out, " cmp rax, rbx\n"); + fprintf(out, " setne al\n"); + fprintf(out, " movzx rax, al\n"); + + } else if (expr->type == OP_LT) { + generate_expr_into_rax(expr->binary.right, out); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); + fprintf(out, " cmp rax, rbx\n"); + fprintf(out, " setl al\n"); + fprintf(out, " movzx rax, al\n"); + + } else if (expr->type == OP_LEQ) { + generate_expr_into_rax(expr->binary.right, out); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); + fprintf(out, " cmp rax, rbx\n"); + fprintf(out, " setle al\n"); + fprintf(out, " movzx rax, al\n"); + + } else if (expr->type == OP_GT) { + generate_expr_into_rax(expr->binary.right, out); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); + fprintf(out, " cmp rax, rbx\n"); + fprintf(out, " setg al\n"); + fprintf(out, " movzx rax, al\n"); + + } else if (expr->type == OP_GEQ) { + generate_expr_into_rax(expr->binary.right, out); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); + fprintf(out, " cmp rax, rbx\n"); + fprintf(out, " setge al\n"); + fprintf(out, " movzx rax, al\n"); + + } else if (expr->type == OP_GEQ) { + generate_expr_into_rax(expr->binary.right, out); + fprintf(out, " push rax\n"); + generate_expr_into_rax(expr->binary.left, out); + fprintf(out, " pop rbx\n"); + fprintf(out, " cmp rax, rbx\n"); + fprintf(out, " setge al\n"); + fprintf(out, " movzx rax, al\n"); + + // Note: These are different because of short-circuit evaluation! + } else if (expr->type == OP_OR) { + generate_expr_into_rax(expr->binary.left, out); + // If left is true, we can short-circuit + fprintf(out, " cmp rax, 0\n"); + fprintf(out, " je .or_right_%d\n", label_counter); + fprintf(out, " mov rax, 1\n"); + fprintf(out, " jmp .or_end_%d\n", label_counter); + fprintf(out, ".or_right_%d:\n", label_counter); + generate_expr_into_rax(expr->binary.right, out); + // Booleanize the result + fprintf(out, " cmp rax, 0\n"); + fprintf(out, " setne al\n"); + fprintf(out, ".or_end_%d:\n", label_counter); + label_counter++; + + } else if (expr->type == OP_AND) { + generate_expr_into_rax(expr->binary.left, out); + // If left is true, we can short-circuit + fprintf(out, " cmp rax, 0\n"); + fprintf(out, " jne .and_right_%d\n", label_counter); + fprintf(out, " mov rax, 0\n"); + fprintf(out, " jmp .and_end_%d\n", label_counter); + fprintf(out, ".and_right_%d:\n", label_counter); + generate_expr_into_rax(expr->binary.right, out); + // Booleanize the result + fprintf(out, " cmp rax, 0\n"); + fprintf(out, " setne al\n"); + fprintf(out, ".and_end_%d:\n", label_counter); + label_counter++; } else { - fprintf(stderr, "Unsupported expression type in generate_expr: %d\n", expr->type); + fprintf(stderr, "Unsupported expression type in generate_expr: `%s`\n", node_type_to_str(expr->type)); exit(1); } } diff --git a/cup/lexer.c b/cup/lexer.c index b88d321..3b9e070 100644 --- a/cup/lexer.c +++ b/cup/lexer.c @@ -92,10 +92,20 @@ Token Lexer_next(Lexer *lexer) case '}': return Lexer_make_token(lexer, TOKEN_CLOSE_BRACE, 1); case ';': return Lexer_make_token(lexer, TOKEN_SEMICOLON, 1); case ':': return Lexer_make_token(lexer, TOKEN_COLON, 1); - case '&': return Lexer_make_token(lexer, TOKEN_AMPERSAND, 1); case '~': return Lexer_make_token(lexer, TOKEN_TILDE, 1); - case '!': return Lexer_make_token(lexer, TOKEN_EXCLAMATION, 1); - + + case '&': { + if (peek(lexer, 1) == '&') + return Lexer_make_token(lexer, TOKEN_AND, 2); + return Lexer_make_token(lexer, TOKEN_AMPERSAND, 1); + } + + case '!': { + if (peek(lexer, 1) == '=') + return Lexer_make_token(lexer, TOKEN_NEQ, 2); + return Lexer_make_token(lexer, TOKEN_EXCLAMATION, 1); + } + case '<': { if (peek(lexer, 1) == '=') return Lexer_make_token(lexer, TOKEN_LEQ, 2); @@ -114,6 +124,13 @@ Token Lexer_next(Lexer *lexer) return Lexer_make_token(lexer, TOKEN_ASSIGN, 1); } + case '|': { + if (peek(lexer, 1) == '|') + return Lexer_make_token(lexer, TOKEN_OR, 2); + return Lexer_make_token(lexer, TOKEN_BAR, 1); + } + + case '+': { if (peek(lexer, 1) == '+') return Lexer_make_token(lexer, TOKEN_PLUSPLUS, 2); diff --git a/cup/parser.c b/cup/parser.c index a6ea9b2..68bd22d 100644 --- a/cup/parser.c +++ b/cup/parser.c @@ -122,41 +122,37 @@ Node *parse_factor(Lexer *lexer) return expr; } -Node *parse_term(Lexer *lexer) -{ - Node *expr = parse_factor(lexer); - Token token = Lexer_peek(lexer); - while (token.type == TOKEN_STAR || - token.type == TOKEN_SLASH || - token.type == TOKEN_PERCENT) { - Lexer_next(lexer); - Node *op = Node_new(binary_token_to_op(token.type)); - Node *right = parse_factor(lexer); - op->binary.left = expr; - op->binary.right = right; - expr = op; - token = Lexer_peek(lexer); - } - return expr; -} - - -Node *parse_expression(Lexer *lexer) -{ - Node *expr = parse_term(lexer); - Token token = Lexer_peek(lexer); - while (token.type == TOKEN_PLUS || - token.type == TOKEN_MINUS) { - Lexer_next(lexer); - Node *op = Node_new(binary_token_to_op(token.type)); - Node *right = parse_term(lexer); - op->binary.left = expr; - op->binary.right = right; - expr = op; - token = Lexer_peek(lexer); - } - return expr; -} +#define BINOP_PARSER(next_parser, predicate) \ + Node *expr = next_parser(lexer); \ + Token token = Lexer_peek(lexer); \ + while (predicate(token.type)) { \ + Lexer_next(lexer); \ + Node *op = Node_new(binary_token_to_op(token.type)); \ + Node *right = next_parser(lexer); \ + op->binary.left = expr; \ + op->binary.right = right; \ + expr = op; \ + token = Lexer_peek(lexer); \ + } \ + return expr; + +bool is_term_token(TokenType type) { return type == TOKEN_STAR || type == TOKEN_SLASH || type == TOKEN_PERCENT; } +Node *parse_term(Lexer *lexer) { BINOP_PARSER(parse_factor, is_term_token); } + +bool is_additive_token(TokenType type) { return type == TOKEN_PLUS || type == TOKEN_MINUS; } +Node *parse_additive(Lexer *lexer) { BINOP_PARSER(parse_term, is_additive_token); } + +bool is_relational_token(TokenType type) { return type == TOKEN_LT || type == TOKEN_LEQ || type == TOKEN_GT || type == TOKEN_GEQ; } +Node *parse_relational(Lexer *lexer) { BINOP_PARSER(parse_additive, is_relational_token); } + +bool is_equality_token(TokenType type) { return type == TOKEN_EQ || type == TOKEN_NEQ; } +Node *parse_equality(Lexer *lexer) { BINOP_PARSER(parse_relational, is_equality_token); } + +bool is_logical_and_token(TokenType type) { return type == TOKEN_AND; } +Node *parse_logical_and(Lexer *lexer) { BINOP_PARSER(parse_equality, is_logical_and_token); } + +bool is_logical_or_token(TokenType type) { return type == TOKEN_OR; } +Node *parse_expression(Lexer *lexer) { BINOP_PARSER(parse_logical_and, is_logical_or_token); } Node *parse_statement(Lexer *lexer) { diff --git a/examples/comparisons.cup b/examples/comparisons.cup new file mode 100644 index 0000000..d2bd41f --- /dev/null +++ b/examples/comparisons.cup @@ -0,0 +1,3 @@ +fn main() { + return (5 == 3) + (5 > 4) + (1 != 2); +}
\ No newline at end of file @@ -1,5 +1,7 @@ #!/bin/bash +./compile.sh + for i in `ls tests/*.sh | grep -v common.sh` do echo "Running $i" diff --git a/tests/basics.sh b/tests/basics.sh index 9ef3bf4..cab2462 100644 --- a/tests/basics.sh +++ b/tests/basics.sh @@ -20,7 +20,7 @@ assert_exit_status 'fn main() { return !-1; }' 0 assert_exit_status 'fn main() { return ~34; }' 221 echo " OK" -echo -n "- Testing Binary ops: " +echo -n "- Testing Arith Binary ops: " assert_exit_status 'fn main() { return 1 + 1; }' 2 assert_exit_status 'fn main() { return 1 + 100; }' 101 assert_exit_status 'fn main() { return 100 + 1; }' 101 @@ -36,4 +36,37 @@ assert_exit_status 'fn main() { return 100 / 1; }' 100 assert_exit_status 'fn main() { return 100 / 7; }' 14 assert_exit_status 'fn main() { return 100 / 100; }' 1 assert_exit_status 'fn main() { return 100 / -1; }' 156 +echo " OK" + +echo -n "- Testing Relational ops: " +assert_exit_status 'fn main() { return 1 == 1; }' 1 +assert_exit_status 'fn main() { return 1 == 2; }' 0 +assert_exit_status 'fn main() { return 1 != 1; }' 0 +assert_exit_status 'fn main() { return 1 != 2; }' 1 + +assert_exit_status 'fn main() { return 1 < 2; }' 1 +assert_exit_status 'fn main() { return 2 < 2; }' 0 + +assert_exit_status 'fn main() { return 1 <= 2; }' 1 +assert_exit_status 'fn main() { return 2 <= 2; }' 1 +assert_exit_status 'fn main() { return 3 <= 2; }' 0 + +assert_exit_status 'fn main() { return 2 > 2; }' 0 +assert_exit_status 'fn main() { return 3 > 2; }' 1 + +assert_exit_status 'fn main() { return 1 >= 2; }' 0 +assert_exit_status 'fn main() { return 2 >= 2; }' 1 +assert_exit_status 'fn main() { return 3 >= 2; }' 1 +echo " OK" + +echo -n "- Testing simple logical ops: " +assert_exit_status 'fn main() { return 0 && 0; }' 0 +assert_exit_status 'fn main() { return 0 && 5; }' 0 +assert_exit_status 'fn main() { return 5 && 0; }' 0 +assert_exit_status 'fn main() { return 5 && 1; }' 1 + +assert_exit_status 'fn main() { return 0 || 0; }' 0 +assert_exit_status 'fn main() { return 5 || 0; }' 1 +assert_exit_status 'fn main() { return 0 || 3; }' 1 +assert_exit_status 'fn main() { return 2 || 1; }' 1 echo " OK"
\ No newline at end of file |