aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMustafa Quraish <[email protected]>2022-01-29 00:20:09 -0500
committerMustafa Quraish <[email protected]>2022-01-29 00:42:58 -0500
commit08056428cec5d1e63e471590ac09d82dcbfcab93 (patch)
treee71c34fb2a47780a6e01d8fa0a2af4b3bd3e7130
parentAdd some arithmetic binary operations into lex+parse+generation (diff)
downloadcup-08056428cec5d1e63e471590ac09d82dcbfcab93.tar.xz
cup-08056428cec5d1e63e471590ac09d82dcbfcab93.zip
Add relational and logical operators + refactor binop parser
We now support OR and AND with short circuiting! (Yet to be tested since we don't yet have local variables to play with). The binop parser took a bit of an overhaul factoring out the common code so that it's easier to describe the operator precendence relationships without being overly repetitive.
-rw-r--r--cup/generator.c120
-rw-r--r--cup/lexer.c23
-rw-r--r--cup/parser.c66
-rw-r--r--examples/comparisons.cup3
-rwxr-xr-xtest.sh2
-rw-r--r--tests/basics.sh35
6 files changed, 198 insertions, 51 deletions
diff --git a/cup/generator.c b/cup/generator.c
index 6018e47..7bddac3 100644
--- a/cup/generator.c
+++ b/cup/generator.c
@@ -7,10 +7,12 @@
#include <string.h>
#include <assert.h>
+static int label_counter = 0;
// The evaluated expression is stored into `rax`
void generate_expr_into_rax(Node *expr, FILE *out)
{
+ // TODO: Different sized output for different types?
if (expr->type == AST_LITERAL) {
// TODO: More literal types
assert(expr->literal.type.type == TYPE_INT);
@@ -22,6 +24,7 @@ void generate_expr_into_rax(Node *expr, FILE *out)
} else if (expr->type == OP_NOT) {
generate_expr_into_rax(expr->unary_expr, out);
+ // Booleanize
fprintf(out, " cmp rax, 0\n");
fprintf(out, " sete al\n");
fprintf(out, " movzx rax, al\n");
@@ -31,38 +34,131 @@ void generate_expr_into_rax(Node *expr, FILE *out)
fprintf(out, " not rax\n");
} else if (expr->type == OP_PLUS) {
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " push rax\n");
generate_expr_into_rax(expr->binary.right, out);
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
fprintf(out, " pop rbx\n");
fprintf(out, " add rax, rbx\n");
} else if (expr->type == OP_MINUS) {
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " push rax\n");
generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " mov rbx, rax\n");
- fprintf(out, " pop rax\n");
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
fprintf(out, " sub rax, rbx\n");
} else if (expr->type == OP_DIV) {
- generate_expr_into_rax(expr->binary.left, out);
- fprintf(out, " push rax\n");
generate_expr_into_rax(expr->binary.right, out);
- fprintf(out, " mov rbx, rax\n");
- fprintf(out, " pop rax\n");
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
fprintf(out, " cqo\n");
fprintf(out, " idiv rbx\n");
} else if (expr->type == OP_MUL) {
+ generate_expr_into_rax(expr->binary.right, out);
+ fprintf(out, " push rax\n");
generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
+ fprintf(out, " imul rbx\n");
+
+ // TODO: Compress these, there's barely any differences
+ } else if (expr->type == OP_EQ) {
+ generate_expr_into_rax(expr->binary.right, out);
fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
+ fprintf(out, " cmp rax, rbx\n");
+ fprintf(out, " sete al\n");
+ fprintf(out, " movzx rax, al\n");
+
+ } else if (expr->type == OP_NEQ) {
generate_expr_into_rax(expr->binary.right, out);
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
fprintf(out, " pop rbx\n");
- fprintf(out, " imul rbx\n");
+ fprintf(out, " cmp rax, rbx\n");
+ fprintf(out, " setne al\n");
+ fprintf(out, " movzx rax, al\n");
+
+ } else if (expr->type == OP_LT) {
+ generate_expr_into_rax(expr->binary.right, out);
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
+ fprintf(out, " cmp rax, rbx\n");
+ fprintf(out, " setl al\n");
+ fprintf(out, " movzx rax, al\n");
+
+ } else if (expr->type == OP_LEQ) {
+ generate_expr_into_rax(expr->binary.right, out);
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
+ fprintf(out, " cmp rax, rbx\n");
+ fprintf(out, " setle al\n");
+ fprintf(out, " movzx rax, al\n");
+
+ } else if (expr->type == OP_GT) {
+ generate_expr_into_rax(expr->binary.right, out);
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
+ fprintf(out, " cmp rax, rbx\n");
+ fprintf(out, " setg al\n");
+ fprintf(out, " movzx rax, al\n");
+
+ } else if (expr->type == OP_GEQ) {
+ generate_expr_into_rax(expr->binary.right, out);
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
+ fprintf(out, " cmp rax, rbx\n");
+ fprintf(out, " setge al\n");
+ fprintf(out, " movzx rax, al\n");
+
+ } else if (expr->type == OP_GEQ) {
+ generate_expr_into_rax(expr->binary.right, out);
+ fprintf(out, " push rax\n");
+ generate_expr_into_rax(expr->binary.left, out);
+ fprintf(out, " pop rbx\n");
+ fprintf(out, " cmp rax, rbx\n");
+ fprintf(out, " setge al\n");
+ fprintf(out, " movzx rax, al\n");
+
+ // Note: These are different because of short-circuit evaluation!
+ } else if (expr->type == OP_OR) {
+ generate_expr_into_rax(expr->binary.left, out);
+ // If left is true, we can short-circuit
+ fprintf(out, " cmp rax, 0\n");
+ fprintf(out, " je .or_right_%d\n", label_counter);
+ fprintf(out, " mov rax, 1\n");
+ fprintf(out, " jmp .or_end_%d\n", label_counter);
+ fprintf(out, ".or_right_%d:\n", label_counter);
+ generate_expr_into_rax(expr->binary.right, out);
+ // Booleanize the result
+ fprintf(out, " cmp rax, 0\n");
+ fprintf(out, " setne al\n");
+ fprintf(out, ".or_end_%d:\n", label_counter);
+ label_counter++;
+
+ } else if (expr->type == OP_AND) {
+ generate_expr_into_rax(expr->binary.left, out);
+ // If left is true, we can short-circuit
+ fprintf(out, " cmp rax, 0\n");
+ fprintf(out, " jne .and_right_%d\n", label_counter);
+ fprintf(out, " mov rax, 0\n");
+ fprintf(out, " jmp .and_end_%d\n", label_counter);
+ fprintf(out, ".and_right_%d:\n", label_counter);
+ generate_expr_into_rax(expr->binary.right, out);
+ // Booleanize the result
+ fprintf(out, " cmp rax, 0\n");
+ fprintf(out, " setne al\n");
+ fprintf(out, ".and_end_%d:\n", label_counter);
+ label_counter++;
} else {
- fprintf(stderr, "Unsupported expression type in generate_expr: %d\n", expr->type);
+ fprintf(stderr, "Unsupported expression type in generate_expr: `%s`\n", node_type_to_str(expr->type));
exit(1);
}
}
diff --git a/cup/lexer.c b/cup/lexer.c
index b88d321..3b9e070 100644
--- a/cup/lexer.c
+++ b/cup/lexer.c
@@ -92,10 +92,20 @@ Token Lexer_next(Lexer *lexer)
case '}': return Lexer_make_token(lexer, TOKEN_CLOSE_BRACE, 1);
case ';': return Lexer_make_token(lexer, TOKEN_SEMICOLON, 1);
case ':': return Lexer_make_token(lexer, TOKEN_COLON, 1);
- case '&': return Lexer_make_token(lexer, TOKEN_AMPERSAND, 1);
case '~': return Lexer_make_token(lexer, TOKEN_TILDE, 1);
- case '!': return Lexer_make_token(lexer, TOKEN_EXCLAMATION, 1);
-
+
+ case '&': {
+ if (peek(lexer, 1) == '&')
+ return Lexer_make_token(lexer, TOKEN_AND, 2);
+ return Lexer_make_token(lexer, TOKEN_AMPERSAND, 1);
+ }
+
+ case '!': {
+ if (peek(lexer, 1) == '=')
+ return Lexer_make_token(lexer, TOKEN_NEQ, 2);
+ return Lexer_make_token(lexer, TOKEN_EXCLAMATION, 1);
+ }
+
case '<': {
if (peek(lexer, 1) == '=')
return Lexer_make_token(lexer, TOKEN_LEQ, 2);
@@ -114,6 +124,13 @@ Token Lexer_next(Lexer *lexer)
return Lexer_make_token(lexer, TOKEN_ASSIGN, 1);
}
+ case '|': {
+ if (peek(lexer, 1) == '|')
+ return Lexer_make_token(lexer, TOKEN_OR, 2);
+ return Lexer_make_token(lexer, TOKEN_BAR, 1);
+ }
+
+
case '+': {
if (peek(lexer, 1) == '+')
return Lexer_make_token(lexer, TOKEN_PLUSPLUS, 2);
diff --git a/cup/parser.c b/cup/parser.c
index a6ea9b2..68bd22d 100644
--- a/cup/parser.c
+++ b/cup/parser.c
@@ -122,41 +122,37 @@ Node *parse_factor(Lexer *lexer)
return expr;
}
-Node *parse_term(Lexer *lexer)
-{
- Node *expr = parse_factor(lexer);
- Token token = Lexer_peek(lexer);
- while (token.type == TOKEN_STAR ||
- token.type == TOKEN_SLASH ||
- token.type == TOKEN_PERCENT) {
- Lexer_next(lexer);
- Node *op = Node_new(binary_token_to_op(token.type));
- Node *right = parse_factor(lexer);
- op->binary.left = expr;
- op->binary.right = right;
- expr = op;
- token = Lexer_peek(lexer);
- }
- return expr;
-}
-
-
-Node *parse_expression(Lexer *lexer)
-{
- Node *expr = parse_term(lexer);
- Token token = Lexer_peek(lexer);
- while (token.type == TOKEN_PLUS ||
- token.type == TOKEN_MINUS) {
- Lexer_next(lexer);
- Node *op = Node_new(binary_token_to_op(token.type));
- Node *right = parse_term(lexer);
- op->binary.left = expr;
- op->binary.right = right;
- expr = op;
- token = Lexer_peek(lexer);
- }
- return expr;
-}
+#define BINOP_PARSER(next_parser, predicate) \
+ Node *expr = next_parser(lexer); \
+ Token token = Lexer_peek(lexer); \
+ while (predicate(token.type)) { \
+ Lexer_next(lexer); \
+ Node *op = Node_new(binary_token_to_op(token.type)); \
+ Node *right = next_parser(lexer); \
+ op->binary.left = expr; \
+ op->binary.right = right; \
+ expr = op; \
+ token = Lexer_peek(lexer); \
+ } \
+ return expr;
+
+bool is_term_token(TokenType type) { return type == TOKEN_STAR || type == TOKEN_SLASH || type == TOKEN_PERCENT; }
+Node *parse_term(Lexer *lexer) { BINOP_PARSER(parse_factor, is_term_token); }
+
+bool is_additive_token(TokenType type) { return type == TOKEN_PLUS || type == TOKEN_MINUS; }
+Node *parse_additive(Lexer *lexer) { BINOP_PARSER(parse_term, is_additive_token); }
+
+bool is_relational_token(TokenType type) { return type == TOKEN_LT || type == TOKEN_LEQ || type == TOKEN_GT || type == TOKEN_GEQ; }
+Node *parse_relational(Lexer *lexer) { BINOP_PARSER(parse_additive, is_relational_token); }
+
+bool is_equality_token(TokenType type) { return type == TOKEN_EQ || type == TOKEN_NEQ; }
+Node *parse_equality(Lexer *lexer) { BINOP_PARSER(parse_relational, is_equality_token); }
+
+bool is_logical_and_token(TokenType type) { return type == TOKEN_AND; }
+Node *parse_logical_and(Lexer *lexer) { BINOP_PARSER(parse_equality, is_logical_and_token); }
+
+bool is_logical_or_token(TokenType type) { return type == TOKEN_OR; }
+Node *parse_expression(Lexer *lexer) { BINOP_PARSER(parse_logical_and, is_logical_or_token); }
Node *parse_statement(Lexer *lexer)
{
diff --git a/examples/comparisons.cup b/examples/comparisons.cup
new file mode 100644
index 0000000..d2bd41f
--- /dev/null
+++ b/examples/comparisons.cup
@@ -0,0 +1,3 @@
+fn main() {
+ return (5 == 3) + (5 > 4) + (1 != 2);
+} \ No newline at end of file
diff --git a/test.sh b/test.sh
index ebaa622..2fdbf47 100755
--- a/test.sh
+++ b/test.sh
@@ -1,5 +1,7 @@
#!/bin/bash
+./compile.sh
+
for i in `ls tests/*.sh | grep -v common.sh`
do
echo "Running $i"
diff --git a/tests/basics.sh b/tests/basics.sh
index 9ef3bf4..cab2462 100644
--- a/tests/basics.sh
+++ b/tests/basics.sh
@@ -20,7 +20,7 @@ assert_exit_status 'fn main() { return !-1; }' 0
assert_exit_status 'fn main() { return ~34; }' 221
echo " OK"
-echo -n "- Testing Binary ops: "
+echo -n "- Testing Arith Binary ops: "
assert_exit_status 'fn main() { return 1 + 1; }' 2
assert_exit_status 'fn main() { return 1 + 100; }' 101
assert_exit_status 'fn main() { return 100 + 1; }' 101
@@ -36,4 +36,37 @@ assert_exit_status 'fn main() { return 100 / 1; }' 100
assert_exit_status 'fn main() { return 100 / 7; }' 14
assert_exit_status 'fn main() { return 100 / 100; }' 1
assert_exit_status 'fn main() { return 100 / -1; }' 156
+echo " OK"
+
+echo -n "- Testing Relational ops: "
+assert_exit_status 'fn main() { return 1 == 1; }' 1
+assert_exit_status 'fn main() { return 1 == 2; }' 0
+assert_exit_status 'fn main() { return 1 != 1; }' 0
+assert_exit_status 'fn main() { return 1 != 2; }' 1
+
+assert_exit_status 'fn main() { return 1 < 2; }' 1
+assert_exit_status 'fn main() { return 2 < 2; }' 0
+
+assert_exit_status 'fn main() { return 1 <= 2; }' 1
+assert_exit_status 'fn main() { return 2 <= 2; }' 1
+assert_exit_status 'fn main() { return 3 <= 2; }' 0
+
+assert_exit_status 'fn main() { return 2 > 2; }' 0
+assert_exit_status 'fn main() { return 3 > 2; }' 1
+
+assert_exit_status 'fn main() { return 1 >= 2; }' 0
+assert_exit_status 'fn main() { return 2 >= 2; }' 1
+assert_exit_status 'fn main() { return 3 >= 2; }' 1
+echo " OK"
+
+echo -n "- Testing simple logical ops: "
+assert_exit_status 'fn main() { return 0 && 0; }' 0
+assert_exit_status 'fn main() { return 0 && 5; }' 0
+assert_exit_status 'fn main() { return 5 && 0; }' 0
+assert_exit_status 'fn main() { return 5 && 1; }' 1
+
+assert_exit_status 'fn main() { return 0 || 0; }' 0
+assert_exit_status 'fn main() { return 5 || 0; }' 1
+assert_exit_status 'fn main() { return 0 || 3; }' 1
+assert_exit_status 'fn main() { return 2 || 1; }' 1
echo " OK" \ No newline at end of file