about | summary | refs | log | tree | commit | diff
path: root/src/comp/fe
diff options
context:
space:
mode:
Diffstat (limited to 'src/comp/fe')
-rw-r--r-- src/comp/fe/ast.rs 58
-rw-r--r-- src/comp/fe/lexer.rs 113
-rw-r--r-- src/comp/fe/parser.rs 33
-rw-r--r-- src/comp/fe/token.rs 333
4 files changed, 522 insertions, 15 deletions
diff --git a/src/comp/fe/ast.rs b/src/comp/fe/ast.rs
new file mode 100644
index 00000000..3a329ded
--- /dev/null
+++ b/src/comp/fe/ast.rs
@@ -0,0 +1,58 @@
+
+import std.util.option;
+import std.map.hashmap;
+
+type ident = str; // identifiers are plain strings
+
+type crate = rec( str filename, // record: a filename plus one module
+ _mod module);
+
+type block = vec[stmt]; // a block is a vector of statements
+
+type stmt = tag( stmt_block(block), // nested block
+ stmt_decl(@decl), // declaration
+ stmt_ret(option[@lval]) ); // return, with an optional lval
+
+type decl = tag( decl_local(ident, option[ty]), // name, optional type
+ decl_item(ident, @item) ); // name paired with an item
+
+type lval = tag( lval_ident(ident), // bare identifier
+ lval_ext(@lval, ident), // an lval extended by an identifier
+ lval_idx(@lval, @atom) ); // an lval indexed by an atom
+
+type atom = tag( atom_lit(lit)); // literals are the only atom variant
+
+type lit = tag( lit_char(char), // literal values: char, int, nil, bool
+ lit_int(int),
+ lit_nil(),
+ lit_bool(bool) );
+
+type ty = tag( ty_nil(), // types: nil, bool, int, char
+ ty_bool(),
+ ty_int(),
+ ty_char() );
+
+type mode = tag( local(), alias() ); // two modes: local or alias
+
+type slot = rec(ty ty, mode mode); // a type paired with a mode
+
+type _fn = rec(vec[rec(slot slot, ident ident)] inputs, // (slot, name) pairs
+ slot output, // slot of the result
+ block body); // statements of the function
+
+type _mod = hashmap[ident,item]; // module: map from identifier to item
+
+type item = tag( item_fn(@_fn), // an item is a function or a module
+ item_mod(@_mod) );
+
+
+//
+// Local Variables:
+// mode: rust
+// fill-column: 78;
+// indent-tabs-mode: nil
+// c-basic-offset: 4
+// buffer-file-coding-system: utf-8-unix
+// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+// End:
+//
diff --git a/src/comp/fe/lexer.rs b/src/comp/fe/lexer.rs
index 87809b3b..b0ee557d 100644
--- a/src/comp/fe/lexer.rs
+++ b/src/comp/fe/lexer.rs
@@ -1,20 +1,103 @@
-import std._io.buf_reader;
+import std._io.stdio_reader;
-iter buffers(buf_reader rdr) -> vec[u8] {
- while (true) {
- let vec[u8] v = rdr.read();
- if (std._vec.len[u8](v) == 0u) {
- ret;
- }
- put v;
- }
+fn in_range(char c, char lo, char hi) -> bool { // true when lo <= c <= hi
+ ret lo <= c && c <= hi;
+}
+
+fn is_alpha(char c) -> bool { // letters a-z or A-Z only
+ ret in_range(c, 'a', 'z') ||
+ in_range(c, 'A', 'Z');
+}
+
+fn is_dec_digit(char c) -> bool { // decimal digit, 0 through 9
+ ret in_range(c, '0', '9');
+}
+
+fn is_hex_digit(char c) -> bool { // hex digit: 0-9, a-f or A-F
+ ret in_range(c, '0', '9') ||
+ in_range(c, 'a', 'f') ||
+ in_range(c, 'A', 'F');
+}
+
+fn is_bin_digit(char c) -> bool { // binary digit: 0 or 1
+ ret c == '0' || c == '1';
+}
+
+fn is_whitespace(char c) -> bool { // space, tab, CR or LF
+ ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
 }
-iter bytes(buf_reader rdr) -> u8 {
- for each (vec[u8] buf in buffers(rdr)) {
- for (u8 b in buf) {
- // FIXME: doesn't compile at the moment.
- // put b;
+fn next_token(stdio_reader rdr) -> token.token { // reads one token from rdr
+ auto eof = (-1) as char; // compared against getc() results to detect end of input
+ auto c = rdr.getc() as char;
+ auto accum_str = "";
+ auto accum_int = 0;
+
+ while (is_whitespace(c) && c != eof) { // skip leading whitespace
+ c = rdr.getc() as char;
+ }
+
+ if (c == eof) { ret token.EOF(); }
+
+ if (is_alpha(c)) { // identifier: a run of letters only (no digits, no underscore)
+ while (is_alpha(c)) {
+ accum_str += (c as u8);
+ c = rdr.getc() as char;
+ }
+ rdr.ungetc(c as int); // push back the char that ended the run
+ ret token.IDENT(accum_str);
+ }
+
+ if (is_dec_digit(c)) {
+ if (c == '0') { // NOTE(review): empty branch; a leading 0 is not lexed here
+ } else {
+ while (is_dec_digit(c)) { // accumulate the decimal value digit by digit
+ accum_int *= 10;
+ accum_int += (c as int) - ('0' as int);
+ c = rdr.getc() as char;
+ }
+ rdr.ungetc(c as int); // push back the first non-digit
+ ret token.LIT_INT(accum_int);
+ }
 }
- }
+
+ // One-byte structural symbols.
+ alt (c) {
+ case (';') { ret token.SEMI(); }
+ case (',') { ret token.COMMA(); }
+ case ('.') { ret token.DOT(); }
+ case ('(') { ret token.LPAREN(); }
+ case (')') { ret token.RPAREN(); }
+ case ('{') { ret token.LBRACE(); }
+ case ('}') { ret token.RBRACE(); }
+ case ('[') { ret token.LBRACKET(); }
+ case (']') { ret token.RBRACKET(); }
+ case ('@') { ret token.AT(); }
+ case ('#') { ret token.POUND(); }
+ case ('=') { // one char of lookahead separates = from ==
+ auto c2 = rdr.getc() as char;
+ if (c2 == '=') {
+ ret token.OP(token.EQEQ());
+ } else {
+ rdr.ungetc(c2 as int);
+ ret token.OP(token.EQ());
+ }
+ }
+ }
+
+ log "lexer stopping at "; // reached only when no branch above returned
+ log c;
+ ret token.EOF(); // NOTE(review): input not handled above is reported as EOF
 }
+
+
+//
+// Local Variables:
+// mode: rust
+// fill-column: 78;
+// indent-tabs-mode: nil
+// c-basic-offset: 4
+// buffer-file-coding-system: utf-8-unix
+// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+// End:
+//
diff --git a/src/comp/fe/parser.rs b/src/comp/fe/parser.rs
index e69de29b..02de22a7 100644
--- a/src/comp/fe/parser.rs
+++ b/src/comp/fe/parser.rs
@@ -0,0 +1,33 @@
+import std._io;
+
+state type parser = // object type exposing peek and bump
+ state obj {
+ state fn peek() -> token.token; // returns a token
+ state fn bump(); // takes and returns nothing
+ };
+
+fn new_parser(str path) -> parser { // builds a parser over a reader made from path
+ state obj stdio_parser(mutable token.token tok,
+ _io.stdio_reader rdr)
+ {
+ state fn peek() -> token.token {
+ ret tok; // current token; nothing is read from rdr
+ }
+ state fn bump() {
+ tok = lexer.next_token(rdr); // replace tok with the next lexed token
+ }
+ }
+ auto rdr = _io.new_stdio_reader(path);
+ ret stdio_parser(lexer.next_token(rdr), rdr); // tok starts as the first token
+}
+
+//
+// Local Variables:
+// mode: rust
+// fill-column: 78;
+// indent-tabs-mode: nil
+// c-basic-offset: 4
+// buffer-file-coding-system: utf-8-unix
+// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+// End:
+//
diff --git a/src/comp/fe/token.rs b/src/comp/fe/token.rs
new file mode 100644
index 00000000..5e8171bc
--- /dev/null
+++ b/src/comp/fe/token.rs
@@ -0,0 +1,333 @@
+import util.common.ty_mach;
+import util.common.ty_mach_to_str;
+import std._int;
+import std._uint;
+
+type op = tag // operator kinds; none of the variants carries a payload
+ (PLUS(),
+ MINUS(),
+ STAR(),
+ SLASH(),
+ PERCENT(),
+ EQ(),
+ LT(),
+ LE(),
+ EQEQ(),
+ NE(),
+ GE(),
+ GT(),
+ NOT(),
+ TILDE(),
+ CARET(),
+ AND(),
+ ANDAND(),
+ OR(),
+ OROR(),
+ LSL(),
+ LSR(),
+ ASR());
+
+type token = tag // token kinds; some variants carry a payload value
+ (OP(op), // a plain operator
+ OPEQ(op), // an operator in its trailing-equals form
+ AS(),
+ WITH(),
+
+ /* Structural symbols */
+ AT(),
+ DOT(),
+ COMMA(),
+ SEMI(),
+ COLON(),
+ RARROW(),
+ SEND(),
+ LARROW(),
+ LPAREN(),
+ RPAREN(),
+ LBRACKET(),
+ RBRACKET(),
+ LBRACE(),
+ RBRACE(),
+
+ /* Module and crate keywords */
+ MOD(),
+ USE(),
+ AUTH(),
+ META(),
+
+ /* Metaprogramming keywords */
+ SYNTAX(),
+ POUND(),
+
+ /* Statement keywords */
+ IF(),
+ ELSE(),
+ DO(),
+ WHILE(),
+ ALT(),
+ CASE(),
+
+ FAIL(),
+ DROP(),
+
+ IN(),
+ FOR(),
+ EACH(),
+ PUT(),
+ RET(),
+ BE(),
+
+ /* Type and type-state keywords */
+ TYPE(),
+ CHECK(),
+ CLAIM(),
+ PROVE(),
+
+ /* Effect keywords */
+ IO(),
+ STATE(),
+ UNSAFE(),
+
+ /* Type qualifiers */
+ NATIVE(),
+ AUTO(),
+ MUTABLE(),
+
+ /* Name management */
+ IMPORT(),
+ EXPORT(),
+
+ /* Value / stmt declarators */
+ LET(),
+
+ /* Magic runtime services */
+ LOG(),
+ SPAWN(),
+ BIND(),
+ THREAD(),
+ YIELD(),
+ JOIN(),
+
+ /* Literals */
+ LIT_INT(int),
+ LIT_UINT(uint),
+ LIT_MACH_INT(ty_mach, int), // machine type plus the integer value
+ LIT_STR(str),
+ LIT_CHAR(char),
+ LIT_BOOL(bool),
+
+ /* Name components */
+ IDENT(str),
+ IDX(int),
+ UNDERSCORE(),
+
+ /* Reserved type names */
+ BOOL(),
+ INT(),
+ UINT(),
+ FLOAT(),
+ CHAR(),
+ STR(),
+ MACH(ty_mach),
+
+ /* Algebraic type constructors */
+ REC(),
+ TUP(),
+ TAG(),
+ VEC(),
+ ANY(),
+
+ /* Callable type constructors */
+ FN(),
+ ITER(),
+
+ /* Object type */
+ OBJ(),
+
+ /* Comm and task types */
+ CHAN(),
+ PORT(),
+ TASK(),
+
+ BRACEQUOTE(str),
+ EOF());
+
+fn op_to_str(op o) -> str { // maps each operator to its symbol text
+ alt (o) {
+ case (PLUS()) { ret "+"; }
+ case (MINUS()) { ret "-"; }
+ case (STAR()) { ret "*"; }
+ case (SLASH()) { ret "/"; }
+ case (PERCENT()) { ret "%"; }
+ case (EQ()) { ret "="; }
+ case (LT()) { ret "<"; }
+ case (LE()) { ret "<="; }
+ case (EQEQ()) { ret "=="; }
+ case (NE()) { ret "!="; }
+ case (GE()) { ret ">="; }
+ case (GT()) { ret ">"; }
+ case (NOT()) { ret "!"; }
+ case (TILDE()) { ret "~"; }
+ case (CARET()) { ret "^"; }
+ case (AND()) { ret "&"; }
+ case (ANDAND()) { ret "&&"; }
+ case (OR()) { ret "|"; }
+ case (OROR()) { ret "||"; }
+ case (LSL()) { ret "<<"; }
+ case (LSR()) { ret ">>"; }
+ case (ASR()) { ret ">>>"; }
+ }
+}
+
+fn to_str(token t) -> str { // maps each token to a printable string
+ alt (t) {
+ case (OP(op)) { ret op_to_str(op); }
+ case (OPEQ(op)) { ret op_to_str(op) + "="; } // operator text followed by =
+ case (AS()) { ret "as"; }
+ case (WITH()) { ret "with"; }
+
+ /* Structural symbols */
+ case (AT()) { ret "@"; }
+ case (DOT()) { ret "."; }
+ case (COMMA()) { ret ","; }
+ case (SEMI()) { ret ";"; }
+ case (COLON()) { ret ":"; }
+ case (RARROW()) { ret "->"; }
+ case (SEND()) { ret "<|"; }
+ case (LARROW()) { ret "<-"; }
+ case (LPAREN()) { ret "("; }
+ case (RPAREN()) { ret ")"; }
+ case (LBRACKET()) { ret "["; }
+ case (RBRACKET()) { ret "]"; }
+ case (LBRACE()) { ret "{"; }
+ case (RBRACE()) { ret "}"; }
+
+ /* Module and crate keywords */
+ case (MOD()) { ret "mod"; }
+ case (USE()) { ret "use"; }
+ case (AUTH()) { ret "auth"; }
+ case (META()) { ret "meta"; }
+
+ /* Metaprogramming keywords */
+ case (SYNTAX()) { ret "syntax"; }
+ case (POUND()) { ret "#"; }
+
+ /* Statement keywords */
+ case (IF()) { ret "if"; }
+ case (ELSE()) { ret "else"; }
+ case (DO()) { ret "do"; }
+ case (WHILE()) { ret "while"; }
+ case (ALT()) { ret "alt"; }
+ case (CASE()) { ret "case"; }
+
+ case (FAIL()) { ret "fail"; }
+ case (DROP()) { ret "drop"; }
+
+ case (IN()) { ret "in"; }
+ case (FOR()) { ret "for"; }
+ case (EACH()) { ret "each"; }
+ case (PUT()) { ret "put"; }
+ case (RET()) { ret "ret"; }
+ case (BE()) { ret "be"; }
+
+ /* Type and type-state keywords */
+ case (TYPE()) { ret "type"; }
+ case (CHECK()) { ret "check"; }
+ case (CLAIM()) { ret "claim"; }
+ case (PROVE()) { ret "prove"; }
+
+ /* Effect keywords */
+ case (IO()) { ret "io"; }
+ case (STATE()) { ret "state"; }
+ case (UNSAFE()) { ret "unsafe"; }
+
+ /* Type qualifiers */
+ case (NATIVE()) { ret "native"; }
+ case (AUTO()) { ret "auto"; }
+ case (MUTABLE()) { ret "mutable"; }
+
+ /* Name management */
+ case (IMPORT()) { ret "import"; }
+ case (EXPORT()) { ret "export"; }
+
+ /* Value / stmt declarators */
+ case (LET()) { ret "let"; }
+
+ /* Magic runtime services */
+ case (LOG()) { ret "log"; }
+ case (SPAWN()) { ret "spawn"; }
+ case (BIND()) { ret "bind"; }
+ case (THREAD()) { ret "thread"; }
+ case (YIELD()) { ret "yield"; }
+ case (JOIN()) { ret "join"; }
+
+ /* Literals */
+ case (LIT_INT(i)) { ret _int.to_str(i, 10u); }
+ case (LIT_UINT(u)) { ret _uint.to_str(u, 10u); }
+ case (LIT_MACH_INT(tm, i)) { // value, underscore, then machine type name
+ ret _int.to_str(i, 10u)
+ + "_" + ty_mach_to_str(tm);
+ }
+
+ case (LIT_STR(s)) {
+ // FIXME: escape.
+ ret "\"" + s + "\"";
+ }
+ case (LIT_CHAR(c)) {
+ // FIXME: escape and encode.
+ auto tmp = "";
+ tmp += (c as u8);
+ ret tmp;
+ }
+
+ case (LIT_BOOL(b)) {
+ if (b) { ret "true"; } else { ret "false"; }
+ }
+
+ /* Name components */
+ case (IDENT(s)) { ret s; }
+ case (IDX(i)) { ret "_" + _int.to_str(i, 10u); } // underscore then the index
+ case (UNDERSCORE()) { ret "_"; }
+
+ /* Reserved type names */
+ case (BOOL()) { ret "bool"; }
+ case (INT()) { ret "int"; }
+ case (UINT()) { ret "uint"; }
+ case (FLOAT()) { ret "float"; }
+ case (CHAR()) { ret "char"; }
+ case (STR()) { ret "str"; }
+ case (MACH(tm)) { ret ty_mach_to_str(tm); }
+
+ /* Algebraic type constructors */
+ case (REC()) { ret "rec"; }
+ case (TUP()) { ret "tup"; }
+ case (TAG()) { ret "tag"; }
+ case (VEC()) { ret "vec"; }
+ case (ANY()) { ret "any"; }
+
+ /* Callable type constructors */
+ case (FN()) { ret "fn"; }
+ case (ITER()) { ret "iter"; }
+
+ /* Object type */
+ case (OBJ()) { ret "obj"; }
+
+ /* Comm and task types */
+ case (CHAN()) { ret "chan"; }
+ case (PORT()) { ret "port"; }
+ case (TASK()) { ret "task"; }
+
+ case (BRACEQUOTE(_)) { ret "<bracequote>"; } // payload is not printed
+ case (EOF()) { ret "<eof>"; }
+ }
+}
+
+
+
+// Local Variables:
+// fill-column: 78;
+// indent-tabs-mode: nil
+// c-basic-offset: 4
+// buffer-file-coding-system: utf-8-unix
+// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+// End: