diff options
Diffstat (limited to 'src/comp/fe')
| -rw-r--r-- | src/comp/fe/ast.rs | 58 | ||||
| -rw-r--r-- | src/comp/fe/lexer.rs | 113 | ||||
| -rw-r--r-- | src/comp/fe/parser.rs | 33 | ||||
| -rw-r--r-- | src/comp/fe/token.rs | 333 |
4 files changed, 522 insertions, 15 deletions
diff --git a/src/comp/fe/ast.rs b/src/comp/fe/ast.rs new file mode 100644 index 00000000..3a329ded --- /dev/null +++ b/src/comp/fe/ast.rs @@ -0,0 +1,58 @@ + +import std.util.option; +import std.map.hashmap; + +type ident = str; + +type crate = rec( str filename, + _mod module); + +type block = vec[stmt]; + +type stmt = tag( stmt_block(block), + stmt_decl(@decl), + stmt_ret(option[@lval]) ); + +type decl = tag( decl_local(ident, option[ty]), + decl_item(ident, @item) ); + +type lval = tag( lval_ident(ident), + lval_ext(@lval, ident), + lval_idx(@lval, @atom) ); + +type atom = tag( atom_lit(lit)); + +type lit = tag( lit_char(char), + lit_int(int), + lit_nil(), + lit_bool(bool) ); + +type ty = tag( ty_nil(), + ty_bool(), + ty_int(), + ty_char() ); + +type mode = tag( local(), alias() ); + +type slot = rec(ty ty, mode mode); + +type _fn = rec(vec[rec(slot slot, ident ident)] inputs, + slot output, + block body); + +type _mod = hashmap[ident,item]; + +type item = tag( item_fn(@_fn), + item_mod(@_mod) ); + + +// +// Local Variables: +// mode: rust +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; +// End: +// diff --git a/src/comp/fe/lexer.rs b/src/comp/fe/lexer.rs index 87809b3b..b0ee557d 100644 --- a/src/comp/fe/lexer.rs +++ b/src/comp/fe/lexer.rs @@ -1,20 +1,103 @@ -import std._io.buf_reader; +import std._io.stdio_reader; -iter buffers(buf_reader rdr) -> vec[u8] { - while (true) { - let vec[u8] v = rdr.read(); - if (std._vec.len[u8](v) == 0u) { - ret; - } - put v; - } +fn in_range(char c, char lo, char hi) -> bool { + ret lo <= c && c <= hi; +} + +fn is_alpha(char c) -> bool { + ret in_range(c, 'a', 'z') || + in_range(c, 'A', 'Z'); +} + +fn is_dec_digit(char c) -> bool { + ret in_range(c, '0', '9'); +} + +fn is_hex_digit(char c) -> bool { + ret in_range(c, '0', '9') || + in_range(c, 'a', 'f') || + in_range(c, 'A', 'F'); +} + +fn is_bin_digit(char c) -> bool { + ret c == '0' || c == '1'; +} + +fn is_whitespace(char c) -> bool { + ret c == ' ' || c == '\t' || c == '\r' || c == '\n'; } -iter bytes(buf_reader rdr) -> u8 { - for each (vec[u8] buf in buffers(rdr)) { - for (u8 b in buf) { - // FIXME: doesn't compile at the moment. - // put b; +fn next_token(stdio_reader rdr) -> token.token { + auto eof = (-1) as char; + auto c = rdr.getc() as char; + auto accum_str = ""; + auto accum_int = 0; + + while (is_whitespace(c) && c != eof) { + c = rdr.getc() as char; + } + + if (c == eof) { ret token.EOF(); } + + if (is_alpha(c)) { + while (is_alpha(c)) { + accum_str += (c as u8); + c = rdr.getc() as char; + } + rdr.ungetc(c as int); + ret token.IDENT(accum_str); + } + + if (is_dec_digit(c)) { + if (c == '0') { + } else { + while (is_dec_digit(c)) { + accum_int *= 10; + accum_int += (c as int) - ('0' as int); + c = rdr.getc() as char; + } + rdr.ungetc(c as int); + ret token.LIT_INT(accum_int); + } } - } + + // One-byte structural symbols. + alt (c) { + case (';') { ret token.SEMI(); } + case (',') { ret token.COMMA(); } + case ('.') { ret token.DOT(); } + case ('(') { ret token.LPAREN(); } + case (')') { ret token.RPAREN(); } + case ('{') { ret token.LBRACE(); } + case ('}') { ret token.RBRACE(); } + case ('[') { ret token.LBRACKET(); } + case (']') { ret token.RBRACKET(); } + case ('@') { ret token.AT(); } + case ('#') { ret token.POUND(); } + case ('=') { + auto c2 = rdr.getc() as char; + if (c2 == '=') { + ret token.OP(token.EQEQ()); + } else { + rdr.ungetc(c2 as int); + ret token.OP(token.EQ()); + } + } + } + + log "lexer stopping at "; + log c; + ret token.EOF(); } + + +// +// Local Variables: +// mode: rust +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; +// End: +// diff --git a/src/comp/fe/parser.rs b/src/comp/fe/parser.rs index e69de29b..02de22a7 100644 --- a/src/comp/fe/parser.rs +++ b/src/comp/fe/parser.rs @@ -0,0 +1,33 @@ +import std._io; + +state type parser = + state obj { + state fn peek() -> token.token; + state fn bump(); + }; + +fn new_parser(str path) -> parser { + state obj stdio_parser(mutable token.token tok, + _io.stdio_reader rdr) + { + state fn peek() -> token.token { + ret tok; + } + state fn bump() { + tok = lexer.next_token(rdr); + } + } + auto rdr = _io.new_stdio_reader(path); + ret stdio_parser(lexer.next_token(rdr), rdr); +} + +// +// Local Variables: +// mode: rust +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; +// End: +// diff --git a/src/comp/fe/token.rs b/src/comp/fe/token.rs new file mode 100644 index 00000000..5e8171bc --- /dev/null +++ b/src/comp/fe/token.rs @@ -0,0 +1,333 @@ +import util.common.ty_mach; +import util.common.ty_mach_to_str; +import std._int; +import std._uint; + +type op = tag + (PLUS(), + MINUS(), + STAR(), + SLASH(), + PERCENT(), + EQ(), + LT(), + LE(), + EQEQ(), + NE(), + GE(), + GT(), + NOT(), + TILDE(), + CARET(), + AND(), + ANDAND(), + OR(), + OROR(), + LSL(), + LSR(), + ASR()); + +type token = tag + (OP(op), + OPEQ(op), + AS(), + WITH(), + + /* Structural symbols */ + AT(), + DOT(), + COMMA(), + SEMI(), + COLON(), + RARROW(), + SEND(), + LARROW(), + LPAREN(), + RPAREN(), + LBRACKET(), + RBRACKET(), + LBRACE(), + RBRACE(), + + /* Module and crate keywords */ + MOD(), + USE(), + AUTH(), + META(), + + /* Metaprogramming keywords */ + SYNTAX(), + POUND(), + + /* Statement keywords */ + IF(), + ELSE(), + DO(), + WHILE(), + ALT(), + CASE(), + + FAIL(), + DROP(), + + IN(), + FOR(), + EACH(), + PUT(), + RET(), + BE(), + + /* Type and type-state keywords */ + TYPE(), + CHECK(), + CLAIM(), + PROVE(), + + /* Effect keywords */ + IO(), + STATE(), + UNSAFE(), + + /* Type qualifiers */ + NATIVE(), + AUTO(), + MUTABLE(), + + /* Name management */ + IMPORT(), + EXPORT(), + + /* Value / stmt declarators */ + LET(), + + /* Magic runtime services */ + LOG(), + SPAWN(), + BIND(), + THREAD(), + YIELD(), + JOIN(), + + /* Literals */ + LIT_INT(int), + LIT_UINT(uint), + LIT_MACH_INT(ty_mach, int), + LIT_STR(str), + LIT_CHAR(char), + LIT_BOOL(bool), + + /* Name components */ + IDENT(str), + IDX(int), + UNDERSCORE(), + + /* Reserved type names */ + BOOL(), + INT(), + UINT(), + FLOAT(), + CHAR(), + STR(), + MACH(ty_mach), + + /* Algebraic type constructors */ + REC(), + TUP(), + TAG(), + VEC(), + ANY(), + + /* Callable type constructors */ + FN(), + ITER(), + + /* Object type */ + OBJ(), + + /* Comm and task types */ + CHAN(), + PORT(), + TASK(), + + BRACEQUOTE(str), + EOF()); + +fn op_to_str(op o) -> str { + alt (o) { + case (PLUS()) { ret "+"; } + case (MINUS()) { ret "-"; } + case (STAR()) { ret "*"; } + case (SLASH()) { ret "/"; } + case (PERCENT()) { ret "%"; } + case (EQ()) { ret "="; } + case (LT()) { ret "<"; } + case (LE()) { ret "<="; } + case (EQEQ()) { ret "=="; } + case (NE()) { ret "!="; } + case (GE()) { ret ">="; } + case (GT()) { ret ">"; } + case (NOT()) { ret "!"; } + case (TILDE()) { ret "~"; } + case (CARET()) { ret "^"; } + case (AND()) { ret "&"; } + case (ANDAND()) { ret "&&"; } + case (OR()) { ret "|"; } + case (OROR()) { ret "||"; } + case (LSL()) { ret "<<"; } + case (LSR()) { ret ">>"; } + case (ASR()) { ret ">>>"; } + } +} + +fn to_str(token t) -> str { + alt (t) { + case (OP(op)) { ret op_to_str(op); } + case (OPEQ(op)) { ret op_to_str(op) + "="; } + case (AS()) { ret "as"; } + case (WITH()) { ret "with"; } + + /* Structural symbols */ + case (AT()) { ret "@"; } + case (DOT()) { ret "."; } + case (COMMA()) { ret ","; } + case (SEMI()) { ret ";"; } + case (COLON()) { ret ":"; } + case (RARROW()) { ret "->"; } + case (SEND()) { ret "<|"; } + case (LARROW()) { ret "<-"; } + case (LPAREN()) { ret "("; } + case (RPAREN()) { ret ")"; } + case (LBRACKET()) { ret "["; } + case (RBRACKET()) { ret "]"; } + case (LBRACE()) { ret "{"; } + case (RBRACE()) { ret "}"; } + + /* Module and crate keywords */ + case (MOD()) { ret "mod"; } + case (USE()) { ret "use"; } + case (AUTH()) { ret "auth"; } + case (META()) { ret "meta"; } + + /* Metaprogramming keywords */ + case (SYNTAX()) { ret "syntax"; } + case (POUND()) { ret "#"; } + + /* Statement keywords */ + case (IF()) { ret "if"; } + case (ELSE()) { ret "else"; } + case (DO()) { ret "do"; } + case (WHILE()) { ret "while"; } + case (ALT()) { ret "alt"; } + case (CASE()) { ret "case"; } + + case (FAIL()) { ret "fail"; } + case (DROP()) { ret "drop"; } + + case (IN()) { ret "in"; } + case (FOR()) { ret "for"; } + case (EACH()) { ret "each"; } + case (PUT()) { ret "put"; } + case (RET()) { ret "ret"; } + case (BE()) { ret "be"; } + + /* Type and type-state keywords */ + case (TYPE()) { ret "type"; } + case (CHECK()) { ret "check"; } + case (CLAIM()) { ret "claim"; } + case (PROVE()) { ret "prove"; } + + /* Effect keywords */ + case (IO()) { ret "io"; } + case (STATE()) { ret "state"; } + case (UNSAFE()) { ret "unsafe"; } + + /* Type qualifiers */ + case (NATIVE()) { ret "native"; } + case (AUTO()) { ret "auto"; } + case (MUTABLE()) { ret "mutable"; } + + /* Name management */ + case (IMPORT()) { ret "import"; } + case (EXPORT()) { ret "export"; } + + /* Value / stmt declarators */ + case (LET()) { ret "let"; } + + /* Magic runtime services */ + case (LOG()) { ret "log"; } + case (SPAWN()) { ret "spawn"; } + case (BIND()) { ret "bind"; } + case (THREAD()) { ret "thread"; } + case (YIELD()) { ret "yield"; } + case (JOIN()) { ret "join"; } + + /* Literals */ + case (LIT_INT(i)) { ret _int.to_str(i, 10u); } + case (LIT_UINT(u)) { ret _uint.to_str(u, 10u); } + case (LIT_MACH_INT(tm, i)) { + ret _int.to_str(i, 10u) + + "_" + ty_mach_to_str(tm); + } + + case (LIT_STR(s)) { + // FIXME: escape. + ret "\"" + s + "\""; + } + case (LIT_CHAR(c)) { + // FIXME: escape and encode. + auto tmp = ""; + tmp += (c as u8); + ret tmp; + } + + case (LIT_BOOL(b)) { + if (b) { ret "true"; } else { ret "false"; } + } + + /* Name components */ + case (IDENT(s)) { ret s; } + case (IDX(i)) { ret "_" + _int.to_str(i, 10u); } + case (UNDERSCORE()) { ret "_"; } + + /* Reserved type names */ + case (BOOL()) { ret "bool"; } + case (INT()) { ret "int"; } + case (UINT()) { ret "uint"; } + case (FLOAT()) { ret "float"; } + case (CHAR()) { ret "char"; } + case (STR()) { ret "str"; } + case (MACH(tm)) { ret ty_mach_to_str(tm); } + + /* Algebraic type constructors */ + case (REC()) { ret "rec"; } + case (TUP()) { ret "tup"; } + case (TAG()) { ret "tag"; } + case (VEC()) { ret "vec"; } + case (ANY()) { ret "any"; } + + /* Callable type constructors */ + case (FN()) { ret "fn"; } + case (ITER()) { ret "iter"; } + + /* Object type */ + case (OBJ()) { ret "obj"; } + + /* Comm and task types */ + case (CHAN()) { ret "chan"; } + case (PORT()) { ret "port"; } + case (TASK()) { ret "task"; } + + case (BRACEQUOTE(_)) { ret "<bracequote>"; } + case (EOF()) { ret "<eof>"; } + } +} + + + +// Local Variables: +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; +// End: |