about summary refs log tree commit diff
diff options
context:
space:
mode:
author Graydon Hoare <[email protected]> 2010-08-20 11:41:34 -0700
committer Graydon Hoare <[email protected]> 2010-08-20 11:42:44 -0700
commit 0f224f977d9edeb7f8ca56c052c1202fab384552 (patch)
tree 2e9b93ce5e4eea139f44c747fd10679c1f4a03df
parent Add _uint module to std, move some code around. (diff)
download rust-0f224f977d9edeb7f8ca56c052c1202fab384552.tar.xz
rust-0f224f977d9edeb7f8ca56c052c1202fab384552.zip
Expand rustc lexer to do almost-nearly-nontrivial stuff.
-rw-r--r-- src/comp/driver/rustc.rs 12
-rw-r--r-- src/comp/fe/lexer.rs 30
-rw-r--r-- src/comp/fe/token.rs 150
-rw-r--r-- src/comp/util/common.rs 13
4 files changed, 201 insertions, 4 deletions
diff --git a/src/comp/driver/rustc.rs b/src/comp/driver/rustc.rs
index 67aa5308..42bd91c5 100644
--- a/src/comp/driver/rustc.rs
+++ b/src/comp/driver/rustc.rs
@@ -17,7 +17,7 @@ fn write_module() {
llvm.LLVMDisposeModule(llmod);
}
-fn main(vec[str] args) -> () {
+fn main(vec[str] args) {
log "This is the rust 'self-hosted' compiler.";
log "The one written in rust.";
@@ -30,6 +30,16 @@ fn main(vec[str] args) -> () {
auto p = parser.new_parser(filename);
log "opened file: " + filename;
auto tok = p.peek();
+ while (true) {
+ alt (tok) {
+ case (token.EOF()) { ret; }
+ case (_) {
+ log token.to_str(tok);
+ p.bump();
+ tok = p.peek();
+ }
+ }
+ }
}
i += 1;
}
diff --git a/src/comp/fe/lexer.rs b/src/comp/fe/lexer.rs
index 80b4b676..57a60fe3 100644
--- a/src/comp/fe/lexer.rs
+++ b/src/comp/fe/lexer.rs
@@ -38,8 +38,36 @@ fn next_token(stdio_reader rdr) -> token.token {
if (c == eof) { ret token.EOF(); }
if (is_alpha(c)) {
- accum += (c as u8);
+ while (is_alpha(c)) {
+ accum += (c as u8);
+ c = rdr.getc() as char;
+ ret token.IDENT(accum);
+ }
}
+
+ if (is_dec_digit(c)) {
+ if (c == '0') {
+ } else {
+ while (is_dec_digit(c)) {
+ accum += (c as u8);
+ ret token.LIT_INT(0);
+ }
+ }
+ }
+
+ // One-byte structural symbols.
+ if (c == ';') { ret token.SEMI(); }
+ if (c == '.') { ret token.DOT(); }
+ if (c == '(') { ret token.LPAREN(); }
+ if (c == ')') { ret token.RPAREN(); }
+ if (c == '{') { ret token.LBRACE(); }
+ if (c == '}') { ret token.RBRACE(); }
+ if (c == '[') { ret token.LBRACKET(); }
+ if (c == ']') { ret token.RBRACKET(); }
+ if (c == '@') { ret token.AT(); }
+ if (c == '#') { ret token.POUND(); }
+
+ log "lexer stopping at ";
log c;
ret token.EOF();
}
diff --git a/src/comp/fe/token.rs b/src/comp/fe/token.rs
index 76142910..8896b6f7 100644
--- a/src/comp/fe/token.rs
+++ b/src/comp/fe/token.rs
@@ -1,4 +1,7 @@
import util.common.ty_mach;
+import util.common.ty_mach_to_str;
+import std._int;
+import std._uint;
type op = tag
(PLUS(),
@@ -107,10 +110,10 @@ type token = tag
/* Literals */
LIT_INT(int),
- LIT_UINT(int),
+ LIT_UINT(uint),
LIT_MACH_INT(ty_mach, int),
LIT_STR(str),
- LIT_CHAR(int),
+ LIT_CHAR(char),
LIT_BOOL(bool),
/* Name components */
@@ -149,6 +152,149 @@ type token = tag
BRACEQUOTE(str),
EOF());
+// Render a token as a string.
+//
+// Keyword and structural-symbol tokens map to their literal spelling.
+// Literal and identifier tokens are rendered from their payload. OP, OPEQ
+// and BRACEQUOTE ignore their payload and print a fixed placeholder, and
+// EOF prints "<eof>".
+fn to_str(token t) -> str {
+ alt (t) {
+ // The operator payload is not inspected; every OP/OPEQ prints the same
+ // placeholder text.
+ case (OP(_)) { ret "<op>"; }
+ case (OPEQ(_)) { ret "<op>="; }
+ case (AS()) { ret "as"; }
+ case (WITH()) { ret "with"; }
+
+ /* Structural symbols */
+ case (AT()) { ret "@"; }
+ case (DOT()) { ret "."; }
+ case (COMMA()) { ret ","; }
+ case (SEMI()) { ret ";"; }
+ case (COLON()) { ret ":"; }
+ case (RARROW()) { ret "->"; }
+ case (SEND()) { ret "<|"; }
+ case (LARROW()) { ret "<-"; }
+ case (LPAREN()) { ret "("; }
+ case (RPAREN()) { ret ")"; }
+ case (LBRACKET()) { ret "["; }
+ case (RBRACKET()) { ret "]"; }
+ case (LBRACE()) { ret "{"; }
+ case (RBRACE()) { ret "}"; }
+
+ /* Module and crate keywords */
+ case (MOD()) { ret "mod"; }
+ case (USE()) { ret "use"; }
+ case (AUTH()) { ret "auth"; }
+ case (META()) { ret "meta"; }
+
+ /* Metaprogramming keywords */
+ case (SYNTAX()) { ret "syntax"; }
+ case (POUND()) { ret "#"; }
+
+ /* Statement keywords */
+ case (IF()) { ret "if"; }
+ case (ELSE()) { ret "else"; }
+ case (DO()) { ret "do"; }
+ case (WHILE()) { ret "while"; }
+ case (ALT()) { ret "alt"; }
+ case (CASE()) { ret "case"; }
+
+ case (FAIL()) { ret "fail"; }
+ case (DROP()) { ret "drop"; }
+
+ case (IN()) { ret "in"; }
+ case (FOR()) { ret "for"; }
+ case (EACH()) { ret "each"; }
+ case (PUT()) { ret "put"; }
+ case (RET()) { ret "ret"; }
+ case (BE()) { ret "be"; }
+
+ /* Type and type-state keywords */
+ case (TYPE()) { ret "type"; }
+ case (CHECK()) { ret "check"; }
+ case (CLAIM()) { ret "claim"; }
+ case (PROVE()) { ret "prove"; }
+
+ /* Effect keywords */
+ case (IO()) { ret "io"; }
+ case (STATE()) { ret "state"; }
+ case (UNSAFE()) { ret "unsafe"; }
+
+ /* Type qualifiers */
+ case (NATIVE()) { ret "native"; }
+ case (AUTO()) { ret "auto"; }
+ case (MUTABLE()) { ret "mutable"; }
+
+ /* Name management */
+ case (IMPORT()) { ret "import"; }
+ case (EXPORT()) { ret "export"; }
+
+ /* Value / stmt declarators */
+ case (LET()) { ret "let"; }
+
+ /* Magic runtime services */
+ case (LOG()) { ret "log"; }
+ case (SPAWN()) { ret "spawn"; }
+ case (BIND()) { ret "bind"; }
+ case (THREAD()) { ret "thread"; }
+ case (YIELD()) { ret "yield"; }
+ case (JOIN()) { ret "join"; }
+
+ /* Literals */
+ // NOTE(review): the second argument (10u) is presumably the radix --
+ // confirm against std._int.to_str / std._uint.to_str.
+ case (LIT_INT(i)) { ret _int.to_str(i, 10u); }
+ case (LIT_UINT(u)) { ret _uint.to_str(u, 10u); }
+ // Printed as <value>_<machine type name>.
+ case (LIT_MACH_INT(tm, i)) {
+ ret _int.to_str(i, 10u)
+ + "_" + ty_mach_to_str(tm);
+ }
+
+ // The string payload is wrapped in double quotes as-is.
+ case (LIT_STR(s)) {
+ // FIXME: escape.
+ ret "\"" + s + "\"";
+ }
+ // The char is cast to u8 and appended to an empty string; no
+ // surrounding quotes are added.
+ case (LIT_CHAR(c)) {
+ // FIXME: escape and encode.
+ auto tmp = "";
+ tmp += (c as u8);
+ ret tmp;
+ }
+
+ case (LIT_BOOL(b)) {
+ if (b) { ret "true"; } else { ret "false"; }
+ }
+
+ /* Name components */
+ case (IDENT(s)) { ret s; }
+ // An index is printed with a leading underscore.
+ case (IDX(i)) { ret "_" + _int.to_str(i, 10u); }
+ case (UNDERSCORE()) { ret "_"; }
+
+ /* Reserved type names */
+ case (BOOL()) { ret "bool"; }
+ case (INT()) { ret "int"; }
+ case (UINT()) { ret "uint"; }
+ case (FLOAT()) { ret "float"; }
+ case (CHAR()) { ret "char"; }
+ case (STR()) { ret "str"; }
+ // Machine-type names are delegated to ty_mach_to_str.
+ case (MACH(tm)) { ret ty_mach_to_str(tm); }
+
+ /* Algebraic type constructors */
+ case (REC()) { ret "rec"; }
+ case (TUP()) { ret "tup"; }
+ case (TAG()) { ret "tag"; }
+ case (VEC()) { ret "vec"; }
+ case (ANY()) { ret "any"; }
+
+ /* Callable type constructors */
+ case (FN()) { ret "fn"; }
+ case (ITER()) { ret "iter"; }
+
+ /* Object type */
+ case (OBJ()) { ret "obj"; }
+
+ /* Comm and task types */
+ case (CHAN()) { ret "chan"; }
+ case (PORT()) { ret "port"; }
+ case (TASK()) { ret "task"; }
+
+ // The BRACEQUOTE payload string is not shown.
+ case (BRACEQUOTE(_)) { ret "<bracequote>"; }
+ case (EOF()) { ret "<eof>"; }
+ }
+}
+
// Local Variables:
diff --git a/src/comp/util/common.rs b/src/comp/util/common.rs
index b36f244d..3089c2c1 100644
--- a/src/comp/util/common.rs
+++ b/src/comp/util/common.rs
@@ -3,6 +3,19 @@ type ty_mach = tag( ty_i8(), ty_i16(), ty_i32(), ty_i64(),
ty_u8(), ty_u16(), ty_u32(), ty_u64(),
ty_f32(), ty_f16() );
+// Return the source-level spelling of a machine type (e.g. "u8", "i32").
+//
+// Every constructor of ty_mach needs a case here: token.to_str forwards
+// the ty_mach carried by MACH(tm) and LIT_MACH_INT(tm, _) to this
+// function, so a constructor without a case has no string to return.
+fn ty_mach_to_str(ty_mach tm) -> str {
+    alt (tm) {
+        case (ty_u8()) { ret "u8"; }
+        case (ty_i8()) { ret "i8"; }
+        case (ty_u16()) { ret "u16"; }
+        case (ty_i16()) { ret "i16"; }
+        case (ty_u32()) { ret "u32"; }
+        case (ty_i32()) { ret "i32"; }
+        case (ty_u64()) { ret "u64"; }
+        case (ty_i64()) { ret "i64"; }
+        case (ty_f32()) { ret "f32"; }
+        // NOTE(review): the tag declares ty_f16 next to ty_f32; this looks
+        // like it may be a typo for ty_f64 -- confirm against the ty_mach
+        // declaration. The string mirrors the constructor as declared.
+        case (ty_f16()) { ret "f16"; }
+    }
+}
+
//
// Local Variables:
// mode: rust