aboutsummaryrefslogtreecommitdiff
path: root/src/comp/front/lexer.rs
diff options
context:
space:
mode:
authorGraydon Hoare <[email protected]>2010-09-23 15:46:31 -0700
committerGraydon Hoare <[email protected]>2010-09-23 15:46:31 -0700
commit46e46d0b49de8e245d091f7062dfc28ab71e869e (patch)
tree5ca0d7ab10eb2a89b9c2a299ff3490eac912bf5d /src/comp/front/lexer.rs
parentMore fleshing-out on rustc.me.trans. Emitting modules and fns corresponding t... (diff)
downloadrust-46e46d0b49de8e245d091f7062dfc28ab71e869e.tar.xz
rust-46e46d0b49de8e245d091f7062dfc28ab71e869e.zip
Translate a bunch of the material (lltrans, llasm, abi) from rustboot to rustc, and move files around.
Diffstat (limited to 'src/comp/front/lexer.rs')
-rw-r--r--src/comp/front/lexer.rs595
1 files changed, 595 insertions, 0 deletions
diff --git a/src/comp/front/lexer.rs b/src/comp/front/lexer.rs
new file mode 100644
index 00000000..d058db4a
--- /dev/null
+++ b/src/comp/front/lexer.rs
@@ -0,0 +1,595 @@
+import std._io.stdio_reader;
+import std._str;
+import std.map;
+import std.map.hashmap;
+import util.common;
+import util.common.new_str_hash;
+
+state type reader = state obj {
+ fn is_eof() -> bool;
+ fn curr() -> char;
+ fn next() -> char;
+ state fn bump();
+ state fn mark();
+ fn get_filename() -> str;
+ fn get_mark_pos() -> common.pos;
+ fn get_curr_pos() -> common.pos;
+ fn get_keywords() -> hashmap[str,token.token];
+ fn get_reserved() -> hashmap[str,()];
+};
+
+fn new_reader(stdio_reader rdr, str filename) -> reader
+{
+ state obj reader(stdio_reader rdr,
+ str filename,
+ mutable char c,
+ mutable char n,
+ mutable uint mark_line,
+ mutable uint mark_col,
+ mutable uint line,
+ mutable uint col,
+ hashmap[str,token.token] keywords,
+ hashmap[str,()] reserved) {
+
+ fn is_eof() -> bool {
+ ret c == (-1) as char;
+ }
+
+ fn get_curr_pos() -> common.pos {
+ ret rec(line=line, col=col);
+ }
+
+ fn get_mark_pos() -> common.pos {
+ ret rec(line=mark_line, col=mark_col);
+ }
+
+ fn get_filename() -> str {
+ ret filename;
+ }
+
+ fn curr() -> char {
+ ret c;
+ }
+
+ fn next() -> char {
+ ret n;
+ }
+
+ state fn bump() {
+ c = n;
+
+ if (c == (-1) as char) {
+ ret;
+ }
+
+ if (c == '\n') {
+ line += 1u;
+ col = 1u;
+ } else {
+ col += 1u;
+ }
+
+ n = rdr.getc() as char;
+ }
+
+ state fn mark() {
+ mark_line = line;
+ mark_col = col;
+ }
+
+ fn get_keywords() -> hashmap[str,token.token] {
+ ret keywords;
+ }
+
+ fn get_reserved() -> hashmap[str,()] {
+ ret reserved;
+ }
+ }
+
+ auto keywords = new_str_hash[token.token]();
+ auto reserved = new_str_hash[()]();
+
+ keywords.insert("mod", token.MOD);
+ keywords.insert("use", token.USE);
+ keywords.insert("meta", token.META);
+ keywords.insert("auth", token.AUTH);
+
+ keywords.insert("syntax", token.SYNTAX);
+
+ keywords.insert("if", token.IF);
+ keywords.insert("else", token.ELSE);
+ keywords.insert("while", token.WHILE);
+ keywords.insert("do", token.DO);
+ keywords.insert("alt", token.ALT);
+ keywords.insert("case", token.CASE);
+
+ keywords.insert("for", token.FOR);
+ keywords.insert("each", token.EACH);
+ keywords.insert("put", token.PUT);
+ keywords.insert("ret", token.RET);
+ keywords.insert("be", token.BE);
+
+ keywords.insert("fail", token.FAIL);
+ keywords.insert("drop", token.DROP);
+
+ keywords.insert("type", token.TYPE);
+ keywords.insert("check", token.CHECK);
+ keywords.insert("claim", token.CLAIM);
+ keywords.insert("prove", token.PROVE);
+
+ keywords.insert("io", token.IO);
+ keywords.insert("state", token.STATE);
+ keywords.insert("unsafe", token.UNSAFE);
+
+ keywords.insert("native", token.NATIVE);
+ keywords.insert("mutable", token.MUTABLE);
+ keywords.insert("auto", token.AUTO);
+
+ keywords.insert("fn", token.FN);
+ keywords.insert("iter", token.ITER);
+
+ keywords.insert("import", token.IMPORT);
+ keywords.insert("export", token.EXPORT);
+
+ keywords.insert("let", token.LET);
+ keywords.insert("const", token.CONST);
+
+ keywords.insert("log", token.LOG);
+ keywords.insert("spawn", token.SPAWN);
+ keywords.insert("thread", token.THREAD);
+ keywords.insert("yield", token.YIELD);
+ keywords.insert("join", token.JOIN);
+
+ keywords.insert("bool", token.BOOL);
+
+ keywords.insert("int", token.INT);
+ keywords.insert("uint", token.UINT);
+ keywords.insert("float", token.FLOAT);
+
+ keywords.insert("char", token.CHAR);
+ keywords.insert("str", token.STR);
+
+
+ keywords.insert("rec", token.REC);
+ keywords.insert("tup", token.TUP);
+ keywords.insert("tag", token.TAG);
+ keywords.insert("vec", token.VEC);
+ keywords.insert("any", token.ANY);
+
+ keywords.insert("obj", token.OBJ);
+
+ keywords.insert("port", token.PORT);
+ keywords.insert("chan", token.CHAN);
+
+ keywords.insert("task", token.TASK);
+
+ keywords.insert("true", token.LIT_BOOL(true));
+ keywords.insert("false", token.LIT_BOOL(false));
+
+ keywords.insert("in", token.IN);
+
+ keywords.insert("as", token.AS);
+ keywords.insert("with", token.WITH);
+
+ keywords.insert("bind", token.BIND);
+
+ keywords.insert("u8", token.MACH(common.ty_u8));
+ keywords.insert("u16", token.MACH(common.ty_u16));
+ keywords.insert("u32", token.MACH(common.ty_u32));
+ keywords.insert("u64", token.MACH(common.ty_u64));
+ keywords.insert("i8", token.MACH(common.ty_i8));
+ keywords.insert("i16", token.MACH(common.ty_i16));
+ keywords.insert("i32", token.MACH(common.ty_i32));
+ keywords.insert("i64", token.MACH(common.ty_i64));
+ keywords.insert("f32", token.MACH(common.ty_f32));
+ keywords.insert("f64", token.MACH(common.ty_f64));
+
+ ret reader(rdr, filename, rdr.getc() as char, rdr.getc() as char,
+ 1u, 1u, 1u, 1u, keywords, reserved);
+}
+
+
+
+
+fn in_range(char c, char lo, char hi) -> bool {
+ ret lo <= c && c <= hi;
+}
+
+fn is_alpha(char c) -> bool {
+ ret in_range(c, 'a', 'z') ||
+ in_range(c, 'A', 'Z');
+}
+
+fn is_dec_digit(char c) -> bool {
+ ret in_range(c, '0', '9');
+}
+
+fn is_hex_digit(char c) -> bool {
+ ret in_range(c, '0', '9') ||
+ in_range(c, 'a', 'f') ||
+ in_range(c, 'A', 'F');
+}
+
+fn is_bin_digit(char c) -> bool {
+ ret c == '0' || c == '1';
+}
+
+fn dec_digit_val(char c) -> int {
+ ret (c as int) - ('0' as int);
+}
+
+fn hex_digit_val(char c) -> int {
+ if (in_range(c, '0', '9')) {
+ ret (c as int) - ('0' as int);
+ }
+
+ if (in_range(c, 'a', 'f')) {
+ ret ((c as int) - ('a' as int)) + 10;
+ }
+
+ if (in_range(c, 'A', 'F')) {
+ ret ((c as int) - ('A' as int)) + 10;
+ }
+
+ fail;
+}
+
+fn bin_digit_value(char c) -> int {
+ if (c == '0') { ret 0; }
+ ret 1;
+}
+
+fn is_whitespace(char c) -> bool {
+ ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
+}
+
+state fn consume_any_whitespace(reader rdr) {
+ while (is_whitespace(rdr.curr())) {
+ rdr.bump();
+ }
+ be consume_any_line_comment(rdr);
+}
+
+state fn consume_any_line_comment(reader rdr) {
+ if (rdr.curr() == '/') {
+ alt (rdr.next()) {
+ case ('/') {
+ while (rdr.curr() != '\n') {
+ rdr.bump();
+ }
+ // Restart whitespace munch.
+ be consume_any_whitespace(rdr);
+ }
+ case ('*') {
+ rdr.bump();
+ rdr.bump();
+ be consume_block_comment(rdr);
+ }
+ case (_) {
+ ret;
+ }
+ }
+ }
+}
+
+
+state fn consume_block_comment(reader rdr) {
+ let int level = 1;
+ while (level > 0) {
+ if (rdr.curr() == '/' && rdr.next() == '*') {
+ rdr.bump();
+ rdr.bump();
+ level += 1;
+ } else {
+ if (rdr.curr() == '*' && rdr.next() == '/') {
+ rdr.bump();
+ rdr.bump();
+ level -= 1;
+ } else {
+ rdr.bump();
+ }
+ }
+ }
+ // restart whitespace munch.
+ be consume_any_whitespace(rdr);
+}
+
+state fn next_token(reader rdr) -> token.token {
+ auto accum_str = "";
+ auto accum_int = 0;
+
+ consume_any_whitespace(rdr);
+
+ if (rdr.is_eof()) { ret token.EOF; }
+
+ auto c = rdr.curr();
+
+ if (is_alpha(c)) {
+ while (is_alpha(c) || c == '_') {
+ accum_str += (c as u8);
+ rdr.bump();
+ c = rdr.curr();
+ }
+
+ auto kwds = rdr.get_keywords();
+ if (kwds.contains_key(accum_str)) {
+ ret kwds.get(accum_str);
+ }
+
+ ret token.IDENT(accum_str);
+ }
+
+ if (is_dec_digit(c)) {
+ auto n = rdr.next();
+ if (c == '0' && n == 'x') {
+ rdr.bump();
+ rdr.bump();
+ c = rdr.curr();
+ while (is_hex_digit(c) || c == '_') {
+ accum_int *= 16;
+ accum_int += hex_digit_val(c);
+ rdr.bump();
+ c = rdr.curr();
+ }
+ }
+
+ if (c == '0' && n == 'b') {
+ rdr.bump();
+ rdr.bump();
+ c = rdr.curr();
+ while (is_bin_digit(c) || c == '_') {
+ accum_int *= 2;
+ accum_int += bin_digit_value(c);
+ rdr.bump();
+ c = rdr.curr();
+ }
+ }
+
+ while (is_dec_digit(c) || c == '_') {
+ accum_int *= 10;
+ accum_int += dec_digit_val(c);
+ rdr.bump();
+ c = rdr.curr();
+ }
+
+ ret token.LIT_INT(accum_int);
+ }
+
+ state fn binop(reader rdr, token.binop op) -> token.token {
+ rdr.bump();
+ if (rdr.next() == '=') {
+ rdr.bump();
+ ret token.BINOPEQ(op);
+ } else {
+ ret token.BINOP(op);
+ }
+ }
+
+ alt (c) {
+ // One-byte tokens.
+ case (':') { rdr.bump(); ret token.COLON; }
+ case ('?') { rdr.bump(); ret token.QUES; }
+ case (';') { rdr.bump(); ret token.SEMI; }
+ case (',') { rdr.bump(); ret token.COMMA; }
+ case ('.') { rdr.bump(); ret token.DOT; }
+ case ('(') { rdr.bump(); ret token.LPAREN; }
+ case (')') { rdr.bump(); ret token.RPAREN; }
+ case ('{') { rdr.bump(); ret token.LBRACE; }
+ case ('}') { rdr.bump(); ret token.RBRACE; }
+ case ('[') { rdr.bump(); ret token.LBRACKET; }
+ case (']') { rdr.bump(); ret token.RBRACKET; }
+ case ('@') { rdr.bump(); ret token.AT; }
+ case ('#') { rdr.bump(); ret token.POUND; }
+ case ('_') { rdr.bump(); ret token.UNDERSCORE; }
+ case ('~') { rdr.bump(); ret token.TILDE; }
+
+
+ // Multi-byte tokens.
+ case ('=') {
+ rdr.bump();
+ if (rdr.curr() == '=') {
+ rdr.bump();
+ ret token.EQEQ;
+ } else {
+ ret token.EQ;
+ }
+ }
+
+ case ('!') {
+ rdr.bump();
+ if (rdr.curr() == '=') {
+ rdr.bump();
+ ret token.NE;
+ } else {
+ ret token.NOT;
+ }
+ }
+
+ case ('<') {
+ rdr.bump();
+ alt (rdr.curr()) {
+ case ('=') {
+ rdr.bump();
+ ret token.LE;
+ }
+ case ('<') {
+ ret binop(rdr, token.LSL);
+ }
+ case ('-') {
+ rdr.bump();
+ ret token.LARROW;
+ }
+ case ('|') {
+ rdr.bump();
+ ret token.SEND;
+ }
+ case (_) {
+ ret token.LT;
+ }
+ }
+ }
+
+ case ('>') {
+ rdr.bump();
+ alt (rdr.curr()) {
+ case ('=') {
+ rdr.bump();
+ ret token.GE;
+ }
+
+ case ('>') {
+ if (rdr.next() == '>') {
+ rdr.bump();
+ ret binop(rdr, token.ASR);
+ } else {
+ ret binop(rdr, token.LSR);
+ }
+ }
+
+ case (_) {
+ ret token.GT;
+ }
+ }
+ }
+
+ case ('\'') {
+ rdr.bump();
+ auto c2 = rdr.curr();
+ if (c2 == '\\') {
+ alt (rdr.next()) {
+ case ('n') { rdr.bump(); c2 = '\n'; }
+ case ('r') { rdr.bump(); c2 = '\r'; }
+ case ('t') { rdr.bump(); c2 = '\t'; }
+ case ('\\') { rdr.bump(); c2 = '\\'; }
+ case ('\'') { rdr.bump(); c2 = '\''; }
+ // FIXME: unicode numeric escapes.
+ case (?c2) {
+ log "unknown character escape";
+ log c2;
+ fail;
+ }
+ }
+ }
+
+ if (rdr.next() != '\'') {
+ log "unterminated character constant";
+ fail;
+ }
+ rdr.bump();
+ rdr.bump();
+ ret token.LIT_CHAR(c2);
+ }
+
+ case ('"') {
+ rdr.bump();
+ // FIXME: general utf8-consumption support.
+ while (rdr.curr() != '"') {
+ alt (rdr.curr()) {
+ case ('\\') {
+ alt (rdr.next()) {
+ case ('n') {
+ rdr.bump();
+ accum_str += '\n' as u8;
+ }
+ case ('r') {
+ rdr.bump();
+ accum_str += '\r' as u8;
+ }
+ case ('t') {
+ rdr.bump();
+ accum_str += '\t' as u8;
+ }
+ case ('\\') {
+ rdr.bump();
+ accum_str += '\\' as u8;
+ }
+ case ('"') {
+ rdr.bump();
+ accum_str += '"' as u8;
+ }
+ // FIXME: unicode numeric escapes.
+ case (?c2) {
+ log "unknown string escape";
+ log c2;
+ fail;
+ }
+ }
+ }
+ case (_) {
+ accum_str += rdr.curr() as u8;
+ }
+ }
+ rdr.bump();
+ }
+ rdr.bump();
+ ret token.LIT_STR(accum_str);
+ }
+
+ case ('-') {
+ if (rdr.next() == '>') {
+ rdr.bump();
+ rdr.bump();
+ ret token.RARROW;
+ } else {
+ ret binop(rdr, token.MINUS);
+ }
+ }
+
+ case ('&') {
+ if (rdr.next() == '&') {
+ rdr.bump();
+ rdr.bump();
+ ret token.ANDAND;
+ } else {
+ ret binop(rdr, token.AND);
+ }
+ }
+
+ case ('|') {
+ if (rdr.next() == '|') {
+ rdr.bump();
+ rdr.bump();
+ ret token.OROR;
+ } else {
+ ret binop(rdr, token.OR);
+ }
+ }
+
+ case ('+') {
+ ret binop(rdr, token.PLUS);
+ }
+
+ case ('*') {
+ ret binop(rdr, token.STAR);
+ }
+
+ case ('/') {
+ ret binop(rdr, token.STAR);
+ }
+
+ case ('^') {
+ ret binop(rdr, token.CARET);
+ }
+
+ case ('%') {
+ ret binop(rdr, token.PERCENT);
+ }
+
+ }
+
+ log "lexer stopping at ";
+ log c;
+ ret token.EOF;
+}
+
+
+//
+// Local Variables:
+// mode: rust
+// fill-column: 78;
+// indent-tabs-mode: nil
+// c-basic-offset: 4
+// buffer-file-coding-system: utf-8-unix
+// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+// End:
+//