about summary refs log tree commit diff
path: root/src/comp
diff options
context:
space:
mode:
author Graydon Hoare <[email protected]> 2010-08-27 12:36:57 -0700
committer Graydon Hoare <[email protected]> 2010-08-27 13:08:47 -0700
commit 1428b59a19610d838d7849c7ac518c40281fddd1 (patch)
tree 0487fee69e199737150c8f48f9149224d2412203 /src/comp
parent Read definitions of tag types in the DWARF (diff)
download rust-1428b59a19610d838d7849c7ac518c40281fddd1.tar.xz
rust-1428b59a19610d838d7849c7ac518c40281fddd1.zip
Modify rustboot to use lexer.reader.
Diffstat (limited to 'src/comp')
-rw-r--r-- src/comp/fe/lexer.rs 192
-rw-r--r-- src/comp/fe/parser.rs 5
2 files changed, 103 insertions, 94 deletions
diff --git a/src/comp/fe/lexer.rs b/src/comp/fe/lexer.rs
index caf3cd48..8ef8cea9 100644
--- a/src/comp/fe/lexer.rs
+++ b/src/comp/fe/lexer.rs
@@ -11,9 +11,10 @@ fn new_str_hash[V]() -> map.hashmap[str,V] {
type reader = obj {
fn is_eof() -> bool;
- fn peek() -> char;
+ fn curr() -> char;
+ fn next() -> char;
fn bump();
- fn get_pos() -> tup(str,uint,uint);
+ fn get_curr_pos() -> tup(str,uint,uint);
fn get_keywords() -> hashmap[str,token.token];
fn get_reserved() -> hashmap[str,()];
};
@@ -23,6 +24,7 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
obj reader(stdio_reader rdr,
str filename,
mutable char c,
+ mutable char n,
mutable uint line,
mutable uint col,
hashmap[str,token.token] keywords,
@@ -32,22 +34,33 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
ret c == (-1) as char;
}
- fn get_pos() -> tup(str,uint,uint) {
+ fn get_curr_pos() -> tup(str,uint,uint) {
ret tup(filename, line, col);
}
- fn peek() -> char {
+ fn curr() -> char {
ret c;
}
+ fn next() -> char {
+ ret n;
+ }
+
fn bump() {
- c = rdr.getc() as char;
+ c = n;
+
+ if (c == (-1) as char) {
+ ret;
+ }
+
if (c == '\n') {
line += 1u;
col = 0u;
} else {
col += 1u;
}
+
+ n = rdr.getc() as char;
}
fn get_keywords() -> hashmap[str,token.token] {
@@ -82,8 +95,8 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
keywords.insert("ret", token.RET());
keywords.insert("be", token.BE());
- ret reader(rdr, filename, rdr.getc() as char, 1u, 1u,
- keywords, reserved);
+ ret reader(rdr, filename, rdr.getc() as char, rdr.getc() as char,
+ 1u, 1u, keywords, reserved);
}
@@ -116,146 +129,138 @@ fn is_whitespace(char c) -> bool {
ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
-fn consume_any_whitespace(stdio_reader rdr, char c) -> char {
- auto c1 = c;
- while (is_whitespace(c1)) {
- c1 = rdr.getc() as char;
+fn consume_any_whitespace(reader rdr) {
+ while (is_whitespace(rdr.curr())) {
+ rdr.bump();
}
- be consume_any_line_comment(rdr, c1);
+ be consume_any_line_comment(rdr);
}
-fn consume_any_line_comment(stdio_reader rdr, char c) -> char {
- auto c1 = c;
- if (c1 == '/') {
- auto c2 = rdr.getc() as char;
- if (c2 == '/') {
- while (c1 != '\n') {
- c1 = rdr.getc() as char;
+fn consume_any_line_comment(reader rdr) {
+ if (rdr.curr() == '/') {
+ if (rdr.next() == '/') {
+ while (rdr.curr() != '\n') {
+ rdr.bump();
}
// Restart whitespace munch.
- be consume_any_whitespace(rdr, c1);
+ be consume_any_whitespace(rdr);
}
}
- ret c;
}
-fn next_token(stdio_reader rdr) -> token.token {
- auto eof = (-1) as char;
- auto c = rdr.getc() as char;
+fn next_token(reader rdr) -> token.token {
auto accum_str = "";
auto accum_int = 0;
- fn next(stdio_reader rdr) -> char {
- ret rdr.getc() as char;
- }
-
- fn forget(stdio_reader rdr, char c) {
- rdr.ungetc(c as int);
- }
+ consume_any_whitespace(rdr);
- c = consume_any_whitespace(rdr, c);
+ if (rdr.is_eof()) { ret token.EOF(); }
- if (c == eof) { ret token.EOF(); }
+ auto c = rdr.curr();
if (is_alpha(c)) {
- while (is_alpha(c)) {
+ while (is_alpha(rdr.curr())) {
+ c = rdr.curr();
accum_str += (c as u8);
- c = next(rdr);
+ rdr.bump();
}
- forget(rdr, c);
ret token.IDENT(accum_str);
}
if (is_dec_digit(c)) {
if (c == '0') {
+ log "fixme: leading zero";
+ fail;
} else {
while (is_dec_digit(c)) {
+ c = rdr.curr();
accum_int *= 10;
accum_int += (c as int) - ('0' as int);
- c = next(rdr);
+ rdr.bump();
}
- forget(rdr, c);
ret token.LIT_INT(accum_int);
}
}
- fn op_or_opeq(stdio_reader rdr, char c2,
- token.op op) -> token.token {
- if (c2 == '=') {
+ fn op_or_opeq(reader rdr, token.op op) -> token.token {
+ rdr.bump();
+ if (rdr.next() == '=') {
+ rdr.bump();
ret token.OPEQ(op);
} else {
- forget(rdr, c2);
ret token.OP(op);
}
}
alt (c) {
// One-byte tokens.
- case (';') { ret token.SEMI(); }
- case (',') { ret token.COMMA(); }
- case ('.') { ret token.DOT(); }
- case ('(') { ret token.LPAREN(); }
- case (')') { ret token.RPAREN(); }
- case ('{') { ret token.LBRACE(); }
- case ('}') { ret token.RBRACE(); }
- case ('[') { ret token.LBRACKET(); }
- case (']') { ret token.RBRACKET(); }
- case ('@') { ret token.AT(); }
- case ('#') { ret token.POUND(); }
+ case (';') { rdr.bump(); ret token.SEMI(); }
+ case (',') { rdr.bump(); ret token.COMMA(); }
+ case ('.') { rdr.bump(); ret token.DOT(); }
+ case ('(') { rdr.bump(); ret token.LPAREN(); }
+ case (')') { rdr.bump(); ret token.RPAREN(); }
+ case ('{') { rdr.bump(); ret token.LBRACE(); }
+ case ('}') { rdr.bump(); ret token.RBRACE(); }
+ case ('[') { rdr.bump(); ret token.LBRACKET(); }
+ case (']') { rdr.bump(); ret token.RBRACKET(); }
+ case ('@') { rdr.bump(); ret token.AT(); }
+ case ('#') { rdr.bump(); ret token.POUND(); }
// Multi-byte tokens.
case ('=') {
- auto c2 = next(rdr);
- if (c2 == '=') {
+ if (rdr.next() == '=') {
+ rdr.bump();
+ rdr.bump();
ret token.OP(token.EQEQ());
} else {
- forget(rdr, c2);
+ rdr.bump();
ret token.OP(token.EQ());
}
}
case ('\'') {
- // FIXME: general utf8-consumption support.
- auto c2 = next(rdr);
+ rdr.bump();
+ auto c2 = rdr.curr();
if (c2 == '\\') {
- c2 = next(rdr);
- alt (c2) {
- case ('n') { c2 = '\n'; }
- case ('r') { c2 = '\r'; }
- case ('t') { c2 = '\t'; }
- case ('\\') { c2 = '\\'; }
- case ('\'') { c2 = '\''; }
+ alt (rdr.next()) {
+ case ('n') { rdr.bump(); c2 = '\n'; }
+ case ('r') { rdr.bump(); c2 = '\r'; }
+ case ('t') { rdr.bump(); c2 = '\t'; }
+ case ('\\') { rdr.bump(); c2 = '\\'; }
+ case ('\'') { rdr.bump(); c2 = '\''; }
// FIXME: unicode numeric escapes.
- case (_) {
+ case (c2) {
log "unknown character escape";
log c2;
fail;
}
}
}
- if (next(rdr) != '\'') {
+
+ if (rdr.next() != '\'') {
log "unterminated character constant";
fail;
}
+ rdr.bump();
+ rdr.bump();
ret token.LIT_CHAR(c2);
}
case ('"') {
+ rdr.bump();
// FIXME: general utf8-consumption support.
- auto c2 = next(rdr);
- while (c2 != '"') {
- alt (c2) {
+ while (rdr.curr() != '"') {
+ alt (rdr.curr()) {
case ('\\') {
- c2 = next(rdr);
- alt (c2) {
- case ('n') { accum_str += '\n' as u8; }
- case ('r') { accum_str += '\r' as u8; }
- case ('t') { accum_str += '\t' as u8; }
- case ('\\') { accum_str += '\\' as u8; }
- case ('"') { accum_str += '"' as u8; }
+ alt (rdr.next()) {
+ case ('n') { rdr.bump(); accum_str += '\n' as u8; }
+ case ('r') { rdr.bump(); accum_str += '\r' as u8; }
+ case ('t') { rdr.bump(); accum_str += '\t' as u8; }
+ case ('\\') { rdr.bump(); accum_str += '\\' as u8; }
+ case ('"') { rdr.bump(); accum_str += '"' as u8; }
// FIXME: unicode numeric escapes.
- case (_) {
+ case (c2) {
log "unknown string escape";
log c2;
fail;
@@ -263,54 +268,57 @@ fn next_token(stdio_reader rdr) -> token.token {
}
}
case (_) {
- accum_str += c2 as u8;
+ accum_str += rdr.curr() as u8;
}
}
- c2 = next(rdr);
+ rdr.bump();
}
+ rdr.bump();
ret token.LIT_STR(accum_str);
}
case ('-') {
- auto c2 = next(rdr);
- if (c2 == '>') {
+ if (rdr.next() == '>') {
+ rdr.bump();
+ rdr.bump();
ret token.RARROW();
} else {
- ret op_or_opeq(rdr, c2, token.MINUS());
+ ret op_or_opeq(rdr, token.MINUS());
}
}
case ('&') {
- auto c2 = next(rdr);
- if (c2 == '&') {
+ if (rdr.next() == '&') {
+ rdr.bump();
+ rdr.bump();
ret token.OP(token.ANDAND());
} else {
- ret op_or_opeq(rdr, c2, token.AND());
+ ret op_or_opeq(rdr, token.AND());
}
}
case ('+') {
- ret op_or_opeq(rdr, next(rdr), token.PLUS());
+ ret op_or_opeq(rdr, token.PLUS());
}
case ('*') {
- ret op_or_opeq(rdr, next(rdr), token.STAR());
+ ret op_or_opeq(rdr, token.STAR());
}
case ('/') {
- ret op_or_opeq(rdr, next(rdr), token.STAR());
+ ret op_or_opeq(rdr, token.STAR());
}
case ('!') {
- ret op_or_opeq(rdr, next(rdr), token.NOT());
+ ret op_or_opeq(rdr, token.NOT());
}
case ('^') {
- ret op_or_opeq(rdr, next(rdr), token.CARET());
+ ret op_or_opeq(rdr, token.CARET());
}
case ('%') {
- ret op_or_opeq(rdr, next(rdr), token.PERCENT());
+ ret op_or_opeq(rdr, token.PERCENT());
}
}
diff --git a/src/comp/fe/parser.rs b/src/comp/fe/parser.rs
index 02de22a7..d2da7910 100644
--- a/src/comp/fe/parser.rs
+++ b/src/comp/fe/parser.rs
@@ -8,7 +8,7 @@ state type parser =
fn new_parser(str path) -> parser {
state obj stdio_parser(mutable token.token tok,
- _io.stdio_reader rdr)
+ lexer.reader rdr)
{
state fn peek() -> token.token {
ret tok;
@@ -17,7 +17,8 @@ fn new_parser(str path) -> parser {
tok = lexer.next_token(rdr);
}
}
- auto rdr = _io.new_stdio_reader(path);
+ auto srdr = _io.new_stdio_reader(path);
+ auto rdr = lexer.new_reader(srdr, path);
ret stdio_parser(lexer.next_token(rdr), rdr);
}