aboutsummaryrefslogtreecommitdiff
path: root/src/boot/fe/lexer.mll
diff options
context:
space:
mode:
authorGraydon Hoare <[email protected]>2010-06-23 21:03:09 -0700
committerGraydon Hoare <[email protected]>2010-06-23 21:03:09 -0700
commitd6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch)
treeb425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/fe/lexer.mll
parentInitial git commit. (diff)
downloadrust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz
rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip
Populate tree.
Diffstat (limited to 'src/boot/fe/lexer.mll')
-rw-r--r--src/boot/fe/lexer.mll362
1 files changed, 362 insertions, 0 deletions
diff --git a/src/boot/fe/lexer.mll b/src/boot/fe/lexer.mll
new file mode 100644
index 00000000..fb4d58c5
--- /dev/null
+++ b/src/boot/fe/lexer.mll
@@ -0,0 +1,362 @@
+
+
+{
+
+ open Token;;
+ open Common;;
+
+ exception Lex_err of (string * Common.pos);;
+
+ let fail lexbuf s =
+ let p = lexbuf.Lexing.lex_start_p in
+ let pos =
+ (p.Lexing.pos_fname,
+ p.Lexing.pos_lnum ,
+ (p.Lexing.pos_cnum) - (p.Lexing.pos_bol))
+ in
+ raise (Lex_err (s, pos))
+ ;;
+
+ let bump_line p = { p with
+ Lexing.pos_lnum = p.Lexing.pos_lnum + 1;
+ Lexing.pos_bol = p.Lexing.pos_cnum }
+ ;;
+
+ let keyword_table = Hashtbl.create 100
+ let _ =
+ List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok)
+ [ ("mod", MOD);
+ ("use", USE);
+ ("meta", META);
+ ("auth", AUTH);
+
+ ("syntax", SYNTAX);
+
+ ("if", IF);
+ ("else", ELSE);
+ ("while", WHILE);
+ ("do", DO);
+ ("alt", ALT);
+ ("case", CASE);
+
+ ("for", FOR);
+ ("each", EACH);
+ ("put", PUT);
+ ("ret", RET);
+ ("be", BE);
+
+ ("fail", FAIL);
+ ("drop", DROP);
+
+ ("type", TYPE);
+ ("check", CHECK);
+ ("claim", CLAIM);
+ ("prove", PROVE);
+
+ ("io", IO);
+ ("state", STATE);
+ ("unsafe", UNSAFE);
+
+ ("native", NATIVE);
+ ("mutable", MUTABLE);
+ ("auto", AUTO);
+
+ ("fn", FN);
+ ("iter", ITER);
+
+ ("import", IMPORT);
+ ("export", EXPORT);
+
+ ("let", LET);
+
+ ("log", LOG);
+ ("spawn", SPAWN);
+ ("thread", THREAD);
+ ("yield", YIELD);
+ ("join", JOIN);
+
+ ("bool", BOOL);
+
+ ("int", INT);
+ ("uint", UINT);
+
+ ("char", CHAR);
+ ("str", STR);
+
+ ("rec", REC);
+ ("tup", TUP);
+ ("tag", TAG);
+ ("vec", VEC);
+ ("any", ANY);
+
+ ("obj", OBJ);
+
+ ("port", PORT);
+ ("chan", CHAN);
+
+ ("task", TASK);
+
+ ("true", LIT_BOOL true);
+ ("false", LIT_BOOL false);
+
+ ("in", IN);
+
+ ("as", AS);
+ ("with", WITH);
+
+ ("bind", BIND);
+
+ ("u8", MACH TY_u8);
+ ("u16", MACH TY_u16);
+ ("u32", MACH TY_u32);
+ ("u64", MACH TY_u64);
+ ("i8", MACH TY_i8);
+ ("i16", MACH TY_i16);
+ ("i32", MACH TY_i32);
+ ("i64", MACH TY_i64);
+ ("f32", MACH TY_f32);
+ ("f64", MACH TY_f64)
+ ]
+;;
+}
+
+let hexdig = ['0'-'9' 'a'-'f' 'A'-'F']
+let bin = "0b" ['0' '1']['0' '1' '_']*
+let hex = "0x" hexdig ['0'-'9' 'a'-'f' 'A'-'F' '_']*
+let dec = ['0'-'9']+
+let exp = ['e''E']['-''+']? dec
+let flo = (dec '.' dec (exp?)) | (dec exp)
+
+let ws = [ ' ' '\t' '\r' ]
+
+let id = ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']*
+
+rule token = parse
+ ws+ { token lexbuf }
+| '\n' { lexbuf.Lexing.lex_curr_p
+ <- (bump_line lexbuf.Lexing.lex_curr_p);
+ token lexbuf }
+| "//" [^'\n']* { token lexbuf }
+
+| '+' { PLUS }
+| '-' { MINUS }
+| '*' { STAR }
+| '/' { SLASH }
+| '%' { PERCENT }
+| '=' { EQ }
+| '<' { LT }
+| "<=" { LE }
+| "==" { EQEQ }
+| "!=" { NE }
+| ">=" { GE }
+| '>' { GT }
+| '!' { NOT }
+| '&' { AND }
+| "&&" { ANDAND }
+| '|' { OR }
+| "||" { OROR }
+| "<<" { LSL }
+| ">>" { LSR }
+| ">>>" { ASR }
+| '~' { TILDE }
+| '{' { LBRACE }
+| '_' (dec as n) { IDX (int_of_string n) }
+| '_' { UNDERSCORE }
+| '}' { RBRACE }
+
+| "+=" { OPEQ (PLUS) }
+| "-=" { OPEQ (MINUS) }
+| "*=" { OPEQ (STAR) }
+| "/=" { OPEQ (SLASH) }
+| "%=" { OPEQ (PERCENT) }
+| "&=" { OPEQ (AND) }
+| "|=" { OPEQ (OR) }
+| "<<=" { OPEQ (LSL) }
+| ">>=" { OPEQ (LSR) }
+| ">>>=" { OPEQ (ASR) }
+| "^=" { OPEQ (CARET) }
+
+| '#' { POUND }
+| '@' { AT }
+| '^' { CARET }
+| '.' { DOT }
+| ',' { COMMA }
+| ';' { SEMI }
+| ':' { COLON }
+| "<-" { LARROW }
+| "<|" { SEND }
+| "->" { RARROW }
+| '(' { LPAREN }
+| ')' { RPAREN }
+| '[' { LBRACKET }
+| ']' { RBRACKET }
+
+| id as i
+ { try
+ Hashtbl.find keyword_table i
+ with
+ Not_found -> IDENT (i)
+ }
+
+| bin as n { LIT_INT (Int64.of_string n, n) }
+| hex as n { LIT_INT (Int64.of_string n, n) }
+| dec as n { LIT_INT (Int64.of_string n, n) }
+| flo as n { LIT_FLO n }
+
+| '\'' { char lexbuf }
+| '"' { let buf = Buffer.create 32 in
+ str buf lexbuf }
+
+| eof { EOF }
+
+and str buf = parse
+ _ as ch
+ {
+ match ch with
+ '"' -> LIT_STR (Buffer.contents buf)
+ | '\\' -> str_escape buf lexbuf
+ | _ ->
+ Buffer.add_char buf ch;
+ let c = Char.code ch in
+ if bounds 0 c 0x7f
+ then str buf lexbuf
+ else
+ if ((c land 0b1110_0000) == 0b1100_0000)
+ then ext_str 1 buf lexbuf
+ else
+ if ((c land 0b1111_0000) == 0b1110_0000)
+ then ext_str 2 buf lexbuf
+ else
+ if ((c land 0b1111_1000) == 0b1111_0000)
+ then ext_str 3 buf lexbuf
+ else
+ if ((c land 0b1111_1100) == 0b1111_1000)
+ then ext_str 4 buf lexbuf
+ else
+ if ((c land 0b1111_1110) == 0b1111_1100)
+ then ext_str 5 buf lexbuf
+ else fail lexbuf "bad initial utf-8 byte"
+ }
+
+and str_escape buf = parse
+ 'x' ((hexdig hexdig) as h)
+ | 'u' ((hexdig hexdig hexdig hexdig) as h)
+ | 'U'
+ ((hexdig hexdig hexdig hexdig
+ hexdig hexdig hexdig hexdig) as h)
+ {
+ Buffer.add_string buf (char_as_utf8 (int_of_string ("0x" ^ h)));
+ str buf lexbuf
+ }
+ | 'n' { Buffer.add_char buf '\n'; str buf lexbuf }
+ | 'r' { Buffer.add_char buf '\r'; str buf lexbuf }
+ | 't' { Buffer.add_char buf '\t'; str buf lexbuf }
+ | '\\' { Buffer.add_char buf '\\'; str buf lexbuf }
+ | '"' { Buffer.add_char buf '"'; str buf lexbuf }
+ | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) }
+
+
+and ext_str n buf = parse
+ _ as ch
+ {
+ let c = Char.code ch in
+ if ((c land 0b1100_0000) == (0b1000_0000))
+ then
+ begin
+ Buffer.add_char buf ch;
+ if n = 1
+ then str buf lexbuf
+ else ext_str (n-1) buf lexbuf
+ end
+ else
+ fail lexbuf "bad trailing utf-8 byte"
+ }
+
+
+and char = parse
+ '\\' { char_escape lexbuf }
+ | _ as c
+ {
+ let c = Char.code c in
+ if bounds 0 c 0x7f
+ then end_char c lexbuf
+ else
+ if ((c land 0b1110_0000) == 0b1100_0000)
+ then ext_char 1 (c land 0b0001_1111) lexbuf
+ else
+ if ((c land 0b1111_0000) == 0b1110_0000)
+ then ext_char 2 (c land 0b0000_1111) lexbuf
+ else
+ if ((c land 0b1111_1000) == 0b1111_0000)
+ then ext_char 3 (c land 0b0000_0111) lexbuf
+ else
+ if ((c land 0b1111_1100) == 0b1111_1000)
+ then ext_char 4 (c land 0b0000_0011) lexbuf
+ else
+ if ((c land 0b1111_1110) == 0b1111_1100)
+ then ext_char 5 (c land 0b0000_0001) lexbuf
+ else fail lexbuf "bad initial utf-8 byte"
+ }
+
+and char_escape = parse
+ 'x' ((hexdig hexdig) as h)
+ | 'u' ((hexdig hexdig hexdig hexdig) as h)
+ | 'U'
+ ((hexdig hexdig hexdig hexdig
+ hexdig hexdig hexdig hexdig) as h)
+ {
+ end_char (int_of_string ("0x" ^ h)) lexbuf
+ }
+ | 'n' { end_char (Char.code '\n') lexbuf }
+ | 'r' { end_char (Char.code '\r') lexbuf }
+ | 't' { end_char (Char.code '\t') lexbuf }
+ | '\\' { end_char (Char.code '\\') lexbuf }
+ | '\'' { end_char (Char.code '\'') lexbuf }
+ | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) }
+
+
+and ext_char n accum = parse
+ _ as c
+ {
+ let c = Char.code c in
+ if ((c land 0b1100_0000) == (0b1000_0000))
+ then
+ let accum = (accum lsl 6) lor (c land 0b0011_1111) in
+ if n = 1
+ then end_char accum lexbuf
+ else ext_char (n-1) accum lexbuf
+ else
+ fail lexbuf "bad trailing utf-8 byte"
+ }
+
+and end_char accum = parse
+ '\'' { LIT_CHAR accum }
+
+
+and bracequote buf depth = parse
+
+ '\\' '{' { Buffer.add_char buf '{';
+ bracequote buf depth lexbuf }
+
+| '{' { Buffer.add_char buf '{';
+ bracequote buf (depth+1) lexbuf }
+
+| '\\' '}' { Buffer.add_char buf '}';
+ bracequote buf depth lexbuf }
+
+| '}' { if depth = 1
+ then BRACEQUOTE (Buffer.contents buf)
+ else
+ begin
+ Buffer.add_char buf '}';
+ bracequote buf (depth-1) lexbuf
+ end }
+
+| '\\' [^'{' '}'] { let s = Lexing.lexeme lexbuf in
+ Buffer.add_string buf s;
+ bracequote buf depth lexbuf }
+
+
+| [^'\\' '{' '}']+ { let s = Lexing.lexeme lexbuf in
+ Buffer.add_string buf s;
+ bracequote buf depth lexbuf }