diff options
| author | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
|---|---|---|
| committer | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
| commit | d6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch) | |
| tree | b425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/fe/lexer.mll | |
| parent | Initial git commit. (diff) | |
| download | rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip | |
Populate tree.
Diffstat (limited to 'src/boot/fe/lexer.mll')
| -rw-r--r-- | src/boot/fe/lexer.mll | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/src/boot/fe/lexer.mll b/src/boot/fe/lexer.mll new file mode 100644 index 00000000..fb4d58c5 --- /dev/null +++ b/src/boot/fe/lexer.mll @@ -0,0 +1,362 @@ + + +{ + + open Token;; + open Common;; + + exception Lex_err of (string * Common.pos);; + + let fail lexbuf s = + let p = lexbuf.Lexing.lex_start_p in + let pos = + (p.Lexing.pos_fname, + p.Lexing.pos_lnum , + (p.Lexing.pos_cnum) - (p.Lexing.pos_bol)) + in + raise (Lex_err (s, pos)) + ;; + + let bump_line p = { p with + Lexing.pos_lnum = p.Lexing.pos_lnum + 1; + Lexing.pos_bol = p.Lexing.pos_cnum } + ;; + + let keyword_table = Hashtbl.create 100 + let _ = + List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok) + [ ("mod", MOD); + ("use", USE); + ("meta", META); + ("auth", AUTH); + + ("syntax", SYNTAX); + + ("if", IF); + ("else", ELSE); + ("while", WHILE); + ("do", DO); + ("alt", ALT); + ("case", CASE); + + ("for", FOR); + ("each", EACH); + ("put", PUT); + ("ret", RET); + ("be", BE); + + ("fail", FAIL); + ("drop", DROP); + + ("type", TYPE); + ("check", CHECK); + ("claim", CLAIM); + ("prove", PROVE); + + ("io", IO); + ("state", STATE); + ("unsafe", UNSAFE); + + ("native", NATIVE); + ("mutable", MUTABLE); + ("auto", AUTO); + + ("fn", FN); + ("iter", ITER); + + ("import", IMPORT); + ("export", EXPORT); + + ("let", LET); + + ("log", LOG); + ("spawn", SPAWN); + ("thread", THREAD); + ("yield", YIELD); + ("join", JOIN); + + ("bool", BOOL); + + ("int", INT); + ("uint", UINT); + + ("char", CHAR); + ("str", STR); + + ("rec", REC); + ("tup", TUP); + ("tag", TAG); + ("vec", VEC); + ("any", ANY); + + ("obj", OBJ); + + ("port", PORT); + ("chan", CHAN); + + ("task", TASK); + + ("true", LIT_BOOL true); + ("false", LIT_BOOL false); + + ("in", IN); + + ("as", AS); + ("with", WITH); + + ("bind", BIND); + + ("u8", MACH TY_u8); + ("u16", MACH TY_u16); + ("u32", MACH TY_u32); + ("u64", MACH TY_u64); + ("i8", MACH TY_i8); + ("i16", MACH TY_i16); + ("i32", MACH TY_i32); + ("i64", MACH TY_i64); + ("f32", MACH TY_f32); + ("f64", MACH TY_f64) + ] +;; +} + +let hexdig = ['0'-'9' 'a'-'f' 'A'-'F'] +let bin = "0b" ['0' '1']['0' '1' '_']* +let hex = "0x" hexdig ['0'-'9' 'a'-'f' 'A'-'F' '_']* +let dec = ['0'-'9']+ +let exp = ['e''E']['-''+']? dec +let flo = (dec '.' dec (exp?)) | (dec exp) + +let ws = [ ' ' '\t' '\r' ] + +let id = ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']* + +rule token = parse + ws+ { token lexbuf } +| '\n' { lexbuf.Lexing.lex_curr_p + <- (bump_line lexbuf.Lexing.lex_curr_p); + token lexbuf } +| "//" [^'\n']* { token lexbuf } + +| '+' { PLUS } +| '-' { MINUS } +| '*' { STAR } +| '/' { SLASH } +| '%' { PERCENT } +| '=' { EQ } +| '<' { LT } +| "<=" { LE } +| "==" { EQEQ } +| "!=" { NE } +| ">=" { GE } +| '>' { GT } +| '!' { NOT } +| '&' { AND } +| "&&" { ANDAND } +| '|' { OR } +| "||" { OROR } +| "<<" { LSL } +| ">>" { LSR } +| ">>>" { ASR } +| '~' { TILDE } +| '{' { LBRACE } +| '_' (dec as n) { IDX (int_of_string n) } +| '_' { UNDERSCORE } +| '}' { RBRACE } + +| "+=" { OPEQ (PLUS) } +| "-=" { OPEQ (MINUS) } +| "*=" { OPEQ (STAR) } +| "/=" { OPEQ (SLASH) } +| "%=" { OPEQ (PERCENT) } +| "&=" { OPEQ (AND) } +| "|=" { OPEQ (OR) } +| "<<=" { OPEQ (LSL) } +| ">>=" { OPEQ (LSR) } +| ">>>=" { OPEQ (ASR) } +| "^=" { OPEQ (CARET) } + +| '#' { POUND } +| '@' { AT } +| '^' { CARET } +| '.' { DOT } +| ',' { COMMA } +| ';' { SEMI } +| ':' { COLON } +| "<-" { LARROW } +| "<|" { SEND } +| "->" { RARROW } +| '(' { LPAREN } +| ')' { RPAREN } +| '[' { LBRACKET } +| ']' { RBRACKET } + +| id as i + { try + Hashtbl.find keyword_table i + with + Not_found -> IDENT (i) + } + +| bin as n { LIT_INT (Int64.of_string n, n) } +| hex as n { LIT_INT (Int64.of_string n, n) } +| dec as n { LIT_INT (Int64.of_string n, n) } +| flo as n { LIT_FLO n } + +| '\'' { char lexbuf } +| '"' { let buf = Buffer.create 32 in + str buf lexbuf } + +| eof { EOF } + +and str buf = parse + _ as ch + { + match ch with + '"' -> LIT_STR (Buffer.contents buf) + | '\\' -> str_escape buf lexbuf + | _ -> + Buffer.add_char buf ch; + let c = Char.code ch in + if bounds 0 c 0x7f + then str buf lexbuf + else + if ((c land 0b1110_0000) == 0b1100_0000) + then ext_str 1 buf lexbuf + else + if ((c land 0b1111_0000) == 0b1110_0000) + then ext_str 2 buf lexbuf + else + if ((c land 0b1111_1000) == 0b1111_0000) + then ext_str 3 buf lexbuf + else + if ((c land 0b1111_1100) == 0b1111_1000) + then ext_str 4 buf lexbuf + else + if ((c land 0b1111_1110) == 0b1111_1100) + then ext_str 5 buf lexbuf + else fail lexbuf "bad initial utf-8 byte" + } + +and str_escape buf = parse + 'x' ((hexdig hexdig) as h) + | 'u' ((hexdig hexdig hexdig hexdig) as h) + | 'U' + ((hexdig hexdig hexdig hexdig + hexdig hexdig hexdig hexdig) as h) + { + Buffer.add_string buf (char_as_utf8 (int_of_string ("0x" ^ h))); + str buf lexbuf + } + | 'n' { Buffer.add_char buf '\n'; str buf lexbuf } + | 'r' { Buffer.add_char buf '\r'; str buf lexbuf } + | 't' { Buffer.add_char buf '\t'; str buf lexbuf } + | '\\' { Buffer.add_char buf '\\'; str buf lexbuf } + | '"' { Buffer.add_char buf '"'; str buf lexbuf } + | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) } + + +and ext_str n buf = parse + _ as ch + { + let c = Char.code ch in + if ((c land 0b1100_0000) == (0b1000_0000)) + then + begin + Buffer.add_char buf ch; + if n = 1 + then str buf lexbuf + else ext_str (n-1) buf lexbuf + end + else + fail lexbuf "bad trailing utf-8 byte" + } + + +and char = parse + '\\' { char_escape lexbuf } + | _ as c + { + let c = Char.code c in + if bounds 0 c 0x7f + then end_char c lexbuf + else + if ((c land 0b1110_0000) == 0b1100_0000) + then ext_char 1 (c land 0b0001_1111) lexbuf + else + if ((c land 0b1111_0000) == 0b1110_0000) + then ext_char 2 (c land 0b0000_1111) lexbuf + else + if ((c land 0b1111_1000) == 0b1111_0000) + then ext_char 3 (c land 0b0000_0111) lexbuf + else + if ((c land 0b1111_1100) == 0b1111_1000) + then ext_char 4 (c land 0b0000_0011) lexbuf + else + if ((c land 0b1111_1110) == 0b1111_1100) + then ext_char 5 (c land 0b0000_0001) lexbuf + else fail lexbuf "bad initial utf-8 byte" + } + +and char_escape = parse + 'x' ((hexdig hexdig) as h) + | 'u' ((hexdig hexdig hexdig hexdig) as h) + | 'U' + ((hexdig hexdig hexdig hexdig + hexdig hexdig hexdig hexdig) as h) + { + end_char (int_of_string ("0x" ^ h)) lexbuf + } + | 'n' { end_char (Char.code '\n') lexbuf } + | 'r' { end_char (Char.code '\r') lexbuf } + | 't' { end_char (Char.code '\t') lexbuf } + | '\\' { end_char (Char.code '\\') lexbuf } + | '\'' { end_char (Char.code '\'') lexbuf } + | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) } + + +and ext_char n accum = parse + _ as c + { + let c = Char.code c in + if ((c land 0b1100_0000) == (0b1000_0000)) + then + let accum = (accum lsl 6) lor (c land 0b0011_1111) in + if n = 1 + then end_char accum lexbuf + else ext_char (n-1) accum lexbuf + else + fail lexbuf "bad trailing utf-8 byte" + } + +and end_char accum = parse + '\'' { LIT_CHAR accum } + + +and bracequote buf depth = parse + + '\\' '{' { Buffer.add_char buf '{'; + bracequote buf depth lexbuf } + +| '{' { Buffer.add_char buf '{'; + bracequote buf (depth+1) lexbuf } + +| '\\' '}' { Buffer.add_char buf '}'; + bracequote buf depth lexbuf } + +| '}' { if depth = 1 + then BRACEQUOTE (Buffer.contents buf) + else + begin + Buffer.add_char buf '}'; + bracequote buf (depth-1) lexbuf + end } + +| '\\' [^'{' '}'] { let s = Lexing.lexeme lexbuf in + Buffer.add_string buf s; + bracequote buf depth lexbuf } + + +| [^'\\' '{' '}']+ { let s = Lexing.lexeme lexbuf in + Buffer.add_string buf s; + bracequote buf depth lexbuf } |