Populate tree.

author: Graydon Hoare <[email protected]> 2010-06-23 21:03:09 -0700
committer: Graydon Hoare <[email protected]> 2010-06-23 21:03:09 -0700
commit: d6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch)
tree: b425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/fe/lexer.mll
parent: Initial git commit. (diff)
download: rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz
rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip
1 files changed, 362 insertions, 0 deletions
diff --git a/src/boot/fe/lexer.mll b/src/boot/fe/lexer.mll
new file mode 100644
index 00000000..fb4d58c5
--- /dev/null
+++ b/src/boot/fe/lexer.mll
@@ -0,0 +1,362 @@
+
+
+{
+
+  open Token;;
+  open Common;;
+
+  (* Lexing error: a message paired with the source position it is about. *)
+  exception Lex_err of (string * Common.pos);;
+
+  (* Raise [Lex_err] with message [s], located at the (file, line, column)
+     where the most recently matched lexeme of [lexbuf] started. *)
+  let fail lexbuf s =
+    let start = lexbuf.Lexing.lex_start_p in
+    let column = start.Lexing.pos_cnum - start.Lexing.pos_bol in
+      raise (Lex_err (s, (start.Lexing.pos_fname,
+                          start.Lexing.pos_lnum, column)))
+  ;;
+
+  (* [p] moved to the next line, which starts at the char offset of [p]. *)
+  let bump_line p =
+    let lnum = p.Lexing.pos_lnum + 1 and bol = p.Lexing.pos_cnum in
+      { p with Lexing.pos_lnum = lnum; Lexing.pos_bol = bol } ;;
+
+  (* Reserved words of the language and the token each one lexes to.  The
+     [id] clause of [token] consults this table before producing IDENT. *)
+  let keyword_table = Hashtbl.create 100
+  let () =
+    let register (kwd, tok) = Common.htab_put keyword_table kwd tok in
+    List.iter register
+              [ ("mod", MOD);
+                ("use", USE);
+                ("meta", META);
+                ("auth", AUTH);
+                ("syntax", SYNTAX);
+
+                ("if", IF);
+                ("else", ELSE);
+                ("while", WHILE);
+                ("do", DO);
+                ("alt", ALT);
+                ("case", CASE);
+
+                ("for", FOR);
+                ("each", EACH);
+                ("put", PUT);
+                ("ret", RET);
+                ("be", BE);
+
+                ("fail", FAIL);
+                ("drop", DROP);
+
+                ("type", TYPE);
+                ("check", CHECK);
+                ("claim", CLAIM);
+                ("prove", PROVE);
+
+                ("io", IO);
+                ("state", STATE);
+                ("unsafe", UNSAFE);
+
+                ("native", NATIVE);
+                ("mutable", MUTABLE);
+                ("auto", AUTO);
+
+                ("fn", FN);
+                ("iter", ITER);
+
+                ("import", IMPORT);
+                ("export", EXPORT);
+
+                ("let", LET);
+
+                ("log", LOG);
+                ("spawn", SPAWN);
+                ("thread", THREAD);
+                ("yield", YIELD);
+                ("join", JOIN);
+
+                ("bool", BOOL);
+                ("int", INT);
+                ("uint", UINT);
+                ("char", CHAR);
+                ("str", STR);
+
+                ("rec", REC);
+                ("tup", TUP);
+                ("tag", TAG);
+                ("vec", VEC);
+                ("any", ANY);
+
+                ("obj", OBJ);
+
+                ("port", PORT);
+                ("chan", CHAN);
+
+                ("task", TASK);
+
+                ("true", LIT_BOOL true);
+                ("false", LIT_BOOL false);
+
+                ("in", IN);
+
+                ("as", AS);
+                ("with", WITH);
+
+                ("bind", BIND);
+
+                ("u8", MACH TY_u8);
+                ("u16", MACH TY_u16);
+                ("u32", MACH TY_u32);
+                ("u64", MACH TY_u64);
+                ("i8", MACH TY_i8);
+                ("i16", MACH TY_i16);
+                ("i32", MACH TY_i32);
+                ("i64", MACH TY_i64);
+                ("f32", MACH TY_f32);
+                ("f64", MACH TY_f64)
+              ]
+;;
+}
+
+let hexdig = ['0'-'9' 'a'-'f' 'A'-'F']      (* one hexadecimal digit *)
+let bin = "0b" ['0'-'1'] ['0'-'1' '_']*     (* binary integer literal *)
+let hex = "0x" hexdig (hexdig | '_')*       (* hexadecimal integer literal *)
+let dec = ['0'-'9']+                        (* run of decimal digits *)
+let exp = ['e' 'E'] ['+' '-']? dec          (* exponent part of a float *)
+let flo = dec ('.' dec exp? | exp)          (* float: fraction and/or exp *)
+(* Horizontal whitespace; the newline character has its own [token] clause. *)
+let ws = [' ' '\t' '\r']
+(* An identifier or keyword: letter or underscore, then letters/digits/_. *)
+let id = ['a'-'z' 'A'-'Z' '_'] ['a'-'z' 'A'-'Z' '0'-'9' '_']*
+
+(* Main entry point: skips whitespace and // comments, then returns the next
+   token.  Unrecognised characters and out-of-range integer literals are
+   reported via [fail] (as Lex_err) instead of escaping as a bare Failure. *)
+rule token = parse
+  ws+                          { token lexbuf }
+| '\n'                         { lexbuf.Lexing.lex_curr_p
+                                     <- (bump_line lexbuf.Lexing.lex_curr_p);
+                                 token lexbuf }
+| "//" [^'\n']*                { token lexbuf }
+
+| '+'                          { PLUS       }
+| '-'                          { MINUS      }
+| '*'                          { STAR       }
+| '/'                          { SLASH      }
+| '%'                          { PERCENT    }
+| '='                          { EQ         }
+| '<'                          { LT         }
+| "<="                         { LE         }
+| "=="                         { EQEQ       }
+| "!="                         { NE         }
+| ">="                         { GE         }
+| '>'                          { GT         }
+| '!'                          { NOT        }
+| '&'                          { AND        }
+| "&&"                         { ANDAND     }
+| '|'                          { OR         }
+| "||"                         { OROR       }
+| "<<"                         { LSL        }
+| ">>"                         { LSR        }
+| ">>>"                        { ASR        }
+| '~'                          { TILDE      }
+| '{'                          { LBRACE     }
+| '_' (dec as n)               { try IDX (int_of_string n) with Failure _ ->
+                                   fail lexbuf ("bad index: _" ^ n) }
+| '_'                          { UNDERSCORE }
+| '}'                          { RBRACE     }
+
+| "+="                         { OPEQ (PLUS)    }
+| "-="                         { OPEQ (MINUS)   }
+| "*="                         { OPEQ (STAR)    }
+| "/="                         { OPEQ (SLASH)   }
+| "%="                         { OPEQ (PERCENT) }
+| "&="                         { OPEQ (AND) }
+| "|="                         { OPEQ (OR)  }
+| "<<="                        { OPEQ (LSL) }
+| ">>="                        { OPEQ (LSR) }
+| ">>>="                       { OPEQ (ASR) }
+| "^="                         { OPEQ (CARET) }
+
+| '#'                          { POUND      }
+| '@'                          { AT         }
+| '^'                          { CARET      }
+| '.'                          { DOT        }
+| ','                          { COMMA      }
+| ';'                          { SEMI       }
+| ':'                          { COLON      }
+| "<-"                         { LARROW     }
+| "<|"                         { SEND       }
+| "->"                         { RARROW     }
+| '('                          { LPAREN     }
+| ')'                          { RPAREN     }
+| '['                          { LBRACKET   }
+| ']'                          { RBRACKET   }
+
+| id as i                      { try Hashtbl.find keyword_table i
+                                 with Not_found -> IDENT (i) }
+(* Int64.of_string raises Failure when the literal does not fit in 64 bits. *)
+| (bin | hex | dec) as n        { try LIT_INT (Int64.of_string n, n)
+                                  with Failure _ ->
+                                    fail lexbuf ("bad integer literal: " ^ n) }
+| flo as n                      { LIT_FLO n                         }
+
+| '\''                          { char lexbuf                       }
+| '"'                           { let buf = Buffer.create 32 in
+                                    str buf lexbuf                  }
+| eof                           { EOF        }
+| _ as c              { fail lexbuf ("bad character: " ^ (Char.escaped c)) }
+
+(* Body of a string literal (opening quote already consumed): appends bytes
+   to [buf], tracking raw newlines, until the closing quote yields LIT_STR.
+   UTF-8 lead bytes defer to [ext_str]; end of input is a [fail]. *)
+and str buf = parse
+    eof { fail lexbuf "unterminated string literal" }
+  | _ as ch
+    {
+      match ch with
+          '"' -> LIT_STR (Buffer.contents buf)
+        | '\\' -> str_escape buf lexbuf
+        | _ ->
+            Buffer.add_char buf ch;
+            if ch = '\n' then
+              lexbuf.Lexing.lex_curr_p <- bump_line lexbuf.Lexing.lex_curr_p;
+            let c = Char.code ch in
+              if bounds 0 c 0x7f then str buf lexbuf
+              else if (c land 0b1110_0000) = 0b1100_0000
+              then ext_str 1 buf lexbuf
+              else if (c land 0b1111_0000) = 0b1110_0000
+              then ext_str 2 buf lexbuf
+              else if (c land 0b1111_1000) = 0b1111_0000
+              then ext_str 3 buf lexbuf
+              else if (c land 0b1111_1100) = 0b1111_1000
+              then ext_str 4 buf lexbuf
+              else if (c land 0b1111_1110) = 0b1111_1100
+              then ext_str 5 buf lexbuf
+              else fail lexbuf "bad initial utf-8 byte"
+    }
+
+(* Escape sequence inside a string literal (the backslash has already been
+   consumed).  Appends the denoted character to [buf] and resumes [str]. *)
+and str_escape buf = parse
+    'x' ((hexdig hexdig) as h)
+  | 'u' ((hexdig hexdig hexdig hexdig) as h)
+  | 'U' ((hexdig hexdig hexdig hexdig hexdig hexdig hexdig hexdig) as h)
+      { (* [h] is a hex number; char_as_utf8 presumably UTF-8 encodes it. *)
+        Buffer.add_string buf (char_as_utf8 (int_of_string ("0x" ^ h)));
+        str buf lexbuf }
+  | 'n' { Buffer.add_char buf '\n'; str buf lexbuf }
+  | 'r' { Buffer.add_char buf '\r'; str buf lexbuf }
+  | 't' { Buffer.add_char buf '\t'; str buf lexbuf }
+  | '\\' { Buffer.add_char buf '\\'; str buf lexbuf }
+  | '"' { Buffer.add_char buf '"'; str buf lexbuf }
+  | eof { fail lexbuf "unterminated string literal" }
+  | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c))  }
+
+
+(* Reads [n] more UTF-8 continuation bytes (10xxxxxx) of a character inside
+   a string literal, appending each to [buf], then resumes [str]. *)
+and ext_str n buf = parse
+    eof { fail lexbuf "unterminated string literal" }
+  | _ as ch
+      {
+        let c = Char.code ch in
+          if (c land 0b1100_0000) <> 0b1000_0000
+          then fail lexbuf "bad trailing utf-8 byte"
+          else
+            begin
+              Buffer.add_char buf ch;
+              if n = 1 then str buf lexbuf else ext_str (n-1) buf lexbuf
+            end
+      }
+
+
+(* Body of a character literal, entered just after the opening apostrophe.
+   Decodes one escape or one UTF-8 sequence into a code point and passes it
+   to [end_char]; raw newlines are tracked and end of input is a [fail]. *)
+and char = parse
+    '\\' { char_escape lexbuf }
+  | eof  { fail lexbuf "unterminated character literal" }
+  | _ as ch
+    {
+      if ch = '\n' then
+        lexbuf.Lexing.lex_curr_p <- bump_line lexbuf.Lexing.lex_curr_p;
+      let c = Char.code ch in
+        if bounds 0 c 0x7f then end_char c lexbuf
+        else if (c land 0b1110_0000) = 0b1100_0000
+        then ext_char 1 (c land 0b0001_1111) lexbuf
+        else if (c land 0b1111_0000) = 0b1110_0000
+        then ext_char 2 (c land 0b0000_1111) lexbuf
+        else if (c land 0b1111_1000) = 0b1111_0000
+        then ext_char 3 (c land 0b0000_0111) lexbuf
+        else if (c land 0b1111_1100) = 0b1111_1000
+        then ext_char 4 (c land 0b0000_0011) lexbuf
+        else if (c land 0b1111_1110) = 0b1111_1100
+        then ext_char 5 (c land 0b0000_0001) lexbuf
+        else fail lexbuf "bad initial utf-8 byte"
+    }
+
+(* Escape sequence inside a character literal (the backslash has already
+   been consumed).  Passes the denoted code point to [end_char]; end of
+   input and unknown escapes are reported via [fail]. *)
+and char_escape = parse
+    'x' ((hexdig hexdig) as h)
+  | 'u' ((hexdig hexdig hexdig hexdig) as h)
+  | 'U' ((hexdig hexdig hexdig hexdig hexdig hexdig hexdig hexdig) as h)
+      { end_char (int_of_string ("0x" ^ h)) lexbuf }
+  | 'n' { end_char (Char.code '\n') lexbuf }
+  | 'r' { end_char (Char.code '\r') lexbuf }
+  | 't' { end_char (Char.code '\t') lexbuf }
+  | '\\' { end_char (Char.code '\\') lexbuf }
+  | '\'' { end_char (Char.code '\'') lexbuf }
+  | eof { fail lexbuf "unterminated character literal" }
+  | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c))  }
+
+
+(* Folds [n] more UTF-8 continuation bytes into the code point [accum],
+   six payload bits per byte, then hands the result to [end_char]. *)
+and ext_char n accum = parse
+    eof { fail lexbuf "unterminated character literal" }
+  | _ as c
+    { let c = Char.code c in
+        if (c land 0b1100_0000) <> 0b1000_0000
+        then fail lexbuf "bad trailing utf-8 byte"
+        else
+          let accum = (accum lsl 6) lor (c land 0b0011_1111) in
+            if n = 1 then end_char accum lexbuf
+            else ext_char (n-1) accum lexbuf
+    }
+
+and end_char accum = parse  (* closing apostrophe of a char literal *)
+    '\'' { LIT_CHAR accum }
+  | eof  { fail lexbuf "unterminated character literal" }
+  | _    { fail lexbuf "expected closing quote in character literal" }
+(* Collects brace-delimited text into [buf], with [depth] the current brace
+   nesting level: an unescaped closing brace at depth 1 ends the quote and
+   yields BRACEQUOTE.  Backslash-escaped braces are stored without the
+   backslash and leave [depth] alone; end of input is reported via [fail]. *)
+and bracequote buf depth = parse
+  '\\' '{'                      { Buffer.add_char buf '{';
+                                  bracequote buf depth lexbuf          }
+| '{'                           { Buffer.add_char buf '{';
+                                  bracequote buf (depth+1) lexbuf      }
+| '\\' '}'                      { Buffer.add_char buf '}';
+                                  bracequote buf depth lexbuf          }
+| '}'                           { if depth = 1
+                                  then BRACEQUOTE (Buffer.contents buf)
+                                  else
+                                    begin
+                                      Buffer.add_char buf '}';
+                                      bracequote buf (depth-1) lexbuf
+                                    end                                }
+
+| '\\' [^'{' '}']               { let s = Lexing.lexeme lexbuf in
+                                    Buffer.add_string buf s;
+                                    bracequote buf depth lexbuf        }
+| [^'\\' '{' '}']+              { let s = Lexing.lexeme lexbuf in
+                                    Buffer.add_string buf s;
+                                    bracequote buf depth lexbuf        }
+
+| eof                           { fail lexbuf "unterminated brace quote" }
author	Graydon Hoare <[email protected]>	2010-06-23 21:03:09 -0700
committer	Graydon Hoare <[email protected]>	2010-06-23 21:03:09 -0700
commit	d6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch)
tree	b425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/fe/lexer.mll
parent	Initial git commit. (diff)
download	rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip