From d6b7c96c3eb29b9244ece0c046d3f372ff432d04 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Wed, 23 Jun 2010 21:03:09 -0700 Subject: Populate tree. --- src/boot/fe/parser.ml | 374 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 374 insertions(+) create mode 100644 src/boot/fe/parser.ml (limited to 'src/boot/fe/parser.ml') diff --git a/src/boot/fe/parser.ml b/src/boot/fe/parser.ml new file mode 100644 index 00000000..3dda93ac --- /dev/null +++ b/src/boot/fe/parser.ml @@ -0,0 +1,374 @@ + +open Common;; +open Token;; + +(* Fundamental parser types and actions *) + +type get_mod_fn = (Ast.meta_pat + -> node_id + -> (node_id ref) + -> (opaque_id ref) + -> (filename * Ast.mod_items)) +;; + +type pstate = + { mutable pstate_peek : token; + mutable pstate_ctxt : (string * pos) list; + mutable pstate_rstr : bool; + mutable pstate_depth: int; + pstate_lexbuf : Lexing.lexbuf; + pstate_file : filename; + pstate_sess : Session.sess; + pstate_temp_id : temp_id ref; + pstate_node_id : node_id ref; + pstate_opaque_id : opaque_id ref; + pstate_get_mod : get_mod_fn; + pstate_infer_lib_name : (Ast.ident -> filename); + pstate_required : (node_id, (required_lib * nabi_conv)) Hashtbl.t; + pstate_required_syms : (node_id, string) Hashtbl.t; } +;; + +let log (ps:pstate) = Session.log "parse" + ps.pstate_sess.Session.sess_log_parse + ps.pstate_sess.Session.sess_log_out +;; + +let iflog ps thunk = + if ps.pstate_sess.Session.sess_log_parse + then thunk () + else () +;; + +let make_parser + (tref:temp_id ref) + (nref:node_id ref) + (oref:opaque_id ref) + (sess:Session.sess) + (get_mod:get_mod_fn) + (infer_lib_name:Ast.ident -> filename) + (required:(node_id, (required_lib * nabi_conv)) Hashtbl.t) + (required_syms:(node_id, string) Hashtbl.t) + (fname:string) + : pstate = + let lexbuf = Lexing.from_channel (open_in fname) in + let spos = { lexbuf.Lexing.lex_start_p with Lexing.pos_fname = fname } in + let cpos = { lexbuf.Lexing.lex_curr_p with Lexing.pos_fname = fname } in + lexbuf.Lexing.lex_start_p <- spos; + lexbuf.Lexing.lex_curr_p <- cpos; + let first = Lexer.token lexbuf in + let ps = + { pstate_peek = first; + pstate_ctxt = []; + pstate_rstr = false; + pstate_depth = 0; + pstate_lexbuf = lexbuf; + pstate_file = fname; + pstate_sess = sess; + pstate_temp_id = tref; + pstate_node_id = nref; + pstate_opaque_id = oref; + pstate_get_mod = get_mod; + pstate_infer_lib_name = infer_lib_name; + pstate_required = required; + pstate_required_syms = required_syms; } + in + iflog ps (fun _ -> log ps "made parser for: %s\n%!" fname); + ps +;; + +exception Parse_err of (pstate * string) +;; + +let lexpos (ps:pstate) : pos = + let p = ps.pstate_lexbuf.Lexing.lex_start_p in + (p.Lexing.pos_fname, + p.Lexing.pos_lnum , + (p.Lexing.pos_cnum) - (p.Lexing.pos_bol)) +;; + +let next_node_id (ps:pstate) : node_id = + let id = !(ps.pstate_node_id) in + ps.pstate_node_id := Node ((int_of_node id)+1); + id +;; + +let next_opaque_id (ps:pstate) : opaque_id = + let id = !(ps.pstate_opaque_id) in + ps.pstate_opaque_id := Opaque ((int_of_opaque id)+1); + id +;; + +let span + (ps:pstate) + (apos:pos) + (bpos:pos) + (x:'a) + : 'a identified = + let span = { lo = apos; hi = bpos } in + let id = next_node_id ps in + iflog ps (fun _ -> log ps "span for node #%d: %s" + (int_of_node id) (Session.string_of_span span)); + htab_put ps.pstate_sess.Session.sess_spans id span; + { node = x; id = id } +;; + +let decl p i = + { Ast.decl_params = p; + Ast.decl_item = i } +;; + +let spans + (ps:pstate) + (things:('a identified) array) + (apos:pos) + (thing:'a) + : ('a identified) array = + Array.append things [| (span ps apos (lexpos ps) thing) |] +;; + +(* + * The point of this is to make a new node_id entry for a node that is a + * "copy" of an lval returned from somewhere else. For example if you create + * a temp, the lval it returns can only be used in *one* place, for the + * node_id denotes the place that lval is first used; subsequent uses of + * 'the same' reference must clone_lval it into a new node_id. Otherwise + * there is trouble. + *) + +let clone_span + (ps:pstate) + (oldnode:'a identified) + (newthing:'b) + : 'b identified = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans oldnode.id in + span ps s.lo s.hi newthing +;; + +let rec clone_lval (ps:pstate) (lval:Ast.lval) : Ast.lval = + match lval with + Ast.LVAL_base nb -> + let nnb = clone_span ps nb nb.node in + Ast.LVAL_base nnb + | Ast.LVAL_ext (base, ext) -> + Ast.LVAL_ext ((clone_lval ps base), ext) +;; + +let clone_atom (ps:pstate) (atom:Ast.atom) : Ast.atom = + match atom with + Ast.ATOM_literal _ -> atom + | Ast.ATOM_lval lv -> Ast.ATOM_lval (clone_lval ps lv) +;; + +let ctxt (n:string) (f:pstate -> 'a) (ps:pstate) : 'a = + (ps.pstate_ctxt <- (n, lexpos ps) :: ps.pstate_ctxt; + let res = f ps in + ps.pstate_ctxt <- List.tl ps.pstate_ctxt; + res) +;; + +let rstr (r:bool) (f:pstate -> 'a) (ps:pstate) : 'a = + let prev = ps.pstate_rstr in + (ps.pstate_rstr <- r; + let res = f ps in + ps.pstate_rstr <- prev; + res) +;; + +let err (str:string) (ps:pstate) = + (Parse_err (ps, (str))) +;; + + +let (slot_nil:Ast.slot) = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = false; + Ast.slot_ty = Some Ast.TY_nil } +;; + +let (slot_auto:Ast.slot) = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = true; + Ast.slot_ty = None } +;; + +let build_tmp + (ps:pstate) + (slot:Ast.slot) + (apos:pos) + (bpos:pos) + : (temp_id * Ast.lval * Ast.stmt) = + let nonce = !(ps.pstate_temp_id) in + ps.pstate_temp_id := Temp ((int_of_temp nonce)+1); + iflog ps + (fun _ -> log ps "building temporary %d" (int_of_temp nonce)); + let decl = Ast.DECL_slot (Ast.KEY_temp nonce, (span ps apos bpos slot)) in + let declstmt = span ps apos bpos (Ast.STMT_decl decl) in + let tmp = Ast.LVAL_base (span ps apos bpos (Ast.BASE_temp nonce)) in + (nonce, tmp, declstmt) +;; + +(* Simple helpers *) + +(* FIXME: please rename these, they make eyes bleed. *) + +let arr (ls:'a list) : 'a array = Array.of_list ls ;; +let arl (ls:'a list) : 'a array = Array.of_list (List.rev ls) ;; +let arj (ar:('a array array)) = Array.concat (Array.to_list ar) ;; +let arj1st (pairs:(('a array) * 'b) array) : (('a array) * 'b array) = + let (az, bz) = List.split (Array.to_list pairs) in + (Array.concat az, Array.of_list bz) + + +(* Bottom-most parser actions. *) + +let peek (ps:pstate) : token = + iflog ps + begin + fun _ -> + log ps "peeking at: %s // %s" + (string_of_tok ps.pstate_peek) + (match ps.pstate_ctxt with + (s, _) :: _ -> s + | _ -> "") + end; + ps.pstate_peek +;; + + +let bump (ps:pstate) : unit = + begin + iflog ps (fun _ -> log ps "bumping past: %s" + (string_of_tok ps.pstate_peek)); + ps.pstate_peek <- Lexer.token ps.pstate_lexbuf + end +;; + +let bump_bracequote (ps:pstate) : unit = + begin + assert (ps.pstate_peek = LBRACE); + iflog ps (fun _ -> log ps "bumping past: %s" + (string_of_tok ps.pstate_peek)); + let buf = Buffer.create 32 in + ps.pstate_peek <- Lexer.bracequote buf 1 ps.pstate_lexbuf + end +;; + + +let expect (ps:pstate) (t:token) : unit = + let p = peek ps in + if p == t + then bump ps + else + let msg = ("Expected '" ^ (string_of_tok t) ^ + "', found '" ^ (string_of_tok p ) ^ "'") in + raise (Parse_err (ps, msg)) +;; + +let unexpected (ps:pstate) = + err ("Unexpected token '" ^ (string_of_tok (peek ps)) ^ "'") ps +;; + + + +(* Parser combinators. *) + +let one_or_more + (sep:token) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + let accum = ref [prule ps] in + while peek ps == sep + do + bump ps; + accum := (prule ps) :: !accum + done; + arl !accum +;; + +let bracketed_seq + (mandatory:int) + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + expect ps bra; + let accum = ref [] in + let dosep _ = + (match sepOpt with + None -> () + | Some tok -> + if (!accum = []) + then () + else expect ps tok) + in + while mandatory > List.length (!accum) do + dosep (); + accum := (prule ps) :: (!accum) + done; + while (not (peek ps = ket)) + do + dosep (); + accum := (prule ps) :: !accum + done; + expect ps ket; + arl !accum +;; + + +let bracketed_zero_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 0 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + + +let paren_comma_list + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_zero_or_more LPAREN RPAREN (Some COMMA) prule ps +;; + +let bracketed_one_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 1 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + +let bracketed_two_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 2 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + + +let bracketed (bra:token) (ket:token) (prule:pstate -> 'a) (ps:pstate) : 'a = + expect ps bra; + let res = ctxt "bracketed" prule ps in + expect ps ket; + res +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) -- cgit v1.2.3