diff options
| author | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
|---|---|---|
| committer | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
| commit | d6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch) | |
| tree | b425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/fe | |
| parent | Initial git commit. (diff) | |
| download | rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip | |
Populate tree.
Diffstat (limited to 'src/boot/fe')
| -rw-r--r-- | src/boot/fe/ast.ml | 1360 | ||||
| -rw-r--r-- | src/boot/fe/cexp.ml | 762 | ||||
| -rw-r--r-- | src/boot/fe/item.ml | 1139 | ||||
| -rw-r--r-- | src/boot/fe/lexer.mll | 362 | ||||
| -rw-r--r-- | src/boot/fe/parser.ml | 374 | ||||
| -rw-r--r-- | src/boot/fe/pexp.ml | 1354 | ||||
| -rw-r--r-- | src/boot/fe/token.ml | 308 |
7 files changed, 5659 insertions, 0 deletions
diff --git a/src/boot/fe/ast.ml b/src/boot/fe/ast.ml new file mode 100644 index 00000000..bf7a11ff --- /dev/null +++ b/src/boot/fe/ast.ml @@ -0,0 +1,1360 @@ +(* + * There are two kinds of rust files: + * + * .rc files, containing crates. + * .rs files, containing source. + * + *) + +open Common;; + +(* + * Slot names are given by a dot-separated path within the current + * module namespace. + *) + +type ident = string +;; + +type slot_key = + KEY_ident of ident + | KEY_temp of temp_id +;; + +(* "names" are statically computable references to particular items; + they never involve dynamic indexing (nor even static tuple-indexing; + you could add it but there are few contexts that need names that would + benefit from it). + + Each component of a name may also be type-parametric; you must + supply type parameters to reference through a type-parametric name + component. So for example if foo is parametric in 2 types, you can + write foo[int,int].bar but not foo.bar. + *) + +type effect = + PURE + | IO + | STATE + | UNSAFE +;; + +type name_base = + BASE_ident of ident + | BASE_temp of temp_id + | BASE_app of (ident * (ty array)) + +and name_component = + COMP_ident of ident + | COMP_app of (ident * (ty array)) + | COMP_idx of int + +and name = + NAME_base of name_base + | NAME_ext of (name * name_component) + +(* + * Type expressions are transparent to type names, their equality is + * structural. (after normalization) + *) +and ty = + + TY_any + | TY_nil + | TY_bool + | TY_mach of ty_mach + | TY_int + | TY_uint + | TY_char + | TY_str + + | TY_tup of ty_tup + | TY_vec of slot + | TY_rec of ty_rec + + (* + * Note that ty_idx is only valid inside a slot of a ty_iso group, not + * in a general type term. + *) + | TY_tag of ty_tag + | TY_iso of ty_iso + | TY_idx of int + + | TY_fn of ty_fn + | TY_chan of ty + | TY_port of ty + + | TY_obj of ty_obj + | TY_task + + | TY_native of opaque_id + | TY_param of (ty_param_idx * effect) + | TY_named of name + | TY_type + + | TY_constrained of (ty * constrs) + +and mode = + MODE_exterior + | MODE_interior + | MODE_alias + +and slot = { slot_mode: mode; + slot_mutable: bool; + slot_ty: ty option; } + +and ty_tup = slot array + +(* In closed type terms a constraint may refer to components of the term by + * anchoring off the "formal symbol" '*', which represents "the term this + * constraint is attached to". + * + * + * For example, if I have a tuple type tup(int,int), I may wish to enforce the + * lt predicate on it; I can write this as a constrained type term like: + * + * tup(int,int) : lt( *._0, *._1 ) + * + * In fact all tuple types are converted to this form for purpose of + * type-compatibility testing; the argument tuple in a function + * + * fn (int x, int y) : lt(x, y) -> int + * + * desugars to + * + * fn (tup(int, int) : lt( *._1, *._2 )) -> int + * + *) + +and carg_base = + BASE_formal + | BASE_named of name_base + +and carg_path = + CARG_base of carg_base + | CARG_ext of (carg_path * name_component) + +and carg = + CARG_path of carg_path + | CARG_lit of lit + +and constr = + { + constr_name: name; + constr_args: carg array; + } + +and constrs = constr array + +and ty_rec = (ident * slot) array + +(* ty_tag is a sum type. + * + * a tag type expression either normalizes to a TY_tag or a TY_iso, + * which (like in ocaml) is an indexed projection from an iso-recursive + * group of TY_tags. + *) + +and ty_tag = (name, ty_tup) Hashtbl.t + +and ty_iso = + { + iso_index: int; + iso_group: ty_tag array + } + +and ty_sig = + { + sig_input_slots: slot array; + sig_input_constrs: constrs; + sig_output_slot: slot; + } + +and ty_fn_aux = + { + fn_is_iter: bool; + fn_effect: effect; + } + +and ty_fn = (ty_sig * ty_fn_aux) + +and ty_obj_header = (slot array * constrs) + +and ty_obj = (effect * ((ident,ty_fn) Hashtbl.t)) + +and check_calls = (lval * (atom array)) array + +and rec_input = (ident * mode * bool * atom) + +and tup_input = (mode * bool * atom) + +and stmt' = + + (* lval-assigning stmts. *) + STMT_spawn of (lval * domain * lval * (atom array)) + | STMT_init_rec of (lval * (rec_input array) * lval option) + | STMT_init_tup of (lval * (tup_input array)) + | STMT_init_vec of (lval * slot * (atom array)) + | STMT_init_str of (lval * string) + | STMT_init_port of lval + | STMT_init_chan of (lval * (lval option)) + | STMT_copy of (lval * expr) + | STMT_copy_binop of (lval * binop * atom) + | STMT_call of (lval * lval * (atom array)) + | STMT_bind of (lval * lval * ((atom option) array)) + | STMT_recv of (lval * lval) + | STMT_slice of (lval * lval * slice) + + (* control-flow stmts. *) + | STMT_while of stmt_while + | STMT_do_while of stmt_while + | STMT_for of stmt_for + | STMT_for_each of stmt_for_each + | STMT_if of stmt_if + | STMT_put of (atom option) + | STMT_put_each of (lval * (atom array)) + | STMT_ret of (atom option) + | STMT_be of (lval * (atom array)) + | STMT_alt_tag of stmt_alt_tag + | STMT_alt_type of stmt_alt_type + | STMT_alt_port of stmt_alt_port + + (* structural and misc stmts. *) + | STMT_fail + | STMT_yield + | STMT_join of lval + | STMT_send of (lval * lval) + | STMT_log of atom + | STMT_note of atom + | STMT_prove of (constrs) + | STMT_check of (constrs * check_calls) + | STMT_check_expr of expr + | STMT_check_if of (constrs * check_calls * block) + | STMT_block of block + | STMT_decl of stmt_decl + +and stmt = stmt' identified + +and stmt_alt_tag = + { + alt_tag_lval: lval; + alt_tag_arms: arm array; + } + +and stmt_alt_type = + { + alt_type_lval: lval; + alt_type_arms: (ident * slot * stmt) array; + alt_type_else: stmt option; + } + +and block' = stmt array +and block = block' identified + +and stmt_decl = + DECL_mod_item of (ident * mod_item) + | DECL_slot of (slot_key * (slot identified)) + +and stmt_alt_port = + { + (* else lval is a timeout value. *) + alt_port_arms: (lval * lval) array; + alt_port_else: (lval * stmt) option; + } + +and stmt_while = + { + while_lval: ((stmt array) * expr); + while_body: block; + } + +and stmt_for_each = + { + for_each_slot: (slot identified * ident); + for_each_call: (lval * atom array); + for_each_head: block; + for_each_body: block; + } + +and stmt_for = + { + for_slot: (slot identified * ident); + for_seq: ((stmt array) * lval); + for_body: block; + } + +and stmt_if = + { + if_test: expr; + if_then: block; + if_else: block option; + } + +and slice = + { slice_start: atom option; + slice_len: atom option; } + +and domain = + DOMAIN_local + | DOMAIN_thread + +and pat = + PAT_lit of lit + | PAT_tag of ident * (pat array) + | PAT_slot of ((slot identified) * ident) + | PAT_wild + +and arm' = pat * block +and arm = arm' identified + +and atom = + ATOM_literal of (lit identified) + | ATOM_lval of lval + +and expr = + EXPR_binary of (binop * atom * atom) + | EXPR_unary of (unop * atom) + | EXPR_atom of atom + +and lit = + | LIT_nil + | LIT_bool of bool + | LIT_mach of (ty_mach * int64 * string) + | LIT_int of (int64 * string) + | LIT_uint of (int64 * string) + | LIT_char of int + + +and lval_component = + COMP_named of name_component + | COMP_atom of atom + + +and lval = + LVAL_base of name_base identified + | LVAL_ext of (lval * lval_component) + +and binop = + BINOP_or + | BINOP_and + | BINOP_xor + + | BINOP_eq + | BINOP_ne + + | BINOP_lt + | BINOP_le + | BINOP_ge + | BINOP_gt + + | BINOP_lsl + | BINOP_lsr + | BINOP_asr + + | BINOP_add + | BINOP_sub + | BINOP_mul + | BINOP_div + | BINOP_mod + | BINOP_send + +and unop = + UNOP_not + | UNOP_bitnot + | UNOP_neg + | UNOP_cast of ty identified + + +and header_slots = ((slot identified) * ident) array + +and header_tup = (slot identified) array + +and fn = + { + fn_input_slots: header_slots; + fn_input_constrs: constrs; + fn_output_slot: slot identified; + fn_aux: ty_fn_aux; + fn_body: block; + } + +and obj = + { + obj_state: header_slots; + obj_effect: effect; + obj_constrs: constrs; + obj_fns: (ident,fn identified) Hashtbl.t; + obj_drop: block option; + } + +(* + * An 'a decl is a sort-of-thing that represents a parametric (generative) + * declaration. Every reference to one of these involves applying 0 or more + * type arguments, as part of *name resolution*. + * + * Slots are *not* parametric declarations. A slot has a specific type + * even if it's a type that's bound by a quantifier in its environment. + *) + +and ty_param = ident * (ty_param_idx * effect) + +and mod_item' = + MOD_ITEM_type of ty + | MOD_ITEM_tag of (header_tup * ty_tag * node_id) + | MOD_ITEM_mod of (mod_view * mod_items) + | MOD_ITEM_fn of fn + | MOD_ITEM_obj of obj + +and mod_item_decl = + { + decl_params: (ty_param identified) array; + decl_item: mod_item'; + } + +and mod_item = mod_item_decl identified +and mod_items = (ident, mod_item) Hashtbl.t + +and export = + EXPORT_all_decls + | EXPORT_ident of ident + +and mod_view = + { + view_imports: (ident, name) Hashtbl.t; + view_exports: (export, unit) Hashtbl.t; + } + +and meta = (ident * string) array + +and meta_pat = (ident * string option) array + +and crate' = + { + crate_items: (mod_view * mod_items); + crate_meta: meta; + crate_auth: (name, effect) Hashtbl.t; + crate_required: (node_id, (required_lib * nabi_conv)) Hashtbl.t; + crate_required_syms: (node_id, string) Hashtbl.t; + crate_files: (node_id,filename) Hashtbl.t; + crate_main: name option; + } +and crate = crate' identified +;; + +(* + * NB: names can only be type-parametric in their *last* path-entry. + * All path-entries before that must be ident or idx (non-parametric). + *) +let sane_name (n:name) : bool = + let rec sane_prefix (n:name) : bool = + match n with + NAME_base (BASE_ident _) + | NAME_base (BASE_temp _) -> true + | NAME_ext (prefix, COMP_ident _) + | NAME_ext (prefix, COMP_idx _) -> sane_prefix prefix + | _ -> false + in + match n with + NAME_base _ -> true + | NAME_ext (prefix, _) -> sane_prefix prefix +;; + + +(***********************************************************************) + +(* FIXME (issue #19): finish all parts with ?foo? as their output. *) + +let fmt = Format.fprintf;; + +let fmt_ident (ff:Format.formatter) (i:ident) : unit = + fmt ff "%s" i + +let fmt_temp (ff:Format.formatter) (t:temp_id) : unit = + fmt ff ".t%d" (int_of_temp t) + +let fmt_slot_key ff (s:slot_key) : unit = + match s with + KEY_ident i -> fmt_ident ff i + | KEY_temp t -> fmt_temp ff t + +let rec fmt_app (ff:Format.formatter) (i:ident) (tys:ty array) : unit = + fmt ff "%s" i; + fmt_app_args ff tys + +and fmt_app_args (ff:Format.formatter) (tys:ty array) : unit = + fmt ff "[@["; + for i = 0 to (Array.length tys) - 1; + do + if i != 0 + then fmt ff ",@ "; + fmt_ty ff tys.(i); + done; + fmt ff "@]]" + +and fmt_name_base (ff:Format.formatter) (nb:name_base) : unit = + match nb with + BASE_ident i -> fmt_ident ff i + | BASE_temp t -> fmt_temp ff t + | BASE_app (id, tys) -> fmt_app ff id tys + +and fmt_name_component (ff:Format.formatter) (nc:name_component) : unit = + match nc with + COMP_ident i -> fmt_ident ff i + | COMP_app (id, tys) -> fmt_app ff id tys + | COMP_idx i -> fmt ff "_%d" i + +and fmt_name (ff:Format.formatter) (n:name) : unit = + match n with + NAME_base nb -> fmt_name_base ff nb + | NAME_ext (n, nc) -> + fmt_name ff n; + fmt ff "."; + fmt_name_component ff nc + +and fmt_mutable (ff:Format.formatter) (m:bool) : unit = + if m + then fmt ff "mutable "; + +and fmt_mode (ff:Format.formatter) (m:mode) : unit = + match m with + MODE_exterior -> fmt ff "@@" + | MODE_alias -> fmt ff "&" + | MODE_interior -> () + +and fmt_slot (ff:Format.formatter) (s:slot) : unit = + match s.slot_ty with + None -> fmt ff "auto" + | Some t -> + fmt_mutable ff s.slot_mutable; + fmt_mode ff s.slot_mode; + fmt_ty ff t + +and fmt_slots + (ff:Format.formatter) + (slots:slot array) + (idents:(ident array) option) + : unit = + fmt ff "(@["; + for i = 0 to (Array.length slots) - 1 + do + if i != 0 + then fmt ff ",@ "; + fmt_slot ff slots.(i); + begin + match idents with + None -> () + | Some ids -> (fmt ff " "; fmt_ident ff ids.(i)) + end; + done; + fmt ff "@])" + +and fmt_effect + (ff:Format.formatter) + (effect:effect) + : unit = + match effect with + PURE -> () + | IO -> fmt ff "io" + | STATE -> fmt ff "state" + | UNSAFE -> fmt ff "unsafe" + +and fmt_ty_fn + (ff:Format.formatter) + (ident_and_params:(ident * ty_param array) option) + (tf:ty_fn) + : unit = + let (tsig, ta) = tf in + fmt_effect ff ta.fn_effect; + if ta.fn_effect <> PURE then fmt ff " "; + fmt ff "%s" (if ta.fn_is_iter then "iter" else "fn"); + begin + match ident_and_params with + Some (id, params) -> + fmt ff " "; + fmt_ident_and_params ff id params + | None -> () + end; + fmt_slots ff tsig.sig_input_slots None; + fmt_decl_constrs ff tsig.sig_input_constrs; + fmt ff " -> "; + fmt_slot ff tsig.sig_output_slot; + +and fmt_tag (ff:Format.formatter) (ttag:ty_tag) : unit = + fmt ff "@[tag(@["; + let first = ref true in + Hashtbl.iter + begin + fun name ttup -> + (if !first + then first := false + else fmt ff ",@ "); + fmt_name ff name; + fmt_slots ff ttup None + end + ttag; + fmt ff "@])@]" + +and fmt_iso (ff:Format.formatter) (tiso:ty_iso) : unit = + fmt ff "@[iso [@["; + for i = 0 to (Array.length tiso.iso_group) - 1 + do + if i != 0 + then fmt ff ",@ "; + if i == tiso.iso_index + then fmt ff "<%d>: " i + else fmt ff "%d: " i; + fmt_tag ff tiso.iso_group.(i); + done; + fmt ff "@]]@]" + +and fmt_ty (ff:Format.formatter) (t:ty) : unit = + match t with + TY_any -> fmt ff "any" + | TY_nil -> fmt ff "()" + | TY_bool -> fmt ff "bool" + | TY_mach m -> fmt_mach ff m + | TY_int -> fmt ff "int" + | TY_uint -> fmt ff "uint" + | TY_char -> fmt ff "char" + | TY_str -> fmt ff "str" + + | TY_tup slots -> (fmt ff "tup"; fmt_slots ff slots None) + | TY_vec s -> (fmt ff "vec["; fmt_slot ff s; fmt ff "]") + | TY_chan t -> (fmt ff "chan["; fmt_ty ff t; fmt ff "]") + | TY_port t -> (fmt ff "port["; fmt_ty ff t; fmt ff "]") + + | TY_rec slots -> + let (idents, slots) = + let (idents, slots) = List.split (Array.to_list slots) in + (Array.of_list idents, Array.of_list slots) + in + fmt ff "@[rec"; + fmt_slots ff slots (Some idents); + fmt ff "@]" + + | TY_param (i, e) -> (fmt_effect ff e; + if e <> PURE then fmt ff " "; + fmt ff "<p#%d>" i) + | TY_native oid -> fmt ff "<native#%d>" (int_of_opaque oid) + | TY_named n -> fmt_name ff n + | TY_type -> fmt ff "type" + + | TY_fn tfn -> fmt_ty_fn ff None tfn + | TY_task -> fmt ff "task" + | TY_tag ttag -> fmt_tag ff ttag + | TY_iso tiso -> fmt_iso ff tiso + | TY_idx idx -> fmt ff "<idx#%d>" idx + | TY_constrained _ -> fmt ff "?constrained?" + + | TY_obj (effect, fns) -> + fmt_obox ff; + fmt_effect ff effect; + if effect <> PURE then fmt ff " "; + fmt ff "obj "; + fmt_obr ff; + Hashtbl.iter + begin + fun id fn -> + fmt ff "@\n"; + fmt_ty_fn ff (Some (id, [||])) fn; + fmt ff ";" + end + fns; + fmt_cbb ff + + +and fmt_constrs (ff:Format.formatter) (cc:constr array) : unit = + Array.iter (fmt_constr ff) cc + +and fmt_decl_constrs (ff:Format.formatter) (cc:constr array) : unit = + if Array.length cc = 0 + then () + else + begin + fmt ff " : "; + fmt_constrs ff cc + end + +and fmt_constr (ff:Format.formatter) (c:constr) : unit = + fmt_name ff c.constr_name; + fmt ff "(@["; + for i = 0 to (Array.length c.constr_args) - 1 + do + if i != 0 + then fmt ff ",@ "; + fmt_carg ff c.constr_args.(i); + done; + fmt ff "@])" + +and fmt_carg_path (ff:Format.formatter) (cp:carg_path) : unit = + match cp with + CARG_base BASE_formal -> fmt ff "*" + | CARG_base (BASE_named nb) -> fmt_name_base ff nb + | CARG_ext (base, nc) -> + fmt_carg_path ff base; + fmt ff "."; + fmt_name_component ff nc + +and fmt_carg (ff:Format.formatter) (ca:carg) : unit = + match ca with + CARG_path cp -> fmt_carg_path ff cp + | CARG_lit lit -> fmt_lit ff lit + +and fmt_obox ff = Format.pp_open_box ff 4 +and fmt_obox_3 ff = Format.pp_open_box ff 3 +and fmt_cbox ff = Format.pp_close_box ff () +and fmt_obr ff = fmt ff "{" +and fmt_cbr ff = fmt ff "@\n}" +and fmt_cbb ff = (fmt_cbox ff; fmt_cbr ff) + +and fmt_stmts (ff:Format.formatter) (ss:stmt array) : unit = + Array.iter (fmt_stmt ff) ss; + +and fmt_block (ff:Format.formatter) (b:stmt array) : unit = + fmt_obox ff; + fmt_obr ff; + fmt_stmts ff b; + fmt_cbb ff; + +and fmt_binop (ff:Format.formatter) (b:binop) : unit = + fmt ff "%s" + begin + match b with + BINOP_or -> "|" + | BINOP_and -> "&" + | BINOP_xor -> "^" + + | BINOP_eq -> "==" + | BINOP_ne -> "!=" + + | BINOP_lt -> "<" + | BINOP_le -> "<=" + | BINOP_ge -> ">=" + | BINOP_gt -> ">" + + | BINOP_lsl -> "<<" + | BINOP_lsr -> ">>" + | BINOP_asr -> ">>>" + + | BINOP_add -> "+" + | BINOP_sub -> "-" + | BINOP_mul -> "*" + | BINOP_div -> "/" + | BINOP_mod -> "%" + | BINOP_send -> "<|" + end + + +and fmt_unop (ff:Format.formatter) (u:unop) (a:atom) : unit = + begin + match u with + UNOP_not -> + fmt ff "!"; + fmt_atom ff a + + | UNOP_bitnot -> + fmt ff "~"; + fmt_atom ff a + + | UNOP_neg -> + fmt ff "-"; + fmt_atom ff a + + | UNOP_cast t -> + fmt_atom ff a; + fmt ff " as "; + fmt_ty ff t.node; + end + +and fmt_expr (ff:Format.formatter) (e:expr) : unit = + match e with + EXPR_binary (b,a1,a2) -> + begin + fmt_atom ff a1; + fmt ff " "; + fmt_binop ff b; + fmt ff " "; + fmt_atom ff a2 + end + | EXPR_unary (u,a) -> + begin + fmt_unop ff u a; + end + | EXPR_atom a -> fmt_atom ff a + +and fmt_mach (ff:Format.formatter) (m:ty_mach) : unit = + match m with + TY_u8 -> fmt ff "u8" + | TY_u16 -> fmt ff "u16" + | TY_u32 -> fmt ff "u32" + | TY_u64 -> fmt ff "u64" + | TY_i8 -> fmt ff "i8" + | TY_i16 -> fmt ff "i16" + | TY_i32 -> fmt ff "i32" + | TY_i64 -> fmt ff "i64" + | TY_f32 -> fmt ff "f32" + | TY_f64 -> fmt ff "f64" + +and fmt_lit (ff:Format.formatter) (l:lit) : unit = + match l with + | LIT_nil -> fmt ff "()" + | LIT_bool true -> fmt ff "true" + | LIT_bool false -> fmt ff "false" + | LIT_mach (m, _, s) -> + begin + fmt_mach ff m; + fmt ff "(%s)" s + end + | LIT_int (_,s) -> fmt ff "%s" s + | LIT_uint (_,s) -> fmt ff "%s" s + | LIT_char c -> fmt ff "'%s'" (Common.escaped_char c) + +and fmt_domain (ff:Format.formatter) (d:domain) : unit = + match d with + DOMAIN_local -> () + | DOMAIN_thread -> fmt ff "thread " + +and fmt_atom (ff:Format.formatter) (a:atom) : unit = + match a with + ATOM_literal lit -> fmt_lit ff lit.node + | ATOM_lval lval -> fmt_lval ff lval + +and fmt_atoms (ff:Format.formatter) (az:atom array) : unit = + fmt ff "("; + Array.iteri + begin + fun i a -> + if i != 0 + then fmt ff ", "; + fmt_atom ff a; + end + az; + fmt ff ")" + +and fmt_atom_opts (ff:Format.formatter) (az:(atom option) array) : unit = + fmt ff "("; + Array.iteri + begin + fun i a -> + if i != 0 + then fmt ff ", "; + match a with + None -> fmt ff "_" + | Some a -> fmt_atom ff a; + end + az; + fmt ff ")" + +and fmt_lval_component (ff:Format.formatter) (lvc:lval_component) : unit = + match lvc with + COMP_named nc -> fmt_name_component ff nc + | COMP_atom a -> + begin + fmt ff "("; + fmt_atom ff a; + fmt ff ")" + end + +and fmt_lval (ff:Format.formatter) (l:lval) : unit = + match l with + LVAL_base nbi -> fmt_name_base ff nbi.node + | LVAL_ext (lv, lvc) -> + begin + fmt_lval ff lv; + fmt ff "."; + fmt_lval_component ff lvc + end + +and fmt_stmt (ff:Format.formatter) (s:stmt) : unit = + fmt ff "@\n"; + fmt_stmt_body ff s + +and fmt_stmt_body (ff:Format.formatter) (s:stmt) : unit = + begin + match s.node with + STMT_log at -> + begin + fmt ff "log "; + fmt_atom ff at; + fmt ff ";" + end + + | STMT_spawn (dst, domain, fn, args) -> + fmt_lval ff dst; + fmt ff " = spawn "; + fmt_domain ff domain; + fmt_lval ff fn; + fmt_atoms ff args; + fmt ff ";"; + + | STMT_while sw -> + let (stmts, e) = sw.while_lval in + begin + fmt_obox ff; + fmt ff "while ("; + if Array.length stmts != 0 + then fmt_block ff stmts; + fmt_expr ff e; + fmt ff ") "; + fmt_obr ff; + fmt_stmts ff sw.while_body.node; + fmt_cbb ff + end + + | STMT_do_while sw -> + let (stmts, e) = sw.while_lval in + begin + fmt_obox ff; + fmt ff "do "; + fmt_obr ff; + fmt_stmts ff sw.while_body.node; + fmt ff "while ("; + if Array.length stmts != 0 + then fmt_block ff stmts; + fmt_expr ff e; + fmt ff ");"; + fmt_cbb ff + end + + | STMT_if sif -> + fmt_obox ff; + fmt ff "if ("; + fmt_expr ff sif.if_test; + fmt ff ") "; + fmt_obr ff; + fmt_stmts ff sif.if_then.node; + begin + match sif.if_else with + None -> () + | Some e -> + begin + fmt_cbb ff; + fmt_obox_3 ff; + fmt ff " else "; + fmt_obr ff; + fmt_stmts ff e.node + end + end; + fmt_cbb ff + + | STMT_ret (ao) -> + fmt ff "ret"; + begin + match ao with + None -> () + | Some at -> + fmt ff " "; + fmt_atom ff at + end; + fmt ff ";" + + | STMT_be (fn, az) -> + fmt ff "be "; + fmt_lval ff fn; + fmt_atoms ff az; + fmt ff ";"; + + | STMT_block b -> fmt_block ff b.node + + | STMT_copy (lv, ex) -> + fmt_lval ff lv; + fmt ff " = "; + fmt_expr ff ex; + fmt ff ";" + + | STMT_copy_binop (lv, binop, at) -> + fmt_lval ff lv; + fmt ff " "; + fmt_binop ff binop; + fmt ff "="; + fmt_atom ff at; + fmt ff ";" + + | STMT_call (dst, fn, args) -> + fmt_lval ff dst; + fmt ff " = "; + fmt_lval ff fn; + fmt_atoms ff args; + fmt ff ";"; + + | STMT_bind (dst, fn, arg_opts) -> + fmt_lval ff dst; + fmt ff " = "; + fmt_lval ff fn; + fmt_atom_opts ff arg_opts; + fmt ff ";"; + + | STMT_decl (DECL_slot (skey, sloti)) -> + if sloti.node.slot_ty != None then fmt ff "let "; + fmt_slot ff sloti.node; + fmt ff " "; + fmt_slot_key ff skey; + fmt ff ";" + + | STMT_decl (DECL_mod_item (ident, item)) -> + fmt_mod_item ff ident item + + | STMT_init_rec (dst, entries, base) -> + fmt_lval ff dst; + fmt ff " = rec("; + for i = 0 to (Array.length entries) - 1 + do + if i != 0 + then fmt ff ", "; + let (ident, mode, mut, atom) = entries.(i) in + fmt_ident ff ident; + fmt ff " = "; + fmt_mutable ff mut; + fmt_mode ff mode; + fmt_atom ff atom; + done; + begin + match base with + None -> () + | Some b -> + fmt ff " with "; + fmt_lval ff b + end; + fmt ff ");" + + | STMT_init_vec (dst, _, atoms) -> + fmt_lval ff dst; + fmt ff " = vec("; + for i = 0 to (Array.length atoms) - 1 + do + if i != 0 + then fmt ff ", "; + fmt_atom ff atoms.(i); + done; + fmt ff ");" + + | STMT_init_tup (dst, entries) -> + fmt_lval ff dst; + fmt ff " = ("; + for i = 0 to (Array.length entries) - 1 + do + if i != 0 + then fmt ff ", "; + let (mode, mut, atom) = entries.(i) in + fmt_mutable ff mut; + fmt_mode ff mode; + fmt_atom ff atom; + done; + fmt ff ");"; + + | STMT_init_str (dst, s) -> + fmt_lval ff dst; + fmt ff " = \"%s\"" (String.escaped s) + + | STMT_init_port dst -> + fmt_lval ff dst; + fmt ff " = port();" + + | STMT_init_chan (dst, port_opt) -> + fmt_lval ff dst; + fmt ff " = chan("; + begin + match port_opt with + None -> () + | Some lv -> fmt_lval ff lv + end; + fmt ff ");" + + | STMT_check_expr expr -> + fmt ff "check ("; + fmt_expr ff expr; + fmt ff ");" + + | STMT_check_if (constrs, _, block) -> + fmt_obox ff; + fmt ff "check if ("; + fmt_constrs ff constrs; + fmt ff ")"; + fmt_obr ff; + fmt_stmts ff block.node; + fmt_cbb ff + + | STMT_check (constrs, _) -> + fmt ff "check "; + fmt_constrs ff constrs; + fmt ff ";" + + | STMT_prove constrs -> + fmt ff "prove "; + fmt_constrs ff constrs; + fmt ff ";" + + | STMT_for sfor -> + let (slot, ident) = sfor.for_slot in + let (stmts, lval) = sfor.for_seq in + begin + fmt_obox ff; + fmt ff "for ("; + fmt_slot ff slot.node; + fmt ff " "; + fmt_ident ff ident; + fmt ff " in "; + fmt_stmts ff stmts; + fmt_lval ff lval; + fmt ff ") "; + fmt_obr ff; + fmt_stmts ff sfor.for_body.node; + fmt_cbb ff + end + + | STMT_for_each sf -> + let (slot, ident) = sf.for_each_slot in + let (f, az) = sf.for_each_call in + begin + fmt_obox ff; + fmt ff "for each ("; + fmt_slot ff slot.node; + fmt ff " "; + fmt_ident ff ident; + fmt ff " = "; + fmt_lval ff f; + fmt_atoms ff az; + fmt ff " "; + fmt_obr ff; + fmt_stmts ff sf.for_each_body.node; + fmt_cbb ff + end + + | STMT_put (atom) -> + fmt ff "put "; + begin + match atom with + Some a -> (fmt ff " "; fmt_atom ff a) + | None -> () + end; + fmt ff ";" + + | STMT_put_each (f, az) -> + fmt ff "put each "; + fmt_lval ff f; + fmt_atoms ff az; + fmt ff ";" + + | STMT_fail -> fmt ff "fail;" + | STMT_yield -> fmt ff "yield;" + + | STMT_send (chan, v) -> + fmt_lval ff chan; + fmt ff " <| "; + fmt_lval ff v; + fmt ff ";"; + + | STMT_recv (d, port) -> + fmt_lval ff d; + fmt ff " <- "; + fmt_lval ff port; + fmt ff ";"; + + | STMT_join t -> + fmt ff "join "; + fmt_lval ff t; + fmt ff ";" + + | STMT_alt_tag _ -> fmt ff "?stmt_alt_tag?" + | STMT_alt_type _ -> fmt ff "?stmt_alt_type?" + | STMT_alt_port _ -> fmt ff "?stmt_alt_port?" + | STMT_note _ -> fmt ff "?stmt_note?" + | STMT_slice _ -> fmt ff "?stmt_slice?" + end + +and fmt_decl_params (ff:Format.formatter) (params:ty_param array) : unit = + if Array.length params = 0 + then () + else + begin + fmt ff "["; + for i = 0 to (Array.length params) - 1 + do + if i <> 0 + then fmt ff ", "; + let (ident, (i, e)) = params.(i) in + fmt_effect ff e; + if e <> PURE then fmt ff " "; + fmt_ident ff ident; + fmt ff "=<p#%d>" i + done; + fmt ff "]" + end; + +and fmt_header_slots (ff:Format.formatter) (hslots:header_slots) : unit = + fmt_slots ff + (Array.map (fun (s,_) -> s.node) hslots) + (Some (Array.map (fun (_, i) -> i) hslots)) + +and fmt_ident_and_params + (ff:Format.formatter) + (id:ident) + (params:ty_param array) + : unit = + fmt_ident ff id; + fmt_decl_params ff params + +and fmt_fn + (ff:Format.formatter) + (id:ident) + (params:ty_param array) + (f:fn) + : unit = + fmt_obox ff; + fmt_effect ff f.fn_aux.fn_effect; + if f.fn_aux.fn_effect <> PURE then fmt ff " "; + fmt ff "%s "(if f.fn_aux.fn_is_iter then "iter" else "fn"); + fmt_ident_and_params ff id params; + fmt_header_slots ff f.fn_input_slots; + fmt_decl_constrs ff f.fn_input_constrs; + fmt ff " -> "; + fmt_slot ff f.fn_output_slot.node; + fmt ff " "; + fmt_obr ff; + fmt_stmts ff f.fn_body.node; + fmt_cbb ff + + +and fmt_obj + (ff:Format.formatter) + (id:ident) + (params:ty_param array) + (obj:obj) + : unit = + fmt_obox ff; + fmt_effect ff obj.obj_effect; + if obj.obj_effect <> PURE then fmt ff " "; + fmt ff "obj "; + fmt_ident_and_params ff id params; + fmt_header_slots ff obj.obj_state; + fmt_decl_constrs ff obj.obj_constrs; + fmt ff " "; + fmt_obr ff; + Hashtbl.iter + begin + fun id fn -> + fmt ff "@\n"; + fmt_fn ff id [||] fn.node + end + obj.obj_fns; + begin + match obj.obj_drop with + None -> () + | Some d -> + begin + fmt ff "@\n"; + fmt_obox ff; + fmt ff "drop "; + fmt_obr ff; + fmt_stmts ff d.node; + fmt_cbb ff; + end + end; + fmt_cbb ff + + +and fmt_mod_item (ff:Format.formatter) (id:ident) (item:mod_item) : unit = + fmt ff "@\n"; + let params = item.node.decl_params in + let params = Array.map (fun i -> i.node) params in + begin + match item.node.decl_item with + MOD_ITEM_type ty -> + fmt ff "type "; + fmt_ident_and_params ff id params; + fmt ff " = "; + fmt_ty ff ty; + fmt ff ";"; + + | MOD_ITEM_tag (hdr, ttag, _) -> + fmt ff "fn "; + fmt_ident_and_params ff id params; + fmt_header_slots ff + (Array.mapi (fun i s -> (s,(Printf.sprintf "_%d" i))) hdr); + fmt ff " -> "; + fmt_ty ff (TY_tag ttag); + fmt ff ";"; + + | MOD_ITEM_mod (view,items) -> + fmt_obox ff; + fmt ff "mod "; + fmt_ident_and_params ff id params; + fmt ff " "; + fmt_obr ff; + fmt_mod_view ff view; + fmt_mod_items ff items; + fmt_cbb ff + + | MOD_ITEM_fn f -> + fmt_fn ff id params f + + | MOD_ITEM_obj obj -> + fmt_obj ff id params obj + end + +and fmt_import (ff:Format.formatter) (ident:ident) (name:name) : unit = + fmt ff "@\n"; + fmt ff "import "; + fmt ff "%s = " ident; + fmt_name ff name; + +and fmt_export (ff:Format.formatter) (export:export) _ : unit = + fmt ff "@\n"; + match export with + EXPORT_all_decls -> fmt ff "export *;" + | EXPORT_ident i -> fmt ff "export %s;" i + +and fmt_mod_view (ff:Format.formatter) (mv:mod_view) : unit = + Hashtbl.iter (fmt_import ff) mv.view_imports; + Hashtbl.iter (fmt_export ff) mv.view_exports + +and fmt_mod_items (ff:Format.formatter) (mi:mod_items) : unit = + Hashtbl.iter (fmt_mod_item ff) mi + +and fmt_crate (ff:Format.formatter) (c:crate) : unit = + let (view,items) = c.node.crate_items in + fmt_mod_view ff view; + fmt_mod_items ff items + + +let fmt_to_str (f:Format.formatter -> 'a -> unit) (v:'a) : string = + let buf = Buffer.create 16 in + let bf = Format.formatter_of_buffer buf in + begin + f bf v; + Format.pp_print_flush bf (); + Buffer.contents buf + end + +let sprintf_fmt + (f:Format.formatter -> 'a -> unit) + : (unit -> 'a -> string) = + (fun _ -> fmt_to_str f) + + +let sprintf_expr = sprintf_fmt fmt_expr;; +let sprintf_name = sprintf_fmt fmt_name;; +let sprintf_lval = sprintf_fmt fmt_lval;; +let sprintf_lval_component = sprintf_fmt fmt_lval_component;; +let sprintf_atom = sprintf_fmt fmt_atom;; +let sprintf_slot = sprintf_fmt fmt_slot;; +let sprintf_slot_key = sprintf_fmt fmt_slot_key;; +let sprintf_mutable = sprintf_fmt fmt_mutable;; +let sprintf_ty = sprintf_fmt fmt_ty;; +let sprintf_effect = sprintf_fmt fmt_effect;; +let sprintf_tag = sprintf_fmt fmt_tag;; +let sprintf_carg = sprintf_fmt fmt_carg;; +let sprintf_constr = sprintf_fmt fmt_constr;; +let sprintf_stmt = sprintf_fmt fmt_stmt;; +let sprintf_mod_items = sprintf_fmt fmt_mod_items;; +let sprintf_decl_params = sprintf_fmt fmt_decl_params;; +let sprintf_app_args = sprintf_fmt fmt_app_args;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/cexp.ml b/src/boot/fe/cexp.ml new file mode 100644 index 00000000..6dffdb96 --- /dev/null +++ b/src/boot/fe/cexp.ml @@ -0,0 +1,762 @@ + +open Common;; +open Token;; +open Parser;; + +(* NB: cexps (crate-expressions / constant-expressions) are only used + * transiently during compilation: they are the outermost expression-language + * describing crate configuration and constants. They are completely evaluated + * at compile-time, in a little micro-interpreter defined here, with the + * results of evaluation being the sequence of directives controlling the rest + * of the compiler. + * + * Cexps, like pexps, do not escape the language front-end. + * + * You can think of the AST as a statement-language called "item" sandwiched + * between two expression-languages, "cexp" on the outside and "pexp" on the + * inside. The front-end evaluates cexp on the outside in order to get one big + * directive-list, evaluating those parts of pexp that are directly used by + * cexp in passing, and desugaring those remaining parts of pexp that are + * embedded within the items of the directives. + * + * The rest of the compiler only deals with the directives, which are mostly + * just a set of containers for items. Items are what most of AST describes + * ("most" because the type-grammar spans both items and pexps). + * + *) + +type meta = (Ast.ident * Pexp.pexp) array;; + +type meta_pat = (Ast.ident * (Pexp.pexp option)) array;; + +type auth = (Ast.name * Ast.effect);; + +type cexp = + CEXP_alt of cexp_alt identified + | CEXP_let of cexp_let identified + | CEXP_src_mod of cexp_src identified + | CEXP_dir_mod of cexp_dir identified + | CEXP_use_mod of cexp_use identified + | CEXP_nat_mod of cexp_nat identified + | CEXP_meta of meta identified + | CEXP_auth of auth identified + +and cexp_alt = + { alt_val: Pexp.pexp; + alt_arms: (Pexp.pexp * cexp array) array; + alt_else: cexp array } + +and cexp_let = + { let_ident: Ast.ident; + let_value: Pexp.pexp; + let_body: cexp array; } + +and cexp_src = + { src_ident: Ast.ident; + src_path: Pexp.pexp option } + +and cexp_dir = + { dir_ident: Ast.ident; + dir_path: Pexp.pexp option; + dir_body: cexp array } + +and cexp_use = + { use_ident: Ast.ident; + use_meta: meta_pat; } + +and cexp_nat = + { nat_abi: string; + nat_ident: Ast.ident; + nat_path: Pexp.pexp option; + (* + * FIXME: possibly support embedding optional strings as + * symbol-names, to handle mangling schemes that aren't + * Token.IDENT values + *) + nat_items: Ast.mod_items; + } +;; + + +(* Cexp grammar. *) + +let parse_meta_input (ps:pstate) : (Ast.ident * Pexp.pexp option) = + let lab = (ctxt "meta input: label" Pexp.parse_ident ps) in + match peek ps with + EQ -> + bump ps; + let v = + match peek ps with + UNDERSCORE -> bump ps; None + | _ -> Some (Pexp.parse_pexp ps) + in + (lab, v) + | _ -> raise (unexpected ps) +;; + +let parse_meta_pat (ps:pstate) : meta_pat = + bracketed_zero_or_more LPAREN RPAREN + (Some COMMA) parse_meta_input ps +;; + +let parse_meta (ps:pstate) : meta = + Array.map + begin + fun (id,v) -> + match v with + None -> + raise (err ("wildcard found in meta pattern " + ^ "where value expected") ps) + | Some v -> (id,v) + end + (parse_meta_pat ps) +;; + +let parse_optional_meta_pat + (ps:pstate) + (ident:Ast.ident) + : meta_pat = + match peek ps with + LPAREN -> parse_meta_pat ps + | _ -> + let apos = lexpos ps in + [| ("name", Some (span ps apos apos (Pexp.PEXP_str ident))) |] +;; + +let rec parse_cexps (ps:pstate) (term:Token.token) : cexp array = + let cexps = Queue.create () in + while ((peek ps) <> term) + do + Queue.push (parse_cexp ps) cexps + done; + expect ps term; + queue_to_arr cexps + +and parse_cexp (ps:pstate) : cexp = + + let apos = lexpos ps in + match peek ps with + MOD -> + begin + bump ps; + let name = ctxt "mod: name" Pexp.parse_ident ps in + let path = ctxt "mod: path" parse_eq_pexp_opt ps + in + match peek ps with + SEMI -> + bump ps; + let bpos = lexpos ps in + CEXP_src_mod + (span ps apos bpos { src_ident = name; + src_path = path }) + | LBRACE -> + let body = + bracketed_zero_or_more LBRACE RBRACE + None parse_cexp ps + in + let bpos = lexpos ps in + CEXP_dir_mod + (span ps apos bpos { dir_ident = name; + dir_path = path; + dir_body = body }) + | _ -> raise (unexpected ps) + end + + | NATIVE -> + begin + bump ps; + let abi = + match peek ps with + MOD -> "cdecl" + | LIT_STR s -> bump ps; s + | _ -> raise (unexpected ps) + in + let _ = expect ps MOD in + let name = ctxt "native mod: name" Pexp.parse_ident ps in + let path = ctxt "native mod: path" parse_eq_pexp_opt ps in + let items = Hashtbl.create 0 in + let get_item ps = + let (ident, item) = Item.parse_mod_item_from_signature ps in + htab_put items ident item; + in + ignore (bracketed_zero_or_more + LBRACE RBRACE None get_item ps); + let bpos = lexpos ps in + CEXP_nat_mod + (span ps apos bpos { nat_abi = abi; + nat_ident = name; + nat_path = path; + nat_items = items }) + end + + | USE -> + begin + bump ps; + let ident = ctxt "use mod: name" Pexp.parse_ident ps in + let meta = + ctxt "use mod: meta" parse_optional_meta_pat ps ident + in + let bpos = lexpos ps in + expect ps SEMI; + CEXP_use_mod + (span ps apos bpos { use_ident = ident; + use_meta = meta }) + end + + | LET -> + begin + bump ps; + expect ps LPAREN; + let id = Pexp.parse_ident ps in + expect ps EQ; + let v = Pexp.parse_pexp ps in + expect ps RPAREN; + expect ps LBRACE; + let body = parse_cexps ps RBRACE in + let bpos = lexpos ps in + CEXP_let + (span ps apos bpos + { let_ident = id; + let_value = v; + let_body = body }) + end + + | ALT -> + begin + bump ps; + expect ps LPAREN; + let v = Pexp.parse_pexp ps in + expect ps RPAREN; + expect ps LBRACE; + let rec consume_arms arms = + match peek ps with + CASE -> + begin + bump ps; + expect ps LPAREN; + let cond = Pexp.parse_pexp ps in + expect ps RPAREN; + expect ps LBRACE; + let consequent = parse_cexps ps RBRACE in + let arm = (cond, consequent) in + consume_arms (arm::arms) + end + | ELSE -> + begin + bump ps; + expect ps LBRACE; + let consequent = parse_cexps ps RBRACE in + expect ps RBRACE; + let bpos = lexpos ps in + span ps apos bpos + { alt_val = v; + alt_arms = Array.of_list (List.rev arms); + alt_else = consequent } + end + + | _ -> raise (unexpected ps) + in + CEXP_alt (consume_arms []) + end + + | META -> + bump ps; + let meta = parse_meta ps in + expect ps SEMI; + let bpos = lexpos ps in + CEXP_meta (span ps apos bpos meta) + + | AUTH -> + bump ps; + let name = Pexp.parse_name ps in + expect ps EQ; + let effect = Pexp.parse_effect ps in + expect ps SEMI; + let bpos = lexpos ps in + CEXP_auth (span ps apos bpos (name, effect)) + + | _ -> raise (unexpected ps) + + +and parse_eq_pexp_opt (ps:pstate) : Pexp.pexp option = + match peek ps with + EQ -> + begin + bump ps; + Some (Pexp.parse_pexp ps) + end + | _ -> None +;; + + +(* + * Dynamic-typed micro-interpreter for the cexp language. + * + * The product of evaluating a pexp is a pval. + * + * The product of evlauating a cexp is a cdir array. + *) + +type pval = + PVAL_str of string + | PVAL_num of int64 + | PVAL_bool of bool +;; + +type cdir = + CDIR_meta of ((Ast.ident * string) array) + | CDIR_syntax of Ast.name + | CDIR_check of (Ast.name * pval array) + | CDIR_mod of (Ast.ident * Ast.mod_item) + | CDIR_auth of auth + +type env = { env_bindings: (Ast.ident * pval) list; + env_prefix: filename list; + env_items: (filename, Ast.mod_items) Hashtbl.t; + env_files: (node_id,filename) Hashtbl.t; + env_required: (node_id, (required_lib * nabi_conv)) Hashtbl.t; + env_required_syms: (node_id, string) Hashtbl.t; + env_ps: pstate; } + +let unexpected_val (expected:string) (v:pval) = + let got = + match v with + PVAL_str s -> "str \"" ^ (String.escaped s) ^ "\"" + | PVAL_num i -> "num " ^ (Int64.to_string i) + | PVAL_bool b -> if b then "bool true" else "bool false" + in + (* FIXME: proper error reporting, please. *) + bug () "expected %s, got %s" expected got +;; + +let rewrap_items id items = + let item = decl [||] (Ast.MOD_ITEM_mod items) in + { id = id; node = item } +;; + + +let rec eval_cexps (env:env) (exps:cexp array) : cdir array = + Parser.arj (Array.map (eval_cexp env) exps) + +and eval_cexp (env:env) (exp:cexp) : cdir array = + match exp with + CEXP_alt {node=ca} -> + let v = eval_pexp env ca.alt_val in + let rec try_arm i = + if i >= Array.length ca.alt_arms + then ca.alt_else + else + let (arm_head, arm_body) = ca.alt_arms.(i) in + let v' = eval_pexp env arm_head in + if v' = v + then arm_body + else try_arm (i+1) + in + eval_cexps env (try_arm 0) + + | CEXP_let {node=cl} -> + let ident = cl.let_ident in + let v = eval_pexp env cl.let_value in + let env = { env with + env_bindings = ((ident,v)::env.env_bindings ) } + in + eval_cexps env cl.let_body + + | CEXP_src_mod {node=s; id=id} -> + let name = s.src_ident in + let path = + match s.src_path with + None -> name ^ ".rs" + | Some p -> eval_pexp_to_str env p + in + let full_path = + List.fold_left Filename.concat "" + (List.rev (path :: env.env_prefix)) + in + let ps = env.env_ps in + let p = + make_parser + ps.pstate_temp_id + ps.pstate_node_id + ps.pstate_opaque_id + ps.pstate_sess + ps.pstate_get_mod + ps.pstate_infer_lib_name + env.env_required + env.env_required_syms + full_path + in + let items = Item.parse_mod_items p EOF in + htab_put env.env_files id full_path; + [| CDIR_mod (name, rewrap_items id items) |] + + | CEXP_dir_mod {node=d; id=id} -> + let items = Hashtbl.create 0 in + let name = d.dir_ident in + let path = + match d.dir_path with + None -> name + | Some p -> eval_pexp_to_str env p + in + let env = { env with + env_prefix = path :: env.env_prefix } in + let sub_directives = eval_cexps env d.dir_body in + let add d = + match d with + CDIR_mod (name, item) -> + htab_put items name item + | _ -> raise (err "non-'mod' directive found in 'dir' directive" + env.env_ps) + in + Array.iter add sub_directives; + [| CDIR_mod (name, rewrap_items id (Item.empty_view, items)) |] + + | CEXP_use_mod {node=u; id=id} -> + let ps = env.env_ps in + let name = u.use_ident in + let (path, items) = + let meta_pat = + Array.map + begin + fun (k,vo) -> + match vo with + None -> (k, None) + | Some p -> (k, Some (eval_pexp_to_str env p)) + end + u.use_meta + in + ps.pstate_get_mod meta_pat id ps.pstate_node_id ps.pstate_opaque_id + in + iflog ps + begin + fun _ -> + log ps "extracted mod signature from %s (binding to %s)" + path name; + log ps "%a" Ast.sprintf_mod_items items; + end; + let rlib = REQUIRED_LIB_rust { required_libname = path; + required_prefix = 1 } + in + let item = decl [||] (Ast.MOD_ITEM_mod (Item.empty_view, items)) in + let item = { id = id; node = item } in + let span = Hashtbl.find ps.pstate_sess.Session.sess_spans id in + Item.note_required_mod env.env_ps span CONV_rust rlib item; + [| CDIR_mod (name, item) |] + + | CEXP_nat_mod {node=cn;id=id} -> + let conv = + let v = cn.nat_abi in + match string_to_conv v with + None -> unexpected_val "calling convention" (PVAL_str v) + | Some c -> c + in + let name = cn.nat_ident in + let filename = + match cn.nat_path with + None -> env.env_ps.pstate_infer_lib_name name + | Some p -> eval_pexp_to_str env p + in + let item = + decl [||] (Ast.MOD_ITEM_mod (Item.empty_view, cn.nat_items)) + in + let item = { id = id; node = item } in + let rlib = REQUIRED_LIB_c { required_libname = filename; + required_prefix = 1 } + in + let ps = env.env_ps in + let span = Hashtbl.find ps.pstate_sess.Session.sess_spans id in + Item.note_required_mod env.env_ps span conv rlib item; + [| CDIR_mod (name, item) |] + + | CEXP_meta m -> + [| CDIR_meta + begin + Array.map + begin + fun (id, p) -> (id, eval_pexp_to_str env p) + end + m.node + end |] + + | CEXP_auth a -> [| CDIR_auth a.node |] + + +and eval_pexp (env:env) (exp:Pexp.pexp) : pval = + match exp.node with + | Pexp.PEXP_binop (bop, a, b) -> + begin + let av = eval_pexp env a in + let bv = eval_pexp env b in + match (bop, av, bv) with + (Ast.BINOP_add, PVAL_str az, PVAL_str bz) -> + PVAL_str (az ^ bz) + | _ -> + let av = (need_num av) in + let bv = (need_num bv) in + PVAL_num + begin + match bop with + Ast.BINOP_add -> Int64.add av bv + | Ast.BINOP_sub -> Int64.sub av bv + | Ast.BINOP_mul -> Int64.mul av bv + | Ast.BINOP_div -> Int64.div av bv + | _ -> + bug () + "unhandled arithmetic op in Cexp.eval_pexp" + end + end + + | Pexp.PEXP_unop (uop, a) -> + begin + match uop with + Ast.UNOP_not -> + PVAL_bool (not (eval_pexp_to_bool env a)) + | Ast.UNOP_neg -> + PVAL_num (Int64.neg (eval_pexp_to_num env a)) + | _ -> bug () "Unexpected unop in Cexp.eval_pexp" + end + + | Pexp.PEXP_lval (Pexp.PLVAL_ident ident) -> + begin + match ltab_search env.env_bindings ident with + None -> raise (err (Printf.sprintf "no binding for '%s' found" + ident) env.env_ps) + | Some v -> v + end + + | Pexp.PEXP_lit (Ast.LIT_bool b) -> + PVAL_bool b + + | Pexp.PEXP_lit (Ast.LIT_int (i, _)) -> + PVAL_num i + + | Pexp.PEXP_str s -> + PVAL_str s + + | _ -> bug () "unexpected Pexp in Cexp.eval_pexp" + + +and eval_pexp_to_str (env:env) (exp:Pexp.pexp) : string = + match eval_pexp env exp with + PVAL_str s -> s + | v -> unexpected_val "str" v + +and need_num (cv:pval) : int64 = + match cv with + PVAL_num n -> n + | v -> unexpected_val "num" v + +and eval_pexp_to_num (env:env) (exp:Pexp.pexp) : int64 = + need_num (eval_pexp env exp) + +and eval_pexp_to_bool (env:env) (exp:Pexp.pexp) : bool = + match eval_pexp env exp with + PVAL_bool b -> b + | v -> unexpected_val "bool" v + +;; + + +let find_main_fn + (ps:pstate) + (crate_items:Ast.mod_items) + : Ast.name = + let fns = ref [] in + let extend prefix_name ident = + match prefix_name with + None -> Ast.NAME_base (Ast.BASE_ident ident) + | Some n -> Ast.NAME_ext (n, Ast.COMP_ident ident) + in + let rec dig prefix_name items = + Hashtbl.iter (extract_fn prefix_name) items + and extract_fn prefix_name ident item = + if not (Array.length item.node.Ast.decl_params = 0) || + Hashtbl.mem ps.pstate_required item.id + then () + else + match item.node.Ast.decl_item with + Ast.MOD_ITEM_mod (_, items) -> + dig (Some (extend prefix_name ident)) items + + | Ast.MOD_ITEM_fn _ -> + if ident = "main" + then fns := (extend prefix_name ident) :: (!fns) + else () + + | _ -> () + in + dig None crate_items; + match !fns with + [] -> raise (err "no 'main' function found" ps) + | [x] -> x + | _ -> raise (err "multiple 'main' functions found" ps) +;; + + +let with_err_handling sess thunk = + try + thunk () + with + Parse_err (ps, str) -> + Session.fail sess "Parse error: %s\n%!" str; + List.iter + (fun (cx,pos) -> + Session.fail sess "%s:E (parse context): %s\n%!" + (Session.string_of_pos pos) cx) + ps.pstate_ctxt; + let apos = lexpos ps in + span ps apos apos + { Ast.crate_items = (Item.empty_view, Hashtbl.create 0); + Ast.crate_meta = [||]; + Ast.crate_auth = Hashtbl.create 0; + Ast.crate_required = Hashtbl.create 0; + Ast.crate_required_syms = Hashtbl.create 0; + Ast.crate_main = None; + Ast.crate_files = Hashtbl.create 0 } +;; + + +let parse_crate_file + (sess:Session.sess) + (get_mod:get_mod_fn) + (infer_lib_name:(Ast.ident -> filename)) + : Ast.crate = + let fname = Session.filename_of sess.Session.sess_in in + let tref = ref (Temp 0) in + let nref = ref (Node 0) in + let oref = ref (Opaque 0) in + let required = Hashtbl.create 4 in + let required_syms = Hashtbl.create 4 in + let ps = + make_parser tref nref oref sess get_mod + infer_lib_name required required_syms fname + in + + let files = Hashtbl.create 0 in + let items = Hashtbl.create 4 in + let target_bindings = + let (os, arch, libc) = + match sess.Session.sess_targ with + Linux_x86_elf -> ("linux", "x86", "libc.so.6") + | Win32_x86_pe -> ("win32", "x86", "msvcrt.dll") + | MacOS_x86_macho -> ("macos", "x86", "libc.dylib") + in + [ + ("target_os", PVAL_str os); + ("target_arch", PVAL_str arch); + ("target_libc", PVAL_str libc) + ] + in + let build_bindings = + [ + ("build_compiler", PVAL_str Sys.executable_name); + ("build_input", PVAL_str fname); + ] + in + let initial_bindings = + target_bindings + @ build_bindings + in + let env = { env_bindings = initial_bindings; + env_prefix = [Filename.dirname fname]; + env_items = Hashtbl.create 0; + env_files = files; + env_required = required; + env_required_syms = required_syms; + env_ps = ps; } + in + let auth = Hashtbl.create 0 in + with_err_handling sess + begin + fun _ -> + let apos = lexpos ps in + let cexps = parse_cexps ps EOF in + let cdirs = eval_cexps env cexps in + let meta = Queue.create () in + let _ = + Array.iter + begin + fun d -> + match d with + CDIR_mod (name, item) -> htab_put items name item + | CDIR_meta metas -> + Array.iter (fun m -> Queue.add m meta) metas + | CDIR_auth (n,e) -> + if Hashtbl.mem auth n + then raise (err "duplicate 'auth' clause" ps) + else Hashtbl.add auth n e + | _ -> + raise + (err "unhandled directive at top level" ps) + end + cdirs + in + let bpos = lexpos ps in + let main = + if ps.pstate_sess.Session.sess_library_mode + then None + else Some (find_main_fn ps items) in + let crate = { Ast.crate_items = (Item.empty_view, items); + Ast.crate_meta = queue_to_arr meta; + Ast.crate_auth = auth; + Ast.crate_required = required; + Ast.crate_required_syms = required_syms; + Ast.crate_main = main; + Ast.crate_files = files } + in + let cratei = span ps apos bpos crate in + htab_put files cratei.id fname; + cratei + end +;; + +let parse_src_file + (sess:Session.sess) + (get_mod:get_mod_fn) + (infer_lib_name:(Ast.ident -> filename)) + : Ast.crate = + let fname = Session.filename_of sess.Session.sess_in in + let tref = ref (Temp 0) in + let nref = ref (Node 0) in + let oref = ref (Opaque 0) in + let required = Hashtbl.create 0 in + let required_syms = Hashtbl.create 0 in + let ps = + make_parser tref nref oref sess get_mod + infer_lib_name required required_syms fname + in + with_err_handling sess + begin + fun _ -> + let apos = lexpos ps in + let items = Item.parse_mod_items ps EOF in + let bpos = lexpos ps in + let files = Hashtbl.create 0 in + let main = + if ps.pstate_sess.Session.sess_library_mode + then None + else Some (find_main_fn ps (snd items)) + in + let crate = { Ast.crate_items = items; + Ast.crate_required = required; + Ast.crate_required_syms = required_syms; + Ast.crate_auth = Hashtbl.create 0; + Ast.crate_meta = [||]; + Ast.crate_main = main; + Ast.crate_files = files } + in + let cratei = span ps apos bpos crate in + htab_put files cratei.id fname; + cratei + end +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/item.ml b/src/boot/fe/item.ml new file mode 100644 index 00000000..75f86a58 --- /dev/null +++ b/src/boot/fe/item.ml @@ -0,0 +1,1139 @@ + +open Common;; +open Token;; +open Parser;; + +(* Item grammar. *) + +let default_exports = + let e = Hashtbl.create 0 in + Hashtbl.add e Ast.EXPORT_all_decls (); + e +;; + +let empty_view = { Ast.view_imports = Hashtbl.create 0; + Ast.view_exports = default_exports } +;; + +let rec parse_expr (ps:pstate) : (Ast.stmt array * Ast.expr) = + let pexp = ctxt "expr" Pexp.parse_pexp ps in + Pexp.desugar_expr ps pexp + +and parse_expr_atom (ps:pstate) : (Ast.stmt array * Ast.atom) = + let pexp = ctxt "expr" Pexp.parse_pexp ps in + Pexp.desugar_expr_atom ps pexp + +and parse_expr_atom_list + (bra:token) + (ket:token) + (ps:pstate) + : (Ast.stmt array * Ast.atom array) = + arj1st (bracketed_zero_or_more bra ket (Some COMMA) + (ctxt "expr-atom list" parse_expr_atom) ps) + +and parse_expr_init (lv:Ast.lval) (ps:pstate) : (Ast.stmt array) = + let pexp = ctxt "expr" Pexp.parse_pexp ps in + Pexp.desugar_expr_init ps lv pexp + +and parse_lval (ps:pstate) : (Ast.stmt array * Ast.lval) = + let pexp = Pexp.parse_pexp ps in + Pexp.desugar_lval ps pexp + +and parse_identified_slot_and_ident + (aliases_ok:bool) + (ps:pstate) + : (Ast.slot identified * Ast.ident) = + let slot = + ctxt "identified slot and ident: slot" + (Pexp.parse_identified_slot aliases_ok) ps + in + let ident = + ctxt "identified slot and ident: ident" Pexp.parse_ident ps + in + (slot, ident) + +and parse_zero_or_more_identified_slot_ident_pairs + (aliases_ok:bool) + (ps:pstate) + : (((Ast.slot identified) * Ast.ident) array) = + ctxt "zero+ slots and idents" + (paren_comma_list + (parse_identified_slot_and_ident aliases_ok)) ps + +and parse_block (ps:pstate) : Ast.block = + let apos = lexpos ps in + let stmts = + arj (ctxt "block: stmts" + (bracketed_zero_or_more LBRACE RBRACE + None parse_stmts) ps) + in + let bpos = lexpos ps in + span ps apos bpos stmts + +and parse_block_stmt (ps:pstate) : Ast.stmt = + let apos = lexpos ps in + let block = parse_block ps in + let bpos = lexpos ps in + span ps apos bpos (Ast.STMT_block block) + +and parse_init + (lval:Ast.lval) + (ps:pstate) + : Ast.stmt array = + let apos = lexpos ps in + let stmts = + match peek ps with + EQ -> + bump ps; + parse_expr_init lval ps + | LARROW -> + bump ps; + let (stmts, rhs) = ctxt "init: port" parse_lval ps in + let bpos = lexpos ps in + let stmt = Ast.STMT_recv (lval, rhs) in + Array.append stmts [| (span ps apos bpos stmt) |] + | _ -> arr [] + in + let _ = expect ps SEMI in + stmts + +and parse_slot_and_ident_and_init + (ps:pstate) + : (Ast.stmt array * Ast.slot * Ast.ident) = + let apos = lexpos ps in + let (slot, ident) = + ctxt "slot, ident and init: slot and ident" + (Pexp.parse_slot_and_ident false) ps + in + let bpos = lexpos ps in + let lval = Ast.LVAL_base (span ps apos bpos (Ast.BASE_ident ident)) in + let stmts = ctxt "slot, ident and init: init" (parse_init lval) ps in + (stmts, slot, ident) + +and parse_auto_slot_and_init + (ps:pstate) + : (Ast.stmt array * Ast.slot * Ast.ident) = + let apos = lexpos ps in + let ident = Pexp.parse_ident ps in + let bpos = lexpos ps in + let lval = Ast.LVAL_base (span ps apos bpos (Ast.BASE_ident ident)) in + let stmts = ctxt "slot, ident and init: init" (parse_init lval) ps in + (stmts, slot_auto, ident) + +(* + * We have no way to parse a single Ast.stmt; any incoming syntactic statement + * may desugar to N>1 real Ast.stmts + *) + +and parse_stmts (ps:pstate) : Ast.stmt array = + let apos = lexpos ps in + match peek ps with + + LOG -> + bump ps; + let (stmts, atom) = ctxt "stmts: log value" parse_expr_atom ps in + expect ps SEMI; + spans ps stmts apos (Ast.STMT_log atom) + + | CHECK -> + bump ps; + begin + + let rec name_to_lval (bpos:pos) (name:Ast.name) + : Ast.lval = + match name with + Ast.NAME_base nb -> + Ast.LVAL_base (span ps apos bpos nb) + | Ast.NAME_ext (n, nc) -> + Ast.LVAL_ext (name_to_lval bpos n, Ast.COMP_named nc) + in + + let rec carg_path_to_lval (bpos:pos) (path:Ast.carg_path) + : Ast.lval = + match path with + Ast.CARG_base Ast.BASE_formal -> + raise (err "converting formal constraint-arg to atom" ps) + | Ast.CARG_base (Ast.BASE_named nb) -> + Ast.LVAL_base (span ps apos bpos nb) + | Ast.CARG_ext (pth, nc) -> + Ast.LVAL_ext (carg_path_to_lval bpos pth, + Ast.COMP_named nc) + in + + let carg_to_atom (bpos:pos) (carg:Ast.carg) + : Ast.atom = + match carg with + Ast.CARG_lit lit -> + Ast.ATOM_literal (span ps apos bpos lit) + | Ast.CARG_path pth -> + Ast.ATOM_lval (carg_path_to_lval bpos pth) + in + + let synthesise_check_call (bpos:pos) (constr:Ast.constr) + : (Ast.lval * (Ast.atom array)) = + let lval = name_to_lval bpos constr.Ast.constr_name in + let args = + Array.map (carg_to_atom bpos) constr.Ast.constr_args + in + (lval, args) + in + + let synthesise_check_calls (bpos:pos) (constrs:Ast.constrs) + : Ast.check_calls = + Array.map (synthesise_check_call bpos) constrs + in + + match peek ps with + LPAREN -> + bump ps; + let (stmts, expr) = + ctxt "stmts: check value" parse_expr ps + in + expect ps RPAREN; + expect ps SEMI; + spans ps stmts apos (Ast.STMT_check_expr expr) + + | IF -> + bump ps; + expect ps LPAREN; + let constrs = Pexp.parse_constrs ps in + expect ps RPAREN; + let block = parse_block ps in + let bpos = lexpos ps in + let calls = synthesise_check_calls bpos constrs in + [| span ps apos bpos + (Ast.STMT_check_if (constrs, calls, block)) + |] + + | _ -> + let constrs = Pexp.parse_constrs ps in + expect ps SEMI; + let bpos = lexpos ps in + let calls = synthesise_check_calls bpos constrs in + [| span ps apos bpos + (Ast.STMT_check (constrs, calls)) + |] + end + + | ALT -> + bump ps; + begin + match peek ps with + TYPE -> [| |] + | LPAREN -> + let (stmts, lval) = bracketed LPAREN RPAREN parse_lval ps in + let rec parse_pat ps = + match peek ps with + IDENT ident -> + let apos = lexpos ps in + bump ps; + let bpos = lexpos ps in + + (* TODO: nullary constructors *) + if peek ps != LPAREN then + let slot = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = false; + Ast.slot_ty = None } + in + Ast.PAT_slot ((span ps apos bpos slot), ident) + else + let pats = + paren_comma_list parse_pat ps + in + Ast.PAT_tag (ident, pats) + | LIT_INT _ | LIT_CHAR _ | LIT_BOOL _ -> + Ast.PAT_lit (Pexp.parse_lit ps) + | UNDERSCORE -> bump ps; Ast.PAT_wild + | tok -> raise (Parse_err (ps, + "Expected pattern but found '" ^ + (string_of_tok tok) ^ "'")) + in + let rec parse_arms ps = + match peek ps with + CASE -> + bump ps; + let pat = bracketed LPAREN RPAREN parse_pat ps in + let block = parse_block ps in + let arm = (pat, block) in + (span ps apos (lexpos ps) arm)::(parse_arms ps) + | _ -> [] + in + let parse_alt_block ps = + let arms = ctxt "alt tag arms" parse_arms ps in + spans ps stmts apos begin + Ast.STMT_alt_tag { + Ast.alt_tag_lval = lval; + Ast.alt_tag_arms = Array.of_list arms + } + end + in + bracketed LBRACE RBRACE parse_alt_block ps + | _ -> [| |] + end + + | IF -> + let final_else = ref None in + let rec parse_stmt_if _ = + bump ps; + let (stmts, expr) = + ctxt "stmts: if cond" + (bracketed LPAREN RPAREN parse_expr) ps + in + let then_block = ctxt "stmts: if-then" parse_block ps in + begin + match peek ps with + ELSE -> + begin + bump ps; + match peek ps with + IF -> + let nested_if = parse_stmt_if () in + let bpos = lexpos ps in + final_else := + Some (span ps apos bpos nested_if) + | _ -> + final_else := + Some (ctxt "stmts: if-else" parse_block ps) + end + | _ -> () + end; + let res = + spans ps stmts apos + (Ast.STMT_if + { Ast.if_test = expr; + Ast.if_then = then_block; + Ast.if_else = !final_else; }) + in + final_else := None; + res + in + parse_stmt_if() + + | FOR -> + bump ps; + begin + match peek ps with + EACH -> + bump ps; + let inner ps : ((Ast.slot identified * Ast.ident) + * Ast.stmt array + * (Ast.lval * Ast.atom array)) = + let slot = (parse_identified_slot_and_ident true ps) in + let _ = (expect ps IN) in + let (stmts1, iter) = (rstr true parse_lval) ps in + let (stmts2, args) = + parse_expr_atom_list LPAREN RPAREN ps + in + (slot, Array.append stmts1 stmts2, (iter, args)) + in + let (slot, stmts, call) = ctxt "stmts: foreach head" + (bracketed LPAREN RPAREN inner) ps + in + let body_block = + ctxt "stmts: foreach body" parse_block ps + in + let bpos = lexpos ps in + let head_block = + (* + * Slightly weird, but we put an extra nesting level of + * block here to separate the part that lives in our frame + * (the iter slot) from the part that lives in the callee + * frame (the body block). + *) + span ps apos bpos [| + span ps apos bpos (Ast.STMT_block body_block); + |] + in + Array.append stmts + [| span ps apos bpos + (Ast.STMT_for_each + { Ast.for_each_slot = slot; + Ast.for_each_call = call; + Ast.for_each_head = head_block; + Ast.for_each_body = body_block; }) |] + | _ -> + let inner ps = + let slot = (parse_identified_slot_and_ident false ps) in + let _ = (expect ps IN) in + let lval = (parse_lval ps) in + (slot, lval) in + let (slot, seq) = + ctxt "stmts: for head" (bracketed LPAREN RPAREN inner) ps + in + let body_block = ctxt "stmts: for body" parse_block ps in + let bpos = lexpos ps in + [| span ps apos bpos + (Ast.STMT_for + { Ast.for_slot = slot; + Ast.for_seq = seq; + Ast.for_body = body_block; }) |] + end + + | WHILE -> + bump ps; + let (stmts, test) = + ctxt "stmts: while cond" (bracketed LPAREN RPAREN parse_expr) ps + in + let body_block = ctxt "stmts: while body" parse_block ps in + let bpos = lexpos ps in + [| span ps apos bpos + (Ast.STMT_while + { Ast.while_lval = (stmts, test); + Ast.while_body = body_block; }) |] + + | PUT -> + begin + bump ps; + match peek ps with + EACH -> + bump ps; + let (lstmts, lval) = + ctxt "put each: lval" (rstr true parse_lval) ps + in + let (astmts, args) = + ctxt "put each: args" + (parse_expr_atom_list LPAREN RPAREN) ps + in + let bpos = lexpos ps in + let be = + span ps apos bpos (Ast.STMT_put_each (lval, args)) + in + expect ps SEMI; + Array.concat [ lstmts; astmts; [| be |] ] + + | _ -> + begin + let (stmts, e) = + match peek ps with + SEMI -> (arr [], None) + | _ -> + let (stmts, expr) = + ctxt "stmts: put expr" parse_expr_atom ps + in + expect ps SEMI; + (stmts, Some expr) + in + spans ps stmts apos (Ast.STMT_put e) + end + end + + | RET -> + bump ps; + let (stmts, e) = + match peek ps with + SEMI -> (bump ps; (arr [], None)) + | _ -> + let (stmts, expr) = + ctxt "stmts: ret expr" parse_expr_atom ps + in + expect ps SEMI; + (stmts, Some expr) + in + spans ps stmts apos (Ast.STMT_ret e) + + | BE -> + bump ps; + let (lstmts, lval) = ctxt "be: lval" (rstr true parse_lval) ps in + let (astmts, args) = + ctxt "be: args" (parse_expr_atom_list LPAREN RPAREN) ps + in + let bpos = lexpos ps in + let be = span ps apos bpos (Ast.STMT_be (lval, args)) in + expect ps SEMI; + Array.concat [ lstmts; astmts; [| be |] ] + + | LBRACE -> [| ctxt "stmts: block" parse_block_stmt ps |] + + | LET -> + bump ps; + let (stmts, slot, ident) = + ctxt "stmt slot" parse_slot_and_ident_and_init ps in + let slot = Pexp.apply_mutability slot true in + let bpos = lexpos ps in + let decl = Ast.DECL_slot (Ast.KEY_ident ident, + (span ps apos bpos slot)) + in + Array.concat [[| span ps apos bpos (Ast.STMT_decl decl) |]; stmts] + + | AUTO -> + bump ps; + let (stmts, slot, ident) = + ctxt "stmt slot" parse_auto_slot_and_init ps in + let slot = Pexp.apply_mutability slot true in + let bpos = lexpos ps in + let decl = Ast.DECL_slot (Ast.KEY_ident ident, + (span ps apos bpos slot)) + in + Array.concat [[| span ps apos bpos (Ast.STMT_decl decl) |]; stmts] + + | YIELD -> + bump ps; + expect ps SEMI; + let bpos = lexpos ps in + [| span ps apos bpos Ast.STMT_yield |] + + | FAIL -> + bump ps; + expect ps SEMI; + let bpos = lexpos ps in + [| span ps apos bpos Ast.STMT_fail |] + + | JOIN -> + bump ps; + let (stmts, lval) = ctxt "stmts: task expr" parse_lval ps in + expect ps SEMI; + spans ps stmts apos (Ast.STMT_join lval) + + | MOD | OBJ | TYPE | FN | USE | NATIVE -> + let (ident, item) = ctxt "stmt: decl" parse_mod_item ps in + let decl = Ast.DECL_mod_item (ident, item) in + let stmts = expand_tags_to_stmts ps item in + spans ps stmts apos (Ast.STMT_decl decl) + + | _ -> + let (lstmts, lval) = ctxt "stmt: lval" parse_lval ps in + begin + match peek ps with + + SEMI -> (bump ps; lstmts) + + | EQ -> parse_init lval ps + + | OPEQ binop_token -> + bump ps; + let (stmts, rhs) = + ctxt "stmt: opeq rhs" parse_expr_atom ps + in + let binop = + match binop_token with + PLUS -> Ast.BINOP_add + | MINUS -> Ast.BINOP_sub + | STAR -> Ast.BINOP_mul + | SLASH -> Ast.BINOP_div + | PERCENT -> Ast.BINOP_mod + | AND -> Ast.BINOP_and + | OR -> Ast.BINOP_or + | CARET -> Ast.BINOP_xor + | LSL -> Ast.BINOP_lsl + | LSR -> Ast.BINOP_lsr + | ASR -> Ast.BINOP_asr + | _ -> raise (err "unknown opeq token" ps) + in + expect ps SEMI; + spans ps stmts apos + (Ast.STMT_copy_binop (lval, binop, rhs)) + + | LARROW -> + bump ps; + let (stmts, rhs) = ctxt "stmt: recv rhs" parse_lval ps in + let _ = expect ps SEMI in + spans ps stmts apos (Ast.STMT_recv (lval, rhs)) + + | SEND -> + bump ps; + let (stmts, rhs) = + ctxt "stmt: send rhs" parse_expr_atom ps + in + let _ = expect ps SEMI in + let bpos = lexpos ps in + let (src, copy) = match rhs with + Ast.ATOM_lval lv -> (lv, [| |]) + | _ -> + let (_, tmp, tempdecl) = + build_tmp ps slot_auto apos bpos + in + let copy = span ps apos bpos + (Ast.STMT_copy (tmp, Ast.EXPR_atom rhs)) in + ((clone_lval ps tmp), [| tempdecl; copy |]) + in + let send = + span ps apos bpos + (Ast.STMT_send (lval, src)) + in + Array.concat [ stmts; copy; [| send |] ] + + | _ -> raise (unexpected ps) + end + + +and parse_ty_param (iref:int ref) (ps:pstate) : Ast.ty_param identified = + let apos = lexpos ps in + let e = Pexp.parse_effect ps in + let ident = Pexp.parse_ident ps in + let i = !iref in + let bpos = lexpos ps in + incr iref; + span ps apos bpos (ident, (i, e)) + +and parse_ty_params (ps:pstate) + : (Ast.ty_param identified) array = + match peek ps with + LBRACKET -> + bracketed_zero_or_more LBRACKET RBRACKET (Some COMMA) + (parse_ty_param (ref 0)) ps + | _ -> arr [] + +and parse_ident_and_params (ps:pstate) (cstr:string) + : (Ast.ident * (Ast.ty_param identified) array) = + let ident = ctxt ("mod " ^ cstr ^ " item: ident") Pexp.parse_ident ps in + let params = + ctxt ("mod " ^ cstr ^ " item: type params") parse_ty_params ps + in + (ident, params) + +and parse_inputs + (ps:pstate) + : ((Ast.slot identified * Ast.ident) array * Ast.constrs) = + let slots = + match peek ps with + LPAREN -> ctxt "inputs: input idents and slots" + (parse_zero_or_more_identified_slot_ident_pairs true) ps + | _ -> raise (unexpected ps) + in + let constrs = + match peek ps with + COLON -> (bump ps; ctxt "inputs: constrs" Pexp.parse_constrs ps) + | _ -> [| |] + in + let rec rewrite_carg_path cp = + match cp with + Ast.CARG_base (Ast.BASE_named (Ast.BASE_ident ident)) -> + begin + let res = ref cp in + for i = 0 to (Array.length slots) - 1 + do + let (_, ident') = slots.(i) in + if ident' = ident + then res := Ast.CARG_ext (Ast.CARG_base Ast.BASE_formal, + Ast.COMP_idx i) + else () + done; + !res + end + | Ast.CARG_base _ -> cp + | Ast.CARG_ext (cp, ext) -> + Ast.CARG_ext (rewrite_carg_path cp, ext) + in + (* Rewrite constrs with input tuple as BASE_formal. *) + Array.iter + begin + fun constr -> + let args = constr.Ast.constr_args in + Array.iteri + begin + fun i carg -> + match carg with + Ast.CARG_path cp -> + args.(i) <- Ast.CARG_path (rewrite_carg_path cp) + | _ -> () + end + args + end + constrs; + (slots, constrs) + + +and parse_in_and_out + (ps:pstate) + : ((Ast.slot identified * Ast.ident) array + * Ast.constrs + * Ast.slot identified) = + let (inputs, constrs) = parse_inputs ps in + let output = + match peek ps with + RARROW -> + bump ps; + ctxt "fn in and out: output slot" + (Pexp.parse_identified_slot true) ps + | _ -> + let apos = lexpos ps in + span ps apos apos slot_nil + in + (inputs, constrs, output) + + +(* parse_fn starts at the first lparen of the sig. *) +and parse_fn + (is_iter:bool) + (effect:Ast.effect) + (ps:pstate) + : Ast.fn = + let (inputs, constrs, output) = + ctxt "fn: in_and_out" parse_in_and_out ps + in + let body = ctxt "fn: body" parse_block ps in + { Ast.fn_input_slots = inputs; + Ast.fn_input_constrs = constrs; + Ast.fn_output_slot = output; + Ast.fn_aux = { Ast.fn_effect = effect; + Ast.fn_is_iter = is_iter; }; + Ast.fn_body = body; } + +and parse_meta_input (ps:pstate) : (Ast.ident * string option) = + let lab = (ctxt "meta input: label" Pexp.parse_ident ps) in + match peek ps with + EQ -> + bump ps; + let v = + match peek ps with + UNDERSCORE -> bump ps; None + | LIT_STR s -> bump ps; Some s + | _ -> raise (unexpected ps) + in + (lab, v) + | _ -> raise (unexpected ps) + +and parse_meta_pat (ps:pstate) : Ast.meta_pat = + bracketed_zero_or_more LPAREN RPAREN + (Some COMMA) parse_meta_input ps + +and parse_meta (ps:pstate) : Ast.meta = + Array.map + begin + fun (id,v) -> + match v with + None -> + raise (err ("wildcard found in meta " + ^ "pattern where value expected") ps) + | Some v -> (id,v) + end + (parse_meta_pat ps) + +and parse_optional_meta_pat (ps:pstate) (ident:Ast.ident) : Ast.meta_pat = + match peek ps with + LPAREN -> parse_meta_pat ps + | _ -> [| ("name", Some ident) |] + + +and parse_obj_item + (ps:pstate) + (apos:pos) + (effect:Ast.effect) + : (Ast.ident * Ast.mod_item) = + expect ps OBJ; + let (ident, params) = parse_ident_and_params ps "obj" in + let (state, constrs) = (ctxt "obj state" parse_inputs ps) in + let drop = ref None in + expect ps LBRACE; + let fns = Hashtbl.create 0 in + while (not (peek ps = RBRACE)) + do + let apos = lexpos ps in + match peek ps with + IO | STATE | UNSAFE | FN | ITER -> + let effect = Pexp.parse_effect ps in + let is_iter = (peek ps) = ITER in + bump ps; + let ident = ctxt "obj fn: ident" Pexp.parse_ident ps in + let fn = ctxt "obj fn: fn" (parse_fn is_iter effect) ps in + let bpos = lexpos ps in + htab_put fns ident (span ps apos bpos fn) + | DROP -> + bump ps; + drop := Some (parse_block ps) + | RBRACE -> () + | _ -> raise (unexpected ps) + done; + expect ps RBRACE; + let bpos = lexpos ps in + let obj = { Ast.obj_state = state; + Ast.obj_effect = effect; + Ast.obj_constrs = constrs; + Ast.obj_fns = fns; + Ast.obj_drop = !drop } + in + (ident, + span ps apos bpos + (decl params (Ast.MOD_ITEM_obj obj))) + + +and parse_mod_item (ps:pstate) : (Ast.ident * Ast.mod_item) = + let apos = lexpos ps in + let parse_lib_name ident = + match peek ps with + EQ -> + begin + bump ps; + match peek ps with + LIT_STR s -> (bump ps; s) + | _ -> raise (unexpected ps) + end + | _ -> ps.pstate_infer_lib_name ident + in + + match peek ps with + + IO | STATE | UNSAFE | OBJ | FN | ITER -> + let effect = Pexp.parse_effect ps in + begin + match peek ps with + OBJ -> parse_obj_item ps apos effect + | _ -> + let is_iter = (peek ps) = ITER in + bump ps; + let (ident, params) = parse_ident_and_params ps "fn" in + let fn = + ctxt "mod fn item: fn" (parse_fn is_iter effect) ps + in + let bpos = lexpos ps in + (ident, + span ps apos bpos + (decl params (Ast.MOD_ITEM_fn fn))) + end + + | TYPE -> + bump ps; + let (ident, params) = parse_ident_and_params ps "type" in + let _ = expect ps EQ in + let ty = ctxt "mod type item: ty" Pexp.parse_ty ps in + let _ = expect ps SEMI in + let bpos = lexpos ps in + let item = Ast.MOD_ITEM_type ty in + (ident, span ps apos bpos (decl params item)) + + | MOD -> + bump ps; + let (ident, params) = parse_ident_and_params ps "mod" in + expect ps LBRACE; + let items = parse_mod_items ps RBRACE in + let bpos = lexpos ps in + (ident, + span ps apos bpos + (decl params (Ast.MOD_ITEM_mod items))) + + | NATIVE -> + begin + bump ps; + let conv = + match peek ps with + LIT_STR s -> + bump ps; + begin + match string_to_conv s with + None -> raise (unexpected ps) + | Some c -> c + end + | _ -> CONV_cdecl + in + expect ps MOD; + let (ident, params) = parse_ident_and_params ps "native mod" in + let path = parse_lib_name ident in + let items = parse_mod_items_from_signature ps in + let bpos = lexpos ps in + let rlib = REQUIRED_LIB_c { required_libname = path; + required_prefix = ps.pstate_depth } + in + let item = decl params (Ast.MOD_ITEM_mod items) in + let item = span ps apos bpos item in + note_required_mod ps {lo=apos; hi=bpos} conv rlib item; + (ident, item) + end + + | USE -> + begin + bump ps; + let ident = ctxt "use mod: ident" Pexp.parse_ident ps in + let meta = + ctxt "use mod: meta" parse_optional_meta_pat ps ident + in + let bpos = lexpos ps in + let id = (span ps apos bpos ()).id in + let (path, items) = + ps.pstate_get_mod meta id ps.pstate_node_id ps.pstate_opaque_id + in + let bpos = lexpos ps in + expect ps SEMI; + let rlib = + REQUIRED_LIB_rust { required_libname = path; + required_prefix = ps.pstate_depth } + in + iflog ps + begin + fun _ -> + log ps "extracted mod from %s (binding to %s)" + path ident; + log ps "%a" Ast.sprintf_mod_items items; + end; + let item = decl [||] (Ast.MOD_ITEM_mod (empty_view, items)) in + let item = span ps apos bpos item in + note_required_mod ps {lo=apos; hi=bpos} CONV_rust rlib item; + (ident, item) + end + + + + | _ -> raise (unexpected ps) + + +and parse_mod_items_from_signature + (ps:pstate) + : (Ast.mod_view * Ast.mod_items) = + let mis = Hashtbl.create 0 in + expect ps LBRACE; + while not (peek ps = RBRACE) + do + let (ident, mti) = ctxt "mod items from sig: mod item" + parse_mod_item_from_signature ps + in + Hashtbl.add mis ident mti; + done; + expect ps RBRACE; + (empty_view, mis) + + +and parse_mod_item_from_signature (ps:pstate) + : (Ast.ident * Ast.mod_item) = + let apos = lexpos ps in + match peek ps with + MOD -> + bump ps; + let (ident, params) = parse_ident_and_params ps "mod signature" in + let items = parse_mod_items_from_signature ps in + let bpos = lexpos ps in + (ident, span ps apos bpos (decl params (Ast.MOD_ITEM_mod items))) + + | IO | STATE | UNSAFE | FN | ITER -> + let effect = Pexp.parse_effect ps in + let is_iter = (peek ps) = ITER in + bump ps; + let (ident, params) = parse_ident_and_params ps "fn signature" in + let (inputs, constrs, output) = parse_in_and_out ps in + let bpos = lexpos ps in + let body = span ps apos bpos [| |] in + let fn = + Ast.MOD_ITEM_fn + { Ast.fn_input_slots = inputs; + Ast.fn_input_constrs = constrs; + Ast.fn_output_slot = output; + Ast.fn_aux = { Ast.fn_effect = effect; + Ast.fn_is_iter = is_iter; }; + Ast.fn_body = body; } + in + let node = span ps apos bpos (decl params fn) in + begin + match peek ps with + EQ -> + bump ps; + begin + match peek ps with + LIT_STR s -> + bump ps; + htab_put ps.pstate_required_syms node.id s + | _ -> raise (unexpected ps) + end; + | _ -> () + end; + expect ps SEMI; + (ident, node) + + | TYPE -> + bump ps; + let (ident, params) = parse_ident_and_params ps "type type" in + let t = + match peek ps with + SEMI -> Ast.TY_native (next_opaque_id ps) + | _ -> Pexp.parse_ty ps + in + expect ps SEMI; + let bpos = lexpos ps in + (ident, span ps apos bpos (decl params (Ast.MOD_ITEM_type t))) + + (* FIXME: parse obj. *) + | _ -> raise (unexpected ps) + + +and expand_tags + (ps:pstate) + (item:Ast.mod_item) + : (Ast.ident * Ast.mod_item) array = + let handle_ty_tag id ttag = + let tags = ref [] in + Hashtbl.iter + begin + fun name tup -> + let ident = match name with + Ast.NAME_base (Ast.BASE_ident ident) -> ident + | _ -> + raise (Parse_err + (ps, "unexpected name type while expanding tag")) + in + let header = + Array.map (fun slot -> (clone_span ps item slot)) tup + in + let tag_item' = Ast.MOD_ITEM_tag (header, ttag, id) in + let cloned_params = + Array.map (fun p -> clone_span ps p p.node) + item.node.Ast.decl_params + in + let tag_item = + clone_span ps item (decl cloned_params tag_item') + in + tags := (ident, tag_item) :: (!tags) + end + ttag; + arr (!tags) + in + let handle_ty_decl id tyd = + match tyd with + Ast.TY_tag ttag -> handle_ty_tag id ttag + | _ -> [| |] + in + match item.node.Ast.decl_item with + Ast.MOD_ITEM_type tyd -> handle_ty_decl item.id tyd + | _ -> [| |] + + +and expand_tags_to_stmts + (ps:pstate) + (item:Ast.mod_item) + : Ast.stmt array = + let id_items = expand_tags ps item in + Array.map + (fun (ident, tag_item) -> + clone_span ps item + (Ast.STMT_decl + (Ast.DECL_mod_item (ident, tag_item)))) + id_items + + +and expand_tags_to_items + (ps:pstate) + (item:Ast.mod_item) + (items:Ast.mod_items) + : unit = + let id_items = expand_tags ps item in + Array.iter + (fun (ident, item) -> htab_put items ident item) + id_items + + +and note_required_mod + (ps:pstate) + (sp:span) + (conv:nabi_conv) + (rlib:required_lib) + (item:Ast.mod_item) + : unit = + iflog ps + begin + fun _ -> log ps "marking item #%d as required" (int_of_node item.id) + end; + htab_put ps.pstate_required item.id (rlib, conv); + if not (Hashtbl.mem ps.pstate_sess.Session.sess_spans item.id) + then Hashtbl.add ps.pstate_sess.Session.sess_spans item.id sp; + match item.node.Ast.decl_item with + Ast.MOD_ITEM_mod (_, items) -> + Hashtbl.iter + begin + fun _ sub -> + note_required_mod ps sp conv rlib sub + end + items + | _ -> () + + +and parse_import + (ps:pstate) + (imports:(Ast.ident, Ast.name) Hashtbl.t) + : unit = + let import a n = + let a = match a with + None -> + begin + match n with + Ast.NAME_ext (_, Ast.COMP_ident i) + | Ast.NAME_ext (_, Ast.COMP_app (i, _)) + | Ast.NAME_base (Ast.BASE_ident i) + | Ast.NAME_base (Ast.BASE_app (i, _)) -> i + | _ -> raise (Parse_err (ps, "bad import specification")) + end + | Some i -> i + in + Hashtbl.add imports a n + in + match peek ps with + IDENT i -> + begin + bump ps; + match peek ps with + EQ -> + (* + * import x = ... + *) + bump ps; + import (Some i) (Pexp.parse_name ps) + | _ -> + (* + * import x... + *) + import None (Pexp.parse_name_ext ps + (Ast.NAME_base + (Ast.BASE_ident i))) + end + | _ -> + import None (Pexp.parse_name ps) + + +and parse_export + (ps:pstate) + (exports:(Ast.export, unit) Hashtbl.t) + : unit = + let e = + match peek ps with + STAR -> bump ps; Ast.EXPORT_all_decls + | IDENT i -> bump ps; Ast.EXPORT_ident i + | _ -> raise (unexpected ps) + in + Hashtbl.add exports e () + + +and parse_mod_items + (ps:pstate) + (terminal:token) + : (Ast.mod_view * Ast.mod_items) = + ps.pstate_depth <- ps.pstate_depth + 1; + let imports = Hashtbl.create 0 in + let exports = Hashtbl.create 0 in + let in_view = ref true in + let items = Hashtbl.create 4 in + while (not (peek ps = terminal)) + do + if !in_view + then + match peek ps with + IMPORT -> + bump ps; + parse_import ps imports; + expect ps SEMI; + | EXPORT -> + bump ps; + parse_export ps exports; + expect ps SEMI; + | _ -> + in_view := false + else + let (ident, item) = parse_mod_item ps in + htab_put items ident item; + expand_tags_to_items ps item items; + done; + if (Hashtbl.length exports) = 0 + then Hashtbl.add exports Ast.EXPORT_all_decls (); + expect ps terminal; + ps.pstate_depth <- ps.pstate_depth - 1; + let view = { Ast.view_imports = imports; + Ast.view_exports = exports } + in + (view, items) +;; + + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/lexer.mll b/src/boot/fe/lexer.mll new file mode 100644 index 00000000..fb4d58c5 --- /dev/null +++ b/src/boot/fe/lexer.mll @@ -0,0 +1,362 @@ + + +{ + + open Token;; + open Common;; + + exception Lex_err of (string * Common.pos);; + + let fail lexbuf s = + let p = lexbuf.Lexing.lex_start_p in + let pos = + (p.Lexing.pos_fname, + p.Lexing.pos_lnum , + (p.Lexing.pos_cnum) - (p.Lexing.pos_bol)) + in + raise (Lex_err (s, pos)) + ;; + + let bump_line p = { p with + Lexing.pos_lnum = p.Lexing.pos_lnum + 1; + Lexing.pos_bol = p.Lexing.pos_cnum } + ;; + + let keyword_table = Hashtbl.create 100 + let _ = + List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok) + [ ("mod", MOD); + ("use", USE); + ("meta", META); + ("auth", AUTH); + + ("syntax", SYNTAX); + + ("if", IF); + ("else", ELSE); + ("while", WHILE); + ("do", DO); + ("alt", ALT); + ("case", CASE); + + ("for", FOR); + ("each", EACH); + ("put", PUT); + ("ret", RET); + ("be", BE); + + ("fail", FAIL); + ("drop", DROP); + + ("type", TYPE); + ("check", CHECK); + ("claim", CLAIM); + ("prove", PROVE); + + ("io", IO); + ("state", STATE); + ("unsafe", UNSAFE); + + ("native", NATIVE); + ("mutable", MUTABLE); + ("auto", AUTO); + + ("fn", FN); + ("iter", ITER); + + ("import", IMPORT); + ("export", EXPORT); + + ("let", LET); + + ("log", LOG); + ("spawn", SPAWN); + ("thread", THREAD); + ("yield", YIELD); + ("join", JOIN); + + ("bool", BOOL); + + ("int", INT); + ("uint", UINT); + + ("char", CHAR); + ("str", STR); + + ("rec", REC); + ("tup", TUP); + ("tag", TAG); + ("vec", VEC); + ("any", ANY); + + ("obj", OBJ); + + ("port", PORT); + ("chan", CHAN); + + ("task", TASK); + + ("true", LIT_BOOL true); + ("false", LIT_BOOL false); + + ("in", IN); + + ("as", AS); + ("with", WITH); + + ("bind", BIND); + + ("u8", MACH TY_u8); + ("u16", MACH TY_u16); + ("u32", MACH TY_u32); + ("u64", MACH TY_u64); + ("i8", MACH TY_i8); + ("i16", MACH TY_i16); + ("i32", MACH TY_i32); + ("i64", MACH TY_i64); + ("f32", MACH TY_f32); + ("f64", MACH TY_f64) + ] +;; +} + +let hexdig = ['0'-'9' 'a'-'f' 'A'-'F'] +let bin = "0b" ['0' '1']['0' '1' '_']* +let hex = "0x" hexdig ['0'-'9' 'a'-'f' 'A'-'F' '_']* +let dec = ['0'-'9']+ +let exp = ['e''E']['-''+']? dec +let flo = (dec '.' dec (exp?)) | (dec exp) + +let ws = [ ' ' '\t' '\r' ] + +let id = ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']* + +rule token = parse + ws+ { token lexbuf } +| '\n' { lexbuf.Lexing.lex_curr_p + <- (bump_line lexbuf.Lexing.lex_curr_p); + token lexbuf } +| "//" [^'\n']* { token lexbuf } + +| '+' { PLUS } +| '-' { MINUS } +| '*' { STAR } +| '/' { SLASH } +| '%' { PERCENT } +| '=' { EQ } +| '<' { LT } +| "<=" { LE } +| "==" { EQEQ } +| "!=" { NE } +| ">=" { GE } +| '>' { GT } +| '!' { NOT } +| '&' { AND } +| "&&" { ANDAND } +| '|' { OR } +| "||" { OROR } +| "<<" { LSL } +| ">>" { LSR } +| ">>>" { ASR } +| '~' { TILDE } +| '{' { LBRACE } +| '_' (dec as n) { IDX (int_of_string n) } +| '_' { UNDERSCORE } +| '}' { RBRACE } + +| "+=" { OPEQ (PLUS) } +| "-=" { OPEQ (MINUS) } +| "*=" { OPEQ (STAR) } +| "/=" { OPEQ (SLASH) } +| "%=" { OPEQ (PERCENT) } +| "&=" { OPEQ (AND) } +| "|=" { OPEQ (OR) } +| "<<=" { OPEQ (LSL) } +| ">>=" { OPEQ (LSR) } +| ">>>=" { OPEQ (ASR) } +| "^=" { OPEQ (CARET) } + +| '#' { POUND } +| '@' { AT } +| '^' { CARET } +| '.' { DOT } +| ',' { COMMA } +| ';' { SEMI } +| ':' { COLON } +| "<-" { LARROW } +| "<|" { SEND } +| "->" { RARROW } +| '(' { LPAREN } +| ')' { RPAREN } +| '[' { LBRACKET } +| ']' { RBRACKET } + +| id as i + { try + Hashtbl.find keyword_table i + with + Not_found -> IDENT (i) + } + +| bin as n { LIT_INT (Int64.of_string n, n) } +| hex as n { LIT_INT (Int64.of_string n, n) } +| dec as n { LIT_INT (Int64.of_string n, n) } +| flo as n { LIT_FLO n } + +| '\'' { char lexbuf } +| '"' { let buf = Buffer.create 32 in + str buf lexbuf } + +| eof { EOF } + +and str buf = parse + _ as ch + { + match ch with + '"' -> LIT_STR (Buffer.contents buf) + | '\\' -> str_escape buf lexbuf + | _ -> + Buffer.add_char buf ch; + let c = Char.code ch in + if bounds 0 c 0x7f + then str buf lexbuf + else + if ((c land 0b1110_0000) == 0b1100_0000) + then ext_str 1 buf lexbuf + else + if ((c land 0b1111_0000) == 0b1110_0000) + then ext_str 2 buf lexbuf + else + if ((c land 0b1111_1000) == 0b1111_0000) + then ext_str 3 buf lexbuf + else + if ((c land 0b1111_1100) == 0b1111_1000) + then ext_str 4 buf lexbuf + else + if ((c land 0b1111_1110) == 0b1111_1100) + then ext_str 5 buf lexbuf + else fail lexbuf "bad initial utf-8 byte" + } + +and str_escape buf = parse + 'x' ((hexdig hexdig) as h) + | 'u' ((hexdig hexdig hexdig hexdig) as h) + | 'U' + ((hexdig hexdig hexdig hexdig + hexdig hexdig hexdig hexdig) as h) + { + Buffer.add_string buf (char_as_utf8 (int_of_string ("0x" ^ h))); + str buf lexbuf + } + | 'n' { Buffer.add_char buf '\n'; str buf lexbuf } + | 'r' { Buffer.add_char buf '\r'; str buf lexbuf } + | 't' { Buffer.add_char buf '\t'; str buf lexbuf } + | '\\' { Buffer.add_char buf '\\'; str buf lexbuf } + | '"' { Buffer.add_char buf '"'; str buf lexbuf } + | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) } + + +and ext_str n buf = parse + _ as ch + { + let c = Char.code ch in + if ((c land 0b1100_0000) == (0b1000_0000)) + then + begin + Buffer.add_char buf ch; + if n = 1 + then str buf lexbuf + else ext_str (n-1) buf lexbuf + end + else + fail lexbuf "bad trailing utf-8 byte" + } + + +and char = parse + '\\' { char_escape lexbuf } + | _ as c + { + let c = Char.code c in + if bounds 0 c 0x7f + then end_char c lexbuf + else + if ((c land 0b1110_0000) == 0b1100_0000) + then ext_char 1 (c land 0b0001_1111) lexbuf + else + if ((c land 0b1111_0000) == 0b1110_0000) + then ext_char 2 (c land 0b0000_1111) lexbuf + else + if ((c land 0b1111_1000) == 0b1111_0000) + then ext_char 3 (c land 0b0000_0111) lexbuf + else + if ((c land 0b1111_1100) == 0b1111_1000) + then ext_char 4 (c land 0b0000_0011) lexbuf + else + if ((c land 0b1111_1110) == 0b1111_1100) + then ext_char 5 (c land 0b0000_0001) lexbuf + else fail lexbuf "bad initial utf-8 byte" + } + +and char_escape = parse + 'x' ((hexdig hexdig) as h) + | 'u' ((hexdig hexdig hexdig hexdig) as h) + | 'U' + ((hexdig hexdig hexdig hexdig + hexdig hexdig hexdig hexdig) as h) + { + end_char (int_of_string ("0x" ^ h)) lexbuf + } + | 'n' { end_char (Char.code '\n') lexbuf } + | 'r' { end_char (Char.code '\r') lexbuf } + | 't' { end_char (Char.code '\t') lexbuf } + | '\\' { end_char (Char.code '\\') lexbuf } + | '\'' { end_char (Char.code '\'') lexbuf } + | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) } + + +and ext_char n accum = parse + _ as c + { + let c = Char.code c in + if ((c land 0b1100_0000) == (0b1000_0000)) + then + let accum = (accum lsl 6) lor (c land 0b0011_1111) in + if n = 1 + then end_char accum lexbuf + else ext_char (n-1) accum lexbuf + else + fail lexbuf "bad trailing utf-8 byte" + } + +and end_char accum = parse + '\'' { LIT_CHAR accum } + + +and bracequote buf depth = parse + + '\\' '{' { Buffer.add_char buf '{'; + bracequote buf depth lexbuf } + +| '{' { Buffer.add_char buf '{'; + bracequote buf (depth+1) lexbuf } + +| '\\' '}' { Buffer.add_char buf '}'; + bracequote buf depth lexbuf } + +| '}' { if depth = 1 + then BRACEQUOTE (Buffer.contents buf) + else + begin + Buffer.add_char buf '}'; + bracequote buf (depth-1) lexbuf + end } + +| '\\' [^'{' '}'] { let s = Lexing.lexeme lexbuf in + Buffer.add_string buf s; + bracequote buf depth lexbuf } + + +| [^'\\' '{' '}']+ { let s = Lexing.lexeme lexbuf in + Buffer.add_string buf s; + bracequote buf depth lexbuf } diff --git a/src/boot/fe/parser.ml b/src/boot/fe/parser.ml new file mode 100644 index 00000000..3dda93ac --- /dev/null +++ b/src/boot/fe/parser.ml @@ -0,0 +1,374 @@ + +open Common;; +open Token;; + +(* Fundamental parser types and actions *) + +type get_mod_fn = (Ast.meta_pat + -> node_id + -> (node_id ref) + -> (opaque_id ref) + -> (filename * Ast.mod_items)) +;; + +type pstate = + { mutable pstate_peek : token; + mutable pstate_ctxt : (string * pos) list; + mutable pstate_rstr : bool; + mutable pstate_depth: int; + pstate_lexbuf : Lexing.lexbuf; + pstate_file : filename; + pstate_sess : Session.sess; + pstate_temp_id : temp_id ref; + pstate_node_id : node_id ref; + pstate_opaque_id : opaque_id ref; + pstate_get_mod : get_mod_fn; + pstate_infer_lib_name : (Ast.ident -> filename); + pstate_required : (node_id, (required_lib * nabi_conv)) Hashtbl.t; + pstate_required_syms : (node_id, string) Hashtbl.t; } +;; + +let log (ps:pstate) = Session.log "parse" + ps.pstate_sess.Session.sess_log_parse + ps.pstate_sess.Session.sess_log_out +;; + +let iflog ps thunk = + if ps.pstate_sess.Session.sess_log_parse + then thunk () + else () +;; + +let make_parser + (tref:temp_id ref) + (nref:node_id ref) + (oref:opaque_id ref) + (sess:Session.sess) + (get_mod:get_mod_fn) + (infer_lib_name:Ast.ident -> filename) + (required:(node_id, (required_lib * nabi_conv)) Hashtbl.t) + (required_syms:(node_id, string) Hashtbl.t) + (fname:string) + : pstate = + let lexbuf = Lexing.from_channel (open_in fname) in + let spos = { lexbuf.Lexing.lex_start_p with Lexing.pos_fname = fname } in + let cpos = { lexbuf.Lexing.lex_curr_p with Lexing.pos_fname = fname } in + lexbuf.Lexing.lex_start_p <- spos; + lexbuf.Lexing.lex_curr_p <- cpos; + let first = Lexer.token lexbuf in + let ps = + { pstate_peek = first; + pstate_ctxt = []; + pstate_rstr = false; + pstate_depth = 0; + pstate_lexbuf = lexbuf; + pstate_file = fname; + pstate_sess = sess; + pstate_temp_id = tref; + pstate_node_id = nref; + pstate_opaque_id = oref; + pstate_get_mod = get_mod; + pstate_infer_lib_name = infer_lib_name; + pstate_required = required; + pstate_required_syms = required_syms; } + in + iflog ps (fun _ -> log ps "made parser for: %s\n%!" fname); + ps +;; + +exception Parse_err of (pstate * string) +;; + +let lexpos (ps:pstate) : pos = + let p = ps.pstate_lexbuf.Lexing.lex_start_p in + (p.Lexing.pos_fname, + p.Lexing.pos_lnum , + (p.Lexing.pos_cnum) - (p.Lexing.pos_bol)) +;; + +let next_node_id (ps:pstate) : node_id = + let id = !(ps.pstate_node_id) in + ps.pstate_node_id := Node ((int_of_node id)+1); + id +;; + +let next_opaque_id (ps:pstate) : opaque_id = + let id = !(ps.pstate_opaque_id) in + ps.pstate_opaque_id := Opaque ((int_of_opaque id)+1); + id +;; + +let span + (ps:pstate) + (apos:pos) + (bpos:pos) + (x:'a) + : 'a identified = + let span = { lo = apos; hi = bpos } in + let id = next_node_id ps in + iflog ps (fun _ -> log ps "span for node #%d: %s" + (int_of_node id) (Session.string_of_span span)); + htab_put ps.pstate_sess.Session.sess_spans id span; + { node = x; id = id } +;; + +let decl p i = + { Ast.decl_params = p; + Ast.decl_item = i } +;; + +let spans + (ps:pstate) + (things:('a identified) array) + (apos:pos) + (thing:'a) + : ('a identified) array = + Array.append things [| (span ps apos (lexpos ps) thing) |] +;; + +(* + * The point of this is to make a new node_id entry for a node that is a + * "copy" of an lval returned from somewhere else. For example if you create + * a temp, the lval it returns can only be used in *one* place, for the + * node_id denotes the place that lval is first used; subsequent uses of + * 'the same' reference must clone_lval it into a new node_id. Otherwise + * there is trouble. + *) + +let clone_span + (ps:pstate) + (oldnode:'a identified) + (newthing:'b) + : 'b identified = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans oldnode.id in + span ps s.lo s.hi newthing +;; + +let rec clone_lval (ps:pstate) (lval:Ast.lval) : Ast.lval = + match lval with + Ast.LVAL_base nb -> + let nnb = clone_span ps nb nb.node in + Ast.LVAL_base nnb + | Ast.LVAL_ext (base, ext) -> + Ast.LVAL_ext ((clone_lval ps base), ext) +;; + +let clone_atom (ps:pstate) (atom:Ast.atom) : Ast.atom = + match atom with + Ast.ATOM_literal _ -> atom + | Ast.ATOM_lval lv -> Ast.ATOM_lval (clone_lval ps lv) +;; + +let ctxt (n:string) (f:pstate -> 'a) (ps:pstate) : 'a = + (ps.pstate_ctxt <- (n, lexpos ps) :: ps.pstate_ctxt; + let res = f ps in + ps.pstate_ctxt <- List.tl ps.pstate_ctxt; + res) +;; + +let rstr (r:bool) (f:pstate -> 'a) (ps:pstate) : 'a = + let prev = ps.pstate_rstr in + (ps.pstate_rstr <- r; + let res = f ps in + ps.pstate_rstr <- prev; + res) +;; + +let err (str:string) (ps:pstate) = + (Parse_err (ps, (str))) +;; + + +let (slot_nil:Ast.slot) = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = false; + Ast.slot_ty = Some Ast.TY_nil } +;; + +let (slot_auto:Ast.slot) = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = true; + Ast.slot_ty = None } +;; + +let build_tmp + (ps:pstate) + (slot:Ast.slot) + (apos:pos) + (bpos:pos) + : (temp_id * Ast.lval * Ast.stmt) = + let nonce = !(ps.pstate_temp_id) in + ps.pstate_temp_id := Temp ((int_of_temp nonce)+1); + iflog ps + (fun _ -> log ps "building temporary %d" (int_of_temp nonce)); + let decl = Ast.DECL_slot (Ast.KEY_temp nonce, (span ps apos bpos slot)) in + let declstmt = span ps apos bpos (Ast.STMT_decl decl) in + let tmp = Ast.LVAL_base (span ps apos bpos (Ast.BASE_temp nonce)) in + (nonce, tmp, declstmt) +;; + +(* Simple helpers *) + +(* FIXME: please rename these, they make eyes bleed. *) + +let arr (ls:'a list) : 'a array = Array.of_list ls ;; +let arl (ls:'a list) : 'a array = Array.of_list (List.rev ls) ;; +let arj (ar:('a array array)) = Array.concat (Array.to_list ar) ;; +let arj1st (pairs:(('a array) * 'b) array) : (('a array) * 'b array) = + let (az, bz) = List.split (Array.to_list pairs) in + (Array.concat az, Array.of_list bz) + + +(* Bottom-most parser actions. *) + +let peek (ps:pstate) : token = + iflog ps + begin + fun _ -> + log ps "peeking at: %s // %s" + (string_of_tok ps.pstate_peek) + (match ps.pstate_ctxt with + (s, _) :: _ -> s + | _ -> "<empty>") + end; + ps.pstate_peek +;; + + +let bump (ps:pstate) : unit = + begin + iflog ps (fun _ -> log ps "bumping past: %s" + (string_of_tok ps.pstate_peek)); + ps.pstate_peek <- Lexer.token ps.pstate_lexbuf + end +;; + +let bump_bracequote (ps:pstate) : unit = + begin + assert (ps.pstate_peek = LBRACE); + iflog ps (fun _ -> log ps "bumping past: %s" + (string_of_tok ps.pstate_peek)); + let buf = Buffer.create 32 in + ps.pstate_peek <- Lexer.bracequote buf 1 ps.pstate_lexbuf + end +;; + + +let expect (ps:pstate) (t:token) : unit = + let p = peek ps in + if p == t + then bump ps + else + let msg = ("Expected '" ^ (string_of_tok t) ^ + "', found '" ^ (string_of_tok p ) ^ "'") in + raise (Parse_err (ps, msg)) +;; + +let unexpected (ps:pstate) = + err ("Unexpected token '" ^ (string_of_tok (peek ps)) ^ "'") ps +;; + + + +(* Parser combinators. *) + +let one_or_more + (sep:token) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + let accum = ref [prule ps] in + while peek ps == sep + do + bump ps; + accum := (prule ps) :: !accum + done; + arl !accum +;; + +let bracketed_seq + (mandatory:int) + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + expect ps bra; + let accum = ref [] in + let dosep _ = + (match sepOpt with + None -> () + | Some tok -> + if (!accum = []) + then () + else expect ps tok) + in + while mandatory > List.length (!accum) do + dosep (); + accum := (prule ps) :: (!accum) + done; + while (not (peek ps = ket)) + do + dosep (); + accum := (prule ps) :: !accum + done; + expect ps ket; + arl !accum +;; + + +let bracketed_zero_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 0 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + + +let paren_comma_list + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_zero_or_more LPAREN RPAREN (Some COMMA) prule ps +;; + +let bracketed_one_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 1 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + +let bracketed_two_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 2 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + + +let bracketed (bra:token) (ket:token) (prule:pstate -> 'a) (ps:pstate) : 'a = + expect ps bra; + let res = ctxt "bracketed" prule ps in + expect ps ket; + res +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/pexp.ml b/src/boot/fe/pexp.ml new file mode 100644 index 00000000..49eeeb5b --- /dev/null +++ b/src/boot/fe/pexp.ml @@ -0,0 +1,1354 @@ + +open Common;; +open Token;; +open Parser;; + +(* NB: pexps (parser-expressions) are only used transiently during + * parsing, static-evaluation and syntax-expansion. They're desugared + * into the general "item" AST and/or evaluated as part of the + * outermost "cexp" expressions. Expressions that can show up in source + * correspond to this loose grammar and have a wide-ish flexibility in + * *theoretical* composition; only subsets of those compositions are + * legal in various AST contexts. + * + * Desugaring on the fly is unfortunately complicated enough to require + * -- or at least "make much more convenient" -- this two-pass + * routine. + *) + +type pexp' = + PEXP_call of (pexp * pexp array) + | PEXP_spawn of (Ast.domain * pexp) + | PEXP_bind of (pexp * pexp option array) + | PEXP_rec of ((Ast.ident * pexp) array * pexp option) + | PEXP_tup of (pexp array) + | PEXP_vec of (Ast.slot * (pexp array)) + | PEXP_port + | PEXP_chan of (pexp option) + | PEXP_binop of (Ast.binop * pexp * pexp) + | PEXP_lazy_and of (pexp * pexp) + | PEXP_lazy_or of (pexp * pexp) + | PEXP_unop of (Ast.unop * pexp) + | PEXP_lval of plval + | PEXP_lit of Ast.lit + | PEXP_str of string + | PEXP_mutable of pexp + | PEXP_exterior of pexp + | PEXP_custom of Ast.name * (token array) * (string option) + +and plval = + PLVAL_ident of Ast.ident + | PLVAL_app of (Ast.ident * (Ast.ty array)) + | PLVAL_ext_name of (pexp * Ast.name_component) + | PLVAL_ext_pexp of (pexp * pexp) + +and pexp = pexp' Common.identified +;; + +(* Pexp grammar. Includes names, idents, types, constrs, binops and unops, + etc. *) + +let parse_ident (ps:pstate) : Ast.ident = + match peek ps with + IDENT id -> (bump ps; id) + (* Decay IDX tokens to identifiers if they occur ousdide name paths. *) + | IDX i -> (bump ps; string_of_tok (IDX i)) + | _ -> raise (unexpected ps) +;; + +(* Enforces the restricted pexp grammar when applicable (e.g. after "bind") *) +let check_rstr_start (ps:pstate) : 'a = + if (ps.pstate_rstr) then + match peek ps with + IDENT _ | LPAREN -> () + | _ -> raise (unexpected ps) +;; + +let rec parse_name_component (ps:pstate) : Ast.name_component = + match peek ps with + IDENT id -> + (bump ps; + match peek ps with + LBRACKET -> + let tys = + ctxt "name_component: apply" + (bracketed_one_or_more LBRACKET RBRACKET + (Some COMMA) parse_ty) ps + in + Ast.COMP_app (id, tys) + | _ -> Ast.COMP_ident id) + + | IDX i -> + bump ps; + Ast.COMP_idx i + | _ -> raise (unexpected ps) + +and parse_name_base (ps:pstate) : Ast.name_base = + match peek ps with + IDENT i -> + (bump ps; + match peek ps with + LBRACKET -> + let tys = + ctxt "name_base: apply" + (bracketed_one_or_more LBRACKET RBRACKET + (Some COMMA) parse_ty) ps + in + Ast.BASE_app (i, tys) + | _ -> Ast.BASE_ident i) + | _ -> raise (unexpected ps) + +and parse_name_ext (ps:pstate) (base:Ast.name) : Ast.name = + match peek ps with + DOT -> + bump ps; + let comps = one_or_more DOT parse_name_component ps in + Array.fold_left (fun x y -> Ast.NAME_ext (x, y)) base comps + | _ -> base + + +and parse_name (ps:pstate) : Ast.name = + let base = Ast.NAME_base (parse_name_base ps) in + let name = parse_name_ext ps base in + if Ast.sane_name name + then name + else raise (err "malformed name" ps) + +and parse_carg_base (ps:pstate) : Ast.carg_base = + match peek ps with + STAR -> bump ps; Ast.BASE_formal + | _ -> Ast.BASE_named (parse_name_base ps) + +and parse_carg (ps:pstate) : Ast.carg = + match peek ps with + IDENT _ -> + begin + let base = Ast.CARG_base (parse_carg_base ps) in + let path = + match peek ps with + DOT -> + bump ps; + let comps = one_or_more DOT parse_name_component ps in + Array.fold_left + (fun x y -> Ast.CARG_ext (x, y)) base comps + | _ -> base + in + Ast.CARG_path path + end + | _ -> + Ast.CARG_lit (parse_lit ps) + + +and parse_constraint (ps:pstate) : Ast.constr = + match peek ps with + + (* + * NB: A constraint *looks* a lot like an EXPR_call, but is restricted + * syntactically: the constraint name needs to be a name (not an lval) + * and the constraint args all need to be cargs, which are similar to + * names but can begin with the 'formal' base anchor '*'. + *) + + IDENT _ -> + let n = ctxt "constraint: name" parse_name ps in + let args = ctxt "constraint: args" + (bracketed_zero_or_more + LPAREN RPAREN (Some COMMA) + parse_carg) ps + in + { Ast.constr_name = n; + Ast.constr_args = args } + | _ -> raise (unexpected ps) + + +and parse_constrs (ps:pstate) : Ast.constrs = + ctxt "state: constraints" (one_or_more COMMA parse_constraint) ps + +and parse_optional_trailing_constrs (ps:pstate) : Ast.constrs = + match peek ps with + COLON -> (bump ps; parse_constrs ps) + | _ -> [| |] + +and parse_effect (ps:pstate) : Ast.effect = + match peek ps with + IO -> bump ps; Ast.IO + | STATE -> bump ps; Ast.STATE + | UNSAFE -> bump ps; Ast.UNSAFE + | _ -> Ast.PURE + +and parse_ty_fn + (effect:Ast.effect) + (ps:pstate) + : (Ast.ty_fn * Ast.ident option) = + match peek ps with + FN | ITER -> + let is_iter = (peek ps) = ITER in + bump ps; + let ident = + match peek ps with + IDENT i -> bump ps; Some i + | _ -> None + in + let in_slots = + match peek ps with + _ -> + bracketed_zero_or_more LPAREN RPAREN (Some COMMA) + (parse_slot_and_optional_ignored_ident true) ps + in + let out_slot = + match peek ps with + RARROW -> (bump ps; parse_slot false ps) + | _ -> slot_nil + in + let constrs = parse_optional_trailing_constrs ps in + let tsig = { Ast.sig_input_slots = in_slots; + Ast.sig_input_constrs = constrs; + Ast.sig_output_slot = out_slot; } + in + let taux = { Ast.fn_effect = effect; + Ast.fn_is_iter = is_iter; } + in + let tfn = (tsig, taux) in + (tfn, ident) + + | _ -> raise (unexpected ps) + +and check_dup_rec_labels ps labels = + arr_check_dups labels + (fun l _ -> + raise (err (Printf.sprintf + "duplicate record label: %s" l) ps)); + + +and parse_atomic_ty (ps:pstate) : Ast.ty = + match peek ps with + + BOOL -> + bump ps; + Ast.TY_bool + + | INT -> + bump ps; + Ast.TY_int + + | UINT -> + bump ps; + Ast.TY_uint + + | CHAR -> + bump ps; + Ast.TY_char + + | STR -> + bump ps; + Ast.TY_str + + | ANY -> + bump ps; + Ast.TY_any + + | TASK -> + bump ps; + Ast.TY_task + + | CHAN -> + bump ps; + Ast.TY_chan (bracketed LBRACKET RBRACKET parse_ty ps) + + | PORT -> + bump ps; + Ast.TY_port (bracketed LBRACKET RBRACKET parse_ty ps) + + | VEC -> + bump ps; + Ast.TY_vec (bracketed LBRACKET RBRACKET (parse_slot false) ps) + + | IDENT _ -> Ast.TY_named (parse_name ps) + + + | TAG -> + bump ps; + let htab = Hashtbl.create 4 in + let parse_tag_entry ps = + let ident = parse_ident ps in + let tup = + match peek ps with + LPAREN -> paren_comma_list (parse_slot false) ps + | _ -> raise (err "tag variant missing argument list" ps) + in + htab_put htab (Ast.NAME_base (Ast.BASE_ident ident)) tup + in + let _ = + bracketed_one_or_more LPAREN RPAREN + (Some COMMA) (ctxt "tag: variant" parse_tag_entry) ps + in + Ast.TY_tag htab + + | REC -> + bump ps; + let parse_rec_entry ps = + let mut = parse_mutability ps in + let (slot, ident) = parse_slot_and_ident false ps in + (ident, apply_mutability slot mut) + in + let entries = paren_comma_list parse_rec_entry ps in + let labels = Array.map (fun (l, _) -> l) entries in + begin + check_dup_rec_labels ps labels; + Ast.TY_rec entries + end + + | TUP -> + bump ps; + let slots = paren_comma_list (parse_slot false) ps in + Ast.TY_tup slots + + | MACH m -> + bump ps; + Ast.TY_mach m + + | IO | STATE | UNSAFE | OBJ | FN | ITER -> + let effect = parse_effect ps in + begin + match peek ps with + OBJ -> + bump ps; + let methods = Hashtbl.create 0 in + let parse_method ps = + let effect = parse_effect ps in + let (tfn, ident) = parse_ty_fn effect ps in + expect ps SEMI; + match ident with + None -> + raise (err (Printf.sprintf + "missing method identifier") ps) + | Some i -> htab_put methods i tfn + in + ignore (bracketed_zero_or_more LBRACE RBRACE + None parse_method ps); + Ast.TY_obj (effect, methods) + + | FN | ITER -> + Ast.TY_fn (fst (parse_ty_fn effect ps)) + | _ -> raise (unexpected ps) + end + + | LPAREN -> + begin + bump ps; + match peek ps with + RPAREN -> + bump ps; + Ast.TY_nil + | _ -> + let t = parse_ty ps in + expect ps RPAREN; + t + end + + | _ -> raise (unexpected ps) + +and flag (ps:pstate) (tok:token) : bool = + if peek ps = tok + then (bump ps; true) + else false + +and parse_mutability (ps:pstate) : bool = + flag ps MUTABLE + +and apply_mutability (slot:Ast.slot) (mut:bool) : Ast.slot = + { slot with Ast.slot_mutable = mut } + +and parse_slot (aliases_ok:bool) (ps:pstate) : Ast.slot = + let mut = parse_mutability ps in + let mode = + match (peek ps, aliases_ok) with + (AT, _) -> bump ps; Ast.MODE_exterior + | (AND, true) -> bump ps; Ast.MODE_alias + | (AND, false) -> raise (err "alias slot in prohibited context" ps) + | _ -> Ast.MODE_interior + in + let ty = parse_ty ps in + { Ast.slot_mode = mode; + Ast.slot_mutable = mut; + Ast.slot_ty = Some ty } + +and parse_slot_and_ident + (aliases_ok:bool) + (ps:pstate) + : (Ast.slot * Ast.ident) = + let slot = ctxt "slot and ident: slot" (parse_slot aliases_ok) ps in + let ident = ctxt "slot and ident: ident" parse_ident ps in + (slot, ident) + +and parse_slot_and_optional_ignored_ident + (aliases_ok:bool) + (ps:pstate) + : Ast.slot = + let slot = parse_slot aliases_ok ps in + begin + match peek ps with + IDENT _ -> bump ps + | _ -> () + end; + slot + +and parse_identified_slot + (aliases_ok:bool) + (ps:pstate) + : Ast.slot identified = + let apos = lexpos ps in + let slot = parse_slot aliases_ok ps in + let bpos = lexpos ps in + span ps apos bpos slot + +and parse_constrained_ty (ps:pstate) : Ast.ty = + let base = ctxt "ty: base" parse_atomic_ty ps in + match peek ps with + COLON -> + bump ps; + let constrs = ctxt "ty: constrs" parse_constrs ps in + Ast.TY_constrained (base, constrs) + + | _ -> base + +and parse_ty (ps:pstate) : Ast.ty = + parse_constrained_ty ps + + +and parse_rec_input (ps:pstate) : (Ast.ident * pexp) = + let lab = (ctxt "rec input: label" parse_ident ps) in + match peek ps with + EQ -> + bump ps; + let pexp = ctxt "rec input: expr" parse_pexp ps in + (lab, pexp) + | _ -> raise (unexpected ps) + + +and parse_rec_body (ps:pstate) : pexp' = (*((Ast.ident * pexp) array) =*) + begin + expect ps LPAREN; + match peek ps with + RPAREN -> PEXP_rec ([||], None) + | WITH -> raise (err "empty record extension" ps) + | _ -> + let inputs = one_or_more COMMA parse_rec_input ps in + let labels = Array.map (fun (l, _) -> l) inputs in + begin + check_dup_rec_labels ps labels; + match peek ps with + RPAREN -> (bump ps; PEXP_rec (inputs, None)) + | WITH -> + begin + bump ps; + let base = + ctxt "rec input: extension base" + parse_pexp ps + in + expect ps RPAREN; + PEXP_rec (inputs, Some base) + end + | _ -> raise (err "expected 'with' or ')'" ps) + end + end + + +and parse_lit (ps:pstate) : Ast.lit = + match peek ps with + LIT_INT (n,s) -> (bump ps; Ast.LIT_int (n,s)) + | LIT_CHAR c -> (bump ps; Ast.LIT_char c) + | LIT_BOOL b -> (bump ps; Ast.LIT_bool b) + | _ -> raise (unexpected ps) + + +and parse_bottom_pexp (ps:pstate) : pexp = + check_rstr_start ps; + let apos = lexpos ps in + match peek ps with + + MUTABLE -> + bump ps; + let inner = parse_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_mutable inner) + + | AT -> + bump ps; + let inner = parse_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_exterior inner) + + | TUP -> + bump ps; + let pexps = ctxt "paren pexps(s)" (rstr false parse_pexp_list) ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_tup pexps) + + | REC -> + bump ps; + let body = ctxt "rec pexp: rec body" parse_rec_body ps in + let bpos = lexpos ps in + span ps apos bpos body + + | VEC -> + bump ps; + begin + let slot = + match peek ps with + LBRACKET -> bracketed LBRACKET RBRACKET (parse_slot false) ps + | _ -> { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = false; + Ast.slot_ty = None } + in + let pexps = ctxt "vec pexp: exprs" parse_pexp_list ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_vec (slot, pexps)) + end + + + | LIT_STR s -> + bump ps; + let bpos = lexpos ps in + span ps apos bpos (PEXP_str s) + + | PORT -> + begin + bump ps; + expect ps LPAREN; + expect ps RPAREN; + let bpos = lexpos ps in + span ps apos bpos (PEXP_port) + end + + | CHAN -> + begin + bump ps; + let port = + match peek ps with + LPAREN -> + begin + bump ps; + match peek ps with + RPAREN -> (bump ps; None) + | _ -> + let lv = parse_pexp ps in + expect ps RPAREN; + Some lv + end + | _ -> raise (unexpected ps) + in + let bpos = lexpos ps in + span ps apos bpos (PEXP_chan port) + end + + | SPAWN -> + bump ps; + let domain = + match peek ps with + THREAD -> bump ps; Ast.DOMAIN_thread + | _ -> Ast.DOMAIN_local + in + let pexp = ctxt "spawn [domain] pexp: init call" parse_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_spawn (domain, pexp)) + + | BIND -> + let apos = lexpos ps in + begin + bump ps; + let pexp = ctxt "bind pexp: function" (rstr true parse_pexp) ps in + let args = + ctxt "bind args" + (paren_comma_list parse_bind_arg) ps + in + let bpos = lexpos ps in + span ps apos bpos (PEXP_bind (pexp, args)) + end + + | IDENT i -> + begin + bump ps; + match peek ps with + LBRACKET -> + begin + let tys = + ctxt "apply-type expr" + (bracketed_one_or_more LBRACKET RBRACKET + (Some COMMA) parse_ty) ps + in + let bpos = lexpos ps in + span ps apos bpos (PEXP_lval (PLVAL_app (i, tys))) + end + + | _ -> + begin + let bpos = lexpos ps in + span ps apos bpos (PEXP_lval (PLVAL_ident i)) + end + end + + | (INT | UINT | CHAR | BOOL) as tok -> + begin + bump ps; + expect ps LPAREN; + match peek ps with + (LIT_INT _ | LIT_CHAR _ | LIT_BOOL _) as tok2 -> + bump ps; + expect ps RPAREN; + let i = match tok2 with + LIT_INT i -> i + | LIT_CHAR c -> (Int64.of_int c, + Common.escaped_char c) + | LIT_BOOL b -> if b then (1L, "1") else (0L, "0") + | _ -> bug () "expected int/char literal" + in + let bpos = lexpos ps in + span ps apos bpos + (PEXP_lit + (match tok with + INT -> Ast.LIT_int i + | UINT -> Ast.LIT_uint i + | CHAR -> + Ast.LIT_char + (Int64.to_int (fst i)) + | BOOL -> Ast.LIT_bool (fst i <> 0L) + | _ -> bug () "expected int/uint/char/bool token")) + + | _ -> + let pexp = parse_pexp ps in + expect ps RPAREN; + let bpos = lexpos ps in + let t = + match tok with + INT -> Ast.TY_int + | UINT -> Ast.TY_uint + | CHAR -> Ast.TY_char + | BOOL -> Ast.TY_bool + | _ -> bug () "expected int/uint/char/bool token" + in + let t = span ps apos bpos t in + span ps apos bpos + (PEXP_unop ((Ast.UNOP_cast t), pexp)) + end + + | MACH m -> + let literal (num, str) = + let _ = bump ps in + let _ = expect ps RPAREN in + let bpos = lexpos ps in + let check_range (lo:int64) (hi:int64) : unit = + if (num < lo) or (num > hi) + then raise (err (Printf.sprintf + "integral literal %Ld out of range [%Ld,%Ld]" + num lo hi) ps) + else () + in + begin + match m with + TY_u8 -> check_range 0L 0xffL + | TY_u16 -> check_range 0L 0xffffL + | TY_u32 -> check_range 0L 0xffffffffL + (* | TY_u64 -> ... *) + | TY_i8 -> check_range (-128L) 127L + | TY_i16 -> check_range (-32768L) 32767L + | TY_i32 -> check_range (-2147483648L) 2147483647L + (* + | TY_i64 -> ... + | TY_f32 -> ... + | TY_f64 -> ... + *) + | _ -> () + end; + span ps apos bpos + (PEXP_lit + (Ast.LIT_mach + (m, num, str))) + + in + begin + bump ps; + expect ps LPAREN; + match peek ps with + LIT_INT (n,s) -> literal (n,s) + | MINUS -> + begin + bump ps; + match peek ps with + LIT_INT (n,s) -> + literal (Int64.neg n, "-" ^ s) + | _ -> raise (unexpected ps) + end + | _ -> + let pexp = parse_pexp ps in + expect ps RPAREN; + let bpos = lexpos ps in + let t = span ps apos bpos (Ast.TY_mach m) in + span ps apos bpos + (PEXP_unop ((Ast.UNOP_cast t), pexp)) + end + + | POUND -> + bump ps; + let name = parse_name ps in + let toks = + match peek ps with + LPAREN -> + bump ps; + let toks = Queue.create () in + while (peek ps) <> RPAREN + do + Queue.add (peek ps) toks; + bump ps; + done; + expect ps RPAREN; + queue_to_arr toks + | _ -> [| |] + in + let str = + match peek ps with + LBRACE -> + begin + bump_bracequote ps; + match peek ps with + BRACEQUOTE s -> bump ps; Some s + | _ -> raise (unexpected ps) + end + | _ -> None + in + let bpos = lexpos ps in + span ps apos bpos + (PEXP_custom (name, toks, str)) + + | LPAREN -> + begin + bump ps; + match peek ps with + RPAREN -> + bump ps; + let bpos = lexpos ps in + span ps apos bpos (PEXP_lit Ast.LIT_nil) + | _ -> + let pexp = parse_pexp ps in + expect ps RPAREN; + pexp + end + + | _ -> + let lit = parse_lit ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_lit lit) + + +and parse_bind_arg (ps:pstate) : pexp option = + match peek ps with + UNDERSCORE -> (bump ps; None) + | _ -> Some (parse_pexp ps) + + +and parse_ext_pexp (ps:pstate) (pexp:pexp) : pexp = + let apos = lexpos ps in + match peek ps with + LPAREN -> + if ps.pstate_rstr + then pexp + else + let args = parse_pexp_list ps in + let bpos = lexpos ps in + let ext = span ps apos bpos (PEXP_call (pexp, args)) in + parse_ext_pexp ps ext + + | DOT -> + begin + bump ps; + let ext = + match peek ps with + LPAREN -> + bump ps; + let rhs = rstr false parse_pexp ps in + expect ps RPAREN; + let bpos = lexpos ps in + span ps apos bpos + (PEXP_lval (PLVAL_ext_pexp (pexp, rhs))) + | _ -> + let rhs = parse_name_component ps in + let bpos = lexpos ps in + span ps apos bpos + (PEXP_lval (PLVAL_ext_name (pexp, rhs))) + in + parse_ext_pexp ps ext + end + + | _ -> pexp + + +and parse_negation_pexp (ps:pstate) : pexp = + let apos = lexpos ps in + match peek ps with + NOT -> + bump ps; + let rhs = ctxt "negation pexp" parse_negation_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_unop (Ast.UNOP_not, rhs)) + + | TILDE -> + bump ps; + let rhs = ctxt "negation pexp" parse_negation_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_unop (Ast.UNOP_bitnot, rhs)) + + | MINUS -> + bump ps; + let rhs = ctxt "negation pexp" parse_negation_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_unop (Ast.UNOP_neg, rhs)) + + | _ -> + let lhs = parse_bottom_pexp ps in + parse_ext_pexp ps lhs + + +(* Binops are all left-associative, *) +(* so we factor out some of the parsing code here. *) +and binop_rhs + (ps:pstate) + (name:string) + (apos:pos) + (lhs:pexp) + (rhs_parse_fn:pstate -> pexp) + (op:Ast.binop) + : pexp = + bump ps; + let rhs = (ctxt (name ^ " rhs") rhs_parse_fn ps) in + let bpos = lexpos ps in + span ps apos bpos (PEXP_binop (op, lhs, rhs)) + + +and parse_factor_pexp (ps:pstate) : pexp = + let name = "factor pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_negation_pexp ps in + match peek ps with + STAR -> binop_rhs ps name apos lhs parse_factor_pexp Ast.BINOP_mul + | SLASH -> binop_rhs ps name apos lhs parse_factor_pexp Ast.BINOP_div + | PERCENT -> binop_rhs ps name apos lhs parse_factor_pexp Ast.BINOP_mod + | _ -> lhs + + +and parse_term_pexp (ps:pstate) : pexp = + let name = "term pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_factor_pexp ps in + match peek ps with + PLUS -> binop_rhs ps name apos lhs parse_term_pexp Ast.BINOP_add + | MINUS -> binop_rhs ps name apos lhs parse_term_pexp Ast.BINOP_sub + | _ -> lhs + + +and parse_shift_pexp (ps:pstate) : pexp = + let name = "shift pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_term_pexp ps in + match peek ps with + LSL -> binop_rhs ps name apos lhs parse_shift_pexp Ast.BINOP_lsl + | LSR -> binop_rhs ps name apos lhs parse_shift_pexp Ast.BINOP_lsr + | ASR -> binop_rhs ps name apos lhs parse_shift_pexp Ast.BINOP_asr + | _ -> lhs + + +and parse_and_pexp (ps:pstate) : pexp = + let name = "and pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_shift_pexp ps in + match peek ps with + AND -> binop_rhs ps name apos lhs parse_and_pexp Ast.BINOP_and + | _ -> lhs + + +and parse_xor_pexp (ps:pstate) : pexp = + let name = "xor pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_and_pexp ps in + match peek ps with + CARET -> binop_rhs ps name apos lhs parse_xor_pexp Ast.BINOP_xor + | _ -> lhs + + +and parse_or_pexp (ps:pstate) : pexp = + let name = "or pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_xor_pexp ps in + match peek ps with + OR -> binop_rhs ps name apos lhs parse_or_pexp Ast.BINOP_or + | _ -> lhs + + +and parse_relational_pexp (ps:pstate) : pexp = + let name = "relational pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_or_pexp ps in + match peek ps with + LT -> binop_rhs ps name apos lhs parse_relational_pexp Ast.BINOP_lt + | LE -> binop_rhs ps name apos lhs parse_relational_pexp Ast.BINOP_le + | GE -> binop_rhs ps name apos lhs parse_relational_pexp Ast.BINOP_ge + | GT -> binop_rhs ps name apos lhs parse_relational_pexp Ast.BINOP_gt + | _ -> lhs + + +and parse_equality_pexp (ps:pstate) : pexp = + let name = "equality pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_relational_pexp ps in + match peek ps with + EQEQ -> binop_rhs ps name apos lhs parse_equality_pexp Ast.BINOP_eq + | NE -> binop_rhs ps name apos lhs parse_equality_pexp Ast.BINOP_ne + | _ -> lhs + + +and parse_andand_pexp (ps:pstate) : pexp = + let name = "andand pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_equality_pexp ps in + match peek ps with + ANDAND -> + bump ps; + let rhs = parse_andand_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_lazy_and (lhs, rhs)) + + | _ -> lhs + + +and parse_oror_pexp (ps:pstate) : pexp = + let name = "oror pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_andand_pexp ps in + match peek ps with + OROR -> + bump ps; + let rhs = parse_oror_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_lazy_or (lhs, rhs)) + + | _ -> lhs + +and parse_as_pexp (ps:pstate) : pexp = + let apos = lexpos ps in + let pexp = ctxt "as pexp" parse_oror_pexp ps in + match peek ps with + AS -> + bump ps; + let tapos = lexpos ps in + let t = parse_ty ps in + let bpos = lexpos ps in + let t = span ps tapos bpos t in + span ps apos bpos + (PEXP_unop ((Ast.UNOP_cast t), pexp)) + + | _ -> pexp + +and parse_pexp (ps:pstate) : pexp = + parse_as_pexp ps + + +and parse_pexp_list (ps:pstate) : pexp array = + match peek ps with + LPAREN -> + bracketed_zero_or_more LPAREN RPAREN (Some COMMA) + (ctxt "pexp list" parse_pexp) ps + | _ -> raise (unexpected ps) + +;; + +(* + * FIXME: This is a crude approximation of the syntax-extension system, + * for purposes of prototyping and/or hard-wiring any extensions we + * wish to use in the bootstrap compiler. The eventual aim is to permit + * loading rust crates to process extensions, but this will likely + * require a rust-based frontend, or an ocaml-FFI-based connection to + * rust crates. At the moment we have neither. + *) + +let expand_pexp_custom + (ps:pstate) + (name:Ast.name) + (args:token array) + (body:string option) + : pexp' = + let nstr = Ast.fmt_to_str Ast.fmt_name name in + match (nstr, (Array.length args), body) with + + ("shell", 0, Some cmd) -> + let c = Unix.open_process_in cmd in + let b = Buffer.create 32 in + let rec r _ = + try + Buffer.add_char b (input_char c); + r () + with + End_of_file -> + ignore (Unix.close_process_in c); + Buffer.contents b + in + PEXP_str (r ()) + + | _ -> + raise (err ("unsupported syntax extension: " ^ nstr) ps) +;; + +(* + * Desugarings depend on context: + * + * - If a pexp is used on the RHS of an assignment, it's turned into + * an initialization statement such as STMT_init_rec or such. This + * removes the possibility of initializing into a temp only to + * copy out. If the topmost pexp in such a desugaring is an atom, + * unop or binop, of course, it will still just emit a STMT_copy + * on a primitive expression. + * + * - If a pexp is used in the context where an atom is required, a + * statement declaring a temporary and initializing it with the + * result of the pexp is prepended, and the temporary atom is used. + *) + +let rec desugar_lval (ps:pstate) (pexp:pexp) : (Ast.stmt array * Ast.lval) = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans pexp.id in + let (apos, bpos) = (s.lo, s.hi) in + match pexp.node with + + PEXP_lval (PLVAL_ident ident) -> + let nb = span ps apos bpos (Ast.BASE_ident ident) in + ([||], Ast.LVAL_base nb) + + | PEXP_lval (PLVAL_app (ident, tys)) -> + let nb = span ps apos bpos (Ast.BASE_app (ident, tys)) in + ([||], Ast.LVAL_base nb) + + | PEXP_lval (PLVAL_ext_name (base_pexp, comp)) -> + let (base_stmts, base_atom) = desugar_expr_atom ps base_pexp in + let base_lval = atom_lval ps base_atom in + (base_stmts, Ast.LVAL_ext (base_lval, Ast.COMP_named comp)) + + | PEXP_lval (PLVAL_ext_pexp (base_pexp, ext_pexp)) -> + let (base_stmts, base_atom) = desugar_expr_atom ps base_pexp in + let (ext_stmts, ext_atom) = desugar_expr_atom ps ext_pexp in + let base_lval = atom_lval ps base_atom in + (Array.append base_stmts ext_stmts, + Ast.LVAL_ext (base_lval, Ast.COMP_atom (clone_atom ps ext_atom))) + + | _ -> + let (stmts, atom) = desugar_expr_atom ps pexp in + (stmts, atom_lval ps atom) + + +and desugar_expr + (ps:pstate) + (pexp:pexp) + : (Ast.stmt array * Ast.expr) = + match pexp.node with + + PEXP_unop (op, pe) -> + let (stmts, at) = desugar_expr_atom ps pe in + (stmts, Ast.EXPR_unary (op, at)) + + | PEXP_binop (op, lhs, rhs) -> + let (lhs_stmts, lhs_atom) = desugar_expr_atom ps lhs in + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + (Array.append lhs_stmts rhs_stmts, + Ast.EXPR_binary (op, lhs_atom, rhs_atom)) + + | _ -> + let (stmts, at) = desugar_expr_atom ps pexp in + (stmts, Ast.EXPR_atom at) + + +and desugar_opt_expr_atom + (ps:pstate) + (po:pexp option) + : (Ast.stmt array * Ast.atom option) = + match po with + None -> ([| |], None) + | Some pexp -> + let (stmts, atom) = desugar_expr_atom ps pexp in + (stmts, Some atom) + + +and desugar_expr_atom + (ps:pstate) + (pexp:pexp) + : (Ast.stmt array * Ast.atom) = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans pexp.id in + let (apos, bpos) = (s.lo, s.hi) in + match pexp.node with + + PEXP_unop _ + | PEXP_binop _ + | PEXP_lazy_or _ + | PEXP_lazy_and _ + | PEXP_rec _ + | PEXP_tup _ + | PEXP_str _ + | PEXP_vec _ + | PEXP_port + | PEXP_chan _ + | PEXP_call _ + | PEXP_bind _ + | PEXP_spawn _ -> + let (_, tmp, decl_stmt) = build_tmp ps slot_auto apos bpos in + let stmts = desugar_expr_init ps tmp pexp in + (Array.append [| decl_stmt |] stmts, + Ast.ATOM_lval (clone_lval ps tmp)) + + | PEXP_lit lit -> + ([||], Ast.ATOM_literal (span ps apos bpos lit)) + + | PEXP_lval _ -> + let (stmts, lval) = desugar_lval ps pexp in + (stmts, Ast.ATOM_lval lval) + + | PEXP_exterior _ -> + raise (err "exterior symbol in atom context" ps) + + | PEXP_mutable _ -> + raise (err "mutable keyword in atom context" ps) + + | PEXP_custom (n, a, b) -> + desugar_expr_atom ps + { pexp with node = expand_pexp_custom ps n a b } + + +and desugar_expr_mode_mut_atom + (ps:pstate) + (pexp:pexp) + : (Ast.stmt array * (Ast.mode * bool * Ast.atom)) = + let desugar_inner mode mut e = + let (stmts, atom) = desugar_expr_atom ps e in + (stmts, (mode, mut, atom)) + in + match pexp.node with + PEXP_mutable {node=(PEXP_exterior e); id=_} -> + desugar_inner Ast.MODE_exterior true e + | PEXP_exterior e -> + desugar_inner Ast.MODE_exterior false e + | PEXP_mutable e -> + desugar_inner Ast.MODE_interior true e + | _ -> + desugar_inner Ast.MODE_interior false pexp + +and desugar_expr_atoms + (ps:pstate) + (pexps:pexp array) + : (Ast.stmt array * Ast.atom array) = + arj1st (Array.map (desugar_expr_atom ps) pexps) + +and desugar_opt_expr_atoms + (ps:pstate) + (pexps:pexp option array) + : (Ast.stmt array * Ast.atom option array) = + arj1st (Array.map (desugar_opt_expr_atom ps) pexps) + +and desugar_expr_mode_mut_atoms + (ps:pstate) + (pexps:pexp array) + : (Ast.stmt array * (Ast.mode * bool * Ast.atom) array) = + arj1st (Array.map (desugar_expr_mode_mut_atom ps) pexps) + +and desugar_expr_init + (ps:pstate) + (dst_lval:Ast.lval) + (pexp:pexp) + : (Ast.stmt array) = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans pexp.id in + let (apos, bpos) = (s.lo, s.hi) in + + (* Helpers. *) + let ss x = span ps apos bpos x in + let cp v = Ast.STMT_copy (clone_lval ps dst_lval, v) in + let aa x y = Array.append x y in + let ac xs = Array.concat xs in + + match pexp.node with + + PEXP_lit _ + | PEXP_lval _ -> + let (stmts, atom) = desugar_expr_atom ps pexp in + aa stmts [| ss (cp (Ast.EXPR_atom atom)) |] + + | PEXP_binop (op, lhs, rhs) -> + let (lhs_stmts, lhs_atom) = desugar_expr_atom ps lhs in + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + let copy_stmt = + ss (cp (Ast.EXPR_binary (op, lhs_atom, rhs_atom))) + in + ac [ lhs_stmts; rhs_stmts; [| copy_stmt |] ] + + (* x = a && b ==> if (a) { x = b; } else { x = false; } *) + + | PEXP_lazy_and (lhs, rhs) -> + let (lhs_stmts, lhs_atom) = desugar_expr_atom ps lhs in + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + let sthen = + ss (aa rhs_stmts [| ss (cp (Ast.EXPR_atom rhs_atom)) |]) + in + let selse = + ss [| ss (cp (Ast.EXPR_atom + (Ast.ATOM_literal (ss (Ast.LIT_bool false))))) |] + in + let sif = + ss (Ast.STMT_if { Ast.if_test = Ast.EXPR_atom lhs_atom; + Ast.if_then = sthen; + Ast.if_else = Some selse }) + in + aa lhs_stmts [| sif |] + + (* x = a || b ==> if (a) { x = true; } else { x = b; } *) + + | PEXP_lazy_or (lhs, rhs) -> + let (lhs_stmts, lhs_atom) = desugar_expr_atom ps lhs in + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + let sthen = + ss [| ss (cp (Ast.EXPR_atom + (Ast.ATOM_literal (ss (Ast.LIT_bool true))))) |] + in + let selse = + ss (aa rhs_stmts [| ss (cp (Ast.EXPR_atom rhs_atom)) |]) + in + let sif = + ss (Ast.STMT_if { Ast.if_test = Ast.EXPR_atom lhs_atom; + Ast.if_then = sthen; + Ast.if_else = Some selse }) + in + aa lhs_stmts [| sif |] + + + | PEXP_unop (op, rhs) -> + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + let expr = Ast.EXPR_unary (op, rhs_atom) in + let copy_stmt = ss (cp expr) in + aa rhs_stmts [| copy_stmt |] + + | PEXP_call (fn, args) -> + let (fn_stmts, fn_atom) = desugar_expr_atom ps fn in + let (arg_stmts, arg_atoms) = desugar_expr_atoms ps args in + let fn_lval = atom_lval ps fn_atom in + let call_stmt = ss (Ast.STMT_call (dst_lval, fn_lval, arg_atoms)) in + ac [ fn_stmts; arg_stmts; [| call_stmt |] ] + + | PEXP_bind (fn, args) -> + let (fn_stmts, fn_atom) = desugar_expr_atom ps fn in + let (arg_stmts, arg_atoms) = desugar_opt_expr_atoms ps args in + let fn_lval = atom_lval ps fn_atom in + let bind_stmt = ss (Ast.STMT_bind (dst_lval, fn_lval, arg_atoms)) in + ac [ fn_stmts; arg_stmts; [| bind_stmt |] ] + + | PEXP_spawn (domain, sub) -> + begin + match sub.node with + PEXP_call (fn, args) -> + let (fn_stmts, fn_atom) = desugar_expr_atom ps fn in + let (arg_stmts, arg_atoms) = desugar_expr_atoms ps args in + let fn_lval = atom_lval ps fn_atom in + let spawn_stmt = + ss (Ast.STMT_spawn (dst_lval, domain, fn_lval, arg_atoms)) + in + ac [ fn_stmts; arg_stmts; [| spawn_stmt |] ] + | _ -> raise (err "non-call spawn" ps) + end + + | PEXP_rec (args, base) -> + let (arg_stmts, entries) = + arj1st + begin + Array.map + begin + fun (ident, pexp) -> + let (stmts, (mode, mut, atom)) = + desugar_expr_mode_mut_atom ps pexp + in + (stmts, (ident, mode, mut, atom)) + end + args + end + in + begin + match base with + Some base -> + let (base_stmts, base_lval) = desugar_lval ps base in + let rec_stmt = + ss (Ast.STMT_init_rec + (dst_lval, entries, Some base_lval)) + in + ac [ arg_stmts; base_stmts; [| rec_stmt |] ] + | None -> + let rec_stmt = + ss (Ast.STMT_init_rec (dst_lval, entries, None)) + in + aa arg_stmts [| rec_stmt |] + end + + | PEXP_tup args -> + let (arg_stmts, arg_mode_atoms) = + desugar_expr_mode_mut_atoms ps args + in + let stmt = ss (Ast.STMT_init_tup (dst_lval, arg_mode_atoms)) in + aa arg_stmts [| stmt |] + + | PEXP_str s -> + let stmt = ss (Ast.STMT_init_str (dst_lval, s)) in + [| stmt |] + + | PEXP_vec (slot, args) -> + let (arg_stmts, arg_atoms) = desugar_expr_atoms ps args in + let stmt = ss (Ast.STMT_init_vec (dst_lval, slot, arg_atoms)) in + aa arg_stmts [| stmt |] + + | PEXP_port -> + [| ss (Ast.STMT_init_port dst_lval) |] + + | PEXP_chan pexp_opt -> + let (port_stmts, port_opt) = + match pexp_opt with + None -> ([||], None) + | Some port_pexp -> + begin + let (port_stmts, port_atom) = + desugar_expr_atom ps port_pexp + in + let port_lval = atom_lval ps port_atom in + (port_stmts, Some port_lval) + end + in + let chan_stmt = + ss + (Ast.STMT_init_chan (dst_lval, port_opt)) + in + aa port_stmts [| chan_stmt |] + + | PEXP_exterior _ -> + raise (err "exterior symbol in initialiser context" ps) + + | PEXP_mutable _ -> + raise (err "mutable keyword in initialiser context" ps) + + | PEXP_custom (n, a, b) -> + desugar_expr_init ps dst_lval + { pexp with node = expand_pexp_custom ps n a b } + + +and atom_lval (ps:pstate) (at:Ast.atom) : Ast.lval = + match at with + Ast.ATOM_lval lv -> lv + | Ast.ATOM_literal _ -> raise (err "literal where lval expected" ps) +;; + + + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/token.ml b/src/boot/fe/token.ml new file mode 100644 index 00000000..636e1ac2 --- /dev/null +++ b/src/boot/fe/token.ml @@ -0,0 +1,308 @@ +type token = + + (* Expression operator symbols *) + PLUS + | MINUS + | STAR + | SLASH + | PERCENT + | EQ + | LT + | LE + | EQEQ + | NE + | GE + | GT + | NOT + | TILDE + | CARET + | AND + | ANDAND + | OR + | OROR + | LSL + | LSR + | ASR + | OPEQ of token + | AS + | WITH + + (* Structural symbols *) + | AT + | DOT + | COMMA + | SEMI + | COLON + | RARROW + | SEND + | LARROW + | LPAREN + | RPAREN + | LBRACKET + | RBRACKET + | LBRACE + | RBRACE + + (* Module and crate keywords *) + | MOD + | USE + | AUTH + | META + + (* Metaprogramming keywords *) + | SYNTAX + | POUND + + (* Statement keywords *) + | IF + | ELSE + | DO + | WHILE + | ALT + | CASE + + | FAIL + | DROP + + | IN + | FOR + | EACH + | PUT + | RET + | BE + + (* Type and type-state keywords *) + | TYPE + | CHECK + | CLAIM + | PROVE + + (* Effect keywords *) + | IO + | STATE + | UNSAFE + + (* Type qualifiers *) + | NATIVE + | AUTO + | MUTABLE + + (* Name management *) + | IMPORT + | EXPORT + + (* Value / stmt declarators *) + | LET + + (* Magic runtime services *) + | LOG + | SPAWN + | BIND + | THREAD + | YIELD + | JOIN + + (* Literals *) + | LIT_INT of (int64 * string) + | LIT_FLO of string + | LIT_STR of string + | LIT_CHAR of int + | LIT_BOOL of bool + + (* Name components *) + | IDENT of string + | IDX of int + | UNDERSCORE + + (* Reserved type names *) + | BOOL + | INT + | UINT + | CHAR + | STR + | MACH of Common.ty_mach + + (* Algebraic type constructors *) + | REC + | TUP + | TAG + | VEC + | ANY + + (* Callable type constructors *) + | FN + | ITER + + (* Object type *) + | OBJ + + (* Comm and task types *) + | CHAN + | PORT + | TASK + + | EOF + + | BRACEQUOTE of string + +;; + +let rec string_of_tok t = + match t with + (* Operator symbols (mostly) *) + PLUS -> "+" + | MINUS -> "-" + | STAR -> "*" + | SLASH -> "/" + | PERCENT -> "%" + | EQ -> "=" + | LT -> "<" + | LE -> "<=" + | EQEQ -> "==" + | NE -> "!=" + | GE -> ">=" + | GT -> ">" + | TILDE -> "~" + | CARET -> "^" + | NOT -> "!" + | AND -> "&" + | ANDAND -> "&&" + | OR -> "|" + | OROR -> "||" + | LSL -> "<<" + | LSR -> ">>" + | ASR -> ">>>" + | OPEQ op -> string_of_tok op ^ "=" + | AS -> "as" + | WITH -> "with" + + (* Structural symbols *) + | AT -> "@" + | DOT -> "." + | COMMA -> "," + | SEMI -> ";" + | COLON -> ":" + | RARROW -> "->" + | SEND -> "<|" + | LARROW -> "<-" + | LPAREN -> "(" + | RPAREN -> ")" + | LBRACKET -> "[" + | RBRACKET -> "]" + | LBRACE -> "{" + | RBRACE -> "}" + + (* Module and crate keywords *) + | MOD -> "mod" + | USE -> "use" + | AUTH -> "auth" + + (* Metaprogramming keywords *) + | SYNTAX -> "syntax" + | META -> "meta" + | POUND -> "#" + + (* Control-flow keywords *) + | IF -> "if" + | ELSE -> "else" + | DO -> "do" + | WHILE -> "while" + | ALT -> "alt" + | CASE -> "case" + + | FAIL -> "fail" + | DROP -> "drop" + + | IN -> "in" + | FOR -> "for" + | EACH -> "each" + | PUT -> "put" + | RET -> "ret" + | BE -> "be" + + (* Type and type-state keywords *) + | TYPE -> "type" + | CHECK -> "check" + | CLAIM -> "claim" + | PROVE -> "prove" + + (* Effect keywords *) + | IO -> "io" + | STATE -> "state" + | UNSAFE -> "unsafe" + + (* Type qualifiers *) + | NATIVE -> "native" + | AUTO -> "auto" + | MUTABLE -> "mutable" + + (* Name management *) + | IMPORT -> "import" + | EXPORT -> "export" + + (* Value / stmt declarators. *) + | LET -> "let" + + (* Magic runtime services *) + | LOG -> "log" + | SPAWN -> "spawn" + | BIND -> "bind" + | THREAD -> "thread" + | YIELD -> "yield" + | JOIN -> "join" + + (* Literals *) + | LIT_INT (_,s) -> s + | LIT_FLO n -> n + | LIT_STR s -> ("\"" ^ (String.escaped s) ^ "\"") + | LIT_CHAR c -> ("'" ^ (Common.escaped_char c) ^ "'") + | LIT_BOOL b -> if b then "true" else "false" + + (* Name components *) + | IDENT s -> s + | IDX i -> ("_" ^ (string_of_int i)) + | UNDERSCORE -> "_" + + (* Reserved type names *) + | BOOL -> "bool" + | INT -> "int" + | UINT -> "uint" + | CHAR -> "char" + | STR -> "str" + | MACH m -> Common.string_of_ty_mach m + + (* Algebraic type constructors *) + | REC -> "rec" + | TUP -> "tup" + | TAG -> "tag" + | VEC -> "vec" + | ANY -> "any" + + (* Callable type constructors *) + | FN -> "fn" + | ITER -> "fn" + + (* Object type *) + | OBJ -> "obj" + + (* Ports and channels *) + | CHAN -> "chan" + | PORT -> "port" + + (* Taskess types *) + | TASK -> "task" + + | BRACEQUOTE _ -> "{...bracequote...}" + + | EOF -> "<EOF>" +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) |