diff options
| author | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
|---|---|---|
| committer | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
| commit | d6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch) | |
| tree | b425187e232966063ffc2f0d14c04a55d8f004ef /src/boot | |
| parent | Initial git commit. (diff) | |
| download | rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip | |
Populate tree.
Diffstat (limited to 'src/boot')
40 files changed, 33169 insertions, 0 deletions
diff --git a/src/boot/be/abi.ml b/src/boot/be/abi.ml new file mode 100644 index 00000000..fd9ca750 --- /dev/null +++ b/src/boot/be/abi.ml @@ -0,0 +1,207 @@ + +(* + * The 'abi' structure is pretty much just a grab-bag of machine + * dependencies and structure-layout information. Part of the latter + * is shared with trans and semant. + * + * Make some attempt to factor it as time goes by. + *) + +(* Word offsets for structure fields in rust-internal.h, and elsewhere in + compiler. *) + +let rc_base_field_refcnt = 0;; + +let task_field_refcnt = rc_base_field_refcnt;; +let task_field_stk = task_field_refcnt + 1;; +let task_field_runtime_sp = task_field_stk + 1;; +let task_field_rust_sp = task_field_runtime_sp + 1;; +let task_field_gc_alloc_chain = task_field_rust_sp + 1;; +let task_field_dom = task_field_gc_alloc_chain + 1;; +let n_visible_task_fields = task_field_dom + 1;; + +let dom_field_interrupt_flag = 0;; + +let frame_glue_fns_field_mark = 0;; +let frame_glue_fns_field_drop = 1;; +let frame_glue_fns_field_reloc = 2;; + +let exterior_rc_slot_field_refcnt = 0;; +let exterior_rc_slot_field_body = 1;; + +let exterior_gc_slot_field_next = (-2);; +let exterior_gc_slot_field_ctrl = (-1);; +let exterior_gc_slot_field_refcnt = 0;; +let exterior_gc_slot_field_body = 1;; + +let exterior_rc_header_size = 1;; +let exterior_gc_header_size = 3;; + +let exterior_gc_malloc_return_adjustment = 2;; + +let stk_field_valgrind_id = 0 + 1;; +let stk_field_limit = stk_field_valgrind_id + 1;; +let stk_field_data = stk_field_limit + 1;; + +let binding_size = 2;; +let binding_field_item = 0;; +let binding_field_binding = 1;; + +let general_code_alignment = 16;; + +let tydesc_field_first_param = 0;; +let tydesc_field_size = 1;; +let tydesc_field_align = 2;; +let tydesc_field_copy_glue = 3;; +let tydesc_field_drop_glue = 4;; +let tydesc_field_free_glue = 5;; +let tydesc_field_mark_glue = 6;; +let tydesc_field_obj_drop_glue = 7;; + +let vec_elt_rc = 0;; +let vec_elt_alloc = 1;; +let vec_elt_fill = 2;; +let vec_elt_data = 3;; + +let calltup_elt_out_ptr = 0;; +let calltup_elt_task_ptr = 1;; +let calltup_elt_ty_params = 2;; +let calltup_elt_args = 3;; +let calltup_elt_iterator_args = 4;; +let calltup_elt_indirect_args = 5;; + +let iterator_args_elt_block_fn = 0;; +let iterator_args_elt_outer_frame_ptr = 1;; + +let indirect_args_elt_closure = 0;; + +(* ty_params, src, dst, tydesc, taskptr. *) +let worst_case_glue_call_args = 5;; + +type abi = + { + abi_word_sz: int64; + abi_word_bits: Il.bits; + abi_word_ty: Common.ty_mach; + + abi_is_2addr_machine: bool; + abi_has_pcrel_data: bool; + abi_has_pcrel_code: bool; + + abi_n_hardregs: int; + abi_str_of_hardreg: (int -> string); + + abi_prealloc_quad: (Il.quad' -> Il.quad'); + abi_constrain_vregs: (Il.quad -> Bits.t array -> unit); + + abi_emit_fn_prologue: (Il.emitter + -> Common.size (* framesz *) + -> Common.size (* callsz *) + -> Common.nabi + -> Common.fixup (* grow_task *) + -> unit); + + abi_emit_fn_epilogue: (Il.emitter -> unit); + + abi_emit_fn_tail_call: (Il.emitter + -> int64 (* caller_callsz *) + -> int64 (* caller_argsz *) + -> Il.code (* callee_code *) + -> int64 (* callee_argsz *) + -> unit); + + abi_clobbers: (Il.quad -> Il.hreg list); + + abi_emit_native_call: (Il.emitter + -> Il.cell (* ret *) + -> Common.nabi + -> Common.fixup (* callee *) + -> Il.operand array (* args *) + -> unit); + + abi_emit_native_void_call: (Il.emitter + -> Common.nabi + -> Common.fixup (* callee *) + -> Il.operand array (* args *) + -> unit); + + abi_emit_native_call_in_thunk: (Il.emitter + -> Il.cell (* ret *) + -> Common.nabi + -> Il.operand (* callee *) + -> Il.operand array (* args *) + -> unit); + abi_emit_inline_memcpy: (Il.emitter + -> int64 (* n_bytes *) + -> Il.reg (* dst_ptr *) + -> Il.reg (* src_ptr *) + -> Il.reg (* tmp_reg *) + -> bool (* ascending *) + -> unit); + + (* Global glue. *) + abi_activate: (Il.emitter -> unit); + abi_yield: (Il.emitter -> unit); + abi_unwind: (Il.emitter -> Common.nabi -> Common.fixup -> unit); + abi_get_next_pc_thunk: + ((Il.reg (* output *) + * Common.fixup (* thunk in objfile *) + * (Il.emitter -> unit)) (* fn to make thunk *) + option); + + abi_sp_reg: Il.reg; + abi_fp_reg: Il.reg; + abi_dwarf_fp_reg: int; + abi_tp_cell: Il.cell; + abi_implicit_args_sz: int64; + abi_frame_base_sz: int64; + abi_frame_info_sz: int64; + abi_spill_slot: (Il.spill -> Il.mem); + } +;; + +let load_fixup_addr + (e:Il.emitter) + (out_reg:Il.reg) + (fix:Common.fixup) + (rty:Il.referent_ty) + : unit = + + let cell = Il.Reg (out_reg, Il.AddrTy rty) in + let op = Il.ImmPtr (fix, rty) in + Il.emit e (Il.lea cell op); +;; + +let load_fixup_codeptr + (e:Il.emitter) + (out_reg:Il.reg) + (fixup:Common.fixup) + (has_pcrel_code:bool) + (indirect:bool) + : Il.code = + if indirect + then + begin + load_fixup_addr e out_reg fixup (Il.ScalarTy (Il.AddrTy Il.CodeTy)); + Il.CodePtr (Il.Cell (Il.Mem (Il.RegIn (out_reg, None), + Il.ScalarTy (Il.AddrTy Il.CodeTy)))) + end + else + if has_pcrel_code + then (Il.CodePtr (Il.ImmPtr (fixup, Il.CodeTy))) + else + begin + load_fixup_addr e out_reg fixup Il.CodeTy; + Il.CodePtr (Il.Cell (Il.Reg (out_reg, Il.AddrTy Il.CodeTy))) + end +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/asm.ml b/src/boot/be/asm.ml new file mode 100644 index 00000000..10b2142a --- /dev/null +++ b/src/boot/be/asm.ml @@ -0,0 +1,755 @@ +(* + + Our assembler is an all-at-once, buffer-in-memory job, very simple + minded. I have 1gb of memory on my laptop: I don't expect to ever + emit a program that large with this code. + + It is based on the 'frag' type, which has a variant for every major + type of machine-blob we know how to write (bytes, zstrings, BSS + blocks, words of various sorts). + + A frag can contain symbolic references between the sub-parts of + it. These are accomplished through ref cells we call fixups, and a + 2-pass (resolution and writing) process defined recursively over + the frag structure. + + Fixups are defined by wrapping a frag in a DEF pseudo-frag with + a fixup attached. This will record information about the wrapped + frag -- positions and sizes -- in the fixup during resolution. + + We say "positions" and "sizes" there, in plural, because both a + file number and a memory number is recorded for each concept. + + File numbers refer to positions and sizes in the file we're + generating, and are based on the native int type for the host + platform -- usually 31 or 62 bits -- whereas the expressions that + *use* position fixups tend to promote them up to 32 or 64 bits + somehow. On a 32 bit platform, you can't generate output buffers + with 64-bit positions (ocaml limitation!) + + Memory numbers are 64 bit, always, and refer to sizes and positions + of frags when they are loaded into memory in the target. When + you're generating code for a 32-bit target, or using a memory + number in a context that's less than 64 bits, the value is + range-checked and truncated. But in all other respects, we imagine + a 32-bit address space is just the prefix of the continuing 64-bit + address space. If you need to pin an object at a particular place + from the point 2^32-1, say, you will need to do arithmetic and use + the MEMPOS pseudo-frag, that sets the current memory position as + it's being processed. + + Fixups can be *used* anywhere else in the frag tree, as many times + as you like. If you try to write an unresolved fixup, the emitter + faults. When you specify the use of a fixup, you need to specify + whether you want to use its file size, file position, memory size, + or memory position. + + Positions, addresses, sizes and such, of course, are in bytes. + + Expressions are evaluated to an int64 (signed), even if the + expression is an int32 or less. Depending on how you use the result + of the expression, a range check error may fire (for example, if + the expression evaluates to -2^24 and you're emitting a word16). + + Word endianness is per-file. At the moment this seems acceptable. + + Because we want to be *very specific* about the time and place + arithmetic promotions occur, we define two separate expression-tree + types (with the same polymorphic constructors) and two separate + evaluation functions, with an explicit operator for marking the + promotion-points. + +*) + +open Common;; + + +let log (sess:Session.sess) = + Session.log "asm" + sess.Session.sess_log_asm + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_asm + then thunk () + else () +;; + +exception Bad_fit of string;; +exception Undef_sym of string;; + +type ('a, 'b) expr = + IMM of 'a + | ADD of (('a, 'b) expr) * (('a, 'b) expr) + | SUB of (('a, 'b) expr) * (('a, 'b) expr) + | MUL of (('a, 'b) expr) * (('a, 'b) expr) + | DIV of (('a, 'b) expr) * (('a, 'b) expr) + | REM of (('a, 'b) expr) * (('a, 'b) expr) + | MAX of (('a, 'b) expr) * (('a, 'b) expr) + | ALIGN of (('a, 'b) expr) * (('a, 'b) expr) + | SLL of (('a, 'b) expr) * int + | SLR of (('a, 'b) expr) * int + | SAR of (('a, 'b) expr) * int + | AND of (('a, 'b) expr) * (('a, 'b) expr) + | XOR of (('a, 'b) expr) * (('a, 'b) expr) + | OR of (('a, 'b) expr) * (('a, 'b) expr) + | NOT of (('a, 'b) expr) + | NEG of (('a, 'b) expr) + | F_POS of fixup + | F_SZ of fixup + | M_POS of fixup + | M_SZ of fixup + | EXT of 'b + +type expr32 = (int32, int) expr +;; + +type expr64 = (int64, expr32) expr +;; + + +let rec eval32 (e:expr32) + : int32 = + let chop64 kind name v = + let x = Int64.to_int32 v in + if (Int64.compare v (Int64.of_int32 x)) = 0 then + x + else raise (Bad_fit (kind + ^ " fixup " + ^ name + ^ " overflowed 32 bits in eval32: " + ^ Int64.to_string v)) + in + let expandInt _ _ v = Int32.of_int v in + let checkdef kind name v inj = + match v with + None -> + raise (Undef_sym (kind ^ " fixup " ^ name + ^ " undefined in eval32")) + | Some x -> inj kind name x + in + match e with + IMM i -> i + | ADD (a, b) -> Int32.add (eval32 a) (eval32 b) + | SUB (a, b) -> Int32.sub (eval32 a) (eval32 b) + | MUL (a, b) -> Int32.mul (eval32 a) (eval32 b) + | DIV (a, b) -> Int32.div (eval32 a) (eval32 b) + | REM (a, b) -> Int32.rem (eval32 a) (eval32 b) + | MAX (a, b) -> i32_max (eval32 a) (eval32 b) + | ALIGN (a, b) -> i32_align (eval32 a) (eval32 b) + | SLL (a, b) -> Int32.shift_left (eval32 a) b + | SLR (a, b) -> Int32.shift_right_logical (eval32 a) b + | SAR (a, b) -> Int32.shift_right (eval32 a) b + | AND (a, b) -> Int32.logand (eval32 a) (eval32 b) + | XOR (a, b) -> Int32.logxor (eval32 a) (eval32 b) + | OR (a, b) -> Int32.logor (eval32 a) (eval32 b) + | NOT a -> Int32.lognot (eval32 a) + | NEG a -> Int32.neg (eval32 a) + | F_POS f -> + checkdef "file position" + f.fixup_name f.fixup_file_pos expandInt + | F_SZ f -> + checkdef "file size" + f.fixup_name f.fixup_file_sz expandInt + | M_POS f -> + checkdef "mem position" + f.fixup_name f.fixup_mem_pos chop64 + | M_SZ f -> + checkdef "mem size" f.fixup_name f.fixup_mem_sz chop64 + | EXT i -> Int32.of_int i +;; + +let rec eval64 (e:expr64) + : int64 = + let checkdef kind name v inj = + match v with + None -> + raise (Undef_sym (kind ^ " fixup '" + ^ name ^ "' undefined in eval64")) + | Some x -> inj x + in + match e with + IMM i -> i + | ADD (a, b) -> Int64.add (eval64 a) (eval64 b) + | SUB (a, b) -> Int64.sub (eval64 a) (eval64 b) + | MUL (a, b) -> Int64.mul (eval64 a) (eval64 b) + | DIV (a, b) -> Int64.div (eval64 a) (eval64 b) + | REM (a, b) -> Int64.rem (eval64 a) (eval64 b) + | MAX (a, b) -> i64_max (eval64 a) (eval64 b) + | ALIGN (a, b) -> i64_align (eval64 a) (eval64 b) + | SLL (a, b) -> Int64.shift_left (eval64 a) b + | SLR (a, b) -> Int64.shift_right_logical (eval64 a) b + | SAR (a, b) -> Int64.shift_right (eval64 a) b + | AND (a, b) -> Int64.logand (eval64 a) (eval64 b) + | XOR (a, b) -> Int64.logxor (eval64 a) (eval64 b) + | OR (a, b) -> Int64.logor (eval64 a) (eval64 b) + | NOT a -> Int64.lognot (eval64 a) + | NEG a -> Int64.neg (eval64 a) + | F_POS f -> + checkdef "file position" + f.fixup_name f.fixup_file_pos Int64.of_int + | F_SZ f -> + checkdef "file size" + f.fixup_name f.fixup_file_sz Int64.of_int + | M_POS f -> + checkdef "mem position" + f.fixup_name f.fixup_mem_pos (fun x -> x) + | M_SZ f -> + checkdef "mem size" + f.fixup_name f.fixup_mem_sz (fun x -> x) + | EXT e -> Int64.of_int32 (eval32 e) +;; + + +type frag = + MARK (* MARK == 'PAD (IMM 0L)' *) + | SEQ of frag array + | PAD of int + | BSS of int64 + | MEMPOS of int64 + | BYTE of int + | BYTES of int array + | CHAR of char + | STRING of string + | ZSTRING of string + | ULEB128 of expr64 + | SLEB128 of expr64 + | WORD of (ty_mach * expr64) + | ALIGN_FILE of (int * frag) + | ALIGN_MEM of (int * frag) + | DEF of (fixup * frag) + | RELAX of relaxation + +and relaxation = + { relax_options: frag array; + relax_choice: int ref; } +;; + +exception Relax_more of relaxation;; + +let new_relaxation (frags:frag array) = + RELAX { relax_options = frags; + relax_choice = ref ((Array.length frags) - 1); } +;; + + +let rec write_frag + ~(sess:Session.sess) + ~(lsb0:bool) + ~(buf:Buffer.t) + ~(frag:frag) + : unit = + let relax = Queue.create () in + let bump_relax r = + iflog sess (fun _ -> + log sess "bumping relaxation to position %d" + ((!(r.relax_choice)) - 1)); + r.relax_choice := (!(r.relax_choice)) - 1; + if !(r.relax_choice) < 0 + then bug () "relaxation ran out of options" + in + let rec loop _ = + Queue.clear relax; + Buffer.clear buf; + resolve_frag_full relax frag; + lower_frag ~sess ~lsb0 ~buf ~relax ~frag; + if Queue.is_empty relax + then () + else + begin + iflog sess (fun _ -> log sess "relaxing"); + Queue.iter bump_relax relax; + loop () + end + in + loop () + + +and resolve_frag_full (relax:relaxation Queue.t) (frag:frag) + : unit = + let file_pos = ref 0 in + let mem_pos = ref 0L in + let bump i = + mem_pos := Int64.add (!mem_pos) (Int64.of_int i); + file_pos := (!file_pos) + i + in + + let uleb (e:expr64) : unit = + let rec loop value = + let value = Int64.shift_right_logical value 7 in + if value = 0L + then bump 1 + else + begin + bump 1; + loop value + end + in + loop (eval64 e) + in + + let sleb (e:expr64) : unit = + let rec loop value = + let byte = Int64.logand value 0xf7L in + let value = Int64.shift_right value 7 in + let signbit = Int64.logand byte 0x40L in + if (((value = 0L) && (signbit = 0L)) || + ((value = -1L) && (signbit = 0x40L))) + then bump 1 + else + begin + bump 1; + loop value + end + in + loop (eval64 e) + in + let rec resolve_frag it = + match it with + | MARK -> () + | SEQ frags -> Array.iter resolve_frag frags + | PAD i -> bump i + | BSS i -> mem_pos := Int64.add (!mem_pos) i + | MEMPOS i -> mem_pos := i + | BYTE _ -> bump 1 + | BYTES ia -> bump (Array.length ia) + | CHAR _ -> bump 1 + | STRING s -> bump (String.length s) + | ZSTRING s -> bump ((String.length s) + 1) + | ULEB128 e -> uleb e + | SLEB128 e -> sleb e + | WORD (mach,_) -> bump (bytes_of_ty_mach mach) + | ALIGN_FILE (n, frag) -> + let spill = (!file_pos) mod n in + let pad = (n - spill) mod n in + file_pos := (!file_pos) + pad; + (* + * NB: aligning the file *causes* likewise alignment of + * memory, since we implement "file alignment" by + * padding! + *) + mem_pos := Int64.add (!mem_pos) (Int64.of_int pad); + resolve_frag frag + + | ALIGN_MEM (n, frag) -> + let n64 = Int64.of_int n in + let spill = Int64.rem (!mem_pos) n64 in + let pad = Int64.rem (Int64.sub n64 spill) n64 in + mem_pos := Int64.add (!mem_pos) pad; + resolve_frag frag + + | DEF (f, i) -> + let fpos1 = !file_pos in + let mpos1 = !mem_pos in + resolve_frag i; + f.fixup_file_pos <- Some fpos1; + f.fixup_mem_pos <- Some mpos1; + f.fixup_file_sz <- Some ((!file_pos) - fpos1); + f.fixup_mem_sz <- Some (Int64.sub (!mem_pos) mpos1) + + | RELAX rel -> + begin + try + resolve_frag rel.relax_options.(!(rel.relax_choice)) + with + Bad_fit _ -> Queue.add rel relax + end + in + resolve_frag frag + +and lower_frag + ~(sess:Session.sess) + ~(lsb0:bool) + ~(buf:Buffer.t) + ~(relax:relaxation Queue.t) + ~(frag:frag) + : unit = + let byte (i:int) = + if i < 0 + then raise (Bad_fit "byte underflow") + else + if i > 255 + then raise (Bad_fit "byte overflow") + else Buffer.add_char buf (Char.chr i) + in + + let uleb (e:expr64) : unit = + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + let rec loop value = + let byte = Int64.logand value 0x7fL in + let value = Int64.shift_right_logical value 7 in + if value = 0L + then emit1 byte + else + begin + emit1 (Int64.logor byte 0x80L); + loop value + end + in + loop (eval64 e) + in + + let sleb (e:expr64) : unit = + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + let rec loop value = + let byte = Int64.logand value 0x7fL in + let value = Int64.shift_right value 7 in + let signbit = Int64.logand byte 0x40L in + if (((value = 0L) && (signbit = 0L)) || + ((value = -1L) && (signbit = 0x40L))) + then emit1 byte + else + begin + emit1 (Int64.logor byte 0x80L); + loop value + end + in + loop (eval64 e) + in + + let word (nbytes:int) (signed:bool) (e:expr64) = + let i = eval64 e in + + (* + FIXME: + + We should really base the entire assembler and memory-position + system on Big_int.big_int, but in ocaml the big_int type lacks, + oh, just about every useful function (no format string spec, no + bitwise ops, blah blah) so it's useless; we're stuck on int64 + for bootstrapping. + + For the time being we're just going to require you to represent + those few unsigned 64 bit terms you have in mind via their + signed bit pattern. Suboptimal but it's the best we can do. + *) + + let (top,bot) = + if nbytes >= 8 + then + if signed + then (Int64.max_int,Int64.min_int) + else (Int64.max_int,0L) + else + if signed + then + let bound = (Int64.shift_left 1L ((8 * nbytes) - 1)) in + (Int64.sub bound 1L, Int64.neg bound) + else + let bound = (Int64.shift_left 1L (8 * nbytes)) in + (Int64.sub bound 1L, 0L) + in + + let mask1 = Int64.logand 0xffL in + let shift = Int64.shift_right_logical in + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + if Int64.compare i bot = (-1) + then raise (Bad_fit ("word underflow: " + ^ (Int64.to_string i) + ^ " into " + ^ (string_of_int nbytes) + ^ (if signed then " signed" else " unsigned") + ^ " bytes")) + else + if Int64.compare i top = 1 + then raise (Bad_fit ("word overflow: " + ^ (Int64.to_string i) + ^ " into " + ^ (string_of_int nbytes) + ^ (if signed then " signed" else " unsigned") + ^ " bytes")) + else + if lsb0 + then + for n = 0 to (nbytes - 1) do + emit1 (mask1 (shift i (8*n))) + done + else + for n = (nbytes - 1) downto 0 do + emit1 (mask1 (shift i (8*n))) + done + in + match frag with + MARK -> () + + | SEQ frags -> + Array.iter + begin + fun frag -> + lower_frag ~sess ~lsb0 ~buf ~relax ~frag + end frags + + | PAD c -> + for i = 1 to c do + Buffer.add_char buf '\x00' + done + + | BSS _ -> () + + | MEMPOS _ -> () + + | BYTE i -> byte i + + | BYTES bs -> + iflog sess (fun _ -> log sess "lowering %d bytes" + (Array.length bs)); + Array.iter byte bs + + | CHAR c -> + iflog sess (fun _ -> log sess "lowering char: %c" c); + Buffer.add_char buf c + + | STRING s -> + iflog sess (fun _ -> log sess "lowering string: %s" s); + Buffer.add_string buf s + + | ZSTRING s -> + iflog sess (fun _ -> log sess "lowering zstring: %s" s); + Buffer.add_string buf s; + byte 0 + + | ULEB128 e -> uleb e + | SLEB128 e -> sleb e + + | WORD (m,e) -> + iflog sess + (fun _ -> + log sess "lowering word %s" + (string_of_ty_mach m)); + word (bytes_of_ty_mach m) (mach_is_signed m) e + + | ALIGN_FILE (n, frag) -> + let spill = (Buffer.length buf) mod n in + let pad = (n - spill) mod n in + for i = 1 to pad do + Buffer.add_char buf '\x00' + done; + lower_frag sess lsb0 buf relax frag + + | ALIGN_MEM (_, i) -> lower_frag sess lsb0 buf relax i + | DEF (f, i) -> + iflog sess (fun _ -> log sess "lowering fixup: %s" f.fixup_name); + lower_frag sess lsb0 buf relax i; + + | RELAX rel -> + begin + try + lower_frag sess lsb0 buf relax + rel.relax_options.(!(rel.relax_choice)) + with + Bad_fit _ -> Queue.add rel relax + end +;; + +let fold_flags (f:'a -> int64) (flags:'a list) : int64 = + List.fold_left (Int64.logor) 0x0L (List.map f flags) +;; + +let write_out_frag sess lsb0 frag = + let buf = Buffer.create 0xffff in + let file = Session.filename_of sess.Session.sess_out in + let out = open_out_bin file in + write_frag ~sess ~lsb0 ~buf ~frag; + Buffer.output_buffer out buf; + flush out; + close_out out; + Unix.chmod file 0o755 +;; + +(* Asm-reader stuff for loading info back from mapped files. *) +(* + * Unfortunately the ocaml Bigarray interface takes 'int' indices, so + * f.e. can't do 64-bit offsets / files when running on a 32bit platform. + * Despite the fact that we can possibly produce them. Sigh. Yet another + * "bootstrap compiler limitation". + *) +type asm_reader = + { + asm_seek: int -> unit; + asm_get_u32: unit -> int; + asm_get_u16: unit -> int; + asm_get_u8: unit -> int; + asm_get_uleb: unit -> int; + asm_get_zstr: unit -> string; + asm_get_zstr_padded: int -> string; + asm_get_off: unit -> int; + asm_adv: int -> unit; + asm_adv_u32: unit -> unit; + asm_adv_u16: unit -> unit; + asm_adv_u8: unit -> unit; + asm_adv_zstr: unit -> unit; + asm_close: unit -> unit; + } +;; + +type mmap_arr = + (int, Bigarray.int8_unsigned_elt, Bigarray.c_layout) + Bigarray.Array1.t +;; + +let new_asm_reader (sess:Session.sess) (s:filename) : asm_reader = + iflog sess (fun _ -> log sess "opening file %s" s); + let fd = Unix.openfile s [ Unix.O_RDONLY ] 0 in + let arr = (Bigarray.Array1.map_file + fd ~pos:0L + Bigarray.int8_unsigned + Bigarray.c_layout + false (-1)) + in + let tmp = ref Nativeint.zero in + let buf = Buffer.create 16 in + let off = ref 0 in + let is_open = ref true in + let get_word_as_int (nbytes:int) : int = + assert (!is_open); + let lsb0 = true in + tmp := Nativeint.zero; + if lsb0 + then + for j = nbytes-1 downto 0 do + tmp := Nativeint.shift_left (!tmp) 8; + tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j}) + done + else + for j = 0 to nbytes-1 do + tmp := Nativeint.shift_left (!tmp) 8; + tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j}) + done; + off := (!off) + nbytes; + Nativeint.to_int (!tmp) + in + let get_zstr_padded pad_opt = + assert (!is_open); + let i = ref (!off) in + Buffer.clear buf; + let buflen_ok _ = + match pad_opt with + None -> true + | Some pad -> (Buffer.length buf) < pad + in + while arr.{!i} != 0 && (buflen_ok()) do + Buffer.add_char buf (Char.chr arr.{!i}); + incr i + done; + begin + match pad_opt with + None -> off := (!off) + (Buffer.length buf) + 1 + | Some pad -> + begin + assert ((Buffer.length buf) <= pad); + off := (!off) + pad + end + end; + Buffer.contents buf + in + let bump i = + assert (!is_open); + off := (!off) + i + in + { + asm_seek = (fun i -> off := i); + asm_get_u32 = (fun _ -> get_word_as_int 4); + asm_get_u16 = (fun _ -> get_word_as_int 2); + asm_get_u8 = (fun _ -> get_word_as_int 1); + asm_get_uleb = + begin + fun _ -> + let rec loop result shift = + let byte = arr.{!off} in + incr off; + let result = result lor ((byte land 0x7f) lsl shift) in + if (byte land 0x80) = 0 + then result + else loop result (shift+7) + in + loop 0 0 + end; + asm_get_zstr = (fun _ -> get_zstr_padded None); + asm_get_zstr_padded = (fun pad -> get_zstr_padded (Some pad)); + asm_get_off = (fun _ -> !off); + asm_adv = bump; + asm_adv_u32 = (fun _ -> bump 4); + asm_adv_u16 = (fun _ -> bump 2); + asm_adv_u8 = (fun _ -> bump 1); + asm_adv_zstr = (fun _ -> while arr.{!off} != 0 + do incr off done); + asm_close = (fun _ -> + assert (!is_open); + Unix.close fd; + is_open := false) + } +;; + + +(* + * Metadata note-section encoding / decoding. + * + * Since the only object format that defines a "note" section at all is + * ELF, we model the contents of the metadata section on ELF's + * notes. But the same blob of data is stuck into PE and Mach-O files + * too. + * + * The format is essentially just the ELF note format: + * + * <un-padded-size-of-name:u32> + * <size-of-desc:u32> + * <type-code=0:u32> + * <name="rust":zstr> + * <0-pad to 4-byte boundary> + * <n=meta-count:u32> + * <k1:zstr> <v1:zstr> + * ... + * <kn:zstr> <vn:zstr> + * <0-pad to 4-byte boundary> + * + *) +let note_rust_frags (meta:(Ast.ident * string) array) : frag = + let desc_fixup = new_fixup ".rust.note metadata" in + let desc = + DEF (desc_fixup, + SEQ [| + WORD (TY_u32, IMM (Int64.of_int (Array.length meta))); + SEQ (Array.map + (fun (k,v) -> SEQ [| ZSTRING k; ZSTRING v; |]) + meta); + ALIGN_FILE (4, MARK) |]) + in + let name = "rust" in + let ty = 0L in + let padded_name = SEQ [| ZSTRING name; + ALIGN_FILE (4, MARK) |] + in + let name_sz = IMM (Int64.of_int ((String.length name) + 1)) in + SEQ [| WORD (TY_u32, name_sz); + WORD (TY_u32, F_SZ desc_fixup); + WORD (TY_u32, IMM ty); + padded_name; + desc;|] +;; + +let read_rust_note (ar:asm_reader) : (Ast.ident * string) array = + ar.asm_adv_u32 (); + ar.asm_adv_u32 (); + assert ((ar.asm_get_u32 ()) = 0); + let rust_name = ar.asm_get_zstr_padded 8 in + assert (rust_name = "rust"); + let n = ar.asm_get_u32() in + let meta = Queue.create () in + for i = 1 to n + do + let k = ar.asm_get_zstr() in + let v = ar.asm_get_zstr() in + Queue.add (k,v) meta + done; + queue_to_arr meta +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/elf.ml b/src/boot/be/elf.ml new file mode 100644 index 00000000..56905b2a --- /dev/null +++ b/src/boot/be/elf.ml @@ -0,0 +1,1760 @@ +(* + * Module for writing System V ELF files. + * + * FIXME: Presently heavily infected with x86 and elf32 specificities, + * though they are reasonably well marked. Needs to be refactored to + * depend on abi fields if it's to be usable for other elf + * configurations. + *) + +open Asm;; +open Common;; + +let log (sess:Session.sess) = + Session.log "obj (elf)" + sess.Session.sess_log_obj + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_obj + then thunk () + else () +;; + + +(* Fixed sizes of structs involved in elf32 spec. *) +let elf32_ehsize = 52L;; +let elf32_phentsize = 32L;; +let elf32_shentsize = 40L;; +let elf32_symsize = 16L;; +let elf32_rela_entsz = 0xcL;; + +type ei_class = + ELFCLASSNONE + | ELFCLASS32 + | ELFCLASS64 +;; + + +type ei_data = + ELFDATANONE + | ELFDATA2LSB + | ELFDATA2MSB +;; + + +let elf_identification ei_class ei_data = + SEQ + [| + STRING "\x7fELF"; + BYTES + [| + (match ei_class with (* EI_CLASS *) + ELFCLASSNONE -> 0 + | ELFCLASS32 -> 1 + | ELFCLASS64 -> 2); + (match ei_data with (* EI_DATA *) + ELFDATANONE -> 0 + | ELFDATA2LSB -> 1 + | ELFDATA2MSB -> 2); + 1; (* EI_VERSION = EV_CURRENT *) + 0; (* EI_PAD #7 *) + 0; (* EI_PAD #8 *) + 0; (* EI_PAD #9 *) + 0; (* EI_PAD #A *) + 0; (* EI_PAD #B *) + 0; (* EI_PAD #C *) + 0; (* EI_PAD #D *) + 0; (* EI_PAD #E *) + 0; (* EI_PAD #F *) + |] + |] +;; + + +type e_type = + ET_NONE + | ET_REL + | ET_EXEC + | ET_DYN + | ET_CORE +;; + + +type e_machine = + (* Maybe support more later. *) + EM_NONE + | EM_386 + | EM_X86_64 +;; + + +type e_version = + EV_NONE + | EV_CURRENT +;; + + +let elf32_header + ~(sess:Session.sess) + ~(ei_data:ei_data) + ~(e_type:e_type) + ~(e_machine:e_machine) + ~(e_version:e_version) + ~(e_entry_fixup:fixup) + ~(e_phoff_fixup:fixup) + ~(e_shoff_fixup:fixup) + ~(e_phnum:int64) + ~(e_shnum:int64) + ~(e_shstrndx:int64) + : frag = + let elf_header_fixup = new_fixup "elf header" in + let entry_pos = + if sess.Session.sess_library_mode + then (IMM 0L) + else (M_POS e_entry_fixup) + in + DEF + (elf_header_fixup, + SEQ [| elf_identification ELFCLASS32 ei_data; + WORD (TY_u16, (IMM (match e_type with + ET_NONE -> 0L + | ET_REL -> 1L + | ET_EXEC -> 2L + | ET_DYN -> 3L + | ET_CORE -> 4L))); + WORD (TY_u16, (IMM (match e_machine with + EM_NONE -> 0L + | EM_386 -> 3L + | EM_X86_64 -> 62L))); + WORD (TY_u32, (IMM (match e_version with + EV_NONE -> 0L + | EV_CURRENT -> 1L))); + WORD (TY_u32, entry_pos); + WORD (TY_u32, (F_POS e_phoff_fixup)); + WORD (TY_u32, (F_POS e_shoff_fixup)); + WORD (TY_u32, (IMM 0L)); (* e_flags *) + WORD (TY_u16, (IMM elf32_ehsize)); + WORD (TY_u16, (IMM elf32_phentsize)); + WORD (TY_u16, (IMM e_phnum)); + WORD (TY_u16, (IMM elf32_shentsize)); + WORD (TY_u16, (IMM e_shnum)); + WORD (TY_u16, (IMM e_shstrndx)); + |]) +;; + + +type sh_type = + SHT_NULL + | SHT_PROGBITS + | SHT_SYMTAB + | SHT_STRTAB + | SHT_RELA + | SHT_HASH + | SHT_DYNAMIC + | SHT_NOTE + | SHT_NOBITS + | SHT_REL + | SHT_SHLIB + | SHT_DYNSYM +;; + + +type sh_flags = + SHF_WRITE + | SHF_ALLOC + | SHF_EXECINSTR +;; + + +let section_header + ~(shstring_table_fixup:fixup) + ~(shname_string_fixup:fixup) + ~(sh_type:sh_type) + ~(sh_flags:sh_flags list) + ~(section_fixup:fixup option) + ~(sh_addralign:int64) + ~(sh_entsize:int64) + ~(sh_link:int64 option) + : frag = + SEQ + [| + WORD (TY_i32, (SUB + ((F_POS shname_string_fixup), + (F_POS shstring_table_fixup)))); + WORD (TY_u32, (IMM (match sh_type with + SHT_NULL -> 0L + | SHT_PROGBITS -> 1L + | SHT_SYMTAB -> 2L + | SHT_STRTAB -> 3L + | SHT_RELA -> 4L + | SHT_HASH -> 5L + | SHT_DYNAMIC -> 6L + | SHT_NOTE -> 7L + | SHT_NOBITS -> 8L + | SHT_REL -> 9L + | SHT_SHLIB -> 10L + | SHT_DYNSYM -> 11L))); + WORD (TY_u32, (IMM (fold_flags + (fun f -> match f with + SHF_WRITE -> 0x1L + | SHF_ALLOC -> 0x2L + | SHF_EXECINSTR -> 0x4L) sh_flags))); + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (M_POS s))); + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (F_POS s))); + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (F_SZ s))); + WORD (TY_u32, (IMM (match sh_link with + None -> 0L + | Some i -> i))); + WORD (TY_u32, (IMM 0L)); (* sh_info *) + WORD (TY_u32, (IMM sh_addralign)); + WORD (TY_u32, (IMM sh_entsize)); + |] +;; + + +type p_type = + PT_NULL + | PT_LOAD + | PT_DYNAMIC + | PT_INTERP + | PT_NOTE + | PT_SHLIB + | PT_PHDR +;; + + +type p_flag = + PF_X + | PF_W + | PF_R +;; + + +let program_header + ~(p_type:p_type) + ~(segment_fixup:fixup) + ~(p_flags:p_flag list) + ~(p_align:int64) + : frag = + SEQ + [| + WORD (TY_u32, (IMM (match p_type with + PT_NULL -> 0L + | PT_LOAD -> 1L + | PT_DYNAMIC -> 2L + | PT_INTERP -> 3L + | PT_NOTE -> 4L + | PT_SHLIB -> 5L + | PT_PHDR -> 6L))); + WORD (TY_u32, (F_POS segment_fixup)); + WORD (TY_u32, (M_POS segment_fixup)); + WORD (TY_u32, (M_POS segment_fixup)); + WORD (TY_u32, (F_SZ segment_fixup)); + WORD (TY_u32, (M_SZ segment_fixup)); + WORD (TY_u32, (IMM (fold_flags + (fun f -> + match f with + PF_X -> 0x1L + | PF_W -> 0x2L + | PF_R -> 0x4L) + p_flags))); + WORD (TY_u32, (IMM p_align)); + |] +;; + + +type st_bind = + STB_LOCAL + | STB_GLOBAL + | STB_WEAK +;; + + +type st_type = + STT_NOTYPE + | STT_OBJECT + | STT_FUNC + | STT_SECTION + | STT_FILE +;; + + +(* Special symbol-section indices *) +let shn_UNDEF = 0L;; +let shn_ABS = 0xfff1L;; +let shn_ABS = 0xfff2L;; + + +let symbol + ~(string_table_fixup:fixup) + ~(name_string_fixup:fixup) + ~(sym_target_fixup:fixup option) + ~(st_bind:st_bind) + ~(st_type:st_type) + ~(st_shndx:int64) + : frag = + let st_bind_num = + match st_bind with + STB_LOCAL -> 0L + | STB_GLOBAL -> 1L + | STB_WEAK -> 2L + in + let st_type_num = + match st_type with + STT_NOTYPE -> 0L + | STT_OBJECT -> 1L + | STT_FUNC -> 2L + | STT_SECTION -> 3L + | STT_FILE -> 4L + in + SEQ + [| + WORD (TY_u32, (SUB + ((F_POS name_string_fixup), + (F_POS string_table_fixup)))); + WORD (TY_u32, (match sym_target_fixup with + None -> (IMM 0L) + | Some f -> (M_POS f))); + WORD (TY_u32, (match sym_target_fixup with + None -> (IMM 0L) + | Some f -> (M_SZ f))); + WORD (TY_u8, (* st_info *) + (OR + ((SLL ((IMM st_bind_num), 4)), + (AND ((IMM st_type_num), (IMM 0xfL)))))); + WORD (TY_u8, (IMM 0L)); (* st_other *) + WORD (TY_u16, (IMM st_shndx)); + |] +;; + +type d_tag = + DT_NULL + | DT_NEEDED + | DT_PLTRELSZ + | DT_PLTGOT + | DT_HASH + | DT_STRTAB + | DT_SYMTAB + | DT_RELA + | DT_RELASZ + | DT_RELAENT + | DT_STRSZ + | DT_SYMENT + | DT_INIT + | DT_FINI + | DT_SONAME + | DT_RPATH + | DT_SYMBOLIC + | DT_REL + | DT_RELSZ + | DT_RELENT + | DT_PLTREL + | DT_DEBUG + | DT_TEXTREL + | DT_JMPREL + | DT_BIND_NOW + | DT_INIT_ARRAY + | DT_FINI_ARRAY + | DT_INIT_ARRAYSZ + | DT_FINI_ARRAYSZ + | DT_RUNPATH + | DT_FLAGS + | DT_ENCODING + | DT_PREINIT_ARRAY + | DT_PREINIT_ARRAYSZ +;; + +type elf32_dyn = (d_tag * expr64);; + +let elf32_num_of_dyn_tag tag = + match tag with + DT_NULL -> 0L + | DT_NEEDED -> 1L + | DT_PLTRELSZ -> 2L + | DT_PLTGOT -> 3L + | DT_HASH -> 4L + | DT_STRTAB -> 5L + | DT_SYMTAB -> 6L + | DT_RELA -> 7L + | DT_RELASZ -> 8L + | DT_RELAENT -> 9L + | DT_STRSZ -> 10L + | DT_SYMENT -> 11L + | DT_INIT -> 12L + | DT_FINI -> 13L + | DT_SONAME -> 14L + | DT_RPATH -> 15L + | DT_SYMBOLIC -> 16L + | DT_REL -> 17L + | DT_RELSZ -> 18L + | DT_RELENT -> 19L + | DT_PLTREL -> 20L + | DT_DEBUG -> 21L + | DT_TEXTREL -> 22L + | DT_JMPREL -> 23L + | DT_BIND_NOW -> 24L + | DT_INIT_ARRAY -> 25L + | DT_FINI_ARRAY -> 26L + | DT_INIT_ARRAYSZ -> 27L + | DT_FINI_ARRAYSZ -> 28L + | DT_RUNPATH -> 29L + | DT_FLAGS -> 30L + | DT_ENCODING -> 31L + | DT_PREINIT_ARRAY -> 32L + | DT_PREINIT_ARRAYSZ -> 33L +;; + +let elf32_dyn_frag d = + let (tag, expr) = d in + let tagval = elf32_num_of_dyn_tag tag in + SEQ [| WORD (TY_u32, (IMM tagval)); WORD (TY_u32, expr) |] +;; + +type elf32_386_reloc_type = + R_386_NONE + | R_386_32 + | R_386_PC32 + | R_386_GOT32 + | R_386_PLT32 + | R_386_COPY + | R_386_GLOB_DAT + | R_386_JMP_SLOT + | R_386_RELATIVE + | R_386_GOTOFF + | R_386_GOTPC +;; + + +type elf32_386_rela = + { elf32_386_rela_type: elf32_386_reloc_type; + elf32_386_rela_offset: expr64; + elf32_386_rela_sym: expr64; + elf32_386_rela_addend: expr64 } +;; + +let elf32_386_rela_frag r = + let type_val = + match r.elf32_386_rela_type with + R_386_NONE -> 0L + | R_386_32 -> 1L + | R_386_PC32 -> 2L + | R_386_GOT32 -> 3L + | R_386_PLT32 -> 4L + | R_386_COPY -> 5L + | R_386_GLOB_DAT -> 6L + | R_386_JMP_SLOT -> 7L + | R_386_RELATIVE -> 8L + | R_386_GOTOFF -> 9L + | R_386_GOTPC -> 10L + in + let info_expr = + WORD (TY_u32, + (OR + (SLL ((r.elf32_386_rela_sym), 8), + AND ((IMM 0xffL), (IMM type_val))))) + in + SEQ [| WORD (TY_u32, r.elf32_386_rela_offset); + info_expr; + WORD (TY_u32, r.elf32_386_rela_addend) |] +;; + + +let elf32_linux_x86_file + ~(sess:Session.sess) + ~(crate:Ast.crate) + ~(entry_name:string) + ~(text_frags:(string option, frag) Hashtbl.t) + ~(data_frags:(string option, frag) Hashtbl.t) + ~(rodata_frags:(string option, frag) Hashtbl.t) + ~(required_fixups:(string, fixup) Hashtbl.t) + ~(dwarf:Dwarf.debug_records) + ~(sem:Semant.ctxt) + ~(needed_libs:string array) + : frag = + + (* Procedure Linkage Tables (PLTs), Global Offset Tables + * (GOTs), and the relocations that set them up: + * + * The PLT goes in a section called .plt and GOT in a section called + * .got. The portion of the GOT that holds PLT jump slots goes in a + * section called .got.plt. Dynamic relocations for these jump slots go in + * section .rela.plt. + * + * The easiest way to understand the PLT/GOT system is to draw it: + * + * PLT GOT + * +----------------------+ +----------------------+ + * 0| push &<GOT[1]> 0| <reserved> + * | jmp *GOT[2] 1| <libcookie> + * | 2| & <ld.so:resolve-a-sym> + * 1| jmp *GOT[3] 3| & <'push 0' in PLT[1]> + * | push 0 4| & <'push 1' in PLT[2]> + * | jmp *PLT[0] 5| & <'push 2' in PLT[3]> + * | + * 2| jmp *GOT[4] + * | push 1 + * | jmp *PLT[0] + * | + * 2| jmp *GOT[5] + * | push 2 + * | jmp *PLT[0] + * + * + * In normal user code, we call PLT entries with a call to a + * PC-relative address, the PLT entry, which itself does an indirect + * jump through a slot in the GOT that it also addresses + * PC-relative. This makes the whole scheme PIC. + * + * The linker fills in the GOT on startup. For the first 3, it uses + * its own thinking. For the remainder it needs to be instructed to + * fill them in with "jump slot relocs", type R_386_JUMP_SLOT, each + * of which says in effect which PLT entry it's to point back to and + * which symbol it's to be resolved to later. These relocs go in the + * section .rela.plt. + *) + + let plt0_fixup = new_fixup "PLT[0]" in + let got_prefix = SEQ [| WORD (TY_u32, (IMM 0L)); + WORD (TY_u32, (IMM 0L)); + WORD (TY_u32, (IMM 0L)); |] + in + + let got_cell reg i = + let got_entry_off = Int64.of_int (i*4) in + let got_entry_mem = Il.RegIn (reg, (Some (Asm.IMM got_entry_off))) in + Il.Mem (got_entry_mem, Il.ScalarTy (Il.AddrTy Il.CodeTy)) + in + + let got_code_cell reg i = + Il.CodePtr (Il.Cell (got_cell reg i)) + in + + let plt0_frag = + let reg = Il.Hreg X86.eax in + let e = X86.new_emitter_without_vregs () in + Il.emit e (Il.Push (Il.Cell (got_cell reg 1))); + Il.emit e (Il.jmp Il.JMP (got_code_cell reg 2)); + Il.emit e Il.Nop; + Il.emit e Il.Nop; + Il.emit e Il.Nop; + Il.emit e Il.Nop; + DEF (plt0_fixup, (X86.frags_of_emitted_quads sess e)) + in + + (* + * The existence of the GOT/PLT mish-mash causes, therefore, the + * following new sections: + * + * .plt - the PLT itself, in the r/x text segment + * .got.plt - the PLT-used portion of the GOT, in the r/w segment + * .rela.plt - the dynamic relocs for the GOT-PLT, in the r/x segment + * + * In addition, because we're starting up a dynamically linked executable, + * we have to have several more sections! + * + * .interp - the read-only section that names ld.so + * .dynsym - symbols named by the PLT/GOT entries, r/x segment + * .dynstr - string-names used in those symbols, r/x segment + * .hash - hashtable in which to look these up, r/x segment + * .dynamic - the machine-readable description of the dynamic + * linkage requirements of this elf file, in the + * r/w _DYNAMIC segment + * + * The Dynamic section contains a sequence of 2-word records of type + * d_tag. + * + *) + + (* There are 17 official section headers in the file we're making: *) + (* *) + (* section 0: <null section> *) + (* *) + (* section 1: .interp (segment 1: R+X, INTERP) *) + (* *) + (* section 2: .text (segment 2: R+X, LOAD) *) + (* section 3: .rodata ... *) + (* section 4: .dynsym ... *) + (* section 5: .dynstr ... *) + (* section 6: .hash ... *) + (* section 7: .plt ... *) + (* section 8: .got ... *) + (* section 9: .rela.plt ... *) + (* *) + (* section 10: .data (segment 3: R+W, LOAD) *) + (* section 11: .bss ... *) + (* *) + (* section 12: .dynamic (segment 4: R+W, DYNAMIC) *) + (* *) + (* section 13: .shstrtab (not in a segment) *) + (* section 14: .debug_aranges (segment 2: cont'd) *) + (* section 15: .debug_pubnames ... *) + (* section 14: .debug_info ... *) + (* section 15: .debug_abbrev ... *) + (* section 14: .debug_line ... *) + (* section 15: .debug_frame ... *) + (* section 16: .note..rust (segment 5: NOTE) *) + + let sname s = + new_fixup (Printf.sprintf "string name of '%s' section" s) + in + let null_section_name_fixup = sname "<null>" in + let interp_section_name_fixup = sname ".interp"in + let text_section_name_fixup = sname ".text" in + let rodata_section_name_fixup = sname ".rodata" in + let dynsym_section_name_fixup = sname ".dynsym" in + let dynstr_section_name_fixup = sname ".dynstr" in + let hash_section_name_fixup = sname ".hash" in + let plt_section_name_fixup = sname ".plt" in + let got_plt_section_name_fixup = sname ".got.plt" in + let rela_plt_section_name_fixup = sname ".rela.plt" in + let data_section_name_fixup = sname ".data" in + let bss_section_name_fixup = sname ".bss" in + let dynamic_section_name_fixup = sname ".dynamic" in + let shstrtab_section_name_fixup = sname ".shstrtab" in + let debug_aranges_section_name_fixup = sname ".debug_aranges" in + let debug_pubnames_section_name_fixup = sname ".debug_pubnames" in + let debug_info_section_name_fixup = sname ".debug_info" in + let debug_abbrev_section_name_fixup = sname ".debug_abbrev" in + let debug_line_section_name_fixup = sname ".debug_line" in + let debug_frame_section_name_fixup = sname ".debug_frame" in + let note_rust_section_name_fixup = sname ".note.rust" in + + (* let interpndx = 1L in *) (* Section index of .interp *) + let textndx = 2L in (* Section index of .text *) + let rodatandx = 3L in (* Section index of .rodata *) + let dynsymndx = 4L in (* Section index of .dynsym *) + let dynstrndx = 5L in (* Section index of .dynstr *) + (* let hashndx = 6L in *) (* Section index of .hash *) + (* let pltndx = 7L in *) (* Section index of .plt *) + (* let gotpltndx = 8L in *) (* Section index of .got.plt *) + (* let relapltndx = 9L in *) (* Section index of .rela.plt *) + let datandx = 10L in (* Section index of .data *) + (* let bssndx = 11L in *) (* Section index of .bss *) + (* let dynamicndx = 12L in *) (* Section index of .dynamic *) + let shstrtabndx = 13L in (* Section index of .shstrtab *) + + let section_header_table_fixup = new_fixup ".section header table" in + let interp_section_fixup = new_fixup ".interp section" in + let text_section_fixup = new_fixup ".text section" in + let rodata_section_fixup = new_fixup ".rodata section" in + let dynsym_section_fixup = new_fixup ".dynsym section" in + let dynstr_section_fixup = new_fixup ".dynstr section" in + let hash_section_fixup = new_fixup ".hash section" in + let plt_section_fixup = new_fixup ".plt section" in + let got_plt_section_fixup = new_fixup ".got.plt section" in + let rela_plt_section_fixup = new_fixup ".rela.plt section" in + let data_section_fixup = new_fixup ".data section" in + let bss_section_fixup = new_fixup ".bss section" in + let dynamic_section_fixup = new_fixup ".dynamic section" in + let shstrtab_section_fixup = new_fixup ".shstrtab section" in + let note_rust_section_fixup = new_fixup ".shstrtab section" in + + let shstrtab_section = + SEQ + [| + DEF (null_section_name_fixup, ZSTRING ""); + DEF (interp_section_name_fixup, ZSTRING ".interp"); + DEF (text_section_name_fixup, ZSTRING ".text"); + DEF (rodata_section_name_fixup, ZSTRING ".rodata"); + DEF (dynsym_section_name_fixup, ZSTRING ".dynsym"); + DEF (dynstr_section_name_fixup, ZSTRING ".dynstr"); + DEF (hash_section_name_fixup, ZSTRING ".hash"); + DEF (plt_section_name_fixup, ZSTRING ".plt"); + DEF (got_plt_section_name_fixup, ZSTRING ".got.plt"); + DEF (rela_plt_section_name_fixup, ZSTRING ".rela.plt"); + DEF (data_section_name_fixup, ZSTRING ".data"); + DEF (bss_section_name_fixup, ZSTRING ".bss"); + DEF (dynamic_section_name_fixup, ZSTRING ".dynamic"); + DEF (shstrtab_section_name_fixup, ZSTRING ".shstrtab"); + DEF (debug_aranges_section_name_fixup, ZSTRING ".debug_aranges"); + DEF (debug_pubnames_section_name_fixup, ZSTRING ".debug_pubnames"); + DEF (debug_info_section_name_fixup, ZSTRING ".debug_info"); + DEF (debug_abbrev_section_name_fixup, ZSTRING ".debug_abbrev"); + DEF (debug_line_section_name_fixup, ZSTRING ".debug_line"); + DEF (debug_frame_section_name_fixup, ZSTRING ".debug_frame"); + DEF (note_rust_section_name_fixup, ZSTRING ".note.rust"); + |] + in + + let section_headers = + [| + (* <null> *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: null_section_name_fixup + ~sh_type: SHT_NULL + ~sh_flags: [] + ~section_fixup: None + ~sh_addralign: 0L + ~sh_entsize: 0L + ~sh_link: None); + + (* .interp *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: interp_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some interp_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .text *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: text_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ] + ~section_fixup: (Some text_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .rodata *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: rodata_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some rodata_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .dynsym *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: dynsym_section_name_fixup + ~sh_type: SHT_DYNSYM + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some dynsym_section_fixup) + ~sh_addralign: 8L + ~sh_entsize: elf32_symsize + ~sh_link: (Some dynstrndx) ); + + (* .dynstr *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: dynstr_section_name_fixup + ~sh_type: SHT_STRTAB + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some dynstr_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .hash *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: hash_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some hash_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 4L + ~sh_link: (Some dynsymndx)); + + (* .plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: plt_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ] + ~section_fixup: (Some plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); + + (* .got.plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: got_plt_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some got_plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); + + (* .rela.plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: rela_plt_section_name_fixup + ~sh_type: SHT_RELA + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some rela_plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: elf32_rela_entsz + ~sh_link: (Some dynsymndx)); + + (* .data *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: data_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some data_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .bss *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: bss_section_name_fixup + ~sh_type: SHT_NOBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some bss_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .dynamic *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: dynamic_section_name_fixup + ~sh_type: SHT_DYNAMIC + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some dynamic_section_fixup) + ~sh_addralign: 8L + ~sh_entsize: 0L + ~sh_link: None); + + (* .shstrtab *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: shstrtab_section_name_fixup + ~sh_type: SHT_STRTAB + ~sh_flags: [] + ~section_fixup: (Some shstrtab_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + +(* + FIXME: uncomment the dwarf section headers as you make use of them; + recent gdb versions have got fussier about parsing dwarf and don't + like seeing junk there. +*) + + (* .debug_aranges *) +(* + + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_aranges_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_aranges_fixup) + ~sh_addralign: 8L + ~sh_entsize: 0L + ~sh_link: None); +*) + (* .debug_pubnames *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_pubnames_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_pubnames_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .debug_info *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_info_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_info_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .debug_abbrev *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_abbrev_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_abbrev_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + (* .debug_line *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_line_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_line_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .debug_frame *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_frame_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_frame_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .note.rust *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: note_rust_section_name_fixup + ~sh_type: SHT_NOTE + ~sh_flags: [] + ~section_fixup: (Some note_rust_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + |] + in + let section_header_table = SEQ section_headers in + + + (* There are 6 official program headers in the file we're making: *) + (* segment 0: RX / PHDR *) + (* segment 1: R / INTERP *) + (* segment 2: RX / LOAD *) + (* segment 3: RW / LOAD *) + (* segment 4: RW / DYNAMIC *) + (* segment 5: R *) + + let program_header_table_fixup = new_fixup "program header table" in + let segment_0_fixup = new_fixup "segment 0" in + let segment_1_fixup = new_fixup "segment 1" in + let segment_2_fixup = new_fixup "segment 2" in + let segment_3_fixup = new_fixup "segment 3" in + let segment_4_fixup = new_fixup "segment 4" in + let segment_5_fixup = new_fixup "segment 5" in + + let segment_0_align = 4 in + let segment_1_align = 1 in + let segment_2_align = 0x1000 in + let segment_3_align = 0x1000 in + let segment_4_align = 0x1000 in + let segment_5_align = 1 in + + let program_headers = [| + (program_header + ~p_type: PT_PHDR + ~segment_fixup: segment_0_fixup + ~p_flags: [ PF_R; PF_X ] + ~p_align: (Int64.of_int segment_0_align)); + (program_header + ~p_type: PT_INTERP + ~segment_fixup: segment_1_fixup + ~p_flags: [ PF_R ] + ~p_align: (Int64.of_int segment_1_align)); + (program_header + ~p_type: PT_LOAD + ~segment_fixup: segment_2_fixup + ~p_flags: [ PF_R; PF_X ] + ~p_align: (Int64.of_int segment_2_align)); + (program_header + ~p_type: PT_LOAD + ~segment_fixup: segment_3_fixup + ~p_flags: [ PF_R; PF_W ] + ~p_align: (Int64.of_int segment_3_align)); + (program_header + ~p_type: PT_DYNAMIC + ~segment_fixup: segment_4_fixup + ~p_flags: [ PF_R; PF_W ] + ~p_align: (Int64.of_int segment_4_align)); + (program_header + ~p_type: PT_NOTE + ~segment_fixup: segment_5_fixup + ~p_flags: [ PF_R;] + ~p_align: (Int64.of_int segment_5_align)); + |] + in + let program_header_table = SEQ program_headers in + + let e_entry_fixup = new_fixup "entry symbol" in + + let elf_header = + elf32_header + ~sess + ~ei_data: ELFDATA2LSB + ~e_type: ET_DYN + ~e_machine: EM_386 + ~e_version: EV_CURRENT + + ~e_entry_fixup: e_entry_fixup + ~e_phoff_fixup: program_header_table_fixup + ~e_shoff_fixup: section_header_table_fixup + ~e_phnum: (Int64.of_int (Array.length program_headers)) + ~e_shnum: (Int64.of_int (Array.length section_headers)) + ~e_shstrndx: shstrtabndx + in + + let n_syms = ref 1 in (* The empty symbol, implicit. *) + + let data_sym name st_bind fixup = + let name_fixup = new_fixup ("data symbol name fixup: '" ^ name ^ "'") in + let strtab_entry = DEF (name_fixup, ZSTRING name) in + let symtab_entry = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind + ~st_type: STT_OBJECT + ~st_shndx: datandx + in + incr n_syms; + (strtab_entry, symtab_entry) + in + + let rodata_sym name st_bind fixup = + let name_fixup = new_fixup ("rodata symbol name fixup: '" ^ name ^ "'") in + let strtab_entry = DEF (name_fixup, ZSTRING name) in + let symtab_entry = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind + ~st_type: STT_OBJECT + ~st_shndx: rodatandx + in + incr n_syms; + (strtab_entry, symtab_entry) + in + + let text_sym name st_bind fixup = + let name_fixup = new_fixup ("text symbol name fixup: '" ^ name ^ "'") in + let strtab_frag = DEF (name_fixup, ZSTRING name) in + let symtab_frag = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind: st_bind + ~st_type: STT_FUNC + ~st_shndx: textndx + in + incr n_syms; + (strtab_frag, symtab_frag) + in + + let require_sym name st_bind _(*fixup*) = + let name_fixup = + new_fixup ("require symbol name fixup: '" ^ name ^ "'") + in + let strtab_frag = DEF (name_fixup, ZSTRING name) in + let symtab_frag = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: None + ~st_bind + ~st_type: STT_FUNC + ~st_shndx: shn_UNDEF + in + incr n_syms; + (strtab_frag, symtab_frag) + in + + let frags_of_symbol sym_emitter st_bind symname_opt symbody x = + let (strtab_frags, symtab_frags, body_frags) = x in + let (strtab_frag, symtab_frag, body_frag) = + match symname_opt with + None -> (MARK, MARK, symbody) + | Some symname -> + let body_fixup = + new_fixup ("symbol body fixup: '" ^ symname ^ "'") + in + let body = + if symname = entry_name + then DEF (e_entry_fixup, DEF (body_fixup, symbody)) + else DEF (body_fixup, symbody) + in + let (str, sym) = sym_emitter symname st_bind body_fixup in + (str, sym, body) + in + ((strtab_frag :: strtab_frags), + (symtab_frag :: symtab_frags), + (body_frag :: body_frags)) + in + + let frags_of_require_symbol sym_emitter st_bind symname plt_entry_fixup x = + let (i, strtab_frags, symtab_frags, + plt_frags, got_plt_frags, rela_plt_frags) = x in + let (strtab_frag, symtab_frag) = sym_emitter symname st_bind None in + let e = X86.new_emitter_without_vregs () in + let jump_slot_fixup = new_fixup ("jump slot #" ^ string_of_int i) in + let jump_slot_initial_target_fixup = + new_fixup ("jump slot #" ^ string_of_int i ^ " initial target") in + + (* You may notice this PLT entry doesn't look like either of the + * types of "normal" PLT entries outlined in the ELF manual. It is, + * however, just what you get when you combine a PIC PLT entry with + * inline calls to the horrible __i686.get_pc_thunk.ax kludge used + * on x86 to support entering PIC PLTs. We're just doing it *in* + * the PLT entries rather than infecting all the callers with the + * obligation of having the GOT address in a register on + * PLT-entry. + *) + + let plt_frag = + let (reg, _, _) = X86.get_next_pc_thunk in + + Il.emit_full e (Some plt_entry_fixup) [] Il.Dead; + + Abi.load_fixup_addr e reg got_plt_section_fixup Il.CodeTy; + + Il.emit e (Il.jmp Il.JMP (got_code_cell reg (2+i))); + + Il.emit_full e (Some jump_slot_initial_target_fixup) + [] (Il.Push (X86.immi (Int64.of_int i))); + + Il.emit e (Il.jmp Il.JMP (Il.direct_code_ptr plt0_fixup)); + X86.frags_of_emitted_quads sess e + in + let got_plt_frag = + DEF (jump_slot_fixup, + WORD (TY_u32, (M_POS jump_slot_initial_target_fixup))) + in + let rela_plt = + { elf32_386_rela_type = R_386_JMP_SLOT; + elf32_386_rela_offset = (M_POS jump_slot_fixup); + elf32_386_rela_sym = (IMM (Int64.of_int i)); + elf32_386_rela_addend = (IMM 0L) } + in + let rela_plt_frag = elf32_386_rela_frag rela_plt in + (i+1, + (strtab_frag :: strtab_frags), + (symtab_frag :: symtab_frags), + (plt_frag :: plt_frags), + (got_plt_frag :: got_plt_frags), + (rela_plt_frag :: rela_plt_frags)) + in + + (* Emit text export symbols. *) + let (global_text_strtab_frags, global_text_symtab_frags) = + match htab_search sem.Semant.ctxt_native_provided SEG_text with + None -> ([], []) + | Some etab -> + Hashtbl.fold + begin + fun name fix x -> + let (strtab_frags, symtab_frags) = x in + let (str, sym) = text_sym name STB_GLOBAL fix in + (str :: strtab_frags, + sym :: symtab_frags) + end + etab + ([],[]) + in + + (* Emit text fragments (possibly named). *) + let (global_text_strtab_frags, + global_text_symtab_frags, + text_body_frags) = + Hashtbl.fold + (frags_of_symbol text_sym STB_GLOBAL) + text_frags + (global_text_strtab_frags, global_text_symtab_frags, []) + in + + let (local_text_strtab_frags, + local_text_symtab_frags) = + + let symbol_frags_of_code _ code accum = + let (strtab_frags, symtab_frags) = accum in + let fix = code.Semant.code_fixup in + let (strtab_frag, symtab_frag) = + text_sym fix.fixup_name STB_LOCAL fix + in + (strtab_frag :: strtab_frags, + symtab_frag :: symtab_frags) + in + + let symbol_frags_of_glue_code g code accum = + let (strtab_frags, symtab_frags) = accum in + let fix = code.Semant.code_fixup in + let (strtab_frag, symtab_frag) = + text_sym (Semant.glue_str sem g) STB_LOCAL fix + in + (strtab_frag :: strtab_frags, + symtab_frag :: symtab_frags) + in + + let item_str_frags, item_sym_frags = + Hashtbl.fold symbol_frags_of_code + sem.Semant.ctxt_all_item_code ([], []) + in + let glue_str_frags, glue_sym_frags = + Hashtbl.fold symbol_frags_of_glue_code + sem.Semant.ctxt_glue_code ([], []) + in + (item_str_frags @ glue_str_frags, + item_sym_frags @ glue_sym_frags) + in + + (* Emit rodata export symbols. *) + let (rodata_strtab_frags, rodata_symtab_frags) = + match htab_search sem.Semant.ctxt_native_provided SEG_data with + None -> ([], []) + | Some etab -> + Hashtbl.fold + begin + fun name fix x -> + let (strtab_frags, symtab_frags) = x in + let (str, sym) = rodata_sym name STB_GLOBAL fix in + (str :: strtab_frags, + sym :: symtab_frags) + end + etab + ([],[]) + in + + (* Emit rodata fragments (possibly named). *) + let (rodata_strtab_frags, + rodata_symtab_frags, + rodata_body_frags) = + Hashtbl.fold + (frags_of_symbol rodata_sym STB_GLOBAL) + rodata_frags + (rodata_strtab_frags, rodata_symtab_frags, []) + in + + + let (data_strtab_frags, + data_symtab_frags, + data_body_frags) = + Hashtbl.fold (frags_of_symbol data_sym STB_GLOBAL) data_frags ([],[],[]) + in + + let (_, + require_strtab_frags, + require_symtab_frags, + plt_frags, + got_plt_frags, + rela_plt_frags) = + Hashtbl.fold (frags_of_require_symbol require_sym STB_GLOBAL) + required_fixups + (1,[],[],[plt0_frag],[got_prefix],[]) + in + let require_symtab_frags = List.rev require_symtab_frags in + let plt_frags = List.rev plt_frags in + let got_plt_frags = List.rev got_plt_frags in + let rela_plt_frags = List.rev rela_plt_frags in + + let dynamic_needed_strtab_frags = + Array.make (Array.length needed_libs) MARK + in + + let dynamic_frags = + let dynamic_needed_frags = Array.make (Array.length needed_libs) MARK in + for i = 0 to (Array.length needed_libs) - 1 do + let fixup = + new_fixup ("needed library name fixup: " ^ needed_libs.(i)) + in + dynamic_needed_frags.(i) <- + elf32_dyn_frag (DT_NEEDED, SUB (M_POS fixup, + M_POS dynstr_section_fixup)); + dynamic_needed_strtab_frags.(i) <- + DEF (fixup, ZSTRING needed_libs.(i)) + done; + (SEQ [| + SEQ dynamic_needed_frags; + elf32_dyn_frag (DT_STRTAB, M_POS dynstr_section_fixup); + elf32_dyn_frag (DT_STRSZ, M_SZ dynstr_section_fixup); + + elf32_dyn_frag (DT_SYMTAB, M_POS dynsym_section_fixup); + elf32_dyn_frag (DT_SYMENT, IMM elf32_symsize); + + elf32_dyn_frag (DT_HASH, M_POS hash_section_fixup); + elf32_dyn_frag (DT_PLTGOT, M_POS got_plt_section_fixup); + + elf32_dyn_frag (DT_PLTREL, IMM (elf32_num_of_dyn_tag DT_RELA)); + elf32_dyn_frag (DT_PLTRELSZ, M_SZ rela_plt_section_fixup); + elf32_dyn_frag (DT_JMPREL, M_POS rela_plt_section_fixup); + + elf32_dyn_frag (DT_NULL, IMM 0L) + |]) + in + + let null_strtab_fixup = new_fixup "null dynstrtab entry" in + let null_strtab_frag = DEF (null_strtab_fixup, ZSTRING "") in + let null_symtab_frag = (symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: null_strtab_fixup + ~sym_target_fixup: None + ~st_bind: STB_LOCAL + ~st_type: STT_NOTYPE + ~st_shndx: 0L) in + + let dynsym_frags = (null_symtab_frag :: + (require_symtab_frags @ + global_text_symtab_frags @ + local_text_symtab_frags @ + rodata_symtab_frags @ + data_symtab_frags)) + in + + let dynstr_frags = (null_strtab_frag :: + (require_strtab_frags @ + global_text_strtab_frags @ + local_text_strtab_frags @ + rodata_strtab_frags @ + data_strtab_frags @ + (Array.to_list dynamic_needed_strtab_frags))) + in + + let interp_section = + DEF (interp_section_fixup, ZSTRING "/lib/ld-linux.so.2") + in + + let text_section = + DEF (text_section_fixup, + SEQ (Array.of_list text_body_frags)) + in + let rodata_section = + DEF (rodata_section_fixup, + SEQ (Array.of_list rodata_body_frags)) + in + let data_section = + DEF (data_section_fixup, + SEQ (Array.of_list data_body_frags)) + in + let bss_section = + DEF (bss_section_fixup, + SEQ [| |]) + in + let dynsym_section = + DEF (dynsym_section_fixup, + SEQ (Array.of_list dynsym_frags)) + in + let dynstr_section = + DEF (dynstr_section_fixup, + SEQ (Array.of_list dynstr_frags)) + in + + let hash_section = + let n_syms = !n_syms in + + DEF (hash_section_fixup, + (* Worst hashtable ever: one chain. *) + SEQ [| + WORD (TY_u32, IMM 1L); (* nbucket *) + WORD (TY_u32, (* nchain *) + IMM (Int64.of_int n_syms)); + WORD (TY_u32, IMM 1L); (* bucket 0 => symbol 1. *) + SEQ + begin + Array.init + n_syms + (fun i -> + let next = (* chain[i] => if last then 0 else i+1 *) + if i > 0 && i < (n_syms-1) + then Int64.of_int (i+1) + else 0L + in + WORD (TY_u32, IMM next)) + end; + |]) + in + + let plt_section = + DEF (plt_section_fixup, + SEQ (Array.of_list plt_frags)) + in + + let got_plt_section = + DEF (got_plt_section_fixup, + SEQ (Array.of_list got_plt_frags)) + in + + let rela_plt_section = + DEF (rela_plt_section_fixup, + SEQ (Array.of_list rela_plt_frags)) + in + + let dynamic_section = + DEF (dynamic_section_fixup, dynamic_frags) + in + + let note_rust_section = + DEF (note_rust_section_fixup, + (Asm.note_rust_frags crate.node.Ast.crate_meta)) + in + + + let page_alignment = 0x1000 in + + let align_both i = + ALIGN_FILE (page_alignment, + (ALIGN_MEM (page_alignment, i))) + in + + let def_aligned f i = + align_both + (SEQ [| DEF(f,i); + (align_both MARK)|]) + in + + let debug_aranges_section = + def_aligned + sem.Semant.ctxt_debug_aranges_fixup + dwarf.Dwarf.debug_aranges + in + let debug_pubnames_section = + def_aligned + sem.Semant.ctxt_debug_pubnames_fixup + dwarf.Dwarf.debug_pubnames + in + let debug_info_section = + def_aligned + sem.Semant.ctxt_debug_info_fixup + dwarf.Dwarf.debug_info + in + let debug_abbrev_section = + def_aligned + sem.Semant.ctxt_debug_abbrev_fixup + dwarf.Dwarf.debug_abbrev + in + let debug_line_section = + def_aligned + sem.Semant.ctxt_debug_line_fixup + dwarf.Dwarf.debug_line + in + let debug_frame_section = + def_aligned sem.Semant.ctxt_debug_frame_fixup dwarf.Dwarf.debug_frame + in + + let load_address = 0x0804_8000L in + + SEQ + [| + MEMPOS load_address; + ALIGN_FILE + (segment_2_align, + DEF + (segment_2_fixup, + SEQ + [| + DEF (sem.Semant.ctxt_image_base_fixup, MARK); + elf_header; + ALIGN_FILE + (segment_0_align, + DEF + (segment_0_fixup, + SEQ + [| + DEF (program_header_table_fixup, + program_header_table); + |])); + ALIGN_FILE + (segment_1_align, + DEF (segment_1_fixup, interp_section)); + text_section; + rodata_section; + dynsym_section; + dynstr_section; + hash_section; + plt_section; + rela_plt_section; + debug_aranges_section; + debug_pubnames_section; + debug_info_section; + debug_abbrev_section; + debug_line_section; + debug_frame_section; + |])); + ALIGN_FILE + (segment_3_align, + DEF + (segment_3_fixup, + SEQ + [| + data_section; + got_plt_section; + bss_section; + ALIGN_FILE + (segment_4_align, + DEF (segment_4_fixup, + dynamic_section)); + ALIGN_FILE + (segment_5_align, + DEF (segment_5_fixup, + note_rust_section)); + |])); + DEF (shstrtab_section_fixup, + shstrtab_section); + DEF (section_header_table_fixup, + section_header_table); + |] +;; + +let emit_file + (sess:Session.sess) + (crate:Ast.crate) + (code:Asm.frag) + (data:Asm.frag) + (sem:Semant.ctxt) + (dwarf:Dwarf.debug_records) + : unit = + + let text_frags = Hashtbl.create 4 in + let rodata_frags = Hashtbl.create 4 in + let data_frags = Hashtbl.create 4 in + let required_fixups = Hashtbl.create 4 in + + (* + * Startup on elf-linux is more complex than in win32. It's + * thankfully documented in some detail around the net. + * + * - The elf entry address is for _start. + * + * - _start pushes: + * + * eax (should be zero) + * esp (holding the kernel-provided stack end) + * edx (address of _rtld_fini) + * address of _fini + * address of _init + * ecx (argv) + * esi (argc) + * address of main + * + * and then calls __libc_start_main@plt. + * + * - This means any sensible binary has a PLT. Fun. So + * We call into the PLT, which itself is just a bunch + * of indirect jumps through slots in the GOT, and wind + * up in __libc_start_main. Which calls _init, then + * essentially exit(main(argc,argv)). + *) + + + let init_fixup = new_fixup "_init function entry" in + let fini_fixup = new_fixup "_fini function entry" in + let (start_fixup, rust_start_fixup) = + if sess.Session.sess_library_mode + then (None, None) + else (Some (new_fixup "start function entry"), + Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) + in + let libc_start_main_fixup = new_fixup "__libc_start_main@plt stub" in + + let start_fn _ = + let start_fixup = + match start_fixup with + None -> bug () "missing start fixup in non-library mode" + | Some s -> s + in + let e = X86.new_emitter_without_vregs () in + let push_r32 r = Il.emit e + (Il.Push (Il.Cell (Il.Reg (Il.Hreg r, Il.ValTy Il.Bits32)))) + in + let push_pos32 = X86.push_pos32 e in + + Il.emit e (Il.unary Il.UMOV (X86.rc X86.ebp) (X86.immi 0L)); + Il.emit e (Il.Pop (X86.rc X86.esi)); + Il.emit e (Il.unary Il.UMOV (X86.rc X86.ecx) (X86.ro X86.esp)); + Il.emit e (Il.binary Il.AND + (X86.rc X86.esp) (X86.ro X86.esp) + (X86.immi 0xfffffffffffffff0L)); + + push_r32 X86.eax; + push_r32 X86.esp; + push_r32 X86.edx; + push_pos32 fini_fixup; + push_pos32 init_fixup; + push_r32 X86.ecx; + push_r32 X86.esi; + push_pos32 start_fixup; + Il.emit e (Il.call + (Il.Reg (Il.Hreg X86.eax, Il.ValTy Il.Bits32)) + (Il.direct_code_ptr libc_start_main_fixup)); + X86.frags_of_emitted_quads sess e + in + + let do_nothing_fn _ = + let e = X86.new_emitter_without_vregs () in + Il.emit e Il.Ret; + X86.frags_of_emitted_quads sess e + in + + let main_fn _ = + match (start_fixup, rust_start_fixup, sem.Semant.ctxt_main_fn_fixup) with + (None, _, _) + | (_, None, _) + | (_, _, None) -> MARK + | (Some start_fixup, + Some rust_start_fixup, + Some main_fn_fixup) -> + let e = X86.new_emitter_without_vregs () in + X86.objfile_start e + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup + ~crate_fixup: sem.Semant.ctxt_crate_fixup + ~indirect_start: false; + X86.frags_of_emitted_quads sess e + in + + let needed_libs = + [| + "libc.so.6"; + "librustrt.so" + |] + in + + let _ = + if not sess.Session.sess_library_mode + then + begin + htab_put text_frags (Some "_start") (start_fn()); + htab_put text_frags (Some "_init") + (DEF (init_fixup, do_nothing_fn())); + htab_put text_frags (Some "_fini") + (DEF (fini_fixup, do_nothing_fn())); + htab_put text_frags (Some "main") (main_fn ()); + htab_put required_fixups "__libc_start_main" libc_start_main_fixup; + end; + htab_put text_frags None code; + htab_put rodata_frags None data; + + Hashtbl.iter + begin + fun _ tab -> + Hashtbl.iter + begin + fun name fixup -> + htab_put required_fixups name fixup + end + tab + end + sem.Semant.ctxt_native_required + in + let all_frags = + elf32_linux_x86_file + ~sess + ~crate + ~entry_name: "_start" + ~text_frags + ~data_frags + ~dwarf + ~sem + ~rodata_frags + ~required_fixups + ~needed_libs + in + write_out_frag sess true all_frags +;; + +let elf_magic = "\x7fELF";; + +let sniff + (sess:Session.sess) + (filename:filename) + : asm_reader option = + try + let stat = Unix.stat filename in + if (stat.Unix.st_kind = Unix.S_REG) && + (stat.Unix.st_size > 4) + then + let ar = new_asm_reader sess filename in + let _ = log sess "sniffing ELF file" in + if (ar.asm_get_zstr_padded 4) = elf_magic + then (ar.asm_seek 0; Some ar) + else None + else + None + with + _ -> None +;; + +let get_sections + (sess:Session.sess) + (ar:asm_reader) + : (string,(int*int)) Hashtbl.t = + let sects = Hashtbl.create 0 in + let _ = log sess "reading sections" in + let elf_id = ar.asm_get_zstr_padded 4 in + let _ = assert (elf_id = elf_magic) in + + let _ = ar.asm_seek 0x10 in + let _ = ar.asm_adv_u16 () in (* e_type *) + let _ = ar.asm_adv_u16 () in (* e_machine *) + let _ = ar.asm_adv_u32 () in (* e_version *) + let _ = ar.asm_adv_u32 () in (* e_entry *) + let _ = ar.asm_adv_u32 () in (* e_phoff *) + let e_shoff = ar.asm_get_u32 () in (* e_shoff *) + let _ = ar.asm_adv_u32 () in (* e_flags *) + let _ = ar.asm_adv_u16 () in (* e_ehsize *) + let _ = ar.asm_adv_u16 () in (* e_phentsize *) + let _ = ar.asm_adv_u16 () in (* e_phnum *) + let e_shentsize = ar.asm_get_u16 () in + let e_shnum = ar.asm_get_u16 () in + let e_shstrndx = ar.asm_get_u16 () in + let _ = log sess + "%d ELF section headers, %d bytes each, starting at 0x%x" + e_shnum e_shentsize e_shoff + in + let _ = log sess "section %d is .shstrtab" e_shstrndx in + + let read_section_hdr n = + let _ = ar.asm_seek (e_shoff + n * e_shentsize) in + let str_off = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in (* sh_type *) + let _ = ar.asm_adv_u32() in (* sh_flags *) + let _ = ar.asm_adv_u32() in (* sh_addr *) + let off = ar.asm_get_u32() in (* sh_off *) + let size = ar.asm_get_u32() in (* sh_size *) + let _ = ar.asm_adv_u32() in (* sh_link *) + let _ = ar.asm_adv_u32() in (* sh_info *) + let _ = ar.asm_adv_u32() in (* sh_addralign *) + let _ = ar.asm_adv_u32() in (* sh_entsize *) + (str_off, off, size) + in + + let (_, str_base, _) = read_section_hdr e_shstrndx in + + let _ = ar.asm_seek e_shoff in + for i = 0 to (e_shnum - 1) do + let (str_off, off, size) = read_section_hdr i in + let _ = ar.asm_seek (str_base + str_off) in + let name = ar.asm_get_zstr() in + log sess "section %d: %s, size %d, offset 0x%x" i name size off; + Hashtbl.add sects name (off, size); + done; + sects +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/il.ml b/src/boot/be/il.ml new file mode 100644 index 00000000..e095e627 --- /dev/null +++ b/src/boot/be/il.ml @@ -0,0 +1,1135 @@ +open Common;; + +(* FIXME (issue #1): thread a session object through this eventually. *) +let log_iltypes = ref false;; + +(* IL type system, very rudimentary. *) + +type bits = + Bits8 + | Bits16 + | Bits32 + | Bits64 +;; + +type scalar_ty = + ValTy of bits + | AddrTy of referent_ty + +and referent_ty = + ScalarTy of scalar_ty + | StructTy of referent_ty array + | UnionTy of referent_ty array + | ParamTy of ty_param_idx (* Thing of current-frame type-param #n *) + | OpaqueTy (* Unknown memory-resident thing. *) + | CodeTy (* Executable machine code. *) + | NilTy (* 0 bits of space. *) +;; + +let (voidptr_t:scalar_ty) = AddrTy OpaqueTy;; +let (codeptr_t:scalar_ty) = AddrTy CodeTy;; + +(* Operands. *) + +type vreg = int ;; +type hreg = int ;; +type label = int ;; +type spill = int ;; + +type reg = + Vreg of vreg + | Hreg of hreg +;; + +type mem = + Abs of Asm.expr64 + | RegIn of (reg * (Asm.expr64 option)) + | Spill of spill +;; + +type typed_reg = (reg * scalar_ty);; +type typed_mem = (mem * referent_ty);; +type typed_imm = (Asm.expr64 * ty_mach);; +type typed_imm_ptr = (fixup * referent_ty);; + +type cell = + Reg of typed_reg + | Mem of typed_mem +;; + +(* + * ImmPtr (a, rty) can be assigned to anything of scalar_ty + * AddrTy rty; the difference is that ImmAddr carries its value + * so can be used in cases where we want to have an immediate + * address constant-propagated through the code to the backend. + *) +type operand = + Cell of cell + | Imm of typed_imm + | ImmPtr of typed_imm_ptr +;; + + +type code = + CodeLabel of label (* Index into current quad block. *) + | CodePtr of operand + | CodeNone +;; + +(* NB: for the most part, we let the register allocator assign spills + * from vregs, and we permanently allocate aliased slots to stack + * locations by static aliasing information early, in layout. + * + * The one awkward case this doesn't handle is when someone tries to + * pass a literal-atom to an alias-slot. This *requires* a memory slot + * but we only realize it rather late, much later than we'd normally + * have thougt to desugar the literal into a temporary. + * + * So in these cases, we let the trans module explicitly demand a + * "Spill n" operand, which the register allocator mops up before it + * gets started on the vregs. + * + * NOTE: if we were more clever we'd integrate vregs and spills like + * this together along with the general notion of a temporary way back + * at the desugaring stage, and use some kind of size-class + * consolidation so that spills with non-overlapping lifetimes could + * share memory. But we're not that clever yet. + *) + + +(* Helpers. *) + +let direct_code_ptr fix = + (CodePtr (ImmPtr (fix, CodeTy))) +;; + +let cell_referent_ty c = + match c with + Reg (_, st) -> ScalarTy st + | Mem (_, rt) -> rt +;; + +let cell_is_nil c = + match c with + Mem (_, NilTy) -> true + | Reg (_, AddrTy NilTy) -> true + | _ -> false +;; + +let operand_is_nil o = + match o with + Cell c -> cell_is_nil c + | _ -> false +;; + +let mem_off (mem:mem) (off:Asm.expr64) : mem = + let addto e = Asm.ADD (off, e) in + match mem with + Abs e -> Abs (addto e) + | RegIn (r, None) -> RegIn (r, Some off) + | RegIn (r, Some e) -> RegIn (r, Some (addto e)) + | Spill _ -> bug () "Adding offset to spill slot" +;; + +let mem_off_imm (mem:mem) (imm:int64) : mem = + mem_off mem (Asm.IMM imm) +;; + + +(* Quads. *) + +type binop = + ADD | SUB + | IMUL | UMUL + | IDIV | UDIV + | IMOD | UMOD + | AND | OR | XOR + | LSL | LSR | ASR +;; + +type unop = + NEG | NOT + | UMOV | IMOV + | ZERO +;; + +type jmpop = + JE | JNE + | JZ | JNZ (* FIXME: Synonyms with JE/JNE in x86, others? *) + | JL | JLE | JG | JGE (* Signed. *) + | JB | JBE | JA | JAE (* Unsigned. *) + | JC | JNC | JO | JNO + | JMP +;; + +type binary = + { + binary_op: binop; + binary_dst: cell; + binary_lhs: operand; + binary_rhs: operand + } +;; + +type unary = + { + unary_op: unop; + unary_dst: cell; + unary_src: operand + } +;; + +type cmp = + { + cmp_lhs: operand; + cmp_rhs: operand + } +;; + +type lea = + { + lea_dst: cell; + lea_src: operand + } +;; + +type jmp = + { + jmp_op: jmpop; + jmp_targ: code; + } +;; + +type call = + { + call_dst: cell; + call_targ: code + } + +type quad' = + Binary of binary + | Unary of unary + | Lea of lea + | Cmp of cmp + | Jmp of jmp + | Push of operand + | Pop of cell + | Call of call + | Debug (* Debug-break pseudo-instruction. *) + | Enter of fixup (* Enter-fixup-block pseudo-instruction. *) + | Leave (* Leave-fixup-block pseudo-instruction. *) + | Ret (* Return to caller. *) + | Nop (* Keep this quad here, emit CPU nop. *) + | Dead (* Keep this quad but emit nothing. *) + | Regfence (* Clobber all hregs. *) + | End (* Space past the end of quads to emit. *) +;; + +type quad = + { quad_fixup: fixup option; + quad_implicits: label list; + quad_body: quad'; } + +type quads = quad array ;; + +(* Query functions. *) + +let cell_is_scalar (c:cell) : bool = + match c with + Reg (_, _) -> true + | Mem (_, ScalarTy _) -> true + | _ -> false +;; + + +let bits_of_ty_mach (tm:ty_mach) : bits = + match tm with + | TY_u8 -> Bits8 + | TY_i8 -> Bits8 + | TY_u16 -> Bits16 + | TY_i16 -> Bits16 + | TY_u32 -> Bits32 + | TY_i32 -> Bits32 + | TY_u64 -> Bits64 + | TY_i64 -> Bits64 + | TY_f32 -> Bits32 + | TY_f64 -> Bits64 +;; + +let cell_scalar_ty (c:cell) : scalar_ty = + match c with + Reg (_, st) -> st + | Mem (_, ScalarTy st) -> st + | _ -> bug () "mem of non-scalar in Il.cell_scalar_ty" +;; + +let operand_scalar_ty (op:operand) : scalar_ty = + match op with + Cell c -> cell_scalar_ty c + | Imm (_, t) -> ValTy (bits_of_ty_mach t) + | ImmPtr (_, t) -> AddrTy t +;; + + +let scalar_ty_bits (word_bits:bits) (st:scalar_ty) : bits = + match st with + ValTy bits -> bits + | AddrTy _ -> word_bits +;; + +let cell_bits (word_bits:bits) (c:cell) : bits = + match c with + Reg (_, st) -> scalar_ty_bits word_bits st + | Mem (_, ScalarTy st) -> scalar_ty_bits word_bits st + | Mem _ -> bug () "mem of non-scalar in Il.cell_bits" +;; + +let operand_bits (word_bits:bits) (op:operand) : bits = + match op with + Cell cell -> cell_bits word_bits cell + | Imm (_, tm) -> bits_of_ty_mach tm + | ImmPtr _ -> word_bits +;; + +let bits_size (bits:bits) : int64 = + match bits with + Bits8 -> 1L + | Bits16 -> 2L + | Bits32 -> 4L + | Bits64 -> 8L +;; + +let bits_align (bits:bits) : int64 = + match bits with + Bits8 -> 1L + | Bits16 -> 2L + | Bits32 -> 4L + | Bits64 -> 8L +;; + +let scalar_ty_size (word_bits:bits) (st:scalar_ty) : int64 = + bits_size (scalar_ty_bits word_bits st) +;; + +let scalar_ty_align (word_bits:bits) (st:scalar_ty) : int64 = + bits_align (scalar_ty_bits word_bits st) +;; + +let rec referent_ty_layout (word_bits:bits) (rt:referent_ty) : (size * size) = + match rt with + ScalarTy st -> (SIZE_fixed (scalar_ty_size word_bits st), + SIZE_fixed (scalar_ty_align word_bits st)) + | StructTy rts -> + begin + let accum (off,align) rt : (size * size) = + let (elt_size, elt_align) = referent_ty_layout word_bits rt in + let elt_off = align_sz elt_align off in + (add_sz elt_off elt_size, max_sz elt_align align) + in + Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts + end + | UnionTy rts -> + begin + let accum (sz,align) rt : (size * size) = + let (elt_size, elt_align) = referent_ty_layout word_bits rt in + (max_sz sz elt_size, max_sz elt_align align) + in + Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts + end + | OpaqueTy -> bug () "opaque ty in referent_ty_layout" + | CodeTy -> bug () "code ty in referent_ty_layout" + | ParamTy i -> (SIZE_param_size i, SIZE_param_align i) + | NilTy -> (SIZE_fixed 0L, SIZE_fixed 1L) + +and referent_ty_size (word_bits:bits) (rt:referent_ty) : size = + (fst (referent_ty_layout word_bits rt)) + +and referent_ty_align (word_bits:bits) (rt:referent_ty) : size = + (snd (referent_ty_layout word_bits rt)) + +;; + +let get_element_offset + (word_bits:bits) + (elts:referent_ty array) + (i:int) + : size = + let elts_before = Array.sub elts 0 i in + let elt_rty = elts.(i) in + let elts_before_size = referent_ty_size word_bits (StructTy elts_before) in + let elt_align = referent_ty_align word_bits elt_rty in + let elt_off = align_sz elt_align elts_before_size in + elt_off +;; + +(* Processor. *) + +type quad_processor = + { qp_reg: (quad_processor -> reg -> reg); + qp_mem: (quad_processor -> mem -> mem); + qp_cell_read: (quad_processor -> cell -> cell); + qp_cell_write: (quad_processor -> cell -> cell); + qp_code: (quad_processor -> code -> code); + qp_op: (quad_processor -> operand -> operand); } +;; + +let identity_processor = + let qp_cell = (fun qp c -> match c with + Reg (r, b) -> Reg (qp.qp_reg qp r, b) + | Mem (a, b) -> Mem (qp.qp_mem qp a, b)) + in + { qp_reg = (fun _ r -> r); + qp_mem = (fun qp a -> match a with + RegIn (r, o) -> RegIn (qp.qp_reg qp r, o) + | Abs _ + | Spill _ -> a); + qp_cell_read = qp_cell; + qp_cell_write = qp_cell; + qp_code = (fun qp c -> match c with + CodePtr op -> CodePtr (qp.qp_op qp op) + | CodeLabel _ + | CodeNone -> c); + qp_op = (fun qp op -> match op with + Cell c -> Cell (qp.qp_cell_read qp c) + | ImmPtr _ -> op + | Imm _ -> op) } +;; + +let process_quad (qp:quad_processor) (q:quad) : quad = + { q with + quad_body = match q.quad_body with + Binary b -> + Binary { b with + binary_dst = qp.qp_cell_write qp b.binary_dst; + binary_lhs = qp.qp_op qp b.binary_lhs; + binary_rhs = qp.qp_op qp b.binary_rhs } + | Unary u -> + Unary { u with + unary_dst = qp.qp_cell_write qp u.unary_dst; + unary_src = qp.qp_op qp u.unary_src } + + | Lea le -> + Lea { lea_dst = qp.qp_cell_write qp le.lea_dst; + lea_src = qp.qp_op qp le.lea_src } + + | Cmp c -> + Cmp { cmp_lhs = qp.qp_op qp c.cmp_lhs; + cmp_rhs = qp.qp_op qp c.cmp_rhs } + + | Jmp j -> + Jmp { j with + jmp_targ = qp.qp_code qp j.jmp_targ } + + | Push op -> + Push (qp.qp_op qp op) + + | Pop c -> + Pop (qp.qp_cell_write qp c) + + | Call c -> + Call { call_dst = qp.qp_cell_write qp c.call_dst; + call_targ = qp.qp_code qp c.call_targ } + + | Ret -> Ret + | Nop -> Nop + | Debug -> Debug + | Regfence -> Regfence + | Enter f -> Enter f + | Leave -> Leave + | Dead -> Dead + | End -> End } +;; + +let visit_quads (qp:quad_processor) (qs:quads) : unit = + Array.iter (fun x ->ignore ( process_quad qp x); ()) qs +;; + +let process_quads (qp:quad_processor) (qs:quads) : quads = + Array.map (process_quad qp) qs +;; + +let rewrite_quads (qp:quad_processor) (qs:quads) : unit = + for i = 0 to ((Array.length qs) - 1) do + qs.(i) <- process_quad qp qs.(i) + done +;; + + +(* A little partial-evaluator to help lowering sizes. *) + +let rec size_to_expr64 (a:size) : Asm.expr64 option = + let binary a b f = + match (size_to_expr64 a, size_to_expr64 b) with + (Some a, Some b) -> Some (f a b) + | _ -> None + in + match a with + SIZE_fixed i -> Some (Asm.IMM i) + | SIZE_fixup_mem_sz f -> Some (Asm.M_SZ f) + | SIZE_fixup_mem_pos f -> Some (Asm.M_POS f) + | SIZE_rt_neg s -> + begin + match (size_to_expr64 s) with + None -> None + | Some s -> Some (Asm.NEG s) + end + | SIZE_rt_add (a, b) -> binary a b (fun a b -> Asm.ADD (a,b)) + | SIZE_rt_mul (a, b) -> binary a b (fun a b -> Asm.MUL (a,b)) + | SIZE_rt_max (a, b) -> binary a b (fun a b -> Asm.MAX (a,b)) + | SIZE_rt_align (a, b) -> binary a b (fun a b -> Asm.ALIGN (a,b)) + | _ -> None +;; + + +(* Formatters. *) + +let string_of_bits (b:bits) : string = + match b with + Bits8 -> "b8" + | Bits16 -> "b16" + | Bits32 -> "b32" + | Bits64 -> "b64" +;; + +let rec string_of_scalar_ty (s:scalar_ty) : string = + match s with + ValTy b -> (string_of_bits b) + | AddrTy r -> (string_of_referent_ty r) ^ "*" + +and string_of_referent_ty (r:referent_ty) : string = + match r with + ScalarTy s -> (string_of_scalar_ty s) + | StructTy rs -> + Printf.sprintf "[%s]" + (String.concat "," + (Array.to_list (Array.map string_of_referent_ty rs))) + | UnionTy rs -> + Printf.sprintf "(%s)" + (String.concat "|" + (Array.to_list (Array.map string_of_referent_ty rs))) + | ParamTy i -> Printf.sprintf "#%d" i + | OpaqueTy -> "?" + | CodeTy -> "!" + | NilTy -> "()" +;; + + +type hreg_formatter = hreg -> string;; + +let string_of_reg (f:hreg_formatter) (r:reg) : string = + match r with + Vreg i -> Printf.sprintf "<v%d>" i + | Hreg i -> f i +;; + +let rec string_of_expr64 (e64:Asm.expr64) : string = + let bin op a b = + Printf.sprintf "(%s %s %s)" (string_of_expr64 a) op (string_of_expr64 b) + in + let bini op a b = + Printf.sprintf "(%s %s %d)" (string_of_expr64 a) op b + in + match e64 with + Asm.IMM i when (i64_lt i 0L) -> Printf.sprintf "-0x%Lx" (Int64.neg i) + | Asm.IMM i -> Printf.sprintf "0x%Lx" i + | Asm.ADD (a,b) -> bin "+" a b + | Asm.SUB (a,b) -> bin "-" a b + | Asm.MUL (a,b) -> bin "*" a b + | Asm.DIV (a,b) -> bin "/" a b + | Asm.REM (a,b) -> bin "%" a b + | Asm.MAX (a,b) -> + Printf.sprintf "(max %s %s)" + (string_of_expr64 a) (string_of_expr64 b) + | Asm.ALIGN (a,b) -> + Printf.sprintf "(align %s %s)" + (string_of_expr64 a) (string_of_expr64 b) + | Asm.SLL (a,b) -> bini "<<" a b + | Asm.SLR (a,b) -> bini ">>" a b + | Asm.SAR (a,b) -> bini ">>>" a b + | Asm.AND (a,b) -> bin "&" a b + | Asm.XOR (a,b) -> bin "xor" a b + | Asm.OR (a,b) -> bin "|" a b + | Asm.NOT a -> Printf.sprintf "(not %s)" (string_of_expr64 a) + | Asm.NEG a -> Printf.sprintf "-%s" (string_of_expr64 a) + | Asm.F_POS f -> Printf.sprintf "<%s>.fpos" f.fixup_name + | Asm.F_SZ f -> Printf.sprintf "<%s>.fsz" f.fixup_name + | Asm.M_POS f -> Printf.sprintf "<%s>.mpos" f.fixup_name + | Asm.M_SZ f -> Printf.sprintf "<%s>.msz" f.fixup_name + | Asm.EXT _ -> "??ext??" +;; + +let string_of_off (e:Asm.expr64 option) : string = + match e with + None -> "" + | Some (Asm.IMM i) when (i64_lt i 0L) -> + Printf.sprintf " - 0x%Lx" (Int64.neg i) + | Some e' -> " + " ^ (string_of_expr64 e') +;; + +let string_of_mem (f:hreg_formatter) (a:mem) : string = + match a with + Abs e -> + Printf.sprintf "[%s]" (string_of_expr64 e) + | RegIn (r, off) -> + Printf.sprintf "[%s%s]" (string_of_reg f r) (string_of_off off) + | Spill i -> + Printf.sprintf "[<spill %d>]" i +;; +let string_of_cell (f:hreg_formatter) (c:cell) : string = + match c with + Reg (r,ty) -> + if !log_iltypes + then + Printf.sprintf "%s:%s" (string_of_reg f r) (string_of_scalar_ty ty) + else + Printf.sprintf "%s" (string_of_reg f r) + | Mem (a,ty) -> + if !log_iltypes + then + Printf.sprintf "%s:%s" + (string_of_mem f a) (string_of_referent_ty ty) + else + Printf.sprintf "%s" (string_of_mem f a) +;; + +let string_of_operand (f:hreg_formatter) (op:operand) : string = + match op with + Cell c -> string_of_cell f c + | ImmPtr (f, ty) -> + if !log_iltypes + then + Printf.sprintf "$<%s>.mpos:%s*" + f.fixup_name (string_of_referent_ty ty) + else + Printf.sprintf "$<%s>.mpos" f.fixup_name + | Imm (i, ty) -> + if !log_iltypes + then + Printf.sprintf "$%s:%s" (string_of_expr64 i) (string_of_ty_mach ty) + else + Printf.sprintf "$%s" (string_of_expr64 i) +;; + + +let string_of_code (f:hreg_formatter) (c:code) : string = + match c with + CodeLabel lab -> Printf.sprintf "<label %d>" lab + | CodePtr op -> string_of_operand f op + | CodeNone -> "<none>" +;; + + +let string_of_binop (op:binop) : string = + match op with + ADD -> "add" + | SUB -> "sub" + | IMUL -> "imul" + | UMUL -> "umul" + | IDIV -> "idiv" + | UDIV -> "udiv" + | IMOD -> "imod" + | UMOD -> "umod" + | AND -> "and" + | OR -> "or" + | XOR -> "xor" + | LSL -> "lsl" + | LSR -> "lsr" + | ASR -> "asr" +;; + +let string_of_unop (op:unop) : string = + match op with + NEG -> "neg" + | NOT -> "not" + | UMOV -> "umov" + | IMOV -> "imov" + | ZERO -> "zero" +;; + +let string_of_jmpop (op:jmpop) : string = + match op with + JE -> "je" + | JNE -> "jne" + | JL -> "jl" + | JLE -> "jle" + | JG -> "jg" + | JGE -> "jge" + | JB -> "jb" + | JBE -> "jbe" + | JA -> "ja" + | JAE -> "jae" + | JC -> "jc" + | JNC ->"jnc" + | JO -> "jo" + | JNO -> "jno" + | JZ -> "jz" + | JNZ ->"jnz" + | JMP -> "jmp" +;; + +let string_of_quad (f:hreg_formatter) (q:quad) : string = + match q.quad_body with + Binary b -> + Printf.sprintf "%s = %s %s %s" + (string_of_cell f b.binary_dst) + (string_of_operand f b.binary_lhs) + (string_of_binop b.binary_op) + (string_of_operand f b.binary_rhs) + + | Unary u -> + Printf.sprintf "%s = %s %s" + (string_of_cell f u.unary_dst) + (string_of_unop u.unary_op) + (string_of_operand f u.unary_src) + + | Cmp c -> + Printf.sprintf "cmp %s %s" + (string_of_operand f c.cmp_lhs) + (string_of_operand f c.cmp_rhs) + + | Lea le -> + Printf.sprintf "lea %s %s" + (string_of_cell f le.lea_dst) + (string_of_operand f le.lea_src) + + | Jmp j -> + Printf.sprintf "%s %s" + (string_of_jmpop j.jmp_op) + (string_of_code f j.jmp_targ) + + | Push op -> + Printf.sprintf "push %s" + (string_of_operand f op) + + | Pop c -> + Printf.sprintf "%s = pop" + (string_of_cell f c) + + | Call c -> + Printf.sprintf "%s = call %s" + (string_of_cell f c.call_dst) + (string_of_code f c.call_targ) + + | Ret -> "ret" + | Nop -> "nop" + | Dead -> "dead" + | Debug -> "debug" + | Regfence -> "regfence" + | Enter _ -> "enter lexical block" + | Leave -> "leave lexical block" + | End -> "---" +;; + + + +(* Emitters. *) + + +type emitter = { mutable emit_pc: int; + mutable emit_next_vreg: int option; + mutable emit_next_spill: int; + emit_preallocator: (quad' -> quad'); + emit_is_2addr: bool; + mutable emit_quads: quads; + emit_annotations: (int,string) Hashtbl.t; + emit_size_cache: ((size,operand) Hashtbl.t) Stack.t; + emit_node: node_id option; + } + + +let badq = { quad_fixup = None; + quad_implicits = []; + quad_body = End } +;; + + +let deadq = { quad_fixup = None; + quad_implicits = []; + quad_body = Dead } +;; + + +let new_emitter + (preallocator:quad' -> quad') + (is_2addr:bool) + (vregs_ok:bool) + (node:node_id option) + : emitter = + { + emit_pc = 0; + emit_next_vreg = (if vregs_ok then Some 0 else None); + emit_next_spill = 0; + emit_preallocator = preallocator; + emit_is_2addr = is_2addr; + emit_quads = Array.create 4 badq; + emit_annotations = Hashtbl.create 0; + emit_size_cache = Stack.create (); + emit_node = node; + } +;; + + +let num_vregs (e:emitter) : int = + match e.emit_next_vreg with + None -> 0 + | Some i -> i +;; + +let next_vreg_num (e:emitter) : vreg = + match e.emit_next_vreg with + None -> bug () "Il.next_vreg_num on non-vreg emitter" + | Some i -> + e.emit_next_vreg <- Some (i + 1); + i +;; + +let next_vreg (e:emitter) : reg = + Vreg (next_vreg_num e) +;; + +let next_vreg_cell (e:emitter) (s:scalar_ty) : cell = + Reg ((next_vreg e), s) +;; + +let next_spill (e:emitter) : spill = + let i = e.emit_next_spill in + e.emit_next_spill <- i + 1; + i +;; + +let next_spill_slot (e:emitter) (r:referent_ty) : typed_mem = + (Spill (next_spill e), r); +;; + + +let grow_if_necessary e = + let len = Array.length e.emit_quads in + if e.emit_pc >= len - 1 + then + let n = Array.create (2 * len) badq in + Array.blit e.emit_quads 0 n 0 len; + e.emit_quads <- n +;; + + +let binary (op:binop) (dst:cell) (lhs:operand) (rhs:operand) : quad' = + Binary { binary_op = op; + binary_dst = dst; + binary_lhs = lhs; + binary_rhs = rhs } +;; + +let unary (op:unop) (dst:cell) (src:operand) : quad' = + Unary { unary_op = op; + unary_dst = dst; + unary_src = src } + +let jmp (op:jmpop) (targ:code) : quad' = + Jmp { jmp_op = op; + jmp_targ = targ; } +;; + + +let lea (dst:cell) (src:operand) : quad' = + Lea { lea_dst = dst; + lea_src = src; } +;; + +let cmp (lhs:operand) (rhs:operand) : quad' = + Cmp { cmp_lhs = lhs; + cmp_rhs = rhs; } +;; + +let call (dst:cell) (targ:code) : quad' = + Call { call_dst = dst; + call_targ = targ; } +;; + +let umov (dst:cell) (src:operand) : quad' = + if (cell_is_nil dst || operand_is_nil src) + then Dead + else unary UMOV dst src +;; + +let zero (dst:cell) (count:operand) : quad' = + unary ZERO dst count +;; + +let is_mov uop = + match uop with + UMOV | IMOV -> true + | _ -> false +;; + +let mk_quad (q':quad') : quad = + { quad_body = q'; + quad_implicits = []; + quad_fixup = None } +;; + +let emit_full + (e:emitter) + (fix:fixup option) + (implicits:label list) + (q':quad') + : unit = + let fixup = ref fix in + let emit_quad_bottom q' = + grow_if_necessary e; + e.emit_quads.(e.emit_pc) <- { quad_body = q'; + quad_implicits = implicits; + quad_fixup = (!fixup) }; + fixup := None; + e.emit_pc <- e.emit_pc + 1 + in + + let emit_quad (q':quad') : unit = + (* re-decay any freshly generated mem-mem movs. *) + match q' with + Unary { unary_dst = Mem (dst_mem, ScalarTy src_st); + unary_src = Cell (Mem (src_mem, ScalarTy dst_st)); + unary_op = op } + when is_mov op -> + let v = next_vreg_cell e dst_st in + emit_quad_bottom + (unary op v (Cell (Mem (src_mem, ScalarTy src_st)))); + emit_quad_bottom + (unary op (Mem (dst_mem, ScalarTy dst_st)) (Cell v)) + | _ -> emit_quad_bottom q' + in + + let default_mov = + match q' with + Binary b -> + begin + match b.binary_op with + IDIV | IMUL | IMOD -> IMOV + | _ -> UMOV + end + | Unary u -> + begin + match u.unary_op with + IMOV -> IMOV + | _ -> UMOV + end + | _ -> UMOV + in + + let emit_mov (dst:cell) (src:operand) : unit = + emit_quad (unary default_mov dst src) + in + + let mov_if_operands_differ + (old_op:operand) (new_op:operand) + : unit = + if (new_op <> old_op) + then + match new_op with + (Cell new_cell) -> + emit_mov new_cell old_op + | _ -> () + in + + let mov_if_two_operands_differ + (old_lhs_op:operand) (new_lhs_op:operand) + (old_rhs_op:operand) (new_rhs_op:operand) + : unit = + (* + * This is sufficiently obscure that it deserves an explanation. + * + * The main idea here is to do two "mov_if_operands_differ" calls, + * such as one might have when setting up a binary quad. + * + * The problem comes when you happen to hit a case like X86 div, + * which preallocates *both* operands. Preallocating both means we + * have to potentially issue two movs into the preallocated regs, + * and the second of those movs might be a problem. Specifically: + * the second mov-to-prealloc might make be moving from a + * register-indirect mem cell based on a vreg, and that vreg may + * wind up being assigned to an hreg that we just loaded with the + * first mov. In other words, the second mov may retask the + * preallocated hreg we set up in the first mov. + * + * You laugh, but of course this actually happens. + * + * So here we do a conservative thing and check to see if either + * operand is memory-indirect at all. If either is, then for either + * of the 'old' operands we're *about* to mov into a prealloc reg, + * we first bounce them off a spill slot. Spill slots, thankfully, + * we can always count on being able to address irrespective of the + * opinions of the RA, as they are all just fp-relative. + * + * A slightly more aggressive version of this would only bounce + * cases that are not fp-relative already, though doing so would + * require threading the notion of what fp *is* through to + * here. Possibly tighten this up in the future (or just + * ... destroy this backend ASAP). + * + *) + let has_reg_indirect op = + match op with + Cell (Mem _) -> true + | _ -> false + in + let either_old_op_has_reg_indirect = + (has_reg_indirect old_lhs_op) || (has_reg_indirect old_rhs_op) + in + let old_lhs_op = + if either_old_op_has_reg_indirect && (new_lhs_op <> old_lhs_op) + then + let tmp = + Mem (next_spill_slot e + (ScalarTy (operand_scalar_ty old_lhs_op))) + in + emit_mov tmp old_lhs_op; + Cell tmp + else + old_lhs_op + in + let old_rhs_op = + if either_old_op_has_reg_indirect && (new_rhs_op <> old_rhs_op) + then + let tmp = + Mem (next_spill_slot e + (ScalarTy (operand_scalar_ty old_rhs_op))) + in + emit_mov tmp old_rhs_op; + Cell tmp + else + old_rhs_op + in + mov_if_operands_differ old_lhs_op new_lhs_op; + mov_if_operands_differ old_rhs_op new_rhs_op; + in + + let mov_if_cells_differ (old_cell:cell) (new_cell:cell) : unit = + if not (new_cell = old_cell) + then + emit_mov old_cell (Cell new_cell) + in + + let emit_decayed_quad q' = + match (q', e.emit_preallocator q') with + (Binary b, Binary b') -> + begin + mov_if_two_operands_differ + b.binary_lhs b'.binary_lhs + b.binary_rhs b'.binary_rhs; + if e.emit_is_2addr && + (not (b'.binary_lhs = (Cell b'.binary_dst))) + then + begin + emit_mov b'.binary_dst b'.binary_lhs; + emit_quad (Binary { b' with + binary_lhs = (Cell b'.binary_dst) }) + end + else + emit_quad (Binary b'); + mov_if_cells_differ b.binary_dst b'.binary_dst + end + + | (Unary u, Unary u') -> + mov_if_operands_differ u.unary_src u'.unary_src; + (* Assume '2addr' means '1addr' for unary ops. *) + if e.emit_is_2addr && + (u'.unary_op = NEG || u'.unary_op = NOT) && + (not (u'.unary_src = (Cell u'.unary_dst))) + then + begin + emit_mov u'.unary_dst u'.unary_src; + emit_quad (Unary { u' with unary_src = (Cell u'.unary_dst) }) + end + else + emit_quad (Unary u'); + mov_if_cells_differ u.unary_dst u'.unary_dst + + | (Cmp c, Cmp c') -> + mov_if_two_operands_differ + c.cmp_lhs c'.cmp_lhs + c.cmp_rhs c'.cmp_rhs; + emit_quad (Cmp c'); + + | (Push op, Push op') -> + mov_if_operands_differ op op'; + emit_quad (Push op'); + + | (Pop c, Pop c') -> + emit_quad (Pop c'); + mov_if_cells_differ c c' + + | (Call c, Call c') -> + emit_quad (Call c'); + mov_if_cells_differ c.call_dst c'.call_dst + + | (Lea lea, Lea lea') -> + emit_quad (Lea lea'); + mov_if_cells_differ lea.lea_dst lea'.lea_dst + + | (x, y) -> + assert (x = y); + emit_quad x + in + + (* pre-decay mem-mem movs. *) + match q' with + Unary { unary_dst = Mem (dst_mem, ScalarTy src_st); + unary_src = Cell (Mem (src_mem, ScalarTy dst_st)); + unary_op = op } + when is_mov op -> + let v = next_vreg_cell e dst_st in + emit_decayed_quad + (unary op v (Cell (Mem (src_mem, ScalarTy src_st)))); + emit_decayed_quad + (unary op (Mem (dst_mem, ScalarTy dst_st)) (Cell v)) + | _ -> emit_decayed_quad q' +;; + +let emit (e:emitter) (q':quad') : unit = + emit_full e None [] q' +;; + +let patch_jump (e:emitter) (jmp:int) (targ:int) : unit = + let q = e.emit_quads.(jmp) in + match q.quad_body with + Jmp j -> + assert (j.jmp_targ = CodeNone); + e.emit_quads.(jmp) <- + { q with quad_body = + Jmp { j with jmp_targ = CodeLabel targ } } + | _ -> () +;; + +(* More query functions. *) + +let get_element_ptr + (word_bits:bits) + (fmt:hreg_formatter) + (mem_cell:cell) + (i:int) + : cell = + match mem_cell with + Mem (mem, StructTy elts) when i >= 0 && i < (Array.length elts) -> + assert ((Array.length elts) != 0); + begin + let elt_rty = elts.(i) in + let elt_off = get_element_offset word_bits elts i in + match elt_off with + SIZE_fixed fixed_off -> + Mem (mem_off_imm mem fixed_off, elt_rty) + | _ -> bug () + "get_element_ptr %d on dynamic-size cell: offset %s" + i (string_of_size elt_off) + end + + | _ -> bug () "get_element_ptr %d on cell %s" i + (string_of_cell fmt mem_cell) +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/macho.ml b/src/boot/be/macho.ml new file mode 100644 index 00000000..7fccdfd3 --- /dev/null +++ b/src/boot/be/macho.ml @@ -0,0 +1,1184 @@ +open Asm;; +open Common;; + +(* Mach-O writer. *) + +let log (sess:Session.sess) = + Session.log "obj (mach-o)" + sess.Session.sess_log_obj + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_obj + then thunk () + else () +;; + +let (cpu_arch_abi64:int64) = 0x01000000L +;; + +let (mh_magic:int64) = 0xfeedfaceL +;; + +let cpu_subtype_intel (f:int64) (m:int64) : int64 = + Int64.add f (Int64.shift_left m 4) +;; + +type cpu_type = + (* Maybe support more later. *) + CPU_TYPE_X86 + | CPU_TYPE_X86_64 + | CPU_TYPE_ARM + | CPU_TYPE_POWERPC +;; + +type cpu_subtype = + (* Maybe support more later. *) + CPU_SUBTYPE_X86_ALL + | CPU_SUBTYPE_X86_64_ALL + | CPU_SUBTYPE_ARM_ALL + | CPU_SUBTYPE_POWERPC_ALL +;; + +type file_type = + MH_OBJECT + | MH_EXECUTE + | MH_FVMLIB + | MH_CORE + | MH_PRELOAD + | MH_DYLIB + | MH_DYLINKER + | MH_BUNDLE + | MH_DYLIB_STUB + | MH_DSYM +;; + +let file_type_code (ft:file_type) : int64 = + match ft with + MH_OBJECT ->0x1L (* object *) + | MH_EXECUTE -> 0x2L (* executable *) + | MH_FVMLIB -> 0x3L (* fixed-VM shared lib *) + | MH_CORE -> 0x4L (* core *) + | MH_PRELOAD -> 0x5L (* preloaded executable *) + | MH_DYLIB -> 0x6L (* dynamic lib *) + | MH_DYLINKER -> 0x7L (* dynamic linker *) + | MH_BUNDLE -> 0x8L (* bundle *) + | MH_DYLIB_STUB -> 0x9L (* shared lib stub *) + | MH_DSYM -> 0xaL (* debuginfo only *) +;; + +type file_flag = + MH_NOUNDEFS + | MH_INCRLINK + | MH_DYLDLINK + | MH_BINDATLOAD + | MH_PREBOUND + | MH_SPLIT_SEGS + | MH_LAZY_INIT + | MH_TWOLEVEL + | MH_FORCE_FLAT + | MH_NOMULTIDEFS + | MH_NOFIXPREBINDING + | MH_PREBINDABLE + | MH_ALLMODSBOUND + | MH_SUBSECTIONS_VIA_SYMBOLS + | MH_CANONICAL + | MH_WEAK_DEFINES + | MH_BINDS_TO_WEAK + | MH_ALLOW_STACK_EXECUTION + | MH_ROOT_SAFE + | MH_SETUID_SAFE + | MH_NO_REEXPORTED_DYLIBS + | MH_PIE +;; + +let file_flag_code (ff:file_flag) : int64 = + match ff with + MH_NOUNDEFS -> 0x1L + | MH_INCRLINK -> 0x2L + | MH_DYLDLINK -> 0x4L + | MH_BINDATLOAD -> 0x8L + | MH_PREBOUND -> 0x10L + | MH_SPLIT_SEGS -> 0x20L + | MH_LAZY_INIT -> 0x40L + | MH_TWOLEVEL -> 0x80L + | MH_FORCE_FLAT -> 0x100L + | MH_NOMULTIDEFS -> 0x200L + | MH_NOFIXPREBINDING -> 0x400L + | MH_PREBINDABLE -> 0x800L + | MH_ALLMODSBOUND -> 0x1000L + | MH_SUBSECTIONS_VIA_SYMBOLS -> 0x2000L + | MH_CANONICAL -> 0x4000L + | MH_WEAK_DEFINES -> 0x8000L + | MH_BINDS_TO_WEAK -> 0x10000L + | MH_ALLOW_STACK_EXECUTION -> 0x20000L + | MH_ROOT_SAFE -> 0x40000L + | MH_SETUID_SAFE -> 0x80000L + | MH_NO_REEXPORTED_DYLIBS -> 0x100000L + | MH_PIE -> 0x200000L +;; + + +type vm_prot = + VM_PROT_NONE + | VM_PROT_READ + | VM_PROT_WRITE + | VM_PROT_EXECUTE +;; + + +type load_command = + LC_SEGMENT + | LC_SYMTAB + | LC_SYMSEG + | LC_THREAD + | LC_UNIXTHREAD + | LC_LOADFVMLIB + | LC_IDFVMLIB + | LC_IDENT + | LC_FVMFILE + | LC_PREPAGE + | LC_DYSYMTAB + | LC_LOAD_DYLIB + | LC_ID_DYLIB + | LC_LOAD_DYLINKER + | LC_ID_DYLINKER + | LC_PREBOUND_DYLIB + | LC_ROUTINES + | LC_SUB_FRAMEWORK + | LC_SUB_UMBRELLA + | LC_SUB_CLIENT + | LC_SUB_LIBRARY + | LC_TWOLEVEL_HINTS + | LC_PREBIND_CKSUM + | LC_LOAD_WEAK_DYLIB + | LC_SEGMENT_64 + | LC_ROUTINES_64 + | LC_UUID + | LC_RPATH + | LC_CODE_SIGNATURE + | LC_SEGMENT_SPLIT_INFO + | LC_REEXPORT_DYLIB + | LC_LAZY_LOAD_DYLIB + | LC_ENCRYPTION_INFO +;; + + +let cpu_type_code (cpu:cpu_type) : int64 = + match cpu with + CPU_TYPE_X86 -> 7L + | CPU_TYPE_X86_64 -> Int64.logor 7L cpu_arch_abi64 + | CPU_TYPE_ARM -> 12L + | CPU_TYPE_POWERPC -> 18L +;; + +let cpu_subtype_code (cpu:cpu_subtype) : int64 = + match cpu with + CPU_SUBTYPE_X86_ALL -> 3L + | CPU_SUBTYPE_X86_64_ALL -> 3L + | CPU_SUBTYPE_ARM_ALL -> 0L + | CPU_SUBTYPE_POWERPC_ALL -> 0L +;; + + +let vm_prot_code (vmp:vm_prot) : int64 = + match vmp with + VM_PROT_NONE -> 0L + | VM_PROT_READ -> 1L + | VM_PROT_WRITE -> 2L + | VM_PROT_EXECUTE -> 4L +;; + + +let lc_req_dyld = 0x80000000L;; + +let load_command_code (lc:load_command) = + match lc with + | LC_SEGMENT -> 0x1L + | LC_SYMTAB -> 0x2L + | LC_SYMSEG -> 0x3L + | LC_THREAD -> 0x4L + | LC_UNIXTHREAD -> 0x5L + | LC_LOADFVMLIB -> 0x6L + | LC_IDFVMLIB -> 0x7L + | LC_IDENT -> 0x8L + | LC_FVMFILE -> 0x9L + | LC_PREPAGE -> 0xaL + | LC_DYSYMTAB -> 0xbL + | LC_LOAD_DYLIB -> 0xcL + | LC_ID_DYLIB -> 0xdL + | LC_LOAD_DYLINKER -> 0xeL + | LC_ID_DYLINKER -> 0xfL + | LC_PREBOUND_DYLIB -> 0x10L + | LC_ROUTINES -> 0x11L + | LC_SUB_FRAMEWORK -> 0x12L + | LC_SUB_UMBRELLA -> 0x13L + | LC_SUB_CLIENT -> 0x14L + | LC_SUB_LIBRARY -> 0x15L + | LC_TWOLEVEL_HINTS -> 0x16L + | LC_PREBIND_CKSUM -> 0x17L + | LC_LOAD_WEAK_DYLIB -> Int64.logor lc_req_dyld 0x18L + | LC_SEGMENT_64 -> 0x19L + | LC_ROUTINES_64 -> 0x1aL + | LC_UUID -> 0x1bL + | LC_RPATH -> Int64.logor lc_req_dyld 0x1cL + | LC_CODE_SIGNATURE -> 0x1dL + | LC_SEGMENT_SPLIT_INFO -> 0x1eL + | LC_REEXPORT_DYLIB -> Int64.logor lc_req_dyld 0x1fL + | LC_LAZY_LOAD_DYLIB -> 0x20L + | LC_ENCRYPTION_INFO -> 0x21L +;; + + +let fixed_sz_string (sz:int) (str:string) : frag = + if String.length str > sz + then STRING (String.sub str 0 sz) + else SEQ [| STRING str; PAD (sz - (String.length str)) |] +;; + +type sect_type = + S_REGULAR + | S_ZEROFILL + | S_CSTRING_LITERALS + | S_4BYTE_LITERALS + | S_8BYTE_LITERALS + | S_LITERAL_POINTERS + | S_NON_LAZY_SYMBOL_POINTERS + | S_LAZY_SYMBOL_POINTERS + | S_SYMBOL_STUBS + | S_MOD_INIT_FUNC_POINTERS + | S_MOD_TERM_FUNC_POINTERS + | S_COALESCED + | S_GB_ZEROFILL + | S_INTERPOSING + | S_16BYTE_LITERALS + | S_DTRACE_DOF + | S_LAZY_DYLIB_SYMBOL_POINTERS +;; + +let sect_type_code (s:sect_type) : int64 = + match s with + S_REGULAR -> 0x0L + | S_ZEROFILL -> 0x1L + | S_CSTRING_LITERALS -> 0x2L + | S_4BYTE_LITERALS -> 0x3L + | S_8BYTE_LITERALS -> 0x4L + | S_LITERAL_POINTERS -> 0x5L + | S_NON_LAZY_SYMBOL_POINTERS -> 0x6L + | S_LAZY_SYMBOL_POINTERS -> 0x7L + | S_SYMBOL_STUBS -> 0x8L + | S_MOD_INIT_FUNC_POINTERS -> 0x9L + | S_MOD_TERM_FUNC_POINTERS -> 0xaL + | S_COALESCED -> 0xbL + | S_GB_ZEROFILL -> 0xcL + | S_INTERPOSING -> 0xdL + | S_16BYTE_LITERALS -> 0xeL + | S_DTRACE_DOF -> 0xfL + | S_LAZY_DYLIB_SYMBOL_POINTERS -> 0x10L +;; + +type sect_attr = + S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_NO_TOC + | S_ATTR_STRIP_STATIC_SYMS + | S_ATTR_NO_DEAD_STRIP + | S_ATTR_LIVE_SUPPORT + | S_ATTR_SELF_MODIFYING_CODE + | S_ATTR_DEBUG + | S_ATTR_SOME_INSTRUCTIONS + | S_ATTR_EXT_RELOC + | S_ATTR_LOC_RELOC +;; + +let sect_attr_code (s:sect_attr) : int64 = + match s with + S_ATTR_PURE_INSTRUCTIONS -> 0x80000000L + | S_ATTR_NO_TOC -> 0x40000000L + | S_ATTR_STRIP_STATIC_SYMS -> 0x20000000L + | S_ATTR_NO_DEAD_STRIP -> 0x10000000L + | S_ATTR_LIVE_SUPPORT -> 0x08000000L + | S_ATTR_SELF_MODIFYING_CODE -> 0x04000000L + | S_ATTR_DEBUG -> 0x02000000L + | S_ATTR_SOME_INSTRUCTIONS -> 0x00000400L + | S_ATTR_EXT_RELOC -> 0x00000200L + | S_ATTR_LOC_RELOC -> 0x00000100L +;; + +type n_type = + | N_EXT + | N_UNDF + | N_ABS + | N_SECT + | N_PBUD + | N_INDIR +;; + +let n_type_code (n:n_type) : int64 = + match n with + N_EXT -> 0x1L + | N_UNDF -> 0x0L + | N_ABS -> 0x2L + | N_SECT -> 0xeL + | N_PBUD -> 0xcL + | N_INDIR -> 0xaL +;; + + +type n_desc_reference_type = + REFERENCE_FLAG_UNDEFINED_NON_LAZY + | REFERENCE_FLAG_UNDEFINED_LAZY + | REFERENCE_FLAG_DEFINED + | REFERENCE_FLAG_PRIVATE_DEFINED + | REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY + | REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY +;; + +let n_desc_reference_type_code (n:n_desc_reference_type) : int64 = + match n with + REFERENCE_FLAG_UNDEFINED_NON_LAZY -> 0x0L + | REFERENCE_FLAG_UNDEFINED_LAZY -> 0x1L + | REFERENCE_FLAG_DEFINED -> 0x2L + | REFERENCE_FLAG_PRIVATE_DEFINED -> 0x3L + | REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY -> 0x4L + | REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY -> 0x5L +;; + +type n_desc_flags = + REFERENCED_DYNAMICALLY + | N_DESC_DISCARDED + | N_NO_DEAD_STRIP + | N_WEAK_REF + | N_WEAK_DEF +;; + +let n_desc_flags_code (n:n_desc_flags) : int64 = + match n with + REFERENCED_DYNAMICALLY -> 0x10L + | N_DESC_DISCARDED -> 0x20L + | N_NO_DEAD_STRIP -> 0x20L (* Yes, they reuse 0x20. *) + | N_WEAK_REF -> 0x40L + | N_WEAK_DEF -> 0x80L +;; + +type n_desc_dylib_ordinal = int;; + +type n_desc = (n_desc_dylib_ordinal * + (n_desc_flags list) * + n_desc_reference_type) +;; + +let n_desc_code (n:n_desc) : int64 = + let (dylib_ordinal, flags, ty) = n in + Int64.logor + (Int64.of_int (dylib_ordinal lsl 8)) + (Int64.logor + (fold_flags n_desc_flags_code flags) + (n_desc_reference_type_code ty)) +;; + + +let macho_section_command + (seg_name:string) + (sect:(string * int * (sect_attr list) * sect_type * fixup)) + : frag = + let (sect_name, sect_align, sect_attrs, sect_type, sect_fixup) = sect in + SEQ [| + fixed_sz_string 16 sect_name; + fixed_sz_string 16 seg_name; + WORD (TY_u32, M_POS sect_fixup); + WORD (TY_u32, M_SZ sect_fixup); + WORD (TY_u32, F_POS sect_fixup); + WORD (TY_u32, IMM (Int64.of_int sect_align)); + WORD (TY_u32, IMM 0L); (* reloff *) + WORD (TY_u32, IMM 0L); (* nreloc *) + WORD (TY_u32, (IMM (Int64.logor (* flags (and attrs) *) + (fold_flags sect_attr_code sect_attrs) + (sect_type_code sect_type)))); + WORD (TY_u32, IMM 0L); (* reserved1 *) + WORD (TY_u32, IMM 0L); (* reserved2 *) + |] +;; + +let macho_segment_command + (seg_name:string) + (seg_fixup:fixup) + (maxprot:vm_prot list) + (initprot:vm_prot list) + (sects:(string * int * (sect_attr list) * sect_type * fixup) array) + : frag = + + let cmd_fixup = new_fixup "segment command" in + let cmd = + SEQ [| + WORD (TY_u32, IMM (load_command_code LC_SEGMENT)); + WORD (TY_u32, F_SZ cmd_fixup); + fixed_sz_string 16 seg_name; + WORD (TY_u32, M_POS seg_fixup); + WORD (TY_u32, M_SZ seg_fixup); + WORD (TY_u32, F_POS seg_fixup); + WORD (TY_u32, F_SZ seg_fixup); + WORD (TY_u32, IMM (fold_flags vm_prot_code maxprot)); + WORD (TY_u32, IMM (fold_flags vm_prot_code initprot)); + WORD (TY_u32, IMM (Int64.of_int (Array.length sects))); + WORD (TY_u32, IMM 0L); (* Flags? *) + |] + in + DEF (cmd_fixup, + SEQ [| + cmd; + SEQ (Array.map (macho_section_command seg_name) sects); + |]) +;; + +let macho_thread_command + (entry:fixup) + : frag = + let cmd_fixup = new_fixup "thread command" in + let x86_THREAD_STATE32 = 1L in + let regs = + [| + WORD (TY_u32, IMM 0x0L); (* eax *) + WORD (TY_u32, IMM 0x0L); (* ebx *) + WORD (TY_u32, IMM 0x0L); (* ecx *) + WORD (TY_u32, IMM 0x0L); (* edx *) + + WORD (TY_u32, IMM 0x0L); (* edi *) + WORD (TY_u32, IMM 0x0L); (* esi *) + WORD (TY_u32, IMM 0x0L); (* ebp *) + WORD (TY_u32, IMM 0x0L); (* esp *) + + WORD (TY_u32, IMM 0x0L); (* ss *) + WORD (TY_u32, IMM 0x0L); (* eflags *) + WORD (TY_u32, M_POS entry); (* eip *) + WORD (TY_u32, IMM 0x0L); (* cs *) + + WORD (TY_u32, IMM 0x0L); (* ds *) + WORD (TY_u32, IMM 0x0L); (* es *) + WORD (TY_u32, IMM 0x0L); (* fs *) + WORD (TY_u32, IMM 0x0L); (* gs *) + |] + in + let cmd = + SEQ [| + WORD (TY_u32, IMM (load_command_code LC_UNIXTHREAD)); + WORD (TY_u32, F_SZ cmd_fixup); + WORD (TY_u32, IMM x86_THREAD_STATE32); (* "flavour" *) + WORD (TY_u32, IMM (Int64.of_int (Array.length regs))); + SEQ regs + |] + in + DEF (cmd_fixup, cmd) +;; + +let macho_dylinker_command : frag = + let cmd_fixup = new_fixup "dylinker command" in + let str_fixup = new_fixup "dylinker lc_str fixup" in + let cmd = + SEQ + [| + WORD (TY_u32, IMM (load_command_code LC_LOAD_DYLINKER)); + WORD (TY_u32, F_SZ cmd_fixup); + + (* see definition of lc_str; these things are weird. *) + WORD (TY_u32, SUB (F_POS (str_fixup), F_POS (cmd_fixup))); + DEF (str_fixup, ZSTRING "/usr/lib/dyld"); + ALIGN_FILE (4, MARK); + |] + in + DEF (cmd_fixup, cmd); +;; + +let macho_dylib_command (dylib:string) : frag = + + let cmd_fixup = new_fixup "dylib command" in + let str_fixup = new_fixup "dylib lc_str fixup" in + let cmd = + SEQ + [| + WORD (TY_u32, IMM (load_command_code LC_LOAD_DYLIB)); + WORD (TY_u32, F_SZ cmd_fixup); + + (* see definition of lc_str; these things are weird. *) + WORD (TY_u32, SUB (F_POS (str_fixup), F_POS (cmd_fixup))); + + WORD (TY_u32, IMM 0L); (* timestamp *) + WORD (TY_u32, IMM 0L); (* current_version *) + WORD (TY_u32, IMM 0L); (* compatibility_version *) + + (* Payload-and-alignment of an lc_str goes at end of command. *) + DEF (str_fixup, ZSTRING dylib); + ALIGN_FILE (4, MARK); + + |] + in + DEF (cmd_fixup, cmd) +;; + + +let macho_symtab_command + (symtab_fixup:fixup) + (nsyms:int64) + (strtab_fixup:fixup) + : frag = + let cmd_fixup = new_fixup "symtab command" in + let cmd = + SEQ + [| + WORD (TY_u32, IMM (load_command_code LC_SYMTAB)); + WORD (TY_u32, F_SZ cmd_fixup); + + WORD (TY_u32, F_POS symtab_fixup); (* symoff *) + WORD (TY_u32, IMM nsyms); (* nsyms *) + + WORD (TY_u32, F_POS strtab_fixup); (* stroff *) + WORD (TY_u32, F_SZ strtab_fixup); (* strsz *) + |] + in + DEF (cmd_fixup, cmd) +;; + +let macho_dysymtab_command + (local_defined_syms_index:int64) + (local_defined_syms_count:int64) + (external_defined_syms_index:int64) + (external_defined_syms_count:int64) + (undefined_syms_index:int64) + (undefined_syms_count:int64) + (indirect_symtab_fixup:fixup) : frag = + let cmd_fixup = new_fixup "dysymtab command" in + let cmd = + SEQ + [| + WORD (TY_u32, IMM (load_command_code LC_DYSYMTAB)); + WORD (TY_u32, F_SZ cmd_fixup); + + WORD (TY_u32, IMM local_defined_syms_index); (* ilocalsym *) + WORD (TY_u32, IMM local_defined_syms_count); (* nlocalsym *) + + WORD (TY_u32, IMM external_defined_syms_index); (* iextdefsym *) + WORD (TY_u32, IMM external_defined_syms_count); (* nextdefsym *) + + WORD (TY_u32, IMM undefined_syms_index); (* iundefsym *) + WORD (TY_u32, IMM undefined_syms_count); (* nundefsym *) + + WORD (TY_u32, IMM 0L); (* tocoff *) + WORD (TY_u32, IMM 0L); (* ntoc *) + + WORD (TY_u32, IMM 0L); (* modtaboff *) + WORD (TY_u32, IMM 0L); (* nmodtab *) + + WORD (TY_u32, IMM 0L); (* extrefsymoff *) + WORD (TY_u32, IMM 0L); (* nextrefsyms *) + + WORD (TY_u32, F_POS indirect_symtab_fixup); (* indirectsymoff *) + WORD (TY_u32, IMM undefined_syms_count); (* nindirectsyms *) + + WORD (TY_u32, IMM 0L); (* extreloff *) + WORD (TY_u32, IMM 0L); (* nextrel *) + + WORD (TY_u32, IMM 0L); (* locreloff *) + WORD (TY_u32, IMM 0L); (* nlocrel *) + |] + in + DEF (cmd_fixup, cmd) +;; + +let macho_header_32 + (cpu:cpu_type) + (sub:cpu_subtype) + (ftype:file_type) + (flags:file_flag list) + (loadcmds:frag array) : frag = + let load_commands_fixup = new_fixup "load commands" in + let cmds = DEF (load_commands_fixup, SEQ loadcmds) in + SEQ + [| + WORD (TY_u32, IMM mh_magic); + WORD (TY_u32, IMM (cpu_type_code cpu)); + WORD (TY_u32, IMM (cpu_subtype_code sub)); + WORD (TY_u32, IMM (file_type_code ftype)); + WORD (TY_u32, IMM (Int64.of_int (Array.length loadcmds))); + WORD (TY_u32, F_SZ load_commands_fixup); + WORD (TY_u32, IMM (fold_flags file_flag_code flags)); + cmds + |] +;; + +let emit_file + (sess:Session.sess) + (crate:Ast.crate) + (code:Asm.frag) + (data:Asm.frag) + (sem:Semant.ctxt) + (dwarf:Dwarf.debug_records) + : unit = + + (* FIXME: alignment? *) + + let mh_execute_header_fixup = new_fixup "__mh_execute header" in + + let nxargc_fixup = (Semant.provide_native sem SEG_data "NXArgc") in + let nxargv_fixup = (Semant.provide_native sem SEG_data "NXArgv") in + let progname_fixup = (Semant.provide_native sem SEG_data "__progname") in + let environ_fixup = (Semant.provide_native sem SEG_data "environ") in + let exit_fixup = (Semant.require_native sem REQUIRED_LIB_crt "exit") in + let (start_fixup, rust_start_fixup) = + if sess.Session.sess_library_mode + then (None, None) + else (Some (new_fixup "start function entry"), + Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) + in + + let text_sect_align_log2 = 2 in + let data_sect_align_log2 = 2 in + + let seg_align = 0x1000 in + let text_sect_align = 2 lsl text_sect_align_log2 in + let data_sect_align = 2 lsl data_sect_align_log2 in + + let align_both align i = + ALIGN_FILE (align, + (ALIGN_MEM (align, i))) + in + + let def_aligned a f i = + align_both a + (SEQ [| DEF(f, i); + (align_both a MARK)|]) + in + + (* Segments. *) + let zero_segment_fixup = new_fixup "__PAGEZERO segment" in + let text_segment_fixup = new_fixup "__TEXT segment" in + let data_segment_fixup = new_fixup "__DATA segment" in + let dwarf_segment_fixup = new_fixup "__DWARF segment" in + let linkedit_segment_fixup = new_fixup "__LINKEDIT segment" in + + (* Sections in the text segment. *) + let text_section_fixup = new_fixup "__text section" in + + (* Sections in the data segment. *) + let data_section_fixup = new_fixup "__data section" in + let const_section_fixup = new_fixup "__const section" in + let bss_section_fixup = new_fixup "__bss section" in + let note_rust_section_fixup = new_fixup "__note.rust section" in + let nl_symbol_ptr_section_fixup = new_fixup "__nl_symbol_ptr section" in + + let data_section = def_aligned data_sect_align data_section_fixup data in + let const_section = + def_aligned data_sect_align const_section_fixup (SEQ [| |]) + in + let bss_section = + def_aligned data_sect_align bss_section_fixup (SEQ [| |]) + in + let note_rust_section = + def_aligned + data_sect_align note_rust_section_fixup + (Asm.note_rust_frags crate.node.Ast.crate_meta) + in + + (* Officially Apple doesn't claim to support DWARF sections like this, but + they work. *) + let debug_info_section = + def_aligned data_sect_align + sem.Semant.ctxt_debug_info_fixup + dwarf.Dwarf.debug_info + in + let debug_abbrev_section = + def_aligned data_sect_align + sem.Semant.ctxt_debug_abbrev_fixup + dwarf.Dwarf.debug_abbrev + in + + + (* String, symbol and parallel "nonlazy-pointer" tables. *) + let symtab_fixup = new_fixup "symtab" in + let strtab_fixup = new_fixup "strtab" in + + let symbol_nlist_entry + (sect_index:int) + (nty:n_type list) + (nd:n_desc) + (nv:Asm.expr64) + : (frag * fixup) = + let strtab_entry_fixup = new_fixup "strtab entry" in + (SEQ + [| + WORD (TY_u32, SUB ((F_POS strtab_entry_fixup), + (F_POS strtab_fixup))); + BYTE (Int64.to_int (fold_flags n_type_code nty)); + BYTE sect_index; + WORD (TY_u16, IMM (n_desc_code nd)); + WORD (TY_u32, nv); + |], strtab_entry_fixup) + in + + let sect_symbol_nlist_entry + (seg:segment) + (fixup_to_use:fixup) + : (frag * fixup) = + let nty = [ N_SECT; N_EXT ] in + let nd = (0, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in + let (sect_index, _(*seg_fix*)) = + match seg with + SEG_text -> (1, text_segment_fixup) + | SEG_data -> (2, data_segment_fixup) + in + symbol_nlist_entry sect_index nty nd (M_POS fixup_to_use) + in + + let sect_private_symbol_nlist_entry + (seg:segment) + (fixup_to_use:fixup) + : (frag * fixup) = + let nty = [ N_SECT; ] in + let nd = (0, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in + let (sect_index, _(*seg_fix*)) = + match seg with + SEG_text -> (1, text_segment_fixup) + | SEG_data -> (2, data_segment_fixup) + in + symbol_nlist_entry sect_index nty nd (M_POS fixup_to_use) + in + + let indirect_symbol_nlist_entry (dylib_index:int) : (frag * fixup) = + let nty = [ N_UNDF; N_EXT ] in + let nd = (dylib_index, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in + symbol_nlist_entry 0 nty nd (IMM 0L) + in + + let indirect_symbols = + Array.of_list + (List.concat + (List.map + (fun (lib, tab) -> + (List.map + (fun (name,fix) -> (lib,name,fix)) + (htab_pairs tab))) + (htab_pairs sem.Semant.ctxt_native_required))) + in + + let dylib_index (lib:required_lib) : int = + match lib with + REQUIRED_LIB_rustrt -> 1 + | REQUIRED_LIB_crt -> 2 + | _ -> bug () "Macho.dylib_index on nonstandard required lib." + in + + (* Make undef symbols for native imports. *) + let (undefined_symbols:(string * (frag * fixup)) array) = + Array.map (fun (lib,name,_) -> + ("_" ^ name, + indirect_symbol_nlist_entry (dylib_index lib))) + indirect_symbols + in + + (* Make symbols for exports. *) + let (export_symbols:(string * (frag * fixup)) array) = + let export_symbols_of_seg (seg, tab) = + List.map + begin + fun (name, fix) -> + let name = "_" ^ name in + let sym = sect_symbol_nlist_entry seg fix in + (name, sym) + end + (htab_pairs tab) + in + Array.of_list + (List.concat + (List.map export_symbols_of_seg + (htab_pairs sem.Semant.ctxt_native_provided))) + in + + (* Make private symbols for items. *) + let (local_item_symbols:(string * (frag * fixup)) array) = + Array.map (fun code -> + let fix = code.Semant.code_fixup in + ("_" ^ fix.fixup_name, + sect_private_symbol_nlist_entry SEG_text fix)) + (Array.of_list (htab_vals sem.Semant.ctxt_all_item_code)) + in + + (* Make private symbols for glue. *) + let (local_glue_symbols:(string * (frag * fixup)) array) = + Array.map (fun (g, code) -> + let fix = code.Semant.code_fixup in + ("_" ^ (Semant.glue_str sem g), + sect_private_symbol_nlist_entry SEG_text fix)) + (Array.of_list (htab_pairs sem.Semant.ctxt_glue_code)) + in + + let (export_header_symbols:(string * (frag * fixup)) array) = + let name = + if sess.Session.sess_library_mode + then "__mh_dylib_header" + else "__mh_execute_header" + in + [| + (name, sect_symbol_nlist_entry SEG_text mh_execute_header_fixup); + |] + in + + let export_symbols = Array.concat [ export_symbols; + export_header_symbols ] + in + + let local_symbols = Array.concat [ local_item_symbols; + local_glue_symbols ] + in + + let symbols = Array.concat [ local_symbols; + export_symbols; + undefined_symbols ] + in + let n_local_syms = Array.length local_symbols in + let n_export_syms = Array.length export_symbols in + let n_undef_syms = Array.length undefined_symbols in + + let indirect_symbols_off = n_local_syms + n_export_syms in + let indirect_symtab_fixup = new_fixup "indirect symbol table" in + let indirect_symtab = + DEF (indirect_symtab_fixup, + SEQ (Array.mapi + (fun i _ -> WORD (TY_u32, + IMM (Int64.of_int + (i + indirect_symbols_off)))) + indirect_symbols)) + in + + let nl_symbol_ptr_section = + def_aligned data_sect_align nl_symbol_ptr_section_fixup + (SEQ (Array.map + (fun (_, _, fix) -> + DEF(fix, WORD(TY_u32, IMM 0L))) + indirect_symbols)) + in + let strtab = DEF (strtab_fixup, + SEQ (Array.map + (fun (name, (_, fix)) -> DEF(fix, ZSTRING name)) + symbols)) + in + let symtab = DEF (symtab_fixup, + SEQ (Array.map (fun (_, (frag, _)) -> frag) symbols)) + in + + + let load_commands = + [| + macho_segment_command "__PAGEZERO" zero_segment_fixup + [] [] [||]; + + macho_segment_command "__TEXT" text_segment_fixup + [VM_PROT_READ; VM_PROT_EXECUTE] + [VM_PROT_READ; VM_PROT_EXECUTE] + [| + ("__text", text_sect_align_log2, [], S_REGULAR, text_section_fixup) + |]; + + macho_segment_command "__DATA" data_segment_fixup + [VM_PROT_READ; VM_PROT_WRITE] + [VM_PROT_READ; VM_PROT_WRITE] + [| + ("__data", data_sect_align_log2, [], + S_REGULAR, data_section_fixup); + ("__const", data_sect_align_log2, [], + S_REGULAR, const_section_fixup); + ("__bss", data_sect_align_log2, [], + S_REGULAR, bss_section_fixup); + ("__note.rust", data_sect_align_log2, [], + S_REGULAR, note_rust_section_fixup); + ("__nl_symbol_ptr", data_sect_align_log2, [], + S_NON_LAZY_SYMBOL_POINTERS, nl_symbol_ptr_section_fixup) + |]; + + macho_segment_command "__DWARF" dwarf_segment_fixup + [VM_PROT_READ] + [VM_PROT_READ] + [| + ("__debug_info", data_sect_align_log2, [], + S_REGULAR, sem.Semant.ctxt_debug_info_fixup); + ("__debug_abbrev", data_sect_align_log2, [], + S_REGULAR, sem.Semant.ctxt_debug_abbrev_fixup); + |]; + + macho_segment_command "__LINKEDIT" linkedit_segment_fixup + [VM_PROT_READ] + [VM_PROT_READ] + [| + |]; + + macho_symtab_command + symtab_fixup (Int64.of_int (Array.length symbols)) strtab_fixup; + + + macho_dysymtab_command + 0L + (Int64.of_int n_local_syms) + (Int64.of_int n_local_syms) + (Int64.of_int n_export_syms) + (Int64.of_int (n_local_syms + n_export_syms)) + (Int64.of_int n_undef_syms) + indirect_symtab_fixup; + + macho_dylinker_command; + + macho_dylib_command "librustrt.dylib"; + + macho_dylib_command "/usr/lib/libSystem.B.dylib"; + + begin + match start_fixup with + None -> MARK + | Some start_fixup -> + macho_thread_command start_fixup + end; + |] + in + + let header_and_commands = + macho_header_32 + CPU_TYPE_X86 + CPU_SUBTYPE_X86_ALL + (if sess.Session.sess_library_mode then MH_DYLIB else MH_EXECUTE) + [ MH_BINDATLOAD; MH_DYLDLINK; MH_TWOLEVEL ] + load_commands + in + + let objfile_start e start_fixup rust_start_fixup main_fn_fixup = + let edx = X86.h X86.edx in + let edx_pointee = + Il.Mem ((Il.RegIn (edx, None)), Il.ScalarTy (Il.AddrTy Il.OpaqueTy)) + in + Il.emit_full e (Some start_fixup) [] Il.Dead; + + (* zero marks the bottom of the frame chain. *) + Il.emit e (Il.Push (X86.imm (Asm.IMM 0L))); + Il.emit e (Il.umov (X86.rc X86.ebp) (X86.ro X86.esp)); + + (* 16-byte align stack for SSE. *) + Il.emit e (Il.binary Il.AND (X86.rc X86.esp) (X86.ro X86.esp) + (X86.imm (Asm.IMM 0xfffffffffffffff0L))); + + (* Store argv. *) + Abi.load_fixup_addr e edx nxargv_fixup Il.OpaqueTy; + Il.emit e (Il.lea (X86.rc X86.ecx) + (Il.Cell (Il.Mem ((Il.RegIn (Il.Hreg X86.ebp, + Some (X86.word_off_n 2))), + Il.OpaqueTy)))); + Il.emit e (Il.umov edx_pointee (X86.ro X86.ecx)); + Il.emit e (Il.Push (X86.ro X86.ecx)); + + (* Store argc. *) + Abi.load_fixup_addr e edx nxargc_fixup Il.OpaqueTy; + Il.emit e (Il.umov (X86.rc X86.eax) + (X86.c (X86.word_n (Il.Hreg X86.ebp) 1))); + Il.emit e (Il.umov edx_pointee (X86.ro X86.eax)); + Il.emit e (Il.Push (X86.ro X86.eax)); + + (* Calculte and store envp. *) + Il.emit e (Il.binary Il.ADD + (X86.rc X86.eax) (X86.ro X86.eax) + (X86.imm (Asm.IMM 1L))); + Il.emit e (Il.binary Il.UMUL + (X86.rc X86.eax) (X86.ro X86.eax) + (X86.imm (Asm.IMM X86.word_sz))); + Il.emit e (Il.binary Il.ADD (X86.rc X86.eax) + (X86.ro X86.eax) (X86.ro X86.ecx)); + Abi.load_fixup_addr e edx environ_fixup Il.OpaqueTy; + Il.emit e (Il.umov edx_pointee (X86.ro X86.eax)); + + (* Push 16 bytes to preserve SSE alignment. *) + Abi.load_fixup_addr e edx sem.Semant.ctxt_crate_fixup Il.OpaqueTy; + Il.emit e (Il.Push (X86.ro X86.edx)); + Abi.load_fixup_addr e edx main_fn_fixup Il.OpaqueTy; + Il.emit e (Il.Push (X86.ro X86.edx)); + let fptr = Abi.load_fixup_codeptr e edx rust_start_fixup true true in + Il.emit e (Il.call (X86.rc X86.eax) fptr); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + Il.emit e (Il.Push (X86.ro X86.eax)); + let fptr = Abi.load_fixup_codeptr e edx exit_fixup true true in + Il.emit e (Il.call (X86.rc X86.eax) fptr); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + + Il.emit e Il.Ret; + in + + let text_section = + let start_code = + match (start_fixup, rust_start_fixup, + sem.Semant.ctxt_main_fn_fixup) with + (None, _, _) + | (_, None, _) + | (_, _, None) -> MARK + | (Some start_fixup, + Some rust_start_fixup, + Some main_fn_fixup) -> + let e = X86.new_emitter_without_vregs () in + objfile_start e start_fixup rust_start_fixup main_fn_fixup; + X86.frags_of_emitted_quads sess e + in + def_aligned text_sect_align text_section_fixup + (SEQ [| + start_code; + code + |]) + in + + let text_segment = + def_aligned seg_align text_segment_fixup + (SEQ [| + DEF (mh_execute_header_fixup, header_and_commands); + text_section; + align_both seg_align MARK; + |]); + in + + let zero_segment = align_both seg_align + (SEQ [| MEMPOS 0L; DEF (zero_segment_fixup, + SEQ [| MEMPOS 0x1000L; MARK |] ) |]) + in + + let data_segment = def_aligned seg_align data_segment_fixup + (SEQ [| + DEF(nxargc_fixup, WORD (TY_u32, IMM 0L)); + DEF(nxargv_fixup, WORD (TY_u32, IMM 0L)); + DEF(environ_fixup, WORD (TY_u32, IMM 0L)); + DEF(progname_fixup, WORD (TY_u32, IMM 0L)); + data_section; + const_section; + bss_section; + note_rust_section; + nl_symbol_ptr_section + |]) + in + + let dwarf_segment = def_aligned seg_align dwarf_segment_fixup + (SEQ [| + debug_info_section; + debug_abbrev_section; + |]) + in + + let linkedit_segment = def_aligned seg_align linkedit_segment_fixup + (SEQ [| + symtab; + strtab; + indirect_symtab; + |]) + in + + let segments = + SEQ [| + DEF (sem.Semant.ctxt_image_base_fixup, MARK); + zero_segment; + text_segment; + data_segment; + dwarf_segment; + linkedit_segment; + |] + in + write_out_frag sess true segments +;; + + +let sniff + (sess:Session.sess) + (filename:filename) + : asm_reader option = + try + let stat = Unix.stat filename in + if (stat.Unix.st_kind = Unix.S_REG) && + (stat.Unix.st_size > 4) + then + let ar = new_asm_reader sess filename in + let _ = log sess "sniffing Mach-O file" in + if (ar.asm_get_u32()) = (Int64.to_int mh_magic) + then (ar.asm_seek 0; Some ar) + else None + else + None + with + _ -> None +;; + +let get_sections + (sess:Session.sess) + (ar:asm_reader) + : (string,(int*int)) Hashtbl.t = + let sects = Hashtbl.create 0 in + let _ = log sess "reading sections" in + let magic = ar.asm_get_u32() in + let _ = assert (magic = (Int64.to_int mh_magic)) in + let _ = ar.asm_adv_u32() in (* cpu type *) + let _ = ar.asm_adv_u32() in (* cpu subtype *) + let _ = ar.asm_adv_u32() in (* file type *) + let n_load_cmds = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in + let _ = log sess "Mach-o file with %d load commands" n_load_cmds in + let _ = ar.asm_adv_u32() in (* flags *) + let lc_seg = Int64.to_int (load_command_code LC_SEGMENT) in + for i = 0 to n_load_cmds - 1 do + let load_cmd_code = ar.asm_get_u32() in + let load_cmd_size = ar.asm_get_u32() in + let _ = log sess "load command %d:" i in + if load_cmd_code != lc_seg + then ar.asm_adv (load_cmd_size - 8) + else + begin + let seg_name = ar.asm_get_zstr_padded 16 in + let _ = log sess "LC_SEGMENT %s" seg_name in + let _ = ar.asm_adv_u32() in (* seg mem pos *) + let _ = ar.asm_adv_u32() in (* seg mem sz *) + let _ = ar.asm_adv_u32() in (* seg file pos *) + let _ = ar.asm_adv_u32() in (* seg file sz *) + let _ = ar.asm_adv_u32() in (* maxprot *) + let _ = ar.asm_adv_u32() in (* initprot *) + let n_sects = ar.asm_get_u32() in + let _ = ar.asm_get_u32() in (* flags *) + let _ = log sess "%d sections" in + for j = 0 to n_sects - 1 do + let sect_name = ar.asm_get_zstr_padded 16 in + let _ = ar.asm_adv 16 in (* seg name *) + let _ = ar.asm_adv_u32() in (* sect mem pos *) + let m_sz = ar.asm_get_u32() in + let f_pos = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in (* sect align *) + let _ = ar.asm_adv_u32() in (* reloff *) + let _ = ar.asm_adv_u32() in (* nreloc *) + let _ = ar.asm_adv_u32() in (* flags *) + let _ = ar.asm_adv_u32() in (* reserved1 *) + let _ = ar.asm_adv_u32() in (* reserved2 *) + let _ = + log sess + " section %d: 0x%x - 0x%x %s " + j f_pos (f_pos + m_sz) sect_name + in + let len = String.length sect_name in + let sect_name = + if (len > 2 + && sect_name.[0] = '_' + && sect_name.[1] = '_') + then "." ^ (String.sub sect_name 2 (len-2)) + else sect_name + in + Hashtbl.add sects sect_name (f_pos, m_sz) + done + end + done; + sects +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/pe.ml b/src/boot/be/pe.ml new file mode 100644 index 00000000..d360ddf5 --- /dev/null +++ b/src/boot/be/pe.ml @@ -0,0 +1,1149 @@ +(* + + Module for writing Microsoft PE files + + Every image has a base address it's to be loaded at. + + "file pointer" = offset in file + + "VA" = address at runtime + + "RVA" = VA - base address + + If you write a non-RVA absolute address at any point you must put it + in a rebasing list so the loader can adjust it when/if it has to load + you at a different address. + + Almost all addresses in the file are RVAs. Worry about the VAs. + +*) + +open Asm;; +open Common;; + +let log (sess:Session.sess) = + Session.log "obj (pe)" + sess.Session.sess_log_obj + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_obj + then thunk () + else () +;; + +(* + + The default image base (VA) for an executable on Win32 is 0x400000. + + We use this too. RVAs are relative to this. RVA 0 = VA 0x400000. + + Alignments are also relatively standard and fixed for Win32/PE32: + 4k memory pages, 512 byte disk sectors. + + Since this is a stupid emitter, and we're not generating an awful + lot of sections, we are not going to differentiate between these + two kinds of alignment: we just align our sections to memory pages + and sometimes waste most of them. Shucks. + +*) + +let pe_image_base = 0x400000L;; +let pe_file_alignment = 0x200;; +let pe_mem_alignment = 0x1000;; + +let rva (f:fixup) = (SUB ((M_POS f), (IMM pe_image_base)));; + +let def_file_aligned f i = + ALIGN_FILE + (pe_file_alignment, + SEQ [| + DEF(f, + SEQ [| i; + ALIGN_FILE + (pe_file_alignment, MARK) |]) |] ) +;; + +let def_mem_aligned f i = + ALIGN_MEM + (pe_mem_alignment, + SEQ [| + DEF(f, + SEQ [| i; + ALIGN_MEM + (pe_mem_alignment, MARK) |]) |] ) +;; + +let align_both i = + ALIGN_FILE (pe_file_alignment, + (ALIGN_MEM (pe_mem_alignment, i))) +;; + +let def_aligned f i = + align_both + (SEQ [| DEF(f,i); + (align_both MARK)|]) +;; + + +(* + + At the beginning of a PE file there is an MS-DOS stub, 0x00 - 0x7F, + that we just insert literally. It prints "This program must be run + under Win32" and exits. Woo! + + Within it, at offset 0x3C, there is an encoded offset of the PE + header we actually care about. So 0x3C - 0x3F are 0x00000100 (LE) + which say "the PE header is actually at 0x100", a nice sensible spot + for it. We pad the next 128 bytes out to 0x100 and start there for + real. + + From then on in it's a sensible object file. Here's the MS-DOS bit. +*) + +let pe_msdos_header_and_padding + : frag = + SEQ [| + BYTES + [| + (* 00000000 *) + 0x4d; 0x5a; 0x50; 0x00; 0x02; 0x00; 0x00; 0x00; + 0x04; 0x00; 0x0f; 0x00; 0xff; 0xff; 0x00; 0x00; + + (* 00000010 *) + 0xb8; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x40; 0x00; 0x1a; 0x00; 0x00; 0x00; 0x00; 0x00; + + (* 00000020 *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + + (* 00000030 *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x00; 0x00; 0x00; 0x00; 0x00; 0x01; 0x00; 0x00; + (* ^^^^PE HDR offset^^^^^ *) + + (* 00000040 *) + 0xba; 0x10; 0x00; 0x0e; 0x1f; 0xb4; 0x09; 0xcd; + 0x21; 0xb8; 0x01; 0x4c; 0xcd; 0x21; 0x90; 0x90; + + (* 00000050 *) + 0x54; 0x68; 0x69; 0x73; 0x20; 0x70; 0x72; 0x6f; (* "This pro" *) + 0x67; 0x72; 0x61; 0x6d; 0x20; 0x6d; 0x75; 0x73; (* "gram mus" *) + + (* 00000060 *) + 0x74; 0x20; 0x62; 0x65; 0x20; 0x72; 0x75; 0x6e; (* "t be run" *) + 0x20; 0x75; 0x6e; 0x64; 0x65; 0x72; 0x20; 0x57; (* " under W" *) + + (* 00000070 *) + 0x69; 0x6e; 0x33; 0x32; 0x0d; 0x0a; 0x24; 0x37; (* "in32\r\n" *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + |]; + PAD 0x80 + |] +;; + +(* + A work of art, is it not? Take a moment to appreciate the madness. + + All done? Ok, now on to the PE header proper. + + PE headers are just COFF headers with a little preamble. +*) + +type pe_machine = + (* Maybe support more later. *) + IMAGE_FILE_MACHINE_AMD64 + | IMAGE_FILE_MACHINE_I386 +;; + + +let pe_timestamp _ = + Int64.of_float (Unix.gettimeofday()) +;; + + +type pe_characteristics = + (* Maybe support more later. *) + IMAGE_FILE_RELOCS_STRIPPED + | IMAGE_FILE_EXECUTABLE_IMAGE + | IMAGE_FILE_LINE_NUMS_STRIPPED + | IMAGE_FILE_LOCAL_SYMS_STRIPPED + | IMAGE_FILE_32BIT_MACHINE + | IMAGE_FILE_DEBUG_STRIPPED + | IMAGE_FILE_DLL +;; + + +let pe_header + ~(machine:pe_machine) + ~(symbol_table_fixup:fixup) + ~(number_of_sections:int64) + ~(number_of_symbols:int64) + ~(loader_hdr_fixup:fixup) + ~(characteristics:pe_characteristics list) + : frag = + ALIGN_FILE + (8, + SEQ [| + STRING "PE\x00\x00"; + WORD (TY_u16, (IMM (match machine with + IMAGE_FILE_MACHINE_AMD64 -> 0x8664L + | IMAGE_FILE_MACHINE_I386 -> 0x014cL))); + WORD (TY_u16, (IMM number_of_sections)); + WORD (TY_u32, (IMM (pe_timestamp()))); + WORD (TY_u32, (F_POS symbol_table_fixup)); + WORD (TY_u32, (IMM number_of_symbols)); + WORD (TY_u16, (F_SZ loader_hdr_fixup)); + WORD (TY_u16, (IMM (fold_flags + (fun c -> match c with + IMAGE_FILE_RELOCS_STRIPPED -> 0x1L + | IMAGE_FILE_EXECUTABLE_IMAGE -> 0x2L + | IMAGE_FILE_LINE_NUMS_STRIPPED -> 0x4L + | IMAGE_FILE_LOCAL_SYMS_STRIPPED -> 0x8L + | IMAGE_FILE_32BIT_MACHINE -> 0x100L + | IMAGE_FILE_DEBUG_STRIPPED -> 0x200L + | IMAGE_FILE_DLL -> 0x2000L) + characteristics))) + |]) +;; + +(* + + After the PE header comes an "optional" header for the loader. In + our case this is hardly optional since we are producing a file for + the loader. + +*) + +type pe_subsystem = + (* Maybe support more later. *) + IMAGE_SUBSYSTEM_WINDOWS_GUI + | IMAGE_SUBSYSTEM_WINDOWS_CUI +;; + +let zero32 = WORD (TY_u32, (IMM 0L)) +;; + +let pe_loader_header + ~(text_fixup:fixup) + ~(init_data_fixup:fixup) + ~(size_of_uninit_data:int64) + ~(entry_point_fixup:fixup option) + ~(image_fixup:fixup) + ~(all_hdrs_fixup:fixup) + ~(subsys:pe_subsystem) + ~(loader_hdr_fixup:fixup) + ~(import_dir_fixup:fixup) + ~(export_dir_fixup:fixup) + : frag = + DEF + (loader_hdr_fixup, + SEQ [| + WORD (TY_u16, (IMM 0x10bL)); (* COFF magic tag for PE32. *) + (* Snagged *) + WORD (TY_u8, (IMM 0x2L)); (* Linker major version. *) + WORD (TY_u8, (IMM 0x38L)); (* Linker minor version. *) + + WORD (TY_u32, (F_SZ text_fixup)); (* "size of code" *) + WORD (TY_u32, (* "size of all init data" *) + (F_SZ init_data_fixup)); + WORD (TY_u32, + (IMM size_of_uninit_data)); + + begin + match entry_point_fixup with + None -> zero32 (* Library mode: DLLMain *) + | Some entry_point_fixup -> + WORD (TY_u32, + (rva + entry_point_fixup)) (* "address of entry point" *) + end; + + WORD (TY_u32, (rva text_fixup)); (* "base of code" *) + WORD (TY_u32, (rva init_data_fixup)); (* "base of data" *) + WORD (TY_u32, (IMM pe_image_base)); + WORD (TY_u32, (IMM (Int64.of_int + pe_mem_alignment))); + WORD (TY_u32, (IMM (Int64.of_int + pe_file_alignment))); + + WORD (TY_u16, (IMM 4L)); (* Major OS version: NT4. *) + WORD (TY_u16, (IMM 0L)); (* Minor OS version. *) + WORD (TY_u16, (IMM 1L)); (* Major image version. *) + WORD (TY_u16, (IMM 0L)); (* Minor image version. *) + WORD (TY_u16, (IMM 4L)); (* Major subsystem version. *) + WORD (TY_u16, (IMM 0L)); (* Minor subsystem version. *) + + zero32; (* Reserved. *) + + WORD (TY_u32, (M_SZ image_fixup)); + WORD (TY_u32, (M_SZ all_hdrs_fixup)); + + zero32; (* Checksum, but OK if zero. *) + WORD (TY_u16, (IMM (match subsys with + IMAGE_SUBSYSTEM_WINDOWS_GUI -> 2L + | IMAGE_SUBSYSTEM_WINDOWS_CUI -> 3L))); + + WORD (TY_u16, (IMM 0L)); (* DLL characteristics. *) + + WORD (TY_u32, (IMM 0x100000L)); (* Size of stack reserve. *) + WORD (TY_u32, (IMM 0x4000L)); (* Size of stack commit. *) + + WORD (TY_u32, (IMM 0x100000L)); (* Size of heap reserve. *) + WORD (TY_u32, (IMM 0x1000L)); (* Size of heap commit. *) + + zero32; (* Reserved. *) + WORD (TY_u32, (IMM 16L)); (* Number of dir references. *) + + (* Begin directories, variable part of hdr. *) + + (* + + Standard PE files have ~10 directories referenced from + here. We only fill in two of them -- the export/import + directories -- because we don't care about the others. We + leave the rest as zero in case someone is looking for + them. This may be superfluous or wrong. + + *) + + + WORD (TY_u32, (rva export_dir_fixup)); + WORD (TY_u32, (M_SZ export_dir_fixup)); + + WORD (TY_u32, (rva import_dir_fixup)); + WORD (TY_u32, (M_SZ import_dir_fixup)); + + zero32; zero32; (* Resource dir. *) + zero32; zero32; (* Exception dir. *) + zero32; zero32; (* Security dir. *) + zero32; zero32; (* Base reloc dir. *) + zero32; zero32; (* Debug dir. *) + zero32; zero32; (* Image desc dir. *) + zero32; zero32; (* Mach spec dir. *) + zero32; zero32; (* TLS dir. *) + + zero32; zero32; (* Load config. *) + zero32; zero32; (* Bound import. *) + zero32; zero32; (* IAT *) + zero32; zero32; (* Delay import. *) + zero32; zero32; (* COM descriptor *) + zero32; zero32; (* ???????? *) + |]) + +;; + + +type pe_section_id = + (* Maybe support more later. *) + SECTION_ID_TEXT + | SECTION_ID_DATA + | SECTION_ID_RDATA + | SECTION_ID_BSS + | SECTION_ID_IMPORTS + | SECTION_ID_EXPORTS + | SECTION_ID_DEBUG_ARANGES + | SECTION_ID_DEBUG_PUBNAMES + | SECTION_ID_DEBUG_INFO + | SECTION_ID_DEBUG_ABBREV + | SECTION_ID_DEBUG_LINE + | SECTION_ID_DEBUG_FRAME + | SECTION_ID_NOTE_RUST +;; + +type pe_section_characteristics = + (* Maybe support more later. *) + IMAGE_SCN_CNT_CODE + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_CNT_UNINITIALIZED_DATA + | IMAGE_SCN_MEM_DISCARDABLE + | IMAGE_SCN_MEM_SHARED + | IMAGE_SCN_MEM_EXECUTE + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE + +let pe_section_header + ~(id:pe_section_id) + ~(hdr_fixup:fixup) + : frag = + let + characteristics = + match id with + SECTION_ID_TEXT -> [ IMAGE_SCN_CNT_CODE; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_EXECUTE ] + | SECTION_ID_DATA -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_BSS -> [ IMAGE_SCN_CNT_UNINITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_IMPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_EXPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ ] + | SECTION_ID_RDATA + | SECTION_ID_DEBUG_ARANGES + | SECTION_ID_DEBUG_PUBNAMES + | SECTION_ID_DEBUG_INFO + | SECTION_ID_DEBUG_ABBREV + | SECTION_ID_DEBUG_LINE + | SECTION_ID_DEBUG_FRAME + | SECTION_ID_NOTE_RUST -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ ] + in + SEQ [| + STRING + begin + match id with + SECTION_ID_TEXT -> ".text\x00\x00\x00" + | SECTION_ID_DATA -> ".data\x00\x00\x00" + | SECTION_ID_RDATA -> ".rdata\x00\x00" + | SECTION_ID_BSS -> ".bss\x00\x00\x00\x00" + | SECTION_ID_IMPORTS -> ".idata\x00\x00" + | SECTION_ID_EXPORTS -> ".edata\x00\x00" + + (* There is a bizarre Microsoft COFF extension to account + * for longer-than-8-char section names: you emit a single + * '/' character then the ASCII-numeric encoding of the + * offset within the file's string table of the full name. + * So we put all our extended section names at the + * beginning of the string table in a very specific order + * and hard-wire the offsets as "names" here. You could + * theoretically extend this to a "new kind" of fixup + * reference (ASCII_POS or such), if you feel this is + * something you want to twiddle with. + *) + + | SECTION_ID_DEBUG_ARANGES -> "/4\x00\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_PUBNAMES -> "/19\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_INFO -> "/35\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_ABBREV -> "/47\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_LINE -> "/61\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_FRAME -> "/73\x00\x00\x00\x00\x00" + | SECTION_ID_NOTE_RUST -> "/86\x00\x00\x00\x00\x00" + end; + + (* The next two pairs are only supposed to be different if the + file and section alignments differ. This is a stupid emitter + so they're not, no problem. *) + + WORD (TY_u32, (M_SZ hdr_fixup)); (* "Virtual size" *) + WORD (TY_u32, (rva hdr_fixup)); (* "Virtual address" *) + + WORD (TY_u32, (F_SZ hdr_fixup)); (* "Size of raw data" *) + WORD (TY_u32, (F_POS hdr_fixup)); (* "Pointer to raw data" *) + + zero32; (* Reserved. *) + zero32; (* Reserved. *) + zero32; (* Reserved. *) + + WORD (TY_u32, (IMM (fold_flags + (fun c -> match c with + IMAGE_SCN_CNT_CODE -> 0x20L + | IMAGE_SCN_CNT_INITIALIZED_DATA -> 0x40L + | IMAGE_SCN_CNT_UNINITIALIZED_DATA -> 0x80L + | IMAGE_SCN_MEM_DISCARDABLE -> 0x2000000L + | IMAGE_SCN_MEM_SHARED -> 0x10000000L + | IMAGE_SCN_MEM_EXECUTE -> 0x20000000L + | IMAGE_SCN_MEM_READ -> 0x40000000L + | IMAGE_SCN_MEM_WRITE -> 0x80000000L) + characteristics))) + |] +;; + + +(* + + "Thunk" is a misnomer here; the thunk RVA is the address of a word + that the loader will store an address into. The stored address is + the address of the imported object. + + So if the imported object is X, and the thunk slot is Y, the loader + is doing "Y = &X" and returning &Y as the thunk RVA. To load datum X + after the imports are resolved, given the thunk RVA R, you load + **R. + +*) + +type pe_import = + { + pe_import_name_fixup: fixup; + pe_import_name: string; + pe_import_address_fixup: fixup; + } + +type pe_import_dll_entry = + { + pe_import_dll_name_fixup: fixup; + pe_import_dll_name: string; + pe_import_dll_ILT_fixup: fixup; + pe_import_dll_IAT_fixup: fixup; + pe_import_dll_imports: pe_import array; + } + + (* + + The import section .idata has a mostly self-contained table + structure. You feed it a list of DLL entries, each of which names + a DLL and lists symbols in the DLL to import. + + For each named symbol, a 4-byte slot will be reserved in an + "import lookup table" (ILT, also in this section). The slot is + a pointer to a string in this section giving the name. + + Immediately *after* the ILT, there is an "import address table" (IAT), + which is initially identical to the ILT. The loader replaces the entries + in the IAT slots with the imported pointers at runtime. + + A central directory at the start of the section lists all the the import + thunk tables. Each entry in the import directory is 20 bytes (5 words) + but only the last 2 are used: the second last is a pointer to the string + name of the DLL in question (string also in this section) and the last is + a pointer to the import thunk table itself (also in this section). + + Curiously, of the 5 documents I've consulted on the nature of the + first 3 fields, I find a variety of interpretations. + + *) + +let pe_import_section + ~(import_dir_fixup:fixup) + ~(dlls:pe_import_dll_entry array) + : frag = + + let form_dir_entry + (entry:pe_import_dll_entry) + : frag = + SEQ [| + (* Note: documented opinions vary greatly about whether the + first, last, or both of the slots in one of these rows points + to the RVA of the name/hint used to look the import up. This + table format is a mess! *) + WORD (TY_u32, + (rva + entry.pe_import_dll_ILT_fixup)); (* Import lookup table. *) + WORD (TY_u32, (IMM 0L)); (* Timestamp, unused. *) + WORD (TY_u32, (IMM 0x0L)); (* Forwarder chain, unused. *) + WORD (TY_u32, (rva entry.pe_import_dll_name_fixup)); + WORD (TY_u32, + (rva + entry.pe_import_dll_IAT_fixup)); (* Import address table.*) + |] + in + + let form_ILT_slot + (import:pe_import) + : frag = + (WORD (TY_u32, (rva import.pe_import_name_fixup))) + in + + let form_IAT_slot + (import:pe_import) + : frag = + (DEF (import.pe_import_address_fixup, + (WORD (TY_u32, (rva import.pe_import_name_fixup))))) + in + + let form_tables_for_dll + (dll:pe_import_dll_entry) + : frag = + let terminator = WORD (TY_u32, (IMM 0L)) in + let ilt = + SEQ [| + SEQ (Array.map form_ILT_slot dll.pe_import_dll_imports); + terminator + |] + in + let iat = + SEQ [| + SEQ (Array.map form_IAT_slot dll.pe_import_dll_imports); + terminator + |] + in + if Array.length dll.pe_import_dll_imports < 1 + then bug () "Pe.form_tables_for_dll: empty imports" + else + SEQ [| + DEF (dll.pe_import_dll_ILT_fixup, ilt); + DEF (dll.pe_import_dll_IAT_fixup, iat) + |] + + in + + let form_import_string + (import:pe_import) + : frag = + DEF + (import.pe_import_name_fixup, + SEQ [| + (* import string entries begin with a 2-byte "hint", but we just + set it to zero. *) + (WORD (TY_u16, (IMM 0L))); + ZSTRING import.pe_import_name; + (if String.length import.pe_import_name mod 2 == 0 + then PAD 1 + else PAD 0) + |]) + in + + let form_dir_entry_string + (dll:pe_import_dll_entry) + : frag = + DEF + (dll.pe_import_dll_name_fixup, + SEQ [| ZSTRING dll.pe_import_dll_name; + (if String.length dll.pe_import_dll_name mod 2 == 0 + then PAD 1 + else PAD 0); + SEQ (Array.map form_import_string dll.pe_import_dll_imports) |]) + in + + let dir = SEQ (Array.map form_dir_entry dlls) in + let dir_terminator = PAD 20 in + let tables = SEQ (Array.map form_tables_for_dll dlls) in + let strings = SEQ (Array.map form_dir_entry_string dlls) + in + def_aligned + import_dir_fixup + (SEQ + [| + dir; + dir_terminator; + tables; + strings + |]) + +;; + +type pe_export = + { + pe_export_name_fixup: fixup; + pe_export_name: string; + pe_export_address_fixup: fixup; + } +;; + +let pe_export_section + ~(sess:Session.sess) + ~(export_dir_fixup:fixup) + ~(exports:pe_export array) + : frag = + Array.sort (fun a b -> compare a.pe_export_name b.pe_export_name) exports; + let export_addr_table_fixup = new_fixup "export address table" in + let export_addr_table = + DEF + (export_addr_table_fixup, + SEQ + (Array.map + (fun e -> (WORD (TY_u32, rva e.pe_export_address_fixup))) + exports)) + in + let export_name_pointer_table_fixup = + new_fixup "export name pointer table" + in + let export_name_pointer_table = + DEF + (export_name_pointer_table_fixup, + SEQ + (Array.map + (fun e -> (WORD (TY_u32, rva e.pe_export_name_fixup))) + exports)) + in + let export_name_table_fixup = new_fixup "export name table" in + let export_name_table = + DEF + (export_name_table_fixup, + SEQ + (Array.map + (fun e -> (DEF (e.pe_export_name_fixup, + (ZSTRING e.pe_export_name)))) + exports)) + in + let export_ordinal_table_fixup = new_fixup "export ordinal table" in + let export_ordinal_table = + DEF + (export_ordinal_table_fixup, + SEQ + (Array.mapi + (fun i _ -> (WORD (TY_u16, IMM (Int64.of_int (i))))) + exports)) + in + let image_name_fixup = new_fixup "image name fixup" in + let n_exports = IMM (Int64.of_int (Array.length exports)) in + let export_dir_table = + SEQ [| + WORD (TY_u32, IMM 0L); (* Flags, reserved. *) + WORD (TY_u32, IMM 0L); (* Timestamp, unused. *) + WORD (TY_u16, IMM 0L); (* Major vers., unused *) + WORD (TY_u16, IMM 0L); (* Minor vers., unused *) + WORD (TY_u32, rva image_name_fixup); (* Name RVA. *) + WORD (TY_u32, IMM 1L); (* Ordinal base = 1. *) + WORD (TY_u32, n_exports); (* # entries in EAT. *) + WORD (TY_u32, n_exports); (* # entries in ENPT/EOT.*) + WORD (TY_u32, rva export_addr_table_fixup); (* EAT *) + WORD (TY_u32, rva export_name_pointer_table_fixup); (* ENPT *) + WORD (TY_u32, rva export_ordinal_table_fixup); (* EOT *) + |] + in + def_aligned export_dir_fixup + (SEQ [| + export_dir_table; + export_addr_table; + export_name_pointer_table; + export_ordinal_table; + DEF (image_name_fixup, + ZSTRING (Session.filename_of sess.Session.sess_out)); + export_name_table + |]) +;; + +let pe_text_section + ~(sess:Session.sess) + ~(sem:Semant.ctxt) + ~(start_fixup:fixup option) + ~(rust_start_fixup:fixup option) + ~(main_fn_fixup:fixup option) + ~(text_fixup:fixup) + ~(crate_code:frag) + : frag = + let startup = + match (start_fixup, rust_start_fixup, main_fn_fixup) with + (None, _, _) + | (_, None, _) + | (_, _, None) -> MARK + | (Some start_fixup, + Some rust_start_fixup, + Some main_fn_fixup) -> + let e = X86.new_emitter_without_vregs () in + (* + * We are called from the Microsoft C library startup routine, + * and assumed to be stdcall; so we have to clean up our own + * stack before returning. + *) + X86.objfile_start e + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup + ~crate_fixup: sem.Semant.ctxt_crate_fixup + ~indirect_start: true; + X86.frags_of_emitted_quads sess e; + in + def_aligned + text_fixup + (SEQ [| + startup; + crate_code + |]) +;; + +let rustrt_imports sem = + let make_imports_for_lib (lib, tab) = + { + pe_import_dll_name_fixup = new_fixup "dll name"; + pe_import_dll_name = (match lib with + REQUIRED_LIB_rustrt -> "rustrt.dll" + | REQUIRED_LIB_crt -> "msvcrt.dll" + | REQUIRED_LIB_rust ls + | REQUIRED_LIB_c ls -> ls.required_libname); + pe_import_dll_ILT_fixup = new_fixup "dll ILT"; + pe_import_dll_IAT_fixup = new_fixup "dll IAT"; + pe_import_dll_imports = + Array.of_list + (List.map + begin + fun (name, fixup) -> + { + pe_import_name_fixup = new_fixup "import name"; + pe_import_name = name; + pe_import_address_fixup = fixup; + } + end + (htab_pairs tab)) + } + in + Array.of_list + (List.map + make_imports_for_lib + (htab_pairs sem.Semant.ctxt_native_required)) +;; + + +let crate_exports (sem:Semant.ctxt) : pe_export array = + let export_sym (name, fixup) = + { + pe_export_name_fixup = new_fixup "export name fixup"; + pe_export_name = name; + pe_export_address_fixup = fixup; + } + in + let export_seg (_, tab) = + Array.of_list (List.map export_sym (htab_pairs tab)) + in + Array.concat + (List.map export_seg + (htab_pairs sem.Semant.ctxt_native_provided)) +;; + + +let emit_file + (sess:Session.sess) + (crate:Ast.crate) + (code:Asm.frag) + (data:Asm.frag) + (sem:Semant.ctxt) + (dw:Dwarf.debug_records) + : unit = + + let all_hdrs_fixup = new_fixup "all headers" in + let all_init_data_fixup = new_fixup "all initialized data" in + let loader_hdr_fixup = new_fixup "loader header" in + let import_dir_fixup = new_fixup "import directory" in + let export_dir_fixup = new_fixup "export directory" in + let text_fixup = new_fixup "text section" in + let bss_fixup = new_fixup "bss section" in + let data_fixup = new_fixup "data section" in + let image_fixup = new_fixup "image fixup" in + let symtab_fixup = new_fixup "symbol table" in + let strtab_fixup = new_fixup "string table" in + let note_rust_fixup = new_fixup ".note.rust section" in + + let (start_fixup, rust_start_fixup) = + if sess.Session.sess_library_mode + then (None, None) + else + (Some (new_fixup "start"), + Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) + in + + let header = (pe_header + ~machine: IMAGE_FILE_MACHINE_I386 + ~symbol_table_fixup: symtab_fixup + ~number_of_sections: 8L + ~number_of_symbols: 0L + ~loader_hdr_fixup: loader_hdr_fixup + ~characteristics:([IMAGE_FILE_EXECUTABLE_IMAGE; + IMAGE_FILE_LINE_NUMS_STRIPPED; + IMAGE_FILE_32BIT_MACHINE;] + @ + (if sess.Session.sess_library_mode + then [ IMAGE_FILE_DLL ] + else [ ]))) + in + let symtab = + (* + * We're not actually presenting a "symbol table", but wish to + * provide a "string table" which comes immediately *after* the + * symbol table. It's a violation of the PE spec to put one of + * these in an executable file (as opposed to just loadable) but + * it's necessary to communicate the debug section names to GDB, + * and nobody else complains. + *) + (def_aligned + symtab_fixup + (def_aligned + strtab_fixup + (SEQ + [| + WORD (TY_u32, (F_SZ strtab_fixup)); + ZSTRING ".debug_aranges"; + ZSTRING ".debug_pubnames"; + ZSTRING ".debug_info"; + ZSTRING ".debug_abbrev"; + ZSTRING ".debug_line"; + ZSTRING ".debug_frame"; + ZSTRING ".note.rust"; + |]))) + in + let loader_header = (pe_loader_header + ~text_fixup + ~init_data_fixup: all_init_data_fixup + ~size_of_uninit_data: 0L + ~entry_point_fixup: start_fixup + ~image_fixup: image_fixup + ~subsys: IMAGE_SUBSYSTEM_WINDOWS_CUI + ~all_hdrs_fixup + ~loader_hdr_fixup + ~import_dir_fixup + ~export_dir_fixup) + in + let text_header = (pe_section_header + ~id: SECTION_ID_TEXT + ~hdr_fixup: text_fixup) + + in + let bss_header = (pe_section_header + ~id: SECTION_ID_BSS + ~hdr_fixup: bss_fixup) + in + let import_section = (pe_import_section + ~import_dir_fixup + ~dlls: (rustrt_imports sem)) + in + let import_header = (pe_section_header + ~id: SECTION_ID_IMPORTS + ~hdr_fixup: import_dir_fixup) + in + let export_section = (pe_export_section + ~sess + ~export_dir_fixup + ~exports: (crate_exports sem)) + in + let export_header = (pe_section_header + ~id: SECTION_ID_EXPORTS + ~hdr_fixup: export_dir_fixup) + in + let data_header = (pe_section_header + ~id: SECTION_ID_DATA + ~hdr_fixup: data_fixup) + in +(* + let debug_aranges_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_ARANGES + ~hdr_fixup: sem.Semant.ctxt_debug_aranges_fixup) + in + let debug_pubnames_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_PUBNAMES + ~hdr_fixup: sem.Semant.ctxt_debug_pubnames_fixup) + in +*) + let debug_info_header = (pe_section_header + ~id: SECTION_ID_DEBUG_INFO + ~hdr_fixup: sem.Semant.ctxt_debug_info_fixup) + in + let debug_abbrev_header = (pe_section_header + ~id: SECTION_ID_DEBUG_ABBREV + ~hdr_fixup: sem.Semant.ctxt_debug_abbrev_fixup) + in +(* + let debug_line_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_LINE + ~hdr_fixup: sem.Semant.ctxt_debug_line_fixup) + in + let debug_frame_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_FRAME + ~hdr_fixup: sem.Semant.ctxt_debug_frame_fixup) + in +*) + let note_rust_header = (pe_section_header + ~id: SECTION_ID_NOTE_RUST + ~hdr_fixup: note_rust_fixup) + in + let all_headers = (def_file_aligned + all_hdrs_fixup + (SEQ + [| + pe_msdos_header_and_padding; + header; + loader_header; + text_header; + bss_header; + import_header; + export_header; + data_header; + (* + debug_aranges_header; + debug_pubnames_header; + *) + debug_info_header; + debug_abbrev_header; + (* + debug_line_header; + debug_frame_header; + *) + note_rust_header; + |])) + in + + let text_section = (pe_text_section + ~sem + ~sess + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup: sem.Semant.ctxt_main_fn_fixup + ~text_fixup + ~crate_code: code) + in + let bss_section = def_aligned bss_fixup (BSS 0x10L) + in + let data_section = (def_aligned data_fixup + (SEQ [| data; symtab; |])) + in + let all_init_data = (def_aligned + all_init_data_fixup + (SEQ [| import_section; + export_section; + data_section; |])) + in +(* + let debug_aranges_section = + def_aligned sem.Semant.ctxt_debug_aranges_fixup dw.Dwarf.debug_aranges + in + let debug_pubnames_section = + def_aligned sem.Semant.ctxt_debug_pubnames_fixup dw.Dwarf.debug_pubnames + in +*) + let debug_info_section = + def_aligned sem.Semant.ctxt_debug_info_fixup dw.Dwarf.debug_info + in + let debug_abbrev_section = + def_aligned sem.Semant.ctxt_debug_abbrev_fixup dw.Dwarf.debug_abbrev + in +(* + let debug_line_section = + def_aligned sem.Semant.ctxt_debug_line_fixup dw.Dwarf.debug_line + in + let debug_frame_section = + def_aligned sem.Semant.ctxt_debug_frame_fixup dw.Dwarf.debug_frame + in +*) + let note_rust_section = + def_aligned note_rust_fixup + (Asm.note_rust_frags crate.node.Ast.crate_meta) + in + + let all_frags = SEQ [| MEMPOS pe_image_base; + (def_file_aligned image_fixup + (SEQ [| DEF (sem.Semant.ctxt_image_base_fixup, + MARK); + all_headers; + text_section; + bss_section; + all_init_data; + (* debug_aranges_section; *) + (* debug_pubnames_section; *) + debug_info_section; + debug_abbrev_section; + (* debug_line_section; *) + (* debug_frame_section; *) + note_rust_section; + ALIGN_MEM (pe_mem_alignment, MARK) + |] + ) + ) + |] + in + write_out_frag sess true all_frags +;; + +let pe_magic = "PE";; + +let sniff + (sess:Session.sess) + (filename:filename) + : asm_reader option = + try + let stat = Unix.stat filename in + if (stat.Unix.st_kind = Unix.S_REG) && + (stat.Unix.st_size >= pe_file_alignment) + then + let ar = new_asm_reader sess filename in + let _ = log sess "sniffing PE file" in + (* PE header offset is at 0x3c in the MS-DOS compatibility header. *) + let _ = ar.asm_seek 0x3c in + let pe_hdr_off = ar.asm_get_u32() in + let _ = log sess "PE header offset: 0x%x" pe_hdr_off in + + let _ = ar.asm_seek pe_hdr_off in + let pe_signature = ar.asm_get_zstr_padded 4 in + let _ = log sess " PE signature: '%s'" pe_signature in + if pe_signature = pe_magic + then (ar.asm_seek 0; Some ar) + else None + else + None + with + _ -> None +;; + + +let get_sections + (sess:Session.sess) + (ar:asm_reader) + : (string,(int*int)) Hashtbl.t = + let _ = log sess "reading sections" in + (* PE header offset is at 0x3c in the MS-DOS compatibility header. *) + let _ = ar.asm_seek 0x3c in + let pe_hdr_off = ar.asm_get_u32() in + let _ = log sess "PE header offset: 0x%x" pe_hdr_off in + + let _ = ar.asm_seek pe_hdr_off in + let pe_signature = ar.asm_get_zstr_padded 4 in + let _ = log sess " PE signature: '%s'" pe_signature in + let _ = assert (pe_signature = pe_magic) in + let _ = ar.asm_adv_u16() in (* machine type *) + + let num_sections = ar.asm_get_u16() in + let _ = log sess " num sections: %d" num_sections in + + let _ = ar.asm_adv_u32() in (* timestamp *) + + let symtab_off = ar.asm_get_u32() in + let _ = log sess " symtab offset: 0x%x" symtab_off in + + let num_symbols = ar.asm_get_u32() in + let _ = log sess " num symbols: %d" num_symbols in + + let loader_hdr_size = ar.asm_get_u16() in + let _ = log sess "loader header sz: %d" loader_hdr_size in + + let _ = ar.asm_adv_u16() in (* flags *) + let sections_off = (ar.asm_get_off()) + loader_hdr_size in + + let sects = Hashtbl.create 0 in + + let _ = + ar.asm_seek sections_off; + for i = 0 to (num_sections - 1) do + (* + * Section-name encoding is crazy. ASCII-encoding offsets of + * long names. See pe_section_header for details. + *) + let sect_name = + let sect_name = ar.asm_get_zstr_padded 8 in + assert ((String.length sect_name) > 0); + if sect_name.[0] = '/' + then + let off_str = + String.sub sect_name 1 ((String.length sect_name) - 1) + in + let i = int_of_string off_str in + let curr = ar.asm_get_off() in + ar.asm_seek (symtab_off + i); + let ext_name = ar.asm_get_zstr() in + ar.asm_seek curr; + ext_name + else + sect_name + in + let _ = ar.asm_adv_u32() in (* virtual size *) + let _ = ar.asm_adv_u32() in (* virtual address *) + let file_sz = ar.asm_get_u32() in + let file_off = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* flags *) + Hashtbl.add sects sect_name (file_off, file_sz); + log sess " section %d: %s, size %d, offset 0x%x" + i sect_name file_sz file_off; + done + in + sects +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/ra.ml b/src/boot/be/ra.ml new file mode 100644 index 00000000..db70b21d --- /dev/null +++ b/src/boot/be/ra.ml @@ -0,0 +1,664 @@ +open Il;; +open Common;; + +type ctxt = + { + ctxt_sess: Session.sess; + ctxt_n_vregs: int; + ctxt_abi: Abi.abi; + mutable ctxt_quads: Il.quads; + mutable ctxt_next_spill: int; + mutable ctxt_next_label: int; + (* More state as necessary. *) + } +;; + +let new_ctxt + (sess:Session.sess) + (quads:Il.quads) + (vregs:int) + (abi:Abi.abi) + : ctxt = + { + ctxt_sess = sess; + ctxt_quads = quads; + ctxt_n_vregs = vregs; + ctxt_abi = abi; + ctxt_next_spill = 0; + ctxt_next_label = 0; + } +;; + +let log (cx:ctxt) = + Session.log "ra" + cx.ctxt_sess.Session.sess_log_ra + cx.ctxt_sess.Session.sess_log_out +;; + +let iflog (cx:ctxt) (thunk:(unit -> unit)) : unit = + if cx.ctxt_sess.Session.sess_log_ra + then thunk () + else () +;; + +let list_to_str list eltstr = + (String.concat "," (List.map eltstr (List.sort compare list))) +;; + +let next_spill (cx:ctxt) : int = + let i = cx.ctxt_next_spill in + cx.ctxt_next_spill <- i + 1; + i +;; + +let next_label (cx:ctxt) : string = + let i = cx.ctxt_next_label in + cx.ctxt_next_label <- i + 1; + (".L" ^ (string_of_int i)) +;; + +exception Ra_error of string ;; + +let convert_labels (cx:ctxt) : unit = + let quad_fixups = Array.map (fun q -> q.quad_fixup) cx.ctxt_quads in + let qp_code (_:Il.quad_processor) (c:Il.code) : Il.code = + match c with + Il.CodeLabel lab -> + let fix = + match quad_fixups.(lab) with + None -> + let fix = new_fixup (next_label cx) in + begin + quad_fixups.(lab) <- Some fix; + fix + end + | Some f -> f + in + Il.CodePtr (Il.ImmPtr (fix, Il.CodeTy)) + | _ -> c + in + let qp = { Il.identity_processor + with Il.qp_code = qp_code } + in + Il.rewrite_quads qp cx.ctxt_quads; + Array.iteri (fun i fix -> + cx.ctxt_quads.(i) <- { cx.ctxt_quads.(i) with + Il.quad_fixup = fix }) + quad_fixups; +;; + +let convert_pre_spills + (cx:ctxt) + (mkspill:(Il.spill -> Il.mem)) + : int = + let n = ref 0 in + let qp_mem (_:Il.quad_processor) (a:Il.mem) : Il.mem = + match a with + Il.Spill i -> + begin + if i+1 > (!n) + then n := i+1; + mkspill i + end + | _ -> a + in + let qp = Il.identity_processor in + let qp = { qp with + Il.qp_mem = qp_mem } + in + begin + Il.rewrite_quads qp cx.ctxt_quads; + !n + end +;; + +let kill_quad (i:int) (cx:ctxt) : unit = + cx.ctxt_quads.(i) <- + { Il.deadq with + Il.quad_fixup = cx.ctxt_quads.(i).Il.quad_fixup } +;; + +let kill_redundant_moves (cx:ctxt) : unit = + let process_quad i q = + match q.Il.quad_body with + Il.Unary u when + ((Il.is_mov u.Il.unary_op) && + (Il.Cell u.Il.unary_dst) = u.Il.unary_src) -> + kill_quad i cx + | _ -> () + in + Array.iteri process_quad cx.ctxt_quads +;; + +let quad_jump_target_labels (q:quad) : Il.label list = + let explicits = + match q.Il.quad_body with + Il.Jmp { Il.jmp_targ = Il.CodeLabel lab } -> [ lab ] + | _ -> [] + in + explicits @ q.quad_implicits; +;; + +let quad_used_vregs (q:quad) : Il.vreg list = + let vregs = ref [] in + let qp_reg _ r = + match r with + Il.Vreg v -> (vregs := (v :: (!vregs)); r) + | _ -> r + in + let qp_cell_write qp c = + match c with + Il.Reg _ -> c + | Il.Mem (a, b) -> Il.Mem (qp.qp_mem qp a, b) + in + let qp = { Il.identity_processor with + Il.qp_reg = qp_reg; + Il.qp_cell_write = qp_cell_write } + in + ignore (Il.process_quad qp q); + !vregs +;; + +let quad_defined_vregs (q:quad) : Il.vreg list = + let vregs = ref [] in + let qp_cell_write _ c = + match c with + Il.Reg (Il.Vreg v, _) -> (vregs := (v :: (!vregs)); c) + | _ -> c + in + let qp = { Il.identity_processor with + Il.qp_cell_write = qp_cell_write } + in + ignore (Il.process_quad qp q); + !vregs +;; + +let quad_is_unconditional_jump (q:quad) : bool = + match q.Il.quad_body with + Il.Jmp { jmp_op = Il.JMP } -> true + | Il.Ret -> true + | _ -> false +;; + +let calculate_live_bitvectors + (cx:ctxt) + : ((Bits.t array) * (Bits.t array)) = + + log cx "calculating live bitvectors"; + + let quads = cx.ctxt_quads in + let n_quads = Array.length quads in + let n_vregs = cx.ctxt_n_vregs in + let new_bitv _ = Bits.create n_vregs false in + let (live_in_vregs:Bits.t array) = Array.init n_quads new_bitv in + let (live_out_vregs:Bits.t array) = Array.init n_quads new_bitv in + + let (quad_used_vrs:Bits.t array) = Array.init n_quads new_bitv in + let (quad_defined_vrs:Bits.t array) = Array.init n_quads new_bitv in + let (quad_uncond_jmp:bool array) = Array.make n_quads false in + let (quad_jmp_targs:(Il.label list) array) = Array.make n_quads [] in + + let outer_changed = ref true in + + (* Working bit-vector. *) + let scratch = new_bitv() in + + (* bit-vector helpers. *) + (* Setup pass. *) + for i = 0 to n_quads - 1 do + let q = quads.(i) in + quad_uncond_jmp.(i) <- quad_is_unconditional_jump q; + quad_jmp_targs.(i) <- quad_jump_target_labels q; + List.iter + (fun v -> Bits.set quad_used_vrs.(i) v true) + (quad_used_vregs q); + List.iter + (fun v -> Bits.set quad_defined_vrs.(i) v true) + (quad_defined_vregs q) + done; + + while !outer_changed do + iflog cx (fun _ -> log cx "iterating outer bitvector calculation"); + outer_changed := false; + for i = 0 to n_quads - 1 do + Bits.clear live_in_vregs.(i); + Bits.clear live_out_vregs.(i) + done; + let inner_changed = ref true in + while !inner_changed do + inner_changed := false; + iflog cx + (fun _ -> + log cx "iterating inner bitvector calculation over %d quads" + n_quads); + for i = n_quads - 1 downto 0 do + + let note_change b = if b then inner_changed := true in + let live_in = live_in_vregs.(i) in + let live_out = live_out_vregs.(i) in + let used = quad_used_vrs.(i) in + let defined = quad_defined_vrs.(i) in + + (* Union in the vregs we use. *) + note_change (Bits.union live_in used); + + (* Union in all our jump targets. *) + List.iter + (fun i -> note_change (Bits.union live_out live_in_vregs.(i))) + (quad_jmp_targs.(i)); + + (* Union in our block successor if we have one *) + if i < (n_quads - 1) && (not (quad_uncond_jmp.(i))) + then note_change (Bits.union live_out live_in_vregs.(i+1)); + + (* Propagate live-out to live-in on anything we don't define. *) + ignore (Bits.copy scratch defined); + Bits.invert scratch; + ignore (Bits.intersect scratch live_out); + note_change (Bits.union live_in scratch); + + done + done; + let kill_mov_to_dead_target i q = + match q.Il.quad_body with + Il.Unary { Il.unary_op=uop; + Il.unary_dst=Il.Reg (Il.Vreg v, _) } + when + ((Il.is_mov uop) && + not (Bits.get live_out_vregs.(i) v)) -> + begin + kill_quad i cx; + outer_changed := true; + end + | _ -> () + in + Array.iteri kill_mov_to_dead_target quads + done; + iflog cx + begin + fun _ -> + log cx "finished calculating live bitvectors"; + log cx "========================="; + for q = 0 to n_quads - 1 do + let buf = Buffer.create 128 in + for v = 0 to (n_vregs - 1) + do + if ((Bits.get live_in_vregs.(q) v) + && (Bits.get live_out_vregs.(q) v)) + then Printf.bprintf buf " %-2d" v + else Buffer.add_string buf " " + done; + log cx "[%6d] live vregs: %s" q (Buffer.contents buf) + done; + log cx "=========================" + end; + (live_in_vregs, live_out_vregs) +;; + + +let is_end_of_basic_block (q:quad) : bool = + match q.Il.quad_body with + Il.Jmp _ -> true + | Il.Ret -> true + | _ -> false +;; + +let is_beginning_of_basic_block (q:quad) : bool = + match q.Il.quad_fixup with + None -> false + | Some _ -> true +;; + +let dump_quads cx = + let f = cx.ctxt_abi.Abi.abi_str_of_hardreg in + let len = (Array.length cx.ctxt_quads) - 1 in + let ndigits_of n = (int_of_float (log10 (float_of_int n))) in + let padded_num n maxnum = + let ndigits = ndigits_of n in + let maxdigits = ndigits_of maxnum in + let pad = String.make (maxdigits - ndigits) ' ' in + Printf.sprintf "%s%d" pad n + in + let padded_str str maxlen = + let pad = String.make (maxlen - (String.length str)) ' ' in + Printf.sprintf "%s%s" pad str + in + let maxlablen = ref 0 in + for i = 0 to len + do + let q = cx.ctxt_quads.(i) in + match q.quad_fixup with + None -> () + | Some f -> + maxlablen := max (!maxlablen) ((String.length f.fixup_name) + 1) + done; + for i = 0 to len + do + let q = cx.ctxt_quads.(i) in + let qs = (string_of_quad f q) in + let lab = match q.quad_fixup with + None -> "" + | Some f -> f.fixup_name ^ ":" + in + log cx "[%s] %s %s" (padded_num i len) (padded_str lab (!maxlablen)) qs + done +;; + +let calculate_vreg_constraints (cx:ctxt) : Bits.t array = + let abi = cx.ctxt_abi in + let n_vregs = cx.ctxt_n_vregs in + let n_hregs = abi.Abi.abi_n_hardregs in + let constraints = Array.init n_vregs (fun _ -> Bits.create n_hregs true) in + Array.iteri + begin + fun i q -> + abi.Abi.abi_constrain_vregs q constraints; + iflog cx + begin + fun _ -> + let hr_str = cx.ctxt_abi.Abi.abi_str_of_hardreg in + log cx "constraints for quad %d = %s" + i (string_of_quad hr_str q); + let qp_reg _ r = + begin + match r with + Il.Hreg _ -> () + | Il.Vreg v -> + let hregs = Bits.to_list constraints.(v) in + log cx "<v%d> constrained to hregs: [%s]" + v (list_to_str hregs hr_str) + end; + r + in + ignore (Il.process_quad { Il.identity_processor with + Il.qp_reg = qp_reg } q) + end; + end + cx.ctxt_quads; + constraints +;; + +(* Simple local register allocator. Nothing fancy. *) +let reg_alloc + (sess:Session.sess) + (quads:Il.quads) + (vregs:int) + (abi:Abi.abi) = + try + let cx = new_ctxt sess quads vregs abi in + let _ = + iflog cx + begin + fun _ -> + log cx "un-allocated quads:"; + dump_quads cx + end + in + + (* Work out pre-spilled slots and allocate 'em. *) + let spill_slot (s:Il.spill) = abi.Abi.abi_spill_slot s in + let n_pre_spills = convert_pre_spills cx spill_slot in + + let (live_in_vregs, live_out_vregs) = + Session.time_inner "RA liveness" sess + (fun _ -> calculate_live_bitvectors cx) + in + let (vreg_constraints:Bits.t array) = (* vreg idx -> hreg bits.t *) + calculate_vreg_constraints cx + in + let inactive_hregs = ref [] in (* [hreg] *) + let active_hregs = ref [] in (* [hreg] *) + let dirty_vregs = Hashtbl.create 0 in (* vreg -> () *) + let hreg_to_vreg = Hashtbl.create 0 in (* hreg -> vreg *) + let vreg_to_hreg = Hashtbl.create 0 in (* vreg -> hreg *) + let vreg_to_spill = Hashtbl.create 0 in (* vreg -> spill *) + let (word_ty:Il.scalar_ty) = Il.ValTy abi.Abi.abi_word_bits in + let vreg_spill_cell v = + Il.Mem ((spill_slot (Hashtbl.find vreg_to_spill v)), + Il.ScalarTy word_ty) + in + let newq = ref [] in + let fixup = ref None in + let prepend q = + newq := {q with quad_fixup = !fixup} :: (!newq); + fixup := None + in + let hr h = Il.Reg (Il.Hreg h, Il.voidptr_t) in + let hr_str = cx.ctxt_abi.Abi.abi_str_of_hardreg in + let clean_hreg i hreg = + if (Hashtbl.mem hreg_to_vreg hreg) && + (hreg < cx.ctxt_abi.Abi.abi_n_hardregs) + then + let vreg = Hashtbl.find hreg_to_vreg hreg in + if Hashtbl.mem dirty_vregs vreg + then + begin + Hashtbl.remove dirty_vregs vreg; + if (Bits.get (live_out_vregs.(i)) vreg) + then + let spill_idx = + if Hashtbl.mem vreg_to_spill vreg + then Hashtbl.find vreg_to_spill vreg + else + begin + let s = next_spill cx in + Hashtbl.replace vreg_to_spill vreg s; + s + end + in + let spill_mem = spill_slot spill_idx in + let spill_cell = Il.Mem (spill_mem, Il.ScalarTy word_ty) in + log cx "spilling <%d> from %s to %s" + vreg (hr_str hreg) (string_of_mem hr_str spill_mem); + prepend (Il.mk_quad + (Il.umov spill_cell (Il.Cell (hr hreg)))); + else () + end + else () + else () + in + + let inactivate_hreg hreg = + if (Hashtbl.mem hreg_to_vreg hreg) && + (hreg < cx.ctxt_abi.Abi.abi_n_hardregs) + then + let vreg = Hashtbl.find hreg_to_vreg hreg in + Hashtbl.remove vreg_to_hreg vreg; + Hashtbl.remove hreg_to_vreg hreg; + active_hregs := List.filter (fun x -> x != hreg) (!active_hregs); + inactive_hregs := hreg :: (!inactive_hregs); + else () + in + + let spill_specific_hreg i hreg = + clean_hreg i hreg; + inactivate_hreg hreg + in + + let rec select_constrained + (constraints:Bits.t) + (hregs:Il.hreg list) + : Il.hreg option = + match hregs with + [] -> None + | h::hs -> + if Bits.get constraints h + then Some h + else select_constrained constraints hs + in + + let spill_constrained constrs i = + match select_constrained constrs (!active_hregs) with + None -> + raise (Ra_error ("unable to spill according to constraint")); + | Some h -> + begin + spill_specific_hreg i h; + h + end + in + + let all_hregs = Bits.create abi.Abi.abi_n_hardregs true in + + let spill_all_regs i = + while (!active_hregs) != [] + do + let _ = spill_constrained all_hregs i in + () + done + in + + let reload vreg hreg = + if Hashtbl.mem vreg_to_spill vreg + then + prepend (Il.mk_quad + (Il.umov + (hr hreg) + (Il.Cell (vreg_spill_cell vreg)))) + else () + in + + let use_vreg def i vreg = + if Hashtbl.mem vreg_to_hreg vreg + then + begin + let h = Hashtbl.find vreg_to_hreg vreg in + iflog cx (fun _ -> log cx "found cached assignment %s for <v%d>" + (hr_str h) vreg); + h + end + else + let hreg = + let constrs = vreg_constraints.(vreg) in + match select_constrained constrs (!inactive_hregs) with + None -> + let h = spill_constrained constrs i in + iflog cx + (fun _ -> log cx "selected %s to spill and use for <v%d>" + (hr_str h) vreg); + h + | Some h -> + iflog cx (fun _ -> log cx "selected inactive %s for <v%d>" + (hr_str h) vreg); + h + in + inactive_hregs := + List.filter (fun x -> x != hreg) (!inactive_hregs); + active_hregs := (!active_hregs) @ [hreg]; + Hashtbl.replace hreg_to_vreg hreg vreg; + Hashtbl.replace vreg_to_hreg vreg hreg; + if def + then () + else + reload vreg hreg; + hreg + in + let qp_reg def i _ r = + match r with + Il.Hreg h -> (spill_specific_hreg i h; r) + | Il.Vreg v -> (Il.Hreg (use_vreg def i v)) + in + let qp_cell def i qp c = + match c with + Il.Reg (r, b) -> Il.Reg (qp_reg def i qp r, b) + | Il.Mem (a, b) -> + let qp = { qp with Il.qp_reg = qp_reg false i } in + Il.Mem (qp.qp_mem qp a, b) + in + let qp i = { Il.identity_processor with + Il.qp_cell_read = qp_cell false i; + Il.qp_cell_write = qp_cell true i; + Il.qp_reg = qp_reg false i } + in + cx.ctxt_next_spill <- n_pre_spills; + convert_labels cx; + for i = 0 to cx.ctxt_abi.Abi.abi_n_hardregs - 1 + do + inactive_hregs := i :: (!inactive_hregs) + done; + for i = 0 to (Array.length cx.ctxt_quads) - 1 + do + let quad = cx.ctxt_quads.(i) in + let clobbers = cx.ctxt_abi.Abi.abi_clobbers quad in + let used = quad_used_vregs quad in + let defined = quad_defined_vregs quad in + begin + if List.exists (fun def -> List.mem def clobbers) defined + then raise (Ra_error ("clobber and defined sets overlap")); + iflog cx + begin + fun _ -> + let hr (v:int) : string = + if Hashtbl.mem vreg_to_hreg v + then hr_str (Hashtbl.find vreg_to_hreg v) + else "??" + in + let vr_str (v:int) : string = + Printf.sprintf "v%d=%s" v (hr v) + in + let lstr lab ls fn = + if List.length ls = 0 + then () + else log cx "\t%s: [%s]" lab (list_to_str ls fn) + in + log cx "processing quad %d = %s" + i (string_of_quad hr_str quad); + (lstr "dirt" (htab_keys dirty_vregs) vr_str); + (lstr "clob" clobbers hr_str); + (lstr "in" (Bits.to_list live_in_vregs.(i)) vr_str); + (lstr "out" (Bits.to_list live_out_vregs.(i)) vr_str); + (lstr "use" used vr_str); + (lstr "def" defined vr_str); + end; + List.iter (clean_hreg i) clobbers; + if is_beginning_of_basic_block quad + then + begin + spill_all_regs i; + fixup := quad.quad_fixup; + prepend (Il.process_quad (qp i) quad) + end + else + begin + fixup := quad.quad_fixup; + let newq = (Il.process_quad (qp i) quad) in + begin + if is_end_of_basic_block quad + then spill_all_regs i + else () + end; + prepend newq + end + end; + List.iter inactivate_hreg clobbers; + List.iter (fun i -> Hashtbl.replace dirty_vregs i ()) defined; + done; + cx.ctxt_quads <- Array.of_list (List.rev (!newq)); + kill_redundant_moves cx; + + iflog cx + begin + fun _ -> + log cx "spills: %d pre-spilled, %d total" + n_pre_spills cx.ctxt_next_spill; + log cx "register-allocated quads:"; + dump_quads cx; + end; + (cx.ctxt_quads, cx.ctxt_next_spill) + + with + Ra_error s -> + Session.fail sess "RA Error: %s" s; + (quads, 0) + +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/x86.ml b/src/boot/be/x86.ml new file mode 100644 index 00000000..01b7e299 --- /dev/null +++ b/src/boot/be/x86.ml @@ -0,0 +1,2205 @@ +(* + * x86/ia32 instructions have 6 parts: + * + * [pre][op][modrm][sib][disp][imm] + * + * [pre] = 0..4 bytes of prefix + * [op] = 1..3 byte opcode + * [modrm] = 0 or 1 byte: [mod:2][reg/op:3][r/m:3] + * [sib] = 0 or 1 byte: [scale:2][index:3][base:3] + * [disp] = 1, 2 or 4 byte displacement + * [imm] = 1, 2 or 4 byte immediate + * + * So between 1 and 17 bytes total. + * + * We're not going to use sib, but modrm is worth discussing. + * + * The high two bits of modrm denote an addressing mode. The modes are: + * + * 00 - "mostly" *(reg) + * 01 - "mostly" *(reg) + disp8 + * 10 - "mostly" *(reg) + disp32 + * 11 - reg + * + * The next-lowest 3 bits denote a specific register, or a subopcode if + * there is a fixed register or only one operand. The instruction format + * reference will say "/<n>" for some number n, if a fixed subopcode is used. + * It'll say "/r" if the instruction uses this field to specify a register. + * + * The registers specified in this field are: + * + * 000 - EAX or XMM0 + * 001 - ECX or XMM1 + * 010 - EDX or XMM2 + * 011 - EBX or XMM3 + * 100 - ESP or XMM4 + * 101 - EBP or XMM5 + * 110 - ESI or XMM6 + * 111 - EDI or XMM7 + * + * The final low 3 bits denote sub-modes of the primary mode selected + * with the top 2 bits. In particular, they "mostly" select the reg that is + * to be used for effective address calculation. + * + * For the most part, these follow the same numbering order: EAX, ECX, EDX, + * EBX, ESP, EBP, ESI, EDI. There are two unusual deviations from the rule + * though: + * + * - In primary modes 00, 01 and 10, r/m=100 means "use SIB byte". You can + * use (unscaled) ESP as the base register in these modes by appending the + * SIB byte 0x24. We do that in our rm_r operand-encoder function. + * + * - In primary mode 00, r/m=101 means "just disp32", no register is + * involved. There is no way to use EBP in primary mode 00. If you try, we + * just decay into a mode 01 with an appended 8-bit immediate displacement. + * + * Some opcodes are written 0xNN +rd. This means "we decided to chew up a + * whole pile of opcodes here, with each opcode including a hard-wired + * reference to a register". For example, POP is "0x58 +rd", which means that + * the 1-byte insns 0x58..0x5f are chewed up for "POP EAX" ... "POP EDI" + * (again, the canonical order of register numberings) + *) + +(* + * Notes on register availability of x86: + * + * There are 8 GPRs but we use 2 of them for specific purposes: + * + * - ESP always points to the current stack frame. + * - EBP always points to the current frame base. + * + * We tell IL that we have 6 GPRs then, and permit most register-register ops + * on any of these 6, mostly-unconstrained. + * + *) + +open Common;; + +exception Unrecognized +;; + +let modrm m rm reg_or_subopcode = + if (((m land 0b11) != m) or + ((rm land 0b111) != rm) or + ((reg_or_subopcode land 0b111) != reg_or_subopcode)) + then raise (Invalid_argument "X86.modrm_deref") + else + ((((m land 0b11) lsl 6) + lor + (rm land 0b111)) + lor + ((reg_or_subopcode land 0b111) lsl 3)) +;; + +let modrm_deref_reg = modrm 0b00 ;; +let modrm_deref_disp32 = modrm 0b00 0b101 ;; +let modrm_deref_reg_plus_disp8 = modrm 0b01 ;; +let modrm_deref_reg_plus_disp32 = modrm 0b10 ;; +let modrm_reg = modrm 0b11 ;; + +let slash0 = 0;; +let slash1 = 1;; +let slash2 = 2;; +let slash3 = 3;; +let slash4 = 4;; +let slash5 = 5;; +let slash6 = 6;; +let slash7 = 7;; + + +(* + * Translate an IL-level hwreg number from 0..nregs into the 3-bit code number + * used through the mod r/m byte and /r sub-register specifiers of the x86 + * ISA. + * + * See "Table 2-2: 32-Bit Addressing Forms with the ModR/M Byte", in the IA32 + * Architecture Software Developer's Manual, volume 2a. + *) + +let eax = 0 +let ecx = 1 +let ebx = 2 +let esi = 3 +let edi = 4 +let edx = 5 +let ebp = 6 +let esp = 7 + +let code_eax = 0b000;; +let code_ecx = 0b001;; +let code_edx = 0b010;; +let code_ebx = 0b011;; +let code_esp = 0b100;; +let code_ebp = 0b101;; +let code_esi = 0b110;; +let code_edi = 0b111;; + +let reg r = + match r with + 0 -> code_eax + | 1 -> code_ecx + | 2 -> code_ebx + | 3 -> code_esi + | 4 -> code_edi + | 5 -> code_edx + (* Never assigned by the register allocator, but synthetic code uses + them *) + | 6 -> code_ebp + | 7 -> code_esp + | _ -> raise (Invalid_argument "X86.reg") +;; + + +let dwarf_eax = 0;; +let dwarf_ecx = 1;; +let dwarf_edx = 2;; +let dwarf_ebx = 3;; +let dwarf_esp = 4;; +let dwarf_ebp = 5;; +let dwarf_esi = 6;; +let dwarf_edi = 7;; + +let dwarf_reg r = + match r with + 0 -> dwarf_eax + | 1 -> dwarf_ecx + | 2 -> dwarf_ebx + | 3 -> dwarf_esi + | 4 -> dwarf_edi + | 5 -> dwarf_edx + | 6 -> dwarf_ebp + | 7 -> dwarf_esp + | _ -> raise (Invalid_argument "X86.dwarf_reg") + +let reg_str r = + match r with + 0 -> "eax" + | 1 -> "ecx" + | 2 -> "ebx" + | 3 -> "esi" + | 4 -> "edi" + | 5 -> "edx" + | 6 -> "ebp" + | 7 -> "esp" + | _ -> raise (Invalid_argument "X86.reg_str") +;; + +(* This is a basic ABI. You might need to customize it by platform. *) +let (n_hardregs:int) = 6;; +let (n_callee_saves:int) = 4;; + + +let is_ty32 (ty:Il.scalar_ty) : bool = + match ty with + Il.ValTy (Il.Bits32) -> true + | Il.AddrTy _ -> true + | _ -> false +;; + +let is_r32 (c:Il.cell) : bool = + match c with + Il.Reg (_, st) -> is_ty32 st + | _ -> false +;; + +let is_rm32 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty32 st + | Il.Reg (_, st) -> is_ty32 st + | _ -> false +;; + +let is_ty8 (ty:Il.scalar_ty) : bool = + match ty with + Il.ValTy (Il.Bits8) -> true + | _ -> false +;; + +let is_m32 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty32 st + | _ -> false +;; + +let is_m8 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty8 st + | _ -> false +;; + +let is_ok_r8 (r:Il.hreg) : bool = + (r == eax || r == ebx || r == ecx || r == edx) +;; + +let is_r8 (c:Il.cell) : bool = + match c with + Il.Reg (Il.Hreg r, st) when is_ok_r8 r -> is_ty8 st + | _ -> false +;; + +let is_rm8 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty8 st + | _ -> is_r8 c +;; + +let prealloc_quad (quad':Il.quad') : Il.quad' = + let target_cell reg c = + Il.Reg (Il.Hreg reg, Il.cell_scalar_ty c) + in + let target_operand reg op = + Il.Cell (Il.Reg (Il.Hreg reg, Il.operand_scalar_ty op)) + in + + let target_bin_to_hreg bin dst src = + { bin with + Il.binary_rhs = target_operand src bin.Il.binary_rhs; + Il.binary_lhs = target_operand dst bin.Il.binary_lhs; + Il.binary_dst = target_cell dst bin.Il.binary_dst } + in + + let target_cmp cmp = + match cmp.Il.cmp_lhs with + (* Immediate LHS we force to eax. *) + Il.Imm _ -> + { cmp with + Il.cmp_lhs = target_operand eax cmp.Il.cmp_lhs } + | _ -> cmp + in + + match quad' with + Il.Binary bin -> + begin + Il.Binary + begin + match bin.Il.binary_op with + Il.IMUL | Il.UMUL + | Il.IDIV | Il.UDIV -> target_bin_to_hreg bin eax ecx + | Il.IMOD | Il.UMOD -> target_bin_to_hreg bin eax ecx + | _ -> bin + end + end + + | Il.Cmp cmp -> Il.Cmp (target_cmp cmp) + + | Il.Call c -> + let ty = Il.cell_scalar_ty c.Il.call_dst in + Il.Call { c with + Il.call_dst = Il.Reg ((Il.Hreg eax), ty) } + + | Il.Lea le -> + begin + match (le.Il.lea_dst, le.Il.lea_src) with + (Il.Reg (_, dst_ty), Il.ImmPtr _) + when is_ty32 dst_ty -> + Il.Lea { le with + Il.lea_dst = Il.Reg (Il.Hreg eax, dst_ty) } + | _ -> quad' + end + + | x -> x +;; + +let constrain_vregs (q:Il.quad) (hregs:Bits.t array) : unit = + + let involves_8bit_cell = + let b = ref false in + let qp_cell _ c = + match c with + Il.Reg (_, Il.ValTy Il.Bits8) + | Il.Mem (_, Il.ScalarTy (Il.ValTy Il.Bits8)) -> + (b := true; c) + | _ -> c + in + ignore (Il.process_quad { Il.identity_processor with + Il.qp_cell_read = qp_cell; + Il.qp_cell_write = qp_cell } q); + !b + in + + let qp_mem _ m = m in + let qp_cell _ c = + begin + match c with + Il.Reg (Il.Vreg v, _) when involves_8bit_cell -> + (* 8-bit register cells must only be al, cl, dl, bl. + * Not esi/edi. *) + let hv = hregs.(v) in + List.iter (fun bad -> Bits.set hv bad false) [esi; edi] + | _ -> () + end; + c + in + begin + match q.Il.quad_body with + Il.Binary b -> + begin + match b.Il.binary_op with + (* Shifts *) + | Il.LSL | Il.LSR | Il.ASR -> + begin + match b.Il.binary_rhs with + Il.Cell (Il.Reg (Il.Vreg v, _)) -> + let hv = hregs.(v) in + (* Shift src has to be ecx. *) + List.iter + (fun bad -> Bits.set hv bad false) + [eax; edx; ebx; esi; edi] + | _ -> () + end + | _ -> () + end + | _ -> () + end; + ignore + (Il.process_quad { Il.identity_processor with + Il.qp_mem = qp_mem; + Il.qp_cell_read = qp_cell; + Il.qp_cell_write = qp_cell } q) +;; + + +let clobbers (quad:Il.quad) : Il.hreg list = + match quad.Il.quad_body with + Il.Binary bin -> + begin + match bin.Il.binary_op with + Il.IMUL | Il.UMUL + | Il.IDIV | Il.UDIV -> [ edx ] + | Il.IMOD | Il.UMOD -> [ edx ] + | _ -> [] + end + | Il.Unary un -> + begin + match un.Il.unary_op with + Il.ZERO -> [ eax; edi; ecx ] + | _ -> [ ] + end + | Il.Call _ -> [ eax; ecx; edx; ] + | Il.Regfence -> [ eax; ecx; ebx; edx; edi; esi; ] + | _ -> [] +;; + + +let word_sz = 4L +;; + +let word_bits = Il.Bits32 +;; + +let word_ty = TY_u32 +;; + +let annotate (e:Il.emitter) (str:string) = + Hashtbl.add e.Il.emit_annotations e.Il.emit_pc str +;; + +let c (c:Il.cell) : Il.operand = Il.Cell c ;; +let r (r:Il.reg) : Il.cell = Il.Reg ( r, (Il.ValTy word_bits) ) ;; +let h (x:Il.hreg) : Il.reg = Il.Hreg x ;; +let rc (x:Il.hreg) : Il.cell = r (h x) ;; +let ro (x:Il.hreg) : Il.operand = c (rc x) ;; +let vreg (e:Il.emitter) : (Il.reg * Il.cell) = + let vr = Il.next_vreg e in + (vr, (Il.Reg (vr, (Il.ValTy word_bits)))) +;; +let imm (x:Asm.expr64) : Il.operand = + Il.Imm (x, word_ty) +;; +let immi (x:int64) : Il.operand = + imm (Asm.IMM x) +;; + +let imm_byte (x:Asm.expr64) : Il.operand = + Il.Imm (x, TY_u8) +;; +let immi_byte (x:int64) : Il.operand = + imm_byte (Asm.IMM x) +;; + + +let byte_off_n (i:int) : Asm.expr64 = + Asm.IMM (Int64.of_int i) +;; + +let byte_n (reg:Il.reg) (i:int) : Il.cell = + let imm = byte_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8)) +;; + +let word_off_n (i:int) : Asm.expr64 = + Asm.IMM (Int64.mul (Int64.of_int i) word_sz) +;; + +let word_at (reg:Il.reg) : Il.cell = + let mem = Il.RegIn (reg, None) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits)) +;; + +let word_at_off (reg:Il.reg) (off:Asm.expr64) : Il.cell = + let mem = Il.RegIn (reg, Some off) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits)) +;; + +let word_n (reg:Il.reg) (i:int) : Il.cell = + word_at_off reg (word_off_n i) +;; + +let reg_codeptr (reg:Il.reg) : Il.code = + Il.CodePtr (Il.Cell (Il.Reg (reg, Il.AddrTy Il.CodeTy))) +;; + +let word_n_low_byte (reg:Il.reg) (i:int) : Il.cell = + let imm = word_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8)) +;; + +let wordptr_n (reg:Il.reg) (i:int) : Il.cell = + let imm = word_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.AddrTy (Il.ScalarTy (Il.ValTy word_bits)))) +;; + +let get_element_ptr = Il.get_element_ptr word_bits reg_str ;; + +let save_callee_saves (e:Il.emitter) : unit = + Il.emit e (Il.Push (ro ebp)); + Il.emit e (Il.Push (ro edi)); + Il.emit e (Il.Push (ro esi)); + Il.emit e (Il.Push (ro ebx)); +;; + + +let restore_callee_saves (e:Il.emitter) : unit = + Il.emit e (Il.Pop (rc ebx)); + Il.emit e (Il.Pop (rc esi)); + Il.emit e (Il.Pop (rc edi)); + Il.emit e (Il.Pop (rc ebp)); +;; + + +(* restores registers from the frame base without updating esp: + * - sets ebp, edi, esi, ebx to stored values from frame base + * - sets `retpc' register to stored retpc from frame base + * - sets `base' register to current fp + *) +let restore_frame_base (e:Il.emitter) (base:Il.reg) (retpc:Il.reg) : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + mov (r base) (ro ebp); + mov (rc ebx) (c (word_at base)); + mov (rc esi) (c (word_n base 1)); + mov (rc edi) (c (word_n base 2)); + mov (rc ebp) (c (word_n base 3)); + mov (r retpc) (c (word_n base 4)); +;; + + +(* + * Our arrangement on x86 is this: + * + * *ebp+20+(4*N) = [argN ] + * ... + * *ebp+24 = [arg1 ] = task ptr + * *ebp+20 = [arg0 ] = out ptr + * *ebp+16 = [retpc ] + * *ebp+12 = [old_ebp] + * *ebp+8 = [old_edi] + * *ebp+4 = [old_esi] + * *ebp = [old_ebx] + * + * For x86-cdecl: + * + * %eax, %ecx, %edx are "caller save" registers + * %ebp, %ebx, %esi, %edi are "callee save" registers + * + *) + +let frame_base_words = 5 (* eip,ebp,edi,esi,ebx *) ;; +let frame_base_sz = Int64.mul (Int64.of_int frame_base_words) word_sz;; + +let frame_info_words = 2 (* crate ptr, crate-rel frame info disp *) ;; +let frame_info_sz = Int64.mul (Int64.of_int frame_info_words) word_sz;; + +let implicit_arg_words = 2 (* task ptr,out ptr *);; +let implicit_args_sz = Int64.mul (Int64.of_int implicit_arg_words) word_sz;; + +let out_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words);; +let task_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words+1);; +let ty_param_n i = + wordptr_n (Il.Hreg ebp) (frame_base_words + implicit_arg_words + i);; + +let spill_slot (i:Il.spill) : Il.mem = + let imm = (Asm.IMM + (Int64.neg + (Int64.add frame_info_sz + (Int64.mul word_sz + (Int64.of_int (i+1)))))) + in + Il.RegIn ((Il.Hreg ebp), Some imm) +;; + + +let get_next_pc_thunk_fixup = new_fixup "glue$get_next_pc" +;; + +let emit_get_next_pc_thunk (e:Il.emitter) : unit = + let sty = Il.AddrTy Il.CodeTy in + let rty = Il.ScalarTy sty in + let deref_esp = Il.Mem (Il.RegIn (Il.Hreg esp, None), rty) in + let eax = (Il.Reg (Il.Hreg eax, sty)) in + Il.emit_full e (Some get_next_pc_thunk_fixup) [] + (Il.umov eax (Il.Cell deref_esp)); + Il.emit e Il.Ret; +;; + +let get_next_pc_thunk : (Il.reg * fixup * (Il.emitter -> unit)) = + (Il.Hreg eax, get_next_pc_thunk_fixup, emit_get_next_pc_thunk) +;; + +let emit_c_call + (e:Il.emitter) + (ret:Il.cell) + (tmp1:Il.reg) + (tmp2:Il.reg) + (nabi:nabi) + (in_prologue:bool) + (fptr:Il.code) + (args:Il.operand array) + : unit = + + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in + + (* rust calls get task as arg0 *) + let args = + if nabi.nabi_convention = CONV_rust + then Array.append [| c task_ptr |] args + else args + in + let nargs = Array.length args in + let arg_sz = Int64.mul (Int64.of_int nargs) word_sz + in + + mov (r tmp1) (c task_ptr); (* tmp1 = task from argv[-1] *) + mov (r tmp2) (ro esp); (* tmp2 = esp *) + mov (* task->rust_sp = tmp2 *) + (word_n tmp1 Abi.task_field_rust_sp) + (c (r tmp2)); + mov (* esp = task->runtime_sp *) + (rc esp) + (c (word_n tmp1 Abi.task_field_runtime_sp)); + + binary Il.SUB (rc esp) arg_sz; (* make room on the stack *) + binary Il.AND (rc esp) (* and 16-byte align sp *) + 0xfffffffffffffff0L; + + Array.iteri + begin + fun i (arg:Il.operand) -> (* write args to C stack *) + match arg with + Il.Cell (Il.Mem (a, ty)) -> + begin + match a with + Il.RegIn (Il.Hreg base, off) when base == esp -> + mov (r tmp1) (c (Il.Mem (Il.RegIn (tmp2, off), ty))); + mov (word_n (h esp) i) (c (r tmp1)); + | _ -> + mov (r tmp1) arg; + mov (word_n (h esp) i) (c (r tmp1)); + end + | _ -> + mov (word_n (h esp) i) arg + end + args; + + match ret with + Il.Mem (Il.RegIn (Il.Hreg base, _), _) when base == esp -> + assert (not in_prologue); + + (* If ret is esp-relative, use a temporary register until we + switched stacks. *) + + emit (Il.call (r tmp1) fptr); + mov (r tmp2) (c task_ptr); + mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp)); + mov ret (c (r tmp1)); + + | _ when in_prologue -> + (* + * We have to do something a little surprising here: + * we're doing a 'grow' call so ebp is going to point + * into a dead stack frame on call-return. So we + * temporarily store task-ptr into ebp and then reload + * esp *and* ebp via ebp->rust_sp on the other side of + * the call. + *) + mov (rc ebp) (c task_ptr); + emit (Il.call ret fptr); + mov (rc esp) (c (word_n (h ebp) Abi.task_field_rust_sp)); + mov (rc ebp) (ro esp); + + | _ -> + emit (Il.call ret fptr); + mov (r tmp2) (c task_ptr); + mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp)); +;; + +let emit_void_prologue_call + (e:Il.emitter) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + let callee = Abi.load_fixup_codeptr e (h eax) fn true nabi.nabi_indirect in + emit_c_call e (rc eax) (h edx) (h ecx) nabi true callee args +;; + +let emit_native_call + (e:Il.emitter) + (ret:Il.cell) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + + let (tmp1, _) = vreg e in + let (tmp2, _) = vreg e in + let (freg, _) = vreg e in + let callee = Abi.load_fixup_codeptr e freg fn true nabi.nabi_indirect in + emit_c_call e ret tmp1 tmp2 nabi false callee args +;; + +let emit_native_void_call + (e:Il.emitter) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + + let (ret, _) = vreg e in + emit_native_call e (r ret) nabi fn args +;; + +let emit_native_call_in_thunk + (e:Il.emitter) + (ret:Il.cell) + (nabi:nabi) + (fn:Il.operand) + (args:Il.operand array) + : unit = + + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + + begin + match fn with + (* + * NB: old path, remove when/if you're sure you don't + * want native-linker-symbol-driven requirements. + *) + Il.ImmPtr (fix, _) -> + let code = + Abi.load_fixup_codeptr e (h eax) fix true nabi.nabi_indirect + in + emit_c_call e (rc eax) (h edx) (h ecx) nabi false code args; + + | _ -> + (* + * NB: new path, ignores nabi_indirect, assumes + * indirect via pointer from upcall_require_c_sym + * or crate cache. + *) + mov (rc eax) fn; + let cell = Il.Reg (h eax, Il.AddrTy Il.CodeTy) in + let fptr = Il.CodePtr (Il.Cell cell) in + emit_c_call e (rc eax) (h edx) (h ecx) nabi false fptr args; + end; + + match ret with + Il.Reg (r, _) -> mov (word_at r) (ro eax) + | _ -> mov (rc edx) (c ret); + mov (word_at (h edx)) (ro eax) +;; + +let unwind_glue + (e:Il.emitter) + (nabi:nabi) + (exit_task_fixup:fixup) + : unit = + + let fp_n = word_n (Il.Hreg ebp) in + let edx_n = word_n (Il.Hreg edx) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let push x = emit (Il.Push x) in + let pop x = emit (Il.Pop x) in + let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in + let codefix fix = Il.CodePtr (Il.ImmPtr (fix, Il.CodeTy)) in + let mark fix = Il.emit_full e (Some fix) [] Il.Dead in + let glue_field = Abi.frame_glue_fns_field_drop in + + let repeat_jmp_fix = new_fixup "repeat jump" in + let skip_jmp_fix = new_fixup "skip jump" in + let exit_jmp_fix = new_fixup "exit jump" in + + mov (rc edx) (c task_ptr); (* switch back to rust stack *) + mov + (rc esp) + (c (edx_n Abi.task_field_rust_sp)); + + mark repeat_jmp_fix; + + mov (rc esi) (c (fp_n (-1))); (* esi <- crate ptr *) + mov (rc edx) (c (fp_n (-2))); (* edx <- frame glue functions. *) + emit (Il.cmp (ro edx) (immi 0L)); + + emit + (Il.jmp Il.JE + (codefix skip_jmp_fix)); (* if struct* is nonzero *) + add edx esi; (* add crate ptr to disp. *) + mov + (rc ecx) + (c (edx_n glue_field)); (* ecx <- drop glue *) + emit (Il.cmp (ro ecx) (immi 0L)); + + emit + (Il.jmp Il.JE + (codefix skip_jmp_fix)); (* if glue-fn is nonzero *) + add ecx esi; (* add crate ptr to disp. *) + push (ro ebp); (* frame-to-drop *) + push (c task_ptr); (* form usual call to glue *) + push (immi 0L); (* outptr *) + emit (Il.call (rc eax) + (reg_codeptr (h ecx))); (* call glue_fn, trashing eax. *) + pop (rc eax); + pop (rc eax); + pop (rc eax); + + mark skip_jmp_fix; + mov (rc edx) (c (fp_n 3)); (* load next fp (callee-saves[3]) *) + emit (Il.cmp (ro edx) (immi 0L)); + emit (Il.jmp Il.JE + (codefix exit_jmp_fix)); (* if nonzero *) + mov (rc ebp) (ro edx); (* move to next frame *) + emit (Il.jmp Il.JMP + (codefix repeat_jmp_fix)); (* loop *) + + (* exit path. *) + mark exit_jmp_fix; + + let callee = + Abi.load_fixup_codeptr + e (h eax) exit_task_fixup false nabi.nabi_indirect + in + emit_c_call + e (rc eax) (h edx) (h ecx) nabi false callee [| (c task_ptr) |]; +;; + +(* Puts result in eax; clobbers ecx, edx in the process. *) +let rec calculate_sz (e:Il.emitter) (size:size) : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let push x = emit (Il.Push x) in + let pop x = emit (Il.Pop x) in + let neg x = emit (Il.unary Il.NEG (rc x) (ro x)) in + let bnot x = emit (Il.unary Il.NOT (rc x) (ro x)) in + let band x y = emit (Il.binary Il.AND (rc x) (ro x) (ro y)) in + let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in + let mul x y = emit (Il.binary Il.UMUL (rc x) (ro x) (ro y)) in + let subi x y = emit (Il.binary Il.SUB (rc x) (ro x) (immi y)) in + let eax_gets_a_and_ecx_gets_b a b = + calculate_sz e b; + push (ro eax); + calculate_sz e a; + pop (rc ecx); + in + match size with + SIZE_fixed i -> + mov (rc eax) (immi i) + + | SIZE_fixup_mem_sz f -> + mov (rc eax) (imm (Asm.M_SZ f)) + + | SIZE_fixup_mem_pos f -> + mov (rc eax) (imm (Asm.M_POS f)) + + | SIZE_param_size i -> + mov (rc eax) (Il.Cell (ty_param_n i)); + mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_size)) + + | SIZE_param_align i -> + mov (rc eax) (Il.Cell (ty_param_n i)); + mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_align)) + + | SIZE_rt_neg a -> + calculate_sz e a; + neg eax + + | SIZE_rt_add (a, b) -> + eax_gets_a_and_ecx_gets_b a b; + add eax ecx + + | SIZE_rt_mul (a, b) -> + eax_gets_a_and_ecx_gets_b a b; + mul eax ecx + + | SIZE_rt_max (a, b) -> + eax_gets_a_and_ecx_gets_b a b; + emit (Il.cmp (ro eax) (ro ecx)); + let jmp_pc = e.Il.emit_pc in + emit (Il.jmp Il.JAE Il.CodeNone); + mov (rc eax) (ro ecx); + Il.patch_jump e jmp_pc e.Il.emit_pc; + + | SIZE_rt_align (align, off) -> + (* + * calculate off + pad where: + * + * pad = (align - (off mod align)) mod align + * + * In our case it's always a power of two, + * so we can just do: + * + * mask = align-1 + * off += mask + * off &= ~mask + * + *) + eax_gets_a_and_ecx_gets_b off align; + subi ecx 1L; + add eax ecx; + bnot ecx; + band eax ecx; +;; + +let rec size_calculation_stack_highwater (size:size) : int = + match size with + SIZE_fixed _ + | SIZE_fixup_mem_sz _ + | SIZE_fixup_mem_pos _ + | SIZE_param_size _ + | SIZE_param_align _ -> 0 + | SIZE_rt_neg a -> + (size_calculation_stack_highwater a) + | SIZE_rt_max (a, b) -> + (size_calculation_stack_highwater a) + + (size_calculation_stack_highwater b) + | SIZE_rt_add (a, b) + | SIZE_rt_mul (a, b) + | SIZE_rt_align (a, b) -> + (size_calculation_stack_highwater a) + + (size_calculation_stack_highwater b) + + 1 +;; + +let boundary_sz = + (Asm.IMM + (Int64.add (* Extra non-frame room: *) + frame_base_sz (* to safely enter the next frame, *) + frame_base_sz)) (* and make a 'grow' upcall there. *) +;; + +let stack_growth_check + (e:Il.emitter) + (nabi:nabi) + (grow_task_fixup:fixup) + (growsz:Il.operand) + (grow_jmp:Il.label option) + (restart_pc:Il.label) + (end_reg:Il.reg) (* + * stack limit on entry, + * new stack pointer on exit + *) + (tmp_reg:Il.reg) (* temporary (trashed) *) + : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in + let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in + mov (r tmp_reg) (ro esp); (* tmp = esp *) + sub (r tmp_reg) growsz; (* tmp -= size-request *) + emit (Il.cmp (c (r end_reg)) (c (r tmp_reg))); + (* + * Jump *over* 'grow' upcall on non-underflow: + * if end_reg <= tmp_reg + *) + + let bypass_grow_upcall_jmp_pc = e.Il.emit_pc in + emit (Il.jmp Il.JBE Il.CodeNone); + + begin + match grow_jmp with + None -> () + | Some j -> Il.patch_jump e j e.Il.emit_pc + end; + (* Extract growth-amount from tmp_reg. *) + mov (r end_reg) (ro esp); + sub (r end_reg) (c (r tmp_reg)); + add (r end_reg) (Il.Imm (boundary_sz, word_ty)); + (* Perform 'grow' upcall, then restart frame-entry. *) + emit_void_prologue_call e nabi grow_task_fixup [| c (r end_reg) |]; + emit (Il.jmp Il.JMP (Il.CodeLabel restart_pc)); + Il.patch_jump e bypass_grow_upcall_jmp_pc e.Il.emit_pc +;; + +let fn_prologue + (e:Il.emitter) + (framesz:size) + (callsz:size) + (nabi:nabi) + (grow_task_fixup:fixup) + : unit = + + let esi_n = word_n (h esi) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in + let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in + + (* We may be in a dynamic-sized frame. This makes matters complex, + * as we can't just perform a simple growth check in terms of a + * static size. The check is against a dynamic size, and we need to + * calculate that size. + * + * Unlike size-calculations in 'trans', we do not use vregs to + * calculate the frame size; instead we use a PUSH/POP stack-machine + * translation that doesn't disturb the registers we're + * somewhat-carefully *using* during frame setup. + * + * This only pushes the problem back a little ways though: we still + * need to be sure we have enough room to do the PUSH/POP + * calculation. We refer to this amount of space as the 'primordial' + * frame size, which can *thankfully* be calculated exactly from the + * arithmetic expression we're aiming to calculate. So we make room + * for the primordial frame, run the calculation of the full dynamic + * frame size, then make room *again* for this dynamic size. + * + * Our caller reserved enough room for us to push our own frame-base, + * as well as the frame-base that it will cost to do an upcall. + *) + + (* + * After we save callee-saves, We have a stack like this: + * + * | ... | + * | caller frame | + * | + spill | + * | caller arg K | + * | ... | + * | caller arg 0 | + * | retpc | <-- sp we received, top of callee frame + * | callee save 1 | + * | ... | + * | callee save N | <-- ebp and esp after saving callee-saves + * | ... | + * | callee frame | + * | + spill | + * | callee arg J | + * | ... | + * | callee arg 0 | <-- bottom of callee frame + * | next retpc | + * | next save 1 | + * | ... | + * | next save N | <-- bottom of region we must reserve + * | ... | + * + * A "frame base" is the retpc and set of callee-saves. + * + * We need to reserve room for our frame *and* the next frame-base, because + * we're going to be blindly entering the next frame-base (pushing eip and + * callee-saves) before we perform the next check. + *) + + (* + * We double the reserved callsz because we need a 'temporary tail-call + * region' above the actual call region, in case there's a drop call at the + * end of assembling the tail-call args and before copying them to callee + * position. + *) + + let callsz = add_sz callsz callsz in + let n_glue_args = Int64.of_int Abi.worst_case_glue_call_args in + let n_glue_words = Int64.mul word_sz n_glue_args in + + (* + * Add in *another* word to handle an extra-awkward spill of the + * callee address that might occur during an indirect tail call. + *) + let callsz = add_sz (SIZE_fixed word_sz) callsz in + + (* + * Add in enough words for a glue-call (these occur underneath esp) + *) + let callsz = add_sz (SIZE_fixed n_glue_words) callsz in + + (* + * Cumulative dynamic-frame size. + *) + let call_and_frame_sz = add_sz callsz framesz in + + (* Already have room to save regs on entry. *) + save_callee_saves e; + + let restart_pc = e.Il.emit_pc in + + mov (rc ebp) (ro esp); (* Establish frame base. *) + mov (rc esi) (c task_ptr); (* esi = task *) + mov + (rc esi) + (c (esi_n Abi.task_field_stk)); (* esi = task->stk *) + add (rc esi) (imm + (Asm.ADD + ((word_off_n Abi.stk_field_data), + boundary_sz))); + + let (dynamic_frame_sz, dynamic_grow_jmp) = + match Il.size_to_expr64 call_and_frame_sz with + None -> + begin + let primordial_frame_sz = + Asm.IMM + (Int64.mul word_sz + (Int64.of_int + (size_calculation_stack_highwater + call_and_frame_sz))) + in + (* Primordial size-check. *) + mov (rc edi) (ro esp); (* edi = esp *) + sub (* edi -= size-request *) + (rc edi) + (imm primordial_frame_sz); + emit (Il.cmp (ro esi) (ro edi)); + + (* Jump to 'grow' upcall on underflow: if esi (bottom) is > + edi (proposed-esp) *) + + let primordial_underflow_jmp_pc = e.Il.emit_pc in + emit (Il.jmp Il.JA Il.CodeNone); + + (* Calculate dynamic frame size. *) + calculate_sz e call_and_frame_sz; + ((ro eax), Some primordial_underflow_jmp_pc) + end + | Some e -> ((imm e), None) + in + + (* "Full" frame size-check. *) + stack_growth_check e nabi grow_task_fixup + dynamic_frame_sz dynamic_grow_jmp restart_pc (h esi) (h edi); + + + (* Establish a frame, wherever we landed. *) + sub (rc esp) dynamic_frame_sz; + + (* Zero the frame. + * + * FIXME: this is awful, will go away when we have proper CFI. + *) + + mov (rc edi) (ro esp); + mov (rc ecx) dynamic_frame_sz; + emit (Il.unary Il.ZERO (word_at (h edi)) (ro ecx)); + + (* Move esp back up over the glue region. *) + add (rc esp) (immi n_glue_words); +;; + + +let fn_epilogue (e:Il.emitter) : unit = + + (* Tear down existing frame. *) + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + mov (rc esp) (ro ebp); + restore_callee_saves e; + emit Il.Ret; +;; + +let inline_memcpy + (e:Il.emitter) + (n_bytes:int64) + (dst_ptr:Il.reg) + (src_ptr:Il.reg) + (tmp_reg:Il.reg) + (ascending:bool) + : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let bpw = Int64.to_int word_sz in + let w = Int64.to_int (Int64.div n_bytes word_sz) in + let b = Int64.to_int (Int64.rem n_bytes word_sz) in + if ascending + then + begin + for i = 0 to (w-1) do + mov (r tmp_reg) (c (word_n src_ptr i)); + mov (word_n dst_ptr i) (c (r tmp_reg)); + done; + for i = 0 to (b-1) do + let off = (w*bpw) + i in + mov (r tmp_reg) (c (byte_n src_ptr off)); + mov (byte_n dst_ptr off) (c (r tmp_reg)); + done; + end + else + begin + for i = (b-1) downto 0 do + let off = (w*bpw) + i in + mov (r tmp_reg) (c (byte_n src_ptr off)); + mov (byte_n dst_ptr off) (c (r tmp_reg)); + done; + for i = (w-1) downto 0 do + mov (r tmp_reg) (c (word_n src_ptr i)); + mov (word_n dst_ptr i) (c (r tmp_reg)); + done; + end +;; + + + +let fn_tail_call + (e:Il.emitter) + (caller_callsz:int64) + (caller_argsz:int64) + (callee_code:Il.code) + (callee_argsz:int64) + : unit = + let emit = Il.emit e in + let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in + let mov dst src = emit (Il.umov dst src) in + let argsz_diff = Int64.sub caller_argsz callee_argsz in + let callee_spill_cell = word_at_off (h esp) (Asm.IMM caller_callsz) in + + (* + * Our outgoing arguments were prepared in a region above the call region; + * this is reserved for the purpose of making tail-calls *only*, so we do + * not collide with glue calls we had to make while dropping the frame, + * after assembling our arg region. + * + * Thus, esp points to the "normal" arg region, and we need to move it + * to point to the tail-call arg region. To make matters simple, both + * regions are the same size, one atop the other. + *) + + annotate e "tail call: move esp to temporary tail call arg-prep area"; + binary Il.ADD (rc esp) caller_callsz; + + (* + * If we're given a non-ImmPtr callee, we may need to move it to a known + * cell to avoid clobbering its register while we do the argument shuffle + * below. + * + * Sadly, we are too register-starved to just flush our callee to a reg; + * so we carve out an extra word of the temporary call-region and use + * it. + * + * This is ridiculous, but works. + *) + begin + match callee_code with + Il.CodePtr (Il.Cell c) -> + annotate e "tail call: spill callee-ptr to temporary memory"; + mov callee_spill_cell (Il.Cell c); + + | _ -> () + end; + + (* edx <- ebp; restore ebp, edi, esi, ebx; ecx <- retpc *) + annotate e "tail call: restore callee-saves from frame base"; + restore_frame_base e (h edx) (h ecx); + (* move edx past frame base and adjust for difference in call sizes *) + annotate e "tail call: adjust temporary fp"; + binary Il.ADD (rc edx) (Int64.add frame_base_sz argsz_diff); + + (* + * stack grows downwards; copy from high to low + * + * bpw = word_sz + * w = floor(callee_argsz / word_sz) + * b = callee_argsz % word_sz + * + * byte copies: + * +------------------------+ + * | | + * +------------------------+ <-- base + (w * word_sz) + (b - 1) + * . . + * +------------------------+ + * | | + * +------------------------+ <-- base + (w * word_sz) + (b - b) + * word copies: = + * +------------------------+ <-- base + ((w-0) * word_sz) + * | bytes | + * | (w-1)*bpw..w*bpw-1 | + * +------------------------+ <-- base + ((w-1) * word_sz) + * | bytes | + * | (w-2)*bpw..(w-1)*bpw-1 | + * +------------------------+ <-- base + ((w-2) * word_sz) + * . . + * . . + * . . + * +------------------------+ + * | bytes | + * | 0..bpw - 1 | + * +------------------------+ <-- base + ((w-w) * word_sz) + *) + + annotate e "tail call: move arg-tuple up to top of frame"; + (* NOTE: must copy top-to-bottom in case the regions overlap *) + inline_memcpy e callee_argsz (h edx) (h esp) (h eax) false; + + (* + * We're done with eax now; so in the case where we had to spill + * our callee codeptr, we can reload it into eax here and rewrite + * our callee into *eax. + *) + let callee_code = + match callee_code with + Il.CodePtr (Il.Cell _) -> + annotate e "tail call: reload callee-ptr from temporary memory"; + mov (rc eax) (Il.Cell callee_spill_cell); + reg_codeptr (h eax) + + | _ -> callee_code + in + + + (* esp <- edx *) + annotate e "tail call: adjust stack pointer"; + mov (rc esp) (ro edx); + (* PUSH ecx (retpc) *) + annotate e "tail call: push retpc"; + emit (Il.Push (ro ecx)); + (* JMP callee_code *) + emit (Il.jmp Il.JMP callee_code); +;; + + +let loop_info_field_retpc = 0;; +let loop_info_field_sp = 1;; +let loop_info_field_fp = 2;; + +let self_args_cell (self_args_rty:Il.referent_ty) : Il.cell = + Il.Mem (Il.RegIn (h ebp, Some (Asm.IMM frame_base_sz)), self_args_rty) +;; + +let activate_glue (e:Il.emitter) : unit = + (* + * This is a bit of glue-code. It should be emitted once per + * compilation unit. + * + * - save regs on C stack + * - align sp on a 16-byte boundary + * - save sp to task.runtime_sp (runtime_sp is thus always aligned) + * - load saved task sp (switch stack) + * - restore saved task regs + * - return to saved task pc + * + * Our incoming stack looks like this: + * + * *esp+4 = [arg1 ] = task ptr + * *esp = [retpc ] + *) + + let sp_n = word_n (Il.Hreg esp) in + let edx_n = word_n (Il.Hreg edx) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in + + mov (rc edx) (c (sp_n 1)); (* edx <- task *) + save_callee_saves e; + mov + (edx_n Abi.task_field_runtime_sp) + (ro esp); (* task->runtime_sp <- esp *) + mov + (rc esp) + (c (edx_n Abi.task_field_rust_sp)); (* esp <- task->rust_sp *) + + (* + * There are two paths we can arrive at this code from: + * + * + * 1. We are activating a task for the first time. When we switch into + * the task stack and 'ret' to its first instruction, we'll start + * doing whatever the first instruction says. Probably saving + * registers and starting to establish a frame. Harmless stuff, + * doesn't look at task->rust_sp again except when it clobbers it + * during a later upcall. + * + * + * 2. We are resuming a task that was descheduled by the yield glue + * below. When we switch into the task stack and 'ret', we'll be + * ret'ing to a very particular instruction: + * + * "esp <- task->rust_sp" + * + * this is the first instruction we 'ret' to after this glue, because + * it is the first instruction following *any* upcall, and the task + * we are activating was descheduled mid-upcall. + * + * Unfortunately for us, we have already restored esp from + * task->rust_sp and are about to eat the 5 words off the top of it. + * + * + * | ... | <-- where esp will be once we restore + ret, below, + * | retpc | and where we'd *like* task->rust_sp to wind up. + * | ebp | + * | edi | + * | esi | + * | ebx | <-- current task->rust_sp == current esp + * + * + * This is a problem. If we return to "esp <- task->rust_sp" it will + * push esp back down by 5 words. This manifests as a rust stack that + * grows by 5 words on each yield/reactivate. Not good. + * + * So what we do here is just adjust task->rust_sp up 5 words as + * well, to mirror the movement in esp we're about to perform. That + * way the "esp <- task->rust_sp" we 'ret' to below will be a + * no-op. Esp won't move, and the task's stack won't grow. + *) + + binary Il.ADD (edx_n Abi.task_field_rust_sp) + (Int64.mul (Int64.of_int (n_callee_saves + 1)) word_sz); + + (**** IN TASK STACK ****) + restore_callee_saves e; + emit Il.Ret; + (***********************) + () +;; + +let yield_glue (e:Il.emitter) : unit = + + (* More glue code, this time the 'bottom half' of yielding. + * + * We arrived here because an upcall decided to deschedule the + * running task. So the upcall's return address got patched to the + * first instruction of this glue code. + * + * When the upcall does 'ret' it will come here, and its esp will be + * pointing to the last argument pushed on the C stack before making + * the upcall: the 0th argument to the upcall, which is always the + * task ptr performing the upcall. That's where we take over. + * + * Our goal is to complete the descheduling + * + * - Switch over to the task stack temporarily. + * + * - Save the task's callee-saves onto the task stack. + * (the task is now 'descheduled', safe to set aside) + * + * - Switch *back* to the C stack. + * + * - Restore the C-stack callee-saves. + * + * - Return to the caller on the C stack that activated the task. + * + *) + let esp_n = word_n (Il.Hreg esp) in + let edx_n = word_n (Il.Hreg edx) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + + mov + (rc edx) (c (esp_n 0)); (* edx <- arg0 (task) *) + mov + (rc esp) + (c (edx_n Abi.task_field_rust_sp)); (* esp <- task->rust_sp *) + save_callee_saves e; + mov (* task->rust_sp <- esp *) + (edx_n Abi.task_field_rust_sp) + (ro esp); + mov + (rc esp) + (c (edx_n Abi.task_field_runtime_sp)); (* esp <- task->runtime_sp *) + + (**** IN C STACK ****) + restore_callee_saves e; + emit Il.Ret; + (***********************) + () +;; + + +let push_pos32 (e:Il.emitter) (fix:fixup) : unit = + let (reg, _, _) = get_next_pc_thunk in + Abi.load_fixup_addr e reg fix Il.OpaqueTy; + Il.emit e (Il.Push (Il.Cell (Il.Reg (reg, Il.AddrTy Il.OpaqueTy)))) +;; + +let objfile_start + (e:Il.emitter) + ~(start_fixup:fixup) + ~(rust_start_fixup:fixup) + ~(main_fn_fixup:fixup) + ~(crate_fixup:fixup) + ~(indirect_start:bool) + : unit = + let ebp_n = word_n (Il.Hreg ebp) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let push_pos32 = push_pos32 e in + Il.emit_full e (Some start_fixup) [] Il.Dead; + save_callee_saves e; + mov (rc ebp) (ro esp); + + (* If we're very lucky, the platform will have left us with + * something sensible in the startup stack like so: + * + * *ebp+24 = [arg1 ] = argv + * *ebp+20 = [arg0 ] = argc + * *ebp+16 = [retpc ] + * *ebp+12 = [old_ebp] + * *ebp+8 = [old_edi] + * *ebp+4 = [old_esi] + * *ebp = [old_ebx] + * + * This is not the case everywhere, but we start with this + * assumption and correct it in the runtime library. + *) + + (* Copy argv. *) + mov (rc eax) (c (ebp_n (2 + n_callee_saves))); + Il.emit e (Il.Push (ro eax)); + + (* Copy argc. *) + mov (rc eax) (c (ebp_n (1 + n_callee_saves))); + Il.emit e (Il.Push (ro eax)); + + push_pos32 crate_fixup; + push_pos32 main_fn_fixup; + let fptr = + Abi.load_fixup_codeptr e (h eax) rust_start_fixup true indirect_start + in + Il.emit e (Il.call (rc eax) fptr); + Il.emit e (Il.Pop (rc ecx)); + Il.emit e (Il.Pop (rc ecx)); + Il.emit e (Il.Pop (rc ecx)); + Il.emit e (Il.Pop (rc ecx)); + Il.emit e (Il.umov (rc esp) (ro ebp)); + restore_callee_saves e; + Il.emit e Il.Ret; +;; + +let (abi:Abi.abi) = + { + Abi.abi_word_sz = word_sz; + Abi.abi_word_bits = word_bits; + Abi.abi_word_ty = word_ty; + + Abi.abi_is_2addr_machine = true; + Abi.abi_has_pcrel_data = false; + Abi.abi_has_pcrel_code = true; + + Abi.abi_n_hardregs = n_hardregs; + Abi.abi_str_of_hardreg = reg_str; + Abi.abi_prealloc_quad = prealloc_quad; + Abi.abi_constrain_vregs = constrain_vregs; + + Abi.abi_emit_fn_prologue = fn_prologue; + Abi.abi_emit_fn_epilogue = fn_epilogue; + Abi.abi_emit_fn_tail_call = fn_tail_call; + Abi.abi_clobbers = clobbers; + + Abi.abi_emit_native_call = emit_native_call; + Abi.abi_emit_native_void_call = emit_native_void_call; + Abi.abi_emit_native_call_in_thunk = emit_native_call_in_thunk; + Abi.abi_emit_inline_memcpy = inline_memcpy; + + Abi.abi_activate = activate_glue; + Abi.abi_yield = yield_glue; + Abi.abi_unwind = unwind_glue; + Abi.abi_get_next_pc_thunk = Some get_next_pc_thunk; + + Abi.abi_sp_reg = (Il.Hreg esp); + Abi.abi_fp_reg = (Il.Hreg ebp); + Abi.abi_dwarf_fp_reg = dwarf_ebp; + Abi.abi_tp_cell = task_ptr; + Abi.abi_frame_base_sz = frame_base_sz; + Abi.abi_frame_info_sz = frame_info_sz; + Abi.abi_implicit_args_sz = implicit_args_sz; + Abi.abi_spill_slot = spill_slot; + } + + +(* + * NB: factor the instruction selector often. There's lots of + * semi-redundancy in the ISA. + *) + + +let imm_is_signed_byte (n:int64) : bool = + (i64_le (-128L) n) && (i64_le n 127L) +;; + +let imm_is_unsigned_byte (n:int64) : bool = + (i64_le (0L) n) && (i64_le n 255L) +;; + + +let rm_r (c:Il.cell) (r:int) : Asm.frag = + let reg_ebp = 6 in + let reg_esp = 7 in + + (* + * We do a little contortion here to accommodate the special case of + * being asked to form esp-relative addresses; these require SIB + * bytes on x86. Of course! + *) + let sib_esp_base = Asm.BYTE 0x24 in + let seq1 rm modrm = + if rm = reg_esp + then Asm.SEQ [| modrm; sib_esp_base |] + else modrm + in + let seq2 rm modrm disp = + if rm = reg_esp + then Asm.SEQ [| modrm; sib_esp_base; disp |] + else Asm.SEQ [| modrm; disp |] + in + + match c with + Il.Reg ((Il.Hreg rm), _) -> + Asm.BYTE (modrm_reg (reg rm) r) + | Il.Mem (a, _) -> + begin + match a with + Il.Abs disp -> + Asm.SEQ [| Asm.BYTE (modrm_deref_disp32 r); + Asm.WORD (TY_i32, disp) |] + + | Il.RegIn ((Il.Hreg rm), None) when rm != reg_ebp -> + seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r)) + + | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM 0L)) + when rm != reg_ebp -> + seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r)) + + (* The next two are just to save the relaxation system some + * churn. + *) + + | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n)) + when imm_is_signed_byte n -> + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r)) + (Asm.WORD (TY_i8, Asm.IMM n)) + + | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n)) -> + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r)) + (Asm.WORD (TY_i32, Asm.IMM n)) + + | Il.RegIn ((Il.Hreg rm), Some disp) -> + Asm.new_relaxation + [| + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r)) + (Asm.WORD (TY_i32, disp)); + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r)) + (Asm.WORD (TY_i8, disp)) + |] + | _ -> raise Unrecognized + end + | _ -> raise Unrecognized +;; + + +let insn_rm_r (op:int) (c:Il.cell) (r:int) : Asm.frag = + Asm.SEQ [| Asm.BYTE op; rm_r c r |] +;; + + +let insn_rm_r_imm + (op:int) + (c:Il.cell) + (r:int) + (ty:ty_mach) + (i:Asm.expr64) + : Asm.frag = + Asm.SEQ [| Asm.BYTE op; rm_r c r; Asm.WORD (ty, i) |] +;; + +let insn_rm_r_imm_s8_s32 + (op8:int) + (op32:int) + (c:Il.cell) + (r:int) + (i:Asm.expr64) + : Asm.frag = + match i with + Asm.IMM n when imm_is_signed_byte n -> + insn_rm_r_imm op8 c r TY_i8 i + | _ -> + Asm.new_relaxation + [| + insn_rm_r_imm op32 c r TY_i32 i; + insn_rm_r_imm op8 c r TY_i8 i + |] +;; + +let insn_rm_r_imm_u8_u32 + (op8:int) + (op32:int) + (c:Il.cell) + (r:int) + (i:Asm.expr64) + : Asm.frag = + match i with + Asm.IMM n when imm_is_unsigned_byte n -> + insn_rm_r_imm op8 c r TY_u8 i + | _ -> + Asm.new_relaxation + [| + insn_rm_r_imm op32 c r TY_u32 i; + insn_rm_r_imm op8 c r TY_u8 i + |] +;; + + +let insn_pcrel_relax + (op8_frag:Asm.frag) + (op32_frag:Asm.frag) + (fix:fixup) + : Asm.frag = + let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in + let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in + let pcrel_expr = (Asm.SUB (Asm.M_POS fix, + Asm.M_POS pcrel_mark_fixup)) + in + Asm.new_relaxation + [| + Asm.SEQ [| op32_frag; Asm.WORD (TY_i32, pcrel_expr); def |]; + Asm.SEQ [| op8_frag; Asm.WORD (TY_i8, pcrel_expr); def |]; + |] +;; + +let insn_pcrel_simple (op32:int) (fix:fixup) : Asm.frag = + let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in + let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in + let pcrel_expr = (Asm.SUB (Asm.M_POS fix, + Asm.M_POS pcrel_mark_fixup)) + in + Asm.SEQ [| Asm.BYTE op32; Asm.WORD (TY_i32, pcrel_expr); def |] +;; + +let insn_pcrel (op8:int) (op32:int) (fix:fixup) : Asm.frag = + insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTE op32) fix +;; + +let insn_pcrel_prefix32 + (op8:int) + (prefix32:int) + (op32:int) + (fix:fixup) + : Asm.frag = + insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTES [| prefix32; op32 |]) fix +;; + +(* FIXME: tighten imm-based dispatch by imm type. *) +let cmp (a:Il.operand) (b:Il.operand) : Asm.frag = + match (a,b) with + (Il.Cell c, Il.Imm (i, TY_i8)) when is_rm8 c -> + insn_rm_r_imm 0x80 c slash7 TY_i8 i + | (Il.Cell c, Il.Imm (i, TY_u8)) when is_rm8 c -> + insn_rm_r_imm 0x80 c slash7 TY_u8 i + | (Il.Cell c, Il.Imm (i, _)) when is_rm32 c -> + (* + * NB: We can't switch on signed-ness here, as 'cmp' is + * defined to sign-extend its operand; i.e. we have to treat + * it as though you're emitting a signed byte (in the sense of + * immediate-size selection) even if the incoming value is + * unsigned. + *) + insn_rm_r_imm_s8_s32 0x83 0x81 c slash7 i + | (Il.Cell c, Il.Cell (Il.Reg (Il.Hreg r, _))) -> + insn_rm_r 0x39 c (reg r) + | (Il.Cell (Il.Reg (Il.Hreg r, _)), Il.Cell c) -> + insn_rm_r 0x3b c (reg r) + | _ -> raise Unrecognized +;; + +let zero (dst:Il.cell) (count:Il.operand) : Asm.frag = + match (dst, count) with + + ((Il.Mem (Il.RegIn ((Il.Hreg dst_ptr), None), _)), + Il.Cell (Il.Reg ((Il.Hreg count), _))) + when dst_ptr = edi && count = ecx -> + Asm.BYTES [| + 0xb0; 0x0; (* mov %eax, 0 : move a zero into al. *) + 0xf3; 0xaa; (* rep stos m8 : fill ecx bytes at [edi] with al *) + |] + + | _ -> raise Unrecognized +;; + +let mov (signed:bool) (dst:Il.cell) (src:Il.operand) : Asm.frag = + if is_ty8 (Il.cell_scalar_ty dst) || is_ty8 (Il.operand_scalar_ty src) + then + begin + (match dst with + Il.Reg (Il.Hreg r, _) + -> assert (is_ok_r8 r) | _ -> ()); + (match src with + Il.Cell (Il.Reg (Il.Hreg r, _)) + -> assert (is_ok_r8 r) | _ -> ()); + end; + + match (signed, dst, src) with + + (* m8 <- r??, r8 or truncate(r32). *) + (_, _, Il.Cell (Il.Reg ((Il.Hreg r), _))) + when is_m8 dst -> + insn_rm_r 0x88 dst (reg r) + + (* r8 <- r8: treat as r32 <- r32. *) + | (_, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell) + when is_r8 dst && is_r8 src_cell -> + insn_rm_r 0x8b src_cell (reg r) + + (* rm32 <- r32 *) + | (_, _, Il.Cell (Il.Reg ((Il.Hreg r), src_ty))) + when (is_r8 dst || is_rm32 dst) && is_ty32 src_ty -> + insn_rm_r 0x89 dst (reg r) + + (* r32 <- rm32 *) + | (_, (Il.Reg ((Il.Hreg r), dst_ty)), Il.Cell src_cell) + when is_ty32 dst_ty && is_rm32 src_cell -> + insn_rm_r 0x8b src_cell (reg r) + + (* MOVZX: r8/r32 <- zx(rm8) *) + | (false, Il.Reg ((Il.Hreg r, _)), Il.Cell src_cell) + when (is_r8 dst || is_r32 dst) && is_rm8 src_cell -> + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xb6 src_cell (reg r) |] + + (* MOVZX: m32 <- zx(r8) *) + | (false, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell))) + when (is_m32 dst) && is_r8 src_cell -> + (* Fake with 2 insns: + * + * movzx r32 <- r8; (in-place zero-extension) + * mov m32 <- r32; (NB: must happen in AL/CL/DL/BL) + *) + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xb6 src_cell (reg r); + insn_rm_r 0x89 dst (reg r); + |] + + (* MOVSX: r8/r32 <- sx(rm8) *) + | (true, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell) + when (is_r8 dst || is_r32 dst) && is_rm8 src_cell -> + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xbe src_cell (reg r) |] + + (* MOVSX: m32 <- sx(r8) *) + | (true, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell))) + when (is_m32 dst) && is_r8 src_cell -> + (* Fake with 2 insns: + * + * movsx r32 <- r8; (in-place sign-extension) + * mov m32 <- r32; (NB: must happen in AL/CL/DL/BL) + *) + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xbe src_cell (reg r); + insn_rm_r 0x89 dst (reg r); + |] + + (* m8 <- imm8 (signed) *) + | (_, _, Il.Imm ((Asm.IMM n), _)) + when is_m8 dst && imm_is_signed_byte n && signed -> + insn_rm_r_imm 0xc6 dst slash0 TY_i8 (Asm.IMM n) + + (* m8 <- imm8 (unsigned) *) + | (_, _, Il.Imm ((Asm.IMM n), _)) + when is_m8 dst && imm_is_unsigned_byte n && (not signed) -> + insn_rm_r_imm 0xc6 dst slash0 TY_u8 (Asm.IMM n) + + (* rm32 <- imm32 *) + | (_, _, Il.Imm (i, _)) when is_rm32 dst || is_r8 dst -> + let t = if signed then TY_u32 else TY_i32 in + insn_rm_r_imm 0xc7 dst slash0 t i + + | _ -> raise Unrecognized +;; + + +let lea (dst:Il.cell) (src:Il.operand) : Asm.frag = + match (dst, src) with + (Il.Reg ((Il.Hreg r), dst_ty), + Il.Cell (Il.Mem (mem, _))) + when is_ty32 dst_ty -> + insn_rm_r 0x8d (Il.Mem (mem, Il.OpaqueTy)) (reg r) + + | (Il.Reg ((Il.Hreg r), dst_ty), + Il.ImmPtr (fix, _)) + when is_ty32 dst_ty && r = eax -> + let anchor = new_fixup "anchor" in + let fix_off = Asm.SUB ((Asm.M_POS fix), + (Asm.M_POS anchor)) + in + (* NB: These instructions must come as a + * cluster, w/o any separation. + *) + Asm.SEQ [| + insn_pcrel_simple 0xe8 get_next_pc_thunk_fixup; + Asm.DEF (anchor, insn_rm_r_imm 0x81 dst slash0 TY_i32 fix_off); + |] + + | _ -> raise Unrecognized +;; + + +let select_insn_misc (q:Il.quad') : Asm.frag = + + match q with + Il.Call c -> + begin + match c.Il.call_dst with + Il.Reg ((Il.Hreg dst), _) when dst = eax -> + begin + match c.Il.call_targ with + + Il.CodePtr (Il.Cell c) + when Il.cell_referent_ty c + = Il.ScalarTy (Il.AddrTy Il.CodeTy) -> + insn_rm_r 0xff c slash2 + + | Il.CodePtr (Il.ImmPtr (f, Il.CodeTy)) -> + insn_pcrel_simple 0xe8 f + + | _ -> raise Unrecognized + end + | _ -> raise Unrecognized + end + + | Il.Push (Il.Cell (Il.Reg ((Il.Hreg r), t))) when is_ty32 t -> + Asm.BYTE (0x50 + (reg r)) + + | Il.Push (Il.Cell c) when is_rm32 c -> + insn_rm_r 0xff c slash6 + + | Il.Push (Il.Imm (Asm.IMM i, _)) when imm_is_unsigned_byte i -> + Asm.SEQ [| Asm.BYTE 0x6a; Asm.WORD (TY_u8, (Asm.IMM i)) |] + + | Il.Push (Il.Imm (i, _)) -> + Asm.SEQ [| Asm.BYTE 0x68; Asm.WORD (TY_u32, i) |] + + | Il.Pop (Il.Reg ((Il.Hreg r), t)) when is_ty32 t -> + Asm.BYTE (0x58 + (reg r)) + + | Il.Pop c when is_rm32 c -> + insn_rm_r 0x8f c slash0 + + | Il.Ret -> Asm.BYTE 0xc3 + + | Il.Jmp j -> + begin + match (j.Il.jmp_op, j.Il.jmp_targ) with + + (Il.JMP, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) -> + insn_pcrel 0xeb 0xe9 f + + | (Il.JMP, Il.CodePtr (Il.Cell c)) + when Il.cell_referent_ty c + = Il.ScalarTy (Il.AddrTy Il.CodeTy) -> + insn_rm_r 0xff c slash4 + + (* FIXME: refactor this to handle cell-based jumps + * if we ever need them. So far not. *) + | (_, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) -> + let (op8, op32) = + match j.Il.jmp_op with + | Il.JC -> (0x72, 0x82) + | Il.JNC -> (0x73, 0x83) + | Il.JZ -> (0x74, 0x84) + | Il.JNZ -> (0x75, 0x85) + | Il.JO -> (0x70, 0x80) + | Il.JNO -> (0x71, 0x81) + | Il.JE -> (0x74, 0x84) + | Il.JNE -> (0x75, 0x85) + + | Il.JL -> (0x7c, 0x8c) + | Il.JLE -> (0x7e, 0x8e) + | Il.JG -> (0x7f, 0x8f) + | Il.JGE -> (0x7d, 0x8d) + + | Il.JB -> (0x72, 0x82) + | Il.JBE -> (0x76, 0x86) + | Il.JA -> (0x77, 0x87) + | Il.JAE -> (0x73, 0x83) + | _ -> raise Unrecognized + in + let prefix32 = 0x0f in + insn_pcrel_prefix32 op8 prefix32 op32 f + + | _ -> raise Unrecognized + end + + | Il.Dead -> Asm.MARK + | Il.Debug -> Asm.BYTES [| 0xcc |] (* int 3 *) + | Il.Regfence -> Asm.MARK + | Il.End -> Asm.BYTES [| 0x90 |] + | Il.Nop -> Asm.BYTES [| 0x90 |] + | _ -> raise Unrecognized +;; + + +type alu_binop_codes = + { + insn: string; + immslash: int; (* mod/rm "slash" code for imm-src variant *) + rm_dst_op8: int; (* opcode for 8-bit r/m dst variant *) + rm_dst_op32: int; (* opcode for 32-bit r/m dst variant *) + rm_src_op8: int; (* opcode for 8-bit r/m src variant *) + rm_src_op32: int; (* opcode for 32-bit r/m src variant *) + } +;; + +let alu_binop + (dst:Il.cell) (src:Il.operand) (codes:alu_binop_codes) + : Asm.frag = + match (dst, src) with + (Il.Reg ((Il.Hreg r), dst_ty), Il.Cell c) + when (is_ty32 dst_ty && is_rm32 c) || (is_ty8 dst_ty && is_rm8 c) + -> insn_rm_r codes.rm_src_op32 c (reg r) + + | (_, Il.Cell (Il.Reg ((Il.Hreg r), src_ty))) + when (is_rm32 dst && is_ty32 src_ty) || (is_rm8 dst && is_ty8 src_ty) + -> insn_rm_r codes.rm_dst_op32 dst (reg r) + + | (_, Il.Imm (i, _)) when is_rm32 dst || is_rm8 dst + -> insn_rm_r_imm_s8_s32 0x83 0x81 dst codes.immslash i + + | _ -> raise Unrecognized +;; + + +let mul_like (src:Il.operand) (signed:bool) (slash:int) + : Asm.frag = + match src with + Il.Cell src when is_rm32 src -> + insn_rm_r 0xf7 src slash + + | Il.Cell src when is_rm8 src -> + insn_rm_r 0xf6 src slash + + | Il.Imm (_, TY_u32) + | Il.Imm (_, TY_i32) -> + let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits32) in + Asm.SEQ [| mov signed tmp src; + insn_rm_r 0xf7 tmp slash |] + + | Il.Imm (_, TY_u8) + | Il.Imm (_, TY_i8) -> + let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits8) in + Asm.SEQ [| mov signed tmp src; + insn_rm_r 0xf6 tmp slash |] + + | _ -> raise Unrecognized +;; + + +let select_insn (q:Il.quad) : Asm.frag = + match q.Il.quad_body with + Il.Unary u -> + let unop s = + if u.Il.unary_src = Il.Cell u.Il.unary_dst + then insn_rm_r 0xf7 u.Il.unary_dst s + else raise Unrecognized + in + begin + match u.Il.unary_op with + Il.UMOV -> mov false u.Il.unary_dst u.Il.unary_src + | Il.IMOV -> mov true u.Il.unary_dst u.Il.unary_src + | Il.NEG -> unop slash3 + | Il.NOT -> unop slash2 + | Il.ZERO -> zero u.Il.unary_dst u.Il.unary_src + end + + | Il.Lea le -> lea le.Il.lea_dst le.Il.lea_src + + | Il.Cmp c -> cmp c.Il.cmp_lhs c.Il.cmp_rhs + + | Il.Binary b -> + begin + if Il.Cell b.Il.binary_dst = b.Il.binary_lhs + then + let binop = alu_binop b.Il.binary_dst b.Il.binary_rhs in + let mulop = mul_like b.Il.binary_rhs in + + let divop signed slash = + Asm.SEQ [| + (* xor edx edx, then mul_like. *) + insn_rm_r 0x33 (rc edx) (reg edx); + mul_like b.Il.binary_rhs signed slash + |] + in + + let modop signed slash = + Asm.SEQ [| + (* divop, then mov remainder to eax instead. *) + divop signed slash; + mov false (rc eax) (ro edx) + |] + in + + let shiftop slash = + let src = b.Il.binary_rhs in + let dst = b.Il.binary_dst in + let mask i = Asm.AND (i, Asm.IMM 0xffL) in + if is_rm8 dst + then + match src with + Il.Imm (i, _) -> + insn_rm_r_imm 0xC0 dst slash TY_u8 (mask i) + | Il.Cell (Il.Reg ((Il.Hreg r), _)) + when r = ecx -> + Asm.SEQ [| Asm.BYTE 0xD2; rm_r dst slash |] + | _ -> raise Unrecognized + else + match src with + Il.Imm (i, _) -> + insn_rm_r_imm 0xC1 dst slash TY_u8 (mask i) + | Il.Cell (Il.Reg ((Il.Hreg r), _)) + when r = ecx -> + Asm.SEQ [| Asm.BYTE 0xD3; rm_r dst slash |] + | _ -> raise Unrecognized + in + + match (b.Il.binary_dst, b.Il.binary_op) with + (_, Il.ADD) -> binop { insn="ADD"; + immslash=slash0; + rm_dst_op8=0x0; + rm_dst_op32=0x1; + rm_src_op8=0x2; + rm_src_op32=0x3; } + | (_, Il.SUB) -> binop { insn="SUB"; + immslash=slash5; + rm_dst_op8=0x28; + rm_dst_op32=0x29; + rm_src_op8=0x2a; + rm_src_op32=0x2b; } + | (_, Il.AND) -> binop { insn="AND"; + immslash=slash4; + rm_dst_op8=0x20; + rm_dst_op32=0x21; + rm_src_op8=0x22; + rm_src_op32=0x23; } + | (_, Il.OR) -> binop { insn="OR"; + immslash=slash1; + rm_dst_op8=0x08; + rm_dst_op32=0x09; + rm_src_op8=0x0a; + rm_src_op32=0x0b; } + | (_, Il.XOR) -> binop { insn="XOR"; + immslash=slash6; + rm_dst_op8=0x30; + rm_dst_op32=0x31; + rm_src_op8=0x32; + rm_src_op32=0x33; } + + | (_, Il.LSL) -> shiftop slash4 + | (_, Il.LSR) -> shiftop slash5 + | (_, Il.ASR) -> shiftop slash7 + + | (Il.Reg (Il.Hreg r, t), Il.UMUL) + when (is_ty32 t || is_ty8 t) && r = eax -> + mulop false slash4 + + | (Il.Reg (Il.Hreg r, t), Il.IMUL) + when (is_ty32 t || is_ty8 t) && r = eax -> + mulop true slash5 + + | (Il.Reg (Il.Hreg r, t), Il.UDIV) + when (is_ty32 t || is_ty8 t) && r = eax -> + divop false slash6 + + | (Il.Reg (Il.Hreg r, t), Il.IDIV) + when (is_ty32 t || is_ty8 t) && r = eax -> + divop true slash7 + + | (Il.Reg (Il.Hreg r, t), Il.UMOD) + when (is_ty32 t || is_ty8 t) && r = eax -> + modop false slash6 + + | (Il.Reg (Il.Hreg r, t), Il.IMOD) + when (is_ty32 t || is_ty8 t) && r = eax -> + modop true slash7 + + | _ -> raise Unrecognized + else raise Unrecognized + end + | _ -> select_insn_misc q.Il.quad_body +;; + + +let new_emitter_without_vregs _ : Il.emitter = + Il.new_emitter + abi.Abi.abi_prealloc_quad + abi.Abi.abi_is_2addr_machine + false None +;; + +let select_insns (sess:Session.sess) (q:Il.quads) : Asm.frag = + let scopes = Stack.create () in + let fixups = Stack.create () in + let pop_frags _ = + Asm.SEQ (Array.of_list + (List.rev + (!(Stack.pop scopes)))) + in + ignore (Stack.push (ref []) scopes); + for i = 0 to (Array.length q) - 1 do + let append frag = + let frags = Stack.top scopes in + frags := frag :: (!frags) + in + begin + match q.(i).Il.quad_fixup with + None -> () + | Some f -> append (Asm.DEF (f, Asm.MARK)) + end; + begin + match q.(i).Il.quad_body with + Il.Enter f -> + Stack.push f fixups; + Stack.push (ref []) scopes; + | Il.Leave -> + append (Asm.DEF (Stack.pop fixups, pop_frags ())) + | _ -> + try + append (select_insn q.(i)) + with + Unrecognized -> + Session.fail sess + "E:Assembly error: unrecognized quad: %s\n%!" + (Il.string_of_quad reg_str q.(i)); + () + end + done; + pop_frags() +;; + +let frags_of_emitted_quads (sess:Session.sess) (e:Il.emitter) : Asm.frag = + let frag = select_insns sess e.Il.emit_quads in + if sess.Session.sess_failed + then raise Unrecognized + else frag +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/driver/lib.ml b/src/boot/driver/lib.ml new file mode 100644 index 00000000..e0391c65 --- /dev/null +++ b/src/boot/driver/lib.ml @@ -0,0 +1,232 @@ +open Common;; + +let log (sess:Session.sess) = + Session.log "lib" + sess.Session.sess_log_lib + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_lib + then thunk () + else () +;; + +(* FIXME: move these to sess. *) +let ar_cache = Hashtbl.create 0 ;; +let sects_cache = Hashtbl.create 0;; +let meta_cache = Hashtbl.create 0;; +let die_cache = Hashtbl.create 0;; + +let get_ar + (sess:Session.sess) + (filename:filename) + : Asm.asm_reader option = + htab_search_or_add ar_cache filename + begin + fun _ -> + let sniff = + match sess.Session.sess_targ with + Win32_x86_pe -> Pe.sniff + | MacOS_x86_macho -> Macho.sniff + | Linux_x86_elf -> Elf.sniff + in + sniff sess filename + end +;; + + +let get_sects + (sess:Session.sess) + (filename:filename) : + (Asm.asm_reader * ((string,(int*int)) Hashtbl.t)) option = + htab_search_or_add sects_cache filename + begin + fun _ -> + match get_ar sess filename with + None -> None + | Some ar -> + let get_sections = + match sess.Session.sess_targ with + Win32_x86_pe -> Pe.get_sections + | MacOS_x86_macho -> Macho.get_sections + | Linux_x86_elf -> Elf.get_sections + in + Some (ar, (get_sections sess ar)) + end +;; + +let get_meta + (sess:Session.sess) + (filename:filename) + : Ast.meta option = + htab_search_or_add meta_cache filename + begin + fun _ -> + match get_sects sess filename with + None -> None + | Some (ar, sects) -> + match htab_search sects ".note.rust" with + Some (off, _) -> + ar.Asm.asm_seek off; + Some (Asm.read_rust_note ar) + | None -> None + end +;; + +let get_dies_opt + (sess:Session.sess) + (filename:filename) + : (Dwarf.rooted_dies option) = + htab_search_or_add die_cache filename + begin + fun _ -> + match get_sects sess filename with + None -> None + | Some (ar, sects) -> + let debug_abbrev = Hashtbl.find sects ".debug_abbrev" in + let debug_info = Hashtbl.find sects ".debug_info" in + let abbrevs = Dwarf.read_abbrevs sess ar debug_abbrev in + let dies = Dwarf.read_dies sess ar debug_info abbrevs in + ar.Asm.asm_close (); + Hashtbl.remove ar_cache filename; + Some dies + end +;; + +let get_dies + (sess:Session.sess) + (filename:filename) + : Dwarf.rooted_dies = + match get_dies_opt sess filename with + None -> + Printf.fprintf stderr "Error: bad crate file: %s\n%!" filename; + exit 1 + | Some dies -> dies +;; + +let get_file_mod + (sess:Session.sess) + (abi:Abi.abi) + (filename:filename) + (nref:node_id ref) + (oref:opaque_id ref) + : Ast.mod_items = + let dies = get_dies sess filename in + let items = Hashtbl.create 0 in + Dwarf.extract_mod_items nref oref abi items dies; + items +;; + +let get_mod + (sess:Session.sess) + (abi:Abi.abi) + (meta:Ast.meta_pat) + (use_id:node_id) + (nref:node_id ref) + (oref:opaque_id ref) + : (filename * Ast.mod_items) = + let found = Queue.create () in + let suffix = + match sess.Session.sess_targ with + Win32_x86_pe -> ".dll" + | MacOS_x86_macho -> ".dylib" + | Linux_x86_elf -> ".so" + in + let rec meta_matches i f_meta = + if i >= (Array.length meta) + then true + else + match meta.(i) with + (* FIXME: bind the wildcards. *) + (_, None) -> meta_matches (i+1) f_meta + | (k, Some v) -> + match atab_search f_meta k with + None -> false + | Some v' -> + if v = v' + then meta_matches (i+1) f_meta + else false + in + let file_matches file = + log sess "searching for metadata in %s" file; + match get_meta sess file with + None -> false + | Some f_meta -> + log sess "matching metadata in %s" file; + meta_matches 0 f_meta + in + iflog sess + begin + fun _ -> + log sess "searching for library matching:"; + Array.iter + begin + fun (k,vo) -> + match vo with + None -> () + | Some v -> + log sess "%s = %S" k v + end + meta; + end; + Queue.iter + begin + fun dir -> + let dh = Unix.opendir dir in + let rec scan _ = + try + let file = Unix.readdir dh in + log sess "considering file %s" file; + if (Filename.check_suffix file suffix) && + (file_matches file) + then + begin + iflog sess + begin + fun _ -> + log sess "matched against library %s" file; + match get_meta sess file with + None -> () + | Some meta -> + Array.iter + (fun (k,v) -> log sess "%s = %S" k v) + meta; + end; + Queue.add file found; + end; + scan() + with + End_of_file -> () + in + scan () + end + sess.Session.sess_lib_dirs; + match Queue.length found with + 0 -> Common.err (Some use_id) "unsatisfied 'use' clause" + | 1 -> + let filename = Queue.pop found in + let items = get_file_mod sess abi filename nref oref in + (filename, items) + | _ -> Common.err (Some use_id) "multiple crates match 'use' clause" +;; + +let infer_lib_name + (sess:Session.sess) + (ident:filename) + : filename = + match sess.Session.sess_targ with + Win32_x86_pe -> ident ^ ".dll" + | MacOS_x86_macho -> "lib" ^ ident ^ ".dylib" + | Linux_x86_elf -> "lib" ^ ident ^ ".so" +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/driver/llvm/glue.ml b/src/boot/driver/llvm/glue.ml new file mode 100644 index 00000000..ef5c1c86 --- /dev/null +++ b/src/boot/driver/llvm/glue.ml @@ -0,0 +1,37 @@ +(* + * Glue for the LLVM backend. + *) + +let alt_argspecs sess = [ + ("-llvm", Arg.Unit (fun _ -> sess.Session.sess_alt_backend <- true), + "emit LLVM bitcode") +];; + +let alt_pipeline sess sem_cx crate = + let process processor = + processor sem_cx crate; + if sess.Session.sess_failed then exit 1 else () + in + Array.iter process + [| + Resolve.process_crate; + Type.process_crate; + Effect.process_crate; + Typestate.process_crate; + Loop.process_crate; + Alias.process_crate; + Dead.process_crate; + Layout.process_crate + |]; + Llemit.trans_and_process_crate sess sem_cx crate +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) + diff --git a/src/boot/driver/main.ml b/src/boot/driver/main.ml new file mode 100644 index 00000000..c5199a82 --- /dev/null +++ b/src/boot/driver/main.ml @@ -0,0 +1,421 @@ + +open Common;; + +let _ = + Gc.set { (Gc.get()) with + Gc.space_overhead = 400; } +;; + +let (targ:Common.target) = + match Sys.os_type with + "Unix" -> + (* FIXME: this is an absurd heuristic. *) + if Sys.file_exists "/System/Library" + then MacOS_x86_macho + else Linux_x86_elf + | "Win32" -> Win32_x86_pe + | "Cygwin" -> Win32_x86_pe + | _ -> Linux_x86_elf +;; + +let (abi:Abi.abi) = X86.abi;; + +let (sess:Session.sess) = + { + Session.sess_in = None; + Session.sess_out = None; + Session.sess_library_mode = false; + Session.sess_alt_backend = false; + (* FIXME: need something fancier here for unix sub-flavours. *) + Session.sess_targ = targ; + Session.sess_log_lex = false; + Session.sess_log_parse = false; + Session.sess_log_ast = false; + Session.sess_log_resolve = false; + Session.sess_log_type = false; + Session.sess_log_effect = false; + Session.sess_log_typestate = false; + Session.sess_log_loop = false; + Session.sess_log_alias = false; + Session.sess_log_dead = false; + Session.sess_log_layout = false; + Session.sess_log_itype = false; + Session.sess_log_trans = false; + Session.sess_log_dwarf = false; + Session.sess_log_ra = false; + Session.sess_log_insn = false; + Session.sess_log_asm = false; + Session.sess_log_obj = false; + Session.sess_log_lib = false; + Session.sess_log_out = stdout; + Session.sess_trace_block = false; + Session.sess_trace_drop = false; + Session.sess_trace_tag = false; + Session.sess_trace_gc = false; + Session.sess_failed = false; + Session.sess_spans = Hashtbl.create 0; + Session.sess_report_timing = false; + Session.sess_report_gc = false; + Session.sess_report_deps = false; + Session.sess_timings = Hashtbl.create 0; + Session.sess_lib_dirs = Queue.create (); + } +;; + +let default_output_filename (sess:Session.sess) : filename option = + match sess.Session.sess_in with + None -> None + | Some fname -> + let base = Filename.chop_extension (Filename.basename fname) in + let out = + if sess.Session.sess_library_mode + then + Lib.infer_lib_name sess base + else + base ^ (match sess.Session.sess_targ with + Linux_x86_elf -> "" + | MacOS_x86_macho -> "" + | Win32_x86_pe -> ".exe") + in + Some out +;; + +let set_default_output_filename (sess:Session.sess) : unit = + match sess.Session.sess_out with + None -> (sess.Session.sess_out <- default_output_filename sess) + | _ -> () +;; + + +let dump_sig (filename:filename) : unit = + let items = + Lib.get_file_mod sess abi filename (ref (Node 0)) (ref (Opaque 0)) in + Printf.fprintf stdout "%s\n" (Ast.fmt_to_str Ast.fmt_mod_items items); + exit 0 +;; + +let dump_meta (filename:filename) : unit = + begin + match Lib.get_meta sess filename with + None -> Printf.fprintf stderr "Error: bad crate file: %s\n" filename + | Some meta -> + Array.iter + begin + fun (k,v) -> + Printf.fprintf stdout "%s = %S\n" k v; + end + meta + end; + exit 0 +;; + +let flag f opt desc = + (opt, Arg.Unit f, desc) +;; + +let argspecs = + [ + ("-t", Arg.Symbol (["linux-x86-elf"; "win32-x86-pe"; "macos-x86-macho"], + fun s -> (sess.Session.sess_targ <- + (match s with + "win32-x86-pe" -> Win32_x86_pe + | "macos-x86-macho" -> MacOS_x86_macho + | _ -> Linux_x86_elf))), + (" target (default: " ^ (match sess.Session.sess_targ with + Win32_x86_pe -> "win32-x86-pe" + | Linux_x86_elf -> "linux-x86-elf" + | MacOS_x86_macho -> "macos-x86-macho" + ) ^ ")")); + ("-o", Arg.String (fun s -> sess.Session.sess_out <- Some s), + "file to output (default: " + ^ (Session.filename_of sess.Session.sess_out) ^ ")"); + ("-shared", Arg.Unit (fun _ -> sess.Session.sess_library_mode <- true), + "compile a shared-library crate"); + ("-L", Arg.String (fun s -> Queue.add s sess.Session.sess_lib_dirs), + "dir to add to library path"); + ("-litype", Arg.Unit (fun _ -> sess.Session.sess_log_itype <- true; + Il.log_iltypes := true), "log IL types"); + (flag (fun _ -> sess.Session.sess_log_lex <- true) + "-llex" "log lexing"); + (flag (fun _ -> sess.Session.sess_log_parse <- true) + "-lparse" "log parsing"); + (flag (fun _ -> sess.Session.sess_log_ast <- true) + "-last" "log AST"); + (flag (fun _ -> sess.Session.sess_log_resolve <- true) + "-lresolve" "log resolution"); + (flag (fun _ -> sess.Session.sess_log_type <- true) + "-ltype" "log type checking"); + (flag (fun _ -> sess.Session.sess_log_effect <- true) + "-leffect" "log effect checking"); + (flag (fun _ -> sess.Session.sess_log_typestate <- true) + "-ltypestate" "log typestate pass"); + (flag (fun _ -> sess.Session.sess_log_loop <- true) + "-lloop" "log loop analysis"); + (flag (fun _ -> sess.Session.sess_log_alias <- true) + "-lalias" "log alias analysis"); + (flag (fun _ -> sess.Session.sess_log_dead <- true) + "-ldead" "log dead analysis"); + (flag (fun _ -> sess.Session.sess_log_layout <- true) + "-llayout" "log frame layout"); + (flag (fun _ -> sess.Session.sess_log_trans <- true) + "-ltrans" "log IR translation"); + (flag (fun _ -> sess.Session.sess_log_dwarf <- true) + "-ldwarf" "log DWARF generation"); + (flag (fun _ -> sess.Session.sess_log_ra <- true) + "-lra" "log register allocation"); + (flag (fun _ -> sess.Session.sess_log_insn <- true) + "-linsn" "log instruction selection"); + (flag (fun _ -> sess.Session.sess_log_asm <- true) + "-lasm" "log assembly"); + (flag (fun _ -> sess.Session.sess_log_obj <- true) + "-lobj" "log object-file generation"); + (flag (fun _ -> sess.Session.sess_log_lib <- true) + "-llib" "log library search"); + + (flag (fun _ -> sess.Session.sess_trace_block <- true) + "-tblock" "emit block-boundary tracing code"); + (flag (fun _ -> sess.Session.sess_trace_drop <- true) + "-tdrop" "emit slot-drop tracing code"); + (flag (fun _ -> sess.Session.sess_trace_tag <- true) + "-ttag" "emit tag-construction tracing code"); + (flag (fun _ -> sess.Session.sess_trace_gc <- true) + "-tgc" "emit GC tracing code"); + + ("-tall", Arg.Unit (fun _ -> + sess.Session.sess_trace_block <- true; + sess.Session.sess_trace_drop <- true; + sess.Session.sess_trace_tag <- true ), + "emit all tracing code"); + + (flag (fun _ -> sess.Session.sess_report_timing <- true) + "-rtime" "report timing of compiler phases"); + (flag (fun _ -> sess.Session.sess_report_gc <- true) + "-rgc" "report gc behavior of compiler"); + ("-rsig", Arg.String dump_sig, + "report type-signature from DWARF info in compiled file, then exit"); + ("-rmeta", Arg.String dump_meta, + "report metadata from DWARF info in compiled file, then exit"); + ("-rdeps", Arg.Unit (fun _ -> sess.Session.sess_report_deps <- true), + "report dependencies of input, then exit"); + ] @ (Glue.alt_argspecs sess) +;; + +let exit_if_failed _ = + if sess.Session.sess_failed + then exit 1 + else () +;; + +Arg.parse + argspecs + (fun arg -> sess.Session.sess_in <- (Some arg)) + ("usage: " ^ Sys.argv.(0) ^ " [options] (CRATE_FILE.rc|SOURCE_FILE.rs)\n") +;; + +let _ = set_default_output_filename sess +;; + +let _ = + if sess.Session.sess_out = None + then (Printf.fprintf stderr "Error: no output file specified\n"; exit 1) + else () +;; + +let _ = + if sess.Session.sess_in = None + then (Printf.fprintf stderr "Error: empty input filename\n"; exit 1) + else () +;; + + +let (crate:Ast.crate) = + Session.time_inner "parse" sess + begin + fun _ -> + let infile = Session.filename_of sess.Session.sess_in in + let crate = + if Filename.check_suffix infile ".rc" + then + Cexp.parse_crate_file sess + (Lib.get_mod sess abi) + (Lib.infer_lib_name sess) + else + if Filename.check_suffix infile ".rs" + then + Cexp.parse_src_file sess + (Lib.get_mod sess abi) + (Lib.infer_lib_name sess) + else + begin + Printf.fprintf stderr + "Error: unrecognized input file type: %s\n" + infile; + exit 1 + end + in + if sess.Session.sess_report_deps + then + let outfile = (Session.filename_of sess.Session.sess_out) in + let depfile = + match sess.Session.sess_targ with + Linux_x86_elf + | MacOS_x86_macho -> outfile ^ ".d" + | Win32_x86_pe -> (Filename.chop_extension outfile) ^ ".d" + in + begin + Array.iter + begin + fun out -> + Printf.fprintf stdout "%s: \\\n" out; + Hashtbl.iter + (fun _ file -> + Printf.fprintf stdout " %s \\\n" file) + crate.node.Ast.crate_files; + Printf.fprintf stdout "\n" + end + [| outfile; depfile|]; + exit 0 + end + else + crate + end +;; + +exit_if_failed () +;; + +if sess.Session.sess_log_ast +then + begin + Printf.fprintf stdout "Post-parse AST:\n"; + Format.set_margin 80; + Printf.fprintf stdout "%s\n" (Ast.fmt_to_str Ast.fmt_crate crate) + end + +let list_to_seq ls = Asm.SEQ (Array.of_list ls);; +let select_insns (quads:Il.quads) : Asm.frag = + Session.time_inner "insn" sess + (fun _ -> X86.select_insns sess quads) +;; + + +(* Semantic passes. *) +let sem_cx = Semant.new_ctxt sess abi crate.node +;; + + +let main_pipeline _ = + let _ = + Array.iter + (fun proc -> + proc sem_cx crate; + exit_if_failed ()) + [| Resolve.process_crate; + Type.process_crate; + Effect.process_crate; + Typestate.process_crate; + Loop.process_crate; + Alias.process_crate; + Dead.process_crate; + Layout.process_crate; + Trans.process_crate |] + in + + (* Tying up various knots, allocating registers and selecting + * instructions. + *) + let process_code _ (code:Semant.code) : Asm.frag = + let frag = + match code.Semant.code_vregs_and_spill with + None -> select_insns code.Semant.code_quads + | Some (n_vregs, spill_fix) -> + let (quads', n_spills) = + (Session.time_inner "RA" sess + (fun _ -> + Ra.reg_alloc sess + code.Semant.code_quads + n_vregs abi)) + in + let insns = select_insns quads' in + begin + spill_fix.fixup_mem_sz <- + Some (Int64.mul + (Int64.of_int n_spills) + abi.Abi.abi_word_sz); + insns + end + in + Asm.ALIGN_FILE (Abi.general_code_alignment, + Asm.DEF (code.Semant.code_fixup, frag)) + in + + let (file_frags:Asm.frag) = + let process_file file_id frag_code = + let file_fix = Hashtbl.find sem_cx.Semant.ctxt_file_fixups file_id in + Asm.DEF (file_fix, + list_to_seq (reduce_hash_to_list process_code frag_code)) + in + list_to_seq (reduce_hash_to_list + process_file sem_cx.Semant.ctxt_file_code) + in + + exit_if_failed (); + let (glue_frags:Asm.frag) = + list_to_seq (reduce_hash_to_list + process_code sem_cx.Semant.ctxt_glue_code) + in + + exit_if_failed (); + let code = Asm.SEQ [| file_frags; glue_frags |] in + let data = list_to_seq (reduce_hash_to_list + (fun _ (_, i) -> i) sem_cx.Semant.ctxt_data) + in + (* Emitting Dwarf and PE/ELF/Macho. *) + let (dwarf:Dwarf.debug_records) = + Session.time_inner "dwarf" sess + (fun _ -> Dwarf.process_crate sem_cx crate) + in + + exit_if_failed (); + let emitter = + match sess.Session.sess_targ with + Win32_x86_pe -> Pe.emit_file + | MacOS_x86_macho -> Macho.emit_file + | Linux_x86_elf -> Elf.emit_file + in + Session.time_inner "emit" sess + (fun _ -> emitter sess crate code data sem_cx dwarf); + exit_if_failed () +;; + +if sess.Session.sess_alt_backend +then Glue.alt_pipeline sess sem_cx crate +else main_pipeline () +;; + +if sess.Session.sess_report_timing +then + begin + Printf.fprintf stdout "timing:\n\n"; + Array.iter + begin + fun name -> + Printf.fprintf stdout "%20s: %f\n" name + (Hashtbl.find sess.Session.sess_timings name) + end + (sorted_htab_keys sess.Session.sess_timings) + end; +;; + +if sess.Session.sess_report_gc +then Gc.print_stat stdout;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/driver/session.ml b/src/boot/driver/session.ml new file mode 100644 index 00000000..80253f44 --- /dev/null +++ b/src/boot/driver/session.ml @@ -0,0 +1,111 @@ +(* + * This module goes near the bottom of the dependency DAG, and holds option, + * and global-state machinery for a single run of the compiler. + *) + +open Common;; + +type sess = +{ + mutable sess_in: filename option; + mutable sess_out: filename option; + mutable sess_library_mode: bool; + mutable sess_alt_backend: bool; + mutable sess_targ: target; + mutable sess_log_lex: bool; + mutable sess_log_parse: bool; + mutable sess_log_ast: bool; + mutable sess_log_resolve: bool; + mutable sess_log_type: bool; + mutable sess_log_effect: bool; + mutable sess_log_typestate: bool; + mutable sess_log_dead: bool; + mutable sess_log_loop: bool; + mutable sess_log_alias: bool; + mutable sess_log_layout: bool; + mutable sess_log_trans: bool; + mutable sess_log_itype: bool; + mutable sess_log_dwarf: bool; + mutable sess_log_ra: bool; + mutable sess_log_insn: bool; + mutable sess_log_asm: bool; + mutable sess_log_obj: bool; + mutable sess_log_lib: bool; + mutable sess_log_out: out_channel; + mutable sess_trace_block: bool; + mutable sess_trace_drop: bool; + mutable sess_trace_tag: bool; + mutable sess_trace_gc: bool; + mutable sess_failed: bool; + mutable sess_report_timing: bool; + mutable sess_report_gc: bool; + mutable sess_report_deps: bool; + sess_timings: (string, float) Hashtbl.t; + sess_spans: (node_id,span) Hashtbl.t; + sess_lib_dirs: filename Queue.t; +} +;; + +let add_time sess name amt = + let existing = + if Hashtbl.mem sess.sess_timings name + then Hashtbl.find sess.sess_timings name + else 0.0 + in + (Hashtbl.replace sess.sess_timings name (existing +. amt)) +;; + +let time_inner name sess thunk = + let t0 = Unix.gettimeofday() in + let x = thunk() in + let t1 = Unix.gettimeofday() in + add_time sess name (t1 -. t0); + x +;; + +let get_span sess id = + if Hashtbl.mem sess.sess_spans id + then (Some (Hashtbl.find sess.sess_spans id)) + else None +;; + +let log name flag chan = + let k1 s = + Printf.fprintf chan "%s: %s\n%!" name s + in + let k2 _ = () in + Printf.ksprintf (if flag then k1 else k2) +;; + +let fail sess = + sess.sess_failed <- true; + Printf.fprintf sess.sess_log_out +;; + + +let string_of_pos (p:pos) = + let (filename, line, col) = p in + Printf.sprintf "%s:%d:%d" filename line col +;; + + +let string_of_span (s:span) = + let (filename, line0, col0) = s.lo in + let (_, line1, col1) = s.hi in + Printf.sprintf "%s:%d:%d - %d:%d" filename line0 col0 line1 col1 +;; + +let filename_of (fo:filename option) : filename = + match fo with + None -> "<none>" + | Some f -> f +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/driver/x86/glue.ml b/src/boot/driver/x86/glue.ml new file mode 100644 index 00000000..4fc74480 --- /dev/null +++ b/src/boot/driver/x86/glue.ml @@ -0,0 +1,16 @@ +(* + * Glue, or lack thereof, for the standard x86 backend. + *) + +let alt_argspecs _ = [];; +let alt_pipeline _ _ _ = ();; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) + diff --git a/src/boot/fe/ast.ml b/src/boot/fe/ast.ml new file mode 100644 index 00000000..bf7a11ff --- /dev/null +++ b/src/boot/fe/ast.ml @@ -0,0 +1,1360 @@ +(* + * There are two kinds of rust files: + * + * .rc files, containing crates. + * .rs files, containing source. + * + *) + +open Common;; + +(* + * Slot names are given by a dot-separated path within the current + * module namespace. + *) + +type ident = string +;; + +type slot_key = + KEY_ident of ident + | KEY_temp of temp_id +;; + +(* "names" are statically computable references to particular items; + they never involve dynamic indexing (nor even static tuple-indexing; + you could add it but there are few contexts that need names that would + benefit from it). + + Each component of a name may also be type-parametric; you must + supply type parameters to reference through a type-parametric name + component. So for example if foo is parametric in 2 types, you can + write foo[int,int].bar but not foo.bar. + *) + +type effect = + PURE + | IO + | STATE + | UNSAFE +;; + +type name_base = + BASE_ident of ident + | BASE_temp of temp_id + | BASE_app of (ident * (ty array)) + +and name_component = + COMP_ident of ident + | COMP_app of (ident * (ty array)) + | COMP_idx of int + +and name = + NAME_base of name_base + | NAME_ext of (name * name_component) + +(* + * Type expressions are transparent to type names, their equality is + * structural. (after normalization) + *) +and ty = + + TY_any + | TY_nil + | TY_bool + | TY_mach of ty_mach + | TY_int + | TY_uint + | TY_char + | TY_str + + | TY_tup of ty_tup + | TY_vec of slot + | TY_rec of ty_rec + + (* + * Note that ty_idx is only valid inside a slot of a ty_iso group, not + * in a general type term. + *) + | TY_tag of ty_tag + | TY_iso of ty_iso + | TY_idx of int + + | TY_fn of ty_fn + | TY_chan of ty + | TY_port of ty + + | TY_obj of ty_obj + | TY_task + + | TY_native of opaque_id + | TY_param of (ty_param_idx * effect) + | TY_named of name + | TY_type + + | TY_constrained of (ty * constrs) + +and mode = + MODE_exterior + | MODE_interior + | MODE_alias + +and slot = { slot_mode: mode; + slot_mutable: bool; + slot_ty: ty option; } + +and ty_tup = slot array + +(* In closed type terms a constraint may refer to components of the term by + * anchoring off the "formal symbol" '*', which represents "the term this + * constraint is attached to". + * + * + * For example, if I have a tuple type tup(int,int), I may wish to enforce the + * lt predicate on it; I can write this as a constrained type term like: + * + * tup(int,int) : lt( *._0, *._1 ) + * + * In fact all tuple types are converted to this form for purpose of + * type-compatibility testing; the argument tuple in a function + * + * fn (int x, int y) : lt(x, y) -> int + * + * desugars to + * + * fn (tup(int, int) : lt( *._1, *._2 )) -> int + * + *) + +and carg_base = + BASE_formal + | BASE_named of name_base + +and carg_path = + CARG_base of carg_base + | CARG_ext of (carg_path * name_component) + +and carg = + CARG_path of carg_path + | CARG_lit of lit + +and constr = + { + constr_name: name; + constr_args: carg array; + } + +and constrs = constr array + +and ty_rec = (ident * slot) array + +(* ty_tag is a sum type. + * + * a tag type expression either normalizes to a TY_tag or a TY_iso, + * which (like in ocaml) is an indexed projection from an iso-recursive + * group of TY_tags. + *) + +and ty_tag = (name, ty_tup) Hashtbl.t + +and ty_iso = + { + iso_index: int; + iso_group: ty_tag array + } + +and ty_sig = + { + sig_input_slots: slot array; + sig_input_constrs: constrs; + sig_output_slot: slot; + } + +and ty_fn_aux = + { + fn_is_iter: bool; + fn_effect: effect; + } + +and ty_fn = (ty_sig * ty_fn_aux) + +and ty_obj_header = (slot array * constrs) + +and ty_obj = (effect * ((ident,ty_fn) Hashtbl.t)) + +and check_calls = (lval * (atom array)) array + +and rec_input = (ident * mode * bool * atom) + +and tup_input = (mode * bool * atom) + +and stmt' = + + (* lval-assigning stmts. *) + STMT_spawn of (lval * domain * lval * (atom array)) + | STMT_init_rec of (lval * (rec_input array) * lval option) + | STMT_init_tup of (lval * (tup_input array)) + | STMT_init_vec of (lval * slot * (atom array)) + | STMT_init_str of (lval * string) + | STMT_init_port of lval + | STMT_init_chan of (lval * (lval option)) + | STMT_copy of (lval * expr) + | STMT_copy_binop of (lval * binop * atom) + | STMT_call of (lval * lval * (atom array)) + | STMT_bind of (lval * lval * ((atom option) array)) + | STMT_recv of (lval * lval) + | STMT_slice of (lval * lval * slice) + + (* control-flow stmts. *) + | STMT_while of stmt_while + | STMT_do_while of stmt_while + | STMT_for of stmt_for + | STMT_for_each of stmt_for_each + | STMT_if of stmt_if + | STMT_put of (atom option) + | STMT_put_each of (lval * (atom array)) + | STMT_ret of (atom option) + | STMT_be of (lval * (atom array)) + | STMT_alt_tag of stmt_alt_tag + | STMT_alt_type of stmt_alt_type + | STMT_alt_port of stmt_alt_port + + (* structural and misc stmts. *) + | STMT_fail + | STMT_yield + | STMT_join of lval + | STMT_send of (lval * lval) + | STMT_log of atom + | STMT_note of atom + | STMT_prove of (constrs) + | STMT_check of (constrs * check_calls) + | STMT_check_expr of expr + | STMT_check_if of (constrs * check_calls * block) + | STMT_block of block + | STMT_decl of stmt_decl + +and stmt = stmt' identified + +and stmt_alt_tag = + { + alt_tag_lval: lval; + alt_tag_arms: arm array; + } + +and stmt_alt_type = + { + alt_type_lval: lval; + alt_type_arms: (ident * slot * stmt) array; + alt_type_else: stmt option; + } + +and block' = stmt array +and block = block' identified + +and stmt_decl = + DECL_mod_item of (ident * mod_item) + | DECL_slot of (slot_key * (slot identified)) + +and stmt_alt_port = + { + (* else lval is a timeout value. *) + alt_port_arms: (lval * lval) array; + alt_port_else: (lval * stmt) option; + } + +and stmt_while = + { + while_lval: ((stmt array) * expr); + while_body: block; + } + +and stmt_for_each = + { + for_each_slot: (slot identified * ident); + for_each_call: (lval * atom array); + for_each_head: block; + for_each_body: block; + } + +and stmt_for = + { + for_slot: (slot identified * ident); + for_seq: ((stmt array) * lval); + for_body: block; + } + +and stmt_if = + { + if_test: expr; + if_then: block; + if_else: block option; + } + +and slice = + { slice_start: atom option; + slice_len: atom option; } + +and domain = + DOMAIN_local + | DOMAIN_thread + +and pat = + PAT_lit of lit + | PAT_tag of ident * (pat array) + | PAT_slot of ((slot identified) * ident) + | PAT_wild + +and arm' = pat * block +and arm = arm' identified + +and atom = + ATOM_literal of (lit identified) + | ATOM_lval of lval + +and expr = + EXPR_binary of (binop * atom * atom) + | EXPR_unary of (unop * atom) + | EXPR_atom of atom + +and lit = + | LIT_nil + | LIT_bool of bool + | LIT_mach of (ty_mach * int64 * string) + | LIT_int of (int64 * string) + | LIT_uint of (int64 * string) + | LIT_char of int + + +and lval_component = + COMP_named of name_component + | COMP_atom of atom + + +and lval = + LVAL_base of name_base identified + | LVAL_ext of (lval * lval_component) + +and binop = + BINOP_or + | BINOP_and + | BINOP_xor + + | BINOP_eq + | BINOP_ne + + | BINOP_lt + | BINOP_le + | BINOP_ge + | BINOP_gt + + | BINOP_lsl + | BINOP_lsr + | BINOP_asr + + | BINOP_add + | BINOP_sub + | BINOP_mul + | BINOP_div + | BINOP_mod + | BINOP_send + +and unop = + UNOP_not + | UNOP_bitnot + | UNOP_neg + | UNOP_cast of ty identified + + +and header_slots = ((slot identified) * ident) array + +and header_tup = (slot identified) array + +and fn = + { + fn_input_slots: header_slots; + fn_input_constrs: constrs; + fn_output_slot: slot identified; + fn_aux: ty_fn_aux; + fn_body: block; + } + +and obj = + { + obj_state: header_slots; + obj_effect: effect; + obj_constrs: constrs; + obj_fns: (ident,fn identified) Hashtbl.t; + obj_drop: block option; + } + +(* + * An 'a decl is a sort-of-thing that represents a parametric (generative) + * declaration. Every reference to one of these involves applying 0 or more + * type arguments, as part of *name resolution*. + * + * Slots are *not* parametric declarations. A slot has a specific type + * even if it's a type that's bound by a quantifier in its environment. + *) + +and ty_param = ident * (ty_param_idx * effect) + +and mod_item' = + MOD_ITEM_type of ty + | MOD_ITEM_tag of (header_tup * ty_tag * node_id) + | MOD_ITEM_mod of (mod_view * mod_items) + | MOD_ITEM_fn of fn + | MOD_ITEM_obj of obj + +and mod_item_decl = + { + decl_params: (ty_param identified) array; + decl_item: mod_item'; + } + +and mod_item = mod_item_decl identified +and mod_items = (ident, mod_item) Hashtbl.t + +and export = + EXPORT_all_decls + | EXPORT_ident of ident + +and mod_view = + { + view_imports: (ident, name) Hashtbl.t; + view_exports: (export, unit) Hashtbl.t; + } + +and meta = (ident * string) array + +and meta_pat = (ident * string option) array + +and crate' = + { + crate_items: (mod_view * mod_items); + crate_meta: meta; + crate_auth: (name, effect) Hashtbl.t; + crate_required: (node_id, (required_lib * nabi_conv)) Hashtbl.t; + crate_required_syms: (node_id, string) Hashtbl.t; + crate_files: (node_id,filename) Hashtbl.t; + crate_main: name option; + } +and crate = crate' identified +;; + +(* + * NB: names can only be type-parametric in their *last* path-entry. + * All path-entries before that must be ident or idx (non-parametric). + *) +let sane_name (n:name) : bool = + let rec sane_prefix (n:name) : bool = + match n with + NAME_base (BASE_ident _) + | NAME_base (BASE_temp _) -> true + | NAME_ext (prefix, COMP_ident _) + | NAME_ext (prefix, COMP_idx _) -> sane_prefix prefix + | _ -> false + in + match n with + NAME_base _ -> true + | NAME_ext (prefix, _) -> sane_prefix prefix +;; + + +(***********************************************************************) + +(* FIXME (issue #19): finish all parts with ?foo? as their output. *) + +let fmt = Format.fprintf;; + +let fmt_ident (ff:Format.formatter) (i:ident) : unit = + fmt ff "%s" i + +let fmt_temp (ff:Format.formatter) (t:temp_id) : unit = + fmt ff ".t%d" (int_of_temp t) + +let fmt_slot_key ff (s:slot_key) : unit = + match s with + KEY_ident i -> fmt_ident ff i + | KEY_temp t -> fmt_temp ff t + +let rec fmt_app (ff:Format.formatter) (i:ident) (tys:ty array) : unit = + fmt ff "%s" i; + fmt_app_args ff tys + +and fmt_app_args (ff:Format.formatter) (tys:ty array) : unit = + fmt ff "[@["; + for i = 0 to (Array.length tys) - 1; + do + if i != 0 + then fmt ff ",@ "; + fmt_ty ff tys.(i); + done; + fmt ff "@]]" + +and fmt_name_base (ff:Format.formatter) (nb:name_base) : unit = + match nb with + BASE_ident i -> fmt_ident ff i + | BASE_temp t -> fmt_temp ff t + | BASE_app (id, tys) -> fmt_app ff id tys + +and fmt_name_component (ff:Format.formatter) (nc:name_component) : unit = + match nc with + COMP_ident i -> fmt_ident ff i + | COMP_app (id, tys) -> fmt_app ff id tys + | COMP_idx i -> fmt ff "_%d" i + +and fmt_name (ff:Format.formatter) (n:name) : unit = + match n with + NAME_base nb -> fmt_name_base ff nb + | NAME_ext (n, nc) -> + fmt_name ff n; + fmt ff "."; + fmt_name_component ff nc + +and fmt_mutable (ff:Format.formatter) (m:bool) : unit = + if m + then fmt ff "mutable "; + +and fmt_mode (ff:Format.formatter) (m:mode) : unit = + match m with + MODE_exterior -> fmt ff "@@" + | MODE_alias -> fmt ff "&" + | MODE_interior -> () + +and fmt_slot (ff:Format.formatter) (s:slot) : unit = + match s.slot_ty with + None -> fmt ff "auto" + | Some t -> + fmt_mutable ff s.slot_mutable; + fmt_mode ff s.slot_mode; + fmt_ty ff t + +and fmt_slots + (ff:Format.formatter) + (slots:slot array) + (idents:(ident array) option) + : unit = + fmt ff "(@["; + for i = 0 to (Array.length slots) - 1 + do + if i != 0 + then fmt ff ",@ "; + fmt_slot ff slots.(i); + begin + match idents with + None -> () + | Some ids -> (fmt ff " "; fmt_ident ff ids.(i)) + end; + done; + fmt ff "@])" + +and fmt_effect + (ff:Format.formatter) + (effect:effect) + : unit = + match effect with + PURE -> () + | IO -> fmt ff "io" + | STATE -> fmt ff "state" + | UNSAFE -> fmt ff "unsafe" + +and fmt_ty_fn + (ff:Format.formatter) + (ident_and_params:(ident * ty_param array) option) + (tf:ty_fn) + : unit = + let (tsig, ta) = tf in + fmt_effect ff ta.fn_effect; + if ta.fn_effect <> PURE then fmt ff " "; + fmt ff "%s" (if ta.fn_is_iter then "iter" else "fn"); + begin + match ident_and_params with + Some (id, params) -> + fmt ff " "; + fmt_ident_and_params ff id params + | None -> () + end; + fmt_slots ff tsig.sig_input_slots None; + fmt_decl_constrs ff tsig.sig_input_constrs; + fmt ff " -> "; + fmt_slot ff tsig.sig_output_slot; + +and fmt_tag (ff:Format.formatter) (ttag:ty_tag) : unit = + fmt ff "@[tag(@["; + let first = ref true in + Hashtbl.iter + begin + fun name ttup -> + (if !first + then first := false + else fmt ff ",@ "); + fmt_name ff name; + fmt_slots ff ttup None + end + ttag; + fmt ff "@])@]" + +and fmt_iso (ff:Format.formatter) (tiso:ty_iso) : unit = + fmt ff "@[iso [@["; + for i = 0 to (Array.length tiso.iso_group) - 1 + do + if i != 0 + then fmt ff ",@ "; + if i == tiso.iso_index + then fmt ff "<%d>: " i + else fmt ff "%d: " i; + fmt_tag ff tiso.iso_group.(i); + done; + fmt ff "@]]@]" + +and fmt_ty (ff:Format.formatter) (t:ty) : unit = + match t with + TY_any -> fmt ff "any" + | TY_nil -> fmt ff "()" + | TY_bool -> fmt ff "bool" + | TY_mach m -> fmt_mach ff m + | TY_int -> fmt ff "int" + | TY_uint -> fmt ff "uint" + | TY_char -> fmt ff "char" + | TY_str -> fmt ff "str" + + | TY_tup slots -> (fmt ff "tup"; fmt_slots ff slots None) + | TY_vec s -> (fmt ff "vec["; fmt_slot ff s; fmt ff "]") + | TY_chan t -> (fmt ff "chan["; fmt_ty ff t; fmt ff "]") + | TY_port t -> (fmt ff "port["; fmt_ty ff t; fmt ff "]") + + | TY_rec slots -> + let (idents, slots) = + let (idents, slots) = List.split (Array.to_list slots) in + (Array.of_list idents, Array.of_list slots) + in + fmt ff "@[rec"; + fmt_slots ff slots (Some idents); + fmt ff "@]" + + | TY_param (i, e) -> (fmt_effect ff e; + if e <> PURE then fmt ff " "; + fmt ff "<p#%d>" i) + | TY_native oid -> fmt ff "<native#%d>" (int_of_opaque oid) + | TY_named n -> fmt_name ff n + | TY_type -> fmt ff "type" + + | TY_fn tfn -> fmt_ty_fn ff None tfn + | TY_task -> fmt ff "task" + | TY_tag ttag -> fmt_tag ff ttag + | TY_iso tiso -> fmt_iso ff tiso + | TY_idx idx -> fmt ff "<idx#%d>" idx + | TY_constrained _ -> fmt ff "?constrained?" + + | TY_obj (effect, fns) -> + fmt_obox ff; + fmt_effect ff effect; + if effect <> PURE then fmt ff " "; + fmt ff "obj "; + fmt_obr ff; + Hashtbl.iter + begin + fun id fn -> + fmt ff "@\n"; + fmt_ty_fn ff (Some (id, [||])) fn; + fmt ff ";" + end + fns; + fmt_cbb ff + + +and fmt_constrs (ff:Format.formatter) (cc:constr array) : unit = + Array.iter (fmt_constr ff) cc + +and fmt_decl_constrs (ff:Format.formatter) (cc:constr array) : unit = + if Array.length cc = 0 + then () + else + begin + fmt ff " : "; + fmt_constrs ff cc + end + +and fmt_constr (ff:Format.formatter) (c:constr) : unit = + fmt_name ff c.constr_name; + fmt ff "(@["; + for i = 0 to (Array.length c.constr_args) - 1 + do + if i != 0 + then fmt ff ",@ "; + fmt_carg ff c.constr_args.(i); + done; + fmt ff "@])" + +and fmt_carg_path (ff:Format.formatter) (cp:carg_path) : unit = + match cp with + CARG_base BASE_formal -> fmt ff "*" + | CARG_base (BASE_named nb) -> fmt_name_base ff nb + | CARG_ext (base, nc) -> + fmt_carg_path ff base; + fmt ff "."; + fmt_name_component ff nc + +and fmt_carg (ff:Format.formatter) (ca:carg) : unit = + match ca with + CARG_path cp -> fmt_carg_path ff cp + | CARG_lit lit -> fmt_lit ff lit + +and fmt_obox ff = Format.pp_open_box ff 4 +and fmt_obox_3 ff = Format.pp_open_box ff 3 +and fmt_cbox ff = Format.pp_close_box ff () +and fmt_obr ff = fmt ff "{" +and fmt_cbr ff = fmt ff "@\n}" +and fmt_cbb ff = (fmt_cbox ff; fmt_cbr ff) + +and fmt_stmts (ff:Format.formatter) (ss:stmt array) : unit = + Array.iter (fmt_stmt ff) ss; + +and fmt_block (ff:Format.formatter) (b:stmt array) : unit = + fmt_obox ff; + fmt_obr ff; + fmt_stmts ff b; + fmt_cbb ff; + +and fmt_binop (ff:Format.formatter) (b:binop) : unit = + fmt ff "%s" + begin + match b with + BINOP_or -> "|" + | BINOP_and -> "&" + | BINOP_xor -> "^" + + | BINOP_eq -> "==" + | BINOP_ne -> "!=" + + | BINOP_lt -> "<" + | BINOP_le -> "<=" + | BINOP_ge -> ">=" + | BINOP_gt -> ">" + + | BINOP_lsl -> "<<" + | BINOP_lsr -> ">>" + | BINOP_asr -> ">>>" + + | BINOP_add -> "+" + | BINOP_sub -> "-" + | BINOP_mul -> "*" + | BINOP_div -> "/" + | BINOP_mod -> "%" + | BINOP_send -> "<|" + end + + +and fmt_unop (ff:Format.formatter) (u:unop) (a:atom) : unit = + begin + match u with + UNOP_not -> + fmt ff "!"; + fmt_atom ff a + + | UNOP_bitnot -> + fmt ff "~"; + fmt_atom ff a + + | UNOP_neg -> + fmt ff "-"; + fmt_atom ff a + + | UNOP_cast t -> + fmt_atom ff a; + fmt ff " as "; + fmt_ty ff t.node; + end + +and fmt_expr (ff:Format.formatter) (e:expr) : unit = + match e with + EXPR_binary (b,a1,a2) -> + begin + fmt_atom ff a1; + fmt ff " "; + fmt_binop ff b; + fmt ff " "; + fmt_atom ff a2 + end + | EXPR_unary (u,a) -> + begin + fmt_unop ff u a; + end + | EXPR_atom a -> fmt_atom ff a + +and fmt_mach (ff:Format.formatter) (m:ty_mach) : unit = + match m with + TY_u8 -> fmt ff "u8" + | TY_u16 -> fmt ff "u16" + | TY_u32 -> fmt ff "u32" + | TY_u64 -> fmt ff "u64" + | TY_i8 -> fmt ff "i8" + | TY_i16 -> fmt ff "i16" + | TY_i32 -> fmt ff "i32" + | TY_i64 -> fmt ff "i64" + | TY_f32 -> fmt ff "f32" + | TY_f64 -> fmt ff "f64" + +and fmt_lit (ff:Format.formatter) (l:lit) : unit = + match l with + | LIT_nil -> fmt ff "()" + | LIT_bool true -> fmt ff "true" + | LIT_bool false -> fmt ff "false" + | LIT_mach (m, _, s) -> + begin + fmt_mach ff m; + fmt ff "(%s)" s + end + | LIT_int (_,s) -> fmt ff "%s" s + | LIT_uint (_,s) -> fmt ff "%s" s + | LIT_char c -> fmt ff "'%s'" (Common.escaped_char c) + +and fmt_domain (ff:Format.formatter) (d:domain) : unit = + match d with + DOMAIN_local -> () + | DOMAIN_thread -> fmt ff "thread " + +and fmt_atom (ff:Format.formatter) (a:atom) : unit = + match a with + ATOM_literal lit -> fmt_lit ff lit.node + | ATOM_lval lval -> fmt_lval ff lval + +and fmt_atoms (ff:Format.formatter) (az:atom array) : unit = + fmt ff "("; + Array.iteri + begin + fun i a -> + if i != 0 + then fmt ff ", "; + fmt_atom ff a; + end + az; + fmt ff ")" + +and fmt_atom_opts (ff:Format.formatter) (az:(atom option) array) : unit = + fmt ff "("; + Array.iteri + begin + fun i a -> + if i != 0 + then fmt ff ", "; + match a with + None -> fmt ff "_" + | Some a -> fmt_atom ff a; + end + az; + fmt ff ")" + +and fmt_lval_component (ff:Format.formatter) (lvc:lval_component) : unit = + match lvc with + COMP_named nc -> fmt_name_component ff nc + | COMP_atom a -> + begin + fmt ff "("; + fmt_atom ff a; + fmt ff ")" + end + +and fmt_lval (ff:Format.formatter) (l:lval) : unit = + match l with + LVAL_base nbi -> fmt_name_base ff nbi.node + | LVAL_ext (lv, lvc) -> + begin + fmt_lval ff lv; + fmt ff "."; + fmt_lval_component ff lvc + end + +and fmt_stmt (ff:Format.formatter) (s:stmt) : unit = + fmt ff "@\n"; + fmt_stmt_body ff s + +and fmt_stmt_body (ff:Format.formatter) (s:stmt) : unit = + begin + match s.node with + STMT_log at -> + begin + fmt ff "log "; + fmt_atom ff at; + fmt ff ";" + end + + | STMT_spawn (dst, domain, fn, args) -> + fmt_lval ff dst; + fmt ff " = spawn "; + fmt_domain ff domain; + fmt_lval ff fn; + fmt_atoms ff args; + fmt ff ";"; + + | STMT_while sw -> + let (stmts, e) = sw.while_lval in + begin + fmt_obox ff; + fmt ff "while ("; + if Array.length stmts != 0 + then fmt_block ff stmts; + fmt_expr ff e; + fmt ff ") "; + fmt_obr ff; + fmt_stmts ff sw.while_body.node; + fmt_cbb ff + end + + | STMT_do_while sw -> + let (stmts, e) = sw.while_lval in + begin + fmt_obox ff; + fmt ff "do "; + fmt_obr ff; + fmt_stmts ff sw.while_body.node; + fmt ff "while ("; + if Array.length stmts != 0 + then fmt_block ff stmts; + fmt_expr ff e; + fmt ff ");"; + fmt_cbb ff + end + + | STMT_if sif -> + fmt_obox ff; + fmt ff "if ("; + fmt_expr ff sif.if_test; + fmt ff ") "; + fmt_obr ff; + fmt_stmts ff sif.if_then.node; + begin + match sif.if_else with + None -> () + | Some e -> + begin + fmt_cbb ff; + fmt_obox_3 ff; + fmt ff " else "; + fmt_obr ff; + fmt_stmts ff e.node + end + end; + fmt_cbb ff + + | STMT_ret (ao) -> + fmt ff "ret"; + begin + match ao with + None -> () + | Some at -> + fmt ff " "; + fmt_atom ff at + end; + fmt ff ";" + + | STMT_be (fn, az) -> + fmt ff "be "; + fmt_lval ff fn; + fmt_atoms ff az; + fmt ff ";"; + + | STMT_block b -> fmt_block ff b.node + + | STMT_copy (lv, ex) -> + fmt_lval ff lv; + fmt ff " = "; + fmt_expr ff ex; + fmt ff ";" + + | STMT_copy_binop (lv, binop, at) -> + fmt_lval ff lv; + fmt ff " "; + fmt_binop ff binop; + fmt ff "="; + fmt_atom ff at; + fmt ff ";" + + | STMT_call (dst, fn, args) -> + fmt_lval ff dst; + fmt ff " = "; + fmt_lval ff fn; + fmt_atoms ff args; + fmt ff ";"; + + | STMT_bind (dst, fn, arg_opts) -> + fmt_lval ff dst; + fmt ff " = "; + fmt_lval ff fn; + fmt_atom_opts ff arg_opts; + fmt ff ";"; + + | STMT_decl (DECL_slot (skey, sloti)) -> + if sloti.node.slot_ty != None then fmt ff "let "; + fmt_slot ff sloti.node; + fmt ff " "; + fmt_slot_key ff skey; + fmt ff ";" + + | STMT_decl (DECL_mod_item (ident, item)) -> + fmt_mod_item ff ident item + + | STMT_init_rec (dst, entries, base) -> + fmt_lval ff dst; + fmt ff " = rec("; + for i = 0 to (Array.length entries) - 1 + do + if i != 0 + then fmt ff ", "; + let (ident, mode, mut, atom) = entries.(i) in + fmt_ident ff ident; + fmt ff " = "; + fmt_mutable ff mut; + fmt_mode ff mode; + fmt_atom ff atom; + done; + begin + match base with + None -> () + | Some b -> + fmt ff " with "; + fmt_lval ff b + end; + fmt ff ");" + + | STMT_init_vec (dst, _, atoms) -> + fmt_lval ff dst; + fmt ff " = vec("; + for i = 0 to (Array.length atoms) - 1 + do + if i != 0 + then fmt ff ", "; + fmt_atom ff atoms.(i); + done; + fmt ff ");" + + | STMT_init_tup (dst, entries) -> + fmt_lval ff dst; + fmt ff " = ("; + for i = 0 to (Array.length entries) - 1 + do + if i != 0 + then fmt ff ", "; + let (mode, mut, atom) = entries.(i) in + fmt_mutable ff mut; + fmt_mode ff mode; + fmt_atom ff atom; + done; + fmt ff ");"; + + | STMT_init_str (dst, s) -> + fmt_lval ff dst; + fmt ff " = \"%s\"" (String.escaped s) + + | STMT_init_port dst -> + fmt_lval ff dst; + fmt ff " = port();" + + | STMT_init_chan (dst, port_opt) -> + fmt_lval ff dst; + fmt ff " = chan("; + begin + match port_opt with + None -> () + | Some lv -> fmt_lval ff lv + end; + fmt ff ");" + + | STMT_check_expr expr -> + fmt ff "check ("; + fmt_expr ff expr; + fmt ff ");" + + | STMT_check_if (constrs, _, block) -> + fmt_obox ff; + fmt ff "check if ("; + fmt_constrs ff constrs; + fmt ff ")"; + fmt_obr ff; + fmt_stmts ff block.node; + fmt_cbb ff + + | STMT_check (constrs, _) -> + fmt ff "check "; + fmt_constrs ff constrs; + fmt ff ";" + + | STMT_prove constrs -> + fmt ff "prove "; + fmt_constrs ff constrs; + fmt ff ";" + + | STMT_for sfor -> + let (slot, ident) = sfor.for_slot in + let (stmts, lval) = sfor.for_seq in + begin + fmt_obox ff; + fmt ff "for ("; + fmt_slot ff slot.node; + fmt ff " "; + fmt_ident ff ident; + fmt ff " in "; + fmt_stmts ff stmts; + fmt_lval ff lval; + fmt ff ") "; + fmt_obr ff; + fmt_stmts ff sfor.for_body.node; + fmt_cbb ff + end + + | STMT_for_each sf -> + let (slot, ident) = sf.for_each_slot in + let (f, az) = sf.for_each_call in + begin + fmt_obox ff; + fmt ff "for each ("; + fmt_slot ff slot.node; + fmt ff " "; + fmt_ident ff ident; + fmt ff " = "; + fmt_lval ff f; + fmt_atoms ff az; + fmt ff " "; + fmt_obr ff; + fmt_stmts ff sf.for_each_body.node; + fmt_cbb ff + end + + | STMT_put (atom) -> + fmt ff "put "; + begin + match atom with + Some a -> (fmt ff " "; fmt_atom ff a) + | None -> () + end; + fmt ff ";" + + | STMT_put_each (f, az) -> + fmt ff "put each "; + fmt_lval ff f; + fmt_atoms ff az; + fmt ff ";" + + | STMT_fail -> fmt ff "fail;" + | STMT_yield -> fmt ff "yield;" + + | STMT_send (chan, v) -> + fmt_lval ff chan; + fmt ff " <| "; + fmt_lval ff v; + fmt ff ";"; + + | STMT_recv (d, port) -> + fmt_lval ff d; + fmt ff " <- "; + fmt_lval ff port; + fmt ff ";"; + + | STMT_join t -> + fmt ff "join "; + fmt_lval ff t; + fmt ff ";" + + | STMT_alt_tag _ -> fmt ff "?stmt_alt_tag?" + | STMT_alt_type _ -> fmt ff "?stmt_alt_type?" + | STMT_alt_port _ -> fmt ff "?stmt_alt_port?" + | STMT_note _ -> fmt ff "?stmt_note?" + | STMT_slice _ -> fmt ff "?stmt_slice?" + end + +and fmt_decl_params (ff:Format.formatter) (params:ty_param array) : unit = + if Array.length params = 0 + then () + else + begin + fmt ff "["; + for i = 0 to (Array.length params) - 1 + do + if i <> 0 + then fmt ff ", "; + let (ident, (i, e)) = params.(i) in + fmt_effect ff e; + if e <> PURE then fmt ff " "; + fmt_ident ff ident; + fmt ff "=<p#%d>" i + done; + fmt ff "]" + end; + +and fmt_header_slots (ff:Format.formatter) (hslots:header_slots) : unit = + fmt_slots ff + (Array.map (fun (s,_) -> s.node) hslots) + (Some (Array.map (fun (_, i) -> i) hslots)) + +and fmt_ident_and_params + (ff:Format.formatter) + (id:ident) + (params:ty_param array) + : unit = + fmt_ident ff id; + fmt_decl_params ff params + +and fmt_fn + (ff:Format.formatter) + (id:ident) + (params:ty_param array) + (f:fn) + : unit = + fmt_obox ff; + fmt_effect ff f.fn_aux.fn_effect; + if f.fn_aux.fn_effect <> PURE then fmt ff " "; + fmt ff "%s "(if f.fn_aux.fn_is_iter then "iter" else "fn"); + fmt_ident_and_params ff id params; + fmt_header_slots ff f.fn_input_slots; + fmt_decl_constrs ff f.fn_input_constrs; + fmt ff " -> "; + fmt_slot ff f.fn_output_slot.node; + fmt ff " "; + fmt_obr ff; + fmt_stmts ff f.fn_body.node; + fmt_cbb ff + + +and fmt_obj + (ff:Format.formatter) + (id:ident) + (params:ty_param array) + (obj:obj) + : unit = + fmt_obox ff; + fmt_effect ff obj.obj_effect; + if obj.obj_effect <> PURE then fmt ff " "; + fmt ff "obj "; + fmt_ident_and_params ff id params; + fmt_header_slots ff obj.obj_state; + fmt_decl_constrs ff obj.obj_constrs; + fmt ff " "; + fmt_obr ff; + Hashtbl.iter + begin + fun id fn -> + fmt ff "@\n"; + fmt_fn ff id [||] fn.node + end + obj.obj_fns; + begin + match obj.obj_drop with + None -> () + | Some d -> + begin + fmt ff "@\n"; + fmt_obox ff; + fmt ff "drop "; + fmt_obr ff; + fmt_stmts ff d.node; + fmt_cbb ff; + end + end; + fmt_cbb ff + + +and fmt_mod_item (ff:Format.formatter) (id:ident) (item:mod_item) : unit = + fmt ff "@\n"; + let params = item.node.decl_params in + let params = Array.map (fun i -> i.node) params in + begin + match item.node.decl_item with + MOD_ITEM_type ty -> + fmt ff "type "; + fmt_ident_and_params ff id params; + fmt ff " = "; + fmt_ty ff ty; + fmt ff ";"; + + | MOD_ITEM_tag (hdr, ttag, _) -> + fmt ff "fn "; + fmt_ident_and_params ff id params; + fmt_header_slots ff + (Array.mapi (fun i s -> (s,(Printf.sprintf "_%d" i))) hdr); + fmt ff " -> "; + fmt_ty ff (TY_tag ttag); + fmt ff ";"; + + | MOD_ITEM_mod (view,items) -> + fmt_obox ff; + fmt ff "mod "; + fmt_ident_and_params ff id params; + fmt ff " "; + fmt_obr ff; + fmt_mod_view ff view; + fmt_mod_items ff items; + fmt_cbb ff + + | MOD_ITEM_fn f -> + fmt_fn ff id params f + + | MOD_ITEM_obj obj -> + fmt_obj ff id params obj + end + +and fmt_import (ff:Format.formatter) (ident:ident) (name:name) : unit = + fmt ff "@\n"; + fmt ff "import "; + fmt ff "%s = " ident; + fmt_name ff name; + +and fmt_export (ff:Format.formatter) (export:export) _ : unit = + fmt ff "@\n"; + match export with + EXPORT_all_decls -> fmt ff "export *;" + | EXPORT_ident i -> fmt ff "export %s;" i + +and fmt_mod_view (ff:Format.formatter) (mv:mod_view) : unit = + Hashtbl.iter (fmt_import ff) mv.view_imports; + Hashtbl.iter (fmt_export ff) mv.view_exports + +and fmt_mod_items (ff:Format.formatter) (mi:mod_items) : unit = + Hashtbl.iter (fmt_mod_item ff) mi + +and fmt_crate (ff:Format.formatter) (c:crate) : unit = + let (view,items) = c.node.crate_items in + fmt_mod_view ff view; + fmt_mod_items ff items + + +let fmt_to_str (f:Format.formatter -> 'a -> unit) (v:'a) : string = + let buf = Buffer.create 16 in + let bf = Format.formatter_of_buffer buf in + begin + f bf v; + Format.pp_print_flush bf (); + Buffer.contents buf + end + +let sprintf_fmt + (f:Format.formatter -> 'a -> unit) + : (unit -> 'a -> string) = + (fun _ -> fmt_to_str f) + + +let sprintf_expr = sprintf_fmt fmt_expr;; +let sprintf_name = sprintf_fmt fmt_name;; +let sprintf_lval = sprintf_fmt fmt_lval;; +let sprintf_lval_component = sprintf_fmt fmt_lval_component;; +let sprintf_atom = sprintf_fmt fmt_atom;; +let sprintf_slot = sprintf_fmt fmt_slot;; +let sprintf_slot_key = sprintf_fmt fmt_slot_key;; +let sprintf_mutable = sprintf_fmt fmt_mutable;; +let sprintf_ty = sprintf_fmt fmt_ty;; +let sprintf_effect = sprintf_fmt fmt_effect;; +let sprintf_tag = sprintf_fmt fmt_tag;; +let sprintf_carg = sprintf_fmt fmt_carg;; +let sprintf_constr = sprintf_fmt fmt_constr;; +let sprintf_stmt = sprintf_fmt fmt_stmt;; +let sprintf_mod_items = sprintf_fmt fmt_mod_items;; +let sprintf_decl_params = sprintf_fmt fmt_decl_params;; +let sprintf_app_args = sprintf_fmt fmt_app_args;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/cexp.ml b/src/boot/fe/cexp.ml new file mode 100644 index 00000000..6dffdb96 --- /dev/null +++ b/src/boot/fe/cexp.ml @@ -0,0 +1,762 @@ + +open Common;; +open Token;; +open Parser;; + +(* NB: cexps (crate-expressions / constant-expressions) are only used + * transiently during compilation: they are the outermost expression-language + * describing crate configuration and constants. They are completely evaluated + * at compile-time, in a little micro-interpreter defined here, with the + * results of evaluation being the sequence of directives controlling the rest + * of the compiler. + * + * Cexps, like pexps, do not escape the language front-end. + * + * You can think of the AST as a statement-language called "item" sandwiched + * between two expression-languages, "cexp" on the outside and "pexp" on the + * inside. The front-end evaluates cexp on the outside in order to get one big + * directive-list, evaluating those parts of pexp that are directly used by + * cexp in passing, and desugaring those remaining parts of pexp that are + * embedded within the items of the directives. + * + * The rest of the compiler only deals with the directives, which are mostly + * just a set of containers for items. Items are what most of AST describes + * ("most" because the type-grammar spans both items and pexps). + * + *) + +type meta = (Ast.ident * Pexp.pexp) array;; + +type meta_pat = (Ast.ident * (Pexp.pexp option)) array;; + +type auth = (Ast.name * Ast.effect);; + +type cexp = + CEXP_alt of cexp_alt identified + | CEXP_let of cexp_let identified + | CEXP_src_mod of cexp_src identified + | CEXP_dir_mod of cexp_dir identified + | CEXP_use_mod of cexp_use identified + | CEXP_nat_mod of cexp_nat identified + | CEXP_meta of meta identified + | CEXP_auth of auth identified + +and cexp_alt = + { alt_val: Pexp.pexp; + alt_arms: (Pexp.pexp * cexp array) array; + alt_else: cexp array } + +and cexp_let = + { let_ident: Ast.ident; + let_value: Pexp.pexp; + let_body: cexp array; } + +and cexp_src = + { src_ident: Ast.ident; + src_path: Pexp.pexp option } + +and cexp_dir = + { dir_ident: Ast.ident; + dir_path: Pexp.pexp option; + dir_body: cexp array } + +and cexp_use = + { use_ident: Ast.ident; + use_meta: meta_pat; } + +and cexp_nat = + { nat_abi: string; + nat_ident: Ast.ident; + nat_path: Pexp.pexp option; + (* + * FIXME: possibly support embedding optional strings as + * symbol-names, to handle mangling schemes that aren't + * Token.IDENT values + *) + nat_items: Ast.mod_items; + } +;; + + +(* Cexp grammar. *) + +let parse_meta_input (ps:pstate) : (Ast.ident * Pexp.pexp option) = + let lab = (ctxt "meta input: label" Pexp.parse_ident ps) in + match peek ps with + EQ -> + bump ps; + let v = + match peek ps with + UNDERSCORE -> bump ps; None + | _ -> Some (Pexp.parse_pexp ps) + in + (lab, v) + | _ -> raise (unexpected ps) +;; + +let parse_meta_pat (ps:pstate) : meta_pat = + bracketed_zero_or_more LPAREN RPAREN + (Some COMMA) parse_meta_input ps +;; + +let parse_meta (ps:pstate) : meta = + Array.map + begin + fun (id,v) -> + match v with + None -> + raise (err ("wildcard found in meta pattern " + ^ "where value expected") ps) + | Some v -> (id,v) + end + (parse_meta_pat ps) +;; + +let parse_optional_meta_pat + (ps:pstate) + (ident:Ast.ident) + : meta_pat = + match peek ps with + LPAREN -> parse_meta_pat ps + | _ -> + let apos = lexpos ps in + [| ("name", Some (span ps apos apos (Pexp.PEXP_str ident))) |] +;; + +let rec parse_cexps (ps:pstate) (term:Token.token) : cexp array = + let cexps = Queue.create () in + while ((peek ps) <> term) + do + Queue.push (parse_cexp ps) cexps + done; + expect ps term; + queue_to_arr cexps + +and parse_cexp (ps:pstate) : cexp = + + let apos = lexpos ps in + match peek ps with + MOD -> + begin + bump ps; + let name = ctxt "mod: name" Pexp.parse_ident ps in + let path = ctxt "mod: path" parse_eq_pexp_opt ps + in + match peek ps with + SEMI -> + bump ps; + let bpos = lexpos ps in + CEXP_src_mod + (span ps apos bpos { src_ident = name; + src_path = path }) + | LBRACE -> + let body = + bracketed_zero_or_more LBRACE RBRACE + None parse_cexp ps + in + let bpos = lexpos ps in + CEXP_dir_mod + (span ps apos bpos { dir_ident = name; + dir_path = path; + dir_body = body }) + | _ -> raise (unexpected ps) + end + + | NATIVE -> + begin + bump ps; + let abi = + match peek ps with + MOD -> "cdecl" + | LIT_STR s -> bump ps; s + | _ -> raise (unexpected ps) + in + let _ = expect ps MOD in + let name = ctxt "native mod: name" Pexp.parse_ident ps in + let path = ctxt "native mod: path" parse_eq_pexp_opt ps in + let items = Hashtbl.create 0 in + let get_item ps = + let (ident, item) = Item.parse_mod_item_from_signature ps in + htab_put items ident item; + in + ignore (bracketed_zero_or_more + LBRACE RBRACE None get_item ps); + let bpos = lexpos ps in + CEXP_nat_mod + (span ps apos bpos { nat_abi = abi; + nat_ident = name; + nat_path = path; + nat_items = items }) + end + + | USE -> + begin + bump ps; + let ident = ctxt "use mod: name" Pexp.parse_ident ps in + let meta = + ctxt "use mod: meta" parse_optional_meta_pat ps ident + in + let bpos = lexpos ps in + expect ps SEMI; + CEXP_use_mod + (span ps apos bpos { use_ident = ident; + use_meta = meta }) + end + + | LET -> + begin + bump ps; + expect ps LPAREN; + let id = Pexp.parse_ident ps in + expect ps EQ; + let v = Pexp.parse_pexp ps in + expect ps RPAREN; + expect ps LBRACE; + let body = parse_cexps ps RBRACE in + let bpos = lexpos ps in + CEXP_let + (span ps apos bpos + { let_ident = id; + let_value = v; + let_body = body }) + end + + | ALT -> + begin + bump ps; + expect ps LPAREN; + let v = Pexp.parse_pexp ps in + expect ps RPAREN; + expect ps LBRACE; + let rec consume_arms arms = + match peek ps with + CASE -> + begin + bump ps; + expect ps LPAREN; + let cond = Pexp.parse_pexp ps in + expect ps RPAREN; + expect ps LBRACE; + let consequent = parse_cexps ps RBRACE in + let arm = (cond, consequent) in + consume_arms (arm::arms) + end + | ELSE -> + begin + bump ps; + expect ps LBRACE; + let consequent = parse_cexps ps RBRACE in + expect ps RBRACE; + let bpos = lexpos ps in + span ps apos bpos + { alt_val = v; + alt_arms = Array.of_list (List.rev arms); + alt_else = consequent } + end + + | _ -> raise (unexpected ps) + in + CEXP_alt (consume_arms []) + end + + | META -> + bump ps; + let meta = parse_meta ps in + expect ps SEMI; + let bpos = lexpos ps in + CEXP_meta (span ps apos bpos meta) + + | AUTH -> + bump ps; + let name = Pexp.parse_name ps in + expect ps EQ; + let effect = Pexp.parse_effect ps in + expect ps SEMI; + let bpos = lexpos ps in + CEXP_auth (span ps apos bpos (name, effect)) + + | _ -> raise (unexpected ps) + + +and parse_eq_pexp_opt (ps:pstate) : Pexp.pexp option = + match peek ps with + EQ -> + begin + bump ps; + Some (Pexp.parse_pexp ps) + end + | _ -> None +;; + + +(* + * Dynamic-typed micro-interpreter for the cexp language. + * + * The product of evaluating a pexp is a pval. + * + * The product of evlauating a cexp is a cdir array. + *) + +type pval = + PVAL_str of string + | PVAL_num of int64 + | PVAL_bool of bool +;; + +type cdir = + CDIR_meta of ((Ast.ident * string) array) + | CDIR_syntax of Ast.name + | CDIR_check of (Ast.name * pval array) + | CDIR_mod of (Ast.ident * Ast.mod_item) + | CDIR_auth of auth + +type env = { env_bindings: (Ast.ident * pval) list; + env_prefix: filename list; + env_items: (filename, Ast.mod_items) Hashtbl.t; + env_files: (node_id,filename) Hashtbl.t; + env_required: (node_id, (required_lib * nabi_conv)) Hashtbl.t; + env_required_syms: (node_id, string) Hashtbl.t; + env_ps: pstate; } + +let unexpected_val (expected:string) (v:pval) = + let got = + match v with + PVAL_str s -> "str \"" ^ (String.escaped s) ^ "\"" + | PVAL_num i -> "num " ^ (Int64.to_string i) + | PVAL_bool b -> if b then "bool true" else "bool false" + in + (* FIXME: proper error reporting, please. *) + bug () "expected %s, got %s" expected got +;; + +let rewrap_items id items = + let item = decl [||] (Ast.MOD_ITEM_mod items) in + { id = id; node = item } +;; + + +let rec eval_cexps (env:env) (exps:cexp array) : cdir array = + Parser.arj (Array.map (eval_cexp env) exps) + +and eval_cexp (env:env) (exp:cexp) : cdir array = + match exp with + CEXP_alt {node=ca} -> + let v = eval_pexp env ca.alt_val in + let rec try_arm i = + if i >= Array.length ca.alt_arms + then ca.alt_else + else + let (arm_head, arm_body) = ca.alt_arms.(i) in + let v' = eval_pexp env arm_head in + if v' = v + then arm_body + else try_arm (i+1) + in + eval_cexps env (try_arm 0) + + | CEXP_let {node=cl} -> + let ident = cl.let_ident in + let v = eval_pexp env cl.let_value in + let env = { env with + env_bindings = ((ident,v)::env.env_bindings ) } + in + eval_cexps env cl.let_body + + | CEXP_src_mod {node=s; id=id} -> + let name = s.src_ident in + let path = + match s.src_path with + None -> name ^ ".rs" + | Some p -> eval_pexp_to_str env p + in + let full_path = + List.fold_left Filename.concat "" + (List.rev (path :: env.env_prefix)) + in + let ps = env.env_ps in + let p = + make_parser + ps.pstate_temp_id + ps.pstate_node_id + ps.pstate_opaque_id + ps.pstate_sess + ps.pstate_get_mod + ps.pstate_infer_lib_name + env.env_required + env.env_required_syms + full_path + in + let items = Item.parse_mod_items p EOF in + htab_put env.env_files id full_path; + [| CDIR_mod (name, rewrap_items id items) |] + + | CEXP_dir_mod {node=d; id=id} -> + let items = Hashtbl.create 0 in + let name = d.dir_ident in + let path = + match d.dir_path with + None -> name + | Some p -> eval_pexp_to_str env p + in + let env = { env with + env_prefix = path :: env.env_prefix } in + let sub_directives = eval_cexps env d.dir_body in + let add d = + match d with + CDIR_mod (name, item) -> + htab_put items name item + | _ -> raise (err "non-'mod' directive found in 'dir' directive" + env.env_ps) + in + Array.iter add sub_directives; + [| CDIR_mod (name, rewrap_items id (Item.empty_view, items)) |] + + | CEXP_use_mod {node=u; id=id} -> + let ps = env.env_ps in + let name = u.use_ident in + let (path, items) = + let meta_pat = + Array.map + begin + fun (k,vo) -> + match vo with + None -> (k, None) + | Some p -> (k, Some (eval_pexp_to_str env p)) + end + u.use_meta + in + ps.pstate_get_mod meta_pat id ps.pstate_node_id ps.pstate_opaque_id + in + iflog ps + begin + fun _ -> + log ps "extracted mod signature from %s (binding to %s)" + path name; + log ps "%a" Ast.sprintf_mod_items items; + end; + let rlib = REQUIRED_LIB_rust { required_libname = path; + required_prefix = 1 } + in + let item = decl [||] (Ast.MOD_ITEM_mod (Item.empty_view, items)) in + let item = { id = id; node = item } in + let span = Hashtbl.find ps.pstate_sess.Session.sess_spans id in + Item.note_required_mod env.env_ps span CONV_rust rlib item; + [| CDIR_mod (name, item) |] + + | CEXP_nat_mod {node=cn;id=id} -> + let conv = + let v = cn.nat_abi in + match string_to_conv v with + None -> unexpected_val "calling convention" (PVAL_str v) + | Some c -> c + in + let name = cn.nat_ident in + let filename = + match cn.nat_path with + None -> env.env_ps.pstate_infer_lib_name name + | Some p -> eval_pexp_to_str env p + in + let item = + decl [||] (Ast.MOD_ITEM_mod (Item.empty_view, cn.nat_items)) + in + let item = { id = id; node = item } in + let rlib = REQUIRED_LIB_c { required_libname = filename; + required_prefix = 1 } + in + let ps = env.env_ps in + let span = Hashtbl.find ps.pstate_sess.Session.sess_spans id in + Item.note_required_mod env.env_ps span conv rlib item; + [| CDIR_mod (name, item) |] + + | CEXP_meta m -> + [| CDIR_meta + begin + Array.map + begin + fun (id, p) -> (id, eval_pexp_to_str env p) + end + m.node + end |] + + | CEXP_auth a -> [| CDIR_auth a.node |] + + +and eval_pexp (env:env) (exp:Pexp.pexp) : pval = + match exp.node with + | Pexp.PEXP_binop (bop, a, b) -> + begin + let av = eval_pexp env a in + let bv = eval_pexp env b in + match (bop, av, bv) with + (Ast.BINOP_add, PVAL_str az, PVAL_str bz) -> + PVAL_str (az ^ bz) + | _ -> + let av = (need_num av) in + let bv = (need_num bv) in + PVAL_num + begin + match bop with + Ast.BINOP_add -> Int64.add av bv + | Ast.BINOP_sub -> Int64.sub av bv + | Ast.BINOP_mul -> Int64.mul av bv + | Ast.BINOP_div -> Int64.div av bv + | _ -> + bug () + "unhandled arithmetic op in Cexp.eval_pexp" + end + end + + | Pexp.PEXP_unop (uop, a) -> + begin + match uop with + Ast.UNOP_not -> + PVAL_bool (not (eval_pexp_to_bool env a)) + | Ast.UNOP_neg -> + PVAL_num (Int64.neg (eval_pexp_to_num env a)) + | _ -> bug () "Unexpected unop in Cexp.eval_pexp" + end + + | Pexp.PEXP_lval (Pexp.PLVAL_ident ident) -> + begin + match ltab_search env.env_bindings ident with + None -> raise (err (Printf.sprintf "no binding for '%s' found" + ident) env.env_ps) + | Some v -> v + end + + | Pexp.PEXP_lit (Ast.LIT_bool b) -> + PVAL_bool b + + | Pexp.PEXP_lit (Ast.LIT_int (i, _)) -> + PVAL_num i + + | Pexp.PEXP_str s -> + PVAL_str s + + | _ -> bug () "unexpected Pexp in Cexp.eval_pexp" + + +and eval_pexp_to_str (env:env) (exp:Pexp.pexp) : string = + match eval_pexp env exp with + PVAL_str s -> s + | v -> unexpected_val "str" v + +and need_num (cv:pval) : int64 = + match cv with + PVAL_num n -> n + | v -> unexpected_val "num" v + +and eval_pexp_to_num (env:env) (exp:Pexp.pexp) : int64 = + need_num (eval_pexp env exp) + +and eval_pexp_to_bool (env:env) (exp:Pexp.pexp) : bool = + match eval_pexp env exp with + PVAL_bool b -> b + | v -> unexpected_val "bool" v + +;; + + +let find_main_fn + (ps:pstate) + (crate_items:Ast.mod_items) + : Ast.name = + let fns = ref [] in + let extend prefix_name ident = + match prefix_name with + None -> Ast.NAME_base (Ast.BASE_ident ident) + | Some n -> Ast.NAME_ext (n, Ast.COMP_ident ident) + in + let rec dig prefix_name items = + Hashtbl.iter (extract_fn prefix_name) items + and extract_fn prefix_name ident item = + if not (Array.length item.node.Ast.decl_params = 0) || + Hashtbl.mem ps.pstate_required item.id + then () + else + match item.node.Ast.decl_item with + Ast.MOD_ITEM_mod (_, items) -> + dig (Some (extend prefix_name ident)) items + + | Ast.MOD_ITEM_fn _ -> + if ident = "main" + then fns := (extend prefix_name ident) :: (!fns) + else () + + | _ -> () + in + dig None crate_items; + match !fns with + [] -> raise (err "no 'main' function found" ps) + | [x] -> x + | _ -> raise (err "multiple 'main' functions found" ps) +;; + + +let with_err_handling sess thunk = + try + thunk () + with + Parse_err (ps, str) -> + Session.fail sess "Parse error: %s\n%!" str; + List.iter + (fun (cx,pos) -> + Session.fail sess "%s:E (parse context): %s\n%!" + (Session.string_of_pos pos) cx) + ps.pstate_ctxt; + let apos = lexpos ps in + span ps apos apos + { Ast.crate_items = (Item.empty_view, Hashtbl.create 0); + Ast.crate_meta = [||]; + Ast.crate_auth = Hashtbl.create 0; + Ast.crate_required = Hashtbl.create 0; + Ast.crate_required_syms = Hashtbl.create 0; + Ast.crate_main = None; + Ast.crate_files = Hashtbl.create 0 } +;; + + +let parse_crate_file + (sess:Session.sess) + (get_mod:get_mod_fn) + (infer_lib_name:(Ast.ident -> filename)) + : Ast.crate = + let fname = Session.filename_of sess.Session.sess_in in + let tref = ref (Temp 0) in + let nref = ref (Node 0) in + let oref = ref (Opaque 0) in + let required = Hashtbl.create 4 in + let required_syms = Hashtbl.create 4 in + let ps = + make_parser tref nref oref sess get_mod + infer_lib_name required required_syms fname + in + + let files = Hashtbl.create 0 in + let items = Hashtbl.create 4 in + let target_bindings = + let (os, arch, libc) = + match sess.Session.sess_targ with + Linux_x86_elf -> ("linux", "x86", "libc.so.6") + | Win32_x86_pe -> ("win32", "x86", "msvcrt.dll") + | MacOS_x86_macho -> ("macos", "x86", "libc.dylib") + in + [ + ("target_os", PVAL_str os); + ("target_arch", PVAL_str arch); + ("target_libc", PVAL_str libc) + ] + in + let build_bindings = + [ + ("build_compiler", PVAL_str Sys.executable_name); + ("build_input", PVAL_str fname); + ] + in + let initial_bindings = + target_bindings + @ build_bindings + in + let env = { env_bindings = initial_bindings; + env_prefix = [Filename.dirname fname]; + env_items = Hashtbl.create 0; + env_files = files; + env_required = required; + env_required_syms = required_syms; + env_ps = ps; } + in + let auth = Hashtbl.create 0 in + with_err_handling sess + begin + fun _ -> + let apos = lexpos ps in + let cexps = parse_cexps ps EOF in + let cdirs = eval_cexps env cexps in + let meta = Queue.create () in + let _ = + Array.iter + begin + fun d -> + match d with + CDIR_mod (name, item) -> htab_put items name item + | CDIR_meta metas -> + Array.iter (fun m -> Queue.add m meta) metas + | CDIR_auth (n,e) -> + if Hashtbl.mem auth n + then raise (err "duplicate 'auth' clause" ps) + else Hashtbl.add auth n e + | _ -> + raise + (err "unhandled directive at top level" ps) + end + cdirs + in + let bpos = lexpos ps in + let main = + if ps.pstate_sess.Session.sess_library_mode + then None + else Some (find_main_fn ps items) in + let crate = { Ast.crate_items = (Item.empty_view, items); + Ast.crate_meta = queue_to_arr meta; + Ast.crate_auth = auth; + Ast.crate_required = required; + Ast.crate_required_syms = required_syms; + Ast.crate_main = main; + Ast.crate_files = files } + in + let cratei = span ps apos bpos crate in + htab_put files cratei.id fname; + cratei + end +;; + +let parse_src_file + (sess:Session.sess) + (get_mod:get_mod_fn) + (infer_lib_name:(Ast.ident -> filename)) + : Ast.crate = + let fname = Session.filename_of sess.Session.sess_in in + let tref = ref (Temp 0) in + let nref = ref (Node 0) in + let oref = ref (Opaque 0) in + let required = Hashtbl.create 0 in + let required_syms = Hashtbl.create 0 in + let ps = + make_parser tref nref oref sess get_mod + infer_lib_name required required_syms fname + in + with_err_handling sess + begin + fun _ -> + let apos = lexpos ps in + let items = Item.parse_mod_items ps EOF in + let bpos = lexpos ps in + let files = Hashtbl.create 0 in + let main = + if ps.pstate_sess.Session.sess_library_mode + then None + else Some (find_main_fn ps (snd items)) + in + let crate = { Ast.crate_items = items; + Ast.crate_required = required; + Ast.crate_required_syms = required_syms; + Ast.crate_auth = Hashtbl.create 0; + Ast.crate_meta = [||]; + Ast.crate_main = main; + Ast.crate_files = files } + in + let cratei = span ps apos bpos crate in + htab_put files cratei.id fname; + cratei + end +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/item.ml b/src/boot/fe/item.ml new file mode 100644 index 00000000..75f86a58 --- /dev/null +++ b/src/boot/fe/item.ml @@ -0,0 +1,1139 @@ + +open Common;; +open Token;; +open Parser;; + +(* Item grammar. *) + +let default_exports = + let e = Hashtbl.create 0 in + Hashtbl.add e Ast.EXPORT_all_decls (); + e +;; + +let empty_view = { Ast.view_imports = Hashtbl.create 0; + Ast.view_exports = default_exports } +;; + +let rec parse_expr (ps:pstate) : (Ast.stmt array * Ast.expr) = + let pexp = ctxt "expr" Pexp.parse_pexp ps in + Pexp.desugar_expr ps pexp + +and parse_expr_atom (ps:pstate) : (Ast.stmt array * Ast.atom) = + let pexp = ctxt "expr" Pexp.parse_pexp ps in + Pexp.desugar_expr_atom ps pexp + +and parse_expr_atom_list + (bra:token) + (ket:token) + (ps:pstate) + : (Ast.stmt array * Ast.atom array) = + arj1st (bracketed_zero_or_more bra ket (Some COMMA) + (ctxt "expr-atom list" parse_expr_atom) ps) + +and parse_expr_init (lv:Ast.lval) (ps:pstate) : (Ast.stmt array) = + let pexp = ctxt "expr" Pexp.parse_pexp ps in + Pexp.desugar_expr_init ps lv pexp + +and parse_lval (ps:pstate) : (Ast.stmt array * Ast.lval) = + let pexp = Pexp.parse_pexp ps in + Pexp.desugar_lval ps pexp + +and parse_identified_slot_and_ident + (aliases_ok:bool) + (ps:pstate) + : (Ast.slot identified * Ast.ident) = + let slot = + ctxt "identified slot and ident: slot" + (Pexp.parse_identified_slot aliases_ok) ps + in + let ident = + ctxt "identified slot and ident: ident" Pexp.parse_ident ps + in + (slot, ident) + +and parse_zero_or_more_identified_slot_ident_pairs + (aliases_ok:bool) + (ps:pstate) + : (((Ast.slot identified) * Ast.ident) array) = + ctxt "zero+ slots and idents" + (paren_comma_list + (parse_identified_slot_and_ident aliases_ok)) ps + +and parse_block (ps:pstate) : Ast.block = + let apos = lexpos ps in + let stmts = + arj (ctxt "block: stmts" + (bracketed_zero_or_more LBRACE RBRACE + None parse_stmts) ps) + in + let bpos = lexpos ps in + span ps apos bpos stmts + +and parse_block_stmt (ps:pstate) : Ast.stmt = + let apos = lexpos ps in + let block = parse_block ps in + let bpos = lexpos ps in + span ps apos bpos (Ast.STMT_block block) + +and parse_init + (lval:Ast.lval) + (ps:pstate) + : Ast.stmt array = + let apos = lexpos ps in + let stmts = + match peek ps with + EQ -> + bump ps; + parse_expr_init lval ps + | LARROW -> + bump ps; + let (stmts, rhs) = ctxt "init: port" parse_lval ps in + let bpos = lexpos ps in + let stmt = Ast.STMT_recv (lval, rhs) in + Array.append stmts [| (span ps apos bpos stmt) |] + | _ -> arr [] + in + let _ = expect ps SEMI in + stmts + +and parse_slot_and_ident_and_init + (ps:pstate) + : (Ast.stmt array * Ast.slot * Ast.ident) = + let apos = lexpos ps in + let (slot, ident) = + ctxt "slot, ident and init: slot and ident" + (Pexp.parse_slot_and_ident false) ps + in + let bpos = lexpos ps in + let lval = Ast.LVAL_base (span ps apos bpos (Ast.BASE_ident ident)) in + let stmts = ctxt "slot, ident and init: init" (parse_init lval) ps in + (stmts, slot, ident) + +and parse_auto_slot_and_init + (ps:pstate) + : (Ast.stmt array * Ast.slot * Ast.ident) = + let apos = lexpos ps in + let ident = Pexp.parse_ident ps in + let bpos = lexpos ps in + let lval = Ast.LVAL_base (span ps apos bpos (Ast.BASE_ident ident)) in + let stmts = ctxt "slot, ident and init: init" (parse_init lval) ps in + (stmts, slot_auto, ident) + +(* + * We have no way to parse a single Ast.stmt; any incoming syntactic statement + * may desugar to N>1 real Ast.stmts + *) + +and parse_stmts (ps:pstate) : Ast.stmt array = + let apos = lexpos ps in + match peek ps with + + LOG -> + bump ps; + let (stmts, atom) = ctxt "stmts: log value" parse_expr_atom ps in + expect ps SEMI; + spans ps stmts apos (Ast.STMT_log atom) + + | CHECK -> + bump ps; + begin + + let rec name_to_lval (bpos:pos) (name:Ast.name) + : Ast.lval = + match name with + Ast.NAME_base nb -> + Ast.LVAL_base (span ps apos bpos nb) + | Ast.NAME_ext (n, nc) -> + Ast.LVAL_ext (name_to_lval bpos n, Ast.COMP_named nc) + in + + let rec carg_path_to_lval (bpos:pos) (path:Ast.carg_path) + : Ast.lval = + match path with + Ast.CARG_base Ast.BASE_formal -> + raise (err "converting formal constraint-arg to atom" ps) + | Ast.CARG_base (Ast.BASE_named nb) -> + Ast.LVAL_base (span ps apos bpos nb) + | Ast.CARG_ext (pth, nc) -> + Ast.LVAL_ext (carg_path_to_lval bpos pth, + Ast.COMP_named nc) + in + + let carg_to_atom (bpos:pos) (carg:Ast.carg) + : Ast.atom = + match carg with + Ast.CARG_lit lit -> + Ast.ATOM_literal (span ps apos bpos lit) + | Ast.CARG_path pth -> + Ast.ATOM_lval (carg_path_to_lval bpos pth) + in + + let synthesise_check_call (bpos:pos) (constr:Ast.constr) + : (Ast.lval * (Ast.atom array)) = + let lval = name_to_lval bpos constr.Ast.constr_name in + let args = + Array.map (carg_to_atom bpos) constr.Ast.constr_args + in + (lval, args) + in + + let synthesise_check_calls (bpos:pos) (constrs:Ast.constrs) + : Ast.check_calls = + Array.map (synthesise_check_call bpos) constrs + in + + match peek ps with + LPAREN -> + bump ps; + let (stmts, expr) = + ctxt "stmts: check value" parse_expr ps + in + expect ps RPAREN; + expect ps SEMI; + spans ps stmts apos (Ast.STMT_check_expr expr) + + | IF -> + bump ps; + expect ps LPAREN; + let constrs = Pexp.parse_constrs ps in + expect ps RPAREN; + let block = parse_block ps in + let bpos = lexpos ps in + let calls = synthesise_check_calls bpos constrs in + [| span ps apos bpos + (Ast.STMT_check_if (constrs, calls, block)) + |] + + | _ -> + let constrs = Pexp.parse_constrs ps in + expect ps SEMI; + let bpos = lexpos ps in + let calls = synthesise_check_calls bpos constrs in + [| span ps apos bpos + (Ast.STMT_check (constrs, calls)) + |] + end + + | ALT -> + bump ps; + begin + match peek ps with + TYPE -> [| |] + | LPAREN -> + let (stmts, lval) = bracketed LPAREN RPAREN parse_lval ps in + let rec parse_pat ps = + match peek ps with + IDENT ident -> + let apos = lexpos ps in + bump ps; + let bpos = lexpos ps in + + (* TODO: nullary constructors *) + if peek ps != LPAREN then + let slot = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = false; + Ast.slot_ty = None } + in + Ast.PAT_slot ((span ps apos bpos slot), ident) + else + let pats = + paren_comma_list parse_pat ps + in + Ast.PAT_tag (ident, pats) + | LIT_INT _ | LIT_CHAR _ | LIT_BOOL _ -> + Ast.PAT_lit (Pexp.parse_lit ps) + | UNDERSCORE -> bump ps; Ast.PAT_wild + | tok -> raise (Parse_err (ps, + "Expected pattern but found '" ^ + (string_of_tok tok) ^ "'")) + in + let rec parse_arms ps = + match peek ps with + CASE -> + bump ps; + let pat = bracketed LPAREN RPAREN parse_pat ps in + let block = parse_block ps in + let arm = (pat, block) in + (span ps apos (lexpos ps) arm)::(parse_arms ps) + | _ -> [] + in + let parse_alt_block ps = + let arms = ctxt "alt tag arms" parse_arms ps in + spans ps stmts apos begin + Ast.STMT_alt_tag { + Ast.alt_tag_lval = lval; + Ast.alt_tag_arms = Array.of_list arms + } + end + in + bracketed LBRACE RBRACE parse_alt_block ps + | _ -> [| |] + end + + | IF -> + let final_else = ref None in + let rec parse_stmt_if _ = + bump ps; + let (stmts, expr) = + ctxt "stmts: if cond" + (bracketed LPAREN RPAREN parse_expr) ps + in + let then_block = ctxt "stmts: if-then" parse_block ps in + begin + match peek ps with + ELSE -> + begin + bump ps; + match peek ps with + IF -> + let nested_if = parse_stmt_if () in + let bpos = lexpos ps in + final_else := + Some (span ps apos bpos nested_if) + | _ -> + final_else := + Some (ctxt "stmts: if-else" parse_block ps) + end + | _ -> () + end; + let res = + spans ps stmts apos + (Ast.STMT_if + { Ast.if_test = expr; + Ast.if_then = then_block; + Ast.if_else = !final_else; }) + in + final_else := None; + res + in + parse_stmt_if() + + | FOR -> + bump ps; + begin + match peek ps with + EACH -> + bump ps; + let inner ps : ((Ast.slot identified * Ast.ident) + * Ast.stmt array + * (Ast.lval * Ast.atom array)) = + let slot = (parse_identified_slot_and_ident true ps) in + let _ = (expect ps IN) in + let (stmts1, iter) = (rstr true parse_lval) ps in + let (stmts2, args) = + parse_expr_atom_list LPAREN RPAREN ps + in + (slot, Array.append stmts1 stmts2, (iter, args)) + in + let (slot, stmts, call) = ctxt "stmts: foreach head" + (bracketed LPAREN RPAREN inner) ps + in + let body_block = + ctxt "stmts: foreach body" parse_block ps + in + let bpos = lexpos ps in + let head_block = + (* + * Slightly weird, but we put an extra nesting level of + * block here to separate the part that lives in our frame + * (the iter slot) from the part that lives in the callee + * frame (the body block). + *) + span ps apos bpos [| + span ps apos bpos (Ast.STMT_block body_block); + |] + in + Array.append stmts + [| span ps apos bpos + (Ast.STMT_for_each + { Ast.for_each_slot = slot; + Ast.for_each_call = call; + Ast.for_each_head = head_block; + Ast.for_each_body = body_block; }) |] + | _ -> + let inner ps = + let slot = (parse_identified_slot_and_ident false ps) in + let _ = (expect ps IN) in + let lval = (parse_lval ps) in + (slot, lval) in + let (slot, seq) = + ctxt "stmts: for head" (bracketed LPAREN RPAREN inner) ps + in + let body_block = ctxt "stmts: for body" parse_block ps in + let bpos = lexpos ps in + [| span ps apos bpos + (Ast.STMT_for + { Ast.for_slot = slot; + Ast.for_seq = seq; + Ast.for_body = body_block; }) |] + end + + | WHILE -> + bump ps; + let (stmts, test) = + ctxt "stmts: while cond" (bracketed LPAREN RPAREN parse_expr) ps + in + let body_block = ctxt "stmts: while body" parse_block ps in + let bpos = lexpos ps in + [| span ps apos bpos + (Ast.STMT_while + { Ast.while_lval = (stmts, test); + Ast.while_body = body_block; }) |] + + | PUT -> + begin + bump ps; + match peek ps with + EACH -> + bump ps; + let (lstmts, lval) = + ctxt "put each: lval" (rstr true parse_lval) ps + in + let (astmts, args) = + ctxt "put each: args" + (parse_expr_atom_list LPAREN RPAREN) ps + in + let bpos = lexpos ps in + let be = + span ps apos bpos (Ast.STMT_put_each (lval, args)) + in + expect ps SEMI; + Array.concat [ lstmts; astmts; [| be |] ] + + | _ -> + begin + let (stmts, e) = + match peek ps with + SEMI -> (arr [], None) + | _ -> + let (stmts, expr) = + ctxt "stmts: put expr" parse_expr_atom ps + in + expect ps SEMI; + (stmts, Some expr) + in + spans ps stmts apos (Ast.STMT_put e) + end + end + + | RET -> + bump ps; + let (stmts, e) = + match peek ps with + SEMI -> (bump ps; (arr [], None)) + | _ -> + let (stmts, expr) = + ctxt "stmts: ret expr" parse_expr_atom ps + in + expect ps SEMI; + (stmts, Some expr) + in + spans ps stmts apos (Ast.STMT_ret e) + + | BE -> + bump ps; + let (lstmts, lval) = ctxt "be: lval" (rstr true parse_lval) ps in + let (astmts, args) = + ctxt "be: args" (parse_expr_atom_list LPAREN RPAREN) ps + in + let bpos = lexpos ps in + let be = span ps apos bpos (Ast.STMT_be (lval, args)) in + expect ps SEMI; + Array.concat [ lstmts; astmts; [| be |] ] + + | LBRACE -> [| ctxt "stmts: block" parse_block_stmt ps |] + + | LET -> + bump ps; + let (stmts, slot, ident) = + ctxt "stmt slot" parse_slot_and_ident_and_init ps in + let slot = Pexp.apply_mutability slot true in + let bpos = lexpos ps in + let decl = Ast.DECL_slot (Ast.KEY_ident ident, + (span ps apos bpos slot)) + in + Array.concat [[| span ps apos bpos (Ast.STMT_decl decl) |]; stmts] + + | AUTO -> + bump ps; + let (stmts, slot, ident) = + ctxt "stmt slot" parse_auto_slot_and_init ps in + let slot = Pexp.apply_mutability slot true in + let bpos = lexpos ps in + let decl = Ast.DECL_slot (Ast.KEY_ident ident, + (span ps apos bpos slot)) + in + Array.concat [[| span ps apos bpos (Ast.STMT_decl decl) |]; stmts] + + | YIELD -> + bump ps; + expect ps SEMI; + let bpos = lexpos ps in + [| span ps apos bpos Ast.STMT_yield |] + + | FAIL -> + bump ps; + expect ps SEMI; + let bpos = lexpos ps in + [| span ps apos bpos Ast.STMT_fail |] + + | JOIN -> + bump ps; + let (stmts, lval) = ctxt "stmts: task expr" parse_lval ps in + expect ps SEMI; + spans ps stmts apos (Ast.STMT_join lval) + + | MOD | OBJ | TYPE | FN | USE | NATIVE -> + let (ident, item) = ctxt "stmt: decl" parse_mod_item ps in + let decl = Ast.DECL_mod_item (ident, item) in + let stmts = expand_tags_to_stmts ps item in + spans ps stmts apos (Ast.STMT_decl decl) + + | _ -> + let (lstmts, lval) = ctxt "stmt: lval" parse_lval ps in + begin + match peek ps with + + SEMI -> (bump ps; lstmts) + + | EQ -> parse_init lval ps + + | OPEQ binop_token -> + bump ps; + let (stmts, rhs) = + ctxt "stmt: opeq rhs" parse_expr_atom ps + in + let binop = + match binop_token with + PLUS -> Ast.BINOP_add + | MINUS -> Ast.BINOP_sub + | STAR -> Ast.BINOP_mul + | SLASH -> Ast.BINOP_div + | PERCENT -> Ast.BINOP_mod + | AND -> Ast.BINOP_and + | OR -> Ast.BINOP_or + | CARET -> Ast.BINOP_xor + | LSL -> Ast.BINOP_lsl + | LSR -> Ast.BINOP_lsr + | ASR -> Ast.BINOP_asr + | _ -> raise (err "unknown opeq token" ps) + in + expect ps SEMI; + spans ps stmts apos + (Ast.STMT_copy_binop (lval, binop, rhs)) + + | LARROW -> + bump ps; + let (stmts, rhs) = ctxt "stmt: recv rhs" parse_lval ps in + let _ = expect ps SEMI in + spans ps stmts apos (Ast.STMT_recv (lval, rhs)) + + | SEND -> + bump ps; + let (stmts, rhs) = + ctxt "stmt: send rhs" parse_expr_atom ps + in + let _ = expect ps SEMI in + let bpos = lexpos ps in + let (src, copy) = match rhs with + Ast.ATOM_lval lv -> (lv, [| |]) + | _ -> + let (_, tmp, tempdecl) = + build_tmp ps slot_auto apos bpos + in + let copy = span ps apos bpos + (Ast.STMT_copy (tmp, Ast.EXPR_atom rhs)) in + ((clone_lval ps tmp), [| tempdecl; copy |]) + in + let send = + span ps apos bpos + (Ast.STMT_send (lval, src)) + in + Array.concat [ stmts; copy; [| send |] ] + + | _ -> raise (unexpected ps) + end + + +and parse_ty_param (iref:int ref) (ps:pstate) : Ast.ty_param identified = + let apos = lexpos ps in + let e = Pexp.parse_effect ps in + let ident = Pexp.parse_ident ps in + let i = !iref in + let bpos = lexpos ps in + incr iref; + span ps apos bpos (ident, (i, e)) + +and parse_ty_params (ps:pstate) + : (Ast.ty_param identified) array = + match peek ps with + LBRACKET -> + bracketed_zero_or_more LBRACKET RBRACKET (Some COMMA) + (parse_ty_param (ref 0)) ps + | _ -> arr [] + +and parse_ident_and_params (ps:pstate) (cstr:string) + : (Ast.ident * (Ast.ty_param identified) array) = + let ident = ctxt ("mod " ^ cstr ^ " item: ident") Pexp.parse_ident ps in + let params = + ctxt ("mod " ^ cstr ^ " item: type params") parse_ty_params ps + in + (ident, params) + +and parse_inputs + (ps:pstate) + : ((Ast.slot identified * Ast.ident) array * Ast.constrs) = + let slots = + match peek ps with + LPAREN -> ctxt "inputs: input idents and slots" + (parse_zero_or_more_identified_slot_ident_pairs true) ps + | _ -> raise (unexpected ps) + in + let constrs = + match peek ps with + COLON -> (bump ps; ctxt "inputs: constrs" Pexp.parse_constrs ps) + | _ -> [| |] + in + let rec rewrite_carg_path cp = + match cp with + Ast.CARG_base (Ast.BASE_named (Ast.BASE_ident ident)) -> + begin + let res = ref cp in + for i = 0 to (Array.length slots) - 1 + do + let (_, ident') = slots.(i) in + if ident' = ident + then res := Ast.CARG_ext (Ast.CARG_base Ast.BASE_formal, + Ast.COMP_idx i) + else () + done; + !res + end + | Ast.CARG_base _ -> cp + | Ast.CARG_ext (cp, ext) -> + Ast.CARG_ext (rewrite_carg_path cp, ext) + in + (* Rewrite constrs with input tuple as BASE_formal. *) + Array.iter + begin + fun constr -> + let args = constr.Ast.constr_args in + Array.iteri + begin + fun i carg -> + match carg with + Ast.CARG_path cp -> + args.(i) <- Ast.CARG_path (rewrite_carg_path cp) + | _ -> () + end + args + end + constrs; + (slots, constrs) + + +and parse_in_and_out + (ps:pstate) + : ((Ast.slot identified * Ast.ident) array + * Ast.constrs + * Ast.slot identified) = + let (inputs, constrs) = parse_inputs ps in + let output = + match peek ps with + RARROW -> + bump ps; + ctxt "fn in and out: output slot" + (Pexp.parse_identified_slot true) ps + | _ -> + let apos = lexpos ps in + span ps apos apos slot_nil + in + (inputs, constrs, output) + + +(* parse_fn starts at the first lparen of the sig. *) +and parse_fn + (is_iter:bool) + (effect:Ast.effect) + (ps:pstate) + : Ast.fn = + let (inputs, constrs, output) = + ctxt "fn: in_and_out" parse_in_and_out ps + in + let body = ctxt "fn: body" parse_block ps in + { Ast.fn_input_slots = inputs; + Ast.fn_input_constrs = constrs; + Ast.fn_output_slot = output; + Ast.fn_aux = { Ast.fn_effect = effect; + Ast.fn_is_iter = is_iter; }; + Ast.fn_body = body; } + +and parse_meta_input (ps:pstate) : (Ast.ident * string option) = + let lab = (ctxt "meta input: label" Pexp.parse_ident ps) in + match peek ps with + EQ -> + bump ps; + let v = + match peek ps with + UNDERSCORE -> bump ps; None + | LIT_STR s -> bump ps; Some s + | _ -> raise (unexpected ps) + in + (lab, v) + | _ -> raise (unexpected ps) + +and parse_meta_pat (ps:pstate) : Ast.meta_pat = + bracketed_zero_or_more LPAREN RPAREN + (Some COMMA) parse_meta_input ps + +and parse_meta (ps:pstate) : Ast.meta = + Array.map + begin + fun (id,v) -> + match v with + None -> + raise (err ("wildcard found in meta " + ^ "pattern where value expected") ps) + | Some v -> (id,v) + end + (parse_meta_pat ps) + +and parse_optional_meta_pat (ps:pstate) (ident:Ast.ident) : Ast.meta_pat = + match peek ps with + LPAREN -> parse_meta_pat ps + | _ -> [| ("name", Some ident) |] + + +and parse_obj_item + (ps:pstate) + (apos:pos) + (effect:Ast.effect) + : (Ast.ident * Ast.mod_item) = + expect ps OBJ; + let (ident, params) = parse_ident_and_params ps "obj" in + let (state, constrs) = (ctxt "obj state" parse_inputs ps) in + let drop = ref None in + expect ps LBRACE; + let fns = Hashtbl.create 0 in + while (not (peek ps = RBRACE)) + do + let apos = lexpos ps in + match peek ps with + IO | STATE | UNSAFE | FN | ITER -> + let effect = Pexp.parse_effect ps in + let is_iter = (peek ps) = ITER in + bump ps; + let ident = ctxt "obj fn: ident" Pexp.parse_ident ps in + let fn = ctxt "obj fn: fn" (parse_fn is_iter effect) ps in + let bpos = lexpos ps in + htab_put fns ident (span ps apos bpos fn) + | DROP -> + bump ps; + drop := Some (parse_block ps) + | RBRACE -> () + | _ -> raise (unexpected ps) + done; + expect ps RBRACE; + let bpos = lexpos ps in + let obj = { Ast.obj_state = state; + Ast.obj_effect = effect; + Ast.obj_constrs = constrs; + Ast.obj_fns = fns; + Ast.obj_drop = !drop } + in + (ident, + span ps apos bpos + (decl params (Ast.MOD_ITEM_obj obj))) + + +and parse_mod_item (ps:pstate) : (Ast.ident * Ast.mod_item) = + let apos = lexpos ps in + let parse_lib_name ident = + match peek ps with + EQ -> + begin + bump ps; + match peek ps with + LIT_STR s -> (bump ps; s) + | _ -> raise (unexpected ps) + end + | _ -> ps.pstate_infer_lib_name ident + in + + match peek ps with + + IO | STATE | UNSAFE | OBJ | FN | ITER -> + let effect = Pexp.parse_effect ps in + begin + match peek ps with + OBJ -> parse_obj_item ps apos effect + | _ -> + let is_iter = (peek ps) = ITER in + bump ps; + let (ident, params) = parse_ident_and_params ps "fn" in + let fn = + ctxt "mod fn item: fn" (parse_fn is_iter effect) ps + in + let bpos = lexpos ps in + (ident, + span ps apos bpos + (decl params (Ast.MOD_ITEM_fn fn))) + end + + | TYPE -> + bump ps; + let (ident, params) = parse_ident_and_params ps "type" in + let _ = expect ps EQ in + let ty = ctxt "mod type item: ty" Pexp.parse_ty ps in + let _ = expect ps SEMI in + let bpos = lexpos ps in + let item = Ast.MOD_ITEM_type ty in + (ident, span ps apos bpos (decl params item)) + + | MOD -> + bump ps; + let (ident, params) = parse_ident_and_params ps "mod" in + expect ps LBRACE; + let items = parse_mod_items ps RBRACE in + let bpos = lexpos ps in + (ident, + span ps apos bpos + (decl params (Ast.MOD_ITEM_mod items))) + + | NATIVE -> + begin + bump ps; + let conv = + match peek ps with + LIT_STR s -> + bump ps; + begin + match string_to_conv s with + None -> raise (unexpected ps) + | Some c -> c + end + | _ -> CONV_cdecl + in + expect ps MOD; + let (ident, params) = parse_ident_and_params ps "native mod" in + let path = parse_lib_name ident in + let items = parse_mod_items_from_signature ps in + let bpos = lexpos ps in + let rlib = REQUIRED_LIB_c { required_libname = path; + required_prefix = ps.pstate_depth } + in + let item = decl params (Ast.MOD_ITEM_mod items) in + let item = span ps apos bpos item in + note_required_mod ps {lo=apos; hi=bpos} conv rlib item; + (ident, item) + end + + | USE -> + begin + bump ps; + let ident = ctxt "use mod: ident" Pexp.parse_ident ps in + let meta = + ctxt "use mod: meta" parse_optional_meta_pat ps ident + in + let bpos = lexpos ps in + let id = (span ps apos bpos ()).id in + let (path, items) = + ps.pstate_get_mod meta id ps.pstate_node_id ps.pstate_opaque_id + in + let bpos = lexpos ps in + expect ps SEMI; + let rlib = + REQUIRED_LIB_rust { required_libname = path; + required_prefix = ps.pstate_depth } + in + iflog ps + begin + fun _ -> + log ps "extracted mod from %s (binding to %s)" + path ident; + log ps "%a" Ast.sprintf_mod_items items; + end; + let item = decl [||] (Ast.MOD_ITEM_mod (empty_view, items)) in + let item = span ps apos bpos item in + note_required_mod ps {lo=apos; hi=bpos} CONV_rust rlib item; + (ident, item) + end + + + + | _ -> raise (unexpected ps) + + +and parse_mod_items_from_signature + (ps:pstate) + : (Ast.mod_view * Ast.mod_items) = + let mis = Hashtbl.create 0 in + expect ps LBRACE; + while not (peek ps = RBRACE) + do + let (ident, mti) = ctxt "mod items from sig: mod item" + parse_mod_item_from_signature ps + in + Hashtbl.add mis ident mti; + done; + expect ps RBRACE; + (empty_view, mis) + + +and parse_mod_item_from_signature (ps:pstate) + : (Ast.ident * Ast.mod_item) = + let apos = lexpos ps in + match peek ps with + MOD -> + bump ps; + let (ident, params) = parse_ident_and_params ps "mod signature" in + let items = parse_mod_items_from_signature ps in + let bpos = lexpos ps in + (ident, span ps apos bpos (decl params (Ast.MOD_ITEM_mod items))) + + | IO | STATE | UNSAFE | FN | ITER -> + let effect = Pexp.parse_effect ps in + let is_iter = (peek ps) = ITER in + bump ps; + let (ident, params) = parse_ident_and_params ps "fn signature" in + let (inputs, constrs, output) = parse_in_and_out ps in + let bpos = lexpos ps in + let body = span ps apos bpos [| |] in + let fn = + Ast.MOD_ITEM_fn + { Ast.fn_input_slots = inputs; + Ast.fn_input_constrs = constrs; + Ast.fn_output_slot = output; + Ast.fn_aux = { Ast.fn_effect = effect; + Ast.fn_is_iter = is_iter; }; + Ast.fn_body = body; } + in + let node = span ps apos bpos (decl params fn) in + begin + match peek ps with + EQ -> + bump ps; + begin + match peek ps with + LIT_STR s -> + bump ps; + htab_put ps.pstate_required_syms node.id s + | _ -> raise (unexpected ps) + end; + | _ -> () + end; + expect ps SEMI; + (ident, node) + + | TYPE -> + bump ps; + let (ident, params) = parse_ident_and_params ps "type type" in + let t = + match peek ps with + SEMI -> Ast.TY_native (next_opaque_id ps) + | _ -> Pexp.parse_ty ps + in + expect ps SEMI; + let bpos = lexpos ps in + (ident, span ps apos bpos (decl params (Ast.MOD_ITEM_type t))) + + (* FIXME: parse obj. *) + | _ -> raise (unexpected ps) + + +and expand_tags + (ps:pstate) + (item:Ast.mod_item) + : (Ast.ident * Ast.mod_item) array = + let handle_ty_tag id ttag = + let tags = ref [] in + Hashtbl.iter + begin + fun name tup -> + let ident = match name with + Ast.NAME_base (Ast.BASE_ident ident) -> ident + | _ -> + raise (Parse_err + (ps, "unexpected name type while expanding tag")) + in + let header = + Array.map (fun slot -> (clone_span ps item slot)) tup + in + let tag_item' = Ast.MOD_ITEM_tag (header, ttag, id) in + let cloned_params = + Array.map (fun p -> clone_span ps p p.node) + item.node.Ast.decl_params + in + let tag_item = + clone_span ps item (decl cloned_params tag_item') + in + tags := (ident, tag_item) :: (!tags) + end + ttag; + arr (!tags) + in + let handle_ty_decl id tyd = + match tyd with + Ast.TY_tag ttag -> handle_ty_tag id ttag + | _ -> [| |] + in + match item.node.Ast.decl_item with + Ast.MOD_ITEM_type tyd -> handle_ty_decl item.id tyd + | _ -> [| |] + + +and expand_tags_to_stmts + (ps:pstate) + (item:Ast.mod_item) + : Ast.stmt array = + let id_items = expand_tags ps item in + Array.map + (fun (ident, tag_item) -> + clone_span ps item + (Ast.STMT_decl + (Ast.DECL_mod_item (ident, tag_item)))) + id_items + + +and expand_tags_to_items + (ps:pstate) + (item:Ast.mod_item) + (items:Ast.mod_items) + : unit = + let id_items = expand_tags ps item in + Array.iter + (fun (ident, item) -> htab_put items ident item) + id_items + + +and note_required_mod + (ps:pstate) + (sp:span) + (conv:nabi_conv) + (rlib:required_lib) + (item:Ast.mod_item) + : unit = + iflog ps + begin + fun _ -> log ps "marking item #%d as required" (int_of_node item.id) + end; + htab_put ps.pstate_required item.id (rlib, conv); + if not (Hashtbl.mem ps.pstate_sess.Session.sess_spans item.id) + then Hashtbl.add ps.pstate_sess.Session.sess_spans item.id sp; + match item.node.Ast.decl_item with + Ast.MOD_ITEM_mod (_, items) -> + Hashtbl.iter + begin + fun _ sub -> + note_required_mod ps sp conv rlib sub + end + items + | _ -> () + + +and parse_import + (ps:pstate) + (imports:(Ast.ident, Ast.name) Hashtbl.t) + : unit = + let import a n = + let a = match a with + None -> + begin + match n with + Ast.NAME_ext (_, Ast.COMP_ident i) + | Ast.NAME_ext (_, Ast.COMP_app (i, _)) + | Ast.NAME_base (Ast.BASE_ident i) + | Ast.NAME_base (Ast.BASE_app (i, _)) -> i + | _ -> raise (Parse_err (ps, "bad import specification")) + end + | Some i -> i + in + Hashtbl.add imports a n + in + match peek ps with + IDENT i -> + begin + bump ps; + match peek ps with + EQ -> + (* + * import x = ... + *) + bump ps; + import (Some i) (Pexp.parse_name ps) + | _ -> + (* + * import x... + *) + import None (Pexp.parse_name_ext ps + (Ast.NAME_base + (Ast.BASE_ident i))) + end + | _ -> + import None (Pexp.parse_name ps) + + +and parse_export + (ps:pstate) + (exports:(Ast.export, unit) Hashtbl.t) + : unit = + let e = + match peek ps with + STAR -> bump ps; Ast.EXPORT_all_decls + | IDENT i -> bump ps; Ast.EXPORT_ident i + | _ -> raise (unexpected ps) + in + Hashtbl.add exports e () + + +and parse_mod_items + (ps:pstate) + (terminal:token) + : (Ast.mod_view * Ast.mod_items) = + ps.pstate_depth <- ps.pstate_depth + 1; + let imports = Hashtbl.create 0 in + let exports = Hashtbl.create 0 in + let in_view = ref true in + let items = Hashtbl.create 4 in + while (not (peek ps = terminal)) + do + if !in_view + then + match peek ps with + IMPORT -> + bump ps; + parse_import ps imports; + expect ps SEMI; + | EXPORT -> + bump ps; + parse_export ps exports; + expect ps SEMI; + | _ -> + in_view := false + else + let (ident, item) = parse_mod_item ps in + htab_put items ident item; + expand_tags_to_items ps item items; + done; + if (Hashtbl.length exports) = 0 + then Hashtbl.add exports Ast.EXPORT_all_decls (); + expect ps terminal; + ps.pstate_depth <- ps.pstate_depth - 1; + let view = { Ast.view_imports = imports; + Ast.view_exports = exports } + in + (view, items) +;; + + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/lexer.mll b/src/boot/fe/lexer.mll new file mode 100644 index 00000000..fb4d58c5 --- /dev/null +++ b/src/boot/fe/lexer.mll @@ -0,0 +1,362 @@ + + +{ + + open Token;; + open Common;; + + exception Lex_err of (string * Common.pos);; + + let fail lexbuf s = + let p = lexbuf.Lexing.lex_start_p in + let pos = + (p.Lexing.pos_fname, + p.Lexing.pos_lnum , + (p.Lexing.pos_cnum) - (p.Lexing.pos_bol)) + in + raise (Lex_err (s, pos)) + ;; + + let bump_line p = { p with + Lexing.pos_lnum = p.Lexing.pos_lnum + 1; + Lexing.pos_bol = p.Lexing.pos_cnum } + ;; + + let keyword_table = Hashtbl.create 100 + let _ = + List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok) + [ ("mod", MOD); + ("use", USE); + ("meta", META); + ("auth", AUTH); + + ("syntax", SYNTAX); + + ("if", IF); + ("else", ELSE); + ("while", WHILE); + ("do", DO); + ("alt", ALT); + ("case", CASE); + + ("for", FOR); + ("each", EACH); + ("put", PUT); + ("ret", RET); + ("be", BE); + + ("fail", FAIL); + ("drop", DROP); + + ("type", TYPE); + ("check", CHECK); + ("claim", CLAIM); + ("prove", PROVE); + + ("io", IO); + ("state", STATE); + ("unsafe", UNSAFE); + + ("native", NATIVE); + ("mutable", MUTABLE); + ("auto", AUTO); + + ("fn", FN); + ("iter", ITER); + + ("import", IMPORT); + ("export", EXPORT); + + ("let", LET); + + ("log", LOG); + ("spawn", SPAWN); + ("thread", THREAD); + ("yield", YIELD); + ("join", JOIN); + + ("bool", BOOL); + + ("int", INT); + ("uint", UINT); + + ("char", CHAR); + ("str", STR); + + ("rec", REC); + ("tup", TUP); + ("tag", TAG); + ("vec", VEC); + ("any", ANY); + + ("obj", OBJ); + + ("port", PORT); + ("chan", CHAN); + + ("task", TASK); + + ("true", LIT_BOOL true); + ("false", LIT_BOOL false); + + ("in", IN); + + ("as", AS); + ("with", WITH); + + ("bind", BIND); + + ("u8", MACH TY_u8); + ("u16", MACH TY_u16); + ("u32", MACH TY_u32); + ("u64", MACH TY_u64); + ("i8", MACH TY_i8); + ("i16", MACH TY_i16); + ("i32", MACH TY_i32); + ("i64", MACH TY_i64); + ("f32", MACH TY_f32); + ("f64", MACH TY_f64) + ] +;; +} + +let hexdig = ['0'-'9' 'a'-'f' 'A'-'F'] +let bin = "0b" ['0' '1']['0' '1' '_']* +let hex = "0x" hexdig ['0'-'9' 'a'-'f' 'A'-'F' '_']* +let dec = ['0'-'9']+ +let exp = ['e''E']['-''+']? dec +let flo = (dec '.' dec (exp?)) | (dec exp) + +let ws = [ ' ' '\t' '\r' ] + +let id = ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']* + +rule token = parse + ws+ { token lexbuf } +| '\n' { lexbuf.Lexing.lex_curr_p + <- (bump_line lexbuf.Lexing.lex_curr_p); + token lexbuf } +| "//" [^'\n']* { token lexbuf } + +| '+' { PLUS } +| '-' { MINUS } +| '*' { STAR } +| '/' { SLASH } +| '%' { PERCENT } +| '=' { EQ } +| '<' { LT } +| "<=" { LE } +| "==" { EQEQ } +| "!=" { NE } +| ">=" { GE } +| '>' { GT } +| '!' { NOT } +| '&' { AND } +| "&&" { ANDAND } +| '|' { OR } +| "||" { OROR } +| "<<" { LSL } +| ">>" { LSR } +| ">>>" { ASR } +| '~' { TILDE } +| '{' { LBRACE } +| '_' (dec as n) { IDX (int_of_string n) } +| '_' { UNDERSCORE } +| '}' { RBRACE } + +| "+=" { OPEQ (PLUS) } +| "-=" { OPEQ (MINUS) } +| "*=" { OPEQ (STAR) } +| "/=" { OPEQ (SLASH) } +| "%=" { OPEQ (PERCENT) } +| "&=" { OPEQ (AND) } +| "|=" { OPEQ (OR) } +| "<<=" { OPEQ (LSL) } +| ">>=" { OPEQ (LSR) } +| ">>>=" { OPEQ (ASR) } +| "^=" { OPEQ (CARET) } + +| '#' { POUND } +| '@' { AT } +| '^' { CARET } +| '.' { DOT } +| ',' { COMMA } +| ';' { SEMI } +| ':' { COLON } +| "<-" { LARROW } +| "<|" { SEND } +| "->" { RARROW } +| '(' { LPAREN } +| ')' { RPAREN } +| '[' { LBRACKET } +| ']' { RBRACKET } + +| id as i + { try + Hashtbl.find keyword_table i + with + Not_found -> IDENT (i) + } + +| bin as n { LIT_INT (Int64.of_string n, n) } +| hex as n { LIT_INT (Int64.of_string n, n) } +| dec as n { LIT_INT (Int64.of_string n, n) } +| flo as n { LIT_FLO n } + +| '\'' { char lexbuf } +| '"' { let buf = Buffer.create 32 in + str buf lexbuf } + +| eof { EOF } + +and str buf = parse + _ as ch + { + match ch with + '"' -> LIT_STR (Buffer.contents buf) + | '\\' -> str_escape buf lexbuf + | _ -> + Buffer.add_char buf ch; + let c = Char.code ch in + if bounds 0 c 0x7f + then str buf lexbuf + else + if ((c land 0b1110_0000) == 0b1100_0000) + then ext_str 1 buf lexbuf + else + if ((c land 0b1111_0000) == 0b1110_0000) + then ext_str 2 buf lexbuf + else + if ((c land 0b1111_1000) == 0b1111_0000) + then ext_str 3 buf lexbuf + else + if ((c land 0b1111_1100) == 0b1111_1000) + then ext_str 4 buf lexbuf + else + if ((c land 0b1111_1110) == 0b1111_1100) + then ext_str 5 buf lexbuf + else fail lexbuf "bad initial utf-8 byte" + } + +and str_escape buf = parse + 'x' ((hexdig hexdig) as h) + | 'u' ((hexdig hexdig hexdig hexdig) as h) + | 'U' + ((hexdig hexdig hexdig hexdig + hexdig hexdig hexdig hexdig) as h) + { + Buffer.add_string buf (char_as_utf8 (int_of_string ("0x" ^ h))); + str buf lexbuf + } + | 'n' { Buffer.add_char buf '\n'; str buf lexbuf } + | 'r' { Buffer.add_char buf '\r'; str buf lexbuf } + | 't' { Buffer.add_char buf '\t'; str buf lexbuf } + | '\\' { Buffer.add_char buf '\\'; str buf lexbuf } + | '"' { Buffer.add_char buf '"'; str buf lexbuf } + | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) } + + +and ext_str n buf = parse + _ as ch + { + let c = Char.code ch in + if ((c land 0b1100_0000) == (0b1000_0000)) + then + begin + Buffer.add_char buf ch; + if n = 1 + then str buf lexbuf + else ext_str (n-1) buf lexbuf + end + else + fail lexbuf "bad trailing utf-8 byte" + } + + +and char = parse + '\\' { char_escape lexbuf } + | _ as c + { + let c = Char.code c in + if bounds 0 c 0x7f + then end_char c lexbuf + else + if ((c land 0b1110_0000) == 0b1100_0000) + then ext_char 1 (c land 0b0001_1111) lexbuf + else + if ((c land 0b1111_0000) == 0b1110_0000) + then ext_char 2 (c land 0b0000_1111) lexbuf + else + if ((c land 0b1111_1000) == 0b1111_0000) + then ext_char 3 (c land 0b0000_0111) lexbuf + else + if ((c land 0b1111_1100) == 0b1111_1000) + then ext_char 4 (c land 0b0000_0011) lexbuf + else + if ((c land 0b1111_1110) == 0b1111_1100) + then ext_char 5 (c land 0b0000_0001) lexbuf + else fail lexbuf "bad initial utf-8 byte" + } + +and char_escape = parse + 'x' ((hexdig hexdig) as h) + | 'u' ((hexdig hexdig hexdig hexdig) as h) + | 'U' + ((hexdig hexdig hexdig hexdig + hexdig hexdig hexdig hexdig) as h) + { + end_char (int_of_string ("0x" ^ h)) lexbuf + } + | 'n' { end_char (Char.code '\n') lexbuf } + | 'r' { end_char (Char.code '\r') lexbuf } + | 't' { end_char (Char.code '\t') lexbuf } + | '\\' { end_char (Char.code '\\') lexbuf } + | '\'' { end_char (Char.code '\'') lexbuf } + | _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) } + + +and ext_char n accum = parse + _ as c + { + let c = Char.code c in + if ((c land 0b1100_0000) == (0b1000_0000)) + then + let accum = (accum lsl 6) lor (c land 0b0011_1111) in + if n = 1 + then end_char accum lexbuf + else ext_char (n-1) accum lexbuf + else + fail lexbuf "bad trailing utf-8 byte" + } + +and end_char accum = parse + '\'' { LIT_CHAR accum } + + +and bracequote buf depth = parse + + '\\' '{' { Buffer.add_char buf '{'; + bracequote buf depth lexbuf } + +| '{' { Buffer.add_char buf '{'; + bracequote buf (depth+1) lexbuf } + +| '\\' '}' { Buffer.add_char buf '}'; + bracequote buf depth lexbuf } + +| '}' { if depth = 1 + then BRACEQUOTE (Buffer.contents buf) + else + begin + Buffer.add_char buf '}'; + bracequote buf (depth-1) lexbuf + end } + +| '\\' [^'{' '}'] { let s = Lexing.lexeme lexbuf in + Buffer.add_string buf s; + bracequote buf depth lexbuf } + + +| [^'\\' '{' '}']+ { let s = Lexing.lexeme lexbuf in + Buffer.add_string buf s; + bracequote buf depth lexbuf } diff --git a/src/boot/fe/parser.ml b/src/boot/fe/parser.ml new file mode 100644 index 00000000..3dda93ac --- /dev/null +++ b/src/boot/fe/parser.ml @@ -0,0 +1,374 @@ + +open Common;; +open Token;; + +(* Fundamental parser types and actions *) + +type get_mod_fn = (Ast.meta_pat + -> node_id + -> (node_id ref) + -> (opaque_id ref) + -> (filename * Ast.mod_items)) +;; + +type pstate = + { mutable pstate_peek : token; + mutable pstate_ctxt : (string * pos) list; + mutable pstate_rstr : bool; + mutable pstate_depth: int; + pstate_lexbuf : Lexing.lexbuf; + pstate_file : filename; + pstate_sess : Session.sess; + pstate_temp_id : temp_id ref; + pstate_node_id : node_id ref; + pstate_opaque_id : opaque_id ref; + pstate_get_mod : get_mod_fn; + pstate_infer_lib_name : (Ast.ident -> filename); + pstate_required : (node_id, (required_lib * nabi_conv)) Hashtbl.t; + pstate_required_syms : (node_id, string) Hashtbl.t; } +;; + +let log (ps:pstate) = Session.log "parse" + ps.pstate_sess.Session.sess_log_parse + ps.pstate_sess.Session.sess_log_out +;; + +let iflog ps thunk = + if ps.pstate_sess.Session.sess_log_parse + then thunk () + else () +;; + +let make_parser + (tref:temp_id ref) + (nref:node_id ref) + (oref:opaque_id ref) + (sess:Session.sess) + (get_mod:get_mod_fn) + (infer_lib_name:Ast.ident -> filename) + (required:(node_id, (required_lib * nabi_conv)) Hashtbl.t) + (required_syms:(node_id, string) Hashtbl.t) + (fname:string) + : pstate = + let lexbuf = Lexing.from_channel (open_in fname) in + let spos = { lexbuf.Lexing.lex_start_p with Lexing.pos_fname = fname } in + let cpos = { lexbuf.Lexing.lex_curr_p with Lexing.pos_fname = fname } in + lexbuf.Lexing.lex_start_p <- spos; + lexbuf.Lexing.lex_curr_p <- cpos; + let first = Lexer.token lexbuf in + let ps = + { pstate_peek = first; + pstate_ctxt = []; + pstate_rstr = false; + pstate_depth = 0; + pstate_lexbuf = lexbuf; + pstate_file = fname; + pstate_sess = sess; + pstate_temp_id = tref; + pstate_node_id = nref; + pstate_opaque_id = oref; + pstate_get_mod = get_mod; + pstate_infer_lib_name = infer_lib_name; + pstate_required = required; + pstate_required_syms = required_syms; } + in + iflog ps (fun _ -> log ps "made parser for: %s\n%!" fname); + ps +;; + +exception Parse_err of (pstate * string) +;; + +let lexpos (ps:pstate) : pos = + let p = ps.pstate_lexbuf.Lexing.lex_start_p in + (p.Lexing.pos_fname, + p.Lexing.pos_lnum , + (p.Lexing.pos_cnum) - (p.Lexing.pos_bol)) +;; + +let next_node_id (ps:pstate) : node_id = + let id = !(ps.pstate_node_id) in + ps.pstate_node_id := Node ((int_of_node id)+1); + id +;; + +let next_opaque_id (ps:pstate) : opaque_id = + let id = !(ps.pstate_opaque_id) in + ps.pstate_opaque_id := Opaque ((int_of_opaque id)+1); + id +;; + +let span + (ps:pstate) + (apos:pos) + (bpos:pos) + (x:'a) + : 'a identified = + let span = { lo = apos; hi = bpos } in + let id = next_node_id ps in + iflog ps (fun _ -> log ps "span for node #%d: %s" + (int_of_node id) (Session.string_of_span span)); + htab_put ps.pstate_sess.Session.sess_spans id span; + { node = x; id = id } +;; + +let decl p i = + { Ast.decl_params = p; + Ast.decl_item = i } +;; + +let spans + (ps:pstate) + (things:('a identified) array) + (apos:pos) + (thing:'a) + : ('a identified) array = + Array.append things [| (span ps apos (lexpos ps) thing) |] +;; + +(* + * The point of this is to make a new node_id entry for a node that is a + * "copy" of an lval returned from somewhere else. For example if you create + * a temp, the lval it returns can only be used in *one* place, for the + * node_id denotes the place that lval is first used; subsequent uses of + * 'the same' reference must clone_lval it into a new node_id. Otherwise + * there is trouble. + *) + +let clone_span + (ps:pstate) + (oldnode:'a identified) + (newthing:'b) + : 'b identified = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans oldnode.id in + span ps s.lo s.hi newthing +;; + +let rec clone_lval (ps:pstate) (lval:Ast.lval) : Ast.lval = + match lval with + Ast.LVAL_base nb -> + let nnb = clone_span ps nb nb.node in + Ast.LVAL_base nnb + | Ast.LVAL_ext (base, ext) -> + Ast.LVAL_ext ((clone_lval ps base), ext) +;; + +let clone_atom (ps:pstate) (atom:Ast.atom) : Ast.atom = + match atom with + Ast.ATOM_literal _ -> atom + | Ast.ATOM_lval lv -> Ast.ATOM_lval (clone_lval ps lv) +;; + +let ctxt (n:string) (f:pstate -> 'a) (ps:pstate) : 'a = + (ps.pstate_ctxt <- (n, lexpos ps) :: ps.pstate_ctxt; + let res = f ps in + ps.pstate_ctxt <- List.tl ps.pstate_ctxt; + res) +;; + +let rstr (r:bool) (f:pstate -> 'a) (ps:pstate) : 'a = + let prev = ps.pstate_rstr in + (ps.pstate_rstr <- r; + let res = f ps in + ps.pstate_rstr <- prev; + res) +;; + +let err (str:string) (ps:pstate) = + (Parse_err (ps, (str))) +;; + + +let (slot_nil:Ast.slot) = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = false; + Ast.slot_ty = Some Ast.TY_nil } +;; + +let (slot_auto:Ast.slot) = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = true; + Ast.slot_ty = None } +;; + +let build_tmp + (ps:pstate) + (slot:Ast.slot) + (apos:pos) + (bpos:pos) + : (temp_id * Ast.lval * Ast.stmt) = + let nonce = !(ps.pstate_temp_id) in + ps.pstate_temp_id := Temp ((int_of_temp nonce)+1); + iflog ps + (fun _ -> log ps "building temporary %d" (int_of_temp nonce)); + let decl = Ast.DECL_slot (Ast.KEY_temp nonce, (span ps apos bpos slot)) in + let declstmt = span ps apos bpos (Ast.STMT_decl decl) in + let tmp = Ast.LVAL_base (span ps apos bpos (Ast.BASE_temp nonce)) in + (nonce, tmp, declstmt) +;; + +(* Simple helpers *) + +(* FIXME: please rename these, they make eyes bleed. *) + +let arr (ls:'a list) : 'a array = Array.of_list ls ;; +let arl (ls:'a list) : 'a array = Array.of_list (List.rev ls) ;; +let arj (ar:('a array array)) = Array.concat (Array.to_list ar) ;; +let arj1st (pairs:(('a array) * 'b) array) : (('a array) * 'b array) = + let (az, bz) = List.split (Array.to_list pairs) in + (Array.concat az, Array.of_list bz) + + +(* Bottom-most parser actions. *) + +let peek (ps:pstate) : token = + iflog ps + begin + fun _ -> + log ps "peeking at: %s // %s" + (string_of_tok ps.pstate_peek) + (match ps.pstate_ctxt with + (s, _) :: _ -> s + | _ -> "<empty>") + end; + ps.pstate_peek +;; + + +let bump (ps:pstate) : unit = + begin + iflog ps (fun _ -> log ps "bumping past: %s" + (string_of_tok ps.pstate_peek)); + ps.pstate_peek <- Lexer.token ps.pstate_lexbuf + end +;; + +let bump_bracequote (ps:pstate) : unit = + begin + assert (ps.pstate_peek = LBRACE); + iflog ps (fun _ -> log ps "bumping past: %s" + (string_of_tok ps.pstate_peek)); + let buf = Buffer.create 32 in + ps.pstate_peek <- Lexer.bracequote buf 1 ps.pstate_lexbuf + end +;; + + +let expect (ps:pstate) (t:token) : unit = + let p = peek ps in + if p == t + then bump ps + else + let msg = ("Expected '" ^ (string_of_tok t) ^ + "', found '" ^ (string_of_tok p ) ^ "'") in + raise (Parse_err (ps, msg)) +;; + +let unexpected (ps:pstate) = + err ("Unexpected token '" ^ (string_of_tok (peek ps)) ^ "'") ps +;; + + + +(* Parser combinators. *) + +let one_or_more + (sep:token) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + let accum = ref [prule ps] in + while peek ps == sep + do + bump ps; + accum := (prule ps) :: !accum + done; + arl !accum +;; + +let bracketed_seq + (mandatory:int) + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + expect ps bra; + let accum = ref [] in + let dosep _ = + (match sepOpt with + None -> () + | Some tok -> + if (!accum = []) + then () + else expect ps tok) + in + while mandatory > List.length (!accum) do + dosep (); + accum := (prule ps) :: (!accum) + done; + while (not (peek ps = ket)) + do + dosep (); + accum := (prule ps) :: !accum + done; + expect ps ket; + arl !accum +;; + + +let bracketed_zero_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 0 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + + +let paren_comma_list + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_zero_or_more LPAREN RPAREN (Some COMMA) prule ps +;; + +let bracketed_one_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 1 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + +let bracketed_two_or_more + (bra:token) + (ket:token) + (sepOpt:token option) + (prule:pstate -> 'a) + (ps:pstate) + : 'a array = + bracketed_seq 2 bra ket sepOpt (ctxt "bracketed_seq" prule) ps +;; + + +let bracketed (bra:token) (ket:token) (prule:pstate -> 'a) (ps:pstate) : 'a = + expect ps bra; + let res = ctxt "bracketed" prule ps in + expect ps ket; + res +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/pexp.ml b/src/boot/fe/pexp.ml new file mode 100644 index 00000000..49eeeb5b --- /dev/null +++ b/src/boot/fe/pexp.ml @@ -0,0 +1,1354 @@ + +open Common;; +open Token;; +open Parser;; + +(* NB: pexps (parser-expressions) are only used transiently during + * parsing, static-evaluation and syntax-expansion. They're desugared + * into the general "item" AST and/or evaluated as part of the + * outermost "cexp" expressions. Expressions that can show up in source + * correspond to this loose grammar and have a wide-ish flexibility in + * *theoretical* composition; only subsets of those compositions are + * legal in various AST contexts. + * + * Desugaring on the fly is unfortunately complicated enough to require + * -- or at least "make much more convenient" -- this two-pass + * routine. + *) + +type pexp' = + PEXP_call of (pexp * pexp array) + | PEXP_spawn of (Ast.domain * pexp) + | PEXP_bind of (pexp * pexp option array) + | PEXP_rec of ((Ast.ident * pexp) array * pexp option) + | PEXP_tup of (pexp array) + | PEXP_vec of (Ast.slot * (pexp array)) + | PEXP_port + | PEXP_chan of (pexp option) + | PEXP_binop of (Ast.binop * pexp * pexp) + | PEXP_lazy_and of (pexp * pexp) + | PEXP_lazy_or of (pexp * pexp) + | PEXP_unop of (Ast.unop * pexp) + | PEXP_lval of plval + | PEXP_lit of Ast.lit + | PEXP_str of string + | PEXP_mutable of pexp + | PEXP_exterior of pexp + | PEXP_custom of Ast.name * (token array) * (string option) + +and plval = + PLVAL_ident of Ast.ident + | PLVAL_app of (Ast.ident * (Ast.ty array)) + | PLVAL_ext_name of (pexp * Ast.name_component) + | PLVAL_ext_pexp of (pexp * pexp) + +and pexp = pexp' Common.identified +;; + +(* Pexp grammar. Includes names, idents, types, constrs, binops and unops, + etc. *) + +let parse_ident (ps:pstate) : Ast.ident = + match peek ps with + IDENT id -> (bump ps; id) + (* Decay IDX tokens to identifiers if they occur ousdide name paths. *) + | IDX i -> (bump ps; string_of_tok (IDX i)) + | _ -> raise (unexpected ps) +;; + +(* Enforces the restricted pexp grammar when applicable (e.g. after "bind") *) +let check_rstr_start (ps:pstate) : 'a = + if (ps.pstate_rstr) then + match peek ps with + IDENT _ | LPAREN -> () + | _ -> raise (unexpected ps) +;; + +let rec parse_name_component (ps:pstate) : Ast.name_component = + match peek ps with + IDENT id -> + (bump ps; + match peek ps with + LBRACKET -> + let tys = + ctxt "name_component: apply" + (bracketed_one_or_more LBRACKET RBRACKET + (Some COMMA) parse_ty) ps + in + Ast.COMP_app (id, tys) + | _ -> Ast.COMP_ident id) + + | IDX i -> + bump ps; + Ast.COMP_idx i + | _ -> raise (unexpected ps) + +and parse_name_base (ps:pstate) : Ast.name_base = + match peek ps with + IDENT i -> + (bump ps; + match peek ps with + LBRACKET -> + let tys = + ctxt "name_base: apply" + (bracketed_one_or_more LBRACKET RBRACKET + (Some COMMA) parse_ty) ps + in + Ast.BASE_app (i, tys) + | _ -> Ast.BASE_ident i) + | _ -> raise (unexpected ps) + +and parse_name_ext (ps:pstate) (base:Ast.name) : Ast.name = + match peek ps with + DOT -> + bump ps; + let comps = one_or_more DOT parse_name_component ps in + Array.fold_left (fun x y -> Ast.NAME_ext (x, y)) base comps + | _ -> base + + +and parse_name (ps:pstate) : Ast.name = + let base = Ast.NAME_base (parse_name_base ps) in + let name = parse_name_ext ps base in + if Ast.sane_name name + then name + else raise (err "malformed name" ps) + +and parse_carg_base (ps:pstate) : Ast.carg_base = + match peek ps with + STAR -> bump ps; Ast.BASE_formal + | _ -> Ast.BASE_named (parse_name_base ps) + +and parse_carg (ps:pstate) : Ast.carg = + match peek ps with + IDENT _ -> + begin + let base = Ast.CARG_base (parse_carg_base ps) in + let path = + match peek ps with + DOT -> + bump ps; + let comps = one_or_more DOT parse_name_component ps in + Array.fold_left + (fun x y -> Ast.CARG_ext (x, y)) base comps + | _ -> base + in + Ast.CARG_path path + end + | _ -> + Ast.CARG_lit (parse_lit ps) + + +and parse_constraint (ps:pstate) : Ast.constr = + match peek ps with + + (* + * NB: A constraint *looks* a lot like an EXPR_call, but is restricted + * syntactically: the constraint name needs to be a name (not an lval) + * and the constraint args all need to be cargs, which are similar to + * names but can begin with the 'formal' base anchor '*'. + *) + + IDENT _ -> + let n = ctxt "constraint: name" parse_name ps in + let args = ctxt "constraint: args" + (bracketed_zero_or_more + LPAREN RPAREN (Some COMMA) + parse_carg) ps + in + { Ast.constr_name = n; + Ast.constr_args = args } + | _ -> raise (unexpected ps) + + +and parse_constrs (ps:pstate) : Ast.constrs = + ctxt "state: constraints" (one_or_more COMMA parse_constraint) ps + +and parse_optional_trailing_constrs (ps:pstate) : Ast.constrs = + match peek ps with + COLON -> (bump ps; parse_constrs ps) + | _ -> [| |] + +and parse_effect (ps:pstate) : Ast.effect = + match peek ps with + IO -> bump ps; Ast.IO + | STATE -> bump ps; Ast.STATE + | UNSAFE -> bump ps; Ast.UNSAFE + | _ -> Ast.PURE + +and parse_ty_fn + (effect:Ast.effect) + (ps:pstate) + : (Ast.ty_fn * Ast.ident option) = + match peek ps with + FN | ITER -> + let is_iter = (peek ps) = ITER in + bump ps; + let ident = + match peek ps with + IDENT i -> bump ps; Some i + | _ -> None + in + let in_slots = + match peek ps with + _ -> + bracketed_zero_or_more LPAREN RPAREN (Some COMMA) + (parse_slot_and_optional_ignored_ident true) ps + in + let out_slot = + match peek ps with + RARROW -> (bump ps; parse_slot false ps) + | _ -> slot_nil + in + let constrs = parse_optional_trailing_constrs ps in + let tsig = { Ast.sig_input_slots = in_slots; + Ast.sig_input_constrs = constrs; + Ast.sig_output_slot = out_slot; } + in + let taux = { Ast.fn_effect = effect; + Ast.fn_is_iter = is_iter; } + in + let tfn = (tsig, taux) in + (tfn, ident) + + | _ -> raise (unexpected ps) + +and check_dup_rec_labels ps labels = + arr_check_dups labels + (fun l _ -> + raise (err (Printf.sprintf + "duplicate record label: %s" l) ps)); + + +and parse_atomic_ty (ps:pstate) : Ast.ty = + match peek ps with + + BOOL -> + bump ps; + Ast.TY_bool + + | INT -> + bump ps; + Ast.TY_int + + | UINT -> + bump ps; + Ast.TY_uint + + | CHAR -> + bump ps; + Ast.TY_char + + | STR -> + bump ps; + Ast.TY_str + + | ANY -> + bump ps; + Ast.TY_any + + | TASK -> + bump ps; + Ast.TY_task + + | CHAN -> + bump ps; + Ast.TY_chan (bracketed LBRACKET RBRACKET parse_ty ps) + + | PORT -> + bump ps; + Ast.TY_port (bracketed LBRACKET RBRACKET parse_ty ps) + + | VEC -> + bump ps; + Ast.TY_vec (bracketed LBRACKET RBRACKET (parse_slot false) ps) + + | IDENT _ -> Ast.TY_named (parse_name ps) + + + | TAG -> + bump ps; + let htab = Hashtbl.create 4 in + let parse_tag_entry ps = + let ident = parse_ident ps in + let tup = + match peek ps with + LPAREN -> paren_comma_list (parse_slot false) ps + | _ -> raise (err "tag variant missing argument list" ps) + in + htab_put htab (Ast.NAME_base (Ast.BASE_ident ident)) tup + in + let _ = + bracketed_one_or_more LPAREN RPAREN + (Some COMMA) (ctxt "tag: variant" parse_tag_entry) ps + in + Ast.TY_tag htab + + | REC -> + bump ps; + let parse_rec_entry ps = + let mut = parse_mutability ps in + let (slot, ident) = parse_slot_and_ident false ps in + (ident, apply_mutability slot mut) + in + let entries = paren_comma_list parse_rec_entry ps in + let labels = Array.map (fun (l, _) -> l) entries in + begin + check_dup_rec_labels ps labels; + Ast.TY_rec entries + end + + | TUP -> + bump ps; + let slots = paren_comma_list (parse_slot false) ps in + Ast.TY_tup slots + + | MACH m -> + bump ps; + Ast.TY_mach m + + | IO | STATE | UNSAFE | OBJ | FN | ITER -> + let effect = parse_effect ps in + begin + match peek ps with + OBJ -> + bump ps; + let methods = Hashtbl.create 0 in + let parse_method ps = + let effect = parse_effect ps in + let (tfn, ident) = parse_ty_fn effect ps in + expect ps SEMI; + match ident with + None -> + raise (err (Printf.sprintf + "missing method identifier") ps) + | Some i -> htab_put methods i tfn + in + ignore (bracketed_zero_or_more LBRACE RBRACE + None parse_method ps); + Ast.TY_obj (effect, methods) + + | FN | ITER -> + Ast.TY_fn (fst (parse_ty_fn effect ps)) + | _ -> raise (unexpected ps) + end + + | LPAREN -> + begin + bump ps; + match peek ps with + RPAREN -> + bump ps; + Ast.TY_nil + | _ -> + let t = parse_ty ps in + expect ps RPAREN; + t + end + + | _ -> raise (unexpected ps) + +and flag (ps:pstate) (tok:token) : bool = + if peek ps = tok + then (bump ps; true) + else false + +and parse_mutability (ps:pstate) : bool = + flag ps MUTABLE + +and apply_mutability (slot:Ast.slot) (mut:bool) : Ast.slot = + { slot with Ast.slot_mutable = mut } + +and parse_slot (aliases_ok:bool) (ps:pstate) : Ast.slot = + let mut = parse_mutability ps in + let mode = + match (peek ps, aliases_ok) with + (AT, _) -> bump ps; Ast.MODE_exterior + | (AND, true) -> bump ps; Ast.MODE_alias + | (AND, false) -> raise (err "alias slot in prohibited context" ps) + | _ -> Ast.MODE_interior + in + let ty = parse_ty ps in + { Ast.slot_mode = mode; + Ast.slot_mutable = mut; + Ast.slot_ty = Some ty } + +and parse_slot_and_ident + (aliases_ok:bool) + (ps:pstate) + : (Ast.slot * Ast.ident) = + let slot = ctxt "slot and ident: slot" (parse_slot aliases_ok) ps in + let ident = ctxt "slot and ident: ident" parse_ident ps in + (slot, ident) + +and parse_slot_and_optional_ignored_ident + (aliases_ok:bool) + (ps:pstate) + : Ast.slot = + let slot = parse_slot aliases_ok ps in + begin + match peek ps with + IDENT _ -> bump ps + | _ -> () + end; + slot + +and parse_identified_slot + (aliases_ok:bool) + (ps:pstate) + : Ast.slot identified = + let apos = lexpos ps in + let slot = parse_slot aliases_ok ps in + let bpos = lexpos ps in + span ps apos bpos slot + +and parse_constrained_ty (ps:pstate) : Ast.ty = + let base = ctxt "ty: base" parse_atomic_ty ps in + match peek ps with + COLON -> + bump ps; + let constrs = ctxt "ty: constrs" parse_constrs ps in + Ast.TY_constrained (base, constrs) + + | _ -> base + +and parse_ty (ps:pstate) : Ast.ty = + parse_constrained_ty ps + + +and parse_rec_input (ps:pstate) : (Ast.ident * pexp) = + let lab = (ctxt "rec input: label" parse_ident ps) in + match peek ps with + EQ -> + bump ps; + let pexp = ctxt "rec input: expr" parse_pexp ps in + (lab, pexp) + | _ -> raise (unexpected ps) + + +and parse_rec_body (ps:pstate) : pexp' = (*((Ast.ident * pexp) array) =*) + begin + expect ps LPAREN; + match peek ps with + RPAREN -> PEXP_rec ([||], None) + | WITH -> raise (err "empty record extension" ps) + | _ -> + let inputs = one_or_more COMMA parse_rec_input ps in + let labels = Array.map (fun (l, _) -> l) inputs in + begin + check_dup_rec_labels ps labels; + match peek ps with + RPAREN -> (bump ps; PEXP_rec (inputs, None)) + | WITH -> + begin + bump ps; + let base = + ctxt "rec input: extension base" + parse_pexp ps + in + expect ps RPAREN; + PEXP_rec (inputs, Some base) + end + | _ -> raise (err "expected 'with' or ')'" ps) + end + end + + +and parse_lit (ps:pstate) : Ast.lit = + match peek ps with + LIT_INT (n,s) -> (bump ps; Ast.LIT_int (n,s)) + | LIT_CHAR c -> (bump ps; Ast.LIT_char c) + | LIT_BOOL b -> (bump ps; Ast.LIT_bool b) + | _ -> raise (unexpected ps) + + +and parse_bottom_pexp (ps:pstate) : pexp = + check_rstr_start ps; + let apos = lexpos ps in + match peek ps with + + MUTABLE -> + bump ps; + let inner = parse_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_mutable inner) + + | AT -> + bump ps; + let inner = parse_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_exterior inner) + + | TUP -> + bump ps; + let pexps = ctxt "paren pexps(s)" (rstr false parse_pexp_list) ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_tup pexps) + + | REC -> + bump ps; + let body = ctxt "rec pexp: rec body" parse_rec_body ps in + let bpos = lexpos ps in + span ps apos bpos body + + | VEC -> + bump ps; + begin + let slot = + match peek ps with + LBRACKET -> bracketed LBRACKET RBRACKET (parse_slot false) ps + | _ -> { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = false; + Ast.slot_ty = None } + in + let pexps = ctxt "vec pexp: exprs" parse_pexp_list ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_vec (slot, pexps)) + end + + + | LIT_STR s -> + bump ps; + let bpos = lexpos ps in + span ps apos bpos (PEXP_str s) + + | PORT -> + begin + bump ps; + expect ps LPAREN; + expect ps RPAREN; + let bpos = lexpos ps in + span ps apos bpos (PEXP_port) + end + + | CHAN -> + begin + bump ps; + let port = + match peek ps with + LPAREN -> + begin + bump ps; + match peek ps with + RPAREN -> (bump ps; None) + | _ -> + let lv = parse_pexp ps in + expect ps RPAREN; + Some lv + end + | _ -> raise (unexpected ps) + in + let bpos = lexpos ps in + span ps apos bpos (PEXP_chan port) + end + + | SPAWN -> + bump ps; + let domain = + match peek ps with + THREAD -> bump ps; Ast.DOMAIN_thread + | _ -> Ast.DOMAIN_local + in + let pexp = ctxt "spawn [domain] pexp: init call" parse_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_spawn (domain, pexp)) + + | BIND -> + let apos = lexpos ps in + begin + bump ps; + let pexp = ctxt "bind pexp: function" (rstr true parse_pexp) ps in + let args = + ctxt "bind args" + (paren_comma_list parse_bind_arg) ps + in + let bpos = lexpos ps in + span ps apos bpos (PEXP_bind (pexp, args)) + end + + | IDENT i -> + begin + bump ps; + match peek ps with + LBRACKET -> + begin + let tys = + ctxt "apply-type expr" + (bracketed_one_or_more LBRACKET RBRACKET + (Some COMMA) parse_ty) ps + in + let bpos = lexpos ps in + span ps apos bpos (PEXP_lval (PLVAL_app (i, tys))) + end + + | _ -> + begin + let bpos = lexpos ps in + span ps apos bpos (PEXP_lval (PLVAL_ident i)) + end + end + + | (INT | UINT | CHAR | BOOL) as tok -> + begin + bump ps; + expect ps LPAREN; + match peek ps with + (LIT_INT _ | LIT_CHAR _ | LIT_BOOL _) as tok2 -> + bump ps; + expect ps RPAREN; + let i = match tok2 with + LIT_INT i -> i + | LIT_CHAR c -> (Int64.of_int c, + Common.escaped_char c) + | LIT_BOOL b -> if b then (1L, "1") else (0L, "0") + | _ -> bug () "expected int/char literal" + in + let bpos = lexpos ps in + span ps apos bpos + (PEXP_lit + (match tok with + INT -> Ast.LIT_int i + | UINT -> Ast.LIT_uint i + | CHAR -> + Ast.LIT_char + (Int64.to_int (fst i)) + | BOOL -> Ast.LIT_bool (fst i <> 0L) + | _ -> bug () "expected int/uint/char/bool token")) + + | _ -> + let pexp = parse_pexp ps in + expect ps RPAREN; + let bpos = lexpos ps in + let t = + match tok with + INT -> Ast.TY_int + | UINT -> Ast.TY_uint + | CHAR -> Ast.TY_char + | BOOL -> Ast.TY_bool + | _ -> bug () "expected int/uint/char/bool token" + in + let t = span ps apos bpos t in + span ps apos bpos + (PEXP_unop ((Ast.UNOP_cast t), pexp)) + end + + | MACH m -> + let literal (num, str) = + let _ = bump ps in + let _ = expect ps RPAREN in + let bpos = lexpos ps in + let check_range (lo:int64) (hi:int64) : unit = + if (num < lo) or (num > hi) + then raise (err (Printf.sprintf + "integral literal %Ld out of range [%Ld,%Ld]" + num lo hi) ps) + else () + in + begin + match m with + TY_u8 -> check_range 0L 0xffL + | TY_u16 -> check_range 0L 0xffffL + | TY_u32 -> check_range 0L 0xffffffffL + (* | TY_u64 -> ... *) + | TY_i8 -> check_range (-128L) 127L + | TY_i16 -> check_range (-32768L) 32767L + | TY_i32 -> check_range (-2147483648L) 2147483647L + (* + | TY_i64 -> ... + | TY_f32 -> ... + | TY_f64 -> ... + *) + | _ -> () + end; + span ps apos bpos + (PEXP_lit + (Ast.LIT_mach + (m, num, str))) + + in + begin + bump ps; + expect ps LPAREN; + match peek ps with + LIT_INT (n,s) -> literal (n,s) + | MINUS -> + begin + bump ps; + match peek ps with + LIT_INT (n,s) -> + literal (Int64.neg n, "-" ^ s) + | _ -> raise (unexpected ps) + end + | _ -> + let pexp = parse_pexp ps in + expect ps RPAREN; + let bpos = lexpos ps in + let t = span ps apos bpos (Ast.TY_mach m) in + span ps apos bpos + (PEXP_unop ((Ast.UNOP_cast t), pexp)) + end + + | POUND -> + bump ps; + let name = parse_name ps in + let toks = + match peek ps with + LPAREN -> + bump ps; + let toks = Queue.create () in + while (peek ps) <> RPAREN + do + Queue.add (peek ps) toks; + bump ps; + done; + expect ps RPAREN; + queue_to_arr toks + | _ -> [| |] + in + let str = + match peek ps with + LBRACE -> + begin + bump_bracequote ps; + match peek ps with + BRACEQUOTE s -> bump ps; Some s + | _ -> raise (unexpected ps) + end + | _ -> None + in + let bpos = lexpos ps in + span ps apos bpos + (PEXP_custom (name, toks, str)) + + | LPAREN -> + begin + bump ps; + match peek ps with + RPAREN -> + bump ps; + let bpos = lexpos ps in + span ps apos bpos (PEXP_lit Ast.LIT_nil) + | _ -> + let pexp = parse_pexp ps in + expect ps RPAREN; + pexp + end + + | _ -> + let lit = parse_lit ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_lit lit) + + +and parse_bind_arg (ps:pstate) : pexp option = + match peek ps with + UNDERSCORE -> (bump ps; None) + | _ -> Some (parse_pexp ps) + + +and parse_ext_pexp (ps:pstate) (pexp:pexp) : pexp = + let apos = lexpos ps in + match peek ps with + LPAREN -> + if ps.pstate_rstr + then pexp + else + let args = parse_pexp_list ps in + let bpos = lexpos ps in + let ext = span ps apos bpos (PEXP_call (pexp, args)) in + parse_ext_pexp ps ext + + | DOT -> + begin + bump ps; + let ext = + match peek ps with + LPAREN -> + bump ps; + let rhs = rstr false parse_pexp ps in + expect ps RPAREN; + let bpos = lexpos ps in + span ps apos bpos + (PEXP_lval (PLVAL_ext_pexp (pexp, rhs))) + | _ -> + let rhs = parse_name_component ps in + let bpos = lexpos ps in + span ps apos bpos + (PEXP_lval (PLVAL_ext_name (pexp, rhs))) + in + parse_ext_pexp ps ext + end + + | _ -> pexp + + +and parse_negation_pexp (ps:pstate) : pexp = + let apos = lexpos ps in + match peek ps with + NOT -> + bump ps; + let rhs = ctxt "negation pexp" parse_negation_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_unop (Ast.UNOP_not, rhs)) + + | TILDE -> + bump ps; + let rhs = ctxt "negation pexp" parse_negation_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_unop (Ast.UNOP_bitnot, rhs)) + + | MINUS -> + bump ps; + let rhs = ctxt "negation pexp" parse_negation_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_unop (Ast.UNOP_neg, rhs)) + + | _ -> + let lhs = parse_bottom_pexp ps in + parse_ext_pexp ps lhs + + +(* Binops are all left-associative, *) +(* so we factor out some of the parsing code here. *) +and binop_rhs + (ps:pstate) + (name:string) + (apos:pos) + (lhs:pexp) + (rhs_parse_fn:pstate -> pexp) + (op:Ast.binop) + : pexp = + bump ps; + let rhs = (ctxt (name ^ " rhs") rhs_parse_fn ps) in + let bpos = lexpos ps in + span ps apos bpos (PEXP_binop (op, lhs, rhs)) + + +and parse_factor_pexp (ps:pstate) : pexp = + let name = "factor pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_negation_pexp ps in + match peek ps with + STAR -> binop_rhs ps name apos lhs parse_factor_pexp Ast.BINOP_mul + | SLASH -> binop_rhs ps name apos lhs parse_factor_pexp Ast.BINOP_div + | PERCENT -> binop_rhs ps name apos lhs parse_factor_pexp Ast.BINOP_mod + | _ -> lhs + + +and parse_term_pexp (ps:pstate) : pexp = + let name = "term pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_factor_pexp ps in + match peek ps with + PLUS -> binop_rhs ps name apos lhs parse_term_pexp Ast.BINOP_add + | MINUS -> binop_rhs ps name apos lhs parse_term_pexp Ast.BINOP_sub + | _ -> lhs + + +and parse_shift_pexp (ps:pstate) : pexp = + let name = "shift pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_term_pexp ps in + match peek ps with + LSL -> binop_rhs ps name apos lhs parse_shift_pexp Ast.BINOP_lsl + | LSR -> binop_rhs ps name apos lhs parse_shift_pexp Ast.BINOP_lsr + | ASR -> binop_rhs ps name apos lhs parse_shift_pexp Ast.BINOP_asr + | _ -> lhs + + +and parse_and_pexp (ps:pstate) : pexp = + let name = "and pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_shift_pexp ps in + match peek ps with + AND -> binop_rhs ps name apos lhs parse_and_pexp Ast.BINOP_and + | _ -> lhs + + +and parse_xor_pexp (ps:pstate) : pexp = + let name = "xor pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_and_pexp ps in + match peek ps with + CARET -> binop_rhs ps name apos lhs parse_xor_pexp Ast.BINOP_xor + | _ -> lhs + + +and parse_or_pexp (ps:pstate) : pexp = + let name = "or pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_xor_pexp ps in + match peek ps with + OR -> binop_rhs ps name apos lhs parse_or_pexp Ast.BINOP_or + | _ -> lhs + + +and parse_relational_pexp (ps:pstate) : pexp = + let name = "relational pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_or_pexp ps in + match peek ps with + LT -> binop_rhs ps name apos lhs parse_relational_pexp Ast.BINOP_lt + | LE -> binop_rhs ps name apos lhs parse_relational_pexp Ast.BINOP_le + | GE -> binop_rhs ps name apos lhs parse_relational_pexp Ast.BINOP_ge + | GT -> binop_rhs ps name apos lhs parse_relational_pexp Ast.BINOP_gt + | _ -> lhs + + +and parse_equality_pexp (ps:pstate) : pexp = + let name = "equality pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_relational_pexp ps in + match peek ps with + EQEQ -> binop_rhs ps name apos lhs parse_equality_pexp Ast.BINOP_eq + | NE -> binop_rhs ps name apos lhs parse_equality_pexp Ast.BINOP_ne + | _ -> lhs + + +and parse_andand_pexp (ps:pstate) : pexp = + let name = "andand pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_equality_pexp ps in + match peek ps with + ANDAND -> + bump ps; + let rhs = parse_andand_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_lazy_and (lhs, rhs)) + + | _ -> lhs + + +and parse_oror_pexp (ps:pstate) : pexp = + let name = "oror pexp" in + let apos = lexpos ps in + let lhs = ctxt (name ^ " lhs") parse_andand_pexp ps in + match peek ps with + OROR -> + bump ps; + let rhs = parse_oror_pexp ps in + let bpos = lexpos ps in + span ps apos bpos (PEXP_lazy_or (lhs, rhs)) + + | _ -> lhs + +and parse_as_pexp (ps:pstate) : pexp = + let apos = lexpos ps in + let pexp = ctxt "as pexp" parse_oror_pexp ps in + match peek ps with + AS -> + bump ps; + let tapos = lexpos ps in + let t = parse_ty ps in + let bpos = lexpos ps in + let t = span ps tapos bpos t in + span ps apos bpos + (PEXP_unop ((Ast.UNOP_cast t), pexp)) + + | _ -> pexp + +and parse_pexp (ps:pstate) : pexp = + parse_as_pexp ps + + +and parse_pexp_list (ps:pstate) : pexp array = + match peek ps with + LPAREN -> + bracketed_zero_or_more LPAREN RPAREN (Some COMMA) + (ctxt "pexp list" parse_pexp) ps + | _ -> raise (unexpected ps) + +;; + +(* + * FIXME: This is a crude approximation of the syntax-extension system, + * for purposes of prototyping and/or hard-wiring any extensions we + * wish to use in the bootstrap compiler. The eventual aim is to permit + * loading rust crates to process extensions, but this will likely + * require a rust-based frontend, or an ocaml-FFI-based connection to + * rust crates. At the moment we have neither. + *) + +let expand_pexp_custom + (ps:pstate) + (name:Ast.name) + (args:token array) + (body:string option) + : pexp' = + let nstr = Ast.fmt_to_str Ast.fmt_name name in + match (nstr, (Array.length args), body) with + + ("shell", 0, Some cmd) -> + let c = Unix.open_process_in cmd in + let b = Buffer.create 32 in + let rec r _ = + try + Buffer.add_char b (input_char c); + r () + with + End_of_file -> + ignore (Unix.close_process_in c); + Buffer.contents b + in + PEXP_str (r ()) + + | _ -> + raise (err ("unsupported syntax extension: " ^ nstr) ps) +;; + +(* + * Desugarings depend on context: + * + * - If a pexp is used on the RHS of an assignment, it's turned into + * an initialization statement such as STMT_init_rec or such. This + * removes the possibility of initializing into a temp only to + * copy out. If the topmost pexp in such a desugaring is an atom, + * unop or binop, of course, it will still just emit a STMT_copy + * on a primitive expression. + * + * - If a pexp is used in the context where an atom is required, a + * statement declaring a temporary and initializing it with the + * result of the pexp is prepended, and the temporary atom is used. + *) + +let rec desugar_lval (ps:pstate) (pexp:pexp) : (Ast.stmt array * Ast.lval) = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans pexp.id in + let (apos, bpos) = (s.lo, s.hi) in + match pexp.node with + + PEXP_lval (PLVAL_ident ident) -> + let nb = span ps apos bpos (Ast.BASE_ident ident) in + ([||], Ast.LVAL_base nb) + + | PEXP_lval (PLVAL_app (ident, tys)) -> + let nb = span ps apos bpos (Ast.BASE_app (ident, tys)) in + ([||], Ast.LVAL_base nb) + + | PEXP_lval (PLVAL_ext_name (base_pexp, comp)) -> + let (base_stmts, base_atom) = desugar_expr_atom ps base_pexp in + let base_lval = atom_lval ps base_atom in + (base_stmts, Ast.LVAL_ext (base_lval, Ast.COMP_named comp)) + + | PEXP_lval (PLVAL_ext_pexp (base_pexp, ext_pexp)) -> + let (base_stmts, base_atom) = desugar_expr_atom ps base_pexp in + let (ext_stmts, ext_atom) = desugar_expr_atom ps ext_pexp in + let base_lval = atom_lval ps base_atom in + (Array.append base_stmts ext_stmts, + Ast.LVAL_ext (base_lval, Ast.COMP_atom (clone_atom ps ext_atom))) + + | _ -> + let (stmts, atom) = desugar_expr_atom ps pexp in + (stmts, atom_lval ps atom) + + +and desugar_expr + (ps:pstate) + (pexp:pexp) + : (Ast.stmt array * Ast.expr) = + match pexp.node with + + PEXP_unop (op, pe) -> + let (stmts, at) = desugar_expr_atom ps pe in + (stmts, Ast.EXPR_unary (op, at)) + + | PEXP_binop (op, lhs, rhs) -> + let (lhs_stmts, lhs_atom) = desugar_expr_atom ps lhs in + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + (Array.append lhs_stmts rhs_stmts, + Ast.EXPR_binary (op, lhs_atom, rhs_atom)) + + | _ -> + let (stmts, at) = desugar_expr_atom ps pexp in + (stmts, Ast.EXPR_atom at) + + +and desugar_opt_expr_atom + (ps:pstate) + (po:pexp option) + : (Ast.stmt array * Ast.atom option) = + match po with + None -> ([| |], None) + | Some pexp -> + let (stmts, atom) = desugar_expr_atom ps pexp in + (stmts, Some atom) + + +and desugar_expr_atom + (ps:pstate) + (pexp:pexp) + : (Ast.stmt array * Ast.atom) = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans pexp.id in + let (apos, bpos) = (s.lo, s.hi) in + match pexp.node with + + PEXP_unop _ + | PEXP_binop _ + | PEXP_lazy_or _ + | PEXP_lazy_and _ + | PEXP_rec _ + | PEXP_tup _ + | PEXP_str _ + | PEXP_vec _ + | PEXP_port + | PEXP_chan _ + | PEXP_call _ + | PEXP_bind _ + | PEXP_spawn _ -> + let (_, tmp, decl_stmt) = build_tmp ps slot_auto apos bpos in + let stmts = desugar_expr_init ps tmp pexp in + (Array.append [| decl_stmt |] stmts, + Ast.ATOM_lval (clone_lval ps tmp)) + + | PEXP_lit lit -> + ([||], Ast.ATOM_literal (span ps apos bpos lit)) + + | PEXP_lval _ -> + let (stmts, lval) = desugar_lval ps pexp in + (stmts, Ast.ATOM_lval lval) + + | PEXP_exterior _ -> + raise (err "exterior symbol in atom context" ps) + + | PEXP_mutable _ -> + raise (err "mutable keyword in atom context" ps) + + | PEXP_custom (n, a, b) -> + desugar_expr_atom ps + { pexp with node = expand_pexp_custom ps n a b } + + +and desugar_expr_mode_mut_atom + (ps:pstate) + (pexp:pexp) + : (Ast.stmt array * (Ast.mode * bool * Ast.atom)) = + let desugar_inner mode mut e = + let (stmts, atom) = desugar_expr_atom ps e in + (stmts, (mode, mut, atom)) + in + match pexp.node with + PEXP_mutable {node=(PEXP_exterior e); id=_} -> + desugar_inner Ast.MODE_exterior true e + | PEXP_exterior e -> + desugar_inner Ast.MODE_exterior false e + | PEXP_mutable e -> + desugar_inner Ast.MODE_interior true e + | _ -> + desugar_inner Ast.MODE_interior false pexp + +and desugar_expr_atoms + (ps:pstate) + (pexps:pexp array) + : (Ast.stmt array * Ast.atom array) = + arj1st (Array.map (desugar_expr_atom ps) pexps) + +and desugar_opt_expr_atoms + (ps:pstate) + (pexps:pexp option array) + : (Ast.stmt array * Ast.atom option array) = + arj1st (Array.map (desugar_opt_expr_atom ps) pexps) + +and desugar_expr_mode_mut_atoms + (ps:pstate) + (pexps:pexp array) + : (Ast.stmt array * (Ast.mode * bool * Ast.atom) array) = + arj1st (Array.map (desugar_expr_mode_mut_atom ps) pexps) + +and desugar_expr_init + (ps:pstate) + (dst_lval:Ast.lval) + (pexp:pexp) + : (Ast.stmt array) = + let s = Hashtbl.find ps.pstate_sess.Session.sess_spans pexp.id in + let (apos, bpos) = (s.lo, s.hi) in + + (* Helpers. *) + let ss x = span ps apos bpos x in + let cp v = Ast.STMT_copy (clone_lval ps dst_lval, v) in + let aa x y = Array.append x y in + let ac xs = Array.concat xs in + + match pexp.node with + + PEXP_lit _ + | PEXP_lval _ -> + let (stmts, atom) = desugar_expr_atom ps pexp in + aa stmts [| ss (cp (Ast.EXPR_atom atom)) |] + + | PEXP_binop (op, lhs, rhs) -> + let (lhs_stmts, lhs_atom) = desugar_expr_atom ps lhs in + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + let copy_stmt = + ss (cp (Ast.EXPR_binary (op, lhs_atom, rhs_atom))) + in + ac [ lhs_stmts; rhs_stmts; [| copy_stmt |] ] + + (* x = a && b ==> if (a) { x = b; } else { x = false; } *) + + | PEXP_lazy_and (lhs, rhs) -> + let (lhs_stmts, lhs_atom) = desugar_expr_atom ps lhs in + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + let sthen = + ss (aa rhs_stmts [| ss (cp (Ast.EXPR_atom rhs_atom)) |]) + in + let selse = + ss [| ss (cp (Ast.EXPR_atom + (Ast.ATOM_literal (ss (Ast.LIT_bool false))))) |] + in + let sif = + ss (Ast.STMT_if { Ast.if_test = Ast.EXPR_atom lhs_atom; + Ast.if_then = sthen; + Ast.if_else = Some selse }) + in + aa lhs_stmts [| sif |] + + (* x = a || b ==> if (a) { x = true; } else { x = b; } *) + + | PEXP_lazy_or (lhs, rhs) -> + let (lhs_stmts, lhs_atom) = desugar_expr_atom ps lhs in + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + let sthen = + ss [| ss (cp (Ast.EXPR_atom + (Ast.ATOM_literal (ss (Ast.LIT_bool true))))) |] + in + let selse = + ss (aa rhs_stmts [| ss (cp (Ast.EXPR_atom rhs_atom)) |]) + in + let sif = + ss (Ast.STMT_if { Ast.if_test = Ast.EXPR_atom lhs_atom; + Ast.if_then = sthen; + Ast.if_else = Some selse }) + in + aa lhs_stmts [| sif |] + + + | PEXP_unop (op, rhs) -> + let (rhs_stmts, rhs_atom) = desugar_expr_atom ps rhs in + let expr = Ast.EXPR_unary (op, rhs_atom) in + let copy_stmt = ss (cp expr) in + aa rhs_stmts [| copy_stmt |] + + | PEXP_call (fn, args) -> + let (fn_stmts, fn_atom) = desugar_expr_atom ps fn in + let (arg_stmts, arg_atoms) = desugar_expr_atoms ps args in + let fn_lval = atom_lval ps fn_atom in + let call_stmt = ss (Ast.STMT_call (dst_lval, fn_lval, arg_atoms)) in + ac [ fn_stmts; arg_stmts; [| call_stmt |] ] + + | PEXP_bind (fn, args) -> + let (fn_stmts, fn_atom) = desugar_expr_atom ps fn in + let (arg_stmts, arg_atoms) = desugar_opt_expr_atoms ps args in + let fn_lval = atom_lval ps fn_atom in + let bind_stmt = ss (Ast.STMT_bind (dst_lval, fn_lval, arg_atoms)) in + ac [ fn_stmts; arg_stmts; [| bind_stmt |] ] + + | PEXP_spawn (domain, sub) -> + begin + match sub.node with + PEXP_call (fn, args) -> + let (fn_stmts, fn_atom) = desugar_expr_atom ps fn in + let (arg_stmts, arg_atoms) = desugar_expr_atoms ps args in + let fn_lval = atom_lval ps fn_atom in + let spawn_stmt = + ss (Ast.STMT_spawn (dst_lval, domain, fn_lval, arg_atoms)) + in + ac [ fn_stmts; arg_stmts; [| spawn_stmt |] ] + | _ -> raise (err "non-call spawn" ps) + end + + | PEXP_rec (args, base) -> + let (arg_stmts, entries) = + arj1st + begin + Array.map + begin + fun (ident, pexp) -> + let (stmts, (mode, mut, atom)) = + desugar_expr_mode_mut_atom ps pexp + in + (stmts, (ident, mode, mut, atom)) + end + args + end + in + begin + match base with + Some base -> + let (base_stmts, base_lval) = desugar_lval ps base in + let rec_stmt = + ss (Ast.STMT_init_rec + (dst_lval, entries, Some base_lval)) + in + ac [ arg_stmts; base_stmts; [| rec_stmt |] ] + | None -> + let rec_stmt = + ss (Ast.STMT_init_rec (dst_lval, entries, None)) + in + aa arg_stmts [| rec_stmt |] + end + + | PEXP_tup args -> + let (arg_stmts, arg_mode_atoms) = + desugar_expr_mode_mut_atoms ps args + in + let stmt = ss (Ast.STMT_init_tup (dst_lval, arg_mode_atoms)) in + aa arg_stmts [| stmt |] + + | PEXP_str s -> + let stmt = ss (Ast.STMT_init_str (dst_lval, s)) in + [| stmt |] + + | PEXP_vec (slot, args) -> + let (arg_stmts, arg_atoms) = desugar_expr_atoms ps args in + let stmt = ss (Ast.STMT_init_vec (dst_lval, slot, arg_atoms)) in + aa arg_stmts [| stmt |] + + | PEXP_port -> + [| ss (Ast.STMT_init_port dst_lval) |] + + | PEXP_chan pexp_opt -> + let (port_stmts, port_opt) = + match pexp_opt with + None -> ([||], None) + | Some port_pexp -> + begin + let (port_stmts, port_atom) = + desugar_expr_atom ps port_pexp + in + let port_lval = atom_lval ps port_atom in + (port_stmts, Some port_lval) + end + in + let chan_stmt = + ss + (Ast.STMT_init_chan (dst_lval, port_opt)) + in + aa port_stmts [| chan_stmt |] + + | PEXP_exterior _ -> + raise (err "exterior symbol in initialiser context" ps) + + | PEXP_mutable _ -> + raise (err "mutable keyword in initialiser context" ps) + + | PEXP_custom (n, a, b) -> + desugar_expr_init ps dst_lval + { pexp with node = expand_pexp_custom ps n a b } + + +and atom_lval (ps:pstate) (at:Ast.atom) : Ast.lval = + match at with + Ast.ATOM_lval lv -> lv + | Ast.ATOM_literal _ -> raise (err "literal where lval expected" ps) +;; + + + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/token.ml b/src/boot/fe/token.ml new file mode 100644 index 00000000..636e1ac2 --- /dev/null +++ b/src/boot/fe/token.ml @@ -0,0 +1,308 @@ +type token = + + (* Expression operator symbols *) + PLUS + | MINUS + | STAR + | SLASH + | PERCENT + | EQ + | LT + | LE + | EQEQ + | NE + | GE + | GT + | NOT + | TILDE + | CARET + | AND + | ANDAND + | OR + | OROR + | LSL + | LSR + | ASR + | OPEQ of token + | AS + | WITH + + (* Structural symbols *) + | AT + | DOT + | COMMA + | SEMI + | COLON + | RARROW + | SEND + | LARROW + | LPAREN + | RPAREN + | LBRACKET + | RBRACKET + | LBRACE + | RBRACE + + (* Module and crate keywords *) + | MOD + | USE + | AUTH + | META + + (* Metaprogramming keywords *) + | SYNTAX + | POUND + + (* Statement keywords *) + | IF + | ELSE + | DO + | WHILE + | ALT + | CASE + + | FAIL + | DROP + + | IN + | FOR + | EACH + | PUT + | RET + | BE + + (* Type and type-state keywords *) + | TYPE + | CHECK + | CLAIM + | PROVE + + (* Effect keywords *) + | IO + | STATE + | UNSAFE + + (* Type qualifiers *) + | NATIVE + | AUTO + | MUTABLE + + (* Name management *) + | IMPORT + | EXPORT + + (* Value / stmt declarators *) + | LET + + (* Magic runtime services *) + | LOG + | SPAWN + | BIND + | THREAD + | YIELD + | JOIN + + (* Literals *) + | LIT_INT of (int64 * string) + | LIT_FLO of string + | LIT_STR of string + | LIT_CHAR of int + | LIT_BOOL of bool + + (* Name components *) + | IDENT of string + | IDX of int + | UNDERSCORE + + (* Reserved type names *) + | BOOL + | INT + | UINT + | CHAR + | STR + | MACH of Common.ty_mach + + (* Algebraic type constructors *) + | REC + | TUP + | TAG + | VEC + | ANY + + (* Callable type constructors *) + | FN + | ITER + + (* Object type *) + | OBJ + + (* Comm and task types *) + | CHAN + | PORT + | TASK + + | EOF + + | BRACEQUOTE of string + +;; + +let rec string_of_tok t = + match t with + (* Operator symbols (mostly) *) + PLUS -> "+" + | MINUS -> "-" + | STAR -> "*" + | SLASH -> "/" + | PERCENT -> "%" + | EQ -> "=" + | LT -> "<" + | LE -> "<=" + | EQEQ -> "==" + | NE -> "!=" + | GE -> ">=" + | GT -> ">" + | TILDE -> "~" + | CARET -> "^" + | NOT -> "!" + | AND -> "&" + | ANDAND -> "&&" + | OR -> "|" + | OROR -> "||" + | LSL -> "<<" + | LSR -> ">>" + | ASR -> ">>>" + | OPEQ op -> string_of_tok op ^ "=" + | AS -> "as" + | WITH -> "with" + + (* Structural symbols *) + | AT -> "@" + | DOT -> "." + | COMMA -> "," + | SEMI -> ";" + | COLON -> ":" + | RARROW -> "->" + | SEND -> "<|" + | LARROW -> "<-" + | LPAREN -> "(" + | RPAREN -> ")" + | LBRACKET -> "[" + | RBRACKET -> "]" + | LBRACE -> "{" + | RBRACE -> "}" + + (* Module and crate keywords *) + | MOD -> "mod" + | USE -> "use" + | AUTH -> "auth" + + (* Metaprogramming keywords *) + | SYNTAX -> "syntax" + | META -> "meta" + | POUND -> "#" + + (* Control-flow keywords *) + | IF -> "if" + | ELSE -> "else" + | DO -> "do" + | WHILE -> "while" + | ALT -> "alt" + | CASE -> "case" + + | FAIL -> "fail" + | DROP -> "drop" + + | IN -> "in" + | FOR -> "for" + | EACH -> "each" + | PUT -> "put" + | RET -> "ret" + | BE -> "be" + + (* Type and type-state keywords *) + | TYPE -> "type" + | CHECK -> "check" + | CLAIM -> "claim" + | PROVE -> "prove" + + (* Effect keywords *) + | IO -> "io" + | STATE -> "state" + | UNSAFE -> "unsafe" + + (* Type qualifiers *) + | NATIVE -> "native" + | AUTO -> "auto" + | MUTABLE -> "mutable" + + (* Name management *) + | IMPORT -> "import" + | EXPORT -> "export" + + (* Value / stmt declarators. *) + | LET -> "let" + + (* Magic runtime services *) + | LOG -> "log" + | SPAWN -> "spawn" + | BIND -> "bind" + | THREAD -> "thread" + | YIELD -> "yield" + | JOIN -> "join" + + (* Literals *) + | LIT_INT (_,s) -> s + | LIT_FLO n -> n + | LIT_STR s -> ("\"" ^ (String.escaped s) ^ "\"") + | LIT_CHAR c -> ("'" ^ (Common.escaped_char c) ^ "'") + | LIT_BOOL b -> if b then "true" else "false" + + (* Name components *) + | IDENT s -> s + | IDX i -> ("_" ^ (string_of_int i)) + | UNDERSCORE -> "_" + + (* Reserved type names *) + | BOOL -> "bool" + | INT -> "int" + | UINT -> "uint" + | CHAR -> "char" + | STR -> "str" + | MACH m -> Common.string_of_ty_mach m + + (* Algebraic type constructors *) + | REC -> "rec" + | TUP -> "tup" + | TAG -> "tag" + | VEC -> "vec" + | ANY -> "any" + + (* Callable type constructors *) + | FN -> "fn" + | ITER -> "fn" + + (* Object type *) + | OBJ -> "obj" + + (* Ports and channels *) + | CHAN -> "chan" + | PORT -> "port" + + (* Taskess types *) + | TASK -> "task" + + | BRACEQUOTE _ -> "{...bracequote...}" + + | EOF -> "<EOF>" +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/llvm/llabi.ml b/src/boot/llvm/llabi.ml new file mode 100644 index 00000000..fd5d9277 --- /dev/null +++ b/src/boot/llvm/llabi.ml @@ -0,0 +1,69 @@ +(* + * LLVM integration with the Rust runtime. + *) + +type abi = { + crate_ty: Llvm.lltype; + task_ty: Llvm.lltype; + word_ty: Llvm.lltype; + rust_start: Llvm.llvalue; +};; + +let declare_abi (llctx:Llvm.llcontext) (llmod:Llvm.llmodule) : abi = + let i32 = Llvm.i32_type llctx in + + let crate_ty = + (* TODO: other architectures besides x86 *) + let crate_opaque_ty = Llvm.opaque_type llctx in + let crate_tyhandle = Llvm.handle_to_type (Llvm.struct_type llctx [| + i32; (* ptrdiff_t image_base_off *) + Llvm.pointer_type crate_opaque_ty;(* uintptr_t self_addr *) + i32; (* ptrdiff_t debug_abbrev_off *) + i32; (* size_t debug_abbrev_sz *) + i32; (* ptrdiff_t debug_info_off *) + i32; (* size_t debug_info_sz *) + i32; (* size_t activate_glue_off *) + i32; (* size_t main_exit_task_glue_off *) + i32; (* size_t unwind_glue_off *) + i32; (* size_t yield_glue_off *) + i32; (* int n_rust_syms *) + i32; (* int n_c_syms *) + i32 (* int n_libs *) + |]) + in + Llvm.refine_type crate_opaque_ty (Llvm.type_of_handle crate_tyhandle); + Llvm.type_of_handle crate_tyhandle + in + ignore (Llvm.define_type_name "rust_crate" crate_ty llmod); + + let task_ty = + (* TODO: other architectures besides x86 *) + Llvm.struct_type llctx [| + i32; (* size_t refcnt *) + Llvm.pointer_type i32; (* stk_seg *stk *) + Llvm.pointer_type i32; (* uintptr_t runtime_sp *) + Llvm.pointer_type i32; (* uintptr_t rust_sp *) + Llvm.pointer_type i32; (* rust_rt *rt *) + Llvm.pointer_type i32 (* rust_crate_cache *cache *) + |] + in + ignore (Llvm.define_type_name "rust_task" task_ty llmod); + + let rust_start_ty = + let task_ptr_ty = Llvm.pointer_type task_ty in + let llnilty = Llvm.array_type (Llvm.i1_type llctx) 0 in + let main_ty = Llvm.function_type (Llvm.void_type llctx) + [| Llvm.pointer_type llnilty; task_ptr_ty; |] + in + let args_ty = Array.map Llvm.pointer_type [| main_ty; crate_ty; |] in + let args_ty = Array.append args_ty [| i32; i32 |] in + Llvm.function_type i32 args_ty + in + { + crate_ty = crate_ty; + task_ty = task_ty; + word_ty = i32; + rust_start = Llvm.declare_function "rust_start" rust_start_ty llmod + } +;; + diff --git a/src/boot/llvm/llasm.ml b/src/boot/llvm/llasm.ml new file mode 100644 index 00000000..56448b07 --- /dev/null +++ b/src/boot/llvm/llasm.ml @@ -0,0 +1,192 @@ +(* + * machine-specific assembler routines. + *) + +open Common;; + +type asm_glue = + { + asm_activate_glue : Llvm.llvalue; + asm_yield_glue : Llvm.llvalue; + asm_upcall_glues : Llvm.llvalue array; + } +;; + +let n_upcall_glues = 7 +;; + +(* x86-specific asm. *) + +let x86_glue + (llctx:Llvm.llcontext) + (llmod:Llvm.llmodule) + (abi:Llabi.abi) + (sess:Session.sess) + : asm_glue = + let (prefix,align) = + match sess.Session.sess_targ with + Linux_x86_elf + | Win32_x86_pe -> ("",4) + | MacOS_x86_macho -> ("_", 16) + in + let save_callee_saves = + ["pushl %ebp"; + "pushl %edi"; + "pushl %esi"; + "pushl %ebx";] + in + let restore_callee_saves = + ["popl %ebx"; + "popl %esi"; + "popl %edi"; + "popl %ebp";] + in + let load_esp_from_rust_sp = ["movl 12(%edx), %esp"] in + let load_esp_from_runtime_sp = ["movl 8(%edx), %esp"] in + let store_esp_to_rust_sp = ["movl %esp, 12(%edx)"] in + let store_esp_to_runtime_sp = ["movl %esp, 8(%edx)"] in + let list_init i f = (Array.to_list (Array.init i f)) in + let list_init_concat i f = List.concat (list_init i f) in + + let glue = + [ + ("rust_activate_glue", + String.concat "\n\t" + (["movl 4(%esp), %edx # edx = rust_task"] + @ save_callee_saves + @ store_esp_to_runtime_sp + @ load_esp_from_rust_sp + (* + * This 'add' instruction is a bit surprising. + * See lengthy comment in boot/be/x86.ml activate_glue. + *) + @ ["addl $20, 12(%edx)"] + @ restore_callee_saves + @ ["ret"])); + + ("rust_yield_glue", + String.concat "\n\t" + + (["movl 0(%esp), %edx # edx = rust_task"] + @ load_esp_from_rust_sp + @ save_callee_saves + @ store_esp_to_rust_sp + @ load_esp_from_runtime_sp + @ restore_callee_saves + @ ["ret"])) + ] + @ list_init n_upcall_glues + begin + fun i -> + (* + * 0, 4, 8, 12 are callee-saves + * 16 is retpc + * 20 is taskptr + * 24 is callee + * 28 .. (7+i) * 4 are args + *) + + ((Printf.sprintf "rust_upcall_%d" i), + String.concat "\n\t" + (save_callee_saves + @ ["movl %esp, %ebp # ebp = rust_sp"; + "movl 20(%esp), %edx # edx = rust_task"] + @ store_esp_to_rust_sp + @ load_esp_from_runtime_sp + @ [Printf.sprintf + "subl $%d, %%esp # esp -= args" ((i+1)*4); + "andl $~0xf, %esp # align esp down"; + "movl %edx, (%esp) # arg[0] = rust_task "] + + @ (list_init_concat i + begin + fun j -> + [ Printf.sprintf "movl %d(%%ebp),%%edx" ((j+7)*4); + Printf.sprintf "movl %%edx,%d(%%esp)" ((j+1)*4) ] + end) + + @ ["movl 24(%ebp), %edx # edx = callee"; + "call *%edx # call *%edx"; + "movl 20(%ebp), %edx # edx = rust_task"] + @ load_esp_from_rust_sp + @ restore_callee_saves + @ ["ret"])) + end + in + + let _ = + Llvm.set_module_inline_asm llmod + begin + String.concat "\n" + begin + List.map + begin + fun (sym,asm) -> + Printf.sprintf + "\t.globl %s%s\n\t.balign %d\n%s%s:\n\t%s" + prefix sym align prefix sym asm + end + glue + end + end + in + + let decl_cdecl_fn name out_ty arg_tys = + let ty = Llvm.function_type out_ty arg_tys in + let fn = Llvm.declare_function name ty llmod in + Llvm.set_function_call_conv Llvm.CallConv.c fn; + fn + in + + let decl_glue s = + let task_ptr_ty = Llvm.pointer_type abi.Llabi.task_ty in + let void_ty = Llvm.void_type llctx in + decl_cdecl_fn s void_ty [| task_ptr_ty |] + in + + let decl_upcall n = + let task_ptr_ty = Llvm.pointer_type abi.Llabi.task_ty in + let word_ty = abi.Llabi.word_ty in + let callee_ty = word_ty in + let args_ty = + Array.append + [| task_ptr_ty; callee_ty |] + (Array.init n (fun _ -> word_ty)) + in + let name = Printf.sprintf "rust_upcall_%d" n in + decl_cdecl_fn name word_ty args_ty + in + { + asm_activate_glue = decl_glue "rust_activate_glue"; + asm_yield_glue = decl_glue "rust_yield_glue"; + asm_upcall_glues = Array.init n_upcall_glues decl_upcall; + } +;; + +(* x64-specific asm. *) +(* arm-specific asm. *) +(* ... *) + + +let get_glue + (llctx:Llvm.llcontext) + (llmod:Llvm.llmodule) + (abi:Llabi.abi) + (sess:Session.sess) + : asm_glue = + match sess.Session.sess_targ with + Linux_x86_elf + | Win32_x86_pe + | MacOS_x86_macho -> + x86_glue llctx llmod abi sess +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/llvm/llemit.ml b/src/boot/llvm/llemit.ml new file mode 100644 index 00000000..2b229fde --- /dev/null +++ b/src/boot/llvm/llemit.ml @@ -0,0 +1,36 @@ +(* + * LLVM emitter. + *) + +(* The top-level interface to the LLVM translation subsystem. *) +let trans_and_process_crate + (sess:Session.sess) + (sem_cx:Semant.ctxt) + (crate:Ast.crate) + : unit = + let llcontext = Llvm.create_context () in + let emit_file (llmod:Llvm.llmodule) : unit = + let filename = Session.filename_of sess.Session.sess_out in + if not (Llvm_bitwriter.write_bitcode_file llmod filename) + then raise (Failure ("failed to write the LLVM bitcode '" ^ filename + ^ "'")) + in + let llmod = Lltrans.trans_crate sem_cx llcontext sess crate in + begin + try + emit_file llmod + with e -> Llvm.dispose_module llmod; raise e + end; + Llvm.dispose_module llmod; + Llvm.dispose_context llcontext +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) + diff --git a/src/boot/llvm/llfinal.ml b/src/boot/llvm/llfinal.ml new file mode 100644 index 00000000..64ea3d37 --- /dev/null +++ b/src/boot/llvm/llfinal.ml @@ -0,0 +1,96 @@ +(* + * LLVM ABI-level stuff that needs to happen after modules have been + * translated. + *) + +let finalize_module + (llctx:Llvm.llcontext) + (llmod:Llvm.llmodule) + (abi:Llabi.abi) + (asm_glue:Llasm.asm_glue) + (exit_task_glue:Llvm.llvalue) + (crate_ptr:Llvm.llvalue) + : unit = + let i32 = Llvm.i32_type llctx in + + (* + * Count the number of Rust functions and the number of C functions by + * simply (and crudely) testing whether each function in the module begins + * with "_rust_". + *) + + let (rust_fn_count, c_fn_count) = + let count (rust_fn_count, c_fn_count) fn = + let begins_with prefix str = + let (str_len, prefix_len) = + (String.length str, String.length prefix) + in + prefix_len <= str_len && (String.sub str 0 prefix_len) = prefix + in + if begins_with "_rust_" (Llvm.value_name fn) then + (rust_fn_count + 1, c_fn_count) + else + (rust_fn_count, c_fn_count + 1) + in + Llvm.fold_left_functions count (0, 0) llmod + in + + let crate_val = + let crate_addr = Llvm.const_ptrtoint crate_ptr i32 in + let glue_off glue = + let addr = Llvm.const_ptrtoint glue i32 in + Llvm.const_sub addr crate_addr + in + let activate_glue_off = glue_off asm_glue.Llasm.asm_activate_glue in + let yield_glue_off = glue_off asm_glue.Llasm.asm_yield_glue in + let exit_task_glue_off = glue_off exit_task_glue in + + Llvm.const_struct llctx [| + Llvm.const_int i32 0; (* ptrdiff_t image_base_off *) + crate_ptr; (* uintptr_t self_addr *) + Llvm.const_int i32 0; (* ptrdiff_t debug_abbrev_off *) + Llvm.const_int i32 0; (* size_t debug_abbrev_sz *) + Llvm.const_int i32 0; (* ptrdiff_t debug_info_off *) + Llvm.const_int i32 0; (* size_t debug_info_sz *) + activate_glue_off; (* size_t activate_glue_off *) + exit_task_glue_off; (* size_t main_exit_task_glue_off *) + Llvm.const_int i32 0; (* size_t unwind_glue_off *) + yield_glue_off; (* size_t yield_glue_off *) + Llvm.const_int i32 rust_fn_count; (* int n_rust_syms *) + Llvm.const_int i32 c_fn_count; (* int n_c_syms *) + Llvm.const_int i32 0 (* int n_libs *) + |] + in + + Llvm.set_initializer crate_val crate_ptr; + + (* Define the main function for crt0 to call. *) + let main_fn = + let main_ty = Llvm.function_type i32 [| i32; i32 |] in + Llvm.define_function "main" main_ty llmod + in + let argc = Llvm.param main_fn 0 in + let argv = Llvm.param main_fn 1 in + let main_builder = Llvm.builder_at_end llctx (Llvm.entry_block main_fn) in + let rust_main_fn = + match Llvm.lookup_function "_rust_main" llmod with + None -> raise (Failure "no main function found") + | Some fn -> fn + in + let rust_start = abi.Llabi.rust_start in + let rust_start_args = [| rust_main_fn; crate_ptr; argc; argv |] in + ignore (Llvm.build_call + rust_start rust_start_args "start_rust" main_builder); + ignore (Llvm.build_ret (Llvm.const_int i32 0) main_builder) +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) + diff --git a/src/boot/llvm/lltrans.ml b/src/boot/llvm/lltrans.ml new file mode 100644 index 00000000..7f985d25 --- /dev/null +++ b/src/boot/llvm/lltrans.ml @@ -0,0 +1,938 @@ +(* + * LLVM translator. + *) + +open Common;; +open Transutil;; + +let log cx = Session.log "trans" + cx.Semant.ctxt_sess.Session.sess_log_trans + cx.Semant.ctxt_sess.Session.sess_log_out +;; + +let trans_crate + (sem_cx:Semant.ctxt) + (llctx:Llvm.llcontext) + (sess:Session.sess) + (crate:Ast.crate) + : Llvm.llmodule = + + let iflog thunk = + if sess.Session.sess_log_trans + then thunk () + else () + in + + (* Helpers for adding metadata. *) + let (dbg_mdkind:int) = Llvm.mdkind_id llctx "dbg" in + let set_dbg_metadata (inst:Llvm.llvalue) (md:Llvm.llvalue) : unit = + Llvm.set_metadata inst dbg_mdkind md + in + let md_str (s:string) : Llvm.llvalue = Llvm.mdstring llctx s in + let md_node (vals:Llvm.llvalue array) : Llvm.llvalue = + Llvm.mdnode llctx vals + in + let const_i32 (i:int) : Llvm.llvalue = + Llvm.const_int (Llvm.i32_type llctx) i + in + let const_i1 (i:int) : Llvm.llvalue = + Llvm.const_int (Llvm.i1_type llctx) i + in + let llvm_debug_version : int = 0x8 lsl 16 in + let const_dw_tag (tag:Dwarf.dw_tag) : Llvm.llvalue = + const_i32 (llvm_debug_version lor (Dwarf.dw_tag_to_int tag)) + in + + (* Translation of our node_ids into LLVM identifiers, which are strings. *) + let next_anon_llid = ref 0 in + let num_llid num klass = Printf.sprintf "%s%d" klass num in + let anon_llid klass = + let llid = num_llid !next_anon_llid klass in + next_anon_llid := !next_anon_llid + 1; + llid + in + let node_llid (node_id_opt:node_id option) : (string -> string) = + match node_id_opt with + None -> anon_llid + | Some (Node num) -> num_llid num + in + + (* + * Returns a bogus value for use in stub code that hasn't been implemented + * yet. + * + * TODO: On some joyous day, remove me. + *) + let bogus = Llvm.const_null (Llvm.i32_type llctx) in + let bogus_ptr = Llvm.const_null (Llvm.pointer_type (Llvm.i32_type llctx)) in + + let llnilty = Llvm.array_type (Llvm.i1_type llctx) 0 in + let llnil = Llvm.const_array (Llvm.i1_type llctx) [| |] in + + let ty_of_item = Hashtbl.find sem_cx.Semant.ctxt_all_item_types in + let ty_of_slot n = Semant.slot_ty (Semant.get_slot sem_cx n) in + + let filename = Session.filename_of sess.Session.sess_in in + let llmod = Llvm.create_module llctx filename in + + let (abi:Llabi.abi) = Llabi.declare_abi llctx llmod in + let (crate_ptr:Llvm.llvalue) = + Llvm.declare_global abi.Llabi.crate_ty "rust_crate" llmod + in + + let (void_ty:Llvm.lltype) = Llvm.void_type llctx in + let (word_ty:Llvm.lltype) = abi.Llabi.word_ty in + let (wordptr_ty:Llvm.lltype) = Llvm.pointer_type word_ty in + let (task_ty:Llvm.lltype) = abi.Llabi.task_ty in + let (task_ptr_ty:Llvm.lltype) = Llvm.pointer_type task_ty in + let fn_ty (out:Llvm.lltype) (args:Llvm.lltype array) : Llvm.lltype = + Llvm.function_type out args + in + + let imm (i:int64) : Llvm.llvalue = + Llvm.const_int word_ty (Int64.to_int i) + in + + let asm_glue = Llasm.get_glue llctx llmod abi sess in + + let llty_str llty = + Llvm.string_of_lltype llty + in + + let llval_str llv = + let ts = llty_str (Llvm.type_of llv) in + match Llvm.value_name llv with + "" -> + Printf.sprintf "<anon=%s>" ts + | s -> Printf.sprintf "<%s=%s>" s ts + in + + let llvals_str llvals = + (String.concat ", " + (Array.to_list + (Array.map llval_str llvals))) + in + + let build_call callee args rvid builder = + iflog + begin + fun _ -> + let name = Llvm.value_name callee in + log sem_cx "build_call: %s(%s)" name (llvals_str args); + log sem_cx "build_call: typeof(%s) = %s" + name (llty_str (Llvm.type_of callee)) + end; + Llvm.build_call callee args rvid builder + in + + (* Upcall translation *) + + let extern_upcalls = Hashtbl.create 0 in + let trans_upcall + (llbuilder:Llvm.llbuilder) + (lltask:Llvm.llvalue) + (name:string) + (lldest:Llvm.llvalue option) + (llargs:Llvm.llvalue array) = + let n = Array.length llargs in + let llglue = asm_glue.Llasm.asm_upcall_glues.(n) in + let llupcall = htab_search_or_add extern_upcalls name + begin + fun _ -> + let args_ty = + Array.append + [| task_ptr_ty |] + (Array.init n (fun i -> Llvm.type_of llargs.(i))) + in + let out_ty = match lldest with + None -> void_ty + | Some v -> Llvm.type_of v + in + let fty = fn_ty out_ty args_ty in + (* + * NB: At this point it actually doesn't matter what type + * we gave the upcall function, as we're just going to + * pointercast it to a word and pass it to the upcall-glue + * for now. But possibly in the future it might matter if + * we develop a proper upcall calling convention. + *) + Llvm.declare_function name fty llmod + end + in + (* Cast everything to plain words so we can hand off to the glue. *) + let llupcall = Llvm.const_pointercast llupcall word_ty in + let llargs = + Array.map + (fun arg -> + Llvm.build_pointercast arg word_ty + (anon_llid "arg") llbuilder) + llargs + in + let llallargs = Array.append [| lltask; llupcall |] llargs in + let llid = anon_llid "rv" in + let llrv = build_call llglue llallargs llid llbuilder in + Llvm.set_instruction_call_conv Llvm.CallConv.c llrv; + match lldest with + None -> () + | Some lldest -> + let lldest = + Llvm.build_pointercast lldest wordptr_ty "" llbuilder + in + ignore (Llvm.build_store llrv lldest llbuilder); + in + + let upcall + (llbuilder:Llvm.llbuilder) + (lltask:Llvm.llvalue) + (name:string) + (lldest:Llvm.llvalue option) + (llargs:Llvm.llvalue array) + : unit = + trans_upcall llbuilder lltask name lldest llargs + in + + let trans_free + (llbuilder:Llvm.llbuilder) + (lltask:Llvm.llvalue) + (src:Llvm.llvalue) + : unit = + upcall llbuilder lltask "upcall_free" None [| src |] + in + + (* + * let trans_malloc (llbuilder:Llvm.llbuilder) + * (dst:Llvm.llvalue) (nbytes:int64) : unit = + * upcall llbuilder "upcall_malloc" (Some dst) [| imm nbytes |] + * in + *) + + (* Type translation *) + + let lltys = Hashtbl.create 0 in + + let trans_mach_ty (mty:ty_mach) : Llvm.lltype = + let tycon = + match mty with + TY_u8 | TY_i8 -> Llvm.i8_type + | TY_u16 | TY_i16 -> Llvm.i16_type + | TY_u32 | TY_i32 -> Llvm.i32_type + | TY_u64 | TY_i64 -> Llvm.i64_type + | TY_f32 -> Llvm.float_type + | TY_f64 -> Llvm.double_type + in + tycon llctx + in + + + let rec trans_ty_full (ty:Ast.ty) : Llvm.lltype = + let p t = Llvm.pointer_type t in + let s ts = Llvm.struct_type llctx ts in + let opaque _ = Llvm.opaque_type llctx in + let vec_body_ty _ = + s [| word_ty; word_ty; word_ty; (opaque()) |] + in + let rc_opaque_ty = + s [| word_ty; (opaque()) |] + in + match ty with + Ast.TY_any -> opaque () + | Ast.TY_nil -> llnilty + | Ast.TY_bool -> Llvm.i1_type llctx + | Ast.TY_mach mty -> trans_mach_ty mty + | Ast.TY_int -> word_ty + | Ast.TY_uint -> word_ty + | Ast.TY_char -> Llvm.i32_type llctx + | Ast.TY_vec _ + | Ast.TY_str -> p (vec_body_ty()) + + | Ast.TY_fn tfn -> + let (tsig, _) = tfn in + let lloutptr = p (trans_slot None tsig.Ast.sig_output_slot) in + let lltaskty = p abi.Llabi.task_ty in + let llins = Array.map (trans_slot None) tsig.Ast.sig_input_slots in + fn_ty void_ty (Array.append [| lloutptr; lltaskty |] llins) + + | Ast.TY_tup slots -> + s (Array.map (trans_slot None) slots) + + | Ast.TY_rec entries -> + s (Array.map (fun e -> trans_slot None (snd e)) entries) + + | Ast.TY_constrained (ty', _) -> trans_ty ty' + + | Ast.TY_chan _ | Ast.TY_port _ | Ast.TY_task -> + p rc_opaque_ty + + | Ast.TY_native _ -> + word_ty + + | Ast.TY_tag _ | Ast.TY_iso _ | Ast.TY_idx _ + | Ast.TY_obj _ | Ast.TY_type -> (opaque()) (* TODO *) + + | Ast.TY_param _ | Ast.TY_named _ -> + bug () "unresolved type in lltrans" + + and trans_ty t = + htab_search_or_add lltys t (fun _ -> trans_ty_full t) + + (* Translates the type of a slot into the corresponding LLVM type. If the + * id_opt parameter is specified, then the type will be fetched from the + * context. *) + and trans_slot (id_opt:node_id option) (slot:Ast.slot) : Llvm.lltype = + let ty = + match id_opt with + Some id -> ty_of_slot id + | None -> Semant.slot_ty slot + in + let base_llty = trans_ty ty in + match slot.Ast.slot_mode with + Ast.MODE_exterior _ + | Ast.MODE_alias _ -> + Llvm.pointer_type base_llty + | Ast.MODE_interior _ -> base_llty + in + + let get_element_ptr + (llbuilder:Llvm.llbuilder) + (ptr:Llvm.llvalue) + (i:int) + : Llvm.llvalue = + (* + * GEP takes a first-index of zero. Because it must! And this is + * sufficiently surprising that the GEP FAQ exists. And you must + * read it. + *) + let deref_ptr = Llvm.const_int (Llvm.i32_type llctx) 0 in + let idx = Llvm.const_int (Llvm.i32_type llctx) i in + Llvm.build_gep ptr [| deref_ptr; idx |] (anon_llid "gep") llbuilder + in + + let free_ty + (llbuilder:Llvm.llbuilder) + (lltask:Llvm.llvalue) + (ty:Ast.ty) + (ptr:Llvm.llvalue) + : unit = + match ty with + Ast.TY_port _ + | Ast.TY_chan _ + | Ast.TY_task -> bug () "unimplemented ty in Lltrans.free_ty" + | _ -> trans_free llbuilder lltask ptr + in + + let rec iter_ty_slots_full + (llbuilder:Llvm.llbuilder ref) + (ty:Ast.ty) + (dst_ptr:Llvm.llvalue) + (src_ptr:Llvm.llvalue) + (f:(Llvm.llvalue + -> Llvm.llvalue + -> Ast.slot + -> (Ast.ty_iso option) + -> unit)) + (curr_iso:Ast.ty_iso option) + : unit = + + (* NB: must deref llbuilder at call-time; don't curry this. *) + let gep p i = get_element_ptr (!llbuilder) p i in + + match ty with + Ast.TY_rec entries -> + iter_rec_slots gep dst_ptr src_ptr entries f curr_iso + + | Ast.TY_tup slots -> + iter_tup_slots gep dst_ptr src_ptr slots f curr_iso + + | Ast.TY_tag _ + | Ast.TY_iso _ + | Ast.TY_fn _ + | Ast.TY_obj _ -> + bug () "unimplemented ty in Lltrans.iter_ty_slots_full" + + | _ -> () + + and iter_ty_slots + (llbuilder:Llvm.llbuilder ref) + (ty:Ast.ty) + (ptr:Llvm.llvalue) + (f:Llvm.llvalue -> Ast.slot -> (Ast.ty_iso option) -> unit) + (curr_iso:Ast.ty_iso option) + : unit = + iter_ty_slots_full llbuilder ty ptr ptr + (fun _ src_ptr slot curr_iso -> f src_ptr slot curr_iso) + curr_iso + + and drop_ty + (llbuilder:Llvm.llbuilder ref) + (lltask:Llvm.llvalue) + (ty:Ast.ty) + (ptr:Llvm.llvalue) + (curr_iso:Ast.ty_iso option) + : unit = + iter_ty_slots llbuilder ty ptr (drop_slot llbuilder lltask) curr_iso + + and drop_slot + (llbuilder:Llvm.llbuilder ref) + (lltask:Llvm.llvalue) + (slot_ptr:Llvm.llvalue) + (slot:Ast.slot) + (curr_iso:Ast.ty_iso option) + : unit = + + let llfn = Llvm.block_parent (Llvm.insertion_block (!llbuilder)) in + let llty = trans_slot None slot in + let ty = Semant.slot_ty slot in + + let new_block klass = + let llblock = Llvm.append_block llctx (anon_llid klass) llfn in + let llbuilder = Llvm.builder_at_end llctx llblock in + (llblock, llbuilder) + in + + let if_ptr_in_slot_not_null + (inner:Llvm.llvalue -> Llvm.llbuilder -> Llvm.llbuilder) + (llbuilder:Llvm.llbuilder) + : Llvm.llbuilder = + let ptr = Llvm.build_load slot_ptr (anon_llid "tmp") llbuilder in + let null = Llvm.const_pointer_null llty in + let test = + Llvm.build_icmp Llvm.Icmp.Ne null ptr (anon_llid "nullp") llbuilder + in + let (llthen, llthen_builder) = new_block "then" in + let (llnext, llnext_builder) = new_block "next" in + ignore (Llvm.build_cond_br test llthen llnext llbuilder); + let llthen_builder = inner ptr llthen_builder in + ignore (Llvm.build_br llnext llthen_builder); + llnext_builder + in + + let decr_refcnt_and_if_zero + (rc_elt:int) + (inner:Llvm.llvalue -> Llvm.llbuilder -> Llvm.llbuilder) + (ptr:Llvm.llvalue) + (llbuilder:Llvm.llbuilder) + : Llvm.llbuilder = + let rc_ptr = get_element_ptr llbuilder ptr rc_elt in + let rc = Llvm.build_load rc_ptr (anon_llid "rc") llbuilder in + let rc = Llvm.build_sub rc (imm 1L) (anon_llid "tmp") llbuilder in + let _ = Llvm.build_store rc rc_ptr llbuilder in + log sem_cx "rc type: %s" (llval_str rc); + let test = + Llvm.build_icmp Llvm.Icmp.Eq + rc (imm 0L) (anon_llid "zerop") llbuilder + in + let (llthen, llthen_builder) = new_block "then" in + let (llnext, llnext_builder) = new_block "next" in + ignore (Llvm.build_cond_br test llthen llnext llbuilder); + let llthen_builder = inner ptr llthen_builder in + ignore (Llvm.build_br llnext llthen_builder); + llnext_builder + in + + let free_and_null_out_slot + (ptr:Llvm.llvalue) + (llbuilder:Llvm.llbuilder) + : Llvm.llbuilder = + free_ty llbuilder lltask ty ptr; + let null = Llvm.const_pointer_null llty in + ignore (Llvm.build_store null slot_ptr llbuilder); + llbuilder + in + + begin + match slot_mem_ctrl slot with + MEM_rc_struct + | MEM_gc -> + llbuilder := + if_ptr_in_slot_not_null + (decr_refcnt_and_if_zero + Abi.exterior_rc_slot_field_refcnt + free_and_null_out_slot) + (!llbuilder) + + | MEM_rc_opaque -> + llbuilder := + if_ptr_in_slot_not_null + (decr_refcnt_and_if_zero + Abi.exterior_rc_slot_field_refcnt + free_and_null_out_slot) + (!llbuilder) + + | MEM_interior when Semant.type_is_structured ty -> + (* FIXME: to handle recursive types, need to call drop + glue here, not inline. *) + drop_ty llbuilder lltask ty slot_ptr curr_iso + + | _ -> () + end + in + + let (llitems:(node_id, Llvm.llvalue) Hashtbl.t) = Hashtbl.create 0 in + let declare_mod_item + (name:Ast.ident) + { node = { Ast.decl_item = (item:Ast.mod_item') }; id = id } + : unit = + let full_name = Semant.item_str sem_cx id in + let line_num = + match Session.get_span sess id with + None -> 0 + | Some span -> + let (_, line, _) = span.lo in + line + in + match item with + Ast.MOD_ITEM_fn _ -> + let llty = trans_ty (ty_of_item id) in + let llfn = Llvm.declare_function ("_rust_" ^ name) llty llmod in + let meta = + md_node + [| + const_dw_tag Dwarf.DW_TAG_subprogram; + const_i32 0; (* unused *) + const_i32 0; (* context metadata llvalue *) + md_str name; + md_str full_name; + md_str full_name; + const_i32 0; (* file metadata llvalue *) + const_i32 line_num; + const_i32 0; (* type descriptor metadata llvalue *) + const_i1 1; (* flag: local to compile unit? *) + const_i1 1; (* flag: defined in compile unit? *) + |] + in + Llvm.set_function_call_conv Llvm.CallConv.c llfn; + Hashtbl.add llitems id llfn; + + (* FIXME: Adding metadata does not work yet. . *) + let _ = fun _ -> set_dbg_metadata llfn meta in + () + + | _ -> () (* TODO *) + in + + let trans_fn + ({ + Ast.fn_input_slots = (header_slots:Ast.header_slots); + Ast.fn_body = (body:Ast.block) + }:Ast.fn) + (fn_id:node_id) + : unit = + let llfn = Hashtbl.find llitems fn_id in + let lloutptr = Llvm.param llfn 0 in + let lltask = Llvm.param llfn 1 in + + (* LLVM requires that functions be grouped into basic blocks terminated by + * terminator instructions, while our AST is less strict. So we have to do + * a little trickery here to wrangle the statement sequence into LLVM's + * format. *) + + let new_block id_opt klass = + let llblock = Llvm.append_block llctx (node_llid id_opt klass) llfn in + let llbuilder = Llvm.builder_at_end llctx llblock in + (llblock, llbuilder) + in + + (* Build up the slot-to-llvalue mapping, allocating space along the + * way. *) + let slot_to_llvalue = Hashtbl.create 0 in + let (_, llinitbuilder) = new_block None "init" in + + (* Allocate space for arguments (needed because arguments are lvalues in + * Rust), and store them in the slot-to-llvalue mapping. *) + let n_implicit_args = 2 in + let build_arg idx llargval = + if idx >= n_implicit_args + then + let ({ id = id }, ident) = header_slots.(idx - 2) in + Llvm.set_value_name ident llargval; + let llarg = + let llty = Llvm.type_of llargval in + Llvm.build_alloca llty (ident ^ "_ptr") llinitbuilder + in + ignore (Llvm.build_store llargval llarg llinitbuilder); + Hashtbl.add slot_to_llvalue id llarg + in + Array.iteri build_arg (Llvm.params llfn); + + (* Allocate space for all the blocks' slots. + * and zero the exteriors. *) + let init_block (block_id:node_id) : unit = + let init_slot + (key:Ast.slot_key) + (slot_id:node_id) + (slot:Ast.slot) + : unit = + let name = Ast.sprintf_slot_key () key in + let llty = trans_slot (Some slot_id) slot in + let llptr = Llvm.build_alloca llty name llinitbuilder in + begin + match slot_mem_ctrl slot with + MEM_rc_struct + | MEM_rc_opaque + | MEM_gc -> + ignore (Llvm.build_store + (Llvm.const_pointer_null llty) + llptr llinitbuilder); + | _ -> () + end; + Hashtbl.add slot_to_llvalue slot_id llptr + in + iter_block_slots sem_cx block_id init_slot + in + + let exit_block + (llbuilder:Llvm.llbuilder) + (block_id:node_id) + : Llvm.llbuilder = + let r = ref llbuilder in + iter_block_slots sem_cx block_id + begin + fun _ slot_id slot -> + if (not (Semant.slot_is_obj_state sem_cx slot_id)) + then + let ptr = Hashtbl.find slot_to_llvalue slot_id in + drop_slot r lltask ptr slot None + end; + !r + in + + List.iter init_block (Hashtbl.find sem_cx.Semant.ctxt_frame_blocks fn_id); + + let static_str (s:string) : Llvm.llvalue = + Llvm.define_global (anon_llid "str") (Llvm.const_stringz llctx s) llmod + in + + + (* Translates a list of AST statements to a sequence of LLVM instructions. + * The supplied "terminate" function appends the appropriate terminator + * instruction to the instruction stream. It may or may not be called, + * depending on whether the AST contains a terminating instruction + * explicitly. *) + let rec trans_stmts + (block_id:node_id) + (llbuilder:Llvm.llbuilder) + (stmts:Ast.stmt list) + (terminate:(Llvm.llbuilder -> node_id -> unit)) + : unit = + let trans_literal + (lit:Ast.lit) + : Llvm.llvalue = + match lit with + Ast.LIT_nil -> llnil + | Ast.LIT_bool value -> + Llvm.const_int (Llvm.i1_type llctx) (if value then 1 else 0) + | Ast.LIT_mach (mty, value, _) -> + let llty = trans_mach_ty mty in + Llvm.const_of_int64 llty value (mach_is_signed mty) + | Ast.LIT_int (value, _) -> + Llvm.const_of_int64 (Llvm.i32_type llctx) value true + | Ast.LIT_uint (value, _) -> + Llvm.const_of_int64 (Llvm.i32_type llctx) value false + | Ast.LIT_char ch -> + Llvm.const_int (Llvm.i32_type llctx) ch + in + + (* Translates an lval by reference into the appropriate pointer + * value. *) + let trans_lval (lval:Ast.lval) : Llvm.llvalue = + iflog (fun _ -> log sem_cx "trans_lval: %a" Ast.sprintf_lval lval); + match lval with + Ast.LVAL_base { id = base_id } -> + let id = + Hashtbl.find sem_cx.Semant.ctxt_lval_to_referent base_id + in + let referent = Hashtbl.find sem_cx.Semant.ctxt_all_defns id in + begin + match referent with + Semant.DEFN_slot _ -> Hashtbl.find slot_to_llvalue id + | Semant.DEFN_item _ -> Hashtbl.find llitems id + | _ -> bogus_ptr (* TODO *) + end + | Ast.LVAL_ext _ -> bogus_ptr (* TODO *) + in + + let trans_atom (atom:Ast.atom) : Llvm.llvalue = + iflog (fun _ -> log sem_cx "trans_atom: %a" Ast.sprintf_atom atom); + match atom with + Ast.ATOM_literal { node = lit } -> trans_literal lit + | Ast.ATOM_lval lval -> + Llvm.build_load (trans_lval lval) (anon_llid "tmp") llbuilder + in + + let trans_binary_expr + ((op:Ast.binop), (lhs:Ast.atom), (rhs:Ast.atom)) + : Llvm.llvalue = + (* Evaluate the operands in the proper order. *) + let (lllhs, llrhs) = + match op with + Ast.BINOP_or | Ast.BINOP_and | Ast.BINOP_eq | Ast.BINOP_ne + | Ast.BINOP_lt | Ast.BINOP_le | Ast.BINOP_ge | Ast.BINOP_gt + | Ast.BINOP_lsl | Ast.BINOP_lsr | Ast.BINOP_asr + | Ast.BINOP_add | Ast.BINOP_sub | Ast.BINOP_mul + | Ast.BINOP_div | Ast.BINOP_mod | Ast.BINOP_xor -> + (trans_atom lhs, trans_atom rhs) + | Ast.BINOP_send -> + let llrhs = trans_atom rhs in + let lllhs = trans_atom lhs in + (lllhs, llrhs) + in + let llid = anon_llid "expr" in + match op with + Ast.BINOP_eq -> + (* TODO: equality works on more than just integers *) + Llvm.build_icmp Llvm.Icmp.Eq lllhs llrhs llid llbuilder + + (* TODO: signed/unsigned distinction, floating point *) + | Ast.BINOP_add -> Llvm.build_add lllhs llrhs llid llbuilder + | Ast.BINOP_sub -> Llvm.build_sub lllhs llrhs llid llbuilder + | Ast.BINOP_mul -> Llvm.build_mul lllhs llrhs llid llbuilder + | Ast.BINOP_div -> Llvm.build_sdiv lllhs llrhs llid llbuilder + | Ast.BINOP_mod -> Llvm.build_srem lllhs llrhs llid llbuilder + + | _ -> bogus (* TODO *) + in + + let trans_unary_expr _ = bogus in (* TODO *) + + let trans_expr (expr:Ast.expr) : Llvm.llvalue = + iflog (fun _ -> log sem_cx "trans_expr: %a" Ast.sprintf_expr expr); + match expr with + Ast.EXPR_binary binexp -> trans_binary_expr binexp + | Ast.EXPR_unary unexp -> trans_unary_expr unexp + | Ast.EXPR_atom atom -> trans_atom atom + in + + let trans_log_str (atom:Ast.atom) : unit = + upcall llbuilder lltask "upcall_log_str" None [| trans_atom atom |] + in + + let trans_log_int (atom:Ast.atom) : unit = + upcall llbuilder lltask "upcall_log_int" None [| trans_atom atom |] + in + + let trans_fail + (llbuilder:Llvm.llbuilder) + (lltask:Llvm.llvalue) + (reason:string) + (stmt_id:node_id) + : unit = + let (file, line, _) = + match Session.get_span sem_cx.Semant.ctxt_sess stmt_id with + None -> ("<none>", 0, 0) + | Some sp -> sp.lo + in + upcall llbuilder lltask "upcall_fail" None [| + static_str reason; + static_str file; + Llvm.const_int (Llvm.i32_type llctx) line + |]; + ignore (Llvm.build_unreachable llbuilder) + in + + (* FIXME: this may be irrelevant; possibly LLVM will wind up + * using GOT and such wherever it needs to to achieve PIC + * data. + *) + (* + let crate_rel (v:Llvm.llvalue) : Llvm.llvalue = + let v_int = Llvm.const_pointercast v word_ty in + let c_int = Llvm.const_pointercast crate_ptr word_ty in + Llvm.const_sub v_int c_int + in + *) + + match stmts with + [] -> terminate llbuilder block_id + | head::tail -> + + iflog (fun _ -> + log sem_cx "trans_stmt: %a" Ast.sprintf_stmt head); + + let trans_tail_with_builder llbuilder' : unit = + trans_stmts block_id llbuilder' tail terminate + in + let trans_tail () = trans_tail_with_builder llbuilder in + + match head.node with + Ast.STMT_init_tup (dest, atoms) -> + let zero = const_i32 0 in + let lldest = trans_lval dest in + let trans_tup_atom idx (_, _, atom) = + let indices = [| zero; const_i32 idx |] in + let gep_id = anon_llid "init_tup_gep" in + let ptr = + Llvm.build_gep lldest indices gep_id llbuilder + in + ignore (Llvm.build_store (trans_atom atom) ptr llbuilder) + in + Array.iteri trans_tup_atom atoms; + trans_tail () + + | Ast.STMT_copy (dest, src) -> + let llsrc = trans_expr src in + let lldest = trans_lval dest in + ignore (Llvm.build_store llsrc lldest llbuilder); + trans_tail () + + | Ast.STMT_call (dest, fn, args) -> + let llargs = Array.map trans_atom args in + let lldest = trans_lval dest in + let llfn = trans_lval fn in + let llallargs = Array.append [| lldest; lltask |] llargs in + let llrv = build_call llfn llallargs "" llbuilder in + Llvm.set_instruction_call_conv Llvm.CallConv.c llrv; + trans_tail () + + | Ast.STMT_if sif -> + let llexpr = trans_expr sif.Ast.if_test in + let (llnext, llnextbuilder) = new_block None "next" in + let branch_to_next llbuilder' _ = + ignore (Llvm.build_br llnext llbuilder') + in + let llthen = trans_block sif.Ast.if_then branch_to_next in + let llelse = + match sif.Ast.if_else with + None -> llnext + | Some if_else -> trans_block if_else branch_to_next + in + ignore (Llvm.build_cond_br llexpr llthen llelse llbuilder); + trans_tail_with_builder llnextbuilder + + | Ast.STMT_ret atom_opt -> + begin + match atom_opt with + None -> () + | Some atom -> + ignore (Llvm.build_store (trans_atom atom) + lloutptr llbuilder) + end; + let llbuilder = exit_block llbuilder block_id in + ignore (Llvm.build_ret_void llbuilder) + + | Ast.STMT_fail -> + trans_fail llbuilder lltask "explicit failure" head.id + + | Ast.STMT_log a -> + begin + match Semant.atom_type sem_cx a with + (* NB: If you extend this, be sure to update the + * typechecking code in type.ml as well. *) + Ast.TY_str -> trans_log_str a + | Ast.TY_int | Ast.TY_uint | Ast.TY_bool | Ast.TY_char + | Ast.TY_mach (TY_u8) | Ast.TY_mach (TY_u16) + | Ast.TY_mach (TY_u32) | Ast.TY_mach (TY_i8) + | Ast.TY_mach (TY_i16) | Ast.TY_mach (TY_i32) -> + trans_log_int a + | _ -> Semant.bugi sem_cx head.id + "unimplemented logging type" + end; + trans_tail () + + | Ast.STMT_check_expr expr -> + let llexpr = trans_expr expr in + let (llfail, llfailbuilder) = new_block None "fail" in + let reason = Ast.fmt_to_str Ast.fmt_expr expr in + trans_fail llfailbuilder lltask reason head.id; + let (llok, llokbuilder) = new_block None "ok" in + ignore (Llvm.build_cond_br llexpr llok llfail llbuilder); + trans_tail_with_builder llokbuilder + + | Ast.STMT_init_str (dst, str) -> + let d = trans_lval dst in + let s = static_str str in + let len = + Llvm.const_int word_ty ((String.length str) + 1) + in + upcall llbuilder lltask "upcall_new_str" + (Some d) [| s; len |]; + trans_tail () + + | _ -> trans_stmts block_id llbuilder tail terminate + + (* + * Translates an AST block to one or more LLVM basic blocks and returns + * the first basic block. The supplied callback is expected to add a + * terminator instruction. + *) + + and trans_block + ({ node = (stmts:Ast.stmt array); id = id }:Ast.block) + (terminate:Llvm.llbuilder -> node_id -> unit) + : Llvm.llbasicblock = + let (llblock, llbuilder) = new_block (Some id) "bb" in + trans_stmts id llbuilder (Array.to_list stmts) terminate; + llblock + in + + (* "Falling off the end" of a function needs to turn into an explicit + * return instruction. *) + let default_terminate llbuilder block_id = + let llbuilder = exit_block llbuilder block_id in + ignore (Llvm.build_ret_void llbuilder) + in + + (* Build up the first body block, and link it to the end of the + * initialization block. *) + let llbodyblock = (trans_block body default_terminate) in + ignore (Llvm.build_br llbodyblock llinitbuilder) + in + + let trans_mod_item + (_:Ast.ident) + { node = { Ast.decl_item = (item:Ast.mod_item') }; id = id } + : unit = + match item with + Ast.MOD_ITEM_fn fn -> trans_fn fn id + | _ -> () + in + + let exit_task_glue = + (* The exit-task glue does not get called. + * + * Rather, control arrives at it by *returning* to the first + * instruction of it, when control falls off the end of the task's + * root function. + * + * There is a "fake" frame set up by the runtime, underneath us, + * that we find ourselves in. This frame has the shape of a frame + * entered with 2 standard arguments (outptr + taskptr), then a + * retpc and N callee-saves sitting on the stack; all this is under + * ebp. Then there are 2 *outgoing* args at sp[0] and sp[1]. + * + * All these are fake except the taskptr, which is the one bit we + * want. So we construct an equally fake cdecl llvm signature here + * to crudely *get* the taskptr that's sitting 2 words up from sp, + * and pass it to upcall_exit. + * + * The latter never returns. + *) + let llty = fn_ty void_ty [| task_ptr_ty |] in + let llfn = Llvm.declare_function "rust_exit_task_glue" llty llmod in + let lltask = Llvm.param llfn 0 in + let llblock = Llvm.append_block llctx "body" llfn in + let llbuilder = Llvm.builder_at_end llctx llblock in + trans_upcall llbuilder lltask "upcall_exit" None [||]; + ignore (Llvm.build_ret_void llbuilder); + llfn + in + + try + let crate' = crate.node in + let items = snd (crate'.Ast.crate_items) in + Hashtbl.iter declare_mod_item items; + Hashtbl.iter trans_mod_item items; + Llfinal.finalize_module + llctx llmod abi asm_glue exit_task_glue crate_ptr; + llmod + with e -> Llvm.dispose_module llmod; raise e +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) + diff --git a/src/boot/me/alias.ml b/src/boot/me/alias.ml new file mode 100644 index 00000000..7009fe10 --- /dev/null +++ b/src/boot/me/alias.ml @@ -0,0 +1,134 @@ +open Semant;; +open Common;; + +let log cx = Session.log "alias" + cx.ctxt_sess.Session.sess_log_alias + cx.ctxt_sess.Session.sess_log_out +;; + +let alias_analysis_visitor + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + let curr_stmt = Stack.create () in + + let alias_slot (slot_id:node_id) : unit = + begin + log cx "noting slot #%d as aliased" (int_of_node slot_id); + Hashtbl.replace cx.ctxt_slot_aliased slot_id () + end + in + + let alias lval = + match lval with + Ast.LVAL_base nb -> + let referent = Hashtbl.find cx.ctxt_lval_to_referent nb.id in + if (referent_is_slot cx referent) + then alias_slot referent + | _ -> err None "unhandled form of lval %a in alias analysis" + Ast.sprintf_lval lval + in + + let alias_atom at = + match at with + Ast.ATOM_lval lv -> alias lv + | _ -> err None "aliasing literal" + in + + let alias_call_args dst callee args = + alias dst; + let callee_ty = lval_ty cx callee in + match callee_ty with + Ast.TY_fn (tsig,_) -> + Array.iteri + begin + fun i slot -> + match slot.Ast.slot_mode with + Ast.MODE_alias _ -> + alias_atom args.(i) + | _ -> () + end + tsig.Ast.sig_input_slots + | _ -> () + in + + let visit_stmt_pre s = + Stack.push s.id curr_stmt; + begin + try + match s.node with + (* FIXME (issue #26): actually all these *existing* cases + * can probably go now that we're using Trans.aliasing to + * form short-term spill-based aliases. Only aliases that + * survive 'into' a sub-block (those formed during iteration) + * need to be handled in this module. *) + Ast.STMT_call (dst, callee, args) + | Ast.STMT_spawn (dst, _, callee, args) + -> alias_call_args dst callee args + + | Ast.STMT_send (_, src) -> alias src + | Ast.STMT_recv (dst, _) -> alias dst + | Ast.STMT_init_port (dst) -> alias dst + | Ast.STMT_init_chan (dst, _) -> alias dst + | Ast.STMT_init_vec (dst, _, _) -> alias dst + | Ast.STMT_init_str (dst, _) -> alias dst + | Ast.STMT_for_each sfe -> + let (slot, _) = sfe.Ast.for_each_slot in + alias_slot slot.id + | _ -> () (* FIXME (issue #29): plenty more to handle here. *) + with + Semant_err (None, msg) -> + raise (Semant_err ((Some s.id), msg)) + end; + inner.Walk.visit_stmt_pre s + in + let visit_stmt_post s = + inner.Walk.visit_stmt_post s; + ignore (Stack.pop curr_stmt); + in + + let visit_lval_pre lv = + let slot_id = lval_to_referent cx (lval_base_id lv) in + if (not (Stack.is_empty curr_stmt)) && (referent_is_slot cx slot_id) + then + begin + let slot_depth = get_slot_depth cx slot_id in + let stmt_depth = get_stmt_depth cx (Stack.top curr_stmt) in + if slot_depth <> stmt_depth + then + begin + let _ = assert (slot_depth < stmt_depth) in + alias_slot slot_id + end + end + in + + { inner with + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_stmt_post = visit_stmt_post; + Walk.visit_lval_pre = visit_lval_pre + } +;; + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : unit = + let path = Stack.create () in + let passes = + [| + (alias_analysis_visitor cx + Walk.empty_visitor); + |] + in + run_passes cx "alias" path passes (log cx "%s") crate +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/dead.ml b/src/boot/me/dead.ml new file mode 100644 index 00000000..47e56166 --- /dev/null +++ b/src/boot/me/dead.ml @@ -0,0 +1,121 @@ +(* + * A simple dead-code analysis that rejects code following unconditional + * 'ret' or 'be'. + *) + +open Semant;; +open Common;; + +let log cx = Session.log "dead" + cx.ctxt_sess.Session.sess_log_dead + cx.ctxt_sess.Session.sess_log_out +;; + +let dead_code_visitor + ((*cx*)_:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + + (* FIXME: create separate table for each fn body for less garbage *) + let must_exit = Hashtbl.create 100 in + + let all_must_exit ids = + arr_for_all (fun _ id -> Hashtbl.mem must_exit id) ids + in + + let visit_block_post block = + let stmts = block.node in + let len = Array.length stmts in + if len > 0 then + Array.iteri + begin + fun i s -> + if (i < (len - 1)) && (Hashtbl.mem must_exit s.id) then + err (Some stmts.(i + 1).id) "dead statement" + end + stmts; + inner.Walk.visit_block_post block + in + + let visit_stmt_post s = + begin + match s.node with + | Ast.STMT_block block -> + if Hashtbl.mem must_exit block.id then + Hashtbl.add must_exit s.id () + + | Ast.STMT_while { Ast.while_body = body } + | Ast.STMT_do_while { Ast.while_body = body } + | Ast.STMT_for_each { Ast.for_each_body = body } + | Ast.STMT_for { Ast.for_body = body } -> + if (Hashtbl.mem must_exit body.id) then + Hashtbl.add must_exit s.id () + + | Ast.STMT_if { Ast.if_then = b1; Ast.if_else = Some b2 } -> + if (Hashtbl.mem must_exit b1.id) && (Hashtbl.mem must_exit b2.id) + then Hashtbl.add must_exit s.id () + + | Ast.STMT_if _ -> () + + | Ast.STMT_ret _ + | Ast.STMT_be _ -> + Hashtbl.add must_exit s.id () + + | Ast.STMT_alt_tag { Ast.alt_tag_arms = arms } -> + let arm_ids = + Array.map (fun { node = (_, block) } -> block.id) arms + in + if all_must_exit arm_ids + then Hashtbl.add must_exit s.id () + + | Ast.STMT_alt_type { Ast.alt_type_arms = arms; + Ast.alt_type_else = alt_type_else } -> + let arm_ids = Array.map (fun (_, _, block) -> block.id) arms in + let else_ids = + begin + match alt_type_else with + Some stmt -> [| stmt.id |] + | None -> [| |] + end + in + if all_must_exit (Array.append arm_ids else_ids) then + Hashtbl.add must_exit s.id () + + (* FIXME: figure this one out *) + | Ast.STMT_alt_port _ -> () + + | _ -> () + end; + inner.Walk.visit_stmt_post s + + in + { inner with + Walk.visit_block_post = visit_block_post; + Walk.visit_stmt_post = visit_stmt_post } +;; + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : unit = + let path = Stack.create () in + let passes = + [| + (dead_code_visitor cx + Walk.empty_visitor) + |] + in + + run_passes cx "dead" path passes (log cx "%s") crate; + () +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/dwarf.ml b/src/boot/me/dwarf.ml new file mode 100644 index 00000000..9423d4ee --- /dev/null +++ b/src/boot/me/dwarf.ml @@ -0,0 +1,3019 @@ +(* + * Walk crate and generate DWARF-3 records. This file might also go in + * the be/ directory; it's half-middle-end, half-back-end. Debug info is + * like that. + * + * Some notes about DWARF: + * + * - Records form an ownership tree. The tree is serialized in + * depth-first pre-order with child lists ending with null + * records. When a node type is defined to have no children, no null + * child record is provided; it's implied. + * + * [parent] + * / \ + * [child1] [child2] + * | + * [grandchild1] + * + * serializes as: + * + * [parent][child1][grandchild1][null][child2][null][null] + * + * - Sometimes you want to make it possible to scan through a sibling + * list quickly while skipping the sub-children of each (such as + * skipping the 'grandchild' above); this can be done with a + * DW_AT_sibling attribute that points forward to the next same-level + * sibling. + * + * - A DWARF consumer contains a little stack-machine interpreter for + * a micro-language that you can embed in DWARF records to compute + * values algorithmically. + * + * - DWARF is not "officially" supported by any Microsoft tools in + * PE files, but the Microsoft debugging information formats are + * proprietary and ever-shifting, and not clearly sufficient for + * our needs; by comparison DWARF is widely supported, stable, + * flexible, and required everywhere *else*. We are using DWARF to + * support major components of the rust runtime (reflection, + * unwinding, profiling) so it's helpful to not have to span + * technologies, just focus on DWARF. Luckily the MINGW/Cygwin + * communities have worked out a convention for PE, and taught BFD + * (thus most tools) how to digest DWARF sections trailing after + * the .idata section of a normal PE file. Seems to work fine. + * + * - DWARF supports variable-length coding using LEB128, and in the + * cases where these are symbolic or self-contained numbers, we + * support them in the assembler. Inter-DWARF-record references + * can be done via fixed-size DW_FORM_ref{1,2,4,8} or + * DW_FORM_ref_addr; or else via variable-size (LEB128) + * DW_FORM_ref_udata. It is hazardous to use the LEB128 form in + * our implementation of references, since we use a generic 2-pass + * (+ relaxation) fixup mechanism in our assembler which in + * general may present an information-dependency cycle for LEB128 + * coding of offsets: you need to know the offset before you can + * work out the LEB128 size, and you may need to know several + * LEB128-sizes before you can work out the offsets of other + * LEB128s (possibly even the one you're currently coding). In + * general the assembler makes no attempt to resolve such + * cycles. It'll just throw if it can't handle what you ask + * for. So it's best to pay a little extra space and use + * DW_FORM_ref_addr or DW_FORM_ref{1,2,4,8} values, in all cases. + *) + +open Semant;; +open Common;; +open Asm;; + +let log cx = Session.log "dwarf" + cx.ctxt_sess.Session.sess_log_dwarf + cx.ctxt_sess.Session.sess_log_out +;; + +type dw_tag = + DW_TAG_array_type + | DW_TAG_class_type + | DW_TAG_entry_point + | DW_TAG_enumeration_type + | DW_TAG_formal_parameter + | DW_TAG_imported_declaration + | DW_TAG_label + | DW_TAG_lexical_block + | DW_TAG_member + | DW_TAG_pointer_type + | DW_TAG_reference_type + | DW_TAG_compile_unit + | DW_TAG_string_type + | DW_TAG_structure_type + | DW_TAG_subroutine_type + | DW_TAG_typedef + | DW_TAG_union_type + | DW_TAG_unspecified_parameters + | DW_TAG_variant + | DW_TAG_common_block + | DW_TAG_common_inclusion + | DW_TAG_inheritance + | DW_TAG_inlined_subroutine + | DW_TAG_module + | DW_TAG_ptr_to_member_type + | DW_TAG_set_type + | DW_TAG_subrange_type + | DW_TAG_with_stmt + | DW_TAG_access_declaration + | DW_TAG_base_type + | DW_TAG_catch_block + | DW_TAG_const_type + | DW_TAG_constant + | DW_TAG_enumerator + | DW_TAG_file_type + | DW_TAG_friend + | DW_TAG_namelist + | DW_TAG_namelist_item + | DW_TAG_packed_type + | DW_TAG_subprogram + | DW_TAG_template_type_parameter + | DW_TAG_template_value_parameter + | DW_TAG_thrown_type + | DW_TAG_try_block + | DW_TAG_variant_part + | DW_TAG_variable + | DW_TAG_volatile_type + | DW_TAG_dwarf_procedure + | DW_TAG_restrict_type + | DW_TAG_interface_type + | DW_TAG_namespace + | DW_TAG_imported_module + | DW_TAG_unspecified_type + | DW_TAG_partial_unit + | DW_TAG_imported_unit + | DW_TAG_condition + | DW_TAG_shared_type + | DW_TAG_lo_user + | DW_TAG_rust_meta + | DW_TAG_hi_user +;; + + +let dw_tag_to_int (tag:dw_tag) : int = + match tag with + DW_TAG_array_type -> 0x01 + | DW_TAG_class_type -> 0x02 + | DW_TAG_entry_point -> 0x03 + | DW_TAG_enumeration_type -> 0x04 + | DW_TAG_formal_parameter -> 0x05 + | DW_TAG_imported_declaration -> 0x08 + | DW_TAG_label -> 0x0a + | DW_TAG_lexical_block -> 0x0b + | DW_TAG_member -> 0x0d + | DW_TAG_pointer_type -> 0x0f + | DW_TAG_reference_type -> 0x10 + | DW_TAG_compile_unit -> 0x11 + | DW_TAG_string_type -> 0x12 + | DW_TAG_structure_type -> 0x13 + | DW_TAG_subroutine_type -> 0x15 + | DW_TAG_typedef -> 0x16 + | DW_TAG_union_type -> 0x17 + | DW_TAG_unspecified_parameters -> 0x18 + | DW_TAG_variant -> 0x19 + | DW_TAG_common_block -> 0x1a + | DW_TAG_common_inclusion -> 0x1b + | DW_TAG_inheritance -> 0x1c + | DW_TAG_inlined_subroutine -> 0x1d + | DW_TAG_module -> 0x1e + | DW_TAG_ptr_to_member_type -> 0x1f + | DW_TAG_set_type -> 0x20 + | DW_TAG_subrange_type -> 0x21 + | DW_TAG_with_stmt -> 0x22 + | DW_TAG_access_declaration -> 0x23 + | DW_TAG_base_type -> 0x24 + | DW_TAG_catch_block -> 0x25 + | DW_TAG_const_type -> 0x26 + | DW_TAG_constant -> 0x27 + | DW_TAG_enumerator -> 0x28 + | DW_TAG_file_type -> 0x29 + | DW_TAG_friend -> 0x2a + | DW_TAG_namelist -> 0x2b + | DW_TAG_namelist_item -> 0x2c + | DW_TAG_packed_type -> 0x2d + | DW_TAG_subprogram -> 0x2e + | DW_TAG_template_type_parameter -> 0x2f + | DW_TAG_template_value_parameter -> 0x30 + | DW_TAG_thrown_type -> 0x31 + | DW_TAG_try_block -> 0x32 + | DW_TAG_variant_part -> 0x33 + | DW_TAG_variable -> 0x34 + | DW_TAG_volatile_type -> 0x35 + | DW_TAG_dwarf_procedure -> 0x36 + | DW_TAG_restrict_type -> 0x37 + | DW_TAG_interface_type -> 0x38 + | DW_TAG_namespace -> 0x39 + | DW_TAG_imported_module -> 0x3a + | DW_TAG_unspecified_type -> 0x3b + | DW_TAG_partial_unit -> 0x3c + | DW_TAG_imported_unit -> 0x3d + | DW_TAG_condition -> 0x3f + | DW_TAG_shared_type -> 0x40 + | DW_TAG_lo_user -> 0x4080 + | DW_TAG_rust_meta -> 0x4300 + | DW_TAG_hi_user -> 0xffff +;; + +let dw_tag_of_int (i:int) : dw_tag = + match i with + 0x01 -> DW_TAG_array_type + | 0x02 -> DW_TAG_class_type + | 0x03 -> DW_TAG_entry_point + | 0x04 -> DW_TAG_enumeration_type + | 0x05 -> DW_TAG_formal_parameter + | 0x08 -> DW_TAG_imported_declaration + | 0x0a -> DW_TAG_label + | 0x0b -> DW_TAG_lexical_block + | 0x0d -> DW_TAG_member + | 0x0f -> DW_TAG_pointer_type + | 0x10 -> DW_TAG_reference_type + | 0x11 -> DW_TAG_compile_unit + | 0x12 -> DW_TAG_string_type + | 0x13 -> DW_TAG_structure_type + | 0x15 -> DW_TAG_subroutine_type + | 0x16 -> DW_TAG_typedef + | 0x17 -> DW_TAG_union_type + | 0x18 -> DW_TAG_unspecified_parameters + | 0x19 -> DW_TAG_variant + | 0x1a -> DW_TAG_common_block + | 0x1b -> DW_TAG_common_inclusion + | 0x1c -> DW_TAG_inheritance + | 0x1d -> DW_TAG_inlined_subroutine + | 0x1e -> DW_TAG_module + | 0x1f -> DW_TAG_ptr_to_member_type + | 0x20 -> DW_TAG_set_type + | 0x21 -> DW_TAG_subrange_type + | 0x22 -> DW_TAG_with_stmt + | 0x23 -> DW_TAG_access_declaration + | 0x24 -> DW_TAG_base_type + | 0x25 -> DW_TAG_catch_block + | 0x26 -> DW_TAG_const_type + | 0x27 -> DW_TAG_constant + | 0x28 -> DW_TAG_enumerator + | 0x29 -> DW_TAG_file_type + | 0x2a -> DW_TAG_friend + | 0x2b -> DW_TAG_namelist + | 0x2c -> DW_TAG_namelist_item + | 0x2d -> DW_TAG_packed_type + | 0x2e -> DW_TAG_subprogram + | 0x2f -> DW_TAG_template_type_parameter + | 0x30 -> DW_TAG_template_value_parameter + | 0x31 -> DW_TAG_thrown_type + | 0x32 -> DW_TAG_try_block + | 0x33 -> DW_TAG_variant_part + | 0x34 -> DW_TAG_variable + | 0x35 -> DW_TAG_volatile_type + | 0x36 -> DW_TAG_dwarf_procedure + | 0x37 -> DW_TAG_restrict_type + | 0x38 -> DW_TAG_interface_type + | 0x39 -> DW_TAG_namespace + | 0x3a -> DW_TAG_imported_module + | 0x3b -> DW_TAG_unspecified_type + | 0x3c -> DW_TAG_partial_unit + | 0x3d -> DW_TAG_imported_unit + | 0x3f -> DW_TAG_condition + | 0x40 -> DW_TAG_shared_type + | 0x4080 -> DW_TAG_lo_user + | 0x4300 -> DW_TAG_rust_meta + | 0xffff -> DW_TAG_hi_user + | _ -> bug () "bad DWARF tag code: %d" i +;; + + +let dw_tag_to_string (tag:dw_tag) : string = + match tag with + DW_TAG_array_type -> "DW_TAG_array_type" + | DW_TAG_class_type -> "DW_TAG_class_type" + | DW_TAG_entry_point -> "DW_TAG_entry_point" + | DW_TAG_enumeration_type -> "DW_TAG_enumeration_type" + | DW_TAG_formal_parameter -> "DW_TAG_formal_parameter" + | DW_TAG_imported_declaration -> "DW_TAG_imported_declaration" + | DW_TAG_label -> "DW_TAG_label" + | DW_TAG_lexical_block -> "DW_TAG_lexical_block" + | DW_TAG_member -> "DW_TAG_member" + | DW_TAG_pointer_type -> "DW_TAG_pointer_type" + | DW_TAG_reference_type -> "DW_TAG_reference_type" + | DW_TAG_compile_unit -> "DW_TAG_compile_unit" + | DW_TAG_string_type -> "DW_TAG_string_type" + | DW_TAG_structure_type -> "DW_TAG_structure_type" + | DW_TAG_subroutine_type -> "DW_TAG_subroutine_type" + | DW_TAG_typedef -> "DW_TAG_typedef" + | DW_TAG_union_type -> "DW_TAG_union_type" + | DW_TAG_unspecified_parameters -> "DW_TAG_unspecified_parameters" + | DW_TAG_variant -> "DW_TAG_variant" + | DW_TAG_common_block -> "DW_TAG_common_block" + | DW_TAG_common_inclusion -> "DW_TAG_common_inclusion" + | DW_TAG_inheritance -> "DW_TAG_inheritance" + | DW_TAG_inlined_subroutine -> "DW_TAG_inlined_subroutine" + | DW_TAG_module -> "DW_TAG_module" + | DW_TAG_ptr_to_member_type -> "DW_TAG_ptr_to_member_type" + | DW_TAG_set_type -> "DW_TAG_set_type" + | DW_TAG_subrange_type -> "DW_TAG_subrange_type" + | DW_TAG_with_stmt -> "DW_TAG_with_stmt" + | DW_TAG_access_declaration -> "DW_TAG_access_declaration" + | DW_TAG_base_type -> "DW_TAG_base_type" + | DW_TAG_catch_block -> "DW_TAG_catch_block" + | DW_TAG_const_type -> "DW_TAG_const_type" + | DW_TAG_constant -> "DW_TAG_constant" + | DW_TAG_enumerator -> "DW_TAG_enumerator" + | DW_TAG_file_type -> "DW_TAG_file_type" + | DW_TAG_friend -> "DW_TAG_friend" + | DW_TAG_namelist -> "DW_TAG_namelist" + | DW_TAG_namelist_item -> "DW_TAG_namelist_item" + | DW_TAG_packed_type -> "DW_TAG_packed_type" + | DW_TAG_subprogram -> "DW_TAG_subprogram" + | DW_TAG_template_type_parameter -> "DW_TAG_template_type_parameter" + | DW_TAG_template_value_parameter -> "DW_TAG_template_value_parameter" + | DW_TAG_thrown_type -> "DW_TAG_thrown_type" + | DW_TAG_try_block -> "DW_TAG_try_block" + | DW_TAG_variant_part -> "DW_TAG_variant_part" + | DW_TAG_variable -> "DW_TAG_variable" + | DW_TAG_volatile_type -> "DW_TAG_volatile_type" + | DW_TAG_dwarf_procedure -> "DW_TAG_dwarf_procedure" + | DW_TAG_restrict_type -> "DW_TAG_restrict_type" + | DW_TAG_interface_type -> "DW_TAG_interface_type" + | DW_TAG_namespace -> "DW_TAG_namespace" + | DW_TAG_imported_module -> "DW_TAG_imported_module" + | DW_TAG_unspecified_type -> "DW_TAG_unspecified_type" + | DW_TAG_partial_unit -> "DW_TAG_partial_unit" + | DW_TAG_imported_unit -> "DW_TAG_imported_unit" + | DW_TAG_condition -> "DW_TAG_condition" + | DW_TAG_shared_type -> "DW_TAG_shared_type" + | DW_TAG_lo_user -> "DW_TAG_lo_user" + | DW_TAG_rust_meta -> "DW_TAG_rust_meta" + | DW_TAG_hi_user -> "DW_TAG_hi_user" +;; + + +type dw_children = + DW_CHILDREN_no + | DW_CHILDREN_yes +;; + + +let dw_children_to_int (ch:dw_children) : int = + match ch with + DW_CHILDREN_no -> 0x00 + | DW_CHILDREN_yes -> 0x01 +;; + +let dw_children_of_int (i:int) : dw_children = + match i with + 0 -> DW_CHILDREN_no + | 1 -> DW_CHILDREN_yes + | _ -> bug () "bad DWARF children code: %d" i +;; + +type dw_at = + DW_AT_sibling + | DW_AT_location + | DW_AT_name + | DW_AT_ordering + | DW_AT_byte_size + | DW_AT_bit_offset + | DW_AT_bit_size + | DW_AT_stmt_list + | DW_AT_low_pc + | DW_AT_high_pc + | DW_AT_language + | DW_AT_discr + | DW_AT_discr_value + | DW_AT_visibility + | DW_AT_import + | DW_AT_string_length + | DW_AT_common_reference + | DW_AT_comp_dir + | DW_AT_const_value + | DW_AT_containing_type + | DW_AT_default_value + | DW_AT_inline + | DW_AT_is_optional + | DW_AT_lower_bound + | DW_AT_producer + | DW_AT_prototyped + | DW_AT_return_addr + | DW_AT_start_scope + | DW_AT_bit_stride + | DW_AT_upper_bound + | DW_AT_abstract_origin + | DW_AT_accessibility + | DW_AT_address_class + | DW_AT_artificial + | DW_AT_base_types + | DW_AT_calling_convention + | DW_AT_count + | DW_AT_data_member_location + | DW_AT_decl_column + | DW_AT_decl_file + | DW_AT_decl_line + | DW_AT_declaration + | DW_AT_discr_list + | DW_AT_encoding + | DW_AT_external + | DW_AT_frame_base + | DW_AT_friend + | DW_AT_identifier_case + | DW_AT_macro_info + | DW_AT_namelist_item + | DW_AT_priority + | DW_AT_segment + | DW_AT_specification + | DW_AT_static_link + | DW_AT_type + | DW_AT_use_location + | DW_AT_variable_parameter + | DW_AT_virtuality + | DW_AT_vtable_elem_location + | DW_AT_allocated + | DW_AT_associated + | DW_AT_data_location + | DW_AT_byte_stride + | DW_AT_entry_pc + | DW_AT_use_UTF8 + | DW_AT_extension + | DW_AT_ranges + | DW_AT_trampoline + | DW_AT_call_column + | DW_AT_call_file + | DW_AT_call_line + | DW_AT_description + | DW_AT_binary_scale + | DW_AT_decimal_scale + | DW_AT_small + | DW_AT_decimal_sign + | DW_AT_digit_count + | DW_AT_picture_string + | DW_AT_mutable + | DW_AT_threads_scaled + | DW_AT_explicit + | DW_AT_object_pointer + | DW_AT_endianity + | DW_AT_elemental + | DW_AT_pure + | DW_AT_recursive + | DW_AT_lo_user + | DW_AT_rust_type_code + | DW_AT_rust_type_param_index + | DW_AT_rust_iterator + | DW_AT_rust_native_type_id + | DW_AT_hi_user +;; + + +let dw_at_to_int (a:dw_at) : int = + match a with + DW_AT_sibling -> 0x01 + | DW_AT_location -> 0x02 + | DW_AT_name -> 0x03 + | DW_AT_ordering -> 0x09 + | DW_AT_byte_size -> 0x0b + | DW_AT_bit_offset -> 0x0c + | DW_AT_bit_size -> 0x0d + | DW_AT_stmt_list -> 0x10 + | DW_AT_low_pc -> 0x11 + | DW_AT_high_pc -> 0x12 + | DW_AT_language -> 0x13 + | DW_AT_discr -> 0x15 + | DW_AT_discr_value -> 0x16 + | DW_AT_visibility -> 0x17 + | DW_AT_import -> 0x18 + | DW_AT_string_length -> 0x19 + | DW_AT_common_reference -> 0x1a + | DW_AT_comp_dir -> 0x1b + | DW_AT_const_value -> 0x1c + | DW_AT_containing_type -> 0x1d + | DW_AT_default_value -> 0x1e + | DW_AT_inline -> 0x20 + | DW_AT_is_optional -> 0x21 + | DW_AT_lower_bound -> 0x22 + | DW_AT_producer -> 0x25 + | DW_AT_prototyped -> 0x27 + | DW_AT_return_addr -> 0x2a + | DW_AT_start_scope -> 0x2c + | DW_AT_bit_stride -> 0x2e + | DW_AT_upper_bound -> 0x2f + | DW_AT_abstract_origin -> 0x31 + | DW_AT_accessibility -> 0x32 + | DW_AT_address_class -> 0x33 + | DW_AT_artificial -> 0x34 + | DW_AT_base_types -> 0x35 + | DW_AT_calling_convention -> 0x36 + | DW_AT_count -> 0x37 + | DW_AT_data_member_location -> 0x38 + | DW_AT_decl_column -> 0x39 + | DW_AT_decl_file -> 0x3a + | DW_AT_decl_line -> 0x3b + | DW_AT_declaration -> 0x3c + | DW_AT_discr_list -> 0x3d + | DW_AT_encoding -> 0x3e + | DW_AT_external -> 0x3f + | DW_AT_frame_base -> 0x40 + | DW_AT_friend -> 0x41 + | DW_AT_identifier_case -> 0x42 + | DW_AT_macro_info -> 0x43 + | DW_AT_namelist_item -> 0x44 + | DW_AT_priority -> 0x45 + | DW_AT_segment -> 0x46 + | DW_AT_specification -> 0x47 + | DW_AT_static_link -> 0x48 + | DW_AT_type -> 0x49 + | DW_AT_use_location -> 0x4a + | DW_AT_variable_parameter -> 0x4b + | DW_AT_virtuality -> 0x4c + | DW_AT_vtable_elem_location -> 0x4d + | DW_AT_allocated -> 0x4e + | DW_AT_associated -> 0x4f + | DW_AT_data_location -> 0x50 + | DW_AT_byte_stride -> 0x51 + | DW_AT_entry_pc -> 0x52 + | DW_AT_use_UTF8 -> 0x53 + | DW_AT_extension -> 0x54 + | DW_AT_ranges -> 0x55 + | DW_AT_trampoline -> 0x56 + | DW_AT_call_column -> 0x57 + | DW_AT_call_file -> 0x58 + | DW_AT_call_line -> 0x59 + | DW_AT_description -> 0x5a + | DW_AT_binary_scale -> 0x5b + | DW_AT_decimal_scale -> 0x5c + | DW_AT_small -> 0x5d + | DW_AT_decimal_sign -> 0x5e + | DW_AT_digit_count -> 0x5f + | DW_AT_picture_string -> 0x60 + | DW_AT_mutable -> 0x61 + | DW_AT_threads_scaled -> 0x62 + | DW_AT_explicit -> 0x63 + | DW_AT_object_pointer -> 0x64 + | DW_AT_endianity -> 0x65 + | DW_AT_elemental -> 0x66 + | DW_AT_pure -> 0x67 + | DW_AT_recursive -> 0x68 + | DW_AT_lo_user -> 0x2000 + | DW_AT_rust_type_code -> 0x2300 + | DW_AT_rust_type_param_index -> 0x2301 + | DW_AT_rust_iterator -> 0x2302 + | DW_AT_rust_native_type_id -> 0x2303 + | DW_AT_hi_user -> 0x3fff +;; + +let dw_at_of_int (i:int) : dw_at = + match i with + 0x01 -> DW_AT_sibling + | 0x02 -> DW_AT_location + | 0x03 -> DW_AT_name + | 0x09 -> DW_AT_ordering + | 0x0b -> DW_AT_byte_size + | 0x0c -> DW_AT_bit_offset + | 0x0d -> DW_AT_bit_size + | 0x10 -> DW_AT_stmt_list + | 0x11 -> DW_AT_low_pc + | 0x12 -> DW_AT_high_pc + | 0x13 -> DW_AT_language + | 0x15 -> DW_AT_discr + | 0x16 -> DW_AT_discr_value + | 0x17 -> DW_AT_visibility + | 0x18 -> DW_AT_import + | 0x19 -> DW_AT_string_length + | 0x1a -> DW_AT_common_reference + | 0x1b -> DW_AT_comp_dir + | 0x1c -> DW_AT_const_value + | 0x1d -> DW_AT_containing_type + | 0x1e -> DW_AT_default_value + | 0x20 -> DW_AT_inline + | 0x21 -> DW_AT_is_optional + | 0x22 -> DW_AT_lower_bound + | 0x25 -> DW_AT_producer + | 0x27 -> DW_AT_prototyped + | 0x2a -> DW_AT_return_addr + | 0x2c -> DW_AT_start_scope + | 0x2e -> DW_AT_bit_stride + | 0x2f -> DW_AT_upper_bound + | 0x31 -> DW_AT_abstract_origin + | 0x32 -> DW_AT_accessibility + | 0x33 -> DW_AT_address_class + | 0x34 -> DW_AT_artificial + | 0x35 -> DW_AT_base_types + | 0x36 -> DW_AT_calling_convention + | 0x37 -> DW_AT_count + | 0x38 -> DW_AT_data_member_location + | 0x39 -> DW_AT_decl_column + | 0x3a -> DW_AT_decl_file + | 0x3b -> DW_AT_decl_line + | 0x3c -> DW_AT_declaration + | 0x3d -> DW_AT_discr_list + | 0x3e -> DW_AT_encoding + | 0x3f -> DW_AT_external + | 0x40 -> DW_AT_frame_base + | 0x41 -> DW_AT_friend + | 0x42 -> DW_AT_identifier_case + | 0x43 -> DW_AT_macro_info + | 0x44 -> DW_AT_namelist_item + | 0x45 -> DW_AT_priority + | 0x46 -> DW_AT_segment + | 0x47 -> DW_AT_specification + | 0x48 -> DW_AT_static_link + | 0x49 -> DW_AT_type + | 0x4a -> DW_AT_use_location + | 0x4b -> DW_AT_variable_parameter + | 0x4c -> DW_AT_virtuality + | 0x4d -> DW_AT_vtable_elem_location + | 0x4e -> DW_AT_allocated + | 0x4f -> DW_AT_associated + | 0x50 -> DW_AT_data_location + | 0x51 -> DW_AT_byte_stride + | 0x52 -> DW_AT_entry_pc + | 0x53 -> DW_AT_use_UTF8 + | 0x54 -> DW_AT_extension + | 0x55 -> DW_AT_ranges + | 0x56 -> DW_AT_trampoline + | 0x57 -> DW_AT_call_column + | 0x58 -> DW_AT_call_file + | 0x59 -> DW_AT_call_line + | 0x5a -> DW_AT_description + | 0x5b -> DW_AT_binary_scale + | 0x5c -> DW_AT_decimal_scale + | 0x5d -> DW_AT_small + | 0x5e -> DW_AT_decimal_sign + | 0x5f -> DW_AT_digit_count + | 0x60 -> DW_AT_picture_string + | 0x61 -> DW_AT_mutable + | 0x62 -> DW_AT_threads_scaled + | 0x63 -> DW_AT_explicit + | 0x64 -> DW_AT_object_pointer + | 0x65 -> DW_AT_endianity + | 0x66 -> DW_AT_elemental + | 0x67 -> DW_AT_pure + | 0x68 -> DW_AT_recursive + | 0x2000 -> DW_AT_lo_user + | 0x2300 -> DW_AT_rust_type_code + | 0x2301 -> DW_AT_rust_type_param_index + | 0x2302 -> DW_AT_rust_iterator + | 0x2303 -> DW_AT_rust_native_type_id + | 0x3fff -> DW_AT_hi_user + | _ -> bug () "bad DWARF attribute code: 0x%x" i +;; + +let dw_at_to_string (a:dw_at) : string = + match a with + DW_AT_sibling -> "DW_AT_sibling" + | DW_AT_location -> "DW_AT_location" + | DW_AT_name -> "DW_AT_name" + | DW_AT_ordering -> "DW_AT_ordering" + | DW_AT_byte_size -> "DW_AT_byte_size" + | DW_AT_bit_offset -> "DW_AT_bit_offset" + | DW_AT_bit_size -> "DW_AT_bit_size" + | DW_AT_stmt_list -> "DW_AT_stmt_list" + | DW_AT_low_pc -> "DW_AT_low_pc" + | DW_AT_high_pc -> "DW_AT_high_pc" + | DW_AT_language -> "DW_AT_language" + | DW_AT_discr -> "DW_AT_discr" + | DW_AT_discr_value -> "DW_AT_discr_value" + | DW_AT_visibility -> "DW_AT_visibility" + | DW_AT_import -> "DW_AT_import" + | DW_AT_string_length -> "DW_AT_string_length" + | DW_AT_common_reference -> "DW_AT_common_reference" + | DW_AT_comp_dir -> "DW_AT_comp_dir" + | DW_AT_const_value -> "DW_AT_const_value" + | DW_AT_containing_type -> "DW_AT_containing_type" + | DW_AT_default_value -> "DW_AT_default_value" + | DW_AT_inline -> "DW_AT_inline" + | DW_AT_is_optional -> "DW_AT_is_optional" + | DW_AT_lower_bound -> "DW_AT_lower_bound" + | DW_AT_producer -> "DW_AT_producer" + | DW_AT_prototyped -> "DW_AT_prototyped" + | DW_AT_return_addr -> "DW_AT_return_addr" + | DW_AT_start_scope -> "DW_AT_start_scope" + | DW_AT_bit_stride -> "DW_AT_bit_stride" + | DW_AT_upper_bound -> "DW_AT_upper_bound" + | DW_AT_abstract_origin -> "DW_AT_abstract_origin" + | DW_AT_accessibility -> "DW_AT_accessibility" + | DW_AT_address_class -> "DW_AT_address_class" + | DW_AT_artificial -> "DW_AT_artificial" + | DW_AT_base_types -> "DW_AT_base_types" + | DW_AT_calling_convention -> "DW_AT_calling_convention" + | DW_AT_count -> "DW_AT_count" + | DW_AT_data_member_location -> "DW_AT_data_member_location" + | DW_AT_decl_column -> "DW_AT_decl_column" + | DW_AT_decl_file -> "DW_AT_decl_file" + | DW_AT_decl_line -> "DW_AT_decl_line" + | DW_AT_declaration -> "DW_AT_declaration" + | DW_AT_discr_list -> "DW_AT_discr_list" + | DW_AT_encoding -> "DW_AT_encoding" + | DW_AT_external -> "DW_AT_external" + | DW_AT_frame_base -> "DW_AT_frame_base" + | DW_AT_friend -> "DW_AT_friend" + | DW_AT_identifier_case -> "DW_AT_identifier_case" + | DW_AT_macro_info -> "DW_AT_macro_info" + | DW_AT_namelist_item -> "DW_AT_namelist_item" + | DW_AT_priority -> "DW_AT_priority" + | DW_AT_segment -> "DW_AT_segment" + | DW_AT_specification -> "DW_AT_specification" + | DW_AT_static_link -> "DW_AT_static_link" + | DW_AT_type -> "DW_AT_type" + | DW_AT_use_location -> "DW_AT_use_location" + | DW_AT_variable_parameter -> "DW_AT_variable_parameter" + | DW_AT_virtuality -> "DW_AT_virtuality" + | DW_AT_vtable_elem_location -> "DW_AT_vtable_elem_location" + | DW_AT_allocated -> "DW_AT_allocated" + | DW_AT_associated -> "DW_AT_associated" + | DW_AT_data_location -> "DW_AT_data_location" + | DW_AT_byte_stride -> "DW_AT_byte_stride" + | DW_AT_entry_pc -> "DW_AT_entry_pc" + | DW_AT_use_UTF8 -> "DW_AT_use_UTF8" + | DW_AT_extension -> "DW_AT_extension" + | DW_AT_ranges -> "DW_AT_ranges" + | DW_AT_trampoline -> "DW_AT_trampoline" + | DW_AT_call_column -> "DW_AT_call_column" + | DW_AT_call_file -> "DW_AT_call_file" + | DW_AT_call_line -> "DW_AT_call_line" + | DW_AT_description -> "DW_AT_description" + | DW_AT_binary_scale -> "DW_AT_binary_scale" + | DW_AT_decimal_scale -> "DW_AT_decimal_scale" + | DW_AT_small -> "DW_AT_small" + | DW_AT_decimal_sign -> "DW_AT_decimal_sign" + | DW_AT_digit_count -> "DW_AT_digit_count" + | DW_AT_picture_string -> "DW_AT_picture_string" + | DW_AT_mutable -> "DW_AT_mutable" + | DW_AT_threads_scaled -> "DW_AT_threads_scaled" + | DW_AT_explicit -> "DW_AT_explicit" + | DW_AT_object_pointer -> "DW_AT_object_pointer" + | DW_AT_endianity -> "DW_AT_endianity" + | DW_AT_elemental -> "DW_AT_elemental" + | DW_AT_pure -> "DW_AT_pure" + | DW_AT_recursive -> "DW_AT_recursive" + | DW_AT_lo_user -> "DW_AT_lo_user" + | DW_AT_rust_type_code -> "DW_AT_rust_type_code" + | DW_AT_rust_type_param_index -> "DW_AT_rust_type_param_index" + | DW_AT_rust_iterator -> "DW_AT_rust_iterator" + | DW_AT_rust_native_type_id -> "DW_AT_native_type_id" + | DW_AT_hi_user -> "DW_AT_hi_user" +;; + +(* + * We encode our 'built-in types' using DW_TAG_pointer_type and various + * DW_AT_pointer_type_codes. This seems to be more gdb-compatible than + * the DWARF-recommended way of using DW_TAG_unspecified_type. + *) +type dw_rust_type = + DW_RUST_type_param + | DW_RUST_nil + | DW_RUST_vec + | DW_RUST_chan + | DW_RUST_port + | DW_RUST_task + | DW_RUST_tag + | DW_RUST_iso + | DW_RUST_type + | DW_RUST_native +;; + +let dw_rust_type_to_int (pt:dw_rust_type) : int = + match pt with + DW_RUST_type_param -> 0x1 + | DW_RUST_nil -> 0x2 + | DW_RUST_vec -> 0x3 + | DW_RUST_chan -> 0x4 + | DW_RUST_port -> 0x5 + | DW_RUST_task -> 0x6 + | DW_RUST_tag -> 0x7 + | DW_RUST_iso -> 0x8 + | DW_RUST_type -> 0x9 + | DW_RUST_native -> 0xa +;; + +let dw_rust_type_of_int (i:int) : dw_rust_type = + match i with + 0x1 -> DW_RUST_type_param + | 0x2 -> DW_RUST_nil + | 0x3 -> DW_RUST_vec + | 0x4 -> DW_RUST_chan + | 0x5 -> DW_RUST_port + | 0x6 -> DW_RUST_task + | 0x7 -> DW_RUST_tag + | 0x8 -> DW_RUST_iso + | 0x9 -> DW_RUST_type + | 0xa -> DW_RUST_native + | _ -> bug () "bad DWARF rust-pointer-type code: %d" i +;; + +type dw_ate = + DW_ATE_address + | DW_ATE_boolean + | DW_ATE_complex_float + | DW_ATE_float + | DW_ATE_signed + | DW_ATE_signed_char + | DW_ATE_unsigned + | DW_ATE_unsigned_char + | DW_ATE_imaginary_float + | DW_ATE_packed_decimal + | DW_ATE_numeric_string + | DW_ATE_edited + | DW_ATE_signed_fixed + | DW_ATE_unsigned_fixed + | DW_ATE_decimal_float + | DW_ATE_lo_user + | DW_ATE_hi_user +;; + +let dw_ate_to_int (ate:dw_ate) : int = + match ate with + DW_ATE_address -> 0x01 + | DW_ATE_boolean -> 0x02 + | DW_ATE_complex_float -> 0x03 + | DW_ATE_float -> 0x04 + | DW_ATE_signed -> 0x05 + | DW_ATE_signed_char -> 0x06 + | DW_ATE_unsigned -> 0x07 + | DW_ATE_unsigned_char -> 0x08 + | DW_ATE_imaginary_float -> 0x09 + | DW_ATE_packed_decimal -> 0x0a + | DW_ATE_numeric_string -> 0x0b + | DW_ATE_edited -> 0x0c + | DW_ATE_signed_fixed -> 0x0d + | DW_ATE_unsigned_fixed -> 0x0e + | DW_ATE_decimal_float -> 0x0f + | DW_ATE_lo_user -> 0x80 + | DW_ATE_hi_user -> 0xff +;; + +let dw_ate_of_int (i:int) : dw_ate = + match i with + 0x01 -> DW_ATE_address + | 0x02 -> DW_ATE_boolean + | 0x03 -> DW_ATE_complex_float + | 0x04 -> DW_ATE_float + | 0x05 -> DW_ATE_signed + | 0x06 -> DW_ATE_signed_char + | 0x07 -> DW_ATE_unsigned + | 0x08 -> DW_ATE_unsigned_char + | 0x09 -> DW_ATE_imaginary_float + | 0x0a -> DW_ATE_packed_decimal + | 0x0b -> DW_ATE_numeric_string + | 0x0c -> DW_ATE_edited + | 0x0d -> DW_ATE_signed_fixed + | 0x0e -> DW_ATE_unsigned_fixed + | 0x0f -> DW_ATE_decimal_float + | 0x80 -> DW_ATE_lo_user + | 0xff -> DW_ATE_hi_user + | _ -> bug () "bad DWARF attribute-encoding code: %d" i +;; + +type dw_form = + | DW_FORM_addr + | DW_FORM_block2 + | DW_FORM_block4 + | DW_FORM_data2 + | DW_FORM_data4 + | DW_FORM_data8 + | DW_FORM_string + | DW_FORM_block + | DW_FORM_block1 + | DW_FORM_data1 + | DW_FORM_flag + | DW_FORM_sdata + | DW_FORM_strp + | DW_FORM_udata + | DW_FORM_ref_addr + | DW_FORM_ref1 + | DW_FORM_ref2 + | DW_FORM_ref4 + | DW_FORM_ref8 + | DW_FORM_ref_udata + | DW_FORM_indirect +;; + + +let dw_form_to_int (f:dw_form) : int = + match f with + | DW_FORM_addr -> 0x01 + | DW_FORM_block2 -> 0x03 + | DW_FORM_block4 -> 0x04 + | DW_FORM_data2 -> 0x05 + | DW_FORM_data4 -> 0x06 + | DW_FORM_data8 -> 0x07 + | DW_FORM_string -> 0x08 + | DW_FORM_block -> 0x09 + | DW_FORM_block1 -> 0x0a + | DW_FORM_data1 -> 0x0b + | DW_FORM_flag -> 0x0c + | DW_FORM_sdata -> 0x0d + | DW_FORM_strp -> 0x0e + | DW_FORM_udata -> 0x0f + | DW_FORM_ref_addr -> 0x10 + | DW_FORM_ref1 -> 0x11 + | DW_FORM_ref2 -> 0x12 + | DW_FORM_ref4 -> 0x13 + | DW_FORM_ref8 -> 0x14 + | DW_FORM_ref_udata -> 0x15 + | DW_FORM_indirect -> 0x16 +;; + +let dw_form_of_int (i:int) : dw_form = + match i with + | 0x01 -> DW_FORM_addr + | 0x03 -> DW_FORM_block2 + | 0x04 -> DW_FORM_block4 + | 0x05 -> DW_FORM_data2 + | 0x06 -> DW_FORM_data4 + | 0x07 -> DW_FORM_data8 + | 0x08 -> DW_FORM_string + | 0x09 -> DW_FORM_block + | 0x0a -> DW_FORM_block1 + | 0x0b -> DW_FORM_data1 + | 0x0c -> DW_FORM_flag + | 0x0d -> DW_FORM_sdata + | 0x0e -> DW_FORM_strp + | 0x0f -> DW_FORM_udata + | 0x10 -> DW_FORM_ref_addr + | 0x11 -> DW_FORM_ref1 + | 0x12 -> DW_FORM_ref2 + | 0x13 -> DW_FORM_ref4 + | 0x14 -> DW_FORM_ref8 + | 0x15 -> DW_FORM_ref_udata + | 0x16 -> DW_FORM_indirect + | _ -> bug () "bad DWARF form code: 0x%x" i +;; + +let dw_form_to_string (f:dw_form) : string = + match f with + | DW_FORM_addr -> "DW_FORM_addr" + | DW_FORM_block2 -> "DW_FORM_block2" + | DW_FORM_block4 -> "DW_FORM_block4" + | DW_FORM_data2 -> "DW_FORM_data2" + | DW_FORM_data4 -> "DW_FORM_data4" + | DW_FORM_data8 -> "DW_FORM_data8" + | DW_FORM_string -> "DW_FORM_string" + | DW_FORM_block -> "DW_FORM_block" + | DW_FORM_block1 -> "DW_FORM_block1" + | DW_FORM_data1 -> "DW_FORM_data1" + | DW_FORM_flag -> "DW_FORM_flag" + | DW_FORM_sdata -> "DW_FORM_sdata" + | DW_FORM_strp -> "DW_FORM_strp" + | DW_FORM_udata -> "DW_FORM_udata" + | DW_FORM_ref_addr -> "DW_FORM_ref_addr" + | DW_FORM_ref1 -> "DW_FORM_ref1" + | DW_FORM_ref2 -> "DW_FORM_ref2" + | DW_FORM_ref4 -> "DW_FORM_ref4" + | DW_FORM_ref8 -> "DW_FORM_ref8" + | DW_FORM_ref_udata -> "DW_FORM_ref_udata" + | DW_FORM_indirect -> "DW_FORM_indirect" +;; + +type dw_op = + DW_OP_lit of int + | DW_OP_addr of Asm.expr64 + | DW_OP_const1u of Asm.expr64 + | DW_OP_const1s of Asm.expr64 + | DW_OP_const2u of Asm.expr64 + | DW_OP_const2s of Asm.expr64 + | DW_OP_const4u of Asm.expr64 + | DW_OP_const4s of Asm.expr64 + | DW_OP_const8u of Asm.expr64 + | DW_OP_const8s of Asm.expr64 + | DW_OP_constu of Asm.expr64 + | DW_OP_consts of Asm.expr64 + | DW_OP_fbreg of Asm.expr64 + | DW_OP_reg of int + | DW_OP_regx of Asm.expr64 + | DW_OP_breg of (int * Asm.expr64) + | DW_OP_bregx of (Asm.expr64 * Asm.expr64) + | DW_OP_dup + | DW_OP_drop + | DW_OP_pick of Asm.expr64 + | DW_OP_over + | DW_OP_swap + | DW_OP_rot + | DW_OP_piece of Asm.expr64 + | DW_OP_bit_piece of (Asm.expr64 * Asm.expr64) + | DW_OP_deref + | DW_OP_deref_size of Asm.expr64 + | DW_OP_xderef + | DW_OP_xderef_size of Asm.expr64 + | DW_OP_push_object_address + | DW_OP_form_tls_address + | DW_OP_call_frame_cfa + | DW_OP_abs + | DW_OP_and + | DW_OP_div + | DW_OP_minus + | DW_OP_mod + | DW_OP_mul + | DW_OP_neg + | DW_OP_not + | DW_OP_or + | DW_OP_plus + | DW_OP_plus_uconst of Asm.expr64 + | DW_OP_shl + | DW_OP_shr + | DW_OP_shra + | DW_OP_xor + | DW_OP_le + | DW_OP_ge + | DW_OP_eq + | DW_OP_lt + | DW_OP_gt + | DW_OP_ne + | DW_OP_skip of Asm.expr64 + | DW_OP_bra of Asm.expr64 + | DW_OP_call2 of Asm.expr64 + | DW_OP_call4 of Asm.expr64 + | DW_OP_call_ref of Asm.expr64 + | DW_OP_nop +;; + +let dw_op_to_frag (abi:Abi.abi) (op:dw_op) : Asm.frag = + match op with + + DW_OP_addr e -> SEQ [| BYTE 0x03; WORD (abi.Abi.abi_word_ty, e) |] + | DW_OP_deref -> BYTE 0x06 + | DW_OP_const1u e -> SEQ [| BYTE 0x08; WORD (TY_u8, e) |] + | DW_OP_const1s e -> SEQ [| BYTE 0x09; WORD (TY_i8, e) |] + | DW_OP_const2u e -> SEQ [| BYTE 0x0a; WORD (TY_u16, e) |] + | DW_OP_const2s e -> SEQ [| BYTE 0x0b; WORD (TY_i16, e) |] + | DW_OP_const4u e -> SEQ [| BYTE 0x0c; WORD (TY_u32, e) |] + | DW_OP_const4s e -> SEQ [| BYTE 0x0d; WORD (TY_i32, e) |] + | DW_OP_const8u e -> SEQ [| BYTE 0x0e; WORD (TY_u64, e) |] + | DW_OP_const8s e -> SEQ [| BYTE 0x0f; WORD (TY_i64, e) |] + | DW_OP_constu e -> SEQ [| BYTE 0x10; ULEB128 e |] + | DW_OP_consts e -> SEQ [| BYTE 0x11; SLEB128 e |] + | DW_OP_dup -> BYTE 0x12 + | DW_OP_drop -> BYTE 0x13 + | DW_OP_over -> BYTE 0x14 + | DW_OP_pick e -> SEQ [| BYTE 0x15; WORD (TY_u8, e) |] + | DW_OP_swap -> BYTE 0x16 + | DW_OP_rot -> BYTE 0x17 + | DW_OP_xderef -> BYTE 0x18 + | DW_OP_abs -> BYTE 0x19 + | DW_OP_and -> BYTE 0x1a + | DW_OP_div -> BYTE 0x1b + | DW_OP_minus -> BYTE 0x1c + | DW_OP_mod -> BYTE 0x1d + | DW_OP_mul -> BYTE 0x1e + | DW_OP_neg -> BYTE 0x1f + | DW_OP_not -> BYTE 0x20 + | DW_OP_or -> BYTE 0x21 + | DW_OP_plus -> BYTE 0x22 + | DW_OP_plus_uconst e -> SEQ [| BYTE 0x23; ULEB128 e |] + | DW_OP_shl -> BYTE 0x24 + | DW_OP_shr -> BYTE 0x25 + | DW_OP_shra -> BYTE 0x26 + | DW_OP_xor -> BYTE 0x27 + | DW_OP_skip e -> SEQ [| BYTE 0x2f; WORD (TY_i16, e) |] + | DW_OP_bra e -> SEQ [| BYTE 0x28; WORD (TY_i16, e) |] + | DW_OP_eq -> BYTE 0x29 + | DW_OP_ge -> BYTE 0x2a + | DW_OP_gt -> BYTE 0x2b + | DW_OP_le -> BYTE 0x2c + | DW_OP_lt -> BYTE 0x2d + | DW_OP_ne -> BYTE 0x2e + + | DW_OP_lit i -> + assert (0 <= i && i < 32); + BYTE (i + 0x30) + + | DW_OP_reg i -> + assert (0 <= i && i < 32); + BYTE (i + 0x50) + + | DW_OP_breg (i, e) -> + assert (0 <= i && i < 32); + SEQ [| BYTE (i + 0x70); SLEB128 e |] + + | DW_OP_regx e -> SEQ [| BYTE 0x90; ULEB128 e|] + | DW_OP_fbreg e -> SEQ [| BYTE 0x91; SLEB128 e |] + | DW_OP_bregx (r, off) -> SEQ [| BYTE 0x92; ULEB128 r; SLEB128 off |] + | DW_OP_piece e -> SEQ [| BYTE 0x93; ULEB128 e |] + | DW_OP_deref_size e -> SEQ [| BYTE 0x94; WORD (TY_u8, e) |] + | DW_OP_xderef_size e -> SEQ [| BYTE 0x95; WORD (TY_u8, e) |] + | DW_OP_nop -> BYTE 0x96 + | DW_OP_push_object_address -> BYTE 0x97 + | DW_OP_call2 e -> SEQ [| BYTE 0x98; WORD (TY_u16, e) |] + | DW_OP_call4 e -> SEQ [| BYTE 0x99; WORD (TY_u32, e) |] + | DW_OP_call_ref e -> SEQ [| BYTE 0x9a; WORD (abi.Abi.abi_word_ty, e) |] + | DW_OP_form_tls_address -> BYTE 0x9b + | DW_OP_call_frame_cfa -> BYTE 0x9c + | DW_OP_bit_piece (sz, off) -> + SEQ [| BYTE 0x9d; ULEB128 sz; ULEB128 off |] +;; + +type dw_lns = + DW_LNS_copy + | DW_LNS_advance_pc + | DW_LNS_advance_line + | DW_LNS_set_file + | DW_LNS_set_column + | DW_LNS_negage_stmt + | DW_LNS_set_basic_block + | DW_LNS_const_add_pc + | DW_LNS_fixed_advance_pc + | DW_LNS_set_prologue_end + | DW_LNS_set_epilogue_begin + | DW_LNS_set_isa +;; + +let int_to_dw_lns i = + match i with + 1 -> DW_LNS_copy + | 2 -> DW_LNS_advance_pc + | 3 -> DW_LNS_advance_line + | 4 -> DW_LNS_set_file + | 5 -> DW_LNS_set_column + | 6 -> DW_LNS_negage_stmt + | 7 -> DW_LNS_set_basic_block + | 8 -> DW_LNS_const_add_pc + | 9 -> DW_LNS_fixed_advance_pc + | 10 -> DW_LNS_set_prologue_end + | 11 -> DW_LNS_set_epilogue_begin + | 12 -> DW_LNS_set_isa + | _ -> bug () "Internal logic error: (Dwarf.int_to_dw_lns %d)" i +;; + +let dw_lns_to_int lns = + match lns with + DW_LNS_copy -> 1 + | DW_LNS_advance_pc -> 2 + | DW_LNS_advance_line -> 3 + | DW_LNS_set_file -> 4 + | DW_LNS_set_column -> 5 + | DW_LNS_negage_stmt -> 6 + | DW_LNS_set_basic_block -> 7 + | DW_LNS_const_add_pc -> 8 + | DW_LNS_fixed_advance_pc -> 9 + | DW_LNS_set_prologue_end -> 10 + | DW_LNS_set_epilogue_begin -> 11 + | DW_LNS_set_isa -> 12 +;; + +let max_dw_lns = 12;; + +let dw_lns_arity lns = + match lns with + DW_LNS_copy -> 0 + | DW_LNS_advance_pc -> 1 + | DW_LNS_advance_line -> 1 + | DW_LNS_set_file -> 1 + | DW_LNS_set_column -> 1 + | DW_LNS_negage_stmt -> 0 + | DW_LNS_set_basic_block -> 0 + | DW_LNS_const_add_pc -> 0 + | DW_LNS_fixed_advance_pc -> 1 + | DW_LNS_set_prologue_end -> 0 + | DW_LNS_set_epilogue_begin -> 0 + | DW_LNS_set_isa -> 1 +;; + +type debug_records = + { + debug_aranges: Asm.frag; + debug_pubnames: Asm.frag; + debug_info: Asm.frag; + debug_abbrev: Asm.frag; + debug_line: Asm.frag; + debug_frame: Asm.frag; + } + +type abbrev = (dw_tag * dw_children * ((dw_at * dw_form) array));; + +let (abbrev_crate_cu:abbrev) = + (DW_TAG_compile_unit, DW_CHILDREN_yes, + [| + (DW_AT_producer, DW_FORM_string); + (DW_AT_language, DW_FORM_data4); + (DW_AT_name, DW_FORM_string); + (DW_AT_comp_dir, DW_FORM_string); + (DW_AT_low_pc, DW_FORM_addr); + (DW_AT_high_pc, DW_FORM_addr); + (DW_AT_use_UTF8, DW_FORM_flag) + |]) + ;; + +let (abbrev_meta:abbrev) = + (DW_TAG_rust_meta, DW_CHILDREN_no, + [| + (DW_AT_name, DW_FORM_string); + (DW_AT_const_value, DW_FORM_string) + |]) +;; + +let (abbrev_srcfile_cu:abbrev) = + (DW_TAG_compile_unit, DW_CHILDREN_yes, + [| + (DW_AT_name, DW_FORM_string); + (DW_AT_comp_dir, DW_FORM_string); + (DW_AT_low_pc, DW_FORM_addr); + (DW_AT_high_pc, DW_FORM_addr); + |]) +;; + + +let (abbrev_module:abbrev) = + (DW_TAG_module, DW_CHILDREN_yes, + [| + (DW_AT_name, DW_FORM_string); + |]) +;; + +let (abbrev_subprogram:abbrev) = + (DW_TAG_subprogram, DW_CHILDREN_yes, + [| + (DW_AT_name, DW_FORM_string); + (DW_AT_type, DW_FORM_ref_addr); + (DW_AT_low_pc, DW_FORM_addr); + (DW_AT_high_pc, DW_FORM_addr); + (DW_AT_frame_base, DW_FORM_block1); + (DW_AT_return_addr, DW_FORM_block1); + (DW_AT_mutable, DW_FORM_flag); + (DW_AT_pure, DW_FORM_flag); + |]) +;; + +let (abbrev_typedef:abbrev) = + (DW_TAG_typedef, DW_CHILDREN_yes, + [| + (DW_AT_name, DW_FORM_string); + (DW_AT_type, DW_FORM_ref_addr) + |]) +;; + +let (abbrev_lexical_block:abbrev) = + (DW_TAG_lexical_block, DW_CHILDREN_yes, + [| + (DW_AT_low_pc, DW_FORM_addr); + (DW_AT_high_pc, DW_FORM_addr); + |]) +;; + +let (abbrev_variable:abbrev) = + (DW_TAG_variable, DW_CHILDREN_no, + [| + (DW_AT_name, DW_FORM_string); + (DW_AT_location, DW_FORM_block1); + (DW_AT_type, DW_FORM_ref_addr) + |]) +;; + +(* NB: must have same abbrev-body as abbrev_variable. *) +let (abbrev_formal:abbrev) = + (DW_TAG_formal_parameter, DW_CHILDREN_no, + [| + (DW_AT_name, DW_FORM_string); + (DW_AT_location, DW_FORM_block1); + (DW_AT_type, DW_FORM_ref_addr) + |]) +;; + +let (abbrev_unspecified_anon_structure_type:abbrev) = + (DW_TAG_structure_type, DW_CHILDREN_no, + [| + (DW_AT_declaration, DW_FORM_flag); + |]) +;; + +let (abbrev_unspecified_structure_type:abbrev) = + (DW_TAG_structure_type, DW_CHILDREN_no, + [| + (DW_AT_rust_type_code, DW_FORM_data1); + (DW_AT_declaration, DW_FORM_flag); + |]) +;; + +let (abbrev_unspecified_pointer_type:abbrev) = + (DW_TAG_pointer_type, DW_CHILDREN_no, + [| + (DW_AT_rust_type_code, DW_FORM_data1); + (DW_AT_declaration, DW_FORM_flag); + (DW_AT_type, DW_FORM_ref_addr) + |]) +;; + +let (abbrev_native_pointer_type:abbrev) = + (DW_TAG_pointer_type, DW_CHILDREN_no, + [| + (DW_AT_rust_type_code, DW_FORM_data1); + (DW_AT_rust_native_type_id, DW_FORM_data4) + |]) +;; + +let (abbrev_rust_type_param:abbrev) = + (DW_TAG_pointer_type, DW_CHILDREN_no, + [| + (DW_AT_rust_type_code, DW_FORM_data1); + (DW_AT_rust_type_param_index, DW_FORM_data4); + (DW_AT_mutable, DW_FORM_flag); + (DW_AT_pure, DW_FORM_flag); + |]) +;; + +let (abbrev_rust_type_param_decl:abbrev) = + (DW_TAG_formal_parameter, DW_CHILDREN_no, + [| + (DW_AT_rust_type_code, DW_FORM_data1); + (DW_AT_name, DW_FORM_string); + (DW_AT_rust_type_param_index, DW_FORM_data4); + (DW_AT_mutable, DW_FORM_flag); + (DW_AT_pure, DW_FORM_flag); + |]) +;; + +let (abbrev_base_type:abbrev) = + (DW_TAG_base_type, DW_CHILDREN_no, + [| + (DW_AT_name, DW_FORM_string); + (DW_AT_encoding, DW_FORM_data1); + (DW_AT_byte_size, DW_FORM_data1) + |]) +;; + +let (abbrev_alias_slot:abbrev) = + (DW_TAG_reference_type, DW_CHILDREN_no, + [| + (DW_AT_type, DW_FORM_ref_addr); + (DW_AT_mutable, DW_FORM_flag); + |]) +;; + +let (abbrev_exterior_slot:abbrev) = + (DW_TAG_reference_type, DW_CHILDREN_no, + [| + (DW_AT_type, DW_FORM_ref_addr); + (DW_AT_mutable, DW_FORM_flag); + (DW_AT_data_location, DW_FORM_block1); + |]) +;; + +let (abbrev_struct_type:abbrev) = + (DW_TAG_structure_type, DW_CHILDREN_yes, + [| + (DW_AT_byte_size, DW_FORM_block4) + |]) +;; + +let (abbrev_struct_type_member:abbrev) = + (DW_TAG_member, DW_CHILDREN_no, + [| + (DW_AT_name, DW_FORM_string); + (DW_AT_type, DW_FORM_ref_addr); + (DW_AT_mutable, DW_FORM_flag); + (DW_AT_data_member_location, DW_FORM_block4); + (DW_AT_byte_size, DW_FORM_block4) + |]) +;; + +let (abbrev_subroutine_type:abbrev) = + (DW_TAG_subroutine_type, DW_CHILDREN_yes, + [| + (* FIXME: model effects properly. *) + (DW_AT_type, DW_FORM_ref_addr); (* NB: output type. *) + (DW_AT_mutable, DW_FORM_flag); + (DW_AT_pure, DW_FORM_flag); + (DW_AT_rust_iterator, DW_FORM_flag); + |]) +;; + +let (abbrev_formal_type:abbrev) = + (DW_TAG_formal_parameter, DW_CHILDREN_no, + [| + (DW_AT_type, DW_FORM_ref_addr) + |]) +;; + + +let (abbrev_obj_subroutine_type:abbrev) = + (DW_TAG_subroutine_type, DW_CHILDREN_yes, + [| + (* FIXME: model effects properly. *) + (DW_AT_name, DW_FORM_string); + (DW_AT_type, DW_FORM_ref_addr); (* NB: output type. *) + (DW_AT_mutable, DW_FORM_flag); + (DW_AT_pure, DW_FORM_flag); + (DW_AT_rust_iterator, DW_FORM_flag); + |]) +;; + +let (abbrev_obj_type:abbrev) = + (DW_TAG_interface_type, DW_CHILDREN_yes, + [| + (DW_AT_mutable, DW_FORM_flag); + (DW_AT_pure, DW_FORM_flag); + |]) +;; + +let (abbrev_string_type:abbrev) = + (DW_TAG_string_type, DW_CHILDREN_no, + [| + (DW_AT_string_length, DW_FORM_block1); + (DW_AT_data_location, DW_FORM_block1); + |]) +;; + + +let prepend lref x = lref := x :: (!lref) +;; + + +let dwarf_visitor + (cx:ctxt) + (inner:Walk.visitor) + (path:Ast.name_component Stack.t) + (cu_info_section_fixup:fixup) + (cu_aranges:(frag list) ref) + (cu_pubnames:(frag list) ref) + (cu_infos:(frag list) ref) + (cu_abbrevs:(frag list) ref) + (cu_lines:(frag list) ref) + (cu_frames:(frag list) ref) + : Walk.visitor = + + let (abi:Abi.abi) = cx.ctxt_abi in + let (word_sz:int64) = abi.Abi.abi_word_sz in + let (word_sz_int:int) = Int64.to_int word_sz in + let (word_bits:Il.bits) = abi.Abi.abi_word_bits in + let (word_ty_mach:ty_mach) = + match word_bits with + Il.Bits8 -> TY_u8 + | Il.Bits16 -> TY_u16 + | Il.Bits32 -> TY_u32 + | Il.Bits64 -> TY_u64 + in + let (signed_word_ty_mach:ty_mach) = + match word_bits with + Il.Bits8 -> TY_i8 + | Il.Bits16 -> TY_i16 + | Il.Bits32 -> TY_i32 + | Il.Bits64 -> TY_i64 + in + + let path_name _ = Ast.fmt_to_str Ast.fmt_name (Walk.path_to_name path) in + + let (abbrev_table:(abbrev, int) Hashtbl.t) = Hashtbl.create 0 in + + let uleb i = ULEB128 (IMM (Int64.of_int i)) in + + let get_abbrev_code + (ab:abbrev) + : int = + if Hashtbl.mem abbrev_table ab + then Hashtbl.find abbrev_table ab + else + let n = (Hashtbl.length abbrev_table) + 1 in + let (tag, children, attrs) = ab in + let attr_ulebs = Array.create ((Array.length attrs) * 2) MARK in + for i = 0 to (Array.length attrs) - 1 do + let (attr, form) = attrs.(i) in + attr_ulebs.(2*i) <- uleb (dw_at_to_int attr); + attr_ulebs.((2*i)+1) <- uleb (dw_form_to_int form) + done; + let ab_frag = + (SEQ [| + uleb n; + uleb (dw_tag_to_int tag); + BYTE (dw_children_to_int children); + SEQ attr_ulebs; + uleb 0; uleb 0; + |]) + in + prepend cu_abbrevs ab_frag; + htab_put abbrev_table ab n; + n + in + + let (curr_cu_aranges:(frag list) ref) = ref [] in + let (curr_cu_pubnames:(frag list) ref) = ref [] in + let (curr_cu_infos:(frag list) ref) = ref [] in + let (curr_cu_line:(frag list) ref) = ref [] in + let (curr_cu_frame:(frag list) ref) = ref [] in + + let emit_die die = prepend curr_cu_infos die in + let emit_null_die _ = emit_die (BYTE 0) in + + let dw_form_block1 (ops:dw_op array) : Asm.frag = + let frag = SEQ (Array.map (dw_op_to_frag abi) ops) in + let block_fixup = new_fixup "DW_FORM_block1 fixup" in + SEQ [| WORD (TY_u8, F_SZ block_fixup); + DEF (block_fixup, frag) |] + in + + let dw_form_ref_addr (fix:fixup) : Asm.frag = + WORD (signed_word_ty_mach, + SUB ((M_POS fix), M_POS cu_info_section_fixup)) + in + + let encode_effect eff = + (* Note: weird encoding: mutable+pure = unsafe. *) + let mut_byte, pure_byte = + match eff with + Ast.UNSAFE -> (1,1) + | Ast.STATE -> (1,0) + | Ast.IO -> (0,0) + | Ast.PURE -> (0,1) + in + SEQ [| + (* DW_AT_mutable: DW_FORM_flag *) + BYTE mut_byte; + (* DW_AT_pure: DW_FORM_flag *) + BYTE pure_byte; + |] + in + + (* Type-param DIEs. *) + + let type_param_die (p:(ty_param_idx * Ast.effect)) = + let (idx, eff) = p in + SEQ [| + uleb (get_abbrev_code abbrev_rust_type_param); + (* DW_AT_rust_type_code: DW_FORM_data1 *) + BYTE (dw_rust_type_to_int DW_RUST_type_param); + (* DW_AT_rust_type_param_index: DW_FORM_data4 *) + WORD (word_ty_mach, IMM (Int64.of_int idx)); + encode_effect eff; + |] + in + + (* Type DIEs. *) + + let (emitted_types:(Ast.ty, Asm.frag) Hashtbl.t) = Hashtbl.create 0 in + let (emitted_slots:(Ast.slot, Asm.frag) Hashtbl.t) = Hashtbl.create 0 in + + let rec ref_slot_die + (slot:Ast.slot) + : frag = + if Hashtbl.mem emitted_slots slot + then Hashtbl.find emitted_slots slot + else + let ref_addr_for_fix fix = + let res = dw_form_ref_addr fix in + Hashtbl.add emitted_slots slot res; + res + in + + match slot.Ast.slot_mode with + Ast.MODE_exterior -> + let fix = new_fixup "exterior DIE" in + let body_off = + word_sz_int * Abi.exterior_rc_slot_field_body + in + emit_die (DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_exterior_slot); + (* DW_AT_type: DW_FORM_ref_addr *) + (ref_type_die (slot_ty slot)); + (* DW_AT_mutable: DW_FORM_flag *) + BYTE (if slot.Ast.slot_mutable + then 1 else 0); + (* DW_AT_data_location: DW_FORM_block1 *) + (* This is a DWARF expression for moving + from the address of an exterior + allocation to the address of its + body. *) + dw_form_block1 + [| DW_OP_push_object_address; + DW_OP_lit body_off; + DW_OP_plus; + DW_OP_deref |] + |])); + ref_addr_for_fix fix + + (* FIXME: encode mutable-ness of interiors. *) + | Ast.MODE_interior -> ref_type_die (slot_ty slot) + + | Ast.MODE_alias -> + let fix = new_fixup "alias DIE" in + emit_die (DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_alias_slot); + (* DW_AT_type: DW_FORM_ref_addr *) + (ref_type_die (slot_ty slot)); + (* DW_AT_mutable: DW_FORM_flag *) + BYTE (if slot.Ast.slot_mutable then 1 else 0) + |])); + ref_addr_for_fix fix + + + and size_block4 (sz:size) (add_to_base:bool) : frag = + (* NB: typarams = "words following implicit args" by convention in + * ABI/x86. + *) + let abi = cx.ctxt_abi in + let typarams = + Int64.add abi.Abi.abi_frame_base_sz abi.Abi.abi_implicit_args_sz + in + let word_n n = Int64.mul abi.Abi.abi_word_sz (Int64.of_int n) in + let param_n n = Int64.add typarams (word_n n) in + let param_n_field_k n k = + [ DW_OP_fbreg (IMM (param_n n)); + DW_OP_deref; + DW_OP_constu (IMM (word_n k)); + DW_OP_plus; + DW_OP_deref ] + in + let rec sz_ops (sz:size) : dw_op list = + match sz with + SIZE_fixed i -> + [ DW_OP_constu (IMM i) ] + + | SIZE_fixup_mem_sz fix -> + [ DW_OP_constu (M_SZ fix) ] + + | SIZE_fixup_mem_pos fix -> + [ DW_OP_constu (M_POS fix) ] + + | SIZE_param_size i -> + param_n_field_k i Abi.tydesc_field_size + + | SIZE_param_align i -> + param_n_field_k i Abi.tydesc_field_align + + | SIZE_rt_neg s -> + (sz_ops s) @ [ DW_OP_neg ] + + | SIZE_rt_add (a, b) -> + (sz_ops a) @ (sz_ops b) @ [ DW_OP_plus ] + + | SIZE_rt_mul (a, b) -> + (sz_ops a) @ (sz_ops b) @ [ DW_OP_mul ] + + | SIZE_rt_max (a, b) -> + (sz_ops a) @ (sz_ops b) @ + [ DW_OP_over; (* ... a b a *) + DW_OP_over; (* ... a b a b *) + DW_OP_ge; (* ... a b (a>=b?1:0) *) + + (* jump +1 byte of dwarf ops if 1 *) + DW_OP_bra (IMM 1L); + + (* do this if 0, when b is max. *) + DW_OP_swap; (* ... b a *) + + (* jump to here when a is max. *) + DW_OP_drop; (* ... max *) + ] + + | SIZE_rt_align (align, off) -> + (* + * calculate off + pad where: + * + * pad = (align - (off mod align)) mod align + * + * In our case it's always a power of two, + * so we can just do: + * + * mask = align-1 + * off += mask + * off &= ~mask + * + *) + (sz_ops off) @ (sz_ops align) @ + [ + DW_OP_lit 1; (* ... off align 1 *) + DW_OP_minus; (* ... off mask *) + DW_OP_dup; (* ... off mask mask *) + DW_OP_not; (* ... off mask ~mask *) + DW_OP_rot; (* ... ~mask off mask *) + DW_OP_plus; (* ... ~mask (off+mask) *) + DW_OP_and; (* ... aligned *) + ] + in + let ops = sz_ops sz in + let ops = + if add_to_base + then ops @ [ DW_OP_plus ] + else ops + in + let frag = SEQ (Array.map (dw_op_to_frag abi) (Array.of_list ops)) in + let block_fixup = new_fixup "DW_FORM_block4 fixup" in + SEQ [| WORD (TY_u32, F_SZ block_fixup); + DEF (block_fixup, frag) |] + + + and ref_type_die + (ty:Ast.ty) + : frag = + (* Returns a DW_FORM_ref_addr to the type. *) + if Hashtbl.mem emitted_types ty + then Hashtbl.find emitted_types ty + else + let ref_addr_for_fix fix = + let res = dw_form_ref_addr fix in + Hashtbl.add emitted_types ty res; + res + in + + let record trec = + let rty = referent_type abi (Ast.TY_rec trec) in + let rty_sz = Il.referent_ty_size abi.Abi.abi_word_bits in + let fix = new_fixup "record type DIE" in + let die = DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_struct_type); + (* DW_AT_byte_size: DW_FORM_block4 *) + size_block4 (rty_sz rty) false + |]); + in + let rtys = + match rty with + Il.StructTy rtys -> rtys + | _ -> bug () "record type became non-struct referent_ty" + in + emit_die die; + Array.iteri + begin + fun i (ident, slot) -> + emit_die (SEQ [| + uleb (get_abbrev_code abbrev_struct_type_member); + (* DW_AT_name: DW_FORM_string *) + ZSTRING ident; + (* DW_AT_type: DW_FORM_ref_addr *) + (ref_slot_die slot); + (* DW_AT_mutable: DW_FORM_flag *) + BYTE (if slot.Ast.slot_mutable then 1 else 0); + (* DW_AT_data_member_location: DW_FORM_block4 *) + size_block4 + (Il.get_element_offset word_bits rtys i) + true; + (* DW_AT_byte_size: DW_FORM_block4 *) + size_block4 (rty_sz rtys.(i)) false |]); + end + trec; + emit_null_die (); + ref_addr_for_fix fix + in + + let string_type _ = + (* + * Strings, like vecs, are &[rc,alloc,fill,data...] + *) + let fix = new_fixup "string type DIE" in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_string_type); + (* (DW_AT_byte_size, DW_FORM_block1); *) + dw_form_block1 [| DW_OP_push_object_address; + DW_OP_deref; + DW_OP_lit (word_sz_int * 2); + DW_OP_plus; |]; + (* (DW_AT_data_location, DW_FORM_block1); *) + dw_form_block1 [| DW_OP_push_object_address; + DW_OP_deref; + DW_OP_lit (word_sz_int * 3); + DW_OP_plus |] + |]) + in + emit_die die; + ref_addr_for_fix fix + in + + let base (name, encoding, byte_size) = + let fix = new_fixup ("base type DIE: " ^ name) in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_base_type); + (* DW_AT_name: DW_FORM_string *) + ZSTRING name; + (* DW_AT_encoding: DW_FORM_data1 *) + BYTE (dw_ate_to_int encoding); + (* DW_AT_byte_size: DW_FORM_data1 *) + BYTE byte_size + |]) + in + emit_die die; + ref_addr_for_fix fix + in + + let unspecified_anon_struct _ = + let fix = new_fixup "unspecified-anon-struct DIE" in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code + abbrev_unspecified_anon_structure_type); + (* DW_AT_declaration: DW_FORM_flag *) + BYTE 1; + |]) + in + emit_die die; + ref_addr_for_fix fix + in + + let unspecified_struct rust_ty = + let fix = new_fixup "unspecified-struct DIE" in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_unspecified_structure_type); + (* DW_AT_rust_type_code: DW_FORM_data1 *) + BYTE (dw_rust_type_to_int rust_ty); + (* DW_AT_declaration: DW_FORM_flag *) + BYTE 1; + |]) + in + emit_die die; + ref_addr_for_fix fix + in + + let rust_type_param (p:(ty_param_idx * Ast.effect)) = + let fix = new_fixup "rust-type-param DIE" in + let die = DEF (fix, type_param_die p) in + emit_die die; + ref_addr_for_fix fix + in + + let unspecified_ptr_with_ref rust_ty ref_addr = + let fix = new_fixup ("unspecified-pointer-type-with-ref DIE") in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_unspecified_pointer_type); + (* DW_AT_rust_type_code: DW_FORM_data1 *) + BYTE (dw_rust_type_to_int rust_ty); + (* DW_AT_declaration: DW_FORM_flag *) + BYTE 1; + (* DW_AT_type: DW_FORM_ref_addr *) + ref_addr + |]) + in + emit_die die; + ref_addr_for_fix fix + in + + let formal_type slot = + let fix = new_fixup "formal type" in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_formal_type); + (* DW_AT_type: DW_FORM_ref_addr *) + (ref_slot_die slot); + |]) + in + emit_die die; + ref_addr_for_fix fix + in + + let fn_type tfn = + let (tsig, taux) = tfn in + let fix = new_fixup "fn type" in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_subroutine_type); + (* DW_AT_type: DW_FORM_ref_addr *) + (ref_slot_die tsig.Ast.sig_output_slot); + encode_effect taux.Ast.fn_effect; + (* DW_AT_rust_iterator: DW_FORM_flag *) + BYTE (if taux.Ast.fn_is_iter then 1 else 0) + |]) + in + emit_die die; + Array.iter + (fun s -> ignore (formal_type s)) + tsig.Ast.sig_input_slots; + emit_null_die (); + ref_addr_for_fix fix + in + + let obj_fn_type ident tfn = + let (tsig, taux) = tfn in + let fix = new_fixup "fn type" in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_obj_subroutine_type); + (* DW_AT_name: DW_FORM_string *) + ZSTRING ident; + (* DW_AT_type: DW_FORM_ref_addr *) + (ref_slot_die tsig.Ast.sig_output_slot); + encode_effect taux.Ast.fn_effect; + (* DW_AT_rust_iterator: DW_FORM_flag *) + BYTE (if taux.Ast.fn_is_iter then 1 else 0) + |]) + in + emit_die die; + Array.iter + (fun s -> ignore (formal_type s)) + tsig.Ast.sig_input_slots; + emit_null_die (); + ref_addr_for_fix fix + in + + let obj_type (eff,ob) = + let fix = new_fixup "object type" in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_obj_type); + encode_effect eff; + |]) + in + emit_die die; + Hashtbl.iter (fun k v -> ignore (obj_fn_type k v)) ob; + emit_null_die (); + ref_addr_for_fix fix + in + + let unspecified_ptr_with_ref_ty rust_ty ty = + unspecified_ptr_with_ref rust_ty (ref_type_die ty) + in + + let unspecified_ptr_with_ref_slot rust_ty slot = + unspecified_ptr_with_ref rust_ty (ref_slot_die slot) + in + + let unspecified_ptr rust_ty = + unspecified_ptr_with_ref rust_ty (unspecified_anon_struct ()) + in + + let native_ptr_type oid = + let fix = new_fixup "native type" in + let die = + DEF (fix, SEQ [| + uleb (get_abbrev_code abbrev_native_pointer_type); + (* DW_AT_rust_type_code: DW_FORM_data1 *) + BYTE (dw_rust_type_to_int DW_RUST_native); + (* DW_AT_rust_native_type_id: DW_FORM_data4 *) + WORD (word_ty_mach, IMM (Int64.of_int (int_of_opaque oid))); + |]) + in + emit_die die; + ref_addr_for_fix fix + in + + match ty with + Ast.TY_nil -> unspecified_struct DW_RUST_nil + | Ast.TY_bool -> base ("bool", DW_ATE_boolean, 1) + | Ast.TY_mach (TY_u8) -> base ("u8", DW_ATE_unsigned, 1) + | Ast.TY_mach (TY_u16) -> base ("u16", DW_ATE_unsigned, 2) + | Ast.TY_mach (TY_u32) -> base ("u32", DW_ATE_unsigned, 4) + | Ast.TY_mach (TY_u64) -> base ("u64", DW_ATE_unsigned, 8) + | Ast.TY_mach (TY_i8) -> base ("i8", DW_ATE_signed, 1) + | Ast.TY_mach (TY_i16) -> base ("i16", DW_ATE_signed, 2) + | Ast.TY_mach (TY_i32) -> base ("i32", DW_ATE_signed, 4) + | Ast.TY_mach (TY_i64) -> base ("i64", DW_ATE_signed, 8) + | Ast.TY_int -> base ("int", DW_ATE_signed, word_sz_int) + | Ast.TY_uint -> base ("uint", DW_ATE_unsigned, word_sz_int) + | Ast.TY_char -> base ("char", DW_ATE_unsigned_char, 4) + | Ast.TY_str -> string_type () + | Ast.TY_rec trec -> record trec + | Ast.TY_tup ttup -> + record (Array.mapi (fun i s -> + ("_" ^ (string_of_int i), s)) + ttup) + + | Ast.TY_vec s -> unspecified_ptr_with_ref_slot DW_RUST_vec s + | Ast.TY_chan t -> unspecified_ptr_with_ref_ty DW_RUST_chan t + | Ast.TY_port t -> unspecified_ptr_with_ref_ty DW_RUST_port t + | Ast.TY_task -> unspecified_ptr DW_RUST_task + | Ast.TY_fn fn -> fn_type fn + | Ast.TY_tag _ -> unspecified_ptr DW_RUST_tag + | Ast.TY_iso _ -> unspecified_ptr DW_RUST_iso + | Ast.TY_type -> unspecified_ptr DW_RUST_type + | Ast.TY_native i -> native_ptr_type i + | Ast.TY_param p -> rust_type_param p + | Ast.TY_obj ob -> obj_type ob + | _ -> + bug () "unimplemented dwarf encoding for type %a" + Ast.sprintf_ty ty + in + + let finish_crate_cu_and_compose_headers _ = + + let pubnames_header_and_curr_pubnames = + SEQ [| (BYTE 0) |] + in + + let aranges_header_and_curr_aranges = + SEQ [| (BYTE 0) |] + in + + let cu_info_fixup = new_fixup "CU debug_info fixup" in + let info_header_fixup = new_fixup "CU debug_info header" in + let info_header_and_curr_infos = + SEQ + [| + WORD (TY_u32, (* unit_length: *) + (ADD + ((F_SZ cu_info_fixup), (* including this header,*) + (F_SZ info_header_fixup)))); (* excluding this word. *) + DEF (info_header_fixup, + (SEQ [| + WORD (TY_u16, IMM 2L); (* DWARF version *) + (* Since we share abbrevs across all CUs, + * offset is always 0. + *) + WORD (TY_u32, IMM 0L); (* CU-abbrev offset. *) + BYTE 4; (* Size of an address. *) + |])); + DEF (cu_info_fixup, + SEQ (Array.of_list (List.rev (!curr_cu_infos)))); + |] + in + + let cu_line_fixup = new_fixup "CU debug_line fixup" in + let cu_line_header_fixup = new_fixup "CU debug_line header" in + let line_header_fixup = new_fixup "CU debug_line header" in + let line_header_and_curr_line = + SEQ + [| + WORD + (TY_u32, (* unit_length: *) + (ADD + ((F_SZ cu_line_fixup), (* including this header,*) + (F_SZ cu_line_header_fixup)))); (* excluding this word. *) + DEF (cu_line_header_fixup, + (SEQ [| + WORD (TY_u16, IMM 2L); (* DWARF version. *) + WORD + (TY_u32, + (F_SZ line_header_fixup)); (* Another header-length.*) + DEF (line_header_fixup, + SEQ [| + BYTE 1; (* Minimum insn length. *) + BYTE 1; (* default_is_stmt *) + BYTE 0; (* line_base *) + BYTE 0; (* line_range *) + BYTE (max_dw_lns + 1); (* opcode_base *) + BYTES (* opcode arity array. *) + (Array.init max_dw_lns + (fun i -> + (dw_lns_arity + (int_to_dw_lns + (i+1))))); + (BYTE 0); (* List of include dirs. *) + (BYTE 0); (* List of file entries. *) + |])|])); + DEF (cu_line_fixup, + SEQ (Array.of_list (List.rev (!curr_cu_line)))); + |] + in + let frame_header_and_curr_frame = + SEQ [| (BYTE 0) |] + in + let prepend_and_reset (curr_ref, accum_ref, header_and_curr) = + prepend accum_ref header_and_curr; + curr_ref := [] + in + List.iter prepend_and_reset + [ (curr_cu_aranges, cu_aranges, aranges_header_and_curr_aranges); + (curr_cu_pubnames, cu_pubnames, pubnames_header_and_curr_pubnames); + (curr_cu_infos, cu_infos, info_header_and_curr_infos); + (curr_cu_line, cu_lines, line_header_and_curr_line); + (curr_cu_frame, cu_frames, frame_header_and_curr_frame) ] + in + + let image_base_rel (fix:fixup) : expr64 = + SUB (M_POS (fix), M_POS (cx.ctxt_image_base_fixup)) + in + + let addr_ranges (fix:fixup) : frag = + let image_is_relocated = + match cx.ctxt_sess.Session.sess_targ with + Win32_x86_pe -> + cx.ctxt_sess.Session.sess_library_mode + | _ -> true + in + let lo = + if image_is_relocated + then image_base_rel fix + else M_POS fix + in + SEQ [| + (* DW_AT_low_pc, DW_FORM_addr *) + WORD (word_ty_mach, lo); + (* DW_AT_high_pc, DW_FORM_addr *) + WORD (word_ty_mach, ADD ((lo), + (M_SZ fix))) + |] + in + + let emit_srcfile_cu_die + (name:string) + (cu_text_fixup:fixup) + : unit = + let abbrev_code = get_abbrev_code abbrev_srcfile_cu in + let srcfile_cu_die = + (SEQ [| + uleb abbrev_code; + (* DW_AT_name: DW_FORM_string *) + ZSTRING (Filename.basename name); + (* DW_AT_comp_dir: DW_FORM_string *) + ZSTRING (Filename.concat (Sys.getcwd()) (Filename.dirname name)); + addr_ranges cu_text_fixup; + |]) + in + emit_die srcfile_cu_die + in + + let emit_meta_die + (meta:(Ast.ident * string)) + : unit = + let abbrev_code = get_abbrev_code abbrev_meta in + let die = + SEQ [| uleb abbrev_code; + (* DW_AT_name: DW_FORM_string *) + ZSTRING (fst meta); + (* DW_AT_const_value: DW_FORM_string *) + ZSTRING (snd meta); + |] + in + emit_die die + in + + let begin_crate_cu_and_emit_cu_die + (name:string) + + (cu_text_fixup:fixup) + : unit = + let abbrev_code = get_abbrev_code abbrev_crate_cu in + let crate_cu_die = + (SEQ [| + uleb abbrev_code; + (* DW_AT_producer: DW_FORM_string *) + ZSTRING "Rustboot pre-release"; + (* DW_AT_language: DW_FORM_data4 *) + WORD (word_ty_mach, IMM 0x2L); (* DW_LANG_C *) + (* DW_AT_name: DW_FORM_string *) + ZSTRING (Filename.basename name); + (* DW_AT_comp_dir: DW_FORM_string *) + ZSTRING (Filename.concat (Sys.getcwd()) (Filename.dirname name)); + addr_ranges cu_text_fixup; + (* DW_AT_use_UTF8, DW_FORM_flag *) + BYTE 1 + |]) + in + curr_cu_infos := [crate_cu_die]; + curr_cu_line := [] + in + + let type_param_decl_die (p:(Ast.ident * (ty_param_idx * Ast.effect))) = + let (ident, (idx, eff)) = p in + SEQ [| + uleb (get_abbrev_code abbrev_rust_type_param_decl); + (* DW_AT_rust_type_code: DW_FORM_data1 *) + BYTE (dw_rust_type_to_int DW_RUST_type_param); + (* DW_AT_name: DW_FORM_string *) + ZSTRING (Filename.basename ident); + (* DW_AT_rust_type_param_index: DW_FORM_data4 *) + WORD (word_ty_mach, IMM (Int64.of_int idx)); + encode_effect eff; + |] + in + + let emit_type_param_decl_dies + (params:(Ast.ty_param identified) array) + : unit = + Array.iter + (fun p -> + emit_die (type_param_decl_die p.node)) + params; + in + + let emit_module_die + (id:Ast.ident) + : unit = + let abbrev_code = get_abbrev_code abbrev_module in + let module_die = + (SEQ [| + uleb abbrev_code; + (* DW_AT_name *) + ZSTRING id; + |]) + in + emit_die module_die; + in + + let emit_subprogram_die + (id:Ast.ident) + (ret_slot:Ast.slot) + (effect:Ast.effect) + (fix:fixup) + : unit = + (* NB: retpc = "top word of frame-base" by convention in ABI/x86. *) + let abi = cx.ctxt_abi in + let retpc = Int64.sub abi.Abi.abi_frame_base_sz abi.Abi.abi_word_sz in + let abbrev_code = get_abbrev_code abbrev_subprogram in + let subprogram_die = + (SEQ [| + uleb abbrev_code; + (* DW_AT_name *) + ZSTRING id; + (* DW_AT_type: DW_FORM_ref_addr *) + ref_slot_die ret_slot; + addr_ranges fix; + (* DW_AT_frame_base *) + dw_form_block1 [| DW_OP_reg abi.Abi.abi_dwarf_fp_reg |]; + (* DW_AT_return_addr *) + dw_form_block1 [| DW_OP_fbreg (Asm.IMM retpc); |]; + encode_effect effect; + |]) + in + emit_die subprogram_die + in + + let emit_typedef_die + (id:Ast.ident) + (ty:Ast.ty) + : unit = + let abbrev_code = get_abbrev_code abbrev_typedef in + let typedef_die = + (SEQ [| + uleb abbrev_code; + (* DW_AT_name: DW_FORM_string *) + ZSTRING id; + (* DW_AT_type: DW_FORM_ref_addr *) + (ref_type_die ty); + |]) + in + emit_die typedef_die + in + + let visit_crate_pre + (crate:Ast.crate) + : unit = + let filename = (Hashtbl.find cx.ctxt_item_files crate.id) in + log cx "walking crate CU '%s'" filename; + begin_crate_cu_and_emit_cu_die filename + (Hashtbl.find cx.ctxt_file_fixups crate.id); + Array.iter emit_meta_die crate.node.Ast.crate_meta; + inner.Walk.visit_crate_pre crate + in + + let visit_mod_item_pre + (id:Ast.ident) + (params:(Ast.ty_param identified) array) + (item:Ast.mod_item) + : unit = + if Hashtbl.mem cx.ctxt_item_files item.id + then + begin + let filename = (Hashtbl.find cx.ctxt_item_files item.id) in + log cx "walking srcfile CU '%s'" filename; + emit_srcfile_cu_die filename + (Hashtbl.find cx.ctxt_file_fixups item.id); + end + else + (); + begin + match item.node.Ast.decl_item with + Ast.MOD_ITEM_mod _ -> + begin + log cx "walking module '%s' with %d type params" + (path_name()) + (Array.length item.node.Ast.decl_params); + emit_module_die id; + emit_type_param_decl_dies item.node.Ast.decl_params; + end + | Ast.MOD_ITEM_fn _ -> + begin + let ty = Hashtbl.find cx.ctxt_all_item_types item.id in + let (tsig,taux) = + match ty with + Ast.TY_fn tfn -> tfn + | _ -> + bug () + "non-fn type when emitting dwarf for MOD_ITEM_fn" + in + log cx "walking function '%s' with %d type params" + (path_name()) + (Array.length item.node.Ast.decl_params); + emit_subprogram_die + id tsig.Ast.sig_output_slot taux.Ast.fn_effect + (Hashtbl.find cx.ctxt_fn_fixups item.id); + emit_type_param_decl_dies item.node.Ast.decl_params; + end + | Ast.MOD_ITEM_type _ -> + begin + log cx "walking typedef '%s' with %d type params" + (path_name()) + (Array.length item.node.Ast.decl_params); + emit_typedef_die + id (Hashtbl.find cx.ctxt_all_type_items item.id); + emit_type_param_decl_dies item.node.Ast.decl_params; + end + | _ -> () + end; + inner.Walk.visit_mod_item_pre id params item + in + + let visit_crate_post + (crate:Ast.crate) + : unit = + inner.Walk.visit_crate_post crate; + assert (Hashtbl.mem cx.ctxt_item_files crate.id); + emit_null_die(); + log cx + "finishing crate CU and composing headers (%d DIEs collected)" + (List.length (!curr_cu_infos)); + finish_crate_cu_and_compose_headers () + in + + let visit_mod_item_post + (id:Ast.ident) + (params:(Ast.ty_param identified) array) + (item:Ast.mod_item) + : unit = + inner.Walk.visit_mod_item_post id params item; + begin + match item.node.Ast.decl_item with + Ast.MOD_ITEM_mod _ + | Ast.MOD_ITEM_fn _ + | Ast.MOD_ITEM_type _ -> emit_null_die () + | _ -> () + end; + if Hashtbl.mem cx.ctxt_item_files item.id + then emit_null_die() + in + + let visit_block_pre (b:Ast.block) : unit = + log cx "entering lexical block"; + let fix = Hashtbl.find cx.ctxt_block_fixups b.id in + let abbrev_code = get_abbrev_code abbrev_lexical_block in + let block_die = + SEQ [| + uleb abbrev_code; + addr_ranges fix; + |] + in + emit_die block_die; + inner.Walk.visit_block_pre b + in + + let visit_block_post (b:Ast.block) : unit = + inner.Walk.visit_block_post b; + log cx "leaving lexical block, terminating with NULL DIE"; + emit_null_die () + in + + let visit_slot_identified_pre (s:Ast.slot identified) : unit = + begin + match htab_search cx.ctxt_slot_keys s.id with + None + | Some Ast.KEY_temp _ -> () + | Some Ast.KEY_ident ident -> + begin + let abbrev_code = + if Hashtbl.mem cx.ctxt_slot_is_arg s.id + then get_abbrev_code abbrev_formal + else get_abbrev_code abbrev_variable + in + let resolved_slot = referent_to_slot cx s.id in + let emit_var_die slot_loc = + let var_die = + SEQ [| + uleb abbrev_code; + (* DW_AT_name: DW_FORM_string *) + ZSTRING ident; + (* DW_AT_location: DW_FORM_block1 *) + dw_form_block1 slot_loc; + (* DW_AT_type: DW_FORM_ref_addr *) + ref_slot_die resolved_slot + |] + in + emit_die var_die; + in + match htab_search cx.ctxt_slot_offsets s.id with + Some off -> + begin + match Il.size_to_expr64 off with + (* FIXME: handle dynamic-size slots. *) + None -> () + | Some off -> + emit_var_die + [| DW_OP_fbreg off |] + end + | None -> + (* FIXME (issue #28): handle slots assigned to + * vregs. + *) + () + end + end; + inner.Walk.visit_slot_identified_pre s + in + + { inner with + Walk.visit_crate_pre = visit_crate_pre; + Walk.visit_crate_post = visit_crate_post; + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; + Walk.visit_block_pre = visit_block_pre; + Walk.visit_block_post = visit_block_post; + Walk.visit_slot_identified_pre = visit_slot_identified_pre + } +;; + + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : debug_records = + + let cu_aranges = ref [] in + let cu_pubnames = ref [] in + let cu_infos = ref [] in + let cu_abbrevs = ref [] in + let cu_lines = ref [] in + let cu_frames = ref [] in + + let path = Stack.create () in + + let passes = + [| + dwarf_visitor cx Walk.empty_visitor path + cx.ctxt_debug_info_fixup + cu_aranges cu_pubnames + cu_infos cu_abbrevs + cu_lines cu_frames + |]; + in + + log cx "emitting DWARF records"; + run_passes cx "dwarf" path passes (log cx "%s") crate; + + (* Terminate the tables. *) + { + debug_aranges = SEQ (Array.of_list (List.rev (!cu_aranges))); + debug_pubnames = SEQ (Array.of_list (List.rev (!cu_pubnames))); + debug_info = SEQ (Array.of_list (List.rev (!cu_infos))); + debug_abbrev = SEQ (Array.of_list (List.rev (!cu_abbrevs))); + debug_line = SEQ (Array.of_list (List.rev (!cu_lines))); + debug_frame = SEQ (Array.of_list (List.rev (!cu_frames))); + } +;; + +(* + * Support for reconstituting a DWARF tree from a file, and various + * artifacts we can distill back from said DWARF. + *) + +let log sess = Session.log "dwarf" + sess.Session.sess_log_dwarf + sess.Session.sess_log_out +;; + + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_dwarf + then thunk () + else () +;; + +let read_abbrevs + (sess:Session.sess) + (ar:asm_reader) + ((off:int),(sz:int)) + : (int,abbrev) Hashtbl.t = + ar.asm_seek off; + let abs = Hashtbl.create 0 in + let rec read_abbrevs _ = + if ar.asm_get_off() >= (off + sz) + then abs + else + begin + let n = ar.asm_get_uleb() in + let tag = ar.asm_get_uleb() in + let has_children = ar.asm_get_u8() in + let pairs = ref [] in + let _ = + log sess "abbrev: %d, tag: %d, has_children: %d" + n tag has_children + in + let rec read_pairs _ = + let attr = ar.asm_get_uleb() in + let form = ar.asm_get_uleb() in + let _ = log sess "attr: %d, form: %d" attr form in + match (attr,form) with + (0,0) -> Array.of_list (List.rev (!pairs)) + | _ -> + begin + pairs := (dw_at_of_int attr, + dw_form_of_int form) :: (!pairs); + read_pairs() + end + in + let pairs = read_pairs() in + Hashtbl.add abs n (dw_tag_of_int tag, + dw_children_of_int has_children, + pairs); + read_abbrevs() + end; + in + read_abbrevs() +;; + +type data = + DATA_str of string + | DATA_num of int + | DATA_other +;; + +type die = + { die_off: int; + die_tag: dw_tag; + die_attrs: (dw_at * (dw_form * data)) array; + die_children: die array; } +;; + +type rooted_dies = (int * (int,die) Hashtbl.t) +;; + +let fmt_dies + (ff:Format.formatter) + (dies:rooted_dies) + : unit = + let ((root:int),(dies:(int,die) Hashtbl.t)) = dies in + let rec fmt_die die = + Ast.fmt ff "@\nDIE <0x%x> %s" die.die_off (dw_tag_to_string die.die_tag); + Array.iter + begin + fun (at,(form,data)) -> + Ast.fmt ff "@\n %s = " (dw_at_to_string at); + begin + match data with + DATA_num n -> Ast.fmt ff "0x%x" n + | DATA_str s -> Ast.fmt ff "\"%s\"" s + | DATA_other -> Ast.fmt ff "<other>" + end; + Ast.fmt ff " (%s)" (dw_form_to_string form) + end + die.die_attrs; + if (Array.length die.die_children) != 0 + then + begin + Ast.fmt ff "@\n"; + Ast.fmt_obox ff; + Ast.fmt ff " children: "; + Ast.fmt_obr ff; + Array.iter fmt_die die.die_children; + Ast.fmt_cbb ff + end; + in + fmt_die (Hashtbl.find dies root) +;; + +let read_dies + (sess:Session.sess) + (ar:asm_reader) + ((off:int),(sz:int)) + (abbrevs:(int,abbrev) Hashtbl.t) + : (int * ((int,die) Hashtbl.t)) = + ar.asm_seek off; + let cu_len = ar.asm_get_u32() in + let _ = log sess "debug_info cu_len: %d, section size %d" cu_len sz in + let _ = assert ((cu_len + 4) = sz) in + let dwarf_vers = ar.asm_get_u16() in + let _ = assert (dwarf_vers >= 2) in + let cu_abbrev_off = ar.asm_get_u32() in + let _ = assert (cu_abbrev_off = 0) in + let sizeof_addr = ar.asm_get_u8() in + let _ = assert (sizeof_addr = 4) in + + let adv_block1 _ = + let len = ar.asm_get_u8() in + ar.asm_adv len + in + + let adv_block4 _ = + let len = ar.asm_get_u32() in + ar.asm_adv len + in + + let all_dies = Hashtbl.create 0 in + let root = (ar.asm_get_off()) - off in + + let rec read_dies (dies:(die list) ref) = + let die_arr _ = Array.of_list (List.rev (!dies)) in + if ar.asm_get_off() >= (off + sz) + then die_arr() + else + begin + let die_off = (ar.asm_get_off()) - off in + let abbrev_num = ar.asm_get_uleb() in + if abbrev_num = 0 + then die_arr() + else + let _ = + log sess "DIE at off <%d> with abbrev %d" + die_off abbrev_num + in + let abbrev = Hashtbl.find abbrevs abbrev_num in + let (tag, children, attrs) = abbrev in + let attrs = + Array.map + begin + fun (attr,form) -> + let data = + match form with + DW_FORM_string -> DATA_str (ar.asm_get_zstr()) + | DW_FORM_addr -> DATA_num (ar.asm_get_u32()) + | DW_FORM_ref_addr -> DATA_num (ar.asm_get_u32()) + | DW_FORM_data1 -> DATA_num (ar.asm_get_u8()) + | DW_FORM_data4 -> DATA_num (ar.asm_get_u32()) + | DW_FORM_flag -> DATA_num (ar.asm_get_u8()) + | DW_FORM_block1 -> (adv_block1(); DATA_other) + | DW_FORM_block4 -> (adv_block4(); DATA_other) + | _ -> + bug () "unknown DWARF form %d" + (dw_form_to_int form) + in + (attr, (form, data)) + end + attrs; + in + let children = + match children with + DW_CHILDREN_yes -> read_dies (ref []) + | DW_CHILDREN_no -> [| |] + in + let die = { die_off = die_off; + die_tag = tag; + die_attrs = attrs; + die_children = children } + in + prepend dies die; + htab_put all_dies die_off die; + read_dies dies + end + in + ignore (read_dies (ref [])); + iflog sess + begin + fun _ -> + log sess "read DIEs:"; + log sess "%s" (Ast.fmt_to_str fmt_dies (root, all_dies)); + end; + (root, all_dies) +;; + +let rec extract_meta + ((i:int),(dies:(int,die) Hashtbl.t)) + : (Ast.ident * string) array = + let meta = Queue.create () in + + let get_attr die attr = + atab_find die.die_attrs attr + in + + let get_str die attr = + match get_attr die attr with + (_, DATA_str s) -> s + | _ -> bug () "unexpected num form for %s" (dw_at_to_string attr) + in + + let die = Hashtbl.find dies i in + begin + match die.die_tag with + DW_TAG_rust_meta -> + let n = get_str die DW_AT_name in + let v = get_str die DW_AT_const_value in + Queue.add (n,v) meta + + | DW_TAG_compile_unit -> + Array.iter + (fun child -> + Array.iter (fun m -> Queue.add m meta) + (extract_meta (child.die_off,dies))) + die.die_children + + | _ -> () + end; + queue_to_arr meta +;; + + +let rec extract_mod_items + (nref:node_id ref) + (oref:opaque_id ref) + (abi:Abi.abi) + (mis:Ast.mod_items) + ((i:int),(dies:(int,die) Hashtbl.t)) + : unit = + + let next_node_id _ : node_id = + let id = !nref in + nref:= Node ((int_of_node id)+1); + id + in + + let next_opaque_id _ : opaque_id = + let id = !oref in + oref:= Opaque ((int_of_opaque id)+1); + id + in + + let external_opaques = Hashtbl.create 0 in + let get_opaque_of o = + htab_search_or_add external_opaques o + (fun _ -> next_opaque_id()) + in + + + let (word_sz:int64) = abi.Abi.abi_word_sz in + let (word_sz_int:int) = Int64.to_int word_sz in + + let get_die i = + Hashtbl.find dies i + in + + let get_attr die attr = + atab_find die.die_attrs attr + in + + let get_str die attr = + match get_attr die attr with + (_, DATA_str s) -> s + | _ -> bug () "unexpected num form for %s" (dw_at_to_string attr) + in + + let get_num die attr = + match get_attr die attr with + (_, DATA_num n) -> n + | _ -> bug () "unexpected str form for %s" (dw_at_to_string attr) + in + + let get_flag die attr = + match get_attr die attr with + (_, DATA_num 0) -> false + | (_, DATA_num 1) -> true + | _ -> bug () "unexpected non-flag form for %s" (dw_at_to_string attr) + in + + let get_effect die = + match (get_flag die DW_AT_mutable, get_flag die DW_AT_pure) with + (* Note: weird encoding: mutable+pure = unsafe. *) + (true, true) -> Ast.UNSAFE + | (true, false) -> Ast.STATE + | (false, false) -> Ast.IO + | (false, true) -> Ast.PURE + in + + let get_name die = get_str die DW_AT_name in + + let get_type_param die = + let idx = get_num die DW_AT_rust_type_param_index in + let e = get_effect die in + (idx, e) + in + + let get_native_id die = + get_num die DW_AT_rust_native_type_id + in + + let get_type_param_decl die = + ((get_str die DW_AT_name), (get_type_param die)) + in + + let is_rust_type die t = + match atab_search die.die_attrs DW_AT_rust_type_code with + Some (_, DATA_num n) -> (dw_rust_type_of_int n) = t + | _ -> false + in + + let rec get_ty die : Ast.ty = + match die.die_tag with + + DW_TAG_structure_type + when is_rust_type die DW_RUST_nil -> + Ast.TY_nil + + | DW_TAG_pointer_type + when is_rust_type die DW_RUST_task -> + Ast.TY_task + + | DW_TAG_pointer_type + when is_rust_type die DW_RUST_type -> + Ast.TY_type + + | DW_TAG_pointer_type + when is_rust_type die DW_RUST_port -> + Ast.TY_port (get_referenced_ty die) + + | DW_TAG_pointer_type + when is_rust_type die DW_RUST_chan -> + Ast.TY_chan (get_referenced_ty die) + + | DW_TAG_pointer_type + when is_rust_type die DW_RUST_vec -> + Ast.TY_vec (get_referenced_slot die) + + | DW_TAG_pointer_type + when is_rust_type die DW_RUST_type_param -> + Ast.TY_param (get_type_param die) + + | DW_TAG_pointer_type + when is_rust_type die DW_RUST_native -> + Ast.TY_native (get_opaque_of (get_native_id die)) + + | DW_TAG_string_type -> Ast.TY_str + + | DW_TAG_base_type -> + begin + match ((get_name die), + (dw_ate_of_int (get_num die DW_AT_encoding)), + (get_num die DW_AT_byte_size)) with + ("bool", DW_ATE_boolean, 1) -> Ast.TY_bool + | ("u8", DW_ATE_unsigned, 1) -> Ast.TY_mach TY_u8 + | ("u16", DW_ATE_unsigned, 2) -> Ast.TY_mach TY_u16 + | ("u32", DW_ATE_unsigned, 4) -> Ast.TY_mach TY_u32 + | ("u64", DW_ATE_unsigned, 8) -> Ast.TY_mach TY_u64 + | ("i8", DW_ATE_signed, 1) -> Ast.TY_mach TY_i8 + | ("i16", DW_ATE_signed, 2) -> Ast.TY_mach TY_i16 + | ("i32", DW_ATE_signed, 4) -> Ast.TY_mach TY_i32 + | ("i64", DW_ATE_signed, 8) -> Ast.TY_mach TY_i64 + | ("char", DW_ATE_unsigned_char, 4) -> Ast.TY_char + | ("int", DW_ATE_signed, sz) + when sz = word_sz_int -> Ast.TY_int + | ("uint", DW_ATE_unsigned, sz) + when sz = word_sz_int -> Ast.TY_uint + | _ -> bug () "unexpected type of DW_TAG_base_type" + end + + | DW_TAG_structure_type -> + begin + let is_num_idx s = + let len = String.length s in + if len >= 2 && s.[0] = '_' + then + let ok = ref true in + String.iter + (fun c -> ok := (!ok) && '0' <= c && c <= '9') + (String.sub s 1 (len-1)); + !ok + else + false + in + let members = arr_map_partial + die.die_children + begin + fun child -> + if child.die_tag = DW_TAG_member + then Some child + else None + end + in + assert ((Array.length members) > 0); + if is_num_idx (get_name members.(0)) + then + let slots = Array.map get_referenced_slot members in + Ast.TY_tup slots + else + let entries = + Array.map + (fun member_die -> ((get_name member_die), + (get_referenced_slot member_die))) + members + in + Ast.TY_rec entries + end + + | DW_TAG_interface_type -> + let eff = get_effect die in + let fns = Hashtbl.create 0 in + Array.iter + begin + fun child -> + if child.die_tag = DW_TAG_subroutine_type + then + Hashtbl.add fns (get_name child) (get_ty_fn child) + end + die.die_children; + Ast.TY_obj (eff,fns) + + | DW_TAG_subroutine_type -> + Ast.TY_fn (get_ty_fn die) + + | _ -> + bug () "unexpected tag in get_ty: %s" + (dw_tag_to_string die.die_tag) + + and get_slot die : Ast.slot = + match die.die_tag with + DW_TAG_reference_type -> + let ty = get_referenced_ty die in + let mut = get_flag die DW_AT_mutable in + let mode = + (* Exterior slots have a 'data_location' attr. *) + match atab_search die.die_attrs DW_AT_data_location with + Some _ -> Ast.MODE_exterior + | None -> Ast.MODE_alias + in + { Ast.slot_mode = mode; + Ast.slot_mutable = mut; + Ast.slot_ty = Some ty } + | _ -> + let ty = get_ty die in + (* FIXME: encode mutability of interior slots properly. *) + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = false; + Ast.slot_ty = Some ty } + + and get_referenced_ty die = + match get_attr die DW_AT_type with + (DW_FORM_ref_addr, DATA_num n) -> get_ty (get_die n) + | _ -> bug () "unexpected form of DW_AT_type in get_referenced_ty" + + and get_referenced_slot die = + match get_attr die DW_AT_type with + (DW_FORM_ref_addr, DATA_num n) -> get_slot (get_die n) + | _ -> bug () "unexpected form of DW_AT_type in get_referenced_slot" + + and get_ty_fn die = + let out = get_referenced_slot die in + let ins = + arr_map_partial + die.die_children + begin + fun child -> + if child.die_tag = DW_TAG_formal_parameter + then Some (get_referenced_slot child) + else None + end + in + let effect = get_effect die in + let iter = get_flag die DW_AT_rust_iterator in + let tsig = + { Ast.sig_input_slots = ins; + Ast.sig_input_constrs = [| |]; + Ast.sig_output_slot = out; } + in + let taux = + { Ast.fn_is_iter = iter; + Ast.fn_effect = effect } + in + (tsig, taux) + in + + let wrap n = + { id = next_node_id (); + node = n } + in + + let decl p i = + wrap { Ast.decl_params = p; + Ast.decl_item = i; } + in + + let get_formals die = + let islots = Queue.create () in + let params = Queue.create () in + Array.iter + begin + fun child -> + match child.die_tag with + DW_TAG_formal_parameter -> + if (is_rust_type child DW_RUST_type_param) + then Queue.push (wrap (get_type_param_decl child)) params + else Queue.push (get_referenced_slot child) islots + | _ -> () + end + die.die_children; + (queue_to_arr params, queue_to_arr islots) + in + + let extract_children mis die = + Array.iter + (fun child -> + extract_mod_items nref oref abi mis (child.die_off,dies)) + die.die_children + in + + let get_mod_items die = + let len = Array.length die.die_children in + let mis = Hashtbl.create len in + extract_children mis die; + mis + in + + let form_header_slots slots = + Array.mapi + (fun i slot -> (wrap slot, "_" ^ (string_of_int i))) + slots + in + + let die = Hashtbl.find dies i in + match die.die_tag with + DW_TAG_typedef -> + let ident = get_name die in + let ty = get_referenced_ty die in + let tyi = Ast.MOD_ITEM_type ty in + let (params, islots) = get_formals die in + assert ((Array.length islots) = 0); + htab_put mis ident (decl params tyi) + + | DW_TAG_compile_unit -> + extract_children mis die + + | DW_TAG_module -> + let ident = get_name die in + let sub_mis = get_mod_items die in + let exports = Hashtbl.create 0 in + let _ = Hashtbl.add exports Ast.EXPORT_all_decls () in + let view = { Ast.view_imports = Hashtbl.create 0; + Ast.view_exports = exports } + in + let mi = Ast.MOD_ITEM_mod (view, sub_mis) in + htab_put mis ident (decl [||] mi) + + | DW_TAG_subprogram -> + (* FIXME: finish this. *) + let ident = get_name die in + let oslot = get_referenced_slot die in + let effect = get_effect die in + let (params, islots) = get_formals die in + let taux = { Ast.fn_effect = effect; + Ast.fn_is_iter = false } + in + let tfn = { Ast.fn_input_slots = form_header_slots islots; + Ast.fn_input_constrs = [| |]; + Ast.fn_output_slot = wrap oslot; + Ast.fn_aux = taux; + Ast.fn_body = (wrap [||]); } + in + let fn = Ast.MOD_ITEM_fn tfn in + htab_put mis ident (decl params fn) + + | _ -> () +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) + diff --git a/src/boot/me/effect.ml b/src/boot/me/effect.ml new file mode 100644 index 00000000..515cfa21 --- /dev/null +++ b/src/boot/me/effect.ml @@ -0,0 +1,313 @@ +open Semant;; +open Common;; + +let log cx = Session.log "effect" + cx.ctxt_sess.Session.sess_log_effect + cx.ctxt_sess.Session.sess_log_out +;; + +let iflog cx thunk = + if cx.ctxt_sess.Session.sess_log_effect + then thunk () + else () +;; + +let mutability_checking_visitor + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + (* + * This visitor enforces the following rules: + * + * - A channel type carrying a mutable type is illegal. + * + * - Writing to an immutable slot is illegal. + * + * - Forming a mutable alias to an immutable slot is illegal. + * + *) + let visit_ty_pre t = + match t with + Ast.TY_chan t' when type_has_state t' -> + err None "channel of mutable type: %a " Ast.sprintf_ty t' + | _ -> () + in + + let check_write id dst = + let dst_slot = lval_slot cx dst in + if (dst_slot.Ast.slot_mutable or + (Hashtbl.mem cx.ctxt_copy_stmt_is_init id)) + then () + else err (Some id) "writing to non-mutable slot" + in + (* FIXME: enforce the no-write-alias-to-immutable-slot rule. *) + let visit_stmt_pre s = + begin + match s.node with + Ast.STMT_copy (dst, _) -> check_write s.id dst + | Ast.STMT_copy_binop (dst, _, _) -> check_write s.id dst + | Ast.STMT_call (dst, _, _) -> check_write s.id dst + | Ast.STMT_recv (dst, _) -> check_write s.id dst + | _ -> () + end; + inner.Walk.visit_stmt_pre s + in + + { inner with + Walk.visit_ty_pre = visit_ty_pre; + Walk.visit_stmt_pre = visit_stmt_pre } +;; + +let function_effect_propagation_visitor + (item_effect:(node_id, Ast.effect) Hashtbl.t) + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + (* + * This visitor calculates the effect of each function according to + * its statements: + * + * - Communication lowers to 'io' + * - Native calls lower to 'unsafe' + * - Calling a function with effect e lowers to e. + *) + let curr_fn = Stack.create () in + let visit_mod_item_pre n p i = + begin + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn _ -> Stack.push i.id curr_fn + | _ -> () + end; + inner.Walk.visit_mod_item_pre n p i + in + let visit_mod_item_post n p i = + inner.Walk.visit_mod_item_post n p i; + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn _ -> ignore (Stack.pop curr_fn) + | _ -> () + in + let visit_obj_drop_pre o b = + Stack.push b.id curr_fn; + inner.Walk.visit_obj_drop_pre o b + in + let visit_obj_drop_post o b = + inner.Walk.visit_obj_drop_post o b; + ignore (Stack.pop curr_fn); + in + + let lower_to s ne = + let fn_id = Stack.top curr_fn in + let e = + match htab_search item_effect fn_id with + None -> Ast.PURE + | Some e -> e + in + let ne = lower_effect_of ne e in + if ne <> e + then + begin + iflog cx + begin + fun _ -> + let name = Hashtbl.find cx.ctxt_all_item_names fn_id in + log cx "lowering calculated effect on '%a': '%a' -> '%a'" + Ast.sprintf_name name + Ast.sprintf_effect e + Ast.sprintf_effect ne; + log cx "at stmt %a" Ast.sprintf_stmt s + end; + Hashtbl.replace item_effect fn_id ne + end; + in + + let visit_stmt_pre s = + begin + match s.node with + Ast.STMT_send _ + | Ast.STMT_recv _ -> lower_to s Ast.IO + + | Ast.STMT_call (_, fn, _) -> + let lower_to_callee_ty t = + match t with + Ast.TY_fn (_, taux) -> + lower_to s taux.Ast.fn_effect; + | _ -> bug () "non-fn callee" + in + if lval_is_slot cx fn + then + let t = lval_slot cx fn in + lower_to_callee_ty (slot_ty t) + else + begin + let item = lval_item cx fn in + let t = Hashtbl.find cx.ctxt_all_item_types item.id in + lower_to_callee_ty t; + match htab_search cx.ctxt_required_items item.id with + None -> () + | Some (REQUIRED_LIB_rust _, _) -> () + | Some _ -> lower_to s Ast.UNSAFE + end + | _ -> () + end; + inner.Walk.visit_stmt_pre s + in + + { inner with + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; + Walk.visit_obj_drop_pre = visit_obj_drop_pre; + Walk.visit_obj_drop_post = visit_obj_drop_post; + Walk.visit_stmt_pre = visit_stmt_pre } +;; + +let binding_effect_propagation_visitor + ((*cx*)_:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + (* This visitor lowers the effect of an object or binding according + * to its slots: holding a 'state' slot lowers any obj item, or + * bind-stmt LHS, to 'state'. + * + * Binding (or implicitly just making a native 1st-class) makes the LHS + * unsafe. + *) + inner +;; + +let effect_checking_visitor + (item_auth:(node_id, Ast.effect) Hashtbl.t) + (item_effect:(node_id, Ast.effect) Hashtbl.t) + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + (* + * This visitor checks that each type, item and obj declares + * effects consistent with what we calculated. + *) + let auth_stack = Stack.create () in + let visit_mod_item_pre n p i = + begin + match htab_search item_auth i.id with + None -> () + | Some e -> + let curr = + if Stack.is_empty auth_stack + then Ast.PURE + else Stack.top auth_stack + in + let next = lower_effect_of e curr in + Stack.push next auth_stack; + iflog cx + begin + fun _ -> + let name = Hashtbl.find cx.ctxt_all_item_names i.id in + log cx + "entering '%a', adjusting auth effect: '%a' -> '%a'" + Ast.sprintf_name name + Ast.sprintf_effect curr + Ast.sprintf_effect next + end + end; + begin + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn f -> + let e = + match htab_search item_effect i.id with + None -> Ast.PURE + | Some e -> e + in + let fe = f.Ast.fn_aux.Ast.fn_effect in + let ae = + if Stack.is_empty auth_stack + then None + else Some (Stack.top auth_stack) + in + if e <> fe && (ae <> (Some e)) + then + begin + let name = Hashtbl.find cx.ctxt_all_item_names i.id in + err (Some i.id) + "%a claims effect '%a' but calculated effect is '%a'%s" + Ast.sprintf_name name + Ast.sprintf_effect fe + Ast.sprintf_effect e + begin + match ae with + Some ae when ae <> fe -> + Printf.sprintf " (auth effect is '%a')" + Ast.sprintf_effect ae + | _ -> "" + end + end + | _ -> () + end; + inner.Walk.visit_mod_item_pre n p i + in + let visit_mod_item_post n p i = + inner.Walk.visit_mod_item_post n p i; + match htab_search item_auth i.id with + None -> () + | Some _ -> + let curr = Stack.pop auth_stack in + let next = + if Stack.is_empty auth_stack + then Ast.PURE + else Stack.top auth_stack + in + iflog cx + begin + fun _ -> + let name = Hashtbl.find cx.ctxt_all_item_names i.id in + log cx + "leaving '%a', restoring auth effect: '%a' -> '%a'" + Ast.sprintf_name name + Ast.sprintf_effect curr + Ast.sprintf_effect next + end + in + { inner with + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; } +;; + + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : unit = + let path = Stack.create () in + let item_auth = Hashtbl.create 0 in + let item_effect = Hashtbl.create 0 in + let passes = + [| + (mutability_checking_visitor cx + Walk.empty_visitor); + (function_effect_propagation_visitor item_effect cx + Walk.empty_visitor); + (binding_effect_propagation_visitor cx + Walk.empty_visitor); + (effect_checking_visitor item_auth item_effect cx + Walk.empty_visitor); + |] + in + let root_scope = [ SCOPE_crate crate ] in + let auth_effect name eff = + match lookup_by_name cx root_scope name with + None -> () + | Some (_, id) -> + if referent_is_item cx id + then htab_put item_auth id eff + else err (Some id) "auth clause in crate refers to non-item" + in + Hashtbl.iter auth_effect crate.node.Ast.crate_auth; + run_passes cx "effect" path passes (log cx "%s") crate +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/layout.ml b/src/boot/me/layout.ml new file mode 100644 index 00000000..6c4567fd --- /dev/null +++ b/src/boot/me/layout.ml @@ -0,0 +1,470 @@ +open Semant;; +open Common;; + +let log cx = Session.log "layout" + cx.ctxt_sess.Session.sess_log_layout + cx.ctxt_sess.Session.sess_log_out +;; + +type slot_stack = Il.referent_ty Stack.t;; +type frame_blocks = slot_stack Stack.t;; + +let layout_visitor + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + (* + * - Frames look, broadly, like this (growing downward): + * + * +----------------------------+ <-- Rewind tail calls to here. + * |caller args | + * |... | + * |... | + * +----------------------------+ <-- fp + abi_frame_base_sz + * |task ptr (implicit arg) | + abi_implicit_args_sz + * |output ptr (implicit arg) | + * +----------------------------+ <-- fp + abi_frame_base_sz + * |return pc | + * |callee-save registers | + * |... | + * +----------------------------+ <-- fp + * |crate ptr | + * |crate-rel frame info disp | + * +----------------------------+ <-- fp - abi_frame_info_sz + * |spills determined in ra | + * |... | + * |... | + * +----------------------------+ <-- fp - (abi_frame_info_sz + * |... | + spillsz) + * |frame-allocated stuff | + * |determined in resolve | + * |laid out in layout | + * |... | + * |... | + * +----------------------------+ <-- fp - framesz + * |call space | == sp + callsz + * |... | + * |... | + * +----------------------------+ <-- fp - (framesz + callsz) == sp + * + * - Slot offsets fall into three classes: + * + * #1 frame-locals are negative offsets from fp + * (beneath the frame-info and spills) + * + * #2 incoming arg slots are positive offsets from fp + * (above the frame-base) + * + * #3 outgoing arg slots are positive offsets from sp + * + * - Slots are split into two classes: + * + * #1 those that are never aliased and fit in a word, so are + * vreg-allocated + * + * #2 all others + * + * - Non-aliased, word-fitting slots consume no frame space + * *yet*; they are given a generic value that indicates "try a + * vreg". The register allocator may spill them later, if it + * needs to, but that's not our concern. + * + * - Aliased / too-big slots are frame-allocated, need to be + * laid out in the frame at fixed offsets. + * + * - The frame size is the maximum of all the block sizes contained + * within it. Though at the moment it's the sum of them, due to + * the blood-curdling hack we use to ensure proper unwind/drop + * behavior in absence of CFI or similar precise frame-evolution + * tracking. See visit_block_post below (issue #27). + * + * - Each call is examined and the size of the call tuple required + * for that call is calculated. The call size is the maximum of all + * such call tuples. + * + * - In frames that have a tail call (in fact, currently, all frames + * because we're lazy) we double the call size in order to handle + * the possible need to *execute* a call (to drop glue) while + * destroying the frame, after we've built the outgoing args. This is + * done in the backend though; the logic in this file is ignorant of the + * doubling (some platforms may not require it? Hard to guess) + * + *) + + let force_slot_to_mem (slot:Ast.slot) : bool = + (* FIXME (issue #26): For the time being we force any slot that + * points into memory or is of opaque/code type to be stored in the + * frame rather than in a vreg. This can probably be relaxed in the + * future. + *) + let rec st_in_mem st = + match st with + Il.ValTy _ -> false + | Il.AddrTy _ -> true + + and rt_in_mem rt = + match rt with + Il.ScalarTy st -> st_in_mem st + | Il.StructTy rts + | Il.UnionTy rts -> List.exists rt_in_mem (Array.to_list rts) + | Il.OpaqueTy + | Il.ParamTy _ + | Il.CodeTy -> true + | Il.NilTy -> false + in + rt_in_mem (slot_referent_type cx.ctxt_abi slot) + in + + let rty_sz rty = Il.referent_ty_size cx.ctxt_abi.Abi.abi_word_bits rty in + let rty_layout rty = + Il.referent_ty_layout cx.ctxt_abi.Abi.abi_word_bits rty + in + + let is_subword_size sz = + match sz with + SIZE_fixed i -> i64_le i cx.ctxt_abi.Abi.abi_word_sz + | _ -> false + in + + let iflog thunk = + if cx.ctxt_sess.Session.sess_log_layout + then thunk () + else () + in + + let layout_slot_ids + (slot_accum:slot_stack) + (upwards:bool) + (vregs_ok:bool) + (offset:size) + (slots:node_id array) + : unit = + let accum (off,align) id : (size * size) = + let slot = referent_to_slot cx id in + let rt = slot_referent_type cx.ctxt_abi slot in + let (elt_size, elt_align) = rty_layout rt in + if vregs_ok + && (is_subword_size elt_size) + && (not (type_is_structured (slot_ty slot))) + && (not (force_slot_to_mem slot)) + && (not (Hashtbl.mem cx.ctxt_slot_aliased id)) + then + begin + iflog + begin + fun _ -> + let k = Hashtbl.find cx.ctxt_slot_keys id in + log cx "assigning slot #%d = %a to vreg" + (int_of_node id) + Ast.sprintf_slot_key k; + end; + htab_put cx.ctxt_slot_vregs id (ref None); + (off,align) + end + else + begin + let elt_off = align_sz elt_align off in + let frame_off = + if upwards + then elt_off + else neg_sz (add_sz elt_off elt_size) + in + Stack.push (slot_referent_type cx.ctxt_abi slot) slot_accum; + iflog + begin + fun _ -> + let k = Hashtbl.find cx.ctxt_slot_keys id in + log cx "assigning slot #%d = %a frame-offset %s" + (int_of_node id) + Ast.sprintf_slot_key k + (string_of_size frame_off); + end; + if (not (Hashtbl.mem cx.ctxt_slot_offsets id)) + then htab_put cx.ctxt_slot_offsets id frame_off; + (add_sz elt_off elt_size, max_sz elt_align align) + end + in + ignore (Array.fold_left accum (offset, SIZE_fixed 0L) slots) + in + + let layout_block + (slot_accum:slot_stack) + (offset:size) + (block:Ast.block) + : unit = + log cx "laying out block #%d at fp offset %s" + (int_of_node block.id) (string_of_size offset); + let block_slot_ids = + Array.of_list (htab_vals (Hashtbl.find cx.ctxt_block_slots block.id)) + in + layout_slot_ids slot_accum false true offset block_slot_ids + in + + let layout_header (id:node_id) (input_slot_ids:node_id array) : unit = + let rty = direct_call_args_referent_type cx id in + let offset = + match rty with + Il.StructTy elts -> + (add_sz + (SIZE_fixed cx.ctxt_abi.Abi.abi_frame_base_sz) + (Il.get_element_offset + cx.ctxt_abi.Abi.abi_word_bits + elts Abi.calltup_elt_args)) + | _ -> bug () "call tuple has non-StructTy" + in + log cx "laying out header for node #%d at fp offset %s" + (int_of_node id) (string_of_size offset); + layout_slot_ids (Stack.create()) true false offset input_slot_ids + in + + let layout_obj_state (id:node_id) (state_slot_ids:node_id array) : unit = + let offset = + let word_sz = cx.ctxt_abi.Abi.abi_word_sz in + let word_n (n:int) = Int64.mul word_sz (Int64.of_int n) in + SIZE_fixed (word_n (Abi.exterior_rc_slot_field_body + + 1 (* the state tydesc. *))) + in + log cx "laying out object-state for node #%d at offset %s" + (int_of_node id) (string_of_size offset); + layout_slot_ids (Stack.create()) true false offset state_slot_ids + in + + let (frame_stack:(node_id * frame_blocks) Stack.t) = Stack.create() in + + let block_rty (block:slot_stack) : Il.referent_ty = + Il.StructTy (Array.of_list (stk_elts_from_bot block)) + in + + let frame_rty (frame:frame_blocks) : Il.referent_ty = + Il.StructTy (Array.of_list (List.map block_rty (stk_elts_from_bot frame))) + in + + let update_frame_size _ = + let (frame_id, frame_blocks) = Stack.top frame_stack in + let frame_spill = Hashtbl.find cx.ctxt_spill_fixups frame_id in + let sz = + add_sz + (add_sz + (rty_sz (frame_rty frame_blocks)) + (SIZE_fixup_mem_sz frame_spill)) + (SIZE_fixed cx.ctxt_abi.Abi.abi_frame_info_sz) + in + let curr = Hashtbl.find cx.ctxt_frame_sizes frame_id in + let sz = max_sz curr sz in + log cx "extending frame #%d frame to size %s" + (int_of_node frame_id) (string_of_size sz); + Hashtbl.replace cx.ctxt_frame_sizes frame_id sz + in + + (* + * FIXME: this is a little aggressive for default callsz; it can be + * narrowed in frames with no drop glue and/or no indirect drop glue. + *) + + let glue_callsz = + let word = interior_slot Ast.TY_int in + let glue_fn = + mk_simple_ty_fn + (Array.init Abi.worst_case_glue_call_args (fun _ -> word)) + in + rty_sz (indirect_call_args_referent_type cx 0 glue_fn Il.OpaqueTy) + in + + let enter_frame id = + Stack.push (id, (Stack.create())) frame_stack; + htab_put cx.ctxt_frame_sizes id (SIZE_fixed 0L); + htab_put cx.ctxt_call_sizes id glue_callsz; + htab_put cx.ctxt_spill_fixups id (new_fixup "frame spill fixup"); + htab_put cx.ctxt_frame_blocks id []; + update_frame_size (); + in + + let leave_frame _ = + ignore (Stack.pop frame_stack); + in + + let header_slot_ids hdr = Array.map (fun (sid,_) -> sid.id) hdr in + + let visit_mod_item_pre n p i = + begin + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn f -> + enter_frame i.id; + layout_header i.id + (header_slot_ids f.Ast.fn_input_slots) + + | Ast.MOD_ITEM_tag (header_slots, _, _) -> + enter_frame i.id; + layout_header i.id + (Array.map (fun sid -> sid.id) header_slots) + + | Ast.MOD_ITEM_obj obj -> + enter_frame i.id; + let ids = header_slot_ids obj.Ast.obj_state in + layout_obj_state i.id ids; + Array.iter + (fun id -> htab_put cx.ctxt_slot_is_obj_state id ()) + ids + + | _ -> () + end; + inner.Walk.visit_mod_item_pre n p i + in + + let visit_mod_item_post n p i = + inner.Walk.visit_mod_item_post n p i; + begin + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn _ + | Ast.MOD_ITEM_tag _ + | Ast.MOD_ITEM_obj _ -> leave_frame () + | _ -> () + end + in + + let visit_obj_fn_pre obj ident fn = + enter_frame fn.id; + layout_header fn.id + (header_slot_ids fn.node.Ast.fn_input_slots); + inner.Walk.visit_obj_fn_pre obj ident fn + in + + let visit_obj_fn_post obj ident fn = + inner.Walk.visit_obj_fn_post obj ident fn; + leave_frame () + in + + let visit_obj_drop_pre obj b = + enter_frame b.id; + inner.Walk.visit_obj_drop_pre obj b + in + + let visit_obj_drop_post obj b = + inner.Walk.visit_obj_drop_post obj b; + leave_frame () + in + + let visit_block_pre b = + if Hashtbl.mem cx.ctxt_block_is_loop_body b.id + then enter_frame b.id; + let (frame_id, frame_blocks) = Stack.top frame_stack in + let frame_spill = Hashtbl.find cx.ctxt_spill_fixups frame_id in + let spill_sz = SIZE_fixup_mem_sz frame_spill in + let info_sz = SIZE_fixed cx.ctxt_abi.Abi.abi_frame_info_sz in + let locals_off = add_sz spill_sz info_sz in + let off = + if Stack.is_empty frame_blocks + then locals_off + else + add_sz locals_off (rty_sz (frame_rty frame_blocks)) + in + let block_slots = Stack.create() in + let frame_block_ids = Hashtbl.find cx.ctxt_frame_blocks frame_id in + Hashtbl.replace cx.ctxt_frame_blocks frame_id (b.id :: frame_block_ids); + layout_block block_slots off b; + Stack.push block_slots frame_blocks; + update_frame_size (); + inner.Walk.visit_block_pre b + in + + let visit_block_post b = + inner.Walk.visit_block_post b; + if Hashtbl.mem cx.ctxt_block_is_loop_body b.id + then leave_frame(); + (* FIXME (issue #27): In earlier versions of this file, multiple + * lexical blocks in the same frame would reuse space from one to + * the next so long as they were not nested; The (commented-out) + * code here supports that logic. Unfortunately since our marking + * and unwinding strategy is very simplistic for now (analogous to + * shadow stacks) we're going to give each lexical block in a frame + * its own space in the frame, even if they seem like they *should* + * be able to reuse space. This makes it possible to arrive at the + * frame and work out which variables are live (and which frame + * memory corresponds to them) w/o paying attention to the current + * pc in the function; a greatly-simplifying assumption. + * + * This is of course not optimal for the long term, but in the + * longer term we'll have time to form proper DWARF CFI + * records. We're in a hurry at the moment. *) + (* + let stk = Stack.top block_stacks in + ignore (Stack.pop stk) + *) + in + + let visit_stmt_pre (s:Ast.stmt) : unit = + + (* Call-size calculation. *) + begin + let callees = + match s.node with + Ast.STMT_call (_, lv, _) + | Ast.STMT_spawn (_, _, lv, _) -> [| lv |] + | Ast.STMT_check (_, calls) -> Array.map (fun (lv, _) -> lv) calls + | _ -> [| |] + in + Array.iter + begin + fun (callee:Ast.lval) -> + let lv_ty = lval_ty cx callee in + let abi = cx.ctxt_abi in + let static = lval_is_static cx callee in + let closure = if static then None else Some Il.OpaqueTy in + let n_ty_params = + match resolve_lval cx callee with + DEFN_item i -> Array.length i.Ast.decl_params + | _ -> 0 + in + let rty = + call_args_referent_type cx n_ty_params lv_ty closure + in + let sz = Il.referent_ty_size abi.Abi.abi_word_bits rty in + let frame_id = fst (Stack.top frame_stack) in + let curr = Hashtbl.find cx.ctxt_call_sizes frame_id in + log cx "extending frame #%d call size to %s" + (int_of_node frame_id) (string_of_size (max_sz curr sz)); + Hashtbl.replace cx.ctxt_call_sizes frame_id (max_sz curr sz) + end + callees + end; + inner.Walk.visit_stmt_pre s + in + + + { inner with + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; + + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_fn_post = visit_obj_fn_post; + Walk.visit_obj_drop_pre = visit_obj_drop_pre; + Walk.visit_obj_drop_post = visit_obj_drop_post; + + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_block_pre = visit_block_pre; + Walk.visit_block_post = visit_block_post } +;; + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : unit = + let path = Stack.create () in + let passes = + [| + (layout_visitor cx + Walk.empty_visitor) + |]; + in + run_passes cx "layout" path passes (log cx "%s") crate +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/loop.ml b/src/boot/me/loop.ml new file mode 100644 index 00000000..c23c4afd --- /dev/null +++ b/src/boot/me/loop.ml @@ -0,0 +1,163 @@ +(* + * Computes iterator-loop nesting depths and max depth of each function. + *) + +open Semant;; +open Common;; + +let log cx = Session.log "loop" + cx.ctxt_sess.Session.sess_log_loop + cx.ctxt_sess.Session.sess_log_out +;; + +type fn_ctxt = { current_depth: int; } +;; + +let incr_depth (fcx:fn_ctxt) = + { current_depth = fcx.current_depth + 1; } +;; + +let decr_depth (fcx:fn_ctxt) = + { current_depth = fcx.current_depth - 1; } +;; + +let top_fcx = { current_depth = 0; } +;; + +let loop_depth_visitor + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + + let (fcxs : fn_ctxt Stack.t) = Stack.create () in + + let push_loop () = + let fcx = Stack.pop fcxs in + Stack.push (incr_depth fcx) fcxs + in + + let pop_loop () = + let fcx = Stack.pop fcxs in + Stack.push (decr_depth fcx) fcxs + in + + let visit_mod_item_pre + (ident:Ast.ident) + (ty_params:(Ast.ty_param identified) array) + (item:Ast.mod_item) + : unit = + Stack.push top_fcx fcxs; + inner.Walk.visit_mod_item_pre ident ty_params item + in + + let visit_mod_item_post + (ident:Ast.ident) + (ty_params:(Ast.ty_param identified) array) + (item:Ast.mod_item) + : unit = + inner.Walk.visit_mod_item_post ident ty_params item; + ignore (Stack.pop fcxs); + in + + let visit_obj_fn_pre + (obj:Ast.obj identified) + (ident:Ast.ident) + (fn:Ast.fn identified) + : unit = + Stack.push top_fcx fcxs; + inner.Walk.visit_obj_fn_pre obj ident fn + in + + let visit_obj_fn_post + (obj:Ast.obj identified) + (ident:Ast.ident) + (fn:Ast.fn identified) + : unit = + inner.Walk.visit_obj_fn_pre obj ident fn; + ignore (Stack.pop fcxs) + in + + let visit_obj_drop_pre + (obj:Ast.obj identified) + (b:Ast.block) + : unit = + Stack.push top_fcx fcxs; + inner.Walk.visit_obj_drop_pre obj b + in + + let visit_obj_drop_post + (obj:Ast.obj identified) + (b:Ast.block) + : unit = + inner.Walk.visit_obj_drop_post obj b; + ignore (Stack.pop fcxs) + in + + let visit_slot_identified_pre sloti = + let fcx = Stack.top fcxs in + htab_put cx.ctxt_slot_loop_depths sloti.id fcx.current_depth; + inner.Walk.visit_slot_identified_pre sloti + in + + let visit_stmt_pre s = + let fcx = Stack.top fcxs in + htab_put cx.ctxt_stmt_loop_depths s.id fcx.current_depth; + begin + match s.node with + | Ast.STMT_for_each fe -> + htab_put cx.ctxt_block_is_loop_body fe.Ast.for_each_body.id (); + | _ -> () + end; + inner.Walk.visit_stmt_pre s + in + + let visit_block_pre b = + if Hashtbl.mem cx.ctxt_block_is_loop_body b.id + then push_loop (); + inner.Walk.visit_block_pre b + in + + let visit_block_post b = + inner.Walk.visit_block_post b; + if Hashtbl.mem cx.ctxt_block_is_loop_body b.id + then pop_loop () + in + + { inner with + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_fn_post = visit_obj_fn_post; + Walk.visit_obj_drop_pre = visit_obj_drop_pre; + Walk.visit_obj_drop_post = visit_obj_drop_post; + Walk.visit_slot_identified_pre = visit_slot_identified_pre; + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_block_pre = visit_block_pre; + Walk.visit_block_post = visit_block_post } +;; + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : unit = + let path = Stack.create () in + let passes = + [| + (loop_depth_visitor cx + Walk.empty_visitor) + |] + in + + run_passes cx "loop" path passes (log cx "%s") crate; + () +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/resolve.ml b/src/boot/me/resolve.ml new file mode 100644 index 00000000..8f034aee --- /dev/null +++ b/src/boot/me/resolve.ml @@ -0,0 +1,959 @@ +open Semant;; +open Common;; + +(* + * Resolution passes: + * + * - build multiple 'scope' hashtables mapping slot_key -> node_id + * - build single 'type inference' hashtable mapping node_id -> slot + * + * (note: not every slot is identified; only those that are declared + * in statements and/or can participate in local type inference. + * Those in function signatures are not, f.e. Also no type values + * are identified, though module items are. ) + * + *) + + +let log cx = Session.log "resolve" + cx.ctxt_sess.Session.sess_log_resolve + cx.ctxt_sess.Session.sess_log_out +;; + +let iflog cx thunk = + if cx.ctxt_sess.Session.sess_log_resolve + then thunk () + else () +;; + + +let block_scope_forming_visitor + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + let visit_block_pre b = + if not (Hashtbl.mem cx.ctxt_block_items b.id) + then htab_put cx.ctxt_block_items b.id (Hashtbl.create 0); + if not (Hashtbl.mem cx.ctxt_block_slots b.id) + then htab_put cx.ctxt_block_slots b.id (Hashtbl.create 0); + inner.Walk.visit_block_pre b + in + { inner with Walk.visit_block_pre = visit_block_pre } +;; + + +let stmt_collecting_visitor + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + let block_ids = Stack.create () in + let visit_block_pre (b:Ast.block) = + Stack.push b.id block_ids; + inner.Walk.visit_block_pre b + in + let visit_block_post (b:Ast.block) = + inner.Walk.visit_block_post b; + ignore (Stack.pop block_ids) + in + + let visit_for_block + ((si:Ast.slot identified),(ident:Ast.ident)) + (block_id:node_id) + : unit = + let slots = Hashtbl.find cx.ctxt_block_slots block_id in + let key = Ast.KEY_ident ident in + log cx "found decl of '%s' in for-loop block header" ident; + htab_put slots key si.id; + htab_put cx.ctxt_slot_keys si.id key + in + + let visit_stmt_pre stmt = + begin + htab_put cx.ctxt_all_stmts stmt.id stmt; + match stmt.node with + Ast.STMT_decl d -> + begin + let bid = Stack.top block_ids in + let items = Hashtbl.find cx.ctxt_block_items bid in + let slots = Hashtbl.find cx.ctxt_block_slots bid in + let check_and_log_ident id ident = + if Hashtbl.mem items ident || + Hashtbl.mem slots (Ast.KEY_ident ident) + then + err (Some id) + "duplicate declaration '%s' in block" ident + else + log cx "found decl of '%s' in block" ident + in + let check_and_log_tmp id tmp = + if Hashtbl.mem slots (Ast.KEY_temp tmp) + then + err (Some id) + "duplicate declaration of temp #%d in block" + (int_of_temp tmp) + else + log cx "found decl of temp #%d in block" (int_of_temp tmp) + in + let check_and_log_key id key = + match key with + Ast.KEY_ident i -> check_and_log_ident id i + | Ast.KEY_temp t -> check_and_log_tmp id t + in + match d with + Ast.DECL_mod_item (ident, item) -> + check_and_log_ident item.id ident; + htab_put items ident item.id + | Ast.DECL_slot (key, sid) -> + check_and_log_key sid.id key; + htab_put slots key sid.id; + htab_put cx.ctxt_slot_keys sid.id key + end + | Ast.STMT_for f -> + visit_for_block f.Ast.for_slot f.Ast.for_body.id + | Ast.STMT_for_each f -> + visit_for_block f.Ast.for_each_slot f.Ast.for_each_head.id + | Ast.STMT_alt_tag { Ast.alt_tag_arms = arms } -> + let rec resolve_pat block pat = + match pat with + Ast.PAT_slot ({ id = slot_id }, ident) -> + let slots = Hashtbl.find cx.ctxt_block_slots block.id in + let key = Ast.KEY_ident ident in + htab_put slots key slot_id; + htab_put cx.ctxt_slot_keys slot_id key + | Ast.PAT_tag (_, pats) -> Array.iter (resolve_pat block) pats + | Ast.PAT_lit _ | Ast.PAT_wild -> () + in + Array.iter (fun { node = (p, b) } -> resolve_pat b p) arms + | _ -> () + end; + inner.Walk.visit_stmt_pre stmt + in + { inner with + Walk.visit_block_pre = visit_block_pre; + Walk.visit_block_post = visit_block_post; + Walk.visit_stmt_pre = visit_stmt_pre } +;; + + +let all_item_collecting_visitor + (cx:ctxt) + (path:Ast.name_component Stack.t) + (inner:Walk.visitor) + : Walk.visitor = + + let items = Stack.create () in + + let push_on_item_arg_list item_id arg_id = + let existing = + match htab_search cx.ctxt_frame_args item_id with + None -> [] + | Some x -> x + in + htab_put cx.ctxt_slot_is_arg arg_id (); + Hashtbl.replace cx.ctxt_frame_args item_id (arg_id :: existing) + in + + let note_header item_id header = + Array.iter + (fun (sloti,ident) -> + let key = Ast.KEY_ident ident in + htab_put cx.ctxt_slot_keys sloti.id key; + push_on_item_arg_list item_id sloti.id) + header; + in + + let visit_mod_item_pre n p i = + Stack.push i.id items; + Array.iter (fun p -> htab_put cx.ctxt_all_defns p.id + (DEFN_ty_param p.node)) p; + htab_put cx.ctxt_all_defns i.id (DEFN_item i.node); + htab_put cx.ctxt_all_item_names i.id (Walk.path_to_name path); + log cx "collected item #%d: %s" (int_of_node i.id) n; + begin + (* FIXME: this is incomplete. *) + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn f -> + note_header i.id f.Ast.fn_input_slots; + | Ast.MOD_ITEM_obj ob -> + note_header i.id ob.Ast.obj_state; + | Ast.MOD_ITEM_tag (header_slots, _, _) -> + let skey i = Printf.sprintf "_%d" i in + note_header i.id + (Array.mapi (fun i s -> (s, skey i)) header_slots) + | _ -> () + end; + inner.Walk.visit_mod_item_pre n p i + in + + let visit_mod_item_post n p i = + inner.Walk.visit_mod_item_post n p i; + ignore (Stack.pop items) + in + + let visit_obj_fn_pre obj ident fn = + htab_put cx.ctxt_all_defns fn.id (DEFN_obj_fn (obj.id, fn.node)); + htab_put cx.ctxt_all_item_names fn.id (Walk.path_to_name path); + note_header fn.id fn.node.Ast.fn_input_slots; + inner.Walk.visit_obj_fn_pre obj ident fn + in + + let visit_obj_drop_pre obj b = + htab_put cx.ctxt_all_defns b.id (DEFN_obj_drop obj.id); + htab_put cx.ctxt_all_item_names b.id (Walk.path_to_name path); + inner.Walk.visit_obj_drop_pre obj b + in + + let visit_stmt_pre s = + begin + match s.node with + Ast.STMT_for_each fe -> + let id = fe.Ast.for_each_body.id in + htab_put cx.ctxt_all_defns id + (DEFN_loop_body (Stack.top items)); + htab_put cx.ctxt_all_item_names id + (Walk.path_to_name path); + | _ -> () + end; + inner.Walk.visit_stmt_pre s; + in + + { inner with + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_drop_pre = visit_obj_drop_pre; + Walk.visit_stmt_pre = visit_stmt_pre; } +;; + + +let lookup_type_node_by_name + (cx:ctxt) + (scopes:scope list) + (name:Ast.name) + : node_id = + iflog cx (fun _ -> + log cx "lookup_simple_type_by_name %a" + Ast.sprintf_name name); + match lookup_by_name cx scopes name with + None -> err None "unknown name: %a" Ast.sprintf_name name + | Some (_, id) -> + match htab_search cx.ctxt_all_defns id with + Some (DEFN_item { Ast.decl_item = Ast.MOD_ITEM_type _ }) + | Some (DEFN_item { Ast.decl_item = Ast.MOD_ITEM_obj _ }) + | Some (DEFN_ty_param _) -> id + | _ -> + err None "Found non-type binding for %a" + Ast.sprintf_name name +;; + + +let get_ty_references + (t:Ast.ty) + (cx:ctxt) + (scopes:scope list) + : node_id list = + let base = ty_fold_list_concat () in + let ty_fold_named n = + [ lookup_type_node_by_name cx scopes n ] + in + let fold = { base with ty_fold_named = ty_fold_named } in + fold_ty fold t +;; + + +let type_reference_and_tag_extracting_visitor + (cx:ctxt) + (scopes:(scope list) ref) + (node_to_references:(node_id,node_id list) Hashtbl.t) + (all_tags:(node_id,(Ast.ty_tag * (scope list))) Hashtbl.t) + (inner:Walk.visitor) + : Walk.visitor = + let visit_mod_item_pre id params item = + begin + match item.node.Ast.decl_item with + Ast.MOD_ITEM_type ty -> + begin + log cx "extracting references for type node %d" + (int_of_node item.id); + let referenced = get_ty_references ty cx (!scopes) in + List.iter + (fun i -> log cx "type %d references type %d" + (int_of_node item.id) (int_of_node i)) referenced; + htab_put node_to_references item.id referenced; + match ty with + Ast.TY_tag ttag -> + htab_put all_tags item.id (ttag, (!scopes)) + | _ -> () + end + | _ -> () + end; + inner.Walk.visit_mod_item_pre id params item + in + { inner with + Walk.visit_mod_item_pre = visit_mod_item_pre } +;; + + +type recur_info = + { recur_all_nodes: node_id list; + recur_curr_iso: (node_id array) option; } +;; + +let empty_recur_info = + { recur_all_nodes = []; + recur_curr_iso = None } +;; + +let push_node r n = + { r with recur_all_nodes = n :: r.recur_all_nodes } +;; + +let set_iso r i = + { r with recur_curr_iso = Some i } +;; + + +let index_in_curr_iso (recur:recur_info) (node:node_id) : int option = + match recur.recur_curr_iso with + None -> None + | Some iso -> + let rec search i = + if i >= (Array.length iso) + then None + else + if iso.(i) = node + then Some i + else search (i+1) + in + search 0 +;; + +let need_ty_tag t = + match t with + Ast.TY_tag ttag -> ttag + | _ -> err None "needed ty_tag" +;; + + +let rec ty_iso_of + (cx:ctxt) + (recursive_tag_groups:(node_id,(node_id,unit) Hashtbl.t) Hashtbl.t) + (all_tags:(node_id,(Ast.ty_tag * (scope list))) Hashtbl.t) + (n:node_id) + : Ast.ty = + let _ = iflog cx (fun _ -> log cx "+++ ty_iso_of #%d" (int_of_node n)) in + let group_table = Hashtbl.find recursive_tag_groups n in + let group_array = Array.of_list (htab_keys group_table) in + let compare_nodes a_id b_id = + (* FIXME: this should sort by the sorted name-lists of the + *constructors* of the tag, not the tag type name. *) + let a_name = Hashtbl.find cx.ctxt_all_item_names a_id in + let b_name = Hashtbl.find cx.ctxt_all_item_names b_id in + compare a_name b_name + in + let recur = set_iso (push_node empty_recur_info n) group_array in + let resolve_member member = + let (tag, scopes) = Hashtbl.find all_tags member in + let ty = Ast.TY_tag tag in + let ty = resolve_type cx scopes recursive_tag_groups all_tags recur ty in + need_ty_tag ty + in + Array.sort compare_nodes group_array; + log cx "resolving node %d, %d-member iso group" + (int_of_node n) (Array.length group_array); + Array.iteri (fun i n -> log cx "member %d: %d" i + (int_of_node n)) group_array; + let group = Array.map resolve_member group_array in + let rec search i = + if i >= (Array.length group_array) + then err None "node is not a member of its own iso group" + else + if group_array.(i) = n + then i + else search (i+1) + in + let iso = + Ast.TY_iso { Ast.iso_index = (search 0); + Ast.iso_group = group } + in + iflog cx (fun _ -> + log cx "--- ty_iso_of #%d ==> %a" + (int_of_node n) Ast.sprintf_ty iso); + iso + + +and lookup_type_by_name + (cx:ctxt) + (scopes:scope list) + (recursive_tag_groups:(node_id,(node_id,unit) Hashtbl.t) Hashtbl.t) + (all_tags:(node_id,(Ast.ty_tag * (scope list))) Hashtbl.t) + (recur:recur_info) + (name:Ast.name) + : ((scope list) * node_id * Ast.ty) = + iflog cx (fun _ -> + log cx "+++ lookup_type_by_name %a" + Ast.sprintf_name name); + match lookup_by_name cx scopes name with + None -> err None "unknown name: %a" Ast.sprintf_name name + | Some (scopes', id) -> + let ty, params = + match htab_search cx.ctxt_all_defns id with + Some (DEFN_item { Ast.decl_item = Ast.MOD_ITEM_type t; + Ast.decl_params = params }) -> + (t, Array.map (fun p -> p.node) params) + | Some (DEFN_item { Ast.decl_item = Ast.MOD_ITEM_obj ob; + Ast.decl_params = params }) -> + (Ast.TY_obj (ty_obj_of_obj ob), + Array.map (fun p -> p.node) params) + | Some (DEFN_ty_param (_, x)) -> + (Ast.TY_param x, [||]) + | _ -> + err None "Found non-type binding for %a" + Ast.sprintf_name name + in + let args = + match name with + Ast.NAME_ext (_, Ast.COMP_app (_, args)) -> args + | Ast.NAME_base (Ast.BASE_app (_, args)) -> args + | _ -> [| |] + in + let args = + iflog cx (fun _ -> log cx + "lookup_type_by_name %a resolving %d type args" + Ast.sprintf_name name + (Array.length args)); + Array.mapi + begin + fun i t -> + let t = + resolve_type cx scopes recursive_tag_groups + all_tags recur t + in + iflog cx (fun _ -> log cx + "lookup_type_by_name resolved arg %d to %a" i + Ast.sprintf_ty t); + t + end + args + in + iflog cx + begin + fun _ -> + log cx + "lookup_type_by_name %a found ty %a" + Ast.sprintf_name name Ast.sprintf_ty ty; + log cx "applying %d type args to %d params" + (Array.length args) (Array.length params); + log cx "params: %s" + (Ast.fmt_to_str Ast.fmt_decl_params params); + log cx "args: %s" + (Ast.fmt_to_str Ast.fmt_app_args args); + end; + let ty = rebuild_ty_under_params ty params args true in + iflog cx (fun _ -> log cx "--- lookup_type_by_name %a ==> %a" + Ast.sprintf_name name + Ast.sprintf_ty ty); + (scopes', id, ty) + +and resolve_type + (cx:ctxt) + (scopes:(scope list)) + (recursive_tag_groups:(node_id,(node_id,unit) Hashtbl.t) Hashtbl.t) + (all_tags:(node_id,(Ast.ty_tag * (scope list))) Hashtbl.t) + (recur:recur_info) + (t:Ast.ty) + : Ast.ty = + let _ = iflog cx (fun _ -> log cx "+++ resolve_type %a" Ast.sprintf_ty t) in + let base = ty_fold_rebuild (fun t -> t) in + let ty_fold_named name = + let (scopes, node, t) = + lookup_type_by_name cx scopes recursive_tag_groups all_tags recur name + in + iflog cx (fun _ -> + log cx "resolved type name '%a' to item %d with ty %a" + Ast.sprintf_name name (int_of_node node) Ast.sprintf_ty t); + match index_in_curr_iso recur node with + Some i -> Ast.TY_idx i + | None -> + if Hashtbl.mem recursive_tag_groups node + then + begin + let ttag = need_ty_tag t in + Hashtbl.replace all_tags node (ttag, scopes); + ty_iso_of cx recursive_tag_groups all_tags node + end + else + if List.mem node recur.recur_all_nodes + then (err (Some node) "infinite recursive type definition: '%a'" + Ast.sprintf_name name) + else + let recur = push_node recur node in + iflog cx (fun _ -> log cx "recursively resolving type %a" + Ast.sprintf_ty t); + resolve_type cx scopes recursive_tag_groups all_tags recur t + in + let fold = + { base with + ty_fold_named = ty_fold_named; } + in + let t' = fold_ty fold t in + iflog cx (fun _ -> + log cx "--- resolve_type %a ==> %a" + Ast.sprintf_ty t Ast.sprintf_ty t'); + t' +;; + + +let type_resolving_visitor + (cx:ctxt) + (scopes:(scope list) ref) + (recursive_tag_groups:(node_id,(node_id,unit) Hashtbl.t) Hashtbl.t) + (all_tags:(node_id,(Ast.ty_tag * (scope list))) Hashtbl.t) + (inner:Walk.visitor) + : Walk.visitor = + + let resolve_ty (t:Ast.ty) : Ast.ty = + resolve_type cx (!scopes) recursive_tag_groups all_tags empty_recur_info t + in + + let resolve_slot (s:Ast.slot) : Ast.slot = + match s.Ast.slot_ty with + None -> s + | Some ty -> { s with Ast.slot_ty = Some (resolve_ty ty) } + in + + let resolve_slot_identified + (s:Ast.slot identified) + : (Ast.slot identified) = + try + let slot = resolve_slot s.node in + { s with node = slot } + with + Semant_err (None, e) -> raise (Semant_err ((Some s.id), e)) + in + + let visit_slot_identified_pre slot = + let slot = resolve_slot_identified slot in + htab_put cx.ctxt_all_defns slot.id (DEFN_slot slot.node); + log cx "collected resolved slot #%d with type %s" (int_of_node slot.id) + (match slot.node.Ast.slot_ty with + None -> "??" + | Some t -> (Ast.fmt_to_str Ast.fmt_ty t)); + inner.Walk.visit_slot_identified_pre slot + in + + let visit_mod_item_pre id params item = + begin + try + match item.node.Ast.decl_item with + Ast.MOD_ITEM_type ty -> + let ty = + resolve_type cx (!scopes) recursive_tag_groups + all_tags empty_recur_info ty + in + log cx "resolved item %s, defining type %a" + id Ast.sprintf_ty ty; + htab_put cx.ctxt_all_type_items item.id ty; + htab_put cx.ctxt_all_item_types item.id Ast.TY_type + + (* + * Don't resolve the "type" of a mod item; just resolve its + * members. + *) + | Ast.MOD_ITEM_mod _ -> () + + | Ast.MOD_ITEM_tag (header_slots, _, nid) + when Hashtbl.mem recursive_tag_groups nid -> + begin + match ty_of_mod_item true item with + Ast.TY_fn (tsig, taux) -> + let input_slots = + Array.map + (fun sloti -> resolve_slot sloti.node) + header_slots + in + let output_slot = + interior_slot (ty_iso_of cx recursive_tag_groups + all_tags nid) + in + let ty = + Ast.TY_fn + ({tsig with + Ast.sig_input_slots = input_slots; + Ast.sig_output_slot = output_slot }, taux) + in + log cx "resolved recursive tag %s, type as %a" + id Ast.sprintf_ty ty; + htab_put cx.ctxt_all_item_types item.id ty + | _ -> bug () "recursive tag with non-function type" + end + + | _ -> + let t = ty_of_mod_item true item in + let ty = + resolve_type cx (!scopes) recursive_tag_groups + all_tags empty_recur_info t + in + log cx "resolved item %s, type as %a" id Ast.sprintf_ty ty; + htab_put cx.ctxt_all_item_types item.id ty; + with + Semant_err (None, e) -> raise (Semant_err ((Some item.id), e)) + end; + inner.Walk.visit_mod_item_pre id params item + in + + let visit_obj_fn_pre obj ident fn = + let fty = + resolve_type cx (!scopes) recursive_tag_groups all_tags + empty_recur_info (Ast.TY_fn (ty_fn_of_fn fn.node)) + in + log cx "resolved obj fn %s as %a" ident Ast.sprintf_ty fty; + htab_put cx.ctxt_all_item_types fn.id fty; + inner.Walk.visit_obj_fn_pre obj ident fn + in + + let visit_obj_drop_pre obj b = + let fty = mk_simple_ty_fn [| |] in + htab_put cx.ctxt_all_item_types b.id fty; + inner.Walk.visit_obj_drop_pre obj b + in + + let visit_stmt_pre stmt = + begin + match stmt.node with + Ast.STMT_for_each fe -> + let id = fe.Ast.for_each_body.id in + let fty = mk_simple_ty_iter [| |] in + htab_put cx.ctxt_all_item_types id fty; + | Ast.STMT_copy (_, Ast.EXPR_unary (Ast.UNOP_cast t, _)) -> + let ty = resolve_ty t.node in + htab_put cx.ctxt_all_cast_types t.id ty + | _ -> () + end; + inner.Walk.visit_stmt_pre stmt + in + + let visit_lval_pre lv = + let rec rebuild_lval' lv = + match lv with + Ast.LVAL_ext (base, ext) -> + let ext = + match ext with + Ast.COMP_named (Ast.COMP_ident _) + | Ast.COMP_named (Ast.COMP_idx _) + | Ast.COMP_atom (Ast.ATOM_literal _) -> ext + | Ast.COMP_atom (Ast.ATOM_lval lv) -> + Ast.COMP_atom (Ast.ATOM_lval (rebuild_lval lv)) + | Ast.COMP_named (Ast.COMP_app (ident, params)) -> + Ast.COMP_named + (Ast.COMP_app (ident, Array.map resolve_ty params)) + in + Ast.LVAL_ext (rebuild_lval' base, ext) + + | Ast.LVAL_base nb -> + let node = + match nb.node with + Ast.BASE_ident _ + | Ast.BASE_temp _ -> nb.node + | Ast.BASE_app (ident, params) -> + Ast.BASE_app (ident, Array.map resolve_ty params) + in + Ast.LVAL_base {nb with node = node} + + and rebuild_lval lv = + let id = lval_base_id lv in + let lv' = rebuild_lval' lv in + iflog cx (fun _ -> log cx "rebuilt lval %a as %a (#%d)" + Ast.sprintf_lval lv Ast.sprintf_lval lv' + (int_of_node id)); + htab_put cx.ctxt_all_lvals id lv'; + lv' + in + ignore (rebuild_lval lv); + inner.Walk.visit_lval_pre lv + in + + { inner with + Walk.visit_slot_identified_pre = visit_slot_identified_pre; + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_drop_pre = visit_obj_drop_pre; + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_lval_pre = visit_lval_pre; } +;; + + +let lval_base_resolving_visitor + (cx:ctxt) + (scopes:(scope list) ref) + (inner:Walk.visitor) + : Walk.visitor = + let lookup_referent_by_ident id ident = + log cx "looking up slot or item with ident '%s'" ident; + match lookup cx (!scopes) (Ast.KEY_ident ident) with + None -> err (Some id) "unresolved identifier '%s'" ident + | Some (_, id) -> (log cx "resolved to node id #%d" + (int_of_node id); id) + in + let lookup_slot_by_temp id temp = + log cx "looking up temp slot #%d" (int_of_temp temp); + let res = lookup cx (!scopes) (Ast.KEY_temp temp) in + match res with + None -> err + (Some id) "unresolved temp node #%d" (int_of_temp temp) + | Some (_, id) -> + (log cx "resolved to node id #%d" (int_of_node id); id) + in + let lookup_referent_by_name_base id nb = + match nb with + Ast.BASE_ident ident + | Ast.BASE_app (ident, _) -> lookup_referent_by_ident id ident + | Ast.BASE_temp temp -> lookup_slot_by_temp id temp + in + + let visit_lval_pre lv = + let rec lookup_lval lv = + iflog cx (fun _ -> + log cx "looking up lval #%d" + (int_of_node (lval_base_id lv))); + match lv with + Ast.LVAL_ext (base, ext) -> + begin + lookup_lval base; + match ext with + Ast.COMP_atom (Ast.ATOM_lval lv') -> lookup_lval lv' + | _ -> () + end + | Ast.LVAL_base nb -> + let referent_id = lookup_referent_by_name_base nb.id nb.node in + iflog cx (fun _ -> log cx "resolved lval #%d to referent #%d" + (int_of_node nb.id) (int_of_node referent_id)); + htab_put cx.ctxt_lval_to_referent nb.id referent_id + in + lookup_lval lv; + inner.Walk.visit_lval_pre lv + in + { inner with + Walk.visit_lval_pre = visit_lval_pre } +;; + + + +(* + * iso-recursion groups are very complicated. + * + * - iso groups are always rooted at *named* ty_tag nodes + * + * - consider: + * + * type colour = tag(red, green, blue); + * type list = tag(cons(colour, @list), nil()) + * + * this should include list as an iso but not colour, + * should result in: + * + * type list = iso[<0>:tag(cons(tag(red,green,blue),@#1))] + * + * - consider: + * + * type colour = tag(red, green, blue); + * type tree = tag(children(@list), leaf(colour)) + * type list = tag(cons(@tree, @list), nil()) + * + * this should result in: + * + * type list = iso[<0>:tag(cons(@#2, @#1),nil()); + * 1: tag(children(@#1),leaf(tag(red,green,blue)))] + * + * - how can you calculate these? + * + * - start by making a map from named-tag-node-id -> referenced-other-nodes + * - for each member in the set, if you can get from itself to itself, keep + * it, otherwise it's non-recursive => non-interesting, delete it. + * - group the members (now all recursive) by dependency + * - assign index-number to each elt of group + * - fully resolve each elt of group, turning names into numbers or chasing + * through to fully-resolving targets as necessary + * - place group in iso, store differently-indexed value in table for each + * + * + * - what are the illegal forms? + * - recursion that takes indefinite storage to form a tag, eg. + * + * type t = tag(foo(t)); + * + * - recursion that makes a tag unconstructable, eg: + * + * type t = tag(foo(@t)); + *) + +let resolve_recursion + (cx:ctxt) + (node_to_references:(node_id,node_id list) Hashtbl.t) + (recursive_tag_groups:(node_id,(node_id,unit) Hashtbl.t) Hashtbl.t) + : unit = + + let recursive_tag_types = Hashtbl.create 0 in + + let rec can_reach + (target:node_id) + (visited:node_id list) + (curr:node_id) + : bool = + if List.mem curr visited + then false + else + match htab_search node_to_references curr with + None -> false + | Some referenced -> + if List.mem target referenced + then true + else List.exists (can_reach target (curr :: visited)) referenced + in + + let extract_recursive_tags _ = + Hashtbl.iter + begin fun id _ -> + if can_reach id [] id + then begin + match Hashtbl.find cx.ctxt_all_defns id with + DEFN_item + { Ast.decl_item = Ast.MOD_ITEM_type (Ast.TY_tag _) } -> + log cx "type %d is a recursive tag" (int_of_node id); + Hashtbl.replace recursive_tag_types id () + | _ -> + log cx "type %d is recursive, but not a tag" (int_of_node id); + end + else log cx "type %d is non-recursive" (int_of_node id); + end + node_to_references + in + + let group_recursive_tags _ = + while (Hashtbl.length recursive_tag_types) != 0 do + let keys = htab_keys recursive_tag_types in + let root = List.hd keys in + let group = Hashtbl.create 0 in + let rec walk visited node = + if List.mem node visited + then () + else + begin + if Hashtbl.mem recursive_tag_types node + then + begin + Hashtbl.remove recursive_tag_types node; + htab_put recursive_tag_groups node group; + htab_put group node (); + log cx "recursion group rooted at tag %d contains tag %d" + (int_of_node root) (int_of_node node); + end; + match htab_search node_to_references node with + None -> () + | Some referenced -> + List.iter (walk (node :: visited)) referenced + end + in + walk [] root; + done + in + + begin + extract_recursive_tags (); + group_recursive_tags (); + log cx "found %d independent type-recursion groups" + (Hashtbl.length recursive_tag_groups); + end +;; + +let pattern_resolving_visitor + (cx:ctxt) + (scopes:scope list ref) + (inner:Walk.visitor) : Walk.visitor = + let visit_stmt_pre stmt = + begin + match stmt.node with + Ast.STMT_alt_tag { Ast.alt_tag_lval = _; Ast.alt_tag_arms = arms } -> + let resolve_arm { node = arm } = + match fst arm with + Ast.PAT_tag (ident, _) -> + begin + match lookup_by_ident cx !scopes ident with + None -> + err None "unresolved tag constructor '%s'" ident + | Some (_, tag_id) -> + match Hashtbl.find cx.ctxt_all_defns tag_id with + DEFN_item { + Ast.decl_item = Ast.MOD_ITEM_tag _ + } -> () + | _ -> + err None "'%s' is not a tag constructor" ident + end + | _ -> () + + in + Array.iter resolve_arm arms + | _ -> () + end; + inner.Walk.visit_stmt_pre stmt + in + { inner with Walk.visit_stmt_pre = visit_stmt_pre } +;; + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : unit = + let (scopes:(scope list) ref) = ref [] in + let path = Stack.create () in + + let node_to_references = Hashtbl.create 0 in + let all_tags = Hashtbl.create 0 in + let recursive_tag_groups = Hashtbl.create 0 in + + let passes_0 = + [| + (block_scope_forming_visitor cx Walk.empty_visitor); + (stmt_collecting_visitor cx + (all_item_collecting_visitor cx path + Walk.empty_visitor)); + (scope_stack_managing_visitor scopes + (type_reference_and_tag_extracting_visitor + cx scopes node_to_references all_tags + Walk.empty_visitor)) + |] + in + let passes_1 = + [| + (scope_stack_managing_visitor scopes + (type_resolving_visitor cx scopes + recursive_tag_groups all_tags + (lval_base_resolving_visitor cx scopes + Walk.empty_visitor))); + |] + in + let passes_2 = + [| + (scope_stack_managing_visitor scopes + (pattern_resolving_visitor cx scopes + Walk.empty_visitor)) + |] + in + log cx "running primary resolve passes"; + run_passes cx "resolve collect" path passes_0 (log cx "%s") crate; + resolve_recursion cx node_to_references recursive_tag_groups; + log cx "running secondary resolve passes"; + run_passes cx "resolve bind" path passes_1 (log cx "%s") crate; + log cx "running tertiary resolve passes"; + run_passes cx "resolve patterns" path passes_2 (log cx "%s") crate +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) + diff --git a/src/boot/me/semant.ml b/src/boot/me/semant.ml new file mode 100644 index 00000000..b5000ff3 --- /dev/null +++ b/src/boot/me/semant.ml @@ -0,0 +1,1969 @@ + +open Common;; + +type slots_table = (Ast.slot_key,node_id) Hashtbl.t +type items_table = (Ast.ident,node_id) Hashtbl.t +type block_slots_table = (node_id,slots_table) Hashtbl.t +type block_items_table = (node_id,items_table) Hashtbl.t +;; + + +type code = { + code_fixup: fixup; + code_quads: Il.quads; + code_vregs_and_spill: (int * fixup) option; +} +;; + +type glue = + GLUE_activate + | GLUE_yield + | GLUE_exit_main_task + | GLUE_exit_task + | GLUE_mark of Ast.ty + | GLUE_drop of Ast.ty + | GLUE_free of Ast.ty + | GLUE_copy of Ast.ty (* One-level copy. *) + | GLUE_clone of Ast.ty (* Deep copy. *) + | GLUE_compare of Ast.ty + | GLUE_hash of Ast.ty + | GLUE_write of Ast.ty + | GLUE_read of Ast.ty + | GLUE_unwind + | GLUE_get_next_pc + | GLUE_mark_frame of node_id (* node is the frame *) + | GLUE_drop_frame of node_id (* node is the frame *) + | GLUE_reloc_frame of node_id (* node is the frame *) + | GLUE_fn_binding of node_id (* node is the 'bind' stmt *) + | GLUE_obj_drop of node_id (* node is the obj *) + | GLUE_loop_body of node_id (* node is the 'for each' body block *) + | GLUE_forward of (Ast.ident * Ast.ty_obj * Ast.ty_obj) +;; + +type data = + DATA_str of string + | DATA_name of Ast.name + | DATA_tydesc of Ast.ty + | DATA_frame_glue_fns of node_id + | DATA_obj_vtbl of node_id + | DATA_forwarding_vtbl of (Ast.ty_obj * Ast.ty_obj) + | DATA_crate +;; + +type defn = + DEFN_slot of Ast.slot + | DEFN_item of Ast.mod_item_decl + | DEFN_ty_param of Ast.ty_param + | DEFN_obj_fn of (node_id * Ast.fn) + | DEFN_obj_drop of node_id + | DEFN_loop_body of node_id +;; + +type glue_code = (glue, code) Hashtbl.t;; +type item_code = (node_id, code) Hashtbl.t;; +type file_code = (node_id, item_code) Hashtbl.t;; +type data_frags = (data, (fixup * Asm.frag)) Hashtbl.t;; + +let string_of_name (n:Ast.name) : string = + Ast.fmt_to_str Ast.fmt_name n +;; + +(* The only need for a carg is to uniquely identify a constraint-arg + * in a scope-independent fashion. So we just look up the node that's + * used as the base of any such arg and glue it on the front of the + * symbolic name. + *) + +type constr_key_arg = Constr_arg_node of (node_id * Ast.carg_path) + | Constr_arg_lit of Ast.lit +type constr_key = + Constr_pred of (node_id * constr_key_arg array) + | Constr_init of node_id + +type ctxt = + { ctxt_sess: Session.sess; + ctxt_frame_args: (node_id,node_id list) Hashtbl.t; + ctxt_frame_blocks: (node_id,node_id list) Hashtbl.t; + ctxt_block_slots: block_slots_table; + ctxt_block_items: block_items_table; + ctxt_slot_is_arg: (node_id,unit) Hashtbl.t; + ctxt_slot_keys: (node_id,Ast.slot_key) Hashtbl.t; + ctxt_all_item_names: (node_id,Ast.name) Hashtbl.t; + ctxt_all_item_types: (node_id,Ast.ty) Hashtbl.t; + ctxt_all_lval_types: (node_id,Ast.ty) Hashtbl.t; + ctxt_all_cast_types: (node_id,Ast.ty) Hashtbl.t; + ctxt_all_type_items: (node_id,Ast.ty) Hashtbl.t; + ctxt_all_stmts: (node_id,Ast.stmt) Hashtbl.t; + ctxt_item_files: (node_id,filename) Hashtbl.t; + ctxt_all_lvals: (node_id,Ast.lval) Hashtbl.t; + + (* definition id --> definition *) + ctxt_all_defns: (node_id,defn) Hashtbl.t; + + (* reference id --> definition id *) + ctxt_lval_to_referent: (node_id,node_id) Hashtbl.t; + + ctxt_required_items: (node_id, (required_lib * nabi_conv)) Hashtbl.t; + ctxt_required_syms: (node_id, string) Hashtbl.t; + + (* Layout-y stuff. *) + ctxt_slot_aliased: (node_id,unit) Hashtbl.t; + ctxt_slot_is_obj_state: (node_id,unit) Hashtbl.t; + ctxt_slot_vregs: (node_id,((int option) ref)) Hashtbl.t; + ctxt_slot_offsets: (node_id,size) Hashtbl.t; + ctxt_frame_sizes: (node_id,size) Hashtbl.t; + ctxt_call_sizes: (node_id,size) Hashtbl.t; + ctxt_block_is_loop_body: (node_id,unit) Hashtbl.t; + ctxt_stmt_loop_depths: (node_id,int) Hashtbl.t; + ctxt_slot_loop_depths: (node_id,int) Hashtbl.t; + + (* Typestate-y stuff. *) + ctxt_constrs: (constr_id,constr_key) Hashtbl.t; + ctxt_constr_ids: (constr_key,constr_id) Hashtbl.t; + ctxt_preconditions: (node_id,Bits.t) Hashtbl.t; + ctxt_postconditions: (node_id,Bits.t) Hashtbl.t; + ctxt_prestates: (node_id,Bits.t) Hashtbl.t; + ctxt_poststates: (node_id,Bits.t) Hashtbl.t; + ctxt_call_lval_params: (node_id,Ast.ty array) Hashtbl.t; + ctxt_copy_stmt_is_init: (node_id,unit) Hashtbl.t; + ctxt_post_stmt_slot_drops: (node_id,node_id list) Hashtbl.t; + + (* Translation-y stuff. *) + ctxt_fn_fixups: (node_id,fixup) Hashtbl.t; + ctxt_block_fixups: (node_id,fixup) Hashtbl.t; + ctxt_file_fixups: (node_id,fixup) Hashtbl.t; + ctxt_spill_fixups: (node_id,fixup) Hashtbl.t; + ctxt_abi: Abi.abi; + ctxt_activate_fixup: fixup; + ctxt_yield_fixup: fixup; + ctxt_unwind_fixup: fixup; + ctxt_exit_task_fixup: fixup; + + ctxt_debug_aranges_fixup: fixup; + ctxt_debug_pubnames_fixup: fixup; + ctxt_debug_info_fixup: fixup; + ctxt_debug_abbrev_fixup: fixup; + ctxt_debug_line_fixup: fixup; + ctxt_debug_frame_fixup: fixup; + + ctxt_image_base_fixup: fixup; + ctxt_crate_fixup: fixup; + + ctxt_file_code: file_code; + ctxt_all_item_code: item_code; + ctxt_glue_code: glue_code; + ctxt_data: data_frags; + + ctxt_native_required: + (required_lib,((string,fixup) Hashtbl.t)) Hashtbl.t; + ctxt_native_provided: + (segment,((string, fixup) Hashtbl.t)) Hashtbl.t; + + ctxt_required_rust_sym_num: (node_id, int) Hashtbl.t; + ctxt_required_c_sym_num: ((required_lib * string), int) Hashtbl.t; + ctxt_required_lib_num: (required_lib, int) Hashtbl.t; + + ctxt_main_fn_fixup: fixup option; + ctxt_main_name: string option; + } +;; + +let new_ctxt sess abi crate = + { ctxt_sess = sess; + ctxt_frame_args = Hashtbl.create 0; + ctxt_frame_blocks = Hashtbl.create 0; + ctxt_block_slots = Hashtbl.create 0; + ctxt_block_items = Hashtbl.create 0; + ctxt_slot_is_arg = Hashtbl.create 0; + ctxt_slot_keys = Hashtbl.create 0; + ctxt_all_item_names = Hashtbl.create 0; + ctxt_all_item_types = Hashtbl.create 0; + ctxt_all_lval_types = Hashtbl.create 0; + ctxt_all_cast_types = Hashtbl.create 0; + ctxt_all_type_items = Hashtbl.create 0; + ctxt_all_stmts = Hashtbl.create 0; + ctxt_item_files = crate.Ast.crate_files; + ctxt_all_lvals = Hashtbl.create 0; + ctxt_all_defns = Hashtbl.create 0; + ctxt_lval_to_referent = Hashtbl.create 0; + ctxt_required_items = crate.Ast.crate_required; + ctxt_required_syms = crate.Ast.crate_required_syms; + + ctxt_constrs = Hashtbl.create 0; + ctxt_constr_ids = Hashtbl.create 0; + ctxt_preconditions = Hashtbl.create 0; + ctxt_postconditions = Hashtbl.create 0; + ctxt_prestates = Hashtbl.create 0; + ctxt_poststates = Hashtbl.create 0; + ctxt_copy_stmt_is_init = Hashtbl.create 0; + ctxt_post_stmt_slot_drops = Hashtbl.create 0; + ctxt_call_lval_params = Hashtbl.create 0; + + ctxt_slot_aliased = Hashtbl.create 0; + ctxt_slot_is_obj_state = Hashtbl.create 0; + ctxt_slot_vregs = Hashtbl.create 0; + ctxt_slot_offsets = Hashtbl.create 0; + ctxt_frame_sizes = Hashtbl.create 0; + ctxt_call_sizes = Hashtbl.create 0; + + ctxt_block_is_loop_body = Hashtbl.create 0; + ctxt_slot_loop_depths = Hashtbl.create 0; + ctxt_stmt_loop_depths = Hashtbl.create 0; + + ctxt_fn_fixups = Hashtbl.create 0; + ctxt_block_fixups = Hashtbl.create 0; + ctxt_file_fixups = Hashtbl.create 0; + ctxt_spill_fixups = Hashtbl.create 0; + ctxt_abi = abi; + ctxt_activate_fixup = new_fixup "activate glue"; + ctxt_yield_fixup = new_fixup "yield glue"; + ctxt_unwind_fixup = new_fixup "unwind glue"; + ctxt_exit_task_fixup = new_fixup "exit-task glue"; + + ctxt_debug_aranges_fixup = new_fixup "debug_aranges section"; + ctxt_debug_pubnames_fixup = new_fixup "debug_pubnames section"; + ctxt_debug_info_fixup = new_fixup "debug_info section"; + ctxt_debug_abbrev_fixup = new_fixup "debug_abbrev section"; + ctxt_debug_line_fixup = new_fixup "debug_line section"; + ctxt_debug_frame_fixup = new_fixup "debug_frame section"; + + ctxt_image_base_fixup = new_fixup "loaded image base"; + ctxt_crate_fixup = new_fixup "root crate structure"; + ctxt_file_code = Hashtbl.create 0; + ctxt_all_item_code = Hashtbl.create 0; + ctxt_glue_code = Hashtbl.create 0; + ctxt_data = Hashtbl.create 0; + + ctxt_native_required = Hashtbl.create 0; + ctxt_native_provided = Hashtbl.create 0; + + ctxt_required_rust_sym_num = Hashtbl.create 0; + ctxt_required_c_sym_num = Hashtbl.create 0; + ctxt_required_lib_num = Hashtbl.create 0; + + ctxt_main_fn_fixup = + (match crate.Ast.crate_main with + None -> None + | Some n -> Some (new_fixup (string_of_name n))); + + ctxt_main_name = + (match crate.Ast.crate_main with + None -> None + | Some n -> Some (string_of_name n)); + } +;; + +let report_err cx ido str = + let sess = cx.ctxt_sess in + let spano = match ido with + None -> None + | Some id -> (Session.get_span sess id) + in + match spano with + None -> + Session.fail sess "Error: %s\n%!" str + | Some span -> + Session.fail sess "%s:E:Error: %s\n%!" + (Session.string_of_span span) str +;; + +let bugi (cx:ctxt) (i:node_id) = + let k s = + report_err cx (Some i) s; + failwith s + in Printf.ksprintf k +;; + +(* Convenience accessors. *) + +(* resolve an lval reference id to the id of its definition *) +let lval_to_referent (cx:ctxt) (id:node_id) : node_id = + if Hashtbl.mem cx.ctxt_lval_to_referent id + then Hashtbl.find cx.ctxt_lval_to_referent id + else bug () "unresolved lval" +;; + +(* resolve an lval reference id to its definition *) +let resolve_lval_id (cx:ctxt) (id:node_id) : defn = + Hashtbl.find cx.ctxt_all_defns (lval_to_referent cx id) +;; + +let referent_is_slot (cx:ctxt) (id:node_id) : bool = + match Hashtbl.find cx.ctxt_all_defns id with + DEFN_slot _ -> true + | _ -> false +;; + +let referent_is_item (cx:ctxt) (id:node_id) : bool = + match Hashtbl.find cx.ctxt_all_defns id with + DEFN_item _ -> true + | _ -> false +;; + +(* coerce an lval definition id to a slot *) +let referent_to_slot (cx:ctxt) (id:node_id) : Ast.slot = + match Hashtbl.find cx.ctxt_all_defns id with + DEFN_slot slot -> slot + | _ -> bugi cx id "unknown slot" +;; + +(* coerce an lval reference id to its definition slot *) +let lval_to_slot (cx:ctxt) (id:node_id) : Ast.slot = + match resolve_lval_id cx id with + DEFN_slot slot -> slot + | _ -> bugi cx id "unknown slot" +;; + +let get_stmt_depth (cx:ctxt) (id:node_id) : int = + Hashtbl.find cx.ctxt_stmt_loop_depths id +;; + +let get_slot_depth (cx:ctxt) (id:node_id) : int = + Hashtbl.find cx.ctxt_slot_loop_depths id +;; + +let get_fn_fixup (cx:ctxt) (id:node_id) : fixup = + if Hashtbl.mem cx.ctxt_fn_fixups id + then Hashtbl.find cx.ctxt_fn_fixups id + else bugi cx id "fn without fixup" +;; + +let get_framesz (cx:ctxt) (id:node_id) : size = + if Hashtbl.mem cx.ctxt_frame_sizes id + then Hashtbl.find cx.ctxt_frame_sizes id + else bugi cx id "missing framesz" +;; + +let get_callsz (cx:ctxt) (id:node_id) : size = + if Hashtbl.mem cx.ctxt_call_sizes id + then Hashtbl.find cx.ctxt_call_sizes id + else bugi cx id "missing callsz" +;; + +let rec n_item_ty_params (cx:ctxt) (id:node_id) : int = + match Hashtbl.find cx.ctxt_all_defns id with + DEFN_item i -> Array.length i.Ast.decl_params + | DEFN_obj_fn (oid,_) -> n_item_ty_params cx oid + | DEFN_obj_drop oid -> n_item_ty_params cx oid + | DEFN_loop_body fid -> n_item_ty_params cx fid + | _ -> bugi cx id "n_item_ty_params on non-item" +;; + +let item_is_obj_fn (cx:ctxt) (id:node_id) : bool = + match Hashtbl.find cx.ctxt_all_defns id with + DEFN_obj_fn _ + | DEFN_obj_drop _ -> true + | _ -> false +;; + +let get_spill (cx:ctxt) (id:node_id) : fixup = + if Hashtbl.mem cx.ctxt_spill_fixups id + then Hashtbl.find cx.ctxt_spill_fixups id + else bugi cx id "missing spill fixup" +;; + +let require_native (cx:ctxt) (lib:required_lib) (name:string) : fixup = + let lib_tab = (htab_search_or_add cx.ctxt_native_required lib + (fun _ -> Hashtbl.create 0)) + in + htab_search_or_add lib_tab name + (fun _ -> new_fixup ("require: " ^ name)) +;; + +let provide_native (cx:ctxt) (seg:segment) (name:string) : fixup = + let seg_tab = (htab_search_or_add cx.ctxt_native_provided seg + (fun _ -> Hashtbl.create 0)) + in + htab_search_or_add seg_tab name + (fun _ -> new_fixup ("provide: " ^ name)) +;; + +let provide_existing_native + (cx:ctxt) + (seg:segment) + (name:string) + (fix:fixup) + : unit = + let seg_tab = (htab_search_or_add cx.ctxt_native_provided seg + (fun _ -> Hashtbl.create 0)) + in + htab_put seg_tab name fix +;; + +let slot_ty (s:Ast.slot) : Ast.ty = + match s.Ast.slot_ty with + Some t -> t + | None -> bug () "untyped slot" +;; + +let defn_is_slot (d:defn) : bool = + match d with + DEFN_slot _ -> true + | _ -> false +;; + +let defn_is_item (d:defn) : bool = + match d with + DEFN_item _ -> true + | _ -> false +;; + +let slot_is_obj_state (cx:ctxt) (sid:node_id) : bool = + Hashtbl.mem cx.ctxt_slot_is_obj_state sid +;; + + +(* determines whether d defines a statically-known value *) +let defn_is_static (d:defn) : bool = + not (defn_is_slot d) +;; + +let defn_is_callable (d:defn) : bool = + match d with + DEFN_slot { Ast.slot_ty = Some Ast.TY_fn _ } + | DEFN_item { Ast.decl_item = (Ast.MOD_ITEM_fn _ ) } -> true + | _ -> false +;; + +(* Constraint manipulation. *) + +let rec apply_names_to_carg_path + (names:(Ast.name_base option) array) + (cp:Ast.carg_path) + : Ast.carg_path = + match cp with + Ast.CARG_ext (Ast.CARG_base Ast.BASE_formal, + Ast.COMP_idx i) -> + begin + match names.(i) with + Some nb -> + Ast.CARG_base (Ast.BASE_named nb) + | None -> bug () "Indexing off non-named carg" + end + | Ast.CARG_ext (cp', e) -> + Ast.CARG_ext (apply_names_to_carg_path names cp', e) + | _ -> cp +;; + +let apply_names_to_carg + (names:(Ast.name_base option) array) + (carg:Ast.carg) + : Ast.carg = + match carg with + Ast.CARG_path cp -> + Ast.CARG_path (apply_names_to_carg_path names cp) + | Ast.CARG_lit _ -> carg +;; + +let apply_names_to_constr + (names:(Ast.name_base option) array) + (constr:Ast.constr) + : Ast.constr = + { constr with + Ast.constr_args = + Array.map (apply_names_to_carg names) constr.Ast.constr_args } +;; + +let atoms_to_names (atoms:Ast.atom array) + : (Ast.name_base option) array = + Array.map + begin + fun atom -> + match atom with + Ast.ATOM_lval (Ast.LVAL_base nbi) -> Some nbi.node + | _ -> None + end + atoms +;; + +let rec lval_base_id (lv:Ast.lval) : node_id = + match lv with + Ast.LVAL_base nbi -> nbi.id + | Ast.LVAL_ext (lv, _) -> lval_base_id lv +;; + +let rec lval_base_slot (cx:ctxt) (lv:Ast.lval) : node_id option = + match lv with + Ast.LVAL_base nbi -> + let referent = lval_to_referent cx nbi.id in + if referent_is_slot cx referent + then Some referent + else None + | Ast.LVAL_ext (lv, _) -> lval_base_slot cx lv +;; + +let rec lval_slots (cx:ctxt) (lv:Ast.lval) : node_id array = + match lv with + Ast.LVAL_base nbi -> + let referent = lval_to_referent cx nbi.id in + if referent_is_slot cx referent + then [| referent |] + else [| |] + | Ast.LVAL_ext (lv, Ast.COMP_named _) -> lval_slots cx lv + | Ast.LVAL_ext (lv, Ast.COMP_atom a) -> + Array.append (lval_slots cx lv) (atom_slots cx a) + +and atom_slots (cx:ctxt) (a:Ast.atom) : node_id array = + match a with + Ast.ATOM_literal _ -> [| |] + | Ast.ATOM_lval lv -> lval_slots cx lv +;; + +let lval_option_slots (cx:ctxt) (lv:Ast.lval option) : node_id array = + match lv with + None -> [| |] + | Some lv -> lval_slots cx lv +;; + +let resolve_lval (cx:ctxt) (lv:Ast.lval) : defn = + resolve_lval_id cx (lval_base_id lv) +;; + +let atoms_slots (cx:ctxt) (az:Ast.atom array) : node_id array = + Array.concat (List.map (atom_slots cx) (Array.to_list az)) +;; + +let tup_inputs_slots (cx:ctxt) (az:Ast.tup_input array) : node_id array = + Array.concat (List.map + (fun (_,_,a) -> atom_slots cx a) + (Array.to_list az)) +;; + +let rec_inputs_slots (cx:ctxt) + (inputs:Ast.rec_input array) : node_id array = + Array.concat (List.map + (fun (_, _, _, atom) -> atom_slots cx atom) + (Array.to_list inputs)) +;; + +let expr_slots (cx:ctxt) (e:Ast.expr) : node_id array = + match e with + Ast.EXPR_binary (_, a, b) -> + Array.append (atom_slots cx a) (atom_slots cx b) + | Ast.EXPR_unary (_, u) -> atom_slots cx u + | Ast.EXPR_atom a -> atom_slots cx a +;; + + +(* Type extraction. *) + +let interior_slot_full mut ty : Ast.slot = + { Ast.slot_mode = Ast.MODE_interior; + Ast.slot_mutable = mut; + Ast.slot_ty = Some ty } +;; + +let exterior_slot_full mut ty : Ast.slot = + { Ast.slot_mode = Ast.MODE_exterior; + Ast.slot_mutable = mut; + Ast.slot_ty = Some ty } +;; + +let interior_slot ty : Ast.slot = interior_slot_full false ty +;; + +let exterior_slot ty : Ast.slot = exterior_slot_full false ty +;; + + +(* General folds of Ast.ty. *) + +type ('ty, 'slot, 'slots, 'tag) ty_fold = + { + (* Functions that correspond to interior nodes in Ast.ty. *) + ty_fold_slot : (Ast.mode * bool * 'ty) -> 'slot; + ty_fold_slots : ('slot array) -> 'slots; + ty_fold_tags : (Ast.name, 'slots) Hashtbl.t -> 'tag; + + (* Functions that correspond to the Ast.ty constructors. *) + ty_fold_any: unit -> 'ty; + ty_fold_nil : unit -> 'ty; + ty_fold_bool : unit -> 'ty; + ty_fold_mach : ty_mach -> 'ty; + ty_fold_int : unit -> 'ty; + ty_fold_uint : unit -> 'ty; + ty_fold_char : unit -> 'ty; + ty_fold_str : unit -> 'ty; + ty_fold_tup : 'slots -> 'ty; + ty_fold_vec : 'slot -> 'ty; + ty_fold_rec : (Ast.ident * 'slot) array -> 'ty; + ty_fold_tag : 'tag -> 'ty; + ty_fold_iso : (int * 'tag array) -> 'ty; + ty_fold_idx : int -> 'ty; + ty_fold_fn : (('slots * Ast.constrs * 'slot) * Ast.ty_fn_aux) -> 'ty; + ty_fold_obj : (Ast.effect + * (Ast.ident, (('slots * Ast.constrs * 'slot) * + Ast.ty_fn_aux)) Hashtbl.t) -> 'ty; + ty_fold_chan : 'ty -> 'ty; + ty_fold_port : 'ty -> 'ty; + ty_fold_task : unit -> 'ty; + ty_fold_native : opaque_id -> 'ty; + ty_fold_param : (int * Ast.effect) -> 'ty; + ty_fold_named : Ast.name -> 'ty; + ty_fold_type : unit -> 'ty; + ty_fold_constrained : ('ty * Ast.constrs) -> 'ty } +;; + +let rec fold_ty (f:('ty, 'slot, 'slots, 'tag) ty_fold) (ty:Ast.ty) : 'ty = + let fold_slot (s:Ast.slot) : 'slot = + f.ty_fold_slot (s.Ast.slot_mode, + s.Ast.slot_mutable, + fold_ty f (slot_ty s)) + in + let fold_slots (slots:Ast.slot array) : 'slots = + f.ty_fold_slots (Array.map fold_slot slots) + in + let fold_tags (ttag:Ast.ty_tag) : 'tag = + f.ty_fold_tags (htab_map ttag (fun k v -> (k, fold_slots v))) + in + let fold_sig tsig = + (fold_slots tsig.Ast.sig_input_slots, + tsig.Ast.sig_input_constrs, + fold_slot tsig.Ast.sig_output_slot) + in + let fold_obj fns = + htab_map fns (fun i (tsig, taux) -> (i, (fold_sig tsig, taux))) + in + match ty with + Ast.TY_any -> f.ty_fold_any () + | Ast.TY_nil -> f.ty_fold_nil () + | Ast.TY_bool -> f.ty_fold_bool () + | Ast.TY_mach m -> f.ty_fold_mach m + | Ast.TY_int -> f.ty_fold_int () + | Ast.TY_uint -> f.ty_fold_uint () + | Ast.TY_char -> f.ty_fold_char () + | Ast.TY_str -> f.ty_fold_str () + + | Ast.TY_tup t -> f.ty_fold_tup (fold_slots t) + | Ast.TY_vec s -> f.ty_fold_vec (fold_slot s) + | Ast.TY_rec r -> f.ty_fold_rec (Array.map (fun (k,v) -> (k,fold_slot v)) r) + + | Ast.TY_tag tt -> f.ty_fold_tag (fold_tags tt) + | Ast.TY_iso ti -> f.ty_fold_iso (ti.Ast.iso_index, + (Array.map fold_tags ti.Ast.iso_group)) + | Ast.TY_idx i -> f.ty_fold_idx i + + | Ast.TY_fn (tsig,taux) -> f.ty_fold_fn (fold_sig tsig, taux) + | Ast.TY_chan t -> f.ty_fold_chan (fold_ty f t) + | Ast.TY_port t -> f.ty_fold_port (fold_ty f t) + + | Ast.TY_obj (eff,t) -> f.ty_fold_obj (eff, (fold_obj t)) + | Ast.TY_task -> f.ty_fold_task () + + | Ast.TY_native x -> f.ty_fold_native x + | Ast.TY_param x -> f.ty_fold_param x + | Ast.TY_named n -> f.ty_fold_named n + | Ast.TY_type -> f.ty_fold_type () + + | Ast.TY_constrained (t, constrs) -> + f.ty_fold_constrained (fold_ty f t, constrs) + +;; + +type 'a simple_ty_fold = ('a, 'a, 'a, 'a) ty_fold +;; + +let ty_fold_default (default:'a) : 'a simple_ty_fold = + { ty_fold_slot = (fun _ -> default); + ty_fold_slots = (fun _ -> default); + ty_fold_tags = (fun _ -> default); + ty_fold_any = (fun _ -> default); + ty_fold_nil = (fun _ -> default); + ty_fold_bool = (fun _ -> default); + ty_fold_mach = (fun _ -> default); + ty_fold_int = (fun _ -> default); + ty_fold_uint = (fun _ -> default); + ty_fold_char = (fun _ -> default); + ty_fold_str = (fun _ -> default); + ty_fold_tup = (fun _ -> default); + ty_fold_vec = (fun _ -> default); + ty_fold_rec = (fun _ -> default); + ty_fold_tag = (fun _ -> default); + ty_fold_iso = (fun _ -> default); + ty_fold_idx = (fun _ -> default); + ty_fold_fn = (fun _ -> default); + ty_fold_obj = (fun _ -> default); + ty_fold_chan = (fun _ -> default); + ty_fold_port = (fun _ -> default); + ty_fold_task = (fun _ -> default); + ty_fold_native = (fun _ -> default); + ty_fold_param = (fun _ -> default); + ty_fold_named = (fun _ -> default); + ty_fold_type = (fun _ -> default); + ty_fold_constrained = (fun _ -> default) } +;; + +let ty_fold_rebuild (id:Ast.ty -> Ast.ty) + : (Ast.ty, Ast.slot, Ast.slot array, Ast.ty_tag) ty_fold = + let rebuild_fn ((islots, constrs, oslot), aux) = + ({ Ast.sig_input_slots = islots; + Ast.sig_input_constrs = constrs; + Ast.sig_output_slot = oslot }, aux) + in + { ty_fold_slot = (fun (mode, mut, t) -> + { Ast.slot_mode = mode; + Ast.slot_mutable = mut; + Ast.slot_ty = Some t }); + ty_fold_slots = (fun slots -> slots); + ty_fold_tags = (fun htab -> htab); + ty_fold_any = (fun _ -> id Ast.TY_any); + ty_fold_nil = (fun _ -> id Ast.TY_nil); + ty_fold_bool = (fun _ -> id Ast.TY_bool); + ty_fold_mach = (fun m -> id (Ast.TY_mach m)); + ty_fold_int = (fun _ -> id Ast.TY_int); + ty_fold_uint = (fun _ -> id Ast.TY_uint); + ty_fold_char = (fun _ -> id Ast.TY_char); + ty_fold_str = (fun _ -> id Ast.TY_str); + ty_fold_tup = (fun slots -> id (Ast.TY_tup slots)); + ty_fold_vec = (fun slot -> id (Ast.TY_vec slot)); + ty_fold_rec = (fun entries -> id (Ast.TY_rec entries)); + ty_fold_tag = (fun tag -> id (Ast.TY_tag tag)); + ty_fold_iso = (fun (i, tags) -> id (Ast.TY_iso { Ast.iso_index = i; + Ast.iso_group = tags })); + ty_fold_idx = (fun i -> id (Ast.TY_idx i)); + ty_fold_fn = (fun t -> id (Ast.TY_fn (rebuild_fn t))); + ty_fold_obj = (fun (eff,fns) -> + id (Ast.TY_obj + (eff, (htab_map fns + (fun id fn -> (id, rebuild_fn fn)))))); + ty_fold_chan = (fun t -> id (Ast.TY_chan t)); + ty_fold_port = (fun t -> id (Ast.TY_port t)); + ty_fold_task = (fun _ -> id Ast.TY_task); + ty_fold_native = (fun oid -> id (Ast.TY_native oid)); + ty_fold_param = (fun (i, mut) -> id (Ast.TY_param (i, mut))); + ty_fold_named = (fun n -> id (Ast.TY_named n)); + ty_fold_type = (fun _ -> id (Ast.TY_type)); + ty_fold_constrained = (fun (t, constrs) -> + id (Ast.TY_constrained (t, constrs))) } +;; + +let rebuild_ty_under_params + (ty:Ast.ty) + (params:Ast.ty_param array) + (args:Ast.ty array) + (resolve_names:bool) + : Ast.ty = + if (Array.length params) <> (Array.length args) + then err None "mismatched type-params" + else + let nmap = Hashtbl.create (Array.length args) in + let pmap = Hashtbl.create (Array.length args) in + let _ = + Array.iteri + begin + fun i (ident, param) -> + htab_put pmap (Ast.TY_param param) args.(i); + if resolve_names + then + htab_put nmap ident args.(i) + end + params + in + let substituted = ref false in + let rec rebuild_ty t = + let base = ty_fold_rebuild (fun t -> t) in + let ty_fold_param (i, mut) = + let param = Ast.TY_param (i, mut) in + match htab_search pmap param with + None -> param + | Some arg -> (substituted := true; arg) + in + let ty_fold_named n = + let rec rebuild_name n = + match n with + Ast.NAME_base nb -> + Ast.NAME_base (rebuild_name_base nb) + | Ast.NAME_ext (n, nc) -> + Ast.NAME_ext (rebuild_name n, + rebuild_name_component nc) + + and rebuild_name_base nb = + match nb with + Ast.BASE_ident i -> + Ast.BASE_ident i + | Ast.BASE_temp t -> + Ast.BASE_temp t + | Ast.BASE_app (i, tys) -> + Ast.BASE_app (i, rebuild_tys tys) + + and rebuild_name_component nc = + match nc with + Ast.COMP_ident i -> + Ast.COMP_ident i + | Ast.COMP_app (i, tys) -> + Ast.COMP_app (i, rebuild_tys tys) + | Ast.COMP_idx i -> + Ast.COMP_idx i + + and rebuild_tys tys = + Array.map (fun t -> rebuild_ty t) tys + in + let n = rebuild_name n in + match n with + Ast.NAME_base (Ast.BASE_ident id) + when resolve_names -> + begin + match htab_search nmap id with + None -> Ast.TY_named n + | Some arg -> (substituted := true; arg) + end + | _ -> Ast.TY_named n + in + let fold = + { base with + ty_fold_param = ty_fold_param; + ty_fold_named = ty_fold_named; + } + in + let t' = fold_ty fold t in + (* + * FIXME: "substituted" and "ty'" here are only required + * because the current type-equality-comparison code in Type + * uses <> and will judge some cases, such as rebuilt tags, as + * unequal simply due to the different hashtable order in the + * fold. + *) + if !substituted + then t' + else t + in + rebuild_ty ty +;; + +let associative_binary_op_ty_fold + (default:'a) + (fn:'a -> 'a -> 'a) + : 'a simple_ty_fold = + let base = ty_fold_default default in + let reduce ls = + match ls with + [] -> default + | x::xs -> List.fold_left fn x xs + in + let reduce_fn ((islots, _, oslot), _) = + fn islots oslot + in + { base with + ty_fold_slots = (fun slots -> reduce (Array.to_list slots)); + ty_fold_slot = (fun (_, _, a) -> a); + ty_fold_tags = (fun tab -> reduce (htab_vals tab)); + ty_fold_tup = (fun a -> a); + ty_fold_vec = (fun a -> a); + ty_fold_rec = (fun sz -> + reduce (Array.to_list + (Array.map (fun (_, s) -> s) sz))); + ty_fold_tag = (fun a -> a); + ty_fold_iso = (fun (_,iso) -> reduce (Array.to_list iso)); + ty_fold_fn = reduce_fn; + ty_fold_obj = (fun (_,fns) -> + reduce (List.map reduce_fn (htab_vals fns))); + ty_fold_chan = (fun a -> a); + ty_fold_port = (fun a -> a); + ty_fold_constrained = (fun (a, _) -> a) } + +let ty_fold_bool_and (default:bool) : bool simple_ty_fold = + associative_binary_op_ty_fold default (fun a b -> a & b) +;; + +let ty_fold_bool_or (default:bool) : bool simple_ty_fold = + associative_binary_op_ty_fold default (fun a b -> a || b) +;; + +let ty_fold_int_max (default:int) : int simple_ty_fold = + associative_binary_op_ty_fold default (fun a b -> max a b) +;; + +let ty_fold_list_concat _ : ('a list) simple_ty_fold = + associative_binary_op_ty_fold [] (fun a b -> a @ b) +;; + +let type_is_structured (t:Ast.ty) : bool = + let fold = ty_fold_bool_or false in + let fold = { fold with + ty_fold_tup = (fun _ -> true); + ty_fold_vec = (fun _ -> true); + ty_fold_rec = (fun _ -> true); + ty_fold_tag = (fun _ -> true); + ty_fold_iso = (fun _ -> true); + ty_fold_idx = (fun _ -> true); + ty_fold_fn = (fun _ -> true); + ty_fold_obj = (fun _ -> true) } + in + fold_ty fold t +;; + +(* Effect analysis. *) +let effect_le x y = + match (x,y) with + (Ast.UNSAFE, _) -> true + | (Ast.STATE, Ast.PURE) -> true + | (Ast.STATE, Ast.IO) -> true + | (Ast.STATE, Ast.STATE) -> true + | (Ast.IO, Ast.PURE) -> true + | (Ast.IO, Ast.IO) -> true + | (Ast.PURE, Ast.PURE) -> true + | _ -> false +;; + +let lower_effect_of x y = + if effect_le x y then x else y +;; + +let type_effect (t:Ast.ty) : Ast.effect = + let fold_slot ((*mode*)_, mut, eff) = + if mut + then lower_effect_of Ast.STATE eff + else eff + in + let fold = associative_binary_op_ty_fold Ast.PURE lower_effect_of in + let fold = { fold with ty_fold_slot = fold_slot } in + fold_ty fold t +;; + +let type_has_state (t:Ast.ty) : bool = + effect_le (type_effect t) Ast.STATE +;; + + +(* Various type analyses. *) + +let is_prim_type (t:Ast.ty) : bool = + match t with + Ast.TY_int + | Ast.TY_uint + | Ast.TY_char + | Ast.TY_mach _ + | Ast.TY_bool -> true + | _ -> false +;; + +let type_contains_chan (t:Ast.ty) : bool = + let fold_chan _ = true in + let fold = ty_fold_bool_or false in + let fold = { fold with ty_fold_chan = fold_chan } in + fold_ty fold t +;; + + +let type_is_unsigned_2s_complement t = + match t with + Ast.TY_mach TY_u8 + | Ast.TY_mach TY_u16 + | Ast.TY_mach TY_u32 + | Ast.TY_mach TY_u64 + | Ast.TY_char + | Ast.TY_uint + | Ast.TY_bool -> true + | _ -> false +;; + + +let type_is_signed_2s_complement t = + match t with + Ast.TY_mach TY_i8 + | Ast.TY_mach TY_i16 + | Ast.TY_mach TY_i32 + | Ast.TY_mach TY_i64 + | Ast.TY_int -> true + | _ -> false +;; + + +let type_is_2s_complement t = + (type_is_unsigned_2s_complement t) + || (type_is_signed_2s_complement t) +;; + +let n_used_type_params t = + let fold_param (i,_) = i+1 in + let fold = ty_fold_int_max 0 in + let fold = { fold with ty_fold_param = fold_param } in + fold_ty fold t +;; + + + +let check_concrete params thing = + if Array.length params = 0 + then thing + else bug () "unhandled parametric binding" +;; + + +let project_type_to_slot + (base_ty:Ast.ty) + (comp:Ast.lval_component) + : Ast.slot = + match (base_ty, comp) with + (Ast.TY_rec elts, Ast.COMP_named (Ast.COMP_ident id)) -> + begin + match atab_search elts id with + Some slot -> slot + | None -> err None "unknown record-member '%s'" id + end + + | (Ast.TY_tup elts, Ast.COMP_named (Ast.COMP_idx i)) -> + if 0 <= i && i < (Array.length elts) + then elts.(i) + else err None "out-of-range tuple index %d" i + + | (Ast.TY_vec slot, Ast.COMP_atom _) -> + slot + + | (Ast.TY_str, Ast.COMP_atom _) -> + interior_slot (Ast.TY_mach TY_u8) + + | (Ast.TY_obj (_, fns), Ast.COMP_named (Ast.COMP_ident id)) -> + interior_slot (Ast.TY_fn (Hashtbl.find fns id)) + + | (_,_) -> + bug () + "unhandled form of lval-ext in Semant." + "project_slot: %a indexed by %a" + Ast.sprintf_ty base_ty Ast.sprintf_lval_component comp +;; + + +(* NB: this will fail if lval is not a slot. *) +let rec lval_slot (cx:ctxt) (lval:Ast.lval) : Ast.slot = + match lval with + Ast.LVAL_base nb -> lval_to_slot cx nb.id + | Ast.LVAL_ext (base, comp) -> + let base_ty = slot_ty (lval_slot cx base) in + project_type_to_slot base_ty comp +;; + +let exports_permit (view:Ast.mod_view) (ident:Ast.ident) : bool = + (Hashtbl.mem view.Ast.view_exports Ast.EXPORT_all_decls) || + (Hashtbl.mem view.Ast.view_exports (Ast.EXPORT_ident ident)) +;; + +(* NB: this will fail if lval is not an item. *) +let rec lval_item (cx:ctxt) (lval:Ast.lval) : Ast.mod_item = + match lval with + Ast.LVAL_base nb -> + begin + let referent = lval_to_referent cx nb.id in + match htab_search cx.ctxt_all_defns referent with + Some (DEFN_item item) -> {node=item; id=referent} + | _ -> err (Some (lval_base_id lval)) + "lval does not name an item" + end + | Ast.LVAL_ext (base, comp) -> + let base_item = lval_item cx base in + match base_item.node.Ast.decl_item with + Ast.MOD_ITEM_mod (view, items) -> + begin + let i, args = + match comp with + Ast.COMP_named (Ast.COMP_ident i) -> (i, [||]) + | Ast.COMP_named (Ast.COMP_app (i, args)) -> (i, args) + | _ -> + bug () + "unhandled lval-component '%a' in Semant.lval_item" + Ast.sprintf_lval_component comp + in + match htab_search items i with + | Some sub when exports_permit view i -> + assert + ((Array.length sub.node.Ast.decl_params) = + (Array.length args)); + check_concrete base_item.node.Ast.decl_params sub + | _ -> err (Some (lval_base_id lval)) + "unknown module item '%s'" i + end + | _ -> err (Some (lval_base_id lval)) + "lval base %a does not name a module" Ast.sprintf_lval base +;; + +let lval_is_slot (cx:ctxt) (lval:Ast.lval) : bool = + match resolve_lval cx lval with + DEFN_slot _ -> true + | _ -> false +;; + +let lval_is_item (cx:ctxt) (lval:Ast.lval) : bool = + match resolve_lval cx lval with + DEFN_item _ -> true + | _ -> false +;; + +let lval_is_direct_fn (cx:ctxt) (lval:Ast.lval) : bool = + let defn = resolve_lval cx lval in + (defn_is_static defn) && (defn_is_callable defn) +;; + +let lval_is_direct_mod (cx:ctxt) (lval:Ast.lval) : bool = + let defn = resolve_lval cx lval in + if not (defn_is_static defn) + then false + else + match defn with + DEFN_item { Ast.decl_item = Ast.MOD_ITEM_mod _ } -> true + | _ -> false +;; + +let lval_is_static (cx:ctxt) (lval:Ast.lval) : bool = + defn_is_static (resolve_lval cx lval) +;; + +let lval_is_callable (cx:ctxt) (lval:Ast.lval) : bool = + defn_is_callable (resolve_lval cx lval) +;; + +let lval_is_obj_vtbl (cx:ctxt) (lval:Ast.lval) : bool = + if lval_is_slot cx lval + then + match lval with + Ast.LVAL_ext (base, _) -> + begin + match slot_ty (lval_slot cx base) with + Ast.TY_obj _ -> true + | _ -> false + end + | _ -> false + else false +;; + +let rec lval_ty (cx:ctxt) (lval:Ast.lval) : Ast.ty = + let base_id = lval_base_id lval in + Hashtbl.find cx.ctxt_all_lval_types base_id +;; + +let rec atom_type (cx:ctxt) (at:Ast.atom) : Ast.ty = + match at with + Ast.ATOM_literal {node=(Ast.LIT_int _); id=_} -> Ast.TY_int + | Ast.ATOM_literal {node=(Ast.LIT_uint _); id=_} -> Ast.TY_uint + | Ast.ATOM_literal {node=(Ast.LIT_bool _); id=_} -> Ast.TY_bool + | Ast.ATOM_literal {node=(Ast.LIT_char _); id=_} -> Ast.TY_char + | Ast.ATOM_literal {node=(Ast.LIT_nil); id=_} -> Ast.TY_nil + | Ast.ATOM_literal {node=(Ast.LIT_mach (m,_,_)); id=_} -> Ast.TY_mach m + | Ast.ATOM_lval lv -> lval_ty cx lv +;; + +let expr_type (cx:ctxt) (e:Ast.expr) : Ast.ty = + match e with + Ast.EXPR_binary (op, a, _) -> + begin + match op with + Ast.BINOP_eq | Ast.BINOP_ne | Ast.BINOP_lt | Ast.BINOP_le + | Ast.BINOP_ge | Ast.BINOP_gt -> Ast.TY_bool + | _ -> atom_type cx a + end + | Ast.EXPR_unary (Ast.UNOP_not, _) -> Ast.TY_bool + | Ast.EXPR_unary (_, a) -> atom_type cx a + | Ast.EXPR_atom a -> atom_type cx a +;; + +(* Mappings between mod items and their respective types. *) + +let arg_slots (slots:Ast.header_slots) : Ast.slot array = + Array.map (fun (sid,_) -> sid.node) slots +;; + +let tup_slots (slots:Ast.header_tup) : Ast.slot array = + Array.map (fun sid -> sid.node) slots +;; + +let ty_fn_of_fn (fn:Ast.fn) : Ast.ty_fn = + ({ Ast.sig_input_slots = arg_slots fn.Ast.fn_input_slots; + Ast.sig_input_constrs = fn.Ast.fn_input_constrs; + Ast.sig_output_slot = fn.Ast.fn_output_slot.node }, + fn.Ast.fn_aux ) +;; + +let ty_obj_of_obj (obj:Ast.obj) : Ast.ty_obj = + (obj.Ast.obj_effect, + htab_map obj.Ast.obj_fns (fun i f -> (i, ty_fn_of_fn f.node))) +;; + +let ty_of_mod_item ((*inside*)_:bool) (item:Ast.mod_item) : Ast.ty = + match item.node.Ast.decl_item with + Ast.MOD_ITEM_type _ -> Ast.TY_type + | Ast.MOD_ITEM_fn f -> (Ast.TY_fn (ty_fn_of_fn f)) + | Ast.MOD_ITEM_mod _ -> bug () "Semant.ty_of_mod_item on mod" + | Ast.MOD_ITEM_obj ob -> + let taux = { Ast.fn_effect = Ast.PURE; + Ast.fn_is_iter = false } + in + let tobj = Ast.TY_obj (ty_obj_of_obj ob) in + let tsig = { Ast.sig_input_slots = arg_slots ob.Ast.obj_state; + Ast.sig_input_constrs = ob.Ast.obj_constrs; + Ast.sig_output_slot = interior_slot tobj } + in + (Ast.TY_fn (tsig, taux)) + + | Ast.MOD_ITEM_tag (htup, ttag, _) -> + let taux = { Ast.fn_effect = Ast.PURE; + Ast.fn_is_iter = false } + in + let tsig = { Ast.sig_input_slots = tup_slots htup; + Ast.sig_input_constrs = [| |]; + Ast.sig_output_slot = interior_slot (Ast.TY_tag ttag) } + in + (Ast.TY_fn (tsig, taux)) +;; + +(* Scopes and the visitor that builds them. *) + +type scope = + SCOPE_block of node_id + | SCOPE_mod_item of Ast.mod_item + | SCOPE_obj_fn of (Ast.fn identified) + | SCOPE_crate of Ast.crate +;; + +let id_of_scope (sco:scope) : node_id = + match sco with + SCOPE_block id -> id + | SCOPE_mod_item i -> i.id + | SCOPE_obj_fn f -> f.id + | SCOPE_crate c -> c.id +;; + +let scope_stack_managing_visitor + (scopes:(scope list) ref) + (inner:Walk.visitor) + : Walk.visitor = + let push s = + scopes := s :: (!scopes) + in + let pop _ = + scopes := List.tl (!scopes) + in + let visit_block_pre b = + push (SCOPE_block b.id); + inner.Walk.visit_block_pre b + in + let visit_block_post b = + inner.Walk.visit_block_post b; + pop(); + in + let visit_mod_item_pre n p i = + push (SCOPE_mod_item i); + inner.Walk.visit_mod_item_pre n p i + in + let visit_mod_item_post n p i = + inner.Walk.visit_mod_item_post n p i; + pop(); + in + let visit_obj_fn_pre obj ident fn = + push (SCOPE_obj_fn fn); + inner.Walk.visit_obj_fn_pre obj ident fn + in + let visit_obj_fn_post obj ident fn = + inner.Walk.visit_obj_fn_post obj ident fn; + pop(); + in + let visit_crate_pre c = + push (SCOPE_crate c); + inner.Walk.visit_crate_pre c + in + let visit_crate_post c = + inner.Walk.visit_crate_post c; + pop() + in + { inner with + Walk.visit_block_pre = visit_block_pre; + Walk.visit_block_post = visit_block_post; + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_fn_post = visit_obj_fn_post; + Walk.visit_crate_pre = visit_crate_pre; + Walk.visit_crate_post = visit_crate_post; } +;; + +(* Generic lookup, used for slots, items, types, etc. *) + +type resolved = ((scope list * node_id) option) ;; + +let get_item (cx:ctxt) (node:node_id) : Ast.mod_item_decl = + match htab_search cx.ctxt_all_defns node with + Some (DEFN_item item) -> item + | Some _ -> err (Some node) "defn is not an item" + | None -> bug () "missing defn" +;; + +let get_slot (cx:ctxt) (node:node_id) : Ast.slot = + match htab_search cx.ctxt_all_defns node with + Some (DEFN_slot slot) -> slot + | Some _ -> err (Some node) "defn is not a slot" + | None -> bug () "missing defn" +;; + +let get_mod_item + (cx:ctxt) + (node:node_id) + : (Ast.mod_view * Ast.mod_items) = + match get_item cx node with + { Ast.decl_item = Ast.MOD_ITEM_mod md } -> md + | _ -> err (Some node) "defn is not a mod" +;; + +let get_name_comp_ident + (comp:Ast.name_component) + : Ast.ident = + match comp with + Ast.COMP_ident i -> i + | Ast.COMP_app (i, _) -> i + | Ast.COMP_idx i -> string_of_int i +;; + +let get_name_base_ident + (comp:Ast.name_base) + : Ast.ident = + match comp with + Ast.BASE_ident i -> i + | Ast.BASE_app (i, _) -> i + | Ast.BASE_temp _ -> + bug () "get_name_base_ident on BASE_temp" +;; + +let rec project_ident_from_items + (cx:ctxt) + (scopes:scope list) + ((view:Ast.mod_view),(items:Ast.mod_items)) + (ident:Ast.ident) + (inside:bool) + : resolved = + if not (inside || (exports_permit view ident)) + then None + else + match htab_search items ident with + Some i -> Some (scopes, i.id) + | None -> + match htab_search view.Ast.view_imports ident with + None -> None + | Some name -> lookup_by_name cx scopes name + +and project_name_comp_from_resolved + (cx:ctxt) + (mod_res:resolved) + (ext:Ast.name_component) + : resolved = + match mod_res with + None -> None + | Some (scopes, id) -> + let scope = (SCOPE_mod_item {id=id; node=get_item cx id}) in + let scopes = scope :: scopes in + let ident = get_name_comp_ident ext in + let md = get_mod_item cx id in + project_ident_from_items cx scopes md ident false + +and lookup_by_name + (cx:ctxt) + (scopes:scope list) + (name:Ast.name) + : resolved = + assert (Ast.sane_name name); + match name with + Ast.NAME_base nb -> + let ident = get_name_base_ident nb in + lookup_by_ident cx scopes ident + | Ast.NAME_ext (name, ext) -> + let base_res = lookup_by_name cx scopes name in + project_name_comp_from_resolved cx base_res ext + +and lookup_by_ident + (cx:ctxt) + (scopes:scope list) + (ident:Ast.ident) + : resolved = + let check_slots scopes islots = + arr_search islots + (fun _ (sloti,ident') -> + if ident = ident' + then Some (scopes, sloti.id) + else None) + in + let check_params scopes params = + arr_search params + (fun _ {node=(i,_); id=id} -> + if i = ident then Some (scopes, id) else None) + in + let passed_capture_scope = ref false in + let would_capture r = + match r with + None -> None + | Some _ -> + if !passed_capture_scope + then err None "attempted dynamic environment-capture" + else r + in + let check_scope scopes scope = + match scope with + SCOPE_block block_id -> + let block_slots = Hashtbl.find cx.ctxt_block_slots block_id in + let block_items = Hashtbl.find cx.ctxt_block_items block_id in + begin + match htab_search block_slots (Ast.KEY_ident ident) with + Some id -> would_capture (Some (scopes, id)) + | None -> + match htab_search block_items ident with + Some id -> Some (scopes, id) + | None -> None + end + + | SCOPE_crate crate -> + project_ident_from_items + cx scopes crate.node.Ast.crate_items ident true + + | SCOPE_obj_fn fn -> + would_capture (check_slots scopes fn.node.Ast.fn_input_slots) + + | SCOPE_mod_item item -> + begin + let item_match = + match item.node.Ast.decl_item with + Ast.MOD_ITEM_fn f -> + check_slots scopes f.Ast.fn_input_slots + + | Ast.MOD_ITEM_obj obj -> + begin + match htab_search obj.Ast.obj_fns ident with + Some fn -> Some (scopes, fn.id) + | None -> check_slots scopes obj.Ast.obj_state + end + + | Ast.MOD_ITEM_mod md -> + project_ident_from_items cx scopes md ident true + + | _ -> None + in + match item_match with + Some _ -> item_match + | None -> + would_capture + (check_params scopes item.node.Ast.decl_params) + end + in + let rec search scopes = + match scopes with + [] -> None + | scope::rest -> + match check_scope scopes scope with + None -> + begin + let is_ty_item i = + match i.node.Ast.decl_item with + Ast.MOD_ITEM_type _ -> true + | _ -> false + in + match scope with + SCOPE_block _ + | SCOPE_obj_fn _ -> + search rest + + | SCOPE_mod_item item when is_ty_item item -> + search rest + + | _ -> + passed_capture_scope := true; + search rest + end + | x -> x + in + search scopes +;; + +let lookup_by_temp + (cx:ctxt) + (scopes:scope list) + (temp:temp_id) + : ((scope list * node_id) option) = + let passed_item_scope = ref false in + let check_scope scope = + if !passed_item_scope + then None + else + match scope with + SCOPE_block block_id -> + let block_slots = Hashtbl.find cx.ctxt_block_slots block_id in + htab_search block_slots (Ast.KEY_temp temp) + | _ -> + passed_item_scope := true; + None + in + list_search_ctxt scopes check_scope +;; + +let lookup + (cx:ctxt) + (scopes:scope list) + (key:Ast.slot_key) + : ((scope list * node_id) option) = + match key with + Ast.KEY_temp temp -> lookup_by_temp cx scopes temp + | Ast.KEY_ident ident -> lookup_by_ident cx scopes ident +;; + + +let run_passes + (cx:ctxt) + (name:string) + (path:Ast.name_component Stack.t) + (passes:Walk.visitor array) + (log:string->unit) + (crate:Ast.crate) + : unit = + let do_pass i pass = + let logger s = log (Printf.sprintf "pass %d: %s" i s) in + Walk.walk_crate + (Walk.path_managing_visitor path + (Walk.mod_item_logging_visitor logger path pass)) + crate + in + let sess = cx.ctxt_sess in + if sess.Session.sess_failed + then () + else + try + Session.time_inner name sess + (fun _ -> Array.iteri do_pass passes) + with + Semant_err (ido, str) -> report_err cx ido str +;; + +(* Rust type -> IL type conversion. *) + +let word_sty (abi:Abi.abi) : Il.scalar_ty = + Il.ValTy abi.Abi.abi_word_bits +;; + +let word_rty (abi:Abi.abi) : Il.referent_ty = + Il.ScalarTy (word_sty abi) +;; + +let tydesc_rty (abi:Abi.abi) : Il.referent_ty = + (* + * NB: must match corresponding tydesc structure + * in trans and offsets in ABI exactly. + *) + Il.StructTy + [| + word_rty abi; (* Abi.tydesc_field_first_param *) + word_rty abi; (* Abi.tydesc_field_size *) + word_rty abi; (* Abi.tydesc_field_align *) + Il.ScalarTy (Il.AddrTy Il.CodeTy); (* Abi.tydesc_field_copy_glue *) + Il.ScalarTy (Il.AddrTy Il.CodeTy); (* Abi.tydesc_field_drop_glue *) + Il.ScalarTy (Il.AddrTy Il.CodeTy); (* Abi.tydesc_field_free_glue *) + Il.ScalarTy (Il.AddrTy Il.CodeTy); (* Abi.tydesc_field_mark_glue *) + Il.ScalarTy (Il.AddrTy Il.CodeTy); (* Abi.tydesc_field_obj_drop_glue *) + |] +;; + +let obj_closure_rty (abi:Abi.abi) : Il.referent_ty = + Il.StructTy [| word_rty abi; + Il.ScalarTy (Il.AddrTy (tydesc_rty abi)); + word_rty abi (* A lie: it's opaque, but this permits + * GEP'ing to it. *) + |] +;; + +let rec referent_type (abi:Abi.abi) (t:Ast.ty) : Il.referent_ty = + let s t = Il.ScalarTy t in + let v b = Il.ValTy b in + let p t = Il.AddrTy t in + let sv b = s (v b) in + let sp t = s (p t) in + + let word = word_rty abi in + let ptr = sp Il.OpaqueTy in + let rc_ptr = sp (Il.StructTy [| word; Il.OpaqueTy |]) in + let codeptr = sp Il.CodeTy in + let tup ttup = Il.StructTy (Array.map (slot_referent_type abi) ttup) in + let tag ttag = + let union = + Il.UnionTy + (Array.map + (fun key -> tup (Hashtbl.find ttag key)) + (sorted_htab_keys ttag)) + in + let discriminant = word in + Il.StructTy [| discriminant; union |] + in + + match t with + Ast.TY_any -> Il.StructTy [| word; ptr |] + | Ast.TY_nil -> Il.NilTy + | Ast.TY_int + | Ast.TY_uint -> word + + | Ast.TY_bool -> sv Il.Bits8 + + | Ast.TY_mach (TY_u8) + | Ast.TY_mach (TY_i8) -> sv Il.Bits8 + + | Ast.TY_mach (TY_u16) + | Ast.TY_mach (TY_i16) -> sv Il.Bits16 + + | Ast.TY_mach (TY_u32) + | Ast.TY_mach (TY_i32) + | Ast.TY_mach (TY_f32) + | Ast.TY_char -> sv Il.Bits32 + + | Ast.TY_mach (TY_u64) + | Ast.TY_mach (TY_i64) + | Ast.TY_mach (TY_f64) -> sv Il.Bits64 + + | Ast.TY_str -> sp (Il.StructTy [| word; word; word; ptr |]) + | Ast.TY_vec _ -> sp (Il.StructTy [| word; word; word; ptr |]) + | Ast.TY_tup tt -> tup tt + | Ast.TY_rec tr -> tup (Array.map snd tr) + + | Ast.TY_fn _ -> + let fn_closure_ptr = sp (Il.StructTy [| word; Il.OpaqueTy |]) in + Il.StructTy [| codeptr; fn_closure_ptr |] + + | Ast.TY_obj _ -> + let obj_closure_ptr = sp (obj_closure_rty abi) in + Il.StructTy [| ptr; obj_closure_ptr |] + + | Ast.TY_tag ttag -> tag ttag + | Ast.TY_iso tiso -> tag tiso.Ast.iso_group.(tiso.Ast.iso_index) + + | Ast.TY_idx _ -> word (* A lie, but permits GEP'ing to it. *) + + | Ast.TY_chan _ + | Ast.TY_port _ + | Ast.TY_task -> rc_ptr + + | Ast.TY_type -> sp (tydesc_rty abi) + + | Ast.TY_native _ -> ptr + + | Ast.TY_param (i, _) -> Il.ParamTy i + + | Ast.TY_named _ -> bug () "named type in referent_type" + | Ast.TY_constrained (t, _) -> referent_type abi t + +and slot_referent_type (abi:Abi.abi) (sl:Ast.slot) : Il.referent_ty = + let s t = Il.ScalarTy t in + let v b = Il.ValTy b in + let p t = Il.AddrTy t in + let sv b = s (v b) in + let sp t = s (p t) in + + let word = sv abi.Abi.abi_word_bits in + + let rty = referent_type abi (slot_ty sl) in + match sl.Ast.slot_mode with + Ast.MODE_exterior _ -> sp (Il.StructTy [| word; rty |]) + | Ast.MODE_interior _ -> rty + | Ast.MODE_alias _ -> sp rty +;; + +let task_rty (abi:Abi.abi) : Il.referent_ty = + Il.StructTy + begin + Array.init + Abi.n_visible_task_fields + (fun _ -> word_rty abi) + end +;; + +let call_args_referent_type_full + (abi:Abi.abi) + (out_slot:Ast.slot) + (n_ty_params:int) + (in_slots:Ast.slot array) + (iterator_arg_rtys:Il.referent_ty array) + (indirect_arg_rtys:Il.referent_ty array) + : Il.referent_ty = + let out_slot_rty = slot_referent_type abi out_slot in + let out_ptr_rty = Il.ScalarTy (Il.AddrTy out_slot_rty) in + let task_ptr_rty = Il.ScalarTy (Il.AddrTy (task_rty abi)) in + let ty_param_rtys = + let td = Il.ScalarTy (Il.AddrTy (tydesc_rty abi)) in + Il.StructTy (Array.init n_ty_params (fun _ -> td)) + in + let arg_rtys = Il.StructTy (Array.map (slot_referent_type abi) in_slots) in + (* + * NB: must match corresponding calltup structure in trans and + * member indices in ABI exactly. + *) + Il.StructTy + [| + out_ptr_rty; (* Abi.calltup_elt_out_ptr *) + task_ptr_rty; (* Abi.calltup_elt_task_ptr *) + ty_param_rtys; (* Abi.calltup_elt_ty_params *) + arg_rtys; (* Abi.calltup_elt_args *) + Il.StructTy iterator_arg_rtys; (* Abi.calltup_elt_iterator_args *) + Il.StructTy indirect_arg_rtys (* Abi.calltup_elt_indirect_args *) + |] +;; + +let call_args_referent_type + (cx:ctxt) + (n_ty_params:int) + (callee_ty:Ast.ty) + (closure:Il.referent_ty option) + : Il.referent_ty = + let indirect_arg_rtys = + match closure with + None -> [| |] + | Some c -> + [| + (* Abi.indirect_args_elt_closure *) + Il.ScalarTy (Il.AddrTy c) + |] + in + let iterator_arg_rtys _ = + [| + (* Abi.iterator_args_elt_loop_size *) + Il.ScalarTy (Il.ValTy cx.ctxt_abi.Abi.abi_word_bits); + (* Abi.iterator_args_elt_loop_info_ptr *) + Il.ScalarTy (Il.AddrTy Il.OpaqueTy) + |] + in + match callee_ty with + Ast.TY_fn (tsig, taux) -> + call_args_referent_type_full + cx.ctxt_abi + tsig.Ast.sig_output_slot + n_ty_params + tsig.Ast.sig_input_slots + (if taux.Ast.fn_is_iter then (iterator_arg_rtys()) else [||]) + indirect_arg_rtys + + | _ -> bug cx "Semant.call_args_referent_type on non-callable type" +;; + +let indirect_call_args_referent_type + (cx:ctxt) + (n_ty_params:int) + (callee_ty:Ast.ty) + (closure:Il.referent_ty) + : Il.referent_ty = + call_args_referent_type cx n_ty_params callee_ty (Some closure) +;; + +let direct_call_args_referent_type + (cx:ctxt) + (callee_node:node_id) + : Il.referent_ty = + let ity = Hashtbl.find cx.ctxt_all_item_types callee_node in + let n_ty_params = + if item_is_obj_fn cx callee_node + then 0 + else n_item_ty_params cx callee_node + in + call_args_referent_type cx n_ty_params ity None +;; + +let ty_sz (abi:Abi.abi) (t:Ast.ty) : int64 = + force_sz (Il.referent_ty_size abi.Abi.abi_word_bits (referent_type abi t)) +;; + +let ty_align (abi:Abi.abi) (t:Ast.ty) : int64 = + force_sz (Il.referent_ty_align abi.Abi.abi_word_bits (referent_type abi t)) +;; + +let slot_sz (abi:Abi.abi) (s:Ast.slot) : int64 = + force_sz (Il.referent_ty_size abi.Abi.abi_word_bits + (slot_referent_type abi s)) +;; + +let word_slot (abi:Abi.abi) : Ast.slot = + interior_slot (Ast.TY_mach abi.Abi.abi_word_ty) +;; + +let read_alias_slot (ty:Ast.ty) : Ast.slot = + { Ast.slot_mode = Ast.MODE_alias; + Ast.slot_mutable = false; + Ast.slot_ty = Some ty } +;; + +let word_write_alias_slot (abi:Abi.abi) : Ast.slot = + { Ast.slot_mode = Ast.MODE_alias; + Ast.slot_mutable = true; + Ast.slot_ty = Some (Ast.TY_mach abi.Abi.abi_word_ty) } +;; + +let mk_ty_fn_or_iter + (out_slot:Ast.slot) + (arg_slots:Ast.slot array) + (is_iter:bool) + : Ast.ty = + (* In some cases we don't care what aux or constrs are. *) + let taux = { Ast.fn_effect = Ast.PURE; + Ast.fn_is_iter = is_iter; } + in + let tsig = { Ast.sig_input_slots = arg_slots; + Ast.sig_input_constrs = [| |]; + Ast.sig_output_slot = out_slot; } + in + Ast.TY_fn (tsig, taux) +;; + +let mk_ty_fn + (out_slot:Ast.slot) + (arg_slots:Ast.slot array) + : Ast.ty = + mk_ty_fn_or_iter out_slot arg_slots false +;; + +let mk_simple_ty_fn + (arg_slots:Ast.slot array) + : Ast.ty = + (* In some cases we don't care what the output slot is. *) + let out_slot = interior_slot Ast.TY_nil in + mk_ty_fn out_slot arg_slots +;; + +let mk_simple_ty_iter + (arg_slots:Ast.slot array) + : Ast.ty = + (* In some cases we don't care what the output slot is. *) + let out_slot = interior_slot Ast.TY_nil in + mk_ty_fn_or_iter out_slot arg_slots true +;; + + +(* name mangling support. *) + +let item_name (cx:ctxt) (id:node_id) : Ast.name = + Hashtbl.find cx.ctxt_all_item_names id +;; + +let item_str (cx:ctxt) (id:node_id) : string = + string_of_name (item_name cx id) +;; + +let ty_str (ty:Ast.ty) : string = + let base = associative_binary_op_ty_fold "" (fun a b -> a ^ b) in + let fold_slot (mode,mut,ty) = + (if mut then "m" else "") + ^ (match mode with + Ast.MODE_exterior -> "e" + | Ast.MODE_alias -> "a" + | Ast.MODE_interior -> "") + ^ ty + in + let num n = (string_of_int n) ^ "$" in + let len a = num (Array.length a) in + let join az = Array.fold_left (fun a b -> a ^ b) "" az in + let fold_slots slots = + "t" + ^ (len slots) + ^ (join slots) + in + let fold_rec entries = + "r" + ^ (len entries) + ^ (Array.fold_left + (fun str (ident, s) -> str ^ "$" ^ ident ^ "$" ^ s) + "" entries) + in + let fold_tags tags = + "g" + ^ (num (Hashtbl.length tags)) + ^ (Array.fold_left + (fun str key -> str ^ (string_of_name key) ^ (Hashtbl.find tags key)) + "" (sorted_htab_keys tags)) + in + let fold_iso (n, tags) = + "G" + ^ (num n) + ^ (len tags) + ^ (join tags) + in + let fold_mach m = + match m with + TY_u8 -> "U0" + | TY_u16 -> "U1" + | TY_u32 -> "U2" + | TY_u64 -> "U3" + | TY_i8 -> "I0" + | TY_i16 -> "I1" + | TY_i32 -> "I2" + | TY_i64 -> "I3" + | TY_f32 -> "F2" + | TY_f64 -> "F3" + in + let fold = + { base with + (* Structural types. *) + ty_fold_slot = fold_slot; + ty_fold_slots = fold_slots; + ty_fold_tags = fold_tags; + ty_fold_rec = fold_rec; + ty_fold_nil = (fun _ -> "n"); + ty_fold_bool = (fun _ -> "b"); + ty_fold_mach = fold_mach; + ty_fold_int = (fun _ -> "i"); + ty_fold_uint = (fun _ -> "u"); + ty_fold_char = (fun _ -> "c"); + ty_fold_obj = (fun _ -> "o"); + ty_fold_str = (fun _ -> "s"); + ty_fold_vec = (fun s -> "v" ^ s); + ty_fold_iso = fold_iso; + ty_fold_idx = (fun i -> "x" ^ (string_of_int i)); + (* FIXME: encode constrs, aux as well. *) + ty_fold_fn = (fun ((ins,_,out),_) -> "f" ^ ins ^ out); + + (* Built-in special types. *) + ty_fold_any = (fun _ -> "A"); + ty_fold_chan = (fun t -> "H" ^ t); + ty_fold_port = (fun t -> "R" ^ t); + ty_fold_task = (fun _ -> "T"); + ty_fold_native = (fun _ -> "N"); + ty_fold_param = (fun _ -> "P"); + ty_fold_type = (fun _ -> "Y"); + + (* FIXME: encode obj types. *) + (* FIXME: encode opaque and param numbers. *) + ty_fold_named = (fun _ -> bug () "string-encoding named type"); + (* FIXME: encode constrs as well. *) + ty_fold_constrained = (fun (t,_)-> t) } + in + fold_ty fold ty +;; + +let glue_str (cx:ctxt) (g:glue) : string = + match g with + GLUE_activate -> "glue$activate" + | GLUE_yield -> "glue$yield" + | GLUE_exit_main_task -> "glue$exit_main_task" + | GLUE_exit_task -> "glue$exit_task" + | GLUE_mark ty -> "glue$mark$" ^ (ty_str ty) + | GLUE_drop ty -> "glue$drop$" ^ (ty_str ty) + | GLUE_free ty -> "glue$free$" ^ (ty_str ty) + | GLUE_copy ty -> "glue$copy$" ^ (ty_str ty) + | GLUE_clone ty -> "glue$clone$" ^ (ty_str ty) + | GLUE_compare ty -> "glue$compare$" ^ (ty_str ty) + | GLUE_hash ty -> "glue$hash$" ^ (ty_str ty) + | GLUE_write ty -> "glue$write$" ^ (ty_str ty) + | GLUE_read ty -> "glue$read$" ^ (ty_str ty) + | GLUE_unwind -> "glue$unwind" + | GLUE_get_next_pc -> "glue$get_next_pc" + | GLUE_mark_frame i -> "glue$mark_frame$" ^ (item_str cx i) + | GLUE_drop_frame i -> "glue$drop_frame$" ^ (item_str cx i) + | GLUE_reloc_frame i -> "glue$reloc_frame$" ^ (item_str cx i) + (* + * FIXME: the node_id here isn't an item, it's a statement; + * lookup bind target and encode bound arg tuple type. + *) + | GLUE_fn_binding i + -> "glue$fn_binding$" ^ (string_of_int (int_of_node i)) + | GLUE_obj_drop oid + -> (item_str cx oid) ^ ".drop" + | GLUE_loop_body i + -> "glue$loop_body$" ^ (string_of_int (int_of_node i)) + | GLUE_forward (id, oty1, oty2) + -> "glue$forward$" + ^ id + ^ "$" ^ (ty_str (Ast.TY_obj oty1)) + ^ "$" ^ (ty_str (Ast.TY_obj oty2)) +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/trans.ml b/src/boot/me/trans.ml new file mode 100644 index 00000000..bca15136 --- /dev/null +++ b/src/boot/me/trans.ml @@ -0,0 +1,5031 @@ +(* Translation *) + +open Semant;; +open Common;; +open Transutil;; + +let log cx = Session.log "trans" + cx.ctxt_sess.Session.sess_log_trans + cx.ctxt_sess.Session.sess_log_out +;; + +let arr_max a = (Array.length a) - 1;; + +type quad_idx = int +;; + +type call = + { + call_ctrl: call_ctrl; + call_callee_ptr: Il.operand; + call_callee_ty: Ast.ty; + call_callee_ty_params: Ast.ty array; + call_output: Il.cell; + call_args: Ast.atom array; + call_iterator_args: Il.operand array; + call_indirect_args: Il.operand array; + } +;; + +let trans_visitor + (cx:ctxt) + (path:Ast.name_component Stack.t) + (inner:Walk.visitor) + : Walk.visitor = + + let iflog thunk = + if cx.ctxt_sess.Session.sess_log_trans + then thunk () + else () + in + + let curr_file = Stack.create () in + let curr_stmt = Stack.create () in + + let (abi:Abi.abi) = cx.ctxt_abi in + let (word_sz:int64) = word_sz abi in + let (word_slot:Ast.slot) = word_slot abi in + + let oper_str = Il.string_of_operand abi.Abi.abi_str_of_hardreg in + let cell_str = Il.string_of_cell abi.Abi.abi_str_of_hardreg in + + let (word_bits:Il.bits) = abi.Abi.abi_word_bits in + let (word_ty:Il.scalar_ty) = Il.ValTy word_bits in + let (word_rty:Il.referent_ty) = Il.ScalarTy word_ty in + let (word_ty_mach:ty_mach) = + match word_bits with + Il.Bits8 -> TY_u8 + | Il.Bits16 -> TY_u16 + | Il.Bits32 -> TY_u32 + | Il.Bits64 -> TY_u64 + in + let (word_ty_signed_mach:ty_mach) = + match word_bits with + Il.Bits8 -> TY_i8 + | Il.Bits16 -> TY_i16 + | Il.Bits32 -> TY_i32 + | Il.Bits64 -> TY_i64 + in + let word_n = word_n abi in + let imm_of_ty (i:int64) (tm:ty_mach) : Il.operand = + Il.Imm (Asm.IMM i, tm) + in + + let imm (i:int64) : Il.operand = imm_of_ty i word_ty_mach in + let simm (i:int64) : Il.operand = imm_of_ty i word_ty_signed_mach in + let one = imm 1L in + let zero = imm 0L in + let imm_true = imm_of_ty 1L TY_u8 in + let imm_false = imm_of_ty 0L TY_u8 in + let nil_ptr = Il.Mem ((Il.Abs (Asm.IMM 0L)), Il.NilTy) in + + let crate_rel fix = + Asm.SUB (Asm.M_POS fix, Asm.M_POS cx.ctxt_crate_fixup) + in + + let crate_rel_word fix = + Asm.WORD (word_ty_signed_mach, crate_rel fix) + in + + let crate_rel_imm (fix:fixup) : Il.operand = + Il.Imm (crate_rel fix, word_ty_signed_mach) + in + + let table_of_crate_rel_fixups (fixups:fixup array) : Asm.frag = + Asm.SEQ (Array.map crate_rel_word fixups) + in + + let fixup_rel_word (base:fixup) (fix:fixup) = + Asm.WORD (word_ty_signed_mach, + Asm.SUB (Asm.M_POS fix, Asm.M_POS base)) + in + + let table_of_fixup_rel_fixups + (fixup:fixup) + (fixups:fixup array) + : Asm.frag = + Asm.SEQ (Array.map (fixup_rel_word fixup) fixups) + in + + let table_of_table_rel_fixups (fixups:fixup array) : Asm.frag = + let table_fix = new_fixup "vtbl" in + Asm.DEF (table_fix, table_of_fixup_rel_fixups table_fix fixups) + in + + let nabi_indirect = + match cx.ctxt_sess.Session.sess_targ with + Linux_x86_elf -> false + | _ -> true + in + + let nabi_rust = + { nabi_indirect = nabi_indirect; + nabi_convention = CONV_rust } + in + + let out_mem_disp = abi.Abi.abi_frame_base_sz in + let arg0_disp = + Int64.add abi.Abi.abi_frame_base_sz abi.Abi.abi_implicit_args_sz + in + let frame_crate_ptr = word_n (-1) in + let frame_fns_disp = word_n (-2) in + + let fn_ty (id:node_id) : Ast.ty = + Hashtbl.find cx.ctxt_all_item_types id + in + let fn_args_rty + (id:node_id) + (closure:Il.referent_ty option) + : Il.referent_ty = + let n_params = + if item_is_obj_fn cx id + then 0 + else n_item_ty_params cx id + in + call_args_referent_type cx n_params (fn_ty id) closure + in + + let emitters = Stack.create () in + let push_new_emitter (vregs_ok:bool) (fnid:node_id option) = + let e = Il.new_emitter + abi.Abi.abi_prealloc_quad + abi.Abi.abi_is_2addr_machine + vregs_ok fnid + in + Stack.push (Hashtbl.create 0) e.Il.emit_size_cache; + Stack.push e emitters; + in + + let push_new_emitter_with_vregs fnid = push_new_emitter true fnid in + let push_new_emitter_without_vregs fnid = push_new_emitter false fnid in + + let pop_emitter _ = ignore (Stack.pop emitters) in + let emitter _ = Stack.top emitters in + let emitter_size_cache _ = Stack.top (emitter()).Il.emit_size_cache in + let push_emitter_size_cache _ = + Stack.push + (Hashtbl.copy (emitter_size_cache())) + (emitter()).Il.emit_size_cache + in + let pop_emitter_size_cache _ = + ignore (Stack.pop (emitter()).Il.emit_size_cache) + in + let emit q = Il.emit (emitter()) q in + let next_vreg _ = Il.next_vreg (emitter()) in + let next_vreg_cell t = Il.next_vreg_cell (emitter()) t in + let next_spill_cell t = + let s = Il.next_spill (emitter()) in + let spill_mem = Il.Spill s in + let spill_ta = (spill_mem, Il.ScalarTy t) in + Il.Mem spill_ta + in + let mark _ : quad_idx = (emitter()).Il.emit_pc in + let patch_existing (jmp:quad_idx) (targ:quad_idx) : unit = + Il.patch_jump (emitter()) jmp targ + in + let patch (i:quad_idx) : unit = + Il.patch_jump (emitter()) i (mark()); + (* Insert a dead quad to ensure there's an otherwise-unused + * jump-target here. + *) + emit Il.Dead + in + + let current_fn () = + match (emitter()).Il.emit_node with + None -> bug () "current_fn without associated node" + | Some id -> id + in + let current_fn_args_rty (closure:Il.referent_ty option) : Il.referent_ty = + fn_args_rty (current_fn()) closure + in + let current_fn_callsz () = get_callsz cx (current_fn()) in + + let annotations _ = + (emitter()).Il.emit_annotations + in + + let annotate (str:string) = + let e = emitter() in + Hashtbl.add e.Il.emit_annotations e.Il.emit_pc str + in + + let epilogue_jumps = Stack.create() in + + let path_name (_:unit) : string = + string_of_name (Walk.path_to_name path) + in + + let based (reg:Il.reg) : Il.mem = + Il.RegIn (reg, None) + in + + let based_off (reg:Il.reg) (off:Asm.expr64) : Il.mem = + Il.RegIn (reg, Some off) + in + + let based_imm (reg:Il.reg) (imm:int64) : Il.mem = + based_off reg (Asm.IMM imm) + in + + let fp_imm (imm:int64) : Il.mem = + based_imm abi.Abi.abi_fp_reg imm + in + + let sp_imm (imm:int64) : Il.mem = + based_imm abi.Abi.abi_sp_reg imm + in + + let word_at (mem:Il.mem) : Il.cell = + Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits)) + in + + let wordptr_at (mem:Il.mem) : Il.cell = + Il.Mem (mem, Il.ScalarTy (Il.AddrTy (Il.ScalarTy (Il.ValTy word_bits)))) + in + + let mov (dst:Il.cell) (src:Il.operand) : unit = + emit (Il.umov dst src) + in + + let umul (dst:Il.cell) (a:Il.operand) (b:Il.operand) : unit = + emit (Il.binary Il.UMUL dst a b); + in + + let add (dst:Il.cell) (a:Il.operand) (b:Il.operand) : unit = + emit (Il.binary Il.ADD dst a b); + in + + let add_to (dst:Il.cell) (src:Il.operand) : unit = + add dst (Il.Cell dst) src; + in + + let sub (dst:Il.cell) (a:Il.operand) (b:Il.operand) : unit = + emit (Il.binary Il.SUB dst a b); + in + + let sub_from (dst:Il.cell) (src:Il.operand) : unit = + sub dst (Il.Cell dst) src; + in + + let lea (dst:Il.cell) (src:Il.mem) : unit = + emit (Il.lea dst (Il.Cell (Il.Mem (src, Il.OpaqueTy)))) + in + + let rty_ptr_at (mem:Il.mem) (pointee_rty:Il.referent_ty) : Il.cell = + Il.Mem (mem, Il.ScalarTy (Il.AddrTy pointee_rty)) + in + + let ptr_at (mem:Il.mem) (pointee_ty:Ast.ty) : Il.cell = + rty_ptr_at mem (referent_type abi pointee_ty) + in + + let need_scalar_ty (rty:Il.referent_ty) : Il.scalar_ty = + match rty with + Il.ScalarTy s -> s + | _ -> bug () "expected ScalarTy" + in + + let need_mem_cell (cell:Il.cell) : Il.typed_mem = + match cell with + Il.Mem a -> a + | Il.Reg _ -> bug () + "expected address cell, got non-address register cell" + in + + let need_cell (operand:Il.operand) : Il.cell = + match operand with + Il.Cell c -> c + | _ -> bug () "expected cell, got operand %s" + (Il.string_of_operand abi.Abi.abi_str_of_hardreg operand) + in + + let get_element_ptr = + Il.get_element_ptr word_bits abi.Abi.abi_str_of_hardreg + in + + let get_variant_ptr (mem_cell:Il.cell) (i:int) : Il.cell = + match mem_cell with + Il.Mem (mem, Il.UnionTy elts) + when i >= 0 && i < (Array.length elts) -> + assert ((Array.length elts) != 0); + Il.Mem (mem, elts.(i)) + + | _ -> bug () "get_variant_ptr %d on cell %s" i + (cell_str mem_cell) + in + + let rec ptr_cast (cell:Il.cell) (rty:Il.referent_ty) : Il.cell = + match cell with + Il.Mem (mem, _) -> Il.Mem (mem, rty) + | Il.Reg (reg, Il.AddrTy _) -> Il.Reg (reg, Il.AddrTy rty) + | _ -> bug () "expected address cell in Trans.ptr_cast" + + and curr_crate_ptr _ : Il.cell = + word_at (fp_imm frame_crate_ptr) + + and crate_rel_to_ptr (rel:Il.operand) (rty:Il.referent_ty) : Il.cell = + let cell = next_vreg_cell (Il.AddrTy rty) in + mov cell (Il.Cell (curr_crate_ptr())); + add_to cell rel; + cell + + (* + * Note: alias *requires* its cell to be in memory already, and should + * only be used on slots you know to be memory-resident. Use 'aliasing' or + * 'via_memory' if you have a cell or operand you want in memory for a very + * short period of time (the time spent by the code generated by the thunk). + *) + + and alias (cell:Il.cell) : Il.cell = + let mem, ty = need_mem_cell cell in + let vreg_cell = next_vreg_cell (Il.AddrTy ty) in + begin + match ty with + Il.NilTy -> () + | _ -> lea vreg_cell mem + end; + vreg_cell + + and force_to_mem (src:Il.operand) : Il.typed_mem = + let do_spill op (t:Il.scalar_ty) = + let spill = next_spill_cell t in + mov spill op; + need_mem_cell spill + in + match src with + Il.Cell (Il.Mem ta) -> ta + | Il.Cell (Il.Reg (_, t)) -> do_spill src t + | Il.Imm _ -> do_spill src (Il.ValTy word_bits) + | Il.ImmPtr (f, rty) -> + do_spill + (Il.Cell (crate_rel_to_ptr (crate_rel_imm f) rty)) + (Il.AddrTy rty) + + and force_to_reg (op:Il.operand) : Il.typed_reg = + let do_mov op st = + let tmp = next_vreg () in + let regty = (tmp, st) in + mov (Il.Reg regty) op; + regty + in + match op with + Il.Imm (_, tm) -> do_mov op (Il.ValTy (Il.bits_of_ty_mach tm)) + | Il.ImmPtr (f, rty) -> + do_mov + (Il.Cell (crate_rel_to_ptr (crate_rel_imm f) rty)) + (Il.AddrTy rty) + | Il.Cell (Il.Reg rt) -> rt + | Il.Cell (Il.Mem (_, Il.ScalarTy st)) -> do_mov op st + | Il.Cell (Il.Mem (_, rt)) -> + bug () "forcing non-scalar referent of type %s to register" + (Il.string_of_referent_ty rt) + + and via_memory (writeback:bool) (c:Il.cell) (thunk:Il.cell -> unit) : unit = + match c with + Il.Mem _ -> thunk c + | Il.Reg _ -> + let mem_c = Il.Mem (force_to_mem (Il.Cell c)) in + thunk mem_c; + if writeback + then + mov c (Il.Cell mem_c) + + and aliasing (writeback:bool) (c:Il.cell) (thunk:Il.cell -> unit) : unit = + via_memory writeback c (fun c -> thunk (alias c)) + + and pointee_type (ptr:Il.cell) : Il.referent_ty = + match ptr with + Il.Reg (_, (Il.AddrTy rt)) -> rt + | Il.Mem (_, Il.ScalarTy (Il.AddrTy rt)) -> rt + | _ -> + bug () "taking pointee-type of non-address cell %s " + (cell_str ptr) + + and deref (ptr:Il.cell) : Il.cell = + let (r, st) = force_to_reg (Il.Cell ptr) in + match st with + Il.AddrTy rt -> Il.Mem (based r, rt) + | _ -> bug () "dereferencing non-address cell of type %s " + (Il.string_of_scalar_ty st) + + and deref_off (ptr:Il.cell) (off:Asm.expr64) : Il.cell = + let (r, st) = force_to_reg (Il.Cell ptr) in + match st with + Il.AddrTy rt -> Il.Mem (based_off r off, rt) + | _ -> bug () "offset-dereferencing non-address cell of type %s " + (Il.string_of_scalar_ty st) + + and deref_imm (ptr:Il.cell) (imm:int64) : Il.cell = + deref_off ptr (Asm.IMM imm) + + and tp_imm (imm:int64) : Il.cell = + deref_imm abi.Abi.abi_tp_cell imm + in + + + let make_tydesc_slots n = + Array.init n (fun _ -> interior_slot Ast.TY_type) + in + + let cell_vreg_num (vr:(int option) ref) : int = + match !vr with + None -> + let v = (Il.next_vreg_num (emitter())) in + vr := Some v; + v + | Some v -> v + in + + let slot_id_referent_type (slot_id:node_id) : Il.referent_ty = + slot_referent_type abi (referent_to_slot cx slot_id) + in + + let caller_args_cell (args_rty:Il.referent_ty) : Il.cell = + Il.Mem (fp_imm out_mem_disp, args_rty) + in + + let get_ty_param (ty_params:Il.cell) (param_idx:int) : Il.cell = + get_element_ptr ty_params param_idx + in + + let get_ty_params_of_frame (fp:Il.reg) (n_params:int) : Il.cell = + let fn_ty = mk_simple_ty_fn [| |] in + let fn_rty = call_args_referent_type cx n_params fn_ty None in + let args_cell = Il.Mem (based_imm fp out_mem_disp, fn_rty) in + get_element_ptr args_cell Abi.calltup_elt_ty_params + in + + let get_args_for_current_frame _ = + let curr_args_rty = + current_fn_args_rty (Some Il.OpaqueTy) + in + caller_args_cell curr_args_rty + in + + let get_indirect_args_for_current_frame _ = + get_element_ptr (get_args_for_current_frame ()) + Abi.calltup_elt_indirect_args + in + + let get_iterator_args_for_current_frame _ = + get_element_ptr (get_args_for_current_frame ()) + Abi.calltup_elt_iterator_args + in + + let get_closure_for_current_frame _ = + let self_indirect_args = + get_indirect_args_for_current_frame () + in + get_element_ptr self_indirect_args + Abi.indirect_args_elt_closure + in + + let get_iter_block_fn_for_current_frame _ = + let self_iterator_args = + get_iterator_args_for_current_frame () + in + let blk_fn = get_element_ptr self_iterator_args + Abi.iterator_args_elt_block_fn + in + ptr_cast blk_fn + (Il.ScalarTy (Il.AddrTy Il.CodeTy)) + in + + let get_iter_outer_frame_ptr_for_current_frame _ = + let self_iterator_args = + get_iterator_args_for_current_frame () + in + get_element_ptr self_iterator_args + Abi.iterator_args_elt_outer_frame_ptr + in + + let get_obj_for_current_frame _ = + deref (ptr_cast + (get_closure_for_current_frame ()) + (Il.ScalarTy (Il.AddrTy (obj_closure_rty abi)))) + in + + let get_ty_params_of_current_frame _ : Il.cell = + let id = current_fn() in + let n_ty_params = n_item_ty_params cx id in + if item_is_obj_fn cx id + then + begin + let obj = get_obj_for_current_frame() in + let tydesc = get_element_ptr obj 1 in + let ty_params_ty = Ast.TY_tup (make_tydesc_slots n_ty_params) in + let ty_params_rty = referent_type abi ty_params_ty in + let ty_params = + get_element_ptr (deref tydesc) Abi.tydesc_field_first_param + in + let ty_params = + ptr_cast ty_params (Il.ScalarTy (Il.AddrTy ty_params_rty)) + in + deref ty_params + end + + else + get_ty_params_of_frame abi.Abi.abi_fp_reg n_ty_params + in + + let get_ty_param_in_current_frame (param_idx:int) : Il.cell = + get_ty_param (get_ty_params_of_current_frame()) param_idx + in + + let linearize_ty_params (ty:Ast.ty) : (Ast.ty * Il.operand array) = + let htab = Hashtbl.create 0 in + let q = Queue.create () in + let base = ty_fold_rebuild (fun t -> t) in + let ty_fold_param (i, mut) = + let param = Ast.TY_param (i, mut) in + match htab_search htab param with + Some p -> p + | None -> + let p = Ast.TY_param (Hashtbl.length htab, mut) in + htab_put htab param p; + Queue.add (Il.Cell (get_ty_param_in_current_frame i)) q; + p + in + let fold = + { base with + ty_fold_param = ty_fold_param; } + in + let ty = fold_ty fold ty in + (ty, queue_to_arr q) + in + + let has_parametric_types (t:Ast.ty) : bool = + let base = ty_fold_bool_or false in + let ty_fold_param _ = + true + in + let fold = { base with ty_fold_param = ty_fold_param } in + fold_ty fold t + in + + let rec calculate_sz (ty_params:Il.cell) (size:size) : Il.operand = + iflog (fun _ -> annotate + (Printf.sprintf "calculating size %s" + (string_of_size size))); + let sub_sz = calculate_sz ty_params in + match htab_search (emitter_size_cache()) size with + Some op -> op + | _ -> + let res = + match size with + SIZE_fixed i -> imm i + | SIZE_fixup_mem_pos f -> Il.Imm (Asm.M_POS f, word_ty_mach) + | SIZE_fixup_mem_sz f -> Il.Imm (Asm.M_SZ f, word_ty_mach) + + | SIZE_param_size i -> + let tydesc = deref (get_ty_param ty_params i) in + Il.Cell (get_element_ptr tydesc Abi.tydesc_field_size) + + | SIZE_param_align i -> + let tydesc = deref (get_ty_param ty_params i) in + Il.Cell (get_element_ptr tydesc Abi.tydesc_field_align) + + | SIZE_rt_neg a -> + let op_a = sub_sz a in + let tmp = next_vreg_cell word_ty in + emit (Il.unary Il.NEG tmp op_a); + Il.Cell tmp + + | SIZE_rt_add (a, b) -> + let op_a = sub_sz a in + let op_b = sub_sz b in + let tmp = next_vreg_cell word_ty in + add tmp op_a op_b; + Il.Cell tmp + + | SIZE_rt_mul (a, b) -> + let op_a = sub_sz a in + let op_b = sub_sz b in + let tmp = next_vreg_cell word_ty in + emit (Il.binary Il.UMUL tmp op_a op_b); + Il.Cell tmp + + | SIZE_rt_max (a, b) -> + let op_a = sub_sz a in + let op_b = sub_sz b in + let tmp = next_vreg_cell word_ty in + mov tmp op_a; + emit (Il.cmp op_a op_b); + let jmp = mark () in + emit (Il.jmp Il.JAE Il.CodeNone); + mov tmp op_b; + patch jmp; + Il.Cell tmp + + | SIZE_rt_align (align, off) -> + (* + * calculate off + pad where: + * + * pad = (align - (off mod align)) mod align + * + * In our case it's always a power of two, + * so we can just do: + * + * mask = align-1 + * off += mask + * off &= ~mask + * + *) + annotate "fetch alignment"; + let op_align = sub_sz align in + annotate "fetch offset"; + let op_off = sub_sz off in + let mask = next_vreg_cell word_ty in + let off = next_vreg_cell word_ty in + mov mask op_align; + sub_from mask one; + mov off op_off; + add_to off (Il.Cell mask); + emit (Il.unary Il.NOT mask (Il.Cell mask)); + emit (Il.binary Il.AND + off (Il.Cell off) (Il.Cell mask)); + Il.Cell off + in + iflog (fun _ -> annotate + (Printf.sprintf "calculated size %s is %s" + (string_of_size size) + (oper_str res))); + htab_put (emitter_size_cache()) size res; + res + + + and calculate_sz_in_current_frame (size:size) : Il.operand = + calculate_sz (get_ty_params_of_current_frame()) size + + and callee_args_cell (tail_area:bool) (args_rty:Il.referent_ty) : Il.cell = + if tail_area + then + Il.Mem (sp_off_sz (current_fn_callsz ()), args_rty) + else + Il.Mem (sp_imm 0L, args_rty) + + and based_sz (ty_params:Il.cell) (reg:Il.reg) (size:size) : Il.mem = + match Il.size_to_expr64 size with + Some e -> based_off reg e + | None -> + let runtime_size = calculate_sz ty_params size in + let v = next_vreg () in + let c = (Il.Reg (v, word_ty)) in + mov c (Il.Cell (Il.Reg (reg, word_ty))); + add_to c runtime_size; + based v + + and fp_off_sz (size:size) : Il.mem = + based_sz (get_ty_params_of_current_frame()) abi.Abi.abi_fp_reg size + + and sp_off_sz (size:size) : Il.mem = + based_sz (get_ty_params_of_current_frame()) abi.Abi.abi_sp_reg size + in + + let slot_sz_in_current_frame (slot:Ast.slot) : Il.operand = + let rty = slot_referent_type abi slot in + let sz = Il.referent_ty_size word_bits rty in + calculate_sz_in_current_frame sz + in + + let slot_sz_with_ty_params + (ty_params:Il.cell) + (slot:Ast.slot) + : Il.operand = + let rty = slot_referent_type abi slot in + let sz = Il.referent_ty_size word_bits rty in + calculate_sz ty_params sz + in + + let get_element_ptr_dyn + (ty_params:Il.cell) + (mem_cell:Il.cell) + (i:int) + : Il.cell = + match mem_cell with + Il.Mem (mem, Il.StructTy elts) + when i >= 0 && i < (Array.length elts) -> + assert ((Array.length elts) != 0); + begin + let elt_rty = elts.(i) in + let elt_off = Il.get_element_offset word_bits elts i in + match elt_off with + SIZE_fixed fixed_off -> + Il.Mem (Il.mem_off_imm mem fixed_off, elt_rty) + | sz -> + let sz = calculate_sz ty_params sz in + let v = next_vreg word_ty in + let vc = Il.Reg (v, word_ty) in + lea vc mem; + add_to vc sz; + Il.Mem (based v, elt_rty) + end + | _ -> bug () "get_element_ptr_dyn %d on cell %s" i + (cell_str mem_cell) + in + + let get_element_ptr_dyn_in_current_frame + (mem_cell:Il.cell) + (i:int) + : Il.cell = + get_element_ptr_dyn (get_ty_params_of_current_frame()) mem_cell i + in + + let get_explicit_args_for_current_frame _ = + get_element_ptr_dyn_in_current_frame (get_args_for_current_frame ()) + Abi.calltup_elt_args + in + + + let deref_off_sz + (ty_params:Il.cell) + (ptr:Il.cell) + (size:size) + : Il.cell = + match Il.size_to_expr64 size with + Some e -> deref_off ptr e + | None -> + let (r,_) = force_to_reg (Il.Cell ptr) in + let mem = based_sz ty_params r size in + Il.Mem (mem, (pointee_type ptr)) + in + + let cell_of_block_slot + (slot_id:node_id) + : Il.cell = + let referent_type = slot_id_referent_type slot_id in + match htab_search cx.ctxt_slot_vregs slot_id with + Some vr -> + begin + match referent_type with + Il.ScalarTy st -> Il.Reg (Il.Vreg (cell_vreg_num vr), st) + | Il.NilTy -> nil_ptr + | Il.StructTy _ -> bugi cx slot_id + "cannot treat structured referent as single operand" + | Il.UnionTy _ -> bugi cx slot_id + "cannot treat union referent as single operand" + | Il.ParamTy _ -> bugi cx slot_id + "cannot treat parametric referent as single operand" + | Il.OpaqueTy -> bugi cx slot_id + "cannot treat opaque referent as single operand" + | Il.CodeTy -> bugi cx slot_id + "cannot treat code referent as single operand" + end + | None -> + begin + match htab_search cx.ctxt_slot_offsets slot_id with + None -> bugi cx slot_id + "slot assigned to neither vreg nor offset" + | Some off -> + if slot_is_obj_state cx slot_id + then + begin + let state_arg = get_closure_for_current_frame () in + let (slot_mem, _) = + need_mem_cell (deref_off_sz + (get_ty_params_of_current_frame()) + state_arg off) + in + Il.Mem (slot_mem, referent_type) + end + else + if (Stack.is_empty curr_stmt) + then + Il.Mem (fp_off_sz off, referent_type) + else + let slot_depth = get_slot_depth cx slot_id in + let stmt_depth = + get_stmt_depth cx (Stack.top curr_stmt) + in + if slot_depth <> stmt_depth + then + let _ = assert (slot_depth < stmt_depth) in + let _ = + iflog + begin + fun _ -> + let k = + Hashtbl.find cx.ctxt_slot_keys slot_id + in + annotate + (Printf.sprintf + "access outer frame slot #%d = %s" + (int_of_node slot_id) + (Ast.fmt_to_str + Ast.fmt_slot_key k)) + end + in + let diff = stmt_depth - slot_depth in + let _ = annotate "get outer frame pointer" in + let fp = + get_iter_outer_frame_ptr_for_current_frame () + in + if diff > 1 + then + bug () "unsupported nested for each loop"; + for i = 2 to diff do + (* FIXME: access outer caller-block fps, + * given nearest caller-block fp. + *) + let _ = + annotate "step to outer-outer frame" + in + mov fp (Il.Cell fp) + done; + let _ = annotate "calculate size" in + let p = + based_sz (get_ty_params_of_current_frame()) + (fst (force_to_reg (Il.Cell fp))) off + in + Il.Mem (p, referent_type) + else + Il.Mem (fp_off_sz off, referent_type) + end + in + + let binop_to_jmpop (binop:Ast.binop) : Il.jmpop = + match binop with + Ast.BINOP_eq -> Il.JE + | Ast.BINOP_ne -> Il.JNE + | Ast.BINOP_lt -> Il.JL + | Ast.BINOP_le -> Il.JLE + | Ast.BINOP_ge -> Il.JGE + | Ast.BINOP_gt -> Il.JG + | _ -> bug () "Unhandled binop in binop_to_jmpop" + in + + let get_vtbl_entry_idx (table_ptr:Il.cell) (i:int) : Il.cell = + (* Vtbls are encoded as tables of table-relative displacements. *) + let (table_mem, _) = need_mem_cell (deref table_ptr) in + let disp = Il.Cell (word_at (Il.mem_off_imm table_mem (word_n i))) in + let ptr_cell = next_vreg_cell (Il.AddrTy Il.CodeTy) in + mov ptr_cell (Il.Cell table_ptr); + add_to ptr_cell disp; + ptr_cell + in + + let get_vtbl_entry + (obj_cell:Il.cell) + (obj_ty:Ast.ty_obj) + (id:Ast.ident) + : (Il.cell * Ast.ty_fn) = + let (_, fns) = obj_ty in + let sorted_idents = sorted_htab_keys fns in + let i = arr_idx sorted_idents id in + let fn_ty = Hashtbl.find fns id in + let table_ptr = get_element_ptr obj_cell Abi.binding_field_item in + (get_vtbl_entry_idx table_ptr i, fn_ty) + in + + let rec trans_slot_lval_ext + (base_ty:Ast.ty) + (cell:Il.cell) + (comp:Ast.lval_component) + : (Il.cell * Ast.slot) = + + let bounds_checked_access at slot = + let atop = trans_atom at in + let unit_sz = slot_sz_in_current_frame slot in + let idx = next_vreg_cell word_ty in + emit (Il.binary Il.UMUL idx atop unit_sz); + let elt_mem = trans_bounds_check (deref cell) (Il.Cell idx) in + (Il.Mem (elt_mem, slot_referent_type abi slot), slot) + in + + match (base_ty, comp) with + (Ast.TY_rec entries, + Ast.COMP_named (Ast.COMP_ident id)) -> + let i = arr_idx (Array.map fst entries) id in + (get_element_ptr_dyn_in_current_frame cell i, snd entries.(i)) + + | (Ast.TY_tup entries, + Ast.COMP_named (Ast.COMP_idx i)) -> + (get_element_ptr_dyn_in_current_frame cell i, entries.(i)) + + | (Ast.TY_vec slot, + Ast.COMP_atom at) -> + bounds_checked_access at slot + + | (Ast.TY_str, + Ast.COMP_atom at) -> + bounds_checked_access at (interior_slot (Ast.TY_mach TY_u8)) + + | (Ast.TY_obj obj_ty, + Ast.COMP_named (Ast.COMP_ident id)) -> + let (cell, fn_ty) = get_vtbl_entry cell obj_ty id in + (cell, (interior_slot (Ast.TY_fn fn_ty))) + + + | _ -> bug () "unhandled form of lval_ext in trans_slot_lval_ext" + + (* + * vec: operand holding ptr to vec. + * mul_idx: index value * unit size. + * return: ptr to element. + *) + and trans_bounds_check (vec:Il.cell) (mul_idx:Il.operand) : Il.mem = + let (len:Il.cell) = get_element_ptr vec Abi.vec_elt_fill in + let (data:Il.cell) = get_element_ptr vec Abi.vec_elt_data in + let (base:Il.cell) = next_vreg_cell Il.voidptr_t in + let (elt_reg:Il.reg) = next_vreg () in + let (elt:Il.cell) = Il.Reg (elt_reg, Il.voidptr_t) in + let (diff:Il.cell) = next_vreg_cell word_ty in + annotate "bounds check"; + lea base (fst (need_mem_cell data)); + add elt (Il.Cell base) mul_idx; + emit (Il.binary Il.SUB diff (Il.Cell elt) (Il.Cell base)); + let jmp = trans_compare Il.JB (Il.Cell diff) (Il.Cell len) in + trans_cond_fail "bounds check" jmp; + based elt_reg + + and trans_lval_full + (initializing:bool) + (lv:Ast.lval) + : (Il.cell * Ast.slot) = + + let rec trans_slot_lval_full (initializing:bool) lv = + let (cell, slot) = + match lv with + Ast.LVAL_ext (base, comp) -> + let (base_cell, base_slot) = + trans_slot_lval_full initializing base + in + let base_cell' = deref_slot initializing base_cell base_slot in + trans_slot_lval_ext (slot_ty base_slot) base_cell' comp + + | Ast.LVAL_base nb -> + let slot = lval_to_slot cx nb.id in + let referent = lval_to_referent cx nb.id in + let cell = cell_of_block_slot referent in + (cell, slot) + in + iflog + begin + fun _ -> + annotate + (Printf.sprintf "lval %a = %s" + Ast.sprintf_lval lv + (cell_str cell)) + end; + (cell, slot) + + in + if lval_is_slot cx lv + then trans_slot_lval_full initializing lv + else + if initializing + then err None "init item" + else + begin + assert (lval_is_item cx lv); + bug () + "trans_lval_full called on item lval '%a'" Ast.sprintf_lval lv + end + + and trans_lval_maybe_init + (initializing:bool) + (lv:Ast.lval) + : (Il.cell * Ast.slot) = + trans_lval_full initializing lv + + and trans_lval_init (lv:Ast.lval) : (Il.cell * Ast.slot) = + trans_lval_maybe_init true lv + + and trans_lval (lv:Ast.lval) : (Il.cell * Ast.slot) = + trans_lval_maybe_init false lv + + and trans_callee + (flv:Ast.lval) + : (Il.operand * Ast.ty) = + (* direct call to item *) + let fty = Hashtbl.find cx.ctxt_all_lval_types (lval_base_id flv) in + if lval_is_item cx flv then + let fn_item = lval_item cx flv in + let fn_ptr = code_fixup_to_ptr_operand (get_fn_fixup cx fn_item.id) in + (fn_ptr, fty) + else + (* indirect call to computed slot *) + let (cell, _) = trans_lval flv in + (Il.Cell cell, fty) + + and trans_crate_rel_data_operand + (d:data) + (thunk:unit -> Asm.frag) + : Il.operand = + let (fix, _) = + htab_search_or_add cx.ctxt_data d + begin + fun _ -> + let fix = new_fixup "data item" in + let frag = Asm.DEF (fix, thunk()) in + (fix, frag) + end + in + crate_rel_imm fix + + and trans_crate_rel_data_frag (d:data) (thunk:unit -> Asm.frag) : Asm.frag = + let (fix, _) = + htab_search_or_add cx.ctxt_data d + begin + fun _ -> + let fix = new_fixup "data item" in + let frag = Asm.DEF (fix, thunk()) in + (fix, frag) + end + in + crate_rel_word fix + + and trans_crate_rel_static_string_operand (s:string) : Il.operand = + trans_crate_rel_data_operand (DATA_str s) (fun _ -> Asm.ZSTRING s) + + and trans_crate_rel_static_string_frag (s:string) : Asm.frag = + trans_crate_rel_data_frag (DATA_str s) (fun _ -> Asm.ZSTRING s) + + and trans_static_string (s:string) : Il.operand = + Il.Cell (crate_rel_to_ptr + (trans_crate_rel_static_string_operand s) + (referent_type abi Ast.TY_str)) + + and get_static_tydesc + (idopt:node_id option) + (t:Ast.ty) + (sz:int64) + (align:int64) + : Il.operand = + trans_crate_rel_data_operand + (DATA_tydesc t) + begin + fun _ -> + let tydesc_fixup = new_fixup "tydesc" in + log cx "tydesc for %a has sz=%Ld, align=%Ld" + Ast.sprintf_ty t sz align; + Asm.DEF + (tydesc_fixup, + Asm.SEQ + [| + Asm.WORD (word_ty_mach, Asm.IMM 0L); + Asm.WORD (word_ty_mach, Asm.IMM sz); + Asm.WORD (word_ty_mach, Asm.IMM align); + table_of_fixup_rel_fixups tydesc_fixup + [| + get_copy_glue t None; + get_drop_glue t None; + get_free_glue t (slot_mem_ctrl (interior_slot t)) None; + get_mark_glue t None; + |]; + (* Include any obj-dtor, if this is an obj and has one. *) + begin + match idopt with + None -> Asm.WORD (word_ty_mach, Asm.IMM 0L); + | Some oid -> + begin + let g = GLUE_obj_drop oid in + match htab_search cx.ctxt_glue_code g with + Some code -> + fixup_rel_word + tydesc_fixup + code.code_fixup; + | None -> + Asm.WORD (word_ty_mach, Asm.IMM 0L); + end + end; + |]) + end + + and get_obj_vtbl (id:node_id) : Il.operand = + let obj = + match Hashtbl.find cx.ctxt_all_defns id with + DEFN_item { Ast.decl_item=Ast.MOD_ITEM_obj obj} -> obj + | _ -> bug () "Trans.get_obj_vtbl on non-obj referent" + in + trans_crate_rel_data_operand (DATA_obj_vtbl id) + begin + fun _ -> + iflog (fun _ -> log cx "emitting %d-entry obj vtbl for %s" + (Hashtbl.length obj.Ast.obj_fns) (path_name())); + table_of_table_rel_fixups + (Array.map + begin + fun k -> + let fn = Hashtbl.find obj.Ast.obj_fns k in + get_fn_fixup cx fn.id + end + (sorted_htab_keys obj.Ast.obj_fns)) + end + + + and trans_copy_forward_args (args_rty:Il.referent_ty) : unit = + let caller_args_cell = caller_args_cell args_rty in + let callee_args_cell = callee_args_cell false args_rty in + let (dst_reg, _) = force_to_reg (Il.Cell (alias callee_args_cell)) in + let (src_reg, _) = force_to_reg (Il.Cell (alias caller_args_cell)) in + let tmp_reg = next_vreg () in + let nbytes = force_sz (Il.referent_ty_size word_bits args_rty) in + abi.Abi.abi_emit_inline_memcpy (emitter()) + nbytes dst_reg src_reg tmp_reg false; + + + and get_forwarding_obj_fn + (ident:Ast.ident) + (caller:Ast.ty_obj) + (callee:Ast.ty_obj) + : fixup = + (* Forwarding "glue" is not glue in the normal sense of being called with + * only Abi.worst_case_glue_call_args args; the functions are full-fleged + * obj fns like any other, and they perform a full call to the target + * obj. We just use the glue facility here to store the forwarding + * operators somewhere. + *) + let g = GLUE_forward (ident, caller, callee) in + let fix = new_fixup (glue_str cx g) in + let fty = Hashtbl.find (snd caller) ident in + let self_args_rty = + call_args_referent_type cx 0 + (Ast.TY_fn fty) (Some (obj_closure_rty abi)) + in + let callsz = Il.referent_ty_size word_bits self_args_rty in + let spill = new_fixup "forwarding fn spill" in + trans_glue_frame_entry callsz spill; + let all_self_args_cell = caller_args_cell self_args_rty in + let self_indirect_args_cell = + get_element_ptr all_self_args_cell Abi.calltup_elt_indirect_args + in + (* + * Note: this is wrong. This assumes our closure is a vtbl, + * when in fact it is a pointer to a refcounted malloc slab + * containing an obj. + *) + let closure_cell = + deref (get_element_ptr self_indirect_args_cell + Abi.indirect_args_elt_closure) + in + + let (callee_fn_cell, _) = + get_vtbl_entry closure_cell callee ident + in + iflog (fun _ -> annotate "copy args forward to callee"); + trans_copy_forward_args self_args_rty; + + iflog (fun _ -> annotate "call through to callee"); + (* FIXME: use a tail-call here. *) + call_code (code_of_cell callee_fn_cell); + trans_glue_frame_exit fix spill g; + fix + + + and get_forwarding_vtbl + (caller:Ast.ty_obj) + (callee:Ast.ty_obj) + : Il.operand = + trans_crate_rel_data_operand (DATA_forwarding_vtbl (caller,callee)) + begin + fun _ -> + let (_,fns) = caller in + iflog (fun _ -> log cx "emitting %d-entry obj forwarding vtbl" + (Hashtbl.length fns)); + table_of_table_rel_fixups + (Array.map + begin + fun k -> + get_forwarding_obj_fn k caller callee + end + (sorted_htab_keys fns)) + end + + and trans_init_str (dst:Ast.lval) (s:string) : unit = + (* Include null byte. *) + let init_sz = Int64.of_int ((String.length s) + 1) in + let static = trans_static_string s in + let (dst, _) = trans_lval_init dst in + trans_upcall "upcall_new_str" dst [| static; imm init_sz |] + + and trans_lit (lit:Ast.lit) : Il.operand = + match lit with + Ast.LIT_nil -> Il.Cell (nil_ptr) + | Ast.LIT_bool false -> imm_false + | Ast.LIT_bool true -> imm_true + | Ast.LIT_char c -> imm_of_ty (Int64.of_int c) TY_u32 + | Ast.LIT_int (i, _) -> simm i + | Ast.LIT_uint (i, _) -> imm i + | Ast.LIT_mach (m, n, _) -> imm_of_ty n m + + and trans_atom (atom:Ast.atom) : Il.operand = + iflog + begin + fun _ -> + annotate (Ast.fmt_to_str Ast.fmt_atom atom) + end; + + match atom with + Ast.ATOM_lval lv -> + let (cell, slot) = trans_lval lv in + Il.Cell (deref_slot false cell slot) + + | Ast.ATOM_literal lit -> trans_lit lit.node + + and fixup_to_ptr_operand + (imm_ok:bool) + (fix:fixup) + (referent_ty:Il.referent_ty) + : Il.operand = + if imm_ok + then Il.ImmPtr (fix, referent_ty) + else Il.Cell (crate_rel_to_ptr (crate_rel_imm fix) referent_ty) + + and code_fixup_to_ptr_operand (fix:fixup) : Il.operand = + fixup_to_ptr_operand abi.Abi.abi_has_pcrel_code fix Il.CodeTy + + (* A pointer-valued op may be of the form ImmPtr, which carries its + * target fixup, "constant-propagated" through trans so that + * pc-relative addressing can make use of it whenever + * appropriate. Reify_ptr exists for cases when you are about to + * store an ImmPtr into a memory cell or other place beyond which the + * compiler will cease to know about its identity; at this point you + * should decay it to a crate-relative displacement and + * (computationally) add it to the crate base value, before working + * with it. + * + * This helps you obey the IL type-system prohibition against + * 'mov'-ing an ImmPtr to a cell. If you forget to call this + * in the right places, you will get code-generation failures. + *) + and reify_ptr (op:Il.operand) : Il.operand = + match op with + Il.ImmPtr (fix, rty) -> + Il.Cell (crate_rel_to_ptr (crate_rel_imm fix) rty) + | _ -> op + + and annotate_quads (name:string) : unit = + let e = emitter() in + let quads = emitted_quads e in + let annotations = annotations() in + log cx "emitted quads for %s:" name; + for i = 0 to arr_max quads + do + if Hashtbl.mem annotations i + then + List.iter + (fun a -> log cx "// %s" a) + (List.rev (Hashtbl.find_all annotations i)); + log cx "[%6d]\t%s" i + (Il.string_of_quad + abi.Abi.abi_str_of_hardreg quads.(i)); + done + + + and write_frame_info_ptrs (fnid:node_id option) = + let frame_fns = + match fnid with + None -> zero + | Some fnid -> get_frame_glue_fns fnid + in + let crate_ptr_reg = next_vreg () in + let crate_ptr_cell = Il.Reg (crate_ptr_reg, (Il.AddrTy Il.OpaqueTy)) in + iflog (fun _ -> annotate "write frame-info pointers"); + Abi.load_fixup_addr (emitter()) + crate_ptr_reg cx.ctxt_crate_fixup Il.OpaqueTy; + mov (word_at (fp_imm frame_crate_ptr)) (Il.Cell (crate_ptr_cell)); + mov (word_at (fp_imm frame_fns_disp)) frame_fns + + and check_interrupt_flag _ = + let wordptr_ty = Il.AddrTy (Il.ScalarTy word_ty) in + let dom = next_vreg_cell wordptr_ty in + let flag = next_vreg_cell word_ty in + mov dom (Il.Cell (tp_imm (word_n Abi.task_field_dom))); + mov flag (Il.Cell (deref_imm dom + (word_n Abi.dom_field_interrupt_flag))); + let null_jmp = null_check flag in + trans_yield (); + patch null_jmp + + and trans_glue_frame_entry + (callsz:size) + (spill:fixup) + : unit = + let framesz = SIZE_fixup_mem_sz spill in + push_new_emitter_with_vregs None; + iflog (fun _ -> annotate "prologue"); + abi.Abi.abi_emit_fn_prologue (emitter()) + framesz callsz nabi_rust (upcall_fixup "upcall_grow_task"); + write_frame_info_ptrs None; + check_interrupt_flag (); + iflog (fun _ -> annotate "finished prologue"); + + and emitted_quads e = + Array.sub e.Il.emit_quads 0 e.Il.emit_pc + + and capture_emitted_glue (fix:fixup) (spill:fixup) (g:glue) : unit = + let e = emitter() in + iflog (fun _ -> annotate_quads (glue_str cx g)); + let code = { code_fixup = fix; + code_quads = emitted_quads e; + code_vregs_and_spill = Some (Il.num_vregs e, spill); } + in + htab_put cx.ctxt_glue_code g code + + and trans_glue_frame_exit (fix:fixup) (spill:fixup) (g:glue) : unit = + iflog (fun _ -> annotate "epilogue"); + abi.Abi.abi_emit_fn_epilogue (emitter()); + capture_emitted_glue fix spill g; + pop_emitter () + + and emit_exit_task_glue (fix:fixup) (g:glue) : unit = + let name = glue_str cx g in + let spill = new_fixup (name ^ " spill") in + push_new_emitter_with_vregs None; + (* + * We return-to-here in a synthetic frame we did not build; our job is + * merely to call upcall_exit. + *) + iflog (fun _ -> annotate "assume 'exited' state"); + trans_void_upcall "upcall_exit" [| |]; + capture_emitted_glue fix spill g; + pop_emitter () + + and get_exit_task_glue _ : fixup = + let g = GLUE_exit_task in + match htab_search cx.ctxt_glue_code g with + Some code -> code.code_fixup + | None -> + let fix = cx.ctxt_exit_task_fixup in + emit_exit_task_glue fix g; + fix + + (* + * Closure representation has 3 GEP-parts: + * + * ...... + * . gc . gc control word, if mutable + * +----+ + * | rc | refcount + * +----+ + * + * +----+ + * | tf | ----> pair of fn+binding that closure + * +----+ / targets + * | tb | -- + * +----+ + * + * +----+ + * | b1 | bound arg1 + * +----+ + * . . + * . . + * . . + * +----+ + * | bN | bound argN + * +----+ + *) + + and closure_referent_type + (bs:Ast.slot array) + (* FIXME (issue #5): mutability flag *) + : Il.referent_ty = + let rc = Il.ScalarTy word_ty in + let targ = referent_type abi (mk_simple_ty_fn [||]) in + let bindings = Array.map (slot_referent_type abi) bs in + Il.StructTy [| rc; targ; Il.StructTy bindings |] + + (* FIXME (issue #2): this should eventually use tail calling logic *) + + and emit_fn_binding_glue + (arg_slots:Ast.slot array) + (arg_bound_flags:bool array) + (fix:fixup) + (g:glue) + : unit = + let extract_slots want_bound = + arr_filter_some + (arr_map2 + (fun slot bound -> + if bound = want_bound then Some slot else None) + arg_slots + arg_bound_flags) + in + let bound_slots = extract_slots true in + let unbound_slots = extract_slots false in + let (self_ty:Ast.ty) = mk_simple_ty_fn unbound_slots in + let (callee_ty:Ast.ty) = mk_simple_ty_fn arg_slots in + + let self_closure_rty = closure_referent_type bound_slots in + (* FIXME: binding type parameters doesn't work. *) + let self_args_rty = + call_args_referent_type cx 0 self_ty (Some self_closure_rty) + in + let callee_args_rty = + call_args_referent_type cx 0 callee_ty (Some Il.OpaqueTy) + in + + let callsz = Il.referent_ty_size word_bits callee_args_rty in + let spill = new_fixup "bind glue spill" in + trans_glue_frame_entry callsz spill; + + let all_self_args_cell = caller_args_cell self_args_rty in + let self_indirect_args_cell = + get_element_ptr all_self_args_cell Abi.calltup_elt_indirect_args + in + let closure_cell = + deref (get_element_ptr self_indirect_args_cell + Abi.indirect_args_elt_closure) + in + let closure_target_cell = + get_element_ptr closure_cell Abi.binding_field_binding + in + let closure_target_fn_cell = + get_element_ptr closure_target_cell Abi.binding_field_item + in + + merge_bound_args + self_args_rty callee_args_rty + arg_slots arg_bound_flags; + iflog (fun _ -> annotate "call through to closure target fn"); + + (* + * Closures, unlike first-class [disp,*binding] pairs, contain + * a fully-resolved target pointer, not a displacement. So we + * don't want to use callee_fn_ptr or the like to access the + * contents. We just call through the cell directly. + *) + + call_code (code_of_cell closure_target_fn_cell); + trans_glue_frame_exit fix spill g + + + and get_fn_binding_glue + (bind_id:node_id) + (arg_slots:Ast.slot array) + (arg_bound_flags:bool array) + : fixup = + let g = GLUE_fn_binding bind_id in + match htab_search cx.ctxt_glue_code g with + Some code -> code.code_fixup + | None -> + let fix = new_fixup (glue_str cx g) in + emit_fn_binding_glue arg_slots arg_bound_flags fix g; + fix + + + (* + * Mem-glue functions are either 'mark', 'drop' or 'free', they take + * one pointer arg and return nothing. + *) + + and trans_mem_glue_frame_entry (n_outgoing_args:int) (spill:fixup) : unit = + let isz = cx.ctxt_abi.Abi.abi_implicit_args_sz in + let callsz = SIZE_fixed (Int64.add isz (word_n n_outgoing_args)) in + trans_glue_frame_entry callsz spill + + and get_mem_glue (g:glue) (inner:Il.mem -> unit) : fixup = + match htab_search cx.ctxt_glue_code g with + Some code -> code.code_fixup + | None -> + begin + let name = glue_str cx g in + let fix = new_fixup name in + (* + * Put a temporary code entry in the table to handle + * recursive emit calls during the generation of the glue + * function. + *) + let tmp_code = { code_fixup = fix; + code_quads = [| |]; + code_vregs_and_spill = None; } in + let spill = new_fixup (name ^ " spill") in + htab_put cx.ctxt_glue_code g tmp_code; + log cx "emitting glue: %s" name; + trans_mem_glue_frame_entry Abi.worst_case_glue_call_args spill; + let (arg:Il.mem) = fp_imm arg0_disp in + inner arg; + Hashtbl.remove cx.ctxt_glue_code g; + trans_glue_frame_exit fix spill g; + fix + end + + and get_typed_mem_glue + (g:glue) + (fty:Ast.ty) + (inner:Il.cell -> Il.cell -> unit) + : fixup = + get_mem_glue g + begin + fun _ -> + let n_ty_params = 0 in + let calltup_rty = + call_args_referent_type cx n_ty_params fty None + in + let calltup_cell = caller_args_cell calltup_rty in + let out_cell = + get_element_ptr calltup_cell Abi.calltup_elt_out_ptr + in + let args_cell = + get_element_ptr calltup_cell Abi.calltup_elt_args + in + begin + match Il.cell_referent_ty args_cell with + Il.StructTy az -> + assert ((Array.length az) + <= Abi.worst_case_glue_call_args); + | _ -> bug () "unexpected cell referent ty in glue args" + end; + inner out_cell args_cell + end + + and trace_str b s = + if b + then + begin + let static = trans_static_string s in + trans_void_upcall "upcall_trace_str" [| static |] + end + + and trace_word b w = + if b + then + trans_void_upcall "upcall_trace_word" [| Il.Cell w |] + + and ty_params_covering (t:Ast.ty) : Ast.slot = + let n_ty_params = n_used_type_params t in + let params = make_tydesc_slots n_ty_params in + read_alias_slot (Ast.TY_tup params) + + and get_drop_glue + (ty:Ast.ty) + (curr_iso:Ast.ty_iso option) + : fixup = + let g = GLUE_drop ty in + let inner _ (args:Il.cell) = + let ty_params = deref (get_element_ptr args 0) in + let cell = get_element_ptr args 1 in + note_drop_step ty "in drop-glue, dropping"; + trace_word cx.ctxt_sess.Session.sess_trace_drop cell; + drop_ty ty_params ty (deref cell) curr_iso; + note_drop_step ty "drop-glue complete"; + in + let ty_params_ptr = ty_params_covering ty in + let fty = mk_simple_ty_fn [| ty_params_ptr; read_alias_slot ty |] in + get_typed_mem_glue g fty inner + + + and get_free_glue + (ty:Ast.ty) + (mctrl:mem_ctrl) + (curr_iso:Ast.ty_iso option) + : fixup = + let g = GLUE_free ty in + let inner _ (args:Il.cell) = + (* + * Free-glue assumes it's called with a pointer to an + * exterior allocation with normal exterior layout. It's + * just a way to move drop+free out of leaf code. + *) + let ty_params = deref (get_element_ptr args 0) in + let cell = get_element_ptr args 1 in + let (body_mem, _) = + need_mem_cell + (get_element_ptr_dyn ty_params (deref cell) + Abi.exterior_rc_slot_field_body) + in + let vr = next_vreg_cell Il.voidptr_t in + lea vr body_mem; + note_drop_step ty "in free-glue, calling drop-glue on body"; + trace_word cx.ctxt_sess.Session.sess_trace_drop vr; + trans_call_simple_static_glue + (get_drop_glue ty curr_iso) ty_params vr; + note_drop_step ty "back in free-glue, calling free"; + if type_has_state ty + then + note_drop_step ty "type has state" + else + note_drop_step ty "type has no state"; + if mctrl = MEM_gc + then + begin + note_drop_step ty "MEM_gc, adjusting pointer"; + lea vr (fst (need_mem_cell (deref cell))); + emit (Il.binary Il.SUB vr (Il.Cell vr) + (imm + (word_n Abi.exterior_gc_malloc_return_adjustment))); + trans_free vr + end + else + begin + note_drop_step ty "not MEM_gc"; + trans_free cell; + end; + trace_str cx.ctxt_sess.Session.sess_trace_drop + "free-glue complete"; + in + let ty_params_ptr = ty_params_covering ty in + let fty = mk_simple_ty_fn [| ty_params_ptr; exterior_slot ty |] in + get_typed_mem_glue g fty inner + + + and get_mark_glue + (ty:Ast.ty) + (curr_iso:Ast.ty_iso option) + : fixup = + let g = GLUE_mark ty in + let inner _ (args:Il.cell) = + let ty_params = deref (get_element_ptr args 0) in + let cell = get_element_ptr args 1 in + mark_ty ty_params ty (deref cell) curr_iso + in + let ty_params_ptr = ty_params_covering ty in + let fty = mk_simple_ty_fn [| ty_params_ptr; read_alias_slot ty |] in + get_typed_mem_glue g fty inner + + + and get_clone_glue + (ty:Ast.ty) + (curr_iso:Ast.ty_iso option) + : fixup = + let g = GLUE_clone ty in + let inner (out_ptr:Il.cell) (args:Il.cell) = + let dst = deref out_ptr in + let ty_params = deref (get_element_ptr args 0) in + let src = deref (get_element_ptr args 1) in + let clone_task = get_element_ptr args 2 in + clone_ty ty_params clone_task ty dst src curr_iso + in + let ty_params_ptr = ty_params_covering ty in + let fty = + mk_ty_fn + (interior_slot ty) (* dst *) + [| + ty_params_ptr; + read_alias_slot ty; (* src *) + word_slot (* clone-task *) + |] + in + get_typed_mem_glue g fty inner + + + and get_copy_glue + (ty:Ast.ty) + (curr_iso:Ast.ty_iso option) + : fixup = + let g = GLUE_copy ty in + let inner (out_ptr:Il.cell) (args:Il.cell) = + let dst = deref out_ptr in + let ty_params = deref (get_element_ptr args 0) in + let src = deref (get_element_ptr args 1) in + copy_ty ty_params ty dst src curr_iso + in + let ty_params_ptr = ty_params_covering ty in + let fty = + mk_ty_fn + (interior_slot ty) + [| ty_params_ptr; read_alias_slot ty |] + in + get_typed_mem_glue g fty inner + + + (* Glue functions use mostly the same calling convention as ordinary + * functions. + * + * Each glue function expects its own particular arguments, which are + * usually aliases-- ie, caller doesn't transfer ownership to the + * glue. And nothing is represented in terms of AST nodes. So we + * don't do lvals-and-atoms here. + *) + + and trans_call_glue + (code:Il.code) + (dst:Il.cell option) + (args:Il.cell array) + : unit = + let inner dst = + let scratch = next_vreg_cell Il.voidptr_t in + let pop _ = emit (Il.Pop scratch) in + for i = ((Array.length args) - 1) downto 0 + do + emit (Il.Push (Il.Cell args.(i))) + done; + emit (Il.Push (Il.Cell abi.Abi.abi_tp_cell)); + emit (Il.Push dst); + call_code code; + pop (); + pop (); + Array.iter (fun _ -> pop()) args; + in + match dst with + None -> inner zero + | Some dst -> aliasing true dst (fun dst -> inner (Il.Cell dst)) + + and trans_call_static_glue + (callee:Il.operand) + (dst:Il.cell option) + (args:Il.cell array) + : unit = + trans_call_glue (code_of_operand callee) dst args + + and trans_call_dynamic_glue + (tydesc:Il.cell) + (idx:int) + (dst:Il.cell option) + (args:Il.cell array) + : unit = + let fptr = get_vtbl_entry_idx tydesc idx in + trans_call_glue (code_of_operand (Il.Cell fptr)) dst args + + and trans_call_simple_static_glue + (fix:fixup) + (ty_params:Il.cell) + (arg:Il.cell) + : unit = + trans_call_static_glue + (code_fixup_to_ptr_operand fix) + None [| alias ty_params; arg |] + + and get_tydesc_params + (outer_ty_params:Il.cell) + (td:Il.cell) + : Il.cell = + let first_param = + get_element_ptr (deref td) Abi.tydesc_field_first_param + in + let res = next_vreg_cell Il.voidptr_t in + mov res (Il.Cell (alias outer_ty_params)); + emit (Il.cmp (Il.Cell first_param) zero); + let no_param_jmp = mark() in + emit (Il.jmp Il.JE Il.CodeNone); + mov res (Il.Cell first_param); + patch no_param_jmp; + res + + and trans_call_simple_dynamic_glue + (ty_param:int) + (vtbl_idx:int) + (ty_params:Il.cell) + (arg:Il.cell) + : unit = + iflog (fun _ -> + annotate (Printf.sprintf "calling tydesc[%d].glue[%d]" + ty_param vtbl_idx)); + let td = get_ty_param ty_params ty_param in + let ty_params_ptr = get_tydesc_params ty_params td in + trans_call_dynamic_glue + td vtbl_idx + None [| ty_params_ptr; arg; |] + + (* trans_compare returns a quad number of the cjmp, which the caller + patches to the cjmp destination. *) + and trans_compare + (cjmp:Il.jmpop) + (lhs:Il.operand) + (rhs:Il.operand) + : quad_idx list = + (* FIXME: this is an x86-ism; abstract via ABI. *) + emit (Il.cmp (Il.Cell (Il.Reg (force_to_reg lhs))) rhs); + let jmp = mark() in + emit (Il.jmp cjmp Il.CodeNone); + [jmp] + + and trans_cond (invert:bool) (expr:Ast.expr) : quad_idx list = + + let anno _ = + iflog + begin + fun _ -> + annotate ((Ast.fmt_to_str Ast.fmt_expr expr) ^ + ": cond, finale") + end + in + + match expr with + Ast.EXPR_binary (binop, a, b) -> + let lhs = trans_atom a in + let rhs = trans_atom b in + let cjmp = binop_to_jmpop binop in + let cjmp' = + if invert then + match cjmp with + Il.JE -> Il.JNE + | Il.JNE -> Il.JE + | Il.JL -> Il.JGE + | Il.JLE -> Il.JG + | Il.JGE -> Il.JL + | Il.JG -> Il.JLE + | _ -> bug () "Unhandled inverse binop in trans_cond" + else + cjmp + in + anno (); + trans_compare cjmp' lhs rhs + + | _ -> + let bool_operand = trans_expr expr in + anno (); + trans_compare Il.JNE bool_operand + (if invert then imm_true else imm_false) + + and trans_binop (binop:Ast.binop) : Il.binop = + match binop with + Ast.BINOP_or -> Il.OR + | Ast.BINOP_and -> Il.AND + | Ast.BINOP_xor -> Il.XOR + + | Ast.BINOP_lsl -> Il.LSL + | Ast.BINOP_lsr -> Il.LSR + | Ast.BINOP_asr -> Il.ASR + + | Ast.BINOP_add -> Il.ADD + | Ast.BINOP_sub -> Il.SUB + + (* FIXME (issue #57): + * switch on type of operands, IMUL/IDIV/IMOD etc. + *) + | Ast.BINOP_mul -> Il.UMUL + | Ast.BINOP_div -> Il.UDIV + | Ast.BINOP_mod -> Il.UMOD + | _ -> bug () "bad binop to Trans.trans_binop" + + and trans_binary + (binop:Ast.binop) + (lhs:Il.operand) + (rhs:Il.operand) : Il.operand = + let arith op = + let bits = Il.operand_bits word_bits lhs in + let dst = Il.Reg (Il.next_vreg (emitter()), Il.ValTy bits) in + emit (Il.binary op dst lhs rhs); + Il.Cell dst + in + match binop with + Ast.BINOP_or | Ast.BINOP_and | Ast.BINOP_xor + | Ast.BINOP_lsl | Ast.BINOP_lsr | Ast.BINOP_asr + | Ast.BINOP_add | Ast.BINOP_sub + (* FIXME (issue #57): + * switch on type of operands, IMUL/IDIV/IMOD etc. + *) + | Ast.BINOP_mul | Ast.BINOP_div | Ast.BINOP_mod -> + arith (trans_binop binop) + + | _ -> let dst = Il.Reg (Il.next_vreg (emitter()), Il.ValTy Il.Bits8) in + mov dst imm_true; + let jmps = trans_compare (binop_to_jmpop binop) lhs rhs in + mov dst imm_false; + List.iter patch jmps; + Il.Cell dst + + + and trans_expr (expr:Ast.expr) : Il.operand = + + let anno _ = + iflog + begin + fun _ -> + annotate ((Ast.fmt_to_str Ast.fmt_expr expr) ^ + ": plain exit, finale") + end + in + match expr with + Ast.EXPR_binary (binop, a, b) -> + assert (is_prim_type (atom_type cx a)); + assert (is_prim_type (atom_type cx b)); + trans_binary binop (trans_atom a) (trans_atom b) + + | Ast.EXPR_unary (unop, a) -> + assert (is_prim_type (atom_type cx a)); + let src = trans_atom a in + let bits = Il.operand_bits word_bits src in + let dst = Il.Reg (Il.next_vreg (emitter()), Il.ValTy bits) in + let op = match unop with + Ast.UNOP_not + | Ast.UNOP_bitnot -> Il.NOT + | Ast.UNOP_neg -> Il.NEG + | Ast.UNOP_cast t -> + let t = Hashtbl.find cx.ctxt_all_cast_types t.id in + let at = atom_type cx a in + if (type_is_2s_complement at) && + (type_is_2s_complement t) + then + if type_is_unsigned_2s_complement t + then Il.UMOV + else Il.IMOV + else + err None "unsupported cast operator" + in + anno (); + emit (Il.unary op dst src); + Il.Cell dst + + | Ast.EXPR_atom a -> + trans_atom a + + and trans_block (block:Ast.block) : unit = + trace_str cx.ctxt_sess.Session.sess_trace_block + "entering block"; + push_emitter_size_cache (); + emit (Il.Enter (Hashtbl.find cx.ctxt_block_fixups block.id)); + Array.iter trans_stmt block.node; + trace_str cx.ctxt_sess.Session.sess_trace_block + "exiting block"; + emit Il.Leave; + pop_emitter_size_cache (); + trace_str cx.ctxt_sess.Session.sess_trace_block + "exited block"; + + and upcall_fixup (name:string) : fixup = + Semant.require_native cx REQUIRED_LIB_rustrt name; + + and trans_upcall + (name:string) + (ret:Il.cell) + (args:Il.operand array) + : unit = + abi.Abi.abi_emit_native_call (emitter()) + ret nabi_rust (upcall_fixup name) args; + + and trans_void_upcall + (name:string) + (args:Il.operand array) + : unit = + abi.Abi.abi_emit_native_void_call (emitter()) + nabi_rust (upcall_fixup name) args; + + and trans_log_int (a:Ast.atom) : unit = + trans_void_upcall "upcall_log_int" [| (trans_atom a) |] + + and trans_log_str (a:Ast.atom) : unit = + trans_void_upcall "upcall_log_str" [| (trans_atom a) |] + + and trans_spawn + ((*initializing*)_:bool) + (dst:Ast.lval) + (domain:Ast.domain) + (fn_lval:Ast.lval) + (args:Ast.atom array) + : unit = + let (task_cell, _) = trans_lval_init dst in + let (fptr_operand, fn_ty) = trans_callee fn_lval in + (*let fn_ty_params = [| |] in*) + let _ = + (* FIXME: handle indirect-spawns (clone closure). *) + if not (lval_is_direct_fn cx fn_lval) + then bug () "unhandled indirect-spawn" + in + let args_rty = call_args_referent_type cx 0 fn_ty None in + let fptr_operand = reify_ptr fptr_operand in + let exit_task_glue_fixup = get_exit_task_glue () in + let callsz = + calculate_sz_in_current_frame (Il.referent_ty_size word_bits args_rty) + in + let exit_task_glue_fptr = + code_fixup_to_ptr_operand exit_task_glue_fixup + in + let exit_task_glue_fptr = reify_ptr exit_task_glue_fptr in + + iflog (fun _ -> annotate "spawn task: copy args"); + + let new_task = next_vreg_cell Il.voidptr_t in + let call = { call_ctrl = CALL_indirect; + call_callee_ptr = fptr_operand; + call_callee_ty = fn_ty; + call_callee_ty_params = [| |]; + call_output = task_cell; + call_args = args; + call_iterator_args = [| |]; + call_indirect_args = [| |] } + in + match domain with + Ast.DOMAIN_thread -> + begin + trans_upcall "upcall_new_thread" new_task [| |]; + copy_fn_args false (CLONE_all new_task) call; + trans_upcall "upcall_start_thread" task_cell + [| + Il.Cell new_task; + exit_task_glue_fptr; + fptr_operand; + callsz + |]; + end + | _ -> + begin + trans_upcall "upcall_new_task" new_task [| |]; + copy_fn_args false (CLONE_chan new_task) call; + trans_upcall "upcall_start_task" task_cell + [| + Il.Cell new_task; + exit_task_glue_fptr; + fptr_operand; + callsz + |]; + end; + () + + and get_curr_span _ = + if Stack.is_empty curr_stmt + then ("<none>", 0, 0) + else + let stmt_id = Stack.top curr_stmt in + match (Session.get_span cx.ctxt_sess stmt_id) with + None -> ("<none>", 0, 0) + | Some sp -> sp.lo + + and trans_cond_fail (str:string) (fwd_jmps:quad_idx list) : unit = + let (filename, line, _) = get_curr_span () in + iflog (fun _ -> annotate ("condition-fail: " ^ str)); + trans_void_upcall "upcall_fail" + [| + trans_static_string str; + trans_static_string filename; + imm (Int64.of_int line) + |]; + List.iter patch fwd_jmps + + and trans_check_expr (e:Ast.expr) : unit = + let fwd_jmps = trans_cond false e in + trans_cond_fail (Ast.fmt_to_str Ast.fmt_expr e) fwd_jmps + + and trans_malloc (dst:Il.cell) (nbytes:Il.operand) : unit = + trans_upcall "upcall_malloc" dst [| nbytes |] + + and trans_free (src:Il.cell) : unit = + trans_void_upcall "upcall_free" [| Il.Cell src |] + + and trans_yield () : unit = + trans_void_upcall "upcall_yield" [| |]; + + and trans_fail () : unit = + let (filename, line, _) = get_curr_span () in + trans_void_upcall "upcall_fail" + [| + trans_static_string "explicit failure"; + trans_static_string filename; + imm (Int64.of_int line) + |]; + + and trans_join (task:Ast.lval) : unit = + trans_void_upcall "upcall_join" [| trans_atom (Ast.ATOM_lval task) |] + + and trans_send (chan:Ast.lval) (src:Ast.lval) : unit = + let (srccell, _) = trans_lval src in + aliasing false srccell + begin + fun src_alias -> + trans_void_upcall "upcall_send" + [| trans_atom (Ast.ATOM_lval chan); + Il.Cell src_alias |]; + end + + and trans_recv (initializing:bool) (dst:Ast.lval) (chan:Ast.lval) : unit = + let (dstcell, _) = trans_lval_maybe_init initializing dst in + aliasing true dstcell + begin + fun dst_alias -> + trans_void_upcall "upcall_recv" + [| Il.Cell dst_alias; + trans_atom (Ast.ATOM_lval chan) |]; + end + + and trans_init_port (dst:Ast.lval) : unit = + let (dstcell, dst_slot) = trans_lval_init dst in + let unit_ty = match slot_ty dst_slot with + Ast.TY_port t -> t + | _ -> bug () "init dst of port-init has non-port type" + in + let unit_sz = ty_sz abi unit_ty in + trans_upcall "upcall_new_port" dstcell [| imm unit_sz |] + + and trans_del_port (port:Il.cell) : unit = + trans_void_upcall "upcall_del_port" [| Il.Cell port |] + + and trans_init_chan (dst:Ast.lval) (port:Ast.lval) : unit = + let (dstcell, _) = trans_lval_init dst + in + trans_upcall "upcall_new_chan" dstcell + [| trans_atom (Ast.ATOM_lval port) |] + + and trans_del_chan (chan:Il.cell) : unit = + trans_void_upcall "upcall_del_chan" [| Il.Cell chan |] + + and trans_kill_task (task:Il.cell) : unit = + trans_void_upcall "upcall_kill" [| Il.Cell task |] + + (* + * A vec is implicitly exterior: every slot vec[T] is 1 word and + * points to a refcounted structure. That structure has 3 words with + * defined meaning at the beginning; data follows the header. + * + * word 0: refcount or gc control word + * word 1: allocated size of data + * word 2: initialised size of data + * word 3...N: data + * + * This 3-word prefix is shared with strings, we factor the common + * part out for reuse in string code. + *) + + and trans_init_vec (dst:Ast.lval) (atoms:Ast.atom array) : unit = + let (dst_cell, dst_slot) = trans_lval_init dst in + let unit_slot = match slot_ty dst_slot with + Ast.TY_vec s -> s + | _ -> bug () "init dst of vec-init has non-vec type" + in + let fill = next_vreg_cell word_ty in + let unit_sz = slot_sz_in_current_frame unit_slot in + umul fill unit_sz (imm (Int64.of_int (Array.length atoms))); + trans_upcall "upcall_new_vec" dst_cell [| Il.Cell fill |]; + let vec = deref dst_cell in + let body_mem = + fst (need_mem_cell + (get_element_ptr_dyn_in_current_frame + vec Abi.vec_elt_data)) + in + let unit_rty = slot_referent_type abi unit_slot in + let body_rty = Il.StructTy (Array.map (fun _ -> unit_rty) atoms) in + let body = Il.Mem (body_mem, body_rty) in + Array.iteri + begin + fun i atom -> + let cell = get_element_ptr_dyn_in_current_frame body i in + trans_init_slot_from_atom CLONE_none cell unit_slot atom + end + atoms; + mov (get_element_ptr vec Abi.vec_elt_fill) (Il.Cell fill); + + and get_dynamic_tydesc (idopt:node_id option) (t:Ast.ty) : Il.cell = + let td = next_vreg_cell Il.voidptr_t in + let root_desc = + Il.Cell (crate_rel_to_ptr + (get_static_tydesc idopt t 0L 0L) + (tydesc_rty abi)) + in + let (t, param_descs) = linearize_ty_params t in + let descs = Array.append [| root_desc |] param_descs in + let n = Array.length descs in + let rty = referent_type abi t in + let (size_sz, align_sz) = Il.referent_ty_layout word_bits rty in + let size = calculate_sz_in_current_frame size_sz in + let align = calculate_sz_in_current_frame align_sz in + let descs_ptr = next_vreg_cell Il.voidptr_t in + if (Array.length descs) > 0 + then + (* FIXME: this relies on knowledge that spills are contiguous. *) + let spills = + Array.map (fun _ -> next_spill_cell Il.voidptr_t) descs + in + Array.iteri (fun i t -> mov spills.(n-(i+1)) t) descs; + lea descs_ptr (fst (need_mem_cell spills.(n-1))) + else + mov descs_ptr zero; + trans_upcall "upcall_get_type_desc" td + [| Il.Cell (curr_crate_ptr()); + size; align; imm (Int64.of_int n); + Il.Cell descs_ptr |]; + td + + and get_tydesc (idopt:node_id option) (ty:Ast.ty) : Il.cell = + log cx "getting tydesc for %a" Ast.sprintf_ty ty; + match ty with + Ast.TY_param (idx, _) -> + (get_ty_param_in_current_frame idx) + | t when has_parametric_types t -> + (get_dynamic_tydesc idopt t) + | _ -> + (crate_rel_to_ptr (get_static_tydesc idopt ty + (ty_sz abi ty) + (ty_align abi ty)) + (tydesc_rty abi)) + + and exterior_ctrl_cell (cell:Il.cell) (off:int) : Il.cell = + let (rc_mem, _) = need_mem_cell (deref_imm cell (word_n off)) in + word_at rc_mem + + and exterior_rc_cell (cell:Il.cell) : Il.cell = + exterior_ctrl_cell cell Abi.exterior_rc_slot_field_refcnt + + and exterior_gc_ctrl_cell (cell:Il.cell) : Il.cell = + exterior_ctrl_cell cell Abi.exterior_gc_slot_field_ctrl + + and exterior_gc_next_cell (cell:Il.cell) : Il.cell = + exterior_ctrl_cell cell Abi.exterior_gc_slot_field_next + + and exterior_allocation_size + (slot:Ast.slot) + : Il.operand = + let header_sz = + match slot_mem_ctrl slot with + MEM_gc -> word_n Abi.exterior_gc_header_size + | MEM_rc_opaque + | MEM_rc_struct -> word_n Abi.exterior_rc_header_size + | MEM_interior -> bug () "exterior_allocation_size of MEM_interior" + in + let t = slot_ty slot in + let refty_sz = + Il.referent_ty_size abi.Abi.abi_word_bits (referent_type abi t) + in + match refty_sz with + SIZE_fixed _ -> imm (Int64.add (ty_sz abi t) header_sz) + | _ -> + let ty_params = get_ty_params_of_current_frame() in + let refty_sz = calculate_sz ty_params refty_sz in + let v = next_vreg word_ty in + let vc = Il.Reg (v, word_ty) in + mov vc refty_sz; + add_to vc (imm header_sz); + Il.Cell vc; + + and iter_tag_slots + (ty_params:Il.cell) + (dst_cell:Il.cell) + (src_cell:Il.cell) + (ttag:Ast.ty_tag) + (f:Il.cell -> Il.cell -> Ast.slot -> (Ast.ty_iso option) -> unit) + (curr_iso:Ast.ty_iso option) + : unit = + let tag_keys = sorted_htab_keys ttag in + let src_tag = get_element_ptr src_cell 0 in + let dst_tag = get_element_ptr dst_cell 0 in + let src_union = get_element_ptr_dyn ty_params src_cell 1 in + let dst_union = get_element_ptr_dyn ty_params dst_cell 1 in + let tmp = next_vreg_cell word_ty in + f dst_tag src_tag word_slot curr_iso; + mov tmp (Il.Cell src_tag); + Array.iteri + begin + fun i key -> + (iflog (fun _ -> + annotate (Printf.sprintf "tag case #%i == %a" i + Ast.sprintf_name key))); + let jmps = + trans_compare Il.JNE (Il.Cell tmp) (imm (Int64.of_int i)) + in + let ttup = Hashtbl.find ttag key in + iter_tup_slots + (get_element_ptr_dyn ty_params) + (get_variant_ptr dst_union i) + (get_variant_ptr src_union i) + ttup f curr_iso; + List.iter patch jmps + end + tag_keys + + and get_iso_tag tiso = + tiso.Ast.iso_group.(tiso.Ast.iso_index) + + + and seq_unit_slot (seq:Ast.ty) : Ast.slot = + match seq with + Ast.TY_vec s -> s + | Ast.TY_str -> (interior_slot (Ast.TY_mach TY_u8)) + | _ -> bug () "seq_unit_slot of non-vec, non-str type" + + + and iter_seq_slots + (ty_params:Il.cell) + (dst_cell:Il.cell) + (src_cell:Il.cell) + (unit_slot:Ast.slot) + (f:Il.cell -> Il.cell -> Ast.slot -> (Ast.ty_iso option) -> unit) + (curr_iso:Ast.ty_iso option) + : unit = + let unit_sz = slot_sz_with_ty_params ty_params unit_slot in + (* + * Unlike most of the iter_ty_slots helpers; this one allocates a + * vreg and so has to be aware of when it's iterating over 2 + * sequences of cells or just 1. + *) + check_exterior_rty src_cell; + check_exterior_rty dst_cell; + if dst_cell = src_cell + then + begin + let src_cell = deref src_cell in + let data = + get_element_ptr_dyn ty_params src_cell Abi.vec_elt_data + in + let len = get_element_ptr src_cell Abi.vec_elt_fill in + let ptr = next_vreg_cell Il.voidptr_t in + let lim = next_vreg_cell Il.voidptr_t in + lea lim (fst (need_mem_cell data)); + mov ptr (Il.Cell lim); + add_to lim (Il.Cell len); + let back_jmp_target = mark () in + let fwd_jmps = trans_compare Il.JAE (Il.Cell ptr) (Il.Cell lim) in + let unit_cell = + deref (ptr_cast ptr (slot_referent_type abi unit_slot)) + in + f unit_cell unit_cell unit_slot curr_iso; + add_to ptr unit_sz; + check_interrupt_flag (); + emit (Il.jmp Il.JMP (Il.CodeLabel back_jmp_target)); + List.iter patch fwd_jmps; + end + else + begin + bug () "Unsupported form of seq iter: src != dst." + end + + + and iter_ty_slots_full + (ty_params:Il.cell) + (ty:Ast.ty) + (dst_cell:Il.cell) + (src_cell:Il.cell) + (f:Il.cell -> Il.cell -> Ast.slot -> (Ast.ty_iso option) -> unit) + (curr_iso:Ast.ty_iso option) + : unit = + (* + * FIXME: this will require some reworking if we support + * rec, tag or tup slots that fit in a vreg. It requires + * addrs presently. + *) + match ty with + Ast.TY_rec entries -> + iter_rec_slots + (get_element_ptr_dyn ty_params) dst_cell src_cell + entries f curr_iso + + | Ast.TY_tup slots -> + iter_tup_slots + (get_element_ptr_dyn ty_params) dst_cell src_cell + slots f curr_iso + + | Ast.TY_tag tag -> + iter_tag_slots ty_params dst_cell src_cell tag f curr_iso + + | Ast.TY_iso tiso -> + let ttag = get_iso_tag tiso in + iter_tag_slots ty_params dst_cell src_cell ttag f (Some tiso) + + | Ast.TY_fn _ + | Ast.TY_obj _ -> bug () "Attempting to iterate over fn/pred/obj slots" + + | Ast.TY_vec _ + | Ast.TY_str -> + let unit_slot = seq_unit_slot ty in + iter_seq_slots ty_params dst_cell src_cell unit_slot f curr_iso + + | _ -> () + + (* + * This just calls iter_ty_slots_full with your cell as both src and + * dst, with an adaptor function that discards the dst slots of the + * parallel traversal and and calls your provided function on the + * passed-in src slots. + *) + and iter_ty_slots + (ty_params:Il.cell) + (ty:Ast.ty) + (cell:Il.cell) + (f:Il.cell -> Ast.slot -> (Ast.ty_iso option) -> unit) + (curr_iso:Ast.ty_iso option) + : unit = + iter_ty_slots_full ty_params ty cell cell + (fun _ src_cell slot curr_iso -> f src_cell slot curr_iso) + curr_iso + + and drop_ty + (ty_params:Il.cell) + (ty:Ast.ty) + (cell:Il.cell) + (curr_iso:Ast.ty_iso option) + : unit = + match ty with + Ast.TY_param (i, _) -> + iflog (fun _ -> annotate + (Printf.sprintf "drop_ty: parametric drop %#d" i)); + aliasing false cell + begin + fun cell -> + trans_call_simple_dynamic_glue + i Abi.tydesc_field_drop_glue ty_params cell + end + + | Ast.TY_fn _ -> + begin + let binding = get_element_ptr cell Abi.binding_field_binding in + let null_jmp = null_check binding in + (* Drop non-null bindings. *) + (* FIXME (issue #58): this is completely wrong, + * need a second thunk that generates code to make + * use of a runtime type descriptor extracted from + * a binding tuple. For now this only works by + * accident. + *) + drop_slot ty_params binding + (exterior_slot Ast.TY_int) curr_iso; + patch null_jmp + end + + | Ast.TY_obj _ -> + begin + let binding = get_element_ptr cell Abi.binding_field_binding in + let null_jmp = null_check binding in + let obj = deref binding in + let rc = get_element_ptr obj 0 in + let rc_jmp = drop_refcount_and_cmp rc in + let tydesc = get_element_ptr obj 1 in + let body = get_element_ptr obj 2 in + let ty_params = + get_element_ptr (deref tydesc) Abi.tydesc_field_first_param + in + let dtor = + get_element_ptr (deref tydesc) Abi.tydesc_field_obj_drop_glue + in + let null_dtor_jmp = null_check dtor in + (* Call any dtor, if present. *) + trans_call_dynamic_glue tydesc + Abi.tydesc_field_obj_drop_glue None [| binding |]; + patch null_dtor_jmp; + (* Drop the body. *) + trans_call_dynamic_glue tydesc + Abi.tydesc_field_drop_glue None [| ty_params; alias body |]; + trans_free binding; + mov binding zero; + patch rc_jmp; + patch null_jmp + end + + + | _ -> + iter_ty_slots ty_params ty cell (drop_slot ty_params) curr_iso + + and mark_ty + (ty_params:Il.cell) + (ty:Ast.ty) + (cell:Il.cell) + (curr_iso:Ast.ty_iso option) + : unit = + match ty with + | Ast.TY_fn _ + | Ast.TY_obj _ -> () + | _ -> + iter_ty_slots ty_params ty cell (mark_slot ty_params) curr_iso + + and clone_ty + (ty_params:Il.cell) + (clone_task:Il.cell) + (ty:Ast.ty) + (dst:Il.cell) + (src:Il.cell) + (curr_iso:Ast.ty_iso option) + : unit = + match ty with + Ast.TY_chan _ -> + trans_upcall "upcall_clone_chan" dst + [| (Il.Cell clone_task); (Il.Cell src) |] + | Ast.TY_task + | Ast.TY_port _ + | _ when type_has_state ty + -> bug () "cloning mutable type" + | _ when i64_le (ty_sz abi ty) word_sz + -> mov dst (Il.Cell src) + | Ast.TY_fn _ + | Ast.TY_obj _ -> () + | _ -> + iter_ty_slots_full ty_params ty dst src + (clone_slot ty_params clone_task) curr_iso + + and copy_ty + (ty_params:Il.cell) + (ty:Ast.ty) + (dst:Il.cell) + (src:Il.cell) + (curr_iso:Ast.ty_iso option) + : unit = + iflog (fun _ -> + annotate ("copy_ty: referent data of type " ^ + (Ast.fmt_to_str Ast.fmt_ty ty))); + match ty with + Ast.TY_nil + | Ast.TY_bool + | Ast.TY_mach _ + | Ast.TY_int + | Ast.TY_uint + | Ast.TY_native _ + | Ast.TY_type + | Ast.TY_char -> + iflog + (fun _ -> annotate + (Printf.sprintf "copy_ty: simple mov (%Ld byte scalar)" + (ty_sz abi ty))); + mov dst (Il.Cell src) + + | Ast.TY_param (i, _) -> + iflog + (fun _ -> annotate + (Printf.sprintf "copy_ty: parametric copy %#d" i)); + aliasing false src + begin + fun src -> + let td = get_ty_param ty_params i in + let ty_params_ptr = get_tydesc_params ty_params td in + trans_call_dynamic_glue + td Abi.tydesc_field_copy_glue + (Some dst) [| ty_params_ptr; src; |] + end + + | Ast.TY_fn _ + | Ast.TY_obj _ -> + begin + let src_item = get_element_ptr src Abi.binding_field_item in + let dst_item = get_element_ptr dst Abi.binding_field_item in + let src_binding = get_element_ptr src Abi.binding_field_binding in + let dst_binding = get_element_ptr dst Abi.binding_field_binding in + mov dst_item (Il.Cell src_item); + let null_jmp = null_check src_binding in + (* Copy if we have a src binding. *) + (* FIXME (issue #58): this is completely wrong, call + * through to the binding's self-copy fptr. For now + * this only works by accident. + *) + trans_copy_slot ty_params true + dst_binding (exterior_slot Ast.TY_int) + src_binding (exterior_slot Ast.TY_int) + curr_iso; + patch null_jmp + end + + | _ -> + iter_ty_slots_full ty_params ty dst src + (fun dst src slot curr_iso -> + trans_copy_slot ty_params true + dst slot src slot curr_iso) + curr_iso + + and free_ty + (ty_params:Il.cell) + (ty:Ast.ty) + (cell:Il.cell) + (curr_iso:Ast.ty_iso option) + : unit = + match ty with + Ast.TY_port _ -> trans_del_port cell + | Ast.TY_chan _ -> trans_del_chan cell + | Ast.TY_task -> trans_kill_task cell + | Ast.TY_vec s -> + iter_seq_slots ty_params cell cell s + (fun _ src slot iso -> drop_slot ty_params src slot iso) curr_iso; + trans_free cell + + | _ -> trans_free cell + + and maybe_iso + (curr_iso:Ast.ty_iso option) + (t:Ast.ty) + : Ast.ty = + match (curr_iso, t) with + (Some iso, Ast.TY_idx n) -> + Ast.TY_iso { iso with Ast.iso_index = n } + | (None, Ast.TY_idx _) -> + bug () "TY_idx outside TY_iso" + | _ -> t + + and maybe_enter_iso + (t:Ast.ty) + (curr_iso:Ast.ty_iso option) + : Ast.ty_iso option = + match t with + Ast.TY_iso tiso -> Some tiso + | _ -> curr_iso + + and mark_slot + (ty_params:Il.cell) + (cell:Il.cell) + (slot:Ast.slot) + (curr_iso:Ast.ty_iso option) + : unit = + let ty = slot_ty slot in + match slot_mem_ctrl slot with + MEM_gc -> + note_gc_step slot "mark GC slot: check for null:"; + emit (Il.cmp (Il.Cell cell) zero); + let null_cell_jump = mark () in + emit (Il.jmp Il.JE Il.CodeNone); + let gc_word = exterior_gc_ctrl_cell cell in + let tmp = next_vreg_cell Il.voidptr_t in + (* if this has been marked already, jump to exit.*) + note_gc_step slot "mark GC slot: check for mark:"; + emit (Il.binary Il.AND tmp (Il.Cell gc_word) one); + let already_marked_jump = mark () in + emit (Il.jmp Il.JZ Il.CodeNone); + (* Set mark bit in allocation header. *) + note_gc_step slot "mark GC slot: mark:"; + emit (Il.binary Il.OR gc_word (Il.Cell gc_word) one); + (* Iterate over exterior slots marking outgoing links. *) + log cx "slot rty: %s" (cell_str cell); + let (body_mem, _) = + need_mem_cell + (get_element_ptr (deref cell) + Abi.exterior_gc_slot_field_body) + in + let ty = maybe_iso curr_iso ty in + let curr_iso = maybe_enter_iso ty curr_iso in + lea tmp body_mem; + trans_call_simple_static_glue + (get_mark_glue ty curr_iso) + ty_params tmp; + patch null_cell_jump; + patch already_marked_jump; + note_gc_step slot "mark GC slot: done marking:"; + + | MEM_interior when type_is_structured ty -> + (iflog (fun _ -> + annotate ("mark interior slot " ^ + (Ast.fmt_to_str Ast.fmt_slot slot)))); + let (mem, _) = need_mem_cell cell in + let tmp = next_vreg_cell Il.voidptr_t in + let ty = maybe_iso curr_iso ty in + let curr_iso = maybe_enter_iso ty curr_iso in + lea tmp mem; + trans_call_simple_static_glue + (get_mark_glue ty curr_iso) + ty_params tmp + + | _ -> () + + and check_exterior_rty cell = + match cell with + Il.Reg (_, Il.AddrTy (Il.StructTy fields)) + | Il.Mem (_, Il.ScalarTy (Il.AddrTy (Il.StructTy fields))) + when (((Array.length fields) > 0) && (fields.(0) = word_rty)) -> () + | _ -> bug () + "expected plausibly-exterior cell, got %s" + (Il.string_of_referent_ty (Il.cell_referent_ty cell)) + + and clone_slot + (ty_params:Il.cell) + (clone_task:Il.cell) + (dst:Il.cell) + (src:Il.cell) + (dst_slot:Ast.slot) + (curr_iso:Ast.ty_iso option) + : unit = + let ty = slot_ty dst_slot in + match dst_slot.Ast.slot_mode with + Ast.MODE_exterior _ -> + let ty = maybe_iso curr_iso ty in + let curr_iso = maybe_enter_iso ty curr_iso in + let dst = deref_slot true dst dst_slot in + let glue_fix = get_clone_glue (slot_ty dst_slot) curr_iso in + trans_call_static_glue + (code_fixup_to_ptr_operand glue_fix) + (Some dst) + [| alias ty_params; src; clone_task |] + + | Ast.MODE_alias _ -> bug () "cloning into alias slot" + | Ast.MODE_interior _ -> + clone_ty ty_params clone_task ty dst src curr_iso + + and drop_slot_in_current_frame + (cell:Il.cell) + (slot:Ast.slot) + (curr_iso:Ast.ty_iso option) + : unit = + drop_slot (get_ty_params_of_current_frame()) cell slot curr_iso + + and null_check (cell:Il.cell) : quad_idx = + emit (Il.cmp (Il.Cell cell) zero); + let j = mark() in + emit (Il.jmp Il.JE Il.CodeNone); + j + + and drop_refcount_and_cmp (rc:Il.cell) : quad_idx = + iflog (fun _ -> annotate "drop refcount and maybe free"); + emit (Il.binary Il.SUB rc (Il.Cell rc) one); + emit (Il.cmp (Il.Cell rc) zero); + let j = mark () in + emit (Il.jmp Il.JNE Il.CodeNone); + j + + and drop_slot + (ty_params:Il.cell) + (cell:Il.cell) + (slot:Ast.slot) + (curr_iso:Ast.ty_iso option) + : unit = + let ty = slot_ty slot in + let ty = maybe_iso curr_iso ty in + let curr_iso = maybe_enter_iso ty curr_iso in + let slot = {slot with Ast.slot_ty = Some ty} in + let mctrl = slot_mem_ctrl slot in + match mctrl with + MEM_rc_opaque -> + (* Refcounted opaque objects we handle without glue functions. *) + let _ = check_exterior_rty cell in + let null_jmp = null_check cell in + let j = drop_refcount_and_cmp (exterior_rc_cell cell) in + free_ty ty_params ty cell curr_iso; + (* Null the slot out to prevent double-free if the frame + * unwinds. + *) + mov cell zero; + patch j; + patch null_jmp + + | MEM_gc + | MEM_rc_struct -> + (* Refcounted "structured exterior" objects we handle via + * glue functions. + *) + + (* + * 'GC memory' is treated similarly, just happens to have + * an extra couple cells on the front. + *) + + (* FIXME (issue #25): check to see that the exterior has + * further exterior members; if it doesn't we can elide the + * call to the glue function. *) + let _ = check_exterior_rty cell in + let null_jmp = null_check cell in + let rc = exterior_rc_cell cell in + let _ = note_gc_step slot "dropping refcount on " in + let _ = trace_word cx.ctxt_sess.Session.sess_trace_gc rc in + let j = drop_refcount_and_cmp rc in + trans_call_simple_static_glue + (get_free_glue ty mctrl curr_iso) + ty_params cell; + (* Null the slot out to prevent double-free if the frame + * unwinds. + *) + mov cell zero; + patch j; + patch null_jmp + + | MEM_interior when type_is_structured ty -> + (iflog (fun _ -> + annotate ("drop interior slot " ^ + (Ast.fmt_to_str Ast.fmt_slot slot)))); + let (mem, _) = need_mem_cell cell in + let vr = next_vreg_cell Il.voidptr_t in + lea vr mem; + trans_call_simple_static_glue + (get_drop_glue ty curr_iso) + ty_params vr + + | MEM_interior -> + (* Interior allocation of all-interior value: free directly. *) + let ty = maybe_iso curr_iso ty in + drop_ty ty_params ty cell curr_iso + + and note_drop_step ty step = + if cx.ctxt_sess.Session.sess_trace_drop || + cx.ctxt_sess.Session.sess_log_trans + then + let slotstr = Ast.fmt_to_str Ast.fmt_ty ty in + let str = step ^ " " ^ slotstr in + begin + annotate str; + trace_str cx.ctxt_sess.Session.sess_trace_drop str + end + + and note_gc_step slot step = + if cx.ctxt_sess.Session.sess_trace_gc || + cx.ctxt_sess.Session.sess_log_trans + then + let mctrl_str = + match slot_mem_ctrl slot with + MEM_gc -> "MEM_gc" + | MEM_rc_struct -> "MEM_rc_struct" + | MEM_rc_opaque -> "MEM_rc_struct" + | MEM_interior -> "MEM_rc_struct" + in + let slotstr = Ast.fmt_to_str Ast.fmt_slot slot in + let str = step ^ " " ^ mctrl_str ^ " " ^ slotstr in + begin + annotate str; + trace_str cx.ctxt_sess.Session.sess_trace_gc str + end + + (* Returns the offset of the slot-body in the initialized allocation. *) + and init_exterior_slot (cell:Il.cell) (slot:Ast.slot) : unit = + match slot_mem_ctrl slot with + MEM_gc -> + iflog (fun _ -> annotate "init GC exterior: malloc"); + let sz = exterior_allocation_size slot in + (* + * Malloc and then immediately shift down to point to + * the pseudo-rc cell. + *) + note_gc_step slot "init GC exterior: malloc slot:"; + trans_malloc cell sz; + add_to cell + (imm (word_n Abi.exterior_gc_malloc_return_adjustment)); + note_gc_step slot "init GC exterior: load control word"; + let ctrl = exterior_gc_ctrl_cell cell in + let tydesc = get_tydesc None (slot_ty slot) in + let rc = exterior_rc_cell cell in + note_gc_step slot "init GC exterior: set refcount"; + mov rc one; + trace_word cx.ctxt_sess.Session.sess_trace_gc rc; + mov ctrl (Il.Cell tydesc); + note_gc_step slot "init GC exterior: load chain next-ptr"; + let next = exterior_gc_next_cell cell in + let chain = tp_imm (word_n Abi.task_field_gc_alloc_chain) in + mov next (Il.Cell chain); + note_gc_step slot "init GC exterior: link GC mem to chain"; + mov chain (Il.Cell cell); + note_gc_step slot "init GC exterior: done initializing" + + | MEM_rc_opaque + | MEM_rc_struct -> + iflog (fun _ -> annotate "init RC exterior: malloc"); + let sz = exterior_allocation_size slot in + trans_malloc cell sz; + iflog (fun _ -> annotate "init RC exterior: load refcount"); + let rc = exterior_rc_cell cell in + mov rc one + + | MEM_interior -> bug () "init_exterior_slot of MEM_interior" + + and deref_slot + (initializing:bool) + (cell:Il.cell) + (slot:Ast.slot) + : Il.cell = + match slot.Ast.slot_mode with + Ast.MODE_interior _ -> + cell + + | Ast.MODE_exterior _ -> + check_exterior_rty cell; + if initializing + then init_exterior_slot cell slot; + get_element_ptr_dyn_in_current_frame + (deref cell) + Abi.exterior_rc_slot_field_body + + | Ast.MODE_alias _ -> + if initializing + then cell + else deref cell + + and trans_copy_tup + (ty_params:Il.cell) + (initializing:bool) + (dst:Il.cell) + (src:Il.cell) + (slots:Ast.ty_tup) + : unit = + Array.iteri + begin + fun i slot -> + let sub_dst_cell = get_element_ptr_dyn ty_params dst i in + let sub_src_cell = get_element_ptr_dyn ty_params src i in + trans_copy_slot + ty_params initializing + sub_dst_cell slot sub_src_cell slot None + end + slots + + and trans_copy_slot + (ty_params:Il.cell) + (initializing:bool) + (dst:Il.cell) (dst_slot:Ast.slot) + (src:Il.cell) (src_slot:Ast.slot) + (curr_iso:Ast.ty_iso option) + : unit = + let anno (weight:string) : unit = + iflog + begin + fun _ -> + annotate + (Printf.sprintf "%sweight copy: %a <- %a" + weight + Ast.sprintf_slot dst_slot + Ast.sprintf_slot src_slot) + end; + in + assert (slot_ty src_slot = slot_ty dst_slot); + match (slot_mem_ctrl src_slot, + slot_mem_ctrl dst_slot) with + + | (MEM_rc_opaque, MEM_rc_opaque) + | (MEM_gc, MEM_gc) + | (MEM_rc_struct, MEM_rc_struct) -> + (* Lightweight copy: twiddle refcounts, move pointer. *) + anno "refcounted light"; + add_to (exterior_rc_cell src) one; + if not initializing + then + drop_slot ty_params dst dst_slot None; + mov dst (Il.Cell src) + + | _ -> + (* Heavyweight copy: duplicate 1 level of the referent. *) + anno "heavy"; + trans_copy_slot_heavy ty_params initializing + dst dst_slot src src_slot curr_iso + + (* NB: heavyweight copying here does not mean "producing a deep + * clone of the entire data tree rooted at the src operand". It means + * "replicating a single level of the tree". + * + * There is no general-recursion entailed in performing a heavy + * copy. There is only "one level" to each heavy copy call. + * + * In other words, this is a lightweight copy: + * + * [dstptr] <-copy- [srcptr] + * \ | + * \ | + * [some record.rc++] + * | + * [some other record] + * + * Whereas this is a heavyweight copy: + * + * [dstptr] <-copy- [srcptr] + * | | + * | | + * [some record] [some record] + * | | + * [some other record] + * + *) + + and trans_copy_slot_heavy + (ty_params:Il.cell) + (initializing:bool) + (dst:Il.cell) (dst_slot:Ast.slot) + (src:Il.cell) (src_slot:Ast.slot) + (curr_iso:Ast.ty_iso option) + : unit = + assert (slot_ty src_slot = slot_ty dst_slot); + iflog (fun _ -> + annotate ("heavy copy: slot preparation")); + + let ty = slot_ty src_slot in + let ty = maybe_iso curr_iso ty in + let curr_iso = maybe_enter_iso ty curr_iso in + let dst_slot = { dst_slot with Ast.slot_ty = Some ty } in + let src_slot = { src_slot with Ast.slot_ty = Some ty } in + let dst = deref_slot initializing dst dst_slot in + let src = deref_slot false src src_slot in + copy_ty ty_params ty dst src curr_iso + + and trans_copy + (initializing:bool) + (dst:Ast.lval) + (src:Ast.expr) + : unit = + let (dst_cell, dst_slot) = trans_lval_maybe_init initializing dst in + match (slot_ty dst_slot, src) with + (Ast.TY_vec _, + Ast.EXPR_binary (Ast.BINOP_add, + Ast.ATOM_lval a, Ast.ATOM_lval b)) + | (Ast.TY_str, + Ast.EXPR_binary (Ast.BINOP_add, + Ast.ATOM_lval a, Ast.ATOM_lval b)) -> + (* + * Translate str or vec + * + * s = a + b + * + * as + * + * s = a; + * s += b; + *) + let (a_cell, a_slot) = trans_lval a in + let (b_cell, b_slot) = trans_lval b in + trans_copy_slot + (get_ty_params_of_current_frame()) + initializing dst_cell dst_slot + a_cell a_slot None; + trans_vec_append dst_cell dst_slot + (Il.Cell b_cell) (slot_ty b_slot) + + + | (Ast.TY_obj caller_obj_ty, + Ast.EXPR_unary (Ast.UNOP_cast t, a)) -> + let src_ty = atom_type cx a in + let _ = assert (not (is_prim_type (src_ty))) in + begin + let t = Hashtbl.find cx.ctxt_all_cast_types t.id in + let _ = assert (t = (Ast.TY_obj caller_obj_ty)) in + let callee_obj_ty = + match atom_type cx a with + Ast.TY_obj t -> t + | _ -> bug () "obj cast from non-obj type" + in + let src_cell = need_cell (trans_atom a) in + let src_slot = interior_slot src_ty in + + (* FIXME: this is wrong. It treats the underlying obj-state + * as the same as the callee and simply substitutes the + * forwarding vtbl, which would be great if it had any way + * convey the callee vtbl to the forwarding functions. But it + * doesn't. Instead, we have to malloc a fresh 3-word + * refcounted obj to hold the callee's vtbl+state pair, copy + * that in as the state here. + *) + let _ = + trans_copy_slot (get_ty_params_of_current_frame()) + initializing + dst_cell dst_slot + src_cell src_slot + in + let caller_vtbl_oper = + get_forwarding_vtbl caller_obj_ty callee_obj_ty + in + let caller_obj = + deref_slot initializing dst_cell dst_slot + in + let caller_vtbl = + get_element_ptr caller_obj Abi.binding_field_item + in + mov caller_vtbl caller_vtbl_oper + end + + | (_, Ast.EXPR_binary _) + | (_, Ast.EXPR_unary _) + | (_, Ast.EXPR_atom (Ast.ATOM_literal _)) -> + (* + * Translations of these expr types yield vregs, + * so copy is just MOV into the lval. + *) + let src_operand = trans_expr src in + mov (deref_slot false dst_cell dst_slot) src_operand + + | (_, Ast.EXPR_atom (Ast.ATOM_lval src_lval)) -> + if lval_is_direct_fn cx src_lval then + trans_copy_direct_fn dst_cell src_lval + else + (* Possibly-large structure copying *) + let (src_cell, src_slot) = trans_lval src_lval in + trans_copy_slot + (get_ty_params_of_current_frame()) + initializing + dst_cell dst_slot + src_cell src_slot + None + + and trans_copy_direct_fn + (dst_cell:Il.cell) + (flv:Ast.lval) + : unit = + let item = lval_item cx flv in + let fix = Hashtbl.find cx.ctxt_fn_fixups item.id in + + let dst_pair_item_cell = + get_element_ptr dst_cell Abi.binding_field_item + in + let dst_pair_binding_cell = + get_element_ptr dst_cell Abi.binding_field_binding + in + + mov dst_pair_item_cell (crate_rel_imm fix); + mov dst_pair_binding_cell zero + + + and trans_init_structural_from_atoms + (dst:Il.cell) + (dst_slots:Ast.slot array) + (atoms:Ast.atom array) + : unit = + Array.iteri + begin + fun i atom -> + trans_init_slot_from_atom + CLONE_none + (get_element_ptr_dyn_in_current_frame dst i) + dst_slots.(i) + atom + end + atoms + + and trans_init_rec_update + (dst:Il.cell) + (dst_slots:Ast.slot array) + (trec:Ast.ty_rec) + (atab:(Ast.ident * Ast.mode * bool * Ast.atom) array) + (base:Ast.lval) + : unit = + Array.iteri + begin + fun i (fml_ident, _) -> + let fml_entry _ (act_ident, _, _, atom) = + if act_ident = fml_ident then Some atom else None + in + let slot = dst_slots.(i) in + match arr_search atab fml_entry with + Some atom -> + trans_init_slot_from_atom + CLONE_none + (get_element_ptr_dyn_in_current_frame dst i) + slot + atom + | None -> + let (src, _) = trans_lval base in + trans_copy_slot + (get_ty_params_of_current_frame()) true + (get_element_ptr_dyn_in_current_frame dst i) slot + (get_element_ptr_dyn_in_current_frame src i) slot + None + end + trec + + and trans_init_slot_from_atom + (clone:clone_ctrl) + (dst:Il.cell) (dst_slot:Ast.slot) + (atom:Ast.atom) + : unit = + let is_alias_cell = + match dst_slot.Ast.slot_mode with + Ast.MODE_alias _ -> true + | _ -> false + in + match atom with + | Ast.ATOM_literal _ -> + let src = trans_atom atom in + if is_alias_cell + then + match clone with + CLONE_none -> + (* + * FIXME: this won't work on mutable aliases, it + * doesn't know to reload. Try something + * else. + *) + mov dst (Il.Cell (alias (Il.Mem (force_to_mem src)))) + | _ -> + bug () "attempting to clone alias cell" + else + mov (deref_slot true dst dst_slot) src + | Ast.ATOM_lval src_lval -> + let (src, src_slot) = trans_lval src_lval in + trans_init_slot_from_cell clone dst dst_slot src src_slot + + and trans_init_slot_from_cell + (clone:clone_ctrl) + (dst:Il.cell) (dst_slot:Ast.slot) + (src:Il.cell) (src_slot:Ast.slot) + : unit = + assert (slot_ty src_slot = slot_ty dst_slot); + let is_alias_cell = + match dst_slot.Ast.slot_mode with + Ast.MODE_alias _ -> true + | _ -> false + in + match clone with + CLONE_chan clone_task -> + let clone = + if (type_contains_chan (slot_ty src_slot)) + then CLONE_all clone_task + else CLONE_none + in + trans_init_slot_from_cell clone dst dst_slot src src_slot + | CLONE_none -> + if is_alias_cell + then mov dst (Il.Cell (alias src)) + else + trans_copy_slot + (get_ty_params_of_current_frame()) + true dst dst_slot src src_slot None + | CLONE_all clone_task -> + if is_alias_cell + then bug () "attempting to clone alias cell" + else + clone_slot + (get_ty_params_of_current_frame()) + clone_task dst src dst_slot None + + and trans_be_fn + (cx:ctxt) + (dst_cell:Il.cell) + (flv:Ast.lval) + (ty_params:Ast.ty array) + (args:Ast.atom array) + : unit = + let (ptr, fn_ty) = trans_callee flv in + let cc = call_ctrl flv in + let call = { call_ctrl = cc; + call_callee_ptr = ptr; + call_callee_ty = fn_ty; + call_callee_ty_params = ty_params; + call_output = dst_cell; + call_args = args; + call_iterator_args = call_iterator_args None; + call_indirect_args = call_indirect_args flv cc } + in + (* FIXME: true if caller is object fn *) + let caller_is_closure = false in + log cx "trans_be_fn: %s call to lval %a" + (call_ctrl_string cc) Ast.sprintf_lval flv; + trans_be (fun () -> Ast.sprintf_lval () flv) caller_is_closure call + + and trans_prepare_fn_call + (initializing:bool) + (cx:ctxt) + (dst_cell:Il.cell) + (flv:Ast.lval) + (ty_params:Ast.ty array) + (fco:for_each_ctrl option) + (args:Ast.atom array) + : Il.operand = + let (ptr, fn_ty) = trans_callee flv in + let cc = call_ctrl flv in + let call = { call_ctrl = cc; + call_callee_ptr = ptr; + call_callee_ty = fn_ty; + call_callee_ty_params = ty_params; + call_output = dst_cell; + call_args = args; + call_iterator_args = call_iterator_args fco; + call_indirect_args = call_indirect_args flv cc } + in + iflog + begin + fun _ -> + log cx "trans_prepare_fn_call: %s call to lval %a" + (call_ctrl_string cc) Ast.sprintf_lval flv; + log cx "lval type: %a" Ast.sprintf_ty fn_ty; + Array.iteri (fun i t -> log cx "ty param %d = %a" + i Ast.sprintf_ty t) + ty_params; + end; + trans_prepare_call initializing (fun () -> Ast.sprintf_lval () flv) call + + and trans_call_pred_and_check + (constr:Ast.constr) + (flv:Ast.lval) + (args:Ast.atom array) + : unit = + let (ptr, fn_ty) = trans_callee flv in + let dst_cell = Il.Mem (force_to_mem imm_false) in + let call = { call_ctrl = call_ctrl flv; + call_callee_ptr = ptr; + call_callee_ty = fn_ty; + call_callee_ty_params = [| |]; + call_output = dst_cell; + call_args = args; + call_iterator_args = [| |]; + call_indirect_args = [| |] } + in + iflog (fun _ -> annotate "predicate call"); + let fn_ptr = + trans_prepare_call true (fun _ -> Ast.sprintf_lval () flv) call + in + call_code (code_of_operand fn_ptr); + iflog (fun _ -> annotate "predicate check/fail"); + let jmp = trans_compare Il.JE (Il.Cell dst_cell) imm_true in + let errstr = Printf.sprintf "predicate check: %a" + Ast.sprintf_constr constr + in + trans_cond_fail errstr jmp + + and trans_init_closure + (closure_cell:Il.cell) + (target_fn_ptr:Il.operand) + (target_binding_ptr:Il.operand) + (bound_arg_slots:Ast.slot array) + (bound_args:Ast.atom array) + : unit = + + let rc_cell = get_element_ptr closure_cell 0 in + let targ_cell = get_element_ptr closure_cell 1 in + let args_cell = get_element_ptr closure_cell 2 in + + iflog (fun _ -> annotate "init closure refcount"); + mov rc_cell one; + iflog (fun _ -> annotate "set closure target code ptr"); + mov (get_element_ptr targ_cell 0) (reify_ptr target_fn_ptr); + iflog (fun _ -> annotate "set closure target binding ptr"); + mov (get_element_ptr targ_cell 1) (reify_ptr target_binding_ptr); + + iflog (fun _ -> annotate "set closure bound args"); + copy_bound_args args_cell bound_arg_slots bound_args + + and trans_bind_fn + (initializing:bool) + (cc:call_ctrl) + (bind_id:node_id) + (dst:Ast.lval) + (flv:Ast.lval) + (fn_sig:Ast.ty_sig) + (args:Ast.atom option array) + : unit = + let (dst_cell, _) = trans_lval_maybe_init initializing dst in + let (target_ptr, _) = trans_callee flv in + let arg_bound_flags = Array.map bool_of_option args in + let arg_slots = + arr_map2 + (fun arg_slot bound_flag -> + if bound_flag then Some arg_slot else None) + fn_sig.Ast.sig_input_slots + arg_bound_flags + in + let bound_arg_slots = arr_filter_some arg_slots in + let bound_args = arr_filter_some args in + let glue_fixup = + get_fn_binding_glue bind_id fn_sig.Ast.sig_input_slots arg_bound_flags + in + let target_fn_ptr = callee_fn_ptr target_ptr cc in + let target_binding_ptr = callee_binding_ptr flv cc in + let closure_rty = closure_referent_type bound_arg_slots in + let closure_sz = force_sz (Il.referent_ty_size word_bits closure_rty) in + let fn_cell = get_element_ptr dst_cell Abi.binding_field_item in + let closure_cell = + ptr_cast + (get_element_ptr dst_cell Abi.binding_field_binding) + (Il.ScalarTy (Il.AddrTy (closure_rty))) + in + iflog (fun _ -> annotate "assign glue-code to fn slot of pair"); + mov fn_cell (crate_rel_imm glue_fixup); + iflog (fun _ -> + annotate "heap-allocate closure to binding slot of pair"); + trans_malloc closure_cell (imm closure_sz); + trans_init_closure + (deref closure_cell) + target_fn_ptr target_binding_ptr + bound_arg_slots bound_args + + + and trans_arg0 (arg_cell:Il.cell) (output_cell:Il.cell) : unit = + (* Emit arg0 of any call: the output slot. *) + iflog (fun _ -> annotate "fn-call arg 0: output slot"); + trans_init_slot_from_cell + CLONE_none + arg_cell (word_write_alias_slot abi) + output_cell word_slot + + and trans_arg1 (arg_cell:Il.cell) : unit = + (* Emit arg1 of any call: the task pointer. *) + iflog (fun _ -> annotate "fn-call arg 1: task pointer"); + trans_init_slot_from_cell + CLONE_none + arg_cell word_slot + abi.Abi.abi_tp_cell word_slot + + and trans_argN + (clone:clone_ctrl) + (arg_cell:Il.cell) + (arg_slot:Ast.slot) + (arg:Ast.atom) + : unit = + trans_init_slot_from_atom clone arg_cell arg_slot arg + + and code_of_cell (cell:Il.cell) : Il.code = + match cell with + Il.Mem (_, Il.ScalarTy (Il.AddrTy Il.CodeTy)) + | Il.Reg (_, Il.AddrTy Il.CodeTy) -> Il.CodePtr (Il.Cell cell) + | _ -> + bug () "expected code-pointer cell, found %s" + (cell_str cell) + + and code_of_operand (operand:Il.operand) : Il.code = + match operand with + Il.Cell c -> code_of_cell c + | Il.ImmPtr (_, Il.CodeTy) -> Il.CodePtr operand + | _ -> + bug () "expected code-pointer operand, got %s" + (oper_str operand) + + and ty_arg_slots (ty:Ast.ty) : Ast.slot array = + match ty with + Ast.TY_fn (tsig, _) -> tsig.Ast.sig_input_slots + | _ -> bug () "Trans.ty_arg_slots on non-callable type: %a" + Ast.sprintf_ty ty + + and copy_fn_args + (tail_area:bool) + (clone:clone_ctrl) + (call:call) + : unit = + + let n_ty_params = Array.length call.call_callee_ty_params in + let all_callee_args_rty = + let clo = + if call.call_ctrl = CALL_direct + then None + else (Some Il.OpaqueTy) + in + call_args_referent_type cx n_ty_params call.call_callee_ty clo + in + let all_callee_args_cell = + callee_args_cell tail_area all_callee_args_rty + in + + let _ = iflog (fun _ -> annotate + (Printf.sprintf + "copying fn args to %d-ty-param call with rty: %s\n" + n_ty_params (Il.string_of_referent_ty + all_callee_args_rty))) + in + let callee_arg_slots = ty_arg_slots call.call_callee_ty in + let callee_output_cell = + get_element_ptr all_callee_args_cell Abi.calltup_elt_out_ptr + in + let callee_task_cell = + get_element_ptr all_callee_args_cell Abi.calltup_elt_task_ptr + in + let callee_ty_params = + get_element_ptr all_callee_args_cell Abi.calltup_elt_ty_params + in + let callee_args = + get_element_ptr_dyn_in_current_frame + all_callee_args_cell Abi.calltup_elt_args + in + let callee_iterator_args = + get_element_ptr_dyn_in_current_frame + all_callee_args_cell Abi.calltup_elt_iterator_args + in + let callee_indirect_args = + get_element_ptr_dyn_in_current_frame + all_callee_args_cell Abi.calltup_elt_indirect_args + in + + let n_args = Array.length call.call_args in + let n_iterators = Array.length call.call_iterator_args in + let n_indirects = Array.length call.call_indirect_args in + + Array.iteri + begin + fun i arg_atom -> + iflog (fun _ -> + annotate + (Printf.sprintf "fn-call arg %d of %d (+ %d indirect)" + i n_args n_indirects)); + trans_argN + clone + (get_element_ptr_dyn_in_current_frame callee_args i) + callee_arg_slots.(i) + arg_atom + end + call.call_args; + + Array.iteri + begin + fun i iterator_arg_operand -> + iflog (fun _ -> + annotate (Printf.sprintf "fn-call iterator-arg %d of %d" + i n_iterators)); + mov + (get_element_ptr_dyn_in_current_frame callee_iterator_args i) + iterator_arg_operand + end + call.call_iterator_args; + + Array.iteri + begin + fun i indirect_arg_operand -> + iflog (fun _ -> + annotate (Printf.sprintf "fn-call indirect-arg %d of %d" + i n_indirects)); + mov + (get_element_ptr_dyn_in_current_frame callee_indirect_args i) + indirect_arg_operand + end + call.call_indirect_args; + + Array.iteri + begin + fun i ty_param -> + iflog (fun _ -> + annotate + (Printf.sprintf "fn-call ty param %d of %d" + i n_ty_params)); + trans_init_slot_from_cell CLONE_none + (get_element_ptr callee_ty_params i) word_slot + (get_tydesc None ty_param) word_slot + end + call.call_callee_ty_params; + + trans_arg1 callee_task_cell; + + trans_arg0 callee_output_cell call.call_output + + + + and call_code (code:Il.code) : unit = + let vr = next_vreg_cell Il.voidptr_t in + emit (Il.call vr code); + + + and copy_bound_args + (dst_cell:Il.cell) + (bound_arg_slots:Ast.slot array) + (bound_args:Ast.atom array) + : unit = + let n_slots = Array.length bound_arg_slots in + Array.iteri + begin + fun i slot -> + iflog (fun _ -> + annotate (Printf.sprintf + "copy bound arg %d of %d" i n_slots)); + trans_argN CLONE_none + (get_element_ptr dst_cell i) + slot bound_args.(i) + end + bound_arg_slots + + and merge_bound_args + (all_self_args_rty:Il.referent_ty) + (all_callee_args_rty:Il.referent_ty) + (arg_slots:Ast.slot array) + (arg_bound_flags:bool array) + : unit = + begin + (* + * NB: 'all_*_args', both self and callee, are always 4-tuples: + * + * [out_ptr, task_ptr, [args], [indirect_args]] + * + * The first few bindings here just destructure those via GEP. + * + *) + let all_self_args_cell = caller_args_cell all_self_args_rty in + let all_callee_args_cell = callee_args_cell false all_callee_args_rty in + + let self_args_cell = + get_element_ptr all_self_args_cell Abi.calltup_elt_args + in + let self_ty_params_cell = + get_element_ptr all_self_args_cell Abi.calltup_elt_ty_params + in + let callee_args_cell = + get_element_ptr all_callee_args_cell Abi.calltup_elt_args + in + let self_indirect_args_cell = + get_element_ptr all_self_args_cell Abi.calltup_elt_indirect_args + in + + let n_args = Array.length arg_bound_flags in + let bound_i = ref 0 in + let unbound_i = ref 0 in + + iflog (fun _ -> annotate "copy out-ptr"); + mov + (get_element_ptr all_callee_args_cell Abi.calltup_elt_out_ptr) + (Il.Cell (get_element_ptr all_self_args_cell + Abi.calltup_elt_out_ptr)); + + iflog (fun _ -> annotate "copy task-ptr"); + mov + (get_element_ptr all_callee_args_cell Abi.calltup_elt_task_ptr) + (Il.Cell (get_element_ptr all_self_args_cell + Abi.calltup_elt_task_ptr)); + + iflog (fun _ -> annotate "extract closure indirect-arg"); + let closure_cell = + deref (get_element_ptr self_indirect_args_cell + Abi.indirect_args_elt_closure) + in + let closure_args_cell = get_element_ptr closure_cell 2 in + + for arg_i = 0 to (n_args - 1) do + let dst_cell = get_element_ptr callee_args_cell arg_i in + let slot = arg_slots.(arg_i) in + let is_bound = arg_bound_flags.(arg_i) in + let src_cell = + if is_bound then + begin + iflog (fun _ -> annotate + (Printf.sprintf + "extract bound arg %d as actual arg %d" + !bound_i arg_i)); + get_element_ptr closure_args_cell (!bound_i); + end + else + begin + iflog (fun _ -> annotate + (Printf.sprintf + "extract unbound arg %d as actual arg %d" + !unbound_i arg_i)); + get_element_ptr self_args_cell (!unbound_i); + end + in + iflog (fun _ -> annotate + (Printf.sprintf + "copy into actual-arg %d" arg_i)); + trans_copy_slot + self_ty_params_cell + true dst_cell slot src_cell slot None; + incr (if is_bound then bound_i else unbound_i); + done; + assert ((!bound_i + !unbound_i) == n_args) + end + + + and callee_fn_ptr + (fptr:Il.operand) + (cc:call_ctrl) + : Il.operand = + match cc with + CALL_direct + | CALL_vtbl -> fptr + | CALL_indirect -> + (* fptr is a pair [disp, binding*] *) + let pair_cell = need_cell (reify_ptr fptr) in + let disp_cell = get_element_ptr pair_cell Abi.binding_field_item in + Il.Cell (crate_rel_to_ptr (Il.Cell disp_cell) Il.CodeTy) + + and callee_binding_ptr + (pair_lval:Ast.lval) + (cc:call_ctrl) + : Il.operand = + if cc = CALL_direct + then zero + else + let (pair_cell, _) = trans_lval pair_lval in + Il.Cell (get_element_ptr pair_cell Abi.binding_field_binding) + + and call_ctrl flv : call_ctrl = + if lval_is_static cx flv + then CALL_direct + else + if lval_is_obj_vtbl cx flv + then CALL_vtbl + else CALL_indirect + + and call_ctrl_string cc = + match cc with + CALL_direct -> "direct" + | CALL_indirect -> "indirect" + | CALL_vtbl -> "vtbl" + + and call_iterator_args + (fco:for_each_ctrl option) + : Il.operand array = + match fco with + None -> [| |] + | Some fco -> + begin + iflog (fun _ -> annotate "calculate iterator args"); + [| reify_ptr (code_fixup_to_ptr_operand fco.for_each_fixup); + Il.Cell (Il.Reg (abi.Abi.abi_fp_reg, Il.voidptr_t)); |] + end + + and call_indirect_args + (flv:Ast.lval) + (cc:call_ctrl) + : Il.operand array = + begin + match cc with + CALL_direct -> [| |] + | CALL_indirect -> [| callee_binding_ptr flv cc |] + | CALL_vtbl -> + begin + match flv with + (* + * FIXME: will need to pass both words of obj if we add + * a 'self' value for self-dispatch within objs. + *) + Ast.LVAL_ext (base, _) -> [| callee_binding_ptr base cc |] + | _ -> + bug (lval_base_id flv) + "call_indirect_args on obj-fn without base obj" + end + end + + and trans_be + (logname:(unit -> string)) + (caller_is_closure:bool) + (call:call) + : unit = + let callee_fptr = callee_fn_ptr call.call_callee_ptr call.call_ctrl in + let callee_code = code_of_operand callee_fptr in + let callee_args_rty = + call_args_referent_type cx 0 call.call_callee_ty + (if call.call_ctrl = CALL_direct then None else (Some Il.OpaqueTy)) + in + let callee_argsz = + force_sz (Il.referent_ty_size word_bits callee_args_rty) + in + let closure_rty = + if caller_is_closure + then Some Il.OpaqueTy + else None + in + let caller_args_rty = current_fn_args_rty closure_rty in + let caller_argsz = + force_sz (Il.referent_ty_size word_bits caller_args_rty) + in + iflog (fun _ -> annotate + (Printf.sprintf "copy args for tail call to %s" (logname ()))); + copy_fn_args true CLONE_none call; + drop_slots_at_curr_stmt(); + abi.Abi.abi_emit_fn_tail_call (emitter()) + (force_sz (current_fn_callsz())) + caller_argsz callee_code callee_argsz; + + + and trans_prepare_call + ((*initializing*)_:bool) + (logname:(unit -> string)) + (call:call) + : Il.operand = + + let callee_fptr = callee_fn_ptr call.call_callee_ptr call.call_ctrl in + iflog (fun _ -> annotate + (Printf.sprintf "copy args for call to %s" (logname ()))); + copy_fn_args false CLONE_none call; + iflog (fun _ -> annotate (Printf.sprintf "call %s" (logname ()))); + (* FIXME (issue #24): we need to actually handle writing to an + * already-initialised slot. Currently we blindly assume we're + * initializing, overwrite the slot; this is ok if we're writing + * to an interior output slot, but we'll leak any exteriors as we + * do that. *) + callee_fptr + + and callee_drop_slot + (k:Ast.slot_key) + (slot_id:node_id) + (slot:Ast.slot) + : unit = + iflog (fun _ -> + annotate (Printf.sprintf "callee_drop_slot %d = %s " + (int_of_node slot_id) + (Ast.fmt_to_str Ast.fmt_slot_key k))); + drop_slot_in_current_frame (cell_of_block_slot slot_id) slot None + + + and trans_alt_tag { Ast.alt_tag_lval = lval; Ast.alt_tag_arms = arms } = + let ((lval_cell:Il.cell), { Ast.slot_ty = ty_opt }) = trans_lval lval in + let lval_ty = + match ty_opt with + Some ty -> ty + | None -> bug cx "expected lval type" + in + + let trans_arm { node = (pat, block) } : quad_idx = + (* Translates the pattern and returns the addresses of the branch + * instructions, which are taken if the match fails. *) + let rec trans_pat pat cell (ty:Ast.ty) = + match pat with + Ast.PAT_lit lit -> + let operand = trans_lit lit in + emit (Il.cmp (Il.Cell cell) operand); + let next_jump = mark() in + emit (Il.jmp Il.JNE Il.CodeNone); + [ next_jump ] + + | Ast.PAT_tag (ident, pats) -> + let ty_tag = + match ty with + Ast.TY_tag tag_ty -> tag_ty + | Ast.TY_iso ti -> (ti.Ast.iso_group).(ti.Ast.iso_index) + | _ -> bug cx "expected tag type" + in + let tag_keys = sorted_htab_keys ty_tag in + let tag_name = Ast.NAME_base (Ast.BASE_ident ident) in + let tag_number = arr_idx tag_keys tag_name in + let ty_tup = Hashtbl.find ty_tag tag_name in + + let tag_cell:Il.cell = get_element_ptr cell 0 in + let union_cell = get_element_ptr_dyn_in_current_frame cell 1 in + + emit (Il.cmp + (Il.Cell tag_cell) + (imm (Int64.of_int tag_number))); + let next_jump = mark() in + emit (Il.jmp Il.JNE Il.CodeNone); + + let tup_cell:Il.cell = get_variant_ptr union_cell tag_number in + + let trans_elem_pat i elem_pat : quad_idx list = + let elem_cell = + get_element_ptr_dyn_in_current_frame tup_cell i + in + let elem_ty = + match ty_tup.(i).Ast.slot_ty with + Some ty -> ty + | None -> bug cx "expected element type" + in + trans_pat elem_pat elem_cell elem_ty + in + + let elem_jumps = Array.mapi trans_elem_pat pats in + next_jump::(List.concat (Array.to_list elem_jumps)) + + | Ast.PAT_slot ({ node = dst_slot; id = dst_id }, _) -> + let dst_cell = cell_of_block_slot dst_id in + let src_cell = Il.Cell cell in + mov (deref_slot true dst_cell dst_slot) src_cell; + [] (* irrefutable *) + + | Ast.PAT_wild -> [] (* irrefutable *) + in + + let next_jumps = trans_pat pat lval_cell lval_ty in + trans_block block; + let last_jump = mark() in + emit (Il.jmp Il.JMP Il.CodeNone); + List.iter patch next_jumps; + last_jump + in + let last_jumps = Array.map trans_arm arms in + Array.iter patch last_jumps + + and drop_slots_at_curr_stmt _ : unit = + let stmt = Stack.top curr_stmt in + match htab_search cx.ctxt_post_stmt_slot_drops stmt with + None -> () + | Some slots -> + List.iter + begin + fun slot_id -> + let slot = get_slot cx slot_id in + let k = Hashtbl.find cx.ctxt_slot_keys slot_id in + iflog (fun _ -> + annotate + (Printf.sprintf + "post-stmt, drop_slot %d = %s " + (int_of_node slot_id) + (Ast.fmt_to_str Ast.fmt_slot_key k))); + drop_slot_in_current_frame + (cell_of_block_slot slot_id) slot None + end + slots + + and trans_stmt (stmt:Ast.stmt) : unit = + (* Helper to localize errors by stmt, at minimum. *) + try + iflog + begin + fun _ -> + let s = Ast.fmt_to_str Ast.fmt_stmt_body stmt in + log cx "translating stmt: %s" s; + annotate s; + end; + Stack.push stmt.id curr_stmt; + trans_stmt_full stmt; + begin + match stmt.node with + Ast.STMT_be _ + | Ast.STMT_ret _ -> () + | _ -> drop_slots_at_curr_stmt(); + end; + ignore (Stack.pop curr_stmt); + with + Semant_err (None, msg) -> raise (Semant_err ((Some stmt.id), msg)) + + + and maybe_init (id:node_id) (action:string) (dst:Ast.lval) : bool = + let b = Hashtbl.mem cx.ctxt_copy_stmt_is_init id in + let act = if b then ("initializing-" ^ action) else action in + iflog + (fun _ -> + annotate (Printf.sprintf "%s on dst lval %a" + act Ast.sprintf_lval dst)); + b + + + and trans_set_outptr (at:Ast.atom) : unit = + let (dst_mem, _) = + need_mem_cell + (deref (wordptr_at (fp_imm out_mem_disp))) + in + let atom_ty = atom_type cx at in + let dst_slot = interior_slot atom_ty in + let dst_ty = referent_type abi atom_ty in + let dst_cell = Il.Mem (dst_mem, dst_ty) in + trans_init_slot_from_atom + CLONE_none dst_cell dst_slot at + + + and trans_for_loop (fo:Ast.stmt_for) : unit = + let ty_params = get_ty_params_of_current_frame () in + let (dst_slot, _) = fo.Ast.for_slot in + let dst_cell = cell_of_block_slot dst_slot.id in + let (head_stmts, seq) = fo.Ast.for_seq in + let (seq_cell, seq_slot) = trans_lval_full false seq in + let unit_slot = seq_unit_slot (slot_ty seq_slot) in + Array.iter trans_stmt head_stmts; + iter_seq_slots ty_params seq_cell seq_cell unit_slot + begin + fun _ src_cell unit_slot curr_iso -> + trans_copy_slot + ty_params true + dst_cell dst_slot.node + src_cell unit_slot curr_iso; + trans_block fo.Ast.for_body; + end + None + + and trans_for_each_loop (stmt_id:node_id) (fe:Ast.stmt_for_each) : unit = + let id = fe.Ast.for_each_body.id in + let g = GLUE_loop_body id in + let name = glue_str cx g in + let fix = new_fixup name in + let framesz = get_framesz cx id in + let callsz = get_callsz cx id in + let spill = Hashtbl.find cx.ctxt_spill_fixups id in + push_new_emitter_with_vregs (Some id); + iflog (fun _ -> annotate "prologue"); + abi.Abi.abi_emit_fn_prologue (emitter()) + framesz callsz nabi_rust (upcall_fixup "upcall_grow_task"); + write_frame_info_ptrs None; + iflog (fun _ -> annotate "finished prologue"); + trans_block fe.Ast.for_each_body; + trans_glue_frame_exit fix spill g; + + (* + * We've now emitted the body helper-fn. Next, set up a loop that + * calls the iter and passes the helper-fn in. + *) + emit (Il.Enter + (Hashtbl.find + cx.ctxt_block_fixups + fe.Ast.for_each_head.id)); + let (dst_slot, _) = fe.Ast.for_each_slot in + let dst_cell = cell_of_block_slot dst_slot.id in + let (flv, args) = fe.Ast.for_each_call in + let ty_params = + match htab_search cx.ctxt_call_lval_params (lval_base_id flv) with + Some params -> params + | None -> [| |] + in + let depth = Hashtbl.find cx.ctxt_stmt_loop_depths stmt_id in + let fc = { for_each_fixup = fix; for_each_depth = depth } in + iflog (fun _ -> + log cx "for-each at depth %d\n" depth); + let fn_ptr = + trans_prepare_fn_call true cx dst_cell flv ty_params (Some fc) args + in + call_code (code_of_operand fn_ptr); + emit Il.Leave; + + and trans_put (atom_opt:Ast.atom option) : unit = + begin + match atom_opt with + None -> () + | Some at -> trans_set_outptr at + end; + let block_fptr = Il.Cell (get_iter_block_fn_for_current_frame ()) in + let fp = get_iter_outer_frame_ptr_for_current_frame () in + let vr = next_vreg_cell Il.voidptr_t in + mov vr zero; + trans_call_glue (code_of_operand block_fptr) None [| vr; fp |] + + and trans_vec_append dst_cell dst_slot src_oper src_ty = + let (dst_elt_slot, trim_trailing_null) = + match slot_ty dst_slot with + Ast.TY_str -> (interior_slot (Ast.TY_mach TY_u8), true) + | Ast.TY_vec e -> (e, false) + | _ -> bug () "unexpected dst type in trans_vec_append" + in + match src_ty with + Ast.TY_str + | Ast.TY_vec _ -> + let src_cell = need_cell src_oper in + let src_vec = deref src_cell in + let src_fill = get_element_ptr src_vec Abi.vec_elt_fill in + let src_elt_slot = + match src_ty with + Ast.TY_str -> interior_slot (Ast.TY_mach TY_u8) + | Ast.TY_vec e -> e + | _ -> bug () "unexpected src type in trans_vec_append" + in + let dst_vec = deref dst_cell in + let dst_fill = get_element_ptr dst_vec Abi.vec_elt_fill in + if trim_trailing_null + then sub_from dst_fill (imm 1L); + trans_upcall "upcall_vec_grow" + dst_cell + [| Il.Cell dst_cell; + Il.Cell src_fill |]; + + (* + * By now, dst_cell points to a vec/str with room for us + * to add to. + *) + + (* Reload dst vec, fill; might have changed. *) + let dst_vec = deref dst_cell in + let dst_fill = get_element_ptr dst_vec Abi.vec_elt_fill in + + (* Copy loop: *) + let pty s = Il.AddrTy (slot_referent_type abi s) in + let dptr = next_vreg_cell (pty dst_elt_slot) in + let sptr = next_vreg_cell (pty src_elt_slot) in + let dlim = next_vreg_cell (pty dst_elt_slot) in + let dst_elt_sz = slot_sz_in_current_frame dst_elt_slot in + let src_elt_sz = slot_sz_in_current_frame src_elt_slot in + let dst_data = + get_element_ptr_dyn_in_current_frame + dst_vec Abi.vec_elt_data + in + let src_data = + get_element_ptr_dyn_in_current_frame + src_vec Abi.vec_elt_data + in + lea dptr (fst (need_mem_cell dst_data)); + lea sptr (fst (need_mem_cell src_data)); + add_to dptr (Il.Cell dst_fill); + mov dlim (Il.Cell dptr); + add_to dlim (Il.Cell src_fill); + let fwd_jmp = mark () in + emit (Il.jmp Il.JMP Il.CodeNone); + let back_jmp_targ = mark () in + (* copy slot *) + trans_copy_slot + (get_ty_params_of_current_frame()) true + (deref dptr) dst_elt_slot + (deref sptr) src_elt_slot + None; + add_to dptr dst_elt_sz; + add_to sptr src_elt_sz; + patch fwd_jmp; + check_interrupt_flag (); + let back_jmp = + trans_compare Il.JB (Il.Cell dptr) (Il.Cell dlim) in + List.iter + (fun j -> patch_existing j back_jmp_targ) back_jmp; + let v = next_vreg_cell word_ty in + mov v (Il.Cell src_fill); + add_to dst_fill (Il.Cell v); + | t -> + begin + bug () "unsupported vector-append type %a" Ast.sprintf_ty t + end + + + and trans_copy_binop dst binop a_src = + let (dst_cell, dst_slot) = trans_lval_maybe_init false dst in + let src_oper = trans_atom a_src in + match slot_ty dst_slot with + Ast.TY_str + | Ast.TY_vec _ when binop = Ast.BINOP_add -> + trans_vec_append dst_cell dst_slot src_oper (atom_type cx a_src) + | _ -> + let dst_cell = deref_slot false dst_cell dst_slot in + let op = trans_binop binop in + emit (Il.binary op dst_cell (Il.Cell dst_cell) src_oper); + + + + and trans_stmt_full (stmt:Ast.stmt) : unit = + match stmt.node with + + Ast.STMT_log a -> + begin + match atom_type cx a with + (* NB: If you extend this, be sure to update the + * typechecking code in type.ml as well. *) + Ast.TY_str -> trans_log_str a + | Ast.TY_int | Ast.TY_uint | Ast.TY_bool + | Ast.TY_char | Ast.TY_mach (TY_u8) + | Ast.TY_mach (TY_u16) | Ast.TY_mach (TY_u32) + | Ast.TY_mach (TY_i8) | Ast.TY_mach (TY_i16) + | Ast.TY_mach (TY_i32) -> + trans_log_int a + | _ -> bugi cx stmt.id "unimplemented logging type" + end + + | Ast.STMT_check_expr e -> + begin + match expr_type cx e with + Ast.TY_bool -> trans_check_expr e + | _ -> bugi cx stmt.id "check expr on non-bool" + end + + | Ast.STMT_yield -> + trans_yield () + + | Ast.STMT_fail -> + trans_fail () + + | Ast.STMT_join task -> + trans_join task + + | Ast.STMT_send (chan,src) -> + trans_send chan src + + | Ast.STMT_spawn (dst, domain, plv, args) -> + trans_spawn (maybe_init stmt.id "spawn" dst) dst domain plv args + + | Ast.STMT_recv (dst, chan) -> + trans_recv (maybe_init stmt.id "recv" dst) dst chan + + | Ast.STMT_copy (dst, e_src) -> + trans_copy (maybe_init stmt.id "copy" dst) dst e_src + + | Ast.STMT_copy_binop (dst, binop, a_src) -> + trans_copy_binop dst binop a_src + + | Ast.STMT_call (dst, flv, args) -> + begin + let init = maybe_init stmt.id "call" dst in + let ty = lval_ty cx flv in + let ty_params = + match + htab_search + cx.ctxt_call_lval_params (lval_base_id flv) + with + Some params -> params + | None -> [| |] + in + match ty with + Ast.TY_fn _ -> + let (dst_cell, _) = trans_lval_maybe_init init dst in + let fn_ptr = + trans_prepare_fn_call init cx dst_cell flv + ty_params None args + in + call_code (code_of_operand fn_ptr) + | _ -> bug () "Calling unexpected lval." + end + + | Ast.STMT_bind (dst, flv, args) -> + begin + let init = maybe_init stmt.id "bind" dst in + match lval_ty cx flv with + Ast.TY_fn (tsig, _) -> + trans_bind_fn + init (call_ctrl flv) stmt.id dst flv tsig args + | _ -> bug () "Binding unexpected lval." + end + + | Ast.STMT_init_rec (dst, atab, base) -> + let (slot_cell, slot) = trans_lval_init dst in + let (trec, dst_slots) = + match slot_ty slot with + Ast.TY_rec trec -> (trec, Array.map snd trec) + | _ -> + bugi cx stmt.id + "non-rec destination type in stmt_init_rec" + in + let dst_cell = deref_slot true slot_cell slot in + begin + match base with + None -> + let atoms = + Array.map (fun (_, _, _, atom) -> atom) atab + in + trans_init_structural_from_atoms + dst_cell dst_slots atoms + | Some base_lval -> + trans_init_rec_update + dst_cell dst_slots trec atab base_lval + end + + | Ast.STMT_init_tup (dst, mode_atoms) -> + let (slot_cell, slot) = trans_lval_init dst in + let dst_slots = + match slot_ty slot with + Ast.TY_tup ttup -> ttup + | _ -> + bugi cx stmt.id + "non-tup destination type in stmt_init_tup" + in + let atoms = Array.map (fun (_, _, atom) -> atom) mode_atoms in + let dst_cell = deref_slot true slot_cell slot in + trans_init_structural_from_atoms dst_cell dst_slots atoms + + + | Ast.STMT_init_str (dst, s) -> + trans_init_str dst s + + | Ast.STMT_init_vec (dst, _, atoms) -> + trans_init_vec dst atoms + + | Ast.STMT_init_port dst -> + trans_init_port dst + + | Ast.STMT_init_chan (dst, port) -> + begin + match port with + None -> + let (dst_cell, _) = + trans_lval_init dst + in + mov dst_cell imm_false + | Some p -> + trans_init_chan dst p + end + + | Ast.STMT_block block -> + trans_block block + + | Ast.STMT_while sw -> + let (head_stmts, head_expr) = sw.Ast.while_lval in + let fwd_jmp = mark () in + emit (Il.jmp Il.JMP Il.CodeNone); + let block_begin = mark () in + trans_block sw.Ast.while_body; + patch fwd_jmp; + Array.iter trans_stmt head_stmts; + check_interrupt_flag (); + let back_jmps = trans_cond false head_expr in + List.iter (fun j -> patch_existing j block_begin) back_jmps; + + | Ast.STMT_if si -> + let skip_thn_jmps = trans_cond true si.Ast.if_test in + trans_block si.Ast.if_then; + begin + match si.Ast.if_else with + None -> List.iter patch skip_thn_jmps + | Some els -> + let skip_els_jmp = mark () in + begin + emit (Il.jmp Il.JMP Il.CodeNone); + List.iter patch skip_thn_jmps; + trans_block els; + patch skip_els_jmp + end + end + + | Ast.STMT_check (preds, calls) -> + Array.iteri + (fun i (fn, args) -> trans_call_pred_and_check preds.(i) fn args) + calls + + | Ast.STMT_ret atom_opt -> + begin + match atom_opt with + None -> () + | Some at -> trans_set_outptr at + end; + drop_slots_at_curr_stmt(); + Stack.push (mark()) (Stack.top epilogue_jumps); + emit (Il.jmp Il.JMP Il.CodeNone) + + | Ast.STMT_be (flv, args) -> + let ty = lval_ty cx flv in + let ty_params = + match htab_search cx.ctxt_call_lval_params (lval_base_id flv) with + Some params -> params + | None -> [| |] + in + begin + match ty with + Ast.TY_fn (tsig, _) -> + let result_ty = slot_ty tsig.Ast.sig_output_slot in + let (dst_mem, _) = + need_mem_cell + (deref (wordptr_at (fp_imm out_mem_disp))) + in + let dst_rty = referent_type abi result_ty in + let dst_cell = Il.Mem (dst_mem, dst_rty) in + trans_be_fn cx dst_cell flv ty_params args + + | _ -> bug () "Calling unexpected lval." + end + + | Ast.STMT_put atom_opt -> + trans_put atom_opt + + | Ast.STMT_alt_tag stmt_alt_tag -> trans_alt_tag stmt_alt_tag + + | Ast.STMT_decl _ -> () + + | Ast.STMT_for fo -> + trans_for_loop fo + + | Ast.STMT_for_each fe -> + trans_for_each_loop stmt.id fe + + | _ -> bugi cx stmt.id "unhandled form of statement in trans_stmt %a" + Ast.sprintf_stmt stmt + + and capture_emitted_quads (fix:fixup) (node:node_id) : unit = + let e = emitter() in + let n_vregs = Il.num_vregs e in + let quads = emitted_quads e in + let name = path_name () in + let f = + if Stack.is_empty curr_file + then bugi cx node "missing file scope when capturing quads." + else Stack.top curr_file + in + let item_code = Hashtbl.find cx.ctxt_file_code f in + begin + iflog (fun _ -> + log cx "capturing quads for item #%d" (int_of_node node); + annotate_quads name); + let vr_s = + match htab_search cx.ctxt_spill_fixups node with + None -> (assert (n_vregs = 0); None) + | Some spill -> Some (n_vregs, spill) + in + let code = { code_fixup = fix; + code_quads = quads; + code_vregs_and_spill = vr_s; } + in + htab_put item_code node code; + htab_put cx.ctxt_all_item_code node code + end + + and get_frame_glue_fns (fnid:node_id) : Il.operand = + let n_ty_params = n_item_ty_params cx fnid in + let get_frame_glue glue inner = + get_mem_glue glue + begin + fun mem -> + iter_frame_and_arg_slots cx fnid + begin + fun key slot_id slot -> + match htab_search cx.ctxt_slot_offsets slot_id with + Some off when not (slot_is_obj_state cx slot_id) -> + let referent_type = slot_id_referent_type slot_id in + let fp_cell = rty_ptr_at mem referent_type in + let (fp, st) = force_to_reg (Il.Cell fp_cell) in + let ty_params = + get_ty_params_of_frame fp n_ty_params + in + let slot_cell = + deref_off_sz ty_params (Il.Reg (fp,st)) off + in + inner key slot_id ty_params slot slot_cell + | _ -> () + end + end + in + trans_crate_rel_data_operand + (DATA_frame_glue_fns fnid) + begin + fun _ -> + let mark_frame_glue_fixup = + get_frame_glue (GLUE_mark_frame fnid) + begin + fun _ _ ty_params slot slot_cell -> + mark_slot ty_params slot_cell slot None + end + in + let drop_frame_glue_fixup = + get_frame_glue (GLUE_drop_frame fnid) + begin + fun _ _ ty_params slot slot_cell -> + drop_slot ty_params slot_cell slot None + end + in + let reloc_frame_glue_fixup = + get_frame_glue (GLUE_reloc_frame fnid) + begin + fun _ _ _ _ _ -> + () + end + in + table_of_crate_rel_fixups + [| + (* + * NB: this must match the struct-offsets given in ABI + * & rust runtime library. + *) + mark_frame_glue_fixup; + drop_frame_glue_fixup; + reloc_frame_glue_fixup; + |] + end + in + + let trans_frame_entry (fnid:node_id) : unit = + let framesz = get_framesz cx fnid in + let callsz = get_callsz cx fnid in + Stack.push (Stack.create()) epilogue_jumps; + push_new_emitter_with_vregs (Some fnid); + iflog (fun _ -> annotate "prologue"); + iflog (fun _ -> annotate (Printf.sprintf + "framesz %s" + (string_of_size framesz))); + iflog (fun _ -> annotate (Printf.sprintf + "callsz %s" + (string_of_size callsz))); + abi.Abi.abi_emit_fn_prologue + (emitter()) framesz callsz nabi_rust + (upcall_fixup "upcall_grow_task"); + + write_frame_info_ptrs (Some fnid); + check_interrupt_flag (); + iflog (fun _ -> annotate "finished prologue"); + in + + let trans_frame_exit (fnid:node_id) (drop_args:bool) : unit = + Stack.iter patch (Stack.pop epilogue_jumps); + if drop_args + then + begin + iflog (fun _ -> annotate "drop args"); + iter_arg_slots cx fnid callee_drop_slot; + end; + iflog (fun _ -> annotate "epilogue"); + abi.Abi.abi_emit_fn_epilogue (emitter()); + capture_emitted_quads (get_fn_fixup cx fnid) fnid; + pop_emitter () + in + + let trans_fn + (fnid:node_id) + (body:Ast.block) + : unit = + trans_frame_entry fnid; + trans_block body; + trans_frame_exit fnid true; + in + + let trans_obj_ctor + (obj_id:node_id) + (state:Ast.header_slots) + : unit = + trans_frame_entry obj_id; + + let all_args_rty = current_fn_args_rty None in + let all_args_cell = caller_args_cell all_args_rty in + let frame_args = + get_element_ptr_dyn_in_current_frame + all_args_cell Abi.calltup_elt_args + in + let frame_ty_params = + get_element_ptr_dyn_in_current_frame + all_args_cell Abi.calltup_elt_ty_params + in + + let obj_args_tup = Array.map (fun (sloti,_) -> sloti.node) state in + let obj_args_slot = interior_slot (Ast.TY_tup obj_args_tup) in + let state_ty = + Ast.TY_tup [| interior_slot Ast.TY_type; + obj_args_slot |] + in + let state_rty = slot_referent_type abi (interior_slot state_ty) in + let state_ptr_slot = exterior_slot state_ty in + let state_ptr_rty = slot_referent_type abi state_ptr_slot in + let state_malloc_sz = + calculate_sz_in_current_frame + (SIZE_rt_add + ((SIZE_fixed (word_n Abi.exterior_rc_header_size)), + (Il.referent_ty_size word_bits state_rty))) + in + + let ctor_ty = Hashtbl.find cx.ctxt_all_item_types obj_id in + let obj_ty = + match ctor_ty with + Ast.TY_fn (tsig, _) -> slot_ty tsig.Ast.sig_output_slot + | _ -> bug () "object constructor doesn't have function type" + in + let vtbl_ptr = get_obj_vtbl obj_id in + let _ = + iflog (fun _ -> annotate "calculate vtbl-ptr from displacement") + in + let vtbl_cell = crate_rel_to_ptr vtbl_ptr Il.CodeTy in + + let _ = iflog (fun _ -> annotate "load destination obj pair ptr") in + let dst_pair_cell = deref (ptr_at (fp_imm out_mem_disp) obj_ty) in + let dst_pair_item_cell = + get_element_ptr dst_pair_cell Abi.binding_field_item + in + let dst_pair_state_cell = + get_element_ptr dst_pair_cell Abi.binding_field_binding + in + + (* Load first cell of pair with vtbl ptr.*) + iflog (fun _ -> annotate "mov vtbl-ptr to obj.item cell"); + mov dst_pair_item_cell (Il.Cell vtbl_cell); + + (* Load second cell of pair with pointer to fresh state tuple.*) + iflog (fun _ -> annotate "malloc state-tuple to obj.state cell"); + trans_malloc dst_pair_state_cell state_malloc_sz; + + (* Copy args into the state tuple. *) + let state_ptr = next_vreg_cell (need_scalar_ty state_ptr_rty) in + iflog (fun _ -> annotate "load obj.state ptr to vreg"); + mov state_ptr (Il.Cell dst_pair_state_cell); + let state = deref state_ptr in + let refcnt = get_element_ptr_dyn_in_current_frame state 0 in + let body = get_element_ptr_dyn_in_current_frame state 1 in + let obj_tydesc = get_element_ptr_dyn_in_current_frame body 0 in + let obj_args = get_element_ptr_dyn_in_current_frame body 1 in + iflog (fun _ -> annotate "write refcnt=1 to obj state"); + mov refcnt one; + iflog (fun _ -> annotate "get args-tup tydesc"); + mov obj_tydesc + (Il.Cell (get_tydesc + (Some obj_id) + (Ast.TY_tup obj_args_tup))); + iflog (fun _ -> annotate "copy ctor args to obj args"); + trans_copy_tup + frame_ty_params true + obj_args frame_args obj_args_tup; + (* We have to do something curious here: we can't drop the + * arg slots directly as in the normal frame-exit sequence, + * because the arg slot ids are actually given layout + * positions inside the object state, and are at different + * offsets within that state than within the current + * frame. So we manually drop the argument tuple here, + * without mentioning the arg slot ids. + *) + drop_slot frame_ty_params frame_args obj_args_slot None; + trans_frame_exit obj_id false; + in + + let string_of_name_component (nc:Ast.name_component) : string = + match nc with + Ast.COMP_ident i -> i + | _ -> bug () + "Trans.string_of_name_component on non-COMP_ident" + in + + + let trans_static_name_components + (ncs:Ast.name_component list) + : Il.operand = + let f nc = + trans_crate_rel_static_string_frag (string_of_name_component nc) + in + trans_crate_rel_data_operand + (DATA_name (Walk.name_of ncs)) + (fun _ -> Asm.SEQ (Array.append + (Array.map f (Array.of_list ncs)) + [| Asm.WORD (word_ty_mach, Asm.IMM 0L) |])) + in + + let trans_required_fn (fnid:node_id) (blockid:node_id) : unit = + trans_frame_entry fnid; + emit (Il.Enter (Hashtbl.find cx.ctxt_block_fixups blockid)); + let (ilib, conv) = Hashtbl.find cx.ctxt_required_items fnid in + let lib_num = + htab_search_or_add cx.ctxt_required_lib_num ilib + (fun _ -> Hashtbl.length cx.ctxt_required_lib_num) + in + let f = next_vreg_cell (Il.AddrTy (Il.CodeTy)) in + let n_ty_params = n_item_ty_params cx fnid in + let args_rty = direct_call_args_referent_type cx fnid in + let caller_args_cell = caller_args_cell args_rty in + begin + match ilib with + REQUIRED_LIB_rust ls -> + begin + let c_sym_num = + htab_search_or_add cx.ctxt_required_c_sym_num + (ilib, "rust_crate") + (fun _ -> Hashtbl.length cx.ctxt_required_c_sym_num) + in + let rust_sym_num = + htab_search_or_add cx.ctxt_required_rust_sym_num fnid + (fun _ -> Hashtbl.length cx.ctxt_required_rust_sym_num) + in + let path_elts = stk_elts_from_bot path in + let _ = + assert (ls.required_prefix < (List.length path_elts)) + in + let relative_path_elts = + list_drop ls.required_prefix path_elts + in + let libstr = trans_static_string ls.required_libname in + let relpath = + trans_static_name_components relative_path_elts + in + trans_upcall "upcall_require_rust_sym" f + [| Il.Cell (curr_crate_ptr()); + imm (Int64.of_int lib_num); + imm (Int64.of_int c_sym_num); + imm (Int64.of_int rust_sym_num); + libstr; + relpath |]; + + trans_copy_forward_args args_rty; + + call_code (code_of_operand (Il.Cell f)); + end + + | REQUIRED_LIB_c ls -> + begin + let c_sym_str = + match htab_search cx.ctxt_required_syms fnid with + Some s -> s + | None -> + string_of_name_component (Stack.top path) + in + let c_sym_num = + (* FIXME: permit remapping symbol names to handle + * mangled variants. + *) + htab_search_or_add cx.ctxt_required_c_sym_num + (ilib, c_sym_str) + (fun _ -> Hashtbl.length cx.ctxt_required_c_sym_num) + in + let libstr = trans_static_string ls.required_libname in + let symstr = trans_static_string c_sym_str in + let check_rty_sz rty = + let sz = force_sz (Il.referent_ty_size word_bits rty) in + if sz = 0L || sz = word_sz + then () + else bug () "bad arg or ret cell size for native require" + in + let out = + get_element_ptr caller_args_cell Abi.calltup_elt_out_ptr + in + let _ = check_rty_sz (pointee_type out) in + let args = + let ty_params_cell = + get_element_ptr caller_args_cell Abi.calltup_elt_ty_params + in + let args_cell = + get_element_ptr caller_args_cell Abi.calltup_elt_args + in + let n_args = + match args_cell with + Il.Mem (_, Il.StructTy elts) -> Array.length elts + | _ -> bug () "non-StructTy in Trans.trans_required_fn" + in + let mk_ty_param i = + Il.Cell (get_element_ptr ty_params_cell i) + in + let mk_arg i = + let arg = get_element_ptr args_cell i in + let _ = check_rty_sz (Il.cell_referent_ty arg) in + Il.Cell arg + in + Array.append + (Array.init n_ty_params mk_ty_param) + (Array.init n_args mk_arg) + in + let nabi = { nabi_convention = conv; + nabi_indirect = true } + in + if conv <> CONV_rust + then assert (n_ty_params = 0); + trans_upcall "upcall_require_c_sym" f + [| Il.Cell (curr_crate_ptr()); + imm (Int64.of_int lib_num); + imm (Int64.of_int c_sym_num); + libstr; + symstr |]; + + abi.Abi.abi_emit_native_call_in_thunk (emitter()) + out nabi (Il.Cell f) args; + end + + | _ -> bug () + "Trans.required_rust_fn on unexpected form of require library" + end; + emit Il.Leave; + match ilib with + REQUIRED_LIB_rust _ -> + trans_frame_exit fnid false; + | REQUIRED_LIB_c _ -> + trans_frame_exit fnid true; + | _ -> bug () + "Trans.required_rust_fn on unexpected form of require library" + in + + let trans_tag + (n:Ast.ident) + (tagid:node_id) + (tag:(Ast.header_tup * Ast.ty_tag * node_id)) + : unit = + trans_frame_entry tagid; + trace_str cx.ctxt_sess.Session.sess_trace_tag + ("in tag constructor " ^ n); + let (header_tup, _, _) = tag in + let ctor_ty = Hashtbl.find cx.ctxt_all_item_types tagid in + let ttag = + match ctor_ty with + Ast.TY_fn (tsig, _) -> + begin + match slot_ty tsig.Ast.sig_output_slot with + Ast.TY_tag ttag -> ttag + | Ast.TY_iso tiso -> get_iso_tag tiso + | _ -> bugi cx tagid "unexpected fn type for tag constructor" + end + | _ -> bugi cx tagid "unexpected type for tag constructor" + in + let slots = + Array.map (fun sloti -> referent_to_slot cx sloti.id) header_tup + in + let tag_keys = sorted_htab_keys ttag in + let i = arr_idx tag_keys (Ast.NAME_base (Ast.BASE_ident n)) in + let _ = log cx "tag variant: %s -> tag value #%d" n i in + let out_cell = deref (ptr_at (fp_imm out_mem_disp) (Ast.TY_tag ttag)) in + let tag_cell = get_element_ptr out_cell 0 in + let union_cell = get_element_ptr_dyn_in_current_frame out_cell 1 in + let dst = get_variant_ptr union_cell i in + let dst_ty = snd (need_mem_cell dst) in + let src = get_explicit_args_for_current_frame () in + (* A clever compiler will inline this. We are not clever. *) + iflog (fun _ -> annotate (Printf.sprintf "write tag #%d" i)); + mov tag_cell (imm (Int64.of_int i)); + iflog (fun _ -> annotate ("copy tag-content tuple: dst_ty=" ^ + (Il.string_of_referent_ty dst_ty))); + trans_copy_tup (get_ty_params_of_current_frame()) true dst src slots; + trace_str cx.ctxt_sess.Session.sess_trace_tag + ("finished tag constructor " ^ n); + trans_frame_exit tagid true; + in + + let enter_file_for id = + if Hashtbl.mem cx.ctxt_item_files id + then Stack.push id curr_file + in + + let leave_file_for id = + if Hashtbl.mem cx.ctxt_item_files id + then + if Stack.is_empty curr_file + then bugi cx id "Missing source file on file-scope exit." + else ignore (Stack.pop curr_file) + in + + let visit_local_mod_item_pre n _ i = + iflog (fun _ -> log cx "translating local item #%d = %s" + (int_of_node i.id) (path_name())); + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn f -> trans_fn i.id f.Ast.fn_body + | Ast.MOD_ITEM_tag t -> trans_tag n i.id t + | Ast.MOD_ITEM_obj ob -> + trans_obj_ctor i.id + (Array.map (fun (sloti,ident) -> + ({sloti with node = get_slot cx sloti.id},ident)) + ob.Ast.obj_state) + | _ -> () + in + + let visit_required_mod_item_pre _ _ i = + iflog (fun _ -> log cx "translating required item #%d = %s" + (int_of_node i.id) (path_name())); + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn f -> trans_required_fn i.id f.Ast.fn_body.id + | Ast.MOD_ITEM_mod _ -> () + | Ast.MOD_ITEM_type _ -> () + | _ -> bugi cx i.id "unsupported type of require: %s" (path_name()) + in + + let visit_obj_drop_pre obj b = + let g = GLUE_obj_drop obj.id in + let fix = + match htab_search cx.ctxt_glue_code g with + Some code -> code.code_fixup + | None -> bug () "visit_obj_drop_pre without assigned fixup" + in + let framesz = get_framesz cx b.id in + let callsz = get_callsz cx b.id in + let spill = Hashtbl.find cx.ctxt_spill_fixups b.id in + push_new_emitter_with_vregs (Some b.id); + iflog (fun _ -> annotate "prologue"); + abi.Abi.abi_emit_fn_prologue (emitter()) + framesz callsz nabi_rust (upcall_fixup "upcall_grow_task"); + write_frame_info_ptrs None; + iflog (fun _ -> annotate "finished prologue"); + trans_block b; + Hashtbl.remove cx.ctxt_glue_code g; + trans_glue_frame_exit fix spill g; + inner.Walk.visit_obj_drop_pre obj b + in + + let visit_local_obj_fn_pre _ _ fn = + trans_fn fn.id fn.node.Ast.fn_body + in + + let visit_required_obj_fn_pre _ _ _ = + () + in + + let visit_obj_fn_pre obj ident fn = + enter_file_for fn.id; + begin + if Hashtbl.mem cx.ctxt_required_items fn.id + then + visit_required_obj_fn_pre obj ident fn + else + visit_local_obj_fn_pre obj ident fn; + end; + inner.Walk.visit_obj_fn_pre obj ident fn + in + + let visit_mod_item_pre n p i = + enter_file_for i.id; + begin + if Hashtbl.mem cx.ctxt_required_items i.id + then + visit_required_mod_item_pre n p i + else + visit_local_mod_item_pre n p i + end; + inner.Walk.visit_mod_item_pre n p i + in + + let visit_mod_item_post n p i = + inner.Walk.visit_mod_item_post n p i; + leave_file_for i.id + in + + let visit_obj_fn_post obj ident fn = + inner.Walk.visit_obj_fn_post obj ident fn; + leave_file_for fn.id + in + + let visit_crate_pre crate = + enter_file_for crate.id; + inner.Walk.visit_crate_pre crate + in + + let visit_crate_post crate = + + inner.Walk.visit_crate_post crate; + + let emit_aux_global_glue cx glue fix fn = + let glue_name = glue_str cx glue in + push_new_emitter_without_vregs None; + let e = emitter() in + fn e; + iflog (fun _ -> annotate_quads glue_name); + if (Il.num_vregs e) != 0 + then bug () "%s uses nonzero vregs" glue_name; + pop_emitter(); + let code = + { code_fixup = fix; + code_quads = emitted_quads e; + code_vregs_and_spill = None; } + in + htab_put cx.ctxt_glue_code glue code + in + + let tab_sz htab = + Asm.WORD (word_ty_mach, Asm.IMM (Int64.of_int (Hashtbl.length htab))) + in + + let crate_data = + (cx.ctxt_crate_fixup, + Asm.DEF + (cx.ctxt_crate_fixup, + Asm.SEQ [| + (* + * NB: this must match the rust_crate structure + * in the rust runtime library. + *) + crate_rel_word cx.ctxt_image_base_fixup; + Asm.WORD (word_ty_mach, Asm.M_POS cx.ctxt_crate_fixup); + + crate_rel_word cx.ctxt_debug_abbrev_fixup; + Asm.WORD (word_ty_mach, Asm.M_SZ cx.ctxt_debug_abbrev_fixup); + + crate_rel_word cx.ctxt_debug_info_fixup; + Asm.WORD (word_ty_mach, Asm.M_SZ cx.ctxt_debug_info_fixup); + + crate_rel_word cx.ctxt_activate_fixup; + crate_rel_word cx.ctxt_exit_task_fixup; + crate_rel_word cx.ctxt_unwind_fixup; + crate_rel_word cx.ctxt_yield_fixup; + + tab_sz cx.ctxt_required_rust_sym_num; + tab_sz cx.ctxt_required_c_sym_num; + tab_sz cx.ctxt_required_lib_num; + |])) + in + + (* Emit additional glue we didn't do elsewhere. *) + emit_aux_global_glue cx GLUE_activate + cx.ctxt_activate_fixup + abi.Abi.abi_activate; + + emit_aux_global_glue cx GLUE_yield + cx.ctxt_yield_fixup + abi.Abi.abi_yield; + + emit_aux_global_glue cx GLUE_unwind + cx.ctxt_unwind_fixup + (fun e -> abi.Abi.abi_unwind + e nabi_rust (upcall_fixup "upcall_exit")); + + ignore (get_exit_task_glue ()); + + begin + match abi.Abi.abi_get_next_pc_thunk with + None -> () + | Some (_, fix, fn) -> + emit_aux_global_glue cx GLUE_get_next_pc fix fn + end; + + htab_put cx.ctxt_data + DATA_crate crate_data; + + provide_existing_native cx SEG_data "rust_crate" cx.ctxt_crate_fixup; + + leave_file_for crate.id + in + + { inner with + Walk.visit_crate_pre = visit_crate_pre; + Walk.visit_crate_post = visit_crate_post; + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_fn_post = visit_obj_fn_post; + Walk.visit_obj_drop_pre = visit_obj_drop_pre; + } +;; + + +let fixup_assigning_visitor + (cx:ctxt) + (path:Ast.name_component Stack.t) + (inner:Walk.visitor) + : Walk.visitor = + + let path_name (_:unit) : string = + Ast.fmt_to_str Ast.fmt_name (Walk.path_to_name path) + in + + let enter_file_for id = + if Hashtbl.mem cx.ctxt_item_files id + then + begin + let name = + if Stack.is_empty path + then "crate root" + else path_name() + in + htab_put cx.ctxt_file_fixups id (new_fixup name); + if not (Hashtbl.mem cx.ctxt_file_code id) + then htab_put cx.ctxt_file_code id (Hashtbl.create 0); + end + in + + let visit_mod_item_pre n p i = + enter_file_for i.id; + begin + match i.node.Ast.decl_item with + + Ast.MOD_ITEM_tag _ -> + htab_put cx.ctxt_fn_fixups i.id + (new_fixup (path_name())); + + | Ast.MOD_ITEM_fn _ -> + begin + let path = path_name () in + let fixup = + if (not cx.ctxt_sess.Session.sess_library_mode) + && (Some path) = cx.ctxt_main_name + then + match cx.ctxt_main_fn_fixup with + None -> bug () "missing main fixup in trans" + | Some fix -> fix + else + new_fixup path + in + htab_put cx.ctxt_fn_fixups i.id fixup; + end + + | Ast.MOD_ITEM_obj _ -> + htab_put cx.ctxt_fn_fixups i.id + (new_fixup (path_name())); + + | _ -> () + end; + inner.Walk.visit_mod_item_pre n p i + in + + let visit_obj_fn_pre obj ident fn = + htab_put cx.ctxt_fn_fixups fn.id + (new_fixup (path_name())); + inner.Walk.visit_obj_fn_pre obj ident fn + in + + let visit_obj_drop_pre obj b = + let g = GLUE_obj_drop obj.id in + let fix = new_fixup (path_name()) in + let tmp_code = { code_fixup = fix; + code_quads = [| |]; + code_vregs_and_spill = None; } in + htab_put cx.ctxt_glue_code g tmp_code; + inner.Walk.visit_obj_drop_pre obj b + in + + let visit_block_pre b = + htab_put cx.ctxt_block_fixups b.id (new_fixup "lexical block"); + inner.Walk.visit_block_pre b + in + + let visit_crate_pre c = + enter_file_for c.id; + inner.Walk.visit_crate_pre c + in + + { inner with + Walk.visit_crate_pre = visit_crate_pre; + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_drop_pre = visit_obj_drop_pre; + Walk.visit_block_pre = visit_block_pre; } + + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : unit = + let path = Stack.create () in + let passes = + [| + (fixup_assigning_visitor cx path + Walk.empty_visitor); + (Walk.mod_item_logging_visitor + (log cx "translation pass: %s") + path + (trans_visitor cx path + Walk.empty_visitor)) + |]; + in + log cx "translating crate"; + begin + match cx.ctxt_main_name with + None -> () + | Some m -> log cx "with main fn %s" m + end; + run_passes cx "trans" path passes (log cx "%s") crate; +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/transutil.ml b/src/boot/me/transutil.ml new file mode 100644 index 00000000..c430e034 --- /dev/null +++ b/src/boot/me/transutil.ml @@ -0,0 +1,238 @@ +open Common;; +open Semant;; + +(* A note on GC: + * + * We employ -- or "will employ" when the last few pieces of it are done -- a + * "simple" precise, mark-sweep, single-generation, per-task (thereby + * preemptable and relatively quick) GC scheme on mutable memory. + * + * - For the sake of this note, call any exterior of 'state' effect a gc_val. + * + * - gc_vals come from the same malloc as all other values but undergo + * different storage management. + * + * - Every frame has a frame_glue_fns pointer in its fp[-1] slot, written on + * function-entry. + * + * - gc_vals have *three* extra words at their head, not one. + * + * - A pointer to a gc_val, however, points to the third of these three + * words. So a certain quantity of code can treat gc_vals the same way it + * would treat refcounted exterior vals. + * + * - The first word at the head of a gc_val is used as a refcount, as in + * non-gc allocations. + * + * - The (-1)st word at the head of a gc_val is a pointer to a tydesc, + * with the low bit of that pointer used as a mark bit. + * + * - The (-2)nd word at the head of a gc_val is a linked-list pointer to the + * gc_val that was allocated (temporally) just before it. Following this + * list traces through all the currently active gc_vals in a task. + * + * - The task has a gc_alloc_chain field that points to the most-recent + * gc_val allocated. + * + * - GC glue has two phases, mark and sweep: + * + * - The mark phase walks down the frame chain, like the unwinder. It calls + * each frame's mark glue as it's passing through. This will mark all the + * reachable parts of the task's gc_vals. + * + * - The sweep phase walks down the task's gc_alloc_chain checking to see + * if each allocation has been marked. If marked, it has its mark-bit + * reset and the sweep passes it by. If unmarked, it has its tydesc + * free_glue called on its body, and is unlinked from the chain. The + * free-glue will cause the allocation to (recursively) drop all of its + * references and/or run dtors. + * + * - Note that there is no "special gc state" at work here; the task looks + * like it's running normal code that happens to not perform any gc_val + * allocation. Mark-bit twiddling is open-coded into all the mark + * functions, which know their contents; we only have to do O(frames) + * indirect calls to mark, the rest are static. Sweeping costs O(gc-heap) + * indirect calls, unfortunately, because the set of sweep functions to + * call is arbitrary based on allocation order. + *) + + +type mem_ctrl = + MEM_rc_opaque + | MEM_rc_struct + | MEM_gc + | MEM_interior +;; + +type clone_ctrl = + CLONE_none + | CLONE_chan of Il.cell + | CLONE_all of Il.cell +;; + +type call_ctrl = + CALL_direct + | CALL_vtbl + | CALL_indirect +;; + +type for_each_ctrl = + { + for_each_fixup: fixup; + for_each_depth: int; + } +;; + +let word_sz (abi:Abi.abi) : int64 = + abi.Abi.abi_word_sz +;; + +let word_n (abi:Abi.abi) (n:int) : int64 = + Int64.mul (word_sz abi) (Int64.of_int n) +;; + +let word_bits (abi:Abi.abi) : Il.bits = + abi.Abi.abi_word_bits +;; + +let word_ty_mach (abi:Abi.abi) : ty_mach = + match word_bits abi with + Il.Bits8 -> TY_u8 + | Il.Bits16 -> TY_u16 + | Il.Bits32 -> TY_u32 + | Il.Bits64 -> TY_u64 +;; + +let word_ty_signed_mach (abi:Abi.abi) : ty_mach = + match word_bits abi with + Il.Bits8 -> TY_i8 + | Il.Bits16 -> TY_i16 + | Il.Bits32 -> TY_i32 + | Il.Bits64 -> TY_i64 +;; + + +let slot_mem_ctrl (slot:Ast.slot) : mem_ctrl = + let ty = slot_ty slot in + match ty with + Ast.TY_port _ + | Ast.TY_chan _ + | Ast.TY_task + | Ast.TY_vec _ + | Ast.TY_str -> MEM_rc_opaque + | _ -> + match slot.Ast.slot_mode with + Ast.MODE_exterior _ when type_is_structured ty -> + if type_has_state ty + then MEM_gc + else MEM_rc_struct + | Ast.MODE_exterior _ -> + MEM_rc_opaque + | _ -> + MEM_interior +;; + + +let iter_block_slots + (cx:Semant.ctxt) + (block_id:node_id) + (fn:Ast.slot_key -> node_id -> Ast.slot -> unit) + : unit = + let block_slots = Hashtbl.find cx.ctxt_block_slots block_id in + Hashtbl.iter + begin + fun key slot_id -> + let slot = referent_to_slot cx slot_id in + fn key slot_id slot + end + block_slots +;; + +let iter_frame_slots + (cx:Semant.ctxt) + (frame_id:node_id) + (fn:Ast.slot_key -> node_id -> Ast.slot -> unit) + : unit = + let blocks = Hashtbl.find cx.ctxt_frame_blocks frame_id in + List.iter (fun block -> iter_block_slots cx block fn) blocks +;; + +let iter_arg_slots + (cx:Semant.ctxt) + (frame_id:node_id) + (fn:Ast.slot_key -> node_id -> Ast.slot -> unit) + : unit = + match htab_search cx.ctxt_frame_args frame_id with + None -> () + | Some ls -> + List.iter + begin + fun slot_id -> + let key = Hashtbl.find cx.ctxt_slot_keys slot_id in + let slot = referent_to_slot cx slot_id in + fn key slot_id slot + end + ls +;; + +let iter_frame_and_arg_slots + (cx:Semant.ctxt) + (frame_id:node_id) + (fn:Ast.slot_key -> node_id -> Ast.slot -> unit) + : unit = + iter_frame_slots cx frame_id fn; + iter_arg_slots cx frame_id fn; +;; + +let next_power_of_two (x:int64) : int64 = + let xr = ref (Int64.sub x 1L) in + xr := Int64.logor (!xr) (Int64.shift_right_logical (!xr) 1); + xr := Int64.logor (!xr) (Int64.shift_right_logical (!xr) 2); + xr := Int64.logor (!xr) (Int64.shift_right_logical (!xr) 4); + xr := Int64.logor (!xr) (Int64.shift_right_logical (!xr) 8); + xr := Int64.logor (!xr) (Int64.shift_right_logical (!xr) 16); + xr := Int64.logor (!xr) (Int64.shift_right_logical (!xr) 32); + Int64.add 1L (!xr) +;; + +let iter_tup_slots + (get_element_ptr:'a -> int -> 'a) + (dst_ptr:'a) + (src_ptr:'a) + (slots:Ast.ty_tup) + (f:'a -> 'a -> Ast.slot -> (Ast.ty_iso option) -> unit) + (curr_iso:Ast.ty_iso option) + : unit = + Array.iteri + begin + fun i slot -> + f (get_element_ptr dst_ptr i) + (get_element_ptr src_ptr i) + slot curr_iso + end + slots +;; + +let iter_rec_slots + (get_element_ptr:'a -> int -> 'a) + (dst_ptr:'a) + (src_ptr:'a) + (entries:Ast.ty_rec) + (f:'a -> 'a -> Ast.slot -> (Ast.ty_iso option) -> unit) + (curr_iso:Ast.ty_iso option) + : unit = + iter_tup_slots get_element_ptr dst_ptr src_ptr + (Array.map snd entries) f curr_iso +;; + + + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/type.ml b/src/boot/me/type.ml new file mode 100644 index 00000000..2d4dd94a --- /dev/null +++ b/src/boot/me/type.ml @@ -0,0 +1,1294 @@ +open Common;; +open Semant;; + +type tyspec = + TYSPEC_equiv of tyvar + | TYSPEC_all + | TYSPEC_resolved of (Ast.ty_param array) * Ast.ty + | TYSPEC_callable of (tyvar * tyvar array) (* out, ins *) + | TYSPEC_collection of tyvar (* vec or str *) + | TYSPEC_comparable (* comparable with = and != *) + | TYSPEC_plusable (* nums, vecs, and strings *) + | TYSPEC_dictionary of dict + | TYSPEC_integral (* int-like *) + | TYSPEC_loggable + | TYSPEC_numeric (* int-like or float-like *) + | TYSPEC_ordered (* comparable with < etc. *) + | TYSPEC_record of dict + | TYSPEC_tuple of tyvar array (* heterogeneous tuple *) + | TYSPEC_vector of tyvar + | TYSPEC_app of (tyvar * Ast.ty array) + +and dict = (Ast.ident, tyvar) Hashtbl.t + +and tyvar = tyspec ref;; + +(* Signatures for binary operators. *) +type binopsig = + BINOPSIG_bool_bool_bool (* bool * bool -> bool *) + | BINOPSIG_comp_comp_bool (* comparable a * comparable a -> bool *) + | BINOPSIG_ord_ord_bool (* ordered a * ordered a -> bool *) + | BINOPSIG_integ_integ_integ (* integral a * integral a -> integral a *) + | BINOPSIG_num_num_num (* numeric a * numeric a -> numeric a *) + | BINOPSIG_plus_plus_plus (* plusable a * plusable a -> plusable a *) +;; + +let rec tyspec_to_str (ts:tyspec) : string = + + let fmt = Format.fprintf in + let fmt_ident (ff:Format.formatter) (i:Ast.ident) : unit = + fmt ff "%s" i + in + let fmt_obox ff = Format.pp_open_box ff 4 in + let fmt_cbox ff = Format.pp_close_box ff () in + let fmt_obr ff = fmt ff "<" in + let fmt_cbr ff = fmt ff ">" in + let fmt_obb ff = (fmt_obox ff; fmt_obr ff) in + let fmt_cbb ff = (fmt_cbox ff; fmt_cbr ff) in + + let rec fmt_fields (flav:string) (ff:Format.formatter) (flds:dict) : unit = + fmt_obb ff; + fmt ff "%s :" flav; + let fmt_entry ident tv = + fmt ff "@\n"; + fmt_ident ff ident; + fmt ff " : "; + fmt_tyspec ff (!tv); + in + Hashtbl.iter fmt_entry flds; + fmt_cbb ff + + and fmt_app ff tv args = + begin + assert (Array.length args <> 0); + fmt_obb ff; + fmt ff "app("; + fmt_tyspec ff (!tv); + fmt ff ")"; + Ast.fmt_app_args ff args; + fmt_cbb ff; + end + + and fmt_tvs ff tvs = + fmt_obox ff; + let fmt_tv i tv = + if i <> 0 + then fmt ff ", "; + fmt_tyspec ff (!tv) + in + Array.iteri fmt_tv tvs; + fmt_cbox ff; + + and fmt_tyspec ff ts = + match ts with + TYSPEC_all -> fmt ff "<?>" + | TYSPEC_comparable -> fmt ff "<comparable>" + | TYSPEC_plusable -> fmt ff "<plusable>" + | TYSPEC_integral -> fmt ff "<integral>" + | TYSPEC_loggable -> fmt ff "<loggable>" + | TYSPEC_numeric -> fmt ff "<numeric>" + | TYSPEC_ordered -> fmt ff "<ordered>" + | TYSPEC_resolved (params, ty) -> + if Array.length params <> 0 + then + begin + fmt ff "abs"; + Ast.fmt_decl_params ff params; + fmt ff "("; + Ast.fmt_ty ff ty; + fmt ff ")" + end + else + Ast.fmt_ty ff ty + + | TYSPEC_equiv tv -> + fmt_tyspec ff (!tv) + + | TYSPEC_callable (out, ins) -> + fmt_obb ff; + fmt ff "callable fn("; + fmt_tvs ff ins; + fmt ff ") -> "; + fmt_tyspec ff (!out); + fmt_cbb ff; + + | TYSPEC_collection tv -> + fmt_obb ff; + fmt ff "collection : "; + fmt_tyspec ff (!tv); + fmt_cbb ff; + + | TYSPEC_tuple tvs -> + fmt ff "("; + fmt_tvs ff tvs; + fmt ff ")"; + + | TYSPEC_vector tv -> + fmt_obb ff; + fmt ff "vector "; + fmt_tyspec ff (!tv); + fmt_cbb ff; + + | TYSPEC_dictionary dct -> + fmt_fields "dictionary" ff dct + + | TYSPEC_record dct -> + fmt_fields "record" ff dct + + | TYSPEC_app (tv, args) -> + fmt_app ff tv args + + in + let buf = Buffer.create 16 in + let bf = Format.formatter_of_buffer buf in + begin + fmt_tyspec bf ts; + Format.pp_print_flush bf (); + Buffer.contents buf + end +;; + +let iflog cx thunk = + if cx.ctxt_sess.Session.sess_log_type + then thunk () + else () +;; + +let rec resolve_tyvar (tv:tyvar) : tyvar = + match !tv with + TYSPEC_equiv subtv -> resolve_tyvar subtv + | _ -> tv +;; + +let process_crate (cx:ctxt) (crate:Ast.crate) : unit = + let log cx = Session.log "type" + cx.ctxt_sess.Session.sess_log_type + cx.ctxt_sess.Session.sess_log_out + in + let retval_tvs = Stack.create () in + let push_retval_tv tv = + Stack.push tv retval_tvs + in + let pop_retval_tv _ = + ignore (Stack.pop retval_tvs) + in + let retval_tv _ = + Stack.top retval_tvs + in + let (bindings:(node_id, tyvar) Hashtbl.t) = Hashtbl.create 10 in + let (item_params:(node_id, tyvar array) Hashtbl.t) = Hashtbl.create 10 in + let (lval_tyvars:(node_id, tyvar) Hashtbl.t) = Hashtbl.create 0 in + + let path = Stack.create () in + + let visitor (cx:ctxt) (inner:Walk.visitor) : Walk.visitor = + + let rec unify_slot + (slot:Ast.slot) + (id_opt:node_id option) + (tv:tyvar) : unit = + match id_opt with + Some id -> unify_tyvars (Hashtbl.find bindings id) tv + | None -> + match slot.Ast.slot_ty with + None -> bug () "untyped unidentified slot" + | Some ty -> unify_ty ty tv + + and check_sane_tyvar tv = + match !tv with + TYSPEC_resolved (_, (Ast.TY_named _)) -> + bug () "named-type in type checker" + | _ -> () + + and unify_tyvars (av:tyvar) (bv:tyvar) : unit = + iflog cx (fun _ -> + log cx "unifying types:"; + log cx "input tyvar A: %s" (tyspec_to_str !av); + log cx "input tyvar B: %s" (tyspec_to_str !bv)); + check_sane_tyvar av; + check_sane_tyvar bv; + + unify_tyvars' av bv; + + iflog cx (fun _ -> + log cx "unified types:"; + log cx "output tyvar A: %s" (tyspec_to_str !av); + log cx "output tyvar B: %s" (tyspec_to_str !bv)); + check_sane_tyvar av; + check_sane_tyvar bv; + + and unify_tyvars' (av:tyvar) (bv:tyvar) : unit = + let (a, b) = ((resolve_tyvar av), (resolve_tyvar bv)) in + let fail () = + err None "mismatched types: %s vs. %s" (tyspec_to_str !av) + (tyspec_to_str !bv); + in + + let merge_dicts a b = + let c = Hashtbl.create ((Hashtbl.length a) + (Hashtbl.length b)) in + let merge ident tv_a = + if Hashtbl.mem c ident + then unify_tyvars (Hashtbl.find c ident) tv_a + else Hashtbl.add c ident tv_a + in + Hashtbl.iter (Hashtbl.add c) b; + Hashtbl.iter merge a; + c + in + + let unify_dict_with_record_fields + (dct:dict) + (fields:Ast.ty_rec) + : unit = + let rec find_slot (query:Ast.ident) i : Ast.slot = + if i = Array.length fields + then fail () + else match fields.(i) with + (ident, slot) -> + if ident = query then slot + else find_slot query (i + 1) + in + + let check_entry ident tv = + unify_slot (find_slot ident 0) None tv + in + Hashtbl.iter check_entry dct + in + + let unify_dict_with_obj_fns + (dct:dict) + (fns:(Ast.ident,Ast.ty_fn) Hashtbl.t) : unit = + let check_entry (query:Ast.ident) tv : unit = + match htab_search fns query with + None -> fail () + | Some fn -> unify_ty (Ast.TY_fn fn) tv + in + Hashtbl.iter check_entry dct + in + + let rec is_comparable_or_ordered (comparable:bool) (ty:Ast.ty) : bool = + match ty with + Ast.TY_mach _ | Ast.TY_int | Ast.TY_uint + | Ast.TY_char | Ast.TY_str -> true + | Ast.TY_any | Ast.TY_nil | Ast.TY_bool | Ast.TY_chan _ + | Ast.TY_port _ | Ast.TY_task | Ast.TY_tup _ | Ast.TY_vec _ + | Ast.TY_rec _ | Ast.TY_tag _ | Ast.TY_iso _ | Ast.TY_idx _ -> + comparable + | Ast.TY_fn _ | Ast.TY_obj _ + | Ast.TY_param _ | Ast.TY_native _ | Ast.TY_type -> false + | Ast.TY_named _ -> bug () "unexpected named type" + | Ast.TY_constrained (ty, _) -> + is_comparable_or_ordered comparable ty + in + + let floating (ty:Ast.ty) : bool = + match ty with + Ast.TY_mach TY_f32 | Ast.TY_mach TY_f64 -> true + | _ -> false + in + + let integral (ty:Ast.ty) : bool = + match ty with + Ast.TY_int | Ast.TY_mach TY_u8 | Ast.TY_mach TY_u16 + | Ast.TY_mach TY_u32 | Ast.TY_mach TY_u64 | Ast.TY_mach TY_i8 + | Ast.TY_mach TY_i16 | Ast.TY_mach TY_i32 + | Ast.TY_mach TY_i64 -> + true + | _ -> false + in + + let numeric (ty:Ast.ty) : bool = (integral ty) || (floating ty) in + + let plusable (ty:Ast.ty) : bool = + match ty with + Ast.TY_str -> true + | Ast.TY_vec _ -> true + | _ -> numeric ty + in + + let loggable (ty:Ast.ty) : bool = + match ty with + Ast.TY_str | Ast.TY_bool | Ast.TY_int | Ast.TY_uint + | Ast.TY_char + | Ast.TY_mach TY_u8 | Ast.TY_mach TY_u16 | Ast.TY_mach TY_u32 + | Ast.TY_mach TY_i8 | Ast.TY_mach TY_i16 | Ast.TY_mach TY_i32 + -> true + | _ -> false + in + + let result = + match (!a, !b) with + (TYSPEC_equiv _, _) | (_, TYSPEC_equiv _) -> + bug () "equiv found even though tyvar was resolved" + + | (TYSPEC_all, other) | (other, TYSPEC_all) -> other + + (* resolved *) + + | (TYSPEC_resolved (params_a, ty_a), + TYSPEC_resolved (params_b, ty_b)) -> + if params_a <> params_b || ty_a <> ty_b + then fail() + else TYSPEC_resolved (params_a, ty_a) + + | (TYSPEC_resolved (params, ty), + TYSPEC_callable (out_tv, in_tvs)) + | (TYSPEC_callable (out_tv, in_tvs), + TYSPEC_resolved (params, ty)) -> + let unify_in_slot i in_slot = + unify_slot in_slot None in_tvs.(i) + in + begin + match ty with + Ast.TY_fn ({ + Ast.sig_input_slots = in_slots; + Ast.sig_output_slot = out_slot + }, _) -> + if Array.length in_slots != Array.length in_tvs + then fail (); + unify_slot out_slot None out_tv; + Array.iteri unify_in_slot in_slots + | _ -> fail () + end; + TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_collection tv) + | (TYSPEC_collection tv, TYSPEC_resolved (params, ty)) -> + begin + match ty with + Ast.TY_vec slot -> unify_slot slot None tv + | Ast.TY_str -> unify_ty (Ast.TY_mach TY_u8) tv + | _ -> fail () + end; + TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_comparable) + | (TYSPEC_comparable, TYSPEC_resolved (params, ty)) -> + if not (is_comparable_or_ordered true ty) then fail () + else TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_plusable) + | (TYSPEC_plusable, TYSPEC_resolved (params, ty)) -> + if not (plusable ty) then fail () + else TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_dictionary dct) + | (TYSPEC_dictionary dct, TYSPEC_resolved (params, ty)) -> + begin + match ty with + Ast.TY_rec fields -> + unify_dict_with_record_fields dct fields + | Ast.TY_obj (_, fns) -> + unify_dict_with_obj_fns dct fns + | _ -> fail () + end; + TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_integral) + | (TYSPEC_integral, TYSPEC_resolved (params, ty)) -> + if not (integral ty) + then fail () + else TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_loggable) + | (TYSPEC_loggable, TYSPEC_resolved (params, ty)) -> + if not (loggable ty) + then fail () + else TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_numeric) + | (TYSPEC_numeric, TYSPEC_resolved (params, ty)) -> + if not (numeric ty) then fail () + else TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_ordered) + | (TYSPEC_ordered, TYSPEC_resolved (params, ty)) -> + if not (is_comparable_or_ordered false ty) then fail () + else TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_app (tv, args)) + | (TYSPEC_app (tv, args), TYSPEC_resolved (params, ty)) -> + let ty = rebuild_ty_under_params ty params args false in + unify_ty ty tv; + TYSPEC_resolved ([| |], ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_record dct) + | (TYSPEC_record dct, TYSPEC_resolved (params, ty)) -> + begin + match ty with + Ast.TY_rec fields -> + unify_dict_with_record_fields dct fields + | _ -> fail () + end; + TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_tuple tvs) + | (TYSPEC_tuple tvs, TYSPEC_resolved (params, ty)) -> + begin + match ty with + Ast.TY_tup (elem_slots:Ast.slot array) -> + if (Array.length elem_slots) < (Array.length tvs) + then fail () + else + let check_elem i tv = + unify_slot (elem_slots.(i)) None tv + in + Array.iteri check_elem tvs + | _ -> fail () + end; + TYSPEC_resolved (params, ty) + + | (TYSPEC_resolved (params, ty), TYSPEC_vector tv) + | (TYSPEC_vector tv, TYSPEC_resolved (params, ty)) -> + begin + match ty with + Ast.TY_vec slot -> + unify_slot slot None tv; + TYSPEC_resolved (params, ty) + | _ -> fail () + end + + (* callable *) + + | (TYSPEC_callable (a_out_tv, a_in_tvs), + TYSPEC_callable (b_out_tv, b_in_tvs)) -> + unify_tyvars a_out_tv b_out_tv; + let check_in_tv i a_in_tv = + unify_tyvars a_in_tv b_in_tvs.(i) + in + Array.iteri check_in_tv a_in_tvs; + TYSPEC_callable (a_out_tv, a_in_tvs) + + | (TYSPEC_callable _, TYSPEC_collection _) + | (TYSPEC_callable _, TYSPEC_comparable) + | (TYSPEC_callable _, TYSPEC_plusable) + | (TYSPEC_callable _, TYSPEC_dictionary _) + | (TYSPEC_callable _, TYSPEC_integral) + | (TYSPEC_callable _, TYSPEC_loggable) + | (TYSPEC_callable _, TYSPEC_numeric) + | (TYSPEC_callable _, TYSPEC_ordered) + | (TYSPEC_callable _, TYSPEC_app _) + | (TYSPEC_callable _, TYSPEC_record _) + | (TYSPEC_callable _, TYSPEC_tuple _) + | (TYSPEC_callable _, TYSPEC_vector _) + | (TYSPEC_collection _, TYSPEC_callable _) + | (TYSPEC_comparable, TYSPEC_callable _) + | (TYSPEC_plusable, TYSPEC_callable _) + | (TYSPEC_dictionary _, TYSPEC_callable _) + | (TYSPEC_integral, TYSPEC_callable _) + | (TYSPEC_loggable, TYSPEC_callable _) + | (TYSPEC_numeric, TYSPEC_callable _) + | (TYSPEC_ordered, TYSPEC_callable _) + | (TYSPEC_app _, TYSPEC_callable _) + | (TYSPEC_record _, TYSPEC_callable _) + | (TYSPEC_tuple _, TYSPEC_callable _) + | (TYSPEC_vector _, TYSPEC_callable _) -> fail () + + (* collection *) + + | (TYSPEC_collection av, TYSPEC_collection bv) -> + unify_tyvars av bv; + TYSPEC_collection av + + | (TYSPEC_collection av, TYSPEC_comparable) + | (TYSPEC_comparable, TYSPEC_collection av) -> + TYSPEC_collection av + + | (TYSPEC_collection v, TYSPEC_plusable) + | (TYSPEC_plusable, TYSPEC_collection v) -> TYSPEC_collection v + + | (TYSPEC_collection _, TYSPEC_dictionary _) + | (TYSPEC_collection _, TYSPEC_integral) + | (TYSPEC_collection _, TYSPEC_loggable) + | (TYSPEC_collection _, TYSPEC_numeric) + | (TYSPEC_collection _, TYSPEC_ordered) + | (TYSPEC_collection _, TYSPEC_app _) + | (TYSPEC_collection _, TYSPEC_record _) + | (TYSPEC_collection _, TYSPEC_tuple _) + | (TYSPEC_dictionary _, TYSPEC_collection _) + | (TYSPEC_integral, TYSPEC_collection _) + | (TYSPEC_loggable, TYSPEC_collection _) + | (TYSPEC_numeric, TYSPEC_collection _) + | (TYSPEC_ordered, TYSPEC_collection _) + | (TYSPEC_app _, TYSPEC_collection _) + | (TYSPEC_record _, TYSPEC_collection _) + | (TYSPEC_tuple _, TYSPEC_collection _) -> fail () + + | (TYSPEC_collection av, TYSPEC_vector bv) + | (TYSPEC_vector bv, TYSPEC_collection av) -> + unify_tyvars av bv; + TYSPEC_vector av + + (* comparable *) + + | (TYSPEC_comparable, TYSPEC_comparable) -> TYSPEC_comparable + + | (TYSPEC_comparable, TYSPEC_plusable) + | (TYSPEC_plusable, TYSPEC_comparable) -> TYSPEC_plusable + + | (TYSPEC_comparable, TYSPEC_dictionary dict) + | (TYSPEC_dictionary dict, TYSPEC_comparable) -> + TYSPEC_dictionary dict + + | (TYSPEC_comparable, TYSPEC_integral) + | (TYSPEC_integral, TYSPEC_comparable) -> TYSPEC_integral + + | (TYSPEC_comparable, TYSPEC_loggable) + | (TYSPEC_loggable, TYSPEC_comparable) -> TYSPEC_loggable + + | (TYSPEC_comparable, TYSPEC_numeric) + | (TYSPEC_numeric, TYSPEC_comparable) -> TYSPEC_numeric + + | (TYSPEC_comparable, TYSPEC_ordered) + | (TYSPEC_ordered, TYSPEC_comparable) -> TYSPEC_ordered + + | (TYSPEC_comparable, TYSPEC_app _) + | (TYSPEC_app _, TYSPEC_comparable) -> fail () + + | (TYSPEC_comparable, TYSPEC_record r) + | (TYSPEC_record r, TYSPEC_comparable) -> TYSPEC_record r + + | (TYSPEC_comparable, TYSPEC_tuple t) + | (TYSPEC_tuple t, TYSPEC_comparable) -> TYSPEC_tuple t + + | (TYSPEC_comparable, TYSPEC_vector v) + | (TYSPEC_vector v, TYSPEC_comparable) -> TYSPEC_vector v + + (* plusable *) + + | (TYSPEC_plusable, TYSPEC_plusable) -> TYSPEC_plusable + + | (TYSPEC_plusable, TYSPEC_dictionary _) + | (TYSPEC_dictionary _, TYSPEC_plusable) -> fail () + + | (TYSPEC_plusable, TYSPEC_integral) + | (TYSPEC_integral, TYSPEC_plusable) -> TYSPEC_integral + + | (TYSPEC_plusable, TYSPEC_loggable) + | (TYSPEC_loggable, TYSPEC_plusable) -> TYSPEC_plusable + + | (TYSPEC_plusable, TYSPEC_numeric) + | (TYSPEC_numeric, TYSPEC_plusable) -> TYSPEC_numeric + + | (TYSPEC_plusable, TYSPEC_ordered) + | (TYSPEC_ordered, TYSPEC_plusable) -> TYSPEC_plusable + + | (TYSPEC_plusable, TYSPEC_record _) + | (TYSPEC_record _, TYSPEC_plusable) -> fail () + + | (TYSPEC_plusable, TYSPEC_tuple _) + | (TYSPEC_tuple _, TYSPEC_plusable) -> fail () + + | (TYSPEC_plusable, TYSPEC_vector v) + | (TYSPEC_vector v, TYSPEC_plusable) -> TYSPEC_vector v + + | (TYSPEC_plusable, TYSPEC_app _) + | (TYSPEC_app _, TYSPEC_plusable) -> fail () + + (* dictionary *) + + | (TYSPEC_dictionary da, TYSPEC_dictionary db) -> + TYSPEC_dictionary (merge_dicts da db) + + | (TYSPEC_dictionary _, TYSPEC_integral) + | (TYSPEC_dictionary _, TYSPEC_loggable) + | (TYSPEC_dictionary _, TYSPEC_numeric) + | (TYSPEC_dictionary _, TYSPEC_ordered) + | (TYSPEC_dictionary _, TYSPEC_app _) + | (TYSPEC_integral, TYSPEC_dictionary _) + | (TYSPEC_loggable, TYSPEC_dictionary _) + | (TYSPEC_numeric, TYSPEC_dictionary _) + | (TYSPEC_ordered, TYSPEC_dictionary _) + | (TYSPEC_app _, TYSPEC_dictionary _) -> fail () + + | (TYSPEC_dictionary d, TYSPEC_record r) + | (TYSPEC_record r, TYSPEC_dictionary d) -> + TYSPEC_record (merge_dicts d r) + + | (TYSPEC_dictionary _, TYSPEC_tuple _) + | (TYSPEC_dictionary _, TYSPEC_vector _) + | (TYSPEC_tuple _, TYSPEC_dictionary _) + | (TYSPEC_vector _, TYSPEC_dictionary _) -> fail () + + (* integral *) + + | (TYSPEC_integral, TYSPEC_integral) + | (TYSPEC_integral, TYSPEC_loggable) + | (TYSPEC_integral, TYSPEC_numeric) + | (TYSPEC_integral, TYSPEC_ordered) + | (TYSPEC_loggable, TYSPEC_integral) + | (TYSPEC_numeric, TYSPEC_integral) + | (TYSPEC_ordered, TYSPEC_integral) -> TYSPEC_integral + + | (TYSPEC_integral, TYSPEC_app _) + | (TYSPEC_integral, TYSPEC_record _) + | (TYSPEC_integral, TYSPEC_tuple _) + | (TYSPEC_integral, TYSPEC_vector _) + | (TYSPEC_app _, TYSPEC_integral) + | (TYSPEC_record _, TYSPEC_integral) + | (TYSPEC_tuple _, TYSPEC_integral) + | (TYSPEC_vector _, TYSPEC_integral) -> fail () + + (* loggable *) + + | (TYSPEC_loggable, TYSPEC_loggable) -> TYSPEC_loggable + + | (TYSPEC_loggable, TYSPEC_numeric) + | (TYSPEC_numeric, TYSPEC_loggable) -> TYSPEC_numeric + + | (TYSPEC_loggable, TYSPEC_ordered) + | (TYSPEC_ordered, TYSPEC_loggable) -> TYSPEC_ordered + + | (TYSPEC_loggable, TYSPEC_app _) + | (TYSPEC_loggable, TYSPEC_record _) + | (TYSPEC_loggable, TYSPEC_tuple _) + | (TYSPEC_loggable, TYSPEC_vector _) + | (TYSPEC_app _, TYSPEC_loggable) + | (TYSPEC_record _, TYSPEC_loggable) + | (TYSPEC_tuple _, TYSPEC_loggable) + | (TYSPEC_vector _, TYSPEC_loggable) -> fail () + + (* numeric *) + + | (TYSPEC_numeric, TYSPEC_numeric) -> TYSPEC_numeric + + | (TYSPEC_numeric, TYSPEC_ordered) + | (TYSPEC_ordered, TYSPEC_numeric) -> TYSPEC_ordered + + | (TYSPEC_numeric, TYSPEC_app _) + | (TYSPEC_numeric, TYSPEC_record _) + | (TYSPEC_numeric, TYSPEC_tuple _) + | (TYSPEC_numeric, TYSPEC_vector _) + | (TYSPEC_app _, TYSPEC_numeric) + | (TYSPEC_record _, TYSPEC_numeric) + | (TYSPEC_tuple _, TYSPEC_numeric) + | (TYSPEC_vector _, TYSPEC_numeric) -> fail () + + (* ordered *) + + | (TYSPEC_ordered, TYSPEC_ordered) -> TYSPEC_ordered + + | (TYSPEC_ordered, TYSPEC_app _) + | (TYSPEC_ordered, TYSPEC_record _) + | (TYSPEC_ordered, TYSPEC_tuple _) + | (TYSPEC_ordered, TYSPEC_vector _) + | (TYSPEC_app _, TYSPEC_ordered) + | (TYSPEC_record _, TYSPEC_ordered) + | (TYSPEC_tuple _, TYSPEC_ordered) + | (TYSPEC_vector _, TYSPEC_ordered) -> fail () + + (* app *) + + | (TYSPEC_app (tv_a, args_a), + TYSPEC_app (tv_b, args_b)) -> + if args_a <> args_b + then fail() + else + begin + unify_tyvars tv_a tv_b; + TYSPEC_app (tv_a, args_a) + end + + | (TYSPEC_app _, TYSPEC_record _) + | (TYSPEC_app _, TYSPEC_tuple _) + | (TYSPEC_app _, TYSPEC_vector _) + | (TYSPEC_record _, TYSPEC_app _) + | (TYSPEC_tuple _, TYSPEC_app _) + | (TYSPEC_vector _, TYSPEC_app _) -> fail () + + (* record *) + + | (TYSPEC_record da, TYSPEC_record db) -> + TYSPEC_record (merge_dicts da db) + + | (TYSPEC_record _, TYSPEC_tuple _) + | (TYSPEC_record _, TYSPEC_vector _) + | (TYSPEC_tuple _, TYSPEC_record _) + | (TYSPEC_vector _, TYSPEC_record _) -> fail () + + (* tuple *) + + | (TYSPEC_tuple tvs_a, TYSPEC_tuple tvs_b) -> + let len_a = Array.length tvs_a in + let len_b = Array.length tvs_b in + let max_len = max len_a len_b in + let init_tuple_elem i = + if i >= len_a + then tvs_b.(i) + else if i >= len_b + then tvs_a.(i) + else begin + unify_tyvars tvs_a.(i) tvs_b.(i); + tvs_a.(i) + end + in + TYSPEC_tuple (Array.init max_len init_tuple_elem) + + | (TYSPEC_tuple _, TYSPEC_vector _) + | (TYSPEC_vector _, TYSPEC_tuple _) -> fail () + + (* vector *) + + | (TYSPEC_vector av, TYSPEC_vector bv) -> + unify_tyvars av bv; + TYSPEC_vector av + in + let c = ref result in + a := TYSPEC_equiv c; + b := TYSPEC_equiv c + + and unify_ty (ty:Ast.ty) (tv:tyvar) : unit = + unify_tyvars (ref (TYSPEC_resolved ([||], ty))) tv + in + + let rec unify_atom (atom:Ast.atom) (tv:tyvar) : unit = + match atom with + Ast.ATOM_literal { node = literal; id = _ } -> + let ty = match literal with + Ast.LIT_nil -> Ast.TY_nil + | Ast.LIT_bool _ -> Ast.TY_bool + | Ast.LIT_mach (mty, _, _) -> Ast.TY_mach mty + | Ast.LIT_int (_, _) -> Ast.TY_int + | Ast.LIT_uint (_, _) -> Ast.TY_uint + | Ast.LIT_char _ -> Ast.TY_char + in + unify_ty ty tv + | Ast.ATOM_lval lval -> unify_lval lval tv + + and unify_expr (expr:Ast.expr) (tv:tyvar) : unit = + match expr with + Ast.EXPR_binary (binop, lhs, rhs) -> + let binop_sig = match binop with + Ast.BINOP_eq + | Ast.BINOP_ne -> BINOPSIG_comp_comp_bool + + | Ast.BINOP_lt + | Ast.BINOP_le + | Ast.BINOP_ge + | Ast.BINOP_gt -> BINOPSIG_ord_ord_bool + + | Ast.BINOP_or + | Ast.BINOP_and + | Ast.BINOP_xor + | Ast.BINOP_lsl + | Ast.BINOP_lsr + | Ast.BINOP_asr -> BINOPSIG_integ_integ_integ + + | Ast.BINOP_add -> BINOPSIG_plus_plus_plus + + | Ast.BINOP_sub + | Ast.BINOP_mul + | Ast.BINOP_div + | Ast.BINOP_mod -> BINOPSIG_num_num_num + + | Ast.BINOP_send -> bug () "BINOP_send found in expr" + in + begin + match binop_sig with + BINOPSIG_bool_bool_bool -> + unify_atom lhs + (ref (TYSPEC_resolved ([||], Ast.TY_bool))); + unify_atom rhs + (ref (TYSPEC_resolved ([||], Ast.TY_bool))); + unify_ty Ast.TY_bool tv + | BINOPSIG_comp_comp_bool -> + let tv_a = ref TYSPEC_comparable in + unify_atom lhs tv_a; + unify_atom rhs tv_a; + unify_ty Ast.TY_bool tv + | BINOPSIG_ord_ord_bool -> + let tv_a = ref TYSPEC_ordered in + unify_atom lhs tv_a; + unify_atom rhs tv_a; + unify_ty Ast.TY_bool tv + | BINOPSIG_integ_integ_integ -> + let tv_a = ref TYSPEC_integral in + unify_atom lhs tv_a; + unify_atom rhs tv_a; + unify_tyvars tv tv_a + | BINOPSIG_num_num_num -> + let tv_a = ref TYSPEC_numeric in + unify_atom lhs tv_a; + unify_atom rhs tv_a; + unify_tyvars tv tv_a + | BINOPSIG_plus_plus_plus -> + let tv_a = ref TYSPEC_plusable in + unify_atom lhs tv_a; + unify_atom rhs tv_a; + unify_tyvars tv tv_a + end + | Ast.EXPR_unary (unop, atom) -> + begin + match unop with + Ast.UNOP_not -> + unify_atom atom + (ref (TYSPEC_resolved ([||], Ast.TY_bool))); + unify_ty Ast.TY_bool tv + | Ast.UNOP_bitnot -> + let tv_a = ref TYSPEC_integral in + unify_atom atom tv_a; + unify_tyvars tv tv_a + | Ast.UNOP_neg -> + let tv_a = ref TYSPEC_numeric in + unify_atom atom tv_a; + unify_tyvars tv tv_a + | Ast.UNOP_cast t -> + (* + * FIXME: check cast-validity in post-typecheck pass. + * Only some casts make sense. + *) + let tv_a = ref TYSPEC_all in + let t = Hashtbl.find cx.ctxt_all_cast_types t.id in + unify_atom atom tv_a; + unify_ty t tv + end + | Ast.EXPR_atom atom -> unify_atom atom tv + + and unify_lval' (lval:Ast.lval) (tv:tyvar) : unit = + let note_args args = + iflog cx (fun _ -> log cx "noting lval '%a' type arguments: %a" + Ast.sprintf_lval lval Ast.sprintf_app_args args); + Hashtbl.add + cx.ctxt_call_lval_params + (lval_base_id lval) + args; + in + match lval with + Ast.LVAL_base nbi -> + let referent = Hashtbl.find cx.ctxt_lval_to_referent nbi.id in + begin + match Hashtbl.find cx.ctxt_all_defns referent with + DEFN_slot slot -> + iflog cx + begin + fun _ -> + let tv = Hashtbl.find bindings referent in + log cx "lval-base slot tyspec for %a = %s" + Ast.sprintf_lval lval (tyspec_to_str (!tv)); + end; + unify_slot slot (Some referent) tv + + | _ -> + let spec = (!(Hashtbl.find bindings referent)) in + let _ = + iflog cx + begin + fun _ -> + log cx "lval-base item tyspec for %a = %s" + Ast.sprintf_lval lval (tyspec_to_str spec); + log cx "unifying with supplied spec %s" + (tyspec_to_str !tv) + end + in + let tv = + match nbi.node with + Ast.BASE_ident _ -> tv + | Ast.BASE_app (_, args) -> + note_args args; + ref (TYSPEC_app (tv, args)) + | _ -> err None "bad lval / tyspec combination" + in + unify_tyvars (ref spec) tv + end + | Ast.LVAL_ext (base, comp) -> + let base_ts = match comp with + Ast.COMP_named (Ast.COMP_ident id) -> + let names = Hashtbl.create 1 in + Hashtbl.add names id tv; + TYSPEC_dictionary names + + | Ast.COMP_named (Ast.COMP_app (id, args)) -> + note_args args; + let tv = ref (TYSPEC_app (tv, args)) in + let names = Hashtbl.create 1 in + Hashtbl.add names id tv; + TYSPEC_dictionary names + + | Ast.COMP_named (Ast.COMP_idx i) -> + let init j = if i + 1 == j then tv else ref TYSPEC_all in + TYSPEC_tuple (Array.init (i + 1) init) + + | Ast.COMP_atom atom -> + unify_atom atom (ref (TYSPEC_resolved ([||], Ast.TY_int))); + TYSPEC_collection tv + in + let base_tv = ref base_ts in + unify_lval' base base_tv; + match !(resolve_tyvar base_tv) with + TYSPEC_resolved (_, ty) -> + unify_ty (slot_ty (project_type_to_slot ty comp)) tv + | _ -> + () + + and unify_lval (lval:Ast.lval) (tv:tyvar) : unit = + let id = lval_base_id lval in + (* Fetch lval with type components resolved. *) + let lval = Hashtbl.find cx.ctxt_all_lvals id in + iflog cx (fun _ -> log cx + "fetched resolved version of lval #%d = %a" + (int_of_node id) Ast.sprintf_lval lval); + Hashtbl.add lval_tyvars id tv; + unify_lval' lval tv + + in + let gen_atom_tvs atoms = + let gen_atom_tv atom = + let tv = ref TYSPEC_all in + unify_atom atom tv; + tv + in + Array.map gen_atom_tv atoms + in + let visit_stmt_pre_full (stmt:Ast.stmt) : unit = + + let check_callable out_tv callee args = + let in_tvs = gen_atom_tvs args in + let callee_tv = ref (TYSPEC_callable (out_tv, in_tvs)) in + unify_lval callee callee_tv; + in + match stmt.node with + Ast.STMT_spawn (out, _, callee, args) -> + let out_tv = ref (TYSPEC_resolved ([||], Ast.TY_nil)) in + unify_lval out (ref (TYSPEC_resolved ([||], Ast.TY_task))); + check_callable out_tv callee args + + | Ast.STMT_init_rec (lval, fields, Some base) -> + let dct = Hashtbl.create 10 in + let tvrec = ref (TYSPEC_record dct) in + let add_field (ident, _, _, atom) = + let tv = ref TYSPEC_all in + unify_atom atom tv; + Hashtbl.add dct ident tv + in + Array.iter add_field fields; + let tvbase = ref TYSPEC_all in + unify_lval base tvbase; + unify_tyvars tvrec tvbase; + unify_lval lval tvrec + + | Ast.STMT_init_rec (lval, fields, None) -> + let dct = Hashtbl.create 10 in + let add_field (ident, _, _, atom) = + let tv = ref TYSPEC_all in + unify_atom atom tv; + Hashtbl.add dct ident tv + in + Array.iter add_field fields; + unify_lval lval (ref (TYSPEC_record dct)) + + | Ast.STMT_init_tup (lval, members) -> + let member_to_tv (_, _, atom) = + let tv = ref TYSPEC_all in + unify_atom atom tv; + tv + in + let member_tvs = Array.map member_to_tv members in + unify_lval lval (ref (TYSPEC_tuple member_tvs)) + + | Ast.STMT_init_vec (lval, _, atoms) -> + let tv = ref TYSPEC_all in + let unify_with_tv atom = unify_atom atom tv in + Array.iter unify_with_tv atoms; + unify_lval lval (ref (TYSPEC_vector tv)) + + | Ast.STMT_init_str (lval, _) -> + unify_lval lval (ref (TYSPEC_resolved ([||], Ast.TY_str))) + + | Ast.STMT_copy (lval, expr) -> + let tv = ref TYSPEC_all in + unify_expr expr tv; + unify_lval lval tv + + | Ast.STMT_copy_binop (lval, binop, at) -> + let tv = ref TYSPEC_all in + unify_expr (Ast.EXPR_binary (binop, Ast.ATOM_lval lval, at)) tv; + unify_lval lval tv; + + | Ast.STMT_call (out, callee, args) -> + let out_tv = ref TYSPEC_all in + unify_lval out out_tv; + check_callable out_tv callee args + + | Ast.STMT_log atom -> unify_atom atom (ref TYSPEC_loggable) + + | Ast.STMT_check_expr expr -> + unify_expr expr (ref (TYSPEC_resolved ([||], Ast.TY_bool))) + + | Ast.STMT_check (_, check_calls) -> + let out_tv = ref (TYSPEC_resolved ([||], Ast.TY_bool)) in + Array.iter + (fun (callee, args) -> + check_callable out_tv callee args) + check_calls + + | Ast.STMT_while { Ast.while_lval = (_, expr); Ast.while_body = _ } -> + unify_expr expr (ref (TYSPEC_resolved ([||], Ast.TY_bool))) + + | Ast.STMT_if { Ast.if_test = if_test } -> + unify_expr if_test (ref (TYSPEC_resolved ([||], Ast.TY_bool))); + + | Ast.STMT_decl _ -> () + + (* FIXME: deal with difference between return-type vs. put-type *) + | Ast.STMT_ret atom_opt + | Ast.STMT_put atom_opt -> + begin + match atom_opt with + None -> unify_ty Ast.TY_nil (retval_tv()) + | Some atom -> unify_atom atom (retval_tv()) + end + + | Ast.STMT_be (callee, args) -> + check_callable (retval_tv()) callee args + + | Ast.STMT_bind (bound, callee, arg_opts) -> + (* FIXME: handle binding type parameters eventually. *) + let out_tv = ref TYSPEC_all in + let residue = ref [] in + let gen_atom_opt_tvs atoms = + let gen_atom_tv atom_opt = + let tv = ref TYSPEC_all in + begin + match atom_opt with + None -> residue := tv :: (!residue); + | Some atom -> unify_atom atom tv + end; + tv + in + Array.map gen_atom_tv atoms + in + + let in_tvs = gen_atom_opt_tvs arg_opts in + let arg_residue_tvs = Array.of_list (List.rev (!residue)) in + let callee_tv = ref (TYSPEC_callable (out_tv, in_tvs)) in + let bound_tv = ref (TYSPEC_callable (out_tv, arg_residue_tvs)) in + unify_lval callee callee_tv; + unify_lval bound bound_tv + + | Ast.STMT_for_each fe -> + let out_tv = ref TYSPEC_all in + let (si, _) = fe.Ast.for_each_slot in + let (callee, args) = fe.Ast.for_each_call in + unify_slot si.node (Some si.id) out_tv; + check_callable out_tv callee args + + | Ast.STMT_for fo -> + let mem_tv = ref TYSPEC_all in + let seq_tv = ref (TYSPEC_collection mem_tv) in + let (si, _) = fo.Ast.for_slot in + let (_, seq) = fo.Ast.for_seq in + unify_lval seq seq_tv; + unify_slot si.node (Some si.id) mem_tv + + (* FIXME (issue #52): plenty more to handle here. *) + | _ -> + log cx "warning: not typechecking stmt %s\n" + (Ast.sprintf_stmt () stmt) + in + + let visit_stmt_pre (stmt:Ast.stmt) : unit = + try + visit_stmt_pre_full stmt; + (* + * Reset any item-parameters that were resolved to types + * during inference for this statement. + *) + Hashtbl.iter + (fun _ params -> Array.iter (fun tv -> tv := TYSPEC_all) params) + item_params; + with + Semant_err (None, msg) -> + raise (Semant_err ((Some stmt.id), msg)) + in + + let enter_fn fn retspec = + let out = fn.Ast.fn_output_slot in + push_retval_tv (ref retspec); + unify_slot out.node (Some out.id) (retval_tv()) + in + + let visit_obj_fn_pre obj ident fn = + enter_fn fn.node TYSPEC_all; + inner.Walk.visit_obj_fn_pre obj ident fn + in + + let visit_obj_fn_post obj ident fn = + inner.Walk.visit_obj_fn_post obj ident fn; + pop_retval_tv (); + in + + let visit_mod_item_pre n p mod_item = + begin + try + match mod_item.node.Ast.decl_item with + Ast.MOD_ITEM_fn fn -> + enter_fn fn TYSPEC_all + + | _ -> () + with Semant_err (None, msg) -> + raise (Semant_err ((Some mod_item.id), msg)) + end; + inner.Walk.visit_mod_item_pre n p mod_item + in + + let path_name (_:unit) : string = + string_of_name (Walk.path_to_name path) + in + + let visit_mod_item_post n p mod_item = + inner.Walk.visit_mod_item_post n p mod_item; + match mod_item.node.Ast.decl_item with + + | Ast.MOD_ITEM_fn _ -> + pop_retval_tv (); + if (Some (path_name())) = cx.ctxt_main_name + then + begin + match Hashtbl.find cx.ctxt_all_item_types mod_item.id with + Ast.TY_fn (tsig, _) -> + begin + let vec_str = + interior_slot (Ast.TY_vec + (interior_slot Ast.TY_str)) + in + match tsig.Ast.sig_input_slots with + [| |] -> () + | [| vs |] when vs = vec_str -> () + | _ -> err (Some mod_item.id) + "main fn has bad type signature" + end + | _ -> + err (Some mod_item.id) "main item is not a function" + end + | _ -> () + in + + { + inner with + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_mod_item_post = visit_mod_item_post; + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_fn_post = visit_obj_fn_post; + Walk.visit_stmt_pre = visit_stmt_pre + } + + in + try + let auto_queue = Queue.create () in + + let init_slot_tyvar id defn = + match defn with + DEFN_slot { Ast.slot_mode = _; Ast.slot_ty = None } -> + Queue.add id auto_queue; + Hashtbl.add bindings id (ref TYSPEC_all) + | DEFN_slot { Ast.slot_mode = _; Ast.slot_ty = Some ty } -> + let _ = iflog cx (fun _ -> log cx "initial slot #%d type: %a" + (int_of_node id) Ast.sprintf_ty ty) + in + Hashtbl.add bindings id (ref (TYSPEC_resolved ([||], ty))) + | _ -> () + in + + let init_item_tyvar id ty = + let _ = iflog cx (fun _ -> log cx "initial item #%d type: %a" + (int_of_node id) Ast.sprintf_ty ty) + in + let params = + match Hashtbl.find cx.ctxt_all_defns id with + DEFN_item i -> Array.map (fun p -> p.node) i.Ast.decl_params + | DEFN_obj_fn _ -> [| |] + | DEFN_obj_drop _ -> [| |] + | DEFN_loop_body _ -> [| |] + | _ -> err (Some id) "expected item defn for item tyvar" + in + let spec = TYSPEC_resolved (params, ty) in + Hashtbl.add bindings id (ref spec) + in + + let init_mod_dict id defn = + let rec tv_of_item id item = + match item.Ast.decl_item with + Ast.MOD_ITEM_mod (_, items) -> + if Hashtbl.mem bindings id + then Hashtbl.find bindings id + else + let dict = htab_map items + (fun i item -> (i, tv_of_item item.id item.node)) + in + let spec = TYSPEC_dictionary dict in + let tv = ref spec in + Hashtbl.add bindings id tv; + tv + | _ -> + Hashtbl.find bindings id + in + match defn with + DEFN_item ({ Ast.decl_item=Ast.MOD_ITEM_mod _ } as item) -> + ignore (tv_of_item id item) + | _ -> () + in + Hashtbl.iter init_slot_tyvar cx.ctxt_all_defns; + Hashtbl.iter init_item_tyvar cx.ctxt_all_item_types; + Hashtbl.iter init_mod_dict cx.ctxt_all_defns; + Walk.walk_crate + (Walk.path_managing_visitor path + (Walk.mod_item_logging_visitor + (log cx "typechecking pass: %s") + path + (visitor cx Walk.empty_visitor))) + crate; + + let update_auto_tyvar id ty = + let defn = Hashtbl.find cx.ctxt_all_defns id in + match defn with + DEFN_slot slot_defn -> + Hashtbl.replace cx.ctxt_all_defns id + (DEFN_slot { slot_defn with Ast.slot_ty = Some ty }) + | _ -> bug () "check_auto_tyvar: no slot defn" + in + + let get_resolved_ty tv id = + let ts = !(resolve_tyvar tv) in + match ts with + TYSPEC_resolved ([||], ty) -> ty + | TYSPEC_vector (tv) -> + begin + match !(resolve_tyvar tv) with + TYSPEC_resolved ([||], ty) -> + (Ast.TY_vec (interior_slot ty)) + | _ -> + err (Some id) + "unresolved vector-element type in %s (%d)" + (tyspec_to_str ts) (int_of_node id) + end + | _ -> err (Some id) + "unresolved type %s (%d)" + (tyspec_to_str ts) + (int_of_node id) + in + + let check_auto_tyvar id = + let tv = Hashtbl.find bindings id in + let ty = get_resolved_ty tv id in + update_auto_tyvar id ty + in + + let record_lval_ty id tv = + let ty = get_resolved_ty tv id in + Hashtbl.add cx.ctxt_all_lval_types id ty + in + + Queue.iter check_auto_tyvar auto_queue; + Hashtbl.iter record_lval_ty lval_tyvars; + with Semant_err (ido, str) -> report_err cx ido str +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) + diff --git a/src/boot/me/typestate.ml b/src/boot/me/typestate.ml new file mode 100644 index 00000000..4671d0f4 --- /dev/null +++ b/src/boot/me/typestate.ml @@ -0,0 +1,1089 @@ +open Semant;; +open Common;; + + +let log cx = Session.log "typestate" + cx.ctxt_sess.Session.sess_log_typestate + cx.ctxt_sess.Session.sess_log_out +;; + +let iflog cx thunk = + if cx.ctxt_sess.Session.sess_log_typestate + then thunk () + else () +;; + +let name_base_to_slot_key (nb:Ast.name_base) : Ast.slot_key = + match nb with + Ast.BASE_ident ident -> Ast.KEY_ident ident + | Ast.BASE_temp tmp -> Ast.KEY_temp tmp + | Ast.BASE_app _ -> bug () "name_base_to_slot_key on parametric name" +;; + +let determine_constr_key + (cx:ctxt) + (scopes:(scope list)) + (formal_base:node_id option) + (c:Ast.constr) + : constr_key = + + let cid = + match lookup_by_name cx scopes c.Ast.constr_name with + Some (_, cid) -> + if referent_is_item cx cid + then + begin + match Hashtbl.find cx.ctxt_all_item_types cid with + Ast.TY_fn (_, taux) -> + begin + if taux.Ast.fn_effect = Ast.PURE + then cid + else err (Some cid) "impure function used in constraint" + end + | _ -> bug () "bad type of predicate" + end + else + bug () "slot used as predicate" + | None -> bug () "predicate not found" + in + + let constr_arg_of_carg carg = + match carg with + Ast.CARG_path pth -> + let rec node_base_of pth = + match pth with + Ast.CARG_base Ast.BASE_formal -> + begin + match formal_base with + Some id -> id + | None -> + bug () "formal symbol * used in free constraint" + end + | Ast.CARG_ext (pth, _) -> node_base_of pth + | Ast.CARG_base (Ast.BASE_named nb) -> + begin + match lookup_by_name cx scopes (Ast.NAME_base nb) with + None -> bug () "constraint-arg not found" + | Some (_, aid) -> + if referent_is_slot cx aid + then + if type_has_state + (slot_ty (referent_to_slot cx aid)) + then err (Some aid) + "predicate applied to slot of mutable type" + else aid + else + (* Items are always constant, they're ok. + * Weird to be using them in a constr, but ok. *) + aid + end + in + Constr_arg_node (node_base_of pth, pth) + + | Ast.CARG_lit lit -> Constr_arg_lit lit + in + Constr_pred (cid, Array.map constr_arg_of_carg c.Ast.constr_args) +;; + +let fmt_constr_key cx ckey = + match ckey with + Constr_pred (cid, args) -> + let fmt_constr_arg carg = + match carg with + Constr_arg_lit lit -> + Ast.fmt_to_str Ast.fmt_lit lit + | Constr_arg_node (id, pth) -> + let rec fmt_pth pth = + match pth with + Ast.CARG_base _ -> + if referent_is_slot cx id + then + let key = Hashtbl.find cx.ctxt_slot_keys id in + Ast.fmt_to_str Ast.fmt_slot_key key + else + let n = Hashtbl.find cx.ctxt_all_item_names id in + Ast.fmt_to_str Ast.fmt_name n + | Ast.CARG_ext (pth, nc) -> + let b = fmt_pth pth in + b ^ (Ast.fmt_to_str Ast.fmt_name_component nc) + in + fmt_pth pth + in + let pred_name = Hashtbl.find cx.ctxt_all_item_names cid in + Printf.sprintf "%s(%s)" + (Ast.fmt_to_str Ast.fmt_name pred_name) + (String.concat ", " + (List.map + fmt_constr_arg + (Array.to_list args))) + + | Constr_init n when Hashtbl.mem cx.ctxt_slot_keys n -> + Printf.sprintf "<init #%d = %s>" + (int_of_node n) + (Ast.fmt_to_str Ast.fmt_slot_key (Hashtbl.find cx.ctxt_slot_keys n)) + | Constr_init n -> + Printf.sprintf "<init #%d>" (int_of_node n) +;; + +let entry_keys header constrs resolver = + let init_keys = + Array.map + (fun (sloti, _) -> (Constr_init sloti.id)) + header + in + let names = + Array.map + (fun (_, ident) -> (Some (Ast.BASE_ident ident))) + header + in + let input_constrs = + Array.map (apply_names_to_constr names) constrs in + let input_keys = Array.map resolver input_constrs in + (input_keys, init_keys) +;; + +let obj_keys ob resolver = + entry_keys ob.Ast.obj_state ob.Ast.obj_constrs resolver +;; + +let fn_keys fn resolver = + entry_keys fn.Ast.fn_input_slots fn.Ast.fn_input_constrs resolver +;; + +let constr_id_assigning_visitor + (cx:ctxt) + (scopes:(scope list) ref) + (idref:int ref) + (inner:Walk.visitor) + : Walk.visitor = + + let resolve_constr_to_key + (formal_base:node_id) + (constr:Ast.constr) + : constr_key = + determine_constr_key cx (!scopes) (Some formal_base) constr + in + + let note_constr_key key = + if not (Hashtbl.mem cx.ctxt_constr_ids key) + then + begin + let cid = Constr (!idref) in + iflog cx + (fun _ -> log cx "assigning constr id #%d to constr %s" + (!idref) (fmt_constr_key cx key)); + incr idref; + htab_put cx.ctxt_constrs cid key; + htab_put cx.ctxt_constr_ids key cid; + end + in + + let note_keys = Array.iter note_constr_key in + + let visit_mod_item_pre n p i = + let resolver = resolve_constr_to_key i.id in + begin + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn f -> + let (input_keys, init_keys) = fn_keys f resolver in + note_keys input_keys; + note_keys init_keys + | Ast.MOD_ITEM_obj ob -> + let (input_keys, init_keys) = obj_keys ob resolver in + note_keys input_keys; + note_keys init_keys + | _ -> () + end; + inner.Walk.visit_mod_item_pre n p i + in + + let visit_constr_pre formal_base c = + let key = determine_constr_key cx (!scopes) formal_base c in + note_constr_key key; + inner.Walk.visit_constr_pre formal_base c + in + (* + * We want to generate, for any call site, a variant of + * the callee's entry typestate specialized to the arguments + * that the caller passes. + * + * Also, for any slot-decl node, we have to generate a + * variant of Constr_init for the slot (because the slot is + * the sort of thing that can vary in init-ness over time). + *) + let visit_stmt_pre s = + begin + match s.node with + Ast.STMT_call (_, lv, args) -> + let referent = lval_to_referent cx (lval_base_id lv) in + let referent_ty = lval_ty cx lv in + begin + match referent_ty with + Ast.TY_fn (tsig,_) -> + let constrs = tsig.Ast.sig_input_constrs in + let names = atoms_to_names args in + let constrs' = + Array.map (apply_names_to_constr names) constrs + in + Array.iter (visit_constr_pre (Some referent)) constrs' + + | _ -> () + end + + | _ -> () + end; + inner.Walk.visit_stmt_pre s + in + + let visit_slot_identified_pre s = + note_constr_key (Constr_init s.id); + inner.Walk.visit_slot_identified_pre s + in + { inner with + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_slot_identified_pre = visit_slot_identified_pre; + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_constr_pre = visit_constr_pre } +;; + +let bitmap_assigning_visitor + (cx:ctxt) + (idref:int ref) + (inner:Walk.visitor) + : Walk.visitor = + let visit_stmt_pre s = + iflog cx (fun _ -> log cx "building %d-entry bitmap for node %d" + (!idref) (int_of_node s.id)); + htab_put cx.ctxt_preconditions s.id (Bits.create (!idref) false); + htab_put cx.ctxt_postconditions s.id (Bits.create (!idref) false); + htab_put cx.ctxt_prestates s.id (Bits.create (!idref) false); + htab_put cx.ctxt_poststates s.id (Bits.create (!idref) false); + inner.Walk.visit_stmt_pre s + in + let visit_block_pre b = + iflog cx (fun _ -> log cx "building %d-entry bitmap for node %d" + (!idref) (int_of_node b.id)); + htab_put cx.ctxt_preconditions b.id (Bits.create (!idref) false); + htab_put cx.ctxt_postconditions b.id (Bits.create (!idref) false); + htab_put cx.ctxt_prestates b.id (Bits.create (!idref) false); + htab_put cx.ctxt_poststates b.id (Bits.create (!idref) false); + inner.Walk.visit_block_pre b + in + { inner with + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_block_pre = visit_block_pre } +;; + +let condition_assigning_visitor + (cx:ctxt) + (scopes:(scope list) ref) + (inner:Walk.visitor) + : Walk.visitor = + + let raise_bits (bitv:Bits.t) (keys:constr_key array) : unit = + Array.iter + (fun key -> + let cid = Hashtbl.find cx.ctxt_constr_ids key in + let i = int_of_constr cid in + iflog cx (fun _ -> log cx "setting bit %d, constraint %s" + i (fmt_constr_key cx key)); + Bits.set bitv (int_of_constr cid) true) + keys + in + + let slot_inits ss = Array.map (fun s -> Constr_init s) ss in + + let raise_postcondition (id:node_id) (keys:constr_key array) : unit = + let bitv = Hashtbl.find cx.ctxt_postconditions id in + raise_bits bitv keys + in + + let raise_precondition (id:node_id) (keys:constr_key array) : unit = + let bitv = Hashtbl.find cx.ctxt_preconditions id in + raise_bits bitv keys + in + + let resolve_constr_to_key + (formal_base:node_id option) + (constr:Ast.constr) + : constr_key = + determine_constr_key cx (!scopes) formal_base constr + in + + let raise_entry_state input_keys init_keys block = + iflog cx + (fun _ -> log cx + "setting entry state as block %d postcondition (\"entry\" prestate)" + (int_of_node block.id)); + raise_postcondition block.id input_keys; + raise_postcondition block.id init_keys; + iflog cx (fun _ -> log cx "done setting block postcondition") + in + + let visit_mod_item_pre n p i = + begin + match i.node.Ast.decl_item with + Ast.MOD_ITEM_fn f -> + let (input_keys, init_keys) = + fn_keys f (resolve_constr_to_key (Some i.id)) + in + raise_entry_state input_keys init_keys f.Ast.fn_body + + | _ -> () + end; + inner.Walk.visit_mod_item_pre n p i + in + + let visit_obj_fn_pre obj ident fn = + let (obj_input_keys, obj_init_keys) = + obj_keys obj.node (resolve_constr_to_key (Some obj.id)) + in + let (fn_input_keys, fn_init_keys) = + fn_keys fn.node (resolve_constr_to_key (Some fn.id)) + in + raise_entry_state obj_input_keys obj_init_keys fn.node.Ast.fn_body; + raise_entry_state fn_input_keys fn_init_keys fn.node.Ast.fn_body; + inner.Walk.visit_obj_fn_pre obj ident fn + in + + let visit_obj_drop_pre obj b = + let (obj_input_keys, obj_init_keys) = + obj_keys obj.node (resolve_constr_to_key (Some obj.id)) + in + raise_entry_state obj_input_keys obj_init_keys b; + inner.Walk.visit_obj_drop_pre obj b + in + + let visit_callable_pre s dst lv args = + let referent_ty = lval_ty cx lv in + begin + match referent_ty with + Ast.TY_fn (tsig,_) -> + let formal_constrs = tsig.Ast.sig_input_constrs in + let names = atoms_to_names args in + let constrs = + Array.map (apply_names_to_constr names) formal_constrs + in + let keys = Array.map (resolve_constr_to_key None) constrs in + raise_precondition s.id keys + | _ -> () + end; + begin + let postcond = + slot_inits (lval_slots cx dst) + in + raise_postcondition s.id postcond + end + in + + let visit_stmt_pre s = + begin + match s.node with + Ast.STMT_check (constrs, _) -> + let postcond = Array.map (resolve_constr_to_key None) constrs in + raise_postcondition s.id postcond + + | Ast.STMT_recv (dst, src) -> + let precond = slot_inits (lval_slots cx src) in + let postcond = slot_inits (lval_slots cx dst) in + raise_precondition s.id precond; + raise_postcondition s.id postcond + + | Ast.STMT_send (dst, src) -> + let precond = Array.append + (slot_inits (lval_slots cx dst)) + (slot_inits (lval_slots cx src)) + in + raise_precondition s.id precond; + + | Ast.STMT_init_rec (dst, entries, base) -> + let base_slots = + begin + match base with + None -> [| |] + | Some lval -> lval_slots cx lval + end + in + let precond = slot_inits + (Array.append (rec_inputs_slots cx entries) base_slots) + in + let postcond = slot_inits (lval_slots cx dst) in + raise_precondition s.id precond; + raise_postcondition s.id postcond + + | Ast.STMT_init_tup (dst, modes_atoms) -> + let precond = slot_inits + (tup_inputs_slots cx modes_atoms) + in + let postcond = slot_inits (lval_slots cx dst) in + raise_precondition s.id precond; + raise_postcondition s.id postcond + + | Ast.STMT_init_vec (dst, _, atoms) -> + let precond = slot_inits (atoms_slots cx atoms) in + let postcond = slot_inits (lval_slots cx dst) in + raise_precondition s.id precond; + raise_postcondition s.id postcond + + | Ast.STMT_init_str (dst, _) -> + let postcond = slot_inits (lval_slots cx dst) in + raise_postcondition s.id postcond + + | Ast.STMT_init_port dst -> + let postcond = slot_inits (lval_slots cx dst) in + raise_postcondition s.id postcond + + | Ast.STMT_init_chan (dst, port) -> + let precond = slot_inits (lval_option_slots cx port) in + let postcond = slot_inits (lval_slots cx dst) in + raise_precondition s.id precond; + raise_postcondition s.id postcond + + | Ast.STMT_copy (dst, src) -> + let precond = slot_inits (expr_slots cx src) in + let postcond = slot_inits (lval_slots cx dst) in + raise_precondition s.id precond; + raise_postcondition s.id postcond + + | Ast.STMT_copy_binop (dst, _, src) -> + let dst_init = slot_inits (lval_slots cx dst) in + let src_init = slot_inits (atom_slots cx src) in + let precond = Array.append dst_init src_init in + raise_precondition s.id precond; + + | Ast.STMT_spawn (dst, _, lv, args) + | Ast.STMT_call (dst, lv, args) -> + visit_callable_pre s dst lv args + + | Ast.STMT_bind (dst, lv, args_opt) -> + let args = arr_map_partial args_opt (fun a -> a) in + visit_callable_pre s dst lv args + + | Ast.STMT_ret (Some at) -> + let precond = slot_inits (atom_slots cx at) in + raise_precondition s.id precond + + | Ast.STMT_put (Some at) -> + let precond = slot_inits (atom_slots cx at) in + raise_precondition s.id precond + + | Ast.STMT_join lval -> + let precond = slot_inits (lval_slots cx lval) in + raise_precondition s.id precond + + | Ast.STMT_log atom -> + let precond = slot_inits (atom_slots cx atom) in + raise_precondition s.id precond + + | Ast.STMT_check_expr expr -> + let precond = slot_inits (expr_slots cx expr) in + raise_precondition s.id precond + + | Ast.STMT_while sw -> + let (_, expr) = sw.Ast.while_lval in + let precond = slot_inits (expr_slots cx expr) in + raise_precondition s.id precond + + | Ast.STMT_alt_tag at -> + let precond = slot_inits (lval_slots cx at.Ast.alt_tag_lval) in + let visit_arm { node = (pat, block) } = + (* FIXME: propagate tag-carried constrs here. *) + let rec get_slots pat = + match pat with + Ast.PAT_slot header_slot -> [| header_slot |] + | Ast.PAT_tag (_, pats) -> + Array.concat (List.map get_slots (Array.to_list pats)) + | _ -> [| |] + in + let header_slots = get_slots pat in + let (input_keys, init_keys) = + entry_keys header_slots [| |] (resolve_constr_to_key None) + in + raise_entry_state input_keys init_keys block + in + raise_precondition s.id precond; + Array.iter visit_arm at.Ast.alt_tag_arms + + | Ast.STMT_for_each fe -> + let (si, _) = fe.Ast.for_each_slot in + let block_entry_state = [| Constr_init si.id |] in + raise_postcondition fe.Ast.for_each_body.id block_entry_state + + | Ast.STMT_for fo -> + let (si, _) = fo.Ast.for_slot in + let block_entry_state = [| Constr_init si.id |] in + raise_postcondition fo.Ast.for_body.id block_entry_state + + | _ -> () + end; + inner.Walk.visit_stmt_pre s + in + { inner with + Walk.visit_mod_item_pre = visit_mod_item_pre; + Walk.visit_obj_fn_pre = visit_obj_fn_pre; + Walk.visit_obj_drop_pre = visit_obj_drop_pre; + Walk.visit_stmt_pre = visit_stmt_pre } +;; + +let lset_add (x:node_id) (xs:node_id list) : node_id list = + if List.mem x xs + then xs + else x::xs +;; + +let lset_remove (x:node_id) (xs:node_id list) : node_id list = + List.filter (fun a -> not (a = x)) xs +;; + +let lset_union (xs:node_id list) (ys:node_id list) : node_id list = + List.fold_left (fun ns n -> lset_add n ns) xs ys +;; + +let lset_diff (xs:node_id list) (ys:node_id list) : node_id list = + List.fold_left (fun ns n -> lset_remove n ns) xs ys +;; + +let lset_fmt lset = + "[" ^ + (String.concat ", " + (List.map + (fun n -> string_of_int (int_of_node n)) lset)) ^ + "]" +;; + +type node_graph = (node_id, (node_id list)) Hashtbl.t;; + +let graph_sequence_building_visitor + (cx:ctxt) + (graph:node_graph) + (inner:Walk.visitor) + : Walk.visitor = + + (* Flow each stmt to its sequence-successor. *) + let visit_stmts stmts = + let len = Array.length stmts in + for i = 0 to len - 2 + do + let stmt = stmts.(i) in + let next = stmts.(i+1) in + log cx "sequential stmt edge %d -> %d" + (int_of_node stmt.id) (int_of_node next.id); + htab_put graph stmt.id [next.id] + done; + (* Flow last node to nowhere. *) + if len > 0 + then htab_put graph stmts.(len-1).id [] + in + + let visit_stmt_pre s = + (* Sequence the prelude nodes on special stmts. *) + begin + match s.node with + Ast.STMT_while sw -> + let (stmts, _) = sw.Ast.while_lval in + visit_stmts stmts + | _ -> () + end; + inner.Walk.visit_stmt_pre s + in + + let visit_block_pre b = + visit_stmts b.node; + inner.Walk.visit_block_pre b + in + + { inner with + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_block_pre = visit_block_pre } +;; + +let add_flow_edges (graph:node_graph) (n:node_id) (dsts:node_id list) : unit = + let existing = Hashtbl.find graph n in + Hashtbl.replace graph n (lset_union existing dsts) +;; + +let remove_flow_edges + (graph:node_graph) + (n:node_id) + (dsts:node_id list) + : unit = + let existing = Hashtbl.find graph n in + Hashtbl.replace graph n (lset_diff existing dsts) +;; + +let graph_general_block_structure_building_visitor + ((*cx*)_:ctxt) + (graph:node_graph) + (inner:Walk.visitor) + : Walk.visitor = + + let stmts = Stack.create () in + + let visit_stmt_pre s = + Stack.push s stmts; + inner.Walk.visit_stmt_pre s + in + + let visit_stmt_post s = + inner.Walk.visit_stmt_post s; + ignore (Stack.pop stmts) + in + + let visit_block_pre b = + begin + let len = Array.length b.node in + + (* Flow container-stmt to block, save existing out-edges for below. *) + let dsts = + if Stack.is_empty stmts + then [] + else + let s = Stack.top stmts in + let dsts = Hashtbl.find graph s.id in + add_flow_edges graph s.id [b.id]; + dsts + in + + (* + * If block has len, + * then flow block to block.node.(0) and block.node.(len-1) to dsts + * else flow block to dsts + * + * so AST: + * + * block#n{ stmt#0 ... stmt#k }; + * stmt#j; + * + * turns into graph: + * + * block#n -> stmt#0 -> ... -> stmt#k -> stmt#j + * + *) + + if len > 0 + then + begin + htab_put graph b.id [b.node.(0).id]; + add_flow_edges graph b.node.(len-1).id dsts + end + else + htab_put graph b.id dsts + end; + inner.Walk.visit_block_pre b + in + + { inner with + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_stmt_post = visit_stmt_post; + Walk.visit_block_pre = visit_block_pre } +;; + + +let graph_special_block_structure_building_visitor + ((*cx*)_:ctxt) + (graph:(node_id, (node_id list)) Hashtbl.t) + (inner:Walk.visitor) + : Walk.visitor = + + let visit_stmt_pre s = + begin + match s.node with + + | Ast.STMT_if sif -> + (* + * Drop implicit stmt-bypass edge(s); + * can only flow to inner block(s). + *) + let block_ids = + [sif.Ast.if_then.id] @ + match sif.Ast.if_else with + None -> [] + | Some eb -> [eb.id] + in + Hashtbl.replace graph s.id block_ids + + | Ast.STMT_while sw -> + (* There are a bunch of rewirings to do on 'while' nodes. *) + + begin + let dsts = Hashtbl.find graph s.id in + let body = sw.Ast.while_body in + let succ_stmts = + List.filter (fun x -> not (x = body.id)) dsts + in + + let (pre_loop_stmts, _) = sw.Ast.while_lval in + let loop_head_id = + (* Splice loop prelude into flow graph, save loop-head + * node. + *) + let slen = Array.length pre_loop_stmts in + if slen > 0 + then + begin + remove_flow_edges graph s.id [body.id]; + add_flow_edges graph s.id [pre_loop_stmts.(0).id]; + add_flow_edges graph + pre_loop_stmts.(slen-1).id [body.id]; + pre_loop_stmts.(slen - 1).id + end + else + body.id + in + + (* Always flow s into the loop prelude; prelude may end + * loop. + *) + remove_flow_edges graph s.id succ_stmts; + add_flow_edges graph loop_head_id succ_stmts; + + (* Flow loop-end to loop-head. *) + let blen = Array.length body.node in + if blen > 0 + then add_flow_edges graph + body.node.(blen - 1).id [loop_head_id] + else add_flow_edges graph + body.id [loop_head_id] + end + + | Ast.STMT_alt_tag at -> + let dsts = Hashtbl.find graph s.id in + let arm_blocks = + let arm_block_id { node = (_, block) } = block.id in + Array.to_list (Array.map arm_block_id at.Ast.alt_tag_arms) + in + let succ_stmts = + List.filter (fun x -> not (List.mem x arm_blocks)) dsts + in + remove_flow_edges graph s.id succ_stmts + + | _ -> () + end; + inner.Walk.visit_stmt_post s + in + { inner with + Walk.visit_stmt_pre = visit_stmt_pre } +;; + +let find_roots + (graph:(node_id, (node_id list)) Hashtbl.t) + : (node_id,unit) Hashtbl.t = + let roots = Hashtbl.create 0 in + Hashtbl.iter (fun src _ -> Hashtbl.replace roots src ()) graph; + Hashtbl.iter (fun _ dsts -> + List.iter (fun d -> Hashtbl.remove roots d) dsts) graph; + roots +;; + +let run_dataflow cx graph : unit = + let roots = find_roots graph in + let nodes = Queue.create () in + let progress = ref true in + let fmt_constr_bitv bitv = + String.concat ", " + (List.map + (fun i -> + fmt_constr_key cx + (Hashtbl.find cx.ctxt_constrs (Constr i))) + (Bits.to_list bitv)) + in + let set_bits dst src = + if Bits.copy dst src + then (progress := true; + iflog cx (fun _ -> log cx "made progress setting bits")) + in + let intersect_bits dst src = + if Bits.intersect dst src + then (progress := true; + iflog cx (fun _ -> log cx + "made progress intersecting bits")) + in + let raise_bits dst src = + if Bits.union dst src + then (progress := true; + iflog cx (fun _ -> log cx + "made progress unioning bits")) + in + let iter = ref 0 in + let written = Hashtbl.create 0 in + Hashtbl.iter (fun n _ -> Queue.push n nodes) roots; + while !progress do + incr iter; + progress := false; + iflog cx (fun _ -> log cx "dataflow pass %d" (!iter)); + Queue.iter + begin + fun node -> + let prestate = Hashtbl.find cx.ctxt_prestates node in + let postcond = Hashtbl.find cx.ctxt_postconditions node in + let poststate = Hashtbl.find cx.ctxt_poststates node in + iflog cx (fun _ -> log cx "stmt %d: '%s'" (int_of_node node) + (match htab_search cx.ctxt_all_stmts node with + None -> "??" + | Some stmt -> Ast.fmt_to_str Ast.fmt_stmt stmt)); + iflog cx (fun _ -> log cx "stmt %d:" (int_of_node node)); + iflog cx (fun _ -> log cx + " prestate %s" (fmt_constr_bitv prestate)); + raise_bits poststate prestate; + raise_bits poststate postcond; + iflog cx (fun _ -> log cx + " poststate %s" (fmt_constr_bitv poststate)); + Hashtbl.replace written node (); + let successors = Hashtbl.find graph node in + let i = int_of_node node in + iflog cx (fun _ -> log cx + "out-edges for %d: %s" i (lset_fmt successors)); + List.iter + begin + fun succ -> + let succ_prestates = + Hashtbl.find cx.ctxt_prestates succ + in + if Hashtbl.mem written succ + then + begin + intersect_bits succ_prestates poststate; + Hashtbl.replace written succ () + end + else + begin + progress := true; + Queue.push succ nodes; + set_bits succ_prestates poststate + end + end + successors + end + nodes + done +;; + +let typestate_verify_visitor + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + let visit_stmt_pre s = + let prestate = Hashtbl.find cx.ctxt_prestates s.id in + let precond = Hashtbl.find cx.ctxt_preconditions s.id in + List.iter + (fun i -> + if not (Bits.get prestate i) + then + let ckey = Hashtbl.find cx.ctxt_constrs (Constr i) in + let constr_str = fmt_constr_key cx ckey in + err (Some s.id) + "Unsatisfied precondition constraint %s at stmt %d: %s" + constr_str + (int_of_node s.id) + (Ast.fmt_to_str Ast.fmt_stmt + (Hashtbl.find cx.ctxt_all_stmts s.id))) + (Bits.to_list precond); + inner.Walk.visit_stmt_pre s + in + { inner with + Walk.visit_stmt_pre = visit_stmt_pre } +;; + +let lifecycle_visitor + (cx:ctxt) + (inner:Walk.visitor) + : Walk.visitor = + + (* + * This visitor doesn't *calculate* part of the typestate; it uses + * the typestates calculated in earlier passes to extract "summaries" + * of slot-lifecycle events into the ctxt tables + * ctxt_copy_stmt_is_init and ctxt_post_stmt_slot_drops. These are + * used later on in translation. + *) + + let (live_block_slots:(node_id Stack.t) Stack.t) = Stack.create () in + + let (implicit_init_block_slots:(node_id,node_id) Hashtbl.t) = + Hashtbl.create 0 + in + + let mark_slot_init sl = + Stack.push sl (Stack.top live_block_slots) + in + + + let visit_block_pre b = + Stack.push (Stack.create()) live_block_slots; + begin + match htab_search implicit_init_block_slots b.id with + None -> () + | Some slot -> mark_slot_init slot + end; + inner.Walk.visit_block_pre b + in + + let note_drops stmt slots = + iflog cx + begin + fun _ -> + log cx "implicit drop of %d slots after stmt %a: " + (List.length slots) + Ast.sprintf_stmt stmt; + List.iter (fun s -> log cx "drop: %a" + Ast.sprintf_slot_key + (Hashtbl.find cx.ctxt_slot_keys s)) + slots + end; + htab_put cx.ctxt_post_stmt_slot_drops stmt.id slots + in + + let visit_block_post b = + inner.Walk.visit_block_post b; + let blk_live = Stack.pop live_block_slots in + let stmts = b.node in + let len = Array.length stmts in + if len > 0 + then + begin + let s = stmts.(len-1) in + match s.node with + Ast.STMT_ret _ + | Ast.STMT_be _ -> + () (* Taken care of in visit_stmt_post below. *) + | _ -> + let slots = stk_elts_from_top blk_live in + note_drops s slots + end; + in + + let visit_stmt_pre s = + begin + let init_lval lv_dst = + let dst_slots = lval_slots cx lv_dst in + Array.iter mark_slot_init dst_slots; + in + match s.node with + Ast.STMT_copy (lv_dst, _) + | Ast.STMT_call (lv_dst, _, _) + | Ast.STMT_spawn (lv_dst, _, _, _) + | Ast.STMT_recv (lv_dst, _) + | Ast.STMT_bind (lv_dst, _, _) -> + let prestate = Hashtbl.find cx.ctxt_prestates s.id in + let poststate = Hashtbl.find cx.ctxt_poststates s.id in + let dst_slots = lval_slots cx lv_dst in + let is_initializing slot = + let cid = + Hashtbl.find cx.ctxt_constr_ids (Constr_init slot) + in + let i = int_of_constr cid in + (not (Bits.get prestate i)) && (Bits.get poststate i) + in + let initializing = + List.exists is_initializing (Array.to_list dst_slots) + in + if initializing + then + begin + Hashtbl.add cx.ctxt_copy_stmt_is_init s.id (); + init_lval lv_dst + end; + + | Ast.STMT_init_rec (lv_dst, _, _) + | Ast.STMT_init_tup (lv_dst, _) + | Ast.STMT_init_vec (lv_dst, _, _) + | Ast.STMT_init_str (lv_dst, _) + | Ast.STMT_init_port lv_dst + | Ast.STMT_init_chan (lv_dst, _) -> + init_lval lv_dst + + | Ast.STMT_for f -> + log cx "noting implicit init for slot %d in for-block %d" + (int_of_node (fst f.Ast.for_slot).id) + (int_of_node (f.Ast.for_body.id)); + htab_put implicit_init_block_slots + f.Ast.for_body.id + (fst f.Ast.for_slot).id + + | Ast.STMT_for_each f -> + log cx "noting implicit init for slot %d in for_each-block %d" + (int_of_node (fst f.Ast.for_each_slot).id) + (int_of_node (f.Ast.for_each_body.id)); + htab_put implicit_init_block_slots + f.Ast.for_each_body.id + (fst f.Ast.for_each_slot).id + + + | _ -> () + end; + inner.Walk.visit_stmt_pre s + in + + let visit_stmt_post s = + inner.Walk.visit_stmt_post s; + match s.node with + Ast.STMT_ret _ + | Ast.STMT_be _ -> + let stks = stk_elts_from_top live_block_slots in + let slots = List.concat (List.map stk_elts_from_top stks) in + note_drops s slots + | _ -> () + in + + { inner with + Walk.visit_block_pre = visit_block_pre; + Walk.visit_block_post = visit_block_post; + Walk.visit_stmt_pre = visit_stmt_pre; + Walk.visit_stmt_post = visit_stmt_post + } +;; + +let process_crate + (cx:ctxt) + (crate:Ast.crate) + : unit = + let path = Stack.create () in + let (scopes:(scope list) ref) = ref [] in + let constr_id = ref 0 in + let (graph:(node_id, (node_id list)) Hashtbl.t) = Hashtbl.create 0 in + let setup_passes = + [| + (scope_stack_managing_visitor scopes + (constr_id_assigning_visitor cx scopes constr_id + Walk.empty_visitor)); + (bitmap_assigning_visitor cx constr_id + Walk.empty_visitor); + (scope_stack_managing_visitor scopes + (condition_assigning_visitor cx scopes + Walk.empty_visitor)); + (graph_sequence_building_visitor cx graph + Walk.empty_visitor); + (graph_general_block_structure_building_visitor cx graph + Walk.empty_visitor); + (graph_special_block_structure_building_visitor cx graph + Walk.empty_visitor); + |] + in + let verify_passes = + [| + (scope_stack_managing_visitor scopes + (typestate_verify_visitor cx + Walk.empty_visitor)) + |] + in + let aux_passes = + [| + (lifecycle_visitor cx + Walk.empty_visitor) + |] + in + run_passes cx "typestate setup" path setup_passes (log cx "%s") crate; + run_dataflow cx graph; + run_passes cx "typestate verify" path verify_passes (log cx "%s") crate; + run_passes cx "typestate aux" path aux_passes (log cx "%s") crate +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/me/walk.ml b/src/boot/me/walk.ml new file mode 100644 index 00000000..3486bb16 --- /dev/null +++ b/src/boot/me/walk.ml @@ -0,0 +1,687 @@ + +open Common;; + +(* + * The purpose of this module is just to decouple the AST from the + * various passes that are interested in visiting "parts" of it. + * If the AST shifts, we have better odds of the shift only affecting + * this module rather than all of its clients. Similarly if the + * clients only need to visit part, they only have to define the + * part of the walk they're interested in, making it cheaper to define + * multiple passes. + *) + +type visitor = + { + visit_stmt_pre: Ast.stmt -> unit; + visit_stmt_post: Ast.stmt -> unit; + visit_slot_identified_pre: (Ast.slot identified) -> unit; + visit_slot_identified_post: (Ast.slot identified) -> unit; + visit_expr_pre: Ast.expr -> unit; + visit_expr_post: Ast.expr -> unit; + visit_ty_pre: Ast.ty -> unit; + visit_ty_post: Ast.ty -> unit; + visit_constr_pre: node_id option -> Ast.constr -> unit; + visit_constr_post: node_id option -> Ast.constr -> unit; + visit_pat_pre: Ast.pat -> unit; + visit_pat_post: Ast.pat -> unit; + visit_block_pre: Ast.block -> unit; + visit_block_post: Ast.block -> unit; + + visit_lit_pre: Ast.lit -> unit; + visit_lit_post: Ast.lit -> unit; + visit_lval_pre: Ast.lval -> unit; + visit_lval_post: Ast.lval -> unit; + visit_mod_item_pre: + (Ast.ident + -> ((Ast.ty_param identified) array) + -> Ast.mod_item + -> unit); + visit_mod_item_post: + (Ast.ident + -> ((Ast.ty_param identified) array) + -> Ast.mod_item + -> unit); + visit_obj_fn_pre: + (Ast.obj identified) -> Ast.ident -> (Ast.fn identified) -> unit; + visit_obj_fn_post: + (Ast.obj identified) -> Ast.ident -> (Ast.fn identified) -> unit; + visit_obj_drop_pre: + (Ast.obj identified) -> Ast.block -> unit; + visit_obj_drop_post: + (Ast.obj identified) -> Ast.block -> unit; + visit_crate_pre: Ast.crate -> unit; + visit_crate_post: Ast.crate -> unit; + } +;; + + +let empty_visitor = + { visit_stmt_pre = (fun _ -> ()); + visit_stmt_post = (fun _ -> ()); + visit_slot_identified_pre = (fun _ -> ()); + visit_slot_identified_post = (fun _ -> ()); + visit_expr_pre = (fun _ -> ()); + visit_expr_post = (fun _ -> ()); + visit_ty_pre = (fun _ -> ()); + visit_ty_post = (fun _ -> ()); + visit_constr_pre = (fun _ _ -> ()); + visit_constr_post = (fun _ _ -> ()); + visit_pat_pre = (fun _ -> ()); + visit_pat_post = (fun _ -> ()); + visit_block_pre = (fun _ -> ()); + visit_block_post = (fun _ -> ()); + visit_lit_pre = (fun _ -> ()); + visit_lit_post = (fun _ -> ()); + visit_lval_pre = (fun _ -> ()); + visit_lval_post = (fun _ -> ()); + visit_mod_item_pre = (fun _ _ _ -> ()); + visit_mod_item_post = (fun _ _ _ -> ()); + visit_obj_fn_pre = (fun _ _ _ -> ()); + visit_obj_fn_post = (fun _ _ _ -> ()); + visit_obj_drop_pre = (fun _ _ -> ()); + visit_obj_drop_post = (fun _ _ -> ()); + visit_crate_pre = (fun _ -> ()); + visit_crate_post = (fun _ -> ()); } +;; + +let path_managing_visitor + (path:Ast.name_component Stack.t) + (inner:visitor) + : visitor = + let visit_mod_item_pre ident params item = + Stack.push (Ast.COMP_ident ident) path; + inner.visit_mod_item_pre ident params item + in + let visit_mod_item_post ident params item = + inner.visit_mod_item_post ident params item; + ignore (Stack.pop path) + in + let visit_obj_fn_pre obj ident fn = + Stack.push (Ast.COMP_ident ident) path; + inner.visit_obj_fn_pre obj ident fn + in + let visit_obj_fn_post obj ident fn = + inner.visit_obj_fn_post obj ident fn; + ignore (Stack.pop path) + in + let visit_obj_drop_pre obj b = + Stack.push (Ast.COMP_ident "drop") path; + inner.visit_obj_drop_pre obj b + in + let visit_obj_drop_post obj b = + inner.visit_obj_drop_post obj b; + ignore (Stack.pop path) + in + { inner with + visit_mod_item_pre = visit_mod_item_pre; + visit_mod_item_post = visit_mod_item_post; + visit_obj_fn_pre = visit_obj_fn_pre; + visit_obj_fn_post = visit_obj_fn_post; + visit_obj_drop_pre = visit_obj_drop_pre; + visit_obj_drop_post = visit_obj_drop_post; + } +;; + +let rec name_of ncs = + match ncs with + [] -> bug () "Walk.name_of_ncs: empty path" + | [(Ast.COMP_ident i)] -> Ast.NAME_base (Ast.BASE_ident i) + | [(Ast.COMP_app x)] -> Ast.NAME_base (Ast.BASE_app x) + | [(Ast.COMP_idx _)] -> + bug () "Walk.name_of_ncs: path-name contains COMP_idx" + | nc::ncs -> Ast.NAME_ext (name_of ncs, nc) +;; + +let path_to_name + (path:Ast.name_component Stack.t) + : Ast.name = + name_of (stk_elts_from_top path) +;; + + +let mod_item_logging_visitor + (logfn:string->unit) + (path:Ast.name_component Stack.t) + (inner:visitor) + : visitor = + let path_name _ = Ast.fmt_to_str Ast.fmt_name (path_to_name path) in + let visit_mod_item_pre name params item = + logfn (Printf.sprintf "entering %s" (path_name())); + inner.visit_mod_item_pre name params item; + logfn (Printf.sprintf "entered %s" (path_name())); + in + let visit_mod_item_post name params item = + logfn (Printf.sprintf "leaving %s" (path_name())); + inner.visit_mod_item_post name params item; + logfn (Printf.sprintf "left %s" (path_name())); + in + let visit_obj_fn_pre obj ident fn = + logfn (Printf.sprintf "entering %s" (path_name())); + inner.visit_obj_fn_pre obj ident fn; + logfn (Printf.sprintf "entered %s" (path_name())); + in + let visit_obj_fn_post obj ident fn = + logfn (Printf.sprintf "leaving %s" (path_name())); + inner.visit_obj_fn_post obj ident fn; + logfn (Printf.sprintf "left %s" (path_name())); + in + let visit_obj_drop_pre obj b = + logfn (Printf.sprintf "entering %s" (path_name())); + inner.visit_obj_drop_pre obj b; + logfn (Printf.sprintf "entered %s" (path_name())); + in + let visit_obj_drop_post obj fn = + logfn (Printf.sprintf "leaving %s" (path_name())); + inner.visit_obj_drop_post obj fn; + logfn (Printf.sprintf "left %s" (path_name())); + in + { inner with + visit_mod_item_pre = visit_mod_item_pre; + visit_mod_item_post = visit_mod_item_post; + visit_obj_fn_pre = visit_obj_fn_pre; + visit_obj_fn_post = visit_obj_fn_post; + visit_obj_drop_pre = visit_obj_drop_pre; + visit_obj_drop_post = visit_obj_drop_post; + } +;; + + +let walk_bracketed + (pre:'a -> unit) + (children:unit -> unit) + (post:'a -> unit) + (x:'a) + : unit = + begin + pre x; + children (); + post x + end +;; + + +let walk_option + (walker:'a -> unit) + (opt:'a option) + : unit = + match opt with + None -> () + | Some v -> walker v +;; + + +let rec walk_crate + (v:visitor) + (crate:Ast.crate) + : unit = + walk_bracketed + v.visit_crate_pre + (fun _ -> walk_mod_items v (snd crate.node.Ast.crate_items)) + v.visit_crate_post + crate + +and walk_mod_items + (v:visitor) + (items:Ast.mod_items) + : unit = + Hashtbl.iter (walk_mod_item v) items + + +and walk_mod_item + (v:visitor) + (name:Ast.ident) + (item:Ast.mod_item) + : unit = + let children _ = + match item.node.Ast.decl_item with + Ast.MOD_ITEM_type ty -> walk_ty v ty + | Ast.MOD_ITEM_fn f -> walk_fn v f item.id + | Ast.MOD_ITEM_tag (htup, ttag, _) -> + walk_header_tup v htup; + walk_ty_tag v ttag + | Ast.MOD_ITEM_mod (_, items) -> + walk_mod_items v items + | Ast.MOD_ITEM_obj ob -> + walk_header_slots v ob.Ast.obj_state; + walk_constrs v (Some item.id) ob.Ast.obj_constrs; + let oid = { node = ob; id = item.id } in + Hashtbl.iter (walk_obj_fn v oid) ob.Ast.obj_fns; + match ob.Ast.obj_drop with + None -> () + | Some d -> + v.visit_obj_drop_pre oid d; + walk_block v d; + v.visit_obj_drop_post oid d + + in + walk_bracketed + (v.visit_mod_item_pre name item.node.Ast.decl_params) + children + (v.visit_mod_item_post name item.node.Ast.decl_params) + item + + +and walk_ty_tup v ttup = Array.iter (walk_slot v) ttup + +and walk_ty_tag v ttag = Hashtbl.iter (fun _ t -> walk_ty_tup v t) ttag + +and walk_ty + (v:visitor) + (ty:Ast.ty) + : unit = + let children _ = + match ty with + Ast.TY_tup ttup -> walk_ty_tup v ttup + | Ast.TY_vec s -> walk_slot v s + | Ast.TY_rec trec -> Array.iter (fun (_, s) -> walk_slot v s) trec + | Ast.TY_tag ttag -> walk_ty_tag v ttag + | Ast.TY_iso tiso -> Array.iter (walk_ty_tag v) tiso.Ast.iso_group + | Ast.TY_fn tfn -> walk_ty_fn v tfn + | Ast.TY_obj (_, fns) -> + Hashtbl.iter (fun _ tfn -> walk_ty_fn v tfn) fns + | Ast.TY_chan t -> walk_ty v t + | Ast.TY_port t -> walk_ty v t + | Ast.TY_constrained (t,cs) -> + begin + walk_ty v t; + walk_constrs v None cs + end + | Ast.TY_named _ -> () + | Ast.TY_param _ -> () + | Ast.TY_native _ -> () + | Ast.TY_idx _ -> () + | Ast.TY_mach _ -> () + | Ast.TY_type -> () + | Ast.TY_str -> () + | Ast.TY_char -> () + | Ast.TY_int -> () + | Ast.TY_uint -> () + | Ast.TY_bool -> () + | Ast.TY_nil -> () + | Ast.TY_task -> () + | Ast.TY_any -> () + in + walk_bracketed + v.visit_ty_pre + children + v.visit_ty_post + ty + + +and walk_ty_sig + (v:visitor) + (s:Ast.ty_sig) + : unit = + begin + Array.iter (walk_slot v) s.Ast.sig_input_slots; + walk_constrs v None s.Ast.sig_input_constrs; + walk_slot v s.Ast.sig_output_slot; + end + + +and walk_ty_fn + (v:visitor) + (tfn:Ast.ty_fn) + : unit = + let (tsig, _) = tfn in + walk_ty_sig v tsig + + +and walk_constrs + (v:visitor) + (formal_base:node_id option) + (cs:Ast.constrs) + : unit = + Array.iter (walk_constr v formal_base) cs + +and walk_check_calls + (v:visitor) + (calls:Ast.check_calls) + : unit = + Array.iter + begin + fun (f, args) -> + walk_lval v f; + Array.iter (walk_atom v) args + end + calls + + +and walk_constr + (v:visitor) + (formal_base:node_id option) + (c:Ast.constr) + : unit = + walk_bracketed + (v.visit_constr_pre formal_base) + (fun _ -> ()) + (v.visit_constr_post formal_base) + c + +and walk_header_slots + (v:visitor) + (hslots:Ast.header_slots) + : unit = + Array.iter (fun (s,_) -> walk_slot_identified v s) hslots + +and walk_header_tup + (v:visitor) + (htup:Ast.header_tup) + : unit = + Array.iter (walk_slot_identified v) htup + +and walk_obj_fn + (v:visitor) + (obj:Ast.obj identified) + (ident:Ast.ident) + (f:Ast.fn identified) + : unit = + v.visit_obj_fn_pre obj ident f; + walk_fn v f.node f.id; + v.visit_obj_fn_post obj ident f + +and walk_fn + (v:visitor) + (f:Ast.fn) + (id:node_id) + : unit = + walk_header_slots v f.Ast.fn_input_slots; + walk_constrs v (Some id) f.Ast.fn_input_constrs; + walk_slot_identified v f.Ast.fn_output_slot; + walk_block v f.Ast.fn_body + +and walk_slot_identified + (v:visitor) + (s:Ast.slot identified) + : unit = + walk_bracketed + v.visit_slot_identified_pre + (fun _ -> walk_slot v s.node) + v.visit_slot_identified_post + s + + +and walk_slot + (v:visitor) + (s:Ast.slot) + : unit = + walk_option (walk_ty v) s.Ast.slot_ty + + +and walk_stmt + (v:visitor) + (s:Ast.stmt) + : unit = + let walk_stmt_for + (s:Ast.stmt_for) + : unit = + let (si,_) = s.Ast.for_slot in + let (ss,lv) = s.Ast.for_seq in + walk_slot_identified v si; + Array.iter (walk_stmt v) ss; + walk_lval v lv; + walk_block v s.Ast.for_body + in + let walk_stmt_for_each + (s:Ast.stmt_for_each) + : unit = + let (si,_) = s.Ast.for_each_slot in + let (f,az) = s.Ast.for_each_call in + walk_slot_identified v si; + walk_lval v f; + Array.iter (walk_atom v) az; + walk_block v s.Ast.for_each_head + in + let walk_stmt_while + (s:Ast.stmt_while) + : unit = + let (ss,e) = s.Ast.while_lval in + Array.iter (walk_stmt v) ss; + walk_expr v e; + walk_block v s.Ast.while_body + in + let children _ = + match s.node with + Ast.STMT_log a -> + walk_atom v a + + | Ast.STMT_init_rec (lv, atab, base) -> + walk_lval v lv; + Array.iter (fun (_, _, _, a) -> walk_atom v a) atab; + walk_option (walk_lval v) base; + + | Ast.STMT_init_vec (lv, _, atoms) -> + walk_lval v lv; + Array.iter (walk_atom v) atoms + + | Ast.STMT_init_tup (lv, mut_atoms) -> + walk_lval v lv; + Array.iter (fun (_, _, a) -> walk_atom v a) mut_atoms + + | Ast.STMT_init_str (lv, _) -> + walk_lval v lv + + | Ast.STMT_init_port lv -> + walk_lval v lv + + | Ast.STMT_init_chan (chan,port) -> + walk_option (walk_lval v) port; + walk_lval v chan; + + | Ast.STMT_for f -> + walk_stmt_for f + + | Ast.STMT_for_each f -> + walk_stmt_for_each f + + | Ast.STMT_while w -> + walk_stmt_while w + + | Ast.STMT_do_while w -> + walk_stmt_while w + + | Ast.STMT_if i -> + begin + walk_expr v i.Ast.if_test; + walk_block v i.Ast.if_then; + walk_option (walk_block v) i.Ast.if_else + end + + | Ast.STMT_block b -> + walk_block v b + + | Ast.STMT_copy (lv,e) -> + walk_lval v lv; + walk_expr v e + + | Ast.STMT_copy_binop (lv,_,a) -> + walk_lval v lv; + walk_atom v a + + | Ast.STMT_call (dst,f,az) -> + walk_lval v dst; + walk_lval v f; + Array.iter (walk_atom v) az + + | Ast.STMT_bind (dst, f, az) -> + walk_lval v dst; + walk_lval v f; + Array.iter (walk_opt_atom v) az + + | Ast.STMT_spawn (dst,_,p,az) -> + walk_lval v dst; + walk_lval v p; + Array.iter (walk_atom v) az + + | Ast.STMT_ret ao -> + walk_option (walk_atom v) ao + + | Ast.STMT_put at -> + walk_option (walk_atom v) at + + | Ast.STMT_put_each (lv, ats) -> + walk_lval v lv; + Array.iter (walk_atom v) ats + + (* FIXME: this should have a param array, and invoke the visitors. *) + | Ast.STMT_decl (Ast.DECL_mod_item (id, mi)) -> + walk_mod_item v id mi + + | Ast.STMT_decl (Ast.DECL_slot (_, slot)) -> + walk_slot_identified v slot + + | Ast.STMT_yield + | Ast.STMT_fail -> + () + + | Ast.STMT_join task -> + walk_lval v task + + | Ast.STMT_send (dst,src) -> + walk_lval v dst; + walk_lval v src + + | Ast.STMT_recv (dst,src) -> + walk_lval v dst; + walk_lval v src + + | Ast.STMT_be (lv, ats) -> + walk_lval v lv; + Array.iter (walk_atom v) ats + + | Ast.STMT_check_expr e -> + walk_expr v e + + | Ast.STMT_check (cs, calls) -> + walk_constrs v None cs; + walk_check_calls v calls + + | Ast.STMT_check_if (cs,calls,b) -> + walk_constrs v None cs; + walk_check_calls v calls; + walk_block v b + + | Ast.STMT_prove cs -> + walk_constrs v None cs + + | Ast.STMT_alt_tag + { Ast.alt_tag_lval = lval; Ast.alt_tag_arms = arms } -> + walk_lval v lval; + let walk_arm { node = (pat, block) } = + walk_pat v pat; + walk_block v block + in + Array.iter walk_arm arms + + (* FIXME (issue #20): finish this as needed. *) + | Ast.STMT_slice _ + | Ast.STMT_note _ + | Ast.STMT_alt_type _ + | Ast.STMT_alt_port _ -> + bug () "unimplemented statement type in Walk.walk_stmt" + in + walk_bracketed + v.visit_stmt_pre + children + v.visit_stmt_post + s + + +and walk_expr + (v:visitor) + (e:Ast.expr) + : unit = + let children _ = + match e with + Ast.EXPR_binary (_,aa,ab) -> + walk_atom v aa; + walk_atom v ab + | Ast.EXPR_unary (_,a) -> + walk_atom v a + | Ast.EXPR_atom a -> + walk_atom v a + in + walk_bracketed + v.visit_expr_pre + children + v.visit_expr_post + e + +and walk_atom + (v:visitor) + (a:Ast.atom) + : unit = + match a with + Ast.ATOM_literal ls -> walk_lit v ls.node + | Ast.ATOM_lval lv -> walk_lval v lv + + +and walk_opt_atom + (v:visitor) + (ao:Ast.atom option) + : unit = + match ao with + None -> () + | Some a -> walk_atom v a + + +and walk_lit + (v:visitor) + (li:Ast.lit) + : unit = + walk_bracketed + v.visit_lit_pre + (fun _ -> ()) + v.visit_lit_post + li + + +and walk_lval + (v:visitor) + (lv:Ast.lval) + : unit = + walk_bracketed + v.visit_lval_pre + (fun _ -> ()) + v.visit_lval_post + lv + + +and walk_pat + (v:visitor) + (p:Ast.pat) + : unit = + let rec walk p = + match p with + Ast.PAT_lit lit -> walk_lit v lit + | Ast.PAT_tag (_, pats) -> Array.iter walk pats + | Ast.PAT_slot (si, _) -> walk_slot_identified v si + | Ast.PAT_wild -> () + in + walk_bracketed + v.visit_pat_pre + (fun _ -> walk p) + v.visit_pat_post + p + + +and walk_block + (v:visitor) + (b:Ast.block) + : unit = + walk_bracketed + v.visit_block_pre + (fun _ -> (Array.iter (walk_stmt v) b.node)) + v.visit_block_post + b +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/util/bits.ml b/src/boot/util/bits.ml new file mode 100644 index 00000000..3114bd66 --- /dev/null +++ b/src/boot/util/bits.ml @@ -0,0 +1,107 @@ +type t = { + storage: int array; + nbits: int; +} +;; + +let int_bits = + if max_int = (1 lsl 30) - 1 + then 31 + else 63 +;; + +let create nbits flag = + { storage = Array.make (nbits / int_bits + 1) (if flag then lnot 0 else 0); + nbits = nbits } +;; + +(* + * mutate v0 in place: v0.(i) <- v0.(i) op v1.(i), returning bool indicating + * whether any bits in v0 changed in the process. + *) +let process (op:int -> int -> int) (v0:t) (v1:t) : bool = + let changed = ref false in + assert (v0.nbits = v1.nbits); + assert ((Array.length v0.storage) = (Array.length v1.storage)); + Array.iteri + begin + fun i w1 -> + let w0 = v0.storage.(i) in + let w0' = op w0 w1 in + if not (w0' = w0) + then changed := true; + v0.storage.(i) <- w0'; + end + v1.storage; + !changed +;; + +let union = process (lor) ;; +let intersect = process (land) ;; +let copy = process (fun _ w1 -> w1) ;; + +let get (v:t) (i:int) : bool = + assert (i >= 0); + assert (i < v.nbits); + let w = i / int_bits in + let b = i mod int_bits in + let x = 1 land (v.storage.(w) lsr b) in + x = 1 +;; + +let equal (v1:t) (v0:t) : bool = + v0 = v1 +;; + +let clear (v:t) : unit = + for i = 0 to (Array.length v.storage) - 1 + do + v.storage.(i) <- 0 + done +;; + +let invert (v:t) : unit = + for i = 0 to (Array.length v.storage) - 1 + do + v.storage.(i) <- lnot v.storage.(i) + done +;; + +let set (v:t) (i:int) (x:bool) : unit = + assert (i >= 0); + assert (i < v.nbits); + let w = i / int_bits in + let b = i mod int_bits in + let w0 = v.storage.(w) in + let flag = 1 lsl b in + v.storage.(w) <- + if x + then w0 lor flag + else w0 land (lnot flag) +;; + +let to_list (v:t) : int list = + if v.nbits = 0 + then [] + else + let accum = ref [] in + let word = ref v.storage.(0) in + for i = 0 to (v.nbits-1) do + if i mod int_bits = 0 + then word := v.storage.(i / int_bits); + if (1 land (!word)) = 1 + then accum := i :: (!accum); + word := (!word) lsr 1; + done; + !accum +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/util/common.ml b/src/boot/util/common.ml new file mode 100644 index 00000000..f33a6ea1 --- /dev/null +++ b/src/boot/util/common.ml @@ -0,0 +1,709 @@ +(* + * This module goes near the *bottom* of the dependency DAG, and holds basic + * types shared across all phases of the compiler. + *) + +type filename = string +type pos = (filename * int * int) +type span = {lo: pos; hi: pos} + +type node_id = Node of int +type temp_id = Temp of int +type opaque_id = Opaque of int +type constr_id = Constr of int + +let int_of_node (Node i) = i +let int_of_temp (Temp i) = i +let int_of_opaque (Opaque i) = i +let int_of_constr (Constr i) = i + +type 'a identified = { node: 'a; id: node_id } +;; + +let bug _ = + let k s = failwith s + in Printf.ksprintf k +;; + +exception Semant_err of ((node_id option) * string) +;; + +let err (idopt:node_id option) = + let k s = + raise (Semant_err (idopt, s)) + in + Printf.ksprintf k +;; + +(* Some ubiquitous low-level types. *) + +type target = + Linux_x86_elf + | Win32_x86_pe + | MacOS_x86_macho +;; + +type ty_mach = + TY_u8 + | TY_u16 + | TY_u32 + | TY_u64 + | TY_i8 + | TY_i16 + | TY_i32 + | TY_i64 + | TY_f32 + | TY_f64 +;; + +let mach_is_integral (mach:ty_mach) : bool = + match mach with + TY_i8 | TY_i16 | TY_i32 | TY_i64 + | TY_u8 | TY_u16 | TY_u32 | TY_u64 -> true + | TY_f32 | TY_f64 -> false +;; + + +let mach_is_signed (mach:ty_mach) : bool = + match mach with + TY_i8 | TY_i16 | TY_i32 | TY_i64 -> true + | TY_u8 | TY_u16 | TY_u32 | TY_u64 + | TY_f32 | TY_f64 -> false +;; + +let string_of_ty_mach (mach:ty_mach) : string = + match mach with + TY_u8 -> "u8" + | TY_u16 -> "u16" + | TY_u32 -> "u32" + | TY_u64 -> "u64" + | TY_i8 -> "i8" + | TY_i16 -> "i16" + | TY_i32 -> "i32" + | TY_i64 -> "i64" + | TY_f32 -> "f32" + | TY_f64 -> "f64" +;; + +let bytes_of_ty_mach (mach:ty_mach) : int = + match mach with + TY_u8 -> 1 + | TY_u16 -> 2 + | TY_u32 -> 4 + | TY_u64 -> 8 + | TY_i8 -> 1 + | TY_i16 -> 2 + | TY_i32 -> 4 + | TY_i64 -> 8 + | TY_f32 -> 4 + | TY_f64 -> 8 +;; + +type ty_param_idx = int +;; + +type nabi_conv = + CONV_rust + | CONV_cdecl +;; + +type nabi = { nabi_indirect: bool; + nabi_convention: nabi_conv } +;; + +let string_to_conv (a:string) : nabi_conv option = + match a with + "cdecl" -> Some CONV_cdecl + | "rust" -> Some CONV_rust + | _ -> None + +(* FIXME: remove this when native items go away. *) +let string_to_nabi (s:string) (indirect:bool) : nabi option = + match string_to_conv s with + None -> None + | Some c -> + Some { nabi_indirect = indirect; + nabi_convention = c } +;; + +type required_lib_spec = + { + required_libname: string; + required_prefix: int; + } +;; + +type required_lib = + REQUIRED_LIB_rustrt + | REQUIRED_LIB_crt + | REQUIRED_LIB_rust of required_lib_spec + | REQUIRED_LIB_c of required_lib_spec +;; + +type segment = + SEG_text + | SEG_data +;; + +type fixup = + { fixup_name: string; + mutable fixup_file_pos: int option; + mutable fixup_file_sz: int option; + mutable fixup_mem_pos: int64 option; + mutable fixup_mem_sz: int64 option } +;; + + +let new_fixup (s:string) + : fixup = + { fixup_name = s; + fixup_file_pos = None; + fixup_file_sz = None; + fixup_mem_pos = None; + fixup_mem_sz = None } +;; + + +(* + * Auxiliary hashtable functions. + *) + +let htab_keys (htab:('a,'b) Hashtbl.t) : ('a list) = + Hashtbl.fold (fun k _ accum -> k :: accum) htab [] +;; + +let sorted_htab_keys (tab:('a, 'b) Hashtbl.t) : 'a array = + let keys = Array.of_list (htab_keys tab) in + Array.sort compare keys; + keys +;; + +let htab_vals (htab:('a,'b) Hashtbl.t) : ('b list) = + Hashtbl.fold (fun _ v accum -> v :: accum) htab [] +;; + +let htab_pairs (htab:('a,'b) Hashtbl.t) : (('a * 'b) list) = + Hashtbl.fold (fun k v accum -> (k,v) :: accum) htab [] +;; + +let htab_search (htab:('a,'b) Hashtbl.t) (k:'a) : ('b option) = + if Hashtbl.mem htab k + then Some (Hashtbl.find htab k) + else None +;; + +let htab_search_or_default + (htab:('a,'b) Hashtbl.t) + (k:'a) + (def:unit -> 'b) + : 'b = + match htab_search htab k with + Some v -> v + | None -> def() +;; + +let htab_search_or_add + (htab:('a,'b) Hashtbl.t) + (k:'a) + (mk:unit -> 'b) + : 'b = + let def () = + let v = mk() in + Hashtbl.add htab k v; + v + in + htab_search_or_default htab k def +;; + +let htab_put (htab:('a,'b) Hashtbl.t) (a:'a) (b:'b) : unit = + assert (not (Hashtbl.mem htab a)); + Hashtbl.add htab a b +;; + +let htab_map + (htab:('a,'b) Hashtbl.t) + (f:'a -> 'b -> ('c * 'd)) + : (('c,'d) Hashtbl.t) = + let ntab = Hashtbl.create (Hashtbl.length htab) in + let g a b = + let (c,d) = f a b in + htab_put ntab c d + in + Hashtbl.iter g htab; + ntab +;; + + +let htab_fold + (fn:'a -> 'b -> 'c -> 'c) + (init:'c) + (h:('a, 'b) Hashtbl.t) : 'c = + let accum = ref init in + let f a b = accum := (fn a b (!accum)) in + Hashtbl.iter f h; + !accum +;; + + +let reduce_hash_to_list + (fn:'a -> 'b -> 'c) + (h:('a, 'b) Hashtbl.t) + : ('c list) = + htab_fold (fun a b ls -> (fn a b) :: ls) [] h +;; + +(* + * Auxiliary association-array and association-list operations. + *) +let atab_search (atab:('a * 'b) array) (a:'a) : ('b option) = + let lim = Array.length atab in + let rec step i = + if i = lim + then None + else + let (k,v) = atab.(i) in + if k = a + then Some v + else step (i+1) + in + step 0 + +let atab_find (atab:('a * 'b) array) (a:'a) : 'b = + match atab_search atab a with + None -> bug () "atab_find: element not found" + | Some b -> b + +let atab_mem (atab:('a * 'b) array) (a:'a) : bool = + match atab_search atab a with + None -> false + | Some _ -> true + +let rec ltab_search (ltab:('a * 'b) list) (a:'a) : ('b option) = + match ltab with + [] -> None + | (k,v)::_ when k = a -> Some v + | _::lz -> ltab_search lz a + +let ltab_put (ltab:('a * 'b) list) (a:'a) (b:'b) : (('a * 'b) list) = + assert ((ltab_search ltab a) = None); + (a,b)::ltab + +(* + * Auxiliary list functions. + *) + +let rec list_search (list:'a list) (f:'a -> 'b option) : ('b option) = + match list with + [] -> None + | a::az -> + match f a with + Some b -> Some b + | None -> list_search az f + +let rec list_search_ctxt + (list:'a list) + (f:'a -> 'b option) + : ((('a list) * 'b) option) = + match list with + [] -> None + | a::az -> + match f a with + Some b -> Some (list, b) + | None -> list_search_ctxt az f + +let rec list_drop n ls = + if n = 0 + then ls + else list_drop (n-1) (List.tl ls) +;; + + +(* + * Auxiliary option functions. + *) + +let bool_of_option x = + match x with + Some _ -> true + | None -> false + + +(* + * Auxiliary stack functions. + *) + +let stk_fold (s:'a Stack.t) (f:'a -> 'b -> 'b) (x:'b) : 'b = + let r = ref x in + Stack.iter (fun e -> r := f e (!r)) s; + !r + +let stk_elts_from_bot (s:'a Stack.t) : ('a list) = + stk_fold s (fun x y -> x::y) [] + +let stk_elts_from_top (s:'a Stack.t) : ('a list) = + List.rev (stk_elts_from_bot s) + +let stk_search (s:'a Stack.t) (f:'a -> 'b option) : 'b option = + stk_fold s (fun e accum -> match accum with None -> (f e) | x -> x) None + + +(* + * Auxiliary array functions. + *) + +let arr_search (a:'a array) (f:int -> 'a -> 'b option) : 'b option = + let max = Array.length a in + let rec iter i = + if i < max + then + let v = a.(i) in + let r = f i v in + match r with + Some _ -> r + | None -> iter (i+1) + else + None + in + iter 0 +;; + +let arr_idx (arr:'a array) (a:'a) : int = + let find i v = if v = a then Some i else None in + match arr_search arr find with + None -> bug () "arr_idx: element not found" + | Some i -> i +;; + +let arr_map_partial (a:'a array) (f:'a -> 'b option) : 'b array = + let accum a ls = + match f a with + None -> ls + | Some b -> b :: ls + in + Array.of_list (Array.fold_right accum a []) +;; + +let arr_filter_some (a:'a option array) : 'a array = + arr_map_partial a (fun x -> x) +;; + +let arr_find_dups (a:'a array) : ('a * 'a) option = + let copy = Array.copy a in + Array.sort compare copy; + let lasti = (Array.length copy) - 1 in + let rec find_dups i = + if i < lasti then + let this = copy.(i) in + let next = copy.(i+1) in + (if (this = next) then + Some (this, next) + else + find_dups (i+1)) + else + None + in + find_dups 0 +;; + +let arr_check_dups (a:'a array) (f:'a -> 'a -> unit) : unit = + match arr_find_dups a with + Some (x, y) -> f x y + | None -> () +;; + +let arr_map2 (f:'a -> 'b -> 'c) (a:'a array) (b:'b array) : 'c array = + assert ((Array.length a) = (Array.length b)); + Array.init (Array.length a) (fun i -> f a.(i) b.(i)) +;; + +let arr_for_all (f:int -> 'a -> bool) (a:'a array) : bool = + let len = Array.length a in + let rec loop i = + (i >= len) || ((f i a.(i)) && (loop (i+1))) + in + loop 0 +;; + +let arr_exists (f:int -> 'a -> bool) (a:'a array) : bool = + let len = Array.length a in + let rec loop i = + (i < len) && ((f i a.(i)) || (loop (i+1))) + in + loop 0 +;; + +(* + * Auxiliary queue functions. + *) + +let queue_to_list (q:'a Queue.t) : 'a list = + List.rev (Queue.fold (fun ls elt -> elt :: ls) [] q) +;; + +let queue_to_arr (q:'a Queue.t) : 'a array = + Array.init (Queue.length q) (fun _ -> Queue.take q) +;; + +(* + * Auxiliary int64 functions + *) + +let i64_lt (a:int64) (b:int64) : bool = (Int64.compare a b) < 0 +let i64_le (a:int64) (b:int64) : bool = (Int64.compare a b) <= 0 +let i64_ge (a:int64) (b:int64) : bool = (Int64.compare a b) >= 0 +let i64_gt (a:int64) (b:int64) : bool = (Int64.compare a b) > 0 +let i64_max (a:int64) (b:int64) : int64 = + (if (Int64.compare a b) > 0 then a else b) +let i64_min (a:int64) (b:int64) : int64 = + (if (Int64.compare a b) < 0 then a else b) +let i64_align (align:int64) (v:int64) : int64 = + (assert (align <> 0L)); + let mask = Int64.sub align 1L in + Int64.logand (Int64.lognot mask) (Int64.add v mask) +;; + +let rec i64_for (lo:int64) (hi:int64) (thunk:int64 -> unit) : unit = + if i64_lt lo hi then + begin + thunk lo; + i64_for (Int64.add lo 1L) hi thunk; + end +;; + +let rec i64_for_rev (hi:int64) (lo:int64) (thunk:int64 -> unit) : unit = + if i64_ge hi lo then + begin + thunk hi; + i64_for_rev (Int64.sub hi 1L) lo thunk; + end +;; + + +(* + * Auxiliary int32 functions + *) + +let i32_lt (a:int32) (b:int32) : bool = (Int32.compare a b) < 0 +let i32_le (a:int32) (b:int32) : bool = (Int32.compare a b) <= 0 +let i32_ge (a:int32) (b:int32) : bool = (Int32.compare a b) >= 0 +let i32_gt (a:int32) (b:int32) : bool = (Int32.compare a b) > 0 +let i32_max (a:int32) (b:int32) : int32 = + (if (Int32.compare a b) > 0 then a else b) +let i32_min (a:int32) (b:int32) : int32 = + (if (Int32.compare a b) < 0 then a else b) +let i32_align (align:int32) (v:int32) : int32 = + (assert (align <> 0l)); + let mask = Int32.sub align 1l in + Int32.logand (Int32.lognot mask) (Int32.add v mask) +;; + +(* + * Int-as-unichar functions. + *) + +let bounds lo c hi = (lo <= c) && (c <= hi) +;; + +let escaped_char i = + if bounds 0 i 0x7f + then Char.escaped (Char.chr i) + else + if bounds 0 i 0xffff + then Printf.sprintf "\\u%4.4X" i + else Printf.sprintf "\\U%8.8X" i +;; + +let char_as_utf8 i = + let buf = Buffer.create 8 in + let addb i = + Buffer.add_char buf (Char.chr (i land 0xff)) + in + let fini _ = + Buffer.contents buf + in + let rec add_trailing_bytes n i = + if n = 0 + then fini() + else + begin + addb (0b1000_0000 lor ((i lsr ((n-1) * 6)) land 0b11_1111)); + add_trailing_bytes (n-1) i + end + in + if bounds 0 i 0x7f + then (addb i; fini()) + else + if bounds 0x80 i 0x7ff + then (addb ((0b1100_0000) lor (i lsr 6)); + add_trailing_bytes 1 i) + else + if bounds 0x800 i 0xffff + then (addb ((0b1110_0000) lor (i lsr 12)); + add_trailing_bytes 2 i) + else + if bounds 0x1000 i 0x1f_ffff + then (addb ((0b1111_0000) lor (i lsr 18)); + add_trailing_bytes 3 i) + else + if bounds 0x20_0000 i 0x3ff_ffff + then (addb ((0b1111_1000) lor (i lsr 24)); + add_trailing_bytes 4 i) + else + if bounds 0x400_0000 i 0x7fff_ffff + then (addb ((0b1111_1100) lor (i lsr 30)); + add_trailing_bytes 5 i) + else bug () "bad unicode character 0x%X" i +;; + +(* + * Size-expressions. + *) + + +type size = + SIZE_fixed of int64 + | SIZE_fixup_mem_sz of fixup + | SIZE_fixup_mem_pos of fixup + | SIZE_param_size of ty_param_idx + | SIZE_param_align of ty_param_idx + | SIZE_rt_neg of size + | SIZE_rt_add of size * size + | SIZE_rt_mul of size * size + | SIZE_rt_max of size * size + | SIZE_rt_align of size * size +;; + +let rec string_of_size (s:size) : string = + match s with + SIZE_fixed i -> Printf.sprintf "%Ld" i + | SIZE_fixup_mem_sz f -> Printf.sprintf "%s.mem_sz" f.fixup_name + | SIZE_fixup_mem_pos f -> Printf.sprintf "%s.mem_pos" f.fixup_name + | SIZE_param_size i -> Printf.sprintf "ty[%d].size" i + | SIZE_param_align i -> Printf.sprintf "ty[%d].align" i + | SIZE_rt_neg a -> + Printf.sprintf "-(%s)" (string_of_size a) + | SIZE_rt_add (a, b) -> + Printf.sprintf "(%s + %s)" (string_of_size a) (string_of_size b) + | SIZE_rt_mul (a, b) -> + Printf.sprintf "(%s * %s)" (string_of_size a) (string_of_size b) + | SIZE_rt_max (a, b) -> + Printf.sprintf "max(%s,%s)" (string_of_size a) (string_of_size b) + | SIZE_rt_align (align, off) -> + Printf.sprintf "align(%s,%s)" + (string_of_size align) (string_of_size off) +;; + +let neg_sz (a:size) : size = + match a with + SIZE_fixed a -> SIZE_fixed (Int64.neg a) + | _ -> SIZE_rt_neg a +;; + +let add_sz (a:size) (b:size) : size = + match (a, b) with + (SIZE_fixed a, SIZE_fixed b) -> SIZE_fixed (Int64.add a b) + + | ((SIZE_rt_add ((SIZE_fixed a), c)), SIZE_fixed b) + | ((SIZE_rt_add (c, (SIZE_fixed a))), SIZE_fixed b) + | (SIZE_fixed a, (SIZE_rt_add ((SIZE_fixed b), c))) + | (SIZE_fixed a, (SIZE_rt_add (c, (SIZE_fixed b)))) -> + SIZE_rt_add (SIZE_fixed (Int64.add a b), c) + + | (SIZE_fixed 0L, b) -> b + | (a, SIZE_fixed 0L) -> a + | (a, SIZE_fixed b) -> SIZE_rt_add (SIZE_fixed b, a) + | (a, b) -> SIZE_rt_add (a, b) +;; + +let mul_sz (a:size) (b:size) : size = + match (a, b) with + (SIZE_fixed a, SIZE_fixed b) -> SIZE_fixed (Int64.mul a b) + | (a, SIZE_fixed b) -> SIZE_rt_mul (SIZE_fixed b, a) + | (a, b) -> SIZE_rt_mul (a, b) +;; + +let rec max_sz (a:size) (b:size) : size = + let rec no_negs x = + match x with + SIZE_fixed _ + | SIZE_fixup_mem_sz _ + | SIZE_fixup_mem_pos _ + | SIZE_param_size _ + | SIZE_param_align _ -> true + | SIZE_rt_neg _ -> false + | SIZE_rt_add (a,b) -> (no_negs a) && (no_negs b) + | SIZE_rt_mul (a,b) -> (no_negs a) && (no_negs b) + | SIZE_rt_max (a,b) -> (no_negs a) && (no_negs b) + | SIZE_rt_align (a,b) -> (no_negs a) && (no_negs b) + in + match (a, b) with + (SIZE_rt_align _, SIZE_fixed 1L) -> a + | (SIZE_fixed 1L, SIZE_rt_align _) -> b + | (SIZE_param_align _, SIZE_fixed 1L) -> a + | (SIZE_fixed 1L, SIZE_param_align _) -> b + | (a, SIZE_rt_max (b, c)) when a = b -> max_sz a c + | (a, SIZE_rt_max (b, c)) when a = c -> max_sz a b + | (SIZE_rt_max (b, c), a) when a = b -> max_sz a c + | (SIZE_rt_max (b, c), a) when a = c -> max_sz a b + | (SIZE_fixed a, SIZE_fixed b) -> SIZE_fixed (i64_max a b) + | (SIZE_fixed 0L, b) when no_negs b -> b + | (a, SIZE_fixed 0L) when no_negs a -> b + | (a, SIZE_fixed b) -> max_sz (SIZE_fixed b) a + | (a, b) when a = b -> a + | (a, b) -> SIZE_rt_max (a, b) +;; + +(* FIXME: audit this carefuly; I am not terribly certain of the + * algebraic simplification going on here. Sadly, without it + * the diagnostic output from translation becomes completely + * illegible. + *) + +let align_sz (a:size) (b:size) : size = + let rec alignment_of s = + match s with + SIZE_rt_align (SIZE_fixed n, s) -> + let inner_alignment = alignment_of s in + if (Int64.rem n inner_alignment) = 0L + then inner_alignment + else n + | SIZE_rt_add (SIZE_fixed n, s) + | SIZE_rt_add (s, SIZE_fixed n) -> + let inner_alignment = alignment_of s in + if (Int64.rem n inner_alignment) = 0L + then inner_alignment + else 1L (* This could be lcd(...) or such. *) + | SIZE_rt_max (a, SIZE_fixed 1L) -> alignment_of a + | SIZE_rt_max (SIZE_fixed 1L, b) -> alignment_of b + | _ -> 1L + in + match (a, b) with + (SIZE_fixed a, SIZE_fixed b) -> SIZE_fixed (i64_align a b) + | (SIZE_fixed x, _) when i64_lt x 1L -> bug () "alignment less than 1" + | (SIZE_fixed 1L, b) -> b (* everything is 1-aligned. *) + | (_, SIZE_fixed 0L) -> b (* 0 is everything-aligned. *) + | (SIZE_fixed a, b) -> + let inner_alignment = alignment_of b in + if (Int64.rem a inner_alignment) = 0L + then b + else SIZE_rt_align (SIZE_fixed a, b) + | (SIZE_rt_max (a, SIZE_fixed 1L), b) -> SIZE_rt_align (a, b) + | (SIZE_rt_max (SIZE_fixed 1L, a), b) -> SIZE_rt_align (a, b) + | (a, b) -> SIZE_rt_align (a, b) +;; + +let force_sz (a:size) : int64 = + match a with + SIZE_fixed i -> i + | _ -> bug () "force_sz: forced non-fixed size expression %s" + (string_of_size a) +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) |