diff options
| author | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
|---|---|---|
| committer | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
| commit | d6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch) | |
| tree | b425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/be | |
| parent | Initial git commit. (diff) | |
| download | rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip | |
Populate tree.
Diffstat (limited to 'src/boot/be')
| -rw-r--r-- | src/boot/be/abi.ml | 207 | ||||
| -rw-r--r-- | src/boot/be/asm.ml | 755 | ||||
| -rw-r--r-- | src/boot/be/elf.ml | 1760 | ||||
| -rw-r--r-- | src/boot/be/il.ml | 1135 | ||||
| -rw-r--r-- | src/boot/be/macho.ml | 1184 | ||||
| -rw-r--r-- | src/boot/be/pe.ml | 1149 | ||||
| -rw-r--r-- | src/boot/be/ra.ml | 664 | ||||
| -rw-r--r-- | src/boot/be/x86.ml | 2205 |
8 files changed, 9059 insertions, 0 deletions
diff --git a/src/boot/be/abi.ml b/src/boot/be/abi.ml new file mode 100644 index 00000000..fd9ca750 --- /dev/null +++ b/src/boot/be/abi.ml @@ -0,0 +1,207 @@

(*
 * The 'abi' structure is pretty much just a grab-bag of machine
 * dependencies and structure-layout information. Part of the latter
 * is shared with trans and semant.
 *
 * Make some attempt to factor it as time goes by.
 *)

(* Word offsets for structure fields in rust-internal.h, and elsewhere in
   compiler. *)

(* Refcounted-box header: the refcount is word 0. *)
let rc_base_field_refcnt = 0;;

(* Task structure layout: each field sits one word after the previous. *)
let task_field_refcnt = rc_base_field_refcnt;;
let task_field_stk = task_field_refcnt + 1;;
let task_field_runtime_sp = task_field_stk + 1;;
let task_field_rust_sp = task_field_runtime_sp + 1;;
let task_field_gc_alloc_chain = task_field_rust_sp + 1;;
let task_field_dom = task_field_gc_alloc_chain + 1;;
let n_visible_task_fields = task_field_dom + 1;;

(* Domain structure layout. *)
let dom_field_interrupt_flag = 0;;

(* Per-frame glue-function table layout. *)
let frame_glue_fns_field_mark = 0;;
let frame_glue_fns_field_drop = 1;;
let frame_glue_fns_field_reloc = 2;;

(* Refcounted exterior slot: refcount word, then the body. *)
let exterior_rc_slot_field_refcnt = 0;;
let exterior_rc_slot_field_body = 1;;

(* GC exterior slot: two words of GC chain/control data sit *before*
   the refcount word, hence the negative offsets. *)
let exterior_gc_slot_field_next = (-2);;
let exterior_gc_slot_field_ctrl = (-1);;
let exterior_gc_slot_field_refcnt = 0;;
let exterior_gc_slot_field_body = 1;;

(* Header sizes (in words) for the two exterior-slot flavours. *)
let exterior_rc_header_size = 1;;
let exterior_gc_header_size = 3;;

let exterior_gc_malloc_return_adjustment = 2;;

(* Stack-segment structure layout. *)
let stk_field_valgrind_id = 0 + 1;;
let stk_field_limit = stk_field_valgrind_id + 1;;
let stk_field_data = stk_field_limit + 1;;

(* Bound-item pair: (item, binding). *)
let binding_size = 2;;
let binding_field_item = 0;;
let binding_field_binding = 1;;

let general_code_alignment = 16;;

(* Type-descriptor structure layout. *)
let tydesc_field_first_param = 0;;
let tydesc_field_size = 1;;
let tydesc_field_align = 2;;
let tydesc_field_copy_glue = 3;;
let tydesc_field_drop_glue = 4;;
let tydesc_field_free_glue = 5;;
let tydesc_field_mark_glue = 6;;
let tydesc_field_obj_drop_glue = 7;;

(* Vector structure layout. *)
let vec_elt_rc = 0;;
let vec_elt_alloc = 1;;
let vec_elt_fill = 2;;
let vec_elt_data = 3;;

(* Call-tuple layout. *)
let calltup_elt_out_ptr = 0;;
let calltup_elt_task_ptr = 1;;
let calltup_elt_ty_params = 2;;
let calltup_elt_args = 3;;
let calltup_elt_iterator_args = 4;;
let calltup_elt_indirect_args = 5;;

(* Iterator-args sub-tuple layout. *)
let iterator_args_elt_block_fn = 0;;
let iterator_args_elt_outer_frame_ptr = 1;;

(* Indirect-args sub-tuple layout. *)
let indirect_args_elt_closure = 0;;

(* ty_params, src, dst, tydesc, taskptr. *)
let worst_case_glue_call_args = 5;;

(* The grab-bag itself: sizes, register info, and emitter callbacks
   supplied by each concrete backend (x86, ...). *)
type abi =
  {
    abi_word_sz: int64;
    abi_word_bits: Il.bits;
    abi_word_ty: Common.ty_mach;

    abi_is_2addr_machine: bool;
    abi_has_pcrel_data: bool;
    abi_has_pcrel_code: bool;

    abi_n_hardregs: int;
    abi_str_of_hardreg: (int -> string);

    abi_prealloc_quad: (Il.quad' -> Il.quad');
    abi_constrain_vregs: (Il.quad -> Bits.t array -> unit);

    abi_emit_fn_prologue: (Il.emitter
                           -> Common.size (* framesz *)
                           -> Common.size (* callsz *)
                           -> Common.nabi
                           -> Common.fixup (* grow_task *)
                           -> unit);

    abi_emit_fn_epilogue: (Il.emitter -> unit);

    abi_emit_fn_tail_call: (Il.emitter
                            -> int64 (* caller_callsz *)
                            -> int64 (* caller_argsz *)
                            -> Il.code (* callee_code *)
                            -> int64 (* callee_argsz *)
                            -> unit);

    abi_clobbers: (Il.quad -> Il.hreg list);

    abi_emit_native_call: (Il.emitter
                           -> Il.cell (* ret *)
                           -> Common.nabi
                           -> Common.fixup (* callee *)
                           -> Il.operand array (* args *)
                           -> unit);

    abi_emit_native_void_call: (Il.emitter
                                -> Common.nabi
                                -> Common.fixup (* callee *)
                                -> Il.operand array (* args *)
                                -> unit);

    abi_emit_native_call_in_thunk: (Il.emitter
                                    -> Il.cell (* ret *)
                                    -> Common.nabi
                                    -> Il.operand (* callee *)
                                    -> Il.operand array (* args *)
                                    -> unit);
    abi_emit_inline_memcpy: (Il.emitter
                             -> int64 (* n_bytes *)
                             -> Il.reg (* dst_ptr *)
                             -> Il.reg (* src_ptr *)
                             -> Il.reg (* tmp_reg *)
                             -> bool (* ascending *)
                             -> unit);

    (* Global glue. *)
    abi_activate: (Il.emitter -> unit);
    abi_yield: (Il.emitter -> unit);
    abi_unwind: (Il.emitter -> Common.nabi -> Common.fixup -> unit);
    abi_get_next_pc_thunk:
      ((Il.reg                      (* output *)
        * Common.fixup              (* thunk in objfile *)
        * (Il.emitter -> unit))     (* fn to make thunk *)
         option);

    abi_sp_reg: Il.reg;
    abi_fp_reg: Il.reg;
    abi_dwarf_fp_reg: int;
    abi_tp_cell: Il.cell;
    abi_implicit_args_sz: int64;
    abi_frame_base_sz: int64;
    abi_frame_info_sz: int64;
    abi_spill_slot: (Il.spill -> Il.mem);
  }
;;

(* Emit an LEA loading the address denoted by fixup [fix] (with
   referent type [rty]) into [out_reg]. *)
let load_fixup_addr
    (e:Il.emitter)
    (out_reg:Il.reg)
    (fix:Common.fixup)
    (rty:Il.referent_ty)
    : unit =
  Il.emit e
    (Il.lea
       (Il.Reg (out_reg, Il.AddrTy rty))
       (Il.ImmPtr (fix, rty)))
;;

(* Produce an [Il.code] value addressing the code at [fixup],
   choosing between an indirect load through memory, a pc-relative
   immediate, or a direct address load into [out_reg]. *)
let load_fixup_codeptr
    (e:Il.emitter)
    (out_reg:Il.reg)
    (fixup:Common.fixup)
    (has_pcrel_code:bool)
    (indirect:bool)
    : Il.code =
  match indirect, has_pcrel_code with
      true, _ ->
        (* Load a pointer-to-code cell, then dereference it. *)
        load_fixup_addr e out_reg fixup (Il.ScalarTy (Il.AddrTy Il.CodeTy));
        Il.CodePtr (Il.Cell (Il.Mem (Il.RegIn (out_reg, None),
                                     Il.ScalarTy (Il.AddrTy Il.CodeTy))))
    | false, true ->
        (* The target supports pc-relative code refs directly. *)
        Il.CodePtr (Il.ImmPtr (fixup, Il.CodeTy))
    | false, false ->
        (* Materialize the absolute code address in a register. *)
        load_fixup_addr e out_reg fixup Il.CodeTy;
        Il.CodePtr (Il.Cell (Il.Reg (out_reg, Il.AddrTy Il.CodeTy)))
;;


(*
 * Local Variables:
 * fill-column: 78;
 * indent-tabs-mode: nil
 * buffer-file-coding-system: utf-8-unix
 * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
 * End:
 *)
diff --git a/src/boot/be/asm.ml b/src/boot/be/asm.ml new file mode 100644 index 00000000..10b2142a --- /dev/null +++ b/src/boot/be/asm.ml @@ -0,0 +1,755 @@
(*

   Our assembler is an all-at-once, buffer-in-memory job, very simple
   minded. I have 1gb of memory on my laptop: I don't expect to ever
   emit a program that large with this code.
+ + It is based on the 'frag' type, which has a variant for every major + type of machine-blob we know how to write (bytes, zstrings, BSS + blocks, words of various sorts). + + A frag can contain symbolic references between the sub-parts of + it. These are accomplished through ref cells we call fixups, and a + 2-pass (resolution and writing) process defined recursively over + the frag structure. + + Fixups are defined by wrapping a frag in a DEF pseudo-frag with + a fixup attached. This will record information about the wrapped + frag -- positions and sizes -- in the fixup during resolution. + + We say "positions" and "sizes" there, in plural, because both a + file number and a memory number is recorded for each concept. + + File numbers refer to positions and sizes in the file we're + generating, and are based on the native int type for the host + platform -- usually 31 or 62 bits -- whereas the expressions that + *use* position fixups tend to promote them up to 32 or 64 bits + somehow. On a 32 bit platform, you can't generate output buffers + with 64-bit positions (ocaml limitation!) + + Memory numbers are 64 bit, always, and refer to sizes and positions + of frags when they are loaded into memory in the target. When + you're generating code for a 32-bit target, or using a memory + number in a context that's less than 64 bits, the value is + range-checked and truncated. But in all other respects, we imagine + a 32-bit address space is just the prefix of the continuing 64-bit + address space. If you need to pin an object at a particular place + from the point 2^32-1, say, you will need to do arithmetic and use + the MEMPOS pseudo-frag, that sets the current memory position as + it's being processed. + + Fixups can be *used* anywhere else in the frag tree, as many times + as you like. If you try to write an unresolved fixup, the emitter + faults. 
When you specify the use of a fixup, you need to specify + whether you want to use its file size, file position, memory size, + or memory position. + + Positions, addresses, sizes and such, of course, are in bytes. + + Expressions are evaluated to an int64 (signed), even if the + expression is an int32 or less. Depending on how you use the result + of the expression, a range check error may fire (for example, if + the expression evaluates to -2^24 and you're emitting a word16). + + Word endianness is per-file. At the moment this seems acceptable. + + Because we want to be *very specific* about the time and place + arithmetic promotions occur, we define two separate expression-tree + types (with the same polymorphic constructors) and two separate + evaluation functions, with an explicit operator for marking the + promotion-points. + +*) + +open Common;; + + +let log (sess:Session.sess) = + Session.log "asm" + sess.Session.sess_log_asm + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_asm + then thunk () + else () +;; + +exception Bad_fit of string;; +exception Undef_sym of string;; + +type ('a, 'b) expr = + IMM of 'a + | ADD of (('a, 'b) expr) * (('a, 'b) expr) + | SUB of (('a, 'b) expr) * (('a, 'b) expr) + | MUL of (('a, 'b) expr) * (('a, 'b) expr) + | DIV of (('a, 'b) expr) * (('a, 'b) expr) + | REM of (('a, 'b) expr) * (('a, 'b) expr) + | MAX of (('a, 'b) expr) * (('a, 'b) expr) + | ALIGN of (('a, 'b) expr) * (('a, 'b) expr) + | SLL of (('a, 'b) expr) * int + | SLR of (('a, 'b) expr) * int + | SAR of (('a, 'b) expr) * int + | AND of (('a, 'b) expr) * (('a, 'b) expr) + | XOR of (('a, 'b) expr) * (('a, 'b) expr) + | OR of (('a, 'b) expr) * (('a, 'b) expr) + | NOT of (('a, 'b) expr) + | NEG of (('a, 'b) expr) + | F_POS of fixup + | F_SZ of fixup + | M_POS of fixup + | M_SZ of fixup + | EXT of 'b + +type expr32 = (int32, int) expr +;; + +type expr64 = (int64, expr32) expr +;; + + +let rec 
eval32 (e:expr32) + : int32 = + let chop64 kind name v = + let x = Int64.to_int32 v in + if (Int64.compare v (Int64.of_int32 x)) = 0 then + x + else raise (Bad_fit (kind + ^ " fixup " + ^ name + ^ " overflowed 32 bits in eval32: " + ^ Int64.to_string v)) + in + let expandInt _ _ v = Int32.of_int v in + let checkdef kind name v inj = + match v with + None -> + raise (Undef_sym (kind ^ " fixup " ^ name + ^ " undefined in eval32")) + | Some x -> inj kind name x + in + match e with + IMM i -> i + | ADD (a, b) -> Int32.add (eval32 a) (eval32 b) + | SUB (a, b) -> Int32.sub (eval32 a) (eval32 b) + | MUL (a, b) -> Int32.mul (eval32 a) (eval32 b) + | DIV (a, b) -> Int32.div (eval32 a) (eval32 b) + | REM (a, b) -> Int32.rem (eval32 a) (eval32 b) + | MAX (a, b) -> i32_max (eval32 a) (eval32 b) + | ALIGN (a, b) -> i32_align (eval32 a) (eval32 b) + | SLL (a, b) -> Int32.shift_left (eval32 a) b + | SLR (a, b) -> Int32.shift_right_logical (eval32 a) b + | SAR (a, b) -> Int32.shift_right (eval32 a) b + | AND (a, b) -> Int32.logand (eval32 a) (eval32 b) + | XOR (a, b) -> Int32.logxor (eval32 a) (eval32 b) + | OR (a, b) -> Int32.logor (eval32 a) (eval32 b) + | NOT a -> Int32.lognot (eval32 a) + | NEG a -> Int32.neg (eval32 a) + | F_POS f -> + checkdef "file position" + f.fixup_name f.fixup_file_pos expandInt + | F_SZ f -> + checkdef "file size" + f.fixup_name f.fixup_file_sz expandInt + | M_POS f -> + checkdef "mem position" + f.fixup_name f.fixup_mem_pos chop64 + | M_SZ f -> + checkdef "mem size" f.fixup_name f.fixup_mem_sz chop64 + | EXT i -> Int32.of_int i +;; + +let rec eval64 (e:expr64) + : int64 = + let checkdef kind name v inj = + match v with + None -> + raise (Undef_sym (kind ^ " fixup '" + ^ name ^ "' undefined in eval64")) + | Some x -> inj x + in + match e with + IMM i -> i + | ADD (a, b) -> Int64.add (eval64 a) (eval64 b) + | SUB (a, b) -> Int64.sub (eval64 a) (eval64 b) + | MUL (a, b) -> Int64.mul (eval64 a) (eval64 b) + | DIV (a, b) -> Int64.div (eval64 a) (eval64 b) 
+ | REM (a, b) -> Int64.rem (eval64 a) (eval64 b) + | MAX (a, b) -> i64_max (eval64 a) (eval64 b) + | ALIGN (a, b) -> i64_align (eval64 a) (eval64 b) + | SLL (a, b) -> Int64.shift_left (eval64 a) b + | SLR (a, b) -> Int64.shift_right_logical (eval64 a) b + | SAR (a, b) -> Int64.shift_right (eval64 a) b + | AND (a, b) -> Int64.logand (eval64 a) (eval64 b) + | XOR (a, b) -> Int64.logxor (eval64 a) (eval64 b) + | OR (a, b) -> Int64.logor (eval64 a) (eval64 b) + | NOT a -> Int64.lognot (eval64 a) + | NEG a -> Int64.neg (eval64 a) + | F_POS f -> + checkdef "file position" + f.fixup_name f.fixup_file_pos Int64.of_int + | F_SZ f -> + checkdef "file size" + f.fixup_name f.fixup_file_sz Int64.of_int + | M_POS f -> + checkdef "mem position" + f.fixup_name f.fixup_mem_pos (fun x -> x) + | M_SZ f -> + checkdef "mem size" + f.fixup_name f.fixup_mem_sz (fun x -> x) + | EXT e -> Int64.of_int32 (eval32 e) +;; + + +type frag = + MARK (* MARK == 'PAD (IMM 0L)' *) + | SEQ of frag array + | PAD of int + | BSS of int64 + | MEMPOS of int64 + | BYTE of int + | BYTES of int array + | CHAR of char + | STRING of string + | ZSTRING of string + | ULEB128 of expr64 + | SLEB128 of expr64 + | WORD of (ty_mach * expr64) + | ALIGN_FILE of (int * frag) + | ALIGN_MEM of (int * frag) + | DEF of (fixup * frag) + | RELAX of relaxation + +and relaxation = + { relax_options: frag array; + relax_choice: int ref; } +;; + +exception Relax_more of relaxation;; + +let new_relaxation (frags:frag array) = + RELAX { relax_options = frags; + relax_choice = ref ((Array.length frags) - 1); } +;; + + +let rec write_frag + ~(sess:Session.sess) + ~(lsb0:bool) + ~(buf:Buffer.t) + ~(frag:frag) + : unit = + let relax = Queue.create () in + let bump_relax r = + iflog sess (fun _ -> + log sess "bumping relaxation to position %d" + ((!(r.relax_choice)) - 1)); + r.relax_choice := (!(r.relax_choice)) - 1; + if !(r.relax_choice) < 0 + then bug () "relaxation ran out of options" + in + let rec loop _ = + Queue.clear relax; + 
Buffer.clear buf; + resolve_frag_full relax frag; + lower_frag ~sess ~lsb0 ~buf ~relax ~frag; + if Queue.is_empty relax + then () + else + begin + iflog sess (fun _ -> log sess "relaxing"); + Queue.iter bump_relax relax; + loop () + end + in + loop () + + +and resolve_frag_full (relax:relaxation Queue.t) (frag:frag) + : unit = + let file_pos = ref 0 in + let mem_pos = ref 0L in + let bump i = + mem_pos := Int64.add (!mem_pos) (Int64.of_int i); + file_pos := (!file_pos) + i + in + + let uleb (e:expr64) : unit = + let rec loop value = + let value = Int64.shift_right_logical value 7 in + if value = 0L + then bump 1 + else + begin + bump 1; + loop value + end + in + loop (eval64 e) + in + + let sleb (e:expr64) : unit = + let rec loop value = + let byte = Int64.logand value 0xf7L in + let value = Int64.shift_right value 7 in + let signbit = Int64.logand byte 0x40L in + if (((value = 0L) && (signbit = 0L)) || + ((value = -1L) && (signbit = 0x40L))) + then bump 1 + else + begin + bump 1; + loop value + end + in + loop (eval64 e) + in + let rec resolve_frag it = + match it with + | MARK -> () + | SEQ frags -> Array.iter resolve_frag frags + | PAD i -> bump i + | BSS i -> mem_pos := Int64.add (!mem_pos) i + | MEMPOS i -> mem_pos := i + | BYTE _ -> bump 1 + | BYTES ia -> bump (Array.length ia) + | CHAR _ -> bump 1 + | STRING s -> bump (String.length s) + | ZSTRING s -> bump ((String.length s) + 1) + | ULEB128 e -> uleb e + | SLEB128 e -> sleb e + | WORD (mach,_) -> bump (bytes_of_ty_mach mach) + | ALIGN_FILE (n, frag) -> + let spill = (!file_pos) mod n in + let pad = (n - spill) mod n in + file_pos := (!file_pos) + pad; + (* + * NB: aligning the file *causes* likewise alignment of + * memory, since we implement "file alignment" by + * padding! 
+ *) + mem_pos := Int64.add (!mem_pos) (Int64.of_int pad); + resolve_frag frag + + | ALIGN_MEM (n, frag) -> + let n64 = Int64.of_int n in + let spill = Int64.rem (!mem_pos) n64 in + let pad = Int64.rem (Int64.sub n64 spill) n64 in + mem_pos := Int64.add (!mem_pos) pad; + resolve_frag frag + + | DEF (f, i) -> + let fpos1 = !file_pos in + let mpos1 = !mem_pos in + resolve_frag i; + f.fixup_file_pos <- Some fpos1; + f.fixup_mem_pos <- Some mpos1; + f.fixup_file_sz <- Some ((!file_pos) - fpos1); + f.fixup_mem_sz <- Some (Int64.sub (!mem_pos) mpos1) + + | RELAX rel -> + begin + try + resolve_frag rel.relax_options.(!(rel.relax_choice)) + with + Bad_fit _ -> Queue.add rel relax + end + in + resolve_frag frag + +and lower_frag + ~(sess:Session.sess) + ~(lsb0:bool) + ~(buf:Buffer.t) + ~(relax:relaxation Queue.t) + ~(frag:frag) + : unit = + let byte (i:int) = + if i < 0 + then raise (Bad_fit "byte underflow") + else + if i > 255 + then raise (Bad_fit "byte overflow") + else Buffer.add_char buf (Char.chr i) + in + + let uleb (e:expr64) : unit = + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + let rec loop value = + let byte = Int64.logand value 0x7fL in + let value = Int64.shift_right_logical value 7 in + if value = 0L + then emit1 byte + else + begin + emit1 (Int64.logor byte 0x80L); + loop value + end + in + loop (eval64 e) + in + + let sleb (e:expr64) : unit = + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + let rec loop value = + let byte = Int64.logand value 0x7fL in + let value = Int64.shift_right value 7 in + let signbit = Int64.logand byte 0x40L in + if (((value = 0L) && (signbit = 0L)) || + ((value = -1L) && (signbit = 0x40L))) + then emit1 byte + else + begin + emit1 (Int64.logor byte 0x80L); + loop value + end + in + loop (eval64 e) + in + + let word (nbytes:int) (signed:bool) (e:expr64) = + let i = eval64 e in + + (* + FIXME: + + We should really base the entire assembler and memory-position + system on Big_int.big_int, but 
in ocaml the big_int type lacks, + oh, just about every useful function (no format string spec, no + bitwise ops, blah blah) so it's useless; we're stuck on int64 + for bootstrapping. + + For the time being we're just going to require you to represent + those few unsigned 64 bit terms you have in mind via their + signed bit pattern. Suboptimal but it's the best we can do. + *) + + let (top,bot) = + if nbytes >= 8 + then + if signed + then (Int64.max_int,Int64.min_int) + else (Int64.max_int,0L) + else + if signed + then + let bound = (Int64.shift_left 1L ((8 * nbytes) - 1)) in + (Int64.sub bound 1L, Int64.neg bound) + else + let bound = (Int64.shift_left 1L (8 * nbytes)) in + (Int64.sub bound 1L, 0L) + in + + let mask1 = Int64.logand 0xffL in + let shift = Int64.shift_right_logical in + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + if Int64.compare i bot = (-1) + then raise (Bad_fit ("word underflow: " + ^ (Int64.to_string i) + ^ " into " + ^ (string_of_int nbytes) + ^ (if signed then " signed" else " unsigned") + ^ " bytes")) + else + if Int64.compare i top = 1 + then raise (Bad_fit ("word overflow: " + ^ (Int64.to_string i) + ^ " into " + ^ (string_of_int nbytes) + ^ (if signed then " signed" else " unsigned") + ^ " bytes")) + else + if lsb0 + then + for n = 0 to (nbytes - 1) do + emit1 (mask1 (shift i (8*n))) + done + else + for n = (nbytes - 1) downto 0 do + emit1 (mask1 (shift i (8*n))) + done + in + match frag with + MARK -> () + + | SEQ frags -> + Array.iter + begin + fun frag -> + lower_frag ~sess ~lsb0 ~buf ~relax ~frag + end frags + + | PAD c -> + for i = 1 to c do + Buffer.add_char buf '\x00' + done + + | BSS _ -> () + + | MEMPOS _ -> () + + | BYTE i -> byte i + + | BYTES bs -> + iflog sess (fun _ -> log sess "lowering %d bytes" + (Array.length bs)); + Array.iter byte bs + + | CHAR c -> + iflog sess (fun _ -> log sess "lowering char: %c" c); + Buffer.add_char buf c + + | STRING s -> + iflog sess (fun _ -> log sess "lowering string: 
%s" s); + Buffer.add_string buf s + + | ZSTRING s -> + iflog sess (fun _ -> log sess "lowering zstring: %s" s); + Buffer.add_string buf s; + byte 0 + + | ULEB128 e -> uleb e + | SLEB128 e -> sleb e + + | WORD (m,e) -> + iflog sess + (fun _ -> + log sess "lowering word %s" + (string_of_ty_mach m)); + word (bytes_of_ty_mach m) (mach_is_signed m) e + + | ALIGN_FILE (n, frag) -> + let spill = (Buffer.length buf) mod n in + let pad = (n - spill) mod n in + for i = 1 to pad do + Buffer.add_char buf '\x00' + done; + lower_frag sess lsb0 buf relax frag + + | ALIGN_MEM (_, i) -> lower_frag sess lsb0 buf relax i + | DEF (f, i) -> + iflog sess (fun _ -> log sess "lowering fixup: %s" f.fixup_name); + lower_frag sess lsb0 buf relax i; + + | RELAX rel -> + begin + try + lower_frag sess lsb0 buf relax + rel.relax_options.(!(rel.relax_choice)) + with + Bad_fit _ -> Queue.add rel relax + end +;; + +let fold_flags (f:'a -> int64) (flags:'a list) : int64 = + List.fold_left (Int64.logor) 0x0L (List.map f flags) +;; + +let write_out_frag sess lsb0 frag = + let buf = Buffer.create 0xffff in + let file = Session.filename_of sess.Session.sess_out in + let out = open_out_bin file in + write_frag ~sess ~lsb0 ~buf ~frag; + Buffer.output_buffer out buf; + flush out; + close_out out; + Unix.chmod file 0o755 +;; + +(* Asm-reader stuff for loading info back from mapped files. *) +(* + * Unfortunately the ocaml Bigarray interface takes 'int' indices, so + * f.e. can't do 64-bit offsets / files when running on a 32bit platform. + * Despite the fact that we can possibly produce them. Sigh. Yet another + * "bootstrap compiler limitation". 
+ *) +type asm_reader = + { + asm_seek: int -> unit; + asm_get_u32: unit -> int; + asm_get_u16: unit -> int; + asm_get_u8: unit -> int; + asm_get_uleb: unit -> int; + asm_get_zstr: unit -> string; + asm_get_zstr_padded: int -> string; + asm_get_off: unit -> int; + asm_adv: int -> unit; + asm_adv_u32: unit -> unit; + asm_adv_u16: unit -> unit; + asm_adv_u8: unit -> unit; + asm_adv_zstr: unit -> unit; + asm_close: unit -> unit; + } +;; + +type mmap_arr = + (int, Bigarray.int8_unsigned_elt, Bigarray.c_layout) + Bigarray.Array1.t +;; + +let new_asm_reader (sess:Session.sess) (s:filename) : asm_reader = + iflog sess (fun _ -> log sess "opening file %s" s); + let fd = Unix.openfile s [ Unix.O_RDONLY ] 0 in + let arr = (Bigarray.Array1.map_file + fd ~pos:0L + Bigarray.int8_unsigned + Bigarray.c_layout + false (-1)) + in + let tmp = ref Nativeint.zero in + let buf = Buffer.create 16 in + let off = ref 0 in + let is_open = ref true in + let get_word_as_int (nbytes:int) : int = + assert (!is_open); + let lsb0 = true in + tmp := Nativeint.zero; + if lsb0 + then + for j = nbytes-1 downto 0 do + tmp := Nativeint.shift_left (!tmp) 8; + tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j}) + done + else + for j = 0 to nbytes-1 do + tmp := Nativeint.shift_left (!tmp) 8; + tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j}) + done; + off := (!off) + nbytes; + Nativeint.to_int (!tmp) + in + let get_zstr_padded pad_opt = + assert (!is_open); + let i = ref (!off) in + Buffer.clear buf; + let buflen_ok _ = + match pad_opt with + None -> true + | Some pad -> (Buffer.length buf) < pad + in + while arr.{!i} != 0 && (buflen_ok()) do + Buffer.add_char buf (Char.chr arr.{!i}); + incr i + done; + begin + match pad_opt with + None -> off := (!off) + (Buffer.length buf) + 1 + | Some pad -> + begin + assert ((Buffer.length buf) <= pad); + off := (!off) + pad + end + end; + Buffer.contents buf + in + let bump i = + assert (!is_open); + off := (!off) + i + in + { + 
asm_seek = (fun i -> off := i); + asm_get_u32 = (fun _ -> get_word_as_int 4); + asm_get_u16 = (fun _ -> get_word_as_int 2); + asm_get_u8 = (fun _ -> get_word_as_int 1); + asm_get_uleb = + begin + fun _ -> + let rec loop result shift = + let byte = arr.{!off} in + incr off; + let result = result lor ((byte land 0x7f) lsl shift) in + if (byte land 0x80) = 0 + then result + else loop result (shift+7) + in + loop 0 0 + end; + asm_get_zstr = (fun _ -> get_zstr_padded None); + asm_get_zstr_padded = (fun pad -> get_zstr_padded (Some pad)); + asm_get_off = (fun _ -> !off); + asm_adv = bump; + asm_adv_u32 = (fun _ -> bump 4); + asm_adv_u16 = (fun _ -> bump 2); + asm_adv_u8 = (fun _ -> bump 1); + asm_adv_zstr = (fun _ -> while arr.{!off} != 0 + do incr off done); + asm_close = (fun _ -> + assert (!is_open); + Unix.close fd; + is_open := false) + } +;; + + +(* + * Metadata note-section encoding / decoding. + * + * Since the only object format that defines a "note" section at all is + * ELF, we model the contents of the metadata section on ELF's + * notes. But the same blob of data is stuck into PE and Mach-O files + * too. + * + * The format is essentially just the ELF note format: + * + * <un-padded-size-of-name:u32> + * <size-of-desc:u32> + * <type-code=0:u32> + * <name="rust":zstr> + * <0-pad to 4-byte boundary> + * <n=meta-count:u32> + * <k1:zstr> <v1:zstr> + * ... 
+ * <kn:zstr> <vn:zstr> + * <0-pad to 4-byte boundary> + * + *) +let note_rust_frags (meta:(Ast.ident * string) array) : frag = + let desc_fixup = new_fixup ".rust.note metadata" in + let desc = + DEF (desc_fixup, + SEQ [| + WORD (TY_u32, IMM (Int64.of_int (Array.length meta))); + SEQ (Array.map + (fun (k,v) -> SEQ [| ZSTRING k; ZSTRING v; |]) + meta); + ALIGN_FILE (4, MARK) |]) + in + let name = "rust" in + let ty = 0L in + let padded_name = SEQ [| ZSTRING name; + ALIGN_FILE (4, MARK) |] + in + let name_sz = IMM (Int64.of_int ((String.length name) + 1)) in + SEQ [| WORD (TY_u32, name_sz); + WORD (TY_u32, F_SZ desc_fixup); + WORD (TY_u32, IMM ty); + padded_name; + desc;|] +;; + +let read_rust_note (ar:asm_reader) : (Ast.ident * string) array = + ar.asm_adv_u32 (); + ar.asm_adv_u32 (); + assert ((ar.asm_get_u32 ()) = 0); + let rust_name = ar.asm_get_zstr_padded 8 in + assert (rust_name = "rust"); + let n = ar.asm_get_u32() in + let meta = Queue.create () in + for i = 1 to n + do + let k = ar.asm_get_zstr() in + let v = ar.asm_get_zstr() in + Queue.add (k,v) meta + done; + queue_to_arr meta +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/elf.ml b/src/boot/be/elf.ml new file mode 100644 index 00000000..56905b2a --- /dev/null +++ b/src/boot/be/elf.ml @@ -0,0 +1,1760 @@ +(* + * Module for writing System V ELF files. + * + * FIXME: Presently heavily infected with x86 and elf32 specificities, + * though they are reasonably well marked. Needs to be refactored to + * depend on abi fields if it's to be usable for other elf + * configurations. 
+ *) + +open Asm;; +open Common;; + +let log (sess:Session.sess) = + Session.log "obj (elf)" + sess.Session.sess_log_obj + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_obj + then thunk () + else () +;; + + +(* Fixed sizes of structs involved in elf32 spec. *) +let elf32_ehsize = 52L;; +let elf32_phentsize = 32L;; +let elf32_shentsize = 40L;; +let elf32_symsize = 16L;; +let elf32_rela_entsz = 0xcL;; + +type ei_class = + ELFCLASSNONE + | ELFCLASS32 + | ELFCLASS64 +;; + + +type ei_data = + ELFDATANONE + | ELFDATA2LSB + | ELFDATA2MSB +;; + + +let elf_identification ei_class ei_data = + SEQ + [| + STRING "\x7fELF"; + BYTES + [| + (match ei_class with (* EI_CLASS *) + ELFCLASSNONE -> 0 + | ELFCLASS32 -> 1 + | ELFCLASS64 -> 2); + (match ei_data with (* EI_DATA *) + ELFDATANONE -> 0 + | ELFDATA2LSB -> 1 + | ELFDATA2MSB -> 2); + 1; (* EI_VERSION = EV_CURRENT *) + 0; (* EI_PAD #7 *) + 0; (* EI_PAD #8 *) + 0; (* EI_PAD #9 *) + 0; (* EI_PAD #A *) + 0; (* EI_PAD #B *) + 0; (* EI_PAD #C *) + 0; (* EI_PAD #D *) + 0; (* EI_PAD #E *) + 0; (* EI_PAD #F *) + |] + |] +;; + + +type e_type = + ET_NONE + | ET_REL + | ET_EXEC + | ET_DYN + | ET_CORE +;; + + +type e_machine = + (* Maybe support more later. 
*) + EM_NONE + | EM_386 + | EM_X86_64 +;; + + +type e_version = + EV_NONE + | EV_CURRENT +;; + + +let elf32_header + ~(sess:Session.sess) + ~(ei_data:ei_data) + ~(e_type:e_type) + ~(e_machine:e_machine) + ~(e_version:e_version) + ~(e_entry_fixup:fixup) + ~(e_phoff_fixup:fixup) + ~(e_shoff_fixup:fixup) + ~(e_phnum:int64) + ~(e_shnum:int64) + ~(e_shstrndx:int64) + : frag = + let elf_header_fixup = new_fixup "elf header" in + let entry_pos = + if sess.Session.sess_library_mode + then (IMM 0L) + else (M_POS e_entry_fixup) + in + DEF + (elf_header_fixup, + SEQ [| elf_identification ELFCLASS32 ei_data; + WORD (TY_u16, (IMM (match e_type with + ET_NONE -> 0L + | ET_REL -> 1L + | ET_EXEC -> 2L + | ET_DYN -> 3L + | ET_CORE -> 4L))); + WORD (TY_u16, (IMM (match e_machine with + EM_NONE -> 0L + | EM_386 -> 3L + | EM_X86_64 -> 62L))); + WORD (TY_u32, (IMM (match e_version with + EV_NONE -> 0L + | EV_CURRENT -> 1L))); + WORD (TY_u32, entry_pos); + WORD (TY_u32, (F_POS e_phoff_fixup)); + WORD (TY_u32, (F_POS e_shoff_fixup)); + WORD (TY_u32, (IMM 0L)); (* e_flags *) + WORD (TY_u16, (IMM elf32_ehsize)); + WORD (TY_u16, (IMM elf32_phentsize)); + WORD (TY_u16, (IMM e_phnum)); + WORD (TY_u16, (IMM elf32_shentsize)); + WORD (TY_u16, (IMM e_shnum)); + WORD (TY_u16, (IMM e_shstrndx)); + |]) +;; + + +type sh_type = + SHT_NULL + | SHT_PROGBITS + | SHT_SYMTAB + | SHT_STRTAB + | SHT_RELA + | SHT_HASH + | SHT_DYNAMIC + | SHT_NOTE + | SHT_NOBITS + | SHT_REL + | SHT_SHLIB + | SHT_DYNSYM +;; + + +type sh_flags = + SHF_WRITE + | SHF_ALLOC + | SHF_EXECINSTR +;; + + +let section_header + ~(shstring_table_fixup:fixup) + ~(shname_string_fixup:fixup) + ~(sh_type:sh_type) + ~(sh_flags:sh_flags list) + ~(section_fixup:fixup option) + ~(sh_addralign:int64) + ~(sh_entsize:int64) + ~(sh_link:int64 option) + : frag = + SEQ + [| + WORD (TY_i32, (SUB + ((F_POS shname_string_fixup), + (F_POS shstring_table_fixup)))); + WORD (TY_u32, (IMM (match sh_type with + SHT_NULL -> 0L + | SHT_PROGBITS -> 1L + | 
SHT_SYMTAB -> 2L + | SHT_STRTAB -> 3L + | SHT_RELA -> 4L + | SHT_HASH -> 5L + | SHT_DYNAMIC -> 6L + | SHT_NOTE -> 7L + | SHT_NOBITS -> 8L + | SHT_REL -> 9L + | SHT_SHLIB -> 10L + | SHT_DYNSYM -> 11L))); + WORD (TY_u32, (IMM (fold_flags + (fun f -> match f with + SHF_WRITE -> 0x1L + | SHF_ALLOC -> 0x2L + | SHF_EXECINSTR -> 0x4L) sh_flags))); + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (M_POS s))); + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (F_POS s))); + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (F_SZ s))); + WORD (TY_u32, (IMM (match sh_link with + None -> 0L + | Some i -> i))); + WORD (TY_u32, (IMM 0L)); (* sh_info *) + WORD (TY_u32, (IMM sh_addralign)); + WORD (TY_u32, (IMM sh_entsize)); + |] +;; + + +type p_type = + PT_NULL + | PT_LOAD + | PT_DYNAMIC + | PT_INTERP + | PT_NOTE + | PT_SHLIB + | PT_PHDR +;; + + +type p_flag = + PF_X + | PF_W + | PF_R +;; + + +let program_header + ~(p_type:p_type) + ~(segment_fixup:fixup) + ~(p_flags:p_flag list) + ~(p_align:int64) + : frag = + SEQ + [| + WORD (TY_u32, (IMM (match p_type with + PT_NULL -> 0L + | PT_LOAD -> 1L + | PT_DYNAMIC -> 2L + | PT_INTERP -> 3L + | PT_NOTE -> 4L + | PT_SHLIB -> 5L + | PT_PHDR -> 6L))); + WORD (TY_u32, (F_POS segment_fixup)); + WORD (TY_u32, (M_POS segment_fixup)); + WORD (TY_u32, (M_POS segment_fixup)); + WORD (TY_u32, (F_SZ segment_fixup)); + WORD (TY_u32, (M_SZ segment_fixup)); + WORD (TY_u32, (IMM (fold_flags + (fun f -> + match f with + PF_X -> 0x1L + | PF_W -> 0x2L + | PF_R -> 0x4L) + p_flags))); + WORD (TY_u32, (IMM p_align)); + |] +;; + + +type st_bind = + STB_LOCAL + | STB_GLOBAL + | STB_WEAK +;; + + +type st_type = + STT_NOTYPE + | STT_OBJECT + | STT_FUNC + | STT_SECTION + | STT_FILE +;; + + +(* Special symbol-section indices *) +let shn_UNDEF = 0L;; +let shn_ABS = 0xfff1L;; +let shn_ABS = 0xfff2L;; + + +let symbol + ~(string_table_fixup:fixup) + ~(name_string_fixup:fixup) + 
~(sym_target_fixup:fixup option) + ~(st_bind:st_bind) + ~(st_type:st_type) + ~(st_shndx:int64) + : frag = + let st_bind_num = + match st_bind with + STB_LOCAL -> 0L + | STB_GLOBAL -> 1L + | STB_WEAK -> 2L + in + let st_type_num = + match st_type with + STT_NOTYPE -> 0L + | STT_OBJECT -> 1L + | STT_FUNC -> 2L + | STT_SECTION -> 3L + | STT_FILE -> 4L + in + SEQ + [| + WORD (TY_u32, (SUB + ((F_POS name_string_fixup), + (F_POS string_table_fixup)))); + WORD (TY_u32, (match sym_target_fixup with + None -> (IMM 0L) + | Some f -> (M_POS f))); + WORD (TY_u32, (match sym_target_fixup with + None -> (IMM 0L) + | Some f -> (M_SZ f))); + WORD (TY_u8, (* st_info *) + (OR + ((SLL ((IMM st_bind_num), 4)), + (AND ((IMM st_type_num), (IMM 0xfL)))))); + WORD (TY_u8, (IMM 0L)); (* st_other *) + WORD (TY_u16, (IMM st_shndx)); + |] +;; + +type d_tag = + DT_NULL + | DT_NEEDED + | DT_PLTRELSZ + | DT_PLTGOT + | DT_HASH + | DT_STRTAB + | DT_SYMTAB + | DT_RELA + | DT_RELASZ + | DT_RELAENT + | DT_STRSZ + | DT_SYMENT + | DT_INIT + | DT_FINI + | DT_SONAME + | DT_RPATH + | DT_SYMBOLIC + | DT_REL + | DT_RELSZ + | DT_RELENT + | DT_PLTREL + | DT_DEBUG + | DT_TEXTREL + | DT_JMPREL + | DT_BIND_NOW + | DT_INIT_ARRAY + | DT_FINI_ARRAY + | DT_INIT_ARRAYSZ + | DT_FINI_ARRAYSZ + | DT_RUNPATH + | DT_FLAGS + | DT_ENCODING + | DT_PREINIT_ARRAY + | DT_PREINIT_ARRAYSZ +;; + +type elf32_dyn = (d_tag * expr64);; + +let elf32_num_of_dyn_tag tag = + match tag with + DT_NULL -> 0L + | DT_NEEDED -> 1L + | DT_PLTRELSZ -> 2L + | DT_PLTGOT -> 3L + | DT_HASH -> 4L + | DT_STRTAB -> 5L + | DT_SYMTAB -> 6L + | DT_RELA -> 7L + | DT_RELASZ -> 8L + | DT_RELAENT -> 9L + | DT_STRSZ -> 10L + | DT_SYMENT -> 11L + | DT_INIT -> 12L + | DT_FINI -> 13L + | DT_SONAME -> 14L + | DT_RPATH -> 15L + | DT_SYMBOLIC -> 16L + | DT_REL -> 17L + | DT_RELSZ -> 18L + | DT_RELENT -> 19L + | DT_PLTREL -> 20L + | DT_DEBUG -> 21L + | DT_TEXTREL -> 22L + | DT_JMPREL -> 23L + | DT_BIND_NOW -> 24L + | DT_INIT_ARRAY -> 25L + | DT_FINI_ARRAY -> 26L + | 
DT_INIT_ARRAYSZ -> 27L + | DT_FINI_ARRAYSZ -> 28L + | DT_RUNPATH -> 29L + | DT_FLAGS -> 30L + | DT_ENCODING -> 31L + | DT_PREINIT_ARRAY -> 32L + | DT_PREINIT_ARRAYSZ -> 33L +;; + +let elf32_dyn_frag d = + let (tag, expr) = d in + let tagval = elf32_num_of_dyn_tag tag in + SEQ [| WORD (TY_u32, (IMM tagval)); WORD (TY_u32, expr) |] +;; + +type elf32_386_reloc_type = + R_386_NONE + | R_386_32 + | R_386_PC32 + | R_386_GOT32 + | R_386_PLT32 + | R_386_COPY + | R_386_GLOB_DAT + | R_386_JMP_SLOT + | R_386_RELATIVE + | R_386_GOTOFF + | R_386_GOTPC +;; + + +type elf32_386_rela = + { elf32_386_rela_type: elf32_386_reloc_type; + elf32_386_rela_offset: expr64; + elf32_386_rela_sym: expr64; + elf32_386_rela_addend: expr64 } +;; + +let elf32_386_rela_frag r = + let type_val = + match r.elf32_386_rela_type with + R_386_NONE -> 0L + | R_386_32 -> 1L + | R_386_PC32 -> 2L + | R_386_GOT32 -> 3L + | R_386_PLT32 -> 4L + | R_386_COPY -> 5L + | R_386_GLOB_DAT -> 6L + | R_386_JMP_SLOT -> 7L + | R_386_RELATIVE -> 8L + | R_386_GOTOFF -> 9L + | R_386_GOTPC -> 10L + in + let info_expr = + WORD (TY_u32, + (OR + (SLL ((r.elf32_386_rela_sym), 8), + AND ((IMM 0xffL), (IMM type_val))))) + in + SEQ [| WORD (TY_u32, r.elf32_386_rela_offset); + info_expr; + WORD (TY_u32, r.elf32_386_rela_addend) |] +;; + + +let elf32_linux_x86_file + ~(sess:Session.sess) + ~(crate:Ast.crate) + ~(entry_name:string) + ~(text_frags:(string option, frag) Hashtbl.t) + ~(data_frags:(string option, frag) Hashtbl.t) + ~(rodata_frags:(string option, frag) Hashtbl.t) + ~(required_fixups:(string, fixup) Hashtbl.t) + ~(dwarf:Dwarf.debug_records) + ~(sem:Semant.ctxt) + ~(needed_libs:string array) + : frag = + + (* Procedure Linkage Tables (PLTs), Global Offset Tables + * (GOTs), and the relocations that set them up: + * + * The PLT goes in a section called .plt and GOT in a section called + * .got. The portion of the GOT that holds PLT jump slots goes in a + * section called .got.plt. 
Dynamic relocations for these jump slots go in + * section .rela.plt. + * + * The easiest way to understand the PLT/GOT system is to draw it: + * + * PLT GOT + * +----------------------+ +----------------------+ + * 0| push &<GOT[1]> 0| <reserved> + * | jmp *GOT[2] 1| <libcookie> + * | 2| & <ld.so:resolve-a-sym> + * 1| jmp *GOT[3] 3| & <'push 0' in PLT[1]> + * | push 0 4| & <'push 1' in PLT[2]> + * | jmp *PLT[0] 5| & <'push 2' in PLT[3]> + * | + * 2| jmp *GOT[4] + * | push 1 + * | jmp *PLT[0] + * | + * 2| jmp *GOT[5] + * | push 2 + * | jmp *PLT[0] + * + * + * In normal user code, we call PLT entries with a call to a + * PC-relative address, the PLT entry, which itself does an indirect + * jump through a slot in the GOT that it also addresses + * PC-relative. This makes the whole scheme PIC. + * + * The linker fills in the GOT on startup. For the first 3, it uses + * its own thinking. For the remainder it needs to be instructed to + * fill them in with "jump slot relocs", type R_386_JUMP_SLOT, each + * of which says in effect which PLT entry it's to point back to and + * which symbol it's to be resolved to later. These relocs go in the + * section .rela.plt. 
+ *) + + let plt0_fixup = new_fixup "PLT[0]" in + let got_prefix = SEQ [| WORD (TY_u32, (IMM 0L)); + WORD (TY_u32, (IMM 0L)); + WORD (TY_u32, (IMM 0L)); |] + in + + let got_cell reg i = + let got_entry_off = Int64.of_int (i*4) in + let got_entry_mem = Il.RegIn (reg, (Some (Asm.IMM got_entry_off))) in + Il.Mem (got_entry_mem, Il.ScalarTy (Il.AddrTy Il.CodeTy)) + in + + let got_code_cell reg i = + Il.CodePtr (Il.Cell (got_cell reg i)) + in + + let plt0_frag = + let reg = Il.Hreg X86.eax in + let e = X86.new_emitter_without_vregs () in + Il.emit e (Il.Push (Il.Cell (got_cell reg 1))); + Il.emit e (Il.jmp Il.JMP (got_code_cell reg 2)); + Il.emit e Il.Nop; + Il.emit e Il.Nop; + Il.emit e Il.Nop; + Il.emit e Il.Nop; + DEF (plt0_fixup, (X86.frags_of_emitted_quads sess e)) + in + + (* + * The existence of the GOT/PLT mish-mash causes, therefore, the + * following new sections: + * + * .plt - the PLT itself, in the r/x text segment + * .got.plt - the PLT-used portion of the GOT, in the r/w segment + * .rela.plt - the dynamic relocs for the GOT-PLT, in the r/x segment + * + * In addition, because we're starting up a dynamically linked executable, + * we have to have several more sections! + * + * .interp - the read-only section that names ld.so + * .dynsym - symbols named by the PLT/GOT entries, r/x segment + * .dynstr - string-names used in those symbols, r/x segment + * .hash - hashtable in which to look these up, r/x segment + * .dynamic - the machine-readable description of the dynamic + * linkage requirements of this elf file, in the + * r/w _DYNAMIC segment + * + * The Dynamic section contains a sequence of 2-word records of type + * d_tag. + * + *) + + (* There are 17 official section headers in the file we're making: *) + (* *) + (* section 0: <null section> *) + (* *) + (* section 1: .interp (segment 1: R+X, INTERP) *) + (* *) + (* section 2: .text (segment 2: R+X, LOAD) *) + (* section 3: .rodata ... *) + (* section 4: .dynsym ... *) + (* section 5: .dynstr ... 
*)
  (* section 6:  .hash              ...                                 *)
  (*                                                                    *)
  (* section 7:  .plt               ...                                 *)
  (*                                                                    *)
  (* section 8:  .got               ...                                 *)
  (*                                                                    *)
  (* section 9:  .rela.plt          ...                                 *)
  (*                                                                    *)
  (* section 10: .data              (segment 3: R+W, LOAD)              *)
  (* section 11: .bss               ...                                 *)
  (*                                                                    *)
  (* section 12: .dynamic           (segment 4: R+W, DYNAMIC)           *)
  (*                                                                    *)
  (* section 13: .shstrtab          (not in a segment)                  *)
  (* section 14: .debug_info        (segment 2: cont'd)                 *)
  (* section 15: .debug_abbrev      ...                                 *)
  (* section 16: .note.rust         (segment 5: NOTE)                   *)
  (*                                                                    *)
  (* NOTE(review): the .debug_aranges, .debug_pubnames, .debug_line and
   * .debug_frame headers are currently commented out in the
   * section_headers array below, so they occupy no section indices;
   * only their name strings are emitted into .shstrtab.  The previous
   * version of this comment listed them with duplicated indices
   * (14/15 repeated). *)

  (* Build the fixup that names a section inside .shstrtab. *)
  let sname s =
    new_fixup (Printf.sprintf "string name of '%s' section" s)
  in
  let null_section_name_fixup = sname "<null>" in
  let interp_section_name_fixup = sname ".interp" in
  let text_section_name_fixup = sname ".text" in
  let rodata_section_name_fixup = sname ".rodata" in
  let dynsym_section_name_fixup = sname ".dynsym" in
  let dynstr_section_name_fixup = sname ".dynstr" in
  let hash_section_name_fixup = sname ".hash" in
  let plt_section_name_fixup = sname ".plt" in
  let got_plt_section_name_fixup = sname ".got.plt" in
  let rela_plt_section_name_fixup = sname ".rela.plt" in
  let data_section_name_fixup = sname ".data" in
  let bss_section_name_fixup = sname ".bss" in
  let dynamic_section_name_fixup = sname ".dynamic" in
  let shstrtab_section_name_fixup = sname ".shstrtab" in
  let debug_aranges_section_name_fixup = sname ".debug_aranges" in
  let debug_pubnames_section_name_fixup = sname ".debug_pubnames" in
  let debug_info_section_name_fixup = sname ".debug_info" in
  let debug_abbrev_section_name_fixup = sname ".debug_abbrev" in
  let debug_line_section_name_fixup = sname ".debug_line" in
  let debug_frame_section_name_fixup = sname ".debug_frame" in
  let note_rust_section_name_fixup = sname ".note.rust" in

  (* Section indices referenced by symbols and sh_link fields; the
   * commented-out ones are kept for documentation of the layout. *)
  (* let interpndx = 1L in *)  (* Section index of .interp *)
  let textndx = 2L in          (* Section index of .text *)
  let
rodatandx = 3L in      (* Section index of .rodata *)
  let dynsymndx = 4L in          (* Section index of .dynsym *)
  let dynstrndx = 5L in          (* Section index of .dynstr *)
  (* let hashndx = 6L in *)      (* Section index of .hash *)
  (* let pltndx = 7L in *)       (* Section index of .plt *)
  (* let gotpltndx = 8L in *)    (* Section index of .got.plt *)
  (* let relapltndx = 9L in *)   (* Section index of .rela.plt *)
  let datandx = 10L in           (* Section index of .data *)
  (* let bssndx = 11L in *)      (* Section index of .bss *)
  (* let dynamicndx = 12L in *)  (* Section index of .dynamic *)
  let shstrtabndx = 13L in       (* Section index of .shstrtab *)

  (* Fixups marking the position/size of each emitted section body;
   * these feed the section headers, program headers and .dynamic
   * entries built below. *)
  let section_header_table_fixup = new_fixup ".section header table" in
  let interp_section_fixup = new_fixup ".interp section" in
  let text_section_fixup = new_fixup ".text section" in
  let rodata_section_fixup = new_fixup ".rodata section" in
  let dynsym_section_fixup = new_fixup ".dynsym section" in
  let dynstr_section_fixup = new_fixup ".dynstr section" in
  let hash_section_fixup = new_fixup ".hash section" in
  let plt_section_fixup = new_fixup ".plt section" in
  let got_plt_section_fixup = new_fixup ".got.plt section" in
  let rela_plt_section_fixup = new_fixup ".rela.plt section" in
  let data_section_fixup = new_fixup ".data section" in
  let bss_section_fixup = new_fixup ".bss section" in
  let dynamic_section_fixup = new_fixup ".dynamic section" in
  let shstrtab_section_fixup = new_fixup ".shstrtab section" in
  (* BUGFIX: this fixup's debug label was a copy-paste of
   * ".shstrtab section", giving two distinct fixups the same name. *)
  let note_rust_section_fixup = new_fixup ".note.rust section" in

  (* The .shstrtab body: NUL-terminated section-name strings, each
   * DEF'd so the section headers can reference its offset. *)
  let shstrtab_section =
    SEQ
      [|
        DEF (null_section_name_fixup, ZSTRING "");
        DEF (interp_section_name_fixup, ZSTRING ".interp");
        DEF (text_section_name_fixup, ZSTRING ".text");
        DEF (rodata_section_name_fixup, ZSTRING ".rodata");
        DEF (dynsym_section_name_fixup, ZSTRING ".dynsym");
        DEF (dynstr_section_name_fixup, ZSTRING ".dynstr");
        DEF (hash_section_name_fixup, ZSTRING ".hash");
        DEF (plt_section_name_fixup, ZSTRING ".plt");
        DEF
(got_plt_section_name_fixup, ZSTRING ".got.plt"); + DEF (rela_plt_section_name_fixup, ZSTRING ".rela.plt"); + DEF (data_section_name_fixup, ZSTRING ".data"); + DEF (bss_section_name_fixup, ZSTRING ".bss"); + DEF (dynamic_section_name_fixup, ZSTRING ".dynamic"); + DEF (shstrtab_section_name_fixup, ZSTRING ".shstrtab"); + DEF (debug_aranges_section_name_fixup, ZSTRING ".debug_aranges"); + DEF (debug_pubnames_section_name_fixup, ZSTRING ".debug_pubnames"); + DEF (debug_info_section_name_fixup, ZSTRING ".debug_info"); + DEF (debug_abbrev_section_name_fixup, ZSTRING ".debug_abbrev"); + DEF (debug_line_section_name_fixup, ZSTRING ".debug_line"); + DEF (debug_frame_section_name_fixup, ZSTRING ".debug_frame"); + DEF (note_rust_section_name_fixup, ZSTRING ".note.rust"); + |] + in + + let section_headers = + [| + (* <null> *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: null_section_name_fixup + ~sh_type: SHT_NULL + ~sh_flags: [] + ~section_fixup: None + ~sh_addralign: 0L + ~sh_entsize: 0L + ~sh_link: None); + + (* .interp *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: interp_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some interp_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .text *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: text_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ] + ~section_fixup: (Some text_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .rodata *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: rodata_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some rodata_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .dynsym *) + (section_header + ~shstring_table_fixup: 
shstrtab_section_fixup + ~shname_string_fixup: dynsym_section_name_fixup + ~sh_type: SHT_DYNSYM + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some dynsym_section_fixup) + ~sh_addralign: 8L + ~sh_entsize: elf32_symsize + ~sh_link: (Some dynstrndx) ); + + (* .dynstr *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: dynstr_section_name_fixup + ~sh_type: SHT_STRTAB + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some dynstr_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .hash *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: hash_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some hash_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 4L + ~sh_link: (Some dynsymndx)); + + (* .plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: plt_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ] + ~section_fixup: (Some plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); + + (* .got.plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: got_plt_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some got_plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); + + (* .rela.plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: rela_plt_section_name_fixup + ~sh_type: SHT_RELA + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some rela_plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: elf32_rela_entsz + ~sh_link: (Some dynsymndx)); + + (* .data *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: data_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some data_section_fixup) + 
~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .bss *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: bss_section_name_fixup + ~sh_type: SHT_NOBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some bss_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .dynamic *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: dynamic_section_name_fixup + ~sh_type: SHT_DYNAMIC + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some dynamic_section_fixup) + ~sh_addralign: 8L + ~sh_entsize: 0L + ~sh_link: None); + + (* .shstrtab *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: shstrtab_section_name_fixup + ~sh_type: SHT_STRTAB + ~sh_flags: [] + ~section_fixup: (Some shstrtab_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + +(* + FIXME: uncomment the dwarf section headers as you make use of them; + recent gdb versions have got fussier about parsing dwarf and don't + like seeing junk there. 
+*) + + (* .debug_aranges *) +(* + + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_aranges_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_aranges_fixup) + ~sh_addralign: 8L + ~sh_entsize: 0L + ~sh_link: None); +*) + (* .debug_pubnames *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_pubnames_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_pubnames_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .debug_info *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_info_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_info_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .debug_abbrev *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_abbrev_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_abbrev_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + (* .debug_line *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_line_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_line_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .debug_frame *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_frame_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_frame_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .note.rust *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: note_rust_section_name_fixup + 
~sh_type: SHT_NOTE + ~sh_flags: [] + ~section_fixup: (Some note_rust_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + |] + in + let section_header_table = SEQ section_headers in + + + (* There are 6 official program headers in the file we're making: *) + (* segment 0: RX / PHDR *) + (* segment 1: R / INTERP *) + (* segment 2: RX / LOAD *) + (* segment 3: RW / LOAD *) + (* segment 4: RW / DYNAMIC *) + (* segment 5: R *) + + let program_header_table_fixup = new_fixup "program header table" in + let segment_0_fixup = new_fixup "segment 0" in + let segment_1_fixup = new_fixup "segment 1" in + let segment_2_fixup = new_fixup "segment 2" in + let segment_3_fixup = new_fixup "segment 3" in + let segment_4_fixup = new_fixup "segment 4" in + let segment_5_fixup = new_fixup "segment 5" in + + let segment_0_align = 4 in + let segment_1_align = 1 in + let segment_2_align = 0x1000 in + let segment_3_align = 0x1000 in + let segment_4_align = 0x1000 in + let segment_5_align = 1 in + + let program_headers = [| + (program_header + ~p_type: PT_PHDR + ~segment_fixup: segment_0_fixup + ~p_flags: [ PF_R; PF_X ] + ~p_align: (Int64.of_int segment_0_align)); + (program_header + ~p_type: PT_INTERP + ~segment_fixup: segment_1_fixup + ~p_flags: [ PF_R ] + ~p_align: (Int64.of_int segment_1_align)); + (program_header + ~p_type: PT_LOAD + ~segment_fixup: segment_2_fixup + ~p_flags: [ PF_R; PF_X ] + ~p_align: (Int64.of_int segment_2_align)); + (program_header + ~p_type: PT_LOAD + ~segment_fixup: segment_3_fixup + ~p_flags: [ PF_R; PF_W ] + ~p_align: (Int64.of_int segment_3_align)); + (program_header + ~p_type: PT_DYNAMIC + ~segment_fixup: segment_4_fixup + ~p_flags: [ PF_R; PF_W ] + ~p_align: (Int64.of_int segment_4_align)); + (program_header + ~p_type: PT_NOTE + ~segment_fixup: segment_5_fixup + ~p_flags: [ PF_R;] + ~p_align: (Int64.of_int segment_5_align)); + |] + in + let program_header_table = SEQ program_headers in + + let e_entry_fixup = new_fixup "entry symbol" 
in + + let elf_header = + elf32_header + ~sess + ~ei_data: ELFDATA2LSB + ~e_type: ET_DYN + ~e_machine: EM_386 + ~e_version: EV_CURRENT + + ~e_entry_fixup: e_entry_fixup + ~e_phoff_fixup: program_header_table_fixup + ~e_shoff_fixup: section_header_table_fixup + ~e_phnum: (Int64.of_int (Array.length program_headers)) + ~e_shnum: (Int64.of_int (Array.length section_headers)) + ~e_shstrndx: shstrtabndx + in + + let n_syms = ref 1 in (* The empty symbol, implicit. *) + + let data_sym name st_bind fixup = + let name_fixup = new_fixup ("data symbol name fixup: '" ^ name ^ "'") in + let strtab_entry = DEF (name_fixup, ZSTRING name) in + let symtab_entry = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind + ~st_type: STT_OBJECT + ~st_shndx: datandx + in + incr n_syms; + (strtab_entry, symtab_entry) + in + + let rodata_sym name st_bind fixup = + let name_fixup = new_fixup ("rodata symbol name fixup: '" ^ name ^ "'") in + let strtab_entry = DEF (name_fixup, ZSTRING name) in + let symtab_entry = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind + ~st_type: STT_OBJECT + ~st_shndx: rodatandx + in + incr n_syms; + (strtab_entry, symtab_entry) + in + + let text_sym name st_bind fixup = + let name_fixup = new_fixup ("text symbol name fixup: '" ^ name ^ "'") in + let strtab_frag = DEF (name_fixup, ZSTRING name) in + let symtab_frag = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind: st_bind + ~st_type: STT_FUNC + ~st_shndx: textndx + in + incr n_syms; + (strtab_frag, symtab_frag) + in + + let require_sym name st_bind _(*fixup*) = + let name_fixup = + new_fixup ("require symbol name fixup: '" ^ name ^ "'") + in + let strtab_frag = DEF (name_fixup, ZSTRING name) in + let symtab_frag = + symbol + ~string_table_fixup: dynstr_section_fixup + 
~name_string_fixup: name_fixup + ~sym_target_fixup: None + ~st_bind + ~st_type: STT_FUNC + ~st_shndx: shn_UNDEF + in + incr n_syms; + (strtab_frag, symtab_frag) + in + + let frags_of_symbol sym_emitter st_bind symname_opt symbody x = + let (strtab_frags, symtab_frags, body_frags) = x in + let (strtab_frag, symtab_frag, body_frag) = + match symname_opt with + None -> (MARK, MARK, symbody) + | Some symname -> + let body_fixup = + new_fixup ("symbol body fixup: '" ^ symname ^ "'") + in + let body = + if symname = entry_name + then DEF (e_entry_fixup, DEF (body_fixup, symbody)) + else DEF (body_fixup, symbody) + in + let (str, sym) = sym_emitter symname st_bind body_fixup in + (str, sym, body) + in + ((strtab_frag :: strtab_frags), + (symtab_frag :: symtab_frags), + (body_frag :: body_frags)) + in + + let frags_of_require_symbol sym_emitter st_bind symname plt_entry_fixup x = + let (i, strtab_frags, symtab_frags, + plt_frags, got_plt_frags, rela_plt_frags) = x in + let (strtab_frag, symtab_frag) = sym_emitter symname st_bind None in + let e = X86.new_emitter_without_vregs () in + let jump_slot_fixup = new_fixup ("jump slot #" ^ string_of_int i) in + let jump_slot_initial_target_fixup = + new_fixup ("jump slot #" ^ string_of_int i ^ " initial target") in + + (* You may notice this PLT entry doesn't look like either of the + * types of "normal" PLT entries outlined in the ELF manual. It is, + * however, just what you get when you combine a PIC PLT entry with + * inline calls to the horrible __i686.get_pc_thunk.ax kludge used + * on x86 to support entering PIC PLTs. We're just doing it *in* + * the PLT entries rather than infecting all the callers with the + * obligation of having the GOT address in a register on + * PLT-entry. 
+ *) + + let plt_frag = + let (reg, _, _) = X86.get_next_pc_thunk in + + Il.emit_full e (Some plt_entry_fixup) [] Il.Dead; + + Abi.load_fixup_addr e reg got_plt_section_fixup Il.CodeTy; + + Il.emit e (Il.jmp Il.JMP (got_code_cell reg (2+i))); + + Il.emit_full e (Some jump_slot_initial_target_fixup) + [] (Il.Push (X86.immi (Int64.of_int i))); + + Il.emit e (Il.jmp Il.JMP (Il.direct_code_ptr plt0_fixup)); + X86.frags_of_emitted_quads sess e + in + let got_plt_frag = + DEF (jump_slot_fixup, + WORD (TY_u32, (M_POS jump_slot_initial_target_fixup))) + in + let rela_plt = + { elf32_386_rela_type = R_386_JMP_SLOT; + elf32_386_rela_offset = (M_POS jump_slot_fixup); + elf32_386_rela_sym = (IMM (Int64.of_int i)); + elf32_386_rela_addend = (IMM 0L) } + in + let rela_plt_frag = elf32_386_rela_frag rela_plt in + (i+1, + (strtab_frag :: strtab_frags), + (symtab_frag :: symtab_frags), + (plt_frag :: plt_frags), + (got_plt_frag :: got_plt_frags), + (rela_plt_frag :: rela_plt_frags)) + in + + (* Emit text export symbols. *) + let (global_text_strtab_frags, global_text_symtab_frags) = + match htab_search sem.Semant.ctxt_native_provided SEG_text with + None -> ([], []) + | Some etab -> + Hashtbl.fold + begin + fun name fix x -> + let (strtab_frags, symtab_frags) = x in + let (str, sym) = text_sym name STB_GLOBAL fix in + (str :: strtab_frags, + sym :: symtab_frags) + end + etab + ([],[]) + in + + (* Emit text fragments (possibly named). 
*) + let (global_text_strtab_frags, + global_text_symtab_frags, + text_body_frags) = + Hashtbl.fold + (frags_of_symbol text_sym STB_GLOBAL) + text_frags + (global_text_strtab_frags, global_text_symtab_frags, []) + in + + let (local_text_strtab_frags, + local_text_symtab_frags) = + + let symbol_frags_of_code _ code accum = + let (strtab_frags, symtab_frags) = accum in + let fix = code.Semant.code_fixup in + let (strtab_frag, symtab_frag) = + text_sym fix.fixup_name STB_LOCAL fix + in + (strtab_frag :: strtab_frags, + symtab_frag :: symtab_frags) + in + + let symbol_frags_of_glue_code g code accum = + let (strtab_frags, symtab_frags) = accum in + let fix = code.Semant.code_fixup in + let (strtab_frag, symtab_frag) = + text_sym (Semant.glue_str sem g) STB_LOCAL fix + in + (strtab_frag :: strtab_frags, + symtab_frag :: symtab_frags) + in + + let item_str_frags, item_sym_frags = + Hashtbl.fold symbol_frags_of_code + sem.Semant.ctxt_all_item_code ([], []) + in + let glue_str_frags, glue_sym_frags = + Hashtbl.fold symbol_frags_of_glue_code + sem.Semant.ctxt_glue_code ([], []) + in + (item_str_frags @ glue_str_frags, + item_sym_frags @ glue_sym_frags) + in + + (* Emit rodata export symbols. *) + let (rodata_strtab_frags, rodata_symtab_frags) = + match htab_search sem.Semant.ctxt_native_provided SEG_data with + None -> ([], []) + | Some etab -> + Hashtbl.fold + begin + fun name fix x -> + let (strtab_frags, symtab_frags) = x in + let (str, sym) = rodata_sym name STB_GLOBAL fix in + (str :: strtab_frags, + sym :: symtab_frags) + end + etab + ([],[]) + in + + (* Emit rodata fragments (possibly named). 
*) + let (rodata_strtab_frags, + rodata_symtab_frags, + rodata_body_frags) = + Hashtbl.fold + (frags_of_symbol rodata_sym STB_GLOBAL) + rodata_frags + (rodata_strtab_frags, rodata_symtab_frags, []) + in + + + let (data_strtab_frags, + data_symtab_frags, + data_body_frags) = + Hashtbl.fold (frags_of_symbol data_sym STB_GLOBAL) data_frags ([],[],[]) + in + + let (_, + require_strtab_frags, + require_symtab_frags, + plt_frags, + got_plt_frags, + rela_plt_frags) = + Hashtbl.fold (frags_of_require_symbol require_sym STB_GLOBAL) + required_fixups + (1,[],[],[plt0_frag],[got_prefix],[]) + in + let require_symtab_frags = List.rev require_symtab_frags in + let plt_frags = List.rev plt_frags in + let got_plt_frags = List.rev got_plt_frags in + let rela_plt_frags = List.rev rela_plt_frags in + + let dynamic_needed_strtab_frags = + Array.make (Array.length needed_libs) MARK + in + + let dynamic_frags = + let dynamic_needed_frags = Array.make (Array.length needed_libs) MARK in + for i = 0 to (Array.length needed_libs) - 1 do + let fixup = + new_fixup ("needed library name fixup: " ^ needed_libs.(i)) + in + dynamic_needed_frags.(i) <- + elf32_dyn_frag (DT_NEEDED, SUB (M_POS fixup, + M_POS dynstr_section_fixup)); + dynamic_needed_strtab_frags.(i) <- + DEF (fixup, ZSTRING needed_libs.(i)) + done; + (SEQ [| + SEQ dynamic_needed_frags; + elf32_dyn_frag (DT_STRTAB, M_POS dynstr_section_fixup); + elf32_dyn_frag (DT_STRSZ, M_SZ dynstr_section_fixup); + + elf32_dyn_frag (DT_SYMTAB, M_POS dynsym_section_fixup); + elf32_dyn_frag (DT_SYMENT, IMM elf32_symsize); + + elf32_dyn_frag (DT_HASH, M_POS hash_section_fixup); + elf32_dyn_frag (DT_PLTGOT, M_POS got_plt_section_fixup); + + elf32_dyn_frag (DT_PLTREL, IMM (elf32_num_of_dyn_tag DT_RELA)); + elf32_dyn_frag (DT_PLTRELSZ, M_SZ rela_plt_section_fixup); + elf32_dyn_frag (DT_JMPREL, M_POS rela_plt_section_fixup); + + elf32_dyn_frag (DT_NULL, IMM 0L) + |]) + in + + let null_strtab_fixup = new_fixup "null dynstrtab entry" in + let 
null_strtab_frag = DEF (null_strtab_fixup, ZSTRING "") in + let null_symtab_frag = (symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: null_strtab_fixup + ~sym_target_fixup: None + ~st_bind: STB_LOCAL + ~st_type: STT_NOTYPE + ~st_shndx: 0L) in + + let dynsym_frags = (null_symtab_frag :: + (require_symtab_frags @ + global_text_symtab_frags @ + local_text_symtab_frags @ + rodata_symtab_frags @ + data_symtab_frags)) + in + + let dynstr_frags = (null_strtab_frag :: + (require_strtab_frags @ + global_text_strtab_frags @ + local_text_strtab_frags @ + rodata_strtab_frags @ + data_strtab_frags @ + (Array.to_list dynamic_needed_strtab_frags))) + in + + let interp_section = + DEF (interp_section_fixup, ZSTRING "/lib/ld-linux.so.2") + in + + let text_section = + DEF (text_section_fixup, + SEQ (Array.of_list text_body_frags)) + in + let rodata_section = + DEF (rodata_section_fixup, + SEQ (Array.of_list rodata_body_frags)) + in + let data_section = + DEF (data_section_fixup, + SEQ (Array.of_list data_body_frags)) + in + let bss_section = + DEF (bss_section_fixup, + SEQ [| |]) + in + let dynsym_section = + DEF (dynsym_section_fixup, + SEQ (Array.of_list dynsym_frags)) + in + let dynstr_section = + DEF (dynstr_section_fixup, + SEQ (Array.of_list dynstr_frags)) + in + + let hash_section = + let n_syms = !n_syms in + + DEF (hash_section_fixup, + (* Worst hashtable ever: one chain. *) + SEQ [| + WORD (TY_u32, IMM 1L); (* nbucket *) + WORD (TY_u32, (* nchain *) + IMM (Int64.of_int n_syms)); + WORD (TY_u32, IMM 1L); (* bucket 0 => symbol 1. 
*) + SEQ + begin + Array.init + n_syms + (fun i -> + let next = (* chain[i] => if last then 0 else i+1 *) + if i > 0 && i < (n_syms-1) + then Int64.of_int (i+1) + else 0L + in + WORD (TY_u32, IMM next)) + end; + |]) + in + + let plt_section = + DEF (plt_section_fixup, + SEQ (Array.of_list plt_frags)) + in + + let got_plt_section = + DEF (got_plt_section_fixup, + SEQ (Array.of_list got_plt_frags)) + in + + let rela_plt_section = + DEF (rela_plt_section_fixup, + SEQ (Array.of_list rela_plt_frags)) + in + + let dynamic_section = + DEF (dynamic_section_fixup, dynamic_frags) + in + + let note_rust_section = + DEF (note_rust_section_fixup, + (Asm.note_rust_frags crate.node.Ast.crate_meta)) + in + + + let page_alignment = 0x1000 in + + let align_both i = + ALIGN_FILE (page_alignment, + (ALIGN_MEM (page_alignment, i))) + in + + let def_aligned f i = + align_both + (SEQ [| DEF(f,i); + (align_both MARK)|]) + in + + let debug_aranges_section = + def_aligned + sem.Semant.ctxt_debug_aranges_fixup + dwarf.Dwarf.debug_aranges + in + let debug_pubnames_section = + def_aligned + sem.Semant.ctxt_debug_pubnames_fixup + dwarf.Dwarf.debug_pubnames + in + let debug_info_section = + def_aligned + sem.Semant.ctxt_debug_info_fixup + dwarf.Dwarf.debug_info + in + let debug_abbrev_section = + def_aligned + sem.Semant.ctxt_debug_abbrev_fixup + dwarf.Dwarf.debug_abbrev + in + let debug_line_section = + def_aligned + sem.Semant.ctxt_debug_line_fixup + dwarf.Dwarf.debug_line + in + let debug_frame_section = + def_aligned sem.Semant.ctxt_debug_frame_fixup dwarf.Dwarf.debug_frame + in + + let load_address = 0x0804_8000L in + + SEQ + [| + MEMPOS load_address; + ALIGN_FILE + (segment_2_align, + DEF + (segment_2_fixup, + SEQ + [| + DEF (sem.Semant.ctxt_image_base_fixup, MARK); + elf_header; + ALIGN_FILE + (segment_0_align, + DEF + (segment_0_fixup, + SEQ + [| + DEF (program_header_table_fixup, + program_header_table); + |])); + ALIGN_FILE + (segment_1_align, + DEF (segment_1_fixup, interp_section)); 
+ text_section; + rodata_section; + dynsym_section; + dynstr_section; + hash_section; + plt_section; + rela_plt_section; + debug_aranges_section; + debug_pubnames_section; + debug_info_section; + debug_abbrev_section; + debug_line_section; + debug_frame_section; + |])); + ALIGN_FILE + (segment_3_align, + DEF + (segment_3_fixup, + SEQ + [| + data_section; + got_plt_section; + bss_section; + ALIGN_FILE + (segment_4_align, + DEF (segment_4_fixup, + dynamic_section)); + ALIGN_FILE + (segment_5_align, + DEF (segment_5_fixup, + note_rust_section)); + |])); + DEF (shstrtab_section_fixup, + shstrtab_section); + DEF (section_header_table_fixup, + section_header_table); + |] +;; + +let emit_file + (sess:Session.sess) + (crate:Ast.crate) + (code:Asm.frag) + (data:Asm.frag) + (sem:Semant.ctxt) + (dwarf:Dwarf.debug_records) + : unit = + + let text_frags = Hashtbl.create 4 in + let rodata_frags = Hashtbl.create 4 in + let data_frags = Hashtbl.create 4 in + let required_fixups = Hashtbl.create 4 in + + (* + * Startup on elf-linux is more complex than in win32. It's + * thankfully documented in some detail around the net. + * + * - The elf entry address is for _start. + * + * - _start pushes: + * + * eax (should be zero) + * esp (holding the kernel-provided stack end) + * edx (address of _rtld_fini) + * address of _fini + * address of _init + * ecx (argv) + * esi (argc) + * address of main + * + * and then calls __libc_start_main@plt. + * + * - This means any sensible binary has a PLT. Fun. So + * We call into the PLT, which itself is just a bunch + * of indirect jumps through slots in the GOT, and wind + * up in __libc_start_main. Which calls _init, then + * essentially exit(main(argc,argv)). 
+ *) + + + let init_fixup = new_fixup "_init function entry" in + let fini_fixup = new_fixup "_fini function entry" in + let (start_fixup, rust_start_fixup) = + if sess.Session.sess_library_mode + then (None, None) + else (Some (new_fixup "start function entry"), + Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) + in + let libc_start_main_fixup = new_fixup "__libc_start_main@plt stub" in + + let start_fn _ = + let start_fixup = + match start_fixup with + None -> bug () "missing start fixup in non-library mode" + | Some s -> s + in + let e = X86.new_emitter_without_vregs () in + let push_r32 r = Il.emit e + (Il.Push (Il.Cell (Il.Reg (Il.Hreg r, Il.ValTy Il.Bits32)))) + in + let push_pos32 = X86.push_pos32 e in + + Il.emit e (Il.unary Il.UMOV (X86.rc X86.ebp) (X86.immi 0L)); + Il.emit e (Il.Pop (X86.rc X86.esi)); + Il.emit e (Il.unary Il.UMOV (X86.rc X86.ecx) (X86.ro X86.esp)); + Il.emit e (Il.binary Il.AND + (X86.rc X86.esp) (X86.ro X86.esp) + (X86.immi 0xfffffffffffffff0L)); + + push_r32 X86.eax; + push_r32 X86.esp; + push_r32 X86.edx; + push_pos32 fini_fixup; + push_pos32 init_fixup; + push_r32 X86.ecx; + push_r32 X86.esi; + push_pos32 start_fixup; + Il.emit e (Il.call + (Il.Reg (Il.Hreg X86.eax, Il.ValTy Il.Bits32)) + (Il.direct_code_ptr libc_start_main_fixup)); + X86.frags_of_emitted_quads sess e + in + + let do_nothing_fn _ = + let e = X86.new_emitter_without_vregs () in + Il.emit e Il.Ret; + X86.frags_of_emitted_quads sess e + in + + let main_fn _ = + match (start_fixup, rust_start_fixup, sem.Semant.ctxt_main_fn_fixup) with + (None, _, _) + | (_, None, _) + | (_, _, None) -> MARK + | (Some start_fixup, + Some rust_start_fixup, + Some main_fn_fixup) -> + let e = X86.new_emitter_without_vregs () in + X86.objfile_start e + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup + ~crate_fixup: sem.Semant.ctxt_crate_fixup + ~indirect_start: false; + X86.frags_of_emitted_quads sess e + in + + let needed_libs = + [| + "libc.so.6"; + "librustrt.so" + |] 
+ in + + let _ = + if not sess.Session.sess_library_mode + then + begin + htab_put text_frags (Some "_start") (start_fn()); + htab_put text_frags (Some "_init") + (DEF (init_fixup, do_nothing_fn())); + htab_put text_frags (Some "_fini") + (DEF (fini_fixup, do_nothing_fn())); + htab_put text_frags (Some "main") (main_fn ()); + htab_put required_fixups "__libc_start_main" libc_start_main_fixup; + end; + htab_put text_frags None code; + htab_put rodata_frags None data; + + Hashtbl.iter + begin + fun _ tab -> + Hashtbl.iter + begin + fun name fixup -> + htab_put required_fixups name fixup + end + tab + end + sem.Semant.ctxt_native_required + in + let all_frags = + elf32_linux_x86_file + ~sess + ~crate + ~entry_name: "_start" + ~text_frags + ~data_frags + ~dwarf + ~sem + ~rodata_frags + ~required_fixups + ~needed_libs + in + write_out_frag sess true all_frags +;; + +let elf_magic = "\x7fELF";; + +let sniff + (sess:Session.sess) + (filename:filename) + : asm_reader option = + try + let stat = Unix.stat filename in + if (stat.Unix.st_kind = Unix.S_REG) && + (stat.Unix.st_size > 4) + then + let ar = new_asm_reader sess filename in + let _ = log sess "sniffing ELF file" in + if (ar.asm_get_zstr_padded 4) = elf_magic + then (ar.asm_seek 0; Some ar) + else None + else + None + with + _ -> None +;; + +let get_sections + (sess:Session.sess) + (ar:asm_reader) + : (string,(int*int)) Hashtbl.t = + let sects = Hashtbl.create 0 in + let _ = log sess "reading sections" in + let elf_id = ar.asm_get_zstr_padded 4 in + let _ = assert (elf_id = elf_magic) in + + let _ = ar.asm_seek 0x10 in + let _ = ar.asm_adv_u16 () in (* e_type *) + let _ = ar.asm_adv_u16 () in (* e_machine *) + let _ = ar.asm_adv_u32 () in (* e_version *) + let _ = ar.asm_adv_u32 () in (* e_entry *) + let _ = ar.asm_adv_u32 () in (* e_phoff *) + let e_shoff = ar.asm_get_u32 () in (* e_shoff *) + let _ = ar.asm_adv_u32 () in (* e_flags *) + let _ = ar.asm_adv_u16 () in (* e_ehsize *) + let _ = ar.asm_adv_u16 () in (* 
e_phentsize *) + let _ = ar.asm_adv_u16 () in (* e_phnum *) + let e_shentsize = ar.asm_get_u16 () in + let e_shnum = ar.asm_get_u16 () in + let e_shstrndx = ar.asm_get_u16 () in + let _ = log sess + "%d ELF section headers, %d bytes each, starting at 0x%x" + e_shnum e_shentsize e_shoff + in + let _ = log sess "section %d is .shstrtab" e_shstrndx in + + let read_section_hdr n = + let _ = ar.asm_seek (e_shoff + n * e_shentsize) in + let str_off = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in (* sh_type *) + let _ = ar.asm_adv_u32() in (* sh_flags *) + let _ = ar.asm_adv_u32() in (* sh_addr *) + let off = ar.asm_get_u32() in (* sh_off *) + let size = ar.asm_get_u32() in (* sh_size *) + let _ = ar.asm_adv_u32() in (* sh_link *) + let _ = ar.asm_adv_u32() in (* sh_info *) + let _ = ar.asm_adv_u32() in (* sh_addralign *) + let _ = ar.asm_adv_u32() in (* sh_entsize *) + (str_off, off, size) + in + + let (_, str_base, _) = read_section_hdr e_shstrndx in + + let _ = ar.asm_seek e_shoff in + for i = 0 to (e_shnum - 1) do + let (str_off, off, size) = read_section_hdr i in + let _ = ar.asm_seek (str_base + str_off) in + let name = ar.asm_get_zstr() in + log sess "section %d: %s, size %d, offset 0x%x" i name size off; + Hashtbl.add sects name (off, size); + done; + sects +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/il.ml b/src/boot/be/il.ml new file mode 100644 index 00000000..e095e627 --- /dev/null +++ b/src/boot/be/il.ml @@ -0,0 +1,1135 @@ +open Common;; + +(* FIXME (issue #1): thread a session object through this eventually. *) +let log_iltypes = ref false;; + +(* IL type system, very rudimentary. 
*) + +type bits = + Bits8 + | Bits16 + | Bits32 + | Bits64 +;; + +type scalar_ty = + ValTy of bits + | AddrTy of referent_ty + +and referent_ty = + ScalarTy of scalar_ty + | StructTy of referent_ty array + | UnionTy of referent_ty array + | ParamTy of ty_param_idx (* Thing of current-frame type-param #n *) + | OpaqueTy (* Unknown memory-resident thing. *) + | CodeTy (* Executable machine code. *) + | NilTy (* 0 bits of space. *) +;; + +let (voidptr_t:scalar_ty) = AddrTy OpaqueTy;; +let (codeptr_t:scalar_ty) = AddrTy CodeTy;; + +(* Operands. *) + +type vreg = int ;; +type hreg = int ;; +type label = int ;; +type spill = int ;; + +type reg = + Vreg of vreg + | Hreg of hreg +;; + +type mem = + Abs of Asm.expr64 + | RegIn of (reg * (Asm.expr64 option)) + | Spill of spill +;; + +type typed_reg = (reg * scalar_ty);; +type typed_mem = (mem * referent_ty);; +type typed_imm = (Asm.expr64 * ty_mach);; +type typed_imm_ptr = (fixup * referent_ty);; + +type cell = + Reg of typed_reg + | Mem of typed_mem +;; + +(* + * ImmPtr (a, rty) can be assigned to anything of scalar_ty + * AddrTy rty; the difference is that ImmAddr carries its value + * so can be used in cases where we want to have an immediate + * address constant-propagated through the code to the backend. + *) +type operand = + Cell of cell + | Imm of typed_imm + | ImmPtr of typed_imm_ptr +;; + + +type code = + CodeLabel of label (* Index into current quad block. *) + | CodePtr of operand + | CodeNone +;; + +(* NB: for the most part, we let the register allocator assign spills + * from vregs, and we permanently allocate aliased slots to stack + * locations by static aliasing information early, in layout. + * + * The one awkward case this doesn't handle is when someone tries to + * pass a literal-atom to an alias-slot. This *requires* a memory slot + * but we only realize it rather late, much later than we'd normally + * have thougt to desugar the literal into a temporary. 
+ * + * So in these cases, we let the trans module explicitly demand a + * "Spill n" operand, which the register allocator mops up before it + * gets started on the vregs. + * + * NOTE: if we were more clever we'd integrate vregs and spills like + * this together along with the general notion of a temporary way back + * at the desugaring stage, and use some kind of size-class + * consolidation so that spills with non-overlapping lifetimes could + * share memory. But we're not that clever yet. + *) + + +(* Helpers. *) + +let direct_code_ptr fix = + (CodePtr (ImmPtr (fix, CodeTy))) +;; + +let cell_referent_ty c = + match c with + Reg (_, st) -> ScalarTy st + | Mem (_, rt) -> rt +;; + +let cell_is_nil c = + match c with + Mem (_, NilTy) -> true + | Reg (_, AddrTy NilTy) -> true + | _ -> false +;; + +let operand_is_nil o = + match o with + Cell c -> cell_is_nil c + | _ -> false +;; + +let mem_off (mem:mem) (off:Asm.expr64) : mem = + let addto e = Asm.ADD (off, e) in + match mem with + Abs e -> Abs (addto e) + | RegIn (r, None) -> RegIn (r, Some off) + | RegIn (r, Some e) -> RegIn (r, Some (addto e)) + | Spill _ -> bug () "Adding offset to spill slot" +;; + +let mem_off_imm (mem:mem) (imm:int64) : mem = + mem_off mem (Asm.IMM imm) +;; + + +(* Quads. *) + +type binop = + ADD | SUB + | IMUL | UMUL + | IDIV | UDIV + | IMOD | UMOD + | AND | OR | XOR + | LSL | LSR | ASR +;; + +type unop = + NEG | NOT + | UMOV | IMOV + | ZERO +;; + +type jmpop = + JE | JNE + | JZ | JNZ (* FIXME: Synonyms with JE/JNE in x86, others? *) + | JL | JLE | JG | JGE (* Signed. *) + | JB | JBE | JA | JAE (* Unsigned. 
*) + | JC | JNC | JO | JNO + | JMP +;; + +type binary = + { + binary_op: binop; + binary_dst: cell; + binary_lhs: operand; + binary_rhs: operand + } +;; + +type unary = + { + unary_op: unop; + unary_dst: cell; + unary_src: operand + } +;; + +type cmp = + { + cmp_lhs: operand; + cmp_rhs: operand + } +;; + +type lea = + { + lea_dst: cell; + lea_src: operand + } +;; + +type jmp = + { + jmp_op: jmpop; + jmp_targ: code; + } +;; + +type call = + { + call_dst: cell; + call_targ: code + } + +type quad' = + Binary of binary + | Unary of unary + | Lea of lea + | Cmp of cmp + | Jmp of jmp + | Push of operand + | Pop of cell + | Call of call + | Debug (* Debug-break pseudo-instruction. *) + | Enter of fixup (* Enter-fixup-block pseudo-instruction. *) + | Leave (* Leave-fixup-block pseudo-instruction. *) + | Ret (* Return to caller. *) + | Nop (* Keep this quad here, emit CPU nop. *) + | Dead (* Keep this quad but emit nothing. *) + | Regfence (* Clobber all hregs. *) + | End (* Space past the end of quads to emit. *) +;; + +type quad = + { quad_fixup: fixup option; + quad_implicits: label list; + quad_body: quad'; } + +type quads = quad array ;; + +(* Query functions. 
*) + +let cell_is_scalar (c:cell) : bool = + match c with + Reg (_, _) -> true + | Mem (_, ScalarTy _) -> true + | _ -> false +;; + + +let bits_of_ty_mach (tm:ty_mach) : bits = + match tm with + | TY_u8 -> Bits8 + | TY_i8 -> Bits8 + | TY_u16 -> Bits16 + | TY_i16 -> Bits16 + | TY_u32 -> Bits32 + | TY_i32 -> Bits32 + | TY_u64 -> Bits64 + | TY_i64 -> Bits64 + | TY_f32 -> Bits32 + | TY_f64 -> Bits64 +;; + +let cell_scalar_ty (c:cell) : scalar_ty = + match c with + Reg (_, st) -> st + | Mem (_, ScalarTy st) -> st + | _ -> bug () "mem of non-scalar in Il.cell_scalar_ty" +;; + +let operand_scalar_ty (op:operand) : scalar_ty = + match op with + Cell c -> cell_scalar_ty c + | Imm (_, t) -> ValTy (bits_of_ty_mach t) + | ImmPtr (_, t) -> AddrTy t +;; + + +let scalar_ty_bits (word_bits:bits) (st:scalar_ty) : bits = + match st with + ValTy bits -> bits + | AddrTy _ -> word_bits +;; + +let cell_bits (word_bits:bits) (c:cell) : bits = + match c with + Reg (_, st) -> scalar_ty_bits word_bits st + | Mem (_, ScalarTy st) -> scalar_ty_bits word_bits st + | Mem _ -> bug () "mem of non-scalar in Il.cell_bits" +;; + +let operand_bits (word_bits:bits) (op:operand) : bits = + match op with + Cell cell -> cell_bits word_bits cell + | Imm (_, tm) -> bits_of_ty_mach tm + | ImmPtr _ -> word_bits +;; + +let bits_size (bits:bits) : int64 = + match bits with + Bits8 -> 1L + | Bits16 -> 2L + | Bits32 -> 4L + | Bits64 -> 8L +;; + +let bits_align (bits:bits) : int64 = + match bits with + Bits8 -> 1L + | Bits16 -> 2L + | Bits32 -> 4L + | Bits64 -> 8L +;; + +let scalar_ty_size (word_bits:bits) (st:scalar_ty) : int64 = + bits_size (scalar_ty_bits word_bits st) +;; + +let scalar_ty_align (word_bits:bits) (st:scalar_ty) : int64 = + bits_align (scalar_ty_bits word_bits st) +;; + +let rec referent_ty_layout (word_bits:bits) (rt:referent_ty) : (size * size) = + match rt with + ScalarTy st -> (SIZE_fixed (scalar_ty_size word_bits st), + SIZE_fixed (scalar_ty_align word_bits st)) + | StructTy rts -> + begin 
+ let accum (off,align) rt : (size * size) = + let (elt_size, elt_align) = referent_ty_layout word_bits rt in + let elt_off = align_sz elt_align off in + (add_sz elt_off elt_size, max_sz elt_align align) + in + Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts + end + | UnionTy rts -> + begin + let accum (sz,align) rt : (size * size) = + let (elt_size, elt_align) = referent_ty_layout word_bits rt in + (max_sz sz elt_size, max_sz elt_align align) + in + Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts + end + | OpaqueTy -> bug () "opaque ty in referent_ty_layout" + | CodeTy -> bug () "code ty in referent_ty_layout" + | ParamTy i -> (SIZE_param_size i, SIZE_param_align i) + | NilTy -> (SIZE_fixed 0L, SIZE_fixed 1L) + +and referent_ty_size (word_bits:bits) (rt:referent_ty) : size = + (fst (referent_ty_layout word_bits rt)) + +and referent_ty_align (word_bits:bits) (rt:referent_ty) : size = + (snd (referent_ty_layout word_bits rt)) + +;; + +let get_element_offset + (word_bits:bits) + (elts:referent_ty array) + (i:int) + : size = + let elts_before = Array.sub elts 0 i in + let elt_rty = elts.(i) in + let elts_before_size = referent_ty_size word_bits (StructTy elts_before) in + let elt_align = referent_ty_align word_bits elt_rty in + let elt_off = align_sz elt_align elts_before_size in + elt_off +;; + +(* Processor. 
*) + +type quad_processor = + { qp_reg: (quad_processor -> reg -> reg); + qp_mem: (quad_processor -> mem -> mem); + qp_cell_read: (quad_processor -> cell -> cell); + qp_cell_write: (quad_processor -> cell -> cell); + qp_code: (quad_processor -> code -> code); + qp_op: (quad_processor -> operand -> operand); } +;; + +let identity_processor = + let qp_cell = (fun qp c -> match c with + Reg (r, b) -> Reg (qp.qp_reg qp r, b) + | Mem (a, b) -> Mem (qp.qp_mem qp a, b)) + in + { qp_reg = (fun _ r -> r); + qp_mem = (fun qp a -> match a with + RegIn (r, o) -> RegIn (qp.qp_reg qp r, o) + | Abs _ + | Spill _ -> a); + qp_cell_read = qp_cell; + qp_cell_write = qp_cell; + qp_code = (fun qp c -> match c with + CodePtr op -> CodePtr (qp.qp_op qp op) + | CodeLabel _ + | CodeNone -> c); + qp_op = (fun qp op -> match op with + Cell c -> Cell (qp.qp_cell_read qp c) + | ImmPtr _ -> op + | Imm _ -> op) } +;; + +let process_quad (qp:quad_processor) (q:quad) : quad = + { q with + quad_body = match q.quad_body with + Binary b -> + Binary { b with + binary_dst = qp.qp_cell_write qp b.binary_dst; + binary_lhs = qp.qp_op qp b.binary_lhs; + binary_rhs = qp.qp_op qp b.binary_rhs } + | Unary u -> + Unary { u with + unary_dst = qp.qp_cell_write qp u.unary_dst; + unary_src = qp.qp_op qp u.unary_src } + + | Lea le -> + Lea { lea_dst = qp.qp_cell_write qp le.lea_dst; + lea_src = qp.qp_op qp le.lea_src } + + | Cmp c -> + Cmp { cmp_lhs = qp.qp_op qp c.cmp_lhs; + cmp_rhs = qp.qp_op qp c.cmp_rhs } + + | Jmp j -> + Jmp { j with + jmp_targ = qp.qp_code qp j.jmp_targ } + + | Push op -> + Push (qp.qp_op qp op) + + | Pop c -> + Pop (qp.qp_cell_write qp c) + + | Call c -> + Call { call_dst = qp.qp_cell_write qp c.call_dst; + call_targ = qp.qp_code qp c.call_targ } + + | Ret -> Ret + | Nop -> Nop + | Debug -> Debug + | Regfence -> Regfence + | Enter f -> Enter f + | Leave -> Leave + | Dead -> Dead + | End -> End } +;; + +let visit_quads (qp:quad_processor) (qs:quads) : unit = + Array.iter (fun x ->ignore ( 
process_quad qp x); ()) qs +;; + +let process_quads (qp:quad_processor) (qs:quads) : quads = + Array.map (process_quad qp) qs +;; + +let rewrite_quads (qp:quad_processor) (qs:quads) : unit = + for i = 0 to ((Array.length qs) - 1) do + qs.(i) <- process_quad qp qs.(i) + done +;; + + +(* A little partial-evaluator to help lowering sizes. *) + +let rec size_to_expr64 (a:size) : Asm.expr64 option = + let binary a b f = + match (size_to_expr64 a, size_to_expr64 b) with + (Some a, Some b) -> Some (f a b) + | _ -> None + in + match a with + SIZE_fixed i -> Some (Asm.IMM i) + | SIZE_fixup_mem_sz f -> Some (Asm.M_SZ f) + | SIZE_fixup_mem_pos f -> Some (Asm.M_POS f) + | SIZE_rt_neg s -> + begin + match (size_to_expr64 s) with + None -> None + | Some s -> Some (Asm.NEG s) + end + | SIZE_rt_add (a, b) -> binary a b (fun a b -> Asm.ADD (a,b)) + | SIZE_rt_mul (a, b) -> binary a b (fun a b -> Asm.MUL (a,b)) + | SIZE_rt_max (a, b) -> binary a b (fun a b -> Asm.MAX (a,b)) + | SIZE_rt_align (a, b) -> binary a b (fun a b -> Asm.ALIGN (a,b)) + | _ -> None +;; + + +(* Formatters. *) + +let string_of_bits (b:bits) : string = + match b with + Bits8 -> "b8" + | Bits16 -> "b16" + | Bits32 -> "b32" + | Bits64 -> "b64" +;; + +let rec string_of_scalar_ty (s:scalar_ty) : string = + match s with + ValTy b -> (string_of_bits b) + | AddrTy r -> (string_of_referent_ty r) ^ "*" + +and string_of_referent_ty (r:referent_ty) : string = + match r with + ScalarTy s -> (string_of_scalar_ty s) + | StructTy rs -> + Printf.sprintf "[%s]" + (String.concat "," + (Array.to_list (Array.map string_of_referent_ty rs))) + | UnionTy rs -> + Printf.sprintf "(%s)" + (String.concat "|" + (Array.to_list (Array.map string_of_referent_ty rs))) + | ParamTy i -> Printf.sprintf "#%d" i + | OpaqueTy -> "?" + | CodeTy -> "!" 
+ | NilTy -> "()" +;; + + +type hreg_formatter = hreg -> string;; + +let string_of_reg (f:hreg_formatter) (r:reg) : string = + match r with + Vreg i -> Printf.sprintf "<v%d>" i + | Hreg i -> f i +;; + +let rec string_of_expr64 (e64:Asm.expr64) : string = + let bin op a b = + Printf.sprintf "(%s %s %s)" (string_of_expr64 a) op (string_of_expr64 b) + in + let bini op a b = + Printf.sprintf "(%s %s %d)" (string_of_expr64 a) op b + in + match e64 with + Asm.IMM i when (i64_lt i 0L) -> Printf.sprintf "-0x%Lx" (Int64.neg i) + | Asm.IMM i -> Printf.sprintf "0x%Lx" i + | Asm.ADD (a,b) -> bin "+" a b + | Asm.SUB (a,b) -> bin "-" a b + | Asm.MUL (a,b) -> bin "*" a b + | Asm.DIV (a,b) -> bin "/" a b + | Asm.REM (a,b) -> bin "%" a b + | Asm.MAX (a,b) -> + Printf.sprintf "(max %s %s)" + (string_of_expr64 a) (string_of_expr64 b) + | Asm.ALIGN (a,b) -> + Printf.sprintf "(align %s %s)" + (string_of_expr64 a) (string_of_expr64 b) + | Asm.SLL (a,b) -> bini "<<" a b + | Asm.SLR (a,b) -> bini ">>" a b + | Asm.SAR (a,b) -> bini ">>>" a b + | Asm.AND (a,b) -> bin "&" a b + | Asm.XOR (a,b) -> bin "xor" a b + | Asm.OR (a,b) -> bin "|" a b + | Asm.NOT a -> Printf.sprintf "(not %s)" (string_of_expr64 a) + | Asm.NEG a -> Printf.sprintf "-%s" (string_of_expr64 a) + | Asm.F_POS f -> Printf.sprintf "<%s>.fpos" f.fixup_name + | Asm.F_SZ f -> Printf.sprintf "<%s>.fsz" f.fixup_name + | Asm.M_POS f -> Printf.sprintf "<%s>.mpos" f.fixup_name + | Asm.M_SZ f -> Printf.sprintf "<%s>.msz" f.fixup_name + | Asm.EXT _ -> "??ext??" 
+;; + +let string_of_off (e:Asm.expr64 option) : string = + match e with + None -> "" + | Some (Asm.IMM i) when (i64_lt i 0L) -> + Printf.sprintf " - 0x%Lx" (Int64.neg i) + | Some e' -> " + " ^ (string_of_expr64 e') +;; + +let string_of_mem (f:hreg_formatter) (a:mem) : string = + match a with + Abs e -> + Printf.sprintf "[%s]" (string_of_expr64 e) + | RegIn (r, off) -> + Printf.sprintf "[%s%s]" (string_of_reg f r) (string_of_off off) + | Spill i -> + Printf.sprintf "[<spill %d>]" i +;; +let string_of_cell (f:hreg_formatter) (c:cell) : string = + match c with + Reg (r,ty) -> + if !log_iltypes + then + Printf.sprintf "%s:%s" (string_of_reg f r) (string_of_scalar_ty ty) + else + Printf.sprintf "%s" (string_of_reg f r) + | Mem (a,ty) -> + if !log_iltypes + then + Printf.sprintf "%s:%s" + (string_of_mem f a) (string_of_referent_ty ty) + else + Printf.sprintf "%s" (string_of_mem f a) +;; + +let string_of_operand (f:hreg_formatter) (op:operand) : string = + match op with + Cell c -> string_of_cell f c + | ImmPtr (f, ty) -> + if !log_iltypes + then + Printf.sprintf "$<%s>.mpos:%s*" + f.fixup_name (string_of_referent_ty ty) + else + Printf.sprintf "$<%s>.mpos" f.fixup_name + | Imm (i, ty) -> + if !log_iltypes + then + Printf.sprintf "$%s:%s" (string_of_expr64 i) (string_of_ty_mach ty) + else + Printf.sprintf "$%s" (string_of_expr64 i) +;; + + +let string_of_code (f:hreg_formatter) (c:code) : string = + match c with + CodeLabel lab -> Printf.sprintf "<label %d>" lab + | CodePtr op -> string_of_operand f op + | CodeNone -> "<none>" +;; + + +let string_of_binop (op:binop) : string = + match op with + ADD -> "add" + | SUB -> "sub" + | IMUL -> "imul" + | UMUL -> "umul" + | IDIV -> "idiv" + | UDIV -> "udiv" + | IMOD -> "imod" + | UMOD -> "umod" + | AND -> "and" + | OR -> "or" + | XOR -> "xor" + | LSL -> "lsl" + | LSR -> "lsr" + | ASR -> "asr" +;; + +let string_of_unop (op:unop) : string = + match op with + NEG -> "neg" + | NOT -> "not" + | UMOV -> "umov" + | IMOV -> "imov" + | 
ZERO -> "zero" +;; + +let string_of_jmpop (op:jmpop) : string = + match op with + JE -> "je" + | JNE -> "jne" + | JL -> "jl" + | JLE -> "jle" + | JG -> "jg" + | JGE -> "jge" + | JB -> "jb" + | JBE -> "jbe" + | JA -> "ja" + | JAE -> "jae" + | JC -> "jc" + | JNC ->"jnc" + | JO -> "jo" + | JNO -> "jno" + | JZ -> "jz" + | JNZ ->"jnz" + | JMP -> "jmp" +;; + +let string_of_quad (f:hreg_formatter) (q:quad) : string = + match q.quad_body with + Binary b -> + Printf.sprintf "%s = %s %s %s" + (string_of_cell f b.binary_dst) + (string_of_operand f b.binary_lhs) + (string_of_binop b.binary_op) + (string_of_operand f b.binary_rhs) + + | Unary u -> + Printf.sprintf "%s = %s %s" + (string_of_cell f u.unary_dst) + (string_of_unop u.unary_op) + (string_of_operand f u.unary_src) + + | Cmp c -> + Printf.sprintf "cmp %s %s" + (string_of_operand f c.cmp_lhs) + (string_of_operand f c.cmp_rhs) + + | Lea le -> + Printf.sprintf "lea %s %s" + (string_of_cell f le.lea_dst) + (string_of_operand f le.lea_src) + + | Jmp j -> + Printf.sprintf "%s %s" + (string_of_jmpop j.jmp_op) + (string_of_code f j.jmp_targ) + + | Push op -> + Printf.sprintf "push %s" + (string_of_operand f op) + + | Pop c -> + Printf.sprintf "%s = pop" + (string_of_cell f c) + + | Call c -> + Printf.sprintf "%s = call %s" + (string_of_cell f c.call_dst) + (string_of_code f c.call_targ) + + | Ret -> "ret" + | Nop -> "nop" + | Dead -> "dead" + | Debug -> "debug" + | Regfence -> "regfence" + | Enter _ -> "enter lexical block" + | Leave -> "leave lexical block" + | End -> "---" +;; + + + +(* Emitters. 
*) + + +type emitter = { mutable emit_pc: int; + mutable emit_next_vreg: int option; + mutable emit_next_spill: int; + emit_preallocator: (quad' -> quad'); + emit_is_2addr: bool; + mutable emit_quads: quads; + emit_annotations: (int,string) Hashtbl.t; + emit_size_cache: ((size,operand) Hashtbl.t) Stack.t; + emit_node: node_id option; + } + + +let badq = { quad_fixup = None; + quad_implicits = []; + quad_body = End } +;; + + +let deadq = { quad_fixup = None; + quad_implicits = []; + quad_body = Dead } +;; + + +let new_emitter + (preallocator:quad' -> quad') + (is_2addr:bool) + (vregs_ok:bool) + (node:node_id option) + : emitter = + { + emit_pc = 0; + emit_next_vreg = (if vregs_ok then Some 0 else None); + emit_next_spill = 0; + emit_preallocator = preallocator; + emit_is_2addr = is_2addr; + emit_quads = Array.create 4 badq; + emit_annotations = Hashtbl.create 0; + emit_size_cache = Stack.create (); + emit_node = node; + } +;; + + +let num_vregs (e:emitter) : int = + match e.emit_next_vreg with + None -> 0 + | Some i -> i +;; + +let next_vreg_num (e:emitter) : vreg = + match e.emit_next_vreg with + None -> bug () "Il.next_vreg_num on non-vreg emitter" + | Some i -> + e.emit_next_vreg <- Some (i + 1); + i +;; + +let next_vreg (e:emitter) : reg = + Vreg (next_vreg_num e) +;; + +let next_vreg_cell (e:emitter) (s:scalar_ty) : cell = + Reg ((next_vreg e), s) +;; + +let next_spill (e:emitter) : spill = + let i = e.emit_next_spill in + e.emit_next_spill <- i + 1; + i +;; + +let next_spill_slot (e:emitter) (r:referent_ty) : typed_mem = + (Spill (next_spill e), r); +;; + + +let grow_if_necessary e = + let len = Array.length e.emit_quads in + if e.emit_pc >= len - 1 + then + let n = Array.create (2 * len) badq in + Array.blit e.emit_quads 0 n 0 len; + e.emit_quads <- n +;; + + +let binary (op:binop) (dst:cell) (lhs:operand) (rhs:operand) : quad' = + Binary { binary_op = op; + binary_dst = dst; + binary_lhs = lhs; + binary_rhs = rhs } +;; + +let unary (op:unop) (dst:cell) 
(src:operand) : quad' = + Unary { unary_op = op; + unary_dst = dst; + unary_src = src } + +let jmp (op:jmpop) (targ:code) : quad' = + Jmp { jmp_op = op; + jmp_targ = targ; } +;; + + +let lea (dst:cell) (src:operand) : quad' = + Lea { lea_dst = dst; + lea_src = src; } +;; + +let cmp (lhs:operand) (rhs:operand) : quad' = + Cmp { cmp_lhs = lhs; + cmp_rhs = rhs; } +;; + +let call (dst:cell) (targ:code) : quad' = + Call { call_dst = dst; + call_targ = targ; } +;; + +let umov (dst:cell) (src:operand) : quad' = + if (cell_is_nil dst || operand_is_nil src) + then Dead + else unary UMOV dst src +;; + +let zero (dst:cell) (count:operand) : quad' = + unary ZERO dst count +;; + +let is_mov uop = + match uop with + UMOV | IMOV -> true + | _ -> false +;; + +let mk_quad (q':quad') : quad = + { quad_body = q'; + quad_implicits = []; + quad_fixup = None } +;; + +let emit_full + (e:emitter) + (fix:fixup option) + (implicits:label list) + (q':quad') + : unit = + let fixup = ref fix in + let emit_quad_bottom q' = + grow_if_necessary e; + e.emit_quads.(e.emit_pc) <- { quad_body = q'; + quad_implicits = implicits; + quad_fixup = (!fixup) }; + fixup := None; + e.emit_pc <- e.emit_pc + 1 + in + + let emit_quad (q':quad') : unit = + (* re-decay any freshly generated mem-mem movs. 
*) + match q' with + Unary { unary_dst = Mem (dst_mem, ScalarTy src_st); + unary_src = Cell (Mem (src_mem, ScalarTy dst_st)); + unary_op = op } + when is_mov op -> + let v = next_vreg_cell e dst_st in + emit_quad_bottom + (unary op v (Cell (Mem (src_mem, ScalarTy src_st)))); + emit_quad_bottom + (unary op (Mem (dst_mem, ScalarTy dst_st)) (Cell v)) + | _ -> emit_quad_bottom q' + in + + let default_mov = + match q' with + Binary b -> + begin + match b.binary_op with + IDIV | IMUL | IMOD -> IMOV + | _ -> UMOV + end + | Unary u -> + begin + match u.unary_op with + IMOV -> IMOV + | _ -> UMOV + end + | _ -> UMOV + in + + let emit_mov (dst:cell) (src:operand) : unit = + emit_quad (unary default_mov dst src) + in + + let mov_if_operands_differ + (old_op:operand) (new_op:operand) + : unit = + if (new_op <> old_op) + then + match new_op with + (Cell new_cell) -> + emit_mov new_cell old_op + | _ -> () + in + + let mov_if_two_operands_differ + (old_lhs_op:operand) (new_lhs_op:operand) + (old_rhs_op:operand) (new_rhs_op:operand) + : unit = + (* + * This is sufficiently obscure that it deserves an explanation. + * + * The main idea here is to do two "mov_if_operands_differ" calls, + * such as one might have when setting up a binary quad. + * + * The problem comes when you happen to hit a case like X86 div, + * which preallocates *both* operands. Preallocating both means we + * have to potentially issue two movs into the preallocated regs, + * and the second of those movs might be a problem. Specifically: + * the second mov-to-prealloc might make be moving from a + * register-indirect mem cell based on a vreg, and that vreg may + * wind up being assigned to an hreg that we just loaded with the + * first mov. In other words, the second mov may retask the + * preallocated hreg we set up in the first mov. + * + * You laugh, but of course this actually happens. + * + * So here we do a conservative thing and check to see if either + * operand is memory-indirect at all. 
If either is, then for either + * of the 'old' operands we're *about* to mov into a prealloc reg, + * we first bounce them off a spill slot. Spill slots, thankfully, + * we can always count on being able to address irrespective of the + * opinions of the RA, as they are all just fp-relative. + * + * A slightly more aggressive version of this would only bounce + * cases that are not fp-relative already, though doing so would + * require threading the notion of what fp *is* through to + * here. Possibly tighten this up in the future (or just + * ... destroy this backend ASAP). + * + *) + let has_reg_indirect op = + match op with + Cell (Mem _) -> true + | _ -> false + in + let either_old_op_has_reg_indirect = + (has_reg_indirect old_lhs_op) || (has_reg_indirect old_rhs_op) + in + let old_lhs_op = + if either_old_op_has_reg_indirect && (new_lhs_op <> old_lhs_op) + then + let tmp = + Mem (next_spill_slot e + (ScalarTy (operand_scalar_ty old_lhs_op))) + in + emit_mov tmp old_lhs_op; + Cell tmp + else + old_lhs_op + in + let old_rhs_op = + if either_old_op_has_reg_indirect && (new_rhs_op <> old_rhs_op) + then + let tmp = + Mem (next_spill_slot e + (ScalarTy (operand_scalar_ty old_rhs_op))) + in + emit_mov tmp old_rhs_op; + Cell tmp + else + old_rhs_op + in + mov_if_operands_differ old_lhs_op new_lhs_op; + mov_if_operands_differ old_rhs_op new_rhs_op; + in + + let mov_if_cells_differ (old_cell:cell) (new_cell:cell) : unit = + if not (new_cell = old_cell) + then + emit_mov old_cell (Cell new_cell) + in + + let emit_decayed_quad q' = + match (q', e.emit_preallocator q') with + (Binary b, Binary b') -> + begin + mov_if_two_operands_differ + b.binary_lhs b'.binary_lhs + b.binary_rhs b'.binary_rhs; + if e.emit_is_2addr && + (not (b'.binary_lhs = (Cell b'.binary_dst))) + then + begin + emit_mov b'.binary_dst b'.binary_lhs; + emit_quad (Binary { b' with + binary_lhs = (Cell b'.binary_dst) }) + end + else + emit_quad (Binary b'); + mov_if_cells_differ b.binary_dst b'.binary_dst + 
end + + | (Unary u, Unary u') -> + mov_if_operands_differ u.unary_src u'.unary_src; + (* Assume '2addr' means '1addr' for unary ops. *) + if e.emit_is_2addr && + (u'.unary_op = NEG || u'.unary_op = NOT) && + (not (u'.unary_src = (Cell u'.unary_dst))) + then + begin + emit_mov u'.unary_dst u'.unary_src; + emit_quad (Unary { u' with unary_src = (Cell u'.unary_dst) }) + end + else + emit_quad (Unary u'); + mov_if_cells_differ u.unary_dst u'.unary_dst + + | (Cmp c, Cmp c') -> + mov_if_two_operands_differ + c.cmp_lhs c'.cmp_lhs + c.cmp_rhs c'.cmp_rhs; + emit_quad (Cmp c'); + + | (Push op, Push op') -> + mov_if_operands_differ op op'; + emit_quad (Push op'); + + | (Pop c, Pop c') -> + emit_quad (Pop c'); + mov_if_cells_differ c c' + + | (Call c, Call c') -> + emit_quad (Call c'); + mov_if_cells_differ c.call_dst c'.call_dst + + | (Lea lea, Lea lea') -> + emit_quad (Lea lea'); + mov_if_cells_differ lea.lea_dst lea'.lea_dst + + | (x, y) -> + assert (x = y); + emit_quad x + in + + (* pre-decay mem-mem movs. *) + match q' with + Unary { unary_dst = Mem (dst_mem, ScalarTy src_st); + unary_src = Cell (Mem (src_mem, ScalarTy dst_st)); + unary_op = op } + when is_mov op -> + let v = next_vreg_cell e dst_st in + emit_decayed_quad + (unary op v (Cell (Mem (src_mem, ScalarTy src_st)))); + emit_decayed_quad + (unary op (Mem (dst_mem, ScalarTy dst_st)) (Cell v)) + | _ -> emit_decayed_quad q' +;; + +let emit (e:emitter) (q':quad') : unit = + emit_full e None [] q' +;; + +let patch_jump (e:emitter) (jmp:int) (targ:int) : unit = + let q = e.emit_quads.(jmp) in + match q.quad_body with + Jmp j -> + assert (j.jmp_targ = CodeNone); + e.emit_quads.(jmp) <- + { q with quad_body = + Jmp { j with jmp_targ = CodeLabel targ } } + | _ -> () +;; + +(* More query functions. 
*) + +let get_element_ptr + (word_bits:bits) + (fmt:hreg_formatter) + (mem_cell:cell) + (i:int) + : cell = + match mem_cell with + Mem (mem, StructTy elts) when i >= 0 && i < (Array.length elts) -> + assert ((Array.length elts) != 0); + begin + let elt_rty = elts.(i) in + let elt_off = get_element_offset word_bits elts i in + match elt_off with + SIZE_fixed fixed_off -> + Mem (mem_off_imm mem fixed_off, elt_rty) + | _ -> bug () + "get_element_ptr %d on dynamic-size cell: offset %s" + i (string_of_size elt_off) + end + + | _ -> bug () "get_element_ptr %d on cell %s" i + (string_of_cell fmt mem_cell) +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/macho.ml b/src/boot/be/macho.ml new file mode 100644 index 00000000..7fccdfd3 --- /dev/null +++ b/src/boot/be/macho.ml @@ -0,0 +1,1184 @@ +open Asm;; +open Common;; + +(* Mach-O writer. *) + +let log (sess:Session.sess) = + Session.log "obj (mach-o)" + sess.Session.sess_log_obj + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_obj + then thunk () + else () +;; + +let (cpu_arch_abi64:int64) = 0x01000000L +;; + +let (mh_magic:int64) = 0xfeedfaceL +;; + +let cpu_subtype_intel (f:int64) (m:int64) : int64 = + Int64.add f (Int64.shift_left m 4) +;; + +type cpu_type = + (* Maybe support more later. *) + CPU_TYPE_X86 + | CPU_TYPE_X86_64 + | CPU_TYPE_ARM + | CPU_TYPE_POWERPC +;; + +type cpu_subtype = + (* Maybe support more later. 
*) + CPU_SUBTYPE_X86_ALL + | CPU_SUBTYPE_X86_64_ALL + | CPU_SUBTYPE_ARM_ALL + | CPU_SUBTYPE_POWERPC_ALL +;; + +type file_type = + MH_OBJECT + | MH_EXECUTE + | MH_FVMLIB + | MH_CORE + | MH_PRELOAD + | MH_DYLIB + | MH_DYLINKER + | MH_BUNDLE + | MH_DYLIB_STUB + | MH_DSYM +;; + +let file_type_code (ft:file_type) : int64 = + match ft with + MH_OBJECT ->0x1L (* object *) + | MH_EXECUTE -> 0x2L (* executable *) + | MH_FVMLIB -> 0x3L (* fixed-VM shared lib *) + | MH_CORE -> 0x4L (* core *) + | MH_PRELOAD -> 0x5L (* preloaded executable *) + | MH_DYLIB -> 0x6L (* dynamic lib *) + | MH_DYLINKER -> 0x7L (* dynamic linker *) + | MH_BUNDLE -> 0x8L (* bundle *) + | MH_DYLIB_STUB -> 0x9L (* shared lib stub *) + | MH_DSYM -> 0xaL (* debuginfo only *) +;; + +type file_flag = + MH_NOUNDEFS + | MH_INCRLINK + | MH_DYLDLINK + | MH_BINDATLOAD + | MH_PREBOUND + | MH_SPLIT_SEGS + | MH_LAZY_INIT + | MH_TWOLEVEL + | MH_FORCE_FLAT + | MH_NOMULTIDEFS + | MH_NOFIXPREBINDING + | MH_PREBINDABLE + | MH_ALLMODSBOUND + | MH_SUBSECTIONS_VIA_SYMBOLS + | MH_CANONICAL + | MH_WEAK_DEFINES + | MH_BINDS_TO_WEAK + | MH_ALLOW_STACK_EXECUTION + | MH_ROOT_SAFE + | MH_SETUID_SAFE + | MH_NO_REEXPORTED_DYLIBS + | MH_PIE +;; + +let file_flag_code (ff:file_flag) : int64 = + match ff with + MH_NOUNDEFS -> 0x1L + | MH_INCRLINK -> 0x2L + | MH_DYLDLINK -> 0x4L + | MH_BINDATLOAD -> 0x8L + | MH_PREBOUND -> 0x10L + | MH_SPLIT_SEGS -> 0x20L + | MH_LAZY_INIT -> 0x40L + | MH_TWOLEVEL -> 0x80L + | MH_FORCE_FLAT -> 0x100L + | MH_NOMULTIDEFS -> 0x200L + | MH_NOFIXPREBINDING -> 0x400L + | MH_PREBINDABLE -> 0x800L + | MH_ALLMODSBOUND -> 0x1000L + | MH_SUBSECTIONS_VIA_SYMBOLS -> 0x2000L + | MH_CANONICAL -> 0x4000L + | MH_WEAK_DEFINES -> 0x8000L + | MH_BINDS_TO_WEAK -> 0x10000L + | MH_ALLOW_STACK_EXECUTION -> 0x20000L + | MH_ROOT_SAFE -> 0x40000L + | MH_SETUID_SAFE -> 0x80000L + | MH_NO_REEXPORTED_DYLIBS -> 0x100000L + | MH_PIE -> 0x200000L +;; + + +type vm_prot = + VM_PROT_NONE + | VM_PROT_READ + | VM_PROT_WRITE + | 
VM_PROT_EXECUTE +;; + + +type load_command = + LC_SEGMENT + | LC_SYMTAB + | LC_SYMSEG + | LC_THREAD + | LC_UNIXTHREAD + | LC_LOADFVMLIB + | LC_IDFVMLIB + | LC_IDENT + | LC_FVMFILE + | LC_PREPAGE + | LC_DYSYMTAB + | LC_LOAD_DYLIB + | LC_ID_DYLIB + | LC_LOAD_DYLINKER + | LC_ID_DYLINKER + | LC_PREBOUND_DYLIB + | LC_ROUTINES + | LC_SUB_FRAMEWORK + | LC_SUB_UMBRELLA + | LC_SUB_CLIENT + | LC_SUB_LIBRARY + | LC_TWOLEVEL_HINTS + | LC_PREBIND_CKSUM + | LC_LOAD_WEAK_DYLIB + | LC_SEGMENT_64 + | LC_ROUTINES_64 + | LC_UUID + | LC_RPATH + | LC_CODE_SIGNATURE + | LC_SEGMENT_SPLIT_INFO + | LC_REEXPORT_DYLIB + | LC_LAZY_LOAD_DYLIB + | LC_ENCRYPTION_INFO +;; + + +let cpu_type_code (cpu:cpu_type) : int64 = + match cpu with + CPU_TYPE_X86 -> 7L + | CPU_TYPE_X86_64 -> Int64.logor 7L cpu_arch_abi64 + | CPU_TYPE_ARM -> 12L + | CPU_TYPE_POWERPC -> 18L +;; + +let cpu_subtype_code (cpu:cpu_subtype) : int64 = + match cpu with + CPU_SUBTYPE_X86_ALL -> 3L + | CPU_SUBTYPE_X86_64_ALL -> 3L + | CPU_SUBTYPE_ARM_ALL -> 0L + | CPU_SUBTYPE_POWERPC_ALL -> 0L +;; + + +let vm_prot_code (vmp:vm_prot) : int64 = + match vmp with + VM_PROT_NONE -> 0L + | VM_PROT_READ -> 1L + | VM_PROT_WRITE -> 2L + | VM_PROT_EXECUTE -> 4L +;; + + +let lc_req_dyld = 0x80000000L;; + +let load_command_code (lc:load_command) = + match lc with + | LC_SEGMENT -> 0x1L + | LC_SYMTAB -> 0x2L + | LC_SYMSEG -> 0x3L + | LC_THREAD -> 0x4L + | LC_UNIXTHREAD -> 0x5L + | LC_LOADFVMLIB -> 0x6L + | LC_IDFVMLIB -> 0x7L + | LC_IDENT -> 0x8L + | LC_FVMFILE -> 0x9L + | LC_PREPAGE -> 0xaL + | LC_DYSYMTAB -> 0xbL + | LC_LOAD_DYLIB -> 0xcL + | LC_ID_DYLIB -> 0xdL + | LC_LOAD_DYLINKER -> 0xeL + | LC_ID_DYLINKER -> 0xfL + | LC_PREBOUND_DYLIB -> 0x10L + | LC_ROUTINES -> 0x11L + | LC_SUB_FRAMEWORK -> 0x12L + | LC_SUB_UMBRELLA -> 0x13L + | LC_SUB_CLIENT -> 0x14L + | LC_SUB_LIBRARY -> 0x15L + | LC_TWOLEVEL_HINTS -> 0x16L + | LC_PREBIND_CKSUM -> 0x17L + | LC_LOAD_WEAK_DYLIB -> Int64.logor lc_req_dyld 0x18L + | LC_SEGMENT_64 -> 0x19L + | LC_ROUTINES_64 -> 
0x1aL + | LC_UUID -> 0x1bL + | LC_RPATH -> Int64.logor lc_req_dyld 0x1cL + | LC_CODE_SIGNATURE -> 0x1dL + | LC_SEGMENT_SPLIT_INFO -> 0x1eL + | LC_REEXPORT_DYLIB -> Int64.logor lc_req_dyld 0x1fL + | LC_LAZY_LOAD_DYLIB -> 0x20L + | LC_ENCRYPTION_INFO -> 0x21L +;; + + +let fixed_sz_string (sz:int) (str:string) : frag = + if String.length str > sz + then STRING (String.sub str 0 sz) + else SEQ [| STRING str; PAD (sz - (String.length str)) |] +;; + +type sect_type = + S_REGULAR + | S_ZEROFILL + | S_CSTRING_LITERALS + | S_4BYTE_LITERALS + | S_8BYTE_LITERALS + | S_LITERAL_POINTERS + | S_NON_LAZY_SYMBOL_POINTERS + | S_LAZY_SYMBOL_POINTERS + | S_SYMBOL_STUBS + | S_MOD_INIT_FUNC_POINTERS + | S_MOD_TERM_FUNC_POINTERS + | S_COALESCED + | S_GB_ZEROFILL + | S_INTERPOSING + | S_16BYTE_LITERALS + | S_DTRACE_DOF + | S_LAZY_DYLIB_SYMBOL_POINTERS +;; + +let sect_type_code (s:sect_type) : int64 = + match s with + S_REGULAR -> 0x0L + | S_ZEROFILL -> 0x1L + | S_CSTRING_LITERALS -> 0x2L + | S_4BYTE_LITERALS -> 0x3L + | S_8BYTE_LITERALS -> 0x4L + | S_LITERAL_POINTERS -> 0x5L + | S_NON_LAZY_SYMBOL_POINTERS -> 0x6L + | S_LAZY_SYMBOL_POINTERS -> 0x7L + | S_SYMBOL_STUBS -> 0x8L + | S_MOD_INIT_FUNC_POINTERS -> 0x9L + | S_MOD_TERM_FUNC_POINTERS -> 0xaL + | S_COALESCED -> 0xbL + | S_GB_ZEROFILL -> 0xcL + | S_INTERPOSING -> 0xdL + | S_16BYTE_LITERALS -> 0xeL + | S_DTRACE_DOF -> 0xfL + | S_LAZY_DYLIB_SYMBOL_POINTERS -> 0x10L +;; + +type sect_attr = + S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_NO_TOC + | S_ATTR_STRIP_STATIC_SYMS + | S_ATTR_NO_DEAD_STRIP + | S_ATTR_LIVE_SUPPORT + | S_ATTR_SELF_MODIFYING_CODE + | S_ATTR_DEBUG + | S_ATTR_SOME_INSTRUCTIONS + | S_ATTR_EXT_RELOC + | S_ATTR_LOC_RELOC +;; + +let sect_attr_code (s:sect_attr) : int64 = + match s with + S_ATTR_PURE_INSTRUCTIONS -> 0x80000000L + | S_ATTR_NO_TOC -> 0x40000000L + | S_ATTR_STRIP_STATIC_SYMS -> 0x20000000L + | S_ATTR_NO_DEAD_STRIP -> 0x10000000L + | S_ATTR_LIVE_SUPPORT -> 0x08000000L + | S_ATTR_SELF_MODIFYING_CODE -> 0x04000000L + | 
S_ATTR_DEBUG -> 0x02000000L + | S_ATTR_SOME_INSTRUCTIONS -> 0x00000400L + | S_ATTR_EXT_RELOC -> 0x00000200L + | S_ATTR_LOC_RELOC -> 0x00000100L +;; + +type n_type = + | N_EXT + | N_UNDF + | N_ABS + | N_SECT + | N_PBUD + | N_INDIR +;; + +let n_type_code (n:n_type) : int64 = + match n with + N_EXT -> 0x1L + | N_UNDF -> 0x0L + | N_ABS -> 0x2L + | N_SECT -> 0xeL + | N_PBUD -> 0xcL + | N_INDIR -> 0xaL +;; + + +type n_desc_reference_type = + REFERENCE_FLAG_UNDEFINED_NON_LAZY + | REFERENCE_FLAG_UNDEFINED_LAZY + | REFERENCE_FLAG_DEFINED + | REFERENCE_FLAG_PRIVATE_DEFINED + | REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY + | REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY +;; + +let n_desc_reference_type_code (n:n_desc_reference_type) : int64 = + match n with + REFERENCE_FLAG_UNDEFINED_NON_LAZY -> 0x0L + | REFERENCE_FLAG_UNDEFINED_LAZY -> 0x1L + | REFERENCE_FLAG_DEFINED -> 0x2L + | REFERENCE_FLAG_PRIVATE_DEFINED -> 0x3L + | REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY -> 0x4L + | REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY -> 0x5L +;; + +type n_desc_flags = + REFERENCED_DYNAMICALLY + | N_DESC_DISCARDED + | N_NO_DEAD_STRIP + | N_WEAK_REF + | N_WEAK_DEF +;; + +let n_desc_flags_code (n:n_desc_flags) : int64 = + match n with + REFERENCED_DYNAMICALLY -> 0x10L + | N_DESC_DISCARDED -> 0x20L + | N_NO_DEAD_STRIP -> 0x20L (* Yes, they reuse 0x20. 
*) + | N_WEAK_REF -> 0x40L + | N_WEAK_DEF -> 0x80L +;; + +type n_desc_dylib_ordinal = int;; + +type n_desc = (n_desc_dylib_ordinal * + (n_desc_flags list) * + n_desc_reference_type) +;; + +let n_desc_code (n:n_desc) : int64 = + let (dylib_ordinal, flags, ty) = n in + Int64.logor + (Int64.of_int (dylib_ordinal lsl 8)) + (Int64.logor + (fold_flags n_desc_flags_code flags) + (n_desc_reference_type_code ty)) +;; + + +let macho_section_command + (seg_name:string) + (sect:(string * int * (sect_attr list) * sect_type * fixup)) + : frag = + let (sect_name, sect_align, sect_attrs, sect_type, sect_fixup) = sect in + SEQ [| + fixed_sz_string 16 sect_name; + fixed_sz_string 16 seg_name; + WORD (TY_u32, M_POS sect_fixup); + WORD (TY_u32, M_SZ sect_fixup); + WORD (TY_u32, F_POS sect_fixup); + WORD (TY_u32, IMM (Int64.of_int sect_align)); + WORD (TY_u32, IMM 0L); (* reloff *) + WORD (TY_u32, IMM 0L); (* nreloc *) + WORD (TY_u32, (IMM (Int64.logor (* flags (and attrs) *) + (fold_flags sect_attr_code sect_attrs) + (sect_type_code sect_type)))); + WORD (TY_u32, IMM 0L); (* reserved1 *) + WORD (TY_u32, IMM 0L); (* reserved2 *) + |] +;; + +let macho_segment_command + (seg_name:string) + (seg_fixup:fixup) + (maxprot:vm_prot list) + (initprot:vm_prot list) + (sects:(string * int * (sect_attr list) * sect_type * fixup) array) + : frag = + + let cmd_fixup = new_fixup "segment command" in + let cmd = + SEQ [| + WORD (TY_u32, IMM (load_command_code LC_SEGMENT)); + WORD (TY_u32, F_SZ cmd_fixup); + fixed_sz_string 16 seg_name; + WORD (TY_u32, M_POS seg_fixup); + WORD (TY_u32, M_SZ seg_fixup); + WORD (TY_u32, F_POS seg_fixup); + WORD (TY_u32, F_SZ seg_fixup); + WORD (TY_u32, IMM (fold_flags vm_prot_code maxprot)); + WORD (TY_u32, IMM (fold_flags vm_prot_code initprot)); + WORD (TY_u32, IMM (Int64.of_int (Array.length sects))); + WORD (TY_u32, IMM 0L); (* Flags? 
*) + |] + in + DEF (cmd_fixup, + SEQ [| + cmd; + SEQ (Array.map (macho_section_command seg_name) sects); + |]) +;; + +let macho_thread_command + (entry:fixup) + : frag = + let cmd_fixup = new_fixup "thread command" in + let x86_THREAD_STATE32 = 1L in + let regs = + [| + WORD (TY_u32, IMM 0x0L); (* eax *) + WORD (TY_u32, IMM 0x0L); (* ebx *) + WORD (TY_u32, IMM 0x0L); (* ecx *) + WORD (TY_u32, IMM 0x0L); (* edx *) + + WORD (TY_u32, IMM 0x0L); (* edi *) + WORD (TY_u32, IMM 0x0L); (* esi *) + WORD (TY_u32, IMM 0x0L); (* ebp *) + WORD (TY_u32, IMM 0x0L); (* esp *) + + WORD (TY_u32, IMM 0x0L); (* ss *) + WORD (TY_u32, IMM 0x0L); (* eflags *) + WORD (TY_u32, M_POS entry); (* eip *) + WORD (TY_u32, IMM 0x0L); (* cs *) + + WORD (TY_u32, IMM 0x0L); (* ds *) + WORD (TY_u32, IMM 0x0L); (* es *) + WORD (TY_u32, IMM 0x0L); (* fs *) + WORD (TY_u32, IMM 0x0L); (* gs *) + |] + in + let cmd = + SEQ [| + WORD (TY_u32, IMM (load_command_code LC_UNIXTHREAD)); + WORD (TY_u32, F_SZ cmd_fixup); + WORD (TY_u32, IMM x86_THREAD_STATE32); (* "flavour" *) + WORD (TY_u32, IMM (Int64.of_int (Array.length regs))); + SEQ regs + |] + in + DEF (cmd_fixup, cmd) +;; + +let macho_dylinker_command : frag = + let cmd_fixup = new_fixup "dylinker command" in + let str_fixup = new_fixup "dylinker lc_str fixup" in + let cmd = + SEQ + [| + WORD (TY_u32, IMM (load_command_code LC_LOAD_DYLINKER)); + WORD (TY_u32, F_SZ cmd_fixup); + + (* see definition of lc_str; these things are weird. *) + WORD (TY_u32, SUB (F_POS (str_fixup), F_POS (cmd_fixup))); + DEF (str_fixup, ZSTRING "/usr/lib/dyld"); + ALIGN_FILE (4, MARK); + |] + in + DEF (cmd_fixup, cmd); +;; + +let macho_dylib_command (dylib:string) : frag = + + let cmd_fixup = new_fixup "dylib command" in + let str_fixup = new_fixup "dylib lc_str fixup" in + let cmd = + SEQ + [| + WORD (TY_u32, IMM (load_command_code LC_LOAD_DYLIB)); + WORD (TY_u32, F_SZ cmd_fixup); + + (* see definition of lc_str; these things are weird. 
*) + WORD (TY_u32, SUB (F_POS (str_fixup), F_POS (cmd_fixup))); + + WORD (TY_u32, IMM 0L); (* timestamp *) + WORD (TY_u32, IMM 0L); (* current_version *) + WORD (TY_u32, IMM 0L); (* compatibility_version *) + + (* Payload-and-alignment of an lc_str goes at end of command. *) + DEF (str_fixup, ZSTRING dylib); + ALIGN_FILE (4, MARK); + + |] + in + DEF (cmd_fixup, cmd) +;; + + +let macho_symtab_command + (symtab_fixup:fixup) + (nsyms:int64) + (strtab_fixup:fixup) + : frag = + let cmd_fixup = new_fixup "symtab command" in + let cmd = + SEQ + [| + WORD (TY_u32, IMM (load_command_code LC_SYMTAB)); + WORD (TY_u32, F_SZ cmd_fixup); + + WORD (TY_u32, F_POS symtab_fixup); (* symoff *) + WORD (TY_u32, IMM nsyms); (* nsyms *) + + WORD (TY_u32, F_POS strtab_fixup); (* stroff *) + WORD (TY_u32, F_SZ strtab_fixup); (* strsz *) + |] + in + DEF (cmd_fixup, cmd) +;; + +let macho_dysymtab_command + (local_defined_syms_index:int64) + (local_defined_syms_count:int64) + (external_defined_syms_index:int64) + (external_defined_syms_count:int64) + (undefined_syms_index:int64) + (undefined_syms_count:int64) + (indirect_symtab_fixup:fixup) : frag = + let cmd_fixup = new_fixup "dysymtab command" in + let cmd = + SEQ + [| + WORD (TY_u32, IMM (load_command_code LC_DYSYMTAB)); + WORD (TY_u32, F_SZ cmd_fixup); + + WORD (TY_u32, IMM local_defined_syms_index); (* ilocalsym *) + WORD (TY_u32, IMM local_defined_syms_count); (* nlocalsym *) + + WORD (TY_u32, IMM external_defined_syms_index); (* iextdefsym *) + WORD (TY_u32, IMM external_defined_syms_count); (* nextdefsym *) + + WORD (TY_u32, IMM undefined_syms_index); (* iundefsym *) + WORD (TY_u32, IMM undefined_syms_count); (* nundefsym *) + + WORD (TY_u32, IMM 0L); (* tocoff *) + WORD (TY_u32, IMM 0L); (* ntoc *) + + WORD (TY_u32, IMM 0L); (* modtaboff *) + WORD (TY_u32, IMM 0L); (* nmodtab *) + + WORD (TY_u32, IMM 0L); (* extrefsymoff *) + WORD (TY_u32, IMM 0L); (* nextrefsyms *) + + WORD (TY_u32, F_POS indirect_symtab_fixup); (* indirectsymoff *) 
+ WORD (TY_u32, IMM undefined_syms_count); (* nindirectsyms *) + + WORD (TY_u32, IMM 0L); (* extreloff *) + WORD (TY_u32, IMM 0L); (* nextrel *) + + WORD (TY_u32, IMM 0L); (* locreloff *) + WORD (TY_u32, IMM 0L); (* nlocrel *) + |] + in + DEF (cmd_fixup, cmd) +;; + +let macho_header_32 + (cpu:cpu_type) + (sub:cpu_subtype) + (ftype:file_type) + (flags:file_flag list) + (loadcmds:frag array) : frag = + let load_commands_fixup = new_fixup "load commands" in + let cmds = DEF (load_commands_fixup, SEQ loadcmds) in + SEQ + [| + WORD (TY_u32, IMM mh_magic); + WORD (TY_u32, IMM (cpu_type_code cpu)); + WORD (TY_u32, IMM (cpu_subtype_code sub)); + WORD (TY_u32, IMM (file_type_code ftype)); + WORD (TY_u32, IMM (Int64.of_int (Array.length loadcmds))); + WORD (TY_u32, F_SZ load_commands_fixup); + WORD (TY_u32, IMM (fold_flags file_flag_code flags)); + cmds + |] +;; + +let emit_file + (sess:Session.sess) + (crate:Ast.crate) + (code:Asm.frag) + (data:Asm.frag) + (sem:Semant.ctxt) + (dwarf:Dwarf.debug_records) + : unit = + + (* FIXME: alignment? 
*) + + let mh_execute_header_fixup = new_fixup "__mh_execute header" in + + let nxargc_fixup = (Semant.provide_native sem SEG_data "NXArgc") in + let nxargv_fixup = (Semant.provide_native sem SEG_data "NXArgv") in + let progname_fixup = (Semant.provide_native sem SEG_data "__progname") in + let environ_fixup = (Semant.provide_native sem SEG_data "environ") in + let exit_fixup = (Semant.require_native sem REQUIRED_LIB_crt "exit") in + let (start_fixup, rust_start_fixup) = + if sess.Session.sess_library_mode + then (None, None) + else (Some (new_fixup "start function entry"), + Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) + in + + let text_sect_align_log2 = 2 in + let data_sect_align_log2 = 2 in + + let seg_align = 0x1000 in + let text_sect_align = 2 lsl text_sect_align_log2 in + let data_sect_align = 2 lsl data_sect_align_log2 in + + let align_both align i = + ALIGN_FILE (align, + (ALIGN_MEM (align, i))) + in + + let def_aligned a f i = + align_both a + (SEQ [| DEF(f, i); + (align_both a MARK)|]) + in + + (* Segments. *) + let zero_segment_fixup = new_fixup "__PAGEZERO segment" in + let text_segment_fixup = new_fixup "__TEXT segment" in + let data_segment_fixup = new_fixup "__DATA segment" in + let dwarf_segment_fixup = new_fixup "__DWARF segment" in + let linkedit_segment_fixup = new_fixup "__LINKEDIT segment" in + + (* Sections in the text segment. *) + let text_section_fixup = new_fixup "__text section" in + + (* Sections in the data segment. 
*) + let data_section_fixup = new_fixup "__data section" in + let const_section_fixup = new_fixup "__const section" in + let bss_section_fixup = new_fixup "__bss section" in + let note_rust_section_fixup = new_fixup "__note.rust section" in + let nl_symbol_ptr_section_fixup = new_fixup "__nl_symbol_ptr section" in + + let data_section = def_aligned data_sect_align data_section_fixup data in + let const_section = + def_aligned data_sect_align const_section_fixup (SEQ [| |]) + in + let bss_section = + def_aligned data_sect_align bss_section_fixup (SEQ [| |]) + in + let note_rust_section = + def_aligned + data_sect_align note_rust_section_fixup + (Asm.note_rust_frags crate.node.Ast.crate_meta) + in + + (* Officially Apple doesn't claim to support DWARF sections like this, but + they work. *) + let debug_info_section = + def_aligned data_sect_align + sem.Semant.ctxt_debug_info_fixup + dwarf.Dwarf.debug_info + in + let debug_abbrev_section = + def_aligned data_sect_align + sem.Semant.ctxt_debug_abbrev_fixup + dwarf.Dwarf.debug_abbrev + in + + + (* String, symbol and parallel "nonlazy-pointer" tables. 
*) + let symtab_fixup = new_fixup "symtab" in + let strtab_fixup = new_fixup "strtab" in + + let symbol_nlist_entry + (sect_index:int) + (nty:n_type list) + (nd:n_desc) + (nv:Asm.expr64) + : (frag * fixup) = + let strtab_entry_fixup = new_fixup "strtab entry" in + (SEQ + [| + WORD (TY_u32, SUB ((F_POS strtab_entry_fixup), + (F_POS strtab_fixup))); + BYTE (Int64.to_int (fold_flags n_type_code nty)); + BYTE sect_index; + WORD (TY_u16, IMM (n_desc_code nd)); + WORD (TY_u32, nv); + |], strtab_entry_fixup) + in + + let sect_symbol_nlist_entry + (seg:segment) + (fixup_to_use:fixup) + : (frag * fixup) = + let nty = [ N_SECT; N_EXT ] in + let nd = (0, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in + let (sect_index, _(*seg_fix*)) = + match seg with + SEG_text -> (1, text_segment_fixup) + | SEG_data -> (2, data_segment_fixup) + in + symbol_nlist_entry sect_index nty nd (M_POS fixup_to_use) + in + + let sect_private_symbol_nlist_entry + (seg:segment) + (fixup_to_use:fixup) + : (frag * fixup) = + let nty = [ N_SECT; ] in + let nd = (0, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in + let (sect_index, _(*seg_fix*)) = + match seg with + SEG_text -> (1, text_segment_fixup) + | SEG_data -> (2, data_segment_fixup) + in + symbol_nlist_entry sect_index nty nd (M_POS fixup_to_use) + in + + let indirect_symbol_nlist_entry (dylib_index:int) : (frag * fixup) = + let nty = [ N_UNDF; N_EXT ] in + let nd = (dylib_index, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in + symbol_nlist_entry 0 nty nd (IMM 0L) + in + + let indirect_symbols = + Array.of_list + (List.concat + (List.map + (fun (lib, tab) -> + (List.map + (fun (name,fix) -> (lib,name,fix)) + (htab_pairs tab))) + (htab_pairs sem.Semant.ctxt_native_required))) + in + + let dylib_index (lib:required_lib) : int = + match lib with + REQUIRED_LIB_rustrt -> 1 + | REQUIRED_LIB_crt -> 2 + | _ -> bug () "Macho.dylib_index on nonstandard required lib." + in + + (* Make undef symbols for native imports. 
*) + let (undefined_symbols:(string * (frag * fixup)) array) = + Array.map (fun (lib,name,_) -> + ("_" ^ name, + indirect_symbol_nlist_entry (dylib_index lib))) + indirect_symbols + in + + (* Make symbols for exports. *) + let (export_symbols:(string * (frag * fixup)) array) = + let export_symbols_of_seg (seg, tab) = + List.map + begin + fun (name, fix) -> + let name = "_" ^ name in + let sym = sect_symbol_nlist_entry seg fix in + (name, sym) + end + (htab_pairs tab) + in + Array.of_list + (List.concat + (List.map export_symbols_of_seg + (htab_pairs sem.Semant.ctxt_native_provided))) + in + + (* Make private symbols for items. *) + let (local_item_symbols:(string * (frag * fixup)) array) = + Array.map (fun code -> + let fix = code.Semant.code_fixup in + ("_" ^ fix.fixup_name, + sect_private_symbol_nlist_entry SEG_text fix)) + (Array.of_list (htab_vals sem.Semant.ctxt_all_item_code)) + in + + (* Make private symbols for glue. *) + let (local_glue_symbols:(string * (frag * fixup)) array) = + Array.map (fun (g, code) -> + let fix = code.Semant.code_fixup in + ("_" ^ (Semant.glue_str sem g), + sect_private_symbol_nlist_entry SEG_text fix)) + (Array.of_list (htab_pairs sem.Semant.ctxt_glue_code)) + in + + let (export_header_symbols:(string * (frag * fixup)) array) = + let name = + if sess.Session.sess_library_mode + then "__mh_dylib_header" + else "__mh_execute_header" + in + [| + (name, sect_symbol_nlist_entry SEG_text mh_execute_header_fixup); + |] + in + + let export_symbols = Array.concat [ export_symbols; + export_header_symbols ] + in + + let local_symbols = Array.concat [ local_item_symbols; + local_glue_symbols ] + in + + let symbols = Array.concat [ local_symbols; + export_symbols; + undefined_symbols ] + in + let n_local_syms = Array.length local_symbols in + let n_export_syms = Array.length export_symbols in + let n_undef_syms = Array.length undefined_symbols in + + let indirect_symbols_off = n_local_syms + n_export_syms in + let indirect_symtab_fixup = 
new_fixup "indirect symbol table" in + let indirect_symtab = + DEF (indirect_symtab_fixup, + SEQ (Array.mapi + (fun i _ -> WORD (TY_u32, + IMM (Int64.of_int + (i + indirect_symbols_off)))) + indirect_symbols)) + in + + let nl_symbol_ptr_section = + def_aligned data_sect_align nl_symbol_ptr_section_fixup + (SEQ (Array.map + (fun (_, _, fix) -> + DEF(fix, WORD(TY_u32, IMM 0L))) + indirect_symbols)) + in + let strtab = DEF (strtab_fixup, + SEQ (Array.map + (fun (name, (_, fix)) -> DEF(fix, ZSTRING name)) + symbols)) + in + let symtab = DEF (symtab_fixup, + SEQ (Array.map (fun (_, (frag, _)) -> frag) symbols)) + in + + + let load_commands = + [| + macho_segment_command "__PAGEZERO" zero_segment_fixup + [] [] [||]; + + macho_segment_command "__TEXT" text_segment_fixup + [VM_PROT_READ; VM_PROT_EXECUTE] + [VM_PROT_READ; VM_PROT_EXECUTE] + [| + ("__text", text_sect_align_log2, [], S_REGULAR, text_section_fixup) + |]; + + macho_segment_command "__DATA" data_segment_fixup + [VM_PROT_READ; VM_PROT_WRITE] + [VM_PROT_READ; VM_PROT_WRITE] + [| + ("__data", data_sect_align_log2, [], + S_REGULAR, data_section_fixup); + ("__const", data_sect_align_log2, [], + S_REGULAR, const_section_fixup); + ("__bss", data_sect_align_log2, [], + S_REGULAR, bss_section_fixup); + ("__note.rust", data_sect_align_log2, [], + S_REGULAR, note_rust_section_fixup); + ("__nl_symbol_ptr", data_sect_align_log2, [], + S_NON_LAZY_SYMBOL_POINTERS, nl_symbol_ptr_section_fixup) + |]; + + macho_segment_command "__DWARF" dwarf_segment_fixup + [VM_PROT_READ] + [VM_PROT_READ] + [| + ("__debug_info", data_sect_align_log2, [], + S_REGULAR, sem.Semant.ctxt_debug_info_fixup); + ("__debug_abbrev", data_sect_align_log2, [], + S_REGULAR, sem.Semant.ctxt_debug_abbrev_fixup); + |]; + + macho_segment_command "__LINKEDIT" linkedit_segment_fixup + [VM_PROT_READ] + [VM_PROT_READ] + [| + |]; + + macho_symtab_command + symtab_fixup (Int64.of_int (Array.length symbols)) strtab_fixup; + + + macho_dysymtab_command + 0L + 
(Int64.of_int n_local_syms) + (Int64.of_int n_local_syms) + (Int64.of_int n_export_syms) + (Int64.of_int (n_local_syms + n_export_syms)) + (Int64.of_int n_undef_syms) + indirect_symtab_fixup; + + macho_dylinker_command; + + macho_dylib_command "librustrt.dylib"; + + macho_dylib_command "/usr/lib/libSystem.B.dylib"; + + begin + match start_fixup with + None -> MARK + | Some start_fixup -> + macho_thread_command start_fixup + end; + |] + in + + let header_and_commands = + macho_header_32 + CPU_TYPE_X86 + CPU_SUBTYPE_X86_ALL + (if sess.Session.sess_library_mode then MH_DYLIB else MH_EXECUTE) + [ MH_BINDATLOAD; MH_DYLDLINK; MH_TWOLEVEL ] + load_commands + in + + let objfile_start e start_fixup rust_start_fixup main_fn_fixup = + let edx = X86.h X86.edx in + let edx_pointee = + Il.Mem ((Il.RegIn (edx, None)), Il.ScalarTy (Il.AddrTy Il.OpaqueTy)) + in + Il.emit_full e (Some start_fixup) [] Il.Dead; + + (* zero marks the bottom of the frame chain. *) + Il.emit e (Il.Push (X86.imm (Asm.IMM 0L))); + Il.emit e (Il.umov (X86.rc X86.ebp) (X86.ro X86.esp)); + + (* 16-byte align stack for SSE. *) + Il.emit e (Il.binary Il.AND (X86.rc X86.esp) (X86.ro X86.esp) + (X86.imm (Asm.IMM 0xfffffffffffffff0L))); + + (* Store argv. *) + Abi.load_fixup_addr e edx nxargv_fixup Il.OpaqueTy; + Il.emit e (Il.lea (X86.rc X86.ecx) + (Il.Cell (Il.Mem ((Il.RegIn (Il.Hreg X86.ebp, + Some (X86.word_off_n 2))), + Il.OpaqueTy)))); + Il.emit e (Il.umov edx_pointee (X86.ro X86.ecx)); + Il.emit e (Il.Push (X86.ro X86.ecx)); + + (* Store argc. *) + Abi.load_fixup_addr e edx nxargc_fixup Il.OpaqueTy; + Il.emit e (Il.umov (X86.rc X86.eax) + (X86.c (X86.word_n (Il.Hreg X86.ebp) 1))); + Il.emit e (Il.umov edx_pointee (X86.ro X86.eax)); + Il.emit e (Il.Push (X86.ro X86.eax)); + + (* Calculte and store envp. 
*) + Il.emit e (Il.binary Il.ADD + (X86.rc X86.eax) (X86.ro X86.eax) + (X86.imm (Asm.IMM 1L))); + Il.emit e (Il.binary Il.UMUL + (X86.rc X86.eax) (X86.ro X86.eax) + (X86.imm (Asm.IMM X86.word_sz))); + Il.emit e (Il.binary Il.ADD (X86.rc X86.eax) + (X86.ro X86.eax) (X86.ro X86.ecx)); + Abi.load_fixup_addr e edx environ_fixup Il.OpaqueTy; + Il.emit e (Il.umov edx_pointee (X86.ro X86.eax)); + + (* Push 16 bytes to preserve SSE alignment. *) + Abi.load_fixup_addr e edx sem.Semant.ctxt_crate_fixup Il.OpaqueTy; + Il.emit e (Il.Push (X86.ro X86.edx)); + Abi.load_fixup_addr e edx main_fn_fixup Il.OpaqueTy; + Il.emit e (Il.Push (X86.ro X86.edx)); + let fptr = Abi.load_fixup_codeptr e edx rust_start_fixup true true in + Il.emit e (Il.call (X86.rc X86.eax) fptr); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + Il.emit e (Il.Push (X86.ro X86.eax)); + let fptr = Abi.load_fixup_codeptr e edx exit_fixup true true in + Il.emit e (Il.call (X86.rc X86.eax) fptr); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + Il.emit e (Il.Pop (X86.rc X86.ecx)); + + Il.emit e Il.Ret; + in + + let text_section = + let start_code = + match (start_fixup, rust_start_fixup, + sem.Semant.ctxt_main_fn_fixup) with + (None, _, _) + | (_, None, _) + | (_, _, None) -> MARK + | (Some start_fixup, + Some rust_start_fixup, + Some main_fn_fixup) -> + let e = X86.new_emitter_without_vregs () in + objfile_start e start_fixup rust_start_fixup main_fn_fixup; + X86.frags_of_emitted_quads sess e + in + def_aligned text_sect_align text_section_fixup + (SEQ [| + start_code; + code + |]) + in + + let text_segment = + def_aligned seg_align text_segment_fixup + (SEQ [| + DEF (mh_execute_header_fixup, header_and_commands); + text_section; + align_both seg_align MARK; + |]); + in + + let zero_segment = align_both seg_align + (SEQ [| MEMPOS 0L; DEF (zero_segment_fixup, + SEQ [| MEMPOS 0x1000L; MARK |] ) |]) + in + + let data_segment = def_aligned seg_align 
data_segment_fixup + (SEQ [| + DEF(nxargc_fixup, WORD (TY_u32, IMM 0L)); + DEF(nxargv_fixup, WORD (TY_u32, IMM 0L)); + DEF(environ_fixup, WORD (TY_u32, IMM 0L)); + DEF(progname_fixup, WORD (TY_u32, IMM 0L)); + data_section; + const_section; + bss_section; + note_rust_section; + nl_symbol_ptr_section + |]) + in + + let dwarf_segment = def_aligned seg_align dwarf_segment_fixup + (SEQ [| + debug_info_section; + debug_abbrev_section; + |]) + in + + let linkedit_segment = def_aligned seg_align linkedit_segment_fixup + (SEQ [| + symtab; + strtab; + indirect_symtab; + |]) + in + + let segments = + SEQ [| + DEF (sem.Semant.ctxt_image_base_fixup, MARK); + zero_segment; + text_segment; + data_segment; + dwarf_segment; + linkedit_segment; + |] + in + write_out_frag sess true segments +;; + + +let sniff + (sess:Session.sess) + (filename:filename) + : asm_reader option = + try + let stat = Unix.stat filename in + if (stat.Unix.st_kind = Unix.S_REG) && + (stat.Unix.st_size > 4) + then + let ar = new_asm_reader sess filename in + let _ = log sess "sniffing Mach-O file" in + if (ar.asm_get_u32()) = (Int64.to_int mh_magic) + then (ar.asm_seek 0; Some ar) + else None + else + None + with + _ -> None +;; + +let get_sections + (sess:Session.sess) + (ar:asm_reader) + : (string,(int*int)) Hashtbl.t = + let sects = Hashtbl.create 0 in + let _ = log sess "reading sections" in + let magic = ar.asm_get_u32() in + let _ = assert (magic = (Int64.to_int mh_magic)) in + let _ = ar.asm_adv_u32() in (* cpu type *) + let _ = ar.asm_adv_u32() in (* cpu subtype *) + let _ = ar.asm_adv_u32() in (* file type *) + let n_load_cmds = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in + let _ = log sess "Mach-o file with %d load commands" n_load_cmds in + let _ = ar.asm_adv_u32() in (* flags *) + let lc_seg = Int64.to_int (load_command_code LC_SEGMENT) in + for i = 0 to n_load_cmds - 1 do + let load_cmd_code = ar.asm_get_u32() in + let load_cmd_size = ar.asm_get_u32() in + let _ = log sess "load command 
%d:" i in + if load_cmd_code != lc_seg + then ar.asm_adv (load_cmd_size - 8) + else + begin + let seg_name = ar.asm_get_zstr_padded 16 in + let _ = log sess "LC_SEGMENT %s" seg_name in + let _ = ar.asm_adv_u32() in (* seg mem pos *) + let _ = ar.asm_adv_u32() in (* seg mem sz *) + let _ = ar.asm_adv_u32() in (* seg file pos *) + let _ = ar.asm_adv_u32() in (* seg file sz *) + let _ = ar.asm_adv_u32() in (* maxprot *) + let _ = ar.asm_adv_u32() in (* initprot *) + let n_sects = ar.asm_get_u32() in + let _ = ar.asm_get_u32() in (* flags *) + let _ = log sess "%d sections" in + for j = 0 to n_sects - 1 do + let sect_name = ar.asm_get_zstr_padded 16 in + let _ = ar.asm_adv 16 in (* seg name *) + let _ = ar.asm_adv_u32() in (* sect mem pos *) + let m_sz = ar.asm_get_u32() in + let f_pos = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in (* sect align *) + let _ = ar.asm_adv_u32() in (* reloff *) + let _ = ar.asm_adv_u32() in (* nreloc *) + let _ = ar.asm_adv_u32() in (* flags *) + let _ = ar.asm_adv_u32() in (* reserved1 *) + let _ = ar.asm_adv_u32() in (* reserved2 *) + let _ = + log sess + " section %d: 0x%x - 0x%x %s " + j f_pos (f_pos + m_sz) sect_name + in + let len = String.length sect_name in + let sect_name = + if (len > 2 + && sect_name.[0] = '_' + && sect_name.[1] = '_') + then "." ^ (String.sub sect_name 2 (len-2)) + else sect_name + in + Hashtbl.add sects sect_name (f_pos, m_sz) + done + end + done; + sects +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/pe.ml b/src/boot/be/pe.ml new file mode 100644 index 00000000..d360ddf5 --- /dev/null +++ b/src/boot/be/pe.ml @@ -0,0 +1,1149 @@ +(* + + Module for writing Microsoft PE files + + Every image has a base address it's to be loaded at. 
+ + "file pointer" = offset in file + + "VA" = address at runtime + + "RVA" = VA - base address + + If you write a non-RVA absolute address at any point you must put it + in a rebasing list so the loader can adjust it when/if it has to load + you at a different address. + + Almost all addresses in the file are RVAs. Worry about the VAs. + +*) + +open Asm;; +open Common;; + +let log (sess:Session.sess) = + Session.log "obj (pe)" + sess.Session.sess_log_obj + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_obj + then thunk () + else () +;; + +(* + + The default image base (VA) for an executable on Win32 is 0x400000. + + We use this too. RVAs are relative to this. RVA 0 = VA 0x400000. + + Alignments are also relatively standard and fixed for Win32/PE32: + 4k memory pages, 512 byte disk sectors. + + Since this is a stupid emitter, and we're not generating an awful + lot of sections, we are not going to differentiate between these + two kinds of alignment: we just align our sections to memory pages + and sometimes waste most of them. Shucks. + +*) + +let pe_image_base = 0x400000L;; +let pe_file_alignment = 0x200;; +let pe_mem_alignment = 0x1000;; + +let rva (f:fixup) = (SUB ((M_POS f), (IMM pe_image_base)));; + +let def_file_aligned f i = + ALIGN_FILE + (pe_file_alignment, + SEQ [| + DEF(f, + SEQ [| i; + ALIGN_FILE + (pe_file_alignment, MARK) |]) |] ) +;; + +let def_mem_aligned f i = + ALIGN_MEM + (pe_mem_alignment, + SEQ [| + DEF(f, + SEQ [| i; + ALIGN_MEM + (pe_mem_alignment, MARK) |]) |] ) +;; + +let align_both i = + ALIGN_FILE (pe_file_alignment, + (ALIGN_MEM (pe_mem_alignment, i))) +;; + +let def_aligned f i = + align_both + (SEQ [| DEF(f,i); + (align_both MARK)|]) +;; + + +(* + + At the beginning of a PE file there is an MS-DOS stub, 0x00 - 0x7F, + that we just insert literally. It prints "This program must be run + under Win32" and exits. Woo! 
+ + Within it, at offset 0x3C, there is an encoded offset of the PE + header we actually care about. So 0x3C - 0x3F are 0x00000100 (LE) + which say "the PE header is actually at 0x100", a nice sensible spot + for it. We pad the next 128 bytes out to 0x100 and start there for + real. + + From then on in it's a sensible object file. Here's the MS-DOS bit. +*) + +let pe_msdos_header_and_padding + : frag = + SEQ [| + BYTES + [| + (* 00000000 *) + 0x4d; 0x5a; 0x50; 0x00; 0x02; 0x00; 0x00; 0x00; + 0x04; 0x00; 0x0f; 0x00; 0xff; 0xff; 0x00; 0x00; + + (* 00000010 *) + 0xb8; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x40; 0x00; 0x1a; 0x00; 0x00; 0x00; 0x00; 0x00; + + (* 00000020 *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + + (* 00000030 *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x00; 0x00; 0x00; 0x00; 0x00; 0x01; 0x00; 0x00; + (* ^^^^PE HDR offset^^^^^ *) + + (* 00000040 *) + 0xba; 0x10; 0x00; 0x0e; 0x1f; 0xb4; 0x09; 0xcd; + 0x21; 0xb8; 0x01; 0x4c; 0xcd; 0x21; 0x90; 0x90; + + (* 00000050 *) + 0x54; 0x68; 0x69; 0x73; 0x20; 0x70; 0x72; 0x6f; (* "This pro" *) + 0x67; 0x72; 0x61; 0x6d; 0x20; 0x6d; 0x75; 0x73; (* "gram mus" *) + + (* 00000060 *) + 0x74; 0x20; 0x62; 0x65; 0x20; 0x72; 0x75; 0x6e; (* "t be run" *) + 0x20; 0x75; 0x6e; 0x64; 0x65; 0x72; 0x20; 0x57; (* " under W" *) + + (* 00000070 *) + 0x69; 0x6e; 0x33; 0x32; 0x0d; 0x0a; 0x24; 0x37; (* "in32\r\n" *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + |]; + PAD 0x80 + |] +;; + +(* + A work of art, is it not? Take a moment to appreciate the madness. + + All done? Ok, now on to the PE header proper. + + PE headers are just COFF headers with a little preamble. +*) + +type pe_machine = + (* Maybe support more later. *) + IMAGE_FILE_MACHINE_AMD64 + | IMAGE_FILE_MACHINE_I386 +;; + + +let pe_timestamp _ = + Int64.of_float (Unix.gettimeofday()) +;; + + +type pe_characteristics = + (* Maybe support more later. 
*) + IMAGE_FILE_RELOCS_STRIPPED + | IMAGE_FILE_EXECUTABLE_IMAGE + | IMAGE_FILE_LINE_NUMS_STRIPPED + | IMAGE_FILE_LOCAL_SYMS_STRIPPED + | IMAGE_FILE_32BIT_MACHINE + | IMAGE_FILE_DEBUG_STRIPPED + | IMAGE_FILE_DLL +;; + + +let pe_header + ~(machine:pe_machine) + ~(symbol_table_fixup:fixup) + ~(number_of_sections:int64) + ~(number_of_symbols:int64) + ~(loader_hdr_fixup:fixup) + ~(characteristics:pe_characteristics list) + : frag = + ALIGN_FILE + (8, + SEQ [| + STRING "PE\x00\x00"; + WORD (TY_u16, (IMM (match machine with + IMAGE_FILE_MACHINE_AMD64 -> 0x8664L + | IMAGE_FILE_MACHINE_I386 -> 0x014cL))); + WORD (TY_u16, (IMM number_of_sections)); + WORD (TY_u32, (IMM (pe_timestamp()))); + WORD (TY_u32, (F_POS symbol_table_fixup)); + WORD (TY_u32, (IMM number_of_symbols)); + WORD (TY_u16, (F_SZ loader_hdr_fixup)); + WORD (TY_u16, (IMM (fold_flags + (fun c -> match c with + IMAGE_FILE_RELOCS_STRIPPED -> 0x1L + | IMAGE_FILE_EXECUTABLE_IMAGE -> 0x2L + | IMAGE_FILE_LINE_NUMS_STRIPPED -> 0x4L + | IMAGE_FILE_LOCAL_SYMS_STRIPPED -> 0x8L + | IMAGE_FILE_32BIT_MACHINE -> 0x100L + | IMAGE_FILE_DEBUG_STRIPPED -> 0x200L + | IMAGE_FILE_DLL -> 0x2000L) + characteristics))) + |]) +;; + +(* + + After the PE header comes an "optional" header for the loader. In + our case this is hardly optional since we are producing a file for + the loader. + +*) + +type pe_subsystem = + (* Maybe support more later. *) + IMAGE_SUBSYSTEM_WINDOWS_GUI + | IMAGE_SUBSYSTEM_WINDOWS_CUI +;; + +let zero32 = WORD (TY_u32, (IMM 0L)) +;; + +let pe_loader_header + ~(text_fixup:fixup) + ~(init_data_fixup:fixup) + ~(size_of_uninit_data:int64) + ~(entry_point_fixup:fixup option) + ~(image_fixup:fixup) + ~(all_hdrs_fixup:fixup) + ~(subsys:pe_subsystem) + ~(loader_hdr_fixup:fixup) + ~(import_dir_fixup:fixup) + ~(export_dir_fixup:fixup) + : frag = + DEF + (loader_hdr_fixup, + SEQ [| + WORD (TY_u16, (IMM 0x10bL)); (* COFF magic tag for PE32. *) + (* Snagged *) + WORD (TY_u8, (IMM 0x2L)); (* Linker major version. 
*) + WORD (TY_u8, (IMM 0x38L)); (* Linker minor version. *) + + WORD (TY_u32, (F_SZ text_fixup)); (* "size of code" *) + WORD (TY_u32, (* "size of all init data" *) + (F_SZ init_data_fixup)); + WORD (TY_u32, + (IMM size_of_uninit_data)); + + begin + match entry_point_fixup with + None -> zero32 (* Library mode: DLLMain *) + | Some entry_point_fixup -> + WORD (TY_u32, + (rva + entry_point_fixup)) (* "address of entry point" *) + end; + + WORD (TY_u32, (rva text_fixup)); (* "base of code" *) + WORD (TY_u32, (rva init_data_fixup)); (* "base of data" *) + WORD (TY_u32, (IMM pe_image_base)); + WORD (TY_u32, (IMM (Int64.of_int + pe_mem_alignment))); + WORD (TY_u32, (IMM (Int64.of_int + pe_file_alignment))); + + WORD (TY_u16, (IMM 4L)); (* Major OS version: NT4. *) + WORD (TY_u16, (IMM 0L)); (* Minor OS version. *) + WORD (TY_u16, (IMM 1L)); (* Major image version. *) + WORD (TY_u16, (IMM 0L)); (* Minor image version. *) + WORD (TY_u16, (IMM 4L)); (* Major subsystem version. *) + WORD (TY_u16, (IMM 0L)); (* Minor subsystem version. *) + + zero32; (* Reserved. *) + + WORD (TY_u32, (M_SZ image_fixup)); + WORD (TY_u32, (M_SZ all_hdrs_fixup)); + + zero32; (* Checksum, but OK if zero. *) + WORD (TY_u16, (IMM (match subsys with + IMAGE_SUBSYSTEM_WINDOWS_GUI -> 2L + | IMAGE_SUBSYSTEM_WINDOWS_CUI -> 3L))); + + WORD (TY_u16, (IMM 0L)); (* DLL characteristics. *) + + WORD (TY_u32, (IMM 0x100000L)); (* Size of stack reserve. *) + WORD (TY_u32, (IMM 0x4000L)); (* Size of stack commit. *) + + WORD (TY_u32, (IMM 0x100000L)); (* Size of heap reserve. *) + WORD (TY_u32, (IMM 0x1000L)); (* Size of heap commit. *) + + zero32; (* Reserved. *) + WORD (TY_u32, (IMM 16L)); (* Number of dir references. *) + + (* Begin directories, variable part of hdr. *) + + (* + + Standard PE files have ~10 directories referenced from + here. We only fill in two of them -- the export/import + directories -- because we don't care about the others. 
We + leave the rest as zero in case someone is looking for + them. This may be superfluous or wrong. + + *) + + + WORD (TY_u32, (rva export_dir_fixup)); + WORD (TY_u32, (M_SZ export_dir_fixup)); + + WORD (TY_u32, (rva import_dir_fixup)); + WORD (TY_u32, (M_SZ import_dir_fixup)); + + zero32; zero32; (* Resource dir. *) + zero32; zero32; (* Exception dir. *) + zero32; zero32; (* Security dir. *) + zero32; zero32; (* Base reloc dir. *) + zero32; zero32; (* Debug dir. *) + zero32; zero32; (* Image desc dir. *) + zero32; zero32; (* Mach spec dir. *) + zero32; zero32; (* TLS dir. *) + + zero32; zero32; (* Load config. *) + zero32; zero32; (* Bound import. *) + zero32; zero32; (* IAT *) + zero32; zero32; (* Delay import. *) + zero32; zero32; (* COM descriptor *) + zero32; zero32; (* ???????? *) + |]) + +;; + + +type pe_section_id = + (* Maybe support more later. *) + SECTION_ID_TEXT + | SECTION_ID_DATA + | SECTION_ID_RDATA + | SECTION_ID_BSS + | SECTION_ID_IMPORTS + | SECTION_ID_EXPORTS + | SECTION_ID_DEBUG_ARANGES + | SECTION_ID_DEBUG_PUBNAMES + | SECTION_ID_DEBUG_INFO + | SECTION_ID_DEBUG_ABBREV + | SECTION_ID_DEBUG_LINE + | SECTION_ID_DEBUG_FRAME + | SECTION_ID_NOTE_RUST +;; + +type pe_section_characteristics = + (* Maybe support more later. 
*) + IMAGE_SCN_CNT_CODE + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_CNT_UNINITIALIZED_DATA + | IMAGE_SCN_MEM_DISCARDABLE + | IMAGE_SCN_MEM_SHARED + | IMAGE_SCN_MEM_EXECUTE + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE + +let pe_section_header + ~(id:pe_section_id) + ~(hdr_fixup:fixup) + : frag = + let + characteristics = + match id with + SECTION_ID_TEXT -> [ IMAGE_SCN_CNT_CODE; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_EXECUTE ] + | SECTION_ID_DATA -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_BSS -> [ IMAGE_SCN_CNT_UNINITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_IMPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_EXPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ ] + | SECTION_ID_RDATA + | SECTION_ID_DEBUG_ARANGES + | SECTION_ID_DEBUG_PUBNAMES + | SECTION_ID_DEBUG_INFO + | SECTION_ID_DEBUG_ABBREV + | SECTION_ID_DEBUG_LINE + | SECTION_ID_DEBUG_FRAME + | SECTION_ID_NOTE_RUST -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ ] + in + SEQ [| + STRING + begin + match id with + SECTION_ID_TEXT -> ".text\x00\x00\x00" + | SECTION_ID_DATA -> ".data\x00\x00\x00" + | SECTION_ID_RDATA -> ".rdata\x00\x00" + | SECTION_ID_BSS -> ".bss\x00\x00\x00\x00" + | SECTION_ID_IMPORTS -> ".idata\x00\x00" + | SECTION_ID_EXPORTS -> ".edata\x00\x00" + + (* There is a bizarre Microsoft COFF extension to account + * for longer-than-8-char section names: you emit a single + * '/' character then the ASCII-numeric encoding of the + * offset within the file's string table of the full name. + * So we put all our extended section names at the + * beginning of the string table in a very specific order + * and hard-wire the offsets as "names" here. You could + * theoretically extend this to a "new kind" of fixup + * reference (ASCII_POS or such), if you feel this is + * something you want to twiddle with. 
+ *) + + | SECTION_ID_DEBUG_ARANGES -> "/4\x00\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_PUBNAMES -> "/19\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_INFO -> "/35\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_ABBREV -> "/47\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_LINE -> "/61\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_FRAME -> "/73\x00\x00\x00\x00\x00" + | SECTION_ID_NOTE_RUST -> "/86\x00\x00\x00\x00\x00" + end; + + (* The next two pairs are only supposed to be different if the + file and section alignments differ. This is a stupid emitter + so they're not, no problem. *) + + WORD (TY_u32, (M_SZ hdr_fixup)); (* "Virtual size" *) + WORD (TY_u32, (rva hdr_fixup)); (* "Virtual address" *) + + WORD (TY_u32, (F_SZ hdr_fixup)); (* "Size of raw data" *) + WORD (TY_u32, (F_POS hdr_fixup)); (* "Pointer to raw data" *) + + zero32; (* Reserved. *) + zero32; (* Reserved. *) + zero32; (* Reserved. *) + + WORD (TY_u32, (IMM (fold_flags + (fun c -> match c with + IMAGE_SCN_CNT_CODE -> 0x20L + | IMAGE_SCN_CNT_INITIALIZED_DATA -> 0x40L + | IMAGE_SCN_CNT_UNINITIALIZED_DATA -> 0x80L + | IMAGE_SCN_MEM_DISCARDABLE -> 0x2000000L + | IMAGE_SCN_MEM_SHARED -> 0x10000000L + | IMAGE_SCN_MEM_EXECUTE -> 0x20000000L + | IMAGE_SCN_MEM_READ -> 0x40000000L + | IMAGE_SCN_MEM_WRITE -> 0x80000000L) + characteristics))) + |] +;; + + +(* + + "Thunk" is a misnomer here; the thunk RVA is the address of a word + that the loader will store an address into. The stored address is + the address of the imported object. + + So if the imported object is X, and the thunk slot is Y, the loader + is doing "Y = &X" and returning &Y as the thunk RVA. To load datum X + after the imports are resolved, given the thunk RVA R, you load + **R. 
+ +*) + +type pe_import = + { + pe_import_name_fixup: fixup; + pe_import_name: string; + pe_import_address_fixup: fixup; + } + +type pe_import_dll_entry = + { + pe_import_dll_name_fixup: fixup; + pe_import_dll_name: string; + pe_import_dll_ILT_fixup: fixup; + pe_import_dll_IAT_fixup: fixup; + pe_import_dll_imports: pe_import array; + } + + (* + + The import section .idata has a mostly self-contained table + structure. You feed it a list of DLL entries, each of which names + a DLL and lists symbols in the DLL to import. + + For each named symbol, a 4-byte slot will be reserved in an + "import lookup table" (ILT, also in this section). The slot is + a pointer to a string in this section giving the name. + + Immediately *after* the ILT, there is an "import address table" (IAT), + which is initially identical to the ILT. The loader replaces the entries + in the IAT slots with the imported pointers at runtime. + + A central directory at the start of the section lists all the the import + thunk tables. Each entry in the import directory is 20 bytes (5 words) + but only the last 2 are used: the second last is a pointer to the string + name of the DLL in question (string also in this section) and the last is + a pointer to the import thunk table itself (also in this section). + + Curiously, of the 5 documents I've consulted on the nature of the + first 3 fields, I find a variety of interpretations. + + *) + +let pe_import_section + ~(import_dir_fixup:fixup) + ~(dlls:pe_import_dll_entry array) + : frag = + + let form_dir_entry + (entry:pe_import_dll_entry) + : frag = + SEQ [| + (* Note: documented opinions vary greatly about whether the + first, last, or both of the slots in one of these rows points + to the RVA of the name/hint used to look the import up. This + table format is a mess! *) + WORD (TY_u32, + (rva + entry.pe_import_dll_ILT_fixup)); (* Import lookup table. *) + WORD (TY_u32, (IMM 0L)); (* Timestamp, unused. 
*) + WORD (TY_u32, (IMM 0x0L)); (* Forwarder chain, unused. *) + WORD (TY_u32, (rva entry.pe_import_dll_name_fixup)); + WORD (TY_u32, + (rva + entry.pe_import_dll_IAT_fixup)); (* Import address table.*) + |] + in + + let form_ILT_slot + (import:pe_import) + : frag = + (WORD (TY_u32, (rva import.pe_import_name_fixup))) + in + + let form_IAT_slot + (import:pe_import) + : frag = + (DEF (import.pe_import_address_fixup, + (WORD (TY_u32, (rva import.pe_import_name_fixup))))) + in + + let form_tables_for_dll + (dll:pe_import_dll_entry) + : frag = + let terminator = WORD (TY_u32, (IMM 0L)) in + let ilt = + SEQ [| + SEQ (Array.map form_ILT_slot dll.pe_import_dll_imports); + terminator + |] + in + let iat = + SEQ [| + SEQ (Array.map form_IAT_slot dll.pe_import_dll_imports); + terminator + |] + in + if Array.length dll.pe_import_dll_imports < 1 + then bug () "Pe.form_tables_for_dll: empty imports" + else + SEQ [| + DEF (dll.pe_import_dll_ILT_fixup, ilt); + DEF (dll.pe_import_dll_IAT_fixup, iat) + |] + + in + + let form_import_string + (import:pe_import) + : frag = + DEF + (import.pe_import_name_fixup, + SEQ [| + (* import string entries begin with a 2-byte "hint", but we just + set it to zero. 
*) + (WORD (TY_u16, (IMM 0L))); + ZSTRING import.pe_import_name; + (if String.length import.pe_import_name mod 2 == 0 + then PAD 1 + else PAD 0) + |]) + in + + let form_dir_entry_string + (dll:pe_import_dll_entry) + : frag = + DEF + (dll.pe_import_dll_name_fixup, + SEQ [| ZSTRING dll.pe_import_dll_name; + (if String.length dll.pe_import_dll_name mod 2 == 0 + then PAD 1 + else PAD 0); + SEQ (Array.map form_import_string dll.pe_import_dll_imports) |]) + in + + let dir = SEQ (Array.map form_dir_entry dlls) in + let dir_terminator = PAD 20 in + let tables = SEQ (Array.map form_tables_for_dll dlls) in + let strings = SEQ (Array.map form_dir_entry_string dlls) + in + def_aligned + import_dir_fixup + (SEQ + [| + dir; + dir_terminator; + tables; + strings + |]) + +;; + +type pe_export = + { + pe_export_name_fixup: fixup; + pe_export_name: string; + pe_export_address_fixup: fixup; + } +;; + +let pe_export_section + ~(sess:Session.sess) + ~(export_dir_fixup:fixup) + ~(exports:pe_export array) + : frag = + Array.sort (fun a b -> compare a.pe_export_name b.pe_export_name) exports; + let export_addr_table_fixup = new_fixup "export address table" in + let export_addr_table = + DEF + (export_addr_table_fixup, + SEQ + (Array.map + (fun e -> (WORD (TY_u32, rva e.pe_export_address_fixup))) + exports)) + in + let export_name_pointer_table_fixup = + new_fixup "export name pointer table" + in + let export_name_pointer_table = + DEF + (export_name_pointer_table_fixup, + SEQ + (Array.map + (fun e -> (WORD (TY_u32, rva e.pe_export_name_fixup))) + exports)) + in + let export_name_table_fixup = new_fixup "export name table" in + let export_name_table = + DEF + (export_name_table_fixup, + SEQ + (Array.map + (fun e -> (DEF (e.pe_export_name_fixup, + (ZSTRING e.pe_export_name)))) + exports)) + in + let export_ordinal_table_fixup = new_fixup "export ordinal table" in + let export_ordinal_table = + DEF + (export_ordinal_table_fixup, + SEQ + (Array.mapi + (fun i _ -> (WORD (TY_u16, IMM 
(Int64.of_int (i))))) + exports)) + in + let image_name_fixup = new_fixup "image name fixup" in + let n_exports = IMM (Int64.of_int (Array.length exports)) in + let export_dir_table = + SEQ [| + WORD (TY_u32, IMM 0L); (* Flags, reserved. *) + WORD (TY_u32, IMM 0L); (* Timestamp, unused. *) + WORD (TY_u16, IMM 0L); (* Major vers., unused *) + WORD (TY_u16, IMM 0L); (* Minor vers., unused *) + WORD (TY_u32, rva image_name_fixup); (* Name RVA. *) + WORD (TY_u32, IMM 1L); (* Ordinal base = 1. *) + WORD (TY_u32, n_exports); (* # entries in EAT. *) + WORD (TY_u32, n_exports); (* # entries in ENPT/EOT.*) + WORD (TY_u32, rva export_addr_table_fixup); (* EAT *) + WORD (TY_u32, rva export_name_pointer_table_fixup); (* ENPT *) + WORD (TY_u32, rva export_ordinal_table_fixup); (* EOT *) + |] + in + def_aligned export_dir_fixup + (SEQ [| + export_dir_table; + export_addr_table; + export_name_pointer_table; + export_ordinal_table; + DEF (image_name_fixup, + ZSTRING (Session.filename_of sess.Session.sess_out)); + export_name_table + |]) +;; + +let pe_text_section + ~(sess:Session.sess) + ~(sem:Semant.ctxt) + ~(start_fixup:fixup option) + ~(rust_start_fixup:fixup option) + ~(main_fn_fixup:fixup option) + ~(text_fixup:fixup) + ~(crate_code:frag) + : frag = + let startup = + match (start_fixup, rust_start_fixup, main_fn_fixup) with + (None, _, _) + | (_, None, _) + | (_, _, None) -> MARK + | (Some start_fixup, + Some rust_start_fixup, + Some main_fn_fixup) -> + let e = X86.new_emitter_without_vregs () in + (* + * We are called from the Microsoft C library startup routine, + * and assumed to be stdcall; so we have to clean up our own + * stack before returning. 
+ *) + X86.objfile_start e + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup + ~crate_fixup: sem.Semant.ctxt_crate_fixup + ~indirect_start: true; + X86.frags_of_emitted_quads sess e; + in + def_aligned + text_fixup + (SEQ [| + startup; + crate_code + |]) +;; + +let rustrt_imports sem = + let make_imports_for_lib (lib, tab) = + { + pe_import_dll_name_fixup = new_fixup "dll name"; + pe_import_dll_name = (match lib with + REQUIRED_LIB_rustrt -> "rustrt.dll" + | REQUIRED_LIB_crt -> "msvcrt.dll" + | REQUIRED_LIB_rust ls + | REQUIRED_LIB_c ls -> ls.required_libname); + pe_import_dll_ILT_fixup = new_fixup "dll ILT"; + pe_import_dll_IAT_fixup = new_fixup "dll IAT"; + pe_import_dll_imports = + Array.of_list + (List.map + begin + fun (name, fixup) -> + { + pe_import_name_fixup = new_fixup "import name"; + pe_import_name = name; + pe_import_address_fixup = fixup; + } + end + (htab_pairs tab)) + } + in + Array.of_list + (List.map + make_imports_for_lib + (htab_pairs sem.Semant.ctxt_native_required)) +;; + + +let crate_exports (sem:Semant.ctxt) : pe_export array = + let export_sym (name, fixup) = + { + pe_export_name_fixup = new_fixup "export name fixup"; + pe_export_name = name; + pe_export_address_fixup = fixup; + } + in + let export_seg (_, tab) = + Array.of_list (List.map export_sym (htab_pairs tab)) + in + Array.concat + (List.map export_seg + (htab_pairs sem.Semant.ctxt_native_provided)) +;; + + +let emit_file + (sess:Session.sess) + (crate:Ast.crate) + (code:Asm.frag) + (data:Asm.frag) + (sem:Semant.ctxt) + (dw:Dwarf.debug_records) + : unit = + + let all_hdrs_fixup = new_fixup "all headers" in + let all_init_data_fixup = new_fixup "all initialized data" in + let loader_hdr_fixup = new_fixup "loader header" in + let import_dir_fixup = new_fixup "import directory" in + let export_dir_fixup = new_fixup "export directory" in + let text_fixup = new_fixup "text section" in + let bss_fixup = new_fixup "bss section" in + let data_fixup = new_fixup "data section" in + let 
image_fixup = new_fixup "image fixup" in + let symtab_fixup = new_fixup "symbol table" in + let strtab_fixup = new_fixup "string table" in + let note_rust_fixup = new_fixup ".note.rust section" in + + let (start_fixup, rust_start_fixup) = + if sess.Session.sess_library_mode + then (None, None) + else + (Some (new_fixup "start"), + Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) + in + + let header = (pe_header + ~machine: IMAGE_FILE_MACHINE_I386 + ~symbol_table_fixup: symtab_fixup + ~number_of_sections: 8L + ~number_of_symbols: 0L + ~loader_hdr_fixup: loader_hdr_fixup + ~characteristics:([IMAGE_FILE_EXECUTABLE_IMAGE; + IMAGE_FILE_LINE_NUMS_STRIPPED; + IMAGE_FILE_32BIT_MACHINE;] + @ + (if sess.Session.sess_library_mode + then [ IMAGE_FILE_DLL ] + else [ ]))) + in + let symtab = + (* + * We're not actually presenting a "symbol table", but wish to + * provide a "string table" which comes immediately *after* the + * symbol table. It's a violation of the PE spec to put one of + * these in an executable file (as opposed to just loadable) but + * it's necessary to communicate the debug section names to GDB, + * and nobody else complains. 
+ *) + (def_aligned + symtab_fixup + (def_aligned + strtab_fixup + (SEQ + [| + WORD (TY_u32, (F_SZ strtab_fixup)); + ZSTRING ".debug_aranges"; + ZSTRING ".debug_pubnames"; + ZSTRING ".debug_info"; + ZSTRING ".debug_abbrev"; + ZSTRING ".debug_line"; + ZSTRING ".debug_frame"; + ZSTRING ".note.rust"; + |]))) + in + let loader_header = (pe_loader_header + ~text_fixup + ~init_data_fixup: all_init_data_fixup + ~size_of_uninit_data: 0L + ~entry_point_fixup: start_fixup + ~image_fixup: image_fixup + ~subsys: IMAGE_SUBSYSTEM_WINDOWS_CUI + ~all_hdrs_fixup + ~loader_hdr_fixup + ~import_dir_fixup + ~export_dir_fixup) + in + let text_header = (pe_section_header + ~id: SECTION_ID_TEXT + ~hdr_fixup: text_fixup) + + in + let bss_header = (pe_section_header + ~id: SECTION_ID_BSS + ~hdr_fixup: bss_fixup) + in + let import_section = (pe_import_section + ~import_dir_fixup + ~dlls: (rustrt_imports sem)) + in + let import_header = (pe_section_header + ~id: SECTION_ID_IMPORTS + ~hdr_fixup: import_dir_fixup) + in + let export_section = (pe_export_section + ~sess + ~export_dir_fixup + ~exports: (crate_exports sem)) + in + let export_header = (pe_section_header + ~id: SECTION_ID_EXPORTS + ~hdr_fixup: export_dir_fixup) + in + let data_header = (pe_section_header + ~id: SECTION_ID_DATA + ~hdr_fixup: data_fixup) + in +(* + let debug_aranges_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_ARANGES + ~hdr_fixup: sem.Semant.ctxt_debug_aranges_fixup) + in + let debug_pubnames_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_PUBNAMES + ~hdr_fixup: sem.Semant.ctxt_debug_pubnames_fixup) + in +*) + let debug_info_header = (pe_section_header + ~id: SECTION_ID_DEBUG_INFO + ~hdr_fixup: sem.Semant.ctxt_debug_info_fixup) + in + let debug_abbrev_header = (pe_section_header + ~id: SECTION_ID_DEBUG_ABBREV + ~hdr_fixup: sem.Semant.ctxt_debug_abbrev_fixup) + in +(* + let debug_line_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_LINE + ~hdr_fixup: sem.Semant.ctxt_debug_line_fixup) + in + let 
debug_frame_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_FRAME + ~hdr_fixup: sem.Semant.ctxt_debug_frame_fixup) + in +*) + let note_rust_header = (pe_section_header + ~id: SECTION_ID_NOTE_RUST + ~hdr_fixup: note_rust_fixup) + in + let all_headers = (def_file_aligned + all_hdrs_fixup + (SEQ + [| + pe_msdos_header_and_padding; + header; + loader_header; + text_header; + bss_header; + import_header; + export_header; + data_header; + (* + debug_aranges_header; + debug_pubnames_header; + *) + debug_info_header; + debug_abbrev_header; + (* + debug_line_header; + debug_frame_header; + *) + note_rust_header; + |])) + in + + let text_section = (pe_text_section + ~sem + ~sess + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup: sem.Semant.ctxt_main_fn_fixup + ~text_fixup + ~crate_code: code) + in + let bss_section = def_aligned bss_fixup (BSS 0x10L) + in + let data_section = (def_aligned data_fixup + (SEQ [| data; symtab; |])) + in + let all_init_data = (def_aligned + all_init_data_fixup + (SEQ [| import_section; + export_section; + data_section; |])) + in +(* + let debug_aranges_section = + def_aligned sem.Semant.ctxt_debug_aranges_fixup dw.Dwarf.debug_aranges + in + let debug_pubnames_section = + def_aligned sem.Semant.ctxt_debug_pubnames_fixup dw.Dwarf.debug_pubnames + in +*) + let debug_info_section = + def_aligned sem.Semant.ctxt_debug_info_fixup dw.Dwarf.debug_info + in + let debug_abbrev_section = + def_aligned sem.Semant.ctxt_debug_abbrev_fixup dw.Dwarf.debug_abbrev + in +(* + let debug_line_section = + def_aligned sem.Semant.ctxt_debug_line_fixup dw.Dwarf.debug_line + in + let debug_frame_section = + def_aligned sem.Semant.ctxt_debug_frame_fixup dw.Dwarf.debug_frame + in +*) + let note_rust_section = + def_aligned note_rust_fixup + (Asm.note_rust_frags crate.node.Ast.crate_meta) + in + + let all_frags = SEQ [| MEMPOS pe_image_base; + (def_file_aligned image_fixup + (SEQ [| DEF (sem.Semant.ctxt_image_base_fixup, + MARK); + all_headers; + text_section; + 
bss_section; + all_init_data; + (* debug_aranges_section; *) + (* debug_pubnames_section; *) + debug_info_section; + debug_abbrev_section; + (* debug_line_section; *) + (* debug_frame_section; *) + note_rust_section; + ALIGN_MEM (pe_mem_alignment, MARK) + |] + ) + ) + |] + in + write_out_frag sess true all_frags +;; + +let pe_magic = "PE";; + +let sniff + (sess:Session.sess) + (filename:filename) + : asm_reader option = + try + let stat = Unix.stat filename in + if (stat.Unix.st_kind = Unix.S_REG) && + (stat.Unix.st_size >= pe_file_alignment) + then + let ar = new_asm_reader sess filename in + let _ = log sess "sniffing PE file" in + (* PE header offset is at 0x3c in the MS-DOS compatibility header. *) + let _ = ar.asm_seek 0x3c in + let pe_hdr_off = ar.asm_get_u32() in + let _ = log sess "PE header offset: 0x%x" pe_hdr_off in + + let _ = ar.asm_seek pe_hdr_off in + let pe_signature = ar.asm_get_zstr_padded 4 in + let _ = log sess " PE signature: '%s'" pe_signature in + if pe_signature = pe_magic + then (ar.asm_seek 0; Some ar) + else None + else + None + with + _ -> None +;; + + +let get_sections + (sess:Session.sess) + (ar:asm_reader) + : (string,(int*int)) Hashtbl.t = + let _ = log sess "reading sections" in + (* PE header offset is at 0x3c in the MS-DOS compatibility header. 
*) + let _ = ar.asm_seek 0x3c in + let pe_hdr_off = ar.asm_get_u32() in + let _ = log sess "PE header offset: 0x%x" pe_hdr_off in + + let _ = ar.asm_seek pe_hdr_off in + let pe_signature = ar.asm_get_zstr_padded 4 in + let _ = log sess " PE signature: '%s'" pe_signature in + let _ = assert (pe_signature = pe_magic) in + let _ = ar.asm_adv_u16() in (* machine type *) + + let num_sections = ar.asm_get_u16() in + let _ = log sess " num sections: %d" num_sections in + + let _ = ar.asm_adv_u32() in (* timestamp *) + + let symtab_off = ar.asm_get_u32() in + let _ = log sess " symtab offset: 0x%x" symtab_off in + + let num_symbols = ar.asm_get_u32() in + let _ = log sess " num symbols: %d" num_symbols in + + let loader_hdr_size = ar.asm_get_u16() in + let _ = log sess "loader header sz: %d" loader_hdr_size in + + let _ = ar.asm_adv_u16() in (* flags *) + let sections_off = (ar.asm_get_off()) + loader_hdr_size in + + let sects = Hashtbl.create 0 in + + let _ = + ar.asm_seek sections_off; + for i = 0 to (num_sections - 1) do + (* + * Section-name encoding is crazy. ASCII-encoding offsets of + * long names. See pe_section_header for details. 
+ *) + let sect_name = + let sect_name = ar.asm_get_zstr_padded 8 in + assert ((String.length sect_name) > 0); + if sect_name.[0] = '/' + then + let off_str = + String.sub sect_name 1 ((String.length sect_name) - 1) + in + let i = int_of_string off_str in + let curr = ar.asm_get_off() in + ar.asm_seek (symtab_off + i); + let ext_name = ar.asm_get_zstr() in + ar.asm_seek curr; + ext_name + else + sect_name + in + let _ = ar.asm_adv_u32() in (* virtual size *) + let _ = ar.asm_adv_u32() in (* virtual address *) + let file_sz = ar.asm_get_u32() in + let file_off = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* flags *) + Hashtbl.add sects sect_name (file_off, file_sz); + log sess " section %d: %s, size %d, offset 0x%x" + i sect_name file_sz file_off; + done + in + sects +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/ra.ml b/src/boot/be/ra.ml new file mode 100644 index 00000000..db70b21d --- /dev/null +++ b/src/boot/be/ra.ml @@ -0,0 +1,664 @@ +open Il;; +open Common;; + +type ctxt = + { + ctxt_sess: Session.sess; + ctxt_n_vregs: int; + ctxt_abi: Abi.abi; + mutable ctxt_quads: Il.quads; + mutable ctxt_next_spill: int; + mutable ctxt_next_label: int; + (* More state as necessary. 
*) + } +;; + +let new_ctxt + (sess:Session.sess) + (quads:Il.quads) + (vregs:int) + (abi:Abi.abi) + : ctxt = + { + ctxt_sess = sess; + ctxt_quads = quads; + ctxt_n_vregs = vregs; + ctxt_abi = abi; + ctxt_next_spill = 0; + ctxt_next_label = 0; + } +;; + +let log (cx:ctxt) = + Session.log "ra" + cx.ctxt_sess.Session.sess_log_ra + cx.ctxt_sess.Session.sess_log_out +;; + +let iflog (cx:ctxt) (thunk:(unit -> unit)) : unit = + if cx.ctxt_sess.Session.sess_log_ra + then thunk () + else () +;; + +let list_to_str list eltstr = + (String.concat "," (List.map eltstr (List.sort compare list))) +;; + +let next_spill (cx:ctxt) : int = + let i = cx.ctxt_next_spill in + cx.ctxt_next_spill <- i + 1; + i +;; + +let next_label (cx:ctxt) : string = + let i = cx.ctxt_next_label in + cx.ctxt_next_label <- i + 1; + (".L" ^ (string_of_int i)) +;; + +exception Ra_error of string ;; + +let convert_labels (cx:ctxt) : unit = + let quad_fixups = Array.map (fun q -> q.quad_fixup) cx.ctxt_quads in + let qp_code (_:Il.quad_processor) (c:Il.code) : Il.code = + match c with + Il.CodeLabel lab -> + let fix = + match quad_fixups.(lab) with + None -> + let fix = new_fixup (next_label cx) in + begin + quad_fixups.(lab) <- Some fix; + fix + end + | Some f -> f + in + Il.CodePtr (Il.ImmPtr (fix, Il.CodeTy)) + | _ -> c + in + let qp = { Il.identity_processor + with Il.qp_code = qp_code } + in + Il.rewrite_quads qp cx.ctxt_quads; + Array.iteri (fun i fix -> + cx.ctxt_quads.(i) <- { cx.ctxt_quads.(i) with + Il.quad_fixup = fix }) + quad_fixups; +;; + +let convert_pre_spills + (cx:ctxt) + (mkspill:(Il.spill -> Il.mem)) + : int = + let n = ref 0 in + let qp_mem (_:Il.quad_processor) (a:Il.mem) : Il.mem = + match a with + Il.Spill i -> + begin + if i+1 > (!n) + then n := i+1; + mkspill i + end + | _ -> a + in + let qp = Il.identity_processor in + let qp = { qp with + Il.qp_mem = qp_mem } + in + begin + Il.rewrite_quads qp cx.ctxt_quads; + !n + end +;; + +let kill_quad (i:int) (cx:ctxt) : unit = + 
cx.ctxt_quads.(i) <- + { Il.deadq with + Il.quad_fixup = cx.ctxt_quads.(i).Il.quad_fixup } +;; + +let kill_redundant_moves (cx:ctxt) : unit = + let process_quad i q = + match q.Il.quad_body with + Il.Unary u when + ((Il.is_mov u.Il.unary_op) && + (Il.Cell u.Il.unary_dst) = u.Il.unary_src) -> + kill_quad i cx + | _ -> () + in + Array.iteri process_quad cx.ctxt_quads +;; + +let quad_jump_target_labels (q:quad) : Il.label list = + let explicits = + match q.Il.quad_body with + Il.Jmp { Il.jmp_targ = Il.CodeLabel lab } -> [ lab ] + | _ -> [] + in + explicits @ q.quad_implicits; +;; + +let quad_used_vregs (q:quad) : Il.vreg list = + let vregs = ref [] in + let qp_reg _ r = + match r with + Il.Vreg v -> (vregs := (v :: (!vregs)); r) + | _ -> r + in + let qp_cell_write qp c = + match c with + Il.Reg _ -> c + | Il.Mem (a, b) -> Il.Mem (qp.qp_mem qp a, b) + in + let qp = { Il.identity_processor with + Il.qp_reg = qp_reg; + Il.qp_cell_write = qp_cell_write } + in + ignore (Il.process_quad qp q); + !vregs +;; + +let quad_defined_vregs (q:quad) : Il.vreg list = + let vregs = ref [] in + let qp_cell_write _ c = + match c with + Il.Reg (Il.Vreg v, _) -> (vregs := (v :: (!vregs)); c) + | _ -> c + in + let qp = { Il.identity_processor with + Il.qp_cell_write = qp_cell_write } + in + ignore (Il.process_quad qp q); + !vregs +;; + +let quad_is_unconditional_jump (q:quad) : bool = + match q.Il.quad_body with + Il.Jmp { jmp_op = Il.JMP } -> true + | Il.Ret -> true + | _ -> false +;; + +let calculate_live_bitvectors + (cx:ctxt) + : ((Bits.t array) * (Bits.t array)) = + + log cx "calculating live bitvectors"; + + let quads = cx.ctxt_quads in + let n_quads = Array.length quads in + let n_vregs = cx.ctxt_n_vregs in + let new_bitv _ = Bits.create n_vregs false in + let (live_in_vregs:Bits.t array) = Array.init n_quads new_bitv in + let (live_out_vregs:Bits.t array) = Array.init n_quads new_bitv in + + let (quad_used_vrs:Bits.t array) = Array.init n_quads new_bitv in + let 
(quad_defined_vrs:Bits.t array) = Array.init n_quads new_bitv in + let (quad_uncond_jmp:bool array) = Array.make n_quads false in + let (quad_jmp_targs:(Il.label list) array) = Array.make n_quads [] in + + let outer_changed = ref true in + + (* Working bit-vector. *) + let scratch = new_bitv() in + + (* bit-vector helpers. *) + (* Setup pass. *) + for i = 0 to n_quads - 1 do + let q = quads.(i) in + quad_uncond_jmp.(i) <- quad_is_unconditional_jump q; + quad_jmp_targs.(i) <- quad_jump_target_labels q; + List.iter + (fun v -> Bits.set quad_used_vrs.(i) v true) + (quad_used_vregs q); + List.iter + (fun v -> Bits.set quad_defined_vrs.(i) v true) + (quad_defined_vregs q) + done; + + while !outer_changed do + iflog cx (fun _ -> log cx "iterating outer bitvector calculation"); + outer_changed := false; + for i = 0 to n_quads - 1 do + Bits.clear live_in_vregs.(i); + Bits.clear live_out_vregs.(i) + done; + let inner_changed = ref true in + while !inner_changed do + inner_changed := false; + iflog cx + (fun _ -> + log cx "iterating inner bitvector calculation over %d quads" + n_quads); + for i = n_quads - 1 downto 0 do + + let note_change b = if b then inner_changed := true in + let live_in = live_in_vregs.(i) in + let live_out = live_out_vregs.(i) in + let used = quad_used_vrs.(i) in + let defined = quad_defined_vrs.(i) in + + (* Union in the vregs we use. *) + note_change (Bits.union live_in used); + + (* Union in all our jump targets. *) + List.iter + (fun i -> note_change (Bits.union live_out live_in_vregs.(i))) + (quad_jmp_targs.(i)); + + (* Union in our block successor if we have one *) + if i < (n_quads - 1) && (not (quad_uncond_jmp.(i))) + then note_change (Bits.union live_out live_in_vregs.(i+1)); + + (* Propagate live-out to live-in on anything we don't define. 
*) + ignore (Bits.copy scratch defined); + Bits.invert scratch; + ignore (Bits.intersect scratch live_out); + note_change (Bits.union live_in scratch); + + done + done; + let kill_mov_to_dead_target i q = + match q.Il.quad_body with + Il.Unary { Il.unary_op=uop; + Il.unary_dst=Il.Reg (Il.Vreg v, _) } + when + ((Il.is_mov uop) && + not (Bits.get live_out_vregs.(i) v)) -> + begin + kill_quad i cx; + outer_changed := true; + end + | _ -> () + in + Array.iteri kill_mov_to_dead_target quads + done; + iflog cx + begin + fun _ -> + log cx "finished calculating live bitvectors"; + log cx "========================="; + for q = 0 to n_quads - 1 do + let buf = Buffer.create 128 in + for v = 0 to (n_vregs - 1) + do + if ((Bits.get live_in_vregs.(q) v) + && (Bits.get live_out_vregs.(q) v)) + then Printf.bprintf buf " %-2d" v + else Buffer.add_string buf " " + done; + log cx "[%6d] live vregs: %s" q (Buffer.contents buf) + done; + log cx "=========================" + end; + (live_in_vregs, live_out_vregs) +;; + + +let is_end_of_basic_block (q:quad) : bool = + match q.Il.quad_body with + Il.Jmp _ -> true + | Il.Ret -> true + | _ -> false +;; + +let is_beginning_of_basic_block (q:quad) : bool = + match q.Il.quad_fixup with + None -> false + | Some _ -> true +;; + +let dump_quads cx = + let f = cx.ctxt_abi.Abi.abi_str_of_hardreg in + let len = (Array.length cx.ctxt_quads) - 1 in + let ndigits_of n = (int_of_float (log10 (float_of_int n))) in + let padded_num n maxnum = + let ndigits = ndigits_of n in + let maxdigits = ndigits_of maxnum in + let pad = String.make (maxdigits - ndigits) ' ' in + Printf.sprintf "%s%d" pad n + in + let padded_str str maxlen = + let pad = String.make (maxlen - (String.length str)) ' ' in + Printf.sprintf "%s%s" pad str + in + let maxlablen = ref 0 in + for i = 0 to len + do + let q = cx.ctxt_quads.(i) in + match q.quad_fixup with + None -> () + | Some f -> + maxlablen := max (!maxlablen) ((String.length f.fixup_name) + 1) + done; + for i = 0 to len + do 
+ let q = cx.ctxt_quads.(i) in + let qs = (string_of_quad f q) in + let lab = match q.quad_fixup with + None -> "" + | Some f -> f.fixup_name ^ ":" + in + log cx "[%s] %s %s" (padded_num i len) (padded_str lab (!maxlablen)) qs + done +;; + +let calculate_vreg_constraints (cx:ctxt) : Bits.t array = + let abi = cx.ctxt_abi in + let n_vregs = cx.ctxt_n_vregs in + let n_hregs = abi.Abi.abi_n_hardregs in + let constraints = Array.init n_vregs (fun _ -> Bits.create n_hregs true) in + Array.iteri + begin + fun i q -> + abi.Abi.abi_constrain_vregs q constraints; + iflog cx + begin + fun _ -> + let hr_str = cx.ctxt_abi.Abi.abi_str_of_hardreg in + log cx "constraints for quad %d = %s" + i (string_of_quad hr_str q); + let qp_reg _ r = + begin + match r with + Il.Hreg _ -> () + | Il.Vreg v -> + let hregs = Bits.to_list constraints.(v) in + log cx "<v%d> constrained to hregs: [%s]" + v (list_to_str hregs hr_str) + end; + r + in + ignore (Il.process_quad { Il.identity_processor with + Il.qp_reg = qp_reg } q) + end; + end + cx.ctxt_quads; + constraints +;; + +(* Simple local register allocator. Nothing fancy. *) +let reg_alloc + (sess:Session.sess) + (quads:Il.quads) + (vregs:int) + (abi:Abi.abi) = + try + let cx = new_ctxt sess quads vregs abi in + let _ = + iflog cx + begin + fun _ -> + log cx "un-allocated quads:"; + dump_quads cx + end + in + + (* Work out pre-spilled slots and allocate 'em. 
*) + let spill_slot (s:Il.spill) = abi.Abi.abi_spill_slot s in + let n_pre_spills = convert_pre_spills cx spill_slot in + + let (live_in_vregs, live_out_vregs) = + Session.time_inner "RA liveness" sess + (fun _ -> calculate_live_bitvectors cx) + in + let (vreg_constraints:Bits.t array) = (* vreg idx -> hreg bits.t *) + calculate_vreg_constraints cx + in + let inactive_hregs = ref [] in (* [hreg] *) + let active_hregs = ref [] in (* [hreg] *) + let dirty_vregs = Hashtbl.create 0 in (* vreg -> () *) + let hreg_to_vreg = Hashtbl.create 0 in (* hreg -> vreg *) + let vreg_to_hreg = Hashtbl.create 0 in (* vreg -> hreg *) + let vreg_to_spill = Hashtbl.create 0 in (* vreg -> spill *) + let (word_ty:Il.scalar_ty) = Il.ValTy abi.Abi.abi_word_bits in + let vreg_spill_cell v = + Il.Mem ((spill_slot (Hashtbl.find vreg_to_spill v)), + Il.ScalarTy word_ty) + in + let newq = ref [] in + let fixup = ref None in + let prepend q = + newq := {q with quad_fixup = !fixup} :: (!newq); + fixup := None + in + let hr h = Il.Reg (Il.Hreg h, Il.voidptr_t) in + let hr_str = cx.ctxt_abi.Abi.abi_str_of_hardreg in + let clean_hreg i hreg = + if (Hashtbl.mem hreg_to_vreg hreg) && + (hreg < cx.ctxt_abi.Abi.abi_n_hardregs) + then + let vreg = Hashtbl.find hreg_to_vreg hreg in + if Hashtbl.mem dirty_vregs vreg + then + begin + Hashtbl.remove dirty_vregs vreg; + if (Bits.get (live_out_vregs.(i)) vreg) + then + let spill_idx = + if Hashtbl.mem vreg_to_spill vreg + then Hashtbl.find vreg_to_spill vreg + else + begin + let s = next_spill cx in + Hashtbl.replace vreg_to_spill vreg s; + s + end + in + let spill_mem = spill_slot spill_idx in + let spill_cell = Il.Mem (spill_mem, Il.ScalarTy word_ty) in + log cx "spilling <%d> from %s to %s" + vreg (hr_str hreg) (string_of_mem hr_str spill_mem); + prepend (Il.mk_quad + (Il.umov spill_cell (Il.Cell (hr hreg)))); + else () + end + else () + else () + in + + let inactivate_hreg hreg = + if (Hashtbl.mem hreg_to_vreg hreg) && + (hreg < 
cx.ctxt_abi.Abi.abi_n_hardregs) + then + let vreg = Hashtbl.find hreg_to_vreg hreg in + Hashtbl.remove vreg_to_hreg vreg; + Hashtbl.remove hreg_to_vreg hreg; + active_hregs := List.filter (fun x -> x != hreg) (!active_hregs); + inactive_hregs := hreg :: (!inactive_hregs); + else () + in + + let spill_specific_hreg i hreg = + clean_hreg i hreg; + inactivate_hreg hreg + in + + let rec select_constrained + (constraints:Bits.t) + (hregs:Il.hreg list) + : Il.hreg option = + match hregs with + [] -> None + | h::hs -> + if Bits.get constraints h + then Some h + else select_constrained constraints hs + in + + let spill_constrained constrs i = + match select_constrained constrs (!active_hregs) with + None -> + raise (Ra_error ("unable to spill according to constraint")); + | Some h -> + begin + spill_specific_hreg i h; + h + end + in + + let all_hregs = Bits.create abi.Abi.abi_n_hardregs true in + + let spill_all_regs i = + while (!active_hregs) != [] + do + let _ = spill_constrained all_hregs i in + () + done + in + + let reload vreg hreg = + if Hashtbl.mem vreg_to_spill vreg + then + prepend (Il.mk_quad + (Il.umov + (hr hreg) + (Il.Cell (vreg_spill_cell vreg)))) + else () + in + + let use_vreg def i vreg = + if Hashtbl.mem vreg_to_hreg vreg + then + begin + let h = Hashtbl.find vreg_to_hreg vreg in + iflog cx (fun _ -> log cx "found cached assignment %s for <v%d>" + (hr_str h) vreg); + h + end + else + let hreg = + let constrs = vreg_constraints.(vreg) in + match select_constrained constrs (!inactive_hregs) with + None -> + let h = spill_constrained constrs i in + iflog cx + (fun _ -> log cx "selected %s to spill and use for <v%d>" + (hr_str h) vreg); + h + | Some h -> + iflog cx (fun _ -> log cx "selected inactive %s for <v%d>" + (hr_str h) vreg); + h + in + inactive_hregs := + List.filter (fun x -> x != hreg) (!inactive_hregs); + active_hregs := (!active_hregs) @ [hreg]; + Hashtbl.replace hreg_to_vreg hreg vreg; + Hashtbl.replace vreg_to_hreg vreg hreg; + if def + then 
() + else + reload vreg hreg; + hreg + in + let qp_reg def i _ r = + match r with + Il.Hreg h -> (spill_specific_hreg i h; r) + | Il.Vreg v -> (Il.Hreg (use_vreg def i v)) + in + let qp_cell def i qp c = + match c with + Il.Reg (r, b) -> Il.Reg (qp_reg def i qp r, b) + | Il.Mem (a, b) -> + let qp = { qp with Il.qp_reg = qp_reg false i } in + Il.Mem (qp.qp_mem qp a, b) + in + let qp i = { Il.identity_processor with + Il.qp_cell_read = qp_cell false i; + Il.qp_cell_write = qp_cell true i; + Il.qp_reg = qp_reg false i } + in + cx.ctxt_next_spill <- n_pre_spills; + convert_labels cx; + for i = 0 to cx.ctxt_abi.Abi.abi_n_hardregs - 1 + do + inactive_hregs := i :: (!inactive_hregs) + done; + for i = 0 to (Array.length cx.ctxt_quads) - 1 + do + let quad = cx.ctxt_quads.(i) in + let clobbers = cx.ctxt_abi.Abi.abi_clobbers quad in + let used = quad_used_vregs quad in + let defined = quad_defined_vregs quad in + begin + if List.exists (fun def -> List.mem def clobbers) defined + then raise (Ra_error ("clobber and defined sets overlap")); + iflog cx + begin + fun _ -> + let hr (v:int) : string = + if Hashtbl.mem vreg_to_hreg v + then hr_str (Hashtbl.find vreg_to_hreg v) + else "??" 
+ in + let vr_str (v:int) : string = + Printf.sprintf "v%d=%s" v (hr v) + in + let lstr lab ls fn = + if List.length ls = 0 + then () + else log cx "\t%s: [%s]" lab (list_to_str ls fn) + in + log cx "processing quad %d = %s" + i (string_of_quad hr_str quad); + (lstr "dirt" (htab_keys dirty_vregs) vr_str); + (lstr "clob" clobbers hr_str); + (lstr "in" (Bits.to_list live_in_vregs.(i)) vr_str); + (lstr "out" (Bits.to_list live_out_vregs.(i)) vr_str); + (lstr "use" used vr_str); + (lstr "def" defined vr_str); + end; + List.iter (clean_hreg i) clobbers; + if is_beginning_of_basic_block quad + then + begin + spill_all_regs i; + fixup := quad.quad_fixup; + prepend (Il.process_quad (qp i) quad) + end + else + begin + fixup := quad.quad_fixup; + let newq = (Il.process_quad (qp i) quad) in + begin + if is_end_of_basic_block quad + then spill_all_regs i + else () + end; + prepend newq + end + end; + List.iter inactivate_hreg clobbers; + List.iter (fun i -> Hashtbl.replace dirty_vregs i ()) defined; + done; + cx.ctxt_quads <- Array.of_list (List.rev (!newq)); + kill_redundant_moves cx; + + iflog cx + begin + fun _ -> + log cx "spills: %d pre-spilled, %d total" + n_pre_spills cx.ctxt_next_spill; + log cx "register-allocated quads:"; + dump_quads cx; + end; + (cx.ctxt_quads, cx.ctxt_next_spill) + + with + Ra_error s -> + Session.fail sess "RA Error: %s" s; + (quads, 0) + +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 
2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/x86.ml b/src/boot/be/x86.ml new file mode 100644 index 00000000..01b7e299 --- /dev/null +++ b/src/boot/be/x86.ml @@ -0,0 +1,2205 @@ +(* + * x86/ia32 instructions have 6 parts: + * + * [pre][op][modrm][sib][disp][imm] + * + * [pre] = 0..4 bytes of prefix + * [op] = 1..3 byte opcode + * [modrm] = 0 or 1 byte: [mod:2][reg/op:3][r/m:3] + * [sib] = 0 or 1 byte: [scale:2][index:3][base:3] + * [disp] = 1, 2 or 4 byte displacement + * [imm] = 1, 2 or 4 byte immediate + * + * So between 1 and 17 bytes total. + * + * We're not going to use sib, but modrm is worth discussing. + * + * The high two bits of modrm denote an addressing mode. The modes are: + * + * 00 - "mostly" *(reg) + * 01 - "mostly" *(reg) + disp8 + * 10 - "mostly" *(reg) + disp32 + * 11 - reg + * + * The next-lowest 3 bits denote a specific register, or a subopcode if + * there is a fixed register or only one operand. The instruction format + * reference will say "/<n>" for some number n, if a fixed subopcode is used. + * It'll say "/r" if the instruction uses this field to specify a register. + * + * The registers specified in this field are: + * + * 000 - EAX or XMM0 + * 001 - ECX or XMM1 + * 010 - EDX or XMM2 + * 011 - EBX or XMM3 + * 100 - ESP or XMM4 + * 101 - EBP or XMM5 + * 110 - ESI or XMM6 + * 111 - EDI or XMM7 + * + * The final low 3 bits denote sub-modes of the primary mode selected + * with the top 2 bits. In particular, they "mostly" select the reg that is + * to be used for effective address calculation. + * + * For the most part, these follow the same numbering order: EAX, ECX, EDX, + * EBX, ESP, EBP, ESI, EDI. There are two unusual deviations from the rule + * though: + * + * - In primary modes 00, 01 and 10, r/m=100 means "use SIB byte". You can + * use (unscaled) ESP as the base register in these modes by appending the + * SIB byte 0x24. We do that in our rm_r operand-encoder function. 
+ * + * - In primary mode 00, r/m=101 means "just disp32", no register is + * involved. There is no way to use EBP in primary mode 00. If you try, we + * just decay into a mode 01 with an appended 8-bit immediate displacement. + * + * Some opcodes are written 0xNN +rd. This means "we decided to chew up a + * whole pile of opcodes here, with each opcode including a hard-wired + * reference to a register". For example, POP is "0x58 +rd", which means that + * the 1-byte insns 0x58..0x5f are chewed up for "POP EAX" ... "POP EDI" + * (again, the canonical order of register numberings) + *) + +(* + * Notes on register availability of x86: + * + * There are 8 GPRs but we use 2 of them for specific purposes: + * + * - ESP always points to the current stack frame. + * - EBP always points to the current frame base. + * + * We tell IL that we have 6 GPRs then, and permit most register-register ops + * on any of these 6, mostly-unconstrained. + * + *) + +open Common;; + +exception Unrecognized +;; + +let modrm m rm reg_or_subopcode = + if (((m land 0b11) != m) or + ((rm land 0b111) != rm) or + ((reg_or_subopcode land 0b111) != reg_or_subopcode)) + then raise (Invalid_argument "X86.modrm_deref") + else + ((((m land 0b11) lsl 6) + lor + (rm land 0b111)) + lor + ((reg_or_subopcode land 0b111) lsl 3)) +;; + +let modrm_deref_reg = modrm 0b00 ;; +let modrm_deref_disp32 = modrm 0b00 0b101 ;; +let modrm_deref_reg_plus_disp8 = modrm 0b01 ;; +let modrm_deref_reg_plus_disp32 = modrm 0b10 ;; +let modrm_reg = modrm 0b11 ;; + +let slash0 = 0;; +let slash1 = 1;; +let slash2 = 2;; +let slash3 = 3;; +let slash4 = 4;; +let slash5 = 5;; +let slash6 = 6;; +let slash7 = 7;; + + +(* + * Translate an IL-level hwreg number from 0..nregs into the 3-bit code number + * used through the mod r/m byte and /r sub-register specifiers of the x86 + * ISA. + * + * See "Table 2-2: 32-Bit Addressing Forms with the ModR/M Byte", in the IA32 + * Architecture Software Developer's Manual, volume 2a. 
+ *) + +let eax = 0 +let ecx = 1 +let ebx = 2 +let esi = 3 +let edi = 4 +let edx = 5 +let ebp = 6 +let esp = 7 + +let code_eax = 0b000;; +let code_ecx = 0b001;; +let code_edx = 0b010;; +let code_ebx = 0b011;; +let code_esp = 0b100;; +let code_ebp = 0b101;; +let code_esi = 0b110;; +let code_edi = 0b111;; + +let reg r = + match r with + 0 -> code_eax + | 1 -> code_ecx + | 2 -> code_ebx + | 3 -> code_esi + | 4 -> code_edi + | 5 -> code_edx + (* Never assigned by the register allocator, but synthetic code uses + them *) + | 6 -> code_ebp + | 7 -> code_esp + | _ -> raise (Invalid_argument "X86.reg") +;; + + +let dwarf_eax = 0;; +let dwarf_ecx = 1;; +let dwarf_edx = 2;; +let dwarf_ebx = 3;; +let dwarf_esp = 4;; +let dwarf_ebp = 5;; +let dwarf_esi = 6;; +let dwarf_edi = 7;; + +let dwarf_reg r = + match r with + 0 -> dwarf_eax + | 1 -> dwarf_ecx + | 2 -> dwarf_ebx + | 3 -> dwarf_esi + | 4 -> dwarf_edi + | 5 -> dwarf_edx + | 6 -> dwarf_ebp + | 7 -> dwarf_esp + | _ -> raise (Invalid_argument "X86.dwarf_reg") + +let reg_str r = + match r with + 0 -> "eax" + | 1 -> "ecx" + | 2 -> "ebx" + | 3 -> "esi" + | 4 -> "edi" + | 5 -> "edx" + | 6 -> "ebp" + | 7 -> "esp" + | _ -> raise (Invalid_argument "X86.reg_str") +;; + +(* This is a basic ABI. You might need to customize it by platform. 
*) +let (n_hardregs:int) = 6;; +let (n_callee_saves:int) = 4;; + + +let is_ty32 (ty:Il.scalar_ty) : bool = + match ty with + Il.ValTy (Il.Bits32) -> true + | Il.AddrTy _ -> true + | _ -> false +;; + +let is_r32 (c:Il.cell) : bool = + match c with + Il.Reg (_, st) -> is_ty32 st + | _ -> false +;; + +let is_rm32 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty32 st + | Il.Reg (_, st) -> is_ty32 st + | _ -> false +;; + +let is_ty8 (ty:Il.scalar_ty) : bool = + match ty with + Il.ValTy (Il.Bits8) -> true + | _ -> false +;; + +let is_m32 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty32 st + | _ -> false +;; + +let is_m8 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty8 st + | _ -> false +;; + +let is_ok_r8 (r:Il.hreg) : bool = + (r == eax || r == ebx || r == ecx || r == edx) +;; + +let is_r8 (c:Il.cell) : bool = + match c with + Il.Reg (Il.Hreg r, st) when is_ok_r8 r -> is_ty8 st + | _ -> false +;; + +let is_rm8 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty8 st + | _ -> is_r8 c +;; + +let prealloc_quad (quad':Il.quad') : Il.quad' = + let target_cell reg c = + Il.Reg (Il.Hreg reg, Il.cell_scalar_ty c) + in + let target_operand reg op = + Il.Cell (Il.Reg (Il.Hreg reg, Il.operand_scalar_ty op)) + in + + let target_bin_to_hreg bin dst src = + { bin with + Il.binary_rhs = target_operand src bin.Il.binary_rhs; + Il.binary_lhs = target_operand dst bin.Il.binary_lhs; + Il.binary_dst = target_cell dst bin.Il.binary_dst } + in + + let target_cmp cmp = + match cmp.Il.cmp_lhs with + (* Immediate LHS we force to eax. 
*) + Il.Imm _ -> + { cmp with + Il.cmp_lhs = target_operand eax cmp.Il.cmp_lhs } + | _ -> cmp + in + + match quad' with + Il.Binary bin -> + begin + Il.Binary + begin + match bin.Il.binary_op with + Il.IMUL | Il.UMUL + | Il.IDIV | Il.UDIV -> target_bin_to_hreg bin eax ecx + | Il.IMOD | Il.UMOD -> target_bin_to_hreg bin eax ecx + | _ -> bin + end + end + + | Il.Cmp cmp -> Il.Cmp (target_cmp cmp) + + | Il.Call c -> + let ty = Il.cell_scalar_ty c.Il.call_dst in + Il.Call { c with + Il.call_dst = Il.Reg ((Il.Hreg eax), ty) } + + | Il.Lea le -> + begin + match (le.Il.lea_dst, le.Il.lea_src) with + (Il.Reg (_, dst_ty), Il.ImmPtr _) + when is_ty32 dst_ty -> + Il.Lea { le with + Il.lea_dst = Il.Reg (Il.Hreg eax, dst_ty) } + | _ -> quad' + end + + | x -> x +;; + +let constrain_vregs (q:Il.quad) (hregs:Bits.t array) : unit = + + let involves_8bit_cell = + let b = ref false in + let qp_cell _ c = + match c with + Il.Reg (_, Il.ValTy Il.Bits8) + | Il.Mem (_, Il.ScalarTy (Il.ValTy Il.Bits8)) -> + (b := true; c) + | _ -> c + in + ignore (Il.process_quad { Il.identity_processor with + Il.qp_cell_read = qp_cell; + Il.qp_cell_write = qp_cell } q); + !b + in + + let qp_mem _ m = m in + let qp_cell _ c = + begin + match c with + Il.Reg (Il.Vreg v, _) when involves_8bit_cell -> + (* 8-bit register cells must only be al, cl, dl, bl. + * Not esi/edi. *) + let hv = hregs.(v) in + List.iter (fun bad -> Bits.set hv bad false) [esi; edi] + | _ -> () + end; + c + in + begin + match q.Il.quad_body with + Il.Binary b -> + begin + match b.Il.binary_op with + (* Shifts *) + | Il.LSL | Il.LSR | Il.ASR -> + begin + match b.Il.binary_rhs with + Il.Cell (Il.Reg (Il.Vreg v, _)) -> + let hv = hregs.(v) in + (* Shift src has to be ecx. 
*) + List.iter + (fun bad -> Bits.set hv bad false) + [eax; edx; ebx; esi; edi] + | _ -> () + end + | _ -> () + end + | _ -> () + end; + ignore + (Il.process_quad { Il.identity_processor with + Il.qp_mem = qp_mem; + Il.qp_cell_read = qp_cell; + Il.qp_cell_write = qp_cell } q) +;; + + +let clobbers (quad:Il.quad) : Il.hreg list = + match quad.Il.quad_body with + Il.Binary bin -> + begin + match bin.Il.binary_op with + Il.IMUL | Il.UMUL + | Il.IDIV | Il.UDIV -> [ edx ] + | Il.IMOD | Il.UMOD -> [ edx ] + | _ -> [] + end + | Il.Unary un -> + begin + match un.Il.unary_op with + Il.ZERO -> [ eax; edi; ecx ] + | _ -> [ ] + end + | Il.Call _ -> [ eax; ecx; edx; ] + | Il.Regfence -> [ eax; ecx; ebx; edx; edi; esi; ] + | _ -> [] +;; + + +let word_sz = 4L +;; + +let word_bits = Il.Bits32 +;; + +let word_ty = TY_u32 +;; + +let annotate (e:Il.emitter) (str:string) = + Hashtbl.add e.Il.emit_annotations e.Il.emit_pc str +;; + +let c (c:Il.cell) : Il.operand = Il.Cell c ;; +let r (r:Il.reg) : Il.cell = Il.Reg ( r, (Il.ValTy word_bits) ) ;; +let h (x:Il.hreg) : Il.reg = Il.Hreg x ;; +let rc (x:Il.hreg) : Il.cell = r (h x) ;; +let ro (x:Il.hreg) : Il.operand = c (rc x) ;; +let vreg (e:Il.emitter) : (Il.reg * Il.cell) = + let vr = Il.next_vreg e in + (vr, (Il.Reg (vr, (Il.ValTy word_bits)))) +;; +let imm (x:Asm.expr64) : Il.operand = + Il.Imm (x, word_ty) +;; +let immi (x:int64) : Il.operand = + imm (Asm.IMM x) +;; + +let imm_byte (x:Asm.expr64) : Il.operand = + Il.Imm (x, TY_u8) +;; +let immi_byte (x:int64) : Il.operand = + imm_byte (Asm.IMM x) +;; + + +let byte_off_n (i:int) : Asm.expr64 = + Asm.IMM (Int64.of_int i) +;; + +let byte_n (reg:Il.reg) (i:int) : Il.cell = + let imm = byte_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8)) +;; + +let word_off_n (i:int) : Asm.expr64 = + Asm.IMM (Int64.mul (Int64.of_int i) word_sz) +;; + +let word_at (reg:Il.reg) : Il.cell = + let mem = Il.RegIn (reg, None) in + Il.Mem (mem, Il.ScalarTy 
(Il.ValTy word_bits)) +;; + +let word_at_off (reg:Il.reg) (off:Asm.expr64) : Il.cell = + let mem = Il.RegIn (reg, Some off) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits)) +;; + +let word_n (reg:Il.reg) (i:int) : Il.cell = + word_at_off reg (word_off_n i) +;; + +let reg_codeptr (reg:Il.reg) : Il.code = + Il.CodePtr (Il.Cell (Il.Reg (reg, Il.AddrTy Il.CodeTy))) +;; + +let word_n_low_byte (reg:Il.reg) (i:int) : Il.cell = + let imm = word_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8)) +;; + +let wordptr_n (reg:Il.reg) (i:int) : Il.cell = + let imm = word_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.AddrTy (Il.ScalarTy (Il.ValTy word_bits)))) +;; + +let get_element_ptr = Il.get_element_ptr word_bits reg_str ;; + +let save_callee_saves (e:Il.emitter) : unit = + Il.emit e (Il.Push (ro ebp)); + Il.emit e (Il.Push (ro edi)); + Il.emit e (Il.Push (ro esi)); + Il.emit e (Il.Push (ro ebx)); +;; + + +let restore_callee_saves (e:Il.emitter) : unit = + Il.emit e (Il.Pop (rc ebx)); + Il.emit e (Il.Pop (rc esi)); + Il.emit e (Il.Pop (rc edi)); + Il.emit e (Il.Pop (rc ebp)); +;; + + +(* restores registers from the frame base without updating esp: + * - sets ebp, edi, esi, ebx to stored values from frame base + * - sets `retpc' register to stored retpc from frame base + * - sets `base' register to current fp + *) +let restore_frame_base (e:Il.emitter) (base:Il.reg) (retpc:Il.reg) : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + mov (r base) (ro ebp); + mov (rc ebx) (c (word_at base)); + mov (rc esi) (c (word_n base 1)); + mov (rc edi) (c (word_n base 2)); + mov (rc ebp) (c (word_n base 3)); + mov (r retpc) (c (word_n base 4)); +;; + + +(* + * Our arrangement on x86 is this: + * + * *ebp+20+(4*N) = [argN ] + * ... 
+ * *ebp+24 = [arg1 ] = task ptr + * *ebp+20 = [arg0 ] = out ptr + * *ebp+16 = [retpc ] + * *ebp+12 = [old_ebp] + * *ebp+8 = [old_edi] + * *ebp+4 = [old_esi] + * *ebp = [old_ebx] + * + * For x86-cdecl: + * + * %eax, %ecx, %edx are "caller save" registers + * %ebp, %ebx, %esi, %edi are "callee save" registers + * + *) + +let frame_base_words = 5 (* eip,ebp,edi,esi,ebx *) ;; +let frame_base_sz = Int64.mul (Int64.of_int frame_base_words) word_sz;; + +let frame_info_words = 2 (* crate ptr, crate-rel frame info disp *) ;; +let frame_info_sz = Int64.mul (Int64.of_int frame_info_words) word_sz;; + +let implicit_arg_words = 2 (* task ptr,out ptr *);; +let implicit_args_sz = Int64.mul (Int64.of_int implicit_arg_words) word_sz;; + +let out_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words);; +let task_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words+1);; +let ty_param_n i = + wordptr_n (Il.Hreg ebp) (frame_base_words + implicit_arg_words + i);; + +let spill_slot (i:Il.spill) : Il.mem = + let imm = (Asm.IMM + (Int64.neg + (Int64.add frame_info_sz + (Int64.mul word_sz + (Int64.of_int (i+1)))))) + in + Il.RegIn ((Il.Hreg ebp), Some imm) +;; + + +let get_next_pc_thunk_fixup = new_fixup "glue$get_next_pc" +;; + +let emit_get_next_pc_thunk (e:Il.emitter) : unit = + let sty = Il.AddrTy Il.CodeTy in + let rty = Il.ScalarTy sty in + let deref_esp = Il.Mem (Il.RegIn (Il.Hreg esp, None), rty) in + let eax = (Il.Reg (Il.Hreg eax, sty)) in + Il.emit_full e (Some get_next_pc_thunk_fixup) [] + (Il.umov eax (Il.Cell deref_esp)); + Il.emit e Il.Ret; +;; + +let get_next_pc_thunk : (Il.reg * fixup * (Il.emitter -> unit)) = + (Il.Hreg eax, get_next_pc_thunk_fixup, emit_get_next_pc_thunk) +;; + +let emit_c_call + (e:Il.emitter) + (ret:Il.cell) + (tmp1:Il.reg) + (tmp2:Il.reg) + (nabi:nabi) + (in_prologue:bool) + (fptr:Il.code) + (args:Il.operand array) + : unit = + + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let binary op dst imm = emit (Il.binary op dst (c dst) (immi 
imm)) in + + (* rust calls get task as arg0 *) + let args = + if nabi.nabi_convention = CONV_rust + then Array.append [| c task_ptr |] args + else args + in + let nargs = Array.length args in + let arg_sz = Int64.mul (Int64.of_int nargs) word_sz + in + + mov (r tmp1) (c task_ptr); (* tmp1 = task from argv[-1] *) + mov (r tmp2) (ro esp); (* tmp2 = esp *) + mov (* task->rust_sp = tmp2 *) + (word_n tmp1 Abi.task_field_rust_sp) + (c (r tmp2)); + mov (* esp = task->runtime_sp *) + (rc esp) + (c (word_n tmp1 Abi.task_field_runtime_sp)); + + binary Il.SUB (rc esp) arg_sz; (* make room on the stack *) + binary Il.AND (rc esp) (* and 16-byte align sp *) + 0xfffffffffffffff0L; + + Array.iteri + begin + fun i (arg:Il.operand) -> (* write args to C stack *) + match arg with + Il.Cell (Il.Mem (a, ty)) -> + begin + match a with + Il.RegIn (Il.Hreg base, off) when base == esp -> + mov (r tmp1) (c (Il.Mem (Il.RegIn (tmp2, off), ty))); + mov (word_n (h esp) i) (c (r tmp1)); + | _ -> + mov (r tmp1) arg; + mov (word_n (h esp) i) (c (r tmp1)); + end + | _ -> + mov (word_n (h esp) i) arg + end + args; + + match ret with + Il.Mem (Il.RegIn (Il.Hreg base, _), _) when base == esp -> + assert (not in_prologue); + + (* If ret is esp-relative, use a temporary register until we + switched stacks. *) + + emit (Il.call (r tmp1) fptr); + mov (r tmp2) (c task_ptr); + mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp)); + mov ret (c (r tmp1)); + + | _ when in_prologue -> + (* + * We have to do something a little surprising here: + * we're doing a 'grow' call so ebp is going to point + * into a dead stack frame on call-return. So we + * temporarily store task-ptr into ebp and then reload + * esp *and* ebp via ebp->rust_sp on the other side of + * the call. 
+ *) + mov (rc ebp) (c task_ptr); + emit (Il.call ret fptr); + mov (rc esp) (c (word_n (h ebp) Abi.task_field_rust_sp)); + mov (rc ebp) (ro esp); + + | _ -> + emit (Il.call ret fptr); + mov (r tmp2) (c task_ptr); + mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp)); +;; + +let emit_void_prologue_call + (e:Il.emitter) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + let callee = Abi.load_fixup_codeptr e (h eax) fn true nabi.nabi_indirect in + emit_c_call e (rc eax) (h edx) (h ecx) nabi true callee args +;; + +let emit_native_call + (e:Il.emitter) + (ret:Il.cell) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + + let (tmp1, _) = vreg e in + let (tmp2, _) = vreg e in + let (freg, _) = vreg e in + let callee = Abi.load_fixup_codeptr e freg fn true nabi.nabi_indirect in + emit_c_call e ret tmp1 tmp2 nabi false callee args +;; + +let emit_native_void_call + (e:Il.emitter) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + + let (ret, _) = vreg e in + emit_native_call e (r ret) nabi fn args +;; + +let emit_native_call_in_thunk + (e:Il.emitter) + (ret:Il.cell) + (nabi:nabi) + (fn:Il.operand) + (args:Il.operand array) + : unit = + + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + + begin + match fn with + (* + * NB: old path, remove when/if you're sure you don't + * want native-linker-symbol-driven requirements. + *) + Il.ImmPtr (fix, _) -> + let code = + Abi.load_fixup_codeptr e (h eax) fix true nabi.nabi_indirect + in + emit_c_call e (rc eax) (h edx) (h ecx) nabi false code args; + + | _ -> + (* + * NB: new path, ignores nabi_indirect, assumes + * indirect via pointer from upcall_require_c_sym + * or crate cache. 
+ *) + mov (rc eax) fn; + let cell = Il.Reg (h eax, Il.AddrTy Il.CodeTy) in + let fptr = Il.CodePtr (Il.Cell cell) in + emit_c_call e (rc eax) (h edx) (h ecx) nabi false fptr args; + end; + + match ret with + Il.Reg (r, _) -> mov (word_at r) (ro eax) + | _ -> mov (rc edx) (c ret); + mov (word_at (h edx)) (ro eax) +;; + +let unwind_glue + (e:Il.emitter) + (nabi:nabi) + (exit_task_fixup:fixup) + : unit = + + let fp_n = word_n (Il.Hreg ebp) in + let edx_n = word_n (Il.Hreg edx) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let push x = emit (Il.Push x) in + let pop x = emit (Il.Pop x) in + let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in + let codefix fix = Il.CodePtr (Il.ImmPtr (fix, Il.CodeTy)) in + let mark fix = Il.emit_full e (Some fix) [] Il.Dead in + let glue_field = Abi.frame_glue_fns_field_drop in + + let repeat_jmp_fix = new_fixup "repeat jump" in + let skip_jmp_fix = new_fixup "skip jump" in + let exit_jmp_fix = new_fixup "exit jump" in + + mov (rc edx) (c task_ptr); (* switch back to rust stack *) + mov + (rc esp) + (c (edx_n Abi.task_field_rust_sp)); + + mark repeat_jmp_fix; + + mov (rc esi) (c (fp_n (-1))); (* esi <- crate ptr *) + mov (rc edx) (c (fp_n (-2))); (* edx <- frame glue functions. *) + emit (Il.cmp (ro edx) (immi 0L)); + + emit + (Il.jmp Il.JE + (codefix skip_jmp_fix)); (* if struct* is nonzero *) + add edx esi; (* add crate ptr to disp. *) + mov + (rc ecx) + (c (edx_n glue_field)); (* ecx <- drop glue *) + emit (Il.cmp (ro ecx) (immi 0L)); + + emit + (Il.jmp Il.JE + (codefix skip_jmp_fix)); (* if glue-fn is nonzero *) + add ecx esi; (* add crate ptr to disp. *) + push (ro ebp); (* frame-to-drop *) + push (c task_ptr); (* form usual call to glue *) + push (immi 0L); (* outptr *) + emit (Il.call (rc eax) + (reg_codeptr (h ecx))); (* call glue_fn, trashing eax. 
*) + pop (rc eax); + pop (rc eax); + pop (rc eax); + + mark skip_jmp_fix; + mov (rc edx) (c (fp_n 3)); (* load next fp (callee-saves[3]) *) + emit (Il.cmp (ro edx) (immi 0L)); + emit (Il.jmp Il.JE + (codefix exit_jmp_fix)); (* if nonzero *) + mov (rc ebp) (ro edx); (* move to next frame *) + emit (Il.jmp Il.JMP + (codefix repeat_jmp_fix)); (* loop *) + + (* exit path. *) + mark exit_jmp_fix; + + let callee = + Abi.load_fixup_codeptr + e (h eax) exit_task_fixup false nabi.nabi_indirect + in + emit_c_call + e (rc eax) (h edx) (h ecx) nabi false callee [| (c task_ptr) |]; +;; + +(* Puts result in eax; clobbers ecx, edx in the process. *) +let rec calculate_sz (e:Il.emitter) (size:size) : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let push x = emit (Il.Push x) in + let pop x = emit (Il.Pop x) in + let neg x = emit (Il.unary Il.NEG (rc x) (ro x)) in + let bnot x = emit (Il.unary Il.NOT (rc x) (ro x)) in + let band x y = emit (Il.binary Il.AND (rc x) (ro x) (ro y)) in + let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in + let mul x y = emit (Il.binary Il.UMUL (rc x) (ro x) (ro y)) in + let subi x y = emit (Il.binary Il.SUB (rc x) (ro x) (immi y)) in + let eax_gets_a_and_ecx_gets_b a b = + calculate_sz e b; + push (ro eax); + calculate_sz e a; + pop (rc ecx); + in + match size with + SIZE_fixed i -> + mov (rc eax) (immi i) + + | SIZE_fixup_mem_sz f -> + mov (rc eax) (imm (Asm.M_SZ f)) + + | SIZE_fixup_mem_pos f -> + mov (rc eax) (imm (Asm.M_POS f)) + + | SIZE_param_size i -> + mov (rc eax) (Il.Cell (ty_param_n i)); + mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_size)) + + | SIZE_param_align i -> + mov (rc eax) (Il.Cell (ty_param_n i)); + mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_align)) + + | SIZE_rt_neg a -> + calculate_sz e a; + neg eax + + | SIZE_rt_add (a, b) -> + eax_gets_a_and_ecx_gets_b a b; + add eax ecx + + | SIZE_rt_mul (a, b) -> + eax_gets_a_and_ecx_gets_b a b; + mul eax ecx + + | 
SIZE_rt_max (a, b) ->
        (* Unsigned max: keep eax (a) if eax >= ecx (b), else take ecx. *)
        eax_gets_a_and_ecx_gets_b a b;
        emit (Il.cmp (ro eax) (ro ecx));
        let jmp_pc = e.Il.emit_pc in
          emit (Il.jmp Il.JAE Il.CodeNone);
          mov (rc eax) (ro ecx);
          Il.patch_jump e jmp_pc e.Il.emit_pc;

    | SIZE_rt_align (align, off) ->
        (*
         * calculate off + pad where:
         *
         * pad = (align - (off mod align)) mod align
         *
         * In our case it's always a power of two,
         * so we can just do:
         *
         * mask = align-1
         * off += mask
         * off &= ~mask
         *
         *)
        eax_gets_a_and_ecx_gets_b off align;
        subi ecx 1L;
        add eax ecx;
        bnot ecx;
        band eax ecx;
;;

(* Conservative upper bound, in words, on the PUSH/POP scratch stack
 * that calculate_sz (above) uses while evaluating [size].
 *
 * Leaf cases are computed directly into eax and touch no stack, and
 * SIZE_rt_neg negates in place.  Every binary case, however, goes
 * through eax_gets_a_and_ecx_gets_b, which pushes one word (the second
 * operand's result) while the first operand is being calculated -- so
 * it needs one slot plus whatever its sub-expressions need.
 *
 * Fix: SIZE_rt_max previously omitted the "+ 1" even though its
 * calculate_sz arm uses eax_gets_a_and_ecx_gets_b exactly like add,
 * mul and align do.  For, e.g., a max of two leaf sizes it reported 0
 * words where 1 is required, under-reserving the primordial frame in
 * fn_prologue.  It now shares the binary-case arm. *)
let rec size_calculation_stack_highwater (size:size) : int =
  match size with
      SIZE_fixed _
    | SIZE_fixup_mem_sz _
    | SIZE_fixup_mem_pos _
    | SIZE_param_size _
    | SIZE_param_align _ -> 0
    | SIZE_rt_neg a ->
        (* Unary negation: no stack traffic of its own. *)
        (size_calculation_stack_highwater a)
    | SIZE_rt_max (a, b)
    | SIZE_rt_add (a, b)
    | SIZE_rt_mul (a, b)
    | SIZE_rt_align (a, b) ->
        (* One word is held on the stack while the left operand runs. *)
        (size_calculation_stack_highwater a)
        + (size_calculation_stack_highwater b)
        + 1
;;

let boundary_sz =
  (Asm.IMM
     (Int64.add                (* Extra non-frame room: *)
        frame_base_sz          (* to safely enter the next frame, *)
        frame_base_sz))        (* and make a 'grow' upcall there.
*) +;; + +let stack_growth_check + (e:Il.emitter) + (nabi:nabi) + (grow_task_fixup:fixup) + (growsz:Il.operand) + (grow_jmp:Il.label option) + (restart_pc:Il.label) + (end_reg:Il.reg) (* + * stack limit on entry, + * new stack pointer on exit + *) + (tmp_reg:Il.reg) (* temporary (trashed) *) + : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in + let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in + mov (r tmp_reg) (ro esp); (* tmp = esp *) + sub (r tmp_reg) growsz; (* tmp -= size-request *) + emit (Il.cmp (c (r end_reg)) (c (r tmp_reg))); + (* + * Jump *over* 'grow' upcall on non-underflow: + * if end_reg <= tmp_reg + *) + + let bypass_grow_upcall_jmp_pc = e.Il.emit_pc in + emit (Il.jmp Il.JBE Il.CodeNone); + + begin + match grow_jmp with + None -> () + | Some j -> Il.patch_jump e j e.Il.emit_pc + end; + (* Extract growth-amount from tmp_reg. *) + mov (r end_reg) (ro esp); + sub (r end_reg) (c (r tmp_reg)); + add (r end_reg) (Il.Imm (boundary_sz, word_ty)); + (* Perform 'grow' upcall, then restart frame-entry. *) + emit_void_prologue_call e nabi grow_task_fixup [| c (r end_reg) |]; + emit (Il.jmp Il.JMP (Il.CodeLabel restart_pc)); + Il.patch_jump e bypass_grow_upcall_jmp_pc e.Il.emit_pc +;; + +let fn_prologue + (e:Il.emitter) + (framesz:size) + (callsz:size) + (nabi:nabi) + (grow_task_fixup:fixup) + : unit = + + let esi_n = word_n (h esi) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in + let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in + + (* We may be in a dynamic-sized frame. This makes matters complex, + * as we can't just perform a simple growth check in terms of a + * static size. The check is against a dynamic size, and we need to + * calculate that size. 
+ * + * Unlike size-calculations in 'trans', we do not use vregs to + * calculate the frame size; instead we use a PUSH/POP stack-machine + * translation that doesn't disturb the registers we're + * somewhat-carefully *using* during frame setup. + * + * This only pushes the problem back a little ways though: we still + * need to be sure we have enough room to do the PUSH/POP + * calculation. We refer to this amount of space as the 'primordial' + * frame size, which can *thankfully* be calculated exactly from the + * arithmetic expression we're aiming to calculate. So we make room + * for the primordial frame, run the calculation of the full dynamic + * frame size, then make room *again* for this dynamic size. + * + * Our caller reserved enough room for us to push our own frame-base, + * as well as the frame-base that it will cost to do an upcall. + *) + + (* + * After we save callee-saves, We have a stack like this: + * + * | ... | + * | caller frame | + * | + spill | + * | caller arg K | + * | ... | + * | caller arg 0 | + * | retpc | <-- sp we received, top of callee frame + * | callee save 1 | + * | ... | + * | callee save N | <-- ebp and esp after saving callee-saves + * | ... | + * | callee frame | + * | + spill | + * | callee arg J | + * | ... | + * | callee arg 0 | <-- bottom of callee frame + * | next retpc | + * | next save 1 | + * | ... | + * | next save N | <-- bottom of region we must reserve + * | ... | + * + * A "frame base" is the retpc and set of callee-saves. + * + * We need to reserve room for our frame *and* the next frame-base, because + * we're going to be blindly entering the next frame-base (pushing eip and + * callee-saves) before we perform the next check. + *) + + (* + * We double the reserved callsz because we need a 'temporary tail-call + * region' above the actual call region, in case there's a drop call at the + * end of assembling the tail-call args and before copying them to callee + * position. 
+ *) + + let callsz = add_sz callsz callsz in + let n_glue_args = Int64.of_int Abi.worst_case_glue_call_args in + let n_glue_words = Int64.mul word_sz n_glue_args in + + (* + * Add in *another* word to handle an extra-awkward spill of the + * callee address that might occur during an indirect tail call. + *) + let callsz = add_sz (SIZE_fixed word_sz) callsz in + + (* + * Add in enough words for a glue-call (these occur underneath esp) + *) + let callsz = add_sz (SIZE_fixed n_glue_words) callsz in + + (* + * Cumulative dynamic-frame size. + *) + let call_and_frame_sz = add_sz callsz framesz in + + (* Already have room to save regs on entry. *) + save_callee_saves e; + + let restart_pc = e.Il.emit_pc in + + mov (rc ebp) (ro esp); (* Establish frame base. *) + mov (rc esi) (c task_ptr); (* esi = task *) + mov + (rc esi) + (c (esi_n Abi.task_field_stk)); (* esi = task->stk *) + add (rc esi) (imm + (Asm.ADD + ((word_off_n Abi.stk_field_data), + boundary_sz))); + + let (dynamic_frame_sz, dynamic_grow_jmp) = + match Il.size_to_expr64 call_and_frame_sz with + None -> + begin + let primordial_frame_sz = + Asm.IMM + (Int64.mul word_sz + (Int64.of_int + (size_calculation_stack_highwater + call_and_frame_sz))) + in + (* Primordial size-check. *) + mov (rc edi) (ro esp); (* edi = esp *) + sub (* edi -= size-request *) + (rc edi) + (imm primordial_frame_sz); + emit (Il.cmp (ro esi) (ro edi)); + + (* Jump to 'grow' upcall on underflow: if esi (bottom) is > + edi (proposed-esp) *) + + let primordial_underflow_jmp_pc = e.Il.emit_pc in + emit (Il.jmp Il.JA Il.CodeNone); + + (* Calculate dynamic frame size. *) + calculate_sz e call_and_frame_sz; + ((ro eax), Some primordial_underflow_jmp_pc) + end + | Some e -> ((imm e), None) + in + + (* "Full" frame size-check. *) + stack_growth_check e nabi grow_task_fixup + dynamic_frame_sz dynamic_grow_jmp restart_pc (h esi) (h edi); + + + (* Establish a frame, wherever we landed. *) + sub (rc esp) dynamic_frame_sz; + + (* Zero the frame. 
 *
 * FIXME: this is awful, will go away when we have proper CFI.
 *)

    mov (rc edi) (ro esp);
    mov (rc ecx) dynamic_frame_sz;
    emit (Il.unary Il.ZERO (word_at (h edi)) (ro ecx));

    (* Move esp back up over the glue region. *)
    add (rc esp) (immi n_glue_words);
;;


(* Standard epilogue: reset esp to the frame base (ebp), restore the
 * callee-saved registers pushed on entry, and return. *)
let fn_epilogue (e:Il.emitter) : unit =

  (* Tear down existing frame. *)
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in
    mov (rc esp) (ro ebp);
    restore_callee_saves e;
    emit Il.Ret;
;;

(* Emit a fully-unrolled copy of [n_bytes] from *src_ptr to *dst_ptr,
 * bouncing each unit through [tmp_reg].  Whole words are moved and a
 * byte tail covers the remainder.  [ascending] selects low-to-high
 * versus high-to-low copy order -- the caller picks the direction that
 * is safe when source and destination overlap. *)
let inline_memcpy
    (e:Il.emitter)
    (n_bytes:int64)
    (dst_ptr:Il.reg)
    (src_ptr:Il.reg)
    (tmp_reg:Il.reg)
    (ascending:bool)
    : unit =
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in
  let bpw = Int64.to_int word_sz in                    (* bytes per word *)
  let w = Int64.to_int (Int64.div n_bytes word_sz) in  (* whole words *)
  let b = Int64.to_int (Int64.rem n_bytes word_sz) in  (* trailing bytes *)
    if ascending
    then
      begin
        (* Low-to-high: words first, then the byte tail above them. *)
        for i = 0 to (w-1) do
          mov (r tmp_reg) (c (word_n src_ptr i));
          mov (word_n dst_ptr i) (c (r tmp_reg));
        done;
        for i = 0 to (b-1) do
          let off = (w*bpw) + i in
            mov (r tmp_reg) (c (byte_n src_ptr off));
            mov (byte_n dst_ptr off) (c (r tmp_reg));
        done;
      end
    else
      begin
        (* High-to-low: byte tail first, then words, highest first. *)
        for i = (b-1) downto 0 do
          let off = (w*bpw) + i in
            mov (r tmp_reg) (c (byte_n src_ptr off));
            mov (byte_n dst_ptr off) (c (r tmp_reg));
        done;
        for i = (w-1) downto 0 do
          mov (r tmp_reg) (c (word_n src_ptr i));
          mov (word_n dst_ptr i) (c (r tmp_reg));
        done;
      end
;;



(* Emit a tail call: shuffle the prepared argument tuple up over the
 * current frame, restore the caller's callee-saves, and jump (not
 * call) to [callee_code], pushing the saved return pc so the callee
 * returns directly to our caller. *)
let fn_tail_call
    (e:Il.emitter)
    (caller_callsz:int64)
    (caller_argsz:int64)
    (callee_code:Il.code)
    (callee_argsz:int64)
    : unit =
  let emit = Il.emit e in
  let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in
  let mov dst src = emit (Il.umov dst src) in
  (* Positive when the caller's arg area is larger than the callee's. *)
  let argsz_diff = Int64.sub caller_argsz callee_argsz in
  let callee_spill_cell = word_at_off (h esp) (Asm.IMM caller_callsz) in

  (*
   * Our outgoing arguments were prepared in a region above the call region;
   * this is reserved for the purpose of
making tail-calls *only*, so we do + * not collide with glue calls we had to make while dropping the frame, + * after assembling our arg region. + * + * Thus, esp points to the "normal" arg region, and we need to move it + * to point to the tail-call arg region. To make matters simple, both + * regions are the same size, one atop the other. + *) + + annotate e "tail call: move esp to temporary tail call arg-prep area"; + binary Il.ADD (rc esp) caller_callsz; + + (* + * If we're given a non-ImmPtr callee, we may need to move it to a known + * cell to avoid clobbering its register while we do the argument shuffle + * below. + * + * Sadly, we are too register-starved to just flush our callee to a reg; + * so we carve out an extra word of the temporary call-region and use + * it. + * + * This is ridiculous, but works. + *) + begin + match callee_code with + Il.CodePtr (Il.Cell c) -> + annotate e "tail call: spill callee-ptr to temporary memory"; + mov callee_spill_cell (Il.Cell c); + + | _ -> () + end; + + (* edx <- ebp; restore ebp, edi, esi, ebx; ecx <- retpc *) + annotate e "tail call: restore callee-saves from frame base"; + restore_frame_base e (h edx) (h ecx); + (* move edx past frame base and adjust for difference in call sizes *) + annotate e "tail call: adjust temporary fp"; + binary Il.ADD (rc edx) (Int64.add frame_base_sz argsz_diff); + + (* + * stack grows downwards; copy from high to low + * + * bpw = word_sz + * w = floor(callee_argsz / word_sz) + * b = callee_argsz % word_sz + * + * byte copies: + * +------------------------+ + * | | + * +------------------------+ <-- base + (w * word_sz) + (b - 1) + * . . 
+ * +------------------------+ + * | | + * +------------------------+ <-- base + (w * word_sz) + (b - b) + * word copies: = + * +------------------------+ <-- base + ((w-0) * word_sz) + * | bytes | + * | (w-1)*bpw..w*bpw-1 | + * +------------------------+ <-- base + ((w-1) * word_sz) + * | bytes | + * | (w-2)*bpw..(w-1)*bpw-1 | + * +------------------------+ <-- base + ((w-2) * word_sz) + * . . + * . . + * . . + * +------------------------+ + * | bytes | + * | 0..bpw - 1 | + * +------------------------+ <-- base + ((w-w) * word_sz) + *) + + annotate e "tail call: move arg-tuple up to top of frame"; + (* NOTE: must copy top-to-bottom in case the regions overlap *) + inline_memcpy e callee_argsz (h edx) (h esp) (h eax) false; + + (* + * We're done with eax now; so in the case where we had to spill + * our callee codeptr, we can reload it into eax here and rewrite + * our callee into *eax. + *) + let callee_code = + match callee_code with + Il.CodePtr (Il.Cell _) -> + annotate e "tail call: reload callee-ptr from temporary memory"; + mov (rc eax) (Il.Cell callee_spill_cell); + reg_codeptr (h eax) + + | _ -> callee_code + in + + + (* esp <- edx *) + annotate e "tail call: adjust stack pointer"; + mov (rc esp) (ro edx); + (* PUSH ecx (retpc) *) + annotate e "tail call: push retpc"; + emit (Il.Push (ro ecx)); + (* JMP callee_code *) + emit (Il.jmp Il.JMP callee_code); +;; + + +let loop_info_field_retpc = 0;; +let loop_info_field_sp = 1;; +let loop_info_field_fp = 2;; + +let self_args_cell (self_args_rty:Il.referent_ty) : Il.cell = + Il.Mem (Il.RegIn (h ebp, Some (Asm.IMM frame_base_sz)), self_args_rty) +;; + +let activate_glue (e:Il.emitter) : unit = + (* + * This is a bit of glue-code. It should be emitted once per + * compilation unit. 
+ * + * - save regs on C stack + * - align sp on a 16-byte boundary + * - save sp to task.runtime_sp (runtime_sp is thus always aligned) + * - load saved task sp (switch stack) + * - restore saved task regs + * - return to saved task pc + * + * Our incoming stack looks like this: + * + * *esp+4 = [arg1 ] = task ptr + * *esp = [retpc ] + *) + + let sp_n = word_n (Il.Hreg esp) in + let edx_n = word_n (Il.Hreg edx) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in + + mov (rc edx) (c (sp_n 1)); (* edx <- task *) + save_callee_saves e; + mov + (edx_n Abi.task_field_runtime_sp) + (ro esp); (* task->runtime_sp <- esp *) + mov + (rc esp) + (c (edx_n Abi.task_field_rust_sp)); (* esp <- task->rust_sp *) + + (* + * There are two paths we can arrive at this code from: + * + * + * 1. We are activating a task for the first time. When we switch into + * the task stack and 'ret' to its first instruction, we'll start + * doing whatever the first instruction says. Probably saving + * registers and starting to establish a frame. Harmless stuff, + * doesn't look at task->rust_sp again except when it clobbers it + * during a later upcall. + * + * + * 2. We are resuming a task that was descheduled by the yield glue + * below. When we switch into the task stack and 'ret', we'll be + * ret'ing to a very particular instruction: + * + * "esp <- task->rust_sp" + * + * this is the first instruction we 'ret' to after this glue, because + * it is the first instruction following *any* upcall, and the task + * we are activating was descheduled mid-upcall. + * + * Unfortunately for us, we have already restored esp from + * task->rust_sp and are about to eat the 5 words off the top of it. + * + * + * | ... | <-- where esp will be once we restore + ret, below, + * | retpc | and where we'd *like* task->rust_sp to wind up. 
+ * | ebp | + * | edi | + * | esi | + * | ebx | <-- current task->rust_sp == current esp + * + * + * This is a problem. If we return to "esp <- task->rust_sp" it will + * push esp back down by 5 words. This manifests as a rust stack that + * grows by 5 words on each yield/reactivate. Not good. + * + * So what we do here is just adjust task->rust_sp up 5 words as + * well, to mirror the movement in esp we're about to perform. That + * way the "esp <- task->rust_sp" we 'ret' to below will be a + * no-op. Esp won't move, and the task's stack won't grow. + *) + + binary Il.ADD (edx_n Abi.task_field_rust_sp) + (Int64.mul (Int64.of_int (n_callee_saves + 1)) word_sz); + + (**** IN TASK STACK ****) + restore_callee_saves e; + emit Il.Ret; + (***********************) + () +;; + +let yield_glue (e:Il.emitter) : unit = + + (* More glue code, this time the 'bottom half' of yielding. + * + * We arrived here because an upcall decided to deschedule the + * running task. So the upcall's return address got patched to the + * first instruction of this glue code. + * + * When the upcall does 'ret' it will come here, and its esp will be + * pointing to the last argument pushed on the C stack before making + * the upcall: the 0th argument to the upcall, which is always the + * task ptr performing the upcall. That's where we take over. + * + * Our goal is to complete the descheduling + * + * - Switch over to the task stack temporarily. + * + * - Save the task's callee-saves onto the task stack. + * (the task is now 'descheduled', safe to set aside) + * + * - Switch *back* to the C stack. + * + * - Restore the C-stack callee-saves. + * + * - Return to the caller on the C stack that activated the task. 
 *
 *)
  let esp_n = word_n (Il.Hreg esp) in
  let edx_n = word_n (Il.Hreg edx) in
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in

    mov
      (rc edx) (c (esp_n 0));                  (* edx <- arg0 (task) *)
    mov
      (rc esp)
      (c (edx_n Abi.task_field_rust_sp));      (* esp <- task->rust_sp *)
    save_callee_saves e;
    mov                                        (* task->rust_sp <- esp *)
      (edx_n Abi.task_field_rust_sp)
      (ro esp);
    mov
      (rc esp)
      (c (edx_n Abi.task_field_runtime_sp));   (* esp <- task->runtime_sp *)

    (**** IN C STACK ****)
    restore_callee_saves e;
    emit Il.Ret;
    (***********************)
    ()
;;


(* Load the 32-bit address of [fix] into the get-next-pc thunk's
 * register (x86 has no pc-relative data addressing, so the thunk is
 * how position-independent addresses are formed) and push it. *)
let push_pos32 (e:Il.emitter) (fix:fixup) : unit =
  let (reg, _, _) = get_next_pc_thunk in
    Abi.load_fixup_addr e reg fix Il.OpaqueTy;
    Il.emit e (Il.Push (Il.Cell (Il.Reg (reg, Il.AddrTy Il.OpaqueTy))))
;;

(* Emit the object file's entry point: save callee-saves, forward argc,
 * argv, the main-fn and crate addresses to rust_start, then unwind and
 * return whatever rust_start left in eax. *)
let objfile_start
    (e:Il.emitter)
    ~(start_fixup:fixup)
    ~(rust_start_fixup:fixup)
    ~(main_fn_fixup:fixup)
    ~(crate_fixup:fixup)
    ~(indirect_start:bool)
    : unit =
  let ebp_n = word_n (Il.Hreg ebp) in
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in
  let push_pos32 = push_pos32 e in
    Il.emit_full e (Some start_fixup) [] Il.Dead;
    save_callee_saves e;
    mov (rc ebp) (ro esp);

    (* If we're very lucky, the platform will have left us with
     * something sensible in the startup stack like so:
     *
     * *ebp+24 = [arg1 ] = argv
     * *ebp+20 = [arg0 ] = argc
     * *ebp+16 = [retpc ]
     * *ebp+12 = [old_ebp]
     * *ebp+8 = [old_edi]
     * *ebp+4 = [old_esi]
     * *ebp = [old_ebx]
     *
     * This is not the case everywhere, but we start with this
     * assumption and correct it in the runtime library.
     *)

    (* Copy argv. *)
    mov (rc eax) (c (ebp_n (2 + n_callee_saves)));
    Il.emit e (Il.Push (ro eax));

    (* Copy argc.
*) + mov (rc eax) (c (ebp_n (1 + n_callee_saves))); + Il.emit e (Il.Push (ro eax)); + + push_pos32 crate_fixup; + push_pos32 main_fn_fixup; + let fptr = + Abi.load_fixup_codeptr e (h eax) rust_start_fixup true indirect_start + in + Il.emit e (Il.call (rc eax) fptr); + Il.emit e (Il.Pop (rc ecx)); + Il.emit e (Il.Pop (rc ecx)); + Il.emit e (Il.Pop (rc ecx)); + Il.emit e (Il.Pop (rc ecx)); + Il.emit e (Il.umov (rc esp) (ro ebp)); + restore_callee_saves e; + Il.emit e Il.Ret; +;; + +let (abi:Abi.abi) = + { + Abi.abi_word_sz = word_sz; + Abi.abi_word_bits = word_bits; + Abi.abi_word_ty = word_ty; + + Abi.abi_is_2addr_machine = true; + Abi.abi_has_pcrel_data = false; + Abi.abi_has_pcrel_code = true; + + Abi.abi_n_hardregs = n_hardregs; + Abi.abi_str_of_hardreg = reg_str; + Abi.abi_prealloc_quad = prealloc_quad; + Abi.abi_constrain_vregs = constrain_vregs; + + Abi.abi_emit_fn_prologue = fn_prologue; + Abi.abi_emit_fn_epilogue = fn_epilogue; + Abi.abi_emit_fn_tail_call = fn_tail_call; + Abi.abi_clobbers = clobbers; + + Abi.abi_emit_native_call = emit_native_call; + Abi.abi_emit_native_void_call = emit_native_void_call; + Abi.abi_emit_native_call_in_thunk = emit_native_call_in_thunk; + Abi.abi_emit_inline_memcpy = inline_memcpy; + + Abi.abi_activate = activate_glue; + Abi.abi_yield = yield_glue; + Abi.abi_unwind = unwind_glue; + Abi.abi_get_next_pc_thunk = Some get_next_pc_thunk; + + Abi.abi_sp_reg = (Il.Hreg esp); + Abi.abi_fp_reg = (Il.Hreg ebp); + Abi.abi_dwarf_fp_reg = dwarf_ebp; + Abi.abi_tp_cell = task_ptr; + Abi.abi_frame_base_sz = frame_base_sz; + Abi.abi_frame_info_sz = frame_info_sz; + Abi.abi_implicit_args_sz = implicit_args_sz; + Abi.abi_spill_slot = spill_slot; + } + + +(* + * NB: factor the instruction selector often. There's lots of + * semi-redundancy in the ISA. 
 *)


(* True iff [n] fits in a sign-extended 8-bit immediate (-128..127). *)
let imm_is_signed_byte (n:int64) : bool =
  (i64_le (-128L) n) && (i64_le n 127L)
;;

(* True iff [n] fits in an unsigned 8-bit immediate (0..255). *)
let imm_is_unsigned_byte (n:int64) : bool =
  (i64_le (0L) n) && (i64_le n 255L)
;;


(* Emit the mod/rm byte (plus SIB byte and displacement where the
 * addressing mode requires them) encoding cell [c] as the r/m operand,
 * with [r] in the reg field.  [r] may equally be an opcode-extension
 * "slash" code. *)
let rm_r (c:Il.cell) (r:int) : Asm.frag =
  let reg_ebp = 6 in
  let reg_esp = 7 in

  (*
   * We do a little contortion here to accommodate the special case of
   * being asked to form esp-relative addresses; these require SIB
   * bytes on x86. Of course!
   *)
  let sib_esp_base = Asm.BYTE 0x24 in
  let seq1 rm modrm =
    if rm = reg_esp
    then Asm.SEQ [| modrm; sib_esp_base |]
    else modrm
  in
  let seq2 rm modrm disp =
    if rm = reg_esp
    then Asm.SEQ [| modrm; sib_esp_base; disp |]
    else Asm.SEQ [| modrm; disp |]
  in

    match c with
        Il.Reg ((Il.Hreg rm), _) ->
          Asm.BYTE (modrm_reg (reg rm) r)
      | Il.Mem (a, _) ->
          begin
            match a with
                Il.Abs disp ->
                  Asm.SEQ [| Asm.BYTE (modrm_deref_disp32 r);
                             Asm.WORD (TY_i32, disp) |]

              (* ebp is excluded from the no-displacement forms below:
               * on x86 its no-disp mod/rm encoding is reused for
               * absolute-disp32 addressing, so ebp-relative accesses
               * must fall through to the explicit-displacement arms. *)
              | Il.RegIn ((Il.Hreg rm), None) when rm != reg_ebp ->
                  seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r))

              | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM 0L))
                  when rm != reg_ebp ->
                  seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r))

              (* The next two are just to save the relaxation system some
               * churn.
+ *) + + | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n)) + when imm_is_signed_byte n -> + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r)) + (Asm.WORD (TY_i8, Asm.IMM n)) + + | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n)) -> + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r)) + (Asm.WORD (TY_i32, Asm.IMM n)) + + | Il.RegIn ((Il.Hreg rm), Some disp) -> + Asm.new_relaxation + [| + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r)) + (Asm.WORD (TY_i32, disp)); + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r)) + (Asm.WORD (TY_i8, disp)) + |] + | _ -> raise Unrecognized + end + | _ -> raise Unrecognized +;; + + +let insn_rm_r (op:int) (c:Il.cell) (r:int) : Asm.frag = + Asm.SEQ [| Asm.BYTE op; rm_r c r |] +;; + + +let insn_rm_r_imm + (op:int) + (c:Il.cell) + (r:int) + (ty:ty_mach) + (i:Asm.expr64) + : Asm.frag = + Asm.SEQ [| Asm.BYTE op; rm_r c r; Asm.WORD (ty, i) |] +;; + +let insn_rm_r_imm_s8_s32 + (op8:int) + (op32:int) + (c:Il.cell) + (r:int) + (i:Asm.expr64) + : Asm.frag = + match i with + Asm.IMM n when imm_is_signed_byte n -> + insn_rm_r_imm op8 c r TY_i8 i + | _ -> + Asm.new_relaxation + [| + insn_rm_r_imm op32 c r TY_i32 i; + insn_rm_r_imm op8 c r TY_i8 i + |] +;; + +let insn_rm_r_imm_u8_u32 + (op8:int) + (op32:int) + (c:Il.cell) + (r:int) + (i:Asm.expr64) + : Asm.frag = + match i with + Asm.IMM n when imm_is_unsigned_byte n -> + insn_rm_r_imm op8 c r TY_u8 i + | _ -> + Asm.new_relaxation + [| + insn_rm_r_imm op32 c r TY_u32 i; + insn_rm_r_imm op8 c r TY_u8 i + |] +;; + + +let insn_pcrel_relax + (op8_frag:Asm.frag) + (op32_frag:Asm.frag) + (fix:fixup) + : Asm.frag = + let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in + let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in + let pcrel_expr = (Asm.SUB (Asm.M_POS fix, + Asm.M_POS pcrel_mark_fixup)) + in + Asm.new_relaxation + [| + Asm.SEQ [| op32_frag; Asm.WORD (TY_i32, pcrel_expr); def |]; + Asm.SEQ [| op8_frag; Asm.WORD (TY_i8, pcrel_expr); def |]; + |] +;; + +let 
insn_pcrel_simple (op32:int) (fix:fixup) : Asm.frag =
  let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in
  let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in
  (* The displacement is measured from the mark placed at the end of
   * the emitted instruction (i.e. the next instruction's address). *)
  let pcrel_expr = (Asm.SUB (Asm.M_POS fix,
                             Asm.M_POS pcrel_mark_fixup))
  in
    Asm.SEQ [| Asm.BYTE op32; Asm.WORD (TY_i32, pcrel_expr); def |]
;;

(* Pc-relative instruction with single-byte opcodes for both the rel8
 * and rel32 forms, letting the relaxation system pick the shorter. *)
let insn_pcrel (op8:int) (op32:int) (fix:fixup) : Asm.frag =
  insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTE op32) fix
;;

(* As insn_pcrel, but the rel32 form carries a prefix byte (used for
 * the two-byte 0x0f jcc encodings). *)
let insn_pcrel_prefix32
    (op8:int)
    (prefix32:int)
    (op32:int)
    (fix:fixup)
    : Asm.frag =
  insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTES [| prefix32; op32 |]) fix
;;

(* Encode a CMP between operand [a] and [b].
 * 0x80 /7 is CMP r/m8, imm8; 0x83/0x81 /7 the imm8/imm32-vs-r/m32
 * forms; 0x39 is CMP r/m32, r32 and 0x3b CMP r32, r/m32. *)
(* FIXME: tighten imm-based dispatch by imm type. *)
let cmp (a:Il.operand) (b:Il.operand) : Asm.frag =
  match (a,b) with
      (Il.Cell c, Il.Imm (i, TY_i8)) when is_rm8 c ->
        insn_rm_r_imm 0x80 c slash7 TY_i8 i
    | (Il.Cell c, Il.Imm (i, TY_u8)) when is_rm8 c ->
        insn_rm_r_imm 0x80 c slash7 TY_u8 i
    | (Il.Cell c, Il.Imm (i, _)) when is_rm32 c ->
        (*
         * NB: We can't switch on signed-ness here, as 'cmp' is
         * defined to sign-extend its operand; i.e. we have to treat
         * it as though you're emitting a signed byte (in the sense of
         * immediate-size selection) even if the incoming value is
         * unsigned.
         *)
        insn_rm_r_imm_s8_s32 0x83 0x81 c slash7 i
    | (Il.Cell c, Il.Cell (Il.Reg (Il.Hreg r, _))) ->
        insn_rm_r 0x39 c (reg r)
    | (Il.Cell (Il.Reg (Il.Hreg r, _)), Il.Cell c) ->
        insn_rm_r 0x3b c (reg r)
    | _ -> raise Unrecognized
;;

(* Fill [count] bytes at [dst] with zero.  Only the one pattern the
 * register allocator is set up to produce is encoded: destination
 * pointer in edi and count in ecx, via rep-stosb. *)
let zero (dst:Il.cell) (count:Il.operand) : Asm.frag =
  match (dst, count) with

      ((Il.Mem (Il.RegIn ((Il.Hreg dst_ptr), None), _)),
       Il.Cell (Il.Reg ((Il.Hreg count), _)))
        when dst_ptr = edi && count = ecx ->
          Asm.BYTES [|
            0xb0; 0x0;       (* 0xb0 0x00 = mov al, 0 : move a zero into al.
*) + 0xf3; 0xaa; (* rep stos m8 : fill ecx bytes at [edi] with al *) + |] + + | _ -> raise Unrecognized +;; + +let mov (signed:bool) (dst:Il.cell) (src:Il.operand) : Asm.frag = + if is_ty8 (Il.cell_scalar_ty dst) || is_ty8 (Il.operand_scalar_ty src) + then + begin + (match dst with + Il.Reg (Il.Hreg r, _) + -> assert (is_ok_r8 r) | _ -> ()); + (match src with + Il.Cell (Il.Reg (Il.Hreg r, _)) + -> assert (is_ok_r8 r) | _ -> ()); + end; + + match (signed, dst, src) with + + (* m8 <- r??, r8 or truncate(r32). *) + (_, _, Il.Cell (Il.Reg ((Il.Hreg r), _))) + when is_m8 dst -> + insn_rm_r 0x88 dst (reg r) + + (* r8 <- r8: treat as r32 <- r32. *) + | (_, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell) + when is_r8 dst && is_r8 src_cell -> + insn_rm_r 0x8b src_cell (reg r) + + (* rm32 <- r32 *) + | (_, _, Il.Cell (Il.Reg ((Il.Hreg r), src_ty))) + when (is_r8 dst || is_rm32 dst) && is_ty32 src_ty -> + insn_rm_r 0x89 dst (reg r) + + (* r32 <- rm32 *) + | (_, (Il.Reg ((Il.Hreg r), dst_ty)), Il.Cell src_cell) + when is_ty32 dst_ty && is_rm32 src_cell -> + insn_rm_r 0x8b src_cell (reg r) + + (* MOVZX: r8/r32 <- zx(rm8) *) + | (false, Il.Reg ((Il.Hreg r, _)), Il.Cell src_cell) + when (is_r8 dst || is_r32 dst) && is_rm8 src_cell -> + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xb6 src_cell (reg r) |] + + (* MOVZX: m32 <- zx(r8) *) + | (false, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell))) + when (is_m32 dst) && is_r8 src_cell -> + (* Fake with 2 insns: + * + * movzx r32 <- r8; (in-place zero-extension) + * mov m32 <- r32; (NB: must happen in AL/CL/DL/BL) + *) + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xb6 src_cell (reg r); + insn_rm_r 0x89 dst (reg r); + |] + + (* MOVSX: r8/r32 <- sx(rm8) *) + | (true, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell) + when (is_r8 dst || is_r32 dst) && is_rm8 src_cell -> + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xbe src_cell (reg r) |] + + (* MOVSX: m32 <- sx(r8) *) + | (true, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell))) + when (is_m32 dst) && is_r8 
src_cell -> + (* Fake with 2 insns: + * + * movsx r32 <- r8; (in-place sign-extension) + * mov m32 <- r32; (NB: must happen in AL/CL/DL/BL) + *) + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xbe src_cell (reg r); + insn_rm_r 0x89 dst (reg r); + |] + + (* m8 <- imm8 (signed) *) + | (_, _, Il.Imm ((Asm.IMM n), _)) + when is_m8 dst && imm_is_signed_byte n && signed -> + insn_rm_r_imm 0xc6 dst slash0 TY_i8 (Asm.IMM n) + + (* m8 <- imm8 (unsigned) *) + | (_, _, Il.Imm ((Asm.IMM n), _)) + when is_m8 dst && imm_is_unsigned_byte n && (not signed) -> + insn_rm_r_imm 0xc6 dst slash0 TY_u8 (Asm.IMM n) + + (* rm32 <- imm32 *) + | (_, _, Il.Imm (i, _)) when is_rm32 dst || is_r8 dst -> + let t = if signed then TY_u32 else TY_i32 in + insn_rm_r_imm 0xc7 dst slash0 t i + + | _ -> raise Unrecognized +;; + + +let lea (dst:Il.cell) (src:Il.operand) : Asm.frag = + match (dst, src) with + (Il.Reg ((Il.Hreg r), dst_ty), + Il.Cell (Il.Mem (mem, _))) + when is_ty32 dst_ty -> + insn_rm_r 0x8d (Il.Mem (mem, Il.OpaqueTy)) (reg r) + + | (Il.Reg ((Il.Hreg r), dst_ty), + Il.ImmPtr (fix, _)) + when is_ty32 dst_ty && r = eax -> + let anchor = new_fixup "anchor" in + let fix_off = Asm.SUB ((Asm.M_POS fix), + (Asm.M_POS anchor)) + in + (* NB: These instructions must come as a + * cluster, w/o any separation. 
+ *) + Asm.SEQ [| + insn_pcrel_simple 0xe8 get_next_pc_thunk_fixup; + Asm.DEF (anchor, insn_rm_r_imm 0x81 dst slash0 TY_i32 fix_off); + |] + + | _ -> raise Unrecognized +;; + + +let select_insn_misc (q:Il.quad') : Asm.frag = + + match q with + Il.Call c -> + begin + match c.Il.call_dst with + Il.Reg ((Il.Hreg dst), _) when dst = eax -> + begin + match c.Il.call_targ with + + Il.CodePtr (Il.Cell c) + when Il.cell_referent_ty c + = Il.ScalarTy (Il.AddrTy Il.CodeTy) -> + insn_rm_r 0xff c slash2 + + | Il.CodePtr (Il.ImmPtr (f, Il.CodeTy)) -> + insn_pcrel_simple 0xe8 f + + | _ -> raise Unrecognized + end + | _ -> raise Unrecognized + end + + | Il.Push (Il.Cell (Il.Reg ((Il.Hreg r), t))) when is_ty32 t -> + Asm.BYTE (0x50 + (reg r)) + + | Il.Push (Il.Cell c) when is_rm32 c -> + insn_rm_r 0xff c slash6 + + | Il.Push (Il.Imm (Asm.IMM i, _)) when imm_is_unsigned_byte i -> + Asm.SEQ [| Asm.BYTE 0x6a; Asm.WORD (TY_u8, (Asm.IMM i)) |] + + | Il.Push (Il.Imm (i, _)) -> + Asm.SEQ [| Asm.BYTE 0x68; Asm.WORD (TY_u32, i) |] + + | Il.Pop (Il.Reg ((Il.Hreg r), t)) when is_ty32 t -> + Asm.BYTE (0x58 + (reg r)) + + | Il.Pop c when is_rm32 c -> + insn_rm_r 0x8f c slash0 + + | Il.Ret -> Asm.BYTE 0xc3 + + | Il.Jmp j -> + begin + match (j.Il.jmp_op, j.Il.jmp_targ) with + + (Il.JMP, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) -> + insn_pcrel 0xeb 0xe9 f + + | (Il.JMP, Il.CodePtr (Il.Cell c)) + when Il.cell_referent_ty c + = Il.ScalarTy (Il.AddrTy Il.CodeTy) -> + insn_rm_r 0xff c slash4 + + (* FIXME: refactor this to handle cell-based jumps + * if we ever need them. So far not. 
*) + | (_, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) -> + let (op8, op32) = + match j.Il.jmp_op with + | Il.JC -> (0x72, 0x82) + | Il.JNC -> (0x73, 0x83) + | Il.JZ -> (0x74, 0x84) + | Il.JNZ -> (0x75, 0x85) + | Il.JO -> (0x70, 0x80) + | Il.JNO -> (0x71, 0x81) + | Il.JE -> (0x74, 0x84) + | Il.JNE -> (0x75, 0x85) + + | Il.JL -> (0x7c, 0x8c) + | Il.JLE -> (0x7e, 0x8e) + | Il.JG -> (0x7f, 0x8f) + | Il.JGE -> (0x7d, 0x8d) + + | Il.JB -> (0x72, 0x82) + | Il.JBE -> (0x76, 0x86) + | Il.JA -> (0x77, 0x87) + | Il.JAE -> (0x73, 0x83) + | _ -> raise Unrecognized + in + let prefix32 = 0x0f in + insn_pcrel_prefix32 op8 prefix32 op32 f + + | _ -> raise Unrecognized + end + + | Il.Dead -> Asm.MARK + | Il.Debug -> Asm.BYTES [| 0xcc |] (* int 3 *) + | Il.Regfence -> Asm.MARK + | Il.End -> Asm.BYTES [| 0x90 |] + | Il.Nop -> Asm.BYTES [| 0x90 |] + | _ -> raise Unrecognized +;; + + +type alu_binop_codes = + { + insn: string; + immslash: int; (* mod/rm "slash" code for imm-src variant *) + rm_dst_op8: int; (* opcode for 8-bit r/m dst variant *) + rm_dst_op32: int; (* opcode for 32-bit r/m dst variant *) + rm_src_op8: int; (* opcode for 8-bit r/m src variant *) + rm_src_op32: int; (* opcode for 32-bit r/m src variant *) + } +;; + +let alu_binop + (dst:Il.cell) (src:Il.operand) (codes:alu_binop_codes) + : Asm.frag = + match (dst, src) with + (Il.Reg ((Il.Hreg r), dst_ty), Il.Cell c) + when (is_ty32 dst_ty && is_rm32 c) || (is_ty8 dst_ty && is_rm8 c) + -> insn_rm_r codes.rm_src_op32 c (reg r) + + | (_, Il.Cell (Il.Reg ((Il.Hreg r), src_ty))) + when (is_rm32 dst && is_ty32 src_ty) || (is_rm8 dst && is_ty8 src_ty) + -> insn_rm_r codes.rm_dst_op32 dst (reg r) + + | (_, Il.Imm (i, _)) when is_rm32 dst || is_rm8 dst + -> insn_rm_r_imm_s8_s32 0x83 0x81 dst codes.immslash i + + | _ -> raise Unrecognized +;; + + +let mul_like (src:Il.operand) (signed:bool) (slash:int) + : Asm.frag = + match src with + Il.Cell src when is_rm32 src -> + insn_rm_r 0xf7 src slash + + | Il.Cell src when is_rm8 src -> + 
insn_rm_r 0xf6 src slash + + | Il.Imm (_, TY_u32) + | Il.Imm (_, TY_i32) -> + let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits32) in + Asm.SEQ [| mov signed tmp src; + insn_rm_r 0xf7 tmp slash |] + + | Il.Imm (_, TY_u8) + | Il.Imm (_, TY_i8) -> + let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits8) in + Asm.SEQ [| mov signed tmp src; + insn_rm_r 0xf6 tmp slash |] + + | _ -> raise Unrecognized +;; + + +let select_insn (q:Il.quad) : Asm.frag = + match q.Il.quad_body with + Il.Unary u -> + let unop s = + if u.Il.unary_src = Il.Cell u.Il.unary_dst + then insn_rm_r 0xf7 u.Il.unary_dst s + else raise Unrecognized + in + begin + match u.Il.unary_op with + Il.UMOV -> mov false u.Il.unary_dst u.Il.unary_src + | Il.IMOV -> mov true u.Il.unary_dst u.Il.unary_src + | Il.NEG -> unop slash3 + | Il.NOT -> unop slash2 + | Il.ZERO -> zero u.Il.unary_dst u.Il.unary_src + end + + | Il.Lea le -> lea le.Il.lea_dst le.Il.lea_src + + | Il.Cmp c -> cmp c.Il.cmp_lhs c.Il.cmp_rhs + + | Il.Binary b -> + begin + if Il.Cell b.Il.binary_dst = b.Il.binary_lhs + then + let binop = alu_binop b.Il.binary_dst b.Il.binary_rhs in + let mulop = mul_like b.Il.binary_rhs in + + let divop signed slash = + Asm.SEQ [| + (* xor edx edx, then mul_like. *) + insn_rm_r 0x33 (rc edx) (reg edx); + mul_like b.Il.binary_rhs signed slash + |] + in + + let modop signed slash = + Asm.SEQ [| + (* divop, then mov remainder to eax instead. 
*) + divop signed slash; + mov false (rc eax) (ro edx) + |] + in + + let shiftop slash = + let src = b.Il.binary_rhs in + let dst = b.Il.binary_dst in + let mask i = Asm.AND (i, Asm.IMM 0xffL) in + if is_rm8 dst + then + match src with + Il.Imm (i, _) -> + insn_rm_r_imm 0xC0 dst slash TY_u8 (mask i) + | Il.Cell (Il.Reg ((Il.Hreg r), _)) + when r = ecx -> + Asm.SEQ [| Asm.BYTE 0xD2; rm_r dst slash |] + | _ -> raise Unrecognized + else + match src with + Il.Imm (i, _) -> + insn_rm_r_imm 0xC1 dst slash TY_u8 (mask i) + | Il.Cell (Il.Reg ((Il.Hreg r), _)) + when r = ecx -> + Asm.SEQ [| Asm.BYTE 0xD3; rm_r dst slash |] + | _ -> raise Unrecognized + in + + match (b.Il.binary_dst, b.Il.binary_op) with + (_, Il.ADD) -> binop { insn="ADD"; + immslash=slash0; + rm_dst_op8=0x0; + rm_dst_op32=0x1; + rm_src_op8=0x2; + rm_src_op32=0x3; } + | (_, Il.SUB) -> binop { insn="SUB"; + immslash=slash5; + rm_dst_op8=0x28; + rm_dst_op32=0x29; + rm_src_op8=0x2a; + rm_src_op32=0x2b; } + | (_, Il.AND) -> binop { insn="AND"; + immslash=slash4; + rm_dst_op8=0x20; + rm_dst_op32=0x21; + rm_src_op8=0x22; + rm_src_op32=0x23; } + | (_, Il.OR) -> binop { insn="OR"; + immslash=slash1; + rm_dst_op8=0x08; + rm_dst_op32=0x09; + rm_src_op8=0x0a; + rm_src_op32=0x0b; } + | (_, Il.XOR) -> binop { insn="XOR"; + immslash=slash6; + rm_dst_op8=0x30; + rm_dst_op32=0x31; + rm_src_op8=0x32; + rm_src_op32=0x33; } + + | (_, Il.LSL) -> shiftop slash4 + | (_, Il.LSR) -> shiftop slash5 + | (_, Il.ASR) -> shiftop slash7 + + | (Il.Reg (Il.Hreg r, t), Il.UMUL) + when (is_ty32 t || is_ty8 t) && r = eax -> + mulop false slash4 + + | (Il.Reg (Il.Hreg r, t), Il.IMUL) + when (is_ty32 t || is_ty8 t) && r = eax -> + mulop true slash5 + + | (Il.Reg (Il.Hreg r, t), Il.UDIV) + when (is_ty32 t || is_ty8 t) && r = eax -> + divop false slash6 + + | (Il.Reg (Il.Hreg r, t), Il.IDIV) + when (is_ty32 t || is_ty8 t) && r = eax -> + divop true slash7 + + | (Il.Reg (Il.Hreg r, t), Il.UMOD) + when (is_ty32 t || is_ty8 t) && r = eax -> + modop 
false slash6 + + | (Il.Reg (Il.Hreg r, t), Il.IMOD) + when (is_ty32 t || is_ty8 t) && r = eax -> + modop true slash7 + + | _ -> raise Unrecognized + else raise Unrecognized + end + | _ -> select_insn_misc q.Il.quad_body +;; + + +let new_emitter_without_vregs _ : Il.emitter = + Il.new_emitter + abi.Abi.abi_prealloc_quad + abi.Abi.abi_is_2addr_machine + false None +;; + +let select_insns (sess:Session.sess) (q:Il.quads) : Asm.frag = + let scopes = Stack.create () in + let fixups = Stack.create () in + let pop_frags _ = + Asm.SEQ (Array.of_list + (List.rev + (!(Stack.pop scopes)))) + in + ignore (Stack.push (ref []) scopes); + for i = 0 to (Array.length q) - 1 do + let append frag = + let frags = Stack.top scopes in + frags := frag :: (!frags) + in + begin + match q.(i).Il.quad_fixup with + None -> () + | Some f -> append (Asm.DEF (f, Asm.MARK)) + end; + begin + match q.(i).Il.quad_body with + Il.Enter f -> + Stack.push f fixups; + Stack.push (ref []) scopes; + | Il.Leave -> + append (Asm.DEF (Stack.pop fixups, pop_frags ())) + | _ -> + try + append (select_insn q.(i)) + with + Unrecognized -> + Session.fail sess + "E:Assembly error: unrecognized quad: %s\n%!" + (Il.string_of_quad reg_str q.(i)); + () + end + done; + pop_frags() +;; + +let frags_of_emitted_quads (sess:Session.sess) (e:Il.emitter) : Asm.frag = + let frag = select_insns sess e.Il.emit_quads in + if sess.Session.sess_failed + then raise Unrecognized + else frag +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) |