aboutsummaryrefslogtreecommitdiff
path: root/src/boot/be/x86.ml
diff options
context:
space:
mode:
authorGraydon Hoare <[email protected]>2010-06-23 21:03:09 -0700
committerGraydon Hoare <[email protected]>2010-06-23 21:03:09 -0700
commitd6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch)
treeb425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/be/x86.ml
parentInitial git commit. (diff)
downloadrust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz
rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip
Populate tree.
Diffstat (limited to 'src/boot/be/x86.ml')
-rw-r--r--src/boot/be/x86.ml2205
1 files changed, 2205 insertions, 0 deletions
diff --git a/src/boot/be/x86.ml b/src/boot/be/x86.ml
new file mode 100644
index 00000000..01b7e299
--- /dev/null
+++ b/src/boot/be/x86.ml
@@ -0,0 +1,2205 @@
+(*
+ * x86/ia32 instructions have 6 parts:
+ *
+ * [pre][op][modrm][sib][disp][imm]
+ *
+ * [pre] = 0..4 bytes of prefix
+ * [op] = 1..3 byte opcode
+ * [modrm] = 0 or 1 byte: [mod:2][reg/op:3][r/m:3]
+ * [sib] = 0 or 1 byte: [scale:2][index:3][base:3]
+ * [disp] = 1, 2 or 4 byte displacement
+ * [imm] = 1, 2 or 4 byte immediate
+ *
+ * So between 1 and 17 bytes total.
+ *
+ * We're not going to use sib, but modrm is worth discussing.
+ *
+ * The high two bits of modrm denote an addressing mode. The modes are:
+ *
+ * 00 - "mostly" *(reg)
+ * 01 - "mostly" *(reg) + disp8
+ * 10 - "mostly" *(reg) + disp32
+ * 11 - reg
+ *
+ * The next-lowest 3 bits denote a specific register, or a subopcode if
+ * there is a fixed register or only one operand. The instruction format
+ * reference will say "/<n>" for some number n, if a fixed subopcode is used.
+ * It'll say "/r" if the instruction uses this field to specify a register.
+ *
+ * The registers specified in this field are:
+ *
+ * 000 - EAX or XMM0
+ * 001 - ECX or XMM1
+ * 010 - EDX or XMM2
+ * 011 - EBX or XMM3
+ * 100 - ESP or XMM4
+ * 101 - EBP or XMM5
+ * 110 - ESI or XMM6
+ * 111 - EDI or XMM7
+ *
+ * The final low 3 bits denote sub-modes of the primary mode selected
+ * with the top 2 bits. In particular, they "mostly" select the reg that is
+ * to be used for effective address calculation.
+ *
+ * For the most part, these follow the same numbering order: EAX, ECX, EDX,
+ * EBX, ESP, EBP, ESI, EDI. There are two unusual deviations from the rule
+ * though:
+ *
+ * - In primary modes 00, 01 and 10, r/m=100 means "use SIB byte". You can
+ * use (unscaled) ESP as the base register in these modes by appending the
+ * SIB byte 0x24. We do that in our rm_r operand-encoder function.
+ *
+ * - In primary mode 00, r/m=101 means "just disp32", no register is
+ * involved. There is no way to use EBP in primary mode 00. If you try, we
+ * just decay into a mode 01 with an appended 8-bit immediate displacement.
+ *
+ * Some opcodes are written 0xNN +rd. This means "we decided to chew up a
+ * whole pile of opcodes here, with each opcode including a hard-wired
+ * reference to a register". For example, POP is "0x58 +rd", which means that
+ * the 1-byte insns 0x58..0x5f are chewed up for "POP EAX" ... "POP EDI"
+ * (again, the canonical order of register numberings)
+ *)
+
+(*
+ * Notes on register availability of x86:
+ *
+ * There are 8 GPRs but we use 2 of them for specific purposes:
+ *
+ * - ESP always points to the current stack frame.
+ * - EBP always points to the current frame base.
+ *
+ * We tell IL that we have 6 GPRs then, and permit most register-register ops
+ * on any of these 6, mostly-unconstrained.
+ *
+ *)
+
+open Common;;
+
+exception Unrecognized
+;;
+
+(* [modrm m rm reg_or_subopcode] assembles a mod-reg-r/m byte:
+ * [mod:2][reg/op:3][r/m:3].  [m] is the 2-bit addressing mode, [rm]
+ * the 3-bit r/m field, and [reg_or_subopcode] either a 3-bit register
+ * code or a "/n" subopcode.
+ * @raise Invalid_argument if any field is out of range.
+ *
+ * Fixes vs. original: uses [||] rather than the deprecated [or]
+ * keyword, structural [<>] rather than physical [!=] (identical on
+ * ints, but idiomatic), and names the right function in the error. *)
+let modrm m rm reg_or_subopcode =
+  if ((m land 0b11) <> m) ||
+     ((rm land 0b111) <> rm) ||
+     ((reg_or_subopcode land 0b111) <> reg_or_subopcode)
+  then raise (Invalid_argument "X86.modrm")
+  else
+    ((((m land 0b11) lsl 6)
+        lor
+        (rm land 0b111))
+       lor
+       ((reg_or_subopcode land 0b111) lsl 3))
+;;
+
+(* Partial applications of [modrm], one per primary addressing mode. *)
+let modrm_deref_reg = modrm 0b00 ;;             (* *(reg)          *)
+let modrm_deref_disp32 = modrm 0b00 0b101 ;;    (* *(disp32)       *)
+let modrm_deref_reg_plus_disp8 = modrm 0b01 ;;  (* *(reg) + disp8  *)
+let modrm_deref_reg_plus_disp32 = modrm 0b10 ;; (* *(reg) + disp32 *)
+let modrm_reg = modrm 0b11 ;;                   (* reg direct      *)
+
+(* "/n" subopcode values, for opcodes whose instruction-format entry
+ * reads "/0" .. "/7": the reg field of mod r/m carries a subopcode
+ * rather than a register. *)
+let slash0 = 0;;
+let slash1 = 1;;
+let slash2 = 2;;
+let slash3 = 3;;
+let slash4 = 4;;
+let slash5 = 5;;
+let slash6 = 6;;
+let slash7 = 7;;
+
+
+(*
+ * Translate an IL-level hwreg number from 0..nregs into the 3-bit code number
+ * used through the mod r/m byte and /r sub-register specifiers of the x86
+ * ISA.
+ *
+ * See "Table 2-2: 32-Bit Addressing Forms with the ModR/M Byte", in the IA32
+ * Architecture Software Developer's Manual, volume 2a.
+ *)
+
+(* IL-level hardreg indices.  NB: this ordering (eax ecx ebx esi edi
+ * edx ebp esp) is deliberately NOT the hardware encoding order; the
+ * [reg] function below maps an IL index to its 3-bit x86 code. *)
+let eax = 0
+let ecx = 1
+let ebx = 2
+let esi = 3
+let edi = 4
+let edx = 5
+let ebp = 6
+let esp = 7
+
+(* Hardware 3-bit register codes, in the ISA's canonical order. *)
+let code_eax = 0b000;;
+let code_ecx = 0b001;;
+let code_edx = 0b010;;
+let code_ebx = 0b011;;
+let code_esp = 0b100;;
+let code_ebp = 0b101;;
+let code_esi = 0b110;;
+let code_edi = 0b111;;
+
+(* Map an IL hardreg index (0..7, see [eax] .. [esp] above) to its
+ * 3-bit hardware register code.  Table order follows the IL
+ * numbering, not the hardware numbering.
+ * @raise Invalid_argument on an out-of-range index. *)
+let reg r =
+  if 0 <= r && r <= 7
+  then
+    [| code_eax; code_ecx; code_ebx; code_esi; code_edi; code_edx;
+       (* ebp/esp: never assigned by the register allocator, but
+        * synthetic code uses them. *)
+       code_ebp; code_esp |].(r)
+  else raise (Invalid_argument "X86.reg")
+;;
+
+
+(* DWARF register numbers for i386 (standard debugging-info order). *)
+let dwarf_eax = 0;;
+let dwarf_ecx = 1;;
+let dwarf_edx = 2;;
+let dwarf_ebx = 3;;
+let dwarf_esp = 4;;
+let dwarf_ebp = 5;;
+let dwarf_esi = 6;;
+let dwarf_edi = 7;;
+
+(* Map an IL hardreg index to its DWARF register number.  Same input
+ * ordering as [reg] above, different output numbering. *)
+let dwarf_reg r =
+  match r with
+      0 -> dwarf_eax
+    | 1 -> dwarf_ecx
+    | 2 -> dwarf_ebx
+    | 3 -> dwarf_esi
+    | 4 -> dwarf_edi
+    | 5 -> dwarf_edx
+    | 6 -> dwarf_ebp
+    | 7 -> dwarf_esp
+    | _ -> raise (Invalid_argument "X86.dwarf_reg")
+(* Printable name of an IL hardreg index; table order follows the IL
+ * numbering (eax ecx ebx esi edi edx ebp esp).
+ * @raise Invalid_argument on an out-of-range index. *)
+let reg_str r =
+  if 0 <= r && r <= 7
+  then [| "eax"; "ecx"; "ebx"; "esi"; "edi"; "edx"; "ebp"; "esp" |].(r)
+  else raise (Invalid_argument "X86.reg_str")
+;;
+
+(* This is a basic ABI. You might need to customize it by platform. *)
+(* Six allocatable GPRs: all but esp/ebp (see header note). *)
+let (n_hardregs:int) = 6;;
+(* ebp, edi, esi, ebx -- see save_callee_saves below. *)
+let (n_callee_saves:int) = 4;;
+
+
+(* A scalar type is 32-bit if it is a 32-bit value or any address
+ * (pointers are word-sized on x86). *)
+let is_ty32 (ty:Il.scalar_ty) : bool =
+  match ty with
+      Il.ValTy (Il.Bits32) | Il.AddrTy _ -> true
+    | _ -> false
+;;
+
+(* Operand-class predicates used by the instruction selector to pick
+ * encodings: r = register, m = memory, rm = either; 32 = 32-bit,
+ * 8 = 8-bit. *)
+
+let is_r32 (c:Il.cell) : bool =
+  match c with
+      Il.Reg (_, st) -> is_ty32 st
+    | _ -> false
+;;
+
+let is_rm32 (c:Il.cell) : bool =
+  match c with
+      Il.Mem (_, Il.ScalarTy st) -> is_ty32 st
+    | Il.Reg (_, st) -> is_ty32 st
+    | _ -> false
+;;
+
+let is_ty8 (ty:Il.scalar_ty) : bool =
+  match ty with
+      Il.ValTy (Il.Bits8) -> true
+    | _ -> false
+;;
+
+let is_m32 (c:Il.cell) : bool =
+  match c with
+      Il.Mem (_, Il.ScalarTy st) -> is_ty32 st
+    | _ -> false
+;;
+
+let is_m8 (c:Il.cell) : bool =
+  match c with
+      Il.Mem (_, Il.ScalarTy st) -> is_ty8 st
+    | _ -> false
+;;
+
+(* Only eax/ebx/ecx/edx (IL indices) have 8-bit subregisters
+ * (al/bl/cl/dl) on x86; esi/edi/ebp/esp do not.  Uses [List.mem]
+ * (structural equality) instead of chained physical [==], which was
+ * only incidentally correct on immediate ints. *)
+let is_ok_r8 (r:Il.hreg) : bool =
+  List.mem r [eax; ebx; ecx; edx]
+;;
+
+(* 8-bit register cell: must be one of the four regs with an 8-bit
+ * subregister (see [is_ok_r8]). *)
+let is_r8 (c:Il.cell) : bool =
+  match c with
+      Il.Reg (Il.Hreg r, st) when is_ok_r8 r -> is_ty8 st
+    | _ -> false
+;;
+
+(* 8-bit register-or-memory cell. *)
+let is_rm8 (c:Il.cell) : bool =
+  match c with
+      Il.Mem (_, Il.ScalarTy st) -> is_ty8 st
+    | _ -> is_r8 c
+;;
+
+(* Rewrite a quad before register allocation so that operands which
+ * x86 pins to specific hardware registers (mul/div/mod operands,
+ * call results, LEA-of-immediate destinations) are retargeted onto
+ * those hregs.  Quads that need no preallocation pass through
+ * unchanged. *)
+let prealloc_quad (quad':Il.quad') : Il.quad' =
+  (* Retarget a cell/operand onto a fixed hreg, preserving its
+   * scalar type. *)
+  let target_cell reg c =
+    Il.Reg (Il.Hreg reg, Il.cell_scalar_ty c)
+  in
+  let target_operand reg op =
+    Il.Cell (Il.Reg (Il.Hreg reg, Il.operand_scalar_ty op))
+  in
+
+  (* Pin a binary op's lhs+dst to [dst] and rhs to [src]. *)
+  let target_bin_to_hreg bin dst src =
+    { bin with
+        Il.binary_rhs = target_operand src bin.Il.binary_rhs;
+        Il.binary_lhs = target_operand dst bin.Il.binary_lhs;
+        Il.binary_dst = target_cell dst bin.Il.binary_dst }
+  in
+
+  let target_cmp cmp =
+    match cmp.Il.cmp_lhs with
+        (* Immediate LHS we force to eax. *)
+        Il.Imm _ ->
+          { cmp with
+              Il.cmp_lhs = target_operand eax cmp.Il.cmp_lhs }
+      | _ -> cmp
+  in
+
+  match quad' with
+      Il.Binary bin ->
+        begin
+          Il.Binary
+            begin
+              match bin.Il.binary_op with
+                  (* x86 mul/div/mod implicitly use eax; rhs goes in
+                   * ecx. *)
+                  Il.IMUL | Il.UMUL
+                | Il.IDIV | Il.UDIV -> target_bin_to_hreg bin eax ecx
+                | Il.IMOD | Il.UMOD -> target_bin_to_hreg bin eax ecx
+                | _ -> bin
+            end
+        end
+
+    | Il.Cmp cmp -> Il.Cmp (target_cmp cmp)
+
+    | Il.Call c ->
+        (* cdecl: call results come back in eax. *)
+        let ty = Il.cell_scalar_ty c.Il.call_dst in
+          Il.Call { c with
+                      Il.call_dst = Il.Reg ((Il.Hreg eax), ty) }
+
+    | Il.Lea le ->
+        begin
+          match (le.Il.lea_dst, le.Il.lea_src) with
+              (Il.Reg (_, dst_ty), Il.ImmPtr _)
+                when is_ty32 dst_ty ->
+                  (* LEA of an immediate pointer lands in eax;
+                   * presumably this pairs with the get-next-pc thunk
+                   * (which returns in eax) -- confirm in emitter. *)
+                  Il.Lea { le with
+                             Il.lea_dst = Il.Reg (Il.Hreg eax, dst_ty) }
+            | _ -> quad'
+        end
+
+    | x -> x
+;;
+
+(* Narrow the allowed-hreg bitsets (one per vreg, in [hregs]) with
+ * x86-specific constraints for quad [q]:
+ *  - any vreg in a quad that touches an 8-bit cell loses esi/edi
+ *    (they have no 8-bit subregister);
+ *  - a vreg used as a shift amount is restricted to ecx. *)
+let constrain_vregs (q:Il.quad) (hregs:Bits.t array) : unit =
+
+  (* True iff any cell read or written by [q] is 8-bit.  Computed
+   * once up front via a scanning pass over the quad. *)
+  let involves_8bit_cell =
+    let b = ref false in
+    let qp_cell _ c =
+      match c with
+          Il.Reg (_, Il.ValTy Il.Bits8)
+        | Il.Mem (_, Il.ScalarTy (Il.ValTy Il.Bits8)) ->
+            (b := true; c)
+        | _ -> c
+    in
+      ignore (Il.process_quad { Il.identity_processor with
+                                  Il.qp_cell_read = qp_cell;
+                                  Il.qp_cell_write = qp_cell } q);
+      !b
+  in
+
+  let qp_mem _ m = m in
+  let qp_cell _ c =
+    begin
+      match c with
+          Il.Reg (Il.Vreg v, _) when involves_8bit_cell ->
+            (* 8-bit register cells must only be al, cl, dl, bl.
+             * Not esi/edi. *)
+            let hv = hregs.(v) in
+              List.iter (fun bad -> Bits.set hv bad false) [esi; edi]
+        | _ -> ()
+    end;
+    c
+  in
+    begin
+      match q.Il.quad_body with
+          Il.Binary b ->
+            begin
+              match b.Il.binary_op with
+                  (* Shifts *)
+                | Il.LSL | Il.LSR | Il.ASR ->
+                    begin
+                      match b.Il.binary_rhs with
+                          Il.Cell (Il.Reg (Il.Vreg v, _)) ->
+                            let hv = hregs.(v) in
+                              (* Shift src has to be ecx: clear every
+                               * other allocatable hreg. *)
+                              List.iter
+                                (fun bad -> Bits.set hv bad false)
+                                [eax; edx; ebx; esi; edi]
+                        | _ -> ()
+                    end
+                | _ -> ()
+            end
+        | _ -> ()
+    end;
+    ignore
+      (Il.process_quad { Il.identity_processor with
+                           Il.qp_mem = qp_mem;
+                           Il.qp_cell_read = qp_cell;
+                           Il.qp_cell_write = qp_cell } q)
+;;
+
+
+(* Hardware registers implicitly trashed by [quad], beyond its
+ * declared operands.  mul/div/mod clobber edx (the eax:edx pair);
+ * ZERO clobbers eax/edi/ecx (presumably a rep-stos-style loop --
+ * confirm against the assembler); calls clobber the cdecl
+ * caller-saves; Regfence pessimistically clobbers everything. *)
+let clobbers (quad:Il.quad) : Il.hreg list =
+  match quad.Il.quad_body with
+      Il.Binary bin ->
+        begin
+          match bin.Il.binary_op with
+              Il.IMUL | Il.UMUL
+            | Il.IDIV | Il.UDIV -> [ edx ]
+            | Il.IMOD | Il.UMOD -> [ edx ]
+            | _ -> []
+        end
+    | Il.Unary un ->
+        begin
+          match un.Il.unary_op with
+              Il.ZERO -> [ eax; edi; ecx ]
+            | _ -> [ ]
+        end
+    | Il.Call _ -> [ eax; ecx; edx; ]
+    | Il.Regfence -> [ eax; ecx; ebx; edx; edi; esi; ]
+    | _ -> []
+;;
+
+
+(* Machine-word parameters for ia32. *)
+let word_sz = 4L
+;;
+
+let word_bits = Il.Bits32
+;;
+
+let word_ty = TY_u32
+;;
+
+(* Attach a human-readable annotation to the instruction at the
+ * emitter's current PC (for disassembly/debug listings). *)
+let annotate (e:Il.emitter) (str:string) =
+  Hashtbl.add e.Il.emit_annotations e.Il.emit_pc str
+;;
+
+(* One-letter IL constructors used pervasively below:
+ * c = cell->operand, r = reg->word cell, h = hreg index->reg,
+ * rc = hreg->word cell, ro = hreg->word operand. *)
+let c (c:Il.cell) : Il.operand = Il.Cell c ;;
+let r (r:Il.reg) : Il.cell = Il.Reg ( r, (Il.ValTy word_bits) ) ;;
+let h (x:Il.hreg) : Il.reg = Il.Hreg x ;;
+let rc (x:Il.hreg) : Il.cell = r (h x) ;;
+let ro (x:Il.hreg) : Il.operand = c (rc x) ;;
+
+(* Allocate a fresh word-sized vreg; returns both the reg and a cell
+ * wrapping it. *)
+let vreg (e:Il.emitter) : (Il.reg * Il.cell) =
+  let vr = Il.next_vreg e in
+    (vr, (Il.Reg (vr, (Il.ValTy word_bits))))
+;;
+
+(* Immediate operands, word-sized and byte-sized. *)
+let imm (x:Asm.expr64) : Il.operand =
+  Il.Imm (x, word_ty)
+;;
+let immi (x:int64) : Il.operand =
+  imm (Asm.IMM x)
+;;
+
+let imm_byte (x:Asm.expr64) : Il.operand =
+  Il.Imm (x, TY_u8)
+;;
+let immi_byte (x:int64) : Il.operand =
+  imm_byte (Asm.IMM x)
+;;
+
+
+(* Byte displacement of byte-index [i]. *)
+let byte_off_n (i:int) : Asm.expr64 =
+  Asm.IMM (Int64.of_int i)
+;;
+
+(* 8-bit memory cell at byte offset [i] from [reg]. *)
+let byte_n (reg:Il.reg) (i:int) : Il.cell =
+  let imm = byte_off_n i in
+  let mem = Il.RegIn (reg, Some imm) in
+    Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8))
+;;
+
+(* Byte displacement of word-index [i] (i * word_sz). *)
+let word_off_n (i:int) : Asm.expr64 =
+  Asm.IMM (Int64.mul (Int64.of_int i) word_sz)
+;;
+
+(* Word cell at *(reg). *)
+let word_at (reg:Il.reg) : Il.cell =
+  let mem = Il.RegIn (reg, None) in
+    Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits))
+;;
+
+(* Word cell at *(reg + off). *)
+let word_at_off (reg:Il.reg) (off:Asm.expr64) : Il.cell =
+  let mem = Il.RegIn (reg, Some off) in
+    Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits))
+;;
+
+(* Word cell at *(reg + i*word_sz). *)
+let word_n (reg:Il.reg) (i:int) : Il.cell =
+  word_at_off reg (word_off_n i)
+;;
+
+(* Treat the code address held in [reg] as a callable code pointer. *)
+let reg_codeptr (reg:Il.reg) : Il.code =
+  Il.CodePtr (Il.Cell (Il.Reg (reg, Il.AddrTy Il.CodeTy)))
+;;
+
+(* Low byte of the word at *(reg + i*word_sz). *)
+let word_n_low_byte (reg:Il.reg) (i:int) : Il.cell =
+  let imm = word_off_n i in
+  let mem = Il.RegIn (reg, Some imm) in
+    Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8))
+;;
+
+(* Pointer-to-word cell at *(reg + i*word_sz). *)
+let wordptr_n (reg:Il.reg) (i:int) : Il.cell =
+  let imm = word_off_n i in
+  let mem = Il.RegIn (reg, Some imm) in
+    Il.Mem (mem, Il.ScalarTy (Il.AddrTy (Il.ScalarTy (Il.ValTy word_bits))))
+;;
+
+let get_element_ptr = Il.get_element_ptr word_bits reg_str ;;
+
+(* Push the four cdecl callee-saves.  Push order here defines the
+ * frame-base layout assumed by restore_frame_base and unwind_glue:
+ * ebx ends up lowest in memory, ebp highest (just under retpc). *)
+let save_callee_saves (e:Il.emitter) : unit =
+  Il.emit e (Il.Push (ro ebp));
+  Il.emit e (Il.Push (ro edi));
+  Il.emit e (Il.Push (ro esi));
+  Il.emit e (Il.Push (ro ebx));
+;;
+
+
+(* Pop the callee-saves in the reverse of save_callee_saves' order. *)
+let restore_callee_saves (e:Il.emitter) : unit =
+  Il.emit e (Il.Pop (rc ebx));
+  Il.emit e (Il.Pop (rc esi));
+  Il.emit e (Il.Pop (rc edi));
+  Il.emit e (Il.Pop (rc ebp));
+;;
+
+
+(* restores registers from the frame base without updating esp:
+ * - sets ebp, edi, esi, ebx to stored values from frame base
+ * - sets `retpc' register to stored retpc from frame base
+ * - sets `base' register to current fp
+ *
+ * Layout read here (ascending from fp): ebx, esi, edi, ebp, retpc --
+ * i.e. the inverse of save_callee_saves' push order. *)
+let restore_frame_base (e:Il.emitter) (base:Il.reg) (retpc:Il.reg) : unit =
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+    (* Capture fp into [base] first: ebp itself is overwritten below. *)
+    mov (r base) (ro ebp);
+    mov (rc ebx) (c (word_at base));
+    mov (rc esi) (c (word_n base 1));
+    mov (rc edi) (c (word_n base 2));
+    mov (rc ebp) (c (word_n base 3));
+    mov (r retpc) (c (word_n base 4));
+;;
+
+
+(*
+ * Our arrangement on x86 is this:
+ *
+ * *ebp+20+(4*N) = [argN ]
+ * ...
+ * *ebp+24 = [arg1 ] = task ptr
+ * *ebp+20 = [arg0 ] = out ptr
+ * *ebp+16 = [retpc ]
+ * *ebp+12 = [old_ebp]
+ * *ebp+8 = [old_edi]
+ * *ebp+4 = [old_esi]
+ * *ebp = [old_ebx]
+ *
+ * For x86-cdecl:
+ *
+ * %eax, %ecx, %edx are "caller save" registers
+ * %ebp, %ebx, %esi, %edi are "callee save" registers
+ *
+ *)
+
+(* Sizes of the fixed regions in every frame (see diagram above). *)
+let frame_base_words = 5 (* eip,ebp,edi,esi,ebx *) ;;
+let frame_base_sz = Int64.mul (Int64.of_int frame_base_words) word_sz;;
+
+let frame_info_words = 2 (* crate ptr, crate-rel frame info disp *) ;;
+let frame_info_sz = Int64.mul (Int64.of_int frame_info_words) word_sz;;
+
+let implicit_arg_words = 2 (* task ptr,out ptr *);;
+let implicit_args_sz = Int64.mul (Int64.of_int implicit_arg_words) word_sz;;
+
+(* Implicit args sit just above the frame base: *ebp+20 = out ptr,
+ * *ebp+24 = task ptr; explicit ty-params follow them. *)
+let out_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words);;
+let task_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words+1);;
+let ty_param_n i =
+  wordptr_n (Il.Hreg ebp) (frame_base_words + implicit_arg_words + i);;
+
+(* Spill slot [i] lives below the frame-info words:
+ * *(ebp - frame_info_sz - (i+1)*word_sz). *)
+let spill_slot (i:Il.spill) : Il.mem =
+  let imm = (Asm.IMM
+               (Int64.neg
+                  (Int64.add frame_info_sz
+                     (Int64.mul word_sz
+                        (Int64.of_int (i+1))))))
+  in
+    Il.RegIn ((Il.Hreg ebp), Some imm)
+;;
+
+
+let get_next_pc_thunk_fixup = new_fixup "glue$get_next_pc"
+;;
+
+(* Emit the get-next-pc thunk: loads the return address sitting at
+ * *esp into eax and returns, so a caller obtains its own next PC in
+ * eax (the usual PIC trick -- ia32 has no direct eip read). *)
+let emit_get_next_pc_thunk (e:Il.emitter) : unit =
+  let sty = Il.AddrTy Il.CodeTy in
+  let rty = Il.ScalarTy sty in
+  let deref_esp = Il.Mem (Il.RegIn (Il.Hreg esp, None), rty) in
+  let eax = (Il.Reg (Il.Hreg eax, sty)) in
+    Il.emit_full e (Some get_next_pc_thunk_fixup) []
+      (Il.umov eax (Il.Cell deref_esp));
+    Il.emit e Il.Ret;
+;;
+
+(* (result reg, fixup, emit fn) triple exported to the ABI layer. *)
+let get_next_pc_thunk : (Il.reg * fixup * (Il.emitter -> unit)) =
+  (Il.Hreg eax, get_next_pc_thunk_fixup, emit_get_next_pc_thunk)
+;;
+
+(* Emit a call to a C function [fptr]: switch from the rust stack to
+ * the task's runtime stack, marshal [args] onto it, call, and switch
+ * back.  [tmp1]/[tmp2] are scratch registers (trashed); [ret]
+ * receives the result; [in_prologue] selects the special ebp-based
+ * dance used for 'grow' upcalls made before the frame exists. *)
+let emit_c_call
+    (e:Il.emitter)
+    (ret:Il.cell)
+    (tmp1:Il.reg)
+    (tmp2:Il.reg)
+    (nabi:nabi)
+    (in_prologue:bool)
+    (fptr:Il.code)
+    (args:Il.operand array)
+    : unit =
+
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in
+
+  (* rust calls get task as arg0 *)
+  let args =
+    if nabi.nabi_convention = CONV_rust
+    then Array.append [| c task_ptr |] args
+    else args
+  in
+  let nargs = Array.length args in
+  let arg_sz = Int64.mul (Int64.of_int nargs) word_sz
+  in
+
+    mov (r tmp1) (c task_ptr);          (* tmp1 = task from argv[-1] *)
+    mov (r tmp2) (ro esp);              (* tmp2 = esp                *)
+    mov                                 (* task->rust_sp = tmp2      *)
+      (word_n tmp1 Abi.task_field_rust_sp)
+      (c (r tmp2));
+    mov                                 (* esp = task->runtime_sp    *)
+      (rc esp)
+      (c (word_n tmp1 Abi.task_field_runtime_sp));
+
+    binary Il.SUB (rc esp) arg_sz;      (* make room on the stack    *)
+    binary Il.AND (rc esp)              (* and 16-byte align sp      *)
+      0xfffffffffffffff0L;              (* (truncates to 0xfffffff0) *)
+
+    Array.iteri
+      begin
+        fun i (arg:Il.operand) ->       (* write args to C stack     *)
+          match arg with
+              Il.Cell (Il.Mem (a, ty)) ->
+                begin
+                  match a with
+                      (* esp-relative args must be re-read via tmp2,
+                       * the saved rust-sp: esp now points at the C
+                       * stack. *)
+                      Il.RegIn (Il.Hreg base, off) when base == esp ->
+                        mov (r tmp1) (c (Il.Mem (Il.RegIn (tmp2, off), ty)));
+                        mov (word_n (h esp) i) (c (r tmp1));
+                    | _ ->
+                        mov (r tmp1) arg;
+                        mov (word_n (h esp) i) (c (r tmp1));
+                end
+            | _ ->
+                mov (word_n (h esp) i) arg
+      end
+      args;
+
+    match ret with
+        Il.Mem (Il.RegIn (Il.Hreg base, _), _) when base == esp ->
+          assert (not in_prologue);
+
+          (* If ret is esp-relative, use a temporary register until we
+             switched stacks. *)
+
+          emit (Il.call (r tmp1) fptr);
+          mov (r tmp2) (c task_ptr);
+          mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp));
+          mov ret (c (r tmp1));
+
+      | _ when in_prologue ->
+          (*
+           * We have to do something a little surprising here:
+           * we're doing a 'grow' call so ebp is going to point
+           * into a dead stack frame on call-return. So we
+           * temporarily store task-ptr into ebp and then reload
+           * esp *and* ebp via ebp->rust_sp on the other side of
+           * the call.
+           *)
+          mov (rc ebp) (c task_ptr);
+          emit (Il.call ret fptr);
+          mov (rc esp) (c (word_n (h ebp) Abi.task_field_rust_sp));
+          mov (rc ebp) (ro esp);
+
+      | _ ->
+          emit (Il.call ret fptr);
+          mov (r tmp2) (c task_ptr);
+          mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp));
+;;
+
+(* Prologue-time 'grow' upcall: result discarded into eax, edx/ecx as
+ * scratch (no vregs exist yet in the prologue, so hregs are used). *)
+let emit_void_prologue_call
+    (e:Il.emitter)
+    (nabi:nabi)
+    (fn:fixup)
+    (args:Il.operand array)
+    : unit =
+  let callee = Abi.load_fixup_codeptr e (h eax) fn true nabi.nabi_indirect in
+    emit_c_call e (rc eax) (h edx) (h ecx) nabi true callee args
+;;
+
+(* General native call from normal code: scratch registers are fresh
+ * vregs, left to the register allocator. *)
+let emit_native_call
+    (e:Il.emitter)
+    (ret:Il.cell)
+    (nabi:nabi)
+    (fn:fixup)
+    (args:Il.operand array)
+    : unit =
+
+  let (tmp1, _) = vreg e in
+  let (tmp2, _) = vreg e in
+  let (freg, _) = vreg e in
+  let callee = Abi.load_fixup_codeptr e freg fn true nabi.nabi_indirect in
+    emit_c_call e ret tmp1 tmp2 nabi false callee args
+;;
+
+(* Native call whose result is ignored (dumped into a fresh vreg). *)
+let emit_native_void_call
+    (e:Il.emitter)
+    (nabi:nabi)
+    (fn:fixup)
+    (args:Il.operand array)
+    : unit =
+
+  let (ret, _) = vreg e in
+    emit_native_call e (r ret) nabi fn args
+;;
+
+(* Native call emitted inside a thunk: the callee may be a fixup
+ * (old path) or a runtime pointer operand (new path).  The native
+ * result (in eax) is stored through [ret], which is expected to hold
+ * a pointer to the destination word. *)
+let emit_native_call_in_thunk
+    (e:Il.emitter)
+    (ret:Il.cell)
+    (nabi:nabi)
+    (fn:Il.operand)
+    (args:Il.operand array)
+    : unit =
+
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+
+    begin
+      match fn with
+          (*
+           * NB: old path, remove when/if you're sure you don't
+           * want native-linker-symbol-driven requirements.
+           *)
+          Il.ImmPtr (fix, _) ->
+            let code =
+              Abi.load_fixup_codeptr e (h eax) fix true nabi.nabi_indirect
+            in
+              emit_c_call e (rc eax) (h edx) (h ecx) nabi false code args;
+
+        | _ ->
+            (*
+             * NB: new path, ignores nabi_indirect, assumes
+             * indirect via pointer from upcall_require_c_sym
+             * or crate cache.
+             *)
+            mov (rc eax) fn;
+            let cell = Il.Reg (h eax, Il.AddrTy Il.CodeTy) in
+            let fptr = Il.CodePtr (Il.Cell cell) in
+              emit_c_call e (rc eax) (h edx) (h ecx) nabi false fptr args;
+    end;
+
+    (* Store eax through the pointer in [ret]; if [ret] is not already
+     * a register, stage the pointer in edx first (both movs below
+     * belong to the second match arm). *)
+    match ret with
+        Il.Reg (r, _) -> mov (word_at r) (ro eax)
+      | _ -> mov (rc edx) (c ret);
+          mov (word_at (h edx)) (ro eax)
+;;
+
+(* Unwind glue: walk the chain of frames via the saved-ebp links,
+ * running each frame's drop glue (if any), then make the final
+ * exit-task upcall.  Runs on the rust stack, which it re-enters via
+ * task->rust_sp. *)
+let unwind_glue
+    (e:Il.emitter)
+    (nabi:nabi)
+    (exit_task_fixup:fixup)
+    : unit =
+
+  let fp_n = word_n (Il.Hreg ebp) in
+  let edx_n = word_n (Il.Hreg edx) in
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let push x = emit (Il.Push x) in
+  let pop x = emit (Il.Pop x) in
+  let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in
+  let codefix fix = Il.CodePtr (Il.ImmPtr (fix, Il.CodeTy)) in
+  let mark fix = Il.emit_full e (Some fix) [] Il.Dead in
+  let glue_field = Abi.frame_glue_fns_field_drop in
+
+  let repeat_jmp_fix = new_fixup "repeat jump" in
+  let skip_jmp_fix = new_fixup "skip jump" in
+  let exit_jmp_fix = new_fixup "exit jump" in
+
+    mov (rc edx) (c task_ptr);          (* switch back to rust stack *)
+    mov
+      (rc esp)
+      (c (edx_n Abi.task_field_rust_sp));
+
+    mark repeat_jmp_fix;                (* top of per-frame loop     *)
+
+    mov (rc esi) (c (fp_n (-1)));       (* esi <- crate ptr          *)
+    mov (rc edx) (c (fp_n (-2)));       (* edx <- frame glue functions. *)
+    emit (Il.cmp (ro edx) (immi 0L));
+
+    emit
+      (Il.jmp Il.JE
+         (codefix skip_jmp_fix));       (* skip if glue struct* is null *)
+    add edx esi;                        (* add crate ptr to disp.    *)
+    mov
+      (rc ecx)
+      (c (edx_n glue_field));           (* ecx <- drop glue          *)
+    emit (Il.cmp (ro ecx) (immi 0L));
+
+    emit
+      (Il.jmp Il.JE
+         (codefix skip_jmp_fix));       (* skip if glue-fn is null   *)
+    add ecx esi;                        (* add crate ptr to disp.    *)
+    push (ro ebp);                      (* frame-to-drop             *)
+    push (c task_ptr);                  (* form usual call to glue   *)
+    push (immi 0L);                     (* outptr                    *)
+    emit (Il.call (rc eax)
+            (reg_codeptr (h ecx)));     (* call glue_fn, trashing eax. *)
+    (* Pop the three args back off (values discarded). *)
+    pop (rc eax);
+    pop (rc eax);
+    pop (rc eax);
+
+    mark skip_jmp_fix;
+    mov (rc edx) (c (fp_n 3));          (* load next fp (callee-saves[3]) *)
+    emit (Il.cmp (ro edx) (immi 0L));
+    emit (Il.jmp Il.JE
+            (codefix exit_jmp_fix));    (* exit if next fp is null   *)
+    mov (rc ebp) (ro edx);              (* move to next frame        *)
+    emit (Il.jmp Il.JMP
+            (codefix repeat_jmp_fix));  (* loop                      *)
+
+    (* exit path. *)
+    mark exit_jmp_fix;
+
+    let callee =
+      Abi.load_fixup_codeptr
+        e (h eax) exit_task_fixup false nabi.nabi_indirect
+    in
+      emit_c_call
+        e (rc eax) (h edx) (h ecx) nabi false callee [| (c task_ptr) |];
+;;
+
+(* Puts result in eax; clobbers ecx, edx in the process.
+ *
+ * Compiles a runtime size expression into a push/pop stack-machine
+ * sequence: each binary node evaluates its right operand, PUSHes it,
+ * evaluates the left into eax, then POPs the right into ecx.  Uses
+ * no vregs, so it is safe inside the prologue (see fn_prologue). *)
+let rec calculate_sz (e:Il.emitter) (size:size) : unit =
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let push x = emit (Il.Push x) in
+  let pop x = emit (Il.Pop x) in
+  let neg x = emit (Il.unary Il.NEG (rc x) (ro x)) in
+  let bnot x = emit (Il.unary Il.NOT (rc x) (ro x)) in
+  let band x y = emit (Il.binary Il.AND (rc x) (ro x) (ro y)) in
+  let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in
+  let mul x y = emit (Il.binary Il.UMUL (rc x) (ro x) (ro y)) in
+  let subi x y = emit (Il.binary Il.SUB (rc x) (ro x) (immi y)) in
+  (* Evaluate [b], park it on the stack while [a] is evaluated, then
+   * recover it: costs one stack word at highwater (see
+   * size_calculation_stack_highwater). *)
+  let eax_gets_a_and_ecx_gets_b a b =
+    calculate_sz e b;
+    push (ro eax);
+    calculate_sz e a;
+    pop (rc ecx);
+  in
+    match size with
+        SIZE_fixed i ->
+          mov (rc eax) (immi i)
+
+      | SIZE_fixup_mem_sz f ->
+          mov (rc eax) (imm (Asm.M_SZ f))
+
+      | SIZE_fixup_mem_pos f ->
+          mov (rc eax) (imm (Asm.M_POS f))
+
+      | SIZE_param_size i ->
+          mov (rc eax) (Il.Cell (ty_param_n i));
+          mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_size))
+
+      | SIZE_param_align i ->
+          mov (rc eax) (Il.Cell (ty_param_n i));
+          mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_align))
+
+      | SIZE_rt_neg a ->
+          calculate_sz e a;
+          neg eax
+
+      | SIZE_rt_add (a, b) ->
+          eax_gets_a_and_ecx_gets_b a b;
+          add eax ecx
+
+      | SIZE_rt_mul (a, b) ->
+          eax_gets_a_and_ecx_gets_b a b;
+          mul eax ecx
+
+      | SIZE_rt_max (a, b) ->
+          (* max: keep eax if eax >= ecx (JAE over the mov), else take
+           * ecx; jump target back-patched once known. *)
+          eax_gets_a_and_ecx_gets_b a b;
+          emit (Il.cmp (ro eax) (ro ecx));
+          let jmp_pc = e.Il.emit_pc in
+            emit (Il.jmp Il.JAE Il.CodeNone);
+            mov (rc eax) (ro ecx);
+            Il.patch_jump e jmp_pc e.Il.emit_pc;
+
+      | SIZE_rt_align (align, off) ->
+          (*
+           * calculate off + pad where:
+           *
+           * pad = (align - (off mod align)) mod align
+           *
+           * In our case it's always a power of two,
+           * so we can just do:
+           *
+           * mask = align-1
+           * off += mask
+           * off &= ~mask
+           *
+           *)
+          eax_gets_a_and_ecx_gets_b off align;
+          subi ecx 1L;
+          add eax ecx;
+          bnot ecx;
+          band eax ecx;
+;;
+
+(* Worst-case number of words pushed while [calculate_sz] evaluates
+ * [size].  Leaves touch no stack; each two-operand node PUSHes one
+ * intermediate (see eax_gets_a_and_ecx_gets_b), so it costs one word
+ * plus its subterms' highwater (a safe over-approximation of
+ * max(hw(b), 1 + hw(a))).
+ *
+ * Fix: SIZE_rt_max uses the same push/pop protocol as add/mul/align,
+ * but the original omitted its +1, under-reserving a word (e.g.
+ * max(fixed, fixed) reported 0 while the calculation pushes once). *)
+let rec size_calculation_stack_highwater (size:size) : int =
+  match size with
+      SIZE_fixed _
+    | SIZE_fixup_mem_sz _
+    | SIZE_fixup_mem_pos _
+    | SIZE_param_size _
+    | SIZE_param_align _ -> 0
+  | SIZE_rt_neg a ->
+      (size_calculation_stack_highwater a)
+  | SIZE_rt_max (a, b)
+  | SIZE_rt_add (a, b)
+  | SIZE_rt_mul (a, b)
+  | SIZE_rt_align (a, b) ->
+      (size_calculation_stack_highwater a)
+      + (size_calculation_stack_highwater b)
+      + 1
+;;
+
+(* Safety margin added to every stack-limit check: two frame bases. *)
+let boundary_sz =
+  (Asm.IMM
+     (Int64.add                       (* Extra non-frame room:          *)
+        frame_base_sz                 (* to safely enter the next frame, *)
+        frame_base_sz))               (* and make a 'grow' upcall there. *)
+;;
+
+(* Emit the stack-limit check: if esp - growsz would dip below the
+ * limit in [end_reg], compute the needed growth amount and make the
+ * 'grow' upcall, then jump back to [restart_pc] to retry frame
+ * entry.  [grow_jmp], if given, is an earlier jump (the primordial
+ * check in fn_prologue) patched to land on the grow path here. *)
+let stack_growth_check
+    (e:Il.emitter)
+    (nabi:nabi)
+    (grow_task_fixup:fixup)
+    (growsz:Il.operand)
+    (grow_jmp:Il.label option)
+    (restart_pc:Il.label)
+    (end_reg:Il.reg)  (*
+                       * stack limit on entry,
+                       * new stack pointer on exit
+                       *)
+    (tmp_reg:Il.reg)  (* temporary (trashed) *)
+    : unit =
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in
+  let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in
+    mov (r tmp_reg) (ro esp);           (* tmp = esp                 *)
+    sub (r tmp_reg) growsz;             (* tmp -= size-request       *)
+    emit (Il.cmp (c (r end_reg)) (c (r tmp_reg)));
+    (*
+     * Jump *over* 'grow' upcall on non-underflow:
+     * if end_reg <= tmp_reg
+     *)
+
+    let bypass_grow_upcall_jmp_pc = e.Il.emit_pc in
+      emit (Il.jmp Il.JBE Il.CodeNone);
+
+      begin
+        match grow_jmp with
+            None -> ()
+          | Some j -> Il.patch_jump e j e.Il.emit_pc
+      end;
+      (* Extract growth-amount from tmp_reg: esp - tmp + boundary. *)
+      mov (r end_reg) (ro esp);
+      sub (r end_reg) (c (r tmp_reg));
+      add (r end_reg) (Il.Imm (boundary_sz, word_ty));
+      (* Perform 'grow' upcall, then restart frame-entry. *)
+      emit_void_prologue_call e nabi grow_task_fixup [| c (r end_reg) |];
+      emit (Il.jmp Il.JMP (Il.CodeLabel restart_pc));
+      Il.patch_jump e bypass_grow_upcall_jmp_pc e.Il.emit_pc
+;;
+
+(* Emit a function prologue: save callee-saves, establish ebp,
+ * compute the (possibly dynamic) frame size, check/grow the stack,
+ * reserve and zero the frame.  See the inline commentary for the
+ * stack layout and the primordial-frame subtlety. *)
+let fn_prologue
+    (e:Il.emitter)
+    (framesz:size)
+    (callsz:size)
+    (nabi:nabi)
+    (grow_task_fixup:fixup)
+    : unit =
+
+  let esi_n = word_n (h esi) in
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in
+  let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in
+
+  (* We may be in a dynamic-sized frame. This makes matters complex,
+   * as we can't just perform a simple growth check in terms of a
+   * static size. The check is against a dynamic size, and we need to
+   * calculate that size.
+   *
+   * Unlike size-calculations in 'trans', we do not use vregs to
+   * calculate the frame size; instead we use a PUSH/POP stack-machine
+   * translation that doesn't disturb the registers we're
+   * somewhat-carefully *using* during frame setup.
+   *
+   * This only pushes the problem back a little ways though: we still
+   * need to be sure we have enough room to do the PUSH/POP
+   * calculation. We refer to this amount of space as the 'primordial'
+   * frame size, which can *thankfully* be calculated exactly from the
+   * arithmetic expression we're aiming to calculate. So we make room
+   * for the primordial frame, run the calculation of the full dynamic
+   * frame size, then make room *again* for this dynamic size.
+   *
+   * Our caller reserved enough room for us to push our own frame-base,
+   * as well as the frame-base that it will cost to do an upcall.
+   *)
+
+  (*
+   * After we save callee-saves, We have a stack like this:
+   *
+   *   | ...           |
+   *   | caller frame  |
+   *   | + spill       |
+   *   | caller arg K  |
+   *   | ...           |
+   *   | caller arg 0  |
+   *   | retpc         | <-- sp we received, top of callee frame
+   *   | callee save 1 |
+   *   | ...           |
+   *   | callee save N | <-- ebp and esp after saving callee-saves
+   *   | ...           |
+   *   | callee frame  |
+   *   | + spill       |
+   *   | callee arg J  |
+   *   | ...           |
+   *   | callee arg 0  | <-- bottom of callee frame
+   *   | next retpc    |
+   *   | next save 1   |
+   *   | ...           |
+   *   | next save N   | <-- bottom of region we must reserve
+   *   | ...           |
+   *
+   * A "frame base" is the retpc and set of callee-saves.
+   *
+   * We need to reserve room for our frame *and* the next frame-base, because
+   * we're going to be blindly entering the next frame-base (pushing eip and
+   * callee-saves) before we perform the next check.
+   *)
+
+  (*
+   * We double the reserved callsz because we need a 'temporary tail-call
+   * region' above the actual call region, in case there's a drop call at the
+   * end of assembling the tail-call args and before copying them to callee
+   * position.
+   *)
+
+  let callsz = add_sz callsz callsz in
+  let n_glue_args = Int64.of_int Abi.worst_case_glue_call_args in
+  let n_glue_words = Int64.mul word_sz n_glue_args in
+
+  (*
+   * Add in *another* word to handle an extra-awkward spill of the
+   * callee address that might occur during an indirect tail call.
+   *)
+  let callsz = add_sz (SIZE_fixed word_sz) callsz in
+
+  (*
+   * Add in enough words for a glue-call (these occur underneath esp)
+   *)
+  let callsz = add_sz (SIZE_fixed n_glue_words) callsz in
+
+  (*
+   * Cumulative dynamic-frame size.
+   *)
+  let call_and_frame_sz = add_sz callsz framesz in
+
+    (* Already have room to save regs on entry. *)
+    save_callee_saves e;
+
+    let restart_pc = e.Il.emit_pc in
+
+      mov (rc ebp) (ro esp);            (* Establish frame base.     *)
+      mov (rc esi) (c task_ptr);        (* esi = task                *)
+      mov
+        (rc esi)
+        (c (esi_n Abi.task_field_stk)); (* esi = task->stk           *)
+      (* esi = stack limit = stk->data + safety margin.              *)
+      add (rc esi) (imm
+                      (Asm.ADD
+                         ((word_off_n Abi.stk_field_data),
+                          boundary_sz)));
+
+      let (dynamic_frame_sz, dynamic_grow_jmp) =
+        match Il.size_to_expr64 call_and_frame_sz with
+            None ->
+              begin
+                let primordial_frame_sz =
+                  Asm.IMM
+                    (Int64.mul word_sz
+                       (Int64.of_int
+                          (size_calculation_stack_highwater
+                             call_and_frame_sz)))
+                in
+                  (* Primordial size-check. *)
+                  mov (rc edi) (ro esp);  (* edi = esp            *)
+                  sub                     (* edi -= size-request  *)
+                    (rc edi)
+                    (imm primordial_frame_sz);
+                  emit (Il.cmp (ro esi) (ro edi));
+
+                  (* Jump to 'grow' upcall on underflow: if esi (bottom) is >
+                     edi (proposed-esp) *)
+
+                  let primordial_underflow_jmp_pc = e.Il.emit_pc in
+                    emit (Il.jmp Il.JA Il.CodeNone);
+
+                    (* Calculate dynamic frame size. *)
+                    calculate_sz e call_and_frame_sz;
+                    ((ro eax), Some primordial_underflow_jmp_pc)
+              end
+          (* NB: [e] here shadows the emitter; it is the Asm.expr64
+           * of a statically-known size. *)
+          | Some e -> ((imm e), None)
+      in
+
+        (* "Full" frame size-check. *)
+        stack_growth_check e nabi grow_task_fixup
+          dynamic_frame_sz dynamic_grow_jmp restart_pc (h esi) (h edi);
+
+
+        (* Establish a frame, wherever we landed. *)
+        sub (rc esp) dynamic_frame_sz;
+
+        (* Zero the frame.
+         *
+         * FIXME: this is awful, will go away when we have proper CFI.
+         *)
+
+        mov (rc edi) (ro esp);
+        mov (rc ecx) dynamic_frame_sz;
+        emit (Il.unary Il.ZERO (word_at (h edi)) (ro ecx));
+
+        (* Move esp back up over the glue region. *)
+        add (rc esp) (immi n_glue_words);
+;;
+
+
+(* Emit a function epilogue: collapse esp back to the frame base
+ * (ebp), restore the callee-saves pushed by the prologue, return. *)
+let fn_epilogue (e:Il.emitter) : unit =
+
+  (* Tear down existing frame. *)
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+    mov (rc esp) (ro ebp);
+    restore_callee_saves e;
+    emit Il.Ret;
+;;
+
+(* Emit an unrolled copy of [n_bytes] from *src_ptr to *dst_ptr via
+ * [tmp_reg]: whole words first, then any remaining bytes.
+ * [ascending] selects copy direction; the descending path copies the
+ * trailing bytes first, then the words, all high-to-low, so it is
+ * safe when dst overlaps src from above. *)
+let inline_memcpy
+    (e:Il.emitter)
+    (n_bytes:int64)
+    (dst_ptr:Il.reg)
+    (src_ptr:Il.reg)
+    (tmp_reg:Il.reg)
+    (ascending:bool)
+    : unit =
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let bpw = Int64.to_int word_sz in
+  let w = Int64.to_int (Int64.div n_bytes word_sz) in
+  let b = Int64.to_int (Int64.rem n_bytes word_sz) in
+    if ascending
+    then
+      begin
+        for i = 0 to (w-1) do
+          mov (r tmp_reg) (c (word_n src_ptr i));
+          mov (word_n dst_ptr i) (c (r tmp_reg));
+        done;
+        for i = 0 to (b-1) do
+          let off = (w*bpw) + i in
+            mov (r tmp_reg) (c (byte_n src_ptr off));
+            mov (byte_n dst_ptr off) (c (r tmp_reg));
+        done;
+      end
+    else
+      begin
+        for i = (b-1) downto 0 do
+          let off = (w*bpw) + i in
+            mov (r tmp_reg) (c (byte_n src_ptr off));
+            mov (byte_n dst_ptr off) (c (r tmp_reg));
+        done;
+        for i = (w-1) downto 0 do
+          mov (r tmp_reg) (c (word_n src_ptr i));
+          mov (word_n dst_ptr i) (c (r tmp_reg));
+        done;
+      end
+;;
+
+
+
+(* Emit a tail call from the current frame to [callee_code].
+ *
+ * [caller_callsz] / [caller_argsz] describe the current frame's call
+ * and argument regions; [callee_argsz] is the size of the outgoing
+ * argument tuple, already assembled in the tail-call arg-prep area.
+ *
+ * Sequence: point esp at the prep area, optionally spill a cell-based
+ * callee pointer, restore callee-saves, copy the arg tuple down over
+ * the caller's args (top-to-bottom, since regions may overlap),
+ * re-push the return pc and jump. *)
+let fn_tail_call
+    (e:Il.emitter)
+    (caller_callsz:int64)
+    (caller_argsz:int64)
+    (callee_code:Il.code)
+    (callee_argsz:int64)
+    : unit =
+  let emit = Il.emit e in
+  let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in
+  let mov dst src = emit (Il.umov dst src) in
+  (* How much smaller (or larger, if negative) the callee's args are. *)
+  let argsz_diff = Int64.sub caller_argsz callee_argsz in
+  (* One extra word just above the temporary call region, used only to
+   * park a cell-based callee pointer during the arg shuffle below. *)
+  let callee_spill_cell = word_at_off (h esp) (Asm.IMM caller_callsz) in
+
+  (*
+   * Our outgoing arguments were prepared in a region above the call region;
+   * this is reserved for the purpose of making tail-calls *only*, so we do
+   * not collide with glue calls we had to make while dropping the frame,
+   * after assembling our arg region.
+   *
+   * Thus, esp points to the "normal" arg region, and we need to move it
+   * to point to the tail-call arg region. To make matters simple, both
+   * regions are the same size, one atop the other.
+   *)
+
+  annotate e "tail call: move esp to temporary tail call arg-prep area";
+  binary Il.ADD (rc esp) caller_callsz;
+
+  (*
+   * If we're given a non-ImmPtr callee, we may need to move it to a known
+   * cell to avoid clobbering its register while we do the argument shuffle
+   * below.
+   *
+   * Sadly, we are too register-starved to just flush our callee to a reg;
+   * so we carve out an extra word of the temporary call-region and use
+   * it.
+   *
+   * This is ridiculous, but works.
+   *)
+  begin
+    match callee_code with
+        Il.CodePtr (Il.Cell c) ->
+          annotate e "tail call: spill callee-ptr to temporary memory";
+          mov callee_spill_cell (Il.Cell c);
+
+      | _ -> ()
+  end;
+
+  (* edx <- ebp; restore ebp, edi, esi, ebx; ecx <- retpc *)
+  annotate e "tail call: restore callee-saves from frame base";
+  restore_frame_base e (h edx) (h ecx);
+  (* move edx past frame base and adjust for difference in call sizes *)
+  annotate e "tail call: adjust temporary fp";
+  binary Il.ADD (rc edx) (Int64.add frame_base_sz argsz_diff);
+
+  (*
+   * stack grows downwards; copy from high to low
+   *
+   *   bpw = word_sz
+   *   w = floor(callee_argsz / word_sz)
+   *   b = callee_argsz % word_sz
+   *
+   * byte copies:
+   *   +------------------------+
+   *   |                        |
+   *   +------------------------+ <-- base + (w * word_sz) + (b - 1)
+   *   .                        .
+   *   +------------------------+
+   *   |                        |
+   *   +------------------------+ <-- base + (w * word_sz) + (b - b)
+   * word copies:                   =
+   *   +------------------------+ <-- base + ((w-0) * word_sz)
+   *   |  bytes                 |
+   *   |  (w-1)*bpw..w*bpw-1    |
+   *   +------------------------+ <-- base + ((w-1) * word_sz)
+   *   |  bytes                 |
+   *   | (w-2)*bpw..(w-1)*bpw-1 |
+   *   +------------------------+ <-- base + ((w-2) * word_sz)
+   *   .                        .
+   *   .                        .
+   *   .                        .
+   *   +------------------------+
+   *   |  bytes                 |
+   *   |  0..bpw - 1            |
+   *   +------------------------+ <-- base + ((w-w) * word_sz)
+   *)
+
+  annotate e "tail call: move arg-tuple up to top of frame";
+  (* NOTE: must copy top-to-bottom in case the regions overlap *)
+  inline_memcpy e callee_argsz (h edx) (h esp) (h eax) false;
+
+  (*
+   * We're done with eax now; so in the case where we had to spill
+   * our callee codeptr, we can reload it into eax here and rewrite
+   * our callee into *eax.
+   *)
+  let callee_code =
+    match callee_code with
+        Il.CodePtr (Il.Cell _) ->
+          annotate e "tail call: reload callee-ptr from temporary memory";
+          mov (rc eax) (Il.Cell callee_spill_cell);
+          reg_codeptr (h eax)
+
+      | _ -> callee_code
+  in
+
+
+  (* esp <- edx *)
+  annotate e "tail call: adjust stack pointer";
+  mov (rc esp) (ro edx);
+  (* PUSH ecx (retpc) *)
+  annotate e "tail call: push retpc";
+  emit (Il.Push (ro ecx));
+  (* JMP callee_code *)
+  emit (Il.jmp Il.JMP callee_code);
+;;
+
+
+(* Word-indexed field offsets within a loop-info record:
+ * return pc, saved stack pointer, saved frame pointer. *)
+let loop_info_field_retpc = 0;;
+let loop_info_field_sp = 1;;
+let loop_info_field_fp = 2;;
+
+(* [self_args_cell self_args_rty] is the cell covering the current
+ * frame's incoming argument tuple: memory at ebp + frame_base_sz,
+ * viewed at referent type [self_args_rty]. *)
+let self_args_cell (self_args_rty:Il.referent_ty) : Il.cell =
+  Il.Mem (Il.RegIn (h ebp, Some (Asm.IMM frame_base_sz)), self_args_rty)
+;;
+
+(* Emit the task-activation glue: switch from the C (runtime) stack to
+ * a task's Rust stack and resume it.  Emitted once per compilation
+ * unit; the instruction order below is load-bearing. *)
+let activate_glue (e:Il.emitter) : unit =
+  (*
+   * This is a bit of glue-code. It should be emitted once per
+   * compilation unit.
+   *
+   *   - save regs on C stack
+   *   - align sp on a 16-byte boundary
+   *   - save sp to task.runtime_sp (runtime_sp is thus always aligned)
+   *   - load saved task sp (switch stack)
+   *   - restore saved task regs
+   *   - return to saved task pc
+   *
+   * Our incoming stack looks like this:
+   *
+   *   *esp+4        = [arg1   ] = task ptr
+   *   *esp          = [retpc  ]
+   *)
+
+  (* Helpers for word-sized cells at *(esp + n) and *(edx + n). *)
+  let sp_n = word_n (Il.Hreg esp) in
+  let edx_n = word_n (Il.Hreg edx) in
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in
+
+  mov (rc edx) (c (sp_n 1));              (* edx <- task             *)
+  save_callee_saves e;
+  mov
+    (edx_n Abi.task_field_runtime_sp)
+    (ro esp);                             (* task->runtime_sp <- esp *)
+  mov
+    (rc esp)
+    (c (edx_n Abi.task_field_rust_sp));   (* esp <- task->rust_sp    *)
+
+  (*
+   * There are two paths we can arrive at this code from:
+   *
+   *
+   *   1. We are activating a task for the first time. When we switch into
+   *      the task stack and 'ret' to its first instruction, we'll start
+   *      doing whatever the first instruction says. Probably saving
+   *      registers and starting to establish a frame. Harmless stuff,
+   *      doesn't look at task->rust_sp again except when it clobbers it
+   *      during a later upcall.
+   *
+   *
+   *   2. We are resuming a task that was descheduled by the yield glue
+   *      below.  When we switch into the task stack and 'ret', we'll be
+   *      ret'ing to a very particular instruction:
+   *
+   *              "esp <- task->rust_sp"
+   *
+   *      this is the first instruction we 'ret' to after this glue, because
+   *      it is the first instruction following *any* upcall, and the task
+   *      we are activating was descheduled mid-upcall.
+   *
+   *      Unfortunately for us, we have already restored esp from
+   *      task->rust_sp and are about to eat the 5 words off the top of it.
+   *
+   *
+   *      | ...    | <-- where esp will be once we restore + ret, below,
+   *      | retpc  |     and where we'd *like* task->rust_sp to wind up.
+   *      | ebp    |
+   *      | edi    |
+   *      | esi    |
+   *      | ebx    | <-- current task->rust_sp == current esp
+   *
+   *
+   *      This is a problem. If we return to "esp <- task->rust_sp" it will
+   *      push esp back down by 5 words. This manifests as a rust stack that
+   *      grows by 5 words on each yield/reactivate. Not good.
+   *
+   *      So what we do here is just adjust task->rust_sp up 5 words as
+   *      well, to mirror the movement in esp we're about to perform. That
+   *      way the "esp <- task->rust_sp" we 'ret' to below will be a
+   *      no-op. Esp won't move, and the task's stack won't grow.
+   *)
+
+  (* Pre-compensate rust_sp for the (n_callee_saves + retpc) words the
+   * restore + ret below will pop; see the long comment above. *)
+  binary Il.ADD (edx_n Abi.task_field_rust_sp)
+    (Int64.mul (Int64.of_int (n_callee_saves + 1)) word_sz);
+
+    (**** IN TASK STACK ****)
+    restore_callee_saves e;
+    emit Il.Ret;
+    (***********************)
+    ()
+;;
+
+(* Emit the 'bottom half' of yielding: park the running task's state on
+ * its own stack, then switch back to the C stack and return to the
+ * scheduler that activated it. *)
+let yield_glue (e:Il.emitter) : unit =
+
+  (* More glue code, this time the 'bottom half' of yielding.
+   *
+   * We arrived here because an upcall decided to deschedule the
+   * running task. So the upcall's return address got patched to the
+   * first instruction of this glue code.
+   *
+   * When the upcall does 'ret' it will come here, and its esp will be
+   * pointing to the last argument pushed on the C stack before making
+   * the upcall: the 0th argument to the upcall, which is always the
+   * task ptr performing the upcall. That's where we take over.
+   *
+   * Our goal is to complete the descheduling
+   *
+   *   - Switch over to the task stack temporarily.
+   *
+   *   - Save the task's callee-saves onto the task stack.
+   *     (the task is now 'descheduled', safe to set aside)
+   *
+   *   - Switch *back* to the C stack.
+   *
+   *   - Restore the C-stack callee-saves.
+   *
+   *   - Return to the caller on the C stack that activated the task.
+   *
+   *)
+  (* Helpers for word-sized cells at *(esp + n) and *(edx + n). *)
+  let esp_n = word_n (Il.Hreg esp) in
+  let edx_n = word_n (Il.Hreg edx) in
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+
+  mov
+    (rc edx) (c (esp_n 0));               (* edx <- arg0 (task)       *)
+  mov
+    (rc esp)
+    (c (edx_n Abi.task_field_rust_sp));   (* esp <- task->rust_sp     *)
+  save_callee_saves e;
+  mov                                     (* task->rust_sp <- esp     *)
+    (edx_n Abi.task_field_rust_sp)
+    (ro esp);
+  mov
+    (rc esp)
+    (c (edx_n Abi.task_field_runtime_sp)); (* esp <- task->runtime_sp *)
+
+  (**** IN C STACK ****)
+  restore_callee_saves e;
+  emit Il.Ret;
+  (***********************)
+  ()
+;;
+
+
+(* Load the 32-bit address of [fix] (pc-relatively, via the
+ * next-pc thunk) into a register and push it on the stack. *)
+let push_pos32 (e:Il.emitter) (fix:fixup) : unit =
+  let (reg, _, _) = get_next_pc_thunk in
+    Abi.load_fixup_addr e reg fix Il.OpaqueTy;
+    Il.emit e (Il.Push (Il.Cell (Il.Reg (reg, Il.AddrTy Il.OpaqueTy))))
+;;
+
+(* Emit the object file's entry point ([start_fixup]): save
+ * callee-saves, forward argc/argv plus the main-fn and crate fixups to
+ * [rust_start_fixup] (indirectly if [indirect_start]), then clean up
+ * the four pushed args and return. *)
+let objfile_start
+    (e:Il.emitter)
+    ~(start_fixup:fixup)
+    ~(rust_start_fixup:fixup)
+    ~(main_fn_fixup:fixup)
+    ~(crate_fixup:fixup)
+    ~(indirect_start:bool)
+    : unit =
+  let ebp_n = word_n (Il.Hreg ebp) in
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let push_pos32 = push_pos32 e in
+    Il.emit_full e (Some start_fixup) [] Il.Dead;
+    save_callee_saves e;
+    mov (rc ebp) (ro esp);
+
+    (* If we're very lucky, the platform will have left us with
+     * something sensible in the startup stack like so:
+     *
+     *   *ebp+24     = [arg1   ] = argv
+     *   *ebp+20     = [arg0   ] = argc
+     *   *ebp+16     = [retpc  ]
+     *   *ebp+12     = [old_ebp]
+     *   *ebp+8      = [old_edi]
+     *   *ebp+4      = [old_esi]
+     *   *ebp        = [old_ebx]
+     *
+     * This is not the case everywhere, but we start with this
+     * assumption and correct it in the runtime library.
+     *)
+
+    (* Copy argv. *)
+    mov (rc eax) (c (ebp_n (2 + n_callee_saves)));
+    Il.emit e (Il.Push (ro eax));
+
+    (* Copy argc. *)
+    mov (rc eax) (c (ebp_n (1 + n_callee_saves)));
+    Il.emit e (Il.Push (ro eax));
+
+    push_pos32 crate_fixup;
+    push_pos32 main_fn_fixup;
+    let fptr =
+      Abi.load_fixup_codeptr e (h eax) rust_start_fixup true indirect_start
+    in
+      Il.emit e (Il.call (rc eax) fptr);
+      (* Pop the four words pushed as arguments above. *)
+      Il.emit e (Il.Pop (rc ecx));
+      Il.emit e (Il.Pop (rc ecx));
+      Il.emit e (Il.Pop (rc ecx));
+      Il.emit e (Il.Pop (rc ecx));
+      Il.emit e (Il.umov (rc esp) (ro ebp));
+      restore_callee_saves e;
+      Il.emit e Il.Ret;
+;;
+
+(* The x86 ABI record handed to the rest of the compiler: word
+ * geometry, register/stack conventions, and the code-generation entry
+ * points defined in this file. *)
+let (abi:Abi.abi) =
+  {
+    Abi.abi_word_sz = word_sz;
+    Abi.abi_word_bits = word_bits;
+    Abi.abi_word_ty = word_ty;
+
+    Abi.abi_is_2addr_machine = true;
+    Abi.abi_has_pcrel_data = false;
+    Abi.abi_has_pcrel_code = true;
+
+    Abi.abi_n_hardregs = n_hardregs;
+    Abi.abi_str_of_hardreg = reg_str;
+    Abi.abi_prealloc_quad = prealloc_quad;
+    Abi.abi_constrain_vregs = constrain_vregs;
+
+    Abi.abi_emit_fn_prologue = fn_prologue;
+    Abi.abi_emit_fn_epilogue = fn_epilogue;
+    Abi.abi_emit_fn_tail_call = fn_tail_call;
+    Abi.abi_clobbers = clobbers;
+
+    Abi.abi_emit_native_call = emit_native_call;
+    Abi.abi_emit_native_void_call = emit_native_void_call;
+    Abi.abi_emit_native_call_in_thunk = emit_native_call_in_thunk;
+    Abi.abi_emit_inline_memcpy = inline_memcpy;
+
+    Abi.abi_activate = activate_glue;
+    Abi.abi_yield = yield_glue;
+    Abi.abi_unwind = unwind_glue;
+    Abi.abi_get_next_pc_thunk = Some get_next_pc_thunk;
+
+    Abi.abi_sp_reg = (Il.Hreg esp);
+    Abi.abi_fp_reg = (Il.Hreg ebp);
+    Abi.abi_dwarf_fp_reg = dwarf_ebp;
+    Abi.abi_tp_cell = task_ptr;
+    Abi.abi_frame_base_sz = frame_base_sz;
+    Abi.abi_frame_info_sz = frame_info_sz;
+    Abi.abi_implicit_args_sz = implicit_args_sz;
+    Abi.abi_spill_slot = spill_slot;
+  }
+
+
+(*
+ * NB: factor the instruction selector often. There's lots of
+ * semi-redundancy in the ISA.
+ *)
+
+
+(* [imm_is_signed_byte n] is true iff [n] fits in a sign-extended
+ * 8-bit immediate, i.e. -128 <= n <= 127. *)
+let imm_is_signed_byte (n:int64) : bool =
+  if i64_le (-128L) n
+  then i64_le n 127L
+  else false
+;;
+
+(* [imm_is_unsigned_byte n] is true iff [n] fits in a zero-extended
+ * 8-bit immediate, i.e. 0 <= n <= 255. *)
+let imm_is_unsigned_byte (n:int64) : bool =
+  if i64_le 0L n
+  then i64_le n 255L
+  else false
+;;
+
+
+(* [rm_r c r] assembles the mod/rm byte (plus SIB and displacement
+ * bytes as needed) that addresses cell [c], with [r] in the middle
+ * reg/subopcode field.  [r] is either a register number from [reg] or
+ * a /n subopcode (slash0..slash7). *)
+let rm_r (c:Il.cell) (r:int) : Asm.frag =
+  (* NB: these are this compiler's Hreg numbers for ebp/esp (compared
+   * against the Hreg in the cell), not the x86 hardware encodings;
+   * [reg] performs the translation when the bytes are built. *)
+  let reg_ebp = 6 in
+  let reg_esp = 7 in
+
+  (*
+   * We do a little contortion here to accommodate the special case of
+   * being asked to form esp-relative addresses; these require SIB
+   * bytes on x86. Of course!
+   *)
+  (* SIB 0x24 = scale 1, no index, base esp. *)
+  let sib_esp_base = Asm.BYTE 0x24 in
+  let seq1 rm modrm =
+    if rm = reg_esp
+    then Asm.SEQ [| modrm; sib_esp_base |]
+    else modrm
+  in
+  let seq2 rm modrm disp =
+    if rm = reg_esp
+    then Asm.SEQ [| modrm; sib_esp_base; disp |]
+    else Asm.SEQ [| modrm; disp |]
+  in
+
+    match c with
+        Il.Reg ((Il.Hreg rm), _) ->
+          Asm.BYTE (modrm_reg (reg rm) r)
+      | Il.Mem (a, _) ->
+          begin
+            match a with
+                Il.Abs disp ->
+                  Asm.SEQ [| Asm.BYTE (modrm_deref_disp32 r);
+                             Asm.WORD (TY_i32, disp) |]
+
+              (* ebp is excluded: mod=00 r/m=ebp means disp32 on x86,
+               * so ebp-relative forms must carry a displacement. *)
+              | Il.RegIn ((Il.Hreg rm), None) when rm != reg_ebp ->
+                  seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r))
+
+              | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM 0L))
+                  when rm != reg_ebp ->
+                  seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r))
+
+              (* The next two are just to save the relaxation system some
+               * churn.
+               *)
+
+              | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n))
+                  when imm_is_signed_byte n ->
+                  seq2 rm
+                    (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r))
+                    (Asm.WORD (TY_i8, Asm.IMM n))
+
+              | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n)) ->
+                  seq2 rm
+                    (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r))
+                    (Asm.WORD (TY_i32, Asm.IMM n))
+
+              (* Unknown displacement: let the relaxation system pick
+               * between disp32 (listed first) and disp8 forms. *)
+              | Il.RegIn ((Il.Hreg rm), Some disp) ->
+                  Asm.new_relaxation
+                    [|
+                      seq2 rm
+                        (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r))
+                        (Asm.WORD (TY_i32, disp));
+                      seq2 rm
+                        (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r))
+                        (Asm.WORD (TY_i8, disp))
+                    |]
+              | _ -> raise Unrecognized
+          end
+      | _ -> raise Unrecognized
+;;
+
+
+(* One-byte opcode [op] followed by a mod/rm encoding of [c] and [r]. *)
+let insn_rm_r (op:int) (c:Il.cell) (r:int) : Asm.frag =
+  Asm.SEQ [| Asm.BYTE op; rm_r c r |]
+;;
+
+
+(* Like [insn_rm_r] but with a trailing immediate [i] of machine
+ * type [ty] (which fixes its emitted width and range check). *)
+let insn_rm_r_imm
+    (op:int)
+    (c:Il.cell)
+    (r:int)
+    (ty:ty_mach)
+    (i:Asm.expr64)
+    : Asm.frag =
+  Asm.SEQ [| Asm.BYTE op; rm_r c r; Asm.WORD (ty, i) |]
+;;
+
+(* Choose between the imm8 ([op8]) and imm32 ([op32]) signed forms: a
+ * known byte-sized immediate gets imm8 directly; otherwise emit a
+ * relaxation (32-bit form first, per the relaxation convention). *)
+let insn_rm_r_imm_s8_s32
+    (op8:int)
+    (op32:int)
+    (c:Il.cell)
+    (r:int)
+    (i:Asm.expr64)
+    : Asm.frag =
+  match i with
+      Asm.IMM n when imm_is_signed_byte n ->
+        insn_rm_r_imm op8 c r TY_i8 i
+    | _ ->
+        Asm.new_relaxation
+          [|
+            insn_rm_r_imm op32 c r TY_i32 i;
+            insn_rm_r_imm op8 c r TY_i8 i
+          |]
+;;
+
+(* Unsigned counterpart of [insn_rm_r_imm_s8_s32]: pick imm8 when the
+ * immediate fits unsigned in a byte, else relax between the u32 and
+ * u8 forms. *)
+let insn_rm_r_imm_u8_u32
+    (op8:int)
+    (op32:int)
+    (c:Il.cell)
+    (r:int)
+    (i:Asm.expr64)
+    : Asm.frag =
+  match i with
+      Asm.IMM n when imm_is_unsigned_byte n ->
+        insn_rm_r_imm op8 c r TY_u8 i
+    | _ ->
+        Asm.new_relaxation
+          [|
+            insn_rm_r_imm op32 c r TY_u32 i;
+            insn_rm_r_imm op8 c r TY_u8 i
+          |]
+;;
+
+
+(* Emit a pc-relative instruction targeting [fix], as a relaxation
+ * between a rel32 form ([op32_frag]) and a rel8 form ([op8_frag]).
+ * The displacement is measured from a mark fixup defined just after
+ * the instruction (i.e. from the next instruction's address). *)
+let insn_pcrel_relax
+    (op8_frag:Asm.frag)
+    (op32_frag:Asm.frag)
+    (fix:fixup)
+    : Asm.frag =
+  let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in
+  let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in
+  let pcrel_expr = (Asm.SUB (Asm.M_POS fix,
+                             Asm.M_POS pcrel_mark_fixup))
+  in
+    Asm.new_relaxation
+      [|
+        Asm.SEQ [| op32_frag; Asm.WORD (TY_i32, pcrel_expr); def |];
+        Asm.SEQ [| op8_frag; Asm.WORD (TY_i8, pcrel_expr); def |];
+      |]
+;;
+
+(* Fixed-size pc-relative instruction: opcode [op32] followed by a
+ * rel32 displacement to [fix], no relaxation (used e.g. for CALL). *)
+let insn_pcrel_simple (op32:int) (fix:fixup) : Asm.frag =
+  let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in
+  let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in
+  let pcrel_expr = (Asm.SUB (Asm.M_POS fix,
+                             Asm.M_POS pcrel_mark_fixup))
+  in
+    Asm.SEQ [| Asm.BYTE op32; Asm.WORD (TY_i32, pcrel_expr); def |]
+;;
+
+(* Pc-relative instruction with single-byte rel8/rel32 opcodes. *)
+let insn_pcrel (op8:int) (op32:int) (fix:fixup) : Asm.frag =
+  insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTE op32) fix
+;;
+
+(* Pc-relative instruction whose rel32 form needs a prefix byte (the
+ * 0x0f-prefixed two-byte Jcc opcodes); rel8 form is a single byte. *)
+let insn_pcrel_prefix32
+    (op8:int)
+    (prefix32:int)
+    (op32:int)
+    (fix:fixup)
+    : Asm.frag =
+  insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTES [| prefix32; op32 |]) fix
+;;
+
+(* FIXME: tighten imm-based dispatch by imm type. *)
+(* Assemble CMP [a], [b]: 8-bit imm forms (0x80 /7), 32-bit imm form
+ * with sign-extended-byte relaxation (0x83/0x81 /7), and the two
+ * register/mem orderings (0x39 / 0x3b). *)
+let cmp (a:Il.operand) (b:Il.operand) : Asm.frag =
+  match (a,b) with
+      (Il.Cell c, Il.Imm (i, TY_i8)) when is_rm8 c ->
+        insn_rm_r_imm 0x80 c slash7 TY_i8 i
+    | (Il.Cell c, Il.Imm (i, TY_u8)) when is_rm8 c ->
+        insn_rm_r_imm 0x80 c slash7 TY_u8 i
+    | (Il.Cell c, Il.Imm (i, _)) when is_rm32 c ->
+        (*
+         * NB: We can't switch on signed-ness here, as 'cmp' is
+         * defined to sign-extend its operand; i.e. we have to treat
+         * it as though you're emitting a signed byte (in the sense of
+         * immediate-size selection) even if the incoming value is
+         * unsigned.
+         *)
+        insn_rm_r_imm_s8_s32 0x83 0x81 c slash7 i
+    | (Il.Cell c, Il.Cell (Il.Reg (Il.Hreg r, _))) ->
+        insn_rm_r 0x39 c (reg r)
+    | (Il.Cell (Il.Reg (Il.Hreg r, _)), Il.Cell c) ->
+        insn_rm_r 0x3b c (reg r)
+    | _ -> raise Unrecognized
+;;
+
+(* Assemble a memory-zeroing op: fill [count] bytes at *[dst] with 0.
+ * Only the pinned form *(edi) with count in ecx is supported; the
+ * register allocator must have constrained the operands accordingly. *)
+let zero (dst:Il.cell) (count:Il.operand) : Asm.frag =
+  match (dst, count) with
+
+      ((Il.Mem (Il.RegIn ((Il.Hreg dst_ptr), None), _)),
+       Il.Cell (Il.Reg ((Il.Hreg count), _)))
+        when dst_ptr = edi && count = ecx ->
+          Asm.BYTES [|
+            0xb0; 0x0;  (* mov al, 0 : move a zero into al.           *)
+            0xf3; 0xaa; (* rep stos m8 : fill ecx bytes at [edi] with al *)
+          |]
+
+    | _ -> raise Unrecognized
+;;
+
+(* Assemble a move (plain, zero-extending, or sign-extending) from
+ * operand [src] into cell [dst].  [signed] selects MOVSX over MOVZX
+ * for widening moves and the signed immediate type for imm stores.
+ * 8-bit register operands are asserted to be AL/CL/DL/BL-capable. *)
+let mov (signed:bool) (dst:Il.cell) (src:Il.operand) : Asm.frag =
+  if is_ty8 (Il.cell_scalar_ty dst) || is_ty8 (Il.operand_scalar_ty src)
+  then
+    begin
+      (* Only eax/ecx/edx/ebx have addressable low-byte halves. *)
+      (match dst with
+           Il.Reg (Il.Hreg r, _)
+               -> assert (is_ok_r8 r) | _ -> ());
+      (match src with
+           Il.Cell (Il.Reg (Il.Hreg r, _))
+               -> assert (is_ok_r8 r) | _ -> ());
+    end;
+
+  match (signed, dst, src) with
+
+      (* m8 <- r??, r8 or truncate(r32). *)
+      (_, _, Il.Cell (Il.Reg ((Il.Hreg r), _)))
+        when is_m8 dst ->
+          insn_rm_r 0x88 dst (reg r)
+
+    (* r8 <- r8: treat as r32 <- r32. *)
+    | (_, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell)
+        when is_r8 dst && is_r8 src_cell ->
+          insn_rm_r 0x8b src_cell (reg r)
+
+    (* rm32 <- r32 *)
+    | (_, _, Il.Cell (Il.Reg ((Il.Hreg r), src_ty)))
+        when (is_r8 dst || is_rm32 dst) && is_ty32 src_ty ->
+          insn_rm_r 0x89 dst (reg r)
+
+    (* r32 <- rm32 *)
+    | (_, (Il.Reg ((Il.Hreg r), dst_ty)), Il.Cell src_cell)
+        when is_ty32 dst_ty && is_rm32 src_cell ->
+          insn_rm_r 0x8b src_cell (reg r)
+
+    (* MOVZX: r8/r32 <- zx(rm8) *)
+    | (false, Il.Reg ((Il.Hreg r, _)), Il.Cell src_cell)
+        when (is_r8 dst || is_r32 dst) && is_rm8 src_cell ->
+        Asm.SEQ [| Asm.BYTE 0x0f;
+                   insn_rm_r 0xb6 src_cell (reg r) |]
+
+    (* MOVZX: m32 <- zx(r8) *)
+    | (false, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell)))
+        when (is_m32 dst) && is_r8 src_cell ->
+        (* Fake with 2 insns:
+         *
+         * movzx r32 <- r8;   (in-place zero-extension)
+         * mov m32 <- r32;    (NB: must happen in AL/CL/DL/BL)
+         *)
+        Asm.SEQ [| Asm.BYTE 0x0f;
+                   insn_rm_r 0xb6 src_cell (reg r);
+                   insn_rm_r 0x89 dst (reg r);
+                |]
+
+    (* MOVSX: r8/r32 <- sx(rm8) *)
+    | (true, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell)
+        when (is_r8 dst || is_r32 dst) && is_rm8 src_cell ->
+        Asm.SEQ [| Asm.BYTE 0x0f;
+                   insn_rm_r 0xbe src_cell (reg r) |]
+
+    (* MOVSX: m32 <- sx(r8) *)
+    | (true, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell)))
+        when (is_m32 dst) && is_r8 src_cell ->
+        (* Fake with 2 insns:
+         *
+         * movsx r32 <- r8;   (in-place sign-extension)
+         * mov m32 <- r32;    (NB: must happen in AL/CL/DL/BL)
+         *)
+        Asm.SEQ [| Asm.BYTE 0x0f;
+                   insn_rm_r 0xbe src_cell (reg r);
+                   insn_rm_r 0x89 dst (reg r);
+                |]
+
+    (* m8 <- imm8 (signed) *)
+    | (_, _, Il.Imm ((Asm.IMM n), _))
+        when is_m8 dst && imm_is_signed_byte n && signed ->
+          insn_rm_r_imm 0xc6 dst slash0 TY_i8 (Asm.IMM n)
+
+    (* m8 <- imm8 (unsigned) *)
+    | (_, _, Il.Imm ((Asm.IMM n), _))
+        when is_m8 dst && imm_is_unsigned_byte n && (not signed) ->
+          insn_rm_r_imm 0xc6 dst slash0 TY_u8 (Asm.IMM n)
+
+    (* rm32 <- imm32 *)
+    | (_, _, Il.Imm (i, _)) when is_rm32 dst || is_r8 dst ->
+        (* BUGFIX: the immediate type must follow the [signed] flag
+         * (signed -> TY_i32, unsigned -> TY_u32), matching the
+         * m8 <- imm8 cases above; previously this was inverted, so a
+         * signed immediate was range-checked as unsigned and vice
+         * versa. *)
+        let t = if signed then TY_i32 else TY_u32 in
+          insn_rm_r_imm 0xc7 dst slash0 t i
+
+    | _ -> raise Unrecognized
+;;
+
+
+(* Assemble LEA: load an effective address into a 32-bit register.
+ * For an [Il.ImmPtr] source there is no pcrel-data addressing on x86,
+ * so the address is synthesized as next-pc (via the thunk) plus the
+ * fixup's offset from the anchor; this form is pinned to eax. *)
+let lea (dst:Il.cell) (src:Il.operand) : Asm.frag =
+  match (dst, src) with
+      (Il.Reg ((Il.Hreg r), dst_ty),
+       Il.Cell (Il.Mem (mem, _)))
+        when is_ty32 dst_ty ->
+          insn_rm_r 0x8d (Il.Mem (mem, Il.OpaqueTy)) (reg r)
+
+    | (Il.Reg ((Il.Hreg r), dst_ty),
+       Il.ImmPtr (fix, _))
+        when is_ty32 dst_ty && r = eax ->
+          let anchor = new_fixup "anchor" in
+          let fix_off = Asm.SUB ((Asm.M_POS fix),
+                                 (Asm.M_POS anchor))
+          in
+            (* NB: These instructions must come as a
+             * cluster, w/o any separation.
+             *)
+            (* call next_pc_thunk (eax <- next pc), then add the
+             * anchor-relative offset of [fix] into eax. *)
+            Asm.SEQ [|
+              insn_pcrel_simple 0xe8 get_next_pc_thunk_fixup;
+              Asm.DEF (anchor, insn_rm_r_imm 0x81 dst slash0 TY_i32 fix_off);
+            |]
+
+    | _ -> raise Unrecognized
+;;
+
+
+(* Assemble the non-unary/non-binary quads: call, push, pop, ret,
+ * jumps (conditional and unconditional), and the pseudo-ops
+ * (Dead/Regfence -> MARK, Debug -> int3, End/Nop -> nop). *)
+let select_insn_misc (q:Il.quad') : Asm.frag =
+
+  match q with
+      Il.Call c ->
+        begin
+          (* Call results are pinned to eax. *)
+          match c.Il.call_dst with
+              Il.Reg ((Il.Hreg dst), _) when dst = eax ->
+                begin
+                  match c.Il.call_targ with
+
+                      Il.CodePtr (Il.Cell c)
+                        when Il.cell_referent_ty c
+                          = Il.ScalarTy (Il.AddrTy Il.CodeTy) ->
+                          (* CALL r/m32: ff /2 *)
+                          insn_rm_r 0xff c slash2
+
+                    | Il.CodePtr (Il.ImmPtr (f, Il.CodeTy)) ->
+                        (* CALL rel32: e8 *)
+                        insn_pcrel_simple 0xe8 f
+
+                    | _ -> raise Unrecognized
+                end
+            | _ -> raise Unrecognized
+        end
+
+    | Il.Push (Il.Cell (Il.Reg ((Il.Hreg r), t))) when is_ty32 t ->
+        (* PUSH r32: 50+rd *)
+        Asm.BYTE (0x50 + (reg r))
+
+    | Il.Push (Il.Cell c) when is_rm32 c ->
+        insn_rm_r 0xff c slash6
+
+    | Il.Push (Il.Imm (Asm.IMM i, _)) when imm_is_unsigned_byte i ->
+        Asm.SEQ [| Asm.BYTE 0x6a; Asm.WORD (TY_u8, (Asm.IMM i)) |]
+
+    | Il.Push (Il.Imm (i, _)) ->
+        Asm.SEQ [| Asm.BYTE 0x68; Asm.WORD (TY_u32, i) |]
+
+    | Il.Pop (Il.Reg ((Il.Hreg r), t)) when is_ty32 t ->
+        (* POP r32: 58+rd *)
+        Asm.BYTE (0x58 + (reg r))
+
+    | Il.Pop c when is_rm32 c ->
+        insn_rm_r 0x8f c slash0
+
+    | Il.Ret -> Asm.BYTE 0xc3
+
+    | Il.Jmp j ->
+        begin
+          match (j.Il.jmp_op, j.Il.jmp_targ) with
+
+              (Il.JMP, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) ->
+                (* JMP rel8/rel32: eb / e9 *)
+                insn_pcrel 0xeb 0xe9 f
+
+            | (Il.JMP, Il.CodePtr (Il.Cell c))
+                when Il.cell_referent_ty c
+                  = Il.ScalarTy (Il.AddrTy Il.CodeTy) ->
+                insn_rm_r 0xff c slash4
+
+            (* FIXME: refactor this to handle cell-based jumps
+             * if we ever need them. So far not. *)
+            | (_, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) ->
+                (* Jcc: rel8 opcode, and 0x0f-prefixed rel32 opcode. *)
+                let (op8, op32) =
+                  match j.Il.jmp_op with
+                    | Il.JC ->  (0x72, 0x82)
+                    | Il.JNC -> (0x73, 0x83)
+                    | Il.JZ ->  (0x74, 0x84)
+                    | Il.JNZ -> (0x75, 0x85)
+                    | Il.JO ->  (0x70, 0x80)
+                    | Il.JNO -> (0x71, 0x81)
+                    | Il.JE ->  (0x74, 0x84)
+                    | Il.JNE -> (0x75, 0x85)
+
+                    | Il.JL ->  (0x7c, 0x8c)
+                    | Il.JLE -> (0x7e, 0x8e)
+                    | Il.JG ->  (0x7f, 0x8f)
+                    | Il.JGE -> (0x7d, 0x8d)
+
+                    | Il.JB ->  (0x72, 0x82)
+                    | Il.JBE -> (0x76, 0x86)
+                    | Il.JA ->  (0x77, 0x87)
+                    | Il.JAE -> (0x73, 0x83)
+                    | _ -> raise Unrecognized
+                in
+                let prefix32 = 0x0f in
+                  insn_pcrel_prefix32 op8 prefix32 op32 f
+
+            | _ -> raise Unrecognized
+        end
+
+    | Il.Dead -> Asm.MARK
+    | Il.Debug -> Asm.BYTES [| 0xcc |] (* int 3 *)
+    | Il.Regfence -> Asm.MARK
+    | Il.End -> Asm.BYTES [| 0x90 |]
+    | Il.Nop -> Asm.BYTES [| 0x90 |]
+    | _ -> raise Unrecognized
+;;
+
+
+(* Encoding table for one two-operand ALU instruction (ADD, SUB,
+ * AND, OR, XOR): its mnemonic, the /n subopcode of the immediate
+ * form, and the four r/m-dst / r/m-src opcode variants. *)
+type alu_binop_codes =
+     {
+       insn: string;
+       immslash: int;    (* mod/rm "slash" code for imm-src variant *)
+       rm_dst_op8: int;  (* opcode for 8-bit r/m dst variant *)
+       rm_dst_op32: int; (* opcode for 32-bit r/m dst variant *)
+       rm_src_op8: int;  (* opcode for 8-bit r/m src variant *)
+       rm_src_op32: int; (* opcode for 32-bit r/m src variant *)
+     }
+;;
+
+(* Assemble a two-operand ALU instruction described by [codes], with
+ * [dst] doubling as the left operand (2-address machine).
+ *
+ * NOTE(review): the 8-bit operand cases emit the *_op32 opcodes and
+ * the rm_dst_op8/rm_src_op8 fields are never read here; this looks
+ * intentional for a compiler that keeps 8-bit values in 32-bit
+ * registers, but worth confirming before relying on 8-bit flags. *)
+let alu_binop
+    (dst:Il.cell) (src:Il.operand) (codes:alu_binop_codes)
+    : Asm.frag =
+  match (dst, src) with
+      (Il.Reg ((Il.Hreg r), dst_ty), Il.Cell c)
+        when (is_ty32 dst_ty && is_rm32 c) || (is_ty8 dst_ty && is_rm8 c)
+          -> insn_rm_r codes.rm_src_op32 c (reg r)
+
+    | (_, Il.Cell (Il.Reg ((Il.Hreg r), src_ty)))
+        when (is_rm32 dst && is_ty32 src_ty) || (is_rm8 dst && is_ty8 src_ty)
+          -> insn_rm_r codes.rm_dst_op32 dst (reg r)
+
+    | (_, Il.Imm (i, _)) when is_rm32 dst || is_rm8 dst
+          -> insn_rm_r_imm_s8_s32 0x83 0x81 dst codes.immslash i
+
+    | _ -> raise Unrecognized
+;;
+
+
+(* Assemble the shared MUL/IMUL/DIV/IDIV shape (f7//f6 with subopcode
+ * [slash]) for operand [src].  An immediate operand is first moved
+ * into edx (clobbering it) since these opcodes take only r/m. *)
+let mul_like (src:Il.operand) (signed:bool) (slash:int)
+    : Asm.frag =
+  match src with
+      Il.Cell src when is_rm32 src ->
+        insn_rm_r 0xf7 src slash
+
+    | Il.Cell src when is_rm8 src ->
+        insn_rm_r 0xf6 src slash
+
+    | Il.Imm (_, TY_u32)
+    | Il.Imm (_, TY_i32) ->
+        let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits32) in
+        Asm.SEQ [| mov signed tmp src;
+                   insn_rm_r 0xf7 tmp slash |]
+
+    | Il.Imm (_, TY_u8)
+    | Il.Imm (_, TY_i8) ->
+        let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits8) in
+        Asm.SEQ [| mov signed tmp src;
+                   insn_rm_r 0xf6 tmp slash |]
+
+    | _ -> raise Unrecognized
+;;
+
+
+(* Top-level instruction selector: assemble one quad into an x86 frag.
+ * Handles unary ops, lea, cmp, and the binary ops; everything else is
+ * delegated to [select_insn_misc].  Binary quads require dst = lhs
+ * (this is a 2-address machine); mul/div/mod results are pinned to
+ * eax, and div/mod zero edx first. *)
+let select_insn (q:Il.quad) : Asm.frag =
+  match q.Il.quad_body with
+      Il.Unary u ->
+        (* NEG/NOT are read-modify-write; require src = dst. *)
+        let unop s =
+          if u.Il.unary_src = Il.Cell u.Il.unary_dst
+          then insn_rm_r 0xf7 u.Il.unary_dst s
+          else raise Unrecognized
+        in
+          begin
+            match u.Il.unary_op with
+                Il.UMOV -> mov false u.Il.unary_dst u.Il.unary_src
+              | Il.IMOV -> mov true u.Il.unary_dst u.Il.unary_src
+              | Il.NEG -> unop slash3
+              | Il.NOT -> unop slash2
+              | Il.ZERO -> zero u.Il.unary_dst u.Il.unary_src
+          end
+
+    | Il.Lea le -> lea le.Il.lea_dst le.Il.lea_src
+
+    | Il.Cmp c -> cmp c.Il.cmp_lhs c.Il.cmp_rhs
+
+    | Il.Binary b ->
+        begin
+          if Il.Cell b.Il.binary_dst = b.Il.binary_lhs
+          then
+            let binop = alu_binop b.Il.binary_dst b.Il.binary_rhs in
+            let mulop = mul_like b.Il.binary_rhs in
+
+            (* xor edx, edx then the f7//f6 divide form. *)
+            let divop signed slash =
+              Asm.SEQ [|
+                (* xor edx edx, then mul_like. *)
+                insn_rm_r 0x33 (rc edx) (reg edx);
+                mul_like b.Il.binary_rhs signed slash
+              |]
+            in
+
+            (* Divide, then take the remainder from edx. *)
+            let modop signed slash =
+              Asm.SEQ [|
+                (* divop, then mov remainder to eax instead. *)
+                divop signed slash;
+                mov false (rc eax) (ro edx)
+              |]
+            in
+
+            (* Shifts: imm count (masked to 8 bits) or count in cl. *)
+            let shiftop slash =
+              let src = b.Il.binary_rhs in
+              let dst = b.Il.binary_dst in
+              let mask i = Asm.AND (i, Asm.IMM 0xffL) in
+                if is_rm8 dst
+                then
+                  match src with
+                      Il.Imm (i, _) ->
+                        insn_rm_r_imm 0xC0 dst slash TY_u8 (mask i)
+                    | Il.Cell (Il.Reg ((Il.Hreg r), _))
+                        when r = ecx ->
+                        Asm.SEQ [| Asm.BYTE 0xD2; rm_r dst slash |]
+                    | _ -> raise Unrecognized
+                else
+                  match src with
+                      Il.Imm (i, _) ->
+                        insn_rm_r_imm 0xC1 dst slash TY_u8 (mask i)
+                    | Il.Cell (Il.Reg ((Il.Hreg r), _))
+                        when r = ecx ->
+                        Asm.SEQ [| Asm.BYTE 0xD3; rm_r dst slash |]
+                    | _ -> raise Unrecognized
+            in
+
+              match (b.Il.binary_dst, b.Il.binary_op) with
+                  (_, Il.ADD) -> binop { insn="ADD";
+                                         immslash=slash0;
+                                         rm_dst_op8=0x0;
+                                         rm_dst_op32=0x1;
+                                         rm_src_op8=0x2;
+                                         rm_src_op32=0x3; }
+                | (_, Il.SUB) -> binop { insn="SUB";
+                                         immslash=slash5;
+                                         rm_dst_op8=0x28;
+                                         rm_dst_op32=0x29;
+                                         rm_src_op8=0x2a;
+                                         rm_src_op32=0x2b; }
+                | (_, Il.AND) -> binop { insn="AND";
+                                         immslash=slash4;
+                                         rm_dst_op8=0x20;
+                                         rm_dst_op32=0x21;
+                                         rm_src_op8=0x22;
+                                         rm_src_op32=0x23; }
+                | (_, Il.OR) -> binop { insn="OR";
+                                        immslash=slash1;
+                                        rm_dst_op8=0x08;
+                                        rm_dst_op32=0x09;
+                                        rm_src_op8=0x0a;
+                                        rm_src_op32=0x0b; }
+                | (_, Il.XOR) -> binop { insn="XOR";
+                                         immslash=slash6;
+                                         rm_dst_op8=0x30;
+                                         rm_dst_op32=0x31;
+                                         rm_src_op8=0x32;
+                                         rm_src_op32=0x33; }
+
+                | (_, Il.LSL) -> shiftop slash4
+                | (_, Il.LSR) -> shiftop slash5
+                | (_, Il.ASR) -> shiftop slash7
+
+                | (Il.Reg (Il.Hreg r, t), Il.UMUL)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                      mulop false slash4
+
+                | (Il.Reg (Il.Hreg r, t), Il.IMUL)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                      mulop true slash5
+
+                | (Il.Reg (Il.Hreg r, t), Il.UDIV)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                      divop false slash6
+
+                | (Il.Reg (Il.Hreg r, t), Il.IDIV)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                      divop true slash7
+
+                | (Il.Reg (Il.Hreg r, t), Il.UMOD)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                      modop false slash6
+
+                | (Il.Reg (Il.Hreg r, t), Il.IMOD)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                      modop true slash7
+
+                | _ -> raise Unrecognized
+          else raise Unrecognized
+        end
+    | _ -> select_insn_misc q.Il.quad_body
+;;
+
+
+(* Build an emitter with virtual registers disabled, configured with
+ * this backend's quad-preallocation hook and 2-address constraint;
+ * used for glue code written directly against hard registers. *)
+let new_emitter_without_vregs _ : Il.emitter =
+  Il.new_emitter
+    abi.Abi.abi_prealloc_quad
+    abi.Abi.abi_is_2addr_machine
+    false None
+;;
+
+(* Assemble an array of quads into one frag.  Quad fixups become
+ * Asm.DEF marks; Enter/Leave quads open and close nested scopes whose
+ * accumulated frags are wrapped in a DEF of the scope's fixup.  An
+ * unrecognizable quad reports a session error rather than raising. *)
+let select_insns (sess:Session.sess) (q:Il.quads) : Asm.frag =
+  (* Stack of per-scope frag accumulators (reversed lists). *)
+  let scopes = Stack.create () in
+  let fixups = Stack.create () in
+  let pop_frags _ =
+    Asm.SEQ (Array.of_list
+               (List.rev
+                  (!(Stack.pop scopes))))
+  in
+    ignore (Stack.push (ref []) scopes);
+    for i = 0 to (Array.length q) - 1 do
+      let append frag =
+        let frags = Stack.top scopes in
+          frags := frag :: (!frags)
+      in
+        begin
+          match q.(i).Il.quad_fixup with
+              None -> ()
+            | Some f -> append (Asm.DEF (f, Asm.MARK))
+        end;
+        begin
+          match q.(i).Il.quad_body with
+              Il.Enter f ->
+                Stack.push f fixups;
+                Stack.push (ref []) scopes;
+            | Il.Leave ->
+                append (Asm.DEF (Stack.pop fixups, pop_frags ()))
+            | _ ->
+                try
+                  append (select_insn q.(i))
+                with
+                    Unrecognized ->
+                      Session.fail sess
+                        "E:Assembly error: unrecognized quad: %s\n%!"
+                        (Il.string_of_quad reg_str q.(i));
+                      ()
+        end
+    done;
+    pop_frags()
+;;
+
+(* Assemble an emitter's accumulated quads; propagate any selection
+ * failure (already reported to the session) by raising. *)
+let frags_of_emitted_quads (sess:Session.sess) (e:Il.emitter) : Asm.frag =
+  let frag = select_insns sess e.Il.emit_quads in
+    if sess.Session.sess_failed
+    then raise Unrecognized
+    else frag
+;;
+
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)