diff options
| author | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
|---|---|---|
| committer | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
| commit | d6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch) | |
| tree | b425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/be/x86.ml | |
| parent | Initial git commit. (diff) | |
| download | rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip | |
Populate tree.
Diffstat (limited to 'src/boot/be/x86.ml')
| -rw-r--r-- | src/boot/be/x86.ml | 2205 |
1 files changed, 2205 insertions, 0 deletions
diff --git a/src/boot/be/x86.ml b/src/boot/be/x86.ml new file mode 100644 index 00000000..01b7e299 --- /dev/null +++ b/src/boot/be/x86.ml @@ -0,0 +1,2205 @@ +(* + * x86/ia32 instructions have 6 parts: + * + * [pre][op][modrm][sib][disp][imm] + * + * [pre] = 0..4 bytes of prefix + * [op] = 1..3 byte opcode + * [modrm] = 0 or 1 byte: [mod:2][reg/op:3][r/m:3] + * [sib] = 0 or 1 byte: [scale:2][index:3][base:3] + * [disp] = 1, 2 or 4 byte displacement + * [imm] = 1, 2 or 4 byte immediate + * + * So between 1 and 17 bytes total. + * + * We're not going to use sib, but modrm is worth discussing. + * + * The high two bits of modrm denote an addressing mode. The modes are: + * + * 00 - "mostly" *(reg) + * 01 - "mostly" *(reg) + disp8 + * 10 - "mostly" *(reg) + disp32 + * 11 - reg + * + * The next-lowest 3 bits denote a specific register, or a subopcode if + * there is a fixed register or only one operand. The instruction format + * reference will say "/<n>" for some number n, if a fixed subopcode is used. + * It'll say "/r" if the instruction uses this field to specify a register. + * + * The registers specified in this field are: + * + * 000 - EAX or XMM0 + * 001 - ECX or XMM1 + * 010 - EDX or XMM2 + * 011 - EBX or XMM3 + * 100 - ESP or XMM4 + * 101 - EBP or XMM5 + * 110 - ESI or XMM6 + * 111 - EDI or XMM7 + * + * The final low 3 bits denote sub-modes of the primary mode selected + * with the top 2 bits. In particular, they "mostly" select the reg that is + * to be used for effective address calculation. + * + * For the most part, these follow the same numbering order: EAX, ECX, EDX, + * EBX, ESP, EBP, ESI, EDI. There are two unusual deviations from the rule + * though: + * + * - In primary modes 00, 01 and 10, r/m=100 means "use SIB byte". You can + * use (unscaled) ESP as the base register in these modes by appending the + * SIB byte 0x24. We do that in our rm_r operand-encoder function. 
+ * + * - In primary mode 00, r/m=101 means "just disp32", no register is + * involved. There is no way to use EBP in primary mode 00. If you try, we + * just decay into a mode 01 with an appended 8-bit immediate displacement. + * + * Some opcodes are written 0xNN +rd. This means "we decided to chew up a + * whole pile of opcodes here, with each opcode including a hard-wired + * reference to a register". For example, POP is "0x58 +rd", which means that + * the 1-byte insns 0x58..0x5f are chewed up for "POP EAX" ... "POP EDI" + * (again, the canonical order of register numberings) + *) + +(* + * Notes on register availability of x86: + * + * There are 8 GPRs but we use 2 of them for specific purposes: + * + * - ESP always points to the current stack frame. + * - EBP always points to the current frame base. + * + * We tell IL that we have 6 GPRs then, and permit most register-register ops + * on any of these 6, mostly-unconstrained. + * + *) + +open Common;; + +exception Unrecognized +;; + +let modrm m rm reg_or_subopcode = + if (((m land 0b11) != m) or + ((rm land 0b111) != rm) or + ((reg_or_subopcode land 0b111) != reg_or_subopcode)) + then raise (Invalid_argument "X86.modrm_deref") + else + ((((m land 0b11) lsl 6) + lor + (rm land 0b111)) + lor + ((reg_or_subopcode land 0b111) lsl 3)) +;; + +let modrm_deref_reg = modrm 0b00 ;; +let modrm_deref_disp32 = modrm 0b00 0b101 ;; +let modrm_deref_reg_plus_disp8 = modrm 0b01 ;; +let modrm_deref_reg_plus_disp32 = modrm 0b10 ;; +let modrm_reg = modrm 0b11 ;; + +let slash0 = 0;; +let slash1 = 1;; +let slash2 = 2;; +let slash3 = 3;; +let slash4 = 4;; +let slash5 = 5;; +let slash6 = 6;; +let slash7 = 7;; + + +(* + * Translate an IL-level hwreg number from 0..nregs into the 3-bit code number + * used through the mod r/m byte and /r sub-register specifiers of the x86 + * ISA. + * + * See "Table 2-2: 32-Bit Addressing Forms with the ModR/M Byte", in the IA32 + * Architecture Software Developer's Manual, volume 2a. 
+ *) + +let eax = 0 +let ecx = 1 +let ebx = 2 +let esi = 3 +let edi = 4 +let edx = 5 +let ebp = 6 +let esp = 7 + +let code_eax = 0b000;; +let code_ecx = 0b001;; +let code_edx = 0b010;; +let code_ebx = 0b011;; +let code_esp = 0b100;; +let code_ebp = 0b101;; +let code_esi = 0b110;; +let code_edi = 0b111;; + +let reg r = + match r with + 0 -> code_eax + | 1 -> code_ecx + | 2 -> code_ebx + | 3 -> code_esi + | 4 -> code_edi + | 5 -> code_edx + (* Never assigned by the register allocator, but synthetic code uses + them *) + | 6 -> code_ebp + | 7 -> code_esp + | _ -> raise (Invalid_argument "X86.reg") +;; + + +let dwarf_eax = 0;; +let dwarf_ecx = 1;; +let dwarf_edx = 2;; +let dwarf_ebx = 3;; +let dwarf_esp = 4;; +let dwarf_ebp = 5;; +let dwarf_esi = 6;; +let dwarf_edi = 7;; + +let dwarf_reg r = + match r with + 0 -> dwarf_eax + | 1 -> dwarf_ecx + | 2 -> dwarf_ebx + | 3 -> dwarf_esi + | 4 -> dwarf_edi + | 5 -> dwarf_edx + | 6 -> dwarf_ebp + | 7 -> dwarf_esp + | _ -> raise (Invalid_argument "X86.dwarf_reg") + +let reg_str r = + match r with + 0 -> "eax" + | 1 -> "ecx" + | 2 -> "ebx" + | 3 -> "esi" + | 4 -> "edi" + | 5 -> "edx" + | 6 -> "ebp" + | 7 -> "esp" + | _ -> raise (Invalid_argument "X86.reg_str") +;; + +(* This is a basic ABI. You might need to customize it by platform. 
*) +let (n_hardregs:int) = 6;; +let (n_callee_saves:int) = 4;; + + +let is_ty32 (ty:Il.scalar_ty) : bool = + match ty with + Il.ValTy (Il.Bits32) -> true + | Il.AddrTy _ -> true + | _ -> false +;; + +let is_r32 (c:Il.cell) : bool = + match c with + Il.Reg (_, st) -> is_ty32 st + | _ -> false +;; + +let is_rm32 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty32 st + | Il.Reg (_, st) -> is_ty32 st + | _ -> false +;; + +let is_ty8 (ty:Il.scalar_ty) : bool = + match ty with + Il.ValTy (Il.Bits8) -> true + | _ -> false +;; + +let is_m32 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty32 st + | _ -> false +;; + +let is_m8 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty8 st + | _ -> false +;; + +let is_ok_r8 (r:Il.hreg) : bool = + (r == eax || r == ebx || r == ecx || r == edx) +;; + +let is_r8 (c:Il.cell) : bool = + match c with + Il.Reg (Il.Hreg r, st) when is_ok_r8 r -> is_ty8 st + | _ -> false +;; + +let is_rm8 (c:Il.cell) : bool = + match c with + Il.Mem (_, Il.ScalarTy st) -> is_ty8 st + | _ -> is_r8 c +;; + +let prealloc_quad (quad':Il.quad') : Il.quad' = + let target_cell reg c = + Il.Reg (Il.Hreg reg, Il.cell_scalar_ty c) + in + let target_operand reg op = + Il.Cell (Il.Reg (Il.Hreg reg, Il.operand_scalar_ty op)) + in + + let target_bin_to_hreg bin dst src = + { bin with + Il.binary_rhs = target_operand src bin.Il.binary_rhs; + Il.binary_lhs = target_operand dst bin.Il.binary_lhs; + Il.binary_dst = target_cell dst bin.Il.binary_dst } + in + + let target_cmp cmp = + match cmp.Il.cmp_lhs with + (* Immediate LHS we force to eax. 
*) + Il.Imm _ -> + { cmp with + Il.cmp_lhs = target_operand eax cmp.Il.cmp_lhs } + | _ -> cmp + in + + match quad' with + Il.Binary bin -> + begin + Il.Binary + begin + match bin.Il.binary_op with + Il.IMUL | Il.UMUL + | Il.IDIV | Il.UDIV -> target_bin_to_hreg bin eax ecx + | Il.IMOD | Il.UMOD -> target_bin_to_hreg bin eax ecx + | _ -> bin + end + end + + | Il.Cmp cmp -> Il.Cmp (target_cmp cmp) + + | Il.Call c -> + let ty = Il.cell_scalar_ty c.Il.call_dst in + Il.Call { c with + Il.call_dst = Il.Reg ((Il.Hreg eax), ty) } + + | Il.Lea le -> + begin + match (le.Il.lea_dst, le.Il.lea_src) with + (Il.Reg (_, dst_ty), Il.ImmPtr _) + when is_ty32 dst_ty -> + Il.Lea { le with + Il.lea_dst = Il.Reg (Il.Hreg eax, dst_ty) } + | _ -> quad' + end + + | x -> x +;; + +let constrain_vregs (q:Il.quad) (hregs:Bits.t array) : unit = + + let involves_8bit_cell = + let b = ref false in + let qp_cell _ c = + match c with + Il.Reg (_, Il.ValTy Il.Bits8) + | Il.Mem (_, Il.ScalarTy (Il.ValTy Il.Bits8)) -> + (b := true; c) + | _ -> c + in + ignore (Il.process_quad { Il.identity_processor with + Il.qp_cell_read = qp_cell; + Il.qp_cell_write = qp_cell } q); + !b + in + + let qp_mem _ m = m in + let qp_cell _ c = + begin + match c with + Il.Reg (Il.Vreg v, _) when involves_8bit_cell -> + (* 8-bit register cells must only be al, cl, dl, bl. + * Not esi/edi. *) + let hv = hregs.(v) in + List.iter (fun bad -> Bits.set hv bad false) [esi; edi] + | _ -> () + end; + c + in + begin + match q.Il.quad_body with + Il.Binary b -> + begin + match b.Il.binary_op with + (* Shifts *) + | Il.LSL | Il.LSR | Il.ASR -> + begin + match b.Il.binary_rhs with + Il.Cell (Il.Reg (Il.Vreg v, _)) -> + let hv = hregs.(v) in + (* Shift src has to be ecx. 
*) + List.iter + (fun bad -> Bits.set hv bad false) + [eax; edx; ebx; esi; edi] + | _ -> () + end + | _ -> () + end + | _ -> () + end; + ignore + (Il.process_quad { Il.identity_processor with + Il.qp_mem = qp_mem; + Il.qp_cell_read = qp_cell; + Il.qp_cell_write = qp_cell } q) +;; + + +let clobbers (quad:Il.quad) : Il.hreg list = + match quad.Il.quad_body with + Il.Binary bin -> + begin + match bin.Il.binary_op with + Il.IMUL | Il.UMUL + | Il.IDIV | Il.UDIV -> [ edx ] + | Il.IMOD | Il.UMOD -> [ edx ] + | _ -> [] + end + | Il.Unary un -> + begin + match un.Il.unary_op with + Il.ZERO -> [ eax; edi; ecx ] + | _ -> [ ] + end + | Il.Call _ -> [ eax; ecx; edx; ] + | Il.Regfence -> [ eax; ecx; ebx; edx; edi; esi; ] + | _ -> [] +;; + + +let word_sz = 4L +;; + +let word_bits = Il.Bits32 +;; + +let word_ty = TY_u32 +;; + +let annotate (e:Il.emitter) (str:string) = + Hashtbl.add e.Il.emit_annotations e.Il.emit_pc str +;; + +let c (c:Il.cell) : Il.operand = Il.Cell c ;; +let r (r:Il.reg) : Il.cell = Il.Reg ( r, (Il.ValTy word_bits) ) ;; +let h (x:Il.hreg) : Il.reg = Il.Hreg x ;; +let rc (x:Il.hreg) : Il.cell = r (h x) ;; +let ro (x:Il.hreg) : Il.operand = c (rc x) ;; +let vreg (e:Il.emitter) : (Il.reg * Il.cell) = + let vr = Il.next_vreg e in + (vr, (Il.Reg (vr, (Il.ValTy word_bits)))) +;; +let imm (x:Asm.expr64) : Il.operand = + Il.Imm (x, word_ty) +;; +let immi (x:int64) : Il.operand = + imm (Asm.IMM x) +;; + +let imm_byte (x:Asm.expr64) : Il.operand = + Il.Imm (x, TY_u8) +;; +let immi_byte (x:int64) : Il.operand = + imm_byte (Asm.IMM x) +;; + + +let byte_off_n (i:int) : Asm.expr64 = + Asm.IMM (Int64.of_int i) +;; + +let byte_n (reg:Il.reg) (i:int) : Il.cell = + let imm = byte_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8)) +;; + +let word_off_n (i:int) : Asm.expr64 = + Asm.IMM (Int64.mul (Int64.of_int i) word_sz) +;; + +let word_at (reg:Il.reg) : Il.cell = + let mem = Il.RegIn (reg, None) in + Il.Mem (mem, Il.ScalarTy 
(Il.ValTy word_bits)) +;; + +let word_at_off (reg:Il.reg) (off:Asm.expr64) : Il.cell = + let mem = Il.RegIn (reg, Some off) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits)) +;; + +let word_n (reg:Il.reg) (i:int) : Il.cell = + word_at_off reg (word_off_n i) +;; + +let reg_codeptr (reg:Il.reg) : Il.code = + Il.CodePtr (Il.Cell (Il.Reg (reg, Il.AddrTy Il.CodeTy))) +;; + +let word_n_low_byte (reg:Il.reg) (i:int) : Il.cell = + let imm = word_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8)) +;; + +let wordptr_n (reg:Il.reg) (i:int) : Il.cell = + let imm = word_off_n i in + let mem = Il.RegIn (reg, Some imm) in + Il.Mem (mem, Il.ScalarTy (Il.AddrTy (Il.ScalarTy (Il.ValTy word_bits)))) +;; + +let get_element_ptr = Il.get_element_ptr word_bits reg_str ;; + +let save_callee_saves (e:Il.emitter) : unit = + Il.emit e (Il.Push (ro ebp)); + Il.emit e (Il.Push (ro edi)); + Il.emit e (Il.Push (ro esi)); + Il.emit e (Il.Push (ro ebx)); +;; + + +let restore_callee_saves (e:Il.emitter) : unit = + Il.emit e (Il.Pop (rc ebx)); + Il.emit e (Il.Pop (rc esi)); + Il.emit e (Il.Pop (rc edi)); + Il.emit e (Il.Pop (rc ebp)); +;; + + +(* restores registers from the frame base without updating esp: + * - sets ebp, edi, esi, ebx to stored values from frame base + * - sets `retpc' register to stored retpc from frame base + * - sets `base' register to current fp + *) +let restore_frame_base (e:Il.emitter) (base:Il.reg) (retpc:Il.reg) : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + mov (r base) (ro ebp); + mov (rc ebx) (c (word_at base)); + mov (rc esi) (c (word_n base 1)); + mov (rc edi) (c (word_n base 2)); + mov (rc ebp) (c (word_n base 3)); + mov (r retpc) (c (word_n base 4)); +;; + + +(* + * Our arrangement on x86 is this: + * + * *ebp+20+(4*N) = [argN ] + * ... 
+ * *ebp+24 = [arg1 ] = task ptr + * *ebp+20 = [arg0 ] = out ptr + * *ebp+16 = [retpc ] + * *ebp+12 = [old_ebp] + * *ebp+8 = [old_edi] + * *ebp+4 = [old_esi] + * *ebp = [old_ebx] + * + * For x86-cdecl: + * + * %eax, %ecx, %edx are "caller save" registers + * %ebp, %ebx, %esi, %edi are "callee save" registers + * + *) + +let frame_base_words = 5 (* eip,ebp,edi,esi,ebx *) ;; +let frame_base_sz = Int64.mul (Int64.of_int frame_base_words) word_sz;; + +let frame_info_words = 2 (* crate ptr, crate-rel frame info disp *) ;; +let frame_info_sz = Int64.mul (Int64.of_int frame_info_words) word_sz;; + +let implicit_arg_words = 2 (* task ptr,out ptr *);; +let implicit_args_sz = Int64.mul (Int64.of_int implicit_arg_words) word_sz;; + +let out_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words);; +let task_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words+1);; +let ty_param_n i = + wordptr_n (Il.Hreg ebp) (frame_base_words + implicit_arg_words + i);; + +let spill_slot (i:Il.spill) : Il.mem = + let imm = (Asm.IMM + (Int64.neg + (Int64.add frame_info_sz + (Int64.mul word_sz + (Int64.of_int (i+1)))))) + in + Il.RegIn ((Il.Hreg ebp), Some imm) +;; + + +let get_next_pc_thunk_fixup = new_fixup "glue$get_next_pc" +;; + +let emit_get_next_pc_thunk (e:Il.emitter) : unit = + let sty = Il.AddrTy Il.CodeTy in + let rty = Il.ScalarTy sty in + let deref_esp = Il.Mem (Il.RegIn (Il.Hreg esp, None), rty) in + let eax = (Il.Reg (Il.Hreg eax, sty)) in + Il.emit_full e (Some get_next_pc_thunk_fixup) [] + (Il.umov eax (Il.Cell deref_esp)); + Il.emit e Il.Ret; +;; + +let get_next_pc_thunk : (Il.reg * fixup * (Il.emitter -> unit)) = + (Il.Hreg eax, get_next_pc_thunk_fixup, emit_get_next_pc_thunk) +;; + +let emit_c_call + (e:Il.emitter) + (ret:Il.cell) + (tmp1:Il.reg) + (tmp2:Il.reg) + (nabi:nabi) + (in_prologue:bool) + (fptr:Il.code) + (args:Il.operand array) + : unit = + + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let binary op dst imm = emit (Il.binary op dst (c dst) (immi 
imm)) in + + (* rust calls get task as arg0 *) + let args = + if nabi.nabi_convention = CONV_rust + then Array.append [| c task_ptr |] args + else args + in + let nargs = Array.length args in + let arg_sz = Int64.mul (Int64.of_int nargs) word_sz + in + + mov (r tmp1) (c task_ptr); (* tmp1 = task from argv[-1] *) + mov (r tmp2) (ro esp); (* tmp2 = esp *) + mov (* task->rust_sp = tmp2 *) + (word_n tmp1 Abi.task_field_rust_sp) + (c (r tmp2)); + mov (* esp = task->runtime_sp *) + (rc esp) + (c (word_n tmp1 Abi.task_field_runtime_sp)); + + binary Il.SUB (rc esp) arg_sz; (* make room on the stack *) + binary Il.AND (rc esp) (* and 16-byte align sp *) + 0xfffffffffffffff0L; + + Array.iteri + begin + fun i (arg:Il.operand) -> (* write args to C stack *) + match arg with + Il.Cell (Il.Mem (a, ty)) -> + begin + match a with + Il.RegIn (Il.Hreg base, off) when base == esp -> + mov (r tmp1) (c (Il.Mem (Il.RegIn (tmp2, off), ty))); + mov (word_n (h esp) i) (c (r tmp1)); + | _ -> + mov (r tmp1) arg; + mov (word_n (h esp) i) (c (r tmp1)); + end + | _ -> + mov (word_n (h esp) i) arg + end + args; + + match ret with + Il.Mem (Il.RegIn (Il.Hreg base, _), _) when base == esp -> + assert (not in_prologue); + + (* If ret is esp-relative, use a temporary register until we + switched stacks. *) + + emit (Il.call (r tmp1) fptr); + mov (r tmp2) (c task_ptr); + mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp)); + mov ret (c (r tmp1)); + + | _ when in_prologue -> + (* + * We have to do something a little surprising here: + * we're doing a 'grow' call so ebp is going to point + * into a dead stack frame on call-return. So we + * temporarily store task-ptr into ebp and then reload + * esp *and* ebp via ebp->rust_sp on the other side of + * the call. 
+ *) + mov (rc ebp) (c task_ptr); + emit (Il.call ret fptr); + mov (rc esp) (c (word_n (h ebp) Abi.task_field_rust_sp)); + mov (rc ebp) (ro esp); + + | _ -> + emit (Il.call ret fptr); + mov (r tmp2) (c task_ptr); + mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp)); +;; + +let emit_void_prologue_call + (e:Il.emitter) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + let callee = Abi.load_fixup_codeptr e (h eax) fn true nabi.nabi_indirect in + emit_c_call e (rc eax) (h edx) (h ecx) nabi true callee args +;; + +let emit_native_call + (e:Il.emitter) + (ret:Il.cell) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + + let (tmp1, _) = vreg e in + let (tmp2, _) = vreg e in + let (freg, _) = vreg e in + let callee = Abi.load_fixup_codeptr e freg fn true nabi.nabi_indirect in + emit_c_call e ret tmp1 tmp2 nabi false callee args +;; + +let emit_native_void_call + (e:Il.emitter) + (nabi:nabi) + (fn:fixup) + (args:Il.operand array) + : unit = + + let (ret, _) = vreg e in + emit_native_call e (r ret) nabi fn args +;; + +let emit_native_call_in_thunk + (e:Il.emitter) + (ret:Il.cell) + (nabi:nabi) + (fn:Il.operand) + (args:Il.operand array) + : unit = + + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + + begin + match fn with + (* + * NB: old path, remove when/if you're sure you don't + * want native-linker-symbol-driven requirements. + *) + Il.ImmPtr (fix, _) -> + let code = + Abi.load_fixup_codeptr e (h eax) fix true nabi.nabi_indirect + in + emit_c_call e (rc eax) (h edx) (h ecx) nabi false code args; + + | _ -> + (* + * NB: new path, ignores nabi_indirect, assumes + * indirect via pointer from upcall_require_c_sym + * or crate cache. 
+ *) + mov (rc eax) fn; + let cell = Il.Reg (h eax, Il.AddrTy Il.CodeTy) in + let fptr = Il.CodePtr (Il.Cell cell) in + emit_c_call e (rc eax) (h edx) (h ecx) nabi false fptr args; + end; + + match ret with + Il.Reg (r, _) -> mov (word_at r) (ro eax) + | _ -> mov (rc edx) (c ret); + mov (word_at (h edx)) (ro eax) +;; + +let unwind_glue + (e:Il.emitter) + (nabi:nabi) + (exit_task_fixup:fixup) + : unit = + + let fp_n = word_n (Il.Hreg ebp) in + let edx_n = word_n (Il.Hreg edx) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let push x = emit (Il.Push x) in + let pop x = emit (Il.Pop x) in + let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in + let codefix fix = Il.CodePtr (Il.ImmPtr (fix, Il.CodeTy)) in + let mark fix = Il.emit_full e (Some fix) [] Il.Dead in + let glue_field = Abi.frame_glue_fns_field_drop in + + let repeat_jmp_fix = new_fixup "repeat jump" in + let skip_jmp_fix = new_fixup "skip jump" in + let exit_jmp_fix = new_fixup "exit jump" in + + mov (rc edx) (c task_ptr); (* switch back to rust stack *) + mov + (rc esp) + (c (edx_n Abi.task_field_rust_sp)); + + mark repeat_jmp_fix; + + mov (rc esi) (c (fp_n (-1))); (* esi <- crate ptr *) + mov (rc edx) (c (fp_n (-2))); (* edx <- frame glue functions. *) + emit (Il.cmp (ro edx) (immi 0L)); + + emit + (Il.jmp Il.JE + (codefix skip_jmp_fix)); (* if struct* is nonzero *) + add edx esi; (* add crate ptr to disp. *) + mov + (rc ecx) + (c (edx_n glue_field)); (* ecx <- drop glue *) + emit (Il.cmp (ro ecx) (immi 0L)); + + emit + (Il.jmp Il.JE + (codefix skip_jmp_fix)); (* if glue-fn is nonzero *) + add ecx esi; (* add crate ptr to disp. *) + push (ro ebp); (* frame-to-drop *) + push (c task_ptr); (* form usual call to glue *) + push (immi 0L); (* outptr *) + emit (Il.call (rc eax) + (reg_codeptr (h ecx))); (* call glue_fn, trashing eax. 
*) + pop (rc eax); + pop (rc eax); + pop (rc eax); + + mark skip_jmp_fix; + mov (rc edx) (c (fp_n 3)); (* load next fp (callee-saves[3]) *) + emit (Il.cmp (ro edx) (immi 0L)); + emit (Il.jmp Il.JE + (codefix exit_jmp_fix)); (* if nonzero *) + mov (rc ebp) (ro edx); (* move to next frame *) + emit (Il.jmp Il.JMP + (codefix repeat_jmp_fix)); (* loop *) + + (* exit path. *) + mark exit_jmp_fix; + + let callee = + Abi.load_fixup_codeptr + e (h eax) exit_task_fixup false nabi.nabi_indirect + in + emit_c_call + e (rc eax) (h edx) (h ecx) nabi false callee [| (c task_ptr) |]; +;; + +(* Puts result in eax; clobbers ecx, edx in the process. *) +let rec calculate_sz (e:Il.emitter) (size:size) : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let push x = emit (Il.Push x) in + let pop x = emit (Il.Pop x) in + let neg x = emit (Il.unary Il.NEG (rc x) (ro x)) in + let bnot x = emit (Il.unary Il.NOT (rc x) (ro x)) in + let band x y = emit (Il.binary Il.AND (rc x) (ro x) (ro y)) in + let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in + let mul x y = emit (Il.binary Il.UMUL (rc x) (ro x) (ro y)) in + let subi x y = emit (Il.binary Il.SUB (rc x) (ro x) (immi y)) in + let eax_gets_a_and_ecx_gets_b a b = + calculate_sz e b; + push (ro eax); + calculate_sz e a; + pop (rc ecx); + in + match size with + SIZE_fixed i -> + mov (rc eax) (immi i) + + | SIZE_fixup_mem_sz f -> + mov (rc eax) (imm (Asm.M_SZ f)) + + | SIZE_fixup_mem_pos f -> + mov (rc eax) (imm (Asm.M_POS f)) + + | SIZE_param_size i -> + mov (rc eax) (Il.Cell (ty_param_n i)); + mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_size)) + + | SIZE_param_align i -> + mov (rc eax) (Il.Cell (ty_param_n i)); + mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_align)) + + | SIZE_rt_neg a -> + calculate_sz e a; + neg eax + + | SIZE_rt_add (a, b) -> + eax_gets_a_and_ecx_gets_b a b; + add eax ecx + + | SIZE_rt_mul (a, b) -> + eax_gets_a_and_ecx_gets_b a b; + mul eax ecx + + | 
SIZE_rt_max (a, b) -> + eax_gets_a_and_ecx_gets_b a b; + emit (Il.cmp (ro eax) (ro ecx)); + let jmp_pc = e.Il.emit_pc in + emit (Il.jmp Il.JAE Il.CodeNone); + mov (rc eax) (ro ecx); + Il.patch_jump e jmp_pc e.Il.emit_pc; + + | SIZE_rt_align (align, off) -> + (* + * calculate off + pad where: + * + * pad = (align - (off mod align)) mod align + * + * In our case it's always a power of two, + * so we can just do: + * + * mask = align-1 + * off += mask + * off &= ~mask + * + *) + eax_gets_a_and_ecx_gets_b off align; + subi ecx 1L; + add eax ecx; + bnot ecx; + band eax ecx; +;; + +let rec size_calculation_stack_highwater (size:size) : int = + match size with + SIZE_fixed _ + | SIZE_fixup_mem_sz _ + | SIZE_fixup_mem_pos _ + | SIZE_param_size _ + | SIZE_param_align _ -> 0 + | SIZE_rt_neg a -> + (size_calculation_stack_highwater a) + | SIZE_rt_max (a, b) -> + (size_calculation_stack_highwater a) + + (size_calculation_stack_highwater b) + | SIZE_rt_add (a, b) + | SIZE_rt_mul (a, b) + | SIZE_rt_align (a, b) -> + (size_calculation_stack_highwater a) + + (size_calculation_stack_highwater b) + + 1 +;; + +let boundary_sz = + (Asm.IMM + (Int64.add (* Extra non-frame room: *) + frame_base_sz (* to safely enter the next frame, *) + frame_base_sz)) (* and make a 'grow' upcall there. 
*) +;; + +let stack_growth_check + (e:Il.emitter) + (nabi:nabi) + (grow_task_fixup:fixup) + (growsz:Il.operand) + (grow_jmp:Il.label option) + (restart_pc:Il.label) + (end_reg:Il.reg) (* + * stack limit on entry, + * new stack pointer on exit + *) + (tmp_reg:Il.reg) (* temporary (trashed) *) + : unit = + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in + let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in + mov (r tmp_reg) (ro esp); (* tmp = esp *) + sub (r tmp_reg) growsz; (* tmp -= size-request *) + emit (Il.cmp (c (r end_reg)) (c (r tmp_reg))); + (* + * Jump *over* 'grow' upcall on non-underflow: + * if end_reg <= tmp_reg + *) + + let bypass_grow_upcall_jmp_pc = e.Il.emit_pc in + emit (Il.jmp Il.JBE Il.CodeNone); + + begin + match grow_jmp with + None -> () + | Some j -> Il.patch_jump e j e.Il.emit_pc + end; + (* Extract growth-amount from tmp_reg. *) + mov (r end_reg) (ro esp); + sub (r end_reg) (c (r tmp_reg)); + add (r end_reg) (Il.Imm (boundary_sz, word_ty)); + (* Perform 'grow' upcall, then restart frame-entry. *) + emit_void_prologue_call e nabi grow_task_fixup [| c (r end_reg) |]; + emit (Il.jmp Il.JMP (Il.CodeLabel restart_pc)); + Il.patch_jump e bypass_grow_upcall_jmp_pc e.Il.emit_pc +;; + +let fn_prologue + (e:Il.emitter) + (framesz:size) + (callsz:size) + (nabi:nabi) + (grow_task_fixup:fixup) + : unit = + + let esi_n = word_n (h esi) in + let emit = Il.emit e in + let mov dst src = emit (Il.umov dst src) in + let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in + let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in + + (* We may be in a dynamic-sized frame. This makes matters complex, + * as we can't just perform a simple growth check in terms of a + * static size. The check is against a dynamic size, and we need to + * calculate that size. 
+ * + * Unlike size-calculations in 'trans', we do not use vregs to + * calculate the frame size; instead we use a PUSH/POP stack-machine + * translation that doesn't disturb the registers we're + * somewhat-carefully *using* during frame setup. + * + * This only pushes the problem back a little ways though: we still + * need to be sure we have enough room to do the PUSH/POP + * calculation. We refer to this amount of space as the 'primordial' + * frame size, which can *thankfully* be calculated exactly from the + * arithmetic expression we're aiming to calculate. So we make room + * for the primordial frame, run the calculation of the full dynamic + * frame size, then make room *again* for this dynamic size. + * + * Our caller reserved enough room for us to push our own frame-base, + * as well as the frame-base that it will cost to do an upcall. + *) + + (* + * After we save callee-saves, We have a stack like this: + * + * | ... | + * | caller frame | + * | + spill | + * | caller arg K | + * | ... | + * | caller arg 0 | + * | retpc | <-- sp we received, top of callee frame + * | callee save 1 | + * | ... | + * | callee save N | <-- ebp and esp after saving callee-saves + * | ... | + * | callee frame | + * | + spill | + * | callee arg J | + * | ... | + * | callee arg 0 | <-- bottom of callee frame + * | next retpc | + * | next save 1 | + * | ... | + * | next save N | <-- bottom of region we must reserve + * | ... | + * + * A "frame base" is the retpc and set of callee-saves. + * + * We need to reserve room for our frame *and* the next frame-base, because + * we're going to be blindly entering the next frame-base (pushing eip and + * callee-saves) before we perform the next check. + *) + + (* + * We double the reserved callsz because we need a 'temporary tail-call + * region' above the actual call region, in case there's a drop call at the + * end of assembling the tail-call args and before copying them to callee + * position. 
+ *) + + let callsz = add_sz callsz callsz in + let n_glue_args = Int64.of_int Abi.worst_case_glue_call_args in + let n_glue_words = Int64.mul word_sz n_glue_args in + + (* + * Add in *another* word to handle an extra-awkward spill of the + * callee address that might occur during an indirect tail call. + *) + let callsz = add_sz (SIZE_fixed word_sz) callsz in + + (* + * Add in enough words for a glue-call (these occur underneath esp) + *) + let callsz = add_sz (SIZE_fixed n_glue_words) callsz in + + (* + * Cumulative dynamic-frame size. + *) + let call_and_frame_sz = add_sz callsz framesz in + + (* Already have room to save regs on entry. *) + save_callee_saves e; + + let restart_pc = e.Il.emit_pc in + + mov (rc ebp) (ro esp); (* Establish frame base. *) + mov (rc esi) (c task_ptr); (* esi = task *) + mov + (rc esi) + (c (esi_n Abi.task_field_stk)); (* esi = task->stk *) + add (rc esi) (imm + (Asm.ADD + ((word_off_n Abi.stk_field_data), + boundary_sz))); + + let (dynamic_frame_sz, dynamic_grow_jmp) = + match Il.size_to_expr64 call_and_frame_sz with + None -> + begin + let primordial_frame_sz = + Asm.IMM + (Int64.mul word_sz + (Int64.of_int + (size_calculation_stack_highwater + call_and_frame_sz))) + in + (* Primordial size-check. *) + mov (rc edi) (ro esp); (* edi = esp *) + sub (* edi -= size-request *) + (rc edi) + (imm primordial_frame_sz); + emit (Il.cmp (ro esi) (ro edi)); + + (* Jump to 'grow' upcall on underflow: if esi (bottom) is > + edi (proposed-esp) *) + + let primordial_underflow_jmp_pc = e.Il.emit_pc in + emit (Il.jmp Il.JA Il.CodeNone); + + (* Calculate dynamic frame size. *) + calculate_sz e call_and_frame_sz; + ((ro eax), Some primordial_underflow_jmp_pc) + end + | Some e -> ((imm e), None) + in + + (* "Full" frame size-check. *) + stack_growth_check e nabi grow_task_fixup + dynamic_frame_sz dynamic_grow_jmp restart_pc (h esi) (h edi); + + + (* Establish a frame, wherever we landed. *) + sub (rc esp) dynamic_frame_sz; + + (* Zero the frame. 
 *
 * FIXME: this is awful, will go away when we have proper CFI.
 *)

  mov (rc edi) (ro esp);
  mov (rc ecx) dynamic_frame_sz;
  emit (Il.unary Il.ZERO (word_at (h edi)) (ro ecx));

  (* Move esp back up over the glue region. *)
  add (rc esp) (immi n_glue_words);
;;


(* Tear down the current frame: reset esp to the frame base, pop the
 * callee-saved registers pushed by the prologue, and return. *)
let fn_epilogue (e:Il.emitter) : unit =

  (* Tear down existing frame. *)
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in
    mov (rc esp) (ro ebp);
    restore_callee_saves e;
    emit Il.Ret;
;;

(* Emit an unrolled copy of [n_bytes] from *src_ptr to *dst_ptr via
 * [tmp_reg]: whole words first, then the leftover bytes (or the reverse
 * order when descending). [ascending] selects low-to-high vs high-to-low
 * copy order; the direction matters when the two regions overlap (see
 * the NOTE in fn_tail_call below). *)
let inline_memcpy
    (e:Il.emitter)
    (n_bytes:int64)
    (dst_ptr:Il.reg)
    (src_ptr:Il.reg)
    (tmp_reg:Il.reg)
    (ascending:bool)
    : unit =
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in
  let bpw = Int64.to_int word_sz in
  let w = Int64.to_int (Int64.div n_bytes word_sz) in
  let b = Int64.to_int (Int64.rem n_bytes word_sz) in
    if ascending
    then
      begin
        for i = 0 to (w-1) do
          mov (r tmp_reg) (c (word_n src_ptr i));
          mov (word_n dst_ptr i) (c (r tmp_reg));
        done;
        for i = 0 to (b-1) do
          let off = (w*bpw) + i in
            mov (r tmp_reg) (c (byte_n src_ptr off));
            mov (byte_n dst_ptr off) (c (r tmp_reg));
        done;
      end
    else
      begin
        for i = (b-1) downto 0 do
          let off = (w*bpw) + i in
            mov (r tmp_reg) (c (byte_n src_ptr off));
            mov (byte_n dst_ptr off) (c (r tmp_reg));
        done;
        for i = (w-1) downto 0 do
          mov (r tmp_reg) (c (word_n src_ptr i));
          mov (word_n dst_ptr i) (c (r tmp_reg));
        done;
      end
;;


(* Emit a tail call: shuffle the outgoing args up over the current frame,
 * restore callee-saves, re-push the caller's retpc and jump (rather than
 * call) to [callee_code]. The caller's frame is fully torn down before
 * the jump. *)
let fn_tail_call
    (e:Il.emitter)
    (caller_callsz:int64)
    (caller_argsz:int64)
    (callee_code:Il.code)
    (callee_argsz:int64)
    : unit =
  let emit = Il.emit e in
  let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in
  let mov dst src = emit (Il.umov dst src) in
  let argsz_diff = Int64.sub caller_argsz callee_argsz in
  let callee_spill_cell = word_at_off (h esp) (Asm.IMM caller_callsz) in

    (*
     * Our outgoing arguments were prepared in a region above the call region;
     * this is reserved for the purpose of making tail-calls *only*, so we do
     * not collide with glue calls we had to make while dropping the frame,
     * after assembling our arg region.
     *
     * Thus, esp points to the "normal" arg region, and we need to move it
     * to point to the tail-call arg region. To make matters simple, both
     * regions are the same size, one atop the other.
     *)

    annotate e "tail call: move esp to temporary tail call arg-prep area";
    binary Il.ADD (rc esp) caller_callsz;

    (*
     * If we're given a non-ImmPtr callee, we may need to move it to a known
     * cell to avoid clobbering its register while we do the argument shuffle
     * below.
     *
     * Sadly, we are too register-starved to just flush our callee to a reg;
     * so we carve out an extra word of the temporary call-region and use
     * it.
     *
     * This is ridiculous, but works.
     *)
    begin
      match callee_code with
          Il.CodePtr (Il.Cell c) ->
            annotate e "tail call: spill callee-ptr to temporary memory";
            mov callee_spill_cell (Il.Cell c);

        | _ -> ()
    end;

    (* edx <- ebp; restore ebp, edi, esi, ebx; ecx <- retpc *)
    annotate e "tail call: restore callee-saves from frame base";
    restore_frame_base e (h edx) (h ecx);
    (* move edx past frame base and adjust for difference in call sizes *)
    annotate e "tail call: adjust temporary fp";
    binary Il.ADD (rc edx) (Int64.add frame_base_sz argsz_diff);

    (*
     * stack grows downwards; copy from high to low
     *
     *   bpw = word_sz
     *   w = floor(callee_argsz / word_sz)
     *   b = callee_argsz % word_sz
     *
     * byte copies:
     *   +------------------------+
     *   |                        |
     *   +------------------------+ <-- base + (w * word_sz) + (b - 1)
     *   .                        .
     *   +------------------------+
     *   |                        |
     *   +------------------------+ <-- base + (w * word_sz) + (b - b)
     * word copies:                =
     *   +------------------------+ <-- base + ((w-0) * word_sz)
     *   |        bytes           |
     *   |  (w-1)*bpw..w*bpw-1    |
     *   +------------------------+ <-- base + ((w-1) * word_sz)
     *   |        bytes           |
     *   | (w-2)*bpw..(w-1)*bpw-1 |
     *   +------------------------+ <-- base + ((w-2) * word_sz)
     *   .                        .
     *   .                        .
     *   .                        .
     *   +------------------------+
     *   |        bytes           |
     *   |     0..bpw - 1         |
     *   +------------------------+ <-- base + ((w-w) * word_sz)
     *)

    annotate e "tail call: move arg-tuple up to top of frame";
    (* NOTE: must copy top-to-bottom in case the regions overlap *)
    inline_memcpy e callee_argsz (h edx) (h esp) (h eax) false;

    (*
     * We're done with eax now; so in the case where we had to spill
     * our callee codeptr, we can reload it into eax here and rewrite
     * our callee into *eax.
     *)
    let callee_code =
      match callee_code with
          Il.CodePtr (Il.Cell _) ->
            annotate e "tail call: reload callee-ptr from temporary memory";
            mov (rc eax) (Il.Cell callee_spill_cell);
            reg_codeptr (h eax)

        | _ -> callee_code
    in

      (* esp <- edx *)
      annotate e "tail call: adjust stack pointer";
      mov (rc esp) (ro edx);
      (* PUSH ecx (retpc) *)
      annotate e "tail call: push retpc";
      emit (Il.Push (ro ecx));
      (* JMP callee_code *)
      emit (Il.jmp Il.JMP callee_code);
;;


(* Word indices of the fields of a loop-info record. *)
let loop_info_field_retpc = 0;;
let loop_info_field_sp = 1;;
let loop_info_field_fp = 2;;

(* The cell holding the current fn's incoming args: just above the frame
 * base, viewed at the given referent type. *)
let self_args_cell (self_args_rty:Il.referent_ty) : Il.cell =
  Il.Mem (Il.RegIn (h ebp, Some (Asm.IMM frame_base_sz)), self_args_rty)
;;

let activate_glue (e:Il.emitter) : unit =
  (*
   * This is a bit of glue-code. It should be emitted once per
   * compilation unit.
   *
   *   - save regs on C stack
   *   - align sp on a 16-byte boundary
   *   - save sp to task.runtime_sp (runtime_sp is thus always aligned)
   *   - load saved task sp (switch stack)
   *   - restore saved task regs
   *   - return to saved task pc
   *
   * Our incoming stack looks like this:
   *
   *   *esp+4        = [arg1   ] = task ptr
   *   *esp          = [retpc  ]
   *)

  let sp_n = word_n (Il.Hreg esp) in
  let edx_n = word_n (Il.Hreg edx) in
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in
  let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in

    mov (rc edx) (c (sp_n 1));              (* edx <- task             *)
    save_callee_saves e;
    mov
      (edx_n Abi.task_field_runtime_sp)
      (ro esp);                             (* task->runtime_sp <- esp *)
    mov
      (rc esp)
      (c (edx_n Abi.task_field_rust_sp));   (* esp <- task->rust_sp    *)

    (*
     * There are two paths we can arrive at this code from:
     *
     *
     *   1. We are activating a task for the first time. When we switch into
     *      the task stack and 'ret' to its first instruction, we'll start
     *      doing whatever the first instruction says. Probably saving
     *      registers and starting to establish a frame. Harmless stuff,
     *      doesn't look at task->rust_sp again except when it clobbers it
     *      during a later upcall.
     *
     *
     *   2. We are resuming a task that was descheduled by the yield glue
     *      below. When we switch into the task stack and 'ret', we'll be
     *      ret'ing to a very particular instruction:
     *
     *              "esp <- task->rust_sp"
     *
     *      this is the first instruction we 'ret' to after this glue, because
     *      it is the first instruction following *any* upcall, and the task
     *      we are activating was descheduled mid-upcall.
     *
     *      Unfortunately for us, we have already restored esp from
     *      task->rust_sp and are about to eat the 5 words off the top of it.
     *
     *
     *      | ...    | <-- where esp will be once we restore + ret, below,
     *      | retpc  |     and where we'd *like* task->rust_sp to wind up.
     *      | ebp    |
     *      | edi    |
     *      | esi    |
     *      | ebx    | <-- current task->rust_sp == current esp
     *
     *
     *      This is a problem. If we return to "esp <- task->rust_sp" it will
     *      push esp back down by 5 words. This manifests as a rust stack that
     *      grows by 5 words on each yield/reactivate. Not good.
     *
     *      So what we do here is just adjust task->rust_sp up 5 words as
     *      well, to mirror the movement in esp we're about to perform. That
     *      way the "esp <- task->rust_sp" we 'ret' to below will be a
     *      no-op. Esp won't move, and the task's stack won't grow.
     *)

    binary Il.ADD (edx_n Abi.task_field_rust_sp)
      (Int64.mul (Int64.of_int (n_callee_saves + 1)) word_sz);

    (**** IN TASK STACK ****)
    restore_callee_saves e;
    emit Il.Ret;
    (***********************)
    ()
;;

let yield_glue (e:Il.emitter) : unit =

  (* More glue code, this time the 'bottom half' of yielding.
   *
   * We arrived here because an upcall decided to deschedule the
   * running task. So the upcall's return address got patched to the
   * first instruction of this glue code.
   *
   * When the upcall does 'ret' it will come here, and its esp will be
   * pointing to the last argument pushed on the C stack before making
   * the upcall: the 0th argument to the upcall, which is always the
   * task ptr performing the upcall. That's where we take over.
   *
   * Our goal is to complete the descheduling
   *
   *   - Switch over to the task stack temporarily.
   *
   *   - Save the task's callee-saves onto the task stack.
   *     (the task is now 'descheduled', safe to set aside)
   *
   *   - Switch *back* to the C stack.
   *
   *   - Restore the C-stack callee-saves.
   *
   *   - Return to the caller on the C stack that activated the task.
   *
   *)
  let esp_n = word_n (Il.Hreg esp) in
  let edx_n = word_n (Il.Hreg edx) in
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in

    mov
      (rc edx) (c (esp_n 0));               (* edx <- arg0 (task)       *)
    mov
      (rc esp)
      (c (edx_n Abi.task_field_rust_sp));   (* esp <- task->rust_sp     *)
    save_callee_saves e;
    mov                                     (* task->rust_sp <- esp     *)
      (edx_n Abi.task_field_rust_sp)
      (ro esp);
    mov
      (rc esp)
      (c (edx_n Abi.task_field_runtime_sp)); (* esp <- task->runtime_sp *)

    (**** IN C STACK ****)
    restore_callee_saves e;
    emit Il.Ret;
    (***********************)
    ()
;;


(* Load the address of [fix] into the next-pc-thunk's register and push
 * it as a 32-bit word. *)
let push_pos32 (e:Il.emitter) (fix:fixup) : unit =
  let (reg, _, _) = get_next_pc_thunk in
    Abi.load_fixup_addr e reg fix Il.OpaqueTy;
    Il.emit e (Il.Push (Il.Cell (Il.Reg (reg, Il.AddrTy Il.OpaqueTy))))
;;

(* Emit the program entry point: set up a frame, forward argc/argv plus
 * the main-fn and crate pointers to rust_start, then unwind and return. *)
let objfile_start
    (e:Il.emitter)
    ~(start_fixup:fixup)
    ~(rust_start_fixup:fixup)
    ~(main_fn_fixup:fixup)
    ~(crate_fixup:fixup)
    ~(indirect_start:bool)
    : unit =
  let ebp_n = word_n (Il.Hreg ebp) in
  let emit = Il.emit e in
  let mov dst src = emit (Il.umov dst src) in
  let push_pos32 = push_pos32 e in
    Il.emit_full e (Some start_fixup) [] Il.Dead;
    save_callee_saves e;
    mov (rc ebp) (ro esp);

    (* If we're very lucky, the platform will have left us with
     * something sensible in the startup stack like so:
     *
     *   *ebp+24       = [arg1   ] = argv
     *   *ebp+20       = [arg0   ] = argc
     *   *ebp+16       = [retpc  ]
     *   *ebp+12       = [old_ebp]
     *   *ebp+8        = [old_edi]
     *   *ebp+4        = [old_esi]
     *   *ebp          = [old_ebx]
     *
     * This is not the case everywhere, but we start with this
     * assumption and correct it in the runtime library.
     *)

    (* Copy argv. *)
    mov (rc eax) (c (ebp_n (2 + n_callee_saves)));
    Il.emit e (Il.Push (ro eax));

    (* Copy argc. *)
    mov (rc eax) (c (ebp_n (1 + n_callee_saves)));
    Il.emit e (Il.Push (ro eax));

    push_pos32 crate_fixup;
    push_pos32 main_fn_fixup;
    let fptr =
      Abi.load_fixup_codeptr e (h eax) rust_start_fixup true indirect_start
    in
      Il.emit e (Il.call (rc eax) fptr);
      Il.emit e (Il.Pop (rc ecx));
      Il.emit e (Il.Pop (rc ecx));
      Il.emit e (Il.Pop (rc ecx));
      Il.emit e (Il.Pop (rc ecx));
      Il.emit e (Il.umov (rc esp) (ro ebp));
      restore_callee_saves e;
      Il.emit e Il.Ret;
;;

(* The x86/ia32 ABI record handed to the rest of the compiler. *)
let (abi:Abi.abi) =
  {
    Abi.abi_word_sz = word_sz;
    Abi.abi_word_bits = word_bits;
    Abi.abi_word_ty = word_ty;

    Abi.abi_is_2addr_machine = true;
    Abi.abi_has_pcrel_data = false;
    Abi.abi_has_pcrel_code = true;

    Abi.abi_n_hardregs = n_hardregs;
    Abi.abi_str_of_hardreg = reg_str;
    Abi.abi_prealloc_quad = prealloc_quad;
    Abi.abi_constrain_vregs = constrain_vregs;

    Abi.abi_emit_fn_prologue = fn_prologue;
    Abi.abi_emit_fn_epilogue = fn_epilogue;
    Abi.abi_emit_fn_tail_call = fn_tail_call;
    Abi.abi_clobbers = clobbers;

    Abi.abi_emit_native_call = emit_native_call;
    Abi.abi_emit_native_void_call = emit_native_void_call;
    Abi.abi_emit_native_call_in_thunk = emit_native_call_in_thunk;
    Abi.abi_emit_inline_memcpy = inline_memcpy;

    Abi.abi_activate = activate_glue;
    Abi.abi_yield = yield_glue;
    Abi.abi_unwind = unwind_glue;
    Abi.abi_get_next_pc_thunk = Some get_next_pc_thunk;

    Abi.abi_sp_reg = (Il.Hreg esp);
    Abi.abi_fp_reg = (Il.Hreg ebp);
    Abi.abi_dwarf_fp_reg = dwarf_ebp;
    Abi.abi_tp_cell = task_ptr;
    Abi.abi_frame_base_sz = frame_base_sz;
    Abi.abi_frame_info_sz = frame_info_sz;
    Abi.abi_implicit_args_sz = implicit_args_sz;
    Abi.abi_spill_slot = spill_slot;
  }


(*
 * NB: factor the instruction selector often. There's lots of
 * semi-redundancy in the ISA.
+ *) + + +let imm_is_signed_byte (n:int64) : bool = + (i64_le (-128L) n) && (i64_le n 127L) +;; + +let imm_is_unsigned_byte (n:int64) : bool = + (i64_le (0L) n) && (i64_le n 255L) +;; + + +let rm_r (c:Il.cell) (r:int) : Asm.frag = + let reg_ebp = 6 in + let reg_esp = 7 in + + (* + * We do a little contortion here to accommodate the special case of + * being asked to form esp-relative addresses; these require SIB + * bytes on x86. Of course! + *) + let sib_esp_base = Asm.BYTE 0x24 in + let seq1 rm modrm = + if rm = reg_esp + then Asm.SEQ [| modrm; sib_esp_base |] + else modrm + in + let seq2 rm modrm disp = + if rm = reg_esp + then Asm.SEQ [| modrm; sib_esp_base; disp |] + else Asm.SEQ [| modrm; disp |] + in + + match c with + Il.Reg ((Il.Hreg rm), _) -> + Asm.BYTE (modrm_reg (reg rm) r) + | Il.Mem (a, _) -> + begin + match a with + Il.Abs disp -> + Asm.SEQ [| Asm.BYTE (modrm_deref_disp32 r); + Asm.WORD (TY_i32, disp) |] + + | Il.RegIn ((Il.Hreg rm), None) when rm != reg_ebp -> + seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r)) + + | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM 0L)) + when rm != reg_ebp -> + seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r)) + + (* The next two are just to save the relaxation system some + * churn. 
+ *) + + | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n)) + when imm_is_signed_byte n -> + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r)) + (Asm.WORD (TY_i8, Asm.IMM n)) + + | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n)) -> + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r)) + (Asm.WORD (TY_i32, Asm.IMM n)) + + | Il.RegIn ((Il.Hreg rm), Some disp) -> + Asm.new_relaxation + [| + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r)) + (Asm.WORD (TY_i32, disp)); + seq2 rm + (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r)) + (Asm.WORD (TY_i8, disp)) + |] + | _ -> raise Unrecognized + end + | _ -> raise Unrecognized +;; + + +let insn_rm_r (op:int) (c:Il.cell) (r:int) : Asm.frag = + Asm.SEQ [| Asm.BYTE op; rm_r c r |] +;; + + +let insn_rm_r_imm + (op:int) + (c:Il.cell) + (r:int) + (ty:ty_mach) + (i:Asm.expr64) + : Asm.frag = + Asm.SEQ [| Asm.BYTE op; rm_r c r; Asm.WORD (ty, i) |] +;; + +let insn_rm_r_imm_s8_s32 + (op8:int) + (op32:int) + (c:Il.cell) + (r:int) + (i:Asm.expr64) + : Asm.frag = + match i with + Asm.IMM n when imm_is_signed_byte n -> + insn_rm_r_imm op8 c r TY_i8 i + | _ -> + Asm.new_relaxation + [| + insn_rm_r_imm op32 c r TY_i32 i; + insn_rm_r_imm op8 c r TY_i8 i + |] +;; + +let insn_rm_r_imm_u8_u32 + (op8:int) + (op32:int) + (c:Il.cell) + (r:int) + (i:Asm.expr64) + : Asm.frag = + match i with + Asm.IMM n when imm_is_unsigned_byte n -> + insn_rm_r_imm op8 c r TY_u8 i + | _ -> + Asm.new_relaxation + [| + insn_rm_r_imm op32 c r TY_u32 i; + insn_rm_r_imm op8 c r TY_u8 i + |] +;; + + +let insn_pcrel_relax + (op8_frag:Asm.frag) + (op32_frag:Asm.frag) + (fix:fixup) + : Asm.frag = + let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in + let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in + let pcrel_expr = (Asm.SUB (Asm.M_POS fix, + Asm.M_POS pcrel_mark_fixup)) + in + Asm.new_relaxation + [| + Asm.SEQ [| op32_frag; Asm.WORD (TY_i32, pcrel_expr); def |]; + Asm.SEQ [| op8_frag; Asm.WORD (TY_i8, pcrel_expr); def |]; + |] +;; + +let 
insn_pcrel_simple (op32:int) (fix:fixup) : Asm.frag = + let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in + let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in + let pcrel_expr = (Asm.SUB (Asm.M_POS fix, + Asm.M_POS pcrel_mark_fixup)) + in + Asm.SEQ [| Asm.BYTE op32; Asm.WORD (TY_i32, pcrel_expr); def |] +;; + +let insn_pcrel (op8:int) (op32:int) (fix:fixup) : Asm.frag = + insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTE op32) fix +;; + +let insn_pcrel_prefix32 + (op8:int) + (prefix32:int) + (op32:int) + (fix:fixup) + : Asm.frag = + insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTES [| prefix32; op32 |]) fix +;; + +(* FIXME: tighten imm-based dispatch by imm type. *) +let cmp (a:Il.operand) (b:Il.operand) : Asm.frag = + match (a,b) with + (Il.Cell c, Il.Imm (i, TY_i8)) when is_rm8 c -> + insn_rm_r_imm 0x80 c slash7 TY_i8 i + | (Il.Cell c, Il.Imm (i, TY_u8)) when is_rm8 c -> + insn_rm_r_imm 0x80 c slash7 TY_u8 i + | (Il.Cell c, Il.Imm (i, _)) when is_rm32 c -> + (* + * NB: We can't switch on signed-ness here, as 'cmp' is + * defined to sign-extend its operand; i.e. we have to treat + * it as though you're emitting a signed byte (in the sense of + * immediate-size selection) even if the incoming value is + * unsigned. + *) + insn_rm_r_imm_s8_s32 0x83 0x81 c slash7 i + | (Il.Cell c, Il.Cell (Il.Reg (Il.Hreg r, _))) -> + insn_rm_r 0x39 c (reg r) + | (Il.Cell (Il.Reg (Il.Hreg r, _)), Il.Cell c) -> + insn_rm_r 0x3b c (reg r) + | _ -> raise Unrecognized +;; + +let zero (dst:Il.cell) (count:Il.operand) : Asm.frag = + match (dst, count) with + + ((Il.Mem (Il.RegIn ((Il.Hreg dst_ptr), None), _)), + Il.Cell (Il.Reg ((Il.Hreg count), _))) + when dst_ptr = edi && count = ecx -> + Asm.BYTES [| + 0xb0; 0x0; (* mov %eax, 0 : move a zero into al. 
*) + 0xf3; 0xaa; (* rep stos m8 : fill ecx bytes at [edi] with al *) + |] + + | _ -> raise Unrecognized +;; + +let mov (signed:bool) (dst:Il.cell) (src:Il.operand) : Asm.frag = + if is_ty8 (Il.cell_scalar_ty dst) || is_ty8 (Il.operand_scalar_ty src) + then + begin + (match dst with + Il.Reg (Il.Hreg r, _) + -> assert (is_ok_r8 r) | _ -> ()); + (match src with + Il.Cell (Il.Reg (Il.Hreg r, _)) + -> assert (is_ok_r8 r) | _ -> ()); + end; + + match (signed, dst, src) with + + (* m8 <- r??, r8 or truncate(r32). *) + (_, _, Il.Cell (Il.Reg ((Il.Hreg r), _))) + when is_m8 dst -> + insn_rm_r 0x88 dst (reg r) + + (* r8 <- r8: treat as r32 <- r32. *) + | (_, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell) + when is_r8 dst && is_r8 src_cell -> + insn_rm_r 0x8b src_cell (reg r) + + (* rm32 <- r32 *) + | (_, _, Il.Cell (Il.Reg ((Il.Hreg r), src_ty))) + when (is_r8 dst || is_rm32 dst) && is_ty32 src_ty -> + insn_rm_r 0x89 dst (reg r) + + (* r32 <- rm32 *) + | (_, (Il.Reg ((Il.Hreg r), dst_ty)), Il.Cell src_cell) + when is_ty32 dst_ty && is_rm32 src_cell -> + insn_rm_r 0x8b src_cell (reg r) + + (* MOVZX: r8/r32 <- zx(rm8) *) + | (false, Il.Reg ((Il.Hreg r, _)), Il.Cell src_cell) + when (is_r8 dst || is_r32 dst) && is_rm8 src_cell -> + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xb6 src_cell (reg r) |] + + (* MOVZX: m32 <- zx(r8) *) + | (false, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell))) + when (is_m32 dst) && is_r8 src_cell -> + (* Fake with 2 insns: + * + * movzx r32 <- r8; (in-place zero-extension) + * mov m32 <- r32; (NB: must happen in AL/CL/DL/BL) + *) + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xb6 src_cell (reg r); + insn_rm_r 0x89 dst (reg r); + |] + + (* MOVSX: r8/r32 <- sx(rm8) *) + | (true, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell) + when (is_r8 dst || is_r32 dst) && is_rm8 src_cell -> + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xbe src_cell (reg r) |] + + (* MOVSX: m32 <- sx(r8) *) + | (true, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell))) + when (is_m32 dst) && is_r8 
src_cell -> + (* Fake with 2 insns: + * + * movsx r32 <- r8; (in-place sign-extension) + * mov m32 <- r32; (NB: must happen in AL/CL/DL/BL) + *) + Asm.SEQ [| Asm.BYTE 0x0f; + insn_rm_r 0xbe src_cell (reg r); + insn_rm_r 0x89 dst (reg r); + |] + + (* m8 <- imm8 (signed) *) + | (_, _, Il.Imm ((Asm.IMM n), _)) + when is_m8 dst && imm_is_signed_byte n && signed -> + insn_rm_r_imm 0xc6 dst slash0 TY_i8 (Asm.IMM n) + + (* m8 <- imm8 (unsigned) *) + | (_, _, Il.Imm ((Asm.IMM n), _)) + when is_m8 dst && imm_is_unsigned_byte n && (not signed) -> + insn_rm_r_imm 0xc6 dst slash0 TY_u8 (Asm.IMM n) + + (* rm32 <- imm32 *) + | (_, _, Il.Imm (i, _)) when is_rm32 dst || is_r8 dst -> + let t = if signed then TY_u32 else TY_i32 in + insn_rm_r_imm 0xc7 dst slash0 t i + + | _ -> raise Unrecognized +;; + + +let lea (dst:Il.cell) (src:Il.operand) : Asm.frag = + match (dst, src) with + (Il.Reg ((Il.Hreg r), dst_ty), + Il.Cell (Il.Mem (mem, _))) + when is_ty32 dst_ty -> + insn_rm_r 0x8d (Il.Mem (mem, Il.OpaqueTy)) (reg r) + + | (Il.Reg ((Il.Hreg r), dst_ty), + Il.ImmPtr (fix, _)) + when is_ty32 dst_ty && r = eax -> + let anchor = new_fixup "anchor" in + let fix_off = Asm.SUB ((Asm.M_POS fix), + (Asm.M_POS anchor)) + in + (* NB: These instructions must come as a + * cluster, w/o any separation. 
+ *) + Asm.SEQ [| + insn_pcrel_simple 0xe8 get_next_pc_thunk_fixup; + Asm.DEF (anchor, insn_rm_r_imm 0x81 dst slash0 TY_i32 fix_off); + |] + + | _ -> raise Unrecognized +;; + + +let select_insn_misc (q:Il.quad') : Asm.frag = + + match q with + Il.Call c -> + begin + match c.Il.call_dst with + Il.Reg ((Il.Hreg dst), _) when dst = eax -> + begin + match c.Il.call_targ with + + Il.CodePtr (Il.Cell c) + when Il.cell_referent_ty c + = Il.ScalarTy (Il.AddrTy Il.CodeTy) -> + insn_rm_r 0xff c slash2 + + | Il.CodePtr (Il.ImmPtr (f, Il.CodeTy)) -> + insn_pcrel_simple 0xe8 f + + | _ -> raise Unrecognized + end + | _ -> raise Unrecognized + end + + | Il.Push (Il.Cell (Il.Reg ((Il.Hreg r), t))) when is_ty32 t -> + Asm.BYTE (0x50 + (reg r)) + + | Il.Push (Il.Cell c) when is_rm32 c -> + insn_rm_r 0xff c slash6 + + | Il.Push (Il.Imm (Asm.IMM i, _)) when imm_is_unsigned_byte i -> + Asm.SEQ [| Asm.BYTE 0x6a; Asm.WORD (TY_u8, (Asm.IMM i)) |] + + | Il.Push (Il.Imm (i, _)) -> + Asm.SEQ [| Asm.BYTE 0x68; Asm.WORD (TY_u32, i) |] + + | Il.Pop (Il.Reg ((Il.Hreg r), t)) when is_ty32 t -> + Asm.BYTE (0x58 + (reg r)) + + | Il.Pop c when is_rm32 c -> + insn_rm_r 0x8f c slash0 + + | Il.Ret -> Asm.BYTE 0xc3 + + | Il.Jmp j -> + begin + match (j.Il.jmp_op, j.Il.jmp_targ) with + + (Il.JMP, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) -> + insn_pcrel 0xeb 0xe9 f + + | (Il.JMP, Il.CodePtr (Il.Cell c)) + when Il.cell_referent_ty c + = Il.ScalarTy (Il.AddrTy Il.CodeTy) -> + insn_rm_r 0xff c slash4 + + (* FIXME: refactor this to handle cell-based jumps + * if we ever need them. So far not. 
*) + | (_, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) -> + let (op8, op32) = + match j.Il.jmp_op with + | Il.JC -> (0x72, 0x82) + | Il.JNC -> (0x73, 0x83) + | Il.JZ -> (0x74, 0x84) + | Il.JNZ -> (0x75, 0x85) + | Il.JO -> (0x70, 0x80) + | Il.JNO -> (0x71, 0x81) + | Il.JE -> (0x74, 0x84) + | Il.JNE -> (0x75, 0x85) + + | Il.JL -> (0x7c, 0x8c) + | Il.JLE -> (0x7e, 0x8e) + | Il.JG -> (0x7f, 0x8f) + | Il.JGE -> (0x7d, 0x8d) + + | Il.JB -> (0x72, 0x82) + | Il.JBE -> (0x76, 0x86) + | Il.JA -> (0x77, 0x87) + | Il.JAE -> (0x73, 0x83) + | _ -> raise Unrecognized + in + let prefix32 = 0x0f in + insn_pcrel_prefix32 op8 prefix32 op32 f + + | _ -> raise Unrecognized + end + + | Il.Dead -> Asm.MARK + | Il.Debug -> Asm.BYTES [| 0xcc |] (* int 3 *) + | Il.Regfence -> Asm.MARK + | Il.End -> Asm.BYTES [| 0x90 |] + | Il.Nop -> Asm.BYTES [| 0x90 |] + | _ -> raise Unrecognized +;; + + +type alu_binop_codes = + { + insn: string; + immslash: int; (* mod/rm "slash" code for imm-src variant *) + rm_dst_op8: int; (* opcode for 8-bit r/m dst variant *) + rm_dst_op32: int; (* opcode for 32-bit r/m dst variant *) + rm_src_op8: int; (* opcode for 8-bit r/m src variant *) + rm_src_op32: int; (* opcode for 32-bit r/m src variant *) + } +;; + +let alu_binop + (dst:Il.cell) (src:Il.operand) (codes:alu_binop_codes) + : Asm.frag = + match (dst, src) with + (Il.Reg ((Il.Hreg r), dst_ty), Il.Cell c) + when (is_ty32 dst_ty && is_rm32 c) || (is_ty8 dst_ty && is_rm8 c) + -> insn_rm_r codes.rm_src_op32 c (reg r) + + | (_, Il.Cell (Il.Reg ((Il.Hreg r), src_ty))) + when (is_rm32 dst && is_ty32 src_ty) || (is_rm8 dst && is_ty8 src_ty) + -> insn_rm_r codes.rm_dst_op32 dst (reg r) + + | (_, Il.Imm (i, _)) when is_rm32 dst || is_rm8 dst + -> insn_rm_r_imm_s8_s32 0x83 0x81 dst codes.immslash i + + | _ -> raise Unrecognized +;; + + +let mul_like (src:Il.operand) (signed:bool) (slash:int) + : Asm.frag = + match src with + Il.Cell src when is_rm32 src -> + insn_rm_r 0xf7 src slash + + | Il.Cell src when is_rm8 src -> + 
insn_rm_r 0xf6 src slash + + | Il.Imm (_, TY_u32) + | Il.Imm (_, TY_i32) -> + let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits32) in + Asm.SEQ [| mov signed tmp src; + insn_rm_r 0xf7 tmp slash |] + + | Il.Imm (_, TY_u8) + | Il.Imm (_, TY_i8) -> + let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits8) in + Asm.SEQ [| mov signed tmp src; + insn_rm_r 0xf6 tmp slash |] + + | _ -> raise Unrecognized +;; + + +let select_insn (q:Il.quad) : Asm.frag = + match q.Il.quad_body with + Il.Unary u -> + let unop s = + if u.Il.unary_src = Il.Cell u.Il.unary_dst + then insn_rm_r 0xf7 u.Il.unary_dst s + else raise Unrecognized + in + begin + match u.Il.unary_op with + Il.UMOV -> mov false u.Il.unary_dst u.Il.unary_src + | Il.IMOV -> mov true u.Il.unary_dst u.Il.unary_src + | Il.NEG -> unop slash3 + | Il.NOT -> unop slash2 + | Il.ZERO -> zero u.Il.unary_dst u.Il.unary_src + end + + | Il.Lea le -> lea le.Il.lea_dst le.Il.lea_src + + | Il.Cmp c -> cmp c.Il.cmp_lhs c.Il.cmp_rhs + + | Il.Binary b -> + begin + if Il.Cell b.Il.binary_dst = b.Il.binary_lhs + then + let binop = alu_binop b.Il.binary_dst b.Il.binary_rhs in + let mulop = mul_like b.Il.binary_rhs in + + let divop signed slash = + Asm.SEQ [| + (* xor edx edx, then mul_like. *) + insn_rm_r 0x33 (rc edx) (reg edx); + mul_like b.Il.binary_rhs signed slash + |] + in + + let modop signed slash = + Asm.SEQ [| + (* divop, then mov remainder to eax instead. 
*) + divop signed slash; + mov false (rc eax) (ro edx) + |] + in + + let shiftop slash = + let src = b.Il.binary_rhs in + let dst = b.Il.binary_dst in + let mask i = Asm.AND (i, Asm.IMM 0xffL) in + if is_rm8 dst + then + match src with + Il.Imm (i, _) -> + insn_rm_r_imm 0xC0 dst slash TY_u8 (mask i) + | Il.Cell (Il.Reg ((Il.Hreg r), _)) + when r = ecx -> + Asm.SEQ [| Asm.BYTE 0xD2; rm_r dst slash |] + | _ -> raise Unrecognized + else + match src with + Il.Imm (i, _) -> + insn_rm_r_imm 0xC1 dst slash TY_u8 (mask i) + | Il.Cell (Il.Reg ((Il.Hreg r), _)) + when r = ecx -> + Asm.SEQ [| Asm.BYTE 0xD3; rm_r dst slash |] + | _ -> raise Unrecognized + in + + match (b.Il.binary_dst, b.Il.binary_op) with + (_, Il.ADD) -> binop { insn="ADD"; + immslash=slash0; + rm_dst_op8=0x0; + rm_dst_op32=0x1; + rm_src_op8=0x2; + rm_src_op32=0x3; } + | (_, Il.SUB) -> binop { insn="SUB"; + immslash=slash5; + rm_dst_op8=0x28; + rm_dst_op32=0x29; + rm_src_op8=0x2a; + rm_src_op32=0x2b; } + | (_, Il.AND) -> binop { insn="AND"; + immslash=slash4; + rm_dst_op8=0x20; + rm_dst_op32=0x21; + rm_src_op8=0x22; + rm_src_op32=0x23; } + | (_, Il.OR) -> binop { insn="OR"; + immslash=slash1; + rm_dst_op8=0x08; + rm_dst_op32=0x09; + rm_src_op8=0x0a; + rm_src_op32=0x0b; } + | (_, Il.XOR) -> binop { insn="XOR"; + immslash=slash6; + rm_dst_op8=0x30; + rm_dst_op32=0x31; + rm_src_op8=0x32; + rm_src_op32=0x33; } + + | (_, Il.LSL) -> shiftop slash4 + | (_, Il.LSR) -> shiftop slash5 + | (_, Il.ASR) -> shiftop slash7 + + | (Il.Reg (Il.Hreg r, t), Il.UMUL) + when (is_ty32 t || is_ty8 t) && r = eax -> + mulop false slash4 + + | (Il.Reg (Il.Hreg r, t), Il.IMUL) + when (is_ty32 t || is_ty8 t) && r = eax -> + mulop true slash5 + + | (Il.Reg (Il.Hreg r, t), Il.UDIV) + when (is_ty32 t || is_ty8 t) && r = eax -> + divop false slash6 + + | (Il.Reg (Il.Hreg r, t), Il.IDIV) + when (is_ty32 t || is_ty8 t) && r = eax -> + divop true slash7 + + | (Il.Reg (Il.Hreg r, t), Il.UMOD) + when (is_ty32 t || is_ty8 t) && r = eax -> + modop 
false slash6 + + | (Il.Reg (Il.Hreg r, t), Il.IMOD) + when (is_ty32 t || is_ty8 t) && r = eax -> + modop true slash7 + + | _ -> raise Unrecognized + else raise Unrecognized + end + | _ -> select_insn_misc q.Il.quad_body +;; + + +let new_emitter_without_vregs _ : Il.emitter = + Il.new_emitter + abi.Abi.abi_prealloc_quad + abi.Abi.abi_is_2addr_machine + false None +;; + +let select_insns (sess:Session.sess) (q:Il.quads) : Asm.frag = + let scopes = Stack.create () in + let fixups = Stack.create () in + let pop_frags _ = + Asm.SEQ (Array.of_list + (List.rev + (!(Stack.pop scopes)))) + in + ignore (Stack.push (ref []) scopes); + for i = 0 to (Array.length q) - 1 do + let append frag = + let frags = Stack.top scopes in + frags := frag :: (!frags) + in + begin + match q.(i).Il.quad_fixup with + None -> () + | Some f -> append (Asm.DEF (f, Asm.MARK)) + end; + begin + match q.(i).Il.quad_body with + Il.Enter f -> + Stack.push f fixups; + Stack.push (ref []) scopes; + | Il.Leave -> + append (Asm.DEF (Stack.pop fixups, pop_frags ())) + | _ -> + try + append (select_insn q.(i)) + with + Unrecognized -> + Session.fail sess + "E:Assembly error: unrecognized quad: %s\n%!" + (Il.string_of_quad reg_str q.(i)); + () + end + done; + pop_frags() +;; + +let frags_of_emitted_quads (sess:Session.sess) (e:Il.emitter) : Asm.frag = + let frag = select_insns sess e.Il.emit_quads in + if sess.Session.sess_failed + then raise Unrecognized + else frag +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) |