aboutsummaryrefslogtreecommitdiff
path: root/src/boot/be
diff options
context:
space:
mode:
authorGraydon Hoare <[email protected]>2010-06-23 21:03:09 -0700
committerGraydon Hoare <[email protected]>2010-06-23 21:03:09 -0700
commitd6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch)
treeb425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/be
parentInitial git commit. (diff)
downloadrust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz
rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip
Populate tree.
Diffstat (limited to 'src/boot/be')
-rw-r--r--src/boot/be/abi.ml207
-rw-r--r--src/boot/be/asm.ml755
-rw-r--r--src/boot/be/elf.ml1760
-rw-r--r--src/boot/be/il.ml1135
-rw-r--r--src/boot/be/macho.ml1184
-rw-r--r--src/boot/be/pe.ml1149
-rw-r--r--src/boot/be/ra.ml664
-rw-r--r--src/boot/be/x86.ml2205
8 files changed, 9059 insertions, 0 deletions
diff --git a/src/boot/be/abi.ml b/src/boot/be/abi.ml
new file mode 100644
index 00000000..fd9ca750
--- /dev/null
+++ b/src/boot/be/abi.ml
@@ -0,0 +1,207 @@
+
+(*
+ * The 'abi' structure is pretty much just a grab-bag of machine
+ * dependencies and structure-layout information. Part of the latter
+ * is shared with trans and semant.
+ *
+ * Make some attempt to factor it as time goes by.
+ *)
+
+(* Word offsets for structure fields in rust-internal.h, and elsewhere in
+ compiler. *)
+
+let rc_base_field_refcnt = 0;;
+
+let task_field_refcnt = rc_base_field_refcnt;;
+let task_field_stk = task_field_refcnt + 1;;
+let task_field_runtime_sp = task_field_stk + 1;;
+let task_field_rust_sp = task_field_runtime_sp + 1;;
+let task_field_gc_alloc_chain = task_field_rust_sp + 1;;
+let task_field_dom = task_field_gc_alloc_chain + 1;;
+let n_visible_task_fields = task_field_dom + 1;;
+
+let dom_field_interrupt_flag = 0;;
+
+let frame_glue_fns_field_mark = 0;;
+let frame_glue_fns_field_drop = 1;;
+let frame_glue_fns_field_reloc = 2;;
+
+let exterior_rc_slot_field_refcnt = 0;;
+let exterior_rc_slot_field_body = 1;;
+
+let exterior_gc_slot_field_next = (-2);;
+let exterior_gc_slot_field_ctrl = (-1);;
+let exterior_gc_slot_field_refcnt = 0;;
+let exterior_gc_slot_field_body = 1;;
+
+let exterior_rc_header_size = 1;;
+let exterior_gc_header_size = 3;;
+
+let exterior_gc_malloc_return_adjustment = 2;;
+
+let stk_field_valgrind_id = 0 + 1;;
+let stk_field_limit = stk_field_valgrind_id + 1;;
+let stk_field_data = stk_field_limit + 1;;
+
+let binding_size = 2;;
+let binding_field_item = 0;;
+let binding_field_binding = 1;;
+
+let general_code_alignment = 16;;
+
+let tydesc_field_first_param = 0;;
+let tydesc_field_size = 1;;
+let tydesc_field_align = 2;;
+let tydesc_field_copy_glue = 3;;
+let tydesc_field_drop_glue = 4;;
+let tydesc_field_free_glue = 5;;
+let tydesc_field_mark_glue = 6;;
+let tydesc_field_obj_drop_glue = 7;;
+
+let vec_elt_rc = 0;;
+let vec_elt_alloc = 1;;
+let vec_elt_fill = 2;;
+let vec_elt_data = 3;;
+
+let calltup_elt_out_ptr = 0;;
+let calltup_elt_task_ptr = 1;;
+let calltup_elt_ty_params = 2;;
+let calltup_elt_args = 3;;
+let calltup_elt_iterator_args = 4;;
+let calltup_elt_indirect_args = 5;;
+
+let iterator_args_elt_block_fn = 0;;
+let iterator_args_elt_outer_frame_ptr = 1;;
+
+let indirect_args_elt_closure = 0;;
+
+(* ty_params, src, dst, tydesc, taskptr. *)
+let worst_case_glue_call_args = 5;;
+
+type abi =
+ {
+ abi_word_sz: int64;
+ abi_word_bits: Il.bits;
+ abi_word_ty: Common.ty_mach;
+
+ abi_is_2addr_machine: bool;
+ abi_has_pcrel_data: bool;
+ abi_has_pcrel_code: bool;
+
+ abi_n_hardregs: int;
+ abi_str_of_hardreg: (int -> string);
+
+ abi_prealloc_quad: (Il.quad' -> Il.quad');
+ abi_constrain_vregs: (Il.quad -> Bits.t array -> unit);
+
+ abi_emit_fn_prologue: (Il.emitter
+ -> Common.size (* framesz *)
+ -> Common.size (* callsz *)
+ -> Common.nabi
+ -> Common.fixup (* grow_task *)
+ -> unit);
+
+ abi_emit_fn_epilogue: (Il.emitter -> unit);
+
+ abi_emit_fn_tail_call: (Il.emitter
+ -> int64 (* caller_callsz *)
+ -> int64 (* caller_argsz *)
+ -> Il.code (* callee_code *)
+ -> int64 (* callee_argsz *)
+ -> unit);
+
+ abi_clobbers: (Il.quad -> Il.hreg list);
+
+ abi_emit_native_call: (Il.emitter
+ -> Il.cell (* ret *)
+ -> Common.nabi
+ -> Common.fixup (* callee *)
+ -> Il.operand array (* args *)
+ -> unit);
+
+ abi_emit_native_void_call: (Il.emitter
+ -> Common.nabi
+ -> Common.fixup (* callee *)
+ -> Il.operand array (* args *)
+ -> unit);
+
+ abi_emit_native_call_in_thunk: (Il.emitter
+ -> Il.cell (* ret *)
+ -> Common.nabi
+ -> Il.operand (* callee *)
+ -> Il.operand array (* args *)
+ -> unit);
+ abi_emit_inline_memcpy: (Il.emitter
+ -> int64 (* n_bytes *)
+ -> Il.reg (* dst_ptr *)
+ -> Il.reg (* src_ptr *)
+ -> Il.reg (* tmp_reg *)
+ -> bool (* ascending *)
+ -> unit);
+
+ (* Global glue. *)
+ abi_activate: (Il.emitter -> unit);
+ abi_yield: (Il.emitter -> unit);
+ abi_unwind: (Il.emitter -> Common.nabi -> Common.fixup -> unit);
+ abi_get_next_pc_thunk:
+ ((Il.reg (* output *)
+ * Common.fixup (* thunk in objfile *)
+ * (Il.emitter -> unit)) (* fn to make thunk *)
+ option);
+
+ abi_sp_reg: Il.reg;
+ abi_fp_reg: Il.reg;
+ abi_dwarf_fp_reg: int;
+ abi_tp_cell: Il.cell;
+ abi_implicit_args_sz: int64;
+ abi_frame_base_sz: int64;
+ abi_frame_info_sz: int64;
+ abi_spill_slot: (Il.spill -> Il.mem);
+ }
+;;
+
+(* [load_fixup_addr e out_reg fix rty] emits a single LEA-style quad on
+ * emitter [e] that loads the address denoted by fixup [fix] into
+ * [out_reg].  [rty] is the referent type recorded on both the
+ * destination cell and the immediate-pointer operand, so [out_reg] is
+ * typed as a pointer-to-[rty] afterwards. *)
+let load_fixup_addr
+    (e:Il.emitter)
+    (out_reg:Il.reg)
+    (fix:Common.fixup)
+    (rty:Il.referent_ty)
+    : unit =
+
+  let cell = Il.Reg (out_reg, Il.AddrTy rty) in
+  let op = Il.ImmPtr (fix, rty) in
+    Il.emit e (Il.lea cell op);
+;;
+
+(* [load_fixup_codeptr e out_reg fixup has_pcrel_code indirect] produces
+ * an [Il.code] value naming the code at [fixup], choosing one of three
+ * strategies:
+ *
+ *   - [indirect]: [fixup] names a cell *containing* a code pointer, so
+ *     load the cell's address into [out_reg] and return a memory-based
+ *     code pointer that dereferences through it;
+ *   - pc-relative ([has_pcrel_code]): no scratch register needed, the
+ *     code pointer is just an immediate on the fixup;
+ *   - otherwise: materialize the code address into [out_reg] first and
+ *     return a register-based code pointer.
+ *
+ * Note [out_reg] is only clobbered on the first and third paths. *)
+let load_fixup_codeptr
+    (e:Il.emitter)
+    (out_reg:Il.reg)
+    (fixup:Common.fixup)
+    (has_pcrel_code:bool)
+    (indirect:bool)
+    : Il.code =
+  if indirect
+  then
+    begin
+      load_fixup_addr e out_reg fixup (Il.ScalarTy (Il.AddrTy Il.CodeTy));
+      Il.CodePtr (Il.Cell (Il.Mem (Il.RegIn (out_reg, None),
+                                   Il.ScalarTy (Il.AddrTy Il.CodeTy))))
+    end
+  else
+    if has_pcrel_code
+    then (Il.CodePtr (Il.ImmPtr (fixup, Il.CodeTy)))
+    else
+      begin
+        load_fixup_addr e out_reg fixup Il.CodeTy;
+        Il.CodePtr (Il.Cell (Il.Reg (out_reg, Il.AddrTy Il.CodeTy)))
+      end
+;;
+
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)
diff --git a/src/boot/be/asm.ml b/src/boot/be/asm.ml
new file mode 100644
index 00000000..10b2142a
--- /dev/null
+++ b/src/boot/be/asm.ml
@@ -0,0 +1,755 @@
+(*
+
+ Our assembler is an all-at-once, buffer-in-memory job, very simple
+ minded. I have 1gb of memory on my laptop: I don't expect to ever
+ emit a program that large with this code.
+
+ It is based on the 'frag' type, which has a variant for every major
+ type of machine-blob we know how to write (bytes, zstrings, BSS
+ blocks, words of various sorts).
+
+ A frag can contain symbolic references between the sub-parts of
+ it. These are accomplished through ref cells we call fixups, and a
+ 2-pass (resolution and writing) process defined recursively over
+ the frag structure.
+
+ Fixups are defined by wrapping a frag in a DEF pseudo-frag with
+ a fixup attached. This will record information about the wrapped
+ frag -- positions and sizes -- in the fixup during resolution.
+
+ We say "positions" and "sizes" there, in plural, because both a
+ file number and a memory number is recorded for each concept.
+
+ File numbers refer to positions and sizes in the file we're
+ generating, and are based on the native int type for the host
+ platform -- usually 31 or 62 bits -- whereas the expressions that
+ *use* position fixups tend to promote them up to 32 or 64 bits
+ somehow. On a 32 bit platform, you can't generate output buffers
+ with 64-bit positions (ocaml limitation!)
+
+ Memory numbers are 64 bit, always, and refer to sizes and positions
+ of frags when they are loaded into memory in the target. When
+ you're generating code for a 32-bit target, or using a memory
+ number in a context that's less than 64 bits, the value is
+ range-checked and truncated. But in all other respects, we imagine
+ a 32-bit address space is just the prefix of the continuing 64-bit
+ address space. If you need to pin an object at a particular place
+ from the point 2^32-1, say, you will need to do arithmetic and use
+ the MEMPOS pseudo-frag, that sets the current memory position as
+ it's being processed.
+
+ Fixups can be *used* anywhere else in the frag tree, as many times
+ as you like. If you try to write an unresolved fixup, the emitter
+ faults. When you specify the use of a fixup, you need to specify
+ whether you want to use its file size, file position, memory size,
+ or memory position.
+
+ Positions, addresses, sizes and such, of course, are in bytes.
+
+ Expressions are evaluated to an int64 (signed), even if the
+ expression is an int32 or less. Depending on how you use the result
+ of the expression, a range check error may fire (for example, if
+ the expression evaluates to -2^24 and you're emitting a word16).
+
+ Word endianness is per-file. At the moment this seems acceptable.
+
+ Because we want to be *very specific* about the time and place
+ arithmetic promotions occur, we define two separate expression-tree
+ types (with the same polymorphic constructors) and two separate
+ evaluation functions, with an explicit operator for marking the
+ promotion-points.
+
+*)
+
+open Common;;
+
+
+let log (sess:Session.sess) =
+ Session.log "asm"
+ sess.Session.sess_log_asm
+ sess.Session.sess_log_out
+;;
+
+let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit =
+ if sess.Session.sess_log_asm
+ then thunk ()
+ else ()
+;;
+
+exception Bad_fit of string;;
+exception Undef_sym of string;;
+
+type ('a, 'b) expr =
+ IMM of 'a
+ | ADD of (('a, 'b) expr) * (('a, 'b) expr)
+ | SUB of (('a, 'b) expr) * (('a, 'b) expr)
+ | MUL of (('a, 'b) expr) * (('a, 'b) expr)
+ | DIV of (('a, 'b) expr) * (('a, 'b) expr)
+ | REM of (('a, 'b) expr) * (('a, 'b) expr)
+ | MAX of (('a, 'b) expr) * (('a, 'b) expr)
+ | ALIGN of (('a, 'b) expr) * (('a, 'b) expr)
+ | SLL of (('a, 'b) expr) * int
+ | SLR of (('a, 'b) expr) * int
+ | SAR of (('a, 'b) expr) * int
+ | AND of (('a, 'b) expr) * (('a, 'b) expr)
+ | XOR of (('a, 'b) expr) * (('a, 'b) expr)
+ | OR of (('a, 'b) expr) * (('a, 'b) expr)
+ | NOT of (('a, 'b) expr)
+ | NEG of (('a, 'b) expr)
+ | F_POS of fixup
+ | F_SZ of fixup
+ | M_POS of fixup
+ | M_SZ of fixup
+ | EXT of 'b
+
+type expr32 = (int32, int) expr
+;;
+
+type expr64 = (int64, expr32) expr
+;;
+
+
+let rec eval32 (e:expr32)
+ : int32 =
+ let chop64 kind name v =
+ let x = Int64.to_int32 v in
+ if (Int64.compare v (Int64.of_int32 x)) = 0 then
+ x
+ else raise (Bad_fit (kind
+ ^ " fixup "
+ ^ name
+ ^ " overflowed 32 bits in eval32: "
+ ^ Int64.to_string v))
+ in
+ let expandInt _ _ v = Int32.of_int v in
+ let checkdef kind name v inj =
+ match v with
+ None ->
+ raise (Undef_sym (kind ^ " fixup " ^ name
+ ^ " undefined in eval32"))
+ | Some x -> inj kind name x
+ in
+ match e with
+ IMM i -> i
+ | ADD (a, b) -> Int32.add (eval32 a) (eval32 b)
+ | SUB (a, b) -> Int32.sub (eval32 a) (eval32 b)
+ | MUL (a, b) -> Int32.mul (eval32 a) (eval32 b)
+ | DIV (a, b) -> Int32.div (eval32 a) (eval32 b)
+ | REM (a, b) -> Int32.rem (eval32 a) (eval32 b)
+ | MAX (a, b) -> i32_max (eval32 a) (eval32 b)
+ | ALIGN (a, b) -> i32_align (eval32 a) (eval32 b)
+ | SLL (a, b) -> Int32.shift_left (eval32 a) b
+ | SLR (a, b) -> Int32.shift_right_logical (eval32 a) b
+ | SAR (a, b) -> Int32.shift_right (eval32 a) b
+ | AND (a, b) -> Int32.logand (eval32 a) (eval32 b)
+ | XOR (a, b) -> Int32.logxor (eval32 a) (eval32 b)
+ | OR (a, b) -> Int32.logor (eval32 a) (eval32 b)
+ | NOT a -> Int32.lognot (eval32 a)
+ | NEG a -> Int32.neg (eval32 a)
+ | F_POS f ->
+ checkdef "file position"
+ f.fixup_name f.fixup_file_pos expandInt
+ | F_SZ f ->
+ checkdef "file size"
+ f.fixup_name f.fixup_file_sz expandInt
+ | M_POS f ->
+ checkdef "mem position"
+ f.fixup_name f.fixup_mem_pos chop64
+ | M_SZ f ->
+ checkdef "mem size" f.fixup_name f.fixup_mem_sz chop64
+ | EXT i -> Int32.of_int i
+;;
+
+let rec eval64 (e:expr64)
+ : int64 =
+ let checkdef kind name v inj =
+ match v with
+ None ->
+ raise (Undef_sym (kind ^ " fixup '"
+ ^ name ^ "' undefined in eval64"))
+ | Some x -> inj x
+ in
+ match e with
+ IMM i -> i
+ | ADD (a, b) -> Int64.add (eval64 a) (eval64 b)
+ | SUB (a, b) -> Int64.sub (eval64 a) (eval64 b)
+ | MUL (a, b) -> Int64.mul (eval64 a) (eval64 b)
+ | DIV (a, b) -> Int64.div (eval64 a) (eval64 b)
+ | REM (a, b) -> Int64.rem (eval64 a) (eval64 b)
+ | MAX (a, b) -> i64_max (eval64 a) (eval64 b)
+ | ALIGN (a, b) -> i64_align (eval64 a) (eval64 b)
+ | SLL (a, b) -> Int64.shift_left (eval64 a) b
+ | SLR (a, b) -> Int64.shift_right_logical (eval64 a) b
+ | SAR (a, b) -> Int64.shift_right (eval64 a) b
+ | AND (a, b) -> Int64.logand (eval64 a) (eval64 b)
+ | XOR (a, b) -> Int64.logxor (eval64 a) (eval64 b)
+ | OR (a, b) -> Int64.logor (eval64 a) (eval64 b)
+ | NOT a -> Int64.lognot (eval64 a)
+ | NEG a -> Int64.neg (eval64 a)
+ | F_POS f ->
+ checkdef "file position"
+ f.fixup_name f.fixup_file_pos Int64.of_int
+ | F_SZ f ->
+ checkdef "file size"
+ f.fixup_name f.fixup_file_sz Int64.of_int
+ | M_POS f ->
+ checkdef "mem position"
+ f.fixup_name f.fixup_mem_pos (fun x -> x)
+ | M_SZ f ->
+ checkdef "mem size"
+ f.fixup_name f.fixup_mem_sz (fun x -> x)
+ | EXT e -> Int64.of_int32 (eval32 e)
+;;
+
+
+type frag =
+ MARK (* MARK == 'PAD (IMM 0L)' *)
+ | SEQ of frag array
+ | PAD of int
+ | BSS of int64
+ | MEMPOS of int64
+ | BYTE of int
+ | BYTES of int array
+ | CHAR of char
+ | STRING of string
+ | ZSTRING of string
+ | ULEB128 of expr64
+ | SLEB128 of expr64
+ | WORD of (ty_mach * expr64)
+ | ALIGN_FILE of (int * frag)
+ | ALIGN_MEM of (int * frag)
+ | DEF of (fixup * frag)
+ | RELAX of relaxation
+
+and relaxation =
+ { relax_options: frag array;
+ relax_choice: int ref; }
+;;
+
+exception Relax_more of relaxation;;
+
+(* [new_relaxation frags] wraps [frags] as a RELAX frag.  The choice
+ * starts at the *last* index (by convention the largest encoding) and
+ * is decremented by [bump_relax] in [write_frag] each time the current
+ * option raises [Bad_fit], until an option fits or we run out. *)
+let new_relaxation (frags:frag array) =
+  RELAX { relax_options = frags;
+          relax_choice = ref ((Array.length frags) - 1); }
+;;
+
+
+let rec write_frag
+ ~(sess:Session.sess)
+ ~(lsb0:bool)
+ ~(buf:Buffer.t)
+ ~(frag:frag)
+ : unit =
+ let relax = Queue.create () in
+ let bump_relax r =
+ iflog sess (fun _ ->
+ log sess "bumping relaxation to position %d"
+ ((!(r.relax_choice)) - 1));
+ r.relax_choice := (!(r.relax_choice)) - 1;
+ if !(r.relax_choice) < 0
+ then bug () "relaxation ran out of options"
+ in
+ let rec loop _ =
+ Queue.clear relax;
+ Buffer.clear buf;
+ resolve_frag_full relax frag;
+ lower_frag ~sess ~lsb0 ~buf ~relax ~frag;
+ if Queue.is_empty relax
+ then ()
+ else
+ begin
+ iflog sess (fun _ -> log sess "relaxing");
+ Queue.iter bump_relax relax;
+ loop ()
+ end
+ in
+ loop ()
+
+
+and resolve_frag_full (relax:relaxation Queue.t) (frag:frag)
+ : unit =
+ let file_pos = ref 0 in
+ let mem_pos = ref 0L in
+ let bump i =
+ mem_pos := Int64.add (!mem_pos) (Int64.of_int i);
+ file_pos := (!file_pos) + i
+ in
+
+ let uleb (e:expr64) : unit =
+ let rec loop value =
+ let value = Int64.shift_right_logical value 7 in
+ if value = 0L
+ then bump 1
+ else
+ begin
+ bump 1;
+ loop value
+ end
+ in
+ loop (eval64 e)
+ in
+
+  (* Size pass for SLEB128: bump file/mem positions by the number of
+   * bytes the signed LEB128 encoding of [e] will occupy, mirroring the
+   * termination test of the emitting [sleb] in [lower_frag].  Low 7
+   * bits form each byte; encoding stops once the remaining value is
+   * all-zero (sign bit clear) or all-ones (sign bit set).
+   * NB: mask was 0xf7 (typo); only the 0x40 sign bit is consumed here
+   * so behavior was unaffected, but 0x7f matches the lowering pass. *)
+  let sleb (e:expr64) : unit =
+    let rec loop value =
+      let byte = Int64.logand value 0x7fL in
+      let value = Int64.shift_right value 7 in
+      let signbit = Int64.logand byte 0x40L in
+        if (((value = 0L) && (signbit = 0L)) ||
+              ((value = -1L) && (signbit = 0x40L)))
+        then bump 1
+        else
+          begin
+            bump 1;
+            loop value
+          end
+    in
+      loop (eval64 e)
+  in
+ let rec resolve_frag it =
+ match it with
+ | MARK -> ()
+ | SEQ frags -> Array.iter resolve_frag frags
+ | PAD i -> bump i
+ | BSS i -> mem_pos := Int64.add (!mem_pos) i
+ | MEMPOS i -> mem_pos := i
+ | BYTE _ -> bump 1
+ | BYTES ia -> bump (Array.length ia)
+ | CHAR _ -> bump 1
+ | STRING s -> bump (String.length s)
+ | ZSTRING s -> bump ((String.length s) + 1)
+ | ULEB128 e -> uleb e
+ | SLEB128 e -> sleb e
+ | WORD (mach,_) -> bump (bytes_of_ty_mach mach)
+ | ALIGN_FILE (n, frag) ->
+ let spill = (!file_pos) mod n in
+ let pad = (n - spill) mod n in
+ file_pos := (!file_pos) + pad;
+ (*
+ * NB: aligning the file *causes* likewise alignment of
+ * memory, since we implement "file alignment" by
+ * padding!
+ *)
+ mem_pos := Int64.add (!mem_pos) (Int64.of_int pad);
+ resolve_frag frag
+
+ | ALIGN_MEM (n, frag) ->
+ let n64 = Int64.of_int n in
+ let spill = Int64.rem (!mem_pos) n64 in
+ let pad = Int64.rem (Int64.sub n64 spill) n64 in
+ mem_pos := Int64.add (!mem_pos) pad;
+ resolve_frag frag
+
+ | DEF (f, i) ->
+ let fpos1 = !file_pos in
+ let mpos1 = !mem_pos in
+ resolve_frag i;
+ f.fixup_file_pos <- Some fpos1;
+ f.fixup_mem_pos <- Some mpos1;
+ f.fixup_file_sz <- Some ((!file_pos) - fpos1);
+ f.fixup_mem_sz <- Some (Int64.sub (!mem_pos) mpos1)
+
+ | RELAX rel ->
+ begin
+ try
+ resolve_frag rel.relax_options.(!(rel.relax_choice))
+ with
+ Bad_fit _ -> Queue.add rel relax
+ end
+ in
+ resolve_frag frag
+
+and lower_frag
+ ~(sess:Session.sess)
+ ~(lsb0:bool)
+ ~(buf:Buffer.t)
+ ~(relax:relaxation Queue.t)
+ ~(frag:frag)
+ : unit =
+ let byte (i:int) =
+ if i < 0
+ then raise (Bad_fit "byte underflow")
+ else
+ if i > 255
+ then raise (Bad_fit "byte overflow")
+ else Buffer.add_char buf (Char.chr i)
+ in
+
+ let uleb (e:expr64) : unit =
+ let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in
+ let rec loop value =
+ let byte = Int64.logand value 0x7fL in
+ let value = Int64.shift_right_logical value 7 in
+ if value = 0L
+ then emit1 byte
+ else
+ begin
+ emit1 (Int64.logor byte 0x80L);
+ loop value
+ end
+ in
+ loop (eval64 e)
+ in
+
+ let sleb (e:expr64) : unit =
+ let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in
+ let rec loop value =
+ let byte = Int64.logand value 0x7fL in
+ let value = Int64.shift_right value 7 in
+ let signbit = Int64.logand byte 0x40L in
+ if (((value = 0L) && (signbit = 0L)) ||
+ ((value = -1L) && (signbit = 0x40L)))
+ then emit1 byte
+ else
+ begin
+ emit1 (Int64.logor byte 0x80L);
+ loop value
+ end
+ in
+ loop (eval64 e)
+ in
+
+ let word (nbytes:int) (signed:bool) (e:expr64) =
+ let i = eval64 e in
+
+ (*
+ FIXME:
+
+ We should really base the entire assembler and memory-position
+ system on Big_int.big_int, but in ocaml the big_int type lacks,
+ oh, just about every useful function (no format string spec, no
+ bitwise ops, blah blah) so it's useless; we're stuck on int64
+ for bootstrapping.
+
+ For the time being we're just going to require you to represent
+ those few unsigned 64 bit terms you have in mind via their
+ signed bit pattern. Suboptimal but it's the best we can do.
+ *)
+
+ let (top,bot) =
+ if nbytes >= 8
+ then
+ if signed
+ then (Int64.max_int,Int64.min_int)
+ else (Int64.max_int,0L)
+ else
+ if signed
+ then
+ let bound = (Int64.shift_left 1L ((8 * nbytes) - 1)) in
+ (Int64.sub bound 1L, Int64.neg bound)
+ else
+ let bound = (Int64.shift_left 1L (8 * nbytes)) in
+ (Int64.sub bound 1L, 0L)
+ in
+
+ let mask1 = Int64.logand 0xffL in
+ let shift = Int64.shift_right_logical in
+ let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in
+ if Int64.compare i bot = (-1)
+ then raise (Bad_fit ("word underflow: "
+ ^ (Int64.to_string i)
+ ^ " into "
+ ^ (string_of_int nbytes)
+ ^ (if signed then " signed" else " unsigned")
+ ^ " bytes"))
+ else
+ if Int64.compare i top = 1
+ then raise (Bad_fit ("word overflow: "
+ ^ (Int64.to_string i)
+ ^ " into "
+ ^ (string_of_int nbytes)
+ ^ (if signed then " signed" else " unsigned")
+ ^ " bytes"))
+ else
+ if lsb0
+ then
+ for n = 0 to (nbytes - 1) do
+ emit1 (mask1 (shift i (8*n)))
+ done
+ else
+ for n = (nbytes - 1) downto 0 do
+ emit1 (mask1 (shift i (8*n)))
+ done
+ in
+ match frag with
+ MARK -> ()
+
+ | SEQ frags ->
+ Array.iter
+ begin
+ fun frag ->
+ lower_frag ~sess ~lsb0 ~buf ~relax ~frag
+ end frags
+
+ | PAD c ->
+ for i = 1 to c do
+ Buffer.add_char buf '\x00'
+ done
+
+ | BSS _ -> ()
+
+ | MEMPOS _ -> ()
+
+ | BYTE i -> byte i
+
+ | BYTES bs ->
+ iflog sess (fun _ -> log sess "lowering %d bytes"
+ (Array.length bs));
+ Array.iter byte bs
+
+ | CHAR c ->
+ iflog sess (fun _ -> log sess "lowering char: %c" c);
+ Buffer.add_char buf c
+
+ | STRING s ->
+ iflog sess (fun _ -> log sess "lowering string: %s" s);
+ Buffer.add_string buf s
+
+ | ZSTRING s ->
+ iflog sess (fun _ -> log sess "lowering zstring: %s" s);
+ Buffer.add_string buf s;
+ byte 0
+
+ | ULEB128 e -> uleb e
+ | SLEB128 e -> sleb e
+
+ | WORD (m,e) ->
+ iflog sess
+ (fun _ ->
+ log sess "lowering word %s"
+ (string_of_ty_mach m));
+ word (bytes_of_ty_mach m) (mach_is_signed m) e
+
+ | ALIGN_FILE (n, frag) ->
+ let spill = (Buffer.length buf) mod n in
+ let pad = (n - spill) mod n in
+ for i = 1 to pad do
+ Buffer.add_char buf '\x00'
+ done;
+ lower_frag sess lsb0 buf relax frag
+
+ | ALIGN_MEM (_, i) -> lower_frag sess lsb0 buf relax i
+ | DEF (f, i) ->
+ iflog sess (fun _ -> log sess "lowering fixup: %s" f.fixup_name);
+ lower_frag sess lsb0 buf relax i;
+
+ | RELAX rel ->
+ begin
+ try
+ lower_frag sess lsb0 buf relax
+ rel.relax_options.(!(rel.relax_choice))
+ with
+ Bad_fit _ -> Queue.add rel relax
+ end
+;;
+
+(* [fold_flags f flags] ORs together the bit patterns obtained by
+ * applying [f] to each element of [flags]; yields 0L for an empty
+ * list. *)
+let fold_flags (f:'a -> int64) (flags:'a list) : int64 =
+  List.fold_left (fun acc flag -> Int64.logor acc (f flag)) 0x0L flags
+
+let write_out_frag sess lsb0 frag =
+ let buf = Buffer.create 0xffff in
+ let file = Session.filename_of sess.Session.sess_out in
+ let out = open_out_bin file in
+ write_frag ~sess ~lsb0 ~buf ~frag;
+ Buffer.output_buffer out buf;
+ flush out;
+ close_out out;
+ Unix.chmod file 0o755
+;;
+
+(* Asm-reader stuff for loading info back from mapped files. *)
+(*
+ * Unfortunately the ocaml Bigarray interface takes 'int' indices, so
+ * f.e. can't do 64-bit offsets / files when running on a 32bit platform.
+ * Despite the fact that we can possibly produce them. Sigh. Yet another
+ * "bootstrap compiler limitation".
+ *)
+type asm_reader =
+ {
+ asm_seek: int -> unit;
+ asm_get_u32: unit -> int;
+ asm_get_u16: unit -> int;
+ asm_get_u8: unit -> int;
+ asm_get_uleb: unit -> int;
+ asm_get_zstr: unit -> string;
+ asm_get_zstr_padded: int -> string;
+ asm_get_off: unit -> int;
+ asm_adv: int -> unit;
+ asm_adv_u32: unit -> unit;
+ asm_adv_u16: unit -> unit;
+ asm_adv_u8: unit -> unit;
+ asm_adv_zstr: unit -> unit;
+ asm_close: unit -> unit;
+ }
+;;
+
+type mmap_arr =
+ (int, Bigarray.int8_unsigned_elt, Bigarray.c_layout)
+ Bigarray.Array1.t
+;;
+
+let new_asm_reader (sess:Session.sess) (s:filename) : asm_reader =
+ iflog sess (fun _ -> log sess "opening file %s" s);
+ let fd = Unix.openfile s [ Unix.O_RDONLY ] 0 in
+ let arr = (Bigarray.Array1.map_file
+ fd ~pos:0L
+ Bigarray.int8_unsigned
+ Bigarray.c_layout
+ false (-1))
+ in
+ let tmp = ref Nativeint.zero in
+ let buf = Buffer.create 16 in
+ let off = ref 0 in
+ let is_open = ref true in
+ let get_word_as_int (nbytes:int) : int =
+ assert (!is_open);
+ let lsb0 = true in
+ tmp := Nativeint.zero;
+ if lsb0
+ then
+ for j = nbytes-1 downto 0 do
+ tmp := Nativeint.shift_left (!tmp) 8;
+ tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j})
+ done
+ else
+ for j = 0 to nbytes-1 do
+ tmp := Nativeint.shift_left (!tmp) 8;
+ tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j})
+ done;
+ off := (!off) + nbytes;
+ Nativeint.to_int (!tmp)
+ in
+ let get_zstr_padded pad_opt =
+ assert (!is_open);
+ let i = ref (!off) in
+ Buffer.clear buf;
+ let buflen_ok _ =
+ match pad_opt with
+ None -> true
+ | Some pad -> (Buffer.length buf) < pad
+ in
+ while arr.{!i} != 0 && (buflen_ok()) do
+ Buffer.add_char buf (Char.chr arr.{!i});
+ incr i
+ done;
+ begin
+ match pad_opt with
+ None -> off := (!off) + (Buffer.length buf) + 1
+ | Some pad ->
+ begin
+ assert ((Buffer.length buf) <= pad);
+ off := (!off) + pad
+ end
+ end;
+ Buffer.contents buf
+ in
+ let bump i =
+ assert (!is_open);
+ off := (!off) + i
+ in
+ {
+ asm_seek = (fun i -> off := i);
+ asm_get_u32 = (fun _ -> get_word_as_int 4);
+ asm_get_u16 = (fun _ -> get_word_as_int 2);
+ asm_get_u8 = (fun _ -> get_word_as_int 1);
+ asm_get_uleb =
+ begin
+ fun _ ->
+ let rec loop result shift =
+ let byte = arr.{!off} in
+ incr off;
+ let result = result lor ((byte land 0x7f) lsl shift) in
+ if (byte land 0x80) = 0
+ then result
+ else loop result (shift+7)
+ in
+ loop 0 0
+ end;
+ asm_get_zstr = (fun _ -> get_zstr_padded None);
+ asm_get_zstr_padded = (fun pad -> get_zstr_padded (Some pad));
+ asm_get_off = (fun _ -> !off);
+ asm_adv = bump;
+ asm_adv_u32 = (fun _ -> bump 4);
+ asm_adv_u16 = (fun _ -> bump 2);
+ asm_adv_u8 = (fun _ -> bump 1);
+ asm_adv_zstr = (fun _ -> while arr.{!off} != 0
+ do incr off done);
+ asm_close = (fun _ ->
+ assert (!is_open);
+ Unix.close fd;
+ is_open := false)
+ }
+;;
+
+
+(*
+ * Metadata note-section encoding / decoding.
+ *
+ * Since the only object format that defines a "note" section at all is
+ * ELF, we model the contents of the metadata section on ELF's
+ * notes. But the same blob of data is stuck into PE and Mach-O files
+ * too.
+ *
+ * The format is essentially just the ELF note format:
+ *
+ * <un-padded-size-of-name:u32>
+ * <size-of-desc:u32>
+ * <type-code=0:u32>
+ * <name="rust":zstr>
+ * <0-pad to 4-byte boundary>
+ * <n=meta-count:u32>
+ * <k1:zstr> <v1:zstr>
+ * ...
+ * <kn:zstr> <vn:zstr>
+ * <0-pad to 4-byte boundary>
+ *
+ *)
+let note_rust_frags (meta:(Ast.ident * string) array) : frag =
+ let desc_fixup = new_fixup ".rust.note metadata" in
+ let desc =
+ DEF (desc_fixup,
+ SEQ [|
+ WORD (TY_u32, IMM (Int64.of_int (Array.length meta)));
+ SEQ (Array.map
+ (fun (k,v) -> SEQ [| ZSTRING k; ZSTRING v; |])
+ meta);
+ ALIGN_FILE (4, MARK) |])
+ in
+ let name = "rust" in
+ let ty = 0L in
+ let padded_name = SEQ [| ZSTRING name;
+ ALIGN_FILE (4, MARK) |]
+ in
+ let name_sz = IMM (Int64.of_int ((String.length name) + 1)) in
+ SEQ [| WORD (TY_u32, name_sz);
+ WORD (TY_u32, F_SZ desc_fixup);
+ WORD (TY_u32, IMM ty);
+ padded_name;
+ desc;|]
+;;
+
+(* [read_rust_note ar] decodes the metadata note described above from
+ * reader [ar] (positioned at the start of the note) and returns the
+ * key/value pairs in order.  Skips the name-size and desc-size words,
+ * asserts type-code 0 and the padded "rust" name, then reads [n]
+ * zstr key/value pairs. *)
+let read_rust_note (ar:asm_reader) : (Ast.ident * string) array =
+  ar.asm_adv_u32 ();
+  ar.asm_adv_u32 ();
+  assert ((ar.asm_get_u32 ()) = 0);
+  (* "rust" + NUL is 5 bytes, padded to the next 4-byte boundary = 8. *)
+  let rust_name = ar.asm_get_zstr_padded 8 in
+    assert (rust_name = "rust");
+    let n = ar.asm_get_u32() in
+    let meta = Queue.create () in
+      for i = 1 to n
+      do
+        let k = ar.asm_get_zstr() in
+        let v = ar.asm_get_zstr() in
+          Queue.add (k,v) meta
+      done;
+      queue_to_arr meta
+;;
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)
diff --git a/src/boot/be/elf.ml b/src/boot/be/elf.ml
new file mode 100644
index 00000000..56905b2a
--- /dev/null
+++ b/src/boot/be/elf.ml
@@ -0,0 +1,1760 @@
+(*
+ * Module for writing System V ELF files.
+ *
+ * FIXME: Presently heavily infected with x86 and elf32 specificities,
+ * though they are reasonably well marked. Needs to be refactored to
+ * depend on abi fields if it's to be usable for other elf
+ * configurations.
+ *)
+
+open Asm;;
+open Common;;
+
+let log (sess:Session.sess) =
+ Session.log "obj (elf)"
+ sess.Session.sess_log_obj
+ sess.Session.sess_log_out
+;;
+
+let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit =
+ if sess.Session.sess_log_obj
+ then thunk ()
+ else ()
+;;
+
+
+(* Fixed sizes of structs involved in elf32 spec. *)
+let elf32_ehsize = 52L;;
+let elf32_phentsize = 32L;;
+let elf32_shentsize = 40L;;
+let elf32_symsize = 16L;;
+let elf32_rela_entsz = 0xcL;;
+
+type ei_class =
+ ELFCLASSNONE
+ | ELFCLASS32
+ | ELFCLASS64
+;;
+
+
+type ei_data =
+ ELFDATANONE
+ | ELFDATA2LSB
+ | ELFDATA2MSB
+;;
+
+
+let elf_identification ei_class ei_data =
+ SEQ
+ [|
+ STRING "\x7fELF";
+ BYTES
+ [|
+ (match ei_class with (* EI_CLASS *)
+ ELFCLASSNONE -> 0
+ | ELFCLASS32 -> 1
+ | ELFCLASS64 -> 2);
+ (match ei_data with (* EI_DATA *)
+ ELFDATANONE -> 0
+ | ELFDATA2LSB -> 1
+ | ELFDATA2MSB -> 2);
+ 1; (* EI_VERSION = EV_CURRENT *)
+ 0; (* EI_PAD #7 *)
+ 0; (* EI_PAD #8 *)
+ 0; (* EI_PAD #9 *)
+ 0; (* EI_PAD #A *)
+ 0; (* EI_PAD #B *)
+ 0; (* EI_PAD #C *)
+ 0; (* EI_PAD #D *)
+ 0; (* EI_PAD #E *)
+ 0; (* EI_PAD #F *)
+ |]
+ |]
+;;
+
+
+type e_type =
+ ET_NONE
+ | ET_REL
+ | ET_EXEC
+ | ET_DYN
+ | ET_CORE
+;;
+
+
+type e_machine =
+ (* Maybe support more later. *)
+ EM_NONE
+ | EM_386
+ | EM_X86_64
+;;
+
+
+type e_version =
+ EV_NONE
+ | EV_CURRENT
+;;
+
+
+let elf32_header
+ ~(sess:Session.sess)
+ ~(ei_data:ei_data)
+ ~(e_type:e_type)
+ ~(e_machine:e_machine)
+ ~(e_version:e_version)
+ ~(e_entry_fixup:fixup)
+ ~(e_phoff_fixup:fixup)
+ ~(e_shoff_fixup:fixup)
+ ~(e_phnum:int64)
+ ~(e_shnum:int64)
+ ~(e_shstrndx:int64)
+ : frag =
+ let elf_header_fixup = new_fixup "elf header" in
+ let entry_pos =
+ if sess.Session.sess_library_mode
+ then (IMM 0L)
+ else (M_POS e_entry_fixup)
+ in
+ DEF
+ (elf_header_fixup,
+ SEQ [| elf_identification ELFCLASS32 ei_data;
+ WORD (TY_u16, (IMM (match e_type with
+ ET_NONE -> 0L
+ | ET_REL -> 1L
+ | ET_EXEC -> 2L
+ | ET_DYN -> 3L
+ | ET_CORE -> 4L)));
+ WORD (TY_u16, (IMM (match e_machine with
+ EM_NONE -> 0L
+ | EM_386 -> 3L
+ | EM_X86_64 -> 62L)));
+ WORD (TY_u32, (IMM (match e_version with
+ EV_NONE -> 0L
+ | EV_CURRENT -> 1L)));
+ WORD (TY_u32, entry_pos);
+ WORD (TY_u32, (F_POS e_phoff_fixup));
+ WORD (TY_u32, (F_POS e_shoff_fixup));
+ WORD (TY_u32, (IMM 0L)); (* e_flags *)
+ WORD (TY_u16, (IMM elf32_ehsize));
+ WORD (TY_u16, (IMM elf32_phentsize));
+ WORD (TY_u16, (IMM e_phnum));
+ WORD (TY_u16, (IMM elf32_shentsize));
+ WORD (TY_u16, (IMM e_shnum));
+ WORD (TY_u16, (IMM e_shstrndx));
+ |])
+;;
+
+
+type sh_type =
+ SHT_NULL
+ | SHT_PROGBITS
+ | SHT_SYMTAB
+ | SHT_STRTAB
+ | SHT_RELA
+ | SHT_HASH
+ | SHT_DYNAMIC
+ | SHT_NOTE
+ | SHT_NOBITS
+ | SHT_REL
+ | SHT_SHLIB
+ | SHT_DYNSYM
+;;
+
+
+type sh_flags =
+ SHF_WRITE
+ | SHF_ALLOC
+ | SHF_EXECINSTR
+;;
+
+
+let section_header
+ ~(shstring_table_fixup:fixup)
+ ~(shname_string_fixup:fixup)
+ ~(sh_type:sh_type)
+ ~(sh_flags:sh_flags list)
+ ~(section_fixup:fixup option)
+ ~(sh_addralign:int64)
+ ~(sh_entsize:int64)
+ ~(sh_link:int64 option)
+ : frag =
+ SEQ
+ [|
+ WORD (TY_i32, (SUB
+ ((F_POS shname_string_fixup),
+ (F_POS shstring_table_fixup))));
+ WORD (TY_u32, (IMM (match sh_type with
+ SHT_NULL -> 0L
+ | SHT_PROGBITS -> 1L
+ | SHT_SYMTAB -> 2L
+ | SHT_STRTAB -> 3L
+ | SHT_RELA -> 4L
+ | SHT_HASH -> 5L
+ | SHT_DYNAMIC -> 6L
+ | SHT_NOTE -> 7L
+ | SHT_NOBITS -> 8L
+ | SHT_REL -> 9L
+ | SHT_SHLIB -> 10L
+ | SHT_DYNSYM -> 11L)));
+ WORD (TY_u32, (IMM (fold_flags
+ (fun f -> match f with
+ SHF_WRITE -> 0x1L
+ | SHF_ALLOC -> 0x2L
+ | SHF_EXECINSTR -> 0x4L) sh_flags)));
+ WORD (TY_u32, (match section_fixup with
+ None -> (IMM 0L)
+ | Some s -> (M_POS s)));
+ WORD (TY_u32, (match section_fixup with
+ None -> (IMM 0L)
+ | Some s -> (F_POS s)));
+ WORD (TY_u32, (match section_fixup with
+ None -> (IMM 0L)
+ | Some s -> (F_SZ s)));
+ WORD (TY_u32, (IMM (match sh_link with
+ None -> 0L
+ | Some i -> i)));
+ WORD (TY_u32, (IMM 0L)); (* sh_info *)
+ WORD (TY_u32, (IMM sh_addralign));
+ WORD (TY_u32, (IMM sh_entsize));
+ |]
+;;
+
+
+type p_type =
+ PT_NULL
+ | PT_LOAD
+ | PT_DYNAMIC
+ | PT_INTERP
+ | PT_NOTE
+ | PT_SHLIB
+ | PT_PHDR
+;;
+
+
+type p_flag =
+ PF_X
+ | PF_W
+ | PF_R
+;;
+
+
+let program_header
+ ~(p_type:p_type)
+ ~(segment_fixup:fixup)
+ ~(p_flags:p_flag list)
+ ~(p_align:int64)
+ : frag =
+ SEQ
+ [|
+ WORD (TY_u32, (IMM (match p_type with
+ PT_NULL -> 0L
+ | PT_LOAD -> 1L
+ | PT_DYNAMIC -> 2L
+ | PT_INTERP -> 3L
+ | PT_NOTE -> 4L
+ | PT_SHLIB -> 5L
+ | PT_PHDR -> 6L)));
+ WORD (TY_u32, (F_POS segment_fixup));
+ WORD (TY_u32, (M_POS segment_fixup));
+ WORD (TY_u32, (M_POS segment_fixup));
+ WORD (TY_u32, (F_SZ segment_fixup));
+ WORD (TY_u32, (M_SZ segment_fixup));
+ WORD (TY_u32, (IMM (fold_flags
+ (fun f ->
+ match f with
+ PF_X -> 0x1L
+ | PF_W -> 0x2L
+ | PF_R -> 0x4L)
+ p_flags)));
+ WORD (TY_u32, (IMM p_align));
+ |]
+;;
+
+
+type st_bind =
+ STB_LOCAL
+ | STB_GLOBAL
+ | STB_WEAK
+;;
+
+
+(* Symbol type (low nibble of Elf32_Sym.st_info). Numeric encodings
+ * are assigned in symbol below. *)
+type st_type =
+    STT_NOTYPE     (* 0: type not specified *)
+  | STT_OBJECT     (* 1: data object *)
+  | STT_FUNC       (* 2: function / executable code *)
+  | STT_SECTION    (* 3: associated with a section *)
+  | STT_FILE       (* 4: source-file name *)
+;;
+
+
+(* Special symbol-section indices: reserved Elf32_Sym.st_shndx values.
+ * BUG FIX: the second binding used to re-bind shn_ABS to 0xfff2,
+ * shadowing the correct SHN_ABS value (0xfff1) with what the ELF spec
+ * defines as SHN_COMMON. *)
+let shn_UNDEF = 0L;;        (* SHN_UNDEF: symbol undefined in this file *)
+let shn_ABS = 0xfff1L;;     (* SHN_ABS: symbol value is absolute *)
+let shn_COMMON = 0xfff2L;;  (* SHN_COMMON: unallocated common block *)
+
+
+(* Emits one Elf32_Sym record (16 bytes). The symbol's name is encoded
+ * as its byte offset within the string table beginning at
+ * [string_table_fixup]; an absent [sym_target_fixup] produces zero
+ * value and size (used for undefined / imported symbols). *)
+let symbol
+    ~(string_table_fixup:fixup)
+    ~(name_string_fixup:fixup)
+    ~(sym_target_fixup:fixup option)
+    ~(st_bind:st_bind)
+    ~(st_type:st_type)
+    ~(st_shndx:int64)
+    : frag =
+  let bind_code =
+    match st_bind with
+        STB_LOCAL -> 0L
+      | STB_GLOBAL -> 1L
+      | STB_WEAK -> 2L
+  in
+  let type_code =
+    match st_type with
+        STT_NOTYPE -> 0L
+      | STT_OBJECT -> 1L
+      | STT_FUNC -> 2L
+      | STT_SECTION -> 3L
+      | STT_FILE -> 4L
+  in
+  let value_expr =
+    match sym_target_fixup with
+        None -> (IMM 0L)
+      | Some f -> (M_POS f)
+  in
+  let size_expr =
+    match sym_target_fixup with
+        None -> (IMM 0L)
+      | Some f -> (M_SZ f)
+  in
+    SEQ
+      [|
+        (* st_name: offset of the name within the string table. *)
+        WORD (TY_u32, (SUB
+                         ((F_POS name_string_fixup),
+                          (F_POS string_table_fixup))));
+        WORD (TY_u32, value_expr);     (* st_value *)
+        WORD (TY_u32, size_expr);      (* st_size *)
+        (* st_info: binding in the high nibble, type in the low. *)
+        WORD (TY_u8,
+              (OR
+                 ((SLL ((IMM bind_code), 4)),
+                  (AND ((IMM type_code), (IMM 0xfL))))));
+        WORD (TY_u8, (IMM 0L));        (* st_other *)
+        WORD (TY_u16, (IMM st_shndx)); (* st_shndx *)
+      |]
+;;
+
+(* Tags for entries in the .dynamic section (Elf32_Dyn.d_tag). The
+ * numeric encodings, per the ELF spec's "Dynamic Section" table, are
+ * assigned in elf32_num_of_dyn_tag below. *)
+type d_tag =
+    DT_NULL
+  | DT_NEEDED
+  | DT_PLTRELSZ
+  | DT_PLTGOT
+  | DT_HASH
+  | DT_STRTAB
+  | DT_SYMTAB
+  | DT_RELA
+  | DT_RELASZ
+  | DT_RELAENT
+  | DT_STRSZ
+  | DT_SYMENT
+  | DT_INIT
+  | DT_FINI
+  | DT_SONAME
+  | DT_RPATH
+  | DT_SYMBOLIC
+  | DT_REL
+  | DT_RELSZ
+  | DT_RELENT
+  | DT_PLTREL
+  | DT_DEBUG
+  | DT_TEXTREL
+  | DT_JMPREL
+  | DT_BIND_NOW
+  | DT_INIT_ARRAY
+  | DT_FINI_ARRAY
+  | DT_INIT_ARRAYSZ
+  | DT_FINI_ARRAYSZ
+  | DT_RUNPATH
+  | DT_FLAGS
+  | DT_ENCODING
+  | DT_PREINIT_ARRAY
+  | DT_PREINIT_ARRAYSZ
+;;
+
+(* One .dynamic entry: a tag plus its word-sized value or pointer. *)
+type elf32_dyn = (d_tag * expr64);;
+
+(* Maps a d_tag constructor to its numeric d_tag encoding, per the ELF
+ * spec's "Dynamic Section" table (DT_NULL = 0 through
+ * DT_PREINIT_ARRAYSZ = 33). Kept as a plain data table so it can be
+ * checked against the spec line-by-line. *)
+let elf32_num_of_dyn_tag tag =
+  match tag with
+      DT_NULL -> 0L
+    | DT_NEEDED -> 1L
+    | DT_PLTRELSZ -> 2L
+    | DT_PLTGOT -> 3L
+    | DT_HASH -> 4L
+    | DT_STRTAB -> 5L
+    | DT_SYMTAB -> 6L
+    | DT_RELA -> 7L
+    | DT_RELASZ -> 8L
+    | DT_RELAENT -> 9L
+    | DT_STRSZ -> 10L
+    | DT_SYMENT -> 11L
+    | DT_INIT -> 12L
+    | DT_FINI -> 13L
+    | DT_SONAME -> 14L
+    | DT_RPATH -> 15L
+    | DT_SYMBOLIC -> 16L
+    | DT_REL -> 17L
+    | DT_RELSZ -> 18L
+    | DT_RELENT -> 19L
+    | DT_PLTREL -> 20L
+    | DT_DEBUG -> 21L
+    | DT_TEXTREL -> 22L
+    | DT_JMPREL -> 23L
+    | DT_BIND_NOW -> 24L
+    | DT_INIT_ARRAY -> 25L
+    | DT_FINI_ARRAY -> 26L
+    | DT_INIT_ARRAYSZ -> 27L
+    | DT_FINI_ARRAYSZ -> 28L
+    | DT_RUNPATH -> 29L
+    | DT_FLAGS -> 30L
+    | DT_ENCODING -> 31L
+    | DT_PREINIT_ARRAY -> 32L
+    | DT_PREINIT_ARRAYSZ -> 33L
+;;
+
+(* Renders one .dynamic entry: the numeric tag followed by its value,
+ * as two consecutive 32-bit words (Elf32_Dyn). *)
+let elf32_dyn_frag (tag, expr) =
+  let tag_word = WORD (TY_u32, (IMM (elf32_num_of_dyn_tag tag))) in
+  let val_word = WORD (TY_u32, expr) in
+    SEQ [| tag_word; val_word |]
+;;
+
+(* i386 relocation types (the subset we use), per the System V ABI
+ * Intel386 supplement. Numeric encodings are assigned in
+ * elf32_386_rela_frag below. *)
+type elf32_386_reloc_type =
+    R_386_NONE       (* 0: no relocation *)
+  | R_386_32         (* 1: direct 32-bit *)
+  | R_386_PC32       (* 2: PC-relative 32-bit *)
+  | R_386_GOT32      (* 3: 32-bit GOT entry offset *)
+  | R_386_PLT32      (* 4: 32-bit PLT address *)
+  | R_386_COPY       (* 5: copy symbol at runtime *)
+  | R_386_GLOB_DAT   (* 6: set GOT entry to symbol address *)
+  | R_386_JMP_SLOT   (* 7: set GOT entry for PLT jump slot *)
+  | R_386_RELATIVE   (* 8: adjust by load base *)
+  | R_386_GOTOFF     (* 9: offset from GOT base *)
+  | R_386_GOTPC      (* 10: PC-relative offset to GOT *)
+;;
+
+
+(* One i386 RELA relocation prior to encoding; see elf32_386_rela_frag
+ * for the on-disk Elf32_Rela layout. *)
+type elf32_386_rela =
+    { elf32_386_rela_type: elf32_386_reloc_type;  (* relocation kind *)
+      elf32_386_rela_offset: expr64;  (* r_offset: where to apply *)
+      elf32_386_rela_sym: expr64;     (* symbol-table index *)
+      elf32_386_rela_addend: expr64 } (* constant addend *)
+;;
+
+(* Renders one Elf32_Rela record for i386: r_offset, then r_info packed
+ * as (symbol-index << 8) | relocation-type, then r_addend — three
+ * consecutive 32-bit words. *)
+let elf32_386_rela_frag r =
+  let reloc_code =
+    match r.elf32_386_rela_type with
+        R_386_NONE -> 0L
+      | R_386_32 -> 1L
+      | R_386_PC32 -> 2L
+      | R_386_GOT32 -> 3L
+      | R_386_PLT32 -> 4L
+      | R_386_COPY -> 5L
+      | R_386_GLOB_DAT -> 6L
+      | R_386_JMP_SLOT -> 7L
+      | R_386_RELATIVE -> 8L
+      | R_386_GOTOFF -> 9L
+      | R_386_GOTPC -> 10L
+  in
+  let offset_word = WORD (TY_u32, r.elf32_386_rela_offset) in
+  let info_word =
+    (* r_info: symbol index in the high 24 bits, type in the low 8. *)
+    WORD (TY_u32,
+          (OR
+             (SLL ((r.elf32_386_rela_sym), 8),
+              AND ((IMM 0xffL), (IMM reloc_code)))))
+  in
+  let addend_word = WORD (TY_u32, r.elf32_386_rela_addend) in
+    SEQ [| offset_word; info_word; addend_word |]
+;;
+
+
+(* Assembles a complete ELF32 x86-linux dynamically-linked executable
+ * image as a single frag: the ELF header, program and section header
+ * tables, .text/.rodata/.data/.bss contents, the dynamic-linking
+ * machinery (PLT, GOT, .dynsym, .dynstr, .hash, .rela.plt, .dynamic),
+ * the dwarf debug sections and the .note.rust metadata note.
+ *
+ * Fixes relative to the previous revision (no behavioral change to
+ * emitted ELF bytes): the note.rust section fixup was misnamed
+ * ".shstrtab section" in diagnostics; the section-index comment block
+ * had duplicated numbers; the PLT diagram repeated entry index 2. *)
+let elf32_linux_x86_file
+    ~(sess:Session.sess)
+    ~(crate:Ast.crate)
+    ~(entry_name:string)
+    ~(text_frags:(string option, frag) Hashtbl.t)
+    ~(data_frags:(string option, frag) Hashtbl.t)
+    ~(rodata_frags:(string option, frag) Hashtbl.t)
+    ~(required_fixups:(string, fixup) Hashtbl.t)
+    ~(dwarf:Dwarf.debug_records)
+    ~(sem:Semant.ctxt)
+    ~(needed_libs:string array)
+    : frag =
+
+  (* Procedure Linkage Tables (PLTs), Global Offset Tables
+   * (GOTs), and the relocations that set them up:
+   *
+   * The PLT goes in a section called .plt and GOT in a section called
+   * .got. The portion of the GOT that holds PLT jump slots goes in a
+   * section called .got.plt. Dynamic relocations for these jump slots go in
+   * section .rela.plt.
+   *
+   * The easiest way to understand the PLT/GOT system is to draw it:
+   *
+   *     PLT                          GOT
+   *   +----------------------+     +----------------------+
+   *  0| push &<GOT[1]>            0| <reserved>
+   *   | jmp *GOT[2]               1| <libcookie>
+   *   |                           2| & <ld.so:resolve-a-sym>
+   *  1| jmp *GOT[3]               3| & <'push 0' in PLT[1]>
+   *   | push 0                    4| & <'push 1' in PLT[2]>
+   *   | jmp *PLT[0]               5| & <'push 2' in PLT[3]>
+   *   |
+   *  2| jmp *GOT[4]
+   *   | push 1
+   *   | jmp *PLT[0]
+   *   |
+   *  3| jmp *GOT[5]
+   *   | push 2
+   *   | jmp *PLT[0]
+   *
+   *
+   * In normal user code, we call PLT entries with a call to a
+   * PC-relative address, the PLT entry, which itself does an indirect
+   * jump through a slot in the GOT that it also addresses
+   * PC-relative. This makes the whole scheme PIC.
+   *
+   * The linker fills in the GOT on startup. For the first 3, it uses
+   * its own thinking. For the remainder it needs to be instructed to
+   * fill them in with "jump slot relocs", type R_386_JUMP_SLOT, each
+   * of which says in effect which PLT entry it's to point back to and
+   * which symbol it's to be resolved to later. These relocs go in the
+   * section .rela.plt.
+   *)
+
+  let plt0_fixup = new_fixup "PLT[0]" in
+  let got_prefix = SEQ [| WORD (TY_u32, (IMM 0L));
+                          WORD (TY_u32, (IMM 0L));
+                          WORD (TY_u32, (IMM 0L)); |]
+  in
+
+  (* Cell addressing GOT entry [i], relative to [reg] (which is assumed
+   * to hold the GOT base address). *)
+  let got_cell reg i =
+    let got_entry_off = Int64.of_int (i*4) in
+    let got_entry_mem = Il.RegIn (reg, (Some (Asm.IMM got_entry_off))) in
+      Il.Mem (got_entry_mem, Il.ScalarTy (Il.AddrTy Il.CodeTy))
+  in
+
+  let got_code_cell reg i =
+    Il.CodePtr (Il.Cell (got_cell reg i))
+  in
+
+  (* PLT[0]: push the libcookie (GOT[1]) and jump to the resolver
+   * address the dynamic linker wrote into GOT[2]. *)
+  let plt0_frag =
+    let reg = Il.Hreg X86.eax in
+    let e = X86.new_emitter_without_vregs () in
+      Il.emit e (Il.Push (Il.Cell (got_cell reg 1)));
+      Il.emit e (Il.jmp Il.JMP (got_code_cell reg 2));
+      Il.emit e Il.Nop;
+      Il.emit e Il.Nop;
+      Il.emit e Il.Nop;
+      Il.emit e Il.Nop;
+      DEF (plt0_fixup, (X86.frags_of_emitted_quads sess e))
+  in
+
+  (*
+   * The existence of the GOT/PLT mish-mash causes, therefore, the
+   * following new sections:
+   *
+   *   .plt       - the PLT itself, in the r/x text segment
+   *   .got.plt   - the PLT-used portion of the GOT, in the r/w segment
+   *   .rela.plt  - the dynamic relocs for the GOT-PLT, in the r/x segment
+   *
+   * In addition, because we're starting up a dynamically linked executable,
+   * we have to have several more sections!
+   *
+   *   .interp    - the read-only section that names ld.so
+   *   .dynsym    - symbols named by the PLT/GOT entries, r/x segment
+   *   .dynstr    - string-names used in those symbols, r/x segment
+   *   .hash      - hashtable in which to look these up, r/x segment
+   *   .dynamic   - the machine-readable description of the dynamic
+   *                linkage requirements of this elf file, in the
+   *                r/w _DYNAMIC segment
+   *
+   * The Dynamic section contains a sequence of 2-word records of type
+   * d_tag.
+   *
+   *)
+
+  (* There are 17 official section headers in the file we're making:  *)
+  (*                                                                  *)
+  (* section 0: <null section>                                        *)
+  (*                                                                  *)
+  (* section 1: .interp (segment 1: R+X, INTERP)                      *)
+  (*                                                                  *)
+  (* section 2: .text (segment 2: R+X, LOAD)                          *)
+  (* section 3: .rodata ...                                           *)
+  (* section 4: .dynsym ...                                           *)
+  (* section 5: .dynstr ...                                           *)
+  (* section 6: .hash ...                                             *)
+  (* section 7: .plt ...                                              *)
+  (* section 8: .got ...                                              *)
+  (* section 9: .rela.plt ...                                         *)
+  (*                                                                  *)
+  (* section 10: .data (segment 3: R+W, LOAD)                         *)
+  (* section 11: .bss ...                                             *)
+  (*                                                                  *)
+  (* section 12: .dynamic (segment 4: R+W, DYNAMIC)                   *)
+  (*                                                                  *)
+  (* section 13: .shstrtab (not in a segment)                         *)
+  (* section 14: .debug_info (segment 2: cont'd)                      *)
+  (* section 15: .debug_abbrev ...                                    *)
+  (* section 16: .note.rust (segment 5: NOTE)                         *)
+  (*                                                                  *)
+  (* The other dwarf section headers (.debug_aranges, .debug_pubnames,*)
+  (* .debug_line, .debug_frame) are commented out below until gdb     *)
+  (* accepts our dwarf output, so they do not count towards the 17.   *)
+
+  let sname s =
+    new_fixup (Printf.sprintf "string name of '%s' section" s)
+  in
+  let null_section_name_fixup = sname "<null>" in
+  let interp_section_name_fixup = sname ".interp" in
+  let text_section_name_fixup = sname ".text" in
+  let rodata_section_name_fixup = sname ".rodata" in
+  let dynsym_section_name_fixup = sname ".dynsym" in
+  let dynstr_section_name_fixup = sname ".dynstr" in
+  let hash_section_name_fixup = sname ".hash" in
+  let plt_section_name_fixup = sname ".plt" in
+  let got_plt_section_name_fixup = sname ".got.plt" in
+  let rela_plt_section_name_fixup = sname ".rela.plt" in
+  let data_section_name_fixup = sname ".data" in
+  let bss_section_name_fixup = sname ".bss" in
+  let dynamic_section_name_fixup = sname ".dynamic" in
+  let shstrtab_section_name_fixup = sname ".shstrtab" in
+  let debug_aranges_section_name_fixup = sname ".debug_aranges" in
+  let debug_pubnames_section_name_fixup = sname ".debug_pubnames" in
+  let debug_info_section_name_fixup = sname ".debug_info" in
+  let debug_abbrev_section_name_fixup = sname ".debug_abbrev" in
+  let debug_line_section_name_fixup = sname ".debug_line" in
+  let debug_frame_section_name_fixup = sname ".debug_frame" in
+  let note_rust_section_name_fixup = sname ".note.rust" in
+
+  (* let interpndx      = 1L in *)  (* Section index of .interp *)
+  let textndx        = 2L in        (* Section index of .text *)
+  let rodatandx      = 3L in        (* Section index of .rodata *)
+  let dynsymndx      = 4L in        (* Section index of .dynsym *)
+  let dynstrndx      = 5L in        (* Section index of .dynstr *)
+  (* let hashndx        = 6L in *)  (* Section index of .hash *)
+  (* let pltndx         = 7L in *)  (* Section index of .plt *)
+  (* let gotpltndx      = 8L in *)  (* Section index of .got.plt *)
+  (* let relapltndx     = 9L in *)  (* Section index of .rela.plt *)
+  let datandx        = 10L in       (* Section index of .data *)
+  (* let bssndx         = 11L in *) (* Section index of .bss *)
+  (* let dynamicndx     = 12L in *) (* Section index of .dynamic *)
+  let shstrtabndx    = 13L in       (* Section index of .shstrtab *)
+
+  let section_header_table_fixup = new_fixup ".section header table" in
+  let interp_section_fixup = new_fixup ".interp section" in
+  let text_section_fixup = new_fixup ".text section" in
+  let rodata_section_fixup = new_fixup ".rodata section" in
+  let dynsym_section_fixup = new_fixup ".dynsym section" in
+  let dynstr_section_fixup = new_fixup ".dynstr section" in
+  let hash_section_fixup = new_fixup ".hash section" in
+  let plt_section_fixup = new_fixup ".plt section" in
+  let got_plt_section_fixup = new_fixup ".got.plt section" in
+  let rela_plt_section_fixup = new_fixup ".rela.plt section" in
+  let data_section_fixup = new_fixup ".data section" in
+  let bss_section_fixup = new_fixup ".bss section" in
+  let dynamic_section_fixup = new_fixup ".dynamic section" in
+  let shstrtab_section_fixup = new_fixup ".shstrtab section" in
+  (* Was misnamed ".shstrtab section" (copy-paste); the name is only
+   * used in diagnostics, so emitted bytes are unchanged. *)
+  let note_rust_section_fixup = new_fixup ".note.rust section" in
+
+  let shstrtab_section =
+    SEQ
+      [|
+        DEF (null_section_name_fixup, ZSTRING "");
+        DEF (interp_section_name_fixup, ZSTRING ".interp");
+        DEF (text_section_name_fixup, ZSTRING ".text");
+        DEF (rodata_section_name_fixup, ZSTRING ".rodata");
+        DEF (dynsym_section_name_fixup, ZSTRING ".dynsym");
+        DEF (dynstr_section_name_fixup, ZSTRING ".dynstr");
+        DEF (hash_section_name_fixup, ZSTRING ".hash");
+        DEF (plt_section_name_fixup, ZSTRING ".plt");
+        DEF (got_plt_section_name_fixup, ZSTRING ".got.plt");
+        DEF (rela_plt_section_name_fixup, ZSTRING ".rela.plt");
+        DEF (data_section_name_fixup, ZSTRING ".data");
+        DEF (bss_section_name_fixup, ZSTRING ".bss");
+        DEF (dynamic_section_name_fixup, ZSTRING ".dynamic");
+        DEF (shstrtab_section_name_fixup, ZSTRING ".shstrtab");
+        DEF (debug_aranges_section_name_fixup, ZSTRING ".debug_aranges");
+        DEF (debug_pubnames_section_name_fixup, ZSTRING ".debug_pubnames");
+        DEF (debug_info_section_name_fixup, ZSTRING ".debug_info");
+        DEF (debug_abbrev_section_name_fixup, ZSTRING ".debug_abbrev");
+        DEF (debug_line_section_name_fixup, ZSTRING ".debug_line");
+        DEF (debug_frame_section_name_fixup, ZSTRING ".debug_frame");
+        DEF (note_rust_section_name_fixup, ZSTRING ".note.rust");
+      |]
+  in
+
+  let section_headers =
+    [|
+      (* <null> *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: null_section_name_fixup
+         ~sh_type: SHT_NULL
+         ~sh_flags: []
+         ~section_fixup: None
+         ~sh_addralign: 0L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .interp *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: interp_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: [ SHF_ALLOC ]
+         ~section_fixup: (Some interp_section_fixup)
+         ~sh_addralign: 1L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .text *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: text_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ]
+         ~section_fixup: (Some text_section_fixup)
+         ~sh_addralign: 32L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .rodata *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: rodata_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: [ SHF_ALLOC ]
+         ~section_fixup: (Some rodata_section_fixup)
+         ~sh_addralign: 32L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .dynsym *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: dynsym_section_name_fixup
+         ~sh_type: SHT_DYNSYM
+         ~sh_flags: [ SHF_ALLOC ]
+         ~section_fixup: (Some dynsym_section_fixup)
+         ~sh_addralign: 8L
+         ~sh_entsize: elf32_symsize
+         ~sh_link: (Some dynstrndx) );
+
+      (* .dynstr *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: dynstr_section_name_fixup
+         ~sh_type: SHT_STRTAB
+         ~sh_flags: [ SHF_ALLOC ]
+         ~section_fixup: (Some dynstr_section_fixup)
+         ~sh_addralign: 1L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .hash *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: hash_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: [ SHF_ALLOC ]
+         ~section_fixup: (Some hash_section_fixup)
+         ~sh_addralign: 4L
+         ~sh_entsize: 4L
+         ~sh_link: (Some dynsymndx));
+
+      (* .plt *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: plt_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ]
+         ~section_fixup: (Some plt_section_fixup)
+         ~sh_addralign: 4L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .got.plt *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: got_plt_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: [ SHF_ALLOC; SHF_WRITE ]
+         ~section_fixup: (Some got_plt_section_fixup)
+         ~sh_addralign: 4L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .rela.plt *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: rela_plt_section_name_fixup
+         ~sh_type: SHT_RELA
+         ~sh_flags: [ SHF_ALLOC ]
+         ~section_fixup: (Some rela_plt_section_fixup)
+         ~sh_addralign: 4L
+         ~sh_entsize: elf32_rela_entsz
+         ~sh_link: (Some dynsymndx));
+
+      (* .data *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: data_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: [ SHF_ALLOC; SHF_WRITE ]
+         ~section_fixup: (Some data_section_fixup)
+         ~sh_addralign: 32L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .bss *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: bss_section_name_fixup
+         ~sh_type: SHT_NOBITS
+         ~sh_flags: [ SHF_ALLOC; SHF_WRITE ]
+         ~section_fixup: (Some bss_section_fixup)
+         ~sh_addralign: 32L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .dynamic *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: dynamic_section_name_fixup
+         ~sh_type: SHT_DYNAMIC
+         ~sh_flags: [ SHF_ALLOC; SHF_WRITE ]
+         ~section_fixup: (Some dynamic_section_fixup)
+         ~sh_addralign: 8L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .shstrtab *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: shstrtab_section_name_fixup
+         ~sh_type: SHT_STRTAB
+         ~sh_flags: []
+         ~section_fixup: (Some shstrtab_section_fixup)
+         ~sh_addralign: 1L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+(*
+   FIXME: uncomment the dwarf section headers as you make use of them;
+   recent gdb versions have got fussier about parsing dwarf and don't
+   like seeing junk there.
+*)
+
+      (* .debug_aranges *)
+(*
+
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: debug_aranges_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: []
+         ~section_fixup: (Some sem.Semant.ctxt_debug_aranges_fixup)
+         ~sh_addralign: 8L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+*)
+      (* .debug_pubnames *)
+(*
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: debug_pubnames_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: []
+         ~section_fixup: (Some sem.Semant.ctxt_debug_pubnames_fixup)
+         ~sh_addralign: 1L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+*)
+
+      (* .debug_info *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: debug_info_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: []
+         ~section_fixup: (Some sem.Semant.ctxt_debug_info_fixup)
+         ~sh_addralign: 1L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+      (* .debug_abbrev *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: debug_abbrev_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: []
+         ~section_fixup: (Some sem.Semant.ctxt_debug_abbrev_fixup)
+         ~sh_addralign: 1L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+      (* .debug_line *)
+(*
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: debug_line_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: []
+         ~section_fixup: (Some sem.Semant.ctxt_debug_line_fixup)
+         ~sh_addralign: 1L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+*)
+
+      (* .debug_frame *)
+(*
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: debug_frame_section_name_fixup
+         ~sh_type: SHT_PROGBITS
+         ~sh_flags: []
+         ~section_fixup: (Some sem.Semant.ctxt_debug_frame_fixup)
+         ~sh_addralign: 4L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+*)
+
+      (* .note.rust *)
+      (section_header
+         ~shstring_table_fixup: shstrtab_section_fixup
+         ~shname_string_fixup: note_rust_section_name_fixup
+         ~sh_type: SHT_NOTE
+         ~sh_flags: []
+         ~section_fixup: (Some note_rust_section_fixup)
+         ~sh_addralign: 1L
+         ~sh_entsize: 0L
+         ~sh_link: None);
+
+    |]
+  in
+  let section_header_table = SEQ section_headers in
+
+
+  (* There are 6 official program headers in the file we're making:   *)
+  (* segment 0: RX / PHDR                                             *)
+  (* segment 1: R  / INTERP                                           *)
+  (* segment 2: RX / LOAD                                             *)
+  (* segment 3: RW / LOAD                                             *)
+  (* segment 4: RW / DYNAMIC                                          *)
+  (* segment 5: R                                                     *)
+
+  let program_header_table_fixup = new_fixup "program header table" in
+  let segment_0_fixup = new_fixup "segment 0" in
+  let segment_1_fixup = new_fixup "segment 1" in
+  let segment_2_fixup = new_fixup "segment 2" in
+  let segment_3_fixup = new_fixup "segment 3" in
+  let segment_4_fixup = new_fixup "segment 4" in
+  let segment_5_fixup = new_fixup "segment 5" in
+
+  let segment_0_align = 4 in
+  let segment_1_align = 1 in
+  let segment_2_align = 0x1000 in
+  let segment_3_align = 0x1000 in
+  let segment_4_align = 0x1000 in
+  let segment_5_align = 1 in
+
+  let program_headers = [|
+    (program_header
+       ~p_type: PT_PHDR
+       ~segment_fixup: segment_0_fixup
+       ~p_flags: [ PF_R; PF_X ]
+       ~p_align: (Int64.of_int segment_0_align));
+    (program_header
+       ~p_type: PT_INTERP
+       ~segment_fixup: segment_1_fixup
+       ~p_flags: [ PF_R ]
+       ~p_align: (Int64.of_int segment_1_align));
+    (program_header
+       ~p_type: PT_LOAD
+       ~segment_fixup: segment_2_fixup
+       ~p_flags: [ PF_R; PF_X ]
+       ~p_align: (Int64.of_int segment_2_align));
+    (program_header
+       ~p_type: PT_LOAD
+       ~segment_fixup: segment_3_fixup
+       ~p_flags: [ PF_R; PF_W ]
+       ~p_align: (Int64.of_int segment_3_align));
+    (program_header
+       ~p_type: PT_DYNAMIC
+       ~segment_fixup: segment_4_fixup
+       ~p_flags: [ PF_R; PF_W ]
+       ~p_align: (Int64.of_int segment_4_align));
+    (program_header
+       ~p_type: PT_NOTE
+       ~segment_fixup: segment_5_fixup
+       ~p_flags: [ PF_R;]
+       ~p_align: (Int64.of_int segment_5_align));
+  |]
+  in
+  let program_header_table = SEQ program_headers in
+
+  let e_entry_fixup = new_fixup "entry symbol" in
+
+  let elf_header =
+    elf32_header
+      ~sess
+      ~ei_data: ELFDATA2LSB
+      ~e_type: ET_DYN
+      ~e_machine: EM_386
+      ~e_version: EV_CURRENT
+
+      ~e_entry_fixup: e_entry_fixup
+      ~e_phoff_fixup: program_header_table_fixup
+      ~e_shoff_fixup: section_header_table_fixup
+      ~e_phnum: (Int64.of_int (Array.length program_headers))
+      ~e_shnum: (Int64.of_int (Array.length section_headers))
+      ~e_shstrndx: shstrtabndx
+  in
+
+  let n_syms = ref 1 in (* The empty symbol, implicit. *)
+
+  (* Each *_sym helper returns a (strtab-entry, symtab-entry) pair for
+   * one named symbol, bumping the running symbol count as it goes. *)
+  let data_sym name st_bind fixup =
+    let name_fixup = new_fixup ("data symbol name fixup: '" ^ name ^ "'") in
+    let strtab_entry = DEF (name_fixup, ZSTRING name) in
+    let symtab_entry =
+      symbol
+        ~string_table_fixup: dynstr_section_fixup
+        ~name_string_fixup: name_fixup
+        ~sym_target_fixup: (Some fixup)
+        ~st_bind
+        ~st_type: STT_OBJECT
+        ~st_shndx: datandx
+    in
+      incr n_syms;
+      (strtab_entry, symtab_entry)
+  in
+
+  let rodata_sym name st_bind fixup =
+    let name_fixup = new_fixup ("rodata symbol name fixup: '" ^ name ^ "'") in
+    let strtab_entry = DEF (name_fixup, ZSTRING name) in
+    let symtab_entry =
+      symbol
+        ~string_table_fixup: dynstr_section_fixup
+        ~name_string_fixup: name_fixup
+        ~sym_target_fixup: (Some fixup)
+        ~st_bind
+        ~st_type: STT_OBJECT
+        ~st_shndx: rodatandx
+    in
+      incr n_syms;
+      (strtab_entry, symtab_entry)
+  in
+
+  let text_sym name st_bind fixup =
+    let name_fixup = new_fixup ("text symbol name fixup: '" ^ name ^ "'") in
+    let strtab_frag = DEF (name_fixup, ZSTRING name) in
+    let symtab_frag =
+      symbol
+        ~string_table_fixup: dynstr_section_fixup
+        ~name_string_fixup: name_fixup
+        ~sym_target_fixup: (Some fixup)
+        ~st_bind: st_bind
+        ~st_type: STT_FUNC
+        ~st_shndx: textndx
+    in
+      incr n_syms;
+      (strtab_frag, symtab_frag)
+  in
+
+  (* Imported symbols: undefined here (shn_UNDEF, no target fixup); the
+   * dynamic linker resolves them at load time. *)
+  let require_sym name st_bind _(*fixup*) =
+    let name_fixup =
+      new_fixup ("require symbol name fixup: '" ^ name ^ "'")
+    in
+    let strtab_frag = DEF (name_fixup, ZSTRING name) in
+    let symtab_frag =
+      symbol
+        ~string_table_fixup: dynstr_section_fixup
+        ~name_string_fixup: name_fixup
+        ~sym_target_fixup: None
+        ~st_bind
+        ~st_type: STT_FUNC
+        ~st_shndx: shn_UNDEF
+    in
+      incr n_syms;
+      (strtab_frag, symtab_frag)
+  in
+
+  (* Hashtbl.fold worker: accumulates (strtab, symtab, body) frag lists
+   * for one (possibly anonymous) definition; the definition carrying
+   * [entry_name] additionally defines the ELF entry-point fixup. *)
+  let frags_of_symbol sym_emitter st_bind symname_opt symbody x =
+    let (strtab_frags, symtab_frags, body_frags) = x in
+    let (strtab_frag, symtab_frag, body_frag) =
+      match symname_opt with
+          None -> (MARK, MARK, symbody)
+        | Some symname ->
+            let body_fixup =
+              new_fixup ("symbol body fixup: '" ^ symname ^ "'")
+            in
+            let body =
+              if symname = entry_name
+              then DEF (e_entry_fixup, DEF (body_fixup, symbody))
+              else DEF (body_fixup, symbody)
+            in
+            let (str, sym) = sym_emitter symname st_bind body_fixup in
+              (str, sym, body)
+    in
+      ((strtab_frag :: strtab_frags),
+       (symtab_frag :: symtab_frags),
+       (body_frag :: body_frags))
+  in
+
+  (* Hashtbl.fold worker for imported symbols: emits the symbol-table
+   * entries plus the PLT entry, GOT jump slot, and R_386_JMP_SLOT
+   * reloc for imported symbol number [i]. *)
+  let frags_of_require_symbol sym_emitter st_bind symname plt_entry_fixup x =
+    let (i, strtab_frags, symtab_frags,
+         plt_frags, got_plt_frags, rela_plt_frags) = x in
+    let (strtab_frag, symtab_frag) = sym_emitter symname st_bind None in
+    let e = X86.new_emitter_without_vregs () in
+    let jump_slot_fixup = new_fixup ("jump slot #" ^ string_of_int i) in
+    let jump_slot_initial_target_fixup =
+      new_fixup ("jump slot #" ^ string_of_int i ^ " initial target") in
+
+    (* You may notice this PLT entry doesn't look like either of the
+     * types of "normal" PLT entries outlined in the ELF manual. It is,
+     * however, just what you get when you combine a PIC PLT entry with
+     * inline calls to the horrible __i686.get_pc_thunk.ax kludge used
+     * on x86 to support entering PIC PLTs. We're just doing it *in*
+     * the PLT entries rather than infecting all the callers with the
+     * obligation of having the GOT address in a register on
+     * PLT-entry.
+     *)
+
+    let plt_frag =
+      let (reg, _, _) = X86.get_next_pc_thunk in
+
+        Il.emit_full e (Some plt_entry_fixup) [] Il.Dead;
+
+        Abi.load_fixup_addr e reg got_plt_section_fixup Il.CodeTy;
+
+        Il.emit e (Il.jmp Il.JMP (got_code_cell reg (2+i)));
+
+        Il.emit_full e (Some jump_slot_initial_target_fixup)
+          [] (Il.Push (X86.immi (Int64.of_int i)));
+
+        Il.emit e (Il.jmp Il.JMP (Il.direct_code_ptr plt0_fixup));
+        X86.frags_of_emitted_quads sess e
+    in
+    let got_plt_frag =
+      DEF (jump_slot_fixup,
+           WORD (TY_u32, (M_POS jump_slot_initial_target_fixup)))
+    in
+    let rela_plt =
+      { elf32_386_rela_type = R_386_JMP_SLOT;
+        elf32_386_rela_offset = (M_POS jump_slot_fixup);
+        elf32_386_rela_sym = (IMM (Int64.of_int i));
+        elf32_386_rela_addend = (IMM 0L) }
+    in
+    let rela_plt_frag = elf32_386_rela_frag rela_plt in
+      (i+1,
+       (strtab_frag :: strtab_frags),
+       (symtab_frag :: symtab_frags),
+       (plt_frag :: plt_frags),
+       (got_plt_frag :: got_plt_frags),
+       (rela_plt_frag :: rela_plt_frags))
+  in
+
+  (* Emit text export symbols. *)
+  let (global_text_strtab_frags, global_text_symtab_frags) =
+    match htab_search sem.Semant.ctxt_native_provided SEG_text with
+        None -> ([], [])
+      | Some etab ->
+          Hashtbl.fold
+            begin
+              fun name fix x ->
+                let (strtab_frags, symtab_frags) = x in
+                let (str, sym) = text_sym name STB_GLOBAL fix in
+                  (str :: strtab_frags,
+                   sym :: symtab_frags)
+            end
+            etab
+            ([],[])
+  in
+
+  (* Emit text fragments (possibly named). *)
+  let (global_text_strtab_frags,
+       global_text_symtab_frags,
+       text_body_frags) =
+    Hashtbl.fold
+      (frags_of_symbol text_sym STB_GLOBAL)
+      text_frags
+      (global_text_strtab_frags, global_text_symtab_frags, [])
+  in
+
+  let (local_text_strtab_frags,
+       local_text_symtab_frags) =
+
+    let symbol_frags_of_code _ code accum =
+      let (strtab_frags, symtab_frags) = accum in
+      let fix = code.Semant.code_fixup in
+      let (strtab_frag, symtab_frag) =
+        text_sym fix.fixup_name STB_LOCAL fix
+      in
+        (strtab_frag :: strtab_frags,
+         symtab_frag :: symtab_frags)
+    in
+
+    let symbol_frags_of_glue_code g code accum =
+      let (strtab_frags, symtab_frags) = accum in
+      let fix = code.Semant.code_fixup in
+      let (strtab_frag, symtab_frag) =
+        text_sym (Semant.glue_str sem g) STB_LOCAL fix
+      in
+        (strtab_frag :: strtab_frags,
+         symtab_frag :: symtab_frags)
+    in
+
+    let item_str_frags, item_sym_frags =
+      Hashtbl.fold symbol_frags_of_code
+        sem.Semant.ctxt_all_item_code ([], [])
+    in
+    let glue_str_frags, glue_sym_frags =
+      Hashtbl.fold symbol_frags_of_glue_code
+        sem.Semant.ctxt_glue_code ([], [])
+    in
+      (item_str_frags @ glue_str_frags,
+       item_sym_frags @ glue_sym_frags)
+  in
+
+  (* Emit rodata export symbols. *)
+  let (rodata_strtab_frags, rodata_symtab_frags) =
+    match htab_search sem.Semant.ctxt_native_provided SEG_data with
+        None -> ([], [])
+      | Some etab ->
+          Hashtbl.fold
+            begin
+              fun name fix x ->
+                let (strtab_frags, symtab_frags) = x in
+                let (str, sym) = rodata_sym name STB_GLOBAL fix in
+                  (str :: strtab_frags,
+                   sym :: symtab_frags)
+            end
+            etab
+            ([],[])
+  in
+
+  (* Emit rodata fragments (possibly named). *)
+  let (rodata_strtab_frags,
+       rodata_symtab_frags,
+       rodata_body_frags) =
+    Hashtbl.fold
+      (frags_of_symbol rodata_sym STB_GLOBAL)
+      rodata_frags
+      (rodata_strtab_frags, rodata_symtab_frags, [])
+  in
+
+
+  let (data_strtab_frags,
+       data_symtab_frags,
+       data_body_frags) =
+    Hashtbl.fold (frags_of_symbol data_sym STB_GLOBAL) data_frags ([],[],[])
+  in
+
+  (* Imported symbols start at index 1 (index 0 is the null symbol),
+   * with PLT[0] and the 3-word GOT prefix pre-seeded. *)
+  let (_,
+       require_strtab_frags,
+       require_symtab_frags,
+       plt_frags,
+       got_plt_frags,
+       rela_plt_frags) =
+    Hashtbl.fold (frags_of_require_symbol require_sym STB_GLOBAL)
+      required_fixups
+      (1,[],[],[plt0_frag],[got_prefix],[])
+  in
+  let require_symtab_frags = List.rev require_symtab_frags in
+  let plt_frags = List.rev plt_frags in
+  let got_plt_frags = List.rev got_plt_frags in
+  let rela_plt_frags = List.rev rela_plt_frags in
+
+  let dynamic_needed_strtab_frags =
+    Array.make (Array.length needed_libs) MARK
+  in
+
+  let dynamic_frags =
+    let dynamic_needed_frags = Array.make (Array.length needed_libs) MARK in
+      for i = 0 to (Array.length needed_libs) - 1 do
+        let fixup =
+          new_fixup ("needed library name fixup: " ^ needed_libs.(i))
+        in
+          dynamic_needed_frags.(i) <-
+            elf32_dyn_frag (DT_NEEDED, SUB (M_POS fixup,
+                                            M_POS dynstr_section_fixup));
+          dynamic_needed_strtab_frags.(i) <-
+            DEF (fixup, ZSTRING needed_libs.(i))
+      done;
+      (SEQ [|
+         SEQ dynamic_needed_frags;
+         elf32_dyn_frag (DT_STRTAB, M_POS dynstr_section_fixup);
+         elf32_dyn_frag (DT_STRSZ, M_SZ dynstr_section_fixup);
+
+         elf32_dyn_frag (DT_SYMTAB, M_POS dynsym_section_fixup);
+         elf32_dyn_frag (DT_SYMENT, IMM elf32_symsize);
+
+         elf32_dyn_frag (DT_HASH, M_POS hash_section_fixup);
+         elf32_dyn_frag (DT_PLTGOT, M_POS got_plt_section_fixup);
+
+         elf32_dyn_frag (DT_PLTREL, IMM (elf32_num_of_dyn_tag DT_RELA));
+         elf32_dyn_frag (DT_PLTRELSZ, M_SZ rela_plt_section_fixup);
+         elf32_dyn_frag (DT_JMPREL, M_POS rela_plt_section_fixup);
+
+         elf32_dyn_frag (DT_NULL, IMM 0L)
+       |])
+  in
+
+  let null_strtab_fixup = new_fixup "null dynstrtab entry" in
+  let null_strtab_frag = DEF (null_strtab_fixup, ZSTRING "") in
+  let null_symtab_frag = (symbol
+                            ~string_table_fixup: dynstr_section_fixup
+                            ~name_string_fixup: null_strtab_fixup
+                            ~sym_target_fixup: None
+                            ~st_bind: STB_LOCAL
+                            ~st_type: STT_NOTYPE
+                            ~st_shndx: 0L) in
+
+  let dynsym_frags = (null_symtab_frag ::
+                        (require_symtab_frags @
+                           global_text_symtab_frags @
+                           local_text_symtab_frags @
+                           rodata_symtab_frags @
+                           data_symtab_frags))
+  in
+
+  let dynstr_frags = (null_strtab_frag ::
+                        (require_strtab_frags @
+                           global_text_strtab_frags @
+                           local_text_strtab_frags @
+                           rodata_strtab_frags @
+                           data_strtab_frags @
+                           (Array.to_list dynamic_needed_strtab_frags)))
+  in
+
+  let interp_section =
+    DEF (interp_section_fixup, ZSTRING "/lib/ld-linux.so.2")
+  in
+
+  let text_section =
+    DEF (text_section_fixup,
+         SEQ (Array.of_list text_body_frags))
+  in
+  let rodata_section =
+    DEF (rodata_section_fixup,
+         SEQ (Array.of_list rodata_body_frags))
+  in
+  let data_section =
+    DEF (data_section_fixup,
+         SEQ (Array.of_list data_body_frags))
+  in
+  let bss_section =
+    DEF (bss_section_fixup,
+         SEQ [| |])
+  in
+  let dynsym_section =
+    DEF (dynsym_section_fixup,
+         SEQ (Array.of_list dynsym_frags))
+  in
+  let dynstr_section =
+    DEF (dynstr_section_fixup,
+         SEQ (Array.of_list dynstr_frags))
+  in
+
+  let hash_section =
+    let n_syms = !n_syms in
+
+      DEF (hash_section_fixup,
+           (* Worst hashtable ever: one chain. *)
+           SEQ [|
+             WORD (TY_u32, IMM 1L);  (* nbucket *)
+             WORD (TY_u32,           (* nchain *)
+                   IMM (Int64.of_int n_syms));
+             WORD (TY_u32, IMM 1L);  (* bucket 0 => symbol 1. *)
+             SEQ
+               begin
+                 Array.init
+                   n_syms
+                   (fun i ->
+                      let next = (* chain[i] => if last then 0 else i+1 *)
+                        if i > 0 && i < (n_syms-1)
+                        then Int64.of_int (i+1)
+                        else 0L
+                      in
+                        WORD (TY_u32, IMM next))
+               end;
+           |])
+  in
+
+  let plt_section =
+    DEF (plt_section_fixup,
+         SEQ (Array.of_list plt_frags))
+  in
+
+  let got_plt_section =
+    DEF (got_plt_section_fixup,
+         SEQ (Array.of_list got_plt_frags))
+  in
+
+  let rela_plt_section =
+    DEF (rela_plt_section_fixup,
+         SEQ (Array.of_list rela_plt_frags))
+  in
+
+  let dynamic_section =
+    DEF (dynamic_section_fixup, dynamic_frags)
+  in
+
+  let note_rust_section =
+    DEF (note_rust_section_fixup,
+         (Asm.note_rust_frags crate.node.Ast.crate_meta))
+  in
+
+
+  let page_alignment = 0x1000 in
+
+  let align_both i =
+    ALIGN_FILE (page_alignment,
+                (ALIGN_MEM (page_alignment, i)))
+  in
+
+  let def_aligned f i =
+    align_both
+      (SEQ [| DEF(f,i);
+              (align_both MARK)|])
+  in
+
+  let debug_aranges_section =
+    def_aligned
+      sem.Semant.ctxt_debug_aranges_fixup
+      dwarf.Dwarf.debug_aranges
+  in
+  let debug_pubnames_section =
+    def_aligned
+      sem.Semant.ctxt_debug_pubnames_fixup
+      dwarf.Dwarf.debug_pubnames
+  in
+  let debug_info_section =
+    def_aligned
+      sem.Semant.ctxt_debug_info_fixup
+      dwarf.Dwarf.debug_info
+  in
+  let debug_abbrev_section =
+    def_aligned
+      sem.Semant.ctxt_debug_abbrev_fixup
+      dwarf.Dwarf.debug_abbrev
+  in
+  let debug_line_section =
+    def_aligned
+      sem.Semant.ctxt_debug_line_fixup
+      dwarf.Dwarf.debug_line
+  in
+  let debug_frame_section =
+    def_aligned sem.Semant.ctxt_debug_frame_fixup dwarf.Dwarf.debug_frame
+  in
+
+  let load_address = 0x0804_8000L in
+
+    (* Final layout: segment 2 (R+X LOAD) covers the headers and all
+     * read-only/executable content; segment 3 (R+W LOAD) covers the
+     * writable content, nesting the DYNAMIC and NOTE segments. *)
+    SEQ
+      [|
+        MEMPOS load_address;
+        ALIGN_FILE
+          (segment_2_align,
+           DEF
+             (segment_2_fixup,
+              SEQ
+                [|
+                  DEF (sem.Semant.ctxt_image_base_fixup, MARK);
+                  elf_header;
+                  ALIGN_FILE
+                    (segment_0_align,
+                     DEF
+                       (segment_0_fixup,
+                        SEQ
+                          [|
+                            DEF (program_header_table_fixup,
+                                 program_header_table);
+                          |]));
+                  ALIGN_FILE
+                    (segment_1_align,
+                     DEF (segment_1_fixup, interp_section));
+                  text_section;
+                  rodata_section;
+                  dynsym_section;
+                  dynstr_section;
+                  hash_section;
+                  plt_section;
+                  rela_plt_section;
+                  debug_aranges_section;
+                  debug_pubnames_section;
+                  debug_info_section;
+                  debug_abbrev_section;
+                  debug_line_section;
+                  debug_frame_section;
+                |]));
+        ALIGN_FILE
+          (segment_3_align,
+           DEF
+             (segment_3_fixup,
+              SEQ
+                [|
+                  data_section;
+                  got_plt_section;
+                  bss_section;
+                  ALIGN_FILE
+                    (segment_4_align,
+                     DEF (segment_4_fixup,
+                          dynamic_section));
+                  ALIGN_FILE
+                    (segment_5_align,
+                     DEF (segment_5_fixup,
+                          note_rust_section));
+                |]));
+        DEF (shstrtab_section_fixup,
+             shstrtab_section);
+        DEF (section_header_table_fixup,
+             section_header_table);
+      |]
+;;
+
(* Top-level ELF emission: assemble the complete 32-bit x86 Linux image
 * for [crate] and write it out.  [code] holds the crate's text frags,
 * [data] its constant data; [sem] supplies semantic fixups and [dwarf]
 * the DWARF debug sections. *)
let emit_file
    (sess:Session.sess)
    (crate:Ast.crate)
    (code:Asm.frag)
    (data:Asm.frag)
    (sem:Semant.ctxt)
    (dwarf:Dwarf.debug_records)
    : unit =

  let text_frags = Hashtbl.create 4 in
  let rodata_frags = Hashtbl.create 4 in
  let data_frags = Hashtbl.create 4 in
  let required_fixups = Hashtbl.create 4 in

  (*
   * Startup on elf-linux is more complex than in win32. It's
   * thankfully documented in some detail around the net.
   *
   *   - The elf entry address is for _start.
   *
   *   - _start pushes:
   *
   *       eax   (should be zero)
   *       esp   (holding the kernel-provided stack end)
   *       edx   (address of _rtld_fini)
   *       address of _fini
   *       address of _init
   *       ecx   (argv)
   *       esi   (argc)
   *       address of main
   *
   *     and then calls __libc_start_main@plt.
   *
   *   - This means any sensible binary has a PLT. Fun. So
   *     We call into the PLT, which itself is just a bunch
   *     of indirect jumps through slots in the GOT, and wind
   *     up in __libc_start_main. Which calls _init, then
   *     essentially exit(main(argc,argv)).
   *)

  let init_fixup = new_fixup "_init function entry" in
  let fini_fixup = new_fixup "_fini function entry" in
  (* Library mode has no _start and does not import rust_start. *)
  let (start_fixup, rust_start_fixup) =
    if sess.Session.sess_library_mode
    then (None, None)
    else (Some (new_fixup "start function entry"),
          Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start"))
  in
  let libc_start_main_fixup = new_fixup "__libc_start_main@plt stub" in

  (* Body of _start: zero ebp, pop argc into esi, capture argv in ecx,
   * 16-align the stack, then push the __libc_start_main argument block
   * (see comment above) and call through the PLT stub. *)
  let start_fn _ =
    let start_fixup =
      match start_fixup with
          None -> bug () "missing start fixup in non-library mode"
        | Some s -> s
    in
    let e = X86.new_emitter_without_vregs () in
    let push_r32 r = Il.emit e
      (Il.Push (Il.Cell (Il.Reg (Il.Hreg r, Il.ValTy Il.Bits32))))
    in
    let push_pos32 = X86.push_pos32 e in

      Il.emit e (Il.unary Il.UMOV (X86.rc X86.ebp) (X86.immi 0L));
      Il.emit e (Il.Pop (X86.rc X86.esi));
      Il.emit e (Il.unary Il.UMOV (X86.rc X86.ecx) (X86.ro X86.esp));
      (* The mask is a 64-bit literal; only its low 32 bits matter on
       * x86 — effectively esp &= ~0xf. *)
      Il.emit e (Il.binary Il.AND
                   (X86.rc X86.esp) (X86.ro X86.esp)
                   (X86.immi 0xfffffffffffffff0L));

      push_r32 X86.eax;
      push_r32 X86.esp;
      push_r32 X86.edx;
      push_pos32 fini_fixup;
      push_pos32 init_fixup;
      push_r32 X86.ecx;
      push_r32 X86.esi;
      push_pos32 start_fixup;
      Il.emit e (Il.call
                   (Il.Reg (Il.Hreg X86.eax, Il.ValTy Il.Bits32))
                   (Il.direct_code_ptr libc_start_main_fixup));
      X86.frags_of_emitted_quads sess e
  in

  (* Shared body for _init and _fini: a bare ret. *)
  let do_nothing_fn _ =
    let e = X86.new_emitter_without_vregs () in
      Il.emit e Il.Ret;
      X86.frags_of_emitted_quads sess e
  in

  (* "main" trampolines into rust_start; emitted only when all three
   * fixups exist (i.e. executable mode and the crate has a main fn),
   * otherwise it is an empty MARK. *)
  let main_fn _ =
    match (start_fixup, rust_start_fixup, sem.Semant.ctxt_main_fn_fixup) with
        (None, _, _)
      | (_, None, _)
      | (_, _, None) -> MARK
      | (Some start_fixup,
         Some rust_start_fixup,
         Some main_fn_fixup) ->
          let e = X86.new_emitter_without_vregs () in
            X86.objfile_start e
              ~start_fixup
              ~rust_start_fixup
              ~main_fn_fixup
              ~crate_fixup: sem.Semant.ctxt_crate_fixup
              ~indirect_start: false;
            X86.frags_of_emitted_quads sess e
  in

  let needed_libs =
    [|
      "libc.so.6";
      "librustrt.so"
    |]
  in

  let _ =
    if not sess.Session.sess_library_mode
    then
      begin
        htab_put text_frags (Some "_start") (start_fn());
        htab_put text_frags (Some "_init")
          (DEF (init_fixup, do_nothing_fn()));
        htab_put text_frags (Some "_fini")
          (DEF (fini_fixup, do_nothing_fn()));
        htab_put text_frags (Some "main") (main_fn ());
        htab_put required_fixups "__libc_start_main" libc_start_main_fixup;
      end;
    htab_put text_frags None code;
    (* NOTE(review): the [data] argument is filed under rodata here, so
     * data_frags stays empty from this function — confirm callers pass
     * read-only frags in [data]. *)
    htab_put rodata_frags None data;

    (* Every native symbol semant requires becomes a required fixup for
     * the link. *)
    Hashtbl.iter
      begin
        fun _ tab ->
          Hashtbl.iter
            begin
              fun name fixup ->
                htab_put required_fixups name fixup
            end
            tab
      end
      sem.Semant.ctxt_native_required
  in
  let all_frags =
    elf32_linux_x86_file
      ~sess
      ~crate
      ~entry_name: "_start"
      ~text_frags
      ~data_frags
      ~dwarf
      ~sem
      ~rodata_frags
      ~required_fixups
      ~needed_libs
  in
    write_out_frag sess true all_frags
;;
+
(* The four ELF identification bytes: 0x7f followed by "ELF". *)
let elf_magic = "\x7fELF";;
+
(* Probe [filename]: if it is a regular file longer than four bytes that
 * begins with the ELF magic, return an asm_reader rewound to offset 0;
 * otherwise (including on any I/O failure while probing) return None. *)
let sniff
    (sess:Session.sess)
    (filename:filename)
    : asm_reader option =
  let looks_plausible st =
    st.Unix.st_kind = Unix.S_REG && st.Unix.st_size > 4
  in
  let probe () =
    let ar = new_asm_reader sess filename in
    let _ = log sess "sniffing ELF file" in
      if (ar.asm_get_zstr_padded 4) = elf_magic
      then begin ar.asm_seek 0; Some ar end
      else None
  in
    try
      if looks_plausible (Unix.stat filename)
      then probe ()
      else None
    with
        _ -> None
;;
+
(* Parse the section-header table of an already-sniffed 32-bit ELF file
 * and return a map from section name to (file offset, size).  Reads are
 * stateful on [ar]; the cursor is left wherever the last name read put
 * it. *)
let get_sections
    (sess:Session.sess)
    (ar:asm_reader)
    : (string,(int*int)) Hashtbl.t =
  let sects = Hashtbl.create 0 in
  let _ = log sess "reading sections" in
  let elf_id = ar.asm_get_zstr_padded 4 in
  let _ = assert (elf_id = elf_magic) in

  (* Walk the fixed-layout ELF32 header; only e_shoff, e_shentsize,
   * e_shnum and e_shstrndx are kept, the rest is skipped in order. *)
  let _ = ar.asm_seek 0x10 in
  let _ = ar.asm_adv_u16 () in  (* e_type *)
  let _ = ar.asm_adv_u16 () in  (* e_machine *)
  let _ = ar.asm_adv_u32 () in  (* e_version *)
  let _ = ar.asm_adv_u32 () in  (* e_entry *)
  let _ = ar.asm_adv_u32 () in  (* e_phoff *)
  let e_shoff = ar.asm_get_u32 () in (* e_shoff *)
  let _ = ar.asm_adv_u32 () in  (* e_flags *)
  let _ = ar.asm_adv_u16 () in  (* e_ehsize *)
  let _ = ar.asm_adv_u16 () in  (* e_phentsize *)
  let _ = ar.asm_adv_u16 () in  (* e_phnum *)
  let e_shentsize = ar.asm_get_u16 () in
  let e_shnum = ar.asm_get_u16 () in
  let e_shstrndx = ar.asm_get_u16 () in
  let _ = log sess
    "%d ELF section headers, %d bytes each, starting at 0x%x"
    e_shnum e_shentsize e_shoff
  in
  let _ = log sess "section %d is .shstrtab" e_shstrndx in

  (* Seek to and decode section header [n]: the name's offset into
   * .shstrtab, plus the section's file offset and size. *)
  let read_section_hdr n =
    let _ = ar.asm_seek (e_shoff + n * e_shentsize) in
    let str_off = ar.asm_get_u32() in
    let _ = ar.asm_adv_u32() in (* sh_type *)
    let _ = ar.asm_adv_u32() in (* sh_flags *)
    let _ = ar.asm_adv_u32() in (* sh_addr *)
    let off = ar.asm_get_u32() in (* sh_off *)
    let size = ar.asm_get_u32() in (* sh_size *)
    let _ = ar.asm_adv_u32() in (* sh_link *)
    let _ = ar.asm_adv_u32() in (* sh_info *)
    let _ = ar.asm_adv_u32() in (* sh_addralign *)
    let _ = ar.asm_adv_u32() in (* sh_entsize *)
      (str_off, off, size)
  in

  (* The body of section e_shstrndx (.shstrtab) holds all names. *)
  let (_, str_base, _) = read_section_hdr e_shstrndx in

  (* NOTE(review): this seek is redundant — read_section_hdr re-seeks
   * on every call — but harmless. *)
  let _ = ar.asm_seek e_shoff in
    for i = 0 to (e_shnum - 1) do
      let (str_off, off, size) = read_section_hdr i in
      let _ = ar.asm_seek (str_base + str_off) in
      let name = ar.asm_get_zstr() in
        log sess "section %d: %s, size %d, offset 0x%x" i name size off;
        Hashtbl.add sects name (off, size);
    done;
    sects
;;
+
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)
diff --git a/src/boot/be/il.ml b/src/boot/be/il.ml
new file mode 100644
index 00000000..e095e627
--- /dev/null
+++ b/src/boot/be/il.ml
@@ -0,0 +1,1135 @@
+open Common;;
+
(* FIXME (issue #1): thread a session object through this eventually. *)
(* When true, the formatters below print IL types alongside cells and
 * operands. *)
let log_iltypes = ref false;;

(* IL type system, very rudimentary. *)

(* Machine bit-widths a scalar value can occupy. *)
type bits =
    Bits8
  | Bits16
  | Bits32
  | Bits64
;;

(* A scalar is a plain value of some width, or the address of a
 * referent. *)
type scalar_ty =
    ValTy of bits
  | AddrTy of referent_ty

(* What an address may point at. *)
and referent_ty =
    ScalarTy of scalar_ty
  | StructTy of referent_ty array
  | UnionTy of referent_ty array
  | ParamTy of ty_param_idx (* Thing of current-frame type-param #n *)
  | OpaqueTy                (* Unknown memory-resident thing. *)
  | CodeTy                  (* Executable machine code. *)
  | NilTy                   (* 0 bits of space. *)
;;

let (voidptr_t:scalar_ty) = AddrTy OpaqueTy;;
let (codeptr_t:scalar_ty) = AddrTy CodeTy;;

(* Operands. *)

type vreg = int ;;   (* Virtual register number. *)
type hreg = int ;;   (* Hardware register number. *)
type label = int ;;  (* Index into the current quad block. *)
type spill = int ;;  (* Spill-slot number. *)

(* Virtual or hardware register. *)
type reg =
    Vreg of vreg
  | Hreg of hreg
;;

(* Memory addressing modes. *)
type mem =
    Abs of Asm.expr64                     (* Absolute address. *)
  | RegIn of (reg * (Asm.expr64 option))  (* Register base + optional disp. *)
  | Spill of spill                        (* Frame-relative spill slot. *)
;;

type typed_reg = (reg * scalar_ty);;
type typed_mem = (mem * referent_ty);;
type typed_imm = (Asm.expr64 * ty_mach);;
type typed_imm_ptr = (fixup * referent_ty);;

(* A mutable location: register or memory. *)
type cell =
    Reg of typed_reg
  | Mem of typed_mem
;;

(*
 * ImmPtr (a, rty) can be assigned to anything of scalar_ty
 * AddrTy rty; the difference is that ImmAddr carries its value
 * so can be used in cases where we want to have an immediate
 * address constant-propagated through the code to the backend.
 *)
type operand =
    Cell of cell
  | Imm of typed_imm
  | ImmPtr of typed_imm_ptr
;;


(* Jump/call targets. *)
type code =
    CodeLabel of label (* Index into current quad block. *)
  | CodePtr of operand
  | CodeNone
;;
+
+(* NB: for the most part, we let the register allocator assign spills
+ * from vregs, and we permanently allocate aliased slots to stack
+ * locations by static aliasing information early, in layout.
+ *
+ * The one awkward case this doesn't handle is when someone tries to
+ * pass a literal-atom to an alias-slot. This *requires* a memory slot
+ * but we only realize it rather late, much later than we'd normally
+ * have thougt to desugar the literal into a temporary.
+ *
+ * So in these cases, we let the trans module explicitly demand a
+ * "Spill n" operand, which the register allocator mops up before it
+ * gets started on the vregs.
+ *
+ * NOTE: if we were more clever we'd integrate vregs and spills like
+ * this together along with the general notion of a temporary way back
+ * at the desugaring stage, and use some kind of size-class
+ * consolidation so that spills with non-overlapping lifetimes could
+ * share memory. But we're not that clever yet.
+ *)
+
+
+(* Helpers. *)
+
(* Wrap fixup [fix] as a direct (immediate) code-pointer target. *)
let direct_code_ptr fix =
  (CodePtr (ImmPtr (fix, CodeTy)))
;;
+
(* Referent type denoted by a cell: a register's scalar type wrapped as
 * ScalarTy, a memory cell's referent type as-is. *)
let cell_referent_ty = function
    Reg (_, st) -> ScalarTy st
  | Mem (_, rt) -> rt
;;
+
(* True for cells that occupy zero bits of storage: nil memory, or a
 * register holding an address of nil. *)
let cell_is_nil c =
  match c with
      Mem (_, NilTy) | Reg (_, AddrTy NilTy) -> true
    | _ -> false
;;
+
(* Only nil cells are nil operands; immediates never are. *)
let operand_is_nil = function
    Cell c -> cell_is_nil c
  | Imm _ | ImmPtr _ -> false
;;
+
(* Displace address [mem] by the constant expression [off].  Spill slots
 * are placed by the register allocator and take no displacement. *)
let mem_off (mem:mem) (off:Asm.expr64) : mem =
  match mem with
      Abs e -> Abs (Asm.ADD (off, e))
    | RegIn (r, None) -> RegIn (r, Some off)
    | RegIn (r, Some e) -> RegIn (r, Some (Asm.ADD (off, e)))
    | Spill _ -> bug () "Adding offset to spill slot"
;;
+
(* Displace [m] by the literal immediate [i]. *)
let mem_off_imm (m:mem) (i:int64) : mem =
  mem_off m (Asm.IMM i)
;;
+
+
+(* Quads. *)
+
(* Two-operand integer ALU operations (I*/U* are signed/unsigned). *)
type binop =
    ADD | SUB
  | IMUL | UMUL
  | IDIV | UDIV
  | IMOD | UMOD
  | AND | OR | XOR
  | LSL | LSR | ASR
;;

(* One-operand operations; see emit_full's default_mov for how the
 * UMOV/IMOV distinction is chosen. *)
type unop =
    NEG | NOT
  | UMOV | IMOV
  | ZERO
;;

(* Conditional and unconditional jump operations. *)
type jmpop =
    JE | JNE
  | JZ | JNZ             (* FIXME: Synonyms with JE/JNE in x86, others? *)
  | JL | JLE | JG | JGE  (* Signed. *)
  | JB | JBE | JA | JAE  (* Unsigned. *)
  | JC | JNC | JO | JNO
  | JMP
;;

type binary =
    {
      binary_op: binop;
      binary_dst: cell;
      binary_lhs: operand;
      binary_rhs: operand
    }
;;

type unary =
    {
      unary_op: unop;
      unary_dst: cell;
      unary_src: operand
    }
;;

(* Compare sets condition state consumed by a subsequent Jmp. *)
type cmp =
    {
      cmp_lhs: operand;
      cmp_rhs: operand
    }
;;

type lea =
    {
      lea_dst: cell;
      lea_src: operand
    }
;;

type jmp =
    {
      jmp_op: jmpop;
      jmp_targ: code;
    }
;;

type call =
    {
      call_dst: cell;
      call_targ: code
    }

(* A single quad instruction body. *)
type quad' =
    Binary of binary
  | Unary of unary
  | Lea of lea
  | Cmp of cmp
  | Jmp of jmp
  | Push of operand
  | Pop of cell
  | Call of call
  | Debug           (* Debug-break pseudo-instruction. *)
  | Enter of fixup  (* Enter-fixup-block pseudo-instruction. *)
  | Leave           (* Leave-fixup-block pseudo-instruction. *)
  | Ret             (* Return to caller. *)
  | Nop             (* Keep this quad here, emit CPU nop. *)
  | Dead            (* Keep this quad but emit nothing. *)
  | Regfence        (* Clobber all hregs. *)
  | End             (* Space past the end of quads to emit. *)
;;

(* A quad: its body, an optional fixup defined at its address, and the
 * labels of quads that implicitly flow into it. *)
type quad =
    { quad_fixup: fixup option;
      quad_implicits: label list;
      quad_body: quad'; }

type quads = quad array ;;
+
+(* Query functions. *)
+
(* Registers are always scalar; memory only when typed ScalarTy. *)
let cell_is_scalar (c:cell) : bool =
  match c with
      Reg _ | Mem (_, ScalarTy _) -> true
    | _ -> false
;;
+
+
(* Bit-width of each machine type. *)
let bits_of_ty_mach (tm:ty_mach) : bits =
  match tm with
      TY_u8 | TY_i8 -> Bits8
    | TY_u16 | TY_i16 -> Bits16
    | TY_u32 | TY_i32 | TY_f32 -> Bits32
    | TY_u64 | TY_i64 | TY_f64 -> Bits64
;;
+
(* Scalar type of a cell; it is a bug to ask on non-scalar memory. *)
let cell_scalar_ty (c:cell) : scalar_ty =
  match c with
      Reg (_, st) | Mem (_, ScalarTy st) -> st
    | _ -> bug () "mem of non-scalar in Il.cell_scalar_ty"
;;
+
(* Scalar type of an operand: immediates are values at their machine
 * width, immediate pointers are addresses of their referent. *)
let operand_scalar_ty (op:operand) : scalar_ty =
  match op with
      Imm (_, t) -> ValTy (bits_of_ty_mach t)
    | ImmPtr (_, t) -> AddrTy t
    | Cell c -> cell_scalar_ty c
;;
+
+
(* Width of a scalar; addresses are always machine-word wide. *)
let scalar_ty_bits (word_bits:bits) (st:scalar_ty) : bits =
  match st with
      AddrTy _ -> word_bits
    | ValTy b -> b
;;
+
(* Width of a scalar cell; bug to call on non-scalar memory. *)
let cell_bits (word_bits:bits) (c:cell) : bits =
  match c with
      Reg (_, st)
    | Mem (_, ScalarTy st) -> scalar_ty_bits word_bits st
    | Mem _ -> bug () "mem of non-scalar in Il.cell_bits"
;;
+
(* Width of any operand under machine word width [word_bits]. *)
let operand_bits (word_bits:bits) (op:operand) : bits =
  match op with
      Imm (_, tm) -> bits_of_ty_mach tm
    | ImmPtr _ -> word_bits
    | Cell c -> cell_bits word_bits c
;;
+
(* Size in bytes of a value of width [b]. *)
let bits_size (b:bits) : int64 =
  match b with
      Bits8 -> 1L | Bits16 -> 2L
    | Bits32 -> 4L | Bits64 -> 8L
;;
+
(* Natural alignment in bytes: scalars align to their own size. *)
let bits_align (b:bits) : int64 =
  match b with
      Bits8 -> 1L | Bits16 -> 2L
    | Bits32 -> 4L | Bits64 -> 8L
;;
+
(* Byte size of a scalar under machine word width [word_bits]. *)
let scalar_ty_size (word_bits:bits) (st:scalar_ty) : int64 =
  bits_size (scalar_ty_bits word_bits st)
;;

(* Byte alignment of a scalar under machine word width [word_bits]. *)
let scalar_ty_align (word_bits:bits) (st:scalar_ty) : int64 =
  bits_align (scalar_ty_bits word_bits st)
;;
+
(* Compute (size, alignment) of a referent type as symbolic [size]
 * values, so type-parametric layouts can be resolved later.  Structs
 * are laid out C-style: each member at its own alignment, the whole as
 * aligned as its most-aligned member.  Note the struct size is NOT
 * rounded up to the struct alignment here. *)
let rec referent_ty_layout (word_bits:bits) (rt:referent_ty) : (size * size) =
  match rt with
      ScalarTy st -> (SIZE_fixed (scalar_ty_size word_bits st),
                      SIZE_fixed (scalar_ty_align word_bits st))
    | StructTy rts ->
        begin
          (* Fold members left-to-right: align the running offset to
           * each member, accumulate the max alignment seen. *)
          let accum (off,align) rt : (size * size) =
            let (elt_size, elt_align) = referent_ty_layout word_bits rt in
            let elt_off = align_sz elt_align off in
              (add_sz elt_off elt_size, max_sz elt_align align)
          in
            Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts
        end
    | UnionTy rts ->
        begin
          (* A union is as large and as aligned as its largest arm. *)
          let accum (sz,align) rt : (size * size) =
            let (elt_size, elt_align) = referent_ty_layout word_bits rt in
              (max_sz sz elt_size, max_sz elt_align align)
          in
            Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts
        end
    | OpaqueTy -> bug () "opaque ty in referent_ty_layout"
    | CodeTy -> bug () "code ty in referent_ty_layout"
    | ParamTy i -> (SIZE_param_size i, SIZE_param_align i)
    | NilTy -> (SIZE_fixed 0L, SIZE_fixed 1L)

(* Size component of the layout only. *)
and referent_ty_size (word_bits:bits) (rt:referent_ty) : size =
  (fst (referent_ty_layout word_bits rt))

(* Alignment component of the layout only. *)
and referent_ty_align (word_bits:bits) (rt:referent_ty) : size =
  (snd (referent_ty_layout word_bits rt))

;;
+
(* Offset of element [i] inside a struct whose member types are [elts]:
 * the size of the members before it, rounded up to member [i]'s
 * alignment. *)
let get_element_offset
    (word_bits:bits)
    (elts:referent_ty array)
    (i:int)
    : size =
  let size_before =
    referent_ty_size word_bits (StructTy (Array.sub elts 0 i))
  in
  let elt_align = referent_ty_align word_bits elts.(i) in
    align_sz elt_align size_before
;;
+
+(* Processor. *)
+
(* A quad_processor is a bundle of rewriting callbacks threaded through
 * process_quad; each callback receives the whole bundle so it can
 * recurse.  Cell reads and writes are distinguished so users (e.g. the
 * register allocator) can track uses and defs separately. *)
type quad_processor =
    { qp_reg: (quad_processor -> reg -> reg);
      qp_mem: (quad_processor -> mem -> mem);
      qp_cell_read: (quad_processor -> cell -> cell);
      qp_cell_write: (quad_processor -> cell -> cell);
      qp_code: (quad_processor -> code -> code);
      qp_op: (quad_processor -> operand -> operand); }
;;

(* Processor that rebuilds every quad unchanged; users override only the
 * callbacks they care about. *)
let identity_processor =
  let qp_cell = (fun qp c -> match c with
                     Reg (r, b) -> Reg (qp.qp_reg qp r, b)
                   | Mem (a, b) -> Mem (qp.qp_mem qp a, b))
  in
    { qp_reg = (fun _ r -> r);
      qp_mem = (fun qp a -> match a with
                    RegIn (r, o) -> RegIn (qp.qp_reg qp r, o)
                  | Abs _
                  | Spill _ -> a);
      qp_cell_read = qp_cell;
      qp_cell_write = qp_cell;
      qp_code = (fun qp c -> match c with
                     CodePtr op -> CodePtr (qp.qp_op qp op)
                   | CodeLabel _
                   | CodeNone -> c);
      qp_op = (fun qp op -> match op with
                   Cell c -> Cell (qp.qp_cell_read qp c)
                 | ImmPtr _ -> op
                 | Imm _ -> op) }
;;
+
(* Rebuild quad [q] by running [qp]'s callbacks over every operand, cell
 * and code target.  Destination cells go through qp_cell_write, sources
 * through qp_op (and thus qp_cell_read); fixups and implicits are
 * preserved. *)
let process_quad (qp:quad_processor) (q:quad) : quad =
  { q with
      quad_body = match q.quad_body with
          Binary b ->
            Binary { b with
                       binary_dst = qp.qp_cell_write qp b.binary_dst;
                       binary_lhs = qp.qp_op qp b.binary_lhs;
                       binary_rhs = qp.qp_op qp b.binary_rhs }
        | Unary u ->
            Unary { u with
                      unary_dst = qp.qp_cell_write qp u.unary_dst;
                      unary_src = qp.qp_op qp u.unary_src }

        | Lea le ->
            Lea { lea_dst = qp.qp_cell_write qp le.lea_dst;
                  lea_src = qp.qp_op qp le.lea_src }

        | Cmp c ->
            Cmp { cmp_lhs = qp.qp_op qp c.cmp_lhs;
                  cmp_rhs = qp.qp_op qp c.cmp_rhs }

        | Jmp j ->
            Jmp { j with
                    jmp_targ = qp.qp_code qp j.jmp_targ }

        | Push op ->
            Push (qp.qp_op qp op)

        | Pop c ->
            Pop (qp.qp_cell_write qp c)

        | Call c ->
            Call { call_dst = qp.qp_cell_write qp c.call_dst;
                   call_targ = qp.qp_code qp c.call_targ }

        (* Operand-free quads pass through unchanged. *)
        | Ret -> Ret
        | Nop -> Nop
        | Debug -> Debug
        | Regfence -> Regfence
        | Enter f -> Enter f
        | Leave -> Leave
        | Dead -> Dead
        | End -> End }
;;
+
(* Run [qp] over every quad purely for the side effects of its
 * callbacks; the rewritten quads are discarded.  (Cleaned up the old
 * `ignore (...); ()` — the trailing unit was redundant; the ignored
 * value is now type-annotated so partial application can't slip by.) *)
let visit_quads (qp:quad_processor) (qs:quads) : unit =
  Array.iter (fun q -> ignore (process_quad qp q : quad)) qs
;;
+
(* Rewrite every quad with [qp], returning a fresh array. *)
let process_quads (qp:quad_processor) (qs:quads) : quads =
  Array.map (fun q -> process_quad qp q) qs
;;
+
(* Rewrite [qs] in place, replacing each quad with its processed form. *)
let rewrite_quads (qp:quad_processor) (qs:quads) : unit =
  Array.iteri (fun i q -> qs.(i) <- process_quad qp q) qs
;;
+
+
+(* A little partial-evaluator to help lowering sizes. *)
+
(* Lower a symbolic [size] to a constant Asm expression when possible.
 * Returns None for sizes that involve type parameters (SIZE_param_*),
 * which can only be resolved at run time. *)
let rec size_to_expr64 (a:size) : Asm.expr64 option =
  (* Lower both children; combine with [f] only if both succeed. *)
  let binary a b f =
    match (size_to_expr64 a, size_to_expr64 b) with
        (Some a, Some b) -> Some (f a b)
      | _ -> None
  in
    match a with
        SIZE_fixed i -> Some (Asm.IMM i)
      | SIZE_fixup_mem_sz f -> Some (Asm.M_SZ f)
      | SIZE_fixup_mem_pos f -> Some (Asm.M_POS f)
      | SIZE_rt_neg s ->
          begin
            match (size_to_expr64 s) with
                None -> None
              | Some s -> Some (Asm.NEG s)
          end
      | SIZE_rt_add (a, b) -> binary a b (fun a b -> Asm.ADD (a,b))
      | SIZE_rt_mul (a, b) -> binary a b (fun a b -> Asm.MUL (a,b))
      | SIZE_rt_max (a, b) -> binary a b (fun a b -> Asm.MAX (a,b))
      | SIZE_rt_align (a, b) -> binary a b (fun a b -> Asm.ALIGN (a,b))
      | _ -> None
;;
+
+
+(* Formatters. *)
+
(* Short printed form of a bit width. *)
let string_of_bits (b:bits) : string =
  match b with
      Bits8 -> "b8"
    | Bits16 -> "b16"
    | Bits32 -> "b32"
    | Bits64 -> "b64"
;;

(* Printed form of a scalar type; addresses render as "<referent>*". *)
let rec string_of_scalar_ty (s:scalar_ty) : string =
  match s with
      ValTy b -> (string_of_bits b)
    | AddrTy r -> (string_of_referent_ty r) ^ "*"

(* Printed form of a referent type: structs as [a,b,...], unions as
 * (a|b|...). *)
and string_of_referent_ty (r:referent_ty) : string =
  match r with
      ScalarTy s -> (string_of_scalar_ty s)
    | StructTy rs ->
        Printf.sprintf "[%s]"
          (String.concat ","
             (Array.to_list (Array.map string_of_referent_ty rs)))
    | UnionTy rs ->
        Printf.sprintf "(%s)"
          (String.concat "|"
             (Array.to_list (Array.map string_of_referent_ty rs)))
    | ParamTy i -> Printf.sprintf "#%d" i
    | OpaqueTy -> "?"
    | CodeTy -> "!"
    | NilTy -> "()"
;;


(* Maps a hardware register number to its target-specific name. *)
type hreg_formatter = hreg -> string;;

(* Print a register: vregs generically, hregs via [f]. *)
let string_of_reg (f:hreg_formatter) (r:reg) : string =
  match r with
      Vreg i -> Printf.sprintf "<v%d>" i
    | Hreg i -> f i
;;
+
(* Render an Asm constant expression for diagnostics; immediates print
 * in hex, negative ones with a leading minus. *)
let rec string_of_expr64 (e64:Asm.expr64) : string =
  let bin op a b =
    Printf.sprintf "(%s %s %s)" (string_of_expr64 a) op (string_of_expr64 b)
  in
  let bini op a b =
    Printf.sprintf "(%s %s %d)" (string_of_expr64 a) op b
  in
    match e64 with
        Asm.IMM i when (i64_lt i 0L) -> Printf.sprintf "-0x%Lx" (Int64.neg i)
      | Asm.IMM i -> Printf.sprintf "0x%Lx" i
      | Asm.ADD (a,b) -> bin "+" a b
      | Asm.SUB (a,b) -> bin "-" a b
      | Asm.MUL (a,b) -> bin "*" a b
      | Asm.DIV (a,b) -> bin "/" a b
      | Asm.REM (a,b) -> bin "%" a b
      | Asm.MAX (a,b) ->
          Printf.sprintf "(max %s %s)"
            (string_of_expr64 a) (string_of_expr64 b)
      | Asm.ALIGN (a,b) ->
          Printf.sprintf "(align %s %s)"
            (string_of_expr64 a) (string_of_expr64 b)
      | Asm.SLL (a,b) -> bini "<<" a b
      | Asm.SLR (a,b) -> bini ">>" a b
      | Asm.SAR (a,b) -> bini ">>>" a b
      | Asm.AND (a,b) -> bin "&" a b
      | Asm.XOR (a,b) -> bin "xor" a b
      | Asm.OR (a,b) -> bin "|" a b
      | Asm.NOT a -> Printf.sprintf "(not %s)" (string_of_expr64 a)
      | Asm.NEG a -> Printf.sprintf "-%s" (string_of_expr64 a)
      | Asm.F_POS f -> Printf.sprintf "<%s>.fpos" f.fixup_name
      | Asm.F_SZ f -> Printf.sprintf "<%s>.fsz" f.fixup_name
      | Asm.M_POS f -> Printf.sprintf "<%s>.mpos" f.fixup_name
      | Asm.M_SZ f -> Printf.sprintf "<%s>.msz" f.fixup_name
      | Asm.EXT _ -> "??ext??"
;;

(* Render an optional displacement with a leading " + " or " - ". *)
let string_of_off (e:Asm.expr64 option) : string =
  match e with
      None -> ""
    | Some (Asm.IMM i) when (i64_lt i 0L) ->
        Printf.sprintf " - 0x%Lx" (Int64.neg i)
    | Some e' -> " + " ^ (string_of_expr64 e')
;;
+
(* Render an addressing mode in brackets. *)
let string_of_mem (f:hreg_formatter) (a:mem) : string =
  match a with
      Abs e ->
        Printf.sprintf "[%s]" (string_of_expr64 e)
    | RegIn (r, off) ->
        Printf.sprintf "[%s%s]" (string_of_reg f r) (string_of_off off)
    | Spill i ->
        Printf.sprintf "[<spill %d>]" i
;;

(* Render a cell, appending ":type" when !log_iltypes is set. *)
let string_of_cell (f:hreg_formatter) (c:cell) : string =
  match c with
      Reg (r,ty) ->
        if !log_iltypes
        then
          Printf.sprintf "%s:%s" (string_of_reg f r) (string_of_scalar_ty ty)
        else
          Printf.sprintf "%s" (string_of_reg f r)
    | Mem (a,ty) ->
        if !log_iltypes
        then
          Printf.sprintf "%s:%s"
            (string_of_mem f a) (string_of_referent_ty ty)
        else
          Printf.sprintf "%s" (string_of_mem f a)
;;

(* Render an operand; immediates get a "$" prefix.  Note: in the ImmPtr
 * arm the pattern variable [f] shadows the formatter argument [f] —
 * harmless, since that arm prints no registers. *)
let string_of_operand (f:hreg_formatter) (op:operand) : string =
  match op with
      Cell c -> string_of_cell f c
    | ImmPtr (f, ty) ->
        if !log_iltypes
        then
          Printf.sprintf "$<%s>.mpos:%s*"
            f.fixup_name (string_of_referent_ty ty)
        else
          Printf.sprintf "$<%s>.mpos" f.fixup_name
    | Imm (i, ty) ->
        if !log_iltypes
        then
          Printf.sprintf "$%s:%s" (string_of_expr64 i) (string_of_ty_mach ty)
        else
          Printf.sprintf "$%s" (string_of_expr64 i)
;;


(* Render a jump/call target. *)
let string_of_code (f:hreg_formatter) (c:code) : string =
  match c with
      CodeLabel lab -> Printf.sprintf "<label %d>" lab
    | CodePtr op -> string_of_operand f op
    | CodeNone -> "<none>"
;;
+
+
(* Mnemonic for each binary ALU op. *)
let string_of_binop (op:binop) : string =
  match op with
      ADD -> "add"
    | SUB -> "sub"
    | IMUL -> "imul"
    | UMUL -> "umul"
    | IDIV -> "idiv"
    | UDIV -> "udiv"
    | IMOD -> "imod"
    | UMOD -> "umod"
    | AND -> "and"
    | OR -> "or"
    | XOR -> "xor"
    | LSL -> "lsl"
    | LSR -> "lsr"
    | ASR -> "asr"
;;

(* Mnemonic for each unary op. *)
let string_of_unop (op:unop) : string =
  match op with
      NEG -> "neg"
    | NOT -> "not"
    | UMOV -> "umov"
    | IMOV -> "imov"
    | ZERO -> "zero"
;;

(* Mnemonic for each jump op. *)
let string_of_jmpop (op:jmpop) : string =
  match op with
      JE -> "je"
    | JNE -> "jne"
    | JL -> "jl"
    | JLE -> "jle"
    | JG -> "jg"
    | JGE -> "jge"
    | JB -> "jb"
    | JBE -> "jbe"
    | JA -> "ja"
    | JAE -> "jae"
    | JC -> "jc"
    | JNC -> "jnc"
    | JO -> "jo"
    | JNO -> "jno"
    | JZ -> "jz"
    | JNZ -> "jnz"
    | JMP -> "jmp"
;;
+
(* One-line disassembly of a quad, using [f] for hreg names. *)
let string_of_quad (f:hreg_formatter) (q:quad) : string =
  match q.quad_body with
      Binary b ->
        Printf.sprintf "%s = %s %s %s"
          (string_of_cell f b.binary_dst)
          (string_of_operand f b.binary_lhs)
          (string_of_binop b.binary_op)
          (string_of_operand f b.binary_rhs)

    | Unary u ->
        Printf.sprintf "%s = %s %s"
          (string_of_cell f u.unary_dst)
          (string_of_unop u.unary_op)
          (string_of_operand f u.unary_src)

    | Cmp c ->
        Printf.sprintf "cmp %s %s"
          (string_of_operand f c.cmp_lhs)
          (string_of_operand f c.cmp_rhs)

    | Lea le ->
        Printf.sprintf "lea %s %s"
          (string_of_cell f le.lea_dst)
          (string_of_operand f le.lea_src)

    | Jmp j ->
        Printf.sprintf "%s %s"
          (string_of_jmpop j.jmp_op)
          (string_of_code f j.jmp_targ)

    | Push op ->
        Printf.sprintf "push %s"
          (string_of_operand f op)

    | Pop c ->
        Printf.sprintf "%s = pop"
          (string_of_cell f c)

    | Call c ->
        Printf.sprintf "%s = call %s"
          (string_of_cell f c.call_dst)
          (string_of_code f c.call_targ)

    | Ret -> "ret"
    | Nop -> "nop"
    | Dead -> "dead"
    | Debug -> "debug"
    | Regfence -> "regfence"
    | Enter _ -> "enter lexical block"
    | Leave -> "leave lexical block"
    | End -> "---"
;;
+
+
+
+(* Emitters. *)
+
+
(* Quad-emission state.  [emit_pc] is the next write slot in
 * [emit_quads], which grows geometrically.  [emit_next_vreg] is None on
 * emitters that may not allocate virtual registers.
 * [emit_preallocator] maps each quad to the target's required operand
 * placement; [emit_is_2addr] marks two-address targets (x86). *)
type emitter = { mutable emit_pc: int;
                 mutable emit_next_vreg: int option;
                 mutable emit_next_spill: int;
                 emit_preallocator: (quad' -> quad');
                 emit_is_2addr: bool;
                 mutable emit_quads: quads;
                 emit_annotations: (int,string) Hashtbl.t;
                 emit_size_cache: ((size,operand) Hashtbl.t) Stack.t;
                 emit_node: node_id option;
               }


(* Filler quad for unwritten buffer slots. *)
let badq = { quad_fixup = None;
             quad_implicits = [];
             quad_body = End }
;;


(* A quad kept in place but emitting nothing. *)
let deadq = { quad_fixup = None;
              quad_implicits = [];
              quad_body = Dead }
;;
+
+
(* Build a fresh emitter.  [preallocator] rewrites quads into the
 * target's register-preallocation form; [is_2addr] marks two-address
 * targets (x86); [vregs_ok] enables virtual-register allocation;
 * [node] is the AST node being translated, if any. *)
let new_emitter
    (preallocator:quad' -> quad')
    (is_2addr:bool)
    (vregs_ok:bool)
    (node:node_id option)
    : emitter =
  {
    emit_pc = 0;
    emit_next_vreg = (if vregs_ok then Some 0 else None);
    emit_next_spill = 0;
    emit_preallocator = preallocator;
    emit_is_2addr = is_2addr;
    (* Array.make, not the deprecated Array.create alias. *)
    emit_quads = Array.make 4 badq;
    emit_annotations = Hashtbl.create 0;
    emit_size_cache = Stack.create ();
    emit_node = node;
  }
;;
+
+
(* Number of vregs allocated so far; zero on a non-vreg emitter. *)
let num_vregs (e:emitter) : int =
  match e.emit_next_vreg with
      Some i -> i
    | None -> 0
;;
+
(* Allocate and return the next virtual-register number; it is a bug to
 * call this on an emitter created without vreg support. *)
let next_vreg_num (e:emitter) : vreg =
  match e.emit_next_vreg with
      None -> bug () "Il.next_vreg_num on non-vreg emitter"
    | Some i ->
        e.emit_next_vreg <- Some (i + 1);
        i
;;
+
(* Fresh virtual register. *)
let next_vreg (e:emitter) : reg =
  Vreg (next_vreg_num e)
;;

(* Fresh virtual register wrapped as a cell of scalar type [s]. *)
let next_vreg_cell (e:emitter) (s:scalar_ty) : cell =
  Reg ((next_vreg e), s)
;;

(* Allocate and return the next spill-slot number. *)
let next_spill (e:emitter) : spill =
  let i = e.emit_next_spill in
    e.emit_next_spill <- i + 1;
    i
;;
+
(* Fresh spill slot typed as referent [r].  (Dropped a stray trailing
 * semicolon that made the body read as a one-expression sequence.) *)
let next_spill_slot (e:emitter) (r:referent_ty) : typed_mem =
  (Spill (next_spill e), r)
;;
+
+
(* Double the quad buffer when [emit_pc] reaches its last usable slot,
 * copying the existing quads across and padding with badq. *)
let grow_if_necessary e =
  let len = Array.length e.emit_quads in
    if e.emit_pc >= len - 1
    then
      (* Array.make, not the deprecated Array.create alias. *)
      let n = Array.make (2 * len) badq in
        Array.blit e.emit_quads 0 n 0 len;
        e.emit_quads <- n
;;
+
+
(* Convenience constructors for quad bodies. *)

let binary (op:binop) (dst:cell) (lhs:operand) (rhs:operand) : quad' =
  Binary { binary_op = op;
           binary_dst = dst;
           binary_lhs = lhs;
           binary_rhs = rhs }
;;

let unary (op:unop) (dst:cell) (src:operand) : quad' =
  Unary { unary_op = op;
          unary_dst = dst;
          unary_src = src }

let jmp (op:jmpop) (targ:code) : quad' =
  Jmp { jmp_op = op;
        jmp_targ = targ; }
;;


let lea (dst:cell) (src:operand) : quad' =
  Lea { lea_dst = dst;
        lea_src = src; }
;;

let cmp (lhs:operand) (rhs:operand) : quad' =
  Cmp { cmp_lhs = lhs;
        cmp_rhs = rhs; }
;;

let call (dst:cell) (targ:code) : quad' =
  Call { call_dst = dst;
         call_targ = targ; }
;;

(* Unsigned move; a move involving a nil cell/operand moves no bits, so
 * it decays to Dead. *)
let umov (dst:cell) (src:operand) : quad' =
  if (cell_is_nil dst || operand_is_nil src)
  then Dead
  else unary UMOV dst src
;;

(* ZERO quad on [dst] with extent [count] (semantics are
 * backend-defined). *)
let zero (dst:cell) (count:operand) : quad' =
  unary ZERO dst count
;;

(* True for either flavor of move. *)
let is_mov uop =
  match uop with
      UMOV | IMOV -> true
    | _ -> false
;;

(* Wrap a bare quad body with no fixup and no implicit in-edges. *)
let mk_quad (q':quad') : quad =
  { quad_body = q';
    quad_implicits = [];
    quad_fixup = None }
;;
+
(* Core emission entry point: append [q'] to [e]'s quad buffer, carrying
 * an optional fixup (defined at the address of the first quad actually
 * written) and the labels that implicitly flow into it.  Mem-to-mem
 * moves are decayed through a fresh vreg, and the target's
 * preallocation rewrite is applied, with compensating moves inserted
 * around the quad as needed. *)
let emit_full
    (e:emitter)
    (fix:fixup option)
    (implicits:label list)
    (q':quad')
    : unit =
  let fixup = ref fix in
  (* Append one quad verbatim; the pending fixup attaches to the first
   * quad written and is then cleared. *)
  let emit_quad_bottom q' =
    grow_if_necessary e;
    e.emit_quads.(e.emit_pc) <- { quad_body = q';
                                  quad_implicits = implicits;
                                  quad_fixup = (!fixup) };
    fixup := None;
    e.emit_pc <- e.emit_pc + 1
  in

  let emit_quad (q':quad') : unit =
    (* re-decay any freshly generated mem-mem movs. *)
    (* NOTE(review): the [src_st]/[dst_st] binder names look swapped
     * relative to the sides they destructure; the decay is
     * type-preserving only when both sides share a scalar type —
     * confirm before renaming. *)
    match q' with
        Unary { unary_dst = Mem (dst_mem, ScalarTy src_st);
                unary_src = Cell (Mem (src_mem, ScalarTy dst_st));
                unary_op = op }
          when is_mov op ->
            let v = next_vreg_cell e dst_st in
              emit_quad_bottom
                (unary op v (Cell (Mem (src_mem, ScalarTy src_st))));
              emit_quad_bottom
                (unary op (Mem (dst_mem, ScalarTy dst_st)) (Cell v))
      | _ -> emit_quad_bottom q'
  in

  (* Compensating moves around signed ops must preserve sign. *)
  let default_mov =
    match q' with
        Binary b ->
          begin
            match b.binary_op with
                IDIV | IMUL | IMOD -> IMOV
              | _ -> UMOV
          end
      | Unary u ->
          begin
            match u.unary_op with
                IMOV -> IMOV
              | _ -> UMOV
          end
      | _ -> UMOV
  in

  let emit_mov (dst:cell) (src:operand) : unit =
    emit_quad (unary default_mov dst src)
  in

  (* If preallocation changed an operand, mov the old value into the
   * new (preallocated) cell first. *)
  let mov_if_operands_differ
      (old_op:operand) (new_op:operand)
      : unit =
    if (new_op <> old_op)
    then
      match new_op with
          (Cell new_cell) ->
            emit_mov new_cell old_op
        | _ -> ()
  in

  let mov_if_two_operands_differ
      (old_lhs_op:operand) (new_lhs_op:operand)
      (old_rhs_op:operand) (new_rhs_op:operand)
      : unit =
    (*
     * This is sufficiently obscure that it deserves an explanation.
     *
     * The main idea here is to do two "mov_if_operands_differ" calls,
     * such as one might have when setting up a binary quad.
     *
     * The problem comes when you happen to hit a case like X86 div,
     * which preallocates *both* operands. Preallocating both means we
     * have to potentially issue two movs into the preallocated regs,
     * and the second of those movs might be a problem. Specifically:
     * the second mov-to-prealloc might make be moving from a
     * register-indirect mem cell based on a vreg, and that vreg may
     * wind up being assigned to an hreg that we just loaded with the
     * first mov. In other words, the second mov may retask the
     * preallocated hreg we set up in the first mov.
     *
     * You laugh, but of course this actually happens.
     *
     * So here we do a conservative thing and check to see if either
     * operand is memory-indirect at all. If either is, then for either
     * of the 'old' operands we're *about* to mov into a prealloc reg,
     * we first bounce them off a spill slot. Spill slots, thankfully,
     * we can always count on being able to address irrespective of the
     * opinions of the RA, as they are all just fp-relative.
     *
     * A slightly more aggressive version of this would only bounce
     * cases that are not fp-relative already, though doing so would
     * require threading the notion of what fp *is* through to
     * here. Possibly tighten this up in the future (or just
     * ... destroy this backend ASAP).
     *)
    let has_reg_indirect op =
      match op with
          Cell (Mem _) -> true
        | _ -> false
    in
    let either_old_op_has_reg_indirect =
      (has_reg_indirect old_lhs_op) || (has_reg_indirect old_rhs_op)
    in
    let old_lhs_op =
      if either_old_op_has_reg_indirect && (new_lhs_op <> old_lhs_op)
      then
        let tmp =
          Mem (next_spill_slot e
                 (ScalarTy (operand_scalar_ty old_lhs_op)))
        in
          emit_mov tmp old_lhs_op;
          Cell tmp
      else
        old_lhs_op
    in
    let old_rhs_op =
      if either_old_op_has_reg_indirect && (new_rhs_op <> old_rhs_op)
      then
        let tmp =
          Mem (next_spill_slot e
                 (ScalarTy (operand_scalar_ty old_rhs_op)))
        in
          emit_mov tmp old_rhs_op;
          Cell tmp
      else
        old_rhs_op
    in
      mov_if_operands_differ old_lhs_op new_lhs_op;
      mov_if_operands_differ old_rhs_op new_rhs_op;
  in

  (* If preallocation moved the destination, copy the result back into
   * the cell the caller asked for. *)
  let mov_if_cells_differ (old_cell:cell) (new_cell:cell) : unit =
    if not (new_cell = old_cell)
    then
      emit_mov old_cell (Cell new_cell)
  in

  (* Apply the target's preallocator and emit, with compensating moves
   * before (sources) and after (destinations). *)
  let emit_decayed_quad q' =
    match (q', e.emit_preallocator q') with
        (Binary b, Binary b') ->
          begin
            mov_if_two_operands_differ
              b.binary_lhs b'.binary_lhs
              b.binary_rhs b'.binary_rhs;
            (* On 2-address machines the dst must also be the lhs. *)
            if e.emit_is_2addr &&
              (not (b'.binary_lhs = (Cell b'.binary_dst)))
            then
              begin
                emit_mov b'.binary_dst b'.binary_lhs;
                emit_quad (Binary { b' with
                                      binary_lhs = (Cell b'.binary_dst) })
              end
            else
              emit_quad (Binary b');
            mov_if_cells_differ b.binary_dst b'.binary_dst
          end

      | (Unary u, Unary u') ->
          mov_if_operands_differ u.unary_src u'.unary_src;
          (* Assume '2addr' means '1addr' for unary ops. *)
          if e.emit_is_2addr &&
            (u'.unary_op = NEG || u'.unary_op = NOT) &&
            (not (u'.unary_src = (Cell u'.unary_dst)))
          then
            begin
              emit_mov u'.unary_dst u'.unary_src;
              emit_quad (Unary { u' with unary_src = (Cell u'.unary_dst) })
            end
          else
            emit_quad (Unary u');
          mov_if_cells_differ u.unary_dst u'.unary_dst

      | (Cmp c, Cmp c') ->
          mov_if_two_operands_differ
            c.cmp_lhs c'.cmp_lhs
            c.cmp_rhs c'.cmp_rhs;
          emit_quad (Cmp c');

      | (Push op, Push op') ->
          mov_if_operands_differ op op';
          emit_quad (Push op');

      | (Pop c, Pop c') ->
          emit_quad (Pop c');
          mov_if_cells_differ c c'

      | (Call c, Call c') ->
          emit_quad (Call c');
          mov_if_cells_differ c.call_dst c'.call_dst

      | (Lea lea, Lea lea') ->
          emit_quad (Lea lea');
          mov_if_cells_differ lea.lea_dst lea'.lea_dst

      | (x, y) ->
          (* All other quads must pass through preallocation unchanged. *)
          assert (x = y);
          emit_quad x
  in

    (* pre-decay mem-mem movs. *)
    match q' with
        Unary { unary_dst = Mem (dst_mem, ScalarTy src_st);
                unary_src = Cell (Mem (src_mem, ScalarTy dst_st));
                unary_op = op }
          when is_mov op ->
            let v = next_vreg_cell e dst_st in
              emit_decayed_quad
                (unary op v (Cell (Mem (src_mem, ScalarTy src_st))));
              emit_decayed_quad
                (unary op (Mem (dst_mem, ScalarTy dst_st)) (Cell v))
      | _ -> emit_decayed_quad q'
;;
+
(* Append [q] with no fixup and no implicit in-edges. *)
let emit (e:emitter) (q:quad') : unit =
  emit_full e None [] q
;;
+
(* Retarget the previously targetless jump at index [jmp] to label
 * [targ].  Silently does nothing if the quad at [jmp] is not a jump. *)
let patch_jump (e:emitter) (jmp:int) (targ:int) : unit =
  let q = e.emit_quads.(jmp) in
    match q.quad_body with
        Jmp j ->
          (* Only a jump that was emitted with CodeNone may be patched. *)
          assert (j.jmp_targ = CodeNone);
          e.emit_quads.(jmp) <-
            { q with quad_body =
                Jmp { j with jmp_targ = CodeLabel targ } }
      | _ -> ()
;;
+
+(* More query functions. *)
+
(* Project element [i] out of a struct-typed memory cell by folding the
 * statically known element offset into the addressing mode.  Bug if the
 * cell is not a struct in memory, [i] is out of range, or the offset is
 * not compile-time fixed.  [fmt] is only used to print the cell in the
 * error message. *)
let get_element_ptr
    (word_bits:bits)
    (fmt:hreg_formatter)
    (mem_cell:cell)
    (i:int)
    : cell =
  match mem_cell with
      Mem (mem, StructTy elts) when i >= 0 && i < (Array.length elts) ->
        assert ((Array.length elts) != 0);
        begin
          let elt_rty = elts.(i) in
          let elt_off = get_element_offset word_bits elts i in
            match elt_off with
                SIZE_fixed fixed_off ->
                  Mem (mem_off_imm mem fixed_off, elt_rty)
              | _ -> bug ()
                  "get_element_ptr %d on dynamic-size cell: offset %s"
                    i (string_of_size elt_off)
        end

    | _ -> bug () "get_element_ptr %d on cell %s" i
        (string_of_cell fmt mem_cell)
;;
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)
diff --git a/src/boot/be/macho.ml b/src/boot/be/macho.ml
new file mode 100644
index 00000000..7fccdfd3
--- /dev/null
+++ b/src/boot/be/macho.ml
@@ -0,0 +1,1184 @@
+open Asm;;
+open Common;;
+
+(* Mach-O writer. *)
+
+(* Printf-style logger for the Mach-O writer, gated on the session's
+ * object-file logging flag and directed at its log channel. *)
+let log (sess:Session.sess) =
+  Session.log "obj (mach-o)"
+    sess.Session.sess_log_obj
+    sess.Session.sess_log_out
+;;
+
+(* Run [thunk] for its side effects only when object-file logging is
+ * enabled in [sess]; otherwise do nothing. *)
+let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit =
+  if sess.Session.sess_log_obj then thunk ()
+;;
+
+let (cpu_arch_abi64:int64) = 0x01000000L
+;;
+
+let (mh_magic:int64) = 0xfeedfaceL
+;;
+
+(* Intel cpu-subtype encoding: subtype = family + (model lsl 4), i.e.
+ * the model sits in bits 4 and up, added to the family code. *)
+let cpu_subtype_intel (f:int64) (m:int64) : int64 =
+  let shifted_model = Int64.shift_left m 4 in
+  Int64.add shifted_model f
+;;
+
+type cpu_type =
+ (* Maybe support more later. *)
+ CPU_TYPE_X86
+ | CPU_TYPE_X86_64
+ | CPU_TYPE_ARM
+ | CPU_TYPE_POWERPC
+;;
+
+type cpu_subtype =
+ (* Maybe support more later. *)
+ CPU_SUBTYPE_X86_ALL
+ | CPU_SUBTYPE_X86_64_ALL
+ | CPU_SUBTYPE_ARM_ALL
+ | CPU_SUBTYPE_POWERPC_ALL
+;;
+
+(* Mach-O header "filetype" field. *)
+type file_type =
+    MH_OBJECT
+  | MH_EXECUTE
+  | MH_FVMLIB
+  | MH_CORE
+  | MH_PRELOAD
+  | MH_DYLIB
+  | MH_DYLINKER
+  | MH_BUNDLE
+  | MH_DYLIB_STUB
+  | MH_DSYM
+;;
+
+(* Numeric filetype values as defined in mach-o/loader.h. *)
+let file_type_code (ft:file_type) : int64 =
+  match ft with
+  | MH_OBJECT -> 0x1L      (* relocatable object *)
+  | MH_EXECUTE -> 0x2L     (* executable *)
+  | MH_FVMLIB -> 0x3L      (* fixed-VM shared lib *)
+  | MH_CORE -> 0x4L        (* core *)
+  | MH_PRELOAD -> 0x5L     (* preloaded executable *)
+  | MH_DYLIB -> 0x6L       (* dynamic lib *)
+  | MH_DYLINKER -> 0x7L    (* dynamic linker *)
+  | MH_BUNDLE -> 0x8L      (* bundle *)
+  | MH_DYLIB_STUB -> 0x9L  (* shared lib stub *)
+  | MH_DSYM -> 0xaL        (* debuginfo only *)
+;;
+
+type file_flag =
+ MH_NOUNDEFS
+ | MH_INCRLINK
+ | MH_DYLDLINK
+ | MH_BINDATLOAD
+ | MH_PREBOUND
+ | MH_SPLIT_SEGS
+ | MH_LAZY_INIT
+ | MH_TWOLEVEL
+ | MH_FORCE_FLAT
+ | MH_NOMULTIDEFS
+ | MH_NOFIXPREBINDING
+ | MH_PREBINDABLE
+ | MH_ALLMODSBOUND
+ | MH_SUBSECTIONS_VIA_SYMBOLS
+ | MH_CANONICAL
+ | MH_WEAK_DEFINES
+ | MH_BINDS_TO_WEAK
+ | MH_ALLOW_STACK_EXECUTION
+ | MH_ROOT_SAFE
+ | MH_SETUID_SAFE
+ | MH_NO_REEXPORTED_DYLIBS
+ | MH_PIE
+;;
+
+let file_flag_code (ff:file_flag) : int64 =
+ match ff with
+ MH_NOUNDEFS -> 0x1L
+ | MH_INCRLINK -> 0x2L
+ | MH_DYLDLINK -> 0x4L
+ | MH_BINDATLOAD -> 0x8L
+ | MH_PREBOUND -> 0x10L
+ | MH_SPLIT_SEGS -> 0x20L
+ | MH_LAZY_INIT -> 0x40L
+ | MH_TWOLEVEL -> 0x80L
+ | MH_FORCE_FLAT -> 0x100L
+ | MH_NOMULTIDEFS -> 0x200L
+ | MH_NOFIXPREBINDING -> 0x400L
+ | MH_PREBINDABLE -> 0x800L
+ | MH_ALLMODSBOUND -> 0x1000L
+ | MH_SUBSECTIONS_VIA_SYMBOLS -> 0x2000L
+ | MH_CANONICAL -> 0x4000L
+ | MH_WEAK_DEFINES -> 0x8000L
+ | MH_BINDS_TO_WEAK -> 0x10000L
+ | MH_ALLOW_STACK_EXECUTION -> 0x20000L
+ | MH_ROOT_SAFE -> 0x40000L
+ | MH_SETUID_SAFE -> 0x80000L
+ | MH_NO_REEXPORTED_DYLIBS -> 0x100000L
+ | MH_PIE -> 0x200000L
+;;
+
+
+type vm_prot =
+ VM_PROT_NONE
+ | VM_PROT_READ
+ | VM_PROT_WRITE
+ | VM_PROT_EXECUTE
+;;
+
+
+type load_command =
+ LC_SEGMENT
+ | LC_SYMTAB
+ | LC_SYMSEG
+ | LC_THREAD
+ | LC_UNIXTHREAD
+ | LC_LOADFVMLIB
+ | LC_IDFVMLIB
+ | LC_IDENT
+ | LC_FVMFILE
+ | LC_PREPAGE
+ | LC_DYSYMTAB
+ | LC_LOAD_DYLIB
+ | LC_ID_DYLIB
+ | LC_LOAD_DYLINKER
+ | LC_ID_DYLINKER
+ | LC_PREBOUND_DYLIB
+ | LC_ROUTINES
+ | LC_SUB_FRAMEWORK
+ | LC_SUB_UMBRELLA
+ | LC_SUB_CLIENT
+ | LC_SUB_LIBRARY
+ | LC_TWOLEVEL_HINTS
+ | LC_PREBIND_CKSUM
+ | LC_LOAD_WEAK_DYLIB
+ | LC_SEGMENT_64
+ | LC_ROUTINES_64
+ | LC_UUID
+ | LC_RPATH
+ | LC_CODE_SIGNATURE
+ | LC_SEGMENT_SPLIT_INFO
+ | LC_REEXPORT_DYLIB
+ | LC_LAZY_LOAD_DYLIB
+ | LC_ENCRYPTION_INFO
+;;
+
+
+let cpu_type_code (cpu:cpu_type) : int64 =
+ match cpu with
+ CPU_TYPE_X86 -> 7L
+ | CPU_TYPE_X86_64 -> Int64.logor 7L cpu_arch_abi64
+ | CPU_TYPE_ARM -> 12L
+ | CPU_TYPE_POWERPC -> 18L
+;;
+
+let cpu_subtype_code (cpu:cpu_subtype) : int64 =
+ match cpu with
+ CPU_SUBTYPE_X86_ALL -> 3L
+ | CPU_SUBTYPE_X86_64_ALL -> 3L
+ | CPU_SUBTYPE_ARM_ALL -> 0L
+ | CPU_SUBTYPE_POWERPC_ALL -> 0L
+;;
+
+
+let vm_prot_code (vmp:vm_prot) : int64 =
+ match vmp with
+ VM_PROT_NONE -> 0L
+ | VM_PROT_READ -> 1L
+ | VM_PROT_WRITE -> 2L
+ | VM_PROT_EXECUTE -> 4L
+;;
+
+
+let lc_req_dyld = 0x80000000L;;
+
+let load_command_code (lc:load_command) =
+ match lc with
+ | LC_SEGMENT -> 0x1L
+ | LC_SYMTAB -> 0x2L
+ | LC_SYMSEG -> 0x3L
+ | LC_THREAD -> 0x4L
+ | LC_UNIXTHREAD -> 0x5L
+ | LC_LOADFVMLIB -> 0x6L
+ | LC_IDFVMLIB -> 0x7L
+ | LC_IDENT -> 0x8L
+ | LC_FVMFILE -> 0x9L
+ | LC_PREPAGE -> 0xaL
+ | LC_DYSYMTAB -> 0xbL
+ | LC_LOAD_DYLIB -> 0xcL
+ | LC_ID_DYLIB -> 0xdL
+ | LC_LOAD_DYLINKER -> 0xeL
+ | LC_ID_DYLINKER -> 0xfL
+ | LC_PREBOUND_DYLIB -> 0x10L
+ | LC_ROUTINES -> 0x11L
+ | LC_SUB_FRAMEWORK -> 0x12L
+ | LC_SUB_UMBRELLA -> 0x13L
+ | LC_SUB_CLIENT -> 0x14L
+ | LC_SUB_LIBRARY -> 0x15L
+ | LC_TWOLEVEL_HINTS -> 0x16L
+ | LC_PREBIND_CKSUM -> 0x17L
+ | LC_LOAD_WEAK_DYLIB -> Int64.logor lc_req_dyld 0x18L
+ | LC_SEGMENT_64 -> 0x19L
+ | LC_ROUTINES_64 -> 0x1aL
+ | LC_UUID -> 0x1bL
+ | LC_RPATH -> Int64.logor lc_req_dyld 0x1cL
+ | LC_CODE_SIGNATURE -> 0x1dL
+ | LC_SEGMENT_SPLIT_INFO -> 0x1eL
+ | LC_REEXPORT_DYLIB -> Int64.logor lc_req_dyld 0x1fL
+ | LC_LAZY_LOAD_DYLIB -> 0x20L
+ | LC_ENCRYPTION_INFO -> 0x21L
+;;
+
+
+(* Emit [str] as exactly [sz] bytes of output: truncated when longer
+ * than [sz], zero-padded up to [sz] otherwise. Used for the fixed
+ * 16-byte segment and section name fields. *)
+let fixed_sz_string (sz:int) (str:string) : frag =
+  let len = String.length str in
+  if len > sz
+  then STRING (String.sub str 0 sz)
+  else SEQ [| STRING str; PAD (sz - len) |]
+;;
+
+type sect_type =
+ S_REGULAR
+ | S_ZEROFILL
+ | S_CSTRING_LITERALS
+ | S_4BYTE_LITERALS
+ | S_8BYTE_LITERALS
+ | S_LITERAL_POINTERS
+ | S_NON_LAZY_SYMBOL_POINTERS
+ | S_LAZY_SYMBOL_POINTERS
+ | S_SYMBOL_STUBS
+ | S_MOD_INIT_FUNC_POINTERS
+ | S_MOD_TERM_FUNC_POINTERS
+ | S_COALESCED
+ | S_GB_ZEROFILL
+ | S_INTERPOSING
+ | S_16BYTE_LITERALS
+ | S_DTRACE_DOF
+ | S_LAZY_DYLIB_SYMBOL_POINTERS
+;;
+
+let sect_type_code (s:sect_type) : int64 =
+ match s with
+ S_REGULAR -> 0x0L
+ | S_ZEROFILL -> 0x1L
+ | S_CSTRING_LITERALS -> 0x2L
+ | S_4BYTE_LITERALS -> 0x3L
+ | S_8BYTE_LITERALS -> 0x4L
+ | S_LITERAL_POINTERS -> 0x5L
+ | S_NON_LAZY_SYMBOL_POINTERS -> 0x6L
+ | S_LAZY_SYMBOL_POINTERS -> 0x7L
+ | S_SYMBOL_STUBS -> 0x8L
+ | S_MOD_INIT_FUNC_POINTERS -> 0x9L
+ | S_MOD_TERM_FUNC_POINTERS -> 0xaL
+ | S_COALESCED -> 0xbL
+ | S_GB_ZEROFILL -> 0xcL
+ | S_INTERPOSING -> 0xdL
+ | S_16BYTE_LITERALS -> 0xeL
+ | S_DTRACE_DOF -> 0xfL
+ | S_LAZY_DYLIB_SYMBOL_POINTERS -> 0x10L
+;;
+
+type sect_attr =
+ S_ATTR_PURE_INSTRUCTIONS
+ | S_ATTR_NO_TOC
+ | S_ATTR_STRIP_STATIC_SYMS
+ | S_ATTR_NO_DEAD_STRIP
+ | S_ATTR_LIVE_SUPPORT
+ | S_ATTR_SELF_MODIFYING_CODE
+ | S_ATTR_DEBUG
+ | S_ATTR_SOME_INSTRUCTIONS
+ | S_ATTR_EXT_RELOC
+ | S_ATTR_LOC_RELOC
+;;
+
+let sect_attr_code (s:sect_attr) : int64 =
+ match s with
+ S_ATTR_PURE_INSTRUCTIONS -> 0x80000000L
+ | S_ATTR_NO_TOC -> 0x40000000L
+ | S_ATTR_STRIP_STATIC_SYMS -> 0x20000000L
+ | S_ATTR_NO_DEAD_STRIP -> 0x10000000L
+ | S_ATTR_LIVE_SUPPORT -> 0x08000000L
+ | S_ATTR_SELF_MODIFYING_CODE -> 0x04000000L
+ | S_ATTR_DEBUG -> 0x02000000L
+ | S_ATTR_SOME_INSTRUCTIONS -> 0x00000400L
+ | S_ATTR_EXT_RELOC -> 0x00000200L
+ | S_ATTR_LOC_RELOC -> 0x00000100L
+;;
+
+type n_type =
+ | N_EXT
+ | N_UNDF
+ | N_ABS
+ | N_SECT
+ | N_PBUD
+ | N_INDIR
+;;
+
+let n_type_code (n:n_type) : int64 =
+ match n with
+ N_EXT -> 0x1L
+ | N_UNDF -> 0x0L
+ | N_ABS -> 0x2L
+ | N_SECT -> 0xeL
+ | N_PBUD -> 0xcL
+ | N_INDIR -> 0xaL
+;;
+
+
+type n_desc_reference_type =
+ REFERENCE_FLAG_UNDEFINED_NON_LAZY
+ | REFERENCE_FLAG_UNDEFINED_LAZY
+ | REFERENCE_FLAG_DEFINED
+ | REFERENCE_FLAG_PRIVATE_DEFINED
+ | REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY
+ | REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY
+;;
+
+let n_desc_reference_type_code (n:n_desc_reference_type) : int64 =
+ match n with
+ REFERENCE_FLAG_UNDEFINED_NON_LAZY -> 0x0L
+ | REFERENCE_FLAG_UNDEFINED_LAZY -> 0x1L
+ | REFERENCE_FLAG_DEFINED -> 0x2L
+ | REFERENCE_FLAG_PRIVATE_DEFINED -> 0x3L
+ | REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY -> 0x4L
+ | REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY -> 0x5L
+;;
+
+type n_desc_flags =
+ REFERENCED_DYNAMICALLY
+ | N_DESC_DISCARDED
+ | N_NO_DEAD_STRIP
+ | N_WEAK_REF
+ | N_WEAK_DEF
+;;
+
+let n_desc_flags_code (n:n_desc_flags) : int64 =
+ match n with
+ REFERENCED_DYNAMICALLY -> 0x10L
+ | N_DESC_DISCARDED -> 0x20L
+ | N_NO_DEAD_STRIP -> 0x20L (* Yes, they reuse 0x20. *)
+ | N_WEAK_REF -> 0x40L
+ | N_WEAK_DEF -> 0x80L
+;;
+
+type n_desc_dylib_ordinal = int;;
+
+type n_desc = (n_desc_dylib_ordinal *
+ (n_desc_flags list) *
+ n_desc_reference_type)
+;;
+
+(* Pack an nlist n_desc field: dylib ordinal in the high byte (bits
+ * 8..15), flag bits OR'd with the 3-bit reference type below it. *)
+let n_desc_code (n:n_desc) : int64 =
+  let (dylib_ordinal, flags, ty) = n in
+    Int64.logor
+      (Int64.of_int (dylib_ordinal lsl 8))
+      (Int64.logor
+         (fold_flags n_desc_flags_code flags)
+         (n_desc_reference_type_code ty))
+;;
+
+
+let macho_section_command
+ (seg_name:string)
+ (sect:(string * int * (sect_attr list) * sect_type * fixup))
+ : frag =
+ let (sect_name, sect_align, sect_attrs, sect_type, sect_fixup) = sect in
+ SEQ [|
+ fixed_sz_string 16 sect_name;
+ fixed_sz_string 16 seg_name;
+ WORD (TY_u32, M_POS sect_fixup);
+ WORD (TY_u32, M_SZ sect_fixup);
+ WORD (TY_u32, F_POS sect_fixup);
+ WORD (TY_u32, IMM (Int64.of_int sect_align));
+ WORD (TY_u32, IMM 0L); (* reloff *)
+ WORD (TY_u32, IMM 0L); (* nreloc *)
+ WORD (TY_u32, (IMM (Int64.logor (* flags (and attrs) *)
+ (fold_flags sect_attr_code sect_attrs)
+ (sect_type_code sect_type))));
+ WORD (TY_u32, IMM 0L); (* reserved1 *)
+ WORD (TY_u32, IMM 0L); (* reserved2 *)
+ |]
+;;
+
+let macho_segment_command
+ (seg_name:string)
+ (seg_fixup:fixup)
+ (maxprot:vm_prot list)
+ (initprot:vm_prot list)
+ (sects:(string * int * (sect_attr list) * sect_type * fixup) array)
+ : frag =
+
+ let cmd_fixup = new_fixup "segment command" in
+ let cmd =
+ SEQ [|
+ WORD (TY_u32, IMM (load_command_code LC_SEGMENT));
+ WORD (TY_u32, F_SZ cmd_fixup);
+ fixed_sz_string 16 seg_name;
+ WORD (TY_u32, M_POS seg_fixup);
+ WORD (TY_u32, M_SZ seg_fixup);
+ WORD (TY_u32, F_POS seg_fixup);
+ WORD (TY_u32, F_SZ seg_fixup);
+ WORD (TY_u32, IMM (fold_flags vm_prot_code maxprot));
+ WORD (TY_u32, IMM (fold_flags vm_prot_code initprot));
+ WORD (TY_u32, IMM (Int64.of_int (Array.length sects)));
+ WORD (TY_u32, IMM 0L); (* Flags? *)
+ |]
+ in
+ DEF (cmd_fixup,
+ SEQ [|
+ cmd;
+ SEQ (Array.map (macho_section_command seg_name) sects);
+ |])
+;;
+
+let macho_thread_command
+ (entry:fixup)
+ : frag =
+ let cmd_fixup = new_fixup "thread command" in
+ let x86_THREAD_STATE32 = 1L in
+ let regs =
+ [|
+ WORD (TY_u32, IMM 0x0L); (* eax *)
+ WORD (TY_u32, IMM 0x0L); (* ebx *)
+ WORD (TY_u32, IMM 0x0L); (* ecx *)
+ WORD (TY_u32, IMM 0x0L); (* edx *)
+
+ WORD (TY_u32, IMM 0x0L); (* edi *)
+ WORD (TY_u32, IMM 0x0L); (* esi *)
+ WORD (TY_u32, IMM 0x0L); (* ebp *)
+ WORD (TY_u32, IMM 0x0L); (* esp *)
+
+ WORD (TY_u32, IMM 0x0L); (* ss *)
+ WORD (TY_u32, IMM 0x0L); (* eflags *)
+ WORD (TY_u32, M_POS entry); (* eip *)
+ WORD (TY_u32, IMM 0x0L); (* cs *)
+
+ WORD (TY_u32, IMM 0x0L); (* ds *)
+ WORD (TY_u32, IMM 0x0L); (* es *)
+ WORD (TY_u32, IMM 0x0L); (* fs *)
+ WORD (TY_u32, IMM 0x0L); (* gs *)
+ |]
+ in
+ let cmd =
+ SEQ [|
+ WORD (TY_u32, IMM (load_command_code LC_UNIXTHREAD));
+ WORD (TY_u32, F_SZ cmd_fixup);
+ WORD (TY_u32, IMM x86_THREAD_STATE32); (* "flavour" *)
+ WORD (TY_u32, IMM (Int64.of_int (Array.length regs)));
+ SEQ regs
+ |]
+ in
+ DEF (cmd_fixup, cmd)
+;;
+
+(* LC_LOAD_DYLINKER command naming the dynamic linker to launch the
+ * image with. The path is an inline lc_str: a u32 offset measured from
+ * the start of the command, followed by the NUL-terminated string
+ * itself, padded out to a 4-byte boundary. *)
+let macho_dylinker_command : frag =
+  let cmd_fixup = new_fixup "dylinker command" in
+  let str_fixup = new_fixup "dylinker lc_str fixup" in
+  DEF (cmd_fixup,
+       SEQ [|
+         WORD (TY_u32, IMM (load_command_code LC_LOAD_DYLINKER));
+         WORD (TY_u32, F_SZ cmd_fixup);
+         (* lc_str offset, relative to the command start. *)
+         WORD (TY_u32, SUB (F_POS str_fixup, F_POS cmd_fixup));
+         DEF (str_fixup, ZSTRING "/usr/lib/dyld");
+         ALIGN_FILE (4, MARK)
+       |])
+;;
+
+let macho_dylib_command (dylib:string) : frag =
+
+ let cmd_fixup = new_fixup "dylib command" in
+ let str_fixup = new_fixup "dylib lc_str fixup" in
+ let cmd =
+ SEQ
+ [|
+ WORD (TY_u32, IMM (load_command_code LC_LOAD_DYLIB));
+ WORD (TY_u32, F_SZ cmd_fixup);
+
+ (* see definition of lc_str; these things are weird. *)
+ WORD (TY_u32, SUB (F_POS (str_fixup), F_POS (cmd_fixup)));
+
+ WORD (TY_u32, IMM 0L); (* timestamp *)
+ WORD (TY_u32, IMM 0L); (* current_version *)
+ WORD (TY_u32, IMM 0L); (* compatibility_version *)
+
+ (* Payload-and-alignment of an lc_str goes at end of command. *)
+ DEF (str_fixup, ZSTRING dylib);
+ ALIGN_FILE (4, MARK);
+
+ |]
+ in
+ DEF (cmd_fixup, cmd)
+;;
+
+
+(* LC_SYMTAB command: records file offsets and sizes of the nlist
+ * symbol table ([symtab_fixup], [nsyms] entries) and the string table
+ * ([strtab_fixup]) that the symbols' name offsets point into. *)
+let macho_symtab_command
+    (symtab_fixup:fixup)
+    (nsyms:int64)
+    (strtab_fixup:fixup)
+    : frag =
+  let cmd_fixup = new_fixup "symtab command" in
+  let cmd =
+    SEQ
+      [|
+        WORD (TY_u32, IMM (load_command_code LC_SYMTAB));
+        WORD (TY_u32, F_SZ cmd_fixup);
+
+        WORD (TY_u32, F_POS symtab_fixup);     (* symoff *)
+        WORD (TY_u32, IMM nsyms);              (* nsyms *)
+
+        WORD (TY_u32, F_POS strtab_fixup);     (* stroff *)
+        WORD (TY_u32, F_SZ strtab_fixup);      (* strsz *)
+      |]
+  in
+    DEF (cmd_fixup, cmd)
+;;
+
+let macho_dysymtab_command
+ (local_defined_syms_index:int64)
+ (local_defined_syms_count:int64)
+ (external_defined_syms_index:int64)
+ (external_defined_syms_count:int64)
+ (undefined_syms_index:int64)
+ (undefined_syms_count:int64)
+ (indirect_symtab_fixup:fixup) : frag =
+ let cmd_fixup = new_fixup "dysymtab command" in
+ let cmd =
+ SEQ
+ [|
+ WORD (TY_u32, IMM (load_command_code LC_DYSYMTAB));
+ WORD (TY_u32, F_SZ cmd_fixup);
+
+ WORD (TY_u32, IMM local_defined_syms_index); (* ilocalsym *)
+ WORD (TY_u32, IMM local_defined_syms_count); (* nlocalsym *)
+
+ WORD (TY_u32, IMM external_defined_syms_index); (* iextdefsym *)
+ WORD (TY_u32, IMM external_defined_syms_count); (* nextdefsym *)
+
+ WORD (TY_u32, IMM undefined_syms_index); (* iundefsym *)
+ WORD (TY_u32, IMM undefined_syms_count); (* nundefsym *)
+
+ WORD (TY_u32, IMM 0L); (* tocoff *)
+ WORD (TY_u32, IMM 0L); (* ntoc *)
+
+ WORD (TY_u32, IMM 0L); (* modtaboff *)
+ WORD (TY_u32, IMM 0L); (* nmodtab *)
+
+ WORD (TY_u32, IMM 0L); (* extrefsymoff *)
+ WORD (TY_u32, IMM 0L); (* nextrefsyms *)
+
+ WORD (TY_u32, F_POS indirect_symtab_fixup); (* indirectsymoff *)
+ WORD (TY_u32, IMM undefined_syms_count); (* nindirectsyms *)
+
+ WORD (TY_u32, IMM 0L); (* extreloff *)
+ WORD (TY_u32, IMM 0L); (* nextrel *)
+
+ WORD (TY_u32, IMM 0L); (* locreloff *)
+ WORD (TY_u32, IMM 0L); (* nlocrel *)
+ |]
+ in
+ DEF (cmd_fixup, cmd)
+;;
+
+(* 32-bit mach_header followed immediately by the load commands.
+ * ncmds is taken as the length of [loadcmds] and sizeofcmds from the
+ * fixup spanning their concatenation.
+ * NOTE(review): a zero-byte placeholder (e.g. the MARK emitted by
+ * emit_file when there is no start fixup) still counts toward ncmds
+ * here — confirm callers never pass empty placeholder frags. *)
+let macho_header_32
+    (cpu:cpu_type)
+    (sub:cpu_subtype)
+    (ftype:file_type)
+    (flags:file_flag list)
+    (loadcmds:frag array) : frag =
+  let load_commands_fixup = new_fixup "load commands" in
+  let cmds = DEF (load_commands_fixup, SEQ loadcmds) in
+    SEQ
+      [|
+        WORD (TY_u32, IMM mh_magic);
+        WORD (TY_u32, IMM (cpu_type_code cpu));
+        WORD (TY_u32, IMM (cpu_subtype_code sub));
+        WORD (TY_u32, IMM (file_type_code ftype));
+        WORD (TY_u32, IMM (Int64.of_int (Array.length loadcmds)));
+        WORD (TY_u32, F_SZ load_commands_fixup);
+        WORD (TY_u32, IMM (fold_flags file_flag_code flags));
+        cmds
+      |]
+;;
+
+let emit_file
+ (sess:Session.sess)
+ (crate:Ast.crate)
+ (code:Asm.frag)
+ (data:Asm.frag)
+ (sem:Semant.ctxt)
+ (dwarf:Dwarf.debug_records)
+ : unit =
+
+ (* FIXME: alignment? *)
+
+ let mh_execute_header_fixup = new_fixup "__mh_execute header" in
+
+ let nxargc_fixup = (Semant.provide_native sem SEG_data "NXArgc") in
+ let nxargv_fixup = (Semant.provide_native sem SEG_data "NXArgv") in
+ let progname_fixup = (Semant.provide_native sem SEG_data "__progname") in
+ let environ_fixup = (Semant.provide_native sem SEG_data "environ") in
+ let exit_fixup = (Semant.require_native sem REQUIRED_LIB_crt "exit") in
+ let (start_fixup, rust_start_fixup) =
+ if sess.Session.sess_library_mode
+ then (None, None)
+ else (Some (new_fixup "start function entry"),
+ Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start"))
+ in
+
+ let text_sect_align_log2 = 2 in
+ let data_sect_align_log2 = 2 in
+
+ let seg_align = 0x1000 in
+ let text_sect_align = 2 lsl text_sect_align_log2 in
+ let data_sect_align = 2 lsl data_sect_align_log2 in
+
+ let align_both align i =
+ ALIGN_FILE (align,
+ (ALIGN_MEM (align, i)))
+ in
+
+ let def_aligned a f i =
+ align_both a
+ (SEQ [| DEF(f, i);
+ (align_both a MARK)|])
+ in
+
+ (* Segments. *)
+ let zero_segment_fixup = new_fixup "__PAGEZERO segment" in
+ let text_segment_fixup = new_fixup "__TEXT segment" in
+ let data_segment_fixup = new_fixup "__DATA segment" in
+ let dwarf_segment_fixup = new_fixup "__DWARF segment" in
+ let linkedit_segment_fixup = new_fixup "__LINKEDIT segment" in
+
+ (* Sections in the text segment. *)
+ let text_section_fixup = new_fixup "__text section" in
+
+ (* Sections in the data segment. *)
+ let data_section_fixup = new_fixup "__data section" in
+ let const_section_fixup = new_fixup "__const section" in
+ let bss_section_fixup = new_fixup "__bss section" in
+ let note_rust_section_fixup = new_fixup "__note.rust section" in
+ let nl_symbol_ptr_section_fixup = new_fixup "__nl_symbol_ptr section" in
+
+ let data_section = def_aligned data_sect_align data_section_fixup data in
+ let const_section =
+ def_aligned data_sect_align const_section_fixup (SEQ [| |])
+ in
+ let bss_section =
+ def_aligned data_sect_align bss_section_fixup (SEQ [| |])
+ in
+ let note_rust_section =
+ def_aligned
+ data_sect_align note_rust_section_fixup
+ (Asm.note_rust_frags crate.node.Ast.crate_meta)
+ in
+
+ (* Officially Apple doesn't claim to support DWARF sections like this, but
+ they work. *)
+ let debug_info_section =
+ def_aligned data_sect_align
+ sem.Semant.ctxt_debug_info_fixup
+ dwarf.Dwarf.debug_info
+ in
+ let debug_abbrev_section =
+ def_aligned data_sect_align
+ sem.Semant.ctxt_debug_abbrev_fixup
+ dwarf.Dwarf.debug_abbrev
+ in
+
+
+ (* String, symbol and parallel "nonlazy-pointer" tables. *)
+ let symtab_fixup = new_fixup "symtab" in
+ let strtab_fixup = new_fixup "strtab" in
+
+ let symbol_nlist_entry
+ (sect_index:int)
+ (nty:n_type list)
+ (nd:n_desc)
+ (nv:Asm.expr64)
+ : (frag * fixup) =
+ let strtab_entry_fixup = new_fixup "strtab entry" in
+ (SEQ
+ [|
+ WORD (TY_u32, SUB ((F_POS strtab_entry_fixup),
+ (F_POS strtab_fixup)));
+ BYTE (Int64.to_int (fold_flags n_type_code nty));
+ BYTE sect_index;
+ WORD (TY_u16, IMM (n_desc_code nd));
+ WORD (TY_u32, nv);
+ |], strtab_entry_fixup)
+ in
+
+ let sect_symbol_nlist_entry
+ (seg:segment)
+ (fixup_to_use:fixup)
+ : (frag * fixup) =
+ let nty = [ N_SECT; N_EXT ] in
+ let nd = (0, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in
+ let (sect_index, _(*seg_fix*)) =
+ match seg with
+ SEG_text -> (1, text_segment_fixup)
+ | SEG_data -> (2, data_segment_fixup)
+ in
+ symbol_nlist_entry sect_index nty nd (M_POS fixup_to_use)
+ in
+
+ let sect_private_symbol_nlist_entry
+ (seg:segment)
+ (fixup_to_use:fixup)
+ : (frag * fixup) =
+ let nty = [ N_SECT; ] in
+ let nd = (0, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in
+ let (sect_index, _(*seg_fix*)) =
+ match seg with
+ SEG_text -> (1, text_segment_fixup)
+ | SEG_data -> (2, data_segment_fixup)
+ in
+ symbol_nlist_entry sect_index nty nd (M_POS fixup_to_use)
+ in
+
+ let indirect_symbol_nlist_entry (dylib_index:int) : (frag * fixup) =
+ let nty = [ N_UNDF; N_EXT ] in
+ let nd = (dylib_index, [], REFERENCE_FLAG_UNDEFINED_NON_LAZY) in
+ symbol_nlist_entry 0 nty nd (IMM 0L)
+ in
+
+ let indirect_symbols =
+ Array.of_list
+ (List.concat
+ (List.map
+ (fun (lib, tab) ->
+ (List.map
+ (fun (name,fix) -> (lib,name,fix))
+ (htab_pairs tab)))
+ (htab_pairs sem.Semant.ctxt_native_required)))
+ in
+
+ let dylib_index (lib:required_lib) : int =
+ match lib with
+ REQUIRED_LIB_rustrt -> 1
+ | REQUIRED_LIB_crt -> 2
+ | _ -> bug () "Macho.dylib_index on nonstandard required lib."
+ in
+
+ (* Make undef symbols for native imports. *)
+ let (undefined_symbols:(string * (frag * fixup)) array) =
+ Array.map (fun (lib,name,_) ->
+ ("_" ^ name,
+ indirect_symbol_nlist_entry (dylib_index lib)))
+ indirect_symbols
+ in
+
+ (* Make symbols for exports. *)
+ let (export_symbols:(string * (frag * fixup)) array) =
+ let export_symbols_of_seg (seg, tab) =
+ List.map
+ begin
+ fun (name, fix) ->
+ let name = "_" ^ name in
+ let sym = sect_symbol_nlist_entry seg fix in
+ (name, sym)
+ end
+ (htab_pairs tab)
+ in
+ Array.of_list
+ (List.concat
+ (List.map export_symbols_of_seg
+ (htab_pairs sem.Semant.ctxt_native_provided)))
+ in
+
+ (* Make private symbols for items. *)
+ let (local_item_symbols:(string * (frag * fixup)) array) =
+ Array.map (fun code ->
+ let fix = code.Semant.code_fixup in
+ ("_" ^ fix.fixup_name,
+ sect_private_symbol_nlist_entry SEG_text fix))
+ (Array.of_list (htab_vals sem.Semant.ctxt_all_item_code))
+ in
+
+ (* Make private symbols for glue. *)
+ let (local_glue_symbols:(string * (frag * fixup)) array) =
+ Array.map (fun (g, code) ->
+ let fix = code.Semant.code_fixup in
+ ("_" ^ (Semant.glue_str sem g),
+ sect_private_symbol_nlist_entry SEG_text fix))
+ (Array.of_list (htab_pairs sem.Semant.ctxt_glue_code))
+ in
+
+ let (export_header_symbols:(string * (frag * fixup)) array) =
+ let name =
+ if sess.Session.sess_library_mode
+ then "__mh_dylib_header"
+ else "__mh_execute_header"
+ in
+ [|
+ (name, sect_symbol_nlist_entry SEG_text mh_execute_header_fixup);
+ |]
+ in
+
+ let export_symbols = Array.concat [ export_symbols;
+ export_header_symbols ]
+ in
+
+ let local_symbols = Array.concat [ local_item_symbols;
+ local_glue_symbols ]
+ in
+
+ let symbols = Array.concat [ local_symbols;
+ export_symbols;
+ undefined_symbols ]
+ in
+ let n_local_syms = Array.length local_symbols in
+ let n_export_syms = Array.length export_symbols in
+ let n_undef_syms = Array.length undefined_symbols in
+
+ let indirect_symbols_off = n_local_syms + n_export_syms in
+ let indirect_symtab_fixup = new_fixup "indirect symbol table" in
+ let indirect_symtab =
+ DEF (indirect_symtab_fixup,
+ SEQ (Array.mapi
+ (fun i _ -> WORD (TY_u32,
+ IMM (Int64.of_int
+ (i + indirect_symbols_off))))
+ indirect_symbols))
+ in
+
+ let nl_symbol_ptr_section =
+ def_aligned data_sect_align nl_symbol_ptr_section_fixup
+ (SEQ (Array.map
+ (fun (_, _, fix) ->
+ DEF(fix, WORD(TY_u32, IMM 0L)))
+ indirect_symbols))
+ in
+ let strtab = DEF (strtab_fixup,
+ SEQ (Array.map
+ (fun (name, (_, fix)) -> DEF(fix, ZSTRING name))
+ symbols))
+ in
+ let symtab = DEF (symtab_fixup,
+ SEQ (Array.map (fun (_, (frag, _)) -> frag) symbols))
+ in
+
+
+ let load_commands =
+ [|
+ macho_segment_command "__PAGEZERO" zero_segment_fixup
+ [] [] [||];
+
+ macho_segment_command "__TEXT" text_segment_fixup
+ [VM_PROT_READ; VM_PROT_EXECUTE]
+ [VM_PROT_READ; VM_PROT_EXECUTE]
+ [|
+ ("__text", text_sect_align_log2, [], S_REGULAR, text_section_fixup)
+ |];
+
+ macho_segment_command "__DATA" data_segment_fixup
+ [VM_PROT_READ; VM_PROT_WRITE]
+ [VM_PROT_READ; VM_PROT_WRITE]
+ [|
+ ("__data", data_sect_align_log2, [],
+ S_REGULAR, data_section_fixup);
+ ("__const", data_sect_align_log2, [],
+ S_REGULAR, const_section_fixup);
+ ("__bss", data_sect_align_log2, [],
+ S_REGULAR, bss_section_fixup);
+ ("__note.rust", data_sect_align_log2, [],
+ S_REGULAR, note_rust_section_fixup);
+ ("__nl_symbol_ptr", data_sect_align_log2, [],
+ S_NON_LAZY_SYMBOL_POINTERS, nl_symbol_ptr_section_fixup)
+ |];
+
+ macho_segment_command "__DWARF" dwarf_segment_fixup
+ [VM_PROT_READ]
+ [VM_PROT_READ]
+ [|
+ ("__debug_info", data_sect_align_log2, [],
+ S_REGULAR, sem.Semant.ctxt_debug_info_fixup);
+ ("__debug_abbrev", data_sect_align_log2, [],
+ S_REGULAR, sem.Semant.ctxt_debug_abbrev_fixup);
+ |];
+
+ macho_segment_command "__LINKEDIT" linkedit_segment_fixup
+ [VM_PROT_READ]
+ [VM_PROT_READ]
+ [|
+ |];
+
+ macho_symtab_command
+ symtab_fixup (Int64.of_int (Array.length symbols)) strtab_fixup;
+
+
+ macho_dysymtab_command
+ 0L
+ (Int64.of_int n_local_syms)
+ (Int64.of_int n_local_syms)
+ (Int64.of_int n_export_syms)
+ (Int64.of_int (n_local_syms + n_export_syms))
+ (Int64.of_int n_undef_syms)
+ indirect_symtab_fixup;
+
+ macho_dylinker_command;
+
+ macho_dylib_command "librustrt.dylib";
+
+ macho_dylib_command "/usr/lib/libSystem.B.dylib";
+
+ begin
+ match start_fixup with
+ None -> MARK
+ | Some start_fixup ->
+ macho_thread_command start_fixup
+ end;
+ |]
+ in
+
+ let header_and_commands =
+ macho_header_32
+ CPU_TYPE_X86
+ CPU_SUBTYPE_X86_ALL
+ (if sess.Session.sess_library_mode then MH_DYLIB else MH_EXECUTE)
+ [ MH_BINDATLOAD; MH_DYLDLINK; MH_TWOLEVEL ]
+ load_commands
+ in
+
+ let objfile_start e start_fixup rust_start_fixup main_fn_fixup =
+ let edx = X86.h X86.edx in
+ let edx_pointee =
+ Il.Mem ((Il.RegIn (edx, None)), Il.ScalarTy (Il.AddrTy Il.OpaqueTy))
+ in
+ Il.emit_full e (Some start_fixup) [] Il.Dead;
+
+ (* zero marks the bottom of the frame chain. *)
+ Il.emit e (Il.Push (X86.imm (Asm.IMM 0L)));
+ Il.emit e (Il.umov (X86.rc X86.ebp) (X86.ro X86.esp));
+
+ (* 16-byte align stack for SSE. *)
+ Il.emit e (Il.binary Il.AND (X86.rc X86.esp) (X86.ro X86.esp)
+ (X86.imm (Asm.IMM 0xfffffffffffffff0L)));
+
+ (* Store argv. *)
+ Abi.load_fixup_addr e edx nxargv_fixup Il.OpaqueTy;
+ Il.emit e (Il.lea (X86.rc X86.ecx)
+ (Il.Cell (Il.Mem ((Il.RegIn (Il.Hreg X86.ebp,
+ Some (X86.word_off_n 2))),
+ Il.OpaqueTy))));
+ Il.emit e (Il.umov edx_pointee (X86.ro X86.ecx));
+ Il.emit e (Il.Push (X86.ro X86.ecx));
+
+ (* Store argc. *)
+ Abi.load_fixup_addr e edx nxargc_fixup Il.OpaqueTy;
+ Il.emit e (Il.umov (X86.rc X86.eax)
+ (X86.c (X86.word_n (Il.Hreg X86.ebp) 1)));
+ Il.emit e (Il.umov edx_pointee (X86.ro X86.eax));
+ Il.emit e (Il.Push (X86.ro X86.eax));
+
+ (* Calculte and store envp. *)
+ Il.emit e (Il.binary Il.ADD
+ (X86.rc X86.eax) (X86.ro X86.eax)
+ (X86.imm (Asm.IMM 1L)));
+ Il.emit e (Il.binary Il.UMUL
+ (X86.rc X86.eax) (X86.ro X86.eax)
+ (X86.imm (Asm.IMM X86.word_sz)));
+ Il.emit e (Il.binary Il.ADD (X86.rc X86.eax)
+ (X86.ro X86.eax) (X86.ro X86.ecx));
+ Abi.load_fixup_addr e edx environ_fixup Il.OpaqueTy;
+ Il.emit e (Il.umov edx_pointee (X86.ro X86.eax));
+
+ (* Push 16 bytes to preserve SSE alignment. *)
+ Abi.load_fixup_addr e edx sem.Semant.ctxt_crate_fixup Il.OpaqueTy;
+ Il.emit e (Il.Push (X86.ro X86.edx));
+ Abi.load_fixup_addr e edx main_fn_fixup Il.OpaqueTy;
+ Il.emit e (Il.Push (X86.ro X86.edx));
+ let fptr = Abi.load_fixup_codeptr e edx rust_start_fixup true true in
+ Il.emit e (Il.call (X86.rc X86.eax) fptr);
+ Il.emit e (Il.Pop (X86.rc X86.ecx));
+ Il.emit e (Il.Push (X86.ro X86.eax));
+ let fptr = Abi.load_fixup_codeptr e edx exit_fixup true true in
+ Il.emit e (Il.call (X86.rc X86.eax) fptr);
+ Il.emit e (Il.Pop (X86.rc X86.ecx));
+ Il.emit e (Il.Pop (X86.rc X86.ecx));
+ Il.emit e (Il.Pop (X86.rc X86.ecx));
+ Il.emit e (Il.Pop (X86.rc X86.ecx));
+
+ Il.emit e Il.Ret;
+ in
+
+ let text_section =
+ let start_code =
+ match (start_fixup, rust_start_fixup,
+ sem.Semant.ctxt_main_fn_fixup) with
+ (None, _, _)
+ | (_, None, _)
+ | (_, _, None) -> MARK
+ | (Some start_fixup,
+ Some rust_start_fixup,
+ Some main_fn_fixup) ->
+ let e = X86.new_emitter_without_vregs () in
+ objfile_start e start_fixup rust_start_fixup main_fn_fixup;
+ X86.frags_of_emitted_quads sess e
+ in
+ def_aligned text_sect_align text_section_fixup
+ (SEQ [|
+ start_code;
+ code
+ |])
+ in
+
+ let text_segment =
+ def_aligned seg_align text_segment_fixup
+ (SEQ [|
+ DEF (mh_execute_header_fixup, header_and_commands);
+ text_section;
+ align_both seg_align MARK;
+ |]);
+ in
+
+ let zero_segment = align_both seg_align
+ (SEQ [| MEMPOS 0L; DEF (zero_segment_fixup,
+ SEQ [| MEMPOS 0x1000L; MARK |] ) |])
+ in
+
+ let data_segment = def_aligned seg_align data_segment_fixup
+ (SEQ [|
+ DEF(nxargc_fixup, WORD (TY_u32, IMM 0L));
+ DEF(nxargv_fixup, WORD (TY_u32, IMM 0L));
+ DEF(environ_fixup, WORD (TY_u32, IMM 0L));
+ DEF(progname_fixup, WORD (TY_u32, IMM 0L));
+ data_section;
+ const_section;
+ bss_section;
+ note_rust_section;
+ nl_symbol_ptr_section
+ |])
+ in
+
+ let dwarf_segment = def_aligned seg_align dwarf_segment_fixup
+ (SEQ [|
+ debug_info_section;
+ debug_abbrev_section;
+ |])
+ in
+
+ let linkedit_segment = def_aligned seg_align linkedit_segment_fixup
+ (SEQ [|
+ symtab;
+ strtab;
+ indirect_symtab;
+ |])
+ in
+
+ let segments =
+ SEQ [|
+ DEF (sem.Semant.ctxt_image_base_fixup, MARK);
+ zero_segment;
+ text_segment;
+ data_segment;
+ dwarf_segment;
+ linkedit_segment;
+ |]
+ in
+ write_out_frag sess true segments
+;;
+
+
+(* [sniff sess filename] probes whether [filename] looks like a Mach-O
+ * image: a regular file of more than 4 bytes whose first u32 equals
+ * mh_magic. On a match the reader is rewound to offset 0 and returned
+ * so the caller can parse from the start. Any failure (stat, open,
+ * read) is deliberately treated as "not a Mach-O file". *)
+let sniff
+    (sess:Session.sess)
+    (filename:filename)
+    : asm_reader option =
+  try
+    let stat = Unix.stat filename in
+    if (stat.Unix.st_kind = Unix.S_REG) &&
+      (stat.Unix.st_size > 4)
+    then
+      let ar = new_asm_reader sess filename in
+      let _ = log sess "sniffing Mach-O file" in
+        (* Check the magic word, then rewind for the caller. *)
+        if (ar.asm_get_u32()) = (Int64.to_int mh_magic)
+        then (ar.asm_seek 0; Some ar)
+        else None
+    else
+      None
+  with
+      (* Best-effort sniffing: any I/O error just means "not ours". *)
+      _ -> None
+;;
+
+(* [get_sections sess ar] walks the load commands of an already-sniffed
+ * Mach-O image and returns a table mapping section name to
+ * (file offset, memory size). Only LC_SEGMENT commands are examined;
+ * every other command is skipped using its recorded cmdsize (which
+ * includes the 8 header bytes already consumed). Section names of the
+ * Mach-O form "__foo" are normalized to ".foo" to match the names the
+ * rest of the compiler uses. Asserts the magic word up front. *)
+let get_sections
+    (sess:Session.sess)
+    (ar:asm_reader)
+    : (string,(int*int)) Hashtbl.t =
+  let sects = Hashtbl.create 0 in
+  let _ = log sess "reading sections" in
+  let magic = ar.asm_get_u32() in
+  let _ = assert (magic = (Int64.to_int mh_magic)) in
+  let _ = ar.asm_adv_u32() in (* cpu type *)
+  let _ = ar.asm_adv_u32() in (* cpu subtype *)
+  let _ = ar.asm_adv_u32() in (* file type *)
+  let n_load_cmds = ar.asm_get_u32() in
+  let _ = ar.asm_adv_u32() in (* sizeofcmds *)
+  let _ = log sess "Mach-o file with %d load commands" n_load_cmds in
+  let _ = ar.asm_adv_u32() in (* flags *)
+  let lc_seg = Int64.to_int (load_command_code LC_SEGMENT) in
+  for i = 0 to n_load_cmds - 1 do
+    let load_cmd_code = ar.asm_get_u32() in
+    let load_cmd_size = ar.asm_get_u32() in
+    let _ = log sess "load command %d:" i in
+      (* Structural comparison; cmdsize covers the whole command. *)
+      if load_cmd_code <> lc_seg
+      then ar.asm_adv (load_cmd_size - 8)
+      else
+        begin
+          let seg_name = ar.asm_get_zstr_padded 16 in
+          let _ = log sess "LC_SEGMENT %s" seg_name in
+          let _ = ar.asm_adv_u32() in (* seg mem pos *)
+          let _ = ar.asm_adv_u32() in (* seg mem sz *)
+          let _ = ar.asm_adv_u32() in (* seg file pos *)
+          let _ = ar.asm_adv_u32() in (* seg file sz *)
+          let _ = ar.asm_adv_u32() in (* maxprot *)
+          let _ = ar.asm_adv_u32() in (* initprot *)
+          let n_sects = ar.asm_get_u32() in
+          let _ = ar.asm_get_u32() in (* flags *)
+          (* Bug fix: the format "%d sections" was left partially
+           * applied (no argument), so this line never logged. *)
+          let _ = log sess "%d sections" n_sects in
+            for j = 0 to n_sects - 1 do
+              let sect_name = ar.asm_get_zstr_padded 16 in
+              let _ = ar.asm_adv 16 in (* seg name *)
+              let _ = ar.asm_adv_u32() in (* sect mem pos *)
+              let m_sz = ar.asm_get_u32() in
+              let f_pos = ar.asm_get_u32() in
+              let _ = ar.asm_adv_u32() in (* sect align *)
+              let _ = ar.asm_adv_u32() in (* reloff *)
+              let _ = ar.asm_adv_u32() in (* nreloc *)
+              let _ = ar.asm_adv_u32() in (* flags *)
+              let _ = ar.asm_adv_u32() in (* reserved1 *)
+              let _ = ar.asm_adv_u32() in (* reserved2 *)
+              let _ =
+                log sess
+                  "  section %d: 0x%x - 0x%x %s "
+                  j f_pos (f_pos + m_sz) sect_name
+              in
+              (* Normalize "__foo" to ".foo". *)
+              let len = String.length sect_name in
+              let sect_name =
+                if (len > 2
+                    && sect_name.[0] = '_'
+                    && sect_name.[1] = '_')
+                then "." ^ (String.sub sect_name 2 (len-2))
+                else sect_name
+              in
+                Hashtbl.add sects sect_name (f_pos, m_sz)
+            done
+        end
+  done;
+  sects
+;;
+
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)
diff --git a/src/boot/be/pe.ml b/src/boot/be/pe.ml
new file mode 100644
index 00000000..d360ddf5
--- /dev/null
+++ b/src/boot/be/pe.ml
@@ -0,0 +1,1149 @@
+(*
+
+ Module for writing Microsoft PE files
+
+ Every image has a base address it's to be loaded at.
+
+ "file pointer" = offset in file
+
+ "VA" = address at runtime
+
+ "RVA" = VA - base address
+
+ If you write a non-RVA absolute address at any point you must put it
+ in a rebasing list so the loader can adjust it when/if it has to load
+ you at a different address.
+
+ Almost all addresses in the file are RVAs. Worry about the VAs.
+
+*)
+
+open Asm;;
+open Common;;
+
+(* Curried logger for the PE emitter: yields a printf-style function
+ * tagged "obj (pe)", gated on the session's object-logging flag and
+ * directed at the session's log channel. *)
+let log (sess:Session.sess) =
+  Session.log "obj (pe)"
+    sess.Session.sess_log_obj
+    sess.Session.sess_log_out
+;;
+
+(* Run [thunk] only when object logging is enabled, so callers can
+ * avoid computing expensive log output when it would be discarded. *)
+let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit =
+  if sess.Session.sess_log_obj
+  then thunk ()
+  else ()
+;;
+
+(*
+
+ The default image base (VA) for an executable on Win32 is 0x400000.
+
+ We use this too. RVAs are relative to this. RVA 0 = VA 0x400000.
+
+ Alignments are also relatively standard and fixed for Win32/PE32:
+ 4k memory pages, 512 byte disk sectors.
+
+ Since this is a stupid emitter, and we're not generating an awful
+ lot of sections, we are not going to differentiate between these
+ two kinds of alignment: we just align our sections to memory pages
+ and sometimes waste most of them. Shucks.
+
+*)
+
+(* Default load VA for a Win32 image; every RVA below is relative to it. *)
+let pe_image_base = 0x400000L;;
+(* File-offset granularity: 512-byte disk sectors. *)
+let pe_file_alignment = 0x200;;
+(* Memory granularity: 4KB pages (we align sections to this in both senses). *)
+let pe_mem_alignment = 0x1000;;
+
+(* RVA of a fixup = its resolved memory position minus the image base. *)
+let rva (f:fixup) = (SUB ((M_POS f), (IMM pe_image_base)));;
+
+(* Emit frag [i] as the definition of fixup [f], padded to the file
+ * alignment both before the definition starts and after it ends. *)
+let def_file_aligned f i =
+  let tail = ALIGN_FILE (pe_file_alignment, MARK) in
+    ALIGN_FILE (pe_file_alignment,
+                SEQ [| DEF (f, SEQ [| i; tail |]) |])
+;;
+
+(* Like def_file_aligned, but padding memory positions to the page
+ * alignment rather than file positions to the sector alignment. *)
+let def_mem_aligned f i =
+  let tail = ALIGN_MEM (pe_mem_alignment, MARK) in
+    ALIGN_MEM (pe_mem_alignment,
+               SEQ [| DEF (f, SEQ [| i; tail |]) |])
+;;
+
+(* Align frag [i] to the file alignment and the memory alignment at once. *)
+let align_both i =
+  let mem_aligned = ALIGN_MEM (pe_mem_alignment, i) in
+    ALIGN_FILE (pe_file_alignment, mem_aligned)
+;;
+
+(* Define fixup [f] over [i], aligned both ways fore and aft. *)
+let def_aligned f i =
+  align_both (SEQ [| DEF (f, i); (align_both MARK) |])
+;;
+
+
+(*
+
+ At the beginning of a PE file there is an MS-DOS stub, 0x00 - 0x7F,
+ that we just insert literally. It prints "This program must be run
+ under Win32" and exits. Woo!
+
+ Within it, at offset 0x3C, there is an encoded offset of the PE
+ header we actually care about. So 0x3C - 0x3F are 0x00000100 (LE)
+ which say "the PE header is actually at 0x100", a nice sensible spot
+ for it. We pad the next 128 bytes out to 0x100 and start there for
+ real.
+
+ From then on in it's a sensible object file. Here's the MS-DOS bit.
+*)
+
+(* Literal MS-DOS stub occupying file bytes 0x00-0x7F: real-mode code
+ * that prints the "must be run under Win32" banner via INT 21h, plus
+ * the PE-header offset (0x100, little-endian) stored at 0x3C.  The
+ * trailing PAD stretches the frag to 0x80 further bytes so the PE
+ * header proper starts at a known position. *)
+let pe_msdos_header_and_padding
+    : frag =
+  SEQ [|
+    BYTES
+      [|
+        (* 00000000 *)
+        0x4d; 0x5a; 0x50; 0x00; 0x02; 0x00; 0x00; 0x00;
+        0x04; 0x00; 0x0f; 0x00; 0xff; 0xff; 0x00; 0x00;
+
+        (* 00000010 *)
+        0xb8; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
+        0x40; 0x00; 0x1a; 0x00; 0x00; 0x00; 0x00; 0x00;
+
+        (* 00000020 *)
+        0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
+        0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
+
+        (* 00000030 *)
+        0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
+        0x00; 0x00; 0x00; 0x00; 0x00; 0x01; 0x00; 0x00;
+        (*                      ^^^^PE HDR offset^^^^^  *)
+
+        (* 00000040 *)
+        0xba; 0x10; 0x00; 0x0e; 0x1f; 0xb4; 0x09; 0xcd;
+        0x21; 0xb8; 0x01; 0x4c; 0xcd; 0x21; 0x90; 0x90;
+
+        (* 00000050 *)
+        0x54; 0x68; 0x69; 0x73; 0x20; 0x70; 0x72; 0x6f;  (* "This pro" *)
+        0x67; 0x72; 0x61; 0x6d; 0x20; 0x6d; 0x75; 0x73;  (* "gram mus" *)
+
+        (* 00000060 *)
+        0x74; 0x20; 0x62; 0x65; 0x20; 0x72; 0x75; 0x6e;  (* "t be run" *)
+        0x20; 0x75; 0x6e; 0x64; 0x65; 0x72; 0x20; 0x57;  (* " under W" *)
+
+        (* 00000070 *)
+        0x69; 0x6e; 0x33; 0x32; 0x0d; 0x0a; 0x24; 0x37;  (* "in32\r\n$7";
+                                                            '$' is the DOS
+                                                            string terminator *)
+        0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
+      |];
+    PAD 0x80
+  |]
+;;
+
+(*
+ A work of art, is it not? Take a moment to appreciate the madness.
+
+ All done? Ok, now on to the PE header proper.
+
+ PE headers are just COFF headers with a little preamble.
+*)
+
+(* COFF machine-type codes we know how to emit (see pe_header). *)
+type pe_machine =
+    (* Maybe support more later. *)
+    IMAGE_FILE_MACHINE_AMD64
+  | IMAGE_FILE_MACHINE_I386
+;;
+
+(* Build timestamp recorded in the COFF header: whole seconds since
+ * the Unix epoch. *)
+let pe_timestamp _ =
+  Int64.of_float (Unix.gettimeofday())
+;;
+
+(* COFF "characteristics" flags; folded into a 16-bit mask in pe_header. *)
+type pe_characteristics =
+    (* Maybe support more later. *)
+    IMAGE_FILE_RELOCS_STRIPPED
+  | IMAGE_FILE_EXECUTABLE_IMAGE
+  | IMAGE_FILE_LINE_NUMS_STRIPPED
+  | IMAGE_FILE_LOCAL_SYMS_STRIPPED
+  | IMAGE_FILE_32BIT_MACHINE
+  | IMAGE_FILE_DEBUG_STRIPPED
+  | IMAGE_FILE_DLL
+;;
+
+
+(* The PE signature plus the fixed 20-byte COFF file header: machine
+ * code, section count, timestamp, symbol-table file position/count,
+ * the size of the "optional" loader header that follows, and the
+ * characteristics flag word. *)
+let pe_header
+    ~(machine:pe_machine)
+    ~(symbol_table_fixup:fixup)
+    ~(number_of_sections:int64)
+    ~(number_of_symbols:int64)
+    ~(loader_hdr_fixup:fixup)
+    ~(characteristics:pe_characteristics list)
+    : frag =
+  ALIGN_FILE
+    (8,
+     SEQ [|
+       STRING "PE\x00\x00";
+       WORD (TY_u16, (IMM (match machine with
+                               IMAGE_FILE_MACHINE_AMD64 -> 0x8664L
+                             | IMAGE_FILE_MACHINE_I386 -> 0x014cL)));
+       WORD (TY_u16, (IMM number_of_sections));
+       WORD (TY_u32, (IMM (pe_timestamp())));
+       WORD (TY_u32, (F_POS symbol_table_fixup));
+       WORD (TY_u32, (IMM number_of_symbols));
+       WORD (TY_u16, (F_SZ loader_hdr_fixup));
+       WORD (TY_u16, (IMM (fold_flags
+                             (fun c -> match c with
+                                  IMAGE_FILE_RELOCS_STRIPPED -> 0x1L
+                                | IMAGE_FILE_EXECUTABLE_IMAGE -> 0x2L
+                                | IMAGE_FILE_LINE_NUMS_STRIPPED -> 0x4L
+                                | IMAGE_FILE_LOCAL_SYMS_STRIPPED -> 0x8L
+                                | IMAGE_FILE_32BIT_MACHINE -> 0x100L
+                                | IMAGE_FILE_DEBUG_STRIPPED -> 0x200L
+                                | IMAGE_FILE_DLL -> 0x2000L)
+                             characteristics)))
+     |])
+;;
+
+(*
+
+ After the PE header comes an "optional" header for the loader. In
+ our case this is hardly optional since we are producing a file for
+ the loader.
+
+*)
+
+(* Windows subsystem selector for the loader header: GUI vs console. *)
+type pe_subsystem =
+    (* Maybe support more later. *)
+    IMAGE_SUBSYSTEM_WINDOWS_GUI
+  | IMAGE_SUBSYSTEM_WINDOWS_CUI
+;;
+
+(* A 32-bit zero word; used for reserved / unused header slots. *)
+let zero32 = WORD (TY_u32, (IMM 0L))
+;;
+
+(* The PE32 "optional" header, defined as [loader_hdr_fixup]: standard
+ * fields, Windows-specific fields, then the 16 data-directory slots,
+ * of which only the export and import directories are populated. *)
+let pe_loader_header
+    ~(text_fixup:fixup)
+    ~(init_data_fixup:fixup)
+    ~(size_of_uninit_data:int64)
+    ~(entry_point_fixup:fixup option)
+    ~(image_fixup:fixup)
+    ~(all_hdrs_fixup:fixup)
+    ~(subsys:pe_subsystem)
+    ~(loader_hdr_fixup:fixup)
+    ~(import_dir_fixup:fixup)
+    ~(export_dir_fixup:fixup)
+    : frag =
+  DEF
+    (loader_hdr_fixup,
+     SEQ [|
+       WORD (TY_u16, (IMM 0x10bL));         (* COFF magic tag for PE32.  *)
+       (* Snagged *)
+       WORD (TY_u8, (IMM 0x2L));            (* Linker major version.     *)
+       WORD (TY_u8, (IMM 0x38L));           (* Linker minor version.     *)
+
+       WORD (TY_u32, (F_SZ text_fixup));    (* "size of code"            *)
+       WORD (TY_u32,                        (* "size of all init data"   *)
+             (F_SZ init_data_fixup));
+       WORD (TY_u32,
+             (IMM size_of_uninit_data));
+
+       begin
+         match entry_point_fixup with
+             None -> zero32  (* Library mode: DLLMain *)
+           | Some entry_point_fixup ->
+               WORD (TY_u32,
+                     (rva
+                        entry_point_fixup)) (* "address of entry point"  *)
+       end;
+
+       WORD (TY_u32, (rva text_fixup));      (* "base of code"           *)
+       WORD (TY_u32, (rva init_data_fixup)); (* "base of data"           *)
+       WORD (TY_u32, (IMM pe_image_base));
+       WORD (TY_u32, (IMM (Int64.of_int
+                             pe_mem_alignment)));
+       WORD (TY_u32, (IMM (Int64.of_int
+                             pe_file_alignment)));
+
+       WORD (TY_u16, (IMM 4L));             (* Major OS version: NT4.    *)
+       WORD (TY_u16, (IMM 0L));             (* Minor OS version.         *)
+       WORD (TY_u16, (IMM 1L));             (* Major image version.      *)
+       WORD (TY_u16, (IMM 0L));             (* Minor image version.      *)
+       WORD (TY_u16, (IMM 4L));             (* Major subsystem version.  *)
+       WORD (TY_u16, (IMM 0L));             (* Minor subsystem version.  *)
+
+       zero32;                              (* Reserved.                 *)
+
+       WORD (TY_u32, (M_SZ image_fixup));
+       WORD (TY_u32, (M_SZ all_hdrs_fixup));
+
+       zero32;                              (* Checksum, but OK if zero. *)
+       WORD (TY_u16, (IMM (match subsys with
+                               IMAGE_SUBSYSTEM_WINDOWS_GUI -> 2L
+                             | IMAGE_SUBSYSTEM_WINDOWS_CUI -> 3L)));
+
+       WORD (TY_u16, (IMM 0L));             (* DLL characteristics.      *)
+
+       WORD (TY_u32, (IMM 0x100000L));      (* Size of stack reserve.    *)
+       WORD (TY_u32, (IMM 0x4000L));        (* Size of stack commit.     *)
+
+       WORD (TY_u32, (IMM 0x100000L));      (* Size of heap reserve.     *)
+       WORD (TY_u32, (IMM 0x1000L));        (* Size of heap commit.      *)
+
+       zero32;                              (* Reserved.                 *)
+       WORD (TY_u32, (IMM 16L));            (* Number of dir references. *)
+
+       (* Begin directories, variable part of hdr. *)
+
+       (*
+
+          Standard PE files have ~10 directories referenced from
+          here. We only fill in two of them -- the export/import
+          directories -- because we don't care about the others. We
+          leave the rest as zero in case someone is looking for
+          them. This may be superfluous or wrong.
+
+        *)
+
+
+       WORD (TY_u32, (rva export_dir_fixup));
+       WORD (TY_u32, (M_SZ export_dir_fixup));
+
+       WORD (TY_u32, (rva import_dir_fixup));
+       WORD (TY_u32, (M_SZ import_dir_fixup));
+
+       zero32; zero32;            (* Resource dir.    *)
+       zero32; zero32;            (* Exception dir.   *)
+       zero32; zero32;            (* Security dir.    *)
+       zero32; zero32;            (* Base reloc dir.  *)
+       zero32; zero32;            (* Debug dir.       *)
+       zero32; zero32;            (* Image desc dir.  *)
+       zero32; zero32;            (* Mach spec dir.   *)
+       zero32; zero32;            (* TLS dir.         *)
+
+       zero32; zero32;            (* Load config.     *)
+       zero32; zero32;            (* Bound import.    *)
+       zero32; zero32;            (* IAT              *)
+       zero32; zero32;            (* Delay import.    *)
+       zero32; zero32;            (* COM descriptor   *)
+       zero32; zero32;            (* ????????         *)
+     |])
+
+;;
+
+
+(* Identifiers for the sections this emitter knows how to name and
+ * flag (see pe_section_header for the name/flag mapping of each). *)
+type pe_section_id =
+    (* Maybe support more later. *)
+    SECTION_ID_TEXT
+  | SECTION_ID_DATA
+  | SECTION_ID_RDATA
+  | SECTION_ID_BSS
+  | SECTION_ID_IMPORTS
+  | SECTION_ID_EXPORTS
+  | SECTION_ID_DEBUG_ARANGES
+  | SECTION_ID_DEBUG_PUBNAMES
+  | SECTION_ID_DEBUG_INFO
+  | SECTION_ID_DEBUG_ABBREV
+  | SECTION_ID_DEBUG_LINE
+  | SECTION_ID_DEBUG_FRAME
+  | SECTION_ID_NOTE_RUST
+;;
+
+(* Per-section COFF flags; folded into a 32-bit mask in
+ * pe_section_header. *)
+type pe_section_characteristics =
+    (* Maybe support more later. *)
+    IMAGE_SCN_CNT_CODE
+  | IMAGE_SCN_CNT_INITIALIZED_DATA
+  | IMAGE_SCN_CNT_UNINITIALIZED_DATA
+  | IMAGE_SCN_MEM_DISCARDABLE
+  | IMAGE_SCN_MEM_SHARED
+  | IMAGE_SCN_MEM_EXECUTE
+  | IMAGE_SCN_MEM_READ
+  | IMAGE_SCN_MEM_WRITE
+
+(* One 40-byte COFF section-table row for section [id], with all
+ * sizes and positions expressed in terms of [hdr_fixup].  The
+ * characteristics list is derived from the section identity. *)
+let pe_section_header
+    ~(id:pe_section_id)
+    ~(hdr_fixup:fixup)
+    : frag =
+  let
+      characteristics =
+    match id with
+        SECTION_ID_TEXT -> [ IMAGE_SCN_CNT_CODE;
+                             IMAGE_SCN_MEM_READ;
+                             IMAGE_SCN_MEM_EXECUTE ]
+      | SECTION_ID_DATA -> [ IMAGE_SCN_CNT_INITIALIZED_DATA;
+                             IMAGE_SCN_MEM_READ;
+                             IMAGE_SCN_MEM_WRITE ]
+      | SECTION_ID_BSS -> [ IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+                            IMAGE_SCN_MEM_READ;
+                            IMAGE_SCN_MEM_WRITE ]
+      | SECTION_ID_IMPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA;
+                                IMAGE_SCN_MEM_READ;
+                                IMAGE_SCN_MEM_WRITE ]
+      | SECTION_ID_EXPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA;
+                                IMAGE_SCN_MEM_READ ]
+      | SECTION_ID_RDATA
+      | SECTION_ID_DEBUG_ARANGES
+      | SECTION_ID_DEBUG_PUBNAMES
+      | SECTION_ID_DEBUG_INFO
+      | SECTION_ID_DEBUG_ABBREV
+      | SECTION_ID_DEBUG_LINE
+      | SECTION_ID_DEBUG_FRAME
+      | SECTION_ID_NOTE_RUST -> [ IMAGE_SCN_CNT_INITIALIZED_DATA;
+                                  IMAGE_SCN_MEM_READ ]
+  in
+    SEQ [|
+      STRING
+        begin
+          match id with
+              SECTION_ID_TEXT -> ".text\x00\x00\x00"
+            | SECTION_ID_DATA -> ".data\x00\x00\x00"
+            | SECTION_ID_RDATA -> ".rdata\x00\x00"
+            | SECTION_ID_BSS -> ".bss\x00\x00\x00\x00"
+            | SECTION_ID_IMPORTS -> ".idata\x00\x00"
+            | SECTION_ID_EXPORTS -> ".edata\x00\x00"
+
+            (* There is a bizarre Microsoft COFF extension to account
+             * for longer-than-8-char section names: you emit a single
+             * '/' character then the ASCII-numeric encoding of the
+             * offset within the file's string table of the full name.
+             * So we put all our extended section names at the
+             * beginning of the string table in a very specific order
+             * and hard-wire the offsets as "names" here. You could
+             * theoretically extend this to a "new kind" of fixup
+             * reference (ASCII_POS or such), if you feel this is
+             * something you want to twiddle with.
+             *)
+
+            | SECTION_ID_DEBUG_ARANGES -> "/4\x00\x00\x00\x00\x00\x00"
+            | SECTION_ID_DEBUG_PUBNAMES -> "/19\x00\x00\x00\x00\x00"
+            | SECTION_ID_DEBUG_INFO -> "/35\x00\x00\x00\x00\x00"
+            | SECTION_ID_DEBUG_ABBREV -> "/47\x00\x00\x00\x00\x00"
+            | SECTION_ID_DEBUG_LINE -> "/61\x00\x00\x00\x00\x00"
+            | SECTION_ID_DEBUG_FRAME -> "/73\x00\x00\x00\x00\x00"
+            | SECTION_ID_NOTE_RUST -> "/86\x00\x00\x00\x00\x00"
+        end;
+
+      (* The next two pairs are only supposed to be different if the
+         file and section alignments differ. This is a stupid emitter
+         so they're not, no problem. *)
+
+      WORD (TY_u32, (M_SZ hdr_fixup));      (* "Virtual size"        *)
+      WORD (TY_u32, (rva hdr_fixup));       (* "Virtual address"     *)
+
+      WORD (TY_u32, (F_SZ hdr_fixup));      (* "Size of raw data"    *)
+      WORD (TY_u32, (F_POS hdr_fixup));     (* "Pointer to raw data" *)
+
+      zero32;                               (* Reserved. *)
+      zero32;                               (* Reserved. *)
+      zero32;                               (* Reserved. *)
+
+      WORD (TY_u32, (IMM (fold_flags
+                            (fun c -> match c with
+                                 IMAGE_SCN_CNT_CODE -> 0x20L
+                               | IMAGE_SCN_CNT_INITIALIZED_DATA -> 0x40L
+                               | IMAGE_SCN_CNT_UNINITIALIZED_DATA -> 0x80L
+                               | IMAGE_SCN_MEM_DISCARDABLE -> 0x2000000L
+                               | IMAGE_SCN_MEM_SHARED -> 0x10000000L
+                               | IMAGE_SCN_MEM_EXECUTE -> 0x20000000L
+                               | IMAGE_SCN_MEM_READ -> 0x40000000L
+                               | IMAGE_SCN_MEM_WRITE -> 0x80000000L)
+                            characteristics)))
+    |]
+;;
+
+
+(*
+
+ "Thunk" is a misnomer here; the thunk RVA is the address of a word
+ that the loader will store an address into. The stored address is
+ the address of the imported object.
+
+ So if the imported object is X, and the thunk slot is Y, the loader
+ is doing "Y = &X" and returning &Y as the thunk RVA. To load datum X
+ after the imports are resolved, given the thunk RVA R, you load
+ **R.
+
+*)
+
+(* One imported symbol: its name, the fixup for the name string in
+ * .idata, and the fixup for its IAT slot (patched by the loader). *)
+type pe_import =
+    {
+      pe_import_name_fixup: fixup;
+      pe_import_name: string;
+      pe_import_address_fixup: fixup;
+    }
+
+(* One DLL to import from: its name (and name-string fixup), the
+ * fixups for its import-lookup and import-address tables, and the
+ * symbols imported from it. *)
+type pe_import_dll_entry =
+    {
+      pe_import_dll_name_fixup: fixup;
+      pe_import_dll_name: string;
+      pe_import_dll_ILT_fixup: fixup;
+      pe_import_dll_IAT_fixup: fixup;
+      pe_import_dll_imports: pe_import array;
+    }
+
+ (*
+
+ The import section .idata has a mostly self-contained table
+ structure. You feed it a list of DLL entries, each of which names
+ a DLL and lists symbols in the DLL to import.
+
+ For each named symbol, a 4-byte slot will be reserved in an
+ "import lookup table" (ILT, also in this section). The slot is
+ a pointer to a string in this section giving the name.
+
+ Immediately *after* the ILT, there is an "import address table" (IAT),
+ which is initially identical to the ILT. The loader replaces the entries
+ in the IAT slots with the imported pointers at runtime.
+
+ A central directory at the start of the section lists all the the import
+ thunk tables. Each entry in the import directory is 20 bytes (5 words)
+ but only the last 2 are used: the second last is a pointer to the string
+ name of the DLL in question (string also in this section) and the last is
+ a pointer to the import thunk table itself (also in this section).
+
+ Curiously, of the 5 documents I've consulted on the nature of the
+ first 3 fields, I find a variety of interpretations.
+
+ *)
+
+(* Build the .idata import section for [dlls]: a directory (one
+ * 20-byte row per DLL plus a zero terminator), then per-DLL ILT/IAT
+ * slot tables, then the hint/name strings.  The whole section is
+ * defined as [import_dir_fixup] with file/mem alignment fore and aft.
+ * Fix: the even/odd name-length tests below used physical equality
+ * (==) where structural equality (=) is intended; on ints the result
+ * happens to coincide, but (=) is the correct operator. *)
+let pe_import_section
+    ~(import_dir_fixup:fixup)
+    ~(dlls:pe_import_dll_entry array)
+    : frag =
+
+  let form_dir_entry
+      (entry:pe_import_dll_entry)
+      : frag =
+    SEQ [|
+      (* Note: documented opinions vary greatly about whether the
+         first, last, or both of the slots in one of these rows points
+         to the RVA of the name/hint used to look the import up. This
+         table format is a mess! *)
+      WORD (TY_u32,
+            (rva
+               entry.pe_import_dll_ILT_fixup)); (* Import lookup table. *)
+      WORD (TY_u32, (IMM 0L));                  (* Timestamp, unused.   *)
+      WORD (TY_u32, (IMM 0x0L));                (* Forwarder chain, unused. *)
+      WORD (TY_u32, (rva entry.pe_import_dll_name_fixup));
+      WORD (TY_u32,
+            (rva
+               entry.pe_import_dll_IAT_fixup)); (* Import address table.*)
+    |]
+  in
+
+  (* An ILT slot is the RVA of the import's hint/name entry. *)
+  let form_ILT_slot
+      (import:pe_import)
+      : frag =
+    (WORD (TY_u32, (rva import.pe_import_name_fixup)))
+  in
+
+  (* An IAT slot starts identical to the ILT slot; the loader
+   * overwrites it with the resolved address at load time.  Defining
+   * the import's address fixup here lets code refer to the slot. *)
+  let form_IAT_slot
+      (import:pe_import)
+      : frag =
+    (DEF (import.pe_import_address_fixup,
+          (WORD (TY_u32, (rva import.pe_import_name_fixup)))))
+  in
+
+  let form_tables_for_dll
+      (dll:pe_import_dll_entry)
+      : frag =
+    let terminator = WORD (TY_u32, (IMM 0L)) in
+    let ilt =
+      SEQ [|
+        SEQ (Array.map form_ILT_slot dll.pe_import_dll_imports);
+        terminator
+      |]
+    in
+    let iat =
+      SEQ [|
+        SEQ (Array.map form_IAT_slot dll.pe_import_dll_imports);
+        terminator
+      |]
+    in
+      if Array.length dll.pe_import_dll_imports < 1
+      then bug () "Pe.form_tables_for_dll: empty imports"
+      else
+        SEQ [|
+          DEF (dll.pe_import_dll_ILT_fixup, ilt);
+          DEF (dll.pe_import_dll_IAT_fixup, iat)
+        |]
+
+  in
+
+  let form_import_string
+      (import:pe_import)
+      : frag =
+    DEF
+      (import.pe_import_name_fixup,
+       SEQ [|
+         (* import string entries begin with a 2-byte "hint", but we just
+            set it to zero. *)
+         (WORD (TY_u16, (IMM 0L)));
+         ZSTRING import.pe_import_name;
+         (* Keep entries 2-byte aligned: pad when name+NUL is odd. *)
+         (if String.length import.pe_import_name mod 2 = 0
+          then PAD 1
+          else PAD 0)
+       |])
+  in
+
+  let form_dir_entry_string
+      (dll:pe_import_dll_entry)
+      : frag =
+    DEF
+      (dll.pe_import_dll_name_fixup,
+       SEQ [| ZSTRING dll.pe_import_dll_name;
+              (if String.length dll.pe_import_dll_name mod 2 = 0
+               then PAD 1
+               else PAD 0);
+              SEQ (Array.map form_import_string dll.pe_import_dll_imports) |])
+  in
+
+  let dir = SEQ (Array.map form_dir_entry dlls) in
+  let dir_terminator = PAD 20 in
+  let tables = SEQ (Array.map form_tables_for_dll dlls) in
+  let strings = SEQ (Array.map form_dir_entry_string dlls)
+  in
+    def_aligned
+      import_dir_fixup
+      (SEQ
+         [|
+           dir;
+           dir_terminator;
+           tables;
+           strings
+         |])
+
+;;
+
+(* One exported symbol: its name (and name-string fixup) and the
+ * fixup of the exported address itself. *)
+type pe_export =
+    {
+      pe_export_name_fixup: fixup;
+      pe_export_name: string;
+      pe_export_address_fixup: fixup;
+    }
+;;
+
+(* Build the .edata export section: directory table, export-address
+ * table, name-pointer table, ordinal table, the image name, and the
+ * name strings.  NOTE: sorts the caller's [exports] array in place
+ * by name, since the name-pointer table must be lexically ordered. *)
+let pe_export_section
+    ~(sess:Session.sess)
+    ~(export_dir_fixup:fixup)
+    ~(exports:pe_export array)
+    : frag =
+  Array.sort (fun a b -> compare a.pe_export_name b.pe_export_name) exports;
+  let export_addr_table_fixup = new_fixup "export address table" in
+  let export_addr_table =
+    DEF
+      (export_addr_table_fixup,
+       SEQ
+         (Array.map
+            (fun e -> (WORD (TY_u32, rva e.pe_export_address_fixup)))
+            exports))
+  in
+  let export_name_pointer_table_fixup =
+    new_fixup "export name pointer table"
+  in
+  let export_name_pointer_table =
+    DEF
+      (export_name_pointer_table_fixup,
+       SEQ
+         (Array.map
+            (fun e -> (WORD (TY_u32, rva e.pe_export_name_fixup)))
+            exports))
+  in
+  let export_name_table_fixup = new_fixup "export name table" in
+  let export_name_table =
+    DEF
+      (export_name_table_fixup,
+       SEQ
+         (Array.map
+            (fun e -> (DEF (e.pe_export_name_fixup,
+                            (ZSTRING e.pe_export_name))))
+            exports))
+  in
+  let export_ordinal_table_fixup = new_fixup "export ordinal table" in
+  let export_ordinal_table =
+    DEF
+      (export_ordinal_table_fixup,
+       SEQ
+         (Array.mapi
+            (fun i _ -> (WORD (TY_u16, IMM (Int64.of_int (i)))))
+            exports))
+  in
+  let image_name_fixup = new_fixup "image name fixup" in
+  let n_exports = IMM (Int64.of_int (Array.length exports)) in
+  let export_dir_table =
+    SEQ [|
+      WORD (TY_u32, IMM 0L);           (* Flags, reserved.      *)
+      WORD (TY_u32, IMM 0L);           (* Timestamp, unused.    *)
+      WORD (TY_u16, IMM 0L);           (* Major vers., unused   *)
+      WORD (TY_u16, IMM 0L);           (* Minor vers., unused   *)
+      WORD (TY_u32, rva image_name_fixup);    (* Name RVA.      *)
+      WORD (TY_u32, IMM 1L);           (* Ordinal base = 1.     *)
+      WORD (TY_u32, n_exports);        (* # entries in EAT.     *)
+      WORD (TY_u32, n_exports);        (* # entries in ENPT/EOT.*)
+      WORD (TY_u32, rva export_addr_table_fixup);          (* EAT  *)
+      WORD (TY_u32, rva export_name_pointer_table_fixup);  (* ENPT *)
+      WORD (TY_u32, rva export_ordinal_table_fixup);       (* EOT  *)
+    |]
+  in
+    def_aligned export_dir_fixup
+      (SEQ [|
+         export_dir_table;
+         export_addr_table;
+         export_name_pointer_table;
+         export_ordinal_table;
+         DEF (image_name_fixup,
+              ZSTRING (Session.filename_of sess.Session.sess_out));
+         export_name_table
+       |])
+;;
+
+(* Build the .text section: an optional startup shim (emitted only in
+ * executable mode, when all three fixups are present) followed by the
+ * crate's code, aligned and defined as [text_fixup].  In library mode
+ * any missing fixup collapses the shim to a bare MARK. *)
+let pe_text_section
+    ~(sess:Session.sess)
+    ~(sem:Semant.ctxt)
+    ~(start_fixup:fixup option)
+    ~(rust_start_fixup:fixup option)
+    ~(main_fn_fixup:fixup option)
+    ~(text_fixup:fixup)
+    ~(crate_code:frag)
+    : frag =
+  let startup =
+    match (start_fixup, rust_start_fixup, main_fn_fixup) with
+        (None, _, _)
+      | (_, None, _)
+      | (_, _, None) -> MARK
+      | (Some start_fixup,
+         Some rust_start_fixup,
+         Some main_fn_fixup) ->
+          let e = X86.new_emitter_without_vregs () in
+            (*
+             * We are called from the Microsoft C library startup routine,
+             * and assumed to be stdcall; so we have to clean up our own
+             * stack before returning.
+             *)
+            X86.objfile_start e
+              ~start_fixup
+              ~rust_start_fixup
+              ~main_fn_fixup
+              ~crate_fixup: sem.Semant.ctxt_crate_fixup
+              ~indirect_start: true;
+            X86.frags_of_emitted_quads sess e;
+  in
+    def_aligned
+      text_fixup
+      (SEQ [|
+         startup;
+         crate_code
+       |])
+;;
+
+(* Translate the semantic context's native-import table into the DLL
+ * entries pe_import_section consumes, mapping the known required-lib
+ * tags to concrete DLL filenames and minting fresh fixups for each
+ * name/table.  Each import's address fixup is the one recorded in
+ * the context, so generated code links against the IAT slots. *)
+let rustrt_imports sem =
+  let make_imports_for_lib (lib, tab) =
+    {
+      pe_import_dll_name_fixup = new_fixup "dll name";
+      pe_import_dll_name = (match lib with
+                                REQUIRED_LIB_rustrt -> "rustrt.dll"
+                              | REQUIRED_LIB_crt -> "msvcrt.dll"
+                              | REQUIRED_LIB_rust ls
+                              | REQUIRED_LIB_c ls -> ls.required_libname);
+      pe_import_dll_ILT_fixup = new_fixup "dll ILT";
+      pe_import_dll_IAT_fixup = new_fixup "dll IAT";
+      pe_import_dll_imports =
+        Array.of_list
+          (List.map
+             begin
+               fun (name, fixup) ->
+                 {
+                   pe_import_name_fixup = new_fixup "import name";
+                   pe_import_name = name;
+                   pe_import_address_fixup = fixup;
+                 }
+             end
+             (htab_pairs tab))
+    }
+  in
+    Array.of_list
+      (List.map
+         make_imports_for_lib
+         (htab_pairs sem.Semant.ctxt_native_required))
+;;
+
+
+(* Collect every natively-provided symbol in the semantic context
+ * into a flat pe_export array, minting a fresh name fixup per symbol
+ * and reusing each symbol's recorded address fixup. *)
+let crate_exports (sem:Semant.ctxt) : pe_export array =
+  let sym_to_export (name, fixup) =
+    {
+      pe_export_name_fixup = new_fixup "export name fixup";
+      pe_export_name = name;
+      pe_export_address_fixup = fixup;
+    }
+  in
+  let seg_to_exports (_, tab) =
+    Array.of_list (List.map sym_to_export (htab_pairs tab))
+  in
+  let segs = htab_pairs sem.Semant.ctxt_native_provided in
+    Array.concat (List.map seg_to_exports segs)
+;;
+
+
+(* Top-level PE writer: mint fixups for every header/section, build
+ * the headers, text, bss, initialized data (imports + exports +
+ * data + pseudo-symtab), DWARF and note sections, stitch them into a
+ * single frag rooted at the image base, and write the result out
+ * through the Asm layer.  Section count in pe_header (8) must match
+ * the headers actually emitted below. *)
+let emit_file
+    (sess:Session.sess)
+    (crate:Ast.crate)
+    (code:Asm.frag)
+    (data:Asm.frag)
+    (sem:Semant.ctxt)
+    (dw:Dwarf.debug_records)
+    : unit =
+
+  let all_hdrs_fixup = new_fixup "all headers" in
+  let all_init_data_fixup = new_fixup "all initialized data" in
+  let loader_hdr_fixup = new_fixup "loader header" in
+  let import_dir_fixup = new_fixup "import directory" in
+  let export_dir_fixup = new_fixup "export directory" in
+  let text_fixup = new_fixup "text section" in
+  let bss_fixup = new_fixup "bss section" in
+  let data_fixup = new_fixup "data section" in
+  let image_fixup = new_fixup "image fixup" in
+  let symtab_fixup = new_fixup "symbol table" in
+  let strtab_fixup = new_fixup "string table" in
+  let note_rust_fixup = new_fixup ".note.rust section" in
+
+  (* Executables get a start shim calling rust_start; libraries don't. *)
+  let (start_fixup, rust_start_fixup) =
+    if sess.Session.sess_library_mode
+    then (None, None)
+    else
+      (Some (new_fixup "start"),
+       Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start"))
+  in
+
+  let header = (pe_header
+                  ~machine: IMAGE_FILE_MACHINE_I386
+                  ~symbol_table_fixup: symtab_fixup
+                  ~number_of_sections: 8L
+                  ~number_of_symbols: 0L
+                  ~loader_hdr_fixup: loader_hdr_fixup
+                  ~characteristics:([IMAGE_FILE_EXECUTABLE_IMAGE;
+                                     IMAGE_FILE_LINE_NUMS_STRIPPED;
+                                     IMAGE_FILE_32BIT_MACHINE;]
+                                    @
+                                      (if sess.Session.sess_library_mode
+                                       then [ IMAGE_FILE_DLL ]
+                                       else [ ])))
+  in
+  let symtab =
+    (*
+     * We're not actually presenting a "symbol table", but wish to
+     * provide a "string table" which comes immediately *after* the
+     * symbol table. It's a violation of the PE spec to put one of
+     * these in an executable file (as opposed to just loadable) but
+     * it's necessary to communicate the debug section names to GDB,
+     * and nobody else complains.
+     *)
+    (def_aligned
+       symtab_fixup
+       (def_aligned
+          strtab_fixup
+          (SEQ
+             [|
+               WORD (TY_u32, (F_SZ strtab_fixup));
+               ZSTRING ".debug_aranges";
+               ZSTRING ".debug_pubnames";
+               ZSTRING ".debug_info";
+               ZSTRING ".debug_abbrev";
+               ZSTRING ".debug_line";
+               ZSTRING ".debug_frame";
+               ZSTRING ".note.rust";
+             |])))
+  in
+  let loader_header = (pe_loader_header
+                         ~text_fixup
+                         ~init_data_fixup: all_init_data_fixup
+                         ~size_of_uninit_data: 0L
+                         ~entry_point_fixup: start_fixup
+                         ~image_fixup: image_fixup
+                         ~subsys: IMAGE_SUBSYSTEM_WINDOWS_CUI
+                         ~all_hdrs_fixup
+                         ~loader_hdr_fixup
+                         ~import_dir_fixup
+                         ~export_dir_fixup)
+  in
+  let text_header = (pe_section_header
+                       ~id: SECTION_ID_TEXT
+                       ~hdr_fixup: text_fixup)
+
+  in
+  let bss_header = (pe_section_header
+                      ~id: SECTION_ID_BSS
+                      ~hdr_fixup: bss_fixup)
+  in
+  let import_section = (pe_import_section
+                          ~import_dir_fixup
+                          ~dlls: (rustrt_imports sem))
+  in
+  let import_header = (pe_section_header
+                         ~id: SECTION_ID_IMPORTS
+                         ~hdr_fixup: import_dir_fixup)
+  in
+  let export_section = (pe_export_section
+                          ~sess
+                          ~export_dir_fixup
+                          ~exports: (crate_exports sem))
+  in
+  let export_header = (pe_section_header
+                         ~id: SECTION_ID_EXPORTS
+                         ~hdr_fixup: export_dir_fixup)
+  in
+  let data_header = (pe_section_header
+                       ~id: SECTION_ID_DATA
+                       ~hdr_fixup: data_fixup)
+  in
+(*
+  let debug_aranges_header =
+    (pe_section_header
+       ~id: SECTION_ID_DEBUG_ARANGES
+       ~hdr_fixup: sem.Semant.ctxt_debug_aranges_fixup)
+  in
+  let debug_pubnames_header =
+    (pe_section_header
+       ~id: SECTION_ID_DEBUG_PUBNAMES
+       ~hdr_fixup: sem.Semant.ctxt_debug_pubnames_fixup)
+  in
+*)
+  let debug_info_header = (pe_section_header
+                             ~id: SECTION_ID_DEBUG_INFO
+                             ~hdr_fixup: sem.Semant.ctxt_debug_info_fixup)
+  in
+  let debug_abbrev_header = (pe_section_header
+                               ~id: SECTION_ID_DEBUG_ABBREV
+                               ~hdr_fixup: sem.Semant.ctxt_debug_abbrev_fixup)
+  in
+(*
+  let debug_line_header =
+    (pe_section_header
+       ~id: SECTION_ID_DEBUG_LINE
+       ~hdr_fixup: sem.Semant.ctxt_debug_line_fixup)
+  in
+  let debug_frame_header =
+    (pe_section_header
+       ~id: SECTION_ID_DEBUG_FRAME
+       ~hdr_fixup: sem.Semant.ctxt_debug_frame_fixup)
+  in
+*)
+  let note_rust_header = (pe_section_header
+                            ~id: SECTION_ID_NOTE_RUST
+                            ~hdr_fixup: note_rust_fixup)
+  in
+  (* The section-header order here fixes the on-disk section order. *)
+  let all_headers = (def_file_aligned
+                       all_hdrs_fixup
+                       (SEQ
+                          [|
+                            pe_msdos_header_and_padding;
+                            header;
+                            loader_header;
+                            text_header;
+                            bss_header;
+                            import_header;
+                            export_header;
+                            data_header;
+                            (*
+                              debug_aranges_header;
+                              debug_pubnames_header;
+                            *)
+                            debug_info_header;
+                            debug_abbrev_header;
+                            (*
+                              debug_line_header;
+                              debug_frame_header;
+                            *)
+                            note_rust_header;
+                          |]))
+  in
+
+  let text_section = (pe_text_section
+                        ~sem
+                        ~sess
+                        ~start_fixup
+                        ~rust_start_fixup
+                        ~main_fn_fixup: sem.Semant.ctxt_main_fn_fixup
+                        ~text_fixup
+                        ~crate_code: code)
+  in
+  let bss_section = def_aligned bss_fixup (BSS 0x10L)
+  in
+  (* The pseudo-symtab rides inside .data; see the symtab comment. *)
+  let data_section = (def_aligned data_fixup
+                        (SEQ [| data; symtab; |]))
+  in
+  let all_init_data = (def_aligned
+                         all_init_data_fixup
+                         (SEQ [| import_section;
+                                 export_section;
+                                 data_section; |]))
+  in
+(*
+  let debug_aranges_section =
+    def_aligned sem.Semant.ctxt_debug_aranges_fixup dw.Dwarf.debug_aranges
+  in
+  let debug_pubnames_section =
+    def_aligned sem.Semant.ctxt_debug_pubnames_fixup dw.Dwarf.debug_pubnames
+  in
+*)
+  let debug_info_section =
+    def_aligned sem.Semant.ctxt_debug_info_fixup dw.Dwarf.debug_info
+  in
+  let debug_abbrev_section =
+    def_aligned sem.Semant.ctxt_debug_abbrev_fixup dw.Dwarf.debug_abbrev
+  in
+(*
+  let debug_line_section =
+    def_aligned sem.Semant.ctxt_debug_line_fixup dw.Dwarf.debug_line
+  in
+  let debug_frame_section =
+    def_aligned sem.Semant.ctxt_debug_frame_fixup dw.Dwarf.debug_frame
+  in
+*)
+  let note_rust_section =
+    def_aligned note_rust_fixup
+      (Asm.note_rust_frags crate.node.Ast.crate_meta)
+  in
+
+  let all_frags = SEQ [| MEMPOS pe_image_base;
+                         (def_file_aligned image_fixup
+                            (SEQ [| DEF (sem.Semant.ctxt_image_base_fixup,
+                                         MARK);
+                                    all_headers;
+                                    text_section;
+                                    bss_section;
+                                    all_init_data;
+                                    (* debug_aranges_section; *)
+                                    (* debug_pubnames_section; *)
+                                    debug_info_section;
+                                    debug_abbrev_section;
+                                    (* debug_line_section; *)
+                                    (* debug_frame_section; *)
+                                    note_rust_section;
+                                    ALIGN_MEM (pe_mem_alignment, MARK)
+                                 |]
+                            )
+                         )
+                      |]
+  in
+    write_out_frag sess true all_frags
+;;
+
+(* Signature checked against the 4-byte field at the PE-header offset;
+ * asm_get_zstr_padded drops the two trailing NULs, leaving "PE". *)
+let pe_magic = "PE";;
+
+(* Probe [filename]: return an asm_reader positioned at offset 0 if it
+ * looks like a PE file (regular file, large enough, correct signature
+ * at the offset stored at 0x3C), else None.  The catch-all handler is
+ * deliberate best-effort sniffing: any I/O or parse error means
+ * "not a PE file". *)
+let sniff
+    (sess:Session.sess)
+    (filename:filename)
+    : asm_reader option =
+  try
+    let stat = Unix.stat filename in
+    if (stat.Unix.st_kind = Unix.S_REG) &&
+      (stat.Unix.st_size >= pe_file_alignment)
+    then
+      let ar = new_asm_reader sess filename in
+      let _ = log sess "sniffing PE file" in
+      (* PE header offset is at 0x3c in the MS-DOS compatibility header. *)
+      let _ = ar.asm_seek 0x3c in
+      let pe_hdr_off = ar.asm_get_u32() in
+      let _ = log sess "PE header offset: 0x%x" pe_hdr_off in
+
+      let _ = ar.asm_seek pe_hdr_off in
+      let pe_signature = ar.asm_get_zstr_padded 4 in
+      let _ = log sess "  PE signature: '%s'" pe_signature in
+      if pe_signature = pe_magic
+      then (ar.asm_seek 0; Some ar)
+      else None
+    else
+      None
+  with
+      _ -> None
+;;
+
+
+(* Read the section table of an already-sniffed PE file: walk the COFF
+ * header to find the section headers (right after the loader header),
+ * decode each section's name — including the '/NNN' long-name
+ * string-table encoding described at pe_section_header — and return a
+ * map from section name to (file offset, file size). *)
+let get_sections
+    (sess:Session.sess)
+    (ar:asm_reader)
+    : (string,(int*int)) Hashtbl.t =
+  let _ = log sess "reading sections" in
+  (* PE header offset is at 0x3c in the MS-DOS compatibility header. *)
+  let _ = ar.asm_seek 0x3c in
+  let pe_hdr_off = ar.asm_get_u32() in
+  let _ = log sess "PE header offset: 0x%x" pe_hdr_off in
+
+  let _ = ar.asm_seek pe_hdr_off in
+  let pe_signature = ar.asm_get_zstr_padded 4 in
+  let _ = log sess "  PE signature: '%s'" pe_signature in
+  let _ = assert (pe_signature = pe_magic) in
+  let _ = ar.asm_adv_u16() in (* machine type *)
+
+  let num_sections = ar.asm_get_u16() in
+  let _ = log sess "  num sections: %d" num_sections in
+
+  let _ = ar.asm_adv_u32() in (* timestamp *)
+
+  (* Long section names are ASCII offsets into the table that starts
+   * at this position (our pseudo string table; see emit_file). *)
+  let symtab_off = ar.asm_get_u32() in
+  let _ = log sess "  symtab offset: 0x%x" symtab_off in
+
+  let num_symbols = ar.asm_get_u32() in
+  let _ = log sess "  num symbols: %d" num_symbols in
+
+  let loader_hdr_size = ar.asm_get_u16() in
+  let _ = log sess "loader header sz: %d" loader_hdr_size in
+
+  let _ = ar.asm_adv_u16() in (* flags *)
+  let sections_off = (ar.asm_get_off()) + loader_hdr_size in
+
+  let sects = Hashtbl.create 0 in
+
+  let _ =
+    ar.asm_seek sections_off;
+    for i = 0 to (num_sections - 1) do
+      (*
+       * Section-name encoding is crazy. ASCII-encoding offsets of
+       * long names. See pe_section_header for details.
+       *)
+      let sect_name =
+        let sect_name = ar.asm_get_zstr_padded 8 in
+        assert ((String.length sect_name) > 0);
+        if sect_name.[0] = '/'
+        then
+          let off_str =
+            String.sub sect_name 1 ((String.length sect_name) - 1)
+          in
+          let i = int_of_string off_str in
+          let curr = ar.asm_get_off() in
+          ar.asm_seek (symtab_off + i);
+          let ext_name = ar.asm_get_zstr() in
+          ar.asm_seek curr;
+          ext_name
+        else
+          sect_name
+      in
+      let _ = ar.asm_adv_u32() in (* virtual size *)
+      let _ = ar.asm_adv_u32() in (* virtual address *)
+      let file_sz = ar.asm_get_u32() in
+      let file_off = ar.asm_get_u32() in
+      let _ = ar.asm_adv_u32() in (* reserved *)
+      let _ = ar.asm_adv_u32() in (* reserved *)
+      let _ = ar.asm_adv_u32() in (* reserved *)
+      let _ = ar.asm_adv_u32() in (* flags *)
+      Hashtbl.add sects sect_name (file_off, file_sz);
+      log sess "  section %d: %s, size %d, offset 0x%x"
+        i sect_name file_sz file_off;
+    done
+  in
+  sects
+;;
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)
diff --git a/src/boot/be/ra.ml b/src/boot/be/ra.ml
new file mode 100644
index 00000000..db70b21d
--- /dev/null
+++ b/src/boot/be/ra.ml
@@ -0,0 +1,664 @@
+open Il;;
+open Common;;
+
+(* Register-allocator context: per-function state threaded through all
+ * allocation passes.  The mutable fields are updated in place as
+ * allocation proceeds (quads are rewritten; spill and label counters
+ * advance monotonically). *)
+type ctxt =
+    {
+      ctxt_sess: Session.sess;
+      ctxt_n_vregs: int;
+      ctxt_abi: Abi.abi;
+      mutable ctxt_quads: Il.quads;
+      mutable ctxt_next_spill: int;
+      mutable ctxt_next_label: int;
+      (* More state as necessary. *)
+    }
+;;
+
+(* Build a fresh allocator context for [quads] using [vregs] virtual
+ * registers under [abi].  Spill and label counters start at zero. *)
+let new_ctxt
+    (sess:Session.sess)
+    (quads:Il.quads)
+    (vregs:int)
+    (abi:Abi.abi)
+    : ctxt =
+  {
+    ctxt_sess = sess;
+    ctxt_quads = quads;
+    ctxt_n_vregs = vregs;
+    ctxt_abi = abi;
+    ctxt_next_spill = 0;
+    ctxt_next_label = 0;
+  }
+;;
+
+(* Logger for the "ra" pass, gated on the session's RA log flag. *)
+let log (cx:ctxt) =
+  Session.log "ra"
+    cx.ctxt_sess.Session.sess_log_ra
+    cx.ctxt_sess.Session.sess_log_out
+;;
+
+(* Run [thunk] only when RA logging is enabled, so log-string
+ * construction is skipped when the output would be discarded. *)
+let iflog (cx:ctxt) (thunk:(unit -> unit)) : unit =
+  if cx.ctxt_sess.Session.sess_log_ra
+  then thunk ()
+  else ()
+;;
+
+(* Format [list] as a sorted, comma-separated string, rendering each
+ * element with [eltstr]. *)
+let list_to_str list eltstr =
+  (String.concat "," (List.map eltstr (List.sort compare list)))
+;;
+
+(* Allocate and return the next spill-slot index. *)
+let next_spill (cx:ctxt) : int =
+  let i = cx.ctxt_next_spill in
+    cx.ctxt_next_spill <- i + 1;
+    i
+;;
+
+(* Generate a fresh local-label name of the form ".L<n>". *)
+let next_label (cx:ctxt) : string =
+  let i = cx.ctxt_next_label in
+    cx.ctxt_next_label <- i + 1;
+    (".L" ^ (string_of_int i))
+;;
+
+(* Internal allocator failure; caught at the top of reg_alloc. *)
+exception Ra_error of string ;;
+
+(* Rewrite every CodeLabel jump target into a CodePtr on a fixup,
+ * creating a fresh ".L<n>" fixup for any target quad that does not
+ * already carry one, then write the (possibly new) fixups back onto
+ * the quads so the targets are labeled in the output. *)
+let convert_labels (cx:ctxt) : unit =
+  let quad_fixups = Array.map (fun q -> q.quad_fixup) cx.ctxt_quads in
+  let qp_code (_:Il.quad_processor) (c:Il.code) : Il.code =
+    match c with
+        Il.CodeLabel lab ->
+          let fix =
+            match quad_fixups.(lab) with
+                None ->
+                  (* Target quad has no fixup yet: mint one. *)
+                  let fix = new_fixup (next_label cx) in
+                    begin
+                      quad_fixups.(lab) <- Some fix;
+                      fix
+                    end
+              | Some f -> f
+          in
+            Il.CodePtr (Il.ImmPtr (fix, Il.CodeTy))
+      | _ -> c
+  in
+  let qp = { Il.identity_processor
+             with Il.qp_code = qp_code }
+  in
+    Il.rewrite_quads qp cx.ctxt_quads;
+    (* Re-attach fixups, including any freshly created above. *)
+    Array.iteri (fun i fix ->
+                   cx.ctxt_quads.(i) <- { cx.ctxt_quads.(i) with
+                                            Il.quad_fixup = fix })
+      quad_fixups;
+;;
+
+(* Replace every pre-existing Il.Spill reference in the quads with a
+ * concrete memory operand produced by [mkspill], and return the number
+ * of distinct pre-spill slots seen (max index + 1). *)
+let convert_pre_spills
+    (cx:ctxt)
+    (mkspill:(Il.spill -> Il.mem))
+    : int =
+  let n = ref 0 in
+  let qp_mem (_:Il.quad_processor) (a:Il.mem) : Il.mem =
+    match a with
+        Il.Spill i ->
+          begin
+            (* Track the highest spill index encountered. *)
+            if i+1 > (!n)
+            then n := i+1;
+            mkspill i
+          end
+      | _ -> a
+  in
+  let qp = Il.identity_processor in
+  let qp = { qp with
+               Il.qp_mem = qp_mem }
+  in
+    begin
+      Il.rewrite_quads qp cx.ctxt_quads;
+      !n
+    end
+;;
+
+(* Replace quad [i] with a dead (no-op) quad, preserving its fixup so
+ * any label attached to it is not lost. *)
+let kill_quad (i:int) (cx:ctxt) : unit =
+  cx.ctxt_quads.(i) <-
+    { Il.deadq with
+        Il.quad_fixup = cx.ctxt_quads.(i).Il.quad_fixup }
+;;
+
+(* Kill every mov whose source and destination are the same cell. *)
+let kill_redundant_moves (cx:ctxt) : unit =
+  let process_quad i q =
+    match q.Il.quad_body with
+        Il.Unary u when
+          ((Il.is_mov u.Il.unary_op) &&
+             (Il.Cell u.Il.unary_dst) = u.Il.unary_src) ->
+            kill_quad i cx
+      | _ -> ()
+  in
+    Array.iteri process_quad cx.ctxt_quads
+;;
+
+(* All label targets a quad may transfer control to: its explicit jump
+ * target (if it is a direct label jump) plus any implicit targets the
+ * quad carries. *)
+let quad_jump_target_labels (q:quad) : Il.label list =
+  let explicits =
+    match q.Il.quad_body with
+        Il.Jmp { Il.jmp_targ = Il.CodeLabel lab } -> [ lab ]
+      | _ -> []
+  in
+    explicits @ q.quad_implicits;
+;;
+
+(* Collect the vregs a quad reads.  Registers appearing directly in a
+ * written cell are excluded (they are defs, not uses), but registers
+ * inside a written Mem operand's address are still uses, which is why
+ * qp_cell_write recurses only into the address of Mem cells. *)
+let quad_used_vregs (q:quad) : Il.vreg list =
+  let vregs = ref [] in
+  let qp_reg _ r =
+    match r with
+        Il.Vreg v -> (vregs := (v :: (!vregs)); r)
+      | _ -> r
+  in
+  let qp_cell_write qp c =
+    match c with
+        Il.Reg _ -> c
+      | Il.Mem (a, b) -> Il.Mem (qp.qp_mem qp a, b)
+  in
+  let qp = { Il.identity_processor with
+               Il.qp_reg = qp_reg;
+               Il.qp_cell_write = qp_cell_write }
+  in
+    ignore (Il.process_quad qp q);
+    !vregs
+;;
+
+(* Collect the vregs a quad defines (written register cells only). *)
+let quad_defined_vregs (q:quad) : Il.vreg list =
+  let vregs = ref [] in
+  let qp_cell_write _ c =
+    match c with
+        Il.Reg (Il.Vreg v, _) -> (vregs := (v :: (!vregs)); c)
+      | _ -> c
+  in
+  let qp = { Il.identity_processor with
+               Il.qp_cell_write = qp_cell_write }
+  in
+    ignore (Il.process_quad qp q);
+    !vregs
+;;
+
+(* True when control never falls through to the next quad. *)
+let quad_is_unconditional_jump (q:quad) : bool =
+  match q.Il.quad_body with
+      Il.Jmp { jmp_op = Il.JMP } -> true
+    | Il.Ret -> true
+    | _ -> false
+;;
+
+(* Backward liveness dataflow over the quads.  Returns, per quad, the
+ * set of vregs live on entry (live-in) and on exit (live-out) as bit
+ * vectors indexed by vreg.  The inner loop iterates the standard
+ * equations to a fixed point; the outer loop then kills movs whose
+ * destination vreg is dead and, if any were killed, re-runs the whole
+ * analysis, since removing a def can change liveness elsewhere. *)
+let calculate_live_bitvectors
+    (cx:ctxt)
+    : ((Bits.t array) * (Bits.t array)) =
+
+  log cx "calculating live bitvectors";
+
+  let quads = cx.ctxt_quads in
+  let n_quads = Array.length quads in
+  let n_vregs = cx.ctxt_n_vregs in
+  let new_bitv _ = Bits.create n_vregs false in
+  let (live_in_vregs:Bits.t array) = Array.init n_quads new_bitv in
+  let (live_out_vregs:Bits.t array) = Array.init n_quads new_bitv in
+
+  (* Per-quad use/def sets and control-flow facts, precomputed once. *)
+  let (quad_used_vrs:Bits.t array) = Array.init n_quads new_bitv in
+  let (quad_defined_vrs:Bits.t array) = Array.init n_quads new_bitv in
+  let (quad_uncond_jmp:bool array) = Array.make n_quads false in
+  let (quad_jmp_targs:(Il.label list) array) = Array.make n_quads [] in
+
+  let outer_changed = ref true in
+
+  (* Working bit-vector. *)
+  let scratch = new_bitv() in
+
+  (* bit-vector helpers. *)
+  (* Setup pass. *)
+  for i = 0 to n_quads - 1 do
+    let q = quads.(i) in
+      quad_uncond_jmp.(i) <- quad_is_unconditional_jump q;
+      quad_jmp_targs.(i) <- quad_jump_target_labels q;
+      List.iter
+        (fun v -> Bits.set quad_used_vrs.(i) v true)
+        (quad_used_vregs q);
+      List.iter
+        (fun v -> Bits.set quad_defined_vrs.(i) v true)
+        (quad_defined_vregs q)
+  done;
+
+  while !outer_changed do
+    iflog cx (fun _ -> log cx "iterating outer bitvector calculation");
+    outer_changed := false;
+    (* Restart from empty sets each outer round. *)
+    for i = 0 to n_quads - 1 do
+      Bits.clear live_in_vregs.(i);
+      Bits.clear live_out_vregs.(i)
+    done;
+    let inner_changed = ref true in
+      while !inner_changed do
+        inner_changed := false;
+        iflog cx
+          (fun _ ->
+             log cx "iterating inner bitvector calculation over %d quads"
+               n_quads);
+        (* Backward walk converges faster for liveness. *)
+        for i = n_quads - 1 downto 0 do
+
+          let note_change b = if b then inner_changed := true in
+          let live_in = live_in_vregs.(i) in
+          let live_out = live_out_vregs.(i) in
+          let used = quad_used_vrs.(i) in
+          let defined = quad_defined_vrs.(i) in
+
+            (* Union in the vregs we use. *)
+            note_change (Bits.union live_in used);
+
+            (* Union in all our jump targets. *)
+            List.iter
+              (fun i -> note_change (Bits.union live_out live_in_vregs.(i)))
+              (quad_jmp_targs.(i));
+
+            (* Union in our block successor if we have one *)
+            if i < (n_quads - 1) && (not (quad_uncond_jmp.(i)))
+            then note_change (Bits.union live_out live_in_vregs.(i+1));
+
+            (* Propagate live-out to live-in on anything we don't define. *)
+            ignore (Bits.copy scratch defined);
+            Bits.invert scratch;
+            ignore (Bits.intersect scratch live_out);
+            note_change (Bits.union live_in scratch);
+
+        done
+      done;
+      (* Dead-mov elimination: a mov into a vreg that is not live-out
+       * of its quad has no observable effect. *)
+      let kill_mov_to_dead_target i q =
+        match q.Il.quad_body with
+            Il.Unary { Il.unary_op=uop;
+                       Il.unary_dst=Il.Reg (Il.Vreg v, _) }
+              when
+                ((Il.is_mov uop) &&
+                   not (Bits.get live_out_vregs.(i) v)) ->
+                begin
+                  kill_quad i cx;
+                  outer_changed := true;
+                end
+          | _ -> ()
+      in
+        Array.iteri kill_mov_to_dead_target quads
+  done;
+  iflog cx
+    begin
+      fun _ ->
+        log cx "finished calculating live bitvectors";
+        log cx "=========================";
+        for q = 0 to n_quads - 1 do
+          let buf = Buffer.create 128 in
+            for v = 0 to (n_vregs - 1)
+            do
+              if ((Bits.get live_in_vregs.(q) v)
+                  && (Bits.get live_out_vregs.(q) v))
+              then Printf.bprintf buf " %-2d" v
+              else Buffer.add_string buf "   "
+            done;
+            log cx "[%6d] live vregs: %s" q (Buffer.contents buf)
+        done;
+        log cx "========================="
+    end;
+  (live_in_vregs, live_out_vregs)
+;;
+
+
+(* A quad ends a basic block when control leaves it non-sequentially
+ * (any jump, conditional or not, or a return). *)
+let is_end_of_basic_block (q:quad) : bool =
+  match q.Il.quad_body with
+      Il.Jmp _ -> true
+    | Il.Ret -> true
+    | _ -> false
+;;
+
+(* A quad begins a basic block when it carries a fixup, i.e. it is a
+ * labeled jump target. *)
+let is_beginning_of_basic_block (q:quad) : bool =
+  match q.Il.quad_fixup with
+      None -> false
+    | Some _ -> true
+;;
+
+(* Pretty-print all quads in the context to the RA log, with aligned
+ * quad numbers and label columns. *)
+let dump_quads cx =
+  let f = cx.ctxt_abi.Abi.abi_str_of_hardreg in
+  let len = (Array.length cx.ctxt_quads) - 1 in
+  (* NOTE(review): ndigits_of 0 computes int_of_float (log10 0.0),
+   * i.e. int_of_float neg_infinity — unspecified; padding for quad 0
+   * (and an empty quad array, where len = -1) looks fragile. Confirm
+   * intended behavior. *)
+  let ndigits_of n = (int_of_float (log10 (float_of_int n))) in
+  let padded_num n maxnum =
+    let ndigits = ndigits_of n in
+    let maxdigits = ndigits_of maxnum in
+    let pad = String.make (maxdigits - ndigits) ' ' in
+      Printf.sprintf "%s%d" pad n
+  in
+  let padded_str str maxlen =
+    let pad = String.make (maxlen - (String.length str)) ' ' in
+      Printf.sprintf "%s%s" pad str
+  in
+  (* First pass: find the widest label so the column lines up. *)
+  let maxlablen = ref 0 in
+    for i = 0 to len
+    do
+      let q = cx.ctxt_quads.(i) in
+        match q.quad_fixup with
+            None -> ()
+          | Some f ->
+              maxlablen := max (!maxlablen) ((String.length f.fixup_name) + 1)
+    done;
+    for i = 0 to len
+    do
+      let q = cx.ctxt_quads.(i) in
+      let qs = (string_of_quad f q) in
+      let lab = match q.quad_fixup with
+          None -> ""
+        | Some f -> f.fixup_name ^ ":"
+      in
+        log cx "[%s] %s %s" (padded_num i len) (padded_str lab (!maxlablen)) qs
+    done
+;;
+
+(* Compute, for each vreg, the set of hard registers it may legally
+ * occupy.  Every vreg starts fully unconstrained (all hregs allowed);
+ * the ABI's constrain_vregs hook then clears bits per quad.  The
+ * returned array is indexed by vreg; each entry is a bit-set over
+ * hregs. *)
+let calculate_vreg_constraints (cx:ctxt) : Bits.t array =
+  let abi = cx.ctxt_abi in
+  let n_vregs = cx.ctxt_n_vregs in
+  let n_hregs = abi.Abi.abi_n_hardregs in
+  let constraints = Array.init n_vregs (fun _ -> Bits.create n_hregs true) in
+    Array.iteri
+      begin
+        fun i q ->
+          abi.Abi.abi_constrain_vregs q constraints;
+          (* Logging only below: dump each vreg's allowed hregs. *)
+          iflog cx
+            begin
+              fun _ ->
+                let hr_str = cx.ctxt_abi.Abi.abi_str_of_hardreg in
+                  log cx "constraints for quad %d = %s"
+                    i (string_of_quad hr_str q);
+                  let qp_reg _ r =
+                    begin
+                      match r with
+                          Il.Hreg _ -> ()
+                        | Il.Vreg v ->
+                            let hregs = Bits.to_list constraints.(v) in
+                              log cx "<v%d> constrained to hregs: [%s]"
+                                v (list_to_str hregs hr_str)
+                    end;
+                    r
+                  in
+                    ignore (Il.process_quad { Il.identity_processor with
+                                                Il.qp_reg = qp_reg } q)
+            end;
+      end
+      cx.ctxt_quads;
+    constraints
+;;
+
+(* Simple local register allocator. Nothing fancy. *)
+(*
+ * Entry point.  Rewrites [quads] (which reference [vregs] virtual
+ * registers) into quads that reference only hard registers, inserting
+ * spill/reload movs as needed.  Returns the rewritten quads and the
+ * total number of spill slots consumed.  The interface is unchanged on
+ * error: Ra_error is reported via the session and (quads, 0) returned.
+ *
+ * Strategy: a single forward pass with a greedy assignment.  All hregs
+ * are flushed (spilled) at basic-block boundaries, so no assignment
+ * survives across blocks — hence "local" allocator.
+ *)
+let reg_alloc
+    (sess:Session.sess)
+    (quads:Il.quads)
+    (vregs:int)
+    (abi:Abi.abi) =
+  try
+    let cx = new_ctxt sess quads vregs abi in
+    let _ =
+      iflog cx
+        begin
+          fun _ ->
+            log cx "un-allocated quads:";
+            dump_quads cx
+        end
+    in
+
+    (* Work out pre-spilled slots and allocate 'em. *)
+    let spill_slot (s:Il.spill) = abi.Abi.abi_spill_slot s in
+    let n_pre_spills = convert_pre_spills cx spill_slot in
+
+    let (live_in_vregs, live_out_vregs) =
+      Session.time_inner "RA liveness" sess
+        (fun _ -> calculate_live_bitvectors cx)
+    in
+    let (vreg_constraints:Bits.t array) = (* vreg idx -> hreg bits.t *)
+      calculate_vreg_constraints cx
+    in
+    (* Allocator working state: which hregs are free/assigned, which
+     * vregs have been written since their last spill, and the current
+     * vreg<->hreg and vreg->spill-slot mappings. *)
+    let inactive_hregs = ref [] in (* [hreg] *)
+    let active_hregs = ref [] in (* [hreg] *)
+    let dirty_vregs = Hashtbl.create 0 in (* vreg -> () *)
+    let hreg_to_vreg = Hashtbl.create 0 in (* hreg -> vreg *)
+    let vreg_to_hreg = Hashtbl.create 0 in (* vreg -> hreg *)
+    let vreg_to_spill = Hashtbl.create 0 in (* vreg -> spill *)
+    let (word_ty:Il.scalar_ty) = Il.ValTy abi.Abi.abi_word_bits in
+    let vreg_spill_cell v =
+      Il.Mem ((spill_slot (Hashtbl.find vreg_to_spill v)),
+              Il.ScalarTy word_ty)
+    in
+    (* Output is accumulated in reverse; [fixup] holds a pending label
+     * to attach to the next quad prepended. *)
+    let newq = ref [] in
+    let fixup = ref None in
+    let prepend q =
+      newq := {q with quad_fixup = !fixup} :: (!newq);
+      fixup := None
+    in
+    let hr h = Il.Reg (Il.Hreg h, Il.voidptr_t) in
+    let hr_str = cx.ctxt_abi.Abi.abi_str_of_hardreg in
+    (* Write an hreg's dirty vreg back to its spill slot if that vreg
+     * is still live-out of quad [i]; allocate a slot on first spill. *)
+    let clean_hreg i hreg =
+      if (Hashtbl.mem hreg_to_vreg hreg) &&
+        (hreg < cx.ctxt_abi.Abi.abi_n_hardregs)
+      then
+        let vreg = Hashtbl.find hreg_to_vreg hreg in
+          if Hashtbl.mem dirty_vregs vreg
+          then
+            begin
+              Hashtbl.remove dirty_vregs vreg;
+              if (Bits.get (live_out_vregs.(i)) vreg)
+              then
+                let spill_idx =
+                  if Hashtbl.mem vreg_to_spill vreg
+                  then Hashtbl.find vreg_to_spill vreg
+                  else
+                    begin
+                      let s = next_spill cx in
+                        Hashtbl.replace vreg_to_spill vreg s;
+                        s
+                    end
+                in
+                let spill_mem = spill_slot spill_idx in
+                let spill_cell = Il.Mem (spill_mem, Il.ScalarTy word_ty) in
+                  log cx "spilling <%d> from %s to %s"
+                    vreg (hr_str hreg) (string_of_mem hr_str spill_mem);
+                  prepend (Il.mk_quad
+                             (Il.umov spill_cell (Il.Cell (hr hreg))));
+              else ()
+            end
+          else ()
+      else ()
+    in
+
+    (* Drop an hreg's vreg binding and return it to the free pool
+     * (without writing anything back — pair with clean_hreg). *)
+    let inactivate_hreg hreg =
+      if (Hashtbl.mem hreg_to_vreg hreg) &&
+        (hreg < cx.ctxt_abi.Abi.abi_n_hardregs)
+      then
+        let vreg = Hashtbl.find hreg_to_vreg hreg in
+          Hashtbl.remove vreg_to_hreg vreg;
+          Hashtbl.remove hreg_to_vreg hreg;
+          active_hregs := List.filter (fun x -> x != hreg) (!active_hregs);
+          inactive_hregs := hreg :: (!inactive_hregs);
+      else ()
+    in
+
+    (* Spill-and-free a particular hreg. *)
+    let spill_specific_hreg i hreg =
+      clean_hreg i hreg;
+      inactivate_hreg hreg
+    in
+
+    (* First hreg in [hregs] permitted by [constraints], if any. *)
+    let rec select_constrained
+        (constraints:Bits.t)
+        (hregs:Il.hreg list)
+        : Il.hreg option =
+      match hregs with
+          [] -> None
+        | h::hs ->
+            if Bits.get constraints h
+            then Some h
+            else select_constrained constraints hs
+    in
+
+    (* Evict some active hreg satisfying [constrs] and return it. *)
+    let spill_constrained constrs i =
+      match select_constrained constrs (!active_hregs) with
+          None ->
+            raise (Ra_error ("unable to spill according to constraint"));
+        | Some h ->
+            begin
+              spill_specific_hreg i h;
+              h
+            end
+    in
+
+    let all_hregs = Bits.create abi.Abi.abi_n_hardregs true in
+
+    (* Flush every active hreg — used at basic-block boundaries. *)
+    let spill_all_regs i =
+      while (!active_hregs) != []
+      do
+        let _ = spill_constrained all_hregs i in
+          ()
+      done
+    in
+
+    (* Emit a reload from [vreg]'s spill slot into [hreg], if the vreg
+     * has ever been spilled (otherwise its value never left a reg). *)
+    let reload vreg hreg =
+      if Hashtbl.mem vreg_to_spill vreg
+      then
+        prepend (Il.mk_quad
+                   (Il.umov
+                      (hr hreg)
+                      (Il.Cell (vreg_spill_cell vreg))))
+      else ()
+    in
+
+    (* Resolve [vreg] to an hreg at quad [i]: reuse a cached binding,
+     * else grab a constrained inactive hreg (evicting if none is
+     * free), reloading from the spill slot unless this is a pure
+     * definition ([def] = true). *)
+    let use_vreg def i vreg =
+      if Hashtbl.mem vreg_to_hreg vreg
+      then
+        begin
+          let h = Hashtbl.find vreg_to_hreg vreg in
+            iflog cx (fun _ -> log cx "found cached assignment %s for <v%d>"
+                        (hr_str h) vreg);
+            h
+        end
+      else
+        let hreg =
+          let constrs = vreg_constraints.(vreg) in
+            match select_constrained constrs (!inactive_hregs) with
+                None ->
+                  let h = spill_constrained constrs i in
+                    iflog cx
+                      (fun _ -> log cx "selected %s to spill and use for <v%d>"
+                         (hr_str h) vreg);
+                    h
+              | Some h ->
+                  iflog cx (fun _ -> log cx "selected inactive %s for <v%d>"
+                              (hr_str h) vreg);
+                  h
+        in
+          inactive_hregs :=
+            List.filter (fun x -> x != hreg) (!inactive_hregs);
+          active_hregs := (!active_hregs) @ [hreg];
+          Hashtbl.replace hreg_to_vreg hreg vreg;
+          Hashtbl.replace vreg_to_hreg vreg hreg;
+          if def
+          then ()
+          else
+            reload vreg hreg;
+          hreg
+    in
+    (* Quad-processor hooks: hregs used directly are spilled/freed
+     * (they are ABI-reserved at that point); vregs are resolved via
+     * use_vreg.  [def] distinguishes written cells from read ones. *)
+    let qp_reg def i _ r =
+      match r with
+          Il.Hreg h -> (spill_specific_hreg i h; r)
+        | Il.Vreg v -> (Il.Hreg (use_vreg def i v))
+    in
+    let qp_cell def i qp c =
+      match c with
+          Il.Reg (r, b) -> Il.Reg (qp_reg def i qp r, b)
+        | Il.Mem (a, b) ->
+            (* Regs inside a written Mem address are reads. *)
+            let qp = { qp with Il.qp_reg = qp_reg false i } in
+              Il.Mem (qp.qp_mem qp a, b)
+    in
+    let qp i = { Il.identity_processor with
+                   Il.qp_cell_read = qp_cell false i;
+                   Il.qp_cell_write = qp_cell true i;
+                   Il.qp_reg = qp_reg false i }
+    in
+      (* Main pass over the quads. *)
+      cx.ctxt_next_spill <- n_pre_spills;
+      convert_labels cx;
+      for i = 0 to cx.ctxt_abi.Abi.abi_n_hardregs - 1
+      do
+        inactive_hregs := i :: (!inactive_hregs)
+      done;
+      for i = 0 to (Array.length cx.ctxt_quads) - 1
+      do
+        let quad = cx.ctxt_quads.(i) in
+        let clobbers = cx.ctxt_abi.Abi.abi_clobbers quad in
+        let used = quad_used_vregs quad in
+        let defined = quad_defined_vregs quad in
+          begin
+            if List.exists (fun def -> List.mem def clobbers) defined
+            then raise (Ra_error ("clobber and defined sets overlap"));
+            iflog cx
+              begin
+                fun _ ->
+                  let hr (v:int) : string =
+                    if Hashtbl.mem vreg_to_hreg v
+                    then hr_str (Hashtbl.find vreg_to_hreg v)
+                    else "??"
+                  in
+                  let vr_str (v:int) : string =
+                    Printf.sprintf "v%d=%s" v (hr v)
+                  in
+                  let lstr lab ls fn =
+                    if List.length ls = 0
+                    then ()
+                    else log cx "\t%s: [%s]" lab (list_to_str ls fn)
+                  in
+                    log cx "processing quad %d = %s"
+                      i (string_of_quad hr_str quad);
+                    (lstr "dirt" (htab_keys dirty_vregs) vr_str);
+                    (lstr "clob" clobbers hr_str);
+                    (lstr "in" (Bits.to_list live_in_vregs.(i)) vr_str);
+                    (lstr "out" (Bits.to_list live_out_vregs.(i)) vr_str);
+                    (lstr "use" used vr_str);
+                    (lstr "def" defined vr_str);
+              end;
+            (* Save anything live in regs this quad will clobber. *)
+            List.iter (clean_hreg i) clobbers;
+            if is_beginning_of_basic_block quad
+            then
+              begin
+                spill_all_regs i;
+                fixup := quad.quad_fixup;
+                prepend (Il.process_quad (qp i) quad)
+              end
+            else
+              begin
+                fixup := quad.quad_fixup;
+                (* Process first so reloads land before the quad; the
+                 * block-end flush must also precede it in [newq]. *)
+                let newq = (Il.process_quad (qp i) quad) in
+                  begin
+                    if is_end_of_basic_block quad
+                    then spill_all_regs i
+                    else ()
+                  end;
+                  prepend newq
+              end
+          end;
+          List.iter inactivate_hreg clobbers;
+          List.iter (fun i -> Hashtbl.replace dirty_vregs i ()) defined;
+      done;
+      cx.ctxt_quads <- Array.of_list (List.rev (!newq));
+      kill_redundant_moves cx;
+
+      iflog cx
+        begin
+          fun _ ->
+            log cx "spills: %d pre-spilled, %d total"
+              n_pre_spills cx.ctxt_next_spill;
+            log cx "register-allocated quads:";
+            dump_quads cx;
+        end;
+      (cx.ctxt_quads, cx.ctxt_next_spill)
+
+  with
+      Ra_error s ->
+        Session.fail sess "RA Error: %s" s;
+        (quads, 0)
+
+;;
+
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)
diff --git a/src/boot/be/x86.ml b/src/boot/be/x86.ml
new file mode 100644
index 00000000..01b7e299
--- /dev/null
+++ b/src/boot/be/x86.ml
@@ -0,0 +1,2205 @@
+(*
+ * x86/ia32 instructions have 6 parts:
+ *
+ * [pre][op][modrm][sib][disp][imm]
+ *
+ * [pre] = 0..4 bytes of prefix
+ * [op] = 1..3 byte opcode
+ * [modrm] = 0 or 1 byte: [mod:2][reg/op:3][r/m:3]
+ * [sib] = 0 or 1 byte: [scale:2][index:3][base:3]
+ * [disp] = 1, 2 or 4 byte displacement
+ * [imm] = 1, 2 or 4 byte immediate
+ *
+ * So between 1 and 17 bytes total.
+ *
+ * We're not going to use sib, but modrm is worth discussing.
+ *
+ * The high two bits of modrm denote an addressing mode. The modes are:
+ *
+ * 00 - "mostly" *(reg)
+ * 01 - "mostly" *(reg) + disp8
+ * 10 - "mostly" *(reg) + disp32
+ * 11 - reg
+ *
+ * The next-lowest 3 bits denote a specific register, or a subopcode if
+ * there is a fixed register or only one operand. The instruction format
+ * reference will say "/<n>" for some number n, if a fixed subopcode is used.
+ * It'll say "/r" if the instruction uses this field to specify a register.
+ *
+ * The registers specified in this field are:
+ *
+ * 000 - EAX or XMM0
+ * 001 - ECX or XMM1
+ * 010 - EDX or XMM2
+ * 011 - EBX or XMM3
+ * 100 - ESP or XMM4
+ * 101 - EBP or XMM5
+ * 110 - ESI or XMM6
+ * 111 - EDI or XMM7
+ *
+ * The final low 3 bits denote sub-modes of the primary mode selected
+ * with the top 2 bits. In particular, they "mostly" select the reg that is
+ * to be used for effective address calculation.
+ *
+ * For the most part, these follow the same numbering order: EAX, ECX, EDX,
+ * EBX, ESP, EBP, ESI, EDI. There are two unusual deviations from the rule
+ * though:
+ *
+ * - In primary modes 00, 01 and 10, r/m=100 means "use SIB byte". You can
+ * use (unscaled) ESP as the base register in these modes by appending the
+ * SIB byte 0x24. We do that in our rm_r operand-encoder function.
+ *
+ * - In primary mode 00, r/m=101 means "just disp32", no register is
+ * involved. There is no way to use EBP in primary mode 00. If you try, we
+ * just decay into a mode 01 with an appended 8-bit immediate displacement.
+ *
+ * Some opcodes are written 0xNN +rd. This means "we decided to chew up a
+ * whole pile of opcodes here, with each opcode including a hard-wired
+ * reference to a register". For example, POP is "0x58 +rd", which means that
+ * the 1-byte insns 0x58..0x5f are chewed up for "POP EAX" ... "POP EDI"
+ * (again, the canonical order of register numberings)
+ *)
+
+(*
+ * Notes on register availability of x86:
+ *
+ * There are 8 GPRs but we use 2 of them for specific purposes:
+ *
+ * - ESP always points to the current stack frame.
+ * - EBP always points to the current frame base.
+ *
+ * We tell IL that we have 6 GPRs then, and permit most register-register ops
+ * on any of these 6, mostly-unconstrained.
+ *
+ *)
+
+open Common;;
+
+(* Raised when an instruction form cannot be encoded. *)
+exception Unrecognized
+;;
+
+(* Assemble a ModR/M byte: [mod:2][reg/op:3][r/m:3].  Raises
+ * Invalid_argument if any field exceeds its bit width.
+ * NOTE(review): `or` is the deprecated alias of `||` in OCaml; also
+ * the error string says "modrm_deref" though this is the general
+ * encoder — confirm intended. *)
+let modrm m rm reg_or_subopcode =
+  if (((m land 0b11) != m) or
+        ((rm land 0b111) != rm) or
+        ((reg_or_subopcode land 0b111) != reg_or_subopcode))
+  then raise (Invalid_argument "X86.modrm_deref")
+  else
+    ((((m land 0b11) lsl 6)
+      lor
+        (rm land 0b111))
+     lor
+       ((reg_or_subopcode land 0b111) lsl 3))
+;;
+
+(* Partial applications of [modrm] for each primary addressing mode
+ * (see the header comment at the top of this file). *)
+let modrm_deref_reg = modrm 0b00 ;;
+let modrm_deref_disp32 = modrm 0b00 0b101 ;;
+let modrm_deref_reg_plus_disp8 = modrm 0b01 ;;
+let modrm_deref_reg_plus_disp32 = modrm 0b10 ;;
+let modrm_reg = modrm 0b11 ;;
+
+(* Fixed sub-opcode values for the reg/op field — written "/<n>" in
+ * the ISA reference. *)
+let slash0 = 0;;
+let slash1 = 1;;
+let slash2 = 2;;
+let slash3 = 3;;
+let slash4 = 4;;
+let slash5 = 5;;
+let slash6 = 6;;
+let slash7 = 7;;
+
+
+(*
+ * Translate an IL-level hwreg number from 0..nregs into the 3-bit code number
+ * used through the mod r/m byte and /r sub-register specifiers of the x86
+ * ISA.
+ *
+ * See "Table 2-2: 32-Bit Addressing Forms with the ModR/M Byte", in the IA32
+ * Architecture Software Developer's Manual, volume 2a.
+ *)
+
+(* IL-level hardware register numbers.  Note this ordering differs
+ * from the x86 encoding order: allocatable regs (eax..edx) come
+ * first, with ebp/esp last since the allocator never hands them out. *)
+let eax = 0
+let ecx = 1
+let ebx = 2
+let esi = 3
+let edi = 4
+let edx = 5
+let ebp = 6
+let esp = 7
+
+(* The 3-bit x86 encodings used in ModR/M reg and r/m fields. *)
+let code_eax = 0b000;;
+let code_ecx = 0b001;;
+let code_edx = 0b010;;
+let code_ebx = 0b011;;
+let code_esp = 0b100;;
+let code_ebp = 0b101;;
+let code_esi = 0b110;;
+let code_edi = 0b111;;
+
+(* Map an IL register number (above) to its x86 3-bit encoding. *)
+let reg r =
+  match r with
+      0 -> code_eax
+    | 1 -> code_ecx
+    | 2 -> code_ebx
+    | 3 -> code_esi
+    | 4 -> code_edi
+    | 5 -> code_edx
+        (* Never assigned by the register allocator, but synthetic code uses
+           them *)
+    | 6 -> code_ebp
+    | 7 -> code_esp
+    | _ -> raise (Invalid_argument "X86.reg")
+;;
+
+
+(* DWARF register numbering for ia32 (differs from both the IL
+ * numbering and the x86 ModR/M encoding). *)
+let dwarf_eax = 0;;
+let dwarf_ecx = 1;;
+let dwarf_edx = 2;;
+let dwarf_ebx = 3;;
+let dwarf_esp = 4;;
+let dwarf_ebp = 5;;
+let dwarf_esi = 6;;
+let dwarf_edi = 7;;
+
+(* Map an IL register number to its DWARF register number. *)
+let dwarf_reg r =
+  match r with
+      0 -> dwarf_eax
+    | 1 -> dwarf_ecx
+    | 2 -> dwarf_ebx
+    | 3 -> dwarf_esi
+    | 4 -> dwarf_edi
+    | 5 -> dwarf_edx
+    | 6 -> dwarf_ebp
+    | 7 -> dwarf_esp
+    | _ -> raise (Invalid_argument "X86.dwarf_reg")
+
+(* Printable name of an IL register number. *)
+let reg_str r =
+  match r with
+      0 -> "eax"
+    | 1 -> "ecx"
+    | 2 -> "ebx"
+    | 3 -> "esi"
+    | 4 -> "edi"
+    | 5 -> "edx"
+    | 6 -> "ebp"
+    | 7 -> "esp"
+    | _ -> raise (Invalid_argument "X86.reg_str")
+;;
+
+(* This is a basic ABI. You might need to customize it by platform. *)
+(* 6 allocatable GPRs (ebp/esp are reserved); 4 callee-saves per
+ * x86-cdecl: ebp, ebx, esi, edi. *)
+let (n_hardregs:int) = 6;;
+let (n_callee_saves:int) = 4;;
+
+
+(* Operand-classification predicates used by the instruction selector
+ * to pick 8-bit vs 32-bit encodings and reg vs mem forms. *)
+
+(* 32-bit scalar: a 32-bit value or any address (pointers are 32-bit). *)
+let is_ty32 (ty:Il.scalar_ty) : bool =
+  match ty with
+      Il.ValTy (Il.Bits32) -> true
+    | Il.AddrTy _ -> true
+    | _ -> false
+;;
+
+(* 32-bit register cell. *)
+let is_r32 (c:Il.cell) : bool =
+  match c with
+      Il.Reg (_, st) -> is_ty32 st
+    | _ -> false
+;;
+
+(* 32-bit register-or-memory cell (the "r/m32" operand class). *)
+let is_rm32 (c:Il.cell) : bool =
+  match c with
+      Il.Mem (_, Il.ScalarTy st) -> is_ty32 st
+    | Il.Reg (_, st) -> is_ty32 st
+    | _ -> false
+;;
+
+(* 8-bit scalar value. *)
+let is_ty8 (ty:Il.scalar_ty) : bool =
+  match ty with
+      Il.ValTy (Il.Bits8) -> true
+    | _ -> false
+;;
+
+(* 32-bit memory cell. *)
+let is_m32 (c:Il.cell) : bool =
+  match c with
+      Il.Mem (_, Il.ScalarTy st) -> is_ty32 st
+    | _ -> false
+;;
+
+(* 8-bit memory cell. *)
+let is_m8 (c:Il.cell) : bool =
+  match c with
+      Il.Mem (_, Il.ScalarTy st) -> is_ty8 st
+    | _ -> false
+;;
+
+(* Only eax/ebx/ecx/edx have byte sub-registers (al/bl/cl/dl). *)
+let is_ok_r8 (r:Il.hreg) : bool =
+  (r == eax || r == ebx || r == ecx || r == edx)
+;;
+
+(* 8-bit register cell in a byte-addressable hreg. *)
+let is_r8 (c:Il.cell) : bool =
+  match c with
+      Il.Reg (Il.Hreg r, st) when is_ok_r8 r -> is_ty8 st
+    | _ -> false
+;;
+
+(* 8-bit register-or-memory cell (the "r/m8" operand class). *)
+let is_rm8 (c:Il.cell) : bool =
+  match c with
+      Il.Mem (_, Il.ScalarTy st) -> is_ty8 st
+    | _ -> is_r8 c
+;;
+
+(* Pre-allocation rewriting: pin operands of quads whose x86 encodings
+ * require fixed registers (mul/div use eax, call results arrive in
+ * eax, etc.) before the general allocator runs. *)
+let prealloc_quad (quad':Il.quad') : Il.quad' =
+  let target_cell reg c =
+    Il.Reg (Il.Hreg reg, Il.cell_scalar_ty c)
+  in
+  let target_operand reg op =
+    Il.Cell (Il.Reg (Il.Hreg reg, Il.operand_scalar_ty op))
+  in
+
+  (* Force a binary op's lhs/dst into [dst] and rhs into [src]. *)
+  let target_bin_to_hreg bin dst src =
+    { bin with
+        Il.binary_rhs = target_operand src bin.Il.binary_rhs;
+        Il.binary_lhs = target_operand dst bin.Il.binary_lhs;
+        Il.binary_dst = target_cell dst bin.Il.binary_dst }
+  in
+
+  let target_cmp cmp =
+    match cmp.Il.cmp_lhs with
+        (* Immediate LHS we force to eax. *)
+        Il.Imm _ ->
+          { cmp with
+              Il.cmp_lhs = target_operand eax cmp.Il.cmp_lhs }
+      | _ -> cmp
+  in
+
+    match quad' with
+        Il.Binary bin ->
+          begin
+            Il.Binary
+              begin
+                match bin.Il.binary_op with
+                    (* x86 mul/div/mod implicitly use eax (and edx). *)
+                    Il.IMUL | Il.UMUL
+                  | Il.IDIV | Il.UDIV -> target_bin_to_hreg bin eax ecx
+                  | Il.IMOD | Il.UMOD -> target_bin_to_hreg bin eax ecx
+                  | _ -> bin
+              end
+          end
+
+      | Il.Cmp cmp -> Il.Cmp (target_cmp cmp)
+
+      | Il.Call c ->
+          (* cdecl return value lands in eax. *)
+          let ty = Il.cell_scalar_ty c.Il.call_dst in
+            Il.Call { c with
+                        Il.call_dst = Il.Reg ((Il.Hreg eax), ty) }
+
+      | Il.Lea le ->
+          begin
+            match (le.Il.lea_dst, le.Il.lea_src) with
+                (Il.Reg (_, dst_ty), Il.ImmPtr _)
+                  when is_ty32 dst_ty ->
+                    Il.Lea { le with
+                               Il.lea_dst = Il.Reg (Il.Hreg eax, dst_ty) }
+              | _ -> quad'
+          end
+
+      | x -> x
+;;
+
+(* ABI constraint hook: clear bits in [hregs] (indexed by vreg) for
+ * hard registers a vreg must not occupy in quad [q].  Two x86-specific
+ * rules: any vreg in a quad touching 8-bit cells must avoid esi/edi
+ * (no byte sub-registers), and a shift-count rhs must be ecx. *)
+let constrain_vregs (q:Il.quad) (hregs:Bits.t array) : unit =
+
+  (* Does the quad read or write any 8-bit register or memory cell? *)
+  let involves_8bit_cell =
+    let b = ref false in
+    let qp_cell _ c =
+      match c with
+          Il.Reg (_, Il.ValTy Il.Bits8)
+        | Il.Mem (_, Il.ScalarTy (Il.ValTy Il.Bits8)) ->
+            (b := true; c)
+        | _ -> c
+    in
+      ignore (Il.process_quad { Il.identity_processor with
+                                  Il.qp_cell_read = qp_cell;
+                                  Il.qp_cell_write = qp_cell } q);
+      !b
+  in
+
+  let qp_mem _ m = m in
+  let qp_cell _ c =
+    begin
+      match c with
+          Il.Reg (Il.Vreg v, _) when involves_8bit_cell ->
+            (* 8-bit register cells must only be al, cl, dl, bl.
+             * Not esi/edi. *)
+            let hv = hregs.(v) in
+              List.iter (fun bad -> Bits.set hv bad false) [esi; edi]
+        | _ -> ()
+    end;
+    c
+  in
+    begin
+      match q.Il.quad_body with
+          Il.Binary b ->
+            begin
+              match b.Il.binary_op with
+                  (* Shifts *)
+                | Il.LSL | Il.LSR | Il.ASR ->
+                    begin
+                      match b.Il.binary_rhs with
+                          Il.Cell (Il.Reg (Il.Vreg v, _)) ->
+                            let hv = hregs.(v) in
+                              (* Shift src has to be ecx. *)
+                              List.iter
+                                (fun bad -> Bits.set hv bad false)
+                                [eax; edx; ebx; esi; edi]
+                        | _ -> ()
+                    end
+                | _ -> ()
+            end
+        | _ -> ()
+    end;
+    ignore
+      (Il.process_quad { Il.identity_processor with
+                           Il.qp_mem = qp_mem;
+                           Il.qp_cell_read = qp_cell;
+                           Il.qp_cell_write = qp_cell } q)
+;;
+
+
+(* Hard registers a quad destroys beyond its declared destination:
+ * mul/div trash edx; ZERO (memset-style) uses eax/edi/ecx; calls
+ * clobber the cdecl caller-saves; Regfence invalidates everything. *)
+let clobbers (quad:Il.quad) : Il.hreg list =
+  match quad.Il.quad_body with
+      Il.Binary bin ->
+        begin
+          match bin.Il.binary_op with
+              Il.IMUL | Il.UMUL
+            | Il.IDIV | Il.UDIV -> [ edx ]
+            | Il.IMOD | Il.UMOD -> [ edx ]
+            | _ -> []
+        end
+    | Il.Unary un ->
+        begin
+          match un.Il.unary_op with
+              Il.ZERO -> [ eax; edi; ecx ]
+            | _ -> [ ]
+        end
+    | Il.Call _ -> [ eax; ecx; edx; ]
+    | Il.Regfence -> [ eax; ecx; ebx; edx; edi; esi; ]
+    | _ -> []
+;;
+
+
+(* Machine-word parameters for ia32: 4 bytes, 32 bits, unsigned. *)
+let word_sz = 4L
+;;
+
+let word_bits = Il.Bits32
+;;
+
+let word_ty = TY_u32
+;;
+
+(* Attach a human-readable annotation to the emitter's current pc. *)
+let annotate (e:Il.emitter) (str:string) =
+  Hashtbl.add e.Il.emit_annotations e.Il.emit_pc str
+;;
+
+(* Short combinators for building operands and cells:
+ *   c  : cell -> operand          r  : reg -> word-sized cell
+ *   h  : hreg -> reg              rc : hreg -> word-sized cell
+ *   ro : hreg -> word-sized operand *)
+let c (c:Il.cell) : Il.operand = Il.Cell c ;;
+let r (r:Il.reg) : Il.cell = Il.Reg ( r, (Il.ValTy word_bits) ) ;;
+let h (x:Il.hreg) : Il.reg = Il.Hreg x ;;
+let rc (x:Il.hreg) : Il.cell = r (h x) ;;
+let ro (x:Il.hreg) : Il.operand = c (rc x) ;;
+
+(* Allocate a fresh vreg and return it with a word-sized cell view. *)
+let vreg (e:Il.emitter) : (Il.reg * Il.cell) =
+  let vr = Il.next_vreg e in
+    (vr, (Il.Reg (vr, (Il.ValTy word_bits))))
+;;
+
+(* Word-sized and byte-sized immediate operands. *)
+let imm (x:Asm.expr64) : Il.operand =
+  Il.Imm (x, word_ty)
+;;
+let immi (x:int64) : Il.operand =
+  imm (Asm.IMM x)
+;;
+
+let imm_byte (x:Asm.expr64) : Il.operand =
+  Il.Imm (x, TY_u8)
+;;
+let immi_byte (x:int64) : Il.operand =
+  imm_byte (Asm.IMM x)
+;;
+
+
+(* Effective-address helpers: byte- and word-indexed cells relative to
+ * a base register.  "byte" offsets count bytes; "word" offsets count
+ * machine words (i * word_sz). *)
+
+(* Byte offset [i] as an assembler immediate. *)
+let byte_off_n (i:int) : Asm.expr64 =
+  Asm.IMM (Int64.of_int i)
+;;
+
+(* 8-bit cell at *(reg + i) (byte offset). *)
+let byte_n (reg:Il.reg) (i:int) : Il.cell =
+  let imm = byte_off_n i in
+  let mem = Il.RegIn (reg, Some imm) in
+    Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8))
+;;
+
+(* Word offset [i] (scaled by word_sz) as an assembler immediate. *)
+let word_off_n (i:int) : Asm.expr64 =
+  Asm.IMM (Int64.mul (Int64.of_int i) word_sz)
+;;
+
+(* Word cell at *reg. *)
+let word_at (reg:Il.reg) : Il.cell =
+  let mem = Il.RegIn (reg, None) in
+    Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits))
+;;
+
+(* Word cell at *(reg + off) for an arbitrary byte offset expr. *)
+let word_at_off (reg:Il.reg) (off:Asm.expr64) : Il.cell =
+  let mem = Il.RegIn (reg, Some off) in
+    Il.Mem (mem, Il.ScalarTy (Il.ValTy word_bits))
+;;
+
+(* Word cell at *(reg + i*word_sz). *)
+let word_n (reg:Il.reg) (i:int) : Il.cell =
+  word_at_off reg (word_off_n i)
+;;
+
+(* Indirect code pointer held in [reg]. *)
+let reg_codeptr (reg:Il.reg) : Il.code =
+  Il.CodePtr (Il.Cell (Il.Reg (reg, Il.AddrTy Il.CodeTy)))
+;;
+
+(* Low byte of the word at *(reg + i*word_sz). *)
+let word_n_low_byte (reg:Il.reg) (i:int) : Il.cell =
+  let imm = word_off_n i in
+  let mem = Il.RegIn (reg, Some imm) in
+    Il.Mem (mem, Il.ScalarTy (Il.ValTy Il.Bits8))
+;;
+
+(* Pointer-to-word cell at *(reg + i*word_sz). *)
+let wordptr_n (reg:Il.reg) (i:int) : Il.cell =
+  let imm = word_off_n i in
+  let mem = Il.RegIn (reg, Some imm) in
+    Il.Mem (mem, Il.ScalarTy (Il.AddrTy (Il.ScalarTy (Il.ValTy word_bits))))
+;;
+
+(* Word-sized GEP specialized to this target's word width. *)
+let get_element_ptr = Il.get_element_ptr word_bits reg_str ;;
+
+(* Push the four cdecl callee-save registers (ebp, edi, esi, ebx) —
+ * order must mirror restore_callee_saves and the frame layout comment
+ * below. *)
+let save_callee_saves (e:Il.emitter) : unit =
+  Il.emit e (Il.Push (ro ebp));
+  Il.emit e (Il.Push (ro edi));
+  Il.emit e (Il.Push (ro esi));
+  Il.emit e (Il.Push (ro ebx));
+;;
+
+
+(* Pop the callee-saves in the reverse of save_callee_saves. *)
+let restore_callee_saves (e:Il.emitter) : unit =
+  Il.emit e (Il.Pop (rc ebx));
+  Il.emit e (Il.Pop (rc esi));
+  Il.emit e (Il.Pop (rc edi));
+  Il.emit e (Il.Pop (rc ebp));
+;;
+
+
+(* restores registers from the frame base without updating esp:
+ *  - sets ebp, edi, esi, ebx to stored values from frame base
+ *  - sets `retpc' register to stored retpc from frame base
+ *  - sets `base' register to current fp
+ *)
+let restore_frame_base (e:Il.emitter) (base:Il.reg) (retpc:Il.reg) : unit =
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+    (* Slot offsets follow the frame layout pictured below:
+     * [ebx][esi][edi][ebp][retpc] from the frame base upward. *)
+    mov (r base) (ro ebp);
+    mov (rc ebx) (c (word_at base));
+    mov (rc esi) (c (word_n base 1));
+    mov (rc edi) (c (word_n base 2));
+    mov (rc ebp) (c (word_n base 3));
+    mov (r retpc) (c (word_n base 4));
+;;
+
+
+(*
+ * Our arrangement on x86 is this:
+ *
+ * *ebp+20+(4*N) = [argN ]
+ * ...
+ * *ebp+24 = [arg1 ] = task ptr
+ * *ebp+20 = [arg0 ] = out ptr
+ * *ebp+16 = [retpc ]
+ * *ebp+12 = [old_ebp]
+ * *ebp+8 = [old_edi]
+ * *ebp+4 = [old_esi]
+ * *ebp = [old_ebx]
+ *
+ * For x86-cdecl:
+ *
+ * %eax, %ecx, %edx are "caller save" registers
+ * %ebp, %ebx, %esi, %edi are "callee save" registers
+ *
+ *)
+
+(* Frame-layout constants, in words and bytes; see the stack diagram
+ * in the comment above. *)
+let frame_base_words = 5 (* eip,ebp,edi,esi,ebx *) ;;
+let frame_base_sz = Int64.mul (Int64.of_int frame_base_words) word_sz;;
+
+let frame_info_words = 2 (* crate ptr, crate-rel frame info disp *) ;;
+let frame_info_sz = Int64.mul (Int64.of_int frame_info_words) word_sz;;
+
+let implicit_arg_words = 2 (* task ptr,out ptr *);;
+let implicit_args_sz = Int64.mul (Int64.of_int implicit_arg_words) word_sz;;
+
+(* Cells for the implicit arguments just above the frame base, and for
+ * the i-th type parameter beyond them. *)
+let out_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words);;
+let task_ptr = wordptr_n (Il.Hreg ebp) (frame_base_words+1);;
+let ty_param_n i =
+  wordptr_n (Il.Hreg ebp) (frame_base_words + implicit_arg_words + i);;
+
+(* Address of spill slot [i]: below ebp, past the frame-info words,
+ * one word per slot (slot 0 is at ebp - frame_info_sz - word_sz). *)
+let spill_slot (i:Il.spill) : Il.mem =
+  let imm = (Asm.IMM
+               (Int64.neg
+                  (Int64.add frame_info_sz
+                     (Int64.mul word_sz
+                        (Int64.of_int (i+1))))))
+  in
+    Il.RegIn ((Il.Hreg ebp), Some imm)
+;;
+
+
+(* PIC helper: a tiny thunk that returns its own return address in
+ * eax.  Calling it yields the pc of the instruction after the call,
+ * the standard ia32 way to materialize the current pc. *)
+let get_next_pc_thunk_fixup = new_fixup "glue$get_next_pc"
+;;
+
+(* Emit the thunk body: load the return address (at *esp) into eax,
+ * then return. *)
+let emit_get_next_pc_thunk (e:Il.emitter) : unit =
+  let sty = Il.AddrTy Il.CodeTy in
+  let rty = Il.ScalarTy sty in
+  let deref_esp = Il.Mem (Il.RegIn (Il.Hreg esp, None), rty) in
+  let eax = (Il.Reg (Il.Hreg eax, sty)) in
+    Il.emit_full e (Some get_next_pc_thunk_fixup) []
+      (Il.umov eax (Il.Cell deref_esp));
+    Il.emit e Il.Ret;
+;;
+
+(* (result register, fixup, emitter) triple exported to the ABI. *)
+let get_next_pc_thunk : (Il.reg * fixup * (Il.emitter -> unit)) =
+  (Il.Hreg eax, get_next_pc_thunk_fixup, emit_get_next_pc_thunk)
+;;
+
+(* Emit a call from rust code onto the C (runtime) stack: save the
+ * rust sp into the task, switch esp to the task's runtime stack,
+ * 16-byte-align it, copy the args across, call, and switch back.
+ * [tmp1]/[tmp2] are scratch regs; [in_prologue] selects the special
+ * sequence used for stack-growth calls where ebp is about to die.
+ * The result of the call is stored into [ret]. *)
+let emit_c_call
+    (e:Il.emitter)
+    (ret:Il.cell)
+    (tmp1:Il.reg)
+    (tmp2:Il.reg)
+    (nabi:nabi)
+    (in_prologue:bool)
+    (fptr:Il.code)
+    (args:Il.operand array)
+    : unit =
+
+  let emit = Il.emit e in
+  let mov dst src = emit (Il.umov dst src) in
+  let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in
+
+    (* rust calls get task as arg0  *)
+    let args =
+      if nabi.nabi_convention = CONV_rust
+      then Array.append [| c task_ptr |] args
+      else args
+    in
+    let nargs = Array.length args in
+    let arg_sz = Int64.mul (Int64.of_int nargs) word_sz
+    in
+
+      mov (r tmp1) (c task_ptr);            (* tmp1 = task from argv[-1] *)
+      mov (r tmp2) (ro esp);                (* tmp2 = esp                *)
+      mov                                   (* task->rust_sp = tmp2      *)
+        (word_n tmp1 Abi.task_field_rust_sp)
+        (c (r tmp2));
+      mov                                   (* esp = task->runtime_sp    *)
+        (rc esp)
+        (c (word_n tmp1 Abi.task_field_runtime_sp));
+
+      binary Il.SUB (rc esp) arg_sz;        (* make room on the stack    *)
+      binary Il.AND (rc esp)                (* and 16-byte align sp      *)
+        0xfffffffffffffff0L;
+
+      Array.iteri
+        begin
+          fun i (arg:Il.operand) ->         (* write args to C stack     *)
+            match arg with
+                Il.Cell (Il.Mem (a, ty)) ->
+                  begin
+                    match a with
+                        (* esp-relative args must be re-read via tmp2,
+                         * the saved rust sp, since esp has moved. *)
+                        Il.RegIn (Il.Hreg base, off) when base == esp ->
+                          mov (r tmp1) (c (Il.Mem (Il.RegIn (tmp2, off), ty)));
+                          mov (word_n (h esp) i) (c (r tmp1));
+                      | _ ->
+                          mov (r tmp1) arg;
+                          mov (word_n (h esp) i) (c (r tmp1));
+                  end
+              | _ ->
+                  mov (word_n (h esp) i) arg
+        end
+        args;
+
+      match ret with
+          Il.Mem (Il.RegIn (Il.Hreg base, _), _) when base == esp ->
+            assert (not in_prologue);
+
+            (* If ret is esp-relative, use a temporary register until we
+               switched stacks. *)
+
+            emit (Il.call (r tmp1) fptr);
+            mov (r tmp2) (c task_ptr);
+            mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp));
+            mov ret (c (r tmp1));
+
+        | _ when in_prologue ->
+            (*
+             * We have to do something a little surprising here:
+             * we're doing a 'grow' call so ebp is going to point
+             * into a dead stack frame on call-return. So we
+             * temporarily store task-ptr into ebp and then reload
+             * esp *and* ebp via ebp->rust_sp on the other side of
+             * the call.
+             *)
+            mov (rc ebp) (c task_ptr);
+            emit (Il.call ret fptr);
+            mov (rc esp) (c (word_n (h ebp) Abi.task_field_rust_sp));
+            mov (rc ebp) (ro esp);
+
+        | _ ->
+            emit (Il.call ret fptr);
+            mov (r tmp2) (c task_ptr);
+            mov (rc esp) (c (word_n tmp2 Abi.task_field_rust_sp));
+;;
+
+(* Emit a no-result C call from inside a function prologue (used for
+ * the stack-grow upcall). Loads the code pointer for [fn] into eax
+ * and dispatches through emit_c_call's in_prologue path. *)
+let emit_void_prologue_call
+ (e:Il.emitter)
+ (nabi:nabi)
+ (fn:fixup)
+ (args:Il.operand array)
+ : unit =
+ let code = Abi.load_fixup_codeptr e (h eax) fn true nabi.nabi_indirect in
+ emit_c_call e (rc eax) (h edx) (h ecx) nabi true code args
+;;
+
+(* Emit a call to native function [fn], placing the result in [ret].
+ * All scratch state (two temporaries plus the callee pointer) lives
+ * in freshly-allocated vregs, so this helper clobbers no hardreg of
+ * its own choosing. *)
+let emit_native_call
+ (e:Il.emitter)
+ (ret:Il.cell)
+ (nabi:nabi)
+ (fn:fixup)
+ (args:Il.operand array)
+ : unit =
+
+ let fresh () = let (vr, _) = vreg e in vr in
+ let scratch_a = fresh () in
+ let scratch_b = fresh () in
+ let fn_reg = fresh () in
+ let code = Abi.load_fixup_codeptr e fn_reg fn true nabi.nabi_indirect in
+ emit_c_call e ret scratch_a scratch_b nabi false code args
+;;
+
+(* Emit a native call whose result is not needed: the return value is
+ * sunk into a fresh vreg and ignored. *)
+let emit_native_void_call
+ (e:Il.emitter)
+ (nabi:nabi)
+ (fn:fixup)
+ (args:Il.operand array)
+ : unit =
+
+ let (discard, _) = vreg e in
+ emit_native_call e (r discard) nabi fn args
+;;
+
+(* Emit a native call from inside a thunk. Two callee forms are
+ * supported: an ImmPtr fixup (old, linker-symbol-driven path) or any
+ * other operand holding a pointer to the callee (new path, loaded
+ * into eax and called indirectly). In both cases the raw result
+ * arrives in eax and is then stored *through* [ret]: [ret] is
+ * treated as holding an out-pointer (see the word_at stores below),
+ * either directly in a register or loaded into edx first. *)
+let emit_native_call_in_thunk
+ (e:Il.emitter)
+ (ret:Il.cell)
+ (nabi:nabi)
+ (fn:Il.operand)
+ (args:Il.operand array)
+ : unit =
+
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+
+ begin
+ match fn with
+ (*
+ * NB: old path, remove when/if you're sure you don't
+ * want native-linker-symbol-driven requirements.
+ *)
+ Il.ImmPtr (fix, _) ->
+ let code =
+ Abi.load_fixup_codeptr e (h eax) fix true nabi.nabi_indirect
+ in
+ emit_c_call e (rc eax) (h edx) (h ecx) nabi false code args;
+
+ | _ ->
+ (*
+ * NB: new path, ignores nabi_indirect, assumes
+ * indirect via pointer from upcall_require_c_sym
+ * or crate cache.
+ *)
+ mov (rc eax) fn;
+ let cell = Il.Reg (h eax, Il.AddrTy Il.CodeTy) in
+ let fptr = Il.CodePtr (Il.Cell cell) in
+ emit_c_call e (rc eax) (h edx) (h ecx) nabi false fptr args;
+ end;
+
+ (* Store the eax result through the out-pointer held in [ret]. *)
+ match ret with
+ Il.Reg (r, _) -> mov (word_at r) (ro eax)
+ | _ -> mov (rc edx) (c ret);
+ mov (word_at (h edx)) (ro eax)
+;;
+
+(* Unwind glue: switch to the task's Rust stack, then walk the chain
+ * of frames via ebp, calling each frame's drop glue (if the frame
+ * has a glue-fns struct and that struct has a drop entry), until the
+ * saved next-fp slot is null; finally make the exit-task upcall.
+ * Frame layout assumptions: fp[-1] = crate ptr, fp[-2] = pointer to
+ * the frame-glue-fns struct (displacements relative to the crate
+ * ptr), callee-saves[3] = previous ebp. *)
+let unwind_glue
+ (e:Il.emitter)
+ (nabi:nabi)
+ (exit_task_fixup:fixup)
+ : unit =
+
+ let fp_n = word_n (Il.Hreg ebp) in
+ let edx_n = word_n (Il.Hreg edx) in
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+ let push x = emit (Il.Push x) in
+ let pop x = emit (Il.Pop x) in
+ let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in
+ let codefix fix = Il.CodePtr (Il.ImmPtr (fix, Il.CodeTy)) in
+ let mark fix = Il.emit_full e (Some fix) [] Il.Dead in
+ let glue_field = Abi.frame_glue_fns_field_drop in
+
+ let repeat_jmp_fix = new_fixup "repeat jump" in
+ let skip_jmp_fix = new_fixup "skip jump" in
+ let exit_jmp_fix = new_fixup "exit jump" in
+
+ mov (rc edx) (c task_ptr); (* switch back to rust stack *)
+ mov
+ (rc esp)
+ (c (edx_n Abi.task_field_rust_sp));
+
+ mark repeat_jmp_fix;
+
+ mov (rc esi) (c (fp_n (-1))); (* esi <- crate ptr *)
+ mov (rc edx) (c (fp_n (-2))); (* edx <- frame glue functions. *)
+ emit (Il.cmp (ro edx) (immi 0L));
+
+ emit
+ (Il.jmp Il.JE
+ (codefix skip_jmp_fix)); (* if struct* is nonzero *)
+ add edx esi; (* add crate ptr to disp. *)
+ mov
+ (rc ecx)
+ (c (edx_n glue_field)); (* ecx <- drop glue *)
+ emit (Il.cmp (ro ecx) (immi 0L));
+
+ emit
+ (Il.jmp Il.JE
+ (codefix skip_jmp_fix)); (* if glue-fn is nonzero *)
+ add ecx esi; (* add crate ptr to disp. *)
+ push (ro ebp); (* frame-to-drop *)
+ push (c task_ptr); (* form usual call to glue *)
+ push (immi 0L); (* outptr *)
+ emit (Il.call (rc eax)
+ (reg_codeptr (h ecx))); (* call glue_fn, trashing eax. *)
+ (* Pop the three args back off; values are discarded into eax. *)
+ pop (rc eax);
+ pop (rc eax);
+ pop (rc eax);
+
+ mark skip_jmp_fix;
+ mov (rc edx) (c (fp_n 3)); (* load next fp (callee-saves[3]) *)
+ emit (Il.cmp (ro edx) (immi 0L));
+ emit (Il.jmp Il.JE
+ (codefix exit_jmp_fix)); (* if nonzero *)
+ mov (rc ebp) (ro edx); (* move to next frame *)
+ emit (Il.jmp Il.JMP
+ (codefix repeat_jmp_fix)); (* loop *)
+
+ (* exit path. *)
+ mark exit_jmp_fix;
+
+ let callee =
+ Abi.load_fixup_codeptr
+ e (h eax) exit_task_fixup false nabi.nabi_indirect
+ in
+ emit_c_call
+ e (rc eax) (h edx) (h ecx) nabi false callee [| (c task_ptr) |];
+;;
+
+(* Emit code that computes the runtime value of [size], using the
+ * machine stack (PUSH/POP) as a tiny expression-evaluation stack.
+ * Puts the result in eax; clobbers ecx, edx in the process. *)
+let rec calculate_sz (e:Il.emitter) (size:size) : unit =
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+ let push x = emit (Il.Push x) in
+ let pop x = emit (Il.Pop x) in
+ let neg x = emit (Il.unary Il.NEG (rc x) (ro x)) in
+ let bnot x = emit (Il.unary Il.NOT (rc x) (ro x)) in
+ let band x y = emit (Il.binary Il.AND (rc x) (ro x) (ro y)) in
+ let add x y = emit (Il.binary Il.ADD (rc x) (ro x) (ro y)) in
+ let mul x y = emit (Il.binary Il.UMUL (rc x) (ro x) (ro y)) in
+ let subi x y = emit (Il.binary Il.SUB (rc x) (ro x) (immi y)) in
+ (* Evaluate [b], park it on the stack (one word) while [a] is
+ * evaluated into eax, then pop b into ecx. Every binary size op
+ * below goes through here, so each costs one stack word beyond
+ * its operands' own needs. *)
+ let eax_gets_a_and_ecx_gets_b a b =
+ calculate_sz e b;
+ push (ro eax);
+ calculate_sz e a;
+ pop (rc ecx);
+ in
+ match size with
+ SIZE_fixed i ->
+ mov (rc eax) (immi i)
+
+ | SIZE_fixup_mem_sz f ->
+ mov (rc eax) (imm (Asm.M_SZ f))
+
+ | SIZE_fixup_mem_pos f ->
+ mov (rc eax) (imm (Asm.M_POS f))
+
+ | SIZE_param_size i ->
+ mov (rc eax) (Il.Cell (ty_param_n i));
+ mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_size))
+
+ | SIZE_param_align i ->
+ mov (rc eax) (Il.Cell (ty_param_n i));
+ mov (rc eax) (Il.Cell (word_n (h eax) Abi.tydesc_field_align))
+
+ | SIZE_rt_neg a ->
+ calculate_sz e a;
+ neg eax
+
+ | SIZE_rt_add (a, b) ->
+ eax_gets_a_and_ecx_gets_b a b;
+ add eax ecx
+
+ | SIZE_rt_mul (a, b) ->
+ eax_gets_a_and_ecx_gets_b a b;
+ mul eax ecx
+
+ | SIZE_rt_max (a, b) ->
+ eax_gets_a_and_ecx_gets_b a b;
+ emit (Il.cmp (ro eax) (ro ecx));
+ let jmp_pc = e.Il.emit_pc in
+ emit (Il.jmp Il.JAE Il.CodeNone);
+ mov (rc eax) (ro ecx);
+ Il.patch_jump e jmp_pc e.Il.emit_pc;
+
+ | SIZE_rt_align (align, off) ->
+ (*
+ * calculate off + pad where:
+ *
+ * pad = (align - (off mod align)) mod align
+ *
+ * In our case it's always a power of two,
+ * so we can just do:
+ *
+ * mask = align-1
+ * off += mask
+ * off &= ~mask
+ *
+ *)
+ eax_gets_a_and_ecx_gets_b off align;
+ subi ecx 1L;
+ add eax ecx;
+ bnot ecx;
+ band eax ecx;
+;;
+
+(* Conservative upper bound, in words, on the PUSH/POP scratch space
+ * that [calculate_sz] uses when evaluating [size]. Leaf sizes use
+ * no stack; a unary op uses whatever its operand uses; every binary
+ * op evaluates one operand while the other sits pushed on the
+ * stack, so it needs one word beyond its operands' combined needs.
+ *
+ * Fix: SIZE_rt_max previously omitted the "+ 1" even though
+ * calculate_sz's max path also goes through
+ * eax_gets_a_and_ecx_gets_b (which pushes one word), so the bound
+ * could under-reserve the primordial frame by a word. *)
+let rec size_calculation_stack_highwater (size:size) : int =
+ match size with
+ SIZE_fixed _
+ | SIZE_fixup_mem_sz _
+ | SIZE_fixup_mem_pos _
+ | SIZE_param_size _
+ | SIZE_param_align _ -> 0
+ | SIZE_rt_neg a ->
+ (size_calculation_stack_highwater a)
+ | SIZE_rt_max (a, b)
+ | SIZE_rt_add (a, b)
+ | SIZE_rt_mul (a, b)
+ | SIZE_rt_align (a, b) ->
+ (size_calculation_stack_highwater a)
+ + (size_calculation_stack_highwater b)
+ + 1
+;;
+
+(* Safety margin kept between esp and the true stack limit: room for
+ * two frame-bases, per the inline comments below. *)
+let boundary_sz =
+ (Asm.IMM
+ (Int64.add (* Extra non-frame room: *)
+ frame_base_sz (* to safely enter the next frame, *)
+ frame_base_sz)) (* and make a 'grow' upcall there. *)
+;;
+
+(* Emit the stack-growth check: compare esp - [growsz] against the
+ * stack limit in [end_reg]; on underflow, compute the needed growth
+ * (plus boundary_sz), make the 'grow' upcall, and jump back to
+ * [restart_pc] to re-run frame entry. [grow_jmp], when present, is
+ * an earlier emitted jump that is patched to land on the grow path
+ * here. [tmp_reg] is trashed; [end_reg] ends up holding the growth
+ * amount on the grow path. *)
+let stack_growth_check
+ (e:Il.emitter)
+ (nabi:nabi)
+ (grow_task_fixup:fixup)
+ (growsz:Il.operand)
+ (grow_jmp:Il.label option)
+ (restart_pc:Il.label)
+ (end_reg:Il.reg) (*
+ * stack limit on entry,
+ * new stack pointer on exit
+ *)
+ (tmp_reg:Il.reg) (* temporary (trashed) *)
+ : unit =
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+ let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in
+ let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in
+ mov (r tmp_reg) (ro esp); (* tmp = esp *)
+ sub (r tmp_reg) growsz; (* tmp -= size-request *)
+ emit (Il.cmp (c (r end_reg)) (c (r tmp_reg)));
+ (*
+ * Jump *over* 'grow' upcall on non-underflow:
+ * if end_reg <= tmp_reg
+ *)
+
+ let bypass_grow_upcall_jmp_pc = e.Il.emit_pc in
+ emit (Il.jmp Il.JBE Il.CodeNone);
+
+ begin
+ match grow_jmp with
+ None -> ()
+ | Some j -> Il.patch_jump e j e.Il.emit_pc
+ end;
+ (* Extract growth-amount from tmp_reg. *)
+ mov (r end_reg) (ro esp);
+ sub (r end_reg) (c (r tmp_reg));
+ add (r end_reg) (Il.Imm (boundary_sz, word_ty));
+ (* Perform 'grow' upcall, then restart frame-entry. *)
+ emit_void_prologue_call e nabi grow_task_fixup [| c (r end_reg) |];
+ emit (Il.jmp Il.JMP (Il.CodeLabel restart_pc));
+ Il.patch_jump e bypass_grow_upcall_jmp_pc e.Il.emit_pc
+;;
+
+(* Emit a function prologue: save callee-saves, establish ebp,
+ * compute the (possibly dynamic) total frame size, perform the
+ * stack-growth check (making the 'grow' upcall via
+ * [grow_task_fixup] if needed), carve out and zero the frame, and
+ * leave esp just above the glue-call region. [framesz] and
+ * [callsz] may be dynamic sizes; see the long comments below for
+ * the primordial-frame and tail-call-region reasoning. *)
+let fn_prologue
+ (e:Il.emitter)
+ (framesz:size)
+ (callsz:size)
+ (nabi:nabi)
+ (grow_task_fixup:fixup)
+ : unit =
+
+ let esi_n = word_n (h esi) in
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+ let add dst src = emit (Il.binary Il.ADD dst (Il.Cell dst) src) in
+ let sub dst src = emit (Il.binary Il.SUB dst (Il.Cell dst) src) in
+
+ (* We may be in a dynamic-sized frame. This makes matters complex,
+ * as we can't just perform a simple growth check in terms of a
+ * static size. The check is against a dynamic size, and we need to
+ * calculate that size.
+ *
+ * Unlike size-calculations in 'trans', we do not use vregs to
+ * calculate the frame size; instead we use a PUSH/POP stack-machine
+ * translation that doesn't disturb the registers we're
+ * somewhat-carefully *using* during frame setup.
+ *
+ * This only pushes the problem back a little ways though: we still
+ * need to be sure we have enough room to do the PUSH/POP
+ * calculation. We refer to this amount of space as the 'primordial'
+ * frame size, which can *thankfully* be calculated exactly from the
+ * arithmetic expression we're aiming to calculate. So we make room
+ * for the primordial frame, run the calculation of the full dynamic
+ * frame size, then make room *again* for this dynamic size.
+ *
+ * Our caller reserved enough room for us to push our own frame-base,
+ * as well as the frame-base that it will cost to do an upcall.
+ *)
+
+ (*
+ * After we save callee-saves, We have a stack like this:
+ *
+ * | ... |
+ * | caller frame |
+ * | + spill |
+ * | caller arg K |
+ * | ... |
+ * | caller arg 0 |
+ * | retpc | <-- sp we received, top of callee frame
+ * | callee save 1 |
+ * | ... |
+ * | callee save N | <-- ebp and esp after saving callee-saves
+ * | ... |
+ * | callee frame |
+ * | + spill |
+ * | callee arg J |
+ * | ... |
+ * | callee arg 0 | <-- bottom of callee frame
+ * | next retpc |
+ * | next save 1 |
+ * | ... |
+ * | next save N | <-- bottom of region we must reserve
+ * | ... |
+ *
+ * A "frame base" is the retpc and set of callee-saves.
+ *
+ * We need to reserve room for our frame *and* the next frame-base, because
+ * we're going to be blindly entering the next frame-base (pushing eip and
+ * callee-saves) before we perform the next check.
+ *)
+
+ (*
+ * We double the reserved callsz because we need a 'temporary tail-call
+ * region' above the actual call region, in case there's a drop call at the
+ * end of assembling the tail-call args and before copying them to callee
+ * position.
+ *)
+
+ let callsz = add_sz callsz callsz in
+ let n_glue_args = Int64.of_int Abi.worst_case_glue_call_args in
+ let n_glue_words = Int64.mul word_sz n_glue_args in
+
+ (*
+ * Add in *another* word to handle an extra-awkward spill of the
+ * callee address that might occur during an indirect tail call.
+ *)
+ let callsz = add_sz (SIZE_fixed word_sz) callsz in
+
+ (*
+ * Add in enough words for a glue-call (these occur underneath esp)
+ *)
+ let callsz = add_sz (SIZE_fixed n_glue_words) callsz in
+
+ (*
+ * Cumulative dynamic-frame size.
+ *)
+ let call_and_frame_sz = add_sz callsz framesz in
+
+ (* Already have room to save regs on entry. *)
+ save_callee_saves e;
+
+ let restart_pc = e.Il.emit_pc in
+
+ mov (rc ebp) (ro esp); (* Establish frame base. *)
+ mov (rc esi) (c task_ptr); (* esi = task *)
+ mov
+ (rc esi)
+ (c (esi_n Abi.task_field_stk)); (* esi = task->stk *)
+ add (rc esi) (imm
+ (Asm.ADD
+ ((word_off_n Abi.stk_field_data),
+ boundary_sz)));
+
+ let (dynamic_frame_sz, dynamic_grow_jmp) =
+ match Il.size_to_expr64 call_and_frame_sz with
+ None ->
+ begin
+ let primordial_frame_sz =
+ Asm.IMM
+ (Int64.mul word_sz
+ (Int64.of_int
+ (size_calculation_stack_highwater
+ call_and_frame_sz)))
+ in
+ (* Primordial size-check. *)
+ mov (rc edi) (ro esp); (* edi = esp *)
+ sub (* edi -= size-request *)
+ (rc edi)
+ (imm primordial_frame_sz);
+ emit (Il.cmp (ro esi) (ro edi));
+
+ (* Jump to 'grow' upcall on underflow: if esi (bottom) is >
+ edi (proposed-esp) *)
+
+ let primordial_underflow_jmp_pc = e.Il.emit_pc in
+ emit (Il.jmp Il.JA Il.CodeNone);
+
+ (* Calculate dynamic frame size. *)
+ calculate_sz e call_and_frame_sz;
+ ((ro eax), Some primordial_underflow_jmp_pc)
+ end
+ (* NB: this [e] is the expr64, shadowing the emitter only
+ * within this arm. *)
+ | Some e -> ((imm e), None)
+ in
+
+ (* "Full" frame size-check. *)
+ stack_growth_check e nabi grow_task_fixup
+ dynamic_frame_sz dynamic_grow_jmp restart_pc (h esi) (h edi);
+
+
+ (* Establish a frame, wherever we landed. *)
+ sub (rc esp) dynamic_frame_sz;
+
+ (* Zero the frame.
+ *
+ * FIXME: this is awful, will go away when we have proper CFI.
+ *)
+
+ mov (rc edi) (ro esp);
+ mov (rc ecx) dynamic_frame_sz;
+ emit (Il.unary Il.ZERO (word_at (h edi)) (ro ecx));
+
+ (* Move esp back up over the glue region. *)
+ add (rc esp) (immi n_glue_words);
+;;
+
+
+(* Standard epilogue: discard the frame by resetting esp to the frame
+ * base held in ebp, restore the callee-saved registers, and return
+ * to the caller. *)
+let fn_epilogue (e:Il.emitter) : unit =
+ Il.emit e (Il.umov (rc esp) (ro ebp));
+ restore_callee_saves e;
+ Il.emit e Il.Ret;
+;;
+
+(* Emit an unrolled copy of [n_bytes] from [src_ptr] to [dst_ptr],
+ * word-at-a-time with a byte-wise tail, staging each unit through
+ * [tmp_reg]. [ascending] selects copy direction: ascending copies
+ * low-to-high (words then tail bytes); descending copies
+ * high-to-low (tail bytes then words), which is what callers need
+ * when the regions may overlap with dst above src (see
+ * fn_tail_call). *)
+let inline_memcpy
+ (e:Il.emitter)
+ (n_bytes:int64)
+ (dst_ptr:Il.reg)
+ (src_ptr:Il.reg)
+ (tmp_reg:Il.reg)
+ (ascending:bool)
+ : unit =
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+ let bpw = Int64.to_int word_sz in
+ let w = Int64.to_int (Int64.div n_bytes word_sz) in
+ let b = Int64.to_int (Int64.rem n_bytes word_sz) in
+ if ascending
+ then
+ begin
+ for i = 0 to (w-1) do
+ mov (r tmp_reg) (c (word_n src_ptr i));
+ mov (word_n dst_ptr i) (c (r tmp_reg));
+ done;
+ for i = 0 to (b-1) do
+ let off = (w*bpw) + i in
+ mov (r tmp_reg) (c (byte_n src_ptr off));
+ mov (byte_n dst_ptr off) (c (r tmp_reg));
+ done;
+ end
+ else
+ begin
+ for i = (b-1) downto 0 do
+ let off = (w*bpw) + i in
+ mov (r tmp_reg) (c (byte_n src_ptr off));
+ mov (byte_n dst_ptr off) (c (r tmp_reg));
+ done;
+ for i = (w-1) downto 0 do
+ mov (r tmp_reg) (c (word_n src_ptr i));
+ mov (word_n dst_ptr i) (c (r tmp_reg));
+ done;
+ end
+;;
+
+
+
+(* Emit a tail call: move esp up to the temporary tail-call arg-prep
+ * region, (optionally) spill a non-immediate callee pointer to a
+ * reserved word above the call region, restore the caller's frame
+ * base (edx <- old ebp, ecx <- retpc), copy the assembled arg tuple
+ * up to the top of the dying frame (top-to-bottom, since regions
+ * may overlap), re-point esp, re-push the retpc, and jump to the
+ * callee. All sizes are in bytes; see the long comments below for
+ * the region layout. *)
+let fn_tail_call
+ (e:Il.emitter)
+ (caller_callsz:int64)
+ (caller_argsz:int64)
+ (callee_code:Il.code)
+ (callee_argsz:int64)
+ : unit =
+ let emit = Il.emit e in
+ let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in
+ let mov dst src = emit (Il.umov dst src) in
+ let argsz_diff = Int64.sub caller_argsz callee_argsz in
+ let callee_spill_cell = word_at_off (h esp) (Asm.IMM caller_callsz) in
+
+ (*
+ * Our outgoing arguments were prepared in a region above the call region;
+ * this is reserved for the purpose of making tail-calls *only*, so we do
+ * not collide with glue calls we had to make while dropping the frame,
+ * after assembling our arg region.
+ *
+ * Thus, esp points to the "normal" arg region, and we need to move it
+ * to point to the tail-call arg region. To make matters simple, both
+ * regions are the same size, one atop the other.
+ *)
+
+ annotate e "tail call: move esp to temporary tail call arg-prep area";
+ binary Il.ADD (rc esp) caller_callsz;
+
+ (*
+ * If we're given a non-ImmPtr callee, we may need to move it to a known
+ * cell to avoid clobbering its register while we do the argument shuffle
+ * below.
+ *
+ * Sadly, we are too register-starved to just flush our callee to a reg;
+ * so we carve out an extra word of the temporary call-region and use
+ * it.
+ *
+ * This is ridiculous, but works.
+ *)
+ begin
+ match callee_code with
+ Il.CodePtr (Il.Cell c) ->
+ annotate e "tail call: spill callee-ptr to temporary memory";
+ mov callee_spill_cell (Il.Cell c);
+
+ | _ -> ()
+ end;
+
+ (* edx <- ebp; restore ebp, edi, esi, ebx; ecx <- retpc *)
+ annotate e "tail call: restore callee-saves from frame base";
+ restore_frame_base e (h edx) (h ecx);
+ (* move edx past frame base and adjust for difference in call sizes *)
+ annotate e "tail call: adjust temporary fp";
+ binary Il.ADD (rc edx) (Int64.add frame_base_sz argsz_diff);
+
+ (*
+ * stack grows downwards; copy from high to low
+ *
+ * bpw = word_sz
+ * w = floor(callee_argsz / word_sz)
+ * b = callee_argsz % word_sz
+ *
+ * byte copies:
+ * +------------------------+
+ * | |
+ * +------------------------+ <-- base + (w * word_sz) + (b - 1)
+ * . .
+ * +------------------------+
+ * | |
+ * +------------------------+ <-- base + (w * word_sz) + (b - b)
+ * word copies: =
+ * +------------------------+ <-- base + ((w-0) * word_sz)
+ * | bytes |
+ * | (w-1)*bpw..w*bpw-1 |
+ * +------------------------+ <-- base + ((w-1) * word_sz)
+ * | bytes |
+ * | (w-2)*bpw..(w-1)*bpw-1 |
+ * +------------------------+ <-- base + ((w-2) * word_sz)
+ * . .
+ * . .
+ * . .
+ * +------------------------+
+ * | bytes |
+ * | 0..bpw - 1 |
+ * +------------------------+ <-- base + ((w-w) * word_sz)
+ *)
+
+ annotate e "tail call: move arg-tuple up to top of frame";
+ (* NOTE: must copy top-to-bottom in case the regions overlap *)
+ inline_memcpy e callee_argsz (h edx) (h esp) (h eax) false;
+
+ (*
+ * We're done with eax now; so in the case where we had to spill
+ * our callee codeptr, we can reload it into eax here and rewrite
+ * our callee into *eax.
+ *)
+ let callee_code =
+ match callee_code with
+ Il.CodePtr (Il.Cell _) ->
+ annotate e "tail call: reload callee-ptr from temporary memory";
+ mov (rc eax) (Il.Cell callee_spill_cell);
+ reg_codeptr (h eax)
+
+ | _ -> callee_code
+ in
+
+
+ (* esp <- edx *)
+ annotate e "tail call: adjust stack pointer";
+ mov (rc esp) (ro edx);
+ (* PUSH ecx (retpc) *)
+ annotate e "tail call: push retpc";
+ emit (Il.Push (ro ecx));
+ (* JMP callee_code *)
+ emit (Il.jmp Il.JMP callee_code);
+;;
+
+
+(* Word offsets of the fields in a loop-info record: return pc and
+ * the saved stack/frame pointers of the iterating frame. *)
+let loop_info_field_retpc = 0;;
+let loop_info_field_sp = 1;;
+let loop_info_field_fp = 2;;
+
+(* The cell holding the current frame's implicit args tuple, which
+ * sits just above the frame base (retpc + callee-saves) relative to
+ * ebp. *)
+let self_args_cell (self_args_rty:Il.referent_ty) : Il.cell =
+ let disp = Some (Asm.IMM frame_base_sz) in
+ Il.Mem (Il.RegIn (h ebp, disp), self_args_rty)
+;;
+
+(* C-to-Rust activation glue; see the block comments inside for the
+ * stack discipline and the rust_sp adjustment rationale. *)
+let activate_glue (e:Il.emitter) : unit =
+ (*
+ * This is a bit of glue-code. It should be emitted once per
+ * compilation unit.
+ *
+ * - save regs on C stack
+ * - align sp on a 16-byte boundary
+ * - save sp to task.runtime_sp (runtime_sp is thus always aligned)
+ * - load saved task sp (switch stack)
+ * - restore saved task regs
+ * - return to saved task pc
+ *
+ * Our incoming stack looks like this:
+ *
+ * *esp+4 = [arg1 ] = task ptr
+ * *esp = [retpc ]
+ *)
+
+ let sp_n = word_n (Il.Hreg esp) in
+ let edx_n = word_n (Il.Hreg edx) in
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+ let binary op dst imm = emit (Il.binary op dst (c dst) (immi imm)) in
+
+ mov (rc edx) (c (sp_n 1)); (* edx <- task *)
+ save_callee_saves e;
+ mov
+ (edx_n Abi.task_field_runtime_sp)
+ (ro esp); (* task->runtime_sp <- esp *)
+ mov
+ (rc esp)
+ (c (edx_n Abi.task_field_rust_sp)); (* esp <- task->rust_sp *)
+
+ (*
+ * There are two paths we can arrive at this code from:
+ *
+ *
+ * 1. We are activating a task for the first time. When we switch into
+ * the task stack and 'ret' to its first instruction, we'll start
+ * doing whatever the first instruction says. Probably saving
+ * registers and starting to establish a frame. Harmless stuff,
+ * doesn't look at task->rust_sp again except when it clobbers it
+ * during a later upcall.
+ *
+ *
+ * 2. We are resuming a task that was descheduled by the yield glue
+ * below. When we switch into the task stack and 'ret', we'll be
+ * ret'ing to a very particular instruction:
+ *
+ * "esp <- task->rust_sp"
+ *
+ * this is the first instruction we 'ret' to after this glue, because
+ * it is the first instruction following *any* upcall, and the task
+ * we are activating was descheduled mid-upcall.
+ *
+ * Unfortunately for us, we have already restored esp from
+ * task->rust_sp and are about to eat the 5 words off the top of it.
+ *
+ *
+ * | ... | <-- where esp will be once we restore + ret, below,
+ * | retpc | and where we'd *like* task->rust_sp to wind up.
+ * | ebp |
+ * | edi |
+ * | esi |
+ * | ebx | <-- current task->rust_sp == current esp
+ *
+ *
+ * This is a problem. If we return to "esp <- task->rust_sp" it will
+ * push esp back down by 5 words. This manifests as a rust stack that
+ * grows by 5 words on each yield/reactivate. Not good.
+ *
+ * So what we do here is just adjust task->rust_sp up 5 words as
+ * well, to mirror the movement in esp we're about to perform. That
+ * way the "esp <- task->rust_sp" we 'ret' to below will be a
+ * no-op. Esp won't move, and the task's stack won't grow.
+ *)
+
+ binary Il.ADD (edx_n Abi.task_field_rust_sp)
+ (Int64.mul (Int64.of_int (n_callee_saves + 1)) word_sz);
+
+ (**** IN TASK STACK ****)
+ restore_callee_saves e;
+ emit Il.Ret;
+ (***********************)
+ ()
+;;
+
+(* Rust-to-C deactivation glue, the counterpart to activate_glue; see
+ * the block comment inside for the descheduling protocol. *)
+let yield_glue (e:Il.emitter) : unit =
+
+ (* More glue code, this time the 'bottom half' of yielding.
+ *
+ * We arrived here because an upcall decided to deschedule the
+ * running task. So the upcall's return address got patched to the
+ * first instruction of this glue code.
+ *
+ * When the upcall does 'ret' it will come here, and its esp will be
+ * pointing to the last argument pushed on the C stack before making
+ * the upcall: the 0th argument to the upcall, which is always the
+ * task ptr performing the upcall. That's where we take over.
+ *
+ * Our goal is to complete the descheduling
+ *
+ * - Switch over to the task stack temporarily.
+ *
+ * - Save the task's callee-saves onto the task stack.
+ * (the task is now 'descheduled', safe to set aside)
+ *
+ * - Switch *back* to the C stack.
+ *
+ * - Restore the C-stack callee-saves.
+ *
+ * - Return to the caller on the C stack that activated the task.
+ *
+ *)
+ let esp_n = word_n (Il.Hreg esp) in
+ let edx_n = word_n (Il.Hreg edx) in
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+
+ mov
+ (rc edx) (c (esp_n 0)); (* edx <- arg0 (task) *)
+ mov
+ (rc esp)
+ (c (edx_n Abi.task_field_rust_sp)); (* esp <- task->rust_sp *)
+ save_callee_saves e;
+ mov (* task->rust_sp <- esp *)
+ (edx_n Abi.task_field_rust_sp)
+ (ro esp);
+ mov
+ (rc esp)
+ (c (edx_n Abi.task_field_runtime_sp)); (* esp <- task->runtime_sp *)
+
+ (**** IN C STACK ****)
+ restore_callee_saves e;
+ emit Il.Ret;
+ (***********************)
+ ()
+;;
+
+
+(* Load the absolute address of [fix] (via the next-pc thunk's
+ * register, since x86 has no pc-relative data addressing) and push
+ * it as a 32-bit word. *)
+let push_pos32 (e:Il.emitter) (fix:fixup) : unit =
+ let (thunk_reg, _, _) = get_next_pc_thunk in
+ Abi.load_fixup_addr e thunk_reg fix Il.OpaqueTy;
+ let addr_cell = Il.Reg (thunk_reg, Il.AddrTy Il.OpaqueTy) in
+ Il.emit e (Il.Push (Il.Cell addr_cell))
+;;
+
+(* Emit the object file's entry point at [start_fixup]: save
+ * callee-saves, forward the platform-provided argc/argv plus the
+ * crate and main-fn addresses to [rust_start_fixup], then pop the
+ * four args, restore and return. [indirect_start] selects indirect
+ * loading of the rust_start code pointer. *)
+let objfile_start
+ (e:Il.emitter)
+ ~(start_fixup:fixup)
+ ~(rust_start_fixup:fixup)
+ ~(main_fn_fixup:fixup)
+ ~(crate_fixup:fixup)
+ ~(indirect_start:bool)
+ : unit =
+ let ebp_n = word_n (Il.Hreg ebp) in
+ let emit = Il.emit e in
+ let mov dst src = emit (Il.umov dst src) in
+ let push_pos32 = push_pos32 e in
+ Il.emit_full e (Some start_fixup) [] Il.Dead;
+ save_callee_saves e;
+ mov (rc ebp) (ro esp);
+
+ (* If we're very lucky, the platform will have left us with
+ * something sensible in the startup stack like so:
+ *
+ * *ebp+24 = [arg1 ] = argv
+ * *ebp+20 = [arg0 ] = argc
+ * *ebp+16 = [retpc ]
+ * *ebp+12 = [old_ebp]
+ * *ebp+8 = [old_edi]
+ * *ebp+4 = [old_esi]
+ * *ebp = [old_ebx]
+ *
+ * This is not the case everywhere, but we start with this
+ * assumption and correct it in the runtime library.
+ *)
+
+ (* Copy argv. *)
+ mov (rc eax) (c (ebp_n (2 + n_callee_saves)));
+ Il.emit e (Il.Push (ro eax));
+
+ (* Copy argc. *)
+ mov (rc eax) (c (ebp_n (1 + n_callee_saves)));
+ Il.emit e (Il.Push (ro eax));
+
+ push_pos32 crate_fixup;
+ push_pos32 main_fn_fixup;
+ let fptr =
+ Abi.load_fixup_codeptr e (h eax) rust_start_fixup true indirect_start
+ in
+ Il.emit e (Il.call (rc eax) fptr);
+ (* Pop the four pushed args; values are discarded into ecx. *)
+ Il.emit e (Il.Pop (rc ecx));
+ Il.emit e (Il.Pop (rc ecx));
+ Il.emit e (Il.Pop (rc ecx));
+ Il.emit e (Il.Pop (rc ecx));
+ Il.emit e (Il.umov (rc esp) (ro ebp));
+ restore_callee_saves e;
+ Il.emit e Il.Ret;
+;;
+
+(* The x86 ABI record: bundles this module's word parameters,
+ * register conventions, and the prologue/epilogue/call/glue
+ * emitters defined above for consumption by the target-independent
+ * parts of the compiler. *)
+let (abi:Abi.abi) =
+ {
+ Abi.abi_word_sz = word_sz;
+ Abi.abi_word_bits = word_bits;
+ Abi.abi_word_ty = word_ty;
+
+ Abi.abi_is_2addr_machine = true;
+ Abi.abi_has_pcrel_data = false;
+ Abi.abi_has_pcrel_code = true;
+
+ Abi.abi_n_hardregs = n_hardregs;
+ Abi.abi_str_of_hardreg = reg_str;
+ Abi.abi_prealloc_quad = prealloc_quad;
+ Abi.abi_constrain_vregs = constrain_vregs;
+
+ Abi.abi_emit_fn_prologue = fn_prologue;
+ Abi.abi_emit_fn_epilogue = fn_epilogue;
+ Abi.abi_emit_fn_tail_call = fn_tail_call;
+ Abi.abi_clobbers = clobbers;
+
+ Abi.abi_emit_native_call = emit_native_call;
+ Abi.abi_emit_native_void_call = emit_native_void_call;
+ Abi.abi_emit_native_call_in_thunk = emit_native_call_in_thunk;
+ Abi.abi_emit_inline_memcpy = inline_memcpy;
+
+ Abi.abi_activate = activate_glue;
+ Abi.abi_yield = yield_glue;
+ Abi.abi_unwind = unwind_glue;
+ Abi.abi_get_next_pc_thunk = Some get_next_pc_thunk;
+
+ Abi.abi_sp_reg = (Il.Hreg esp);
+ Abi.abi_fp_reg = (Il.Hreg ebp);
+ Abi.abi_dwarf_fp_reg = dwarf_ebp;
+ Abi.abi_tp_cell = task_ptr;
+ Abi.abi_frame_base_sz = frame_base_sz;
+ Abi.abi_frame_info_sz = frame_info_sz;
+ Abi.abi_implicit_args_sz = implicit_args_sz;
+ Abi.abi_spill_slot = spill_slot;
+ }
+
+
+(*
+ * NB: factor the instruction selector often. There's lots of
+ * semi-redundancy in the ISA.
+ *)
+
+
+(* True iff [n] can be encoded as a sign-extended 8-bit immediate,
+ * i.e. -128 <= n <= 127. *)
+let imm_is_signed_byte (n:int64) : bool =
+ (i64_le n 127L) && (i64_le (-128L) n)
+;;
+
+(* True iff [n] can be encoded as an unsigned 8-bit immediate,
+ * i.e. 0 <= n <= 255. *)
+let imm_is_unsigned_byte (n:int64) : bool =
+ (i64_le n 255L) && (i64_le 0L n)
+;;
+
+
+(* Encode the ModRM byte (plus SIB byte and/or displacement, as
+ * needed) addressing cell [c] with reg-field [r]. esp-based
+ * addressing always gets a SIB byte; ebp-based addressing always
+ * gets a displacement (both are x86 encoding quirks). Symbolic
+ * displacements emit a disp32/disp8 relaxation pair. *)
+let rm_r (c:Il.cell) (r:int) : Asm.frag =
+ let reg_ebp = 6 in
+ let reg_esp = 7 in
+
+ (*
+ * We do a little contortion here to accommodate the special case of
+ * being asked to form esp-relative addresses; these require SIB
+ * bytes on x86. Of course!
+ *)
+ let sib_esp_base = Asm.BYTE 0x24 in
+ let seq1 rm modrm =
+ if rm = reg_esp
+ then Asm.SEQ [| modrm; sib_esp_base |]
+ else modrm
+ in
+ let seq2 rm modrm disp =
+ if rm = reg_esp
+ then Asm.SEQ [| modrm; sib_esp_base; disp |]
+ else Asm.SEQ [| modrm; disp |]
+ in
+
+ match c with
+ Il.Reg ((Il.Hreg rm), _) ->
+ Asm.BYTE (modrm_reg (reg rm) r)
+ | Il.Mem (a, _) ->
+ begin
+ match a with
+ Il.Abs disp ->
+ Asm.SEQ [| Asm.BYTE (modrm_deref_disp32 r);
+ Asm.WORD (TY_i32, disp) |]
+
+ | Il.RegIn ((Il.Hreg rm), None) when rm != reg_ebp ->
+ seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r))
+
+ | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM 0L))
+ when rm != reg_ebp ->
+ seq1 rm (Asm.BYTE (modrm_deref_reg (reg rm) r))
+
+ (* The next two are just to save the relaxation system some
+ * churn.
+ *)
+
+ | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n))
+ when imm_is_signed_byte n ->
+ seq2 rm
+ (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r))
+ (Asm.WORD (TY_i8, Asm.IMM n))
+
+ | Il.RegIn ((Il.Hreg rm), Some (Asm.IMM n)) ->
+ seq2 rm
+ (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r))
+ (Asm.WORD (TY_i32, Asm.IMM n))
+
+ | Il.RegIn ((Il.Hreg rm), Some disp) ->
+ Asm.new_relaxation
+ [|
+ seq2 rm
+ (Asm.BYTE (modrm_deref_reg_plus_disp32 (reg rm) r))
+ (Asm.WORD (TY_i32, disp));
+ seq2 rm
+ (Asm.BYTE (modrm_deref_reg_plus_disp8 (reg rm) r))
+ (Asm.WORD (TY_i8, disp))
+ |]
+ | _ -> raise Unrecognized
+ end
+ | _ -> raise Unrecognized
+;;
+
+
+(* Encode opcode byte [op] followed by a ModRM (+SIB/disp) group
+ * addressing [c] with reg-field [r]. *)
+let insn_rm_r (op:int) (c:Il.cell) (r:int) : Asm.frag =
+ let opcode = Asm.BYTE op in
+ Asm.SEQ [| opcode; rm_r c r |]
+;;
+
+
+(* Encode opcode [op], a ModRM group addressing [c] with reg-field
+ * [r], and a trailing immediate [i] of machine type [ty]. *)
+let insn_rm_r_imm
+ (op:int)
+ (c:Il.cell)
+ (r:int)
+ (ty:ty_mach)
+ (i:Asm.expr64)
+ : Asm.frag =
+ let imm_frag = Asm.WORD (ty, i) in
+ Asm.SEQ [| Asm.BYTE op; rm_r c r; imm_frag |]
+;;
+
+(* Choose between the sign-extended-imm8 opcode [op8] and the imm32
+ * opcode [op32]: a known-small constant takes op8 directly;
+ * anything else becomes a relaxation pair (widest variant first,
+ * per the relaxation system's convention) so the assembler can
+ * shrink it once the value is known. *)
+let insn_rm_r_imm_s8_s32
+ (op8:int)
+ (op32:int)
+ (c:Il.cell)
+ (r:int)
+ (i:Asm.expr64)
+ : Asm.frag =
+ match i with
+ Asm.IMM n when imm_is_signed_byte n ->
+ insn_rm_r_imm op8 c r TY_i8 i
+ | _ ->
+ Asm.new_relaxation
+ [|
+ insn_rm_r_imm op32 c r TY_i32 i;
+ insn_rm_r_imm op8 c r TY_i8 i
+ |]
+;;
+
+(* Unsigned counterpart of insn_rm_r_imm_s8_s32: pick the imm8
+ * opcode [op8] when the constant fits an unsigned byte, otherwise
+ * emit a u32/u8 relaxation pair (widest first). *)
+let insn_rm_r_imm_u8_u32
+ (op8:int)
+ (op32:int)
+ (c:Il.cell)
+ (r:int)
+ (i:Asm.expr64)
+ : Asm.frag =
+ match i with
+ Asm.IMM n when imm_is_unsigned_byte n ->
+ insn_rm_r_imm op8 c r TY_u8 i
+ | _ ->
+ Asm.new_relaxation
+ [|
+ insn_rm_r_imm op32 c r TY_u32 i;
+ insn_rm_r_imm op8 c r TY_u8 i
+ |]
+;;
+
+
+(* Build a pc-relative instruction as an 8/32-bit relaxation pair:
+ * each variant is its opcode frag followed by the displacement to
+ * [fix], measured from a mark fixup DEFined immediately after the
+ * displacement word (i.e. from the end of the instruction). The
+ * 32-bit form comes first, per the relaxation convention. *)
+let insn_pcrel_relax
+ (op8_frag:Asm.frag)
+ (op32_frag:Asm.frag)
+ (fix:fixup)
+ : Asm.frag =
+ let pcrel_mark_fixup = new_fixup "pcrel mark fixup" in
+ let def = Asm.DEF (pcrel_mark_fixup, Asm.MARK) in
+ let pcrel_expr = (Asm.SUB (Asm.M_POS fix,
+ Asm.M_POS pcrel_mark_fixup))
+ in
+ Asm.new_relaxation
+ [|
+ Asm.SEQ [| op32_frag; Asm.WORD (TY_i32, pcrel_expr); def |];
+ Asm.SEQ [| op8_frag; Asm.WORD (TY_i8, pcrel_expr); def |];
+ |]
+;;
+
+(* Emit opcode [op32] followed by a fixed 32-bit pc-relative
+ * displacement to [fix], measured from the end of the displacement
+ * word (marked by a fresh fixup DEF placed right after it). *)
+let insn_pcrel_simple (op32:int) (fix:fixup) : Asm.frag =
+ let here = new_fixup "pcrel mark fixup" in
+ let disp = Asm.SUB (Asm.M_POS fix, Asm.M_POS here) in
+ Asm.SEQ [| Asm.BYTE op32;
+ Asm.WORD (TY_i32, disp);
+ Asm.DEF (here, Asm.MARK) |]
+;;
+
+(* Pc-relative instruction with single-byte opcodes for both the
+ * 8-bit ([op8]) and 32-bit ([op32]) displacement forms. *)
+let insn_pcrel (op8:int) (op32:int) (fix:fixup) : Asm.frag =
+ insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTE op32) fix
+;;
+
+(* Like insn_pcrel, but the 32-bit form is a two-byte opcode
+ * ([prefix32] then [op32]) -- e.g. the 0x0f-prefixed Jcc rel32
+ * encodings. *)
+let insn_pcrel_prefix32
+ (op8:int)
+ (prefix32:int)
+ (op32:int)
+ (fix:fixup)
+ : Asm.frag =
+ insn_pcrel_relax (Asm.BYTE op8) (Asm.BYTES [| prefix32; op32 |]) fix
+;;
+
+(* Encode a CMP of operands [a], [b]: rm8/imm8 (0x80 /7), rm32/imm
+ * with sign-extended-imm8 relaxation (0x83/0x81 /7), rm/reg (0x39),
+ * or reg/rm (0x3b). Raises Unrecognized for operand combinations
+ * with no encoding here. *)
+(* FIXME: tighten imm-based dispatch by imm type. *)
+let cmp (a:Il.operand) (b:Il.operand) : Asm.frag =
+ match (a,b) with
+ (Il.Cell c, Il.Imm (i, TY_i8)) when is_rm8 c ->
+ insn_rm_r_imm 0x80 c slash7 TY_i8 i
+ | (Il.Cell c, Il.Imm (i, TY_u8)) when is_rm8 c ->
+ insn_rm_r_imm 0x80 c slash7 TY_u8 i
+ | (Il.Cell c, Il.Imm (i, _)) when is_rm32 c ->
+ (*
+ * NB: We can't switch on signed-ness here, as 'cmp' is
+ * defined to sign-extend its operand; i.e. we have to treat
+ * it as though you're emitting a signed byte (in the sense of
+ * immediate-size selection) even if the incoming value is
+ * unsigned.
+ *)
+ insn_rm_r_imm_s8_s32 0x83 0x81 c slash7 i
+ | (Il.Cell c, Il.Cell (Il.Reg (Il.Hreg r, _))) ->
+ insn_rm_r 0x39 c (reg r)
+ | (Il.Cell (Il.Reg (Il.Hreg r, _)), Il.Cell c) ->
+ insn_rm_r 0x3b c (reg r)
+ | _ -> raise Unrecognized
+;;
+
+(* Encode a memory-zeroing operation. Only one shape is handled:
+ * destination addressed through edi, count in ecx; this emits
+ * "mov al,0; rep stosb" (and thus clobbers al, and advances
+ * edi/decrements ecx as rep stos does). Anything else raises
+ * Unrecognized. *)
+let zero (dst:Il.cell) (count:Il.operand) : Asm.frag =
+ match (dst, count) with
+
+ ((Il.Mem (Il.RegIn ((Il.Hreg dst_ptr), None), _)),
+ Il.Cell (Il.Reg ((Il.Hreg count), _)))
+ when dst_ptr = edi && count = ecx ->
+ Asm.BYTES [|
+ 0xb0; 0x0; (* mov %eax, 0 : move a zero into al. *)
+ 0xf3; 0xaa; (* rep stos m8 : fill ecx bytes at [edi] with al *)
+ |]
+
+ | _ -> raise Unrecognized
+;;
+
+(* Encode a MOV of [src] into [dst], choosing among the plain 8/32
+ * bit forms (0x88/0x89/0x8b), MOVZX/MOVSX (0x0f 0xb6 / 0x0f 0xbe,
+ * selected by [signed]) for widening, two-instruction fakes for
+ * extending a byte register into memory, and the imm forms
+ * (0xc6/0xc7). Asserts that any 8-bit register operand is one of
+ * the byte-addressable regs. Raises Unrecognized otherwise. *)
+let mov (signed:bool) (dst:Il.cell) (src:Il.operand) : Asm.frag =
+ if is_ty8 (Il.cell_scalar_ty dst) || is_ty8 (Il.operand_scalar_ty src)
+ then
+ begin
+ (match dst with
+ Il.Reg (Il.Hreg r, _)
+ -> assert (is_ok_r8 r) | _ -> ());
+ (match src with
+ Il.Cell (Il.Reg (Il.Hreg r, _))
+ -> assert (is_ok_r8 r) | _ -> ());
+ end;
+
+ match (signed, dst, src) with
+
+ (* m8 <- r??, r8 or truncate(r32). *)
+ (_, _, Il.Cell (Il.Reg ((Il.Hreg r), _)))
+ when is_m8 dst ->
+ insn_rm_r 0x88 dst (reg r)
+
+ (* r8 <- r8: treat as r32 <- r32. *)
+ | (_, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell)
+ when is_r8 dst && is_r8 src_cell ->
+ insn_rm_r 0x8b src_cell (reg r)
+
+ (* rm32 <- r32 *)
+ | (_, _, Il.Cell (Il.Reg ((Il.Hreg r), src_ty)))
+ when (is_r8 dst || is_rm32 dst) && is_ty32 src_ty ->
+ insn_rm_r 0x89 dst (reg r)
+
+ (* r32 <- rm32 *)
+ | (_, (Il.Reg ((Il.Hreg r), dst_ty)), Il.Cell src_cell)
+ when is_ty32 dst_ty && is_rm32 src_cell ->
+ insn_rm_r 0x8b src_cell (reg r)
+
+ (* MOVZX: r8/r32 <- zx(rm8) *)
+ | (false, Il.Reg ((Il.Hreg r, _)), Il.Cell src_cell)
+ when (is_r8 dst || is_r32 dst) && is_rm8 src_cell ->
+ Asm.SEQ [| Asm.BYTE 0x0f;
+ insn_rm_r 0xb6 src_cell (reg r) |]
+
+ (* MOVZX: m32 <- zx(r8) *)
+ | (false, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell)))
+ when (is_m32 dst) && is_r8 src_cell ->
+ (* Fake with 2 insns:
+ *
+ * movzx r32 <- r8; (in-place zero-extension)
+ * mov m32 <- r32; (NB: must happen in AL/CL/DL/BL)
+ *)
+ Asm.SEQ [| Asm.BYTE 0x0f;
+ insn_rm_r 0xb6 src_cell (reg r);
+ insn_rm_r 0x89 dst (reg r);
+ |]
+
+ (* MOVSX: r8/r32 <- sx(rm8) *)
+ | (true, Il.Reg ((Il.Hreg r), _), Il.Cell src_cell)
+ when (is_r8 dst || is_r32 dst) && is_rm8 src_cell ->
+ Asm.SEQ [| Asm.BYTE 0x0f;
+ insn_rm_r 0xbe src_cell (reg r) |]
+
+ (* MOVSX: m32 <- sx(r8) *)
+ | (true, _, (Il.Cell (Il.Reg ((Il.Hreg r), _) as src_cell)))
+ when (is_m32 dst) && is_r8 src_cell ->
+ (* Fake with 2 insns:
+ *
+ * movsx r32 <- r8; (in-place sign-extension)
+ * mov m32 <- r32; (NB: must happen in AL/CL/DL/BL)
+ *)
+ Asm.SEQ [| Asm.BYTE 0x0f;
+ insn_rm_r 0xbe src_cell (reg r);
+ insn_rm_r 0x89 dst (reg r);
+ |]
+
+ (* m8 <- imm8 (signed) *)
+ | (_, _, Il.Imm ((Asm.IMM n), _))
+ when is_m8 dst && imm_is_signed_byte n && signed ->
+ insn_rm_r_imm 0xc6 dst slash0 TY_i8 (Asm.IMM n)
+
+ (* m8 <- imm8 (unsigned) *)
+ | (_, _, Il.Imm ((Asm.IMM n), _))
+ when is_m8 dst && imm_is_unsigned_byte n && (not signed) ->
+ insn_rm_r_imm 0xc6 dst slash0 TY_u8 (Asm.IMM n)
+
+ (* rm32 <- imm32 *)
+ | (_, _, Il.Imm (i, _)) when is_rm32 dst || is_r8 dst ->
+ (* NOTE(review): the signed/unsigned pairing here looks
+ * inverted (signed -> TY_u32, unsigned -> TY_i32); confirm
+ * against Asm.WORD's immediate range-checking before
+ * changing anything, since both types emit 4 bytes. *)
+ let t = if signed then TY_u32 else TY_i32 in
+ insn_rm_r_imm 0xc7 dst slash0 t i
+
+ | _ -> raise Unrecognized
+;;
+
+
+(* LEA: load an effective address into a 32-bit register.  Two forms:
+ *
+ *   - reg <- address of a memory operand: plain LEA (opcode 0x8d);
+ *
+ *   - reg <- absolute address of a fixup, computed PC-relatively so
+ *     the code stays position-independent: call the get-next-pc
+ *     thunk, then add the (fixup - anchor) displacement to the
+ *     returned PC.  Only EAX is accepted here -- presumably matching
+ *     the register the thunk deposits the PC in; confirm against the
+ *     thunk's definition before widening.
+ *
+ * Raises Unrecognized for any other operand shape. *)
+let lea (dst:Il.cell) (src:Il.operand) : Asm.frag =
+  match (dst, src) with
+      (Il.Reg ((Il.Hreg r), dst_ty),
+       Il.Cell (Il.Mem (mem, _)))
+          when is_ty32 dst_ty ->
+        insn_rm_r 0x8d (Il.Mem (mem, Il.OpaqueTy)) (reg r)
+
+    | (Il.Reg ((Il.Hreg r), dst_ty),
+       Il.ImmPtr (fix, _))
+          when is_ty32 dst_ty && r = eax ->
+        (* Displacement from the anchor point (just after the call)
+         * to the target fixup. *)
+        let anchor = new_fixup "anchor" in
+        let fix_off = Asm.SUB ((Asm.M_POS fix),
+                               (Asm.M_POS anchor))
+        in
+          (* NB: These instructions must come as a
+           * cluster, w/o any separation.
+           *)
+          Asm.SEQ [|
+            insn_pcrel_simple 0xe8 get_next_pc_thunk_fixup;
+            (* 0x81 /0 is ADD r/m32, imm32: dst <- dst + fix_off. *)
+            Asm.DEF (anchor, insn_rm_r_imm 0x81 dst slash0 TY_i32 fix_off);
+          |]
+
+    | _ -> raise Unrecognized
+;;
+
+
+(* Encode the non-arithmetic quads: calls, push/pop, ret, jumps and
+ * pseudo-ops.  Raises Unrecognized for any unsupported form (for
+ * example a call whose destination cell is not EAX). *)
+let select_insn_misc (q:Il.quad') : Asm.frag =
+
+  match q with
+      Il.Call c ->
+        begin
+          (* Call results are only accepted into EAX. *)
+          match c.Il.call_dst with
+              Il.Reg ((Il.Hreg dst), _) when dst = eax ->
+                begin
+                  match c.Il.call_targ with
+
+                      (* Indirect call through a code pointer cell. *)
+                      Il.CodePtr (Il.Cell c)
+                          when Il.cell_referent_ty c
+                            = Il.ScalarTy (Il.AddrTy Il.CodeTy) ->
+                        insn_rm_r 0xff c slash2
+
+                    (* Direct PC-relative call to a fixup. *)
+                    | Il.CodePtr (Il.ImmPtr (f, Il.CodeTy)) ->
+                        insn_pcrel_simple 0xe8 f
+
+                    | _ -> raise Unrecognized
+                end
+            | _ -> raise Unrecognized
+        end
+
+    (* push r32: single-byte 0x50+r form. *)
+    | Il.Push (Il.Cell (Il.Reg ((Il.Hreg r), t))) when is_ty32 t ->
+        Asm.BYTE (0x50 + (reg r))
+
+    | Il.Push (Il.Cell c) when is_rm32 c ->
+        insn_rm_r 0xff c slash6
+
+    (* push imm8 (sign-extended at runtime). *)
+    | Il.Push (Il.Imm (Asm.IMM i, _)) when imm_is_unsigned_byte i ->
+        Asm.SEQ [| Asm.BYTE 0x6a; Asm.WORD (TY_u8, (Asm.IMM i)) |]
+
+    | Il.Push (Il.Imm (i, _)) ->
+        Asm.SEQ [| Asm.BYTE 0x68; Asm.WORD (TY_u32, i) |]
+
+    (* pop r32: single-byte 0x58+r form. *)
+    | Il.Pop (Il.Reg ((Il.Hreg r), t)) when is_ty32 t ->
+        Asm.BYTE (0x58 + (reg r))
+
+    | Il.Pop c when is_rm32 c ->
+        insn_rm_r 0x8f c slash0
+
+    | Il.Ret -> Asm.BYTE 0xc3
+
+    | Il.Jmp j ->
+        begin
+          match (j.Il.jmp_op, j.Il.jmp_targ) with
+
+              (* Unconditional jump to fixup: short or near form. *)
+              (Il.JMP, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) ->
+                insn_pcrel 0xeb 0xe9 f
+
+              (* Unconditional indirect jump through a code pointer. *)
+            | (Il.JMP, Il.CodePtr (Il.Cell c))
+                  when Il.cell_referent_ty c
+                    = Il.ScalarTy (Il.AddrTy Il.CodeTy) ->
+                insn_rm_r 0xff c slash4
+
+            (* FIXME: refactor this to handle cell-based jumps
+             * if we ever need them. So far not. *)
+            | (_, Il.CodePtr (Il.ImmPtr (f, Il.CodeTy))) ->
+                (* Conditional jumps: (short opcode, 0x0f-prefixed
+                 * near opcode) pairs for each condition code. *)
+                let (op8, op32) =
+                  match j.Il.jmp_op with
+                    | Il.JC -> (0x72, 0x82)
+                    | Il.JNC -> (0x73, 0x83)
+                    | Il.JZ -> (0x74, 0x84)
+                    | Il.JNZ -> (0x75, 0x85)
+                    | Il.JO -> (0x70, 0x80)
+                    | Il.JNO -> (0x71, 0x81)
+                    | Il.JE -> (0x74, 0x84)
+                    | Il.JNE -> (0x75, 0x85)
+
+                    | Il.JL -> (0x7c, 0x8c)
+                    | Il.JLE -> (0x7e, 0x8e)
+                    | Il.JG -> (0x7f, 0x8f)
+                    | Il.JGE -> (0x7d, 0x8d)
+
+                    | Il.JB -> (0x72, 0x82)
+                    | Il.JBE -> (0x76, 0x86)
+                    | Il.JA -> (0x77, 0x87)
+                    | Il.JAE -> (0x73, 0x83)
+                    | _ -> raise Unrecognized
+                in
+                let prefix32 = 0x0f in
+                  insn_pcrel_prefix32 op8 prefix32 op32 f
+
+            | _ -> raise Unrecognized
+        end
+
+    (* Dead/Regfence are bookkeeping quads: Asm.MARK emits no bytes. *)
+    | Il.Dead -> Asm.MARK
+    | Il.Debug -> Asm.BYTES [| 0xcc |] (* int 3 *)
+    | Il.Regfence -> Asm.MARK
+    | Il.End -> Asm.BYTES [| 0x90 |]  (* one-byte NOP *)
+    | Il.Nop -> Asm.BYTES [| 0x90 |]  (* one-byte NOP *)
+    | _ -> raise Unrecognized
+;;
+
+
+(* Per-operation opcode table for the two-address ALU binops
+ * (ADD, SUB, AND, OR, XOR); consumed by [alu_binop] below. *)
+type alu_binop_codes =
+    {
+      insn: string;    (* operation mnemonic; not read by alu_binop *)
+      immslash: int;   (* mod/rm "slash" code for imm-src variant *)
+      rm_dst_op8: int;  (* opcode for 8-bit r/m dst variant *)
+      rm_dst_op32: int; (* opcode for 32-bit r/m dst variant *)
+      rm_src_op8: int;  (* opcode for 8-bit r/m src variant *)
+      rm_src_op32: int; (* opcode for 32-bit r/m src variant *)
+    }
+;;
+
+(* Encode a two-address ALU binop [dst <- dst OP src] using the
+ * opcode table in [codes].  Immediate sources use the shared
+ * 0x83/0x81 group encoding plus the per-op slash code.
+ *
+ * NOTE(review): the 8-bit operand arms reuse the 32-bit opcodes
+ * (rm_src_op32 / rm_dst_op32); the *_op8 fields of the table are
+ * never consulted here.  Confirm whether dedicated 8-bit encodings
+ * were intended before relying on 8-bit memory operands. *)
+let alu_binop
+    (dst:Il.cell) (src:Il.operand) (codes:alu_binop_codes)
+    : Asm.frag =
+  match (dst, src) with
+      (Il.Reg ((Il.Hreg r), dst_ty), Il.Cell c)
+          when (is_ty32 dst_ty && is_rm32 c) || (is_ty8 dst_ty && is_rm8 c)
+        -> insn_rm_r codes.rm_src_op32 c (reg r)
+
+    | (_, Il.Cell (Il.Reg ((Il.Hreg r), src_ty)))
+          when (is_rm32 dst && is_ty32 src_ty) || (is_rm8 dst && is_ty8 src_ty)
+        -> insn_rm_r codes.rm_dst_op32 dst (reg r)
+
+    | (_, Il.Imm (i, _)) when is_rm32 dst || is_rm8 dst
+        -> insn_rm_r_imm_s8_s32 0x83 0x81 dst codes.immslash i
+
+    | _ -> raise Unrecognized
+;;
+
+
+(* Encode the one-operand mul/div instruction family (opcode group
+ * 0xf6/0xf7, distinguished by [slash]): MUL, IMUL, DIV, IDIV take a
+ * single r/m operand and operate on EAX/EDX implicitly.  These
+ * instructions have no immediate form, so an immediate source is
+ * first staged into EDX with a mov -- safe because EDX is clobbered
+ * by the instruction anyway.  [signed] only affects that staging
+ * move.  Raises Unrecognized for other operand shapes. *)
+let mul_like (src:Il.operand) (signed:bool) (slash:int)
+    : Asm.frag =
+  match src with
+      Il.Cell src when is_rm32 src ->
+        insn_rm_r 0xf7 src slash
+
+    | Il.Cell src when is_rm8 src ->
+        insn_rm_r 0xf6 src slash
+
+    | Il.Imm (_, TY_u32)
+    | Il.Imm (_, TY_i32) ->
+        let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits32) in
+          Asm.SEQ [| mov signed tmp src;
+                     insn_rm_r 0xf7 tmp slash |]
+
+    | Il.Imm (_, TY_u8)
+    | Il.Imm (_, TY_i8) ->
+        let tmp = Il.Reg ((Il.Hreg edx), Il.ValTy Il.Bits8) in
+          Asm.SEQ [| mov signed tmp src;
+                     insn_rm_r 0xf6 tmp slash |]
+
+    | _ -> raise Unrecognized
+;;
+
+
+(* Top-level instruction selection for a single quad.  Moves, lea and
+ * cmp dispatch to the dedicated encoders above; arithmetic binops are
+ * handled here and must already be in two-address form (dst = lhs),
+ * otherwise Unrecognized is raised.  Everything else falls through to
+ * select_insn_misc. *)
+let select_insn (q:Il.quad) : Asm.frag =
+  match q.Il.quad_body with
+      Il.Unary u ->
+        (* NEG/NOT (the 0xf7 group) are one-operand instructions, so
+         * the quad's src and dst must coincide. *)
+        let unop s =
+          if u.Il.unary_src = Il.Cell u.Il.unary_dst
+          then insn_rm_r 0xf7 u.Il.unary_dst s
+          else raise Unrecognized
+        in
+        begin
+          match u.Il.unary_op with
+            Il.UMOV -> mov false u.Il.unary_dst u.Il.unary_src
+          | Il.IMOV -> mov true u.Il.unary_dst u.Il.unary_src
+          | Il.NEG -> unop slash3
+          | Il.NOT -> unop slash2
+          | Il.ZERO -> zero u.Il.unary_dst u.Il.unary_src
+        end
+
+    | Il.Lea le -> lea le.Il.lea_dst le.Il.lea_src
+
+    | Il.Cmp c -> cmp c.Il.cmp_lhs c.Il.cmp_rhs
+
+    | Il.Binary b ->
+        begin
+          (* Binops must be two-address (dst = lhs) by this point. *)
+          if Il.Cell b.Il.binary_dst = b.Il.binary_lhs
+          then
+            let binop = alu_binop b.Il.binary_dst b.Il.binary_rhs in
+            let mulop = mul_like b.Il.binary_rhs in
+
+            let divop signed slash =
+              Asm.SEQ [|
+                (* xor edx edx, then mul_like. *)
+                insn_rm_r 0x33 (rc edx) (reg edx);
+                mul_like b.Il.binary_rhs signed slash
+              |]
+            in
+
+            let modop signed slash =
+              Asm.SEQ [|
+                (* divop, then mov remainder to eax instead. *)
+                divop signed slash;
+                mov false (rc eax) (ro edx)
+              |]
+            in
+
+            (* Shifts: immediate counts are masked to their low 8
+             * bits; variable counts must already live in CL. *)
+            let shiftop slash =
+              let src = b.Il.binary_rhs in
+              let dst = b.Il.binary_dst in
+              let mask i = Asm.AND (i, Asm.IMM 0xffL) in
+                if is_rm8 dst
+                then
+                  match src with
+                      Il.Imm (i, _) ->
+                        insn_rm_r_imm 0xC0 dst slash TY_u8 (mask i)
+                    | Il.Cell (Il.Reg ((Il.Hreg r), _))
+                          when r = ecx ->
+                        Asm.SEQ [| Asm.BYTE 0xD2; rm_r dst slash |]
+                    | _ -> raise Unrecognized
+                else
+                  match src with
+                      Il.Imm (i, _) ->
+                        insn_rm_r_imm 0xC1 dst slash TY_u8 (mask i)
+                    | Il.Cell (Il.Reg ((Il.Hreg r), _))
+                          when r = ecx ->
+                        Asm.SEQ [| Asm.BYTE 0xD3; rm_r dst slash |]
+                    | _ -> raise Unrecognized
+            in
+
+              (* mul/div/mod forms require the dst (and implicit
+               * operand) register to be EAX. *)
+              match (b.Il.binary_dst, b.Il.binary_op) with
+                  (_, Il.ADD) -> binop { insn="ADD";
+                                         immslash=slash0;
+                                         rm_dst_op8=0x0;
+                                         rm_dst_op32=0x1;
+                                         rm_src_op8=0x2;
+                                         rm_src_op32=0x3; }
+                | (_, Il.SUB) -> binop { insn="SUB";
+                                         immslash=slash5;
+                                         rm_dst_op8=0x28;
+                                         rm_dst_op32=0x29;
+                                         rm_src_op8=0x2a;
+                                         rm_src_op32=0x2b; }
+                | (_, Il.AND) -> binop { insn="AND";
+                                         immslash=slash4;
+                                         rm_dst_op8=0x20;
+                                         rm_dst_op32=0x21;
+                                         rm_src_op8=0x22;
+                                         rm_src_op32=0x23; }
+                | (_, Il.OR) -> binop { insn="OR";
+                                        immslash=slash1;
+                                        rm_dst_op8=0x08;
+                                        rm_dst_op32=0x09;
+                                        rm_src_op8=0x0a;
+                                        rm_src_op32=0x0b; }
+                | (_, Il.XOR) -> binop { insn="XOR";
+                                         immslash=slash6;
+                                         rm_dst_op8=0x30;
+                                         rm_dst_op32=0x31;
+                                         rm_src_op8=0x32;
+                                         rm_src_op32=0x33; }
+
+                | (_, Il.LSL) -> shiftop slash4
+                | (_, Il.LSR) -> shiftop slash5
+                | (_, Il.ASR) -> shiftop slash7
+
+                | (Il.Reg (Il.Hreg r, t), Il.UMUL)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                    mulop false slash4
+
+                | (Il.Reg (Il.Hreg r, t), Il.IMUL)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                    mulop true slash5
+
+                | (Il.Reg (Il.Hreg r, t), Il.UDIV)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                    divop false slash6
+
+                | (Il.Reg (Il.Hreg r, t), Il.IDIV)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                    divop true slash7
+
+                | (Il.Reg (Il.Hreg r, t), Il.UMOD)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                    modop false slash6
+
+                | (Il.Reg (Il.Hreg r, t), Il.IMOD)
+                    when (is_ty32 t || is_ty8 t) && r = eax ->
+                    modop true slash7
+
+                | _ -> raise Unrecognized
+          else raise Unrecognized
+        end
+    | _ -> select_insn_misc q.Il.quad_body
+;;
+
+
+(* Build an emitter for hand-written quad sequences: same prealloc
+ * hook and two-address constraint as the surrounding abi, but with
+ * virtual-register support disabled. *)
+let new_emitter_without_vregs _ : Il.emitter =
+  let prealloc = abi.Abi.abi_prealloc_quad in
+  let is_2addr = abi.Abi.abi_is_2addr_machine in
+    Il.new_emitter prealloc is_2addr false None
+;;
+
+(* Translate an array of quads into one assembly frag, honouring the
+ * Enter/Leave scope markers: each Enter opens a nested frag sequence
+ * that is closed and bound (via Asm.DEF) to its fixup at the matching
+ * Leave.  A quad's own fixup, if any, is defined at the point the
+ * quad is reached.  Unrecognized quads are reported through
+ * Session.fail and skipped, so selection continues and all errors in
+ * a batch are surfaced. *)
+let select_insns (sess:Session.sess) (q:Il.quads) : Asm.frag =
+  let scopes = Stack.create () in
+  let fixups = Stack.create () in
+  let pop_frags _ =
+    (* Frags are accumulated in reverse; restore order on close. *)
+    Asm.SEQ (Array.of_list
+               (List.rev
+                  (!(Stack.pop scopes))))
+  in
+  (* Append a frag to the innermost open scope. *)
+  let append frag =
+    let frags = Stack.top scopes in
+      frags := frag :: (!frags)
+  in
+  let visit quad =
+    begin
+      match quad.Il.quad_fixup with
+          None -> ()
+        | Some f -> append (Asm.DEF (f, Asm.MARK))
+    end;
+    match quad.Il.quad_body with
+        Il.Enter f ->
+          Stack.push f fixups;
+          Stack.push (ref []) scopes
+      | Il.Leave ->
+          append (Asm.DEF (Stack.pop fixups, pop_frags ()))
+      | _ ->
+          (* Report-and-continue on selection failure. *)
+          try
+            append (select_insn quad)
+          with
+              Unrecognized ->
+                Session.fail sess
+                  "E:Assembly error: unrecognized quad: %s\n%!"
+                  (Il.string_of_quad reg_str quad)
+  in
+    (* Root scope.  NB: Stack.push returns unit; no ignore needed. *)
+    Stack.push (ref []) scopes;
+    Array.iter visit q;
+    pop_frags ()
+;;
+
+(* Run instruction selection over an emitter's accumulated quads and
+ * return the resulting frag; raise Unrecognized if any selection
+ * error was reported against the session along the way. *)
+let frags_of_emitted_quads (sess:Session.sess) (e:Il.emitter) : Asm.frag =
+  let frag = select_insns sess e.Il.emit_quads in
+    if not sess.Session.sess_failed
+    then frag
+    else raise Unrecognized
+;;
+
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)