diff options
| author | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
|---|---|---|
| committer | Graydon Hoare <[email protected]> | 2010-06-23 21:03:09 -0700 |
| commit | d6b7c96c3eb29b9244ece0c046d3f372ff432d04 (patch) | |
| tree | b425187e232966063ffc2f0d14c04a55d8f004ef /src/boot/be/pe.ml | |
| parent | Initial git commit. (diff) | |
| download | rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.tar.xz rust-d6b7c96c3eb29b9244ece0c046d3f372ff432d04.zip | |
Populate tree.
Diffstat (limited to 'src/boot/be/pe.ml')
| -rw-r--r-- | src/boot/be/pe.ml | 1149 |
1 files changed, 1149 insertions, 0 deletions
diff --git a/src/boot/be/pe.ml b/src/boot/be/pe.ml new file mode 100644 index 00000000..d360ddf5 --- /dev/null +++ b/src/boot/be/pe.ml @@ -0,0 +1,1149 @@ +(* + + Module for writing Microsoft PE files + + Every image has a base address it's to be loaded at. + + "file pointer" = offset in file + + "VA" = address at runtime + + "RVA" = VA - base address + + If you write a non-RVA absolute address at any point you must put it + in a rebasing list so the loader can adjust it when/if it has to load + you at a different address. + + Almost all addresses in the file are RVAs. Worry about the VAs. + +*) + +open Asm;; +open Common;; + +let log (sess:Session.sess) = + Session.log "obj (pe)" + sess.Session.sess_log_obj + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_obj + then thunk () + else () +;; + +(* + + The default image base (VA) for an executable on Win32 is 0x400000. + + We use this too. RVAs are relative to this. RVA 0 = VA 0x400000. + + Alignments are also relatively standard and fixed for Win32/PE32: + 4k memory pages, 512 byte disk sectors. + + Since this is a stupid emitter, and we're not generating an awful + lot of sections, we are not going to differentiate between these + two kinds of alignment: we just align our sections to memory pages + and sometimes waste most of them. Shucks. + +*) + +let pe_image_base = 0x400000L;; +let pe_file_alignment = 0x200;; +let pe_mem_alignment = 0x1000;; + +let rva (f:fixup) = (SUB ((M_POS f), (IMM pe_image_base)));; + +let def_file_aligned f i = + ALIGN_FILE + (pe_file_alignment, + SEQ [| + DEF(f, + SEQ [| i; + ALIGN_FILE + (pe_file_alignment, MARK) |]) |] ) +;; + +let def_mem_aligned f i = + ALIGN_MEM + (pe_mem_alignment, + SEQ [| + DEF(f, + SEQ [| i; + ALIGN_MEM + (pe_mem_alignment, MARK) |]) |] ) +;; + +let align_both i = + ALIGN_FILE (pe_file_alignment, + (ALIGN_MEM (pe_mem_alignment, i))) +;; + +let def_aligned f i = + align_both + (SEQ [| DEF(f,i); + (align_both MARK)|]) +;; + + +(* + + At the beginning of a PE file there is an MS-DOS stub, 0x00 - 0x7F, + that we just insert literally. It prints "This program must be run + under Win32" and exits. Woo! + + Within it, at offset 0x3C, there is an encoded offset of the PE + header we actually care about. So 0x3C - 0x3F are 0x00000100 (LE) + which say "the PE header is actually at 0x100", a nice sensible spot + for it. We pad the next 128 bytes out to 0x100 and start there for + real. + + From then on in it's a sensible object file. Here's the MS-DOS bit. +*) + +let pe_msdos_header_and_padding + : frag = + SEQ [| + BYTES + [| + (* 00000000 *) + 0x4d; 0x5a; 0x50; 0x00; 0x02; 0x00; 0x00; 0x00; + 0x04; 0x00; 0x0f; 0x00; 0xff; 0xff; 0x00; 0x00; + + (* 00000010 *) + 0xb8; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x40; 0x00; 0x1a; 0x00; 0x00; 0x00; 0x00; 0x00; + + (* 00000020 *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + + (* 00000030 *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + 0x00; 0x00; 0x00; 0x00; 0x00; 0x01; 0x00; 0x00; + (* ^^^^PE HDR offset^^^^^ *) + + (* 00000040 *) + 0xba; 0x10; 0x00; 0x0e; 0x1f; 0xb4; 0x09; 0xcd; + 0x21; 0xb8; 0x01; 0x4c; 0xcd; 0x21; 0x90; 0x90; + + (* 00000050 *) + 0x54; 0x68; 0x69; 0x73; 0x20; 0x70; 0x72; 0x6f; (* "This pro" *) + 0x67; 0x72; 0x61; 0x6d; 0x20; 0x6d; 0x75; 0x73; (* "gram mus" *) + + (* 00000060 *) + 0x74; 0x20; 0x62; 0x65; 0x20; 0x72; 0x75; 0x6e; (* "t be run" *) + 0x20; 0x75; 0x6e; 0x64; 0x65; 0x72; 0x20; 0x57; (* " under W" *) + + (* 00000070 *) + 0x69; 0x6e; 0x33; 0x32; 0x0d; 0x0a; 0x24; 0x37; (* "in32\r\n" *) + 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; + |]; + PAD 0x80 + |] +;; + +(* + A work of art, is it not? Take a moment to appreciate the madness. + + All done? Ok, now on to the PE header proper. + + PE headers are just COFF headers with a little preamble. +*) + +type pe_machine = + (* Maybe support more later. *) + IMAGE_FILE_MACHINE_AMD64 + | IMAGE_FILE_MACHINE_I386 +;; + + +let pe_timestamp _ = + Int64.of_float (Unix.gettimeofday()) +;; + + +type pe_characteristics = + (* Maybe support more later. *) + IMAGE_FILE_RELOCS_STRIPPED + | IMAGE_FILE_EXECUTABLE_IMAGE + | IMAGE_FILE_LINE_NUMS_STRIPPED + | IMAGE_FILE_LOCAL_SYMS_STRIPPED + | IMAGE_FILE_32BIT_MACHINE + | IMAGE_FILE_DEBUG_STRIPPED + | IMAGE_FILE_DLL +;; + + +let pe_header + ~(machine:pe_machine) + ~(symbol_table_fixup:fixup) + ~(number_of_sections:int64) + ~(number_of_symbols:int64) + ~(loader_hdr_fixup:fixup) + ~(characteristics:pe_characteristics list) + : frag = + ALIGN_FILE + (8, + SEQ [| + STRING "PE\x00\x00"; + WORD (TY_u16, (IMM (match machine with + IMAGE_FILE_MACHINE_AMD64 -> 0x8664L + | IMAGE_FILE_MACHINE_I386 -> 0x014cL))); + WORD (TY_u16, (IMM number_of_sections)); + WORD (TY_u32, (IMM (pe_timestamp()))); + WORD (TY_u32, (F_POS symbol_table_fixup)); + WORD (TY_u32, (IMM number_of_symbols)); + WORD (TY_u16, (F_SZ loader_hdr_fixup)); + WORD (TY_u16, (IMM (fold_flags + (fun c -> match c with + IMAGE_FILE_RELOCS_STRIPPED -> 0x1L + | IMAGE_FILE_EXECUTABLE_IMAGE -> 0x2L + | IMAGE_FILE_LINE_NUMS_STRIPPED -> 0x4L + | IMAGE_FILE_LOCAL_SYMS_STRIPPED -> 0x8L + | IMAGE_FILE_32BIT_MACHINE -> 0x100L + | IMAGE_FILE_DEBUG_STRIPPED -> 0x200L + | IMAGE_FILE_DLL -> 0x2000L) + characteristics))) + |]) +;; + +(* + + After the PE header comes an "optional" header for the loader. In + our case this is hardly optional since we are producing a file for + the loader. + +*) + +type pe_subsystem = + (* Maybe support more later. *) + IMAGE_SUBSYSTEM_WINDOWS_GUI + | IMAGE_SUBSYSTEM_WINDOWS_CUI +;; + +let zero32 = WORD (TY_u32, (IMM 0L)) +;; + +let pe_loader_header + ~(text_fixup:fixup) + ~(init_data_fixup:fixup) + ~(size_of_uninit_data:int64) + ~(entry_point_fixup:fixup option) + ~(image_fixup:fixup) + ~(all_hdrs_fixup:fixup) + ~(subsys:pe_subsystem) + ~(loader_hdr_fixup:fixup) + ~(import_dir_fixup:fixup) + ~(export_dir_fixup:fixup) + : frag = + DEF + (loader_hdr_fixup, + SEQ [| + WORD (TY_u16, (IMM 0x10bL)); (* COFF magic tag for PE32. *) + (* Snagged *) + WORD (TY_u8, (IMM 0x2L)); (* Linker major version. *) + WORD (TY_u8, (IMM 0x38L)); (* Linker minor version. *) + + WORD (TY_u32, (F_SZ text_fixup)); (* "size of code" *) + WORD (TY_u32, (* "size of all init data" *) + (F_SZ init_data_fixup)); + WORD (TY_u32, + (IMM size_of_uninit_data)); + + begin + match entry_point_fixup with + None -> zero32 (* Library mode: DLLMain *) + | Some entry_point_fixup -> + WORD (TY_u32, + (rva + entry_point_fixup)) (* "address of entry point" *) + end; + + WORD (TY_u32, (rva text_fixup)); (* "base of code" *) + WORD (TY_u32, (rva init_data_fixup)); (* "base of data" *) + WORD (TY_u32, (IMM pe_image_base)); + WORD (TY_u32, (IMM (Int64.of_int + pe_mem_alignment))); + WORD (TY_u32, (IMM (Int64.of_int + pe_file_alignment))); + + WORD (TY_u16, (IMM 4L)); (* Major OS version: NT4. *) + WORD (TY_u16, (IMM 0L)); (* Minor OS version. *) + WORD (TY_u16, (IMM 1L)); (* Major image version. *) + WORD (TY_u16, (IMM 0L)); (* Minor image version. *) + WORD (TY_u16, (IMM 4L)); (* Major subsystem version. *) + WORD (TY_u16, (IMM 0L)); (* Minor subsystem version. *) + + zero32; (* Reserved. *) + + WORD (TY_u32, (M_SZ image_fixup)); + WORD (TY_u32, (M_SZ all_hdrs_fixup)); + + zero32; (* Checksum, but OK if zero. *) + WORD (TY_u16, (IMM (match subsys with + IMAGE_SUBSYSTEM_WINDOWS_GUI -> 2L + | IMAGE_SUBSYSTEM_WINDOWS_CUI -> 3L))); + + WORD (TY_u16, (IMM 0L)); (* DLL characteristics. *) + + WORD (TY_u32, (IMM 0x100000L)); (* Size of stack reserve. *) + WORD (TY_u32, (IMM 0x4000L)); (* Size of stack commit. *) + + WORD (TY_u32, (IMM 0x100000L)); (* Size of heap reserve. *) + WORD (TY_u32, (IMM 0x1000L)); (* Size of heap commit. *) + + zero32; (* Reserved. *) + WORD (TY_u32, (IMM 16L)); (* Number of dir references. *) + + (* Begin directories, variable part of hdr. *) + + (* + + Standard PE files have ~10 directories referenced from + here. We only fill in two of them -- the export/import + directories -- because we don't care about the others. We + leave the rest as zero in case someone is looking for + them. This may be superfluous or wrong. + + *) + + + WORD (TY_u32, (rva export_dir_fixup)); + WORD (TY_u32, (M_SZ export_dir_fixup)); + + WORD (TY_u32, (rva import_dir_fixup)); + WORD (TY_u32, (M_SZ import_dir_fixup)); + + zero32; zero32; (* Resource dir. *) + zero32; zero32; (* Exception dir. *) + zero32; zero32; (* Security dir. *) + zero32; zero32; (* Base reloc dir. *) + zero32; zero32; (* Debug dir. *) + zero32; zero32; (* Image desc dir. *) + zero32; zero32; (* Mach spec dir. *) + zero32; zero32; (* TLS dir. *) + + zero32; zero32; (* Load config. *) + zero32; zero32; (* Bound import. *) + zero32; zero32; (* IAT *) + zero32; zero32; (* Delay import. *) + zero32; zero32; (* COM descriptor *) + zero32; zero32; (* ???????? *) + |]) + +;; + + +type pe_section_id = + (* Maybe support more later. *) + SECTION_ID_TEXT + | SECTION_ID_DATA + | SECTION_ID_RDATA + | SECTION_ID_BSS + | SECTION_ID_IMPORTS + | SECTION_ID_EXPORTS + | SECTION_ID_DEBUG_ARANGES + | SECTION_ID_DEBUG_PUBNAMES + | SECTION_ID_DEBUG_INFO + | SECTION_ID_DEBUG_ABBREV + | SECTION_ID_DEBUG_LINE + | SECTION_ID_DEBUG_FRAME + | SECTION_ID_NOTE_RUST +;; + +type pe_section_characteristics = + (* Maybe support more later. *) + IMAGE_SCN_CNT_CODE + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_CNT_UNINITIALIZED_DATA + | IMAGE_SCN_MEM_DISCARDABLE + | IMAGE_SCN_MEM_SHARED + | IMAGE_SCN_MEM_EXECUTE + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE + +let pe_section_header + ~(id:pe_section_id) + ~(hdr_fixup:fixup) + : frag = + let + characteristics = + match id with + SECTION_ID_TEXT -> [ IMAGE_SCN_CNT_CODE; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_EXECUTE ] + | SECTION_ID_DATA -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_BSS -> [ IMAGE_SCN_CNT_UNINITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_IMPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ; + IMAGE_SCN_MEM_WRITE ] + | SECTION_ID_EXPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ ] + | SECTION_ID_RDATA + | SECTION_ID_DEBUG_ARANGES + | SECTION_ID_DEBUG_PUBNAMES + | SECTION_ID_DEBUG_INFO + | SECTION_ID_DEBUG_ABBREV + | SECTION_ID_DEBUG_LINE + | SECTION_ID_DEBUG_FRAME + | SECTION_ID_NOTE_RUST -> [ IMAGE_SCN_CNT_INITIALIZED_DATA; + IMAGE_SCN_MEM_READ ] + in + SEQ [| + STRING + begin + match id with + SECTION_ID_TEXT -> ".text\x00\x00\x00" + | SECTION_ID_DATA -> ".data\x00\x00\x00" + | SECTION_ID_RDATA -> ".rdata\x00\x00" + | SECTION_ID_BSS -> ".bss\x00\x00\x00\x00" + | SECTION_ID_IMPORTS -> ".idata\x00\x00" + | SECTION_ID_EXPORTS -> ".edata\x00\x00" + + (* There is a bizarre Microsoft COFF extension to account + * for longer-than-8-char section names: you emit a single + * '/' character then the ASCII-numeric encoding of the + * offset within the file's string table of the full name. + * So we put all our extended section names at the + * beginning of the string table in a very specific order + * and hard-wire the offsets as "names" here. You could + * theoretically extend this to a "new kind" of fixup + * reference (ASCII_POS or such), if you feel this is + * something you want to twiddle with. + *) + + | SECTION_ID_DEBUG_ARANGES -> "/4\x00\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_PUBNAMES -> "/19\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_INFO -> "/35\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_ABBREV -> "/47\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_LINE -> "/61\x00\x00\x00\x00\x00" + | SECTION_ID_DEBUG_FRAME -> "/73\x00\x00\x00\x00\x00" + | SECTION_ID_NOTE_RUST -> "/86\x00\x00\x00\x00\x00" + end; + + (* The next two pairs are only supposed to be different if the + file and section alignments differ. This is a stupid emitter + so they're not, no problem. *) + + WORD (TY_u32, (M_SZ hdr_fixup)); (* "Virtual size" *) + WORD (TY_u32, (rva hdr_fixup)); (* "Virtual address" *) + + WORD (TY_u32, (F_SZ hdr_fixup)); (* "Size of raw data" *) + WORD (TY_u32, (F_POS hdr_fixup)); (* "Pointer to raw data" *) + + zero32; (* Reserved. *) + zero32; (* Reserved. *) + zero32; (* Reserved. *) + + WORD (TY_u32, (IMM (fold_flags + (fun c -> match c with + IMAGE_SCN_CNT_CODE -> 0x20L + | IMAGE_SCN_CNT_INITIALIZED_DATA -> 0x40L + | IMAGE_SCN_CNT_UNINITIALIZED_DATA -> 0x80L + | IMAGE_SCN_MEM_DISCARDABLE -> 0x2000000L + | IMAGE_SCN_MEM_SHARED -> 0x10000000L + | IMAGE_SCN_MEM_EXECUTE -> 0x20000000L + | IMAGE_SCN_MEM_READ -> 0x40000000L + | IMAGE_SCN_MEM_WRITE -> 0x80000000L) + characteristics))) + |] +;; + + +(* + + "Thunk" is a misnomer here; the thunk RVA is the address of a word + that the loader will store an address into. The stored address is + the address of the imported object. + + So if the imported object is X, and the thunk slot is Y, the loader + is doing "Y = &X" and returning &Y as the thunk RVA. To load datum X + after the imports are resolved, given the thunk RVA R, you load + **R. + +*) + +type pe_import = + { + pe_import_name_fixup: fixup; + pe_import_name: string; + pe_import_address_fixup: fixup; + } + +type pe_import_dll_entry = + { + pe_import_dll_name_fixup: fixup; + pe_import_dll_name: string; + pe_import_dll_ILT_fixup: fixup; + pe_import_dll_IAT_fixup: fixup; + pe_import_dll_imports: pe_import array; + } + + (* + + The import section .idata has a mostly self-contained table + structure. You feed it a list of DLL entries, each of which names + a DLL and lists symbols in the DLL to import. + + For each named symbol, a 4-byte slot will be reserved in an + "import lookup table" (ILT, also in this section). The slot is + a pointer to a string in this section giving the name. + + Immediately *after* the ILT, there is an "import address table" (IAT), + which is initially identical to the ILT. The loader replaces the entries + in the IAT slots with the imported pointers at runtime. + + A central directory at the start of the section lists all the the import + thunk tables. Each entry in the import directory is 20 bytes (5 words) + but only the last 2 are used: the second last is a pointer to the string + name of the DLL in question (string also in this section) and the last is + a pointer to the import thunk table itself (also in this section). + + Curiously, of the 5 documents I've consulted on the nature of the + first 3 fields, I find a variety of interpretations. + + *) + +let pe_import_section + ~(import_dir_fixup:fixup) + ~(dlls:pe_import_dll_entry array) + : frag = + + let form_dir_entry + (entry:pe_import_dll_entry) + : frag = + SEQ [| + (* Note: documented opinions vary greatly about whether the + first, last, or both of the slots in one of these rows points + to the RVA of the name/hint used to look the import up. This + table format is a mess! *) + WORD (TY_u32, + (rva + entry.pe_import_dll_ILT_fixup)); (* Import lookup table. *) + WORD (TY_u32, (IMM 0L)); (* Timestamp, unused. *) + WORD (TY_u32, (IMM 0x0L)); (* Forwarder chain, unused. *) + WORD (TY_u32, (rva entry.pe_import_dll_name_fixup)); + WORD (TY_u32, + (rva + entry.pe_import_dll_IAT_fixup)); (* Import address table.*) + |] + in + + let form_ILT_slot + (import:pe_import) + : frag = + (WORD (TY_u32, (rva import.pe_import_name_fixup))) + in + + let form_IAT_slot + (import:pe_import) + : frag = + (DEF (import.pe_import_address_fixup, + (WORD (TY_u32, (rva import.pe_import_name_fixup))))) + in + + let form_tables_for_dll + (dll:pe_import_dll_entry) + : frag = + let terminator = WORD (TY_u32, (IMM 0L)) in + let ilt = + SEQ [| + SEQ (Array.map form_ILT_slot dll.pe_import_dll_imports); + terminator + |] + in + let iat = + SEQ [| + SEQ (Array.map form_IAT_slot dll.pe_import_dll_imports); + terminator + |] + in + if Array.length dll.pe_import_dll_imports < 1 + then bug () "Pe.form_tables_for_dll: empty imports" + else + SEQ [| + DEF (dll.pe_import_dll_ILT_fixup, ilt); + DEF (dll.pe_import_dll_IAT_fixup, iat) + |] + + in + + let form_import_string + (import:pe_import) + : frag = + DEF + (import.pe_import_name_fixup, + SEQ [| + (* import string entries begin with a 2-byte "hint", but we just + set it to zero. *) + (WORD (TY_u16, (IMM 0L))); + ZSTRING import.pe_import_name; + (if String.length import.pe_import_name mod 2 == 0 + then PAD 1 + else PAD 0) + |]) + in + + let form_dir_entry_string + (dll:pe_import_dll_entry) + : frag = + DEF + (dll.pe_import_dll_name_fixup, + SEQ [| ZSTRING dll.pe_import_dll_name; + (if String.length dll.pe_import_dll_name mod 2 == 0 + then PAD 1 + else PAD 0); + SEQ (Array.map form_import_string dll.pe_import_dll_imports) |]) + in + + let dir = SEQ (Array.map form_dir_entry dlls) in + let dir_terminator = PAD 20 in + let tables = SEQ (Array.map form_tables_for_dll dlls) in + let strings = SEQ (Array.map form_dir_entry_string dlls) + in + def_aligned + import_dir_fixup + (SEQ + [| + dir; + dir_terminator; + tables; + strings + |]) + +;; + +type pe_export = + { + pe_export_name_fixup: fixup; + pe_export_name: string; + pe_export_address_fixup: fixup; + } +;; + +let pe_export_section + ~(sess:Session.sess) + ~(export_dir_fixup:fixup) + ~(exports:pe_export array) + : frag = + Array.sort (fun a b -> compare a.pe_export_name b.pe_export_name) exports; + let export_addr_table_fixup = new_fixup "export address table" in + let export_addr_table = + DEF + (export_addr_table_fixup, + SEQ + (Array.map + (fun e -> (WORD (TY_u32, rva e.pe_export_address_fixup))) + exports)) + in + let export_name_pointer_table_fixup = + new_fixup "export name pointer table" + in + let export_name_pointer_table = + DEF + (export_name_pointer_table_fixup, + SEQ + (Array.map + (fun e -> (WORD (TY_u32, rva e.pe_export_name_fixup))) + exports)) + in + let export_name_table_fixup = new_fixup "export name table" in + let export_name_table = + DEF + (export_name_table_fixup, + SEQ + (Array.map + (fun e -> (DEF (e.pe_export_name_fixup, + (ZSTRING e.pe_export_name)))) + exports)) + in + let export_ordinal_table_fixup = new_fixup "export ordinal table" in + let export_ordinal_table = + DEF + (export_ordinal_table_fixup, + SEQ + (Array.mapi + (fun i _ -> (WORD (TY_u16, IMM (Int64.of_int (i))))) + exports)) + in + let image_name_fixup = new_fixup "image name fixup" in + let n_exports = IMM (Int64.of_int (Array.length exports)) in + let export_dir_table = + SEQ [| + WORD (TY_u32, IMM 0L); (* Flags, reserved. *) + WORD (TY_u32, IMM 0L); (* Timestamp, unused. *) + WORD (TY_u16, IMM 0L); (* Major vers., unused *) + WORD (TY_u16, IMM 0L); (* Minor vers., unused *) + WORD (TY_u32, rva image_name_fixup); (* Name RVA. *) + WORD (TY_u32, IMM 1L); (* Ordinal base = 1. *) + WORD (TY_u32, n_exports); (* # entries in EAT. *) + WORD (TY_u32, n_exports); (* # entries in ENPT/EOT.*) + WORD (TY_u32, rva export_addr_table_fixup); (* EAT *) + WORD (TY_u32, rva export_name_pointer_table_fixup); (* ENPT *) + WORD (TY_u32, rva export_ordinal_table_fixup); (* EOT *) + |] + in + def_aligned export_dir_fixup + (SEQ [| + export_dir_table; + export_addr_table; + export_name_pointer_table; + export_ordinal_table; + DEF (image_name_fixup, + ZSTRING (Session.filename_of sess.Session.sess_out)); + export_name_table + |]) +;; + +let pe_text_section + ~(sess:Session.sess) + ~(sem:Semant.ctxt) + ~(start_fixup:fixup option) + ~(rust_start_fixup:fixup option) + ~(main_fn_fixup:fixup option) + ~(text_fixup:fixup) + ~(crate_code:frag) + : frag = + let startup = + match (start_fixup, rust_start_fixup, main_fn_fixup) with + (None, _, _) + | (_, None, _) + | (_, _, None) -> MARK + | (Some start_fixup, + Some rust_start_fixup, + Some main_fn_fixup) -> + let e = X86.new_emitter_without_vregs () in + (* + * We are called from the Microsoft C library startup routine, + * and assumed to be stdcall; so we have to clean up our own + * stack before returning. + *) + X86.objfile_start e + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup + ~crate_fixup: sem.Semant.ctxt_crate_fixup + ~indirect_start: true; + X86.frags_of_emitted_quads sess e; + in + def_aligned + text_fixup + (SEQ [| + startup; + crate_code + |]) +;; + +let rustrt_imports sem = + let make_imports_for_lib (lib, tab) = + { + pe_import_dll_name_fixup = new_fixup "dll name"; + pe_import_dll_name = (match lib with + REQUIRED_LIB_rustrt -> "rustrt.dll" + | REQUIRED_LIB_crt -> "msvcrt.dll" + | REQUIRED_LIB_rust ls + | REQUIRED_LIB_c ls -> ls.required_libname); + pe_import_dll_ILT_fixup = new_fixup "dll ILT"; + pe_import_dll_IAT_fixup = new_fixup "dll IAT"; + pe_import_dll_imports = + Array.of_list + (List.map + begin + fun (name, fixup) -> + { + pe_import_name_fixup = new_fixup "import name"; + pe_import_name = name; + pe_import_address_fixup = fixup; + } + end + (htab_pairs tab)) + } + in + Array.of_list + (List.map + make_imports_for_lib + (htab_pairs sem.Semant.ctxt_native_required)) +;; + + +let crate_exports (sem:Semant.ctxt) : pe_export array = + let export_sym (name, fixup) = + { + pe_export_name_fixup = new_fixup "export name fixup"; + pe_export_name = name; + pe_export_address_fixup = fixup; + } + in + let export_seg (_, tab) = + Array.of_list (List.map export_sym (htab_pairs tab)) + in + Array.concat + (List.map export_seg + (htab_pairs sem.Semant.ctxt_native_provided)) +;; + + +let emit_file + (sess:Session.sess) + (crate:Ast.crate) + (code:Asm.frag) + (data:Asm.frag) + (sem:Semant.ctxt) + (dw:Dwarf.debug_records) + : unit = + + let all_hdrs_fixup = new_fixup "all headers" in + let all_init_data_fixup = new_fixup "all initialized data" in + let loader_hdr_fixup = new_fixup "loader header" in + let import_dir_fixup = new_fixup "import directory" in + let export_dir_fixup = new_fixup "export directory" in + let text_fixup = new_fixup "text section" in + let bss_fixup = new_fixup "bss section" in + let data_fixup = new_fixup "data section" in + let image_fixup = new_fixup "image fixup" in + let symtab_fixup = new_fixup "symbol table" in + let strtab_fixup = new_fixup "string table" in + let note_rust_fixup = new_fixup ".note.rust section" in + + let (start_fixup, rust_start_fixup) = + if sess.Session.sess_library_mode + then (None, None) + else + (Some (new_fixup "start"), + Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) + in + + let header = (pe_header + ~machine: IMAGE_FILE_MACHINE_I386 + ~symbol_table_fixup: symtab_fixup + ~number_of_sections: 8L + ~number_of_symbols: 0L + ~loader_hdr_fixup: loader_hdr_fixup + ~characteristics:([IMAGE_FILE_EXECUTABLE_IMAGE; + IMAGE_FILE_LINE_NUMS_STRIPPED; + IMAGE_FILE_32BIT_MACHINE;] + @ + (if sess.Session.sess_library_mode + then [ IMAGE_FILE_DLL ] + else [ ]))) + in + let symtab = + (* + * We're not actually presenting a "symbol table", but wish to + * provide a "string table" which comes immediately *after* the + * symbol table. It's a violation of the PE spec to put one of + * these in an executable file (as opposed to just loadable) but + * it's necessary to communicate the debug section names to GDB, + * and nobody else complains. + *) + (def_aligned + symtab_fixup + (def_aligned + strtab_fixup + (SEQ + [| + WORD (TY_u32, (F_SZ strtab_fixup)); + ZSTRING ".debug_aranges"; + ZSTRING ".debug_pubnames"; + ZSTRING ".debug_info"; + ZSTRING ".debug_abbrev"; + ZSTRING ".debug_line"; + ZSTRING ".debug_frame"; + ZSTRING ".note.rust"; + |]))) + in + let loader_header = (pe_loader_header + ~text_fixup + ~init_data_fixup: all_init_data_fixup + ~size_of_uninit_data: 0L + ~entry_point_fixup: start_fixup + ~image_fixup: image_fixup + ~subsys: IMAGE_SUBSYSTEM_WINDOWS_CUI + ~all_hdrs_fixup + ~loader_hdr_fixup + ~import_dir_fixup + ~export_dir_fixup) + in + let text_header = (pe_section_header + ~id: SECTION_ID_TEXT + ~hdr_fixup: text_fixup) + + in + let bss_header = (pe_section_header + ~id: SECTION_ID_BSS + ~hdr_fixup: bss_fixup) + in + let import_section = (pe_import_section + ~import_dir_fixup + ~dlls: (rustrt_imports sem)) + in + let import_header = (pe_section_header + ~id: SECTION_ID_IMPORTS + ~hdr_fixup: import_dir_fixup) + in + let export_section = (pe_export_section + ~sess + ~export_dir_fixup + ~exports: (crate_exports sem)) + in + let export_header = (pe_section_header + ~id: SECTION_ID_EXPORTS + ~hdr_fixup: export_dir_fixup) + in + let data_header = (pe_section_header + ~id: SECTION_ID_DATA + ~hdr_fixup: data_fixup) + in +(* + let debug_aranges_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_ARANGES + ~hdr_fixup: sem.Semant.ctxt_debug_aranges_fixup) + in + let debug_pubnames_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_PUBNAMES + ~hdr_fixup: sem.Semant.ctxt_debug_pubnames_fixup) + in +*) + let debug_info_header = (pe_section_header + ~id: SECTION_ID_DEBUG_INFO + ~hdr_fixup: sem.Semant.ctxt_debug_info_fixup) + in + let debug_abbrev_header = (pe_section_header + ~id: SECTION_ID_DEBUG_ABBREV + ~hdr_fixup: sem.Semant.ctxt_debug_abbrev_fixup) + in +(* + let debug_line_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_LINE + ~hdr_fixup: sem.Semant.ctxt_debug_line_fixup) + in + let debug_frame_header = + (pe_section_header + ~id: SECTION_ID_DEBUG_FRAME + ~hdr_fixup: sem.Semant.ctxt_debug_frame_fixup) + in +*) + let note_rust_header = (pe_section_header + ~id: SECTION_ID_NOTE_RUST + ~hdr_fixup: note_rust_fixup) + in + let all_headers = (def_file_aligned + all_hdrs_fixup + (SEQ + [| + pe_msdos_header_and_padding; + header; + loader_header; + text_header; + bss_header; + import_header; + export_header; + data_header; + (* + debug_aranges_header; + debug_pubnames_header; + *) + debug_info_header; + debug_abbrev_header; + (* + debug_line_header; + debug_frame_header; + *) + note_rust_header; + |])) + in + + let text_section = (pe_text_section + ~sem + ~sess + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup: sem.Semant.ctxt_main_fn_fixup + ~text_fixup + ~crate_code: code) + in + let bss_section = def_aligned bss_fixup (BSS 0x10L) + in + let data_section = (def_aligned data_fixup + (SEQ [| data; symtab; |])) + in + let all_init_data = (def_aligned + all_init_data_fixup + (SEQ [| import_section; + export_section; + data_section; |])) + in +(* + let debug_aranges_section = + def_aligned sem.Semant.ctxt_debug_aranges_fixup dw.Dwarf.debug_aranges + in + let debug_pubnames_section = + def_aligned sem.Semant.ctxt_debug_pubnames_fixup dw.Dwarf.debug_pubnames + in +*) + let debug_info_section = + def_aligned sem.Semant.ctxt_debug_info_fixup dw.Dwarf.debug_info + in + let debug_abbrev_section = + def_aligned sem.Semant.ctxt_debug_abbrev_fixup dw.Dwarf.debug_abbrev + in +(* + let debug_line_section = + def_aligned sem.Semant.ctxt_debug_line_fixup dw.Dwarf.debug_line + in + let debug_frame_section = + def_aligned sem.Semant.ctxt_debug_frame_fixup dw.Dwarf.debug_frame + in +*) + let note_rust_section = + def_aligned note_rust_fixup + (Asm.note_rust_frags crate.node.Ast.crate_meta) + in + + let all_frags = SEQ [| MEMPOS pe_image_base; + (def_file_aligned image_fixup + (SEQ [| DEF (sem.Semant.ctxt_image_base_fixup, + MARK); + all_headers; + text_section; + bss_section; + all_init_data; + (* debug_aranges_section; *) + (* debug_pubnames_section; *) + debug_info_section; + debug_abbrev_section; + (* debug_line_section; *) + (* debug_frame_section; *) + note_rust_section; + ALIGN_MEM (pe_mem_alignment, MARK) + |] + ) + ) + |] + in + write_out_frag sess true all_frags +;; + +let pe_magic = "PE";; + +let sniff + (sess:Session.sess) + (filename:filename) + : asm_reader option = + try + let stat = Unix.stat filename in + if (stat.Unix.st_kind = Unix.S_REG) && + (stat.Unix.st_size >= pe_file_alignment) + then + let ar = new_asm_reader sess filename in + let _ = log sess "sniffing PE file" in + (* PE header offset is at 0x3c in the MS-DOS compatibility header. *) + let _ = ar.asm_seek 0x3c in + let pe_hdr_off = ar.asm_get_u32() in + let _ = log sess "PE header offset: 0x%x" pe_hdr_off in + + let _ = ar.asm_seek pe_hdr_off in + let pe_signature = ar.asm_get_zstr_padded 4 in + let _ = log sess " PE signature: '%s'" pe_signature in + if pe_signature = pe_magic + then (ar.asm_seek 0; Some ar) + else None + else + None + with + _ -> None +;; + + +let get_sections + (sess:Session.sess) + (ar:asm_reader) + : (string,(int*int)) Hashtbl.t = + let _ = log sess "reading sections" in + (* PE header offset is at 0x3c in the MS-DOS compatibility header. *) + let _ = ar.asm_seek 0x3c in + let pe_hdr_off = ar.asm_get_u32() in + let _ = log sess "PE header offset: 0x%x" pe_hdr_off in + + let _ = ar.asm_seek pe_hdr_off in + let pe_signature = ar.asm_get_zstr_padded 4 in + let _ = log sess " PE signature: '%s'" pe_signature in + let _ = assert (pe_signature = pe_magic) in + let _ = ar.asm_adv_u16() in (* machine type *) + + let num_sections = ar.asm_get_u16() in + let _ = log sess " num sections: %d" num_sections in + + let _ = ar.asm_adv_u32() in (* timestamp *) + + let symtab_off = ar.asm_get_u32() in + let _ = log sess " symtab offset: 0x%x" symtab_off in + + let num_symbols = ar.asm_get_u32() in + let _ = log sess " num symbols: %d" num_symbols in + + let loader_hdr_size = ar.asm_get_u16() in + let _ = log sess "loader header sz: %d" loader_hdr_size in + + let _ = ar.asm_adv_u16() in (* flags *) + let sections_off = (ar.asm_get_off()) + loader_hdr_size in + + let sects = Hashtbl.create 0 in + + let _ = + ar.asm_seek sections_off; + for i = 0 to (num_sections - 1) do + (* + * Section-name encoding is crazy. ASCII-encoding offsets of + * long names. See pe_section_header for details. + *) + let sect_name = + let sect_name = ar.asm_get_zstr_padded 8 in + assert ((String.length sect_name) > 0); + if sect_name.[0] = '/' + then + let off_str = + String.sub sect_name 1 ((String.length sect_name) - 1) + in + let i = int_of_string off_str in + let curr = ar.asm_get_off() in + ar.asm_seek (symtab_off + i); + let ext_name = ar.asm_get_zstr() in + ar.asm_seek curr; + ext_name + else + sect_name + in + let _ = ar.asm_adv_u32() in (* virtual size *) + let _ = ar.asm_adv_u32() in (* virtual address *) + let file_sz = ar.asm_get_u32() in + let file_off = ar.asm_get_u32() in + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* reserved *) + let _ = ar.asm_adv_u32() in (* flags *) + Hashtbl.add sects sect_name (file_off, file_sz); + log sess " section %d: %s, size %d, offset 0x%x" + i sect_name file_sz file_off; + done + in + sects +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) |