1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
|
(*
Module for writing Microsoft PE files
Every image has a base address it's to be loaded at.
"file pointer" = offset in file
"VA" = address at runtime
"RVA" = VA - base address
If you write a non-RVA absolute address at any point you must put it
in a rebasing list so the loader can adjust it when/if it has to load
you at a different address.
Almost all addresses in the file are RVAs. Worry about the VAs.
*)
open Asm;;
open Common;;
let log (sess:Session.sess) =
Session.log "obj (pe)"
sess.Session.sess_log_obj
sess.Session.sess_log_out
;;
let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit =
if sess.Session.sess_log_obj
then thunk ()
else ()
;;
(*
The default image base (VA) for an executable on Win32 is 0x400000.
We use this too. RVAs are relative to this. RVA 0 = VA 0x400000.
Alignments are also relatively standard and fixed for Win32/PE32:
4k memory pages, 512 byte disk sectors.
Since this is a stupid emitter, and we're not generating an awful
lot of sections, we are not going to differentiate between these
two kinds of alignment: we just align our sections to memory pages
and sometimes waste most of them. Shucks.
*)
let pe_image_base = 0x400000L;;
let pe_file_alignment = 0x200;;
let pe_mem_alignment = 0x1000;;
let rva (f:fixup) = (SUB ((M_POS f), (IMM pe_image_base)));;
let def_file_aligned f i =
ALIGN_FILE
(pe_file_alignment,
SEQ [|
DEF(f,
SEQ [| i;
ALIGN_FILE
(pe_file_alignment, MARK) |]) |] )
;;
let def_mem_aligned f i =
ALIGN_MEM
(pe_mem_alignment,
SEQ [|
DEF(f,
SEQ [| i;
ALIGN_MEM
(pe_mem_alignment, MARK) |]) |] )
;;
let align_both i =
ALIGN_FILE (pe_file_alignment,
(ALIGN_MEM (pe_mem_alignment, i)))
;;
let def_aligned f i =
align_both
(SEQ [| DEF(f,i);
(align_both MARK)|])
;;
(*
At the beginning of a PE file there is an MS-DOS stub, 0x00 - 0x7F,
that we just insert literally. It prints "This program must be run
under Win32" and exits. Woo!
Within it, at offset 0x3C, there is an encoded offset of the PE
header we actually care about. So 0x3C - 0x3F are 0x00000100 (LE)
which say "the PE header is actually at 0x100", a nice sensible spot
for it. We pad the next 128 bytes out to 0x100 and start there for
real.
From then on in it's a sensible object file. Here's the MS-DOS bit.
*)
let pe_msdos_header_and_padding
: frag =
SEQ [|
BYTES
[|
(* 00000000 *)
0x4d; 0x5a; 0x50; 0x00; 0x02; 0x00; 0x00; 0x00;
0x04; 0x00; 0x0f; 0x00; 0xff; 0xff; 0x00; 0x00;
(* 00000010 *)
0xb8; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
0x40; 0x00; 0x1a; 0x00; 0x00; 0x00; 0x00; 0x00;
(* 00000020 *)
0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
(* 00000030 *)
0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
0x00; 0x00; 0x00; 0x00; 0x00; 0x01; 0x00; 0x00;
(* ^^^^PE HDR offset^^^^^ *)
(* 00000040 *)
0xba; 0x10; 0x00; 0x0e; 0x1f; 0xb4; 0x09; 0xcd;
0x21; 0xb8; 0x01; 0x4c; 0xcd; 0x21; 0x90; 0x90;
(* 00000050 *)
0x54; 0x68; 0x69; 0x73; 0x20; 0x70; 0x72; 0x6f; (* "This pro" *)
0x67; 0x72; 0x61; 0x6d; 0x20; 0x6d; 0x75; 0x73; (* "gram mus" *)
(* 00000060 *)
0x74; 0x20; 0x62; 0x65; 0x20; 0x72; 0x75; 0x6e; (* "t be run" *)
0x20; 0x75; 0x6e; 0x64; 0x65; 0x72; 0x20; 0x57; (* " under W" *)
(* 00000070 *)
0x69; 0x6e; 0x33; 0x32; 0x0d; 0x0a; 0x24; 0x37; (* "in32\r\n" *)
0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00; 0x00;
|];
PAD 0x80
|]
;;
(*
A work of art, is it not? Take a moment to appreciate the madness.
All done? Ok, now on to the PE header proper.
PE headers are just COFF headers with a little preamble.
*)
type pe_machine =
(* Maybe support more later. *)
IMAGE_FILE_MACHINE_AMD64
| IMAGE_FILE_MACHINE_I386
;;
let pe_timestamp _ =
Int64.of_float (Unix.gettimeofday())
;;
type pe_characteristics =
(* Maybe support more later. *)
IMAGE_FILE_RELOCS_STRIPPED
| IMAGE_FILE_EXECUTABLE_IMAGE
| IMAGE_FILE_LINE_NUMS_STRIPPED
| IMAGE_FILE_LOCAL_SYMS_STRIPPED
| IMAGE_FILE_32BIT_MACHINE
| IMAGE_FILE_DEBUG_STRIPPED
| IMAGE_FILE_DLL
;;
let pe_header
~(machine:pe_machine)
~(symbol_table_fixup:fixup)
~(number_of_sections:int64)
~(number_of_symbols:int64)
~(loader_hdr_fixup:fixup)
~(characteristics:pe_characteristics list)
: frag =
ALIGN_FILE
(8,
SEQ [|
STRING "PE\x00\x00";
WORD (TY_u16, (IMM (match machine with
IMAGE_FILE_MACHINE_AMD64 -> 0x8664L
| IMAGE_FILE_MACHINE_I386 -> 0x014cL)));
WORD (TY_u16, (IMM number_of_sections));
WORD (TY_u32, (IMM (pe_timestamp())));
WORD (TY_u32, (F_POS symbol_table_fixup));
WORD (TY_u32, (IMM number_of_symbols));
WORD (TY_u16, (F_SZ loader_hdr_fixup));
WORD (TY_u16, (IMM (fold_flags
(fun c -> match c with
IMAGE_FILE_RELOCS_STRIPPED -> 0x1L
| IMAGE_FILE_EXECUTABLE_IMAGE -> 0x2L
| IMAGE_FILE_LINE_NUMS_STRIPPED -> 0x4L
| IMAGE_FILE_LOCAL_SYMS_STRIPPED -> 0x8L
| IMAGE_FILE_32BIT_MACHINE -> 0x100L
| IMAGE_FILE_DEBUG_STRIPPED -> 0x200L
| IMAGE_FILE_DLL -> 0x2000L)
characteristics)))
|])
;;
(*
After the PE header comes an "optional" header for the loader. In
our case this is hardly optional since we are producing a file for
the loader.
*)
type pe_subsystem =
(* Maybe support more later. *)
IMAGE_SUBSYSTEM_WINDOWS_GUI
| IMAGE_SUBSYSTEM_WINDOWS_CUI
;;
let zero32 = WORD (TY_u32, (IMM 0L))
;;
let pe_loader_header
~(text_fixup:fixup)
~(init_data_fixup:fixup)
~(size_of_uninit_data:int64)
~(entry_point_fixup:fixup option)
~(image_fixup:fixup)
~(all_hdrs_fixup:fixup)
~(subsys:pe_subsystem)
~(loader_hdr_fixup:fixup)
~(import_dir_fixup:fixup)
~(export_dir_fixup:fixup)
: frag =
DEF
(loader_hdr_fixup,
SEQ [|
WORD (TY_u16, (IMM 0x10bL)); (* COFF magic tag for PE32. *)
(* Snagged *)
WORD (TY_u8, (IMM 0x2L)); (* Linker major version. *)
WORD (TY_u8, (IMM 0x38L)); (* Linker minor version. *)
WORD (TY_u32, (F_SZ text_fixup)); (* "size of code" *)
WORD (TY_u32, (* "size of all init data" *)
(F_SZ init_data_fixup));
WORD (TY_u32,
(IMM size_of_uninit_data));
begin
match entry_point_fixup with
None -> zero32 (* Library mode: DLLMain *)
| Some entry_point_fixup ->
WORD (TY_u32,
(rva
entry_point_fixup)) (* "address of entry point" *)
end;
WORD (TY_u32, (rva text_fixup)); (* "base of code" *)
WORD (TY_u32, (rva init_data_fixup)); (* "base of data" *)
WORD (TY_u32, (IMM pe_image_base));
WORD (TY_u32, (IMM (Int64.of_int
pe_mem_alignment)));
WORD (TY_u32, (IMM (Int64.of_int
pe_file_alignment)));
WORD (TY_u16, (IMM 4L)); (* Major OS version: NT4. *)
WORD (TY_u16, (IMM 0L)); (* Minor OS version. *)
WORD (TY_u16, (IMM 1L)); (* Major image version. *)
WORD (TY_u16, (IMM 0L)); (* Minor image version. *)
WORD (TY_u16, (IMM 4L)); (* Major subsystem version. *)
WORD (TY_u16, (IMM 0L)); (* Minor subsystem version. *)
zero32; (* Reserved. *)
WORD (TY_u32, (M_SZ image_fixup));
WORD (TY_u32, (M_SZ all_hdrs_fixup));
zero32; (* Checksum, but OK if zero. *)
WORD (TY_u16, (IMM (match subsys with
IMAGE_SUBSYSTEM_WINDOWS_GUI -> 2L
| IMAGE_SUBSYSTEM_WINDOWS_CUI -> 3L)));
WORD (TY_u16, (IMM 0L)); (* DLL characteristics. *)
WORD (TY_u32, (IMM 0x100000L)); (* Size of stack reserve. *)
WORD (TY_u32, (IMM 0x4000L)); (* Size of stack commit. *)
WORD (TY_u32, (IMM 0x100000L)); (* Size of heap reserve. *)
WORD (TY_u32, (IMM 0x1000L)); (* Size of heap commit. *)
zero32; (* Reserved. *)
WORD (TY_u32, (IMM 16L)); (* Number of dir references. *)
(* Begin directories, variable part of hdr. *)
(*
Standard PE files have ~10 directories referenced from
here. We only fill in two of them -- the export/import
directories -- because we don't care about the others. We
leave the rest as zero in case someone is looking for
them. This may be superfluous or wrong.
*)
WORD (TY_u32, (rva export_dir_fixup));
WORD (TY_u32, (M_SZ export_dir_fixup));
WORD (TY_u32, (rva import_dir_fixup));
WORD (TY_u32, (M_SZ import_dir_fixup));
zero32; zero32; (* Resource dir. *)
zero32; zero32; (* Exception dir. *)
zero32; zero32; (* Security dir. *)
zero32; zero32; (* Base reloc dir. *)
zero32; zero32; (* Debug dir. *)
zero32; zero32; (* Image desc dir. *)
zero32; zero32; (* Mach spec dir. *)
zero32; zero32; (* TLS dir. *)
zero32; zero32; (* Load config. *)
zero32; zero32; (* Bound import. *)
zero32; zero32; (* IAT *)
zero32; zero32; (* Delay import. *)
zero32; zero32; (* COM descriptor *)
zero32; zero32; (* ???????? *)
|])
;;
type pe_section_id =
(* Maybe support more later. *)
SECTION_ID_TEXT
| SECTION_ID_DATA
| SECTION_ID_RDATA
| SECTION_ID_BSS
| SECTION_ID_IMPORTS
| SECTION_ID_EXPORTS
| SECTION_ID_DEBUG_ARANGES
| SECTION_ID_DEBUG_PUBNAMES
| SECTION_ID_DEBUG_INFO
| SECTION_ID_DEBUG_ABBREV
| SECTION_ID_DEBUG_LINE
| SECTION_ID_DEBUG_FRAME
| SECTION_ID_NOTE_RUST
;;
type pe_section_characteristics =
(* Maybe support more later. *)
IMAGE_SCN_CNT_CODE
| IMAGE_SCN_CNT_INITIALIZED_DATA
| IMAGE_SCN_CNT_UNINITIALIZED_DATA
| IMAGE_SCN_MEM_DISCARDABLE
| IMAGE_SCN_MEM_SHARED
| IMAGE_SCN_MEM_EXECUTE
| IMAGE_SCN_MEM_READ
| IMAGE_SCN_MEM_WRITE
let pe_section_header
~(id:pe_section_id)
~(hdr_fixup:fixup)
: frag =
let
characteristics =
match id with
SECTION_ID_TEXT -> [ IMAGE_SCN_CNT_CODE;
IMAGE_SCN_MEM_READ;
IMAGE_SCN_MEM_EXECUTE ]
| SECTION_ID_DATA -> [ IMAGE_SCN_CNT_INITIALIZED_DATA;
IMAGE_SCN_MEM_READ;
IMAGE_SCN_MEM_WRITE ]
| SECTION_ID_BSS -> [ IMAGE_SCN_CNT_UNINITIALIZED_DATA;
IMAGE_SCN_MEM_READ;
IMAGE_SCN_MEM_WRITE ]
| SECTION_ID_IMPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA;
IMAGE_SCN_MEM_READ;
IMAGE_SCN_MEM_WRITE ]
| SECTION_ID_EXPORTS -> [ IMAGE_SCN_CNT_INITIALIZED_DATA;
IMAGE_SCN_MEM_READ ]
| SECTION_ID_RDATA
| SECTION_ID_DEBUG_ARANGES
| SECTION_ID_DEBUG_PUBNAMES
| SECTION_ID_DEBUG_INFO
| SECTION_ID_DEBUG_ABBREV
| SECTION_ID_DEBUG_LINE
| SECTION_ID_DEBUG_FRAME
| SECTION_ID_NOTE_RUST -> [ IMAGE_SCN_CNT_INITIALIZED_DATA;
IMAGE_SCN_MEM_READ ]
in
SEQ [|
STRING
begin
match id with
SECTION_ID_TEXT -> ".text\x00\x00\x00"
| SECTION_ID_DATA -> ".data\x00\x00\x00"
| SECTION_ID_RDATA -> ".rdata\x00\x00"
| SECTION_ID_BSS -> ".bss\x00\x00\x00\x00"
| SECTION_ID_IMPORTS -> ".idata\x00\x00"
| SECTION_ID_EXPORTS -> ".edata\x00\x00"
(* There is a bizarre Microsoft COFF extension to account
* for longer-than-8-char section names: you emit a single
* '/' character then the ASCII-numeric encoding of the
* offset within the file's string table of the full name.
* So we put all our extended section names at the
* beginning of the string table in a very specific order
* and hard-wire the offsets as "names" here. You could
* theoretically extend this to a "new kind" of fixup
* reference (ASCII_POS or such), if you feel this is
* something you want to twiddle with.
*)
| SECTION_ID_DEBUG_ARANGES -> "/4\x00\x00\x00\x00\x00\x00"
| SECTION_ID_DEBUG_PUBNAMES -> "/19\x00\x00\x00\x00\x00"
| SECTION_ID_DEBUG_INFO -> "/35\x00\x00\x00\x00\x00"
| SECTION_ID_DEBUG_ABBREV -> "/47\x00\x00\x00\x00\x00"
| SECTION_ID_DEBUG_LINE -> "/61\x00\x00\x00\x00\x00"
| SECTION_ID_DEBUG_FRAME -> "/73\x00\x00\x00\x00\x00"
| SECTION_ID_NOTE_RUST -> "/86\x00\x00\x00\x00\x00"
end;
(* The next two pairs are only supposed to be different if the
file and section alignments differ. This is a stupid emitter
so they're not, no problem. *)
WORD (TY_u32, (M_SZ hdr_fixup)); (* "Virtual size" *)
WORD (TY_u32, (rva hdr_fixup)); (* "Virtual address" *)
WORD (TY_u32, (F_SZ hdr_fixup)); (* "Size of raw data" *)
WORD (TY_u32, (F_POS hdr_fixup)); (* "Pointer to raw data" *)
zero32; (* Reserved. *)
zero32; (* Reserved. *)
zero32; (* Reserved. *)
WORD (TY_u32, (IMM (fold_flags
(fun c -> match c with
IMAGE_SCN_CNT_CODE -> 0x20L
| IMAGE_SCN_CNT_INITIALIZED_DATA -> 0x40L
| IMAGE_SCN_CNT_UNINITIALIZED_DATA -> 0x80L
| IMAGE_SCN_MEM_DISCARDABLE -> 0x2000000L
| IMAGE_SCN_MEM_SHARED -> 0x10000000L
| IMAGE_SCN_MEM_EXECUTE -> 0x20000000L
| IMAGE_SCN_MEM_READ -> 0x40000000L
| IMAGE_SCN_MEM_WRITE -> 0x80000000L)
characteristics)))
|]
;;
(*
"Thunk" is a misnomer here; the thunk RVA is the address of a word
that the loader will store an address into. The stored address is
the address of the imported object.
So if the imported object is X, and the thunk slot is Y, the loader
is doing "Y = &X" and returning &Y as the thunk RVA. To load datum X
after the imports are resolved, given the thunk RVA R, you load
**R.
*)
type pe_import =
{
pe_import_name_fixup: fixup;
pe_import_name: string;
pe_import_address_fixup: fixup;
}
type pe_import_dll_entry =
{
pe_import_dll_name_fixup: fixup;
pe_import_dll_name: string;
pe_import_dll_ILT_fixup: fixup;
pe_import_dll_IAT_fixup: fixup;
pe_import_dll_imports: pe_import array;
}
(*
The import section .idata has a mostly self-contained table
structure. You feed it a list of DLL entries, each of which names
a DLL and lists symbols in the DLL to import.
For each named symbol, a 4-byte slot will be reserved in an
"import lookup table" (ILT, also in this section). The slot is
a pointer to a string in this section giving the name.
Immediately *after* the ILT, there is an "import address table" (IAT),
which is initially identical to the ILT. The loader replaces the entries
in the IAT slots with the imported pointers at runtime.
A central directory at the start of the section lists all the the import
thunk tables. Each entry in the import directory is 20 bytes (5 words)
but only the last 2 are used: the second last is a pointer to the string
name of the DLL in question (string also in this section) and the last is
a pointer to the import thunk table itself (also in this section).
Curiously, of the 5 documents I've consulted on the nature of the
first 3 fields, I find a variety of interpretations.
*)
let pe_import_section
~(import_dir_fixup:fixup)
~(dlls:pe_import_dll_entry array)
: frag =
let form_dir_entry
(entry:pe_import_dll_entry)
: frag =
SEQ [|
(* Note: documented opinions vary greatly about whether the
first, last, or both of the slots in one of these rows points
to the RVA of the name/hint used to look the import up. This
table format is a mess! *)
WORD (TY_u32,
(rva
entry.pe_import_dll_ILT_fixup)); (* Import lookup table. *)
WORD (TY_u32, (IMM 0L)); (* Timestamp, unused. *)
WORD (TY_u32, (IMM 0x0L)); (* Forwarder chain, unused. *)
WORD (TY_u32, (rva entry.pe_import_dll_name_fixup));
WORD (TY_u32,
(rva
entry.pe_import_dll_IAT_fixup)); (* Import address table.*)
|]
in
let form_ILT_slot
(import:pe_import)
: frag =
(WORD (TY_u32, (rva import.pe_import_name_fixup)))
in
let form_IAT_slot
(import:pe_import)
: frag =
(DEF (import.pe_import_address_fixup,
(WORD (TY_u32, (rva import.pe_import_name_fixup)))))
in
let form_tables_for_dll
(dll:pe_import_dll_entry)
: frag =
let terminator = WORD (TY_u32, (IMM 0L)) in
let ilt =
SEQ [|
SEQ (Array.map form_ILT_slot dll.pe_import_dll_imports);
terminator
|]
in
let iat =
SEQ [|
SEQ (Array.map form_IAT_slot dll.pe_import_dll_imports);
terminator
|]
in
if Array.length dll.pe_import_dll_imports < 1
then bug () "Pe.form_tables_for_dll: empty imports"
else
SEQ [|
DEF (dll.pe_import_dll_ILT_fixup, ilt);
DEF (dll.pe_import_dll_IAT_fixup, iat)
|]
in
let form_import_string
(import:pe_import)
: frag =
DEF
(import.pe_import_name_fixup,
SEQ [|
(* import string entries begin with a 2-byte "hint", but we just
set it to zero. *)
(WORD (TY_u16, (IMM 0L)));
ZSTRING import.pe_import_name;
(if String.length import.pe_import_name mod 2 == 0
then PAD 1
else PAD 0)
|])
in
let form_dir_entry_string
(dll:pe_import_dll_entry)
: frag =
DEF
(dll.pe_import_dll_name_fixup,
SEQ [| ZSTRING dll.pe_import_dll_name;
(if String.length dll.pe_import_dll_name mod 2 == 0
then PAD 1
else PAD 0);
SEQ (Array.map form_import_string dll.pe_import_dll_imports) |])
in
let dir = SEQ (Array.map form_dir_entry dlls) in
let dir_terminator = PAD 20 in
let tables = SEQ (Array.map form_tables_for_dll dlls) in
let strings = SEQ (Array.map form_dir_entry_string dlls)
in
def_aligned
import_dir_fixup
(SEQ
[|
dir;
dir_terminator;
tables;
strings
|])
;;
type pe_export =
{
pe_export_name_fixup: fixup;
pe_export_name: string;
pe_export_address_fixup: fixup;
}
;;
let pe_export_section
~(sess:Session.sess)
~(export_dir_fixup:fixup)
~(exports:pe_export array)
: frag =
Array.sort (fun a b -> compare a.pe_export_name b.pe_export_name) exports;
let export_addr_table_fixup = new_fixup "export address table" in
let export_addr_table =
DEF
(export_addr_table_fixup,
SEQ
(Array.map
(fun e -> (WORD (TY_u32, rva e.pe_export_address_fixup)))
exports))
in
let export_name_pointer_table_fixup =
new_fixup "export name pointer table"
in
let export_name_pointer_table =
DEF
(export_name_pointer_table_fixup,
SEQ
(Array.map
(fun e -> (WORD (TY_u32, rva e.pe_export_name_fixup)))
exports))
in
let export_name_table_fixup = new_fixup "export name table" in
let export_name_table =
DEF
(export_name_table_fixup,
SEQ
(Array.map
(fun e -> (DEF (e.pe_export_name_fixup,
(ZSTRING e.pe_export_name))))
exports))
in
let export_ordinal_table_fixup = new_fixup "export ordinal table" in
let export_ordinal_table =
DEF
(export_ordinal_table_fixup,
SEQ
(Array.mapi
(fun i _ -> (WORD (TY_u16, IMM (Int64.of_int (i)))))
exports))
in
let image_name_fixup = new_fixup "image name fixup" in
let n_exports = IMM (Int64.of_int (Array.length exports)) in
let export_dir_table =
SEQ [|
WORD (TY_u32, IMM 0L); (* Flags, reserved. *)
WORD (TY_u32, IMM 0L); (* Timestamp, unused. *)
WORD (TY_u16, IMM 0L); (* Major vers., unused *)
WORD (TY_u16, IMM 0L); (* Minor vers., unused *)
WORD (TY_u32, rva image_name_fixup); (* Name RVA. *)
WORD (TY_u32, IMM 1L); (* Ordinal base = 1. *)
WORD (TY_u32, n_exports); (* # entries in EAT. *)
WORD (TY_u32, n_exports); (* # entries in ENPT/EOT.*)
WORD (TY_u32, rva export_addr_table_fixup); (* EAT *)
WORD (TY_u32, rva export_name_pointer_table_fixup); (* ENPT *)
WORD (TY_u32, rva export_ordinal_table_fixup); (* EOT *)
|]
in
def_aligned export_dir_fixup
(SEQ [|
export_dir_table;
export_addr_table;
export_name_pointer_table;
export_ordinal_table;
DEF (image_name_fixup,
ZSTRING (Session.filename_of sess.Session.sess_out));
export_name_table
|])
;;
let pe_text_section
~(sess:Session.sess)
~(sem:Semant.ctxt)
~(start_fixup:fixup option)
~(rust_start_fixup:fixup option)
~(main_fn_fixup:fixup option)
~(text_fixup:fixup)
~(crate_code:frag)
: frag =
let startup =
match (start_fixup, rust_start_fixup, main_fn_fixup) with
(None, _, _)
| (_, None, _)
| (_, _, None) -> MARK
| (Some start_fixup,
Some rust_start_fixup,
Some main_fn_fixup) ->
let e = X86.new_emitter_without_vregs () in
(*
* We are called from the Microsoft C library startup routine,
* and assumed to be stdcall; so we have to clean up our own
* stack before returning.
*)
X86.objfile_start e
~start_fixup
~rust_start_fixup
~main_fn_fixup
~crate_fixup: sem.Semant.ctxt_crate_fixup
~indirect_start: true;
X86.frags_of_emitted_quads sess e;
in
def_aligned
text_fixup
(SEQ [|
startup;
crate_code
|])
;;
let rustrt_imports sem =
let make_imports_for_lib (lib, tab) =
{
pe_import_dll_name_fixup = new_fixup "dll name";
pe_import_dll_name = (match lib with
REQUIRED_LIB_rustrt -> "rustrt.dll"
| REQUIRED_LIB_crt -> "msvcrt.dll"
| REQUIRED_LIB_rust ls
| REQUIRED_LIB_c ls -> ls.required_libname);
pe_import_dll_ILT_fixup = new_fixup "dll ILT";
pe_import_dll_IAT_fixup = new_fixup "dll IAT";
pe_import_dll_imports =
Array.of_list
(List.map
begin
fun (name, fixup) ->
{
pe_import_name_fixup = new_fixup "import name";
pe_import_name = name;
pe_import_address_fixup = fixup;
}
end
(htab_pairs tab))
}
in
Array.of_list
(List.map
make_imports_for_lib
(htab_pairs sem.Semant.ctxt_native_required))
;;
let crate_exports (sem:Semant.ctxt) : pe_export array =
let export_sym (name, fixup) =
{
pe_export_name_fixup = new_fixup "export name fixup";
pe_export_name = name;
pe_export_address_fixup = fixup;
}
in
let export_seg (_, tab) =
Array.of_list (List.map export_sym (htab_pairs tab))
in
(* Make some fake symbol table entries to aid in debugging. *)
let export_stab name fixup =
{
pe_export_name_fixup = new_fixup "export name fixup";
pe_export_name = "rust$" ^ name;
pe_export_address_fixup = fixup
}
in
let export_stab_of_item (node_id, code) =
let name = Hashtbl.find sem.Semant.ctxt_all_item_names node_id in
let name' = "item$" ^ (Semant.string_of_name name) in
export_stab name' code.Semant.code_fixup
in
let export_stab_of_glue (glue, code) =
export_stab (Semant.glue_str sem glue) code.Semant.code_fixup
in
let stabs =
Array.of_list (List.concat [
(List.map export_stab_of_item
(htab_pairs sem.Semant.ctxt_all_item_code));
(List.map export_stab_of_glue (htab_pairs sem.Semant.ctxt_glue_code))
])
in
Array.concat
(stabs::(List.map export_seg
(htab_pairs sem.Semant.ctxt_native_provided)))
;;
let emit_file
(sess:Session.sess)
(crate:Ast.crate)
(code:Asm.frag)
(data:Asm.frag)
(sem:Semant.ctxt)
(dw:Dwarf.debug_records)
: unit =
let all_hdrs_fixup = new_fixup "all headers" in
let all_init_data_fixup = new_fixup "all initialized data" in
let loader_hdr_fixup = new_fixup "loader header" in
let import_dir_fixup = new_fixup "import directory" in
let export_dir_fixup = new_fixup "export directory" in
let text_fixup = new_fixup "text section" in
let bss_fixup = new_fixup "bss section" in
let data_fixup = new_fixup "data section" in
let image_fixup = new_fixup "image fixup" in
let symtab_fixup = new_fixup "symbol table" in
let strtab_fixup = new_fixup "string table" in
let note_rust_fixup = new_fixup ".note.rust section" in
let (start_fixup, rust_start_fixup) =
if sess.Session.sess_library_mode
then (None, None)
else
(Some (new_fixup "start"),
Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start"))
in
let header = (pe_header
~machine: IMAGE_FILE_MACHINE_I386
~symbol_table_fixup: symtab_fixup
~number_of_sections: 8L
~number_of_symbols: 0L
~loader_hdr_fixup: loader_hdr_fixup
~characteristics:([IMAGE_FILE_EXECUTABLE_IMAGE;
IMAGE_FILE_LINE_NUMS_STRIPPED;
IMAGE_FILE_32BIT_MACHINE;]
@
(if sess.Session.sess_library_mode
then [ IMAGE_FILE_DLL ]
else [ ])))
in
let symtab =
(*
* We're not actually presenting a "symbol table", but wish to
* provide a "string table" which comes immediately *after* the
* symbol table. It's a violation of the PE spec to put one of
* these in an executable file (as opposed to just loadable) but
* it's necessary to communicate the debug section names to GDB,
* and nobody else complains.
*)
(def_aligned
symtab_fixup
(def_aligned
strtab_fixup
(SEQ
[|
WORD (TY_u32, (F_SZ strtab_fixup));
ZSTRING ".debug_aranges";
ZSTRING ".debug_pubnames";
ZSTRING ".debug_info";
ZSTRING ".debug_abbrev";
ZSTRING ".debug_line";
ZSTRING ".debug_frame";
ZSTRING ".note.rust";
|])))
in
let loader_header = (pe_loader_header
~text_fixup
~init_data_fixup: all_init_data_fixup
~size_of_uninit_data: 0L
~entry_point_fixup: start_fixup
~image_fixup: image_fixup
~subsys: IMAGE_SUBSYSTEM_WINDOWS_CUI
~all_hdrs_fixup
~loader_hdr_fixup
~import_dir_fixup
~export_dir_fixup)
in
let text_header = (pe_section_header
~id: SECTION_ID_TEXT
~hdr_fixup: text_fixup)
in
let bss_header = (pe_section_header
~id: SECTION_ID_BSS
~hdr_fixup: bss_fixup)
in
let import_section = (pe_import_section
~import_dir_fixup
~dlls: (rustrt_imports sem))
in
let import_header = (pe_section_header
~id: SECTION_ID_IMPORTS
~hdr_fixup: import_dir_fixup)
in
let export_section = (pe_export_section
~sess
~export_dir_fixup
~exports: (crate_exports sem))
in
let export_header = (pe_section_header
~id: SECTION_ID_EXPORTS
~hdr_fixup: export_dir_fixup)
in
let data_header = (pe_section_header
~id: SECTION_ID_DATA
~hdr_fixup: data_fixup)
in
(*
let debug_aranges_header =
(pe_section_header
~id: SECTION_ID_DEBUG_ARANGES
~hdr_fixup: sem.Semant.ctxt_debug_aranges_fixup)
in
let debug_pubnames_header =
(pe_section_header
~id: SECTION_ID_DEBUG_PUBNAMES
~hdr_fixup: sem.Semant.ctxt_debug_pubnames_fixup)
in
*)
let debug_info_header = (pe_section_header
~id: SECTION_ID_DEBUG_INFO
~hdr_fixup: sem.Semant.ctxt_debug_info_fixup)
in
let debug_abbrev_header = (pe_section_header
~id: SECTION_ID_DEBUG_ABBREV
~hdr_fixup: sem.Semant.ctxt_debug_abbrev_fixup)
in
(*
let debug_line_header =
(pe_section_header
~id: SECTION_ID_DEBUG_LINE
~hdr_fixup: sem.Semant.ctxt_debug_line_fixup)
in
let debug_frame_header =
(pe_section_header
~id: SECTION_ID_DEBUG_FRAME
~hdr_fixup: sem.Semant.ctxt_debug_frame_fixup)
in
*)
let note_rust_header = (pe_section_header
~id: SECTION_ID_NOTE_RUST
~hdr_fixup: note_rust_fixup)
in
let all_headers = (def_file_aligned
all_hdrs_fixup
(SEQ
[|
pe_msdos_header_and_padding;
header;
loader_header;
text_header;
bss_header;
import_header;
export_header;
data_header;
(*
debug_aranges_header;
debug_pubnames_header;
*)
debug_info_header;
debug_abbrev_header;
(*
debug_line_header;
debug_frame_header;
*)
note_rust_header;
|]))
in
let text_section = (pe_text_section
~sem
~sess
~start_fixup
~rust_start_fixup
~main_fn_fixup: sem.Semant.ctxt_main_fn_fixup
~text_fixup
~crate_code: code)
in
let bss_section = def_aligned bss_fixup (BSS 0x10L)
in
let data_section = (def_aligned data_fixup
(SEQ [| data; symtab; |]))
in
let all_init_data = (def_aligned
all_init_data_fixup
(SEQ [| import_section;
export_section;
data_section; |]))
in
(*
let debug_aranges_section =
def_aligned sem.Semant.ctxt_debug_aranges_fixup dw.Dwarf.debug_aranges
in
let debug_pubnames_section =
def_aligned sem.Semant.ctxt_debug_pubnames_fixup dw.Dwarf.debug_pubnames
in
*)
let debug_info_section =
def_aligned sem.Semant.ctxt_debug_info_fixup dw.Dwarf.debug_info
in
let debug_abbrev_section =
def_aligned sem.Semant.ctxt_debug_abbrev_fixup dw.Dwarf.debug_abbrev
in
(*
let debug_line_section =
def_aligned sem.Semant.ctxt_debug_line_fixup dw.Dwarf.debug_line
in
let debug_frame_section =
def_aligned sem.Semant.ctxt_debug_frame_fixup dw.Dwarf.debug_frame
in
*)
let note_rust_section =
def_aligned note_rust_fixup
(Asm.note_rust_frags crate.node.Ast.crate_meta)
in
let all_frags = SEQ [| MEMPOS pe_image_base;
(def_file_aligned image_fixup
(SEQ [| DEF (sem.Semant.ctxt_image_base_fixup,
MARK);
all_headers;
text_section;
bss_section;
all_init_data;
(* debug_aranges_section; *)
(* debug_pubnames_section; *)
debug_info_section;
debug_abbrev_section;
(* debug_line_section; *)
(* debug_frame_section; *)
note_rust_section;
ALIGN_MEM (pe_mem_alignment, MARK)
|]
)
)
|]
in
write_out_frag sess true all_frags
;;
let pe_magic = "PE";;
let sniff
(sess:Session.sess)
(filename:filename)
: asm_reader option =
try
let stat = Unix.stat filename in
if (stat.Unix.st_kind = Unix.S_REG) &&
(stat.Unix.st_size >= pe_file_alignment)
then
let ar = new_asm_reader sess filename in
let _ = log sess "sniffing PE file" in
(* PE header offset is at 0x3c in the MS-DOS compatibility header. *)
let _ = ar.asm_seek 0x3c in
let pe_hdr_off = ar.asm_get_u32() in
let _ = log sess "PE header offset: 0x%x" pe_hdr_off in
let _ = ar.asm_seek pe_hdr_off in
let pe_signature = ar.asm_get_zstr_padded 4 in
let _ = log sess " PE signature: '%s'" pe_signature in
if pe_signature = pe_magic
then (ar.asm_seek 0; Some ar)
else None
else
None
with
_ -> None
;;
let get_sections
(sess:Session.sess)
(ar:asm_reader)
: (string,(int*int)) Hashtbl.t =
let _ = log sess "reading sections" in
(* PE header offset is at 0x3c in the MS-DOS compatibility header. *)
let _ = ar.asm_seek 0x3c in
let pe_hdr_off = ar.asm_get_u32() in
let _ = log sess "PE header offset: 0x%x" pe_hdr_off in
let _ = ar.asm_seek pe_hdr_off in
let pe_signature = ar.asm_get_zstr_padded 4 in
let _ = log sess " PE signature: '%s'" pe_signature in
let _ = assert (pe_signature = pe_magic) in
let _ = ar.asm_adv_u16() in (* machine type *)
let num_sections = ar.asm_get_u16() in
let _ = log sess " num sections: %d" num_sections in
let _ = ar.asm_adv_u32() in (* timestamp *)
let symtab_off = ar.asm_get_u32() in
let _ = log sess " symtab offset: 0x%x" symtab_off in
let num_symbols = ar.asm_get_u32() in
let _ = log sess " num symbols: %d" num_symbols in
let loader_hdr_size = ar.asm_get_u16() in
let _ = log sess "loader header sz: %d" loader_hdr_size in
let _ = ar.asm_adv_u16() in (* flags *)
let sections_off = (ar.asm_get_off()) + loader_hdr_size in
let sects = Hashtbl.create 0 in
let _ =
ar.asm_seek sections_off;
for i = 0 to (num_sections - 1) do
(*
* Section-name encoding is crazy. ASCII-encoding offsets of
* long names. See pe_section_header for details.
*)
let sect_name =
let sect_name = ar.asm_get_zstr_padded 8 in
assert ((String.length sect_name) > 0);
if sect_name.[0] = '/'
then
let off_str =
String.sub sect_name 1 ((String.length sect_name) - 1)
in
let i = int_of_string off_str in
let curr = ar.asm_get_off() in
ar.asm_seek (symtab_off + i);
let ext_name = ar.asm_get_zstr() in
ar.asm_seek curr;
ext_name
else
sect_name
in
let _ = ar.asm_adv_u32() in (* virtual size *)
let _ = ar.asm_adv_u32() in (* virtual address *)
let file_sz = ar.asm_get_u32() in
let file_off = ar.asm_get_u32() in
let _ = ar.asm_adv_u32() in (* reserved *)
let _ = ar.asm_adv_u32() in (* reserved *)
let _ = ar.asm_adv_u32() in (* reserved *)
let _ = ar.asm_adv_u32() in (* flags *)
Hashtbl.add sects sect_name (file_off, file_sz);
log sess " section %d: %s, size %d, offset 0x%x"
i sect_name file_sz file_off;
done
in
sects
;;
(*
* Local Variables:
* fill-column: 78;
* indent-tabs-mode: nil
* buffer-file-coding-system: utf-8-unix
* compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
* End:
*)
|