1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
|
#include "all.h"
#include <stdbool.h>
typedef enum ArgPassStyle {
APS_Invalid = 0,
APS_Register,
APS_InlineOnStack,
APS_CopyAndPointerInRegister,
APS_CopyAndPointerOnStack,
APS_VarargsTag,
APS_EnvTag,
} ArgPassStyle;
typedef struct ArgClass {
Typ* type;
ArgPassStyle style;
int align;
uint size;
int cls;
Ref ref;
} ArgClass;
typedef struct ExtraAlloc ExtraAlloc;
struct ExtraAlloc {
Ins instr;
ExtraAlloc* link;
};
#define ALIGN_DOWN(n, a) ((n) & ~((a)-1))
#define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a)-1, (a))
// Number of stack bytes required be reserved for the callee.
#define SHADOW_SPACE_SIZE 32
int amd64_winabi_rsave[] = {RCX, RDX, R8, R9, R10, R11, RAX, XMM0,
XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8,
XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1};
int amd64_winabi_rclob[] = {RBX, R12, R13, R14, R15, RSI, RDI, -1};
MAKESURE(winabi_arrays_ok,
sizeof amd64_winabi_rsave == (NGPS_WIN + NFPS + 1) * sizeof(int) &&
sizeof amd64_winabi_rclob == (NCLR_WIN + 1) * sizeof(int));
// layout of call's second argument (RCall)
//
// bit 0: rax returned
// bit 1: xmm0 returned
// bits 23: 0
// bits 4567: rcx, rdx, r8, r9 passed
// bits 89ab: xmm0,1,2,3 passed
// bit c: env call (rax passed)
// bits d..1f: 0
bits amd64_winabi_retregs(Ref r, int p[2]) {
assert(rtype(r) == RCall);
bits b = 0;
int num_int_returns = r.val & 1;
int num_float_returns = r.val & 2;
if (num_int_returns == 1) {
b |= BIT(RAX);
} else {
b |= BIT(XMM0);
}
if (p) {
p[0] = num_int_returns;
p[1] = num_float_returns;
}
return b;
}
static uint popcnt(bits b) {
b = (b & 0x5555555555555555) + ((b >> 1) & 0x5555555555555555);
b = (b & 0x3333333333333333) + ((b >> 2) & 0x3333333333333333);
b = (b & 0x0f0f0f0f0f0f0f0f) + ((b >> 4) & 0x0f0f0f0f0f0f0f0f);
b += (b >> 8);
b += (b >> 16);
b += (b >> 32);
return b & 0xff;
}
bits amd64_winabi_argregs(Ref r, int p[2]) {
assert(rtype(r) == RCall);
// On SysV, these are counts. Here, a count isn't sufficient, we actually need
// to know which ones are in use because they're not necessarily contiguous.
int int_passed = (r.val >> 4) & 15;
int float_passed = (r.val >> 8) & 15;
bool env_param = (r.val >> 12) & 1;
bits b = 0;
b |= (int_passed & 1) ? BIT(RCX) : 0;
b |= (int_passed & 2) ? BIT(RDX) : 0;
b |= (int_passed & 4) ? BIT(R8) : 0;
b |= (int_passed & 8) ? BIT(R9) : 0;
b |= (float_passed & 1) ? BIT(XMM0) : 0;
b |= (float_passed & 2) ? BIT(XMM1) : 0;
b |= (float_passed & 4) ? BIT(XMM2) : 0;
b |= (float_passed & 8) ? BIT(XMM3) : 0;
b |= env_param ? BIT(RAX) : 0;
if (p) {
// TODO: The only place this is used is live.c. I'm not sure what should be
// returned here wrt to using the same counter for int/float regs on win.
// For now, try the number of registers in use even though they're not
// contiguous.
p[0] = popcnt(int_passed);
p[1] = popcnt(float_passed);
}
return b;
}
typedef struct RegisterUsage {
// Counter for both int/float as they're counted together. Only if the bool's
// set in regs_passed is the given register *actually* needed for a value
// (i.e. needs to be saved, etc.).
int num_regs_passed;
// Indexed first by 0=int, 1=float, use KBASE(cls).
// Indexed second by register index in calling convention, so for integer,
// 0=RCX, 1=RDX, 2=R8, 3=R9, and for float XMM0, XMM1, XMM2, XMM3.
bool regs_passed[2][4];
bool rax_returned;
bool xmm0_returned;
// This is also used as where the va_start will start for varargs functions
// (there's no 'Oparv', so we need to keep track of a count here.)
int num_named_args_passed;
// This is set when classifying the arguments for a call (but not when
// classifying the parameters of a function definition).
bool is_varargs_call;
bool has_env;
} RegisterUsage;
static int register_usage_to_call_arg_value(RegisterUsage reg_usage) {
return (reg_usage.rax_returned << 0) | //
(reg_usage.xmm0_returned << 1) | //
(reg_usage.regs_passed[0][0] << 4) | //
(reg_usage.regs_passed[0][1] << 5) | //
(reg_usage.regs_passed[0][2] << 6) | //
(reg_usage.regs_passed[0][3] << 7) | //
(reg_usage.regs_passed[1][0] << 8) | //
(reg_usage.regs_passed[1][1] << 9) | //
(reg_usage.regs_passed[1][2] << 10) | //
(reg_usage.regs_passed[1][3] << 11) | //
(reg_usage.has_env << 12);
}
// Assigns the argument to a register if there's any left according to the
// calling convention, and updates the regs_passed bools. Otherwise marks the
// value as needing stack space to be passed.
static void assign_register_or_stack(RegisterUsage* reg_usage,
ArgClass* arg,
bool is_float,
bool by_copy) {
if (reg_usage->num_regs_passed == 4) {
arg->style = by_copy ? APS_CopyAndPointerOnStack : APS_InlineOnStack;
} else {
reg_usage->regs_passed[is_float][reg_usage->num_regs_passed] = true;
++reg_usage->num_regs_passed;
arg->style = by_copy ? APS_CopyAndPointerInRegister : APS_Register;
}
++reg_usage->num_named_args_passed;
}
static bool type_is_by_copy(Typ* type) {
// Note that only these sizes are passed by register, even though e.g. a
// 5 byte struct would "fit", it still is passed by copy-and-pointer.
return type->isdark || (type->size != 1 && type->size != 2 &&
type->size != 4 && type->size != 8);
}
// This function is used for both arguments and parameters.
// begin_instr should either point at the first Oarg or Opar, and end_instr
// should point past the last one (so to the Ocall for arguments, or to the
// first 'real' instruction of the function for parameters).
static void classify_arguments(RegisterUsage* reg_usage,
Ins* begin_instr,
Ins* end_instr,
ArgClass* arg_classes,
Ref* env) {
ArgClass* arg = arg_classes;
// For each argument, determine how it will be passed (int, float, stack)
// and update the `reg_usage` counts. Additionally, fill out arg_classes for
// each argument.
for (Ins* instr = begin_instr; instr < end_instr; ++instr, ++arg) {
switch (instr->op) {
case Oarg:
case Opar:
assign_register_or_stack(reg_usage, arg, KBASE(instr->cls),
/*by_copy=*/false);
arg->cls = instr->cls;
arg->align = 3;
arg->size = 8;
break;
case Oargc:
case Oparc: {
int typ_index = instr->arg[0].val;
Typ* type = &typ[typ_index];
bool by_copy = type_is_by_copy(type);
assign_register_or_stack(reg_usage, arg, /*is_float=*/false, by_copy);
arg->cls = Kl;
if (!by_copy && type->size <= 4) {
arg->cls = Kw;
}
arg->align = 3;
arg->size = type->size;
break;
}
case Oarge:
*env = instr->arg[0];
arg->style = APS_EnvTag;
reg_usage->has_env = true;
break;
case Opare:
*env = instr->to;
arg->style = APS_EnvTag;
reg_usage->has_env = true;
break;
case Oargv:
reg_usage->is_varargs_call = true;
arg->style = APS_VarargsTag;
break;
}
}
if (reg_usage->has_env && reg_usage->is_varargs_call) {
die("can't use env with varargs");
}
// During a varargs call, float arguments have to be duplicated to their
// associated integer register, so mark them as in-use too.
if (reg_usage->is_varargs_call) {
for (int i = 0; i < 4; ++i) {
if (reg_usage->regs_passed[/*float*/ 1][i]) {
reg_usage->regs_passed[/*int*/ 0][i] = true;
}
}
}
}
static bool is_integer_type(int ty) {
assert(ty >= 0 && ty < 4 && "expecting Kw Kl Ks Kd");
return KBASE(ty) == 0;
}
static Ref register_for_arg(int cls, int counter) {
assert(counter < 4);
if (is_integer_type(cls)) {
return TMP(amd64_winabi_rsave[counter]);
} else {
return TMP(XMM0 + counter);
}
}
static Ins* lower_call(Fn* func,
Blk* block,
Ins* call_instr,
ExtraAlloc** pextra_alloc) {
// Call arguments are instructions. Walk through them to find the end of the
// call+args that we need to process (and return the instruction past the body
// of the instruction for continuing processing).
Ins* instr_past_args = call_instr - 1;
for (; instr_past_args >= block->ins; --instr_past_args) {
if (!isarg(instr_past_args->op)) {
break;
}
}
Ins* earliest_arg_instr = instr_past_args + 1;
// Don't need an ArgClass for the call itself, so one less than the total
// number of instructions we're dealing with.
uint num_args = call_instr - earliest_arg_instr;
ArgClass* arg_classes = alloc(num_args * sizeof(ArgClass));
RegisterUsage reg_usage = {0};
ArgClass ret_arg_class = {0};
// Ocall's two arguments are the the function to be called in 0, and, if the
// the function returns a non-basic type, then arg[1] is a reference to the
// type of the return. req checks if Refs are equal; `R` is 0.
bool il_has_struct_return = !req(call_instr->arg[1], R);
bool is_struct_return = false;
if (il_has_struct_return) {
Typ* ret_type = &typ[call_instr->arg[1].val];
is_struct_return = type_is_by_copy(ret_type);
if (is_struct_return) {
assign_register_or_stack(®_usage, &ret_arg_class, /*is_float=*/false,
/*by_copy=*/true);
}
ret_arg_class.size = ret_type->size;
}
Ref env = R;
classify_arguments(®_usage, earliest_arg_instr, call_instr, arg_classes,
&env);
// We now know which arguments are on the stack and which are in registers, so
// we can allocate the correct amount of space to stash the stack-located ones
// into.
uint stack_usage = 0;
for (uint i = 0; i < num_args; ++i) {
ArgClass* arg = &arg_classes[i];
// stack_usage only accounts for pushes that are for values that don't have
// enough registers. Large struct copies are alloca'd separately, and then
// only have (potentially) 8 bytes to add to stack_usage here.
if (arg->style == APS_InlineOnStack) {
if (arg->align > 4) {
err("win abi cannot pass alignments > 16");
}
stack_usage += arg->size;
} else if (arg->style == APS_CopyAndPointerOnStack) {
stack_usage += 8;
}
}
stack_usage = ALIGN_UP(stack_usage, 16);
// Note that here we're logically 'after' the call (due to emitting
// instructions in reverse order), so we're doing a negative stack
// allocation to clean up after the call.
Ref stack_size_ref =
getcon(-(int64_t)(stack_usage + SHADOW_SPACE_SIZE), func);
emit(Osalloc, Kl, R, stack_size_ref, R);
ExtraAlloc* return_pad = NULL;
if (is_struct_return) {
return_pad = alloc(sizeof(ExtraAlloc));
Ref ret_pad_ref = newtmp("abi.ret_pad", Kl, func);
return_pad->instr =
(Ins){Oalloc8, Kl, ret_pad_ref, {getcon(ret_arg_class.size, func)}};
return_pad->link = (*pextra_alloc);
*pextra_alloc = return_pad;
reg_usage.rax_returned = true;
emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
} else {
if (il_has_struct_return) {
// In the case that at the IL level, a struct return was specified, but as
// far as the calling convention is concerned it's not actually by
// pointer, we need to store the return value into an alloca because
// subsequent IL will still be treating the function return as a pointer.
ExtraAlloc* return_copy = alloc(sizeof(ExtraAlloc));
return_copy->instr =
(Ins){Oalloc8, Kl, call_instr->to, {getcon(8, func)}};
return_copy->link = (*pextra_alloc);
*pextra_alloc = return_copy;
Ref copy = newtmp("abi.copy", Kl, func);
emit(Ostorel, Kl, R, copy, call_instr->to);
emit(Ocopy, Kl, copy, TMP(RAX), R);
reg_usage.rax_returned = true;
} else if (is_integer_type(call_instr->cls)) {
// Only a basic type returned from the call, integer.
emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
reg_usage.rax_returned = true;
} else {
// Basic type, floating point.
emit(Ocopy, call_instr->cls, call_instr->to, TMP(XMM0), R);
reg_usage.xmm0_returned = true;
}
}
// Emit the actual call instruction. There's no 'to' value by this point
// because we've lowered it into register manipulation (that's the `R`),
// arg[0] of the call is the function, and arg[1] is register usage is
// documented as above (copied from SysV).
emit(Ocall, call_instr->cls, R, call_instr->arg[0],
CALL(register_usage_to_call_arg_value(reg_usage)));
if (!req(R, env)) {
// If there's an env arg to be passed, it gets stashed in RAX.
emit(Ocopy, Kl, TMP(RAX), env, R);
}
if (reg_usage.is_varargs_call) {
// Any float arguments need to be duplicated to integer registers. This is
// required by the calling convention so that dumping to shadow space can be
// done without a prototype and for varargs.
#define DUP_IF_USED(index, floatreg, intreg) \
if (reg_usage.regs_passed[/*float*/ 1][index]) { \
emit(Ocast, Kl, TMP(intreg), TMP(floatreg), R); \
}
DUP_IF_USED(0, XMM0, RCX);
DUP_IF_USED(1, XMM1, RDX);
DUP_IF_USED(2, XMM2, R8);
DUP_IF_USED(3, XMM3, R9);
#undef DUP_IF_USED
}
int reg_counter = 0;
if (is_struct_return) {
Ref first_reg = register_for_arg(Kl, reg_counter++);
emit(Ocopy, Kl, first_reg, return_pad->instr.to, R);
}
// This is where we actually do the load of values into registers or into
// stack slots.
Ref arg_stack_slots = newtmp("abi.args", Kl, func);
uint slot_offset = SHADOW_SPACE_SIZE;
ArgClass* arg = arg_classes;
for (Ins* instr = earliest_arg_instr; instr != call_instr; ++instr, ++arg) {
switch (arg->style) {
case APS_Register: {
Ref into = register_for_arg(arg->cls, reg_counter++);
if (instr->op == Oargc) {
// If this is a small struct being passed by value. The value in the
// instruction in this case is a pointer, but it needs to be loaded
// into the register.
emit(Oload, arg->cls, into, instr->arg[1], R);
} else {
// Otherwise, a normal value passed in a register.
emit(Ocopy, instr->cls, into, instr->arg[0], R);
}
break;
}
case APS_InlineOnStack: {
Ref slot = newtmp("abi.off", Kl, func);
if (instr->op == Oargc) {
// This is a small struct, so it's not passed by copy, but the
// instruction is a pointer. So we need to copy it into the stack
// slot. (And, remember that these are emitted backwards, so store,
// then load.)
Ref smalltmp = newtmp("abi.smalltmp", arg->cls, func);
emit(Ostorel, Kl, R, smalltmp, slot);
emit(Oload, arg->cls, smalltmp, instr->arg[1], R);
} else {
// Stash the value into the stack slot.
emit(Ostorel, Kl, R, instr->arg[0], slot);
}
emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
slot_offset += arg->size;
break;
}
case APS_CopyAndPointerInRegister:
case APS_CopyAndPointerOnStack: {
// Alloca a space to copy into, and blit the value from the instr to the
// copied location.
ExtraAlloc* arg_copy = alloc(sizeof(ExtraAlloc));
Ref copy_ref = newtmp("abi.copy", Kl, func);
arg_copy->instr =
(Ins){Oalloc8, Kl, copy_ref, {getcon(arg->size, func)}};
arg_copy->link = (*pextra_alloc);
*pextra_alloc = arg_copy;
emit(Oblit1, 0, R, INT(arg->size), R);
emit(Oblit0, 0, R, instr->arg[1], copy_ref);
// Now load the pointer into the correct register or stack slot.
if (arg->style == APS_CopyAndPointerInRegister) {
Ref into = register_for_arg(arg->cls, reg_counter++);
emit(Ocopy, Kl, into, copy_ref, R);
} else {
assert(arg->style == APS_CopyAndPointerOnStack);
Ref slot = newtmp("abi.off", Kl, func);
emit(Ostorel, Kl, R, copy_ref, slot);
emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
slot_offset += 8;
}
break;
}
case APS_EnvTag:
case APS_VarargsTag:
// Nothing to do here, see right before the call for reg dupe.
break;
case APS_Invalid:
die("unreachable");
}
}
if (stack_usage) {
// The last (first in call order) thing we do is allocate the the stack
// space we're going to fill with temporaries.
emit(Osalloc, Kl, arg_stack_slots,
getcon(stack_usage + SHADOW_SPACE_SIZE, func), R);
} else {
// When there's no usage for temporaries, we can add this into the other
// alloca, but otherwise emit it separately (not storing into a reference)
// so that it doesn't get removed later for being useless.
emit(Osalloc, Kl, R, getcon(SHADOW_SPACE_SIZE, func), R);
}
return instr_past_args;
}
static void lower_block_return(Fn* func, Blk* block) {
int jmp_type = block->jmp.type;
if (!isret(jmp_type) || jmp_type == Jret0) {
return;
}
// Save the argument, and set the block to be a void return because once it's
// lowered it's handled by the the register/stack manipulation.
Ref ret_arg = block->jmp.arg;
block->jmp.type = Jret0;
RegisterUsage reg_usage = {0};
if (jmp_type == Jretc) {
Typ* type = &typ[func->retty];
if (type_is_by_copy(type)) {
assert(rtype(func->retr) == RTmp);
emit(Ocopy, Kl, TMP(RAX), func->retr, R);
emit(Oblit1, 0, R, INT(type->size), R);
emit(Oblit0, 0, R, ret_arg, func->retr);
} else {
emit(Oload, Kl, TMP(RAX), ret_arg, R);
}
reg_usage.rax_returned = true;
} else {
int k = jmp_type - Jretw;
if (is_integer_type(k)) {
emit(Ocopy, k, TMP(RAX), ret_arg, R);
reg_usage.rax_returned = true;
} else {
emit(Ocopy, k, TMP(XMM0), ret_arg, R);
reg_usage.xmm0_returned = true;
}
}
block->jmp.arg = CALL(register_usage_to_call_arg_value(reg_usage));
}
static void lower_vastart(Fn* func,
RegisterUsage* param_reg_usage,
Ref valist) {
assert(func->vararg);
// In varargs functions:
// 1. the int registers are already dumped to the shadow stack space;
// 2. any parameters passed in floating point registers have
// been duplicated to the integer registers
// 3. we ensure (later) that for varargs functions we're always using an rbp
// frame pointer.
// So, the ... argument is just indexed past rbp by the number of named values
// that were actually passed.
Ref offset = newtmp("abi.vastart", Kl, func);
emit(Ostorel, Kl, R, offset, valist);
// *8 for sizeof(u64), +16 because the return address and rbp have been pushed
// by the time we get to the body of the function.
emit(Oadd, Kl, offset, TMP(RBP),
getcon(param_reg_usage->num_named_args_passed * 8 + 16, func));
}
static void lower_vaarg(Fn* func, Ins* vaarg_instr) {
// va_list is just a void** on winx64, so load the pointer, then load the
// argument from that pointer, then increment the pointer to the next arg.
// (All emitted backwards as usual.)
Ref inc = newtmp("abi.vaarg.inc", Kl, func);
Ref ptr = newtmp("abi.vaarg.ptr", Kl, func);
emit(Ostorel, Kl, R, inc, vaarg_instr->arg[0]);
emit(Oadd, Kl, inc, ptr, getcon(8, func));
emit(Oload, vaarg_instr->cls, vaarg_instr->to, ptr, R);
emit(Oload, Kl, ptr, vaarg_instr->arg[0], R);
}
static void lower_args_for_block(Fn* func,
Blk* block,
RegisterUsage* param_reg_usage,
ExtraAlloc** pextra_alloc) {
// global temporary buffer used by emit. Reset to the end, and predecremented
// when adding to it.
curi = &insb[NIns];
lower_block_return(func, block);
if (block->nins) {
// Work backwards through the instructions, either copying them unchanged,
// or modifying as necessary.
for (Ins* instr = &block->ins[block->nins - 1]; instr >= block->ins;) {
switch (instr->op) {
case Ocall:
instr = lower_call(func, block, instr, pextra_alloc);
break;
case Ovastart:
lower_vastart(func, param_reg_usage, instr->arg[0]);
--instr;
break;
case Ovaarg:
lower_vaarg(func, instr);
--instr;
break;
case Oarg:
case Oargc:
die("unreachable");
default:
emiti(*instr);
--instr;
break;
}
}
}
// This it the start block, which is processed last. Add any allocas that
// other blocks needed.
bool is_start_block = block == func->start;
if (is_start_block) {
for (ExtraAlloc* ea = *pextra_alloc; ea; ea = ea->link) {
emiti(ea->instr);
}
}
// emit/emiti add instructions from the end to the beginning of the temporary
// global buffer. dup the final version into the final block storage.
block->nins = &insb[NIns] - curi;
idup(block, curi, block->nins);
}
static Ins* find_end_of_func_parameters(Blk* start_block) {
Ins* i;
for (i = start_block->ins; i < &start_block->ins[start_block->nins]; ++i) {
if (!ispar(i->op)) {
break;
}
}
return i;
}
// Copy from registers/stack into values.
static RegisterUsage lower_func_parameters(Fn* func) {
// This is half-open, so end points after the last Opar.
Blk* start_block = func->start;
Ins* start_of_params = start_block->ins;
Ins* end_of_params = find_end_of_func_parameters(start_block);
size_t num_params = end_of_params - start_of_params;
ArgClass* arg_classes = alloc(num_params * sizeof(ArgClass));
ArgClass arg_ret = {0};
// global temporary buffer used by emit. Reset to the end, and predecremented
// when adding to it.
curi = &insb[NIns];
RegisterUsage reg_usage = {0};
if (func->retty >= 0) {
bool by_copy = type_is_by_copy(&typ[func->retty]);
if (by_copy) {
assign_register_or_stack(®_usage, &arg_ret, /*is_float=*/false,
by_copy);
Ref ret_ref = newtmp("abi.ret", Kl, func);
emit(Ocopy, Kl, ret_ref, TMP(RCX), R);
func->retr = ret_ref;
}
}
Ref env = R;
classify_arguments(®_usage, start_of_params, end_of_params, arg_classes,
&env);
func->reg = amd64_winabi_argregs(
CALL(register_usage_to_call_arg_value(reg_usage)), NULL);
// Copy from the registers or stack slots into the named parameters. Depending
// on how they're passed, they either need to be copied or loaded.
ArgClass* arg = arg_classes;
int reg_counter = 0;
uint slot_offset = SHADOW_SPACE_SIZE / 4 + 4;
for (Ins* instr = start_of_params; instr < end_of_params; ++instr, ++arg) {
switch (arg->style) {
case APS_Register: {
Ref from = register_for_arg(arg->cls, reg_counter++);
// If it's a struct at the IL level, we need to copy the register into
// an alloca so we have something to point at (same for InlineOnStack).
if (instr->op == Oparc) {
arg->ref = newtmp("abi", Kl, func);
emit(Ostorel, Kl, R, arg->ref, instr->to);
emit(Ocopy, instr->cls, arg->ref, from, R);
emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
} else {
emit(Ocopy, instr->cls, instr->to, from, R);
}
break;
}
case APS_InlineOnStack:
if (instr->op == Oparc) {
arg->ref = newtmp("abi", Kl, func);
emit(Ostorel, Kl, R, arg->ref, instr->to);
emit(Ocopy, instr->cls, arg->ref, SLOT(-slot_offset), R);
emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
} else {
emit(Ocopy, Kl, instr->to, SLOT(-slot_offset), R);
}
slot_offset += 2;
break;
case APS_CopyAndPointerOnStack:
emit(Oload, Kl, instr->to, SLOT(-slot_offset), R);
slot_offset += 2;
break;
case APS_CopyAndPointerInRegister: {
// Because this has to be a copy (that we own), it is sufficient to just
// copy the register to the target.
Ref from = register_for_arg(Kl, reg_counter++);
emit(Ocopy, Kl, instr->to, from, R);
break;
}
case APS_EnvTag:
break;
case APS_VarargsTag:
case APS_Invalid:
die("unreachable");
}
}
// If there was an `env`, it was passed in RAX, so copy it into the env ref.
if (!req(R, env)) {
emit(Ocopy, Kl, env, TMP(RAX), R);
}
int num_created_instrs = &insb[NIns] - curi;
int num_other_after_instrs = (int)(start_block->nins - num_params);
int new_total_instrs = num_other_after_instrs + num_created_instrs;
Ins* new_instrs = vnew(new_total_instrs, sizeof(Ins), PFn);
Ins* instr_p = icpy(new_instrs, curi, num_created_instrs);
icpy(instr_p, end_of_params, num_other_after_instrs);
start_block->nins = new_total_instrs;
start_block->ins = new_instrs;
return reg_usage;
}
// The main job of this function is to lower generic instructions into the
// specific details of how arguments are passed, and parameters are
// interpreted for win x64. A useful reference is
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention .
//
// Some of the major differences from SysV if you're comparing the code
// (non-exhaustive):
// - only 4 int and 4 float regs are used
// - when an int register is assigned a value, its associated float register is
// left unused (and vice versa). i.e. there's only one counter as you assign
// arguments to registers.
// - any structs that aren't 1/2/4/8 bytes in size are passed by pointer, not
// by copying them into the stack. So e.g. if you pass something like
// `struct { void*, int64_t }` by value, it first needs to be copied to
// another alloca (in order to maintain value semantics at the language
// level), then the pointer to that copy is treated as a regular integer
// argument (which then itself may *also* be copied to the stack in the case
// there's no integer register remaining.)
// - when calling a varargs functions, floating point values must be duplicated
// integer registers. Along with the above restrictions, this makes varargs
// handling simpler for the callee than SysV.
void amd64_winabi_abi(Fn* func) {
// The first thing to do is lower incoming parameters to this function.
RegisterUsage param_reg_usage = lower_func_parameters(func);
// This is the second larger part of the job. We walk all blocks, and rewrite
// instructions returns, calls, and handling of varargs into their win x64
// specific versions. Any other instructions are just passed through unchanged
// by using `emiti`.
// Skip over the entry block, and do it at the end so that our later
// modifications can add allocations to the start block. In particular, we
// need to add stack allocas for copies when structs are passed or returned by
// value.
ExtraAlloc* extra_alloc = NULL;
for (Blk* block = func->start->link; block; block = block->link) {
lower_args_for_block(func, block, ¶m_reg_usage, &extra_alloc);
}
lower_args_for_block(func, func->start, ¶m_reg_usage, &extra_alloc);
if (debug['A']) {
fprintf(stderr, "\n> After ABI lowering:\n");
printfn(func, stderr);
}
}
|