diff options
| author | Roland Paterson-Jones <[email protected]> | 2024-10-23 14:51:53 +0200 |
|---|---|---|
| committer | Quentin Carbonneaux <[email protected]> | 2026-01-13 18:11:30 +0100 |
| commit | 5c1eb24e2c312021c7af4316e5adde53e270311a (patch) | |
| tree | e661a5d0c58d58c0f7fb8fc84dba575a800cc646 /amd64 | |
| parent | 72010791374d3be2ab21ee5ca1146fce2382d88b (diff) | |
If-conversion RFC 4 - x86 only (for now), use cmovXX
Replaces tiny conditional-jump graphlets with
conditional move instructions.
Currently enabled only for x86 (amd64). Arm64 support using cselXX
would be essentially identical.
Adds internal frontend ops sel0/sel1, which the backend lowers to
flag-specific xselXX ops, following the jnz implementation pattern.
Testing: standard QBE, cproc, harec, hare, roland
Diffstat (limited to 'amd64')
| -rw-r--r-- | amd64/emit.c | 37 | ||||
| -rw-r--r-- | amd64/isel.c | 106 | ||||
| -rw-r--r-- | amd64/targ.c | 1 |
3 files changed, 142 insertions, 2 deletions
diff --git a/amd64/emit.c b/amd64/emit.c index 6cf37ec..7290a80 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -576,6 +576,43 @@ emitins(Ins i, E *e) case Odbgloc: emitdbgloc(i.arg[0].val, i.arg[1].val, e->f); break; + case Oxselieq: + case Oxseline: + case Oxselisge: + case Oxselisgt: + case Oxselisle: + case Oxselislt: + case Oxseliuge: + case Oxseliugt: + case Oxseliule: + case Oxseliult: + case Oxselfeq: + case Oxselfge: + case Oxselfgt: + case Oxselfle: + case Oxselflt: + case Oxselfne: + case Oxselfo: + case Oxselfuo: + { + // TODO - how to do this "properly"? + static char *F0[] = { + "z", "nz", "ge", "g", "le", "l", "ae", "a", "be", "b", + "nz", "ae", "a", "be", "b", "nz", "p", "np" + }; + static char *F1[] = { + "nz", "z", "l", "le", "g", "ge", "b", "be", "a", "ae", + "z", "b", "be", "a", "ae", "z", "p", "np" + }; + char ins[16]; + sprintf(ins, "cmov%s %%1, %%=", F1[i.op-Oxselieq]); + if (req(i.to, i.arg[1])) + sprintf(ins, "cmov%s %%0, %%=", F0[i.op-Oxselieq]); + else if (!req(i.to, i.arg[0])) + emitf("mov %0, %=", &i, e); + emitf(ins, &i, e); + break; + } } } diff --git a/amd64/isel.c b/amd64/isel.c index 4aff0d6..c54b580 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -163,6 +163,10 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) m->base = r0; } } + else if (isxsel(op) && rtype(*r) == RCon) { + r1 = newtmp("isel", i->cls, fn); + emit(Ocopy, i->cls, r1, *r, R); + } *r = r1; } @@ -425,6 +429,24 @@ sel(Ins i, Num *tn, Fn *fn) case Oexts: case Otruncd: case Ocast: + case Oxselieq: + case Oxseline: + case Oxselisge: + case Oxselisgt: + case Oxselisle: + case Oxselislt: + case Oxseliuge: + case Oxseliugt: + case Oxseliule: + case Oxseliult: + case Oxselfeq: + case Oxselfge: + case Oxselfgt: + case Oxselfle: + case Oxselflt: + case Oxselfne: + case Oxselfo: + case Oxselfuo: case_OExt: Emit: emiti(i); @@ -493,6 +515,80 @@ flagi(Ins *i0, Ins *i) return 0; } +static Ins* +selsel(Fn *fn, Blk *b, Ins *i, Num *tn) +{ + Ref r, cr0, cr1; + int c, k, swap, gencmp, gencpy; + 
Ins *isel0, *isel1, *fi; + Tmp *t; + + assert(i->op == Osel1); + for (isel0 = i; b->ins < isel0; isel0--) { + if (isel0->op == Osel0) + break; + assert(isel0->op == Osel1); + } + assert(isel0->op == Osel0); + r = isel0->arg[0]; + assert(rtype(r) == RTmp); + t = &fn->tmp[r.val]; + fi = flagi(b->ins, isel0); + cr0 = cr1 = R; + gencmp = gencpy = swap = 0; + k = Kw; + c = Cine; + if (!fi || !req(fi->to, r)) { + gencmp = 1; + cr0 = r; + cr1 = CON_Z; + } else if (iscmp(fi->op, &k, &c) + && c != NCmpI+Cfeq /* see sel() */ + && c != NCmpI+Cfne) { + swap = cmpswap(fi->arg, c); + if (swap) + c = cmpop(c); + if (t->nuse == 1) { + gencmp = 1; + cr0 = fi->arg[0]; + cr1 = fi->arg[1]; + *fi = (Ins){.op = Onop}; + } + } else if (fi->op == Oand && t->nuse == 1 + && (rtype(fi->arg[0]) == RTmp || + rtype(fi->arg[1]) == RTmp)) { + fi->op = Oxtest; + fi->to = R; + if (rtype(fi->arg[1]) == RCon) { + r = fi->arg[1]; + fi->arg[1] = fi->arg[0]; + fi->arg[0] = r; + } + } else { + /* since flags are not tracked in liveness, + * the result of the flag-setting instruction + * has to be marked as live + */ + if (t->nuse == 1) + gencpy = 1; + } + /* generate conditional moves */ + for (isel1 = i; isel0 < isel1; --isel1) { + isel1->op = Oxselieq+c; + sel(*isel1, tn, fn); + } + if (gencmp) { + assert(!gencpy); + selcmp((Ref[2]){cr0, cr1}, k, swap, fn); + } + if (gencpy) { + assert(!gencmp); + emit(Ocopy, Kw, R, r, R); + } + *isel0 = (Ins){.op = Onop}; + return isel0; +} + static void seljmp(Blk *b, Fn *fn) { @@ -826,8 +922,14 @@ amd64_isel(Fn *fn) memset(num, 0, n * sizeof num[0]); anumber(num, b, fn->con); seljmp(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) - sel(*--i, num, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) { + --i; + assert(i->op != Osel0); + if (i->op == Osel1) + i = selsel(fn, b, i, num); + else + sel(*i, num, fn); + } idup(b, curi, &insb[NIns]-curi); } free(num); diff --git a/amd64/targ.c b/amd64/targ.c index fba9144..a7e4552 100644 --- a/amd64/targ.c +++ b/amd64/targ.c @@ -28,6 
+28,7 @@ amd64_memargs(int op) .abi1 = amd64_sysv_abi, \ .isel = amd64_isel, \ .emitfn = amd64_emitfn, \ + .cansel = 1, \ Target T_amd64_sysv = { .name = "amd64_sysv", |
