diff options
| author | Roland Paterson-Jones <[email protected]> | 2024-10-23 14:51:53 +0200 |
|---|---|---|
| committer | Quentin Carbonneaux <[email protected]> | 2026-01-13 18:11:30 +0100 |
| commit | 5c1eb24e2c312021c7af4316e5adde53e270311a (patch) | |
| tree | e661a5d0c58d58c0f7fb8fc84dba575a800cc646 | |
| parent | 72010791374d3be2ab21ee5ca1146fce2382d88b (diff) | |
If-conversion RFC 4 - x86 only (for now), use cmovXX
Replaces tiny conditional-jump graphlets (if-then and
if-then-else) with conditional move instructions.
Currently enabled only for x86. Arm64 support using cselXX
will be essentially identical.
Adds (internal) frontend sel0/sel1 ops, lowered to flag-specific
backend xselXX ops following the jnz implementation pattern.
Testing: standard QBE, cproc, harec, hare, roland
| -rw-r--r-- | Makefile | 3 | ||||
| -rw-r--r-- | all.h | 10 | ||||
| -rw-r--r-- | amd64/emit.c | 37 | ||||
| -rw-r--r-- | amd64/isel.c | 106 | ||||
| -rw-r--r-- | amd64/targ.c | 1 | ||||
| -rw-r--r-- | arm64/targ.c | 1 | ||||
| -rw-r--r-- | cfg.c | 45 | ||||
| -rw-r--r-- | gvn.c | 4 | ||||
| -rw-r--r-- | ifopt.c | 121 | ||||
| -rw-r--r-- | main.c | 8 | ||||
| -rw-r--r-- | ops.h | 22 | ||||
| -rw-r--r-- | rv64/targ.c | 1 | ||||
| -rw-r--r-- | test/ifc.ssa | 238 | ||||
| -rw-r--r-- | util.c | 13 |
14 files changed, 605 insertions, 5 deletions
@@ -5,7 +5,8 @@ PREFIX = /usr/local BINDIR = $(PREFIX)/bin COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \ - copy.o fold.o gvn.o gcm.o simpl.o live.o spill.o rega.o emit.o + copy.o fold.o gvn.o gcm.o simpl.o ifopt.o live.o spill.o rega.o \ + emit.o AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o @@ -62,6 +62,7 @@ struct Target { void (*emitfin)(FILE *); char asloc[4]; char assym[4]; + uint cansel:1; }; #define BIT(n) ((bits)1 << (n)) @@ -183,6 +184,8 @@ enum { Oalloc1 = Oalloc16, Oflag = Oflagieq, Oflag1 = Oflagfuo, + Oxsel = Oxselieq, + Oxsel1 = Oxselfuo, NPubOp = Onop, Jjf = Jjfieq, Jjf1 = Jjffuo, @@ -199,6 +202,7 @@ enum { #define isparbh(o) INRANGE(o, Oparsb, Oparuh) #define isargbh(o) INRANGE(o, Oargsb, Oarguh) #define isretbh(j) INRANGE(j, Jretsb, Jretuh) +#define isxsel(o) INRANGE(o, Oxsel, Oxsel1) enum { Kx = -1, /* "top" class (see usecheck() and clsmerge()) */ @@ -482,7 +486,7 @@ void *vnew(ulong, size_t, Pool); void vfree(void *); void vgrow(void *, ulong); void addins(Ins **, uint *, Ins *); -void addbins(Blk *, Ins **, uint *); +void addbins(Ins **, uint *, Blk *); void strf(char[NString], char *, ...); uint32_t intern(char *); char *str(uint32_t); @@ -555,6 +559,7 @@ void fillloop(Fn *); void simpljmp(Fn *); int reaches(Fn *, Blk *, Blk *); int reachesnotvia(Fn *, Blk *, Blk *, Blk *); +int ifgraph(Blk *, Blk **, Blk **, Blk **); /* mem.c */ void promote(Fn *); @@ -595,6 +600,9 @@ void gvn(Fn *); int pinned(Ins *); void gcm(Fn *); +/* ifopt.c */ +void ifconvert(Fn *fn); + /* simpl.c */ void simpl(Fn *); diff --git a/amd64/emit.c b/amd64/emit.c index 6cf37ec..7290a80 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -576,6 +576,43 @@ emitins(Ins i, E *e) case Odbgloc: emitdbgloc(i.arg[0].val, i.arg[1].val, e->f); break; + case Oxselieq: + case Oxseline: + case Oxselisge: + case Oxselisgt: + case 
Oxselisle: + case Oxselislt: + case Oxseliuge: + case Oxseliugt: + case Oxseliule: + case Oxseliult: + case Oxselfeq: + case Oxselfge: + case Oxselfgt: + case Oxselfle: + case Oxselflt: + case Oxselfne: + case Oxselfo: + case Oxselfuo: + { + // TODO - how to do this "properly"? + static char *F0[] = { + "z", "nz", "ge", "g", "le", "l", "ae", "a", "be", "b", + "nz", "ae", "a", "be", "b", "nz", "p", "np" + }; + static char *F1[] = { + "nz", "z", "l", "le", "g", "ge", "b", "be", "a", "ae", + "z", "b", "be", "a", "ae", "z", "p", "np" + }; + char ins[16]; + sprintf(ins, "cmov%s %%1, %%=", F1[i.op-Oxselieq]); + if (req(i.to, i.arg[1])) + sprintf(ins, "cmov%s %%0, %%=", F0[i.op-Oxselieq]); + else if (!req(i.to, i.arg[0])) + emitf("mov %0, %=", &i, e); + emitf(ins, &i, e); + break; + } } } diff --git a/amd64/isel.c b/amd64/isel.c index 4aff0d6..c54b580 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -163,6 +163,10 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) m->base = r0; } } + else if (isxsel(op) && rtype(*r) == RCon) { + r1 = newtmp("isel", i->cls, fn); + emit(Ocopy, i->cls, r1, *r, R); + } *r = r1; } @@ -425,6 +429,24 @@ sel(Ins i, Num *tn, Fn *fn) case Oexts: case Otruncd: case Ocast: + case Oxselieq: + case Oxseline: + case Oxselisge: + case Oxselisgt: + case Oxselisle: + case Oxselislt: + case Oxseliuge: + case Oxseliugt: + case Oxseliule: + case Oxseliult: + case Oxselfeq: + case Oxselfge: + case Oxselfgt: + case Oxselfle: + case Oxselflt: + case Oxselfne: + case Oxselfo: + case Oxselfuo: case_OExt: Emit: emiti(i); @@ -493,6 +515,80 @@ flagi(Ins *i0, Ins *i) return 0; } +static Ins* +selsel(Fn *fn, Blk *b, Ins *i, Num *tn) +{ + Ref r, cr0, cr1; + int c, k, swap, gencmp, gencpy; + Ins *isel0, *isel1, *fi; + Tmp *t; + + assert(i->op == Osel1); + for (isel0 = i; b->ins < isel0; isel0--) { + if (isel0->op == Osel0) + break; + assert(isel0->op == Osel1); + } + assert(isel0->op == Osel0); + r = isel0->arg[0]; + assert(rtype(r) == RTmp); + t = &fn->tmp[r.val]; + fi = 
flagi(b->ins, isel0); + cr0 = cr1 = R; + gencmp = gencpy = swap = 0; + k = Kw; + c = Cine; + if (!fi || !req(fi->to, r)) { + gencmp = 1; + cr0 = r; + cr1 = CON_Z; + } else if (iscmp(fi->op, &k, &c) + && c != NCmpI+Cfeq /* see sel() */ + && c != NCmpI+Cfne) { + swap = cmpswap(fi->arg, c); + if (swap) + c = cmpop(c); + if (t->nuse == 1) { + gencmp = 1; + cr0 = fi->arg[0]; + cr1 = fi->arg[1]; + *fi = (Ins){.op = Onop}; + } + } else if (fi->op == Oand && t->nuse == 1 + && (rtype(fi->arg[0]) == RTmp || + rtype(fi->arg[1]) == RTmp)) { + fi->op = Oxtest; + fi->to = R; + if (rtype(fi->arg[1]) == RCon) { + r = fi->arg[1]; + fi->arg[1] = fi->arg[0]; + fi->arg[0] = r; + } + } else { + /* since flags are not tracked in liveness, + * the result of the flag-setting instruction + * has to be marked as live + */ + if (t->nuse == 1) + gencpy = 1; + } + /* generate conditional moves */ + for (isel1 = i; isel0 < isel1; --isel1) { + isel1->op = Oxselieq+c; + sel(*isel1, tn, fn); + } + if (gencmp) { + assert(!gencpy); + selcmp((Ref[2]){cr0, cr1}, k, swap, fn); + } + if (gencpy) { + assert(!gencmp); + emit(Ocopy, Kw, R, r, R); + } + *isel0 = (Ins){.op = Onop}; + return isel0; +} + static void seljmp(Blk *b, Fn *fn) { @@ -826,8 +922,14 @@ amd64_isel(Fn *fn) memset(num, 0, n * sizeof num[0]); anumber(num, b, fn->con); seljmp(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) - sel(*--i, num, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) { + --i; + assert(i->op != Osel0); + if (i->op == Osel1) + i = selsel(fn, b, i, num); + else + sel(*i, num, fn); + } idup(b, curi, &insb[NIns]-curi); } free(num); diff --git a/amd64/targ.c b/amd64/targ.c index fba9144..a7e4552 100644 --- a/amd64/targ.c +++ b/amd64/targ.c @@ -28,6 +28,7 @@ amd64_memargs(int op) .abi1 = amd64_sysv_abi, \ .isel = amd64_isel, \ .emitfn = amd64_emitfn, \ + .cansel = 1, \ Target T_amd64_sysv = { .name = "amd64_sysv", diff --git a/arm64/targ.c b/arm64/targ.c index 4c2643a..8f1e149 100644 --- a/arm64/targ.c +++ b/arm64/targ.c @@ -40,6 
+40,7 @@ arm64_memargs(int op) .isel = arm64_isel, \ .abi1 = arm64_abi, \ .emitfn = arm64_emitfn, \ + .cansel = 0, \ Target T_arm64 = { .name = "arm64", @@ -396,3 +396,48 @@ reachesnotvia(Fn *fn, Blk *b, Blk *to, Blk *excl) excl->visit = 1; return reaches(fn, b, to); } + +int +ifgraph(Blk *ifb, Blk **pthenb, Blk **pelseb, Blk **pjoinb) +{ + Blk *s1, *s2, **t; + + if (ifb->jmp.type != Jjnz) + return 0; + + s1 = ifb->s1; + s2 = ifb->s2; + if (s1->id > s2->id) { + s1 = ifb->s2; + s2 = ifb->s1; + t = pthenb; + pthenb = pelseb; + pelseb = t; + } + if (s1 == s2) + return 0; + + if (s1->jmp.type != Jjmp || s1->npred != 1) + return 0; + + if (s1->s1 == s2) { + /* if-then / if-else */ + if (s2->npred != 2) + return 0; + *pthenb = s1; + *pelseb = ifb; + *pjoinb = s2; + return 1; + } + + if (s2->jmp.type != Jjmp || s2->npred != 1) + return 0; + if (s1->s1 != s2->s1 || s1->s1->npred != 2) + return 0; + + assert(s1->s1 != ifb); + *pthenb = s1; + *pelseb = s2; + *pjoinb = s1->s1; + return 1; +} @@ -247,6 +247,10 @@ dedupins(Fn *fn, Blk *b, Ins *i) if (i->op == Onop || pinned(i)) return; + /* when sel instructions are inserted + * before gvn, we may want to optimize + * them here */ + assert(i->op != Osel0); assert(!req(i->to, R)); assoccon(fn, b, i); @@ -0,0 +1,121 @@ +#include "all.h" + +enum { + MaxIns = 2, + MaxPhis = 2, +}; + +static int +okbranch(Blk *b) +{ + Ins *i; + int n; + + n = 0; + for (i=b->ins; i<&b->ins[b->nins]; i++) + if (i->op != Odbgloc) { + if (pinned(i)) + return 0; + if (i->op != Onop) + n++; + } + return n <= MaxIns; +} + +static int +okjoin(Blk *b) +{ + Phi *p; + int n; + + n = 0; + for (p=b->phi; p; p=p->link) { + if (KBASE(p->cls) != 0) + return 0; + n++; + } + return n <= MaxPhis; +} + +static int +okgraph(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb) +{ + if (joinb->npred != 2 || !okjoin(joinb)) + return 0; + assert(thenb != elseb); + if (thenb != ifb && !okbranch(thenb)) + return 0; + if (elseb != ifb && !okbranch(elseb)) + return 0; + return 1; +} + 
+static void +convert(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb) +{ + Ins *ins, sel; + Phi *p; + uint nins; + + ins = vnew(0, sizeof ins[0], PHeap); + nins = 0; + addbins(&ins, &nins, ifb); + if (thenb != ifb) + addbins(&ins, &nins, thenb); + if (elseb != ifb) + addbins(&ins, &nins, elseb); + assert(joinb->npred == 2); + if (joinb->phi) { + sel = (Ins){ + .op = Osel0, .cls = Kw, + .arg = {ifb->jmp.arg}, + }; + addins(&ins, &nins, &sel); + } + sel = (Ins){.op = Osel1}; + for (p=joinb->phi; p; p=p->link) { + sel.to = p->to; + sel.cls = p->cls; + sel.arg[0] = phiarg(p, thenb); + sel.arg[1] = phiarg(p, elseb); + addins(&ins, &nins, &sel); + } + idup(ifb, ins, nins); + ifb->jmp.type = Jjmp; + ifb->jmp.arg = R; + ifb->s1 = joinb; + ifb->s2 = 0; + joinb->npred = 1; + joinb->pred[0] = ifb; + joinb->phi = 0; + vfree(ins); +} + +/* eliminate if-then[-else] graphlets + * using sel instructions + * needs rpo pred use; breaks cfg use + */ +void +ifconvert(Fn *fn) +{ + Blk *ifb, *thenb, *elseb, *joinb; + + if (debug['K']) + fputs("\n> If-conversion:\n", stderr); + + for (ifb=fn->start; ifb; ifb=ifb->link) + if (ifgraph(ifb, &thenb, &elseb, &joinb)) + if (okgraph(ifb, thenb, elseb, joinb)) { + if (debug['K']) + fprintf(stderr, + " @%s -> @%s, @%s -> @%s\n", + ifb->name, thenb->name, elseb->name, + joinb->name); + convert(ifb, thenb, elseb, joinb); + } + + if (debug['K']) { + fprintf(stderr, "\n> After if-conversion:\n"); + printfn(fn, stderr); + } +} @@ -11,6 +11,7 @@ char debug['Z'+1] = { ['N'] = 0, /* ssa construction */ ['C'] = 0, /* copy elimination */ ['F'] = 0, /* constant folding */ + ['K'] = 0, /* if-conversion */ ['A'] = 0, /* abi lowering */ ['I'] = 0, /* instruction selection */ ['L'] = 0, /* liveness */ @@ -81,6 +82,13 @@ func(Fn *fn) gcm(fn); filluse(fn); ssacheck(fn); + if (T.cansel) { + ifconvert(fn); + fillcfg(fn); + filluse(fn); + filldom(fn); + ssacheck(fn); + } T.abi1(fn); simpl(fn); fillcfg(fn); @@ -145,6 +145,8 @@ O(nop, T(x,x,x,x, x,x,x,x), 
F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) O(addr, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) O(blit0, T(m,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0) O(blit1, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0) +O(sel0, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0) +O(sel1, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0) O(swap, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0) O(sign, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) O(salloc, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) @@ -196,6 +198,26 @@ O(flagfne, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) O(flagfo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) O(flagfuo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) +/* Backend Flag Select (Condition Move) */ +O(xselieq, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseline, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselisge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselisgt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselisle, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselislt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseliuge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseliugt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseliule, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseliult, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfeq, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfge, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfgt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfle, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselflt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfne, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) 
+O(xselfo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfuo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) + #undef T #undef X #undef V diff --git a/rv64/targ.c b/rv64/targ.c index c0e5e18..fc6632c 100644 --- a/rv64/targ.c +++ b/rv64/targ.c @@ -50,6 +50,7 @@ Target T_rv64 = { .emitfn = rv64_emitfn, .emitfin = elf_emitfin, .asloc = ".L", + .cansel = 0, }; MAKESURE(rsave_size_ok, sizeof rv64_rsave == (NGPS+NFPS+1) * sizeof(int)); diff --git a/test/ifc.ssa b/test/ifc.ssa new file mode 100644 index 0000000..29f4457 --- /dev/null +++ b/test/ifc.ssa @@ -0,0 +1,238 @@ +export +function l $ifc1(l %v0, l %v1, w %c) { +@start + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifc2(l %v0, l %v1, w %p) { +@start + %c =w cnew %p, 42 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifc3(l %v0, l %v1, w %p) { +@start + %c =w cugtw %p, 42 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifclts(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w clts %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcles(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cles %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcgts(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cgts %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcges(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cges %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifceqs(s 
%s0, s %s1, l %v0, l %v1) { +@start + %c =w ceqs %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcnes(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cnes %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcos(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cos %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcuos(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cuos %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +# >>> driver +# extern long ifc1(long, long, int); +# extern long ifc2(long, long, int); +# extern long ifc3(long, long, int); +# extern long ifclts(float, float, long, long); +# extern long ifcles(float, float, long, long); +# extern long ifcgts(float, float, long, long); +# extern long ifcges(float, float, long, long); +# extern long ifceqs(float, float, long, long); +# extern long ifcnes(float, float, long, long); +# extern long ifcos(float, float, long, long); +# extern long ifcuos(float, float, long, long); +# int main() { +# return +# ifc1(7, 5, 0) != 7 +# || ifc1(7, 5, 1) != 5 +# || ifc1(7, 5, 33) != 5 +# || ifc2(7, 5, 42) != 7 +# || ifc2(7, 5, 41) != 5 +# || ifc2(7, 5, 43) != 5 +# || ifc3(7, 5, 42) != 7 +# || ifc3(7, 5, 41) != 7 +# || ifc3(7, 5, 43) != 5 +# || ifclts(5.0f, 6.0f, 7, 5) != 5 +# || ifclts(5.0f, 5.0f, 7, 5) != 7 +# || ifclts(5.0f, 4.0f, 7, 5) != 7 +# || ifclts(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifclts(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifclts(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcles(5.0f, 6.0f, 7, 5) != 5 +# || ifcles(5.0f, 5.0f, 7, 5) != 5 +# || ifcles(5.0f, 4.0f, 7, 5) != 7 +# || ifcles(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcles(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || 
ifcles(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcgts(5.0f, 6.0f, 7, 5) != 7 +# || ifcgts(5.0f, 5.0f, 7, 5) != 7 +# || ifcgts(5.0f, 4.0f, 7, 5) != 5 +# || ifcgts(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcgts(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifcgts(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcges(5.0f, 6.0f, 7, 5) != 7 +# || ifcges(5.0f, 5.0f, 7, 5) != 5 +# || ifcges(5.0f, 4.0f, 7, 5) != 5 +# || ifcges(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcges(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifcges(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifceqs(5.0f, 6.0f, 7, 5) != 7 +# || ifceqs(5.0f, 5.0f, 7, 5) != 5 +# || ifceqs(5.0f, 4.0f, 7, 5) != 7 +# || ifceqs(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifceqs(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifceqs(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcnes(5.0f, 6.0f, 7, 5) != 5 +# || ifcnes(5.0f, 5.0f, 7, 5) != 7 +# || ifcnes(5.0f, 4.0f, 7, 5) != 5 +# || ifcnes(5.0f, 0.0f/0.0f, 7, 5) != 5 +# || ifcnes(0.0f/0.0f, 5.0f, 7, 5) != 5 +# || ifcnes(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 5 +# || ifcos(5.0f, 6.0f, 7, 5) != 5 +# || ifcos(5.0f, 5.0f, 7, 5) != 5 +# || ifcos(5.0f, 4.0f, 7, 5) != 5 +# || ifcos(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcos(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifcos(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcuos(5.0f, 6.0f, 7, 5) != 7 +# || ifcuos(5.0f, 5.0f, 7, 5) != 7 +# || ifcuos(5.0f, 4.0f, 7, 5) != 7 +# || ifcuos(5.0f, 0.0f/0.0f, 7, 5) != 5 +# || ifcuos(0.0f/0.0f, 5.0f, 7, 5) != 5 +# || ifcuos(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 5 +# ; +# } +# <<< @@ -164,7 +164,7 @@ addins(Ins **pvins, uint *pnins, Ins *i) } void -addbins(Blk *b, Ins **pvins, uint *pnins) +addbins(Ins **pvins, uint *pnins, Blk *b) { Ins *i; @@ -281,6 +281,17 @@ igroup(Blk *b, Ins *i, Ins **i0, Ins **i1) assert(i < ie); *i1 = i + 1; return; + case Osel1: + for (; i>ib && (i-1)->op == Osel1; i--) + ; + assert(i->op == Osel0); + /* fall through */ + case Osel0: + *i0 = i++; + for (; i<ie && i->op == Osel1; i++) + ; + *i1 = i; + return; default: if (ispar(i->op)) goto case_Opar; |
