diff options
| -rw-r--r-- | Makefile | 3 | ||||
| -rw-r--r-- | all.h | 10 | ||||
| -rw-r--r-- | amd64/emit.c | 37 | ||||
| -rw-r--r-- | amd64/isel.c | 106 | ||||
| -rw-r--r-- | amd64/targ.c | 1 | ||||
| -rw-r--r-- | arm64/targ.c | 1 | ||||
| -rw-r--r-- | cfg.c | 45 | ||||
| -rw-r--r-- | gvn.c | 4 | ||||
| -rw-r--r-- | ifopt.c | 121 | ||||
| -rw-r--r-- | main.c | 8 | ||||
| -rw-r--r-- | ops.h | 22 | ||||
| -rw-r--r-- | rv64/targ.c | 1 | ||||
| -rw-r--r-- | test/ifc.ssa | 238 | ||||
| -rw-r--r-- | util.c | 13 |
14 files changed, 605 insertions, 5 deletions
@@ -5,7 +5,8 @@ PREFIX = /usr/local BINDIR = $(PREFIX)/bin COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \ - copy.o fold.o gvn.o gcm.o simpl.o live.o spill.o rega.o emit.o + copy.o fold.o gvn.o gcm.o simpl.o ifopt.o live.o spill.o rega.o \ + emit.o AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o @@ -62,6 +62,7 @@ struct Target { void (*emitfin)(FILE *); char asloc[4]; char assym[4]; + uint cansel:1; }; #define BIT(n) ((bits)1 << (n)) @@ -183,6 +184,8 @@ enum { Oalloc1 = Oalloc16, Oflag = Oflagieq, Oflag1 = Oflagfuo, + Oxsel = Oxselieq, + Oxsel1 = Oxselfuo, NPubOp = Onop, Jjf = Jjfieq, Jjf1 = Jjffuo, @@ -199,6 +202,7 @@ enum { #define isparbh(o) INRANGE(o, Oparsb, Oparuh) #define isargbh(o) INRANGE(o, Oargsb, Oarguh) #define isretbh(j) INRANGE(j, Jretsb, Jretuh) +#define isxsel(o) INRANGE(o, Oxsel, Oxsel1) enum { Kx = -1, /* "top" class (see usecheck() and clsmerge()) */ @@ -482,7 +486,7 @@ void *vnew(ulong, size_t, Pool); void vfree(void *); void vgrow(void *, ulong); void addins(Ins **, uint *, Ins *); -void addbins(Blk *, Ins **, uint *); +void addbins(Ins **, uint *, Blk *); void strf(char[NString], char *, ...); uint32_t intern(char *); char *str(uint32_t); @@ -555,6 +559,7 @@ void fillloop(Fn *); void simpljmp(Fn *); int reaches(Fn *, Blk *, Blk *); int reachesnotvia(Fn *, Blk *, Blk *, Blk *); +int ifgraph(Blk *, Blk **, Blk **, Blk **); /* mem.c */ void promote(Fn *); @@ -595,6 +600,9 @@ void gvn(Fn *); int pinned(Ins *); void gcm(Fn *); +/* ifopt.c */ +void ifconvert(Fn *fn); + /* simpl.c */ void simpl(Fn *); diff --git a/amd64/emit.c b/amd64/emit.c index 6cf37ec..7290a80 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -576,6 +576,43 @@ emitins(Ins i, E *e) case Odbgloc: emitdbgloc(i.arg[0].val, i.arg[1].val, e->f); break; + case Oxselieq: + case Oxseline: + case Oxselisge: + case Oxselisgt: + case Oxselisle: + case Oxselislt: + case Oxseliuge: + case Oxseliugt: + case Oxseliule: + case Oxseliult: + case Oxselfeq: + case Oxselfge: + case Oxselfgt: + case Oxselfle: + case Oxselflt: + case Oxselfne: + case Oxselfo: + case Oxselfuo: + { + // TODO - how to do this "properly"? + static char *F0[] = { + "z", "nz", "ge", "g", "le", "l", "ae", "a", "be", "b", + "nz", "ae", "a", "be", "b", "nz", "p", "np" + }; + static char *F1[] = { + "nz", "z", "l", "le", "g", "ge", "b", "be", "a", "ae", + "z", "b", "be", "a", "ae", "z", "p", "np" + }; + char ins[16]; + sprintf(ins, "cmov%s %%1, %%=", F1[i.op-Oxselieq]); + if (req(i.to, i.arg[1])) + sprintf(ins, "cmov%s %%0, %%=", F0[i.op-Oxselieq]); + else if (!req(i.to, i.arg[0])) + emitf("mov %0, %=", &i, e); + emitf(ins, &i, e); + break; + } } } diff --git a/amd64/isel.c b/amd64/isel.c index 4aff0d6..c54b580 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -163,6 +163,10 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) m->base = r0; } } + else if (isxsel(op) && rtype(*r) == RCon) { + r1 = newtmp("isel", i->cls, fn); + emit(Ocopy, i->cls, r1, *r, R); + } *r = r1; } @@ -425,6 +429,24 @@ sel(Ins i, Num *tn, Fn *fn) case Oexts: case Otruncd: case Ocast: + case Oxselieq: + case Oxseline: + case Oxselisge: + case Oxselisgt: + case Oxselisle: + case Oxselislt: + case Oxseliuge: + case Oxseliugt: + case Oxseliule: + case Oxseliult: + case Oxselfeq: + case Oxselfge: + case Oxselfgt: + case Oxselfle: + case Oxselflt: + case Oxselfne: + case Oxselfo: + case Oxselfuo: case_OExt: Emit: emiti(i); @@ -493,6 +515,80 @@ flagi(Ins *i0, Ins *i) return 0; } +static Ins* +selsel(Fn *fn, Blk *b, Ins *i, Num *tn) +{ + Ref r, cr0, cr1; + int c, k, swap, gencmp, gencpy; + Ins *isel0, *isel1, *fi; + Tmp *t; + + assert(i->op == Osel1); + for (isel0 = i; b->ins < isel0; isel0--) { + if (isel0->op == Osel0) + break; + assert(isel0->op == Osel1); + } + assert(isel0->op == Osel0); + r = isel0->arg[0]; + assert(rtype(r) == RTmp); + t = &fn->tmp[r.val]; + fi = flagi(b->ins, isel0); + cr0 = cr1 = R; + gencmp = gencpy = swap = 0; + k = Kw; + c = Cine; + if (!fi || !req(fi->to, r)) { + gencmp = 1; + cr0 = r; + cr1 = CON_Z; + } else if (iscmp(fi->op, &k, &c) + && c != NCmpI+Cfeq /* see sel() */ + && c != NCmpI+Cfne) { + swap = cmpswap(fi->arg, c); + if (swap) + c = cmpop(c); + if (t->nuse == 1) { + gencmp = 1; + cr0 = fi->arg[0]; + cr1 = fi->arg[1]; + *fi = (Ins){.op = Onop}; + } + } else if (fi->op == Oand && t->nuse == 1 + && (rtype(fi->arg[0]) == RTmp || + rtype(fi->arg[1]) == RTmp)) { + fi->op = Oxtest; + fi->to = R; + if (rtype(fi->arg[1]) == RCon) { + r = fi->arg[1]; + fi->arg[1] = fi->arg[0]; + fi->arg[0] = r; + } + } else { + /* since flags are not tracked in liveness, + * the result of the flag-setting instruction + * has to be marked as live + */ + if (t->nuse == 1) + gencpy = 1; + } + /* generate conditional moves */ + for (isel1 = i; isel0 < isel1; --isel1) { + isel1->op = Oxselieq+c; + sel(*isel1, tn, fn); + } + if (gencmp) { + assert(!gencpy); + selcmp((Ref[2]){cr0, cr1}, k, swap, fn); + } + if (gencpy) { + assert(!gencmp); + emit(Ocopy, Kw, R, r, R); + } + *isel0 = (Ins){.op = Onop}; + return isel0; +} + static void seljmp(Blk *b, Fn *fn) { @@ -826,8 +922,14 @@ amd64_isel(Fn *fn) memset(num, 0, n * sizeof num[0]); anumber(num, b, fn->con); seljmp(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) - sel(*--i, num, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) { + --i; + assert(i->op != Osel0); + if (i->op == Osel1) + i = selsel(fn, b, i, num); + else + sel(*i, num, fn); + } idup(b, curi, &insb[NIns]-curi); } free(num); diff --git a/amd64/targ.c b/amd64/targ.c index fba9144..a7e4552 100644 --- a/amd64/targ.c +++ b/amd64/targ.c @@ -28,6 +28,7 @@ amd64_memargs(int op) .abi1 = amd64_sysv_abi, \ .isel = amd64_isel, \ .emitfn = amd64_emitfn, \ + .cansel = 1, \ Target T_amd64_sysv = { .name = "amd64_sysv", diff --git a/arm64/targ.c b/arm64/targ.c index 4c2643a..8f1e149 100644 --- a/arm64/targ.c +++ b/arm64/targ.c @@ -40,6 +40,7 @@ arm64_memargs(int op) .isel = arm64_isel, \ .abi1 = arm64_abi, \ .emitfn = arm64_emitfn, \ + .cansel = 0, \ Target T_arm64 = { .name = "arm64", @@ -396,3 +396,48 @@ reachesnotvia(Fn *fn, Blk *b, Blk *to, Blk *excl) excl->visit = 1; return reaches(fn, b, to); } + +int +ifgraph(Blk *ifb, Blk **pthenb, Blk **pelseb, Blk **pjoinb) +{ + Blk *s1, *s2, **t; + + if (ifb->jmp.type != Jjnz) + return 0; + + s1 = ifb->s1; + s2 = ifb->s2; + if (s1->id > s2->id) { + s1 = ifb->s2; + s2 = ifb->s1; + t = pthenb; + pthenb = pelseb; + pelseb = t; + } + if (s1 == s2) + return 0; + + if (s1->jmp.type != Jjmp || s1->npred != 1) + return 0; + + if (s1->s1 == s2) { + /* if-then / if-else */ + if (s2->npred != 2) + return 0; + *pthenb = s1; + *pelseb = ifb; + *pjoinb = s2; + return 1; + } + + if (s2->jmp.type != Jjmp || s2->npred != 1) + return 0; + if (s1->s1 != s2->s1 || s1->s1->npred != 2) + return 0; + + assert(s1->s1 != ifb); + *pthenb = s1; + *pelseb = s2; + *pjoinb = s1->s1; + return 1; +} @@ -247,6 +247,10 @@ dedupins(Fn *fn, Blk *b, Ins *i) if (i->op == Onop || pinned(i)) return; + /* when sel instructions are inserted + * before gvn, we may want to optimize + * them here */ + assert(i->op != Osel0); assert(!req(i->to, R)); assoccon(fn, b, i); @@ -0,0 +1,121 @@ +#include "all.h" + +enum { + MaxIns = 2, + MaxPhis = 2, +}; + +static int +okbranch(Blk *b) +{ + Ins *i; + int n; + + n = 0; + for (i=b->ins; i<&b->ins[b->nins]; i++) + if (i->op != Odbgloc) { + if (pinned(i)) + return 0; + if (i->op != Onop) + n++; + } + return n <= MaxIns; +} + +static int +okjoin(Blk *b) +{ + Phi *p; + int n; + + n = 0; + for (p=b->phi; p; p=p->link) { + if (KBASE(p->cls) != 0) + return 0; + n++; + } + return n <= MaxPhis; +} + +static int +okgraph(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb) +{ + if (joinb->npred != 2 || !okjoin(joinb)) + return 0; + assert(thenb != elseb); + if (thenb != ifb && !okbranch(thenb)) + return 0; + if (elseb != ifb && !okbranch(elseb)) + return 0; + return 1; +} + +static void +convert(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb) +{ + Ins *ins, sel; + Phi *p; + uint nins; + + ins = vnew(0, sizeof ins[0], PHeap); + nins = 0; + addbins(&ins, &nins, ifb); + if (thenb != ifb) + addbins(&ins, &nins, thenb); + if (elseb != ifb) + addbins(&ins, &nins, elseb); + assert(joinb->npred == 2); + if (joinb->phi) { + sel = (Ins){ + .op = Osel0, .cls = Kw, + .arg = {ifb->jmp.arg}, + }; + addins(&ins, &nins, &sel); + } + sel = (Ins){.op = Osel1}; + for (p=joinb->phi; p; p=p->link) { + sel.to = p->to; + sel.cls = p->cls; + sel.arg[0] = phiarg(p, thenb); + sel.arg[1] = phiarg(p, elseb); + addins(&ins, &nins, &sel); + } + idup(ifb, ins, nins); + ifb->jmp.type = Jjmp; + ifb->jmp.arg = R; + ifb->s1 = joinb; + ifb->s2 = 0; + joinb->npred = 1; + joinb->pred[0] = ifb; + joinb->phi = 0; + vfree(ins); +} + +/* eliminate if-then[-else] graphlets + * using sel instructions + * needs rpo pred use; breaks cfg use + */ +void +ifconvert(Fn *fn) +{ + Blk *ifb, *thenb, *elseb, *joinb; + + if (debug['K']) + fputs("\n> If-conversion:\n", stderr); + + for (ifb=fn->start; ifb; ifb=ifb->link) + if (ifgraph(ifb, &thenb, &elseb, &joinb)) + if (okgraph(ifb, thenb, elseb, joinb)) { + if (debug['K']) + fprintf(stderr, + " @%s -> @%s, @%s -> @%s\n", + ifb->name, thenb->name, elseb->name, + joinb->name); + convert(ifb, thenb, elseb, joinb); + } + + if (debug['K']) { + fprintf(stderr, "\n> After if-conversion:\n"); + printfn(fn, stderr); + } +} @@ -11,6 +11,7 @@ char debug['Z'+1] = { ['N'] = 0, /* ssa construction */ ['C'] = 0, /* copy elimination */ ['F'] = 0, /* constant folding */ + ['K'] = 0, /* if-conversion */ ['A'] = 0, /* abi lowering */ ['I'] = 0, /* instruction selection */ ['L'] = 0, /* liveness */ @@ -81,6 +82,13 @@ func(Fn *fn) gcm(fn); filluse(fn); ssacheck(fn); + if (T.cansel) { + ifconvert(fn); + fillcfg(fn); + filluse(fn); + filldom(fn); + ssacheck(fn); + } T.abi1(fn); simpl(fn); fillcfg(fn); @@ -145,6 +145,8 @@ O(nop, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) O(addr, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) O(blit0, T(m,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0) O(blit1, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0) +O(sel0, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0) +O(sel1, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0) O(swap, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0) O(sign, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) O(salloc, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) @@ -196,6 +198,26 @@ O(flagfne, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) O(flagfo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) O(flagfuo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0) +/* Backend Flag Select (Condition Move) */ +O(xselieq, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseline, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselisge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselisgt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselisle, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselislt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseliuge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseliugt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseliule, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xseliult, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfeq, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfge, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfgt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfle, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselflt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfne, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) +O(xselfuo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0) + #undef T #undef X #undef V diff --git a/rv64/targ.c b/rv64/targ.c index c0e5e18..fc6632c 100644 --- a/rv64/targ.c +++ b/rv64/targ.c @@ -50,6 +50,7 @@ Target T_rv64 = { .emitfn = rv64_emitfn, .emitfin = elf_emitfin, .asloc = ".L", + .cansel = 0, }; MAKESURE(rsave_size_ok, sizeof rv64_rsave == (NGPS+NFPS+1) * sizeof(int)); diff --git a/test/ifc.ssa b/test/ifc.ssa new file mode 100644 index 0000000..29f4457 --- /dev/null +++ b/test/ifc.ssa @@ -0,0 +1,238 @@ +export +function l $ifc1(l %v0, l %v1, w %c) { +@start + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifc2(l %v0, l %v1, w %p) { +@start + %c =w cnew %p, 42 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifc3(l %v0, l %v1, w %p) { +@start + %c =w cugtw %p, 42 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifclts(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w clts %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcles(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cles %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcgts(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cgts %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcges(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cges %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifceqs(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w ceqs %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcnes(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cnes %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcos(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cos %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +export +function l $ifcuos(s %s0, s %s1, l %v0, l %v1) { +@start + %c =w cuos %s0, %s1 + jnz %c, @true, @false +@true + %v =l copy %v1 + jmp @end +@false + %v =l copy %v0 + jmp @end +@end + ret %v +} + +# >>> driver +# extern long ifc1(long, long, int); +# extern long ifc2(long, long, int); +# extern long ifc3(long, long, int); +# extern long ifclts(float, float, long, long); +# extern long ifcles(float, float, long, long); +# extern long ifcgts(float, float, long, long); +# extern long ifcges(float, float, long, long); +# extern long ifceqs(float, float, long, long); +# extern long ifcnes(float, float, long, long); +# extern long ifcos(float, float, long, long); +# extern long ifcuos(float, float, long, long); +# int main() { +# return +# ifc1(7, 5, 0) != 7 +# || ifc1(7, 5, 1) != 5 +# || ifc1(7, 5, 33) != 5 +# || ifc2(7, 5, 42) != 7 +# || ifc2(7, 5, 41) != 5 +# || ifc2(7, 5, 43) != 5 +# || ifc3(7, 5, 42) != 7 +# || ifc3(7, 5, 41) != 7 +# || ifc3(7, 5, 43) != 5 +# || ifclts(5.0f, 6.0f, 7, 5) != 5 +# || ifclts(5.0f, 5.0f, 7, 5) != 7 +# || ifclts(5.0f, 4.0f, 7, 5) != 7 +# || ifclts(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifclts(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifclts(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcles(5.0f, 6.0f, 7, 5) != 5 +# || ifcles(5.0f, 5.0f, 7, 5) != 5 +# || ifcles(5.0f, 4.0f, 7, 5) != 7 +# || ifcles(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcles(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifcles(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcgts(5.0f, 6.0f, 7, 5) != 7 +# || ifcgts(5.0f, 5.0f, 7, 5) != 7 +# || ifcgts(5.0f, 4.0f, 7, 5) != 5 +# || ifcgts(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcgts(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifcgts(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcges(5.0f, 6.0f, 7, 5) != 7 +# || ifcges(5.0f, 5.0f, 7, 5) != 5 +# || ifcges(5.0f, 4.0f, 7, 5) != 5 +# || ifcges(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcges(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifcges(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifceqs(5.0f, 6.0f, 7, 5) != 7 +# || ifceqs(5.0f, 5.0f, 7, 5) != 5 +# || ifceqs(5.0f, 4.0f, 7, 5) != 7 +# || ifceqs(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifceqs(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifceqs(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcnes(5.0f, 6.0f, 7, 5) != 5 +# || ifcnes(5.0f, 5.0f, 7, 5) != 7 +# || ifcnes(5.0f, 4.0f, 7, 5) != 5 +# || ifcnes(5.0f, 0.0f/0.0f, 7, 5) != 5 +# || ifcnes(0.0f/0.0f, 5.0f, 7, 5) != 5 +# || ifcnes(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 5 +# || ifcos(5.0f, 6.0f, 7, 5) != 5 +# || ifcos(5.0f, 5.0f, 7, 5) != 5 +# || ifcos(5.0f, 4.0f, 7, 5) != 5 +# || ifcos(5.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcos(0.0f/0.0f, 5.0f, 7, 5) != 7 +# || ifcos(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7 +# || ifcuos(5.0f, 6.0f, 7, 5) != 7 +# || ifcuos(5.0f, 5.0f, 7, 5) != 7 +# || ifcuos(5.0f, 4.0f, 7, 5) != 7 +# || ifcuos(5.0f, 0.0f/0.0f, 7, 5) != 5 +# || ifcuos(0.0f/0.0f, 5.0f, 7, 5) != 5 +# || ifcuos(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 5 +# ; +# } +# <<< @@ -164,7 +164,7 @@ addins(Ins **pvins, uint *pnins, Ins *i) } void -addbins(Blk *b, Ins **pvins, uint *pnins) +addbins(Ins **pvins, uint *pnins, Blk *b) { Ins *i; @@ -281,6 +281,17 @@ igroup(Blk *b, Ins *i, Ins **i0, Ins **i1) assert(i < ie); *i1 = i + 1; return; + case Osel1: + for (; i>ib && (i-1)->op == Osel1; i--) + ; + assert(i->op == Osel0); + /* fall through */ + case Osel0: + *i0 = i++; + for (; i<ie && i->op == Osel1; i++) + ; + *i1 = i; + return; default: if (ispar(i->op)) goto case_Opar; |
