aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 3
-rw-r--r-- all.h 10
-rw-r--r-- amd64/emit.c 37
-rw-r--r-- amd64/isel.c 106
-rw-r--r-- amd64/targ.c 1
-rw-r--r-- arm64/targ.c 1
-rw-r--r-- cfg.c 45
-rw-r--r-- gvn.c 4
-rw-r--r-- ifopt.c 121
-rw-r--r-- main.c 8
-rw-r--r-- ops.h 22
-rw-r--r-- rv64/targ.c 1
-rw-r--r-- test/ifc.ssa 238
-rw-r--r-- util.c 13
14 files changed, 605 insertions, 5 deletions
diff --git a/Makefile b/Makefile
index 7acaf35..79d9a99 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,8 @@ PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \
- copy.o fold.o gvn.o gcm.o simpl.o live.o spill.o rega.o emit.o
+ copy.o fold.o gvn.o gcm.o simpl.o ifopt.o live.o spill.o rega.o \
+ emit.o
AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o
ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o
RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o
diff --git a/all.h b/all.h
index df72617..64d5ebe 100644
--- a/all.h
+++ b/all.h
@@ -62,6 +62,7 @@ struct Target {
void (*emitfin)(FILE *);
char asloc[4];
char assym[4];
+ uint cansel:1;
};
#define BIT(n) ((bits)1 << (n))
@@ -183,6 +184,8 @@ enum {
Oalloc1 = Oalloc16,
Oflag = Oflagieq,
Oflag1 = Oflagfuo,
+ Oxsel = Oxselieq,
+ Oxsel1 = Oxselfuo,
NPubOp = Onop,
Jjf = Jjfieq,
Jjf1 = Jjffuo,
@@ -199,6 +202,7 @@ enum {
#define isparbh(o) INRANGE(o, Oparsb, Oparuh)
#define isargbh(o) INRANGE(o, Oargsb, Oarguh)
#define isretbh(j) INRANGE(j, Jretsb, Jretuh)
+#define isxsel(o) INRANGE(o, Oxsel, Oxsel1)
enum {
Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
@@ -482,7 +486,7 @@ void *vnew(ulong, size_t, Pool);
void vfree(void *);
void vgrow(void *, ulong);
void addins(Ins **, uint *, Ins *);
-void addbins(Blk *, Ins **, uint *);
+void addbins(Ins **, uint *, Blk *);
void strf(char[NString], char *, ...);
uint32_t intern(char *);
char *str(uint32_t);
@@ -555,6 +559,7 @@ void fillloop(Fn *);
void simpljmp(Fn *);
int reaches(Fn *, Blk *, Blk *);
int reachesnotvia(Fn *, Blk *, Blk *, Blk *);
+int ifgraph(Blk *, Blk **, Blk **, Blk **);
/* mem.c */
void promote(Fn *);
@@ -595,6 +600,9 @@ void gvn(Fn *);
int pinned(Ins *);
void gcm(Fn *);
+/* ifopt.c */
+void ifconvert(Fn *fn);
+
/* simpl.c */
void simpl(Fn *);
diff --git a/amd64/emit.c b/amd64/emit.c
index 6cf37ec..7290a80 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -576,6 +576,43 @@ emitins(Ins i, E *e)
case Odbgloc:
emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
break;
+ case Oxselieq:
+ case Oxseline:
+ case Oxselisge:
+ case Oxselisgt:
+ case Oxselisle:
+ case Oxselislt:
+ case Oxseliuge:
+ case Oxseliugt:
+ case Oxseliule:
+ case Oxseliult:
+ case Oxselfeq:
+ case Oxselfge:
+ case Oxselfgt:
+ case Oxselfle:
+ case Oxselflt:
+ case Oxselfne:
+ case Oxselfo:
+ case Oxselfuo:
+		{
+			/* cmov condition suffixes, indexed by i.op-Oxselieq.
+			 * F0[n] is the suffix that holds when the select
+			 * condition is true (result = arg 0); F1[n] is its
+			 * negation (result = arg 1).  The two tables must
+			 * stay exact negations of each other.
+			 * After a float compare the parity flag is set
+			 * when the operands are unordered, so "ordered"
+			 * is np and "unordered" is p.
+			 * NOTE(review): the feq/fne entries only test ZF;
+			 * presumably they rely on the flags left by the
+			 * materialized comparison result -- confirm.
+			 */
+			// TODO - how to do this "properly"?
+			static char *F0[] = {
+				"z", "nz", "ge", "g", "le", "l", "ae", "a", "be", "b",
+				"nz", "ae", "a", "be", "b", "nz", "np", "p"
+			};
+			static char *F1[] = {
+				"nz", "z", "l", "le", "g", "ge", "b", "be", "a", "ae",
+				"z", "b", "be", "a", "ae", "z", "p", "np"
+			};
+			char ins[16];
+
+			if (req(i.to, i.arg[1])) {
+				/* the destination already holds arg 1:
+				 * overwrite it with arg 0 when the
+				 * condition holds
+				 */
+				snprintf(ins, sizeof ins, "cmov%s %%0, %%=",
+					F0[i.op-Oxselieq]);
+			} else {
+				/* load arg 0 first (mov leaves the flags
+				 * alone), then replace it with arg 1 when
+				 * the condition does not hold
+				 */
+				if (!req(i.to, i.arg[0]))
+					emitf("mov %0, %=", &i, e);
+				snprintf(ins, sizeof ins, "cmov%s %%1, %%=",
+					F1[i.op-Oxselieq]);
+			}
+			emitf(ins, &i, e);
+			break;
+		}
}
}
diff --git a/amd64/isel.c b/amd64/isel.c
index 4aff0d6..c54b580 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -163,6 +163,10 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
m->base = r0;
}
}
+ else if (isxsel(op) && rtype(*r) == RCon) {
+ r1 = newtmp("isel", i->cls, fn);
+ emit(Ocopy, i->cls, r1, *r, R);
+ }
*r = r1;
}
@@ -425,6 +429,24 @@ sel(Ins i, Num *tn, Fn *fn)
case Oexts:
case Otruncd:
case Ocast:
+ case Oxselieq:
+ case Oxseline:
+ case Oxselisge:
+ case Oxselisgt:
+ case Oxselisle:
+ case Oxselislt:
+ case Oxseliuge:
+ case Oxseliugt:
+ case Oxseliule:
+ case Oxseliult:
+ case Oxselfeq:
+ case Oxselfge:
+ case Oxselfgt:
+ case Oxselfle:
+ case Oxselflt:
+ case Oxselfne:
+ case Oxselfo:
+ case Oxselfuo:
case_OExt:
Emit:
emiti(i);
@@ -493,6 +515,80 @@ flagi(Ins *i0, Ins *i)
return 0;
}
+/* Lower the sel0/sel1 group of block b whose last sel1 is i
+ * into xsel* instructions that read the cpu flags.
+ * The instruction providing the flags is picked as follows:
+ * - flagi() finds nothing, or finds an instruction that does
+ *   not define the condition: compare the condition with zero;
+ * - a comparison other than float eq/ne: use its condition
+ *   code, and re-emit the comparison when this is its only use;
+ * - a single-use 'and' with a temporary operand: turn it into
+ *   an xtest;
+ * - anything else: keep the instruction, forcing its result
+ *   live when this is its only use.
+ * Returns the slot of the sel0 (turned into a nop) so the
+ * caller's backward scan resumes before the group.
+ */
+static Ins*
+selsel(Fn *fn, Blk *b, Ins *i, Num *tn)
+{
+ Ref r, cr0, cr1;
+ int c, k, swap, gencmp, gencpy;
+ Ins *isel0, *isel1, *fi;
+ Tmp *t;
+
+ assert(i->op == Osel1);
+ /* walk back over the sel1 run to the sel0 heading it */
+ for (isel0 = i; b->ins < isel0; isel0--) {
+ if (isel0->op == Osel0)
+ break;
+ assert(isel0->op == Osel1);
+ }
+ assert(isel0->op == Osel0);
+ /* r is the condition selected on */
+ r = isel0->arg[0];
+ assert(rtype(r) == RTmp);
+ t = &fn->tmp[r.val];
+ fi = flagi(b->ins, isel0);
+ cr0 = cr1 = R;
+ gencmp = gencpy = swap = 0;
+ /* default: word-sized "condition != 0" */
+ k = Kw;
+ c = Cine;
+ if (!fi || !req(fi->to, r)) {
+ gencmp = 1;
+ cr0 = r;
+ cr1 = CON_Z;
+ } else if (iscmp(fi->op, &k, &c)
+ && c != NCmpI+Cfeq /* see sel() */
+ && c != NCmpI+Cfne) {
+ swap = cmpswap(fi->arg, c);
+ if (swap)
+ c = cmpop(c);
+ if (t->nuse == 1) {
+ /* the comparison result is only used here:
+ * drop it and compare right before the moves
+ */
+ gencmp = 1;
+ cr0 = fi->arg[0];
+ cr1 = fi->arg[1];
+ *fi = (Ins){.op = Onop};
+ }
+ } else if (fi->op == Oand && t->nuse == 1
+ && (rtype(fi->arg[0]) == RTmp ||
+ rtype(fi->arg[1]) == RTmp)) {
+ fi->op = Oxtest;
+ fi->to = R;
+ /* move a constant operand to the first slot */
+ if (rtype(fi->arg[1]) == RCon) {
+ r = fi->arg[1];
+ fi->arg[1] = fi->arg[0];
+ fi->arg[0] = r;
+ }
+ } else {
+ /* since flags are not tracked in liveness,
+ * the result of the flag-setting instruction
+ * has to be marked as live
+ */
+ if (t->nuse == 1)
+ gencpy = 1;
+ }
+ /* generate conditional moves */
+ for (isel1 = i; isel0 < isel1; --isel1) {
+ isel1->op = Oxselieq+c;
+ sel(*isel1, tn, fn);
+ }
+ /* NOTE(review): the block is selected from its last
+ * instruction to its first (see amd64_isel), so what is
+ * emitted here presumably lands before the moves -- confirm
+ */
+ if (gencmp) {
+ assert(!gencpy);
+ selcmp((Ref[2]){cr0, cr1}, k, swap, fn);
+ }
+ if (gencpy) {
+ assert(!gencmp);
+ emit(Ocopy, Kw, R, r, R);
+ }
+ *isel0 = (Ins){.op = Onop};
+ return isel0;
+}
+
static void
seljmp(Blk *b, Fn *fn)
{
@@ -826,8 +922,14 @@ amd64_isel(Fn *fn)
memset(num, 0, n * sizeof num[0]);
anumber(num, b, fn->con);
seljmp(b, fn);
- for (i=&b->ins[b->nins]; i!=b->ins;)
- sel(*--i, num, fn);
+ for (i=&b->ins[b->nins]; i!=b->ins;) {
+ --i;
+ assert(i->op != Osel0);
+ if (i->op == Osel1)
+ i = selsel(fn, b, i, num);
+ else
+ sel(*i, num, fn);
+ }
idup(b, curi, &insb[NIns]-curi);
}
free(num);
diff --git a/amd64/targ.c b/amd64/targ.c
index fba9144..a7e4552 100644
--- a/amd64/targ.c
+++ b/amd64/targ.c
@@ -28,6 +28,7 @@ amd64_memargs(int op)
.abi1 = amd64_sysv_abi, \
.isel = amd64_isel, \
.emitfn = amd64_emitfn, \
+ .cansel = 1, \
Target T_amd64_sysv = {
.name = "amd64_sysv",
diff --git a/arm64/targ.c b/arm64/targ.c
index 4c2643a..8f1e149 100644
--- a/arm64/targ.c
+++ b/arm64/targ.c
@@ -40,6 +40,7 @@ arm64_memargs(int op)
.isel = arm64_isel, \
.abi1 = arm64_abi, \
.emitfn = arm64_emitfn, \
+ .cansel = 0, \
Target T_arm64 = {
.name = "arm64",
diff --git a/cfg.c b/cfg.c
index 8047c12..8d31f18 100644
--- a/cfg.c
+++ b/cfg.c
@@ -396,3 +396,48 @@ reachesnotvia(Fn *fn, Blk *b, Blk *to, Blk *excl)
excl->visit = 1;
return reaches(fn, b, to);
}
+
+/* Recognize an if-then or if-then-else graphlet rooted at ifb.
+ * On success returns 1 and stores the block reached through
+ * ifb->s1 in *pthenb, the one reached through ifb->s2 in
+ * *pelseb and the block where both paths meet in *pjoinb.
+ * In the if-then shape one branch jumps straight to the join;
+ * that missing branch is reported as ifb itself.
+ * Returns 0, leaving the outputs untouched, when ifb does not
+ * end in a jnz or the shape does not match.
+ */
+int
+ifgraph(Blk *ifb, Blk **pthenb, Blk **pelseb, Blk **pjoinb)
+{
+ Blk *s1, *s2, **t;
+
+ if (ifb->jmp.type != Jjnz)
+ return 0;
+
+ s1 = ifb->s1;
+ s2 = ifb->s2;
+ /* make s1 the successor with the smaller id; the output
+ * pointers are swapped along with it so that *pthenb
+ * still receives the ifb->s1 side
+ */
+ if (s1->id > s2->id) {
+ s1 = ifb->s2;
+ s2 = ifb->s1;
+ t = pthenb;
+ pthenb = pelseb;
+ pelseb = t;
+ }
+ if (s1 == s2)
+ return 0;
+
+ /* s1 must be a plain block entered only from ifb */
+ if (s1->jmp.type != Jjmp || s1->npred != 1)
+ return 0;
+
+ if (s1->s1 == s2) {
+ /* if-then / if-else */
+ if (s2->npred != 2)
+ return 0;
+ *pthenb = s1;
+ *pelseb = ifb;
+ *pjoinb = s2;
+ return 1;
+ }
+
+ /* if-then-else: s2 must look like s1 and both must jump
+ * to the same block, which has no other predecessor
+ */
+ if (s2->jmp.type != Jjmp || s2->npred != 1)
+ return 0;
+ if (s1->s1 != s2->s1 || s1->s1->npred != 2)
+ return 0;
+
+ assert(s1->s1 != ifb);
+ *pthenb = s1;
+ *pelseb = s2;
+ *pjoinb = s1->s1;
+ return 1;
+}
diff --git a/gvn.c b/gvn.c
index 1db5bbc..92ee5eb 100644
--- a/gvn.c
+++ b/gvn.c
@@ -247,6 +247,10 @@ dedupins(Fn *fn, Blk *b, Ins *i)
if (i->op == Onop || pinned(i))
return;
+ /* when sel instructions are inserted
+ * before gvn, we may want to optimize
+ * them here */
+ assert(i->op != Osel0);
assert(!req(i->to, R));
assoccon(fn, b, i);
diff --git a/ifopt.c b/ifopt.c
new file mode 100644
index 0000000..3e45f52
--- /dev/null
+++ b/ifopt.c
@@ -0,0 +1,121 @@
+#include "all.h"
+
+/* size limits for a graphlet to be if-converted */
+enum {
+ MaxIns = 2, /* instructions per branch block, see okbranch() */
+ MaxPhis = 2, /* phis in the join block, see okjoin() */
+};
+
+/* A branch block can be merged into the if block when none of
+ * its instructions is pinned and it holds at most MaxIns real
+ * instructions; dbgloc and nop instructions are not counted.
+ */
+static int
+okbranch(Blk *b)
+{
+	Ins *cur;
+	int cnt;
+
+	cnt = 0;
+	for (cur=b->ins; cur<&b->ins[b->nins]; cur++) {
+		if (cur->op == Odbgloc)
+			continue;
+		if (pinned(cur))
+			return 0;
+		if (cur->op == Onop)
+			continue;
+		cnt++;
+	}
+	return cnt <= MaxIns;
+}
+
+/* A join block is acceptable when every phi it holds has a
+ * class of base 0 and there are at most MaxPhis of them.
+ */
+static int
+okjoin(Blk *b)
+{
+	Phi *phi;
+	int cnt;
+
+	cnt = 0;
+	phi = b->phi;
+	while (phi) {
+		if (KBASE(phi->cls) != 0)
+			return 0;
+		cnt++;
+		phi = phi->link;
+	}
+	return cnt <= MaxPhis;
+}
+
+/* Decide if the graphlet found by ifgraph() is worth
+ * converting: the join must have exactly two predecessors
+ * and pass okjoin(), and each branch that is a block of its
+ * own (i.e. not ifb) must pass okbranch().
+ */
+static int
+okgraph(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb)
+{
+	int thenok, elseok;
+
+	if (joinb->npred != 2)
+		return 0;
+	if (!okjoin(joinb))
+		return 0;
+	assert(thenb != elseb);
+	thenok = thenb == ifb || okbranch(thenb);
+	if (!thenok)
+		return 0;
+	elseok = elseb == ifb || okbranch(elseb);
+	return elseok ? 1 : 0;
+}
+
+/* Fold the graphlet (ifb, thenb, elseb, joinb) into ifb.
+ * The new body of ifb is: its own instructions, those of
+ * thenb, those of elseb (a branch equal to ifb contributes
+ * nothing), then, if joinb has phis, a sel0 on the branch
+ * condition followed by one sel1 per phi.
+ * ifb then jumps unconditionally to joinb, which is left with
+ * ifb as its only predecessor and no phis.  thenb and elseb
+ * are not modified here.
+ */
+static void
+convert(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb)
+{
+ Ins *ins, sel;
+ Phi *p;
+ uint nins;
+
+ ins = vnew(0, sizeof ins[0], PHeap);
+ nins = 0;
+ addbins(&ins, &nins, ifb);
+ if (thenb != ifb)
+ addbins(&ins, &nins, thenb);
+ if (elseb != ifb)
+ addbins(&ins, &nins, elseb);
+ assert(joinb->npred == 2);
+ if (joinb->phi) {
+ /* sel0 carries the condition for the sel1 run */
+ sel = (Ins){
+ .op = Osel0, .cls = Kw,
+ .arg = {ifb->jmp.arg},
+ };
+ addins(&ins, &nins, &sel);
+ }
+ sel = (Ins){.op = Osel1};
+ /* each phi becomes a sel1: arg 0 is the value coming from
+ * the then side, arg 1 the value from the else side
+ */
+ for (p=joinb->phi; p; p=p->link) {
+ sel.to = p->to;
+ sel.cls = p->cls;
+ sel.arg[0] = phiarg(p, thenb);
+ sel.arg[1] = phiarg(p, elseb);
+ addins(&ins, &nins, &sel);
+ }
+ idup(ifb, ins, nins);
+ /* rewire: ifb falls into joinb unconditionally */
+ ifb->jmp.type = Jjmp;
+ ifb->jmp.arg = R;
+ ifb->s1 = joinb;
+ ifb->s2 = 0;
+ joinb->npred = 1;
+ joinb->pred[0] = ifb;
+ joinb->phi = 0;
+ vfree(ins);
+}
+
+/* if-conversion pass: replace every small if-then[-else]
+ * graphlet accepted by okgraph() with straight-line code
+ * using sel instructions
+ * requires rpo, pred and use information;
+ * leaves cfg and use information stale
+ */
+void
+ifconvert(Fn *fn)
+{
+	Blk *b, *bthen, *belse, *bjoin;
+
+	if (debug['K'])
+		fputs("\n> If-conversion:\n", stderr);
+
+	for (b=fn->start; b; b=b->link) {
+		if (!ifgraph(b, &bthen, &belse, &bjoin))
+			continue;
+		if (!okgraph(b, bthen, belse, bjoin))
+			continue;
+		if (debug['K'])
+			fprintf(stderr,
+				" @%s -> @%s, @%s -> @%s\n",
+				b->name, bthen->name, belse->name,
+				bjoin->name);
+		convert(b, bthen, belse, bjoin);
+	}
+
+	if (!debug['K'])
+		return;
+	fprintf(stderr, "\n> After if-conversion:\n");
+	printfn(fn, stderr);
+}
diff --git a/main.c b/main.c
index 7a97f8a..f7180ce 100644
--- a/main.c
+++ b/main.c
@@ -11,6 +11,7 @@ char debug['Z'+1] = {
['N'] = 0, /* ssa construction */
['C'] = 0, /* copy elimination */
['F'] = 0, /* constant folding */
+ ['K'] = 0, /* if-conversion */
['A'] = 0, /* abi lowering */
['I'] = 0, /* instruction selection */
['L'] = 0, /* liveness */
@@ -81,6 +82,13 @@ func(Fn *fn)
gcm(fn);
filluse(fn);
ssacheck(fn);
+ if (T.cansel) {
+ ifconvert(fn);
+ fillcfg(fn);
+ filluse(fn);
+ filldom(fn);
+ ssacheck(fn);
+ }
T.abi1(fn);
simpl(fn);
fillcfg(fn);
diff --git a/ops.h b/ops.h
index 80b0d9b..2336d5f 100644
--- a/ops.h
+++ b/ops.h
@@ -145,6 +145,8 @@ O(nop, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(addr, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(blit0, T(m,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
O(blit1, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
+O(sel0, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(sel1, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(swap, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
O(sign, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(salloc, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
@@ -196,6 +198,26 @@ O(flagfne, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfuo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+/* Backend Flag Select (Conditional Move) */
+O(xselieq, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xseline, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselisge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselisgt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselisle, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselislt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xseliuge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xseliugt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xseliule, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xseliult, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselfeq, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselfge, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselfgt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselfle, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselflt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselfne, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselfo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xselfuo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+
#undef T
#undef X
#undef V
diff --git a/rv64/targ.c b/rv64/targ.c
index c0e5e18..fc6632c 100644
--- a/rv64/targ.c
+++ b/rv64/targ.c
@@ -50,6 +50,7 @@ Target T_rv64 = {
.emitfn = rv64_emitfn,
.emitfin = elf_emitfin,
.asloc = ".L",
+ .cansel = 0,
};
MAKESURE(rsave_size_ok, sizeof rv64_rsave == (NGPS+NFPS+1) * sizeof(int));
diff --git a/test/ifc.ssa b/test/ifc.ssa
new file mode 100644
index 0000000..29f4457
--- /dev/null
+++ b/test/ifc.ssa
@@ -0,0 +1,238 @@
+export
+function l $ifc1(l %v0, l %v1, w %c) {
+@start
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifc2(l %v0, l %v1, w %p) {
+@start
+ %c =w cnew %p, 42
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifc3(l %v0, l %v1, w %p) {
+@start
+ %c =w cugtw %p, 42
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifclts(s %s0, s %s1, l %v0, l %v1) {
+@start
+ %c =w clts %s0, %s1
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifcles(s %s0, s %s1, l %v0, l %v1) {
+@start
+ %c =w cles %s0, %s1
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifcgts(s %s0, s %s1, l %v0, l %v1) {
+@start
+ %c =w cgts %s0, %s1
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifcges(s %s0, s %s1, l %v0, l %v1) {
+@start
+ %c =w cges %s0, %s1
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifceqs(s %s0, s %s1, l %v0, l %v1) {
+@start
+ %c =w ceqs %s0, %s1
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifcnes(s %s0, s %s1, l %v0, l %v1) {
+@start
+ %c =w cnes %s0, %s1
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifcos(s %s0, s %s1, l %v0, l %v1) {
+@start
+ %c =w cos %s0, %s1
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+export
+function l $ifcuos(s %s0, s %s1, l %v0, l %v1) {
+@start
+ %c =w cuos %s0, %s1
+ jnz %c, @true, @false
+@true
+ %v =l copy %v1
+ jmp @end
+@false
+ %v =l copy %v0
+ jmp @end
+@end
+ ret %v
+}
+
+# >>> driver
+# extern long ifc1(long, long, int);
+# extern long ifc2(long, long, int);
+# extern long ifc3(long, long, int);
+# extern long ifclts(float, float, long, long);
+# extern long ifcles(float, float, long, long);
+# extern long ifcgts(float, float, long, long);
+# extern long ifcges(float, float, long, long);
+# extern long ifceqs(float, float, long, long);
+# extern long ifcnes(float, float, long, long);
+# extern long ifcos(float, float, long, long);
+# extern long ifcuos(float, float, long, long);
+# int main() {
+# return
+# ifc1(7, 5, 0) != 7
+# || ifc1(7, 5, 1) != 5
+# || ifc1(7, 5, 33) != 5
+# || ifc2(7, 5, 42) != 7
+# || ifc2(7, 5, 41) != 5
+# || ifc2(7, 5, 43) != 5
+# || ifc3(7, 5, 42) != 7
+# || ifc3(7, 5, 41) != 7
+# || ifc3(7, 5, 43) != 5
+# || ifclts(5.0f, 6.0f, 7, 5) != 5
+# || ifclts(5.0f, 5.0f, 7, 5) != 7
+# || ifclts(5.0f, 4.0f, 7, 5) != 7
+# || ifclts(5.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifclts(0.0f/0.0f, 5.0f, 7, 5) != 7
+# || ifclts(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcles(5.0f, 6.0f, 7, 5) != 5
+# || ifcles(5.0f, 5.0f, 7, 5) != 5
+# || ifcles(5.0f, 4.0f, 7, 5) != 7
+# || ifcles(5.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcles(0.0f/0.0f, 5.0f, 7, 5) != 7
+# || ifcles(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcgts(5.0f, 6.0f, 7, 5) != 7
+# || ifcgts(5.0f, 5.0f, 7, 5) != 7
+# || ifcgts(5.0f, 4.0f, 7, 5) != 5
+# || ifcgts(5.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcgts(0.0f/0.0f, 5.0f, 7, 5) != 7
+# || ifcgts(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcges(5.0f, 6.0f, 7, 5) != 7
+# || ifcges(5.0f, 5.0f, 7, 5) != 5
+# || ifcges(5.0f, 4.0f, 7, 5) != 5
+# || ifcges(5.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcges(0.0f/0.0f, 5.0f, 7, 5) != 7
+# || ifcges(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifceqs(5.0f, 6.0f, 7, 5) != 7
+# || ifceqs(5.0f, 5.0f, 7, 5) != 5
+# || ifceqs(5.0f, 4.0f, 7, 5) != 7
+# || ifceqs(5.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifceqs(0.0f/0.0f, 5.0f, 7, 5) != 7
+# || ifceqs(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcnes(5.0f, 6.0f, 7, 5) != 5
+# || ifcnes(5.0f, 5.0f, 7, 5) != 7
+# || ifcnes(5.0f, 4.0f, 7, 5) != 5
+# || ifcnes(5.0f, 0.0f/0.0f, 7, 5) != 5
+# || ifcnes(0.0f/0.0f, 5.0f, 7, 5) != 5
+# || ifcnes(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 5
+# || ifcos(5.0f, 6.0f, 7, 5) != 5
+# || ifcos(5.0f, 5.0f, 7, 5) != 5
+# || ifcos(5.0f, 4.0f, 7, 5) != 5
+# || ifcos(5.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcos(0.0f/0.0f, 5.0f, 7, 5) != 7
+# || ifcos(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 7
+# || ifcuos(5.0f, 6.0f, 7, 5) != 7
+# || ifcuos(5.0f, 5.0f, 7, 5) != 7
+# || ifcuos(5.0f, 4.0f, 7, 5) != 7
+# || ifcuos(5.0f, 0.0f/0.0f, 7, 5) != 5
+# || ifcuos(0.0f/0.0f, 5.0f, 7, 5) != 5
+# || ifcuos(0.0f/0.0f, 0.0f/0.0f, 7, 5) != 5
+# ;
+# }
+# <<<
diff --git a/util.c b/util.c
index 3b5c09d..c891c4e 100644
--- a/util.c
+++ b/util.c
@@ -164,7 +164,7 @@ addins(Ins **pvins, uint *pnins, Ins *i)
}
void
-addbins(Blk *b, Ins **pvins, uint *pnins)
+addbins(Ins **pvins, uint *pnins, Blk *b)
{
Ins *i;
@@ -281,6 +281,17 @@ igroup(Blk *b, Ins *i, Ins **i0, Ins **i1)
assert(i < ie);
*i1 = i + 1;
return;
+	case Osel1:
+		/* rewind to the first sel1 of the run */
+		for (; i>ib && (i-1)->op == Osel1; i--)
+			;
+		/* i is still a sel1 here; the run must be
+		 * headed by a sel0, step back onto it
+		 */
+		assert(i > ib && (i-1)->op == Osel0);
+		i--;
+		/* fall through */
+	case Osel0:
+		/* the group is the sel0 and every sel1
+		 * directly following it
+		 */
+		*i0 = i++;
+		for (; i<ie && i->op == Osel1; i++)
+			;
+		*i1 = i;
+		return;
default:
if (ispar(i->op))
goto case_Opar;