aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile3
-rw-r--r--all.h77
-rw-r--r--cfg.c316
-rw-r--r--copy.c475
-rw-r--r--fold.c347
-rw-r--r--gcm.c507
-rw-r--r--gvn.c748
-rw-r--r--ins.c195
-rw-r--r--main.c12
-rw-r--r--ops.h291
-rw-r--r--parse.c14
-rw-r--r--reassoc.c101
-rw-r--r--ssa.c2
-rw-r--r--test/_gcm1.ssa48
-rw-r--r--test/_gcm2.ssa43
-rw-r--r--test/_load-elim.ssa17
-rw-r--r--test/gvn-live-dead.ssa19
-rw-r--r--test/non0jnz.ssa31
-rw-r--r--util.c56
20 files changed, 2612 insertions, 691 deletions
diff --git a/.gitignore b/.gitignore
index afd08d7..09e4118 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ qbe
config.h
.comfile
*.out
+*~
diff --git a/Makefile b/Makefile
index 2482eb1..85783be 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,8 @@ PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \
- copy.o fold.o simpl.o live.o spill.o rega.o emit.o
+ copy.o fold.o gvn.o gcm.o ins.o simpl.o live.o \
+ spill.o rega.o emit.o
AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o
ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o
RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o
diff --git a/all.h b/all.h
index a5a0163..ceb7520 100644
--- a/all.h
+++ b/all.h
@@ -191,6 +191,7 @@ enum {
#define INRANGE(x, l, u) ((unsigned)(x) - l <= u - l) /* linear in x */
#define isstore(o) INRANGE(o, Ostoreb, Ostored)
#define isload(o) INRANGE(o, Oloadsb, Oload)
+#define isalloc(o) INRANGE(o, Oalloc4, Oalloc16)
#define isext(o) INRANGE(o, Oextsb, Oextuw)
#define ispar(o) INRANGE(o, Opar, Opare)
#define isarg(o) INRANGE(o, Oarg, Oargv)
@@ -214,8 +215,15 @@ struct Op {
char *name;
short argcls[2][4];
uint canfold:1;
- uint hasid:1;
- uint idval:1; /* identity value 0/1 */
+ uint hasid:1; /* op identity value? */
+ uint idval:1; /* identity value 0/1 */
+ uint commutes:1; /* commutative op? */
+ uint assoc:1; /* associative op? */
+ uint idemp:1; /* idempotent op? */
+ uint cmpeqwl:1; /* Kl/Kw cmp eq/ne? */
+ uint cmplgtewl:1; /* Kl/Kw cmp lt/gt/le/ge? */
+ uint eqval:1; /* 1 for eq; 0 for ne */
+ uint ispinned:1; /* GCM pinned op? */
};
struct Ins {
@@ -230,7 +238,8 @@ struct Phi {
Ref *arg;
Blk **blk;
uint narg;
- int cls;
+ short cls;
+ uint visit:1;
Phi *link;
};
@@ -251,6 +260,7 @@ struct Blk {
Blk *idom;
Blk *dom, *dlink;
+ int domdpth;
Blk **fron;
uint nfron;
@@ -344,6 +354,9 @@ struct Tmp {
Wuw
} width;
int visit;
+ uint gcmbid;
+ uint gcminsn; // TODO get rid
+ uint gcmdefinsn; // TODO get rid
};
struct Con {
@@ -450,6 +463,20 @@ struct Dat {
char isstr;
};
+typedef struct InsLoc InsLoc;
+
+struct InsLoc {
+ uint bid;
+ uint insn;
+};
+
+typedef struct InsMov InsMov;
+
+struct InsMov {
+ InsLoc from;
+ InsLoc to;
+};
+
/* main.c */
extern Target T;
extern char debug['Z'+1];
@@ -476,6 +503,7 @@ char *str(uint32_t);
int argcls(Ins *, int);
int isreg(Ref);
int iscmp(int, int *, int *);
+int invcmpwl(int);
void emit(int, int, Ref, Ref, Ref);
void emiti(Ins);
void idup(Blk *, Ins *, ulong);
@@ -484,16 +512,19 @@ int cmpop(int);
int cmpneg(int);
int clsmerge(short *, short);
int phicls(int, Tmp *);
+uint phiargn(Phi *, Blk *);
+Ref phiarg(Phi *, Blk *);
Ref newtmp(char *, int, Fn *);
void chuse(Ref, int, Fn *);
int symeq(Sym, Sym);
Ref newcon(Con *, Fn *);
Ref getcon(int64_t, Fn *);
int addcon(Con *, Con *, int);
+int isconbits(Fn *fn, Ref r, int64_t *v);
+int istmpconbits(Fn *fn, Ins *i, int64_t *v);
void salloc(Ref, Ref, Fn *);
void dumpts(BSet *, Tmp *, FILE *);
void runmatch(uchar *, Num *, Ref, Ref *);
-
void bsinit(BSet *, uint);
void bszero(BSet *);
uint bscount(BSet *);
@@ -525,7 +556,6 @@ void elimsb(Fn *);
/* cfg.c */
Blk *newblk(void);
-void edgedel(Blk *, Blk **);
void fillpreds(Fn *);
void fillcfg(Fn *);
void filldom(Fn *);
@@ -533,8 +563,20 @@ int sdom(Blk *, Blk *);
int dom(Blk *, Blk *);
void fillfron(Fn *);
void loopiter(Fn *, void (*)(Blk *, Blk *));
+void filldomdpth(Fn *);
+Blk *lca(Blk *, Blk *);
void fillloop(Fn *);
void simpljmp(Fn *);
+void replacepreds(Blk *, Blk *, Blk *);
+void killblks(Fn *);
+void blkmerge(Fn *);
+int ifgraph(Blk *, Blk **, Blk **, Blk **);
+int ifjoin(Blk *, Blk **, Blk **, Blk **);
+int emptyblk(Blk *);
+void ifelim(Fn *);
+void clrbvisit(Fn *);
+int reaches(Fn *,Blk *, Blk *);
+int reachesnotvia(Fn *, Blk *, Blk *, Blk *);
/* mem.c */
void promote(Fn *);
@@ -552,15 +594,36 @@ int storesz(Ins *);
void loadopt(Fn *);
/* ssa.c */
+void adduse(Tmp *, int, Blk *, ...);
void filluse(Fn *);
void ssa(Fn *);
void ssacheck(Fn *);
/* copy.c */
-void copy(Fn *);
+int iswu1(Fn *, Ref);
+void narrowpars(Fn *);
+Ref copyref(Fn *, Blk *, Ins *);
/* fold.c */
-void fold(Fn *);
+Ref foldref(Fn *, Ins *);
+
+/* gvn.c */
+extern Ref con01[2];
+int is0non0(Fn *, Blk *, Ref, int, int *);
+void gvn(Fn *);
+
+/* gcm.c */
+int isfixed(Fn *, Ins *);
+void gcm(Fn *);
+
+/* ins.c */
+void addins(Ins **, uint *, Ins *);
+void addbins(Blk *, Ins **, uint *);
+void nopunused(Fn *);
+void movins(Fn *, InsMov *, uint, int);
+
+/* reassoc.c */
+void reassoc(Fn *);
/* simpl.c */
void simpl(Fn *);
diff --git a/cfg.c b/cfg.c
index 99b6f40..1fe584c 100644
--- a/cfg.c
+++ b/cfg.c
@@ -39,37 +39,6 @@ prunephis(Fn *f)
}
}
-void
-edgedel(Blk *bs, Blk **pbd)
-{
- Blk *bd;
- Phi *p;
- uint a;
- int mult;
-
- bd = *pbd;
- mult = 1 + (bs->s1 == bs->s2);
- *pbd = 0;
- if (!bd || mult > 1)
- return;
- for (p=bd->phi; p; p=p->link) {
- for (a=0; p->blk[a]!=bs; a++)
- assert(a+1<p->narg);
- p->narg--;
- memmove(&p->blk[a], &p->blk[a+1],
- sizeof p->blk[0] * (p->narg-a));
- memmove(&p->arg[a], &p->arg[a+1],
- sizeof p->arg[0] * (p->narg-a));
- }
- if (bd->npred != 0) {
- for (a=0; bd->pred[a]!=bs; a++)
- assert(a+1<bd->npred);
- bd->npred--;
- memmove(&bd->pred[a], &bd->pred[a+1],
- sizeof bd->pred[0] * (bd->npred-a));
- }
-}
-
static void
addpred(Blk *bp, Blk *bc)
{
@@ -299,6 +268,49 @@ multloop(Blk *hd, Blk *b)
}
void
+filldomdpth(Fn *fn)
+{
+ Blk *b, *dom;
+ uint dpth;
+
+ for (b=fn->start; b; b=b->link)
+ b->domdpth = -1;
+
+ fn->start->domdpth = 0;
+
+ for (b=fn->start; b; b=b->link) {
+ if (b->domdpth != -1)
+ continue;
+ dpth = 1;
+ for (dom = b->idom; dom->domdpth == -1; dom = dom->idom)
+ dpth++;
+ dpth += dom->domdpth;
+ b->domdpth = dpth;
+ for (dom = b->idom; dom->domdpth == -1; dom = dom->idom)
+ dom->domdpth = --dpth;
+ }
+}
+
+/* least common ancestor in dom tree */
+Blk *
+lca(Blk *b1, Blk *b2)
+{
+ if (!b1)
+ return b2;
+ if (!b2)
+ return b1;
+ while (b1->domdpth > b2->domdpth)
+ b1 = b1->idom;
+ while (b2->domdpth > b1->domdpth)
+ b2 = b2->idom;
+ while (b1 != b2) {
+ b1 = b1->idom;
+ b2 = b2->idom;
+ }
+ return b1;
+}
+
+void
fillloop(Fn *fn)
{
Blk *b;
@@ -358,3 +370,245 @@ simpljmp(Fn *fn)
*p = ret;
free(uf);
}
+
+static void
+replacepred(Blk **blks, uint nblk, Blk *to, Blk *from)
+{
+ uint n;
+ for(n=0; n<nblk; n++)
+ if (blks[n] == from) {
+ blks[n] = to;
+ break;
+ }
+ assert(n != nblk);
+}
+
+/* replace b->pred[] and p->blk[] entries */
+void
+replacepreds(Blk *s, Blk *to, Blk *from)
+{
+ Phi *p;
+
+ if (!s)
+ return;
+ assert(s->npred);
+ replacepred(s->pred, s->npred, to, from);
+ for (p = s->phi; p; p = p->link) {
+ assert(p->narg == s->npred);
+ replacepred(p->blk, p->narg, to, from);
+ }
+}
+
+/* remove marked-dead blks - marked as fn->rpo[id] == 0 */
+void
+killblks(Fn *fn)
+{
+ Blk **pb;
+
+ for (pb = &fn->start; *pb;)
+ if (fn->rpo[(*pb)->id])
+ pb = &(*pb)->link;
+ else
+ *pb = (*pb)->link;
+}
+
+/* merge linear jmp chains */
+/* requires rpo pred, breaks cfg use */
+void
+blkmerge(Fn *fn)
+{
+ uint bid, nins;
+ Blk *curb, *b;
+ Ins *vins;
+
+ if (debug['B'])
+ fputs("\n> Block merge:\n", stderr);
+
+ vins = vnew(0, sizeof vins[0], PFn);
+ curb = 0;
+ /* linear jmp chains will be consecutive in rpo */
+ for (bid=0; bid<fn->nblk; bid++) {
+ b = fn->rpo[bid];
+ if (curb == 0) {
+ curb = b;
+ nins = 0;
+ } else
+ fn->rpo[bid] = 0;
+ addbins(b, &vins, &nins);
+ /* note - there are cases where GVN does not eliminate singleton phis */
+ if (b->jmp.type != Jjmp || b->s1->npred != 1 || b->s1->phi) {
+ idup(curb, vins, nins);
+ curb->nins = nins;
+ curb->jmp = b->jmp;
+ replacepreds(b->s1, curb, b);
+ if (b->s1 != b->s2)
+ replacepreds(b->s2, curb, b);
+ curb->s1 = b->s1;
+ curb->s2 = b->s2;
+ curb = 0;
+ } else {
+ assert(b->s1->id == bid+1);
+ if (debug['B'])
+ fprintf(stderr, " merging blocks @%s -> @%s\n", b->name, b->s1->name);
+ }
+ }
+ assert(curb == 0);
+ killblks(fn);
+}
+
+int
+lonesucc(Blk *b, Blk *s)
+{
+ assert(s);
+ if (s != b)
+ if (s->npred == 1)
+ if (s->pred[0] == b)
+ if (s->phi == 0)
+ return 1;
+ return 0;
+}
+
+int
+lonejmpsucc(Blk *b, Blk *s)
+{
+ return s->jmp.type == Jjmp && lonesucc(b, s);
+}
+
+/* (otherwise) isolated if-then[-else] graphlet */
+int
+ifgraph(Blk *ifb, Blk **pthenb, Blk **pelseb, Blk **pjoinb)
+{
+ if (ifb->jmp.type != Jjnz)
+ return 0;
+ assert(ifb->s1 && ifb->s2);
+ assert(ifb->s1 != ifb->s2); /* dubious */
+ *pthenb = ifb->s1;
+ *pelseb = ifb->s2;
+ *pjoinb = ifb->s1->s1;
+ if (ifb->s1 == ifb->s2->s1) {
+ /* empty then */
+ *pthenb = ifb;
+ *pjoinb = ifb->s1;
+ }
+ if (ifb->s1->s1 == ifb->s2)
+ /* empty else */
+ *pelseb = ifb;
+
+ if (*pthenb == ifb ||
+ ((*pthenb)->s1 == *pjoinb && lonejmpsucc(ifb, *pthenb)))
+ if (*pelseb == ifb ||
+ ((*pelseb)->s1 == *pjoinb && lonejmpsucc(ifb, *pelseb)))
+ /* there are cases where npred == 2 is not strictly needed - ifconvert for example */
+ if ((*pjoinb)->npred == 2)
+ return 1;
+
+ return 0;
+}
+
+/* join blk of if-then[-else] graphlet */
+int
+ifjoin(Blk *joinb, Blk **pifb, Blk **pthenb, Blk **pelseb)
+{
+ Blk *joinb1;
+
+ if (joinb->npred)
+ if (ifgraph(joinb->pred[0], pthenb, pelseb, &joinb1))
+ if (joinb == joinb1) {
+ *pifb = joinb->pred[0];
+ return 1;
+ }
+ if (joinb->npred && joinb->pred[0]->npred)
+ if (ifgraph(joinb->pred[0]->pred[0], pthenb, pelseb, &joinb1))
+ if (joinb == joinb1) {
+ *pifb = joinb->pred[0]->pred[0];
+ return 1;
+ }
+ return 0;
+}
+
+int
+emptyblk(Blk *b)
+{
+ Ins *i;
+
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ if (i->op != Onop)
+ if (i->op != Odbgloc)
+ return 0;
+ return 1;
+}
+
+/* remove empty jnz graphlets */
+/* needs rpo; breaks cfg */
+void
+ifelim(Fn *fn)
+{
+ uint bid;
+ Blk *ifb, *thenb, *elseb, *joinb;
+
+ for (bid = 0; bid < fn->nblk; bid++) {
+ ifb = fn->rpo[bid];
+ if (ifb == 0)
+ continue;
+ if (ifgraph(ifb, &thenb, &elseb, &joinb))
+ if (joinb->phi == 0)
+ if (thenb == ifb || emptyblk(thenb))
+ if (elseb == ifb || emptyblk(elseb)) {
+ if (debug['B'])
+ fprintf(stderr, " eliminating empty if @%s -> @%s, @%s -> @%s\n",
+ ifb->name, thenb->name, elseb->name, joinb->name);
+ if (thenb != ifb)
+ fn->rpo[thenb->id] = 0;
+ if (elseb != ifb)
+ fn->rpo[elseb->id] = 0;
+ ifb->jmp.type = Jjmp;
+ ifb->jmp.arg = R;
+ ifb->s1 = joinb;
+ ifb->s2 = 0;
+ }
+ }
+ killblks(fn);
+}
+
+void
+clrbvisit(Fn *fn)
+{
+ Blk *b;
+ for (b = fn->start; b; b = b->link)
+ b->visit = 0;
+}
+
+static int
+reaches1(Blk *b1, Blk *b2)
+{
+ assert(b2);
+ if (b1 == b2)
+ return 1;
+ if (b1 == 0 || b1->visit)
+ return 0;
+ b1->visit = 1;
+ return reaches1(b1->s1, b2) || reaches1(b1->s2, b2);
+}
+
+/* path from b1 to b2 not thru bnotvia? */
+/* uses b->visit */
+int
+reachesnotvia(Fn *fn, Blk *b1, Blk *b2, Blk *bnotvia)
+{
+ int rc;
+
+ if (bnotvia)
+ bnotvia->visit = 1;
+ rc = reaches1(b1, b2);
+ clrbvisit(fn);
+ return rc;
+}
+
+/* path from b1 to b2? */
+/* uses b->visit */
+int
+reaches(Fn *fn, Blk *b1, Blk *b2)
+{
+ return reachesnotvia(fn, b1, b2, 0);
+}
+
diff --git a/copy.c b/copy.c
index da3987f..95c7e00 100644
--- a/copy.c
+++ b/copy.c
@@ -1,217 +1,332 @@
#include "all.h"
-static int
-iscon(Ref r, int64_t bits, Fn *fn)
+static uint
+u64_wbits(uint64_t v)
{
- return rtype(r) == RCon
- && fn->con[r.val].type == CBits
- && fn->con[r.val].bits.i == bits;
+ uint n;
+
+ n = 0;
+ if (v >> 32) { n += 32; v >>= 32; }
+ if (v >> 16) { n += 16; v >>= 16; }
+ if (v >> 8) { n += 8; v >>= 8; }
+ if (v >> 4) { n += 4; v >>= 4; }
+ if (v >> 2) { n += 2; v >>= 2; }
+ if (v >> 1) { n += 1; v >>= 1; }
+ return n+v;
}
static int
-iscopy(Ins *i, Ref r, Fn *fn)
+EXTSIGNED[] = { /*extsb*/1, /*extub*/0, /*extsh*/1, /*extuh*/0, /*extsw*/1, /*extuw*/0 };
+
+static uint
+EXTMAXW[] = { /*extsb*/7, /*extub*/8, /*extsh*/15, /*extuh*/16, /*extsw*/31, /*extuw*/32 };
+
+static uint
+EXTW[] = { /*extsb*/8, /*extub*/8, /*extsh*/16, /*extuh*/16, /*extsw*/32, /*extuw*/32 };
+
+static uint
+STW[] = { /*storeb*/8, /*storeh*/16, /*storew*/32, /*storel*/64, /*stores*/32, /*stored*/64 };
+
+/* is the ref used only as a narrow value? */
+static int
+usewidthle(Fn *fn, Ref r, uint wbits)
{
- static bits extcpy[] = {
- [WFull] = 0,
- [Wsb] = BIT(Wsb) | BIT(Wsh) | BIT(Wsw),
- [Wub] = BIT(Wub) | BIT(Wuh) | BIT(Wuw),
- [Wsh] = BIT(Wsh) | BIT(Wsw),
- [Wuh] = BIT(Wuh) | BIT(Wuw),
- [Wsw] = BIT(Wsw),
- [Wuw] = BIT(Wuw),
- };
- Op *op;
- bits b;
Tmp *t;
+ Use *u;
+ Phi *p;
+ int b;
+ Ins *i;
+ Ref rc;
+ int64_t v;
- if (i->op == Ocopy)
+ if (isconbits(fn, r, &v))
+ if (u64_wbits(v) <= wbits)
return 1;
- op = &optab[i->op];
- if (op->hasid && KBASE(i->cls) == 0)
- return iscon(i->arg[1], op->idval, fn);
- if (!isext(i->op) || rtype(r) != RTmp)
+ if (rtype(r) != RTmp)
return 0;
- if (i->op == Oextsw || i->op == Oextuw)
- if (i->cls == Kw)
- return 1;
-
t = &fn->tmp[r.val];
- assert(KBASE(t->cls) == 0);
- if (i->cls == Kl && t->cls == Kw)
+ for (u = t->use; u < &t->use[t->nuse]; u++) {
+ switch (u->type) {
+ case UPhi:
+ p = u->u.phi;
+ if (p->visit)
+ continue;
+ p->visit = 1;
+ b = usewidthle(fn, p->to, wbits);
+ p->visit = 0;
+ if (b)
+ continue;
+ break;
+ case UIns:
+ i = u->u.ins;
+ assert(i != 0);
+ if (i->op == Ocopy)
+ if (usewidthle(fn, i->to, wbits))
+ continue;
+ if (isext(i->op)) {
+ if (EXTW[i->op - Oextsb] <= wbits)
+ continue;
+ else
+ if (usewidthle(fn, i->to, wbits))
+ continue;;
+ }
+ if (i->op == Oand) {
+ if (req(r, i->arg[0]))
+ rc = i->arg[1];
+ else {
+ assert(req(r, i->arg[1]));
+ rc = i->arg[0];
+ }
+ if (isconbits(fn, rc, &v))
+ if (u64_wbits(v) <= wbits)
+ continue;
+ break;
+ }
+ if (isstore(i->op))
+ if (req(r, i->arg[1]))
+ if (STW[i->op - Ostoreb] > wbits)
+ continue;
+ break;
+ default:
+ break;
+ }
return 0;
- b = extcpy[t->width];
- return (BIT(Wsb + (i->op-Oextsb)) & b) != 0;
+ }
+ return 1;
}
-static Ref
-copyof(Ref r, Ref *cpy)
+static Phi*
+findphi(Fn *fn, uint bid, Ref to)
{
- if (rtype(r) == RTmp && !req(cpy[r.val], R))
- return cpy[r.val];
- return r;
+ Phi *p;
+ for (p = fn->rpo[bid]->phi; p; p = p->link)
+ if (req(p->to, to))
+ break;
+ assert(p);
+ return p;
}
-/* detects a cluster of phis/copies redundant with 'r';
- * the algorithm is inspired by Section 3.2 of "Simple
- * and Efficient SSA Construction" by Braun M. et al.
- */
-static void
-phisimpl(Phi *p, Ref r, Ref *cpy, Use ***pstk, BSet *ts, BSet *as, Fn *fn)
+static uint
+uint_min(uint v1, uint v2)
{
- Use **stk, *u, *u1;
- uint nstk, a;
- int t;
- Ref r1;
- Phi *p0;
-
- bszero(ts);
- bszero(as);
- p0 = &(Phi){.narg = 0};
- stk = *pstk;
- nstk = 1;
- stk[0] = &(Use){.type = UPhi, .u.phi = p};
- while (nstk) {
- u = stk[--nstk];
- if (u->type == UIns && iscopy(u->u.ins, r, fn)) {
- p = p0;
- t = u->u.ins->to.val;
- }
- else if (u->type == UPhi) {
- p = u->u.phi;
- t = p->to.val;
- }
- else
- continue;
- if (bshas(ts, t))
- continue;
- bsset(ts, t);
- for (a=0; a<p->narg; a++) {
- r1 = copyof(p->arg[a], cpy);
- if (req(r1, r))
- continue;
- if (rtype(r1) != RTmp)
- return;
- bsset(as, r1.val);
+ return v1 < v2 ? v1 : v2;
+}
+
+/* is the ref def a narrow value? */
+static int
+defwidthle(Fn *fn, Ref r, uint wbits)
+{
+ Tmp *t;
+ Phi *p;
+ Ins *i;
+ uint n;
+ int64_t v;
+ int x;
+
+ if (isconbits(fn, r, &v))
+ if (u64_wbits(v) <= wbits)
+ return 1;
+ if (rtype(r) != RTmp)
+ return 0;
+ t = &fn->tmp[r.val];
+ if (t->cls != Kw)
+ return 0;
+ i = t->def;
+ if (i == 0) {
+ /* phi def */
+ p = findphi(fn, t->bid, r);
+ if (p->visit)
+ return 1;
+ p->visit = 1;
+ for (n = 0; n < p->narg; n++)
+ if (!defwidthle(fn, p->arg[n], wbits)) {
+ p->visit = 0;
+ return 0;
+ }
+ p->visit = 0;
+ return 1;
+ }
+ /* ins def */
+ if (i->op == Ocopy)
+ return defwidthle(fn, i->arg[0], wbits);
+ if (i->op == Oshr || i->op == Osar) {
+ if (isconbits(fn, i->arg[1], &v))
+ if (0 < v && v <= 32) {
+ if (i->op == Oshr && 32-v <= wbits)
+ return 1;
+ if (0 <= v && v < 32 && wbits < 32)
+ return defwidthle(fn, i->arg[0], uint_min((i->op == Osar ? 31 : 32), wbits+v));
}
- u = fn->tmp[t].use;
- u1 = &u[fn->tmp[t].nuse];
- vgrow(pstk, nstk+(u1-u));
- stk = *pstk;
- for (; u<u1; u++)
- stk[nstk++] = u;
+ return defwidthle(fn, i->arg[0], wbits);
+ }
+ if (iscmp(i->op, &x, &x))
+ return wbits >= 1;
+ if (i->op == Oand)
+ return defwidthle(fn, i->arg[0], wbits) || defwidthle(fn, i->arg[1], wbits);
+ if (i->op == Oor || i->op == Oxor)
+ return defwidthle(fn, i->arg[0], wbits) && defwidthle(fn, i->arg[1], wbits);
+ if (isext(i->op)) {
+ if (EXTSIGNED[i->op - Oextsb])
+ return defwidthle(fn, i->arg[0], uint_min(wbits, EXTMAXW[i->op - Oextsb]));
+ if (EXTW[i->op - Oextsb] <= wbits)
+ return 1;
+ return defwidthle(fn, i->arg[0], wbits);
}
- bsdiff(as, ts);
- if (!bscount(as))
- for (t=0; bsiter(ts, &t); t++)
- cpy[t] = r;
+ return 0;
+}
+
+/* is the ref a boolean - 0, 1 - value? */
+int
+iswu1(Fn *fn, Ref r)
+{
+ return defwidthle(fn, r, 1);
}
-static void
-subst(Ref *pr, Ref *cpy)
+static int
+isnarrowpar(Fn *fn, Ref r)
{
- assert(rtype(*pr) != RTmp || !req(cpy[pr->val], R));
- *pr = copyof(*pr, cpy);
+ Tmp *t;
+
+ if (rtype(r) != RTmp)
+ return 0;
+ t = &fn->tmp[r.val];
+ if (t->bid != fn->start->id || t->def == 0)
+ return 0;
+ return ispar(t->def->op);
}
-/* requires use and dom, breaks use */
+/* Insert extub/extuh instructions in start for pars used only narrowly */
+/* needs use; breaks use */
void
-copy(Fn *fn)
+narrowpars(Fn *fn)
{
- BSet ts[1], as[1];
- Use **stk;
- Phi *p, **pp;
- Ins *i;
Blk *b;
- uint n, a, eq;
- Ref *cpy, r, r1;
- int t;
-
- bsinit(ts, fn->ntmp);
- bsinit(as, fn->ntmp);
- cpy = emalloc(fn->ntmp * sizeof cpy[0]);
- stk = vnew(10, sizeof stk[0], PHeap);
-
- /* 1. build the copy-of map */
- for (n=0; n<fn->nblk; n++) {
- b = fn->rpo[n];
- for (p=b->phi; p; p=p->link) {
- assert(rtype(p->to) == RTmp);
- if (!req(cpy[p->to.val], R))
- continue;
- eq = 0;
- r = R;
- for (a=0; a<p->narg; a++)
- if (p->blk[a]->id < n) {
- r1 = copyof(p->arg[a], cpy);
- if (req(r, R) || req(r, UNDEF))
- r = r1;
- if (req(r1, r) || req(r1, UNDEF))
- eq++;
- }
- assert(!req(r, R));
- if (rtype(r) == RTmp
- && !dom(fn->rpo[fn->tmp[r.val].bid], b))
- cpy[p->to.val] = p->to;
- else if (eq == p->narg)
- cpy[p->to.val] = r;
- else {
- cpy[p->to.val] = p->to;
- phisimpl(p, r, cpy, &stk, ts, as, fn);
- }
- }
- for (i=b->ins; i<&b->ins[b->nins]; i++) {
- assert(rtype(i->to) <= RTmp);
- if (!req(cpy[i->to.val], R))
- continue;
- r = copyof(i->arg[0], cpy);
- if (iscopy(i, r, fn))
- cpy[i->to.val] = r;
- else
- cpy[i->to.val] = i->to;
- }
- }
+ int loop;
+ Ins *i, *ins;
+ uint npar, nins;
+ enum O extop;
+ Ref r;
- /* 2. remove redundant phis/copies
- * and rewrite their uses */
- for (b=fn->start; b; b=b->link) {
- for (pp=&b->phi; (p=*pp);) {
- r = cpy[p->to.val];
- if (!req(r, p->to)) {
- *pp = p->link;
- continue;
- }
- for (a=0; a<p->narg; a++)
- subst(&p->arg[a], cpy);
- pp=&p->link;
- }
- for (i=b->ins; i<&b->ins[b->nins]; i++) {
- r = cpy[i->to.val];
- if (!req(r, i->to)) {
- *i = (Ins){.op = Onop};
- continue;
- }
- subst(&i->arg[0], cpy);
- subst(&i->arg[1], cpy);
+ /* only useful for functions with loops */
+ loop = 0;
+ for (b = fn->start; b; b = b->link)
+ if (b->loop > 1) {
+ loop = 1;
+ break;
}
- subst(&b->jmp.arg, cpy);
+ if (!loop)
+ return;
+
+ b = fn->start;
+ npar = 0;
+
+ for (i = b->ins; i < &b->ins[b->nins]; i++) {
+ if (!ispar(i->op))
+ break;
+ npar++;
}
- if (debug['C']) {
- fprintf(stderr, "\n> Copy information:");
- for (t=Tmp0; t<fn->ntmp; t++) {
- if (req(cpy[t], R)) {
- fprintf(stderr, "\n%10s not seen!",
- fn->tmp[t].name);
- }
- else if (!req(cpy[t], TMP(t))) {
- fprintf(stderr, "\n%10s copy of ",
- fn->tmp[t].name);
- printref(cpy[t], fn, stderr);
+ if (npar == 0)
+ return;
+
+ nins = b->nins + npar;
+ ins = vnew(nins, sizeof ins[0], PFn); //alloc(nins * sizeof ins[0]);
+ memcpy(ins, b->ins, npar * sizeof ins[0]);
+ memcpy(ins + 2*npar, b->ins + npar, (b->nins - npar) * sizeof ins[0]);
+ b->ins = ins;
+ b->nins = nins;
+
+ for (i = b->ins; i < &b->ins[b->nins]; i++) {
+ if (!ispar(i->op))
+ break;
+ extop = Onop;
+ if (i->cls == Kw)
+ if (usewidthle(fn, i->to, 16)) {
+ if (usewidthle(fn, i->to, 8))
+ extop = Oextub;
+ else
+ extop = Oextuh;
}
+ if (extop == Onop) {
+ *(i+npar) = (Ins) {.op = Onop};
+ } else {
+ r = newtmp("vw", i->cls, fn);
+ *(i+npar) = (Ins) {.op = extop, .cls = i->cls, .to = i->to, .arg = {r}};
+ i->to = r;
}
- fprintf(stderr, "\n\n> After copy elimination:\n");
- printfn(fn, stderr);
}
- vfree(stk);
- free(cpy);
+}
+
+/* used by GVN */
+Ref
+copyref(Fn *fn, Blk *b, Ins *i)
+{
+ static bits extcpy[] = {
+ [WFull] = 0,
+ [Wsb] = BIT(Wsb) | BIT(Wsh) | BIT(Wsw),
+ [Wub] = BIT(Wub) | BIT(Wuh) | BIT(Wuw),
+ [Wsh] = BIT(Wsh) | BIT(Wsw),
+ [Wuh] = BIT(Wuh) | BIT(Wuw),
+ [Wsw] = BIT(Wsw),
+ [Wuw] = BIT(Wuw),
+ };
+ bits bext;
+ Tmp *t;
+ int64_t v;
+ int is0;
+
+ if (i->op == Ocopy)
+ return i->arg[0];
+
+ /* op identity value */
+ if (optab[i->op].hasid)
+ if (KBASE(i->cls) == 0) /* integer only - fp NaN! */
+ if (req(i->arg[1], con01[optab[i->op].idval]))
+ if (!optab[i->op].cmpeqwl || iswu1(fn, i->arg[0]))
+ return i->arg[0];
+
+ /* idempotent op with identical args */
+ if (optab[i->op].idemp)
+ if (req(i->arg[0], i->arg[1]))
+ return i->arg[0];
+
+ /* integer cmp with identical args */
+ if (optab[i->op].cmpeqwl || optab[i->op].cmplgtewl)
+ if (req(i->arg[0], i->arg[1]))
+ return con01[optab[i->op].eqval];
+
+ /* cmpeq/ne 0 with 0/non-0 inference from dominating jnz */
+ if (optab[i->op].cmpeqwl)
+ if (req(i->arg[1], con01[0]))
+ if (is0non0(fn, b, i->arg[0], argcls(i,0), &is0))
+ return con01[optab[i->op].eqval^is0^1];
+
+ /* redundant and mask */
+ if (i->op == Oand)
+ if (isconbits(fn, i->arg[1], &v))
+ if (((v+1) & v) == 0) /* v == 2^N-1 */
+ if (defwidthle(fn, i->arg[0], u64_wbits(v)))
+ return i->arg[0];
+
+ if (!isext(i->op) || rtype(i->arg[0]) != RTmp)
+ return R;
+ if (i->op == Oextsw || i->op == Oextuw)
+ if (i->cls == Kw)
+ return i->arg[0];
+
+ t = &fn->tmp[i->arg[0].val];
+ assert(KBASE(t->cls) == 0);
+ if (i->cls == Kl && t->cls == Kw)
+ return R;
+ bext = extcpy[t->width];
+ if ((BIT(Wsb + (i->op-Oextsb)) & bext) != 0)
+ return i->arg[0];
+
+ if (!isnarrowpar(fn, i->arg[0]))
+ if (usewidthle(fn, i->to, EXTW[i->op - Oextsb]))
+ return i->arg[0];
+ if (defwidthle(fn, i->arg[0], EXTMAXW[i->op - Oextsb]))
+ return i->arg[0];
+
+ return R;
}
diff --git a/fold.c b/fold.c
index 3ff0bc7..87e5824 100644
--- a/fold.c
+++ b/fold.c
@@ -1,22 +1,6 @@
#include "all.h"
-enum {
- Bot = -1, /* lattice bottom */
- Top = 0, /* lattice top (matches UNDEF) */
-};
-
-typedef struct Edge Edge;
-
-struct Edge {
- int dest;
- int dead;
- Edge *work;
-};
-
-static int *val;
-static Edge *flowrk, (*edge)[2];
-static Use **usewrk;
-static uint nuse;
+/* boring folding code */
static int
iscon(Con *c, int w, uint64_t k)
@@ -30,304 +14,6 @@ iscon(Con *c, int w, uint64_t k)
}
static int
-latval(Ref r)
-{
- switch (rtype(r)) {
- case RTmp:
- return val[r.val];
- case RCon:
- return r.val;
- default:
- die("unreachable");
- }
-}
-
-static int
-latmerge(int v, int m)
-{
- return m == Top ? v : (v == Top || v == m) ? m : Bot;
-}
-
-static void
-update(int t, int m, Fn *fn)
-{
- Tmp *tmp;
- uint u;
-
- m = latmerge(val[t], m);
- if (m != val[t]) {
- tmp = &fn->tmp[t];
- for (u=0; u<tmp->nuse; u++) {
- vgrow(&usewrk, ++nuse);
- usewrk[nuse-1] = &tmp->use[u];
- }
- val[t] = m;
- }
-}
-
-static int
-deadedge(int s, int d)
-{
- Edge *e;
-
- e = edge[s];
- if (e[0].dest == d && !e[0].dead)
- return 0;
- if (e[1].dest == d && !e[1].dead)
- return 0;
- return 1;
-}
-
-static void
-visitphi(Phi *p, int n, Fn *fn)
-{
- int v;
- uint a;
-
- v = Top;
- for (a=0; a<p->narg; a++)
- if (!deadedge(p->blk[a]->id, n))
- v = latmerge(v, latval(p->arg[a]));
- update(p->to.val, v, fn);
-}
-
-static int opfold(int, int, Con *, Con *, Fn *);
-
-static void
-visitins(Ins *i, Fn *fn)
-{
- int v, l, r;
-
- if (rtype(i->to) != RTmp)
- return;
- if (optab[i->op].canfold) {
- l = latval(i->arg[0]);
- if (!req(i->arg[1], R))
- r = latval(i->arg[1]);
- else
- r = CON_Z.val;
- if (l == Bot || r == Bot)
- v = Bot;
- else if (l == Top || r == Top)
- v = Top;
- else
- v = opfold(i->op, i->cls, &fn->con[l], &fn->con[r], fn);
- } else
- v = Bot;
- /* fprintf(stderr, "\nvisiting %s (%p)", optab[i->op].name, (void *)i); */
- update(i->to.val, v, fn);
-}
-
-static void
-visitjmp(Blk *b, int n, Fn *fn)
-{
- int l;
-
- switch (b->jmp.type) {
- case Jjnz:
- l = latval(b->jmp.arg);
- if (l == Bot) {
- edge[n][1].work = flowrk;
- edge[n][0].work = &edge[n][1];
- flowrk = &edge[n][0];
- }
- else if (iscon(&fn->con[l], 0, 0)) {
- assert(edge[n][0].dead);
- edge[n][1].work = flowrk;
- flowrk = &edge[n][1];
- }
- else {
- assert(edge[n][1].dead);
- edge[n][0].work = flowrk;
- flowrk = &edge[n][0];
- }
- break;
- case Jjmp:
- edge[n][0].work = flowrk;
- flowrk = &edge[n][0];
- break;
- case Jhlt:
- break;
- default:
- if (isret(b->jmp.type))
- break;
- die("unreachable");
- }
-}
-
-static void
-initedge(Edge *e, Blk *s)
-{
- if (s)
- e->dest = s->id;
- else
- e->dest = -1;
- e->dead = 1;
- e->work = 0;
-}
-
-static int
-renref(Ref *r)
-{
- int l;
-
- if (rtype(*r) == RTmp)
- if ((l=val[r->val]) != Bot) {
- *r = CON(l);
- return 1;
- }
- return 0;
-}
-
-/* require rpo, use, pred */
-void
-fold(Fn *fn)
-{
- Edge *e, start;
- Use *u;
- Blk *b, **pb;
- Phi *p, **pp;
- Ins *i;
- int t, d;
- uint n, a;
-
- val = emalloc(fn->ntmp * sizeof val[0]);
- edge = emalloc(fn->nblk * sizeof edge[0]);
- usewrk = vnew(0, sizeof usewrk[0], PHeap);
-
- for (t=0; t<fn->ntmp; t++)
- val[t] = Top;
- for (n=0; n<fn->nblk; n++) {
- b = fn->rpo[n];
- b->visit = 0;
- initedge(&edge[n][0], b->s1);
- initedge(&edge[n][1], b->s2);
- }
- initedge(&start, fn->start);
- flowrk = &start;
- nuse = 0;
-
- /* 1. find out constants and dead cfg edges */
- for (;;) {
- e = flowrk;
- if (e) {
- flowrk = e->work;
- e->work = 0;
- if (e->dest == -1 || !e->dead)
- continue;
- e->dead = 0;
- n = e->dest;
- b = fn->rpo[n];
- for (p=b->phi; p; p=p->link)
- visitphi(p, n, fn);
- if (b->visit == 0) {
- for (i=b->ins; i<&b->ins[b->nins]; i++)
- visitins(i, fn);
- visitjmp(b, n, fn);
- }
- b->visit++;
- assert(b->jmp.type != Jjmp
- || !edge[n][0].dead
- || flowrk == &edge[n][0]);
- }
- else if (nuse) {
- u = usewrk[--nuse];
- n = u->bid;
- b = fn->rpo[n];
- if (b->visit == 0)
- continue;
- switch (u->type) {
- case UPhi:
- visitphi(u->u.phi, u->bid, fn);
- break;
- case UIns:
- visitins(u->u.ins, fn);
- break;
- case UJmp:
- visitjmp(b, n, fn);
- break;
- default:
- die("unreachable");
- }
- }
- else
- break;
- }
-
- if (debug['F']) {
- fprintf(stderr, "\n> SCCP findings:");
- for (t=Tmp0; t<fn->ntmp; t++) {
- if (val[t] == Bot)
- continue;
- fprintf(stderr, "\n%10s: ", fn->tmp[t].name);
- if (val[t] == Top)
- fprintf(stderr, "Top");
- else
- printref(CON(val[t]), fn, stderr);
- }
- fprintf(stderr, "\n dead code: ");
- }
-
- /* 2. trim dead code, replace constants */
- d = 0;
- for (pb=&fn->start; (b=*pb);) {
- if (b->visit == 0) {
- d = 1;
- if (debug['F'])
- fprintf(stderr, "%s ", b->name);
- edgedel(b, &b->s1);
- edgedel(b, &b->s2);
- *pb = b->link;
- continue;
- }
- for (pp=&b->phi; (p=*pp);)
- if (val[p->to.val] != Bot)
- *pp = p->link;
- else {
- for (a=0; a<p->narg; a++)
- if (!deadedge(p->blk[a]->id, b->id))
- renref(&p->arg[a]);
- pp = &p->link;
- }
- for (i=b->ins; i<&b->ins[b->nins]; i++)
- if (renref(&i->to))
- *i = (Ins){.op = Onop};
- else {
- for (n=0; n<2; n++)
- renref(&i->arg[n]);
- if (isstore(i->op))
- if (req(i->arg[0], UNDEF))
- *i = (Ins){.op = Onop};
- }
- renref(&b->jmp.arg);
- if (b->jmp.type == Jjnz && rtype(b->jmp.arg) == RCon) {
- if (iscon(&fn->con[b->jmp.arg.val], 0, 0)) {
- edgedel(b, &b->s1);
- b->s1 = b->s2;
- b->s2 = 0;
- } else
- edgedel(b, &b->s2);
- b->jmp.type = Jjmp;
- b->jmp.arg = R;
- }
- pb = &b->link;
- }
-
- if (debug['F']) {
- if (!d)
- fprintf(stderr, "(none)");
- fprintf(stderr, "\n\n> After constant folding:\n");
- printfn(fn, stderr);
- }
-
- free(val);
- free(edge);
- vfree(usewrk);
-}
-
-/* boring folding code */
-
-static int
foldint(Con *res, int op, int w, Con *cl, Con *cr)
{
union {
@@ -516,7 +202,7 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
}
}
-static int
+static Ref
opfold(int op, int cls, Con *cl, Con *cr, Fn *fn)
{
Ref r;
@@ -524,12 +210,37 @@ opfold(int op, int cls, Con *cl, Con *cr, Fn *fn)
if (cls == Kw || cls == Kl) {
if (foldint(&c, op, cls == Kl, cl, cr))
- return Bot;
+ return R;
} else
foldflt(&c, op, cls == Kd, cl, cr);
if (!KWIDE(cls))
c.bits.i &= 0xffffffff;
r = newcon(&c, fn);
assert(!(cls == Ks || cls == Kd) || c.flt);
- return r.val;
+ return r;
+}
+
+/* used by GVN */
+Ref
+foldref(Fn *fn, Ins *i)
+{
+ Ref rr;
+ Con *cl, *cr;
+
+ if (rtype(i->to) != RTmp)
+ return R;
+ if (optab[i->op].canfold) {
+ if (rtype(i->arg[0]) != RCon)
+ return R;
+ cl = &fn->con[i->arg[0].val];
+ rr = i->arg[1];
+ if (req(rr, R))
+ rr = CON_Z;
+ if (rtype(rr) != RCon)
+ return R;
+ cr = &fn->con[rr.val];
+
+ return opfold(i->op, i->cls, cl, cr, fn);
+ }
+ return R;
}
diff --git a/gcm.c b/gcm.c
new file mode 100644
index 0000000..515d41f
--- /dev/null
+++ b/gcm.c
@@ -0,0 +1,507 @@
+#include "all.h"
+
+#define NOBID (-1u)
+
+/* ins can trap at runtime */
+static int
+istrapping(Fn *fn, Ins *i)
+{
+ int64_t v;
+
+ if (KBASE(i->cls) == 0)
+ if (INRANGE(i->op, Odiv, Ourem))
+ if (!isconbits(fn, i->arg[1], &v) || v == 0)
+ return 1;
+ return 0;
+}
+
+/* fixed ins that can be eliminated if unused */
+static int
+canelim(Fn *fn, Ins *i)
+{
+ return isload(i->op) || isalloc(i->op) || istrapping(fn, i);
+}
+
+/* ins must stay in def blk */
+int
+isfixed(Fn *fn, Ins *i)
+{
+ return optab[i->op].ispinned || istrapping(fn, i);
+}
+
+static uint earlyins(Fn *, Blk *, Ins *);
+
+/* return earlybid of ref def ins */
+static uint
+schedearly(Fn *fn, Ref r)
+{
+ Tmp *t;
+ Blk *b;
+
+ if (rtype(r) != RTmp)
+ return 0 /* root/start */;
+
+ t = &fn->tmp[r.val];
+ if (t->gcmbid != NOBID)
+ return t->gcmbid; /* already visited/visiting */
+
+ b = fn->rpo[t->bid];
+ if (t->def) {
+ /* def is an ins */
+ assert(b->ins <= t->def && t->def < &b->ins[b->nins]);
+ t->gcmbid = 0; /* mark visiting root/start blk */
+ t->gcmbid = earlyins(fn, b, t->def); /* schedule ins input defs */
+ } else {
+ /* def is a phi - it stays in its def blk */
+ t->gcmbid = t->bid;
+ }
+
+ return t->gcmbid;
+}
+
+/* return deepest domdpth bid of arg defs */
+static uint
+earlyins(Fn *fn, Blk *b, Ins* i)
+{
+ uint earlybid, arg1earlybid;
+
+ earlybid = schedearly(fn, i->arg[0]);
+ assert(earlybid != NOBID);
+ arg1earlybid = schedearly(fn, i->arg[1]);
+ assert(arg1earlybid != NOBID);
+ if (fn->rpo[earlybid]->domdpth < fn->rpo[arg1earlybid]->domdpth) {
+ assert(dom(fn->rpo[earlybid], fn->rpo[arg1earlybid]));
+ earlybid = arg1earlybid;
+ }
+
+ /* fixed ins remain in their defining blk */
+ return isfixed(fn, i) ? b->id : earlybid;
+}
+
+static void
+earlyblk(Fn *fn, uint bid)
+{
+ Blk *b;
+ Phi *p;
+ Ins *i;
+ uint n;
+
+ b = fn->rpo[bid];
+ for (p = b->phi; p; p = p->link)
+ for (n = 0; n < p->narg; n++)
+ schedearly(fn, p->arg[n]);
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ if (isfixed(fn, i))
+ earlyins(fn, b, i);
+ schedearly(fn, b->jmp.arg);
+}
+
+/* least common ancestor in dom tree */
+static uint
+lcabid(Fn *fn, uint bid1, uint bid2)
+{
+ Blk *b;
+
+ if (bid1 == NOBID)
+ return bid2;
+ if (bid2 == NOBID)
+ return bid1;
+
+ b = lca(fn->rpo[bid1], fn->rpo[bid2]);
+ assert(b);
+ return b->id;
+}
+
+static uint
+bestbid(Fn *fn, uint earlybid, uint latebid)
+{
+ Blk *curb, *earlyb, *bestb;
+
+ if (latebid == NOBID)
+ return NOBID; /* unused */
+
+ assert(earlybid != NOBID);
+
+ earlyb = fn->rpo[earlybid];
+ bestb = curb = fn->rpo[latebid];
+ assert(dom(earlyb, curb));
+
+ while (curb != earlyb) {
+ curb = curb->idom;
+ if (curb->loop < bestb->loop)
+ bestb = curb;
+ }
+ return bestb->id;
+}
+
+static uint lateins(Fn *, Blk *, Ins *, Ref r);
+static uint latephi(Fn *, Phi *, Ref r);
+static uint latejmp(Blk *, Ref r);
+
+/* return lca bid of ref uses */
+static uint
+schedlate(Fn *fn, Ref r)
+{
+ Tmp *t;
+ Blk *b;
+ Use *u;
+ uint earlybid;
+ uint latebid;
+ uint uselatebid;
+
+ if (rtype(r) != RTmp)
+ return NOBID;
+
+ t = &fn->tmp[r.val];
+ if (t->visit)
+ return t->gcmbid; /* already visited/visiting */
+
+ t->visit = 1; /* mark visiting/visited */
+ earlybid = t->gcmbid;
+ if (earlybid == NOBID)
+ return NOBID; /* not used */
+ t->gcmbid = t->bid; /* t->gcmbid is now latebid */
+
+ latebid = NOBID;
+ for (u = t->use; u < &t->use[t->nuse]; u++) {
+ assert(u->bid < fn->nblk);
+ b = fn->rpo[u->bid];
+ switch (u->type) {
+ case UXXX:
+ die("unreachable");
+ break;
+ case UPhi:
+ uselatebid = latephi(fn, u->u.phi, r);
+ break;
+ case UIns:
+ uselatebid = lateins(fn, b, u->u.ins, r);
+ break;
+ case UJmp:
+ uselatebid = latejmp(b, r);
+ break;
+ }
+ latebid = lcabid(fn, latebid, uselatebid);
+ }
+
+ /* phis stay in their def blk */
+ if (t->def) {
+ /* allow elimination of unused load/alloc/trapping ins */
+ if (latebid == NOBID && canelim(fn, t->def))
+ t->gcmbid = NOBID;
+ /* ... otherwise fixed ins stay in defining blk */
+ else if(!isfixed(fn, t->def))
+ t->gcmbid = bestbid(fn, earlybid, latebid);
+ }
+
+ return t->gcmbid;
+}
+
+/* return lca bid of uses, or NOBID if no active uses */
+static uint
+lateins(Fn *fn, Blk *b, Ins *i, Ref r)
+{
+ uint latebid;
+
+ assert(i->op == Onop || req(i->arg[0], r) || req(i->arg[1], r));
+ if (i->op == Onop)
+ return NOBID; /* already eliminated */
+
+ assert(b->ins <= i && i < &b->ins[b->nins]);
+
+ latebid = schedlate(fn, i->to);
+ /* allow elimination of unused load/alloc/trapping ins */
+ if (latebid == NOBID)
+ if (canelim(fn, i))
+ return NOBID;
+ /* ... otherwise fixed ins stay in defining blk */
+ return isfixed(fn, i) ? b->id : latebid;
+}
+
+static uint
+latephi(Fn *fn, Phi *p, Ref r)
+{
+ uint n;
+ uint latebid;
+
+ if (!p->narg)
+ return NOBID; /* marked as unused */
+
+ latebid = NOBID;
+ for (n = 0; n < p->narg; n++)
+ if (req(p->arg[n], r))
+ latebid = lcabid(fn, latebid, p->blk[n]->id);
+
+ assert(latebid != NOBID);
+ return latebid;
+}
+
+static uint
+latejmp(Blk *b, Ref r)
+{
+ if (req(b->jmp.arg, R))
+ return NOBID;
+ else {
+ assert(req(b->jmp.arg, r));
+ return b->id;
+ }
+}
+
+static void
+lateblk(Fn *fn, uint bid)
+{
+ Blk *b;
+ Phi **pp;
+ Ins *i;
+
+ b = fn->rpo[bid];
+ for (pp = &b->phi; *(pp);)
+ if (schedlate(fn, (*pp)->to) == NOBID) {
+ /* unused */
+ (*pp)->narg = 0; /* mark unused */
+ *pp = (*pp)->link; /* remove phi */
+ } else
+ pp = &(*pp)->link;
+
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ if (isfixed(fn, i))
+ lateins(fn, b, i, i->arg[0]);
+}
+
+static void
+addgcmins(Fn *fn, Ins *vins, uint nins)
+{
+ Ins *i;
+ Tmp *t;
+ Blk *b;
+
+ for (i = vins; i < &vins[nins]; i++) {
+ assert(rtype(i->to) == RTmp);
+ t = &fn->tmp[i->to.val];
+ b = fn->rpo[t->gcmbid];
+ addins(&b->ins, &b->nins, i);
+ }
+}
+
+/* remove unused instructions */
+/* move instructions to (end of) target block */
+/* use-before-def is fixed up afterwards */
+static void
+gcmmove(Fn *fn)
+{
+ Tmp *t;
+ Ins *vins, *i;
+ uint nins;
+
+ nins = 0;
+ vins = vnew(nins, sizeof vins[0], PFn);
+
+ for (t=&fn->tmp[Tmp0]; t < &fn->tmp[fn->ntmp]; t++) {
+ if (t->def == 0)
+ continue;
+ if (t->bid == t->gcmbid)
+ continue;
+ i = t->def;
+ if (isfixed(fn, i) && !canelim(fn, i))
+ continue;
+ assert(rtype(i->to) == RTmp);
+ assert(t == &fn->tmp[i->to.val]);
+ if (t->gcmbid != NOBID)
+ addins(&vins, &nins, i);
+ *i = (Ins){.op = Onop};
+ }
+
+ addgcmins(fn, vins, nins);
+}
+
+static void
+schedins(Fn *fn, Blk *b, Ins *i0, Ins **pvins, uint *pnins)
+{
+ Ins *i, *i1;
+ Tmp *t;
+ uint na;
+
+ if (i0->op == Onop)
+ return;
+ /* arg...call have to stay together */
+ /* TODO - sel0...sel1 too */
+ for (i1 = i0; isarg(i1->op); i1++) {}
+ for (i = i0; i <= i1; i++)
+ for (na = 0; na < 2; na++) {
+ if (rtype(i->arg[na]) != RTmp)
+ continue;
+ t = &fn->tmp[i->arg[na].val];
+ /* def in different blk, or phi in this blk */
+ if (t->bid != b->id || t->def == 0)
+ continue;
+ /* already scheduled */
+ if (t->def->op == Onop) {
+ continue;
+ }
+ schedins(fn, b, t->def, pvins, pnins);
+ }
+ for (i = i0; i <= i1; i++) {
+ addins(pvins, pnins, i);
+ *i = (Ins){.op = Onop};
+ }
+}
+
+static void
+fixbub4d(Fn *fn, Blk *b, Ins **pvins)
+{
+ Ins *i;
+ uint nins;
+
+ nins = 0;
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ schedins(fn, b, i, pvins, &nins);
+ idup(b, *pvins, nins);
+}
+
+static void
+fixub4d(Fn *fn)
+{
+ Blk *b;
+ Ins *vins; // TODO insb
+
+ vins = vnew(0, sizeof vins[0], PFn);
+ for (b = fn->start; b; b = b->link)
+ fixbub4d(fn, b, &vins);
+}
+
+static int
+iskladdcon(Fn *fn, Ins *i, int64_t *v)
+{
+ if (i->op == Oadd)
+ if (i->cls == Kl)
+ if (rtype(i->arg[0]) == RTmp)
+ if (isconbits(fn, i->arg[1], v))
+ return 1;
+ return 0;
+}
+
+static void
+ireassoc(Fn *fn, Blk *b, Ins *i, Ins **pvins, uint *pnins)
+{
+ Blk *b2;
+ Tmp *t, *t2;
+ Use *u;
+ Ref r2;
+ int64_t v;
+ int x;
+
+ assert(b->ins <= i && i < &b->ins[b->nins]);
+ if (!iscmp(i->op, &x, &x))
+ if (!iskladdcon(fn, i, &v))
+ return;
+
+ assert(rtype(i->to) == RTmp);
+ t = &fn->tmp[i->to.val];
+ for (u = t->use; u < &t->use[t->nuse]; u++) {
+ if (u->type == UPhi)
+ continue;
+ b2 = fn->rpo[u->bid];
+ addins(pvins, pnins, i);
+ /* careful, can move fn->tmp */
+ r2 = newtmp("rea", t->cls, fn);
+ t = &fn->tmp[i->to.val];
+ t2 = &fn->tmp[r2.val];
+ t2->gcmbid = u->bid;
+ (*pvins)[(*pnins)-1].to = r2;
+ if (u->type == UIns) {
+ assert(req(u->u.ins->arg[0], i->to)
+ || req(u->u.ins->arg[1], i->to));
+ if (req(u->u.ins->arg[0], i->to))
+ u->u.ins->arg[0] = r2;
+ if (req(u->u.ins->arg[1], i->to))
+ u->u.ins->arg[1] = r2;
+ } else {
+ assert(u->type == UJmp);
+ assert(req(b2->jmp.arg, i->to));
+ b2->jmp.arg = r2;
+ }
+ }
+}
+
+/* Redistribute trivial ops to point of use. */
+/* Reduces register pressure. */
+/* needs rpo, use; breaks use */
+void
+reassoc(Fn *fn)
+{
+ Blk *b;
+ Ins *vins, *i;
+ uint nins;
+
+ nins = 0;
+ vins = vnew(nins, sizeof vins[0], PFn);
+
+ /* identify trivial ins */
+ for (b = fn->start; b; b = b->link) {
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ ireassoc(fn, b, i, &vins, &nins);
+ }
+
+ addgcmins(fn, vins, nins);
+}
+
+static void
+cleartmps(Fn *fn)
+{
+ Tmp *t;
+
+ for (t=&fn->tmp[Tmp0]; t < &fn->tmp[fn->ntmp]; t++) {
+ t->visit = 0;
+ t->gcmbid = NOBID;
+ t->gcminsn = -1u; // TODO - get rid
+ }
+}
+
+/* https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf */
+/* require use dom */
+/* maintains rpo pred dom */
+/* breaks use */
+void
+gcm(Fn *fn)
+{
+ uint bid;
+
+ /* fprintf(stderr, "\n\nBefore gcm:\n\n"); */
+ /* printfn(fn, stderr); */
+
+ filldomdpth(fn);
+ fillloop(fn);
+
+ cleartmps(fn);
+ for (bid=0; bid<fn->nblk; bid++)
+ earlyblk(fn, bid);
+ for (bid=0; bid<fn->nblk; bid++)
+ lateblk(fn, bid);
+
+ /* fprintf(stderr, "\n\nBefore gcmmove/fixub4d:\n\n"); */
+ /* printfn(fn, stderr); */
+
+ gcmmove(fn);
+ cleartmps(fn);
+ /* filluse(fn); */
+ /* fixub4d(fn); */
+
+ /* fprintf(stderr, "\n\nAfter gcmmove/fixub4d:\n\n"); */
+ /* printfn(fn, stderr); */
+
+ //fillcfg(fn);
+ filluse(fn);
+ reassoc(fn);
+
+ /* cleartmps(fn); */
+ filluse(fn);
+ fixub4d(fn);
+
+ /* delete (now) unused ins - already done later??? */
+ filluse(fn);
+ nopunused(fn);
+
+ if (debug['G']) {
+ fprintf(stderr, "\n> After GCM:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/gvn.c b/gvn.c
new file mode 100644
index 0000000..20c8edd
--- /dev/null
+++ b/gvn.c
@@ -0,0 +1,748 @@
+#include "all.h"
+
+/* literal constants 0, 1 */
+Ref con01[2];
+
+static inline uint
+mix(uint x0, uint x1)
+{
+ return x0 + 17*x1;
+}
+
+static inline uint
+rhash(Ref r)
+{
+ return mix(r.type, r.val);
+}
+
+static uint
+ihash(Ins *i)
+{
+ uint a0h, a1h, ah, h;
+
+ a0h = rhash(i->arg[0]);
+ a1h = rhash(i->arg[1]);
+ ah = mix(a0h, a1h);
+ h = mix(i->cls, i->op);
+ h = mix(h, ah);
+
+ return h;
+
+}
+
+static int
+ieq(Ins *ia, Ins *ib)
+{
+ if (ia->cls == ib->cls)
+ if (ia->op == ib->op)
+ if (req(ia->arg[0], ib->arg[0]))
+ if (req(ia->arg[1], ib->arg[1]))
+ return 1;
+ return 0;
+}
+
+static Ins** gvntbl;
+static uint gvntbln;
+static uint lookupn;
+static uint proben;
+static uint maxproben;
+
+static Ins *
+gvndup(Ins *i, int insert) {
+ uint idx, n;
+ Ins *i2;
+
+ lookupn++;
+
+ idx = ihash(i) % gvntbln;
+ for (n = 1;; n++) {
+ proben++;
+ if (n > maxproben)
+ maxproben = n;
+ i2 = gvntbl[idx];
+ if (!i2)
+ break;
+ if (ieq(i, i2))
+ return i2;
+
+ idx++;
+ if (gvntbln <= idx)
+ idx = 0;
+ }
+ /* not found */
+ if (insert) {
+ gvntbl[idx] = i;
+ }
+ return 0;
+}
+
+static void
+replaceref(Ref *r, Ref r1, Ref r2)
+{
+ if (req(*r, r1))
+ *r = r2;
+}
+
+static void
+replacepuse(Phi *p, Ref r1, Ref r2)
+{
+ Ref *a;
+
+ for (a = p->arg; a < &p->arg[p->narg]; a++)
+ replaceref(a, r1, r2);
+}
+
+static void
+replaceiuse(Ins *i, Ref r1, Ref r2)
+{
+ replaceref(&i->arg[0], r1, r2);
+ replaceref(&i->arg[1], r1, r2);
+}
+
+static void
+replacejuse(Blk* b, Ref r1, Ref r2)
+{
+ replaceref(&b->jmp.arg, r1, r2);
+}
+
+static void
+replaceuse(Fn *fn, Use* u, Ref r1, Ref r2)
+{
+ Tmp *t2;
+ Blk *b;
+
+ t2 = rtype(r2) == RTmp ? &fn->tmp[r2.val] : 0;
+ b = fn->rpo[u->bid];
+
+ switch (u->type) {
+ case UXXX:
+ die("unreachable");
+ break;
+ case UPhi:
+ replacepuse(u->u.phi, r1, r2);
+ if (t2)
+ adduse(t2, UPhi, b, u->u.phi);
+ break;
+ case UIns:
+ replaceiuse(u->u.ins, r1, r2);
+ if (t2)
+ adduse(t2, UIns, b, u->u.ins);
+ break;
+ case UJmp:
+ replacejuse(fn->rpo[u->bid], r1, r2);
+ if (t2)
+ adduse(t2, UJmp, b);
+ break;
+ }
+}
+
+static void
+replaceuses(Fn *fn, Ref r1, Ref r2)
+{
+ Tmp *t1;
+ Use *u;
+
+ assert(rtype(r1) == RTmp);
+ t1 = &fn->tmp[r1.val];
+
+ for (u = t1->use; u < &t1->use[t1->nuse]; u++)
+ replaceuse(fn, u, r1, r2);
+
+ t1->nuse = 0;
+}
+
+static void
+rmuse(Fn *fn, Blk *b, uint ty, Ins *i, Phi *p, Ref r, int strict)
+{
+ Tmp *t;
+ Use *u;
+ int found, rm;
+
+ if (rtype(r) != RTmp)
+ return;
+ found = 0;
+ t = &fn->tmp[r.val];
+ for (u = t->use; u < &t->use[t->nuse];) {
+ rm = 0;
+ if (u->type == ty) {
+ switch (ty) {
+ case UXXX:
+ die("unreachable");
+ break;
+ case UIns:
+ assert(p == 0);
+ assert(i);
+ rm = u->u.ins == i;
+ break;
+ case UPhi:
+ assert(i == 0);
+ assert(p);
+ rm = u->u.phi == p;
+ break;
+ case UJmp:
+ assert(i == 0);
+ assert(p == 0);
+ rm = u->bid == b->id;
+ break;
+ default:
+ die("unreachable");
+ break;
+ }
+ }
+ if (rm) {
+ found++;
+ assert(u < &t->use[t->nuse]);
+ assert(u->bid == b->id);
+ /* careful - deleting below iterator */
+ memcpy(u, u+1, ((t->nuse) - ((u+1)-t->use)) * sizeof u[0]);
+ t->nuse--;
+ } else
+ u++;
+ }
+ if (strict)
+ assert(found);
+}
+
+static int
+phieq(Phi *pa, Phi *pb)
+{
+ uint n;
+
+ assert(pa->narg == pb->narg);
+ for (n=0; n<pa->narg; n++) {
+ if (!req(pa->arg[n], phiarg(pb, pa->blk[n])))
+ return 0;
+ }
+ return 1;
+}
+
+static void
+killphi(Fn *fn, Blk *b, Phi **pp, Ref r)
+{
+ Phi *p;
+ uint n;
+
+ p = *pp;
+ replaceuses(fn, p->to, r);
+ assert(b->npred == p->narg);
+ for (n = 0; n < p->narg; n++)
+ rmuse(fn, b, UPhi, 0, p, p->arg[n], 0/*!strict TODO dups*/);
+ p->narg = 0; /* mark as unused - TODO should not be necessary with rmuse*/
+ *pp = p->link;
+}
+
+static void
+ifphiopt(Fn *fn, Blk *b)
+{
+ Blk *ifb, *thenb, *elseb, *tmp;
+ Phi *p, **pp;
+ Tmp *t;
+ Ref argt, argf, r1;
+ int k, x, caninv;
+
+ /* is b the join blk of an if[-then][-else] graphlet? */
+ if (!ifjoin(b, &ifb, &thenb, &elseb))
+ return;
+
+ assert(ifb->jmp.type == Jjnz);
+ assert(ifb->s1 == b || (ifb->s1 == thenb && thenb->s1 == b));
+ assert(ifb->s2 == b || (ifb->s2 == elseb && elseb->s1 == b));
+ if (!iswu1(fn, ifb->jmp.arg))
+ return; /* no opportunity */
+
+ caninv = 1;
+ for (p = b->phi; p; p = p->link)
+ if (req(phiarg(p, elseb), con01[0])) {
+ caninv = 0;
+ break;
+ }
+
+ /* phi bool 1/0 "copy" */
+ for (pp = &b->phi; *pp;) {
+ p = *pp;
+ assert(p->narg == 2);
+ argt = phiarg(p, thenb);
+ argf = phiarg(p, elseb);
+ /* jnz jmp.arg in phi is 1/0 */
+ if (req(argt, ifb->jmp.arg)) {
+ if (!req(argf, ifb->jmp.arg))
+ rmuse(fn, b, UPhi, 0, p, argt, 1/*strict*/);
+ argt = p->arg[phiargn(p, thenb)] = con01[1];
+ }
+ if (req(argf, ifb->jmp.arg)) {
+ rmuse(fn, b, UPhi, 0, p, argf, 1/*strict*/);
+ argf = p->arg[phiargn(p, elseb)] = con01[0];
+ }
+ /* prefer 0 as argf and/or 1 as argt, so try to invert the cmp */
+ if (caninv &&
+ ((req(argt, con01[0]) && !req(argf, con01[0])) ||
+ (req(argf, con01[1]) && !req(argt, con01[1])))) {
+ assert(rtype(ifb->jmp.arg) == RTmp);
+ t = &fn->tmp[ifb->jmp.arg.val];
+ if (t->nuse == 1)
+ if (t->def && iscmp(t->def->op, &k, &x) && KBASE(k) == 0) {
+ assert(t->use[0].type == UJmp);
+ assert(t->bid == ifb->id);
+ t->def->op = invcmpwl(t->def->op);
+ tmp = ifb->s1;
+ ifb->s1 = ifb->s2;
+ ifb->s2 = tmp;
+ r1 = argt; argt = argf; argf = r1;
+ caninv = 0; /* only once */
+ }
+ }
+ if (req(argt, con01[1]) && req(argf, con01[0])) {
+ killphi(fn, b, pp, ifb->jmp.arg);
+ caninv = 0; /* used already */
+ continue;
+ }
+ pp = &(*pp)->link;
+ }
+}
+
+/* phi "copy" - singleton or all args identical */
+static void
+copyphielim(Fn *fn, Blk *b) {
+ Phi *p, **pp;
+ uint n;
+
+ for (pp = &b->phi; *pp;) {
+ p = *pp;
+ assert(p->narg == b->npred);
+ for (n = 0; n < p->narg-1; n++) {
+ if (!req(p->arg[n], p->arg[n+1]))
+ goto Skip;
+ }
+ killphi(fn, b, pp, p->arg[0]);
+ continue;
+ Skip:;
+ pp = &(*pp)->link;
+ }
+}
+
+/* redundant phis */
+static void
+dupphielim(Fn *fn, Blk *b) {
+ Phi *p, *p2, **pp;
+
+ /* O(N^2.M) but meh */
+ for (pp = &b->phi; *pp;) {
+ p = *pp;
+ assert(p->narg == b->npred);
+ for (p2 = p->link; p2; p2 = p2->link) {
+ assert(p2->narg == b->npred);
+ if (phieq(p, p2)) {
+ killphi(fn, b, pp, p2->to);
+ goto Skip;
+ }
+ }
+ pp = &(*pp)->link;
+ Skip:;
+ }
+ return;
+}
+
+static void
+dedupphis(Fn *fn, Blk *b) {
+ ifphiopt(fn, b);
+ copyphielim(fn,b);
+ dupphielim(fn, b);
+}
+
+static int
+rcmp(Ref a, Ref b)
+{
+ if (rtype(a) != rtype(b))
+ return rtype(a)-rtype(b);
+ return a.val - b.val;
+}
+
+static void
+normins(Fn *fn, Ins *i)
+{
+ uint n;
+ int64_t v;
+ Ref r;
+
+ /* truncate constant bits to 32 bits for "s"/w" uses */
+ for (n = 0; n < 2; n++) {
+ if (!KWIDE(argcls(i, n)))
+ if (isconbits(fn, i->arg[n], &v))
+ if ((v & 0xffffffff) != v)
+ i->arg[n] = getcon(v & 0xffffffff, fn);
+ }
+ /* order arg[0] <= arg[1] for commutative ops, preferring RTmp in arg[0] */
+ if (optab[i->op].commutes)
+ if (rcmp(i->arg[0], i->arg[1]) > 0) {
+ r = i->arg[1];
+ i->arg[1] = i->arg[0];
+ i->arg[0] = r;
+ }
+}
+
+static Ref
+negcon(Fn *fn, int cls, Ref r)
+{
+ int64_t v, v1;
+
+ assert(isconbits(fn, r, &v));
+ assert(KBASE(cls) == 0);
+ v1 = -v;
+ if (cls == Kw)
+ v1 = ((int64_t)(-(int32_t)v)) & 0xffffffff;
+ return getcon(v1, fn);
+}
+
+static void
+assoccon(Fn *fn, Blk *b, Ins *i1)
+{
+ Tmp *t2;
+ Ins *i2, i;
+ Ref r0, r1, rc;
+ int op1, op2;
+ int64_t v1, v2, vc;
+
+ op1 = i1->op;
+ if (op1 == Osub)
+ op1 = Oadd;
+ if (!optab[op1].assoc || KBASE(i1->cls) != 0 || rtype(i1->arg[0]) != RTmp
+ || !isconbits(fn, i1->arg[1], &v1) || (optab[op1].hasid && v1 == optab[op1].idval))
+ return;
+ t2 = &fn->tmp[i1->arg[0].val];
+ if (t2->def == 0)
+ return;
+ i2 = t2->def;
+ op2 = i2->op;
+ if (op2 == Osub)
+ op2 = Oadd;
+ if (op1 != op2 || rtype(i2->arg[0]) != RTmp || !isconbits(fn, i2->arg[1], &v2))
+ return;
+ assert(KBASE(i2->cls) == 0);
+ assert(i1->cls == Kl || i2->cls == Kw);
+ r0 = i1->arg[1];
+ if (i1->op == Osub)
+ r0 = negcon(fn, i1->cls, r0);
+ r1 = i2->arg[1];
+ if (i2->op == Osub)
+ r1 = negcon(fn, i2->cls, r1);
+ i = (Ins){.to = i2->to, .op = op2, .cls = i2->cls, .arg = {r0, r1}};
+ rc = foldref(fn, &i);
+ assert(isconbits(fn, rc, &vc));
+ if (op1 == Oadd) {
+ if (i2->cls == Kw) {
+ if ((int32_t)vc < 0) {
+ op1 = Osub;
+ rc = negcon(fn, Kw, rc);
+ }
+ } else {
+ assert(i2->cls == Kl);
+ if (vc < 0) {
+ op1 = Osub;
+ rc = negcon(fn, Kl, rc);
+ }
+ }
+ }
+ i1->op = op1;
+ rmuse(fn, b, UIns, i1, 0, i1->arg[0], 1/*strict*/);
+ i1->arg[0] = i2->arg[0];
+ adduse(&fn->tmp[i1->arg[0].val], UIns, b, i1);
+ i1->arg[1] = rc;
+}
+
+static void
+killins(Fn *fn, Blk *b, Ins *i, Ref r1, Ref r2)
+{
+ replaceuses(fn, r1, r2);
+ rmuse(fn, b, UIns, i, 0, i->arg[0], 1/*strict*/);
+ if (!req(i->arg[0], i->arg[1]))
+ rmuse(fn, b, UIns, i, 0, i->arg[1], 1/*strict*/);
+ *i = (Ins){.op = Onop};
+}
+
+static void
+dedupi(Fn *fn, Blk *b, Ins *i)
+{
+ Ref r2;
+ Ins *i2;
+
+ if (i->op == Onop || i->op == Odbgloc)
+ return;
+
+ normins(fn, i);
+
+ if (optab[i->op].ispinned)
+ return;
+ assert(rtype(i->to) == RTmp);
+
+ /* merge associative ops with constant arg[1] */
+ assoccon(fn, b, i);
+
+ /* effective copy? */
+ r2 = copyref(fn, b, i);
+ if (!req(r2, R)) {
+ killins(fn, b, i, i->to, r2);
+ return;
+ }
+
+ /* effective constant? */
+ r2 = foldref(fn, i);
+ if (!req(r2, R)) {
+ killins(fn, b, i, i->to, r2);
+ return;
+ }
+
+ /* do not dedup (trapping) ins that GCM will not move */
+ if (isfixed(fn, i))
+ return;
+
+ /* duplicate? */
+ i2 = gvndup(i, 1);
+ if (i2) {
+ killins(fn, b, i, i->to, i2->to);
+ return;
+ }
+}
+
+static void
+dedupins(Fn *fn, Blk *b)
+{
+ Ins *i;
+
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ dedupi(fn, b, i);
+}
+
+int
+cmpeq0def(Fn *fn, Ref r, Ref *arg0, int *cls, int *eqval)
+{
+ Ins *i;
+
+ if (rtype(r) != RTmp)
+ return 0;
+ i = fn->tmp[r.val].def;
+ if (i)
+ if (optab[i->op].cmpeqwl)
+ if (req(i->arg[1], con01[0])) {
+ *arg0 = i->arg[0];
+ *cls = argcls(i, 0);
+ *eqval = optab[i->op].eqval;
+ return 1;
+ }
+ return 0;
+}
+
+static int
+branchdom(Fn *fn, Blk *bif, Blk *bbr1, Blk *bbr2, Blk *b)
+{
+ if (bif->jmp.type == Jjnz)
+ if (b != bif)
+ if (dom(bbr1, b))
+ if (!reachesnotvia(fn, bbr2, b, bif))
+ return 1;
+ return 0;
+}
+
+static int
+dom0non0(Fn *fn, Blk *bdom, Blk *b, int *is0)
+{
+ if (branchdom(fn, bdom, bdom->s1, bdom->s2, b)) {
+ *is0 = 0;
+ return 1;
+ }
+ if (branchdom(fn, bdom, bdom->s2, bdom->s1, b)) {
+ *is0 = 1;
+ return 1;
+ }
+ return 0;
+}
+
+/* infer 0/non-0 value from dominating jnz */
+int
+is0non0(Fn *fn, Blk *b, Ref r, int cls, int *is0)
+{
+ Blk *bdom;
+ Ref arg0;
+ int cls1, eqval;
+
+ for (bdom = b->idom; bdom; bdom = bdom->idom) {
+ if (bdom->jmp.type != Jjnz)
+ continue;
+ if (cls == Kw)
+ if (req(r, bdom->jmp.arg))
+ if (dom0non0(fn, bdom, b, is0))
+ return 1;
+ if (cmpeq0def(fn, bdom->jmp.arg, &arg0, &cls1, &eqval))
+ if (cls == cls1)
+ if (req(r, arg0))
+ if (dom0non0(fn, bdom, b, is0)) {
+ *is0 = *is0 ^ eqval;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int
+usecls(Use *u, Ref r)
+{
+ switch (u->type) {
+ case UXXX: break;
+ case UIns:
+ /* safe to take arg[0] cls if both args == r, even for store */
+ if (req(u->u.ins->arg[0], r))
+ return argcls(u->u.ins, 0);
+ if (req(u->u.ins->arg[1], r))
+ return argcls(u->u.ins, 1);
+ break;
+ case UPhi: return u->u.phi->cls;
+ case UJmp: return Kw;
+ }
+ die("unreachable");
+}
+
+static void
+propjnz0(Fn *fn, Blk *bif, Blk *s0, Blk *snon0, Ref r, int cls)
+{
+ Blk *b;
+ Tmp *t;
+ Use *u;
+
+ if (s0->npred != 1 || rtype(r) != RTmp)
+ return;
+ t = &fn->tmp[r.val];
+ for (u = t->use; u < &t->use[t->nuse]; u++) {
+ b = fn->rpo[u->bid];
+ if (usecls(u, r) == cls)
+ if (branchdom(fn, bif, s0, snon0, b))
+ replaceuse(fn, u, r, con01[0]);
+ }
+}
+
+static void
+dedupjmp(Fn *fn, Blk *b) {
+ Blk *s1s2[2];
+ int64_t v;
+ Ref arg0;
+ int cls, eqval, is0;
+
+ if (b->jmp.type != Jjnz)
+ return;
+
+ /* propagate jmp arg as 0 thru s2 */
+ propjnz0(fn, b, b->s2, b->s1, b->jmp.arg, Kw);
+ /* propagate cmp eq/ne 0 def of jmp arg as 0 */
+ s1s2[0] = b->s1; s1s2[1] = b->s2;
+ if (cmpeq0def(fn, b->jmp.arg, &arg0, &cls, &eqval))
+ propjnz0(fn, b, s1s2[eqval^1], s1s2[eqval], arg0, cls);
+
+ /* collapse trivial/constant jnz to jmp */
+ v = 1;
+ is0 = 0;
+ if (b->s1 == b->s2
+ || isconbits(fn, b->jmp.arg, &v)
+ || is0non0(fn, b, b->jmp.arg, Kw, &is0)) {
+ if (v == 0 || is0)
+ b->s1 = b->s2;
+ /* we later move active ins out of dead blks */
+ b->s2 = 0;
+ b->jmp.type = Jjmp;
+ rmuse(fn, b, UJmp, 0, 0, b->jmp.arg, 1/*strict*/);
+ b->jmp.arg = R;
+ return;
+ }
+}
+
+/* rebuild rpo pred use */
+/* careful not to lose active ins in dead blks */
+static void
+rebuildcfg(Fn *fn) {
+ uint n, prevnblk;
+ Blk **prevrpo;
+ Blk *b;
+ Ins *i;
+
+ prevnblk = fn->nblk;
+ prevrpo = emalloc(prevnblk * sizeof prevrpo[0]);
+ memcpy(prevrpo, fn->rpo, prevnblk * sizeof prevrpo[0]);
+
+ fillcfg(fn); // TODO - OK?
+
+ for (n=0; n<prevnblk; n++) {
+ b = prevrpo[n];
+ if (b->id == -1u) {
+ assert(b != fn->start);
+ /* blk unreachable after GVN */
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ if (i->op != Onop)
+ if (!optab[i->op].ispinned)
+ if (gvndup(i, 0) == i)
+ /* (possibly) active ins - add to start blk */
+ addins(&fn->start->ins, &fn->start->nins, i);
+ }
+ }
+ free(prevrpo);
+}
+
+/* https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf */
+/* require rpo pred ssa use */
+/* recreates rpo */
+/* breaks pred use dom ssa (GCM fixes ssa) */
+void
+gvn(Fn *fn)
+{
+ uint n, nins;
+ Blk *b;
+ Phi *p;
+
+ /* facilitate jnz simplification */
+ blkmerge(fn);
+ fillcfg(fn);
+ filluse(fn);
+ filldom(fn);
+
+ con01[0] = getcon(0, fn);
+ con01[1] = getcon(1, fn);
+
+ nins = 0;
+ for (b=fn->start; b; b=b->link)
+ for (p = b->phi; p; p = p->link)
+ p->visit = 0;
+
+ /* facilitate ext elimination */
+ fillloop(fn);
+ narrowpars(fn);
+ filluse(fn);
+ ssacheck(fn);
+
+ for (b=fn->start; b; b=b->link)
+ nins += b->nins;
+
+ gvntbln = nins + nins/2;
+ gvntbl = emalloc(gvntbln * sizeof gvntbl[0]);
+ lookupn = 0;
+ proben = 0;
+ maxproben = 0;
+
+ /* GVN */
+ clrbvisit(fn);
+ for (n=0; n<fn->nblk; n++) {
+ b = fn->rpo[n];
+ dedupphis(fn, b);
+ dedupins(fn, b);
+ dedupjmp(fn, b);
+ }
+
+ rebuildcfg(fn);
+
+ free(gvntbl);
+ gvntbl = 0;
+ gvntbln = 0;
+ lookupn = 0;
+ proben = 0;
+ maxproben = 0;
+
+ if (debug['G']) {
+ fprintf(stderr, "\n> After GVN:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/ins.c b/ins.c
new file mode 100644
index 0000000..1935a95
--- /dev/null
+++ b/ins.c
@@ -0,0 +1,195 @@
+#include "all.h"
+
+void
+addins(Ins **pvins, uint *pnins, Ins *i)
+{
+ if (i->op == Onop)
+ return;
+ vgrow(pvins, ++(*pnins));
+ (*pvins)[(*pnins)-1] = *i;
+}
+
+void
+addbins(Blk *b, Ins **pvins, uint *pnins)
+{
+ Ins *i;
+
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ addins(pvins, pnins, i);
+}
+
+static int
+unusedins(Fn *fn, Ins *i);
+
+static int
+unusedtmp(Fn *fn, Tmp *t)
+{
+ if (t->nuse == 0)
+ return 1;
+ if (t->nuse == 1)
+ if (t->use[0].type == UIns)
+ return unusedins(fn, t->use[0].u.ins);
+ return 0;
+}
+
+static int
+unusedtmpref(Fn *fn, Ref r)
+{
+ if (rtype(r) != RTmp)
+ return 0;
+ return unusedtmp(fn, &fn->tmp[r.val]);
+}
+
+static int
+unusedins(Fn *fn, Ins *i)
+{
+ if (!INRANGE(i->op, Opar, Ocall))
+ if (unusedtmpref(fn, i->to))
+ return 1;
+ return 0;
+}
+
+/* replace unused ins with nop */
+/* needs use; breaks use */
+void
+nopunused(Fn *fn)
+{
+ Blk *b;
+ Ins *i;
+
+ for (b = fn->start; b; b = b->link)
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ if (unusedins(fn, i))
+ *i = (Ins){.op = Onop};
+}
+
+typedef struct FromLoc FromLoc;
+struct FromLoc {
+ InsLoc from;
+ uint ton;
+};
+
+static int
+loccmp(InsLoc *la, InsLoc *lb)
+{
+ if (la->bid != lb->bid)
+ return (int)la->bid - (int)lb->bid;
+ return la->insn - lb->insn;
+}
+
+static int
+tocmp(const void *a, const void *b)
+{
+ InsMov *ma, *mb;
+
+ ma = (InsMov*)a;
+ mb = (InsMov*)b;
+ return loccmp(&ma->to, &mb->to);
+}
+
+static int
+fromcmp(const void *a, const void *b)
+{
+ FromLoc *fa, *fb;
+
+ fa = (FromLoc*)a;
+ fb = (FromLoc*)b;
+ return loccmp(&fa->from, &fb->from);
+}
+
+static int
+loceq(InsLoc *a, InsLoc *b)
+{
+ return a->bid == b->bid && a->insn == b->insn;
+}
+
+/* after, mov is sorted by to, and to.insn, from.insn are updated */
+void
+movins(Fn *fn, InsMov *mov, uint nmov, int del)
+{
+ Blk *b, *b2;
+ uint bid, n, fromn, ton, ifromn, nins;
+ FromLoc *from;
+ uint *newbnins;
+ Ins **newbins, *vins;
+
+ qsort(mov, nmov, sizeof mov[0], tocmp);
+
+ from = emalloc(nmov * sizeof from[0]);
+ for (n = 0; n < nmov; n++) {
+ from[n].from = mov[n].from;
+ from[n].ton = n;
+ }
+ qsort(from, nmov, sizeof from[0], fromcmp);
+
+ nins = 0;
+ vins = vnew(nins, sizeof vins[0], PFn);
+
+ newbnins = emalloc(fn->nblk * sizeof newbnins[0]);
+ newbins = emalloc(fn->nblk * sizeof newbins[0]);
+
+ /* populate new ins buffers */
+ /* update mov to.insn, from insn */
+ fromn = ton = 0;
+ for (bid = 0; bid < fn->nblk; bid++) {
+ /* no moves to this block */
+ if (nmov <= ton || mov[ton].to.bid != bid) {
+ while (fromn < nmov && from[fromn].from.bid == bid)
+ fromn++;
+ continue;
+ }
+ b = fn->rpo[bid];
+ nins = 0;
+ for (ifromn = 0; ifromn < b->nins; ifromn++) {
+ /* insert new ins, update to */
+ while (ton < nmov && loceq(&mov[ton].to, &(InsLoc){.bid = bid, .insn = ifromn})) {
+ b2 = fn->rpo[mov[ton].from.bid];
+ assert(mov[ton].from.insn < b2->nins);
+ addins(&vins, &nins, &b2->ins[mov[ton].from.insn]);
+ mov[ton++].to.insn = nins-1;
+ }
+ /* update from */
+ while (fromn < nmov && loceq(&from[fromn].from, &(InsLoc){.bid = bid, .insn = ifromn}))
+ from[fromn++].from.insn = nins;
+ /* copy original ins */
+ addins(&vins, &nins, &b->ins[ifromn]);
+ }
+ /* append new ins, update to */
+ while (ton < nmov && mov[ton].to.bid == bid) {
+ assert(mov[ton].to.insn == b->nins);
+ b2 = fn->rpo[mov[ton].from.bid];
+ assert(mov[ton].from.insn < b2->nins);
+ addins(&vins, &nins, &b2->ins[mov[ton].from.insn]);
+ mov[ton++].to.insn = nins-1;
+ }
+ assert(ifromn == b->nins);
+ /* idup(&newbins[bid], vins, nins); */
+ newbins[bid] = vins;
+ vins = vnew(0, sizeof vins[0], PFn);
+ newbnins[bid] = nins;
+ }
+ assert(fromn == nmov);
+ assert(ton == nmov);
+
+ /* install new b->ins */
+ for (bid = 0; bid < fn->nblk; bid++) {
+ if (newbnins[bid] == 0)
+ continue;
+ b = fn->rpo[bid];
+ b->ins = newbins[bid];
+ b->nins = newbnins[bid];
+ }
+
+ /* remove from ins, update mov from insn */
+ for (fromn = 0; fromn < nmov; fromn++) {
+ b = fn->rpo[from[fromn].from.bid];
+ assert(from[fromn].from.insn < b->nins);
+ if (del)
+ b->ins[from[fromn].from.insn] = (Ins){.op = Onop};
+ mov[from[fromn].ton].from.insn = from[fromn].from.insn;
+ }
+
+ free(from);
+ free(newbins);
+ free(newbnins);
+}
diff --git a/main.c b/main.c
index 977c55b..ba3ad0c 100644
--- a/main.c
+++ b/main.c
@@ -72,10 +72,18 @@ func(Fn *fn)
fillalias(fn);
coalesce(fn);
filluse(fn);
+ filldom(fn);
ssacheck(fn);
- copy(fn);
+ gvn(fn);
+ fillcfg(fn);
+ filluse(fn);
+ ifelim(fn);
+ fillcfg(fn);
filluse(fn);
- fold(fn);
+ filldom(fn);
+ gcm(fn);
+ filluse(fn);
+ ssacheck(fn);
T.abi1(fn);
simpl(fn);
fillcfg(fn);
diff --git a/ops.h b/ops.h
index beaa6f3..00d62a0 100644
--- a/ops.h
+++ b/ops.h
@@ -6,188 +6,185 @@
#define V(Imm)
#endif
-#ifndef P
- #define P(CanFold, HasId, IdVal)
+#ifndef F
+#define F(CanFold, HasId, IdVal, Commutes, Associates, Idemp, IsCmpEq, IsCmpLgte, CmpEqVal, IsPinned)
#endif
-
#define T(a,b,c,d,e,f,g,h) { \
{[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \
{[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h} \
}
-
/*********************/
/* PUBLIC OPERATIONS */
/*********************/
/* Arithmetic and Bits */
-O(add, T(w,l,s,d, w,l,s,d), P(1,1,0)) X(2,1,0) V(1)
-O(sub, T(w,l,s,d, w,l,s,d), P(1,1,0)) X(2,1,0) V(0)
-O(neg, T(w,l,s,d, x,x,x,x), P(1,0,0)) X(1,1,0) V(0)
-O(div, T(w,l,s,d, w,l,s,d), P(1,1,1)) X(0,0,0) V(0)
-O(rem, T(w,l,e,e, w,l,e,e), P(1,0,0)) X(0,0,0) V(0)
-O(udiv, T(w,l,e,e, w,l,e,e), P(1,1,1)) X(0,0,0) V(0)
-O(urem, T(w,l,e,e, w,l,e,e), P(1,0,0)) X(0,0,0) V(0)
-O(mul, T(w,l,s,d, w,l,s,d), P(1,1,1)) X(2,0,0) V(0)
-O(and, T(w,l,e,e, w,l,e,e), P(1,0,0)) X(2,1,0) V(1)
-O(or, T(w,l,e,e, w,l,e,e), P(1,1,0)) X(2,1,0) V(1)
-O(xor, T(w,l,e,e, w,l,e,e), P(1,1,0)) X(2,1,0) V(1)
-O(sar, T(w,l,e,e, w,w,e,e), P(1,1,0)) X(1,1,0) V(1)
-O(shr, T(w,l,e,e, w,w,e,e), P(1,1,0)) X(1,1,0) V(1)
-O(shl, T(w,l,e,e, w,w,e,e), P(1,1,0)) X(1,1,0) V(1)
+O(add, T(w,l,s,d, w,l,s,d), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
+O(sub, T(w,l,s,d, w,l,s,d), F(1,1,0,0,0,0,0,0,0,0)) X(2,1,0) V(0)
+O(neg, T(w,l,s,d, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
+O(div, T(w,l,s,d, w,l,s,d), F(1,1,1,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(rem, T(w,l,e,e, w,l,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(udiv, T(w,l,e,e, w,l,e,e), F(1,1,1,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(urem, T(w,l,e,e, w,l,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(mul, T(w,l,s,d, w,l,s,d), F(1,1,1,1,0,0,0,0,0,0)) X(2,0,0) V(0)
+O(and, T(w,l,e,e, w,l,e,e), F(1,0,0,1,1,1,0,0,0,0)) X(2,1,0) V(1)
+O(or, T(w,l,e,e, w,l,e,e), F(1,1,0,1,1,1,0,0,0,0)) X(2,1,0) V(1)
+O(xor, T(w,l,e,e, w,l,e,e), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
+O(sar, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
+O(shr, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
+O(shl, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
/* Comparisons */
-O(ceqw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cnew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csgew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csgtw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cslew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csltw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(1)
-O(cugew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cugtw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(culew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cultw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(1)
-
-O(ceql, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cnel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csgel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csgtl, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cslel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csltl, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(1)
-O(cugel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cugtl, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(culel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cultl, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(1)
-
-O(ceqs, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cges, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cgts, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cles, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(clts, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cnes, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cos, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cuos, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-
-O(ceqd, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cged, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cgtd, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cled, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cltd, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cned, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cod, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cuod, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
+O(ceqw, T(w,w,e,e, w,w,e,e), F(1,1,1,1,0,0,1,0,1,0)) X(0,1,0) V(0)
+O(cnew, T(w,w,e,e, w,w,e,e), F(1,1,0,1,0,0,1,0,0,0)) X(0,1,0) V(0)
+O(csgew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(csgtw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
+O(cslew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(csltw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
+O(cugew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(cugtw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
+O(culew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(cultw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
+
+O(ceql, T(l,l,e,e, l,l,e,e), F(1,0,0,1,0,0,1,0,1,0)) X(0,1,0) V(0)
+O(cnel, T(l,l,e,e, l,l,e,e), F(1,0,0,1,0,0,1,0,0,0)) X(0,1,0) V(0)
+O(csgel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(csgtl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
+O(cslel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(csltl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
+O(cugel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(cugtl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
+O(culel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(cultl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
+
+O(ceqs, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cges, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cgts, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cles, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(clts, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cnes, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cos, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cuos, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+
+O(ceqd, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cged, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cgtd, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cled, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cltd, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cned, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cod, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cuod, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
/* Memory */
-O(storeb, T(w,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(storeh, T(w,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(storew, T(w,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(storel, T(l,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(stores, T(s,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(stored, T(d,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-
-O(loadsb, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loadub, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loadsh, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loaduh, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loadsw, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loaduw, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(load, T(m,m,m,m, x,x,x,x), P(0,0,0)) X(0,0,1) V(0)
+O(storeb, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(storeh, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(storew, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(storel, T(l,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(stores, T(s,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(stored, T(d,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+
+O(loadsb, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loadub, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loadsh, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loaduh, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loadsw, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loaduw, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(load, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
/* Extensions and Truncations */
-O(extsb, T(w,w,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extub, T(w,w,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extsh, T(w,w,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extuh, T(w,w,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extsw, T(e,w,e,e, e,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extuw, T(e,w,e,e, e,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-
-O(exts, T(e,e,e,s, e,e,e,x), P(1,0,0)) X(0,0,1) V(0)
-O(truncd, T(e,e,d,e, e,e,x,e), P(1,0,0)) X(0,0,1) V(0)
-O(stosi, T(s,s,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(stoui, T(s,s,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(dtosi, T(d,d,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(dtoui, T(d,d,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(swtof, T(e,e,w,w, e,e,x,x), P(1,0,0)) X(0,0,1) V(0)
-O(uwtof, T(e,e,w,w, e,e,x,x), P(1,0,0)) X(0,0,1) V(0)
-O(sltof, T(e,e,l,l, e,e,x,x), P(1,0,0)) X(0,0,1) V(0)
-O(ultof, T(e,e,l,l, e,e,x,x), P(1,0,0)) X(0,0,1) V(0)
-O(cast, T(s,d,w,l, x,x,x,x), P(1,0,0)) X(0,0,1) V(0)
+O(extsb, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extub, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extsh, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extuh, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extsw, T(e,w,e,e, e,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extuw, T(e,w,e,e, e,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+
+O(exts, T(e,e,e,s, e,e,e,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(truncd, T(e,e,d,e, e,e,x,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(stosi, T(s,s,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(stoui, T(s,s,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(dtosi, T(d,d,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(dtoui, T(d,d,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(swtof, T(e,e,w,w, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(uwtof, T(e,e,w,w, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(sltof, T(e,e,l,l, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(ultof, T(e,e,l,l, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(cast, T(s,d,w,l, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
/* Stack Allocation */
-O(alloc4, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(alloc8, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(alloc16, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
+O(alloc4, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(alloc8, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(alloc16, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
/* Variadic Function Helpers */
-O(vaarg, T(m,m,m,m, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(vastart, T(m,e,e,e, x,e,e,e), P(0,0,0)) X(0,0,0) V(0)
+O(vaarg, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(vastart, T(m,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
-O(copy, T(w,l,s,d, x,x,x,x), P(0,0,0)) X(0,0,1) V(0)
+O(copy, T(w,l,s,d, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
/* Debug */
-O(dbgloc, T(w,e,e,e, w,e,e,e), P(0,0,0)) X(0,0,1) V(0)
+O(dbgloc, T(w,e,e,e, w,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
/****************************************/
/* INTERNAL OPERATIONS (keep nop first) */
/****************************************/
/* Miscellaneous and Architecture-Specific Operations */
-O(nop, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,1) V(0)
-O(addr, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(blit0, T(m,e,e,e, m,e,e,e), P(0,0,0)) X(0,1,0) V(0)
-O(blit1, T(w,e,e,e, x,e,e,e), P(0,0,0)) X(0,1,0) V(0)
-O(swap, T(w,l,s,d, w,l,s,d), P(0,0,0)) X(1,0,0) V(0)
-O(sign, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(salloc, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(xidiv, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(1,0,0) V(0)
-O(xdiv, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(1,0,0) V(0)
-O(xcmp, T(w,l,s,d, w,l,s,d), P(0,0,0)) X(1,1,0) V(0)
-O(xtest, T(w,l,e,e, w,l,e,e), P(0,0,0)) X(1,1,0) V(0)
-O(acmp, T(w,l,e,e, w,l,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(acmn, T(w,l,e,e, w,l,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(afcmp, T(e,e,s,d, e,e,s,d), P(0,0,0)) X(0,0,0) V(0)
-O(reqz, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(rnez, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(0,0,0) V(0)
+O(nop, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(addr, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(blit0, T(m,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
+O(blit1, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
+O(swap, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
+O(sign, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(salloc, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xidiv, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
+O(xdiv, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
+O(xcmp, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
+O(xtest, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
+O(acmp, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(acmn, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(afcmp, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(reqz, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(rnez, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
/* Arguments, Parameters, and Calls */
-O(par, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(parsb, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(parub, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(parsh, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(paruh, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(parc, T(e,x,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(pare, T(e,x,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(arg, T(w,l,s,d, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(argsb, T(w,e,e,e, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(argub, T(w,e,e,e, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(argsh, T(w,e,e,e, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(arguh, T(w,e,e,e, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(argc, T(e,x,e,e, e,l,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(arge, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(argv, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(call, T(m,m,m,m, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
+O(par, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(parsb, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(parub, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(parsh, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(paruh, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(parc, T(e,x,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(pare, T(e,x,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(arg, T(w,l,s,d, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argsb, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argub, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argsh, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(arguh, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argc, T(e,x,e,e, e,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(arge, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argv, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(call, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
/* Flags Setting */
-O(flagieq, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagine, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagisge, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagisgt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagisle, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagislt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagiuge, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagiugt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagiule, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagiult, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfeq, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfge, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfgt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfle, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagflt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfne, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfo, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfuo, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-
+O(flagieq, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagine, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagisge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagisgt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagisle, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagislt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagiuge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagiugt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagiule, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagiult, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfeq, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfgt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfle, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagflt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfne, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfuo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
#undef T
#undef X
diff --git a/parse.c b/parse.c
index ebfc39f..e2ffccf 100644
--- a/parse.c
+++ b/parse.c
@@ -15,11 +15,17 @@ enum {
};
Op optab[NOp] = {
-#undef P
-#define P(cf, hi, id) .canfold = cf, .hasid = hi, .idval = id
-#define O(op, t, p) [O##op]={.name = #op, .argcls = t, p},
+#undef F
+#define F(cf, hi, id, co, as, im, ic, lg, cv, pn) \
+ .canfold = cf, \
+ .hasid = hi, .idval = id, \
+ .commutes = co, .assoc = as, \
+ .idemp = im, \
+ .cmpeqwl = ic, .cmplgtewl = lg, .eqval = cv, \
+ .ispinned = pn
+#define O(op, k, flags) [O##op]={.name = #op, .argcls = k, flags},
#include "ops.h"
-#undef P
+#undef F
};
typedef enum {
diff --git a/reassoc.c b/reassoc.c
new file mode 100644
index 0000000..25d88ea
--- /dev/null
+++ b/reassoc.c
@@ -0,0 +1,101 @@
+#include "all.h"
+
+static void
+ireassoc(Fn *fn, Blk *b, Ins *i, uint *pnim, InsMov **pim)
+{
+ Blk *b2;
+ Tmp *t;
+ Use *u;
+ int64_t v;
+ int x;
+
+ assert(b->ins <= i && i < &b->ins[b->nins]);
+ if (!iscmp(i->op, &x, &x))
+ if (!iskladdcon(fn, i, &v))
+ return;
+
+ assert(rtype(i->to) == RTmp);
+ t = &fn->tmp[i->to.val];
+ for (u = t->use; u < &t->use[t->nuse]; u++) {
+ if (u->type == UPhi)
+ continue;
+ if (u->type == UIns)
+ if (INRANGE(u->u.ins->op, Oarg, Ocall))
+ continue;
+ b2 = fn->rpo[u->bid];
+ vgrow(pim, ++(*pnim));
+ (*pim)[(*pnim)-1] = (InsMov){
+ .from = {.bid = b->id, .insn = i-b->ins},
+ .to = {
+ .bid = u->bid,
+ .insn = u->type == UJmp ? b2->nins : u->u.ins-b2->ins
+ }};
+ }
+}
+
+static void
+trealloc(Fn *fn, Blk *b, Ins *i)
+{
+ Ins *i2;
+ Ref r;
+
+ assert(b->ins <= i && i < &b->ins[b->nins]);
+ r = newtmp("rea", i->cls, fn);
+ if (i < &b->ins[b->nins-1]) {
+ i2 = i+1;
+ /* special case of both args of target instruction reassociated */
+ if (!req(i->to, i2->arg[0]) && !req(i->to, i2->arg[1])) {
+ assert(i < &b->ins[b->nins-2]);
+ i2 = i+2;
+ }
+ assert(req(i->to, i2->arg[0]) || req(i->to, i2->arg[1]));
+ if (req(i->to, i2->arg[0]))
+ i2->arg[0] = r;
+ if (req(i->to, i2->arg[1]))
+ i2->arg[1] = r;
+ } else {
+ assert(req(i->to, b->jmp.arg));
+ b->jmp.arg = r;
+ }
+ i->to = r;
+}
+
+/* Redistribute trivial ops to point of use. */
+/* Reduces register pressure. */
+/* needs rpo, use; breaks use */
+void
+reassoc(Fn *fn)
+{
+ Blk *b;
+ Ins *i;
+ InsMov *im;
+ uint n, nim;
+
+ nim = 0;
+ im = vnew(nim, sizeof im[0], PHeap);
+
+ /* identify trivial ins */
+ for (b = fn->start; b; b = b->link) {
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ ireassoc(fn, b, i, &nim, &im);
+ }
+
+ /* duplicate trivial ins */
+ movins(fn, im, nim, 0/*!del*/);
+
+ /* create new tmps for dups */
+ for (n = 0; n < nim; n++) {
+ b = fn->rpo[im[n].to.bid];
+ i = &b->ins[im[n].to.insn];
+ trealloc(fn, b, i);
+ }
+
+ /* delete (now) unused ins */
+ filluse(fn);
+ nopunused(fn);
+
+ if (debug['G']) {
+ fprintf(stderr, "\n> After Reassociation:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/ssa.c b/ssa.c
index 929301e..4e3d2a5 100644
--- a/ssa.c
+++ b/ssa.c
@@ -1,7 +1,7 @@
#include "all.h"
#include <stdarg.h>
-static void
+void
adduse(Tmp *tmp, int ty, Blk *b, ...)
{
Use *u;
diff --git a/test/_gcm1.ssa b/test/_gcm1.ssa
new file mode 100644
index 0000000..719cddb
--- /dev/null
+++ b/test/_gcm1.ssa
@@ -0,0 +1,48 @@
+export
+function w $ifmv(w %p1, w %p2, w %p3) {
+@start
+@entry
+ %rt =w add %p2, %p3 # gcm moves to @true
+ %rf =w sub %p2, %p3 # gcm moves to @false
+ jnz %p1, @true, @false
+@true
+ %r =w copy %rt
+ jmp @exit
+@false
+ %r =w copy %rf
+ jmp @exit
+@exit
+ ret %r
+}
+
+export
+function w $hoist1(w %p1, w %p2, w %p3) {
+@start
+@entry
+ %n =w copy 0
+ %i =w copy %p1
+@loop
+ %base =w add %p2, %p3 # gcm moves to @exit
+ %i =w sub %i, 1
+ %n =w add %n, 1
+ jnz %i, @loop, @exit
+@exit
+ %r =w add %base, %n
+ ret %r
+}
+
+export
+function w $hoist2(w %p1, w %p2, w %p3) {
+@start
+@entry
+ %n =w copy 0
+ %i =w copy %p1
+@loop
+ %base =w add %p2, %p3 # gcm moves to @entry
+ %i =w sub %i, 1
+ %n =w add %n, %base
+ jnz %i, @loop, @exit
+@exit
+ %r =w add %base, %n
+ ret %r
+}
diff --git a/test/_gcm2.ssa b/test/_gcm2.ssa
new file mode 100644
index 0000000..baeb4a4
--- /dev/null
+++ b/test/_gcm2.ssa
@@ -0,0 +1,43 @@
+# Programs from "Global Code Motion Global Value Numbering" by Cliff Click
+# https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf
+
+# GCM program in Figure 1
+
+function w $gcm_test(w %a){
+@start
+ %i.0 =w copy 0
+@loop
+ %i.1 =w phi @start %i.0, @loop %i.2
+ %b =w add %a, 1 # early schedule moves to @start
+ %i.2 =w add %i.1, %b
+ %c =w mul %i.2, 2 # late schedule moves to @end
+ %x =w csltw %i.2, 10
+ jnz %x, @loop, @end
+@end
+ ret %c
+}
+
+# GCM program in "Figure 3 x's definition does not dominate it's use"
+#
+# SSA contruction will insert phi instruction for "x" in @if_false
+# preventing the "add" in @if_false from being moved to @if_true
+
+function $gcm_test2 (w %a){
+@start
+ %f =w copy 1
+ %x =w copy 0
+ %s.0 =w copy 0
+@loop
+ %s.1 = w phi @start %s.0, @if_false %s.2
+ jnz %a, @if, @end
+@if
+ jnz %f, @if_true, @if_false
+@if_true
+ %f =w copy 0
+ %x =w add %x, 1
+@if_false
+ %s.2 =w add %s.1, %x
+ jmp @loop
+@end
+ ret
+}
diff --git a/test/_load-elim.ssa b/test/_load-elim.ssa
new file mode 100644
index 0000000..faae478
--- /dev/null
+++ b/test/_load-elim.ssa
@@ -0,0 +1,17 @@
+# GCM can eliminate unused add/load instructions
+
+export
+function w $f(l %p, w %c) {
+@start
+ jnz %c, @true, @false
+@true
+ %p1 =l add %p, 4
+ %v1 =w loaduw %p1
+ jmp @end
+@false
+ %p2 =l add %p, 4
+ %v2 =w loaduw %p2
+ jmp @end
+@end
+ ret 0
+}
diff --git a/test/gvn-live-dead.ssa b/test/gvn-live-dead.ssa
new file mode 100644
index 0000000..d47f05b
--- /dev/null
+++ b/test/gvn-live-dead.ssa
@@ -0,0 +1,19 @@
+export
+function w $test(w %p1, w %p2) {
+@start
+@entry
+ %t1 =w copy 1
+ jnz %t1, @live, @dead1
+@live
+ %t2 =w add %p1, %p2
+ ret %t2
+@dead1
+ %t2 =w add %p1, %p2 # live ins in dead blk
+@dead2
+ jnz %t1, @live, @dead1
+}
+
+# >>> driver
+# extern int test(int p1, int p2);
+# int main() { return test(1, 2) != 3; }
+# <<<
diff --git a/test/non0jnz.ssa b/test/non0jnz.ssa
new file mode 100644
index 0000000..33f9a96
--- /dev/null
+++ b/test/non0jnz.ssa
@@ -0,0 +1,31 @@
+# GVN 0/non-0 inference removes @yesyes, @yesno, @noyes, @nono
+
+export
+function w $test(w %c) {
+@start
+ jnz %c, @yes, @no
+@yes
+ %c0 =w cnew %c, 0
+ jnz %c0, @yesyes, @yesno
+@yesyes
+ %rc =w copy 1
+ jmp @end
+@yesno
+ %rc =w copy 111
+ jmp @end
+@no
+ %c1 =w cnew %c, 0
+ jnz %c1, @noyes, @nono
+@noyes
+ %rc =w copy 222
+ jmp @end
+@nono
+ %rc =w copy 0
+@end
+ ret %rc
+}
+
+# >>> driver
+# int test(int);
+# int main(void) { return test(0); }
+# <<<
diff --git a/util.c b/util.c
index 5a0c89f..ba1f98f 100644
--- a/util.c
+++ b/util.c
@@ -229,6 +229,24 @@ iscmp(int op, int *pk, int *pc)
return 1;
}
+static int INVCMPWL[] = {
+ /*Oceqw*/Ocnew, /*Ocnew*/Oceqw,
+ /*Ocsgew*/Ocsltw, /*Ocsgtw*/Ocslew, /*Ocslew*/Ocsgtw, /*Ocsltw*/Ocsgew,
+ /*Ocugew*/Ocultw, /*Ocugtw*/Oculew, /*Oculew*/Ocugtw, /*Ocultw*/Ocugew,
+ /*Oceql*/Ocnel, /*Ocnel*/Oceql,
+ /*Ocsgel*/Ocsltl, /*Ocsgtl*/Ocslel, /*Ocslel*/Ocsgtl, /*Ocsltl*/Ocsgel,
+ /*Ocugel*/Ocultl, /*Ocugtl*/Oculel, /*Oculel*/Ocugtl, /*Ocultl*/Ocugel,
+};
+
+int
+invcmpwl(int cmp)
+{
+ assert(Oceqw <= cmp && cmp <= Ocultl);
+ return INVCMPWL[cmp - Oceqw];
+}
+
+
+
int
argcls(Ins *i, int n)
{
@@ -338,6 +356,23 @@ phicls(int t, Tmp *tmp)
return t1;
}
+uint
+phiargn(Phi *p, Blk *b)
+{
+ uint n;
+
+ for (n = 0; n < p->narg; n++)
+ if (p->blk[n] == b)
+ return n;
+ die("unreachable");
+}
+
+Ref
+phiarg(Phi *p, Blk *b)
+{
+ return p->arg[phiargn(p, b)];
+}
+
Ref
newtmp(char *prfx, int k, Fn *fn)
{
@@ -421,6 +456,27 @@ addcon(Con *c0, Con *c1, int m)
return 1;
}
+int
+isconbits(Fn *fn, Ref r, int64_t *v)
+{
+ Con *c;
+
+ if (rtype(r) == RCon) {
+ c = &fn->con[r.val];
+ if (c->type == CBits) {
+ *v = c->bits.i;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int
+istmpconbits(Fn *fn, Ins *i, int64_t *v)
+{
+ return rtype(i->arg[0]) == RTmp && isconbits(fn, i->arg[1], v);
+}
+
void
salloc(Ref rt, Ref rs, Fn *fn)
{