aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--all.h34
-rw-r--r--amd64/emit.c9
-rw-r--r--cfg.c344
-rw-r--r--copy.c368
-rw-r--r--fold.c2
-rw-r--r--gcm.c361
-rw-r--r--gvn.c680
-rw-r--r--load.c1
-rw-r--r--main.c3
-rw-r--r--ops.h14
-rw-r--r--parse.c4
-rw-r--r--ssa.c3
-rw-r--r--test/copy.ssa15
-rw-r--r--test/gvn1.ssa (renamed from test/gvn-live-dead.ssa)0
-rw-r--r--test/gvn2.ssa (renamed from test/non0jnz.ssa)0
-rw-r--r--util.c98
16 files changed, 785 insertions, 1151 deletions
diff --git a/all.h b/all.h
index 6da6330..39a4f7c 100644
--- a/all.h
+++ b/all.h
@@ -223,7 +223,7 @@ struct Op {
uint cmpeqwl:1; /* Kl/Kw cmp eq/ne? */
uint cmplgtewl:1; /* Kl/Kw cmp lt/gt/le/ge? */
uint eqval:1; /* 1 for eq; 0 for ne */
- uint ispinned:1; /* GCM pinned op? */
+ uint pinned:1; /* GCM pinned op? */
};
struct Ins {
@@ -260,9 +260,9 @@ struct Blk {
Blk *idom;
Blk *dom, *dlink;
- int domdpth;
Blk **fron;
uint nfron;
+ int depth;
Blk **pred;
uint npred;
@@ -489,13 +489,14 @@ char *str(uint32_t);
int argcls(Ins *, int);
int isreg(Ref);
int iscmp(int, int *, int *);
-int invcmpwl(int);
+void igroup(Blk *, Ins *, Ins **, Ins **);
void emit(int, int, Ref, Ref, Ref);
void emiti(Ins);
void idup(Blk *, Ins *, ulong);
Ins *icpy(Ins *, Ins *, ulong);
int cmpop(int);
int cmpneg(int);
+int cmpwlneg(int);
int clsmerge(short *, short);
int phicls(int, Tmp *);
uint phiargn(Phi *, Blk *);
@@ -507,7 +508,6 @@ Ref newcon(Con *, Fn *);
Ref getcon(int64_t, Fn *);
int addcon(Con *, Con *, int);
int isconbits(Fn *fn, Ref r, int64_t *v);
-int istmpconbits(Fn *fn, Ins *i, int64_t *v);
void salloc(Ref, Ref, Fn *);
void dumpts(BSet *, Tmp *, FILE *);
void runmatch(uchar *, Num *, Ref, Ref *);
@@ -549,19 +549,11 @@ int sdom(Blk *, Blk *);
int dom(Blk *, Blk *);
void fillfron(Fn *);
void loopiter(Fn *, void (*)(Blk *, Blk *));
-void filldomdpth(Fn *);
+void filldepth(Fn *);
Blk *lca(Blk *, Blk *);
void fillloop(Fn *);
void simpljmp(Fn *);
-void replacepreds(Blk *, Blk *, Blk *);
-void killblks(Fn *);
-void blkmerge(Fn *);
-int ifgraph(Blk *, Blk **, Blk **, Blk **);
-int ifjoin(Blk *, Blk **, Blk **, Blk **);
-int emptyblk(Blk *);
-void ifelim(Fn *);
-void clrbvisit(Fn *);
-int reaches(Fn *,Blk *, Blk *);
+int reaches(Fn *, Blk *, Blk *);
int reachesnotvia(Fn *, Blk *, Blk *, Blk *);
/* mem.c */
@@ -586,25 +578,23 @@ void ssa(Fn *);
void ssacheck(Fn *);
/* copy.c */
-int iswu1(Fn *, Ref);
-void narrowpars(Fn *);
+void narrowpars(Fn *fn);
Ref copyref(Fn *, Blk *, Ins *);
+Ref phicopyref(Fn *, Blk *, Phi *);
/* fold.c */
+int foldint(Con *, int, int, Con *, Con *);
Ref foldref(Fn *, Ins *);
/* gvn.c */
-extern Ref con01[2];
-int is0non0(Fn *, Blk *, Ref, int, int *);
+extern Ref con01[2]; /* 0 and 1 */
+int zeroval(Fn *, Blk *, Ref, int, int *);
void gvn(Fn *);
/* gcm.c */
-int isfixed(Fn *, Ins *);
+int pinned(Ins *);
void gcm(Fn *);
-/* reassoc.c */
-void reassoc(Fn *);
-
/* simpl.c */
void simpl(Fn *);
diff --git a/amd64/emit.c b/amd64/emit.c
index 8f36188..a8cd0de 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -612,6 +612,7 @@ amd64_emitfn(Fn *fn, FILE *f)
Blk *b, *s;
Ins *i, itmp;
int *r, c, o, n, lbl;
+ uint p;
E *e;
e = &(E){.f = f, .fn = fn};
@@ -640,8 +641,14 @@ amd64_emitfn(Fn *fn, FILE *f)
}
for (lbl=0, b=fn->start; b; b=b->link) {
- if (lbl || b->npred > 1)
+ if (lbl || b->npred > 1) {
+ for (p=0; p<b->npred; p++)
+ if (b->pred[p]->id >= b->id)
+ break;
+ if (p != b->npred)
+ fprintf(f, ".p2align 4\n");
fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
+ }
for (i=b->ins; i!=&b->ins[b->nins]; i++)
emitins(*i, e);
lbl = 1;
diff --git a/cfg.c b/cfg.c
index 1fe584c..8047c12 100644
--- a/cfg.c
+++ b/cfg.c
@@ -8,49 +8,40 @@ newblk()
b = alloc(sizeof *b);
*b = z;
+ b->ins = vnew(0, sizeof b->ins[0], PFn);
+ b->pred = vnew(0, sizeof b->pred[0], PFn);
return b;
}
-/* TODO - this never seems to do anything??? */
static void
-prunephis(Fn *f)
+fixphis(Fn *f)
{
Blk *b;
- Phi *p, **pp;
- uint na, na0;
+ Phi *p;
+ uint n, n0;
- for (b = f->start; b; b = b->link) {
+ for (b=f->start; b; b=b->link) {
assert(b->id < f->nblk);
- for (pp = &b->phi; (*pp);) {
- p = *pp;
- for (na = na0 = 0; na < p->narg; na++)
- if (p->blk[na]->id != -1u) {
- p->blk[na0] = p->blk[na];
- p->arg[na0] = p->arg[na];
- na0++;
+ for (p=b->phi; p; p=p->link) {
+ for (n=n0=0; n<p->narg; n++)
+ if (p->blk[n]->id != -1u) {
+ p->blk[n0] = p->blk[n];
+ p->arg[n0] = p->arg[n];
+ n0++;
}
- if (na0 == 0) {
- *pp = p->link;
- } else {
- p->narg = na0;
- pp = &p->link;
- }
+ assert(n0 > 0);
+ p->narg = n0;
}
}
}
static void
-addpred(Blk *bp, Blk *bc)
+addpred(Blk *bp, Blk *b)
{
- ++bc->npred;
- if (bc->pred)
- vgrow(&bc->pred, bc->npred);
- else
- bc->pred = vnew(bc->npred, sizeof bc->pred[0], PFn);
- bc->pred[bc->npred-1] = bp;
+ vgrow(&b->pred, ++b->npred);
+ b->pred[b->npred-1] = bp;
}
-/* fill predecessors information in blocks; prune dead phi refs */
void
fillpreds(Fn *f)
{
@@ -64,8 +55,6 @@ fillpreds(Fn *f)
if (b->s2 && b->s2 != b->s1)
addpred(b, b->s2);
}
- /* TODO - this never seems to do anything??? */
- prunephis(f);
}
static void
@@ -87,7 +76,6 @@ porec(Blk *b, uint *npo)
b->id = (*npo)++;
}
-/* fill the rpo information; prune dead blks */
static void
fillrpo(Fn *f)
{
@@ -97,27 +85,25 @@ fillrpo(Fn *f)
b->id = -1u;
f->nblk = 0;
porec(f->start, &f->nblk);
- if (f->rpo)
- vgrow(&f->rpo, f->nblk);
- else
- f->rpo = vnew(f->nblk, sizeof f->rpo[0], PFn);
+ vgrow(&f->rpo, f->nblk);
for (p=&f->start; (b=*p);) {
if (b->id == -1u) {
*p = b->link;
} else {
- b->id = f->nblk-b->id-1; /* po -> rpo */
+ b->id = f->nblk-b->id-1;
f->rpo[b->id] = b;
p = &b->link;
}
}
}
-/* fill rpo, preds; prune dead blks, prune dead blk refs from phis */
+/* fill rpo, preds; prune dead blks */
void
fillcfg(Fn *f)
{
fillrpo(f);
fillpreds(f);
+ fixphis(f);
}
/* for dominators computation, read
@@ -260,34 +246,28 @@ loopiter(Fn *fn, void f(Blk *, Blk *))
}
}
+/* dominator tree depth */
void
-multloop(Blk *hd, Blk *b)
-{
- (void)hd;
- b->loop *= 10;
-}
-
-void
-filldomdpth(Fn *fn)
+filldepth(Fn *fn)
{
- Blk *b, *dom;
- uint dpth;
+ Blk *b, *d;
+ int depth;
for (b=fn->start; b; b=b->link)
- b->domdpth = -1;
+ b->depth = -1;
- fn->start->domdpth = 0;
+ fn->start->depth = 0;
for (b=fn->start; b; b=b->link) {
- if (b->domdpth != -1)
+ if (b->depth != -1)
continue;
- dpth = 1;
- for (dom = b->idom; dom->domdpth == -1; dom = dom->idom)
- dpth++;
- dpth += dom->domdpth;
- b->domdpth = dpth;
- for (dom = b->idom; dom->domdpth == -1; dom = dom->idom)
- dom->domdpth = --dpth;
+ depth = 1;
+ for (d=b->idom; d->depth==-1; d=d->idom)
+ depth++;
+ depth += d->depth;
+ b->depth = depth;
+ for (d=b->idom; d->depth==-1; d=d->idom)
+ d->depth = --depth;
}
}
@@ -299,9 +279,9 @@ lca(Blk *b1, Blk *b2)
return b2;
if (!b2)
return b1;
- while (b1->domdpth > b2->domdpth)
+ while (b1->depth > b2->depth)
b1 = b1->idom;
- while (b2->domdpth > b1->domdpth)
+ while (b2->depth > b1->depth)
b2 = b2->idom;
while (b1 != b2) {
b1 = b1->idom;
@@ -311,6 +291,13 @@ lca(Blk *b1, Blk *b2)
}
void
+multloop(Blk *hd, Blk *b)
+{
+ (void)hd;
+ b->loop *= 10;
+}
+
+void
fillloop(Fn *fn)
{
Blk *b;
@@ -371,244 +358,41 @@ simpljmp(Fn *fn)
free(uf);
}
-static void
-replacepred(Blk **blks, uint nblk, Blk *to, Blk *from)
-{
- uint n;
- for(n=0; n<nblk; n++)
- if (blks[n] == from) {
- blks[n] = to;
- break;
- }
- assert(n != nblk);
-}
-
-/* replace b->pred[] and p->blk[] entries */
-void
-replacepreds(Blk *s, Blk *to, Blk *from)
-{
- Phi *p;
-
- if (!s)
- return;
- assert(s->npred);
- replacepred(s->pred, s->npred, to, from);
- for (p = s->phi; p; p = p->link) {
- assert(p->narg == s->npred);
- replacepred(p->blk, p->narg, to, from);
- }
-}
-
-/* remove marked-dead blks - marked as fn->rpo[id] == 0 */
-void
-killblks(Fn *fn)
-{
- Blk **pb;
-
- for (pb = &fn->start; *pb;)
- if (fn->rpo[(*pb)->id])
- pb = &(*pb)->link;
- else
- *pb = (*pb)->link;
-}
-
-/* merge linear jmp chains */
-/* requires rpo pred, breaks cfg use */
-void
-blkmerge(Fn *fn)
-{
- uint bid, nins;
- Blk *curb, *b;
- Ins *vins;
-
- if (debug['B'])
- fputs("\n> Block merge:\n", stderr);
-
- vins = vnew(0, sizeof vins[0], PFn);
- curb = 0;
- /* linear jmp chains will be consecutive in rpo */
- for (bid=0; bid<fn->nblk; bid++) {
- b = fn->rpo[bid];
- if (curb == 0) {
- curb = b;
- nins = 0;
- } else
- fn->rpo[bid] = 0;
- addbins(b, &vins, &nins);
- /* note - there are cases where GVN does not eliminate singleton phis */
- if (b->jmp.type != Jjmp || b->s1->npred != 1 || b->s1->phi) {
- idup(curb, vins, nins);
- curb->nins = nins;
- curb->jmp = b->jmp;
- replacepreds(b->s1, curb, b);
- if (b->s1 != b->s2)
- replacepreds(b->s2, curb, b);
- curb->s1 = b->s1;
- curb->s2 = b->s2;
- curb = 0;
- } else {
- assert(b->s1->id == bid+1);
- if (debug['B'])
- fprintf(stderr, " merging blocks @%s -> @%s\n", b->name, b->s1->name);
- }
- }
- assert(curb == 0);
- killblks(fn);
-}
-
-int
-lonesucc(Blk *b, Blk *s)
+static int
+reachrec(Blk *b, Blk *to)
{
- assert(s);
- if (s != b)
- if (s->npred == 1)
- if (s->pred[0] == b)
- if (s->phi == 0)
+ if (b == to)
return 1;
- return 0;
-}
-
-int
-lonejmpsucc(Blk *b, Blk *s)
-{
- return s->jmp.type == Jjmp && lonesucc(b, s);
-}
-
-/* (otherwise) isolated if-then[-else] graphlet */
-int
-ifgraph(Blk *ifb, Blk **pthenb, Blk **pelseb, Blk **pjoinb)
-{
- if (ifb->jmp.type != Jjnz)
+ if (!b || b->visit)
return 0;
- assert(ifb->s1 && ifb->s2);
- assert(ifb->s1 != ifb->s2); /* dubious */
- *pthenb = ifb->s1;
- *pelseb = ifb->s2;
- *pjoinb = ifb->s1->s1;
- if (ifb->s1 == ifb->s2->s1) {
- /* empty then */
- *pthenb = ifb;
- *pjoinb = ifb->s1;
- }
- if (ifb->s1->s1 == ifb->s2)
- /* empty else */
- *pelseb = ifb;
-
- if (*pthenb == ifb ||
- ((*pthenb)->s1 == *pjoinb && lonejmpsucc(ifb, *pthenb)))
- if (*pelseb == ifb ||
- ((*pelseb)->s1 == *pjoinb && lonejmpsucc(ifb, *pelseb)))
- /* there are cases where npred == 2 is not strictly needed - ifconvert for example */
- if ((*pjoinb)->npred == 2)
- return 1;
-
- return 0;
-}
-
-/* join blk of if-then[-else] graphlet */
-int
-ifjoin(Blk *joinb, Blk **pifb, Blk **pthenb, Blk **pelseb)
-{
- Blk *joinb1;
- if (joinb->npred)
- if (ifgraph(joinb->pred[0], pthenb, pelseb, &joinb1))
- if (joinb == joinb1) {
- *pifb = joinb->pred[0];
+ b->visit = 1;
+ if (reachrec(b->s1, to))
return 1;
- }
- if (joinb->npred && joinb->pred[0]->npred)
- if (ifgraph(joinb->pred[0]->pred[0], pthenb, pelseb, &joinb1))
- if (joinb == joinb1) {
- *pifb = joinb->pred[0]->pred[0];
+ if (reachrec(b->s2, to))
return 1;
- }
+
return 0;
}
+/* Blk.visit needs to be clear at entry */
int
-emptyblk(Blk *b)
+reaches(Fn *fn, Blk *b, Blk *to)
{
- Ins *i;
-
- for (i = b->ins; i < &b->ins[b->nins]; i++)
- if (i->op != Onop)
- if (i->op != Odbgloc)
- return 0;
- return 1;
-}
+ int r;
-/* remove empty jnz graphlets */
-/* needs rpo; breaks cfg */
-void
-ifelim(Fn *fn)
-{
- uint bid;
- Blk *ifb, *thenb, *elseb, *joinb;
-
- for (bid = 0; bid < fn->nblk; bid++) {
- ifb = fn->rpo[bid];
- if (ifb == 0)
- continue;
- if (ifgraph(ifb, &thenb, &elseb, &joinb))
- if (joinb->phi == 0)
- if (thenb == ifb || emptyblk(thenb))
- if (elseb == ifb || emptyblk(elseb)) {
- if (debug['B'])
- fprintf(stderr, " eliminating empty if @%s -> @%s, @%s -> @%s\n",
- ifb->name, thenb->name, elseb->name, joinb->name);
- if (thenb != ifb)
- fn->rpo[thenb->id] = 0;
- if (elseb != ifb)
- fn->rpo[elseb->id] = 0;
- ifb->jmp.type = Jjmp;
- ifb->jmp.arg = R;
- ifb->s1 = joinb;
- ifb->s2 = 0;
- }
- }
- killblks(fn);
-}
-
-void
-clrbvisit(Fn *fn)
-{
- Blk *b;
- for (b = fn->start; b; b = b->link)
+ assert(to);
+ r = reachrec(b, to);
+ for (b=fn->start; b; b=b->link)
b->visit = 0;
+ return r;
}
-static int
-reaches1(Blk *b1, Blk *b2)
-{
- assert(b2);
- if (b1 == b2)
- return 1;
- if (b1 == 0 || b1->visit)
- return 0;
- b1->visit = 1;
- return reaches1(b1->s1, b2) || reaches1(b1->s2, b2);
-}
-
-/* path from b1 to b2 not thru bnotvia? */
-/* uses b->visit */
+/* can b reach 'to' not through excl
+ * Blk.visit needs to be clear at entry */
int
-reachesnotvia(Fn *fn, Blk *b1, Blk *b2, Blk *bnotvia)
+reachesnotvia(Fn *fn, Blk *b, Blk *to, Blk *excl)
{
- int rc;
-
- if (bnotvia)
- bnotvia->visit = 1;
- rc = reaches1(b1, b2);
- clrbvisit(fn);
- return rc;
+ excl->visit = 1;
+ return reaches(fn, b, to);
}
-
-/* path from b1 to b2? */
-/* uses b->visit */
-int
-reaches(Fn *fn, Blk *b1, Blk *b2)
-{
- return reachesnotvia(fn, b1, b2, 0);
-}
-
diff --git a/copy.c b/copy.c
index 95c7e00..c76bb79 100644
--- a/copy.c
+++ b/copy.c
@@ -1,9 +1,35 @@
#include "all.h"
-static uint
-u64_wbits(uint64_t v)
+typedef struct Ext Ext;
+
+struct Ext {
+ char zext;
+ char nopw; /* is a no-op if arg width is <= nopw */
+ char usew; /* uses only the low usew bits of arg */
+};
+
+static int
+ext(Ins *i, Ext *e)
{
- uint n;
+ static Ext tbl[] = {
+ /*extsb*/ {0, 7, 8},
+ /*extub*/ {1, 8, 8},
+ /*extsh*/ {0, 15, 16},
+ /*extuh*/ {1, 16, 16},
+ /*extsw*/ {0, 31, 32},
+ /*extuw*/ {1, 32, 32},
+ };
+
+ if (!isext(i->op))
+ return 0;
+ *e = tbl[i->op - Oextsb];
+ return 1;
+}
+
+static int
+bitwidth(uint64_t v)
+{
+ int n;
n = 0;
if (v >> 32) { n += 32; v >>= 32; }
@@ -15,44 +41,29 @@ u64_wbits(uint64_t v)
return n+v;
}
+/* no more than w bits are used */
static int
-EXTSIGNED[] = { /*extsb*/1, /*extub*/0, /*extsh*/1, /*extuh*/0, /*extsw*/1, /*extuw*/0 };
-
-static uint
-EXTMAXW[] = { /*extsb*/7, /*extub*/8, /*extsh*/15, /*extuh*/16, /*extsw*/31, /*extuw*/32 };
-
-static uint
-EXTW[] = { /*extsb*/8, /*extub*/8, /*extsh*/16, /*extuh*/16, /*extsw*/32, /*extuw*/32 };
-
-static uint
-STW[] = { /*storeb*/8, /*storeh*/16, /*storew*/32, /*storel*/64, /*stores*/32, /*stored*/64 };
-
-/* is the ref used only as a narrow value? */
-static int
-usewidthle(Fn *fn, Ref r, uint wbits)
+usewidthle(Fn *fn, Ref r, int w)
{
+ Ext e;
Tmp *t;
Use *u;
Phi *p;
- int b;
Ins *i;
Ref rc;
int64_t v;
+ int b;
- if (isconbits(fn, r, &v))
- if (u64_wbits(v) <= wbits)
- return 1;
- if (rtype(r) != RTmp)
- return 0;
+ assert(rtype(r) == RTmp);
t = &fn->tmp[r.val];
- for (u = t->use; u < &t->use[t->nuse]; u++) {
+ for (u=t->use; u<&t->use[t->nuse]; u++) {
switch (u->type) {
case UPhi:
p = u->u.phi;
if (p->visit)
continue;
p->visit = 1;
- b = usewidthle(fn, p->to, wbits);
+ b = usewidthle(fn, p->to, w);
p->visit = 0;
if (b)
continue;
@@ -61,14 +72,13 @@ usewidthle(Fn *fn, Ref r, uint wbits)
i = u->u.ins;
assert(i != 0);
if (i->op == Ocopy)
- if (usewidthle(fn, i->to, wbits))
+ if (usewidthle(fn, i->to, w))
continue;
- if (isext(i->op)) {
- if (EXTW[i->op - Oextsb] <= wbits)
+ if (ext(i, &e)) {
+ if (e.usew <= w)
+ continue;
+ if (usewidthle(fn, i->to, w))
continue;
- else
- if (usewidthle(fn, i->to, wbits))
- continue;;
}
if (i->op == Oand) {
if (req(r, i->arg[0]))
@@ -77,15 +87,11 @@ usewidthle(Fn *fn, Ref r, uint wbits)
assert(req(r, i->arg[1]));
rc = i->arg[0];
}
- if (isconbits(fn, rc, &v))
- if (u64_wbits(v) <= wbits)
+ if (isconbits(fn, rc, &v)
+ && bitwidth(v) <= w)
continue;
break;
}
- if (isstore(i->op))
- if (req(r, i->arg[1]))
- if (STW[i->op - Ostoreb] > wbits)
- continue;
break;
default:
break;
@@ -95,27 +101,17 @@ usewidthle(Fn *fn, Ref r, uint wbits)
return 1;
}
-static Phi*
-findphi(Fn *fn, uint bid, Ref to)
-{
- Phi *p;
- for (p = fn->rpo[bid]->phi; p; p = p->link)
- if (req(p->to, to))
- break;
- assert(p);
- return p;
-}
-
-static uint
-uint_min(uint v1, uint v2)
+static int
+min(int v1, int v2)
{
return v1 < v2 ? v1 : v2;
}
-/* is the ref def a narrow value? */
+/* is the ref narrower than w bits */
static int
-defwidthle(Fn *fn, Ref r, uint wbits)
+defwidthle(Fn *fn, Ref r, int w)
{
+ Ext e;
Tmp *t;
Phi *p;
Ins *i;
@@ -123,93 +119,99 @@ defwidthle(Fn *fn, Ref r, uint wbits)
int64_t v;
int x;
- if (isconbits(fn, r, &v))
- if (u64_wbits(v) <= wbits)
+ if (isconbits(fn, r, &v)
+ && bitwidth(v) <= w)
return 1;
if (rtype(r) != RTmp)
return 0;
t = &fn->tmp[r.val];
if (t->cls != Kw)
return 0;
- i = t->def;
- if (i == 0) {
+
+ if (!t->def) {
/* phi def */
- p = findphi(fn, t->bid, r);
+ for (p=fn->rpo[t->bid]->phi; p; p=p->link)
+ if (req(p->to, r))
+ break;
+ assert(p);
if (p->visit)
return 1;
p->visit = 1;
- for (n = 0; n < p->narg; n++)
- if (!defwidthle(fn, p->arg[n], wbits)) {
+ for (n=0; n<p->narg; n++)
+ if (!defwidthle(fn, p->arg[n], w)) {
p->visit = 0;
return 0;
}
p->visit = 0;
return 1;
}
- /* ins def */
+
+ i = t->def;
if (i->op == Ocopy)
- return defwidthle(fn, i->arg[0], wbits);
+ return defwidthle(fn, i->arg[0], w);
if (i->op == Oshr || i->op == Osar) {
if (isconbits(fn, i->arg[1], &v))
if (0 < v && v <= 32) {
- if (i->op == Oshr && 32-v <= wbits)
+ if (i->op == Oshr && w+v >= 32)
return 1;
- if (0 <= v && v < 32 && wbits < 32)
- return defwidthle(fn, i->arg[0], uint_min((i->op == Osar ? 31 : 32), wbits+v));
+ if (w < 32) {
+ if (i->op == Osar)
+ w = min(31, w+v);
+ else
+ w = min(32, w+v);
+ }
}
- return defwidthle(fn, i->arg[0], wbits);
+ return defwidthle(fn, i->arg[0], w);
}
if (iscmp(i->op, &x, &x))
- return wbits >= 1;
- if (i->op == Oand)
- return defwidthle(fn, i->arg[0], wbits) || defwidthle(fn, i->arg[1], wbits);
- if (i->op == Oor || i->op == Oxor)
- return defwidthle(fn, i->arg[0], wbits) && defwidthle(fn, i->arg[1], wbits);
- if (isext(i->op)) {
- if (EXTSIGNED[i->op - Oextsb])
- return defwidthle(fn, i->arg[0], uint_min(wbits, EXTMAXW[i->op - Oextsb]));
- if (EXTW[i->op - Oextsb] <= wbits)
+ return w >= 1;
+ if (i->op == Oand) {
+ if (defwidthle(fn, i->arg[0], w)
+ || defwidthle(fn, i->arg[1], w))
return 1;
- return defwidthle(fn, i->arg[0], wbits);
+ return 0;
+ }
+ if (i->op == Oor || i->op == Oxor) {
+ if (defwidthle(fn, i->arg[0], w)
+ && defwidthle(fn, i->arg[1], w))
+ return 1;
+ return 0;
+ }
+ if (ext(i, &e)) {
+ if (e.zext && e.usew <= w)
+ return 1;
+ w = min(w, e.nopw);
+ return defwidthle(fn, i->arg[0], w);
}
- return 0;
-}
-/* is the ref a boolean - 0, 1 - value? */
-int
-iswu1(Fn *fn, Ref r)
-{
- return defwidthle(fn, r, 1);
+ return 0;
}
static int
-isnarrowpar(Fn *fn, Ref r)
+isw1(Fn *fn, Ref r)
{
- Tmp *t;
-
- if (rtype(r) != RTmp)
- return 0;
- t = &fn->tmp[r.val];
- if (t->bid != fn->start->id || t->def == 0)
- return 0;
- return ispar(t->def->op);
+ return defwidthle(fn, r, 1);
}
-/* Insert extub/extuh instructions in start for pars used only narrowly */
-/* needs use; breaks use */
+/* insert early extub/extuh instructions
+ * for pars used only narrowly; this
+ * helps factoring extensions out of
+ * loops
+ *
+ * needs use; breaks use
+ */
void
narrowpars(Fn *fn)
{
Blk *b;
int loop;
- Ins *i, *ins;
+ Ins ext, *i, *ins;
uint npar, nins;
- enum O extop;
Ref r;
/* only useful for functions with loops */
loop = 0;
- for (b = fn->start; b; b = b->link)
+ for (b=fn->start; b; b=b->link)
if (b->loop > 1) {
loop = 1;
break;
@@ -218,49 +220,47 @@ narrowpars(Fn *fn)
return;
b = fn->start;
- npar = 0;
- for (i = b->ins; i < &b->ins[b->nins]; i++) {
+ npar = 0;
+ for (i=b->ins; i<&b->ins[b->nins]; i++) {
if (!ispar(i->op))
break;
npar++;
}
-
if (npar == 0)
return;
nins = b->nins + npar;
- ins = vnew(nins, sizeof ins[0], PFn); //alloc(nins * sizeof ins[0]);
- memcpy(ins, b->ins, npar * sizeof ins[0]);
- memcpy(ins + 2*npar, b->ins + npar, (b->nins - npar) * sizeof ins[0]);
+ ins = vnew(nins, sizeof ins[0], PFn);
+ icpy(ins, b->ins, npar);
+ icpy(ins + 2*npar, b->ins+npar, b->nins-npar);
b->ins = ins;
b->nins = nins;
- for (i = b->ins; i < &b->ins[b->nins]; i++) {
+ for (i=b->ins; i<&b->ins[b->nins]; i++) {
if (!ispar(i->op))
break;
- extop = Onop;
+ ext = (Ins){.op = Onop};
if (i->cls == Kw)
- if (usewidthle(fn, i->to, 16)) {
- if (usewidthle(fn, i->to, 8))
- extop = Oextub;
- else
- extop = Oextuh;
- }
- if (extop == Onop) {
- *(i+npar) = (Ins) {.op = Onop};
- } else {
+ if (usewidthle(fn, i->to, 16)) {
+ ext.op = Oextuh;
+ if (usewidthle(fn, i->to, 8))
+ ext.op = Oextub;
r = newtmp("vw", i->cls, fn);
- *(i+npar) = (Ins) {.op = extop, .cls = i->cls, .to = i->to, .arg = {r}};
+ ext.cls = i->cls;
+ ext.to = i->to;
+ ext.arg[0] = r;
i->to = r;
}
+ *(i+npar) = ext;
}
}
-/* used by GVN */
Ref
copyref(Fn *fn, Blk *b, Ins *i)
{
+ /* which extensions are copies for a given
+ * argument width */
static bits extcpy[] = {
[WFull] = 0,
[Wsb] = BIT(Wsb) | BIT(Wsh) | BIT(Wsw),
@@ -270,63 +270,133 @@ copyref(Fn *fn, Blk *b, Ins *i)
[Wsw] = BIT(Wsw),
[Wuw] = BIT(Wuw),
};
- bits bext;
+ Ext e;
Tmp *t;
int64_t v;
- int is0;
+ int w, z;
if (i->op == Ocopy)
return i->arg[0];
/* op identity value */
- if (optab[i->op].hasid)
- if (KBASE(i->cls) == 0) /* integer only - fp NaN! */
- if (req(i->arg[1], con01[optab[i->op].idval]))
- if (!optab[i->op].cmpeqwl || iswu1(fn, i->arg[0]))
+ if (optab[i->op].hasid
+ && KBASE(i->cls) == 0 /* integer only - fp NaN! */
+ && req(i->arg[1], con01[optab[i->op].idval])
+ && (!optab[i->op].cmpeqwl || isw1(fn, i->arg[0])))
return i->arg[0];
/* idempotent op with identical args */
- if (optab[i->op].idemp)
- if (req(i->arg[0], i->arg[1]))
+ if (optab[i->op].idemp
+ && req(i->arg[0], i->arg[1]))
return i->arg[0];
/* integer cmp with identical args */
- if (optab[i->op].cmpeqwl || optab[i->op].cmplgtewl)
- if (req(i->arg[0], i->arg[1]))
+ if ((optab[i->op].cmpeqwl || optab[i->op].cmplgtewl)
+ && req(i->arg[0], i->arg[1]))
return con01[optab[i->op].eqval];
- /* cmpeq/ne 0 with 0/non-0 inference from dominating jnz */
- if (optab[i->op].cmpeqwl)
- if (req(i->arg[1], con01[0]))
- if (is0non0(fn, b, i->arg[0], argcls(i,0), &is0))
- return con01[optab[i->op].eqval^is0^1];
+ /* cmpeq/ne 0 with 0/non-0 inference */
+ if (optab[i->op].cmpeqwl
+ && req(i->arg[1], CON_Z)
+ && zeroval(fn, b, i->arg[0], argcls(i, 0), &z))
+ return con01[optab[i->op].eqval^z^1];
/* redundant and mask */
- if (i->op == Oand)
- if (isconbits(fn, i->arg[1], &v))
- if (((v+1) & v) == 0) /* v == 2^N-1 */
- if (defwidthle(fn, i->arg[0], u64_wbits(v)))
+ if (i->op == Oand
+ && isconbits(fn, i->arg[1], &v)
+ && (v > 0 && ((v+1) & v) == 0)
+ && defwidthle(fn, i->arg[0], bitwidth(v)))
return i->arg[0];
- if (!isext(i->op) || rtype(i->arg[0]) != RTmp)
- return R;
- if (i->op == Oextsw || i->op == Oextuw)
- if (i->cls == Kw)
+ if (i->cls == Kw
+ && (i->op == Oextsw || i->op == Oextuw))
return i->arg[0];
- t = &fn->tmp[i->arg[0].val];
- assert(KBASE(t->cls) == 0);
- if (i->cls == Kl && t->cls == Kw)
+ if (ext(i, &e) && rtype(i->arg[0]) == RTmp) {
+ t = &fn->tmp[i->arg[0].val];
+ assert(KBASE(t->cls) == 0);
+
+ /* do not break typing by returning
+ * a narrower temp */
+ if (KWIDE(i->cls) > KWIDE(t->cls))
+ return R;
+
+ w = Wsb + (i->op - Oextsb);
+ if (BIT(w) & extcpy[t->width])
+ return i->arg[0];
+
+ /* avoid eliding extensions of params
+ * inserted in the start block; their
+ * point is to make further extensions
+ * redundant */
+ if ((!t->def || !ispar(t->def->op))
+ && usewidthle(fn, i->to, e.usew))
+ return i->arg[0];
+
+ if (defwidthle(fn, i->arg[0], e.nopw))
+ return i->arg[0];
+ }
+
+ return R;
+}
+
+static int
+phieq(Phi *pa, Phi *pb)
+{
+ Ref r;
+ uint n;
+
+ assert(pa->narg == pb->narg);
+ for (n=0; n<pa->narg; n++) {
+ r = phiarg(pb, pa->blk[n]);
+ if (!req(pa->arg[n], r))
+ return 0;
+ }
+ return 1;
+}
+
+Ref
+phicopyref(Fn *fn, Blk *b, Phi *p)
+{
+ Blk *d, **s;
+ Phi *p1;
+ uint n, c;
+
+ /* identical args */
+ for (n=0; n<p->narg-1; n++)
+ if (!req(p->arg[n], p->arg[n+1]))
+ break;
+ if (n == p->narg-1)
+ return p->arg[n];
+
+ /* same as a previous phi */
+ for (p1=b->phi; p1!=p; p1=p1->link) {
+ assert(p1);
+ if (phieq(p1, p))
+ return p1->to;
+ }
+
+ /* can be replaced by a
+ * dominating jnz arg */
+ d = b->idom;
+ if (p->narg != 2
+ || d->jmp.type != Jjnz
+ || !isw1(fn, d->jmp.arg))
return R;
- bext = extcpy[t->width];
- if ((BIT(Wsb + (i->op-Oextsb)) & bext) != 0)
- return i->arg[0];
- if (!isnarrowpar(fn, i->arg[0]))
- if (usewidthle(fn, i->to, EXTW[i->op - Oextsb]))
- return i->arg[0];
- if (defwidthle(fn, i->arg[0], EXTMAXW[i->op - Oextsb]))
- return i->arg[0];
+ s = (Blk*[]){0, 0};
+ for (n=0; n<2; n++)
+ for (c=0; c<2; c++)
+ if (req(p->arg[n], con01[c]))
+ s[c] = p->blk[n];
+
+ /* if s1 ends with a jnz on either b
+ * or s2; the inference below is wrong
+ * without the jump type checks */
+ if (d->s1 == s[1] && d->s2 == s[0]
+ && d->s1->jmp.type == Jjmp
+ && d->s2->jmp.type == Jjmp)
+ return d->jmp.arg;
return R;
}
diff --git a/fold.c b/fold.c
index 87e5824..7d0b04c 100644
--- a/fold.c
+++ b/fold.c
@@ -13,7 +13,7 @@ iscon(Con *c, int w, uint64_t k)
return (uint32_t)c->bits.i == (uint32_t)k;
}
-static int
+int
foldint(Con *res, int op, int w, Con *cl, Con *cr)
{
union {
diff --git a/gcm.c b/gcm.c
index 23e475b..e685d2a 100644
--- a/gcm.c
+++ b/gcm.c
@@ -2,36 +2,35 @@
#define NOBID (-1u)
-/* ins can trap at runtime */
static int
-istrapping(Fn *fn, Ins *i)
+isdivwl(Ins *i)
{
- int64_t v;
-
- if (KBASE(i->cls) == 0)
- if (INRANGE(i->op, Odiv, Ourem))
- if (!isconbits(fn, i->arg[1], &v) || v == 0)
- return 1;
- return 0;
+ switch (i->op) {
+ case Odiv:
+ case Orem:
+ case Oudiv:
+ case Ourem:
+ return KBASE(i->cls) == 0;
+ default:
+ return 0;
+ }
}
-/* fixed ins that can be eliminated if unused */
-static int
-canelim(Fn *fn, Ins *i)
+int
+pinned(Ins *i)
{
- return isload(i->op) || isalloc(i->op) || istrapping(fn, i);
+ return optab[i->op].pinned || isdivwl(i);
}
-/* ins must stay in def blk */
-int
-isfixed(Fn *fn, Ins *i)
+/* pinned ins that can be eliminated if unused */
+static int
+canelim(Ins *i)
{
- return optab[i->op].ispinned || istrapping(fn, i);
+ return isload(i->op) || isalloc(i->op) || isdivwl(i);
}
static uint earlyins(Fn *, Blk *, Ins *);
-/* return earlybid of ref def ins */
static uint
schedearly(Fn *fn, Ref r)
{
@@ -39,43 +38,39 @@ schedearly(Fn *fn, Ref r)
Blk *b;
if (rtype(r) != RTmp)
- return 0 /* root/start */;
+ return 0;
t = &fn->tmp[r.val];
if (t->gcmbid != NOBID)
- return t->gcmbid; /* already visited/visiting */
+ return t->gcmbid;
b = fn->rpo[t->bid];
if (t->def) {
- /* def is an ins */
assert(b->ins <= t->def && t->def < &b->ins[b->nins]);
- t->gcmbid = 0; /* mark visiting root/start blk */
- t->gcmbid = earlyins(fn, b, t->def); /* schedule ins input defs */
+ t->gcmbid = 0; /* mark as visiting */
+ t->gcmbid = earlyins(fn, b, t->def);
} else {
- /* def is a phi - it stays in its def blk */
+ /* phis do not move */
t->gcmbid = t->bid;
}
return t->gcmbid;
}
-/* return deepest domdpth bid of arg defs */
static uint
-earlyins(Fn *fn, Blk *b, Ins* i)
+earlyins(Fn *fn, Blk *b, Ins *i)
{
- uint earlybid, arg1earlybid;
-
- earlybid = schedearly(fn, i->arg[0]);
- assert(earlybid != NOBID);
- arg1earlybid = schedearly(fn, i->arg[1]);
- assert(arg1earlybid != NOBID);
- if (fn->rpo[earlybid]->domdpth < fn->rpo[arg1earlybid]->domdpth) {
- assert(dom(fn->rpo[earlybid], fn->rpo[arg1earlybid]));
- earlybid = arg1earlybid;
+ uint b0, b1;
+
+ b0 = schedearly(fn, i->arg[0]);
+ assert(b0 != NOBID);
+ b1 = schedearly(fn, i->arg[1]);
+ assert(b1 != NOBID);
+ if (fn->rpo[b0]->depth < fn->rpo[b1]->depth) {
+ assert(dom(fn->rpo[b0], fn->rpo[b1]));
+ b0 = b1;
}
-
- /* fixed ins remain in their defining blk */
- return isfixed(fn, i) ? b->id : earlybid;
+ return pinned(i) ? b->id : b0;
}
static void
@@ -87,12 +82,14 @@ earlyblk(Fn *fn, uint bid)
uint n;
b = fn->rpo[bid];
- for (p = b->phi; p; p = p->link)
- for (n = 0; n < p->narg; n++)
+ for (p=b->phi; p; p=p->link)
+ for (n=0; n<p->narg; n++)
schedearly(fn, p->arg[n]);
- for (i = b->ins; i < &b->ins[b->nins]; i++)
- if (isfixed(fn, i))
- earlyins(fn, b, i);
+ for (i=b->ins; i<&b->ins[b->nins]; i++)
+ if (pinned(i)) {
+ schedearly(fn, i->arg[0]);
+ schedearly(fn, i->arg[1]);
+ }
schedearly(fn, b->jmp.arg);
}
@@ -154,16 +151,17 @@ schedlate(Fn *fn, Ref r)
t = &fn->tmp[r.val];
if (t->visit)
- return t->gcmbid; /* already visited/visiting */
+ return t->gcmbid;
- t->visit = 1; /* mark visiting/visited */
+ t->visit = 1;
earlybid = t->gcmbid;
if (earlybid == NOBID)
return NOBID; /* not used */
- t->gcmbid = t->bid; /* t->gcmbid is now latebid */
+ /* reuse gcmbid for late bid */
+ t->gcmbid = t->bid;
latebid = NOBID;
- for (u = t->use; u < &t->use[t->nuse]; u++) {
+ for (u=t->use; u<&t->use[t->nuse]; u++) {
assert(u->bid < fn->nblk);
b = fn->rpo[u->bid];
switch (u->type) {
@@ -182,39 +180,37 @@ schedlate(Fn *fn, Ref r)
}
latebid = lcabid(fn, latebid, uselatebid);
}
+ /* latebid may be NOBID if the temp is used
+ * in fixed instructions that may be eliminated
+ * and are themselves unused transitively */
- /* phis stay in their def blk */
- if (t->def) {
- /* allow elimination of unused load/alloc/trapping ins */
- if (latebid == NOBID && canelim(fn, t->def))
- t->gcmbid = NOBID;
- /* ... otherwise fixed ins stay in defining blk */
- else if(!isfixed(fn, t->def))
- t->gcmbid = bestbid(fn, earlybid, latebid);
- }
+ if (t->def && !pinned(t->def))
+ t->gcmbid = bestbid(fn, earlybid, latebid);
+ /* else, keep the early one */
+ /* now, gcmbid is the best bid */
return t->gcmbid;
}
-/* return lca bid of uses, or NOBID if no active uses */
+/* returns lca bid of uses or NOBID if
+ * the definition can be eliminated */
static uint
lateins(Fn *fn, Blk *b, Ins *i, Ref r)
{
uint latebid;
- assert(i->op == Onop || req(i->arg[0], r) || req(i->arg[1], r));
- if (i->op == Onop)
- return NOBID; /* already eliminated */
-
assert(b->ins <= i && i < &b->ins[b->nins]);
+ assert(req(i->arg[0], r) || req(i->arg[1], r));
latebid = schedlate(fn, i->to);
- /* allow elimination of unused load/alloc/trapping ins */
- if (latebid == NOBID)
- if (canelim(fn, i))
- return NOBID;
- /* ... otherwise fixed ins stay in defining blk */
- return isfixed(fn, i) ? b->id : latebid;
+ if (pinned(i)) {
+ if (latebid == NOBID)
+ if (canelim(i))
+ return NOBID;
+ return b->id;
+ }
+
+ return latebid;
}
static uint
@@ -254,17 +250,16 @@ lateblk(Fn *fn, uint bid)
Ins *i;
b = fn->rpo[bid];
- for (pp = &b->phi; *(pp);)
+ for (pp=&b->phi; *(pp);)
if (schedlate(fn, (*pp)->to) == NOBID) {
- /* unused */
(*pp)->narg = 0; /* mark unused */
*pp = (*pp)->link; /* remove phi */
} else
pp = &(*pp)->link;
- for (i = b->ins; i < &b->ins[b->nins]; i++)
- if (isfixed(fn, i))
- lateins(fn, b, i, i->arg[0]);
+ for (i=b->ins; i<&b->ins[b->nins]; i++)
+ if (pinned(i))
+ schedlate(fn, i->to);
}
static void
@@ -274,7 +269,7 @@ addgcmins(Fn *fn, Ins *vins, uint nins)
Tmp *t;
Blk *b;
- for (i = vins; i < &vins[nins]; i++) {
+ for (i=vins; i<&vins[nins]; i++) {
assert(rtype(i->to) == RTmp);
t = &fn->tmp[i->to.val];
b = fn->rpo[t->gcmbid];
@@ -282,9 +277,10 @@ addgcmins(Fn *fn, Ins *vins, uint nins)
}
}
-/* remove unused instructions */
-/* move instructions to (end of) target block */
-/* use-before-def is fixed up afterwards */
+/* move live instructions to the
+ * end of their target block; use-
+ * before-def errors are fixed by
+ * schedblk */
static void
gcmmove(Fn *fn)
{
@@ -295,13 +291,13 @@ gcmmove(Fn *fn)
nins = 0;
vins = vnew(nins, sizeof vins[0], PFn);
- for (t=&fn->tmp[Tmp0]; t < &fn->tmp[fn->ntmp]; t++) {
+ for (t=fn->tmp; t<&fn->tmp[fn->ntmp]; t++) {
if (t->def == 0)
continue;
if (t->bid == t->gcmbid)
continue;
i = t->def;
- if (isfixed(fn, i) && !canelim(fn, i))
+ if (pinned(i) && !canelim(i))
continue;
assert(rtype(i->to) == RTmp);
assert(t == &fn->tmp[i->to.val]);
@@ -309,176 +305,153 @@ gcmmove(Fn *fn)
addins(&vins, &nins, i);
*i = (Ins){.op = Onop};
}
-
addgcmins(fn, vins, nins);
}
-static void
-schedins(Fn *fn, Blk *b, Ins *i0, Ins **pvins, uint *pnins)
+/* dfs ordering */
+static Ins *
+schedins(Fn *fn, Blk *b, Ins *i, Ins **pvins, uint *pnins)
{
- Ins *i, *i1;
+ Ins *i0, *i1;
Tmp *t;
- uint na;
+ uint n;
- if (i0->op == Onop)
- return;
- /* arg...call have to stay together */
- /* TODO - sel0...sel1 too */
- for (i1 = i0; isarg(i1->op); i1++) {}
- for (i = i0; i <= i1; i++)
- for (na = 0; na < 2; na++) {
- if (rtype(i->arg[na]) != RTmp)
- continue;
- t = &fn->tmp[i->arg[na].val];
- /* def in different blk, or phi in this blk */
- if (t->bid != b->id || t->def == 0)
+ igroup(b, i, &i0, &i1);
+ for (i=i0; i<i1; i++)
+ for (n=0; n<2; n++) {
+ if (rtype(i->arg[n]) != RTmp)
continue;
- /* already scheduled */
- if (t->def->op == Onop) {
+ t = &fn->tmp[i->arg[n].val];
+ if (t->bid != b->id || !t->def)
continue;
- }
schedins(fn, b, t->def, pvins, pnins);
}
- for (i = i0; i <= i1; i++) {
+ for (i=i0; i<i1; i++) {
addins(pvins, pnins, i);
*i = (Ins){.op = Onop};
}
+ return i1;
}
+/* order ins within a block */
static void
-fixbub4d(Fn *fn, Blk *b, Ins **pvins)
-{
- Ins *i;
- uint nins;
-
- nins = 0;
- for (i = b->ins; i < &b->ins[b->nins]; i++)
- schedins(fn, b, i, pvins, &nins);
- idup(b, *pvins, nins);
-}
-
-static void
-fixub4d(Fn *fn)
+schedblk(Fn *fn)
{
Blk *b;
- Ins *vins; // TODO insb
+ Ins *i, *vins;
+ uint nins;
- vins = vnew(0, sizeof vins[0], PFn);
- for (b = fn->start; b; b = b->link)
- fixbub4d(fn, b, &vins);
+ vins = vnew(0, sizeof vins[0], PHeap);
+ for (b=fn->start; b; b=b->link) {
+ nins = 0;
+ for (i=b->ins; i<&b->ins[b->nins];)
+ i = schedins(fn, b, i, &vins, &nins);
+ idup(b, vins, nins);
+ }
+ vfree(vins);
}
static int
-iskladdcon(Fn *fn, Ins *i, int64_t *v)
+cheap(Ins *i)
{
- if (i->op == Oadd)
- if (i->cls == Kl)
- if (rtype(i->arg[0]) == RTmp)
- if (isconbits(fn, i->arg[1], v))
- return 1;
- return 0;
-}
-
-static void
-ireassoc(Fn *fn, Blk *b, Ins *i, Ins **pvins, uint *pnins)
-{
- Blk *b2;
- Tmp *t, *t2;
- Use *u;
- Ref r2;
- int64_t v;
int x;
- assert(b->ins <= i && i < &b->ins[b->nins]);
- if (!iscmp(i->op, &x, &x))
- if (!iskladdcon(fn, i, &v))
- return;
-
- assert(rtype(i->to) == RTmp);
- t = &fn->tmp[i->to.val];
- for (u = t->use; u < &t->use[t->nuse]; u++) {
- if (u->type == UPhi)
- continue;
- b2 = fn->rpo[u->bid];
- addins(pvins, pnins, i);
- /* careful, can move fn->tmp */
- r2 = newtmp("rea", t->cls, fn);
- t = &fn->tmp[i->to.val];
- t2 = &fn->tmp[r2.val];
- t2->gcmbid = u->bid;
- (*pvins)[(*pnins)-1].to = r2;
- if (u->type == UIns) {
- assert(req(u->u.ins->arg[0], i->to)
- || req(u->u.ins->arg[1], i->to));
- if (req(u->u.ins->arg[0], i->to))
- u->u.ins->arg[0] = r2;
- if (req(u->u.ins->arg[1], i->to))
- u->u.ins->arg[1] = r2;
- } else {
- assert(u->type == UJmp);
- assert(req(b2->jmp.arg, i->to));
- b2->jmp.arg = r2;
- }
+ if (KBASE(i->cls) != 0)
+ return 0;
+ switch (i->op) {
+ case Oneg:
+ case Oadd:
+ case Osub:
+ case Omul:
+ case Oand:
+ case Oor:
+ case Oxor:
+ case Osar:
+ case Oshr:
+ case Oshl:
+ return 1;
+ default:
+ return iscmp(i->op, &x, &x);
}
}
-/* Redistribute trivial ops to point of use. */
-/* Reduces register pressure. */
-/* needs rpo, use; breaks use */
-void
-reassoc(Fn *fn)
+static void
+sinkref(Fn *fn, Blk *b, Ref *pr)
{
- Blk *b;
- Ins *vins, *i;
- uint nins;
-
- nins = 0;
- vins = vnew(nins, sizeof vins[0], PFn);
+ Ins i;
+ Tmp *t;
+ Ref r;
- /* identify trivial ins */
- for (b = fn->start; b; b = b->link) {
- for (i = b->ins; i < &b->ins[b->nins]; i++)
- ireassoc(fn, b, i, &vins, &nins);
- }
+ if (rtype(*pr) != RTmp)
+ return;
+ t = &fn->tmp[pr->val];
+ if (!t->def
+ || t->bid == b->id
+ || pinned(t->def)
+ || !cheap(t->def))
+ return;
- addgcmins(fn, vins, nins);
+ /* sink t->def to b */
+ i = *t->def;
+ r = newtmp("snk", t->cls, fn);
+ t = 0; /* invalidated */
+ *pr = r;
+ i.to = r;
+ fn->tmp[r.val].gcmbid = b->id;
+ emiti(i);
+ sinkref(fn, b, &i.arg[0]);
+ sinkref(fn, b, &i.arg[1]);
}
+/* redistribute trivial ops to point of
+ * use to reduce register pressure
+ * requires rpo, use; breaks use
+ */
static void
-cleartmps(Fn *fn)
+sink(Fn *fn)
{
- Tmp *t;
+ Blk *b;
+ Ins *i;
- for (t=&fn->tmp[Tmp0]; t < &fn->tmp[fn->ntmp]; t++) {
- t->visit = 0;
- t->gcmbid = NOBID;
+ for (b=fn->start; b; b=b->link) {
+ for (i=b->ins; i<&b->ins[b->nins]; i++)
+ if (isload(i->op))
+ sinkref(fn, b, &i->arg[0]);
+ else if (isstore(i->op))
+ sinkref(fn, b, &i->arg[1]);
+ sinkref(fn, b, &b->jmp.arg);
}
+ addgcmins(fn, curi, &insb[NIns] - curi);
}
-/* https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf */
-/* require use dom */
-/* maintains rpo pred dom */
-/* breaks use */
+/* requires use dom
+ * maintains rpo pred dom
+ * breaks use
+ */
void
gcm(Fn *fn)
{
+ Tmp *t;
uint bid;
- filldomdpth(fn);
+ filldepth(fn);
fillloop(fn);
- cleartmps(fn);
+ for (t=fn->tmp; t<&fn->tmp[fn->ntmp]; t++) {
+ t->visit = 0;
+ t->gcmbid = NOBID;
+ }
for (bid=0; bid<fn->nblk; bid++)
earlyblk(fn, bid);
for (bid=0; bid<fn->nblk; bid++)
lateblk(fn, bid);
gcmmove(fn);
- cleartmps(fn); /* filluse() uses visit */
filluse(fn);
- reassoc(fn);
+ curi = &insb[NIns];
+ sink(fn);
filluse(fn);
- fixub4d(fn);
+ schedblk(fn);
if (debug['G']) {
fprintf(stderr, "\n> After GCM:\n");
diff --git a/gvn.c b/gvn.c
index 20c8edd..1db5bbc 100644
--- a/gvn.c
+++ b/gvn.c
@@ -1,6 +1,5 @@
#include "all.h"
-/* literal constants 0, 1 */
Ref con01[2];
static inline uint
@@ -18,121 +17,91 @@ rhash(Ref r)
static uint
ihash(Ins *i)
{
- uint a0h, a1h, ah, h;
+ uint h;
- a0h = rhash(i->arg[0]);
- a1h = rhash(i->arg[1]);
- ah = mix(a0h, a1h);
- h = mix(i->cls, i->op);
- h = mix(h, ah);
+ h = mix(i->op, i->cls);
+ h = mix(h, rhash(i->arg[0]));
+ h = mix(h, rhash(i->arg[1]));
return h;
-
}
static int
ieq(Ins *ia, Ins *ib)
{
- if (ia->cls == ib->cls)
if (ia->op == ib->op)
+ if (ia->cls == ib->cls)
if (req(ia->arg[0], ib->arg[0]))
if (req(ia->arg[1], ib->arg[1]))
return 1;
return 0;
}
-static Ins** gvntbl;
+static Ins **gvntbl;
static uint gvntbln;
-static uint lookupn;
-static uint proben;
-static uint maxproben;
static Ins *
-gvndup(Ins *i, int insert) {
+gvndup(Ins *i, int insert)
+{
uint idx, n;
- Ins *i2;
-
- lookupn++;
+ Ins *ii;
idx = ihash(i) % gvntbln;
- for (n = 1;; n++) {
- proben++;
- if (n > maxproben)
- maxproben = n;
- i2 = gvntbl[idx];
- if (!i2)
+ for (n=1;; n++) {
+ ii = gvntbl[idx];
+ if (!ii)
break;
- if (ieq(i, i2))
- return i2;
+ if (ieq(i, ii))
+ return ii;
idx++;
if (gvntbln <= idx)
idx = 0;
}
- /* not found */
- if (insert) {
+ if (insert)
gvntbl[idx] = i;
- }
return 0;
}
static void
-replaceref(Ref *r, Ref r1, Ref r2)
-{
- if (req(*r, r1))
- *r = r2;
-}
-
-static void
-replacepuse(Phi *p, Ref r1, Ref r2)
-{
- Ref *a;
-
- for (a = p->arg; a < &p->arg[p->narg]; a++)
- replaceref(a, r1, r2);
-}
-
-static void
-replaceiuse(Ins *i, Ref r1, Ref r2)
-{
- replaceref(&i->arg[0], r1, r2);
- replaceref(&i->arg[1], r1, r2);
-}
-
-static void
-replacejuse(Blk* b, Ref r1, Ref r2)
-{
- replaceref(&b->jmp.arg, r1, r2);
-}
-
-static void
-replaceuse(Fn *fn, Use* u, Ref r1, Ref r2)
+replaceuse(Fn *fn, Use *u, Ref r1, Ref r2)
{
- Tmp *t2;
Blk *b;
+ Ins *i;
+ Phi *p;
+ Ref *pr;
+ Tmp *t2;
+ int n;
- t2 = rtype(r2) == RTmp ? &fn->tmp[r2.val] : 0;
+ t2 = 0;
+ if (rtype(r2) == RTmp)
+ t2 = &fn->tmp[r2.val];
b = fn->rpo[u->bid];
-
switch (u->type) {
- case UXXX:
- die("unreachable");
- break;
case UPhi:
- replacepuse(u->u.phi, r1, r2);
+ p = u->u.phi;
+ for (pr=p->arg; pr<&p->arg[p->narg]; pr++)
+ if (req(*pr, r1))
+ *pr = r2;
if (t2)
- adduse(t2, UPhi, b, u->u.phi);
+ adduse(t2, UPhi, b, p);
break;
case UIns:
- replaceiuse(u->u.ins, r1, r2);
+ i = u->u.ins;
+ for (n=0; n<2; n++)
+ if (req(i->arg[n], r1))
+ i->arg[n] = r2;
if (t2)
- adduse(t2, UIns, b, u->u.ins);
+ adduse(t2, UIns, b, i);
break;
case UJmp:
- replacejuse(fn->rpo[u->bid], r1, r2);
+ if (req(b->jmp.arg, r1))
+ b->jmp.arg = r2;
if (t2)
adduse(t2, UJmp, b);
break;
+ case UXXX:
+ die("unreachable");
}
}
@@ -144,216 +113,33 @@ replaceuses(Fn *fn, Ref r1, Ref r2)
assert(rtype(r1) == RTmp);
t1 = &fn->tmp[r1.val];
-
- for (u = t1->use; u < &t1->use[t1->nuse]; u++)
+ for (u=t1->use; u<&t1->use[t1->nuse]; u++)
replaceuse(fn, u, r1, r2);
-
t1->nuse = 0;
}
static void
-rmuse(Fn *fn, Blk *b, uint ty, Ins *i, Phi *p, Ref r, int strict)
-{
- Tmp *t;
- Use *u;
- int found, rm;
-
- if (rtype(r) != RTmp)
- return;
- found = 0;
- t = &fn->tmp[r.val];
- for (u = t->use; u < &t->use[t->nuse];) {
- rm = 0;
- if (u->type == ty) {
- switch (ty) {
- case UXXX:
- die("unreachable");
- break;
- case UIns:
- assert(p == 0);
- assert(i);
- rm = u->u.ins == i;
- break;
- case UPhi:
- assert(i == 0);
- assert(p);
- rm = u->u.phi == p;
- break;
- case UJmp:
- assert(i == 0);
- assert(p == 0);
- rm = u->bid == b->id;
- break;
- default:
- die("unreachable");
- break;
- }
- }
- if (rm) {
- found++;
- assert(u < &t->use[t->nuse]);
- assert(u->bid == b->id);
- /* careful - deleting below iterator */
- memcpy(u, u+1, ((t->nuse) - ((u+1)-t->use)) * sizeof u[0]);
- t->nuse--;
- } else
- u++;
- }
- if (strict)
- assert(found);
-}
-
-static int
-phieq(Phi *pa, Phi *pb)
-{
- uint n;
-
- assert(pa->narg == pb->narg);
- for (n=0; n<pa->narg; n++) {
- if (!req(pa->arg[n], phiarg(pb, pa->blk[n])))
- return 0;
- }
- return 1;
-}
-
-static void
-killphi(Fn *fn, Blk *b, Phi **pp, Ref r)
-{
- Phi *p;
- uint n;
-
- p = *pp;
- replaceuses(fn, p->to, r);
- assert(b->npred == p->narg);
- for (n = 0; n < p->narg; n++)
- rmuse(fn, b, UPhi, 0, p, p->arg[n], 0/*!strict TODO dups*/);
- p->narg = 0; /* mark as unused - TODO should not be necessary with rmuse*/
- *pp = p->link;
-}
-
-static void
-ifphiopt(Fn *fn, Blk *b)
+dedupphi(Fn *fn, Blk *b)
{
- Blk *ifb, *thenb, *elseb, *tmp;
- Phi *p, **pp;
- Tmp *t;
- Ref argt, argf, r1;
- int k, x, caninv;
-
- /* is b the join blk of an if[-then][-else] graphlet? */
- if (!ifjoin(b, &ifb, &thenb, &elseb))
- return;
-
- assert(ifb->jmp.type == Jjnz);
- assert(ifb->s1 == b || (ifb->s1 == thenb && thenb->s1 == b));
- assert(ifb->s2 == b || (ifb->s2 == elseb && elseb->s1 == b));
- if (!iswu1(fn, ifb->jmp.arg))
- return; /* no opportunity */
-
- caninv = 1;
- for (p = b->phi; p; p = p->link)
- if (req(phiarg(p, elseb), con01[0])) {
- caninv = 0;
- break;
- }
-
- /* phi bool 1/0 "copy" */
- for (pp = &b->phi; *pp;) {
- p = *pp;
- assert(p->narg == 2);
- argt = phiarg(p, thenb);
- argf = phiarg(p, elseb);
- /* jnz jmp.arg in phi is 1/0 */
- if (req(argt, ifb->jmp.arg)) {
- if (!req(argf, ifb->jmp.arg))
- rmuse(fn, b, UPhi, 0, p, argt, 1/*strict*/);
- argt = p->arg[phiargn(p, thenb)] = con01[1];
- }
- if (req(argf, ifb->jmp.arg)) {
- rmuse(fn, b, UPhi, 0, p, argf, 1/*strict*/);
- argf = p->arg[phiargn(p, elseb)] = con01[0];
- }
- /* prefer 0 as argf and/or 1 as argt, so try to invert the cmp */
- if (caninv &&
- ((req(argt, con01[0]) && !req(argf, con01[0])) ||
- (req(argf, con01[1]) && !req(argt, con01[1])))) {
- assert(rtype(ifb->jmp.arg) == RTmp);
- t = &fn->tmp[ifb->jmp.arg.val];
- if (t->nuse == 1)
- if (t->def && iscmp(t->def->op, &k, &x) && KBASE(k) == 0) {
- assert(t->use[0].type == UJmp);
- assert(t->bid == ifb->id);
- t->def->op = invcmpwl(t->def->op);
- tmp = ifb->s1;
- ifb->s1 = ifb->s2;
- ifb->s2 = tmp;
- r1 = argt; argt = argf; argf = r1;
- caninv = 0; /* only once */
- }
- }
- if (req(argt, con01[1]) && req(argf, con01[0])) {
- killphi(fn, b, pp, ifb->jmp.arg);
- caninv = 0; /* used already */
- continue;
- }
- pp = &(*pp)->link;
- }
-}
-
-/* phi "copy" - singleton or all args identical */
-static void
-copyphielim(Fn *fn, Blk *b) {
Phi *p, **pp;
- uint n;
-
- for (pp = &b->phi; *pp;) {
- p = *pp;
- assert(p->narg == b->npred);
- for (n = 0; n < p->narg-1; n++) {
- if (!req(p->arg[n], p->arg[n+1]))
- goto Skip;
- }
- killphi(fn, b, pp, p->arg[0]);
- continue;
- Skip:;
- pp = &(*pp)->link;
- }
-}
+ Ref r;
-/* redundant phis */
-static void
-dupphielim(Fn *fn, Blk *b) {
- Phi *p, *p2, **pp;
-
- /* O(N^2.M) but meh */
- for (pp = &b->phi; *pp;) {
- p = *pp;
- assert(p->narg == b->npred);
- for (p2 = p->link; p2; p2 = p2->link) {
- assert(p2->narg == b->npred);
- if (phieq(p, p2)) {
- killphi(fn, b, pp, p2->to);
- goto Skip;
- }
- }
- pp = &(*pp)->link;
- Skip:;
+ for (pp=&b->phi; (p=*pp);) {
+ r = phicopyref(fn, b, p);
+ if (!req(r, R)) {
+ replaceuses(fn, p->to, r);
+ p->to = R;
+ *pp = p->link;
+ } else
+ pp = &p->link;
}
- return;
-}
-
-static void
-dedupphis(Fn *fn, Blk *b) {
- ifphiopt(fn, b);
- copyphielim(fn,b);
- dupphielim(fn, b);
}
static int
rcmp(Ref a, Ref b)
{
if (rtype(a) != rtype(b))
- return rtype(a)-rtype(b);
+ return rtype(a) - rtype(b);
return a.val - b.val;
}
@@ -364,14 +150,17 @@ normins(Fn *fn, Ins *i)
int64_t v;
Ref r;
- /* truncate constant bits to 32 bits for "s"/w" uses */
- for (n = 0; n < 2; n++) {
+ /* truncate constant bits to
+ * 32 bits for s/w uses */
+ for (n=0; n<2; n++) {
if (!KWIDE(argcls(i, n)))
if (isconbits(fn, i->arg[n], &v))
if ((v & 0xffffffff) != v)
i->arg[n] = getcon(v & 0xffffffff, fn);
}
- /* order arg[0] <= arg[1] for commutative ops, preferring RTmp in arg[0] */
+ /* order arg[0] <= arg[1] for
+ * commutative ops, preferring
+ * RTmp in arg[0] */
if (optab[i->op].commutes)
if (rcmp(i->arg[0], i->arg[1]) > 0) {
r = i->arg[1];
@@ -380,140 +169,106 @@ normins(Fn *fn, Ins *i)
}
}
-static Ref
-negcon(Fn *fn, int cls, Ref r)
+static int
+negcon(int cls, Con *c)
{
- int64_t v, v1;
-
- assert(isconbits(fn, r, &v));
- assert(KBASE(cls) == 0);
- v1 = -v;
- if (cls == Kw)
- v1 = ((int64_t)(-(int32_t)v)) & 0xffffffff;
- return getcon(v1, fn);
+ static Con z = {.type = CBits, .bits.i = 0};
+
+ return foldint(c, Osub, cls, &z, c);
}
static void
assoccon(Fn *fn, Blk *b, Ins *i1)
{
Tmp *t2;
- Ins *i2, i;
- Ref r0, r1, rc;
- int op1, op2;
- int64_t v1, v2, vc;
-
- op1 = i1->op;
- if (op1 == Osub)
- op1 = Oadd;
- if (!optab[op1].assoc || KBASE(i1->cls) != 0 || rtype(i1->arg[0]) != RTmp
- || !isconbits(fn, i1->arg[1], &v1) || (optab[op1].hasid && v1 == optab[op1].idval))
+ Ins *i2;
+ int op, fail;
+ Con c, c1, c2;
+
+ op = i1->op;
+ if (op == Osub)
+ op = Oadd;
+
+ if (!optab[op].assoc
+ || KBASE(i1->cls) != 0
+ || rtype(i1->arg[0]) != RTmp
+ || rtype(i1->arg[1]) != RCon)
return;
+ c1 = fn->con[i1->arg[1].val];
+
t2 = &fn->tmp[i1->arg[0].val];
if (t2->def == 0)
return;
i2 = t2->def;
- op2 = i2->op;
- if (op2 == Osub)
- op2 = Oadd;
- if (op1 != op2 || rtype(i2->arg[0]) != RTmp || !isconbits(fn, i2->arg[1], &v2))
+
+ if (op != (i2->op == Osub ? Oadd : i2->op)
+ || rtype(i2->arg[1]) != RCon)
return;
+ c2 = fn->con[i2->arg[1].val];
+
assert(KBASE(i2->cls) == 0);
- assert(i1->cls == Kl || i2->cls == Kw);
- r0 = i1->arg[1];
- if (i1->op == Osub)
- r0 = negcon(fn, i1->cls, r0);
- r1 = i2->arg[1];
- if (i2->op == Osub)
- r1 = negcon(fn, i2->cls, r1);
- i = (Ins){.to = i2->to, .op = op2, .cls = i2->cls, .arg = {r0, r1}};
- rc = foldref(fn, &i);
- assert(isconbits(fn, rc, &vc));
- if (op1 == Oadd) {
- if (i2->cls == Kw) {
- if ((int32_t)vc < 0) {
- op1 = Osub;
- rc = negcon(fn, Kw, rc);
- }
- } else {
- assert(i2->cls == Kl);
- if (vc < 0) {
- op1 = Osub;
- rc = negcon(fn, Kl, rc);
- }
- }
+ assert(KWIDE(i2->cls) >= KWIDE(i1->cls));
+
+ if (i1->op == Osub && negcon(i1->cls, &c1))
+ return;
+ if (i2->op == Osub && negcon(i2->cls, &c2))
+ return;
+ if (foldint(&c, op, i1->cls, &c1, &c2))
+ return;
+
+ if (op == Oadd && c.type == CBits)
+ if ((i1->cls == Kl && c.bits.i < 0)
+ || (i1->cls == Kw && (int32_t)c.bits.i < 0)) {
+ fail = negcon(i1->cls, &c);
+ assert(fail == 0);
+ op = Osub;
}
- i1->op = op1;
- rmuse(fn, b, UIns, i1, 0, i1->arg[0], 1/*strict*/);
+
+ i1->op = op;
i1->arg[0] = i2->arg[0];
+ i1->arg[1] = newcon(&c, fn);
adduse(&fn->tmp[i1->arg[0].val], UIns, b, i1);
- i1->arg[1] = rc;
}
static void
-killins(Fn *fn, Blk *b, Ins *i, Ref r1, Ref r2)
+killins(Fn *fn, Ins *i, Ref r)
{
- replaceuses(fn, r1, r2);
- rmuse(fn, b, UIns, i, 0, i->arg[0], 1/*strict*/);
- if (!req(i->arg[0], i->arg[1]))
- rmuse(fn, b, UIns, i, 0, i->arg[1], 1/*strict*/);
+ replaceuses(fn, i->to, r);
*i = (Ins){.op = Onop};
}
static void
-dedupi(Fn *fn, Blk *b, Ins *i)
+dedupins(Fn *fn, Blk *b, Ins *i)
{
- Ref r2;
- Ins *i2;
-
- if (i->op == Onop || i->op == Odbgloc)
- return;
+ Ref r;
+ Ins *i1;
normins(fn, i);
-
- if (optab[i->op].ispinned)
+ if (i->op == Onop || pinned(i))
return;
- assert(rtype(i->to) == RTmp);
- /* merge associative ops with constant arg[1] */
+ assert(!req(i->to, R));
assoccon(fn, b, i);
- /* effective copy? */
- r2 = copyref(fn, b, i);
- if (!req(r2, R)) {
- killins(fn, b, i, i->to, r2);
+ r = copyref(fn, b, i);
+ if (!req(r, R)) {
+ killins(fn, i, r);
return;
}
-
- /* effective constant? */
- r2 = foldref(fn, i);
- if (!req(r2, R)) {
- killins(fn, b, i, i->to, r2);
+ r = foldref(fn, i);
+ if (!req(r, R)) {
+ killins(fn, i, r);
return;
}
-
- /* do not dedup (trapping) ins that GCM will not move */
- if (isfixed(fn, i))
- return;
-
- /* duplicate? */
- i2 = gvndup(i, 1);
- if (i2) {
- killins(fn, b, i, i->to, i2->to);
+ i1 = gvndup(i, 1);
+ if (i1) {
+ killins(fn, i, i1->to);
return;
}
}
-static void
-dedupins(Fn *fn, Blk *b)
-{
- Ins *i;
-
- for (i = b->ins; i < &b->ins[b->nins]; i++)
- dedupi(fn, b, i);
-}
-
int
-cmpeq0def(Fn *fn, Ref r, Ref *arg0, int *cls, int *eqval)
+cmpeqz(Fn *fn, Ref r, Ref *arg, int *cls, int *eqval)
{
Ins *i;
@@ -522,8 +277,8 @@ cmpeq0def(Fn *fn, Ref r, Ref *arg0, int *cls, int *eqval)
i = fn->tmp[r.val].def;
if (i)
if (optab[i->op].cmpeqwl)
- if (req(i->arg[1], con01[0])) {
- *arg0 = i->arg[0];
+ if (req(i->arg[1], CON_Z)) {
+ *arg = i->arg[0];
*cls = argcls(i, 0);
*eqval = optab[i->op].eqval;
return 1;
@@ -534,23 +289,25 @@ cmpeq0def(Fn *fn, Ref r, Ref *arg0, int *cls, int *eqval)
static int
branchdom(Fn *fn, Blk *bif, Blk *bbr1, Blk *bbr2, Blk *b)
{
- if (bif->jmp.type == Jjnz)
- if (b != bif)
- if (dom(bbr1, b))
- if (!reachesnotvia(fn, bbr2, b, bif))
+ assert(bif->jmp.type == Jjnz);
+
+ if (b != bif
+ && dom(bbr1, b)
+ && !reachesnotvia(fn, bbr2, b, bif))
return 1;
+
return 0;
}
static int
-dom0non0(Fn *fn, Blk *bdom, Blk *b, int *is0)
+domzero(Fn *fn, Blk *d, Blk *b, int *z)
{
- if (branchdom(fn, bdom, bdom->s1, bdom->s2, b)) {
- *is0 = 0;
+ if (branchdom(fn, d, d->s1, d->s2, b)) {
+ *z = 0;
return 1;
}
- if (branchdom(fn, bdom, bdom->s2, bdom->s1, b)) {
- *is0 = 1;
+ if (branchdom(fn, d, d->s2, d->s1, b)) {
+ *z = 1;
return 1;
}
return 0;
@@ -558,24 +315,25 @@ dom0non0(Fn *fn, Blk *bdom, Blk *b, int *is0)
/* infer 0/non-0 value from dominating jnz */
int
-is0non0(Fn *fn, Blk *b, Ref r, int cls, int *is0)
+zeroval(Fn *fn, Blk *b, Ref r, int cls, int *z)
{
- Blk *bdom;
- Ref arg0;
+ Blk *d;
+ Ref arg;
int cls1, eqval;
- for (bdom = b->idom; bdom; bdom = bdom->idom) {
- if (bdom->jmp.type != Jjnz)
+ for (d=b->idom; d; d=d->idom) {
+ if (d->jmp.type != Jjnz)
continue;
- if (cls == Kw)
- if (req(r, bdom->jmp.arg))
- if (dom0non0(fn, bdom, b, is0))
+ if (req(r, d->jmp.arg)
+ && cls == Kw
+ && domzero(fn, d, b, z)) {
return 1;
- if (cmpeq0def(fn, bdom->jmp.arg, &arg0, &cls1, &eqval))
- if (cls == cls1)
- if (req(r, arg0))
- if (dom0non0(fn, bdom, b, is0)) {
- *is0 = *is0 ^ eqval;
+ }
+ if (cmpeqz(fn, d->jmp.arg, &arg, &cls1, &eqval)
+ && req(r, arg)
+ && cls == cls1
+ && domzero(fn, d, b, z)) {
+ *z ^= eqval;
return 1;
}
}
@@ -583,19 +341,27 @@ is0non0(Fn *fn, Blk *b, Ref r, int cls, int *is0)
}
static int
-usecls(Use *u, Ref r)
+usecls(Use *u, Ref r, int cls)
{
+ int k;
+
switch (u->type) {
- case UXXX: break;
case UIns:
- /* safe to take arg[0] cls if both args == r, even for store */
+ k = Kx; /* widest use */
if (req(u->u.ins->arg[0], r))
- return argcls(u->u.ins, 0);
+ k = argcls(u->u.ins, 0);
if (req(u->u.ins->arg[1], r))
- return argcls(u->u.ins, 1);
+ if (k == Kx || !KWIDE(k))
+ k = argcls(u->u.ins, 1);
+ return k == Kx ? cls : k;
+ case UPhi:
+ if (req(u->u.phi->to, R))
+ return cls; /* eliminated */
+ return u->u.phi->cls;
+ case UJmp:
+ return Kw;
+ default:
break;
- case UPhi: return u->u.phi->cls;
- case UJmp: return Kw;
}
die("unreachable");
}
@@ -610,136 +376,126 @@ propjnz0(Fn *fn, Blk *bif, Blk *s0, Blk *snon0, Ref r, int cls)
if (s0->npred != 1 || rtype(r) != RTmp)
return;
t = &fn->tmp[r.val];
- for (u = t->use; u < &t->use[t->nuse]; u++) {
+ for (u=t->use; u<&t->use[t->nuse]; u++) {
b = fn->rpo[u->bid];
- if (usecls(u, r) == cls)
+ /* we may compare an l temp with a w
+ * comparison; so check that the use
+ * does not involve high bits */
+ if (usecls(u, r, cls) == cls)
if (branchdom(fn, bif, s0, snon0, b))
- replaceuse(fn, u, r, con01[0]);
+ replaceuse(fn, u, r, CON_Z);
}
}
static void
-dedupjmp(Fn *fn, Blk *b) {
- Blk *s1s2[2];
+dedupjmp(Fn *fn, Blk *b)
+{
+ Blk **ps;
int64_t v;
- Ref arg0;
- int cls, eqval, is0;
+ Ref arg;
+ int cls, eqval, z;
if (b->jmp.type != Jjnz)
return;
- /* propagate jmp arg as 0 thru s2 */
+ /* propagate jmp arg as 0 through s2 */
propjnz0(fn, b, b->s2, b->s1, b->jmp.arg, Kw);
/* propagate cmp eq/ne 0 def of jmp arg as 0 */
- s1s2[0] = b->s1; s1s2[1] = b->s2;
- if (cmpeq0def(fn, b->jmp.arg, &arg0, &cls, &eqval))
- propjnz0(fn, b, s1s2[eqval^1], s1s2[eqval], arg0, cls);
+ if (cmpeqz(fn, b->jmp.arg, &arg, &cls, &eqval)) {
+ ps = (Blk*[]){b->s1, b->s2};
+ propjnz0(fn, b, ps[eqval^1], ps[eqval], arg, cls);
+ }
/* collapse trivial/constant jnz to jmp */
v = 1;
- is0 = 0;
+ z = 0;
if (b->s1 == b->s2
- || isconbits(fn, b->jmp.arg, &v)
- || is0non0(fn, b, b->jmp.arg, Kw, &is0)) {
- if (v == 0 || is0)
+ || isconbits(fn, b->jmp.arg, &v)
+ || zeroval(fn, b, b->jmp.arg, Kw, &z)) {
+ if (v == 0 || z)
b->s1 = b->s2;
/* we later move active ins out of dead blks */
b->s2 = 0;
b->jmp.type = Jjmp;
- rmuse(fn, b, UJmp, 0, 0, b->jmp.arg, 1/*strict*/);
b->jmp.arg = R;
- return;
}
}
-/* rebuild rpo pred use */
-/* careful not to lose active ins in dead blks */
static void
-rebuildcfg(Fn *fn) {
- uint n, prevnblk;
- Blk **prevrpo;
- Blk *b;
+rebuildcfg(Fn *fn)
+{
+ uint n, nblk;
+ Blk *b, *s, **rpo;
Ins *i;
- prevnblk = fn->nblk;
- prevrpo = emalloc(prevnblk * sizeof prevrpo[0]);
- memcpy(prevrpo, fn->rpo, prevnblk * sizeof prevrpo[0]);
-
- fillcfg(fn); // TODO - OK?
-
- for (n=0; n<prevnblk; n++) {
- b = prevrpo[n];
- if (b->id == -1u) {
- assert(b != fn->start);
- /* blk unreachable after GVN */
- for (i = b->ins; i < &b->ins[b->nins]; i++)
- if (i->op != Onop)
- if (!optab[i->op].ispinned)
- if (gvndup(i, 0) == i)
- /* (possibly) active ins - add to start blk */
- addins(&fn->start->ins, &fn->start->nins, i);
- }
+ nblk = fn->nblk;
+ rpo = emalloc(nblk * sizeof rpo[0]);
+ memcpy(rpo, fn->rpo, nblk * sizeof rpo[0]);
+
+ fillcfg(fn);
+
+ /* move instructions that were in
+ * killed blocks and may be active
+ * in the computation in the start
+ * block */
+ s = fn->start;
+ for (n=0; n<nblk; n++) {
+ b = rpo[n];
+ if (b->id != -1u)
+ continue;
+ /* blk unreachable after GVN */
+ assert(b != s);
+ for (i=b->ins; i<&b->ins[b->nins]; i++)
+ if (!optab[i->op].pinned)
+ if (gvndup(i, 0) == i)
+ addins(&s->ins, &s->nins, i);
}
- free(prevrpo);
+ free(rpo);
}
-/* https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf */
-/* require rpo pred ssa use */
-/* recreates rpo */
-/* breaks pred use dom ssa (GCM fixes ssa) */
+/* requires rpo pred ssa use
+ * recreates rpo preds
+ * breaks pred use dom ssa (GCM fixes ssa)
+ */
void
gvn(Fn *fn)
{
- uint n, nins;
Blk *b;
Phi *p;
-
- /* facilitate jnz simplification */
- blkmerge(fn);
- fillcfg(fn);
- filluse(fn);
- filldom(fn);
+ Ins *i;
+ uint n, nins;
con01[0] = getcon(0, fn);
con01[1] = getcon(1, fn);
- nins = 0;
+ /* copy.c uses the visit bit */
for (b=fn->start; b; b=b->link)
- for (p = b->phi; p; p = p->link)
+ for (p=b->phi; p; p=p->link)
p->visit = 0;
- /* facilitate ext elimination */
fillloop(fn);
narrowpars(fn);
filluse(fn);
ssacheck(fn);
- for (b=fn->start; b; b=b->link)
+ nins = 0;
+ for (b=fn->start; b; b=b->link) {
+ b->visit = 0;
nins += b->nins;
+ }
gvntbln = nins + nins/2;
gvntbl = emalloc(gvntbln * sizeof gvntbl[0]);
- lookupn = 0;
- proben = 0;
- maxproben = 0;
-
- /* GVN */
- clrbvisit(fn);
for (n=0; n<fn->nblk; n++) {
b = fn->rpo[n];
- dedupphis(fn, b);
- dedupins(fn, b);
+ dedupphi(fn, b);
+ for (i=b->ins; i<&b->ins[b->nins]; i++)
+ dedupins(fn, b, i);
dedupjmp(fn, b);
}
-
rebuildcfg(fn);
-
free(gvntbl);
gvntbl = 0;
- gvntbln = 0;
- lookupn = 0;
- proben = 0;
- maxproben = 0;
if (debug['G']) {
fprintf(stderr, "\n> After GVN:\n");
diff --git a/load.c b/load.c
index bc808c1..bac382c 100644
--- a/load.c
+++ b/load.c
@@ -380,6 +380,7 @@ def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
goto Load;
p->arg[np] = r1;
p->blk[np] = bp;
+ /* XXX - multiplicity in predecessors!!! */
}
if (msk != msks)
mask(cls, &r, msk, il);
diff --git a/main.c b/main.c
index ba3ad0c..7a97f8a 100644
--- a/main.c
+++ b/main.c
@@ -77,9 +77,6 @@ func(Fn *fn)
gvn(fn);
fillcfg(fn);
filluse(fn);
- ifelim(fn);
- fillcfg(fn);
- filluse(fn);
filldom(fn);
gcm(fn);
filluse(fn);
diff --git a/ops.h b/ops.h
index 00d62a0..80b0d9b 100644
--- a/ops.h
+++ b/ops.h
@@ -7,7 +7,7 @@
#endif
#ifndef F
-#define F(CanFold, HasId, IdVal, Commutes, Associates, Idemp, IsCmpEq, IsCmpLgte, CmpEqVal, IsPinned)
+#define F(a,b,c,d,e,f,g,h,i,j)
#endif
#define T(a,b,c,d,e,f,g,h) { \
@@ -19,7 +19,17 @@
/* PUBLIC OPERATIONS */
/*********************/
-/* Arithmetic and Bits */
+/* can fold */
+/* | has identity */
+/* | | identity value for arg[1] */
+/* | | | commutative */
+/* | | | | associative */
+/* | | | | | idempotent */
+/* | | | | | | c{eq,ne}[wl] */
+/* | | | | | | | c[us][gl][et][wl] */
+/* | | | | | | | | value if = args */
+/* | | | | | | | | | pinned */
+/* Arithmetic and Bits v v v v v v v v v v */
O(add, T(w,l,s,d, w,l,s,d), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
O(sub, T(w,l,s,d, w,l,s,d), F(1,1,0,0,0,0,0,0,0,0)) X(2,1,0) V(0)
O(neg, T(w,l,s,d, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
diff --git a/parse.c b/parse.c
index e2ffccf..7ab3ea5 100644
--- a/parse.c
+++ b/parse.c
@@ -22,7 +22,7 @@ Op optab[NOp] = {
.commutes = co, .assoc = as, \
.idemp = im, \
.cmpeqwl = ic, .cmplgtewl = lg, .eqval = cv, \
- .ispinned = pn
+ .pinned = pn
#define O(op, k, flags) [O##op]={.name = #op, .argcls = k, flags},
#include "ops.h"
#undef F
@@ -940,7 +940,7 @@ parsefn(Lnk *lnk)
curf->mem = vnew(0, sizeof curf->mem[0], PFn);
curf->nmem = 0;
curf->nblk = nblk;
- curf->rpo = 0;
+ curf->rpo = vnew(nblk, sizeof curf->rpo[0], PFn);
for (b=curf->start; b; b=b->link)
b->dlink = 0; /* was trashed by findblk() */
for (i=0; i<BMask+1; ++i)
diff --git a/ssa.c b/ssa.c
index 4e3d2a5..89cde44 100644
--- a/ssa.c
+++ b/ssa.c
@@ -44,7 +44,6 @@ filluse(Fn *fn)
uint a;
Tmp *tmp;
- /* todo, is this the correct file? */
tmp = fn->tmp;
for (t=Tmp0; t<fn->ntmp; t++) {
tmp[t].def = 0;
@@ -145,7 +144,7 @@ phiins(Fn *fn)
continue;
}
bszero(u);
- k = -1;
+ k = Kx;
bp = be;
for (b=fn->start; b; b=b->link) {
b->visit = 0;
diff --git a/test/copy.ssa b/test/copy.ssa
new file mode 100644
index 0000000..5c2a4d0
--- /dev/null
+++ b/test/copy.ssa
@@ -0,0 +1,15 @@
+export function w $f() {
+@start
+ %x0 =w loadsb $a
+ # the extension must not be eliminated
+ # even though the load already extended
+ %x1 =l extsb %x0
+ %c =w ceql %x1, -1
+ ret %c
+}
+
+# >>> driver
+# char a = -1;
+# extern int f();
+# int main() { return !(f() == 1); }
+# <<<
diff --git a/test/gvn-live-dead.ssa b/test/gvn1.ssa
index d47f05b..d47f05b 100644
--- a/test/gvn-live-dead.ssa
+++ b/test/gvn1.ssa
diff --git a/test/non0jnz.ssa b/test/gvn2.ssa
index 33f9a96..33f9a96 100644
--- a/test/non0jnz.ssa
+++ b/test/gvn2.ssa
diff --git a/util.c b/util.c
index 580bc43..3b5c09d 100644
--- a/util.c
+++ b/util.c
@@ -168,7 +168,7 @@ addbins(Blk *b, Ins **pvins, uint *pnins)
{
Ins *i;
- for (i = b->ins; i < &b->ins[b->nins]; i++)
+ for (i=b->ins; i<&b->ins[b->nins]; i++)
addins(pvins, pnins, i);
}
@@ -247,23 +247,50 @@ iscmp(int op, int *pk, int *pc)
return 1;
}
-static int INVCMPWL[] = {
- /*Oceqw*/Ocnew, /*Ocnew*/Oceqw,
- /*Ocsgew*/Ocsltw, /*Ocsgtw*/Ocslew, /*Ocslew*/Ocsgtw, /*Ocsltw*/Ocsgew,
- /*Ocugew*/Ocultw, /*Ocugtw*/Oculew, /*Oculew*/Ocugtw, /*Ocultw*/Ocugew,
- /*Oceql*/Ocnel, /*Ocnel*/Oceql,
- /*Ocsgel*/Ocsltl, /*Ocsgtl*/Ocslel, /*Ocslel*/Ocsgtl, /*Ocsltl*/Ocsgel,
- /*Ocugel*/Ocultl, /*Ocugtl*/Oculel, /*Oculel*/Ocugtl, /*Ocultl*/Ocugel,
-};
-
-int
-invcmpwl(int cmp)
+void
+igroup(Blk *b, Ins *i, Ins **i0, Ins **i1)
{
- assert(Oceqw <= cmp && cmp <= Ocultl);
- return INVCMPWL[cmp - Oceqw];
-}
-
+ Ins *ib, *ie;
+ ib = b->ins;
+ ie = ib + b->nins;
+ switch (i->op) {
+ case Oblit0:
+ *i0 = i;
+ *i1 = i + 2;
+ return;
+ case Oblit1:
+ *i0 = i - 1;
+ *i1 = i + 1;
+ return;
+ case_Opar:
+ for (; i>ib && ispar((i-1)->op); i--)
+ ;
+ *i0 = i;
+ for (; i<ie && ispar(i->op); i++)
+ ;
+ *i1 = i;
+ return;
+ case Ocall:
+ case_Oarg:
+ for (; i>ib && isarg((i-1)->op); i--)
+ ;
+ *i0 = i;
+ for (; i<ie && i->op != Ocall; i++)
+ ;
+ assert(i < ie);
+ *i1 = i + 1;
+ return;
+ default:
+ if (ispar(i->op))
+ goto case_Opar;
+ if (isarg(i->op))
+ goto case_Oarg;
+ *i0 = i;
+ *i1 = i + 1;
+ return;
+ }
+}
int
argcls(Ins *i, int n)
@@ -291,12 +318,8 @@ emiti(Ins i)
void
idup(Blk *b, Ins *s, ulong n)
{
- if (b->ins)
- vgrow(&b->ins, n);
- else
- b->ins = vnew(n, sizeof(Ins), PFn);
- if (n)
- memcpy(b->ins, s, n * sizeof(Ins));
+ vgrow(&b->ins, n);
+ icpy(b->ins, s, n);
b->nins = n;
}
@@ -345,6 +368,16 @@ cmpop(int c)
}
int
+cmpwlneg(int op)
+{
+ if (INRANGE(op, Ocmpw, Ocmpw1))
+ return cmpneg(op - Ocmpw) + Ocmpw;
+ if (INRANGE(op, Ocmpl, Ocmpl1))
+ return cmpneg(op - Ocmpl) + Ocmpl;
+ die("not a wl comparison");
+}
+
+int
clsmerge(short *pk, short k)
{
short k1;
@@ -379,16 +412,21 @@ phiargn(Phi *p, Blk *b)
{
uint n;
- for (n = 0; n < p->narg; n++)
- if (p->blk[n] == b)
- return n;
- die("unreachable");
+ if (p)
+ for (n=0; n<p->narg; n++)
+ if (p->blk[n] == b)
+ return n;
+ return -1;
}
Ref
phiarg(Phi *p, Blk *b)
{
- return p->arg[phiargn(p, b)];
+ uint n;
+
+ n = phiargn(p, b);
+ assert(n != -1u && "block not found");
+ return p->arg[n];
}
Ref
@@ -489,12 +527,6 @@ isconbits(Fn *fn, Ref r, int64_t *v)
return 0;
}
-int
-istmpconbits(Fn *fn, Ins *i, int64_t *v)
-{
- return rtype(i->arg[0]) == RTmp && isconbits(fn, i->arg[1], v);
-}
-
void
salloc(Ref rt, Ref rs, Fn *fn)
{