From f3ca2577372eaae7056db24982abfc54be8f4cc1 Mon Sep 17 00:00:00 2001 From: Quentin Carbonneaux Date: Fri, 14 Mar 2025 13:09:21 +0100 Subject: gvn/gcm review - Many stylistic nits. - Removed blkmerge(). - Some minor bug fixes. - GCM reassoc is now "sink"; a pass that moves trivial ops in their target block with the same goal of reducing register pressure, but starting from instructions that benefit from having their inputs close. --- all.h | 34 +-- amd64/emit.c | 9 +- cfg.c | 344 +++++-------------------- copy.c | 368 +++++++++++++++----------- fold.c | 2 +- gcm.c | 361 ++++++++++++-------------- gvn.c | 680 ++++++++++++++++--------------------------------- load.c | 1 + main.c | 3 - ops.h | 14 +- parse.c | 4 +- ssa.c | 3 +- test/copy.ssa | 15 ++ test/gvn-live-dead.ssa | 19 -- test/gvn1.ssa | 19 ++ test/gvn2.ssa | 31 +++ test/non0jnz.ssa | 31 --- util.c | 98 ++++--- 18 files changed, 835 insertions(+), 1201 deletions(-) create mode 100644 test/copy.ssa delete mode 100644 test/gvn-live-dead.ssa create mode 100644 test/gvn1.ssa create mode 100644 test/gvn2.ssa delete mode 100644 test/non0jnz.ssa diff --git a/all.h b/all.h index 6da6330..39a4f7c 100644 --- a/all.h +++ b/all.h @@ -223,7 +223,7 @@ struct Op { uint cmpeqwl:1; /* Kl/Kw cmp eq/ne? */ uint cmplgtewl:1; /* Kl/Kw cmp lt/gt/le/ge? */ uint eqval:1; /* 1 for eq; 0 for ne */ - uint ispinned:1; /* GCM pinned op? */ + uint pinned:1; /* GCM pinned op? */ }; struct Ins { @@ -260,9 +260,9 @@ struct Blk { Blk *idom; Blk *dom, *dlink; - int domdpth; Blk **fron; uint nfron; + int depth; Blk **pred; uint npred; @@ -489,13 +489,14 @@ char *str(uint32_t); int argcls(Ins *, int); int isreg(Ref); int iscmp(int, int *, int *); -int invcmpwl(int); +void igroup(Blk *, Ins *, Ins **, Ins **); void emit(int, int, Ref, Ref, Ref); void emiti(Ins); void idup(Blk *, Ins *, ulong); Ins *icpy(Ins *, Ins *, ulong); int cmpop(int); int cmpneg(int); +int cmpwlneg(int); int clsmerge(short *, short); int phicls(int, Tmp *); uint phiargn(Phi *, Blk *); @@ -507,7 +508,6 @@ Ref newcon(Con *, Fn *); Ref getcon(int64_t, Fn *); int addcon(Con *, Con *, int); int isconbits(Fn *fn, Ref r, int64_t *v); -int istmpconbits(Fn *fn, Ins *i, int64_t *v); void salloc(Ref, Ref, Fn *); void dumpts(BSet *, Tmp *, FILE *); void runmatch(uchar *, Num *, Ref, Ref *); @@ -549,19 +549,11 @@ int sdom(Blk *, Blk *); int dom(Blk *, Blk *); void fillfron(Fn *); void loopiter(Fn *, void (*)(Blk *, Blk *)); -void filldomdpth(Fn *); +void filldepth(Fn *); Blk *lca(Blk *, Blk *); void fillloop(Fn *); void simpljmp(Fn *); -void replacepreds(Blk *, Blk *, Blk *); -void killblks(Fn *); -void blkmerge(Fn *); -int ifgraph(Blk *, Blk **, Blk **, Blk **); -int ifjoin(Blk *, Blk **, Blk **, Blk **); -int emptyblk(Blk *); -void ifelim(Fn *); -void clrbvisit(Fn *); -int reaches(Fn *,Blk *, Blk *); +int reaches(Fn *, Blk *, Blk *); int reachesnotvia(Fn *, Blk *, Blk *, Blk *); /* mem.c */ @@ -586,25 +578,23 @@ void ssa(Fn *); void ssacheck(Fn *); /* copy.c */ -int iswu1(Fn *, Ref); -void narrowpars(Fn *); +void narrowpars(Fn *fn); Ref copyref(Fn *, Blk *, Ins *); +Ref phicopyref(Fn *, Blk *, Phi *); /* fold.c */ +int foldint(Con *, int, int, Con *, Con *); Ref foldref(Fn *, Ins *); /* gvn.c */ -extern Ref con01[2]; -int is0non0(Fn *, Blk *, Ref, int, int *); +extern Ref con01[2]; /* 0 and 1 */ +int zeroval(Fn *, Blk *, Ref, int, int *); void gvn(Fn *); /* gcm.c */ -int isfixed(Fn *, Ins *); +int pinned(Ins *); void gcm(Fn *); -/* reassoc.c */ -void reassoc(Fn *); - /* simpl.c */ void simpl(Fn *); diff --git a/amd64/emit.c b/amd64/emit.c index 8f36188..a8cd0de 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -612,6 +612,7 @@ amd64_emitfn(Fn *fn, FILE *f) Blk *b, *s; Ins *i, itmp; int *r, c, o, n, lbl; + uint p; E *e; e = &(E){.f = f, .fn = fn}; @@ -640,8 +641,14 @@ amd64_emitfn(Fn *fn, FILE *f) } for (lbl=0, b=fn->start; b; b=b->link) { - if (lbl || b->npred > 1) + if (lbl || b->npred > 1) { + for (p=0; pnpred; p++) + if (b->pred[p]->id >= b->id) + break; + if (p != b->npred) + fprintf(f, ".p2align 4\n"); fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id); + } for (i=b->ins; i!=&b->ins[b->nins]; i++) emitins(*i, e); lbl = 1; diff --git a/cfg.c b/cfg.c index 1fe584c..8047c12 100644 --- a/cfg.c +++ b/cfg.c @@ -8,49 +8,40 @@ newblk() b = alloc(sizeof *b); *b = z; + b->ins = vnew(0, sizeof b->ins[0], PFn); + b->pred = vnew(0, sizeof b->pred[0], PFn); return b; } -/* TODO - this never seems to do anything??? */ static void -prunephis(Fn *f) +fixphis(Fn *f) { Blk *b; - Phi *p, **pp; - uint na, na0; + Phi *p; + uint n, n0; - for (b = f->start; b; b = b->link) { + for (b=f->start; b; b=b->link) { assert(b->id < f->nblk); - for (pp = &b->phi; (*pp);) { - p = *pp; - for (na = na0 = 0; na < p->narg; na++) - if (p->blk[na]->id != -1u) { - p->blk[na0] = p->blk[na]; - p->arg[na0] = p->arg[na]; - na0++; + for (p=b->phi; p; p=p->link) { + for (n=n0=0; nnarg; n++) + if (p->blk[n]->id != -1u) { + p->blk[n0] = p->blk[n]; + p->arg[n0] = p->arg[n]; + n0++; } - if (na0 == 0) { - *pp = p->link; - } else { - p->narg = na0; - pp = &p->link; - } + assert(n0 > 0); + p->narg = n0; } } } static void -addpred(Blk *bp, Blk *bc) +addpred(Blk *bp, Blk *b) { - ++bc->npred; - if (bc->pred) - vgrow(&bc->pred, bc->npred); - else - bc->pred = vnew(bc->npred, sizeof bc->pred[0], PFn); - bc->pred[bc->npred-1] = bp; + vgrow(&b->pred, ++b->npred); + b->pred[b->npred-1] = bp; } -/* fill predecessors information in blocks; prune dead phi refs */ void fillpreds(Fn *f) { @@ -64,8 +55,6 @@ fillpreds(Fn *f) if (b->s2 && b->s2 != b->s1) addpred(b, b->s2); } - /* TODO - this never seems to do anything??? */ - prunephis(f); } static void @@ -87,7 +76,6 @@ porec(Blk *b, uint *npo) b->id = (*npo)++; } -/* fill the rpo information; prune dead blks */ static void fillrpo(Fn *f) { @@ -97,27 +85,25 @@ fillrpo(Fn *f) b->id = -1u; f->nblk = 0; porec(f->start, &f->nblk); - if (f->rpo) - vgrow(&f->rpo, f->nblk); - else - f->rpo = vnew(f->nblk, sizeof f->rpo[0], PFn); + vgrow(&f->rpo, f->nblk); for (p=&f->start; (b=*p);) { if (b->id == -1u) { *p = b->link; } else { - b->id = f->nblk-b->id-1; /* po -> rpo */ + b->id = f->nblk-b->id-1; f->rpo[b->id] = b; p = &b->link; } } } -/* fill rpo, preds; prune dead blks, prune dead blk refs from phis */ +/* fill rpo, preds; prune dead blks */ void fillcfg(Fn *f) { fillrpo(f); fillpreds(f); + fixphis(f); } /* for dominators computation, read @@ -260,34 +246,28 @@ loopiter(Fn *fn, void f(Blk *, Blk *)) } } +/* dominator tree depth */ void -multloop(Blk *hd, Blk *b) -{ - (void)hd; - b->loop *= 10; -} - -void -filldomdpth(Fn *fn) +filldepth(Fn *fn) { - Blk *b, *dom; - uint dpth; + Blk *b, *d; + int depth; for (b=fn->start; b; b=b->link) - b->domdpth = -1; + b->depth = -1; - fn->start->domdpth = 0; + fn->start->depth = 0; for (b=fn->start; b; b=b->link) { - if (b->domdpth != -1) + if (b->depth != -1) continue; - dpth = 1; - for (dom = b->idom; dom->domdpth == -1; dom = dom->idom) - dpth++; - dpth += dom->domdpth; - b->domdpth = dpth; - for (dom = b->idom; dom->domdpth == -1; dom = dom->idom) - dom->domdpth = --dpth; + depth = 1; + for (d=b->idom; d->depth==-1; d=d->idom) + depth++; + depth += d->depth; + b->depth = depth; + for (d=b->idom; d->depth==-1; d=d->idom) + d->depth = --depth; } } @@ -299,9 +279,9 @@ lca(Blk *b1, Blk *b2) return b2; if (!b2) return b1; - while (b1->domdpth > b2->domdpth) + while (b1->depth > b2->depth) b1 = b1->idom; - while (b2->domdpth > b1->domdpth) + while (b2->depth > b1->depth) b2 = b2->idom; while (b1 != b2) { b1 = b1->idom; @@ -310,6 +290,13 @@ lca(Blk *b1, Blk *b2) return b1; } +void +multloop(Blk *hd, Blk *b) +{ + (void)hd; + b->loop *= 10; +} + void fillloop(Fn *fn) { @@ -371,244 +358,41 @@ simpljmp(Fn *fn) free(uf); } -static void -replacepred(Blk **blks, uint nblk, Blk *to, Blk *from) -{ - uint n; - for(n=0; npred[] and p->blk[] entries */ -void -replacepreds(Blk *s, Blk *to, Blk *from) -{ - Phi *p; - - if (!s) - return; - assert(s->npred); - replacepred(s->pred, s->npred, to, from); - for (p = s->phi; p; p = p->link) { - assert(p->narg == s->npred); - replacepred(p->blk, p->narg, to, from); - } -} - -/* remove marked-dead blks - marked as fn->rpo[id] == 0 */ -void -killblks(Fn *fn) -{ - Blk **pb; - - for (pb = &fn->start; *pb;) - if (fn->rpo[(*pb)->id]) - pb = &(*pb)->link; - else - *pb = (*pb)->link; -} - -/* merge linear jmp chains */ -/* requires rpo pred, breaks cfg use */ -void -blkmerge(Fn *fn) -{ - uint bid, nins; - Blk *curb, *b; - Ins *vins; - - if (debug['B']) - fputs("\n> Block merge:\n", stderr); - - vins = vnew(0, sizeof vins[0], PFn); - curb = 0; - /* linear jmp chains will be consecutive in rpo */ - for (bid=0; bidnblk; bid++) { - b = fn->rpo[bid]; - if (curb == 0) { - curb = b; - nins = 0; - } else - fn->rpo[bid] = 0; - addbins(b, &vins, &nins); - /* note - there are cases where GVN does not eliminate singleton phis */ - if (b->jmp.type != Jjmp || b->s1->npred != 1 || b->s1->phi) { - idup(curb, vins, nins); - curb->nins = nins; - curb->jmp = b->jmp; - replacepreds(b->s1, curb, b); - if (b->s1 != b->s2) - replacepreds(b->s2, curb, b); - curb->s1 = b->s1; - curb->s2 = b->s2; - curb = 0; - } else { - assert(b->s1->id == bid+1); - if (debug['B']) - fprintf(stderr, " merging blocks @%s -> @%s\n", b->name, b->s1->name); - } - } - assert(curb == 0); - killblks(fn); -} - -int -lonesucc(Blk *b, Blk *s) +static int +reachrec(Blk *b, Blk *to) { - assert(s); - if (s != b) - if (s->npred == 1) - if (s->pred[0] == b) - if (s->phi == 0) + if (b == to) return 1; - return 0; -} - -int -lonejmpsucc(Blk *b, Blk *s) -{ - return s->jmp.type == Jjmp && lonesucc(b, s); -} - -/* (otherwise) isolated if-then[-else] graphlet */ -int -ifgraph(Blk *ifb, Blk **pthenb, Blk **pelseb, Blk **pjoinb) -{ - if (ifb->jmp.type != Jjnz) + if (!b || b->visit) return 0; - assert(ifb->s1 && ifb->s2); - assert(ifb->s1 != ifb->s2); /* dubious */ - *pthenb = ifb->s1; - *pelseb = ifb->s2; - *pjoinb = ifb->s1->s1; - if (ifb->s1 == ifb->s2->s1) { - /* empty then */ - *pthenb = ifb; - *pjoinb = ifb->s1; - } - if (ifb->s1->s1 == ifb->s2) - /* empty else */ - *pelseb = ifb; - - if (*pthenb == ifb || - ((*pthenb)->s1 == *pjoinb && lonejmpsucc(ifb, *pthenb))) - if (*pelseb == ifb || - ((*pelseb)->s1 == *pjoinb && lonejmpsucc(ifb, *pelseb))) - /* there are cases where npred == 2 is not strictly needed - ifconvert for example */ - if ((*pjoinb)->npred == 2) - return 1; - - return 0; -} - -/* join blk of if-then[-else] graphlet */ -int -ifjoin(Blk *joinb, Blk **pifb, Blk **pthenb, Blk **pelseb) -{ - Blk *joinb1; - if (joinb->npred) - if (ifgraph(joinb->pred[0], pthenb, pelseb, &joinb1)) - if (joinb == joinb1) { - *pifb = joinb->pred[0]; + b->visit = 1; + if (reachrec(b->s1, to)) return 1; - } - if (joinb->npred && joinb->pred[0]->npred) - if (ifgraph(joinb->pred[0]->pred[0], pthenb, pelseb, &joinb1)) - if (joinb == joinb1) { - *pifb = joinb->pred[0]->pred[0]; + if (reachrec(b->s2, to)) return 1; - } + return 0; } +/* Blk.visit needs to be clear at entry */ int -emptyblk(Blk *b) +reaches(Fn *fn, Blk *b, Blk *to) { - Ins *i; - - for (i = b->ins; i < &b->ins[b->nins]; i++) - if (i->op != Onop) - if (i->op != Odbgloc) - return 0; - return 1; -} + int r; -/* remove empty jnz graphlets */ -/* needs rpo; breaks cfg */ -void -ifelim(Fn *fn) -{ - uint bid; - Blk *ifb, *thenb, *elseb, *joinb; - - for (bid = 0; bid < fn->nblk; bid++) { - ifb = fn->rpo[bid]; - if (ifb == 0) - continue; - if (ifgraph(ifb, &thenb, &elseb, &joinb)) - if (joinb->phi == 0) - if (thenb == ifb || emptyblk(thenb)) - if (elseb == ifb || emptyblk(elseb)) { - if (debug['B']) - fprintf(stderr, " eliminating empty if @%s -> @%s, @%s -> @%s\n", - ifb->name, thenb->name, elseb->name, joinb->name); - if (thenb != ifb) - fn->rpo[thenb->id] = 0; - if (elseb != ifb) - fn->rpo[elseb->id] = 0; - ifb->jmp.type = Jjmp; - ifb->jmp.arg = R; - ifb->s1 = joinb; - ifb->s2 = 0; - } - } - killblks(fn); -} - -void -clrbvisit(Fn *fn) -{ - Blk *b; - for (b = fn->start; b; b = b->link) + assert(to); + r = reachrec(b, to); + for (b=fn->start; b; b=b->link) b->visit = 0; + return r; } -static int -reaches1(Blk *b1, Blk *b2) -{ - assert(b2); - if (b1 == b2) - return 1; - if (b1 == 0 || b1->visit) - return 0; - b1->visit = 1; - return reaches1(b1->s1, b2) || reaches1(b1->s2, b2); -} - -/* path from b1 to b2 not thru bnotvia? */ -/* uses b->visit */ +/* can b reach 'to' not through excl + * Blk.visit needs to be clear at entry */ int -reachesnotvia(Fn *fn, Blk *b1, Blk *b2, Blk *bnotvia) +reachesnotvia(Fn *fn, Blk *b, Blk *to, Blk *excl) { - int rc; - - if (bnotvia) - bnotvia->visit = 1; - rc = reaches1(b1, b2); - clrbvisit(fn); - return rc; + excl->visit = 1; + return reaches(fn, b, to); } - -/* path from b1 to b2? */ -/* uses b->visit */ -int -reaches(Fn *fn, Blk *b1, Blk *b2) -{ - return reachesnotvia(fn, b1, b2, 0); -} - diff --git a/copy.c b/copy.c index 95c7e00..c76bb79 100644 --- a/copy.c +++ b/copy.c @@ -1,9 +1,35 @@ #include "all.h" -static uint -u64_wbits(uint64_t v) +typedef struct Ext Ext; + +struct Ext { + char zext; + char nopw; /* is a no-op if arg width is <= nopw */ + char usew; /* uses only the low usew bits of arg */ +}; + +static int +ext(Ins *i, Ext *e) { - uint n; + static Ext tbl[] = { + /*extsb*/ {0, 7, 8}, + /*extub*/ {1, 8, 8}, + /*extsh*/ {0, 15, 16}, + /*extuh*/ {1, 16, 16}, + /*extsw*/ {0, 31, 32}, + /*extuw*/ {1, 32, 32}, + }; + + if (!isext(i->op)) + return 0; + *e = tbl[i->op - Oextsb]; + return 1; +} + +static int +bitwidth(uint64_t v) +{ + int n; n = 0; if (v >> 32) { n += 32; v >>= 32; } @@ -15,44 +41,29 @@ u64_wbits(uint64_t v) return n+v; } +/* no more than w bits are used */ static int -EXTSIGNED[] = { /*extsb*/1, /*extub*/0, /*extsh*/1, /*extuh*/0, /*extsw*/1, /*extuw*/0 }; - -static uint -EXTMAXW[] = { /*extsb*/7, /*extub*/8, /*extsh*/15, /*extuh*/16, /*extsw*/31, /*extuw*/32 }; - -static uint -EXTW[] = { /*extsb*/8, /*extub*/8, /*extsh*/16, /*extuh*/16, /*extsw*/32, /*extuw*/32 }; - -static uint -STW[] = { /*storeb*/8, /*storeh*/16, /*storew*/32, /*storel*/64, /*stores*/32, /*stored*/64 }; - -/* is the ref used only as a narrow value? */ -static int -usewidthle(Fn *fn, Ref r, uint wbits) +usewidthle(Fn *fn, Ref r, int w) { + Ext e; Tmp *t; Use *u; Phi *p; - int b; Ins *i; Ref rc; int64_t v; + int b; - if (isconbits(fn, r, &v)) - if (u64_wbits(v) <= wbits) - return 1; - if (rtype(r) != RTmp) - return 0; + assert(rtype(r) == RTmp); t = &fn->tmp[r.val]; - for (u = t->use; u < &t->use[t->nuse]; u++) { + for (u=t->use; u<&t->use[t->nuse]; u++) { switch (u->type) { case UPhi: p = u->u.phi; if (p->visit) continue; p->visit = 1; - b = usewidthle(fn, p->to, wbits); + b = usewidthle(fn, p->to, w); p->visit = 0; if (b) continue; @@ -61,14 +72,13 @@ usewidthle(Fn *fn, Ref r, uint wbits) i = u->u.ins; assert(i != 0); if (i->op == Ocopy) - if (usewidthle(fn, i->to, wbits)) + if (usewidthle(fn, i->to, w)) continue; - if (isext(i->op)) { - if (EXTW[i->op - Oextsb] <= wbits) + if (ext(i, &e)) { + if (e.usew <= w) + continue; + if (usewidthle(fn, i->to, w)) continue; - else - if (usewidthle(fn, i->to, wbits)) - continue;; } if (i->op == Oand) { if (req(r, i->arg[0])) @@ -77,15 +87,11 @@ usewidthle(Fn *fn, Ref r, uint wbits) assert(req(r, i->arg[1])); rc = i->arg[0]; } - if (isconbits(fn, rc, &v)) - if (u64_wbits(v) <= wbits) + if (isconbits(fn, rc, &v) + && bitwidth(v) <= w) continue; break; } - if (isstore(i->op)) - if (req(r, i->arg[1])) - if (STW[i->op - Ostoreb] > wbits) - continue; break; default: break; @@ -95,27 +101,17 @@ usewidthle(Fn *fn, Ref r, uint wbits) return 1; } -static Phi* -findphi(Fn *fn, uint bid, Ref to) -{ - Phi *p; - for (p = fn->rpo[bid]->phi; p; p = p->link) - if (req(p->to, to)) - break; - assert(p); - return p; -} - -static uint -uint_min(uint v1, uint v2) +static int +min(int v1, int v2) { return v1 < v2 ? v1 : v2; } -/* is the ref def a narrow value? */ +/* is the ref narrower than w bits */ static int -defwidthle(Fn *fn, Ref r, uint wbits) +defwidthle(Fn *fn, Ref r, int w) { + Ext e; Tmp *t; Phi *p; Ins *i; @@ -123,93 +119,99 @@ defwidthle(Fn *fn, Ref r, uint wbits) int64_t v; int x; - if (isconbits(fn, r, &v)) - if (u64_wbits(v) <= wbits) + if (isconbits(fn, r, &v) + && bitwidth(v) <= w) return 1; if (rtype(r) != RTmp) return 0; t = &fn->tmp[r.val]; if (t->cls != Kw) return 0; - i = t->def; - if (i == 0) { + + if (!t->def) { /* phi def */ - p = findphi(fn, t->bid, r); + for (p=fn->rpo[t->bid]->phi; p; p=p->link) + if (req(p->to, r)) + break; + assert(p); if (p->visit) return 1; p->visit = 1; - for (n = 0; n < p->narg; n++) - if (!defwidthle(fn, p->arg[n], wbits)) { + for (n=0; nnarg; n++) + if (!defwidthle(fn, p->arg[n], w)) { p->visit = 0; return 0; } p->visit = 0; return 1; } - /* ins def */ + + i = t->def; if (i->op == Ocopy) - return defwidthle(fn, i->arg[0], wbits); + return defwidthle(fn, i->arg[0], w); if (i->op == Oshr || i->op == Osar) { if (isconbits(fn, i->arg[1], &v)) if (0 < v && v <= 32) { - if (i->op == Oshr && 32-v <= wbits) + if (i->op == Oshr && w+v >= 32) return 1; - if (0 <= v && v < 32 && wbits < 32) - return defwidthle(fn, i->arg[0], uint_min((i->op == Osar ? 31 : 32), wbits+v)); + if (w < 32) { + if (i->op == Osar) + w = min(31, w+v); + else + w = min(32, w+v); + } } - return defwidthle(fn, i->arg[0], wbits); + return defwidthle(fn, i->arg[0], w); } if (iscmp(i->op, &x, &x)) - return wbits >= 1; - if (i->op == Oand) - return defwidthle(fn, i->arg[0], wbits) || defwidthle(fn, i->arg[1], wbits); - if (i->op == Oor || i->op == Oxor) - return defwidthle(fn, i->arg[0], wbits) && defwidthle(fn, i->arg[1], wbits); - if (isext(i->op)) { - if (EXTSIGNED[i->op - Oextsb]) - return defwidthle(fn, i->arg[0], uint_min(wbits, EXTMAXW[i->op - Oextsb])); - if (EXTW[i->op - Oextsb] <= wbits) + return w >= 1; + if (i->op == Oand) { + if (defwidthle(fn, i->arg[0], w) + || defwidthle(fn, i->arg[1], w)) return 1; - return defwidthle(fn, i->arg[0], wbits); + return 0; + } + if (i->op == Oor || i->op == Oxor) { + if (defwidthle(fn, i->arg[0], w) + && defwidthle(fn, i->arg[1], w)) + return 1; + return 0; + } + if (ext(i, &e)) { + if (e.zext && e.usew <= w) + return 1; + w = min(w, e.nopw); + return defwidthle(fn, i->arg[0], w); } - return 0; -} -/* is the ref a boolean - 0, 1 - value? */ -int -iswu1(Fn *fn, Ref r) -{ - return defwidthle(fn, r, 1); + return 0; } static int -isnarrowpar(Fn *fn, Ref r) +isw1(Fn *fn, Ref r) { - Tmp *t; - - if (rtype(r) != RTmp) - return 0; - t = &fn->tmp[r.val]; - if (t->bid != fn->start->id || t->def == 0) - return 0; - return ispar(t->def->op); + return defwidthle(fn, r, 1); } -/* Insert extub/extuh instructions in start for pars used only narrowly */ -/* needs use; breaks use */ +/* insert early extub/extuh instructions + * for pars used only narrowly; this + * helps factoring extensions out of + * loops + * + * needs use; breaks use + */ void narrowpars(Fn *fn) { Blk *b; int loop; - Ins *i, *ins; + Ins ext, *i, *ins; uint npar, nins; - enum O extop; Ref r; /* only useful for functions with loops */ loop = 0; - for (b = fn->start; b; b = b->link) + for (b=fn->start; b; b=b->link) if (b->loop > 1) { loop = 1; break; @@ -218,49 +220,47 @@ narrowpars(Fn *fn) return; b = fn->start; - npar = 0; - for (i = b->ins; i < &b->ins[b->nins]; i++) { + npar = 0; + for (i=b->ins; i<&b->ins[b->nins]; i++) { if (!ispar(i->op)) break; npar++; } - if (npar == 0) return; nins = b->nins + npar; - ins = vnew(nins, sizeof ins[0], PFn); //alloc(nins * sizeof ins[0]); - memcpy(ins, b->ins, npar * sizeof ins[0]); - memcpy(ins + 2*npar, b->ins + npar, (b->nins - npar) * sizeof ins[0]); + ins = vnew(nins, sizeof ins[0], PFn); + icpy(ins, b->ins, npar); + icpy(ins + 2*npar, b->ins+npar, b->nins-npar); b->ins = ins; b->nins = nins; - for (i = b->ins; i < &b->ins[b->nins]; i++) { + for (i=b->ins; i<&b->ins[b->nins]; i++) { if (!ispar(i->op)) break; - extop = Onop; + ext = (Ins){.op = Onop}; if (i->cls == Kw) - if (usewidthle(fn, i->to, 16)) { - if (usewidthle(fn, i->to, 8)) - extop = Oextub; - else - extop = Oextuh; - } - if (extop == Onop) { - *(i+npar) = (Ins) {.op = Onop}; - } else { + if (usewidthle(fn, i->to, 16)) { + ext.op = Oextuh; + if (usewidthle(fn, i->to, 8)) + ext.op = Oextub; r = newtmp("vw", i->cls, fn); - *(i+npar) = (Ins) {.op = extop, .cls = i->cls, .to = i->to, .arg = {r}}; + ext.cls = i->cls; + ext.to = i->to; + ext.arg[0] = r; i->to = r; } + *(i+npar) = ext; } } -/* used by GVN */ Ref copyref(Fn *fn, Blk *b, Ins *i) { + /* which extensions are copies for a given + * argument width */ static bits extcpy[] = { [WFull] = 0, [Wsb] = BIT(Wsb) | BIT(Wsh) | BIT(Wsw), @@ -270,63 +270,133 @@ copyref(Fn *fn, Blk *b, Ins *i) [Wsw] = BIT(Wsw), [Wuw] = BIT(Wuw), }; - bits bext; + Ext e; Tmp *t; int64_t v; - int is0; + int w, z; if (i->op == Ocopy) return i->arg[0]; /* op identity value */ - if (optab[i->op].hasid) - if (KBASE(i->cls) == 0) /* integer only - fp NaN! */ - if (req(i->arg[1], con01[optab[i->op].idval])) - if (!optab[i->op].cmpeqwl || iswu1(fn, i->arg[0])) + if (optab[i->op].hasid + && KBASE(i->cls) == 0 /* integer only - fp NaN! */ + && req(i->arg[1], con01[optab[i->op].idval]) + && (!optab[i->op].cmpeqwl || isw1(fn, i->arg[0]))) return i->arg[0]; /* idempotent op with identical args */ - if (optab[i->op].idemp) - if (req(i->arg[0], i->arg[1])) + if (optab[i->op].idemp + && req(i->arg[0], i->arg[1])) return i->arg[0]; /* integer cmp with identical args */ - if (optab[i->op].cmpeqwl || optab[i->op].cmplgtewl) - if (req(i->arg[0], i->arg[1])) + if ((optab[i->op].cmpeqwl || optab[i->op].cmplgtewl) + && req(i->arg[0], i->arg[1])) return con01[optab[i->op].eqval]; - /* cmpeq/ne 0 with 0/non-0 inference from dominating jnz */ - if (optab[i->op].cmpeqwl) - if (req(i->arg[1], con01[0])) - if (is0non0(fn, b, i->arg[0], argcls(i,0), &is0)) - return con01[optab[i->op].eqval^is0^1]; + /* cmpeq/ne 0 with 0/non-0 inference */ + if (optab[i->op].cmpeqwl + && req(i->arg[1], CON_Z) + && zeroval(fn, b, i->arg[0], argcls(i, 0), &z)) + return con01[optab[i->op].eqval^z^1]; /* redundant and mask */ - if (i->op == Oand) - if (isconbits(fn, i->arg[1], &v)) - if (((v+1) & v) == 0) /* v == 2^N-1 */ - if (defwidthle(fn, i->arg[0], u64_wbits(v))) + if (i->op == Oand + && isconbits(fn, i->arg[1], &v) + && (v > 0 && ((v+1) & v) == 0) + && defwidthle(fn, i->arg[0], bitwidth(v))) return i->arg[0]; - if (!isext(i->op) || rtype(i->arg[0]) != RTmp) - return R; - if (i->op == Oextsw || i->op == Oextuw) - if (i->cls == Kw) + if (i->cls == Kw + && (i->op == Oextsw || i->op == Oextuw)) return i->arg[0]; - t = &fn->tmp[i->arg[0].val]; - assert(KBASE(t->cls) == 0); - if (i->cls == Kl && t->cls == Kw) + if (ext(i, &e) && rtype(i->arg[0]) == RTmp) { + t = &fn->tmp[i->arg[0].val]; + assert(KBASE(t->cls) == 0); + + /* do not break typing by returning + * a narrower temp */ + if (KWIDE(i->cls) > KWIDE(t->cls)) + return R; + + w = Wsb + (i->op - Oextsb); + if (BIT(w) & extcpy[t->width]) + return i->arg[0]; + + /* avoid eliding extensions of params + * inserted in the start block; their + * point is to make further extensions + * redundant */ + if ((!t->def || !ispar(t->def->op)) + && usewidthle(fn, i->to, e.usew)) + return i->arg[0]; + + if (defwidthle(fn, i->arg[0], e.nopw)) + return i->arg[0]; + } + + return R; +} + +static int +phieq(Phi *pa, Phi *pb) +{ + Ref r; + uint n; + + assert(pa->narg == pb->narg); + for (n=0; nnarg; n++) { + r = phiarg(pb, pa->blk[n]); + if (!req(pa->arg[n], r)) + return 0; + } + return 1; +} + +Ref +phicopyref(Fn *fn, Blk *b, Phi *p) +{ + Blk *d, **s; + Phi *p1; + uint n, c; + + /* identical args */ + for (n=0; nnarg-1; n++) + if (!req(p->arg[n], p->arg[n+1])) + break; + if (n == p->narg-1) + return p->arg[n]; + + /* same as a previous phi */ + for (p1=b->phi; p1!=p; p1=p1->link) { + assert(p1); + if (phieq(p1, p)) + return p1->to; + } + + /* can be replaced by a + * dominating jnz arg */ + d = b->idom; + if (p->narg != 2 + || d->jmp.type != Jjnz + || !isw1(fn, d->jmp.arg)) return R; - bext = extcpy[t->width]; - if ((BIT(Wsb + (i->op-Oextsb)) & bext) != 0) - return i->arg[0]; - if (!isnarrowpar(fn, i->arg[0])) - if (usewidthle(fn, i->to, EXTW[i->op - Oextsb])) - return i->arg[0]; - if (defwidthle(fn, i->arg[0], EXTMAXW[i->op - Oextsb])) - return i->arg[0]; + s = (Blk*[]){0, 0}; + for (n=0; n<2; n++) + for (c=0; c<2; c++) + if (req(p->arg[n], con01[c])) + s[c] = p->blk[n]; + + /* if s1 ends with a jnz on either b + * or s2; the inference below is wrong + * without the jump type checks */ + if (d->s1 == s[1] && d->s2 == s[0] + && d->s1->jmp.type == Jjmp + && d->s2->jmp.type == Jjmp) + return d->jmp.arg; return R; } diff --git a/fold.c b/fold.c index 87e5824..7d0b04c 100644 --- a/fold.c +++ b/fold.c @@ -13,7 +13,7 @@ iscon(Con *c, int w, uint64_t k) return (uint32_t)c->bits.i == (uint32_t)k; } -static int +int foldint(Con *res, int op, int w, Con *cl, Con *cr) { union { diff --git a/gcm.c b/gcm.c index 23e475b..e685d2a 100644 --- a/gcm.c +++ b/gcm.c @@ -2,36 +2,35 @@ #define NOBID (-1u) -/* ins can trap at runtime */ static int -istrapping(Fn *fn, Ins *i) +isdivwl(Ins *i) { - int64_t v; - - if (KBASE(i->cls) == 0) - if (INRANGE(i->op, Odiv, Ourem)) - if (!isconbits(fn, i->arg[1], &v) || v == 0) - return 1; - return 0; + switch (i->op) { + case Odiv: + case Orem: + case Oudiv: + case Ourem: + return KBASE(i->cls) == 0; + default: + return 0; + } } -/* fixed ins that can be eliminated if unused */ -static int -canelim(Fn *fn, Ins *i) +int +pinned(Ins *i) { - return isload(i->op) || isalloc(i->op) || istrapping(fn, i); + return optab[i->op].pinned || isdivwl(i); } -/* ins must stay in def blk */ -int -isfixed(Fn *fn, Ins *i) +/* pinned ins that can be eliminated if unused */ +static int +canelim(Ins *i) { - return optab[i->op].ispinned || istrapping(fn, i); + return isload(i->op) || isalloc(i->op) || isdivwl(i); } static uint earlyins(Fn *, Blk *, Ins *); -/* return earlybid of ref def ins */ static uint schedearly(Fn *fn, Ref r) { @@ -39,43 +38,39 @@ schedearly(Fn *fn, Ref r) Blk *b; if (rtype(r) != RTmp) - return 0 /* root/start */; + return 0; t = &fn->tmp[r.val]; if (t->gcmbid != NOBID) - return t->gcmbid; /* already visited/visiting */ + return t->gcmbid; b = fn->rpo[t->bid]; if (t->def) { - /* def is an ins */ assert(b->ins <= t->def && t->def < &b->ins[b->nins]); - t->gcmbid = 0; /* mark visiting root/start blk */ - t->gcmbid = earlyins(fn, b, t->def); /* schedule ins input defs */ + t->gcmbid = 0; /* mark as visiting */ + t->gcmbid = earlyins(fn, b, t->def); } else { - /* def is a phi - it stays in its def blk */ + /* phis do not move */ t->gcmbid = t->bid; } return t->gcmbid; } -/* return deepest domdpth bid of arg defs */ static uint -earlyins(Fn *fn, Blk *b, Ins* i) +earlyins(Fn *fn, Blk *b, Ins *i) { - uint earlybid, arg1earlybid; - - earlybid = schedearly(fn, i->arg[0]); - assert(earlybid != NOBID); - arg1earlybid = schedearly(fn, i->arg[1]); - assert(arg1earlybid != NOBID); - if (fn->rpo[earlybid]->domdpth < fn->rpo[arg1earlybid]->domdpth) { - assert(dom(fn->rpo[earlybid], fn->rpo[arg1earlybid])); - earlybid = arg1earlybid; + uint b0, b1; + + b0 = schedearly(fn, i->arg[0]); + assert(b0 != NOBID); + b1 = schedearly(fn, i->arg[1]); + assert(b1 != NOBID); + if (fn->rpo[b0]->depth < fn->rpo[b1]->depth) { + assert(dom(fn->rpo[b0], fn->rpo[b1])); + b0 = b1; } - - /* fixed ins remain in their defining blk */ - return isfixed(fn, i) ? b->id : earlybid; + return pinned(i) ? b->id : b0; } static void @@ -87,12 +82,14 @@ earlyblk(Fn *fn, uint bid) uint n; b = fn->rpo[bid]; - for (p = b->phi; p; p = p->link) - for (n = 0; n < p->narg; n++) + for (p=b->phi; p; p=p->link) + for (n=0; nnarg; n++) schedearly(fn, p->arg[n]); - for (i = b->ins; i < &b->ins[b->nins]; i++) - if (isfixed(fn, i)) - earlyins(fn, b, i); + for (i=b->ins; i<&b->ins[b->nins]; i++) + if (pinned(i)) { + schedearly(fn, i->arg[0]); + schedearly(fn, i->arg[1]); + } schedearly(fn, b->jmp.arg); } @@ -154,16 +151,17 @@ schedlate(Fn *fn, Ref r) t = &fn->tmp[r.val]; if (t->visit) - return t->gcmbid; /* already visited/visiting */ + return t->gcmbid; - t->visit = 1; /* mark visiting/visited */ + t->visit = 1; earlybid = t->gcmbid; if (earlybid == NOBID) return NOBID; /* not used */ - t->gcmbid = t->bid; /* t->gcmbid is now latebid */ + /* reuse gcmbid for late bid */ + t->gcmbid = t->bid; latebid = NOBID; - for (u = t->use; u < &t->use[t->nuse]; u++) { + for (u=t->use; u<&t->use[t->nuse]; u++) { assert(u->bid < fn->nblk); b = fn->rpo[u->bid]; switch (u->type) { @@ -182,39 +180,37 @@ schedlate(Fn *fn, Ref r) } latebid = lcabid(fn, latebid, uselatebid); } + /* latebid may be NOBID if the temp is used + * in fixed instructions that may be eliminated + * and are themselves unused transitively */ - /* phis stay in their def blk */ - if (t->def) { - /* allow elimination of unused load/alloc/trapping ins */ - if (latebid == NOBID && canelim(fn, t->def)) - t->gcmbid = NOBID; - /* ... otherwise fixed ins stay in defining blk */ - else if(!isfixed(fn, t->def)) - t->gcmbid = bestbid(fn, earlybid, latebid); - } + if (t->def && !pinned(t->def)) + t->gcmbid = bestbid(fn, earlybid, latebid); + /* else, keep the early one */ + /* now, gcmbid is the best bid */ return t->gcmbid; } -/* return lca bid of uses, or NOBID if no active uses */ +/* returns lca bid of uses or NOBID if + * the definition can be eliminated */ static uint lateins(Fn *fn, Blk *b, Ins *i, Ref r) { uint latebid; - assert(i->op == Onop || req(i->arg[0], r) || req(i->arg[1], r)); - if (i->op == Onop) - return NOBID; /* already eliminated */ - assert(b->ins <= i && i < &b->ins[b->nins]); + assert(req(i->arg[0], r) || req(i->arg[1], r)); latebid = schedlate(fn, i->to); - /* allow elimination of unused load/alloc/trapping ins */ - if (latebid == NOBID) - if (canelim(fn, i)) - return NOBID; - /* ... otherwise fixed ins stay in defining blk */ - return isfixed(fn, i) ? b->id : latebid; + if (pinned(i)) { + if (latebid == NOBID) + if (canelim(i)) + return NOBID; + return b->id; + } + + return latebid; } static uint @@ -254,17 +250,16 @@ lateblk(Fn *fn, uint bid) Ins *i; b = fn->rpo[bid]; - for (pp = &b->phi; *(pp);) + for (pp=&b->phi; *(pp);) if (schedlate(fn, (*pp)->to) == NOBID) { - /* unused */ (*pp)->narg = 0; /* mark unused */ *pp = (*pp)->link; /* remove phi */ } else pp = &(*pp)->link; - for (i = b->ins; i < &b->ins[b->nins]; i++) - if (isfixed(fn, i)) - lateins(fn, b, i, i->arg[0]); + for (i=b->ins; i<&b->ins[b->nins]; i++) + if (pinned(i)) + schedlate(fn, i->to); } static void @@ -274,7 +269,7 @@ addgcmins(Fn *fn, Ins *vins, uint nins) Tmp *t; Blk *b; - for (i = vins; i < &vins[nins]; i++) { + for (i=vins; i<&vins[nins]; i++) { assert(rtype(i->to) == RTmp); t = &fn->tmp[i->to.val]; b = fn->rpo[t->gcmbid]; @@ -282,9 +277,10 @@ addgcmins(Fn *fn, Ins *vins, uint nins) } } -/* remove unused instructions */ -/* move instructions to (end of) target block */ -/* use-before-def is fixed up afterwards */ +/* move live instructions to the + * end of their target block; use- + * before-def errors are fixed by + * schedblk */ static void gcmmove(Fn *fn) { @@ -295,13 +291,13 @@ gcmmove(Fn *fn) nins = 0; vins = vnew(nins, sizeof vins[0], PFn); - for (t=&fn->tmp[Tmp0]; t < &fn->tmp[fn->ntmp]; t++) { + for (t=fn->tmp; t<&fn->tmp[fn->ntmp]; t++) { if (t->def == 0) continue; if (t->bid == t->gcmbid) continue; i = t->def; - if (isfixed(fn, i) && !canelim(fn, i)) + if (pinned(i) && !canelim(i)) continue; assert(rtype(i->to) == RTmp); assert(t == &fn->tmp[i->to.val]); @@ -309,176 +305,153 @@ gcmmove(Fn *fn) addins(&vins, &nins, i); *i = (Ins){.op = Onop}; } - addgcmins(fn, vins, nins); } -static void -schedins(Fn *fn, Blk *b, Ins *i0, Ins **pvins, uint *pnins) +/* dfs ordering */ +static Ins * +schedins(Fn *fn, Blk *b, Ins *i, Ins **pvins, uint *pnins) { - Ins *i, *i1; + Ins *i0, *i1; Tmp *t; - uint na; + uint n; - if (i0->op == Onop) - return; - /* arg...call have to stay together */ - /* TODO - sel0...sel1 too */ - for (i1 = i0; isarg(i1->op); i1++) {} - for (i = i0; i <= i1; i++) - for (na = 0; na < 2; na++) { - if (rtype(i->arg[na]) != RTmp) - continue; - t = &fn->tmp[i->arg[na].val]; - /* def in different blk, or phi in this blk */ - if (t->bid != b->id || t->def == 0) + igroup(b, i, &i0, &i1); + for (i=i0; iarg[n]) != RTmp) continue; - /* already scheduled */ - if (t->def->op == Onop) { + t = &fn->tmp[i->arg[n].val]; + if (t->bid != b->id || !t->def) continue; - } schedins(fn, b, t->def, pvins, pnins); } - for (i = i0; i <= i1; i++) { + for (i=i0; iins; i < &b->ins[b->nins]; i++) - schedins(fn, b, i, pvins, &nins); - idup(b, *pvins, nins); -} - -static void -fixub4d(Fn *fn) +schedblk(Fn *fn) { Blk *b; - Ins *vins; // TODO insb + Ins *i, *vins; + uint nins; - vins = vnew(0, sizeof vins[0], PFn); - for (b = fn->start; b; b = b->link) - fixbub4d(fn, b, &vins); + vins = vnew(0, sizeof vins[0], PHeap); + for (b=fn->start; b; b=b->link) { + nins = 0; + for (i=b->ins; i<&b->ins[b->nins];) + i = schedins(fn, b, i, &vins, &nins); + idup(b, vins, nins); + } + vfree(vins); } static int -iskladdcon(Fn *fn, Ins *i, int64_t *v) +cheap(Ins *i) { - if (i->op == Oadd) - if (i->cls == Kl) - if (rtype(i->arg[0]) == RTmp) - if (isconbits(fn, i->arg[1], v)) - return 1; - return 0; -} - -static void -ireassoc(Fn *fn, Blk *b, Ins *i, Ins **pvins, uint *pnins) -{ - Blk *b2; - Tmp *t, *t2; - Use *u; - Ref r2; - int64_t v; int x; - assert(b->ins <= i && i < &b->ins[b->nins]); - if (!iscmp(i->op, &x, &x)) - if (!iskladdcon(fn, i, &v)) - return; - - assert(rtype(i->to) == RTmp); - t = &fn->tmp[i->to.val]; - for (u = t->use; u < &t->use[t->nuse]; u++) { - if (u->type == UPhi) - continue; - b2 = fn->rpo[u->bid]; - addins(pvins, pnins, i); - /* careful, can move fn->tmp */ - r2 = newtmp("rea", t->cls, fn); - t = &fn->tmp[i->to.val]; - t2 = &fn->tmp[r2.val]; - t2->gcmbid = u->bid; - (*pvins)[(*pnins)-1].to = r2; - if (u->type == UIns) { - assert(req(u->u.ins->arg[0], i->to) - || req(u->u.ins->arg[1], i->to)); - if (req(u->u.ins->arg[0], i->to)) - u->u.ins->arg[0] = r2; - if (req(u->u.ins->arg[1], i->to)) - u->u.ins->arg[1] = r2; - } else { - assert(u->type == UJmp); - assert(req(b2->jmp.arg, i->to)); - b2->jmp.arg = r2; - } + if (KBASE(i->cls) != 0) + return 0; + switch (i->op) { + case Oneg: + case Oadd: + case Osub: + case Omul: + case Oand: + case Oor: + case Oxor: + case Osar: + case Oshr: + case Oshl: + return 1; + default: + return iscmp(i->op, &x, &x); } } -/* Redistribute trivial ops to point of use. */ -/* Reduces register pressure. */ -/* needs rpo, use; breaks use */ -void -reassoc(Fn *fn) +static void +sinkref(Fn *fn, Blk *b, Ref *pr) { - Blk *b; - Ins *vins, *i; - uint nins; - - nins = 0; - vins = vnew(nins, sizeof vins[0], PFn); + Ins i; + Tmp *t; + Ref r; - /* identify trivial ins */ - for (b = fn->start; b; b = b->link) { - for (i = b->ins; i < &b->ins[b->nins]; i++) - ireassoc(fn, b, i, &vins, &nins); - } + if (rtype(*pr) != RTmp) + return; + t = &fn->tmp[pr->val]; + if (!t->def + || t->bid == b->id + || pinned(t->def) + || !cheap(t->def)) + return; - addgcmins(fn, vins, nins); + /* sink t->def to b */ + i = *t->def; + r = newtmp("snk", t->cls, fn); + t = 0; /* invalidated */ + *pr = r; + i.to = r; + fn->tmp[r.val].gcmbid = b->id; + emiti(i); + sinkref(fn, b, &i.arg[0]); + sinkref(fn, b, &i.arg[1]); } +/* redistribute trivial ops to point of + * use to reduce register pressure + * requires rpo, use; breaks use + */ static void -cleartmps(Fn *fn) +sink(Fn *fn) { - Tmp *t; + Blk *b; + Ins *i; - for (t=&fn->tmp[Tmp0]; t < &fn->tmp[fn->ntmp]; t++) { - t->visit = 0; - t->gcmbid = NOBID; + for (b=fn->start; b; b=b->link) { + for (i=b->ins; i<&b->ins[b->nins]; i++) + if (isload(i->op)) + sinkref(fn, b, &i->arg[0]); + else if (isstore(i->op)) + sinkref(fn, b, &i->arg[1]); + sinkref(fn, b, &b->jmp.arg); } + addgcmins(fn, curi, &insb[NIns] - curi); } -/* https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf */ -/* require use dom */ -/* maintains rpo pred dom */ -/* breaks use */ +/* requires use dom + * maintains rpo pred dom + * breaks use + */ void gcm(Fn *fn) { + Tmp *t; uint bid; - filldomdpth(fn); + filldepth(fn); fillloop(fn); - cleartmps(fn); + for (t=fn->tmp; t<&fn->tmp[fn->ntmp]; t++) { + t->visit = 0; + t->gcmbid = NOBID; + } for (bid=0; bidnblk; bid++) earlyblk(fn, bid); for (bid=0; bidnblk; bid++) lateblk(fn, bid); gcmmove(fn); - cleartmps(fn); /* filluse() uses visit */ filluse(fn); - reassoc(fn); + curi = &insb[NIns]; + sink(fn); filluse(fn); - fixub4d(fn); + schedblk(fn); if (debug['G']) { fprintf(stderr, "\n> After GCM:\n"); diff --git a/gvn.c b/gvn.c index 20c8edd..1db5bbc 100644 --- a/gvn.c +++ b/gvn.c @@ -1,6 +1,5 @@ #include "all.h" -/* literal constants 0, 1 */ Ref con01[2]; static inline uint @@ -18,121 +17,91 @@ rhash(Ref r) static uint ihash(Ins *i) { - uint a0h, a1h, ah, h; + uint h; - a0h = rhash(i->arg[0]); - a1h = rhash(i->arg[1]); - ah = mix(a0h, a1h); - h = mix(i->cls, i->op); - h = mix(h, ah); + h = mix(i->op, i->cls); + h = mix(h, rhash(i->arg[0])); + h = mix(h, rhash(i->arg[1])); return h; - } static int ieq(Ins *ia, Ins *ib) { - if (ia->cls == ib->cls) if (ia->op == ib->op) + if (ia->cls == ib->cls) if (req(ia->arg[0], ib->arg[0])) if (req(ia->arg[1], ib->arg[1])) return 1; return 0; } -static Ins** gvntbl; +static Ins **gvntbl; static uint gvntbln; -static uint lookupn; -static uint proben; -static uint maxproben; static Ins * -gvndup(Ins *i, int insert) { +gvndup(Ins *i, int insert) +{ uint idx, n; - Ins *i2; - - lookupn++; + Ins *ii; idx = ihash(i) % gvntbln; - for (n = 1;; n++) { - proben++; - if (n > maxproben) - maxproben = n; - i2 = gvntbl[idx]; - if (!i2) + for (n=1;; n++) { + ii = gvntbl[idx]; + if (!ii) break; - if (ieq(i, i2)) - return i2; + if (ieq(i, ii)) + return ii; idx++; if (gvntbln <= idx) idx = 0; } - /* not found */ - if (insert) { + if (insert) gvntbl[idx] = i; - } return 0; } static void -replaceref(Ref *r, Ref r1, Ref r2) -{ - if (req(*r, r1)) - *r = r2; -} - -static void -replacepuse(Phi *p, Ref r1, Ref r2) -{ - Ref *a; - - for (a = p->arg; a < &p->arg[p->narg]; a++) - replaceref(a, r1, r2); -} - -static void -replaceiuse(Ins *i, Ref r1, Ref r2) -{ - replaceref(&i->arg[0], r1, r2); - replaceref(&i->arg[1], r1, r2); -} - -static void -replacejuse(Blk* b, Ref r1, Ref r2) -{ - replaceref(&b->jmp.arg, r1, r2); -} - -static void -replaceuse(Fn *fn, Use* u, Ref r1, Ref r2) +replaceuse(Fn *fn, Use *u, Ref r1, Ref r2) { - Tmp *t2; Blk *b; + Ins *i; + Phi *p; + Ref *pr; + Tmp *t2; + int n; - t2 = rtype(r2) == RTmp ? &fn->tmp[r2.val] : 0; + t2 = 0; + if (rtype(r2) == RTmp) + t2 = &fn->tmp[r2.val]; b = fn->rpo[u->bid]; - switch (u->type) { - case UXXX: - die("unreachable"); - break; case UPhi: - replacepuse(u->u.phi, r1, r2); + p = u->u.phi; + for (pr=p->arg; pr<&p->arg[p->narg]; pr++) + if (req(*pr, r1)) + *pr = r2; if (t2) - adduse(t2, UPhi, b, u->u.phi); + adduse(t2, UPhi, b, p); break; case UIns: - replaceiuse(u->u.ins, r1, r2); + i = u->u.ins; + for (n=0; n<2; n++) + if (req(i->arg[n], r1)) + i->arg[n] = r2; if (t2) - adduse(t2, UIns, b, u->u.ins); + adduse(t2, UIns, b, i); break; case UJmp: - replacejuse(fn->rpo[u->bid], r1, r2); + if (req(b->jmp.arg, r1)) + b->jmp.arg = r2; if (t2) adduse(t2, UJmp, b); break; + case UXXX: + die("unreachable"); } } @@ -144,216 +113,33 @@ replaceuses(Fn *fn, Ref r1, Ref r2) assert(rtype(r1) == RTmp); t1 = &fn->tmp[r1.val]; - - for (u = t1->use; u < &t1->use[t1->nuse]; u++) + for (u=t1->use; u<&t1->use[t1->nuse]; u++) replaceuse(fn, u, r1, r2); - t1->nuse = 0; } static void -rmuse(Fn *fn, Blk *b, uint ty, Ins *i, Phi *p, Ref r, int strict) -{ - Tmp *t; - Use *u; - int found, rm; - - if (rtype(r) != RTmp) - return; - found = 0; - t = &fn->tmp[r.val]; - for (u = t->use; u < &t->use[t->nuse];) { - rm = 0; - if (u->type == ty) { - switch (ty) { - case UXXX: - die("unreachable"); - break; - case UIns: - assert(p == 0); - assert(i); - rm = u->u.ins == i; - break; - case UPhi: - assert(i == 0); - assert(p); - rm = u->u.phi == p; - break; - case UJmp: - assert(i == 0); - assert(p == 0); - rm = u->bid == b->id; - break; - default: - die("unreachable"); - break; - } - } - if (rm) { - found++; - assert(u < &t->use[t->nuse]); - assert(u->bid == b->id); - /* careful - deleting below iterator */ - memcpy(u, u+1, ((t->nuse) - ((u+1)-t->use)) * sizeof u[0]); - t->nuse--; - } else - u++; - } - if (strict) - assert(found); -} - -static int -phieq(Phi *pa, Phi *pb) -{ - uint n; - - assert(pa->narg == pb->narg); - for (n=0; nnarg; n++) { - if (!req(pa->arg[n], phiarg(pb, pa->blk[n]))) - return 0; - } - return 1; -} - -static void -killphi(Fn *fn, Blk *b, Phi **pp, Ref r) -{ - Phi *p; - uint n; - - p = *pp; - replaceuses(fn, p->to, r); - assert(b->npred == p->narg); - for (n = 0; n < p->narg; n++) - rmuse(fn, b, UPhi, 0, p, p->arg[n], 0/*!strict TODO dups*/); - p->narg = 0; /* mark as unused - TODO should not be necessary with rmuse*/ - *pp = p->link; -} - -static void -ifphiopt(Fn *fn, Blk *b) +dedupphi(Fn *fn, Blk *b) { - Blk *ifb, *thenb, *elseb, *tmp; - Phi *p, **pp; - Tmp *t; - Ref argt, argf, r1; - int k, x, caninv; - - /* is b the join blk of an if[-then][-else] graphlet? */ - if (!ifjoin(b, &ifb, &thenb, &elseb)) - return; - - assert(ifb->jmp.type == Jjnz); - assert(ifb->s1 == b || (ifb->s1 == thenb && thenb->s1 == b)); - assert(ifb->s2 == b || (ifb->s2 == elseb && elseb->s1 == b)); - if (!iswu1(fn, ifb->jmp.arg)) - return; /* no opportunity */ - - caninv = 1; - for (p = b->phi; p; p = p->link) - if (req(phiarg(p, elseb), con01[0])) { - caninv = 0; - break; - } - - /* phi bool 1/0 "copy" */ - for (pp = &b->phi; *pp;) { - p = *pp; - assert(p->narg == 2); - argt = phiarg(p, thenb); - argf = phiarg(p, elseb); - /* jnz jmp.arg in phi is 1/0 */ - if (req(argt, ifb->jmp.arg)) { - if (!req(argf, ifb->jmp.arg)) - rmuse(fn, b, UPhi, 0, p, argt, 1/*strict*/); - argt = p->arg[phiargn(p, thenb)] = con01[1]; - } - if (req(argf, ifb->jmp.arg)) { - rmuse(fn, b, UPhi, 0, p, argf, 1/*strict*/); - argf = p->arg[phiargn(p, elseb)] = con01[0]; - } - /* prefer 0 as argf and/or 1 as argt, so try to invert the cmp */ - if (caninv && - ((req(argt, con01[0]) && !req(argf, con01[0])) || - (req(argf, con01[1]) && !req(argt, con01[1])))) { - assert(rtype(ifb->jmp.arg) == RTmp); - t = &fn->tmp[ifb->jmp.arg.val]; - if (t->nuse == 1) - if (t->def && iscmp(t->def->op, &k, &x) && KBASE(k) == 0) { - assert(t->use[0].type == UJmp); - assert(t->bid == ifb->id); - t->def->op = invcmpwl(t->def->op); - tmp = ifb->s1; - ifb->s1 = ifb->s2; - ifb->s2 = tmp; - r1 = argt; argt = argf; argf = r1; - caninv = 0; /* only once */ - } - } - if (req(argt, con01[1]) && req(argf, con01[0])) { - killphi(fn, b, pp, ifb->jmp.arg); - caninv = 0; /* used already */ - continue; - } - pp = &(*pp)->link; - } -} - -/* phi "copy" - singleton or all args identical */ -static void -copyphielim(Fn *fn, Blk *b) { Phi *p, **pp; - uint n; - - for (pp = &b->phi; *pp;) { - p = *pp; - assert(p->narg == b->npred); - for (n = 0; n < p->narg-1; n++) { - if (!req(p->arg[n], p->arg[n+1])) - goto Skip; - } - killphi(fn, b, pp, p->arg[0]); - continue; - Skip:; - pp = &(*pp)->link; - } -} + Ref r; -/* redundant phis */ -static void -dupphielim(Fn *fn, Blk *b) { - Phi *p, *p2, **pp; - - /* O(N^2.M) but meh */ - for (pp = &b->phi; *pp;) { - p = *pp; - assert(p->narg == b->npred); - for (p2 = p->link; p2; p2 = p2->link) { - assert(p2->narg == b->npred); - if (phieq(p, p2)) { - killphi(fn, b, pp, p2->to); - goto Skip; - } - } - pp = &(*pp)->link; - Skip:; + for (pp=&b->phi; (p=*pp);) { + r = phicopyref(fn, b, p); + if (!req(r, R)) { + replaceuses(fn, p->to, r); + p->to = R; + *pp = p->link; + } else + pp = &p->link; } - return; -} - -static void -dedupphis(Fn *fn, Blk *b) { - ifphiopt(fn, b); - copyphielim(fn,b); - dupphielim(fn, b); } static int rcmp(Ref a, Ref b) { if (rtype(a) != rtype(b)) - return rtype(a)-rtype(b); + return rtype(a) - rtype(b); return a.val - b.val; } @@ -364,14 +150,17 @@ normins(Fn *fn, Ins *i) int64_t v; Ref r; - /* truncate constant bits to 32 bits for "s"/w" uses */ - for (n = 0; n < 2; n++) { + /* truncate constant bits to + * 32 bits for s/w uses */ + for (n=0; n<2; n++) { if (!KWIDE(argcls(i, n))) if (isconbits(fn, i->arg[n], &v)) if ((v & 0xffffffff) != v) i->arg[n] = getcon(v & 0xffffffff, fn); } - /* order arg[0] <= arg[1] for commutative ops, preferring RTmp in arg[0] */ + /* order arg[0] <= arg[1] for + * commutative ops, preferring + * RTmp in arg[0] */ if (optab[i->op].commutes) if (rcmp(i->arg[0], i->arg[1]) > 0) { r = i->arg[1]; @@ -380,140 +169,106 @@ normins(Fn *fn, Ins *i) } } -static Ref -negcon(Fn *fn, int cls, Ref r) +static int +negcon(int cls, Con *c) { - int64_t v, v1; - - assert(isconbits(fn, r, &v)); - assert(KBASE(cls) == 0); - v1 = -v; - if (cls == Kw) - v1 = ((int64_t)(-(int32_t)v)) & 0xffffffff; - return getcon(v1, fn); + static Con z = {.type = CBits, .bits.i = 0}; + + return foldint(c, Osub, cls, &z, c); } static void assoccon(Fn *fn, Blk *b, Ins *i1) { Tmp *t2; - Ins *i2, i; - Ref r0, r1, rc; - int op1, op2; - int64_t v1, v2, vc; - - op1 = i1->op; - if (op1 == Osub) - op1 = Oadd; - if (!optab[op1].assoc || KBASE(i1->cls) != 0 || rtype(i1->arg[0]) != RTmp - || !isconbits(fn, i1->arg[1], &v1) || (optab[op1].hasid && v1 == optab[op1].idval)) + Ins *i2; + int op, fail; + Con c, c1, c2; + + op = i1->op; + if (op == Osub) + op = Oadd; + + if (!optab[op].assoc + || KBASE(i1->cls) != 0 + || rtype(i1->arg[0]) != RTmp + || rtype(i1->arg[1]) != RCon) return; + c1 = fn->con[i1->arg[1].val]; + t2 = &fn->tmp[i1->arg[0].val]; if (t2->def == 0) return; i2 = t2->def; - op2 = i2->op; - if (op2 == Osub) - op2 = Oadd; - if (op1 != op2 || rtype(i2->arg[0]) != RTmp || !isconbits(fn, i2->arg[1], &v2)) + + if (op != (i2->op == Osub ? Oadd : i2->op) + || rtype(i2->arg[1]) != RCon) return; + c2 = fn->con[i2->arg[1].val]; + assert(KBASE(i2->cls) == 0); - assert(i1->cls == Kl || i2->cls == Kw); - r0 = i1->arg[1]; - if (i1->op == Osub) - r0 = negcon(fn, i1->cls, r0); - r1 = i2->arg[1]; - if (i2->op == Osub) - r1 = negcon(fn, i2->cls, r1); - i = (Ins){.to = i2->to, .op = op2, .cls = i2->cls, .arg = {r0, r1}}; - rc = foldref(fn, &i); - assert(isconbits(fn, rc, &vc)); - if (op1 == Oadd) { - if (i2->cls == Kw) { - if ((int32_t)vc < 0) { - op1 = Osub; - rc = negcon(fn, Kw, rc); - } - } else { - assert(i2->cls == Kl); - if (vc < 0) { - op1 = Osub; - rc = negcon(fn, Kl, rc); - } - } + assert(KWIDE(i2->cls) >= KWIDE(i1->cls)); + + if (i1->op == Osub && negcon(i1->cls, &c1)) + return; + if (i2->op == Osub && negcon(i2->cls, &c2)) + return; + if (foldint(&c, op, i1->cls, &c1, &c2)) + return; + + if (op == Oadd && c.type == CBits) + if ((i1->cls == Kl && c.bits.i < 0) + || (i1->cls == Kw && (int32_t)c.bits.i < 0)) { + fail = negcon(i1->cls, &c); + assert(fail == 0); + op = Osub; } - i1->op = op1; - rmuse(fn, b, UIns, i1, 0, i1->arg[0], 1/*strict*/); + + i1->op = op; i1->arg[0] = i2->arg[0]; + i1->arg[1] = newcon(&c, fn); adduse(&fn->tmp[i1->arg[0].val], UIns, b, i1); - i1->arg[1] = rc; } static void -killins(Fn *fn, Blk *b, Ins *i, Ref r1, Ref r2) +killins(Fn *fn, Ins *i, Ref r) { - replaceuses(fn, r1, r2); - rmuse(fn, b, UIns, i, 0, i->arg[0], 1/*strict*/); - if (!req(i->arg[0], i->arg[1])) - rmuse(fn, b, UIns, i, 0, i->arg[1], 1/*strict*/); + replaceuses(fn, i->to, r); *i = (Ins){.op = Onop}; } static void -dedupi(Fn *fn, Blk *b, Ins *i) +dedupins(Fn *fn, Blk *b, Ins *i) { - Ref r2; - Ins *i2; - - if (i->op == Onop || i->op == Odbgloc) - return; + Ref r; + Ins *i1; normins(fn, i); - - if (optab[i->op].ispinned) + if (i->op == Onop || pinned(i)) return; - assert(rtype(i->to) == RTmp); - /* merge associative ops with constant arg[1] */ + assert(!req(i->to, R)); assoccon(fn, b, i); - /* effective copy? */ - r2 = copyref(fn, b, i); - if (!req(r2, R)) { - killins(fn, b, i, i->to, r2); + r = copyref(fn, b, i); + if (!req(r, R)) { + killins(fn, i, r); return; } - - /* effective constant? */ - r2 = foldref(fn, i); - if (!req(r2, R)) { - killins(fn, b, i, i->to, r2); + r = foldref(fn, i); + if (!req(r, R)) { + killins(fn, i, r); return; } - - /* do not dedup (trapping) ins that GCM will not move */ - if (isfixed(fn, i)) - return; - - /* duplicate? */ - i2 = gvndup(i, 1); - if (i2) { - killins(fn, b, i, i->to, i2->to); + i1 = gvndup(i, 1); + if (i1) { + killins(fn, i, i1->to); return; } } -static void -dedupins(Fn *fn, Blk *b) -{ - Ins *i; - - for (i = b->ins; i < &b->ins[b->nins]; i++) - dedupi(fn, b, i); -} - int -cmpeq0def(Fn *fn, Ref r, Ref *arg0, int *cls, int *eqval) +cmpeqz(Fn *fn, Ref r, Ref *arg, int *cls, int *eqval) { Ins *i; @@ -522,8 +277,8 @@ cmpeq0def(Fn *fn, Ref r, Ref *arg0, int *cls, int *eqval) i = fn->tmp[r.val].def; if (i) if (optab[i->op].cmpeqwl) - if (req(i->arg[1], con01[0])) { - *arg0 = i->arg[0]; + if (req(i->arg[1], CON_Z)) { + *arg = i->arg[0]; *cls = argcls(i, 0); *eqval = optab[i->op].eqval; return 1; @@ -534,23 +289,25 @@ cmpeq0def(Fn *fn, Ref r, Ref *arg0, int *cls, int *eqval) static int branchdom(Fn *fn, Blk *bif, Blk *bbr1, Blk *bbr2, Blk *b) { - if (bif->jmp.type == Jjnz) - if (b != bif) - if (dom(bbr1, b)) - if (!reachesnotvia(fn, bbr2, b, bif)) + assert(bif->jmp.type == Jjnz); + + if (b != bif + && dom(bbr1, b) + && !reachesnotvia(fn, bbr2, b, bif)) return 1; + return 0; } static int -dom0non0(Fn *fn, Blk *bdom, Blk *b, int *is0) +domzero(Fn *fn, Blk *d, Blk *b, int *z) { - if (branchdom(fn, bdom, bdom->s1, bdom->s2, b)) { - *is0 = 0; + if (branchdom(fn, d, d->s1, d->s2, b)) { + *z = 0; return 1; } - if (branchdom(fn, bdom, bdom->s2, bdom->s1, b)) { - *is0 = 1; + if (branchdom(fn, d, d->s2, d->s1, b)) { + *z = 1; return 1; } return 0; @@ -558,24 +315,25 @@ dom0non0(Fn *fn, Blk *bdom, Blk *b, int *is0) /* infer 0/non-0 value from dominating jnz */ int -is0non0(Fn *fn, Blk *b, Ref r, int cls, int *is0) +zeroval(Fn *fn, Blk *b, Ref r, int cls, int *z) { - Blk *bdom; - Ref arg0; + Blk *d; + Ref arg; int cls1, eqval; - for (bdom = b->idom; bdom; bdom = bdom->idom) { - if (bdom->jmp.type != Jjnz) + for (d=b->idom; d; d=d->idom) { + if (d->jmp.type != Jjnz) continue; - if (cls == Kw) - if (req(r, bdom->jmp.arg)) - if (dom0non0(fn, bdom, b, is0)) + if (req(r, d->jmp.arg) + && cls == Kw + && domzero(fn, d, b, z)) { return 1; - if (cmpeq0def(fn, bdom->jmp.arg, &arg0, &cls1, &eqval)) - if (cls == cls1) - if (req(r, arg0)) - if (dom0non0(fn, bdom, b, is0)) { - *is0 = *is0 ^ eqval; + } + if (cmpeqz(fn, d->jmp.arg, &arg, &cls1, &eqval) + && req(r, arg) + && cls == cls1 + && domzero(fn, d, b, z)) { + *z ^= eqval; return 1; } } @@ -583,19 +341,27 @@ is0non0(Fn *fn, Blk *b, Ref r, int cls, int *is0) } static int -usecls(Use *u, Ref r) +usecls(Use *u, Ref r, int cls) { + int k; + switch (u->type) { - case UXXX: break; case UIns: - /* safe to take arg[0] cls if both args == r, even for store */ + k = Kx; /* widest use */ if (req(u->u.ins->arg[0], r)) - return argcls(u->u.ins, 0); + k = argcls(u->u.ins, 0); if (req(u->u.ins->arg[1], r)) - return argcls(u->u.ins, 1); + if (k == Kx || !KWIDE(k)) + k = argcls(u->u.ins, 1); + return k == Kx ? cls : k; + case UPhi: + if (req(u->u.phi->to, R)) + return cls; /* eliminated */ + return u->u.phi->cls; + case UJmp: + return Kw; + default: break; - case UPhi: return u->u.phi->cls; - case UJmp: return Kw; } die("unreachable"); } @@ -610,136 +376,126 @@ propjnz0(Fn *fn, Blk *bif, Blk *s0, Blk *snon0, Ref r, int cls) if (s0->npred != 1 || rtype(r) != RTmp) return; t = &fn->tmp[r.val]; - for (u = t->use; u < &t->use[t->nuse]; u++) { + for (u=t->use; u<&t->use[t->nuse]; u++) { b = fn->rpo[u->bid]; - if (usecls(u, r) == cls) + /* we may compare an l temp with a w + * comparison; so check that the use + * does not involve high bits */ + if (usecls(u, r, cls) == cls) if (branchdom(fn, bif, s0, snon0, b)) - replaceuse(fn, u, r, con01[0]); + replaceuse(fn, u, r, CON_Z); } } static void -dedupjmp(Fn *fn, Blk *b) { - Blk *s1s2[2]; +dedupjmp(Fn *fn, Blk *b) +{ + Blk **ps; int64_t v; - Ref arg0; - int cls, eqval, is0; + Ref arg; + int cls, eqval, z; if (b->jmp.type != Jjnz) return; - /* propagate jmp arg as 0 thru s2 */ + /* propagate jmp arg as 0 through s2 */ propjnz0(fn, b, b->s2, b->s1, b->jmp.arg, Kw); /* propagate cmp eq/ne 0 def of jmp arg as 0 */ - s1s2[0] = b->s1; s1s2[1] = b->s2; - if (cmpeq0def(fn, b->jmp.arg, &arg0, &cls, &eqval)) - propjnz0(fn, b, s1s2[eqval^1], s1s2[eqval], arg0, cls); + if (cmpeqz(fn, b->jmp.arg, &arg, &cls, &eqval)) { + ps = (Blk*[]){b->s1, b->s2}; + propjnz0(fn, b, ps[eqval^1], ps[eqval], arg, cls); + } /* collapse trivial/constant jnz to jmp */ v = 1; - is0 = 0; + z = 0; if (b->s1 == b->s2 - || isconbits(fn, b->jmp.arg, &v) - || is0non0(fn, b, b->jmp.arg, Kw, &is0)) { - if (v == 0 || is0) + || isconbits(fn, b->jmp.arg, &v) + || zeroval(fn, b, b->jmp.arg, Kw, &z)) { + if (v == 0 || z) b->s1 = b->s2; /* we later move active ins out of dead blks */ b->s2 = 0; b->jmp.type = Jjmp; - rmuse(fn, b, UJmp, 0, 0, b->jmp.arg, 1/*strict*/); b->jmp.arg = R; - return; } } -/* rebuild rpo pred use */ -/* careful not to lose active ins in dead blks */ static void -rebuildcfg(Fn *fn) { - uint n, prevnblk; - Blk **prevrpo; - Blk *b; +rebuildcfg(Fn *fn) +{ + uint n, nblk; + Blk *b, *s, **rpo; Ins *i; - prevnblk = fn->nblk; - prevrpo = emalloc(prevnblk * sizeof prevrpo[0]); - memcpy(prevrpo, fn->rpo, prevnblk * sizeof prevrpo[0]); - - fillcfg(fn); // TODO - OK? - - for (n=0; nid == -1u) { - assert(b != fn->start); - /* blk unreachable after GVN */ - for (i = b->ins; i < &b->ins[b->nins]; i++) - if (i->op != Onop) - if (!optab[i->op].ispinned) - if (gvndup(i, 0) == i) - /* (possibly) active ins - add to start blk */ - addins(&fn->start->ins, &fn->start->nins, i); - } + nblk = fn->nblk; + rpo = emalloc(nblk * sizeof rpo[0]); + memcpy(rpo, fn->rpo, nblk * sizeof rpo[0]); + + fillcfg(fn); + + /* move instructions that were in + * killed blocks and may be active + * in the computation in the start + * block */ + s = fn->start; + for (n=0; nid != -1u) + continue; + /* blk unreachable after GVN */ + assert(b != s); + for (i=b->ins; i<&b->ins[b->nins]; i++) + if (!optab[i->op].pinned) + if (gvndup(i, 0) == i) + addins(&s->ins, &s->nins, i); } - free(prevrpo); + free(rpo); } -/* https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf */ -/* require rpo pred ssa use */ -/* recreates rpo */ -/* breaks pred use dom ssa (GCM fixes ssa) */ +/* requires rpo pred ssa use + * recreates rpo preds + * breaks pred use dom ssa (GCM fixes ssa) + */ void gvn(Fn *fn) { - uint n, nins; Blk *b; Phi *p; - - /* facilitate jnz simplification */ - blkmerge(fn); - fillcfg(fn); - filluse(fn); - filldom(fn); + Ins *i; + uint n, nins; con01[0] = getcon(0, fn); con01[1] = getcon(1, fn); - nins = 0; + /* copy.c uses the visit bit */ for (b=fn->start; b; b=b->link) - for (p = b->phi; p; p = p->link) + for (p=b->phi; p; p=p->link) p->visit = 0; - /* facilitate ext elimination */ fillloop(fn); narrowpars(fn); filluse(fn); ssacheck(fn); - for (b=fn->start; b; b=b->link) + nins = 0; + for (b=fn->start; b; b=b->link) { + b->visit = 0; nins += b->nins; + } gvntbln = nins + nins/2; gvntbl = emalloc(gvntbln * sizeof gvntbl[0]); - lookupn = 0; - proben = 0; - maxproben = 0; - - /* GVN */ - clrbvisit(fn); for (n=0; nnblk; n++) { b = fn->rpo[n]; - dedupphis(fn, b); - dedupins(fn, b); + dedupphi(fn, b); + for (i=b->ins; i<&b->ins[b->nins]; i++) + dedupins(fn, b, i); dedupjmp(fn, b); } - rebuildcfg(fn); - free(gvntbl); gvntbl = 0; - gvntbln = 0; - lookupn = 0; - proben = 0; - maxproben = 0; if (debug['G']) { fprintf(stderr, "\n> After GVN:\n"); diff --git a/load.c b/load.c index bc808c1..bac382c 100644 --- a/load.c +++ b/load.c @@ -380,6 +380,7 @@ def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il) goto Load; p->arg[np] = r1; p->blk[np] = bp; + /* XXX - multiplicity in predecessors!!! */ } if (msk != msks) mask(cls, &r, msk, il); diff --git a/main.c b/main.c index ba3ad0c..7a97f8a 100644 --- a/main.c +++ b/main.c @@ -77,9 +77,6 @@ func(Fn *fn) gvn(fn); fillcfg(fn); filluse(fn); - ifelim(fn); - fillcfg(fn); - filluse(fn); filldom(fn); gcm(fn); filluse(fn); diff --git a/ops.h b/ops.h index 00d62a0..80b0d9b 100644 --- a/ops.h +++ b/ops.h @@ -7,7 +7,7 @@ #endif #ifndef F -#define F(CanFold, HasId, IdVal, Commutes, Associates, Idemp, IsCmpEq, IsCmpLgte, CmpEqVal, IsPinned) +#define F(a,b,c,d,e,f,g,h,i,j) #endif #define T(a,b,c,d,e,f,g,h) { \ @@ -19,7 +19,17 @@ /* PUBLIC OPERATIONS */ /*********************/ -/* Arithmetic and Bits */ +/* can fold */ +/* | has identity */ +/* | | identity value for arg[1] */ +/* | | | commutative */ +/* | | | | associative */ +/* | | | | | idempotent */ +/* | | | | | | c{eq,ne}[wl] */ +/* | | | | | | | c[us][gl][et][wl] */ +/* | | | | | | | | value if = args */ +/* | | | | | | | | | pinned */ +/* Arithmetic and Bits v v v v v v v v v v */ O(add, T(w,l,s,d, w,l,s,d), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1) O(sub, T(w,l,s,d, w,l,s,d), F(1,1,0,0,0,0,0,0,0,0)) X(2,1,0) V(0) O(neg, T(w,l,s,d, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0) diff --git a/parse.c b/parse.c index e2ffccf..7ab3ea5 100644 --- a/parse.c +++ b/parse.c @@ -22,7 +22,7 @@ Op optab[NOp] = { .commutes = co, .assoc = as, \ .idemp = im, \ .cmpeqwl = ic, .cmplgtewl = lg, .eqval = cv, \ - .ispinned = pn + .pinned = pn #define O(op, k, flags) [O##op]={.name = #op, .argcls = k, flags}, #include "ops.h" #undef F @@ -940,7 +940,7 @@ parsefn(Lnk *lnk) curf->mem = vnew(0, sizeof curf->mem[0], PFn); curf->nmem = 0; curf->nblk = nblk; - curf->rpo = 0; + curf->rpo = vnew(nblk, sizeof curf->rpo[0], PFn); for (b=curf->start; b; b=b->link) b->dlink = 0; /* was trashed by findblk() */ for (i=0; itmp; for (t=Tmp0; tntmp; t++) { tmp[t].def = 0; @@ -145,7 +144,7 @@ phiins(Fn *fn) continue; } bszero(u); - k = -1; + k = Kx; bp = be; for (b=fn->start; b; b=b->link) { b->visit = 0; diff --git a/test/copy.ssa b/test/copy.ssa new file mode 100644 index 0000000..5c2a4d0 --- /dev/null +++ b/test/copy.ssa @@ -0,0 +1,15 @@ +export function w $f() { +@start + %x0 =w loadsb $a + # the extension must not be eliminated + # even though the load already extended + %x1 =l extsb %x0 + %c =w ceql %x1, -1 + ret %c +} + +# >>> driver +# char a = -1; +# extern int f(); +# int main() { return !(f() == 1); } +# <<< diff --git a/test/gvn-live-dead.ssa b/test/gvn-live-dead.ssa deleted file mode 100644 index d47f05b..0000000 --- a/test/gvn-live-dead.ssa +++ /dev/null @@ -1,19 +0,0 @@ -export -function w $test(w %p1, w %p2) { -@start -@entry - %t1 =w copy 1 - jnz %t1, @live, @dead1 -@live - %t2 =w add %p1, %p2 - ret %t2 -@dead1 - %t2 =w add %p1, %p2 # live ins in dead blk -@dead2 - jnz %t1, @live, @dead1 -} - -# >>> driver -# extern int test(int p1, int p2); -# int main() { return test(1, 2) != 3; } -# <<< diff --git a/test/gvn1.ssa b/test/gvn1.ssa new file mode 100644 index 0000000..d47f05b --- /dev/null +++ b/test/gvn1.ssa @@ -0,0 +1,19 @@ +export +function w $test(w %p1, w %p2) { +@start +@entry + %t1 =w copy 1 + jnz %t1, @live, @dead1 +@live + %t2 =w add %p1, %p2 + ret %t2 +@dead1 + %t2 =w add %p1, %p2 # live ins in dead blk +@dead2 + jnz %t1, @live, @dead1 +} + +# >>> driver +# extern int test(int p1, int p2); +# int main() { return test(1, 2) != 3; } +# <<< diff --git a/test/gvn2.ssa b/test/gvn2.ssa new file mode 100644 index 0000000..33f9a96 --- /dev/null +++ b/test/gvn2.ssa @@ -0,0 +1,31 @@ +# GVN 0/non-0 inference removes @yesyes, @yesno, @noyes, @nono + +export +function w $test(w %c) { +@start + jnz %c, @yes, @no +@yes + %c0 =w cnew %c, 0 + jnz %c0, @yesyes, @yesno +@yesyes + %rc =w copy 1 + jmp @end +@yesno + %rc =w copy 111 + jmp @end +@no + %c1 =w cnew %c, 0 + jnz %c1, @noyes, @nono +@noyes + %rc =w copy 222 + jmp @end +@nono + %rc =w copy 0 +@end + ret %rc +} + +# >>> driver +# int test(int); +# int main(void) { return test(0); } +# <<< diff --git a/test/non0jnz.ssa b/test/non0jnz.ssa deleted file mode 100644 index 33f9a96..0000000 --- a/test/non0jnz.ssa +++ /dev/null @@ -1,31 +0,0 @@ -# GVN 0/non-0 inference removes @yesyes, @yesno, @noyes, @nono - -export -function w $test(w %c) { -@start - jnz %c, @yes, @no -@yes - %c0 =w cnew %c, 0 - jnz %c0, @yesyes, @yesno -@yesyes - %rc =w copy 1 - jmp @end -@yesno - %rc =w copy 111 - jmp @end -@no - %c1 =w cnew %c, 0 - jnz %c1, @noyes, @nono -@noyes - %rc =w copy 222 - jmp @end -@nono - %rc =w copy 0 -@end - ret %rc -} - -# >>> driver -# int test(int); -# int main(void) { return test(0); } -# <<< diff --git a/util.c b/util.c index 580bc43..3b5c09d 100644 --- a/util.c +++ b/util.c @@ -168,7 +168,7 @@ addbins(Blk *b, Ins **pvins, uint *pnins) { Ins *i; - for (i = b->ins; i < &b->ins[b->nins]; i++) + for (i=b->ins; i<&b->ins[b->nins]; i++) addins(pvins, pnins, i); } @@ -247,23 +247,50 @@ iscmp(int op, int *pk, int *pc) return 1; } -static int INVCMPWL[] = { - /*Oceqw*/Ocnew, /*Ocnew*/Oceqw, - /*Ocsgew*/Ocsltw, /*Ocsgtw*/Ocslew, /*Ocslew*/Ocsgtw, /*Ocsltw*/Ocsgew, - /*Ocugew*/Ocultw, /*Ocugtw*/Oculew, /*Oculew*/Ocugtw, /*Ocultw*/Ocugew, - /*Oceql*/Ocnel, /*Ocnel*/Oceql, - /*Ocsgel*/Ocsltl, /*Ocsgtl*/Ocslel, /*Ocslel*/Ocsgtl, /*Ocsltl*/Ocsgel, - /*Ocugel*/Ocultl, /*Ocugtl*/Oculel, /*Oculel*/Ocugtl, /*Ocultl*/Ocugel, -}; - -int -invcmpwl(int cmp) +void +igroup(Blk *b, Ins *i, Ins **i0, Ins **i1) { - assert(Oceqw <= cmp && cmp <= Ocultl); - return INVCMPWL[cmp - Oceqw]; -} - + Ins *ib, *ie; + ib = b->ins; + ie = ib + b->nins; + switch (i->op) { + case Oblit0: + *i0 = i; + *i1 = i + 2; + return; + case Oblit1: + *i0 = i - 1; + *i1 = i + 1; + return; + case_Opar: + for (; i>ib && ispar((i-1)->op); i--) + ; + *i0 = i; + for (; iop); i++) + ; + *i1 = i; + return; + case Ocall: + case_Oarg: + for (; i>ib && isarg((i-1)->op); i--) + ; + *i0 = i; + for (; iop != Ocall; i++) + ; + assert(i < ie); + *i1 = i + 1; + return; + default: + if (ispar(i->op)) + goto case_Opar; + if (isarg(i->op)) + goto case_Oarg; + *i0 = i; + *i1 = i + 1; + return; + } +} int argcls(Ins *i, int n) @@ -291,12 +318,8 @@ emiti(Ins i) void idup(Blk *b, Ins *s, ulong n) { - if (b->ins) - vgrow(&b->ins, n); - else - b->ins = vnew(n, sizeof(Ins), PFn); - if (n) - memcpy(b->ins, s, n * sizeof(Ins)); + vgrow(&b->ins, n); + icpy(b->ins, s, n); b->nins = n; } @@ -344,6 +367,16 @@ cmpop(int c) return cmptab[c][1]; } +int +cmpwlneg(int op) +{ + if (INRANGE(op, Ocmpw, Ocmpw1)) + return cmpneg(op - Ocmpw) + Ocmpw; + if (INRANGE(op, Ocmpl, Ocmpl1)) + return cmpneg(op - Ocmpl) + Ocmpl; + die("not a wl comparison"); +} + int clsmerge(short *pk, short k) { @@ -379,16 +412,21 @@ phiargn(Phi *p, Blk *b) { uint n; - for (n = 0; n < p->narg; n++) - if (p->blk[n] == b) - return n; - die("unreachable"); + if (p) + for (n=0; nnarg; n++) + if (p->blk[n] == b) + return n; + return -1; } Ref phiarg(Phi *p, Blk *b) { - return p->arg[phiargn(p, b)]; + uint n; + + n = phiargn(p, b); + assert(n != -1u && "block not found"); + return p->arg[n]; } Ref @@ -489,12 +527,6 @@ isconbits(Fn *fn, Ref r, int64_t *v) return 0; } -int -istmpconbits(Fn *fn, Ins *i, int64_t *v) -{ - return rtype(i->arg[0]) == RTmp && isconbits(fn, i->arg[1], v); -} - void salloc(Ref rt, Ref rs, Fn *fn) { -- cgit v1.2.3