author	Roland Paterson-Jones <[email protected]>	2024-06-19 16:48:11 +0200
committer	Quentin Carbonneaux <[email protected]>	2025-03-14 09:58:37 +0100
commit	c2ff93e75e5f6df8e1679120b18f0d5884deab2b (patch)
tree	b0f728874af29ea01e29f0575b4e5d8a3228a252 /gcm.c
parent	9e36cbe4d8f8c9ff3d739ff9ead2a4a4988c0904 (diff)
Global Value Numbering / Global Code Motion
More or less as proposed in its ninth iteration, with the addition of
gcmmove() functionality to restore coherent local schedules.

Changes since RFC 8:

Features:
- generalization of phi 1/0 detection
- collapse linear jmp chains before GVN; simplifies the if-graph
  detection used in 0/non-0 value inference and if-elim...
- infer 0/non-0 values from a dominating blk jnz; eliminates redundant
  cmp eq/ne 0 and associated jnz/blocks, for example redundant null
  pointer checks (the hare codebase likes this)
- remove (emergent) empty if-then-else graphlets between GVN and GCM;
  improves GCM instruction placement, particularly of cmps
- merge %addr =l add %addr1, N sequences - reduces tmp count and
  register pressure
- squash consecutive associative ops with constant args, e.g.
  t1 = add t, N ... t2 = add t1, M -> t2 = add t, N+M
  (see the sketch after this message)

Bug fixes:
- remove "cmp eq/ne of non-identical RCon's" in copyref(); RCon's are
  not guaranteed to be dedup'ed, and symbols can alias

Codebase:
- moved some stuff into cfg.c, including blkmerge()
- some refactoring in gvn.c
- simplification of reassoc.c - always reassoc all cmp ops and
  Kl add %t, N; better on coremark, smaller codebase
- minor simplification of movins() - use vins

Testing:
- standard QBE, cproc, hare, harec, coremark [still have Rust build
  issues with latest roland]

Benchmarks:
- coremark is ~15%+ faster than master
- hare "HARETEST_INCLUDE='slow' make check" is ~8% faster
  (crypto::sha1::sha1_1gb is the biggest obvious win - ~25% faster)

Changes since RFC 7:

Bug fixes:
- remove isbad4gcm() in GVN/GCM - it is unsound due to different state
  at GVN vs GCM time; replaced with a "reassociation" pass after GCM
- fix intra-blk use-before-def after GCM
- prevent GVN from deduping trapping instructions, because GCM will
  not move them
- remove cmp eq/ne identical-arg copy detection for floating point;
  it is not valid for NaN
- fix cges/cged flagged as commutative in ops.h instead of cnes/cned
  respectively; just a typo

Minor features:
- copy detection handles cmp le/lt/ge/gt with identical args
- treat (integer) div/rem by a non-zero constant as non-trapping
- eliminate add N/sub N pairs in copy detection
- maintain accurate tmp use counts in GVN; not strictly necessary, but
  enables interim global state sanity checking
- "reassociation" of trivial constant-offset load/store addresses and
  cmp ops to their point of use, in a pass after GCM
- normalise commutative op arg order, e.g. op con, tmp -> op tmp, con,
  to simplify copy detection and GVN instruction dedup

Codebase:
- split core copy detection and constant folding (back) out into
  copy.c and fold.c respectively; gvn.c was getting monolithic
- generic support for instruction moving in ins.c - used by GCM and
  reassoc
- new reassociation pass in reassoc.c
- other minor clean-up/refactoring

Changes since RFC 6:
- more ext elimination in GVN by examination of def and use bit widths
- elimination of redundant and masks by bit width examination
- incorporation of Song's patch

Changes since RFC 5:
- avoidance of "bad" candidates for GVN/GCM - trivial address offset
  calculations and comparisons
- more copy detection, mostly around boolean values
- allow elimination of unused load, alloc and trapping instructions
- detection of trivial boolean v ? 1 : 0 phi patterns
- bug fix for (removal of) the "chg" optimisation in ins recreation -
  it was missing removal of unused instructions in some cases
- ifelim() between GVN and GCM; deeper nopunused()
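
To illustrate the associative-op squash above, a minimal IL sketch
(hypothetical temporaries; not taken from this patch):

    %t1 =l add %t, 8     # t1 = t + 8
    %t2 =l add %t1, 16   # t2 = t1 + 16
    =>
    %t2 =l add %t, 24    # constants folded into a single add

If nothing else uses %t1, it becomes dead and is removed by the
unused-instruction sweep (nopunused()).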
Diffstat (limited to 'gcm.c')
-rw-r--r--	gcm.c	507
1 file changed, 507 insertions, 0 deletions
diff --git a/gcm.c b/gcm.c
new file mode 100644
index 0000000..515d41f
--- /dev/null
+++ b/gcm.c
@@ -0,0 +1,507 @@
+#include "all.h"
+
+#define NOBID (-1u)
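+
+/* NOBID marks a tmp with no scheduled blk: */
+/* unvisited in the early pass, unused in the late pass */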
+
+/* ins can trap at runtime */
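+/* currently integer div/rem where the divisor is not a known non-zero constant */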
+static int
+istrapping(Fn *fn, Ins *i)
+{
+ int64_t v;
+
+ if (KBASE(i->cls) == 0)
+ if (INRANGE(i->op, Odiv, Ourem))
+ if (!isconbits(fn, i->arg[1], &v) || v == 0)
+ return 1;
+ return 0;
+}
+
+/* fixed ins that can be eliminated if unused */
+static int
+canelim(Fn *fn, Ins *i)
+{
+ return isload(i->op) || isalloc(i->op) || istrapping(fn, i);
+}
+
+/* ins must stay in def blk */
+int
+isfixed(Fn *fn, Ins *i)
+{
+ return optab[i->op].ispinned || istrapping(fn, i);
+}
+
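+/* Scheduling is two-phase, following Click's GCM (see the paper */
+/* link above gcm() below): schedearly() computes the earliest blk */
+/* in which all of an ins's args are available; schedlate() computes */
+/* the latest blk dominating all of its uses; bestbid() then picks */
+/* the blk with the smallest loop depth on the dom path between them. */
+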
+static uint earlyins(Fn *, Blk *, Ins *);
+
+/* return earlybid of ref def ins */
+static uint
+schedearly(Fn *fn, Ref r)
+{
+ Tmp *t;
+ Blk *b;
+
+ if (rtype(r) != RTmp)
+ return 0 /* root/start */;
+
+ t = &fn->tmp[r.val];
+ if (t->gcmbid != NOBID)
+ return t->gcmbid; /* already visited/visiting */
+
+ b = fn->rpo[t->bid];
+ if (t->def) {
+ /* def is an ins */
+ assert(b->ins <= t->def && t->def < &b->ins[b->nins]);
+ t->gcmbid = 0; /* mark visiting root/start blk */
+ t->gcmbid = earlyins(fn, b, t->def); /* schedule ins input defs */
+ } else {
+ /* def is a phi - it stays in its def blk */
+ t->gcmbid = t->bid;
+ }
+
+ return t->gcmbid;
+}
+
+/* return deepest domdpth bid of arg defs */
+static uint
+earlyins(Fn *fn, Blk *b, Ins *i)
+{
+ uint earlybid, arg1earlybid;
+
+ earlybid = schedearly(fn, i->arg[0]);
+ assert(earlybid != NOBID);
+ arg1earlybid = schedearly(fn, i->arg[1]);
+ assert(arg1earlybid != NOBID);
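+ /* arg def blks lie on a single dom path; take the deeper one */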
+ if (fn->rpo[earlybid]->domdpth < fn->rpo[arg1earlybid]->domdpth) {
+ assert(dom(fn->rpo[earlybid], fn->rpo[arg1earlybid]));
+ earlybid = arg1earlybid;
+ }
+
+ /* fixed ins remain in their defining blk */
+ return isfixed(fn, i) ? b->id : earlybid;
+}
+
+static void
+earlyblk(Fn *fn, uint bid)
+{
+ Blk *b;
+ Phi *p;
+ Ins *i;
+ uint n;
+
+ b = fn->rpo[bid];
+ for (p = b->phi; p; p = p->link)
+ for (n = 0; n < p->narg; n++)
+ schedearly(fn, p->arg[n]);
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ if (isfixed(fn, i))
+ earlyins(fn, b, i);
+ schedearly(fn, b->jmp.arg);
+}
+
+/* least common ancestor in dom tree */
+static uint
+lcabid(Fn *fn, uint bid1, uint bid2)
+{
+ Blk *b;
+
+ if (bid1 == NOBID)
+ return bid2;
+ if (bid2 == NOBID)
+ return bid1;
+
+ b = lca(fn->rpo[bid1], fn->rpo[bid2]);
+ assert(b);
+ return b->id;
+}
+
+static uint
+bestbid(Fn *fn, uint earlybid, uint latebid)
+{
+ Blk *curb, *earlyb, *bestb;
+
+ if (latebid == NOBID)
+ return NOBID; /* unused */
+
+ assert(earlybid != NOBID);
+
+ earlyb = fn->rpo[earlybid];
+ bestb = curb = fn->rpo[latebid];
+ assert(dom(earlyb, curb));
+
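+ /* walk up the dom tree from the late blk towards the early blk, */
+ /* keeping the blk with the smallest loop nesting depth */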
+ while (curb != earlyb) {
+ curb = curb->idom;
+ if (curb->loop < bestb->loop)
+ bestb = curb;
+ }
+ return bestb->id;
+}
+
+static uint lateins(Fn *, Blk *, Ins *, Ref);
+static uint latephi(Fn *, Phi *, Ref);
+static uint latejmp(Blk *, Ref);
+
+/* return lca bid of ref uses */
+static uint
+schedlate(Fn *fn, Ref r)
+{
+ Tmp *t;
+ Blk *b;
+ Use *u;
+ uint earlybid;
+ uint latebid;
+ uint uselatebid;
+
+ if (rtype(r) != RTmp)
+ return NOBID;
+
+ t = &fn->tmp[r.val];
+ if (t->visit)
+ return t->gcmbid; /* already visited/visiting */
+
+ t->visit = 1; /* mark visiting/visited */
+ earlybid = t->gcmbid;
+ if (earlybid == NOBID)
+ return NOBID; /* not used */
+ t->gcmbid = t->bid; /* t->gcmbid is now latebid */
+
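+ /* latebid is the lca (in the dom tree) over all uses; a phi use */
+ /* counts as a use in the corresponding pred blk (see latephi) */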
+ latebid = NOBID;
+ for (u = t->use; u < &t->use[t->nuse]; u++) {
+ assert(u->bid < fn->nblk);
+ b = fn->rpo[u->bid];
+ switch (u->type) {
+ case UXXX:
+ die("unreachable");
+ break;
+ case UPhi:
+ uselatebid = latephi(fn, u->u.phi, r);
+ break;
+ case UIns:
+ uselatebid = lateins(fn, b, u->u.ins, r);
+ break;
+ case UJmp:
+ uselatebid = latejmp(b, r);
+ break;
+ }
+ latebid = lcabid(fn, latebid, uselatebid);
+ }
+
+ /* phis stay in their def blk */
+ if (t->def) {
+ /* allow elimination of unused load/alloc/trapping ins */
+ if (latebid == NOBID && canelim(fn, t->def))
+ t->gcmbid = NOBID;
+ /* ... otherwise fixed ins stay in defining blk */
+ else if (!isfixed(fn, t->def))
+ t->gcmbid = bestbid(fn, earlybid, latebid);
+ }
+
+ return t->gcmbid;
+}
+
+/* return lca bid of uses, or NOBID if no active uses */
+static uint
+lateins(Fn *fn, Blk *b, Ins *i, Ref r)
+{
+ uint latebid;
+
+ assert(i->op == Onop || req(i->arg[0], r) || req(i->arg[1], r));
+ if (i->op == Onop)
+ return NOBID; /* already eliminated */
+
+ assert(b->ins <= i && i < &b->ins[b->nins]);
+
+ latebid = schedlate(fn, i->to);
+ /* allow elimination of unused load/alloc/trapping ins */
+ if (latebid == NOBID)
+ if (canelim(fn, i))
+ return NOBID;
+ /* ... otherwise fixed ins stay in defining blk */
+ return isfixed(fn, i) ? b->id : latebid;
+}
+
+static uint
+latephi(Fn *fn, Phi *p, Ref r)
+{
+ uint n;
+ uint latebid;
+
+ if (!p->narg)
+ return NOBID; /* marked as unused */
+
+ latebid = NOBID;
+ for (n = 0; n < p->narg; n++)
+ if (req(p->arg[n], r))
+ latebid = lcabid(fn, latebid, p->blk[n]->id);
+
+ assert(latebid != NOBID);
+ return latebid;
+}
+
+static uint
+latejmp(Blk *b, Ref r)
+{
+ if (req(b->jmp.arg, R))
+ return NOBID;
+ else {
+ assert(req(b->jmp.arg, r));
+ return b->id;
+ }
+}
+
+static void
+lateblk(Fn *fn, uint bid)
+{
+ Blk *b;
+ Phi **pp;
+ Ins *i;
+
+ b = fn->rpo[bid];
+ for (pp = &b->phi; *pp;)
+ if (schedlate(fn, (*pp)->to) == NOBID) {
+ /* unused */
+ (*pp)->narg = 0; /* mark unused */
+ *pp = (*pp)->link; /* remove phi */
+ } else
+ pp = &(*pp)->link;
+
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ if (isfixed(fn, i))
+ lateins(fn, b, i, i->arg[0]);
+}
+
+static void
+addgcmins(Fn *fn, Ins *vins, uint nins)
+{
+ Ins *i;
+ Tmp *t;
+ Blk *b;
+
+ for (i = vins; i < &vins[nins]; i++) {
+ assert(rtype(i->to) == RTmp);
+ t = &fn->tmp[i->to.val];
+ b = fn->rpo[t->gcmbid];
+ addins(&b->ins, &b->nins, i);
+ }
+}
+
+/* remove unused instructions */
+/* move instructions to (end of) target block */
+/* use-before-def is fixed up afterwards */
+static void
+gcmmove(Fn *fn)
+{
+ Tmp *t;
+ Ins *vins, *i;
+ uint nins;
+
+ nins = 0;
+ vins = vnew(nins, sizeof vins[0], PFn);
+
+ for (t=&fn->tmp[Tmp0]; t < &fn->tmp[fn->ntmp]; t++) {
+ if (t->def == 0)
+ continue;
+ if (t->bid == t->gcmbid)
+ continue;
+ i = t->def;
+ if (isfixed(fn, i) && !canelim(fn, i))
+ continue;
+ assert(rtype(i->to) == RTmp);
+ assert(t == &fn->tmp[i->to.val]);
+ if (t->gcmbid != NOBID)
+ addins(&vins, &nins, i);
+ *i = (Ins){.op = Onop};
+ }
+
+ addgcmins(fn, vins, nins);
+}
+
+static void
+schedins(Fn *fn, Blk *b, Ins *i0, Ins **pvins, uint *pnins)
+{
+ Ins *i, *i1;
+ Tmp *t;
+ uint na;
+
+ if (i0->op == Onop)
+ return;
+ /* arg...call have to stay together */
+ /* TODO - sel0...sel1 too */
+ for (i1 = i0; isarg(i1->op); i1++) {}
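+ /* i1 is the call ending the arg run, or i0 if i0 is not an arg */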
+ for (i = i0; i <= i1; i++)
+ for (na = 0; na < 2; na++) {
+ if (rtype(i->arg[na]) != RTmp)
+ continue;
+ t = &fn->tmp[i->arg[na].val];
+ /* def in different blk, or phi in this blk */
+ if (t->bid != b->id || t->def == 0)
+ continue;
+ /* already scheduled */
+ if (t->def->op == Onop)
+ continue;
+ schedins(fn, b, t->def, pvins, pnins);
+ }
+ for (i = i0; i <= i1; i++) {
+ addins(pvins, pnins, i);
+ *i = (Ins){.op = Onop};
+ }
+}
+
+static void
+fixbub4d(Fn *fn, Blk *b, Ins **pvins)
+{
+ Ins *i;
+ uint nins;
+
+ nins = 0;
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ schedins(fn, b, i, pvins, &nins);
+ idup(b, *pvins, nins);
+}
+
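+/* fix intra-blk use-before-def left by gcmmove(): re-emit each */
+/* blk's ins with defs ahead of their local uses (see schedins) */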
+static void
+fixub4d(Fn *fn)
+{
+ Blk *b;
+ Ins *vins; /* TODO insb */
+
+ vins = vnew(0, sizeof vins[0], PFn);
+ for (b = fn->start; b; b = b->link)
+ fixbub4d(fn, b, &vins);
+}
+
+static int
+iskladdcon(Fn *fn, Ins *i, int64_t *v)
+{
+ if (i->op == Oadd)
+ if (i->cls == Kl)
+ if (rtype(i->arg[0]) == RTmp)
+ if (isconbits(fn, i->arg[1], v))
+ return 1;
+ return 0;
+}
+
+static void
+ireassoc(Fn *fn, Blk *b, Ins *i, Ins **pvins, uint *pnins)
+{
+ Blk *b2;
+ Tmp *t, *t2;
+ Use *u;
+ Ref r2;
+ int64_t v;
+ int x;
+
+ assert(b->ins <= i && i < &b->ins[b->nins]);
+ if (!iscmp(i->op, &x, &x))
+ if (!iskladdcon(fn, i, &v))
+ return;
+
+ assert(rtype(i->to) == RTmp);
+ t = &fn->tmp[i->to.val];
+ for (u = t->use; u < &t->use[t->nuse]; u++) {
+ if (u->type == UPhi)
+ continue;
+ b2 = fn->rpo[u->bid];
+ addins(pvins, pnins, i);
+ /* careful, can move fn->tmp */
+ r2 = newtmp("rea", t->cls, fn);
+ t = &fn->tmp[i->to.val];
+ t2 = &fn->tmp[r2.val];
+ t2->gcmbid = u->bid;
+ (*pvins)[(*pnins)-1].to = r2;
+ if (u->type == UIns) {
+ assert(req(u->u.ins->arg[0], i->to)
+ || req(u->u.ins->arg[1], i->to));
+ if (req(u->u.ins->arg[0], i->to))
+ u->u.ins->arg[0] = r2;
+ if (req(u->u.ins->arg[1], i->to))
+ u->u.ins->arg[1] = r2;
+ } else {
+ assert(u->type == UJmp);
+ assert(req(b2->jmp.arg, i->to));
+ b2->jmp.arg = r2;
+ }
+ }
+}
+
+/* Redistribute trivial ops to point of use. */
+/* Reduces register pressure. */
+/* needs rpo, use; breaks use */
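+/* e.g. a single %a =l add %p, 8 used in three blks is re-emitted */
+/* as a fresh tmp in each use's blk; the original def goes unused */
+/* (%a and %p are hypothetical names for illustration) */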
+void
+reassoc(Fn *fn)
+{
+ Blk *b;
+ Ins *vins, *i;
+ uint nins;
+
+ nins = 0;
+ vins = vnew(nins, sizeof vins[0], PFn);
+
+ /* identify trivial ins */
+ for (b = fn->start; b; b = b->link) {
+ for (i = b->ins; i < &b->ins[b->nins]; i++)
+ ireassoc(fn, b, i, &vins, &nins);
+ }
+
+ addgcmins(fn, vins, nins);
+}
+
+static void
+cleartmps(Fn *fn)
+{
+ Tmp *t;
+
+ for (t=&fn->tmp[Tmp0]; t < &fn->tmp[fn->ntmp]; t++) {
+ t->visit = 0;
+ t->gcmbid = NOBID;
+ t->gcminsn = -1u; /* TODO - get rid */
+ }
+}
+
+/* https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf */
+/* require use dom */
+/* maintains rpo pred dom */
+/* breaks use */
+void
+gcm(Fn *fn)
+{
+ uint bid;
+
+ /* fprintf(stderr, "\n\nBefore gcm:\n\n"); */
+ /* printfn(fn, stderr); */
+
+ filldomdpth(fn);
+ fillloop(fn);
+
+ cleartmps(fn);
+ for (bid=0; bid<fn->nblk; bid++)
+ earlyblk(fn, bid);
+ for (bid=0; bid<fn->nblk; bid++)
+ lateblk(fn, bid);
+
+ /* fprintf(stderr, "\n\nBefore gcmmove/fixub4d:\n\n"); */
+ /* printfn(fn, stderr); */
+
+ gcmmove(fn);
+ cleartmps(fn);
+ /* filluse(fn); */
+ /* fixub4d(fn); */
+
+ /* fprintf(stderr, "\n\nAfter gcmmove/fixub4d:\n\n"); */
+ /* printfn(fn, stderr); */
+
+ /* fillcfg(fn); */
+ filluse(fn);
+ reassoc(fn);
+
+ /* cleartmps(fn); */
+ filluse(fn);
+ fixub4d(fn);
+
+ /* delete (now) unused ins - possibly redundant with a later pass */
+ filluse(fn);
+ nopunused(fn);
+
+ if (debug['G']) {
+ fprintf(stderr, "\n> After GCM:\n");
+ printfn(fn, stderr);
+ }
+}