aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoland Paterson-Jones <[email protected]>2024-12-07 10:22:30 +0200
committerQuentin Carbonneaux <[email protected]>2025-03-21 16:02:12 +0100
commit95541ccfb0a63a1737e9d6e5c4ed9dae4530b9e8 (patch)
tree6c28211b4ba244104f3fec89463f0f21e78aa8e4
parent348f2eac90ffc0f3f8fa35e8a211ef8048589cb8 (diff)
Simple Inner Loop Optimzationloopopt
Two simple loop optimizations. 1. Strength reduction of mul[tiplication] by loop induction variable. 2. Hoisting of (address) base into phi where loop induction variable is used only as a base (address) offset. Limited to loops with a single body block, which happily is always innermost loops. This restriction would not be very hard to lift - it would require detecting the set of loop blocks (and ensuring reducibility?) Limited to loop induction variables with 0 initial value and increment of 1 (for mul strength reduction). This limitation is trivial to lift; however all of the cproc/hare[c]/coremark opportunity is with 0/1 loops for mul reduction, and 0 initial value for base-offset opt.
-rw-r--r--Makefile3
-rw-r--r--all.h3
-rw-r--r--loopopt.c236
-rw-r--r--main.c2
4 files changed, 243 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 7acaf35..7bcfcec 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,8 @@ PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \
- copy.o fold.o gvn.o gcm.o simpl.o live.o spill.o rega.o emit.o
+ copy.o fold.o gvn.o gcm.o loopopt.o simpl.o live.o spill.o rega.o \
+ emit.o
AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o
ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o
RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o
diff --git a/all.h b/all.h
index 39a4f7c..a86c85b 100644
--- a/all.h
+++ b/all.h
@@ -595,6 +595,9 @@ void gvn(Fn *);
int pinned(Ins *);
void gcm(Fn *);
+/* loopopt.c */
+void loopopt(Fn *fn);
+
/* simpl.c */
void simpl(Fn *);
diff --git a/loopopt.c b/loopopt.c
new file mode 100644
index 0000000..0592be4
--- /dev/null
+++ b/loopopt.c
@@ -0,0 +1,236 @@
+#include "all.h"
+
+static int
+forloop(Blk *bh, Blk *bb)
+{
+ if (bh->npred == 2)
+ if (bh->jmp.type == Jjnz)
+ if (bb->npred == 1)
+ if (bb->jmp.type == Jjmp)
+ if (bb->jmp.s[0].b == bh)
+ return 1;
+ return 0;
+}
+
+static int
+doloop(Blk *bh)
+{
+ if (bh->npred == 2)
+ if (bh->jmp.type == Jjnz)
+ if (bh->jmp.s[0].b == bh || bh->jmp.s[1].b == bh)
+ return 1;
+ return 0;
+}
+
+static int
+loopvar(Fn *fn, Blk *bh, Blk *bb, uint np, Ref *prl, Ref *pr0, Ref *pri)
+{
+ Blk *bp;
+ Phi *p;
+ Suc *sb, *sp;
+ Tmp *t;
+ Ins *i;
+
+ assert(bh->npred == 2);
+ assert(bh->pred[0] == bb || bh->pred[1] == bb);
+ bp = bh->pred[bh->pred[0] == bb]; /* pre-header */
+ assert(np < bh->nphi);
+ p = &bh->phi[np];
+ if (KBASE(p->cls) != 0)
+ return 0; /* not integer */
+ sp = getsuc(bp, bh);
+ assert(sp->nr == bh->nphi);
+ *pr0 = sp->r[np];
+ sb = getsuc(bb, bh);
+ assert(sb->nr == bh->nphi);
+ *prl = sb->r[np];
+ if (rtype(*prl) != RTmp)
+ return 0; /* loop var must be an incremented tmp */
+ t = &fn->tmp[prl->val];
+ if (t->bid != bb->id || t->def == 0)
+ return 0; /* loop var must be defined by an add ins in the body */
+ i = t->def;
+ if (i->op != Oadd || req(i->arg[0], p->to) == req(i->arg[1], p->to))
+ return 0;
+ *pri = i->arg[req(i->arg[0], p->to)];
+ assert(p->cls == i->cls);
+ if (rtype(*pri) == RTmp) {
+ t = &fn->tmp[pri->val];
+ if (t->bid == bh->id || t->bid == bb->id)
+ return 0;
+ return 1;
+ }
+ else {
+ assert(rtype(*pri) == RCon);
+ return 1;
+ }
+ return 0;
+}
+
+static void
+mulredux(Fn *fn, Blk *bh, Blk *bb, Blk *bp, uint np, Ref rl, Ref r0, Ref ri, Ins **pvins, uint *pnins)
+{
+ Ins *i;
+ Tmp *t;
+ Ref r1, rlop;
+ Phi *p;
+ Suc *sb, *sp;
+
+ /* dead simple case for now... majority case in practice */
+ if (!req(r0, con01[0]) || !req(ri, con01[1]))
+ return;
+
+ assert(!req(rl, R)); /* rl unused */
+ assert(np < bh->nphi);
+ p = &bh->phi[np];
+ for (i = bb->ins; i < &bb->ins[bb->nins]; i++) {
+ /* TODO i->cls != p->cls is not necessary? */
+ if (i->op != Omul || i->cls != p->cls
+ || req(i->arg[0], p->to) == req(i->arg[1], p->to))
+ continue;
+ r1 = i->arg[req(i->arg[0], p->to)];
+ if (rtype(r1) == RTmp) {
+ t = &fn->tmp[r1.val];
+ if (t->bid == bh->id || t->bid == bb->id)
+ continue;
+ } else
+ assert(rtype(r1) == RCon);
+ assert(KBASE(i->cls) == 0);
+
+ rlop = newtmp("lop", i->cls, fn);
+ vgrow(&bh->phi, ++bh->nphi);
+ bh->phi[bh->nphi-1] = (Phi){.to = i->to, .cls = i->cls};
+ sp = getsuc(bp, bh);
+ vgrow(&sp->r, ++sp->nr);
+ sp->r[sp->nr-1] = con01[0];
+ assert(sp->nr == bh->nphi);
+ sb = getsuc(bb, bh);
+ vgrow(&sb->r, ++sb->nr);
+ sb->r[sp->nr-1] = rlop;
+ assert(sb->nr == bh->nphi);
+ addins(pvins, pnins, &(Ins){.to = rlop, .op = Oadd, .cls = i->cls,
+ .arg = {i->to, r1}});
+ *i = (Ins){.op = Onop};
+ }
+}
+
+static void
+addbase(Fn *fn, Blk *bh, Blk *bb, Blk *bp, uint np, Ref rl, Ref r0, Ref ri, Ins **pvins, uint *pnins)
+{
+ Ins *i, *ii, *ib;
+ Tmp *t, *ti, *tb;
+ Use *u;
+ Ref rb;
+ Phi *p;
+ Suc *sp;
+
+ /* dead simple case for now... majority case in practice */
+ if (!req(r0, con01[0]))
+ return;
+
+ assert(!req(ri, R)); /* unused */
+ assert(pvins && pnins); /* unused */
+ assert(np < bh->nphi);
+ p = &bh->phi[np];
+ assert(rtype(p->to) == RTmp);
+ t = &fn->tmp[p->to.val];
+ if (t->nuse != 2)
+ return;
+
+ i = ib = 0;
+ for (u = t->use; u < &t->use[t->nuse]; u++) {
+ if (u->type != UIns)
+ return;
+ i = u->u.ins;
+ if (i->op != Oadd || i->cls != p->cls || u->bid != bb->id)
+ return;
+ assert(req(i->arg[0], p->to) || req(i->arg[1], p->to));
+ if (req(i->to, rl)) {
+ assert(rtype(i->to) == RTmp);
+ ti = &fn->tmp[i->to.val];
+ if (ti->nuse != 1)
+ return;
+ assert(ti->use[0].type == UPhi);
+ assert(ti->use[0].bid == bh->id);
+ assert(ti->use[0].u.p.np == np);
+ assert(ti->use[0].u.p.pbid == bb->id);
+ ii = i;
+ continue;
+ }
+ rb = i->arg[req(i->arg[0], p->to)];
+ if (req(rb, p->to))
+ return;
+ if (rtype(rb) == RTmp) {
+ tb = &fn->tmp[rb.val];
+ if (tb->bid == bh->id || tb->bid == bb->id)
+ return;
+ } else
+ assert(rtype(rb) == RCon);
+ ib = i;
+ }
+ assert(ii);
+ assert(ib);
+ sp = getsuc(bp, bh);
+ assert(sp->nr == bh->nphi);
+ assert(req(sp->r[np], con01[0]));
+ sp->r[np] = rb;
+ assert(req(ii->arg[0], p->to) != req(ii->arg[1], p->to));
+ ii->arg[!req(ii->arg[0], p->to)] = ib->to;
+ p->to = ib->to;
+ *ib = (Ins){.op = Onop};
+}
+
+typedef void optfn_t(Fn *, Blk *, Blk *, Blk *, uint, Ref, Ref, Ref, Ins **, uint *);
+
+static void
+loopvaropt(Fn *fn, optfn_t *optfn)
+{
+ uint bid;
+ Blk *bh, *bb, *bp; /* header, body, pre-header */
+ Phi *p;
+ Ins *vins;
+ uint nins, np;
+ Ref rl, r0, ri; /* loop var, loop var init, loop var inc */
+
+ nins = 0;
+ vins = vnew(nins, sizeof vins[0], PFn); /* TODO use insb */
+
+ for (bid = 0; bid < fn->nblk; bid++) {
+ bh = fn->rpo[bid];
+ if (forloop(bh, bh->jmp.s[0].b))
+ bb = bh->jmp.s[0].b;
+ else if (forloop(bh, bh->jmp.s[1].b))
+ bb = bh->jmp.s[1].b;
+ else if (doloop(bh))
+ bb = bh; /* header and body */
+ else
+ continue;
+ bp = bh->pred[bh->pred[0] == bh];
+ assert(bh->loop > 1);
+ assert(bh->loop == bb->loop);
+ nins = 0;
+ for (np = 0; np < bh->nphi; np++)
+ if (loopvar(fn, bh, bb, np, &rl, &r0, &ri)) {
+ p = &bh->phi[np];
+ assert(KBASE(p->cls) == 0);
+ optfn(fn, bh, bb, bp, np, rl, r0, ri, &vins, &nins);
+ }
+ addnins(&bb->ins, &bb->nins, vins, nins);
+ }
+}
+
+void
+loopopt(Fn *fn)
+{
+ /* encourage simple loops */
+ ifelim(fn);
+ fillcfg(fn);
+ blkmerge(fn);
+ fillcfg(fn);
+ filluse(fn);
+ fillloop(fn);
+
+ loopvaropt(fn, mulredux);
+ filluse(fn);
+ loopvaropt(fn, addbase);
+}
diff --git a/main.c b/main.c
index 7a97f8a..ae73ba1 100644
--- a/main.c
+++ b/main.c
@@ -81,6 +81,8 @@ func(Fn *fn)
gcm(fn);
filluse(fn);
ssacheck(fn);
+ loopopt(fn);
+ filluse(fn);
T.abi1(fn);
simpl(fn);
fillcfg(fn);