aboutsummaryrefslogtreecommitdiff
path: root/rega.c
diff options
context:
space:
mode:
authorQuentin Carbonneaux <[email protected]>2016-03-28 12:53:53 -0400
committerQuentin Carbonneaux <[email protected]>2016-03-29 10:10:22 -0400
commitb75cb8388fb9b5f2393443d008bb46c522c5ec9b (patch)
tree25268fe5f71d826ee1f8f0e3a2a82aa68f9bf750 /rega.c
parent1b4943eb1f2a10837f56070bfe604179d0dc10e0 (diff)
new layout, put LICENSE in root
Diffstat (limited to 'rega.c')
-rw-r--r--rega.c598
1 files changed, 598 insertions, 0 deletions
diff --git a/rega.c b/rega.c
new file mode 100644
index 0000000..7f8edcf
--- /dev/null
+++ b/rega.c
@@ -0,0 +1,598 @@
+#include "all.h"
+
+#ifdef TEST_PMOV
+ #undef assert
+ #define assert(x) assert_test(#x, x)
+#endif
+
+typedef struct RMap RMap;
+
+struct RMap {
+ int t[NIReg+NFReg];
+ int r[NIReg+NFReg];
+ BSet b[1];
+ int n;
+};
+
+static bits regu; /* registers used */
+static Tmp *tmp; /* function temporaries */
+static Mem *mem; /* function mem references */
+static struct {
+ Ref src, dst;
+ int cls;
+} *pm; /* parallel move constructed */
+static int cpm, npm; /* capacity and size of pm */
+
+static int *
+hint(int t)
+{
+ return &tmp[phicls(t, tmp)].hint.r;
+}
+
+static void
+sethint(int t, int r)
+{
+ bits m;
+
+ m = tmp[phicls(t, tmp)].hint.m;
+ if (*hint(t) == -1)
+ if (!(BIT(r) & m))
+ *hint(t) = r;
+}
+
+static void
+rcopy(RMap *ma, RMap *mb)
+{
+ memcpy(ma->t, mb->t, sizeof ma->t);
+ memcpy(ma->r, mb->r, sizeof ma->r);
+ bscopy(ma->b, mb->b);
+ ma->n = mb->n;
+}
+
+static int
+rfind(RMap *m, int t)
+{
+ int i;
+
+ for (i=0; i<m->n; i++)
+ if (m->t[i] == t)
+ return m->r[i];
+ return -1;
+}
+
+static Ref
+rref(RMap *m, int t)
+{
+ int r, s;
+
+ r = rfind(m, t);
+ if (r == -1) {
+ s = tmp[t].slot;
+ assert(s != -1 && "should have spilled");
+ return SLOT(s);
+ } else
+ return TMP(r);
+}
+
+static void
+radd(RMap *m, int t, int r)
+{
+ assert((t >= Tmp0 || t == r) && "invalid temporary");
+ assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
+ assert(!bshas(m->b, t) && "temporary has mapping");
+ assert(!bshas(m->b, r) && "register already allocated");
+ assert(m->n <= NIReg+NFReg && "too many mappings");
+ bsset(m->b, t);
+ bsset(m->b, r);
+ m->t[m->n] = t;
+ m->r[m->n] = r;
+ m->n++;
+ regu |= BIT(r);
+}
+
+static Ref
+ralloc(RMap *m, int t)
+{
+ bits regs;
+ int r, r0, r1;
+
+ if (t < Tmp0) {
+ assert(bshas(m->b, t));
+ return TMP(t);
+ }
+ if (bshas(m->b, t)) {
+ r = rfind(m, t);
+ assert(r != -1);
+ return TMP(r);
+ }
+ r = *hint(t);
+ if (r == -1 || bshas(m->b, r)) {
+ regs = tmp[phicls(t, tmp)].hint.m;
+ regs |= m->b->t[0];
+ switch (KBASE(tmp[t].cls)) {
+ case 0:
+ r0 = RAX;
+ r1 = RAX + NIReg;
+ break;
+ case 1:
+ r0 = XMM0;
+ r1 = XMM0 + NFReg;
+ break;
+ }
+ for (r=r0; r<r1; r++)
+ if (!(regs & BIT(r)))
+ goto Found;
+ for (r=r0; r<r1; r++)
+ if (!bshas(m->b, r))
+ goto Found;
+ diag("rega: no more regs");
+ }
+Found:
+ radd(m, t, r);
+ sethint(t, r);
+ return TMP(r);
+}
+
+static int
+rfree(RMap *m, int t)
+{
+ int i, r;
+
+ if (!bshas(m->b, t))
+ return -1;
+ for (i=0; m->t[i] != t; i++)
+ assert(i+1 < m->n);
+ r = m->r[i];
+ bsclr(m->b, t);
+ bsclr(m->b, r);
+ m->n--;
+ memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
+ memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
+ return r;
+}
+
+static void
+mdump(RMap *m)
+{
+ int i;
+
+ for (i=0; i<m->n; i++)
+ fprintf(stderr, " (%s, R%d)",
+ tmp[m->t[i]].name,
+ m->r[i]);
+ fprintf(stderr, "\n");
+}
+
+static void
+pmadd(Ref src, Ref dst, int k)
+{
+ if (npm == cpm) {
+ cpm = cpm * 2 + 16;
+ pm = realloc(pm, cpm * sizeof pm[0]);
+ if (!pm)
+ diag("pmadd: out of memory");
+ }
+ pm[npm].src = src;
+ pm[npm].dst = dst;
+ pm[npm].cls = k;
+ npm++;
+}
+
+enum PMStat { ToMove, Moving, Moved };
+
+static Ref
+pmrec(enum PMStat *status, int i, int *k)
+{
+ Ref swp, swp1;
+ int j, k1;
+
+ /* note, this routine might emit
+ * too many large instructions:
+ *
+ * , x -- x
+ * x -- x -- x |
+ * ` x -- x
+ *
+ * if only the first move is wide
+ * the whole cycle will be wide,
+ * this is safe but not necessary
+ */
+
+ if (req(pm[i].src, pm[i].dst))
+ return R;
+ status[i] = Moving;
+ assert(KBASE(*k) == KBASE(pm[i].cls));
+ assert((Kw|1) == Kl && (Ks|1) == Kd);
+ *k |= KWIDE(pm[i].cls); /* see above */
+ swp = R;
+ for (j=0; j<npm; j++) {
+ if (req(pm[j].src, pm[i].dst))
+ switch (status[j]) {
+ case ToMove:
+ k1 = *k;
+ swp1 = pmrec(status, j, &k1);
+ if (!req(swp1, R)) {
+ assert(req(swp, R));
+ swp = swp1;
+ *k = k1;
+ }
+ break;
+ case Moving:
+ assert(req(swp, R));
+ swp = pm[i].dst;
+ break;
+ case Moved:
+ break;
+ }
+ }
+ status[i] = Moved;
+ if (req(swp, R)) {
+ *curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls};
+ return R;
+ } else if (!req(swp, pm[i].src)) {
+ *curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k};
+ return swp;
+ } else
+ return R;
+
+}
+
+static void
+pmgen()
+{
+ int i, k;
+ enum PMStat *status;
+
+ status = alloc(npm * sizeof status[0]);
+ assert(!npm || status[npm-1] == ToMove);
+ curi = insb;
+ for (i=0; i<npm; i++)
+ if (status[i] == ToMove) {
+ k = pm[i].cls;
+ pmrec(status, i, &k);
+ }
+}
+
+static void
+move(int r, Ref to, RMap *m)
+{
+ int n, t, r1;
+
+ r1 = req(to, R) ? -1 : rfree(m, to.val);
+ if (bshas(m->b, r) && r1 != r) {
+ /* r is used and not by to */
+ for (n=0; m->r[n] != r; n++)
+ assert(n+1 < m->n);
+ t = m->t[n];
+ rfree(m, t);
+ bsset(m->b, r);
+ ralloc(m, t);
+ bsclr(m->b, r);
+ }
+ t = req(to, R) ? r : to.val;
+ radd(m, t, r);
+}
+
+static int
+regcpy(Ins *i)
+{
+ return i->op == OCopy && isreg(i->arg[0]);
+}
+
+static Ins *
+dopm(Blk *b, Ins *i, RMap *m)
+{
+ RMap m0;
+ int n, r, r1, t, s;
+ Ins *i0, *i1, *ip, *ir;
+ bits def;
+
+ m0 = *m;
+ i1 = ++i;
+ do {
+ i--;
+ move(i->arg[0].val, i->to, m);
+ } while (i != b->ins && regcpy(i-1));
+ assert(m0.n <= m->n);
+ if (i != b->ins && (i-1)->op == OCall) {
+ def = retregs((i-1)->arg[1], 0);
+ for (r=0; r<NRSave; r++)
+ if (!(BIT(rsave[r]) & def))
+ move(rsave[r], R, m);
+ }
+ for (npm=0, n=0; n<m->n; n++) {
+ t = m->t[n];
+ s = tmp[t].slot;
+ r1 = m->r[n];
+ r = rfind(&m0, t);
+ if (r != -1)
+ pmadd(TMP(r1), TMP(r), tmp[t].cls);
+ else if (s != -1)
+ pmadd(TMP(r1), SLOT(s), tmp[t].cls);
+ }
+ for (ip=i; ip<i1; ip++) {
+ if (!req(ip->to, R))
+ rfree(m, ip->to.val);
+ r = ip->arg[0].val;
+ if (rfind(m, r) == -1)
+ radd(m, r, r);
+ }
+ pmgen();
+#ifdef TEST_PMOV
+ return 0;
+#endif
+ n = b->nins - (i1 - i) + (curi - insb);
+ i0 = alloc(n * sizeof(Ins));
+ ip = icpy(ip = i0, b->ins, i - b->ins);
+ ip = icpy(ir = ip, insb, curi - insb);
+ ip = icpy(ip, i1, &b->ins[b->nins] - i1);
+ b->nins = n;
+ b->ins = i0;
+ return ir;
+}
+
+static int
+prio(Ref r1, Ref r2)
+{
+ /* trivial heuristic to begin with,
+ * later we can use the distance to
+ * the definition instruction
+ */
+ (void) r2;
+ return *hint(r1.val) != -1;
+}
+
+static void
+insert(Ref *r, Ref **rs, int p)
+{
+ int i;
+
+ rs[i = p] = r;
+ while (i-- > 0 && prio(*r, *rs[i])) {
+ rs[i+1] = rs[i];
+ rs[i] = r;
+ }
+}
+
+static void
+doblk(Blk *b, RMap *cur)
+{
+ int x, r, nr;
+ bits rs;
+ Ins *i;
+ Mem *m;
+ Ref *ra[4];
+
+ if (rtype(b->jmp.arg) == RTmp)
+ b->jmp.arg = ralloc(cur, b->jmp.arg.val);
+ else if (rtype(b->jmp.arg) == RACall) {
+ /* add return registers */
+ rs = retregs(b->jmp.arg, 0);
+ for (r=0; rs; rs/=2, r++)
+ if (rs & 1)
+ radd(cur, r, r);
+ }
+ for (i=&b->ins[b->nins]; i!=b->ins;) {
+ switch ((--i)->op) {
+ case OCall:
+ rs = argregs(i->arg[1], 0);
+ for (r=0; r<NRSave; r++)
+ if (!(BIT(rsave[r]) & rs))
+ rfree(cur, rsave[r]);
+ break;
+ case OCopy:
+ if (isreg(i->arg[0])) {
+ i = dopm(b, i, cur);
+ continue;
+ }
+ if (isreg(i->to))
+ if (rtype(i->arg[0]) == RTmp)
+ sethint(i->arg[0].val, i->to.val);
+ /* fall through */
+ default:
+ if (!req(i->to, R)) {
+ assert(rtype(i->to) == RTmp);
+ r = rfree(cur, i->to.val);
+ if (r == -1 && !isreg(i->to)) {
+ *i = (Ins){.op = ONop};
+ continue;
+ }
+ if (i->to.val >= Tmp0)
+ i->to = TMP(r);
+ }
+ break;
+ }
+ for (x=0, nr=0; x<2; x++)
+ switch (rtype(i->arg[x])) {
+ case RAMem:
+ m = &mem[i->arg[x].val & AMask];
+ if (rtype(m->base) == RTmp)
+ insert(&m->base, ra, nr++);
+ if (rtype(m->index) == RTmp)
+ insert(&m->index, ra, nr++);
+ break;
+ case RTmp:
+ insert(&i->arg[x], ra, nr++);
+ break;
+ }
+ for (r=0; r<nr; r++)
+ *ra[r] = ralloc(cur, ra[r]->val);
+ }
+}
+
+/* register allocation
+ * depends on rpo, phi, cost, (and obviously spill)
+ */
+void
+rega(Fn *fn)
+{
+ int j, n, t, r, r1, x, rl[Tmp0];
+ Blk *b, *b1, *s, ***ps, *blist;
+ RMap *end, *beg, cur, old;
+ Ins *i;
+ Phi *p;
+ uint u;
+ Ref src, dst;
+
+ /* 1. setup */
+ regu = 0;
+ tmp = fn->tmp;
+ mem = fn->mem;
+ end = alloc(fn->nblk * sizeof end[0]);
+ beg = alloc(fn->nblk * sizeof beg[0]);
+ for (n=0; n<fn->nblk; n++) {
+ bsinit(end[n].b, fn->ntmp);
+ bsinit(beg[n].b, fn->ntmp);
+ }
+ bsinit(cur.b, fn->ntmp);
+ bsinit(old.b, fn->ntmp);
+
+ for (t=Tmp0; t<fn->ntmp; t++)
+ *hint(t) = -1;
+ for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
+ if (i->op != OCopy || !isreg(i->arg[0]))
+ break;
+ else {
+ assert(rtype(i->to) == RTmp);
+ sethint(i->to.val, i->arg[0].val);
+ }
+
+ /* 2. assign registers following post-order */
+ for (n=fn->nblk-1; n>=0; n--) {
+ b = fn->rpo[n];
+ cur.n = 0;
+ bszero(cur.b);
+ for (x=0; x<2; x++)
+ for (t=Tmp0; t<fn->ntmp; t++) {
+ assert(bshas(b->out, t) ||
+ !bshas(cur.b, t));
+ if (bshas(b->out, t))
+ if (!bshas(cur.b, t))
+ if (x || (r=*hint(t)) != -1)
+ if (x || !bshas(cur.b, r))
+ ralloc(&cur, t);
+ }
+ rcopy(&end[n], &cur);
+ doblk(b, &cur);
+ bscopy(b->in, cur.b);
+ for (p=b->phi; p; p=p->link)
+ if (rtype(p->to) == RTmp) {
+ bsclr(b->in, p->to.val);
+ /* heuristic 0:
+ * if the phi destination has an
+ * argument from a frequent block
+ * that was already allocated to
+ * 'r', use 'r' as the new hint
+ */
+ memset(rl, 0, sizeof rl);
+ for (u=0; u<p->narg; u++) {
+ t = p->arg[u].val;
+ b1 = p->blk[u];
+ if (rtype(p->arg[u]) == RTmp)
+ if ((r=rfind(&end[b1->id], t)) != -1)
+ rl[r] += b1->loop;
+ }
+ for (x=0, j=0; j<Tmp0; j++)
+ if (rl[j] > rl[x])
+ x = j;
+ if (rl[x] >= b->loop)
+ *hint(p->to.val) = x;
+ }
+ if (b->npred > 1) {
+ /* heuristic 1:
+ * attempt to satisfy hints
+ * when it's simple and we have
+ * multiple predecessors
+ */
+ rcopy(&old, &cur);
+ curi = &insb[NIns];
+ for (j=0; j<old.n; j++) {
+ t = old.t[j];
+ r = *hint(t);
+ r1 = rfind(&cur, t);
+ if (r != -1 && r != r1)
+ if (!bshas(cur.b, r)) {
+ rfree(&cur, t);
+ radd(&cur, t, r);
+ x = tmp[t].cls;
+ emit(OCopy, x, TMP(r1), TMP(r), R);
+ }
+ }
+ if ((j = &insb[NIns] - curi)) {
+ b->nins += j;
+ i = alloc(b->nins * sizeof(Ins));
+ icpy(icpy(i, curi, j), b->ins, b->nins-j);
+ b->ins = i;
+ }
+ }
+ rcopy(&beg[n], &cur);
+ }
+ if (debug['R']) {
+ fprintf(stderr, "\n> Register mappings:\n");
+ for (n=0; n<fn->nblk; n++) {
+ b = fn->rpo[n];
+ fprintf(stderr, "\t%-10s beg", b->name);
+ mdump(&beg[n]);
+ fprintf(stderr, "\t end");
+ mdump(&end[n]);
+ }
+ fprintf(stderr, "\n");
+ }
+
+ /* 3. compose glue code */
+ blist = 0;
+ for (b=fn->start;; b=b->link) {
+ ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
+ for (; (s=**ps); ps++) {
+ npm = 0;
+ for (p=s->phi; p; p=p->link) {
+ dst = p->to;
+ assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
+ if (rtype(dst) == RTmp) {
+ r = rfind(&beg[s->id], dst.val);
+ if (r == -1)
+ continue;
+ dst = TMP(r);
+ }
+ for (u=0; p->blk[u]!=b; u++)
+ assert(u+1 < p->narg);
+ src = p->arg[u];
+ if (rtype(src) == RTmp)
+ src = rref(&end[b->id], src.val);
+ pmadd(src, dst, p->cls);
+ }
+ for (t=Tmp0; t<fn->ntmp; t++)
+ if (bshas(s->in, t)) {
+ src = rref(&end[b->id], t);
+ dst = rref(&beg[s->id], t);
+ pmadd(src, dst, tmp[t].cls);
+ }
+ pmgen();
+ if (curi == insb)
+ continue;
+ b1 = blknew();
+ b1->loop = (b->loop+s->loop) / 2;
+ b1->link = blist;
+ blist = b1;
+ fn->nblk++;
+ sprintf(b1->name, "%s_%s", b->name, s->name);
+ b1->nins = curi - insb;
+ idup(&b1->ins, insb, b1->nins);
+ b1->jmp.type = JJmp;
+ b1->s1 = s;
+ **ps = b1;
+ }
+ if (!b->link) {
+ b->link = blist;
+ break;
+ }
+ }
+ for (b=fn->start; b; b=b->link)
+ b->phi = 0;
+ fn->reg = regu;
+
+ if (debug['R']) {
+ fprintf(stderr, "\n> After register allocation:\n");
+ printfn(fn, stderr);
+ }
+}