about summary refs log tree commit diff
diff options
context:
space:
mode:
author Quentin Carbonneaux <[email protected]> 2024-04-11 10:28:41 +0200
committer Quentin Carbonneaux <[email protected]> 2024-04-11 14:14:53 +0200
commit 4a809d69b5647aabc8f6a9e22e0bc889f9c779ed (patch)
tree 31168efce4b56685c1564ea49b93a56bc27c9569
parent 8e8f7064366996a7fcc8d84267958cfaf5ee8194 (diff)
fold scaled offsets in addresses
-rw-r--r-- all.h 2
-rw-r--r-- amd64/emit.c 2
-rw-r--r-- amd64/isel.c 11
-rw-r--r-- test/isel4.ssa 64
-rw-r--r-- util.c 11
5 files changed, 79 insertions, 11 deletions
diff --git a/all.h b/all.h
index 8ce0728..bd61510 100644
--- a/all.h
+++ b/all.h
@@ -484,7 +484,7 @@ void chuse(Ref, int, Fn *);
int symeq(Sym, Sym);
Ref newcon(Con *, Fn *);
Ref getcon(int64_t, Fn *);
-int addcon(Con *, Con *);
+int addcon(Con *, Con *, int);
void salloc(Ref, Ref, Fn *);
void dumpts(BSet *, Tmp *, FILE *);
void runmatch(uchar *, Num *, Ref, Ref *);
diff --git a/amd64/emit.c b/amd64/emit.c
index 51d1a5c..9636209 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -293,7 +293,7 @@ Next:
if (rtype(m->base) == RSlot) {
off.type = CBits;
off.bits.i = slot(m->base, fn);
- addcon(&m->offset, &off);
+ addcon(&m->offset, &off, 1);
m->base = TMP(RBP);
}
if (m->offset.type != CUndef)
diff --git a/amd64/isel.c b/amd64/isel.c
index 5f14ba3..ed0ba66 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -692,7 +692,7 @@ anumber(Num *tn, Blk *b, Con *con)
}
static Ref
-adisp(Con *c, Num *tn, Ref r, Fn *fn)
+adisp(Con *c, Num *tn, Ref r, Fn *fn, int s)
{
Ref v[2];
int n;
@@ -704,7 +704,7 @@ adisp(Con *c, Num *tn, Ref r, Fn *fn)
break;
runmatch(matcher[Pob], tn, r, v);
assert(rtype(v[0]) == RCon);
- addcon(c, &fn->con[v[0].val]);
+ addcon(c, &fn->con[v[0].val], s);
r = v[1];
}
return r;
@@ -733,18 +733,18 @@ amatch(Addr *a, Num *tn, Ref r, Fn *fn)
memset(&co, 0, sizeof co);
ro = v[0];
- rb = adisp(&co, tn, v[1], fn);
+ rb = adisp(&co, tn, v[1], fn, 1);
ri = v[2];
rs = v[3];
s = 1;
if (*p < 0 && co.type != CUndef)
if (amatch(a, tn, rb, fn))
- return addcon(&a->offset, &co);
+ return addcon(&a->offset, &co, 1);
if (!req(ro, R)) {
assert(rtype(ro) == RCon);
c = &fn->con[ro.val];
- if (!addcon(&co, c))
+ if (!addcon(&co, c, 1))
return 0;
}
if (!req(rs, R)) {
@@ -753,6 +753,7 @@ amatch(Addr *a, Num *tn, Ref r, Fn *fn)
assert(c->type = CBits);
s = c->bits.i;
}
+ ri = adisp(&co, tn, ri, fn, s);
*a = (Addr){co, rb, ri, s};
if (rtype(ri) == RTmp)
diff --git a/test/isel4.ssa b/test/isel4.ssa
new file mode 100644
index 0000000..874807e
--- /dev/null
+++ b/test/isel4.ssa
@@ -0,0 +1,64 @@
+# amd64 address-folding stress
+
+export function w $f0(l %a, l %b) {
+@start
+ %c =l add %b, 2
+ %d =l mul %c, 4
+ %e =l add %a, %d
+ %q =l loadw %e
+ ret %q
+}
+
+export function w $f1(l %a, l %b) {
+@start
+ %c =l add 1, %b
+ %f =l add %c, 1
+ %d =l mul %f, 4
+ %e =l add %d, %a
+ %q =l loadw %e
+ ret %q
+}
+
+export function w $f2(l %a, l %b) {
+@start
+ %l =l mul %b, 4
+ %d =l add 8, %l
+ %e =l add %a, %d
+ %q =l loadw %e
+ ret %q
+}
+
+# fixme: folding is not good here
+export function w $f3(l %a, l %b) {
+@start
+ %l =l mul %b, 4
+ %d =l add 4, %l
+ %f =l add 4, %d
+ %e =l add %a, %f
+ %q =l loadw %e
+ ret %q
+}
+
+export function w $f4(l %a, l %b) {
+@start
+ %c =l add 1, %b
+ %d =l mul %c, 4
+ %e =l add 4, %d
+ %f =l add %e, %a
+ %q =l loadw %f
+ ret %q
+}
+
+# >>> driver
+# int a[] = {1, 2, 3, 4};
+# typedef int loadf(int *, long long);
+# extern loadf f0, f1, f2, f3, f4;
+# loadf *fns[] = {&f0, &f1, &f2, &f3, &f4, 0};
+# int main() {
+# loadf **f;
+# int n;
+# for (n=1,f=fns; *f; f++,n++)
+# if ((*f)(a, 1) != 4) return n;
+# return 0;
+# }
+# <<<
diff --git a/util.c b/util.c
index b3401f2..2e4b4cc 100644
--- a/util.c
+++ b/util.c
@@ -398,18 +398,21 @@ getcon(int64_t val, Fn *fn)
}
int
-addcon(Con *c0, Con *c1)
+addcon(Con *c0, Con *c1, int m)
{
- if (c0->type == CUndef)
+ if (m != 1 && c1->type == CAddr)
+ return 0;
+ if (c0->type == CUndef) {
*c0 = *c1;
- else {
+ c0->bits.i *= m;
+ } else {
if (c1->type == CAddr) {
if (c0->type == CAddr)
return 0;
c0->type = CAddr;
c0->sym = c1->sym;
}
- c0->bits.i += c1->bits.i;
+ c0->bits.i += c1->bits.i * m;
}
return 1;
}