aboutsummaryrefslogtreecommitdiff
path: root/all.h
diff options
context:
space:
mode:
authorRoland Paterson-Jones <[email protected]>2024-06-19 16:48:11 +0200
committerQuentin Carbonneaux <[email protected]>2025-03-14 09:58:37 +0100
commitc2ff93e75e5f6df8e1679120b18f0d5884deab2b (patch)
treeb0f728874af29ea01e29f0575b4e5d8a3228a252 /all.h
parent9e36cbe4d8f8c9ff3d739ff9ead2a4a4988c0904 (diff)
Global Value Numbering / Global Code Motion
More or less as proposed in its ninth iteration with the addition of a gcmmove() functionality to restore coherent local schedules. Changes since RFC 8: Features: - generalization of phi 1/0 detection - collapse linear jmp chains before GVN; simplifies if-graph detection used in 0/non-0 value inference and if-elim... - infer 0/non-0 values from dominating blk jnz; eliminates redundant cmp eq/ne 0 and associated jnz/blocks, for example redundant null pointer checks (hare codebase likes this) - remove (emergent) empty if-then-else graphlets between GVN and GCM; improves GCM instruction placement, particularly cmps. - merge %addr =l add %addr1, N sequences - reduces tmp count, register pressure. - squash consecutive associative ops with constant args, e.g. t1 = add t, N ... t2 = add t2, M -> t2 = add t, N+M Bug Fixes: - remove "cmp eq/ne of non-identical RCon's " in copyref(). RCon's are not guaranteed to be dedup'ed, and symbols can alias. Codebase: - moved some stuff into cfg.c including blkmerge() - some refactoring in gvn.c - simplification of reassoc.c - always reassoc all cmp ops and Kl add %t, N. Better on coremark, smaller codebase. - minor simplification of movins() - use vins Testing - standard QBE, cproc, hare, harec, coremark [still have Rust build issues with latest roland] Benchmark - coremark is ~15%+ faster than master - hare "HARETEST_INCLUDE='slow' make check" ~8% faster (crypto::sha1::sha1_1gb is biggest obvious win - ~25% faster) Changes since RFC 7: Bug fixes: - remove isbad4gcm() in GVN/GCM - it is unsound due to different state at GVN vs GCM time; replace with "reassociation" pass after GCM - fix intra-blk use-before-def after GCM - prevent GVN from deduping trapping instructions cos GCM will not move them - remove cmp eq/ne identical arg copy detection for floating point, it is not valid for NaN - fix cges/cged flagged as commutative in ops.h instead of cnes/cned respectively; just a typo Minor features: - copy detection handles cmp le/lt/ge/gt with identical args - treat (integer) div/rem by non-zero constant as non-trapping - eliminate add N/sub N pairs in copy detection - maintain accurate tmp use in GVN; not strictly necessary but enables interim global state sanity checking - "reassociation" of trivial constant offset load/store addresses, and cmp ops with point-of-use in pass after GCM - normalise commutative op arg order - e.g. op con, tmp -> op tmp, con to simplify copy detection and GVN instruction dedup Codebase: - split out core copy detection and constant folding (back) out into copy.c, fold.c respectively; gvn.c was getting monolithic - generic support for instruction moving in ins.c - used by GCM and reassoc - new reassociation pass in reassoc.c - other minor clean-up/refactor Changes since RFC 6: - More ext elimination in GVN by examination of def and use bit width - elimination of redundant and mask by bit width examination - Incorporation of Song's patch Changes since RFC 5: - avoidance of "bad" candidates for GVN/GCM - trivial address offset calculations, and comparisons - more copy detection mostly around boolean values - allow elimination of unused load, alloc, trapping instructions - detection of trivial boolean v ? 1 : 0 phi patterns - bug fix for (removal of) "chg" optimisation in ins recreation - it was missing removal of unused instructions in some cases ifelim() between GVN and GCM; deeper nopunused()
Diffstat (limited to 'all.h')
-rw-r--r--all.h77
1 files changed, 70 insertions, 7 deletions
diff --git a/all.h b/all.h
index a5a0163..ceb7520 100644
--- a/all.h
+++ b/all.h
@@ -191,6 +191,7 @@ enum {
#define INRANGE(x, l, u) ((unsigned)(x) - l <= u - l) /* linear in x */
#define isstore(o) INRANGE(o, Ostoreb, Ostored)
#define isload(o) INRANGE(o, Oloadsb, Oload)
+#define isalloc(o) INRANGE(o, Oalloc4, Oalloc16)
#define isext(o) INRANGE(o, Oextsb, Oextuw)
#define ispar(o) INRANGE(o, Opar, Opare)
#define isarg(o) INRANGE(o, Oarg, Oargv)
@@ -214,8 +215,15 @@ struct Op {
char *name;
short argcls[2][4];
uint canfold:1;
- uint hasid:1;
- uint idval:1; /* identity value 0/1 */
+ uint hasid:1; /* op identity value? */
+ uint idval:1; /* identity value 0/1 */
+ uint commutes:1; /* commutative op? */
+ uint assoc:1; /* associative op? */
+ uint idemp:1; /* idempotent op? */
+ uint cmpeqwl:1; /* Kl/Kw cmp eq/ne? */
+ uint cmplgtewl:1; /* Kl/Kw cmp lt/gt/le/ge? */
+ uint eqval:1; /* 1 for eq; 0 for ne */
+ uint ispinned:1; /* GCM pinned op? */
};
struct Ins {
@@ -230,7 +238,8 @@ struct Phi {
Ref *arg;
Blk **blk;
uint narg;
- int cls;
+ short cls;
+ uint visit:1;
Phi *link;
};
@@ -251,6 +260,7 @@ struct Blk {
Blk *idom;
Blk *dom, *dlink;
+ int domdpth;
Blk **fron;
uint nfron;
@@ -344,6 +354,9 @@ struct Tmp {
Wuw
} width;
int visit;
+ uint gcmbid;
+ uint gcminsn; // TODO get rid
+ uint gcmdefinsn; // TODO get rid
};
struct Con {
@@ -450,6 +463,20 @@ struct Dat {
char isstr;
};
+typedef struct InsLoc InsLoc;
+
+struct InsLoc {
+ uint bid;
+ uint insn;
+};
+
+typedef struct InsMov InsMov;
+
+struct InsMov {
+ InsLoc from;
+ InsLoc to;
+};
+
/* main.c */
extern Target T;
extern char debug['Z'+1];
@@ -476,6 +503,7 @@ char *str(uint32_t);
int argcls(Ins *, int);
int isreg(Ref);
int iscmp(int, int *, int *);
+int invcmpwl(int);
void emit(int, int, Ref, Ref, Ref);
void emiti(Ins);
void idup(Blk *, Ins *, ulong);
@@ -484,16 +512,19 @@ int cmpop(int);
int cmpneg(int);
int clsmerge(short *, short);
int phicls(int, Tmp *);
+uint phiargn(Phi *, Blk *);
+Ref phiarg(Phi *, Blk *);
Ref newtmp(char *, int, Fn *);
void chuse(Ref, int, Fn *);
int symeq(Sym, Sym);
Ref newcon(Con *, Fn *);
Ref getcon(int64_t, Fn *);
int addcon(Con *, Con *, int);
+int isconbits(Fn *fn, Ref r, int64_t *v);
+int istmpconbits(Fn *fn, Ins *i, int64_t *v);
void salloc(Ref, Ref, Fn *);
void dumpts(BSet *, Tmp *, FILE *);
void runmatch(uchar *, Num *, Ref, Ref *);
-
void bsinit(BSet *, uint);
void bszero(BSet *);
uint bscount(BSet *);
@@ -525,7 +556,6 @@ void elimsb(Fn *);
/* cfg.c */
Blk *newblk(void);
-void edgedel(Blk *, Blk **);
void fillpreds(Fn *);
void fillcfg(Fn *);
void filldom(Fn *);
@@ -533,8 +563,20 @@ int sdom(Blk *, Blk *);
int dom(Blk *, Blk *);
void fillfron(Fn *);
void loopiter(Fn *, void (*)(Blk *, Blk *));
+void filldomdpth(Fn *);
+Blk *lca(Blk *, Blk *);
void fillloop(Fn *);
void simpljmp(Fn *);
+void replacepreds(Blk *, Blk *, Blk *);
+void killblks(Fn *);
+void blkmerge(Fn *);
+int ifgraph(Blk *, Blk **, Blk **, Blk **);
+int ifjoin(Blk *, Blk **, Blk **, Blk **);
+int emptyblk(Blk *);
+void ifelim(Fn *);
+void clrbvisit(Fn *);
+int reaches(Fn *,Blk *, Blk *);
+int reachesnotvia(Fn *, Blk *, Blk *, Blk *);
/* mem.c */
void promote(Fn *);
@@ -552,15 +594,36 @@ int storesz(Ins *);
void loadopt(Fn *);
/* ssa.c */
+void adduse(Tmp *, int, Blk *, ...);
void filluse(Fn *);
void ssa(Fn *);
void ssacheck(Fn *);
/* copy.c */
-void copy(Fn *);
+int iswu1(Fn *, Ref);
+void narrowpars(Fn *);
+Ref copyref(Fn *, Blk *, Ins *);
/* fold.c */
-void fold(Fn *);
+Ref foldref(Fn *, Ins *);
+
+/* gvn.c */
+extern Ref con01[2];
+int is0non0(Fn *, Blk *, Ref, int, int *);
+void gvn(Fn *);
+
+/* gcm.c */
+int isfixed(Fn *, Ins *);
+void gcm(Fn *);
+
+/* ins.c */
+void addins(Ins **, uint *, Ins *);
+void addbins(Blk *, Ins **, uint *);
+void nopunused(Fn *);
+void movins(Fn *, InsMov *, uint, int);
+
+/* reassoc.c */
+void reassoc(Fn *);
/* simpl.c */
void simpl(Fn *);