diff options
| author | Roland Paterson-Jones <[email protected]> | 2024-06-19 16:48:11 +0200 |
|---|---|---|
| committer | Quentin Carbonneaux <[email protected]> | 2025-03-14 09:58:37 +0100 |
| commit | c2ff93e75e5f6df8e1679120b18f0d5884deab2b (patch) | |
| tree | b0f728874af29ea01e29f0575b4e5d8a3228a252 /test | |
| parent | 9e36cbe4d8f8c9ff3d739ff9ead2a4a4988c0904 (diff) | |
Global Value Numbering / Global Code Motion
More or less as proposed in its ninth iteration with the
addition of a gcmmove() functionality to restore coherent
local schedules.
Changes since RFC 8:
Features:
- generalization of phi 1/0 detection
- collapse linear jmp chains before GVN; simplifies if-graph
detection used in 0/non-0 value inference and if-elim...
- infer 0/non-0 values from dominating blk jnz; eliminates
redundant cmp eq/ne 0 and associated jnz/blocks, for example
redundant null pointer checks (hare codebase likes this)
- remove (emergent) empty if-then-else graphlets between GVN and
GCM; improves GCM instruction placement, particularly cmps.
- merge %addr =l add %addr1, N sequences - reduces tmp count,
register pressure.
- squash consecutive associative ops with constant args, e.g.
t1 = add t, N ... t2 = add t2, M -> t2 = add t, N+M
Bug Fixes:
- remove "cmp eq/ne of non-identical RCon's " in copyref().
RCon's are not guaranteed to be dedup'ed, and symbols can
alias.
Codebase:
- moved some stuff into cfg.c including blkmerge()
- some refactoring in gvn.c
- simplification of reassoc.c - always reassoc all cmp ops
and Kl add %t, N. Better on coremark, smaller codebase.
- minor simplification of movins() - use vins
Testing - standard QBE, cproc, hare, harec, coremark
[still have Rust build issues with latest roland]
Benchmark
- coremark is ~15%+ faster than master
- hare "HARETEST_INCLUDE='slow' make check" ~8% faster
(crypto::sha1::sha1_1gb is biggest obvious win - ~25% faster)
Changes since RFC 7:
Bug fixes:
- remove isbad4gcm() in GVN/GCM - it is unsound due to different state
at GVN vs GCM time; replace with "reassociation" pass after GCM
- fix intra-blk use-before-def after GCM
- prevent GVN from deduping trapping instructions cos GCM will not
move them
- remove cmp eq/ne identical arg copy detection for floating point, it
is not valid for NaN
- fix cges/cged flagged as commutative in ops.h instead of cnes/cned
respectively; just a typo
Minor features:
- copy detection handles cmp le/lt/ge/gt with identical args
- treat (integer) div/rem by non-zero constant as non-trapping
- eliminate add N/sub N pairs in copy detection
- maintain accurate tmp use in GVN; not strictly necessary but enables
interim global state sanity checking
- "reassociation" of trivial constant offset load/store addresses, and
cmp ops with point-of-use in pass after GCM
- normalise commutative op arg order - e.g. op con, tmp -> op tmp, con
to simplify copy detection and GVN instruction dedup
Codebase:
- split out core copy detection and constant folding (back) out into
copy.c, fold.c respectively; gvn.c was getting monolithic
- generic support for instruction moving in ins.c - used by GCM and
reassoc
- new reassociation pass in reassoc.c
- other minor clean-up/refactor
Changes since RFC 6:
- More ext elimination in GVN by examination of def and use bit width
- elimination of redundant and mask by bit width examination
- Incorporation of Song's patch
Changes since RFC 5:
- avoidance of "bad" candidates for GVN/GCM - trivial address offset
calculations, and comparisons
- more copy detection mostly around boolean values
- allow elimination of unused load, alloc, trapping instructions
- detection of trivial boolean v ? 1 : 0 phi patterns
- bug fix for (removal of) "chg" optimisation in ins recreation - it
was missing removal of unused instructions in some cases
ifelim() between GVN and GCM; deeper nopunused()
Diffstat (limited to 'test')
| -rw-r--r-- | test/_gcm1.ssa | 48 | ||||
| -rw-r--r-- | test/_gcm2.ssa | 43 | ||||
| -rw-r--r-- | test/_load-elim.ssa | 17 | ||||
| -rw-r--r-- | test/gvn-live-dead.ssa | 19 | ||||
| -rw-r--r-- | test/non0jnz.ssa | 31 |
5 files changed, 158 insertions, 0 deletions
diff --git a/test/_gcm1.ssa b/test/_gcm1.ssa new file mode 100644 index 0000000..719cddb --- /dev/null +++ b/test/_gcm1.ssa @@ -0,0 +1,48 @@ +export +function w $ifmv(w %p1, w %p2, w %p3) { +@start +@entry + %rt =w add %p2, %p3 # gcm moves to @true + %rf =w sub %p2, %p3 # gcm moves to @false + jnz %p1, @true, @false +@true + %r =w copy %rt + jmp @exit +@false + %r =w copy %rf + jmp @exit +@exit + ret %r +} + +export +function w $hoist1(w %p1, w %p2, w %p3) { +@start +@entry + %n =w copy 0 + %i =w copy %p1 +@loop + %base =w add %p2, %p3 # gcm moves to @exit + %i =w sub %i, 1 + %n =w add %n, 1 + jnz %i, @loop, @exit +@exit + %r =w add %base, %n + ret %r +} + +export +function w $hoist2(w %p1, w %p2, w %p3) { +@start +@entry + %n =w copy 0 + %i =w copy %p1 +@loop + %base =w add %p2, %p3 # gcm moves to @entry + %i =w sub %i, 1 + %n =w add %n, %base + jnz %i, @loop, @exit +@exit + %r =w add %base, %n + ret %r +} diff --git a/test/_gcm2.ssa b/test/_gcm2.ssa new file mode 100644 index 0000000..baeb4a4 --- /dev/null +++ b/test/_gcm2.ssa @@ -0,0 +1,43 @@ +# Programs from "Global Code Motion Global Value Numbering" by Cliff Click +# https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf + +# GCM program in Figure 1 + +function w $gcm_test(w %a){ +@start + %i.0 =w copy 0 +@loop + %i.1 =w phi @start %i.0, @loop %i.2 + %b =w add %a, 1 # early schedule moves to @start + %i.2 =w add %i.1, %b + %c =w mul %i.2, 2 # late schedule moves to @end + %x =w csltw %i.2, 10 + jnz %x, @loop, @end +@end + ret %c +} + +# GCM program in "Figure 3 x's definition does not dominate it's use" +# +# SSA contruction will insert phi instruction for "x" in @if_false +# preventing the "add" in @if_false from being moved to @if_true + +function $gcm_test2 (w %a){ +@start + %f =w copy 1 + %x =w copy 0 + %s.0 =w copy 0 +@loop + %s.1 = w phi @start %s.0, @if_false %s.2 + jnz %a, @if, @end +@if + jnz %f, @if_true, @if_false +@if_true + %f =w copy 0 + %x =w add %x, 1 +@if_false + %s.2 =w add %s.1, %x + jmp @loop +@end + ret +} diff --git a/test/_load-elim.ssa b/test/_load-elim.ssa new file mode 100644 index 0000000..faae478 --- /dev/null +++ b/test/_load-elim.ssa @@ -0,0 +1,17 @@ +# GCM can eliminate unused add/load instructions + +export +function w $f(l %p, w %c) { +@start + jnz %c, @true, @false +@true + %p1 =l add %p, 4 + %v1 =w loaduw %p1 + jmp @end +@false + %p2 =l add %p, 4 + %v2 =w loaduw %p2 + jmp @end +@end + ret 0 +} diff --git a/test/gvn-live-dead.ssa b/test/gvn-live-dead.ssa new file mode 100644 index 0000000..d47f05b --- /dev/null +++ b/test/gvn-live-dead.ssa @@ -0,0 +1,19 @@ +export +function w $test(w %p1, w %p2) { +@start +@entry + %t1 =w copy 1 + jnz %t1, @live, @dead1 +@live + %t2 =w add %p1, %p2 + ret %t2 +@dead1 + %t2 =w add %p1, %p2 # live ins in dead blk +@dead2 + jnz %t1, @live, @dead1 +} + +# >>> driver +# extern int test(int p1, int p2); +# int main() { return test(1, 2) != 3; } +# <<< diff --git a/test/non0jnz.ssa b/test/non0jnz.ssa new file mode 100644 index 0000000..33f9a96 --- /dev/null +++ b/test/non0jnz.ssa @@ -0,0 +1,31 @@ +# GVN 0/non-0 inference removes @yesyes, @yesno, @noyes, @nono + +export +function w $test(w %c) { +@start + jnz %c, @yes, @no +@yes + %c0 =w cnew %c, 0 + jnz %c0, @yesyes, @yesno +@yesyes + %rc =w copy 1 + jmp @end +@yesno + %rc =w copy 111 + jmp @end +@no + %c1 =w cnew %c, 0 + jnz %c1, @noyes, @nono +@noyes + %rc =w copy 222 + jmp @end +@nono + %rc =w copy 0 +@end + ret %rc +} + +# >>> driver +# int test(int); +# int main(void) { return test(0); } +# <<< |
