aboutsummaryrefslogtreecommitdiff
path: root/ops.h
diff options
context:
space:
mode:
authorRoland Paterson-Jones <[email protected]>2024-06-19 16:48:11 +0200
committerQuentin Carbonneaux <[email protected]>2025-03-14 09:58:37 +0100
commitc2ff93e75e5f6df8e1679120b18f0d5884deab2b (patch)
treeb0f728874af29ea01e29f0575b4e5d8a3228a252 /ops.h
parent9e36cbe4d8f8c9ff3d739ff9ead2a4a4988c0904 (diff)
Global Value Numbering / Global Code Motion
More or less as proposed in its ninth iteration with the addition of a gcmmove() functionality to restore coherent local schedules. Changes since RFC 8: Features: - generalization of phi 1/0 detection - collapse linear jmp chains before GVN; simplifies if-graph detection used in 0/non-0 value inference and if-elim... - infer 0/non-0 values from dominating blk jnz; eliminates redundant cmp eq/ne 0 and associated jnz/blocks, for example redundant null pointer checks (hare codebase likes this) - remove (emergent) empty if-then-else graphlets between GVN and GCM; improves GCM instruction placement, particularly cmps. - merge %addr =l add %addr1, N sequences - reduces tmp count, register pressure. - squash consecutive associative ops with constant args, e.g. t1 = add t, N ... t2 = add t2, M -> t2 = add t, N+M Bug Fixes: - remove "cmp eq/ne of non-identical RCon's " in copyref(). RCon's are not guaranteed to be dedup'ed, and symbols can alias. Codebase: - moved some stuff into cfg.c including blkmerge() - some refactoring in gvn.c - simplification of reassoc.c - always reassoc all cmp ops and Kl add %t, N. Better on coremark, smaller codebase. - minor simplification of movins() - use vins Testing - standard QBE, cproc, hare, harec, coremark [still have Rust build issues with latest roland] Benchmark - coremark is ~15%+ faster than master - hare "HARETEST_INCLUDE='slow' make check" ~8% faster (crypto::sha1::sha1_1gb is biggest obvious win - ~25% faster) Changes since RFC 7: Bug fixes: - remove isbad4gcm() in GVN/GCM - it is unsound due to different state at GVN vs GCM time; replace with "reassociation" pass after GCM - fix intra-blk use-before-def after GCM - prevent GVN from deduping trapping instructions cos GCM will not move them - remove cmp eq/ne identical arg copy detection for floating point, it is not valid for NaN - fix cges/cged flagged as commutative in ops.h instead of cnes/cned respectively; just a typo Minor features: - copy detection handles cmp le/lt/ge/gt with identical args - treat (integer) div/rem by non-zero constant as non-trapping - eliminate add N/sub N pairs in copy detection - maintain accurate tmp use in GVN; not strictly necessary but enables interim global state sanity checking - "reassociation" of trivial constant offset load/store addresses, and cmp ops with point-of-use in pass after GCM - normalise commutative op arg order - e.g. op con, tmp -> op tmp, con to simplify copy detection and GVN instruction dedup Codebase: - split out core copy detection and constant folding (back) out into copy.c, fold.c respectively; gvn.c was getting monolithic - generic support for instruction moving in ins.c - used by GCM and reassoc - new reassociation pass in reassoc.c - other minor clean-up/refactor Changes since RFC 6: - More ext elimination in GVN by examination of def and use bit width - elimination of redundant and mask by bit width examination - Incorporation of Song's patch Changes since RFC 5: - avoidance of "bad" candidates for GVN/GCM - trivial address offset calculations, and comparisons - more copy detection mostly around boolean values - allow elimination of unused load, alloc, trapping instructions - detection of trivial boolean v ? 1 : 0 phi patterns - bug fix for (removal of) "chg" optimisation in ins recreation - it was missing removal of unused instructions in some cases ifelim() between GVN and GCM; deeper nopunused()
Diffstat (limited to 'ops.h')
-rw-r--r--ops.h291
1 files changed, 144 insertions, 147 deletions
diff --git a/ops.h b/ops.h
index beaa6f3..00d62a0 100644
--- a/ops.h
+++ b/ops.h
@@ -6,188 +6,185 @@
#define V(Imm)
#endif
-#ifndef P
- #define P(CanFold, HasId, IdVal)
+#ifndef F
+#define F(CanFold, HasId, IdVal, Commutes, Associates, Idemp, IsCmpEq, IsCmpLgte, CmpEqVal, IsPinned)
#endif
-
#define T(a,b,c,d,e,f,g,h) { \
{[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \
{[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h} \
}
-
/*********************/
/* PUBLIC OPERATIONS */
/*********************/
/* Arithmetic and Bits */
-O(add, T(w,l,s,d, w,l,s,d), P(1,1,0)) X(2,1,0) V(1)
-O(sub, T(w,l,s,d, w,l,s,d), P(1,1,0)) X(2,1,0) V(0)
-O(neg, T(w,l,s,d, x,x,x,x), P(1,0,0)) X(1,1,0) V(0)
-O(div, T(w,l,s,d, w,l,s,d), P(1,1,1)) X(0,0,0) V(0)
-O(rem, T(w,l,e,e, w,l,e,e), P(1,0,0)) X(0,0,0) V(0)
-O(udiv, T(w,l,e,e, w,l,e,e), P(1,1,1)) X(0,0,0) V(0)
-O(urem, T(w,l,e,e, w,l,e,e), P(1,0,0)) X(0,0,0) V(0)
-O(mul, T(w,l,s,d, w,l,s,d), P(1,1,1)) X(2,0,0) V(0)
-O(and, T(w,l,e,e, w,l,e,e), P(1,0,0)) X(2,1,0) V(1)
-O(or, T(w,l,e,e, w,l,e,e), P(1,1,0)) X(2,1,0) V(1)
-O(xor, T(w,l,e,e, w,l,e,e), P(1,1,0)) X(2,1,0) V(1)
-O(sar, T(w,l,e,e, w,w,e,e), P(1,1,0)) X(1,1,0) V(1)
-O(shr, T(w,l,e,e, w,w,e,e), P(1,1,0)) X(1,1,0) V(1)
-O(shl, T(w,l,e,e, w,w,e,e), P(1,1,0)) X(1,1,0) V(1)
+O(add, T(w,l,s,d, w,l,s,d), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
+O(sub, T(w,l,s,d, w,l,s,d), F(1,1,0,0,0,0,0,0,0,0)) X(2,1,0) V(0)
+O(neg, T(w,l,s,d, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
+O(div, T(w,l,s,d, w,l,s,d), F(1,1,1,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(rem, T(w,l,e,e, w,l,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(udiv, T(w,l,e,e, w,l,e,e), F(1,1,1,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(urem, T(w,l,e,e, w,l,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(mul, T(w,l,s,d, w,l,s,d), F(1,1,1,1,0,0,0,0,0,0)) X(2,0,0) V(0)
+O(and, T(w,l,e,e, w,l,e,e), F(1,0,0,1,1,1,0,0,0,0)) X(2,1,0) V(1)
+O(or, T(w,l,e,e, w,l,e,e), F(1,1,0,1,1,1,0,0,0,0)) X(2,1,0) V(1)
+O(xor, T(w,l,e,e, w,l,e,e), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
+O(sar, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
+O(shr, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
+O(shl, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
/* Comparisons */
-O(ceqw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cnew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csgew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csgtw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cslew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csltw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(1)
-O(cugew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cugtw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(culew, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cultw, T(w,w,e,e, w,w,e,e), P(1,0,0)) X(0,1,0) V(1)
-
-O(ceql, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cnel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csgel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csgtl, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cslel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(csltl, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(1)
-O(cugel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cugtl, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(culel, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cultl, T(l,l,e,e, l,l,e,e), P(1,0,0)) X(0,1,0) V(1)
-
-O(ceqs, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cges, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cgts, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cles, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(clts, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cnes, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cos, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cuos, T(s,s,e,e, s,s,e,e), P(1,0,0)) X(0,1,0) V(0)
-
-O(ceqd, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cged, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cgtd, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cled, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cltd, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cned, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cod, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
-O(cuod, T(d,d,e,e, d,d,e,e), P(1,0,0)) X(0,1,0) V(0)
+O(ceqw, T(w,w,e,e, w,w,e,e), F(1,1,1,1,0,0,1,0,1,0)) X(0,1,0) V(0)
+O(cnew, T(w,w,e,e, w,w,e,e), F(1,1,0,1,0,0,1,0,0,0)) X(0,1,0) V(0)
+O(csgew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(csgtw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
+O(cslew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(csltw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
+O(cugew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(cugtw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
+O(culew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(cultw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
+
+O(ceql, T(l,l,e,e, l,l,e,e), F(1,0,0,1,0,0,1,0,1,0)) X(0,1,0) V(0)
+O(cnel, T(l,l,e,e, l,l,e,e), F(1,0,0,1,0,0,1,0,0,0)) X(0,1,0) V(0)
+O(csgel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(csgtl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
+O(cslel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(csltl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
+O(cugel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(cugtl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
+O(culel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
+O(cultl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
+
+O(ceqs, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cges, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cgts, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cles, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(clts, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cnes, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cos, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cuos, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+
+O(ceqd, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cged, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cgtd, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cled, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cltd, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cned, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cod, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
+O(cuod, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
/* Memory */
-O(storeb, T(w,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(storeh, T(w,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(storew, T(w,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(storel, T(l,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(stores, T(s,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(stored, T(d,e,e,e, m,e,e,e), P(0,0,0)) X(0,0,1) V(0)
-
-O(loadsb, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loadub, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loadsh, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loaduh, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loadsw, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(loaduw, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(load, T(m,m,m,m, x,x,x,x), P(0,0,0)) X(0,0,1) V(0)
+O(storeb, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(storeh, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(storew, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(storel, T(l,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(stores, T(s,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(stored, T(d,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+
+O(loadsb, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loadub, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loadsh, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loaduh, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loadsw, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(loaduw, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
+O(load, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
/* Extensions and Truncations */
-O(extsb, T(w,w,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extub, T(w,w,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extsh, T(w,w,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extuh, T(w,w,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extsw, T(e,w,e,e, e,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(extuw, T(e,w,e,e, e,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-
-O(exts, T(e,e,e,s, e,e,e,x), P(1,0,0)) X(0,0,1) V(0)
-O(truncd, T(e,e,d,e, e,e,x,e), P(1,0,0)) X(0,0,1) V(0)
-O(stosi, T(s,s,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(stoui, T(s,s,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(dtosi, T(d,d,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(dtoui, T(d,d,e,e, x,x,e,e), P(1,0,0)) X(0,0,1) V(0)
-O(swtof, T(e,e,w,w, e,e,x,x), P(1,0,0)) X(0,0,1) V(0)
-O(uwtof, T(e,e,w,w, e,e,x,x), P(1,0,0)) X(0,0,1) V(0)
-O(sltof, T(e,e,l,l, e,e,x,x), P(1,0,0)) X(0,0,1) V(0)
-O(ultof, T(e,e,l,l, e,e,x,x), P(1,0,0)) X(0,0,1) V(0)
-O(cast, T(s,d,w,l, x,x,x,x), P(1,0,0)) X(0,0,1) V(0)
+O(extsb, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extub, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extsh, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extuh, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extsw, T(e,w,e,e, e,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(extuw, T(e,w,e,e, e,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+
+O(exts, T(e,e,e,s, e,e,e,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(truncd, T(e,e,d,e, e,e,x,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(stosi, T(s,s,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(stoui, T(s,s,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(dtosi, T(d,d,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(dtoui, T(d,d,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(swtof, T(e,e,w,w, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(uwtof, T(e,e,w,w, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(sltof, T(e,e,l,l, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(ultof, T(e,e,l,l, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(cast, T(s,d,w,l, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
/* Stack Allocation */
-O(alloc4, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(alloc8, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(alloc16, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
+O(alloc4, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(alloc8, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(alloc16, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
/* Variadic Function Helpers */
-O(vaarg, T(m,m,m,m, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(vastart, T(m,e,e,e, x,e,e,e), P(0,0,0)) X(0,0,0) V(0)
+O(vaarg, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(vastart, T(m,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
-O(copy, T(w,l,s,d, x,x,x,x), P(0,0,0)) X(0,0,1) V(0)
+O(copy, T(w,l,s,d, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
/* Debug */
-O(dbgloc, T(w,e,e,e, w,e,e,e), P(0,0,0)) X(0,0,1) V(0)
+O(dbgloc, T(w,e,e,e, w,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
/****************************************/
/* INTERNAL OPERATIONS (keep nop first) */
/****************************************/
/* Miscellaneous and Architecture-Specific Operations */
-O(nop, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,1) V(0)
-O(addr, T(m,m,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(blit0, T(m,e,e,e, m,e,e,e), P(0,0,0)) X(0,1,0) V(0)
-O(blit1, T(w,e,e,e, x,e,e,e), P(0,0,0)) X(0,1,0) V(0)
-O(swap, T(w,l,s,d, w,l,s,d), P(0,0,0)) X(1,0,0) V(0)
-O(sign, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(salloc, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(xidiv, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(1,0,0) V(0)
-O(xdiv, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(1,0,0) V(0)
-O(xcmp, T(w,l,s,d, w,l,s,d), P(0,0,0)) X(1,1,0) V(0)
-O(xtest, T(w,l,e,e, w,l,e,e), P(0,0,0)) X(1,1,0) V(0)
-O(acmp, T(w,l,e,e, w,l,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(acmn, T(w,l,e,e, w,l,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(afcmp, T(e,e,s,d, e,e,s,d), P(0,0,0)) X(0,0,0) V(0)
-O(reqz, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(rnez, T(w,l,e,e, x,x,e,e), P(0,0,0)) X(0,0,0) V(0)
+O(nop, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(addr, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(blit0, T(m,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
+O(blit1, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
+O(swap, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
+O(sign, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(salloc, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(xidiv, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
+O(xdiv, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
+O(xcmp, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
+O(xtest, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
+O(acmp, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(acmn, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(afcmp, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(reqz, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
+O(rnez, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
/* Arguments, Parameters, and Calls */
-O(par, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(parsb, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(parub, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(parsh, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(paruh, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(parc, T(e,x,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(pare, T(e,x,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(arg, T(w,l,s,d, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(argsb, T(w,e,e,e, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(argub, T(w,e,e,e, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(argsh, T(w,e,e,e, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(arguh, T(w,e,e,e, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(argc, T(e,x,e,e, e,l,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(arge, T(e,l,e,e, e,x,e,e), P(0,0,0)) X(0,0,0) V(0)
-O(argv, T(x,x,x,x, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
-O(call, T(m,m,m,m, x,x,x,x), P(0,0,0)) X(0,0,0) V(0)
+O(par, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(parsb, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(parub, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(parsh, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(paruh, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(parc, T(e,x,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(pare, T(e,x,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(arg, T(w,l,s,d, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argsb, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argub, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argsh, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(arguh, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argc, T(e,x,e,e, e,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(arge, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(argv, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
+O(call, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
/* Flags Setting */
-O(flagieq, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagine, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagisge, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagisgt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagisle, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagislt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagiuge, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagiugt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagiule, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagiult, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfeq, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfge, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfgt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfle, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagflt, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfne, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfo, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-O(flagfuo, T(x,x,e,e, x,x,e,e), P(0,0,0)) X(0,0,1) V(0)
-
+O(flagieq, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagine, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagisge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagisgt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagisle, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagislt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagiuge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagiugt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagiule, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagiult, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfeq, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfgt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfle, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagflt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfne, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
+O(flagfuo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
#undef T
#undef X