diff options
| author | Michael Forney <[email protected]> | 2021-08-22 12:55:02 -0700 |
|---|---|---|
| committer | Quentin Carbonneaux <[email protected]> | 2021-08-29 22:33:04 +0200 |
| commit | 7ac88f5d4874f03d62f48055eded26e9a08e54ac (patch) | |
| tree | 21469dfbb3dfbb1a198fe47061019d43e88e5159 | |
| parent | 804921a3ab463848aa0ffbe495ca542b3789c841 (diff) | |
amd64/isel: fix floating point == and != result with NaN
On x86_64, ucomis[sd] sets ZF=1, PF=0, CF=0 for equal arguments.
However, if the arguments are unordered it sets ZF=1, PF=1, CF=1,
and there is no jump/flag instruction for ZF=1 & PF=0 or ZF=1 & CF=0.
So, in order to correctly implement ceq[sd] on x86_64, we need to
be a bit more creative. There are several options available, depending
on whether the result of ceq[sd] is used with jnz, or with other
instructions, or both.
If the result is used for a conditional jump, both gcc and clang
use a combination of jp and jnz:
ucomisd %xmm1, %xmm0
jp .Lfalse
jnz .Lfalse
...
.Lfalse:
If the result is used in other instructions or return, gcc does the
following for x == y:
ucomisd %xmm1, %xmm0
setnp %al
movzbl %al, %eax
movl $0, %edx
cmovne %edx, %eax
This sets EAX to PF=0, then uses cmovne to clear it if ZF=0. It
also takes care to avoid clobbering the flags register in case the
result is also used for a conditional jump. Implementing this
approach in QBE would require adding an architecture-specific
instruction for cmovne.
In contrast, clang does an additional compare, this time using
cmpeqsd instead of ucomisd:
cmpeqsd %xmm1, %xmm0
movq %xmm0, %rax
andl $1, %rax
The cmpeqsd instruction doas a floating point equality test, setting
XMM0 to all 1s if they are equal and all 0s if they are not. However,
we need the result in a non-XMM register, so it moves the result
back then masks off all but the first bit.
Both of these approaches are a bit awkward to implement in QBE, so
instead, this commit does the following:
ucomisd %xmm1, %xmm0
setz %al
movzbl %al, %eax
setnp %cl
movzbl %cl, %ecx
andl %ecx, %eax
This sets the result by anding the two flags, but has a side effect
of clobbering the flags register. This was a problem in one of my
earlier patches to fix this issue[0], in addition to being more
complex than I'd hoped.
Instead, this commit always leaves the ceq[sd] instruction in the
block, even if the result is only used to control a jump, so that
the above instruction sequence is always used. Then, since we now
have ZF=!(ZF=1 & PF=0) for x == y, or ZF=!(ZF=0 | PF=1) for x != y,
we can use jnz for the jump instruction.
[0] https://git.sr.ht/~sircmpwn/qbe/commit/64833841b18c074a23b4a1254625315e05b86658
| -rw-r--r-- | amd64/isel.c | 26 | ||||
| -rw-r--r-- | test/isel2.ssa | 14 |
2 files changed, 30 insertions, 10 deletions
diff --git a/amd64/isel.c b/amd64/isel.c index 07e6142..607c176 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -344,6 +344,26 @@ Emit: if (isload(i.op)) goto case_Oload; if (iscmp(i.op, &kc, &x)) { + switch (x) { + case NCmpI+Cfeq: + /* zf is set when operands are + * unordered, so we may have to + * check pf + */ + r0 = newtmp("isel", Kw, fn); + r1 = newtmp("isel", Kw, fn); + emit(Oand, Kw, i.to, r0, r1); + emit(Oflagfo, k, r1, R, R); + i.to = r0; + break; + case NCmpI+Cfne: + r0 = newtmp("isel", Kw, fn); + r1 = newtmp("isel", Kw, fn); + emit(Oor, Kw, i.to, r0, r1); + emit(Oflagfuo, k, r1, R, R); + i.to = r0; + break; + } swap = cmpswap(i.arg, x); if (swap) x = cmpop(x); @@ -388,7 +408,7 @@ seljmp(Blk *b, Fn *fn) r = b->jmp.arg; t = &fn->tmp[r.val]; b->jmp.arg = R; - assert(!req(r, R) && rtype(r) != RCon); + assert(rtype(r) == RTmp); if (b->s1 == b->s2) { chuse(r, -1, fn); b->jmp.type = Jjmp; @@ -400,7 +420,9 @@ seljmp(Blk *b, Fn *fn) selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */ b->jmp.type = Jjf + Cine; } - else if (iscmp(fi->op, &k, &c)) { + else if (iscmp(fi->op, &k, &c) + && c != NCmpI+Cfeq /* see sel() */ + && c != NCmpI+Cfne) { swap = cmpswap(fi->arg, c); if (swap) c = cmpop(c); diff --git a/test/isel2.ssa b/test/isel2.ssa index 280ceb2..8ca4a24 100644 --- a/test/isel2.ssa +++ b/test/isel2.ssa @@ -1,7 +1,5 @@ # tests that NaN is handled properly by # floating point comparisons -# -# TODO: fix eq[123](NAN, NAN) on amd64 export function w $lt(d %x, d %y) { @start @@ -97,12 +95,12 @@ export function w $ne3(d %x, d %y) { # + !le(0, 1) + !le(0, 0) + le(1, 0) + le(NAN, NAN) # + gt(0, 1) + gt(0, 0) + !gt(1, 0) + gt(NAN, NAN) # + ge(0, 1) + !ge(0, 0) + !ge(1, 0) + ge(NAN, NAN) -# + eq1(0, 1) + !eq1(0, 0) + eq1(1, 0) /*+ eq1(NAN, NAN)*/ -# + eq2(0, 1) + !eq2(0, 0) + eq2(1, 0) /*+ eq2(NAN, NAN)*/ -# + eq3(0, 1) + !eq3(0, 0) + eq3(1, 0) /*+ eq3(NAN, NAN)*/ -# + !ne1(0, 1) + ne1(0, 0) + !ne1(1, 0) /*+ !ne1(NAN, NAN)*/ -# + !ne2(0, 1) + ne2(0, 0) + !ne2(1, 0) /*+ !ne2(NAN, NAN)*/ -# + !ne3(0, 1) + ne3(0, 0) + !ne3(1, 0) /*+ !ne3(NAN, NAN)*/ +# + eq1(0, 1) + !eq1(0, 0) + eq1(1, 0) + eq1(NAN, NAN) +# + eq2(0, 1) + !eq2(0, 0) + eq2(1, 0) + eq2(NAN, NAN) +# + eq3(0, 1) + !eq3(0, 0) + eq3(1, 0) + eq3(NAN, NAN) +# + !ne1(0, 1) + ne1(0, 0) + !ne1(1, 0) + !ne1(NAN, NAN) +# + !ne2(0, 1) + ne2(0, 0) + !ne2(1, 0) + !ne2(NAN, NAN) +# + !ne3(0, 1) + ne3(0, 0) + !ne3(1, 0) + !ne3(NAN, NAN) # ; # } # <<< |
