aboutsummaryrefslogtreecommitdiff
path: root/amd64
diff options
context:
space:
mode:
author Michael Forney <[email protected]> 2021-08-22 12:55:02 -0700
committer Quentin Carbonneaux <[email protected]> 2021-08-29 22:33:04 +0200
commit 7ac88f5d4874f03d62f48055eded26e9a08e54ac (patch)
tree 21469dfbb3dfbb1a198fe47061019d43e88e5159 /amd64
parent 804921a3ab463848aa0ffbe495ca542b3789c841 (diff)
amd64/isel: fix floating point == and != result with NaN
On x86_64, ucomis[sd] sets ZF=1, PF=0, CF=0 for equal arguments. However, if the arguments are unordered it sets ZF=1, PF=1, CF=1, and there is no jump/flag instruction for ZF=1 & PF=0 or ZF=1 & CF=0.

So, in order to correctly implement ceq[sd] on x86_64, we need to be a bit more creative. There are several options available, depending on whether the result of ceq[sd] is used with jnz, or with other instructions, or both.

If the result is used for a conditional jump, both gcc and clang use a combination of jp and jnz:

    ucomisd %xmm1, %xmm0
    jp .Lfalse
    jnz .Lfalse
    ...
    .Lfalse:

If the result is used in other instructions or return, gcc does the following for x == y:

    ucomisd %xmm1, %xmm0
    setnp %al
    movzbl %al, %eax
    movl $0, %edx
    cmovne %edx, %eax

This sets EAX to 1 if PF=0 (and to 0 otherwise), then uses cmovne to clear it if ZF=0. It also takes care to avoid clobbering the flags register in case the result is also used for a conditional jump. Implementing this approach in QBE would require adding an architecture-specific instruction for cmovne.

In contrast, clang does an additional compare, this time using cmpeqsd instead of ucomisd:

    cmpeqsd %xmm1, %xmm0
    movq %xmm0, %rax
    andl $1, %eax

The cmpeqsd instruction does a floating point equality test, setting XMM0 to all 1s if they are equal and all 0s if they are not. However, we need the result in a non-XMM register, so it moves the result back then masks off all but the first bit.

Both of these approaches are a bit awkward to implement in QBE, so instead, this commit does the following:

    ucomisd %xmm1, %xmm0
    setz %al
    movzbl %al, %eax
    setnp %cl
    movzbl %cl, %ecx
    andl %ecx, %eax

This sets the result by anding the two flags, but has a side effect of clobbering the flags register. This was a problem in one of my earlier patches to fix this issue[0], in addition to being more complex than I'd hoped.
Instead, this commit always leaves the ceq[sd] instruction in the block, even if the result is only used to control a jump, so that the above instruction sequence is always used. Then, since the final and/or instruction leaves the new ZF equal to !(ZF=1 & PF=0) for x == y, or !(ZF=0 | PF=1) for x != y (where the ZF and PF inside the parentheses are the flags produced by ucomis[sd]), we can use jnz for the jump instruction.

[0] https://git.sr.ht/~sircmpwn/qbe/commit/64833841b18c074a23b4a1254625315e05b86658
Diffstat (limited to 'amd64')
-rw-r--r-- amd64/isel.c 26
1 files changed, 24 insertions, 2 deletions
diff --git a/amd64/isel.c b/amd64/isel.c
index 07e6142..607c176 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -344,6 +344,26 @@ Emit:
if (isload(i.op))
goto case_Oload;
if (iscmp(i.op, &kc, &x)) {
+ switch (x) {
+ case NCmpI+Cfeq:
+ /* zf is set when operands are
+ * unordered, so we may have to
+ * check pf
+ */
+ r0 = newtmp("isel", Kw, fn);
+ r1 = newtmp("isel", Kw, fn);
+ emit(Oand, Kw, i.to, r0, r1);
+ emit(Oflagfo, k, r1, R, R);
+ i.to = r0;
+ break;
+ case NCmpI+Cfne:
+ r0 = newtmp("isel", Kw, fn);
+ r1 = newtmp("isel", Kw, fn);
+ emit(Oor, Kw, i.to, r0, r1);
+ emit(Oflagfuo, k, r1, R, R);
+ i.to = r0;
+ break;
+ }
swap = cmpswap(i.arg, x);
if (swap)
x = cmpop(x);
@@ -388,7 +408,7 @@ seljmp(Blk *b, Fn *fn)
r = b->jmp.arg;
t = &fn->tmp[r.val];
b->jmp.arg = R;
- assert(!req(r, R) && rtype(r) != RCon);
+ assert(rtype(r) == RTmp);
if (b->s1 == b->s2) {
chuse(r, -1, fn);
b->jmp.type = Jjmp;
@@ -400,7 +420,9 @@ seljmp(Blk *b, Fn *fn)
selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */
b->jmp.type = Jjf + Cine;
}
- else if (iscmp(fi->op, &k, &c)) {
+ else if (iscmp(fi->op, &k, &c)
+ && c != NCmpI+Cfeq /* see sel() */
+ && c != NCmpI+Cfne) {
swap = cmpswap(fi->arg, c);
if (swap)
c = cmpop(c);