Browse Source
* NaN-canonicalization without branching on x64
Modify the cranelift pass that performs NaN-canonicalization to avoid
branches on x64. The current implementation uses two branches.
* remove old fcmp case
* Revert "remove old fcmp case"
This reverts commit 48c3712b7e.
* add filetests
* use old version for riscv
pull/8318/head
Adam Bratschi-Kaye
7 months ago
committed by
GitHub
9 changed files with 789 additions and 10 deletions
@@ -0,0 +1,140 @@ |
|||
test compile precise-output |
|||
set enable_nan_canonicalization=true |
|||
target x86_64 sse41 |
|||
|
|||
function %f0(f32x4, f32x4) -> f32x4 { |
|||
block0(v0: f32x4, v1: f32x4): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addps %xmm0, %xmm1, %xmm0 |
|||
; movl $2143289344, %r10d |
|||
; movd %r10d, %xmm7 |
|||
; shufps $0, %xmm7, const(0), %xmm7 |
|||
; movdqa %xmm0, %xmm1 |
|||
; cmpps $3, %xmm1, %xmm0, %xmm1 |
|||
; movdqa %xmm0, %xmm2 |
|||
; movdqa %xmm1, %xmm0 |
|||
; movdqa %xmm2, %xmm1 |
|||
; pblendvb %xmm1, %xmm7, %xmm1 |
|||
; movdqa %xmm1, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addps %xmm1, %xmm0 |
|||
; movl $0x7fc00000, %r10d |
|||
; movd %r10d, %xmm7 |
|||
; shufps $0, 0x26(%rip), %xmm7 |
|||
; movdqa %xmm0, %xmm1 |
|||
; cmpunordps %xmm0, %xmm1 |
|||
; movdqa %xmm0, %xmm2 |
|||
; movdqa %xmm1, %xmm0 |
|||
; movdqa %xmm2, %xmm1 |
|||
; pblendvb %xmm0, %xmm7, %xmm1 |
|||
; movdqa %xmm1, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; sarb $0, (%rdi) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
|
|||
function %f1(f64, f64) -> f64 { |
|||
block0(v0: f64, v1: f64): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addsd %xmm0, %xmm1, %xmm0 |
|||
; movabsq $9221120237041090560, %r9 |
|||
; movq %r9, %xmm1 |
|||
; movdqa %xmm0, %xmm7 |
|||
; cmppd $3, %xmm7, %xmm0, %xmm7 |
|||
; movdqa %xmm0, %xmm5 |
|||
; movdqa %xmm7, %xmm0 |
|||
; pblendvb %xmm5, %xmm1, %xmm5 |
|||
; movdqa %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addsd %xmm1, %xmm0 |
|||
; movabsq $0x7ff8000000000000, %r9 |
|||
; movq %r9, %xmm1 |
|||
; movdqa %xmm0, %xmm7 |
|||
; cmpunordpd %xmm0, %xmm7 |
|||
; movdqa %xmm0, %xmm5 |
|||
; movdqa %xmm7, %xmm0 |
|||
; pblendvb %xmm0, %xmm1, %xmm5 |
|||
; movdqa %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %f1(f32, f32) -> f32 { |
|||
block0(v0: f32, v1: f32): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addss %xmm0, %xmm1, %xmm0 |
|||
; movl $2143289344, %r9d |
|||
; movd %r9d, %xmm1 |
|||
; movdqa %xmm0, %xmm7 |
|||
; cmpps $3, %xmm7, %xmm0, %xmm7 |
|||
; movdqa %xmm0, %xmm5 |
|||
; movdqa %xmm7, %xmm0 |
|||
; pblendvb %xmm5, %xmm1, %xmm5 |
|||
; movdqa %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addss %xmm1, %xmm0 |
|||
; movl $0x7fc00000, %r9d |
|||
; movd %r9d, %xmm1 |
|||
; movdqa %xmm0, %xmm7 |
|||
; cmpunordps %xmm0, %xmm7 |
|||
; movdqa %xmm0, %xmm5 |
|||
; movdqa %xmm7, %xmm0 |
|||
; pblendvb %xmm0, %xmm1, %xmm5 |
|||
; movdqa %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
@@ -0,0 +1,160 @@ |
|||
test compile precise-output |
|||
set enable_nan_canonicalization=true |
|||
target riscv64 has_v |
|||
|
|||
function %f0(f32x4, f32x4) -> f32x4 { |
|||
block0(v0: f32x4, v1: f32x4): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; addi sp,sp,-16 |
|||
; sd ra,8(sp) |
|||
; sd fp,0(sp) |
|||
; mv fp,sp |
|||
; block0: |
|||
; vle8.v v9,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) |
|||
; vle8.v v11,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) |
|||
; vfadd.vv v13,v9,v11 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; lui a1,523264 |
|||
; fmv.w.x fa2,a1 |
|||
; vfmv.v.f v14,fa2 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmfne.vv v10,v13,v13 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmfne.vv v12,v13,v13 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmor.mm v0,v10,v12 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmerge.vvm v8,v13,v14,v0.t #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) |
|||
; ld ra,8(sp) |
|||
; ld fp,0(sp) |
|||
; addi sp,sp,16 |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; addi sp, sp, -0x10 |
|||
; sd ra, 8(sp) |
|||
; sd s0, 0(sp) |
|||
; mv s0, sp |
|||
; block1: ; offset 0x10 |
|||
; .byte 0x57, 0x70, 0x08, 0xcc |
|||
; addi t6, s0, 0x10 |
|||
; .byte 0x87, 0x84, 0x0f, 0x02 |
|||
; addi t6, s0, 0x20 |
|||
; .byte 0x87, 0x85, 0x0f, 0x02 |
|||
; .byte 0x57, 0x70, 0x02, 0xcd |
|||
; .byte 0xd7, 0x96, 0x95, 0x02 |
|||
; lui a1, 0x7fc00 |
|||
; fmv.w.x fa2, a1 |
|||
; .byte 0x57, 0x57, 0x06, 0x5e |
|||
; .byte 0x57, 0x95, 0xd6, 0x72 |
|||
; .byte 0x57, 0x96, 0xd6, 0x72 |
|||
; .byte 0x57, 0x20, 0xa6, 0x6a |
|||
; .byte 0x57, 0x04, 0xd7, 0x5c |
|||
; .byte 0x57, 0x70, 0x08, 0xcc |
|||
; .byte 0x27, 0x04, 0x05, 0x02 |
|||
; ld ra, 8(sp) |
|||
; ld s0, 0(sp) |
|||
; addi sp, sp, 0x10 |
|||
; ret |
|||
|
|||
function %f1(f64, f64) -> f64 { |
|||
block0(v0: f64, v1: f64): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; block0: |
|||
; fadd.d fa1,fa0,fa1,rne |
|||
; lui a5,4095 |
|||
; slli a1,a5,39 |
|||
; fmv.d.x fa3,a1 |
|||
; vmv.v.x v8,zero #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vfmv.s.f v10,fa3 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmv.v.i v0,1 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmerge.vvm v14,v8,v10,v0.t #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmv.v.x v8,zero #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vfmv.s.f v10,fa1 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmv.v.i v0,1 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmerge.vvm v15,v8,v10,v0.t #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmfne.vv v8,v15,v15 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmfne.vv v10,v15,v15 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmor.mm v0,v8,v10 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmerge.vvm v8,v15,v14,v0.t #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vfmv.f.s fa0,v8 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; fadd.d fa1, fa0, fa1, rne |
|||
; lui a5, 0xfff |
|||
; slli a1, a5, 0x27 |
|||
; fmv.d.x fa3, a1 |
|||
; .byte 0x57, 0x70, 0x81, 0xcd |
|||
; .byte 0x57, 0x44, 0x00, 0x5e |
|||
; .byte 0x57, 0xd5, 0x06, 0x42 |
|||
; .byte 0x57, 0xb0, 0x00, 0x5e |
|||
; .byte 0x57, 0x07, 0x85, 0x5c |
|||
; .byte 0x57, 0x44, 0x00, 0x5e |
|||
; .byte 0x57, 0xd5, 0x05, 0x42 |
|||
; .byte 0x57, 0xb0, 0x00, 0x5e |
|||
; .byte 0xd7, 0x07, 0x85, 0x5c |
|||
; .byte 0x57, 0x94, 0xf7, 0x72 |
|||
; .byte 0x57, 0x95, 0xf7, 0x72 |
|||
; .byte 0x57, 0x20, 0x85, 0x6a |
|||
; .byte 0x57, 0x04, 0xf7, 0x5c |
|||
; .byte 0x57, 0x15, 0x80, 0x42 |
|||
; ret |
|||
|
|||
function %f1(f32, f32) -> f32 { |
|||
block0(v0: f32, v1: f32): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; block0: |
|||
; fadd.s fa0,fa0,fa1,rne |
|||
; lui a5,523264 |
|||
; fmv.w.x fa1,a5 |
|||
; vmv.v.x v15,zero #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vfmv.s.f v9,fa1 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmv.v.i v0,1 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmerge.vvm v13,v15,v9,v0.t #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmv.v.x v15,zero #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vfmv.s.f v9,fa0 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmv.v.i v0,1 #avl=2, #vtype=(e64, m1, ta, ma) |
|||
; vmerge.vvm v14,v15,v9,v0.t #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmfne.vv v15,v14,v14 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmfne.vv v9,v14,v14 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmor.mm v0,v15,v9 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vmerge.vvm v15,v14,v13,v0.t #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; vfmv.f.s fa0,v15 #avl=4, #vtype=(e32, m1, ta, ma) |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; fadd.s fa0, fa0, fa1, rne |
|||
; lui a5, 0x7fc00 |
|||
; fmv.w.x fa1, a5 |
|||
; .byte 0x57, 0x70, 0x02, 0xcd |
|||
; .byte 0xd7, 0x47, 0x00, 0x5e |
|||
; .byte 0xd7, 0xd4, 0x05, 0x42 |
|||
; .byte 0x57, 0x70, 0x81, 0xcd |
|||
; .byte 0x57, 0xb0, 0x00, 0x5e |
|||
; .byte 0x57, 0x70, 0x02, 0xcd |
|||
; .byte 0xd7, 0x86, 0xf4, 0x5c |
|||
; .byte 0xd7, 0x47, 0x00, 0x5e |
|||
; .byte 0xd7, 0x54, 0x05, 0x42 |
|||
; .byte 0x57, 0x70, 0x81, 0xcd |
|||
; .byte 0x57, 0xb0, 0x00, 0x5e |
|||
; .byte 0x57, 0x70, 0x02, 0xcd |
|||
; .byte 0x57, 0x87, 0xf4, 0x5c |
|||
; .byte 0xd7, 0x17, 0xe7, 0x72 |
|||
; .byte 0xd7, 0x14, 0xe7, 0x72 |
|||
; .byte 0x57, 0xa0, 0xf4, 0x6a |
|||
; .byte 0xd7, 0x87, 0xe6, 0x5c |
|||
; .byte 0x57, 0x15, 0xf0, 0x42 |
|||
; ret |
|||
|
@@ -0,0 +1,66 @@ |
|||
test compile precise-output |
|||
set enable_nan_canonicalization=true |
|||
target riscv64 |
|||
|
|||
function %f1(f64, f64) -> f64 { |
|||
block0(v0: f64, v1: f64): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; block0: |
|||
; fadd.d fa4,fa0,fa1,rne |
|||
; lui a2,4095 |
|||
; slli a4,a2,39 |
|||
; fmv.d.x fa0,a4 |
|||
; feq.d a1,fa4,fa4 |
|||
; feq.d a3,fa4,fa4 |
|||
; and a5,a1,a3 |
|||
; select fa0,fa0,fa4##condition=(a5 eq zero) |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; fadd.d fa4, fa0, fa1, rne |
|||
; lui a2, 0xfff |
|||
; slli a4, a2, 0x27 |
|||
; fmv.d.x fa0, a4 |
|||
; feq.d a1, fa4, fa4 |
|||
; feq.d a3, fa4, fa4 |
|||
; and a5, a1, a3 |
|||
; beqz a5, 8 |
|||
; fmv.d fa0, fa4 |
|||
; ret |
|||
|
|||
function %f1(f32, f32) -> f32 { |
|||
block0(v0: f32, v1: f32): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; block0: |
|||
; fadd.s fa3,fa0,fa1,rne |
|||
; lui a2,523264 |
|||
; fmv.w.x fa4,a2 |
|||
; feq.s a0,fa3,fa3 |
|||
; feq.s a2,fa3,fa3 |
|||
; and a4,a0,a2 |
|||
; select fa0,fa4,fa3##condition=(a4 eq zero) |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; fadd.s fa3, fa0, fa1, rne |
|||
; lui a2, 0x7fc00 |
|||
; fmv.w.x fa4, a2 |
|||
; feq.s a0, fa3, fa3 |
|||
; feq.s a2, fa3, fa3 |
|||
; and a4, a0, a2 |
|||
; bnez a4, 0xc |
|||
; fmv.d fa0, fa4 |
|||
; j 8 |
|||
; fmv.d fa0, fa3 |
|||
; ret |
|||
|
@@ -0,0 +1,112 @@ |
|||
test compile precise-output |
|||
set enable_nan_canonicalization=true |
|||
target s390x |
|||
|
|||
function %f0(f32x4, f32x4) -> f32x4 { |
|||
block0(v0: f32x4, v1: f32x4): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; block0: |
|||
; vfasb %v17, %v24, %v25 |
|||
; bras %r1, 8 ; data.f32 NaN ; vlef %v18, 0(%r1), 0 |
|||
; vrepf %v18, %v18, 0 |
|||
; vfchesb %v7, %v17, %v17 |
|||
; vfchesb %v19, %v17, %v17 |
|||
; vno %v19, %v7, %v19 |
|||
; vsel %v24, %v18, %v17, %v19 |
|||
; br %r14 |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; vfasb %v17, %v24, %v25 |
|||
; bras %r1, 0xe |
|||
; su %f12, 0 |
|||
; vlef %v18, 0(%r1), 0 |
|||
; vrepf %v18, %v18, 0 |
|||
; vfchesb %v7, %v17, %v17 |
|||
; vfchesb %v19, %v17, %v17 |
|||
; vno %v19, %v7, %v19 |
|||
; vsel %v24, %v18, %v17, %v19 |
|||
; br %r14 |
|||
|
|||
function %f1(f64, f64) -> f64 { |
|||
block0(v0: f64, v1: f64): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; block0: |
|||
; wfadb %v21, %f0, %f2 |
|||
; bras %r1, 12 ; data.f64 NaN ; vleg %v22, 0(%r1), 0 |
|||
; vgbm %v20, 0 |
|||
; vpdi %v22, %v22, %v20, 0 |
|||
; vgbm %v20, 0 |
|||
; vpdi %v23, %v21, %v20, 0 |
|||
; vfchedb %v19, %v23, %v23 |
|||
; vfchedb %v21, %v23, %v23 |
|||
; vno %v24, %v19, %v21 |
|||
; vsel %v21, %v22, %v23, %v24 |
|||
; vrepg %v0, %v21, 0 |
|||
; br %r14 |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; wfadb %v21, %f0, %f2 |
|||
; bras %r1, 0x12 |
|||
; su %f15, 0(%r8) |
|||
; .byte 0x00, 0x00 |
|||
; .byte 0x00, 0x00 |
|||
; vleg %v22, 0(%r1), 0 |
|||
; vzero %v20 |
|||
; vpdi %v22, %v22, %v20, 0 |
|||
; vzero %v20 |
|||
; vpdi %v23, %v21, %v20, 0 |
|||
; vfchedb %v19, %v23, %v23 |
|||
; vfchedb %v21, %v23, %v23 |
|||
; vno %v24, %v19, %v21 |
|||
; vsel %v21, %v22, %v23, %v24 |
|||
; vrepg %v0, %v21, 0 |
|||
; br %r14 |
|||
|
|||
function %f1(f32, f32) -> f32 { |
|||
block0(v0: f32, v1: f32): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; block0: |
|||
; wfasb %v21, %f0, %f2 |
|||
; bras %r1, 8 ; data.f32 NaN ; vlef %v22, 0(%r1), 0 |
|||
; vgbm %v20, 61440 |
|||
; vn %v22, %v22, %v20 |
|||
; vgbm %v20, 61440 |
|||
; vn %v23, %v21, %v20 |
|||
; vfchesb %v19, %v23, %v23 |
|||
; vfchesb %v21, %v23, %v23 |
|||
; vno %v24, %v19, %v21 |
|||
; vsel %v21, %v22, %v23, %v24 |
|||
; vrepf %v0, %v21, 0 |
|||
; br %r14 |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; wfasb %v21, %f0, %f2 |
|||
; bras %r1, 0xe |
|||
; su %f12, 0 |
|||
; vlef %v22, 0(%r1), 0 |
|||
; vgbm %v20, 0xf000 |
|||
; vn %v22, %v22, %v20 |
|||
; vgbm %v20, 0xf000 |
|||
; vn %v23, %v21, %v20 |
|||
; vfchesb %v19, %v23, %v23 |
|||
; vfchesb %v21, %v23, %v23 |
|||
; vno %v24, %v19, %v21 |
|||
; vsel %v21, %v22, %v23, %v24 |
|||
; vrepf %v0, %v21, 0 |
|||
; br %r14 |
|||
|
@@ -0,0 +1,140 @@ |
|||
test compile precise-output |
|||
set enable_nan_canonicalization=true |
|||
target x86_64 sse41 |
|||
|
|||
function %f0(f32x4, f32x4) -> f32x4 { |
|||
block0(v0: f32x4, v1: f32x4): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addps %xmm0, %xmm1, %xmm0 |
|||
; movl $2143289344, %r10d |
|||
; movd %r10d, %xmm7 |
|||
; shufps $0, %xmm7, const(0), %xmm7 |
|||
; movdqa %xmm0, %xmm1 |
|||
; cmpps $3, %xmm1, %xmm0, %xmm1 |
|||
; movdqa %xmm0, %xmm2 |
|||
; movdqa %xmm1, %xmm0 |
|||
; movdqa %xmm2, %xmm1 |
|||
; pblendvb %xmm1, %xmm7, %xmm1 |
|||
; movdqa %xmm1, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addps %xmm1, %xmm0 |
|||
; movl $0x7fc00000, %r10d |
|||
; movd %r10d, %xmm7 |
|||
; shufps $0, 0x26(%rip), %xmm7 |
|||
; movdqa %xmm0, %xmm1 |
|||
; cmpunordps %xmm0, %xmm1 |
|||
; movdqa %xmm0, %xmm2 |
|||
; movdqa %xmm1, %xmm0 |
|||
; movdqa %xmm2, %xmm1 |
|||
; pblendvb %xmm0, %xmm7, %xmm1 |
|||
; movdqa %xmm1, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; sarb $0, (%rdi) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
|
|||
function %f1(f64, f64) -> f64 { |
|||
block0(v0: f64, v1: f64): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addsd %xmm0, %xmm1, %xmm0 |
|||
; movabsq $9221120237041090560, %r9 |
|||
; movq %r9, %xmm1 |
|||
; movdqa %xmm0, %xmm7 |
|||
; cmppd $3, %xmm7, %xmm0, %xmm7 |
|||
; movdqa %xmm0, %xmm5 |
|||
; movdqa %xmm7, %xmm0 |
|||
; pblendvb %xmm5, %xmm1, %xmm5 |
|||
; movdqa %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addsd %xmm1, %xmm0 |
|||
; movabsq $0x7ff8000000000000, %r9 |
|||
; movq %r9, %xmm1 |
|||
; movdqa %xmm0, %xmm7 |
|||
; cmpunordpd %xmm0, %xmm7 |
|||
; movdqa %xmm0, %xmm5 |
|||
; movdqa %xmm7, %xmm0 |
|||
; pblendvb %xmm0, %xmm1, %xmm5 |
|||
; movdqa %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %f1(f32, f32) -> f32 { |
|||
block0(v0: f32, v1: f32): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addss %xmm0, %xmm1, %xmm0 |
|||
; movl $2143289344, %r9d |
|||
; movd %r9d, %xmm1 |
|||
; movdqa %xmm0, %xmm7 |
|||
; cmpps $3, %xmm7, %xmm0, %xmm7 |
|||
; movdqa %xmm0, %xmm5 |
|||
; movdqa %xmm7, %xmm0 |
|||
; pblendvb %xmm5, %xmm1, %xmm5 |
|||
; movdqa %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addss %xmm1, %xmm0 |
|||
; movl $0x7fc00000, %r9d |
|||
; movd %r9d, %xmm1 |
|||
; movdqa %xmm0, %xmm7 |
|||
; cmpunordps %xmm0, %xmm7 |
|||
; movdqa %xmm0, %xmm5 |
|||
; movdqa %xmm7, %xmm0 |
|||
; pblendvb %xmm0, %xmm1, %xmm5 |
|||
; movdqa %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
@@ -0,0 +1,131 @@ |
|||
test compile precise-output |
|||
set enable_nan_canonicalization=true |
|||
target x86_64 |
|||
|
|||
function %f0(f32x4, f32x4) -> f32x4 { |
|||
block0(v0: f32x4, v1: f32x4): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addps %xmm0, %xmm1, %xmm0 |
|||
; movdqa %xmm0, %xmm1 |
|||
; movl $2143289344, %esi |
|||
; movd %esi, %xmm5 |
|||
; shufps $0, %xmm5, const(0), %xmm5 |
|||
; cmpps $3, %xmm0, %xmm1, %xmm0 |
|||
; andps %xmm5, %xmm0, %xmm5 |
|||
; andnps %xmm0, %xmm1, %xmm0 |
|||
; orps %xmm0, %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addps %xmm1, %xmm0 |
|||
; movdqa %xmm0, %xmm1 |
|||
; movl $0x7fc00000, %esi |
|||
; movd %esi, %xmm5 |
|||
; shufps $0, 0x14(%rip), %xmm5 |
|||
; cmpunordps %xmm1, %xmm0 |
|||
; andps %xmm0, %xmm5 |
|||
; andnps %xmm1, %xmm0 |
|||
; orps %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; sarb $0, (%rdi) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
; addb %al, (%rax) |
|||
|
|||
function %f1(f64, f64) -> f64 { |
|||
block0(v0: f64, v1: f64): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addsd %xmm0, %xmm1, %xmm0 |
|||
; movdqa %xmm0, %xmm7 |
|||
; movabsq $9221120237041090560, %r11 |
|||
; movq %r11, %xmm5 |
|||
; cmppd $3, %xmm0, %xmm7, %xmm0 |
|||
; andpd %xmm5, %xmm0, %xmm5 |
|||
; andnpd %xmm0, %xmm7, %xmm0 |
|||
; orpd %xmm0, %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addsd %xmm1, %xmm0 |
|||
; movdqa %xmm0, %xmm7 |
|||
; movabsq $0x7ff8000000000000, %r11 |
|||
; movq %r11, %xmm5 |
|||
; cmpunordpd %xmm7, %xmm0 |
|||
; andpd %xmm0, %xmm5 |
|||
; andnpd %xmm7, %xmm0 |
|||
; orpd %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %f1(f32, f32) -> f32 { |
|||
block0(v0: f32, v1: f32): |
|||
v2 = fadd v0, v1 |
|||
return v2 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; addss %xmm0, %xmm1, %xmm0 |
|||
; movdqa %xmm0, %xmm7 |
|||
; movl $2143289344, %r11d |
|||
; movd %r11d, %xmm5 |
|||
; cmpps $3, %xmm0, %xmm7, %xmm0 |
|||
; andps %xmm5, %xmm0, %xmm5 |
|||
; andnps %xmm0, %xmm7, %xmm0 |
|||
; orps %xmm0, %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; addss %xmm1, %xmm0 |
|||
; movdqa %xmm0, %xmm7 |
|||
; movl $0x7fc00000, %r11d |
|||
; movd %r11d, %xmm5 |
|||
; cmpunordps %xmm7, %xmm0 |
|||
; andps %xmm0, %xmm5 |
|||
; andnps %xmm7, %xmm0 |
|||
; orps %xmm5, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
Loading…
Reference in new issue