Browse Source
* x64: Add AVX encodings of `vcvt{ss2sd,sd2ss}`. Additionally, update the instruction helpers to take an `XmmMem` argument to allow load sinking into the instruction.
* x64: Add AVX encoding of `sqrts{s,d}`
* x64: Add AVX support for `rounds{s,d}`
(branch: pull/6122/head)
Alex Crichton
2 years ago
committed by
GitHub
8 changed files with 543 additions and 11 deletions
@ -0,0 +1,104 @@ |
|||
test compile precise-output |
|||
set enable_simd |
|||
target x86_64 has_avx |
|||
|
|||
function %f1(f32) -> f32 { |
|||
block0(v0: f32): |
|||
v1 = ceil v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; vroundss $2, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; vroundss $2, %xmm0, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %f2(f64) -> f64 { |
|||
block0(v0: f64): |
|||
v1 = ceil v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; vroundsd $2, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; vroundsd $2, %xmm0, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %f4(f32x4) -> f32x4 { |
|||
block0(v0: f32x4): |
|||
v1 = ceil v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; vroundps $2, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; vroundps $2, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %f5(f64x2) -> f64x2 { |
|||
block0(v0: f64x2): |
|||
v1 = ceil v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; vroundpd $2, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; vroundpd $2, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
@ -0,0 +1,130 @@ |
|||
test compile precise-output |
|||
set enable_simd |
|||
target x86_64 has_avx |
|||
|
|||
function %fpromote(f32) -> f64 { |
|||
block0(v0: f32): |
|||
v1 = fpromote.f64 v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; vcvtss2sd %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; vcvtss2sd %xmm0, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %fpromote_load(i64, f32) -> f64 { |
|||
ss0 = explicit_slot 16 |
|||
|
|||
block0(v1: i64, v2: f32): |
|||
v3 = stack_addr.i64 ss0 |
|||
store.f32 v2, v3 |
|||
v4 = load.f32 v3 |
|||
v5 = fpromote.f64 v4 |
|||
return v5 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; subq %rsp, $16, %rsp |
|||
; block0: |
|||
; lea rsp(0 + virtual offset), %rdx |
|||
; vmovss %xmm0, 0(%rdx) |
|||
; vcvtss2sd 0(%rdx), %xmm0 |
|||
; addq %rsp, $16, %rsp |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; subq $0x10, %rsp |
|||
; block1: ; offset 0x8 |
|||
; leaq (%rsp), %rdx |
|||
; vmovss %xmm0, (%rdx) ; trap: heap_oob |
|||
; vcvtss2sd (%rdx), %xmm0, %xmm0 ; trap: heap_oob |
|||
; addq $0x10, %rsp |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %fdemote(f64) -> f32 { |
|||
block0(v0: f64): |
|||
v1 = fdemote.f32 v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; vcvtsd2ss %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; vcvtsd2ss %xmm0, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %fdemote_load(i64, f64) -> f32 { |
|||
ss0 = explicit_slot 16 |
|||
|
|||
block0(v1: i64, v2: f64): |
|||
v3 = stack_addr.i64 ss0 |
|||
store.f64 v2, v3 |
|||
v4 = load.f64 v3 |
|||
v5 = fdemote.f32 v4 |
|||
return v5 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; subq %rsp, $16, %rsp |
|||
; block0: |
|||
; lea rsp(0 + virtual offset), %rdx |
|||
; vmovsd %xmm0, 0(%rdx) |
|||
; vcvtsd2ss 0(%rdx), %xmm0 |
|||
; addq %rsp, $16, %rsp |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; subq $0x10, %rsp |
|||
; block1: ; offset 0x8 |
|||
; leaq (%rsp), %rdx |
|||
; vmovsd %xmm0, (%rdx) ; trap: heap_oob |
|||
; vcvtsd2ss (%rdx), %xmm0, %xmm0 ; trap: heap_oob |
|||
; addq $0x10, %rsp |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
@ -0,0 +1,130 @@ |
|||
test compile precise-output |
|||
set enable_simd |
|||
target x86_64 |
|||
|
|||
function %fpromote(f32) -> f64 { |
|||
block0(v0: f32): |
|||
v1 = fpromote.f64 v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; cvtss2sd %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; cvtss2sd %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %fpromote_load(i64, f32) -> f64 { |
|||
ss0 = explicit_slot 16 |
|||
|
|||
block0(v1: i64, v2: f32): |
|||
v3 = stack_addr.i64 ss0 |
|||
store.f32 v2, v3 |
|||
v4 = load.f32 v3 |
|||
v5 = fpromote.f64 v4 |
|||
return v5 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; subq %rsp, $16, %rsp |
|||
; block0: |
|||
; lea rsp(0 + virtual offset), %rdx |
|||
; movss %xmm0, 0(%rdx) |
|||
; cvtss2sd 0(%rdx), %xmm0 |
|||
; addq %rsp, $16, %rsp |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; subq $0x10, %rsp |
|||
; block1: ; offset 0x8 |
|||
; leaq (%rsp), %rdx |
|||
; movss %xmm0, (%rdx) ; trap: heap_oob |
|||
; cvtss2sd (%rdx), %xmm0 ; trap: heap_oob |
|||
; addq $0x10, %rsp |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %fdemote(f64) -> f32 { |
|||
block0(v0: f64): |
|||
v1 = fdemote.f32 v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; cvtsd2ss %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; cvtsd2ss %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %fdemote_load(i64, f64) -> f32 { |
|||
ss0 = explicit_slot 16 |
|||
|
|||
block0(v1: i64, v2: f64): |
|||
v3 = stack_addr.i64 ss0 |
|||
store.f64 v2, v3 |
|||
v4 = load.f64 v3 |
|||
v5 = fdemote.f32 v4 |
|||
return v5 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; subq %rsp, $16, %rsp |
|||
; block0: |
|||
; lea rsp(0 + virtual offset), %rdx |
|||
; movsd %xmm0, 0(%rdx) |
|||
; cvtsd2ss 0(%rdx), %xmm0 |
|||
; addq %rsp, $16, %rsp |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; subq $0x10, %rsp |
|||
; block1: ; offset 0x8 |
|||
; leaq (%rsp), %rdx |
|||
; movsd %xmm0, (%rdx) ; trap: heap_oob |
|||
; cvtsd2ss (%rdx), %xmm0 ; trap: heap_oob |
|||
; addq $0x10, %rsp |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
@ -0,0 +1,54 @@ |
|||
test compile precise-output |
|||
set enable_simd |
|||
target x86_64 has_avx |
|||
|
|||
function %sqrt_f32(f32) -> f32 { |
|||
block0(v0: f32): |
|||
v1 = sqrt v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; vsqrtss %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; vsqrtss %xmm0, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %sqrt_f64(f64) -> f64 { |
|||
block0(v0: f64): |
|||
v1 = sqrt v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; vsqrtsd %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; vsqrtsd %xmm0, %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
@ -0,0 +1,54 @@ |
|||
test compile precise-output |
|||
set enable_simd |
|||
target x86_64 |
|||
|
|||
function %sqrt_f32(f32) -> f32 { |
|||
block0(v0: f32): |
|||
v1 = sqrt v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; sqrtss %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; sqrtss %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
|||
function %sqrt_f64(f64) -> f64 { |
|||
block0(v0: f64): |
|||
v1 = sqrt v0 |
|||
return v1 |
|||
} |
|||
|
|||
; VCode: |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block0: |
|||
; sqrtsd %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; ret |
|||
; |
|||
; Disassembled: |
|||
; block0: ; offset 0x0 |
|||
; pushq %rbp |
|||
; movq %rsp, %rbp |
|||
; block1: ; offset 0x4 |
|||
; sqrtsd %xmm0, %xmm0 |
|||
; movq %rbp, %rsp |
|||
; popq %rbp |
|||
; retq |
|||
|
Loading…
Reference in new issue