Browse Source

riscv64: Implement SIMD floating point conversion instructions (#6924)

* riscv64: Implement SIMD `fvpromote_low`/`fvdemote`

* riscv64: Implement SIMD `fcvt_from_{u,s}int`

* riscv64: Implement SIMD `fcvt_to_{u,s}int_sat`

* riscv64: Use `i8_to_imm5` constructor
pull/6941/head
Afonso Bordado 1 year ago
committed by GitHub
parent
commit
55fa2e7ccd
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 12
      build.rs
  2. 31
      cranelift/codegen/src/isa/riscv64/inst/vector.rs
  3. 14
      cranelift/codegen/src/isa/riscv64/inst_vector.isle
  4. 53
      cranelift/codegen/src/isa/riscv64/lower.isle
  5. 43
      cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif
  6. 43
      cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif
  7. 47
      cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif
  8. 47
      cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif
  9. 49
      cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif
  10. 43
      cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif
  11. 79
      cranelift/filetests/filetests/runtests/simd-conversion.clif
  12. 17
      cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif
  13. 18
      cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif
  14. 18
      cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif
  15. 28
      cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif
  16. 22
      cranelift/filetests/filetests/runtests/simd-fvdemote.clif
  17. 21
      cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif

12
build.rs

@ -254,17 +254,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
return true;
}
let known_failure = [
"canonicalize_nan",
"cvt_from_uint",
"issue_3327_bnot_lowering",
"simd_conversions",
"simd_i32x4_trunc_sat_f32x4",
"simd_i32x4_trunc_sat_f64x2",
"simd_load",
"simd_splat",
]
.contains(&testname);
let known_failure = ["issue_3327_bnot_lowering"].contains(&testname);
known_failure
}

31
cranelift/codegen/src/isa/riscv64/inst/vector.rs

@ -757,7 +757,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => 0b010010,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => 0b010010,
}
}
@ -779,7 +781,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => VecOpCategory::OPFVV,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => VecOpCategory::OPFVV,
VecAluOpRR::VmvVV => VecOpCategory::OPIVV,
VecAluOpRR::VmvVX => VecOpCategory::OPIVX,
}
@ -806,12 +810,17 @@ impl VecAluOpRR {
VecAluOpRR::VzextVF2 => 0b00110,
VecAluOpRR::VsextVF2 => 0b00111,
// VFUNARY0
// single-width converts
VecAluOpRR::VfcvtxufV => 0b00000,
VecAluOpRR::VfcvtxfV => 0b00001,
VecAluOpRR::VfcvtrtzxufV => 0b00110,
VecAluOpRR::VfcvtrtzxfV => 0b00111,
VecAluOpRR::VfcvtfxuV => 0b00010,
VecAluOpRR::VfcvtfxV => 0b00011,
// widening converts
VecAluOpRR::VfwcvtffV => 0b01100,
// narrowing converts
VecAluOpRR::VfncvtffW => 0b10100,
// These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states:
// > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved.
VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0,
@ -837,7 +846,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => true,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => true,
VecAluOpRR::VmvSX
| VecAluOpRR::VfmvSF
| VecAluOpRR::VmvVV
@ -865,7 +876,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => RegClass::Vector,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => RegClass::Vector,
VecAluOpRR::VmvXS => RegClass::Int,
VecAluOpRR::VfmvFS => RegClass::Float,
}
@ -888,7 +901,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => RegClass::Vector,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => RegClass::Vector,
VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float,
VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int,
}
@ -902,7 +917,9 @@ impl VecAluOpRR {
| VecAluOpRR::VzextVF8
| VecAluOpRR::VsextVF2
| VecAluOpRR::VsextVF4
| VecAluOpRR::VsextVF8 => true,
| VecAluOpRR::VsextVF8
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => true,
_ => false,
}
}
@ -931,6 +948,8 @@ impl fmt::Display for VecAluOpRR {
VecAluOpRR::VfcvtrtzxfV => "vfcvt.rtz.x.f.v",
VecAluOpRR::VfcvtfxuV => "vfcvt.f.xu.v",
VecAluOpRR::VfcvtfxV => "vfcvt.f.x.v",
VecAluOpRR::VfwcvtffV => "vfwcvt.f.f.v",
VecAluOpRR::VfncvtffW => "vfncvt.f.f.w",
})
}
}

14
cranelift/codegen/src/isa/riscv64/inst_vector.isle

@ -291,6 +291,8 @@
(VfcvtrtzxfV)
(VfcvtfxuV)
(VfcvtfxV)
(VfwcvtffV)
(VfncvtffW)
))
;; Returns the canonical destination type for a VecAluOpRRImm5.
@ -1060,6 +1062,18 @@
(rule (rv_vfcvt_f_x_v vs mask vstate)
(vec_alu_rr (VecAluOpRR.VfcvtfxV) vs mask vstate))
;; Helper for emitting the `vfwcvt.f.f.v` instruction.
;; Convert single-width float to double-width float.
(decl rv_vfwcvt_f_f_v (VReg VecOpMasking VState) VReg)
(rule (rv_vfwcvt_f_f_v vs mask vstate)
(vec_alu_rr (VecAluOpRR.VfwcvtffV) vs mask vstate))
;; Helper for emitting the `vfncvt.f.f.w` instruction.
;; Convert double-width float to single-width float.
(decl rv_vfncvt_f_f_w (VReg VecOpMasking VState) VReg)
(rule (rv_vfncvt_f_f_w vs mask vstate)
(vec_alu_rr (VecAluOpRR.VfncvtffW) vs mask vstate))
;; Helper for emitting the `vslidedown.vx` instruction.
;; `vslidedown` moves all elements in the vector down by n elements.
;; The top most elements are up to the tail policy.

53
cranelift/codegen/src/isa/riscv64/lower.isle

@ -1201,10 +1201,26 @@
(rule (lower (fpromote x))
(rv_fcvtds x))
;;;;; Rules for `fvpromote_low`;;;;;;;;;;;;
(rule (lower (has_type (ty_vec_fits_in_register ty) (fvpromote_low x)))
(if-let half_ty (ty_half_width ty))
(rv_vfwcvt_f_f_v x (unmasked) (vstate_mf2 half_ty)))
;;;;; Rules for `fdemote`;;;;;;;;;;;;;;;;;;
(rule (lower (fdemote x))
(rv_fcvtsd x))
;;;;; Rules for `fvdemote`;;;;;;;;;;;;;;;;;
;; `vfncvt...` leaves the upper bits of the register undefined so
;; we need to zero them out.
(rule (lower (has_type (ty_vec_fits_in_register ty @ $F32X4) (fvdemote x)))
(if-let zero (i8_to_imm5 0))
(let ((narrow VReg (rv_vfncvt_f_f_w x (unmasked) (vstate_mf2 ty)))
(mask VReg (gen_vec_mask 0xC)))
(rv_vmerge_vim narrow zero mask ty)))
;;;;; Rules for for float arithmetic
@ -1613,39 +1629,52 @@
(gen_fcvt_int $false v $false from to))
;;;;; Rules for `fcvt_to_sint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_sint v @ (value_type from))))
(rule 0 (lower (has_type to (fcvt_to_sint v @ (value_type (ty_scalar_float from)))))
(gen_fcvt_int $false v $true from to))
;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_sint_sat v @ (value_type from))))
(rule 0 (lower (has_type to (fcvt_to_sint_sat v @ (value_type (ty_scalar_float from)))))
(gen_fcvt_int $true v $true from to))
(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_sint_sat v @ (value_type from_ty))))
(if-let zero (i8_to_imm5 0))
(let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty))
(cvt VReg (rv_vfcvt_rtz_x_f_v v (unmasked) from_ty)))
(rv_vmerge_vim cvt zero is_nan from_ty)))
;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_uint_sat v @ (value_type from))))
(rule 0 (lower (has_type to (fcvt_to_uint_sat v @ (value_type (ty_scalar_float from)))))
(gen_fcvt_int $true v $false from to))
(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_uint_sat v @ (value_type from_ty))))
(if-let zero (i8_to_imm5 0))
(let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty))
(cvt VReg (rv_vfcvt_rtz_xu_f_v v (unmasked) from_ty)))
(rv_vmerge_vim cvt zero is_nan from_ty)))
;;;;; Rules for `fcvt_from_sint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_from_sint v @ (value_type from_ty))))
(rule 0 (lower (has_type (ty_scalar_float to) (fcvt_from_sint v @ (value_type from_ty))))
(let ((float_op FpuOPRR (int_convert_2_float_op from_ty $true to))
(value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Signed))))
(fpu_rr float_op to value)))
(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_sint v @ (value_type from_ty))))
(rv_vfcvt_f_x_v v (unmasked) from_ty))
;;;;; Rules for `fcvt_from_uint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_from_uint v @ (value_type from_ty))))
(rule 0 (lower (has_type (ty_scalar_float to) (fcvt_from_uint v @ (value_type from_ty))))
(let ((float_op FpuOPRR (int_convert_2_float_op from_ty $false to))
(value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Zero))))
(fpu_rr float_op to value)))
(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_uint v @ (value_type from_ty))))
(rv_vfcvt_f_xu_v v (unmasked) from_ty))
;;;;; Rules for `symbol_value`;;;;;;;;;
(rule
(lower (symbol_value (symbol_value_data name _ offset)))
(load_ext_name name offset)
)
(load_ext_name name offset))
;;;;; Rules for `bitcast`;;;;;;;;;
(rule
(lower (has_type out_ty (bitcast _ v @ (value_type in_ty))))

43
cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif

@ -0,0 +1,43 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %fcvt_from_sint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_sint.f32x4 v0
return v1
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vfcvt.f.x.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x92, 0x11, 0x4a
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x02, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

43
cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif

@ -0,0 +1,43 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vfcvt.f.xu.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x12, 0x11, 0x4a
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x02, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

47
cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif

@ -0,0 +1,47 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %fcvt_to_sint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_sint_sat.i32x4 v0
return v1
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vmfne.vv v0,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vfcvt.rtz.x.f.v v6,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vmerge.vim v8,v6,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x90, 0x10, 0x72
; .byte 0x57, 0x93, 0x13, 0x4a
; .byte 0x57, 0x34, 0x60, 0x5c
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x04, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

47
cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif

@ -0,0 +1,47 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %fcvt_to_uint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_uint_sat.i32x4 v0
return v1
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vmfne.vv v0,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vfcvt.rtz.xu.f.v v6,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vmerge.vim v8,v6,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x90, 0x10, 0x72
; .byte 0x57, 0x13, 0x13, 0x4a
; .byte 0x57, 0x34, 0x60, 0x5c
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x04, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

49
cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif

@ -0,0 +1,49 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %fvdemote(f64x2) -> f32x4 {
block0(v0: f64x2):
v1 = fvdemote v0
return v1
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vfncvt.f.f.w v4,v1 #avl=4, #vtype=(e32, mf2, ta, ma)
; vmv.v.i v0,12 #avl=2, #vtype=(e64, m1, ta, ma)
; vmerge.vim v8,v4,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x72, 0xcd
; .byte 0x57, 0x12, 0x1a, 0x4a
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x30, 0x06, 0x5e
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x34, 0x40, 0x5c
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x04, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

43
cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif

@ -0,0 +1,43 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %fvpromote_low(f32x4) -> f64x2 {
block0(v0: f32x4):
v1 = fvpromote_low v0
return v1
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vfwcvt.f.f.v v4,v1 #avl=2, #vtype=(e32, mf2, ta, ma)
; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x71, 0xcd
; .byte 0x57, 0x12, 0x16, 0x4a
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x02, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

79
cranelift/filetests/filetests/runtests/simd-conversion.clif

@ -1,79 +0,0 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 sse41
target x86_64 sse42
target x86_64 sse42 has_avx
function %fcvt_from_sint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_sint.f32x4 v0
return v1
}
; run: %fcvt_from_sint([-1 0 1 123456789]) == [-0x1.0 0.0 0x1.0 0x75bcd18.0]
; Note that 123456789 rounds to 123456792.0, an error of 3
function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
; run: %fcvt_from_uint([0xFFFFFFFF 0 1 123456789]) == [0x100000000.0 0.0 0x1.0 0x75bcd18.0]
; Note that 0xFFFFFFFF is decimal 4,294,967,295 and is rounded up 1 to 4,294,967,296 in f32x4.
function %fcvt_to_sint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_sint_sat.i32x4 v0
return v1
}
; run: %fcvt_to_sint_sat([0x0.0 -0x1.0 0x1.0 0x1.0p100]) == [0 -1 1 0x7FFFFFFF]
; run: %fcvt_to_sint_sat([-0x8.1 0x0.0 0x0.0 -0x1.0p100]) == [-8 0 0 0x80000000]
; run: %fcvt_to_sint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
function %fcvt_to_uint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_uint_sat.i32x4 v0
return v1
}
; run: %fcvt_to_uint_sat([0x1.0 0x4.2 0x4.6 0x1.0p100]) == [1 4 4 0xFFFFFFFF]
; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
function %fcvt_low_from_sint(i32x4) -> f64x2 {
block0(v0: i32x4):
v1 = swiden_low v0
v2 = fcvt_from_sint.f64x2 v1
return v2
}
; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]
function %fvdemote(f64x2) -> f32x4 {
block0(v0: f64x2):
v1 = fvdemote v0
return v1
}
; run: %fvdemote([0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
; run: %fvdemote([0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0]
; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
function %fvpromote_low(f32x4) -> f64x2 {
block0(v0: f32x4):
v1 = fvpromote_low v0
return v1
}
; run: %fvpromote_low([0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0]
; run: %fvpromote_low([0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2]
; run: %fvpromote_low([0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2]
; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]

17
cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif

@ -0,0 +1,17 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 sse41
target x86_64 sse42
target x86_64 sse42 has_avx
target riscv64 has_v
function %fcvt_from_sint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_sint.f32x4 v0
return v1
}
; run: %fcvt_from_sint([-1 0 1 123456789]) == [-0x1.0 0.0 0x1.0 0x75bcd18.0]
; Note that 123456789 rounds to 123456792.0, an error of 3

18
cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif

@ -0,0 +1,18 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 sse41
target x86_64 sse42
target x86_64 sse42 has_avx
target riscv64 has_v
function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
; run: %fcvt_from_uint([0xFFFFFFFF 0 1 123456789]) == [0x100000000.0 0.0 0x1.0 0x75bcd18.0]
; Note that 0xFFFFFFFF is decimal 4,294,967,295 and is rounded up 1 to 4,294,967,296 in f32x4.

18
cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif

@ -0,0 +1,18 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 sse41
target x86_64 sse42
target x86_64 sse42 has_avx
target riscv64 has_v
function %fcvt_to_sint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_sint_sat.i32x4 v0
return v1
}
; run: %fcvt_to_sint_sat([0x0.0 -0x1.0 0x1.0 0x1.0p100]) == [0 -1 1 0x7FFFFFFF]
; run: %fcvt_to_sint_sat([-0x8.1 0x0.0 0x0.0 -0x1.0p100]) == [-8 0 0 0x80000000]
; run: %fcvt_to_sint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]

28
cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif

@ -0,0 +1,28 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 sse41
target x86_64 sse42
target x86_64 sse42 has_avx
target riscv64 has_v
function %fcvt_to_uint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_uint_sat.i32x4 v0
return v1
}
; run: %fcvt_to_uint_sat([0x1.0 0x4.2 0x4.6 0x1.0p100]) == [1 4 4 0xFFFFFFFF]
; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
function %fcvt_low_from_sint(i32x4) -> f64x2 {
block0(v0: i32x4):
v1 = swiden_low v0
v2 = fcvt_from_sint.f64x2 v1
return v2
}
; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]

22
cranelift/filetests/filetests/runtests/simd-fvdemote.clif

@ -0,0 +1,22 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 sse41
target x86_64 sse42
target x86_64 sse42 has_avx
target riscv64 has_v
function %fvdemote(f64x2) -> f32x4 {
block0(v0: f64x2):
v1 = fvdemote v0
return v1
}
; run: %fvdemote([0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
; run: %fvdemote([0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0]
; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]

21
cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif

@ -0,0 +1,21 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 sse41
target x86_64 sse42
target x86_64 sse42 has_avx
target riscv64 has_v
function %fvpromote_low(f32x4) -> f64x2 {
block0(v0: f32x4):
v1 = fvpromote_low v0
return v1
}
; run: %fvpromote_low([0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0]
; run: %fvpromote_low([0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2]
; run: %fvpromote_low([0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2]
; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
Loading…
Cancel
Save