@@ -312,6 +312,23 @@ impl PerCpuModeEncodings {
         self.enc_x86_64_instp(inst, template, instp);
     }

+    /// Add two encodings for `inst`:
+    /// - X86_32, dynamically infer the REX prefix.
+    /// - X86_64, dynamically infer the REX prefix.
+    fn enc_both_inferred(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+        self.enc32(inst.clone(), template.infer_rex());
+        self.enc64(inst, template.infer_rex());
+    }
+
+    fn enc_both_inferred_maybe_isap(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        isap: Option<SettingPredicateNumber>,
+    ) {
+        self.enc32_maybe_isap(inst.clone(), template.infer_rex(), isap);
+        self.enc64_maybe_isap(inst, template.infer_rex(), isap);
+    }
+
     /// Add two encodings for `inst`:
     /// - X86_32
     /// - X86_64 with the REX prefix.
@@ -340,12 +357,6 @@ impl PerCpuModeEncodings {
         }
     }

-    /// Add the same encoding/template pairing to both X86_32 and X86_64
-    fn enc_32_64(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
-        self.enc32(inst.clone(), template.clone());
-        self.enc64(inst, template);
-    }
-
     /// Add the same encoding/recipe pairing to both X86_32 and X86_64
     fn enc_32_64_rec(
         &mut self,
@@ -1674,17 +1685,15 @@ fn define_simd(
     // PSHUFB, 8-bit shuffle using two XMM registers.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
-        let template = rec_fa.nonrex().opcodes(&PSHUFB);
-        e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
-        e.enc64_isap(instruction, template, use_ssse3_simd);
+        let template = rec_fa.opcodes(&PSHUFB);
+        e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd));
     }

     // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
         let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
-        let template = rec_r_ib_unsigned_fpr.nonrex().opcodes(&PSHUFD);
-        e.enc32(instruction.clone(), template.clone());
-        e.enc64(instruction, template);
+        let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD);
+        e.enc_both_inferred(instruction, template);
     }

     // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
@@ -1693,12 +1702,12 @@ fn define_simd(
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
         if ty.is_float() {
+            // No need to move floats--they already live in XMM registers.
             e.enc_32_64_rec(instruction, rec_null_fpr, 0);
         } else {
             let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
             if ty.lane_bits() < 64 {
-                e.enc32(instruction.clone(), template.clone());
-                e.enc_x86_64(instruction, template);
+                e.enc_both_inferred(instruction, template);
             } else {
                 // No 32-bit encodings for 64-bit widths.
                 assert_eq!(ty.lane_bits(), 64);
@@ -1719,7 +1728,7 @@ fn define_simd(
         let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
         let template = rec_r_ib_unsigned_r.opcodes(opcode);
         if ty.lane_bits() < 64 {
-            e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap);
+            e.enc_both_inferred_maybe_isap(instruction, template, isap);
         } else {
             // It turns out the 64-bit widths have REX/W encodings and only are available on
             // x86_64.
@@ -1730,22 +1739,22 @@ fn define_simd(
     // For legalizing insertlane with floats, INSERTPS from SSE4.1.
     {
         let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
-        let template = rec_fa_ib.nonrex().opcodes(&INSERTPS);
-        e.enc_32_64_maybe_isap(instruction, template, Some(use_sse41_simd));
+        let template = rec_fa_ib.opcodes(&INSERTPS);
+        e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
     }

     // For legalizing insertlane with floats, MOVSD from SSE2.
     {
         let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
-        let template = rec_fa.nonrex().opcodes(&MOVSD_LOAD);
-        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE2
+        let template = rec_fa.opcodes(&MOVSD_LOAD);
+        e.enc_both_inferred(instruction, template); // from SSE2
     }

     // For legalizing insertlane with floats, MOVLHPS from SSE.
     {
         let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
-        let template = rec_fa.nonrex().opcodes(&MOVLHPS);
-        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
+        let template = rec_fa.opcodes(&MOVLHPS);
+        e.enc_both_inferred(instruction, template); // from SSE
     }

     // SIMD extractlane
@@ -1760,7 +1769,7 @@ fn define_simd(
         let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
         let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
         if ty.lane_bits() < 64 {
-            e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
+            e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
         } else {
             // It turns out the 64-bit widths have REX/W encodings and only are available on
             // x86_64.
@@ -1838,85 +1847,81 @@ fn define_simd(
     // in memory) but some performance measurements are needed.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = vconst.bind(vector(ty, sse_vector_size));
-        let template = rec_vconst.opcodes(&MOVUPS_LOAD).infer_rex();
-        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
+        let template = rec_vconst.opcodes(&MOVUPS_LOAD);
+        e.enc_both_inferred(instruction, template); // from SSE
     }

-    // SIMD register movement: store, load, spill, fill, regmove. All of these use encodings of
+    // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of
     // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have
     // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124).
+    // Also, it would be ideal to infer REX prefixes for all of these instructions but for the
+    // time being only instructions with common recipes have `infer_rex()` support.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         // Store
         let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
-        e.enc_32_64(
-            bound_store.clone(),
-            rec_fst.opcodes(&MOVUPS_STORE).infer_rex(),
-        );
-        e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
-        e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
+        e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
+        e.enc_both(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
+        e.enc_both(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));

         // Store complex
         let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(
+        e.enc_both(
             bound_store_complex.clone(),
             rec_fstWithIndex.opcodes(&MOVUPS_STORE),
         );
-        e.enc_32_64(
+        e.enc_both(
             bound_store_complex.clone(),
             rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE),
         );
-        e.enc_32_64(
+        e.enc_both(
             bound_store_complex,
             rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE),
         );

         // Load
         let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
-        e.enc_32_64(
-            bound_load.clone(),
-            rec_fld.opcodes(&MOVUPS_LOAD).infer_rex(),
-        );
-        e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
-        e.enc_32_64(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));
+        e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
+        e.enc_both(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
+        e.enc_both(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));

         // Load complex
         let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(
+        e.enc_both(
             bound_load_complex.clone(),
             rec_fldWithIndex.opcodes(&MOVUPS_LOAD),
         );
-        e.enc_32_64(
+        e.enc_both(
             bound_load_complex.clone(),
             rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD),
         );
-        e.enc_32_64(
+        e.enc_both(
             bound_load_complex,
             rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD),
         );

         // Spill
         let bound_spill = spill.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
+        e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
         let bound_regspill = regspill.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));
+        e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));

         // Fill
         let bound_fill = fill.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
+        e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
         let bound_regfill = regfill.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
+        e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
         let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size));
         e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0);

         // Regmove
         let bound_regmove = regmove.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));
+        e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));

         // Copy
         let bound_copy = copy.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
+        e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
         let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD));
+        e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD));
         let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size));
         e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0);
     }
@@ -1924,23 +1929,23 @@ fn define_simd(
     // SIMD integer addition
     for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
         let iadd = iadd.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(iadd, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes));
     }

     // SIMD integer saturating addition
-    e.enc_32_64(
+    e.enc_both_inferred(
         sadd_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PADDSB),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         sadd_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PADDSW),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         uadd_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PADDUSB),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         uadd_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PADDUSW),
     );
@@ -1949,23 +1954,23 @@ fn define_simd(
     let isub = shared.by_name("isub");
     for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
         let isub = isub.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes));
     }

     // SIMD integer saturating subtraction
-    e.enc_32_64(
+    e.enc_both_inferred(
         ssub_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PSUBSB),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         ssub_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PSUBSW),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         usub_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PSUBUSB),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         usub_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PSUBUSW),
     );
@@ -1977,7 +1982,7 @@ fn define_simd(
         (I32, &PMULLD[..], Some(use_sse41_simd)),
     ] {
         let imul = imul.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
+        e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
     }

     // SIMD integer multiplication for I64x2 using AVX512.
@@ -1993,7 +1998,7 @@ fn define_simd(
     // SIMD integer average with rounding.
     for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
         let avgr = avg_round.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(avgr, rec_fa.opcodes(opcodes));
+        e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
     }

     // SIMD logical operations
@@ -2002,23 +2007,23 @@ fn define_simd(
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         // and
         let band = band.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(band, rec_fa.opcodes(&PAND));
+        e.enc_both_inferred(band, rec_fa.opcodes(&PAND));

         // and not (note flipped recipe operands to match band_not order)
         let band_not = band_not.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(band_not, rec_fax.opcodes(&PANDN));
+        e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN));

         // or
         let bor = bor.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bor, rec_fa.opcodes(&POR));
+        e.enc_both_inferred(bor, rec_fa.opcodes(&POR));

         // xor
         let bxor = bxor.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
+        e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR));

         // ptest
         let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
-        e.enc_32_64_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
+        e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
     }

     // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
@@ -2026,7 +2031,7 @@ fn define_simd(
     // I128x1 but restrictions on the type builder prevent this; the general idea here is that
     // the upper bits are all zeroed and do not form parts of any separate lane. See
     // https://github.com/bytecodealliance/wasmtime/issues/1140.
-    e.enc_both(
+    e.enc_both_inferred(
         bitcast.bind(vector(I64, sse_vector_size)).bind(I32),
         rec_frurm.opcodes(&MOVD_LOAD_XMM),
     );
@@ -2038,31 +2043,31 @@ fn define_simd(
     // SIMD shift left
     for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] {
         let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(x86_psll, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes));
     }

     // SIMD shift right (logical)
     for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] {
         let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(x86_psrl, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes));
     }

     // SIMD shift right (arithmetic)
     for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] {
         let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes));
     }

     // SIMD immediate shift
     for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
         let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
+        e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
         let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
+        e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
         let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
+        e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
     }

     // SIMD integer comparisons
@@ -2081,8 +2086,8 @@ fn define_simd(
             let instruction = icmp
                 .bind(Immediate::IntCC(*cc))
                 .bind(vector(*ty, sse_vector_size));
-            let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
-            e.enc_32_64_maybe_isap(instruction, template, *isa_predicate);
+            let template = rec_icscc_fpr.opcodes(opcodes);
+            e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate);
         }
     }
@@ -2102,15 +2107,15 @@ fn define_simd(
         (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
     ] {
         let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
+        e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
     }

     // SIMD float comparisons
-    e.enc_both(
+    e.enc_both_inferred(
         fcmp.bind(vector(F32, sse_vector_size)),
         rec_pfcmp.opcodes(&CMPPS),
     );
-    e.enc_both(
+    e.enc_both_inferred(
         fcmp.bind(vector(F64, sse_vector_size)),
         rec_pfcmp.opcodes(&CMPPD),
     );
@@ -2131,11 +2136,11 @@ fn define_simd(
         (F64, fmax, &MAXPD[..]),
     ] {
         let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_both(inst, rec_fa.opcodes(opcodes));
+        e.enc_both_inferred(inst, rec_fa.opcodes(opcodes));
     }

     for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] {
         let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_both(inst, rec_furm.opcodes(opcodes));
+        e.enc_both_inferred(inst, rec_furm.opcodes(opcodes));
     }
 }
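
Not part of the patch above, just an illustration of the pattern it introduces: the helpers added in the first hunk let a call site register an encoding for both X86_32 and X86_64 in a single call, with `infer_rex()` applied to the template for each mode. That is why the later hunks collapse `enc32`/`enc64` pairs and `enc_32_64` calls with explicit `.nonrex()` or `.infer_rex()` into single `enc_both_inferred*` calls. The sketch below restates that shape with invented `MiniTemplate`/`MiniEncodings` stand-ins rather than the real `cranelift-codegen-meta` types, so it compiles on its own.

```rust
// A minimal sketch, not the real cranelift-codegen-meta API: `MiniTemplate` and
// `MiniEncodings` are made-up stand-ins used only to show the helper's shape.
#[derive(Clone, Debug)]
struct MiniTemplate {
    recipe: &'static str,
    infer_rex: bool,
}

impl MiniTemplate {
    /// Plays the role of `Template::infer_rex()` in the diff: return a copy of the
    /// template marked so a REX prefix is emitted only when the operands require it.
    fn infer_rex(&self) -> Self {
        MiniTemplate {
            infer_rex: true,
            ..self.clone()
        }
    }
}

#[derive(Default, Debug)]
struct MiniEncodings {
    for_x86_32: Vec<(String, MiniTemplate)>,
    for_x86_64: Vec<(String, MiniTemplate)>,
}

impl MiniEncodings {
    fn enc32(&mut self, inst: impl Into<String>, template: MiniTemplate) {
        self.for_x86_32.push((inst.into(), template));
    }

    fn enc64(&mut self, inst: impl Into<String>, template: MiniTemplate) {
        self.for_x86_64.push((inst.into(), template));
    }

    /// Same shape as the `enc_both_inferred` helper added in the first hunk: one call
    /// registers the instruction for both CPU modes, with REX inference on each.
    fn enc_both_inferred(&mut self, inst: impl Clone + Into<String>, template: MiniTemplate) {
        self.enc32(inst.clone(), template.infer_rex());
        self.enc64(inst, template.infer_rex());
    }
}

fn main() {
    let mut e = MiniEncodings::default();
    let template = MiniTemplate {
        recipe: "fa",
        infer_rex: false,
    };
    // One call instead of the removed per-mode `enc32`/`enc64` (or `enc_32_64`) pairs.
    e.enc_both_inferred("x86_pshufb.i8x16", template);
    println!("{:#?}", e);
}
```

The design point the diff relies on is that REX handling becomes a property of the shared helper rather than something each call site opts into, which is what allows dropping `.nonrex()` and the hand-written `.infer_rex()` calls throughout `define_simd`.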