Browse Source
* x64: Add non-SSE 4.1 lowerings of min/max instructions

This commit updates the x64 backend to avoid using various `p{min,max}*` instructions if SSE 4.1 isn't enabled. These instructions are used for comparisons as well as the `{u,s}{min,max}` instructions. Alternative lowerings are primarily drawn from LLVM.

Through this refactoring the x64 backend has now also grown (not the most efficient) lowerings for vector comparisons with `i64x2` types, which it previously largely didn't have. This enabled copying some non-x86_64 tests into the main test files for various operations.

* Review comments

pull/6323/head
Alex Crichton
2 years ago
committed by
GitHub
16 changed files with 327 additions and 192 deletions
@ -1,17 +0,0 @@ |
|||||
test interpret |
|
||||
test run |
|
||||
target aarch64 |
|
||||
target s390x |
|
||||
|
|
||||
; TODO: Move this to the main file once x86_64 supports this operation |
|
||||
; See: #5529 |
|
||||
|
|
||||
function %simd_icmp_uge_i64(i64x2, i64x2) -> i64x2 { |
|
||||
block0(v0: i64x2, v1: i64x2): |
|
||||
v2 = icmp uge v0, v1 |
|
||||
return v2 |
|
||||
} |
|
||||
; run: %simd_icmp_uge_i64([0 1], [0 0]) == [-1 -1] |
|
||||
; run: %simd_icmp_uge_i64([-1 0], [-1 1]) == [-1 0] |
|
||||
; run: %simd_icmp_uge_i64([-5 1], [-1 -1]) == [0 0] |
|
||||
; run: %simd_icmp_uge_i64([0 0], [0 0]) == [-1 -1] |
|
@ -1,17 +0,0 @@ |
|||||
test interpret |
|
||||
test run |
|
||||
target aarch64 |
|
||||
target s390x |
|
||||
|
|
||||
; TODO: Move this to the main file once x86_64 supports this operation |
|
||||
; See: #5529 |
|
||||
|
|
||||
function %simd_icmp_ugt_i64(i64x2, i64x2) -> i64x2 { |
|
||||
block0(v0: i64x2, v1: i64x2): |
|
||||
v2 = icmp ugt v0, v1 |
|
||||
return v2 |
|
||||
} |
|
||||
; run: %simd_icmp_ugt_i64([0 1], [0 0]) == [0 -1] |
|
||||
; run: %simd_icmp_ugt_i64([-1 0], [-1 1]) == [0 0] |
|
||||
; run: %simd_icmp_ugt_i64([-5 1], [-1 -1]) == [0 0] |
|
||||
; run: %simd_icmp_ugt_i64([0 0], [0 0]) == [0 0] |
|
@ -1,17 +0,0 @@ |
|||||
test interpret |
|
||||
test run |
|
||||
target aarch64 |
|
||||
target s390x |
|
||||
|
|
||||
; TODO: Move this to the main file once x86_64 supports this operation |
|
||||
; See: #5529 |
|
||||
|
|
||||
function %simd_icmp_ule_i64(i64x2, i64x2) -> i64x2 { |
|
||||
block0(v0: i64x2, v1: i64x2): |
|
||||
v2 = icmp ule v0, v1 |
|
||||
return v2 |
|
||||
} |
|
||||
; run: %simd_icmp_ule_i64([0 1], [0 0]) == [-1 0] |
|
||||
; run: %simd_icmp_ule_i64([-1 0], [-1 1]) == [-1 -1] |
|
||||
; run: %simd_icmp_ule_i64([-5 1], [-1 -1]) == [-1 -1] |
|
||||
; run: %simd_icmp_ule_i64([0 0], [0 0]) == [-1 -1] |
|
@ -1,17 +0,0 @@ |
|||||
test interpret |
|
||||
test run |
|
||||
target aarch64 |
|
||||
target s390x |
|
||||
|
|
||||
; TODO: Move this to the main file once x86_64 supports this operation |
|
||||
; See: #5529 |
|
||||
|
|
||||
function %simd_icmp_ult_i64(i64x2, i64x2) -> i64x2 { |
|
||||
block0(v0: i64x2, v1: i64x2): |
|
||||
v2 = icmp ult v0, v1 |
|
||||
return v2 |
|
||||
} |
|
||||
; run: %simd_icmp_ult_i64([0 1], [0 0]) == [0 0] |
|
||||
; run: %simd_icmp_ult_i64([-1 0], [-1 1]) == [0 -1] |
|
||||
; run: %simd_icmp_ult_i64([-5 1], [-1 -1]) == [-1 -1] |
|
||||
; run: %simd_icmp_ult_i64([0 0], [0 0]) == [0 0] |
|
@ -1,39 +0,0 @@ |
|||||
test run |
|
||||
test interpret |
|
||||
target aarch64 |
|
||||
|
|
||||
function %smin_i64x2(i64x2, i64x2) -> i64x2 { |
|
||||
block0(v0: i64x2, v1: i64x2): |
|
||||
v2 = smin v0, v1 |
|
||||
return v2 |
|
||||
} |
|
||||
|
|
||||
; run: %smin_i64x2([0xC00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0x98763210 0x43216789 ] |
|
||||
; run: %smin_i64x2([0x80000000C00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0x80000000C00FFFEE 0x43216789 ] |
|
||||
|
|
||||
function %smax_i64x2(i64x2, i64x2) -> i64x2 { |
|
||||
block0(v0: i64x2, v1: i64x2): |
|
||||
v2 = smax v0, v1 |
|
||||
return v2 |
|
||||
} |
|
||||
|
|
||||
; run: %smax_i64x2([0xC00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0xBADAB00F ] |
|
||||
; run: %smax_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0x43216789 ] |
|
||||
|
|
||||
function %umin_i64x2(i64x2, i64x2) -> i64x2 { |
|
||||
block0(v0: i64x2, v1: i64x2): |
|
||||
v2 = umin v0, v1 |
|
||||
return v2 |
|
||||
} |
|
||||
|
|
||||
; run: %umin_i64x2([0xDEADBEEF 0xBADAB00F], [0x12349876 0x43216789]) == [ 0x12349876 0x43216789 ] |
|
||||
; run: %umin_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0x98763210 0x43216789 ] |
|
||||
|
|
||||
function %umax_i64x2(i64x2, i64x2) -> i64x2 { |
|
||||
block0(v0: i64x2, v1: i64x2): |
|
||||
v2 = umax v0, v1 |
|
||||
return v2 |
|
||||
} |
|
||||
|
|
||||
; run: %umax_i64x2([0xBAADF00D 0xBADAB00F], [0xCA11ACAB 0x43216789]) == [ 0xCA11ACAB 0xBADAB00F ] |
|
||||
; run: %umax_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0x80000000BADAB00F ] |
|
Loading…
Reference in new issue