Browse Source
* Add tests for patterns I'm about to optimize
* x64: Optimize vector compare-and-branch. This commit implements lowering optimizations for the `vall_true` and `vany_true` CLIF instructions when combined with `brif`. This mirrors how `icmp` and `fcmp` are handled when combined with `brif`: the result of the comparison is never materialized into a general-purpose register, which helps lower register pressure and removes some instructions.
* x64: Optimize `vconst` with an all-ones pattern. This has a single-instruction lowering which doesn't load from memory, so it's probably cheaper than loading all-ones from memory.
pull/8843/head
Alex Crichton
5 months ago
committed by
GitHub
3 changed files with 171 additions and 11 deletions
@ -0,0 +1,125 @@ |
|||
;;! target = "x86_64" |
|||
;;! test = "compile" |
|||
;;! flags = ["-Ccranelift-sse41"] |
|||
|
|||
(module |
|||
;; Test case: feeds the i32 result of `i8x16.all_true` on parameter 0 straight
;; into an `if`/`else` that yields 100 on the "then" arm and 200 on the "else"
;; arm, so the vector test is consumed by a branch rather than returned.
(func $i8x16.all_true (param v128) (result i32) |
|||
local.get 0 |
|||
i8x16.all_true |
|||
if (result i32) |
|||
i32.const 100 |
|||
else |
|||
i32.const 200 |
|||
end |
|||
) |
|||
|
|||
;; Test case: same shape as the other `*.all_true` functions in this module,
;; using `i16x8.all_true` on parameter 0 as the condition of an `if`/`else`
;; that yields 100 ("then") or 200 ("else").
(func $i16x8.all_true (param v128) (result i32) |
|||
local.get 0 |
|||
i16x8.all_true |
|||
if (result i32) |
|||
i32.const 100 |
|||
else |
|||
i32.const 200 |
|||
end |
|||
) |
|||
|
|||
;; Test case: uses `i32x4.all_true` on parameter 0 as the condition of an
;; `if`/`else` that yields 100 ("then") or 200 ("else").
(func $i32x4.all_true (param v128) (result i32) |
|||
local.get 0 |
|||
i32x4.all_true |
|||
if (result i32) |
|||
i32.const 100 |
|||
else |
|||
i32.const 200 |
|||
end |
|||
) |
|||
|
|||
;; Test case: uses `i64x2.all_true` on parameter 0 as the condition of an
;; `if`/`else` that yields 100 ("then") or 200 ("else").
(func $i64x2.all_true (param v128) (result i32) |
|||
local.get 0 |
|||
i64x2.all_true |
|||
if (result i32) |
|||
i32.const 100 |
|||
else |
|||
i32.const 200 |
|||
end |
|||
) |
|||
|
|||
;; Test case: uses `v128.any_true` on parameter 0 as the condition of an
;; `if`/`else` that yields 100 ("then") or 200 ("else"). This is the only
;; `any_true` case in this module; the others all exercise `*.all_true`.
(func $v128.any_true (param v128) (result i32) |
|||
local.get 0 |
|||
v128.any_true |
|||
if (result i32) |
|||
i32.const 100 |
|||
else |
|||
i32.const 200 |
|||
end |
|||
) |
|||
) |
|||
;; wasm[0]::function[0]::i8x16.all_true: |
|||
;; pushq %rbp |
|||
;; movq %rsp, %rbp |
|||
;; pxor %xmm7, %xmm7 |
|||
;; pcmpeqb %xmm7, %xmm0 |
|||
;; ptest %xmm0, %xmm0 |
|||
;; je 0x21 |
|||
;; 17: movl $0xc8, %eax |
|||
;; jmp 0x26 |
|||
;; 21: movl $0x64, %eax |
|||
;; movq %rbp, %rsp |
|||
;; popq %rbp |
|||
;; retq |
|||
;; |
|||
;; wasm[0]::function[1]::i16x8.all_true: |
|||
;; pushq %rbp |
|||
;; movq %rsp, %rbp |
|||
;; pxor %xmm7, %xmm7 |
|||
;; pcmpeqw %xmm7, %xmm0 |
|||
;; ptest %xmm0, %xmm0 |
|||
;; je 0x61 |
|||
;; 57: movl $0xc8, %eax |
|||
;; jmp 0x66 |
|||
;; 61: movl $0x64, %eax |
|||
;; movq %rbp, %rsp |
|||
;; popq %rbp |
|||
;; retq |
|||
;; |
|||
;; wasm[0]::function[2]::i32x4.all_true: |
|||
;; pushq %rbp |
|||
;; movq %rsp, %rbp |
|||
;; pxor %xmm7, %xmm7 |
|||
;; pcmpeqd %xmm7, %xmm0 |
|||
;; ptest %xmm0, %xmm0 |
|||
;; je 0xa1 |
|||
;; 97: movl $0xc8, %eax |
|||
;; jmp 0xa6 |
|||
;; a1: movl $0x64, %eax |
|||
;; movq %rbp, %rsp |
|||
;; popq %rbp |
|||
;; retq |
|||
;; |
|||
;; wasm[0]::function[3]::i64x2.all_true: |
|||
;; pushq %rbp |
|||
;; movq %rsp, %rbp |
|||
;; pxor %xmm7, %xmm7 |
|||
;; pcmpeqq %xmm7, %xmm0 |
|||
;; ptest %xmm0, %xmm0 |
|||
;; je 0xe2 |
|||
;; d8: movl $0xc8, %eax |
|||
;; jmp 0xe7 |
|||
;; e2: movl $0x64, %eax |
|||
;; movq %rbp, %rsp |
|||
;; popq %rbp |
|||
;; retq |
|||
;; |
|||
;; wasm[0]::function[4]::v128.any_true: |
|||
;; pushq %rbp |
|||
;; movq %rsp, %rbp |
|||
;; pxor %xmm7, %xmm7 |
|||
;; pcmpeqb %xmm7, %xmm0 |
|||
;; pmovmskb %xmm0, %ecx |
|||
;; cmpl $0xffff, %ecx |
|||
;; jne 0x126 |
|||
;; 11c: movl $0xc8, %eax |
|||
;; jmp 0x12b |
|||
;; 126: movl $0x64, %eax |
|||
;; movq %rbp, %rsp |
|||
;; popq %rbp |
|||
;; retq |
@ -0,0 +1,22 @@ |
|||
;;! target = "x86_64" |
|||
;;! test = "compile" |
|||
|
|||
(module |
|||
;; Test case: returns a v128 constant whose two i64 lanes are both 0.
(func $zero (result v128) v128.const i64x2 0 0) |
|||
;; Test case: returns a v128 constant whose two i64 lanes are both -1
;; (the "all-ones" pattern: every bit of the vector is set).
(func $ones (result v128) v128.const i64x2 -1 -1) |
|||
) |
|||
;; wasm[0]::function[0]::zero: |
|||
;; pushq %rbp |
|||
;; movq %rsp, %rbp |
|||
;; pxor %xmm0, %xmm0 |
|||
;; movq %rbp, %rsp |
|||
;; popq %rbp |
|||
;; retq |
|||
;; |
|||
;; wasm[0]::function[1]::ones: |
|||
;; pushq %rbp |
|||
;; movq %rsp, %rbp |
|||
;; pcmpeqd %xmm0, %xmm0 |
|||
;; movq %rbp, %rsp |
|||
;; popq %rbp |
|||
;; retq |
Loading…
Reference in new issue