Browse Source
* Wasm tests: add typed-funcref test showing example of desirable optimizations. In order to have fast IC (inline cache) chains in AOT-compiled dynamic language Wasms, it would be great if we could make the "call to a typed funcref at a constant table index" pattern fast. This use-case was discussed at the most recent Wasmtime biweekly and @jameysharp is working on some optimizations; the intent of this PR is to provide a concrete test-case whose blessed output we can see improve over time. In particular, the following opts are still desirable: - With the use of non-nullable typed funcrefs, there shouldn't be a null check (there currently is, as noted by a comment in the code due to lack of type information at the right spot). - With the use of a constant table size and a constant index to the `table.get`, we should be able to load from the table without a bounds-check or any Spectre masking. Other further optimizations for this pattern might be possible if we rearrange the table and function-reference data structures, and the lazy-initialization scheme thereof, but the above should be agnostic to that. * Add comments to clarify typed funcrefs usage.pull/8161/head
Chris Fallin
8 months ago
committed by
GitHub
1 changed files with 166 additions and 0 deletions
@ -0,0 +1,166 @@ |
|||||
|
;;! target = "x86_64" |
||||
|
;;! test = "optimize" |
||||
|
;;! flags = [ "-Wfunction-references=y" ] |
||||
|
|
||||
|
;; This test is meant to simulate how typed funcrefs in a table may be |
||||
|
;; used for ICs (inline caches) in a Wasm module compiled from a dynamic |
||||
|
;; language. In native JIT engines, IC chains have head pointers that |
||||
|
;; are raw code pointers and IC-using code can call each with a few ops |
||||
|
;; (load pointer, call indirect). We'd like similar efficiency by |
||||
|
;; storing funcrefs for the first IC in each chain in a typed-funcref |
||||
|
;; table. |
||||
|
|
||||
|
(module |
||||
|
(type $ic-stub (func (param i32 i32 i32 i32) (result i32))) |
||||
|
|
||||
|
;; This syntax declares a table that is exactly 100 elements, whose |
||||
|
;; elements are non-nullable function references, and whose default |
||||
|
;; value (needed because non-nullable) is a pointer to `$ic1`. |
||||
|
(table $ic-sites 100 100 (ref $ic-stub) (ref.func $ic1)) |
||||
|
|
||||
|
(func $ic1 (param i32 i32 i32 i32) (result i32) |
||||
|
local.get 0) |
||||
|
|
||||
|
(func $call-ics (param i32 i32 i32 i32) (result i32) |
||||
|
(local $sum i32) |
||||
|
|
||||
|
;; IC callsite index 1 (arbitrary). |
||||
|
local.get 0 |
||||
|
local.get 1 |
||||
|
local.get 2 |
||||
|
local.get 3 |
||||
|
i32.const 1 |
||||
|
table.get $ic-sites |
||||
|
call_ref $ic-stub |
||||
|
local.get $sum |
||||
|
i32.add |
||||
|
local.set $sum |
||||
|
|
||||
|
;; IC callsite index 2 (arbitrary). |
||||
|
local.get 0 |
||||
|
local.get 1 |
||||
|
local.get 2 |
||||
|
local.get 3 |
||||
|
i32.const 2 |
||||
|
table.get $ic-sites |
||||
|
call_ref $ic-stub |
||||
|
local.get $sum |
||||
|
i32.add |
||||
|
local.set $sum |
||||
|
|
||||
|
local.get $sum)) |
||||
|
;; function u0:0(i64 vmctx, i64, i32, i32, i32, i32) -> i32 fast { |
||||
|
;; gv0 = vmctx |
||||
|
;; gv1 = load.i64 notrap aligned readonly gv0+8 |
||||
|
;; gv2 = load.i64 notrap aligned gv1 |
||||
|
;; sig0 = (i64 vmctx, i32 uext, i32 uext) -> i32 uext system_v |
||||
|
;; sig1 = (i64 vmctx, i32 uext) -> i32 uext system_v |
||||
|
;; stack_limit = gv2 |
||||
|
;; |
||||
|
;; block0(v0: i64, v1: i64, v2: i32, v3: i32, v4: i32, v5: i32): |
||||
|
;; v6 -> v2 |
||||
|
;; @002c jump block1 |
||||
|
;; |
||||
|
;; block1: |
||||
|
;; @002c return v2 |
||||
|
;; } |
||||
|
;; |
||||
|
;; function u0:1(i64 vmctx, i64, i32, i32, i32, i32) -> i32 fast { |
||||
|
;; gv0 = vmctx |
||||
|
;; gv1 = load.i64 notrap aligned readonly gv0+8 |
||||
|
;; gv2 = load.i64 notrap aligned gv1 |
||||
|
;; gv3 = vmctx |
||||
|
;; gv4 = load.i64 notrap aligned gv3+72 |
||||
|
;; sig0 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v |
||||
|
;; sig1 = (i64 vmctx, i64, i32, i32, i32, i32) -> i32 fast |
||||
|
;; sig2 = (i64 vmctx, i32 uext, i32 uext) -> i32 uext system_v |
||||
|
;; sig3 = (i64 vmctx, i32 uext) -> i32 uext system_v |
||||
|
;; stack_limit = gv2 |
||||
|
;; |
||||
|
;; block0(v0: i64, v1: i64, v2: i32, v3: i32, v4: i32, v5: i32): |
||||
|
;; v21 -> v0 |
||||
|
;; v47 -> v0 |
||||
|
;; v56 -> v0 |
||||
|
;; v59 -> v0 |
||||
|
;; v30 -> v2 |
||||
|
;; v31 -> v3 |
||||
|
;; v32 -> v4 |
||||
|
;; v33 -> v5 |
||||
|
;; v62 = iconst.i8 0 |
||||
|
;; @003b brif v62, block6, block7 ; v62 = 0 |
||||
|
;; |
||||
|
;; block6 cold: |
||||
|
;; @003b trap table_oob |
||||
|
;; |
||||
|
;; block7: |
||||
|
;; @003b v12 = load.i64 notrap aligned v0+72 |
||||
|
;; v79 = iconst.i8 0 |
||||
|
;; v70 = iconst.i64 8 |
||||
|
;; @003b v14 = iadd v12, v70 ; v70 = 8 |
||||
|
;; @003b v16 = select_spectre_guard v79, v12, v14 ; v79 = 0 |
||||
|
;; @003b v17 = load.i64 notrap aligned table v16 |
||||
|
;; v58 = iconst.i64 -2 |
||||
|
;; @003b v18 = band v17, v58 ; v58 = -2 |
||||
|
;; @003b brif v17, block3(v18), block2 |
||||
|
;; |
||||
|
;; block2 cold: |
||||
|
;; @004e v48 = load.i64 notrap aligned readonly v0+56 |
||||
|
;; @004e v49 = load.i64 notrap aligned readonly v48+72 |
||||
|
;; @002f v7 = iconst.i32 0 |
||||
|
;; v28 -> v7 |
||||
|
;; @0039 v8 = iconst.i32 1 |
||||
|
;; @003b v24 = call_indirect sig0, v49(v0, v7, v8) ; v7 = 0, v8 = 1 |
||||
|
;; @003b jump block3(v24) |
||||
|
;; |
||||
|
;; block3(v19: i64): |
||||
|
;; @003d brif v19, block9, block8 |
||||
|
;; |
||||
|
;; block8 cold: |
||||
|
;; @003d trap null_reference |
||||
|
;; |
||||
|
;; block9: |
||||
|
;; @003d v25 = load.i64 notrap aligned readonly v19+16 |
||||
|
;; @003d v26 = load.i64 notrap aligned readonly v19+32 |
||||
|
;; @003d v27 = call_indirect sig1, v25(v26, v0, v2, v3, v4, v5) |
||||
|
;; v80 = iconst.i8 0 |
||||
|
;; @004e brif v80, block10, block11 ; v80 = 0 |
||||
|
;; |
||||
|
;; block10 cold: |
||||
|
;; @004e trap table_oob |
||||
|
;; |
||||
|
;; block11: |
||||
|
;; @004e v38 = load.i64 notrap aligned v0+72 |
||||
|
;; v81 = iconst.i8 0 |
||||
|
;; v78 = iconst.i64 16 |
||||
|
;; @004e v40 = iadd v38, v78 ; v78 = 16 |
||||
|
;; @004e v42 = select_spectre_guard v81, v38, v40 ; v81 = 0 |
||||
|
;; @004e v43 = load.i64 notrap aligned table v42 |
||||
|
;; v82 = iconst.i64 -2 |
||||
|
;; v83 = band v43, v82 ; v82 = -2 |
||||
|
;; @004e brif v43, block5(v83), block4 |
||||
|
;; |
||||
|
;; block4 cold: |
||||
|
;; v84 = load.i64 notrap aligned readonly v0+56 |
||||
|
;; v85 = load.i64 notrap aligned readonly v84+72 |
||||
|
;; v86 = iconst.i32 0 |
||||
|
;; @004c v34 = iconst.i32 2 |
||||
|
;; @004e v50 = call_indirect sig0, v85(v0, v86, v34) ; v86 = 0, v34 = 2 |
||||
|
;; @004e jump block5(v50) |
||||
|
;; |
||||
|
;; block5(v45: i64): |
||||
|
;; @0050 brif v45, block13, block12 |
||||
|
;; |
||||
|
;; block12 cold: |
||||
|
;; @0050 trap null_reference |
||||
|
;; |
||||
|
;; block13: |
||||
|
;; @0050 v51 = load.i64 notrap aligned readonly v45+16 |
||||
|
;; @0050 v52 = load.i64 notrap aligned readonly v45+32 |
||||
|
;; @0050 v53 = call_indirect sig1, v51(v52, v0, v2, v3, v4, v5) |
||||
|
;; @0059 jump block1 |
||||
|
;; |
||||
|
;; block1: |
||||
|
;; @0054 v55 = iadd.i32 v53, v27 |
||||
|
;; v6 -> v55 |
||||
|
;; @0059 return v55 |
||||
|
;; } |
Loading…
Reference in new issue