Browse Source

Winch: fix bug by spilling when emitting func call (#7573)

* Winch: fix bug by spilling when calling a func

* Forgot to commit new filetest

* Only support WasmHeapType::Func

* Elaborate on call_indirect jump details

* Update docs for call

* Verify stack is only consts and memory entries
pull/7585/head
Jeffrey Charles 12 months ago
committed by GitHub
parent
commit
55f9a4bdcd
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 9
      winch/codegen/src/abi/mod.rs
  2. 145
      winch/codegen/src/codegen/call.rs
  3. 30
      winch/codegen/src/codegen/context.rs
  4. 8
      winch/codegen/src/stack.rs
  5. 8
      winch/codegen/src/visitor.rs
  6. 42
      winch/filetests/filetests/x64/call/reg_on_stack.wat
  7. 140
      winch/filetests/filetests/x64/call_indirect/local_arg.wat
  8. 16
      winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat
  9. 16
      winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat
  10. 16
      winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat
  11. 16
      winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat
  12. 16
      winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat
  13. 16
      winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat
  14. 16
      winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat
  15. 16
      winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat
  16. 36
      winch/filetests/filetests/x64/table/fill.wat
  17. 22
      winch/filetests/filetests/x64/table/grow.wat
  18. 96
      winch/filetests/filetests/x64/table/init_copy_drop.wat
  19. 68
      winch/filetests/filetests/x64/table/set.wat

9
winch/codegen/src/abi/mod.rs

@ -49,7 +49,7 @@ use crate::masm::{OperandSize, SPOffset};
use smallvec::SmallVec;
use std::collections::HashSet;
use std::ops::{Add, BitAnd, Not, Sub};
use wasmtime_environ::{WasmFuncType, WasmType};
use wasmtime_environ::{WasmFuncType, WasmHeapType, WasmRefType, WasmType};
pub(crate) mod local;
pub(crate) use local::*;
@ -106,7 +106,12 @@ pub(crate) trait ABI {
/// Returns the designated scratch register for the given [WasmType].
fn scratch_for(ty: &WasmType) -> Reg {
match ty {
WasmType::I32 | WasmType::I64 => Self::scratch_reg(),
WasmType::I32
| WasmType::I64
| WasmType::Ref(WasmRefType {
heap_type: WasmHeapType::Func,
..
}) => Self::scratch_reg(),
WasmType::F32 | WasmType::F64 => Self::float_scratch_reg(),
_ => unimplemented!(),
}

145
winch/codegen/src/codegen/call.rs

@ -7,32 +7,29 @@
//!
//! 1. [`Callee`] resolution.
//! 2. Mapping of the [`Callee`] to the [`CalleeKind`].
//! 3. Calculation of the stack space consumed by the call.
//! 3. Spilling the value stack.
//! 4. Calculate the return area, for 1+ results.
//! 5. Emission.
//! 6. Stack space cleanup.
//!
//! The stack space consumed by the function call; that is,
//! the sum of:
//! The stack space consumed by the function call is the amount
//! of space used by any memory entries in the value stack present
//! at the callsite (after spilling the value stack), that will be
//! used as arguments for the function call. Any memory values in the
//! value stack that are needed as part of the function
//! arguments will be consumed by the function call (either by
//! assigning those values to a register or by storing those
//! values in a memory location if the callee argument is on
//! the stack).
//! This could also be done when assigning arguments every time a
//! memory entry needs to be assigned to a particular location,
//! but doing so will emit more instructions (e.g. a pop per
//! argument that needs to be assigned); it's more efficient to
//! calculate the space used by those memory values and reclaim it
//! at once when cleaning up the stack after the call has been
//! emitted.
//!
//! 1. The amount of stack space created by saving any live
//! registers at the callsite.
//! 2. The amount of space used by any memory entries in the value
//! stack present at the callsite, that will be used as
//! arguments for the function call. Any memory values in the
//! value stack that are needed as part of the function
//! arguments, will be consumed by the function call (either by
//! assigning those values to a register or by storing those
//! values to a memory location if the callee argument is on
//! the stack), so we track that stack space to reclaim it once
//! the function call has ended. This could also be done in
//! when assigning arguments everytime a memory entry needs to be assigned
//! to a particular location, but doing so, will incur in more
//! instructions (e.g. a pop per argument that needs to be
//! assigned); it's more efficient to track the space needed by
//! those memory values and reclaim it at once.
//!
//! The machine stack throghout the function call is as follows:
//! The machine stack throughout the function call is as follows:
//! ┌──────────────────────────────────────────────────┐
//! │ │
//! │ 1 │
@ -41,18 +38,18 @@
//! │ are used as function arguments. │
//! │ │
//! ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like:
//! │ │ [ Reg | Reg | Mem(offset) | Mem(offset) ]
//! │ │ [ Mem(offset) | Mem(offset) | Local(index) | Local(index) ]
//! │ 2 │
//! │ Stack space created by saving │
//! │ any live registers at the callsite. │
//! │ Stack space created by spilling locals and │
//! │ registers at the callsite. │
//! │ │
//! │ │
//! ├─────────────────────────────────────────────────┬┤ ---> The Wasm value stack at this point in time would look like:
//! │ │ [ Mem(offset) | Mem(offset) | Mem(offset) | Mem(offset) ]
//! │ │ Assuming that the callee takes 4 arguments, we calculate
//! │ │ 2 spilled registers + 2 memory values; all of which will be used
//! │ Stack space allocated for │ as arguments to the call via `assign_args`, thus the memory they represent is
//! │ the callee function arguments in the stack; │ is considered to be consumed by the call.
//! │ │ 4 memory values; all of which will be used as arguments to
//! │ Stack space allocated for │ the call via `assign_args`, thus the sum of the size of the
//! │ the callee function arguments in the stack; │ memory they represent is considered to be consumed by the call.
//! │ represented by `arg_stack_space` │
//! │ │
//! │ │
@ -67,6 +64,7 @@ use crate::{
},
masm::{CalleeKind, MacroAssembler, OperandSize, SPOffset},
reg::Reg,
stack::Val,
CallingConvention,
};
use smallvec::SmallVec;
@ -81,10 +79,10 @@ impl FnCall {
/// Orchestrates the emission of a function call:
/// 1. Resolves the [`Callee`] through the given callback.
/// 2. Maps the resolved [`Callee`] to the [`CalleeKind`].
/// 3. Saves any live registers and calculates the stack space consumed
/// by the function call.
/// 3. Spills the value stack.
/// 4. Creates the stack space needed for the return area.
/// 5. Emits the call.
/// 6. Cleans up the stack space.
pub fn emit<M: MacroAssembler, P: PtrSize, R>(
masm: &mut M,
context: &mut CodeGenContext,
@ -98,7 +96,7 @@ impl FnCall {
let sig = sig.as_ref();
let kind = Self::map(&context.vmoffsets, &callee, sig, context, masm);
let call_stack_space = Self::save(context, masm, &sig);
context.spill(masm);
let ret_area = Self::make_ret_area(&sig, masm);
let arg_stack_space = sig.params_stack_size();
let reserved_stack = masm.call(arg_stack_space, |masm| {
@ -111,14 +109,7 @@ impl FnCall {
_ => {}
}
Self::cleanup(
sig,
reserved_stack,
call_stack_space,
ret_area,
masm,
context,
);
Self::cleanup(sig, reserved_stack, ret_area, masm, context);
}
/// Calculates the return area for the callee, if any.
@ -306,56 +297,11 @@ impl FnCall {
}
}
/// Save any live registers prior to emitting the call.
///
/// Returns `0` when the callee takes no parameters (excluding the return
/// pointer); otherwise returns the size in bytes reported by
/// `save_live_registers_and_calculate_sizeof` for the value stack entries
/// that are used as the callee's arguments.
//
// Here we perform a "spill" of the register entries
// in the Wasm value stack, we also count any memory
// values that will be used as part of the callee
// arguments. Saving the live registers is done by
// emitting push operations for every `Reg` entry in
// the Wasm value stack. We do this to be compliant
// with Winch's internal ABI, in which all registers
// are treated as caller-saved. For more details, see
// [ABI].
//
// The next few lines, partition the value stack into
// two sections:
// +------------------+--+--- (Stack top)
// | | |
// | | | 1. The top `n` elements, which are used for
// | | | function arguments; for which we save any
// | | | live registers, keeping track of the amount of registers
// +------------------+ | saved plus the amount of memory values consumed by the function call;
// | | | with this information we can later reclaim the space used by the function call.
// | | |
// +------------------+--+---
// | | | 2. The rest of the items in the stack, for which
// | | | we only save any live registers.
// | | |
// +------------------+ |
fn save<M: MacroAssembler>(context: &mut CodeGenContext, masm: &mut M, sig: &ABISig) -> u32 {
let callee_params = &sig.params_without_retptr();
let stack = &context.stack;
match callee_params.len() {
// No arguments: every entry of the value stack is saved, but none of
// them is consumed by the call, so the computed size is discarded.
0 => {
let _ = context.save_live_registers_and_calculate_sizeof(masm, ..);
0u32
}
_ => {
assert!(stack.len() >= callee_params.len());
// Index of the first value stack entry that is used as an argument.
// The assertion above guarantees that the subtraction cannot
// underflow, so the `unwrap_or(0)` fallback is never taken.
let partition = stack.len().checked_sub(callee_params.len()).unwrap_or(0);
// Entries below the arguments: registers are saved, the size is
// ignored.
let _ = context.save_live_registers_and_calculate_sizeof(masm, 0..partition);
// Argument entries: registers are saved and the size of the range is
// returned to the caller.
context.save_live_registers_and_calculate_sizeof(masm, partition..)
}
}
}
/// Cleanup stack space, handle multiple results, and free registers after
/// emitting the call.
fn cleanup<M: MacroAssembler>(
sig: &ABISig,
reserved_space: u32,
stack_consumed: u32,
ret_area: Option<RetArea>,
masm: &mut M,
context: &mut CodeGenContext,
@ -364,6 +310,16 @@ impl FnCall {
// which was allocated last.
masm.free_stack(reserved_space);
// Drop params from value stack and calculate amount of machine stack
// space they consumed.
let mut stack_consumed = 0;
context.drop_last(sig.params.len_without_retptr(), |_regalloc, v| {
debug_assert!(v.is_mem() || v.is_const());
if let Val::Memory(mem) = v {
stack_consumed += mem.slot.size;
}
});
if let Some(ret_area) = ret_area {
if stack_consumed > 0 {
// Perform a memory move, by shuffling the result area to
@ -381,29 +337,6 @@ impl FnCall {
// Free the bytes consumed by the call.
masm.free_stack(stack_consumed);
// Only account for registers given that any memory entries
// consumed by the call (assigned to a register or to a stack
// slot) were freed by the previous call to
// `masm.free_stack`, so we only care about dropping them
// here.
//
// NOTE / TODO there's probably a path to getting rid of
// `save_live_registers_and_calculate_sizeof` and
// `call_stack_space`, making it a bit more obvious what's
// happening here. We could:
//
// * Modify the `spill` implementation so that it takes a
// filtering callback, to control which values the caller is
// interested in saving (e.g. save all if no function is provided)
// * Rely on the new implementation of `drop_last` to calcuate
// the stack memory entries consumed by the call and then free
// the calculated stack space.
context.drop_last(sig.params.len_without_retptr(), |regalloc, v| {
if v.is_reg() {
regalloc.free(v.unwrap_reg().into());
}
});
let mut results_data = ABIResultsData::wrap(sig.results.clone());
results_data.ret_area = ret_area;

30
winch/codegen/src/codegen/context.rs

@ -11,7 +11,6 @@ use crate::{
regalloc::RegAlloc,
stack::{Stack, TypedReg, Val},
};
use std::ops::RangeBounds;
/// The code generation context.
/// The code generation context is made up of three
@ -315,35 +314,6 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> {
}
}
/// Saves any live registers in the value stack in a particular
/// range defined by the caller. This is a specialization of the
/// spill function; made available for cases in which spilling
/// locals is not required, like for example for function calls in
/// which locals are not reachable by the callee.
///
/// Every `Val::Reg` entry in `range` is pushed through `masm`, its
/// register is released in the register allocator and the entry is
/// rewritten in place as a memory value.
///
/// Returns the size in bytes of the specified range: the sum of the slot
/// sizes of the registers pushed by this call and of the memory entries
/// that were already present. Any other kind of entry contributes nothing.
pub fn save_live_registers_and_calculate_sizeof<M, R>(&mut self, masm: &mut M, range: R) -> u32
where
R: RangeBounds<usize>,
M: MacroAssembler,
{
// Running total, in bytes, of the slots found or created in `range`.
let mut size = 0u32;
for v in self.stack.inner_mut().range_mut(range) {
match v {
Val::Reg(TypedReg { reg, ty }) => {
// Push the register, free it, and replace the stack entry with
// the memory slot returned by the push.
let slot = masm.push(*reg, (*ty).into());
self.regalloc.free(*reg);
*v = Val::mem(*ty, slot);
size += slot.size
}
// Already a memory entry: only account for its size.
Val::Memory(mem) => size += mem.slot.size,
// Every other kind of entry is left untouched and not counted.
_ => {}
}
}
size
}
/// Drops the last `n` elements of the stack, calling the provided
/// function for each `n` stack value.
/// The values are dropped in top-to-bottom order.

8
winch/codegen/src/stack.rs

@ -155,6 +155,14 @@ impl Val {
}
}
/// Returns `true` when the value is one of the constant variants
/// (`I32`, `I64`, `F32` or `F64`), and `false` for every other variant.
pub fn is_const(&self) -> bool {
    matches!(
        *self,
        Val::I32(_) | Val::I64(_) | Val::F32(_) | Val::F64(_)
    )
}
/// Check whether the value is local with a particular index.
pub fn is_local_at_index(&self, index: u32) -> bool {
match *self {

8
winch/codegen/src/visitor.rs

@ -925,6 +925,14 @@ where
}
fn visit_call_indirect(&mut self, type_index: u32, table_index: u32, _: u8) {
// Spill now because `emit_lazy_init_funcref` and the `FnCall::emit`
// invocations will both trigger spills since they both call functions.
// However, the machine instructions for the spill emitted by
// `emit_lazy_init_funcref` will be jumped over if the funcref was previously
// initialized which may result in the machine stack becoming
// unbalanced.
self.context.spill(self.masm);
let type_index = TypeIndex::from_u32(type_index);
let table_index = TableIndex::from_u32(table_index);

42
winch/filetests/filetests/x64/call/reg_on_stack.wat

@ -0,0 +1,42 @@
;;! target = "x86_64"
(module
;; Function 0 calls itself twice. The value returned by the first call is
;; still on the Wasm value stack while the second call is emitted, and it is
;; the value carried by `br_if` afterwards.
(func (export "") (param i32) (result i32)
local.get 0
i32.const 1
call 0
i32.const 1
call 0
br_if 0 (;@0;)
unreachable
)
)
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec10 sub rsp, 0x10
;; 8: 897c240c mov dword ptr [rsp + 0xc], edi
;; c: 4c893424 mov qword ptr [rsp], r14
;; 10: 448b5c240c mov r11d, dword ptr [rsp + 0xc]
;; 15: 4883ec04 sub rsp, 4
;; 19: 44891c24 mov dword ptr [rsp], r11d
;; 1d: 4883ec0c sub rsp, 0xc
;; 21: bf01000000 mov edi, 1
;; 26: e800000000 call 0x2b
;; 2b: 4883c40c add rsp, 0xc
;; 2f: 4883ec04 sub rsp, 4
;; 33: 890424 mov dword ptr [rsp], eax
;; 36: 4883ec08 sub rsp, 8
;; 3a: bf01000000 mov edi, 1
;; 3f: e800000000 call 0x44
;; 44: 4883c408 add rsp, 8
;; 48: 4883ec04 sub rsp, 4
;; 4c: 890424 mov dword ptr [rsp], eax
;; 4f: 8b0c24 mov ecx, dword ptr [rsp]
;; 52: 4883c404 add rsp, 4
;; 56: 8b0424 mov eax, dword ptr [rsp]
;; 59: 4883c404 add rsp, 4
;; 5d: 85c9 test ecx, ecx
;; 5f: 0f8502000000 jne 0x67
;; 65: 0f0b ud2
;; 67: 4883c410 add rsp, 0x10
;; 6b: 5d pop rbp
;; 6c: c3 ret

140
winch/filetests/filetests/x64/call_indirect/local_arg.wat

@ -0,0 +1,140 @@
;;! target="x86_64"
(module
(type $param-i32 (func (param i32)))
;; Callee: takes a single i32 parameter and has an empty body.
(func $param-i32 (type $param-i32))
;; Caller: pushes local 0 as the argument, then performs an indirect call
;; through table element 0.
(func (export "")
(local i32)
local.get 0
(call_indirect (type $param-i32) (i32.const 0))
)
;; Table whose only element is `$param-i32`.
(table funcref
(elem
$param-i32)
)
)
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec10 sub rsp, 0x10
;; 8: 897c240c mov dword ptr [rsp + 0xc], edi
;; c: 4c893424 mov qword ptr [rsp], r14
;; 10: 4883c410 add rsp, 0x10
;; 14: 5d pop rbp
;; 15: c3 ret
;;
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec10 sub rsp, 0x10
;; 8: 48c744240800000000
;; mov qword ptr [rsp + 8], 0
;; 11: 4c893424 mov qword ptr [rsp], r14
;; 15: b900000000 mov ecx, 0
;; 1a: 4c89f2 mov rdx, r14
;; 1d: 8b5a50 mov ebx, dword ptr [rdx + 0x50]
;; 20: 39d9 cmp ecx, ebx
;; 22: 0f8394000000 jae 0xbc
;; 28: 4189cb mov r11d, ecx
;; 2b: 4d6bdb08 imul r11, r11, 8
;; 2f: 488b5248 mov rdx, qword ptr [rdx + 0x48]
;; 33: 4889d6 mov rsi, rdx
;; 36: 4c01da add rdx, r11
;; 39: 39d9 cmp ecx, ebx
;; 3b: 480f43d6 cmovae rdx, rsi
;; 3f: 488b02 mov rax, qword ptr [rdx]
;; 42: 4885c0 test rax, rax
;; 45: 0f8536000000 jne 0x81
;; 4b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38]
;; 4f: 498b5b48 mov rbx, qword ptr [r11 + 0x48]
;; 53: 448b5c240c mov r11d, dword ptr [rsp + 0xc]
;; 58: 4883ec04 sub rsp, 4
;; 5c: 44891c24 mov dword ptr [rsp], r11d
;; 60: 4156 push r14
;; 62: 4883ec04 sub rsp, 4
;; 66: 890c24 mov dword ptr [rsp], ecx
;; 69: 488b7c2404 mov rdi, qword ptr [rsp + 4]
;; 6e: be00000000 mov esi, 0
;; 73: 8b1424 mov edx, dword ptr [rsp]
;; 76: ffd3 call rbx
;; 78: 4883c40c add rsp, 0xc
;; 7c: e904000000 jmp 0x85
;; 81: 4883e0fe and rax, 0xfffffffffffffffe
;; 85: 4885c0 test rax, rax
;; 88: 0f8430000000 je 0xbe
;; 8e: 4d8b5e40 mov r11, qword ptr [r14 + 0x40]
;; 92: 418b0b mov ecx, dword ptr [r11]
;; 95: 8b5018 mov edx, dword ptr [rax + 0x18]
;; 98: 39d1 cmp ecx, edx
;; 9a: 0f8520000000 jne 0xc0
;; a0: 488b4810 mov rcx, qword ptr [rax + 0x10]
;; a4: 4883ec0c sub rsp, 0xc
;; a8: 8b7c240c mov edi, dword ptr [rsp + 0xc]
;; ac: ffd1 call rcx
;; ae: 4883c40c add rsp, 0xc
;; b2: 4883c404 add rsp, 4
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec10 sub rsp, 0x10
;; 8: 897c240c mov dword ptr [rsp + 0xc], edi
;; c: 4c893424 mov qword ptr [rsp], r14
;; 10: 4883c410 add rsp, 0x10
;; 14: 5d pop rbp
;; 15: c3 ret
;;
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec10 sub rsp, 0x10
;; 8: 48c744240800000000
;; mov qword ptr [rsp + 8], 0
;; 11: 4c893424 mov qword ptr [rsp], r14
;; 15: 448b5c240c mov r11d, dword ptr [rsp + 0xc]
;; 1a: 4883ec04 sub rsp, 4
;; 1e: 44891c24 mov dword ptr [rsp], r11d
;; 22: b900000000 mov ecx, 0
;; 27: 4c89f2 mov rdx, r14
;; 2a: 8b5a50 mov ebx, dword ptr [rdx + 0x50]
;; 2d: 39d9 cmp ecx, ebx
;; 2f: 0f8387000000 jae 0xbc
;; 35: 4189cb mov r11d, ecx
;; 38: 4d6bdb08 imul r11, r11, 8
;; 3c: 488b5248 mov rdx, qword ptr [rdx + 0x48]
;; 40: 4889d6 mov rsi, rdx
;; 43: 4c01da add rdx, r11
;; 46: 39d9 cmp ecx, ebx
;; 48: 480f43d6 cmovae rdx, rsi
;; 4c: 488b02 mov rax, qword ptr [rdx]
;; 4f: 4885c0 test rax, rax
;; 52: 0f8529000000 jne 0x81
;; 58: 4d8b5e38 mov r11, qword ptr [r14 + 0x38]
;; 5c: 498b5b48 mov rbx, qword ptr [r11 + 0x48]
;; 60: 4156 push r14
;; 62: 4883ec04 sub rsp, 4
;; 66: 890c24 mov dword ptr [rsp], ecx
;; 69: 488b7c2404 mov rdi, qword ptr [rsp + 4]
;; 6e: be00000000 mov esi, 0
;; 73: 8b1424 mov edx, dword ptr [rsp]
;; 76: ffd3 call rbx
;; 78: 4883c40c add rsp, 0xc
;; 7c: e904000000 jmp 0x85
;; 81: 4883e0fe and rax, 0xfffffffffffffffe
;; 85: 4885c0 test rax, rax
;; 88: 0f8430000000 je 0xbe
;; 8e: 4d8b5e40 mov r11, qword ptr [r14 + 0x40]
;; 92: 418b0b mov ecx, dword ptr [r11]
;; 95: 8b5018 mov edx, dword ptr [rax + 0x18]
;; 98: 39d1 cmp ecx, edx
;; 9a: 0f8520000000 jne 0xc0
;; a0: 488b4810 mov rcx, qword ptr [rax + 0x10]
;; a4: 4883ec0c sub rsp, 0xc
;; a8: 8b7c240c mov edi, dword ptr [rsp + 0xc]
;; ac: ffd1 call rcx
;; ae: 4883c40c add rsp, 0xc
;; b2: 4883c404 add rsp, 4
;; b6: 4883c410 add rsp, 0x10
;; ba: 5d pop rbp
;; bb: c3 ret
;; bc: 0f0b ud2
;; be: 0f0b ud2
;; c0: 0f0b ud2

16
winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat

@ -11,8 +11,14 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0
;; e: 4c893424 mov qword ptr [rsp], r14
;; 12: f30f1044240c movss xmm0, dword ptr [rsp + 0xc]
;; 18: e800000000 call 0x1d
;; 1d: 4883c410 add rsp, 0x10
;; 21: 5d pop rbp
;; 22: c3 ret
;; 12: f3440f107c240c movss xmm15, dword ptr [rsp + 0xc]
;; 19: 4883ec04 sub rsp, 4
;; 1d: f3440f113c24 movss dword ptr [rsp], xmm15
;; 23: 4883ec0c sub rsp, 0xc
;; 27: f30f1044240c movss xmm0, dword ptr [rsp + 0xc]
;; 2d: e800000000 call 0x32
;; 32: 4883c40c add rsp, 0xc
;; 36: 4883c404 add rsp, 4
;; 3a: 4883c410 add rsp, 0x10
;; 3e: 5d pop rbp
;; 3f: c3 ret

16
winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat

@ -11,8 +11,14 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0
;; e: 4c893424 mov qword ptr [rsp], r14
;; 12: f30f1044240c movss xmm0, dword ptr [rsp + 0xc]
;; 18: e800000000 call 0x1d
;; 1d: 4883c410 add rsp, 0x10
;; 21: 5d pop rbp
;; 22: c3 ret
;; 12: f3440f107c240c movss xmm15, dword ptr [rsp + 0xc]
;; 19: 4883ec04 sub rsp, 4
;; 1d: f3440f113c24 movss dword ptr [rsp], xmm15
;; 23: 4883ec0c sub rsp, 0xc
;; 27: f30f1044240c movss xmm0, dword ptr [rsp + 0xc]
;; 2d: e800000000 call 0x32
;; 32: 4883c40c add rsp, 0xc
;; 36: 4883c404 add rsp, 4
;; 3a: 4883c410 add rsp, 0x10
;; 3e: 5d pop rbp
;; 3f: c3 ret

16
winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat

@ -11,8 +11,14 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0
;; e: 4c893424 mov qword ptr [rsp], r14
;; 12: f30f1044240c movss xmm0, dword ptr [rsp + 0xc]
;; 18: e800000000 call 0x1d
;; 1d: 4883c410 add rsp, 0x10
;; 21: 5d pop rbp
;; 22: c3 ret
;; 12: f3440f107c240c movss xmm15, dword ptr [rsp + 0xc]
;; 19: 4883ec04 sub rsp, 4
;; 1d: f3440f113c24 movss dword ptr [rsp], xmm15
;; 23: 4883ec0c sub rsp, 0xc
;; 27: f30f1044240c movss xmm0, dword ptr [rsp + 0xc]
;; 2d: e800000000 call 0x32
;; 32: 4883c40c add rsp, 0xc
;; 36: 4883c404 add rsp, 4
;; 3a: 4883c410 add rsp, 0x10
;; 3e: 5d pop rbp
;; 3f: c3 ret

16
winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat

@ -11,8 +11,14 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0
;; e: 4c893424 mov qword ptr [rsp], r14
;; 12: f30f1044240c movss xmm0, dword ptr [rsp + 0xc]
;; 18: e800000000 call 0x1d
;; 1d: 4883c410 add rsp, 0x10
;; 21: 5d pop rbp
;; 22: c3 ret
;; 12: f3440f107c240c movss xmm15, dword ptr [rsp + 0xc]
;; 19: 4883ec04 sub rsp, 4
;; 1d: f3440f113c24 movss dword ptr [rsp], xmm15
;; 23: 4883ec0c sub rsp, 0xc
;; 27: f30f1044240c movss xmm0, dword ptr [rsp + 0xc]
;; 2d: e800000000 call 0x32
;; 32: 4883c40c add rsp, 0xc
;; 36: 4883c404 add rsp, 4
;; 3a: 4883c410 add rsp, 0x10
;; 3e: 5d pop rbp
;; 3f: c3 ret

16
winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat

@ -11,8 +11,14 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0
;; e: 4c893424 mov qword ptr [rsp], r14
;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8]
;; 18: e800000000 call 0x1d
;; 1d: 4883c410 add rsp, 0x10
;; 21: 5d pop rbp
;; 22: c3 ret
;; 12: f2440f107c2408 movsd xmm15, qword ptr [rsp + 8]
;; 19: 4883ec08 sub rsp, 8
;; 1d: f2440f113c24 movsd qword ptr [rsp], xmm15
;; 23: 4883ec08 sub rsp, 8
;; 27: f20f10442408 movsd xmm0, qword ptr [rsp + 8]
;; 2d: e800000000 call 0x32
;; 32: 4883c408 add rsp, 8
;; 36: 4883c408 add rsp, 8
;; 3a: 4883c410 add rsp, 0x10
;; 3e: 5d pop rbp
;; 3f: c3 ret

16
winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat

@ -11,8 +11,14 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0
;; e: 4c893424 mov qword ptr [rsp], r14
;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8]
;; 18: e800000000 call 0x1d
;; 1d: 4883c410 add rsp, 0x10
;; 21: 5d pop rbp
;; 22: c3 ret
;; 12: f2440f107c2408 movsd xmm15, qword ptr [rsp + 8]
;; 19: 4883ec08 sub rsp, 8
;; 1d: f2440f113c24 movsd qword ptr [rsp], xmm15
;; 23: 4883ec08 sub rsp, 8
;; 27: f20f10442408 movsd xmm0, qword ptr [rsp + 8]
;; 2d: e800000000 call 0x32
;; 32: 4883c408 add rsp, 8
;; 36: 4883c408 add rsp, 8
;; 3a: 4883c410 add rsp, 0x10
;; 3e: 5d pop rbp
;; 3f: c3 ret

16
winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat

@ -11,8 +11,14 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0
;; e: 4c893424 mov qword ptr [rsp], r14
;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8]
;; 18: e800000000 call 0x1d
;; 1d: 4883c410 add rsp, 0x10
;; 21: 5d pop rbp
;; 22: c3 ret
;; 12: f2440f107c2408 movsd xmm15, qword ptr [rsp + 8]
;; 19: 4883ec08 sub rsp, 8
;; 1d: f2440f113c24 movsd qword ptr [rsp], xmm15
;; 23: 4883ec08 sub rsp, 8
;; 27: f20f10442408 movsd xmm0, qword ptr [rsp + 8]
;; 2d: e800000000 call 0x32
;; 32: 4883c408 add rsp, 8
;; 36: 4883c408 add rsp, 8
;; 3a: 4883c410 add rsp, 0x10
;; 3e: 5d pop rbp
;; 3f: c3 ret

16
winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat

@ -11,8 +11,14 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0
;; e: 4c893424 mov qword ptr [rsp], r14
;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8]
;; 18: e800000000 call 0x1d
;; 1d: 4883c410 add rsp, 0x10
;; 21: 5d pop rbp
;; 22: c3 ret
;; 12: f2440f107c2408 movsd xmm15, qword ptr [rsp + 8]
;; 19: 4883ec08 sub rsp, 8
;; 1d: f2440f113c24 movsd qword ptr [rsp], xmm15
;; 23: 4883ec08 sub rsp, 8
;; 27: f20f10442408 movsd xmm0, qword ptr [rsp + 8]
;; 2d: e800000000 call 0x32
;; 32: 4883c408 add rsp, 8
;; 36: 4883c408 add rsp, 8
;; 3a: 4883c410 add rsp, 0x10
;; 3e: 5d pop rbp
;; 3f: c3 ret

36
winch/filetests/filetests/x64/table/fill.wat

@ -57,7 +57,7 @@
;; 2d: 4c89f2 mov rdx, r14
;; 30: 8b5a50 mov ebx, dword ptr [rdx + 0x50]
;; 33: 39d9 cmp ecx, ebx
;; 35: 0f8394000000 jae 0xcf
;; 35: 0f83b5000000 jae 0xf0
;; 3b: 4189cb mov r11d, ecx
;; 3e: 4d6bdb08 imul r11, r11, 8
;; 42: 488b5248 mov rdx, qword ptr [rdx + 0x48]
@ -86,16 +86,24 @@
;; 99: 4d8b5e38 mov r11, qword ptr [r14 + 0x38]
;; 9d: 498b4368 mov rax, qword ptr [r11 + 0x68]
;; a1: 4156 push r14
;; a3: 4883ec08 sub rsp, 8
;; a7: 488b7c2408 mov rdi, qword ptr [rsp + 8]
;; ac: be01000000 mov esi, 1
;; b1: 8b54242c mov edx, dword ptr [rsp + 0x2c]
;; b5: 488b4c241c mov rcx, qword ptr [rsp + 0x1c]
;; ba: 448b442424 mov r8d, dword ptr [rsp + 0x24]
;; bf: ffd0 call rax
;; c1: 4883c408 add rsp, 8
;; c5: 4883c408 add rsp, 8
;; c9: 4883c420 add rsp, 0x20
;; cd: 5d pop rbp
;; ce: c3 ret
;; cf: 0f0b ud2
;; a3: 448b5c2424 mov r11d, dword ptr [rsp + 0x24]
;; a8: 4883ec04 sub rsp, 4
;; ac: 44891c24 mov dword ptr [rsp], r11d
;; b0: 4c8b5c2418 mov r11, qword ptr [rsp + 0x18]
;; b5: 4153 push r11
;; b7: 448b5c2428 mov r11d, dword ptr [rsp + 0x28]
;; bc: 4883ec04 sub rsp, 4
;; c0: 44891c24 mov dword ptr [rsp], r11d
;; c4: 4883ec08 sub rsp, 8
;; c8: 488b7c2418 mov rdi, qword ptr [rsp + 0x18]
;; cd: be01000000 mov esi, 1
;; d2: 8b542414 mov edx, dword ptr [rsp + 0x14]
;; d6: 488b4c240c mov rcx, qword ptr [rsp + 0xc]
;; db: 448b442408 mov r8d, dword ptr [rsp + 8]
;; e0: ffd0 call rax
;; e2: 4883c408 add rsp, 8
;; e6: 4883c418 add rsp, 0x18
;; ea: 4883c420 add rsp, 0x20
;; ee: 5d pop rbp
;; ef: c3 ret
;; f0: 0f0b ud2

22
winch/filetests/filetests/x64/table/grow.wat

@ -17,14 +17,14 @@
;; 11: 4d8b5e38 mov r11, qword ptr [r14 + 0x38]
;; 15: 498b5b50 mov rbx, qword ptr [r11 + 0x50]
;; 19: 4156 push r14
;; 1b: 4883ec08 sub rsp, 8
;; 1f: 488b7c2408 mov rdi, qword ptr [rsp + 8]
;; 24: be00000000 mov esi, 0
;; 29: ba0a000000 mov edx, 0xa
;; 2e: 488b4c2418 mov rcx, qword ptr [rsp + 0x18]
;; 33: ffd3 call rbx
;; 35: 4883c408 add rsp, 8
;; 39: 4883c408 add rsp, 8
;; 3d: 4883c410 add rsp, 0x10
;; 41: 5d pop rbp
;; 42: c3 ret
;; 1b: 4c8b5c2410 mov r11, qword ptr [rsp + 0x10]
;; 20: 4153 push r11
;; 22: 488b7c2408 mov rdi, qword ptr [rsp + 8]
;; 27: be00000000 mov esi, 0
;; 2c: ba0a000000 mov edx, 0xa
;; 31: 488b0c24 mov rcx, qword ptr [rsp]
;; 35: ffd3 call rbx
;; 37: 4883c410 add rsp, 0x10
;; 3b: 4883c410 add rsp, 0x10
;; 3f: 5d pop rbp
;; 40: c3 ret

96
winch/filetests/filetests/x64/table/init_copy_drop.wat

@ -182,49 +182,53 @@
;; 4: 4883ec10 sub rsp, 0x10
;; 8: 897c240c mov dword ptr [rsp + 0xc], edi
;; c: 4c893424 mov qword ptr [rsp], r14
;; 10: 8b4c240c mov ecx, dword ptr [rsp + 0xc]
;; 14: 4c89f2 mov rdx, r14
;; 17: 8b9af0000000 mov ebx, dword ptr [rdx + 0xf0]
;; 1d: 39d9 cmp ecx, ebx
;; 1f: 0f8385000000 jae 0xaa
;; 25: 4189cb mov r11d, ecx
;; 28: 4d6bdb08 imul r11, r11, 8
;; 2c: 488b92e8000000 mov rdx, qword ptr [rdx + 0xe8]
;; 33: 4889d6 mov rsi, rdx
;; 36: 4c01da add rdx, r11
;; 39: 39d9 cmp ecx, ebx
;; 3b: 480f43d6 cmovae rdx, rsi
;; 3f: 488b02 mov rax, qword ptr [rdx]
;; 42: 4885c0 test rax, rax
;; 45: 0f8532000000 jne 0x7d
;; 4b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38]
;; 4f: 498b5b48 mov rbx, qword ptr [r11 + 0x48]
;; 53: 4156 push r14
;; 55: 4883ec04 sub rsp, 4
;; 59: 890c24 mov dword ptr [rsp], ecx
;; 5c: 4883ec04 sub rsp, 4
;; 60: 488b7c2408 mov rdi, qword ptr [rsp + 8]
;; 65: be00000000 mov esi, 0
;; 6a: 8b542404 mov edx, dword ptr [rsp + 4]
;; 6e: ffd3 call rbx
;; 70: 4883c404 add rsp, 4
;; 74: 4883c40c add rsp, 0xc
;; 78: e904000000 jmp 0x81
;; 7d: 4883e0fe and rax, 0xfffffffffffffffe
;; 81: 4885c0 test rax, rax
;; 84: 0f8422000000 je 0xac
;; 8a: 4d8b5e40 mov r11, qword ptr [r14 + 0x40]
;; 8e: 418b0b mov ecx, dword ptr [r11]
;; 91: 8b5018 mov edx, dword ptr [rax + 0x18]
;; 94: 39d1 cmp ecx, edx
;; 96: 0f8512000000 jne 0xae
;; 9c: 50 push rax
;; 9d: 59 pop rcx
;; 9e: 488b5110 mov rdx, qword ptr [rcx + 0x10]
;; a2: ffd2 call rdx
;; a4: 4883c410 add rsp, 0x10
;; a8: 5d pop rbp
;; a9: c3 ret
;; aa: 0f0b ud2
;; ac: 0f0b ud2
;; ae: 0f0b ud2
;; 10: 448b5c240c mov r11d, dword ptr [rsp + 0xc]
;; 15: 4883ec04 sub rsp, 4
;; 19: 44891c24 mov dword ptr [rsp], r11d
;; 1d: 8b0c24 mov ecx, dword ptr [rsp]
;; 20: 4883c404 add rsp, 4
;; 24: 4c89f2 mov rdx, r14
;; 27: 8b9af0000000 mov ebx, dword ptr [rdx + 0xf0]
;; 2d: 39d9 cmp ecx, ebx
;; 2f: 0f8385000000 jae 0xba
;; 35: 4189cb mov r11d, ecx
;; 38: 4d6bdb08 imul r11, r11, 8
;; 3c: 488b92e8000000 mov rdx, qword ptr [rdx + 0xe8]
;; 43: 4889d6 mov rsi, rdx
;; 46: 4c01da add rdx, r11
;; 49: 39d9 cmp ecx, ebx
;; 4b: 480f43d6 cmovae rdx, rsi
;; 4f: 488b02 mov rax, qword ptr [rdx]
;; 52: 4885c0 test rax, rax
;; 55: 0f8532000000 jne 0x8d
;; 5b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38]
;; 5f: 498b5b48 mov rbx, qword ptr [r11 + 0x48]
;; 63: 4156 push r14
;; 65: 4883ec04 sub rsp, 4
;; 69: 890c24 mov dword ptr [rsp], ecx
;; 6c: 4883ec04 sub rsp, 4
;; 70: 488b7c2408 mov rdi, qword ptr [rsp + 8]
;; 75: be00000000 mov esi, 0
;; 7a: 8b542404 mov edx, dword ptr [rsp + 4]
;; 7e: ffd3 call rbx
;; 80: 4883c404 add rsp, 4
;; 84: 4883c40c add rsp, 0xc
;; 88: e904000000 jmp 0x91
;; 8d: 4883e0fe and rax, 0xfffffffffffffffe
;; 91: 4885c0 test rax, rax
;; 94: 0f8422000000 je 0xbc
;; 9a: 4d8b5e40 mov r11, qword ptr [r14 + 0x40]
;; 9e: 418b0b mov ecx, dword ptr [r11]
;; a1: 8b5018 mov edx, dword ptr [rax + 0x18]
;; a4: 39d1 cmp ecx, edx
;; a6: 0f8512000000 jne 0xbe
;; ac: 50 push rax
;; ad: 59 pop rcx
;; ae: 488b5110 mov rdx, qword ptr [rcx + 0x10]
;; b2: ffd2 call rdx
;; b4: 4883c410 add rsp, 0x10
;; b8: 5d pop rbp
;; b9: c3 ret
;; ba: 0f0b ud2
;; bc: 0f0b ud2
;; be: 0f0b ud2

68
winch/filetests/filetests/x64/table/set.wat

@ -58,7 +58,7 @@
;; 18: 4c89f2 mov rdx, r14
;; 1b: 8b5a50 mov ebx, dword ptr [rdx + 0x50]
;; 1e: 39d9 cmp ecx, ebx
;; 20: 0f838f000000 jae 0xb5
;; 20: 0f8396000000 jae 0xbc
;; 26: 4189cb mov r11d, ecx
;; 29: 4d6bdb08 imul r11, r11, 8
;; 2d: 488b5248 mov rdx, qword ptr [rdx + 0x48]
@ -68,37 +68,39 @@
;; 39: 480f43d6 cmovae rdx, rsi
;; 3d: 488b02 mov rax, qword ptr [rdx]
;; 40: 4885c0 test rax, rax
;; 43: 0f8532000000 jne 0x7b
;; 43: 0f8536000000 jne 0x7f
;; 49: 4d8b5e38 mov r11, qword ptr [r14 + 0x38]
;; 4d: 498b5b48 mov rbx, qword ptr [r11 + 0x48]
;; 51: 4156 push r14
;; 53: 4883ec04 sub rsp, 4
;; 57: 890c24 mov dword ptr [rsp], ecx
;; 5a: 4883ec04 sub rsp, 4
;; 5e: 488b7c2408 mov rdi, qword ptr [rsp + 8]
;; 63: be00000000 mov esi, 0
;; 68: 8b542404 mov edx, dword ptr [rsp + 4]
;; 6c: ffd3 call rbx
;; 6e: 4883c404 add rsp, 4
;; 72: 4883c40c add rsp, 0xc
;; 76: e904000000 jmp 0x7f
;; 7b: 4883e0fe and rax, 0xfffffffffffffffe
;; 7f: 8b4c240c mov ecx, dword ptr [rsp + 0xc]
;; 83: 4c89f2 mov rdx, r14
;; 86: 8b5a50 mov ebx, dword ptr [rdx + 0x50]
;; 89: 39d9 cmp ecx, ebx
;; 8b: 0f8326000000 jae 0xb7
;; 91: 4189cb mov r11d, ecx
;; 94: 4d6bdb08 imul r11, r11, 8
;; 98: 488b5248 mov rdx, qword ptr [rdx + 0x48]
;; 9c: 4889d6 mov rsi, rdx
;; 9f: 4c01da add rdx, r11
;; a2: 39d9 cmp ecx, ebx
;; a4: 480f43d6 cmovae rdx, rsi
;; a8: 4883c801 or rax, 1
;; ac: 488902 mov qword ptr [rdx], rax
;; af: 4883c410 add rsp, 0x10
;; b3: 5d pop rbp
;; b4: c3 ret
;; b5: 0f0b ud2
;; b7: 0f0b ud2
;; 51: 448b5c240c mov r11d, dword ptr [rsp + 0xc]
;; 56: 4883ec04 sub rsp, 4
;; 5a: 44891c24 mov dword ptr [rsp], r11d
;; 5e: 4156 push r14
;; 60: 4883ec04 sub rsp, 4
;; 64: 890c24 mov dword ptr [rsp], ecx
;; 67: 488b7c2404 mov rdi, qword ptr [rsp + 4]
;; 6c: be00000000 mov esi, 0
;; 71: 8b1424 mov edx, dword ptr [rsp]
;; 74: ffd3 call rbx
;; 76: 4883c40c add rsp, 0xc
;; 7a: e904000000 jmp 0x83
;; 7f: 4883e0fe and rax, 0xfffffffffffffffe
;; 83: 8b0c24 mov ecx, dword ptr [rsp]
;; 86: 4883c404 add rsp, 4
;; 8a: 4c89f2 mov rdx, r14
;; 8d: 8b5a50 mov ebx, dword ptr [rdx + 0x50]
;; 90: 39d9 cmp ecx, ebx
;; 92: 0f8326000000 jae 0xbe
;; 98: 4189cb mov r11d, ecx
;; 9b: 4d6bdb08 imul r11, r11, 8
;; 9f: 488b5248 mov rdx, qword ptr [rdx + 0x48]
;; a3: 4889d6 mov rsi, rdx
;; a6: 4c01da add rdx, r11
;; a9: 39d9 cmp ecx, ebx
;; ab: 480f43d6 cmovae rdx, rsi
;; af: 4883c801 or rax, 1
;; b3: 488902 mov qword ptr [rdx], rax
;; b6: 4883c410 add rsp, 0x10
;; ba: 5d pop rbp
;; bb: c3 ret
;; bc: 0f0b ud2
;; be: 0f0b ud2

Loading…
Cancel
Save