
winch: Optimize calls (#7927)

* winch: Optimize calls

This commit introduces several optimizations to speed up the compilation
of function calls:

* Keep track of previously resolved function signatures for local or
  imported callees to avoid recomputing the `ABISig` on every function
  call (see the sketch after this list).
* Keep track of previously resolved type signatures for indirect calls,
  for the same reason.
* Refactor the `CallKnown` and `CallUnknown` instructions to make their
  `BoxCallInfo` field optional. Prior to this change, every call lowered
  by Winch incurred a heap allocation for a default-initialized
  `BoxCallInfo` whose contents Winch never uses.
* Switch Winch's internal `Stack` from a `Vec` to a `SmallVec`. Many of
  the operations involved in builtin function calls insert elements at
  arbitrary offsets in the stack, and a `SmallVec` makes those
  insertions cheaper.
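The first two bullets amount to memoizing the `ABISig` computation per function (or type) index. A minimal, self-contained sketch of that pattern, using hypothetical stand-ins for Winch's `FuncIndex` and `ABISig` types (the real caching lives in `FuncEnv` in `winch/codegen/src/codegen/env.rs`, in the diff below):

use std::collections::HashMap;

// Hypothetical, simplified stand-ins for the real Winch types.
type FuncIndex = u32;

#[derive(Clone)]
struct ABISig; // params, results, stack sizes, ...

struct FuncEnv {
    resolved_callees: HashMap<FuncIndex, ABISig>,
}

impl FuncEnv {
    // Imagine this walks the wasm type and assigns registers and stack
    // slots; it's the expensive step the cache avoids repeating.
    fn compute_sig(_idx: FuncIndex) -> ABISig {
        ABISig
    }

    /// Returns a cached signature, computing it only on first use.
    fn sig_for(&mut self, idx: FuncIndex) -> &ABISig {
        self.resolved_callees
            .entry(idx)
            .or_insert_with(|| Self::compute_sig(idx))
    }
}

fn main() {
    let mut env = FuncEnv { resolved_callees: HashMap::new() };
    let _sig = env.sig_for(0); // computed
    let _sig = env.sig_for(0); // cache hit
}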

With the changes above, I observed a ~30% improvement in compilation
times for call-heavy modules.

* Expect `CallInfo` where applicable and add a comment about the type
definition

* Remove unneeded types and lifetimes
branch: pull/7938/head
Saúl Cabrera, 9 months ago, committed by GitHub
commit 1b5c4ae89b
  1. cranelift/codegen/src/isa/x64/abi.rs (6 changed lines)
  2. cranelift/codegen/src/isa/x64/inst.isle (2 changed lines)
  3. cranelift/codegen/src/isa/x64/inst/emit.rs (32 changed lines)
  4. cranelift/codegen/src/isa/x64/inst/mod.rs (20 changed lines)
  5. cranelift/codegen/src/isa/x64/lower/isle.rs (6 changed lines)
  6. winch/codegen/src/codegen/call.rs (75 changed lines)
  7. winch/codegen/src/codegen/context.rs (9 changed lines)
  8. winch/codegen/src/codegen/env.rs (93 changed lines)
  9. winch/codegen/src/codegen/mod.rs (14 changed lines)
  10. winch/codegen/src/isa/aarch64/masm.rs (6 changed lines)
  11. winch/codegen/src/isa/aarch64/mod.rs (4 changed lines)
  12. winch/codegen/src/isa/x64/asm.rs (35 changed lines)
  13. winch/codegen/src/isa/x64/masm.rs (42 changed lines)
  14. winch/codegen/src/isa/x64/mod.rs (4 changed lines)
  15. winch/codegen/src/masm.rs (11 changed lines)
  16. winch/codegen/src/regalloc.rs (2 changed lines)
  17. winch/codegen/src/stack.rs (20 changed lines)
  18. winch/codegen/src/visitor.rs (203 changed lines)

cranelift/codegen/src/isa/x64/abi.rs (6 changed lines)

@@ -506,17 +506,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Writable::from_reg(regs::rax()),
));
insts.push(Inst::CallKnown {
opcode: Opcode::Call,
dest: ExternalName::LibCall(LibCall::Probestack),
info: Box::new(CallInfo {
info: Some(Box::new(CallInfo {
// No need to include arg here: we are post-regalloc
// so no constraints will be seen anyway.
uses: smallvec![],
defs: smallvec![],
clobbers: PRegSet::empty(),
opcode: Opcode::Call,
callee_pop_size: 0,
callee_conv: CallConv::Probestack,
}),
})),
});
}

cranelift/codegen/src/isa/x64/inst.isle (2 changed lines)

@@ -525,10 +525,12 @@
;; Direct call: call simm32.
(CallKnown (dest ExternalName)
(opcode Opcode)
(info BoxCallInfo))
;; Indirect call: callq (reg mem)
(CallUnknown (dest RegMem)
(opcode Opcode)
(info BoxCallInfo))
;; Tail call to a direct destination.

cranelift/codegen/src/isa/x64/inst/emit.rs (32 changed lines)

@@ -1609,11 +1609,7 @@ pub(crate) fn emit(
inst.emit(&[], sink, info, state);
}
Inst::CallKnown {
dest,
info: call_info,
..
} => {
Inst::CallKnown { dest, opcode, info } => {
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
}
@@ -1622,12 +1618,14 @@
// beginning of the immediate field.
emit_reloc(sink, Reloc::X86CallPCRel4, &dest, -4);
sink.put4(0);
if call_info.opcode.is_call() {
sink.add_call_site(call_info.opcode);
if opcode.is_call() {
sink.add_call_site(*opcode);
}
let callee_pop_size = i64::from(call_info.callee_pop_size);
state.adjust_virtual_sp_offset(-callee_pop_size);
if let Some(call_info) = info {
let callee_pop_size = i64::from(call_info.callee_pop_size);
state.adjust_virtual_sp_offset(-callee_pop_size);
}
}
Inst::ReturnCallKnown {
@@ -1683,11 +1681,7 @@ pub(crate) fn emit(
sink.add_call_site(ir::Opcode::ReturnCallIndirect);
}
Inst::CallUnknown {
dest,
info: call_info,
..
} => {
Inst::CallUnknown { dest, opcode, info } => {
let dest = dest.with_allocs(allocs);
let start_offset = sink.cur_offset();
@@ -1722,12 +1716,14 @@
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s);
}
if call_info.opcode.is_call() {
sink.add_call_site(call_info.opcode);
if opcode.is_call() {
sink.add_call_site(*opcode);
}
let callee_pop_size = i64::from(call_info.callee_pop_size);
state.adjust_virtual_sp_offset(-callee_pop_size);
if let Some(call_info) = info {
let callee_pop_size = i64::from(call_info.callee_pop_size);
state.adjust_virtual_sp_offset(-callee_pop_size);
}
}
Inst::Args { .. } => {}

cranelift/codegen/src/isa/x64/inst/mod.rs (20 changed lines)

@@ -41,8 +41,6 @@ pub struct CallInfo {
pub defs: CallRetList,
/// Registers clobbered by this call, as per its calling convention.
pub clobbers: PRegSet,
/// The opcode of this call.
pub opcode: Opcode,
/// The number of bytes that the callee will pop from the stack for the
/// caller, if any. (Used for popping stack arguments with the `tail`
/// calling convention.)
@@ -556,14 +554,14 @@ impl Inst {
) -> Inst {
Inst::CallKnown {
dest,
info: Box::new(CallInfo {
opcode,
info: Some(Box::new(CallInfo {
uses,
defs,
clobbers,
opcode,
callee_pop_size,
callee_conv,
}),
})),
}
}
@@ -579,14 +577,14 @@ impl Inst {
dest.assert_regclass_is(RegClass::Int);
Inst::CallUnknown {
dest,
info: Box::new(CallInfo {
opcode,
info: Some(Box::new(CallInfo {
uses,
defs,
clobbers,
opcode,
callee_pop_size,
callee_conv,
}),
})),
}
}
@@ -2348,11 +2346,12 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_early_def(*tmp);
}
Inst::CallKnown { dest, ref info, .. } => {
Inst::CallKnown { dest, info, .. } => {
// Probestack is special and is only inserted after
// regalloc, so we do not need to represent its ABI to the
// register allocator. Assert that we don't alter that
// arrangement.
let info = info.as_ref().expect("CallInfo is expected in this path");
debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
@@ -2363,7 +2362,8 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_clobbers(info.clobbers);
}
Inst::CallUnknown { ref info, dest, .. } => {
Inst::CallUnknown { info, dest, .. } => {
let info = info.as_ref().expect("CallInfo is expected in this path");
match dest {
RegMem::Reg { reg } if info.callee_conv == CallConv::Tail => {
// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):

cranelift/codegen/src/isa/x64/lower/isle.rs (6 changed lines)

@@ -33,7 +33,11 @@ use regalloc2::PReg;
use std::boxed::Box;
use std::convert::TryFrom;
type BoxCallInfo = Box<CallInfo>;
/// Type representing out-of-line data for calls. This type is optional
/// because the call instruction is also used by Winch to emit calls; Winch
/// doesn't use the `Box<CallInfo>` field, only Cranelift does. By making it
/// optional, we reduce the number of heap allocations in Winch.
type BoxCallInfo = Option<Box<CallInfo>>;
type BoxReturnCallInfo = Box<ReturnCallInfo>;
type VecArgPair = Vec<ArgPair>;
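A note on cost: `Box` is non-null, so Rust's niche optimization guarantees that `Option<Box<T>>` has the same size as `Box<T>`. Making the field optional therefore doesn't grow the instruction; it only lets Winch pass `None` and skip the allocation. A standalone check, with a stand-in `CallInfo` (the guarantee holds for any `T`):

use std::mem::size_of;

// Stand-in for Cranelift's CallInfo; any payload type works here.
struct CallInfo {
    _callee_pop_size: u32,
}

fn main() {
    // Box<T> is never null, so `None` can reuse the null bit pattern.
    assert_eq!(
        size_of::<Option<Box<CallInfo>>>(),
        size_of::<Box<CallInfo>>(),
    );
}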

winch/codegen/src/codegen/call.rs (75 changed lines)

@@ -65,11 +65,8 @@ use crate::{
masm::{CalleeKind, MacroAssembler, MemMoveDirection, OperandSize, SPOffset},
reg::Reg,
stack::Val,
CallingConvention,
};
use smallvec::SmallVec;
use std::borrow::Cow;
use wasmtime_environ::{PtrSize, VMOffsets, WasmValType};
use wasmtime_environ::{PtrSize, VMOffsets};
/// All the information needed to emit a function call.
#[derive(Copy, Clone)]
@@ -83,23 +80,19 @@ impl FnCall {
/// 4. Creates the stack space needed for the return area.
/// 5. Emits the call.
/// 6. Cleans up the stack space.
pub fn emit<M: MacroAssembler, P: PtrSize, R>(
pub fn emit<'a, M: MacroAssembler, P: PtrSize>(
masm: &mut M,
context: &mut CodeGenContext,
mut resolve: R,
) where
R: FnMut(&mut CodeGenContext) -> Callee,
{
let callee = resolve(context);
let ptr_type = ptr_type_from_ptr_size(context.vmoffsets.ptr.size());
let mut sig = Self::get_sig::<M>(&callee, ptr_type);
let kind = Self::map(&context.vmoffsets, &callee, sig.as_ref(), context, masm);
callee: Callee<'a>,
) {
let sig = callee.sig();
let kind = Self::map(&context.vmoffsets, &callee, sig, context, masm);
context.spill(masm);
let ret_area = Self::make_ret_area(&sig, masm);
let arg_stack_space = sig.params_stack_size();
let reserved_stack = masm.call(arg_stack_space, |masm| {
Self::assign(sig.as_ref(), ret_area.as_ref(), context, masm);
Self::assign(sig, ret_area.as_ref(), context, masm);
kind
});
@@ -108,7 +101,7 @@ impl FnCall {
_ => {}
}
Self::cleanup(&mut sig, reserved_stack, ret_area, masm, context);
Self::cleanup(sig, reserved_stack, ret_area, masm, context);
}
/// Calculates the return area for the callee, if any.
@@ -123,30 +116,6 @@ impl FnCall {
})
}
/// Derive the [`ABISig`] for a particular [`Callee`].
fn get_sig<M: MacroAssembler>(callee: &Callee, ptr_type: WasmValType) -> Cow<'_, ABISig> {
match callee {
Callee::Builtin(info) => Cow::Borrowed(info.sig()),
Callee::Import(info) => {
let mut params: SmallVec<[WasmValType; 6]> =
SmallVec::with_capacity(info.ty.params().len() + 2);
params.extend_from_slice(&[ptr_type, ptr_type]);
params.extend_from_slice(info.ty.params());
Cow::Owned(<M::ABI as ABI>::sig_from(
&params,
info.ty.returns(),
&CallingConvention::Default,
))
}
Callee::Local(info) => {
Cow::Owned(<M::ABI as ABI>::sig(&info.ty, &CallingConvention::Default))
}
Callee::FuncRef(ty) => {
Cow::Owned(<M::ABI as ABI>::sig(&ty, &CallingConvention::Default))
}
}
}
/// Maps the given [`Callee`] to a [`CalleeKind`].
fn map<P: PtrSize, M: MacroAssembler>(
vmoffsets: &VMOffsets<P>,
@@ -221,7 +190,7 @@ impl FnCall {
let location = stack.len().checked_sub(sig.params.len() - 2).unwrap_or(0);
context.stack.insert_many(
location,
[
&[
TypedReg::new(ptr_type, callee_vmctx).into(),
TypedReg::new(ptr_type, caller_vmctx).into(),
],
@@ -303,7 +272,7 @@ impl FnCall {
/// Cleanup stack space, handle multiple results, and free registers after
/// emitting the call.
fn cleanup<M: MacroAssembler>(
sig: &mut Cow<'_, ABISig>,
sig: &ABISig,
reserved_space: u32,
ret_area: Option<RetArea>,
masm: &mut M,
@@ -340,22 +309,22 @@ impl FnCall {
// Free the bytes consumed by the call.
masm.free_stack(stack_consumed);
if let Some(area) = ret_area {
debug_assert!(!area.is_uninit());
let ret_area = ret_area.map(|area| {
if stack_consumed > 0 {
sig.to_mut()
.results
.set_ret_area(RetArea::sp(masm.sp_offset()));
// If there's a return area and stack space was consumed by the
// call, adjust the return area to be to the current stack
// pointer offset.
RetArea::sp(masm.sp_offset())
} else {
// If there's a return area, and no memory was adjusted
// (memmoved), the offsets should be equal.
// Else if no stack space was consumed by the call, simply use
// the previously calculated area.
debug_assert_eq!(area.unwrap_sp(), masm.sp_offset());
sig.to_mut().results.set_ret_area(area);
area
}
}
context.push_abi_results(&sig.results, masm, |results, _, _| {
results.ret_area().copied()
});
// In the case of [Callee], there's no need to set the [RetArea] of the
// signature, as it's only used here to push abi results.
context.push_abi_results(&sig.results, masm, |_, _, _| ret_area);
}
}

winch/codegen/src/codegen/context.rs (9 changed lines)

@@ -3,7 +3,6 @@ use wasmtime_environ::{VMOffsets, WasmHeapType, WasmValType};
use super::ControlStackFrame;
use crate::{
abi::{ABIOperand, ABIResults, RetArea, ABI},
codegen::BuiltinFunctions,
frame::Frame,
isa::reg::RegClass,
masm::{MacroAssembler, OperandSize, RegImm, SPOffset, StackSlot},
@@ -27,7 +26,7 @@ use crate::{
/// generation process. The code generation context should
/// be generally used as the single entry point to access
/// the compound functionality provided by its elements.
pub(crate) struct CodeGenContext<'a, 'builtins: 'a> {
pub(crate) struct CodeGenContext<'a> {
/// The register allocator.
pub regalloc: RegAlloc,
/// The value stack.
@@ -36,19 +35,16 @@ pub(crate) struct CodeGenContext<'a, 'builtins: 'a> {
pub frame: Frame,
/// Reachability state.
pub reachable: bool,
/// The built-in functions available to the JIT code.
pub builtins: &'builtins mut BuiltinFunctions,
/// A reference to the VMOffsets.
pub vmoffsets: &'a VMOffsets<u8>,
}
impl<'a, 'builtins> CodeGenContext<'a, 'builtins> {
impl<'a> CodeGenContext<'a> {
/// Create a new code generation context.
pub fn new(
regalloc: RegAlloc,
stack: Stack,
frame: Frame,
builtins: &'builtins mut BuiltinFunctions,
vmoffsets: &'a VMOffsets<u8>,
) -> Self {
Self {
@@ -56,7 +52,6 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> {
stack,
frame,
reachable: true,
builtins,
vmoffsets,
}
}

winch/codegen/src/codegen/env.rs (93 changed lines)

@@ -1,7 +1,9 @@
use crate::{
codegen::{control, BlockSig, BuiltinFunction, OperandSize},
isa::TargetIsa,
abi::{ABISig, ABI},
codegen::{control, BlockSig, BuiltinFunction, BuiltinFunctions, OperandSize},
isa::{CallingConvention, TargetIsa},
};
use smallvec::SmallVec;
use std::collections::{
hash_map::Entry::{Occupied, Vacant},
HashMap,
@@ -10,7 +12,7 @@ use wasmparser::BlockType;
use wasmtime_environ::{
FuncIndex, GlobalIndex, MemoryIndex, MemoryPlan, MemoryStyle, ModuleTranslation,
ModuleTypesBuilder, PtrSize, TableIndex, TablePlan, TypeConvert, TypeIndex, VMOffsets,
WasmFuncType, WasmHeapType, WasmValType, WASM_PAGE_SIZE,
WasmHeapType, WasmValType, WASM_PAGE_SIZE,
};
/// Table metadata.
@@ -74,23 +76,34 @@ pub struct HeapData {
/// It categorizes how the callee should be treated
/// when performing the call.
#[derive(Clone)]
pub enum Callee {
pub(crate) enum Callee<'a> {
/// Locally defined function.
Local(CalleeInfo),
Local(&'a CalleeInfo),
/// Imported function.
Import(CalleeInfo),
Import(&'a CalleeInfo),
/// Function reference.
FuncRef(WasmFuncType),
FuncRef(&'a ABISig),
/// A built-in function.
Builtin(BuiltinFunction),
}
impl Callee<'_> {
/// Returns the [ABISig] of the [Callee].
pub(crate) fn sig(&self) -> &ABISig {
match self {
Self::Local(info) | Self::Import(info) => &info.sig,
Self::FuncRef(sig) => sig,
Self::Builtin(b) => b.sig(),
}
}
}
/// Metadata about a function callee. Used by the code generation to
/// emit function calls to local or imported functions.
#[derive(Clone)]
pub struct CalleeInfo {
/// The function type.
pub ty: WasmFuncType,
/// The function's ABI signature.
pub(crate) sig: ABISig,
/// The callee index in the WebAssembly function index space.
pub index: FuncIndex,
}
@@ -106,10 +119,20 @@ pub struct FuncEnv<'a, 'translation: 'a, 'data: 'translation, P: PtrSize> {
pub translation: &'translation ModuleTranslation<'data>,
/// The module's function types.
pub types: &'translation ModuleTypesBuilder,
/// The built-in functions available to the JIT code.
pub builtins: &'translation mut BuiltinFunctions,
/// Track resolved table information.
resolved_tables: HashMap<TableIndex, TableData>,
/// Track resolved heap information.
resolved_heaps: HashMap<MemoryIndex, HeapData>,
/// A map from [FuncIndex] to [CalleeInfo], to keep track of the resolved
/// function callees.
resolved_callees: HashMap<FuncIndex, CalleeInfo>,
/// A map from [TypeIndex] to [ABISig], to keep track of the resolved
/// indirect function signatures.
resolved_sigs: HashMap<TypeIndex, ABISig>,
/// Pointer size represented as a WebAssembly type.
ptr_type: WasmValType,
/// Whether or not to enable Spectre mitigation on heap bounds checks.
heap_access_spectre_mitigation: bool,
}
@@ -126,6 +149,7 @@ impl<'a, 'translation, 'data, P: PtrSize> FuncEnv<'a, 'translation, 'data, P> {
vmoffsets: &'a VMOffsets<P>,
translation: &'translation ModuleTranslation<'data>,
types: &'translation ModuleTypesBuilder,
builtins: &'translation mut BuiltinFunctions,
isa: &dyn TargetIsa,
) -> Self {
Self {
@@ -134,40 +158,63 @@ impl<'a, 'translation, 'data, P: PtrSize> FuncEnv<'a, 'translation, 'data, P> {
types,
resolved_tables: HashMap::new(),
resolved_heaps: HashMap::new(),
resolved_callees: HashMap::new(),
resolved_sigs: HashMap::new(),
ptr_type: ptr_type_from_ptr_size(vmoffsets.ptr.size()),
heap_access_spectre_mitigation: isa.flags().enable_heap_access_spectre_mitigation(),
builtins,
}
}
/// Derive the [`WasmValType`] from the pointer size.
pub(crate) fn ptr_type(&self) -> WasmValType {
ptr_type_from_ptr_size(self.ptr_size())
}
/// Returns the pointer size for the target ISA.
fn ptr_size(&self) -> u8 {
self.vmoffsets.ptr.size()
self.ptr_type
}
/// Resolves a [`Callee::FuncRef`] from a type index.
pub fn funcref(&self, idx: TypeIndex) -> Callee {
let sig_index = self.translation.module.types[idx].unwrap_function();
let ty = self.types[sig_index].clone();
Callee::FuncRef(ty)
pub(crate) fn funcref<A>(&mut self, idx: TypeIndex) -> Callee
where
A: ABI,
{
let val = || {
let sig_index = self.translation.module.types[idx].unwrap_function();
let ty = &self.types[sig_index];
let sig = <A as ABI>::sig(ty, &CallingConvention::Default);
sig
};
Callee::FuncRef(self.resolved_sigs.entry(idx).or_insert_with(val))
}
/// Resolves a function [`Callee`] from an index.
pub fn callee_from_index(&self, idx: FuncIndex) -> Callee {
pub(crate) fn callee_from_index<A>(&mut self, idx: FuncIndex) -> Callee
where
A: ABI,
{
let ptr = self.ptr_type();
let types = &self.translation.get_types();
let ty = types[types.core_function_at(idx.as_u32())].unwrap_func();
let ty = self.convert_func_type(ty);
let import = self.translation.module.is_imported_function(idx);
let val = || {
let info = if import {
let mut params: SmallVec<[WasmValType; 6]> =
SmallVec::with_capacity(ty.params().len() + 2);
params.extend_from_slice(&[ptr, ptr]);
params.extend_from_slice(ty.params());
let sig = <A as ABI>::sig_from(&params, ty.returns(), &CallingConvention::Default);
CalleeInfo { sig, index: idx }
} else {
let sig = <A as ABI>::sig(&ty, &CallingConvention::Default);
CalleeInfo { sig, index: idx }
};
let info = CalleeInfo { ty, index: idx };
info
};
if import {
Callee::Import(info)
Callee::Import(self.resolved_callees.entry(idx).or_insert_with(val))
} else {
Callee::Local(info)
Callee::Local(self.resolved_callees.entry(idx).or_insert_with(val))
}
}

winch/codegen/src/codegen/mod.rs (14 changed lines)

@@ -37,7 +37,7 @@ where
pub sig: ABISig,
/// The code generation context.
pub context: CodeGenContext<'a, 'translation>,
pub context: CodeGenContext<'a>,
/// A reference to the function compilation environment.
pub env: FuncEnv<'a, 'translation, 'data, M::Ptr>,
@@ -57,7 +57,7 @@ where
{
pub fn new(
masm: &'a mut M,
context: CodeGenContext<'a, 'translation>,
context: CodeGenContext<'a>,
env: FuncEnv<'a, 'translation, 'data, M::Ptr>,
sig: ABISig,
) -> Self {
@@ -368,7 +368,7 @@ where
let table_data = self.env.resolve_table_data(table_index);
let ptr_type = self.env.ptr_type();
let builtin = self
.context
.env
.builtins
.table_get_lazy_init_func_ref::<M::ABI, M::Ptr>();
@@ -416,9 +416,11 @@ where
// This is safe since the FnCall::emit call below will ensure
// that the result register is placed on the value stack.
self.context.free_reg(elem_value);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |_| {
Callee::Builtin(builtin.clone())
});
FnCall::emit::<M, M::Ptr>(
self.masm,
&mut self.context,
Callee::Builtin(builtin.clone()),
);
// We know the signature of the libcall in this case, so we assert that there's
// one element in the stack and that it's the ABI signature's result register.

winch/codegen/src/isa/aarch64/masm.rs (6 changed lines)

@@ -1,7 +1,7 @@
use super::{abi::Aarch64ABI, address::Address, asm::Assembler, regs};
use crate::{
abi::{self, local::LocalSlot},
codegen::{ptr_type_from_ptr_size, CodeGenContext, HeapData, TableData},
codegen::{ptr_type_from_ptr_size, CodeGenContext, FuncEnv, HeapData, TableData},
isa::reg::Reg,
masm::{
CalleeKind, DivKind, ExtendKind, FloatCmpKind, Imm as I, IntCmpKind,
@@ -328,11 +328,13 @@ impl Masm for MacroAssembler {
todo!()
}
fn float_round(
fn float_round<F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext, &mut Self)>(
&mut self,
_mode: RoundingMode,
_env: &mut FuncEnv<Self::Ptr>,
_context: &mut CodeGenContext,
_size: OperandSize,
_fallback: F,
) {
todo!();
}

winch/codegen/src/isa/aarch64/mod.rs (4 changed lines)

@@ -98,7 +98,7 @@ impl TargetIsa for Aarch64 {
let stack = Stack::new();
let abi_sig = abi::Aarch64ABI::sig(sig, &CallingConvention::Default);
let env = FuncEnv::new(&vmoffsets, translation, types, self);
let env = FuncEnv::new(&vmoffsets, translation, types, builtins, self);
let defined_locals = DefinedLocals::new::<abi::Aarch64ABI>(&env, &mut body, validator)?;
let frame = Frame::new::<abi::Aarch64ABI>(&abi_sig, &defined_locals)?;
let gpr = RegBitSet::int(
@@ -109,7 +109,7 @@ impl TargetIsa for Aarch64 {
// TODO: Add floating point bitmask
let fpr = RegBitSet::float(0, 0, usize::try_from(MAX_FPR).unwrap());
let regalloc = RegAlloc::from(gpr, fpr);
let codegen_context = CodeGenContext::new(regalloc, stack, frame, builtins, &vmoffsets);
let codegen_context = CodeGenContext::new(regalloc, stack, frame, &vmoffsets);
let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig);
codegen.emit(&mut body, validator)?;

winch/codegen/src/isa/x64/asm.rs (35 changed lines)

@@ -18,16 +18,15 @@ use cranelift_codegen::{
ShiftKind as CraneliftShiftKind, SseOpcode, SyntheticAmode, WritableGpr,
WritableXmm, Xmm, XmmMem, XmmMemAligned, CC,
},
settings as x64_settings, CallInfo, EmitInfo, EmitState, Inst,
settings as x64_settings, EmitInfo, EmitState, Inst,
},
CallConv,
},
settings, Final, MachBuffer, MachBufferFinalized, MachInstEmit, MachInstEmitState, MachLabel,
VCodeConstantData, VCodeConstants, Writable,
};
use super::address::Address;
use smallvec::{smallvec, SmallVec};
use smallvec::SmallVec;
// Conversions between winch-codegen x64 types and cranelift-codegen x64 types.
@@ -1246,14 +1245,8 @@ impl Assembler {
pub fn call_with_reg(&mut self, callee: Reg) {
self.emit(Inst::CallUnknown {
dest: RegMem::reg(callee.into()),
info: Box::new(CallInfo {
uses: smallvec![],
defs: smallvec![],
clobbers: Default::default(),
opcode: Opcode::Call,
callee_pop_size: 0,
callee_conv: CallConv::SystemV,
}),
opcode: Opcode::Call,
info: None,
});
}
@@ -1262,14 +1255,8 @@ impl Assembler {
let dest = ExternalName::user(UserExternalNameRef::new(index as usize));
self.emit(Inst::CallKnown {
dest,
info: Box::new(CallInfo {
uses: smallvec![],
defs: smallvec![],
clobbers: Default::default(),
opcode: Opcode::Call,
callee_pop_size: 0,
callee_conv: CallConv::SystemV,
}),
opcode: Opcode::Call,
info: None,
});
}
@@ -1278,14 +1265,8 @@ impl Assembler {
let dest = ExternalName::LibCall(lib);
self.emit(Inst::CallKnown {
dest,
info: Box::new(CallInfo {
uses: smallvec![],
defs: smallvec![],
clobbers: Default::default(),
opcode: Opcode::Call,
callee_pop_size: 0,
callee_conv: CallConv::SystemV,
}),
opcode: Opcode::Call,
info: None,
});
}

winch/codegen/src/isa/x64/masm.rs (42 changed lines)

@@ -16,7 +16,7 @@ use crate::{
};
use crate::{
abi::{self, align_to, calculate_frame_adjustment, LocalSlot},
codegen::{ptr_type_from_ptr_size, Callee, CodeGenContext, FnCall, HeapData, TableData},
codegen::{ptr_type_from_ptr_size, CodeGenContext, FuncEnv, HeapData, TableData},
stack::Val,
};
use crate::{
@@ -591,42 +591,20 @@ impl Masm for MacroAssembler {
self.asm.xmm_and_rr(scratch_xmm, dst, size);
}
fn float_round(&mut self, mode: RoundingMode, context: &mut CodeGenContext, size: OperandSize) {
fn float_round<F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext, &mut Self)>(
&mut self,
mode: RoundingMode,
env: &mut FuncEnv<Self::Ptr>,
context: &mut CodeGenContext,
size: OperandSize,
mut fallback: F,
) {
if self.flags.has_sse41() {
let src = context.pop_to_reg(self, None);
self.asm.xmm_rounds_rr(src.into(), src.into(), mode, size);
context.stack.push(src.into());
} else {
FnCall::emit::<Self, Self::Ptr, _>(self, context, |context| {
let b = match (&mode, size) {
(RoundingMode::Up, OperandSize::S32) => {
context.builtins.ceil_f32::<<Self as Masm>::ABI>()
}
(RoundingMode::Up, OperandSize::S64) => {
context.builtins.ceil_f64::<<Self as Masm>::ABI>()
}
(RoundingMode::Down, OperandSize::S32) => {
context.builtins.floor_f32::<<Self as Masm>::ABI>()
}
(RoundingMode::Down, OperandSize::S64) => {
context.builtins.floor_f64::<<Self as Masm>::ABI>()
}
(RoundingMode::Nearest, OperandSize::S32) => {
context.builtins.nearest_f32::<<Self as Masm>::ABI>()
}
(RoundingMode::Nearest, OperandSize::S64) => {
context.builtins.nearest_f64::<<Self as Masm>::ABI>()
}
(RoundingMode::Zero, OperandSize::S32) => {
context.builtins.trunc_f32::<<Self as Masm>::ABI>()
}
(RoundingMode::Zero, OperandSize::S64) => {
context.builtins.trunc_f64::<<Self as Masm>::ABI>()
}
(_, _) => unreachable!(),
};
Callee::Builtin(b)
})
fallback(env, context, self);
}
}

winch/codegen/src/isa/x64/mod.rs (4 changed lines)

@@ -107,7 +107,7 @@ impl TargetIsa for X64 {
let stack = Stack::new();
let abi_sig = abi::X64ABI::sig(sig, &CallingConvention::Default);
let env = FuncEnv::new(&vmoffsets, translation, types, self);
let env = FuncEnv::new(&vmoffsets, translation, types, builtins, self);
let defined_locals = DefinedLocals::new::<abi::X64ABI>(&env, &mut body, validator)?;
let frame = Frame::new::<abi::X64ABI>(&abi_sig, &defined_locals)?;
let gpr = RegBitSet::int(
@@ -122,7 +122,7 @@ impl TargetIsa for X64 {
);
let regalloc = RegAlloc::from(gpr, fpr);
let codegen_context = CodeGenContext::new(regalloc, stack, frame, builtins, &vmoffsets);
let codegen_context = CodeGenContext::new(regalloc, stack, frame, &vmoffsets);
let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig);
codegen.emit(&mut body, validator)?;

winch/codegen/src/masm.rs (11 changed lines)

@@ -1,5 +1,5 @@
use crate::abi::{self, align_to, LocalSlot};
use crate::codegen::{CodeGenContext, HeapData, TableData};
use crate::codegen::{CodeGenContext, FuncEnv, HeapData, TableData};
use crate::isa::reg::Reg;
use cranelift_codegen::{
ir::{Endianness, LibCall, MemFlags},
@@ -603,7 +603,14 @@ pub(crate) trait MacroAssembler {
fn float_neg(&mut self, dst: Reg, size: OperandSize);
/// Perform a floating point floor operation.
fn float_round(&mut self, mode: RoundingMode, context: &mut CodeGenContext, size: OperandSize);
fn float_round<F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext, &mut Self)>(
&mut self,
mode: RoundingMode,
env: &mut FuncEnv<Self::Ptr>,
context: &mut CodeGenContext,
size: OperandSize,
fallback: F,
);
/// Perform a floating point square root operation.
fn float_sqrt(&mut self, dst: Reg, src: Reg, size: OperandSize);
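The shape of this trait change, in isolation: `float_round` no longer resolves builtin functions itself; the caller supplies a fallback closure that receives the environment, context, and masm, and the closure runs only when no native rounding instruction is available. A minimal sketch with heavily simplified stand-in types (not the real Winch signatures):

// Hypothetical, pared-down versions of the types involved.
struct FuncEnv;
struct CodeGenContext;

trait MacroAssembler: Sized {
    fn has_native_rounding(&self) -> bool;
    fn emit_native_round(&mut self);

    // The fallback is invoked only on the slow path, so builtin
    // resolution happens lazily and the masm stays ISA-agnostic.
    fn float_round<F: FnMut(&mut FuncEnv, &mut CodeGenContext, &mut Self)>(
        &mut self,
        env: &mut FuncEnv,
        cx: &mut CodeGenContext,
        mut fallback: F,
    ) {
        if self.has_native_rounding() {
            self.emit_native_round();
        } else {
            fallback(env, cx, self);
        }
    }
}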

winch/codegen/src/regalloc.rs (2 changed lines)

@@ -54,7 +54,7 @@ impl RegAlloc {
spill(self);
self.regset
.reg(named)
.expect(&format!("register {:?} to be available", named))
.unwrap_or_else(|| panic!("Expected register {:?} to be available", named))
})
}

winch/codegen/src/stack.rs (20 changed lines)

@@ -1,4 +1,5 @@
use crate::{isa::reg::Reg, masm::StackSlot};
use smallvec::SmallVec;
use wasmparser::{Ieee32, Ieee64};
use wasmtime_environ::WasmValType;
@@ -260,7 +261,8 @@ impl Val {
/// The shadow stack used for compilation.
#[derive(Default, Debug)]
pub(crate) struct Stack {
inner: Vec<Val>,
// NB: The 64 is chosen arbitrarily. We can adjust as we see fit.
inner: SmallVec<[Val; 64]>,
}
impl Stack {
@@ -294,15 +296,13 @@ impl Stack {
}
/// Inserts many values at the given index.
pub fn insert_many(&mut self, at: usize, values: impl IntoIterator<Item = Val>) {
pub fn insert_many(&mut self, at: usize, values: &[Val]) {
debug_assert!(at <= self.len());
// If last, simply extend.
if at == self.inner.len() {
self.inner.extend(values);
if at == self.len() {
self.inner.extend_from_slice(values);
} else {
let mut tail = self.inner.split_off(at);
self.inner.extend(values);
self.inner.append(&mut tail);
self.inner.insert_from_slice(at, values);
}
}
@@ -375,12 +375,12 @@ impl Stack {
}
/// Get a mutable reference to the inner stack representation.
pub fn inner_mut(&mut self) -> &mut Vec<Val> {
pub fn inner_mut(&mut self) -> &mut SmallVec<[Val; 64]> {
&mut self.inner
}
/// Get a reference to the inner stack representation.
pub fn inner(&self) -> &Vec<Val> {
pub fn inner(&self) -> &SmallVec<[Val; 64]> {
&self.inner
}
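For the insertion path specifically, `SmallVec::insert_from_slice` shifts the tail once and copies the new elements into place, whereas the old code built a temporary `Vec` via `split_off`, extended, and appended it back. A standalone demo, assuming the `smallvec` crate (the inline capacity of 4 is just for the example; Winch uses 64):

use smallvec::{smallvec, SmallVec};

fn main() {
    let mut stack: SmallVec<[u32; 4]> = smallvec![1, 2, 5, 6];

    // Single tail shift, no temporary allocation for the displaced
    // elements.
    stack.insert_from_slice(2, &[3, 4]);

    assert_eq!(&stack[..], &[1, 2, 3, 4, 5, 6]);
}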

winch/codegen/src/visitor.rs (203 changed lines)

@@ -453,43 +453,107 @@ where
}
fn visit_f32_floor(&mut self) {
self.masm
.float_round(RoundingMode::Down, &mut self.context, OperandSize::S32);
self.masm.float_round(
RoundingMode::Down,
&mut self.env,
&mut self.context,
OperandSize::S32,
|env, cx, masm| {
let builtin = env.builtins.floor_f32::<M::ABI>();
FnCall::emit::<M, M::Ptr>(masm, cx, Callee::Builtin(builtin));
},
);
}
fn visit_f64_floor(&mut self) {
self.masm
.float_round(RoundingMode::Down, &mut self.context, OperandSize::S64);
self.masm.float_round(
RoundingMode::Down,
&mut self.env,
&mut self.context,
OperandSize::S64,
|env, cx, masm| {
let builtin = env.builtins.floor_f64::<M::ABI>();
FnCall::emit::<M, M::Ptr>(masm, cx, Callee::Builtin(builtin));
},
);
}
fn visit_f32_ceil(&mut self) {
self.masm
.float_round(RoundingMode::Up, &mut self.context, OperandSize::S32);
self.masm.float_round(
RoundingMode::Up,
&mut self.env,
&mut self.context,
OperandSize::S32,
|env, cx, masm| {
let builtin = env.builtins.ceil_f32::<M::ABI>();
FnCall::emit::<M, M::Ptr>(masm, cx, Callee::Builtin(builtin));
},
);
}
fn visit_f64_ceil(&mut self) {
self.masm
.float_round(RoundingMode::Up, &mut self.context, OperandSize::S64);
self.masm.float_round(
RoundingMode::Up,
&mut self.env,
&mut self.context,
OperandSize::S64,
|env, cx, masm| {
let builtin = env.builtins.ceil_f64::<M::ABI>();
FnCall::emit::<M, M::Ptr>(masm, cx, Callee::Builtin(builtin));
},
);
}
fn visit_f32_nearest(&mut self) {
self.masm
.float_round(RoundingMode::Nearest, &mut self.context, OperandSize::S32);
self.masm.float_round(
RoundingMode::Nearest,
&mut self.env,
&mut self.context,
OperandSize::S32,
|env, cx, masm| {
let builtin = env.builtins.nearest_f32::<M::ABI>();
FnCall::emit::<M, M::Ptr>(masm, cx, Callee::Builtin(builtin))
},
);
}
fn visit_f64_nearest(&mut self) {
self.masm
.float_round(RoundingMode::Nearest, &mut self.context, OperandSize::S64);
self.masm.float_round(
RoundingMode::Nearest,
&mut self.env,
&mut self.context,
OperandSize::S64,
|env, cx, masm| {
let builtin = env.builtins.nearest_f64::<M::ABI>();
FnCall::emit::<M, M::Ptr>(masm, cx, Callee::Builtin(builtin));
},
);
}
fn visit_f32_trunc(&mut self) {
self.masm
.float_round(RoundingMode::Zero, &mut self.context, OperandSize::S32);
self.masm.float_round(
RoundingMode::Zero,
&mut self.env,
&mut self.context,
OperandSize::S32,
|env, cx, masm| {
let builtin = env.builtins.trunc_f32::<M::ABI>();
FnCall::emit::<M, M::Ptr>(masm, cx, Callee::Builtin(builtin));
},
);
}
fn visit_f64_trunc(&mut self) {
self.masm
.float_round(RoundingMode::Zero, &mut self.context, OperandSize::S64);
self.masm.float_round(
RoundingMode::Zero,
&mut self.env,
&mut self.context,
OperandSize::S64,
|env, cx, masm| {
let builtin = env.builtins.trunc_f64::<M::ABI>();
FnCall::emit::<M, M::Ptr>(masm, cx, Callee::Builtin(builtin));
},
);
}
fn visit_f32_sqrt(&mut self) {
@@ -1291,8 +1355,12 @@ where
}
fn visit_call(&mut self, index: u32) {
let callee = self.env.callee_from_index(FuncIndex::from_u32(index));
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |_| callee.clone());
FnCall::emit::<M, M::Ptr>(
self.masm,
&mut self.context,
self.env
.callee_from_index::<M::ABI>(FuncIndex::from_u32(index)),
)
}
fn visit_call_indirect(&mut self, type_index: u32, table_index: u32, _: u8) {
@@ -1321,9 +1389,11 @@ where
}
}
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |_| {
self.env.funcref(type_index)
})
FnCall::emit::<M, M::Ptr>(
self.masm,
&mut self.context,
self.env.funcref::<M::ABI>(type_index),
)
}
fn visit_table_init(&mut self, elem: u32, table: u32) {
@@ -1335,15 +1405,19 @@ where
self.context.stack.insert_many(
at,
[
&[
vmctx.into(),
table.try_into().unwrap(),
elem.try_into().unwrap(),
],
);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |cx| {
Callee::Builtin(cx.builtins.table_init::<M::ABI, M::Ptr>())
});
let builtin = self.env.builtins.table_init::<M::ABI, M::Ptr>();
FnCall::emit::<M, M::Ptr>(
self.masm,
&mut self.context,
Callee::Builtin(builtin.clone()),
)
}
fn visit_table_copy(&mut self, dst: u32, src: u32) {
@@ -1353,16 +1427,15 @@ where
let at = self.context.stack.len() - 3;
self.context.stack.insert_many(
at,
[
&[
vmctx.into(),
dst.try_into().unwrap(),
src.try_into().unwrap(),
],
);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |context| {
Callee::Builtin(context.builtins.table_copy::<M::ABI, M::Ptr>())
});
let builtin = self.env.builtins.table_copy::<M::ABI, M::Ptr>();
FnCall::emit::<M, M::Ptr>(self.masm, &mut self.context, Callee::Builtin(builtin))
}
fn visit_table_get(&mut self, table: u32) {
@@ -1385,10 +1458,7 @@ where
let table_index = TableIndex::from_u32(table);
let table_plan = self.env.table_plan(table_index);
let builtin = match table_plan.table.wasm_ty.heap_type {
WasmHeapType::Func => self
.context
.builtins
.table_grow_func_ref::<M::ABI, M::Ptr>(),
WasmHeapType::Func => self.env.builtins.table_grow_func_ref::<M::ABI, M::Ptr>(),
ty => unimplemented!("Support for HeapType: {ty}"),
};
@@ -1406,11 +1476,13 @@ where
self.context.stack.inner_mut().swap(len - 1, len - 2);
self.context
.stack
.insert_many(at, [vmctx.into(), table.try_into().unwrap()]);
.insert_many(at, &[vmctx.into(), table.try_into().unwrap()]);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |_| {
Callee::Builtin(builtin.clone())
});
FnCall::emit::<M, M::Ptr>(
self.masm,
&mut self.context,
Callee::Builtin(builtin.clone()),
)
}
fn visit_table_size(&mut self, table: u32) {
@@ -1425,10 +1497,7 @@ where
let table_index = TableIndex::from_u32(table);
let table_plan = self.env.table_plan(table_index);
let builtin = match table_plan.table.wasm_ty.heap_type {
WasmHeapType::Func => self
.context
.builtins
.table_fill_func_ref::<M::ABI, M::Ptr>(),
WasmHeapType::Func => self.env.builtins.table_fill_func_ref::<M::ABI, M::Ptr>(),
ty => unimplemented!("Support for heap type: {ty}"),
};
@@ -1437,10 +1506,12 @@ where
let at = len - 3;
self.context
.stack
.insert_many(at, [vmctx.into(), table.try_into().unwrap()]);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |_| {
Callee::Builtin(builtin.clone())
})
.insert_many(at, &[vmctx.into(), table.try_into().unwrap()]);
FnCall::emit::<M, M::Ptr>(
self.masm,
&mut self.context,
Callee::Builtin(builtin.clone()),
)
}
fn visit_table_set(&mut self, table: u32) {
@@ -1482,14 +1553,12 @@ where
fn visit_elem_drop(&mut self, index: u32) {
let ptr_type = self.env.ptr_type();
let elem_drop = self.context.builtins.elem_drop::<M::ABI, M::Ptr>();
let elem_drop = self.env.builtins.elem_drop::<M::ABI, M::Ptr>();
let vmctx = TypedReg::new(ptr_type, <M::ABI as ABI>::vmctx_reg());
self.context
.stack
.extend([vmctx.into(), index.try_into().unwrap()]);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |_| {
Callee::Builtin(elem_drop.clone())
});
FnCall::emit::<M, M::Ptr>(self.masm, &mut self.context, Callee::Builtin(elem_drop))
}
fn visit_memory_init(&mut self, data_index: u32, mem: u32) {
@@ -1499,15 +1568,14 @@ where
let vmctx = TypedReg::new(ptr_type, <M::ABI as ABI>::vmctx_reg());
self.context.stack.insert_many(
at,
[
&[
vmctx.into(),
mem.try_into().unwrap(),
data_index.try_into().unwrap(),
],
);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |cx| {
Callee::Builtin(cx.builtins.memory_init::<M::ABI, M::Ptr>())
});
let builtin = self.env.builtins.memory_init::<M::ABI, M::Ptr>();
FnCall::emit::<M, M::Ptr>(self.masm, &mut self.context, Callee::Builtin(builtin))
}
fn visit_memory_copy(&mut self, dst_mem: u32, src_mem: u32) {
@@ -1522,17 +1590,17 @@ where
let at = self.context.stack.len() - 2;
self.context
.stack
.insert_many(at, [src_mem.try_into().unwrap()]);
.insert_many(at, &[src_mem.try_into().unwrap()]);
// One element was inserted above, so instead of 3, we use 4.
let at = self.context.stack.len() - 4;
self.context
.stack
.insert_many(at, [vmctx.into(), dst_mem.try_into().unwrap()]);
.insert_many(at, &[vmctx.into(), dst_mem.try_into().unwrap()]);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |cx| {
Callee::Builtin(cx.builtins.memory_copy::<M::ABI, M::Ptr>())
});
let builtin = self.env.builtins.memory_copy::<M::ABI, M::Ptr>();
FnCall::emit::<M, M::Ptr>(self.masm, &mut self.context, Callee::Builtin(builtin))
}
fn visit_memory_fill(&mut self, mem: u32) {
@@ -1543,11 +1611,10 @@ where
self.context
.stack
.insert_many(at, [vmctx.into(), mem.try_into().unwrap()]);
.insert_many(at, &[vmctx.into(), mem.try_into().unwrap()]);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |cx| {
Callee::Builtin(cx.builtins.memory_fill::<M::ABI, M::Ptr>())
});
let builtin = self.env.builtins.memory_fill::<M::ABI, M::Ptr>();
FnCall::emit::<M, M::Ptr>(self.masm, &mut self.context, Callee::Builtin(builtin))
}
fn visit_memory_size(&mut self, mem: u32, _: u8) {
@@ -1563,12 +1630,11 @@ where
// The stack at this point contains: [ delta ]
// The desired state is
// [ vmctx, delta, index ]
self.context.stack.insert_many(at, [vmctx.into()]);
self.context.stack.insert_many(at, &[vmctx.into()]);
self.context.stack.extend([mem.try_into().unwrap()]);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |cx| {
Callee::Builtin(cx.builtins.memory32_grow::<M::ABI, M::Ptr>())
});
let builtin = self.env.builtins.memory32_grow::<M::ABI, M::Ptr>();
FnCall::emit::<M, M::Ptr>(self.masm, &mut self.context, Callee::Builtin(builtin))
}
fn visit_data_drop(&mut self, data_index: u32) {
@@ -1578,9 +1644,8 @@ where
.stack
.extend([vmctx.into(), data_index.try_into().unwrap()]);
FnCall::emit::<M, M::Ptr, _>(self.masm, &mut self.context, |cx| {
Callee::Builtin(cx.builtins.data_drop::<M::ABI, M::Ptr>())
});
let builtin = self.env.builtins.data_drop::<M::ABI, M::Ptr>();
FnCall::emit::<M, M::Ptr>(self.masm, &mut self.context, Callee::Builtin(builtin))
}
fn visit_nop(&mut self) {}
