
cranelift: Fix return value handling with the winch calling convention (#8198)

* Return the last result through registers in the winch calling convention

* Add a run test for winch calling convention functions

* Disable the Winch calling convention in cranelift's aarch64 backend

* Remove the aarch64 winch.clif test

* Skip realignment for winch results on the stack
Trevor Elliott authored 8 months ago, committed by GitHub
commit d3cc12b455
7 changed files:

  cranelift/codegen/src/isa/aarch64/abi.rs             |  11
  cranelift/codegen/src/isa/aarch64/inst/mod.rs        |   5
  cranelift/codegen/src/isa/x64/abi.rs                 |  62
  cranelift/filetests/filetests/isa/aarch64/winch.clif | 329
  cranelift/filetests/filetests/isa/x64/winch.clif     |  99
  cranelift/filetests/filetests/runtests/winch.clif    |  21
  crates/cranelift/src/lib.rs                          |   4

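Taken together, the hunks below pin down how winch returns multiple values on x64: the last result comes back in a register (rax for integers, xmm0 for floats), every earlier result is written to the return area packed at its natural size with no realignment, and the first result sits at the highest offset. The standalone Rust sketch below models that placement rule; `RetLoc` and `winch_ret_locs` are names invented here for illustration, not cranelift API.

#[derive(Debug, PartialEq)]
enum RetLoc {
    Reg(&'static str),
    Stack { offset: i64 },
}

/// `sizes` holds the byte size of each result, in declaration order.
fn winch_ret_locs(sizes: &[i64]) -> Vec<RetLoc> {
    let mut next_stack = 0i64;
    let mut locs: Vec<RetLoc> = sizes
        .iter()
        .enumerate()
        .map(|(ix, &size)| {
            if ix == sizes.len() - 1 {
                // The last result travels in a register (rax or xmm0 on
                // x64; this toy model always says rax).
                RetLoc::Reg("rax")
            } else {
                // Earlier results get stack slots packed at their natural
                // size, with no realignment (the "skip realignment" hunk).
                let offset = next_stack;
                next_stack += size;
                RetLoc::Stack { offset }
            }
        })
        .collect();
    // Winch writes the first result to the highest offset, so flip each
    // stack offset: offset' = next_stack - offset - size.
    for (loc, &size) in locs.iter_mut().zip(sizes) {
        if let RetLoc::Stack { offset } = loc {
            *offset = next_stack - *offset - size;
        }
    }
    locs
}

fn main() {
    // The `%reverse_args` signature from the tests below returns
    // (i64, i32, i64, i32):
    assert_eq!(
        winch_ret_locs(&[8, 4, 8, 4]),
        vec![
            RetLoc::Stack { offset: 12 }, // first result, highest offset
            RetLoc::Stack { offset: 8 },
            RetLoc::Stack { offset: 0 },
            RetLoc::Reg("rax"),           // last result stays in a register
        ],
    );
}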
cranelift/codegen/src/isa/aarch64/abi.rs (11 changed lines)

@@ -107,7 +107,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
         add_ret_area_ptr: bool,
         mut args: ArgsAccumulator,
     ) -> CodegenResult<(u32, Option<usize>)> {
-        if matches!(call_conv, isa::CallConv::Tail | isa::CallConv::Winch) {
+        assert_ne!(
+            call_conv,
+            isa::CallConv::Winch,
+            "aarch64 does not support the 'winch' calling convention yet"
+        );
+
+        if matches!(call_conv, isa::CallConv::Tail) {
             return compute_arg_locs_tail(params, add_ret_area_ptr, args);
         }
@@ -1105,7 +1111,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
     fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet {
         match call_conv_of_callee {
             isa::CallConv::Tail => ALL_CLOBBERS,
-            isa::CallConv::Winch => ALL_CLOBBERS,
             _ => DEFAULT_AAPCS_CLOBBERS,
         }
     }
@@ -1423,7 +1428,7 @@ fn is_reg_saved_in_prologue(
     sig: &Signature,
     r: RealReg,
 ) -> bool {
-    if call_conv == isa::CallConv::Tail || call_conv == isa::CallConv::Winch {
+    if call_conv == isa::CallConv::Tail {
         return false;
     }

cranelift/codegen/src/isa/aarch64/inst/mod.rs (5 changed lines)

@@ -873,11 +873,6 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             // This shouldn't be a fixed register constraint.
             collector.reg_fixed_use(info.rn, xreg(1))
         }
-        CallConv::Winch => {
-            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
-            // This shouldn't be a fixed register constraint.
-            collector.reg_fixed_use(info.rn, xreg(1))
-        }
         _ => collector.reg_use(info.rn),
     }
     for u in &info.uses {

cranelift/codegen/src/isa/x64/abi.rs (62 changed lines)

@@ -125,7 +125,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
             next_stack = 32;
         }
 
-        for param in params {
+        for (ix, param) in params.iter().enumerate() {
+            let last_param = ix == params.len() - 1;
+
             if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                 let offset = next_stack as i64;
                 let size = size;
@@ -210,6 +212,11 @@ impl ABIMachineSpec for X64ABIMachineSpec {
                 continue;
             }
 
+            debug_assert!(
+                call_conv != CallConv::Winch || rcs.len() == 1,
+                "Winch is unable to handle values wider than 64-bits"
+            );
+
             let mut slots = ABIArgSlotVec::new();
             for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
                 let intreg = *rc == RegClass::Int;
@@ -218,14 +225,18 @@ impl ABIMachineSpec for X64ABIMachineSpec {
                     ArgsOrRets::Args => {
                         get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
                     }
-                    ArgsOrRets::Rets => get_intreg_for_retval(&call_conv, flags, next_gpr),
+                    ArgsOrRets::Rets => {
+                        get_intreg_for_retval(&call_conv, flags, next_gpr, last_param)
+                    }
                 }
             } else {
                 match args_or_rets {
                     ArgsOrRets::Args => {
                         get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
                     }
-                    ArgsOrRets::Rets => get_fltreg_for_retval(&call_conv, next_vreg),
+                    ArgsOrRets::Rets => {
+                        get_fltreg_for_retval(&call_conv, next_vreg, last_param)
+                    }
                 }
             };
             next_param_idx += 1;
@@ -241,11 +252,18 @@ impl ABIMachineSpec for X64ABIMachineSpec {
                     extension: param.extension,
                 });
             } else {
-                let size = reg_ty.bits() / 8;
-                let size = std::cmp::max(size, 8);
-                // Align.
-                debug_assert!(size.is_power_of_two());
-                next_stack = align_to(next_stack, size);
+                let size = reg_ty.bytes();
+
+                let size = if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
+                    size
+                } else {
+                    let size = std::cmp::max(size, 8);
+
+                    // Align.
+                    debug_assert!(size.is_power_of_two());
+                    next_stack = align_to(next_stack, size);
+                    size
+                };
                 slots.push(ABIArgSlot::Stack {
                     offset: next_stack as i64,
                     ty: *reg_ty,
@@ -298,6 +316,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
             None
         };
 
+        // Winch writes the first result to the highest offset, so we need to iterate through the
+        // args and adjust the offsets down.
+        if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
+            for arg in args.args_mut() {
+                if let ABIArg::Slots { slots, .. } = arg {
+                    for slot in slots.iter_mut() {
+                        if let ABIArgSlot::Stack { offset, ty, .. } = slot {
+                            let size = i64::from(ty.bytes());
+                            *offset = i64::from(next_stack) - *offset - size;
+                        }
+                    }
+                } else {
+                    unreachable!("Winch cannot handle {arg:?}");
+                }
+            }
+        }
+
         next_stack = align_to(next_stack, 16);
 
         // To avoid overflow issues, limit the arg/return size to something reasonable.
@@ -1037,6 +1072,7 @@ fn get_intreg_for_retval(
     call_conv: &CallConv,
     flags: &settings::Flags,
     intreg_idx: usize,
+    is_last: bool,
 ) -> Option<Reg> {
     match call_conv {
         CallConv::Tail => match intreg_idx {
@@ -1067,16 +1103,13 @@ fn get_intreg_for_retval(
             1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
             _ => None,
         },
-        CallConv::Winch => match intreg_idx {
-            0 => Some(regs::rax()),
-            _ => None,
-        },
+        CallConv::Winch => is_last.then(|| regs::rax()),
         CallConv::Probestack => todo!(),
         CallConv::WasmtimeSystemV | CallConv::AppleAarch64 => unreachable!(),
     }
 }
 
-fn get_fltreg_for_retval(call_conv: &CallConv, fltreg_idx: usize) -> Option<Reg> {
+fn get_fltreg_for_retval(call_conv: &CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {
     match call_conv {
         CallConv::Tail => match fltreg_idx {
             0 => Some(regs::xmm0()),
@@ -1094,10 +1127,11 @@ fn get_fltreg_for_retval(call_conv: &CallConv, fltreg_idx: usize) -> Option<Reg>
             1 => Some(regs::xmm1()),
             _ => None,
         },
-        CallConv::WindowsFastcall | CallConv::Winch => match fltreg_idx {
+        CallConv::WindowsFastcall => match fltreg_idx {
             0 => Some(regs::xmm0()),
             _ => None,
         },
+        CallConv::Winch => is_last.then(|| regs::xmm0()),
         CallConv::Probestack => todo!(),
         CallConv::WasmtimeSystemV | CallConv::AppleAarch64 => unreachable!(),
     }
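
Two details in the hunks above are easy to miss. First, `is_last.then(|| regs::rax())` is the idiomatic collapse of the old match arms: `bool::then` yields `Some` only when the receiver is true, so every result except the last is refused a register and falls through to a stack slot. In miniature:

    assert_eq!(true.then(|| "rax"), Some("rax"));
    assert_eq!(false.then(|| "rax"), None);

Second, skipping the `max(size, 8)` realignment for winch results is a correctness fix rather than an optimization: winch packs results at their natural sizes, so for results (i64, i32, i64) the trailing i64 has to sit at offset 12. Realigning would push it to offset 16, and the flipped offsets computed by the adjustment loop above would no longer point at the slots winch actually wrote.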

cranelift/filetests/filetests/isa/aarch64/winch.clif (329 lines removed; file deleted)

@@ -1,329 +0,0 @@
test compile precise-output
target aarch64
function %f1() winch {
block0:
return
}
; VCode:
; block0:
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ret
function %f2(i64, i64, i64, i64, i64, i64) -> i64 winch {
sig0 = () winch
fn0 = %g sig0
block0(v0:i64, v1:i64, v2:i64, v3:i64, v4:i64, v5:i64):
call fn0()
return v0
}
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; sub sp, sp, #16
; block0:
; str x2, [sp]
; load_ext_name x1, TestCase(%g)+0
; blr x1
; ldr x2, [sp]
; add sp, sp, #16
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; sub sp, sp, #0x10
; block1: ; offset 0xc
; stur x2, [sp]
; ldr x1, #0x18
; b #0x20
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x1
; ldur x2, [sp]
; add sp, sp, #0x10
; ldp x29, x30, [sp], #0x10
; ret
function %f3(i64, i64, i64, i64, i64, i64) -> i64 {
sig0 = () winch
fn0 = %g sig0
block0(v0:i64, v1:i64, v2:i64, v3:i64, v4:i64, v5:i64):
call fn0()
return v0
}
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; stp x27, x28, [sp, #-16]!
; stp x25, x26, [sp, #-16]!
; stp x23, x24, [sp, #-16]!
; stp x21, x22, [sp, #-16]!
; stp x19, x20, [sp, #-16]!
; stp d14, d15, [sp, #-16]!
; stp d12, d13, [sp, #-16]!
; stp d10, d11, [sp, #-16]!
; stp d8, d9, [sp, #-16]!
; sub sp, sp, #16
; block0:
; str x0, [sp]
; load_ext_name x1, TestCase(%g)+0
; blr x1
; ldr x0, [sp]
; add sp, sp, #16
; ldp d8, d9, [sp], #16
; ldp d10, d11, [sp], #16
; ldp d12, d13, [sp], #16
; ldp d14, d15, [sp], #16
; ldp x19, x20, [sp], #16
; ldp x21, x22, [sp], #16
; ldp x23, x24, [sp], #16
; ldp x25, x26, [sp], #16
; ldp x27, x28, [sp], #16
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; stp x27, x28, [sp, #-0x10]!
; stp x25, x26, [sp, #-0x10]!
; stp x23, x24, [sp, #-0x10]!
; stp x21, x22, [sp, #-0x10]!
; stp x19, x20, [sp, #-0x10]!
; stp d14, d15, [sp, #-0x10]!
; stp d12, d13, [sp, #-0x10]!
; stp d10, d11, [sp, #-0x10]!
; stp d8, d9, [sp, #-0x10]!
; sub sp, sp, #0x10
; block1: ; offset 0x30
; stur x0, [sp]
; ldr x1, #0x3c
; b #0x44
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x1
; ldur x0, [sp]
; add sp, sp, #0x10
; ldp d8, d9, [sp], #0x10
; ldp d10, d11, [sp], #0x10
; ldp d12, d13, [sp], #0x10
; ldp d14, d15, [sp], #0x10
; ldp x19, x20, [sp], #0x10
; ldp x21, x22, [sp], #0x10
; ldp x23, x24, [sp], #0x10
; ldp x25, x26, [sp], #0x10
; ldp x27, x28, [sp], #0x10
; ldp x29, x30, [sp], #0x10
; ret
function %f4(i64, i64, i64, i64, i64, i64) -> i64 winch {
sig0 = (i64, i64, i64, i64, i64, i64) -> i64 winch
fn0 = %g sig0
block0(v0:i64, v1:i64, v2:i64, v3:i64, v4:i64, v5:i64):
v6 = call fn0(v5, v1, v2, v3, v4, v0)
return v6
}
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; mov x0, x2
; mov x2, x7
; mov x7, x0
; load_ext_name x1, TestCase(%g)+0
; blr x1
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; block1: ; offset 0x8
; mov x0, x2
; mov x2, x7
; mov x7, x0
; ldr x1, #0x1c
; b #0x24
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x1
; ldp x29, x30, [sp], #0x10
; ret
function %f5(i64, i64, i64, i64, i64, i64) -> i64 {
sig0 = (i64, i64, i64, i64, i64, i64) -> i64 winch
fn0 = %g sig0
block0(v0:i64, v1:i64, v2:i64, v3:i64, v4:i64, v5:i64):
v6 = call fn0(v5, v1, v2, v3, v4, v0)
return v6
}
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; stp x27, x28, [sp, #-16]!
; stp x25, x26, [sp, #-16]!
; stp x23, x24, [sp, #-16]!
; stp x21, x22, [sp, #-16]!
; stp x19, x20, [sp, #-16]!
; stp d14, d15, [sp, #-16]!
; stp d12, d13, [sp, #-16]!
; stp d10, d11, [sp, #-16]!
; stp d8, d9, [sp, #-16]!
; block0:
; mov x7, x0
; mov x6, x4
; mov x4, x2
; mov x2, x5
; mov x5, x3
; mov x3, x1
; load_ext_name x1, TestCase(%g)+0
; blr x1
; mov x0, x2
; ldp d8, d9, [sp], #16
; ldp d10, d11, [sp], #16
; ldp d12, d13, [sp], #16
; ldp d14, d15, [sp], #16
; ldp x19, x20, [sp], #16
; ldp x21, x22, [sp], #16
; ldp x23, x24, [sp], #16
; ldp x25, x26, [sp], #16
; ldp x27, x28, [sp], #16
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; stp x27, x28, [sp, #-0x10]!
; stp x25, x26, [sp, #-0x10]!
; stp x23, x24, [sp, #-0x10]!
; stp x21, x22, [sp, #-0x10]!
; stp x19, x20, [sp, #-0x10]!
; stp d14, d15, [sp, #-0x10]!
; stp d12, d13, [sp, #-0x10]!
; stp d10, d11, [sp, #-0x10]!
; stp d8, d9, [sp, #-0x10]!
; block1: ; offset 0x2c
; mov x7, x0
; mov x6, x4
; mov x4, x2
; mov x2, x5
; mov x5, x3
; mov x3, x1
; ldr x1, #0x4c
; b #0x54
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x1
; mov x0, x2
; ldp d8, d9, [sp], #0x10
; ldp d10, d11, [sp], #0x10
; ldp d12, d13, [sp], #0x10
; ldp d14, d15, [sp], #0x10
; ldp x19, x20, [sp], #0x10
; ldp x21, x22, [sp], #0x10
; ldp x23, x24, [sp], #0x10
; ldp x25, x26, [sp], #0x10
; ldp x27, x28, [sp], #0x10
; ldp x29, x30, [sp], #0x10
; ret
function u1:0() system_v {
sig0 = () winch
fn0 = u2:0 sig0
block0:
v5 = func_addr.i64 fn0
call_indirect sig0, v5()
call_indirect sig0, v5()
return
}
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; stp x27, x28, [sp, #-16]!
; stp x25, x26, [sp, #-16]!
; stp x23, x24, [sp, #-16]!
; stp x21, x22, [sp, #-16]!
; stp x19, x20, [sp, #-16]!
; stp d14, d15, [sp, #-16]!
; stp d12, d13, [sp, #-16]!
; stp d10, d11, [sp, #-16]!
; stp d8, d9, [sp, #-16]!
; sub sp, sp, #16
; block0:
; load_ext_name x1, User(userextname0)+0
; str x1, [sp]
; ldr x1, [sp]
; blr x1
; ldr x1, [sp]
; blr x1
; add sp, sp, #16
; ldp d8, d9, [sp], #16
; ldp d10, d11, [sp], #16
; ldp d12, d13, [sp], #16
; ldp d14, d15, [sp], #16
; ldp x19, x20, [sp], #16
; ldp x21, x22, [sp], #16
; ldp x23, x24, [sp], #16
; ldp x25, x26, [sp], #16
; ldp x27, x28, [sp], #16
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; stp x27, x28, [sp, #-0x10]!
; stp x25, x26, [sp, #-0x10]!
; stp x23, x24, [sp, #-0x10]!
; stp x21, x22, [sp, #-0x10]!
; stp x19, x20, [sp, #-0x10]!
; stp d14, d15, [sp, #-0x10]!
; stp d12, d13, [sp, #-0x10]!
; stp d10, d11, [sp, #-0x10]!
; stp d8, d9, [sp, #-0x10]!
; sub sp, sp, #0x10
; block1: ; offset 0x30
; ldr x1, #0x38
; b #0x40
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 u2:0 0
; .byte 0x00, 0x00, 0x00, 0x00
; stur x1, [sp]
; ldur x1, [sp]
; blr x1
; ldur x1, [sp]
; blr x1
; add sp, sp, #0x10
; ldp d8, d9, [sp], #0x10
; ldp d10, d11, [sp], #0x10
; ldp d12, d13, [sp], #0x10
; ldp d14, d15, [sp], #0x10
; ldp x19, x20, [sp], #0x10
; ldp x21, x22, [sp], #0x10
; ldp x23, x24, [sp], #0x10
; ldp x25, x26, [sp], #0x10
; ldp x27, x28, [sp], #0x10
; ldp x29, x30, [sp], #0x10
; ret

cranelift/filetests/filetests/isa/x64/winch.clif (99 lines added)

@@ -279,3 +279,102 @@ block0:
; popq %rbp
; retq
function %f6(i64) -> i32 {
sig0 = () -> i32, i32, f64 winch
fn0 = %g sig0
block0(v0:i64):
v1, v2, v3 = call fn0()
v4 = band.i32 v1, v2
return v4
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $48, %rsp
; movq %rbx, 0(%rsp)
; movq %r12, 8(%rsp)
; movq %r13, 16(%rsp)
; movq %r14, 24(%rsp)
; movq %r15, 32(%rsp)
; block0:
; subq %rsp, $16, %rsp
; virtual_sp_offset_adjust 16
; lea 0(%rsp), %rdi
; load_ext_name %g+0, %r15
; call *%r15
; movq 4(%rsp), %rax
; movq 0(%rsp), %rdi
; addq %rsp, $16, %rsp
; virtual_sp_offset_adjust -16
; andl %eax, %edi, %eax
; movq 0(%rsp), %rbx
; movq 8(%rsp), %r12
; movq 16(%rsp), %r13
; movq 24(%rsp), %r14
; movq 32(%rsp), %r15
; addq %rsp, $48, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x30, %rsp
; movq %rbx, (%rsp)
; movq %r12, 8(%rsp)
; movq %r13, 0x10(%rsp)
; movq %r14, 0x18(%rsp)
; movq %r15, 0x20(%rsp)
; block1: ; offset 0x20
; subq $0x10, %rsp
; leaq (%rsp), %rdi
; movabsq $0, %r15 ; reloc_external Abs8 %g 0
; callq *%r15
; movq 4(%rsp), %rax
; movq (%rsp), %rdi
; addq $0x10, %rsp
; andl %edi, %eax
; movq (%rsp), %rbx
; movq 8(%rsp), %r12
; movq 0x10(%rsp), %r13
; movq 0x18(%rsp), %r14
; movq 0x20(%rsp), %r15
; addq $0x30, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq
function %reverse_args(i32, i64, i32, i64) -> i64, i32, i64, i32 winch {
block0(v0: i32, v1: i64, v2: i32, v3: i64):
return v3, v2, v1, v0
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movq %rcx, 12(%r8)
; movl %edx, 8(%r8)
; movq %rsi, 0(%r8)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %rax
; movq %rcx, 0xc(%r8)
; movl %edx, 8(%r8)
; movq %rsi, (%r8)
; movq %rbp, %rsp
; popq %rbp
; retq
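
The `%reverse_args` expectation above shows the flipped layout concretely: the three stack-returned results (i64, i32, i64) get packed offsets 0, 8, and 12 for a 20-byte extent, and offset' = 20 - offset - size flips them to 12 (0xc), 8, and 0, exactly where the stores through %r8 (the return-area pointer) land. The last result, an i32, comes back in the accumulator via `movq %rdi, %rax`.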

cranelift/filetests/filetests/runtests/winch.clif (21 lines added; new file)

@@ -0,0 +1,21 @@
test run
target x86_64
function %reverse_args(i32, i64, i32, i64) -> i64, i32, i64, i32 winch {
block0(v0: i32, v1: i64, v2: i32, v3: i64):
return v3, v2, v1, v0
}
function %call_winch() -> i64, i32, i64, i32 {
fn0 = %reverse_args(i32, i64, i32, i64) -> i64, i32, i64, i32 winch
block0:
v0 = iconst.i32 0
v1 = iconst.i64 1
v2 = iconst.i32 2
v3 = iconst.i64 3
v4, v5, v6, v7 = call fn0(v0, v1, v2, v3)
return v4, v5, v6, v7
}
; run: %call_winch() == [3, 2, 1, 0]
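
For reference, a run test like this executes on the host: assuming a wasmtime checkout, something along the lines of `cargo run -p cranelift-tools -- test cranelift/filetests/filetests/runtests/winch.clif` (cranelift's clif-util filetest harness) compiles both functions, invokes %call_winch, and checks its four results against [3, 2, 1, 0].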

crates/cranelift/src/lib.rs (4 changed lines)

@@ -156,8 +156,8 @@ fn wasm_call_signature(
         // The winch calling convention is only implemented for x64 and aarch64
         arch if tunables.winch_callable => {
             assert!(
-                matches!(arch, Architecture::X86_64 | Architecture::Aarch64(_)),
-                "https://github.com/bytecodealliance/wasmtime/issues/6530"
+                matches!(arch, Architecture::X86_64),
+                "The Winch calling convention is only implemented for x86_64"
             );
             CallConv::Winch
         }
