Browse Source

Merge pull request #3645 from cfallin/fix-xmm-spillslot-fuzzbug

Fix spillslot size bug in SIMD by removing type-dependent spillslot allocation.
pull/3652/head
Chris Fallin 3 years ago
committed by GitHub
parent
commit
be24edf9d8
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 9
      cranelift/codegen/src/isa/aarch64/abi.rs
  2. 2
      cranelift/codegen/src/isa/arm32/abi.rs
  3. 8
      cranelift/codegen/src/isa/s390x/abi.rs
  4. 9
      cranelift/codegen/src/isa/x64/abi.rs
  5. 19
      cranelift/codegen/src/machinst/abi.rs
  6. 73
      cranelift/codegen/src/machinst/abi_impl.rs
  7. 15
      cranelift/codegen/src/machinst/vcode.rs
  8. 32
      cranelift/filetests/filetests/isa/aarch64/call.clif
  9. 60
      cranelift/filetests/filetests/isa/x64/fastcall.clif
  10. 11
      tests/misc_testsuite/simd/spillslot-size-fuzzbug.wast

9
cranelift/codegen/src/isa/aarch64/abi.rs

@ -1145,12 +1145,11 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
(RegClass::V128, _) => 2,
match rc {
RegClass::I64 => 1,
RegClass::V128 => 2,
_ => panic!("Unexpected register class!"),
}
}

2
cranelift/codegen/src/isa/arm32/abi.rs

@ -436,7 +436,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
unimplemented!("StructArgs not implemented for ARM32 yet");
}
fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
match rc {
RegClass::I32 => 1,
_ => panic!("Unexpected register class!"),

8
cranelift/codegen/src/isa/s390x/abi.rs

@ -685,11 +685,11 @@ impl ABIMachineSpec for S390xMachineDeps {
unimplemented!("StructArgs not implemented for S390X yet");
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::F64, _) => 1,
match rc {
RegClass::I64 => 1,
RegClass::F64 => 1,
_ => panic!("Unexpected register class!"),
}
}

9
cranelift/codegen/src/isa/x64/abi.rs

@ -719,12 +719,11 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1,
(RegClass::V128, _) => 2,
match rc {
RegClass::I64 => 1,
RegClass::V128 => 2,
_ => panic!("Unexpected register class!"),
}
}

19
cranelift/codegen/src/machinst/abi.rs

@ -161,22 +161,13 @@ pub trait ABICallee {
fn stack_args_size(&self) -> u32;
/// Get the spill-slot size.
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;
fn get_spillslot_size(&self, rc: RegClass) -> u32;
/// Generate a spill. The type, if known, is given; this can be used to
/// generate a store instruction optimized for the particular type rather
/// than the RegClass (e.g., only F64 that resides in a V128 register). If
/// no type is given, the implementation should spill the whole register.
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I;
/// Generate a spill.
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I;
/// Generate a reload (fill). As for spills, the type may be given to allow
/// a more optimized load instruction to be generated.
fn gen_reload(
&self,
to_reg: Writable<RealReg>,
from_slot: SpillSlot,
ty: Option<Type>,
) -> Self::I;
/// Generate a reload (fill).
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I;
}
/// Trait implemented by an object that tracks ABI-related state and can

73
cranelift/codegen/src/machinst/abi_impl.rs

@ -502,9 +502,8 @@ pub trait ABIMachineSpec {
size: usize,
) -> SmallVec<[Self::I; 8]>;
/// Get the number of spillslots required for the given register-class and
/// type.
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32;
/// Get the number of spillslots required for the given register-class.
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32;
/// Get the current virtual-SP offset from an instruction-emission state.
fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64;
@ -658,6 +657,17 @@ fn get_special_purpose_param_register(
}
}
fn ty_from_class(class: RegClass) -> Type {
match class {
RegClass::I32 => I32,
RegClass::I64 => I64,
RegClass::F32 => F32,
RegClass::F64 => F64,
RegClass::V128 => I8X16,
_ => panic!("Unknown regclass: {:?}", class),
}
}
impl<M: ABIMachineSpec> ABICalleeImpl<M> {
/// Create a new body ABI instance.
pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult<Self> {
@ -856,26 +866,6 @@ fn generate_gv<M: ABIMachineSpec>(
}
}
/// Return a type either from an optional type hint, or if not, from the default
/// type associated with the given register's class. This is used to generate
/// loads/spills appropriately given the type of value loaded/stored (which may
/// be narrower than the spillslot). We usually have the type because the
/// regalloc usually provides the vreg being spilled/reloaded, and we know every
/// vreg's type. However, the regalloc *can* request a spill/reload without an
/// associated vreg when needed to satisfy a safepoint (which requires all
/// ref-typed values, even those in real registers in the original vcode, to be
/// in spillslots).
fn ty_from_ty_hint_or_reg_class<M: ABIMachineSpec>(r: Reg, ty: Option<Type>) -> Type {
match (ty, r.get_class()) {
// If the type is provided
(Some(t), _) => t,
// If no type is provided, this should be a register spill for a
// safepoint, so we only expect I32/I64 (integer) registers.
(None, rc) if rc == M::word_reg_class() => M::word_type(),
_ => panic!("Unexpected register class!"),
}
}
fn gen_load_stack_multi<M: ABIMachineSpec>(
from: StackAMode,
dst: ValueRegs<Writable<Reg>>,
@ -1203,14 +1193,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
// Integer types smaller than word size have been spilled as words below,
// and therefore must be reloaded in the same type.
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
M::word_type()
} else {
ty
};
gen_load_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), into_regs, ty)
}
@ -1227,18 +1209,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
// When reloading from a spill slot, we might have lost information about real integer
// types. For instance, on the x64 backend, a zero-extension can become spurious and
// optimized into a move, causing vregs of types I32 and I64 to share the same coalescing
// equivalency class. As a matter of fact, such a value can be spilled as an I32 and later
// reloaded as an I64; to make sure the high bits are always defined, do a word-sized store
// all the time, in this case.
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
M::word_type()
} else {
ty
};
gen_store_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty)
}
@ -1383,25 +1353,20 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.sig.stack_arg_space as u32
}
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
M::get_number_of_spillslots_for_value(rc, ty)
fn get_spillslot_size(&self, rc: RegClass) -> u32 {
M::get_number_of_spillslots_for_value(rc)
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I {
let ty = ty_from_ty_hint_or_reg_class::<M>(from_reg.to_reg(), ty);
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I {
let ty = ty_from_class(from_reg.to_reg().get_class());
self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg()))
.into_iter()
.next()
.unwrap()
}
fn gen_reload(
&self,
to_reg: Writable<RealReg>,
from_slot: SpillSlot,
ty: Option<Type>,
) -> Self::I {
let ty = ty_from_ty_hint_or_reg_class::<M>(to_reg.to_reg().to_reg(), ty);
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I {
let ty = ty_from_class(to_reg.to_reg().get_class());
self.load_spillslot(
from_slot,
ty,

15
cranelift/codegen/src/machinst/vcode.rs

@ -688,24 +688,21 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
self.vreg_types.len()
}
fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 {
let ty = self.vreg_type(vreg);
self.abi.get_spillslot_size(regclass, ty)
fn get_spillslot_size(&self, regclass: RegClass, _: VirtualReg) -> u32 {
self.abi.get_spillslot_size(regclass)
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option<VirtualReg>) -> I {
let ty = vreg.map(|v| self.vreg_type(v));
self.abi.gen_spill(to_slot, from_reg, ty)
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, _: Option<VirtualReg>) -> I {
self.abi.gen_spill(to_slot, from_reg)
}
fn gen_reload(
&self,
to_reg: Writable<RealReg>,
from_slot: SpillSlot,
vreg: Option<VirtualReg>,
_: Option<VirtualReg>,
) -> I {
let ty = vreg.map(|v| self.vreg_type(v));
self.abi.gen_reload(to_reg, from_slot, ty)
self.abi.gen_reload(to_reg, from_slot)
}
fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {

32
cranelift/filetests/filetests/isa/aarch64/call.clif

@ -133,28 +133,28 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #32
; nextln: sub sp, sp, #48
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str s0, [sp]
; nextln: str q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str d0, [sp, #8]
; nextln: str q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str d0, [sp, #16]
; nextln: str q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr s0, [sp]
; nextln: ldr q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr d0, [sp, #8]
; nextln: ldr q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr d0, [sp, #16]
; nextln: ldr q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: add sp, sp, #32
; nextln: add sp, sp, #48
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@ -223,28 +223,28 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #32
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str s0, [sp]
; nextln: sub sp, sp, #48
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str d0, [sp, #8]
; nextln: str q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr s0, [sp]
; nextln: str q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr d0, [sp, #8]
; nextln: ldr q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: add sp, sp, #32
; nextln: ldr q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: add sp, sp, #48
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

60
cranelift/filetests/filetests/isa/x64/fastcall.clif

@ -238,34 +238,34 @@ block0(v0: i64):
; nextln: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; nextln: movq %rsp, %rbp
; nextln: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 }
; nextln: subq $$192, %rsp
; nextln: movdqu %xmm6, 32(%rsp)
; nextln: subq $$224, %rsp
; nextln: movdqu %xmm6, 64(%rsp)
; nextln: unwind SaveReg { clobber_offset: 0, reg: r6V }
; nextln: movdqu %xmm7, 48(%rsp)
; nextln: movdqu %xmm7, 80(%rsp)
; nextln: unwind SaveReg { clobber_offset: 16, reg: r7V }
; nextln: movdqu %xmm8, 64(%rsp)
; nextln: movdqu %xmm8, 96(%rsp)
; nextln: unwind SaveReg { clobber_offset: 32, reg: r8V }
; nextln: movdqu %xmm9, 80(%rsp)
; nextln: movdqu %xmm9, 112(%rsp)
; nextln: unwind SaveReg { clobber_offset: 48, reg: r9V }
; nextln: movdqu %xmm10, 96(%rsp)
; nextln: movdqu %xmm10, 128(%rsp)
; nextln: unwind SaveReg { clobber_offset: 64, reg: r10V }
; nextln: movdqu %xmm11, 112(%rsp)
; nextln: movdqu %xmm11, 144(%rsp)
; nextln: unwind SaveReg { clobber_offset: 80, reg: r11V }
; nextln: movdqu %xmm12, 128(%rsp)
; nextln: movdqu %xmm12, 160(%rsp)
; nextln: unwind SaveReg { clobber_offset: 96, reg: r12V }
; nextln: movdqu %xmm13, 144(%rsp)
; nextln: movdqu %xmm13, 176(%rsp)
; nextln: unwind SaveReg { clobber_offset: 112, reg: r13V }
; nextln: movdqu %xmm14, 160(%rsp)
; nextln: movdqu %xmm14, 192(%rsp)
; nextln: unwind SaveReg { clobber_offset: 128, reg: r14V }
; nextln: movdqu %xmm15, 176(%rsp)
; nextln: movdqu %xmm15, 208(%rsp)
; nextln: unwind SaveReg { clobber_offset: 144, reg: r15V }
; nextln: movsd 0(%rcx), %xmm4
; nextln: movsd 8(%rcx), %xmm1
; nextln: movsd 16(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(16 + virtual offset)
; nextln: movdqu %xmm0, rsp(32 + virtual offset)
; nextln: movsd 24(%rcx), %xmm3
; nextln: movsd 32(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(24 + virtual offset)
; nextln: movdqu %xmm0, rsp(48 + virtual offset)
; nextln: movsd 40(%rcx), %xmm5
; nextln: movsd 48(%rcx), %xmm6
; nextln: movsd 56(%rcx), %xmm7
@ -278,24 +278,24 @@ block0(v0: i64):
; nextln: movsd 112(%rcx), %xmm14
; nextln: movsd 120(%rcx), %xmm15
; nextln: movsd 128(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(0 + virtual offset)
; nextln: movdqu %xmm0, rsp(0 + virtual offset)
; nextln: movsd 136(%rcx), %xmm0
; nextln: movsd 144(%rcx), %xmm2
; nextln: movsd %xmm2, rsp(8 + virtual offset)
; nextln: movdqu %xmm2, rsp(16 + virtual offset)
; nextln: movsd 152(%rcx), %xmm2
; nextln: addsd %xmm1, %xmm4
; nextln: movsd rsp(16 + virtual offset), %xmm1
; nextln: movdqu rsp(32 + virtual offset), %xmm1
; nextln: addsd %xmm3, %xmm1
; nextln: movsd rsp(24 + virtual offset), %xmm3
; nextln: movdqu rsp(48 + virtual offset), %xmm3
; nextln: addsd %xmm5, %xmm3
; nextln: addsd %xmm7, %xmm6
; nextln: addsd %xmm9, %xmm8
; nextln: addsd %xmm11, %xmm10
; nextln: addsd %xmm13, %xmm12
; nextln: addsd %xmm15, %xmm14
; nextln: movsd rsp(0 + virtual offset), %xmm5
; nextln: movdqu rsp(0 + virtual offset), %xmm5
; nextln: addsd %xmm0, %xmm5
; nextln: movsd rsp(8 + virtual offset), %xmm0
; nextln: movdqu rsp(16 + virtual offset), %xmm0
; nextln: addsd %xmm2, %xmm0
; nextln: addsd %xmm1, %xmm4
; nextln: addsd %xmm6, %xmm3
@ -307,17 +307,17 @@ block0(v0: i64):
; nextln: addsd %xmm8, %xmm4
; nextln: addsd %xmm5, %xmm4
; nextln: movaps %xmm4, %xmm0
; nextln: movdqu 32(%rsp), %xmm6
; nextln: movdqu 48(%rsp), %xmm7
; nextln: movdqu 64(%rsp), %xmm8
; nextln: movdqu 80(%rsp), %xmm9
; nextln: movdqu 96(%rsp), %xmm10
; nextln: movdqu 112(%rsp), %xmm11
; nextln: movdqu 128(%rsp), %xmm12
; nextln: movdqu 144(%rsp), %xmm13
; nextln: movdqu 160(%rsp), %xmm14
; nextln: movdqu 176(%rsp), %xmm15
; nextln: addq $$192, %rsp
; nextln: movdqu 64(%rsp), %xmm6
; nextln: movdqu 80(%rsp), %xmm7
; nextln: movdqu 96(%rsp), %xmm8
; nextln: movdqu 112(%rsp), %xmm9
; nextln: movdqu 128(%rsp), %xmm10
; nextln: movdqu 144(%rsp), %xmm11
; nextln: movdqu 160(%rsp), %xmm12
; nextln: movdqu 176(%rsp), %xmm13
; nextln: movdqu 192(%rsp), %xmm14
; nextln: movdqu 208(%rsp), %xmm15
; nextln: addq $$224, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

11
tests/misc_testsuite/simd/spillslot-size-fuzzbug.wast

@ -0,0 +1,11 @@
(module
(func (export "test") (result f32 f32)
i32.const 0
f32.convert_i32_s
v128.const i32x4 0 0 0 0
data.drop 0
f32x4.extract_lane 0
data.drop 0)
(data ""))
(assert_return (invoke "test") (f32.const 0.0) (f32.const 0.0))
Loading…
Cancel
Save