diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index d0378a4aa1..6c1bf65ab0 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -715,7 +715,7 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan // If the operation forbids source/destination overlap we need to // ensure that the source and destination registers are different. - if op.forbids_src_dst_overlaps() { + if op.forbids_overlaps(mask) { collector.reg_late_use(vs2); collector.reg_use(vd_src); collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. @@ -745,7 +745,7 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan // If the operation forbids source/destination overlap, then we must // register it as an early_def. This encodes the constraint that // these must not overlap. - if op.forbids_src_dst_overlaps() { + if op.forbids_overlaps(mask) { collector.reg_early_def(vd); } else { collector.reg_def(vd); @@ -768,7 +768,7 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan // If the operation forbids source/destination overlap, then we must // register it as an early_def. This encodes the constraint that // these must not overlap. - if op.forbids_src_dst_overlaps() { + if op.forbids_overlaps(mask) { collector.reg_early_def(vd); } else { collector.reg_def(vd); @@ -791,7 +791,7 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan // If the operation forbids source/destination overlap, then we must // register it as an early_def. This encodes the constraint that // these must not overlap. - if op.forbids_src_dst_overlaps() { + if op.forbids_overlaps(mask) { collector.reg_early_def(vd); } else { collector.reg_def(vd); @@ -799,8 +799,11 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan vec_mask_operands(mask, collector); } - &Inst::VecAluRImm5 { vd, ref mask, .. } => { + &Inst::VecAluRImm5 { + op, vd, ref mask, .. + } => { debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert!(!op.forbids_overlaps(mask)); collector.reg_def(vd); vec_mask_operands(mask, collector); diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 78e7a2f2d3..4b75035fe3 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -236,6 +236,13 @@ impl VecOpCategory { } impl VecOpMasking { + pub fn is_enabled(&self) -> bool { + match self { + VecOpMasking::Enabled { .. } => true, + VecOpMasking::Disabled => false, + } + } + pub fn encode(&self) -> u32 { match self { VecOpMasking::Enabled { .. } => 0, @@ -300,6 +307,12 @@ impl VecAluOpRRRR { } } +impl VecInstOverlapInfo for VecAluOpRRRR { + fn forbids_src_dst_overlaps(&self) -> bool { + false + } +} + impl fmt::Display for VecAluOpRRRR { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut s = format!("{self:?}"); @@ -336,9 +349,10 @@ impl VecAluOpRRRImm5 { VecAluOpRRRImm5::VslideupVI => true, } } +} - /// Some instructions do not allow the source and destination registers to overlap. - pub fn forbids_src_dst_overlaps(&self) -> bool { +impl VecInstOverlapInfo for VecAluOpRRRImm5 { + fn forbids_src_dst_overlaps(&self) -> bool { match self { VecAluOpRRRImm5::VslideupVI => true, } @@ -569,9 +583,10 @@ impl VecAluOpRRR { _ => unreachable!(), } } +} - /// Some instructions do not allow the source and destination registers to overlap. 
- pub fn forbids_src_dst_overlaps(&self) -> bool { +impl VecInstOverlapInfo for VecAluOpRRR { + fn forbids_src_dst_overlaps(&self) -> bool { match self { VecAluOpRRR::VrgatherVV | VecAluOpRRR::VrgatherVX @@ -595,6 +610,37 @@ impl VecAluOpRRR { _ => false, } } + + // Only mask writing operations, and reduction operations (`vred*`) allow mask / dst overlaps. + fn forbids_mask_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRR::VredmaxuVS + | VecAluOpRRR::VredminuVS + | VecAluOpRRR::VmandMM + | VecAluOpRRR::VmorMM + | VecAluOpRRR::VmnandMM + | VecAluOpRRR::VmnorMM + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmsltuVX + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmsgtuVX + | VecAluOpRRR::VmsgtVX + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfleVV + | VecAluOpRRR::VmfeqVF + | VecAluOpRRR::VmfneVF + | VecAluOpRRR::VmfltVF + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmfgeVF => false, + _ => true, + } + } } impl fmt::Display for VecAluOpRRR { @@ -704,14 +750,28 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VmsgtVI => false, } } +} - /// Some instructions do not allow the source and destination registers to overlap. - pub fn forbids_src_dst_overlaps(&self) -> bool { +impl VecInstOverlapInfo for VecAluOpRRImm5 { + fn forbids_src_dst_overlaps(&self) -> bool { match self { VecAluOpRRImm5::VrgatherVI => true, _ => false, } } + + // Only mask writing operations, and reduction operations (`vred*`) allow mask / dst overlaps. + fn forbids_mask_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => false, + _ => true, + } + } } impl fmt::Display for VecAluOpRRImm5 { @@ -908,9 +968,10 @@ impl VecAluOpRR { VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, } } +} - /// Some instructions do not allow the source and destination registers to overlap. - pub fn forbids_src_dst_overlaps(&self) -> bool { +impl VecInstOverlapInfo for VecAluOpRR { + fn forbids_src_dst_overlaps(&self) -> bool { match self { VecAluOpRR::VzextVF2 | VecAluOpRR::VzextVF4 @@ -986,6 +1047,14 @@ impl VecAluOpRImm5 { } } +impl VecInstOverlapInfo for VecAluOpRImm5 { + fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRImm5::VmvVI => false, + } + } +} + impl fmt::Display for VecAluOpRImm5 { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(match self { @@ -1057,3 +1126,43 @@ impl VecAMode { } } } + +pub trait VecInstOverlapInfo { + /// § 5.2 Vector Operands states: + /// + /// A destination vector register group can overlap a source vector register group + /// only if one of the following holds: + /// + /// * The destination EEW equals the source EEW. + /// + /// * The destination EEW is smaller than the source EEW and the overlap is + /// in the lowest-numbered part of the source register group (e.g., when LMUL=1, + /// vnsrl.wi v0, v0, 3 is legal, but a destination of v1 is not). + /// + /// * The destination EEW is greater than the source EEW, the source EMUL is at + /// least 1, and the overlap is in the highest-numbered part of the destination register + /// group (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not). + /// + /// For the purpose of determining register group overlap constraints, mask elements have EEW=1. 
+ fn forbids_src_dst_overlaps(&self) -> bool; + + /// § 5.3 Vector Masking states: + /// + /// > The destination vector register group for a masked vector instruction + /// > cannot overlap the source mask register (v0), unless the destination + /// > vector register is being written with a mask value (e.g., compares) or + /// > the scalar result of a reduction. These instruction encodings are reserved. + /// + /// In almost all instructions we should not allow the mask to be re-used as + /// a destination register. + fn forbids_mask_dst_overlaps(&self) -> bool { + true + } + + /// There are two broad categories of overlaps (see above). But we can't represent such + /// fine grained overlaps to regalloc. So if any of the two come into play we forbid + /// all source and destination overlaps (including masks). + fn forbids_overlaps(&self, mask: &VecOpMasking) -> bool { + self.forbids_src_dst_overlaps() || (mask.is_enabled() && self.forbids_mask_dst_overlaps()) + } +} diff --git a/cranelift/filetests/filetests/isa/riscv64/issue-6954.clif b/cranelift/filetests/filetests/isa/riscv64/issue-6954.clif new file mode 100644 index 0000000000..4553bfca2d --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/issue-6954.clif @@ -0,0 +1,763 @@ +test compile precise-output +target riscv64gc has_v has_c has_zbkb has_zba has_zbb has_zbc has_zbs + + +function %a(i16 sext, f32, f64x2, i32 sext, i8 sext, i64x2, i8, f32x4, i16x8, i8 sext, i8 sext) -> f64x2, i16x8, i8, f64x2, i16x8, i16x8, i16x8, i16x8 { + ss0 = explicit_slot 126 + ss1 = explicit_slot 126 + ss2 = explicit_slot 126 + +block0(v0: i16, v1: f32, v2: f64x2, v3: i32, v4: i8, v5: i64x2, v6: i8, v7: f32x4, v8: i16x8, v9: i8, v10: i8): + v11 = iconst.i8 0 + v12 = iconst.i16 0 + v13 = iconst.i32 0 + v14 = iconst.i64 0 + v15 = uextend.i128 v14 + stack_store v15, ss0 + stack_store v15, ss0+16 + stack_store v15, ss0+32 + stack_store v15, ss0+48 + stack_store v15, ss0+64 + stack_store v15, ss0+80 + stack_store v15, ss0+96 + stack_store v14, ss0+112 + stack_store v13, ss0+120 + stack_store v12, ss0+124 + stack_store v15, ss1 + stack_store v15, ss1+16 + stack_store v15, ss1+32 + stack_store v15, ss1+48 + stack_store v15, ss1+64 + stack_store v15, ss1+80 + stack_store v15, ss1+96 + stack_store v14, ss1+112 + stack_store v13, ss1+120 + stack_store v12, ss1+124 + stack_store v15, ss2 + stack_store v15, ss2+16 + stack_store v15, ss2+32 + stack_store v15, ss2+48 + stack_store v15, ss2+64 + stack_store v15, ss2+80 + stack_store v15, ss2+96 + stack_store v14, ss2+112 + stack_store v13, ss2+120 + stack_store v12, ss2+124 + v16 = select v3, v8, v8 + v17 = select v3, v16, v16 + v18 = select v3, v17, v17 + v77 = sqrt v2 + v78 = fcmp ne v77, v77 + v79 = f64const +NaN + v80 = splat.f64x2 v79 + v81 = bitcast.f64x2 v78 + v19 = bitselect v81, v80, v77 + v82 = sqrt v19 + v83 = fcmp ne v82, v82 + v84 = f64const +NaN + v85 = splat.f64x2 v84 + v86 = bitcast.f64x2 v83 + v20 = bitselect v86, v85, v82 + v21 = select v3, v18, v18 + v22 = umin v0, v0 + v23 = select v3, v21, v21 + v24 = select v3, v23, v23 + v25 = select v3, v24, v24 + v26 = select v3, v25, v25 + v27 = select v3, v26, v26 + v28 = select v3, v27, v27 + v29 = select v3, v28, v28 + v30 = iadd v3, v3 + v31 = select v30, v29, v29 + v32 = umin v22, v22 + v33 = select v30, v31, v31 + v34 = select v30, v33, v33 + v35 = select v30, v34, v34 + v36 = select v30, v35, v35 + v37 = smax v5, v5 + v38 = ishl v32, v32 + v39 = select v30, v36, v36 + v40 = stack_addr.i64 ss0+3 + v41 = iadd_imm v40, 0 + v42 = atomic_rmw.i8 and 
v41, v10 + v43 = select v30, v39, v39 + v44 = select v30, v43, v43 + v45 = select v30, v44, v44 + v46 = isub v38, v38 + v47 = select v30, v45, v45 + v48 = select v30, v47, v47 + v49 = select v30, v48, v48 + v50 = select v30, v49, v49 + stack_store v37, ss0+33 + v51 = select v30, v50, v50 + v52 = select v30, v51, v51 + v53 = select v30, v52, v52 + v54 = select v30, v53, v53 + v55 = select v30, v54, v54 + v56 = select v30, v55, v55 + v57 = select v30, v56, v56 + v58 = select v30, v57, v57 + v59 = select v30, v58, v58 + v60 = select v30, v59, v59 + v61 = select v30, v60, v60 + v62 = select v30, v61, v61 + v63 = select v30, v62, v62 + v64 = select v30, v63, v63 + v65 = select v30, v64, v64 + v66 = select v30, v65, v65 + v67 = select v30, v66, v66 + v68 = select v30, v67, v67 + v69 = select v30, v68, v68 + v70 = select v30, v69, v69 + v71 = select v30, v70, v70 + v72 = select v30, v71, v71 + v73 = select v30, v72, v72 + v74 = select v30, v73, v73 + v75 = select v30, v74, v74 + v76 = select v30, v75, v75 + return v20, v76, v42, v20, v76, v76, v76, v76 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-384 +; block0: +; vle8.v v3,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v7,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v10,48(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v12,64(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a0,0 +; li a2,0 +; li a3,0 +; li a4,0 +; load_addr a7,0(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,16(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,32(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,48(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr t3,64(nominal_sp) +; sd a3,0(t3) +; sd a4,8(t3) +; load_addr t0,80(nominal_sp) +; sd a3,0(t0) +; sd a4,8(t0) +; load_addr t2,96(nominal_sp) +; sd a3,0(t2) +; sd a4,8(t2) +; load_addr a7,112(nominal_sp) +; sd a3,0(a7) +; load_addr a7,120(nominal_sp) +; sw a2,0(a7) +; load_addr a7,124(nominal_sp) +; sh a0,0(a7) +; load_addr a7,128(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,144(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr t3,160(nominal_sp) +; sd a3,0(t3) +; sd a4,8(t3) +; load_addr t0,176(nominal_sp) +; sd a3,0(t0) +; sd a4,8(t0) +; load_addr t2,192(nominal_sp) +; sd a3,0(t2) +; sd a4,8(t2) +; load_addr a7,208(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,224(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,240(nominal_sp) +; sd a3,0(a7) +; load_addr a7,248(nominal_sp) +; sw a2,0(a7) +; load_addr a7,252(nominal_sp) +; sh a0,0(a7) +; load_addr t3,256(nominal_sp) +; sd a3,0(t3) +; sd a4,8(t3) +; load_addr t0,272(nominal_sp) +; sd a3,0(t0) +; sd a4,8(t0) +; load_addr t2,288(nominal_sp) +; sd a3,0(t2) +; sd a4,8(t2) +; load_addr a7,304(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,320(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,336(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr a7,352(nominal_sp) +; sd a3,0(a7) +; sd a4,8(a7) +; load_addr t4,368(nominal_sp) +; sd a3,0(t4) +; load_addr t0,376(nominal_sp) +; sw a2,0(t0) +; load_addr t1,380(nominal_sp) +; sh a0,0(t1) +; zext.w t1,a1 +; select_i16x8 v0,v12,v12##condition=t1 +; zext.w t1,a1 +; select_i16x8 v2,v0,v0##condition=t1 +; zext.w t1,a1 +; select_i16x8 v1,v2,v2##condition=t1 +; vfsqrt.v v31,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; ld t1,[const(0)] +; fmv.d.x ft4,t1 +; vfmv.v.f v3,ft4 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vv v0,v31,v31 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v2,v31,v3,v0.t #avl=2, #vtype=(e64, m1, 
ta, ma) +; vfsqrt.v v31,v2 #avl=2, #vtype=(e64, m1, ta, ma) +; ld t1,[const(0)] +; fmv.d.x ft4,t1 +; vfmv.v.f v2,ft4 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vv v0,v31,v31 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v4,v31,v2,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; zext.w t1,a1 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,a1 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,a1 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,a1 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,a1 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,a1 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,a1 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,a1 +; select_i16x8 v1,v2,v2##condition=t1 +; add t2,a1,a1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; vmax.vv v31,v7,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; load_addr a0,3(nominal_sp) +; addi a0,a0,0 +; andi t0,a0,3 +; slli a1,t0,3 +; andi a2,a0,-4 +; atomic_rmw.i8 and a0,a5,(a2)##t0=a3 offset=a1 +; zext.w t0,t2 +; select_i16x8 v2,v1,v1##condition=t0 +; zext.w t0,t2 +; select_i16x8 v1,v2,v2##condition=t0 +; zext.w t0,t2 +; select_i16x8 v2,v1,v1##condition=t0 +; zext.w t0,t2 +; select_i16x8 v1,v2,v2##condition=t0 +; zext.w t0,t2 +; select_i16x8 v2,v1,v1##condition=t0 +; zext.w t0,t2 +; select_i16x8 v3,v2,v2##condition=t0 +; zext.w t0,t2 +; select_i16x8 v1,v3,v3##condition=t0 +; load_addr t1,33(nominal_sp) +; vse64.v v31,0(t1) #avl=2, #vtype=(e64, m1, ta, ma) +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; zext.w t1,t2 +; select_i16x8 v2,v1,v1##condition=t1 +; zext.w t1,t2 +; select_i16x8 v1,v2,v2##condition=t1 +; vse8.v v4,0(a6) #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v1,16(a6) #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v4,32(a6) #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v1,48(a6) #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v1,64(a6) #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v1,80(a6) #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v1,96(a6) #avl=16, #vtype=(e8, m1, ta, ma) +; add sp,+384 +; ld ra,8(sp) +; 
ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; addi sp, sp, -0x180 +; block1: ; offset 0x14 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x81, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x83, 0x0f, 0x02 +; addi t6, s0, 0x30 +; .byte 0x07, 0x85, 0x0f, 0x02 +; addi t6, s0, 0x40 +; .byte 0x07, 0x86, 0x0f, 0x02 +; mv a0, zero +; mv a2, zero +; mv a3, zero +; mv a4, zero +; mv a7, sp +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0x10 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0x20 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0x30 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi t3, sp, 0x40 +; sd a3, 0(t3) +; sd a4, 8(t3) +; addi t0, sp, 0x50 +; sd a3, 0(t0) +; sd a4, 8(t0) +; addi t2, sp, 0x60 +; sd a3, 0(t2) +; sd a4, 8(t2) +; addi a7, sp, 0x70 +; sd a3, 0(a7) +; addi a7, sp, 0x78 +; sw a2, 0(a7) +; addi a7, sp, 0x7c +; sh a0, 0(a7) +; addi a7, sp, 0x80 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0x90 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi t3, sp, 0xa0 +; sd a3, 0(t3) +; sd a4, 8(t3) +; addi t0, sp, 0xb0 +; sd a3, 0(t0) +; sd a4, 8(t0) +; addi t2, sp, 0xc0 +; sd a3, 0(t2) +; sd a4, 8(t2) +; addi a7, sp, 0xd0 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0xe0 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0xf0 +; sd a3, 0(a7) +; addi a7, sp, 0xf8 +; sw a2, 0(a7) +; addi a7, sp, 0xfc +; sh a0, 0(a7) +; addi t3, sp, 0x100 +; sd a3, 0(t3) +; sd a4, 8(t3) +; addi t0, sp, 0x110 +; sd a3, 0(t0) +; sd a4, 8(t0) +; addi t2, sp, 0x120 +; sd a3, 0(t2) +; sd a4, 8(t2) +; addi a7, sp, 0x130 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0x140 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0x150 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi a7, sp, 0x160 +; sd a3, 0(a7) +; sd a4, 8(a7) +; addi t4, sp, 0x170 +; sd a3, 0(t4) +; addi t0, sp, 0x178 +; sw a2, 0(t0) +; addi t1, sp, 0x17c +; sh a0, 0(t1) +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x30, 0xc0, 0x9e +; j 8 +; .byte 0x57, 0x30, 0xc0, 0x9e +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x00, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x00, 0x9e +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x1f, 0x30, 0x4e +; auipc t1, 0 +; ld t1, 0x488(t1) +; fmv.d.x ft4, t1 +; .byte 0xd7, 0x51, 0x02, 0x5e +; .byte 0x57, 0x90, 0xff, 0x73 +; .byte 0x57, 0x81, 0xf1, 0x5d +; .byte 0xd7, 0x1f, 0x20, 0x4e +; auipc t1, 0 +; ld t1, 0x46c(t1) +; fmv.d.x ft4, t1 +; .byte 0x57, 0x51, 0x02, 0x5e +; .byte 0x57, 0x90, 0xff, 0x73 +; .byte 0x57, 0x02, 0xf1, 0x5d +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x05, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 
0x05, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; add t2, a1, a1 +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0xd7, 0x8f, 0x73, 0x1e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; addi a0, sp, 3 +; mv a0, a0 +; andi t0, a0, 3 +; slli a1, t0, 3 +; andi a2, a0, -4 +; lr.w.aqrl a0, (a2) +; srl a0, a0, a1 +; andi a0, a0, 0xff +; and a3, a0, a5 +; lr.w.aqrl t5, (a2) +; addi t6, zero, 0xff +; sll t6, t6, a1 +; not t6, t6 +; and t5, t5, t6 +; andi t6, a3, 0xff +; sll t6, t6, a1 +; or t5, t5, t6 +; sc.w.aqrl a3, t5, (a2) +; bnez a3, -0x34 +; .byte 0xbb, 0x82, 0x03, 0x08 +; beqz t0, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0xbb, 0x82, 0x03, 0x08 +; beqz t0, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0xbb, 0x82, 0x03, 0x08 +; beqz t0, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0xbb, 0x82, 0x03, 0x08 +; beqz t0, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0xbb, 0x82, 0x03, 0x08 +; beqz t0, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0xbb, 0x82, 0x03, 0x08 +; beqz t0, 0xc +; .byte 0xd7, 0x31, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x31, 0x20, 0x9e +; .byte 0xbb, 0x82, 0x03, 0x08 +; beqz t0, 0xc +; .byte 0xd7, 0x30, 0x30, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x30, 0x9e +; addi t1, sp, 0x21 +; .byte 0xa7, 0x7f, 0x03, 0x02 +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 
0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0x57, 0x31, 0x10, 0x9e +; j 8 +; .byte 0x57, 0x31, 0x10, 0x9e +; .byte 0x3b, 0x83, 0x03, 0x08 +; beqz t1, 0xc +; .byte 0xd7, 0x30, 0x20, 0x9e +; j 8 +; .byte 0xd7, 0x30, 0x20, 0x9e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x08, 0x02 +; addi t6, a6, 0x10 +; .byte 0xa7, 0x80, 0x0f, 0x02 +; addi t6, a6, 0x20 +; .byte 0x27, 0x82, 0x0f, 0x02 +; addi t6, a6, 0x30 +; .byte 0xa7, 0x80, 0x0f, 0x02 +; addi t6, a6, 0x40 +; .byte 0xa7, 0x80, 0x0f, 0x02 +; addi t6, a6, 0x50 +; .byte 0xa7, 0x80, 0x0f, 0x02 +; addi t6, a6, 0x60 +; .byte 0xa7, 0x80, 0x0f, 0x02 +; addi sp, sp, 0x180 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0xf8, 0x7f + diff --git a/cranelift/filetests/filetests/runtests/issue-6954.clif b/cranelift/filetests/filetests/runtests/issue-6954.clif new file mode 100644 index 0000000000..f0fe43cf0f --- /dev/null +++ b/cranelift/filetests/filetests/runtests/issue-6954.clif @@ -0,0 +1,124 @@ +test interpret +test run +target riscv64gc has_v has_c has_zbkb has_zba has_zbb has_zbc has_zbs +target aarch64 +target s390x +target x86_64 + +function %a(i16 sext, f32, f64x2, i32 sext, i8 sext, i64x2, i8, f32x4, i16x8, i8 sext, i8 sext) -> f64x2, i16x8, i8, f64x2, i16x8, i16x8, i16x8, i16x8 { + ss0 = explicit_slot 126 + ss1 = explicit_slot 126 + ss2 = explicit_slot 126 + +block0(v0: i16, v1: f32, v2: f64x2, v3: i32, v4: i8, v5: i64x2, v6: i8, v7: f32x4, v8: i16x8, v9: i8, v10: i8): + v11 = iconst.i8 0 + v12 = iconst.i16 0 + v13 = iconst.i32 0 + v14 = iconst.i64 0 + v15 = uextend.i128 v14 ; v14 = 0 + stack_store v15, ss0 + stack_store v15, ss0+16 + stack_store v15, ss0+32 + stack_store v15, ss0+48 + stack_store v15, ss0+64 + stack_store v15, ss0+80 + stack_store v15, ss0+96 + stack_store v14, ss0+112 ; v14 = 0 + stack_store v13, ss0+120 ; v13 = 0 + stack_store v12, ss0+124 ; v12 = 0 + stack_store v15, ss1 + stack_store v15, ss1+16 + stack_store v15, ss1+32 + stack_store v15, ss1+48 + stack_store v15, ss1+64 + stack_store v15, ss1+80 + stack_store v15, ss1+96 + stack_store v14, ss1+112 
; v14 = 0 + stack_store v13, ss1+120 ; v13 = 0 + stack_store v12, ss1+124 ; v12 = 0 + stack_store v15, ss2 + stack_store v15, ss2+16 + stack_store v15, ss2+32 + stack_store v15, ss2+48 + stack_store v15, ss2+64 + stack_store v15, ss2+80 + stack_store v15, ss2+96 + stack_store v14, ss2+112 ; v14 = 0 + stack_store v13, ss2+120 ; v13 = 0 + stack_store v12, ss2+124 ; v12 = 0 + v16 = select v3, v8, v8 + v17 = select v3, v16, v16 + v18 = select v3, v17, v17 + v77 = sqrt v2 + v78 = fcmp ne v77, v77 + v79 = f64const +NaN + v80 = splat.f64x2 v79 ; v79 = +NaN + v81 = bitcast.f64x2 v78 + v19 = bitselect v81, v80, v77 + v82 = sqrt v19 + v83 = fcmp ne v82, v82 + v84 = f64const +NaN + v85 = splat.f64x2 v84 ; v84 = +NaN + v86 = bitcast.f64x2 v83 + v20 = bitselect v86, v85, v82 + v21 = select v3, v18, v18 + v22 = umin v0, v0 + v23 = select v3, v21, v21 + v24 = select v3, v23, v23 + v25 = select v3, v24, v24 + v26 = select v3, v25, v25 + v27 = select v3, v26, v26 + v28 = select v3, v27, v27 + v29 = select v3, v28, v28 + v30 = iadd v3, v3 + v31 = select v30, v29, v29 + v32 = umin v22, v22 + v33 = select v30, v31, v31 + v34 = select v30, v33, v33 + v35 = select v30, v34, v34 + v36 = select v30, v35, v35 + v37 = smax v5, v5 + v38 = ishl v32, v32 + v39 = select v30, v36, v36 + v40 = stack_addr.i64 ss0+3 + v41 = iadd_imm v40, 0 + v42 = atomic_rmw.i8 and v41, v10 + v43 = select v30, v39, v39 + v44 = select v30, v43, v43 + v45 = select v30, v44, v44 + v46 = isub v38, v38 + v47 = select v30, v45, v45 + v48 = select v30, v47, v47 + v49 = select v30, v48, v48 + v50 = select v30, v49, v49 + stack_store v37, ss0+33 + v51 = select v30, v50, v50 + v52 = select v30, v51, v51 + v53 = select v30, v52, v52 + v54 = select v30, v53, v53 + v55 = select v30, v54, v54 + v56 = select v30, v55, v55 + v57 = select v30, v56, v56 + v58 = select v30, v57, v57 + v59 = select v30, v58, v58 + v60 = select v30, v59, v59 + v61 = select v30, v60, v60 + v62 = select v30, v61, v61 + v63 = select v30, v62, v62 + v64 = select v30, v63, v63 + v65 = select v30, v64, v64 + v66 = select v30, v65, v65 + v67 = select v30, v66, v66 + v68 = select v30, v67, v67 + v69 = select v30, v68, v68 + v70 = select v30, v69, v69 + v71 = select v30, v70, v70 + v72 = select v30, v71, v71 + v73 = select v30, v72, v72 + v74 = select v30, v73, v73 + v75 = select v30, v74, v74 + v76 = select v30, v75, v75 + return v20, v76, v42, v20, v76, v76, v76, v76 +} + +; run: %a(-1, -NaN:0x3fffff, 0xffffff3fffffffffffffffffffffffff, -1, -1, 0xffffffffffffffffffffffffffc8ffff, -1, 0xffffffffffffffffffffffffffffffff, 0xffffffffffffffffffffffffffffffff, -1, -1) == [0x7ff80000000000007ff8000000000000, 0xffffffffffffffffffffffffffffffff, 0, 0x7ff80000000000007ff8000000000000, 0xffffffffffffffffffffffffffffffff, 0xffffffffffffffffffffffffffffffff, 0xffffffffffffffffffffffffffffffff, 0xffffffffffffffffffffffffffffffff]
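
Illustrative note: the overlap policy added in vector.rs above can be summarized with a small, self-contained Rust sketch. `Masking`, `OverlapInfo`, `VaddVV`, and `VmseqVV` are hypothetical stand-ins for the real `VecOpMasking`, `VecInstOverlapInfo`, and `VecAluOpRRR` types; only the decision logic of the default `forbids_overlaps` method is modeled here, not the Cranelift implementation itself.

// Sketch of the policy: an operation forbids register-allocator overlaps if it
// forbids src/dst overlaps outright, or if it is masked and its destination is
// not itself a mask/reduction result (so vd must not alias v0).
enum Masking {
    Enabled,
    Disabled,
}

trait OverlapInfo {
    fn forbids_src_dst_overlaps(&self) -> bool;

    // Most operations write normal vector elements, so by default a masked
    // operation may not reuse the mask register as its destination.
    fn forbids_mask_dst_overlaps(&self) -> bool {
        true
    }

    fn forbids_overlaps(&self, mask: &Masking) -> bool {
        self.forbids_src_dst_overlaps()
            || (matches!(mask, Masking::Enabled) && self.forbids_mask_dst_overlaps())
    }
}

// An element-writing op such as `vadd.vv`: no src/dst restriction of its own,
// but when masked its destination must not alias v0.
struct VaddVV;
impl OverlapInfo for VaddVV {
    fn forbids_src_dst_overlaps(&self) -> bool {
        false
    }
}

// A compare such as `vmseq.vv` writes a mask value, so its destination may alias v0.
struct VmseqVV;
impl OverlapInfo for VmseqVV {
    fn forbids_src_dst_overlaps(&self) -> bool {
        false
    }
    fn forbids_mask_dst_overlaps(&self) -> bool {
        false
    }
}

fn main() {
    assert!(VaddVV.forbids_overlaps(&Masking::Enabled));
    assert!(!VaddVV.forbids_overlaps(&Masking::Disabled));
    assert!(!VmseqVV.forbids_overlaps(&Masking::Enabled));
}

As the trait docs in the patch note, regalloc cannot express "vd may overlap vs2 but not the mask", so whenever either predicate fires the instruction's operands are collected with the stricter early-def/late-use constraints shown in mod.rs.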