[AArch64] Port min/max to ISLE (#4374)

2 years ago · d9e0e6a6a9
6 changed files with 511 additions and 28 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@ -925,6 +925,7 @@
 ;; Each rule maps one `multi_lane` shape (lane width in bits, lane count) to
 ;; the `VectorSize` variant with the same width and count.
 (rule (vector_size (multi_lane 8 16)) (VectorSize.Size8x16))
 (rule (vector_size (multi_lane 16 4)) (VectorSize.Size16x4))
 (rule (vector_size (multi_lane 16 8)) (VectorSize.Size16x8))
 (rule (vector_size (multi_lane 32 2)) (VectorSize.Size32x2))
 (rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
 (rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@ -3576,6 +3576,18 @@ fn test_aarch64_binemit() {
        "sshl v8.2d, v22.2d, v2.2d",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
            rd: writable_vreg(0),
            rn: vreg(11),
            rm: vreg(2),
            size: VectorSize::Size8x8,
        },
        "606D222E",
        "umin v0.8b, v11.8b, v2.8b",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
@ -3588,6 +3600,18 @@ fn test_aarch64_binemit() {
        "umin v1.16b, v12.16b, v3.16b",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
            rd: writable_vreg(29),
            rn: vreg(19),
            rm: vreg(9),
            size: VectorSize::Size16x4,
        },
        "7D6E692E",
        "umin v29.4h, v19.4h, v9.4h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
@ -3600,6 +3624,18 @@ fn test_aarch64_binemit() {
        "umin v30.8h, v20.8h, v10.8h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
            rd: writable_vreg(7),
            rn: vreg(21),
            rm: vreg(20),
            size: VectorSize::Size32x2,
        },
        "A76EB42E",
        "umin v7.2s, v21.2s, v20.2s",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
@ -3612,6 +3648,18 @@ fn test_aarch64_binemit() {
        "umin v8.4s, v22.4s, v21.4s",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
            rd: writable_vreg(2),
            rn: vreg(13),
            rm: vreg(4),
            size: VectorSize::Size8x8,
        },
        "A26D240E",
        "smin v2.8b, v13.8b, v4.8b",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
@ -3624,6 +3672,18 @@ fn test_aarch64_binemit() {
        "smin v1.16b, v12.16b, v3.16b",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
            rd: writable_vreg(3),
            rn: vreg(2),
            rm: vreg(1),
            size: VectorSize::Size16x4,
        },
        "436C610E",
        "smin v3.4h, v2.4h, v1.4h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
@ -3636,6 +3696,18 @@ fn test_aarch64_binemit() {
        "smin v30.8h, v20.8h, v10.8h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
            rd: writable_vreg(9),
            rn: vreg(22),
            rm: vreg(20),
            size: VectorSize::Size32x2,
        },
        "C96EB40E",
        "smin v9.2s, v22.2s, v20.2s",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
@ -3660,6 +3732,30 @@ fn test_aarch64_binemit() {
        "umax v6.8b, v9.8b, v8.8b",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umax,
            rd: writable_vreg(5),
            rn: vreg(15),
            rm: vreg(8),
            size: VectorSize::Size8x16,
        },
        "E565286E",
        "umax v5.16b, v15.16b, v8.16b",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umax,
            rd: writable_vreg(12),
            rn: vreg(14),
            rm: vreg(3),
            size: VectorSize::Size16x4,
        },
        "CC65632E",
        "umax v12.4h, v14.4h, v3.4h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umax,
@ -3672,6 +3768,18 @@ fn test_aarch64_binemit() {
        "umax v11.8h, v13.8h, v2.8h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umax,
            rd: writable_vreg(9),
            rn: vreg(13),
            rm: vreg(15),
            size: VectorSize::Size32x2,
        },
        "A965AF2E",
        "umax v9.2s, v13.2s, v15.2s",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umax,
@ -3684,6 +3792,18 @@ fn test_aarch64_binemit() {
        "umax v8.4s, v12.4s, v14.4s",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
            rd: writable_vreg(7),
            rn: vreg(8),
            rm: vreg(9),
            size: VectorSize::Size8x8,
        },
        "0765290E",
        "smax v7.8b, v8.8b, v9.8b",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
@ -3696,6 +3816,18 @@ fn test_aarch64_binemit() {
        "smax v6.16b, v9.16b, v8.16b",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
            rd: writable_vreg(11),
            rn: vreg(12),
            rm: vreg(13),
            size: VectorSize::Size16x4,
        },
        "8B656D0E",
        "smax v11.4h, v12.4h, v13.4h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
@ -3708,6 +3840,18 @@ fn test_aarch64_binemit() {
        "smax v11.8h, v13.8h, v2.8h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
            rd: writable_vreg(14),
            rn: vreg(16),
            rm: vreg(18),
            size: VectorSize::Size32x2,
        },
        "0E66B20E",
        "smax v14.2s, v16.2s, v18.2s",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@ -499,6 +499,20 @@
            (result Reg (msub $I64 div y64 x64)))
        result))
 ;;;; Rules for integer min/max: umin, imin, umax, imax ;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Signed lane-wise minimum: `imin` becomes a `vec_rrr` with `VecALUOp.Smin`,
 ;; sized by `vector_size` of the result type.
 ;; NOTE(review): `not_i64x2` presumably rejects only i64x2 -- confirm scalar
 ;; types cannot reach `vector_size` through this rule.
 (rule (lower (has_type vec_ty @ (not_i64x2) (imin lhs rhs)))
       (vec_rrr (VecALUOp.Smin) lhs rhs (vector_size vec_ty)))
 ;; Unsigned lane-wise minimum: `umin` becomes a `vec_rrr` with
 ;; `VecALUOp.Umin`, sized by `vector_size` of the result type.
 ;; NOTE(review): `not_i64x2` presumably rejects only i64x2 -- confirm scalar
 ;; types cannot reach `vector_size` through this rule.
 (rule (lower (has_type vec_ty @ (not_i64x2) (umin lhs rhs)))
       (vec_rrr (VecALUOp.Umin) lhs rhs (vector_size vec_ty)))
 ;; Signed lane-wise maximum: `imax` becomes a `vec_rrr` with `VecALUOp.Smax`,
 ;; sized by `vector_size` of the result type.
 ;; NOTE(review): `not_i64x2` presumably rejects only i64x2 -- confirm scalar
 ;; types cannot reach `vector_size` through this rule.
 (rule (lower (has_type vec_ty @ (not_i64x2) (imax lhs rhs)))
       (vec_rrr (VecALUOp.Smax) lhs rhs (vector_size vec_ty)))
 ;; Unsigned lane-wise maximum: `umax` becomes a `vec_rrr` with
 ;; `VecALUOp.Umax`, sized by `vector_size` of the result type.
 ;; NOTE(review): `not_i64x2` presumably rejects only i64x2 -- confirm scalar
 ;; types cannot reach `vector_size` through this rule.
 (rule (lower (has_type vec_ty @ (not_i64x2) (umax lhs rhs)))
       (vec_rrr (VecALUOp.Umax) lhs rhs (vector_size vec_ty)))
 ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; General rule for extending input to an output which fits in a single
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@ -1245,34 +1245,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(Inst::gen_move(dst.regs()[1], src_hi, I64));
        }
-        Opcode::Imax | Opcode::Umax | Opcode::Umin | Opcode::Imin => {
+        Opcode::Imax | Opcode::Umax | Opcode::Umin | Opcode::Imin => implemented_in_isle(ctx),
            let ty = ty.unwrap();
            if !ty.is_vector() || ty.lane_bits() == 64 {
                return Err(CodegenError::Unsupported(format!(
                    "{}: Unsupported type: {:?}",
                    op, ty
                )));
            }
            let alu_op = match op {
                Opcode::Umin => VecALUOp::Umin,
                Opcode::Imin => VecALUOp::Smin,
                Opcode::Umax => VecALUOp::Umax,
                Opcode::Imax => VecALUOp::Smax,
                _ => unreachable!(),
            };
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
            ctx.emit(Inst::VecRRR {
                alu_op,
                rd,
                rn,
                rm,
                size: VectorSize::from_ty(ty),
            });
        }
        Opcode::IaddPairwise => implemented_in_isle(ctx),
--- a/cranelift/filetests/filetests/isa/aarch64/simd-min-max.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-min-max.clif
@ -0,0 +1,244 @@
 test compile precise-output
 set unwind_info=false
 target aarch64
 ;; imin on i8x8: the golden output expects a single `smin` on the 8b arrangement.
 function %fn0(i8x8, i8x8) -> i8x8 {
 block0(v0: i8x8, v1: i8x8):
  v2 = imin v0, v1
  return v2
 }
 ; block0:
 ;   smin v0.8b, v0.8b, v1.8b
 ;   ret
 ;; imin on i8x16: the golden output expects a single `smin` on the 16b arrangement.
 function %fn1(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = imin v0, v1
  return v2
 }
 ; block0:
 ;   smin v0.16b, v0.16b, v1.16b
 ;   ret
 ;; imin on i16x4: the golden output expects a single `smin` on the 4h arrangement.
 function %fn2(i16x4, i16x4) -> i16x4 {
 block0(v0: i16x4, v1: i16x4):
  v2 = imin v0, v1
  return v2
 }
 ; block0:
 ;   smin v0.4h, v0.4h, v1.4h
 ;   ret
 ;; imin on i16x8: the golden output expects a single `smin` on the 8h arrangement.
 function %fn3(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = imin v0, v1
  return v2
 }
 ; block0:
 ;   smin v0.8h, v0.8h, v1.8h
 ;   ret
 ;; imin on i32x2: the golden output expects a single `smin` on the 2s arrangement.
 function %fn4(i32x2, i32x2) -> i32x2 {
 block0(v0: i32x2, v1: i32x2):
  v2 = imin v0, v1
  return v2
 }
 ; block0:
 ;   smin v0.2s, v0.2s, v1.2s
 ;   ret
 ;; imin on i32x4: the golden output expects a single `smin` on the 4s arrangement.
 function %fn5(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = imin v0, v1
  return v2
 }
 ; block0:
 ;   smin v0.4s, v0.4s, v1.4s
 ;   ret
 ;; umin on i8x8: the golden output expects a single `umin` on the 8b arrangement.
 function %fn6(i8x8, i8x8) -> i8x8 {
 block0(v0: i8x8, v1: i8x8):
  v2 = umin v0, v1
  return v2
 }
 ; block0:
 ;   umin v0.8b, v0.8b, v1.8b
 ;   ret
 ;; umin on i8x16: the golden output expects a single `umin` on the 16b arrangement.
 function %fn7(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = umin v0, v1
  return v2
 }
 ; block0:
 ;   umin v0.16b, v0.16b, v1.16b
 ;   ret
 ;; umin on i16x4: the golden output expects a single `umin` on the 4h arrangement.
 function %fn8(i16x4, i16x4) -> i16x4 {
 block0(v0: i16x4, v1: i16x4):
  v2 = umin v0, v1
  return v2
 }
 ; block0:
 ;   umin v0.4h, v0.4h, v1.4h
 ;   ret
 ;; umin on i16x8: the golden output expects a single `umin` on the 8h arrangement.
 function %fn9(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = umin v0, v1
  return v2
 }
 ; block0:
 ;   umin v0.8h, v0.8h, v1.8h
 ;   ret
 ;; umin on i32x2: the golden output expects a single `umin` on the 2s arrangement.
 function %fn10(i32x2, i32x2) -> i32x2 {
 block0(v0: i32x2, v1: i32x2):
  v2 = umin v0, v1
  return v2
 }
 ; block0:
 ;   umin v0.2s, v0.2s, v1.2s
 ;   ret
 ;; umin on i32x4: the golden output expects a single `umin` on the 4s arrangement.
 function %fn11(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = umin v0, v1
  return v2
 }
 ; block0:
 ;   umin v0.4s, v0.4s, v1.4s
 ;   ret
 ;; imax on i8x8: the golden output expects a single `smax` on the 8b arrangement.
 function %fn12(i8x8, i8x8) -> i8x8 {
 block0(v0: i8x8, v1: i8x8):
  v2 = imax v0, v1
  return v2
 }
 ; block0:
 ;   smax v0.8b, v0.8b, v1.8b
 ;   ret
 ;; imax on i8x16: the golden output expects a single `smax` on the 16b arrangement.
 function %fn13(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = imax v0, v1
  return v2
 }
 ; block0:
 ;   smax v0.16b, v0.16b, v1.16b
 ;   ret
 ;; imax on i16x4: the golden output expects a single `smax` on the 4h arrangement.
 function %fn14(i16x4, i16x4) -> i16x4 {
 block0(v0: i16x4, v1: i16x4):
  v2 = imax v0, v1
  return v2
 }
 ; block0:
 ;   smax v0.4h, v0.4h, v1.4h
 ;   ret
 ;; imax on i16x8: the golden output expects a single `smax` on the 8h arrangement.
 function %fn15(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = imax v0, v1
  return v2
 }
 ; block0:
 ;   smax v0.8h, v0.8h, v1.8h
 ;   ret
 ;; imax on i32x2: the golden output expects a single `smax` on the 2s arrangement.
 function %fn16(i32x2, i32x2) -> i32x2 {
 block0(v0: i32x2, v1: i32x2):
  v2 = imax v0, v1
  return v2
 }
 ; block0:
 ;   smax v0.2s, v0.2s, v1.2s
 ;   ret
 ;; imax on i32x4: the golden output expects a single `smax` on the 4s arrangement.
 function %fn17(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = imax v0, v1
  return v2
 }
 ; block0:
 ;   smax v0.4s, v0.4s, v1.4s
 ;   ret
 ;; umax on i8x8: the golden output expects a single `umax` on the 8b arrangement.
 function %fn18(i8x8, i8x8) -> i8x8 {
 block0(v0: i8x8, v1: i8x8):
  v2 = umax v0, v1
  return v2
 }
 ; block0:
 ;   umax v0.8b, v0.8b, v1.8b
 ;   ret
 ;; umax on i8x16: the golden output expects a single `umax` on the 16b arrangement.
 function %fn19(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = umax v0, v1
  return v2
 }
 ; block0:
 ;   umax v0.16b, v0.16b, v1.16b
 ;   ret
 ;; umax on i16x4: the golden output expects a single `umax` on the 4h arrangement.
 function %fn20(i16x4, i16x4) -> i16x4 {
 block0(v0: i16x4, v1: i16x4):
  v2 = umax v0, v1
  return v2
 }
 ; block0:
 ;   umax v0.4h, v0.4h, v1.4h
 ;   ret
 ;; umax on i16x8: the golden output expects a single `umax` on the 8h arrangement.
 function %fn21(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = umax v0, v1
  return v2
 }
 ; block0:
 ;   umax v0.8h, v0.8h, v1.8h
 ;   ret
 ;; umax on i32x2: the golden output expects a single `umax` on the 2s arrangement.
 function %fn22(i32x2, i32x2) -> i32x2 {
 block0(v0: i32x2, v1: i32x2):
  v2 = umax v0, v1
  return v2
 }
 ; block0:
 ;   umax v0.2s, v0.2s, v1.2s
 ;   ret
 ;; umax on i32x4: the golden output expects a single `umax` on the 4s arrangement.
 function %fn23(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = umax v0, v1
  return v2
 }
 ; block0:
 ;   umax v0.4s, v0.4s, v1.4s
 ;   ret
--- a/cranelift/filetests/filetests/runtests/simd-min-max.clif
+++ b/cranelift/filetests/filetests/runtests/simd-min-max.clif
@ -0,0 +1,107 @@
 test run
 target aarch64
 target x86_64
 ; Signed lane-wise minimum on i8x16.
 ; The first case uses only non-negative lanes, so the first operand wins in
 ; every lane. The second case sets the top bit in alternating lanes of each
 ; operand, so the expected result picks the lane that is negative when signed.
 function %imin_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = imin v0, v1
  return v2
 }
 ; run: %imin_i8x16([0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f], [0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f ]) == [ 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f ]
 ; run: %imin_i8x16([0x90 0x01 0x92 0x03 0x94 0x05 0x96 0x07 0x98 0x09 0x9a 0x0b 0x9c 0x0d 0x9e 0x0f], [0x10 0x91 0x12 0x93 0x14 0x95 0x16 0x97 0x18 0x99 0x1a 0x9b 0x1c 0x9d 0x1e 0x9f ]) == [ 0x90 0x91 0x92 0x93 0x94 0x95 0x96 0x97 0x98 0x99 0x9a 0x9b 0x9c 0x9d 0x9e 0x9f ]
 ; Signed lane-wise minimum on i16x8.
 ; Lane 2 compares 0x9876 (top bit set) with 0x6789, so the expected 0x9876
 ; there depends on signed ordering. The last lane has equal operands.
 function %imin_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = imin v0, v1
  return v2
 }
 ; run: %imin_i16x8([0x1234 0x5678 0x9876 0x5432 0x7654 0x1234 0x4567 0x3456 ], [ 0x4567 0x1234 0x6789 0x0987 0x0123 0x3210 0x7890 0x3456 ]) == [ 0x1234 0x1234 0x9876 0x0987 0x0123 0x1234 0x4567 0x3456 ]
 ; Signed lane-wise minimum on i32x4.
 ; Every lane of the first operand has the top bit set. The second operand
 ; mixes lanes with and without it, so the expected values depend on signed
 ; ordering.
 function %imin_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = imin v0, v1
  return v2
 }
 ; run: %imin_i32x4([0xBAADF00D 0xDEADBEEF 0xC00FFFEE 0xBADAB00F], [0xCA11ACAB 0x12349876 0x98763210 0x43216789]) == [ 0xBAADF00D 0xDEADBEEF 0x98763210 0xBADAB00F ]
 ; Unsigned lane-wise minimum on i8x16.
 ; Same inputs as the signed i8x16 minimum test. In the second case the lanes
 ; with the top bit set are the larger values, so the other operand is
 ; expected in each lane.
 function %umin_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = umin v0, v1
  return v2
 }
 ; run: %umin_i8x16([0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f], [0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f ]) == [ 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f ]
 ; run: %umin_i8x16([0x90 0x01 0x92 0x03 0x94 0x05 0x96 0x07 0x98 0x09 0x9a 0x0b 0x9c 0x0d 0x9e 0x0f], [0x10 0x91 0x12 0x93 0x14 0x95 0x16 0x97 0x18 0x99 0x1a 0x9b 0x1c 0x9d 0x1e 0x9f ]) == [ 0x10 0x01 0x12 0x03 0x14 0x05 0x16 0x07 0x18 0x09 0x1a 0x0b 0x1c 0x0d 0x1e 0x0f ]
 ; Unsigned lane-wise minimum on i16x8.
 ; Lane 2 compares 0x9876 with 0x6789. Unsigned ordering expects 0x6789,
 ; unlike the signed variant of this test.
 function %umin_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = umin v0, v1
  return v2
 }
 ; run: %umin_i16x8([0x1234 0x5678 0x9876 0x5432 0x7654 0x1234 0x4567 0x3456 ], [ 0x4567 0x1234 0x6789 0x0987 0x0123 0x3210 0x7890 0x3456 ]) == [ 0x1234 0x1234 0x6789 0x0987 0x0123 0x1234 0x4567 0x3456 ]
 ; Unsigned lane-wise minimum on i32x4.
 ; Same inputs as the signed i32x4 minimum test. Lanes 1 and 3 differ from the
 ; signed result because the top-bit-set values are the larger ones here.
 function %umin_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = umin v0, v1
  return v2
 }
 ; run: %umin_i32x4([0xBAADF00D 0xDEADBEEF 0xC00FFFEE 0xBADAB00F], [0xCA11ACAB 0x12349876 0x98763210 0x43216789]) == [ 0xBAADF00D 0x12349876 0x98763210 0x43216789 ]
 ; Signed lane-wise maximum on i8x16.
 ; The first case uses only non-negative lanes, so the second operand wins in
 ; every lane. In the second case the lanes with the top bit set are negative,
 ; so the other operand is expected in each lane.
 function %imax_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = imax v0, v1
  return v2
 }
 ; run: %imax_i8x16([0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f], [0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f ]) == [ 0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f ]
 ; run: %imax_i8x16([0x90 0x01 0x92 0x03 0x94 0x05 0x96 0x07 0x98 0x09 0x9a 0x0b 0x9c 0x0d 0x9e 0x0f], [0x10 0x91 0x12 0x93 0x14 0x95 0x16 0x97 0x18 0x99 0x1a 0x9b 0x1c 0x9d 0x1e 0x9f ]) == [ 0x10 0x01 0x12 0x03 0x14 0x05 0x16 0x07 0x18 0x09 0x1a 0x0b 0x1c 0x0d 0x1e 0x0f ]
 ; Signed lane-wise maximum on i16x8.
 ; Lane 2 compares 0x9876 (top bit set) with 0x6789, so the expected 0x6789
 ; there depends on signed ordering.
 function %imax_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = imax v0, v1
  return v2
 }
 ; run: %imax_i16x8([0x1234 0x5678 0x9876 0x5432 0x7654 0x1234 0x4567 0x3456 ], [ 0x4567 0x1234 0x6789 0x0987 0x0123 0x3210 0x7890 0x3456 ]) == [ 0x4567 0x5678 0x6789 0x5432 0x7654 0x3210 0x7890 0x3456 ]
 ; Signed lane-wise maximum on i32x4.
 ; Every lane of the first operand has the top bit set. Lanes 1 and 3 of the
 ; second operand do not, so they are expected to win under signed ordering.
 function %imax_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = imax v0, v1
  return v2
 }
 ; run: %imax_i32x4([0xBAADF00D 0xDEADBEEF 0xC00FFFEE 0xBADAB00F], [0xCA11ACAB 0x12349876 0x98763210 0x43216789]) == [ 0xCA11ACAB 0x12349876 0xC00FFFEE 0x43216789 ]
 ; Unsigned lane-wise maximum on i8x16.
 ; Same inputs as the signed i8x16 maximum test. In the second case the lanes
 ; with the top bit set are the larger values and are expected in each lane.
 function %umax_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = umax v0, v1
  return v2
 }
 ; run: %umax_i8x16([0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f], [0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f ]) == [ 0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f ]
 ; run: %umax_i8x16([0x90 0x01 0x92 0x03 0x94 0x05 0x96 0x07 0x98 0x09 0x9a 0x0b 0x9c 0x0d 0x9e 0x0f], [0x10 0x91 0x12 0x93 0x14 0x95 0x16 0x97 0x18 0x99 0x1a 0x9b 0x1c 0x9d 0x1e 0x9f ]) == [ 0x90 0x91 0x92 0x93 0x94 0x95 0x96 0x97 0x98 0x99 0x9a 0x9b 0x9c 0x9d 0x9e 0x9f ]
 ; Unsigned lane-wise maximum on i16x8.
 ; Lane 2 compares 0x9876 with 0x6789. Unsigned ordering expects 0x9876,
 ; unlike the signed variant of this test.
 function %umax_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = umax v0, v1
  return v2
 }
 ; run: %umax_i16x8([0x1234 0x5678 0x9876 0x5432 0x7654 0x1234 0x4567 0x3456 ], [ 0x4567 0x1234 0x6789 0x0987 0x0123 0x3210 0x7890 0x3456 ]) == [ 0x4567 0x5678 0x9876 0x5432 0x7654 0x3210 0x7890 0x3456 ]
 ; Unsigned lane-wise maximum on i32x4.
 ; Same inputs as the signed i32x4 maximum test. Lanes 1 and 3 differ from the
 ; signed result because the top-bit-set values are the larger ones here.
 function %umax_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = umax v0, v1
  return v2
 }
 ; run: %umax_i32x4([0xBAADF00D 0xDEADBEEF 0xC00FFFEE 0xBADAB00F], [0xCA11ACAB 0x12349876 0x98763210 0x43216789]) == [ 0xCA11ACAB 0xDEADBEEF 0xC00FFFEE 0xBADAB00F ]