diff --git a/cranelift/codegen/meta/src/cdsl/recipes.rs b/cranelift/codegen/meta/src/cdsl/recipes.rs index 9bb966f605..dfe4cd67a5 100644 --- a/cranelift/codegen/meta/src/cdsl/recipes.rs +++ b/cranelift/codegen/meta/src/cdsl/recipes.rs @@ -172,7 +172,7 @@ pub(crate) struct EncodingRecipeBuilder { pub base_size: u64, pub operands_in: Option>, pub operands_out: Option>, - compute_size: Option<&'static str>, + pub compute_size: Option<&'static str>, pub branch_range: Option, pub emit: Option, clobbers_flags: Option, diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 26b5334c26..4fe2232508 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -140,32 +140,59 @@ impl PerCpuModeEncodings { self.enc64.push(encoding); } + /// Adds I32/I64 encodings as appropriate for a typed instruction. + /// The REX prefix is always inferred at runtime. + /// /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. + /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX. /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. fn enc_i32_i64(&mut self, inst: impl Into, template: Template) { let inst: InstSpec = inst.into(); + + // I32 on x86: no REX prefix. + self.enc32(inst.bind(I32), template.infer_rex()); + + // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(I32), template.infer_rex()); + + // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(I64), template.infer_rex().w()); + } + + /// Adds I32/I64 encodings as appropriate for a typed instruction. + /// All variants of REX prefix are explicitly emitted, not inferred. + /// + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with and without REX. 
+ /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into, template: Template) { + let inst: InstSpec = inst.into(); self.enc32(inst.bind(I32), template.nonrex()); - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. + // REX-less encoding must come after REX encoding so we don't use it by default. + // Otherwise reg-alloc would never use r8 and up. self.enc64(inst.bind(I32), template.rex()); self.enc64(inst.bind(I32), template.nonrex()); self.enc64(inst.bind(I64), template.rex().w()); } - /// Add encodings for `inst.b32` to X86_32. - /// Add encodings for `inst.b32` to X86_64 with and without REX. - /// Add encodings for `inst.b64` to X86_64 with a REX.W prefix. + /// Adds B32/B64 encodings as appropriate for a typed instruction. + /// The REX prefix is always inferred at runtime. + /// + /// Adds encoding for `inst.b32` to X86_32. + /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX. + /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix. fn enc_b32_b64(&mut self, inst: impl Into, template: Template) { let inst: InstSpec = inst.into(); - self.enc32(inst.bind(B32), template.nonrex()); - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. - self.enc64(inst.bind(B32), template.rex()); - self.enc64(inst.bind(B32), template.nonrex()); - self.enc64(inst.bind(B64), template.rex().w()); + // B32 on x86: no REX prefix. + self.enc32(inst.bind(B32), template.infer_rex()); + + // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(B32), template.infer_rex()); + + // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(B64), template.infer_rex().w()); } /// Add encodings for `inst.i32` to X86_32. 
@@ -994,8 +1021,8 @@ pub(crate) fn define( e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); } - e.enc_i32_i64(spill, rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_i32_i64(regspill, rec_regspill32.opcodes(&MOV_STORE)); + e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE)); e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE)); e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE)); @@ -1020,8 +1047,8 @@ pub(crate) fn define( e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE)); } - e.enc_i32_i64(fill, rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_i32_i64(regfill, rec_regfill32.opcodes(&MOV_LOAD)); + e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD)); e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD)); e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD)); diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index 9451460482..521248082b 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -61,7 +61,7 @@ impl<'builder> RecipeGroup<'builder> { self.templates .iter() .find(|recipe| recipe.name() == name) - .unwrap_or_else(|| panic!("unknown tail recipe name: {}. Try recipe?", name)) + .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name)) } } @@ -132,6 +132,33 @@ fn replace_nonrex_constraints( .collect() } +/// Specifies how the REX prefix is emitted by a Recipe. +#[derive(Copy, Clone, PartialEq)] +pub enum RexRecipeKind { + /// The REX emission behavior is not hardcoded for the Recipe + /// and may be overridden when using the Template. + Unspecified, + + /// The Recipe must hardcode the non-emission of the REX prefix. 
+ NeverEmitRex, + + /// The Recipe must hardcode the emission of the REX prefix. + AlwaysEmitRex, + + /// The Recipe should infer the emission of the REX.RXB bits from registers, + /// and the REX.W bit from the EncodingBits. + /// + /// Because such a Recipe has a non-constant instruction size, it must have + /// a special `compute_size` handler for the inferrable-REX case. + InferRex, +} + +impl Default for RexRecipeKind { + fn default() -> Self { + Self::Unspecified + } +} + /// Previously called a TailRecipe in the Python meta language, this allows to create multiple /// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different /// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating @@ -145,16 +172,17 @@ pub(crate) struct Template<'builder> { /// The recipe template, which is to be specialized (by copy). recipe: EncodingRecipeBuilder, - /// Does this recipe requires a REX prefix? - requires_prefix: bool, + /// How is the REX prefix emitted? + rex_kind: RexRecipeKind, + + /// Function for `compute_size()` when REX is inferrable. + inferred_rex_compute_size: Option<&'static str>, /// Other recipe to use when REX-prefixed. when_prefixed: Option>>, - // Specialized parameters. - /// Should we include the REX prefix? - rex: bool, - /// Value of the W bit (0 or 1). + // Parameters passed in the EncodingBits. + /// Value of the W bit (0 or 1), stored in the EncodingBits. w_bit: u16, /// Value of the RRR bits (between 0 and 0b111). 
rrr_bits: u16, @@ -167,9 +195,9 @@ impl<'builder> Template<'builder> { Self { regs, recipe, - requires_prefix: false, + rex_kind: RexRecipeKind::default(), + inferred_rex_compute_size: None, when_prefixed: None, - rex: false, w_bit: 0, rrr_bits: 0, op_bytes: &opcodes::EMPTY, @@ -179,9 +207,15 @@ impl<'builder> Template<'builder> { fn name(&self) -> &str { &self.recipe.name } - fn requires_prefix(self, value: bool) -> Self { + fn rex_kind(self, kind: RexRecipeKind) -> Self { Self { - requires_prefix: value, + rex_kind: kind, + ..self + } + } + fn inferred_rex_compute_size(self, function: &'static str) -> Self { + Self { + inferred_rex_compute_size: Some(function), ..self } } @@ -212,12 +246,19 @@ impl<'builder> Template<'builder> { copy } pub fn nonrex(&self) -> Self { - assert!(!self.requires_prefix, "Tail recipe requires REX prefix."); + assert!( + self.rex_kind != RexRecipeKind::AlwaysEmitRex, + "Template requires REX prefix." + ); let mut copy = self.clone(); - copy.rex = false; + copy.rex_kind = RexRecipeKind::NeverEmitRex; copy } pub fn rex(&self) -> Self { + assert!( + self.rex_kind != RexRecipeKind::NeverEmitRex, + "Template requires no REX prefix." + ); if let Some(prefixed) = &self.when_prefixed { let mut ret = prefixed.rex(); // Forward specialized parameters. @@ -227,36 +268,62 @@ impl<'builder> Template<'builder> { return ret; } let mut copy = self.clone(); - copy.rex = true; + copy.rex_kind = RexRecipeKind::AlwaysEmitRex; + copy + } + pub fn infer_rex(&self) -> Self { + assert!( + self.rex_kind != RexRecipeKind::NeverEmitRex, + "Template requires no REX prefix." + ); + assert!( + self.when_prefixed.is_none(), + "infer_rex used with when_prefixed()." 
+ ); + let mut copy = self.clone(); + copy.rex_kind = RexRecipeKind::InferRex; copy } pub fn build(mut self) -> (EncodingRecipe, u16) { - let (name, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); - - let (name, rex_prefix_size) = if self.rex { - ("Rex".to_string() + name, 1) - } else { - (name.into(), 0) + let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); + + let (recipe_name, rex_prefix_size) = match self.rex_kind { + RexRecipeKind::Unspecified | RexRecipeKind::NeverEmitRex => { + // Ensure the operands are limited to non-REX constraints. + let operands_in = self.recipe.operands_in.unwrap_or_default(); + self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); + let operands_out = self.recipe.operands_out.unwrap_or_default(); + self.recipe.operands_out = + Some(replace_nonrex_constraints(self.regs, operands_out)); + + (opcode.into(), 0) + } + RexRecipeKind::AlwaysEmitRex => ("Rex".to_string() + opcode, 1), + RexRecipeKind::InferRex => { + // Hook up the right function for inferred compute_size(). + assert!( + self.inferred_rex_compute_size.is_some(), + "InferRex recipe '{}' needs an inferred_rex_compute_size function.", + &self.recipe.name + ); + self.recipe.compute_size = self.inferred_rex_compute_size; + + ("DynRex".to_string() + opcode, 0) + } }; let size_addendum = self.op_bytes.len() as u64 + rex_prefix_size; self.recipe.base_size += size_addendum; // Branch ranges are relative to the end of the instruction. + // For InferRex, the range should be the minimum, assuming no REX. 
if let Some(range) = self.recipe.branch_range.as_mut() { range.inst_size += size_addendum; } - self.recipe.emit = replace_put_op(self.recipe.emit, &name); - self.recipe.name = name + &self.recipe.name; - - if !self.rex { - let operands_in = self.recipe.operands_in.unwrap_or_default(); - self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); - let operands_out = self.recipe.operands_out.unwrap_or_default(); - self.recipe.operands_out = Some(replace_nonrex_constraints(self.regs, operands_out)); - } + self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name); + self.recipe.name = recipe_name + &self.recipe.name; (self.recipe.build(), bits) } @@ -438,29 +505,37 @@ pub(crate) fn define<'shared>( ); // XX /r - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rr", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rr", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), ); // XX /r with operands swapped. (RM form). 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("rrx", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rrx", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), ); // XX /r with FPR ins and outs. A form. @@ -513,31 +588,39 @@ pub(crate) fn define<'shared>( } // XX /n for a unary operation with extension bits. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ur", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("ur", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); // XX /r, but for a unary operator with separate input/output register, like // copies. MR form, preserving flags. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("umr", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("umr", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), ); // Same as umr, but with FPR -> GPR registers. @@ -643,17 +726,21 @@ pub(crate) fn define<'shared>( ); // XX /r, RM form, GPR -> FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("frurm", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("frurm", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), ); // XX /r, RM form, FPR -> GPR. @@ -734,62 +821,74 @@ pub(crate) fn define<'shared>( ); // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("div", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::FixedReg(reg_rdx), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![reg_rax, reg_rdx]) - .emit( - r#" - sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); - {{PUT_OP}}(bits, rex1(in_reg2), sink); - modrm_r_bits(in_reg2, bits, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("div", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![reg_rax, reg_rdx]) + .emit( + r#" + sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg2), sink); + modrm_r_bits(in_reg2, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"), ); // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) - recipes.add_template_recipe( - EncodingRecipeBuilder::new("mulx", &formats.binary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::FixedReg(reg_rdx), - ]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg1), sink); - modrm_r_bits(in_reg1, bits, sink); - "#, - ), - ); - - // XX /n ib with 8-bit immediate sign-extended. 
- { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm, - "imm", - 8, - 0, - )) + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("mulx", &formats.binary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + ]) .emit( r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); + {{PUT_OP}}(bits, rex1(in_reg1), sink); + modrm_r_bits(in_reg1, bits, sink); "#, ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"), + ); + + // XX /n ib with 8-bit immediate sign-extended. + { + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); recipes.add_template_recipe( @@ -813,24 +912,28 @@ pub(crate) fn define<'shared>( ); // XX /n id with 32-bit immediate sign-extended. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); } @@ -1326,7 +1429,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![gpr, gpr]) .inst_predicate(has_no_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1354,7 +1457,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![abcd, gpr]) .inst_predicate(has_no_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1383,7 +1486,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![fpr, gpr]) .inst_predicate(has_no_offset) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1412,7 +1515,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![gpr, gpr]) .inst_predicate(has_small_offset.clone()) .clobbers_flags(false) - 
.compute_size("size_plus_maybe_sib_for_in_reg_1") + .compute_size("size_plus_maybe_sib_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1439,7 +1542,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![abcd, gpr]) .inst_predicate(has_small_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_1") + .compute_size("size_plus_maybe_sib_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1467,7 +1570,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![fpr, gpr]) .inst_predicate(has_small_offset) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_1") + .compute_size("size_plus_maybe_sib_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1491,7 +1594,7 @@ pub(crate) fn define<'shared>( EncodingRecipeBuilder::new("stDisp32", &formats.store, 5) .operands_in(vec![gpr, gpr]) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_1") + .compute_size("size_plus_maybe_sib_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1517,7 +1620,7 @@ pub(crate) fn define<'shared>( EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5) .operands_in(vec![abcd, gpr]) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_1") + .compute_size("size_plus_maybe_sib_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1544,7 +1647,7 @@ pub(crate) fn define<'shared>( EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5) .operands_in(vec![fpr, gpr]) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_1") + .compute_size("size_plus_maybe_sib_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1577,7 +1680,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![gpr, gpr, gpr]) .inst_predicate(has_no_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_in_reg_1") + .compute_size("size_plus_maybe_offset_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1604,7 +1707,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![abcd, gpr, gpr]) 
.inst_predicate(has_no_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_in_reg_1") + .compute_size("size_plus_maybe_offset_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1630,7 +1733,7 @@ pub(crate) fn define<'shared>( .operands_in(vec![fpr, gpr, gpr]) .inst_predicate(has_no_offset) .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_in_reg_1") + .compute_size("size_plus_maybe_offset_for_inreg_1") .emit( r#" if !flags.notrap() { @@ -1867,7 +1970,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![gpr]) .inst_predicate(has_no_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_0") + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") .emit( r#" if !flags.notrap() { @@ -1894,7 +1997,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![fpr]) .inst_predicate(has_no_offset) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_0") + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") .emit( r#" if !flags.notrap() { @@ -1924,7 +2027,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![gpr]) .inst_predicate(has_small_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_0") + .compute_size("size_plus_maybe_sib_for_inreg_0") .emit( r#" if !flags.notrap() { @@ -1950,7 +2053,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![fpr]) .inst_predicate(has_small_offset) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_0") + .compute_size("size_plus_maybe_sib_for_inreg_0") .emit( r#" if !flags.notrap() { @@ -1979,7 +2082,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![gpr]) .inst_predicate(has_big_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_0") + .compute_size("size_plus_maybe_sib_for_inreg_0") .emit( r#" if !flags.notrap() { @@ -2005,7 +2108,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![fpr]) 
.inst_predicate(has_big_offset) .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_in_reg_0") + .compute_size("size_plus_maybe_sib_for_inreg_0") .emit( r#" if !flags.notrap() { @@ -2039,7 +2142,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![gpr]) .inst_predicate(has_no_offset.clone()) .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_in_reg_0") + .compute_size("size_plus_maybe_offset_for_inreg_0") .emit( r#" if !flags.notrap() { @@ -2066,7 +2169,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![fpr]) .inst_predicate(has_no_offset) .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_in_reg_0") + .compute_size("size_plus_maybe_offset_for_inreg_0") .emit( r#" if !flags.notrap() { @@ -2396,7 +2499,7 @@ pub(crate) fn define<'shared>( .operands_out(vec![gpr]) .clobbers_flags(false) .inst_predicate(valid_scale(&*formats.branch_table_entry)) - .compute_size("size_plus_maybe_offset_for_in_reg_1") + .compute_size("size_plus_maybe_offset_for_inreg_1") .emit( r#" {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink); @@ -2475,7 +2578,7 @@ pub(crate) fn define<'shared>( ), regs, ) - .requires_prefix(true), + .rex_kind(RexRecipeKind::AlwaysEmitRex), ); recipes.add_template( @@ -2509,7 +2612,7 @@ pub(crate) fn define<'shared>( ), regs, ) - .requires_prefix(true), + .rex_kind(RexRecipeKind::AlwaysEmitRex), ); recipes.add_template( @@ -2532,110 +2635,134 @@ pub(crate) fn define<'shared>( // Conditional move (a.k.a integer select) // (maybe-REX.W) 0F 4x modrm(r,r) // 1 byte, modrm(r,r), is after the opcode - recipes.add_template_recipe( - EncodingRecipeBuilder::new("cmov", &formats.int_select, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rflags), - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![2]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink); - modrm_rr(in_reg1, in_reg2, sink); - "#, - ), + 
recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("cmov", &formats.int_select, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rflags), + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![2]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink); + modrm_rr(in_reg1, in_reg2, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"), ); // Bit scan forwards and reverse - recipes.add_template_recipe( - EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), ); // Arithematic with flag I/O. // XX /r, MR form. Add two GPR registers and set carry flag. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("rout", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![ - OperandConstraint::TiedInput(0), - OperandConstraint::FixedReg(reg_rflags), - ]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rout", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), ); // XX /r, MR form. Add two GPR registers and get carry flag. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rin", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .operands_out(vec![0]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rin", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![0]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), ); // XX /r, MR form. Add two GPR registers with carry flag. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("rio", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .operands_out(vec![ - OperandConstraint::TiedInput(0), - OperandConstraint::FixedReg(reg_rflags), - ]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rio", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), ); // Compare and set flags. // XX /r, MR form. Compare two GPR registers and set flags. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rcmp", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), ); // Same as rcmp, but second operand is the stack pointer. @@ -2669,38 +2796,46 @@ pub(crate) fn define<'shared>( InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 8, 0); // XX /n, MI form with imm8. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm, 2) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .inst_predicate(has_small_offset) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm, 2) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_small_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); let has_big_offset = InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 32, 0); // XX /n, MI form with imm32. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm, 5) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .inst_predicate(has_big_offset) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm, 5) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_big_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); } @@ -2718,37 +2853,45 @@ pub(crate) fn define<'shared>( // Bits 0-7 are the Jcc opcode. // Bits 8-15 control the test instruction which always has opcode byte 0x85. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2) - .operands_in(vec![gpr]) - .branch_range((3, 8)) - .emit( - r#" - // test r, r. - {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); - recipes.add_template_recipe( - EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6) - .operands_in(vec![gpr]) - .branch_range((7, 32)) - .emit( - r#" - // test r, r. - {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6) + .operands_in(vec![gpr]) + .branch_range((7, 32)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); // 8-bit test-and-branch. 
@@ -2770,7 +2913,7 @@ pub(crate) fn define<'shared>( ), regs, ) - .requires_prefix(true), + .rex_kind(RexRecipeKind::AlwaysEmitRex), ); recipes.add_template( @@ -2811,7 +2954,7 @@ pub(crate) fn define<'shared>( ), regs, ) - .requires_prefix(true), + .rex_kind(RexRecipeKind::AlwaysEmitRex), ); recipes.add_template( @@ -2878,22 +3021,26 @@ pub(crate) fn define<'shared>( // instruction, so it is limited to the `ABCD` register class for booleans. // The omission of a `when_prefixed` alternative is deliberate here. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + // `setCC` instruction, no REX. + let setcc = 0x90 | icc2opc(cond); + sink.put1(0x0f); + sink.put1(setcc as u8); + modrm_rr(out_reg0, 0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), ); recipes.add_template_recipe( @@ -2913,49 +3060,57 @@ pub(crate) fn define<'shared>( let is_small_imm = InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0); - recipes.add_template_recipe( - EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .inst_predicate(is_small_imm) - .emit( - r#" - // Comparison instruction. 
- {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .inst_predicate(is_small_imm) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + // `setCC` instruction, no REX. + let setcc = 0x90 | icc2opc(cond); + sink.put1(0x0f); + sink.put1(setcc as u8); + modrm_rr(out_reg0, 0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); let is_big_imm = InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0); - recipes.add_template_recipe( - EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .inst_predicate(is_big_imm) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .inst_predicate(is_big_imm) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + // `setCC` instruction, no REX. 
+ let setcc = 0x90 | icc2opc(cond); + sink.put1(0x0f); + sink.put1(setcc as u8); + modrm_rr(out_reg0, 0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), ); } diff --git a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs index 5195c24c5c..ba6418f7cf 100644 --- a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs +++ b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs @@ -57,6 +57,24 @@ impl EncodingBits { new } + /// Returns a copy of the EncodingBits with the RRR bits set. + #[inline] + pub fn with_rrr(self, rrr: u8) -> Self { + debug_assert_eq!(u8::from(self.rrr()), 0); + let mut enc = self.clone(); + enc.write(RRR, rrr.into()); + enc + } + + /// Returns a copy of the EncodingBits with the REX.W bit set. + #[inline] + pub fn with_rex_w(self) -> Self { + debug_assert_eq!(self.rex_w(), 0); + let mut enc = self.clone(); + enc.write(REX_W, 1); + enc + } + /// Returns the raw bits. #[inline] pub fn bits(self) -> u16 { diff --git a/cranelift/codegen/src/isa/x86/binemit.rs b/cranelift/codegen/src/isa/x86/binemit.rs index 4fc074abe5..fa67e5efff 100644 --- a/cranelift/codegen/src/isa/x86/binemit.rs +++ b/cranelift/codegen/src/isa/x86/binemit.rs @@ -61,6 +61,12 @@ fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 { BASE_REX | b | (x << 1) | (r << 2) } +/// Determines whether a REX prefix should be emitted. +#[inline] +fn needs_rex(bits: u16, rex: u8) -> bool { + rex != BASE_REX || u8::from(EncodingBits::from(bits).rex_w()) == 1 +} + // Emit a REX prefix. // // The R, X, and B bits are computed from registers using the functions above. The W bit is @@ -80,11 +86,20 @@ fn put_op1(bits: u16, rex: u8, sink: &mut CS) { // Emit a single-byte opcode with REX prefix. 
fn put_rexop1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for Op1*"); + debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*"); rex_prefix(bits, rex, sink); sink.put1(bits as u8); } +/// Emit a single-byte opcode with inferred REX prefix. +fn put_dynrexop1(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*"); + if needs_rex(bits, rex) { + rex_prefix(bits, rex, sink); + } + sink.put1(bits as u8); +} + // Emit two-byte opcode: 0F XX fn put_op2(bits: u16, rex: u8, sink: &mut CS) { debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*"); @@ -101,6 +116,20 @@ fn put_rexop2(bits: u16, rex: u8, sink: &mut CS) { sink.put1(bits as u8); } +/// Emit two-byte opcode: 0F XX with inferred REX prefix. +fn put_dynrexop2(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!( + bits & 0x0f00, + 0x0400, + "Invalid encoding bits for DynRexOp2*" + ); + if needs_rex(bits, rex) { + rex_prefix(bits, rex, sink); + } + sink.put1(0x0f); + sink.put1(bits as u8); +} + // Emit single-byte opcode with mandatory prefix. fn put_mp1(bits: u16, rex: u8, sink: &mut CS) { debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*"); @@ -112,7 +141,7 @@ fn put_mp1(bits: u16, rex: u8, sink: &mut CS) { // Emit single-byte opcode with mandatory prefix and REX. fn put_rexmp1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*"); + debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*"); let enc = EncodingBits::from(bits); sink.put1(PREFIX[(enc.pp() - 1) as usize]); rex_prefix(bits, rex, sink); @@ -131,7 +160,7 @@ fn put_mp2(bits: u16, rex: u8, sink: &mut CS) { // Emit two-byte opcode (0F XX) with mandatory prefix and REX. 
fn put_rexmp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for Mp2*"); + debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*"); let enc = EncodingBits::from(bits); sink.put1(PREFIX[(enc.pp() - 1) as usize]); rex_prefix(bits, rex, sink); @@ -139,6 +168,22 @@ fn put_rexmp2(bits: u16, rex: u8, sink: &mut CS) { sink.put1(bits as u8); } +/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX. +fn put_dynrexmp2(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!( + bits & 0x0c00, + 0x0400, + "Invalid encoding bits for DynRexMp2*" + ); + let enc = EncodingBits::from(bits); + sink.put1(PREFIX[(enc.pp() - 1) as usize]); + if needs_rex(bits, rex) { + rex_prefix(bits, rex, sink); + } + sink.put1(0x0f); + sink.put1(bits as u8); +} + // Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix. fn put_mp3(bits: u16, rex: u8, sink: &mut CS) { debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*"); @@ -152,7 +197,7 @@ fn put_mp3(bits: u16, rex: u8, sink: &mut CS) { // Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX fn put_rexmp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for Mp3*"); + debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*"); let enc = EncodingBits::from(bits); sink.put1(PREFIX[(enc.pp() - 1) as usize]); rex_prefix(bits, rex, sink); diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index fd95a50a01..d45f1314aa 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -16,9 +16,20 @@ use crate::isa::{self, TargetIsa}; use crate::predicates; use crate::regalloc::RegDiversions; +use cranelift_codegen_shared::isa::x86::EncodingBits; + include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs")); include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs")); +/// 
Whether the REX prefix is needed for encoding extended registers (via REX.RXB). +/// +/// Normal x86 instructions have only 3 bits for encoding a register. +/// The REX prefix adds REX.R, REX.X, and REX.B bits, interpreted as fourth bits. +pub fn is_extended_reg(reg: RegUnit) -> bool { + // Extended registers have the fourth bit set. + reg as u8 & 0b1000 != 0 +} + pub fn needs_sib_byte(reg: RegUnit) -> bool { reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit } @@ -29,74 +40,179 @@ pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool { needs_sib_byte(reg) || needs_offset(reg) } -fn additional_size_if( +fn test_input( op_index: usize, inst: Inst, divert: &RegDiversions, func: &Function, condition_func: fn(RegUnit) -> bool, -) -> u8 { - let addr_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations); - if condition_func(addr_reg) { - 1 - } else { - 0 - } +) -> bool { + let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations); + condition_func(in_reg) +} + +fn test_result( + result_index: usize, + inst: Inst, + divert: &RegDiversions, + func: &Function, + condition_func: fn(RegUnit) -> bool, +) -> bool { + let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations); + condition_func(out_reg) } -fn size_plus_maybe_offset_for_in_reg_0( +fn size_plus_maybe_offset_for_inreg_0( sizing: &RecipeSizing, _enc: Encoding, inst: Inst, divert: &RegDiversions, func: &Function, ) -> u8 { - sizing.base_size + additional_size_if(0, inst, divert, func, needs_offset) + let needs_offset = test_input(0, inst, divert, func, needs_offset); + sizing.base_size + if needs_offset { 1 } else { 0 } } -fn size_plus_maybe_offset_for_in_reg_1( +fn size_plus_maybe_offset_for_inreg_1( sizing: &RecipeSizing, _enc: Encoding, inst: Inst, divert: &RegDiversions, func: &Function, ) -> u8 { - sizing.base_size + additional_size_if(1, inst, divert, func, needs_offset) + let needs_offset = test_input(1, inst, divert, func, needs_offset); +
sizing.base_size + if needs_offset { 1 } else { 0 } } -fn size_plus_maybe_sib_for_in_reg_0( +fn size_plus_maybe_sib_for_inreg_0( sizing: &RecipeSizing, _enc: Encoding, inst: Inst, divert: &RegDiversions, func: &Function, ) -> u8 { - sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte) + let needs_sib = test_input(0, inst, divert, func, needs_sib_byte); + sizing.base_size + if needs_sib { 1 } else { 0 } } -fn size_plus_maybe_sib_for_in_reg_1( +fn size_plus_maybe_sib_for_inreg_1( sizing: &RecipeSizing, _enc: Encoding, inst: Inst, divert: &RegDiversions, func: &Function, ) -> u8 { - sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte) + let needs_sib = test_input(1, inst, divert, func, needs_sib_byte); + sizing.base_size + if needs_sib { 1 } else { 0 } } -fn size_plus_maybe_sib_or_offset_for_in_reg_0( +fn size_plus_maybe_sib_or_offset_for_inreg_0( sizing: &RecipeSizing, _enc: Encoding, inst: Inst, divert: &RegDiversions, func: &Function, ) -> u8 { - sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte_or_offset) + let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset); + sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } } -fn size_plus_maybe_sib_or_offset_for_in_reg_1( +fn size_plus_maybe_sib_or_offset_for_inreg_1( sizing: &RecipeSizing, _enc: Encoding, inst: Inst, divert: &RegDiversions, func: &Function, ) -> u8 { - sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte_or_offset) + let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset); + sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg. +/// +/// A REX prefix is known to be emitted if either: +/// 1. The EncodingBits specify that REX.W is to be set. +/// 2. Registers are used that require REX.R or REX.B bits for encoding. 
+fn size_with_inferred_rex_for_inreg0( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, based on the second operand. +fn size_with_inferred_rex_for_inreg1( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(1, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, based on the third operand. +fn size_with_inferred_rex_for_inreg2( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(2, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, for use with two input registers. +/// +/// A REX prefix is known to be emitted if either: +/// 1. The EncodingBits specify that REX.W is to be set. +/// 2. Registers are used that require REX.R or REX.B bits for encoding. +fn size_with_inferred_rex_for_inreg0_inreg1( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg) + || test_input(1, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, based on a single +/// input register and a single output register. 
+fn size_with_inferred_rex_for_inreg0_outreg0( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg) + || test_result(0, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV. +/// +/// CMOV uses 3 inputs, with the REX inferred from reg1 and reg2. +fn size_with_inferred_rex_for_cmov( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(1, inst, divert, func, is_extended_reg) + || test_input(2, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } } /// If the value's definition is a constant immediate, returns its unpacked value, or None diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif index e7abc4a273..2f8ce5d78a 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif @@ -15,7 +15,7 @@ ebb1: ; sameln: function %br_icmp(i64 [%rdi]) fast { ; nextln: ebb0(v0: i64): ; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v2 = icmp eq v0, v1 +; nextln: [DynRexOp1icscc#8039] v2 = icmp eq v0, v1 ; nextln: [RexOp1t8jccb#75] brnz v2, ebb1 ; nextln: [Op1jmpb#eb] jump ebb1 ; nextln: @@ -37,7 +37,7 @@ ebb1(v2: i64): ; sameln: function %br_icmp_ebb_args(i64 [%rdi]) fast { ; nextln: ebb0(v0: i64): ; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v3 = icmp eq v0, v1 +; nextln: [DynRexOp1icscc#8039] v3 = icmp eq v0, v1 ; nextln: [RexOp1t8jccb#75] brnz v3, ebb1(v0) ; nextln: [Op1jmpb#eb] jump ebb1(v0) ; 
nextln: diff --git a/cranelift/filetests/filetests/isa/x86/relax_branch.clif b/cranelift/filetests/filetests/isa/x86/relax_branch.clif index 15c7e876a3..b97ca82f13 100644 --- a/cranelift/filetests/filetests/isa/x86/relax_branch.clif +++ b/cranelift/filetests/filetests/isa/x86/relax_branch.clif @@ -21,95 +21,95 @@ function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] ba @0005 [-] fallthrough ebb3(v0, v1) ebb3(v8: i32 [%rdi], v19: i32 [%rsi]): -@0005 [RexOp1ldDisp8#808b,%rax] v7 = load.i64 v2+48 -@0005 [RexOp1rcmp_ib#f083,%rflags] v91 = ifcmp_imm v7, 0 -@0005 [trapif#00] trapif ne v91, interrupt -[Op1umr#89,%rax] v105 = copy v8 -@000b [Op1r_ib#83,%rax] v10 = iadd_imm v105, 1 - v80 -> v10 -@0010 [Op1umr#89,%rcx] v92 = uextend.i64 v8 -@0010 [RexOp1ld#808b,%rdx] v93 = load.i64 notrap aligned readonly v2 - v95 -> v93 -@0010 [Op2ldWithIndex#4be,%rcx] v12 = sload8_complex.i32 v93+v92 -[Op1umr#89,%rbx] v106 = copy v12 -@0017 [Op1r_ib#40c1,%rbx] v14 = ishl_imm v106, 24 -@001a [Op1r_ib#70c1,%rbx] v16 = sshr_imm v14, 24 -[Op1umr#89,%rdi] v107 = copy v16 -@001f [Op1r_ib#83,%rdi] v18 = iadd_imm v107, 32 -[RexOp1umr#89,%r8] v108 = copy v19 -@0026 [RexOp1r_ib#83,%r8] v21 = iadd_imm v108, 1 - v82 -> v21 -@002b [Op1umr#89,%rsi] v94 = uextend.i64 v19 -@002b [Op2ldWithIndex#4be,%rdx] v23 = sload8_complex.i32 v93+v94 - v55 -> v23 -[Op1umr#89,%rsi] v109 = copy v23 -@0032 [Op1r_ib#40c1,%rsi] v25 = ishl_imm v109, 24 -@0035 [Op1r_ib#70c1,%rsi] v27 = sshr_imm v25, 24 - v69 -> v27 -[RexOp1umr#89,%r9] v110 = copy v27 -@003a [RexOp1r_ib#83,%r9] v29 = iadd_imm v110, 32 - v68 -> v29 -@0042 [Op1r_ib#83,%rcx] v31 = iadd_imm v12, -65 -@0045 [Op1r_ib#40c1,%rcx] v33 = ishl_imm v31, 24 -@0048 [Op1r_ib#70c1,%rcx] v35 = sshr_imm v33, 24 -@004c [Op1r_id#4081,%rcx] v37 = band_imm v35, 255 -[Op1rcmp_ib#7083,%rflags] v97 = ifcmp_imm v37, 26 -@0050 [Op1brib#70] brif sge v97, ebb6 -@0050 [-] fallthrough ebb10 +@0005 [RexOp1ldDisp8#808b,%rax] v7 = load.i64 v2+48 +@0005 
[DynRexOp1rcmp_ib#f083,%rflags] v91 = ifcmp_imm v7, 0 +@0005 [trapif#00] trapif ne v91, interrupt +[DynRexOp1umr#89,%rax] v105 = copy v8 +@000b [DynRexOp1r_ib#83,%rax] v10 = iadd_imm v105, 1 + v80 -> v10 +@0010 [Op1umr#89,%rcx] v92 = uextend.i64 v8 +@0010 [RexOp1ld#808b,%rdx] v93 = load.i64 notrap aligned readonly v2 + v95 -> v93 +@0010 [Op2ldWithIndex#4be,%rcx] v12 = sload8_complex.i32 v93+v92 +[DynRexOp1umr#89,%rbx] v106 = copy v12 +@0017 [DynRexOp1r_ib#40c1,%rbx] v14 = ishl_imm v106, 24 +@001a [DynRexOp1r_ib#70c1,%rbx] v16 = sshr_imm v14, 24 +[DynRexOp1umr#89,%rdi] v107 = copy v16 +@001f [DynRexOp1r_ib#83,%rdi] v18 = iadd_imm v107, 32 +[DynRexOp1umr#89,%r8] v108 = copy v19 +@0026 [DynRexOp1r_ib#83,%r8] v21 = iadd_imm v108, 1 + v82 -> v21 +@002b [Op1umr#89,%rsi] v94 = uextend.i64 v19 +@002b [Op2ldWithIndex#4be,%rdx] v23 = sload8_complex.i32 v93+v94 + v55 -> v23 +[DynRexOp1umr#89,%rsi] v109 = copy v23 +@0032 [DynRexOp1r_ib#40c1,%rsi] v25 = ishl_imm v109, 24 +@0035 [DynRexOp1r_ib#70c1,%rsi] v27 = sshr_imm v25, 24 + v69 -> v27 +[DynRexOp1umr#89,%r9] v110 = copy v27 +@003a [DynRexOp1r_ib#83,%r9] v29 = iadd_imm v110, 32 + v68 -> v29 +@0042 [DynRexOp1r_ib#83,%rcx] v31 = iadd_imm v12, -65 +@0045 [DynRexOp1r_ib#40c1,%rcx] v33 = ishl_imm v31, 24 +@0048 [DynRexOp1r_ib#70c1,%rcx] v35 = sshr_imm v33, 24 +@004c [DynRexOp1r_id#4081,%rcx] v37 = band_imm v35, 255 +[DynRexOp1rcmp_ib#7083,%rflags] v97 = ifcmp_imm v37, 26 +@0050 [Op1brib#70] brif sge v97, ebb6 +@0050 [-] fallthrough ebb10 ebb10: -[Op1umr#89,%rcx] v101 = copy v18 +[DynRexOp1umr#89,%rcx] v101 = copy v18 @0054 [Op1jmpb#eb] jump ebb5(v18, v101) ebb6: -[Op1umr#89,%rcx] v102 = copy.i32 v16 +[DynRexOp1umr#89,%rcx] v102 = copy.i32 v16 @0059 [RexOp1rmov#89] regmove v102, %rcx -> %rdi @0059 [RexOp1rmov#89] regmove.i32 v16, %rbx -> %rcx @0059 [-] fallthrough ebb5(v102, v16) ebb5(v41: i32 [%rdi], v84: i32 [%rcx]): v83 -> v84 -@005d [Op1r_id#4081,%rdi] v43 = band_imm v41, 255 -@0062 [Op1r_ib#40c1,%rdi] v45 = ishl_imm v43, 24 
+@005d [DynRexOp1r_id#4081,%rdi] v43 = band_imm v41, 255 +@0062 [DynRexOp1r_ib#40c1,%rdi] v45 = ishl_imm v43, 24 v52 -> v45 @0065 [RexOp1rmov#89] regmove v45, %rdi -> %rbx -@0065 [Op1r_ib#70c1,%rbx] v47 = sshr_imm v45, 24 +@0065 [DynRexOp1r_ib#70c1,%rbx] v47 = sshr_imm v45, 24 v54 -> v47 @0068 [RexOp1rmov#89] regmove v47, %rbx -> %rdi -@0068 [Op1icscc_ib#7083,%rbx] v49 = icmp_imm ne v47, 0 +@0068 [DynRexOp1icscc_ib#7083,%rbx] v49 = icmp_imm ne v47, 0 @0068 [RexOp2urm_noflags#4b6,%r10] v50 = bint.i32 v49 -@0076 [Op1r_ib#83,%rdx] v57 = iadd_imm.i32 v23, -65 -@0079 [Op1r_ib#40c1,%rdx] v59 = ishl_imm v57, 24 -@007c [Op1r_ib#70c1,%rdx] v61 = sshr_imm v59, 24 -@0080 [Op1r_id#4081,%rdx] v63 = band_imm v61, 255 -[Op1rcmp_ib#7083,%rflags] v98 = ifcmp_imm v63, 26 +@0076 [DynRexOp1r_ib#83,%rdx] v57 = iadd_imm.i32 v23, -65 +@0079 [DynRexOp1r_ib#40c1,%rdx] v59 = ishl_imm v57, 24 +@007c [DynRexOp1r_ib#70c1,%rdx] v61 = sshr_imm v59, 24 +@0080 [DynRexOp1r_id#4081,%rdx] v63 = band_imm v61, 255 +[DynRexOp1rcmp_ib#7083,%rflags] v98 = ifcmp_imm v63, 26 @0084 [RexOp1rmov#89] regmove v47, %rdi -> %rbx @0084 [Op1brib#70] brif sge v98, ebb8 @0084 [-] fallthrough ebb11 ebb11: -[RexOp1umr#89,%rdx] v103 = copy.i32 v29 +[DynRexOp1umr#89,%rdx] v103 = copy.i32 v29 @0088 [Op1jmpb#eb] jump ebb7(v29, v10, v21, v103) ebb8: -[Op1umr#89,%rdx] v104 = copy.i32 v27 +[DynRexOp1umr#89,%rdx] v104 = copy.i32 v27 @008d [RexOp1rmov#89] regmove v104, %rdx -> %r9 @008d [RexOp1rmov#89] regmove.i32 v27, %rsi -> %rdx @008d [-] fallthrough ebb7(v104, v10, v21, v27) ebb7(v67: i32 [%r9], v79: i32 [%rax], v81: i32 [%r8], v87: i32 [%rdx]): -@0091 [RexOp1r_id#4081,%r9] v71 = band_imm v67, 255 -@0094 [RexOp1r_ib#40c1,%r9] v73 = ishl_imm v71, 24 -@0097 [RexOp1r_ib#70c1,%r9] v75 = sshr_imm v73, 24 -@0098 [RexOp1icscc#39,%rbx] v76 = icmp.i32 eq v47, v75 -@0098 [Op2urm_noflags_abcd#4b6,%rbx] v77 = bint.i32 v76 -@0099 [RexOp1rr#21,%r10] v78 = band.i32 v50, v77 -@009a [RexOp1tjccb#74] brz v78, ebb9 -@009a [-] fallthrough ebb12 
+@0091 [DynRexOp1r_id#4081,%r9] v71 = band_imm v67, 255 +@0094 [DynRexOp1r_ib#40c1,%r9] v73 = ishl_imm v71, 24 +@0097 [DynRexOp1r_ib#70c1,%r9] v75 = sshr_imm v73, 24 +@0098 [DynRexOp1icscc#39,%rbx] v76 = icmp.i32 eq v47, v75 +@0098 [Op2urm_noflags_abcd#4b6,%rbx] v77 = bint.i32 v76 +@0099 [DynRexOp1rr#21,%r10] v78 = band.i32 v50, v77 +@009a [DynRexOp1tjccb#74] brz v78, ebb9 +@009a [-] fallthrough ebb12 ebb12: -[RexOp1umr#89,%rcx] v99 = copy v81 -[Op1umr#89,%rdx] v100 = copy v79 +[DynRexOp1umr#89,%rcx] v99 = copy v81 +[DynRexOp1umr#89,%rdx] v100 = copy v79 @00a4 [RexOp1rmov#89] regmove v100, %rdx -> %rdi @00a4 [RexOp1rmov#89] regmove v99, %rcx -> %rsi @00a4 [Op1jmpd#e9] jump ebb3(v100, v99); bin: e9 ffffff2d @@ -118,9 +118,9 @@ function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] ba @00a7 [-] fallthrough ebb4 ebb4: -@00ad [Op1r_id#4081,%rcx] v86 = band_imm.i32 v84, 255 -@00b3 [Op1r_id#4081,%rdx] v89 = band_imm.i32 v87, 255 -@00b4 [Op1rr#29,%rcx] v90 = isub v86, v89 +@00ad [DynRexOp1r_id#4081,%rcx] v86 = band_imm.i32 v84, 255 +@00b3 [DynRexOp1r_id#4081,%rdx] v89 = band_imm.i32 v87, 255 +@00b4 [DynRexOp1rr#29,%rcx] v90 = isub v86, v89 @00b5 [-] fallthrough ebb2(v90) ebb2(v5: i32 [%rcx]): diff --git a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif index 358d098a6a..50e2389feb 100644 --- a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif +++ b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif @@ -4,7 +4,7 @@ target x86_64 function %test_multiple_uses(i32 [%rdi]) -> i32 { ebb0(v0: i32 [%rdi]): -[Op1rcmp_ib#7083,%rflags] v3 = ifcmp_imm v0, 0 +[DynRexOp1rcmp_ib#7083,%rflags] v3 = ifcmp_imm v0, 0 [Op2seti_abcd#490,%rax] v1 = trueif eq v3 [RexOp2urm_noflags#4b6,%rax] v2 = bint.i32 v1 [Op1brib#70] brif eq v3, ebb1 diff --git a/cranelift/filetests/filetests/postopt/basic.clif b/cranelift/filetests/filetests/postopt/basic.clif index 
442d47de89..c38065f947 100644 --- a/cranelift/filetests/filetests/postopt/basic.clif +++ b/cranelift/filetests/filetests/postopt/basic.clif @@ -5,9 +5,9 @@ target i686 function %br_icmp(i32, i32) -> i32 { ebb0(v0: i32, v1: i32): -[Op1icscc#39,%rdx] v2 = icmp slt v0, v1 -[Op1t8jccd_long#85] brnz v2, ebb1 -[Op1jmpb#eb] jump ebb2 +[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 +[Op1t8jccd_long#85] brnz v2, ebb1 +[Op1jmpb#eb] jump ebb2 ebb2: [Op1ret#c3] return v1 @@ -35,9 +35,9 @@ ebb1: function %br_icmp_inverse(i32, i32) -> i32 { ebb0(v0: i32, v1: i32): -[Op1icscc#39,%rdx] v2 = icmp slt v0, v1 -[Op1t8jccd_long#84] brz v2, ebb1 -[Op1jmpb#eb] jump ebb2 +[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 +[Op1t8jccd_long#84] brz v2, ebb1 +[Op1jmpb#eb] jump ebb2 ebb2: [Op1ret#c3] return v1 @@ -65,9 +65,9 @@ ebb1: function %br_icmp_imm(i32, i32) -> i32 { ebb0(v0: i32, v1: i32): -[Op1icscc_ib#7083] v2 = icmp_imm slt v0, 2 -[Op1t8jccd_long#84] brz v2, ebb1 -[Op1jmpb#eb] jump ebb2 +[DynRexOp1icscc_ib#7083] v2 = icmp_imm slt v0, 2 +[Op1t8jccd_long#84] brz v2, ebb1 +[Op1jmpb#eb] jump ebb2 ebb2: [Op1ret#c3] return v1 diff --git a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif index 0977fa0d7d..bae58cd8bb 100644 --- a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif +++ b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif @@ -3,7 +3,7 @@ target x86_64 function %dual_loads(i64, i64) -> i64 { ebb0(v0: i64, v1: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 +[DynRexOp1rr#8001] v3 = iadd v0, v1 v4 = load.i64 v3 v5 = uload8.i64 v3 v6 = sload8.i64 v3 @@ -29,7 +29,7 @@ ebb0(v0: i64, v1: i64): function %dual_loads2(i64, i64) -> i64 { ebb0(v0: i64, v1: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 +[DynRexOp1rr#8001] v3 = iadd v0, v1 v4 = load.i64 v3+1 v5 = uload8.i64 v3+1 v6 = sload8.i64 v3+1 @@ -55,7 +55,7 @@ ebb0(v0: i64, v1: i64): function %dual_stores(i64, i64, i64) { ebb0(v0: i64, v1: i64, v2: i64): 
-[RexOp1rr#8001] v3 = iadd v0, v1 +[DynRexOp1rr#8001] v3 = iadd v0, v1 [RexOp1st#8089] store.i64 v2, v3 [RexOp1st#88] istore8.i64 v2, v3 [RexMp1st#189] istore16.i64 v2, v3 @@ -75,7 +75,7 @@ ebb0(v0: i64, v1: i64, v2: i64): function %dual_stores2(i64, i64, i64) { ebb0(v0: i64, v1: i64, v2: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 +[DynRexOp1rr#8001] v3 = iadd v0, v1 [RexOp1stDisp8#8089] store.i64 v2, v3+1 [RexOp1stDisp8#88] istore8.i64 v2, v3+1 [RexMp1stDisp8#189] istore16.i64 v2, v3+1 diff --git a/cranelift/filetests/filetests/regalloc/coloring-227.clif b/cranelift/filetests/filetests/regalloc/coloring-227.clif index a469230b51..1d4860815a 100644 --- a/cranelift/filetests/filetests/regalloc/coloring-227.clif +++ b/cranelift/filetests/filetests/regalloc/coloring-227.clif @@ -8,7 +8,7 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64): [RexOp1pu_id#b8] v5 = iconst.i32 0 [RexOp1pu_id#b8] v6 = iconst.i32 0 -[RexOp1tjccb#74] brz v6, ebb10 +[DynRexOp1tjccb#74] brz v6, ebb10 [Op1jmpb#eb] jump ebb3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3) ebb3(v15: i32, v17: i32, v25: i32, v31: i32, v40: i32, v47: i32, v54: i32, v61: i32, v68: i32, v75: i32): @@ -16,33 +16,33 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb6: [RexOp1pu_id#b8] v8 = iconst.i32 0 -[RexOp1tjccb#75] brnz v8, ebb5 +[DynRexOp1tjccb#75] brnz v8, ebb5 [Op1jmpb#eb] jump ebb20 ebb20: [RexOp1pu_id#b8] v9 = iconst.i32 0 [RexOp1pu_id#b8] v11 = iconst.i32 0 -[RexOp1icscc#39] v12 = icmp.i32 eq v15, v11 +[DynRexOp1icscc#39] v12 = icmp.i32 eq v15, v11 [RexOp2urm_noflags#4b6] v13 = bint.i32 v12 -[RexOp1rr#21] v14 = band v9, v13 -[RexOp1tjccb#75] brnz v14, ebb6 +[DynRexOp1rr#21] v14 = band v9, v13 +[DynRexOp1tjccb#75] brnz v14, ebb6 [Op1jmpb#eb] jump ebb7 ebb7: -[RexOp1tjccb#74] brz.i32 v17, ebb8 +[DynRexOp1tjccb#74] brz.i32 v17, ebb8 [Op1jmpb#eb] jump ebb17 ebb17: [RexOp1pu_id#b8] v18 = iconst.i32 0 
-[RexOp1tjccb#74] brz v18, ebb9 +[DynRexOp1tjccb#74] brz v18, ebb9 [Op1jmpb#eb] jump ebb16 ebb16: [RexOp1pu_id#b8] v21 = iconst.i32 0 [RexOp1umr#89] v79 = uextend.i64 v5 -[RexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0 +[DynRexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0 [RexOp1ld#808b] v81 = load.i64 v80 -[RexOp1rr#8001] v22 = iadd v81, v79 +[DynRexOp1rr#8001] v22 = iadd v81, v79 [RexMp1st#189] istore16 v21, v22 [Op1jmpb#eb] jump ebb9 @@ -52,8 +52,8 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb8: [RexOp1pu_id#b8] v27 = iconst.i32 3 [RexOp1pu_id#b8] v28 = iconst.i32 4 -[RexOp1rr#09] v35 = bor.i32 v31, v13 -[RexOp1tjccb#75] brnz v35, ebb15(v27) +[DynRexOp1rr#09] v35 = bor.i32 v31, v13 +[DynRexOp1tjccb#75] brnz v35, ebb15(v27) [Op1jmpb#eb] jump ebb15(v28) ebb15(v36: i32): @@ -71,24 +71,24 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): [RexOp1pu_id#b8] v44 = iconst.i32 0 -[RexOp1tjccb#74] brz v44, ebb12 +[DynRexOp1tjccb#74] brz v44, ebb12 [Op1jmpb#eb] jump ebb18 ebb18: [RexOp1pu_id#b8] v50 = iconst.i32 11 -[RexOp1tjccb#74] brz v50, ebb14 +[DynRexOp1tjccb#74] brz v50, ebb14 [Op1jmpb#eb] jump ebb19 ebb19: [RexOp1umr#89] v82 = uextend.i64 v52 -[RexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0 +[DynRexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0 [RexOp1ld#808b] v84 = load.i64 v83 -[RexOp1rr#8001] v57 = iadd v84, v82 +[DynRexOp1rr#8001] v57 = iadd v84, v82 [RexOp1ld#8b] v58 = load.i32 v57 [RexOp1umr#89] v85 = uextend.i64 v58 -[RexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0 +[DynRexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0 [RexOp1ld#808b] v87 = load.i64 v86 -[RexOp1rr#8001] v64 = iadd v87, v85 +[DynRexOp1rr#8001] v64 = iadd v87, v85 [RexOp1st#88] istore8 v59, v64 [RexOp1pu_id#b8] v65 = iconst.i32 0 [Op1jmpb#eb] jump ebb13(v65) @@ -98,9 +98,9 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb13(v51: i32): [RexOp1umr#89] v88 = 
uextend.i64 v45 -[RexOp1r_ib#8083] v89 = iadd_imm.i64 v4, 0 +[DynRexOp1r_ib#8083] v89 = iadd_imm.i64 v4, 0 [RexOp1ld#808b] v90 = load.i64 v89 -[RexOp1rr#8001] v71 = iadd v90, v88 +[DynRexOp1rr#8001] v71 = iadd v90, v88 [RexOp1st#89] store v51, v71 [Op1jmpb#eb] jump ebb12 diff --git a/cranelift/filetests/filetests/verifier/flags.clif b/cranelift/filetests/filetests/verifier/flags.clif index 8a6abe8b4d..8273160510 100644 --- a/cranelift/filetests/filetests/verifier/flags.clif +++ b/cranelift/filetests/filetests/verifier/flags.clif @@ -4,7 +4,7 @@ target i686 ; Simple, correct use of CPU flags. function %simple(i32) -> i32 { ebb0(v0: i32): - [Op1rcmp#39] v1 = ifcmp v0, v0 + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 [Op2seti_abcd#490] v2 = trueif ugt v1 [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 [Op1ret#c3] return v3 @@ -13,7 +13,7 @@ function %simple(i32) -> i32 { ; Overlapping flag values of different types. function %overlap(i32, f32) -> i32 { ebb0(v0: i32, v1: f32): - [Op1rcmp#39] v2 = ifcmp v0, v0 + [DynRexOp1rcmp#39] v2 = ifcmp v0, v0 [Op2fcmp#42e] v3 = ffcmp v1, v1 [Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3 [Op2seti_abcd#490] v5 = trueif ugt v2 @@ -25,8 +25,8 @@ function %overlap(i32, f32) -> i32 { ; CPU flags clobbered by arithmetic. function %clobbered(i32) -> i32 { ebb0(v0: i32): - [Op1rcmp#39] v1 = ifcmp v0, v0 - [Op1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1 + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 + [DynRexOp1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1 [Op2seti_abcd#490] v3 = trueif ugt v1 [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 [Op1ret#c3] return v4 @@ -35,7 +35,7 @@ function %clobbered(i32) -> i32 { ; CPU flags not clobbered by load. 
function %live_across_load(i32) -> i32 { ebb0(v0: i32): - [Op1rcmp#39] v1 = ifcmp v0, v0 + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 [Op1ld#8b] v2 = load.i32 v0 [Op2seti_abcd#490] v3 = trueif ugt v1 [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 @@ -45,7 +45,7 @@ function %live_across_load(i32) -> i32 { ; Correct use of CPU flags across EBB. function %live_across_ebb(i32) -> i32 { ebb0(v0: i32): - [Op1rcmp#39] v1 = ifcmp v0, v0 + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 [Op1jmpb#eb] jump ebb1 ebb1: [Op2seti_abcd#490] v2 = trueif ugt v1 @@ -61,14 +61,14 @@ function %live_across_ebb_backwards(i32) -> i32 { [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 [Op1ret#c3] return v3 ebb2: - [Op1rcmp#39] v1 = ifcmp v0, v0 + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 [Op1jmpb#eb] jump ebb1 } ; Flags live into loop. function %live_into_loop(i32) -> i32 { ebb0(v0: i32): - [Op1rcmp#39] v1 = ifcmp v0, v0 + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 [Op1jmpb#eb] jump ebb1 ebb1: [Op2seti_abcd#490] v2 = trueif ugt v1