
Implement Spectre mitigations for table accesses and br_tables. (#4092)

Currently, we have partial Spectre mitigation: we protect heap accesses
with dynamic bounds checks. Specifically, we guard against errant
accesses on the misspeculated path beyond the bounds-check conditional
branch by adding a conditional move that is also dependent on the
bounds-check condition. This data dependency on the condition is not
speculated and thus will always pick the "safe" value (in the heap case,
a NULL address) on the misspeculated path, until the pipeline flushes
and recovers onto the correct path.
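
As a rough illustration (a toy Rust model, not the actual Cranelift lowering), the pattern is a branchless select on the same condition the bounds-check branch tests, so the load address is tied to the condition as data rather than to the predicted branch direction:

```rust
/// Hypothetical sketch of the guard's dataflow. The real code emits a
/// conditional move in machine code; this only models the shape of it.
fn guarded_heap_addr(heap_base: usize, index: usize, bound: usize) -> usize {
    // Same condition the bounds-check branch tests, consumed as data.
    let in_bounds = (index < bound) as usize;
    // Branchless select standing in for the cmove: the address collapses to 0
    // (the "NULL address" safe value) whenever the condition says out-of-bounds,
    // regardless of how the branch was predicted.
    in_bounds * (heap_base + index)
}
```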

This PR uses the same technique both for table accesses -- used to
implement Wasm tables -- and for jumptables, used to implement Wasm
`br_table` instructions.

In the case of Wasm tables, the cmove picks the table base address on
the misspeculated path. This is equivalent to reading the first table
entry. This prevents loads of arbitrary data addresses on the
misspeculated path.
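
Sketched the same way (hypothetical helper, not the emitted code), the guarded table-address computation clamps to the table base:

```rust
/// Hypothetical sketch: out-of-bounds collapses the element address to the
/// table base, i.e. entry 0, never an attacker-chosen address.
fn guarded_table_addr(table_base: usize, bound: u32, index: u32, element_size: usize) -> usize {
    let in_bounds = (index < bound) as usize;
    // Offset is zeroed when out of bounds, so the misspeculated path sees the base.
    table_base + in_bounds * (index as usize) * element_size
}
```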

In the case of `br_table`, the cmove picks index 0 on the misspeculated
path. This is safer than allowing a branch to an address loaded from an
index under misspeculation (i.e., it preserves control-flow integrity
even under misspeculation).
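
In the same toy-model terms (again a sketch, not the real lowering), the `br_table` guard amounts to clamping the speculatively observable index to 0:

```rust
/// Hypothetical sketch: any index that fails the bounds check degenerates to 0,
/// so a speculative indirect jump can only target a real table entry.
fn guarded_jump_index(index: u32, table_len: u32) -> u32 {
    let in_bounds = (index < table_len) as u32;
    in_bounds * index
}
```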

The table mitigation is controlled by a Cranelift setting, on by
default. The br_table mitigation is always on, because it is part of the
single lowering pseudoinstruction. In both cases, the impact should be
minimal: a single extra cmove in a (relatively) rarely-used operation.
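
For embedders driving Cranelift directly, the flag is an ordinary boolean setting; a minimal sketch of setting and reading it back through the settings API (flag name taken from the diff below):

```rust
use cranelift_codegen::settings::{self, Configurable};

fn main() {
    let mut builder = settings::builder();
    // Defaults to true; setting it explicitly only matters if an embedder
    // deliberately wants to opt out of the table mitigation.
    builder
        .set("enable_table_access_spectre_mitigation", "true")
        .unwrap();
    let flags = settings::Flags::new(builder);
    assert!(flags.enable_table_access_spectre_mitigation());
}
```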

The table mitigation is architecture-independent (happens during
legalization); the br_table mitigation has been implemented for both x64
and aarch64. (I don't know enough about s390x to implement this
confidently there, but would happily review a PR to do the same on that
platform.)
Chris Fallin, committed via GitHub (commit 61dc38c065)
Changed files (12):

  1. cranelift/codegen/meta/src/shared/settings.rs (17)
  2. cranelift/codegen/src/isa/aarch64/inst/emit.rs (14)
  3. cranelift/codegen/src/isa/aarch64/mod.rs (77)
  4. cranelift/codegen/src/isa/x64/inst/emit.rs (10)
  5. cranelift/codegen/src/isa/x64/inst/mod.rs (2)
  6. cranelift/codegen/src/isa/x64/lower.rs (27)
  7. cranelift/codegen/src/isa/x64/mod.rs (93)
  8. cranelift/codegen/src/legalizer/mod.rs (2)
  9. cranelift/codegen/src/legalizer/table.rs (39)
  10. cranelift/codegen/src/settings.rs (1)
  11. cranelift/filetests/filetests/isa/x64/table.clif (38)
  12. crates/wasmtime/src/engine.rs (1)

cranelift/codegen/meta/src/shared/settings.rs (17)

@@ -305,5 +305,22 @@ pub(crate) fn define() -> SettingGroup {
         true,
     );

+    settings.add_bool(
+        "enable_table_access_spectre_mitigation",
+        "Enable Spectre mitigation on table bounds checks.",
+        r#"
+            This option uses a conditional move to ensure that when a table
+            access index is bounds-checked and a conditional branch is used
+            for the out-of-bounds case, a misspeculation of that conditional
+            branch (falsely predicted in-bounds) will select an in-bounds
+            index to load on the speculative path.
+
+            This option is enabled by default because it is highly
+            recommended for secure sandboxing. The embedder should consider
+            the security implications carefully before disabling this option.
+        "#,
+        true,
+    );
+
     settings.build()
 }

cranelift/codegen/src/isa/aarch64/inst/emit.rs (14)

@@ -2876,6 +2876,7 @@ impl MachInstEmit for Inst {
                    CondBrKind::Cond(Cond::Hs),
                    &mut AllocationConsumer::default(),
                );
                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
@@ -2885,10 +2886,17 @@
                }
                sink.put4(br);

-               // Save index in a tmp (the live range of ridx only goes to start of this
-               // sequence; rtmp1 or rtmp2 may overwrite it).
-               let inst = Inst::gen_move(rtmp2, ridx, I64);
+               // Overwrite the index with a zero when the above
+               // branch misspeculates (Spectre mitigation). Save the
+               // resulting index in rtmp2.
+               let inst = Inst::CSel {
+                   rd: rtmp2,
+                   cond: Cond::Hs,
+                   rn: zero_reg(),
+                   rm: ridx,
+               };
                inst.emit(&[], sink, emit_info, state);

                // Load address of jump table
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(&[], sink, emit_info, state);

cranelift/codegen/src/isa/aarch64/mod.rs (77)

@@ -181,7 +181,7 @@ mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::types::*;
-   use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
+   use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, JumpTableData, Signature};
    use crate::isa::CallConv;
    use crate::settings;
    use crate::settings::Configurable;
@@ -294,4 +294,79 @@
        assert_eq!(code, &golden[..]);
    }

#[test]
fn test_br_table() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let bb1 = func.dfg.make_block();
let bb2 = func.dfg.make_block();
let bb3 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let mut jt_data = JumpTableData::new();
jt_data.push_entry(bb1);
jt_data.push_entry(bb2);
let jt = pos.func.create_jump_table(jt_data);
pos.ins().br_table(arg0, bb3, jt);
pos.insert_block(bb1);
let v1 = pos.ins().iconst(I32, 1);
pos.ins().return_(&[v1]);
pos.insert_block(bb2);
let v2 = pos.ins().iconst(I32, 2);
pos.ins().return_(&[v2]);
pos.insert_block(bb3);
let v3 = pos.ins().iconst(I32, 3);
pos.ins().return_(&[v3]);
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
shared_flags_builder.set("enable_verifier", "true").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = result.buffer.data();
// 0: 7100081f cmp w0, #0x2
// 4: 54000102 b.cs 0x24 // b.hs, b.nlast
// 8: 9a8023e9 csel x9, xzr, x0, cs // cs = hs, nlast
// c: 10000088 adr x8, 0x1c
// 10: b8a95909 ldrsw x9, [x8, w9, uxtw #2]
// 14: 8b090108 add x8, x8, x9
// 18: d61f0100 br x8
// 1c: 00000010 udf #16
// 20: 00000018 udf #24
// 24: d2800060 mov x0, #0x3 // #3
// 28: d65f03c0 ret
// 2c: d2800020 mov x0, #0x1 // #1
// 30: d65f03c0 ret
// 34: d2800040 mov x0, #0x2 // #2
// 38: d65f03c0 ret
let golden = vec![
31, 8, 0, 113, 2, 1, 0, 84, 233, 35, 128, 154, 136, 0, 0, 16, 9, 89, 169, 184, 8, 1, 9,
139, 0, 1, 31, 214, 16, 0, 0, 0, 24, 0, 0, 0, 96, 0, 128, 210, 192, 3, 95, 214, 32, 0,
128, 210, 192, 3, 95, 214, 64, 0, 128, 210, 192, 3, 95, 214,
];
assert_eq!(code, &golden[..]);
}
}

cranelift/codegen/src/isa/x64/inst/emit.rs (10)

@@ -1399,6 +1399,7 @@ pub(crate) fn emit(
            // ;; generated by lowering: cmp #jmp_table_size, %idx
            // jnb $default_target
            // movl %idx, %tmp2
+           // cmovnb %tmp1, %tmp2 ;; Spectre mitigation; we require tmp1 to be zero on entry.
            // lea start_of_jump_table_offset(%rip), %tmp1
            // movslq [%tmp1, %tmp2, 4], %tmp2 ;; shift of 2, viz. multiply index by 4
            // addq %tmp2, %tmp1
@@ -1411,6 +1412,15 @@
            let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(idx), tmp2);
            inst.emit(&[], sink, info, state);

+           // Spectre mitigation: CMOV to zero the index if the out-of-bounds branch above misspeculated.
+           let inst = Inst::cmove(
+               OperandSize::Size64,
+               CC::NB,
+               RegMem::reg(tmp1.to_reg()),
+               tmp2,
+           );
+           inst.emit(&[], sink, info, state);
+
            // Load base address of jump table.
            let start_of_jumptable = sink.get_label();
            let inst = Inst::lea(Amode::rip_relative(start_of_jumptable), tmp1);

cranelift/codegen/src/isa/x64/inst/mod.rs (2)

@@ -2015,7 +2015,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
            ..
        } => {
            collector.reg_use(*idx);
-           collector.reg_early_def(*tmp1);
+           collector.reg_mod(*tmp1);
            collector.reg_early_def(*tmp2);
        }

cranelift/codegen/src/isa/x64/lower.rs (27)

@@ -3411,16 +3411,6 @@ impl LowerBackend for X64Backend {
                    ext_spec,
                );

-               // Bounds-check (compute flags from idx - jt_size) and branch to default.
-               // We only support u32::MAX entries, but we compare the full 64 bit register
-               // when doing the bounds check.
-               let cmp_size = if ty == types::I64 {
-                   OperandSize::Size64
-               } else {
-                   OperandSize::Size32
-               };
-               ctx.emit(Inst::cmp_rmi_r(cmp_size, RegMemImm::imm(jt_size), idx));
-
                // Emit the compound instruction that does:
                //
                // lea $jt, %rA
@@ -3443,6 +3433,23 @@
                // Cranelift type is thus unused).
                let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();

+               // Put a zero in tmp1. This is needed for Spectre
+               // mitigations (a CMOV that zeroes the index on
+               // misspeculation).
+               let inst = Inst::imm(OperandSize::Size64, 0, tmp1);
+               ctx.emit(inst);
+
+               // Bounds-check (compute flags from idx - jt_size)
+               // and branch to default. We only support
+               // u32::MAX entries, but we compare the full 64
+               // bit register when doing the bounds check.
+               let cmp_size = if ty == types::I64 {
+                   OperandSize::Size64
+               } else {
+                   OperandSize::Size32
+               };
+               ctx.emit(Inst::cmp_rmi_r(cmp_size, RegMemImm::imm(jt_size), idx));
+
                let targets_for_term: Vec<MachLabel> = targets.to_vec();
                let default_target = targets[0];

cranelift/codegen/src/isa/x64/mod.rs (93)

@@ -203,7 +203,7 @@ mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::{types::*, SourceLoc, ValueLabel, ValueLabelStart};
-   use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
+   use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, JumpTableData, Signature};
    use crate::isa::CallConv;
    use crate::settings;
    use crate::settings::Configurable;
@@ -366,4 +366,95 @@
            Err(CodegenError::Unsupported(_)),
        ));
    }

// Check that br_table lowers properly. We can't test this with an
// ordinary compile-test because the br_table pseudoinstruction
// expands during emission.
#[test]
fn br_table() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let bb1 = func.dfg.make_block();
let bb2 = func.dfg.make_block();
let bb3 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let mut jt_data = JumpTableData::new();
jt_data.push_entry(bb1);
jt_data.push_entry(bb2);
let jt = pos.func.create_jump_table(jt_data);
pos.ins().br_table(arg0, bb3, jt);
pos.insert_block(bb1);
let v1 = pos.ins().iconst(I32, 1);
pos.ins().return_(&[v1]);
pos.insert_block(bb2);
let v2 = pos.ins().iconst(I32, 2);
pos.ins().return_(&[v2]);
pos.insert_block(bb3);
let v3 = pos.ins().iconst(I32, 3);
pos.ins().return_(&[v3]);
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
shared_flags_builder.set("enable_verifier", "true").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = x64_settings::Flags::new(&shared_flags, x64_settings::builder());
let backend = X64Backend::new_with_flags(
Triple::from_str("x86_64").unwrap(),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = result.buffer.data();
// 00000000 55 push rbp
// 00000001 4889E5 mov rbp,rsp
// 00000004 41B900000000 mov r9d,0x0
// 0000000A 83FF02 cmp edi,byte +0x2
// 0000000D 0F8320000000 jnc near 0x33
// 00000013 8BC7 mov eax,edi
// 00000015 490F43C1 cmovnc rax,r9
// 00000019 4C8D0D0B000000 lea r9,[rel 0x2b]
// 00000020 4963448100 movsxd rax,dword [r9+rax*4+0x0]
// 00000025 4901C1 add r9,rax
// 00000028 41FFE1 jmp r9
// 0000002B 1200 adc al,[rax]
// 0000002D 0000 add [rax],al
// 0000002F 1C00 sbb al,0x0
// 00000031 0000 add [rax],al
// 00000033 B803000000 mov eax,0x3
// 00000038 4889EC mov rsp,rbp
// 0000003B 5D pop rbp
// 0000003C C3 ret
// 0000003D B801000000 mov eax,0x1
// 00000042 4889EC mov rsp,rbp
// 00000045 5D pop rbp
// 00000046 C3 ret
// 00000047 B802000000 mov eax,0x2
// 0000004C 4889EC mov rsp,rbp
// 0000004F 5D pop rbp
// 00000050 C3 ret
let golden = vec![
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 199, 73,
15, 67, 193, 76, 141, 13, 11, 0, 0, 0, 73, 99, 68, 129, 0, 73, 1, 193, 65, 255, 225,
18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72,
137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195,
];
assert_eq!(code, &golden[..]);
}
}

cranelift/codegen/src/legalizer/mod.rs (2)

@@ -120,7 +120,7 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa:
                table,
                arg,
                offset,
-           } => expand_table_addr(inst, &mut pos.func, table, arg, offset),
+           } => expand_table_addr(isa, inst, &mut pos.func, table, arg, offset),

            // bitops
            InstructionData::BinaryImm64 {

cranelift/codegen/src/legalizer/table.rs (39)

@@ -7,9 +7,11 @@ use crate::cursor::{Cursor, FuncCursor};
 use crate::ir::condcodes::IntCC;
 use crate::ir::immediates::Offset32;
 use crate::ir::{self, InstBuilder};
+use crate::isa::TargetIsa;

 /// Expand a `table_addr` instruction according to the definition of the table.
 pub fn expand_table_addr(
+    isa: &dyn TargetIsa,
     inst: ir::Inst,
     func: &mut ir::Function,
     table: ir::Table,
@@ -31,6 +33,15 @@ pub fn expand_table_addr(
        .icmp(IntCC::UnsignedGreaterThanOrEqual, index, bound);
    pos.ins().trapnz(oob, ir::TrapCode::TableOutOfBounds);

+   // If Spectre mitigations are enabled, we will use a comparison to
+   // short-circuit the computed table element address to the start
+   // of the table on the misspeculation path when out-of-bounds.
+   let spectre_oob_cmp = if isa.flags().enable_table_access_spectre_mitigation() {
+       Some((index, bound))
+   } else {
+       None
+   };
+
    compute_addr(
        inst,
        table,
@@ -39,6 +50,7 @@
        index_ty,
        element_offset,
        pos.func,
+       spectre_oob_cmp,
    );
 }
@@ -51,6 +63,7 @@ fn compute_addr(
    index_ty: ir::Type,
    element_offset: Offset32,
    func: &mut ir::Function,
+   spectre_oob_cmp: Option<(ir::Value, ir::Value)>,
 ) {
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
@@ -77,11 +90,29 @@
        offset = pos.ins().imul_imm(index, element_size as i64);
    }

-   if element_offset == Offset32::new(0) {
-       pos.func.dfg.replace(inst).iadd(base, offset);
+   let element_addr = if element_offset == Offset32::new(0) {
+       pos.ins().iadd(base, offset)
    } else {
        let imm: i64 = element_offset.into();
        offset = pos.ins().iadd(base, offset);
-       pos.func.dfg.replace(inst).iadd_imm(offset, imm);
-   }
+       pos.ins().iadd_imm(offset, imm)
+   };
+
+   let element_addr = if let Some((index, bound)) = spectre_oob_cmp {
+       let flags = pos.ins().ifcmp(index, bound);
+       // If out-of-bounds, choose the table base on the misspeculation path.
+       pos.ins().selectif_spectre_guard(
+           addr_ty,
+           IntCC::UnsignedGreaterThanOrEqual,
+           flags,
+           base,
+           element_addr,
+       )
+   } else {
+       element_addr
+   };
+
+   let new_inst = pos.func.dfg.value_def(element_addr).inst().unwrap();
+   pos.func.dfg.replace_with_aliases(inst, new_inst);
+   pos.remove_inst();
 }

cranelift/codegen/src/settings.rs (1)

@@ -535,6 +535,7 @@ enable_probestack = true
 probestack_func_adjusts_sp = false
 enable_jump_tables = true
 enable_heap_access_spectre_mitigation = true
+enable_table_access_spectre_mitigation = true
 "#
        );
        assert_eq!(f.opt_level(), super::OptLevel::None);

cranelift/filetests/filetests/isa/x64/table.clif (38)

@@ -0,0 +1,38 @@
test compile precise-output
set enable_safepoints=true
set enable_table_access_spectre_mitigation=true
target x86_64
function %table_set(i32, r64, i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i32 notrap aligned gv0 +8
table0 = dynamic gv1, element_size 1, bound gv2, index_type i32
block0(v0: i32, v1: r64, v2: i64):
v3 = table_addr.i64 table0, v0, +0
store.r64 notrap aligned v1, v3
return
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl 8(%rdx), %eax
; cmpl %eax, %edi
; jb label1; j label2
; block1:
; movl %edi, %r9d
; movq 0(%rdx), %rdx
; movq %rdx, %r8
; addq %r8, %r9, %r8
; cmpl %eax, %edi
; cmovnbq %rdx, %r8, %r8
; movq %rsi, 0(%r8)
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; ud2 table_oob

crates/wasmtime/src/engine.rs (1)

@@ -331,6 +331,7 @@ impl Engine {
            // the module itself, so their configuration values shouldn't
            // matter.
            "enable_heap_access_spectre_mitigation"
+           | "enable_table_access_spectre_mitigation"
            | "enable_nan_canonicalization"
            | "enable_jump_tables"
            | "enable_float"
