
Implement Spectre mitigations for table accesses and br_tables. (#4092)

Currently, we have partial Spectre mitigation: we protect heap accesses
with dynamic bounds checks. Specifically, we guard against errant
accesses on the misspeculated path beyond the bounds-check conditional
branch by adding a conditional move that is also dependent on the
bounds-check condition. This data dependency on the condition is not
speculated and thus will always pick the "safe" value (in the heap case,
a NULL address) on the misspeculated path, until the pipeline flushes
and recovers onto the correct path.
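
As a rough illustration (a toy Rust model, not the actual Cranelift lowering), the pattern is a branchless select on the same condition the bounds-check branch tests, so the load address is tied to the condition as data rather than to the predicted branch direction:

```rust
/// Hypothetical sketch of the guard's dataflow. The real code emits a
/// conditional move in machine code; this only models the shape of it.
fn guarded_heap_addr(heap_base: usize, index: usize, bound: usize) -> usize {
    // Same condition the bounds-check branch tests, consumed as data.
    let in_bounds = (index < bound) as usize;
    // Branchless select standing in for the cmove: the address collapses to 0
    // (the "NULL address" safe value) whenever the condition says out-of-bounds,
    // regardless of how the branch was predicted.
    in_bounds * (heap_base + index)
}
```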

This PR uses the same technique both for table accesses -- used to
implement Wasm tables -- and for jumptables, used to implement Wasm
`br_table` instructions.

In the case of Wasm tables, the cmove picks the table base address on
the misspeculated path. This is equivalent to reading the first table
entry. This prevents loads of arbitrary data addresses on the
misspeculated path.
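
Sketched the same way (hypothetical helper, not the emitted code), the guarded table-address computation clamps to the table base:

```rust
/// Hypothetical sketch: out-of-bounds collapses the element address to the
/// table base, i.e. entry 0, never an attacker-chosen address.
fn guarded_table_addr(table_base: usize, bound: u32, index: u32, element_size: usize) -> usize {
    let in_bounds = (index < bound) as usize;
    // Offset is zeroed when out of bounds, so the misspeculated path sees the base.
    table_base + in_bounds * (index as usize) * element_size
}
```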

In the case of `br_table`, the cmove picks index 0 on the misspeculated
path. This is safer than allowing a branch to an address loaded from an
index under misspeculation (i.e., it preserves control-flow integrity
even under misspeculation).
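
In the same toy-model terms (again a sketch, not the real lowering), the `br_table` guard amounts to clamping the speculatively observable index to 0:

```rust
/// Hypothetical sketch: any index that fails the bounds check degenerates to 0,
/// so a speculative indirect jump can only target a real table entry.
fn guarded_jump_index(index: u32, table_len: u32) -> u32 {
    let in_bounds = (index < table_len) as u32;
    in_bounds * index
}
```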

The table mitigation is controlled by a Cranelift setting, on by
default. The br_table mitigation is always on, because it is part of the
single lowering pseudoinstruction. In both cases, the impact should be
minimal: a single extra cmove in a (relatively) rarely-used operation.
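
For embedders driving Cranelift directly, the flag is an ordinary boolean setting; a minimal sketch of setting and reading it back through the settings API (flag name taken from the diff below):

```rust
use cranelift_codegen::settings::{self, Configurable};

fn main() {
    let mut builder = settings::builder();
    // Defaults to true; setting it explicitly only matters if an embedder
    // deliberately wants to opt out of the table mitigation.
    builder
        .set("enable_table_access_spectre_mitigation", "true")
        .unwrap();
    let flags = settings::Flags::new(builder);
    assert!(flags.enable_table_access_spectre_mitigation());
}
```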

The table mitigation is architecture-independent (happens during
legalization); the br_table mitigation has been implemented for both x64
and aarch64. (I don't know enough about s390x to implement this
confidently there, but would happily review a PR to do the same on that
platform.)
Chris Fallin, committed via GitHub (commit 61dc38c065)
Changed files (12):

  1. cranelift/codegen/meta/src/shared/settings.rs (17)
  2. cranelift/codegen/src/isa/aarch64/inst/emit.rs (14)
  3. cranelift/codegen/src/isa/aarch64/mod.rs (77)
  4. cranelift/codegen/src/isa/x64/inst/emit.rs (10)
  5. cranelift/codegen/src/isa/x64/inst/mod.rs (2)
  6. cranelift/codegen/src/isa/x64/lower.rs (27)
  7. cranelift/codegen/src/isa/x64/mod.rs (93)
  8. cranelift/codegen/src/legalizer/mod.rs (2)
  9. cranelift/codegen/src/legalizer/table.rs (39)
  10. cranelift/codegen/src/settings.rs (1)
  11. cranelift/filetests/filetests/isa/x64/table.clif (38)
  12. crates/wasmtime/src/engine.rs (1)

cranelift/codegen/meta/src/shared/settings.rs (17)

@@ -305,5 +305,22 @@ pub(crate) fn define() -> SettingGroup {
         true,
     );

+    settings.add_bool(
+        "enable_table_access_spectre_mitigation",
+        "Enable Spectre mitigation on table bounds checks.",
+        r#"
+            This option uses a conditional move to ensure that when a table
+            access index is bounds-checked and a conditional branch is used
+            for the out-of-bounds case, a misspeculation of that conditional
+            branch (falsely predicted in-bounds) will select an in-bounds
+            index to load on the speculative path.
+
+            This option is enabled by default because it is highly
+            recommended for secure sandboxing. The embedder should consider
+            the security implications carefully before disabling this option.
+        "#,
+        true,
+    );
+
     settings.build()
 }

cranelift/codegen/src/isa/aarch64/inst/emit.rs (14)

@@ -2876,6 +2876,7 @@ impl MachInstEmit for Inst {
                    CondBrKind::Cond(Cond::Hs),
                    &mut AllocationConsumer::default(),
                );
                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
@@ -2885,10 +2886,17 @@
                }
                sink.put4(br);

-               // Save index in a tmp (the live range of ridx only goes to start of this
-               // sequence; rtmp1 or rtmp2 may overwrite it).
-               let inst = Inst::gen_move(rtmp2, ridx, I64);
+               // Overwrite the index with a zero when the above
+               // branch misspeculates (Spectre mitigation). Save the
+               // resulting index in rtmp2.
+               let inst = Inst::CSel {
+                   rd: rtmp2,
+                   cond: Cond::Hs,
+                   rn: zero_reg(),
+                   rm: ridx,
+               };
                inst.emit(&[], sink, emit_info, state);

                // Load address of jump table
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(&[], sink, emit_info, state);

cranelift/codegen/src/isa/aarch64/mod.rs (77)

@@ -181,7 +181,7 @@ mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::types::*;
-   use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
+   use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, JumpTableData, Signature};
    use crate::isa::CallConv;
    use crate::settings;
    use crate::settings::Configurable;
@@ -294,4 +294,79 @@
        assert_eq!(code, &golden[..]);
    }

#[test]
fn test_br_table() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let bb1 = func.dfg.make_block();
let bb2 = func.dfg.make_block();
let bb3 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let mut jt_data = JumpTableData::new();
jt_data.push_entry(bb1);
jt_data.push_entry(bb2);
let jt = pos.func.create_jump_table(jt_data);
pos.ins().br_table(arg0, bb3, jt);
pos.insert_block(bb1);
let v1 = pos.ins().iconst(I32, 1);
pos.ins().return_(&[v1]);
pos.insert_block(bb2);
let v2 = pos.ins().iconst(I32, 2);
pos.ins().return_(&[v2]);
pos.insert_block(bb3);
let v3 = pos.ins().iconst(I32, 3);
pos.ins().return_(&[v3]);
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
shared_flags_builder.set("enable_verifier", "true").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = result.buffer.data();
// 0: 7100081f cmp w0, #0x2
// 4: 54000102 b.cs 0x24 // b.hs, b.nlast
// 8: 9a8023e9 csel x9, xzr, x0, cs // cs = hs, nlast
// c: 10000088 adr x8, 0x1c
// 10: b8a95909 ldrsw x9, [x8, w9, uxtw #2]
// 14: 8b090108 add x8, x8, x9
// 18: d61f0100 br x8
// 1c: 00000010 udf #16
// 20: 00000018 udf #24
// 24: d2800060 mov x0, #0x3 // #3
// 28: d65f03c0 ret
// 2c: d2800020 mov x0, #0x1 // #1
// 30: d65f03c0 ret
// 34: d2800040 mov x0, #0x2 // #2
// 38: d65f03c0 ret
let golden = vec![
31, 8, 0, 113, 2, 1, 0, 84, 233, 35, 128, 154, 136, 0, 0, 16, 9, 89, 169, 184, 8, 1, 9,
139, 0, 1, 31, 214, 16, 0, 0, 0, 24, 0, 0, 0, 96, 0, 128, 210, 192, 3, 95, 214, 32, 0,
128, 210, 192, 3, 95, 214, 64, 0, 128, 210, 192, 3, 95, 214,
];
assert_eq!(code, &golden[..]);
}
}

cranelift/codegen/src/isa/x64/inst/emit.rs (10)

@@ -1399,6 +1399,7 @@ pub(crate) fn emit(
            // ;; generated by lowering: cmp #jmp_table_size, %idx
            // jnb $default_target
            // movl %idx, %tmp2
+           // cmovnb %tmp1, %tmp2 ;; Spectre mitigation; we require tmp1 to be zero on entry.
            // lea start_of_jump_table_offset(%rip), %tmp1
            // movslq [%tmp1, %tmp2, 4], %tmp2 ;; shift of 2, viz. multiply index by 4
            // addq %tmp2, %tmp1
@@ -1411,6 +1412,15 @@
            let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(idx), tmp2);
            inst.emit(&[], sink, info, state);

+           // Spectre mitigation: CMOV to zero the index if the out-of-bounds branch above misspeculated.
+           let inst = Inst::cmove(
+               OperandSize::Size64,
+               CC::NB,
+               RegMem::reg(tmp1.to_reg()),
+               tmp2,
+           );
+           inst.emit(&[], sink, info, state);
+
            // Load base address of jump table.
            let start_of_jumptable = sink.get_label();
            let inst = Inst::lea(Amode::rip_relative(start_of_jumptable), tmp1);

cranelift/codegen/src/isa/x64/inst/mod.rs (2)

@@ -2015,7 +2015,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
            ..
        } => {
            collector.reg_use(*idx);
-           collector.reg_early_def(*tmp1);
+           collector.reg_mod(*tmp1);
            collector.reg_early_def(*tmp2);
        }

cranelift/codegen/src/isa/x64/lower.rs (27)

@@ -3411,16 +3411,6 @@ impl LowerBackend for X64Backend {
                    ext_spec,
                );

-               // Bounds-check (compute flags from idx - jt_size) and branch to default.
-               // We only support u32::MAX entries, but we compare the full 64 bit register
-               // when doing the bounds check.
-               let cmp_size = if ty == types::I64 {
-                   OperandSize::Size64
-               } else {
-                   OperandSize::Size32
-               };
-               ctx.emit(Inst::cmp_rmi_r(cmp_size, RegMemImm::imm(jt_size), idx));
-
                // Emit the compound instruction that does:
                //
                // lea $jt, %rA
@@ -3443,6 +3433,23 @@
                // Cranelift type is thus unused).
                let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();

+               // Put a zero in tmp1. This is needed for Spectre
+               // mitigations (a CMOV that zeroes the index on
+               // misspeculation).
+               let inst = Inst::imm(OperandSize::Size64, 0, tmp1);
+               ctx.emit(inst);
+
+               // Bounds-check (compute flags from idx - jt_size)
+               // and branch to default. We only support
+               // u32::MAX entries, but we compare the full 64
+               // bit register when doing the bounds check.
+               let cmp_size = if ty == types::I64 {
+                   OperandSize::Size64
+               } else {
+                   OperandSize::Size32
+               };
+               ctx.emit(Inst::cmp_rmi_r(cmp_size, RegMemImm::imm(jt_size), idx));
+
                let targets_for_term: Vec<MachLabel> = targets.to_vec();
                let default_target = targets[0];

cranelift/codegen/src/isa/x64/mod.rs (93)

@@ -203,7 +203,7 @@ mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::{types::*, SourceLoc, ValueLabel, ValueLabelStart};
-   use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
+   use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, JumpTableData, Signature};
    use crate::isa::CallConv;
    use crate::settings;
    use crate::settings::Configurable;
@@ -366,4 +366,95 @@
            Err(CodegenError::Unsupported(_)),
        ));
    }

// Check that br_table lowers properly. We can't test this with an
// ordinary compile-test because the br_table pseudoinstruction
// expands during emission.
#[test]
fn br_table() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let bb1 = func.dfg.make_block();
let bb2 = func.dfg.make_block();
let bb3 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let mut jt_data = JumpTableData::new();
jt_data.push_entry(bb1);
jt_data.push_entry(bb2);
let jt = pos.func.create_jump_table(jt_data);
pos.ins().br_table(arg0, bb3, jt);
pos.insert_block(bb1);
let v1 = pos.ins().iconst(I32, 1);
pos.ins().return_(&[v1]);
pos.insert_block(bb2);
let v2 = pos.ins().iconst(I32, 2);
pos.ins().return_(&[v2]);
pos.insert_block(bb3);
let v3 = pos.ins().iconst(I32, 3);
pos.ins().return_(&[v3]);
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
shared_flags_builder.set("enable_verifier", "true").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = x64_settings::Flags::new(&shared_flags, x64_settings::builder());
let backend = X64Backend::new_with_flags(
Triple::from_str("x86_64").unwrap(),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = result.buffer.data();
// 00000000 55 push rbp
// 00000001 4889E5 mov rbp,rsp
// 00000004 41B900000000 mov r9d,0x0
// 0000000A 83FF02 cmp edi,byte +0x2
// 0000000D 0F8320000000 jnc near 0x33
// 00000013 8BC7 mov eax,edi
// 00000015 490F43C1 cmovnc rax,r9
// 00000019 4C8D0D0B000000 lea r9,[rel 0x2b]
// 00000020 4963448100 movsxd rax,dword [r9+rax*4+0x0]
// 00000025 4901C1 add r9,rax
// 00000028 41FFE1 jmp r9
// 0000002B 1200 adc al,[rax]
// 0000002D 0000 add [rax],al
// 0000002F 1C00 sbb al,0x0
// 00000031 0000 add [rax],al
// 00000033 B803000000 mov eax,0x3
// 00000038 4889EC mov rsp,rbp
// 0000003B 5D pop rbp
// 0000003C C3 ret
// 0000003D B801000000 mov eax,0x1
// 00000042 4889EC mov rsp,rbp
// 00000045 5D pop rbp
// 00000046 C3 ret
// 00000047 B802000000 mov eax,0x2
// 0000004C 4889EC mov rsp,rbp
// 0000004F 5D pop rbp
// 00000050 C3 ret
let golden = vec![
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 199, 73,
15, 67, 193, 76, 141, 13, 11, 0, 0, 0, 73, 99, 68, 129, 0, 73, 1, 193, 65, 255, 225,
18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72,
137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195,
];
assert_eq!(code, &golden[..]);
}
}

cranelift/codegen/src/legalizer/mod.rs (2)

@@ -120,7 +120,7 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa:
                table,
                arg,
                offset,
-           } => expand_table_addr(inst, &mut pos.func, table, arg, offset),
+           } => expand_table_addr(isa, inst, &mut pos.func, table, arg, offset),

            // bitops
            InstructionData::BinaryImm64 {

cranelift/codegen/src/legalizer/table.rs (39)

@@ -7,9 +7,11 @@ use crate::cursor::{Cursor, FuncCursor};
 use crate::ir::condcodes::IntCC;
 use crate::ir::immediates::Offset32;
 use crate::ir::{self, InstBuilder};
+use crate::isa::TargetIsa;

 /// Expand a `table_addr` instruction according to the definition of the table.
 pub fn expand_table_addr(
+    isa: &dyn TargetIsa,
     inst: ir::Inst,
     func: &mut ir::Function,
     table: ir::Table,
@@ -31,6 +33,15 @@ pub fn expand_table_addr(
        .icmp(IntCC::UnsignedGreaterThanOrEqual, index, bound);
    pos.ins().trapnz(oob, ir::TrapCode::TableOutOfBounds);

+   // If Spectre mitigations are enabled, we will use a comparison to
+   // short-circuit the computed table element address to the start
+   // of the table on the misspeculation path when out-of-bounds.
+   let spectre_oob_cmp = if isa.flags().enable_table_access_spectre_mitigation() {
+       Some((index, bound))
+   } else {
+       None
+   };
+
    compute_addr(
        inst,
        table,
@@ -39,6 +50,7 @@
        index_ty,
        element_offset,
        pos.func,
+       spectre_oob_cmp,
    );
 }
@@ -51,6 +63,7 @@ fn compute_addr(
    index_ty: ir::Type,
    element_offset: Offset32,
    func: &mut ir::Function,
+   spectre_oob_cmp: Option<(ir::Value, ir::Value)>,
 ) {
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
@@ -77,11 +90,29 @@
        offset = pos.ins().imul_imm(index, element_size as i64);
    }

-   if element_offset == Offset32::new(0) {
-       pos.func.dfg.replace(inst).iadd(base, offset);
+   let element_addr = if element_offset == Offset32::new(0) {
+       pos.ins().iadd(base, offset)
    } else {
        let imm: i64 = element_offset.into();
        offset = pos.ins().iadd(base, offset);
-       pos.func.dfg.replace(inst).iadd_imm(offset, imm);
-   }
+       pos.ins().iadd_imm(offset, imm)
+   };
+
+   let element_addr = if let Some((index, bound)) = spectre_oob_cmp {
+       let flags = pos.ins().ifcmp(index, bound);
+       // If out-of-bounds, choose the table base on the misspeculation path.
+       pos.ins().selectif_spectre_guard(
+           addr_ty,
+           IntCC::UnsignedGreaterThanOrEqual,
+           flags,
+           base,
+           element_addr,
+       )
+   } else {
+       element_addr
+   };
+
+   let new_inst = pos.func.dfg.value_def(element_addr).inst().unwrap();
+   pos.func.dfg.replace_with_aliases(inst, new_inst);
+   pos.remove_inst();
 }

cranelift/codegen/src/settings.rs (1)

@@ -535,6 +535,7 @@ enable_probestack = true
 probestack_func_adjusts_sp = false
 enable_jump_tables = true
 enable_heap_access_spectre_mitigation = true
+enable_table_access_spectre_mitigation = true
 "#
        );
        assert_eq!(f.opt_level(), super::OptLevel::None);

cranelift/filetests/filetests/isa/x64/table.clif (38)

@@ -0,0 +1,38 @@
test compile precise-output
set enable_safepoints=true
set enable_table_access_spectre_mitigation=true
target x86_64
function %table_set(i32, r64, i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i32 notrap aligned gv0 +8
table0 = dynamic gv1, element_size 1, bound gv2, index_type i32
block0(v0: i32, v1: r64, v2: i64):
v3 = table_addr.i64 table0, v0, +0
store.r64 notrap aligned v1, v3
return
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl 8(%rdx), %eax
; cmpl %eax, %edi
; jb label1; j label2
; block1:
; movl %edi, %r9d
; movq 0(%rdx), %rdx
; movq %rdx, %r8
; addq %r8, %r9, %r8
; cmpl %eax, %edi
; cmovnbq %rdx, %r8, %r8
; movq %rsi, 0(%r8)
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; ud2 table_oob

crates/wasmtime/src/engine.rs (1)

@@ -331,6 +331,7 @@ impl Engine {
            // the module itself, so their configuration values shouldn't
            // matter.
            "enable_heap_access_spectre_mitigation"
+           | "enable_table_access_spectre_mitigation"
            | "enable_nan_canonicalization"
            | "enable_jump_tables"
            | "enable_float"
