Add a verifier

The current implementation only performs a few basic checks.
8 years ago · de04d6d083
10 changed files with 287 additions and 3 deletions
--- a/src/libcretonne/Cargo.toml
+++ b/src/libcretonne/Cargo.toml
@ -11,3 +11,6 @@ build = "build.rs"
 [lib]
 name = "cretonne"
 path = "lib.rs"
+
+[dependencies]
+regex = "0.1.71"
--- a/src/libcretonne/ir/instructions.rs
+++ b/src/libcretonne/ir/instructions.rs
@ -349,6 +349,16 @@ impl InstructionData {
            _ => BranchInfo::NotABranch,
        }
    }
+
+    /// Return true if an instruction is terminating, or false otherwise.
+    pub fn is_terminating<'a>(&'a self) -> bool {
+        match self {
+            &InstructionData::Jump { .. } => true,
+            &InstructionData::Return { .. } => true,
+            &InstructionData::Nullary { .. } => true,
+            _ => false,
+        }
+    }
 }

 /// Information about branch and jump instructions.
--- a/src/libcretonne/ir/layout.rs
+++ b/src/libcretonne/ir/layout.rs
@ -55,7 +55,7 @@ impl Layout {
 /// can only be removed from the layout when it is empty.
 ///
 /// Since every EBB must end with a terminator instruction which cannot fall through, the layout of
-/// EBBs does not affect the semantics of the program.
+/// EBBs do not affect the semantics of the program.
 ///
 impl Layout {
    /// Is `ebb` currently part of the layout?
@ -188,6 +188,11 @@ impl Layout {
        ebb_node.last_inst = inst;
    }

+    /// Fetch an ebb's last instruction.
+    pub fn last_inst(&self, ebb: Ebb) -> Inst {
+        self.ebbs[ebb].last_inst
+    }
+
    /// Insert `inst` before the instruction `before` in the same EBB.
    pub fn insert_inst(&mut self, inst: Inst, before: Inst) {
        assert_eq!(self.inst_ebb(inst), None);
--- a/src/libcretonne/ir/mod.rs
+++ b/src/libcretonne/ir/mod.rs
@ -16,6 +16,6 @@ pub use ir::entities::{Ebb, Inst, Value, StackSlot, JumpTable};
 pub use ir::instructions::{Opcode, InstructionData};
 pub use ir::stackslot::StackSlotData;
 pub use ir::jumptable::JumpTableData;
-pub use ir::dfg::DataFlowGraph;
+pub use ir::dfg::{DataFlowGraph, ValueDef};
 pub use ir::layout::Layout;
 pub use ir::function::Function;
--- a/src/libcretonne/lib.rs
+++ b/src/libcretonne/lib.rs
@ -14,6 +14,7 @@ pub mod cfg;
 pub mod dominator_tree;
 pub mod entity_map;
 pub mod settings;
+pub mod verifier;

 mod constant_hash;
 mod predicates;
--- a/src/libcretonne/test_utils/make_inst.rs
+++ b/src/libcretonne/test_utils/make_inst.rs
@ -2,7 +2,7 @@

 use ir::{Function, Ebb, Inst, Opcode};
 use ir::entities::NO_VALUE;
-use ir::instructions::{InstructionData, VariableArgs, JumpData, BranchData};
+use ir::instructions::{InstructionData, ReturnData, VariableArgs, JumpData, BranchData};
 use ir::types;

 pub fn jump(func: &mut Function, dest: Ebb) -> Inst {
@ -27,3 +27,11 @@ pub fn branch(func: &mut Function, dest: Ebb) -> Inst {
        }),
    })
 }
+
+pub fn ret(func: &mut Function) -> Inst {
+    func.dfg.make_inst(InstructionData::Return {
+        opcode: Opcode::Return,
+        ty: types::VOID,
+        data: Box::new(ReturnData { args: VariableArgs::new() }),
+    })
+}
--- a/src/libcretonne/verifier.rs
+++ b/src/libcretonne/verifier.rs
@ -0,0 +1,171 @@
+//! A verifier for ensuring that functions are well formed.
+//! It verifies:
+//!
+//!   EBB integrity
+//!
+//!    - All instructions reached from the ebb_insts iterator must belong to
+//!      the EBB as reported by inst_ebb().
+//!    - Every EBB must end in a terminator instruction, and no other instruction
+//!      can be a terminator.
+//!    - Every value in the ebb_args iterator belongs to the EBB as reported by value_ebb.
+//!
+//!   Instruction integrity
+//!
+//!    - The instruction format must match the opcode.
+//! TODO:
+//!    - All result values must be created for multi-valued instructions.
+//!    - Instructions with no results must have a VOID first_type().
+//!    - All referenced entities must exist. (Values, EBBs, stack slots, ...)
+//!
+//!   SSA form
+//!
+//!    - Values must be defined by an instruction that exists and that is inserted in
+//!      an EBB, or be an argument of an existing EBB.
+//!    - Values used by an instruction must dominate the instruction.
+//!     Control flow graph and dominator tree integrity:
+//!
+//!    - All predecessors in the CFG must be branches to the EBB.
+//!    - All branches to an EBB must be present in the CFG.
+//!    - A recomputed dominator tree is identical to the existing one.
+//!
+//!   Type checking
+//!
+//!    - Compare input and output values against the opcode's type constraints.
+//!      For polymorphic opcodes, determine the controlling type variable first.
+//!    - Branches and jumps must pass arguments to destination EBBs that match the
+//!      expected types excatly. The number of arguments must match.
+//!    - All EBBs in a jump_table must take no arguments.
+//!    - Function calls are type checked against their signature.
+//!    - The entry block must take arguments that match the signature of the current
+//!      function.
+//!    - All return instructions must have return value operands matching the current
+//!      function signature.
+//!
+//!   Ad hoc checking
+//!
+//!    - Stack slot loads and stores must be in-bounds.
+//!    - Immediate constraints for certain opcodes, like udiv_imm v3, 0.
+//!    - Extend / truncate instructions have more type constraints: Source type can't be
+//!      larger / smaller than result type.
+//!    - Insertlane and extractlane instructions have immediate lane numbers that must be in
+//!      range for their polymorphic type.
+//!    - Swizzle and shuffle instructions take a variable number of lane arguments. The number
+//!      of arguments must match the destination type, and the lane indexes must be in range.
+
+use ir::{Function, ValueDef, Ebb, Inst};
+use ir::instructions::InstructionFormat;
+
+pub struct Verifier<'a> {
+    func: &'a Function,
+}
+
+impl<'a> Verifier<'a> {
+    pub fn new(func: &'a Function) -> Verifier {
+        Verifier { func: func }
+    }
+
+    fn ebb_integrity(&self, ebb: Ebb, inst: Inst) -> Result<(), String> {
+
+        let is_terminator = self.func.dfg[inst].is_terminating();
+        let is_last_inst = self.func.layout.last_inst(ebb) == inst;
+
+        if is_terminator && !is_last_inst {
+            // Terminating instructions only occur at the end of blocks.
+            return Err(format!("A terminating instruction was encountered before the \
+                                end of ebb {:?}!",
+                               ebb));
+        }
+        if is_last_inst && !is_terminator {
+            return Err(format!("Block {:?} does not end in a terminating instruction!", ebb));
+        }
+
+        // Instructions belong to the correct ebb.
+        let inst_ebb = self.func.layout.inst_ebb(inst);
+        if inst_ebb != Some(ebb) {
+            return Err(format!("{:?} should belong to {:?} not {:?}", inst, ebb, inst_ebb));
+        }
+
+        // Arguments belong to the correct ebb.
+        for arg in self.func.dfg.ebb_args(ebb) {
+            match self.func.dfg.value_def(arg) {
+                ValueDef::Arg(arg_ebb, _) => {
+                    if ebb != arg_ebb {
+                        return Err(format!("{:?} does not belong to {:?}", arg, ebb));
+                    }
+                }
+                _ => {
+                    return Err("Expected an argument, found a result!".to_string());
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn instruction_integrity(&self, inst: Inst) -> Result<(), String> {
+        let inst_data = &self.func.dfg[inst];
+
+        // The instruction format matches the opcode
+        if inst_data.opcode().format() != Some(InstructionFormat::from(inst_data)) {
+            return Err("Instruction opcode doesn't match instruction format!".to_string());
+        }
+
+        Ok(())
+    }
+
+    pub fn run(&self) -> Result<(), String> {
+        for ebb in self.func.layout.ebbs() {
+            for inst in self.func.layout.ebb_insts(ebb) {
+                try!(self.ebb_integrity(ebb, inst));
+                try!(self.instruction_integrity(inst));
+            }
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    extern crate regex;
+
+    use super::*;
+    use ir::Function;
+    use ir::instructions::{InstructionData, Opcode};
+    use ir::types;
+    use self::regex::Regex;
+
+    macro_rules! assert_err_with_msg {
+        ($e:expr, $msg:expr) => (
+            let err_re = Regex::new($msg).unwrap();
+            match $e {
+                Ok(_) => { panic!("Expected an error!") },
+                Err(err_msg) => {
+                    if !err_re.is_match(&err_msg) {
+                       panic!(format!("'{}' did not contain the pattern '{}'", err_msg, $msg));
+                    }
+                }
+            }
+        )
+    }
+
+    #[test]
+    fn empty() {
+        let func = Function::new();
+        let verifier = Verifier::new(&func);
+        assert_eq!(verifier.run(), Ok(()));
+    }
+
+    #[test]
+    fn bad_instruction_format() {
+        let mut func = Function::new();
+        let ebb0 = func.dfg.make_ebb();
+        func.layout.append_ebb(ebb0);
+        let nullary_with_bad_opcode = func.dfg.make_inst(InstructionData::Nullary {
+            opcode: Opcode::Jump,
+            ty: types::VOID,
+        });
+        func.layout.append_inst(nullary_with_bad_opcode, ebb0);
+        let verifier = Verifier::new(&func);
+        assert_err_with_msg!(verifier.run(), "instruction format");
+    }
+}
--- a/src/tools/Cargo.lock
+++ b/src/tools/Cargo.lock
@ -21,6 +21,9 @@ dependencies = [
 [[package]]
 name = "cretonne"
 version = "0.0.0"
+dependencies = [
+ "regex 0.1.71 (registry+https://github.com/rust-lang/crates.io-index)",
+]

 [[package]]
 name = "cretonne-reader"
--- a/src/tools/tests/verifier.rs
+++ b/src/tools/tests/verifier.rs
@ -0,0 +1,66 @@
+extern crate cretonne;
+extern crate cton_reader;
+extern crate glob;
+extern crate regex;
+
+use std::env;
+use glob::glob;
+use regex::Regex;
+use std::fs::File;
+use std::io::Read;
+use self::cton_reader::parser::Parser;
+use self::cretonne::verifier::Verifier;
+
+/// Compile a function and run verifier tests based on specially formatted
+/// comments in the [function's] source.
+fn verifier_tests_from_source(function_source: &str) {
+    let func_re = Regex::new("^[ \t]*function.*").unwrap();
+    let err_re = Regex::new(";[ \t]*Err\\((.*)+\\)").unwrap();
+
+    // Each entry corresponds to an optional regular expression, where
+    // the index corresponds to the function offset in our source code.
+    // If no error is expected for a given function its entry will be
+    // set to None.
+    let mut verifier_results = Vec::new();
+    for line in function_source.lines() {
+        if func_re.is_match(line) {
+            match err_re.captures(line) {
+                Some(caps) => {
+                    verifier_results.push(Some(Regex::new(caps.at(1).unwrap()).unwrap()));
+                },
+                None => {
+                    verifier_results.push(None);
+                },
+            };
+        }
+    }
+
+    // Run the verifier against each function and compare the output
+    // with the expected result (as determined above).
+    for (i, func) in Parser::parse(function_source).unwrap().into_iter().enumerate() {
+        let result = Verifier::new(&func).run();
+        match verifier_results[i] {
+            Some(ref re) => {
+                assert_eq!(re.is_match(&result.err().unwrap()), true);
+            },
+            None => {
+                assert_eq!(result, Ok(()));
+            }
+        }
+    }
+}
+
+#[test]
+fn test_all() {
+    let testdir = format!("{}/tests/verifier_testdata/*.cton",
+                          env::current_dir().unwrap().display());
+
+    for entry in glob(&testdir).unwrap() {
+        let path = entry.unwrap();
+        println!("Testing {:?}", path);
+        let mut file = File::open(&path).unwrap();
+        let mut buffer = String::new();
+        file.read_to_string(&mut buffer).unwrap();
+        verifier_tests_from_source(&buffer);
+    }
+}
--- a/src/tools/tests/verifier_testdata/bad_layout.cton
+++ b/src/tools/tests/verifier_testdata/bad_layout.cton
@ -0,0 +1,17 @@
+function test(i32) {    ; Err(terminating)
+    ebb0(v0: i32):
+        jump ebb1
+        return
+    ebb1:
+        jump ebb2
+        brz v0, ebb3
+    ebb2:
+        jump ebb3
+    ebb3:
+        return
+}
+
+function test(i32) {    ; Ok
+    ebb0(v0: i32):
+        return
+}