Parse controlling type variable. Do basic type inference.

Replace the make_multi_inst() function with make_inst_results(), which uses the constraint system to create the result values. A typevar argument ensures that this function does not infer anything from the instruction data arguments, which may not be valid during parsing. Implement basic type inference in the parser: if the designated value operand on a polymorphic instruction refers to a known value, use its type to infer the controlling type variable. This simple method of type inference requires the operand value to be defined above its use in the text. Since reordering the EBBs could place a dominating EBB below the current one, this is a bit fragile. One possibility would be to require that the value be defined in the same EBB. In all other cases, the controlling typevar should be explicit.
9 years ago · ecd8287eb0
6 changed files with 211 additions and 64 deletions
--- a/meta/gen_instr.py
+++ b/meta/gen_instr.py
@ -70,6 +70,14 @@ def gen_instruction_data_impl(fmt):
                            'InstructionData::{} {{ ty, .. }} => ty,'
                            .format(f.name))

+        fmt.doc_comment('Mutable reference to the type of the first result.')
+        with fmt.indented('pub fn first_type_mut(&mut self) -> &mut Type {', '}'):
+            with fmt.indented('match *self {', '}'):
+                for f in cretonne.InstructionFormat.all_formats:
+                    fmt.line(
+                            'InstructionData::{} {{ ref mut ty, .. }} => ty,'
+                            .format(f.name))
+
        # Generate shared and mutable accessors for `second_result` which only
        # applies to instruction formats that can produce multiple results.
        # Everything else returns `None`.
@ -120,6 +128,38 @@ def gen_instruction_data_impl(fmt):
                                ' { ref mut second_result, .. }' +
                                ' => Some(second_result),')

+        fmt.doc_comment('Get the controlling type variable operand.')
+        with fmt.indented(
+                'pub fn typevar_operand(&self) -> Option<Value> {', '}'):
+            with fmt.indented('match *self {', '}'):
+                for f in cretonne.InstructionFormat.all_formats:
+                    n = 'InstructionData::' + f.name
+                    if f.typevar_operand is None:
+                        fmt.line(n + ' { .. } => None,')
+                    elif len(f.value_operands) == 1:
+                        # We have a single value operand called 'arg'.
+                        if f.boxed_storage:
+                            fmt.line(
+                                    n + ' { ref data, .. } => Some(data.arg),')
+                        else:
+                            fmt.line(n + ' { arg, .. } => Some(arg),')
+                    else:
+                        # We have multiple value operands and an array `args`.
+                        # Which `args` index to use?
+                        # Map from index into f.kinds into f.value_operands
+                        # index.
+                        i = f.value_operands.index(f.typevar_operand)
+                        if f.boxed_storage:
+                            fmt.line(
+                                    n +
+                                    ' {{ ref data, .. }} => Some(data.args[{}]),'
+                                    .format(i))
+                        else:
+                            fmt.line(
+                                    n +
+                                    ' {{ ref args, .. }} => Some(args[{}]),'
+                                    .format(i))
+

 def collect_instr_groups(targets):
    seen = set()
--- a/src/libcretonne/instructions.rs
+++ b/src/libcretonne/instructions.rs
@ -142,15 +142,15 @@ pub enum InstructionData {
    BinaryImm {
        opcode: Opcode,
        ty: Type,
-        lhs: Value,
-        rhs: Imm64,
+        arg: Value,
+        imm: Imm64,
    },
    // Same as BinaryImm, but the immediate is the lhs operand.
    BinaryImmRev {
        opcode: Opcode,
        ty: Type,
-        rhs: Value,
-        lhs: Imm64,
+        arg: Value,
+        imm: Imm64,
    },
    BinaryOverflow {
        opcode: Opcode,
@ -366,6 +366,11 @@ impl OpcodeConstraints {
    pub fn ctrl_typeset(self) -> Option<ValueTypeSet> {
        self.typeset_offset().map(|offset| TYPE_SETS[offset])
    }
+
+    /// Is this instruction polymorphic?
+    pub fn is_polymorphic(self) -> bool {
+        self.ctrl_typeset().is_some()
+    }
 }

 /// A value type set describes the permitted set of types for a type variable.
--- a/src/libcretonne/repr.rs
+++ b/src/libcretonne/repr.rs
@ -88,8 +88,8 @@ impl Function {

    /// Create a new instruction.
    ///
-    /// The instruction is allowed to produce at most one result as indicated by `data.ty`. Use
-    /// `make_multi_inst()` to create instructions with multiple results.
+    /// The type of the first result is indicated by `data.ty`. If the instruction produces
+    /// multiple results, also call `make_inst_results` to allocate value table entries.
    pub fn make_inst(&mut self, data: InstructionData) -> Inst {
        let inst = Inst::new(self.instructions.len());
        self.instructions.push(data);
@ -101,40 +101,59 @@ impl Function {
        inst
    }

-    /// Make an instruction that may produce multiple results.
+    fn inst_mut(&mut self, inst: Inst) -> &mut InstructionData {
+        &mut self.instructions[inst.index()]
+    }
+
+    /// Create result values for an instruction that produces multiple results.
+    ///
+    /// Instructions that produce 0 or 1 result values only need to be created with `make_inst`. If
+    /// the instruction may produce more than 1 result, call `make_inst_results` to allocate
+    /// `Value` table entries for the additional results.
+    ///
+    /// The result value types are determined from the instruction's value type constraints and the
+    /// provided `ctrl_typevar` type for polymorphic instructions. For non-polymorphic
+    /// instructions, `ctrl_typevar` is ignored, and `VOID` can be used.
    ///
-    /// The type of the first result is `data.ty`. If the instruction generates more than one
-    /// result, additional result types are in `extra_result_types`.
+    /// The type of the first result value is also set, even if it was already set in the
+    /// `InstructionData` passed to `make_inst`. If this function is called with a single-result
+    /// instruction, that is the only effect.
    ///
-    /// Not all instruction formats can represent multiple result values. This function will panic
-    /// if the format of `data` is insufficient.
-    pub fn make_multi_inst(&mut self, data: InstructionData, extra_result_types: &[Type]) -> Inst {
-        let inst = self.make_inst(data);
-
-        if !extra_result_types.is_empty() {
-            // Additional values form a linked list starting from the second result value. Generate
-            // the list backwards so we don't have to modify value table entries in place. (This
-            // causes additional result values to be numbered backwards which is not the aestetic
-            // choice, but since it is only visible in extremely rare instructions with 3+ results,
-            // we don't care).
-            let mut head = NO_VALUE;
-            for ty in extra_result_types.into_iter().rev() {
+    /// Returns the number of results produced by the instruction.
+    pub fn make_inst_results(&mut self, inst: Inst, ctrl_typevar: Type) -> usize {
+        let constraints = self[inst].opcode().constraints();
+        let fixed_results = constraints.fixed_results();
+
+        // Additional values form a linked list starting from the second result value. Generate
+        // the list backwards so we don't have to modify value table entries in place. (This
+        // causes additional result values to be numbered backwards which is not the aesthetic
+        // choice, but since it is only visible in extremely rare instructions with 3+ results,
+        // we don't care).
+        let mut head = NO_VALUE;
+        let mut first_type = Type::default();
+
+        // TBD: Function call return values for direct and indirect function calls.
+
+        if fixed_results > 0 {
+            for res_idx in (1..fixed_results).rev() {
                head = self.make_value(ValueData::Def {
-                    ty: *ty,
+                    ty: constraints.result_type(res_idx, ctrl_typevar),
                    def: inst,
                    next: head,
                });
            }
+            first_type = constraints.result_type(0, ctrl_typevar);
+        }

-            // Update the second_result pointer in `inst`.
-            if let Some(second_result_ref) = self.instructions[inst.index()].second_result_mut() {
-                *second_result_ref = head;
-            } else {
-                panic!("Instruction format doesn't allow multiple results.");
-            }
+        // Update the second_result pointer in `inst`.
+        if head != NO_VALUE {
+            *self.inst_mut(inst)
+                 .second_result_mut()
+                 .expect("instruction format doesn't allow multiple results") = head;
        }
+        *self.inst_mut(inst).first_type_mut() = first_type;

-        inst
+        fixed_results
    }

    /// Get the first result of an instruction.
@ -521,21 +540,6 @@ mod tests {
        assert_eq!(ins.first_type(), types::I32);
    }

-    #[test]
-    fn multiple_results() {
-        use types::*;
-        let mut func = Function::new();
-
-        let idata = InstructionData::call(Opcode::Vconst, I64);
-        let inst = func.make_multi_inst(idata, &[I8, F64]);
-        assert_eq!(inst.to_string(), "inst0");
-        let results: Vec<Value> = func.inst_results(inst).collect();
-        assert_eq!(results.len(), 3);
-        assert_eq!(func.value_type(results[0]), I64);
-        assert_eq!(func.value_type(results[1]), I8);
-        assert_eq!(func.value_type(results[2]), F64);
-    }
-
    #[test]
    fn stack_slot() {
        let mut func = Function::new();
--- a/src/libcretonne/types.rs
+++ b/src/libcretonne/types.rs
@ -1,6 +1,7 @@

 //! Common types for the Cretonne code generator.

+use std::default::Default;
 use std::fmt::{self, Display, Formatter, Write};

 // ====--------------------------------------------------------------------------------------====//
@ -202,6 +203,12 @@ impl Display for Type {
    }
 }

+impl Default for Type {
+    fn default() -> Type {
+        VOID
+    }
+}
+
 // ====--------------------------------------------------------------------------------------====//
 //
 // Function signatures
--- a/src/libcretonne/write.rs
+++ b/src/libcretonne/write.rs
@ -153,8 +153,8 @@ pub fn write_instruction(w: &mut Write, func: &Function, inst: Inst) -> Result {
        UnaryIeee64 { opcode, imm, .. } => writeln!(w, "{} {}", opcode, imm),
        UnaryImmVector { opcode, .. } => writeln!(w, "{} [...]", opcode),
        Binary { opcode, args, .. } => writeln!(w, "{} {}, {}", opcode, args[0], args[1]),
-        BinaryImm { opcode, lhs, rhs, .. } => writeln!(w, "{} {}, {}", opcode, lhs, rhs),
-        BinaryImmRev { opcode, lhs, rhs, .. } => writeln!(w, "{} {}, {}", opcode, lhs, rhs),
+        BinaryImm { opcode, arg, imm, .. } => writeln!(w, "{} {}, {}", opcode, arg, imm),
+        BinaryImmRev { opcode, imm, arg, .. } => writeln!(w, "{} {}, {}", opcode, imm, arg),
        BinaryOverflow { opcode, args, .. } => writeln!(w, "{} {}, {}", opcode, args[0], args[1]),
        Select { opcode, args, .. } => {
            writeln!(w, "{} {}, {}, {}", opcode, args[0], args[1], args[2])
--- a/src/libreader/parser.rs
+++ b/src/libreader/parser.rs
@ -144,18 +144,22 @@ impl<'a> Parser<'a> {
    }

    // Generate an error.
-    fn error(&self, message: &str) -> Error {
+    fn error_string(&self, message: String) -> Error {
        Error {
            location: self.location,
            message:
                // If we have a lexer error latched, report that.
                match self.lex_error {
                    Some(lexer::Error::InvalidChar) => "invalid character".to_string(),
-                    None => message.to_string(),
+                    None => message,
                }
        }
    }

+    fn error(&self, message: &str) -> Error {
+        self.error_string(message.to_string())
+    }
+
    // Match and consume a token without payload.
    fn match_token(&mut self, want: Token<'a>, err_msg: &str) -> Result<Token<'a>> {
        if self.token() == Some(want) {
@ -511,14 +515,15 @@ impl<'a> Parser<'a> {

    // Parse an instruction, append it to `ebb`.
    //
-    // instruction ::= [inst-results "="] Opcode(opc) ...
+    // instruction ::= [inst-results "="] Opcode(opc) ["." Type] ...
    // inst-results ::= Value(v) { "," Value(vx) }
    //
    fn parse_instruction(&mut self, ctx: &mut Context, ebb: Ebb) -> Result<()> {
        // Result value numbers.
        let mut results = Vec::new();

-        // instruction ::=  * [inst-results "="] Opcode(opc) ...
+        // instruction  ::=  * [inst-results "="] Opcode(opc) ["." Type] ...
+        // inst-results ::= * Value(v) { "," Value(vx) }
        if let Some(Token::Value(v)) = self.token() {
            self.consume();
            results.push(v);
@ -532,7 +537,7 @@ impl<'a> Parser<'a> {
            try!(self.match_token(Token::Equal, "expected '=' before opcode"));
        }

-        // instruction ::=  [inst-results "="] * Opcode(opc) ...
+        // instruction ::=  [inst-results "="] * Opcode(opc) ["." Type] ...
        let opcode = if let Some(Token::Identifier(text)) = self.token() {
            match text.parse() {
                Ok(opc) => opc,
@ -541,24 +546,110 @@ impl<'a> Parser<'a> {
        } else {
            return Err(self.error("expected instruction opcode"));
        };
+        self.consume();
+
+        // Look for a controlling type variable annotation.
+        // instruction ::=  [inst-results "="] Opcode(opc) * ["." Type] ...
+        let explicit_ctrl_type = if self.optional(Token::Dot) {
+            Some(try!(self.match_type("expected type after 'opcode.'")))
+        } else {
+            None
+        };

-        // instruction ::=  [inst-results "="] Opcode(opc) * ...
+        // instruction ::=  [inst-results "="] Opcode(opc) ["." Type] * ...
        let inst_data = try!(self.parse_inst_operands(opcode));
+
+        // We're done parsing the instruction now.
+        //
+        // We still need to check that the number of result values in the source matches the opcode
+        // or function call signature. We also need to create values with the right type for all
+        // the instruction results.
+        let ctrl_typevar = try!(self.infer_typevar(ctx, opcode, explicit_ctrl_type, &inst_data));
        let inst = ctx.function.make_inst(inst_data);
+        let num_results = ctx.function.make_inst_results(inst, ctrl_typevar);
+        ctx.function.append_inst(ebb, inst);

-        // TODO: Check that results.len() matches the opcode.
-        // TODO: Multiple results.
-        if !results.is_empty() {
-            assert!(results.len() == 1, "Multiple results not implemented");
-            let result = ctx.function.first_result(inst);
-            try!(ctx.add_value(results[0], result, &self.location));
+        if results.len() != num_results {
+            let m = format!("instruction produces {} result values, {} given",
+                            num_results,
+                            results.len());
+            return Err(self.error_string(m));
        }

-        ctx.function.append_inst(ebb, inst);
+        // Now map the source result values to the just created instruction results.
+        // We need to copy the list of result values to avoid fighting the borrow checker.
+        let new_results: Vec<Value> = ctx.function.inst_results(inst).collect();
+        for (src, val) in results.iter().zip(new_results) {
+            try!(ctx.add_value(*src, val, &self.location));
+        }

        Ok(())
    }

+    // Type inference for polymorphic instructions.
+    //
+    // The controlling type variable can be specified explicitly as 'splat.i32x4 v5', or it can be
+    // inferred from `inst_data.typevar_operand` for some opcodes.
+    //
+    // The value operands in `inst_data` are expected to use source numbering.
+    //
+    // Returns the controlling typevar for a polymorphic opcode, or `VOID` for a non-polymorphic
+    // opcode.
+    fn infer_typevar(&self,
+                     ctx: &Context,
+                     opcode: Opcode,
+                     explicit_ctrl_type: Option<Type>,
+                     inst_data: &InstructionData)
+                     -> Result<Type> {
+        let constraints = opcode.constraints();
+        let ctrl_type = match explicit_ctrl_type {
+            Some(t) => t,
+            None => {
+                if constraints.use_typevar_operand() {
+                    // This is an opcode that supports type inference, AND there was no explicit
+                    // type specified. Look up `ctrl_src_value` to see if it was defined already.
+                    // TBD: If it is defined in another block, the type should have been specified
+                    // explicitly. It is unfortunate that the correctness of IL depends on the
+                    // layout of the blocks.
+                    let ctrl_src_value = inst_data.typevar_operand()
+                                                  .expect("Constraints <-> Format inconsistency");
+                    ctx.function.value_type(match ctx.values.get(&ctrl_src_value) {
+                        Some(&v) => v,
+                        None => {
+                            let m = format!("cannot determine type of operand {}", ctrl_src_value);
+                            return Err(self.error_string(m));
+                        }
+                    })
+                } else if constraints.is_polymorphic() {
+                    // This opcode does not support type inference, so the explicit type variable
+                    // is required.
+                    return Err(self.error("type variable required for polymorphic opcode"));
+                } else {
+                    // This is a non-polymorphic opcode. No typevar needed.
+                    VOID
+                }
+            }
+        };
+
+        // Verify that `ctrl_type` is valid for the controlling type variable. We don't want to
+        // attempt deriving types from an incorrect basis.
+        // This is not a complete type check. The verifier does that.
+        if let Some(typeset) = constraints.ctrl_typeset() {
+            // This is a polymorphic opcode.
+            if !typeset.contains(ctrl_type) {
+                let m = format!("{} is not a valid typevar for {}", ctrl_type, opcode);
+                return Err(self.error_string(m));
+            }
+        } else {
+            // Treat it as a syntax error to specify a typevar on a non-polymorphic opcode.
+            if ctrl_type != VOID {
+                return Err(self.error_string(format!("{} does not take a typevar", opcode)));
+            }
+        }
+
+        Ok(ctrl_type)
+    }
+
    // Parse the operands following the instruction opcode.
    // This depends on the format of the opcode.
    fn parse_inst_operands(&mut self, opcode: Opcode) -> Result<InstructionData> {
@ -617,8 +708,8 @@ impl<'a> Parser<'a> {
                InstructionData::BinaryImm {
                    opcode: opcode,
                    ty: VOID,
-                    lhs: lhs,
-                    rhs: rhs,
+                    arg: lhs,
+                    imm: rhs,
                }
            }
            InstructionFormat::BinaryImmRev => {
@ -628,8 +719,8 @@ impl<'a> Parser<'a> {
                InstructionData::BinaryImmRev {
                    opcode: opcode,
                    ty: VOID,
-                    lhs: lhs,
-                    rhs: rhs,
+                    imm: lhs,
+                    arg: rhs,
                }
            }
            InstructionFormat::BinaryOverflow => {