Browse Source

Implement Microwasm

pull/397/head
Jef 6 years ago
parent
commit
f1d9ccb9e8
  1. 2
      Cargo.toml
  2. 1731
      src/backend.rs
  3. 734
      src/function_body.rs
  4. 3
      src/lib.rs
  5. 1696
      src/microwasm.rs
  6. 98
      src/module.rs
  7. 35
      src/tests.rs
  8. 50
      src/translate_sections.rs
  9. 2
      wasmparser.rs

2
Cargo.toml

@ -19,6 +19,8 @@ capstone = "0.5.0"
failure = "0.1.3" failure = "0.1.3"
failure_derive = "0.1.3" failure_derive = "0.1.3"
cranelift-codegen = "0.28" cranelift-codegen = "0.28"
multi_mut = "0.1"
either = "1.5"
wabt = "0.7" wabt = "0.7"
lazy_static = "1.2" lazy_static = "1.2"
quickcheck = "0.7" quickcheck = "0.7"

1731
src/backend.rs

File diff suppressed because it is too large

734
src/function_body.rs

@ -1,499 +1,367 @@
use backend::*; use crate::backend::*;
use error::Error; use crate::error::Error;
use module::{quickhash, ModuleContext, Signature}; use crate::microwasm::*;
use wasmparser::{FunctionBody, Operator, Type}; use crate::module::{quickhash, ModuleContext, SigType, Signature};
use either::{Either, Left, Right};
// TODO: Use own declared `Type` enum. use multi_mut::HashMapMultiMut;
use std::collections::HashMap;
/// Type of a control frame. use std::hash::Hash;
#[derive(Debug, Copy, Clone, PartialEq)]
enum ControlFrameKind { #[derive(Debug)]
/// A regular block frame. struct Block {
/// label: BrTarget<Label>,
/// Can be used for an implicit function block. calling_convention: Option<Either<CallingConvention, VirtualCallingConvention>>,
Block { end_label: Label }, params: u32,
/// Loop frame (branching to the beginning of block). // TODO: Is there a cleaner way to do this? `has_backwards_callers` should always be set if `is_next`
Loop { header: Label }, // is false, so we should probably use an `enum` here.
/// True-subblock of if expression. is_next: bool,
IfTrue { num_callers: Option<u32>,
/// If jump happens inside the if-true block then control will actual_num_callers: u32,
/// land on this label. has_backwards_callers: bool,
end_label: Label,
/// If the condition of the `if` statement is unsatisfied, control
/// will land on this label. This label might point to `else` block if it
/// exists. Otherwise it is equal to `end_label`.
if_not: Label,
},
/// False-subblock of if expression.
IfFalse { end_label: Label },
} }
impl ControlFrameKind { impl Block {
/// Returns a label which should be used as a branch destination. fn should_serialize_args(&self) -> bool {
fn block_end(&self) -> Option<Label> { self.calling_convention.is_none()
match *self { && (self.num_callers != Some(1) || self.has_backwards_callers)
ControlFrameKind::Block { end_label } => Some(end_label),
ControlFrameKind::IfTrue { end_label, .. } => Some(end_label),
ControlFrameKind::IfFalse { end_label } => Some(end_label),
ControlFrameKind::Loop { .. } => None,
}
}
fn end_labels(&self) -> impl Iterator<Item = Label> {
self.block_end()
.into_iter()
.chain(if let ControlFrameKind::IfTrue { if_not, .. } = self {
// this is `if .. end` construction. Define the `if_not` label.
Some(*if_not)
} else {
None
})
}
fn is_loop(&self) -> bool {
match *self {
ControlFrameKind::Loop { .. } => true,
_ => false,
}
}
fn branch_target(&self) -> Label {
match *self {
ControlFrameKind::Block { end_label } => end_label,
ControlFrameKind::IfTrue { end_label, .. } => end_label,
ControlFrameKind::IfFalse { end_label } => end_label,
ControlFrameKind::Loop { header } => header,
}
}
}
struct ControlFrame {
kind: ControlFrameKind,
/// Boolean which signals whether value stack became polymorphic. Value stack starts in non-polymorphic state and
/// becomes polymorphic only after an instruction that never passes control further is executed,
/// i.e. `unreachable`, `br` (but not `br_if`!), etc.
unreachable: bool,
/// State specific to the block (free temp registers, stack etc) which should be replaced
/// at the end of the block
block_state: BlockState,
arity: u32,
}
fn arity(ty: Type) -> u32 {
if ty == Type::EmptyBlockType {
0
} else {
1
}
}
impl ControlFrame {
pub fn new(kind: ControlFrameKind, block_state: BlockState, arity: u32) -> ControlFrame {
ControlFrame {
kind,
block_state,
arity,
unreachable: false,
}
}
pub fn arity(&self) -> u32 {
self.arity
}
/// Marks this control frame as having reached the stack-polymorphic state.
pub fn mark_unreachable(&mut self) {
self.unreachable = true;
} }
} }
pub fn translate<M: ModuleContext>( pub fn translate<M: ModuleContext, I, L>(
session: &mut CodeGenSession<M>, session: &mut CodeGenSession<M>,
func_idx: u32, func_idx: u32,
body: &FunctionBody, body: I,
) -> Result<(), Error> { ) -> Result<(), Error>
fn break_from_control_frame_with_id<_M: ModuleContext>( where
ctx: &mut Context<_M>, I: IntoIterator<Item = Operator<L>>,
control_frames: &mut Vec<ControlFrame>, L: Hash + Clone + Eq,
idx: usize, Operator<L>: std::fmt::Display,
) { {
let control_frame = control_frames.get_mut(idx).expect("wrong depth"); let func_type = session.module_context.defined_func_type(func_idx);
let mut body = body.into_iter().peekable();
if control_frame.kind.is_loop() {
ctx.restore_locals_to(&control_frame.block_state.locals);
} else {
// We can't do any execution after the function end so we just skip this logic
// if we're breaking out of the whole function.
if idx != 0 {
// Workaround for borrowck limitations
let should_set = if let Some(locals) = control_frame.block_state.end_locals.as_ref()
{
ctx.restore_locals_to(locals);
false
} else {
true
};
if should_set {
control_frame.block_state.end_locals = Some(ctx.block_state.locals.clone());
}
}
ctx.return_from_block(control_frame.arity());
}
ctx.br(control_frame.kind.branch_target());
}
let locals = body.get_locals_reader()?;
let func_type = session.module_context.func_type(func_idx);
let arg_count = func_type.params().len() as u32;
let return_arity = func_type.returns().len() as u32;
let mut num_locals = 0;
for local in locals {
let (count, _ty) = local?;
num_locals += count;
}
let ctx = &mut session.new_context(func_idx); let ctx = &mut session.new_context(func_idx);
let operators = body.get_operators_reader()?;
// TODO: Do we need this `function_block_state`? If we transformed to use an arbitrary
// CFG all this code would become way simpler.
let func = ctx.start_function(arg_count, num_locals);
let mut control_frames = Vec::new();
// Upon entering the function, an implicit frame for the function body is pushed. It has the same
// result type as the function itself. Branching to it is equivalent to returning from the function.
let epilogue_label = ctx.create_label();
// TODO: I want to ideally not have the concept of "returning" at all and model everything as a CFG,
// with "returning" being modelled as "calling the end of the function". That means that passing
// arguments in argument registers and returning values in return registers are modelled
// identically.
control_frames.push(ControlFrame::new(
ControlFrameKind::Block {
end_label: epilogue_label,
},
Default::default(),
return_arity,
));
let mut operators = itertools::put_back(operators.into_iter()); let params = func_type
.params()
.iter()
.map(|t| t.to_microwasm_type())
.collect::<Vec<_>>();
ctx.start_function(params.iter().cloned());
let mut blocks = HashMap::<BrTarget<L>, Block>::new();
let num_returns = func_type.returns().len();
blocks.insert(
BrTarget::Return,
Block {
label: BrTarget::Return,
params: num_returns as u32,
// TODO: This only works for integers
//
calling_convention: Some(Left(CallingConvention::function_start(ret_locs(
func_type.returns().iter().map(|t| t.to_microwasm_type()),
)))),
is_next: false,
has_backwards_callers: false,
actual_num_callers: 0,
num_callers: None,
},
);
// TODO: We want to make this a state machine (maybe requires 1-element lookahead? Not sure) so that we
// can coelesce multiple `end`s and optimise break-at-end-of-block into noop.
// TODO: Does coelescing multiple `end`s matter since at worst this really only elides a single move at
// the end of a function, and this is probably a no-op anyway due to register renaming.
loop { loop {
if control_frames let op = if let Some(op) = body.next() {
.last() op
.map(|c| c.unreachable)
.unwrap_or(false)
{
use self::Operator::{Block, Else, End, If, Loop};
let mut depth = 0;
loop {
let op = if let Some(op) = operators.next() {
op?
} else {
break;
};
match op {
If { .. } | Block { .. } | Loop { .. } => depth += 1,
End => {
if depth == 0 {
operators.put_back(Ok(op));
break;
} else {
depth -= 1;
}
}
Else => {
if depth == 0 {
operators.put_back(Ok(op));
break;
}
}
_ => {}
}
}
}
let op = if let Some(op) = operators.next() {
op?
} else { } else {
break; break;
}; };
if let Some(Operator::Label(label)) = body.peek() {
let block = blocks
.get_mut(&BrTarget::Label(label.clone()))
.expect("Block definition should be before label definition");
block.is_next = true;
}
match op { match op {
Operator::Unreachable => { Operator::Unreachable => {
control_frames
.last_mut()
.expect("control stack is never empty")
.mark_unreachable();
ctx.trap(); ctx.trap();
} }
Operator::Block { ty } => { Operator::Label(label) => {
let label = ctx.create_label(); use std::collections::hash_map::Entry;
let state = ctx.start_block();
control_frames.push(ControlFrame::new( if let Entry::Occupied(mut entry) = blocks.entry(BrTarget::Label(label)) {
ControlFrameKind::Block { end_label: label }, let has_backwards_callers = {
state, let block = entry.get_mut();
arity(ty),
)); // TODO: Is it possible with arbitrary CFGs that a block will have _only_ backwards callers?
} // Certainly for Wasm that is currently impossible.
Operator::Return => { if block.actual_num_callers == 0 {
control_frames loop {
.last_mut() let done = match body.peek() {
.expect("Control stack is empty!") Some(Operator::Label(_)) | None => true,
.mark_unreachable(); Some(_) => false,
};
break_from_control_frame_with_id(ctx, &mut control_frames, 0);
} if done {
Operator::Br { relative_depth } => { break;
control_frames }
.last_mut()
.expect("Control stack is empty!") body.next();
.mark_unreachable(); }
let idx = control_frames.len() - 1 - relative_depth as usize; continue;
}
break_from_control_frame_with_id(ctx, &mut control_frames, idx);
}
Operator::BrIf { relative_depth } => {
let idx = control_frames.len() - 1 - relative_depth as usize;
let if_not = ctx.create_label(); block.is_next = false;
// TODO: We can `take` this if it's a `Right`
match block.calling_convention.as_ref() {
Some(Left(cc)) => {
ctx.apply_cc(cc);
}
Some(Right(virt)) => {
ctx.set_state(virt.clone());
}
_ => {}
}
ctx.jump_if_false(if_not); ctx.define_label(block.label.label().unwrap().clone());
break_from_control_frame_with_id(ctx, &mut control_frames, idx); block.has_backwards_callers
};
ctx.define_label(if_not); // To reduce memory overhead
} if !has_backwards_callers {
Operator::If { ty } => { entry.remove_entry();
let end_label = ctx.create_label(); }
let if_not = ctx.create_label(); } else {
panic!("Label defined before being declared");
ctx.jump_if_false(if_not); }
let state = ctx.start_block();
control_frames.push(ControlFrame::new(
ControlFrameKind::IfTrue { end_label, if_not },
state,
arity(ty),
));
} }
Operator::Loop { ty } => { Operator::Block {
let header = ctx.create_label(); label,
has_backwards_callers,
ctx.define_label(header); params,
let state = ctx.start_block(); num_callers,
} => {
control_frames.push(ControlFrame::new( let asm_label = ctx.create_label();
ControlFrameKind::Loop { header }, blocks.insert(
state, BrTarget::Label(label),
arity(ty), Block {
)); label: BrTarget::Label(asm_label),
params: params.len() as _,
calling_convention: None,
is_next: false,
has_backwards_callers,
actual_num_callers: 0,
num_callers,
},
);
} }
Operator::Else => { Operator::Br { target } => {
match control_frames.pop() { // TODO: We should add the block to the hashmap if we don't have it already
Some(ControlFrame { let block = blocks.get_mut(&target).unwrap();
kind: ControlFrameKind::IfTrue { if_not, end_label }, block.actual_num_callers += 1;
arity,
block_state, let should_serialize_args = block.should_serialize_args();
unreachable,
}) => { match block {
if !unreachable { Block {
ctx.return_from_block(arity); is_next,
label: BrTarget::Label(l),
calling_convention,
..
} => {
let cc = if should_serialize_args {
*calling_convention = Some(Left(ctx.serialize_args(block.params)));
None
} else {
calling_convention
.as_ref()
.map(Either::as_ref)
.and_then(Either::left)
};
if let Some(cc) = cc {
ctx.pass_block_args(cc);
} }
ctx.reset_block(block_state.clone()); if !*is_next {
ctx.br(*l);
// Finalize `then` block by jumping to the `end_label`. }
ctx.br(end_label);
// Define `if_not` label here, so if the corresponding `if` block receives
// 0 it will branch here.
// After that reset stack depth to the value before entering `if` block.
ctx.define_label(if_not);
// Carry over the `end_label`, so it will be resolved when the corresponding `end`
// is encountered.
//
// Also note that we reset `stack_depth` to the value before entering `if` block.
let mut frame = ControlFrame::new(
ControlFrameKind::IfFalse { end_label },
block_state,
arity,
);
control_frames.push(frame);
} }
Some(_) => panic!("else expects if block"), Block {
None => panic!("control stack is never empty"), label: BrTarget::Return,
}; calling_convention: Some(Left(cc)),
..
} => {
ctx.pass_block_args(cc);
ctx.ret();
}
_ => unimplemented!(),
}
} }
Operator::End => { Operator::BrIf { then, else_ } => {
// TODO: Merge `End`s so that we can // TODO: We should add the block to the hashmap if we don't have it already
// A) Move values directly into RAX when returning from deeply-nested blocks. let (then_block, else_block) = blocks.pair_mut(&then, &else_);
// B) Avoid restoring locals when not necessary. then_block.actual_num_callers += 1;
// else_block.actual_num_callers += 1;
// This doesn't require lookahead but it does require turning this loop into
// a kind of state machine. let then_block_parts = (then_block.is_next, then_block.label);
let mut control_frame = control_frames.pop().expect("control stack is never empty"); let else_block_parts = (else_block.is_next, else_block.label);
let mut labels = control_frame.kind.end_labels().collect::<Vec<_>>();
let mut unreachable = control_frame.unreachable; // TODO: Use "compatible" cc
assert_eq!(then_block.params, else_block.params);
let mut end = control_frame.block_state.end_locals.take();
// TODO: The blocks should have compatible (one must be subset of other?) calling
// Fold `End`s together to prevent unnecessary shuffling of locals // conventions or else at least one must have no calling convention. This
loop { // should always be true for converting from WebAssembly AIUI.
let op = if let Some(op) = operators.next() { let f = |ctx: &mut Context<_>| {
op? let then_block_should_serialize_args = then_block.should_serialize_args();
} else { let else_block_should_serialize_args = else_block.should_serialize_args();
break;
}; match (
&mut then_block.calling_convention,
match op { &mut else_block.calling_convention,
Operator::End => { ) {
control_frame = (Some(Left(ref cc)), ref mut other @ None)
control_frames.pop().expect("control stack is never empty"); | (ref mut other @ None, Some(Left(ref cc))) => {
**other = Some(Left(cc.clone()));
labels.extend(control_frame.kind.end_labels());
unreachable = unreachable || control_frame.unreachable; ctx.pass_block_args(cc);
end = control_frame.block_state.end_locals.take().or(end);
} }
other => { (ref mut then_cc @ None, ref mut else_cc @ None) => {
operators.put_back(Ok(other)); let cc = if then_block_should_serialize_args {
break; Some(Left(ctx.serialize_args(then_block.params)))
} else if else_block_should_serialize_args {
Some(Left(ctx.serialize_args(else_block.params)))
} else {
Some(Right(ctx.virtual_calling_convention()))
};
**then_cc = cc.clone();
**else_cc = cc;
} }
_ => unimplemented!(),
} }
} };
let arity = control_frame.arity();
// Don't bother generating this code if we're in unreachable code
if !unreachable {
ctx.return_from_block(arity);
// If there are no remaining frames we've hit the end of the function - we don't need to match (then_block_parts, else_block_parts) {
// restore locals since no execution will happen after this point. ((true, _), (false, BrTarget::Label(else_))) => {
if !control_frames.is_empty() { ctx.br_if_false(else_, f);
if let Some(end) = end {
ctx.restore_locals_to(&end);
}
} }
} ((false, BrTarget::Label(then)), (true, _)) => {
ctx.br_if_true(then, f);
// TODO: What is the correct order of this and the `define_label`? It's clear for `block`s }
// but I'm not certain for `if..then..else..end`. ((false, BrTarget::Label(then)), (false, BrTarget::Label(else_))) => {
ctx.end_block(control_frame.block_state, |ctx| { ctx.br_if_true(then, f);
for label in labels { ctx.br(else_);
ctx.define_label(label);
} }
}); other => unimplemented!("{:#?}", other),
}
} }
Operator::I32Eq => ctx.i32_eq(), Operator::Swap { depth } => ctx.swap(depth),
Operator::I32Eqz => ctx.i32_eqz(), Operator::Pick { depth } => ctx.pick(depth),
Operator::I32Ne => ctx.i32_neq(), Operator::Eq(I32) => ctx.i32_eq(),
Operator::I32LtS => ctx.i32_lt_s(), Operator::Eqz(Size::_32) => ctx.i32_eqz(),
Operator::I32LeS => ctx.i32_le_s(), Operator::Ne(I32) => ctx.i32_neq(),
Operator::I32GtS => ctx.i32_gt_s(), Operator::Lt(SI32) => ctx.i32_lt_s(),
Operator::I32GeS => ctx.i32_ge_s(), Operator::Le(SI32) => ctx.i32_le_s(),
Operator::I32LtU => ctx.i32_lt_u(), Operator::Gt(SI32) => ctx.i32_gt_s(),
Operator::I32LeU => ctx.i32_le_u(), Operator::Ge(SI32) => ctx.i32_ge_s(),
Operator::I32GtU => ctx.i32_gt_u(), Operator::Lt(SU32) => ctx.i32_lt_u(),
Operator::I32GeU => ctx.i32_ge_u(), Operator::Le(SU32) => ctx.i32_le_u(),
Operator::I32Add => ctx.i32_add(), Operator::Gt(SU32) => ctx.i32_gt_u(),
Operator::I32Sub => ctx.i32_sub(), Operator::Ge(SU32) => ctx.i32_ge_u(),
Operator::I32And => ctx.i32_and(), Operator::Add(I32) => ctx.i32_add(),
Operator::I32Or => ctx.i32_or(), Operator::Sub(I32) => ctx.i32_sub(),
Operator::I32Xor => ctx.i32_xor(), Operator::And(Size::_32) => ctx.i32_and(),
Operator::I32Mul => ctx.i32_mul(), Operator::Or(Size::_32) => ctx.i32_or(),
Operator::I32Shl => ctx.i32_shl(), Operator::Xor(Size::_32) => ctx.i32_xor(),
Operator::I32ShrS => ctx.i32_shr_s(), Operator::Mul(I32) => ctx.i32_mul(),
Operator::I32ShrU => ctx.i32_shr_u(), Operator::Shl(Size::_32) => ctx.i32_shl(),
Operator::I32Rotl => ctx.i32_rotl(), Operator::Shr(sint::I32) => ctx.i32_shr_s(),
Operator::I32Rotr => ctx.i32_rotr(), Operator::Shr(sint::U32) => ctx.i32_shr_u(),
Operator::I32Clz => ctx.i32_clz(), Operator::Rotl(Size::_32) => ctx.i32_rotl(),
Operator::I32Ctz => ctx.i32_ctz(), Operator::Rotr(Size::_32) => ctx.i32_rotr(),
Operator::I32Popcnt => ctx.i32_popcnt(), Operator::Clz(Size::_32) => ctx.i32_clz(),
Operator::I64Eq => ctx.i64_eq(), Operator::Ctz(Size::_32) => ctx.i32_ctz(),
Operator::I64Eqz => ctx.i64_eqz(), Operator::Popcnt(Size::_32) => ctx.i32_popcnt(),
Operator::I64Ne => ctx.i64_neq(), Operator::Eq(I64) => ctx.i64_eq(),
Operator::I64LtS => ctx.i64_lt_s(), Operator::Eqz(Size::_64) => ctx.i64_eqz(),
Operator::I64LeS => ctx.i64_le_s(), Operator::Ne(I64) => ctx.i64_neq(),
Operator::I64GtS => ctx.i64_gt_s(), Operator::Lt(SI64) => ctx.i64_lt_s(),
Operator::I64GeS => ctx.i64_ge_s(), Operator::Le(SI64) => ctx.i64_le_s(),
Operator::I64LtU => ctx.i64_lt_u(), Operator::Gt(SI64) => ctx.i64_gt_s(),
Operator::I64LeU => ctx.i64_le_u(), Operator::Ge(SI64) => ctx.i64_ge_s(),
Operator::I64GtU => ctx.i64_gt_u(), Operator::Lt(SU64) => ctx.i64_lt_u(),
Operator::I64GeU => ctx.i64_ge_u(), Operator::Le(SU64) => ctx.i64_le_u(),
Operator::I64Add => ctx.i64_add(), Operator::Gt(SU64) => ctx.i64_gt_u(),
Operator::I64Sub => ctx.i64_sub(), Operator::Ge(SU64) => ctx.i64_ge_u(),
Operator::I64And => ctx.i64_and(), Operator::Add(I64) => ctx.i64_add(),
Operator::I64Or => ctx.i64_or(), Operator::Sub(I64) => ctx.i64_sub(),
Operator::I64Xor => ctx.i64_xor(), Operator::And(Size::_64) => ctx.i64_and(),
Operator::I64Mul => ctx.i64_mul(), Operator::Or(Size::_64) => ctx.i64_or(),
Operator::I64Shl => ctx.i64_shl(), Operator::Xor(Size::_64) => ctx.i64_xor(),
Operator::I64ShrS => ctx.i64_shr_s(), Operator::Mul(I64) => ctx.i64_mul(),
Operator::I64ShrU => ctx.i64_shr_u(), Operator::Shl(Size::_64) => ctx.i64_shl(),
Operator::I64Rotl => ctx.i64_rotl(), Operator::Shr(sint::I64) => ctx.i64_shr_s(),
Operator::I64Rotr => ctx.i64_rotr(), Operator::Shr(sint::U64) => ctx.i64_shr_u(),
Operator::I64Clz => ctx.i64_clz(), Operator::Rotl(Size::_64) => ctx.i64_rotl(),
Operator::I64Ctz => ctx.i64_ctz(), Operator::Rotr(Size::_64) => ctx.i64_rotr(),
Operator::I64Popcnt => ctx.i64_popcnt(), Operator::Clz(Size::_64) => ctx.i64_clz(),
Operator::Drop => ctx.drop(), Operator::Ctz(Size::_64) => ctx.i64_ctz(),
Operator::SetLocal { local_index } => ctx.set_local(local_index), Operator::Popcnt(Size::_64) => ctx.i64_popcnt(),
Operator::GetLocal { local_index } => ctx.get_local(local_index), Operator::Drop(range) => ctx.drop(range),
Operator::TeeLocal { local_index } => ctx.tee_local(local_index), Operator::Const(Value::I32(value)) => ctx.i32_literal(value),
Operator::I32Const { value } => ctx.i32_literal(value), Operator::Const(Value::I64(value)) => ctx.i64_literal(value),
Operator::I64Const { value } => ctx.i64_literal(value), Operator::Load { ty: I32, memarg } => ctx.i32_load(memarg.offset)?,
Operator::I32Load { memarg } => ctx.i32_load(memarg.offset)?, Operator::Load { ty: I64, memarg } => ctx.i64_load(memarg.offset)?,
Operator::I64Load { memarg } => ctx.i64_load(memarg.offset)?, Operator::Store { ty: I32, memarg } => ctx.i32_store(memarg.offset)?,
Operator::I32Store { memarg } => ctx.i32_store(memarg.offset)?, Operator::Store { ty: I64, memarg } => ctx.i64_store(memarg.offset)?,
Operator::I64Store { memarg } => ctx.i64_store(memarg.offset)?,
Operator::Select => { Operator::Select => {
ctx.select(); ctx.select();
} }
Operator::Call { function_index } => { Operator::Call { function_index } => {
let function_index = session
.module_context
.defined_func_index(function_index)
.expect("We don't support host calls yet");
let callee_ty = session.module_context.func_type(function_index); let callee_ty = session.module_context.func_type(function_index);
// TODO: this implementation assumes that this function is locally defined. // TODO: this implementation assumes that this function is locally defined.
ctx.call_direct( ctx.call_direct(
function_index, function_index,
callee_ty.params().len() as u32, callee_ty.params().iter().map(|t| t.to_microwasm_type()),
callee_ty.returns().len() as u32, callee_ty.returns().len() as u32,
); );
} }
Operator::CallIndirect { index, table_index } => { Operator::CallIndirect {
type_index,
table_index,
} => {
assert_eq!(table_index, 0); assert_eq!(table_index, 0);
let callee_ty = session.module_context.signature(index); let callee_ty = session.module_context.signature(type_index);
// TODO: this implementation assumes that this function is locally defined. // TODO: this implementation assumes that this function is locally defined.
ctx.call_indirect( ctx.call_indirect(
quickhash(callee_ty) as u32, quickhash(callee_ty) as u32,
callee_ty.params().len() as u32, callee_ty.params().iter().map(|t| t.to_microwasm_type()),
callee_ty.returns().len() as u32, callee_ty.returns().len() as u32,
); );
} }
Operator::Nop => {}
op => { op => {
unimplemented!("{:?}", op); unimplemented!("{}", op);
} }
} }
} }
ctx.epilogue(func);
ctx.epilogue();
Ok(()) Ok(())
} }

3
src/lib.rs

@ -12,6 +12,7 @@ extern crate test;
extern crate arrayvec; extern crate arrayvec;
extern crate capstone; extern crate capstone;
extern crate either;
extern crate failure; extern crate failure;
pub extern crate wasmparser; pub extern crate wasmparser;
#[macro_use] #[macro_use]
@ -29,11 +30,13 @@ extern crate quickcheck;
extern crate wabt; extern crate wabt;
// Just so we can implement `Signature` for `cranelift_codegen::ir::Signature` // Just so we can implement `Signature` for `cranelift_codegen::ir::Signature`
extern crate cranelift_codegen; extern crate cranelift_codegen;
extern crate multi_mut;
mod backend; mod backend;
mod disassemble; mod disassemble;
mod error; mod error;
mod function_body; mod function_body;
mod microwasm;
mod module; mod module;
mod translate_sections; mod translate_sections;

1696
src/microwasm.rs

File diff suppressed because it is too large

98
src/module.rs

@ -1,3 +1,4 @@
use crate::microwasm;
use backend::TranslatedCodeSection; use backend::TranslatedCodeSection;
use cranelift_codegen::{ use cranelift_codegen::{
ir::{self, AbiParam, Signature as CraneliftSignature}, ir::{self, AbiParam, Signature as CraneliftSignature},
@ -184,18 +185,35 @@ pub struct ExecutableModule {
} }
impl ExecutableModule { impl ExecutableModule {
// For testing only. /// Executes the function _without checking types_. This can cause undefined
// TODO: Handle generic signatures. /// memory to be accessed.
pub fn execute_func<Args: FunctionArgs<T> + TypeList, T: TypeList>( pub unsafe fn execute_func_unchecked<Args: FunctionArgs<T>, T>(
&self, &self,
func_idx: u32, func_idx: u32,
args: Args, args: Args,
) -> Result<T, ExecutionError> { ) -> T {
let module = &self.module; let code_section = self
let code_section = module .module
.translated_code_section .translated_code_section
.as_ref() .as_ref()
.expect("no code section"); .expect("no code section");
let start_buf = code_section.func_start(func_idx as usize);
args.call(
Args::into_func(start_buf),
self.context
.as_ref()
.map(|ctx| (&**ctx) as *const VmCtx as *const u8)
.unwrap_or(std::ptr::null()),
)
}
pub fn execute_func<Args: FunctionArgs<T> + TypeList, T: TypeList>(
&self,
func_idx: u32,
args: Args,
) -> Result<T, ExecutionError> {
let module = &self.module;
if func_idx as usize >= module.types.func_ty_indicies.len() { if func_idx as usize >= module.types.func_ty_indicies.len() {
return Err(ExecutionError::FuncIndexOutOfBounds); return Err(ExecutionError::FuncIndexOutOfBounds);
@ -203,21 +221,12 @@ impl ExecutableModule {
let type_ = module.types.func_type(func_idx); let type_ = module.types.func_type(func_idx);
// TODO: Handle "compatible" types (i.e. f32 and i32)
if (&type_.params[..], &type_.returns[..]) != (Args::TYPE_LIST, T::TYPE_LIST) { if (&type_.params[..], &type_.returns[..]) != (Args::TYPE_LIST, T::TYPE_LIST) {
return Err(ExecutionError::TypeMismatch); return Err(ExecutionError::TypeMismatch);
} }
let start_buf = code_section.func_start(func_idx as usize); Ok(unsafe { self.execute_func_unchecked(func_idx, args) })
Ok(unsafe {
args.call(
Args::into_func(start_buf),
self.context
.as_ref()
.map(|ctx| (&**ctx) as *const VmCtx as *const u8)
.unwrap_or(std::ptr::null()),
)
})
} }
pub fn disassemble(&self) { pub fn disassemble(&self) {
@ -310,12 +319,35 @@ pub struct SimpleContext {
const WASM_PAGE_SIZE: usize = 65_536; const WASM_PAGE_SIZE: usize = 65_536;
pub trait Signature { pub trait Signature {
type Type; type Type: SigType;
fn params(&self) -> &[Self::Type]; fn params(&self) -> &[Self::Type];
fn returns(&self) -> &[Self::Type]; fn returns(&self) -> &[Self::Type];
} }
pub trait SigType {
fn to_microwasm_type(&self) -> microwasm::SignlessType;
fn is_float(&self) -> bool;
}
impl SigType for AbiParam {
fn to_microwasm_type(&self) -> microwasm::SignlessType {
use microwasm::{Size::*, Type::*};
if self.value_type == ir::Type::int(32).unwrap() {
Int(_32)
} else if self.value_type == ir::Type::int(64).unwrap() {
Int(_64)
} else {
unimplemented!()
}
}
fn is_float(&self) -> bool {
self.value_type.is_float()
}
}
impl Signature for CraneliftSignature { impl Signature for CraneliftSignature {
type Type = AbiParam; type Type = AbiParam;
@ -333,6 +365,19 @@ impl Signature for CraneliftSignature {
} }
} }
impl SigType for wasmparser::Type {
fn to_microwasm_type(&self) -> microwasm::SignlessType {
microwasm::Type::from_wasm(*self).unwrap()
}
fn is_float(&self) -> bool {
match self {
wasmparser::Type::F32 | wasmparser::Type::F64 => true,
_ => false,
}
}
}
impl Signature for FuncType { impl Signature for FuncType {
type Type = wasmparser::Type; type Type = wasmparser::Type;
@ -355,6 +400,14 @@ pub trait ModuleContext {
fn offset_of_funcs_ptr(&self) -> u8; fn offset_of_funcs_ptr(&self) -> u8;
fn offset_of_funcs_len(&self) -> u8; fn offset_of_funcs_len(&self) -> u8;
fn func_index(&self, defined_func_index: u32) -> u32;
fn defined_func_index(&self, func_index: u32) -> Option<u32>;
fn defined_func_type(&self, func_idx: u32) -> &Self::Signature {
// TODO: This assumes that there are no imported functions.
self.func_type(self.func_index(func_idx))
}
fn func_type(&self, func_idx: u32) -> &Self::Signature { fn func_type(&self, func_idx: u32) -> &Self::Signature {
// TODO: This assumes that there are no imported functions. // TODO: This assumes that there are no imported functions.
self.signature(self.func_type_index(func_idx)) self.signature(self.func_type_index(func_idx))
@ -364,6 +417,15 @@ pub trait ModuleContext {
impl ModuleContext for SimpleContext { impl ModuleContext for SimpleContext {
type Signature = FuncType; type Signature = FuncType;
// TODO: We don't support external functions yet
fn func_index(&self, func_idx: u32) -> u32 {
func_idx
}
fn defined_func_index(&self, func_idx: u32) -> Option<u32> {
Some(func_idx)
}
fn func_type_index(&self, func_idx: u32) -> u32 { fn func_type_index(&self, func_idx: u32) -> u32 {
self.func_ty_indicies[func_idx as usize] self.func_ty_indicies[func_idx as usize]
} }

35
src/tests.rs

@ -153,7 +153,6 @@ mod op64 {
($op:ident, $func:expr, $retty:ident) => { ($op:ident, $func:expr, $retty:ident) => {
mod $op { mod $op {
use super::{translate_wat, ExecutableModule}; use super::{translate_wat, ExecutableModule};
use std::sync::Once;
const RETTY: &str = stringify!($retty); const RETTY: &str = stringify!($retty);
const OP: &str = stringify!($op); const OP: &str = stringify!($op);
@ -191,10 +190,16 @@ mod op64 {
} }
fn reg_lit(a: i64, b: i64) -> bool { fn reg_lit(a: i64, b: i64) -> bool {
translate_wat(&format!(" use std::sync::Once;
let translated = translate_wat(&format!("
(module (func (param i64) (result {retty}) (module (func (param i64) (result {retty})
(i64.{op} (get_local 0) (i64.const {right})))) (i64.{op} (get_local 0) (i64.const {right}))))
", retty = RETTY, op = OP, right = b)).execute_func::<(i64,), $retty>(0, (a,)) == Ok($func(a, b) as $retty) ", retty = RETTY, op = OP, right = b));
static ONCE: Once = Once::new();
ONCE.call_once(|| translated.disassemble());
translated.execute_func::<(i64,), $retty>(0, (a,)) == Ok($func(a, b) as $retty)
} }
} }
} }
@ -303,7 +308,7 @@ quickcheck! {
"#; "#;
lazy_static! { lazy_static! {
static ref TRANSLATED: ExecutableModule = translate_wat(CODE); static ref TRANSLATED: ExecutableModule = {let out = translate_wat(CODE); out.disassemble(); out};
} }
let out = TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b)); let out = TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b));
@ -698,7 +703,8 @@ quickcheck! {
let n = n as i32; let n = n as i32;
assert_eq!(TRANSLATED.execute_func::<(i32,), i32>(0, (n,)), Ok(fac(n))); assert_eq!(TRANSLATED.execute_func::<(i32,), i32>(2, (n,)), Ok(fac(n)));
assert_eq!(TRANSLATED.execute_func::<(i32,), i32>(3, (n,)), Ok(fac(n)));
true true
} }
} }
@ -832,7 +838,7 @@ const FIBONACCI: &str = r#"
"#; "#;
#[test] #[test]
fn fib() { fn fib_unopt() {
let translated = translate_wat(FIBONACCI); let translated = translate_wat(FIBONACCI);
translated.disassemble(); translated.disassemble();
@ -893,7 +899,7 @@ fn fib_opt() {
} }
#[test] #[test]
fn storage() { fn just_storage() {
const CODE: &str = r#" const CODE: &str = r#"
(module (module
(memory 1 1) (memory 1 1)
@ -1066,9 +1072,10 @@ macro_rules! test_select {
mod $name { mod $name {
use super::{translate_wat, ExecutableModule}; use super::{translate_wat, ExecutableModule};
use std::sync::Once; use std::sync::Once;
lazy_static! { lazy_static! {
static ref AS_PARAMS: ExecutableModule = translate_wat(&format!(" static ref AS_PARAMS: ExecutableModule = translate_wat(&format!(
"
(module (module
(func (param {ty}) (param {ty}) (param i32) (result {ty}) (func (param {ty}) (param {ty}) (param i32) (result {ty})
(select (get_local 0) (get_local 1) (get_local 2)) (select (get_local 0) (get_local 1) (get_local 2))
@ -1077,17 +1084,17 @@ macro_rules! test_select {
ty = stringify!($ty) ty = stringify!($ty)
)); ));
} }
quickcheck! { quickcheck! {
fn as_param(cond: bool, then: $ty, else_: $ty) -> bool { fn as_param(cond: bool, then: $ty, else_: $ty) -> bool {
let icond: i32 = if cond { 1 } else { 0 }; let icond: i32 = if cond { 1 } else { 0 };
AS_PARAMS.execute_func::<($ty, $ty, i32), $ty>(0, (then, else_, icond)) == AS_PARAMS.execute_func::<($ty, $ty, i32), $ty>(0, (then, else_, icond)) ==
Ok(if cond { then } else { else_ }) Ok(if cond { then } else { else_ })
} }
fn lit(cond: bool, then: $ty, else_: $ty) -> bool { fn lit(cond: bool, then: $ty, else_: $ty) -> bool {
let icond: i32 = if cond { 1 } else { 0 }; let icond: i32 = if cond { 1 } else { 0 };
let translated = translate_wat(&format!(" let translated = translate_wat(&format!("
(module (func (param {ty}) (param {ty}) (result {ty}) (module (func (param {ty}) (param {ty}) (result {ty})
(select (get_local 0) (get_local 1) (i32.const {val})))) (select (get_local 0) (get_local 1) (i32.const {val}))))
", ",
@ -1096,13 +1103,13 @@ macro_rules! test_select {
)); ));
static ONCE: Once = Once::new(); static ONCE: Once = Once::new();
ONCE.call_once(|| translated.disassemble()); ONCE.call_once(|| translated.disassemble());
translated.execute_func::<($ty, $ty), $ty>(0, (then, else_)) == translated.execute_func::<($ty, $ty), $ty>(0, (then, else_)) ==
Ok(if cond { then } else { else_ }) Ok(if cond { then } else { else_ })
} }
} }
} }
} };
} }
test_select!(select32, i32); test_select!(select32, i32);

50
src/translate_sections.rs

@ -1,7 +1,8 @@
use backend::{CodeGenSession, TranslatedCodeSection}; use backend::{CodeGenSession, TranslatedCodeSection};
use error::Error; use error::Error;
use function_body; use function_body;
use module::SimpleContext; use microwasm::{MicrowasmConv, Type as MWType};
use module::{ModuleContext, SimpleContext};
#[allow(unused_imports)] // for now #[allow(unused_imports)] // for now
use wasmparser::{ use wasmparser::{
CodeSectionReader, Data, DataSectionReader, Element, ElementSectionReader, Export, CodeSectionReader, Data, DataSectionReader, Element, ElementSectionReader, Export,
@ -112,7 +113,52 @@ pub fn code(
let func_count = code.get_count(); let func_count = code.get_count();
let mut session = CodeGenSession::new(func_count, translation_ctx); let mut session = CodeGenSession::new(func_count, translation_ctx);
for (idx, body) in code.into_iter().enumerate() { for (idx, body) in code.into_iter().enumerate() {
function_body::translate(&mut session, idx as u32, &body?)?; let body = body?;
let mut microwasm_conv = MicrowasmConv::new(
translation_ctx,
translation_ctx
.func_type(idx as _)
.params
.iter()
.map(|t| MWType::from_wasm(*t).unwrap()),
translation_ctx
.func_type(idx as _)
.returns
.iter()
.map(|t| MWType::from_wasm(*t).unwrap()),
&body,
);
if true {
let mut microwasm = vec![];
let mut microwasm_conv = MicrowasmConv::new(
translation_ctx,
translation_ctx
.func_type(idx as _)
.params
.iter()
.map(|t| MWType::from_wasm(*t).unwrap()),
translation_ctx
.func_type(idx as _)
.returns
.iter()
.map(|t| MWType::from_wasm(*t).unwrap()),
&body,
);
for ops in microwasm_conv {
microwasm.extend(ops?);
}
println!("{}", crate::microwasm::dis(idx, &microwasm));
}
function_body::translate(
&mut session,
idx as u32,
microwasm_conv.flat_map(|i| i.expect("TODO: Make this not panic")),
)?;
} }
Ok(session.into_translated_code_section()?) Ok(session.into_translated_code_section()?)
} }

2
wasmparser.rs

@ -1 +1 @@
Subproject commit 4002d32c252131dd2a43c24e09839452fe2ff3b6 Subproject commit e8bc42b377559fb8be6bc333cceb36857d3f0d9c
Loading…
Cancel
Save