diff --git a/crates/runtime/src/guest_type.rs b/crates/runtime/src/guest_type.rs index b23830d062..c7517bf0ea 100644 --- a/crates/runtime/src/guest_type.rs +++ b/crates/runtime/src/guest_type.rs @@ -7,10 +7,36 @@ pub trait GuestErrorType { fn from_error(e: GuestError, ctx: &Self::Context) -> Self; } +/// A trait for types that are intended to be pointees in `GuestPtr`. +/// +/// This trait abstracts how to read/write information from the guest memory, as +/// well as how to offset elements in an array of guest memory. This layer of +/// abstraction allows the guest representation of a type to be different from +/// the host representation of a type, if necessary. It also allows for +/// validation when reading/writing. pub trait GuestType<'a>: Sized { + /// Returns the size, in bytes, of this type in the guest memory. fn guest_size() -> u32; + + /// Returns the required alignment of this type, in bytes, for both guest + /// and host memory. fn guest_align() -> usize; + + /// Reads this value from the provided `ptr`. + /// + /// Must internally perform any safety checks necessary and is allowed to + /// fail if the bytes pointed to are also invalid. + /// + /// Typically if you're implementing this by hand you'll want to delegate to + /// other safe implementations of this trait (e.g. for primitive types like + /// `u32`) rather than writing lots of raw code yourself. fn read(ptr: &GuestPtr<'a, Self>) -> Result; + + /// Writes a value to `ptr` after verifying that `ptr` is indeed valid to + /// store `val`. + /// + /// Similar to `read`, you'll probably want to implement this in terms of + /// other primitives. fn write(ptr: &GuestPtr<'_, Self>, val: Self) -> Result<(), GuestError>; } @@ -20,14 +46,14 @@ macro_rules! 
primitives { fn guest_size() -> u32 { mem::size_of::() as u32 } fn guest_align() -> usize { mem::align_of::() } + #[inline] fn read(ptr: &GuestPtr<'a, Self>) -> Result { - // Any bit pattern for any primitive implemented with this - // macro is safe, so our `as_raw` method will guarantee that if - // we are given a pointer it's valid for the size of our type - // as well as properly aligned. Consequently we should be able - // to safely ready the pointer just after we validated it, - // returning it along here. + // macro is safe, so our `validate_size_align` method will + // guarantee that if we are given a pointer it's valid for the + // size of our type as well as properly aligned. Consequently we + // should be able to safely read the pointer just after we + // validated it, returning it along here. let host_ptr = ptr.mem().validate_size_align( ptr.offset(), Self::guest_align(), @@ -36,6 +62,7 @@ macro_rules! primitives { Ok(unsafe { *host_ptr.cast::() }) } + #[inline] fn write(ptr: &GuestPtr<'_, Self>, val: Self) -> Result<(), GuestError> { let host_ptr = ptr.mem().validate_size_align( ptr.offset(), @@ -55,8 +82,11 @@ macro_rules! primitives { } primitives! { + // signed i8 i16 i32 i64 i128 isize + // unsigned u8 u16 u32 u64 u128 usize + // floats f32 f64 } @@ -65,6 +95,7 @@ impl<'a, T> GuestType<'a> for GuestPtr<'a, T> { fn guest_size() -> u32 { u32::guest_size() } + fn guest_align() -> usize { u32::guest_align() } diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 1422a5f84b..b57a5bfe08 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,8 +1,10 @@ use std::cell::Cell; use std::fmt; use std::marker; +use std::rc::Rc; use std::slice; use std::str; +use std::sync::Arc; mod error; mod guest_type; @@ -11,9 +13,100 @@ pub use error::GuestError; pub use guest_type::{GuestErrorType, GuestType}; pub use region::Region; +/// A trait which abstracts how to get at the region of host memory that +/// contains guest memory. 
+/// +/// All `GuestPtr` types will contain a handle to this trait, signifying where +/// the pointer is actually pointing into. This type will need to be implemented +/// for the host's memory storage object. +/// +/// # Safety +/// +/// Safety around this type is tricky, and the trait is `unsafe` since there are +/// a few contracts you need to uphold to implement this type correctly and have +/// everything else in this crate work out safely. +/// +/// The most important method of this trait is the `base` method. This returns, +/// in host memory, a pointer and a length. The pointer should point to valid +/// memory for the guest to read/write for the length contiguous bytes +/// afterwards. +/// +/// The region returned by `base` must not only be valid, however, but it must +/// be valid for "a period of time before the guest is reentered". This isn't +/// exactly well defined but the general idea is that `GuestMemory` is allowed +/// to change under our feet to accommodate instructions like `memory.grow` or +/// other guest modifications. Memory, however, cannot be changed if the guest +/// is not reentered or if no explicit action is taken to modify the guest +/// memory. +/// +/// This provides the guarantee that host pointers based on the return value of +/// `base` have a dynamic period for which they are valid. This time duration +/// must be "somehow nonzero in length" to allow users of `GuestMemory` and +/// `GuestPtr` to safely read and write interior data. +/// +/// # Using Raw Pointers +/// +/// Methods like [`GuestMemory::base`] or [`GuestPtr::as_raw`] will return raw +/// pointers to use. Returning raw pointers is significant because it shows +/// there are hazards with using the returned pointers, and they can't blanket +/// be used in a safe fashion. It is possible to use these pointers safely, but +/// any usage needs to uphold a few guarantees. 
+/// +/// * Whenever a `*mut T` is accessed or modified, it must be guaranteed that +/// since the pointer was originally obtained the guest memory wasn't +/// relocated in any way. This means you can't call back into the guest, call +/// other arbitrary functions which might call into the guest, etc. The +/// problem here is that the guest could execute instructions like +/// `memory.grow` which would invalidate the raw pointer. If, however, after +/// you acquire `*mut T` you only execute your own code and it doesn't touch +/// the guest, then `*mut T` is still guaranteed to point to valid memory. +/// +/// * Furthermore, Rust's aliasing rules must still be upheld. For example you +/// can't have two `&mut T` types that point to the same area or overlap in any +/// way. This in particular becomes an issue when you're dealing with multiple +/// `GuestPtr` types. If you want to simultaneously work with them then you +/// need to dynamically validate that you're either working with them all in a +/// shared fashion (e.g. as if they were `&T`) or you must verify that they do +/// not overlap to work with them as `&mut T`. +/// +/// Note that safely using the raw pointers is relatively difficult. This crate +/// strives to provide utilities to safely work with guest pointers so long as +/// the previous guarantees are all upheld. If advanced operations are done with +/// guest pointers it's recommended to be extremely cautious and thoroughly +/// consider possible ramifications with respect to this API before codifying +/// implementation details. pub unsafe trait GuestMemory { + /// Returns the base allocation of this guest memory, located in host + /// memory. + /// + /// A pointer/length pair are returned to signify where the guest memory + /// lives in the host, and how many contiguous bytes the memory is valid for + /// after the returned pointer. 
+ /// + /// Note that there are safety guarantees about this method that + /// implementations must uphold, and for more details see the + /// [`GuestMemory`] documentation. fn base(&self) -> (*mut u8, u32); + /// Validates a guest-relative pointer given various attributes, and returns + /// the corresponding host pointer. + /// + /// * `offset` - this is the guest-relative pointer, an offset from the + /// base. + /// * `align` - this is the desired alignment of the guest pointer, and if + /// successful the host pointer will be guaranteed to have this alignment. + /// * `len` - this is the number of bytes, after `offset`, that the returned + /// pointer must be valid for. + /// + /// This function will guarantee that the returned pointer is in-bounds of + /// `base`, *at this time*, for `len` bytes and has alignment `align`. If + /// any guarantees are not upheld then an error will be returned. + /// + /// Note that the returned pointer is an unsafe pointer. This is not safe to + /// use in general because guest memory can be relocated. Additionally the + /// guest may be modifying/reading memory as well. Consult the + /// [`GuestMemory`] documentation for safety information about using this + /// returned pointer. fn validate_size_align( &self, offset: u32, @@ -44,6 +137,11 @@ pub unsafe trait GuestMemory { Ok(start as *mut u8) } + /// Convenience method for creating a `GuestPtr` at a particular offset. + /// + /// Note that `T` can be almost any type, and typically `offset` is a `u32`. + /// The exception is slices and strings, in which case `offset` is a `(u32, + /// u32)` of `(offset, length)`. 
fn ptr<'a, T>(&'a self, offset: T::Pointer) -> GuestPtr<'a, T> where Self: Sized, @@ -53,6 +151,8 @@ pub unsafe trait GuestMemory { } } +// Forwarding trait implementations to the original type + unsafe impl<'a, T: ?Sized + GuestMemory> GuestMemory for &'a T { fn base(&self) -> (*mut u8, u32) { T::base(self) @@ -65,6 +165,68 @@ unsafe impl<'a, T: ?Sized + GuestMemory> GuestMemory for &'a mut T { } } +unsafe impl GuestMemory for Box { + fn base(&self) -> (*mut u8, u32) { + T::base(self) + } +} + +unsafe impl GuestMemory for Rc { + fn base(&self) -> (*mut u8, u32) { + T::base(self) + } +} + +unsafe impl GuestMemory for Arc { + fn base(&self) -> (*mut u8, u32) { + T::base(self) + } +} + +/// A *guest* pointer into host memory. +/// +/// This type represents a pointer from the guest that points into host memory. +/// Internally a `GuestPtr` contains a handle to its original [`GuestMemory`] as +/// well as the offset into the memory that the pointer is pointing at. +/// +/// Presence of a [`GuestPtr`] does not imply any form of validity. Pointers can +/// be out-of-bounds, misaligned, etc. It is safe to construct a `GuestPtr` with +/// any offset at any time. Consider a `GuestPtr` roughly equivalent to `*mut +/// T`, although there are a few more safety guarantees around this type. +/// +/// ## Slices and Strings +/// +/// Note that the type parameter does not need to implement the `Sized` trait, +/// so you can implement types such as this: +/// +/// * `GuestPtr<'_, str>` - a pointer to a guest string +/// * `GuestPtr<'_, [T]>` - a pointer to a guest array +/// +/// Unsized types such as this may have extra methods and won't have methods +/// like [`GuestPtr::read`] or [`GuestPtr::write`]. +/// +/// ## Type parameter and pointee +/// +/// The `T` type parameter is largely intended for more static safety in Rust as +/// well as having a better handle on what we're pointing to. 
A `GuestPtr<T>`, +/// however, does not necessarily literally imply a guest pointer pointing to +/// type `T`. Instead the [`GuestType`] trait is a layer of abstraction where +/// `GuestPtr<T>` may actually be a pointer to `U` in guest memory, but you can +/// construct a `T` from a `U`. +/// +/// For example `GuestPtr<GuestPtr<T>>` is a valid type, but this is actually +/// more equivalent to `GuestPtr<u32>` because guest pointers are always +/// 32-bits. That being said you can create a `GuestPtr` from a `u32`. +/// +/// Additionally `GuestPtr<MyEnum>` will actually delegate, typically, to an +/// implementation which loads the underlying data as `GuestPtr` (or +/// similar) and then the bytes loaded are validated to fit within the +/// definition of `MyEnum` before `MyEnum` is returned. +/// +/// For more information see the [`GuestPtr::read`] and [`GuestPtr::write`] +/// methods. In general though be extremely careful about writing `unsafe` code +/// when working with a `GuestPtr` if you're not using one of the +/// already-attached helper methods. pub struct GuestPtr<'a, T: ?Sized + Pointee> { mem: &'a (dyn GuestMemory + 'a), pointer: T::Pointer, @@ -72,6 +234,11 @@ pub struct GuestPtr<'a, T: ?Sized + Pointee> { } impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> { + /// Creates a new `GuestPtr` from the given `mem` and `pointer` values. + /// + /// Note that for sized types like `u32`, `GuestPtr`, etc, the `pointer` + /// value is a `u32` offset into guest memory. For slices and strings, + /// `pointer` is a `(u32, u32)` offset/length pair. pub fn new(mem: &'a (dyn GuestMemory + 'a), pointer: T::Pointer) -> GuestPtr<'_, T> { GuestPtr { mem, @@ -80,14 +247,25 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> { } } + /// Returns the offset of this pointer in guest memory. + /// + /// Note that for sized types this returns a `u32`, but for slices and + /// strings it returns a `(u32, u32)` pointer/length pair. 
pub fn offset(&self) -> T::Pointer { self.pointer } + /// Returns the guest memory that this pointer is coming from. pub fn mem(&self) -> &'a (dyn GuestMemory + 'a) { self.mem } + /// Casts this `GuestPtr` type to a different type. + /// + /// This is a safe method which is useful for simply reinterpreting the type + /// parameter on this `GuestPtr`. Note that this is a safe method, where + /// again there are no guarantees about alignment, validity, in-bounds-ness, + /// etc of the returned pointer. pub fn cast(&self) -> GuestPtr<'a, U> where T: Pointee, @@ -95,6 +273,29 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> { GuestPtr::new(self.mem, self.pointer) } + /// Safely read a value from this pointer. + /// + /// This is a fun method, and is one of the lynchpins of this + /// implementation. The highlight here is that this is a *safe* operation, + /// not an unsafe one like `*mut T`. This works for a few reasons: + /// + /// * The `unsafe` contract of the `GuestMemory` trait means that there's + /// always at least some backing memory for this `GuestPtr`. + /// + /// * This does not use Rust-intrinsics to read the type `T`, but rather it + /// delegates to `T`'s implementation of [`GuestType`] to actually read + /// the underlying data. This again is a safe method, so any unsafety, if + /// any, must be internally documented. + /// + /// * Eventually what typically happens is that this bottoms out in the read + /// implementations for primitive types (like `i32`) which can safely be + /// read at any time, and then it's up to the runtime to determine what to + /// do with the bytes it read in a safe manner. + /// + /// Naturally lots of things can still go wrong, such as out-of-bounds + /// checks, alignment checks, validity checks (e.g. for enums), etc. All of + /// these check failures, however, are returned as a [`GuestError`] in the + /// `Result` here, and `Ok` is only returned if all the checks passed. 
pub fn read(&self) -> Result where T: GuestType<'a>, @@ -102,6 +303,16 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> { T::read(self) } + /// Safely write a value to this pointer. + /// + /// This method, like [`GuestPtr::read`], is pretty crucial for the safe + /// operation of this crate. All the same reasons apply though for why this + /// method is safe, even eventually bottoming out in primitives like writing + /// an `i32` which is safe to write bit patterns into memory at any time due + /// to the guarantees of [`GuestMemory`]. + /// + /// Like `read`, `write` can fail due to any manner of pointer checks, but + /// any failure is returned as a [`GuestError`]. pub fn write(&self, val: T) -> Result<(), GuestError> where T: GuestType<'a>, @@ -109,6 +320,12 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> { T::write(self, val) } + /// Performs pointer arithmetic on this pointer, moving the pointer forward + /// `amt` slots. + /// + /// This will either return the resulting pointer or `Err` if the pointer + /// arithmetic calculation would overflow around the end of the address + /// space. pub fn add(&self, amt: u32) -> Result, GuestError> where T: GuestType<'a> + Pointee, @@ -125,38 +342,69 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> { } impl<'a, T> GuestPtr<'a, [T]> { + /// For slices, specifically returns the relative pointer to the base of the + /// array. + /// + /// This is similar to `<[T]>::as_ptr()` pub fn offset_base(&self) -> u32 { self.pointer.0 } + /// For slices, returns the length of the slice, in units. pub fn len(&self) -> u32 { self.pointer.1 } + /// Returns an iterator over interior pointers. + /// + /// Each item is a `Result` indicating whether it overflowed past the end of + /// the address space or not. 
pub fn iter<'b>( &'b self, ) -> impl ExactSizeIterator, GuestError>> + 'b where T: GuestType<'a>, { - let base = GuestPtr::new(self.mem, self.offset_base()); + let base = self.as_ptr(); (0..self.len()).map(move |i| base.add(i)) } + + /// Returns a `GuestPtr` pointing to the base of the array for the interior + /// type `T`. + pub fn as_ptr(&self) -> GuestPtr<'a, T> { + GuestPtr::new(self.mem, self.offset_base()) + } } impl<'a> GuestPtr<'a, str> { + /// For strings, returns the relative pointer to the base of the string + /// allocation. pub fn offset_base(&self) -> u32 { self.pointer.0 } + /// Returns the length, in bytes, of the string. pub fn len(&self) -> u32 { self.pointer.1 } + /// Returns a raw pointer for the underlying slice of bytes that this + /// pointer points to. pub fn as_bytes(&self) -> GuestPtr<'a, [u8]> { GuestPtr::new(self.mem, self.pointer) } + /// Attempts to read a raw `*mut str` pointer from this pointer, performing + /// bounds checks and utf-8 checks. + /// + /// This function will return a raw pointer into host memory if all checks + /// succeed (valid utf-8, valid pointers, etc). If any checks fail then + /// `GuestError` will be returned. + /// + /// Note that the `*mut str` pointer is still unsafe to use in general, but + /// there are specific situations that it is safe to use. For more + /// information about using the raw pointer, consult the [`GuestMemory`] + /// trait documentation. pub fn as_raw(&self) -> Result<*mut str, GuestError> { let ptr = self .mem @@ -194,6 +442,10 @@ mod private { impl Sealed for str {} } +/// Types that can be pointed to by `GuestPtr`. +/// +/// In essence everything can, and the only special-case is unsized types like +/// `str` and `[T]` which have special implementations. pub trait Pointee: private::Sealed { #[doc(hidden)] type Pointer: Copy;