Browse Source

Merge pull request #38 from alexcrichton/docs

Add more thorough safety documentation to types
pull/1278/head
Pat Hickey 5 years ago
committed by GitHub
parent
commit
7669dee902
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 43
      crates/runtime/src/guest_type.rs
  2. 254
      crates/runtime/src/lib.rs

43
crates/runtime/src/guest_type.rs

@ -7,10 +7,36 @@ pub trait GuestErrorType {
fn from_error(e: GuestError, ctx: &Self::Context) -> Self;
}
/// A trait for types that are intended to be pointees in `GuestPtr<T>`.
///
/// This trait abstracts how to read/write information from the guest memory, as
/// well as how to offset elements in an array of guest memory. This layer of
/// abstraction allows the guest representation of a type to be different from
/// the host representation of a type, if necessary. It also allows for
/// validation when reading/writing.
pub trait GuestType<'a>: Sized {
/// Returns the size, in bytes, of this type in the guest memory.
fn guest_size() -> u32;
/// Returns the required alignment of this type, in bytes, for both guest
/// and host memory.
fn guest_align() -> usize;
/// Reads this value from the provided `ptr`.
///
/// Implementations must internally perform any safety checks necessary, and
/// are additionally allowed to fail if the bytes pointed to do not form a
/// valid value of this type.
///
/// Typically if you're implementing this by hand you'll want to delegate to
/// other safe implementations of this trait (e.g. for primitive types like
/// `u32`) rather than writing lots of raw code yourself.
///
/// # Errors
///
/// Returns a `GuestError` when any of those checks fail.
fn read(ptr: &GuestPtr<'a, Self>) -> Result<Self, GuestError>;
/// Writes a value to `ptr` after verifying that `ptr` is indeed valid to
/// store `val`.
///
/// Similar to `read`, you'll probably want to implement this in terms of
/// other primitives.
///
/// # Errors
///
/// Returns a `GuestError` when `ptr` is not valid to store `val`.
fn write(ptr: &GuestPtr<'_, Self>, val: Self) -> Result<(), GuestError>;
}
@ -20,14 +46,14 @@ macro_rules! primitives {
fn guest_size() -> u32 { mem::size_of::<Self>() as u32 }
fn guest_align() -> usize { mem::align_of::<Self>() }
#[inline]
fn read(ptr: &GuestPtr<'a, Self>) -> Result<Self, GuestError> {
// Any bit pattern for any primitive implemented with this
// macro is safe, so our `as_raw` method will guarantee that if
// we are given a pointer it's valid for the size of our type
// as well as properly aligned. Consequently we should be able
// to safely read the pointer just after we validated it,
// returning it along here.
// macro is safe, so our `validate_size_align` method will
// guarantee that if we are given a pointer it's valid for the
// size of our type as well as properly aligned. Consequently we
// should be able to safely read the pointer just after we
// validated it, returning it along here.
let host_ptr = ptr.mem().validate_size_align(
ptr.offset(),
Self::guest_align(),
@ -36,6 +62,7 @@ macro_rules! primitives {
Ok(unsafe { *host_ptr.cast::<Self>() })
}
#[inline]
fn write(ptr: &GuestPtr<'_, Self>, val: Self) -> Result<(), GuestError> {
let host_ptr = ptr.mem().validate_size_align(
ptr.offset(),
@ -55,8 +82,11 @@ macro_rules! primitives {
}
// Invoke `primitives!` once for every integer and float type listed below.
// NOTE(review): only part of the macro body is visible here; it appears to
// emit `guest_size`/`guest_align`/`read`/`write` for each listed type --
// confirm against the full `macro_rules! primitives` definition.
primitives! {
// signed
i8 i16 i32 i64 i128 isize
// unsigned
u8 u16 u32 u64 u128 usize
// floats
f32 f64
}
@ -65,6 +95,7 @@ impl<'a, T> GuestType<'a> for GuestPtr<'a, T> {
fn guest_size() -> u32 {
u32::guest_size()
}
fn guest_align() -> usize {
u32::guest_align()
}

254
crates/runtime/src/lib.rs

@ -1,8 +1,10 @@
use std::cell::Cell;
use std::fmt;
use std::marker;
use std::rc::Rc;
use std::slice;
use std::str;
use std::sync::Arc;
mod error;
mod guest_type;
@ -11,9 +13,100 @@ pub use error::GuestError;
pub use guest_type::{GuestErrorType, GuestType};
pub use region::Region;
/// A trait which abstracts how to get at the region of host memory that
/// contains guest memory.
///
/// All `GuestPtr` types will contain a handle to this trait, signifying where
/// the pointer is actually pointing into. This type will need to be implemented
/// for the host's memory storage object.
///
/// # Safety
///
/// Safety around this type is tricky, and the trait is `unsafe` since there are
/// a few contracts you need to uphold to implement this type correctly and have
/// everything else in this crate work out safely.
///
/// The most important method of this trait is the `base` method. This returns,
/// in host memory, a pointer and a length. The pointer should point to valid
/// memory for the guest to read/write for the length contiguous bytes
/// afterwards.
///
/// The region returned by `base` must not only be valid, however, but it must
/// be valid for "a period of time before the guest is reentered". This isn't
/// exactly well defined but the general idea is that `GuestMemory` is allowed
/// to change under our feet to accommodate instructions like `memory.grow` or
/// other guest modifications. Memory, however, cannot be changed if the guest
/// is not reentered or if no explicit action is taken to modify the guest
/// memory.
///
/// This provides the guarantee that host pointers based on the return value of
/// `base` have a dynamic period for which they are valid. This time duration
/// must be "somehow nonzero in length" to allow users of `GuestMemory` and
/// `GuestPtr` to safely read and write interior data.
///
/// # Using Raw Pointers
///
/// Methods like [`GuestMemory::base`] or [`GuestPtr::as_raw`] will return raw
/// pointers to use. Returning raw pointers is significant because it shows
/// there are hazards with using the returned pointers, and they can't blanket
/// be used in a safe fashion. It is possible to use these pointers safely, but
/// any usage needs to uphold a few guarantees.
///
/// * Whenever a `*mut T` is accessed or modified, it must be guaranteed that
/// since the pointer was originally obtained the guest memory wasn't
/// relocated in any way. This means you can't call back into the guest, call
/// other arbitrary functions which might call into the guest, etc. The
/// problem here is that the guest could execute instructions like
/// `memory.grow` which would invalidate the raw pointer. If, however, after
/// you acquire `*mut T` you only execute your own code and it doesn't touch
/// the guest, then `*mut T` is still guaranteed to point to valid memory.
///
/// * Furthermore, Rust's aliasing rules must still be upheld. For example you
/// can't have two `&mut T` types that point to the same area or overlap in any
/// way. This in particular becomes an issue when you're dealing with multiple
/// `GuestPtr` types. If you want to simultaneously work with them then you
/// need to dynamically validate that you're either working with them all in a
/// shared fashion (e.g. as if they were `&T`) or you must verify that they do
/// not overlap to work with them as `&mut T`.
///
/// Note that safely using the raw pointers is relatively difficult. This crate
/// strives to provide utilities to safely work with guest pointers so long as
/// the previous guarantees are all upheld. If advanced operations are done with
/// guest pointers it's recommended to be extremely cautious and thoroughly
/// consider possible ramifications with respect to this API before codifying
/// implementation details.
pub unsafe trait GuestMemory {
/// Returns the base allocation of this guest memory, located in host
/// memory.
///
/// A pointer/length pair are returned to signify where the guest memory
/// lives in the host, and how many contiguous bytes the memory is valid for
/// after the returned pointer.
///
/// Note that there are safety guarantees about this method that
/// implementations must uphold, and for more details see the
/// [`GuestMemory`] documentation.
fn base(&self) -> (*mut u8, u32);
/// Validates a guest-relative pointer given various attributes, and returns
/// the corresponding host pointer.
///
/// * `offset` - this is the guest-relative pointer, an offset from the
/// base.
/// * `align` - this is the desired alignment of the guest pointer, and if
/// successful the host pointer will be guaranteed to have this alignment.
/// * `len` - this is the number of bytes, after `offset`, that the returned
/// pointer must be valid for.
///
/// This function will guarantee that the returned pointer is in-bounds of
/// `base`, *at this time*, for `len` bytes and has alignment `align`. If
/// any guarantees are not upheld then an error will be returned.
///
/// Note that the returned pointer is an unsafe pointer. This is not safe to
/// use in general because guest memory can be relocated. Additionally the
/// guest may be modifying/reading memory as well. Consult the
/// [`GuestMemory`] documentation for safety information about using this
/// returned pointer.
fn validate_size_align(
&self,
offset: u32,
@ -44,6 +137,11 @@ pub unsafe trait GuestMemory {
Ok(start as *mut u8)
}
/// Convenience method for creating a `GuestPtr` at a particular offset.
///
/// Note that `T` can be almost any type, and typically `offset` is a `u32`.
/// The exception is slices and strings, in which case `offset` is a `(u32,
/// u32)` of `(offset, length)`.
fn ptr<'a, T>(&'a self, offset: T::Pointer) -> GuestPtr<'a, T>
where
Self: Sized,
@ -53,6 +151,8 @@ pub unsafe trait GuestMemory {
}
}
// Forwarding trait implementations to the original type
unsafe impl<'a, T: ?Sized + GuestMemory> GuestMemory for &'a T {
fn base(&self) -> (*mut u8, u32) {
T::base(self)
@ -65,6 +165,68 @@ unsafe impl<'a, T: ?Sized + GuestMemory> GuestMemory for &'a mut T {
}
}
// Lets an owned, boxed memory be used wherever a `GuestMemory` is expected.
//
// SAFETY: the region reported is exactly the one the inner `T` reports, so
// this impl upholds the trait contract whenever `T`'s own impl does.
unsafe impl<T: ?Sized + GuestMemory> GuestMemory for Box<T> {
    fn base(&self) -> (*mut u8, u32) {
        // Peel off `&` and `Box` to reach the wrapped memory, then forward.
        (**self).base()
    }
}
// Lets a reference-counted memory be used wherever a `GuestMemory` is
// expected.
//
// SAFETY: the region reported is exactly the one the inner `T` reports, so
// this impl upholds the trait contract whenever `T`'s own impl does.
unsafe impl<T: ?Sized + GuestMemory> GuestMemory for Rc<T> {
    fn base(&self) -> (*mut u8, u32) {
        // Peel off `&` and `Rc` to reach the wrapped memory, then forward.
        (**self).base()
    }
}
// Lets an atomically reference-counted memory be used wherever a
// `GuestMemory` is expected.
//
// SAFETY: the region reported is exactly the one the inner `T` reports, so
// this impl upholds the trait contract whenever `T`'s own impl does.
unsafe impl<T: ?Sized + GuestMemory> GuestMemory for Arc<T> {
    fn base(&self) -> (*mut u8, u32) {
        // Peel off `&` and `Arc` to reach the wrapped memory, then forward.
        (**self).base()
    }
}
/// A *guest* pointer into host memory.
///
/// This type represents a pointer from the guest that points into host memory.
/// Internally a `GuestPtr` contains a handle to its original [`GuestMemory`] as
/// well as the offset into the memory that the pointer is pointing at.
///
/// Presence of a [`GuestPtr`] does not imply any form of validity. Pointers can
/// be out-of-bounds, misaligned, etc. It is safe to construct a `GuestPtr` with
/// any offset at any time. Consider a `GuestPtr<T>` roughly equivalent to `*mut
/// T`, although there are a few more safety guarantees around this type.
///
/// ## Slices and Strings
///
/// Note that the type parameter does not need to implement the `Sized` trait,
/// so you can implement types such as this:
///
/// * `GuestPtr<'_, str>` - a pointer to a guest string
/// * `GuestPtr<'_, [T]>` - a pointer to a guest array
///
/// Unsized types such as this may have extra methods and won't have methods
/// like [`GuestPtr::read`] or [`GuestPtr::write`].
///
/// ## Type parameter and pointee
///
/// The `T` type parameter is largely intended for more static safety in Rust as
/// well as having a better handle on what we're pointing to. A `GuestPtr<T>`,
/// however, does not necessarily literally imply a guest pointer pointing to
/// type `T`. Instead the [`GuestType`] trait is a layer of abstraction where
/// `GuestPtr<T>` may actually be a pointer to `U` in guest memory, but you can
/// construct a `T` from a `U`.
///
/// For example `GuestPtr<GuestPtr<T>>` is a valid type, but this is actually
/// more equivalent to `GuestPtr<u32>` because guest pointers are always
/// 32-bits. That being said you can create a `GuestPtr<T>` from a `u32`.
///
/// Additionally `GuestPtr<MyEnum>` will actually delegate, typically, to an
/// implementation which loads the underlying data as `GuestPtr<u8>` (or
/// similar) and then the bytes loaded are validated to fit within the
/// definition of `MyEnum` before `MyEnum` is returned.
///
/// For more information see the [`GuestPtr::read`] and [`GuestPtr::write`]
/// methods. In general though be extremely careful about writing `unsafe` code
/// when working with a `GuestPtr` if you're not using one of the
/// already-attached helper methods.
pub struct GuestPtr<'a, T: ?Sized + Pointee> {
mem: &'a (dyn GuestMemory + 'a),
pointer: T::Pointer,
@ -72,6 +234,11 @@ pub struct GuestPtr<'a, T: ?Sized + Pointee> {
}
impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> {
/// Creates a new `GuestPtr` from the given `mem` and `pointer` values.
///
/// Note that for sized types like `u32`, `GuestPtr<T>`, etc, the `pointer`
/// value is a `u32` offset into guest memory. For slices and strings,
/// `pointer` is a `(u32, u32)` offset/length pair.
pub fn new(mem: &'a (dyn GuestMemory + 'a), pointer: T::Pointer) -> GuestPtr<'_, T> {
GuestPtr {
mem,
@ -80,14 +247,25 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> {
}
}
/// Returns the offset of this pointer in guest memory.
///
/// Note that for sized types this returns a `u32`, but for slices and
/// strings it returns a `(u32, u32)` offset/length pair.
///
/// The stored value is handed back by copy (`T::Pointer` is `Copy`); this
/// pointer itself is left untouched.
pub fn offset(&self) -> T::Pointer {
self.pointer
}
/// Returns the guest memory that this pointer is coming from.
///
/// The returned reference carries the `'a` lifetime of the memory handle
/// stored in this pointer rather than the lifetime of the borrow of `self`.
pub fn mem(&self) -> &'a (dyn GuestMemory + 'a) {
self.mem
}
/// Casts this `GuestPtr` type to a different type.
///
/// This is a safe method which is useful for simply reinterpreting the type
/// parameter on this `GuestPtr`. Note that this is a safe method, where
/// again there's no guarantees about alignment, validity, in-bounds-ness,
/// etc of the returned pointer.
pub fn cast<U>(&self) -> GuestPtr<'a, U>
where
T: Pointee<Pointer = u32>,
@ -95,6 +273,29 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> {
GuestPtr::new(self.mem, self.pointer)
}
/// Safely read a value from this pointer.
///
/// This is a fun method, and is one of the lynchpins of this
/// implementation. The highlight here is that this is a *safe* operation,
/// not an unsafe one like `*mut T`. This works for a few reasons:
///
/// * The `unsafe` contract of the `GuestMemory` trait means that there's
/// always at least some backing memory for this `GuestPtr<T>`.
///
/// * This does not use Rust-intrinsics to read the type `T`, but rather it
/// delegates to `T`'s implementation of [`GuestType`] to actually read
/// the underlying data. This again is a safe method, so any unsafety, if
/// any, must be internally documented.
///
/// * Eventually what typically happens is that this bottoms out in the read
/// implementations for primitive types (like `i32`) which can safely be
/// read at any time, and then it's up to the runtime to determine what to
/// do with the bytes it read in a safe manner.
///
/// Naturally lots of things can still go wrong, such as out-of-bounds
/// checks, alignment checks, validity checks (e.g. for enums), etc. All of
/// these check failures, however, are returned as a [`GuestError`] in the
/// `Result` here, and `Ok` is only returned if all the checks passed.
pub fn read(&self) -> Result<T, GuestError>
where
T: GuestType<'a>,
@ -102,6 +303,16 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> {
T::read(self)
}
/// Safely write a value to this pointer.
///
/// This method, like [`GuestPtr::read`], is pretty crucial for the safe
/// operation of this crate. All the same reasons apply though for why this
/// method is safe, even eventually bottoming out in primitives like writing
/// an `i32` which is safe to write bit patterns into memory at any time due
/// to the guarantees of [`GuestMemory`].
///
/// Like `read`, `write` can fail due to any manner of pointer checks, but
/// any failure is returned as a [`GuestError`].
pub fn write(&self, val: T) -> Result<(), GuestError>
where
T: GuestType<'a>,
@ -109,6 +320,12 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> {
T::write(self, val)
}
/// Performs pointer arithmetic on this pointer, moving the pointer forward
/// `amt` slots.
///
/// This will either return the resulting pointer or `Err` if the pointer
/// arithmetic calculation would overflow around the end of the address
/// space.
pub fn add(&self, amt: u32) -> Result<GuestPtr<'a, T>, GuestError>
where
T: GuestType<'a> + Pointee<Pointer = u32>,
@ -125,38 +342,69 @@ impl<'a, T: ?Sized + Pointee> GuestPtr<'a, T> {
}
impl<'a, T> GuestPtr<'a, [T]> {
    /// For slices, specifically returns the relative pointer to the base of the
    /// array.
    ///
    /// This is the first element of the `(offset, length)` pair stored in this
    /// pointer, and is similar to `<[T]>::as_ptr()`.
    pub fn offset_base(&self) -> u32 {
        self.pointer.0
    }

    /// For slices, returns the length of the slice, in units.
    ///
    /// This is the second element of the `(offset, length)` pair stored in
    /// this pointer; [`GuestPtr::iter`] yields one item per unit.
    pub fn len(&self) -> u32 {
        self.pointer.1
    }

    /// Returns an iterator over interior pointers.
    ///
    /// The iterator yields `self.len()` items, the `i`-th being the base
    /// pointer of the array advanced by `i` slots.
    ///
    /// Each item is a `Result` indicating whether it overflowed past the end of
    /// the address space or not. No item is read or otherwise validated here.
    pub fn iter<'b>(
        &'b self,
    ) -> impl ExactSizeIterator<Item = Result<GuestPtr<'a, T>, GuestError>> + 'b
    where
        T: GuestType<'a>,
    {
        // A single base pointer is moved into the closure; every element
        // pointer is derived from it via `add`.
        let base = self.as_ptr();
        (0..self.len()).map(move |i| base.add(i))
    }

    /// Returns a `GuestPtr` pointing to the base of the array for the interior
    /// type `T`.
    ///
    /// The result shares this pointer's memory handle and starts at
    /// [`GuestPtr::offset_base`]; the length is dropped.
    pub fn as_ptr(&self) -> GuestPtr<'a, T> {
        GuestPtr::new(self.mem, self.offset_base())
    }
}
impl<'a> GuestPtr<'a, str> {
/// For strings, returns the relative pointer to the base of the string
/// allocation.
///
/// This is the first element of the `(offset, length)` pair stored in this
/// pointer.
pub fn offset_base(&self) -> u32 {
self.pointer.0
}
/// Returns the length, in bytes, of the string.
///
/// This is the second element of the `(offset, length)` pair stored in this
/// pointer.
pub fn len(&self) -> u32 {
self.pointer.1
}
/// Returns a guest pointer to the underlying slice of bytes that this
/// string pointer points to.
///
/// The result is a `GuestPtr<[u8]>` built from the same memory handle and
/// the same `(offset, length)` pair as `self`; this method itself performs
/// no bounds or UTF-8 checks.
pub fn as_bytes(&self) -> GuestPtr<'a, [u8]> {
GuestPtr::new(self.mem, self.pointer)
}
/// Attempts to read a raw `*mut str` pointer from this pointer, performing
/// bounds checks and utf-8 checks.
///
/// This function will return a raw pointer into host memory if all checks
/// succeed (valid utf-8, valid pointers, etc). If any checks fail then
/// `GuestError` will be returned.
///
/// Note that the `*mut str` pointer is still unsafe to use in general, but
/// there are specific situations that it is safe to use. For more
/// information about using the raw pointer, consult the [`GuestMemory`]
/// trait documentation.
pub fn as_raw(&self) -> Result<*mut str, GuestError> {
let ptr = self
.mem
@ -194,6 +442,10 @@ mod private {
impl Sealed for str {}
}
/// Types that can be pointed to by `GuestPtr<T>`.
///
/// In essence everything can, and the only special-case is unsized types like
/// `str` and `[T]` which have special implementations.
pub trait Pointee: private::Sealed {
#[doc(hidden)]
type Pointer: Copy;

Loading…
Cancel
Save