diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/ffi-support/src | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/ffi-support/src')
-rw-r--r-- | third_party/rust/ffi-support/src/error.rs | 365 | ||||
-rw-r--r-- | third_party/rust/ffi-support/src/ffistr.rs | 252 | ||||
-rw-r--r-- | third_party/rust/ffi-support/src/handle_map.rs | 1263 | ||||
-rw-r--r-- | third_party/rust/ffi-support/src/into_ffi.rs | 287 | ||||
-rw-r--r-- | third_party/rust/ffi-support/src/lib.rs | 625 | ||||
-rw-r--r-- | third_party/rust/ffi-support/src/macros.rs | 362 | ||||
-rw-r--r-- | third_party/rust/ffi-support/src/string.rs | 162 |
7 files changed, 3316 insertions, 0 deletions
diff --git a/third_party/rust/ffi-support/src/error.rs b/third_party/rust/ffi-support/src/error.rs new file mode 100644 index 0000000000..6bc2808b2c --- /dev/null +++ b/third_party/rust/ffi-support/src/error.rs @@ -0,0 +1,365 @@ +/* Copyright 2018-2019 Mozilla Foundation + * + * Licensed under the Apache License (Version 2.0), or the MIT license, + * (the "Licenses") at your option. You may not use this file except in + * compliance with one of the Licenses. You may obtain copies of the + * Licenses at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licenses is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licenses for the specific language governing permissions and + * limitations under the Licenses. */ + +use crate::string::{destroy_c_string, rust_string_to_c}; +use std::os::raw::c_char; +use std::{self, ptr}; + +/// Represents an error that occured within rust, storing both an error code, and additional data +/// that may be used by the caller. +/// +/// Misuse of this type can cause numerous issues, so please read the entire documentation before +/// usage. +/// +/// ## Rationale +/// +/// This library encourages a pattern of taking a `&mut ExternError` as the final parameter for +/// functions exposed over the FFI. This is an "out parameter" which we use to write error/success +/// information that occurred during the function's execution. +/// +/// To be clear, this means instances of `ExternError` will be created on the other side of the FFI, +/// and passed (by mutable reference) into Rust. +/// +/// While this pattern is not particularly ergonomic in Rust (although hopefully this library +/// helps!), it offers two main benefits over something more ergonomic (which might be `Result` +/// shaped). +/// +/// 1. It avoids defining a large number of `Result`-shaped types in the FFI consumer, as would +/// be required with something like an `struct ExternResult<T> { ok: *mut T, err:... }` +/// +/// 2. It offers additional type safety over `struct ExternResult { ok: *mut c_void, err:... }`, +/// which helps avoid memory safety errors. It also can offer better performance for returning +/// primitives and repr(C) structs (no boxing required). +/// +/// It also is less tricky to use properly than giving consumers a `get_last_error()` function, or +/// similar. +/// +/// ## Caveats +/// +/// Note that the order of the fields is `code` (an i32) then `message` (a `*mut c_char`), getting +/// this wrong on the other side of the FFI will cause memory corruption and crashes. +/// +/// The fields are public largely for documentation purposes, but you should use +/// [`ExternError::new_error`] or [`ExternError::success`] to create these. +/// +/// ## Layout/fields +/// +/// This struct's field are not `pub` (mostly so that we can soundly implement `Send`, but also so +/// that we can verify rust users are constructing them appropriately), the fields, their types, and +/// their order are *very much* a part of the public API of this type. Consumers on the other side +/// of the FFI will need to know its layout. +/// +/// If this were a C struct, it would look like +/// +/// ```c,no_run +/// struct ExternError { +/// int32_t code; +/// char *message; // note: nullable +/// }; +/// ``` +/// +/// In rust, there are two fields, in this order: `code: ErrorCode`, and `message: *mut c_char`. +/// Note that ErrorCode is a `#[repr(transparent)]` wrapper around an `i32`, so the first property +/// is equivalent to an `i32`. +/// +/// #### The `code` field. +/// +/// This is the error code, 0 represents success, all other values represent failure. If the `code` +/// field is nonzero, there should always be a message, and if it's zero, the message will always be +/// null. +/// +/// #### The `message` field. +/// +/// This is a null-terminated C string containing some amount of additional information about the +/// error. If the `code` property is nonzero, there should always be an error message. Otherwise, +/// this should will be null. +/// +/// This string (when not null) is allocated on the rust heap (using this crate's +/// [`rust_string_to_c`]), and must be freed on it as well. Critically, if there are multiple rust +/// packages using being used in the same application, it *must be freed on the same heap that +/// allocated it*, or you will corrupt both heaps. +/// +/// Typically, this object is managed on the other side of the FFI (on the "FFI consumer"), which +/// means you must expose a function to release the resources of `message` which can be done easily +/// using the [`define_string_destructor!`] macro provided by this crate. +/// +/// If, for some reason, you need to release the resources directly, you may call +/// `ExternError::release()`. Note that you probably do not need to do this, and it's +/// intentional that this is not called automatically by implementing `drop`. +/// +/// ## Example +/// +/// ```rust,no_run +/// use ffi_support::{ExternError, ErrorCode}; +/// +/// #[derive(Debug)] +/// pub enum MyError { +/// IllegalFoo(String), +/// InvalidBar(i64), +/// // ... +/// } +/// +/// // Putting these in a module is obviously optional, but it allows documentation, and helps +/// // avoid accidental reuse. +/// pub mod error_codes { +/// // note: -1 and 0 are reserved by ffi_support +/// pub const ILLEGAL_FOO: i32 = 1; +/// pub const INVALID_BAR: i32 = 2; +/// // ... +/// } +/// +/// fn get_code(e: &MyError) -> ErrorCode { +/// match e { +/// MyError::IllegalFoo(_) => ErrorCode::new(error_codes::ILLEGAL_FOO), +/// MyError::InvalidBar(_) => ErrorCode::new(error_codes::INVALID_BAR), +/// // ... +/// } +/// } +/// +/// impl From<MyError> for ExternError { +/// fn from(e: MyError) -> ExternError { +/// ExternError::new_error(get_code(&e), format!("{:?}", e)) +/// } +/// } +/// ``` +#[repr(C)] +// Note: We're intentionally not implementing Clone -- it's too risky. +#[derive(Debug, PartialEq)] +pub struct ExternError { + // Don't reorder or add anything here! + code: ErrorCode, + message: *mut c_char, +} + +impl std::panic::UnwindSafe for ExternError {} +impl std::panic::RefUnwindSafe for ExternError {} + +/// This is sound so long as our fields are private. +unsafe impl Send for ExternError {} + +impl ExternError { + /// Construct an ExternError representing failure from a code and a message. + #[inline] + pub fn new_error(code: ErrorCode, message: impl Into<String>) -> Self { + assert!( + !code.is_success(), + "Attempted to construct a success ExternError with a message" + ); + Self { + code, + message: rust_string_to_c(message), + } + } + + /// Returns a ExternError representing a success. Also returned by ExternError::default() + #[inline] + pub fn success() -> Self { + Self { + code: ErrorCode::SUCCESS, + message: ptr::null_mut(), + } + } + + /// Helper for the case where we aren't exposing this back over the FFI and + /// we just want to warn if an error occurred and then release the allocated + /// memory. + /// + /// Typically, this is done if the error will still be detected and reported + /// by other channels. + /// + /// We assume we're not inside a catch_unwind, and so we wrap inside one + /// ourselves. + pub fn consume_and_log_if_error(self) { + if !self.code.is_success() { + // in practice this should never panic, but you never know... + crate::abort_on_panic::call_with_output(|| { + log::error!("Unhandled ExternError({:?}) {:?}", self.code, unsafe { + crate::FfiStr::from_raw(self.message) + }); + unsafe { + self.manually_release(); + } + }) + } + } + + /// Get the `code` property. + #[inline] + pub fn get_code(&self) -> ErrorCode { + self.code + } + + /// Get the `message` property as a pointer to c_char. + #[inline] + pub fn get_raw_message(&self) -> *const c_char { + self.message as *const _ + } + + /// Get the `message` property as an [`FfiStr`][crate::FfiStr] + #[inline] + pub fn get_message(&self) -> crate::FfiStr<'_> { + // Safe because the lifetime is the same as our lifetime. + unsafe { crate::FfiStr::from_raw(self.get_raw_message()) } + } + + /// Get the `message` property as a String, or None if this is not an error result. + /// + /// ## Safety + /// + /// You should only call this if you are certain that the other side of the FFI doesn't have a + /// reference to this result (more specifically, to the `message` property) anywhere! + #[inline] + pub unsafe fn get_and_consume_message(self) -> Option<String> { + if self.code.is_success() { + None + } else { + let res = self.get_message().into_string(); + self.manually_release(); + Some(res) + } + } + + /// Manually release the memory behind this string. You probably don't want to call this. + /// + /// ## Safety + /// + /// You should only call this if you are certain that the other side of the FFI doesn't have a + /// reference to this result (more specifically, to the `message` property) anywhere! + pub unsafe fn manually_release(self) { + if !self.message.is_null() { + destroy_c_string(self.message) + } + } +} + +impl Default for ExternError { + #[inline] + fn default() -> Self { + ExternError::success() + } +} + +// This is the `Err` of std::thread::Result, which is what +// `panic::catch_unwind` returns. +impl From<Box<dyn std::any::Any + Send + 'static>> for ExternError { + fn from(e: Box<dyn std::any::Any + Send + 'static>) -> Self { + // The documentation suggests that it will *usually* be a str or String. + let message = if let Some(s) = e.downcast_ref::<&'static str>() { + (*s).to_string() + } else if let Some(s) = e.downcast_ref::<String>() { + s.clone() + } else { + "Unknown panic!".to_string() + }; + log::error!("Caught a panic calling rust code: {:?}", message); + ExternError::new_error(ErrorCode::PANIC, message) + } +} + +/// A wrapper around error codes, which is represented identically to an i32 on the other side of +/// the FFI. Essentially exists to check that we don't accidentally reuse success/panic codes for +/// other things. +#[repr(transparent)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)] +pub struct ErrorCode(i32); + +impl ErrorCode { + /// The ErrorCode used for success. + pub const SUCCESS: ErrorCode = ErrorCode(0); + + /// The ErrorCode used for panics. It's unlikely you need to ever use this. + // TODO: Consider moving to the reserved region... + pub const PANIC: ErrorCode = ErrorCode(-1); + + /// The ErrorCode used for handle map errors. + pub const INVALID_HANDLE: ErrorCode = ErrorCode(-1000); + + /// Construct an error code from an integer code. + /// + /// ## Panics + /// + /// Panics if you call it with 0 (reserved for success, but you can use `ErrorCode::SUCCESS` if + /// that's what you want), or -1 (reserved for panics, but you can use `ErrorCode::PANIC` if + /// that's what you want). + pub fn new(code: i32) -> Self { + assert!(code > ErrorCode::INVALID_HANDLE.0 && code != ErrorCode::PANIC.0 && code != ErrorCode::SUCCESS.0, + "Error: The ErrorCodes `{success}`, `{panic}`, and all error codes less than or equal \ + to `{reserved}` are reserved (got {code}). You may use the associated constants on this \ + type (`ErrorCode::PANIC`, etc) if you'd like instances of those error codes.", + panic = ErrorCode::PANIC.0, + success = ErrorCode::SUCCESS.0, + reserved = ErrorCode::INVALID_HANDLE.0, + code = code, + ); + + ErrorCode(code) + } + + /// Get the raw numeric value of this ErrorCode. + #[inline] + pub fn code(self) -> i32 { + self.0 + } + + /// Returns whether or not this is a success code. + #[inline] + pub fn is_success(self) -> bool { + self.code() == 0 + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + #[should_panic] + fn test_code_new_reserved_success() { + ErrorCode::new(0); + } + + #[test] + #[should_panic] + fn test_code_new_reserved_panic() { + ErrorCode::new(-1); + } + + #[test] + #[should_panic] + fn test_code_new_reserved_handle_error() { + ErrorCode::new(-1000); + } + #[test] + #[should_panic] + fn test_code_new_reserved_unknown() { + // Everything below -1000 should be reserved. + ErrorCode::new(-1043); + } + + #[test] + fn test_code_new_allowed() { + // Should not panic + ErrorCode::new(-2); + } + + #[test] + fn test_code() { + assert!(!ErrorCode::PANIC.is_success()); + assert!(!ErrorCode::INVALID_HANDLE.is_success()); + assert!(ErrorCode::SUCCESS.is_success()); + assert_eq!(ErrorCode::default(), ErrorCode::SUCCESS); + } +} diff --git a/third_party/rust/ffi-support/src/ffistr.rs b/third_party/rust/ffi-support/src/ffistr.rs new file mode 100644 index 0000000000..fb60e1f72c --- /dev/null +++ b/third_party/rust/ffi-support/src/ffistr.rs @@ -0,0 +1,252 @@ +/* Copyright 2018-2019 Mozilla Foundation + * + * Licensed under the Apache License (Version 2.0), or the MIT license, + * (the "Licenses") at your option. You may not use this file except in + * compliance with one of the Licenses. You may obtain copies of the + * Licenses at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licenses is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licenses for the specific language governing permissions and + * limitations under the Licenses. */ + +use std::ffi::CStr; +use std::marker::PhantomData; +use std::os::raw::c_char; + +/// `FfiStr<'a>` is a safe (`#[repr(transparent)]`) wrapper around a +/// nul-terminated `*const c_char` (e.g. a C string). Conceptually, it is +/// similar to [`std::ffi::CStr`], except that it may be used in the signatures +/// of extern "C" functions. +/// +/// Functions accepting strings should use this instead of accepting a C string +/// directly. This allows us to write those functions using safe code without +/// allowing safe Rust to cause memory unsafety. +/// +/// A single function for constructing these from Rust ([`FfiStr::from_raw`]) +/// has been provided. Most of the time, this should not be necessary, and users +/// should accept `FfiStr` in the parameter list directly. +/// +/// ## Caveats +/// +/// An effort has been made to make this struct hard to misuse, however it is +/// still possible, if the `'static` lifetime is manually specified in the +/// struct. E.g. +/// +/// ```rust,no_run +/// # use ffi_support::FfiStr; +/// // NEVER DO THIS +/// #[no_mangle] +/// extern "C" fn never_do_this(s: FfiStr<'static>) { +/// // save `s` somewhere, and access it after this +/// // function returns. +/// } +/// ``` +/// +/// Instead, one of the following patterns should be used: +/// +/// ``` +/// # use ffi_support::FfiStr; +/// #[no_mangle] +/// extern "C" fn valid_use_1(s: FfiStr<'_>) { +/// // Use of `s` after this function returns is impossible +/// } +/// // Alternative: +/// #[no_mangle] +/// extern "C" fn valid_use_2(s: FfiStr) { +/// // Use of `s` after this function returns is impossible +/// } +/// ``` +#[repr(transparent)] +pub struct FfiStr<'a> { + cstr: *const c_char, + _boo: PhantomData<&'a ()>, +} + +impl<'a> FfiStr<'a> { + /// Construct an `FfiStr` from a raw pointer. + /// + /// This should not be needed most of the time, and users should instead + /// accept `FfiStr` in function parameter lists. + /// + /// # Safety + /// + /// Dereferences a pointer and is thus unsafe. + #[inline] + pub unsafe fn from_raw(ptr: *const c_char) -> Self { + Self { + cstr: ptr, + _boo: PhantomData, + } + } + + /// Construct a FfiStr from a `std::ffi::CStr`. This is provided for + /// completeness, as a safe method of producing an `FfiStr` in Rust. + #[inline] + pub fn from_cstr(cstr: &'a CStr) -> Self { + Self { + cstr: cstr.as_ptr(), + _boo: PhantomData, + } + } + + /// Get an `&str` out of the `FfiStr`. This will panic in any case that + /// [`FfiStr::as_opt_str`] would return `None` (e.g. null pointer or invalid + /// UTF-8). + /// + /// If the string should be optional, you should use [`FfiStr::as_opt_str`] + /// instead. If an owned string is desired, use [`FfiStr::into_string`] or + /// [`FfiStr::into_opt_string`]. + #[inline] + pub fn as_str(&self) -> &'a str { + self.as_opt_str() + .expect("Unexpected null string pointer passed to rust") + } + + /// Get an `Option<&str>` out of the `FfiStr`. If this stores a null + /// pointer, then None will be returned. If a string containing invalid + /// UTF-8 was passed, then an error will be logged and `None` will be + /// returned. + /// + /// If the string is a required argument, use [`FfiStr::as_str`], or + /// [`FfiStr::into_string`] instead. If `Option<String>` is desired, use + /// [`FfiStr::into_opt_string`] (which will handle invalid UTF-8 by + /// replacing with the replacement character). + pub fn as_opt_str(&self) -> Option<&'a str> { + if self.cstr.is_null() { + return None; + } + unsafe { + match std::ffi::CStr::from_ptr(self.cstr).to_str() { + Ok(s) => Some(s), + Err(e) => { + log::error!("Invalid UTF-8 was passed to rust! {:?}", e); + None + } + } + } + } + + /// Get an `Option<String>` out of the `FfiStr`. Returns `None` if this + /// `FfiStr` holds a null pointer. Note that unlike [`FfiStr::as_opt_str`], + /// invalid UTF-8 is replaced with the replacement character instead of + /// causing us to return None. + /// + /// If the string should be mandatory, you should use + /// [`FfiStr::into_string`] instead. If an owned string is not needed, you + /// may want to use [`FfiStr::as_str`] or [`FfiStr::as_opt_str`] instead, + /// (however, note the differences in how invalid UTF-8 is handled, should + /// this be relevant to your use). + pub fn into_opt_string(self) -> Option<String> { + if !self.cstr.is_null() { + unsafe { Some(CStr::from_ptr(self.cstr).to_string_lossy().to_string()) } + } else { + None + } + } + + /// Get a `String` out of a `FfiStr`. This function is essential a + /// convenience wrapper for `ffi_str.into_opt_string().unwrap()`, with a + /// message that indicates that a null argument was passed to rust when it + /// should be mandatory. As with [`FfiStr::into_opt_string`], invalid UTF-8 + /// is replaced with the replacement character if encountered. + /// + /// If the string should *not* be mandatory, you should use + /// [`FfiStr::into_opt_string`] instead. If an owned string is not needed, + /// you may want to use [`FfiStr::as_str`] or [`FfiStr::as_opt_str`] + /// instead, (however, note the differences in how invalid UTF-8 is handled, + /// should this be relevant to your use). + #[inline] + pub fn into_string(self) -> String { + self.into_opt_string() + .expect("Unexpected null string pointer passed to rust") + } +} + +impl<'a> std::fmt::Debug for FfiStr<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(s) = self.as_opt_str() { + write!(f, "FfiStr({:?})", s) + } else { + write!(f, "FfiStr(null)") + } + } +} + +// Conversions... + +impl<'a> From<FfiStr<'a>> for String { + #[inline] + fn from(f: FfiStr<'a>) -> Self { + f.into_string() + } +} + +impl<'a> From<FfiStr<'a>> for Option<String> { + #[inline] + fn from(f: FfiStr<'a>) -> Self { + f.into_opt_string() + } +} + +impl<'a> From<FfiStr<'a>> for Option<&'a str> { + #[inline] + fn from(f: FfiStr<'a>) -> Self { + f.as_opt_str() + } +} + +impl<'a> From<FfiStr<'a>> for &'a str { + #[inline] + fn from(f: FfiStr<'a>) -> Self { + f.as_str() + } +} + +// TODO: `AsRef<str>`? + +// Comparisons... + +// Compare FfiStr with eachother +impl<'a> PartialEq for FfiStr<'a> { + #[inline] + fn eq(&self, other: &FfiStr<'a>) -> bool { + self.as_opt_str() == other.as_opt_str() + } +} + +// Compare FfiStr with str +impl<'a> PartialEq<str> for FfiStr<'a> { + #[inline] + fn eq(&self, other: &str) -> bool { + self.as_opt_str() == Some(other) + } +} + +// Compare FfiStr with &str +impl<'a, 'b> PartialEq<&'b str> for FfiStr<'a> { + #[inline] + fn eq(&self, other: &&'b str) -> bool { + self.as_opt_str() == Some(*other) + } +} + +// rhs/lhs swap version of above +impl<'a> PartialEq<FfiStr<'a>> for str { + #[inline] + fn eq(&self, other: &FfiStr<'a>) -> bool { + Some(self) == other.as_opt_str() + } +} + +// rhs/lhs swap... +impl<'a, 'b> PartialEq<FfiStr<'a>> for &'b str { + #[inline] + fn eq(&self, other: &FfiStr<'a>) -> bool { + Some(*self) == other.as_opt_str() + } +} diff --git a/third_party/rust/ffi-support/src/handle_map.rs b/third_party/rust/ffi-support/src/handle_map.rs new file mode 100644 index 0000000000..b9b0df7c78 --- /dev/null +++ b/third_party/rust/ffi-support/src/handle_map.rs @@ -0,0 +1,1263 @@ +/* Copyright 2018-2019 Mozilla Foundation + * + * Licensed under the Apache License (Version 2.0), or the MIT license, + * (the "Licenses") at your option. You may not use this file except in + * compliance with one of the Licenses. You may obtain copies of the + * Licenses at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licenses is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licenses for the specific language governing permissions and + * limitations under the Licenses. */ + +//! This module provides a [`Handle`] type, which you can think of something +//! like a dynamically checked, type erased reference/pointer type. Depending on +//! the usage pattern a handle can behave as either a borrowed reference, or an +//! owned pointer. +//! +//! They can be losslessly converted [to](Handle::into_u64) and +//! [from](Handle::from_u64) a 64 bit integer, for ease of passing over the FFI +//! (and they implement [`IntoFfi`] using these primitives for this purpose). +//! +//! The benefit is primarially that they can detect common misuse patterns that +//! would otherwise be silent bugs, such as use-after-free, double-free, passing +//! a wrongly-typed pointer to a function, etc. +//! +//! Handles are provided when inserting an item into either a [`HandleMap`] or a +//! [`ConcurrentHandleMap`]. +//! +//! # Comparison to types from other crates +//! +//! [`HandleMap`] is similar to types offered by other crates, such as +//! `slotmap`, or `slab`. However, it has a number of key differences which make +//! it better for our purposes as compared to the types in those crates: +//! +//! 1. Unlike `slab` (but like `slotmap`), we implement versioning, detecting +//! ABA problems, which allows us to detect use after free. +//! 2. Unlike `slotmap`, we don't have the `T: Copy` restriction. +//! 3. Unlike either, we can detect when you use a Key in a map that did not +//! allocate the key. This is true even when the map is from a `.so` file +//! compiled separately. +//! 3. Our implementation of doesn't use any `unsafe` (at the time of this +//! writing). +//! +//! However, it comes with the following drawbacks: +//! +//! 1. `slotmap` holds its version information in a `u32`, and so it takes +//! 2<sup>31</sup> colliding insertions and deletions before it could +//! potentially fail to detect an ABA issue, wheras we use a `u16`, and are +//! limited to 2<sup>15</sup>. +//! 2. Similarly, we can only hold 2<sup>16</sup> items at once, unlike +//! `slotmap`'s 2<sup>32</sup>. (Considering these items are typically things +//! like database handles, this is probably plenty). +//! 3. Our implementation is slower, and uses slightly more memory than +//! `slotmap` (which is in part due to the lack of `unsafe` mentioned above) +//! +//! The first two issues seem exceptionally unlikely, even for extremely +//! long-lived `HandleMap`, and we're still memory safe even if they occur (we +//! just might fail to notice a bug). The third issue also seems unimportant for +//! our use case. + +use crate::error::{ErrorCode, ExternError}; +use crate::into_ffi::IntoFfi; +use std::error::Error as StdError; +use std::fmt; +use std::ops; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Mutex, RwLock}; + +/// `HandleMap` is a collection type which can hold any type of value, and +/// offers a stable handle which can be used to retrieve it on insertion. These +/// handles offer methods for converting [to](Handle::into_u64) and +/// [from](Handle::from_u64) 64 bit integers, meaning they're very easy to pass +/// over the FFI (they also implement [`IntoFfi`] for the same purpose). +/// +/// See the [module level docs](index.html) for more information. +/// +/// Note: In FFI code, most usage of `HandleMap` will be done through the +/// [`ConcurrentHandleMap`] type, which is a thin wrapper around a +/// `RwLock<HandleMap<Mutex<T>>>`. +#[derive(Debug, Clone)] +pub struct HandleMap<T> { + // The value of `map_id` in each `Handle`. + id: u16, + + // Index to the start of the free list. Always points to a free item -- + // we never allow our free list to become empty. + first_free: u16, + + // The number of entries with `data.is_some()`. This is never equal to + // `entries.len()`, we always grow before that point to ensure we always have + // a valid `first_free` index to add entries onto. This is our `len`. + num_entries: usize, + + // The actual data. Note: entries.len() is our 'capacity'. + entries: Vec<Entry<T>>, +} + +#[derive(Debug, Clone)] +struct Entry<T> { + // initially 1, incremented on insertion and removal. Thus, + // if version is even, state should always be EntryState::Active. + version: u16, + state: EntryState<T>, +} + +#[derive(Debug, Clone)] +enum EntryState<T> { + // Not part of the free list + Active(T), + // The u16 is the next index in the free list. + InFreeList(u16), + // Part of the free list, but the sentinel. + EndOfFreeList, +} + +impl<T> EntryState<T> { + #[cfg(any(debug_assertions, test))] + fn is_end_of_list(&self) -> bool { + match self { + EntryState::EndOfFreeList => true, + _ => false, + } + } + + #[inline] + fn is_occupied(&self) -> bool { + self.get_item().is_some() + } + + #[inline] + fn get_item(&self) -> Option<&T> { + match self { + EntryState::Active(v) => Some(v), + _ => None, + } + } + + #[inline] + fn get_item_mut(&mut self) -> Option<&mut T> { + match self { + EntryState::Active(v) => Some(v), + _ => None, + } + } +} + +// Small helper to check our casts. +#[inline] +fn to_u16(v: usize) -> u16 { + use std::u16::MAX as U16_MAX; + // Shouldn't ever happen. + assert!(v <= (U16_MAX as usize), "Bug: Doesn't fit in u16: {}", v); + v as u16 +} + +/// The maximum capacity of a [`HandleMap`]. Attempting to instantiate one with +/// a larger capacity will cause a panic. +/// +/// Note: This could go as high as `(1 << 16) - 2`, but doing is seems more +/// error prone. For the sake of paranoia, we limit it to this size, which is +/// already quite a bit larger than it seems like we're likely to ever need. +pub const MAX_CAPACITY: usize = (1 << 15) - 1; + +// Never having to worry about capacity == 0 simplifies the code at the cost of +// worse memory usage. It doesn't seem like there's any reason to make this +// public. +const MIN_CAPACITY: usize = 4; + +/// An error representing the ways a `Handle` may be invalid. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum HandleError { + /// Identical to invalid handle, but has a slightly more helpful + /// message for the most common case 0. + NullHandle, + + /// Returned from [`Handle::from_u64`] if [`Handle::is_valid`] fails. + InvalidHandle, + + /// Returned from get/get_mut/delete if the handle is stale (this indicates + /// something equivalent to a use-after-free / double-free, etc). + StaleVersion, + + /// Returned if the handle index references an index past the end of the + /// HandleMap. + IndexPastEnd, + + /// The handle has a map_id for a different map than the one it was + /// attempted to be used with. + WrongMap, +} + +impl StdError for HandleError {} + +impl fmt::Display for HandleError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use HandleError::*; + match self { + NullHandle => { + f.write_str("Tried to use a null handle (this object has probably been closed)") + } + InvalidHandle => f.write_str("u64 could not encode a valid Handle"), + StaleVersion => f.write_str("Handle has stale version number"), + IndexPastEnd => f.write_str("Handle references a index past the end of this HandleMap"), + WrongMap => f.write_str("Handle is from a different map"), + } + } +} + +impl From<HandleError> for ExternError { + fn from(e: HandleError) -> Self { + ExternError::new_error(ErrorCode::INVALID_HANDLE, e.to_string()) + } +} + +impl<T> HandleMap<T> { + /// Create a new `HandleMap` with the default capacity. + pub fn new() -> Self { + Self::new_with_capacity(MIN_CAPACITY) + } + + /// Allocate a new `HandleMap`. Note that the actual capacity may be larger + /// than the requested value. + /// + /// Panics if `request` is greater than [`handle_map::MAX_CAPACITY`](MAX_CAPACITY) + pub fn new_with_capacity(request: usize) -> Self { + assert!( + request <= MAX_CAPACITY, + "HandleMap capacity is limited to {} (request was {})", + MAX_CAPACITY, + request + ); + + let capacity = request.max(MIN_CAPACITY); + let id = next_handle_map_id(); + let mut entries = Vec::with_capacity(capacity); + + // Initialize each entry with version 1, and as a member of the free list + for i in 0..(capacity - 1) { + entries.push(Entry { + version: 1, + state: EntryState::InFreeList(to_u16(i + 1)), + }); + } + + // And the final entry is at the end of the free list + // (but still has version 1). + entries.push(Entry { + version: 1, + state: EntryState::EndOfFreeList, + }); + Self { + id, + first_free: 0, + num_entries: 0, + entries, + } + } + + /// Get the number of entries in the `HandleMap`. + #[inline] + pub fn len(&self) -> usize { + self.num_entries + } + + /// Returns true if the HandleMap is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the number of slots allocated in the handle map. + #[inline] + pub fn capacity(&self) -> usize { + // It's not a bug that this isn't entries.capacity() -- We're returning + // how many slots exist, not something about the backing memory allocation + self.entries.len() + } + + fn ensure_capacity(&mut self, cap_at_least: usize) { + assert_ne!(self.len(), self.capacity(), "Bug: should have grown by now"); + assert!(cap_at_least <= MAX_CAPACITY, "HandleMap overfilled"); + if self.capacity() > cap_at_least { + return; + } + + let mut next_cap = self.capacity(); + while next_cap <= cap_at_least { + next_cap *= 2; + } + next_cap = next_cap.min(MAX_CAPACITY); + + let need_extra = next_cap.saturating_sub(self.entries.capacity()); + self.entries.reserve(need_extra); + + assert!( + !self.entries[self.first_free as usize].state.is_occupied(), + "Bug: HandleMap.first_free points at occupied index" + ); + + // Insert new entries at the front of our list. + while self.entries.len() < next_cap - 1 { + // This is a little wasteful but whatever. Add each new entry to the + // front of the free list one at a time. + self.entries.push(Entry { + version: 1, + state: EntryState::InFreeList(self.first_free), + }); + self.first_free = to_u16(self.entries.len() - 1); + } + + self.debug_check_valid(); + } + + #[inline] + fn debug_check_valid(&self) { + // Run the expensive validity check in tests and in debug builds. + #[cfg(any(debug_assertions, test))] + { + self.assert_valid(); + } + } + + #[cfg(any(debug_assertions, test))] + fn assert_valid(&self) { + assert_ne!(self.len(), self.capacity()); + assert!(self.capacity() <= MAX_CAPACITY, "Entries too large"); + // Validate that our free list is correct. + + let number_of_ends = self + .entries + .iter() + .filter(|e| e.state.is_end_of_list()) + .count(); + assert_eq!( + number_of_ends, 1, + "More than one entry think's it's the end of the list, or no entries do" + ); + + // Check that the free list hits every unoccupied item. + // The tuple is: `(should_be_in_free_list, is_in_free_list)`. + let mut free_indices = vec![(false, false); self.capacity()]; + for (i, e) in self.entries.iter().enumerate() { + if !e.state.is_occupied() { + free_indices[i].0 = true; + } + } + + let mut next = self.first_free; + loop { + let ni = next as usize; + + assert!( + ni <= free_indices.len(), + "Free list contains out of bounds index!" + ); + + assert!( + free_indices[ni].0, + "Free list has an index that shouldn't be free! {}", + ni + ); + + assert!( + !free_indices[ni].1, + "Free list hit an index ({}) more than once! Cycle detected!", + ni + ); + + free_indices[ni].1 = true; + + match &self.entries[ni].state { + EntryState::InFreeList(next_index) => next = *next_index, + EntryState::EndOfFreeList => break, + // Hitting `Active` here is probably not possible because of the checks above, but who knows. + EntryState::Active(..) => unreachable!("Bug: Active item in free list at {}", next), + } + } + let mut occupied_count = 0; + for (i, &(should_be_free, is_free)) in free_indices.iter().enumerate() { + assert_eq!( + should_be_free, is_free, + "Free list missed item, or contains an item it shouldn't: {}", + i + ); + if !should_be_free { + occupied_count += 1; + } + } + assert_eq!( + self.num_entries, occupied_count, + "num_entries doesn't reflect the actual number of entries" + ); + } + + /// Insert an item into the map, and return a handle to it. + pub fn insert(&mut self, v: T) -> Handle { + let need_cap = self.len() + 1; + self.ensure_capacity(need_cap); + let index = self.first_free; + let result = { + // Scoped mutable borrow of entry. + let entry = &mut self.entries[index as usize]; + let new_first_free = match entry.state { + EntryState::InFreeList(i) => i, + _ => panic!("Bug: next_index pointed at non-free list entry (or end of list)"), + }; + entry.version += 1; + if entry.version == 0 { + entry.version += 2; + } + entry.state = EntryState::Active(v); + self.first_free = new_first_free; + self.num_entries += 1; + + Handle { + map_id: self.id, + version: entry.version, + index, + } + }; + self.debug_check_valid(); + result + } + + // Helper to contain the handle validation boilerplate. Returns `h.index as usize`. + fn check_handle(&self, h: Handle) -> Result<usize, HandleError> { + if h.map_id != self.id { + log::info!( + "HandleMap access with handle having wrong map id: {:?} (our map id is {})", + h, + self.id + ); + return Err(HandleError::WrongMap); + } + let index = h.index as usize; + if index >= self.entries.len() { + log::info!("HandleMap accessed with handle past end of map: {:?}", h); + return Err(HandleError::IndexPastEnd); + } + if self.entries[index].version != h.version { + log::info!( + "HandleMap accessed with handle with wrong version {:?} (entry version is {})", + h, + self.entries[index].version + ); + return Err(HandleError::StaleVersion); + } + // At this point, we know the handle version matches the entry version, + // but if someone created a specially invalid handle, they could have + // its version match the version they expect an unoccupied index to + // have. + // + // We don't use any unsafe, so the worse thing that can happen here is + // that we get confused and panic, but still that's not great, so we + // check for this explicitly. + // + // Note that `active` versions are always even, as they start at 1, and + // are incremented on both insertion and deletion. + // + // Anyway, this is just for sanity checking, we already check this in + // practice when we convert `u64`s into `Handle`s, which is the only + // way we ever use these in the real world. + if (h.version % 2) != 0 { + log::info!( + "HandleMap given handle with matching but illegal version: {:?}", + h, + ); + return Err(HandleError::StaleVersion); + } + Ok(index) + } + + /// Delete an item from the HandleMap. + pub fn delete(&mut self, h: Handle) -> Result<(), HandleError> { + self.remove(h).map(drop) + } + + /// Remove an item from the HandleMap, returning the old value. + pub fn remove(&mut self, h: Handle) -> Result<T, HandleError> { + let index = self.check_handle(h)?; + let prev = { + // Scoped mutable borrow of entry. + let entry = &mut self.entries[index]; + entry.version += 1; + let index = h.index; + let last_state = + std::mem::replace(&mut entry.state, EntryState::InFreeList(self.first_free)); + self.num_entries -= 1; + self.first_free = index; + + if let EntryState::Active(value) = last_state { + value + } else { + // This indicates either a bug in HandleMap or memory + // corruption. Abandon all hope. + unreachable!( + "Handle {:?} passed validation but references unoccupied entry", + h + ); + } + }; + self.debug_check_valid(); + Ok(prev) + } + + /// Get a reference to the item referenced by the handle, or return a + /// [`HandleError`] describing the problem. + pub fn get(&self, h: Handle) -> Result<&T, HandleError> { + let idx = self.check_handle(h)?; + let entry = &self.entries[idx]; + // This should be caught by check_handle above, but we avoid panicking + // because we'd rather not poison any locks we don't have to poison + let item = entry + .state + .get_item() + .ok_or_else(|| HandleError::InvalidHandle)?; + Ok(item) + } + + /// Get a mut reference to the item referenced by the handle, or return a + /// [`HandleError`] describing the problem. + pub fn get_mut(&mut self, h: Handle) -> Result<&mut T, HandleError> { + let idx = self.check_handle(h)?; + let entry = &mut self.entries[idx]; + // This should be caught by check_handle above, but we avoid panicking + // because we'd rather not poison any locks we don't have to poison + let item = entry + .state + .get_item_mut() + .ok_or_else(|| HandleError::InvalidHandle)?; + Ok(item) + } +} + +impl<T> Default for HandleMap<T> { + #[inline] + fn default() -> Self { + HandleMap::new() + } +} + +impl<T> ops::Index<Handle> for HandleMap<T> { + type Output = T; + #[inline] + fn index(&self, h: Handle) -> &T { + self.get(h) + .expect("Indexed into HandleMap with invalid handle!") + } +} + +// We don't implement IndexMut intentionally (implementing ops::Index is +// dubious enough) + +/// A Handle we allow to be returned over the FFI by implementing [`IntoFfi`]. +/// This type is intentionally not `#[repr(C)]`, and getting the data out of the +/// FFI is done using `Handle::from_u64`, or it's implemetation of `From<u64>`. +/// +/// It consists of, at a minimum: +/// +/// - A "map id" (used to ensure you're using it with the correct map) +/// - a "version" (incremented when the value in the index changes, used to +/// detect multiple frees, use after free, and ABA and ABA) +/// - and a field indicating which index it goes into. +/// +/// In practice, it may also contain extra information to help detect other +/// errors (currently it stores a "magic value" used to detect invalid +/// [`Handle`]s). +/// +/// These fields may change but the following guarantees are made about the +/// internal representation: +/// +/// - This will always be representable in 64 bits. +/// - The bits, when interpreted as a signed 64 bit integer, will be positive +/// (that is to say, it will *actually* be representable in 63 bits, since +/// this makes the most significant bit unavailable for the purposes of +/// encoding). This guarantee makes things slightly less dubious when passing +/// things to Java, gives us some extra validation ability, etc. +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct Handle { + map_id: u16, + version: u16, + index: u16, +} + +// We stuff this into the top 16 bits of the handle when u16 encoded to detect +// various sorts of weirdness. It's the letters 'A' and 'S' as ASCII, but the +// only important thing about it is that the most significant bit be unset. +const HANDLE_MAGIC: u16 = 0x4153_u16; + +impl Handle { + /// Convert a `Handle` to a `u64`. You can also use `Into::into` directly. + /// Most uses of this will be automatic due to our [`IntoFfi`] implementation. + #[inline] + pub fn into_u64(self) -> u64 { + let map_id = u64::from(self.map_id); + let version = u64::from(self.version); + let index = u64::from(self.index); + // SOMEDAY: we could also use this as a sort of CRC if we were really paranoid. + // e.g. `magic = combine_to_u16(map_id, version, index)`. + let magic = u64::from(HANDLE_MAGIC); + (magic << 48) | (map_id << 32) | (index << 16) | version + } + + /// Convert a `u64` to a `Handle`. Inverse of `into_u64`. We also implement + /// `From::from` (which will panic instead of returning Err). + /// + /// Returns [`HandleError::InvalidHandle`](HandleError) if the bits cannot + /// possibly represent a valid handle. + pub fn from_u64(v: u64) -> Result<Self, HandleError> { + if !Handle::is_valid(v) { + log::warn!("Illegal handle! {:x}", v); + if v == 0 { + Err(HandleError::NullHandle) + } else { + Err(HandleError::InvalidHandle) + } + } else { + let map_id = (v >> 32) as u16; + let index = (v >> 16) as u16; + let version = v as u16; + Ok(Self { + map_id, + version, + index, + }) + } + } + + /// Returns whether or not `v` makes a bit pattern that could represent an + /// encoded [`Handle`]. + #[inline] + pub fn is_valid(v: u64) -> bool { + (v >> 48) == u64::from(HANDLE_MAGIC) && + // The "bottom" field is the version. We increment it both when + // inserting and removing, and they're all initially 1. So, all valid + // handles that we returned should have an even version. + ((v & 1) == 0) + } +} + +impl From<u64> for Handle { + fn from(u: u64) -> Self { + Handle::from_u64(u).expect("Illegal handle!") + } +} + +impl From<Handle> for u64 { + #[inline] + fn from(h: Handle) -> u64 { + h.into_u64() + } +} + +unsafe impl IntoFfi for Handle { + type Value = u64; + // Note: intentionally does not encode a valid handle for any map. + #[inline] + fn ffi_default() -> u64 { + 0u64 + } + #[inline] + fn into_ffi_value(self) -> u64 { + self.into_u64() + } +} + +/// `ConcurrentHandleMap` is a relatively thin wrapper around +/// `RwLock<HandleMap<Mutex<T>>>`. Due to the nested locking, it's not possible +/// to implement the same API as [`HandleMap`], however it does implement an API +/// that offers equivalent functionality, as well as several functions that +/// greatly simplify FFI usage (see example below). +/// +/// See the [module level documentation](index.html) for more info. +/// +/// # Example +/// +/// ```rust,no_run +/// # #[macro_use] extern crate lazy_static; +/// # extern crate ffi_support; +/// # use ffi_support::*; +/// # use std::sync::*; +/// +/// // Somewhere... +/// struct Thing { value: f64 } +/// +/// lazy_static! { +/// static ref ITEMS: ConcurrentHandleMap<Thing> = ConcurrentHandleMap::new(); +/// } +/// +/// #[no_mangle] +/// pub extern "C" fn mylib_new_thing(value: f64, err: &mut ExternError) -> u64 { +/// // Most uses will be `ITEMS.insert_with_result`. Note that this already +/// // calls `call_with_output` (or `call_with_result` if this were +/// // `insert_with_result`) for you. +/// ITEMS.insert_with_output(err, || Thing { value }) +/// } +/// +/// #[no_mangle] +/// pub extern "C" fn mylib_thing_value(h: u64, err: &mut ExternError) -> f64 { +/// // Or `ITEMS.call_with_result` for the fallible functions. +/// ITEMS.call_with_output(err, h, |thing| thing.value) +/// } +/// +/// #[no_mangle] +/// pub extern "C" fn mylib_thing_set_value(h: u64, new_value: f64, err: &mut ExternError) { +/// ITEMS.call_with_output_mut(err, h, |thing| { +/// thing.value = new_value; +/// }) +/// } +/// +/// // Note: defines the following function: +/// // pub extern "C" fn mylib_destroy_thing(h: u64, err: &mut ExternError) +/// define_handle_map_deleter!(ITEMS, mylib_destroy_thing); +/// ``` +pub struct ConcurrentHandleMap<T> { + /// The underlying map. Public so that more advanced use-cases + /// may use it as they please. + pub map: RwLock<HandleMap<Mutex<T>>>, +} + +impl<T> ConcurrentHandleMap<T> { + /// Construct a new `ConcurrentHandleMap`. + pub fn new() -> Self { + Self { + map: RwLock::new(HandleMap::new()), + } + } + + /// Get the number of entries in the `ConcurrentHandleMap`. + /// + /// This takes the map's `read` lock. + #[inline] + pub fn len(&self) -> usize { + let map = self.map.read().unwrap(); + map.len() + } + + /// Returns true if the `ConcurrentHandleMap` is empty. + /// + /// This takes the map's `read` lock. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Insert an item into the map, returning the newly allocated handle to the + /// item. + /// + /// # Locking + /// + /// Note that this requires taking the map's write lock, and so it will + /// block until all other threads have finished any read/write operations. + pub fn insert(&self, v: T) -> Handle { + // Fails if the lock is poisoned. Not clear what we should do here... We + // could always insert anyway (by matching on LockResult), but that + // seems... really quite dubious. + let mut map = self.map.write().unwrap(); + map.insert(Mutex::new(v)) + } + + /// Remove an item from the map. + /// + /// # Locking + /// + /// Note that this requires taking the map's write lock, and so it will + /// block until all other threads have finished any read/write operations. + pub fn delete(&self, h: Handle) -> Result<(), HandleError> { + // We use `remove` and not delete (and use the inner block) to ensure + // that if `v`'s destructor panics, we aren't holding the write lock + // when it happens, so that the map itself doesn't get poisoned. + let v = { + let mut map = self.map.write().unwrap(); + map.remove(h) + }; + v.map(drop) + } + + /// Convenient wrapper for `delete` which takes a `u64` that it will + /// convert to a handle. + /// + /// The main benefit (besides convenience) of this over the version + /// that takes a [`Handle`] is that it allows handling handle-related errors + /// in one place. + pub fn delete_u64(&self, h: u64) -> Result<(), HandleError> { + self.delete(Handle::from_u64(h)?) + } + + /// Remove an item from the map, returning either the item, + /// or None if its guard mutex got poisoned at some point. + /// + /// # Locking + /// + /// Note that this requires taking the map's write lock, and so it will + /// block until all other threads have finished any read/write operations. + pub fn remove(&self, h: Handle) -> Result<Option<T>, HandleError> { + let mut map = self.map.write().unwrap(); + let mutex = map.remove(h)?; + Ok(mutex.into_inner().ok()) + } + + /// Convenient wrapper for `remove` which takes a `u64` that it will + /// convert to a handle. + /// + /// The main benefit (besides convenience) of this over the version + /// that takes a [`Handle`] is that it allows handling handle-related errors + /// in one place. + pub fn remove_u64(&self, h: u64) -> Result<Option<T>, HandleError> { + self.remove(Handle::from_u64(h)?) + } + + /// Call `callback` with a non-mutable reference to the item from the map, + /// after acquiring the necessary locks. + /// + /// # Locking + /// + /// Note that this requires taking both: + /// + /// - The map's read lock, and so it will block until all other threads have + /// finished any write operations. + /// - The mutex on the slot the handle is mapped to. + /// + /// And so it will block if there are ongoing write operations, or if + /// another thread is reading from the same handle. + /// + /// # Panics + /// + /// This will panic if a previous `get()` or `get_mut()` call has panicked + /// inside it's callback. The solution to this + /// + /// (It may also panic if the handle map detects internal state corruption, + /// however this should not happen except for bugs in the handle map code). + pub fn get<F, E, R>(&self, h: Handle, callback: F) -> Result<R, E> + where + F: FnOnce(&T) -> Result<R, E>, + E: From<HandleError>, + { + self.get_mut(h, |v| callback(v)) + } + + /// Call `callback` with a mutable reference to the item from the map, after + /// acquiring the necessary locks. + /// + /// # Locking + /// + /// Note that this requires taking both: + /// + /// - The map's read lock, and so it will block until all other threads have + /// finished any write operations. + /// - The mutex on the slot the handle is mapped to. + /// + /// And so it will block if there are ongoing write operations, or if + /// another thread is reading from the same handle. + /// + /// # Panics + /// + /// This will panic if a previous `get()` or `get_mut()` call has panicked + /// inside it's callback. The only solution to this is to remove and reinsert + /// said item. + /// + /// (It may also panic if the handle map detects internal state corruption, + /// however this should not happen except for bugs in the handle map code). + pub fn get_mut<F, E, R>(&self, h: Handle, callback: F) -> Result<R, E> + where + F: FnOnce(&mut T) -> Result<R, E>, + E: From<HandleError>, + { + // XXX figure out how to handle poison... + let map = self.map.read().unwrap(); + let mtx = map.get(h)?; + let mut hm = mtx.lock().unwrap(); + callback(&mut *hm) + } + + /// Convenient wrapper for `get` which takes a `u64` that it will convert to + /// a handle. + /// + /// The other benefit (besides convenience) of this over the version + /// that takes a [`Handle`] is that it allows handling handle-related errors + /// in one place. + /// + /// # Locking + /// + /// Note that this requires taking both: + /// + /// - The map's read lock, and so it will block until all other threads have + /// finished any write operations. + /// - The mutex on the slot the handle is mapped to. + /// + /// And so it will block if there are ongoing write operations, or if + /// another thread is reading from the same handle. + pub fn get_u64<F, E, R>(&self, u: u64, callback: F) -> Result<R, E> + where + F: FnOnce(&T) -> Result<R, E>, + E: From<HandleError>, + { + self.get(Handle::from_u64(u)?, callback) + } + + /// Convenient wrapper for [`Self::get_mut`] which takes a `u64` that it will + /// convert to a handle. + /// + /// The main benefit (besides convenience) of this over the version + /// that takes a [`Handle`] is that it allows handling handle-related errors + /// in one place. + /// + /// # Locking + /// + /// Note that this requires taking both: + /// + /// - The map's read lock, and so it will block until all other threads have + /// finished any write operations. + /// - The mutex on the slot the handle is mapped to. + /// + /// And so it will block if there are ongoing write operations, or if + /// another thread is reading from the same handle. + pub fn get_mut_u64<F, E, R>(&self, u: u64, callback: F) -> Result<R, E> + where + F: FnOnce(&mut T) -> Result<R, E>, + E: From<HandleError>, + { + self.get_mut(Handle::from_u64(u)?, callback) + } + + /// Helper that performs both a + /// [`call_with_result`][crate::call_with_result] and + /// [`get`](ConcurrentHandleMap::get_mut). + pub fn call_with_result_mut<R, E, F>( + &self, + out_error: &mut ExternError, + h: u64, + callback: F, + ) -> R::Value + where + F: std::panic::UnwindSafe + FnOnce(&mut T) -> Result<R, E>, + ExternError: From<E>, + R: IntoFfi, + { + use crate::call_with_result; + call_with_result(out_error, || -> Result<_, ExternError> { + // We can't reuse get_mut here because it would require E: + // From<HandleError>, which is inconvenient... + let h = Handle::from_u64(h)?; + let map = self.map.read().unwrap(); + let mtx = map.get(h)?; + let mut hm = mtx.lock().unwrap(); + Ok(callback(&mut *hm)?) + }) + } + + /// Helper that performs both a + /// [`call_with_result`][crate::call_with_result] and + /// [`get`](ConcurrentHandleMap::get). + pub fn call_with_result<R, E, F>( + &self, + out_error: &mut ExternError, + h: u64, + callback: F, + ) -> R::Value + where + F: std::panic::UnwindSafe + FnOnce(&T) -> Result<R, E>, + ExternError: From<E>, + R: IntoFfi, + { + self.call_with_result_mut(out_error, h, |r| callback(r)) + } + + /// Helper that performs both a + /// [`call_with_output`][crate::call_with_output] and + /// [`get`](ConcurrentHandleMap::get). + pub fn call_with_output<R, F>( + &self, + out_error: &mut ExternError, + h: u64, + callback: F, + ) -> R::Value + where + F: std::panic::UnwindSafe + FnOnce(&T) -> R, + R: IntoFfi, + { + self.call_with_result(out_error, h, |r| -> Result<_, HandleError> { + Ok(callback(r)) + }) + } + + /// Helper that performs both a + /// [`call_with_output`][crate::call_with_output] and + /// [`get_mut`](ConcurrentHandleMap::get). + pub fn call_with_output_mut<R, F>( + &self, + out_error: &mut ExternError, + h: u64, + callback: F, + ) -> R::Value + where + F: std::panic::UnwindSafe + FnOnce(&mut T) -> R, + R: IntoFfi, + { + self.call_with_result_mut(out_error, h, |r| -> Result<_, HandleError> { + Ok(callback(r)) + }) + } + + /// Use `constructor` to create and insert a `T`, while inside a + /// [`call_with_result`][crate::call_with_result] call (to handle panics and + /// map errors onto an [`ExternError`][crate::ExternError]). + pub fn insert_with_result<E, F>(&self, out_error: &mut ExternError, constructor: F) -> u64 + where + F: std::panic::UnwindSafe + FnOnce() -> Result<T, E>, + ExternError: From<E>, + { + use crate::call_with_result; + call_with_result(out_error, || -> Result<_, ExternError> { + // Note: it's important that we don't call the constructor while + // we're holding the write lock, because we don't want to poison + // the entire map if it panics! + let to_insert = constructor()?; + Ok(self.insert(to_insert)) + }) + } + + /// Equivalent to + /// [`insert_with_result`](ConcurrentHandleMap::insert_with_result) for the + /// case where the constructor cannot produce an error. + /// + /// The name is somewhat dubious, since there's no `output`, but it's + /// intended to make it clear that it contains a + /// [`call_with_output`][crate::call_with_output] internally. + pub fn insert_with_output<F>(&self, out_error: &mut ExternError, constructor: F) -> u64 + where + F: std::panic::UnwindSafe + FnOnce() -> T, + { + // The Err type isn't important here beyond being convertable to ExternError + self.insert_with_result(out_error, || -> Result<_, HandleError> { + Ok(constructor()) + }) + } +} + +impl<T> Default for ConcurrentHandleMap<T> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +// Returns the next map_id. +fn next_handle_map_id() -> u16 { + let id = HANDLE_MAP_ID_COUNTER + .fetch_add(1, Ordering::SeqCst) + .wrapping_add(1); + id as u16 +} + +// Note: These IDs are only used to detect using a key against the wrong HandleMap. +// We ensure they're randomly initialized, to prevent using them across separately +// compiled .so files. +lazy_static::lazy_static! { + // This should be `AtomicU16`, but those aren't stablilized yet. + // Instead, we just cast to u16 on read. + static ref HANDLE_MAP_ID_COUNTER: AtomicUsize = { + // Abuse HashMap's RandomState to get a strong RNG without bringing in + // the `rand` crate (OTOH maybe we should just bring in the rand crate?) + use std::collections::hash_map::RandomState; + use std::hash::{BuildHasher, Hasher}; + let init = RandomState::new().build_hasher().finish() as usize; + AtomicUsize::new(init) + }; +} + +#[cfg(test)] +mod test { + use super::*; + + #[derive(PartialEq, Debug)] + pub(super) struct Foobar(usize); + + #[test] + fn test_invalid_handle() { + assert_eq!(Handle::from_u64(0), Err(HandleError::NullHandle)); + // Valid except `version` is odd + assert_eq!( + Handle::from_u64((u64::from(HANDLE_MAGIC) << 48) | 0x1234_0012_0001), + Err(HandleError::InvalidHandle) + ); + + assert_eq!( + Handle::from_u64((u64::from(HANDLE_MAGIC) << 48) | 0x1234_0012_0002), + Ok(Handle { + version: 0x0002, + index: 0x0012, + map_id: 0x1234, + }) + ); + } + + #[test] + fn test_correct_value_single() { + let mut map = HandleMap::new(); + let handle = map.insert(Foobar(1234)); + assert_eq!(map.get(handle).unwrap(), &Foobar(1234)); + map.delete(handle).unwrap(); + assert_eq!(map.get(handle), Err(HandleError::StaleVersion)); + } + + #[test] + fn test_correct_value_multiple() { + let mut map = HandleMap::new(); + let handle1 = map.insert(Foobar(1234)); + let handle2 = map.insert(Foobar(4321)); + assert_eq!(map.get(handle1).unwrap(), &Foobar(1234)); + assert_eq!(map.get(handle2).unwrap(), &Foobar(4321)); + map.delete(handle1).unwrap(); + assert_eq!(map.get(handle1), Err(HandleError::StaleVersion)); + assert_eq!(map.get(handle2).unwrap(), &Foobar(4321)); + } + + #[test] + fn test_wrong_map() { + let mut map1 = HandleMap::new(); + let mut map2 = HandleMap::new(); + + let handle1 = map1.insert(Foobar(1234)); + let handle2 = map2.insert(Foobar(1234)); + + assert_eq!(map1.get(handle1).unwrap(), &Foobar(1234)); + assert_eq!(map2.get(handle2).unwrap(), &Foobar(1234)); + + assert_eq!(map1.get(handle2), Err(HandleError::WrongMap)); + assert_eq!(map2.get(handle1), Err(HandleError::WrongMap)); + } + + #[test] + fn test_bad_index() { + let map: HandleMap<Foobar> = HandleMap::new(); + assert_eq!( + map.get(Handle { + map_id: map.id, + version: 2, + index: 100 + }), + Err(HandleError::IndexPastEnd) + ); + } + + #[test] + fn test_resizing() { + let mut map = HandleMap::new(); + let mut handles = vec![]; + for i in 0..1000 { + handles.push(map.insert(Foobar(i))) + } + for (i, &h) in handles.iter().enumerate() { + assert_eq!(map.get(h).unwrap(), &Foobar(i)); + assert_eq!(map.remove(h).unwrap(), Foobar(i)); + } + let mut handles2 = vec![]; + for i in 1000..2000 { + // Not really related to this test, but it's convenient to check this here. + let h = map.insert(Foobar(i)); + let hu = h.into_u64(); + assert_eq!(Handle::from_u64(hu).unwrap(), h); + handles2.push(hu); + } + + for (i, (&h0, h1u)) in handles.iter().zip(handles2).enumerate() { + // It's still a stale version, even though the slot is occupied again. + assert_eq!(map.get(h0), Err(HandleError::StaleVersion)); + let h1 = Handle::from_u64(h1u).unwrap(); + assert_eq!(map.get(h1).unwrap(), &Foobar(i + 1000)); + } + } + + /// Tests that check our behavior when panicing. + /// + /// Naturally these require panic=unwind, which means we can't run them when + /// generating coverage (well, `-Zprofile`-based coverage can't -- although + /// ptrace-based coverage like tarpaulin can), and so we turn them off. + /// + /// (For clarity, `cfg(coverage)` is not a standard thing. We add it in + /// `automation/emit_coverage_info.sh`, and you can force it by adding + /// "--cfg coverage" to your RUSTFLAGS manually if you need to do so). + #[cfg(not(coverage))] + mod panic_tests { + use super::*; + + struct PanicOnDrop(()); + impl Drop for PanicOnDrop { + fn drop(&mut self) { + panic!("intentional panic (drop)"); + } + } + + #[test] + fn test_panicking_drop() { + let map = ConcurrentHandleMap::new(); + let h = map.insert(PanicOnDrop(())).into_u64(); + let mut e = ExternError::success(); + crate::call_with_result(&mut e, || map.delete_u64(h)); + assert_eq!(e.get_code(), crate::ErrorCode::PANIC); + let _ = unsafe { e.get_and_consume_message() }; + assert!(!map.map.is_poisoned()); + let inner = map.map.read().unwrap(); + inner.assert_valid(); + assert_eq!(inner.len(), 0); + } + + #[test] + fn test_panicking_call_with() { + let map = ConcurrentHandleMap::new(); + let h = map.insert(Foobar(0)).into_u64(); + let mut e = ExternError::success(); + map.call_with_output(&mut e, h, |_thing| { + panic!("intentional panic (call_with_output)"); + }); + + assert_eq!(e.get_code(), crate::ErrorCode::PANIC); + let _ = unsafe { e.get_and_consume_message() }; + + { + assert!(!map.map.is_poisoned()); + let inner = map.map.read().unwrap(); + inner.assert_valid(); + assert_eq!(inner.len(), 1); + let mut seen = false; + for e in &inner.entries { + if let EntryState::Active(v) = &e.state { + assert!(!seen); + assert!(v.is_poisoned()); + seen = true; + } + } + } + assert!(map.delete_u64(h).is_ok()); + assert!(!map.map.is_poisoned()); + let inner = map.map.read().unwrap(); + inner.assert_valid(); + assert_eq!(inner.len(), 0); + } + + #[test] + fn test_panicking_insert_with() { + let map = ConcurrentHandleMap::new(); + let mut e = ExternError::success(); + let res = map.insert_with_output(&mut e, || { + panic!("intentional panic (insert_with_output)"); + }); + + assert_eq!(e.get_code(), crate::ErrorCode::PANIC); + let _ = unsafe { e.get_and_consume_message() }; + + assert_eq!(res, 0); + + assert!(!map.map.is_poisoned()); + let inner = map.map.read().unwrap(); + inner.assert_valid(); + assert_eq!(inner.len(), 0); + } + } +} diff --git a/third_party/rust/ffi-support/src/into_ffi.rs b/third_party/rust/ffi-support/src/into_ffi.rs new file mode 100644 index 0000000000..96c2037e71 --- /dev/null +++ b/third_party/rust/ffi-support/src/into_ffi.rs @@ -0,0 +1,287 @@ +/* Copyright 2018-2019 Mozilla Foundation + * + * Licensed under the Apache License (Version 2.0), or the MIT license, + * (the "Licenses") at your option. You may not use this file except in + * compliance with one of the Licenses. You may obtain copies of the + * Licenses at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licenses is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licenses for the specific language governing permissions and + * limitations under the Licenses. */ + +use crate::string::*; +use std::os::raw::{c_char, c_void}; +use std::ptr; + +/// This trait is used to return types over the FFI. It essentially is a mapping between a type and +/// version of that type we can pass back to C (`IntoFfi::Value`). +/// +/// The main wrinkle is that we need to be able to pass a value back to C in both the success and +/// error cases. In the error cases, we don't want there to need to be any cleanup for the foreign +/// code to do, and we want the API to be relatively easy to use. +/// +/// Additionally, the mapping is not consistent for different types. For some rust types, we want to +/// convert them to JSON. For some, we want to return an opaque `*mut T` handle. For others, +/// we'd like to return by value. +/// +/// This trait supports those cases by adding some type-level indirection, and allowing both cases +/// to be provided (both cases what is done in the error and success cases). +/// +/// We implement this for the following types: +/// +/// - `String`, by conversion to `*mut c_char`. Note that the caller (on the other side of the FFI) +/// is expected to free this, so you will need to provide them with a destructor for strings, +/// which can be done with the [`define_string_destructor!`] macro. +/// +/// - `()`: as a no-op conversion -- this just allows us to expose functions without a return type +/// over the FFI. +/// +/// - `bool`: is implemented by conversion to `u8` (`0u8` is `false`, `1u8` is `true`, and +/// `ffi_default()` is `false`). This is because it doesn't seem to be safe to pass over the FFI +/// directly (or at least, doing so might hit a bug in JNA). +/// +/// - All numeric primitives except `isize`, `usize`, `char`, `i128`, and `u128` are implememented +/// by passing directly through (and using `Default::default()` for `ffi_default()`). +/// - `isize`, `usize` could be added, but they'd be quite easy to accidentally misuse, so we +/// currently omit them. +/// - `char` is less easy to misuse, but it's also less clear why you'd want to be doing this. +/// If we did ever add this, we'd probably want to convert to a `u32` (similar to how we +/// convert `bool` to `u8`) for better ABI stability. +/// - `i128` and `u128` do not have a stable ABI, so they cannot be returned across the FFI. +/// +/// - `Option<T>` where `T` is `IntoFfi`, by returning `IntoFfi::ffi_default()` for `None`. +/// +/// None of these are directly helpful for user types though, so macros are provided for the +/// following cases: +/// +/// 1. For types which are passed around by an opaque pointer, the macro +/// [`implement_into_ffi_by_pointer!`] is provided. +/// +/// 2. For types which should be returned as a JSON string, the macro +/// [`implement_into_ffi_by_json!`] is provided. +/// +/// See the "Examples" section below for some other cases, such as returning by value. +/// +/// ## Safety +/// +/// This is an unsafe trait (implementing it requires `unsafe impl`). This is because we cannot +/// guarantee that your type is safe to pass to C. The helpers we've provided as macros should be +/// safe to use, and in the cases where a common pattern can't be done both safely and generically, +/// we've opted not to provide a macro for it. That said, many of these cases are still safe if you +/// meet some relatively basic requirements, see below for examples. +/// +/// ## Examples +/// +/// ### Returning types by value +/// +/// If you want to return a type by value, we don't provide a macro for this, primarially because +/// doing so cannot be statically guarantee that it is safe. However, it *is* safe for the cases +/// where the type is either `#[repr(C)]` or `#[repr(transparent)]`. If this doesn't hold, you will +/// want to use a different option! +/// +/// Regardless, if this holds, it's fairly simple to implement, for example: +/// +/// ```rust +/// # use ffi_support::IntoFfi; +/// #[derive(Default)] +/// #[repr(C)] +/// pub struct Point { +/// pub x: i32, +/// pub y: i32, +/// } +/// +/// unsafe impl IntoFfi for Point { +/// type Value = Self; +/// #[inline] fn ffi_default() -> Self { Default::default() } +/// #[inline] fn into_ffi_value(self) -> Self { self } +/// } +/// ``` +/// +/// ### Conversion to another type (which is returned over the FFI) +/// +/// In the FxA FFI, we used to have a `SyncKeys` type, which was converted to a different type before +/// returning over the FFI. (The real FxA FFI is a little different, and more complex, but this is +/// relatively close, and more widely recommendable than the one the FxA FFI uses): +/// +/// This is fairly easy to do by performing the conversion inside `IntoFfi`. +/// +/// ```rust +/// # use ffi_support::{self, IntoFfi}; +/// # use std::{ptr, os::raw::c_char}; +/// pub struct SyncKeys(pub String, pub String); +/// +/// #[repr(C)] +/// pub struct SyncKeysC { +/// pub sync_key: *mut c_char, +/// pub xcs: *mut c_char, +/// } +/// +/// unsafe impl IntoFfi for SyncKeys { +/// type Value = SyncKeysC; +/// #[inline] +/// fn ffi_default() -> SyncKeysC { +/// SyncKeysC { +/// sync_key: ptr::null_mut(), +/// xcs: ptr::null_mut(), +/// } +/// } +/// +/// #[inline] +/// fn into_ffi_value(self) -> SyncKeysC { +/// SyncKeysC { +/// sync_key: ffi_support::rust_string_to_c(self.0), +/// xcs: ffi_support::rust_string_to_c(self.1), +/// } +/// } +/// } +/// +/// // Note: this type manages memory, so you still will want to expose a destructor for this, +/// // and possibly implement Drop as well. +/// ``` +pub unsafe trait IntoFfi: Sized { + /// This type must be: + /// + /// 1. Compatible with C, which is to say `#[repr(C)]`, a numeric primitive, + /// another type that has guarantees made about it's layout, or a + /// `#[repr(transparent)]` wrapper around one of those. + /// + /// One could even use `&T`, so long as `T: Sized`, although it's + /// extremely dubious to return a reference to borrowed memory over the + /// FFI, since it's very difficult for the caller to know how long it + /// remains valid. + /// + /// 2. Capable of storing an empty/ignorable/default value. + /// + /// 3. Capable of storing the actual value. + /// + /// Valid examples include: + /// + /// - Primitive numbers (other than i128/u128) + /// + /// - #[repr(C)] structs containing only things on this list. + /// + /// - `Option<Box<T>>`, but only if `T` is `Sized`. (Internally this is + /// guaranteed to be represented equivalently to a pointer) + /// + /// - Raw pointers such as `*const T`, and `*mut T`, but again, only if `T` + /// is `Sized` (`*const [T]`, `*mut dyn SomeTrait` etc are not valid). + /// + /// - Enums with a fixed `repr`, although it's a good idea avoid + /// `#[repr(C)]` enums in favor of, say, `#[repr(i32)]` (for example, any + /// fixed type there should be fine), as it's potentially error prone to + /// access `#[repr(C)]` enums from Android over JNA (it's only safe if C's + /// `sizeof(int) == 4`, which is very common, but not universally true). + /// + /// - `&T`/`&mut T` where `T: Sized` but only if you really know what you're + /// doing, because this is probably a mistake. + /// + /// Invalid examples include things like `&str`, `&[T]`, `String`, `Vec<T>`, + /// `std::ffi::CString`, `&std::ffi::CStr`, etc. + type Value; + + /// Return an 'empty' value. This is what's passed back to C in the case of an error, + /// so it doesn't actually need to be "empty", so much as "ignorable". Note that this + /// is also used when an empty `Option<T>` is returned. + fn ffi_default() -> Self::Value; + + /// Convert ourselves into a value we can pass back to C with confidence. + fn into_ffi_value(self) -> Self::Value; +} + +unsafe impl IntoFfi for String { + type Value = *mut c_char; + + #[inline] + fn ffi_default() -> Self::Value { + ptr::null_mut() + } + + #[inline] + fn into_ffi_value(self) -> Self::Value { + rust_string_to_c(self) + } +} + +// Implement IntoFfi for Option<T> by falling back to ffi_default for None. +unsafe impl<T: IntoFfi> IntoFfi for Option<T> { + type Value = <T as IntoFfi>::Value; + + #[inline] + fn ffi_default() -> Self::Value { + T::ffi_default() + } + + #[inline] + fn into_ffi_value(self) -> Self::Value { + if let Some(s) = self { + s.into_ffi_value() + } else { + T::ffi_default() + } + } +} + +// We've had problems in the past returning booleans over the FFI (specifically to JNA), and so +// we convert them to `u8`. +unsafe impl IntoFfi for bool { + type Value = u8; + #[inline] + fn ffi_default() -> Self::Value { + 0u8 + } + #[inline] + fn into_ffi_value(self) -> Self::Value { + self as u8 + } +} + +unsafe impl IntoFfi for crate::ByteBuffer { + type Value = crate::ByteBuffer; + #[inline] + fn ffi_default() -> Self::Value { + crate::ByteBuffer::default() + } + #[inline] + fn into_ffi_value(self) -> Self::Value { + self + } +} + +// just cuts down on boilerplate. Not public. +macro_rules! impl_into_ffi_for_primitive { + ($($T:ty),+) => {$( + unsafe impl IntoFfi for $T { + type Value = Self; + #[inline] fn ffi_default() -> Self { Default::default() } + #[inline] fn into_ffi_value(self) -> Self { self } + } + )+} +} + +// See IntoFfi docs for why this is not exhaustive +impl_into_ffi_for_primitive![(), i8, u8, i16, u16, i32, u32, i64, u64, f32, f64]; + +// just cuts down on boilerplate. Not public. +macro_rules! impl_into_ffi_for_pointer { + ($($T:ty),+) => {$( + unsafe impl IntoFfi for $T { + type Value = Self; + #[inline] fn ffi_default() -> Self { ptr::null_mut() } + #[inline] fn into_ffi_value(self) -> Self { self } + } + )+} +} + +impl_into_ffi_for_pointer![ + *mut i8, + *const i8, + *mut u8, + *const u8, + *mut c_void, + *const c_void +]; diff --git a/third_party/rust/ffi-support/src/lib.rs b/third_party/rust/ffi-support/src/lib.rs new file mode 100644 index 0000000000..cf50f0cd0d --- /dev/null +++ b/third_party/rust/ffi-support/src/lib.rs @@ -0,0 +1,625 @@ +/* Copyright 2018-2019 Mozilla Foundation + * + * Licensed under the Apache License (Version 2.0), or the MIT license, + * (the "Licenses") at your option. You may not use this file except in + * compliance with one of the Licenses. You may obtain copies of the + * Licenses at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licenses is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licenses for the specific language governing permissions and + * limitations under the Licenses. */ + +#![deny(missing_docs)] +#![allow(unknown_lints)] +#![warn(rust_2018_idioms)] + +//! # FFI Support +//! +//! This crate implements a support library to simplify implementing the patterns that the +//! `mozilla/application-services` repository uses for it's "Rust Component" FFI libraries. +//! +//! It is *strongly encouraged* that anybody writing FFI code in this repository read this +//! documentation before doing so, as it is a subtle, difficult, and error prone process. +//! +//! ## Terminology +//! +//! For each library, there are currently three parts we're concerned with. There's no clear correct +//! name for these, so this documentation will attempt to use the following terminology: +//! +//! - **Rust Component**: A Rust crate which does not expose an FFI directly, but may be may be +//! wrapped by one that does. These have a `crate-type` in their Cargo.toml (see +//! https://doc.rust-lang.org/reference/linkage.html) of `lib`, and not `staticlib` or `cdylib` +//! (Note that `lib` is the default if `crate-type` is not specified). Examples include the +//! `fxa-client`, and `logins` crates. +//! +//! - **FFI Component**: A wrapper crate that takes a Rust component, and exposes an FFI from it. +//! These typically have `ffi` in the name, and have `crate-type = ["lib", "staticlib", "cdylib"]` +//! in their Cargo.toml. For example, the `fxa-client/ffi` and `logins/ffi` crates (note: +//! paths are subject to change). When built, these produce a native library that is consumed by +//! the "FFI Consumer". +//! +//! - **FFI Consumer**: This is a low level library, typically implemented in Kotlin (for Android) +//! or Swift (for iOS), that exposes a memory-safe wrapper around the memory-unsafe C API produced +//! by the FFI component. It's expected that the maintainers of the FFI Component and FFI Consumer +//! be the same (or at least, the author of the consumer should be completely comfortable with the +//! API exposed by, and code in the FFI component), since the code in these is extremely tightly +//! coupled, and very easy to get wrong. +//! +//! Note that while there are three parts, there may be more than three libraries relevant here, for +//! example there may be more than one FFI consumer (one for Android, one for iOS). +//! +//! ## Usage +//! +//! This library will typically be used in both the Rust component, and the FFI component, however +//! it frequently will be an optional dependency in the Rust component that's only available when a +//! feature flag (which the FFI component will always require) is used. +//! +//! The reason it's required inside the Rust component (and not solely in the FFI component, which +//! would be nice), is so that types provided by that crate may implement the traits provided by +//! this crate (this is because Rust does not allow crate `C` to implement a trait defined in crate +//! `A` for a type defined in crate `B`). +//! +//! In general, examples should be provided for the most important types and functions +//! ([`call_with_result`], [`IntoFfi`], +//! [`ExternError`], etc), but you should also look at the code of +//! consumers of this library. +//! +//! ### Usage in the Rust Component +//! +//! Inside the Rust component, you will implement: +//! +//! 1. [`IntoFfi`] for all types defined in that crate that you want to return +//! over the FFI. For most common cases, the [`implement_into_ffi_by_json!`] and +//! [`implement_into_ffi_by_protobuf!`] macros will do the job here, however you +//! can see that trait's documentation for discussion and examples of +//! implementing it manually. +//! +//! 2. Conversion to [`ExternError`] for the error type(s) exposed by that +//! rust component, that is, `impl From<MyError> for ExternError`. +//! +//! ### Usage in the FFI Component +//! +//! Inside the FFI component, you will use this library in a few ways: +//! +//! 1. Destructors will be exposed for each types that had [`implement_into_ffi_by_pointer!`] called +//! on it (using [`define_box_destructor!`]), and a destructor for strings should be exposed as +//! well, using [`define_string_destructor`] +//! +//! 2. The body of every / nearly every FFI function will be wrapped in either a +//! [`call_with_result`] or [`call_with_output`]. +//! +//! This is required because if we `panic!` (e.g. from an `assert!`, `unwrap()`, `expect()`, from +//! indexing past the end of an array, etc) across the FFI boundary, the behavior is undefined +//! and in practice very weird things tend to happen (we aren't caught by the caller, since they +//! don't have the same exception behavior as us). +//! +//! If you don't think your program (or possibly just certain calls) can handle panics, you may +//! also use the versions of these functions in the [`abort_on_panic`] module, which +//! do as their name suggest. +//! +//! Additionally, c strings that are passed in as arguments may be represented using [`FfiStr`], +//! which contains several helpful inherent methods for extracting their data. +//! + +use std::{panic, thread}; + +mod error; +mod ffistr; +pub mod handle_map; +mod into_ffi; +#[macro_use] +mod macros; +mod string; + +pub use crate::error::*; +pub use crate::ffistr::FfiStr; +pub use crate::into_ffi::*; +pub use crate::macros::*; +pub use crate::string::*; + +// We export most of the types from this, but some constants +// (MAX_CAPACITY) don't make sense at the top level. +pub use crate::handle_map::{ConcurrentHandleMap, Handle, HandleError, HandleMap}; + +/// Call a callback that returns a `Result<T, E>` while: +/// +/// - Catching panics, and reporting them to C via [`ExternError`]. +/// - Converting `T` to a C-compatible type using [`IntoFfi`]. +/// - Converting `E` to a C-compatible error via `Into<ExternError>`. +/// +/// This (or [`call_with_output`]) should be in the majority of the FFI functions, see the crate +/// top-level docs for more info. +/// +/// If your function doesn't produce an error, you may use [`call_with_output`] instead, which +/// doesn't require you return a Result. +/// +/// ## Example +/// +/// A few points about the following example: +/// +/// - We need to mark it as `#[no_mangle] pub extern "C"`. +/// +/// - We prefix it with a unique name for the library (e.g. `mylib_`). Foreign functions are not +/// namespaced, and symbol collisions can cause a large number of problems and subtle bugs, +/// including memory safety issues in some cases. +/// +/// ```rust,no_run +/// # use ffi_support::{ExternError, ErrorCode, FfiStr}; +/// # use std::os::raw::c_char; +/// +/// # #[derive(Debug)] +/// # struct BadEmptyString; +/// # impl From<BadEmptyString> for ExternError { +/// # fn from(e: BadEmptyString) -> Self { +/// # ExternError::new_error(ErrorCode::new(1), "Bad empty string") +/// # } +/// # } +/// +/// #[no_mangle] +/// pub extern "C" fn mylib_print_string( +/// // Strings come in as an `FfiStr`, which is a wrapper around a null terminated C string. +/// thing_to_print: FfiStr<'_>, +/// // Note that taking `&mut T` and `&T` is both allowed and encouraged, so long as `T: Sized`, +/// // (e.g. it can't be a trait object, `&[T]`, a `&str`, etc). Also note that `Option<&T>` and +/// // `Option<&mut T>` are also allowed, if you expect the caller to sometimes pass in null, but +/// // that's the only case when it's currently to use `Option` in an argument list like this). +/// error: &mut ExternError +/// ) { +/// // You should try to to do as little as possible outside the call_with_result, +/// // to avoid a case where a panic occurs. +/// ffi_support::call_with_result(error, || { +/// let s = thing_to_print.as_str(); +/// if s.is_empty() { +/// // This is a silly example! +/// return Err(BadEmptyString); +/// } +/// println!("{}", s); +/// Ok(()) +/// }) +/// } +/// ``` +pub fn call_with_result<R, E, F>(out_error: &mut ExternError, callback: F) -> R::Value +where + F: panic::UnwindSafe + FnOnce() -> Result<R, E>, + E: Into<ExternError>, + R: IntoFfi, +{ + call_with_result_impl(out_error, callback) +} + +/// Call a callback that returns a `T` while: +/// +/// - Catching panics, and reporting them to C via [`ExternError`] +/// - Converting `T` to a C-compatible type using [`IntoFfi`] +/// +/// Note that you still need to provide an [`ExternError`] to this function, to report panics. +/// +/// See [`call_with_result`] if you'd like to return a `Result<T, E>` (Note: `E` must +/// be convertible to [`ExternError`]). +/// +/// This (or [`call_with_result`]) should be in the majority of the FFI functions, see +/// the crate top-level docs for more info. +pub fn call_with_output<R, F>(out_error: &mut ExternError, callback: F) -> R::Value +where + F: panic::UnwindSafe + FnOnce() -> R, + R: IntoFfi, +{ + // We need something that's `Into<ExternError>`, even though we never return it, so just use + // `ExternError` itself. + call_with_result(out_error, || -> Result<_, ExternError> { Ok(callback()) }) +} + +fn call_with_result_impl<R, E, F>(out_error: &mut ExternError, callback: F) -> R::Value +where + F: panic::UnwindSafe + FnOnce() -> Result<R, E>, + E: Into<ExternError>, + R: IntoFfi, +{ + *out_error = ExternError::success(); + let res: thread::Result<(ExternError, R::Value)> = panic::catch_unwind(|| { + ensure_panic_hook_is_setup(); + match callback() { + Ok(v) => (ExternError::default(), v.into_ffi_value()), + Err(e) => (e.into(), R::ffi_default()), + } + }); + match res { + Ok((err, o)) => { + *out_error = err; + o + } + Err(e) => { + *out_error = e.into(); + R::ffi_default() + } + } +} + +/// This module exists just to expose a variant of [`call_with_result`] and [`call_with_output`] +/// that aborts, instead of unwinding, on panic. +pub mod abort_on_panic { + use super::*; + + // Struct that exists to automatically process::abort if we don't call + // `std::mem::forget()` on it. This can have substantial performance + // benefits over calling `std::panic::catch_unwind` and aborting if a panic + // was caught, in addition to not requiring AssertUnwindSafe (for example). + struct AbortOnDrop; + impl Drop for AbortOnDrop { + fn drop(&mut self) { + std::process::abort(); + } + } + + /// A helper function useful for cases where you'd like to abort on panic, + /// but aren't in a position where you'd like to return an FFI-compatible + /// type. + #[inline] + pub fn with_abort_on_panic<R, F>(callback: F) -> R + where + F: FnOnce() -> R, + { + let aborter = AbortOnDrop; + let res = callback(); + std::mem::forget(aborter); + res + } + + /// Same as the root `call_with_result`, but aborts on panic instead of unwinding. See the + /// `call_with_result` documentation for more. + pub fn call_with_result<R, E, F>(out_error: &mut ExternError, callback: F) -> R::Value + where + F: FnOnce() -> Result<R, E>, + E: Into<ExternError>, + R: IntoFfi, + { + with_abort_on_panic(|| match callback() { + Ok(v) => { + *out_error = ExternError::default(); + v.into_ffi_value() + } + Err(e) => { + *out_error = e.into(); + R::ffi_default() + } + }) + } + + /// Same as the root `call_with_output`, but aborts on panic instead of unwinding. As a result, + /// it doesn't require a [`ExternError`] out argument. See the `call_with_output` documentation + /// for more info. + pub fn call_with_output<R, F>(callback: F) -> R::Value + where + F: FnOnce() -> R, + R: IntoFfi, + { + with_abort_on_panic(callback).into_ffi_value() + } +} + +/// Initialize our panic handling hook to optionally log panics +#[cfg(feature = "log_panics")] +pub fn ensure_panic_hook_is_setup() { + use std::sync::Once; + static INIT_BACKTRACES: Once = Once::new(); + INIT_BACKTRACES.call_once(move || { + #[cfg(all(feature = "log_backtraces", not(target_os = "android")))] + { + std::env::set_var("RUST_BACKTRACE", "1"); + } + // Turn on a panic hook which logs both backtraces and the panic + // "Location" (file/line). We do both in case we've been stripped, + // ). + std::panic::set_hook(Box::new(move |panic_info| { + let (file, line) = if let Some(loc) = panic_info.location() { + (loc.file(), loc.line()) + } else { + // Apparently this won't happen but rust has reserved the + // ability to start returning None from location in some cases + // in the future. + ("<unknown>", 0) + }; + log::error!("### Rust `panic!` hit at file '{}', line {}", file, line); + #[cfg(all(feature = "log_backtraces", not(target_os = "android")))] + { + log::error!(" Complete stack trace:\n{:?}", backtrace::Backtrace::new()); + } + })); + }); +} + +/// Initialize our panic handling hook to optionally log panics +#[cfg(not(feature = "log_panics"))] +pub fn ensure_panic_hook_is_setup() {} + +/// ByteBuffer is a struct that represents an array of bytes to be sent over the FFI boundaries. +/// There are several cases when you might want to use this, but the primary one for us +/// is for returning protobuf-encoded data to Swift and Java. The type is currently rather +/// limited (implementing almost no functionality), however in the future it may be +/// more expanded. +/// +/// ## Caveats +/// +/// Note that the order of the fields is `len` (an i64) then `data` (a `*mut u8`), getting +/// this wrong on the other side of the FFI will cause memory corruption and crashes. +/// `i64` is used for the length instead of `u64` and `usize` because JNA has interop +/// issues with both these types. +/// +/// ### `Drop` is not implemented +/// +/// ByteBuffer does not implement Drop. This is intentional. Memory passed into it will +/// be leaked if it is not explicitly destroyed by calling [`ByteBuffer::destroy`], or +/// [`ByteBuffer::destroy_into_vec`]. This is for two reasons: +/// +/// 1. In the future, we may allow it to be used for data that is not managed by +/// the Rust allocator\*, and `ByteBuffer` assuming it's okay to automatically +/// deallocate this data with the Rust allocator. +/// +/// 2. Automatically running destructors in unsafe code is a +/// [frequent footgun](https://without.boats/blog/two-memory-bugs-from-ringbahn/) +/// (among many similar issues across many crates). +/// +/// Note that calling `destroy` manually is often not needed, as usually you should +/// be passing these to the function defined by [`define_bytebuffer_destructor!`] from +/// the other side of the FFI. +/// +/// Because this type is essentially *only* useful in unsafe or FFI code (and because +/// the most common usage pattern does not require manually managing the memory), it +/// does not implement `Drop`. +/// +/// \* Note: in the case of multiple Rust shared libraries loaded at the same time, +/// there may be multiple instances of "the Rust allocator" (one per shared library), +/// in which case we're referring to whichever instance is active for the code using +/// the `ByteBuffer`. Note that this doesn't occur on all platforms or build +/// configurations, but treating allocators in different shared libraries as fully +/// independent is always safe. +/// +/// ## Layout/fields +/// +/// This struct's field are not `pub` (mostly so that we can soundly implement `Send`, but also so +/// that we can verify rust users are constructing them appropriately), the fields, their types, and +/// their order are *very much* a part of the public API of this type. Consumers on the other side +/// of the FFI will need to know its layout. +/// +/// If this were a C struct, it would look like +/// +/// ```c,no_run +/// struct ByteBuffer { +/// // Note: This should never be negative, but values above +/// // INT64_MAX / i64::MAX are not allowed. +/// int64_t len; +/// // Note: nullable! +/// uint8_t *data; +/// }; +/// ``` +/// +/// In rust, there are two fields, in this order: `len: i64`, and `data: *mut u8`. +/// +/// For clarity, the fact that the data pointer is nullable means that `Option<ByteBuffer>` is not +/// the same size as ByteBuffer, and additionally is not FFI-safe (the latter point is not +/// currently guaranteed anyway as of the time of writing this comment). +/// +/// ### Description of fields +/// +/// `data` is a pointer to an array of `len` bytes. Note that data can be a null pointer and therefore +/// should be checked. +/// +/// The bytes array is allocated on the heap and must be freed on it as well. Critically, if there +/// are multiple rust shared libraries using being used in the same application, it *must be freed +/// on the same heap that allocated it*, or you will corrupt both heaps. +/// +/// Typically, this object is managed on the other side of the FFI (on the "FFI consumer"), which +/// means you must expose a function to release the resources of `data` which can be done easily +/// using the [`define_bytebuffer_destructor!`] macro provided by this crate. +#[repr(C)] +pub struct ByteBuffer { + len: i64, + data: *mut u8, +} + +impl From<Vec<u8>> for ByteBuffer { + #[inline] + fn from(bytes: Vec<u8>) -> Self { + Self::from_vec(bytes) + } +} + +impl ByteBuffer { + /// Creates a `ByteBuffer` of the requested size, zero-filled. + /// + /// The contents of the vector will not be dropped. Instead, `destroy` must + /// be called later to reclaim this memory or it will be leaked. + /// + /// ## Caveats + /// + /// This will panic if the buffer length (`usize`) cannot fit into a `i64`. + #[inline] + pub fn new_with_size(size: usize) -> Self { + // Note: `Vec` requires this internally on 64 bit platforms (and has a + // stricter requirement on 32 bit ones), so this is just to be explicit. + assert!(size < i64::MAX as usize); + let mut buf = vec![]; + buf.reserve_exact(size); + buf.resize(size, 0); + ByteBuffer::from_vec(buf) + } + + /// Creates a `ByteBuffer` instance from a `Vec` instance. + /// + /// The contents of the vector will not be dropped. Instead, `destroy` must + /// be called later to reclaim this memory or it will be leaked. + /// + /// ## Caveats + /// + /// This will panic if the buffer length (`usize`) cannot fit into a `i64`. + #[inline] + pub fn from_vec(bytes: Vec<u8>) -> Self { + use std::convert::TryFrom; + let mut buf = bytes.into_boxed_slice(); + let data = buf.as_mut_ptr(); + let len = i64::try_from(buf.len()).expect("buffer length cannot fit into a i64."); + std::mem::forget(buf); + Self { data, len } + } + + /// View the data inside this `ByteBuffer` as a `&[u8]`. + // TODO: Is it worth implementing `Deref`? Patches welcome if you need this. + #[inline] + pub fn as_slice(&self) -> &[u8] { + if self.data.is_null() { + &[] + } else { + unsafe { std::slice::from_raw_parts(self.data, self.len()) } + } + } + + #[inline] + fn len(&self) -> usize { + use std::convert::TryInto; + self.len + .try_into() + .expect("ByteBuffer length negative or overflowed") + } + + /// View the data inside this `ByteBuffer` as a `&mut [u8]`. + // TODO: Is it worth implementing `DerefMut`? Patches welcome if you need this. + #[inline] + pub fn as_mut_slice(&mut self) -> &mut [u8] { + if self.data.is_null() { + &mut [] + } else { + unsafe { std::slice::from_raw_parts_mut(self.data, self.len()) } + } + } + + /// Deprecated alias for [`ByteBuffer::destroy_into_vec`]. + #[inline] + #[deprecated = "Name is confusing, please use `destroy_into_vec` instead"] + pub fn into_vec(self) -> Vec<u8> { + self.destroy_into_vec() + } + + /// Convert this `ByteBuffer` into a Vec<u8>, taking ownership of the + /// underlying memory, which will be freed using the rust allocator once the + /// `Vec<u8>`'s lifetime is done. + /// + /// If this is undesirable, you can do `bb.as_slice().to_vec()` to get a + /// `Vec<u8>` containing a copy of this `ByteBuffer`'s underlying data. + /// + /// ## Caveats + /// + /// This is safe so long as the buffer is empty, or the data was allocated + /// by Rust code, e.g. this is a ByteBuffer created by + /// `ByteBuffer::from_vec` or `Default::default`. + /// + /// If the ByteBuffer were allocated by something other than the + /// current/local Rust `global_allocator`, then calling `destroy` is + /// fundamentally broken. + /// + /// For example, if it were allocated externally by some other language's + /// runtime, or if it were allocated by the global allocator of some other + /// Rust shared object in the same application, the behavior is undefined + /// (and likely to cause problems). + /// + /// Note that this currently can only happen if the `ByteBuffer` is passed + /// to you via an `extern "C"` function that you expose, as opposed to being + /// created locally. + #[inline] + pub fn destroy_into_vec(self) -> Vec<u8> { + if self.data.is_null() { + vec![] + } else { + let len = self.len(); + // Safety: This is correct because we convert to a Box<[u8]> first, + // which is a design constraint of RawVec. + unsafe { Vec::from_raw_parts(self.data, len, len) } + } + } + + /// Reclaim memory stored in this ByteBuffer. + /// + /// You typically should not call this manually, and instead expose a + /// function that does so via [`define_bytebuffer_destructor!`]. + /// + /// ## Caveats + /// + /// This is safe so long as the buffer is empty, or the data was allocated + /// by Rust code, e.g. this is a ByteBuffer created by + /// `ByteBuffer::from_vec` or `Default::default`. + /// + /// If the ByteBuffer were allocated by something other than the + /// current/local Rust `global_allocator`, then calling `destroy` is + /// fundamentally broken. + /// + /// For example, if it were allocated externally by some other language's + /// runtime, or if it were allocated by the global allocator of some other + /// Rust shared object in the same application, the behavior is undefined + /// (and likely to cause problems). + /// + /// Note that this currently can only happen if the `ByteBuffer` is passed + /// to you via an `extern "C"` function that you expose, as opposed to being + /// created locally. + #[inline] + pub fn destroy(self) { + // Note: the drop is just for clarity, of course. + drop(self.destroy_into_vec()) + } +} + +impl Default for ByteBuffer { + #[inline] + fn default() -> Self { + Self { + len: 0 as i64, + data: std::ptr::null_mut(), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_bb_access() { + let mut bb = ByteBuffer::from(vec![1u8, 2, 3]); + assert_eq!(bb.as_slice(), &[1u8, 2, 3]); + assert_eq!(bb.as_mut_slice(), &mut [1u8, 2, 3]); + bb.as_mut_slice()[2] = 4; + + // Use into_vec to cover both into_vec and destroy_into_vec. + #[allow(deprecated)] + { + assert_eq!(bb.into_vec(), &[1u8, 2, 4]); + } + } + + #[test] + fn test_bb_empty() { + let mut bb = ByteBuffer::default(); + assert_eq!(bb.as_slice(), &[]); + assert_eq!(bb.as_mut_slice(), &[]); + assert_eq!(bb.destroy_into_vec(), &[]); + } + + #[test] + fn test_bb_new() { + let bb = ByteBuffer::new_with_size(5); + assert_eq!(bb.as_slice(), &[0u8, 0, 0, 0, 0]); + bb.destroy(); + + let bb = ByteBuffer::new_with_size(0); + assert_eq!(bb.as_slice(), &[]); + assert!(!bb.data.is_null()); + bb.destroy(); + + let bb = ByteBuffer::from_vec(vec![]); + assert_eq!(bb.as_slice(), &[]); + assert!(!bb.data.is_null()); + bb.destroy(); + } +} diff --git a/third_party/rust/ffi-support/src/macros.rs b/third_party/rust/ffi-support/src/macros.rs new file mode 100644 index 0000000000..1eeff5771e --- /dev/null +++ b/third_party/rust/ffi-support/src/macros.rs @@ -0,0 +1,362 @@ +/* Copyright 2018-2019 Mozilla Foundation + * + * Licensed under the Apache License (Version 2.0), or the MIT license, + * (the "Licenses") at your option. You may not use this file except in + * compliance with one of the Licenses. You may obtain copies of the + * Licenses at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licenses is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licenses for the specific language governing permissions and + * limitations under the Licenses. */ + +/// Implements [`IntoFfi`][crate::IntoFfi] for the provided types (more than one +/// may be passed in) by allocating `$T` on the heap as an opaque pointer. +/// +/// This is typically going to be used from the "Rust component", and not the +/// "FFI component" (see the top level crate documentation for more +/// information), however you will still need to implement a destructor in the +/// FFI component using [`define_box_destructor!`][crate::define_box_destructor]. +/// +/// In general, is only safe to do for `send` types (even this is dodgy, but +/// it's often necessary to keep the locking on the other side of the FFI, so +/// Sync is too harsh), so we enforce this in this macro. (You're still free to +/// implement this manually, if this restriction is too harsh for your use case +/// and you're certain you know what you're doing). +#[macro_export] +macro_rules! implement_into_ffi_by_pointer { + ($($T:ty),* $(,)*) => {$( + unsafe impl $crate::IntoFfi for $T where $T: Send { + type Value = *mut $T; + + #[inline] + fn ffi_default() -> *mut $T { + std::ptr::null_mut() + } + + #[inline] + fn into_ffi_value(self) -> *mut $T { + Box::into_raw(Box::new(self)) + } + } + )*} +} + +/// Implements [`IntoFfi`][crate::IntoFfi] for the provided types (more than one +/// may be passed in) by converting to the type to a JSON string. +/// +/// Additionally, most of the time we recomment using this crate's protobuf +/// support, instead of JSON. +/// +/// This is typically going to be used from the "Rust component", and not the +/// "FFI component" (see the top level crate documentation for more +/// information). +/// +/// Note: Each type passed in must implement or derive `serde::Serialize`. +/// +/// Note: for this to works, the crate it's called in must depend on `serde` and +/// `serde_json`. +/// +/// ## Panics +/// +/// The [`IntoFfi`][crate::IntoFfi] implementation this macro generates may +/// panic in the following cases: +/// +/// - You've passed a type that contains a Map that has non-string keys (which +/// can't be represented in JSON). +/// +/// - You've passed a type which has a custom serializer, and the custom +/// serializer failed. +/// +/// These cases are both rare enough that this still seems fine for the majority +/// of uses. +#[macro_export] +macro_rules! implement_into_ffi_by_json { + ($($T:ty),* $(,)*) => {$( + unsafe impl $crate::IntoFfi for $T where $T: serde::Serialize { + type Value = *mut std::os::raw::c_char; + #[inline] + fn ffi_default() -> *mut std::os::raw::c_char { + std::ptr::null_mut() + } + #[inline] + fn into_ffi_value(self) -> *mut std::os::raw::c_char { + // This panic is inside our catch_panic, so it should be fine. + // We've also documented the case where the IntoFfi impl that + // calls this panics, and it's rare enough that it shouldn't + // matter that if it happens we return an ExternError + // representing a panic instead of one of some other type + // (especially given that the application isn't likely to be + // able to meaningfully handle JSON serialization failure). + let as_string = serde_json::to_string(&self).unwrap(); + $crate::rust_string_to_c(as_string) + } + } + )*} +} + +/// Implements [`IntoFfi`][crate::IntoFfi] for the provided types (more than one +/// may be passed in) implementing `prost::Message` (protobuf auto-generated +/// type) by converting to the type to a [`ByteBuffer`][crate::ByteBuffer]. This +/// [`ByteBuffer`][crate::ByteBuffer] should later be passed by value. +/// +/// Note: for this to works, the crate it's called in must depend on `prost`. +/// +/// Note: Each type passed in must implement or derive `prost::Message`. +#[macro_export] +macro_rules! implement_into_ffi_by_protobuf { + ($($FFIType:ty),* $(,)*) => {$( + unsafe impl $crate::IntoFfi for $FFIType where $FFIType: prost::Message { + type Value = $crate::ByteBuffer; + #[inline] + fn ffi_default() -> Self::Value { + Default::default() + } + + #[inline] + fn into_ffi_value(self) -> Self::Value { + use prost::Message; + let mut bytes = Vec::with_capacity(self.encoded_len()); + // Unwrap is safe, since we have reserved sufficient capacity in + // the vector. + self.encode(&mut bytes).unwrap(); + bytes.into() + } + } + )*} +} + +/// Implement [`InfoFfi`][crate::IntoFfi] for a type by converting through +/// another type. +/// +/// The argument `$MidTy` argument must implement `From<$SrcTy>` and +/// [`InfoFfi`][crate::IntoFfi]. +/// +/// This is provided (even though it's trivial) because it is always safe (well, +/// so long as `$MidTy`'s [`IntoFfi`][crate::IntoFfi] implementation is +/// correct), but would otherwise require use of `unsafe` to implement. +#[macro_export] +macro_rules! implement_into_ffi_by_delegation { + ($SrcTy:ty, $MidTy:ty) => { + unsafe impl $crate::IntoFfi for $SrcTy + where + $MidTy: From<$SrcTy> + $crate::IntoFfi, + { + // The <$MidTy as SomeTrait>::method is required even when it would + // be ambiguous due to some obscure details of macro syntax. + type Value = <$MidTy as $crate::IntoFfi>::Value; + + #[inline] + fn ffi_default() -> Self::Value { + <$MidTy as $crate::IntoFfi>::ffi_default() + } + + #[inline] + fn into_ffi_value(self) -> Self::Value { + use $crate::IntoFfi; + <$MidTy as From<$SrcTy>>::from(self).into_ffi_value() + } + } + }; +} + +/// For a number of reasons (name collisions are a big one, but, it also wouldn't work on all +/// platforms), we cannot export `extern "C"` functions from this library. However, it's pretty +/// common to want to free strings allocated by rust, so many libraries will need this, so we +/// provide it as a macro. +/// +/// It simply expands to a `#[no_mangle] pub unsafe extern "C" fn` which wraps this crate's +/// [`destroy_c_string`][crate::destroy_c_string] function. +/// +/// ## Caveats +/// +/// If you're using multiple separately compiled rust libraries in your application, it's critical +/// that you are careful to only ever free strings allocated by a Rust library using the same rust +/// library. Passing them to a different Rust library's string destructor will cause you to corrupt +/// multiple heaps. +/// +/// Additionally, be sure that all strings you pass to this were actually allocated by rust. It's a +/// common issue for JNA code to transparently convert Pointers to things to Strings behind the +/// scenes, which is quite risky here. (To avoid this in JNA, only use `String` for passing +/// read-only strings into Rust, e.g. it's for passing `*const c_char`. All other uses should use +/// `Pointer` and `getString()`). +/// +/// Finally, to avoid name collisions, it is strongly recommended that you provide an name for this +/// function unique to your library. +/// +/// ## Example +/// +/// ```rust +/// # use ffi_support::define_string_destructor; +/// define_string_destructor!(mylib_destroy_string); +/// ``` +#[macro_export] +macro_rules! define_string_destructor { + ($mylib_destroy_string:ident) => { + /// Public destructor for strings managed by the other side of the FFI. + /// + /// # Safety + /// + /// This will free the string pointer it gets passed in as an argument, + /// and thus can be wildly unsafe if misused. + /// + /// See the documentation of `ffi_support::destroy_c_string` and + /// `ffi_support::define_string_destructor!` for further info. + #[no_mangle] + pub unsafe extern "C" fn $mylib_destroy_string(s: *mut std::os::raw::c_char) { + // Note: This should never happen, but in the case of a bug aborting + // here is better than the badness that happens if we unwind across + // the FFI boundary. + $crate::abort_on_panic::with_abort_on_panic(|| { + if !s.is_null() { + $crate::destroy_c_string(s) + } + }); + } + }; +} + +/// Define a (public) destructor for a type that was allocated by +/// `Box::into_raw(Box::new(value))` (e.g. a pointer which is probably opaque). +/// +/// ## Caveats +/// +/// When called over the FFI, this can go wrong in a ridiculous number of ways, +/// and we can't really prevent any of them. But essentially, the caller (on the +/// other side of the FFI) needs to be extremely careful to ensure that it stops +/// using the pointer after it's freed. +/// +/// Also, to avoid name collisions, it is strongly recommended that you provide +/// an name for this function unique to your library. (This is true for all +/// functions you expose). +/// +/// However, when called from rust, this is safe, as it becomes a function that +/// just drops a `Option<Box<T>>` with some panic handling. +/// +/// ## Example +/// +/// ```rust +/// # use ffi_support::define_box_destructor; +/// struct CoolType(Vec<i32>); +/// +/// define_box_destructor!(CoolType, mylib_destroy_cooltype); +/// ``` +#[macro_export] +macro_rules! define_box_destructor { + ($T:ty, $destructor_name:ident) => { + /// # Safety + /// This is equivalent to calling Box::from_raw with panic handling, and + /// thus inherits [`Box::from_raw`]'s safety properties. That is to say, + /// this function is wildly unsafe. + #[no_mangle] + pub unsafe extern "C" fn $destructor_name(v: *mut $T) { + // We should consider passing an error parameter in here rather than + // aborting, but at the moment the only case where we do this + // (interrupt handles) should never panic in Drop, so it's probably + // fine. + $crate::abort_on_panic::with_abort_on_panic(|| { + if !v.is_null() { + drop(Box::from_raw(v)) + } + }); + } + }; +} + +/// Define a (public) destructor for the [`ByteBuffer`][crate::ByteBuffer] type. +/// +/// ## Caveats +/// +/// If you're using multiple separately compiled rust libraries in your application, it's critical +/// that you are careful to only ever free `ByteBuffer` instances allocated by a Rust library using +/// the same rust library. Passing them to a different Rust library's string destructor will cause +/// you to corrupt multiple heaps. +/// One common ByteBuffer destructor is defined per Rust library. +/// +/// Also, to avoid name collisions, it is strongly recommended that you provide an name for this +/// function unique to your library. (This is true for all functions you expose). +/// +/// ## Example +/// +/// ```rust +/// # use ffi_support::define_bytebuffer_destructor; +/// define_bytebuffer_destructor!(mylib_destroy_bytebuffer); +/// ``` +#[macro_export] +macro_rules! define_bytebuffer_destructor { + ($destructor_name:ident) => { + #[no_mangle] + pub extern "C" fn $destructor_name(v: $crate::ByteBuffer) { + // Note: This should never happen, but in the case of a bug aborting + // here is better than the badness that happens if we unwind across + // the FFI boundary. + $crate::abort_on_panic::with_abort_on_panic(|| v.destroy()) + } + }; +} + +/// Define a (public) destructor for a type that lives inside a lazy_static +/// [`ConcurrentHandleMap`][crate::ConcurrentHandleMap]. +/// +/// Note that this is actually totally safe, unlike the other +/// `define_blah_destructor` macros. +/// +/// A critical difference, however, is that this dtor takes an `err` out +/// parameter to indicate failure. This difference is why the name is different +/// as well (deleter vs destructor). +/// +/// ## Example +/// +/// ```rust +/// # use lazy_static::lazy_static; +/// # use ffi_support::{ConcurrentHandleMap, define_handle_map_deleter}; +/// struct Thing(Vec<i32>); +/// // Somewhere... +/// lazy_static! { +/// static ref THING_HANDLES: ConcurrentHandleMap<Thing> = ConcurrentHandleMap::new(); +/// } +/// define_handle_map_deleter!(THING_HANDLES, mylib_destroy_thing); +/// ``` +#[macro_export] +macro_rules! define_handle_map_deleter { + ($HANDLE_MAP_NAME:ident, $destructor_name:ident) => { + #[no_mangle] + pub extern "C" fn $destructor_name(v: u64, err: &mut $crate::ExternError) { + $crate::call_with_result(err, || { + // Force type errors here. + let map: &$crate::ConcurrentHandleMap<_> = &*$HANDLE_MAP_NAME; + map.delete_u64(v) + }) + } + }; +} + +/// Force a compile error if the condition is not met. Requires a unique name +/// for the assertion for... reasons. This is included mainly because it's a +/// common desire for FFI code, but not for other sorts of code. +/// +/// # Examples +/// +/// Failing example: +/// +/// ```compile_fail +/// ffi_support::static_assert!(THIS_SHOULD_FAIL, false); +/// ``` +/// +/// Passing example: +/// +/// ``` +/// ffi_support::static_assert!(THIS_SHOULD_PASS, true); +/// ``` +#[macro_export] +macro_rules! static_assert { + ($ASSERT_NAME:ident, $test:expr) => { + #[allow(dead_code, nonstandard_style)] + const $ASSERT_NAME: [u8; 0 - (!$test as bool as usize)] = + [0u8; 0 - (!$test as bool as usize)]; + }; +} diff --git a/third_party/rust/ffi-support/src/string.rs b/third_party/rust/ffi-support/src/string.rs new file mode 100644 index 0000000000..b1311669f5 --- /dev/null +++ b/third_party/rust/ffi-support/src/string.rs @@ -0,0 +1,162 @@ +/* Copyright 2018-2019 Mozilla Foundation + * + * Licensed under the Apache License (Version 2.0), or the MIT license, + * (the "Licenses") at your option. You may not use this file except in + * compliance with one of the Licenses. You may obtain copies of the + * Licenses at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licenses is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licenses for the specific language governing permissions and + * limitations under the Licenses. */ + +use crate::FfiStr; +use std::ffi::CString; +use std::os::raw::c_char; +use std::ptr; + +/// Convert a rust string into a NUL-terminated utf-8 string suitable for passing to C, or to things +/// ABI-compatible with C. +/// +/// Important: This string must eventually be freed. You may either do that using the +/// [`destroy_c_string`] method (or, if you must, by dropping the underlying [`std::ffi::CString`] +/// after recovering it via [`std::ffi::CString::from_raw`]). +/// +/// It's common to want to allow the consumer (e.g. on the "C" side of the FFI) to be allowed to +/// free this memory, and the macro [`define_string_destructor!`] may be used to do so. +/// +/// ## Panics +/// +/// This function may panic if the argument has an interior null byte. This is fairly rare, but +/// is possible in theory. +#[inline] +pub fn rust_string_to_c(rust_string: impl Into<String>) -> *mut c_char { + CString::new(rust_string.into()) + .expect("Error: Rust string contained an interior null byte.") + .into_raw() +} + +/// Variant of [`rust_string_to_c`] which takes an Option, and returns null for None. +#[inline] +pub fn opt_rust_string_to_c(opt_rust_string: Option<impl Into<String>>) -> *mut c_char { + if let Some(s) = opt_rust_string { + rust_string_to_c(s) + } else { + ptr::null_mut() + } +} + +/// Free the memory of a string created by [`rust_string_to_c`] on the rust heap. If `c_string` is +/// null, this is a no-op. +/// +/// See the [`define_string_destructor!`] macro which may be used for exposing this function over +/// the FFI. +/// +/// ## Safety +/// +/// This is inherently unsafe, since we're deallocating memory. Be sure +/// +/// - Nobody can use the memory after it's deallocated. +/// - The memory was actually allocated on this heap (and it's not a string from the other side of +/// the FFI which was allocated on e.g. the C heap). +/// - If multiple separate rust libraries are in use (for example, as DLLs) in a single program, +/// you must also make sure that the rust library that allocated the memory is also the one +/// that frees it. +/// +/// See documentation for [`define_string_destructor!`], which gives a more complete overview of the +/// potential issues. +#[inline] +pub unsafe fn destroy_c_string(cstring: *mut c_char) { + // we're not guaranteed to be in a place where we can complain about this beyond logging, + // and there's an obvious way to handle it. + if !cstring.is_null() { + drop(CString::from_raw(cstring)) + } +} + +/// Convert a null-terminated C string to a rust `str`. This does not take ownership of the string, +/// and you should be careful about the lifetime of the resulting string. Note that strings +/// containing invalid UTF-8 are replaced with the empty string (for many cases, you will want to +/// use [`rust_string_from_c`] instead, which will do a lossy conversion). +/// +/// If you actually need an owned rust `String`, you're encouraged to use [`rust_string_from_c`], +/// which, as mentioned, also behaves better in the face of invalid UTF-8. +/// +/// ## Safety +/// +/// This is unsafe because we read from a raw pointer, which may or may not be valid. +/// +/// We also assume `c_string` is a null terminated string, and have no way of knowing if that's +/// actually true. If it's not, we'll read arbitrary memory from the heap until we see a '\0', which +/// can result in a enormous number of problems. +/// +/// ## Panics +/// +/// Panics if it's argument is null, see [`opt_rust_str_from_c`] for a variant that returns None in +/// this case instead. +/// +/// Note: This means it's forbidden to call this outside of a `call_with_result` (or something else +/// that uses [`std::panic::catch_unwind`]), as it is UB to panic across the FFI boundary. +#[inline] +#[deprecated(since = "0.3.0", note = "Please use FfiStr::as_str instead")] +pub unsafe fn rust_str_from_c<'a>(c_string: *const c_char) -> &'a str { + FfiStr::from_raw(c_string).as_str() +} + +/// Same as `rust_string_from_c`, but returns None if `c_string` is null instead of asserting. +/// +/// ## Safety +/// +/// This is unsafe because we read from a raw pointer, which may or may not be valid. +/// +/// We also assume `c_string` is a null terminated string, and have no way of knowing if that's +/// actually true. If it's not, we'll read arbitrary memory from the heap until we see a '\0', which +/// can result in a enormous number of problems. +#[inline] +#[deprecated(since = "0.3.0", note = "Please use FfiStr::as_opt_str instead")] +pub unsafe fn opt_rust_str_from_c<'a>(c_string: *const c_char) -> Option<&'a str> { + FfiStr::from_raw(c_string).as_opt_str() +} + +/// Convert a null-terminated C into an owned rust string, replacing invalid UTF-8 with the +/// unicode replacement character. +/// +/// ## Safety +/// +/// This is unsafe because we dereference a raw pointer, which may or may not be valid. +/// +/// We also assume `c_string` is a null terminated string, and have no way of knowing if that's +/// actually true. If it's not, we'll read arbitrary memory from the heap until we see a '\0', which +/// can result in a enormous number of problems. +/// +/// ## Panics +/// +/// Panics if it's argument is null. See also [`opt_rust_string_from_c`], which returns None +/// instead. +/// +/// Note: This means it's forbidden to call this outside of a `call_with_result` (or something else +/// that uses `std::panic::catch_unwind`), as it is UB to panic across the FFI boundary. +#[inline] +#[deprecated(since = "0.3.0", note = "Please use FfiStr::into_string instead")] +pub unsafe fn rust_string_from_c(c_string: *const c_char) -> String { + FfiStr::from_raw(c_string).into_string() +} + +/// Same as `rust_string_from_c`, but returns None if `c_string` is null instead of asserting. +/// +/// ## Safety +/// +/// This is unsafe because we dereference a raw pointer, which may or may not be valid. +/// +/// We also assume `c_string` is a null terminated string, and have no way of knowing if that's +/// actually true. If it's not, we'll read arbitrary memory from the heap until we see a '\0', which +/// can result in a enormous number of problems. +#[inline] +#[deprecated(since = "0.3.0", note = "Please use FfiStr::into_opt_string instead")] +pub unsafe fn opt_rust_string_from_c(c_string: *const c_char) -> Option<String> { + FfiStr::from_raw(c_string).into_opt_string() +} |