summaryrefslogtreecommitdiffstats
path: root/vendor/hashbrown/src/raw/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/hashbrown/src/raw/mod.rs')
-rw-r--r--vendor/hashbrown/src/raw/mod.rs1942
1 files changed, 1650 insertions, 292 deletions
diff --git a/vendor/hashbrown/src/raw/mod.rs b/vendor/hashbrown/src/raw/mod.rs
index 1a6dced4b..25c5d1c4d 100644
--- a/vendor/hashbrown/src/raw/mod.rs
+++ b/vendor/hashbrown/src/raw/mod.rs
@@ -4,7 +4,6 @@ use crate::TryReserveError;
use core::iter::FusedIterator;
use core::marker::PhantomData;
use core::mem;
-use core::mem::ManuallyDrop;
use core::mem::MaybeUninit;
use core::ptr::NonNull;
use core::{hint, ptr};
@@ -21,11 +20,18 @@ cfg_if! {
if #[cfg(all(
target_feature = "sse2",
any(target_arch = "x86", target_arch = "x86_64"),
- not(miri)
+ not(miri),
))] {
mod sse2;
use sse2 as imp;
- } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
+ } else if #[cfg(all(
+ target_arch = "aarch64",
+ target_feature = "neon",
+ // NEON intrinsics are currently broken on big-endian targets.
+ // See https://github.com/rust-lang/stdarch/issues/1484.
+ target_endian = "little",
+ not(miri),
+ ))] {
mod neon;
use neon as imp;
} else {
@@ -93,6 +99,13 @@ impl Fallibility {
}
}
+trait SizedTypeProperties: Sized {
+ const IS_ZERO_SIZED: bool = mem::size_of::<Self>() == 0;
+ const NEEDS_DROP: bool = mem::needs_drop::<Self>();
+}
+
+impl<T> SizedTypeProperties for T {}
+
/// Control byte value for an empty bucket.
const EMPTY: u8 = 0b1111_1111;
@@ -294,8 +307,6 @@ impl<T> Clone for Bucket<T> {
}
impl<T> Bucket<T> {
- const IS_ZERO_SIZED_TYPE: bool = mem::size_of::<T>() == 0;
-
/// Creates a [`Bucket`] that contain pointer to the data.
/// The pointer calculation is performed by calculating the
/// offset from given `base` pointer (convenience for
@@ -364,7 +375,7 @@ impl<T> Bucket<T> {
//
// where: T0...Tlast - our stored data; C0...Clast - control bytes
// or metadata for data.
- let ptr = if Self::IS_ZERO_SIZED_TYPE {
+ let ptr = if T::IS_ZERO_SIZED {
// won't overflow because index must be less than length (bucket_mask)
// and bucket_mask is guaranteed to be less than `isize::MAX`
// (see TableLayout::calculate_layout_for method)
@@ -438,7 +449,7 @@ impl<T> Bucket<T> {
// (base.as_ptr() as usize - self.ptr.as_ptr() as usize) / mem::size_of::<T>()
//
// where: T0...Tlast - our stored data; C0...Clast - control bytes or metadata for data.
- if Self::IS_ZERO_SIZED_TYPE {
+ if T::IS_ZERO_SIZED {
// this can not be UB
self.ptr.as_ptr() as usize - 1
} else {
@@ -502,7 +513,7 @@ impl<T> Bucket<T> {
/// ```
#[inline]
pub fn as_ptr(&self) -> *mut T {
- if Self::IS_ZERO_SIZED_TYPE {
+ if T::IS_ZERO_SIZED {
// Just return an arbitrary ZST pointer which is properly aligned
// invalid pointer is good enough for ZST
invalid_mut(mem::align_of::<T>())
@@ -550,7 +561,7 @@ impl<T> Bucket<T> {
/// [`RawTableInner::buckets`]: RawTableInner::buckets
#[inline]
unsafe fn next_n(&self, offset: usize) -> Self {
- let ptr = if Self::IS_ZERO_SIZED_TYPE {
+ let ptr = if T::IS_ZERO_SIZED {
// invalid pointer is good enough for ZST
invalid_mut(self.ptr.as_ptr() as usize + offset)
} else {
@@ -774,15 +785,16 @@ impl<T> Bucket<T> {
}
/// A raw hash table with an unsafe API.
-pub struct RawTable<T, A: Allocator + Clone = Global> {
- table: RawTableInner<A>,
+pub struct RawTable<T, A: Allocator = Global> {
+ table: RawTableInner,
+ alloc: A,
// Tell dropck that we own instances of T.
marker: PhantomData<T>,
}
/// Non-generic part of `RawTable` which allows functions to be instantiated only once regardless
/// of how many different key-value types are used.
-struct RawTableInner<A> {
+struct RawTableInner {
// Mask to get an index from a hash value. The value is one less than the
// number of buckets in the table.
bucket_mask: usize,
@@ -796,8 +808,6 @@ struct RawTableInner<A> {
// Number of elements in the table, only really used by len()
items: usize,
-
- alloc: A,
}
impl<T> RawTable<T, Global> {
@@ -809,7 +819,8 @@ impl<T> RawTable<T, Global> {
#[inline]
pub const fn new() -> Self {
Self {
- table: RawTableInner::new_in(Global),
+ table: RawTableInner::NEW,
+ alloc: Global,
marker: PhantomData,
}
}
@@ -828,9 +839,8 @@ impl<T> RawTable<T, Global> {
}
}
-impl<T, A: Allocator + Clone> RawTable<T, A> {
+impl<T, A: Allocator> RawTable<T, A> {
const TABLE_LAYOUT: TableLayout = TableLayout::new::<T>();
- const DATA_NEEDS_DROP: bool = mem::needs_drop::<T>();
/// Creates a new empty hash table without allocating any memory, using the
/// given allocator.
@@ -841,7 +851,8 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
#[inline]
pub const fn new_in(alloc: A) -> Self {
Self {
- table: RawTableInner::new_in(alloc),
+ table: RawTableInner::NEW,
+ alloc,
marker: PhantomData,
}
}
@@ -859,66 +870,77 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
Ok(Self {
table: RawTableInner::new_uninitialized(
- alloc,
+ &alloc,
Self::TABLE_LAYOUT,
buckets,
fallibility,
)?,
+ alloc,
marker: PhantomData,
})
}
- /// Attempts to allocate a new hash table with at least enough capacity
- /// for inserting the given number of elements without reallocating.
- fn fallible_with_capacity(
- alloc: A,
- capacity: usize,
- fallibility: Fallibility,
- ) -> Result<Self, TryReserveError> {
+ /// Attempts to allocate a new hash table using the given allocator, with at least enough
+ /// capacity for inserting the given number of elements without reallocating.
+ #[cfg(feature = "raw")]
+ pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result<Self, TryReserveError> {
Ok(Self {
table: RawTableInner::fallible_with_capacity(
- alloc,
+ &alloc,
Self::TABLE_LAYOUT,
capacity,
- fallibility,
+ Fallibility::Fallible,
)?,
+ alloc,
marker: PhantomData,
})
}
- /// Attempts to allocate a new hash table using the given allocator, with at least enough
- /// capacity for inserting the given number of elements without reallocating.
- #[cfg(feature = "raw")]
- pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result<Self, TryReserveError> {
- Self::fallible_with_capacity(alloc, capacity, Fallibility::Fallible)
- }
-
/// Allocates a new hash table using the given allocator, with at least enough capacity for
/// inserting the given number of elements without reallocating.
pub fn with_capacity_in(capacity: usize, alloc: A) -> Self {
- // Avoid `Result::unwrap_or_else` because it bloats LLVM IR.
- match Self::fallible_with_capacity(alloc, capacity, Fallibility::Infallible) {
- Ok(capacity) => capacity,
- Err(_) => unsafe { hint::unreachable_unchecked() },
+ Self {
+ table: RawTableInner::with_capacity(&alloc, Self::TABLE_LAYOUT, capacity),
+ alloc,
+ marker: PhantomData,
}
}
/// Returns a reference to the underlying allocator.
#[inline]
pub fn allocator(&self) -> &A {
- &self.table.alloc
- }
-
- /// Deallocates the table without dropping any entries.
- #[cfg_attr(feature = "inline-more", inline)]
- unsafe fn free_buckets(&mut self) {
- self.table.free_buckets(Self::TABLE_LAYOUT);
+ &self.alloc
}
- /// Returns pointer to one past last element of data table.
+ /// Returns pointer to one past last `data` element in the the table as viewed from
+ /// the start point of the allocation.
+ ///
+ /// The caller must ensure that the `RawTable` outlives the returned [`NonNull<T>`],
+ /// otherwise using it may result in [`undefined behavior`].
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
- pub unsafe fn data_end(&self) -> NonNull<T> {
- NonNull::new_unchecked(self.table.ctrl.as_ptr().cast())
+ pub fn data_end(&self) -> NonNull<T> {
+ // SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe
+ //
+ // `self.table.ctrl.as_ptr().cast()` returns pointer that
+ // points here (to the end of `T0`)
+ // ∨
+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
+ // \________ ________/
+ // \/
+ // `n = buckets - 1`, i.e. `RawTable::buckets() - 1`
+ //
+ // where: T0...T_n - our stored data;
+ // CT0...CT_n - control bytes or metadata for `data`.
+ // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
+ // with loading `Group` bytes from the heap works properly, even if the result
+ // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
+ // `RawTableInner::set_ctrl` function.
+ //
+ // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
+ unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) }
}
/// Returns pointer to start of data table.
@@ -938,7 +960,9 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
#[inline]
#[cfg(feature = "raw")]
pub fn allocation_info(&self) -> (NonNull<u8>, Layout) {
- self.table.allocation_info_or_zero(Self::TABLE_LAYOUT)
+ // SAFETY: We use the same `table_layout` that was used to allocate
+ // this table.
+ unsafe { self.table.allocation_info_or_zero(Self::TABLE_LAYOUT) }
}
/// Returns the index of a bucket from a `Bucket`.
@@ -948,8 +972,55 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
}
/// Returns a pointer to an element in the table.
+ ///
+ /// The caller must ensure that the `RawTable` outlives the returned [`Bucket<T>`],
+ /// otherwise using it may result in [`undefined behavior`].
+ ///
+ /// # Safety
+ ///
+ /// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the
+ /// following safety rules:
+ ///
+ /// * The table must already be allocated;
+ ///
+ /// * The `index` must not be greater than the number returned by the [`RawTable::buckets`]
+ /// function, i.e. `(index + 1) <= self.buckets()`.
+ ///
+ /// It is safe to call this function with index of zero (`index == 0`) on a table that has
+ /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
+ ///
+ /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
+ /// not be greater than the number returned by the [`RawTable::buckets`] function, i.e.
+ /// `(index + 1) <= self.buckets()`.
+ ///
+ /// [`RawTable::buckets`]: RawTable::buckets
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
pub unsafe fn bucket(&self, index: usize) -> Bucket<T> {
+ // If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
+ // (we start counting from "0", so that in the expression T[n], the "n" index actually one less than
+ // the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"):
+ //
+ // `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
+ // part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`)
+ // |
+ // | `base = self.data_end()` points here
+ // | (to the start of CT0 or to the end of T0)
+ // v v
+ // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
+ // ^ \__________ __________/
+ // `table.bucket(3)` returns a pointer that points \/
+ // here in the `data` part of the `RawTable` (to additional control bytes
+ // the end of T3) `m = Group::WIDTH - 1`
+ //
+ // where: T0...T_n - our stored data;
+ // CT0...CT_n - control bytes or metadata for `data`;
+ // CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
+ // the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask`
+ // is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function.
+ //
+ // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
+ // of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`.
debug_assert_ne!(self.table.bucket_mask, 0);
debug_assert!(index < self.buckets());
Bucket::from_base_index(self.data_end(), index)
@@ -1028,15 +1099,10 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
// Ensure that the table is reset even if one of the drops panic
let mut self_ = guard(self, |self_| self_.clear_no_drop());
unsafe {
- self_.drop_elements();
- }
- }
-
- unsafe fn drop_elements(&mut self) {
- if Self::DATA_NEEDS_DROP && !self.is_empty() {
- for item in self.iter() {
- item.drop();
- }
+ // SAFETY: ScopeGuard sets to zero the `items` field of the table
+ // even in case of panic during the dropping of the elements so
+ // that there will be no double drop of the elements.
+ self_.table.drop_elements::<T>();
}
}
@@ -1047,7 +1113,16 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
// space for.
let min_size = usize::max(self.table.items, min_size);
if min_size == 0 {
- *self = Self::new_in(self.table.alloc.clone());
+ let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW);
+ unsafe {
+ // SAFETY:
+ // 1. We call the function only once;
+ // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
+ // and [`TableLayout`] that were used to allocate this table.
+ // 3. If any elements' drop function panics, then there will only be a memory leak,
+ // because we have replaced the inner table with a new one.
+ old_inner.drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
+ }
return;
}
@@ -1064,14 +1139,33 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
if min_buckets < self.buckets() {
// Fast path if the table is empty
if self.table.items == 0 {
- *self = Self::with_capacity_in(min_size, self.table.alloc.clone());
+ let new_inner =
+ RawTableInner::with_capacity(&self.alloc, Self::TABLE_LAYOUT, min_size);
+ let mut old_inner = mem::replace(&mut self.table, new_inner);
+ unsafe {
+ // SAFETY:
+ // 1. We call the function only once;
+ // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
+ // and [`TableLayout`] that were used to allocate this table.
+ // 3. If any elements' drop function panics, then there will only be a memory leak,
+ // because we have replaced the inner table with a new one.
+ old_inner.drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
+ }
} else {
// Avoid `Result::unwrap_or_else` because it bloats LLVM IR.
- if self
- .resize(min_size, hasher, Fallibility::Infallible)
- .is_err()
- {
- unsafe { hint::unreachable_unchecked() }
+ unsafe {
+ // SAFETY:
+ // 1. We know for sure that `min_size >= self.table.items`.
+ // 2. The [`RawTableInner`] must already have properly initialized control bytes since
+ // we will never expose RawTable::new_uninitialized in a public API.
+ if self
+ .resize(min_size, hasher, Fallibility::Infallible)
+ .is_err()
+ {
+ // SAFETY: The result of calling the `resize` function cannot be an error
+ // because `fallibility == Fallibility::Infallible.
+ hint::unreachable_unchecked()
+ }
}
}
}
@@ -1083,11 +1177,16 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) {
if unlikely(additional > self.table.growth_left) {
// Avoid `Result::unwrap_or_else` because it bloats LLVM IR.
- if self
- .reserve_rehash(additional, hasher, Fallibility::Infallible)
- .is_err()
- {
- unsafe { hint::unreachable_unchecked() }
+ unsafe {
+ // SAFETY: The [`RawTableInner`] must already have properly initialized control
+ // bytes since we will never expose RawTable::new_uninitialized in a public API.
+ if self
+ .reserve_rehash(additional, hasher, Fallibility::Infallible)
+ .is_err()
+ {
+ // SAFETY: All allocation errors will be caught inside `RawTableInner::reserve_rehash`.
+ hint::unreachable_unchecked()
+ }
}
}
}
@@ -1101,28 +1200,45 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
hasher: impl Fn(&T) -> u64,
) -> Result<(), TryReserveError> {
if additional > self.table.growth_left {
- self.reserve_rehash(additional, hasher, Fallibility::Fallible)
+ // SAFETY: The [`RawTableInner`] must already have properly initialized control
+ // bytes since we will never expose RawTable::new_uninitialized in a public API.
+ unsafe { self.reserve_rehash(additional, hasher, Fallibility::Fallible) }
} else {
Ok(())
}
}
/// Out-of-line slow path for `reserve` and `try_reserve`.
+ ///
+ /// # Safety
+ ///
+ /// The [`RawTableInner`] must have properly initialized control bytes,
+ /// otherwise calling this function results in [`undefined behavior`]
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[cold]
#[inline(never)]
- fn reserve_rehash(
+ unsafe fn reserve_rehash(
&mut self,
additional: usize,
hasher: impl Fn(&T) -> u64,
fallibility: Fallibility,
) -> Result<(), TryReserveError> {
unsafe {
+ // SAFETY:
+ // 1. We know for sure that `alloc` and `layout` matches the [`Allocator`] and
+ // [`TableLayout`] that were used to allocate this table.
+ // 2. The `drop` function is the actual drop function of the elements stored in
+ // the table.
+ // 3. The caller ensures that the control bytes of the `RawTableInner`
+ // are already initialized.
self.table.reserve_rehash_inner(
+ &self.alloc,
additional,
&|table, index| hasher(table.bucket::<T>(index).as_ref()),
fallibility,
Self::TABLE_LAYOUT,
- if Self::DATA_NEEDS_DROP {
+ if T::NEEDS_DROP {
Some(mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)))
} else {
None
@@ -1133,20 +1249,50 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
/// Allocates a new table of a different size and moves the contents of the
/// current table into it.
- fn resize(
+ ///
+ /// # Safety
+ ///
+ /// The [`RawTableInner`] must have properly initialized control bytes,
+ /// otherwise calling this function results in [`undefined behavior`]
+ ///
+ /// The caller of this function must ensure that `capacity >= self.table.items`
+ /// otherwise:
+ ///
+ /// * If `self.table.items != 0`, calling of this function with `capacity`
+ /// equal to 0 (`capacity == 0`) results in [`undefined behavior`].
+ ///
+ /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and
+ /// `self.table.items > capacity_to_buckets(capacity)`
+ /// calling this function results in [`undefined behavior`].
+ ///
+ /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and
+ /// `self.table.items > capacity_to_buckets(capacity)`
+ /// calling this function are never return (will go into an
+ /// infinite loop).
+ ///
+ /// See [`RawTableInner::find_insert_slot`] for more information.
+ ///
+ /// [`RawTableInner::find_insert_slot`]: RawTableInner::find_insert_slot
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ unsafe fn resize(
&mut self,
capacity: usize,
hasher: impl Fn(&T) -> u64,
fallibility: Fallibility,
) -> Result<(), TryReserveError> {
- unsafe {
- self.table.resize_inner(
- capacity,
- &|table, index| hasher(table.bucket::<T>(index).as_ref()),
- fallibility,
- Self::TABLE_LAYOUT,
- )
- }
+ // SAFETY:
+ // 1. The caller of this function guarantees that `capacity >= self.table.items`.
+ // 2. We know for sure that `alloc` and `layout` matches the [`Allocator`] and
+ // [`TableLayout`] that were used to allocate this table.
+ // 3. The caller ensures that the control bytes of the `RawTableInner`
+ // are already initialized.
+ self.table.resize_inner(
+ &self.alloc,
+ capacity,
+ &|table, index| hasher(table.bucket::<T>(index).as_ref()),
+ fallibility,
+ Self::TABLE_LAYOUT,
+ )
}
/// Inserts a new element into the table, and returns its raw bucket.
@@ -1155,14 +1301,23 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
#[cfg_attr(feature = "inline-more", inline)]
pub fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket<T> {
unsafe {
+ // SAFETY:
+ // 1. The [`RawTableInner`] must already have properly initialized control bytes since
+ // we will never expose `RawTable::new_uninitialized` in a public API.
+ //
+ // 2. We reserve additional space (if necessary) right after calling this function.
let mut slot = self.table.find_insert_slot(hash);
- // We can avoid growing the table once we have reached our load
- // factor if we are replacing a tombstone. This works since the
- // number of EMPTY slots does not change in this case.
+ // We can avoid growing the table once we have reached our load factor if we are replacing
+ // a tombstone. This works since the number of EMPTY slots does not change in this case.
+ //
+ // SAFETY: The function is guaranteed to return [`InsertSlot`] that contains an index
+ // in the range `0..=self.buckets()`.
let old_ctrl = *self.table.ctrl(slot.index);
if unlikely(self.table.growth_left == 0 && special_is_empty(old_ctrl)) {
self.reserve(1, hasher);
+ // SAFETY: We know for sure that `RawTableInner` has control bytes
+ // initialized and that there is extra space in the table.
slot = self.table.find_insert_slot(hash);
}
@@ -1261,13 +1416,22 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
) -> Result<Bucket<T>, InsertSlot> {
self.reserve(1, hasher);
- match self
- .table
- .find_or_find_insert_slot_inner(hash, &mut |index| unsafe {
- eq(self.bucket(index).as_ref())
- }) {
- Ok(index) => Ok(unsafe { self.bucket(index) }),
- Err(slot) => Err(slot),
+ unsafe {
+ // SAFETY:
+ // 1. We know for sure that there is at least one empty `bucket` in the table.
+ // 2. The [`RawTableInner`] must already have properly initialized control bytes since we will
+ // never expose `RawTable::new_uninitialized` in a public API.
+ // 3. The `find_or_find_insert_slot_inner` function returns the `index` of only the full bucket,
+ // which is in the range `0..self.buckets()` (since there is at least one empty `bucket` in
+ // the table), so calling `self.bucket(index)` and `Bucket::as_ref` is safe.
+ match self
+ .table
+ .find_or_find_insert_slot_inner(hash, &mut |index| eq(self.bucket(index).as_ref()))
+ {
+ // SAFETY: See explanation above.
+ Ok(index) => Ok(self.bucket(index)),
+ Err(slot) => Err(slot),
+ }
}
}
@@ -1292,14 +1456,23 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
/// Searches for an element in the table.
#[inline]
pub fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option<Bucket<T>> {
- let result = self.table.find_inner(hash, &mut |index| unsafe {
- eq(self.bucket(index).as_ref())
- });
-
- // Avoid `Option::map` because it bloats LLVM IR.
- match result {
- Some(index) => Some(unsafe { self.bucket(index) }),
- None => None,
+ unsafe {
+ // SAFETY:
+ // 1. The [`RawTableInner`] must already have properly initialized control bytes since we
+ // will never expose `RawTable::new_uninitialized` in a public API.
+ // 1. The `find_inner` function returns the `index` of only the full bucket, which is in
+ // the range `0..self.buckets()`, so calling `self.bucket(index)` and `Bucket::as_ref`
+ // is safe.
+ let result = self
+ .table
+ .find_inner(hash, &mut |index| eq(self.bucket(index).as_ref()));
+
+ // Avoid `Option::map` because it bloats LLVM IR.
+ match result {
+ // SAFETY: See explanation above.
+ Some(index) => Some(self.bucket(index)),
+ None => None,
+ }
}
}
@@ -1423,11 +1596,11 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
/// struct, we have to make the `iter` method unsafe.
#[inline]
pub unsafe fn iter(&self) -> RawIter<T> {
- let data = Bucket::from_base_index(self.data_end(), 0);
- RawIter {
- iter: RawIterRange::new(self.table.ctrl.as_ptr(), data, self.table.buckets()),
- items: self.table.items,
- }
+ // SAFETY:
+ // 1. The caller must uphold the safety contract for `iter` method.
+ // 2. The [`RawTableInner`] must already have properly initialized control bytes since
+ // we will never expose RawTable::new_uninitialized in a public API.
+ self.table.iter()
}
/// Returns an iterator over occupied buckets that could match a given hash.
@@ -1467,8 +1640,8 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
debug_assert_eq!(iter.len(), self.len());
RawDrain {
iter,
- table: ManuallyDrop::new(mem::replace(self, Self::new_in(self.table.alloc.clone()))),
- orig_table: NonNull::from(self),
+ table: mem::replace(&mut self.table, RawTableInner::NEW),
+ orig_table: NonNull::from(&mut self.table),
marker: PhantomData,
}
}
@@ -1482,20 +1655,18 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
pub unsafe fn into_iter_from(self, iter: RawIter<T>) -> RawIntoIter<T, A> {
debug_assert_eq!(iter.len(), self.len());
- let alloc = self.table.alloc.clone();
let allocation = self.into_allocation();
RawIntoIter {
iter,
allocation,
marker: PhantomData,
- alloc,
}
}
/// Converts the table into a raw allocation. The contents of the table
/// should be dropped using a `RawIter` before freeing the allocation.
#[cfg_attr(feature = "inline-more", inline)]
- pub(crate) fn into_allocation(self) -> Option<(NonNull<u8>, Layout)> {
+ pub(crate) fn into_allocation(self) -> Option<(NonNull<u8>, Layout, A)> {
let alloc = if self.table.is_empty_singleton() {
None
} else {
@@ -1508,6 +1679,7 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
Some((
unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset)) },
layout,
+ unsafe { ptr::read(&self.alloc) },
))
};
mem::forget(self);
@@ -1515,39 +1687,40 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
}
}
-unsafe impl<T, A: Allocator + Clone> Send for RawTable<T, A>
+unsafe impl<T, A: Allocator> Send for RawTable<T, A>
where
T: Send,
A: Send,
{
}
-unsafe impl<T, A: Allocator + Clone> Sync for RawTable<T, A>
+unsafe impl<T, A: Allocator> Sync for RawTable<T, A>
where
T: Sync,
A: Sync,
{
}
-impl<A> RawTableInner<A> {
+impl RawTableInner {
+ const NEW: Self = RawTableInner::new();
+
/// Creates a new empty hash table without allocating any memory.
///
/// In effect this returns a table with exactly 1 bucket. However we can
/// leave the data pointer dangling since that bucket is never accessed
/// due to our load factor forcing us to always have at least 1 free bucket.
#[inline]
- const fn new_in(alloc: A) -> Self {
+ const fn new() -> Self {
Self {
// Be careful to cast the entire slice to a raw pointer.
ctrl: unsafe { NonNull::new_unchecked(Group::static_empty() as *const _ as *mut u8) },
bucket_mask: 0,
items: 0,
growth_left: 0,
- alloc,
}
}
}
-impl<A: Allocator + Clone> RawTableInner<A> {
+impl RawTableInner {
/// Allocates a new [`RawTableInner`] with the given number of buckets.
/// The control bytes and buckets are left uninitialized.
///
@@ -1561,12 +1734,15 @@ impl<A: Allocator + Clone> RawTableInner<A> {
///
/// [`Allocator`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html
#[cfg_attr(feature = "inline-more", inline)]
- unsafe fn new_uninitialized(
- alloc: A,
+ unsafe fn new_uninitialized<A>(
+ alloc: &A,
table_layout: TableLayout,
buckets: usize,
fallibility: Fallibility,
- ) -> Result<Self, TryReserveError> {
+ ) -> Result<Self, TryReserveError>
+ where
+ A: Allocator,
+ {
debug_assert!(buckets.is_power_of_two());
// Avoid `Option::ok_or_else` because it bloats LLVM IR.
@@ -1575,7 +1751,7 @@ impl<A: Allocator + Clone> RawTableInner<A> {
None => return Err(fallibility.capacity_overflow()),
};
- let ptr: NonNull<u8> = match do_alloc(&alloc, layout) {
+ let ptr: NonNull<u8> = match do_alloc(alloc, layout) {
Ok(block) => block.cast(),
Err(_) => return Err(fallibility.alloc_err(layout)),
};
@@ -1587,7 +1763,6 @@ impl<A: Allocator + Clone> RawTableInner<A> {
bucket_mask: buckets - 1,
items: 0,
growth_left: bucket_mask_to_capacity(buckets - 1),
- alloc,
})
}
@@ -1596,14 +1771,17 @@ impl<A: Allocator + Clone> RawTableInner<A> {
///
/// All the control bytes are initialized with the [`EMPTY`] bytes.
#[inline]
- fn fallible_with_capacity(
- alloc: A,
+ fn fallible_with_capacity<A>(
+ alloc: &A,
table_layout: TableLayout,
capacity: usize,
fallibility: Fallibility,
- ) -> Result<Self, TryReserveError> {
+ ) -> Result<Self, TryReserveError>
+ where
+ A: Allocator,
+ {
if capacity == 0 {
- Ok(Self::new_in(alloc))
+ Ok(Self::NEW)
} else {
// SAFETY: We checked that we could successfully allocate the new table, and then
// initialized all control bytes with the constant `EMPTY` byte.
@@ -1622,36 +1800,95 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}
}
- /// Fixes up an insertion slot due to false positives for groups smaller than the group width.
- /// This must only be used on insertion slots found by `find_insert_slot_in_group`.
+ /// Allocates a new [`RawTableInner`] with at least enough capacity for inserting
+ /// the given number of elements without reallocating.
+ ///
+ /// Panics if the new capacity exceeds [`isize::MAX`] bytes and [`abort`] the program
+ /// in case of allocation error. Use [`fallible_with_capacity`] instead if you want to
+ /// handle memory allocation failure.
+ ///
+ /// All the control bytes are initialized with the [`EMPTY`] bytes.
+ ///
+ /// [`fallible_with_capacity`]: RawTableInner::fallible_with_capacity
+ /// [`abort`]: https://doc.rust-lang.org/alloc/alloc/fn.handle_alloc_error.html
+ fn with_capacity<A>(alloc: &A, table_layout: TableLayout, capacity: usize) -> Self
+ where
+ A: Allocator,
+ {
+ // Avoid `Result::unwrap_or_else` because it bloats LLVM IR.
+ match Self::fallible_with_capacity(alloc, table_layout, capacity, Fallibility::Infallible) {
+ Ok(table_inner) => table_inner,
+ // SAFETY: All allocation errors will be caught inside `RawTableInner::new_uninitialized`.
+ Err(_) => unsafe { hint::unreachable_unchecked() },
+ }
+ }
+
+ /// Fixes up an insertion slot returned by the [`RawTableInner::find_insert_slot_in_group`] method.
+ ///
+ /// In tables smaller than the group width (`self.buckets() < Group::WIDTH`), trailing control
+ /// bytes outside the range of the table are filled with [`EMPTY`] entries. These will unfortunately
+ /// trigger a match of [`RawTableInner::find_insert_slot_in_group`] function. This is because
+ /// the `Some(bit)` returned by `group.match_empty_or_deleted().lowest_set_bit()` after masking
+ /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket that is already occupied.
+ /// We detect this situation here and perform a second scan starting at the beginning of the table.
+ /// This second scan is guaranteed to find an empty slot (due to the load factor) before hitting the
+ /// trailing control bytes (containing [`EMPTY`] bytes).
+ ///
+ /// If this function is called correctly, it is guaranteed to return [`InsertSlot`] with an
+ /// index of an empty or deleted bucket in the range `0..self.buckets()` (see `Warning` and
+ /// `Safety`).
+ ///
+ /// # Warning
+ ///
+ /// The table must have at least 1 empty or deleted `bucket`, otherwise if the table is less than
+ /// the group width (`self.buckets() < Group::WIDTH`) this function returns an index outside of the
+ /// table indices range `0..self.buckets()` (`0..=self.bucket_mask`). Attempt to write data at that
+ /// index will cause immediate [`undefined behavior`].
+ ///
+ /// # Safety
+ ///
+ /// The safety rules are directly derived from the safety rules for [`RawTableInner::ctrl`] method.
+ /// Thus, in order to uphold those safety contracts, as well as for the correct logic of the work
+ /// of this crate, the following rules are necessary and sufficient:
+ ///
+ /// * The [`RawTableInner`] must have properly initialized control bytes otherwise calling this
+ /// function results in [`undefined behavior`].
+ ///
+ /// * This function must only be used on insertion slots found by [`RawTableInner::find_insert_slot_in_group`]
+ /// (after the `find_insert_slot_in_group` function, but before insertion into the table).
+ ///
+ /// * The `index` must not be greater than the `self.bucket_mask`, i.e. `(index + 1) <= self.buckets()`
+ /// (this one is provided by the [`RawTableInner::find_insert_slot_in_group`] function).
+ ///
+ /// Calling this function with an index not provided by [`RawTableInner::find_insert_slot_in_group`]
+ /// may result in [`undefined behavior`] even if the index satisfies the safety rules of the
+ /// [`RawTableInner::ctrl`] function (`index < self.bucket_mask + 1 + Group::WIDTH`).
+ ///
+ /// [`RawTableInner::ctrl`]: RawTableInner::ctrl
+ /// [`RawTableInner::find_insert_slot_in_group`]: RawTableInner::find_insert_slot_in_group
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn fix_insert_slot(&self, mut index: usize) -> InsertSlot {
- // In tables smaller than the group width
- // (self.buckets() < Group::WIDTH), trailing control
- // bytes outside the range of the table are filled with
- // EMPTY entries. These will unfortunately trigger a
- // match, but once masked may point to a full bucket that
- // is already occupied. We detect this situation here and
- // perform a second scan starting at the beginning of the
- // table. This second scan is guaranteed to find an empty
- // slot (due to the load factor) before hitting the trailing
- // control bytes (containing EMPTY).
+ // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`.
if unlikely(self.is_bucket_full(index)) {
debug_assert!(self.bucket_mask < Group::WIDTH);
// SAFETY:
//
- // * We are in range and `ptr = self.ctrl(0)` are valid for reads
- // and properly aligned, because the table is already allocated
- // (see `TableLayout::calculate_layout_for` and `ptr::read`);
+ // * Since the caller of this function ensures that the control bytes are properly
+ // initialized and `ptr = self.ctrl(0)` points to the start of the array of control
+ // bytes, therefore: `ctrl` is valid for reads, properly aligned to `Group::WIDTH`
+ // and points to the properly initialized control bytes (see also
+ // `TableLayout::calculate_layout_for` and `ptr::read`);
//
- // * For tables larger than the group width (self.buckets() >= Group::WIDTH),
- // we will never end up in the given branch, since
- // `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group` cannot
- // return a full bucket index. For tables smaller than the group width, calling the
- // `unwrap_unchecked` function is also
- // safe, as the trailing control bytes outside the range of the table are filled
- // with EMPTY bytes, so this second scan either finds an empty slot (due to the
- // load factor) or hits the trailing control bytes (containing EMPTY).
+ // * Because the caller of this function ensures that the index was provided by the
+ // `self.find_insert_slot_in_group()` function, so for for tables larger than the
+ // group width (self.buckets() >= Group::WIDTH), we will never end up in the given
+ // branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group`
+ // cannot return a full bucket index. For tables smaller than the group width, calling
+ // the `unwrap_unchecked` function is also safe, as the trailing control bytes outside
+ // the range of the table are filled with EMPTY bytes (and we know for sure that there
+ // is at least one FULL bucket), so this second scan either finds an empty slot (due to
+ // the load factor) or hits the trailing control bytes (containing EMPTY).
index = Group::load_aligned(self.ctrl(0))
.match_empty_or_deleted()
.lowest_set_bit()
@@ -1661,25 +1898,62 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}
/// Finds the position to insert something in a group.
- /// This may have false positives and must be fixed up with `fix_insert_slot` before it's used.
+ ///
+ /// **This may have false positives and must be fixed up with `fix_insert_slot`
+ /// before it's used.**
+ ///
+ /// The function is guaranteed to return the index of an empty or deleted [`Bucket`]
+ /// in the range `0..self.buckets()` (`0..=self.bucket_mask`).
#[inline]
fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
let bit = group.match_empty_or_deleted().lowest_set_bit();
if likely(bit.is_some()) {
+ // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number
+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask)
} else {
None
}
}
- /// Searches for an element in the table, or a potential slot where that element could be
- /// inserted.
+ /// Searches for an element in the table, or a potential slot where that element could
+ /// be inserted (an empty or deleted [`Bucket`] index).
///
/// This uses dynamic dispatch to reduce the amount of code generated, but that is
/// eliminated by LLVM optimizations.
+ ///
+ /// This function does not make any changes to the `data` part of the table, or any
+ /// changes to the `items` or `growth_left` field of the table.
+ ///
+ /// The table must have at least 1 empty or deleted `bucket`, otherwise, if the
+ /// `eq: &mut dyn FnMut(usize) -> bool` function does not return `true`, this function
+ /// will never return (will go into an infinite loop) for tables larger than the group
+ /// width, or return an index outside of the table indices range if the table is less
+ /// than the group width.
+ ///
+ /// This function is guaranteed to provide the `eq: &mut dyn FnMut(usize) -> bool`
+ /// function with only `FULL` buckets' indices and return the `index` of the found
+ /// element (as `Ok(index)`). If the element is not found and there is at least 1
+ /// empty or deleted [`Bucket`] in the table, the function is guaranteed to return
+ /// [InsertSlot] with an index in the range `0..self.buckets()`, but in any case,
+ /// if this function returns [`InsertSlot`], it will contain an index in the range
+ /// `0..=self.buckets()`.
+ ///
+ /// # Safety
+ ///
+ /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling
+ /// this function results in [`undefined behavior`].
+ ///
+ /// Attempt to write data at the [`InsertSlot`] returned by this function when the table is
+ /// less than the group width and if there was not at least one empty or deleted bucket in
+ /// the table will cause immediate [`undefined behavior`]. This is because in this case the
+ /// function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY]
+ /// control bytes outside the table range.
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
- fn find_or_find_insert_slot_inner(
+ unsafe fn find_or_find_insert_slot_inner(
&self,
hash: u64,
eq: &mut dyn FnMut(usize) -> bool,
@@ -1690,6 +1964,21 @@ impl<A: Allocator + Clone> RawTableInner<A> {
let mut probe_seq = self.probe_seq(hash);
loop {
+ // SAFETY:
+ // * Caller of this function ensures that the control bytes are properly initialized.
+ //
+ // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1`
+ // of the table due to masking with `self.bucket_mask` and also because mumber of
+ // buckets is a power of two (see `self.probe_seq` function).
+ //
+ // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to
+ // call `Group::load` due to the extended control bytes range, which is
+ // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control
+ // byte will never be read for the allocated table);
+ //
+ // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will
+ // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()`
+ // bytes, which is safe (see RawTableInner::new).
let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };
for bit in group.match_byte(h2_hash) {
@@ -1713,6 +2002,10 @@ impl<A: Allocator + Clone> RawTableInner<A> {
// least one. For tables smaller than the group width, there will still be an
// empty element in the current (and only) group due to the load factor.
unsafe {
+ // SAFETY:
+ // * Caller of this function ensures that the control bytes are properly initialized.
+ //
+ // * We use this function with the slot / index found by `self.find_insert_slot_in_group`
return Err(self.fix_insert_slot(insert_slot.unwrap_unchecked()));
}
}
@@ -1721,13 +2014,68 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}
}
- /// Searches for an empty or deleted bucket which is suitable for inserting
- /// a new element and sets the hash for that slot.
+ /// Searches for an empty or deleted bucket which is suitable for inserting a new
+ /// element and sets the hash for that slot. Returns an index of that slot and the
+ /// old control byte stored in the found index.
+ ///
+ /// This function does not check if the given element exists in the table. Also,
+ /// this function does not check if there is enough space in the table to insert
+ /// a new element. Caller of the funtion must make ensure that the table has at
+ /// least 1 empty or deleted `bucket`, otherwise this function will never return
+ /// (will go into an infinite loop) for tables larger than the group width, or
+ /// return an index outside of the table indices range if the table is less than
+ /// the group width.
///
- /// There must be at least 1 empty bucket in the table.
+ /// If there is at least 1 empty or deleted `bucket` in the table, the function is
+ /// guaranteed to return an `index` in the range `0..self.buckets()`, but in any case,
+ /// if this function returns an `index` it will be in the range `0..=self.buckets()`.
+ ///
+ /// This function does not make any changes to the `data` parts of the table,
+ /// or any changes to the the `items` or `growth_left` field of the table.
+ ///
+ /// # Safety
+ ///
+ /// The safety rules are directly derived from the safety rules for the
+ /// [`RawTableInner::set_ctrl_h2`] and [`RawTableInner::find_insert_slot`] methods.
+ /// Thus, in order to uphold the safety contracts for that methods, as well as for
+ /// the correct logic of the work of this crate, you must observe the following rules
+ /// when calling this function:
+ ///
+ /// * The [`RawTableInner`] has already been allocated and has properly initialized
+ /// control bytes otherwise calling this function results in [`undefined behavior`].
+ ///
+ /// * The caller of this function must ensure that the "data" parts of the table
+ /// will have an entry in the returned index (matching the given hash) right
+ /// after calling this function.
+ ///
+ /// Attempt to write data at the `index` returned by this function when the table is
+ /// less than the group width and if there was not at least one empty or deleted bucket in
+ /// the table will cause immediate [`undefined behavior`]. This is because in this case the
+ /// function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY]
+ /// control bytes outside the table range.
+ ///
+ /// The caller must independently increase the `items` field of the table, and also,
+ /// if the old control byte was [`EMPTY`], then decrease the table's `growth_left`
+ /// field, and do not change it if the old control byte was [`DELETED`].
+ ///
+ /// See also [`Bucket::as_ptr`] method, for more information about of properly removing
+ /// or saving `element` from / into the [`RawTable`] / [`RawTableInner`].
+ ///
+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ /// [`RawTableInner::ctrl`]: RawTableInner::ctrl
+ /// [`RawTableInner::set_ctrl_h2`]: RawTableInner::set_ctrl_h2
+ /// [`RawTableInner::find_insert_slot`]: RawTableInner::find_insert_slot
#[inline]
- unsafe fn prepare_insert_slot(&self, hash: u64) -> (usize, u8) {
- let index = self.find_insert_slot(hash).index;
+ unsafe fn prepare_insert_slot(&mut self, hash: u64) -> (usize, u8) {
+ // SAFETY: Caller of this function ensures that the control bytes are properly initialized.
+ let index: usize = self.find_insert_slot(hash).index;
+ // SAFETY:
+ // 1. The `find_insert_slot` function either returns an `index` less than or
+ // equal to `self.buckets() = self.bucket_mask + 1` of the table, or never
+ // returns if it cannot find an empty or deleted slot.
+ // 2. The caller of this function guarantees that the table has already been
+ // allocated
let old_ctrl = *self.ctrl(index);
self.set_ctrl_h2(index, hash);
(index, old_ctrl)
@@ -1744,24 +2092,33 @@ impl<A: Allocator + Clone> RawTableInner<A> {
/// width, or return an index outside of the table indices range if the table is less
/// than the group width.
///
- /// # Note
+ /// If there is at least 1 empty or deleted `bucket` in the table, the function is
+ /// guaranteed to return [`InsertSlot`] with an index in the range `0..self.buckets()`,
+ /// but in any case, if this function returns [`InsertSlot`], it will contain an index
+ /// in the range `0..=self.buckets()`.
///
- /// Calling this function is always safe, but attempting to write data at
- /// the index returned by this function when the table is less than the group width
- /// and if there was not at least one empty bucket in the table will cause immediate
- /// [`undefined behavior`]. This is because in this case the function will return
- /// `self.bucket_mask + 1` as an index due to the trailing EMPTY control bytes outside
- /// the table range.
+ /// # Safety
+ ///
+ /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling
+ /// this function results in [`undefined behavior`].
+ ///
+ /// Attempt to write data at the [`InsertSlot`] returned by this function when the table is
+ /// less than the group width and if there was not at least one empty or deleted bucket in
+ /// the table will cause immediate [`undefined behavior`]. This is because in this case the
+ /// function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY]
+ /// control bytes outside the table range.
///
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
- fn find_insert_slot(&self, hash: u64) -> InsertSlot {
+ unsafe fn find_insert_slot(&self, hash: u64) -> InsertSlot {
let mut probe_seq = self.probe_seq(hash);
loop {
// SAFETY:
+ // * Caller of this function ensures that the control bytes are properly initialized.
+ //
// * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1`
// of the table due to masking with `self.bucket_mask` and also because mumber of
- // buckets is a power of two (see comment for masking below).
+ // buckets is a power of two (see `self.probe_seq` function).
//
// * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to
// call `Group::load` due to the extended control bytes range, which is
@@ -1770,12 +2127,16 @@ impl<A: Allocator + Clone> RawTableInner<A> {
//
// * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will
// always return "0" (zero), so Group::load will read unaligned `Group::static_empty()`
- // bytes, which is safe (see RawTableInner::new_in).
- unsafe {
- let group = Group::load(self.ctrl(probe_seq.pos));
- let index = self.find_insert_slot_in_group(&group, &probe_seq);
+ // bytes, which is safe (see RawTableInner::new).
+ let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };
- if likely(index.is_some()) {
+ let index = self.find_insert_slot_in_group(&group, &probe_seq);
+ if likely(index.is_some()) {
+ // SAFETY:
+ // * Caller of this function ensures that the control bytes are properly initialized.
+ //
+ // * We use this function with the slot / index found by `self.find_insert_slot_in_group`
+ unsafe {
return self.fix_insert_slot(index.unwrap_unchecked());
}
}
@@ -1793,13 +2154,27 @@ impl<A: Allocator + Clone> RawTableInner<A> {
/// The table must have at least 1 empty `bucket`, otherwise, if the
/// `eq: &mut dyn FnMut(usize) -> bool` function does not return `true`,
/// this function will also never return (will go into an infinite loop).
+ ///
+ /// This function is guaranteed to provide the `eq: &mut dyn FnMut(usize) -> bool`
+ /// function with only `FULL` buckets' indices and return the `index` of the found
+ /// element as `Some(index)`, so the index will always be in the range
+ /// `0..self.buckets()`.
+ ///
+ /// # Safety
+ ///
+ /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling
+ /// this function results in [`undefined behavior`].
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline(always)]
- fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option<usize> {
+ unsafe fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option<usize> {
let h2_hash = h2(hash);
let mut probe_seq = self.probe_seq(hash);
loop {
// SAFETY:
+ // * Caller of this function ensures that the control bytes are properly initialized.
+ //
// * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1`
// of the table due to masking with `self.bucket_mask`.
//
@@ -1853,6 +2228,9 @@ impl<A: Allocator + Clone> RawTableInner<A> {
/// to do during the first insert due to tombstones). If the caller does not do
/// this, then calling this function may result in a memory leak.
///
+ /// * The [`RawTableInner`] must have properly initialized control bytes otherwise
+ /// calling this function results in [`undefined behavior`].
+ ///
/// Calling this function on a table that has not been allocated results in
/// [`undefined behavior`].
///
@@ -1900,6 +2278,227 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}
}
+ /// Returns an iterator over every element in the table.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result
+ /// is [`undefined behavior`]:
+ ///
+ /// * The caller has to ensure that the `RawTableInner` outlives the
+ /// `RawIter`. Because we cannot make the `next` method unsafe on
+ /// the `RawIter` struct, we have to make the `iter` method unsafe.
+ ///
+ /// * The [`RawTableInner`] must have properly initialized control bytes.
+ ///
+ /// The type `T` must be the actual type of the elements stored in the table,
+ /// otherwise using the returned [`RawIter`] results in [`undefined behavior`].
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ #[inline]
+ unsafe fn iter<T>(&self) -> RawIter<T> {
+ // SAFETY:
+ // 1. Since the caller of this function ensures that the control bytes
+ // are properly initialized and `self.data_end()` points to the start
+ // of the array of control bytes, therefore: `ctrl` is valid for reads,
+ // properly aligned to `Group::WIDTH` and points to the properly initialized
+ // control bytes.
+ // 2. `data` bucket index in the table is equal to the `ctrl` index (i.e.
+ // equal to zero).
+ // 3. We pass the exact value of buckets of the table to the function.
+ //
+ // `ctrl` points here (to the start
+ // of the first control byte `CT0`)
+ // ∨
+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
+ // \________ ________/
+ // \/
+ // `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
+ //
+ // where: T0...T_n - our stored data;
+ // CT0...CT_n - control bytes or metadata for `data`.
+ // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
+ // with loading `Group` bytes from the heap works properly, even if the result
+ // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
+ // `RawTableInner::set_ctrl` function.
+ //
+ // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
+ let data = Bucket::from_base_index(self.data_end(), 0);
+ RawIter {
+ // SAFETY: See explanation above
+ iter: RawIterRange::new(self.ctrl.as_ptr(), data, self.buckets()),
+ items: self.items,
+ }
+ }
+
+ /// Executes the destructors (if any) of the values stored in the table.
+ ///
+ /// # Note
+ ///
+ /// This function does not erase the control bytes of the table and does
+ /// not make any changes to the `items` or `growth_left` fields of the
+ /// table. If necessary, the caller of this function must manually set
+ /// up these table fields, for example using the [`clear_no_drop`] function.
+ ///
+ /// Be careful during calling this function, because drop function of
+ /// the elements can panic, and this can leave table in an inconsistent
+ /// state.
+ ///
+ /// # Safety
+ ///
+ /// The type `T` must be the actual type of the elements stored in the table,
+ /// otherwise calling this function may result in [`undefined behavior`].
+ ///
+ /// If `T` is a type that should be dropped and **the table is not empty**,
+ /// calling this function more than once results in [`undefined behavior`].
+ ///
+ /// If `T` is not [`Copy`], attempting to use values stored in the table after
+ /// calling this function may result in [`undefined behavior`].
+ ///
+ /// It is safe to call this function on a table that has not been allocated,
+ /// on a table with uninitialized control bytes, and on a table with no actual
+ /// data but with `Full` control bytes if `self.items == 0`.
+ ///
+ /// See also [`Bucket::drop`] / [`Bucket::as_ptr`] methods, for more information
+ /// about of properly removing or saving `element` from / into the [`RawTable`] /
+ /// [`RawTableInner`].
+ ///
+ /// [`Bucket::drop`]: Bucket::drop
+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
+ /// [`clear_no_drop`]: RawTableInner::clear_no_drop
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ unsafe fn drop_elements<T>(&mut self) {
+ // Check that `self.items != 0`. Protects against the possibility
+ // of creating an iterator on an table with uninitialized control bytes.
+ if T::NEEDS_DROP && self.items != 0 {
+ // SAFETY: We know for sure that RawTableInner will outlive the
+ // returned `RawIter` iterator, and the caller of this function
+ // must uphold the safety contract for `drop_elements` method.
+ for item in self.iter::<T>() {
+ // SAFETY: The caller must uphold the safety contract for
+ // `drop_elements` method.
+ item.drop();
+ }
+ }
+ }
+
+ /// Executes the destructors (if any) of the values stored in the table and than
+ /// deallocates the table.
+ ///
+ /// # Note
+ ///
+ /// Calling this function automatically makes invalid (dangling) all instances of
+ /// buckets ([`Bucket`]) and makes invalid (dangling) the `ctrl` field of the table.
+ ///
+ /// This function does not make any changes to the `bucket_mask`, `items` or `growth_left`
+ /// fields of the table. If necessary, the caller of this function must manually set
+ /// up these table fields.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is [`undefined behavior`]:
+ ///
+ /// * Calling this function more than once;
+ ///
+ /// * The type `T` must be the actual type of the elements stored in the table.
+ ///
+ /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
+ /// to allocate this table.
+ ///
+ /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that
+ /// was used to allocate this table.
+ ///
+ /// The caller of this function should pay attention to the possibility of the
+ /// elements' drop function panicking, because this:
+ ///
+ /// * May leave the table in an inconsistent state;
+ ///
+ /// * Memory is never deallocated, so a memory leak may occur.
+ ///
+ /// Attempt to use the `ctrl` field of the table (dereference) after calling this
+ /// function results in [`undefined behavior`].
+ ///
+ /// It is safe to call this function on a table that has not been allocated,
+ /// on a table with uninitialized control bytes, and on a table with no actual
+ /// data but with `Full` control bytes if `self.items == 0`.
+ ///
+ /// See also [`RawTableInner::drop_elements`] or [`RawTableInner::free_buckets`]
+ /// for more information.
+ ///
+ /// [`RawTableInner::drop_elements`]: RawTableInner::drop_elements
+ /// [`RawTableInner::free_buckets`]: RawTableInner::free_buckets
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ unsafe fn drop_inner_table<T, A: Allocator>(&mut self, alloc: &A, table_layout: TableLayout) {
+ if !self.is_empty_singleton() {
+ unsafe {
+ // SAFETY: The caller must uphold the safety contract for `drop_inner_table` method.
+ self.drop_elements::<T>();
+ // SAFETY:
+ // 1. We have checked that our table is allocated.
+ // 2. The caller must uphold the safety contract for `drop_inner_table` method.
+ self.free_buckets(alloc, table_layout);
+ }
+ }
+ }
+
+ /// Returns a pointer to an element in the table (convenience for
+ /// `Bucket::from_base_index(self.data_end::<T>(), index)`).
+ ///
+ /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket<T>`],
+ /// otherwise using it may result in [`undefined behavior`].
+ ///
+ /// # Safety
+ ///
+ /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived from the
+ /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling
+ /// this function, the following safety rules must be observed:
+ ///
+ /// * The table must already be allocated;
+ ///
+ /// * The `index` must not be greater than the number returned by the [`RawTableInner::buckets`]
+ /// function, i.e. `(index + 1) <= self.buckets()`.
+ ///
+ /// * The type `T` must be the actual type of the elements stored in the table, otherwise
+ /// using the returned [`Bucket`] may result in [`undefined behavior`].
+ ///
+ /// It is safe to call this function with index of zero (`index == 0`) on a table that has
+ /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
+ ///
+ /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
+ /// not be greater than the number returned by the [`RawTable::buckets`] function, i.e.
+ /// `(index + 1) <= self.buckets()`.
+ ///
+ /// ```none
+ /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
+ /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than
+ /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
+ ///
+ /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
+ /// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`])
+ /// |
+ /// | `base = table.data_end::<T>()` points here
+ /// | (to the start of CT0 or to the end of T0)
+ /// v v
+ /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
+ /// ^ \__________ __________/
+ /// `table.bucket(3)` returns a pointer that points \/
+ /// here in the `data` part of the `RawTableInner` additional control bytes
+ /// (to the end of T3) `m = Group::WIDTH - 1`
+ ///
+ /// where: T0...T_n - our stored data;
+ /// CT0...CT_n - control bytes or metadata for `data`;
+ /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
+ /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
+ /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
+ ///
+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
+ /// ```
+ ///
+ /// [`Bucket::from_base_index`]: Bucket::from_base_index
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn bucket<T>(&self, index: usize) -> Bucket<T> {
debug_assert_ne!(self.bucket_mask, 0);
@@ -1907,6 +2506,52 @@ impl<A: Allocator + Clone> RawTableInner<A> {
Bucket::from_base_index(self.data_end(), index)
}
+ /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table
+ /// (convenience for `self.data_end::<u8>().as_ptr().sub((index + 1) * size_of)`).
+ ///
+ /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`,
+ /// otherwise using it may result in [`undefined behavior`].
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is [`undefined behavior`]:
+ ///
+ /// * The table must already be allocated;
+ ///
+ /// * The `index` must not be greater than the number returned by the [`RawTableInner::buckets`]
+ /// function, i.e. `(index + 1) <= self.buckets()`;
+ ///
+ /// * The `size_of` must be equal to the size of the elements stored in the table;
+ ///
+ /// ```none
+ /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
+ /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than
+ /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
+ ///
+ /// `table.bucket_ptr(3, mem::size_of::<T>())` returns a pointer that points here in the
+ /// `data` part of the `RawTableInner`, i.e. to the start of T3
+ /// |
+ /// | `base = table.data_end::<u8>()` points here
+ /// | (to the start of CT0 or to the end of T0)
+ /// v v
+ /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
+ /// \__________ __________/
+ /// \/
+ /// additional control bytes
+ /// `m = Group::WIDTH - 1`
+ ///
+ /// where: T0...T_n - our stored data;
+ /// CT0...CT_n - control bytes or metadata for `data`;
+ /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
+ /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
+ /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
+ ///
+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
+ /// ```
+ ///
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
debug_assert_ne!(self.bucket_mask, 0);
@@ -1915,9 +2560,47 @@ impl<A: Allocator + Clone> RawTableInner<A> {
base.sub((index + 1) * size_of)
}
+ /// Returns pointer to one past last `data` element in the the table as viewed from
+ /// the start point of the allocation (convenience for `self.ctrl.cast()`).
+ ///
+ /// This function actually returns a pointer to the end of the `data element` at
+ /// index "0" (zero).
+ ///
+ /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
+ /// otherwise using it may result in [`undefined behavior`].
+ ///
+ /// # Note
+ ///
+ /// The type `T` must be the actual type of the elements stored in the table, otherwise
+ /// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
+ ///
+ /// ```none
+ /// `table.data_end::<T>()` returns pointer that points here
+ /// (to the end of `T0`)
+ /// ∨
+ /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
+ /// \________ ________/
+ /// \/
+ /// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
+ ///
+ /// where: T0...T_n - our stored data;
+ /// CT0...CT_n - control bytes or metadata for `data`.
+ /// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
+ /// with loading `Group` bytes from the heap works properly, even if the result
+ /// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
+ /// `RawTableInner::set_ctrl` function.
+ ///
+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
+ /// ```
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
- unsafe fn data_end<T>(&self) -> NonNull<T> {
- NonNull::new_unchecked(self.ctrl.as_ptr().cast())
+ fn data_end<T>(&self) -> NonNull<T> {
+ unsafe {
+ // SAFETY: `self.ctrl` is `NonNull`, so casting it is safe
+ NonNull::new_unchecked(self.ctrl.as_ptr().cast())
+ }
}
/// Returns an iterator-like object for a probe sequence on the table.
@@ -1928,6 +2611,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
#[inline]
fn probe_seq(&self, hash: u64) -> ProbeSeq {
ProbeSeq {
+ // This is the same as `hash as usize % self.buckets()` because the number
+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
pos: h1(hash) & self.bucket_mask,
stride: 0,
}
@@ -1991,7 +2676,7 @@ impl<A: Allocator + Clone> RawTableInner<A> {
/// [`Bucket::as_ptr`]: Bucket::as_ptr
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
- unsafe fn set_ctrl_h2(&self, index: usize, hash: u64) {
+ unsafe fn set_ctrl_h2(&mut self, index: usize, hash: u64) {
// SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl_h2`]
self.set_ctrl(index, h2(hash));
}
@@ -2025,7 +2710,7 @@ impl<A: Allocator + Clone> RawTableInner<A> {
/// [`Bucket::as_ptr`]: Bucket::as_ptr
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
- unsafe fn replace_ctrl_h2(&self, index: usize, hash: u64) -> u8 {
+ unsafe fn replace_ctrl_h2(&mut self, index: usize, hash: u64) -> u8 {
// SAFETY: The caller must uphold the safety rules for the [`RawTableInner::replace_ctrl_h2`]
let prev_ctrl = *self.ctrl(index);
self.set_ctrl_h2(index, hash);
@@ -2057,9 +2742,12 @@ impl<A: Allocator + Clone> RawTableInner<A> {
/// [`Bucket::as_ptr`]: Bucket::as_ptr
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
- unsafe fn set_ctrl(&self, index: usize, ctrl: u8) {
+ unsafe fn set_ctrl(&mut self, index: usize, ctrl: u8) {
// Replicate the first Group::WIDTH control bytes at the end of
- // the array without using a branch:
+ // the array without using a branch. If the tables smaller than
+ // the group width (self.buckets() < Group::WIDTH),
+ // `index2 = Group::WIDTH + index`, otherwise `index2` is:
+ //
// - If index >= Group::WIDTH then index == index2.
// - Otherwise index2 == self.bucket_mask + 1 + index.
//
@@ -2142,25 +2830,45 @@ impl<A: Allocator + Clone> RawTableInner<A> {
self.bucket_mask == 0
}
+ /// Attempts to allocate a new hash table with at least enough capacity
+ /// for inserting the given number of elements without reallocating,
+ /// and return it inside ScopeGuard to protect against panic in the hash
+ /// function.
+ ///
+ /// # Note
+ ///
+ /// It is recommended (but not required):
+ ///
+ /// * That the new table's `capacity` be greater than or equal to `self.items`.
+ ///
+ /// * The `alloc` is the same [`Allocator`] as the `Allocator` used
+ /// to allocate this table.
+ ///
+ /// * The `table_layout` is the same [`TableLayout`] as the `TableLayout` used
+ /// to allocate this table.
+ ///
+ /// If `table_layout` does not match the `TableLayout` that was used to allocate
+ /// this table, then using `mem::swap` with the `self` and the new table returned
+ /// by this function results in [`undefined behavior`].
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[allow(clippy::mut_mut)]
#[inline]
- unsafe fn prepare_resize(
+ fn prepare_resize<'a, A>(
&self,
+ alloc: &'a A,
table_layout: TableLayout,
capacity: usize,
fallibility: Fallibility,
- ) -> Result<crate::scopeguard::ScopeGuard<Self, impl FnMut(&mut Self)>, TryReserveError> {
+ ) -> Result<crate::scopeguard::ScopeGuard<Self, impl FnMut(&mut Self) + 'a>, TryReserveError>
+ where
+ A: Allocator,
+ {
debug_assert!(self.items <= capacity);
// Allocate and initialize the new table.
- let mut new_table = RawTableInner::fallible_with_capacity(
- self.alloc.clone(),
- table_layout,
- capacity,
- fallibility,
- )?;
- new_table.growth_left -= self.items;
- new_table.items = self.items;
+ let new_table =
+ RawTableInner::fallible_with_capacity(alloc, table_layout, capacity, fallibility)?;
// The hash function may panic, in which case we simply free the new
// table without dropping any elements that may have been copied into
@@ -2170,7 +2878,11 @@ impl<A: Allocator + Clone> RawTableInner<A> {
// the comment at the bottom of this function.
Ok(guard(new_table, move |self_| {
if !self_.is_empty_singleton() {
- self_.free_buckets(table_layout);
+ // SAFETY:
+ // 1. We have checked that our table is allocated.
+ // 2. We know for sure that the `alloc` and `table_layout` matches the
+ // [`Allocator`] and [`TableLayout`] used to allocate this table.
+ unsafe { self_.free_buckets(alloc, table_layout) };
}
}))
}
@@ -2179,16 +2891,38 @@ impl<A: Allocator + Clone> RawTableInner<A> {
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is
+ /// [`undefined behavior`]:
+ ///
+ /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used
+ /// to allocate this table.
+ ///
+ /// * The `layout` must be the same [`TableLayout`] as the `TableLayout`
+ /// used to allocate this table.
+ ///
+ /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of
+ /// the elements stored in the table.
+ ///
+ /// * The [`RawTableInner`] must have properly initialized control bytes.
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[allow(clippy::inline_always)]
#[inline(always)]
- unsafe fn reserve_rehash_inner(
+ unsafe fn reserve_rehash_inner<A>(
&mut self,
+ alloc: &A,
additional: usize,
hasher: &dyn Fn(&mut Self, usize) -> u64,
fallibility: Fallibility,
layout: TableLayout,
drop: Option<fn(*mut u8)>,
- ) -> Result<(), TryReserveError> {
+ ) -> Result<(), TryReserveError>
+ where
+ A: Allocator,
+ {
// Avoid `Option::ok_or_else` because it bloats LLVM IR.
let new_items = match self.items.checked_add(additional) {
Some(new_items) => new_items,
@@ -2198,12 +2932,30 @@ impl<A: Allocator + Clone> RawTableInner<A> {
if new_items <= full_capacity / 2 {
// Rehash in-place without re-allocating if we have plenty of spare
// capacity that is locked up due to DELETED entries.
+
+ // SAFETY:
+ // 1. We know for sure that `[`RawTableInner`]` has already been allocated
+ // (since new_items <= full_capacity / 2);
+ // 2. The caller ensures that `drop` function is the actual drop function of
+ // the elements stored in the table.
+ // 3. The caller ensures that `layout` matches the [`TableLayout`] that was
+ // used to allocate this table.
+ // 4. The caller ensures that the control bytes of the `RawTableInner`
+ // are already initialized.
self.rehash_in_place(hasher, layout.size, drop);
Ok(())
} else {
// Otherwise, conservatively resize to at least the next size up
// to avoid churning deletes into frequent rehashes.
+ //
+ // SAFETY:
+ // 1. We know for sure that `capacity >= self.items`.
+ // 2. The caller ensures that `alloc` and `layout` matches the [`Allocator`] and
+ // [`TableLayout`] that were used to allocate this table.
+ // 3. The caller ensures that the control bytes of the `RawTableInner`
+ // are already initialized.
self.resize_inner(
+ alloc,
usize::max(new_items, full_capacity + 1),
hasher,
fallibility,
@@ -2212,48 +2964,160 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}
}
+ /// Returns an iterator over full buckets indices in the table.
+ ///
+ /// # Safety
+ ///
+ /// Behavior is undefined if any of the following conditions are violated:
+ ///
+ /// * The caller has to ensure that the `RawTableInner` outlives the
+ /// `FullBucketsIndices`. Because we cannot make the `next` method
+ /// unsafe on the `FullBucketsIndices` struct, we have to make the
+ /// `full_buckets_indices` method unsafe.
+ ///
+ /// * The [`RawTableInner`] must have properly initialized control bytes.
+ #[inline(always)]
+ unsafe fn full_buckets_indices(&self) -> FullBucketsIndices {
+ // SAFETY:
+ // 1. Since the caller of this function ensures that the control bytes
+ // are properly initialized and `self.ctrl(0)` points to the start
+ // of the array of control bytes, therefore: `ctrl` is valid for reads,
+ // properly aligned to `Group::WIDTH` and points to the properly initialized
+ // control bytes.
+ // 2. The value of `items` is equal to the amount of data (values) added
+ // to the table.
+ //
+ // `ctrl` points here (to the start
+ // of the first control byte `CT0`)
+ // ∨
+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
+ // \________ ________/
+ // \/
+ // `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
+ //
+ // where: T0...T_n - our stored data;
+ // CT0...CT_n - control bytes or metadata for `data`.
+ let ctrl = NonNull::new_unchecked(self.ctrl(0));
+
+ FullBucketsIndices {
+ // Load the first group
+ // SAFETY: See explanation above.
+ current_group: Group::load_aligned(ctrl.as_ptr()).match_full().into_iter(),
+ group_first_index: 0,
+ ctrl,
+ items: self.items,
+ }
+ }
+
/// Allocates a new table of a different size and moves the contents of the
/// current table into it.
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is
+ /// [`undefined behavior`]:
+ ///
+ /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used
+ /// to allocate this table;
+ ///
+ /// * The `layout` must be the same [`TableLayout`] as the `TableLayout`
+ /// used to allocate this table;
+ ///
+ /// * The [`RawTableInner`] must have properly initialized control bytes.
+ ///
+ /// The caller of this function must ensure that `capacity >= self.items`
+ /// otherwise:
+ ///
+ /// * If `self.items != 0`, calling of this function with `capacity == 0`
+ /// results in [`undefined behavior`].
+ ///
+ /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and
+ /// `self.items > capacity_to_buckets(capacity)` calling this function
+ /// results in [`undefined behavior`].
+ ///
+ /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and
+ /// `self.items > capacity_to_buckets(capacity)` calling this function
+ /// are never return (will go into an infinite loop).
+ ///
+ /// Note: It is recommended (but not required) that the new table's `capacity`
+ /// be greater than or equal to `self.items`. In case if `capacity <= self.items`
+ /// this function can never return. See [`RawTableInner::find_insert_slot`] for
+ /// more information.
+ ///
+ /// [`RawTableInner::find_insert_slot`]: RawTableInner::find_insert_slot
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[allow(clippy::inline_always)]
#[inline(always)]
- unsafe fn resize_inner(
+ unsafe fn resize_inner<A>(
&mut self,
+ alloc: &A,
capacity: usize,
hasher: &dyn Fn(&mut Self, usize) -> u64,
fallibility: Fallibility,
layout: TableLayout,
- ) -> Result<(), TryReserveError> {
- let mut new_table = self.prepare_resize(layout, capacity, fallibility)?;
-
- // Copy all elements to the new table.
- for i in 0..self.buckets() {
- if !self.is_bucket_full(i) {
- continue;
- }
-
+ ) -> Result<(), TryReserveError>
+ where
+ A: Allocator,
+ {
+ // SAFETY: We know for sure that `alloc` and `layout` matches the [`Allocator`] and [`TableLayout`]
+ // that were used to allocate this table.
+ let mut new_table = self.prepare_resize(alloc, layout, capacity, fallibility)?;
+
+ // SAFETY: We know for sure that RawTableInner will outlive the
+ // returned `FullBucketsIndices` iterator, and the caller of this
+ // function ensures that the control bytes are properly initialized.
+ for full_byte_index in self.full_buckets_indices() {
// This may panic.
- let hash = hasher(self, i);
+ let hash = hasher(self, full_byte_index);
+ // SAFETY:
// We can use a simpler version of insert() here since:
- // - there are no DELETED entries.
- // - we know there is enough space in the table.
- // - all elements are unique.
- let (index, _) = new_table.prepare_insert_slot(hash);
+ // 1. There are no DELETED entries.
+ // 2. We know there is enough space in the table.
+ // 3. All elements are unique.
+ // 4. The caller of this function guarantees that `capacity > 0`
+ // so `new_table` must already have some allocated memory.
+ // 5. We set `growth_left` and `items` fields of the new table
+ // after the loop.
+ // 6. We insert into the table, at the returned index, the data
+ // matching the given hash immediately after calling this function.
+ let (new_index, _) = new_table.prepare_insert_slot(hash);
+ // SAFETY:
+ //
+ // * `src` is valid for reads of `layout.size` bytes, since the
+ // table is alive and the `full_byte_index` is guaranteed to be
+ // within bounds (see `FullBucketsIndices::next_impl`);
+ //
+ // * `dst` is valid for writes of `layout.size` bytes, since the
+ // caller ensures that `table_layout` matches the [`TableLayout`]
+ // that was used to allocate old table and we have the `new_index`
+ // returned by `prepare_insert_slot`.
+ //
+ // * Both `src` and `dst` are properly aligned.
+ //
+ // * Both `src` and `dst` point to different region of memory.
ptr::copy_nonoverlapping(
- self.bucket_ptr(i, layout.size),
- new_table.bucket_ptr(index, layout.size),
+ self.bucket_ptr(full_byte_index, layout.size),
+ new_table.bucket_ptr(new_index, layout.size),
layout.size,
);
}
+ // The hash function didn't panic, so we can safely set the
+ // `growth_left` and `items` fields of the new table.
+ new_table.growth_left -= self.items;
+ new_table.items = self.items;
+
// We successfully copied all elements without panicking. Now replace
// self with the new table. The old table will have its memory freed but
// the items will not be dropped (since they have been moved into the
// new table).
+ // SAFETY: The caller ensures that `table_layout` matches the [`TableLayout`]
+ // that was used to allocate this table.
mem::swap(self, &mut new_table);
Ok(())
@@ -2266,6 +3130,21 @@ impl<A: Allocator + Clone> RawTableInner<A> {
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is [`undefined behavior`]:
+ ///
+ /// * The `size_of` must be equal to the size of the elements stored in the table;
+ ///
+ /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of
+ /// the elements stored in the table.
+ ///
+ /// * The [`RawTableInner`] has already been allocated;
+ ///
+ /// * The [`RawTableInner`] must have properly initialized control bytes.
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[allow(clippy::inline_always)]
#[cfg_attr(feature = "inline-more", inline(always))]
#[cfg_attr(not(feature = "inline-more"), inline)]
@@ -2309,6 +3188,9 @@ impl<A: Allocator + Clone> RawTableInner<A> {
let hash = hasher(*guard, i);
// Search for a suitable place to put it
+ //
+ // SAFETY: Caller of this function ensures that the control bytes
+ // are properly initialized.
let new_i = guard.find_insert_slot(hash).index;
// Probing works by scanning through all of the control
@@ -2349,14 +3231,64 @@ impl<A: Allocator + Clone> RawTableInner<A> {
mem::forget(guard);
}
+ /// Deallocates the table without dropping any entries.
+ ///
+ /// # Note
+ ///
+ /// This function must be called only after [`drop_elements`](RawTableInner::drop_elements),
+ /// else it can lead to leaking of memory. Also calling this function automatically
+ /// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid
+ /// (dangling) the `ctrl` field of the table.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is [`Undefined Behavior`]:
+ ///
+ /// * The [`RawTableInner`] has already been allocated;
+ ///
+ /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
+ /// to allocate this table.
+ ///
+ /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that was used
+ /// to allocate this table.
+ ///
+ /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information.
+ ///
+ /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ /// [`GlobalAlloc::dealloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.dealloc
+ /// [`Allocator::deallocate`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html#tymethod.deallocate
#[inline]
- unsafe fn free_buckets(&mut self, table_layout: TableLayout) {
+ unsafe fn free_buckets<A>(&mut self, alloc: &A, table_layout: TableLayout)
+ where
+ A: Allocator,
+ {
+ // SAFETY: The caller must uphold the safety contract for `free_buckets`
+ // method.
let (ptr, layout) = self.allocation_info(table_layout);
- self.alloc.deallocate(ptr, layout);
+ alloc.deallocate(ptr, layout);
}
+ /// Returns a pointer to the allocated memory and the layout that was used to
+ /// allocate the table.
+ ///
+ /// # Safety
+ ///
+ /// Caller of this function must observe the following safety rules:
+ ///
+ /// * The [`RawTableInner`] has already been allocated, otherwise
+ /// calling this function results in [`undefined behavior`]
+ ///
+ /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout`
+ /// that was used to allocate this table. Failure to comply with this condition
+ /// may result in [`undefined behavior`].
+ ///
+ /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information.
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ /// [`GlobalAlloc::dealloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.dealloc
+ /// [`Allocator::deallocate`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html#tymethod.deallocate
#[inline]
- fn allocation_info(&self, table_layout: TableLayout) -> (NonNull<u8>, Layout) {
+ unsafe fn allocation_info(&self, table_layout: TableLayout) -> (NonNull<u8>, Layout) {
debug_assert!(
!self.is_empty_singleton(),
"this function can only be called on non-empty tables"
@@ -2368,17 +3300,37 @@ impl<A: Allocator + Clone> RawTableInner<A> {
None => unsafe { hint::unreachable_unchecked() },
};
(
+ // SAFETY: The caller must uphold the safety contract for `allocation_info` method.
unsafe { NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)) },
layout,
)
}
+ /// Returns a pointer to the allocated memory and the layout that was used to
+ /// allocate the table. If [`RawTableInner`] has not been allocated, this
+ /// function return `dangling` pointer and `()` (unit) layout.
+ ///
+ /// # Safety
+ ///
+ /// The `table_layout` must be the same [`TableLayout`] as the `TableLayout`
+ /// that was used to allocate this table. Failure to comply with this condition
+ /// may result in [`undefined behavior`].
+ ///
+ /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information.
+ ///
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ /// [`GlobalAlloc::dealloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.dealloc
+ /// [`Allocator::deallocate`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html#tymethod.deallocate
#[cfg(feature = "raw")]
- fn allocation_info_or_zero(&self, table_layout: TableLayout) -> (NonNull<u8>, Layout) {
+ unsafe fn allocation_info_or_zero(&self, table_layout: TableLayout) -> (NonNull<u8>, Layout) {
if self.is_empty_singleton() {
(NonNull::dangling(), Layout::new::<()>())
} else {
- self.allocation_info(table_layout)
+ // SAFETY:
+ // 1. We have checked that our table is allocated.
+ // 2. The caller ensures that `table_layout` matches the [`TableLayout`]
+ // that was used to allocate this table.
+ unsafe { self.allocation_info(table_layout) }
}
}
@@ -2491,12 +3443,16 @@ impl<A: Allocator + Clone> RawTableInner<A> {
impl<T: Clone, A: Allocator + Clone> Clone for RawTable<T, A> {
fn clone(&self) -> Self {
if self.table.is_empty_singleton() {
- Self::new_in(self.table.alloc.clone())
+ Self::new_in(self.alloc.clone())
} else {
unsafe {
// Avoid `Result::ok_or_else` because it bloats LLVM IR.
- let new_table = match Self::new_uninitialized(
- self.table.alloc.clone(),
+ //
+ // SAFETY: This is safe as we are taking the size of an already allocated table
+ // and therefore сapacity overflow cannot occur, `self.table.buckets()` is power
+ // of two and all allocator errors will be caught inside `RawTableInner::new_uninitialized`.
+ let mut new_table = match Self::new_uninitialized(
+ self.alloc.clone(),
self.table.buckets(),
Fallibility::Infallible,
) {
@@ -2504,24 +3460,32 @@ impl<T: Clone, A: Allocator + Clone> Clone for RawTable<T, A> {
Err(_) => hint::unreachable_unchecked(),
};
- // If cloning fails then we need to free the allocation for the
- // new table. However we don't run its drop since its control
- // bytes are not initialized yet.
- let mut guard = guard(ManuallyDrop::new(new_table), |new_table| {
- new_table.free_buckets();
- });
-
- guard.clone_from_spec(self);
-
- // Disarm the scope guard and return the newly created table.
- ManuallyDrop::into_inner(ScopeGuard::into_inner(guard))
+ // Cloning elements may fail (the clone function may panic). But we don't
+ // need to worry about uninitialized control bits, since:
+ // 1. The number of items (elements) in the table is zero, which means that
+ // the control bits will not be readed by Drop function.
+ // 2. The `clone_from_spec` method will first copy all control bits from
+ // `self` (thus initializing them). But this will not affect the `Drop`
+ // function, since the `clone_from_spec` function sets `items` only after
+ // successfully clonning all elements.
+ new_table.clone_from_spec(self);
+ new_table
}
}
}
fn clone_from(&mut self, source: &Self) {
if source.table.is_empty_singleton() {
- *self = Self::new_in(self.table.alloc.clone());
+ let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW);
+ unsafe {
+ // SAFETY:
+ // 1. We call the function only once;
+ // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
+ // and [`TableLayout`] that were used to allocate this table.
+ // 3. If any elements' drop function panics, then there will only be a memory leak,
+ // because we have replaced the inner table with a new one.
+ old_inner.drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
+ }
} else {
unsafe {
// Make sure that if any panics occurs, we clear the table and
@@ -2536,27 +3500,38 @@ impl<T: Clone, A: Allocator + Clone> Clone for RawTable<T, A> {
//
// This leak is unavoidable: we can't try dropping more elements
// since this could lead to another panic and abort the process.
- self_.drop_elements();
+ //
+ // SAFETY: If something gets wrong we clear our table right after
+ // dropping the elements, so there is no double drop, since `items`
+ // will be equal to zero.
+ self_.table.drop_elements::<T>();
// If necessary, resize our table to match the source.
if self_.buckets() != source.buckets() {
- // Skip our drop by using ptr::write.
- if !self_.table.is_empty_singleton() {
- self_.free_buckets();
+ let new_inner = match RawTableInner::new_uninitialized(
+ &self_.alloc,
+ Self::TABLE_LAYOUT,
+ source.buckets(),
+ Fallibility::Infallible,
+ ) {
+ Ok(table) => table,
+ Err(_) => hint::unreachable_unchecked(),
+ };
+ // Replace the old inner with new uninitialized one. It's ok, since if something gets
+ // wrong `ScopeGuard` will initialize all control bytes and leave empty table.
+ let mut old_inner = mem::replace(&mut self_.table, new_inner);
+ if !old_inner.is_empty_singleton() {
+ // SAFETY:
+ // 1. We have checked that our table is allocated.
+ // 2. We know for sure that `alloc` and `table_layout` matches
+ // the [`Allocator`] and [`TableLayout`] that were used to allocate this table.
+ old_inner.free_buckets(&self_.alloc, Self::TABLE_LAYOUT);
}
- (&mut **self_ as *mut Self).write(
- // Avoid `Result::unwrap_or_else` because it bloats LLVM IR.
- match Self::new_uninitialized(
- self_.table.alloc.clone(),
- source.buckets(),
- Fallibility::Infallible,
- ) {
- Ok(table) => table,
- Err(_) => hint::unreachable_unchecked(),
- },
- );
}
+ // Cloning elements may fail (the clone function may panic), but the `ScopeGuard`
+ // inside the `clone_from_impl` function will take care of that, dropping all
+ // cloned elements if necessary. Our `ScopeGuard` will clear the table.
self_.clone_from_spec(source);
// Disarm the scope guard if cloning was successful.
@@ -2613,7 +3588,7 @@ impl<T: Clone, A: Allocator + Clone> RawTable<T, A> {
// to make sure we drop only the elements that have been
// cloned so far.
let mut guard = guard((0, &mut *self), |(index, self_)| {
- if Self::DATA_NEEDS_DROP {
+ if T::NEEDS_DROP {
for i in 0..=*index {
if self_.is_bucket_full(i) {
self_.bucket(i).drop();
@@ -2650,7 +3625,7 @@ impl<T: Clone, A: Allocator + Clone> RawTable<T, A> {
{
self.clear();
- let guard_self = guard(&mut *self, |self_| {
+ let mut guard_self = guard(&mut *self, |self_| {
// Clear the partially copied table if a panic occurs, otherwise
// items and growth_left will be out of sync with the contents
// of the table.
@@ -2683,7 +3658,7 @@ impl<T: Clone, A: Allocator + Clone> RawTable<T, A> {
}
}
-impl<T, A: Allocator + Clone + Default> Default for RawTable<T, A> {
+impl<T, A: Allocator + Default> Default for RawTable<T, A> {
#[inline]
fn default() -> Self {
Self::new_in(Default::default())
@@ -2691,31 +3666,41 @@ impl<T, A: Allocator + Clone + Default> Default for RawTable<T, A> {
}
#[cfg(feature = "nightly")]
-unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawTable<T, A> {
+unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawTable<T, A> {
#[cfg_attr(feature = "inline-more", inline)]
fn drop(&mut self) {
- if !self.table.is_empty_singleton() {
- unsafe {
- self.drop_elements();
- self.free_buckets();
- }
+ unsafe {
+ // SAFETY:
+ // 1. We call the function only once;
+ // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
+ // and [`TableLayout`] that were used to allocate this table.
+ // 3. If the drop function of any elements fails, then only a memory leak will occur,
+ // and we don't care because we are inside the `Drop` function of the `RawTable`,
+ // so there won't be any table left in an inconsistent state.
+ self.table
+ .drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
}
}
}
#[cfg(not(feature = "nightly"))]
-impl<T, A: Allocator + Clone> Drop for RawTable<T, A> {
+impl<T, A: Allocator> Drop for RawTable<T, A> {
#[cfg_attr(feature = "inline-more", inline)]
fn drop(&mut self) {
- if !self.table.is_empty_singleton() {
- unsafe {
- self.drop_elements();
- self.free_buckets();
- }
+ unsafe {
+ // SAFETY:
+ // 1. We call the function only once;
+ // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
+ // and [`TableLayout`] that were used to allocate this table.
+ // 3. If the drop function of any elements fails, then only a memory leak will occur,
+ // and we don't care because we are inside the `Drop` function of the `RawTable`,
+ // so there won't be any table left in an inconsistent state.
+ self.table
+ .drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
}
}
}
-impl<T, A: Allocator + Clone> IntoIterator for RawTable<T, A> {
+impl<T, A: Allocator> IntoIterator for RawTable<T, A> {
type Item = T;
type IntoIter = RawIntoIter<T, A>;
@@ -2749,14 +3734,39 @@ pub(crate) struct RawIterRange<T> {
impl<T> RawIterRange<T> {
/// Returns a `RawIterRange` covering a subset of a table.
///
- /// The control byte address must be aligned to the group size.
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is
+ /// [`undefined behavior`]:
+ ///
+ /// * `ctrl` must be [valid] for reads, i.e. table outlives the `RawIterRange`;
+ ///
+ /// * `ctrl` must be properly aligned to the group size (Group::WIDTH);
+ ///
+ /// * `ctrl` must point to the array of properly initialized control bytes;
+ ///
+ /// * `data` must be the [`Bucket`] at the `ctrl` index in the table;
+ ///
+ /// * the value of `len` must be less than or equal to the number of table buckets,
+ /// and the returned value of `ctrl.as_ptr().add(len).offset_from(ctrl.as_ptr())`
+ /// must be positive.
+ ///
+ /// * The `ctrl.add(len)` pointer must be either in bounds or one
+ /// byte past the end of the same [allocated table].
+ ///
+ /// * The `len` must be a power of two.
+ ///
+ /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[cfg_attr(feature = "inline-more", inline)]
unsafe fn new(ctrl: *const u8, data: Bucket<T>, len: usize) -> Self {
debug_assert_ne!(len, 0);
debug_assert_eq!(ctrl as usize % Group::WIDTH, 0);
+ // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`]
let end = ctrl.add(len);
// Load the first group and advance ctrl to point to the next group
+ // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`]
let current_group = Group::load_aligned(ctrl).match_full();
let next_ctrl = ctrl.add(Group::WIDTH);
@@ -2900,8 +3910,6 @@ pub struct RawIter<T> {
}
impl<T> RawIter<T> {
- const DATA_NEEDS_DROP: bool = mem::needs_drop::<T>();
-
/// Refresh the iterator so that it reflects a removal from the given bucket.
///
/// For the iterator to remain valid, this method must be called once
@@ -3017,7 +4025,7 @@ impl<T> RawIter<T> {
}
unsafe fn drop_elements(&mut self) {
- if Self::DATA_NEEDS_DROP && self.len() != 0 {
+ if T::NEEDS_DROP && self.items != 0 {
for item in self {
item.drop();
}
@@ -3066,28 +4074,146 @@ impl<T> Iterator for RawIter<T> {
impl<T> ExactSizeIterator for RawIter<T> {}
impl<T> FusedIterator for RawIter<T> {}
+/// Iterator which returns an index of every full bucket in the table.
+///
+/// For maximum flexibility this iterator is not bound by a lifetime, but you
+/// must observe several rules when using it:
+/// - You must not free the hash table while iterating (including via growing/shrinking).
+/// - It is fine to erase a bucket that has been yielded by the iterator.
+/// - Erasing a bucket that has not yet been yielded by the iterator may still
+/// result in the iterator yielding index of that bucket.
+/// - It is unspecified whether an element inserted after the iterator was
+/// created will be yielded by that iterator.
+/// - The order in which the iterator yields indices of the buckets is unspecified
+/// and may change in the future.
+pub(crate) struct FullBucketsIndices {
+ // Mask of full buckets in the current group. Bits are cleared from this
+ // mask as each element is processed.
+ current_group: BitMaskIter,
+
+ // Initial value of the bytes' indices of the current group (relative
+ // to the start of the control bytes).
+ group_first_index: usize,
+
+ // Pointer to the current group of control bytes,
+ // Must be aligned to the group size (Group::WIDTH).
+ ctrl: NonNull<u8>,
+
+ // Number of elements in the table.
+ items: usize,
+}
+
+impl FullBucketsIndices {
+ /// Advances the iterator and returns the next value.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is
+ /// [`Undefined Behavior`]:
+ ///
+ /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved,
+ /// i.e. table outlives the `FullBucketsIndices`;
+ ///
+ /// * It never tries to iterate after getting all elements.
+ ///
+ /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ #[inline(always)]
+ unsafe fn next_impl(&mut self) -> Option<usize> {
+ loop {
+ if let Some(index) = self.current_group.next() {
+ // The returned `self.group_first_index + index` will always
+ // be in the range `0..self.buckets()`. See explanation below.
+ return Some(self.group_first_index + index);
+ }
+
+ // SAFETY: The caller of this function ensures that:
+ //
+ // 1. It never tries to iterate after getting all the elements;
+ // 2. The table is alive and did not moved;
+ // 3. The first `self.ctrl` pointed to the start of the array of control bytes.
+ //
+ // Taking the above into account, we always stay within the bounds, because:
+ //
+ // 1. For tables smaller than the group width (self.buckets() <= Group::WIDTH),
+ // we will never end up in the given branch, since we should have already
+ // yielded all the elements of the table.
+ //
+ // 2. For tables larger than the group width. The the number of buckets is a
+ // power of two (2 ^ n), Group::WIDTH is also power of two (2 ^ k). Sinse
+ // `(2 ^ n) > (2 ^ k)`, than `(2 ^ n) % (2 ^ k) = 0`. As we start from the
+ // the start of the array of control bytes, and never try to iterate after
+ // getting all the elements, the last `self.ctrl` will be equal to
+ // the `self.buckets() - Group::WIDTH`, so `self.current_group.next()`
+ // will always contains indices within the range `0..Group::WIDTH`,
+ // and subsequent `self.group_first_index + index` will always return a
+ // number less than `self.buckets()`.
+ self.ctrl = NonNull::new_unchecked(self.ctrl.as_ptr().add(Group::WIDTH));
+
+ // SAFETY: See explanation above.
+ self.current_group = Group::load_aligned(self.ctrl.as_ptr())
+ .match_full()
+ .into_iter();
+ self.group_first_index += Group::WIDTH;
+ }
+ }
+}
+
+impl Iterator for FullBucketsIndices {
+ type Item = usize;
+
+ /// Advances the iterator and returns the next value. It is up to
+ /// the caller to ensure that the `RawTable` outlives the `FullBucketsIndices`,
+ /// because we cannot make the `next` method unsafe.
+ #[inline(always)]
+ fn next(&mut self) -> Option<usize> {
+ // Return if we already yielded all items.
+ if self.items == 0 {
+ return None;
+ }
+
+ let nxt = unsafe {
+ // SAFETY:
+ // 1. We check number of items to yield using `items` field.
+ // 2. The caller ensures that the table is alive and has not moved.
+ self.next_impl()
+ };
+
+ debug_assert!(nxt.is_some());
+ self.items -= 1;
+
+ nxt
+ }
+
+ #[inline(always)]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (self.items, Some(self.items))
+ }
+}
+
+impl ExactSizeIterator for FullBucketsIndices {}
+impl FusedIterator for FullBucketsIndices {}
+
/// Iterator which consumes a table and returns elements.
-pub struct RawIntoIter<T, A: Allocator + Clone = Global> {
+pub struct RawIntoIter<T, A: Allocator = Global> {
iter: RawIter<T>,
- allocation: Option<(NonNull<u8>, Layout)>,
+ allocation: Option<(NonNull<u8>, Layout, A)>,
marker: PhantomData<T>,
- alloc: A,
}
-impl<T, A: Allocator + Clone> RawIntoIter<T, A> {
+impl<T, A: Allocator> RawIntoIter<T, A> {
#[cfg_attr(feature = "inline-more", inline)]
pub fn iter(&self) -> RawIter<T> {
self.iter.clone()
}
}
-unsafe impl<T, A: Allocator + Clone> Send for RawIntoIter<T, A>
+unsafe impl<T, A: Allocator> Send for RawIntoIter<T, A>
where
T: Send,
A: Send,
{
}
-unsafe impl<T, A: Allocator + Clone> Sync for RawIntoIter<T, A>
+unsafe impl<T, A: Allocator> Sync for RawIntoIter<T, A>
where
T: Sync,
A: Sync,
@@ -3095,7 +4221,7 @@ where
}
#[cfg(feature = "nightly")]
-unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawIntoIter<T, A> {
+unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawIntoIter<T, A> {
#[cfg_attr(feature = "inline-more", inline)]
fn drop(&mut self) {
unsafe {
@@ -3103,14 +4229,14 @@ unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawIntoIter<T, A> {
self.iter.drop_elements();
// Free the table
- if let Some((ptr, layout)) = self.allocation {
- self.alloc.deallocate(ptr, layout);
+ if let Some((ptr, layout, ref alloc)) = self.allocation {
+ alloc.deallocate(ptr, layout);
}
}
}
}
#[cfg(not(feature = "nightly"))]
-impl<T, A: Allocator + Clone> Drop for RawIntoIter<T, A> {
+impl<T, A: Allocator> Drop for RawIntoIter<T, A> {
#[cfg_attr(feature = "inline-more", inline)]
fn drop(&mut self) {
unsafe {
@@ -3118,14 +4244,14 @@ impl<T, A: Allocator + Clone> Drop for RawIntoIter<T, A> {
self.iter.drop_elements();
// Free the table
- if let Some((ptr, layout)) = self.allocation {
- self.alloc.deallocate(ptr, layout);
+ if let Some((ptr, layout, ref alloc)) = self.allocation {
+ alloc.deallocate(ptr, layout);
}
}
}
}
-impl<T, A: Allocator + Clone> Iterator for RawIntoIter<T, A> {
+impl<T, A: Allocator> Iterator for RawIntoIter<T, A> {
type Item = T;
#[cfg_attr(feature = "inline-more", inline)]
@@ -3139,45 +4265,45 @@ impl<T, A: Allocator + Clone> Iterator for RawIntoIter<T, A> {
}
}
-impl<T, A: Allocator + Clone> ExactSizeIterator for RawIntoIter<T, A> {}
-impl<T, A: Allocator + Clone> FusedIterator for RawIntoIter<T, A> {}
+impl<T, A: Allocator> ExactSizeIterator for RawIntoIter<T, A> {}
+impl<T, A: Allocator> FusedIterator for RawIntoIter<T, A> {}
/// Iterator which consumes elements without freeing the table storage.
-pub struct RawDrain<'a, T, A: Allocator + Clone = Global> {
+pub struct RawDrain<'a, T, A: Allocator = Global> {
iter: RawIter<T>,
// The table is moved into the iterator for the duration of the drain. This
// ensures that an empty table is left if the drain iterator is leaked
// without dropping.
- table: ManuallyDrop<RawTable<T, A>>,
- orig_table: NonNull<RawTable<T, A>>,
+ table: RawTableInner,
+ orig_table: NonNull<RawTableInner>,
// We don't use a &'a mut RawTable<T> because we want RawDrain to be
// covariant over T.
marker: PhantomData<&'a RawTable<T, A>>,
}
-impl<T, A: Allocator + Clone> RawDrain<'_, T, A> {
+impl<T, A: Allocator> RawDrain<'_, T, A> {
#[cfg_attr(feature = "inline-more", inline)]
pub fn iter(&self) -> RawIter<T> {
self.iter.clone()
}
}
-unsafe impl<T, A: Allocator + Copy> Send for RawDrain<'_, T, A>
+unsafe impl<T, A: Allocator> Send for RawDrain<'_, T, A>
where
T: Send,
A: Send,
{
}
-unsafe impl<T, A: Allocator + Copy> Sync for RawDrain<'_, T, A>
+unsafe impl<T, A: Allocator> Sync for RawDrain<'_, T, A>
where
T: Sync,
A: Sync,
{
}
-impl<T, A: Allocator + Clone> Drop for RawDrain<'_, T, A> {
+impl<T, A: Allocator> Drop for RawDrain<'_, T, A> {
#[cfg_attr(feature = "inline-more", inline)]
fn drop(&mut self) {
unsafe {
@@ -3191,12 +4317,12 @@ impl<T, A: Allocator + Clone> Drop for RawDrain<'_, T, A> {
// Move the now empty table back to its original location.
self.orig_table
.as_ptr()
- .copy_from_nonoverlapping(&*self.table, 1);
+ .copy_from_nonoverlapping(&self.table, 1);
}
}
}
-impl<T, A: Allocator + Clone> Iterator for RawDrain<'_, T, A> {
+impl<T, A: Allocator> Iterator for RawDrain<'_, T, A> {
type Item = T;
#[cfg_attr(feature = "inline-more", inline)]
@@ -3213,8 +4339,8 @@ impl<T, A: Allocator + Clone> Iterator for RawDrain<'_, T, A> {
}
}
-impl<T, A: Allocator + Clone> ExactSizeIterator for RawDrain<'_, T, A> {}
-impl<T, A: Allocator + Clone> FusedIterator for RawDrain<'_, T, A> {}
+impl<T, A: Allocator> ExactSizeIterator for RawDrain<'_, T, A> {}
+impl<T, A: Allocator> FusedIterator for RawDrain<'_, T, A> {}
/// Iterator over occupied buckets that could match a given hash.
///
@@ -3259,7 +4385,7 @@ struct RawIterHashInner {
impl<T> RawIterHash<T> {
#[cfg_attr(feature = "inline-more", inline)]
#[cfg(feature = "raw")]
- unsafe fn new<A: Allocator + Clone>(table: &RawTable<T, A>, hash: u64) -> Self {
+ unsafe fn new<A: Allocator>(table: &RawTable<T, A>, hash: u64) -> Self {
RawIterHash {
inner: RawIterHashInner::new(&table.table, hash),
_marker: PhantomData,
@@ -3269,7 +4395,7 @@ impl<T> RawIterHash<T> {
impl RawIterHashInner {
#[cfg_attr(feature = "inline-more", inline)]
#[cfg(feature = "raw")]
- unsafe fn new<A: Allocator + Clone>(table: &RawTableInner<A>, hash: u64) -> Self {
+ unsafe fn new(table: &RawTableInner, hash: u64) -> Self {
let h2_hash = h2(hash);
let probe_seq = table.probe_seq(hash);
let group = Group::load(table.ctrl(probe_seq.pos));
@@ -3333,6 +4459,28 @@ impl Iterator for RawIterHashInner {
}
}
+pub(crate) struct RawExtractIf<'a, T, A: Allocator> {
+ pub iter: RawIter<T>,
+ pub table: &'a mut RawTable<T, A>,
+}
+
+impl<T, A: Allocator> RawExtractIf<'_, T, A> {
+ #[cfg_attr(feature = "inline-more", inline)]
+ pub(crate) fn next<F>(&mut self, mut f: F) -> Option<T>
+ where
+ F: FnMut(&mut T) -> bool,
+ {
+ unsafe {
+ for item in &mut self.iter {
+ if f(item.as_mut()) {
+ return Some(self.table.remove(item).0);
+ }
+ }
+ }
+ None
+ }
+}
+
#[cfg(test)]
mod test_map {
use super::*;
@@ -3375,4 +4523,214 @@ mod test_map {
assert!(table.find(i + 100, |x| *x == i + 100).is_none());
}
}
+
+ /// CHECKING THAT WE ARE NOT TRYING TO READ THE MEMORY OF
+ /// AN UNINITIALIZED TABLE DURING THE DROP
+ #[test]
+ fn test_drop_uninitialized() {
+ use ::alloc::vec::Vec;
+
+ let table = unsafe {
+ // SAFETY: The `buckets` is power of two and we're not
+ // trying to actually use the returned RawTable.
+ RawTable::<(u64, Vec<i32>)>::new_uninitialized(Global, 8, Fallibility::Infallible)
+ .unwrap()
+ };
+ drop(table);
+ }
+
+ /// CHECKING THAT WE DON'T TRY TO DROP DATA IF THE `ITEMS`
+ /// ARE ZERO, EVEN IF WE HAVE `FULL` CONTROL BYTES.
+ #[test]
+ fn test_drop_zero_items() {
+ use ::alloc::vec::Vec;
+ unsafe {
+ // SAFETY: The `buckets` is power of two and we're not
+ // trying to actually use the returned RawTable.
+ let table =
+ RawTable::<(u64, Vec<i32>)>::new_uninitialized(Global, 8, Fallibility::Infallible)
+ .unwrap();
+
+ // WE SIMULATE, AS IT WERE, A FULL TABLE.
+
+ // SAFETY: We checked that the table is allocated and therefore the table already has
+ // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for)
+ // so writing `table.table.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe.
+ table
+ .table
+ .ctrl(0)
+ .write_bytes(EMPTY, table.table.num_ctrl_bytes());
+
+ // SAFETY: table.capacity() is guaranteed to be smaller than table.buckets()
+ table.table.ctrl(0).write_bytes(0, table.capacity());
+
+ // Fix up the trailing control bytes. See the comments in set_ctrl
+ // for the handling of tables smaller than the group width.
+ if table.buckets() < Group::WIDTH {
+ // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes,
+ // so copying `self.buckets() == self.bucket_mask + 1` bytes with offset equal to
+ // `Group::WIDTH` is safe
+ table
+ .table
+ .ctrl(0)
+ .copy_to(table.table.ctrl(Group::WIDTH), table.table.buckets());
+ } else {
+ // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of
+ // control bytes,so copying `Group::WIDTH` bytes with offset equal
+ // to `self.buckets() == self.bucket_mask + 1` is safe
+ table
+ .table
+ .ctrl(0)
+ .copy_to(table.table.ctrl(table.table.buckets()), Group::WIDTH);
+ }
+ drop(table);
+ }
+ }
+
+ /// CHECKING THAT WE DON'T TRY TO DROP DATA IF THE `ITEMS`
+ /// ARE ZERO, EVEN IF WE HAVE `FULL` CONTROL BYTES.
+ #[test]
+ fn test_catch_panic_clone_from() {
+ use ::alloc::sync::Arc;
+ use ::alloc::vec::Vec;
+ use allocator_api2::alloc::{AllocError, Allocator, Global};
+ use core::sync::atomic::{AtomicI8, Ordering};
+ use std::thread;
+
+ struct MyAllocInner {
+ drop_count: Arc<AtomicI8>,
+ }
+
+ #[derive(Clone)]
+ struct MyAlloc {
+ _inner: Arc<MyAllocInner>,
+ }
+
+ impl Drop for MyAllocInner {
+ fn drop(&mut self) {
+ println!("MyAlloc freed.");
+ self.drop_count.fetch_sub(1, Ordering::SeqCst);
+ }
+ }
+
+ unsafe impl Allocator for MyAlloc {
+ fn allocate(&self, layout: Layout) -> std::result::Result<NonNull<[u8]>, AllocError> {
+ let g = Global;
+ g.allocate(layout)
+ }
+
+ unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+ let g = Global;
+ g.deallocate(ptr, layout)
+ }
+ }
+
+ const DISARMED: bool = false;
+ const ARMED: bool = true;
+
+ struct CheckedCloneDrop {
+ panic_in_clone: bool,
+ dropped: bool,
+ need_drop: Vec<u64>,
+ }
+
+ impl Clone for CheckedCloneDrop {
+ fn clone(&self) -> Self {
+ if self.panic_in_clone {
+ panic!("panic in clone")
+ }
+ Self {
+ panic_in_clone: self.panic_in_clone,
+ dropped: self.dropped,
+ need_drop: self.need_drop.clone(),
+ }
+ }
+ }
+
+ impl Drop for CheckedCloneDrop {
+ fn drop(&mut self) {
+ if self.dropped {
+ panic!("double drop");
+ }
+ self.dropped = true;
+ }
+ }
+
+ let dropped: Arc<AtomicI8> = Arc::new(AtomicI8::new(2));
+
+ let mut table = RawTable::new_in(MyAlloc {
+ _inner: Arc::new(MyAllocInner {
+ drop_count: dropped.clone(),
+ }),
+ });
+
+ for (idx, panic_in_clone) in core::iter::repeat(DISARMED).take(7).enumerate() {
+ let idx = idx as u64;
+ table.insert(
+ idx,
+ (
+ idx,
+ CheckedCloneDrop {
+ panic_in_clone,
+ dropped: false,
+ need_drop: vec![idx],
+ },
+ ),
+ |(k, _)| *k,
+ );
+ }
+
+ assert_eq!(table.len(), 7);
+
+ thread::scope(|s| {
+ let result = s.spawn(|| {
+ let armed_flags = [
+ DISARMED, DISARMED, ARMED, DISARMED, DISARMED, DISARMED, DISARMED,
+ ];
+ let mut scope_table = RawTable::new_in(MyAlloc {
+ _inner: Arc::new(MyAllocInner {
+ drop_count: dropped.clone(),
+ }),
+ });
+ for (idx, &panic_in_clone) in armed_flags.iter().enumerate() {
+ let idx = idx as u64;
+ scope_table.insert(
+ idx,
+ (
+ idx,
+ CheckedCloneDrop {
+ panic_in_clone,
+ dropped: false,
+ need_drop: vec![idx + 100],
+ },
+ ),
+ |(k, _)| *k,
+ );
+ }
+ table.clone_from(&scope_table);
+ });
+ assert!(result.join().is_err());
+ });
+
+ // Let's check that all iterators work fine and do not return elements
+ // (especially `RawIterRange`, which does not depend on the number of
+ // elements in the table, but looks directly at the control bytes)
+ //
+ // SAFETY: We know for sure that `RawTable` will outlive
+ // the returned `RawIter / RawIterRange` iterator.
+ assert_eq!(table.len(), 0);
+ assert_eq!(unsafe { table.iter().count() }, 0);
+ assert_eq!(unsafe { table.iter().iter.count() }, 0);
+
+ for idx in 0..table.buckets() {
+ let idx = idx as u64;
+ assert!(
+ table.find(idx, |(k, _)| *k == idx).is_none(),
+ "Index: {idx}"
+ );
+ }
+
+ // All allocator clones should already be dropped.
+ assert_eq!(dropped.load(Ordering::SeqCst), 1);
+ }
}