author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:19:41 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:19:41 +0000
commit     4f9fe856a25ab29345b90e7725509e9ee38a37be (patch)
tree       e4ffd8a9374cae7b21f7cbfb352927e0e074aff6 /library/core/src/slice
parent     Adding upstream version 1.68.2+dfsg1. (diff)
download   rustc-4f9fe856a25ab29345b90e7725509e9ee38a37be.tar.xz
           rustc-4f9fe856a25ab29345b90e7725509e9ee38a37be.zip

Adding upstream version 1.69.0+dfsg1. (upstream/1.69.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/slice')
-rw-r--r--  library/core/src/slice/cmp.rs         |  27
-rw-r--r--  library/core/src/slice/iter.rs        |   4
-rw-r--r--  library/core/src/slice/iter/macros.rs |   9
-rw-r--r--  library/core/src/slice/memchr.rs      |  15
-rw-r--r--  library/core/src/slice/mod.rs         |  70
-rw-r--r--  library/core/src/slice/sort.rs        | 526
6 files changed, 365 insertions, 286 deletions
diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs
index 5e1b218e5..7601dd3c7 100644
--- a/library/core/src/slice/cmp.rs
+++ b/library/core/src/slice/cmp.rs
@@ -1,6 +1,6 @@
//! Comparison traits for `[T]`.
-use crate::cmp::{self, Ordering};
+use crate::cmp::{self, BytewiseEq, Ordering};
use crate::ffi;
use crate::mem;
@@ -77,7 +77,7 @@ where
// Use memcmp for bytewise equality when the types allow
impl<A, B> SlicePartialEq<B> for [A]
where
- A: BytewiseEquality<B>,
+ A: BytewiseEq<B>,
{
fn equal(&self, other: &[B]) -> bool {
if self.len() != other.len() {
@@ -203,29 +203,6 @@ impl SliceOrd for u8 {
}
}
-// Hack to allow specializing on `Eq` even though `Eq` has a method.
-#[rustc_unsafe_specialization_marker]
-trait MarkerEq<T>: PartialEq<T> {}
-
-impl<T: Eq> MarkerEq<T> for T {}
-
-#[doc(hidden)]
-/// Trait implemented for types that can be compared for equality using
-/// their bytewise representation
-#[rustc_specialization_trait]
-trait BytewiseEquality<T>: MarkerEq<T> + Copy {}
-
-macro_rules! impl_marker_for {
- ($traitname:ident, $($ty:ty)*) => {
- $(
- impl $traitname<$ty> for $ty { }
- )*
- }
-}
-
-impl_marker_for!(BytewiseEquality,
- u8 i8 u16 i16 u32 i32 u64 i64 u128 i128 usize isize char bool);
-
pub(super) trait SliceContains: Sized {
fn slice_contains(&self, x: &[Self]) -> bool;
}
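
The hunks above swap the locally defined `BytewiseEquality` marker for the shared `crate::cmp::BytewiseEq` trait while keeping the same memcmp-style fast path for slice equality. As a rough illustration of that fast path (a standalone sketch only, with a hypothetical `bytewise_slice_eq` helper, not the library's specialization machinery):

    // Hypothetical sketch: for types whose equality is exactly a comparison of
    // their in-memory bytes (u8, i32, usize, ...), slice equality can be reduced
    // to one bytewise comparison instead of an element-by-element loop.
    fn bytewise_slice_eq<T: Copy + Eq>(a: &[T], b: &[T]) -> bool {
        if a.len() != b.len() {
            return false;
        }
        // Caveat: only a faithful `==` for types with no padding bytes and whose
        // `Eq` really is bytewise (so not f32/f64, for example).
        let size = core::mem::size_of_val(a);
        let a_bytes = unsafe { core::slice::from_raw_parts(a.as_ptr() as *const u8, size) };
        let b_bytes = unsafe { core::slice::from_raw_parts(b.as_ptr() as *const u8, size) };
        a_bytes == b_bytes // `[u8] == [u8]` lowers to a memcmp-style comparison
    }

    fn main() {
        assert!(bytewise_slice_eq(&[1u32, 2, 3], &[1, 2, 3]));
        assert!(!bytewise_slice_eq(&[1u32, 2, 3], &[1, 2, 4]));
    }
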
diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs
index 90ab43d12..c4317799b 100644
--- a/library/core/src/slice/iter.rs
+++ b/library/core/src/slice/iter.rs
@@ -7,7 +7,9 @@ use crate::cmp;
use crate::cmp::Ordering;
use crate::fmt;
use crate::intrinsics::assume;
-use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
+use crate::iter::{
+ FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator,
+};
use crate::marker::{PhantomData, Send, Sized, Sync};
use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZeroUsize;
diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
index 0fd57b197..89b92a7d5 100644
--- a/library/core/src/slice/iter/macros.rs
+++ b/library/core/src/slice/iter/macros.rs
@@ -384,6 +384,15 @@ macro_rules! iterator {
#[unstable(feature = "trusted_len", issue = "37572")]
unsafe impl<T> TrustedLen for $name<'_, T> {}
+
+ impl<'a, T> UncheckedIterator for $name<'a, T> {
+ unsafe fn next_unchecked(&mut self) -> $elem {
+ // SAFETY: The caller promised there's at least one more item.
+ unsafe {
+ next_unchecked!(self)
+ }
+ }
+ }
}
}
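
For context on the new `UncheckedIterator` impl for slice iterators: the point is to let callers that already know how many items remain skip the `Option` bookkeeping of `next`. A minimal sketch of the same idea in user code (the trait and names below are illustrative, not the core-internal `UncheckedIterator`):

    // Illustrative trait: an "unchecked next" for callers that can prove
    // non-emptiness out of band.
    trait UncheckedNext: Iterator {
        /// # Safety
        /// The caller must guarantee at least one more item remains.
        unsafe fn next_unchecked(&mut self) -> Self::Item {
            // Default: fall back to `next()` and rely on the caller's promise.
            unsafe { self.next().unwrap_unchecked() }
        }
    }

    impl<I: Iterator> UncheckedNext for I {}

    fn main() {
        let mut it = [10, 20, 30].into_iter();
        // SAFETY: the array has three elements, so two unchecked calls are fine.
        let (a, b) = unsafe { (it.next_unchecked(), it.next_unchecked()) };
        assert_eq!((a, b), (10, 20));
    }
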
diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs
index c848c2e18..98c8349eb 100644
--- a/library/core/src/slice/memchr.rs
+++ b/library/core/src/slice/memchr.rs
@@ -16,25 +16,29 @@ const USIZE_BYTES: usize = mem::size_of::<usize>();
/// bytes where the borrow propagated all the way to the most significant
/// bit."
#[inline]
+#[rustc_const_stable(feature = "const_memchr", since = "1.65.0")]
const fn contains_zero_byte(x: usize) -> bool {
x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
}
-#[cfg(target_pointer_width = "16")]
#[inline]
+#[cfg(target_pointer_width = "16")]
+#[rustc_const_stable(feature = "const_memchr", since = "1.65.0")]
const fn repeat_byte(b: u8) -> usize {
(b as usize) << 8 | b as usize
}
-#[cfg(not(target_pointer_width = "16"))]
#[inline]
+#[cfg(not(target_pointer_width = "16"))]
+#[rustc_const_stable(feature = "const_memchr", since = "1.65.0")]
const fn repeat_byte(b: u8) -> usize {
(b as usize) * (usize::MAX / 255)
}
/// Returns the first index matching the byte `x` in `text`.
-#[must_use]
#[inline]
+#[must_use]
+#[rustc_const_stable(feature = "const_memchr", since = "1.65.0")]
pub const fn memchr(x: u8, text: &[u8]) -> Option<usize> {
// Fast path for small slices.
if text.len() < 2 * USIZE_BYTES {
@@ -45,6 +49,7 @@ pub const fn memchr(x: u8, text: &[u8]) -> Option<usize> {
}
#[inline]
+#[rustc_const_stable(feature = "const_memchr", since = "1.65.0")]
const fn memchr_naive(x: u8, text: &[u8]) -> Option<usize> {
let mut i = 0;
@@ -60,6 +65,10 @@ const fn memchr_naive(x: u8, text: &[u8]) -> Option<usize> {
None
}
+#[rustc_allow_const_fn_unstable(const_cmp)]
+#[rustc_allow_const_fn_unstable(const_slice_index)]
+#[rustc_allow_const_fn_unstable(const_align_offset)]
+#[rustc_const_stable(feature = "const_memchr", since = "1.65.0")]
const fn memchr_aligned(x: u8, text: &[u8]) -> Option<usize> {
// Scan for a single byte value by reading two `usize` words at a time.
//
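
The attribute changes above are purely about const-stability; the underlying word-at-a-time search is unchanged. As a worked illustration of `repeat_byte` and `contains_zero_byte` (a standalone sketch of the same well-known bit trick, not the library code itself):

    // LO broadcasts 0x01 into every byte of a word, HI broadcasts 0x80.
    const LO: usize = usize::MAX / 255; // 0x0101...01
    const HI: usize = LO * 128;         // 0x8080...80

    // A word `x` contains a zero byte iff (x - LO) & !x & HI != 0.
    const fn contains_zero_byte(x: usize) -> bool {
        x.wrapping_sub(LO) & !x & HI != 0
    }

    // Broadcast a byte into every byte position of a word.
    const fn repeat_byte(b: u8) -> usize {
        (b as usize) * LO
    }

    fn main() {
        // XOR-ing a word with the broadcast needle zeroes exactly the matching
        // bytes, which is how memchr tests a whole word per iteration.
        assert!(!contains_zero_byte(repeat_byte(b'x')));                    // no zero byte
        assert!(contains_zero_byte(repeat_byte(b'x') ^ repeat_byte(b'x'))); // all-zero word
        let w = repeat_byte(b'a') ^ ((b'a' ^ b'q') as usize); // lowest byte is now b'q'
        assert!(contains_zero_byte(w ^ repeat_byte(b'q')));   // needle byte detected
    }
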
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index d93a3a57e..1cd86b445 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -805,8 +805,9 @@ impl<T> [T] {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
+ #[track_caller]
pub fn windows(&self, size: usize) -> Windows<'_, T> {
- let size = NonZeroUsize::new(size).expect("size is zero");
+ let size = NonZeroUsize::new(size).expect("window size must be non-zero");
Windows::new(self, size)
}
@@ -839,8 +840,9 @@ impl<T> [T] {
/// [`rchunks`]: slice::rchunks
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
+ #[track_caller]
pub fn chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
- assert_ne!(chunk_size, 0, "chunks cannot have a size of zero");
+ assert!(chunk_size != 0, "chunk size must be non-zero");
Chunks::new(self, chunk_size)
}
@@ -877,8 +879,9 @@ impl<T> [T] {
/// [`rchunks_mut`]: slice::rchunks_mut
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
+ #[track_caller]
pub fn chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
- assert_ne!(chunk_size, 0, "chunks cannot have a size of zero");
+ assert!(chunk_size != 0, "chunk size must be non-zero");
ChunksMut::new(self, chunk_size)
}
@@ -914,8 +917,9 @@ impl<T> [T] {
/// [`rchunks_exact`]: slice::rchunks_exact
#[stable(feature = "chunks_exact", since = "1.31.0")]
#[inline]
+ #[track_caller]
pub fn chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
- assert_ne!(chunk_size, 0, "chunks cannot have a size of zero");
+ assert!(chunk_size != 0, "chunk size must be non-zero");
ChunksExact::new(self, chunk_size)
}
@@ -956,8 +960,9 @@ impl<T> [T] {
/// [`rchunks_exact_mut`]: slice::rchunks_exact_mut
#[stable(feature = "chunks_exact", since = "1.31.0")]
#[inline]
+ #[track_caller]
pub fn chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
- assert_ne!(chunk_size, 0, "chunks cannot have a size of zero");
+ assert!(chunk_size != 0, "chunk size must be non-zero");
ChunksExactMut::new(self, chunk_size)
}
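
All of the `#[track_caller]` additions in this file serve the same purpose: a zero chunk or window size now panics with the caller's source location rather than a location inside core. A small sketch of the mechanism with a hypothetical helper (`chunks_len` is not a real API):

    // Hypothetical helper: with #[track_caller], the panic location reported by
    // the assert is the call site in main, not this function body.
    #[track_caller]
    fn chunks_len(len: usize, chunk_size: usize) -> usize {
        assert!(chunk_size != 0, "chunk size must be non-zero");
        (len + chunk_size - 1) / chunk_size
    }

    fn main() {
        assert_eq!(chunks_len(10, 3), 4);
        // `chunks_len(10, 0)` would panic, and the reported location would be
        // this line rather than the assert! inside chunks_len.
    }
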
@@ -1037,9 +1042,10 @@ impl<T> [T] {
/// ```
#[unstable(feature = "slice_as_chunks", issue = "74985")]
#[inline]
+ #[track_caller]
#[must_use]
pub fn as_chunks<const N: usize>(&self) -> (&[[T; N]], &[T]) {
- assert_ne!(N, 0, "chunks cannot have a size of zero");
+ assert!(N != 0, "chunk size must be non-zero");
let len = self.len() / N;
let (multiple_of_n, remainder) = self.split_at(len * N);
// SAFETY: We already panicked for zero, and ensured by construction
@@ -1068,9 +1074,10 @@ impl<T> [T] {
/// ```
#[unstable(feature = "slice_as_chunks", issue = "74985")]
#[inline]
+ #[track_caller]
#[must_use]
pub fn as_rchunks<const N: usize>(&self) -> (&[T], &[[T; N]]) {
- assert_ne!(N, 0, "chunks cannot have a size of zero");
+ assert!(N != 0, "chunk size must be non-zero");
let len = self.len() / N;
let (remainder, multiple_of_n) = self.split_at(self.len() - len * N);
// SAFETY: We already panicked for zero, and ensured by construction
@@ -1108,8 +1115,9 @@ impl<T> [T] {
/// [`chunks_exact`]: slice::chunks_exact
#[unstable(feature = "array_chunks", issue = "74985")]
#[inline]
+ #[track_caller]
pub fn array_chunks<const N: usize>(&self) -> ArrayChunks<'_, T, N> {
- assert_ne!(N, 0, "chunks cannot have a size of zero");
+ assert!(N != 0, "chunk size must be non-zero");
ArrayChunks::new(self)
}
@@ -1186,9 +1194,10 @@ impl<T> [T] {
/// ```
#[unstable(feature = "slice_as_chunks", issue = "74985")]
#[inline]
+ #[track_caller]
#[must_use]
pub fn as_chunks_mut<const N: usize>(&mut self) -> (&mut [[T; N]], &mut [T]) {
- assert_ne!(N, 0, "chunks cannot have a size of zero");
+ assert!(N != 0, "chunk size must be non-zero");
let len = self.len() / N;
let (multiple_of_n, remainder) = self.split_at_mut(len * N);
// SAFETY: We already panicked for zero, and ensured by construction
@@ -1223,9 +1232,10 @@ impl<T> [T] {
/// ```
#[unstable(feature = "slice_as_chunks", issue = "74985")]
#[inline]
+ #[track_caller]
#[must_use]
pub fn as_rchunks_mut<const N: usize>(&mut self) -> (&mut [T], &mut [[T; N]]) {
- assert_ne!(N, 0, "chunks cannot have a size of zero");
+ assert!(N != 0, "chunk size must be non-zero");
let len = self.len() / N;
let (remainder, multiple_of_n) = self.split_at_mut(self.len() - len * N);
// SAFETY: We already panicked for zero, and ensured by construction
@@ -1265,8 +1275,9 @@ impl<T> [T] {
/// [`chunks_exact_mut`]: slice::chunks_exact_mut
#[unstable(feature = "array_chunks", issue = "74985")]
#[inline]
+ #[track_caller]
pub fn array_chunks_mut<const N: usize>(&mut self) -> ArrayChunksMut<'_, T, N> {
- assert_ne!(N, 0, "chunks cannot have a size of zero");
+ assert!(N != 0, "chunk size must be non-zero");
ArrayChunksMut::new(self)
}
@@ -1297,8 +1308,9 @@ impl<T> [T] {
/// [`windows`]: slice::windows
#[unstable(feature = "array_windows", issue = "75027")]
#[inline]
+ #[track_caller]
pub fn array_windows<const N: usize>(&self) -> ArrayWindows<'_, T, N> {
- assert_ne!(N, 0, "windows cannot have a size of zero");
+ assert!(N != 0, "window size must be non-zero");
ArrayWindows::new(self)
}
@@ -1331,8 +1343,9 @@ impl<T> [T] {
/// [`chunks`]: slice::chunks
#[stable(feature = "rchunks", since = "1.31.0")]
#[inline]
+ #[track_caller]
pub fn rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
- assert!(chunk_size != 0);
+ assert!(chunk_size != 0, "chunk size must be non-zero");
RChunks::new(self, chunk_size)
}
@@ -1369,8 +1382,9 @@ impl<T> [T] {
/// [`chunks_mut`]: slice::chunks_mut
#[stable(feature = "rchunks", since = "1.31.0")]
#[inline]
+ #[track_caller]
pub fn rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
- assert!(chunk_size != 0);
+ assert!(chunk_size != 0, "chunk size must be non-zero");
RChunksMut::new(self, chunk_size)
}
@@ -1408,8 +1422,9 @@ impl<T> [T] {
/// [`chunks_exact`]: slice::chunks_exact
#[stable(feature = "rchunks", since = "1.31.0")]
#[inline]
+ #[track_caller]
pub fn rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
- assert!(chunk_size != 0);
+ assert!(chunk_size != 0, "chunk size must be non-zero");
RChunksExact::new(self, chunk_size)
}
@@ -1451,8 +1466,9 @@ impl<T> [T] {
/// [`chunks_exact_mut`]: slice::chunks_exact_mut
#[stable(feature = "rchunks", since = "1.31.0")]
#[inline]
+ #[track_caller]
pub fn rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
- assert!(chunk_size != 0);
+ assert!(chunk_size != 0, "chunk size must be non-zero");
RChunksExactMut::new(self, chunk_size)
}
@@ -2714,8 +2730,10 @@ impl<T> [T] {
/// This reordering has the additional property that any value at position `i < index` will be
/// less than or equal to any value at a position `j > index`. Additionally, this reordering is
/// unstable (i.e. any number of equal elements may end up at position `index`), in-place
- /// (i.e. does not allocate), and *O*(*n*) worst-case. This function is also/ known as "kth
- /// element" in other libraries. It returns a triplet of the following from the reordered slice:
+ /// (i.e. does not allocate), and *O*(*n*) on average. The worst-case performance is *O*(*n* log *n*).
+ /// This function is also known as "kth element" in other libraries.
+ ///
+ /// It returns a triplet of the following from the reordered slice:
/// the subslice prior to `index`, the element at `index`, and the subslice after `index`;
/// accordingly, the values in those two subslices will respectively all be less-than-or-equal-to
/// and greater-than-or-equal-to the value of the element at `index`.
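
The documentation fix above replaces the incorrect *O*(*n*) worst-case claim with *O*(*n*) on average and *O*(*n* log *n*) worst case. For reference, a short usage example of the stable `select_nth_unstable` API whose docs are being adjusted:

    fn main() {
        let mut v = [5, 1, 9, 3, 7, 2, 8];
        // Reorder so that index 3 holds the element a full sort would put there.
        let (lesser, median, greater) = v.select_nth_unstable(3);
        assert_eq!(*median, 5); // 4th-smallest value of the original data
        assert!(lesser.iter().all(|&x| x <= 5));
        assert!(greater.iter().all(|&x| x >= 5));
    }
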
@@ -2761,8 +2779,11 @@ impl<T> [T] {
/// This reordering has the additional property that any value at position `i < index` will be
/// less than or equal to any value at a position `j > index` using the comparator function.
/// Additionally, this reordering is unstable (i.e. any number of equal elements may end up at
- /// position `index`), in-place (i.e. does not allocate), and *O*(*n*) worst-case. This function
- /// is also known as "kth element" in other libraries. It returns a triplet of the following from
+ /// position `index`), in-place (i.e. does not allocate), and *O*(*n*) on average.
+ /// The worst-case performance is *O*(*n* log *n*). This function is also known as
+ /// "kth element" in other libraries.
+ ///
+ /// It returns a triplet of the following from
/// the slice reordered according to the provided comparator function: the subslice prior to
/// `index`, the element at `index`, and the subslice after `index`; accordingly, the values in
/// those two subslices will respectively all be less-than-or-equal-to and greater-than-or-equal-to
@@ -2813,8 +2834,11 @@ impl<T> [T] {
/// This reordering has the additional property that any value at position `i < index` will be
/// less than or equal to any value at a position `j > index` using the key extraction function.
/// Additionally, this reordering is unstable (i.e. any number of equal elements may end up at
- /// position `index`), in-place (i.e. does not allocate), and *O*(*n*) worst-case. This function
- /// is also known as "kth element" in other libraries. It returns a triplet of the following from
+ /// position `index`), in-place (i.e. does not allocate), and *O*(*n*) on average.
+ /// The worst-case performance is *O*(*n* log *n*).
+ /// This function is also known as "kth element" in other libraries.
+ ///
+ /// It returns a triplet of the following from
/// the slice reordered according to the provided key extraction function: the subslice prior to
/// `index`, the element at `index`, and the subslice after `index`; accordingly, the values in
/// those two subslices will respectively all be less-than-or-equal-to and greater-than-or-equal-to
@@ -2931,7 +2955,7 @@ impl<T> [T] {
// This operation is still `O(n)`.
//
// Example: We start in this state, where `r` represents "next
- // read" and `w` represents "next_write`.
+ // read" and `w` represents "next_write".
//
// r
// +---+---+---+---+---+---+
diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs
index 2181f9a81..2333f60a8 100644
--- a/library/core/src/slice/sort.rs
+++ b/library/core/src/slice/sort.rs
@@ -13,115 +13,184 @@ use crate::cmp;
use crate::mem::{self, MaybeUninit, SizedTypeProperties};
use crate::ptr;
-/// When dropped, copies from `src` into `dest`.
-struct CopyOnDrop<T> {
+// When dropped, copies from `src` into `dest`.
+struct InsertionHole<T> {
src: *const T,
dest: *mut T,
}
-impl<T> Drop for CopyOnDrop<T> {
+impl<T> Drop for InsertionHole<T> {
fn drop(&mut self) {
- // SAFETY: This is a helper class.
- // Please refer to its usage for correctness.
- // Namely, one must be sure that `src` and `dst` does not overlap as required by `ptr::copy_nonoverlapping`.
+ // SAFETY: This is a helper class. Please refer to its usage for correctness. Namely, one
+ // must be sure that `src` and `dst` do not overlap as required by
+ // `ptr::copy_nonoverlapping` and are both valid for writes.
unsafe {
ptr::copy_nonoverlapping(self.src, self.dest, 1);
}
}
}
-/// Shifts the first element to the right until it encounters a greater or equal element.
-fn shift_head<T, F>(v: &mut [T], is_less: &mut F)
+/// Inserts `v[v.len() - 1]` into pre-sorted sequence `v[..v.len() - 1]` so that whole `v[..]`
+/// becomes sorted.
+unsafe fn insert_tail<T, F>(v: &mut [T], is_less: &mut F)
where
F: FnMut(&T, &T) -> bool,
{
- let len = v.len();
- // SAFETY: The unsafe operations below involves indexing without a bounds check (by offsetting a
- // pointer) and copying memory (`ptr::copy_nonoverlapping`).
- //
- // a. Indexing:
- // 1. We checked the size of the array to >=2.
- // 2. All the indexing that we will do is always between {0 <= index < len} at most.
- //
- // b. Memory copying
- // 1. We are obtaining pointers to references which are guaranteed to be valid.
- // 2. They cannot overlap because we obtain pointers to difference indices of the slice.
- // Namely, `i` and `i-1`.
- // 3. If the slice is properly aligned, the elements are properly aligned.
- // It is the caller's responsibility to make sure the slice is properly aligned.
- //
- // See comments below for further detail.
+ debug_assert!(v.len() >= 2);
+
+ let arr_ptr = v.as_mut_ptr();
+ let i = v.len() - 1;
+
+ // SAFETY: caller must ensure v is at least len 2.
unsafe {
- // If the first two elements are out-of-order...
- if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
- // Read the first element into a stack-allocated variable. If a following comparison
- // operation panics, `hole` will get dropped and automatically write the element back
- // into the slice.
- let tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(0)));
- let v = v.as_mut_ptr();
- let mut hole = CopyOnDrop { src: &*tmp, dest: v.add(1) };
- ptr::copy_nonoverlapping(v.add(1), v.add(0), 1);
-
- for i in 2..len {
- if !is_less(&*v.add(i), &*tmp) {
+ // See insert_head which talks about why this approach is beneficial.
+ let i_ptr = arr_ptr.add(i);
+
+ // It's important that we use i_ptr here. If this check is positive and we continue,
+ // we want to make sure that no other copy of the value was seen by is_less.
+ // Otherwise we would have to copy it back.
+ if is_less(&*i_ptr, &*i_ptr.sub(1)) {
+ // It's important that we use tmp for comparison from now on, as it is the value that
+ // will be copied back. Notionally, we could have created a divergence if we copied
+ // back the wrong value.
+ let tmp = mem::ManuallyDrop::new(ptr::read(i_ptr));
+ // Intermediate state of the insertion process is always tracked by `hole`, which
+ // serves two purposes:
+ // 1. Protects integrity of `v` from panics in `is_less`.
+ // 2. Fills the remaining hole in `v` in the end.
+ //
+ // Panic safety:
+ //
+ // If `is_less` panics at any point during the process, `hole` will get dropped and
+ // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
+ // initially held exactly once.
+ let mut hole = InsertionHole { src: &*tmp, dest: i_ptr.sub(1) };
+ ptr::copy_nonoverlapping(hole.dest, i_ptr, 1);
+
+ // SAFETY: We know i is at least 1.
+ for j in (0..(i - 1)).rev() {
+ let j_ptr = arr_ptr.add(j);
+ if !is_less(&*tmp, &*j_ptr) {
break;
}
- // Move `i`-th element one place to the left, thus shifting the hole to the right.
- ptr::copy_nonoverlapping(v.add(i), v.add(i - 1), 1);
- hole.dest = v.add(i);
+ ptr::copy_nonoverlapping(j_ptr, hole.dest, 1);
+ hole.dest = j_ptr;
}
// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
}
}
}
-/// Shifts the last element to the left until it encounters a smaller or equal element.
-fn shift_tail<T, F>(v: &mut [T], is_less: &mut F)
+/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
+///
+/// This is the integral subroutine of insertion sort.
+unsafe fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
where
F: FnMut(&T, &T) -> bool,
{
- let len = v.len();
- // SAFETY: The unsafe operations below involves indexing without a bound check (by offsetting a
- // pointer) and copying memory (`ptr::copy_nonoverlapping`).
- //
- // a. Indexing:
- // 1. We checked the size of the array to >= 2.
- // 2. All the indexing that we will do is always between `0 <= index < len-1` at most.
- //
- // b. Memory copying
- // 1. We are obtaining pointers to references which are guaranteed to be valid.
- // 2. They cannot overlap because we obtain pointers to difference indices of the slice.
- // Namely, `i` and `i+1`.
- // 3. If the slice is properly aligned, the elements are properly aligned.
- // It is the caller's responsibility to make sure the slice is properly aligned.
- //
- // See comments below for further detail.
+ debug_assert!(v.len() >= 2);
+
+ // SAFETY: caller must ensure v is at least len 2.
unsafe {
- // If the last two elements are out-of-order...
- if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
- // Read the last element into a stack-allocated variable. If a following comparison
- // operation panics, `hole` will get dropped and automatically write the element back
- // into the slice.
- let tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(len - 1)));
- let v = v.as_mut_ptr();
- let mut hole = CopyOnDrop { src: &*tmp, dest: v.add(len - 2) };
- ptr::copy_nonoverlapping(v.add(len - 2), v.add(len - 1), 1);
-
- for i in (0..len - 2).rev() {
- if !is_less(&*tmp, &*v.add(i)) {
+ if is_less(v.get_unchecked(1), v.get_unchecked(0)) {
+ let arr_ptr = v.as_mut_ptr();
+
+ // There are three ways to implement insertion here:
+ //
+ // 1. Swap adjacent elements until the first one gets to its final destination.
+ // However, this way we copy data around more than is necessary. If elements are big
+ // structures (costly to copy), this method will be slow.
+ //
+ // 2. Iterate until the right place for the first element is found. Then shift the
+ // elements succeeding it to make room for it and finally place it into the
+ // remaining hole. This is a good method.
+ //
+ // 3. Copy the first element into a temporary variable. Iterate until the right place
+ // for it is found. As we go along, copy every traversed element into the slot
+ // preceding it. Finally, copy data from the temporary variable into the remaining
+ // hole. This method is very good. Benchmarks demonstrated slightly better
+ // performance than with the 2nd method.
+ //
+ // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
+ let tmp = mem::ManuallyDrop::new(ptr::read(arr_ptr));
+
+ // Intermediate state of the insertion process is always tracked by `hole`, which
+ // serves two purposes:
+ // 1. Protects integrity of `v` from panics in `is_less`.
+ // 2. Fills the remaining hole in `v` in the end.
+ //
+ // Panic safety:
+ //
+ // If `is_less` panics at any point during the process, `hole` will get dropped and
+ // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
+ // initially held exactly once.
+ let mut hole = InsertionHole { src: &*tmp, dest: arr_ptr.add(1) };
+ ptr::copy_nonoverlapping(arr_ptr.add(1), arr_ptr.add(0), 1);
+
+ for i in 2..v.len() {
+ if !is_less(&v.get_unchecked(i), &*tmp) {
break;
}
-
- // Move `i`-th element one place to the right, thus shifting the hole to the left.
- ptr::copy_nonoverlapping(v.add(i), v.add(i + 1), 1);
- hole.dest = v.add(i);
+ ptr::copy_nonoverlapping(arr_ptr.add(i), arr_ptr.add(i - 1), 1);
+ hole.dest = arr_ptr.add(i);
}
// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
}
}
}
+/// Sort `v` assuming `v[..offset]` is already sorted.
+///
+/// Never inline this function to avoid code bloat. It still optimizes nicely and has practically no
+/// performance impact, and even improves performance in some cases.
+#[inline(never)]
+fn insertion_sort_shift_left<T, F>(v: &mut [T], offset: usize, is_less: &mut F)
+where
+ F: FnMut(&T, &T) -> bool,
+{
+ let len = v.len();
+
+ // Using assert here improves performance.
+ assert!(offset != 0 && offset <= len);
+
+ // Shift each element of the unsorted region v[i..] as far left as is needed to make v sorted.
+ for i in offset..len {
+ // SAFETY: we tested that `offset` must be at least 1, so this loop is only entered if len
+ // >= 2. The range is exclusive and we know `i` must be at least 1 so this slice has at
+ // least len 2.
+ unsafe {
+ insert_tail(&mut v[..=i], is_less);
+ }
+ }
+}
+
+/// Sort `v` assuming `v[offset..]` is already sorted.
+///
+/// Never inline this function to avoid code bloat. It still optimizes nicely and has practically no
+/// performance impact, and even improves performance in some cases.
+#[inline(never)]
+fn insertion_sort_shift_right<T, F>(v: &mut [T], offset: usize, is_less: &mut F)
+where
+ F: FnMut(&T, &T) -> bool,
+{
+ let len = v.len();
+
+ // Using assert here improves performance.
+ assert!(offset != 0 && offset <= len && len >= 2);
+
+ // Shift each element of the unsorted region v[..i] as far left as is needed to make v sorted.
+ for i in (0..offset).rev() {
+ // SAFETY: we tested that `offset` must be at least 1, so this loop is only entered if len
+ // >= 2. We ensured that the slice length is always at least 2. We know that start_found
+ // will be at least one less than end, and the range is exclusive, which gives us i always
+ // <= (end - 2).
+ unsafe {
+ insert_head(&mut v[i..len], is_less);
+ }
+ }
+}
+
/// Partially sorts a slice by shifting several out-of-order elements around.
///
/// Returns `true` if the slice is sorted at the end. This function is *O*(*n*) worst-case.
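
A safe, simplified restatement of what the new `insert_tail` accomplishes may help here: with `v[..len - 1]` already sorted, the last element is sifted left into place. The real code avoids the swaps by copying through a temporary and uses the `InsertionHole` guard for panic safety; the sketch below is illustrative only:

    // Safe illustration only: the pointer-based insert_tail copies elements and
    // relies on InsertionHole to restore `tmp` if a comparison panics.
    fn insert_tail_safe<T: Ord>(v: &mut [T]) {
        if v.len() < 2 {
            return;
        }
        let mut i = v.len() - 1;
        while i > 0 && v[i] < v[i - 1] {
            v.swap(i, i - 1);
            i -= 1;
        }
    }

    fn main() {
        let mut v = [1, 3, 5, 7, 4];
        insert_tail_safe(&mut v);
        assert_eq!(v, [1, 3, 4, 5, 7]);
    }
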
@@ -161,26 +230,19 @@ where
// Swap the found pair of elements. This puts them in correct order.
v.swap(i - 1, i);
- // Shift the smaller element to the left.
- shift_tail(&mut v[..i], is_less);
- // Shift the greater element to the right.
- shift_head(&mut v[i..], is_less);
+ if i >= 2 {
+ // Shift the smaller element to the left.
+ insertion_sort_shift_left(&mut v[..i], i - 1, is_less);
+
+ // Shift the greater element to the right.
+ insertion_sort_shift_right(&mut v[..i], 1, is_less);
+ }
}
// Didn't manage to sort the slice in the limited number of steps.
false
}
-/// Sorts a slice using insertion sort, which is *O*(*n*^2) worst-case.
-fn insertion_sort<T, F>(v: &mut [T], is_less: &mut F)
-where
- F: FnMut(&T, &T) -> bool,
-{
- for i in 1..v.len() {
- shift_tail(&mut v[..i + 1], is_less);
- }
-}
-
/// Sorts `v` using heapsort, which guarantees *O*(*n* \* log(*n*)) worst-case.
#[cold]
#[unstable(feature = "sort_internals", reason = "internal to sort module", issue = "none")]
@@ -198,8 +260,11 @@ where
}
// Choose the greater child.
- if child + 1 < v.len() && is_less(&v[child], &v[child + 1]) {
- child += 1;
+ if child + 1 < v.len() {
+ // We need a branch to be sure not to out-of-bounds index,
+ // but it's highly predictable. The comparison, however,
+ // is better done branchless, especially for primitives.
+ child += is_less(&v[child], &v[child + 1]) as usize;
}
// Stop if the invariant holds at `node`.
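
The heapsort tweak above replaces a conditional increment with a branchless one: the bounds check stays as a (predictable) branch, while the value comparison is folded into an addition. A standalone sketch of the pattern (hypothetical `greater_child` helper, specialized to `i32` for brevity):

    // Pick the greater child of `node` in an array-backed binary max-heap.
    fn greater_child(v: &[i32], node: usize) -> Option<usize> {
        let mut child = 2 * node + 1; // left child
        if child >= v.len() {
            return None; // `node` is a leaf
        }
        if child + 1 < v.len() {
            // The bounds branch is predictable; the value comparison is branchless.
            child += (v[child] < v[child + 1]) as usize;
        }
        Some(child)
    }

    fn main() {
        let heap = [9, 5, 8, 1, 2, 7];
        assert_eq!(greater_child(&heap, 0), Some(2)); // 8 > 5, right child wins
        assert_eq!(greater_child(&heap, 1), Some(4)); // 2 > 1
        assert_eq!(greater_child(&heap, 5), None);    // leaf
    }
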
@@ -252,7 +317,7 @@ where
// 1. `block` - Number of elements in the block.
// 2. `start` - Start pointer into the `offsets` array.
// 3. `end` - End pointer into the `offsets` array.
- // 4. `offsets - Indices of out-of-order elements within the block.
+ // 4. `offsets` - Indices of out-of-order elements within the block.
// The current block on the left side (from `l` to `l.add(block_l)`).
let mut l = v.as_mut_ptr();
@@ -262,7 +327,7 @@ where
let mut offsets_l = [MaybeUninit::<u8>::uninit(); BLOCK];
// The current block on the right side (from `r.sub(block_r)` to `r`).
- // SAFETY: The documentation for .add() specifically mention that `vec.as_ptr().add(vec.len())` is always safe`
+ // SAFETY: The documentation for .add() specifically mentions that `vec.as_ptr().add(vec.len())` is always safe
let mut r = unsafe { l.add(v.len()) };
let mut block_r = BLOCK;
let mut start_r = ptr::null_mut();
@@ -507,7 +572,7 @@ where
// SAFETY: `pivot` is a reference to the first element of `v`, so `ptr::read` is safe.
let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
- let _pivot_guard = CopyOnDrop { src: &*tmp, dest: pivot };
+ let _pivot_guard = InsertionHole { src: &*tmp, dest: pivot };
let pivot = &*tmp;
// Find the first pair of out-of-order elements.
@@ -560,7 +625,7 @@ where
// operation panics, the pivot will be automatically written back into the slice.
// SAFETY: The pointer here is valid because it is obtained from a reference to a slice.
let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
- let _pivot_guard = CopyOnDrop { src: &*tmp, dest: pivot };
+ let _pivot_guard = InsertionHole { src: &*tmp, dest: pivot };
let pivot = &*tmp;
// Now partition the slice.
@@ -608,19 +673,23 @@ where
fn break_patterns<T>(v: &mut [T]) {
let len = v.len();
if len >= 8 {
- // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
- let mut random = len as u32;
- let mut gen_u32 = || {
- random ^= random << 13;
- random ^= random >> 17;
- random ^= random << 5;
- random
- };
+ let mut seed = len;
let mut gen_usize = || {
+ // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
if usize::BITS <= 32 {
- gen_u32() as usize
+ let mut r = seed as u32;
+ r ^= r << 13;
+ r ^= r >> 17;
+ r ^= r << 5;
+ seed = r as usize;
+ seed
} else {
- (((gen_u32() as u64) << 32) | (gen_u32() as u64)) as usize
+ let mut r = seed as u64;
+ r ^= r << 13;
+ r ^= r >> 7;
+ r ^= r << 17;
+ seed = r as usize;
+ seed
}
};
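
The rewritten `break_patterns` generator now runs a native-width xorshift instead of composing two 32-bit outputs on 64-bit targets. A standalone sketch of the 64-bit variant with the same shift constants (13, 7, 17), seeded from a length just as the code above seeds from `len`:

    // Marsaglia-style 64-bit xorshift step; the state must be non-zero.
    fn xorshift64(seed: &mut u64) -> u64 {
        let mut r = *seed;
        r ^= r << 13;
        r ^= r >> 7;
        r ^= r << 17;
        *seed = r;
        r
    }

    fn main() {
        let mut s1 = 1024u64; // e.g. the slice length; must be non-zero
        let mut s2 = 1024u64;
        // Deterministic: the same seed yields the same pseudorandom sequence.
        assert_eq!(xorshift64(&mut s1), xorshift64(&mut s2));
        // Each step is an invertible map, so a non-zero state never becomes zero.
        assert_ne!(xorshift64(&mut s1), 0);
    }
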
@@ -742,7 +811,9 @@ where
// Very short slices get sorted using insertion sort.
if len <= MAX_INSERTION {
- insertion_sort(v, is_less);
+ if len >= 2 {
+ insertion_sort_shift_left(v, 1, is_less);
+ }
return;
}
@@ -844,10 +915,14 @@ fn partition_at_index_loop<'a, T, F>(
let mut was_balanced = true;
loop {
+ let len = v.len();
+
// For slices of up to this length it's probably faster to simply sort them.
const MAX_INSERTION: usize = 10;
- if v.len() <= MAX_INSERTION {
- insertion_sort(v, is_less);
+ if len <= MAX_INSERTION {
+ if len >= 2 {
+ insertion_sort_shift_left(v, 1, is_less);
+ }
return;
}
@@ -887,7 +962,7 @@ fn partition_at_index_loop<'a, T, F>(
}
let (mid, _) = partition(v, pivot, is_less);
- was_balanced = cmp::min(mid, v.len() - mid) >= v.len() / 8;
+ was_balanced = cmp::min(mid, len - mid) >= len / 8;
// Split the slice into `left`, `pivot`, and `right`.
let (left, right) = v.split_at_mut(mid);
@@ -954,75 +1029,6 @@ where
(left, pivot, right)
}
-/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
-///
-/// This is the integral subroutine of insertion sort.
-fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
-where
- F: FnMut(&T, &T) -> bool,
-{
- if v.len() >= 2 && is_less(&v[1], &v[0]) {
- // SAFETY: Copy tmp back even if panic, and ensure unique observation.
- unsafe {
- // There are three ways to implement insertion here:
- //
- // 1. Swap adjacent elements until the first one gets to its final destination.
- // However, this way we copy data around more than is necessary. If elements are big
- // structures (costly to copy), this method will be slow.
- //
- // 2. Iterate until the right place for the first element is found. Then shift the
- // elements succeeding it to make room for it and finally place it into the
- // remaining hole. This is a good method.
- //
- // 3. Copy the first element into a temporary variable. Iterate until the right place
- // for it is found. As we go along, copy every traversed element into the slot
- // preceding it. Finally, copy data from the temporary variable into the remaining
- // hole. This method is very good. Benchmarks demonstrated slightly better
- // performance than with the 2nd method.
- //
- // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
- let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
-
- // Intermediate state of the insertion process is always tracked by `hole`, which
- // serves two purposes:
- // 1. Protects integrity of `v` from panics in `is_less`.
- // 2. Fills the remaining hole in `v` in the end.
- //
- // Panic safety:
- //
- // If `is_less` panics at any point during the process, `hole` will get dropped and
- // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
- // initially held exactly once.
- let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] };
- ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
-
- for i in 2..v.len() {
- if !is_less(&v[i], &*tmp) {
- break;
- }
- ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
- hole.dest = &mut v[i];
- }
- // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
- }
- }
-
- // When dropped, copies from `src` into `dest`.
- struct InsertionHole<T> {
- src: *const T,
- dest: *mut T,
- }
-
- impl<T> Drop for InsertionHole<T> {
- fn drop(&mut self) {
- // SAFETY: The caller must ensure that src and dest are correctly set.
- unsafe {
- ptr::copy_nonoverlapping(self.src, self.dest, 1);
- }
- }
- }
-}
-
/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
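
For context on the `merge` routine referenced here (its body is unchanged in this diff), the following is a safe, simplified sketch of the operation it performs: buffer one run and merge forward. The real implementation works with raw pointers, buffers whichever side is shorter, and merges bidirectionally; this sketch buffers the left run only:

    // Illustrative only: merge the non-decreasing runs v[..mid] and v[mid..]
    // into v, using a temporary copy of the left run; the merge is stable.
    fn merge_runs<T: Copy + Ord>(v: &mut [T], mid: usize) {
        let left: Vec<T> = v[..mid].to_vec();
        let (mut i, mut j) = (0, mid);
        for k in 0..v.len() {
            if i < left.len() && (j >= v.len() || left[i] <= v[j]) {
                v[k] = left[i]; // take from the buffered left run
                i += 1;
            } else {
                v[k] = v[j]; // take from the right run (j never lags behind k)
                j += 1;
            }
        }
    }

    fn main() {
        let mut v = [1, 4, 7, 2, 3, 9];
        merge_runs(&mut v, 3);
        assert_eq!(v, [1, 2, 3, 4, 7, 9]);
    }
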
@@ -1180,8 +1186,6 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
{
// Slices of up to this length get sorted using insertion sort.
const MAX_INSERTION: usize = 20;
- // Very short runs are extended using insertion sort to span at least this many elements.
- const MIN_RUN: usize = 10;
// The caller should have already checked that.
debug_assert!(!T::IS_ZST);
@@ -1191,9 +1195,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
// Short arrays get sorted in-place via insertion sort to avoid allocations.
if len <= MAX_INSERTION {
if len >= 2 {
- for i in (0..len - 1).rev() {
- insert_head(&mut v[i..], is_less);
- }
+ insertion_sort_shift_left(v, 1, is_less);
}
return;
}
@@ -1203,59 +1205,43 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
// `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
// which will always have length at most `len / 2`.
let buf = BufGuard::new(len / 2, elem_alloc_fn, elem_dealloc_fn);
- let buf_ptr = buf.buf_ptr;
+ let buf_ptr = buf.buf_ptr.as_ptr();
let mut runs = RunVec::new(run_alloc_fn, run_dealloc_fn);
- // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
- // strange decision, but consider the fact that merges more often go in the opposite direction
- // (forwards). According to benchmarks, merging forwards is slightly faster than merging
- // backwards. To conclude, identifying runs by traversing backwards improves performance.
- let mut end = len;
- while end > 0 {
- // Find the next natural run, and reverse it if it's strictly descending.
- let mut start = end - 1;
- if start > 0 {
- start -= 1;
-
- // SAFETY: The v.get_unchecked must be fed with correct inbound indicies.
- unsafe {
- if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
- while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
- start -= 1;
- }
- v[start..end].reverse();
- } else {
- while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
- {
- start -= 1;
- }
- }
- }
+ let mut end = 0;
+ let mut start = 0;
+
+ // Scan forward. Memory pre-fetching prefers forward scanning vs backwards scanning, and the
+ // code-gen is usually better. For the most sensitive types such as integers, these are merged
+ // bidirectionally at once. So there is no benefit in scanning backwards.
+ while end < len {
+ let (streak_end, was_reversed) = find_streak(&v[start..], is_less);
+ end += streak_end;
+ if was_reversed {
+ v[start..end].reverse();
}
// Insert some more elements into the run if it's too short. Insertion sort is faster than
// merge sort on short sequences, so this significantly improves performance.
- while start > 0 && end - start < MIN_RUN {
- start -= 1;
- insert_head(&mut v[start..end], is_less);
- }
+ end = provide_sorted_batch(v, start, end, is_less);
// Push this run onto the stack.
runs.push(TimSortRun { start, len: end - start });
- end = start;
+ start = end;
// Merge some pairs of adjacent runs to satisfy the invariants.
- while let Some(r) = collapse(runs.as_slice()) {
- let left = runs[r + 1];
- let right = runs[r];
+ while let Some(r) = collapse(runs.as_slice(), len) {
+ let left = runs[r];
+ let right = runs[r + 1];
+ let merge_slice = &mut v[left.start..right.start + right.len];
// SAFETY: `buf_ptr` must hold enough capacity for the shorter of the two sides, and
// neither side may be on length 0.
unsafe {
- merge(&mut v[left.start..right.start + right.len], left.len, buf_ptr, is_less);
+ merge(merge_slice, left.len, buf_ptr, is_less);
}
- runs[r] = TimSortRun { start: left.start, len: left.len + right.len };
- runs.remove(r + 1);
+ runs[r + 1] = TimSortRun { start: left.start, len: left.len + right.len };
+ runs.remove(r);
}
}
@@ -1277,10 +1263,10 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
// run starts at index 0, it will always demand a merge operation until the stack is fully
// collapsed, in order to complete the sort.
#[inline]
- fn collapse(runs: &[TimSortRun]) -> Option<usize> {
+ fn collapse(runs: &[TimSortRun], stop: usize) -> Option<usize> {
let n = runs.len();
if n >= 2
- && (runs[n - 1].start == 0
+ && (runs[n - 1].start + runs[n - 1].len == stop
|| runs[n - 2].len <= runs[n - 1].len
|| (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
|| (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
@@ -1298,7 +1284,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
where
ElemDeallocF: Fn(*mut T, usize),
{
- buf_ptr: *mut T,
+ buf_ptr: ptr::NonNull<T>,
capacity: usize,
elem_dealloc_fn: ElemDeallocF,
}
@@ -1315,7 +1301,11 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
where
ElemAllocF: Fn(usize) -> *mut T,
{
- Self { buf_ptr: elem_alloc_fn(len), capacity: len, elem_dealloc_fn }
+ Self {
+ buf_ptr: ptr::NonNull::new(elem_alloc_fn(len)).unwrap(),
+ capacity: len,
+ elem_dealloc_fn,
+ }
}
}
@@ -1324,7 +1314,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
ElemDeallocF: Fn(*mut T, usize),
{
fn drop(&mut self) {
- (self.elem_dealloc_fn)(self.buf_ptr, self.capacity);
+ (self.elem_dealloc_fn)(self.buf_ptr.as_ptr(), self.capacity);
}
}
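
The switch from raw `*mut T` fields to `ptr::NonNull<T>` in `BufGuard` and `RunVec` validates the allocator callback's result once, at construction, instead of trusting it at every use. A minimal sketch of the pattern with a hypothetical `Buf` type:

    use core::ptr::NonNull;

    // Hypothetical buffer wrapper: the non-null invariant is established in new().
    struct Buf<T> {
        ptr: NonNull<T>,
        capacity: usize,
    }

    impl<T> Buf<T> {
        fn new(raw: *mut T, capacity: usize) -> Self {
            // Panics here if the allocation callback handed back a null pointer.
            Self { ptr: NonNull::new(raw).unwrap(), capacity }
        }
        fn as_ptr(&self) -> *mut T {
            self.ptr.as_ptr()
        }
    }

    fn main() {
        let mut storage = [0u32; 4];
        let buf = Buf::new(storage.as_mut_ptr(), storage.len());
        assert_eq!(buf.capacity, 4);
        assert!(!buf.as_ptr().is_null());
    }
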
@@ -1333,7 +1323,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
RunAllocF: Fn(usize) -> *mut TimSortRun,
RunDeallocF: Fn(*mut TimSortRun, usize),
{
- buf_ptr: *mut TimSortRun,
+ buf_ptr: ptr::NonNull<TimSortRun>,
capacity: usize,
len: usize,
run_alloc_fn: RunAllocF,
@@ -1350,7 +1340,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
const START_RUN_CAPACITY: usize = 16;
Self {
- buf_ptr: run_alloc_fn(START_RUN_CAPACITY),
+ buf_ptr: ptr::NonNull::new(run_alloc_fn(START_RUN_CAPACITY)).unwrap(),
capacity: START_RUN_CAPACITY,
len: 0,
run_alloc_fn,
@@ -1361,15 +1351,15 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
fn push(&mut self, val: TimSortRun) {
if self.len == self.capacity {
let old_capacity = self.capacity;
- let old_buf_ptr = self.buf_ptr;
+ let old_buf_ptr = self.buf_ptr.as_ptr();
self.capacity = self.capacity * 2;
- self.buf_ptr = (self.run_alloc_fn)(self.capacity);
+ self.buf_ptr = ptr::NonNull::new((self.run_alloc_fn)(self.capacity)).unwrap();
// SAFETY: buf_ptr new and old were correctly allocated and old_buf_ptr has
// old_capacity valid elements.
unsafe {
- ptr::copy_nonoverlapping(old_buf_ptr, self.buf_ptr, old_capacity);
+ ptr::copy_nonoverlapping(old_buf_ptr, self.buf_ptr.as_ptr(), old_capacity);
}
(self.run_dealloc_fn)(old_buf_ptr, old_capacity);
@@ -1377,7 +1367,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
// SAFETY: The invariant was just checked.
unsafe {
- self.buf_ptr.add(self.len).write(val);
+ self.buf_ptr.as_ptr().add(self.len).write(val);
}
self.len += 1;
}
@@ -1390,7 +1380,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
// SAFETY: buf_ptr needs to be valid and len invariant upheld.
unsafe {
// the place we are taking from.
- let ptr = self.buf_ptr.add(index);
+ let ptr = self.buf_ptr.as_ptr().add(index);
// Shift everything down to fill in that spot.
ptr::copy(ptr.add(1), ptr, self.len - index - 1);
@@ -1400,7 +1390,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
fn as_slice(&self) -> &[TimSortRun] {
// SAFETY: Safe as long as buf_ptr is valid and len invariant was upheld.
- unsafe { &*ptr::slice_from_raw_parts(self.buf_ptr, self.len) }
+ unsafe { &*ptr::slice_from_raw_parts(self.buf_ptr.as_ptr(), self.len) }
}
fn len(&self) -> usize {
@@ -1419,7 +1409,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
if index < self.len {
// SAFETY: buf_ptr and len invariant must be upheld.
unsafe {
- return &*(self.buf_ptr.add(index));
+ return &*(self.buf_ptr.as_ptr().add(index));
}
}
@@ -1436,7 +1426,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
if index < self.len {
// SAFETY: buf_ptr and len invariant must be upheld.
unsafe {
- return &mut *(self.buf_ptr.add(index));
+ return &mut *(self.buf_ptr.as_ptr().add(index));
}
}
@@ -1452,7 +1442,7 @@ pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
fn drop(&mut self) {
// As long as TimSortRun is Copy we don't need to drop them individually but just the
// whole allocation.
- (self.run_dealloc_fn)(self.buf_ptr, self.capacity);
+ (self.run_dealloc_fn)(self.buf_ptr.as_ptr(), self.capacity);
}
}
}
@@ -1463,3 +1453,71 @@ pub struct TimSortRun {
len: usize,
start: usize,
}
+
+/// Takes a range denoted by `start` and `end` that is already sorted, and extends it to the right if
+/// necessary with sorts optimized for smaller ranges, such as insertion sort.
+#[cfg(not(no_global_oom_handling))]
+fn provide_sorted_batch<T, F>(v: &mut [T], start: usize, mut end: usize, is_less: &mut F) -> usize
+where
+ F: FnMut(&T, &T) -> bool,
+{
+ let len = v.len();
+ assert!(end >= start && end <= len);
+
+ // This value is a balance between least comparisons and best performance, as
+ // influenced by for example cache locality.
+ const MIN_INSERTION_RUN: usize = 10;
+
+ // Insert some more elements into the run if it's too short. Insertion sort is faster than
+ // merge sort on short sequences, so this significantly improves performance.
+ let start_end_diff = end - start;
+
+ if start_end_diff < MIN_INSERTION_RUN && end < len {
+ // v[start_found..end] are elements that are already sorted in the input. We want to extend
+ // the sorted region to the right, so we push `end` out by up to MIN_INSERTION_RUN - 1 elements, which is
+ // more efficient than trying to push those already sorted elements to the left.
+ end = cmp::min(start + MIN_INSERTION_RUN, len);
+ let presorted_start = cmp::max(start_end_diff, 1);
+
+ insertion_sort_shift_left(&mut v[start..end], presorted_start, is_less);
+ }
+
+ end
+}
+
+/// Finds a streak of presorted elements starting at the beginning of the slice. Returns the index
+/// of the first element that is not part of said streak, and a bool denoting whether the streak was reversed.
+/// Streaks can be increasing or decreasing.
+fn find_streak<T, F>(v: &[T], is_less: &mut F) -> (usize, bool)
+where
+ F: FnMut(&T, &T) -> bool,
+{
+ let len = v.len();
+
+ if len < 2 {
+ return (len, false);
+ }
+
+ let mut end = 2;
+
+ // SAFETY: See the specific comments below.
+ unsafe {
+ // SAFETY: We checked that len >= 2, so 0 and 1 are valid indices.
+ let assume_reverse = is_less(v.get_unchecked(1), v.get_unchecked(0));
+
+ // SAFETY: We know end >= 2 and check end < len.
+ // From that it follows that accessing v at end and end - 1 is safe.
+ if assume_reverse {
+ while end < len && is_less(v.get_unchecked(end), v.get_unchecked(end - 1)) {
+ end += 1;
+ }
+
+ (end, true)
+ } else {
+ while end < len && !is_less(v.get_unchecked(end), v.get_unchecked(end - 1)) {
+ end += 1;
+ }
+ (end, false)
+ }
+ }
+}
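
To make the contract of `find_streak` concrete, here is a safe restatement of the same scan (an illustrative `find_streak_safe`, indexing instead of `get_unchecked`), together with two example inputs:

    // Returns the length of the longest non-decreasing or strictly decreasing
    // prefix, and whether it was the decreasing (to-be-reversed) kind.
    fn find_streak_safe<T: Ord>(v: &[T]) -> (usize, bool) {
        if v.len() < 2 {
            return (v.len(), false);
        }
        let reversed = v[1] < v[0];
        let mut end = 2;
        while end < v.len() {
            let decreasing = v[end] < v[end - 1];
            if decreasing != reversed {
                break;
            }
            end += 1;
        }
        (end, reversed)
    }

    fn main() {
        assert_eq!(find_streak_safe(&[1, 2, 2, 5, 3, 4]), (4, false)); // non-decreasing prefix
        assert_eq!(find_streak_safe(&[9, 7, 4, 4, 5]), (3, true));     // strictly decreasing prefix
    }
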