path: root/library/core/src/slice
author    Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-17 12:19:13 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-17 12:19:13 +0000
commit    218caa410aa38c29984be31a5229b9fa717560ee (patch)
tree      c54bd55eeb6e4c508940a30e94c0032fbd45d677 /library/core/src/slice
parent    Releasing progress-linux version 1.67.1+dfsg1-1~progress7.99u1. (diff)
Merging upstream version 1.68.2+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/slice')
-rw-r--r--  library/core/src/slice/iter.rs        |  12
-rw-r--r--  library/core/src/slice/iter/macros.rs |  22
-rw-r--r--  library/core/src/slice/mod.rs         |  61
-rw-r--r--  library/core/src/slice/sort.rs        | 544
4 files changed, 596 insertions, 43 deletions
diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs
index 062289767..90ab43d12 100644
--- a/library/core/src/slice/iter.rs
+++ b/library/core/src/slice/iter.rs
@@ -6,7 +6,7 @@ mod macros;
use crate::cmp;
use crate::cmp::Ordering;
use crate::fmt;
-use crate::intrinsics::{assume, exact_div, unchecked_sub};
+use crate::intrinsics::assume;
use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
use crate::marker::{PhantomData, Send, Sized, Sync};
use crate::mem::{self, SizedTypeProperties};
@@ -35,12 +35,6 @@ impl<'a, T> IntoIterator for &'a mut [T] {
}
}
-// Macro helper functions
-#[inline(always)]
-fn size_from_ptr<T>(_: *const T) -> usize {
- mem::size_of::<T>()
-}
-
/// Immutable slice iterator
///
/// This struct is created by the [`iter`] method on [slices].
@@ -65,7 +59,7 @@ fn size_from_ptr<T>(_: *const T) -> usize {
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Iter<'a, T: 'a> {
ptr: NonNull<T>,
- end: *const T, // If T is a ZST, this is actually ptr+len. This encoding is picked so that
+ end: *const T, // If T is a ZST, this is actually ptr+len. This encoding is picked so that
// ptr == end is a quick test for the Iterator being empty, that works
// for both ZST and non-ZST.
_marker: PhantomData<&'a T>,
@@ -186,7 +180,7 @@ impl<T> AsRef<[T]> for Iter<'_, T> {
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct IterMut<'a, T: 'a> {
ptr: NonNull<T>,
- end: *mut T, // If T is a ZST, this is actually ptr+len. This encoding is picked so that
+ end: *mut T, // If T is a ZST, this is actually ptr+len. This encoding is picked so that
// ptr == end is a quick test for the Iterator being empty, that works
// for both ZST and non-ZST.
_marker: PhantomData<&'a mut T>,
diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
index ce51d48e3..0fd57b197 100644
--- a/library/core/src/slice/iter/macros.rs
+++ b/library/core/src/slice/iter/macros.rs
@@ -9,30 +9,20 @@ macro_rules! is_empty {
};
}
-// To get rid of some bounds checks (see `position`), we compute the length in a somewhat
-// unexpected way. (Tested by `codegen/slice-position-bounds-check`.)
macro_rules! len {
($self: ident) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block
let start = $self.ptr;
- let size = size_from_ptr(start.as_ptr());
- if size == 0 {
- // This _cannot_ use `unchecked_sub` because we depend on wrapping
+ if T::IS_ZST {
+ // This _cannot_ use `ptr_sub` because we depend on wrapping
// to represent the length of long ZST slice iterators.
$self.end.addr().wrapping_sub(start.as_ptr().addr())
} else {
- // We know that `start <= end`, so can do better than `offset_from`,
- // which needs to deal in signed. By setting appropriate flags here
- // we can tell LLVM this, which helps it remove bounds checks.
- // SAFETY: By the type invariant, `start <= end`
- let diff = unsafe { unchecked_sub($self.end.addr(), start.as_ptr().addr()) };
- // By also telling LLVM that the pointers are apart by an exact
- // multiple of the type size, it can optimize `len() == 0` down to
- // `start == end` instead of `(end - start) < size`.
- // SAFETY: By the type invariant, the pointers are aligned so the
- // distance between them must be a multiple of pointee size
- unsafe { exact_div(diff, size) }
+ // To get rid of some bounds checks (see `position`), we use ptr_sub instead of
+ // offset_from (Tested by `codegen/slice-position-bounds-check`.)
+ // SAFETY: by the type invariant pointers are aligned and `start <= end`
+ unsafe { $self.end.sub_ptr(start.as_ptr()) }
}
}};
}
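The rewritten `len!` above replaces the manual `unchecked_sub` + `exact_div` pair with `sub_ptr`, the unsigned counterpart of `offset_from`, so LLVM still learns that the pointer distance is non-negative and an exact multiple of the element size (which lets it fold `len() == 0` down to `start == end`). A minimal sketch of the same length computation outside the iterator internals, using the stable `offset_from` instead of the unstable `sub_ptr`, and assuming a non-zero-sized `T` with `start <= end` in the same allocation:

```
/// Illustrative only: length of the half-open pointer range `start..end`.
/// The real `len!` macro uses `sub_ptr` to hand LLVM the "non-negative,
/// exact multiple of the element size" facts directly.
unsafe fn range_len<T>(start: *const T, end: *const T) -> usize {
    debug_assert!(core::mem::size_of::<T>() != 0);
    // SAFETY: caller guarantees same-allocation pointers with `start <= end`.
    unsafe { end.offset_from(start) as usize }
}
```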
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index d9281a925..d93a3a57e 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -29,13 +29,19 @@ use crate::slice;
/// Pure rust memchr implementation, taken from rust-memchr
pub mod memchr;
+#[unstable(
+ feature = "slice_internals",
+ issue = "none",
+ reason = "exposed from core to be reused in std;"
+)]
+pub mod sort;
+
mod ascii;
mod cmp;
mod index;
mod iter;
mod raw;
mod rotate;
-mod sort;
mod specialize;
#[stable(feature = "rust1", since = "1.0.0")]
@@ -703,7 +709,7 @@ impl<T> [T] {
// Because this function is first compiled in isolation,
// this check tells LLVM that the indexing below is
- // in-bounds. Then after inlining -- once the actual
+ // in-bounds. Then after inlining -- once the actual
// lengths of the slices are known -- it's removed.
let (a, b) = (&mut a[..n], &mut b[..n]);
@@ -781,6 +787,22 @@ impl<T> [T] {
/// let mut iter = slice.windows(4);
/// assert!(iter.next().is_none());
/// ```
+ ///
+ /// There's no `windows_mut`, as that existing would let safe code violate the
+ /// "only one `&mut` at a time to the same thing" rule. However, you can sometimes
+ /// use [`Cell::as_slice_of_cells`](crate::cell::Cell::as_slice_of_cells) in
+ /// conjunction with `windows` to accomplish something similar:
+ /// ```
+ /// use std::cell::Cell;
+ ///
+ /// let mut array = ['R', 'u', 's', 't', ' ', '2', '0', '1', '5'];
+ /// let slice = &mut array[..];
+ /// let slice_of_cells: &[Cell<char>] = Cell::from_mut(slice).as_slice_of_cells();
+ /// for w in slice_of_cells.windows(3) {
+ /// Cell::swap(&w[0], &w[2]);
+ /// }
+ /// assert_eq!(array, ['s', 't', ' ', '2', '0', '1', '5', 'u', 'R']);
+ /// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn windows(&self, size: usize) -> Windows<'_, T> {
@@ -893,7 +915,7 @@ impl<T> [T] {
#[stable(feature = "chunks_exact", since = "1.31.0")]
#[inline]
pub fn chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
- assert_ne!(chunk_size, 0);
+ assert_ne!(chunk_size, 0, "chunks cannot have a size of zero");
ChunksExact::new(self, chunk_size)
}
@@ -935,7 +957,7 @@ impl<T> [T] {
#[stable(feature = "chunks_exact", since = "1.31.0")]
#[inline]
pub fn chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
- assert_ne!(chunk_size, 0);
+ assert_ne!(chunk_size, 0, "chunks cannot have a size of zero");
ChunksExactMut::new(self, chunk_size)
}
@@ -1002,11 +1024,22 @@ impl<T> [T] {
/// assert_eq!(chunks, &[['l', 'o'], ['r', 'e']]);
/// assert_eq!(remainder, &['m']);
/// ```
+ ///
+ /// If you expect the slice to be an exact multiple, you can combine
+ /// `let`-`else` with an empty slice pattern:
+ /// ```
+ /// #![feature(slice_as_chunks)]
+ /// let slice = ['R', 'u', 's', 't'];
+ /// let (chunks, []) = slice.as_chunks::<2>() else {
+ /// panic!("slice didn't have even length")
+ /// };
+ /// assert_eq!(chunks, &[['R', 'u'], ['s', 't']]);
+ /// ```
#[unstable(feature = "slice_as_chunks", issue = "74985")]
#[inline]
#[must_use]
pub fn as_chunks<const N: usize>(&self) -> (&[[T; N]], &[T]) {
- assert_ne!(N, 0);
+ assert_ne!(N, 0, "chunks cannot have a size of zero");
let len = self.len() / N;
let (multiple_of_n, remainder) = self.split_at(len * N);
// SAFETY: We already panicked for zero, and ensured by construction
@@ -1037,7 +1070,7 @@ impl<T> [T] {
#[inline]
#[must_use]
pub fn as_rchunks<const N: usize>(&self) -> (&[T], &[[T; N]]) {
- assert_ne!(N, 0);
+ assert_ne!(N, 0, "chunks cannot have a size of zero");
let len = self.len() / N;
let (remainder, multiple_of_n) = self.split_at(self.len() - len * N);
// SAFETY: We already panicked for zero, and ensured by construction
@@ -1076,7 +1109,7 @@ impl<T> [T] {
#[unstable(feature = "array_chunks", issue = "74985")]
#[inline]
pub fn array_chunks<const N: usize>(&self) -> ArrayChunks<'_, T, N> {
- assert_ne!(N, 0);
+ assert_ne!(N, 0, "chunks cannot have a size of zero");
ArrayChunks::new(self)
}
@@ -1155,7 +1188,7 @@ impl<T> [T] {
#[inline]
#[must_use]
pub fn as_chunks_mut<const N: usize>(&mut self) -> (&mut [[T; N]], &mut [T]) {
- assert_ne!(N, 0);
+ assert_ne!(N, 0, "chunks cannot have a size of zero");
let len = self.len() / N;
let (multiple_of_n, remainder) = self.split_at_mut(len * N);
// SAFETY: We already panicked for zero, and ensured by construction
@@ -1192,7 +1225,7 @@ impl<T> [T] {
#[inline]
#[must_use]
pub fn as_rchunks_mut<const N: usize>(&mut self) -> (&mut [T], &mut [[T; N]]) {
- assert_ne!(N, 0);
+ assert_ne!(N, 0, "chunks cannot have a size of zero");
let len = self.len() / N;
let (remainder, multiple_of_n) = self.split_at_mut(self.len() - len * N);
// SAFETY: We already panicked for zero, and ensured by construction
@@ -1233,11 +1266,11 @@ impl<T> [T] {
#[unstable(feature = "array_chunks", issue = "74985")]
#[inline]
pub fn array_chunks_mut<const N: usize>(&mut self) -> ArrayChunksMut<'_, T, N> {
- assert_ne!(N, 0);
+ assert_ne!(N, 0, "chunks cannot have a size of zero");
ArrayChunksMut::new(self)
}
- /// Returns an iterator over overlapping windows of `N` elements of a slice,
+ /// Returns an iterator over overlapping windows of `N` elements of a slice,
/// starting at the beginning of the slice.
///
/// This is the const generic equivalent of [`windows`].
@@ -1265,7 +1298,7 @@ impl<T> [T] {
#[unstable(feature = "array_windows", issue = "75027")]
#[inline]
pub fn array_windows<const N: usize>(&self) -> ArrayWindows<'_, T, N> {
- assert_ne!(N, 0);
+ assert_ne!(N, 0, "windows cannot have a size of zero");
ArrayWindows::new(self)
}
@@ -2465,7 +2498,7 @@ impl<T> [T] {
let mid = left + size / 2;
// SAFETY: the while condition means `size` is strictly positive, so
- // `size/2 < size`. Thus `left + size/2 < left + size`, which
+ // `size/2 < size`. Thus `left + size/2 < left + size`, which
// coupled with the `left + size <= self.len()` invariant means
// we have `left + size/2 < self.len()`, and this is in-bounds.
let cmp = f(unsafe { self.get_unchecked(mid) });
@@ -3795,7 +3828,7 @@ impl<T> [T] {
/// The slice is assumed to be partitioned according to the given predicate.
/// This means that all elements for which the predicate returns true are at the start of the slice
/// and all elements for which the predicate returns false are at the end.
- /// For example, [7, 15, 3, 5, 4, 12, 6] is a partitioned under the predicate x % 2 != 0
+ /// For example, `[7, 15, 3, 5, 4, 12, 6]` is partitioned under the predicate `x % 2 != 0`
/// (all odd numbers are at the start, all even at the end).
///
/// If this slice is not partitioned, the returned result is unspecified and meaningless,
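The doc text in that last hunk appears to belong to `partition_point`; the corrected example it cites can be checked directly with the stable API (values taken from the doc above):

```
// All odd numbers come first, then all even ones, so the slice is
// partitioned under `x % 2 != 0` and the partition point is index 4.
let v = [7, 15, 3, 5, 4, 12, 6];
let first_even = v.partition_point(|&x| x % 2 != 0);
assert_eq!(first_even, 4);
assert!(v[..first_even].iter().all(|&x| x % 2 != 0));
assert!(v[first_even..].iter().all(|&x| x % 2 == 0));
```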
diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs
index 87f77b7f2..2181f9a81 100644
--- a/library/core/src/slice/sort.rs
+++ b/library/core/src/slice/sort.rs
@@ -3,8 +3,11 @@
//! This module contains a sorting algorithm based on Orson Peters' pattern-defeating quicksort,
//! published at: <https://github.com/orlp/pdqsort>
//!
-//! Unstable sorting is compatible with libcore because it doesn't allocate memory, unlike our
+//! Unstable sorting is compatible with core because it doesn't allocate memory, unlike our
//! stable sorting implementation.
+//!
+//! In addition it also contains the core logic of the stable sort used by `slice::sort` based on
+//! TimSort.
use crate::cmp;
use crate::mem::{self, MaybeUninit, SizedTypeProperties};
@@ -18,9 +21,9 @@ struct CopyOnDrop<T> {
impl<T> Drop for CopyOnDrop<T> {
fn drop(&mut self) {
- // SAFETY: This is a helper class.
- // Please refer to its usage for correctness.
- // Namely, one must be sure that `src` and `dst` does not overlap as required by `ptr::copy_nonoverlapping`.
+ // SAFETY: This is a helper class.
+ // Please refer to its usage for correctness.
+ // Namely, one must be sure that `src` and `dst` does not overlap as required by `ptr::copy_nonoverlapping`.
unsafe {
ptr::copy_nonoverlapping(self.src, self.dest, 1);
}
@@ -831,6 +834,15 @@ fn partition_at_index_loop<'a, T, F>(
) where
F: FnMut(&T, &T) -> bool,
{
+ // Limit the amount of iterations and fall back to heapsort, similarly to `slice::sort_unstable`.
+ // This lowers the worst case running time from O(n^2) to O(n log n).
+ // FIXME: Investigate whether it would be better to use something like Median of Medians
+ // or Fast Deterministic Selection to guarantee O(n) worst case.
+ let mut limit = usize::BITS - v.len().leading_zeros();
+
+ // True if the last partitioning was reasonably balanced.
+ let mut was_balanced = true;
+
loop {
// For slices of up to this length it's probably faster to simply sort them.
const MAX_INSERTION: usize = 10;
@@ -839,6 +851,18 @@ fn partition_at_index_loop<'a, T, F>(
return;
}
+ if limit == 0 {
+ heapsort(v, is_less);
+ return;
+ }
+
+ // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
+ // some elements around. Hopefully we'll choose a better pivot this time.
+ if !was_balanced {
+ break_patterns(v);
+ limit -= 1;
+ }
+
// Choose a pivot
let (pivot, _) = choose_pivot(v, is_less);
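These hunks port the introspection idea from `slice::sort_unstable` to the selection loop: `usize::BITS - v.len().leading_zeros()` is the bit length of `len`, i.e. `floor(log2(len)) + 1` for non-zero `len`, so the loop gets roughly log2(n) attempts at a reasonable pivot (shuffling with `break_patterns` after each imbalanced partitioning) before bailing out to heapsort. A tiny check of that reading of the initial `limit`, illustrative only:

```
fn initial_limit(len: usize) -> u32 {
    usize::BITS - len.leading_zeros()
}

assert_eq!(initial_limit(1), 1);
assert_eq!(initial_limit(1024), 11); // floor(log2(1024)) + 1
assert_eq!(initial_limit(1025), 11);
```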
@@ -863,6 +887,7 @@ fn partition_at_index_loop<'a, T, F>(
}
let (mid, _) = partition(v, pivot, is_less);
+ was_balanced = cmp::min(mid, v.len() - mid) >= v.len() / 8;
// Split the slice into `left`, `pivot`, and `right`.
let (left, right) = v.split_at_mut(mid);
@@ -883,6 +908,7 @@ fn partition_at_index_loop<'a, T, F>(
}
}
+/// Reorder the slice such that the element at `index` is at its final sorted position.
pub fn partition_at_index<T, F>(
v: &mut [T],
index: usize,
@@ -927,3 +953,513 @@ where
let pivot = &mut pivot[0];
(left, pivot, right)
}
+
+/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
+///
+/// This is the integral subroutine of insertion sort.
+fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
+where
+ F: FnMut(&T, &T) -> bool,
+{
+ if v.len() >= 2 && is_less(&v[1], &v[0]) {
+ // SAFETY: Copy tmp back even if panic, and ensure unique observation.
+ unsafe {
+ // There are three ways to implement insertion here:
+ //
+ // 1. Swap adjacent elements until the first one gets to its final destination.
+ // However, this way we copy data around more than is necessary. If elements are big
+ // structures (costly to copy), this method will be slow.
+ //
+ // 2. Iterate until the right place for the first element is found. Then shift the
+ // elements succeeding it to make room for it and finally place it into the
+ // remaining hole. This is a good method.
+ //
+ // 3. Copy the first element into a temporary variable. Iterate until the right place
+ // for it is found. As we go along, copy every traversed element into the slot
+ // preceding it. Finally, copy data from the temporary variable into the remaining
+ // hole. This method is very good. Benchmarks demonstrated slightly better
+ // performance than with the 2nd method.
+ //
+ // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
+ let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
+
+ // Intermediate state of the insertion process is always tracked by `hole`, which
+ // serves two purposes:
+ // 1. Protects integrity of `v` from panics in `is_less`.
+ // 2. Fills the remaining hole in `v` in the end.
+ //
+ // Panic safety:
+ //
+ // If `is_less` panics at any point during the process, `hole` will get dropped and
+ // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
+ // initially held exactly once.
+ let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] };
+ ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
+
+ for i in 2..v.len() {
+ if !is_less(&v[i], &*tmp) {
+ break;
+ }
+ ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
+ hole.dest = &mut v[i];
+ }
+ // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
+ }
+ }
+
+ // When dropped, copies from `src` into `dest`.
+ struct InsertionHole<T> {
+ src: *const T,
+ dest: *mut T,
+ }
+
+ impl<T> Drop for InsertionHole<T> {
+ fn drop(&mut self) {
+ // SAFETY: The caller must ensure that src and dest are correctly set.
+ unsafe {
+ ptr::copy_nonoverlapping(self.src, self.dest, 1);
+ }
+ }
+ }
+}
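`insert_head` is the building block for the insertion-sort paths in this file: running it over ever longer suffixes yields a full insertion sort, which is exactly what the `len <= MAX_INSERTION` branch of `merge_sort` below does. A sketch of that composition (the helper name is hypothetical):

```
/// Illustrative: insertion sort built from repeated `insert_head` calls,
/// mirroring the short-array path of `merge_sort` further down.
fn insertion_sort<T, F>(v: &mut [T], is_less: &mut F)
where
    F: FnMut(&T, &T) -> bool,
{
    if v.len() >= 2 {
        for i in (0..v.len() - 1).rev() {
            insert_head(&mut v[i..], is_less);
        }
    }
}
```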
+
+/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
+/// stores the result into `v[..]`.
+///
+/// # Safety
+///
+/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
+/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
+unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
+where
+ F: FnMut(&T, &T) -> bool,
+{
+ let len = v.len();
+ let v = v.as_mut_ptr();
+
+ // SAFETY: mid and len must be in-bounds of v.
+ let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) };
+
+ // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
+ // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
+ // copying the lesser (or greater) one into `v`.
+ //
+ // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
+ // consumed first, then we must copy whatever is left of the shorter run into the remaining
+ // hole in `v`.
+ //
+ // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
+ // 1. Protects integrity of `v` from panics in `is_less`.
+ // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
+ //
+ // Panic safety:
+ //
+ // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
+ // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
+ // object it initially held exactly once.
+ let mut hole;
+
+ if mid <= len - mid {
+ // The left run is shorter.
+
+ // SAFETY: buf must have enough capacity for `v[..mid]`.
+ unsafe {
+ ptr::copy_nonoverlapping(v, buf, mid);
+ hole = MergeHole { start: buf, end: buf.add(mid), dest: v };
+ }
+
+ // Initially, these pointers point to the beginnings of their arrays.
+ let left = &mut hole.start;
+ let mut right = v_mid;
+ let out = &mut hole.dest;
+
+ while *left < hole.end && right < v_end {
+ // Consume the lesser side.
+ // If equal, prefer the left run to maintain stability.
+
+ // SAFETY: left and right must be valid and part of v same for out.
+ unsafe {
+ let to_copy = if is_less(&*right, &**left) {
+ get_and_increment(&mut right)
+ } else {
+ get_and_increment(left)
+ };
+ ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
+ }
+ }
+ } else {
+ // The right run is shorter.
+
+ // SAFETY: buf must have enough capacity for `v[mid..]`.
+ unsafe {
+ ptr::copy_nonoverlapping(v_mid, buf, len - mid);
+ hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid };
+ }
+
+ // Initially, these pointers point past the ends of their arrays.
+ let left = &mut hole.dest;
+ let right = &mut hole.end;
+ let mut out = v_end;
+
+ while v < *left && buf < *right {
+ // Consume the greater side.
+ // If equal, prefer the right run to maintain stability.
+
+ // SAFETY: left and right must be valid and part of v same for out.
+ unsafe {
+ let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
+ decrement_and_get(left)
+ } else {
+ decrement_and_get(right)
+ };
+ ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
+ }
+ }
+ }
+ // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
+ // it will now be copied into the hole in `v`.
+
+ unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
+ let old = *ptr;
+
+ // SAFETY: ptr.add(1) must still be a valid pointer and part of `v`.
+ *ptr = unsafe { ptr.add(1) };
+ old
+ }
+
+ unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
+ // SAFETY: ptr.sub(1) must still be a valid pointer and part of `v`.
+ *ptr = unsafe { ptr.sub(1) };
+ *ptr
+ }
+
+ // When dropped, copies the range `start..end` into `dest..`.
+ struct MergeHole<T> {
+ start: *mut T,
+ end: *mut T,
+ dest: *mut T,
+ }
+
+ impl<T> Drop for MergeHole<T> {
+ fn drop(&mut self) {
+ // SAFETY: `T` is not a zero-sized type, and these are pointers into a slice's elements.
+ unsafe {
+ let len = self.end.sub_ptr(self.start);
+ ptr::copy_nonoverlapping(self.start, self.dest, len);
+ }
+ }
+ }
+}
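`merge` copies only the shorter run into `buf` (so at most `len / 2` elements of scratch space are ever needed) and merges forwards or backwards depending on which side is shorter; the `MergeHole` drop guard keeps `v` intact if `is_less` panics mid-merge. The comparison rule itself is easier to see in a safe, allocation-based sketch that merges two already-sorted slices into a fresh `Vec` (illustrative only; the core version above must not allocate):

```
fn merge_runs<T: Clone, F>(a: &[T], b: &[T], is_less: &mut F) -> Vec<T>
where
    F: FnMut(&T, &T) -> bool,
{
    let mut out = Vec::with_capacity(a.len() + b.len());
    let (mut i, mut j) = (0, 0);
    while i < a.len() && j < b.len() {
        // On ties, take from the left run first; this is what keeps the sort stable.
        if is_less(&b[j], &a[i]) {
            out.push(b[j].clone());
            j += 1;
        } else {
            out.push(a[i].clone());
            i += 1;
        }
    }
    out.extend_from_slice(&a[i..]);
    out.extend_from_slice(&b[j..]);
    out
}
```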
+
+/// This merge sort borrows some (but not all) ideas from TimSort, which used to be described in
+/// detail [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt). However Python
+/// has switched to a Powersort based implementation.
+///
+/// The algorithm identifies strictly descending and non-descending subsequences, which are called
+/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
+/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
+/// satisfied:
+///
+/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
+/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
+///
+/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
+pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>(
+ v: &mut [T],
+ is_less: &mut CmpF,
+ elem_alloc_fn: ElemAllocF,
+ elem_dealloc_fn: ElemDeallocF,
+ run_alloc_fn: RunAllocF,
+ run_dealloc_fn: RunDeallocF,
+) where
+ CmpF: FnMut(&T, &T) -> bool,
+ ElemAllocF: Fn(usize) -> *mut T,
+ ElemDeallocF: Fn(*mut T, usize),
+ RunAllocF: Fn(usize) -> *mut TimSortRun,
+ RunDeallocF: Fn(*mut TimSortRun, usize),
+{
+ // Slices of up to this length get sorted using insertion sort.
+ const MAX_INSERTION: usize = 20;
+ // Very short runs are extended using insertion sort to span at least this many elements.
+ const MIN_RUN: usize = 10;
+
+ // The caller should have already checked that.
+ debug_assert!(!T::IS_ZST);
+
+ let len = v.len();
+
+ // Short arrays get sorted in-place via insertion sort to avoid allocations.
+ if len <= MAX_INSERTION {
+ if len >= 2 {
+ for i in (0..len - 1).rev() {
+ insert_head(&mut v[i..], is_less);
+ }
+ }
+ return;
+ }
+
+ // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
+ // shallow copies of the contents of `v` without risking the dtors running on copies if
+ // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
+ // which will always have length at most `len / 2`.
+ let buf = BufGuard::new(len / 2, elem_alloc_fn, elem_dealloc_fn);
+ let buf_ptr = buf.buf_ptr;
+
+ let mut runs = RunVec::new(run_alloc_fn, run_dealloc_fn);
+
+ // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
+ // strange decision, but consider the fact that merges more often go in the opposite direction
+ // (forwards). According to benchmarks, merging forwards is slightly faster than merging
+ // backwards. To conclude, identifying runs by traversing backwards improves performance.
+ let mut end = len;
+ while end > 0 {
+ // Find the next natural run, and reverse it if it's strictly descending.
+ let mut start = end - 1;
+ if start > 0 {
+ start -= 1;
+
+ // SAFETY: The v.get_unchecked must be fed with correct inbound indicies.
+ unsafe {
+ if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
+ while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
+ start -= 1;
+ }
+ v[start..end].reverse();
+ } else {
+ while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
+ {
+ start -= 1;
+ }
+ }
+ }
+ }
+
+ // Insert some more elements into the run if it's too short. Insertion sort is faster than
+ // merge sort on short sequences, so this significantly improves performance.
+ while start > 0 && end - start < MIN_RUN {
+ start -= 1;
+ insert_head(&mut v[start..end], is_less);
+ }
+
+ // Push this run onto the stack.
+ runs.push(TimSortRun { start, len: end - start });
+ end = start;
+
+ // Merge some pairs of adjacent runs to satisfy the invariants.
+ while let Some(r) = collapse(runs.as_slice()) {
+ let left = runs[r + 1];
+ let right = runs[r];
+ // SAFETY: `buf_ptr` must hold enough capacity for the shorter of the two sides, and
+ // neither side may be on length 0.
+ unsafe {
+ merge(&mut v[left.start..right.start + right.len], left.len, buf_ptr, is_less);
+ }
+ runs[r] = TimSortRun { start: left.start, len: left.len + right.len };
+ runs.remove(r + 1);
+ }
+ }
+
+ // Finally, exactly one run must remain in the stack.
+ debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+
+ // Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
+ // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
+ // algorithm should continue building a new run instead, `None` is returned.
+ //
+ // TimSort is infamous for its buggy implementations, as described here:
+ // http://envisage-project.eu/timsort-specification-and-verification/
+ //
+ // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
+ // Enforcing them on just top three is not sufficient to ensure that the invariants will still
+ // hold for *all* runs in the stack.
+ //
+ // This function correctly checks invariants for the top four runs. Additionally, if the top
+ // run starts at index 0, it will always demand a merge operation until the stack is fully
+ // collapsed, in order to complete the sort.
+ #[inline]
+ fn collapse(runs: &[TimSortRun]) -> Option<usize> {
+ let n = runs.len();
+ if n >= 2
+ && (runs[n - 1].start == 0
+ || runs[n - 2].len <= runs[n - 1].len
+ || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
+ || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
+ {
+ if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) }
+ } else {
+ None
+ }
+ }
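The `collapse` check above is the part TimSort implementations have historically gotten wrong: the invariants must be checked on the top four runs, not just the top three. Stripped of the `start == 0` "drain the stack at the end of the sort" special case, the test amounts to the following standalone helper over bare run lengths (hypothetical, for illustration):

```
fn needs_merge(lens: &[usize]) -> Option<usize> {
    let n = lens.len();
    if n >= 2
        && (lens[n - 2] <= lens[n - 1]
            || (n >= 3 && lens[n - 3] <= lens[n - 2] + lens[n - 1])
            || (n >= 4 && lens[n - 4] <= lens[n - 3] + lens[n - 2]))
    {
        if n >= 3 && lens[n - 3] < lens[n - 1] { Some(n - 3) } else { Some(n - 2) }
    } else {
        None
    }
}

assert_eq!(needs_merge(&[40, 20, 10]), None); // both invariants hold
assert_eq!(needs_merge(&[40, 20, 25]), Some(1)); // merge the runs at indices 1 and 2
```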
+
+ // Extremely basic versions of Vec.
+ // Their use is super limited and by having the code here, it allows reuse between the sort
+ // implementations.
+ struct BufGuard<T, ElemDeallocF>
+ where
+ ElemDeallocF: Fn(*mut T, usize),
+ {
+ buf_ptr: *mut T,
+ capacity: usize,
+ elem_dealloc_fn: ElemDeallocF,
+ }
+
+ impl<T, ElemDeallocF> BufGuard<T, ElemDeallocF>
+ where
+ ElemDeallocF: Fn(*mut T, usize),
+ {
+ fn new<ElemAllocF>(
+ len: usize,
+ elem_alloc_fn: ElemAllocF,
+ elem_dealloc_fn: ElemDeallocF,
+ ) -> Self
+ where
+ ElemAllocF: Fn(usize) -> *mut T,
+ {
+ Self { buf_ptr: elem_alloc_fn(len), capacity: len, elem_dealloc_fn }
+ }
+ }
+
+ impl<T, ElemDeallocF> Drop for BufGuard<T, ElemDeallocF>
+ where
+ ElemDeallocF: Fn(*mut T, usize),
+ {
+ fn drop(&mut self) {
+ (self.elem_dealloc_fn)(self.buf_ptr, self.capacity);
+ }
+ }
+
+ struct RunVec<RunAllocF, RunDeallocF>
+ where
+ RunAllocF: Fn(usize) -> *mut TimSortRun,
+ RunDeallocF: Fn(*mut TimSortRun, usize),
+ {
+ buf_ptr: *mut TimSortRun,
+ capacity: usize,
+ len: usize,
+ run_alloc_fn: RunAllocF,
+ run_dealloc_fn: RunDeallocF,
+ }
+
+ impl<RunAllocF, RunDeallocF> RunVec<RunAllocF, RunDeallocF>
+ where
+ RunAllocF: Fn(usize) -> *mut TimSortRun,
+ RunDeallocF: Fn(*mut TimSortRun, usize),
+ {
+ fn new(run_alloc_fn: RunAllocF, run_dealloc_fn: RunDeallocF) -> Self {
+ // Most slices can be sorted with at most 16 runs in-flight.
+ const START_RUN_CAPACITY: usize = 16;
+
+ Self {
+ buf_ptr: run_alloc_fn(START_RUN_CAPACITY),
+ capacity: START_RUN_CAPACITY,
+ len: 0,
+ run_alloc_fn,
+ run_dealloc_fn,
+ }
+ }
+
+ fn push(&mut self, val: TimSortRun) {
+ if self.len == self.capacity {
+ let old_capacity = self.capacity;
+ let old_buf_ptr = self.buf_ptr;
+
+ self.capacity = self.capacity * 2;
+ self.buf_ptr = (self.run_alloc_fn)(self.capacity);
+
+ // SAFETY: buf_ptr new and old were correctly allocated and old_buf_ptr has
+ // old_capacity valid elements.
+ unsafe {
+ ptr::copy_nonoverlapping(old_buf_ptr, self.buf_ptr, old_capacity);
+ }
+
+ (self.run_dealloc_fn)(old_buf_ptr, old_capacity);
+ }
+
+ // SAFETY: The invariant was just checked.
+ unsafe {
+ self.buf_ptr.add(self.len).write(val);
+ }
+ self.len += 1;
+ }
+
+ fn remove(&mut self, index: usize) {
+ if index >= self.len {
+ panic!("Index out of bounds");
+ }
+
+ // SAFETY: buf_ptr needs to be valid and len invariant upheld.
+ unsafe {
+ // the place we are taking from.
+ let ptr = self.buf_ptr.add(index);
+
+ // Shift everything down to fill in that spot.
+ ptr::copy(ptr.add(1), ptr, self.len - index - 1);
+ }
+ self.len -= 1;
+ }
+
+ fn as_slice(&self) -> &[TimSortRun] {
+ // SAFETY: Safe as long as buf_ptr is valid and len invariant was upheld.
+ unsafe { &*ptr::slice_from_raw_parts(self.buf_ptr, self.len) }
+ }
+
+ fn len(&self) -> usize {
+ self.len
+ }
+ }
+
+ impl<RunAllocF, RunDeallocF> core::ops::Index<usize> for RunVec<RunAllocF, RunDeallocF>
+ where
+ RunAllocF: Fn(usize) -> *mut TimSortRun,
+ RunDeallocF: Fn(*mut TimSortRun, usize),
+ {
+ type Output = TimSortRun;
+
+ fn index(&self, index: usize) -> &Self::Output {
+ if index < self.len {
+ // SAFETY: buf_ptr and len invariant must be upheld.
+ unsafe {
+ return &*(self.buf_ptr.add(index));
+ }
+ }
+
+ panic!("Index out of bounds");
+ }
+ }
+
+ impl<RunAllocF, RunDeallocF> core::ops::IndexMut<usize> for RunVec<RunAllocF, RunDeallocF>
+ where
+ RunAllocF: Fn(usize) -> *mut TimSortRun,
+ RunDeallocF: Fn(*mut TimSortRun, usize),
+ {
+ fn index_mut(&mut self, index: usize) -> &mut Self::Output {
+ if index < self.len {
+ // SAFETY: buf_ptr and len invariant must be upheld.
+ unsafe {
+ return &mut *(self.buf_ptr.add(index));
+ }
+ }
+
+ panic!("Index out of bounds");
+ }
+ }
+
+ impl<RunAllocF, RunDeallocF> Drop for RunVec<RunAllocF, RunDeallocF>
+ where
+ RunAllocF: Fn(usize) -> *mut TimSortRun,
+ RunDeallocF: Fn(*mut TimSortRun, usize),
+ {
+ fn drop(&mut self) {
+ // As long as TimSortRun is Copy we don't need to drop them individually but just the
+ // whole allocation.
+ (self.run_dealloc_fn)(self.buf_ptr, self.capacity);
+ }
+ }
+}
+
+/// Internal type used by merge_sort.
+#[derive(Clone, Copy, Debug)]
+pub struct TimSortRun {
+ len: usize,
+ start: usize,
+}
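Because `merge_sort` lives in core and cannot allocate, it receives its element buffer and run stack through the `elem_alloc_fn`/`elem_dealloc_fn` and `run_alloc_fn`/`run_dealloc_fn` callbacks; std's stable `slice::sort` wraps it with heap-backed versions. A rough, nightly-only sketch of how such a wrapper could be wired up (the name `sort_by_less` and the details are assumptions for illustration, not the actual std code):

```
#![feature(slice_internals)]
use core::slice::sort::{merge_sort, TimSortRun};
use std::alloc::{alloc, dealloc, Layout};

// Hypothetical caller-side wrapper providing heap allocation callbacks.
fn sort_by_less<T>(v: &mut [T], mut is_less: impl FnMut(&T, &T) -> bool) {
    // merge_sort expects the caller to have filtered out zero-sized types,
    // and short slices need no scratch space anyway.
    if core::mem::size_of::<T>() == 0 || v.len() < 2 {
        return;
    }
    merge_sort(
        v,
        &mut is_less,
        |len| unsafe { alloc(Layout::array::<T>(len).unwrap()) as *mut T },
        |ptr, len| unsafe { dealloc(ptr as *mut u8, Layout::array::<T>(len).unwrap()) },
        |len| unsafe { alloc(Layout::array::<TimSortRun>(len).unwrap()) as *mut TimSortRun },
        |ptr, len| unsafe { dealloc(ptr as *mut u8, Layout::array::<TimSortRun>(len).unwrap()) },
    );
}
```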