author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-18 02:49:50 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-18 02:49:50 +0000
commit     9835e2ae736235810b4ea1c162ca5e65c547e770 (patch)
tree       3fcebf40ed70e581d776a8a4c65923e8ec20e026 /library/core/src/slice
parent     Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff)
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/core/src/slice')
-rw-r--r--  library/core/src/slice/ascii.rs        |  79
-rw-r--r--  library/core/src/slice/index.rs        |  27
-rw-r--r--  library/core/src/slice/iter.rs         |  42
-rw-r--r--  library/core/src/slice/iter/macros.rs  |  75
-rw-r--r--  library/core/src/slice/memchr.rs       |  14
-rw-r--r--  library/core/src/slice/mod.rs          | 372
-rw-r--r--  library/core/src/slice/select.rs       | 302
-rw-r--r--  library/core/src/slice/sort.rs         | 181
8 files changed, 772 insertions(+), 320 deletions(-)
diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
index 5e5399acc..f3311f76a 100644
--- a/library/core/src/slice/ascii.rs
+++ b/library/core/src/slice/ascii.rs
@@ -10,12 +10,43 @@ use crate::ops;
impl [u8] {
/// Checks if all bytes in this slice are within the ASCII range.
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
+ #[rustc_const_unstable(feature = "const_slice_is_ascii", issue = "111090")]
#[must_use]
#[inline]
- pub fn is_ascii(&self) -> bool {
+ pub const fn is_ascii(&self) -> bool {
is_ascii(self)
}
+ /// If this slice [`is_ascii`](Self::is_ascii), returns it as a slice of
+ /// [ASCII characters](`ascii::Char`), otherwise returns `None`.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[must_use]
+ #[inline]
+ pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
+ if self.is_ascii() {
+ // SAFETY: Just checked that it's ASCII
+ Some(unsafe { self.as_ascii_unchecked() })
+ } else {
+ None
+ }
+ }
+
+ /// Converts this slice of bytes into a slice of ASCII characters,
+ /// without checking whether they're valid.
+ ///
+ /// # Safety
+ ///
+ /// Every byte in the slice must be in `0..=127`, or else this is UB.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[must_use]
+ #[inline]
+ pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char] {
+ let byte_ptr: *const [u8] = self;
+ let ascii_ptr = byte_ptr as *const [ascii::Char];
+ // SAFETY: The caller promised all the bytes are ASCII
+ unsafe { &*ascii_ptr }
+ }
+
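Editor's note: the new `as_ascii`/`as_ascii_unchecked` pair layers a checked conversion on top of the const `is_ascii`. A minimal sketch of the checked path, assuming the unstable `ascii_char` API as it stood around this release (`ascii::Char::to_char` for the scalar conversion; names may have shifted on later nightlies):

```rust
#![feature(ascii_char)]

fn main() {
    let bytes: &[u8] = b"hello";
    // Checked conversion: `Some` only when every byte is in 0..=127.
    let chars: &[std::ascii::Char] = bytes.as_ascii().unwrap();
    assert_eq!(chars[0].to_char(), 'h');

    // A byte >= 128 (here, part of the UTF-8 for 'é') makes it fail.
    assert!("café".as_bytes().as_ascii().is_none());
}
```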
/// Checks that two slices are an ASCII case-insensitive match.
///
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
@@ -232,11 +263,29 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
/// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
/// from `../str/mod.rs`, which does something similar for utf8 validation.
#[inline]
-fn contains_nonascii(v: usize) -> bool {
+const fn contains_nonascii(v: usize) -> bool {
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
(NONASCII_MASK & v) != 0
}
+/// ASCII test *without* the chunk-at-a-time optimizations.
+///
+/// This is carefully structured to produce nice small code -- it's smaller in
+/// `-O` than what the "obvious" ways produce under `-C opt-level=s`. If you
+/// touch it, be sure to run (and update if needed) the assembly test.
+#[unstable(feature = "str_internals", issue = "none")]
+#[doc(hidden)]
+#[inline]
+pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
+ while let [rest @ .., last] = bytes {
+ if !last.is_ascii() {
+ break;
+ }
+ bytes = rest;
+ }
+ bytes.is_empty()
+}
+
/// Optimized ASCII test that will use usize-at-a-time operations instead of
/// byte-at-a-time operations (when possible).
///
@@ -250,7 +299,7 @@ fn contains_nonascii(v: usize) -> bool {
/// If any of these loads produces something for which `contains_nonascii`
/// (above) returns true, then we know the answer is false.
#[inline]
-fn is_ascii(s: &[u8]) -> bool {
+const fn is_ascii(s: &[u8]) -> bool {
const USIZE_SIZE: usize = mem::size_of::<usize>();
let len = s.len();
@@ -262,7 +311,7 @@ fn is_ascii(s: &[u8]) -> bool {
// We also do this for architectures where `size_of::<usize>()` isn't
// sufficient alignment for `usize`, because it's a weird edge case.
if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem::align_of::<usize>() {
- return s.iter().all(|b| b.is_ascii());
+ return is_ascii_simple(s);
}
// We always read the first word unaligned, which means `align_offset` is
@@ -291,18 +340,26 @@ fn is_ascii(s: &[u8]) -> bool {
// Paranoia check about alignment, since we're about to do a bunch of
// unaligned loads. In practice this should be impossible barring a bug in
// `align_offset` though.
- debug_assert_eq!(word_ptr.addr() % mem::align_of::<usize>(), 0);
+ // While this method is allowed to spuriously fail in CTFE, if it doesn't
+ // have alignment information it should have given a `usize::MAX` for
+ // `align_offset` earlier, sending things through the scalar path instead of
+ // this one, so this check should pass if it's reachable.
+ debug_assert!(word_ptr.is_aligned_to(mem::align_of::<usize>()));
// Read subsequent words until the last aligned word, excluding the last
// aligned word itself, which is handled in the tail check later. This keeps
// the tail at most one `usize` long and avoids an extra `byte_pos == len` branch.
while byte_pos < len - USIZE_SIZE {
- debug_assert!(
- // Sanity check that the read is in bounds
- (word_ptr.addr() + USIZE_SIZE) <= start.addr().wrapping_add(len) &&
- // And that our assumptions about `byte_pos` hold.
- (word_ptr.addr() - start.addr()) == byte_pos
- );
+ // Sanity check that the read is in bounds
+ debug_assert!(byte_pos + USIZE_SIZE <= len);
+ // And that our assumptions about `byte_pos` hold.
+ debug_assert!(matches!(
+ word_ptr.cast::<u8>().guaranteed_eq(start.wrapping_add(byte_pos)),
+ // These are from the same allocation, so will hopefully always be
+ // known to match even in CTFE, but if it refuses to compare them
+ // that's ok since it's just a debug check anyway.
+ None | Some(true),
+ ));
// SAFETY: We know `word_ptr` is properly aligned (because of
// `align_offset`), and we know that we have enough bytes between `word_ptr` and the end
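Editor's note: the word-at-a-time loop relies on `contains_nonascii`: a usize with `0x80` repeated in every byte lane masks exactly the high bits that a non-ASCII byte would set. A small demonstration of the mask, assuming a 64-bit target:

```rust
fn main() {
    // usize::repeat_u8(0x80) from the code above, spelled out for 64-bit.
    const NONASCII_MASK: usize = 0x8080_8080_8080_8080;

    // Eight ASCII bytes: no lane has its high bit set.
    let word = usize::from_ne_bytes(*b"RustLang");
    assert_eq!(word & NONASCII_MASK, 0);

    // 0xC3 (>= 128) lights up its lane in the mask.
    let word = usize::from_ne_bytes([b'c', b'a', b'f', 0xC3, 0xA9, b'!', b'!', b'!']);
    assert_ne!(word & NONASCII_MASK, 0);
}
```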
diff --git a/library/core/src/slice/index.rs b/library/core/src/slice/index.rs
index 353935324..6ef9f9c95 100644
--- a/library/core/src/slice/index.rs
+++ b/library/core/src/slice/index.rs
@@ -7,10 +7,9 @@ use crate::ops;
use crate::ptr;
#[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-impl<T, I> const ops::Index<I> for [T]
+impl<T, I> ops::Index<I> for [T]
where
- I: ~const SliceIndex<[T]>,
+ I: SliceIndex<[T]>,
{
type Output = I::Output;
@@ -21,10 +20,9 @@ where
}
#[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-impl<T, I> const ops::IndexMut<I> for [T]
+impl<T, I> ops::IndexMut<I> for [T]
where
- I: ~const SliceIndex<[T]>,
+ I: SliceIndex<[T]>,
{
#[inline]
fn index_mut(&mut self, index: I) -> &mut I::Output {
@@ -162,7 +160,6 @@ mod private_slice_index {
message = "the type `{T}` cannot be indexed by `{Self}`",
label = "slice indices are of type `usize` or ranges of `usize`"
)]
-#[const_trait]
pub unsafe trait SliceIndex<T: ?Sized>: private_slice_index::Sealed {
/// The output type returned by methods.
#[stable(feature = "slice_get_slice", since = "1.28.0")]
@@ -211,7 +208,7 @@ pub unsafe trait SliceIndex<T: ?Sized>: private_slice_index::Sealed {
#[stable(feature = "slice_get_slice_impls", since = "1.15.0")]
#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-unsafe impl<T> const SliceIndex<[T]> for usize {
+unsafe impl<T> SliceIndex<[T]> for usize {
type Output = T;
#[inline]
@@ -271,7 +268,7 @@ unsafe impl<T> const SliceIndex<[T]> for usize {
/// Because `IndexRange` guarantees `start <= end`, fewer checks are needed here
/// than there are for a general `Range<usize>` (which might be `100..3`).
#[rustc_const_unstable(feature = "const_index_range_slice_index", issue = "none")]
-unsafe impl<T> const SliceIndex<[T]> for ops::IndexRange {
+unsafe impl<T> SliceIndex<[T]> for ops::IndexRange {
type Output = [T];
#[inline]
@@ -347,7 +344,7 @@ unsafe impl<T> const SliceIndex<[T]> for ops::IndexRange {
#[stable(feature = "slice_get_slice_impls", since = "1.15.0")]
#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-unsafe impl<T> const SliceIndex<[T]> for ops::Range<usize> {
+unsafe impl<T> SliceIndex<[T]> for ops::Range<usize> {
type Output = [T];
#[inline]
@@ -428,7 +425,7 @@ unsafe impl<T> const SliceIndex<[T]> for ops::Range<usize> {
#[stable(feature = "slice_get_slice_impls", since = "1.15.0")]
#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-unsafe impl<T> const SliceIndex<[T]> for ops::RangeTo<usize> {
+unsafe impl<T> SliceIndex<[T]> for ops::RangeTo<usize> {
type Output = [T];
#[inline]
@@ -466,7 +463,7 @@ unsafe impl<T> const SliceIndex<[T]> for ops::RangeTo<usize> {
#[stable(feature = "slice_get_slice_impls", since = "1.15.0")]
#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-unsafe impl<T> const SliceIndex<[T]> for ops::RangeFrom<usize> {
+unsafe impl<T> SliceIndex<[T]> for ops::RangeFrom<usize> {
type Output = [T];
#[inline]
@@ -512,7 +509,7 @@ unsafe impl<T> const SliceIndex<[T]> for ops::RangeFrom<usize> {
#[stable(feature = "slice_get_slice_impls", since = "1.15.0")]
#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-unsafe impl<T> const SliceIndex<[T]> for ops::RangeFull {
+unsafe impl<T> SliceIndex<[T]> for ops::RangeFull {
type Output = [T];
#[inline]
@@ -548,7 +545,7 @@ unsafe impl<T> const SliceIndex<[T]> for ops::RangeFull {
#[stable(feature = "inclusive_range", since = "1.26.0")]
#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-unsafe impl<T> const SliceIndex<[T]> for ops::RangeInclusive<usize> {
+unsafe impl<T> SliceIndex<[T]> for ops::RangeInclusive<usize> {
type Output = [T];
#[inline]
@@ -592,7 +589,7 @@ unsafe impl<T> const SliceIndex<[T]> for ops::RangeInclusive<usize> {
#[stable(feature = "inclusive_range", since = "1.26.0")]
#[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
-unsafe impl<T> const SliceIndex<[T]> for ops::RangeToInclusive<usize> {
+unsafe impl<T> SliceIndex<[T]> for ops::RangeToInclusive<usize> {
type Output = [T];
#[inline]
diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs
index 88b84bd13..5369fe0a9 100644
--- a/library/core/src/slice/iter.rs
+++ b/library/core/src/slice/iter.rs
@@ -13,7 +13,7 @@ use crate::iter::{
use crate::marker::{PhantomData, Send, Sized, Sync};
use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZeroUsize;
-use crate::ptr::NonNull;
+use crate::ptr::{invalid, invalid_mut, NonNull};
use super::{from_raw_parts, from_raw_parts_mut};
@@ -60,10 +60,15 @@ impl<'a, T> IntoIterator for &'a mut [T] {
#[stable(feature = "rust1", since = "1.0.0")]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Iter<'a, T: 'a> {
+ /// The pointer to the next element to return, or the past-the-end location
+ /// if the iterator is empty.
+ ///
+ /// This address will be used for all ZST elements, never changed.
ptr: NonNull<T>,
- end: *const T, // If T is a ZST, this is actually ptr+len. This encoding is picked so that
- // ptr == end is a quick test for the Iterator being empty, that works
- // for both ZST and non-ZST.
+ /// For non-ZSTs, the non-null pointer to the past-the-end element.
+ ///
+ /// For ZSTs, this is `ptr::invalid(len)`.
+ end: *const T,
_marker: PhantomData<&'a T>,
}
@@ -85,10 +90,7 @@ impl<'a, T> Iter<'a, T> {
let ptr = slice.as_ptr();
// SAFETY: Similar to `IterMut::new`.
unsafe {
- assume(!ptr.is_null());
-
- let end =
- if T::IS_ZST { ptr.wrapping_byte_add(slice.len()) } else { ptr.add(slice.len()) };
+ let end = if T::IS_ZST { invalid(slice.len()) } else { ptr.add(slice.len()) };
Self { ptr: NonNull::new_unchecked(ptr as *mut T), end, _marker: PhantomData }
}
@@ -179,10 +181,15 @@ impl<T> AsRef<[T]> for Iter<'_, T> {
#[stable(feature = "rust1", since = "1.0.0")]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct IterMut<'a, T: 'a> {
+ /// The pointer to the next element to return, or the past-the-end location
+ /// if the iterator is empty.
+ ///
+ /// This address will be used for all ZST elements, never changed.
ptr: NonNull<T>,
- end: *mut T, // If T is a ZST, this is actually ptr+len. This encoding is picked so that
- // ptr == end is a quick test for the Iterator being empty, that works
- // for both ZST and non-ZST.
+ /// For non-ZSTs, the non-null pointer to the past-the-end element.
+ ///
+ /// For ZSTs, this is `ptr::invalid_mut(len)`.
+ end: *mut T,
_marker: PhantomData<&'a mut T>,
}
@@ -219,10 +226,7 @@ impl<'a, T> IterMut<'a, T> {
// See the `next_unchecked!` and `is_empty!` macros as well as the
// `post_inc_start` method for more information.
unsafe {
- assume(!ptr.is_null());
-
- let end =
- if T::IS_ZST { ptr.wrapping_byte_add(slice.len()) } else { ptr.add(slice.len()) };
+ let end = if T::IS_ZST { invalid_mut(slice.len()) } else { ptr.add(slice.len()) };
Self { ptr: NonNull::new_unchecked(ptr), end, _marker: PhantomData }
}
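Editor's note: with this change the remaining length of a ZST iterator lives directly in the address of `end` (via `ptr::invalid(len)`) rather than in a wrapped-around `ptr + len`. The observable behavior is unchanged, as this stable sketch shows:

```rust
fn main() {
    // For zero-sized elements there is no data pointer to advance, so the
    // iterator tracks the remaining count in `end`'s address instead.
    let zsts = [(); 5];
    let mut iter = zsts.iter();
    assert_eq!(iter.len(), 5); // ExactSizeIterator, backed by the `len!` macro
    iter.next();
    iter.nth(2); // consumes three more elements
    assert_eq!(iter.len(), 1);
}
```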
@@ -685,7 +689,7 @@ where
None
} else {
self.finished = true;
- Some(mem::replace(&mut self.v, &mut []))
+ Some(mem::take(&mut self.v))
}
}
}
@@ -749,7 +753,7 @@ where
match idx_opt {
None => self.finish(),
Some(idx) => {
- let tmp = mem::replace(&mut self.v, &mut []);
+ let tmp = mem::take(&mut self.v);
let (head, tail) = tmp.split_at_mut(idx);
self.v = head;
Some(&mut tail[1..])
@@ -830,7 +834,7 @@ where
if idx == self.v.len() {
self.finished = true;
}
- let tmp = mem::replace(&mut self.v, &mut []);
+ let tmp = mem::take(&mut self.v);
let (head, tail) = tmp.split_at_mut(idx);
self.v = tail;
Some(head)
@@ -876,7 +880,7 @@ where
if idx == 0 {
self.finished = true;
}
- let tmp = mem::replace(&mut self.v, &mut []);
+ let tmp = mem::take(&mut self.v);
let (head, tail) = tmp.split_at_mut(idx);
self.v = head;
Some(tail)
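Editor's note: the `mem::take` substitutions in the split iterators are behavior-preserving. For `&mut [T]`, `Default` is the empty slice, which is exactly what the old `mem::replace(&mut self.v, &mut [])` spelled out by hand:

```rust
fn main() {
    let mut data = [1, 2, 3];
    let mut v: &mut [i32] = &mut data;
    // Swaps in the default value (the empty slice) and hands back the old one.
    let taken = std::mem::take(&mut v);
    assert_eq!(taken, &mut [1, 2, 3][..]);
    assert!(v.is_empty());
}
```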
diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
index 392752f2a..3462c0e02 100644
--- a/library/core/src/slice/iter/macros.rs
+++ b/library/core/src/slice/iter/macros.rs
@@ -1,11 +1,30 @@
//! Macros used by iterators of slice.
+// Shrinks the iterator when T is a ZST, setting the length to `new_len`.
+// `new_len` must not exceed `self.len()`.
+macro_rules! zst_set_len {
+ ($self: ident, $new_len: expr) => {{
+ #![allow(unused_unsafe)] // we're sometimes used within an unsafe block
+
+ // SAFETY: same as `invalid(_mut)`, but the macro doesn't know
+ // which versions of that function to call, so open-code it.
+ $self.end = unsafe { mem::transmute::<usize, _>($new_len) };
+ }};
+}
+
+// Shrinks the iterator when T is a ZST, reducing the length by `n`.
+// `n` must not exceed `self.len()`.
+macro_rules! zst_shrink {
+ ($self: ident, $n: ident) => {
+ let new_len = $self.end.addr() - $n;
+ zst_set_len!($self, new_len);
+ };
+}
+
// Inlining is_empty and len makes a huge performance difference
macro_rules! is_empty {
- // The way we encode the length of a ZST iterator, this works both for ZST
- // and non-ZST.
($self: ident) => {
- $self.ptr.as_ptr() as *const T == $self.end
+ if T::IS_ZST { $self.end.addr() == 0 } else { $self.ptr.as_ptr() as *const _ == $self.end }
};
}
@@ -13,16 +32,13 @@ macro_rules! len {
($self: ident) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block
- let start = $self.ptr;
if T::IS_ZST {
- // This _cannot_ use `ptr_sub` because we depend on wrapping
- // to represent the length of long ZST slice iterators.
- $self.end.addr().wrapping_sub(start.as_ptr().addr())
+ $self.end.addr()
} else {
// To get rid of some bounds checks (see `position`), we use ptr_sub instead of
// offset_from (Tested by `codegen/slice-position-bounds-check`.)
// SAFETY: by the type invariant pointers are aligned and `start <= end`
- unsafe { $self.end.sub_ptr(start.as_ptr()) }
+ unsafe { $self.end.sub_ptr($self.ptr.as_ptr()) }
}
}};
}
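Editor's note: `zst_set_len!` and the new `len!` arm are two halves of one round trip: the length is stored as the address of an invalid (provenance-free) pointer and read back with `addr()`. A sketch using the strict-provenance API of this era (`ptr::invalid` was later renamed; recent nightlies spell it `ptr::without_provenance`):

```rust
#![feature(strict_provenance)]

fn main() {
    // Manufacture a pointer whose address *is* the length...
    let end: *const () = std::ptr::invalid(42);
    // ...and read the length back out, as `len!` does for ZSTs.
    assert_eq!(end.addr(), 42);
}
```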
@@ -50,14 +66,6 @@ macro_rules! iterator {
($self: ident) => {& $( $mut_ )? *$self.pre_dec_end(1)}
}
- // Shrinks the iterator when T is a ZST, by moving the end of the iterator
- // backwards by `n`. `n` must not exceed `self.len()`.
- macro_rules! zst_shrink {
- ($self: ident, $n: ident) => {
- $self.end = $self.end.wrapping_byte_sub($n);
- }
- }
-
impl<'a, T> $name<'a, T> {
// Helper function for creating a slice from the iterator.
#[inline(always)]
@@ -73,16 +81,15 @@ macro_rules! iterator {
// Unsafe because the offset must not exceed `self.len()`.
#[inline(always)]
unsafe fn post_inc_start(&mut self, offset: usize) -> * $raw_mut T {
- if mem::size_of::<T>() == 0 {
+ let old = self.ptr;
+ if T::IS_ZST {
zst_shrink!(self, offset);
- self.ptr.as_ptr()
} else {
- let old = self.ptr.as_ptr();
// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// so this new pointer is inside `self` and thus guaranteed to be non-null.
- self.ptr = unsafe { NonNull::new_unchecked(self.ptr.as_ptr().add(offset)) };
- old
+ self.ptr = unsafe { self.ptr.add(offset) };
}
+ old.as_ptr()
}
// Helper function for moving the end of the iterator backwards by `offset` elements,
@@ -124,12 +131,10 @@ macro_rules! iterator {
fn next(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks
- // SAFETY: `assume` calls are safe since a slice's start pointer
- // must be non-null, and slices over non-ZSTs must also have a
- // non-null end pointer. The call to `next_unchecked!` is safe
- // since we check if the iterator is empty first.
+ // SAFETY: `assume` call is safe because slices over non-ZSTs must
+ // have a non-null end pointer. The call to `next_unchecked!` is
+ // safe since we check if the iterator is empty first.
unsafe {
- assume(!self.ptr.as_ptr().is_null());
if !<T>::IS_ZST {
assume(!self.end.is_null());
}
@@ -157,9 +162,7 @@ macro_rules! iterator {
if n >= len!(self) {
// This iterator is now empty.
if T::IS_ZST {
- // We have to do it this way as `ptr` may never be 0, but `end`
- // could be (due to wrapping).
- self.end = self.ptr.as_ptr();
+ zst_set_len!(self, 0);
} else {
// SAFETY: end can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr
unsafe {
@@ -339,12 +342,10 @@ macro_rules! iterator {
fn next_back(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks
- // SAFETY: `assume` calls are safe since a slice's start pointer must be non-null,
- // and slices over non-ZSTs must also have a non-null end pointer.
- // The call to `next_back_unchecked!` is safe since we check if the iterator is
- // empty first.
+ // SAFETY: `assume` call is safe because slices over non-ZSTs must
+ // have a non-null end pointer. The call to `next_back_unchecked!`
+ // is safe since we check if the iterator is empty first.
unsafe {
- assume(!self.ptr.as_ptr().is_null());
if !<T>::IS_ZST {
assume(!self.end.is_null());
}
@@ -360,7 +361,11 @@ macro_rules! iterator {
fn nth_back(&mut self, n: usize) -> Option<$elem> {
if n >= len!(self) {
// This iterator is now empty.
- self.end = self.ptr.as_ptr();
+ if T::IS_ZST {
+ zst_set_len!(self, 0);
+ } else {
+ self.end = self.ptr.as_ptr();
+ }
return None;
}
// SAFETY: We are in bounds. `pre_dec_end` does the right thing even for ZSTs.
diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs
index 98c8349eb..3a8b59d72 100644
--- a/library/core/src/slice/memchr.rs
+++ b/library/core/src/slice/memchr.rs
@@ -1,7 +1,6 @@
// Original implementation taken from rust-memchr.
// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
-use crate::cmp;
use crate::mem;
const LO_USIZE: usize = usize::repeat_u8(0x01);
@@ -83,8 +82,12 @@ const fn memchr_aligned(x: u8, text: &[u8]) -> Option<usize> {
let mut offset = ptr.align_offset(USIZE_BYTES);
if offset > 0 {
- offset = cmp::min(offset, len);
- if let Some(index) = memchr_naive(x, &text[..offset]) {
+ // FIXME(const-hack, fee1-dead): replace with min
+ offset = if offset < len { offset } else { len };
+ // FIXME(const-hack, fee1-dead): replace with range slicing
+ // SAFETY: offset is within bounds
+ let slice = unsafe { super::from_raw_parts(text.as_ptr(), offset) };
+ if let Some(index) = memchr_naive(x, slice) {
return Some(index);
}
}
@@ -110,7 +113,10 @@ const fn memchr_aligned(x: u8, text: &[u8]) -> Option<usize> {
// Find the byte after the point the body loop stopped.
// FIXME(const-hack): Use `?` instead.
- if let Some(i) = memchr_naive(x, &text[offset..]) { Some(offset + i) } else { None }
+ // FIXME(const-hack, fee1-dead): use range slicing
+ // SAFETY: offset is within bounds
+ let slice = unsafe { super::from_raw_parts(text.as_ptr().add(offset), text.len() - offset) };
+ if let Some(i) = memchr_naive(x, slice) { Some(offset + i) } else { None }
}
/// Returns the last index matching the byte `x` in `text`.
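Editor's note: the `LO_USIZE`/`HI_USIZE` constants at the top of this file drive the classic SWAR zero-byte test that the aligned loop applies to `word ^ repeated_needle`. A standalone sketch of that trick, assuming a 64-bit target:

```rust
fn main() {
    const LO: usize = 0x0101_0101_0101_0101; // usize::repeat_u8(0x01)
    const HI: usize = 0x8080_8080_8080_8080; // usize::repeat_u8(0x80)

    // True if any byte lane of `x` is zero: the subtraction borrows through
    // a zero lane, and `!x & HI` filters lanes whose high bit was already set.
    fn contains_zero_byte(x: usize) -> bool {
        x.wrapping_sub(LO) & !x & HI != 0
    }

    let needle = b'f';
    // XOR with the repeated needle zeroes exactly the matching lanes.
    let mask = usize::from_ne_bytes([needle; 8]);
    assert!(contains_zero_byte(usize::from_ne_bytes(*b"codeface") ^ mask));
    assert!(!contains_zero_byte(usize::from_ne_bytes(*b"no-match") ^ mask));
}
```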
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index f541808a6..ea0181e35 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -42,8 +42,13 @@ mod index;
mod iter;
mod raw;
mod rotate;
+mod select;
mod specialize;
+#[unstable(feature = "str_internals", issue = "none")]
+#[doc(hidden)]
+pub use ascii::is_ascii_simple;
+
#[stable(feature = "rust1", since = "1.0.0")]
pub use iter::{Chunks, ChunksMut, Windows};
#[stable(feature = "rust1", since = "1.0.0")]
@@ -315,6 +320,264 @@ impl<T> [T] {
if let [.., last] = self { Some(last) } else { None }
}
+ /// Returns the first `N` elements of the slice, or `None` if it has fewer than `N` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(slice_first_last_chunk)]
+ ///
+ /// let u = [10, 40, 30];
+ /// assert_eq!(Some(&[10, 40]), u.first_chunk::<2>());
+ ///
+ /// let v: &[i32] = &[10];
+ /// assert_eq!(None, v.first_chunk::<2>());
+ ///
+ /// let w: &[i32] = &[];
+ /// assert_eq!(Some(&[]), w.first_chunk::<0>());
+ /// ```
+ #[unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[rustc_const_unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[inline]
+ pub const fn first_chunk<const N: usize>(&self) -> Option<&[T; N]> {
+ if self.len() < N {
+ None
+ } else {
+ // SAFETY: We explicitly check for the correct number of elements,
+ // and do not let the reference outlive the slice.
+ Some(unsafe { &*(self.as_ptr() as *const [T; N]) })
+ }
+ }
+
+ /// Returns a mutable reference to the first `N` elements of the slice,
+ /// or `None` if it has fewer than `N` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(slice_first_last_chunk)]
+ ///
+ /// let x = &mut [0, 1, 2];
+ ///
+ /// if let Some(first) = x.first_chunk_mut::<2>() {
+ /// first[0] = 5;
+ /// first[1] = 4;
+ /// }
+ /// assert_eq!(x, &[5, 4, 2]);
+ /// ```
+ #[unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[rustc_const_unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[inline]
+ pub const fn first_chunk_mut<const N: usize>(&mut self) -> Option<&mut [T; N]> {
+ if self.len() < N {
+ None
+ } else {
+ // SAFETY: We explicitly check for the correct number of elements,
+ // do not let the reference outlive the slice,
+ // and require exclusive access to the entire slice to mutate the chunk.
+ Some(unsafe { &mut *(self.as_mut_ptr() as *mut [T; N]) })
+ }
+ }
+
+ /// Returns the first `N` elements of the slice and the remainder,
+ /// or `None` if it has fewer than `N` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(slice_first_last_chunk)]
+ ///
+ /// let x = &[0, 1, 2];
+ ///
+ /// if let Some((first, elements)) = x.split_first_chunk::<2>() {
+ /// assert_eq!(first, &[0, 1]);
+ /// assert_eq!(elements, &[2]);
+ /// }
+ /// ```
+ #[unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[rustc_const_unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[inline]
+ pub const fn split_first_chunk<const N: usize>(&self) -> Option<(&[T; N], &[T])> {
+ if self.len() < N {
+ None
+ } else {
+ // SAFETY: We manually verified the bounds of the split.
+ let (first, tail) = unsafe { self.split_at_unchecked(N) };
+
+ // SAFETY: We explicitly check for the correct number of elements,
+ // and do not let the references outlive the slice.
+ Some((unsafe { &*(first.as_ptr() as *const [T; N]) }, tail))
+ }
+ }
+
+ /// Returns a mutable reference to the first `N` elements of the slice and the remainder,
+ /// or `None` if it has fewer than `N` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(slice_first_last_chunk)]
+ ///
+ /// let x = &mut [0, 1, 2];
+ ///
+ /// if let Some((first, elements)) = x.split_first_chunk_mut::<2>() {
+ /// first[0] = 3;
+ /// first[1] = 4;
+ /// elements[0] = 5;
+ /// }
+ /// assert_eq!(x, &[3, 4, 5]);
+ /// ```
+ #[unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[rustc_const_unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[inline]
+ pub const fn split_first_chunk_mut<const N: usize>(
+ &mut self,
+ ) -> Option<(&mut [T; N], &mut [T])> {
+ if self.len() < N {
+ None
+ } else {
+ // SAFETY: We manually verified the bounds of the split.
+ let (first, tail) = unsafe { self.split_at_mut_unchecked(N) };
+
+ // SAFETY: We explicitly check for the correct number of elements,
+ // do not let the reference outlive the slice,
+ // and enforce exclusive mutability of the chunk by the split.
+ Some((unsafe { &mut *(first.as_mut_ptr() as *mut [T; N]) }, tail))
+ }
+ }
+
+ /// Returns the last `N` elements of the slice and the remainder,
+ /// or `None` if it has fewer than `N` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(slice_first_last_chunk)]
+ ///
+ /// let x = &[0, 1, 2];
+ ///
+ /// if let Some((last, elements)) = x.split_last_chunk::<2>() {
+ /// assert_eq!(last, &[1, 2]);
+ /// assert_eq!(elements, &[0]);
+ /// }
+ /// ```
+ #[unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[rustc_const_unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[inline]
+ pub const fn split_last_chunk<const N: usize>(&self) -> Option<(&[T; N], &[T])> {
+ if self.len() < N {
+ None
+ } else {
+ // SAFETY: We manually verified the bounds of the split.
+ let (init, last) = unsafe { self.split_at_unchecked(self.len() - N) };
+
+ // SAFETY: We explicitly check for the correct number of elements,
+ // and do not let the references outlive the slice.
+ Some((unsafe { &*(last.as_ptr() as *const [T; N]) }, init))
+ }
+ }
+
+ /// Returns a mutable reference to the last `N` elements of the slice and the remainder,
+ /// or `None` if it has fewer than `N` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(slice_first_last_chunk)]
+ ///
+ /// let x = &mut [0, 1, 2];
+ ///
+ /// if let Some((last, elements)) = x.split_last_chunk_mut::<2>() {
+ /// last[0] = 3;
+ /// last[1] = 4;
+ /// elements[0] = 5;
+ /// }
+ /// assert_eq!(x, &[5, 3, 4]);
+ /// ```
+ #[unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[rustc_const_unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[inline]
+ pub const fn split_last_chunk_mut<const N: usize>(
+ &mut self,
+ ) -> Option<(&mut [T; N], &mut [T])> {
+ if self.len() < N {
+ None
+ } else {
+ // SAFETY: We manually verified the bounds of the split.
+ let (init, last) = unsafe { self.split_at_mut_unchecked(self.len() - N) };
+
+ // SAFETY: We explicitly check for the correct number of elements,
+ // do not let the reference outlive the slice,
+ // and enforce exclusive mutability of the chunk by the split.
+ Some((unsafe { &mut *(last.as_mut_ptr() as *mut [T; N]) }, init))
+ }
+ }
+
+ /// Returns the last `N` elements of the slice, or `None` if it has fewer than `N` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(slice_first_last_chunk)]
+ ///
+ /// let u = [10, 40, 30];
+ /// assert_eq!(Some(&[40, 30]), u.last_chunk::<2>());
+ ///
+ /// let v: &[i32] = &[10];
+ /// assert_eq!(None, v.last_chunk::<2>());
+ ///
+ /// let w: &[i32] = &[];
+ /// assert_eq!(Some(&[]), w.last_chunk::<0>());
+ /// ```
+ #[unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[rustc_const_unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[inline]
+ pub const fn last_chunk<const N: usize>(&self) -> Option<&[T; N]> {
+ if self.len() < N {
+ None
+ } else {
+ // SAFETY: We manually verified the bounds of the slice.
+ // FIXME: Without const traits, we need this instead of `get_unchecked`.
+ let last = unsafe { self.split_at_unchecked(self.len() - N).1 };
+
+ // SAFETY: We explicitly check for the correct number of elements,
+ // and do not let the references outlive the slice.
+ Some(unsafe { &*(last.as_ptr() as *const [T; N]) })
+ }
+ }
+
+ /// Returns a mutable reference to the last `N` elements of the slice,
+ /// or `None` if it has fewer than `N` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(slice_first_last_chunk)]
+ ///
+ /// let x = &mut [0, 1, 2];
+ ///
+ /// if let Some(last) = x.last_chunk_mut::<2>() {
+ /// last[0] = 10;
+ /// last[1] = 20;
+ /// }
+ /// assert_eq!(x, &[0, 10, 20]);
+ /// ```
+ #[unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[rustc_const_unstable(feature = "slice_first_last_chunk", issue = "111774")]
+ #[inline]
+ pub const fn last_chunk_mut<const N: usize>(&mut self) -> Option<&mut [T; N]> {
+ if self.len() < N {
+ None
+ } else {
+ // SAFETY: We manually verified the bounds of the slice.
+ // FIXME: Without const traits, we need this instead of `get_unchecked`.
+ let last = unsafe { self.split_at_mut_unchecked(self.len() - N).1 };
+
+ // SAFETY: We explicitly check for the correct number of elements,
+ // do not let the reference outlive the slice,
+ // and require exclusive access to the entire slice to mutate the chunk.
+ Some(unsafe { &mut *(last.as_mut_ptr() as *mut [T; N]) })
+ }
+ }
+
/// Returns a reference to an element or subslice depending on the type of
/// index.
///
@@ -333,12 +596,11 @@ impl<T> [T] {
/// assert_eq!(None, v.get(0..4));
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
- #[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
#[inline]
#[must_use]
- pub const fn get<I>(&self, index: I) -> Option<&I::Output>
+ pub fn get<I>(&self, index: I) -> Option<&I::Output>
where
- I: ~const SliceIndex<Self>,
+ I: SliceIndex<Self>,
{
index.get(self)
}
@@ -359,12 +621,11 @@ impl<T> [T] {
/// assert_eq!(x, &[0, 42, 2]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
- #[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
#[inline]
#[must_use]
- pub const fn get_mut<I>(&mut self, index: I) -> Option<&mut I::Output>
+ pub fn get_mut<I>(&mut self, index: I) -> Option<&mut I::Output>
where
- I: ~const SliceIndex<Self>,
+ I: SliceIndex<Self>,
{
index.get_mut(self)
}
@@ -392,12 +653,11 @@ impl<T> [T] {
/// }
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
- #[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
#[inline]
#[must_use]
- pub const unsafe fn get_unchecked<I>(&self, index: I) -> &I::Output
+ pub unsafe fn get_unchecked<I>(&self, index: I) -> &I::Output
where
- I: ~const SliceIndex<Self>,
+ I: SliceIndex<Self>,
{
// SAFETY: the caller must uphold most of the safety requirements for `get_unchecked`;
// the slice is dereferenceable because `self` is a safe reference.
@@ -430,12 +690,11 @@ impl<T> [T] {
/// assert_eq!(x, &[1, 13, 4]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
- #[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
#[inline]
#[must_use]
- pub const unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut I::Output
+ pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut I::Output
where
- I: ~const SliceIndex<Self>,
+ I: SliceIndex<Self>,
{
// SAFETY: the caller must uphold the safety requirements for `get_unchecked_mut`;
// the slice is dereferenceable because `self` is a safe reference.
@@ -678,9 +937,8 @@ impl<T> [T] {
/// assert!(v == [3, 2, 1]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
- #[rustc_const_unstable(feature = "const_reverse", issue = "100784")]
#[inline]
- pub const fn reverse(&mut self) {
+ pub fn reverse(&mut self) {
let half_len = self.len() / 2;
let Range { start, end } = self.as_mut_ptr_range();
@@ -703,7 +961,7 @@ impl<T> [T] {
revswap(front_half, back_half, half_len);
#[inline]
- const fn revswap<T>(a: &mut [T], b: &mut [T], n: usize) {
+ fn revswap<T>(a: &mut [T], b: &mut [T], n: usize) {
debug_assert!(a.len() == n);
debug_assert!(b.len() == n);
@@ -996,7 +1254,7 @@ impl<T> [T] {
#[unstable(feature = "slice_as_chunks", issue = "74985")]
#[inline]
#[must_use]
- pub unsafe fn as_chunks_unchecked<const N: usize>(&self) -> &[[T; N]] {
+ pub const unsafe fn as_chunks_unchecked<const N: usize>(&self) -> &[[T; N]] {
let this = self;
// SAFETY: Caller must guarantee that `N` is nonzero and exactly divides the slice length
let new_len = unsafe {
@@ -1044,7 +1302,7 @@ impl<T> [T] {
#[inline]
#[track_caller]
#[must_use]
- pub fn as_chunks<const N: usize>(&self) -> (&[[T; N]], &[T]) {
+ pub const fn as_chunks<const N: usize>(&self) -> (&[[T; N]], &[T]) {
assert!(N != 0, "chunk size must be non-zero");
let len = self.len() / N;
let (multiple_of_n, remainder) = self.split_at(len * N);
@@ -1076,7 +1334,7 @@ impl<T> [T] {
#[inline]
#[track_caller]
#[must_use]
- pub fn as_rchunks<const N: usize>(&self) -> (&[T], &[[T; N]]) {
+ pub const fn as_rchunks<const N: usize>(&self) -> (&[T], &[[T; N]]) {
assert!(N != 0, "chunk size must be non-zero");
let len = self.len() / N;
let (remainder, multiple_of_n) = self.split_at(self.len() - len * N);
@@ -1153,7 +1411,7 @@ impl<T> [T] {
#[unstable(feature = "slice_as_chunks", issue = "74985")]
#[inline]
#[must_use]
- pub unsafe fn as_chunks_unchecked_mut<const N: usize>(&mut self) -> &mut [[T; N]] {
+ pub const unsafe fn as_chunks_unchecked_mut<const N: usize>(&mut self) -> &mut [[T; N]] {
let this = &*self;
// SAFETY: Caller must guarantee that `N` is nonzero and exactly divides the slice length
let new_len = unsafe {
@@ -1196,7 +1454,7 @@ impl<T> [T] {
#[inline]
#[track_caller]
#[must_use]
- pub fn as_chunks_mut<const N: usize>(&mut self) -> (&mut [[T; N]], &mut [T]) {
+ pub const fn as_chunks_mut<const N: usize>(&mut self) -> (&mut [[T; N]], &mut [T]) {
assert!(N != 0, "chunk size must be non-zero");
let len = self.len() / N;
let (multiple_of_n, remainder) = self.split_at_mut(len * N);
@@ -1234,7 +1492,7 @@ impl<T> [T] {
#[inline]
#[track_caller]
#[must_use]
- pub fn as_rchunks_mut<const N: usize>(&mut self) -> (&mut [T], &mut [[T; N]]) {
+ pub const fn as_rchunks_mut<const N: usize>(&mut self) -> (&mut [T], &mut [[T; N]]) {
assert!(N != 0, "chunk size must be non-zero");
let len = self.len() / N;
let (remainder, multiple_of_n) = self.split_at_mut(self.len() - len * N);
@@ -1596,7 +1854,8 @@ impl<T> [T] {
/// }
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
- #[rustc_const_unstable(feature = "const_slice_split_at_not_mut", issue = "101158")]
+ #[rustc_const_stable(feature = "const_slice_split_at_not_mut", since = "1.71.0")]
+ #[rustc_allow_const_fn_unstable(slice_split_at_unchecked)]
#[inline]
#[track_caller]
#[must_use]
@@ -2746,8 +3005,9 @@ impl<T> [T] {
///
/// # Current implementation
///
- /// The current algorithm is based on the quickselect portion of the same quicksort algorithm
- /// used for [`sort_unstable`].
+ /// The current algorithm is an introselect implementation based on Pattern Defeating Quicksort, which is also
+ /// the basis for [`sort_unstable`]. The fallback algorithm is Median of Medians using Tukey's Ninther for
+ /// pivot selection, which guarantees linear runtime for all inputs.
///
/// [`sort_unstable`]: slice::sort_unstable
///
@@ -2776,7 +3036,7 @@ impl<T> [T] {
where
T: Ord,
{
- sort::partition_at_index(self, index, T::lt)
+ select::partition_at_index(self, index, T::lt)
}
/// Reorder the slice with a comparator function such that the element at `index` is at its
@@ -2797,8 +3057,9 @@ impl<T> [T] {
///
/// # Current implementation
///
- /// The current algorithm is based on the quickselect portion of the same quicksort algorithm
- /// used for [`sort_unstable`].
+ /// The current algorithm is an introselect implementation based on Pattern Defeating Quicksort, which is also
+ /// the basis for [`sort_unstable`]. The fallback algorithm is Median of Medians using Tukey's Ninther for
+ /// pivot selection, which guarantees linear runtime for all inputs.
///
/// [`sort_unstable`]: slice::sort_unstable
///
@@ -2831,7 +3092,7 @@ impl<T> [T] {
where
F: FnMut(&T, &T) -> Ordering,
{
- sort::partition_at_index(self, index, |a: &T, b: &T| compare(a, b) == Less)
+ select::partition_at_index(self, index, |a: &T, b: &T| compare(a, b) == Less)
}
/// Reorder the slice with a key extraction function such that the element at `index` is at its
@@ -2887,7 +3148,7 @@ impl<T> [T] {
F: FnMut(&T) -> K,
K: Ord,
{
- sort::partition_at_index(self, index, |a: &T, b: &T| f(a).lt(&f(b)))
+ select::partition_at_index(self, index, |a: &T, b: &T| f(a).lt(&f(b)))
}
/// Moves all consecutive repeated elements to the end of the slice according to the
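Editor's note: the rewritten documentation above describes the new selection strategy; the public entry point and its contract are unchanged. Usage, for reference:

```rust
fn main() {
    let mut v = [-5i32, 4, 1, -3, 2];
    // Places the value that a full sort would put at index 2 (the median),
    // with <= elements before it and >= elements after it.
    let (_lesser, median, _greater) = v.select_nth_unstable(2);
    assert_eq!(*median, 1);
    assert_eq!(v[2], 1);
}
```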
@@ -3479,44 +3740,13 @@ impl<T> [T] {
// Ts = size_of::<U> / gcd(size_of::<T>, size_of::<U>)
//
// Luckily since all this is constant-evaluated... performance here matters not!
- #[inline]
- fn gcd(a: usize, b: usize) -> usize {
- use crate::intrinsics;
- // iterative stein’s algorithm
- // We should still make this `const fn` (and revert to recursive algorithm if we do)
- // because relying on llvm to consteval all this is… well, it makes me uncomfortable.
-
- // SAFETY: `a` and `b` are checked to be non-zero values.
- let (ctz_a, mut ctz_b) = unsafe {
- if a == 0 {
- return b;
- }
- if b == 0 {
- return a;
- }
- (intrinsics::cttz_nonzero(a), intrinsics::cttz_nonzero(b))
- };
- let k = ctz_a.min(ctz_b);
- let mut a = a >> ctz_a;
- let mut b = b;
- loop {
- // remove all factors of 2 from b
- b >>= ctz_b;
- if a > b {
- mem::swap(&mut a, &mut b);
- }
- b = b - a;
- // SAFETY: `b` is checked to be non-zero.
- unsafe {
- if b == 0 {
- break;
- }
- ctz_b = intrinsics::cttz_nonzero(b);
- }
- }
- a << k
+ const fn gcd(a: usize, b: usize) -> usize {
+ if b == 0 { a } else { gcd(b, a % b) }
}
- let gcd: usize = gcd(mem::size_of::<T>(), mem::size_of::<U>());
+
+ // Explicitly wrap the function call in a const block so it gets
+ // constant-evaluated even in debug mode.
+ let gcd: usize = const { gcd(mem::size_of::<T>(), mem::size_of::<U>()) };
let ts: usize = mem::size_of::<U>() / gcd;
let us: usize = mem::size_of::<T>() / gcd;
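Editor's note: the replacement `gcd` trades Stein's algorithm for the plainly recursive Euclidean form, which is `const`-evaluable, and the inline `const { ... }` block (unstable `inline_const` at the time; stabilized in Rust 1.79) guarantees evaluation at compile time even in debug builds. A sketch of both pieces together:

```rust
// Recursive Euclidean gcd, usable in const contexts.
const fn gcd(a: usize, b: usize) -> usize {
    if b == 0 { a } else { gcd(b, a % b) }
}

fn main() {
    // The inline const block forces compile-time evaluation, so no gcd
    // code runs at runtime even without optimizations.
    let g: usize = const { gcd(4, 6) };
    assert_eq!(g, 2);
}
```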
@@ -4262,7 +4492,7 @@ impl<T, const N: usize> [[T; N]] {
/// assert!(empty_slice_of_arrays.flatten().is_empty());
/// ```
#[unstable(feature = "slice_flatten", issue = "95629")]
- pub fn flatten(&self) -> &[T] {
+ pub const fn flatten(&self) -> &[T] {
let len = if T::IS_ZST {
self.len().checked_mul(N).expect("slice len overflow")
} else {
@@ -4404,8 +4634,7 @@ where
}
#[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")]
-impl<T> const Default for &[T] {
+impl<T> Default for &[T] {
/// Creates an empty slice.
fn default() -> Self {
&[]
@@ -4413,8 +4642,7 @@ impl<T> const Default for &[T] {
}
#[stable(feature = "mut_slice_default", since = "1.5.0")]
-#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")]
-impl<T> const Default for &mut [T] {
+impl<T> Default for &mut [T] {
/// Creates a mutable empty slice.
fn default() -> Self {
&mut []
@@ -4458,7 +4686,7 @@ impl<T, const N: usize> SlicePattern for [T; N] {
/// This will do `binomial(N + 1, 2) = N * (N + 1) / 2 = 0, 1, 3, 6, 10, ..`
/// comparison operations.
fn get_many_check_valid<const N: usize>(indices: &[usize; N], len: usize) -> bool {
- // NB: The optimzer should inline the loops into a sequence
+ // NB: The optimizer should inline the loops into a sequence
// of instructions without additional branching.
let mut valid = true;
for (i, &idx) in indices.iter().enumerate() {
diff --git a/library/core/src/slice/select.rs b/library/core/src/slice/select.rs
new file mode 100644
index 000000000..ffc193578
--- /dev/null
+++ b/library/core/src/slice/select.rs
@@ -0,0 +1,302 @@
+//! Slice selection
+//!
+//! This module contains the implementation for `slice::select_nth_unstable`.
+//! It uses an introselect algorithm based on Orson Peters' pattern-defeating quicksort,
+//! published at: <https://github.com/orlp/pdqsort>
+//!
+//! The fallback algorithm used for introselect is Median of Medians using Tukey's Ninther
+//! for pivot selection. Using this as a fallback ensures O(n) worst case running time with
+//! better performance than one would get using heapsort as fallback.
+
+use crate::cmp;
+use crate::mem::{self, SizedTypeProperties};
+use crate::slice::sort::{
+ break_patterns, choose_pivot, insertion_sort_shift_left, partition, partition_equal,
+};
+
+// For slices of up to this length it's probably faster to simply sort them.
+// Defined at the module scope because it's used in multiple functions.
+const MAX_INSERTION: usize = 10;
+
+fn partition_at_index_loop<'a, T, F>(
+ mut v: &'a mut [T],
+ mut index: usize,
+ is_less: &mut F,
+ mut pred: Option<&'a T>,
+) where
+ F: FnMut(&T, &T) -> bool,
+{
+ // Limit the number of iterations and fall back to fast deterministic selection
+ // to ensure O(n) worst case running time. This limit needs to be constant, because
+ // using `ilog2(len)` like in `sort` would result in O(n log n) time complexity.
+ // The exact value of the limit is chosen somewhat arbitrarily, but for most inputs
+ // bad pivot selections should be relatively rare, so the limit usually shouldn't be
+ // reached anyway.
+ let mut limit = 16;
+
+ // True if the last partitioning was reasonably balanced.
+ let mut was_balanced = true;
+
+ loop {
+ if v.len() <= MAX_INSERTION {
+ if v.len() > 1 {
+ insertion_sort_shift_left(v, 1, is_less);
+ }
+ return;
+ }
+
+ if limit == 0 {
+ median_of_medians(v, is_less, index);
+ return;
+ }
+
+ // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
+ // some elements around. Hopefully we'll choose a better pivot this time.
+ if !was_balanced {
+ break_patterns(v);
+ limit -= 1;
+ }
+
+ // Choose a pivot
+ let (pivot, _) = choose_pivot(v, is_less);
+
+ // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
+ // slice. Partition the slice into elements equal to and elements greater than the pivot.
+ // This case is usually hit when the slice contains many duplicate elements.
+ if let Some(p) = pred {
+ if !is_less(p, &v[pivot]) {
+ let mid = partition_equal(v, pivot, is_less);
+
+ // If we've passed our index, then we're good.
+ if mid > index {
+ return;
+ }
+
+ // Otherwise, continue sorting elements greater than the pivot.
+ v = &mut v[mid..];
+ index = index - mid;
+ pred = None;
+ continue;
+ }
+ }
+
+ let (mid, _) = partition(v, pivot, is_less);
+ was_balanced = cmp::min(mid, v.len() - mid) >= v.len() / 8;
+
+ // Split the slice into `left`, `pivot`, and `right`.
+ let (left, right) = v.split_at_mut(mid);
+ let (pivot, right) = right.split_at_mut(1);
+ let pivot = &pivot[0];
+
+ if mid < index {
+ v = right;
+ index = index - mid - 1;
+ pred = Some(pivot);
+ } else if mid > index {
+ v = left;
+ } else {
+ // If mid == index, then we're done, since partition() guaranteed that all elements
+ // after mid are greater than or equal to the pivot.
+ return;
+ }
+ }
+}
+
+/// Helper function that returns the index of the minimum element in the slice using the given
+/// comparator function
+fn min_index<T, F: FnMut(&T, &T) -> bool>(slice: &[T], is_less: &mut F) -> Option<usize> {
+ slice
+ .iter()
+ .enumerate()
+ .reduce(|acc, t| if is_less(t.1, acc.1) { t } else { acc })
+ .map(|(i, _)| i)
+}
+
+/// Helper function that returns the index of the maximum element in the slice using the given
+/// comparator function
+fn max_index<T, F: FnMut(&T, &T) -> bool>(slice: &[T], is_less: &mut F) -> Option<usize> {
+ slice
+ .iter()
+ .enumerate()
+ .reduce(|acc, t| if is_less(acc.1, t.1) { t } else { acc })
+ .map(|(i, _)| i)
+}
+
+/// Reorder the slice such that the element at `index` is at its final sorted position.
+pub fn partition_at_index<T, F>(
+ v: &mut [T],
+ index: usize,
+ mut is_less: F,
+) -> (&mut [T], &mut T, &mut [T])
+where
+ F: FnMut(&T, &T) -> bool,
+{
+ if index >= v.len() {
+ panic!("partition_at_index index {} greater than length of slice {}", index, v.len());
+ }
+
+ if T::IS_ZST {
+ // Sorting has no meaningful behavior on zero-sized types. Do nothing.
+ } else if index == v.len() - 1 {
+ // Find max element and place it in the last position of the array. We're free to use
+ // `unwrap()` here because we know v must not be empty.
+ let max_idx = max_index(v, &mut is_less).unwrap();
+ v.swap(max_idx, index);
+ } else if index == 0 {
+ // Find min element and place it in the first position of the array. We're free to use
+ // `unwrap()` here because we know v must not be empty.
+ let min_idx = min_index(v, &mut is_less).unwrap();
+ v.swap(min_idx, index);
+ } else {
+ partition_at_index_loop(v, index, &mut is_less, None);
+ }
+
+ let (left, right) = v.split_at_mut(index);
+ let (pivot, right) = right.split_at_mut(1);
+ let pivot = &mut pivot[0];
+ (left, pivot, right)
+}
+
+/// Selection algorithm to select the k-th element from the slice in guaranteed O(n) time.
+/// This is essentially a quickselect that uses Tukey's Ninther for pivot selection
+fn median_of_medians<T, F: FnMut(&T, &T) -> bool>(mut v: &mut [T], is_less: &mut F, mut k: usize) {
+ // Since this function isn't public, it should never be called with an out-of-bounds index.
+ debug_assert!(k < v.len());
+
+ // If T is a ZST, `partition_at_index` will already return early.
+ debug_assert!(!T::IS_ZST);
+
+ // We now know that `k < v.len() <= isize::MAX`
+ loop {
+ if v.len() <= MAX_INSERTION {
+ if v.len() > 1 {
+ insertion_sort_shift_left(v, 1, is_less);
+ }
+ return;
+ }
+
+ // `median_of_{minima,maxima}` can't handle the extreme cases of the first/last element,
+ // so we catch them here and just do a linear search.
+ if k == v.len() - 1 {
+ // Find max element and place it in the last position of the array. We're free to use
+ // `unwrap()` here because we know v must not be empty.
+ let max_idx = max_index(v, is_less).unwrap();
+ v.swap(max_idx, k);
+ return;
+ } else if k == 0 {
+ // Find min element and place it in the first position of the array. We're free to use
+ // `unwrap()` here because we know v must not be empty.
+ let min_idx = min_index(v, is_less).unwrap();
+ v.swap(min_idx, k);
+ return;
+ }
+
+ let p = median_of_ninthers(v, is_less);
+
+ if p == k {
+ return;
+ } else if p > k {
+ v = &mut v[..p];
+ } else {
+ // Since `p < k < v.len()`, `p + 1` doesn't overflow and is
+ // a valid index into the slice.
+ v = &mut v[p + 1..];
+ k -= p + 1;
+ }
+ }
+}
+
+// Optimized for when `k` lies somewhere in the middle of the slice. Selects a pivot
+// as close as possible to the median of the slice. For more details on how the algorithm
+// operates, refer to the paper <https://drops.dagstuhl.de/opus/volltexte/2017/7612/pdf/LIPIcs-SEA-2017-24.pdf>.
+fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) -> usize {
+ // use `saturating_mul` so the multiplication doesn't overflow on 16-bit platforms.
+ let frac = if v.len() <= 1024 {
+ v.len() / 12
+ } else if v.len() <= 128_usize.saturating_mul(1024) {
+ v.len() / 64
+ } else {
+ v.len() / 1024
+ };
+
+ let pivot = frac / 2;
+ let lo = v.len() / 2 - pivot;
+ let hi = frac + lo;
+ let gap = (v.len() - 9 * frac) / 4;
+ let mut a = lo - 4 * frac - gap;
+ let mut b = hi + gap;
+ for i in lo..hi {
+ ninther(v, is_less, a, i - frac, b, a + 1, i, b + 1, a + 2, i + frac, b + 2);
+ a += 3;
+ b += 3;
+ }
+
+ median_of_medians(&mut v[lo..lo + frac], is_less, pivot);
+ partition(v, lo + pivot, is_less).0
+}
+
+/// Moves around the 9 elements at the indices a..i, such that
+/// `v[d]` contains the median of the 9 elements and the other
+/// elements are partitioned around it.
+fn ninther<T, F: FnMut(&T, &T) -> bool>(
+ v: &mut [T],
+ is_less: &mut F,
+ a: usize,
+ mut b: usize,
+ c: usize,
+ mut d: usize,
+ e: usize,
+ mut f: usize,
+ g: usize,
+ mut h: usize,
+ i: usize,
+) {
+ b = median_idx(v, is_less, a, b, c);
+ h = median_idx(v, is_less, g, h, i);
+ if is_less(&v[h], &v[b]) {
+ mem::swap(&mut b, &mut h);
+ }
+ if is_less(&v[f], &v[d]) {
+ mem::swap(&mut d, &mut f);
+ }
+ if is_less(&v[e], &v[d]) {
+ // do nothing
+ } else if is_less(&v[f], &v[e]) {
+ d = f;
+ } else {
+ if is_less(&v[e], &v[b]) {
+ v.swap(e, b);
+ } else if is_less(&v[h], &v[e]) {
+ v.swap(e, h);
+ }
+ return;
+ }
+ if is_less(&v[d], &v[b]) {
+ d = b;
+ } else if is_less(&v[h], &v[d]) {
+ d = h;
+ }
+
+ v.swap(d, e);
+}
+
+/// returns the index pointing to the median of the 3
+/// elements `v[a]`, `v[b]` and `v[c]`
+fn median_idx<T, F: FnMut(&T, &T) -> bool>(
+ v: &[T],
+ is_less: &mut F,
+ mut a: usize,
+ b: usize,
+ mut c: usize,
+) -> usize {
+ if is_less(&v[c], &v[a]) {
+ mem::swap(&mut a, &mut c);
+ }
+ if is_less(&v[c], &v[b]) {
+ return c;
+ }
+ if is_less(&v[b], &v[a]) {
+ return a;
+ }
+ b
+}
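Editor's note: the ninther machinery above reduces nine samples to an approximate median via three medians-of-three. A safe, index-free sketch of the same idea:

```rust
// Median of three values via the classic min/max formula:
// median(a, b, c) = min(max(a, b), max(min(a, b), c)).
fn median3(a: i32, b: i32, c: i32) -> i32 {
    a.max(b).min(a.min(b).max(c))
}

fn main() {
    let s = [9, 1, 8, 2, 7, 3, 6, 4, 5];
    let m = median3(
        median3(s[0], s[1], s[2]),
        median3(s[3], s[4], s[5]),
        median3(s[6], s[7], s[8]),
    );
    // For this input the ninther lands on 5, the true median of 1..=9.
    assert_eq!(m, 5);
}
```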
diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs
index 07fd96f92..db76d2625 100644
--- a/library/core/src/slice/sort.rs
+++ b/library/core/src/slice/sort.rs
@@ -145,7 +145,7 @@ where
/// Never inline this function to avoid code bloat. It still optimizes nicely and has
/// practically no performance impact, even improving performance in some cases.
#[inline(never)]
-fn insertion_sort_shift_left<T, F>(v: &mut [T], offset: usize, is_less: &mut F)
+pub(super) fn insertion_sort_shift_left<T, F>(v: &mut [T], offset: usize, is_less: &mut F)
where
F: FnMut(&T, &T) -> bool,
{
@@ -557,7 +557,7 @@ where
///
/// 1. Number of elements smaller than `v[pivot]`.
/// 2. True if `v` was already partitioned.
-fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> (usize, bool)
+pub(super) fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> (usize, bool)
where
F: FnMut(&T, &T) -> bool,
{
@@ -612,7 +612,7 @@ where
///
/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain
/// elements smaller than the pivot.
-fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
+pub(super) fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
where
F: FnMut(&T, &T) -> bool,
{
@@ -670,7 +670,7 @@ where
/// Scatters some elements around in an attempt to break patterns that might cause imbalanced
/// partitions in quicksort.
#[cold]
-fn break_patterns<T>(v: &mut [T]) {
+pub(super) fn break_patterns<T>(v: &mut [T]) {
let len = v.len();
if len >= 8 {
let mut seed = len;
@@ -719,7 +719,7 @@ fn break_patterns<T>(v: &mut [T]) {
/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted.
///
/// Elements in `v` might be reordered in the process.
-fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
+pub(super) fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
where
F: FnMut(&T, &T) -> bool,
{
@@ -897,138 +897,6 @@ where
recurse(v, &mut is_less, None, limit);
}
-fn partition_at_index_loop<'a, T, F>(
- mut v: &'a mut [T],
- mut index: usize,
- is_less: &mut F,
- mut pred: Option<&'a T>,
-) where
- F: FnMut(&T, &T) -> bool,
-{
- // Limit the amount of iterations and fall back to heapsort, similarly to `slice::sort_unstable`.
- // This lowers the worst case running time from O(n^2) to O(n log n).
- // FIXME: Investigate whether it would be better to use something like Median of Medians
- // or Fast Deterministic Selection to guarantee O(n) worst case.
- let mut limit = usize::BITS - v.len().leading_zeros();
-
- // True if the last partitioning was reasonably balanced.
- let mut was_balanced = true;
-
- loop {
- let len = v.len();
-
- // For slices of up to this length it's probably faster to simply sort them.
- const MAX_INSERTION: usize = 10;
- if len <= MAX_INSERTION {
- if len >= 2 {
- insertion_sort_shift_left(v, 1, is_less);
- }
- return;
- }
-
- if limit == 0 {
- heapsort(v, is_less);
- return;
- }
-
- // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
- // some elements around. Hopefully we'll choose a better pivot this time.
- if !was_balanced {
- break_patterns(v);
- limit -= 1;
- }
-
- // Choose a pivot
- let (pivot, _) = choose_pivot(v, is_less);
-
- // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
- // slice. Partition the slice into elements equal to and elements greater than the pivot.
- // This case is usually hit when the slice contains many duplicate elements.
- if let Some(p) = pred {
- if !is_less(p, &v[pivot]) {
- let mid = partition_equal(v, pivot, is_less);
-
- // If we've passed our index, then we're good.
- if mid > index {
- return;
- }
-
- // Otherwise, continue sorting elements greater than the pivot.
- v = &mut v[mid..];
- index = index - mid;
- pred = None;
- continue;
- }
- }
-
- let (mid, _) = partition(v, pivot, is_less);
- was_balanced = cmp::min(mid, len - mid) >= len / 8;
-
- // Split the slice into `left`, `pivot`, and `right`.
- let (left, right) = v.split_at_mut(mid);
- let (pivot, right) = right.split_at_mut(1);
- let pivot = &pivot[0];
-
- if mid < index {
- v = right;
- index = index - mid - 1;
- pred = Some(pivot);
- } else if mid > index {
- v = left;
- } else {
- // If mid == index, then we're done, since partition() guaranteed that all elements
- // after mid are greater than or equal to mid.
- return;
- }
- }
-}
-
-/// Reorder the slice such that the element at `index` is at its final sorted position.
-pub fn partition_at_index<T, F>(
- v: &mut [T],
- index: usize,
- mut is_less: F,
-) -> (&mut [T], &mut T, &mut [T])
-where
- F: FnMut(&T, &T) -> bool,
-{
- use cmp::Ordering::Greater;
- use cmp::Ordering::Less;
-
- if index >= v.len() {
- panic!("partition_at_index index {} greater than length of slice {}", index, v.len());
- }
-
- if T::IS_ZST {
- // Sorting has no meaningful behavior on zero-sized types. Do nothing.
- } else if index == v.len() - 1 {
- // Find max element and place it in the last position of the array. We're free to use
- // `unwrap()` here because we know v must not be empty.
- let (max_index, _) = v
- .iter()
- .enumerate()
- .max_by(|&(_, x), &(_, y)| if is_less(x, y) { Less } else { Greater })
- .unwrap();
- v.swap(max_index, index);
- } else if index == 0 {
- // Find min element and place it in the first position of the array. We're free to use
- // `unwrap()` here because we know v must not be empty.
- let (min_index, _) = v
- .iter()
- .enumerate()
- .min_by(|&(_, x), &(_, y)| if is_less(x, y) { Less } else { Greater })
- .unwrap();
- v.swap(min_index, index);
- } else {
- partition_at_index_loop(v, index, &mut is_less, None);
- }
-
- let (left, right) = v.split_at_mut(index);
- let (pivot, right) = right.split_at_mut(1);
- let pivot = &mut pivot[0];
- (left, pivot, right)
-}
-
/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
@@ -1085,12 +953,12 @@ where
// SAFETY: left and right must be valid and part of v same for out.
unsafe {
- let to_copy = if is_less(&*right, &**left) {
- get_and_increment(&mut right)
- } else {
- get_and_increment(left)
- };
- ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
+ let is_l = is_less(&*right, &**left);
+ let to_copy = if is_l { right } else { *left };
+ ptr::copy_nonoverlapping(to_copy, *out, 1);
+ *out = out.add(1);
+ right = right.add(is_l as usize);
+ *left = left.add(!is_l as usize);
}
}
} else {
@@ -1113,32 +981,18 @@ where
// SAFETY: left and right must be valid and part of v same for out.
unsafe {
- let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
- decrement_and_get(left)
- } else {
- decrement_and_get(right)
- };
- ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
+ let is_l = is_less(&*right.sub(1), &*left.sub(1));
+ *left = left.sub(is_l as usize);
+ *right = right.sub(!is_l as usize);
+ let to_copy = if is_l { *left } else { *right };
+ out = out.sub(1);
+ ptr::copy_nonoverlapping(to_copy, out, 1);
}
}
}
// Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
// it will now be copied into the hole in `v`.
- unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
- let old = *ptr;
-
- // SAFETY: ptr.add(1) must still be a valid pointer and part of `v`.
- *ptr = unsafe { ptr.add(1) };
- old
- }
-
- unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
- // SAFETY: ptr.sub(1) must still be a valid pointer and part of `v`.
- *ptr = unsafe { ptr.sub(1) };
- *ptr
- }
-
// When dropped, copies the range `start..end` into `dest..`.
struct MergeHole<T> {
start: *mut T,
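Editor's note: the new merge loop replaces the `get_and_increment`/`decrement_and_get` helpers (deleted above) with branchless cursor bumps: the comparison result, cast to `0` or `1`, advances exactly one side each iteration. A safe sketch of the forward direction:

```rust
fn merge(left: &[i32], right: &[i32]) -> Vec<i32> {
    let (mut i, mut j) = (0, 0);
    let mut out = Vec::with_capacity(left.len() + right.len());
    while i < left.len() && j < right.len() {
        let take_right = right[j] < left[i]; // `is_l` in the code above
        out.push(if take_right { right[j] } else { left[i] });
        // Exactly one cursor advances, with no branch on the bump itself.
        j += take_right as usize;
        i += !take_right as usize;
    }
    out.extend_from_slice(&left[i..]);
    out.extend_from_slice(&right[j..]);
    out
}

fn main() {
    assert_eq!(merge(&[1, 3, 5], &[2, 2, 6]), [1, 2, 2, 3, 5, 6]);
}
```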
@@ -1456,7 +1310,6 @@ pub struct TimSortRun {
/// Takes a range as denoted by start and end, that is already sorted and extends it to the right if
/// necessary with sorts optimized for smaller ranges such as insertion sort.
-#[cfg(not(no_global_oom_handling))]
fn provide_sorted_batch<T, F>(v: &mut [T], start: usize, mut end: usize, is_less: &mut F) -> usize
where
F: FnMut(&T, &T) -> bool,