From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/unic-char-range/.cargo-checksum.json | 1 + vendor/unic-char-range/Cargo.toml | 51 +++++ vendor/unic-char-range/benches/benchmarks.rs | 41 ++++ .../examples/macro_use_std_tests.rs | 18 ++ vendor/unic-char-range/src/iter.rs | 151 ++++++++++++++ vendor/unic-char-range/src/iter_fused.rs | 15 ++ vendor/unic-char-range/src/iter_trusted_len.rs | 16 ++ vendor/unic-char-range/src/lib.rs | 75 +++++++ vendor/unic-char-range/src/macros.rs | 44 ++++ vendor/unic-char-range/src/par_iter.rs | 110 ++++++++++ vendor/unic-char-range/src/pkg_info.rs | 20 ++ vendor/unic-char-range/src/range.rs | 231 +++++++++++++++++++++ vendor/unic-char-range/src/step.rs | 45 ++++ vendor/unic-char-range/tests/iter_tests.rs | 78 +++++++ 14 files changed, 896 insertions(+) create mode 100644 vendor/unic-char-range/.cargo-checksum.json create mode 100644 vendor/unic-char-range/Cargo.toml create mode 100644 vendor/unic-char-range/benches/benchmarks.rs create mode 100644 vendor/unic-char-range/examples/macro_use_std_tests.rs create mode 100644 vendor/unic-char-range/src/iter.rs create mode 100644 vendor/unic-char-range/src/iter_fused.rs create mode 100644 vendor/unic-char-range/src/iter_trusted_len.rs create mode 100644 vendor/unic-char-range/src/lib.rs create mode 100644 vendor/unic-char-range/src/macros.rs create mode 100644 vendor/unic-char-range/src/par_iter.rs create mode 100644 vendor/unic-char-range/src/pkg_info.rs create mode 100644 vendor/unic-char-range/src/range.rs create mode 100644 vendor/unic-char-range/src/step.rs create mode 100644 vendor/unic-char-range/tests/iter_tests.rs (limited to 'vendor/unic-char-range') diff --git a/vendor/unic-char-range/.cargo-checksum.json b/vendor/unic-char-range/.cargo-checksum.json new file mode 100644 index 000000000..3f53bcd99 --- /dev/null +++ b/vendor/unic-char-range/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"36d694fec9b997f5cd86861a5d062d0e69794e051b07d8ebf1b79ab9482c3462","benches/benchmarks.rs":"43c24a525f93c2417e09e4a3914523faf5802e8d4e33697070ddb429ef16fbe0","examples/macro_use_std_tests.rs":"f96109f197e7750ed5ab10041542a5b59a0f3a791e52cc88ebfe5e098b05711e","src/iter.rs":"aeb949486a521f8c273d7875536d030643f74603112ef6688bafdd639bd8f487","src/iter_fused.rs":"2ab4a76b8e9610163deda5c602881c2b8e9696f607580a04bb4e17754e4f8f9b","src/iter_trusted_len.rs":"b89e6d7dbbc2f46a1b66015b1b5d2bc12d91309880847d4f980f7b39ff4a8945","src/lib.rs":"955ee0b9069740fd1ee724943562f9189b7eb29b49d79c8c352a6f193c12c3ec","src/macros.rs":"7f0a7eedfdd31a0d5f10b16f342524663b4f2c82a0c97728f8677d0d29490241","src/par_iter.rs":"d6a213e83fc3bd0b0d248c0a7e42fe1b8e24eeca1424195c4638c7328795d018","src/pkg_info.rs":"37cbe4efd0b0205d251e1ef7714b23e3be27052c8a0de6b1ae3cbf3aefae1fe5","src/range.rs":"5ce555d9cf8513c76f3b45002a3eed4a07cfe8a51e769f9888270091d319cb05","src/step.rs":"66cbc3e291f1014f994c88b61a741e4cd84e9ef62debef5e160b4ae4f5211bd7","tests/iter_tests.rs":"5af9a84509e992e83d1ee90e86ec1e88352edf7926013b4c7cbdfe1f90c61b7a"},"package":"0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"} \ No newline at end of file diff --git a/vendor/unic-char-range/Cargo.toml b/vendor/unic-char-range/Cargo.toml new file mode 100644 index 000000000..42731411e --- /dev/null +++ b/vendor/unic-char-range/Cargo.toml @@ -0,0 +1,51 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g. crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "unic-char-range" +version = "0.9.0" +authors = ["The UNIC Project Developers"] +exclude = [] +description = "UNIC — Unicode Character Tools — Character Range and Iteration" +keywords = ["text", "unicode", "utilities", "iteration"] +categories = ["internationalization", "text-processing"] +license = "MIT/Apache-2.0" +repository = "https://github.com/open-i18n/rust-unic/" +[dependencies.rayon] +version = "1.0" +optional = true + +[features] +default = [] +exact-size-is-empty = [] +fused = [] +std = [] +trusted-len = [] +unstable = ["exact-size-is-empty", "fused", "trusted-len"] +[badges.appveyor] +branch = "master" +repository = "open-i18n/rust-unic" +service = "github" + +[badges.is-it-maintained-issue-resolution] +repository = "open-i18n/rust-unic" + +[badges.is-it-maintained-open-issues] +repository = "open-i18n/rust-unic" + +[badges.maintenance] +status = "actively-developed" + +[badges.travis-ci] +branch = "master" +repository = "open-i18n/rust-unic" diff --git a/vendor/unic-char-range/benches/benchmarks.rs b/vendor/unic-char-range/benches/benchmarks.rs new file mode 100644 index 000000000..a05ac271e --- /dev/null +++ b/vendor/unic-char-range/benches/benchmarks.rs @@ -0,0 +1,41 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(test)] + +extern crate test; + +use std::char; +use unic_char_range::CharRange; + +#[bench] +fn forward_iteration(b: &mut test::Bencher) { + b.iter(|| CharRange::all().iter().count()) +} + +#[bench] +fn forward_iteration_baseline(b: &mut test::Bencher) { + b.iter(|| (0..0x11_0000).filter_map(char::from_u32).count()) +} + +#[bench] +fn reverse_iteration(b: &mut test::Bencher) { + b.iter(|| CharRange::all().iter().rev().count()) +} + +#[bench] +fn reverse_iteration_baseline(b: &mut test::Bencher) { + b.iter(|| (0..0x11_0000).rev().filter_map(char::from_u32).count()) +} + +#[bench] +fn range_length(b: &mut test::Bencher) { + b.iter(|| CharRange::all().len()) +} diff --git a/vendor/unic-char-range/examples/macro_use_std_tests.rs b/vendor/unic-char-range/examples/macro_use_std_tests.rs new file mode 100644 index 000000000..751f93ae3 --- /dev/null +++ b/vendor/unic-char-range/examples/macro_use_std_tests.rs @@ -0,0 +1,18 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[macro_use] +extern crate unic_char_range; + +fn main() { + assert!(chars!('\u{0}'..='\u{2}') + .iter() + .eq(['\u{0}', '\u{1}', '\u{2}'].iter().cloned())); +} diff --git a/vendor/unic-char-range/src/iter.rs b/vendor/unic-char-range/src/iter.rs new file mode 100644 index 000000000..44901dfe3 --- /dev/null +++ b/vendor/unic-char-range/src/iter.rs @@ -0,0 +1,151 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use core::{char, ops}; + +use crate::{step, CharRange}; + +const SURROGATE_RANGE: ops::Range = 0xD800..0xE000; + +/// An iterator over a range of unicode code points. +/// +/// Constructed via `CharRange::iter`. See `CharRange` for more information. +#[derive(Clone, Debug)] +pub struct CharIter { + /// The lowest uniterated character (inclusive). + /// + /// Iteration is finished if this is higher than `high`. + low: char, + + /// The highest uniterated character (inclusive). + /// + /// Iteration is finished if this is lower than `low`. + high: char, +} + +impl From for CharIter { + fn from(range: CharRange) -> CharIter { + CharIter { + low: range.low, + high: range.high, + } + } +} + +impl From for CharRange { + fn from(iter: CharIter) -> CharRange { + CharRange { + low: iter.low, + high: iter.high, + } + } +} + +impl CharIter { + #[inline] + #[allow(unsafe_code)] + // When stepping `self.low` forward would go over `char::MAX`, + // Set `self.high` to `'\0'` instead. It will have the same effect -- + // consuming the last element from the iterator and ending iteration. + fn step_forward(&mut self) { + if self.low == char::MAX { + self.high = '\0' + } else { + self.low = unsafe { step::forward(self.low) } + } + } + + #[inline] + #[allow(unsafe_code)] + // When stepping `self.high` backward would cause underflow, + // set `self.low` to `char::MAX` instead. It will have the same effect -- + // consuming the last element from the iterator and ending iteration. + fn step_backward(&mut self) { + if self.high == '\0' { + self.low = char::MAX; + } else { + self.high = unsafe { step::backward(self.high) } + } + } + + #[inline] + /// ExactSizeIterator::is_empty() for stable + fn is_finished(&self) -> bool { + self.low > self.high + } +} + +impl Iterator for CharIter { + type Item = char; + + #[inline] + fn next(&mut self) -> Option { + if self.is_finished() { + return None; + } + + let ch = self.low; + self.step_forward(); + Some(ch) + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.len(); + (len, Some(len)) + } + + fn last(self) -> Option { + if self.is_finished() { + None + } else { + Some(self.high) + } + } + + fn max(self) -> Option { + self.last() + } + + fn min(mut self) -> Option { + self.next() + } +} + +impl DoubleEndedIterator for CharIter { + #[inline] + fn next_back(&mut self) -> Option { + if self.is_finished() { + None + } else { + let ch = self.high; + self.step_backward(); + Some(ch) + } + } +} + +impl ExactSizeIterator for CharIter { + fn len(&self) -> usize { + if self.is_finished() { + return 0; + } + let naive_range = (self.low as u32)..(self.high as u32 + 1); + if naive_range.start <= SURROGATE_RANGE.start && SURROGATE_RANGE.end <= naive_range.end { + naive_range.len() - SURROGATE_RANGE.len() + } else { + naive_range.len() + } + } + + #[cfg(feature = "exact-size-is-empty")] + fn is_empty(&self) -> bool { + self.is_finished() + } +} diff --git a/vendor/unic-char-range/src/iter_fused.rs b/vendor/unic-char-range/src/iter_fused.rs new file mode 100644 index 000000000..f0a3faeb3 --- /dev/null +++ b/vendor/unic-char-range/src/iter_fused.rs @@ -0,0 +1,15 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use core::iter; + +use super::iter::CharIter; + +impl iter::FusedIterator for CharIter {} diff --git a/vendor/unic-char-range/src/iter_trusted_len.rs b/vendor/unic-char-range/src/iter_trusted_len.rs new file mode 100644 index 000000000..aaf8e5340 --- /dev/null +++ b/vendor/unic-char-range/src/iter_trusted_len.rs @@ -0,0 +1,16 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use core::iter; + +use super::iter::CharIter; + +#[allow(unsafe_code)] +unsafe impl iter::TrustedLen for CharIter {} diff --git a/vendor/unic-char-range/src/lib.rs b/vendor/unic-char-range/src/lib.rs new file mode 100644 index 000000000..1f6e82c47 --- /dev/null +++ b/vendor/unic-char-range/src/lib.rs @@ -0,0 +1,75 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(feature = "exact-size-is-empty", feature(exact_size_is_empty))] +#![cfg_attr(feature = "trusted-len", feature(trusted_len))] +#![warn( + bad_style, + missing_debug_implementations, + missing_docs, + unconditional_recursion +)] +#![deny(unsafe_code)] + +//! # UNIC — Unicode Character Tools — Character Range +//! +//! A simple way to control iteration over a range of characters. +//! +//! # Examples +//! +//! ``` +//! #[macro_use] extern crate unic_char_range; +//! +//! # fn main() { +//! for character in chars!('a'..='z') { +//! // character is each character in the lowercase english alphabet in order +//! } +//! +//! for character in chars!(..) { +//! // character is every valid char from lowest codepoint to highest +//! } +//! # } +//! ``` +//! +//! # Features +//! +//! None of these features are included by default; they rely on unstable Rust feature gates. +//! +//! - `unstable`: enables all features +//! - `exact-size-is-empty`: provide a specific impl of [`ExactSizeIterator::is_empty`][is_empty] +//! - `trusted-len`: impl the [`TrustedLen`] contract +//! +//! [is_empty]: https://doc.rust-lang.org/std/iter/trait.ExactSizeIterator.html#method.is_empty +//! [`FusedIterator`]: https://doc.rust-lang.org/std/iter/trait.FusedIterator.html +//! [`TrustedLen`]: https://doc.rust-lang.org/std/iter/trait.TrustedLen.html +//! + +mod pkg_info; +pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION}; + +mod iter; +pub use crate::iter::CharIter; + +mod range; +pub use crate::range::CharRange; + +#[macro_use] +mod macros; + +mod step; + +mod iter_fused; + +#[cfg(feature = "trusted-len")] +mod iter_trusted_len; + +#[cfg(feature = "rayon")] +mod par_iter; diff --git a/vendor/unic-char-range/src/macros.rs b/vendor/unic-char-range/src/macros.rs new file mode 100644 index 000000000..a1b41d295 --- /dev/null +++ b/vendor/unic-char-range/src/macros.rs @@ -0,0 +1,44 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[macro_export] +/// Convenience macro for the initialization of `CharRange`s. +/// +/// # Syntax +/// +/// ``` +/// # #[macro_use] extern crate unic_char_range; +/// # fn main() { +/// chars!('a'..'z'); // The half open range including 'a' and excluding 'z' +/// chars!('a'..='z'); // The closed range including 'a' and including 'z' +/// chars!(..); // All characters +/// # } +/// ``` +/// +/// `chars!('a'..='z')` and `chars!(..)` are constant-time expressions, and can be used +/// where such are required, such as in the initialization of constant data structures. +/// +/// NOTE: Because an `expr` capture cannot be followed by a `..`/`..=`, this macro captures token +/// trees. This means that if you want to pass more than one token, you must parenthesize it (e.g. +/// `chars!('\0' ..= (char::MAX))`). +macro_rules! chars { + ( $low:tt .. $high:tt ) => { + $crate::CharRange::open_right($low, $high) + }; + ( $low:tt ..= $high:tt ) => { + $crate::CharRange { + low: $low, + high: $high, + } + }; + ( .. ) => { + $crate::CharRange::all() + }; +} diff --git a/vendor/unic-char-range/src/par_iter.rs b/vendor/unic-char-range/src/par_iter.rs new file mode 100644 index 000000000..5b40d221a --- /dev/null +++ b/vendor/unic-char-range/src/par_iter.rs @@ -0,0 +1,110 @@ +// Copyright 2018 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use rayon; + +use self::rayon::iter::plumbing::{Consumer, ProducerCallback, UnindexedConsumer}; +use self::rayon::prelude::*; +use crate::step::{AFTER_SURROGATE, BEFORE_SURROGATE}; +use crate::CharRange; +use core::char; +use core::ops::Range; + +const SKIP_LENGTH: u32 = + crate::step::AFTER_SURROGATE as u32 - crate::step::BEFORE_SURROGATE as u32 - 1; + +#[derive(Clone, Debug)] +pub struct Iter(rayon::iter::Map, fn(u32) -> char>); + +impl ParallelIterator for Iter { + type Item = char; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.0.drive_unindexed(consumer) + } +} + +impl IndexedParallelIterator for Iter { + fn len(&self) -> usize { + self.0.len() + } + + fn drive>(self, consumer: C) -> C::Result { + self.0.drive(consumer) + } + + fn with_producer>(self, callback: CB) -> CB::Output { + self.0.with_producer(callback) + } +} + +impl CharRange { + fn compact_range(&self) -> Range { + let low = self.low as u32; + let high = self.high as u32 + 1; + low..(if self.high >= AFTER_SURROGATE { + high - SKIP_LENGTH + } else { + high + }) + } +} + +impl IntoParallelIterator for CharRange { + type Item = char; + type Iter = Iter; + + fn into_par_iter(self) -> Self::Iter { + Iter(self.compact_range().into_par_iter().map(|c| { + let c = if c > BEFORE_SURROGATE as u32 { + c + SKIP_LENGTH + } else { + c + }; + debug_assert!(c <= BEFORE_SURROGATE as u32 || c >= AFTER_SURROGATE as u32); + debug_assert!(c <= char::MAX as u32); + #[allow(unsafe_code)] + unsafe { + char::from_u32_unchecked(c) + } + })) + } +} + +impl<'a> IntoParallelIterator for &'a CharRange { + type Item = char; + type Iter = Iter; + + fn into_par_iter(self) -> Self::Iter { + (*self).into_par_iter() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn length_agrees() { + assert_eq!(chars!(..).iter().count(), chars!(..).par_iter().count()) + } + + #[test] + #[cfg(feature = "std")] + fn content_agrees() { + assert_eq!( + chars!(..).iter().collect::>(), + chars!(..).par_iter().collect::>() + ) + } +} diff --git a/vendor/unic-char-range/src/pkg_info.rs b/vendor/unic-char-range/src/pkg_info.rs new file mode 100644 index 000000000..a1ab2853f --- /dev/null +++ b/vendor/unic-char-range/src/pkg_info.rs @@ -0,0 +1,20 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Package information + +/// UNIC component version. +pub const PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); + +/// UNIC component name. +pub const PKG_NAME: &str = env!("CARGO_PKG_NAME"); + +/// UNIC component description. +pub const PKG_DESCRIPTION: &str = env!("CARGO_PKG_DESCRIPTION"); diff --git a/vendor/unic-char-range/src/range.rs b/vendor/unic-char-range/src/range.rs new file mode 100644 index 000000000..d958016fc --- /dev/null +++ b/vendor/unic-char-range/src/range.rs @@ -0,0 +1,231 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use core::{char, cmp}; + +#[cfg(feature = "std")] +use std::collections::Bound; + +use self::cmp::Ordering; +use crate::CharIter; + +/// A range of unicode code points. +/// +/// The most idiomatic way to construct this range is through the use of the `chars!` macro: +/// +/// ``` +/// #[macro_use] extern crate unic_char_range; +/// use unic_char_range::CharRange; +/// +/// # fn main() { +/// assert_eq!(chars!('a'..='z'), CharRange::closed('a', 'z')); +/// assert_eq!(chars!('a'..'z'), CharRange::open_right('a', 'z')); +/// assert_eq!(chars!(..), CharRange::all()); +/// # } +/// ``` +/// +/// If constructed in reverse order, such that `self.high` is ordered before `self.low`, +/// the range is empty. If you want to iterate in decreasing order, use `.iter().rev()`. +/// All empty ranges are considered equal no matter the internal state. +#[derive(Copy, Clone, Debug, Eq)] +pub struct CharRange { + /// The lowest character in this range (inclusive). + pub low: char, + + /// The highest character in this range (inclusive). + pub high: char, +} + +/// Constructors +impl CharRange { + /// Construct a closed range of characters. + /// + /// If `stop` is ordered before `start`, the resulting range will be empty. + /// + /// # Example + /// + /// ``` + /// # use unic_char_range::*; + /// assert_eq!( + /// CharRange::closed('a', 'd').iter().collect::>(), + /// vec!['a', 'b', 'c', 'd'] + /// ) + /// ``` + pub fn closed(start: char, stop: char) -> CharRange { + CharRange { + low: start, + high: stop, + } + } + + /// Construct a half open (right) range of characters. + /// + /// # Example + /// + /// ``` + /// # use unic_char_range::*; + /// assert_eq!( + /// CharRange::open_right('a', 'd').iter().collect::>(), + /// vec!['a', 'b', 'c'] + /// ) + /// ``` + pub fn open_right(start: char, stop: char) -> CharRange { + let mut iter = CharRange::closed(start, stop).iter(); + let _ = iter.next_back(); + iter.into() + } + + /// Construct a half open (left) range of characters. + /// + /// # Example + /// + /// ``` + /// # use unic_char_range::*; + /// assert_eq!( + /// CharRange::open_left('a', 'd').iter().collect::>(), + /// vec!['b', 'c', 'd'] + /// ) + /// ``` + pub fn open_left(start: char, stop: char) -> CharRange { + let mut iter = CharRange::closed(start, stop).iter(); + let _ = iter.next(); + iter.into() + } + + /// Construct a fully open range of characters. + /// + /// # Example + /// + /// ``` + /// # use unic_char_range::*; + /// assert_eq!( + /// CharRange::open('a', 'd').iter().collect::>(), + /// vec!['b', 'c'] + /// ) + /// ``` + pub fn open(start: char, stop: char) -> CharRange { + let mut iter = CharRange::closed(start, stop).iter(); + let _ = iter.next(); + let _ = iter.next_back(); + iter.into() + } + + #[cfg(feature = "std")] + /// Construct a range of characters from bounds. + pub fn bound(start: Bound, stop: Bound) -> CharRange { + let start = if start == Bound::Unbounded { + Bound::Included('\u{0}') + } else { + start + }; + let stop = if stop == Bound::Unbounded { + Bound::Included(char::MAX) + } else { + stop + }; + match (start, stop) { + (Bound::Included(start), Bound::Included(stop)) => CharRange::closed(start, stop), + (Bound::Excluded(start), Bound::Excluded(stop)) => CharRange::open(start, stop), + (Bound::Included(start), Bound::Excluded(stop)) => CharRange::open_right(start, stop), + (Bound::Excluded(start), Bound::Included(stop)) => CharRange::open_left(start, stop), + (Bound::Unbounded, _) | (_, Bound::Unbounded) => unreachable!(), + } + } + + /// Construct a range over all Unicode characters (Unicode Scalar Values). + pub fn all() -> CharRange { + CharRange::closed('\u{0}', char::MAX) + } + + /// Construct a range over all characters of *assigned* Unicode Planes. + /// + /// Assigned *normal* (non-special) Unicode Planes are: + /// - Plane 0: *Basic Multilingual Plane* (BMP) + /// - Plane 1: *Supplementary Multilingual Plane* (SMP) + /// - Plane 2: *Supplementary Ideographic Plane* (SIP) + /// + /// Unicode Plane 14, *Supplementary Special-purpose Plane* (SSP), is not included in this + /// range, mainly because of the limit of `CharRange` only supporting a continuous range. + /// + /// Unicode Planes 3 to 13 are *Unassigned* planes and therefore excluded. + /// + /// Unicode Planes 15 and 16 are *Private Use Area* planes and won't have Unicode-assigned + /// characters. + pub fn assigned_normal_planes() -> CharRange { + CharRange::closed('\u{0}', '\u{2_FFFF}') + } +} + +/// Collection-like fns +impl CharRange { + /// Does this range include a character? + /// + /// # Examples + /// + /// ``` + /// # use unic_char_range::CharRange; + /// assert!( CharRange::closed('a', 'g').contains('d')); + /// assert!( ! CharRange::closed('a', 'g').contains('z')); + /// + /// assert!( ! CharRange:: open ('a', 'a').contains('a')); + /// assert!( ! CharRange::closed('z', 'a').contains('g')); + /// ``` + pub fn contains(&self, ch: char) -> bool { + self.low <= ch && ch <= self.high + } + + /// Determine the ordering of this range and a character. + /// + /// # Panics + /// + /// Panics if the range is empty. This fn may be adjusted in the future to not panic + /// in optimized builds. Even if so, an empty range will never compare as `Ordering::Equal`. + pub fn cmp_char(&self, ch: char) -> Ordering { + // possible optimization: only assert this in debug builds + assert!(!self.is_empty(), "Cannot compare empty range's ordering"); + if self.high < ch { + Ordering::Less + } else if self.low > ch { + Ordering::Greater + } else { + Ordering::Equal + } + } + + /// How many characters are in this range? + pub fn len(&self) -> usize { + self.iter().len() + } + + /// Is this range empty? + pub fn is_empty(&self) -> bool { + self.low > self.high + } + + /// Create an iterator over this range. + pub fn iter(&self) -> CharIter { + (*self).into() + } +} + +impl IntoIterator for CharRange { + type IntoIter = CharIter; + type Item = char; + + fn into_iter(self) -> CharIter { + self.iter() + } +} + +impl PartialEq for CharRange { + fn eq(&self, other: &CharRange) -> bool { + (self.is_empty() && other.is_empty()) || (self.low == other.low && self.high == other.high) + } +} diff --git a/vendor/unic-char-range/src/step.rs b/vendor/unic-char-range/src/step.rs new file mode 100644 index 000000000..1140edd9e --- /dev/null +++ b/vendor/unic-char-range/src/step.rs @@ -0,0 +1,45 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use core::char; + +pub const BEFORE_SURROGATE: char = '\u{D7FF}'; +pub const AFTER_SURROGATE: char = '\u{E000}'; + +#[inline] +#[allow(unsafe_code)] +/// Step a character one step towards `char::MAX`. +/// +/// # Safety +/// +/// If the given character is `char::MAX`, the return value is not a valid character. +pub unsafe fn forward(ch: char) -> char { + if ch == BEFORE_SURROGATE { + AFTER_SURROGATE + } else { + char::from_u32_unchecked(ch as u32 + 1) + } +} + +#[inline] +#[allow(unsafe_code)] +/// Step a character one step towards `'\0'`. +/// +/// # Safety +/// +/// If the given character is `'\0'`, this will cause an underflow. +/// (Thus, it will panic in debug mode, undefined behavior in release mode.) +pub unsafe fn backward(ch: char) -> char { + if ch == AFTER_SURROGATE { + BEFORE_SURROGATE + } else { + char::from_u32_unchecked(ch as u32 - 1) + } +} diff --git a/vendor/unic-char-range/tests/iter_tests.rs b/vendor/unic-char-range/tests/iter_tests.rs new file mode 100644 index 000000000..9678009a9 --- /dev/null +++ b/vendor/unic-char-range/tests/iter_tests.rs @@ -0,0 +1,78 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::{char, u32, vec}; + +use unic_char_range::CharRange; + +fn all_chars() -> vec::IntoIter { + (u32::MIN..u32::MAX) + .take_while(|&u| u <= char::MAX as u32) + .filter_map(char::from_u32) + .collect::>() + .into_iter() +} + +#[test] +fn test_iter_all_chars() { + assert!(CharRange::all().iter().eq(all_chars())) +} + +#[test] +fn test_iter_all_chars_rev() { + assert!(CharRange::all().iter().rev().eq(all_chars().rev())) +} + +#[test] +fn test_iter_all_chars_mixed_next_back() { + let mut custom = CharRange::all().iter(); + let mut simple = all_chars(); + while let Some(custom_char) = custom.next() { + assert_eq!(Some(custom_char), simple.next()); + assert_eq!(custom.next_back(), simple.next_back()); + } + assert_eq!(None, simple.next()); +} + +#[test] +fn test_iter_all_chars_into_iter() { + for _ch in CharRange::all() { + // nothing + } +} + +#[test] +fn test_iter_fused() { + let mut iter = CharRange::all().iter(); + let mut fused = all_chars().fuse(); + assert!(iter.by_ref().eq(fused.by_ref())); + for _ in 0..100 { + assert_eq!(iter.next(), fused.next()); + } +} + +#[test] +fn test_iter_exact_trusted_len() { + fn assert_presents_right_len(iter: &I, len: usize) { + assert_eq!(iter.len(), len); + assert_eq!(iter.size_hint(), (len, Some(len))); + } + + let mut iter = CharRange::all().iter(); + let mut predicted_length = iter.len(); + assert_eq!(predicted_length, all_chars().len()); + + while let Some(_) = iter.next() { + predicted_length -= 1; + assert_presents_right_len(&iter, predicted_length); + } + + assert_presents_right_len(&iter, 0); +} -- cgit v1.2.3