summary refs log tree commit diff stats
path: root/vendor/proptest/src/char.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/proptest/src/char.rs')
-rw-r--r-- vendor/proptest/src/char.rs 409
1 files changed, 409 insertions, 0 deletions
diff --git a/vendor/proptest/src/char.rs b/vendor/proptest/src/char.rs
new file mode 100644
index 000000000..7f0e92d7a
--- /dev/null
+++ b/vendor/proptest/src/char.rs
@@ -0,0 +1,409 @@
+//-
+// Copyright 2017 Jason Lingle
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Strategies for generating `char` values.
+//!
+//! Unlike most strategies in Proptest, character generation is by default
+//! biased to particular values known to be difficult to handle in various
+//! circumstances.
+//!
+//! The main things of interest are `any()` to generate truly arbitrary
+//! characters, and `range()` and `ranges()` to select characters from
+//! inclusive ranges.
+
+use crate::std_facade::Cow;
+use core::ops::RangeInclusive;
+
+use rand::Rng;
+
+use crate::num;
+use crate::strategy::*;
+use crate::test_runner::*;
+
+/// An inclusive range of `char`s: every character from `start()` up to and
+/// including `end()`.
+type CharRange = RangeInclusive<char>;
+
+/// A default set of characters to consider as "special" during character
+/// generation.
+///
+/// Most of the characters here were chosen specifically because they are
+/// difficult to handle in particular contexts.
+///
+/// The module-level `any()`, `range()` and `ranges()` constructors all use
+/// this slice as the `special` set of the strategy they build.
+pub const DEFAULT_SPECIAL_CHARS: &[char] = &[
+ // Things to give shell scripts and filesystem logic difficulties
+ '/', '\\', '$', '.', '*', '{', '\'', '"', '`', ':',
+ // Characters with special significance in URLs and elsewhere
+ '?', '%', '=', '&', '<',
+ // Interesting ASCII control characters, in order:
+ // NUL, HT, CR, LF, VT, ESC, DEL
+ '\x00', '\t', '\r', '\n', '\x0B', '\x1B', '\x7F',
+ // ¥ both to test simple Unicode handling and because it has interesting
+ // properties on MS Shift-JIS systems.
+ '¥', // No non-Unicode encoding has both ¥ and Ѩ
+ 'Ѩ',
+ // In UTF-8, Ⱥ increases in length from 2 to 3 bytes when lowercased
+ 'Ⱥ',
+ // More Unicode edge-cases: BOM, replacement character, RTL override, and non-BMP
+ '\u{FEFF}', '\u{FFFD}', '\u{202E}', '🕴',
+];
+
+/// A default sequence of ranges used preferentially when generating random
+/// characters.
+///
+/// `select_range_index` picks one entry of the `preferred` list by random
+/// index, so an entry that is listed several times is proportionally more
+/// likely to be chosen.
+pub const DEFAULT_PREFERRED_RANGES: &[CharRange] = &[
+ // ASCII printable, listed five times so that it is chosen far more often
+ // than the single entry that follows.
+ ' '..='~',
+ ' '..='~',
+ ' '..='~',
+ ' '..='~',
+ ' '..='~',
+ // Latin-1
+ // NOTE(review): this range starts at U+0040 ('@'), so it overlaps the
+ // upper part of printable ASCII as well — confirm that is intended.
+ '\u{0040}'..='\u{00ff}',
+];
+
+/// Picks one random `char`, applying the same biases as `CharStrategy`.
+///
+/// Selection runs through up to three stages:
+///
+/// 1. If `special` is non-empty, then with a 50% chance one of its elements
+///    is drawn at random; it is returned if it lies within one of `ranges`.
+/// 2. If `preferred` is non-empty, any generation that gets past stage 1 has
+///    a 50% chance of drawing a random element of `preferred` and then a
+///    random character within that range (both endpoints inclusive). That
+///    character is returned if it lies within one of `ranges`.
+/// 3. Otherwise a random element of `ranges` is drawn, and a random character
+///    within that range (both endpoints inclusive) is returned.
+///
+/// In every case `ranges` alone determines the set of characters that can
+/// possibly be generated.
+///
+/// Any of the ranges may span code points that are not valid characters.
+///
+/// Both `preferred` and `ranges` bias selection towards characters in smaller
+/// ranges. This is deliberate. `preferred` is usually tuned to select
+/// particular characters anyway. `ranges` is usually derived from some
+/// external property, and the fact that a range is small often means it is
+/// more interesting.
+///
+/// # Panics
+///
+/// Panics if `ranges` is empty.
+pub fn select_char(
+    rnd: &mut impl Rng,
+    special: &[char],
+    preferred: &[CharRange],
+    ranges: &[CharRange],
+) -> char {
+    let (range_start, delta) =
+        select_range_index(rnd, special, preferred, ranges);
+    let code_point = range_start + delta;
+    ::core::char::from_u32(code_point).expect("bad character selected")
+}
+
+/// Chooses a character and reports it as a `(base, offset)` pair, where
+/// `base` is the code point at the start of the element of `ranges` the
+/// character was matched against and `offset` is the character's distance
+/// from that start.
+///
+/// The `special`, `preferred` and `ranges` stages are tried in that order, as
+/// described on `select_char()`.
+fn select_range_index(
+    rnd: &mut impl Rng,
+    special: &[char],
+    preferred: &[CharRange],
+    ranges: &[CharRange],
+) -> (u32, u32) {
+    // Expresses `ch` relative to the first code point of `range`.
+    fn relative_to(range: &CharRange, ch: char) -> (u32, u32) {
+        let base = *range.start() as u32;
+        (base, ch as u32 - base)
+    }
+
+    // Finds the first element of `ranges` that contains `ch`, if any.
+    fn locate(ranges: &[CharRange], ch: char) -> Option<(u32, u32)> {
+        ranges
+            .iter()
+            .find(|r| *r.start() <= ch && ch <= *r.end())
+            .map(|r| relative_to(r, ch))
+    }
+
+    // Draws a code point from `range` (both endpoints inclusive); yields
+    // `None` when the drawn value is not a valid `char`.
+    fn draw(rnd: &mut impl Rng, range: &CharRange) -> Option<char> {
+        let low = *range.start() as u32;
+        let high = *range.end() as u32;
+        ::core::char::from_u32(rnd.gen_range(low..high + 1))
+    }
+
+    // Stage 1: a special character, kept only if `ranges` allows it.
+    if !special.is_empty() && rnd.gen() {
+        let candidate = special[rnd.gen_range(0..special.len())];
+        if let Some(found) = locate(ranges, candidate) {
+            return found;
+        }
+    }
+
+    // Stage 2: a character from a preferred range, kept only if `ranges`
+    // allows it.
+    if !preferred.is_empty() && rnd.gen() {
+        let picked = &preferred[rnd.gen_range(0..preferred.len())];
+        let located = draw(rnd, picked).and_then(|ch| locate(ranges, ch));
+        if let Some(found) = located {
+            return found;
+        }
+    }
+
+    // Stage 3: draw straight from `ranges`, retrying a bounded number of
+    // times when the draw is not a valid `char`.
+    for _ in 0..65_536 {
+        let picked = &ranges[rnd.gen_range(0..ranges.len())];
+        if let Some(ch) = draw(rnd, picked) {
+            return relative_to(picked, ch);
+        }
+    }
+
+    // Give up and return a character we at least know is valid.
+    (*ranges[0].start() as u32, 0)
+}
+
+/// Strategy for generating `char`s.
+///
+/// Character selection is more sophisticated than integer selection. Naïve
+/// selection (particularly in the larger context of generating strings) would
+/// result in starting inputs like `ꂡ螧轎ቶᢹ糦狥芹ᘆ㶏曊ᒀ踔虙ჲ` and "simplified"
+/// inputs consisting mostly of control characters. It also has difficulty
+/// locating edge cases, since the vast majority of code points (such as the
+/// enormous CJK regions) don't cause problems for anything with even basic
+/// Unicode support.
+///
+/// Instead, character selection is always based on explicit ranges, and is
+/// designed to bias to specifically chosen characters and character ranges to
+/// produce inputs that are both more useful and easier for humans to
+/// understand. There are also hard-wired simplification targets based on ASCII
+/// instead of simply simplifying towards NUL to avoid problematic inputs being
+/// reduced to a bunch of NUL characters.
+///
+/// Shrinking never crosses ranges. If you have a complex range like `[A-Za-z]`
+/// and the starting point `x` is chosen, it will not shrink to the first `A-Z`
+/// group, but rather simply to `a`.
+///
+/// The usual way to get instances of this type is with the module-level
+/// `any()` or `range()` functions. Directly constructing a `CharStrategy` is
+/// only necessary for complex ranges or to override the default biases.
+#[derive(Debug, Clone)]
+#[must_use = "strategies do nothing unless used"]
+pub struct CharStrategy<'a> {
+ // Characters passed as `special` when a new tree is generated.
+ special: Cow<'a, [char]>,
+ // Ranges passed as `preferred` when a new tree is generated.
+ preferred: Cow<'a, [CharRange]>,
+ // Ranges passed as `ranges` when a new tree is generated.
+ ranges: Cow<'a, [CharRange]>,
+}
+
+impl<'a> CharStrategy<'a> {
+    /// Builds a `CharStrategy` from the three parameters it will hand to the
+    /// function underlying `select_char()`.
+    ///
+    /// Each argument has the meaning described on `select_char()`.
+    pub fn new(
+        special: Cow<'a, [char]>,
+        preferred: Cow<'a, [CharRange]>,
+        ranges: Cow<'a, [CharRange]>,
+    ) -> Self {
+        Self {
+            special,
+            preferred,
+            ranges,
+        }
+    }
+
+    /// Like `CharStrategy::new()`, but every part is held as a
+    /// `Cow::Borrowed` of the given slice.
+    pub fn new_borrowed(
+        special: &'a [char],
+        preferred: &'a [CharRange],
+        ranges: &'a [CharRange],
+    ) -> Self {
+        Self {
+            special: Cow::Borrowed(special),
+            preferred: Cow::Borrowed(preferred),
+            ranges: Cow::Borrowed(ranges),
+        }
+    }
+}
+
+// A single range covering every `char`, from NUL up to and including
+// `char::MAX`; `any()` uses it as its `ranges`.
+const WHOLE_RANGE: &[CharRange] = &['\x00'..=::core::char::MAX];
+
+/// Returns a `CharStrategy` that can produce any character at all, using the
+/// default special characters and preferred ranges as biases.
+pub fn any() -> CharStrategy<'static> {
+    CharStrategy::new_borrowed(
+        DEFAULT_SPECIAL_CHARS,
+        DEFAULT_PREFERRED_RANGES,
+        WHOLE_RANGE,
+    )
+}
+
+/// Returns a `CharStrategy` that produces characters between `start` and
+/// `end`, both inclusive, using the default biases.
+pub fn range(start: char, end: char) -> CharStrategy<'static> {
+    let only_range = vec![start..=end];
+    CharStrategy::new(
+        Cow::Borrowed(DEFAULT_SPECIAL_CHARS),
+        Cow::Borrowed(DEFAULT_PREFERRED_RANGES),
+        Cow::Owned(only_range),
+    )
+}
+
+/// Returns a `CharStrategy` that produces characters from any of the given
+/// ranges, each inclusive at both ends, using the default biases.
+pub fn ranges(ranges: Cow<[CharRange]>) -> CharStrategy {
+    CharStrategy::new(
+        Cow::Borrowed(DEFAULT_SPECIAL_CHARS),
+        Cow::Borrowed(DEFAULT_PREFERRED_RANGES),
+        ranges,
+    )
+}
+
+/// The `ValueTree` corresponding to `CharStrategy`.
+///
+/// The tree wraps a binary search over `u32` code points and converts the
+/// search's current value back to a `char` on demand.
+#[derive(Debug, Clone, Copy)]
+pub struct CharValueTree {
+ // Search over raw code points. It can land on values that are not valid
+ // `char`s; `reposition()` steps the search until the value is valid again.
+ value: num::u32::BinarySearch,
+}
+
+impl<'a> Strategy for CharStrategy<'a> {
+    type Tree = CharValueTree;
+    type Value = char;
+
+    /// Selects a starting character and wraps it in a `CharValueTree` whose
+    /// search runs between a chosen lower bound and that character.
+    fn new_tree(&self, runner: &mut TestRunner) -> NewTree<Self> {
+        // Candidate lower bounds that are more convenient than the start of
+        // the range, listed in the order they are tried.
+        const FLOORS: [char; 5] = ['¡', 'a', 'A', '0', ' '];
+
+        let (base, offset) = select_range_index(
+            runner.rng(),
+            &self.special,
+            &self.preferred,
+            &self.ranges,
+        );
+        let start = base + offset;
+
+        // Use the first candidate lying above the range start but not above
+        // the selected character; otherwise fall back to the range start.
+        let bottom = FLOORS
+            .iter()
+            .map(|&floor| floor as u32)
+            .find(|&floor| base < floor && floor <= start)
+            .unwrap_or(base);
+
+        Ok(CharValueTree {
+            value: num::u32::BinarySearch::new_above(bottom, start),
+        })
+    }
+}
+
+impl CharValueTree {
+    /// Calls `complicate()` on the underlying search until its current value
+    /// converts to a valid `char`.
+    ///
+    /// Panics if the search refuses to complicate while its current value is
+    /// still not a valid `char`.
+    fn reposition(&mut self) {
+        loop {
+            if ::core::char::from_u32(self.value.current()).is_some() {
+                return;
+            }
+            if !self.value.complicate() {
+                panic!("Converged to non-char value");
+            }
+        }
+    }
+}
+
+impl ValueTree for CharValueTree {
+    type Value = char;
+
+    /// Returns the character the underlying search currently points at.
+    fn current(&self) -> char {
+        let code_point = self.value.current();
+        ::core::char::from_u32(code_point).expect("Generated non-char value")
+    }
+
+    /// Simplifies the underlying search; when it moved, makes sure the new
+    /// position is a valid `char`. Returns whether it moved.
+    fn simplify(&mut self) -> bool {
+        let moved = self.value.simplify();
+        self.settle(moved)
+    }
+
+    /// Complicates the underlying search; when it moved, makes sure the new
+    /// position is a valid `char`. Returns whether it moved.
+    fn complicate(&mut self) -> bool {
+        let moved = self.value.complicate();
+        self.settle(moved)
+    }
+}
+
+impl CharValueTree {
+    /// Repositions onto a valid `char` if the preceding step `moved`, then
+    /// passes `moved` through unchanged.
+    fn settle(&mut self, moved: bool) -> bool {
+        if moved {
+            self.reposition();
+        }
+        moved
+    }
+}
+
+#[cfg(test)]
+mod test {
+ use std::cmp::{max, min};
+ use std::vec::Vec;
+
+ use super::*;
+ use crate::collection;
+
+ proptest! {
+ // Every value a tree takes, both the initial one and each one reached
+ // while simplifying, must lie inside one of the requested ranges.
+ #[test]
+ fn stays_in_range(input_ranges in collection::vec(
+ (0..::std::char::MAX as u32,
+ 0..::std::char::MAX as u32),
+ 1..5))
+ {
+ // Turn each pair of code points into an ordered inclusive range; the
+ // whole case is rejected if either endpoint is not a valid char.
+ let input = ranges(Cow::Owned(input_ranges.iter().map(
+ |&(lo, hi)| ::std::char::from_u32(lo).and_then(
+ |lo| ::std::char::from_u32(hi).map(
+ |hi| min(lo, hi) ..= max(lo, hi)))
+ .ok_or_else(|| TestCaseError::reject("non-char")))
+ .collect::<Result<Vec<CharRange>,_>>()?));
+
+ let mut runner = TestRunner::default();
+ for _ in 0..256 {
+ let mut value = input.new_tree(&mut runner).unwrap();
+ loop {
+ let ch = value.current() as u32;
+ assert!(input_ranges.iter().any(
+ |&(lo, hi)| ch >= min(lo, hi) &&
+ ch <= max(lo, hi)));
+
+ if !value.simplify() { break; }
+ }
+ }
+ }
+ }
+
+ // Out of 1024 draws from `any()`, at least a quarter must be printable
+ // ASCII and the special character '🕴' must show up at least once.
+ #[test]
+ fn applies_desired_bias() {
+ let mut men_in_business_suits_levitating = 0;
+ let mut ascii_printable = 0;
+ let mut runner = TestRunner::deterministic();
+
+ for _ in 0..1024 {
+ let ch = any().new_tree(&mut runner).unwrap().current();
+ if '🕴' == ch {
+ men_in_business_suits_levitating += 1;
+ } else if ch >= ' ' && ch <= '~' {
+ ascii_printable += 1;
+ }
+ }
+
+ assert!(ascii_printable >= 256);
+ assert!(men_in_business_suits_levitating >= 1);
+ }
+
+ // A value that starts above ' ' must not fall below ' ' once it has been
+ // simplified as far as it will go.
+ #[test]
+ fn doesnt_shrink_to_ascii_control() {
+ let mut accepted = 0;
+ let mut runner = TestRunner::deterministic();
+
+ for _ in 0..256 {
+ let mut value = any().new_tree(&mut runner).unwrap();
+
+ // Skip starting points at or below ' '; they are not counted.
+ if value.current() <= ' ' {
+ continue;
+ }
+
+ while value.simplify() {}
+
+ assert!(value.current() >= ' ');
+ accepted += 1;
+ }
+
+ // Make sure the skip above did not discard most of the samples.
+ assert!(accepted >= 200);
+ }
+
+ // Runs the generic strategy sanity checker against `any()`.
+ #[test]
+ fn test_sanity() {
+ check_strategy_sanity(
+ any(),
+ Some(CheckStrategySanityOptions {
+ // `simplify()` can itself `complicate()` back to the starting
+ // position, so the overly strict complicate-after-simplify check
+ // must be disabled.
+ strict_complicate_after_simplify: false,
+ ..CheckStrategySanityOptions::default()
+ }),
+ );
+ }
+}