summaryrefslogtreecommitdiffstats
path: root/vendor/proptest/src/arbitrary/_std/string.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/proptest/src/arbitrary/_std/string.rs')
-rw-r--r--vendor/proptest/src/arbitrary/_std/string.rs318
1 files changed, 318 insertions, 0 deletions
diff --git a/vendor/proptest/src/arbitrary/_std/string.rs b/vendor/proptest/src/arbitrary/_std/string.rs
new file mode 100644
index 000000000..fd8138a39
--- /dev/null
+++ b/vendor/proptest/src/arbitrary/_std/string.rs
@@ -0,0 +1,318 @@
+//-
+// Copyright 2017, 2018 The proptest developers
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Arbitrary implementations for `std::string`.
+
+use crate::std_facade::{Box, String, Vec};
+use std::iter;
+use std::rc::Rc;
+use std::slice;
+use std::sync::Arc;
+
+multiplex_alloc! {
+ alloc::string::FromUtf8Error, ::std::string::FromUtf8Error,
+ alloc::string::FromUtf16Error, ::std::string::FromUtf16Error
+}
+
+use crate::arbitrary::*;
+use crate::collection;
+use crate::strategy::statics::static_map;
+use crate::strategy::*;
+use crate::string::StringParam;
+
+impl Arbitrary for String {
+ type Parameters = StringParam;
+ type Strategy = &'static str;
+
+ /// ## Panics
+ ///
+ /// This implementation panics if the input is not a valid regex proptest
+ /// can handle.
+ fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
+ args.into()
+ }
+}
+
+macro_rules! dst_wrapped {
+ ($($w: ident),*) => {
+ $(arbitrary!($w<str>, MapInto<StrategyFor<String>, Self>, StringParam;
+ a => any_with::<String>(a).prop_map_into()
+ );)*
+ };
+}
+
+dst_wrapped!(Box, Rc, Arc);
+
+lazy_just!(FromUtf16Error, || String::from_utf16(&[0xD800])
+ .unwrap_err());
+
+// This is a void-like type, it needs to be handled by the user of
+// the type by simply never constructing the variant in an enum or for
+// structs by inductively not generating the struct.
+// The same applies to ! and Infallible.
+// generator!(ParseError, || panic!());
+
+arbitrary!(FromUtf8Error, SFnPtrMap<BoxedStrategy<Vec<u8>>, Self>;
+ static_map(not_utf8_bytes(true).boxed(),
+ |bs| String::from_utf8(bs).unwrap_err())
+);
+
+/// This strategy produces sequences of bytes that are guaranteed to be illegal
+/// wrt. UTF-8 with the goal of producing a suffix of bytes in the end of
+/// an otherwise legal UTF-8 string that causes the string to be illegal.
+/// This is used primarily to generate the `Utf8Error` type and similar.
+pub(crate) fn not_utf8_bytes(
+ allow_null: bool,
+) -> impl Strategy<Value = Vec<u8>> {
+ let prefix = collection::vec(any::<char>(), ..::std::u16::MAX as usize);
+ let suffix = gen_el_bytes(allow_null);
+ (prefix, suffix).prop_map(move |(prefix_bytes, el_bytes)| {
+ let iter = prefix_bytes.iter();
+ let string: String = if allow_null {
+ iter.collect()
+ } else {
+ iter.filter(|&&x| x != '\u{0}').collect()
+ };
+ let mut bytes = string.into_bytes();
+ bytes.extend(el_bytes.into_iter());
+ bytes
+ })
+}
+
+/// Stands for "error_length" bytes and contains a suffix of bytes that
+/// will cause the whole string to become invalid UTF-8.
+/// See `gen_el_bytes` for more details.
+#[derive(Debug)]
+enum ELBytes {
+ B1([u8; 1]),
+ B2([u8; 2]),
+ B3([u8; 3]),
+ B4([u8; 4]),
+}
+
+impl<'a> IntoIterator for &'a ELBytes {
+ type Item = u8;
+ type IntoIter = iter::Cloned<slice::Iter<'a, u8>>;
+ fn into_iter(self) -> Self::IntoIter {
+ use self::ELBytes::*;
+ (match *self {
+ B1(ref a) => a.iter(),
+ B2(ref a) => a.iter(),
+ B3(ref a) => a.iter(),
+ B4(ref a) => a.iter(),
+ })
+ .cloned()
+ }
+}
+
+// By analysis of run_utf8_validation defined at:
+// https://doc.rust-lang.org/nightly/src/core/str/mod.rs.html#1429
+// we know that .error_len() \in {None, Some(1), Some(2), Some(3)}.
+// We represent this with the range [0..4) and generate a valid
+// sequence from that.
+fn gen_el_bytes(allow_null: bool) -> impl Strategy<Value = ELBytes> {
+ fn b1(a: u8) -> ELBytes {
+ ELBytes::B1([a])
+ }
+ fn b2(a: (u8, u8)) -> ELBytes {
+ ELBytes::B2([a.0, a.1])
+ }
+ fn b3(a: ((u8, u8), u8)) -> ELBytes {
+ ELBytes::B3([(a.0).0, (a.0).1, a.1])
+ }
+ fn b4(a: ((u8, u8), u8, u8)) -> ELBytes {
+ ELBytes::B4([(a.0).0, (a.0).1, a.1, a.2])
+ }
+
+ /*
+ // https://tools.ietf.org/html/rfc3629
+ static UTF8_CHAR_WIDTH: [u8; 256] = [
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
+ 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
+ 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
+ ];
+
+ /// Mask of the value bits of a continuation byte.
+ const CONT_MASK: u8 = 0b0011_1111;
+ /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
+ const TAG_CONT_U8: u8 = 0b1000_0000;
+ */
+
+ // Continuation byte:
+ let succ_byte = 0x80u8..0xC0u8;
+
+ // Do we allow the nul byte or not?
+ let start_byte = if allow_null { 0x00u8 } else { 0x01u8 };
+
+ // Invalid continuation byte:
+ let fail_byte = prop_oneof![start_byte..0x7Fu8, 0xC1u8..];
+
+ // Matches zero in the UTF8_CHAR_WIDTH table above.
+ let byte0_w0 = prop_oneof![0x80u8..0xC0u8, 0xF5u8..];
+
+ // Start of a 3 (width) byte sequence:
+ // Leads here: https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1479
+ let byte0_w2 = 0xC2u8..0xE0u8;
+
+ // Start of a 3 (width) byte sequence:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1484
+ // See the left column in the match.
+ let byte0_w3 = 0xE0u8..0xF0u8;
+
+ // Start of a 4 (width) byte sequence:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1495
+ // See the left column in the match.
+ let byte0_w4 = 0xF0u8..0xF5u8;
+
+ // The 2 first (valid) bytes of a 3 (width) byte sequence:
+ // The first byte is byte0_w3. The second is the ones produced on the right.
+ let byte01_w3 = byte0_w3.clone().prop_flat_map(|x| {
+ (
+ Just(x),
+ match x {
+ 0xE0u8 => 0xA0u8..0xC0u8,
+ 0xE1u8..=0xECu8 => 0x80u8..0xC0u8,
+ 0xEDu8 => 0x80u8..0xA0u8,
+ 0xEEu8..=0xEFu8 => 0x80u8..0xA0u8,
+ _ => panic!(),
+ },
+ )
+ });
+
+ // In a 3 (width) byte sequence, an invalid second byte is chosen such that
+ // it will yield an error length of Some(1). The second byte is on
+ // the right of the match arms.
+ let byte01_w3_e1 = byte0_w3.clone().prop_flat_map(move |x| {
+ (
+ Just(x),
+ match x {
+ 0xE0u8 => prop_oneof![start_byte..0xA0u8, 0xC0u8..],
+ 0xE1u8..=0xECu8 => prop_oneof![start_byte..0x80u8, 0xC0u8..],
+ 0xEDu8 => prop_oneof![start_byte..0x80u8, 0xA0u8..],
+ 0xEEu8..=0xEFu8 => prop_oneof![start_byte..0x80u8, 0xA0u8..],
+ _ => panic!(),
+ },
+ )
+ });
+
+ // In a 4 (width) byte sequence, an invalid second byte is chosen such that
+ // it will yield an error length of Some(1). The second byte is on
+ // the right of the match arms.
+ let byte01_w4_e1 = byte0_w4.clone().prop_flat_map(move |x| {
+ (
+ Just(x),
+ match x {
+ 0xF0u8 => prop_oneof![start_byte..0x90u8, 0xA0u8..],
+ 0xF1u8..=0xF3u8 => prop_oneof![start_byte..0x80u8, 0xA0u8..],
+ 0xF4u8 => prop_oneof![start_byte..0x80u8, 0x90u8..],
+ _ => panic!(),
+ },
+ )
+ });
+
+ // The 2 first (valid) bytes of a 4 (width) byte sequence:
+ // The first byte is byte0_w4. The second is the ones produced on the right.
+ let byte01_w4 = byte0_w4.clone().prop_flat_map(|x| {
+ (
+ Just(x),
+ match x {
+ 0xF0u8 => 0x90u8..0xA0u8,
+ 0xF1u8..=0xF3u8 => 0x80u8..0xA0u8,
+ 0xF4u8 => 0x80u8..0x90u8,
+ _ => panic!(),
+ },
+ )
+ });
+
+ prop_oneof![
+ // error_len = None
+ // These are all happen when next!() fails to provide a byte.
+ prop_oneof![
+ // width = 2
+ // lacking 1 bytes:
+ static_map(byte0_w2.clone(), b1),
+ // width = 3
+ // lacking 2 bytes:
+ static_map(byte0_w3, b1),
+ // lacking 1 bytes:
+ static_map(byte01_w3.clone(), b2),
+ // width = 4
+ // lacking 3 bytes:
+ static_map(byte0_w4, b1),
+ // lacking 2 bytes:
+ static_map(byte01_w4.clone(), b2),
+ // lacking 1 byte:
+ static_map((byte01_w4.clone(), succ_byte.clone()), b3),
+ ],
+ // error_len = Some(1)
+ prop_oneof![
+ // width = 1 is not represented.
+ // width = 0
+ // path taken:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1508
+ static_map(byte0_w0, b1),
+ // width = 2
+ // path taken:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1480
+ static_map((byte0_w2, fail_byte.clone()), b2),
+ // width = 3
+ // path taken:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1488
+ static_map(byte01_w3_e1, b2),
+ // width = 4
+ // path taken:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1499
+ static_map(byte01_w4_e1, b2),
+ ],
+ // error_len = Some(2)
+ static_map(
+ prop_oneof![
+ // width = 3
+ // path taken:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1491
+ (byte01_w3, fail_byte.clone()),
+ // width = 4
+ // path taken:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1502
+ (byte01_w4.clone(), fail_byte.clone())
+ ],
+ b3
+ ),
+ // error_len = Some(3), width = 4
+ // path taken:
+ // https://doc.rust-lang.org/1.23.0/src/core/str/mod.rs.html#1505
+ static_map((byte01_w4, succ_byte, fail_byte), b4),
+ ]
+ .boxed()
+}
+
+#[cfg(test)]
+mod test {
+ no_panic_test!(
+ string => String,
+ str_box => Box<str>,
+ str_rc => Rc<str>,
+ str_arc => Arc<str>,
+ from_utf16_error => FromUtf16Error,
+ from_utf8_error => FromUtf8Error
+ );
+}