summaryrefslogtreecommitdiffstats
path: root/vendor/proptest/src/string.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/proptest/src/string.rs')
-rw-r--r--vendor/proptest/src/string.rs210
1 files changed, 144 insertions, 66 deletions
diff --git a/vendor/proptest/src/string.rs b/vendor/proptest/src/string.rs
index 8777388f5..935cb21ca 100644
--- a/vendor/proptest/src/string.rs
+++ b/vendor/proptest/src/string.rs
@@ -16,14 +16,8 @@ use core::mem;
use core::ops::RangeInclusive;
use core::u32;
-use regex_syntax::hir::{
- self, Hir,
- HirKind::*,
- Literal::*,
- RepetitionKind::{self, *},
- RepetitionRange::*,
-};
-use regex_syntax::{Error as ParseError, Parser};
+use regex_syntax::hir::{self, Hir, HirKind::*, Repetition};
+use regex_syntax::{Error as ParseError, ParserBuilder};
use crate::bool;
use crate::char;
@@ -33,7 +27,7 @@ use crate::test_runner::*;
/// Wraps the regex that forms the `Strategy` for `String` so that a sensible
/// `Default` can be given. The default is a string of non-control characters.
-#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StringParam(&'static str);
impl From<StringParam> for &'static str {
@@ -150,7 +144,8 @@ impl StrategyFromRegex for Vec<u8> {
/// If you don't need error handling and aren't limited by setup time, it is
/// also possible to directly use a `&str` as a strategy with the same effect.
pub fn string_regex(regex: &str) -> ParseResult<String> {
- string_regex_parsed(&regex_to_hir(regex)?)
+ let hir = ParserBuilder::new().build().parse(regex)?;
+ string_regex_parsed(&hir)
}
/// Like `string_regex()`, but allows providing a pre-parsed expression.
@@ -167,8 +162,20 @@ pub fn string_regex_parsed(expr: &Hir) -> ParseResult<String> {
/// Creates a strategy which generates byte strings matching the given regular
/// expression.
+///
+/// By default, the byte strings generated by this strategy _will_ be valid
+/// UTF-8. If you wish to generate byte strings that aren't (necessarily)
+/// valid UTF-8, wrap your regex (or some subsection of it) in `(?-u: ... )`.
+/// You may want to turn on the `s` flag as well (`(?s-u: ... )`) so that `.`
+/// will generate newline characters (byte value `0x0A`). See the
+/// [`regex` crate's documentation](https://docs.rs/regex/*/regex/#opt-out-of-unicode-support)
+/// for more information.
pub fn bytes_regex(regex: &str) -> ParseResult<Vec<u8>> {
- bytes_regex_parsed(&regex_to_hir(regex)?)
+ let hir = ParserBuilder::new()
+ .utf8(false)
+ .build()
+ .parse(regex)?;
+ bytes_regex_parsed(&hir)
}
/// Like `bytes_regex()`, but allows providing a pre-parsed expression.
@@ -176,11 +183,7 @@ pub fn bytes_regex_parsed(expr: &Hir) -> ParseResult<Vec<u8>> {
match expr.kind() {
Empty => Ok(Just(vec![]).sboxed()),
- Literal(lit) => Ok(Just(match lit {
- Unicode(scalar) => to_bytes(*scalar),
- Byte(byte) => vec![*byte],
- })
- .sboxed()),
+ Literal(lit) => Ok(Just(lit.0.to_vec()).sboxed()),
Class(class) => Ok(match class {
hir::Class::Unicode(class) => {
@@ -192,19 +195,13 @@ pub fn bytes_regex_parsed(expr: &Hir) -> ParseResult<Vec<u8>> {
}
}),
- Repetition(rep) => Ok(vec(
- bytes_regex_parsed(&rep.hir)?,
- to_range(rep.kind.clone())?,
- )
- .prop_map(|parts| {
- parts.into_iter().fold(vec![], |mut acc, child| {
- acc.extend(child);
- acc
- })
- })
- .sboxed()),
+ Repetition(rep) => {
+ Ok(vec(bytes_regex_parsed(&rep.sub)?, to_range(rep)?)
+ .prop_map(|parts| parts.concat())
+ .sboxed())
+ }
- Group(group) => bytes_regex_parsed(&group.hir).map(|v| v.0),
+ Capture(capture) => bytes_regex_parsed(&capture.sub).map(|v| v.0),
Concat(subs) => {
let subs = ConcatIter {
@@ -232,12 +229,8 @@ pub fn bytes_regex_parsed(expr: &Hir) -> ParseResult<Vec<u8>> {
Ok(Union::try_new(subs.iter().map(bytes_regex_parsed))?.sboxed())
}
- Anchor(_) => {
- unsupported("line/text anchors not supported for string generation")
- }
-
- WordBoundary(_) => unsupported(
- "word boundary tests not supported for string generation",
+ Look(_) => unsupported(
+ "anchors/boundaries not supported for string generation",
),
}
.map(RegexGeneratorStrategy)
@@ -298,8 +291,7 @@ impl<'a, I: Iterator<Item = &'a Hir>> Iterator for ConcatIter<'a, I> {
while let Some(next) = self.iter.next() {
match next.kind() {
// A literal. Accumulate:
- Literal(Unicode(scalar)) => self.buf.extend(to_bytes(*scalar)),
- Literal(Byte(byte)) => self.buf.push(*byte),
+ Literal(literal) => self.buf.extend_from_slice(&literal.0),
// Encountered a non-literal.
_ => {
return if !self.buf.is_empty() {
@@ -324,31 +316,35 @@ impl<'a, I: Iterator<Item = &'a Hir>> Iterator for ConcatIter<'a, I> {
}
}
-fn to_range(kind: RepetitionKind) -> Result<SizeRange, Error> {
- Ok(match kind {
- ZeroOrOne => size_range(0..=1),
- ZeroOrMore => size_range(0..=32),
- OneOrMore => size_range(1..=32),
- Range(range) => match range {
- Exactly(count) if u32::MAX == count => {
- return unsupported(
- "Cannot have repetition of exactly u32::MAX",
- )
- }
- Exactly(count) => size_range(count as usize),
- AtLeast(min) => {
- let max = if min < u32::MAX as u32 / 2 {
- min as usize * 2
- } else {
- u32::MAX as usize
- };
- size_range((min as usize)..max)
- }
- Bounded(_, max) if u32::MAX == max => {
- return unsupported("Cannot have repetition max of u32::MAX")
- }
- Bounded(min, max) => size_range((min as usize)..(max as usize + 1)),
- },
+fn to_range(rep: &Repetition) -> Result<SizeRange, Error> {
+ Ok(match (rep.min, rep.max) {
+ // Zero or one
+ (0, Some(1)) => size_range(0..=1),
+ // Zero or more
+ (0, None) => size_range(0..=32),
+ // One or more
+ (1, None) => size_range(1..=32),
+ // Exact count of u32::MAX
+ (u32::MAX, Some(u32::MAX)) => {
+ return unsupported("Cannot have repetition of exactly u32::MAX");
+ }
+ // Exact count
+ (min, Some(max)) if min == max => size_range(min as usize),
+ // At least min
+ (min, None) => {
+ let max = if min < u32::MAX as u32 / 2 {
+ min as usize * 2
+ } else {
+ u32::MAX as usize
+ };
+ size_range((min as usize)..max)
+ }
+ // Bounded range with max of u32::MAX
+ (_, Some(u32::MAX)) => {
+ return unsupported("Cannot have repetition max of u32::MAX")
+ }
+ // Bounded range
+ (min, Some(max)) => size_range((min as usize)..(max as usize + 1)),
})
}
@@ -357,10 +353,6 @@ fn to_bytes(khar: char) -> Vec<u8> {
khar.encode_utf8(&mut buf).as_bytes().to_owned()
}
-fn regex_to_hir(pattern: &str) -> Result<Hir, Error> {
- Ok(Parser::new().parse(pattern)?)
-}
-
fn unsupported<T>(error: &'static str) -> Result<T, Error> {
Err(Error::UnsupportedRegex(error))
}
@@ -370,9 +362,17 @@ mod test {
use std::collections::HashSet;
use regex::Regex;
+ use regex::bytes::Regex as BytesRegex;
use super::*;
+ fn printable_ascii(v: &[u8]) -> String {
+ v.iter()
+ .flat_map(|c| std::ascii::escape_default(*c))
+ .map(|c| char::from_u32(c.into()).unwrap())
+ .collect()
+ }
+
fn do_test(
pattern: &str,
min_distinct: usize,
@@ -396,6 +396,29 @@ mod test {
);
}
+ fn do_test_bytes(
+ pattern: &str,
+ min_distinct: usize,
+ max_distinct: usize,
+ iterations: usize,
+ ) {
+ let generated = generate_byte_values_matching_regex(pattern, iterations);
+ assert!(
+ generated.len() >= min_distinct,
+ "Expected to generate at least {} strings, but only \
+ generated {}",
+ min_distinct,
+ generated.len()
+ );
+ assert!(
+ generated.len() <= max_distinct,
+ "Expected to generate at most {} strings, but \
+ generated {}",
+ max_distinct,
+ generated.len()
+ );
+ }
+
fn generate_values_matching_regex(
pattern: &str,
iterations: usize,
@@ -432,6 +455,42 @@ mod test {
generated
}
+ fn generate_byte_values_matching_regex(
+ pattern: &str,
+ iterations: usize,
+ ) -> HashSet<Vec<u8>> {
+ let rx = BytesRegex::new(pattern).unwrap();
+ let mut generated = HashSet::new();
+
+ let strategy = bytes_regex(pattern).unwrap();
+ let mut runner = TestRunner::deterministic();
+ for _ in 0..iterations {
+ let mut value = strategy.new_tree(&mut runner).unwrap();
+
+ loop {
+ let s = value.current();
+ let ok = if let Some(matsch) = rx.find(&s) {
+ 0 == matsch.start() && s.len() == matsch.end()
+ } else {
+ false
+ };
+ if !ok {
+ panic!(
+ "Generated string {:?} which does not match {:?}",
+ printable_ascii(&s), pattern
+ );
+ }
+
+ generated.insert(s);
+
+ if !value.simplify() {
+ break;
+ }
+ }
+ }
+ generated
+ }
+
#[test]
fn test_case_insensitive_produces_all_available_values() {
let mut expected: HashSet<String> = HashSet::new();
@@ -445,6 +504,7 @@ mod test {
#[test]
fn test_literal() {
do_test("foo", 1, 1, 8);
+ do_test_bytes("foo", 1, 1, 8);
}
#[test]
@@ -455,36 +515,43 @@ mod test {
#[test]
fn test_alternation() {
do_test("foo|bar|baz", 3, 3, 16);
+ do_test_bytes("foo|bar|baz", 3, 3, 16);
}
#[test]
- fn test_repitition() {
+ fn test_repetition() {
do_test("a{0,8}", 9, 9, 64);
+ do_test_bytes("a{0,8}", 9, 9, 64);
}
#[test]
fn test_question() {
do_test("a?", 2, 2, 16);
+ do_test_bytes("a?", 2, 2, 16);
}
#[test]
fn test_star() {
do_test("a*", 33, 33, 256);
+ do_test_bytes("a*", 33, 33, 256);
}
#[test]
fn test_plus() {
do_test("a+", 32, 32, 256);
+ do_test_bytes("a+", 32, 32, 256);
}
#[test]
fn test_n_to_range() {
do_test("a{4,}", 4, 4, 64);
+ do_test_bytes("a{4,}", 4, 4, 64);
}
#[test]
fn test_concatenation() {
do_test("(foo|bar)(xyzzy|plugh)", 4, 4, 32);
+ do_test_bytes("(foo|bar)(xyzzy|plugh)", 4, 4, 32);
}
#[test]
@@ -505,6 +572,7 @@ mod test {
#[test]
fn test_dot_s() {
do_test("(?s).", 200, 65536, 256);
+ do_test_bytes("(?s-u).", 256, 256, 2048);
}
#[test]
@@ -512,6 +580,16 @@ mod test {
do_test("\\d+", 1, 65536, 256);
}
+ #[test]
+ fn test_non_utf8_byte_strings() {
+ do_test_bytes(r"(?-u)[\xC0-\xFF]\x20", 64, 64, 512);
+ do_test_bytes(r"(?-u)\x20[\x80-\xBF]", 64, 64, 512);
+ do_test_bytes(r#"(?x-u)
+ \xed (( ( \xa0\x80 | \xad\xbf | \xae\x80 | \xaf\xbf )
+ ( \xed ( \xb0\x80 | \xbf\xbf ) )? )
+ | \xb0\x80 | \xbe\x80 | \xbf\xbf )"#, 15, 15, 120);
+ }
+
fn assert_send_and_sync<T: Send + Sync>(_: T) {}
#[test]