diff options
Diffstat (limited to 'third_party/rust/unicode-segmentation/src/test.rs')
-rw-r--r-- | third_party/rust/unicode-segmentation/src/test.rs | 247 |
1 files changed, 247 insertions, 0 deletions
diff --git a/third_party/rust/unicode-segmentation/src/test.rs b/third_party/rust/unicode-segmentation/src/test.rs new file mode 100644 index 0000000000..bc642a50da --- /dev/null +++ b/third_party/rust/unicode-segmentation/src/test.rs @@ -0,0 +1,247 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use super::UnicodeSegmentation; + +use std::prelude::v1::*; + +#[test] +fn test_graphemes() { + use crate::testdata::{TEST_DIFF, TEST_SAME}; + + pub const EXTRA_DIFF: &'static [( + &'static str, + &'static [&'static str], + &'static [&'static str], + )] = &[ + // Official test suite doesn't include two Prepend chars between two other chars. + ( + "\u{20}\u{600}\u{600}\u{20}", + &["\u{20}", "\u{600}\u{600}\u{20}"], + &["\u{20}", "\u{600}", "\u{600}", "\u{20}"], + ), + // Test for Prepend followed by two Any chars + ( + "\u{600}\u{20}\u{20}", + &["\u{600}\u{20}", "\u{20}"], + &["\u{600}", "\u{20}", "\u{20}"], + ), + ]; + + pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[ + // family emoji (more than two emoji joined by ZWJ) + ( + "\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}", + &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"], + ), + // cartwheel emoji followed by two fitzpatrick skin tone modifiers + // (test case from issue #19) + ( + "\u{1F938}\u{1F3FE}\u{1F3FE}", + &["\u{1F938}\u{1F3FE}\u{1F3FE}"], + ), + ]; + + for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) { + // test forward iterator + assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned())); + assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned())); + + // test reverse iterator + assert!(UnicodeSegmentation::graphemes(s, true) + .rev() + .eq(g.iter().rev().cloned())); + assert!(UnicodeSegmentation::graphemes(s, false) + .rev() + .eq(g.iter().rev().cloned())); + } + + for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) { + // test forward iterator + assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned())); + assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned())); + + // test reverse iterator + assert!(UnicodeSegmentation::graphemes(s, true) + .rev() + .eq(gt.iter().rev().cloned())); + assert!(UnicodeSegmentation::graphemes(s, false) + .rev() + .eq(gf.iter().rev().cloned())); + } + + // test the indices iterators + let s = "a̐éö̲\r\n"; + let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>(); + let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]; + assert_eq!(gr_inds, b); + let gr_inds = UnicodeSegmentation::grapheme_indices(s, true) + .rev() + .collect::<Vec<(usize, &str)>>(); + let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")]; + assert_eq!(gr_inds, b); + let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true); + { + let gr_inds = gr_inds_iter.by_ref(); + let e1 = gr_inds.size_hint(); + assert_eq!(e1, (1, Some(13))); + let c = gr_inds.count(); + assert_eq!(c, 4); + } + let e2 = gr_inds_iter.size_hint(); + assert_eq!(e2, (0, Some(0))); + + // make sure the reverse iterator does the right thing with "\n" at beginning of string + let s = "\n\r\n\r"; + let gr = UnicodeSegmentation::graphemes(s, true) + .rev() + .collect::<Vec<&str>>(); + let b: &[_] = &["\r", "\r\n", "\n"]; + assert_eq!(gr, b); +} + +#[test] +fn test_words() { + use crate::testdata::TEST_WORD; + + // Unicode's official tests don't really test longer chains of flag emoji + // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ + const EXTRA_TESTS: &'static [(&'static str, &'static [&'static str])] = &[ + ( + "🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦🇴", + &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦🇴"], + ), + ("🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦", &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"]), + ( + "🇦a🇫🇦🇽a🇦🇱🇩🇿🇦🇸🇦🇩🇦", + &["🇦", "a", "🇫🇦", "🇽", "a", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"], + ), + ( + "\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}", + &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"], + ), + ("😌👎🏼", &["😌", "👎🏼"]), + // perhaps wrong, spaces should not be included? + ("hello world", &["hello", " ", "world"]), + ("🇨🇦🇨🇭🇿🇲🇿 hi", &["🇨🇦", "🇨🇭", "🇿🇲", "🇿", " ", "hi"]), + ]; + for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) { + macro_rules! assert_ { + ($test:expr, $exp:expr, $name:expr) => { + // collect into vector for better diagnostics in failure case + let testing = $test.collect::<Vec<_>>(); + let expected = $exp.collect::<Vec<_>>(); + assert_eq!( + testing, expected, + "{} test for testcase ({:?}, {:?}) failed.", + $name, s, w + ) + }; + } + // test forward iterator + assert_!( + s.split_word_bounds(), + w.iter().cloned(), + "Forward word boundaries" + ); + + // test reverse iterator + assert_!( + s.split_word_bounds().rev(), + w.iter().rev().cloned(), + "Reverse word boundaries" + ); + + // generate offsets from word string lengths + let mut indices = vec![0]; + for i in w.iter().cloned().map(|s| s.len()).scan(0, |t, n| { + *t += n; + Some(*t) + }) { + indices.push(i); + } + indices.pop(); + let indices = indices; + + // test forward indices iterator + assert_!( + s.split_word_bound_indices().map(|(l, _)| l), + indices.iter().cloned(), + "Forward word indices" + ); + + // test backward indices iterator + assert_!( + s.split_word_bound_indices().rev().map(|(l, _)| l), + indices.iter().rev().cloned(), + "Reverse word indices" + ); + } +} + +#[test] +fn test_sentences() { + use crate::testdata::TEST_SENTENCE; + + for &(s, w) in TEST_SENTENCE.iter() { + macro_rules! assert_ { + ($test:expr, $exp:expr, $name:expr) => { + // collect into vector for better diagnostics in failure case + let testing = $test.collect::<Vec<_>>(); + let expected = $exp.collect::<Vec<_>>(); + assert_eq!( + testing, expected, + "{} test for testcase ({:?}, {:?}) failed.", + $name, s, w + ) + }; + } + + assert_!( + s.split_sentence_bounds(), + w.iter().cloned(), + "Forward sentence boundaries" + ); + } +} + +quickcheck! { + fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool { + let a = s.graphemes(true).collect::<Vec<_>>(); + let mut b = s.graphemes(true).rev().collect::<Vec<_>>(); + b.reverse(); + a == b + } + + fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool { + let a = s.graphemes(false).collect::<Vec<_>>(); + let mut b = s.graphemes(false).rev().collect::<Vec<_>>(); + b.reverse(); + a == b + } + + fn quickcheck_join_graphemes(s: String) -> bool { + let a = s.graphemes(true).collect::<String>(); + let b = s.graphemes(false).collect::<String>(); + a == s && b == s + } + + fn quickcheck_forward_reverse_words(s: String) -> bool { + let a = s.split_word_bounds().collect::<Vec<_>>(); + let mut b = s.split_word_bounds().rev().collect::<Vec<_>>(); + b.reverse(); + a == b + } + + fn quickcheck_join_words(s: String) -> bool { + let a = s.split_word_bounds().collect::<String>(); + a == s + } +} |