1
0
Fork 0
firefox/third_party/rust/icu_normalizer/tests/tests.rs
Daniel Baumann 5e9a113729
Adding upstream version 140.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-25 09:37:52 +02:00

1549 lines
51 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use icu_normalizer::properties::CanonicalCombiningClassMap;
use icu_normalizer::properties::CanonicalComposition;
use icu_normalizer::properties::CanonicalDecomposition;
use icu_normalizer::properties::Decomposed;
use icu_normalizer::uts46::Uts46Mapper;
use icu_normalizer::ComposingNormalizer;
use icu_normalizer::DecomposingNormalizer;
/// Spot-checks NFD on precomposed letters, a composition-excluded musical
/// symbol, a singleton, and characters that only have compatibility mappings
/// (which canonical decomposition must leave untouched).
///
/// Every non-ASCII character is written as a `\u{...}` escape so that
/// confusable or easily-stripped characters cannot be silently altered by
/// editors or viewers.
#[test]
fn test_nfd_basic() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    assert_eq!(normalizer.normalize("\u{00E4}"), "a\u{0308}"); // a with diaeresis
    assert_eq!(normalizer.normalize("\u{00C4}"), "A\u{0308}"); // A with diaeresis
    assert_eq!(normalizer.normalize("\u{1EC7}"), "e\u{0323}\u{0302}"); // e with circumflex and dot below
    assert_eq!(normalizer.normalize("\u{1EC6}"), "E\u{0323}\u{0302}"); // E with circumflex and dot below
    assert_eq!(normalizer.normalize("\u{1D15E}"), "\u{1D157}\u{1D165}"); // musical half note
    assert_eq!(normalizer.normalize("\u{2126}"), "\u{03A9}"); // ohm sign
    assert_eq!(normalizer.normalize("\u{FF8D}\u{FF9E}"), "\u{FF8D}\u{FF9E}"); // half-width unchanged
    assert_eq!(normalizer.normalize("\u{FF8D}\u{FF9F}"), "\u{FF8D}\u{FF9F}"); // half-width unchanged
    assert_eq!(normalizer.normalize("\u{FB01}"), "\u{FB01}"); // ligature unchanged
    assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{FDFA}"); // ligature unchanged
    assert_eq!(normalizer.normalize("\u{320E}"), "\u{320E}"); // parenthetical unchanged
    assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript
}
/// Spot-checks NFKD: the canonical cases behave as in NFD, while half-width
/// kana, ligatures and parenthesized forms are expanded through their
/// compatibility mappings.
///
/// Every non-ASCII character is written as a `\u{...}` escape so that
/// confusable or easily-stripped characters cannot be silently altered.
#[test]
fn test_nfkd_basic() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
    assert_eq!(normalizer.normalize("\u{00E4}"), "a\u{0308}"); // a with diaeresis
    assert_eq!(normalizer.normalize("\u{00C4}"), "A\u{0308}"); // A with diaeresis
    assert_eq!(normalizer.normalize("\u{1EC7}"), "e\u{0323}\u{0302}"); // e with circumflex and dot below
    assert_eq!(normalizer.normalize("\u{1EC6}"), "E\u{0323}\u{0302}"); // E with circumflex and dot below
    assert_eq!(normalizer.normalize("\u{1D15E}"), "\u{1D157}\u{1D165}"); // musical half note
    assert_eq!(normalizer.normalize("\u{2126}"), "\u{03A9}"); // ohm sign
    assert_eq!(normalizer.normalize("\u{FF8D}\u{FF9E}"), "\u{30D8}\u{3099}"); // half-width to full-width
    assert_eq!(normalizer.normalize("\u{FF8D}\u{FF9F}"), "\u{30D8}\u{309A}"); // half-width to full-width
    assert_eq!(normalizer.normalize("\u{FB01}"), "fi"); // ligature expanded
    assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{635}\u{644}\u{649} \u{627}\u{644}\u{644}\u{647} \u{639}\u{644}\u{64A}\u{647} \u{648}\u{633}\u{644}\u{645}");
    // ligature expanded
    assert_eq!(normalizer.normalize("\u{320E}"), "(\u{1100}\u{1161})"); // parenthetical expanded
    assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript
}
/// Spot-checks NFC: decomposed sequences recompose, the composition-excluded
/// musical symbol stays decomposed, and compatibility-only characters are
/// left untouched.
///
/// Every non-ASCII character is written as a `\u{...}` escape so that
/// confusable or easily-stripped characters cannot be silently altered.
#[test]
fn test_nfc_basic() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
    assert_eq!(normalizer.normalize("a\u{0308}"), "\u{00E4}"); // a with diaeresis
    assert_eq!(normalizer.normalize("A\u{0308}"), "\u{00C4}"); // A with diaeresis
    assert_eq!(normalizer.normalize("e\u{0323}\u{0302}"), "\u{1EC7}"); // e with circumflex and dot below
    assert_eq!(normalizer.normalize("E\u{0323}\u{0302}"), "\u{1EC6}"); // E with circumflex and dot below
    assert_eq!(normalizer.normalize("\u{1D15E}"), "\u{1D157}\u{1D165}"); // Composition exclusion
    assert_eq!(normalizer.normalize("\u{2126}"), "\u{03A9}"); // ohm sign
    assert_eq!(normalizer.normalize("\u{FF8D}\u{FF9E}"), "\u{FF8D}\u{FF9E}"); // half-width unchanged
    assert_eq!(normalizer.normalize("\u{FF8D}\u{FF9F}"), "\u{FF8D}\u{FF9F}"); // half-width unchanged
    assert_eq!(normalizer.normalize("\u{FB01}"), "\u{FB01}"); // ligature unchanged
    assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{FDFA}"); // ligature unchanged
    assert_eq!(normalizer.normalize("\u{320E}"), "\u{320E}"); // parenthetical unchanged
    assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript
}
/// Spot-checks NFKC: compatibility characters are expanded and the result is
/// then recomposed where a canonical composite exists.
///
/// Every non-ASCII character is written as a `\u{...}` escape so that
/// confusable or easily-stripped characters cannot be silently altered.
#[test]
fn test_nfkc_basic() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfkc();
    assert_eq!(normalizer.normalize("a\u{0308}"), "\u{00E4}"); // a with diaeresis
    assert_eq!(normalizer.normalize("A\u{0308}"), "\u{00C4}"); // A with diaeresis
    assert_eq!(normalizer.normalize("e\u{0323}\u{0302}"), "\u{1EC7}"); // e with circumflex and dot below
    assert_eq!(normalizer.normalize("E\u{0323}\u{0302}"), "\u{1EC6}"); // E with circumflex and dot below
    assert_eq!(normalizer.normalize("\u{1D15E}"), "\u{1D157}\u{1D165}"); // Composition exclusion
    assert_eq!(normalizer.normalize("\u{2126}"), "\u{03A9}"); // ohm sign
    assert_eq!(normalizer.normalize("\u{FF8D}\u{FF9E}"), "\u{30D9}"); // half-width to full-width, then compose
    assert_eq!(normalizer.normalize("\u{FF8D}\u{FF9F}"), "\u{30DA}"); // half-width to full-width, then compose
    assert_eq!(normalizer.normalize("\u{FB01}"), "fi"); // ligature expanded
    assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}");
    // ligature expanded
    assert_eq!(normalizer.normalize("\u{320E}"), "(\u{AC00})"); // parenthetical expanded and partially recomposed
    assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript
}
/// Exercises `Uts46Mapper::map_normalize` on the same sample characters as the
/// basic normalization tests, plus the UTS 46 specific cases (deviation
/// characters, a disallowed character and an ignored character).
///
/// Every non-ASCII character is written as a `\u{...}` escape so that
/// confusable or easily-stripped characters cannot be silently altered.
#[test]
fn test_uts46_map_normalize() {
    let mapper: Uts46Mapper = Uts46Mapper::new();
    // Feeds the scalar values of `s` through the mapper and collects the output.
    let mapped = |s: &'static str| mapper.map_normalize(s.chars()).collect::<String>();

    assert_eq!(mapped("a\u{0308}"), "\u{00E4}"); // a with diaeresis
    assert_eq!(mapped("A\u{0308}"), "\u{00E4}"); // upper-case input yields the lower-case composite
    assert_eq!(mapped("e\u{0323}\u{0302}"), "\u{1EC7}"); // e with circumflex and dot below
    assert_eq!(mapped("E\u{0323}\u{0302}"), "\u{1EC7}"); // upper-case input yields the lower-case composite
    assert_eq!(mapped("\u{1D15E}"), "\u{1D157}\u{1D165}"); // Composition exclusion
    assert_eq!(mapped("\u{2126}"), "\u{03C9}"); // ohm sign
    assert_eq!(mapped("\u{FF8D}\u{FF9E}"), "\u{30D9}"); // half-width to full-width, then compose
    assert_eq!(mapped("\u{FF8D}\u{FF9F}"), "\u{30DA}"); // half-width to full-width, then compose
    assert_eq!(mapped("\u{FB01}"), "fi"); // ligature expanded
    assert_eq!(mapped("\u{FDFA}"), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}");
    // ligature expanded
    assert_eq!(mapped("\u{320E}"), "(\u{AC00})"); // parenthetical expanded and partially recomposed

    // Deviations (UTS 46, 6 Mapping Table Derivation, Step 4)
    assert_eq!(mapped("\u{200C}"), "\u{200C}");
    assert_eq!(mapped("\u{200D}"), "\u{200D}");
    assert_eq!(mapped("\u{00DF}"), "\u{00DF}"); // sharp s
    assert_eq!(mapped("\u{03C2}"), "\u{03C2}"); // final sigma

    // Iota subscript
    assert_eq!(mapped("\u{0345}"), "\u{03B9}");

    // Disallowed
    assert_eq!(mapped("\u{061C}"), "\u{FFFD}");

    // Ignored
    assert_eq!(mapped("a\u{180B}b"), "ab");
}
/// Exercises `Uts46Mapper::normalize_validate` on the same inputs as
/// `test_uts46_map_normalize`. The expectations differ only for the ignored
/// character U+180B, which is reported as U+FFFD here instead of being dropped.
///
/// Every non-ASCII character is written as a `\u{...}` escape so that
/// confusable or easily-stripped characters cannot be silently altered.
#[test]
fn test_uts46_normalize_validate() {
    let mapper: Uts46Mapper = Uts46Mapper::new();
    // Feeds the scalar values of `s` through the validator and collects the output.
    let validated = |s: &'static str| mapper.normalize_validate(s.chars()).collect::<String>();

    assert_eq!(validated("a\u{0308}"), "\u{00E4}"); // a with diaeresis
    assert_eq!(validated("A\u{0308}"), "\u{00E4}"); // upper-case input yields the lower-case composite
    assert_eq!(validated("e\u{0323}\u{0302}"), "\u{1EC7}"); // e with circumflex and dot below
    assert_eq!(validated("E\u{0323}\u{0302}"), "\u{1EC7}"); // upper-case input yields the lower-case composite
    assert_eq!(validated("\u{1D15E}"), "\u{1D157}\u{1D165}"); // Composition exclusion
    assert_eq!(validated("\u{2126}"), "\u{03C9}"); // ohm sign
    assert_eq!(validated("\u{FF8D}\u{FF9E}"), "\u{30D9}"); // half-width to full-width, then compose
    assert_eq!(validated("\u{FF8D}\u{FF9F}"), "\u{30DA}"); // half-width to full-width, then compose
    assert_eq!(validated("\u{FB01}"), "fi"); // ligature expanded
    assert_eq!(validated("\u{FDFA}"), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}");
    // ligature expanded
    assert_eq!(validated("\u{320E}"), "(\u{AC00})"); // parenthetical expanded and partially recomposed

    // Deviations (UTS 46, 6 Mapping Table Derivation, Step 4)
    assert_eq!(validated("\u{200C}"), "\u{200C}");
    assert_eq!(validated("\u{200D}"), "\u{200D}");
    assert_eq!(validated("\u{00DF}"), "\u{00DF}"); // sharp s
    assert_eq!(validated("\u{03C2}"), "\u{03C2}"); // final sigma

    // Iota subscript
    assert_eq!(validated("\u{0345}"), "\u{03B9}");

    // Disallowed
    assert_eq!(validated("\u{061C}"), "\u{FFFD}");

    // Ignored
    assert_eq!(validated("a\u{180B}b"), "a\u{FFFD}b");
}
/// String buffer type from the `arraystring` crate, parameterized with
/// `typenum::U48`; used in this file as the output sink for the `*_to` string
/// APIs and as the per-column storage returned by `parse_hex`.
type StackString = arraystring::ArrayString<arraystring::typenum::U48>;
/// NFD through `normalize_to`, writing into a `StackString` sink.
///
/// Inputs are written as `\u{...}` escapes so the precomposed characters
/// cannot be silently lost or replaced by look-alikes.
#[test]
fn test_nfd_str_to() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut buf = StackString::new();

    // a with diaeresis
    assert!(normalizer.normalize_to("\u{00E4}", &mut buf).is_ok());
    assert_eq!(&buf, "a\u{0308}");

    buf.clear();

    // e with circumflex and dot below
    assert!(normalizer.normalize_to("\u{1EC7}", &mut buf).is_ok());
    assert_eq!(&buf, "e\u{0323}\u{0302}");
}
/// NFD through `normalize_utf8_to` on well-formed UTF-8 bytes, writing into a
/// `StackString` sink.
///
/// Inputs are written as `\u{...}` escapes so the precomposed characters
/// cannot be silently lost or replaced by look-alikes.
#[test]
fn test_nfd_utf8_to() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut buf = StackString::new();

    // a with diaeresis
    assert!(normalizer
        .normalize_utf8_to("\u{00E4}".as_bytes(), &mut buf)
        .is_ok());
    assert_eq!(&buf, "a\u{0308}");

    buf.clear();

    // e with circumflex and dot below
    assert!(normalizer
        .normalize_utf8_to("\u{1EC7}".as_bytes(), &mut buf)
        .is_ok());
    assert_eq!(&buf, "e\u{0323}\u{0302}");
}
/// `arrayvec::ArrayVec` of `u16` with const parameter 32; used in this file as
/// the UTF-16 input buffer and as the output sink for `normalize_utf16_to`.
type StackVec = arrayvec::ArrayVec<u16, 32>;
/// NFD through `normalize_utf16_to` on well-formed UTF-16, writing into a
/// `StackVec` sink.
#[test]
fn test_nfd_utf16_to() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut buf = StackVec::new();

    // (precomposed input, expected canonical decomposition)
    let cases: [(&[u16], &[u16]); 2] = [
        (&[0x00E4], &[0x0061, 0x0308]),
        (&[0x1EC7], &[0x0065, 0x0323, 0x0302]),
    ];

    for (input, expected) in cases {
        // The sink is appended to, so start every case from an empty buffer.
        buf.clear();
        assert!(normalizer.normalize_utf16_to(input, &mut buf).is_ok());
        assert_eq!(&buf, expected);
    }
}
/// NFC through `normalize_to`, writing into a `StackString` sink.
///
/// Expected composites are written as `\u{...}` escapes so they cannot be
/// silently lost or replaced by look-alikes.
#[test]
fn test_nfc_str_to() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut buf = StackString::new();

    assert!(normalizer.normalize_to("a\u{0308}", &mut buf).is_ok());
    assert_eq!(&buf, "\u{00E4}"); // a with diaeresis

    buf.clear();

    assert!(normalizer
        .normalize_to("e\u{0323}\u{0302}", &mut buf)
        .is_ok());
    assert_eq!(&buf, "\u{1EC7}"); // e with circumflex and dot below
}
/// NFC through `normalize_utf8_to` on well-formed UTF-8 bytes, writing into a
/// `StackString` sink.
///
/// Expected composites are written as `\u{...}` escapes so they cannot be
/// silently lost or replaced by look-alikes.
#[test]
fn test_nfc_utf8_to() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut buf = StackString::new();

    assert!(normalizer
        .normalize_utf8_to("a\u{0308}".as_bytes(), &mut buf)
        .is_ok());
    assert_eq!(&buf, "\u{00E4}"); // a with diaeresis

    buf.clear();

    assert!(normalizer
        .normalize_utf8_to("e\u{0323}\u{0302}".as_bytes(), &mut buf)
        .is_ok());
    assert_eq!(&buf, "\u{1EC7}"); // e with circumflex and dot below
}
/// NFC through `normalize_utf16_to` on well-formed UTF-16, writing into a
/// `StackVec` sink.
#[test]
fn test_nfc_utf16_to() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut buf = StackVec::new();

    // (decomposed input, expected composite)
    let cases: [(&[u16], &[u16]); 2] = [
        (&[0x0061, 0x0308], &[0x00E4]),
        (&[0x0065, 0x0323, 0x0302], &[0x1EC7]),
    ];

    for (input, expected) in cases {
        // The sink is appended to, so start every case from an empty buffer.
        buf.clear();
        assert!(normalizer.normalize_utf16_to(input, &mut buf).is_ok());
        assert_eq!(&buf, expected);
    }
}
/// NFC through `normalize_utf8_to` on ill-formed UTF-8: each invalid byte or
/// truncated sequence is expected to surface as U+FFFD while the well-formed
/// text around it is still composed.
///
/// Expected composites are written as `\u{...}` escapes so they cannot be
/// silently lost or replaced by look-alikes.
#[test]
fn test_nfc_utf8_to_errors() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut buf = StackString::new();

    // (raw input bytes, expected output)
    let cases: [(&[u8], &str); 4] = [
        // Stray 0xFF around "a" + combining diaeresis.
        (b"\xFFa\xCC\x88\xFF", "\u{FFFD}\u{00E4}\u{FFFD}"),
        // Stray continuation bytes around "e" + dot below + circumflex.
        (b"\x80e\xCC\xA3\xCC\x82\x80", "\u{FFFD}\u{1EC7}\u{FFFD}"),
        // Invalid bytes between ASCII runs.
        (b"aaa\xFFaaa\xFFaaa", "aaa\u{FFFD}aaa\u{FFFD}aaa"),
        // Truncated three-byte sequences between ASCII runs.
        (b"aaa\xE2\x98aaa\xE2\x98aaa", "aaa\u{FFFD}aaa\u{FFFD}aaa"),
    ];

    for (input, expected) in cases {
        // The sink is appended to, so start every case from an empty buffer.
        buf.clear();
        assert!(normalizer.normalize_utf8_to(input, &mut buf).is_ok());
        assert_eq!(&buf, expected);
    }
}
/// NFD through `normalize_utf8_to` on ill-formed UTF-8: each invalid byte or
/// truncated sequence is expected to surface as U+FFFD while the well-formed
/// text around it is still decomposed.
#[test]
fn test_nfd_utf8_to_errors() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut buf = StackString::new();

    // (raw input bytes, expected output)
    let cases: [(&[u8], &str); 4] = [
        // Stray 0xFF around the UTF-8 encoding of U+00E4.
        (b"\xFF\xC3\xA4\xFF", "\u{FFFD}a\u{0308}\u{FFFD}"),
        // Stray continuation bytes around the UTF-8 encoding of U+1EC7.
        (b"\x80\xE1\xBB\x87\x80", "\u{FFFD}e\u{0323}\u{0302}\u{FFFD}"),
        // Invalid bytes between ASCII runs.
        (b"aaa\xFFaaa\xFFaaa", "aaa\u{FFFD}aaa\u{FFFD}aaa"),
        // Truncated three-byte sequences between ASCII runs.
        (b"aaa\xE2\x98aaa\xE2\x98aaa", "aaa\u{FFFD}aaa\u{FFFD}aaa"),
    ];

    for (input, expected) in cases {
        // The sink is appended to, so start every case from an empty buffer.
        buf.clear();
        assert!(normalizer.normalize_utf8_to(input, &mut buf).is_ok());
        assert_eq!(&buf, expected);
    }
}
/// NFC through `normalize_utf16_to` on ill-formed UTF-16: every unpaired
/// surrogate (high `0xD800` or low `0xDC00`) is expected to become U+FFFD,
/// and composition still happens on the well-formed parts.
#[test]
fn test_nfc_utf16_to_errors() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut buf = StackVec::new();

    // (input code units, expected code units); cases come in high/low surrogate pairs.
    let cases: [(&[u16], &[u16]); 10] = [
        // Leading unpaired surrogate.
        (&[0xD800, 0x0061, 0x0308], &[0xFFFD, 0x00E4]),
        (&[0xDC00, 0x0061, 0x0308], &[0xFFFD, 0x00E4]),
        // Unpaired surrogate after a starter.
        (&[0x0061, 0xD800, 0x0061, 0x0308], &[0x0061, 0xFFFD, 0x00E4]),
        (&[0x0061, 0xDC00, 0x0061, 0x0308], &[0x0061, 0xFFFD, 0x00E4]),
        // Unpaired surrogates in the middle and at the end.
        (
            &[0x0061, 0xD800, 0x0061, 0x0308, 0xD800],
            &[0x0061, 0xFFFD, 0x00E4, 0xFFFD],
        ),
        (
            &[0x0061, 0xDC00, 0x0061, 0x0308, 0xDC00],
            &[0x0061, 0xFFFD, 0x00E4, 0xFFFD],
        ),
        // Same shape, but nothing to compose.
        (
            &[0x0061, 0xD800, 0x0061, 0x0061, 0xD800],
            &[0x0061, 0xFFFD, 0x0061, 0x0061, 0xFFFD],
        ),
        (
            &[0x0061, 0xDC00, 0x0061, 0x0061, 0xDC00],
            &[0x0061, 0xFFFD, 0x0061, 0x0061, 0xFFFD],
        ),
        // Surrogate between a base and a combining mark: no composition across it.
        (
            &[0x0061, 0xD800, 0x0308, 0xD800],
            &[0x0061, 0xFFFD, 0x0308, 0xFFFD],
        ),
        (
            &[0x0061, 0xDC00, 0x0308, 0xDC00],
            &[0x0061, 0xFFFD, 0x0308, 0xFFFD],
        ),
    ];

    for (input, expected) in cases {
        // The sink is appended to, so start every case from an empty buffer.
        buf.clear();
        assert!(normalizer.normalize_utf16_to(input, &mut buf).is_ok());
        assert_eq!(&buf, expected);
    }
}
/// NFD through `normalize_utf16_to` on ill-formed UTF-16: every unpaired
/// surrogate (high `0xD800` or low `0xDC00`) is expected to become U+FFFD,
/// and decomposition still happens on the well-formed parts.
#[test]
fn test_nfd_utf16_to_errors() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut buf = StackVec::new();

    // (input code units, expected code units); cases come in high/low surrogate pairs.
    let cases: [(&[u16], &[u16]); 10] = [
        // Leading unpaired surrogate.
        (&[0xD800, 0x00E4], &[0xFFFD, 0x0061, 0x0308]),
        (&[0xDC00, 0x00E4], &[0xFFFD, 0x0061, 0x0308]),
        // Unpaired surrogate after a starter.
        (
            &[0x0061, 0xD800, 0x00E4],
            &[0x0061, 0xFFFD, 0x0061, 0x0308],
        ),
        (
            &[0x0061, 0xDC00, 0x00E4],
            &[0x0061, 0xFFFD, 0x0061, 0x0308],
        ),
        // Unpaired surrogates in the middle and at the end.
        (
            &[0x0061, 0xD800, 0x00E4, 0xD800],
            &[0x0061, 0xFFFD, 0x0061, 0x0308, 0xFFFD],
        ),
        (
            &[0x0061, 0xDC00, 0x00E4, 0xDC00],
            &[0x0061, 0xFFFD, 0x0061, 0x0308, 0xFFFD],
        ),
        // Same shape, but nothing to decompose.
        (
            &[0x0061, 0xD800, 0x0061, 0x0061, 0xD800],
            &[0x0061, 0xFFFD, 0x0061, 0x0061, 0xFFFD],
        ),
        (
            &[0x0061, 0xDC00, 0x0061, 0x0061, 0xDC00],
            &[0x0061, 0xFFFD, 0x0061, 0x0061, 0xFFFD],
        ),
        // Surrogate between a base and a combining mark.
        (
            &[0x0061, 0xD800, 0x0308, 0xD800],
            &[0x0061, 0xFFFD, 0x0308, 0xFFFD],
        ),
        (
            &[0x0061, 0xDC00, 0x0308, 0xDC00],
            &[0x0061, 0xFFFD, 0x0308, 0xFFFD],
        ),
    ];

    for (input, expected) in cases {
        // The sink is appended to, so start every case from an empty buffer.
        buf.clear();
        assert!(normalizer.normalize_utf16_to(input, &mut buf).is_ok());
        assert_eq!(&buf, expected);
    }
}
use atoi::FromRadix16;
/// Splits one data line into its first five `;`-terminated columns, where each
/// column is a run of hexadecimal scalar values separated by single spaces.
///
/// Returns as soon as the fifth `;` is seen; anything after it on the line is
/// never inspected.
///
/// # Panics
///
/// Panics if a parsed number is not a Unicode scalar value, if a column does
/// not fit in a `StackString`, if a number is followed by a byte other than
/// `b' '` or `b';'` (`"Bad format: Garbage"`), or if the input ends before the
/// fifth `;`.
fn parse_hex(mut hexes: &[u8]) -> [StackString; 5] {
    let mut columns = [
        StackString::new(),
        StackString::new(),
        StackString::new(),
        StackString::new(),
        StackString::new(),
    ];
    let mut column = 0;
    loop {
        // `digits` is the number of bytes the hexadecimal number occupied.
        let (value, digits) = u32::from_radix_16(hexes);
        let ch = core::char::from_u32(value).unwrap();
        columns[column].try_push(ch).unwrap();

        match hexes[digits] {
            // Another scalar value follows in the same column.
            b' ' => {}
            // End of the current column.
            b';' => {
                column += 1;
                if column == columns.len() {
                    return columns;
                }
            }
            _ => {
                panic!("Bad format: Garbage");
            }
        }

        // Step over the number and the one-byte delimiter after it.
        hexes = &hexes[digits + 1..];
    }
}
/// Runs the `normalize_iter` API of all four normalizers against
/// `data/NormalizationTest.txt`.
///
/// For every data line the five columns (source, NFC, NFD, NFKC, NFKD) are
/// cross-checked against each other. In addition, every scalar value that
/// lies between consecutive Part 1 source characters, and everything after
/// the last one up to U+10FFFF (checked when the `@Part2` header is reached),
/// must pass through all four forms unchanged.
#[test]
fn test_conformance() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let nfkd: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let nfkc: ComposingNormalizer = ComposingNormalizer::new_nfkc();

    // Asserts that `c` on its own is left unchanged by every normalization form.
    let assert_inert = |c: char| {
        let alone = || core::iter::once(c);
        assert!(nfd.normalize_iter(alone()).eq(alone()));
        assert!(nfkd.normalize_iter(alone()).eq(alone()));
        assert!(nfc.normalize_iter(alone()).eq(alone()));
        assert!(nfkc.normalize_iter(alone()).eq(alone()));
    };

    // Last Part 1 source character seen so far, as a scalar value.
    let mut prev = 0u32;
    // Number taken from the most recent `@PartN` header.
    let mut part = 0u8;

    let data = include_bytes!("data/NormalizationTest.txt");
    for line in data.split(|b| b == &b'\n') {
        if line.is_empty() || line.starts_with(b"#") {
            continue;
        }
        if line.starts_with(&b"@Part"[..]) {
            part = line[5] - b'0';
            if part == 2 {
                // Part 1 is over: everything above its last entry must be inert.
                for c in (prev + 1..=0x10FFFF).filter_map(char::from_u32) {
                    assert_inert(c);
                }
            }
            continue;
        }

        // Column indices: 0 source, 1 NFC, 2 NFD, 3 NFKC, 4 NFKD.
        let strings = parse_hex(line);

        if part == 1 {
            // Part 1 lines carry exactly one source character.
            let mut source = strings[0].chars();
            let current = source.next().unwrap();
            assert_eq!(source.next(), None);
            let current_u = u32::from(current);
            // Characters skipped since the previous entry must be inert.
            for c in (prev + 1..current_u).filter_map(char::from_u32) {
                assert_inert(c);
            }
            prev = current_u;
        }

        // Each pair below is (input column, expected column).

        // NFC
        for (from, to) in [(0, 1), (1, 1), (2, 1), (3, 3), (4, 3)] {
            assert!(nfc
                .normalize_iter(strings[from].chars())
                .eq(strings[to].chars()));
        }

        // NFD
        for (from, to) in [(0, 2), (1, 2), (2, 2), (3, 4), (4, 4)] {
            assert!(nfd
                .normalize_iter(strings[from].chars())
                .eq(strings[to].chars()));
        }

        // NFKC: every column normalizes to column 3.
        for from in 0..5 {
            assert!(nfkc
                .normalize_iter(strings[from].chars())
                .eq(strings[3].chars()));
        }

        // NFKD: every column normalizes to column 4.
        for from in 0..5 {
            assert!(nfkd
                .normalize_iter(strings[from].chars())
                .eq(strings[4].chars()));
        }
    }
}
// Commented out, because we don't currently have a way to force a no-op set for testing.
// #[test]
// fn test_hangul() {
// use icu_collections::codepointinvlist::{CodePointSet, CodePointSetBuilder};
// use zerofrom::ZeroFrom;
// let builder = CodePointSetBuilder::new();
// let set: CodePointSet = builder.build();
// let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
// {
// let mut norm_iter = normalizer.normalize_iter("A\u{AC00}\u{11A7}".chars());
// // Pessimize passthrough to avoid hiding bugs.
// norm_iter
// .decomposition
// .potential_passthrough_and_not_backward_combining = Some(ZeroFrom::zero_from(&set));
// assert!(norm_iter.eq("A\u{AC00}\u{11A7}".chars()));
// }
// {
// let mut norm_iter = normalizer.normalize_iter("A\u{AC00}\u{11C2}".chars());
// // Pessimize passthrough to avoid hiding bugs.
// norm_iter
// .decomposition
// .potential_passthrough_and_not_backward_combining = Some(ZeroFrom::zero_from(&set));
// assert!(norm_iter.eq("A\u{AC1B}".chars()));
// }
// }
/// Replaces the contents of `sink` with the UTF-16 encoding of `s`.
///
/// # Panics
///
/// Panics if `sink` cannot hold the encoded code units.
fn str_to_utf16(s: &str, sink: &mut StackVec) {
    sink.clear();
    // Scratch space for one scalar value: at most two UTF-16 code units.
    let mut units = [0u16; 2];
    s.chars().for_each(|ch| {
        let encoded = ch.encode_utf16(&mut units);
        sink.try_extend_from_slice(encoded).unwrap();
    });
}
/// Replaces the contents of `sink` with the UTF-16 encoding of `c`.
///
/// # Panics
///
/// Panics if `sink` cannot hold the encoded code units.
fn char_to_utf16(c: char, sink: &mut StackVec) {
    sink.clear();
    // One scalar value needs at most two UTF-16 code units.
    let mut units = [0u16; 2];
    let encoded = c.encode_utf16(&mut units);
    sink.try_extend_from_slice(encoded).unwrap();
}
/// Replaces the contents of `sink` with a copy of `s`.
///
/// # Panics
///
/// Panics if `try_push_str` reports that `s` could not be appended.
fn str_to_str(s: &str, sink: &mut StackString) {
    sink.clear();
    let pushed = sink.try_push_str(s);
    pushed.unwrap();
}
/// Replaces the contents of `sink` with the single character `c`.
///
/// # Panics
///
/// Panics if `try_push` reports that `c` could not be appended.
fn char_to_str(c: char, sink: &mut StackString) {
    sink.clear();
    let pushed = sink.try_push(c);
    pushed.unwrap();
}
/// Runs the `normalize_utf16_to` API of all four normalizers against
/// `data/NormalizationTest.txt`.
///
/// For every data line the five columns (source, NFC, NFD, NFKC, NFKD) are
/// re-encoded as UTF-16 and cross-checked against each other. In addition,
/// every scalar value that lies between consecutive Part 1 source characters,
/// and everything after the last one up to U+10FFFF (checked when the
/// `@Part2` header is reached), must pass through all four forms unchanged.
#[test]
fn test_conformance_utf16() {
    /// Asserts that each scalar value yielded by `scalars`, encoded on its
    /// own, is left unchanged by NFD, NFKD, NFC and NFKC (in that order).
    /// Values that are not valid `char`s are skipped.
    fn assert_inert(
        scalars: impl Iterator<Item = u32>,
        decomposing: [&DecomposingNormalizer; 2],
        composing: [&ComposingNormalizer; 2],
        input: &mut StackVec,
        normalized: &mut StackVec,
    ) {
        for c in scalars.filter_map(char::from_u32) {
            for normalizer in decomposing {
                normalized.clear();
                char_to_utf16(c, input);
                assert!(normalizer
                    .normalize_utf16_to(input.as_slice(), &mut *normalized)
                    .is_ok());
                assert_eq!(&*normalized, &*input);
            }
            for normalizer in composing {
                normalized.clear();
                char_to_utf16(c, input);
                assert!(normalizer
                    .normalize_utf16_to(input.as_slice(), &mut *normalized)
                    .is_ok());
                assert_eq!(&*normalized, &*input);
            }
        }
    }

    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let nfkd: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let nfkc: ComposingNormalizer = ComposingNormalizer::new_nfkc();

    // Scratch buffers reused for every check.
    let mut input = StackVec::new();
    let mut normalized = StackVec::new();
    let mut expected = StackVec::new();

    // Last Part 1 source character seen so far, as a scalar value.
    let mut prev = 0u32;
    // Number taken from the most recent `@PartN` header.
    let mut part = 0u8;

    let data = include_bytes!("data/NormalizationTest.txt");
    for line in data.split(|b| b == &b'\n') {
        if line.is_empty() || line.starts_with(b"#") {
            continue;
        }
        if line.starts_with(&b"@Part"[..]) {
            part = line[5] - b'0';
            if part == 2 {
                // Part 1 is over: everything above its last entry must be inert.
                assert_inert(
                    (prev + 1)..=0x10FFFF,
                    [&nfd, &nfkd],
                    [&nfc, &nfkc],
                    &mut input,
                    &mut normalized,
                );
            }
            continue;
        }

        // Column indices: 0 source, 1 NFC, 2 NFD, 3 NFKC, 4 NFKD.
        let strings = parse_hex(line);

        if part == 1 {
            // Part 1 lines carry exactly one source character.
            let mut source = strings[0].chars();
            let current = source.next().unwrap();
            assert_eq!(source.next(), None);
            let current_u = u32::from(current);
            // Characters skipped since the previous entry must be inert.
            assert_inert(
                (prev + 1)..current_u,
                [&nfd, &nfkd],
                [&nfc, &nfkc],
                &mut input,
                &mut normalized,
            );
            prev = current_u;
        }

        // Each pair below is (input column, expected column).

        // NFC
        for (from, to) in [(0, 1), (1, 1), (2, 1), (3, 3), (4, 3)] {
            normalized.clear();
            str_to_utf16(&strings[from], &mut input);
            str_to_utf16(&strings[to], &mut expected);
            assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok());
            assert_eq!(&normalized, &expected);
        }

        // NFD
        for (from, to) in [(0, 2), (1, 2), (2, 2), (3, 4), (4, 4)] {
            normalized.clear();
            str_to_utf16(&strings[from], &mut input);
            str_to_utf16(&strings[to], &mut expected);
            assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok());
            assert_eq!(&normalized, &expected);
        }

        // NFKC: every column normalizes to column 3.
        for from in 0..5 {
            normalized.clear();
            str_to_utf16(&strings[from], &mut input);
            str_to_utf16(&strings[3], &mut expected);
            assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok());
            assert_eq!(&normalized, &expected);
        }

        // NFKD: every column normalizes to column 4.
        for from in 0..5 {
            normalized.clear();
            str_to_utf16(&strings[from], &mut input);
            str_to_utf16(&strings[4], &mut expected);
            assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok());
            assert_eq!(&normalized, &expected);
        }
    }
}
#[test]
fn test_conformance_utf8() {
let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
let nfkd: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
let nfkc: ComposingNormalizer = ComposingNormalizer::new_nfkc();
let mut input = StackString::new();
let mut normalized = StackString::new();
let mut expected = StackString::new();
let mut prev = 0u32;
let mut part = 0u8;
let data = include_bytes!("data/NormalizationTest.txt");
let lines = data.split(|b| b == &b'\n');
for line in lines {
if line.is_empty() {
continue;
}
if line.starts_with(b"#") {
continue;
}
if line.starts_with(&b"@Part"[..]) {
part = line[5] - b'0';
if part == 2 {
for u in prev + 1..=0x10FFFF {
if let Some(c) = char::from_u32(u) {
normalized.clear();
char_to_str(c, &mut input);
assert!(nfd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &input);
normalized.clear();
char_to_str(c, &mut input);
assert!(nfkd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &input);
normalized.clear();
char_to_str(c, &mut input);
assert!(nfc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &input);
normalized.clear();
char_to_str(c, &mut input);
assert!(nfkc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &input);
}
}
}
continue;
}
let strings = parse_hex(line);
// 0: source
// 1: NFC
// 2: NFD
// 3: NFKC
// 4: NFKD
if part == 1 {
let mut iter = strings[0].chars();
let current = iter.next().unwrap();
assert_eq!(iter.next(), None);
let current_u = u32::from(current);
for u in prev + 1..current_u {
if let Some(c) = char::from_u32(u) {
normalized.clear();
char_to_str(c, &mut input);
assert!(nfd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &input);
normalized.clear();
char_to_str(c, &mut input);
assert!(nfkd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &input);
normalized.clear();
char_to_str(c, &mut input);
assert!(nfc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &input);
normalized.clear();
char_to_str(c, &mut input);
assert!(nfkc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &input);
}
}
prev = current_u;
}
// NFC
normalized.clear();
str_to_str(&strings[0], &mut input);
str_to_str(&strings[1], &mut expected);
assert!(nfc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[1], &mut input);
str_to_str(&strings[1], &mut expected);
assert!(nfc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[2], &mut input);
str_to_str(&strings[1], &mut expected);
assert!(nfc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[3], &mut input);
str_to_str(&strings[3], &mut expected);
assert!(nfc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[4], &mut input);
str_to_str(&strings[3], &mut expected);
assert!(nfc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
// NFD
normalized.clear();
str_to_str(&strings[0], &mut input);
str_to_str(&strings[2], &mut expected);
assert!(nfd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[1], &mut input);
str_to_str(&strings[2], &mut expected);
assert!(nfd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[2], &mut input);
str_to_str(&strings[2], &mut expected);
assert!(nfd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[3], &mut input);
str_to_str(&strings[4], &mut expected);
assert!(nfd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[4], &mut input);
str_to_str(&strings[4], &mut expected);
assert!(nfd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
// NFKC
normalized.clear();
str_to_str(&strings[0], &mut input);
str_to_str(&strings[3], &mut expected);
assert!(nfkc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[1], &mut input);
str_to_str(&strings[3], &mut expected);
assert!(nfkc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[2], &mut input);
str_to_str(&strings[3], &mut expected);
assert!(nfkc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[3], &mut input);
str_to_str(&strings[3], &mut expected);
assert!(nfkc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[4], &mut input);
str_to_str(&strings[3], &mut expected);
assert!(nfkc
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
// NFKD
normalized.clear();
str_to_str(&strings[0], &mut input);
str_to_str(&strings[4], &mut expected);
assert!(nfkd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[1], &mut input);
str_to_str(&strings[4], &mut expected);
assert!(nfkd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[2], &mut input);
str_to_str(&strings[4], &mut expected);
assert!(nfkd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[3], &mut input);
str_to_str(&strings[4], &mut expected);
assert!(nfkd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
normalized.clear();
str_to_str(&strings[4], &mut input);
str_to_str(&strings[4], &mut expected);
assert!(nfkd
.normalize_utf8_to(input.as_bytes(), &mut normalized)
.is_ok());
assert_eq!(&normalized, &expected);
}
}
#[test]
fn test_canonical_composition() {
    // Each row is (first, second, expected result of `compose(first, second)`).
    let composer = CanonicalComposition::new();
    let cases = [
        ('a', 'b', None), // Just two starters
        ('a', '\u{0308}', Some('ä')),
        ('A', '\u{0308}', Some('Ä')),
        ('ẹ', '\u{0302}', Some('ệ')),
        ('Ẹ', '\u{0302}', Some('Ệ')),
        ('\u{1D157}', '\u{1D165}', None), // Composition exclusion
        ('ে', 'া', Some('ো')),            // Second is starter; BMP
        ('𑄱', '𑄧', Some('𑄮')),            // Second is starter; non-BMP
        ('ᄀ', 'ᅡ', Some('가')),           // Hangul LV
        ('가', 'ᆨ', Some('각')),           // Hangul LVT
    ];
    for (first, second, expected) in cases {
        assert_eq!(composer.compose(first, second), expected);
    }
}
#[test]
fn test_canonical_decomposition() {
    // Each row is (input character, expected result of `decompose(input)`).
    let decomposer = CanonicalDecomposition::new();
    let cases = [
        ('ä', Decomposed::Expansion('a', '\u{0308}')),
        ('Ä', Decomposed::Expansion('A', '\u{0308}')),
        ('ệ', Decomposed::Expansion('ẹ', '\u{0302}')),
        ('Ệ', Decomposed::Expansion('Ẹ', '\u{0302}')),
        (
            '\u{1D15E}',
            Decomposed::Expansion('\u{1D157}', '\u{1D165}'),
        ),
        ('ো', Decomposed::Expansion('ে', 'া')),
        ('𑄮', Decomposed::Expansion('𑄱', '𑄧')),
        ('가', Decomposed::Expansion('ᄀ', 'ᅡ')),
        ('각', Decomposed::Expansion('가', 'ᆨ')),
        ('\u{212B}', Decomposed::Singleton('Å')), // ANGSTROM SIGN
        ('\u{2126}', Decomposed::Singleton('Ω')), // OHM SIGN
        ('\u{1F71}', Decomposed::Singleton('ά')), // oxia
        ('\u{1F72}', Decomposed::Expansion('ε', '\u{0300}')), // not oxia but in the oxia range
        ('ά', Decomposed::Expansion('α', '\u{0301}')), // tonos
    ];
    for (input, expected) in cases {
        assert_eq!(decomposer.decompose(input), expected);
    }
}
#[test]
fn test_ccc() {
    // Compare the normalizer's combining-class lookup against the
    // `icu_properties` map for every value in `0..=0x10FFFF`.
    let normalizer_map = CanonicalCombiningClassMap::new();
    let reference_map = icu_properties::maps::canonical_combining_class();
    for code_point in 0..=0x10FFFF {
        assert_eq!(
            normalizer_map.get32(code_point),
            reference_map.get32(code_point)
        );
    }
}
#[test]
fn test_utf16_basic() {
    // Each row is (UTF-16 input, expected output of NFC `normalize_utf16`).
    let nfc = ComposingNormalizer::new_nfc();
    let cases: [(&[u16], &[u16]); 2] = [
        (&[0x0061], &[0x0061]),
        (&[0x0300, 0x0323], &[0x0323, 0x0300]),
    ];
    for (input, expected) in cases {
        assert_eq!(nfc.normalize_utf16(input).as_slice(), expected);
    }
}
#[test]
fn test_accented_digraph() {
    // Both inputs are expected to produce the same NFKD output.
    let nfkd = DecomposingNormalizer::new_nfkd();
    let expected = "DZ\u{0323}\u{030C}";
    for input in ["\u{01C4}\u{0323}", "DZ\u{030C}\u{0323}"] {
        assert_eq!(nfkd.normalize(input), expected);
    }
}
#[test]
fn test_ddd() {
    // NFD of U+0DDD followed by U+0334.
    let nfd = DecomposingNormalizer::new_nfd();
    let input = "\u{0DDD}\u{0334}";
    let expected = "\u{0DD9}\u{0DCF}\u{0334}\u{0DCA}";
    assert_eq!(nfd.normalize(input), expected);
}
#[test]
fn test_is_normalized() {
    let nfd = DecomposingNormalizer::new_nfd();
    let nfkd = DecomposingNormalizer::new_nfkd();
    let nfc = ComposingNormalizer::new_nfc();
    let nfkc = ComposingNormalizer::new_nfkc();

    // Expected answers are always listed in the order [NFD, NFKD, NFC, NFKC].
    const EVERY_FORM: [bool; 4] = [true, true, true, true];
    const COMPOSED_FORMS_ONLY: [bool; 4] = [false, false, true, true];
    const CANONICAL_FORMS_ONLY: [bool; 4] = [true, false, true, false];

    // One checker per input encoding; each queries the four normalizers in
    // the same order as the expectation array.
    let expect_str = |text: &str, [d, kd, c, kc]: [bool; 4]| {
        assert_eq!(nfd.is_normalized(text), d);
        assert_eq!(nfkd.is_normalized(text), kd);
        assert_eq!(nfc.is_normalized(text), c);
        assert_eq!(nfkc.is_normalized(text), kc);
    };
    let expect_utf8 = |bytes: &[u8], [d, kd, c, kc]: [bool; 4]| {
        assert_eq!(nfd.is_normalized_utf8(bytes), d);
        assert_eq!(nfkd.is_normalized_utf8(bytes), kd);
        assert_eq!(nfc.is_normalized_utf8(bytes), c);
        assert_eq!(nfkc.is_normalized_utf8(bytes), kc);
    };
    let expect_utf16 = |units: &[u16], [d, kd, c, kc]: [bool; 4]| {
        assert_eq!(nfd.is_normalized_utf16(units), d);
        assert_eq!(nfkd.is_normalized_utf16(units), kd);
        assert_eq!(nfc.is_normalized_utf16(units), c);
        assert_eq!(nfkc.is_normalized_utf16(units), kc);
    };

    // Plain ASCII.
    let aaa = "aaa";
    let aaa16: &[u16] = &[0x0061, 0x0061, 0x0061];
    expect_str(aaa, EVERY_FORM);
    expect_utf8(aaa.as_bytes(), EVERY_FORM);
    expect_utf16(aaa16, EVERY_FORM);

    // Byte 0xFF between two ASCII letters (not well-formed UTF-8).
    let affa: &[u8] = b"a\xFFa";
    expect_utf8(affa, EVERY_FORM);

    // Unpaired surrogate 0xD800 between two ASCII letters.
    let a_surrogate_a: &[u16] = &[0x0061, 0xD800, 0x0061];
    expect_utf16(a_surrogate_a, EVERY_FORM);

    // U+1D157 followed by U+1D165.
    let note = "a𝅗\u{1D165}a";
    let note16: &[u16] = &[0x0061, 0xD834, 0xDD57, 0xD834, 0xDD65, 0x0061];
    expect_str(note, EVERY_FORM);
    expect_utf8(note.as_bytes(), EVERY_FORM);
    expect_utf16(note16, EVERY_FORM);

    // Precomposed U+00E4: rejected by the decomposing forms only.
    let umlaut = "aäa";
    let umlaut16: &[u16] = &[0x0061, 0x00E4, 0x0061];
    expect_str(umlaut, COMPOSED_FORMS_ONLY);
    expect_utf8(umlaut.as_bytes(), COMPOSED_FORMS_ONLY);
    expect_utf16(umlaut16, COMPOSED_FORMS_ONLY);

    // U+00BD: rejected by the compatibility forms only.
    let fraction = "a½a";
    let fraction16: &[u16] = &[0x0061, 0x00BD, 0x0061];
    expect_str(fraction, CANONICAL_FORMS_ONLY);
    expect_utf8(fraction.as_bytes(), CANONICAL_FORMS_ONLY);
    expect_utf16(fraction16, CANONICAL_FORMS_ONLY);
}