From 17d40c6057c88f4c432b0d7bac88e1b84cb7e67f Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:03:36 +0200 Subject: Adding upstream version 1.65.0+dfsg1. Signed-off-by: Daniel Baumann --- library/std/src/sys_common/wtf8/tests.rs | 295 +++++++++++++++++++++++++++++-- 1 file changed, 276 insertions(+), 19 deletions(-) (limited to 'library/std/src/sys_common/wtf8/tests.rs') diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 931996791..1a302d646 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -19,6 +19,36 @@ fn code_point_to_u32() { assert_eq!(c(0x10FFFF).to_u32(), 0x10FFFF); } +#[test] +fn code_point_to_lead_surrogate() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_lead_surrogate(), None); + assert_eq!(c(0xE9).to_lead_surrogate(), None); + assert_eq!(c(0xD800).to_lead_surrogate(), Some(0xD800)); + assert_eq!(c(0xDBFF).to_lead_surrogate(), Some(0xDBFF)); + assert_eq!(c(0xDC00).to_lead_surrogate(), None); + assert_eq!(c(0xDFFF).to_lead_surrogate(), None); + assert_eq!(c(0x1F4A9).to_lead_surrogate(), None); + assert_eq!(c(0x10FFFF).to_lead_surrogate(), None); +} + +#[test] +fn code_point_to_trail_surrogate() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_trail_surrogate(), None); + assert_eq!(c(0xE9).to_trail_surrogate(), None); + assert_eq!(c(0xD800).to_trail_surrogate(), None); + assert_eq!(c(0xDBFF).to_trail_surrogate(), None); + assert_eq!(c(0xDC00).to_trail_surrogate(), Some(0xDC00)); + assert_eq!(c(0xDFFF).to_trail_surrogate(), Some(0xDFFF)); + assert_eq!(c(0x1F4A9).to_trail_surrogate(), None); + assert_eq!(c(0x10FFFF).to_trail_surrogate(), None); +} + #[test] fn code_point_from_char() { assert_eq!(CodePoint::from_char('a').to_u32(), 0x61); @@ -70,35 +100,66 @@ fn wtf8buf_from_string() { #[test] fn wtf8buf_from_wide() { - assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b""); - assert_eq!( - Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes, - b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9" - ); + let buf = Wtf8Buf::from_wide(&[]); + assert_eq!(buf.bytes, b""); + assert!(buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xDCA9]); + assert_eq!(buf.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]); + assert_eq!(buf.bytes, b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xD800]); + assert_eq!(buf.bytes, b"\xED\xA0\x80"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDBFF]); + assert_eq!(buf.bytes, b"\xED\xAF\xBF"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDC00]); + assert_eq!(buf.bytes, b"\xED\xB0\x80"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDFFF]); + assert_eq!(buf.bytes, b"\xED\xBF\xBF"); + assert!(!buf.is_known_utf8); } #[test] fn wtf8buf_push_str() { let mut string = Wtf8Buf::new(); assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); + string.push_str("aĆ© šŸ’©"); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); } #[test] fn wtf8buf_push_char() { let mut string = Wtf8Buf::from_str("aĆ© "); assert_eq!(string.bytes, b"a\xC3\xA9 "); + assert!(string.is_known_utf8); + string.push_char('šŸ’©'); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); } #[test] fn wtf8buf_push() { let mut string = Wtf8Buf::from_str("aĆ© "); assert_eq!(string.bytes, b"a\xC3\xA9 "); + assert!(string.is_known_utf8); + string.push(CodePoint::from_char('šŸ’©')); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() @@ -106,37 +167,46 @@ fn wtf8buf_push() { let mut string = Wtf8Buf::new(); string.push(c(0xD83D)); // lead + assert!(!string.is_known_utf8); string.push(c(0xDCA9)); // trail assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic! let mut string = Wtf8Buf::new(); string.push(c(0xD83D)); // lead + assert!(!string.is_known_utf8); string.push(c(0x20)); // not surrogate string.push(c(0xDCA9)); // trail assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); let mut string = Wtf8Buf::new(); string.push(c(0xD800)); // lead + assert!(!string.is_known_utf8); string.push(c(0xDBFF)); // lead assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); let mut string = Wtf8Buf::new(); string.push(c(0xD800)); // lead + assert!(!string.is_known_utf8); string.push(c(0xE000)); // not surrogate assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); let mut string = Wtf8Buf::new(); string.push(c(0xD7FF)); // not surrogate + assert!(string.is_known_utf8); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); let mut string = Wtf8Buf::new(); string.push(c(0x61)); // not surrogate, < 3 bytes + assert!(string.is_known_utf8); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); let mut string = Wtf8Buf::new(); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\xED\xB0\x80"); } @@ -146,6 +216,7 @@ fn wtf8buf_push_wtf8() { assert_eq!(string.bytes, b"a\xC3\xA9"); string.push_wtf8(Wtf8::from_str(" šŸ’©")); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } @@ -161,37 +232,68 @@ fn wtf8buf_push_wtf8() { string.push_wtf8(w(b" ")); // not surrogate string.push_wtf8(w(b"\xED\xB2\xA9")); // trail assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xA0\x80")); // lead string.push_wtf8(w(b"\xED\xAF\xBF")); // lead assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xA0\x80")); // lead string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\xED\xB0\x80"); + assert!(!string.is_known_utf8); } #[test] fn wtf8buf_truncate() { let mut string = Wtf8Buf::from_str("aĆ©"); + assert!(string.is_known_utf8); + + string.truncate(3); + assert_eq!(string.bytes, b"a\xC3\xA9"); + assert!(string.is_known_utf8); + string.truncate(1); assert_eq!(string.bytes, b"a"); + assert!(string.is_known_utf8); + + string.truncate(0); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); +} + +#[test] +fn wtf8buf_truncate_around_non_bmp() { + let mut string = Wtf8Buf::from_str("šŸ’©"); + assert!(string.is_known_utf8); + + string.truncate(4); + assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); + + string.truncate(0); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); } #[test] @@ -208,11 +310,37 @@ fn wtf8buf_truncate_fail_longer() { string.truncate(4); } +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp3() { + let mut string = Wtf8Buf::from_str("šŸ’©"); + assert!(string.is_known_utf8); + string.truncate(3); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp2() { + let mut string = Wtf8Buf::from_str("šŸ’©"); + assert!(string.is_known_utf8); + string.truncate(2); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp1() { + let mut string = Wtf8Buf::from_str("šŸ’©"); + assert!(string.is_known_utf8); + string.truncate(1); +} + #[test] fn wtf8buf_into_string() { let mut string = Wtf8Buf::from_str("aĆ© šŸ’©"); + assert!(string.is_known_utf8); assert_eq!(string.clone().into_string(), Ok(String::from("aĆ© šŸ’©"))); string.push(CodePoint::from_u32(0xD800).unwrap()); + assert!(!string.is_known_utf8); assert_eq!(string.clone().into_string(), Err(string)); } @@ -229,15 +357,33 @@ fn wtf8buf_from_iterator() { fn f(values: &[u32]) -> Wtf8Buf { values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::() } - assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert_eq!( + f(&[0x61, 0xE9, 0x20, 0x1F4A9]), + Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true } + ); assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! - assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); - assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF"); - assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80"); - assert_eq!(f(&[0xD7FF, 0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80"); - assert_eq!(f(&[0x61, 0xDC00]).bytes, b"\x61\xED\xB0\x80"); - assert_eq!(f(&[0xDC00]).bytes, b"\xED\xB0\x80"); + assert_eq!( + f(&[0xD83D, 0x20, 0xDCA9]), + Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD800, 0xDBFF]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD800, 0xE000]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD7FF, 0xDC00]), + Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0x61, 0xDC00]), + Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!(f(&[0xDC00]), Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false }); } #[test] @@ -251,15 +397,36 @@ fn wtf8buf_extend() { string } - assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert_eq!( + e(&[0x61, 0xE9], &[0x20, 0x1F4A9]), + Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true } + ); assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! - assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); - assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF"); - assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80"); - assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80"); - assert_eq!(e(&[0x61], &[0xDC00]).bytes, b"\x61\xED\xB0\x80"); - assert_eq!(e(&[], &[0xDC00]).bytes, b"\xED\xB0\x80"); + assert_eq!( + e(&[0xD83D, 0x20], &[0xDCA9]), + Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD800], &[0xDBFF]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD800], &[0xE000]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD7FF], &[0xDC00]), + Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0x61], &[0xDC00]), + Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[], &[0xDC00]), + Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); } #[test] @@ -407,3 +574,93 @@ fn wtf8_encode_wide_size_hint() { assert_eq!((0, Some(0)), iter.size_hint()); assert!(iter.next().is_none()); } + +#[test] +fn wtf8_clone_into() { + let mut string = Wtf8Buf::new(); + Wtf8::from_str("green").clone_into(&mut string); + assert_eq!(string.bytes, b"green"); + + let mut string = Wtf8Buf::from_str("green"); + Wtf8::from_str("").clone_into(&mut string); + assert_eq!(string.bytes, b""); + + let mut string = Wtf8Buf::from_str("red"); + Wtf8::from_str("green").clone_into(&mut string); + assert_eq!(string.bytes, b"green"); + + let mut string = Wtf8Buf::from_str("green"); + Wtf8::from_str("red").clone_into(&mut string); + assert_eq!(string.bytes, b"red"); + + let mut string = Wtf8Buf::from_str("green"); + assert!(string.is_known_utf8); + unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").clone_into(&mut string) }; + assert_eq!(string.bytes, b"\xED\xA0\x80"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wtf8_to_ascii_lowercase() { + let lowercase = Wtf8::from_str("").to_ascii_lowercase(); + assert_eq!(lowercase.bytes, b""); + + let lowercase = Wtf8::from_str("GrEeN gRaPeS! šŸ‡").to_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87"); + + let lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_lowercase() }; + assert_eq!(lowercase.bytes, b"\xED\xA0\x80"); + assert!(!lowercase.is_known_utf8); +} + +#[test] +fn wtf8_to_ascii_uppercase() { + let uppercase = Wtf8::from_str("").to_ascii_uppercase(); + assert_eq!(uppercase.bytes, b""); + + let uppercase = Wtf8::from_str("GrEeN gRaPeS! šŸ‡").to_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87"); + + let uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_uppercase() }; + assert_eq!(uppercase.bytes, b"\xED\xA0\x80"); + assert!(!uppercase.is_known_utf8); +} + +#[test] +fn wtf8_make_ascii_lowercase() { + let mut lowercase = Wtf8Buf::from_str(""); + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b""); + + let mut lowercase = Wtf8Buf::from_str("GrEeN gRaPeS! šŸ‡"); + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87"); + + let mut lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"\xED\xA0\x80"); + assert!(!lowercase.is_known_utf8); +} + +#[test] +fn wtf8_make_ascii_uppercase() { + let mut uppercase = Wtf8Buf::from_str(""); + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b""); + + let mut uppercase = Wtf8Buf::from_str("GrEeN gRaPeS! šŸ‡"); + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87"); + + let mut uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"\xED\xA0\x80"); + assert!(!uppercase.is_known_utf8); +} + +#[test] +fn wtf8_to_owned() { + let string = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert_eq!(string.bytes, b"\xED\xA0\x80"); + assert!(!string.is_known_utf8); +} -- cgit v1.2.3