summaryrefslogtreecommitdiffstats
path: root/library/std/src/sys_common/wtf8/tests.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/std/src/sys_common/wtf8/tests.rs')
-rw-r--r--library/std/src/sys_common/wtf8/tests.rs295
1 files changed, 276 insertions, 19 deletions
diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs
index 931996791..1a302d646 100644
--- a/library/std/src/sys_common/wtf8/tests.rs
+++ b/library/std/src/sys_common/wtf8/tests.rs
@@ -20,6 +20,36 @@ fn code_point_to_u32() {
}
#[test]
+fn code_point_to_lead_surrogate() {
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
+ assert_eq!(c(0).to_lead_surrogate(), None);
+ assert_eq!(c(0xE9).to_lead_surrogate(), None);
+ assert_eq!(c(0xD800).to_lead_surrogate(), Some(0xD800));
+ assert_eq!(c(0xDBFF).to_lead_surrogate(), Some(0xDBFF));
+ assert_eq!(c(0xDC00).to_lead_surrogate(), None);
+ assert_eq!(c(0xDFFF).to_lead_surrogate(), None);
+ assert_eq!(c(0x1F4A9).to_lead_surrogate(), None);
+ assert_eq!(c(0x10FFFF).to_lead_surrogate(), None);
+}
+
+#[test]
+fn code_point_to_trail_surrogate() {
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
+ assert_eq!(c(0).to_trail_surrogate(), None);
+ assert_eq!(c(0xE9).to_trail_surrogate(), None);
+ assert_eq!(c(0xD800).to_trail_surrogate(), None);
+ assert_eq!(c(0xDBFF).to_trail_surrogate(), None);
+ assert_eq!(c(0xDC00).to_trail_surrogate(), Some(0xDC00));
+ assert_eq!(c(0xDFFF).to_trail_surrogate(), Some(0xDFFF));
+ assert_eq!(c(0x1F4A9).to_trail_surrogate(), None);
+ assert_eq!(c(0x10FFFF).to_trail_surrogate(), None);
+}
+
+#[test]
fn code_point_from_char() {
assert_eq!(CodePoint::from_char('a').to_u32(), 0x61);
assert_eq!(CodePoint::from_char('šŸ’©').to_u32(), 0x1F4A9);
@@ -70,35 +100,66 @@ fn wtf8buf_from_string() {
#[test]
fn wtf8buf_from_wide() {
- assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b"");
- assert_eq!(
- Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes,
- b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"
- );
+ let buf = Wtf8Buf::from_wide(&[]);
+ assert_eq!(buf.bytes, b"");
+ assert!(buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xDCA9]);
+ assert_eq!(buf.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]);
+ assert_eq!(buf.bytes, b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xD800]);
+ assert_eq!(buf.bytes, b"\xED\xA0\x80");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDBFF]);
+ assert_eq!(buf.bytes, b"\xED\xAF\xBF");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDC00]);
+ assert_eq!(buf.bytes, b"\xED\xB0\x80");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDFFF]);
+ assert_eq!(buf.bytes, b"\xED\xBF\xBF");
+ assert!(!buf.is_known_utf8);
}
#[test]
fn wtf8buf_push_str() {
let mut string = Wtf8Buf::new();
assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
+
string.push_str("aĆ© šŸ’©");
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
}
#[test]
fn wtf8buf_push_char() {
let mut string = Wtf8Buf::from_str("aƩ ");
assert_eq!(string.bytes, b"a\xC3\xA9 ");
+ assert!(string.is_known_utf8);
+
string.push_char('šŸ’©');
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
}
#[test]
fn wtf8buf_push() {
let mut string = Wtf8Buf::from_str("aƩ ");
assert_eq!(string.bytes, b"a\xC3\xA9 ");
+ assert!(string.is_known_utf8);
+
string.push(CodePoint::from_char('šŸ’©'));
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
fn c(value: u32) -> CodePoint {
CodePoint::from_u32(value).unwrap()
@@ -106,37 +167,46 @@ fn wtf8buf_push() {
let mut string = Wtf8Buf::new();
string.push(c(0xD83D)); // lead
+ assert!(!string.is_known_utf8);
string.push(c(0xDCA9)); // trail
assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
let mut string = Wtf8Buf::new();
string.push(c(0xD83D)); // lead
+ assert!(!string.is_known_utf8);
string.push(c(0x20)); // not surrogate
string.push(c(0xDCA9)); // trail
assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
let mut string = Wtf8Buf::new();
string.push(c(0xD800)); // lead
+ assert!(!string.is_known_utf8);
string.push(c(0xDBFF)); // lead
assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
let mut string = Wtf8Buf::new();
string.push(c(0xD800)); // lead
+ assert!(!string.is_known_utf8);
string.push(c(0xE000)); // not surrogate
assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
let mut string = Wtf8Buf::new();
string.push(c(0xD7FF)); // not surrogate
+ assert!(string.is_known_utf8);
string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
let mut string = Wtf8Buf::new();
string.push(c(0x61)); // not surrogate, < 3 bytes
+ assert!(string.is_known_utf8);
string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
let mut string = Wtf8Buf::new();
string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
assert_eq!(string.bytes, b"\xED\xB0\x80");
}
@@ -146,6 +216,7 @@ fn wtf8buf_push_wtf8() {
assert_eq!(string.bytes, b"a\xC3\xA9");
string.push_wtf8(Wtf8::from_str(" šŸ’©"));
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
fn w(v: &[u8]) -> &Wtf8 {
unsafe { Wtf8::from_bytes_unchecked(v) }
@@ -161,37 +232,68 @@ fn wtf8buf_push_wtf8() {
string.push_wtf8(w(b" ")); // not surrogate
string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\xA0\x80")); // lead
string.push_wtf8(w(b"\xED\xAF\xBF")); // lead
assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\xA0\x80")); // lead
string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate
assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate
string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes
string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
}
#[test]
fn wtf8buf_truncate() {
let mut string = Wtf8Buf::from_str("aƩ");
+ assert!(string.is_known_utf8);
+
+ string.truncate(3);
+ assert_eq!(string.bytes, b"a\xC3\xA9");
+ assert!(string.is_known_utf8);
+
string.truncate(1);
assert_eq!(string.bytes, b"a");
+ assert!(string.is_known_utf8);
+
+ string.truncate(0);
+ assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
+}
+
+#[test]
+fn wtf8buf_truncate_around_non_bmp() {
+ let mut string = Wtf8Buf::from_str("šŸ’©");
+ assert!(string.is_known_utf8);
+
+ string.truncate(4);
+ assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
+
+ string.truncate(0);
+ assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
}
#[test]
@@ -209,10 +311,36 @@ fn wtf8buf_truncate_fail_longer() {
}
#[test]
+#[should_panic]
+fn wtf8buf_truncate_splitting_non_bmp3() {
+ let mut string = Wtf8Buf::from_str("šŸ’©");
+ assert!(string.is_known_utf8);
+ string.truncate(3);
+}
+
+#[test]
+#[should_panic]
+fn wtf8buf_truncate_splitting_non_bmp2() {
+ let mut string = Wtf8Buf::from_str("šŸ’©");
+ assert!(string.is_known_utf8);
+ string.truncate(2);
+}
+
+#[test]
+#[should_panic]
+fn wtf8buf_truncate_splitting_non_bmp1() {
+ let mut string = Wtf8Buf::from_str("šŸ’©");
+ assert!(string.is_known_utf8);
+ string.truncate(1);
+}
+
+#[test]
fn wtf8buf_into_string() {
let mut string = Wtf8Buf::from_str("aĆ© šŸ’©");
+ assert!(string.is_known_utf8);
assert_eq!(string.clone().into_string(), Ok(String::from("aĆ© šŸ’©")));
string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert!(!string.is_known_utf8);
assert_eq!(string.clone().into_string(), Err(string));
}
@@ -229,15 +357,33 @@ fn wtf8buf_from_iterator() {
fn f(values: &[u32]) -> Wtf8Buf {
values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>()
}
- assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert_eq!(
+ f(&[0x61, 0xE9, 0x20, 0x1F4A9]),
+ Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true }
+ );
assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
- assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
- assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
- assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
- assert_eq!(f(&[0xD7FF, 0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80");
- assert_eq!(f(&[0x61, 0xDC00]).bytes, b"\x61\xED\xB0\x80");
- assert_eq!(f(&[0xDC00]).bytes, b"\xED\xB0\x80");
+ assert_eq!(
+ f(&[0xD83D, 0x20, 0xDCA9]),
+ Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ f(&[0xD800, 0xDBFF]),
+ Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ f(&[0xD800, 0xE000]),
+ Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ f(&[0xD7FF, 0xDC00]),
+ Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ f(&[0x61, 0xDC00]),
+ Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(f(&[0xDC00]), Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false });
}
#[test]
@@ -251,15 +397,36 @@ fn wtf8buf_extend() {
string
}
- assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert_eq!(
+ e(&[0x61, 0xE9], &[0x20, 0x1F4A9]),
+ Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true }
+ );
assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
- assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
- assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
- assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
- assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80");
- assert_eq!(e(&[0x61], &[0xDC00]).bytes, b"\x61\xED\xB0\x80");
- assert_eq!(e(&[], &[0xDC00]).bytes, b"\xED\xB0\x80");
+ assert_eq!(
+ e(&[0xD83D, 0x20], &[0xDCA9]),
+ Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[0xD800], &[0xDBFF]),
+ Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[0xD800], &[0xE000]),
+ Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[0xD7FF], &[0xDC00]),
+ Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[0x61], &[0xDC00]),
+ Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
+ assert_eq!(
+ e(&[], &[0xDC00]),
+ Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false }
+ );
}
#[test]
@@ -407,3 +574,93 @@ fn wtf8_encode_wide_size_hint() {
assert_eq!((0, Some(0)), iter.size_hint());
assert!(iter.next().is_none());
}
+
+#[test]
+fn wtf8_clone_into() {
+ let mut string = Wtf8Buf::new();
+ Wtf8::from_str("green").clone_into(&mut string);
+ assert_eq!(string.bytes, b"green");
+
+ let mut string = Wtf8Buf::from_str("green");
+ Wtf8::from_str("").clone_into(&mut string);
+ assert_eq!(string.bytes, b"");
+
+ let mut string = Wtf8Buf::from_str("red");
+ Wtf8::from_str("green").clone_into(&mut string);
+ assert_eq!(string.bytes, b"green");
+
+ let mut string = Wtf8Buf::from_str("green");
+ Wtf8::from_str("red").clone_into(&mut string);
+ assert_eq!(string.bytes, b"red");
+
+ let mut string = Wtf8Buf::from_str("green");
+ assert!(string.is_known_utf8);
+ unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").clone_into(&mut string) };
+ assert_eq!(string.bytes, b"\xED\xA0\x80");
+ assert!(!string.is_known_utf8);
+}
+
+#[test]
+fn wtf8_to_ascii_lowercase() {
+ let lowercase = Wtf8::from_str("").to_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"");
+
+ let lowercase = Wtf8::from_str("GrEeN gRaPeS! šŸ‡").to_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87");
+
+ let lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_lowercase() };
+ assert_eq!(lowercase.bytes, b"\xED\xA0\x80");
+ assert!(!lowercase.is_known_utf8);
+}
+
+#[test]
+fn wtf8_to_ascii_uppercase() {
+ let uppercase = Wtf8::from_str("").to_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"");
+
+ let uppercase = Wtf8::from_str("GrEeN gRaPeS! šŸ‡").to_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87");
+
+ let uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_uppercase() };
+ assert_eq!(uppercase.bytes, b"\xED\xA0\x80");
+ assert!(!uppercase.is_known_utf8);
+}
+
+#[test]
+fn wtf8_make_ascii_lowercase() {
+ let mut lowercase = Wtf8Buf::from_str("");
+ lowercase.make_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"");
+
+ let mut lowercase = Wtf8Buf::from_str("GrEeN gRaPeS! šŸ‡");
+ lowercase.make_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87");
+
+ let mut lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+ lowercase.make_ascii_lowercase();
+ assert_eq!(lowercase.bytes, b"\xED\xA0\x80");
+ assert!(!lowercase.is_known_utf8);
+}
+
+#[test]
+fn wtf8_make_ascii_uppercase() {
+ let mut uppercase = Wtf8Buf::from_str("");
+ uppercase.make_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"");
+
+ let mut uppercase = Wtf8Buf::from_str("GrEeN gRaPeS! šŸ‡");
+ uppercase.make_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87");
+
+ let mut uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+ uppercase.make_ascii_uppercase();
+ assert_eq!(uppercase.bytes, b"\xED\xA0\x80");
+ assert!(!uppercase.is_known_utf8);
+}
+
+#[test]
+fn wtf8_to_owned() {
+ let string = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+ assert_eq!(string.bytes, b"\xED\xA0\x80");
+ assert!(!string.is_known_utf8);
+}