#[cfg(__unicase__iter_cmp)] use core::cmp::Ordering; use core::hash::{Hash, Hasher}; use self::map::lookup; mod map; #[derive(Clone, Copy, Debug, Default)] pub struct Unicode(pub S); impl, S2: AsRef> PartialEq> for Unicode { #[inline] fn eq(&self, other: &Unicode) -> bool { let mut left = self.0.as_ref().chars().flat_map(lookup); let mut right = other.0.as_ref().chars().flat_map(lookup); // inline Iterator::eq since not added until Rust 1.5 loop { let x = match left.next() { None => return right.next().is_none(), Some(val) => val, }; let y = match right.next() { None => return false, Some(val) => val, }; if x != y { return false; } } } } impl> Eq for Unicode {} #[cfg(__unicase__iter_cmp)] impl> PartialOrd for Unicode { #[inline] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } #[cfg(__unicase__iter_cmp)] impl> Ord for Unicode { #[inline] fn cmp(&self, other: &Self) -> Ordering { let self_chars = self.0.as_ref().chars().flat_map(lookup); let other_chars = other.0.as_ref().chars().flat_map(lookup); self_chars.cmp(other_chars) } } impl> Hash for Unicode { #[inline] fn hash(&self, hasher: &mut H) { let mut buf = [0; 4]; for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) { let len = char_to_utf8(c, &mut buf); hasher.write(&buf[..len]) } } } #[inline] fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize { const TAG_CONT: u8 = 0b1000_0000; const TAG_TWO_B: u8 = 0b1100_0000; const TAG_THREE_B: u8 = 0b1110_0000; const TAG_FOUR_B: u8 = 0b1111_0000; let code = c as u32; if code <= 0x7F { dst[0] = code as u8; 1 } else if code <= 0x7FF { dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; dst[1] = (code & 0x3F) as u8 | TAG_CONT; 2 } else if code <= 0xFFFF { dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; dst[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT; dst[2] = (code & 0x3F) as u8 | TAG_CONT; 3 } else { dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; dst[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; dst[3] = (code & 0x3F) as u8 | TAG_CONT; 4 } } // internal mod so that the enum can be 'pub' // thanks privacy-checker :___( mod fold { #[derive(Clone, Copy)] pub enum Fold { Zero, One(char), Two(char, char), Three(char, char, char), } impl Iterator for Fold { type Item = char; #[inline] fn next(&mut self) -> Option { match *self { Fold::Zero => None, Fold::One(one) => { *self = Fold::Zero; Some(one) }, Fold::Two(one, two) => { *self = Fold::One(two); Some(one) }, Fold::Three(one, two, three) => { *self = Fold::Two(one, two); Some(three) } } } #[inline] fn size_hint(&self) -> (usize, Option) { match *self { Fold::Zero => (0, Some(0)), Fold::One(..) => (1, Some(1)), Fold::Two(..) => (2, Some(2)), Fold::Three(..) => (3, Some(3)) } } } impl From<(char,)> for Fold { #[inline] fn from((one,): (char,)) -> Fold { Fold::One(one) } } impl From<(char, char)> for Fold { #[inline] fn from((one, two): (char, char)) -> Fold { Fold::Two(one, two) } } impl From<(char, char, char)> for Fold { #[inline] fn from((one, two, three): (char, char, char)) -> Fold { Fold::Three(one, two, three) } } } #[cfg(test)] mod tests { use super::Unicode; macro_rules! eq { ($left:expr, $right:expr) => ({ assert_eq!(Unicode($left), Unicode($right)); }); } #[test] fn test_ascii_folding() { eq!("foo bar", "FoO BAR"); } #[test] fn test_simple_case_folding() { eq!("στιγμας", "στιγμασ"); } #[test] fn test_full_case_folding() { eq!("flour", "flour"); eq!("Maße", "MASSE"); eq!("ᾲ στο διάολο", "ὰι στο διάολο"); } #[cfg(feature = "nightly")] #[bench] fn bench_ascii_folding(b: &mut ::test::Bencher) { b.bytes = b"foo bar".len() as u64; b.iter(|| eq!("foo bar", "FoO BAR")); } #[cfg(feature = "nightly")] #[bench] fn bench_simple_case_folding(b: &mut ::test::Bencher) { b.bytes = "στιγμας".len() as u64; b.iter(|| eq!("στιγμας", "στιγμασ")); } }