#![deny(
    missing_copy_implementations,
    missing_debug_implementations,
    missing_docs,
    trivial_casts,
    trivial_numeric_casts,
    unsafe_code,
    unused_import_braces,
    unused_qualifications,
)]
#![cfg_attr(feature = "dev", feature(plugin))]
#![cfg_attr(feature = "dev", plugin(clippy))]
#![cfg_attr(feature = "dev", deny(clippy))]

//! Functions to decode and encode [RFC-3492 Punycode](https://tools.ietf.org/html/rfc3492).

// Bootstring parameters used by Punycode.
// See [RFC-3492, section 4](https://tools.ietf.org/html/rfc3492#section-4).
const BASE: u32 = 36;
const TMIN: u32 = 1;
const TMAX: u32 = 26;
const SKEW: u32 = 38;
const DAMP: u32 = 700;
const INITIAL_BIAS: u32 = 72;
const INITIAL_N: u32 = 128;
const DELIMITER: char = '-';

/// Decode the string as Punycode. The string should not contain the initial `xn--` and must
/// contain only ASCII characters.
///
/// # Errors
/// Returns `Err(())` when the input is not ASCII, contains a character that is not a valid
/// Punycode digit after the last delimiter, ends in the middle of a variable-length integer,
/// or describes a code point that overflows or is not a valid `char`.
///
/// # Example
/// ```
/// assert_eq!(
///     punycode::decode("acadmie-franaise-npb1a").unwrap(),
///     "académie-française"
/// );
/// ```
pub fn decode(input: &str) -> Result<String, ()> {
    if !input.is_ascii() {
        return Err(());
    }

    // Everything before the last delimiter is copied verbatim; the rest encodes insertions.
    let (mut output, extended): (Vec<char>, &str) = match input.rfind(DELIMITER) {
        Some(pos) => (input[..pos].chars().collect(), &input[pos + 1..]),
        None => (Vec::new(), input),
    };

    let mut n = INITIAL_N;
    let mut i: u32 = 0;
    let mut bias = INITIAL_BIAS;
    let mut digits = extended.chars().peekable();

    while digits.peek().is_some() {
        let oldi = i;
        let mut w: u32 = 1;

        // Decode one generalized variable-length integer into `i`.
        for k in 1.. {
            // Running out of digits in the middle of an integer is malformed input.
            let c = digits.next().ok_or(())?;
            let k = k * BASE;

            let digit = decode_digit(c);
            if digit == BASE {
                return Err(());
            }

            // i += digit * w, failing on overflow.
            let step = digit.checked_mul(w).ok_or(())?;
            i = i.checked_add(step).ok_or(())?;

            let t = clamped_sub(TMIN, k, bias, TMAX);
            if digit < t {
                break;
            }

            // w *= BASE - t, failing on overflow.
            w = w.checked_mul(BASE - t).ok_or(())?;
        }

        let len = (output.len() + 1) as u32;
        bias = adapt(i - oldi, len, oldi == 0);

        // `i` wraps around the output length; each wrap advances the code point `n`.
        n = n.checked_add(i / len).ok_or(())?;
        i %= len;

        let c = char::from_u32(n).ok_or(())?;
        output.insert(i as usize, c);
        i += 1;
    }

    Ok(output.into_iter().collect())
}

/// Encode a string as punycode. The result string will contain only ASCII characters. The result
/// string does not start with `xn--`.
///
/// # Errors
/// Returns `Err(())` when an internal counter would overflow `u32` while encoding.
///
/// # Example
/// ```
/// assert_eq!(
///     punycode::encode("académie-française").unwrap(),
///     "acadmie-franaise-npb1a"
/// );
/// ```
pub fn encode(input: &str) -> Result<String, ()> {
    encode_slice(&input.chars().collect::<Vec<char>>())
}

/// Encode a slice of code points as Punycode; see [`encode`] for the contract.
fn encode_slice(input: &[char]) -> Result<String, ()> {
    // All counters are `u32`; refuse inputs whose length cannot be represented.
    if input.len() > u32::MAX as usize {
        return Err(());
    }
    let input_len = input.len() as u32;

    let mut n = INITIAL_N;
    let mut delta: u32 = 0;
    let mut bias = INITIAL_BIAS;

    // Basic (ASCII) code points are emitted first, in their original order.
    let mut output: String = input.iter().filter(|c| c.is_ascii()).cloned().collect();

    // `h` is the number of code points handled so far, `b` the number of basic ones.
    let mut h = output.len() as u32;
    let b = h;

    if b > 0 {
        output.push(DELIMITER);
    }

    while h < input_len {
        // Smallest code point that has not been handled yet.
        let m = input
            .iter()
            .map(|&c| c as u32)
            .filter(|&c| c >= n)
            .min()
            .expect("h < input.len() implies an unhandled code point >= n");

        // delta += (m - n) * (h + 1), failing on overflow.
        let step = (m - n).checked_mul(h + 1).ok_or(())?;
        delta = delta.checked_add(step).ok_or(())?;
        n = m;

        for &c in input {
            let c = c as u32;

            if c < n {
                delta = delta.checked_add(1).ok_or(())?;
            } else if c == n {
                // Emit `delta` as a generalized variable-length integer.
                let mut q = delta;

                for k in 1.. {
                    let k = k * BASE;
                    let t = clamped_sub(TMIN, k, bias, TMAX);

                    if q < t {
                        break;
                    }

                    output.push(encode_digit(t + (q - t) % (BASE - t)));
                    q = (q - t) / (BASE - t);
                }

                output.push(encode_digit(q));
                bias = adapt(delta, h + 1, h == b);
                delta = 0;
                h += 1;
            }
        }

        delta = delta.checked_add(1).ok_or(())?;
        n += 1;
    }

    Ok(output)
}

/// Bias adaptation: compute the next bias from the last `delta`, the number of code points
/// handled so far (`numpoint`) and whether this is the first adaptation (`firsttime`).
fn adapt(delta: u32, numpoint: u32, firsttime: bool) -> u32 {
    let mut delta = if firsttime { delta / DAMP } else { delta / 2 };

    delta += delta / numpoint;
    let mut k = 0;

    while delta > (BASE - TMIN) * TMAX / 2 {
        delta /= BASE - TMIN;
        k += BASE
    }

    k + (BASE - TMIN + 1) * delta / (delta + SKEW)
}

/// Compute `lhs-rhs`. Result will be clamped in [min, max].
///
/// The comparisons are arranged as additions so that no intermediate subtraction can go
/// below zero on unsigned types.
fn clamped_sub<T>(min: T, lhs: T, rhs: T, max: T) -> T
where
    T: Ord + std::ops::Add<Output = T> + std::ops::Sub<Output = T> + Copy,
{
    if min + rhs >= lhs {
        min
    } else if max + rhs <= lhs {
        max
    } else {
        lhs - rhs
    }
}

/// Map a Punycode digit character to its value in `0..BASE`.
///
/// Letters (either case) map to 0..=25 and decimal digits to 26..=35. Any other character
/// yields `BASE`, which callers treat as "invalid digit".
fn decode_digit(c: char) -> u32 {
    let cp = c as u32;

    match c {
        '0'..='9' => cp - ('0' as u32) + 26,
        'A'..='Z' => cp - ('A' as u32),
        'a'..='z' => cp - ('a' as u32),
        _ => BASE,
    }
}

/// Map a digit value in `0..BASE` to its lowercase Punycode character.
///
/// # Panics
/// Panics if `d` does not map to `a`-`z` or `0`-`9`, i.e. if `d >= BASE`.
fn encode_digit(d: u32) -> char {
    // 0..=25 -> 'a'..='z' (97..=122), 26..=35 -> '0'..='9' (48..=57).
    let r = (d + 22 + (if d < 26 { 75 } else { 0 })) as u8 as char;

    assert!(('0' <= r && r <= '9') || ('a' <= r && r <= 'z'), "r = {}", r);

    r
}

#[cfg(test)]
static TESTS: &'static [(&'static str, &'static str)] = &[
    // examples taken from [RFC-3492, section 7.1](https://tools.ietf.org/html/rfc3492#section-7.1)
    ("\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\
      \u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}",
     "egbpdaj6bu4bxfgehfvwxn"),

    ("\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}",
     "ihqwcrb4cv8a8dqg056pqjye"),

    ("\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}",
     "ihqwctvzc91f659drss3x8bo0yb"),

    ("\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\
      \u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\
      \u{0065}\u{0073}\u{006B}\u{0079}",
     "Proprostnemluvesky-uyb24dma41a"),

    ("\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\
      \u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\
      \u{05D1}\u{05E8}\u{05D9}\u{05EA}",
     "4dbcagdahymbxekheh6e0a7fei0b"),

    ("\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\
      \u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\
      \u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\
      \u{0939}\u{0948}\u{0902}",
     "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    ("\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\
      \u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}",
     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    ("\u{C138}\u{ACC4}\u{C758}\u{BAA8}\u{B4E0}\u{C0AC}\u{B78C}\u{B4E4}\u{C774}\
      \u{D55C}\u{AD6D}\u{C5B4}\u{B97C}\u{C774}\u{D574}\u{D55C}\u{B2E4}\u{BA74}\
      \u{C5BC}\u{B9C8}\u{B098}\u{C88B}\u{C744}\u{AE4C}",
     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"),

    ("\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\
      \u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\
      \u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\
      \u{0438}",
     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    ("\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\
      \u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\
      \u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\
      \u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\
      \u{0061}\u{00F1}\u{006F}\u{006C}",
     "PorqunopuedensimplementehablarenEspaol-fmd56a"),

    ("\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\
      \u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\
      \u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\
      \u{0056}\u{0069}\u{1EC7}\u{0074}",
     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    ("\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}",
     "3B-ww4c5e180e575a65lsy2b"),

    ("\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\
      \u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\
      \u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}",
     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    ("\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\
      \u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\
      \u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}",
     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    ("\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}",
     "2-u9tlzr9756bt3uc0v"),

    ("\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\
      \u{308B}\u{0035}\u{79D2}\u{524D}",
     "MajiKoi5-783gue6qz075azm5e"),

    ("\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}",
     "de-jg4avhby1noc0d"),

    ("\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}",
     "d9juau41awczczp"),

    ("\u{002D}\u{003E}\u{0020}\u{0024}\u{0031}\u{002E}\u{0030}\u{0030}\u{0020}\
      \u{003C}\u{002D}",
     "-> $1.00 <--"),

    // some real-life examples
    ("académie-française", "acadmie-franaise-npb1a"),
    ("bücher", "bcher-kva"),
    ("république-numérique", "rpublique-numrique-bwbm"),

    // some real-life TLD
    ("бг", "90ae"),
    ("рф", "p1ai"),
    ("укр", "j1amh"),
    ("السعودية", "mgberp4a5d4ar"),
    ("امارات", "mgbaam7a8h"),
    ("مصر", "wgbh1c"),
    ("中国", "fiqs8s"),
    ("中國", "fiqz9s"),
    ("台湾", "kprw13d"),
    ("台灣", "kpry57d"),
    ("香港", "j6w193g"),

    // other
    ("", ""),
    ("a", "a-"),
    ("0", "0-"),
    ("A", "A-"),
    ("é", "9ca"),
    ("\n", "\n-"),
];

#[test]
fn test_decode() {
    for &(plain, encoded) in TESTS {
        assert_eq!(decode(encoded), Ok(plain.to_string()));
    }
}

#[test]
fn test_encode() {
    // The encoder always emits lowercase digits, so compare case-insensitively.
    for &(plain, encoded) in TESTS {
        assert_eq!(encode(plain).unwrap().to_lowercase(), encoded.to_lowercase());
    }
}

#[test]
fn test_fail_decode() {
    assert_eq!(decode("bcher-kva.ch"), Err(()));
    assert_eq!(decode("+"), Err(()));
    assert_eq!(decode("\\"), Err(()));
    assert_eq!(decode("é"), Err(()));
    assert_eq!(decode("99999999"), Err(()));
}