summaryrefslogtreecommitdiffstats
path: root/vendor/icu_provider/src/helpers.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_provider/src/helpers.rs')
-rw-r--r--vendor/icu_provider/src/helpers.rs369
1 files changed, 369 insertions, 0 deletions
diff --git a/vendor/icu_provider/src/helpers.rs b/vendor/icu_provider/src/helpers.rs
new file mode 100644
index 000000000..8d499f7ef
--- /dev/null
+++ b/vendor/icu_provider/src/helpers.rs
@@ -0,0 +1,369 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Internal helper functions.
+
+use alloc::string::String;
+
/// Appends `input` to `output`, escaped so that it can be placed inside a JSON string.
///
/// Following ECMA-404, the quotation mark (U+0022), the reverse solidus (U+005C) and the
/// control characters U+0000 through U+001F are written as escape sequences; every other
/// code point is copied unchanged. The surrounding quotation marks are not written, and
/// whatever `output` already contains is kept.
///
/// Returns `output` so that calls can be chained.
pub fn escape_for_json<'o>(input: &str, output: &'o mut String) -> &'o mut String {
    // Escape sequences for U+0000..=U+001F, indexed by code point. The two-character
    // form is used for the code points that have one; the rest use `\uXXXX`.
    const CONTROL_ESCAPES: [&str; 0x20] = [
        "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", "\\u0007",
        "\\b", "\\t", "\\n", "\\u000B", "\\f", "\\r", "\\u000E", "\\u000F",
        "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
        "\\u0018", "\\u0019", "\\u001A", "\\u001B", "\\u001C", "\\u001D", "\\u001E", "\\u001F",
    ];

    for ch in input.chars() {
        let escaped = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            // The lookup yields `None` for every code point at or above U+0020.
            _ => CONTROL_ESCAPES.get(ch as usize).copied(),
        };
        match escaped {
            Some(sequence) => output.push_str(sequence),
            None => output.push(ch),
        }
    }
    output
}
+
#[test]
fn test_escape_for_json() {
    // Pairs of (raw input, expected escaped output).
    let cases = [
        ("", ""),
        ("abc", "abc"),
        ("ab\nc", "ab\\nc"),
        ("ab\\c", "ab\\\\c"),
        ("ab\"c", "ab\\\"c"),
        ("ab\u{0000}c", "ab\\u0000c"),
        ("ab\u{001F}c", "ab\\u001Fc"),
    ];
    for &(raw, expected) in cases.iter() {
        // Each case starts from a fresh, empty buffer.
        let mut escaped = String::new();
        escape_for_json(raw, &mut escaped);
        assert_eq!(expected, escaped);
    }
}
+
/// Const function to compute the FxHash of a byte array with little-endian byte order.
///
/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our
/// use case since the strings being hashed originate from a trusted source (the ICU4X
/// components), and the hashes are computed at compile time, so we can check for collisions.
///
/// We could have considered a SHA or other cryptographic hash function. However, we are using
/// FxHash because:
///
/// 1. There is precedent for this algorithm in Rust
/// 2. The algorithm is easy to implement as a const function
/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree
/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits,
///    such that truncation would be required in order to fit into a u32, partially reducing
///    the benefit of a cryptographically secure algorithm
///
/// # Arguments
///
/// - `bytes`: the input to hash
/// - `ignore_leading`: number of bytes at the start of `bytes` to exclude from the hash
/// - `ignore_trailing`: number of bytes at the end of `bytes` to exclude from the hash
///
/// # Returns
///
/// `0` if the ignored ranges cover the whole input (this includes an empty input and
/// arbitrarily large ignore counts). Otherwise, the hash of the remaining bytes, which are
/// consumed as little-endian 32-bit words followed by an optional 16-bit word and an
/// optional single byte for the tail.
// The indexing operations in this function have been reviewed in detail and won't panic.
#[allow(clippy::indexing_slicing)]
pub const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 {
    // This code is adapted from https://github.com/rust-lang/rustc-hash,
    // whose license text is reproduced below.
    //
    // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
    // file at the top-level directory of this distribution and at
    // http://rust-lang.org/COPYRIGHT.
    //
    // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
    // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
    // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
    // option. This file may not be copied, modified, or distributed
    // except according to those terms.

    // Equivalent to `ignore_leading + ignore_trailing >= bytes.len()`, but written as two
    // comparisons so that very large arguments cannot overflow the addition. The
    // subtraction is safe because the first comparison has already failed.
    let len = bytes.len();
    if ignore_leading >= len || ignore_trailing >= len - ignore_leading {
        return 0;
    }

    // One FxHash round: rotate, mix in the next word, multiply by the seed.
    #[inline]
    const fn hash_word_32(mut hash: u32, word: u32) -> u32 {
        const ROTATE: u32 = 5;
        const SEED32: u32 = 0x9e_37_79_b9;
        hash = hash.rotate_left(ROTATE);
        hash ^= word;
        hash = hash.wrapping_mul(SEED32);
        hash
    }

    // The guard above ensures `cursor < end <= len`, so the subtractions below
    // cannot underflow and every index stays below `end`.
    let mut cursor = ignore_leading;
    let end = len - ignore_trailing;
    let mut hash = 0;

    // Full 4-byte words.
    while end - cursor >= 4 {
        let word = u32::from_le_bytes([
            bytes[cursor],
            bytes[cursor + 1],
            bytes[cursor + 2],
            bytes[cursor + 3],
        ]);
        hash = hash_word_32(hash, word);
        cursor += 4;
    }

    // At most 3 bytes remain: hash a 2-byte word first, then a final single byte.
    if end - cursor >= 2 {
        let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]);
        hash = hash_word_32(hash, word as u32);
        cursor += 2;
    }

    if end - cursor >= 1 {
        hash = hash_word_32(hash, bytes[cursor] as u32);
    }

    hash
}
+
#[test]
fn test_hash_word_32() {
    // Nothing is left to hash when the ignored ranges cover the whole input.
    let empty_cases: [(&[u8], usize, usize); 6] = [
        (b"", 0, 0),
        (b"a", 1, 0),
        (b"a", 0, 1),
        (b"a", 0, 10),
        (b"a", 10, 0),
        (b"a", 1, 1),
    ];
    for &(bytes, leading, trailing) in empty_cases.iter() {
        assert_eq!(0, fxhash_32(bytes, leading, trailing));
    }

    // Reference values for inputs of every length from 1 to 9, so that the 4-byte,
    // 2-byte and 1-byte steps are all exercised.
    let known_hashes: [(&[u8], u32); 9] = [
        (b"a", 0xF3051F19),
        (b"ab", 0x2F9DF119),
        (b"abc", 0xCB1D9396),
        (b"abcd", 0x8628F119),
        (b"abcde", 0xBEBDB56D),
        (b"abcdef", 0x1CE8476D),
        (b"abcdefg", 0xC0F176A4),
        (b"abcdefgh", 0x09AB476D),
        (b"abcdefghi", 0xB72F5D88),
    ];
    for &(bytes, expected) in known_hashes.iter() {
        assert_eq!(expected, fxhash_32(bytes, 0, 0));
    }

    // Hashed together with their tags, these two keys produce the same value...
    assert_eq!(
        fxhash_32(crate::tagged!("props/sc=Khmr@1").as_bytes(), 0, 0),
        fxhash_32(crate::tagged!("props/sc=Samr@1").as_bytes(), 0, 0)
    );

    // ...but they are told apart once the leading and trailing tags are ignored.
    let leading = crate::leading_tag!().len();
    let trailing = crate::trailing_tag!().len();
    assert_ne!(
        fxhash_32(
            crate::tagged!("props/sc=Khmr@1").as_bytes(),
            leading,
            trailing
        ),
        fxhash_32(
            crate::tagged!("props/sc=Samr@1").as_bytes(),
            leading,
            trailing
        )
    );
}
+
/// Expands to the documentation string for a generated provider constructor.
///
/// The first argument selects the flavor: `ANY` for the `AnyProvider` constructor or
/// `BUFFER` for the `BufferProvider` constructor (whose text also notes the `"serde"`
/// feature). The second argument is the path under which `AnyProvider`,
/// `BufferProvider` and `constructors` are linked; the third is the path of the
/// function the text refers the reader to.
#[doc(hidden)]
#[macro_export]
macro_rules! gen_any_buffer_docs {
    (ANY, $provider_crate:path, $delegate:path) => {
        concat!(
            "Creates a new instance using an [`AnyProvider`](",
            stringify!($provider_crate),
            "::AnyProvider).\n\nFor details on the behavior of this function, see: [`",
            stringify!($delegate),
            "`]\n\n[📚 Help choosing a constructor](",
            stringify!($provider_crate),
            "::constructors)"
        )
    };
    (BUFFER, $provider_crate:path, $delegate:path) => {
        concat!(
            "✨ **Enabled with the `\"serde\"` feature.**\n\n",
            "Creates a new instance using a [`BufferProvider`](",
            stringify!($provider_crate),
            "::BufferProvider).\n\nFor details on the behavior of this function, see: [`",
            stringify!($delegate),
            "`]\n\n[📚 Help choosing a constructor](",
            stringify!($provider_crate),
            "::constructors)"
        )
    };
}
+
/// Generates the `AnyProvider` and `BufferProvider` constructors of a type from an
/// existing constructor that takes a concrete provider.
///
/// Meant to be invoked inside an `impl` block. Each invocation emits two functions:
/// one that calls the underlying constructor with `provider.as_downcasting()`, and one,
/// compiled only with the `"serde"` feature, that calls it with
/// `provider.as_deserializing()`.
///
/// The `locale:` and options arguments describe the extra parameters forwarded to the
/// underlying constructor. When `functions: [...]` is omitted, the names
/// `Self::try_new_unstable`, `try_new_with_any_provider` and
/// `try_new_with_buffer_provider` are used.
#[doc(hidden)]
#[macro_export]
macro_rules! gen_any_buffer_constructors {
    // NOTE: the order of the arms matters. The `options: skip` arms must stay ahead of
    // the arms that accept an arbitrary `name: Type` pair, which would otherwise match
    // the same input.

    // ---- No locale, no options ----
    (locale: skip, options: skip, error: $err:path) => {
        $crate::gen_any_buffer_constructors!(
            locale: skip,
            options: skip,
            error: $err,
            functions: [
                Self::try_new_unstable,
                try_new_with_any_provider,
                try_new_with_buffer_provider
            ]
        );
    };
    (
        locale: skip,
        options: skip,
        error: $err:path,
        functions: [$unstable_fn:path, $any_fn:ident, $buffer_fn:ident]
    ) => {
        #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $unstable_fn)]
        pub fn $any_fn(
            provider: &(impl $crate::AnyProvider + ?Sized)
        ) -> Result<Self, $err> {
            use $crate::AsDowncastingAnyProvider;
            $unstable_fn(&provider.as_downcasting())
        }
        #[cfg(feature = "serde")]
        #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $unstable_fn)]
        pub fn $buffer_fn(
            provider: &(impl $crate::BufferProvider + ?Sized)
        ) -> Result<Self, $err> {
            use $crate::AsDeserializingBufferProvider;
            $unstable_fn(&provider.as_deserializing())
        }
    };

    // ---- No locale, one options argument ----
    (locale: skip, $opt_name:ident: $opt_ty:path, error: $err:path) => {
        $crate::gen_any_buffer_constructors!(
            locale: skip,
            $opt_name: $opt_ty,
            error: $err,
            functions: [
                Self::try_new_unstable,
                try_new_with_any_provider,
                try_new_with_buffer_provider
            ]
        );
    };
    (
        locale: skip,
        $opt_name:ident: $opt_ty:path,
        error: $err:path,
        functions: [$unstable_fn:path, $any_fn:ident, $buffer_fn:ident]
    ) => {
        #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $unstable_fn)]
        pub fn $any_fn(
            provider: &(impl $crate::AnyProvider + ?Sized),
            $opt_name: $opt_ty
        ) -> Result<Self, $err> {
            use $crate::AsDowncastingAnyProvider;
            $unstable_fn(&provider.as_downcasting(), $opt_name)
        }
        #[cfg(feature = "serde")]
        #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $unstable_fn)]
        pub fn $buffer_fn(
            provider: &(impl $crate::BufferProvider + ?Sized),
            $opt_name: $opt_ty
        ) -> Result<Self, $err> {
            use $crate::AsDeserializingBufferProvider;
            $unstable_fn(&provider.as_deserializing(), $opt_name)
        }
    };

    // ---- No locale, no options, caller-chosen return type ----
    // Only the explicit `functions: [...]` form exists for this variant.
    (
        locale: skip,
        options: skip,
        result: $ret_ty:path,
        functions: [$unstable_fn:path, $any_fn:ident, $buffer_fn:ident]
    ) => {
        #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $unstable_fn)]
        pub fn $any_fn(provider: &(impl $crate::AnyProvider + ?Sized)) -> $ret_ty {
            use $crate::AsDowncastingAnyProvider;
            $unstable_fn(&provider.as_downcasting())
        }
        #[cfg(feature = "serde")]
        #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $unstable_fn)]
        pub fn $buffer_fn(provider: &(impl $crate::BufferProvider + ?Sized)) -> $ret_ty {
            use $crate::AsDeserializingBufferProvider;
            $unstable_fn(&provider.as_deserializing())
        }
    };

    // ---- Locale, no options ----
    (locale: include, options: skip, error: $err:path) => {
        $crate::gen_any_buffer_constructors!(
            locale: include,
            options: skip,
            error: $err,
            functions: [
                Self::try_new_unstable,
                try_new_with_any_provider,
                try_new_with_buffer_provider
            ]
        );
    };
    (
        locale: include,
        options: skip,
        error: $err:path,
        functions: [$unstable_fn:path, $any_fn:ident, $buffer_fn:ident]
    ) => {
        #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $unstable_fn)]
        pub fn $any_fn(
            provider: &(impl $crate::AnyProvider + ?Sized),
            locale: &$crate::DataLocale
        ) -> Result<Self, $err> {
            use $crate::AsDowncastingAnyProvider;
            $unstable_fn(&provider.as_downcasting(), locale)
        }
        #[cfg(feature = "serde")]
        #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $unstable_fn)]
        pub fn $buffer_fn(
            provider: &(impl $crate::BufferProvider + ?Sized),
            locale: &$crate::DataLocale
        ) -> Result<Self, $err> {
            use $crate::AsDeserializingBufferProvider;
            $unstable_fn(&provider.as_deserializing(), locale)
        }
    };

    // ---- Locale, a config argument and an options argument ----
    (
        locale: include,
        $cfg_name:ident: $cfg_ty:path,
        $opt_name:ident: $opt_ty:path,
        error: $err:path
    ) => {
        $crate::gen_any_buffer_constructors!(
            locale: include,
            $cfg_name: $cfg_ty,
            $opt_name: $opt_ty,
            error: $err,
            functions: [
                Self::try_new_unstable,
                try_new_with_any_provider,
                try_new_with_buffer_provider
            ]
        );
    };
    (
        locale: include,
        $cfg_name:ident: $cfg_ty:path,
        $opt_name:ident: $opt_ty:path,
        error: $err:path,
        functions: [$unstable_fn:path, $any_fn:ident, $buffer_fn:ident]
    ) => {
        #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $unstable_fn)]
        pub fn $any_fn(
            provider: &(impl $crate::AnyProvider + ?Sized),
            locale: &$crate::DataLocale,
            $cfg_name: $cfg_ty,
            $opt_name: $opt_ty
        ) -> Result<Self, $err> {
            use $crate::AsDowncastingAnyProvider;
            $unstable_fn(&provider.as_downcasting(), locale, $cfg_name, $opt_name)
        }
        #[cfg(feature = "serde")]
        #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $unstable_fn)]
        pub fn $buffer_fn(
            provider: &(impl $crate::BufferProvider + ?Sized),
            locale: &$crate::DataLocale,
            $cfg_name: $cfg_ty,
            $opt_name: $opt_ty
        ) -> Result<Self, $err> {
            use $crate::AsDeserializingBufferProvider;
            $unstable_fn(&provider.as_deserializing(), locale, $cfg_name, $opt_name)
        }
    };

    // ---- Locale, one options argument ----
    (locale: include, $opt_name:ident: $opt_ty:path, error: $err:path) => {
        $crate::gen_any_buffer_constructors!(
            locale: include,
            $opt_name: $opt_ty,
            error: $err,
            functions: [
                Self::try_new_unstable,
                try_new_with_any_provider,
                try_new_with_buffer_provider
            ]
        );
    };
    (
        locale: include,
        $opt_name:ident: $opt_ty:path,
        error: $err:path,
        functions: [$unstable_fn:path, $any_fn:ident, $buffer_fn:ident]
    ) => {
        #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $unstable_fn)]
        pub fn $any_fn(
            provider: &(impl $crate::AnyProvider + ?Sized),
            locale: &$crate::DataLocale,
            $opt_name: $opt_ty
        ) -> Result<Self, $err> {
            use $crate::AsDowncastingAnyProvider;
            $unstable_fn(&provider.as_downcasting(), locale, $opt_name)
        }
        #[cfg(feature = "serde")]
        #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $unstable_fn)]
        pub fn $buffer_fn(
            provider: &(impl $crate::BufferProvider + ?Sized),
            locale: &$crate::DataLocale,
            $opt_name: $opt_ty
        ) -> Result<Self, $err> {
            use $crate::AsDeserializingBufferProvider;
            $unstable_fn(&provider.as_deserializing(), locale, $opt_name)
        }
    };
}