summary refs log tree commit diff stats
path: root/vendor/icu_locid/src/zerovec.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_locid/src/zerovec.rs')
-rw-r--r-- vendor/icu_locid/src/zerovec.rs 132
1 files changed, 132 insertions, 0 deletions
diff --git a/vendor/icu_locid/src/zerovec.rs b/vendor/icu_locid/src/zerovec.rs
new file mode 100644
index 000000000..530d21499
--- /dev/null
+++ b/vendor/icu_locid/src/zerovec.rs
@@ -0,0 +1,132 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Documentation on zero-copy deserialization of locale types.
+//!
+//! [`Locale`] and [`LanguageIdentifier`] are highly structured types that cannot be directly
+//! stored in a zero-copy data structure, such as those provided by the [`zerovec`] crate.
+//! This page explains how to indirectly store these types in a [`zerovec`] data structure.
+//!
+//! There are two main use cases, which have different solutions:
+//!
+//! 1. **Lookup:** You need to locate a locale in a zero-copy vector, such as when querying a map.
+//! 2. **Obtain:** You have a locale stored in a zero-copy vector, and you need to obtain a proper
+//! [`Locale`] or [`LanguageIdentifier`] for use elsewhere in your program.
+//!
+//! # Lookup
+//!
+//! To perform lookup, store the stringified locale in a canonical BCP-47 form as a byte array,
+//! and then use [`Locale::strict_cmp()`] to perform an efficient, zero-allocation lookup.
+//!
+//! To produce more human-readable serialized output, you can use [`UnvalidatedStr`].
+//!
+//! ```
+//! use icu_locid::Locale;
+//! use zerovec::ule::UnvalidatedStr;
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from locales to integers
+//! let data: &[(&UnvalidatedStr, u32)] = &[
+//! ("de-DE-u-hc-h12".into(), 5),
+//! ("en-US-u-ca-buddhist".into(), 10),
+//! ("my-MM".into(), 15),
+//! ("sr-Cyrl-ME".into(), 20),
+//! ("zh-TW".into(), 25),
+//! ];
+//! let zm: ZeroMap<UnvalidatedStr, u32> = data.iter().copied().collect();
+//!
+//! // Get the value associated with a locale
+//! let loc: Locale = "en-US-u-ca-buddhist".parse().unwrap();
+//! let value = zm.get_copied_by(|uvstr| loc.strict_cmp(uvstr).reverse());
+//! assert_eq!(value, Some(10));
+//! ```
+//!
+//! # Obtain
+//!
+//! Obtaining a [`Locale`] or [`LanguageIdentifier`] is not generally a zero-copy operation, since
+//! both of these types may require memory allocation. If possible, architect your code such that
+//! you do not need to obtain a structured type.
+//!
+//! If you do need the structured type, for example to manipulate it in some way, there are two
+//! options: storing subtags, and storing a string for parsing.
+//!
+//! ## Storing Subtags
+//!
+//! If the data being stored only contains a limited number of subtags, you can store them as a
+//! tuple, and then construct the [`LanguageIdentifier`] externally.
+//!
+//! ```
+//! use icu_locid::subtags::{Language, Region, Script};
+//! use icu_locid::LanguageIdentifier;
+//! use icu_locid::{
+//! langid, subtags_language as language, subtags_region as region,
+//! subtags_script as script,
+//! };
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from integer to LSR (language-script-region)
+//! let zm: ZeroMap<u32, (Language, Option<Script>, Option<Region>)> = [
+//! (5, (language!("de"), None, Some(region!("DE")))),
+//! (10, (language!("en"), None, Some(region!("US")))),
+//! (15, (language!("my"), None, Some(region!("MM")))),
+//! (
+//! 20,
+//! (language!("sr"), Some(script!("Cyrl")), Some(region!("ME"))),
+//! ),
+//! (25, (language!("zh"), None, Some(region!("TW")))),
+//! ]
+//! .into_iter()
+//! .collect();
+//!
+//! // Construct a LanguageIdentifier from a tuple entry
+//! let lid: LanguageIdentifier =
+//! zm.get_copied(&25).expect("element is present").into();
+//!
+//! assert_eq!(lid, langid!("zh-TW"));
+//! ```
+//!
+//! ## Storing Strings
+//!
+//! If it is necessary to store and obtain an arbitrary locale, it is currently recommended to
+//! store a BCP-47 string and parse it when needed.
+//!
+//! Since the string is stored in an unparsed state, parsing may fail; do not `unwrap` the result of
+//! `Locale::try_from_bytes()`. See [icu4x#831](https://github.com/unicode-org/icu4x/issues/831)
+//! for a discussion on potential data models that could ensure that the locale is valid during
+//! deserialization.
+//!
+//! As above, to produce more human-readable serialized output, you can use [`UnvalidatedStr`].
+//!
+//! ```
+//! use icu_locid::langid;
+//! use icu_locid::Locale;
+//! use zerovec::ule::UnvalidatedStr;
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from integer to locale string
+//! let data: &[(u32, &UnvalidatedStr)] = &[
+//! (5, "de-DE-u-hc-h12".into()),
+//! (10, "en-US-u-ca-buddhist".into()),
+//! (15, "my-MM".into()),
+//! (20, "sr-Cyrl-ME".into()),
+//! (25, "zh-TW".into()),
+//! (30, "INVALID".into()),
+//! ];
+//! let zm: ZeroMap<u32, UnvalidatedStr> = data.iter().copied().collect();
+//!
+//! // Construct a Locale by parsing the string.
+//! let value = zm.get(&25).expect("element is present");
+//! let loc = Locale::try_from_bytes(value);
+//! assert_eq!(loc, Ok(langid!("zh-TW").into()));
+//!
+//! // Parsing an invalid entry returns an error
+//! let err_value = zm.get(&30).expect("element is present");
+//! let err_loc = Locale::try_from_bytes(err_value);
+//! assert!(matches!(err_loc, Err(_)));
+//! ```
+//!
+//! [`Locale`]: crate::Locale
+//! [`Locale::strict_cmp()`]: crate::Locale::strict_cmp()
+//! [`LanguageIdentifier`]: crate::LanguageIdentifier
+//! [`UnvalidatedStr`]: zerovec::ule::UnvalidatedStr