From 4e8199b572f2035b7749cba276ece3a26630d23e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:18:21 +0200 Subject: Adding upstream version 1.67.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/icu_locid/src/zerovec.rs | 132 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 vendor/icu_locid/src/zerovec.rs (limited to 'vendor/icu_locid/src/zerovec.rs') diff --git a/vendor/icu_locid/src/zerovec.rs b/vendor/icu_locid/src/zerovec.rs new file mode 100644 index 000000000..530d21499 --- /dev/null +++ b/vendor/icu_locid/src/zerovec.rs @@ -0,0 +1,132 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Documentation on zero-copy deserialization of locale types. +//! +//! [`Locale`] and [`LanguageIdentifier`] are highly structured types that cannot be directly +//! stored in a zero-copy data structure, such as those provided by the [`zerovec`] crate. +//! This page explains how to indirectly store these types in a [`zerovec`]. +//! +//! There are two main use cases, which have different solutions: +//! +//! 1. **Lookup:** You need to locate a locale in a zero-copy vector, such as when querying a map. +//! 2. **Obtain:** You have a locale stored in a zero-copy vector, and you need to obtain a proper +//! [`Locale`] or [`LanguageIdentifier`] for use elsewhere in your program. +//! +//! # Lookup +//! +//! To perform lookup, store the stringified locale in a canonical BCP-47 form as a byte array, +//! and then use [`Locale::strict_cmp()`] to perform an efficient, zero-allocation lookup. +//! +//! To produce more human-readable serialized output, you can use [`UnvalidatedStr`]. +//! +//! ``` +//! use icu_locid::Locale; +//! use zerovec::ule::UnvalidatedStr; +//! use zerovec::ZeroMap; +//! +//! // ZeroMap from locales to integers +//! 
let data: &[(&UnvalidatedStr, u32)] = &[ +//! ("de-DE-u-hc-h12".into(), 5), +//! ("en-US-u-ca-buddhist".into(), 10), +//! ("my-MM".into(), 15), +//! ("sr-Cyrl-ME".into(), 20), +//! ("zh-TW".into(), 25), +//! ]; +//! let zm: ZeroMap<UnvalidatedStr, u32> = data.iter().copied().collect(); +//! +//! // Get the value associated with a locale +//! let loc: Locale = "en-US-u-ca-buddhist".parse().unwrap(); +//! let value = zm.get_copied_by(|uvstr| loc.strict_cmp(uvstr).reverse()); +//! assert_eq!(value, Some(10)); +//! ``` +//! +//! # Obtain +//! +//! Obtaining a [`Locale`] or [`LanguageIdentifier`] is not generally a zero-copy operation, since +//! both of these types may require memory allocation. If possible, architect your code such that +//! you do not need to obtain a structured type. +//! +//! If you need the structured type, such as if you need to manipulate it in some way, there are two +//! options: storing subtags, and storing a string for parsing. +//! +//! ## Storing Subtags +//! +//! If the data being stored only contains a limited number of subtags, you can store them as a +//! tuple, and then construct the [`LanguageIdentifier`] externally. +//! +//! ``` +//! use icu_locid::subtags::{Language, Region, Script}; +//! use icu_locid::LanguageIdentifier; +//! use icu_locid::{ +//! langid, subtags_language as language, subtags_region as region, +//! subtags_script as script, +//! }; +//! use zerovec::ZeroMap; +//! +//! // ZeroMap from integer to LSR (language-script-region) +//! let zm: ZeroMap<u32, (Language, Option<Script>, Option<Region>)> = [ +//! (5, (language!("de"), None, Some(region!("DE")))), +//! (10, (language!("en"), None, Some(region!("US")))), +//! (15, (language!("my"), None, Some(region!("MM")))), +//! ( +//! 20, +//! (language!("sr"), Some(script!("Cyrl")), Some(region!("ME"))), +//! ), +//! (25, (language!("zh"), None, Some(region!("TW")))), +//! ] +//! .into_iter() +//! .collect(); +//! +//! // Construct a LanguageIdentifier from a tuple entry +//! let lid: LanguageIdentifier = +//! 
zm.get_copied(&25).expect("element is present").into(); +//! +//! assert_eq!(lid, langid!("zh-TW")); +//! ``` +//! +//! ## Storing Strings +//! +//! If it is necessary to store and obtain an arbitrary locale, it is currently recommended to +//! store a BCP-47 string and parse it when needed. +//! +//! Since the string is stored in an unparsed state, it is not safe to `unwrap` the result from +//! `Locale::try_from_bytes()`. See [icu4x#831](https://github.com/unicode-org/icu4x/issues/831) +//! for a discussion on potential data models that could ensure that the locale is valid during +//! deserialization. +//! +//! As above, to produce more human-readable serialized output, you can use [`UnvalidatedStr`]. +//! +//! ``` +//! use icu_locid::langid; +//! use icu_locid::Locale; +//! use zerovec::ule::UnvalidatedStr; +//! use zerovec::ZeroMap; +//! +//! // ZeroMap from integer to locale string +//! let data: &[(u32, &UnvalidatedStr)] = &[ +//! (5, "de-DE-u-hc-h12".into()), +//! (10, "en-US-u-ca-buddhist".into()), +//! (15, "my-MM".into()), +//! (20, "sr-Cyrl-ME".into()), +//! (25, "zh-TW".into()), +//! (30, "INVALID".into()), +//! ]; +//! let zm: ZeroMap<u32, UnvalidatedStr> = data.iter().copied().collect(); +//! +//! // Construct a Locale by parsing the string. +//! let value = zm.get(&25).expect("element is present"); +//! let loc = Locale::try_from_bytes(value); +//! assert_eq!(loc, Ok(langid!("zh-TW").into())); +//! +//! // Invalid entries are fallible +//! let err_value = zm.get(&30).expect("element is present"); +//! let err_loc = Locale::try_from_bytes(err_value); +//! assert!(matches!(err_loc, Err(_))); +//! ``` +//! +//! [`Locale`]: crate::Locale +//! [`Locale::strict_cmp()`]: crate::Locale::strict_cmp() +//! [`LanguageIdentifier`]: crate::LanguageIdentifier +//! [`UnvalidatedStr`]: zerovec::ule::UnvalidatedStr -- cgit v1.2.3