summaryrefslogtreecommitdiffstats
path: root/third_party/rust/tinystr
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/tinystr
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/tinystr')
-rw-r--r--third_party/rust/tinystr/.cargo-checksum.json1
-rw-r--r--third_party/rust/tinystr/Cargo.toml123
-rw-r--r--third_party/rust/tinystr/LICENSE44
-rw-r--r--third_party/rust/tinystr/README.md57
-rw-r--r--third_party/rust/tinystr/benches/common/mod.rs55
-rw-r--r--third_party/rust/tinystr/benches/construct.rs65
-rw-r--r--third_party/rust/tinystr/benches/overview.rs91
-rw-r--r--third_party/rust/tinystr/benches/read.rs34
-rw-r--r--third_party/rust/tinystr/benches/serde.rs37
-rw-r--r--third_party/rust/tinystr/src/ascii.rs984
-rw-r--r--third_party/rust/tinystr/src/asciibyte.rs145
-rw-r--r--third_party/rust/tinystr/src/databake.rs48
-rw-r--r--third_party/rust/tinystr/src/error.rs19
-rw-r--r--third_party/rust/tinystr/src/int_ops.rs315
-rw-r--r--third_party/rust/tinystr/src/lib.rs118
-rw-r--r--third_party/rust/tinystr/src/macros.rs32
-rw-r--r--third_party/rust/tinystr/src/serde.rs91
-rw-r--r--third_party/rust/tinystr/src/ule.rs116
-rw-r--r--third_party/rust/tinystr/src/unvalidated.rs104
-rw-r--r--third_party/rust/tinystr/tests/serde.rs39
20 files changed, 2518 insertions, 0 deletions
diff --git a/third_party/rust/tinystr/.cargo-checksum.json b/third_party/rust/tinystr/.cargo-checksum.json
new file mode 100644
index 0000000000..8608630b3f
--- /dev/null
+++ b/third_party/rust/tinystr/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"6beb3627cfc6a20263af586baaf821337375279823e2808ff2e729dd4c8a908a","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"b7b7a33a29865bc653a6696b079c1527ba61cd58a815fc792ed6fa5bfc680ad5","benches/common/mod.rs":"7e709fcabc2f96b10d3db8968ef3ea7fcc25e88211ce97b64b2126c7a0d7387a","benches/construct.rs":"f61e87e177dd994b700303b75c499a3e95183bb5ff6566889eb085825238dff4","benches/overview.rs":"cac8cfdcf397644ada6bc0bab8f92afc15071b0a64e3119aee4153b616eb9cff","benches/read.rs":"cbf349393a50eb90e7ba53906f98a689d585242292f867a37acf6842263af4d9","benches/serde.rs":"5c88866d08c07088b82dbd5472e6276c632d11e064417f5d8f2025a5ade867f0","src/ascii.rs":"7ad651b33c221a656a48eb44980d9b94bb0a681e79f4955648764968d3a5d62f","src/asciibyte.rs":"fa29de7403c0424c52c2f30bb47002b9abf4bd08b302c411ffe679d3decfb8de","src/databake.rs":"0dc32b7901f73a09357257a57f6150ba7197a88c489c2542081fcdc12336dca8","src/error.rs":"e0cbc912258d6e56aad148404d7cc3213d89736fa9ebe56c41f6cb0df7b2dd63","src/int_ops.rs":"c2be314d19dd41cf18fb3589901d7e58ee32fe3f764fb6a66b08a1e005336406","src/lib.rs":"97cd144b22a402746f4251afc061af075be9d3e6e111d3835f1c1e5cb465d469","src/macros.rs":"3fe76e258b0db2896284bcf4f50a4ac35b7efc542649b4c9f13c6e71c5957ae4","src/serde.rs":"a2c2120043e3334e1cca6fe0fd045ae035849fe6bb33ab0668804334606165dc","src/ule.rs":"52d76c91eeb73eb1885a0b1f40903fc68ea83f7a8b953a71b072eb7d242b3c63","src/unvalidated.rs":"6e41ce55d5f9a2a73f59467c5aff467d12f2b5f297695f109c09812595492efe","tests/serde.rs":"cf8cee82f731928375888d1b5e7e5e50368d3e16ce372fced230c9b1ee2a7451"},"package":"d5d0e245e80bdc9b4e5356fc45a72184abbc3861992603f515270e9340f5a219"} \ No newline at end of file
diff --git a/third_party/rust/tinystr/Cargo.toml b/third_party/rust/tinystr/Cargo.toml
new file mode 100644
index 0000000000..f82bb66b8c
--- /dev/null
+++ b/third_party/rust/tinystr/Cargo.toml
@@ -0,0 +1,123 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.66"
+name = "tinystr"
+version = "0.7.4"
+authors = ["The ICU4X Project Developers"]
+include = [
+ "data/**/*",
+ "src/**/*",
+ "examples/**/*",
+ "benches/**/*",
+ "tests/**/*",
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+]
+description = "A small ASCII-only bounded length string representation."
+readme = "README.md"
+keywords = [
+ "string",
+ "str",
+ "small",
+ "tiny",
+ "no_std",
+]
+categories = ["data-structures"]
+license-file = "LICENSE"
+repository = "https://github.com/unicode-org/icu4x"
+
+[package.metadata.cargo-all-features]
+denylist = ["bench"]
+
+[package.metadata.docs.rs]
+all-features = true
+
+[package.metadata.workspaces]
+independent = true
+
+[lib]
+bench = false
+
+[[test]]
+name = "serde"
+required-features = ["serde"]
+
+[[bench]]
+name = "overview"
+harness = false
+
+[[bench]]
+name = "construct"
+harness = false
+required-features = ["bench"]
+
+[[bench]]
+name = "read"
+harness = false
+required-features = ["bench"]
+
+[[bench]]
+name = "serde"
+harness = false
+required-features = [
+ "bench",
+ "serde",
+]
+
+[dependencies.databake]
+version = "0.1.6"
+optional = true
+default-features = false
+
+[dependencies.displaydoc]
+version = "0.2.3"
+default-features = false
+
+[dependencies.serde]
+version = "1.0.123"
+features = ["alloc"]
+optional = true
+default-features = false
+
+[dependencies.zerovec]
+version = "0.10.0"
+optional = true
+default-features = false
+
+[dev-dependencies.bincode]
+version = "1.3"
+
+[dev-dependencies.postcard]
+version = "1.0.0"
+features = ["use-std"]
+default-features = false
+
+[dev-dependencies.rand]
+version = "0.8.5"
+features = ["small_rng"]
+
+[dev-dependencies.serde_json]
+version = "1.0"
+features = ["alloc"]
+default-features = false
+
+[features]
+alloc = []
+bench = []
+default = ["alloc"]
+std = []
+
+[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies.criterion]
+version = "0.4"
diff --git a/third_party/rust/tinystr/LICENSE b/third_party/rust/tinystr/LICENSE
new file mode 100644
index 0000000000..9845aa5f48
--- /dev/null
+++ b/third_party/rust/tinystr/LICENSE
@@ -0,0 +1,44 @@
+UNICODE LICENSE V3
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 2020-2023 Unicode, Inc.
+
+NOTICE TO USER: Carefully read the following legal agreement. BY
+DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of data files and any associated documentation (the "Data Files") or
+software and any associated documentation (the "Software") to deal in the
+Data Files or Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Data Files or Software, and to permit persons to whom the
+Data Files or Software are furnished to do so, provided that either (a)
+this copyright and permission notice appear with all copies of the Data
+Files or Software, or (b) this copyright and permission notice appear in
+associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+—
+
+Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
+ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
diff --git a/third_party/rust/tinystr/README.md b/third_party/rust/tinystr/README.md
new file mode 100644
index 0000000000..92b74c0d1b
--- /dev/null
+++ b/third_party/rust/tinystr/README.md
@@ -0,0 +1,57 @@
+# tinystr [![crates.io](https://img.shields.io/crates/v/tinystr)](https://crates.io/crates/tinystr)
+
+<!-- cargo-rdme start -->
+
+`tinystr` is a utility crate of the [`ICU4X`] project.
+
+It includes [`TinyAsciiStr`], a core API for representing small ASCII-only bounded length strings.
+
+It is optimized for operations on strings of size 8 or smaller. When use cases involve comparison
+and case conversion of strings (lowercase/uppercase/titlecase), or checking whether they are
+numeric/alphabetic/alphanumeric, `TinyAsciiStr` is designed to give the best performance.
+
+## Examples
+
+```rust
+use tinystr::TinyAsciiStr;
+
+let s1: TinyAsciiStr<4> = "tEsT".parse().expect("Failed to parse.");
+
+assert_eq!(s1, "tEsT");
+assert_eq!(s1.to_ascii_uppercase(), "TEST");
+assert_eq!(s1.to_ascii_lowercase(), "test");
+assert_eq!(s1.to_ascii_titlecase(), "Test");
+assert!(s1.is_ascii_alphanumeric());
+assert!(!s1.is_ascii_numeric());
+
+let s2 = TinyAsciiStr::<8>::try_from_raw(*b"New York")
+ .expect("Failed to parse.");
+
+assert_eq!(s2, "New York");
+assert_eq!(s2.to_ascii_uppercase(), "NEW YORK");
+assert_eq!(s2.to_ascii_lowercase(), "new york");
+assert_eq!(s2.to_ascii_titlecase(), "New york");
+assert!(!s2.is_ascii_alphanumeric());
+```
+
+## Details
+
+When strings are of size 8 or smaller, the struct treats the strings as `u32`/`u64` integers and uses
+bitmasking to provide basic string manipulation operations:
+* `is_ascii_numeric`
+* `is_ascii_alphabetic`
+* `is_ascii_alphanumeric`
+* `to_ascii_lowercase`
+* `to_ascii_uppercase`
+* `to_ascii_titlecase`
+* `PartialEq`
+
+`TinyAsciiStr` will fall back to `u8` character manipulation for strings of length greater than 8.
+
+[`ICU4X`]: ../icu/index.html
+
+<!-- cargo-rdme end -->
+
+## More Information
+
+For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
diff --git a/third_party/rust/tinystr/benches/common/mod.rs b/third_party/rust/tinystr/benches/common/mod.rs
new file mode 100644
index 0000000000..84090b6c2e
--- /dev/null
+++ b/third_party/rust/tinystr/benches/common/mod.rs
@@ -0,0 +1,55 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// This file was adapted from parts of https://github.com/zbraniecki/tinystr
+
+pub static STRINGS_4: &[&str] = &[
+ "US", "GB", "AR", "Hans", "CN", "AT", "PL", "FR", "AT", "Cyrl", "SR", "NO", "FR", "MK", "UK",
+];
+
+pub static STRINGS_8: &[&str] = &[
+ "Latn", "windows", "AR", "Hans", "macos", "AT", "pl", "FR", "en", "Cyrl", "SR", "NO", "419",
+ "und", "UK",
+];
+
+pub static STRINGS_16: &[&str] = &[
+ "Latn",
+ "windows",
+ "AR",
+ "Hans",
+ "macos",
+ "AT",
+ "infiniband",
+ "FR",
+ "en",
+ "Cyrl",
+ "FromIntegral",
+ "NO",
+ "419",
+ "MacintoshOSX2019",
+ "UK",
+];
+
+#[macro_export]
+macro_rules! bench_block { // registers three criterion groups named "<name>/4", "<name>/8" and "<name>/16"
+ ($c:expr, $name:expr, $action:ident) => { // `$action!(Type, INPUTS)` is passed to `bench_function` as the benchmark routine
+ let mut group4 = $c.benchmark_group(&format!("{}/4", $name)); // STRINGS_4 is run against String and every TinyAsciiStr width
+ group4.bench_function("String", $action!(String, STRINGS_4));
+ group4.bench_function("TinyAsciiStr<4>", $action!(TinyAsciiStr<4>, STRINGS_4));
+ group4.bench_function("TinyAsciiStr<8>", $action!(TinyAsciiStr<8>, STRINGS_4));
+ group4.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_4));
+ group4.finish();
+
+ let mut group8 = $c.benchmark_group(&format!("{}/8", $name)); // STRINGS_8 is only run against widths 8 and 16
+ group8.bench_function("String", $action!(String, STRINGS_8));
+ group8.bench_function("TinyAsciiStr<8>", $action!(TinyAsciiStr<8>, STRINGS_8));
+ group8.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_8));
+ group8.finish();
+
+ let mut group16 = $c.benchmark_group(&format!("{}/16", $name)); // STRINGS_16 is only run against width 16
+ group16.bench_function("String", $action!(String, STRINGS_16));
+ group16.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_16));
+ group16.finish();
+ };
+}
diff --git a/third_party/rust/tinystr/benches/construct.rs b/third_party/rust/tinystr/benches/construct.rs
new file mode 100644
index 0000000000..19b18121ff
--- /dev/null
+++ b/third_party/rust/tinystr/benches/construct.rs
@@ -0,0 +1,65 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// This file was adapted from https://github.com/zbraniecki/tinystr
+
+mod common;
+use common::*;
+
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Bencher;
+use criterion::Criterion;
+
+use tinystr::TinyAsciiStr;
+
+fn construct_from_str(c: &mut Criterion) { // benchmarks parsing each `&str` input into the target type via `str::parse`
+ macro_rules! cfs { // expands to the bench closure for target type `$r` over `$inputs`
+ ($r:ty, $inputs:expr) => {
+ |b: &mut Bencher| {
+ b.iter(|| {
+ for s in $inputs {
+ let _: $r = black_box(s.parse().unwrap()); // unwrap: a parse failure aborts the benchmark
+ }
+ })
+ }
+ };
+ }
+
+ bench_block!(c, "construct_from_str", cfs);
+}
+
+fn construct_from_bytes(c: &mut Criterion) { // benchmarks `from_bytes` on the inputs' raw bytes; no `String` baseline here
+ macro_rules! cfu { // expands to the bench closure for target type `$r` over `$inputs`
+ ($r:ty, $inputs:expr) => {
+ |b| {
+ let raw: Vec<&[u8]> = $inputs.iter().map(|s| s.as_bytes()).collect(); // built once, outside the timed loop
+ b.iter(move || {
+ for u in &raw {
+ let _ = black_box(<$r>::from_bytes(*u).unwrap());
+ }
+ })
+ }
+ };
+ }
+
+ let mut group4 = c.benchmark_group("construct_from_bytes/4"); // groups are written out by hand instead of via `bench_block!`
+ group4.bench_function("TinyAsciiStr<4>", cfu!(TinyAsciiStr<4>, STRINGS_4));
+ group4.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_4));
+ group4.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_4));
+ group4.finish();
+
+ let mut group8 = c.benchmark_group("construct_from_bytes/8");
+ group8.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_8));
+ group8.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_8));
+ group8.finish();
+
+ let mut group16 = c.benchmark_group("construct_from_bytes/16");
+ group16.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_16));
+ group16.finish();
+}
+
+criterion_group!(benches, construct_from_str, construct_from_bytes,);
+criterion_main!(benches);
diff --git a/third_party/rust/tinystr/benches/overview.rs b/third_party/rust/tinystr/benches/overview.rs
new file mode 100644
index 0000000000..ccdd21ac40
--- /dev/null
+++ b/third_party/rust/tinystr/benches/overview.rs
@@ -0,0 +1,91 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod common;
+use common::*;
+
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Criterion;
+
+use tinystr::TinyAsciiStr;
+
+fn overview(c: &mut Criterion) { // one "overview" group with three benchmarks: construct, read and compare
+ let mut g = c.benchmark_group("overview");
+
+ g.bench_function("construct/TinyAsciiStr", |b| { // parses every input set into each width it is used with
+ b.iter(|| {
+ for s in STRINGS_4 {
+ let _: TinyAsciiStr<4> = black_box(s).parse().unwrap();
+ let _: TinyAsciiStr<8> = black_box(s).parse().unwrap();
+ let _: TinyAsciiStr<16> = black_box(s).parse().unwrap();
+ }
+ for s in STRINGS_8 {
+ let _: TinyAsciiStr<8> = black_box(s).parse().unwrap();
+ let _: TinyAsciiStr<16> = black_box(s).parse().unwrap();
+ }
+ for s in STRINGS_16 {
+ let _: TinyAsciiStr<16> = black_box(s).parse().unwrap();
+ }
+ });
+ });
+
+ let parsed_ascii_4: Vec<TinyAsciiStr<4>> = STRINGS_4 // pre-parsed fixtures shared by the read and compare benchmarks
+ .iter()
+ .map(|s| s.parse::<TinyAsciiStr<4>>().unwrap())
+ .collect();
+ let parsed_ascii_8: Vec<TinyAsciiStr<8>> = STRINGS_4 // the wider fixtures also include the shorter input sets
+ .iter()
+ .chain(STRINGS_8)
+ .map(|s| s.parse::<TinyAsciiStr<8>>().unwrap())
+ .collect();
+ let parsed_ascii_16: Vec<TinyAsciiStr<16>> = STRINGS_4
+ .iter()
+ .chain(STRINGS_8)
+ .chain(STRINGS_16)
+ .map(|s| s.parse::<TinyAsciiStr<16>>().unwrap())
+ .collect();
+
+ g.bench_function("read/TinyAsciiStr", |b| { // derefs each value to `&str` and sums its bytes
+ b.iter(|| {
+ let mut collector: usize = 0;
+ for t in black_box(&parsed_ascii_4) {
+ let s: &str = t;
+ collector += s.bytes().map(usize::from).sum::<usize>();
+ }
+ for t in black_box(&parsed_ascii_8) {
+ let s: &str = t;
+ collector += s.bytes().map(usize::from).sum::<usize>();
+ }
+ for t in black_box(&parsed_ascii_16) {
+ let s: &str = t;
+ collector += s.bytes().map(usize::from).sum::<usize>();
+ }
+ collector
+ });
+ });
+
+ g.bench_function("compare/TinyAsciiStr", |b| { // compares adjacent elements with `Ord::cmp`
+ b.iter(|| {
+ let mut collector: usize = 0;
+ for ts in black_box(&parsed_ascii_4).windows(2) {
+ let o = ts[0].cmp(&ts[1]);
+ collector ^= o as usize; // presumably folded in so the comparison result is observed — confirm intent
+ }
+ for ts in black_box(&parsed_ascii_8).windows(2) {
+ let o = ts[0].cmp(&ts[1]);
+ collector ^= o as usize;
+ }
+ for ts in black_box(&parsed_ascii_16).windows(2) {
+ let o = ts[0].cmp(&ts[1]);
+ collector ^= o as usize;
+ }
+ collector
+ });
+ });
+}
+
+criterion_group!(benches, overview,);
+criterion_main!(benches);
diff --git a/third_party/rust/tinystr/benches/read.rs b/third_party/rust/tinystr/benches/read.rs
new file mode 100644
index 0000000000..793bb14f87
--- /dev/null
+++ b/third_party/rust/tinystr/benches/read.rs
@@ -0,0 +1,34 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod common;
+use common::*;
+
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Bencher;
+use criterion::Criterion;
+
+use tinystr::TinyAsciiStr;
+
+fn read(c: &mut Criterion) { // benchmarks dereferencing already-parsed values to `&str`
+ macro_rules! cfs { // NOTE(review): same name as the macro in construct.rs, although this one benchmarks reads
+ ($r:ty, $inputs:expr) => {
+ |b: &mut Bencher| {
+ let parsed: Vec<$r> = $inputs.iter().map(|s| s.parse().unwrap()).collect(); // parsing happens outside the timed loop
+ b.iter(|| {
+ for s in &parsed {
+ let _: &str = black_box(&**s); // `&**s`: `&$r` -> `$r` -> `str` through `Deref`
+ }
+ })
+ }
+ };
+ }
+
+ bench_block!(c, "read", cfs);
+}
+
+criterion_group!(benches, read,);
+criterion_main!(benches);
diff --git a/third_party/rust/tinystr/benches/serde.rs b/third_party/rust/tinystr/benches/serde.rs
new file mode 100644
index 0000000000..b0341221d0
--- /dev/null
+++ b/third_party/rust/tinystr/benches/serde.rs
@@ -0,0 +1,37 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod common;
+use common::*;
+
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::Bencher;
+use criterion::Criterion;
+
+use tinystr::TinyAsciiStr;
+
+fn deserialize(c: &mut Criterion) { // benchmarks postcard deserialization into the target type
+ macro_rules! cfs { // NOTE(review): same name as the macro in construct.rs, although this one benchmarks deserialization
+ ($r:ty, $inputs:expr) => {
+ |b: &mut Bencher| {
+ let serialized: Vec<Vec<u8>> = $inputs // each input is parsed and serialized once, outside the timed loop
+ .iter()
+ .map(|s| postcard::to_stdvec(&s.parse::<$r>().unwrap()).unwrap())
+ .collect();
+ b.iter(|| {
+ for bytes in &serialized {
+ let _: Result<$r, _> = black_box(postcard::from_bytes(bytes)); // the Result is not unwrapped inside the timed loop
+ }
+ })
+ }
+ };
+ }
+
+ bench_block!(c, "deserialize", cfs);
+}
+
+criterion_group!(benches, deserialize,);
+criterion_main!(benches);
diff --git a/third_party/rust/tinystr/src/ascii.rs b/third_party/rust/tinystr/src/ascii.rs
new file mode 100644
index 0000000000..9ab694f02a
--- /dev/null
+++ b/third_party/rust/tinystr/src/ascii.rs
@@ -0,0 +1,984 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::asciibyte::AsciiByte;
+use crate::int_ops::{Aligned4, Aligned8};
+use crate::TinyStrError;
+use core::fmt;
+use core::ops::Deref;
+use core::str::{self, FromStr};
+
+#[repr(transparent)]
+#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
+pub struct TinyAsciiStr<const N: usize> { // an ASCII-only string of at most `N` bytes, stored inline
+ bytes: [AsciiByte; N], // the string's bytes followed by NUL padding; NUL never precedes a non-NUL byte
+}
+
+impl<const N: usize> TinyAsciiStr<N> {
+ /// Creates a `TinyAsciiStr<N>` from the given byte slice, validating its contents.
+ /// `bytes` must hold at most `N` bytes, all of them ASCII and none NUL; otherwise an error is returned.
+ pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> {
+ Self::from_bytes_inner(bytes, 0, bytes.len(), false)
+ }
+
+ /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`.
+ ///
+ /// The byte array may be padded with trailing NUL bytes; a NUL followed by a non-NUL byte is an error.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use tinystr::tinystr;
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// assert_eq!(
+ /// TinyAsciiStr::<3>::try_from_raw(*b"GB\0"),
+ /// Ok(tinystr!(3, "GB"))
+ /// );
+ /// assert_eq!(
+ /// TinyAsciiStr::<3>::try_from_raw(*b"USD"),
+ /// Ok(tinystr!(3, "USD"))
+ /// );
+ /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_)));
+ /// ```
+ pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> {
+ Self::from_bytes_inner(&raw, 0, N, true)
+ }
+
+ /// Equivalent to [`from_bytes(&bytes[start..end])`](Self::from_bytes), but callable in a
+ /// `const` context (which range indexing is not). Expects `start <= end <= bytes.len()`.
+ pub const fn from_bytes_manual_slice(
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+ ) -> Result<Self, TinyStrError> {
+ Self::from_bytes_inner(bytes, start, end, false)
+ }
+
+ #[inline]
+ pub(crate) const fn from_bytes_inner(
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+ allow_trailing_null: bool,
+ ) -> Result<Self, TinyStrError> {
+ let len = end - start; // callers are expected to pass `start <= end`
+ if len > N {
+ return Err(TinyStrError::TooLarge { max: N, len });
+ }
+
+ let mut out = [0; N];
+ let mut i = 0;
+ let mut found_null = false;
+ // `out[i]` is in bounds because `len <= N` was checked above; `bytes[start + i]` relies on the caller passing `end <= bytes.len()`
+ #[allow(clippy::indexing_slicing)]
+ while i < len {
+ let b = bytes[start + i];
+
+ if b == 0 {
+ found_null = true;
+ } else if b >= 0x80 {
+ return Err(TinyStrError::NonAscii);
+ } else if found_null {
+ // A non-NUL byte after a NUL: NULs are only tolerated as trailing padding
+ return Err(TinyStrError::ContainsNull);
+ }
+ out[i] = b;
+
+ i += 1;
+ }
+
+ if !allow_trailing_null && found_null {
+ // NULs were seen (necessarily trailing at this point) but this caller does not allow them
+ return Err(TinyStrError::ContainsNull);
+ }
+
+ Ok(Self {
+ // SAFETY: every byte written to `out` was checked to be < 0x80 (the rest stay 0), and `out` has `N` elements like `self.bytes`
+ bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
+ })
+ }
+
+ // Same validation as `from_bytes`, applied to the bytes of `s`. TODO: this inherent method shadows `FromStr::from_str`. Rename?
+ #[inline]
+ pub const fn from_str(s: &str) -> Result<Self, TinyStrError> {
+ Self::from_bytes_inner(s.as_bytes(), 0, s.len(), false)
+ }
+
+ #[inline]
+ pub const fn as_str(&self) -> &str {
+ // SAFETY: the stored bytes are ASCII (validated by `from_bytes_inner`, or promised by the caller of `from_bytes_unchecked`), and ASCII is valid UTF-8
+ unsafe { str::from_utf8_unchecked(self.as_bytes()) }
+ }
+
+ #[inline]
+ #[must_use]
+ pub const fn len(&self) -> usize {
+ if N <= 4 { // small widths delegate to the integer helpers from `int_ops` (not shown here)
+ Aligned4::from_ascii_bytes(&self.bytes).len()
+ } else if N <= 8 {
+ Aligned8::from_ascii_bytes(&self.bytes).len()
+ } else { // N > 8: count the bytes that precede the first NUL
+ let mut i = 0;
+ #[allow(clippy::indexing_slicing)] // the loop guard checks `i < N` before indexing
+ while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 {
+ i += 1
+ }
+ i
+ }
+ }
+
+ /// Returns `true` if the string contains no characters.
+ ///
+ /// Only the first stored byte is inspected: NUL is used solely as trailing padding,
+ /// so a NUL in position 0 means there is no content at all.
+ #[inline]
+ #[must_use]
+ pub const fn is_empty(&self) -> bool {
+ // A zero-capacity string has no first byte to inspect and is always empty;
+ // without this guard `self.bytes[0]` would panic for `TinyAsciiStr<0>`.
+ N == 0 || self.bytes[0] as u8 == AsciiByte::B0 as u8
+ }
+
+ #[inline]
+ #[must_use]
+ pub const fn as_bytes(&self) -> &[u8] {
+ // SAFETY: `self.bytes.as_slice()` pointer-casts to `&[u8]` (assumes `AsciiByte` is byte-sized; it is defined in asciibyte.rs),
+ // and shortening that slice to `self.len()`, which is at most `N`, stays within the array.
+ unsafe {
+ core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len())
+ }
+ }
+
+ #[inline]
+ #[must_use]
+ pub const fn all_bytes(&self) -> &[u8; N] {
+ // SAFETY: `self.bytes` has the same size as `[u8; N]`; unlike `as_bytes`, the result covers all `N` bytes, trailing NUL padding included
+ unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) }
+ }
+
+ #[inline]
+ #[must_use]
+ /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`.
+ ///
+ /// If `M < len()` the string gets truncated, otherwise only the
+ /// memory representation changes.
+ pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> {
+ let mut bytes = [0; M];
+ let mut i = 0;
+ // Indexing is protected by the loop guard (`i < M` for `bytes`, `i < N` for `self.bytes`)
+ #[allow(clippy::indexing_slicing)]
+ while i < M && i < N {
+ bytes[i] = self.bytes[i] as u8;
+ i += 1;
+ }
+ // SAFETY: `self.bytes` only contains ASCII bytes, with no null bytes between
+ // ASCII characters, so this also holds for the copied (possibly truncated) prefix in `bytes`.
+ unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) }
+ }
+
+ /// # Safety
+ /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
+ /// between ASCII characters (null bytes may only appear as trailing padding)
+ #[must_use]
+ pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self {
+ Self {
+ bytes: AsciiByte::to_ascii_byte_array(&bytes),
+ }
+ }
+}
+
+macro_rules! check_is { // body of a `const fn ... -> bool`: integer helper for N <= 8, byte-wise loop otherwise
+ ($self:ident, $check_int:ident, $check_u8:ident) => { // Arm 1: every byte before the first NUL must satisfy `$check_u8`
+ if N <= 4 {
+ Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
+ } else if N <= 8 {
+ Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
+ } else {
+ let mut i = 0;
+ // Won't panic: the loop guard checks `i < N` before indexing
+ #[allow(clippy::indexing_slicing)]
+ while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
+ if !($self.bytes[i] as u8).$check_u8() {
+ return false;
+ }
+ i += 1;
+ }
+ true
+ }
+ };
+ ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => { // Arm 2: byte 0 must fail the first predicate, each later byte must fail the second
+ if N <= 4 {
+ Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
+ } else if N <= 8 {
+ Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
+ } else {
+ // Won't panic: this branch is only reached when N > 8
+ if ($self.bytes[0] as u8).$check_u8_0_inv() {
+ return false;
+ }
+ let mut i = 1;
+ // Won't panic: the loop guard checks `i < N` before indexing
+ #[allow(clippy::indexing_slicing)]
+ while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
+ if ($self.bytes[i] as u8).$check_u8_1_inv() {
+ return false;
+ }
+ i += 1;
+ }
+ true
+ }
+ };
+ ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => { // Arm 3: despite the `_inv` names nothing is negated — byte 0 must pass the first predicate, later bytes the second
+ if N <= 4 {
+ Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
+ } else if N <= 8 {
+ Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
+ } else {
+ // Won't panic: this branch is only reached when N > 8. Byte 0 is tested even if it is NUL.
+ if !($self.bytes[0] as u8).$check_u8_0_inv() {
+ return false;
+ }
+ let mut i = 1;
+ // Won't panic: the loop guard checks `i < N` before indexing
+ #[allow(clippy::indexing_slicing)]
+ while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
+ if !($self.bytes[i] as u8).$check_u8_1_inv() {
+ return false;
+ }
+ i += 1;
+ }
+ true
+ }
+ };
+}
+
+impl<const N: usize> TinyAsciiStr<N> {
+ /// Checks if the value is composed of ASCII alphabetic characters:
+ ///
+ /// * U+0041 'A' ..= U+005A 'Z', or
+ /// * U+0061 'a' ..= U+007A 'z'.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
+ ///
+ /// assert!(s1.is_ascii_alphabetic());
+ /// assert!(!s2.is_ascii_alphabetic());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_alphabetic(&self) -> bool {
+ check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
+ }
+
+ /// Checks if the value is composed of ASCII alphanumeric characters:
+ ///
+ /// * U+0041 'A' ..= U+005A 'Z', or
+ /// * U+0061 'a' ..= U+007A 'z', or
+ /// * U+0030 '0' ..= U+0039 '9'.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
+ ///
+ /// assert!(s1.is_ascii_alphanumeric());
+ /// assert!(!s2.is_ascii_alphanumeric());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_alphanumeric(&self) -> bool {
+ check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
+ }
+
+ /// Checks if the value is composed of ASCII decimal digits:
+ ///
+ /// * U+0030 '0' ..= U+0039 '9'.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
+ ///
+ /// assert!(s1.is_ascii_numeric());
+ /// assert!(!s2.is_ascii_numeric());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_numeric(&self) -> bool {
+ check_is!(self, is_ascii_numeric, is_ascii_digit)
+ }
+
+ /// Checks if the value is in ASCII lower case.
+ ///
+ /// All letter characters are checked for case. Non-letter characters are ignored.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
+ /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
+ ///
+ /// assert!(!s1.is_ascii_lowercase());
+ /// assert!(s2.is_ascii_lowercase());
+ /// assert!(s3.is_ascii_lowercase());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_lowercase(&self) -> bool {
+ check_is!(
+ self,
+ is_ascii_lowercase,
+ !is_ascii_uppercase,
+ !is_ascii_uppercase
+ )
+ }
+
+ /// Checks if the value is in ASCII title case.
+ ///
+ /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
+ /// Non-letter characters are ignored.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
+ /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
+ ///
+ /// assert!(!s1.is_ascii_titlecase());
+ /// assert!(s2.is_ascii_titlecase());
+ /// assert!(s3.is_ascii_titlecase());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_titlecase(&self) -> bool {
+ check_is!(
+ self,
+ is_ascii_titlecase,
+ !is_ascii_lowercase,
+ !is_ascii_uppercase
+ )
+ }
+
+ /// Checks if the value is in ASCII upper case.
+ ///
+ /// All letter characters are checked for case. Non-letter characters are ignored.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
+ /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
+ ///
+ /// assert!(!s1.is_ascii_uppercase());
+ /// assert!(s2.is_ascii_uppercase());
+ /// assert!(!s3.is_ascii_uppercase());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_uppercase(&self) -> bool {
+ check_is!(
+ self,
+ is_ascii_uppercase,
+ !is_ascii_lowercase,
+ !is_ascii_lowercase
+ )
+ }
+
+ /// Checks if the value is composed of ASCII alphabetic lower case characters:
+ ///
+ /// * U+0061 'a' ..= U+007A 'z',
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
+ /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
+ /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
+ /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
+ ///
+ /// assert!(!s1.is_ascii_alphabetic_lowercase());
+ /// assert!(!s2.is_ascii_alphabetic_lowercase());
+ /// assert!(!s3.is_ascii_alphabetic_lowercase());
+ /// assert!(s4.is_ascii_alphabetic_lowercase());
+ /// assert!(!s5.is_ascii_alphabetic_lowercase());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
+ check_is!(
+ self,
+ is_ascii_alphabetic_lowercase,
+ is_ascii_lowercase,
+ is_ascii_lowercase
+ )
+ }
+
+ /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
+ /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
+ /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
+ /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
+ ///
+ /// assert!(s1.is_ascii_alphabetic_titlecase());
+ /// assert!(!s2.is_ascii_alphabetic_titlecase());
+ /// assert!(!s3.is_ascii_alphabetic_titlecase());
+ /// assert!(!s4.is_ascii_alphabetic_titlecase());
+ /// assert!(!s5.is_ascii_alphabetic_titlecase());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
+ check_is!(
+ self,
+ is_ascii_alphabetic_titlecase,
+ is_ascii_uppercase,
+ is_ascii_lowercase
+ )
+ }
+
+ /// Checks if the value is composed of ASCII alphabetic upper case characters:
+ ///
+ /// * U+0041 'A' ..= U+005A 'Z',
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
+ /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
+ /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
+ /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
+ /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
+ ///
+ /// assert!(!s1.is_ascii_alphabetic_uppercase());
+ /// assert!(!s2.is_ascii_alphabetic_uppercase());
+ /// assert!(!s3.is_ascii_alphabetic_uppercase());
+ /// assert!(s4.is_ascii_alphabetic_uppercase());
+ /// assert!(!s5.is_ascii_alphabetic_uppercase());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
+ check_is!(
+ self,
+ is_ascii_alphabetic_uppercase,
+ is_ascii_uppercase,
+ is_ascii_uppercase
+ )
+ }
+}
+
+macro_rules! to {
+ ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
+ let mut i = 0;
+ if N <= 4 {
+ let aligned = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
+ // Won't panic because self.bytes has length N and aligned has length >= N
+ #[allow(clippy::indexing_slicing)]
+ while i < N {
+ $self.bytes[i] = aligned[i];
+ i += 1;
+ }
+ } else if N <= 8 {
+ let aligned = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
+ // Won't panic because self.bytes has length N and aligned has length >= N
+ #[allow(clippy::indexing_slicing)]
+ while i < N {
+ $self.bytes[i] = aligned[i];
+ i += 1;
+ }
+ } else {
+ // Won't panic because self.bytes has length N
+ #[allow(clippy::indexing_slicing)]
+ while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
+ // SAFETY: AsciiByte is repr(u8) and has same size as u8
+ unsafe {
+ $self.bytes[i] = core::mem::transmute(
+ ($self.bytes[i] as u8).$later_char_to()
+ );
+ }
+ i += 1;
+ }
+ // SAFETY: AsciiByte is repr(u8) and has same size as u8
+ $(
+ $self.bytes[0] = unsafe {
+ core::mem::transmute(($self.bytes[0] as u8).$first_char_to())
+ };
+ )?
+ }
+ $self
+ }};
+}
+
+impl<const N: usize> TinyAsciiStr<N> {
+ /// Returns the ASCII lower case equivalent of this value (`self` is taken by value; the result is returned).
+ ///
+ /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
+ ///
+ /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn to_ascii_lowercase(mut self) -> Self {
+ to!(self, to_ascii_lowercase, to_ascii_lowercase)
+ }
+
+ /// Returns the ASCII title case equivalent of this value (`self` is taken by value; the result is returned).
+ ///
+ /// The first character is converted to ASCII uppercase; the remaining characters
+ /// are converted to ASCII lowercase.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
+ ///
+ /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn to_ascii_titlecase(mut self) -> Self {
+ to!(
+ self,
+ to_ascii_titlecase,
+ to_ascii_lowercase,
+ to_ascii_uppercase
+ )
+ }
+
+ /// Returns the ASCII upper case equivalent of this value (`self` is taken by value; the result is returned).
+ ///
+ /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use tinystr::TinyAsciiStr;
+ ///
+ /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
+ ///
+ /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
+ /// ```
+ #[inline]
+ #[must_use]
+ pub const fn to_ascii_uppercase(mut self) -> Self {
+ to!(self, to_ascii_uppercase, to_ascii_uppercase)
+ }
+}
+
+impl<const N: usize> fmt::Debug for TinyAsciiStr<N> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self.as_str(), f)
+ }
+}
+
+impl<const N: usize> fmt::Display for TinyAsciiStr<N> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Display::fmt(self.as_str(), f)
+ }
+}
+
+impl<const N: usize> Deref for TinyAsciiStr<N> {
+ type Target = str;
+ #[inline]
+ fn deref(&self) -> &str {
+ self.as_str()
+ }
+}
+
+impl<const N: usize> FromStr for TinyAsciiStr<N> {
+ type Err = TinyStrError;
+ #[inline]
+ fn from_str(s: &str) -> Result<Self, TinyStrError> {
+ Self::from_str(s)
+ }
+}
+
+impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> {
+ fn eq(&self, other: &str) -> bool {
+ self.deref() == other
+ }
+}
+
+impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> {
+ fn eq(&self, other: &&str) -> bool {
+ self.deref() == *other
+ }
+}
+
+#[cfg(feature = "alloc")]
+impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
+ fn eq(&self, other: &alloc::string::String) -> bool {
+ self.deref() == other.deref()
+ }
+}
+
+#[cfg(feature = "alloc")]
+impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
+ fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
+ self.deref() == other.deref()
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use rand::distributions::Distribution;
+ use rand::distributions::Standard;
+ use rand::rngs::SmallRng;
+ use rand::seq::SliceRandom;
+ use rand::SeedableRng;
+
+ const STRINGS: [&str; 26] = [
+ "Latn",
+ "laTn",
+ "windows",
+ "AR",
+ "Hans",
+ "macos",
+ "AT",
+ "infiniband",
+ "FR",
+ "en",
+ "Cyrl",
+ "FromIntegral",
+ "NO",
+ "419",
+ "MacintoshOSX2019",
+ "a3z",
+ "A3z",
+ "A3Z",
+ "a3Z",
+ "3A",
+ "3Z",
+ "3a",
+ "3z",
+ "@@[`{",
+ "UK",
+ "E12",
+ ];
+
+ fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
+ let mut rng = SmallRng::seed_from_u64(2022);
+ // Need to do this in 2 steps since the RNG is needed twice
+ let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap())
+ .take(num_strings)
+ .collect::<Vec<usize>>();
+ string_lengths
+ .iter()
+ .map(|len| {
+ Standard
+ .sample_iter(&mut rng)
+ .filter(|b: &u8| *b > 0 && *b < 0x80)
+ .take(*len)
+ .collect::<Vec<u8>>()
+ })
+ .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII"))
+ .collect()
+ }
+
+ fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
+ where
+ F1: Fn(&str) -> T,
+ F2: Fn(TinyAsciiStr<N>) -> T,
+ T: core::fmt::Debug + core::cmp::PartialEq,
+ {
+ for s in STRINGS
+ .into_iter()
+ .map(str::to_owned)
+ .chain(gen_strings(100, &[3, 4, 5, 8, 12]))
+ {
+ let t = match TinyAsciiStr::<N>::from_str(&s) {
+ Ok(t) => t,
+ Err(TinyStrError::TooLarge { .. }) => continue,
+ Err(e) => panic!("{}", e),
+ };
+ let expected = reference_f(&s);
+ let actual = tinystr_f(t);
+ assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
+ }
+ }
+
+ #[test]
+ fn test_is_ascii_alphabetic() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| s.chars().all(|c| c.is_ascii_alphabetic()),
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_is_ascii_alphanumeric() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| s.chars().all(|c| c.is_ascii_alphanumeric()),
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphanumeric(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_is_ascii_numeric() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| s.chars().all(|c| c.is_ascii_digit()),
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_numeric(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_is_ascii_lowercase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ s == TinyAsciiStr::<16>::from_str(s)
+ .unwrap()
+ .to_ascii_lowercase()
+ .as_str()
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_lowercase(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_is_ascii_titlecase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ s == TinyAsciiStr::<16>::from_str(s)
+ .unwrap()
+ .to_ascii_titlecase()
+ .as_str()
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_titlecase(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_is_ascii_uppercase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ s == TinyAsciiStr::<16>::from_str(s)
+ .unwrap()
+ .to_ascii_uppercase()
+ .as_str()
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_uppercase(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_is_ascii_alphabetic_lowercase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ // Check alphabetic
+ s.chars().all(|c| c.is_ascii_alphabetic()) &&
+ // Check lowercase
+ s == TinyAsciiStr::<16>::from_str(s)
+ .unwrap()
+ .to_ascii_lowercase()
+ .as_str()
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_is_ascii_alphabetic_titlecase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ // Check alphabetic
+ s.chars().all(|c| c.is_ascii_alphabetic()) &&
+ // Check titlecase
+ s == TinyAsciiStr::<16>::from_str(s)
+ .unwrap()
+ .to_ascii_titlecase()
+ .as_str()
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_is_ascii_alphabetic_uppercase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ // Check alphabetic
+ s.chars().all(|c| c.is_ascii_alphabetic()) &&
+ // Check uppercase
+ s == TinyAsciiStr::<16>::from_str(s)
+ .unwrap()
+ .to_ascii_uppercase()
+ .as_str()
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_to_ascii_lowercase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ s.chars()
+ .map(|c| c.to_ascii_lowercase())
+ .collect::<String>()
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_lowercase(t).as_str().to_owned(),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_to_ascii_titlecase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ let mut r = s
+ .chars()
+ .map(|c| c.to_ascii_lowercase())
+ .collect::<String>();
+ // Safe because the string is nonempty and an ASCII string
+ unsafe { r.as_bytes_mut()[0].make_ascii_uppercase() };
+ r
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+
+ #[test]
+ fn test_to_ascii_uppercase() {
+ fn check<const N: usize>() {
+ check_operation(
+ |s| {
+ s.chars()
+ .map(|c| c.to_ascii_uppercase())
+ .collect::<String>()
+ },
+ |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_uppercase(t).as_str().to_owned(),
+ )
+ }
+ check::<2>();
+ check::<3>();
+ check::<4>();
+ check::<5>();
+ check::<8>();
+ check::<16>();
+ }
+}
diff --git a/third_party/rust/tinystr/src/asciibyte.rs b/third_party/rust/tinystr/src/asciibyte.rs
new file mode 100644
index 0000000000..f41a033414
--- /dev/null
+++ b/third_party/rust/tinystr/src/asciibyte.rs
@@ -0,0 +1,145 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#[repr(u8)]
+#[allow(dead_code)]
+#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
+pub enum AsciiByte {
+ B0 = 0,
+ B1 = 1,
+ B2 = 2,
+ B3 = 3,
+ B4 = 4,
+ B5 = 5,
+ B6 = 6,
+ B7 = 7,
+ B8 = 8,
+ B9 = 9,
+ B10 = 10,
+ B11 = 11,
+ B12 = 12,
+ B13 = 13,
+ B14 = 14,
+ B15 = 15,
+ B16 = 16,
+ B17 = 17,
+ B18 = 18,
+ B19 = 19,
+ B20 = 20,
+ B21 = 21,
+ B22 = 22,
+ B23 = 23,
+ B24 = 24,
+ B25 = 25,
+ B26 = 26,
+ B27 = 27,
+ B28 = 28,
+ B29 = 29,
+ B30 = 30,
+ B31 = 31,
+ B32 = 32,
+ B33 = 33,
+ B34 = 34,
+ B35 = 35,
+ B36 = 36,
+ B37 = 37,
+ B38 = 38,
+ B39 = 39,
+ B40 = 40,
+ B41 = 41,
+ B42 = 42,
+ B43 = 43,
+ B44 = 44,
+ B45 = 45,
+ B46 = 46,
+ B47 = 47,
+ B48 = 48,
+ B49 = 49,
+ B50 = 50,
+ B51 = 51,
+ B52 = 52,
+ B53 = 53,
+ B54 = 54,
+ B55 = 55,
+ B56 = 56,
+ B57 = 57,
+ B58 = 58,
+ B59 = 59,
+ B60 = 60,
+ B61 = 61,
+ B62 = 62,
+ B63 = 63,
+ B64 = 64,
+ B65 = 65,
+ B66 = 66,
+ B67 = 67,
+ B68 = 68,
+ B69 = 69,
+ B70 = 70,
+ B71 = 71,
+ B72 = 72,
+ B73 = 73,
+ B74 = 74,
+ B75 = 75,
+ B76 = 76,
+ B77 = 77,
+ B78 = 78,
+ B79 = 79,
+ B80 = 80,
+ B81 = 81,
+ B82 = 82,
+ B83 = 83,
+ B84 = 84,
+ B85 = 85,
+ B86 = 86,
+ B87 = 87,
+ B88 = 88,
+ B89 = 89,
+ B90 = 90,
+ B91 = 91,
+ B92 = 92,
+ B93 = 93,
+ B94 = 94,
+ B95 = 95,
+ B96 = 96,
+ B97 = 97,
+ B98 = 98,
+ B99 = 99,
+ B100 = 100,
+ B101 = 101,
+ B102 = 102,
+ B103 = 103,
+ B104 = 104,
+ B105 = 105,
+ B106 = 106,
+ B107 = 107,
+ B108 = 108,
+ B109 = 109,
+ B110 = 110,
+ B111 = 111,
+ B112 = 112,
+ B113 = 113,
+ B114 = 114,
+ B115 = 115,
+ B116 = 116,
+ B117 = 117,
+ B118 = 118,
+ B119 = 119,
+ B120 = 120,
+ B121 = 121,
+ B122 = 122,
+ B123 = 123,
+ B124 = 124,
+ B125 = 125,
+ B126 = 126,
+ B127 = 127,
+}
+
+impl AsciiByte {
+ // Reinterprets [u8; N] as [AsciiByte; N]. SAFETY (caller): every byte must be in 0..=127, i.e. a valid AsciiByte discriminant.
+ #[inline]
+ pub const unsafe fn to_ascii_byte_array<const N: usize>(bytes: &[u8; N]) -> [AsciiByte; N] {
+ *(bytes as *const [u8; N] as *const [AsciiByte; N])
+ }
+}
diff --git a/third_party/rust/tinystr/src/databake.rs b/third_party/rust/tinystr/src/databake.rs
new file mode 100644
index 0000000000..0b127f97c7
--- /dev/null
+++ b/third_party/rust/tinystr/src/databake.rs
@@ -0,0 +1,48 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::TinyAsciiStr;
+use crate::UnvalidatedTinyAsciiStr;
+use databake::*;
+
+impl<const N: usize> Bake for TinyAsciiStr<N> {
+ fn bake(&self, env: &CrateEnv) -> TokenStream {
+ env.insert("tinystr");
+ let string = self.as_str();
+ quote! {
+ tinystr::tinystr!(#N, #string)
+ }
+ }
+}
+
+impl<const N: usize> databake::Bake for UnvalidatedTinyAsciiStr<N> {
+ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+ match self.try_into_tinystr() {
+ Ok(tiny) => {
+ let tiny = tiny.bake(env);
+ databake::quote! {
+ #tiny.to_unvalidated()
+ }
+ }
+ Err(_) => {
+ let bytes = self.0.bake(env);
+ env.insert("tinystr");
+ databake::quote! {
+ tinystr::UnvalidatedTinyAsciiStr::from_bytes_unchecked(*#bytes)
+ }
+ }
+ }
+ }
+}
+
+#[test]
+fn test() {
+ test_bake!(TinyAsciiStr<10>, const: crate::tinystr!(10usize, "foo"), tinystr);
+}
+
+#[test]
+fn test_unvalidated() {
+ test_bake!(UnvalidatedTinyAsciiStr<10>, const: crate::tinystr!(10usize, "foo").to_unvalidated(), tinystr);
+ test_bake!(UnvalidatedTinyAsciiStr<3>, const: crate::UnvalidatedTinyAsciiStr::from_bytes_unchecked(*b"AB\xCD"), tinystr);
+}
diff --git a/third_party/rust/tinystr/src/error.rs b/third_party/rust/tinystr/src/error.rs
new file mode 100644
index 0000000000..7910f8b484
--- /dev/null
+++ b/third_party/rust/tinystr/src/error.rs
@@ -0,0 +1,19 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use displaydoc::Display;
+
+#[cfg(feature = "std")]
+impl std::error::Error for TinyStrError {}
+
+#[derive(Display, Debug, PartialEq, Eq)]
+#[non_exhaustive]
+pub enum TinyStrError {
+ #[displaydoc("found string of larger length {len} when constructing string of length {max}")]
+ TooLarge { max: usize, len: usize },
+ #[displaydoc("tinystr types do not support strings with null bytes")]
+ ContainsNull,
+ #[displaydoc("attempted to construct TinyStrAuto from a non-ascii string")]
+ NonAscii,
+}
diff --git a/third_party/rust/tinystr/src/int_ops.rs b/third_party/rust/tinystr/src/int_ops.rs
new file mode 100644
index 0000000000..102b052f22
--- /dev/null
+++ b/third_party/rust/tinystr/src/int_ops.rs
@@ -0,0 +1,315 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::asciibyte::AsciiByte;
+
+/// Internal helper struct that performs operations on aligned integers.
+/// Supports strings up to 4 bytes long.
+#[repr(transparent)]
+pub struct Aligned4(u32);
+
+impl Aligned4 {
+ /// # Panics
+ /// Panics if N is greater than 4
+ #[inline]
+ pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self {
+ let mut bytes = [0; 4];
+ let mut i = 0;
+ // The function documentation defines when panics may occur
+ #[allow(clippy::indexing_slicing)]
+ while i < N {
+ bytes[i] = src[i];
+ i += 1;
+ }
+ Self(u32::from_ne_bytes(bytes))
+ }
+
+ #[inline]
+ pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
+ Self::from_bytes::<N>(unsafe { core::mem::transmute(src) })
+ }
+
+ #[inline]
+ pub const fn to_bytes(&self) -> [u8; 4] {
+ self.0.to_ne_bytes()
+ }
+
+ #[inline]
+ pub const fn to_ascii_bytes(&self) -> [AsciiByte; 4] {
+ unsafe { core::mem::transmute(self.to_bytes()) }
+ }
+
+ pub const fn len(&self) -> usize {
+ let word = self.0;
+ #[cfg(target_endian = "little")]
+ let len = (4 - word.leading_zeros() / 8) as usize;
+ #[cfg(target_endian = "big")]
+ let len = (4 - word.trailing_zeros() / 8) as usize;
+ len
+ }
+
+ pub const fn is_ascii_alphabetic(&self) -> bool {
+ let word = self.0;
+ // Each of the following bitmasks sets *the high bit* (0x80) of each byte to 0 for valid and 1 for invalid.
+ // `mask` sets all NUL bytes to 0.
+ let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+ // `lower` converts the string to lowercase. It may also change the value of non-alpha
+ // characters, but this does not matter for the alphabetic test that follows.
+ let lower = word | 0x2020_2020;
+ // `alpha` sets all alphabetic bytes to 0. We only need to check for lowercase characters.
+ let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
+ // The overall string is valid if every character passes at least one test.
+ // We performed two tests here: is NUL padding (`mask`) and is alphabetic (`alpha`).
+ (alpha & mask) == 0
+ }
+
+ pub const fn is_ascii_alphanumeric(&self) -> bool {
+ let word = self.0;
+ // See explanatory comments in is_ascii_alphabetic
+ let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+ let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646);
+ let lower = word | 0x2020_2020;
+ let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
+ (alpha & numeric & mask) == 0
+ }
+
+ pub const fn is_ascii_numeric(&self) -> bool {
+ let word = self.0;
+ // See explanatory comments in is_ascii_alphabetic
+ let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+ let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646);
+ (numeric & mask) == 0
+ }
+
+ pub const fn is_ascii_lowercase(&self) -> bool {
+ let word = self.0;
+ // For efficiency, this function tests for an invalid string rather than a valid string.
+ // A string is ASCII lowercase iff it contains no uppercase ASCII characters.
+ // `invalid_case` sets all uppercase ASCII characters to 0 and all others to 1.
+ let invalid_case = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
+ // The string is valid if it contains no invalid characters (if all high bits are 1).
+ (invalid_case & 0x8080_8080) == 0x8080_8080
+ }
+
+ pub const fn is_ascii_titlecase(&self) -> bool {
+ let word = self.0;
+ // See explanatory comments in is_ascii_lowercase
+ let invalid_case = if cfg!(target_endian = "little") {
+ !(word + 0x3f3f_3f1f) | (word + 0x2525_2505)
+ } else {
+ !(word + 0x1f3f_3f3f) | (word + 0x0525_2525)
+ };
+ (invalid_case & 0x8080_8080) == 0x8080_8080
+ }
+
+ pub const fn is_ascii_uppercase(&self) -> bool {
+ let word = self.0;
+ // See explanatory comments in is_ascii_lowercase
+ let invalid_case = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
+ (invalid_case & 0x8080_8080) == 0x8080_8080
+ }
+
+ pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
+ let word = self.0;
+ // `mask` sets all NUL bytes to 0.
+ let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+ // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1.
+ let lower_alpha = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
+ // The overall string is valid if every character passes at least one test.
+ // We performed two tests here: is NUL padding (`mask`) and is a lowercase ASCII letter (`lower_alpha`).
+ (lower_alpha & mask) == 0
+ }
+
+ pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
+ let word = self.0;
+ // See explanatory comments in is_ascii_alphabetic_lowercase
+ let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+ let title_case = if cfg!(target_endian = "little") {
+ !(word + 0x1f1f_1f3f) | (word + 0x0505_0525)
+ } else {
+ !(word + 0x3f1f_1f1f) | (word + 0x2505_0505)
+ };
+ (title_case & mask) == 0
+ }
+
+ pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
+ let word = self.0;
+ // See explanatory comments in is_ascii_alphabetic_lowercase
+ let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
+ let upper_alpha = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
+ (upper_alpha & mask) == 0
+ }
+
+ pub const fn to_ascii_lowercase(&self) -> Self {
+ let word = self.0;
+ let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2);
+ Self(result)
+ }
+
+ pub const fn to_ascii_titlecase(&self) -> Self {
+ let word = self.0.to_le();
+ let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2;
+ let result = (word | mask) & !(0x20 & mask);
+ Self(u32::from_le(result))
+ }
+
+ pub const fn to_ascii_uppercase(&self) -> Self {
+ let word = self.0;
+ let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2);
+ Self(result)
+ }
+}
+
+/// Internal helper struct that performs operations on aligned integers.
+/// Supports strings up to 8 bytes long.
+#[repr(transparent)]
+pub struct Aligned8(u64);
+
+impl Aligned8 {
+ /// # Panics
+ /// Panics if N is greater than 8
+ #[inline]
+ pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self {
+ let mut bytes = [0; 8];
+ let mut i = 0;
+ // The function documentation defines when panics may occur
+ #[allow(clippy::indexing_slicing)]
+ while i < N {
+ bytes[i] = src[i];
+ i += 1;
+ }
+ Self(u64::from_ne_bytes(bytes))
+ }
+
+ #[inline]
+ pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
+ Self::from_bytes::<N>(unsafe { core::mem::transmute(src) })
+ }
+
+ #[inline]
+ pub const fn to_bytes(&self) -> [u8; 8] {
+ self.0.to_ne_bytes()
+ }
+
+ #[inline]
+ pub const fn to_ascii_bytes(&self) -> [AsciiByte; 8] {
+ unsafe { core::mem::transmute(self.to_bytes()) }
+ }
+
+ pub const fn len(&self) -> usize {
+ let word = self.0;
+ #[cfg(target_endian = "little")]
+ let len = (8 - word.leading_zeros() / 8) as usize;
+ #[cfg(target_endian = "big")]
+ let len = (8 - word.trailing_zeros() / 8) as usize;
+ len
+ }
+
+ pub const fn is_ascii_alphabetic(&self) -> bool {
+ let word = self.0;
+ let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+ let lower = word | 0x2020_2020_2020_2020;
+ let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505);
+ (alpha & mask) == 0
+ }
+
+ pub const fn is_ascii_alphanumeric(&self) -> bool {
+ let word = self.0;
+ let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+ let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646);
+ let lower = word | 0x2020_2020_2020_2020;
+ let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505);
+ (alpha & numeric & mask) == 0
+ }
+
+ pub const fn is_ascii_numeric(&self) -> bool {
+ let word = self.0;
+ let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+ let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646);
+ (numeric & mask) == 0
+ }
+
+ pub const fn is_ascii_lowercase(&self) -> bool {
+ let word = self.0;
+ let invalid_case = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
+ (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
+ }
+
+ pub const fn is_ascii_titlecase(&self) -> bool {
+ let word = self.0;
+ let invalid_case = if cfg!(target_endian = "little") {
+ !(word + 0x3f3f_3f3f_3f3f_3f1f) | (word + 0x2525_2525_2525_2505)
+ } else {
+ !(word + 0x1f3f_3f3f_3f3f_3f3f) | (word + 0x0525_2525_2525_2525)
+ };
+ (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
+ }
+
+ pub const fn is_ascii_uppercase(&self) -> bool {
+ let word = self.0;
+ let invalid_case = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
+ (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
+ }
+
+ pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
+ let word = self.0;
+ // `mask` sets all NUL bytes to 0.
+ let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+ // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1.
+ let lower_alpha = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
+ // The overall string is valid if every character passes at least one test.
+ // We performed two tests here: is NUL padding (`mask`) and is a lowercase ASCII letter (`lower_alpha`).
+ (lower_alpha & mask) == 0
+ }
+
+ pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
+ let word = self.0;
+ // See explanatory comments in is_ascii_alphabetic_lowercase
+ let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+ let title_case = if cfg!(target_endian = "little") {
+ !(word + 0x1f1f_1f1f_1f1f_1f3f) | (word + 0x0505_0505_0505_0525)
+ } else {
+ !(word + 0x3f1f_1f1f_1f1f_1f1f) | (word + 0x2505_0505_0505_0505)
+ };
+ (title_case & mask) == 0
+ }
+
+ pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
+ let word = self.0;
+ // See explanatory comments in is_ascii_alphabetic_lowercase
+ let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
+ let upper_alpha = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
+ (upper_alpha & mask) == 0
+ }
+
+ pub const fn to_ascii_lowercase(&self) -> Self {
+ let word = self.0;
+ let result = word
+ | (((word + 0x3f3f_3f3f_3f3f_3f3f)
+ & !(word + 0x2525_2525_2525_2525)
+ & 0x8080_8080_8080_8080)
+ >> 2);
+ Self(result)
+ }
+
+ pub const fn to_ascii_titlecase(&self) -> Self {
+ let word = self.0.to_le();
+ let mask = ((word + 0x3f3f_3f3f_3f3f_3f1f)
+ & !(word + 0x2525_2525_2525_2505)
+ & 0x8080_8080_8080_8080)
+ >> 2;
+ let result = (word | mask) & !(0x20 & mask);
+ Self(u64::from_le(result))
+ }
+
+ pub const fn to_ascii_uppercase(&self) -> Self {
+ let word = self.0;
+ let result = word
+ & !(((word + 0x1f1f_1f1f_1f1f_1f1f)
+ & !(word + 0x0505_0505_0505_0505)
+ & 0x8080_8080_8080_8080)
+ >> 2);
+ Self(result)
+ }
+}
diff --git a/third_party/rust/tinystr/src/lib.rs b/third_party/rust/tinystr/src/lib.rs
new file mode 100644
index 0000000000..3d13e95ea5
--- /dev/null
+++ b/third_party/rust/tinystr/src/lib.rs
@@ -0,0 +1,118 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! `tinystr` is a utility crate of the [`ICU4X`] project.
+//!
+//! It includes [`TinyAsciiStr`], a core API for representing small ASCII-only bounded length strings.
+//!
+//! It is optimized for operations on strings of size 8 or smaller. For use cases that involve
+//! comparing strings, converting them to lowercase/uppercase/titlecase, or checking whether they
+//! are numeric/alphabetic/alphanumeric, `TinyAsciiStr` is designed to be a high-performance choice.
+//!
+//! # Examples
+//!
+//! ```rust
+//! use tinystr::TinyAsciiStr;
+//!
+//! let s1: TinyAsciiStr<4> = "tEsT".parse().expect("Failed to parse.");
+//!
+//! assert_eq!(s1, "tEsT");
+//! assert_eq!(s1.to_ascii_uppercase(), "TEST");
+//! assert_eq!(s1.to_ascii_lowercase(), "test");
+//! assert_eq!(s1.to_ascii_titlecase(), "Test");
+//! assert!(s1.is_ascii_alphanumeric());
+//! assert!(!s1.is_ascii_numeric());
+//!
+//! let s2 = TinyAsciiStr::<8>::try_from_raw(*b"New York")
+//! .expect("Failed to parse.");
+//!
+//! assert_eq!(s2, "New York");
+//! assert_eq!(s2.to_ascii_uppercase(), "NEW YORK");
+//! assert_eq!(s2.to_ascii_lowercase(), "new york");
+//! assert_eq!(s2.to_ascii_titlecase(), "New york");
+//! assert!(!s2.is_ascii_alphanumeric());
+//! ```
+//!
+//! # Details
+//!
+//! When strings are of size 8 or smaller, the struct treats the string's bytes as a `u32`/`u64` and
+//! uses bitmasking to provide basic string manipulation operations:
+//! * `is_ascii_numeric`
+//! * `is_ascii_alphabetic`
+//! * `is_ascii_alphanumeric`
+//! * `to_ascii_lowercase`
+//! * `to_ascii_uppercase`
+//! * `to_ascii_titlecase`
+//! * `PartialEq`
+//!
+//! `TinyAsciiStr` will fall back to `u8` character manipulation for strings of length greater than 8.
+
+//!
+//! [`ICU4X`]: ../icu/index.html
+
+// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
+#![cfg_attr(not(any(test, feature = "std")), no_std)]
+#![cfg_attr(
+ not(test),
+ deny(
+ clippy::indexing_slicing,
+ clippy::unwrap_used,
+ clippy::expect_used,
+ clippy::panic,
+ clippy::exhaustive_structs,
+ clippy::exhaustive_enums,
+ missing_debug_implementations,
+ )
+)]
+
+mod macros;
+
+mod ascii;
+mod asciibyte;
+mod error;
+mod int_ops;
+mod unvalidated;
+
+#[cfg(feature = "serde")]
+mod serde;
+
+#[cfg(feature = "databake")]
+mod databake;
+
+#[cfg(feature = "zerovec")]
+mod ule;
+
+#[cfg(any(feature = "serde", feature = "alloc"))]
+extern crate alloc;
+
+pub use ascii::TinyAsciiStr;
+pub use error::TinyStrError;
+pub use unvalidated::UnvalidatedTinyAsciiStr;
+
+/// This is a temporary compatibility alias that will be removed
+/// in a future version.
+pub type TinyStr4 = TinyAsciiStr<4>;
+/// This is a temporary compatibility alias that will be removed
+/// in a future version.
+pub type TinyStr8 = TinyAsciiStr<8>;
+/// This is a temporary compatibility alias that will be removed
+/// in a future version.
+pub type TinyStr16 = TinyAsciiStr<16>;
+
+#[test]
+fn test_size() {
+ assert_eq!(
+ core::mem::size_of::<TinyStr4>(),
+ core::mem::size_of::<Option<TinyStr4>>()
+ );
+ assert_eq!(
+ core::mem::size_of::<TinyStr8>(),
+ core::mem::size_of::<Option<TinyStr8>>()
+ );
+}
+// /// Allows unit tests to use the macro
+// #[cfg(test)]
+// mod tinystr {
+// pub use super::{TinyAsciiStr, TinyStrError};
+// }
diff --git a/third_party/rust/tinystr/src/macros.rs b/third_party/rust/tinystr/src/macros.rs
new file mode 100644
index 0000000000..b00185238e
--- /dev/null
+++ b/third_party/rust/tinystr/src/macros.rs
@@ -0,0 +1,32 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+/// Builds a `TinyAsciiStr<$n>` from the string literal `$s`; panics if `from_bytes` rejects it.
+#[macro_export]
+macro_rules! tinystr {
+    ($n:literal, $s:literal) => {{
+        // Bound to a `const` to force const evaluation; otherwise it may get evaluated at runtime.
+        const TINYSTR_MACRO_CONST: $crate::TinyAsciiStr<$n> =
+            match $crate::TinyAsciiStr::from_bytes($s.as_bytes()) {
+                Ok(parsed) => parsed,
+                // Panicking is acceptable here because this is a const context. The error
+                // is not formatted into the message since formatting isn't const yet.
+                #[allow(clippy::panic)]
+                Err(_) => panic!(concat!("Failed to construct tinystr from ", $s)),
+            };
+        TINYSTR_MACRO_CONST
+    }};
+}
+
+#[cfg(test)]
+mod tests {
+    // Covers one capacity of at most 8 bytes and one larger capacity.
+    #[test]
+    fn test_macro_construction() {
+        let short = tinystr!(8, "foobar");
+        let long = tinystr!(12, "foobarbaz");
+        assert_eq!(&*short, "foobar");
+        assert_eq!(&*long, "foobarbaz");
+    }
+}
diff --git a/third_party/rust/tinystr/src/serde.rs b/third_party/rust/tinystr/src/serde.rs
new file mode 100644
index 0000000000..4c3f8be132
--- /dev/null
+++ b/third_party/rust/tinystr/src/serde.rs
@@ -0,0 +1,91 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::TinyAsciiStr;
+use alloc::borrow::Cow;
+use alloc::string::ToString;
+use core::fmt;
+use core::marker::PhantomData;
+use core::ops::Deref;
+use serde::de::{Error, SeqAccess, Visitor};
+use serde::ser::SerializeTuple;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+
+impl<const N: usize> Serialize for TinyAsciiStr<N> {
+    /// Serializes as a string for human-readable formats and as a tuple of
+    /// `N` bytes otherwise.
+    #[inline]
+    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        if serializer.is_human_readable() {
+            return self.deref().serialize(serializer);
+        }
+
+        // Non-human-readable formats: the tuple length is always `N`, and
+        // every byte returned by `all_bytes()` is written as one tuple
+        // element, in order.
+        let mut tuple = serializer.serialize_tuple(N)?;
+        self.all_bytes().iter().try_for_each(|byte| tuple.serialize_element(byte))?;
+        tuple.end()
+    }
+}
+
+/// Visitor that assembles a `TinyAsciiStr<N>` from a sequence of bytes.
+struct TinyAsciiStrVisitor<const N: usize> {
+    marker: PhantomData<TinyAsciiStr<N>>,
+}
+
+impl<const N: usize> TinyAsciiStrVisitor<N> {
+    /// Creates a visitor; it carries no state besides the type marker.
+    fn new() -> Self {
+        Self { marker: PhantomData }
+    }
+}
+
+impl<'de, const N: usize> Visitor<'de> for TinyAsciiStrVisitor<N> {
+    type Value = TinyAsciiStr<N>;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        write!(formatter, "a TinyAsciiStr<{N}>")
+    }
+
+    /// Reads `N` bytes; fails on early end, a non-zero byte after a zero byte, or a byte `>= 0x80`.
+    #[inline]
+    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
+    where
+        A: SeqAccess<'de>,
+    {
+        let mut bytes = [0u8; N];
+        let mut seen_zero = false;
+        for slot in bytes.iter_mut() {
+            let byte: u8 = match seq.next_element()? {
+                Some(value) => value,
+                None => return Err(Error::invalid_length(N, &self)),
+            };
+            if seen_zero && byte != 0 {
+                return Err(Error::custom("TinyAsciiStr cannot contain null bytes"));
+            }
+            seen_zero |= byte == 0;
+            if byte >= 0x80 {
+                return Err(Error::custom("TinyAsciiStr cannot contain non-ascii bytes"));
+            }
+            *slot = byte;
+        }
+        // SAFETY: the loop above accepted only bytes `< 0x80`, with zeros confined to a trailing run.
+        Ok(unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) })
+    }
+}
+
+impl<'de, const N: usize> Deserialize<'de> for TinyAsciiStr<N> {
+    /// Reads a string for human-readable formats, otherwise a tuple of `N`
+    /// bytes via `TinyAsciiStrVisitor`.
+    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+        if !deserializer.is_human_readable() {
+            return deserializer.deserialize_tuple(N, TinyAsciiStrVisitor::<N>::new());
+        }
+
+        // The text is checked by `from_str`; its error is passed on as a custom error.
+        let text: Cow<'de, str> = Deserialize::deserialize(deserializer)?;
+        TinyAsciiStr::from_str(&text).map_err(|e| Error::custom(e.to_string()))
+    }
+}
diff --git a/third_party/rust/tinystr/src/ule.rs b/third_party/rust/tinystr/src/ule.rs
new file mode 100644
index 0000000000..eda43890b5
--- /dev/null
+++ b/third_party/rust/tinystr/src/ule.rs
@@ -0,0 +1,116 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::{TinyAsciiStr, UnvalidatedTinyAsciiStr};
+use zerovec::maps::ZeroMapKV;
+use zerovec::ule::*;
+use zerovec::{ZeroSlice, ZeroVec};
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. TinyAsciiStr does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 2. TinyAsciiStr is aligned to 1 byte.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+// 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+// 5. The other ULE methods use the default impl.
+// 6. TinyAsciiStr byte equality is semantic equality
+unsafe impl<const N: usize> ULE for TinyAsciiStr<N> {
+    #[inline]
+    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+        if bytes.len() % N != 0 {
+            return Err(ZeroVecError::length::<Self>(bytes.len()));
+        }
+        // Each N-byte chunk must parse as a `TinyAsciiStr<N>`; the parsed value is discarded.
+        bytes.chunks_exact(N).try_for_each(|chunk| {
+            TinyAsciiStr::<N>::from_bytes_inner(chunk, 0, N, true)
+                .map(|_| ())
+                .map_err(|_| ZeroVecError::parse::<Self>())
+        })
+    }
+}
+
+impl<const N: usize> AsULE for TinyAsciiStr<N> {
+ type ULE = Self;
+ // The type is its own `ULE`, so both conversions return their argument unchanged.
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+impl<'a, const N: usize> ZeroMapKV<'a> for TinyAsciiStr<N> {
+    type Container = ZeroVec<'a, Self>;
+    type Slice = ZeroSlice<Self>;
+    type GetType = Self;
+    type OwnedType = Self;
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. UnvalidatedTinyAsciiStr does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 2. UnvalidatedTinyAsciiStr is aligned to 1 byte.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 3. Every byte value is valid for this type, so validate_byte_slice() performs no per-byte check.
+// 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+// 5. The other ULE methods use the default impl.
+// 6. UnvalidatedTinyAsciiStr byte equality is semantic equality
+unsafe impl<const N: usize> ULE for UnvalidatedTinyAsciiStr<N> {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ if bytes.len() % N != 0 {
+ return Err(ZeroVecError::length::<Self>(bytes.len()));
+ }
+ Ok(())
+ }
+}
+
+impl<const N: usize> AsULE for UnvalidatedTinyAsciiStr<N> {
+ type ULE = Self;
+ // The type is its own `ULE`, so both conversions return their argument unchanged.
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+impl<'a, const N: usize> ZeroMapKV<'a> for UnvalidatedTinyAsciiStr<N> {
+    type Container = ZeroVec<'a, Self>;
+    type Slice = ZeroSlice<Self>;
+    type GetType = Self;
+    type OwnedType = Self;
+}
+
+#[cfg(test)]
+mod test {
+    use crate::*;
+    use zerovec::*;
+
+    /// Round-trips three strings through the byte representation of a `ZeroVec`.
+    #[test]
+    fn test_zerovec() {
+        let expected = ["foobar", "baz", "quux"];
+        let mut vec = ZeroVec::<TinyAsciiStr<7>>::new();
+        for text in expected {
+            vec.with_mut(|v| v.push(text.parse().unwrap()));
+        }
+
+        // Re-parse the raw bytes and compare every element with its source string.
+        let bytes = vec.as_bytes();
+        let vec: ZeroVec<TinyAsciiStr<7>> = ZeroVec::parse_byte_slice(bytes).unwrap();
+        for (index, text) in expected.iter().enumerate() {
+            assert_eq!(&*vec.get(index).unwrap(), *text);
+        }
+    }
+}
diff --git a/third_party/rust/tinystr/src/unvalidated.rs b/third_party/rust/tinystr/src/unvalidated.rs
new file mode 100644
index 0000000000..2fffbbc119
--- /dev/null
+++ b/third_party/rust/tinystr/src/unvalidated.rs
@@ -0,0 +1,104 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::TinyAsciiStr;
+use crate::TinyStrError;
+
+/// A fixed-length byte array that is expected to be an ASCII string but does not enforce that invariant.
+///
+/// Use this type instead of `TinyAsciiStr` if you don't need to enforce ASCII during deserialization. For
+/// example, strings that are keys of a map don't need to ever be reified as `TinyAsciiStr`s.
+///
+/// The main advantage over `[u8; N]` is that it serializes as a string in human-readable formats like JSON.
+#[derive(Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy)]
+#[repr(transparent)] // guarantees the layout of `[u8; N]`, which the `ULE` safety comment in ule.rs relies on
+pub struct UnvalidatedTinyAsciiStr<const N: usize>(pub(crate) [u8; N]);
+
+impl<const N: usize> UnvalidatedTinyAsciiStr<N> {
+ #[inline]
+ /// Converts into a [`TinyAsciiStr`]. Fails if the bytes are not valid ASCII.
+ pub fn try_into_tinystr(&self) -> Result<TinyAsciiStr<N>, TinyStrError> {
+ TinyAsciiStr::try_from_raw(self.0)
+ }
+
+ #[doc(hidden)]
+ pub const fn from_bytes_unchecked(bytes: [u8; N]) -> Self {
+ Self(bytes)
+ }
+}
+
+impl<const N: usize> TinyAsciiStr<N> {
+ #[inline]
+ /// Converts into an [`UnvalidatedTinyAsciiStr`] holding a copy of all `N` bytes of `self`.
+ pub const fn to_unvalidated(self) -> UnvalidatedTinyAsciiStr<N> {
+ UnvalidatedTinyAsciiStr(*self.all_bytes())
+ }
+}
+
+#[cfg(feature = "serde")]
+impl<const N: usize> serde::Serialize for UnvalidatedTinyAsciiStr<N> {
+    /// Validates the bytes first, then serializes exactly like a `TinyAsciiStr<N>`.
+    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        use serde::ser::Error;
+        match self.try_into_tinystr() {
+            Ok(validated) => validated.serialize(serializer),
+            // Invalid content is reported as a serializer error instead of being written out.
+            Err(_) => Err(S::Error::custom("invalid ascii in UnvalidatedTinyAsciiStr")),
+        }
+    }
+}
+
+// Implements `serde::Deserialize` for `UnvalidatedTinyAsciiStr<$size>`: human-readable
+// formats go through `TinyAsciiStr` (and are therefore validated), all other formats
+// read a plain `[u8; $size]` without validation.
+macro_rules! deserialize {
+    ($size:literal) => {
+        #[cfg(feature = "serde")]
+        impl<'de> serde::Deserialize<'de> for UnvalidatedTinyAsciiStr<$size> {
+            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+            where
+                D: serde::Deserializer<'de>,
+            {
+                if !deserializer.is_human_readable() {
+                    return <[u8; $size]>::deserialize(deserializer).map(Self);
+                }
+                let validated = TinyAsciiStr::deserialize(deserializer)?;
+                Ok(validated.to_unvalidated())
+            }
+        }
+    };
+}
+
+deserialize!(1); // one impl per size from 1 through 32; other sizes get no impl from this list
+deserialize!(2);
+deserialize!(3);
+deserialize!(4);
+deserialize!(5);
+deserialize!(6);
+deserialize!(7);
+deserialize!(8);
+deserialize!(9);
+deserialize!(10);
+deserialize!(11);
+deserialize!(12);
+deserialize!(13);
+deserialize!(14);
+deserialize!(15);
+deserialize!(16);
+deserialize!(17);
+deserialize!(18);
+deserialize!(19);
+deserialize!(20);
+deserialize!(21);
+deserialize!(22);
+deserialize!(23);
+deserialize!(24);
+deserialize!(25);
+deserialize!(26);
+deserialize!(27);
+deserialize!(28);
+deserialize!(29);
+deserialize!(30);
+deserialize!(31);
+deserialize!(32);
diff --git a/third_party/rust/tinystr/tests/serde.rs b/third_party/rust/tinystr/tests/serde.rs
new file mode 100644
index 0000000000..282914e6fc
--- /dev/null
+++ b/third_party/rust/tinystr/tests/serde.rs
@@ -0,0 +1,39 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use tinystr::*;
+
+// Tests largely adapted from `tinystr` crate
+// https://github.com/zbraniecki/tinystr/blob/4e4eab55dd6bded7f29a18b41452c506c461716c/tests/serde.rs
+
+macro_rules! test_roundtrip {
+ ($f:ident, $n:literal, $val:expr) => {
+ #[test]
+ fn $f() {
+ let tiny: TinyAsciiStr<$n> = $val.parse().unwrap();
+ let json_string = serde_json::to_string(&tiny).unwrap();
+ let expected_json = concat!("\"", $val, "\"");
+ assert_eq!(json_string, expected_json);
+ let recover: TinyAsciiStr<$n> = serde_json::from_str(&json_string).unwrap();
+ assert_eq!(&*tiny, &*recover);
+ // bincode: the encoding must equal the bytes returned by `all_bytes()`.
+ let bin = bincode::serialize(&tiny).unwrap();
+ assert_eq!(bin, &tiny.all_bytes()[..]);
+ let debin: TinyAsciiStr<$n> = bincode::deserialize(&bin).unwrap();
+ assert_eq!(&*tiny, &*debin);
+ // postcard: same expectation as for bincode.
+ let post = postcard::to_stdvec(&tiny).unwrap();
+ assert_eq!(post, &tiny.all_bytes()[..]);
+ let unpost: TinyAsciiStr<$n> = postcard::from_bytes(&post).unwrap();
+ assert_eq!(&*tiny, &*unpost);
+ }
+ };
+}
+
+test_roundtrip!(test_roundtrip4_1, 4, "en");
+test_roundtrip!(test_roundtrip4_2, 4, "Latn");
+test_roundtrip!(test_roundtrip8, 8, "calendar");
+test_roundtrip!(test_roundtrip16, 16, "verylongstring");
+test_roundtrip!(test_roundtrip10, 11, "shortstring"); // NOTE(review): named "10" but the capacity is 11
+test_roundtrip!(test_roundtrip30, 24, "veryveryverylongstring"); // NOTE(review): named "30" but the capacity is 24