summary refs log tree commit diff stats
path: root/third_party/rust/unicode-normalization/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/unicode-normalization/src/lib.rs')
-rw-r--r-- third_party/rust/unicode-normalization/src/lib.rs 235
1 files changed, 235 insertions, 0 deletions
diff --git a/third_party/rust/unicode-normalization/src/lib.rs b/third_party/rust/unicode-normalization/src/lib.rs
new file mode 100644
index 0000000000..21c3bf1f95
--- /dev/null
+++ b/third_party/rust/unicode-normalization/src/lib.rs
@@ -0,0 +1,235 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Unicode character composition and decomposition utilities
+//! as described in
+//! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
+//!
+//! ```rust
+//! extern crate unicode_normalization;
+//!
+//! use unicode_normalization::char::compose;
+//! use unicode_normalization::UnicodeNormalization;
+//!
+//! fn main() {
+//!     assert_eq!(compose('A','\u{30a}'), Some('Å'));
+//!
+//!     let s = "ÅΩ";
+//!     let c = s.nfc().collect::<String>();
+//!     assert_eq!(c, "ÅΩ");
+//! }
+//! ```
+//!
+//! # crates.io
+//!
+//! You can use this package in your project by adding the following
+//! to your `Cargo.toml`:
+//!
+//! ```toml
+//! [dependencies]
+//! unicode-normalization = "0.1.20"
+//! ```
+
+#![deny(missing_docs, unsafe_code)]
+#![doc(
+ html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
+ html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
+)]
+#![cfg_attr(not(feature = "std"), no_std)]
+
+#[cfg(not(feature = "std"))]
+extern crate alloc;
+
+#[cfg(feature = "std")]
+extern crate core;
+
+extern crate tinyvec;
+
+pub use crate::decompose::Decompositions;
+pub use crate::quick_check::{
+ is_nfc, is_nfc_quick, is_nfc_stream_safe, is_nfc_stream_safe_quick, is_nfd, is_nfd_quick,
+ is_nfd_stream_safe, is_nfd_stream_safe_quick, is_nfkc, is_nfkc_quick, is_nfkd, is_nfkd_quick,
+ IsNormalized,
+};
+pub use crate::recompose::Recompositions;
+pub use crate::replace::Replacements;
+pub use crate::stream_safe::StreamSafe;
+pub use crate::tables::UNICODE_VERSION;
+use core::{
+ str::Chars,
+ option,
+};
+
+mod no_std_prelude; // NOTE(review): contents not visible here; presumably a prelude shim for `no_std` builds — confirm
+
+mod decompose; // source of `Decompositions` and of the constructors called by `nfd` / `nfkd`
+mod lookups; // source of `canonical_combining_class` and `is_combining_mark` (re-exported from `char`)
+mod normalize; // source of `compose` and the `decompose_*` functions (re-exported from `char`)
+mod perfect_hash; // NOTE(review): not referenced in this file; presumably backs the table lookups — confirm
+mod quick_check; // source of the `is_nf*` predicates and `IsNormalized` re-exported at the crate root
+mod recompose; // source of `Recompositions` and of the constructors called by `nfc` / `nfkc`
+mod replace; // source of `Replacements` and `new_cjk_compat_variants`
+mod stream_safe; // source of `StreamSafe`
+
+#[rustfmt::skip]
+mod tables; // source of `UNICODE_VERSION` and `is_public_assigned`; excluded from rustfmt
+
+#[doc(hidden)]
+pub mod __test_api; // public, but hidden from the rendered documentation
+#[cfg(test)]
+mod test; // only compiled for test builds
+
+/// Character-level helpers: composition, decomposition and property lookups.
+pub mod char {
+    // Composition and decomposition primitives.
+    pub use crate::normalize::compose;
+    pub use crate::normalize::decompose_canonical;
+    pub use crate::normalize::decompose_cjk_compat_variants;
+    pub use crate::normalize::decompose_compatible;
+
+    // Per-character property lookups.
+    pub use crate::lookups::canonical_combining_class;
+    pub use crate::lookups::is_combining_mark;
+
+    /// Reports whether the character is assigned (`General_Category` != `Unassigned`)
+    /// and is not Private-Use (`General_Category` != `Private_Use`) in the version of
+    /// Unicode supported by this crate.
+    pub use crate::tables::is_public_assigned;
+}
+
+/// Adaptors that walk over text while applying the Unicode normalizations
+/// described in
+/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
+///
+/// `I` is the `char` iterator type that parameterises every returned adaptor.
+pub trait UnicodeNormalization<I>
+where
+    I: Iterator<Item = char>,
+{
+    /// Returns an iterator over the input in Unicode Normalization Form D
+    /// (canonical decomposition).
+    fn nfd(self) -> Decompositions<I>;
+
+    /// Returns an iterator over the input in Unicode Normalization Form KD
+    /// (compatibility decomposition).
+    fn nfkd(self) -> Decompositions<I>;
+
+    /// Returns an iterator over the input in Unicode Normalization Form C
+    /// (canonical decomposition followed by canonical composition).
+    fn nfc(self) -> Recompositions<I>;
+
+    /// Returns an iterator over the input in Unicode Normalization Form KC
+    /// (compatibility decomposition followed by canonical composition).
+    fn nfkc(self) -> Recompositions<I>;
+
+    /// Returns an iterator that replaces CJK Compatibility Ideograph
+    /// codepoints with normal forms using Standardized Variation Sequences.
+    ///
+    /// This transformation is not part of the canonical or compatibility
+    /// decomposition algorithms, but performing it before those algorithms
+    /// produces normalized output which better preserves the intent of the
+    /// original text.
+    ///
+    /// Many systems today ignore variation selectors, so the result may not
+    /// immediately help text display as intended. It does, however, keep the
+    /// information in a standardized form, which gives implementations the
+    /// option to recognize it.
+    fn cjk_compat_variants(self) -> Replacements<I>;
+
+    /// Returns an iterator over the input with Conjoining Grapheme Joiner
+    /// characters inserted according to the Stream-Safe Text Process
+    /// (UAX15-D4).
+    fn stream_safe(self) -> StreamSafe<I>;
+}
+
+// String slices: every method hands the slice's `chars()` iterator to the
+// matching adaptor constructor.
+impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
+    /// Canonical decomposition (NFD) of the slice's characters.
+    #[inline]
+    fn nfd(self) -> Decompositions<Chars<'a>> {
+        let chars = self.chars();
+        decompose::new_canonical(chars)
+    }
+
+    /// Compatibility decomposition (NFKD) of the slice's characters.
+    #[inline]
+    fn nfkd(self) -> Decompositions<Chars<'a>> {
+        let chars = self.chars();
+        decompose::new_compatible(chars)
+    }
+
+    /// Canonical decomposition followed by canonical composition (NFC).
+    #[inline]
+    fn nfc(self) -> Recompositions<Chars<'a>> {
+        let chars = self.chars();
+        recompose::new_canonical(chars)
+    }
+
+    /// Compatibility decomposition followed by canonical composition (NFKC).
+    #[inline]
+    fn nfkc(self) -> Recompositions<Chars<'a>> {
+        let chars = self.chars();
+        recompose::new_compatible(chars)
+    }
+
+    /// CJK Compatibility Ideograph replacement over the slice's characters.
+    #[inline]
+    fn cjk_compat_variants(self) -> Replacements<Chars<'a>> {
+        let chars = self.chars();
+        replace::new_cjk_compat_variants(chars)
+    }
+
+    /// Stream-Safe Text Process applied to the slice's characters.
+    #[inline]
+    fn stream_safe(self) -> StreamSafe<Chars<'a>> {
+        let chars = self.chars();
+        StreamSafe::new(chars)
+    }
+}
+
+
+// Single characters: the `char` is wrapped in `Some` and turned into a
+// one-element `option::IntoIter` before being passed to the adaptor
+// constructors.
+impl UnicodeNormalization<option::IntoIter<char>> for char {
+    /// Canonical decomposition (NFD) of this character.
+    #[inline]
+    fn nfd(self) -> Decompositions<option::IntoIter<char>> {
+        let single = Some(self).into_iter();
+        decompose::new_canonical(single)
+    }
+
+    /// Compatibility decomposition (NFKD) of this character.
+    #[inline]
+    fn nfkd(self) -> Decompositions<option::IntoIter<char>> {
+        let single = Some(self).into_iter();
+        decompose::new_compatible(single)
+    }
+
+    /// Canonical decomposition followed by canonical composition (NFC).
+    #[inline]
+    fn nfc(self) -> Recompositions<option::IntoIter<char>> {
+        let single = Some(self).into_iter();
+        recompose::new_canonical(single)
+    }
+
+    /// Compatibility decomposition followed by canonical composition (NFKC).
+    #[inline]
+    fn nfkc(self) -> Recompositions<option::IntoIter<char>> {
+        let single = Some(self).into_iter();
+        recompose::new_compatible(single)
+    }
+
+    /// CJK Compatibility Ideograph replacement for this character.
+    #[inline]
+    fn cjk_compat_variants(self) -> Replacements<option::IntoIter<char>> {
+        let single = Some(self).into_iter();
+        replace::new_cjk_compat_variants(single)
+    }
+
+    /// Stream-Safe Text Process applied to this character.
+    #[inline]
+    fn stream_safe(self) -> StreamSafe<option::IntoIter<char>> {
+        let single = Some(self).into_iter();
+        StreamSafe::new(single)
+    }
+}
+
+// Blanket implementation: any iterator yielding `char` is passed to the
+// adaptor constructors unchanged.
+impl<I> UnicodeNormalization<I> for I
+where
+    I: Iterator<Item = char>,
+{
+    /// Canonical decomposition (NFD) of the characters this iterator yields.
+    #[inline]
+    fn nfd(self) -> Decompositions<I> {
+        decompose::new_canonical(self)
+    }
+
+    /// Compatibility decomposition (NFKD) of the characters this iterator yields.
+    #[inline]
+    fn nfkd(self) -> Decompositions<I> {
+        decompose::new_compatible(self)
+    }
+
+    /// Canonical decomposition followed by canonical composition (NFC).
+    #[inline]
+    fn nfc(self) -> Recompositions<I> {
+        recompose::new_canonical(self)
+    }
+
+    /// Compatibility decomposition followed by canonical composition (NFKC).
+    #[inline]
+    fn nfkc(self) -> Recompositions<I> {
+        recompose::new_compatible(self)
+    }
+
+    /// CJK Compatibility Ideograph replacement over the yielded characters.
+    #[inline]
+    fn cjk_compat_variants(self) -> Replacements<I> {
+        replace::new_cjk_compat_variants(self)
+    }
+
+    /// Stream-Safe Text Process applied to the yielded characters.
+    #[inline]
+    fn stream_safe(self) -> StreamSafe<I> {
+        StreamSafe::new(self)
+    }
+}