summaryrefslogtreecommitdiffstats
path: root/vendor/unic-char-property/src
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/unic-char-property/src')
-rw-r--r--vendor/unic-char-property/src/lib.rs59
-rw-r--r--vendor/unic-char-property/src/macros.rs293
-rw-r--r--vendor/unic-char-property/src/pkg_info.rs20
-rw-r--r--vendor/unic-char-property/src/property.rs48
-rw-r--r--vendor/unic-char-property/src/range_types.rs104
-rw-r--r--vendor/unic-char-property/src/tables.rs117
6 files changed, 641 insertions, 0 deletions
diff --git a/vendor/unic-char-property/src/lib.rs b/vendor/unic-char-property/src/lib.rs
new file mode 100644
index 000000000..360a67165
--- /dev/null
+++ b/vendor/unic-char-property/src/lib.rs
@@ -0,0 +1,59 @@
+// Copyright 2017 The UNIC Project Developers.
+//
+// See the COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![no_std]
+#![warn(
+ bad_style,
+ missing_debug_implementations,
+ missing_docs,
+ unconditional_recursion
+)]
+#![forbid(unsafe_code)]
+
+//! # UNIC — Unicode Character Tools — Character Property
+//!
+//! A component of [`unic`: Unicode and Internationalization Crates for Rust](/unic/).
+//!
+//! Character Property taxonomy, contracts and build macros.
+//!
+//! ## References
+//!
+//! * [Unicode UTR #23: The Unicode Character Property Model](http://unicode.org/reports/tr23/).
+//!
+//! * [Unicode UAX #44: Unicode Character Database](http://unicode.org/reports/tr44/).
+//!
+//! * [PropertyAliases.txt](https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt).
+
+#[macro_use]
+extern crate unic_char_range;
+
+mod pkg_info;
+pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION};
+
+mod property;
+pub use self::property::{CharProperty, PartialCharProperty, TotalCharProperty};
+
+mod range_types;
+pub use crate::range_types::{
+ BinaryCharProperty,
+ CustomCharProperty,
+ EnumeratedCharProperty,
+ NumericCharProperty,
+ NumericCharPropertyValue,
+};
+
+mod macros;
+
+// Public because macros reference it from the code they expand at the macro call site.
+pub mod tables;
+
+// Used in macros
+#[doc(hidden)]
+pub use core::{fmt as __fmt, str as __str};
diff --git a/vendor/unic-char-property/src/macros.rs b/vendor/unic-char-property/src/macros.rs
new file mode 100644
index 000000000..b5f054347
--- /dev/null
+++ b/vendor/unic-char-property/src/macros.rs
@@ -0,0 +1,293 @@
+// Copyright 2017 The UNIC Project Developers.
+//
+// See the COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/// Macro for declaring a character property.
+///
+/// # Syntax (Enumerated Property)
+///
+/// ```
+/// #[macro_use]
+/// extern crate unic_char_property;
+///
+/// // First we define the type itself.
+/// char_property! {
+/// /// This is the enum type created for the character property.
+/// pub enum MyProp {
+/// abbr => "AbbrPropName";
+/// long => "Long_Property_Name";
+/// human => "Human-Readable Property Name";
+///
+/// /// Zero or more documentation or other attributes.
+/// RustName {
+/// abbr => AbbrName,
+/// long => Long_Name,
+/// human => "&'static str that is a nicer presentation of the name",
+/// }
+/// }
+///
+/// /// Module aliasing property value abbreviated names.
+/// pub mod abbr_names for abbr;
+///
+/// /// Module aliasing property value long names.
+/// pub mod long_names for long;
+/// }
+///
+/// // We also need to impl `PartialCharProperty` or `TotalCharProperty` manually.
+/// # impl unic_char_property::PartialCharProperty for MyProp {
+/// # fn of(_: char) -> Option<Self> { None }
+/// # }
+/// #
+/// # fn main() {}
+/// ```
+///
+/// # Syntax (Binary Property)
+///
+/// ```
+/// #[macro_use] extern crate unic_char_property;
+/// # #[macro_use] extern crate unic_char_range;
+///
+/// char_property! {
+/// /// This is the newtype used for the character property.
+/// pub struct MyProp(bool) {
+/// abbr => "AbbrPropName";
+/// long => "Long_Property_Name";
+/// human => "Human-Readable Property Name";
+///
+/// // Unlike an enumerated property, a binary property will handle the table for you.
+/// data_table_path => "../tests/tables/property_table.rsv";
+/// }
+///
+/// /// A function that returns whether the given character has the property or not.
+/// pub fn is_prop(char) -> bool;
+/// }
+///
+/// // You may also want to create a trait for easy access to the properties you define.
+/// # fn main() {}
+/// ```
+///
+/// # Effect
+///
+/// - Implements the `CharProperty` trait and appropriate range trait
+/// - Implements `FromStr` accepting either the abbr or long name, ascii case insensitive
+/// - Implements `Display` using the `human` string
+/// - Populates the module `abbr_names` with `pub use` bindings of variants to their abbr names
+/// (Enumerated properties only)
+/// - Populates the module `long_names` with `pub use` bindings of variants to their long names
+/// (Enumerated properties only)
+/// - Maintains all documentation comments and other `#[attributes]` as would be expected
+/// (with some limitations, listed below)
+///
+#[macro_export]
+macro_rules! char_property {
+
+ // == Enumerated Property == //
+
+ // Arm for an enumerated property: a `pub enum` declaration followed by the two
+ // alias-module declarations (`pub mod .. for abbr;` and `pub mod .. for long;`).
+ (
+ $(#[$prop_meta:meta])*
+ pub enum $prop_name:ident {
+ // Property-level names; forwarded unchanged to the `__impl CharProperty` arm.
+ abbr => $prop_abbr:expr;
+ long => $prop_long:expr;
+ human => $prop_human:expr;
+
+ // Zero or more variants. `abbr` and `long` are identifiers because they are used
+ // both as alias names (`pub use .. as ..`) and, via `stringify!`, as strings.
+ $(
+ $(#[$variant_meta:meta])*
+ $variant_name:ident {
+ abbr => $variant_abbr:ident,
+ long => $variant_long:ident,
+ human => $variant_human:expr,
+ }
+ )*
+ }
+
+ $(#[$abbr_mod_meta:meta])*
+ pub mod $abbr_mod:ident for abbr;
+
+ $(#[$long_mod_meta:meta])*
+ pub mod $long_mod:ident for long;
+
+ ) => {
+ // The property type itself: one unit variant per declared value.
+ $(#[$prop_meta])*
+ #[allow(bad_style)]
+ #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+ pub enum $prop_name {
+ $( $(#[$variant_meta])* $variant_name, )*
+ }
+
+ // Module re-exporting every variant under its abbreviated name.
+ $(#[$abbr_mod_meta])*
+ #[allow(bad_style)]
+ pub mod $abbr_mod {
+ $( pub use super::$prop_name::$variant_name as $variant_abbr; )*
+ }
+
+ // Module re-exporting every variant under its long name.
+ $(#[$long_mod_meta])*
+ #[allow(bad_style)]
+ pub mod $long_mod {
+ $( pub use super::$prop_name::$variant_name as $variant_long; )*
+ }
+
+ // `FromStr` accepts both the abbreviated and the long name of each variant.
+ char_property! {
+ __impl FromStr for $prop_name;
+ $(
+ stringify!($variant_abbr) => $prop_name::$variant_name;
+ stringify!($variant_long) => $prop_name::$variant_name;
+ )*
+ }
+
+ char_property! {
+ __impl CharProperty for $prop_name;
+ $prop_abbr;
+ $prop_long;
+ $prop_human;
+ }
+
+ // `Display` prints `EnumeratedCharProperty::human_name`.
+ char_property! {
+ __impl Display for $prop_name by EnumeratedCharProperty
+ }
+
+ // NOTE: no `PartialCharProperty`/`TotalCharProperty` impl is generated by this arm;
+ // the macro documentation states that it must be written manually.
+ impl $crate::EnumeratedCharProperty for $prop_name {
+ // All variants, in the order they were declared in the macro invocation.
+ fn all_values() -> &'static [$prop_name] {
+ const VALUES: &[$prop_name] = &[
+ $( $prop_name::$variant_name, )*
+ ];
+ VALUES
+ }
+ // The `abbr` identifier of the variant, stringified.
+ fn abbr_name(&self) -> &'static str {
+ match *self {
+ $( $prop_name::$variant_name => stringify!($variant_abbr), )*
+ }
+ }
+ // The `long` identifier of the variant, stringified.
+ fn long_name(&self) -> &'static str {
+ match *self {
+ $( $prop_name::$variant_name => stringify!($variant_long), )*
+ }
+ }
+ // The `human` expression given for the variant.
+ fn human_name(&self) -> &'static str {
+ match *self {
+ $( $prop_name::$variant_name => $variant_human, )*
+ }
+ }
+ }
+ };
+
+ // == Binary Property == //
+
+ // Arm for a binary property: a `pub struct Name(bool)` declaration carrying the property
+ // names and the path of its data table, followed by the signature of a `char -> bool`
+ // convenience function.
+ (
+ $(#[$prop_meta:meta])*
+ pub struct $prop_name:ident(bool) {
+ abbr => $prop_abbr:expr;
+ long => $prop_long:expr;
+ human => $prop_human:expr;
+
+ // Passed verbatim to `include!` in the generated `of` function.
+ data_table_path => $data_path:expr;
+ }
+
+ $(#[$is_fn_meta:meta])*
+ pub fn $is_fn:ident(char) -> bool;
+
+ ) => {
+ // Newtype around `bool`; `Default` therefore yields the `false` value.
+ $(#[$prop_meta])*
+ #[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash)]
+ pub struct $prop_name(bool);
+
+ // Free function: looks the character up and unwraps the result to a plain `bool`.
+ $(#[$is_fn_meta])*
+ pub fn $is_fn(ch: char) -> bool {
+ $prop_name::of(ch).as_bool()
+ }
+
+ impl $prop_name {
+ /// Get (struct) property value of the character.
+ pub fn of(ch: char) -> Self {
+ use $crate::tables::CharDataTable;
+ // The included file must be an expression of type `CharDataTable<()>`;
+ // the value is `true` exactly when the table contains `ch`.
+ const TABLE: CharDataTable<()> = include!($data_path);
+ $prop_name(TABLE.contains(ch))
+ }
+
+ /// Get boolean property value of the character.
+ pub fn as_bool(&self) -> bool { self.0 }
+ }
+
+ // Accepted spellings; the `__impl FromStr` arm also matches them ASCII
+ // case-insensitively.
+ char_property! {
+ __impl FromStr for $prop_name;
+ // Yes
+ "y" => $prop_name(true);
+ "yes" => $prop_name(true);
+ "t" => $prop_name(true);
+ "true" => $prop_name(true);
+ // No
+ "n" => $prop_name(false);
+ "no" => $prop_name(false);
+ "f" => $prop_name(false);
+ "false" => $prop_name(false);
+ }
+
+ char_property! {
+ __impl CharProperty for $prop_name;
+ $prop_abbr;
+ $prop_long;
+ $prop_human;
+ }
+
+ // NOTE(review): `Self::of` and `self.as_bool()` below are expected to resolve to the
+ // inherent methods defined above (inherent items take precedence over trait items),
+ // so these trait impls delegate rather than recurse.
+ impl $crate::TotalCharProperty for $prop_name {
+ fn of(ch: char) -> Self { Self::of(ch) }
+ }
+
+ impl $crate::BinaryCharProperty for $prop_name {
+ fn as_bool(&self) -> bool { self.as_bool() }
+ }
+
+ impl From<$prop_name> for bool {
+ fn from(prop: $prop_name) -> bool { prop.as_bool() }
+ }
+
+ // `Display` prints `BinaryCharProperty::human_name`.
+ char_property! {
+ __impl Display for $prop_name by BinaryCharProperty
+ }
+ };
+
+ // == Shared == //
+
+ // Internal arm: implements `CharProperty` for `$prop_name`, returning the three given
+ // expressions as the abbreviated, long and human-readable property names respectively.
+ (
+ __impl CharProperty for $prop_name:ident;
+ $prop_abbr:expr;
+ $prop_long:expr;
+ $prop_human:expr;
+ ) => {
+ impl $crate::CharProperty for $prop_name {
+ fn prop_abbr_name() -> &'static str { $prop_abbr }
+ fn prop_long_name() -> &'static str { $prop_long }
+ fn prop_human_name() -> &'static str { $prop_human }
+ }
+ };
+
+ // Internal arm: implements `FromStr` for `$prop_name` from a list of `string => value;`
+ // pairs. Parsing fails with `Err(())` when no pair matches.
+ (
+ __impl FromStr for $prop_name:ident;
+ $( $id:expr => $value:expr; )*
+ ) => {
+ // NOTE(review): presumably allowed because the same string may be listed more than
+ // once (e.g. identical abbreviated and long names), which would produce duplicate arms.
+ #[allow(unreachable_patterns)]
+ impl $crate::__str::FromStr for $prop_name {
+ type Err = ();
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ match s {
+ // First pass: exact matches, with each `$id` used as a pattern.
+ $( $id => Ok($value), )*
+ // Second pass: the same strings compared ASCII case-insensitively.
+ $( s if s.eq_ignore_ascii_case($id) => Ok($value), )*
+ _ => Err(()),
+ }
+ }
+ }
+ };
+
+    // Internal arm: implements `Display` for `$prop_name` by writing the string returned by
+    // `$crate::$trait::human_name` (the `$trait` is `EnumeratedCharProperty` or
+    // `BinaryCharProperty` at the call sites in this macro).
+    ( __impl Display for $prop_name:ident by $trait:ident ) => {
+        impl $crate::__fmt::Display for $prop_name {
+            fn fmt(&self, f: &mut $crate::__fmt::Formatter) -> $crate::__fmt::Result {
+                // Call `Display::fmt` by path instead of `.fmt(f)`: method-call syntax is
+                // resolved against the traits in scope where the macro is expanded, so it
+                // fails without `Display` imported, is ambiguous when `Debug` is imported
+                // too, and would pick `Debug::fmt` (quoted output) if only `Debug` is.
+                $crate::__fmt::Display::fmt($crate::$trait::human_name(self), f)
+            }
+        }
+    };
+}
diff --git a/vendor/unic-char-property/src/pkg_info.rs b/vendor/unic-char-property/src/pkg_info.rs
new file mode 100644
index 000000000..a1ab2853f
--- /dev/null
+++ b/vendor/unic-char-property/src/pkg_info.rs
@@ -0,0 +1,20 @@
+// Copyright 2017 The UNIC Project Developers.
+//
+// See the COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Package information
+
+/// UNIC component version.
+///
+/// Taken from the `CARGO_PKG_VERSION` environment variable at compile time.
+pub const PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
+
+/// UNIC component name.
+///
+/// Taken from the `CARGO_PKG_NAME` environment variable at compile time.
+pub const PKG_NAME: &str = env!("CARGO_PKG_NAME");
+
+/// UNIC component description.
+///
+/// Taken from the `CARGO_PKG_DESCRIPTION` environment variable at compile time.
+pub const PKG_DESCRIPTION: &str = env!("CARGO_PKG_DESCRIPTION");
diff --git a/vendor/unic-char-property/src/property.rs b/vendor/unic-char-property/src/property.rs
new file mode 100644
index 000000000..47d367f4d
--- /dev/null
+++ b/vendor/unic-char-property/src/property.rs
@@ -0,0 +1,48 @@
+// Copyright 2017 The UNIC Project Developers.
+//
+// See the COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Taxonomy and contracts for Character Property types.
+
+use core::fmt::Debug;
+use core::hash::Hash;
+
+/// A Character Property, defined for some or all Unicode characters.
+///
+/// Implementors must also implement `PartialCharProperty`, `Debug`, `Eq` and `Hash`, which
+/// are supertraits of this trait. The three functions below take no `self`: they name the
+/// property as a whole, not an individual property value.
+pub trait CharProperty: PartialCharProperty + Debug + Eq + Hash {
+ /// The *abbreviated name* of the property.
+ fn prop_abbr_name() -> &'static str;
+
+ /// The *long name* of the property.
+ fn prop_long_name() -> &'static str;
+
+ /// The *human-readable* name of the property.
+ fn prop_human_name() -> &'static str;
+}
+
+/// A Character Property defined for some characters.
+///
+/// Examples: `Decomposition_Type`, `Numeric_Type`
+///
+/// Implementors must be `Copy`. Types implementing `TotalCharProperty` get this trait
+/// through a blanket implementation in this module and must not implement it by hand.
+pub trait PartialCharProperty: Copy {
+ /// The property value for the character, or None.
+ fn of(ch: char) -> Option<Self>;
+}
+
+/// A Character Property defined on all characters.
+///
+/// Examples: `Age`, `Name`, `General_Category`, `Bidi_Class`
+///
+/// Implementors must also be `Default`. Unlike `PartialCharProperty::of`, the lookup here
+/// returns the value directly instead of an `Option`.
+pub trait TotalCharProperty: PartialCharProperty + Default {
+ /// The property value for the character.
+ fn of(ch: char) -> Self;
+}
+
+// Blanket implementation: a total property is also usable as a partial one. The total lookup
+// always yields a value, so the result is always `Some`.
+impl<T: TotalCharProperty> PartialCharProperty for T {
+    fn of(ch: char) -> Option<Self> {
+        let value = <T as TotalCharProperty>::of(ch);
+        Some(value)
+    }
+}
diff --git a/vendor/unic-char-property/src/range_types.rs b/vendor/unic-char-property/src/range_types.rs
new file mode 100644
index 000000000..e3cd47915
--- /dev/null
+++ b/vendor/unic-char-property/src/range_types.rs
@@ -0,0 +1,104 @@
+// Copyright 2017 The UNIC Project Developers.
+//
+// See the COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Character Property Range types.
+//!
+//! NOTE: At the moment, it is not possible to define a marker for all character property range
+//! types and enforce their implementation from `CharProperty`. We need to fix this whenever the
+//! compiler becomes able to do so.
+
+use super::property::CharProperty;
+
+// == Enumerated/Catalog Types ==
+
+/// A Character Property with enumerated values.
+///
+/// This is similar to types *Enumeration* and *Catalog*, as defined in UAX#44.
+///
+/// Usage Note: If the property is of type *Catalog*, it's recommended to (in some way) mark the
+/// type as *non-exhaustive*, so that adding new variants to the `enum` type won't result in API
+/// breakage.
+///
+/// The enumerated-property form of the `char_property!` macro generates an implementation of
+/// this trait; the `*_name` methods below describe a single property *value*, as opposed to
+/// the `prop_*_name` functions of `CharProperty`, which describe the property itself.
+pub trait EnumeratedCharProperty: Sized + CharProperty {
+ /// Exhaustive list of all property values.
+ fn all_values() -> &'static [Self];
+
+ /// The *abbreviated name* of the property value.
+ fn abbr_name(&self) -> &'static str;
+
+ /// The *long name* of the property value.
+ fn long_name(&self) -> &'static str;
+
+ /// The *human-readable name* of the property value.
+ fn human_name(&self) -> &'static str;
+}
+
+// == Binary Types ==
+
+/// A Character Property with binary values.
+///
+/// Examples: `Alphabetic`, `Bidi_Mirrored`, `White_Space`
+///
+/// Only `as_bool` has to be provided; the three name methods have default implementations
+/// derived from it.
+pub trait BinaryCharProperty: CharProperty {
+    /// The boolean value of the property value.
+    fn as_bool(&self) -> bool;
+
+    /// The *abbreviated name* of the property value: `"Y"` when set, `"N"` otherwise.
+    fn abbr_name(&self) -> &'static str {
+        match self.as_bool() {
+            true => "Y",
+            false => "N",
+        }
+    }
+
+    /// The *long name* of the property value: `"Yes"` when set, `"No"` otherwise.
+    fn long_name(&self) -> &'static str {
+        match self.as_bool() {
+            true => "Yes",
+            false => "No",
+        }
+    }
+
+    /// The *human-readable name* of the property value: `"Yes"` when set, `"No"` otherwise.
+    fn human_name(&self) -> &'static str {
+        match self.as_bool() {
+            true => "Yes",
+            false => "No",
+        }
+    }
+}
+
+// == Numeric Types ==
+
+/// Marker for numeric types accepted by `NumericCharProperty`.
+///
+/// The trait has no methods; it only restricts the `NumericValue` type parameter of
+/// `NumericCharProperty`.
+pub trait NumericCharPropertyValue {}
+
+// The only implementation provided in this module.
+impl NumericCharPropertyValue for u8 {}
+
+/// A Character Property with numeric values.
+///
+/// Examples: `Numeric_Value`, `Canonical_Combining_Class`
+///
+/// `NumericValue` is the number type returned by `number` and must implement the
+/// `NumericCharPropertyValue` marker trait.
+pub trait NumericCharProperty<NumericValue: NumericCharPropertyValue>: CharProperty {
+ /// The numeric value for the property value.
+ fn number(&self) -> NumericValue;
+}
+
+// == Custom Types ==
+
+/// A Character Property with custom values.
+///
+/// Custom values means any non-enumerated, non-numeric value.
+///
+/// Examples: `Age` property that returns a `UnicodeVersion` value.
+///
+/// `Value` is the type returned by `actual`; this trait places no bounds on it.
+pub trait CustomCharProperty<Value>: CharProperty {
+ /// The actual (inner) value for the property value.
+ fn actual(&self) -> Value;
+}
diff --git a/vendor/unic-char-property/src/tables.rs b/vendor/unic-char-property/src/tables.rs
new file mode 100644
index 000000000..ab419d552
--- /dev/null
+++ b/vendor/unic-char-property/src/tables.rs
@@ -0,0 +1,117 @@
+// Copyright 2017 The UNIC Project Developers.
+//
+// See the COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Character data tables used in UNIC.
+
+use unic_char_range::CharRange;
+
+/// A mapping from characters to some associated data.
+///
+/// For the set case, use `()` as the associated value.
+///
+/// The lookup methods (`contains`, `find`, `find_with_range`) binary-search the backing
+/// slice, so the entries presumably have to be sorted in ascending order.
+#[derive(Copy, Clone, Debug)]
+pub enum CharDataTable<V: 'static> {
+ // One entry per individual character.
+ #[doc(hidden)]
+ Direct(&'static [(char, V)]),
+ // One entry per range of characters.
+ #[doc(hidden)]
+ Range(&'static [(CharRange, V)]),
+}
+
+// The default table is a `Direct` table backed by an empty slice, i.e. it has no entries.
+impl<V> Default for CharDataTable<V> {
+ fn default() -> Self {
+ CharDataTable::Direct(&[])
+ }
+}
+
+impl<V> CharDataTable<V> {
+    /// Reports whether this table has an entry covering `needle`.
+    ///
+    /// A `Direct` table is binary-searched by character; a `Range` table is binary-searched
+    /// by comparing each range against `needle` with `CharRange::cmp_char`.
+    pub fn contains(&self, needle: char) -> bool {
+        let lookup = match *self {
+            CharDataTable::Direct(entries) => {
+                entries.binary_search_by(|&(key, _)| key.cmp(&needle))
+            }
+            CharDataTable::Range(entries) => {
+                entries.binary_search_by(|&(span, _)| span.cmp_char(needle))
+            }
+        };
+        lookup.is_ok()
+    }
+}
+
+impl<V: Copy> CharDataTable<V> {
+    /// Looks up the data associated with `needle`.
+    ///
+    /// Returns a copy of the value of the matching entry, or `None` when the binary search
+    /// finds no entry for the character.
+    pub fn find(&self, needle: char) -> Option<V> {
+        match *self {
+            CharDataTable::Direct(entries) => {
+                let slot = entries
+                    .binary_search_by_key(&needle, |&(key, _)| key)
+                    .ok()?;
+                Some(entries[slot].1)
+            }
+            CharDataTable::Range(entries) => {
+                let slot = entries
+                    .binary_search_by(|&(span, _)| span.cmp_char(needle))
+                    .ok()?;
+                Some(entries[slot].1)
+            }
+        }
+    }
+
+    /// Looks up the range containing `needle` together with its associated data.
+    ///
+    /// Only `Range` tables are searched; a `Direct` table always yields `None`.
+    pub fn find_with_range(&self, needle: char) -> Option<(CharRange, V)> {
+        if let CharDataTable::Range(entries) = *self {
+            let slot = entries
+                .binary_search_by(|&(span, _)| span.cmp_char(needle))
+                .ok()?;
+            Some(entries[slot])
+        } else {
+            None
+        }
+    }
+}
+
+impl<V: Copy + Default> CharDataTable<V> {
+    /// Looks up the data associated with `needle`, falling back to `V::default()` when the
+    /// table has no entry for it.
+    pub fn find_or_default(&self, needle: char) -> V {
+        match self.find(needle) {
+            Some(value) => value,
+            None => V::default(),
+        }
+    }
+}
+
+/// Iterator for `CharDataTable`. Iterates over pairs `(CharRange, V)`.
+///
+/// Field `0` is the table being iterated; field `1` is the index of the next entry to yield.
+#[derive(Debug)]
+pub struct CharDataTableIter<'a, V: 'static>(&'a CharDataTable<V>, usize);
+
+impl<'a, V: Copy> Iterator for CharDataTableIter<'a, V> {
+    type Item = (CharRange, V);
+
+    // Yields the entry at the cursor (`self.1`) and advances the cursor by one. Entries of a
+    // `Direct` table are widened to the single-character range `ch..=ch`. Once the cursor is
+    // past the last entry, `None` is returned and the cursor is left unchanged.
+    fn next(&mut self) -> Option<Self::Item> {
+        let cursor = self.1;
+        let item = match *self.0 {
+            CharDataTable::Direct(entries) => entries
+                .get(cursor)
+                .map(|&(ch, value)| (chars!(ch..=ch), value)),
+            CharDataTable::Range(entries) => entries.get(cursor).cloned(),
+        };
+        if item.is_some() {
+            self.1 = cursor + 1;
+        }
+        item
+    }
+}
+
+impl<V> CharDataTable<V> {
+ /// Iterate over the entries in this table. Yields pairs `(CharRange, V)`.
+ ///
+ /// The returned iterator borrows the table and starts at index 0. Note that
+ /// `CharDataTableIter` only implements `Iterator` when `V: Copy`.
+ pub fn iter(&self) -> CharDataTableIter<'_, V> {
+ // Start the cursor at the first entry.
+ CharDataTableIter(self, 0)
+ }
+}