diff options
Diffstat (limited to 'vendor/unic-char-property/src')
-rw-r--r-- | vendor/unic-char-property/src/lib.rs | 59 | ||||
-rw-r--r-- | vendor/unic-char-property/src/macros.rs | 293 | ||||
-rw-r--r-- | vendor/unic-char-property/src/pkg_info.rs | 20 | ||||
-rw-r--r-- | vendor/unic-char-property/src/property.rs | 48 | ||||
-rw-r--r-- | vendor/unic-char-property/src/range_types.rs | 104 | ||||
-rw-r--r-- | vendor/unic-char-property/src/tables.rs | 117 |
6 files changed, 641 insertions, 0 deletions
diff --git a/vendor/unic-char-property/src/lib.rs b/vendor/unic-char-property/src/lib.rs new file mode 100644 index 000000000..360a67165 --- /dev/null +++ b/vendor/unic-char-property/src/lib.rs @@ -0,0 +1,59 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![no_std] +#![warn( + bad_style, + missing_debug_implementations, + missing_docs, + unconditional_recursion +)] +#![forbid(unsafe_code)] + +//! # UNIC — Unicode Character Tools — Character Property +//! +//! A component of [`unic`: Unicode and Internationalization Crates for Rust](/unic/). +//! +//! Character Property taxonomy, contracts and build macros. +//! +//! ## References +//! +//! * [Unicode UTR #23: The Unicode Character Property Model](http://unicode.org/reports/tr23/). +//! +//! * [Unicode UAX #44: Unicode Character Database](http://unicode.org/reports/tr44/). +//! +//! * [PropertyAliases.txt](https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt). + +#[macro_use] +extern crate unic_char_range; + +mod pkg_info; +pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION}; + +mod property; +pub use self::property::{CharProperty, PartialCharProperty, TotalCharProperty}; + +mod range_types; +pub use crate::range_types::{ + BinaryCharProperty, + CustomCharProperty, + EnumeratedCharProperty, + NumericCharProperty, + NumericCharPropertyValue, +}; + +mod macros; + +// pub because it is used in macros, called from the macro call-site. 
+pub mod tables; + +// Used in macros +#[doc(hidden)] +pub use core::{fmt as __fmt, str as __str}; diff --git a/vendor/unic-char-property/src/macros.rs b/vendor/unic-char-property/src/macros.rs new file mode 100644 index 000000000..b5f054347 --- /dev/null +++ b/vendor/unic-char-property/src/macros.rs @@ -0,0 +1,293 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/// Macro for declaring a character property. +/// +/// # Syntax (Enumerated Property) +/// +/// ``` +/// #[macro_use] +/// extern crate unic_char_property; +/// +/// // First we define the type itself. +/// char_property! { +/// /// This is the enum type created for the character property. +/// pub enum MyProp { +/// abbr => "AbbrPropName"; +/// long => "Long_Property_Name"; +/// human => "Human-Readable Property Name"; +/// +/// /// Zero or more documentation or other attributes. +/// RustName { +/// abbr => AbbrName, +/// long => Long_Name, +/// human => "&'static str that is a nicer presentation of the name", +/// } +/// } +/// +/// /// Module aliasing property value abbreviated names. +/// pub mod abbr_names for abbr; +/// +/// /// Module aliasing property value long names. +/// pub mod long_names for long; +/// } +/// +/// // We also need to impl `PartialCharProperty` or `TotalCharProperty` manually. 
+/// # impl unic_char_property::PartialCharProperty for MyProp { +/// # fn of(_: char) -> Option<Self> { None } +/// # } +/// # +/// # fn main() {} +/// ``` +/// +/// # Syntax (Binary Property) +/// +/// ``` +/// #[macro_use] extern crate unic_char_property; +/// # #[macro_use] extern crate unic_char_range; +/// +/// char_property! { +/// /// This is the newtype used for the character property. +/// pub struct MyProp(bool) { +/// abbr => "AbbrPropName"; +/// long => "Long_Property_Name"; +/// human => "Human-Readable Property Name"; +/// +/// // Unlike an enumerated property, a binary property will handle the table for you. +/// data_table_path => "../tests/tables/property_table.rsv"; +/// } +/// +/// /// A function that returns whether the given character has the property or not. +/// pub fn is_prop(char) -> bool; +/// } +/// +/// // You may also want to create a trait for easy access to the properties you define. +/// # fn main() {} +/// ``` +/// +/// # Effect +/// +/// - Implements the `CharProperty` trait and appropriate range trait +/// - Implements `FromStr` accepting either the abbr or long name, ascii case insensitive +/// - Implements `Display` using the `human` string +/// - Populates the module `abbr_names` with `pub use` bindings of variants to their abbr names +/// (Enumerated properties only) +/// - Populates the module `long_names` with `pub use` bindings of variants to their long names +/// (Enumerated properties only) +/// - Maintains all documentation comments and other `#[attributes]` as would be expected +/// (with some limitations, listed below) +/// +#[macro_export] +macro_rules! 
char_property { + + // == Enumerated Property == // + + ( + $(#[$prop_meta:meta])* + pub enum $prop_name:ident { + abbr => $prop_abbr:expr; + long => $prop_long:expr; + human => $prop_human:expr; + + $( + $(#[$variant_meta:meta])* + $variant_name:ident { + abbr => $variant_abbr:ident, + long => $variant_long:ident, + human => $variant_human:expr, + } + )* + } + + $(#[$abbr_mod_meta:meta])* + pub mod $abbr_mod:ident for abbr; + + $(#[$long_mod_meta:meta])* + pub mod $long_mod:ident for long; + + ) => { + $(#[$prop_meta])* + #[allow(bad_style)] + #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] + pub enum $prop_name { + $( $(#[$variant_meta])* $variant_name, )* + } + + $(#[$abbr_mod_meta])* + #[allow(bad_style)] + pub mod $abbr_mod { + $( pub use super::$prop_name::$variant_name as $variant_abbr; )* + } + + $(#[$long_mod_meta])* + #[allow(bad_style)] + pub mod $long_mod { + $( pub use super::$prop_name::$variant_name as $variant_long; )* + } + + char_property! { + __impl FromStr for $prop_name; + $( + stringify!($variant_abbr) => $prop_name::$variant_name; + stringify!($variant_long) => $prop_name::$variant_name; + )* + } + + char_property! { + __impl CharProperty for $prop_name; + $prop_abbr; + $prop_long; + $prop_human; + } + + char_property! 
{ + __impl Display for $prop_name by EnumeratedCharProperty + } + + impl $crate::EnumeratedCharProperty for $prop_name { + fn all_values() -> &'static [$prop_name] { + const VALUES: &[$prop_name] = &[ + $( $prop_name::$variant_name, )* + ]; + VALUES + } + fn abbr_name(&self) -> &'static str { + match *self { + $( $prop_name::$variant_name => stringify!($variant_abbr), )* + } + } + fn long_name(&self) -> &'static str { + match *self { + $( $prop_name::$variant_name => stringify!($variant_long), )* + } + } + fn human_name(&self) -> &'static str { + match *self { + $( $prop_name::$variant_name => $variant_human, )* + } + } + } + }; + + // == Binary Property == // + + ( + $(#[$prop_meta:meta])* + pub struct $prop_name:ident(bool) { + abbr => $prop_abbr:expr; + long => $prop_long:expr; + human => $prop_human:expr; + + data_table_path => $data_path:expr; + } + + $(#[$is_fn_meta:meta])* + pub fn $is_fn:ident(char) -> bool; + + ) => { + $(#[$prop_meta])* + #[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash)] + pub struct $prop_name(bool); + + $(#[$is_fn_meta])* + pub fn $is_fn(ch: char) -> bool { + $prop_name::of(ch).as_bool() + } + + impl $prop_name { + /// Get (struct) property value of the character. + pub fn of(ch: char) -> Self { + use $crate::tables::CharDataTable; + const TABLE: CharDataTable<()> = include!($data_path); + $prop_name(TABLE.contains(ch)) + } + + /// Get boolean property value of the character. + pub fn as_bool(&self) -> bool { self.0 } + } + + char_property! { + __impl FromStr for $prop_name; + // Yes + "y" => $prop_name(true); + "yes" => $prop_name(true); + "t" => $prop_name(true); + "true" => $prop_name(true); + // No + "n" => $prop_name(false); + "no" => $prop_name(false); + "f" => $prop_name(false); + "false" => $prop_name(false); + } + + char_property! 
{ + __impl CharProperty for $prop_name; + $prop_abbr; + $prop_long; + $prop_human; + } + + impl $crate::TotalCharProperty for $prop_name { + fn of(ch: char) -> Self { Self::of(ch) } + } + + impl $crate::BinaryCharProperty for $prop_name { + fn as_bool(&self) -> bool { self.as_bool() } + } + + impl From<$prop_name> for bool { + fn from(prop: $prop_name) -> bool { prop.as_bool() } + } + + char_property! { + __impl Display for $prop_name by BinaryCharProperty + } + }; + + // == Shared == // + + ( + __impl CharProperty for $prop_name:ident; + $prop_abbr:expr; + $prop_long:expr; + $prop_human:expr; + ) => { + impl $crate::CharProperty for $prop_name { + fn prop_abbr_name() -> &'static str { $prop_abbr } + fn prop_long_name() -> &'static str { $prop_long } + fn prop_human_name() -> &'static str { $prop_human } + } + }; + + ( + __impl FromStr for $prop_name:ident; + $( $id:expr => $value:expr; )* + ) => { + #[allow(unreachable_patterns)] + impl $crate::__str::FromStr for $prop_name { + type Err = (); + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + $( $id => Ok($value), )* + $( s if s.eq_ignore_ascii_case($id) => Ok($value), )* + _ => Err(()), + } + } + } + }; + + ( __impl Display for $prop_name:ident by $trait:ident ) => { + impl $crate::__fmt::Display for $prop_name { + fn fmt(&self, f: &mut $crate::__fmt::Formatter) -> $crate::__fmt::Result { + $crate::$trait::human_name(self).fmt(f) + } + } + }; +} diff --git a/vendor/unic-char-property/src/pkg_info.rs b/vendor/unic-char-property/src/pkg_info.rs new file mode 100644 index 000000000..a1ab2853f --- /dev/null +++ b/vendor/unic-char-property/src/pkg_info.rs @@ -0,0 +1,20 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Package information + +/// UNIC component version. +pub const PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); + +/// UNIC component name. +pub const PKG_NAME: &str = env!("CARGO_PKG_NAME"); + +/// UNIC component description. +pub const PKG_DESCRIPTION: &str = env!("CARGO_PKG_DESCRIPTION"); diff --git a/vendor/unic-char-property/src/property.rs b/vendor/unic-char-property/src/property.rs new file mode 100644 index 000000000..47d367f4d --- /dev/null +++ b/vendor/unic-char-property/src/property.rs @@ -0,0 +1,48 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Taxonomy and contracts for Character Property types. + +use core::fmt::Debug; +use core::hash::Hash; + +/// A Character Property, defined for some or all Unicode characters. +pub trait CharProperty: PartialCharProperty + Debug + Eq + Hash { + /// The *abbreviated name* of the property. + fn prop_abbr_name() -> &'static str; + + /// The *long name* of the property. + fn prop_long_name() -> &'static str; + + /// The *human-readable* name of the property. + fn prop_human_name() -> &'static str; +} + +/// A Character Property defined for some characters. +/// +/// Examples: `Decomposition_Type`, `Numeric_Type` +pub trait PartialCharProperty: Copy { + /// The property value for the character, or None. 
+ fn of(ch: char) -> Option<Self>; +} + +/// A Character Property defined on all characters. +/// +/// Examples: `Age`, `Name`, `General_Category`, `Bidi_Class` +pub trait TotalCharProperty: PartialCharProperty + Default { + /// The property value for the character. + fn of(ch: char) -> Self; +} + +impl<T: TotalCharProperty> PartialCharProperty for T { + fn of(ch: char) -> Option<Self> { + Some(<Self as TotalCharProperty>::of(ch)) + } +} diff --git a/vendor/unic-char-property/src/range_types.rs b/vendor/unic-char-property/src/range_types.rs new file mode 100644 index 000000000..e3cd47915 --- /dev/null +++ b/vendor/unic-char-property/src/range_types.rs @@ -0,0 +1,104 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Character Property Range types. +//! +//! NOTE: At the moment, it is not possible to define a marker for all character property range +//! types and enforce their implementation from `CharProperty`. We need to fix this whenever the +//! compiler becomes able to do so. + +use super::property::CharProperty; + +// == Enumerated/Catalog Types == + +/// A Character Property with enumerated values. +/// +/// This is similar to types *Enumeration* and *Catalog*, as defined in UAX#44. +/// +/// Usage Note: If the property is of type *Catalog*, it's recommended to (in some way) mark the +/// type as *non-exhaustive*, so that adding new variants to the `enum` type won't result in API +/// breakage. +pub trait EnumeratedCharProperty: Sized + CharProperty { + /// Exhaustive list of all property values. 
+ fn all_values() -> &'static [Self]; + + /// The *abbreviated name* of the property value. + fn abbr_name(&self) -> &'static str; + + /// The *long name* of the property value. + fn long_name(&self) -> &'static str; + + /// The *human-readable name* of the property value. + fn human_name(&self) -> &'static str; +} + +// == Binary Types == + +/// A Character Property with binary values. +/// +/// Examples: `Alphabetic`, `Bidi_Mirrored`, `White_Space` +pub trait BinaryCharProperty: CharProperty { + /// The boolean value of the property value. + fn as_bool(&self) -> bool; + + /// The *abbreviated name* of the property value. + fn abbr_name(&self) -> &'static str { + if self.as_bool() { + "Y" + } else { + "N" + } + } + + /// The *long name* of the property value. + fn long_name(&self) -> &'static str { + if self.as_bool() { + "Yes" + } else { + "No" + } + } + + /// The *human-readable name* of the property value. + fn human_name(&self) -> &'static str { + if self.as_bool() { + "Yes" + } else { + "No" + } + } +} + +// == Numeric Types == + +/// Marker for numeric types accepted by `NumericCharProperty`. +pub trait NumericCharPropertyValue {} + +impl NumericCharPropertyValue for u8 {} + +/// A Character Property with numeric values. +/// +/// Examples: `Numeric_Value`, `Canonical_Combining_Class` +pub trait NumericCharProperty<NumericValue: NumericCharPropertyValue>: CharProperty { + /// The numeric value for the property value. + fn number(&self) -> NumericValue; +} + +// == Custom Types == + +/// A Character Property with custom values. +/// +/// Custom values means any non-enumerated, non-numeric value. +/// +/// Examples: `Age` property that returns a `UnicodeVersion` value. +pub trait CustomCharProperty<Value>: CharProperty { + /// The actual (inner) value for the property value. 
+ fn actual(&self) -> Value; +} diff --git a/vendor/unic-char-property/src/tables.rs b/vendor/unic-char-property/src/tables.rs new file mode 100644 index 000000000..ab419d552 --- /dev/null +++ b/vendor/unic-char-property/src/tables.rs @@ -0,0 +1,117 @@ +// Copyright 2017 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Character data tables used in UNIC. + +use unic_char_range::CharRange; + +/// A mapping from characters to some associated data. +/// +/// For the set case, use `()` as the associated value. +#[derive(Copy, Clone, Debug)] +pub enum CharDataTable<V: 'static> { + #[doc(hidden)] + Direct(&'static [(char, V)]), + #[doc(hidden)] + Range(&'static [(CharRange, V)]), +} + +impl<V> Default for CharDataTable<V> { + fn default() -> Self { + CharDataTable::Direct(&[]) + } +} + +impl<V> CharDataTable<V> { + /// Does this table contain a mapping for a character? + pub fn contains(&self, needle: char) -> bool { + match *self { + CharDataTable::Direct(table) => { + table.binary_search_by_key(&needle, |&(k, _)| k).is_ok() + } + CharDataTable::Range(table) => table + .binary_search_by(|&(range, _)| range.cmp_char(needle)) + .is_ok(), + } + } +} + +impl<V: Copy> CharDataTable<V> { + /// Find the associated data for a character in this table. 
+ pub fn find(&self, needle: char) -> Option<V> { + match *self { + CharDataTable::Direct(table) => table + .binary_search_by_key(&needle, |&(k, _)| k) + .map(|idx| table[idx].1) + .ok(), + CharDataTable::Range(table) => table + .binary_search_by(|&(range, _)| range.cmp_char(needle)) + .map(|idx| table[idx].1) + .ok(), + } + } + + /// Find the range and the associated data for a character in the range table. + pub fn find_with_range(&self, needle: char) -> Option<(CharRange, V)> { + match *self { + CharDataTable::Direct(_) => None, + CharDataTable::Range(table) => table + .binary_search_by(|&(range, _)| range.cmp_char(needle)) + .map(|idx| table[idx]) + .ok(), + } + } +} + +impl<V: Copy + Default> CharDataTable<V> { + /// Find the associated data for a character in this table, or the default value if not entered. + pub fn find_or_default(&self, needle: char) -> V { + self.find(needle).unwrap_or_else(Default::default) + } +} + +/// Iterator for `CharDataTable`. Iterates over pairs `(CharRange, V)`. +#[derive(Debug)] +pub struct CharDataTableIter<'a, V: 'static>(&'a CharDataTable<V>, usize); + +impl<'a, V: Copy> Iterator for CharDataTableIter<'a, V> { + type Item = (CharRange, V); + + fn next(&mut self) -> Option<Self::Item> { + match *self.0 { + CharDataTable::Direct(arr) => { + if self.1 >= arr.len() { + None + } else { + let idx = self.1; + self.1 += 1; + let (ch, v) = arr[idx]; + Some((chars!(ch..=ch), v)) + } + } + CharDataTable::Range(arr) => { + if self.1 >= arr.len() { + None + } else { + let idx = self.1; + self.1 += 1; + Some(arr[idx]) + } + } + } + } +} + +impl<V> CharDataTable<V> { + /// Iterate over the entries in this table. Yields pairs `(CharRange, V)`. + pub fn iter(&self) -> CharDataTableIter<'_, V> { + CharDataTableIter(self, 0) + } +} |