summaryrefslogtreecommitdiffstats
path: root/vendor/regex-syntax/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/regex-syntax/src
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-syntax/src')
-rw-r--r--vendor/regex-syntax/src/ast/mod.rs1502
-rw-r--r--vendor/regex-syntax/src/ast/parse.rs5944
-rw-r--r--vendor/regex-syntax/src/ast/print.rs568
-rw-r--r--vendor/regex-syntax/src/ast/visitor.rs519
-rw-r--r--vendor/regex-syntax/src/either.rs8
-rw-r--r--vendor/regex-syntax/src/error.rs324
-rw-r--r--vendor/regex-syntax/src/hir/interval.rs520
-rw-r--r--vendor/regex-syntax/src/hir/literal/mod.rs1690
-rw-r--r--vendor/regex-syntax/src/hir/mod.rs2296
-rw-r--r--vendor/regex-syntax/src/hir/print.rs367
-rw-r--r--vendor/regex-syntax/src/hir/translate.rs3211
-rw-r--r--vendor/regex-syntax/src/hir/visitor.rs203
-rw-r--r--vendor/regex-syntax/src/lib.rs312
-rw-r--r--vendor/regex-syntax/src/parser.rs200
-rw-r--r--vendor/regex-syntax/src/unicode.rs999
-rw-r--r--vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE57
-rw-r--r--vendor/regex-syntax/src/unicode_tables/age.rs1673
-rw-r--r--vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs2808
-rw-r--r--vendor/regex-syntax/src/unicode_tables/general_category.rs6307
-rw-r--r--vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs1389
-rw-r--r--vendor/regex-syntax/src/unicode_tables/mod.rs57
-rw-r--r--vendor/regex-syntax/src/unicode_tables/perl_decimal.rs74
-rw-r--r--vendor/regex-syntax/src/unicode_tables/perl_space.rs23
-rw-r--r--vendor/regex-syntax/src/unicode_tables/perl_word.rs743
-rw-r--r--vendor/regex-syntax/src/unicode_tables/property_bool.rs10953
-rw-r--r--vendor/regex-syntax/src/unicode_tables/property_names.rs264
-rw-r--r--vendor/regex-syntax/src/unicode_tables/property_values.rs896
-rw-r--r--vendor/regex-syntax/src/unicode_tables/script.rs1218
-rw-r--r--vendor/regex-syntax/src/unicode_tables/script_extension.rs1396
-rw-r--r--vendor/regex-syntax/src/unicode_tables/sentence_break.rs2396
-rw-r--r--vendor/regex-syntax/src/unicode_tables/word_break.rs1060
-rw-r--r--vendor/regex-syntax/src/utf8.rs587
32 files changed, 50564 insertions, 0 deletions
diff --git a/vendor/regex-syntax/src/ast/mod.rs b/vendor/regex-syntax/src/ast/mod.rs
new file mode 100644
index 000000000..9b9127b1f
--- /dev/null
+++ b/vendor/regex-syntax/src/ast/mod.rs
@@ -0,0 +1,1502 @@
+/*!
+Defines an abstract syntax for regular expressions.
+*/
+
+use std::cmp::Ordering;
+use std::error;
+use std::fmt;
+
+pub use crate::ast::visitor::{visit, Visitor};
+
+pub mod parse;
+pub mod print;
+mod visitor;
+
+/// An error that occurred while parsing a regular expression into an abstract
+/// syntax tree.
+///
+/// Note that not all ASTs represent a valid regular expression. For example,
+/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
+/// valid Unicode property name. That particular error is reported when
+/// translating an AST to the high-level intermediate representation (`HIR`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+ /// The kind of error.
+ kind: ErrorKind,
+ /// The original pattern that the parser generated the error from. Every
+ /// span in an error is a valid range into this string.
+ pattern: String,
+ /// The span of this error.
+ span: Span,
+}
+
+impl Error {
+    /// Returns the kind of this error.
+    pub fn kind(&self) -> &ErrorKind {
+        let Error { kind, .. } = self;
+        kind
+    }
+
+    /// Returns the original pattern string in which this error occurred.
+    ///
+    /// Every span reported by this error is expressed in terms of this
+    /// string.
+    pub fn pattern(&self) -> &str {
+        self.pattern.as_str()
+    }
+
+    /// Returns the span at which this error occurred.
+    pub fn span(&self) -> &Span {
+        let Error { span, .. } = self;
+        span
+    }
+
+    /// Returns an auxiliary span, if this kind of error carries one.
+    ///
+    /// Only errors that benefit from pointing at two locations in the
+    /// original regular expression have an auxiliary span. For example,
+    /// "duplicate" errors set the main error position to the duplicate
+    /// occurrence and the auxiliary span to the initial occurrence. All
+    /// other kinds yield `None`.
+    pub fn auxiliary_span(&self) -> Option<&Span> {
+        match &self.kind {
+            ErrorKind::FlagDuplicate { original }
+            | ErrorKind::FlagRepeatedNegation { original }
+            | ErrorKind::GroupNameDuplicate { original } => Some(original),
+            _ => None,
+        }
+    }
+}
+
+/// The type of an error that occurred while building an AST.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+ /// The capturing group limit was exceeded.
+ ///
+ /// Note that this represents a limit on the total number of capturing
+ /// groups in a regex and not necessarily the number of nested capturing
+ /// groups. That is, the nest limit can be low and it is still possible for
+ /// this error to occur.
+ CaptureLimitExceeded,
+ /// An invalid escape sequence was found in a character class set.
+ ClassEscapeInvalid,
+ /// An invalid character class range was found. An invalid range is any
+ /// range where the start is greater than the end.
+ ClassRangeInvalid,
+ /// An invalid range boundary was found in a character class. Range
+ /// boundaries must be a single literal codepoint, but this error indicates
+ /// that something else was found, such as a nested class.
+ ClassRangeLiteral,
+ /// An opening `[` was found with no corresponding closing `]`.
+ ClassUnclosed,
+ /// Note that this error variant is no longer used. Namely, a decimal
+ /// number can only appear as a repetition quantifier. When the number
+ /// in a repetition quantifier is empty, then it gets its own specialized
+ /// error, `RepetitionCountDecimalEmpty`.
+ DecimalEmpty,
+ /// An invalid decimal number was given where one was expected.
+ DecimalInvalid,
+ /// A bracketed hex literal was empty.
+ EscapeHexEmpty,
+ /// A bracketed hex literal did not correspond to a Unicode scalar value.
+ EscapeHexInvalid,
+ /// An invalid hexadecimal digit was found.
+ EscapeHexInvalidDigit,
+ /// EOF was found before an escape sequence was completed.
+ EscapeUnexpectedEof,
+ /// An unrecognized escape sequence.
+ EscapeUnrecognized,
+ /// A dangling negation was used when setting flags, e.g., `i-`.
+ FlagDanglingNegation,
+ /// A flag was used twice, e.g., `i-i`.
+ ///
+ /// `Error::auxiliary_span` returns `original` for this kind.
+ FlagDuplicate {
+ /// The position of the original flag. The error position
+ /// points to the duplicate flag.
+ original: Span,
+ },
+ /// The negation operator was used twice, e.g., `-i-s`.
+ ///
+ /// `Error::auxiliary_span` returns `original` for this kind.
+ FlagRepeatedNegation {
+ /// The position of the original negation operator. The error position
+ /// points to the duplicate negation operator.
+ original: Span,
+ },
+ /// Expected a flag but got EOF, e.g., `(?`.
+ FlagUnexpectedEof,
+ /// Unrecognized flag, e.g., `a`.
+ FlagUnrecognized,
+ /// A duplicate capture name was found.
+ ///
+ /// `Error::auxiliary_span` returns `original` for this kind.
+ GroupNameDuplicate {
+ /// The position of the initial occurrence of the capture name. The
+ /// error position itself points to the duplicate occurrence.
+ original: Span,
+ },
+ /// A capture group name is empty, e.g., `(?P<>abc)`.
+ GroupNameEmpty,
+ /// An invalid character was seen for a capture group name. This includes
+ /// errors where the first character is a digit (even though subsequent
+ /// characters are allowed to be digits).
+ GroupNameInvalid,
+ /// A closing `>` could not be found for a capture group name.
+ GroupNameUnexpectedEof,
+ /// An unclosed group, e.g., `(ab`.
+ ///
+ /// The span of this error corresponds to the unclosed parenthesis.
+ GroupUnclosed,
+ /// An unopened group, e.g., `ab)`.
+ GroupUnopened,
+ /// The nest limit was exceeded. The limit stored here is the limit
+ /// configured in the parser.
+ NestLimitExceeded(u32),
+ /// The range provided in a counted repetition operator is invalid. The
+ /// range is invalid if the start is greater than the end.
+ RepetitionCountInvalid,
+ /// An opening `{` was not followed by a valid decimal value.
+ /// For example, `x{}` or `x{]}` would fail.
+ RepetitionCountDecimalEmpty,
+ /// An opening `{` was found with no corresponding closing `}`.
+ RepetitionCountUnclosed,
+ /// A repetition operator was applied to a missing sub-expression. This
+ /// occurs, for example, in the regex consisting of just a `*` or even
+ /// `(?i)*`. It is, however, possible to create a repetition operating on
+ /// an empty sub-expression. For example, `()*` is still considered valid.
+ RepetitionMissing,
+ /// The Unicode class is not valid. This typically occurs when a `\p` is
+ /// followed by something other than a `{`.
+ UnicodeClassInvalid,
+ /// When octal support is disabled, this error is produced when an octal
+ /// escape is used. The octal escape is assumed to be an invocation of
+ /// a backreference, which is the common case.
+ UnsupportedBackreference,
+ /// When syntax similar to PCRE's look-around is used, this error is
+ /// returned. Some example syntaxes that are rejected include, but are
+ /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
+ /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
+ /// error is used to improve the user experience.
+ UnsupportedLookAround,
+ /// Hints that destructuring should not be exhaustive.
+ ///
+ /// This enum may grow additional variants, so this makes sure clients
+ /// don't count on exhaustive matching. (Otherwise, adding a new variant
+ /// could break existing code.)
+ #[doc(hidden)]
+ __Nonexhaustive,
+}
+
+impl error::Error for Error {
+    /// Returns a short, fixed description of this error's kind.
+    ///
+    /// Every `ErrorKind` the parser can produce has an arm here, including
+    /// `RepetitionCountDecimalEmpty`. The match is exhaustive on purpose, so
+    /// that adding a variant without a description is a compile error rather
+    /// than a runtime panic.
+    // TODO: Remove this method entirely on the next breaking semver release.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        use self::ErrorKind::*;
+        match self.kind {
+            CaptureLimitExceeded => "capture group limit exceeded",
+            ClassEscapeInvalid => "invalid escape sequence in character class",
+            ClassRangeInvalid => "invalid character class range",
+            ClassRangeLiteral => "invalid range boundary, must be a literal",
+            ClassUnclosed => "unclosed character class",
+            DecimalEmpty => "empty decimal literal",
+            DecimalInvalid => "invalid decimal literal",
+            EscapeHexEmpty => "empty hexadecimal literal",
+            EscapeHexInvalid => "invalid hexadecimal literal",
+            EscapeHexInvalidDigit => "invalid hexadecimal digit",
+            EscapeUnexpectedEof => "unexpected eof (escape sequence)",
+            EscapeUnrecognized => "unrecognized escape sequence",
+            FlagDanglingNegation => "dangling flag negation operator",
+            FlagDuplicate { .. } => "duplicate flag",
+            FlagRepeatedNegation { .. } => "repeated negation",
+            FlagUnexpectedEof => "unexpected eof (flag)",
+            FlagUnrecognized => "unrecognized flag",
+            GroupNameDuplicate { .. } => "duplicate capture group name",
+            GroupNameEmpty => "empty capture group name",
+            GroupNameInvalid => "invalid capture group name",
+            GroupNameUnexpectedEof => "unclosed capture group name",
+            GroupUnclosed => "unclosed group",
+            GroupUnopened => "unopened group",
+            NestLimitExceeded(_) => "nest limit exceeded",
+            RepetitionCountInvalid => "invalid repetition count range",
+            // Previously missing: this kind fell through to the catch-all
+            // `unreachable!()` and panicked.
+            RepetitionCountDecimalEmpty => {
+                "repetition quantifier expects a valid decimal"
+            }
+            RepetitionCountUnclosed => "unclosed counted repetition",
+            RepetitionMissing => "repetition operator missing expression",
+            UnicodeClassInvalid => "invalid Unicode character class",
+            UnsupportedBackreference => "backreferences are not supported",
+            UnsupportedLookAround => "look-around is not supported",
+            // The hidden marker variant is never constructed by the parser.
+            __Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    /// Renders this error by delegating to the crate's error formatter.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let rendered = crate::error::Formatter::from(self);
+        rendered.fmt(f)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    /// Writes the human-readable message for this kind of error.
+    ///
+    /// Only the capture-limit and nest-limit messages interpolate a value;
+    /// every other kind maps to a fixed string.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use self::ErrorKind::*;
+        let message = match *self {
+            CaptureLimitExceeded => {
+                return write!(
+                    f,
+                    "exceeded the maximum number of \
+                     capturing groups ({})",
+                    ::std::u32::MAX
+                );
+            }
+            NestLimitExceeded(limit) => {
+                return write!(
+                    f,
+                    "exceed the maximum number of \
+                     nested parentheses/brackets ({})",
+                    limit
+                );
+            }
+            ClassEscapeInvalid => {
+                "invalid escape sequence found in character class"
+            }
+            ClassRangeInvalid => {
+                "invalid character class range, \
+                 the start must be <= the end"
+            }
+            ClassRangeLiteral => "invalid range boundary, must be a literal",
+            ClassUnclosed => "unclosed character class",
+            DecimalEmpty => "decimal literal empty",
+            DecimalInvalid => "decimal literal invalid",
+            EscapeHexEmpty => "hexadecimal literal empty",
+            EscapeHexInvalid => {
+                "hexadecimal literal is not a Unicode scalar value"
+            }
+            EscapeHexInvalidDigit => "invalid hexadecimal digit",
+            EscapeUnexpectedEof => {
+                "incomplete escape sequence, \
+                 reached end of pattern prematurely"
+            }
+            EscapeUnrecognized => "unrecognized escape sequence",
+            FlagDanglingNegation => "dangling flag negation operator",
+            FlagDuplicate { .. } => "duplicate flag",
+            FlagRepeatedNegation { .. } => "flag negation operator repeated",
+            FlagUnexpectedEof => "expected flag but got end of regex",
+            FlagUnrecognized => "unrecognized flag",
+            GroupNameDuplicate { .. } => "duplicate capture group name",
+            GroupNameEmpty => "empty capture group name",
+            GroupNameInvalid => "invalid capture group character",
+            GroupNameUnexpectedEof => "unclosed capture group name",
+            GroupUnclosed => "unclosed group",
+            GroupUnopened => "unopened group",
+            RepetitionCountInvalid => {
+                "invalid repetition count range, \
+                 the start must be <= the end"
+            }
+            RepetitionCountDecimalEmpty => {
+                "repetition quantifier expects a valid decimal"
+            }
+            RepetitionCountUnclosed => "unclosed counted repetition",
+            RepetitionMissing => "repetition operator missing expression",
+            UnicodeClassInvalid => "invalid Unicode character class",
+            UnsupportedBackreference => "backreferences are not supported",
+            UnsupportedLookAround => {
+                "look-around, including look-ahead and look-behind, \
+                 is not supported"
+            }
+            _ => unreachable!(),
+        };
+        f.write_str(message)
+    }
+}
+
+/// Span represents the position information of a single AST item.
+///
+/// All span positions are absolute byte offsets that can be used on the
+/// original regular expression that was parsed.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub struct Span {
+    /// The start byte offset.
+    pub start: Position,
+    /// The end byte offset.
+    pub end: Position,
+}
+
+impl fmt::Debug for Span {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Span { start, end } = self;
+        write!(f, "Span({:?}, {:?})", start, end)
+    }
+}
+
+impl Ord for Span {
+    /// Orders spans by their start position, breaking ties on the end
+    /// position.
+    fn cmp(&self, other: &Span) -> Ordering {
+        self.start
+            .cmp(&other.start)
+            .then_with(|| self.end.cmp(&other.end))
+    }
+}
+
+impl PartialOrd for Span {
+    fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
+        Some(Ord::cmp(self, other))
+    }
+}
+
+/// A single position in a regular expression.
+///
+/// A position encodes one half of a span, and includes the byte offset, line
+/// number and column number.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub struct Position {
+    /// The absolute offset of this position, starting at `0` from the
+    /// beginning of the regular expression pattern string.
+    pub offset: usize,
+    /// The line number, starting at `1`.
+    pub line: usize,
+    /// The approximate column number, starting at `1`.
+    pub column: usize,
+}
+
+impl fmt::Debug for Position {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Position { offset, line, column } = self;
+        write!(f, "Position(o: {:?}, l: {:?}, c: {:?})", offset, line, column)
+    }
+}
+
+impl Ord for Position {
+    /// Orders positions by byte offset only; line and column are ignored.
+    fn cmp(&self, other: &Position) -> Ordering {
+        Ord::cmp(&self.offset, &other.offset)
+    }
+}
+
+impl PartialOrd for Position {
+    fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
+        Some(Ord::cmp(self, other))
+    }
+}
+
+impl Span {
+    /// Create a new span with the given positions.
+    pub fn new(start: Position, end: Position) -> Span {
+        Span { start, end }
+    }
+
+    /// Create a new span using the given position as the start and end.
+    pub fn splat(pos: Position) -> Span {
+        Span { start: pos, end: pos }
+    }
+
+    /// Create a new span by replacing the starting position with the one
+    /// given.
+    pub fn with_start(self, pos: Position) -> Span {
+        Span { start: pos, end: self.end }
+    }
+
+    /// Create a new span by replacing the ending position with the one
+    /// given.
+    pub fn with_end(self, pos: Position) -> Span {
+        Span { start: self.start, end: pos }
+    }
+
+    /// Returns true if and only if this span occurs on a single line.
+    pub fn is_one_line(&self) -> bool {
+        self.end.line == self.start.line
+    }
+
+    /// Returns true if and only if this span is empty. That is, it points to
+    /// a single position in the concrete syntax of a regular expression.
+    pub fn is_empty(&self) -> bool {
+        self.end.offset == self.start.offset
+    }
+}
+
+impl Position {
+    /// Create a new position with the given information.
+    ///
+    /// `offset` is the absolute offset of the position, starting at `0` from
+    /// the beginning of the regular expression pattern string.
+    ///
+    /// `line` is the line number, starting at `1`.
+    ///
+    /// `column` is the approximate column number, starting at `1`.
+    pub fn new(offset: usize, line: usize, column: usize) -> Position {
+        Position { offset, line, column }
+    }
+}
+
+/// An abstract syntax tree for a single regular expression along with the
+/// comments found in it.
+///
+/// Comments are not stored in the tree itself to avoid complexity. Each
+/// comment contains a span of precisely where it occurred in the original
+/// regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct WithComments {
+ /// The actual AST.
+ pub ast: Ast,
+ /// All comments found in the original regular expression.
+ pub comments: Vec<Comment>,
+}
+
+/// A comment from a regular expression with an associated span.
+///
+/// A regular expression can only contain comments when the `x` flag is
+/// enabled.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Comment {
+ /// The span of this comment, including the beginning `#` and ending `\n`.
+ pub span: Span,
+ /// The comment text, starting with the first character following the `#`
+ /// and ending with the last character preceding the `\n`.
+ pub comment: String,
+}
+
+/// An abstract syntax tree for a single regular expression.
+///
+/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
+/// space proportional to the size of the `Ast`.
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the `Ast`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Ast {
+    /// An empty regex that matches everything.
+    Empty(Span),
+    /// A set of flags, e.g., `(?is)`.
+    Flags(SetFlags),
+    /// A single character literal, which includes escape sequences.
+    Literal(Literal),
+    /// The "any character" class.
+    Dot(Span),
+    /// A single zero-width assertion.
+    Assertion(Assertion),
+    /// A single character class. This includes all forms of character classes
+    /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
+    Class(Class),
+    /// A repetition operator applied to an arbitrary regular expression.
+    Repetition(Repetition),
+    /// A grouped regular expression.
+    Group(Group),
+    /// An alternation of regular expressions.
+    Alternation(Alternation),
+    /// A concatenation of regular expressions.
+    Concat(Concat),
+}
+
+impl Ast {
+    /// Return the span of this abstract syntax tree.
+    pub fn span(&self) -> &Span {
+        match self {
+            // These two variants carry nothing but their span.
+            Ast::Empty(span) | Ast::Dot(span) => span,
+            Ast::Flags(flags) => &flags.span,
+            Ast::Literal(lit) => &lit.span,
+            Ast::Assertion(assertion) => &assertion.span,
+            Ast::Class(class) => class.span(),
+            Ast::Repetition(rep) => &rep.span,
+            Ast::Group(group) => &group.span,
+            Ast::Alternation(alt) => &alt.span,
+            Ast::Concat(concat) => &concat.span,
+        }
+    }
+
+    /// Return true if and only if this Ast is empty.
+    pub fn is_empty(&self) -> bool {
+        if let Ast::Empty(_) = *self {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Returns true if and only if this AST has any (including possibly empty)
+    /// subexpressions.
+    fn has_subexprs(&self) -> bool {
+        match self {
+            Ast::Class(_)
+            | Ast::Repetition(_)
+            | Ast::Group(_)
+            | Ast::Alternation(_)
+            | Ast::Concat(_) => true,
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_) => false,
+        }
+    }
+}
+
+/// Print a display representation of this Ast.
+///
+/// This does not preserve any of the original whitespace formatting that may
+/// have originally been present in the concrete syntax from which this Ast
+/// was generated.
+///
+/// This implementation uses constant stack space and heap space proportional
+/// to the size of the `Ast`.
+impl fmt::Display for Ast {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut printer = crate::ast::print::Printer::new();
+        printer.print(self, f)
+    }
+}
+
+/// An alternation of regular expressions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Alternation {
+    /// The span of this alternation.
+    pub span: Span,
+    /// The alternate regular expressions.
+    pub asts: Vec<Ast>,
+}
+
+impl Alternation {
+    /// Convert this alternation into an AST.
+    ///
+    /// With zero branches the result is `Ast::Empty` carrying this
+    /// alternation's span. With exactly one branch, that branch is returned
+    /// on its own. Otherwise the result is `Ast::Alternation`.
+    pub fn into_ast(mut self) -> Ast {
+        if self.asts.is_empty() {
+            return Ast::Empty(self.span);
+        }
+        if self.asts.len() > 1 {
+            return Ast::Alternation(self);
+        }
+        // Exactly one branch remains, so the pop cannot fail.
+        self.asts.pop().unwrap()
+    }
+}
+
+/// A concatenation of regular expressions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Concat {
+    /// The span of this concatenation.
+    pub span: Span,
+    /// The concatenated regular expressions.
+    pub asts: Vec<Ast>,
+}
+
+impl Concat {
+    /// Convert this concatenation into an AST.
+    ///
+    /// With zero elements the result is `Ast::Empty` carrying this
+    /// concatenation's span. With exactly one element, that element is
+    /// returned on its own. Otherwise the result is `Ast::Concat`.
+    pub fn into_ast(mut self) -> Ast {
+        if self.asts.is_empty() {
+            return Ast::Empty(self.span);
+        }
+        if self.asts.len() > 1 {
+            return Ast::Concat(self);
+        }
+        // Exactly one element remains, so the pop cannot fail.
+        self.asts.pop().unwrap()
+    }
+}
+
+/// A single literal expression.
+///
+/// A literal corresponds to a single Unicode scalar value. Literals may be
+/// represented in their literal form, e.g., `a` or in their escaped form,
+/// e.g., `\x61`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Literal {
+    /// The span of this literal.
+    pub span: Span,
+    /// The kind of this literal.
+    pub kind: LiteralKind,
+    /// The Unicode scalar value corresponding to this literal.
+    pub c: char,
+}
+
+impl Literal {
+    /// Returns the byte value of this literal when it was written as a
+    /// fixed-width `\x` hex escape whose value fits in a byte. Returns
+    /// `None` for every other literal.
+    pub fn byte(&self) -> Option<u8> {
+        let code = self.c as u32;
+        match self.kind {
+            LiteralKind::HexFixed(HexLiteralKind::X) if code <= 255 => {
+                Some(code as u8)
+            }
+            _ => None,
+        }
+    }
+}
+
+/// The kind of a single literal expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum LiteralKind {
+ /// The literal is written verbatim, e.g., `a` or `☃`.
+ Verbatim,
+ /// The literal is written as an escape because it is punctuation, e.g.,
+ /// `\*` or `\[`.
+ Punctuation,
+ /// The literal is written as an octal escape, e.g., `\141`.
+ Octal,
+ /// The literal is written as a hex code with a fixed number of digits
+ /// depending on the type of the escape, e.g., `\x61` or `\u0061` or
+ /// `\U00000061`. `HexLiteralKind::digits` gives the digit count for each
+ /// form.
+ HexFixed(HexLiteralKind),
+ /// The literal is written as a hex code with a bracketed number of
+ /// digits. The only restriction is that the bracketed hex code must refer
+ /// to a valid Unicode scalar value.
+ HexBrace(HexLiteralKind),
+ /// The literal is written as a specially recognized escape, e.g., `\f`
+ /// or `\n`.
+ Special(SpecialLiteralKind),
+}
+
+/// The type of a special literal.
+///
+/// A special literal is a special escape sequence recognized by the regex
+/// parser, e.g., `\f` or `\n`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum SpecialLiteralKind {
+ /// Bell, spelled `\a` (`\x07`).
+ Bell,
+ /// Form feed, spelled `\f` (`\x0C`).
+ FormFeed,
+ /// Tab, spelled `\t` (`\x09`).
+ Tab,
+ /// Line feed, spelled `\n` (`\x0A`).
+ LineFeed,
+ /// Carriage return, spelled `\r` (`\x0D`).
+ CarriageReturn,
+ /// Vertical tab, spelled `\v` (`\x0B`).
+ VerticalTab,
+ /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
+ /// parsing in verbose mode.
+ Space,
+}
+
+/// The type of a Unicode hex literal.
+///
+/// Note that all variants behave the same when used with brackets. They only
+/// differ when used without brackets in the number of hex digits that must
+/// follow.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HexLiteralKind {
+    /// A `\x` prefix. When used without brackets, this form is limited to
+    /// two digits.
+    X,
+    /// A `\u` prefix. When used without brackets, this form is limited to
+    /// four digits.
+    UnicodeShort,
+    /// A `\U` prefix. When used without brackets, this form is limited to
+    /// eight digits.
+    UnicodeLong,
+}
+
+impl HexLiteralKind {
+    /// Returns how many hex digits this form requires when it is written
+    /// without brackets. The bracketed form places no restriction on the
+    /// number of digits.
+    pub fn digits(&self) -> u32 {
+        use self::HexLiteralKind::{UnicodeLong, UnicodeShort, X};
+        match self {
+            X => 2,
+            UnicodeShort => 4,
+            UnicodeLong => 8,
+        }
+    }
+}
+
+/// A single character class expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Class {
+    /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
+    Unicode(ClassUnicode),
+    /// A perl character class, e.g., `\d` or `\W`.
+    Perl(ClassPerl),
+    /// A bracketed character class set, which may contain zero or more
+    /// character ranges and/or zero or more nested classes. e.g.,
+    /// `[a-zA-Z\pL]`.
+    Bracketed(ClassBracketed),
+}
+
+impl Class {
+    /// Return the span of this character class.
+    pub fn span(&self) -> &Span {
+        match self {
+            Class::Unicode(unicode) => &unicode.span,
+            Class::Perl(perl) => &perl.span,
+            Class::Bracketed(bracketed) => &bracketed.span,
+        }
+    }
+}
+
+/// A Perl character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassPerl {
+ /// The span of this class.
+ pub span: Span,
+ /// The kind of Perl class.
+ pub kind: ClassPerlKind,
+ /// Whether the class is negated or not. e.g., `\d` is not negated but
+ /// `\D` is.
+ pub negated: bool,
+}
+
+/// The available Perl character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassPerlKind {
+ /// Decimal numbers.
+ Digit,
+ /// Whitespace.
+ Space,
+ /// Word characters.
+ Word,
+}
+
+/// An ASCII character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassAscii {
+ /// The span of this class.
+ pub span: Span,
+ /// The kind of ASCII class. `ClassAsciiKind::from_name` maps a class name
+ /// such as `cntrl` to its kind.
+ pub kind: ClassAsciiKind,
+ /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
+ /// but `[[:^alpha:]]` is.
+ pub negated: bool,
+}
+
+/// The available ASCII character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassAsciiKind {
+    /// `[0-9A-Za-z]`
+    Alnum,
+    /// `[A-Za-z]`
+    Alpha,
+    /// `[\x00-\x7F]`
+    Ascii,
+    /// `[ \t]`
+    Blank,
+    /// `[\x00-\x1F\x7F]`
+    Cntrl,
+    /// `[0-9]`
+    Digit,
+    /// `[!-~]`
+    Graph,
+    /// `[a-z]`
+    Lower,
+    /// `[ -~]`
+    Print,
+    /// `[!-/:-@\[-`{-~]`
+    Punct,
+    /// `[\t\n\v\f\r ]`
+    Space,
+    /// `[A-Z]`
+    Upper,
+    /// `[0-9A-Za-z_]`
+    Word,
+    /// `[0-9A-Fa-f]`
+    Xdigit,
+}
+
+impl ClassAsciiKind {
+    /// Look up the `ClassAsciiKind` variant for the given class name.
+    ///
+    /// The name must be the lowercase spelling of the variant name, e.g.
+    /// `cntrl` for `ClassAsciiKind::Cntrl`. The comparison is exact, so any
+    /// other spelling yields `None`.
+    pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
+        let kind = match name {
+            "alnum" => ClassAsciiKind::Alnum,
+            "alpha" => ClassAsciiKind::Alpha,
+            "ascii" => ClassAsciiKind::Ascii,
+            "blank" => ClassAsciiKind::Blank,
+            "cntrl" => ClassAsciiKind::Cntrl,
+            "digit" => ClassAsciiKind::Digit,
+            "graph" => ClassAsciiKind::Graph,
+            "lower" => ClassAsciiKind::Lower,
+            "print" => ClassAsciiKind::Print,
+            "punct" => ClassAsciiKind::Punct,
+            "space" => ClassAsciiKind::Space,
+            "upper" => ClassAsciiKind::Upper,
+            "word" => ClassAsciiKind::Word,
+            "xdigit" => ClassAsciiKind::Xdigit,
+            _ => return None,
+        };
+        Some(kind)
+    }
+}
+
+/// A Unicode character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+    /// The span of this class.
+    pub span: Span,
+    /// Whether this class is negated or not.
+    ///
+    /// Note: be careful when using this attribute. This specifically refers
+    /// to whether the class is written as `\p` or `\P`, where the latter
+    /// is `negated = true`. However, it is also possible to write something
+    /// like `\P{scx!=Katakana}` which is actually equivalent to
+    /// `\p{scx=Katakana}` and is therefore not actually negated even though
+    /// `negated = true` here. To test whether this class is truly negated
+    /// or not, use the `is_negated` method.
+    pub negated: bool,
+    /// The kind of Unicode class.
+    pub kind: ClassUnicodeKind,
+}
+
+impl ClassUnicode {
+    /// Returns true if this class has been negated.
+    ///
+    /// A `!=` op inside the class flips the meaning of the `negated` field,
+    /// so `is_negated` for `\P{scx!=Katakana}` returns `false`.
+    pub fn is_negated(&self) -> bool {
+        let op_inverts = match self.kind {
+            ClassUnicodeKind::NamedValue {
+                op: ClassUnicodeOpKind::NotEqual,
+                ..
+            } => true,
+            _ => false,
+        };
+        // The class is truly negated when exactly one of the two holds.
+        self.negated != op_inverts
+    }
+}
+
+/// The available forms of Unicode character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassUnicodeKind {
+ /// A one letter abbreviated class, e.g., `\pN`.
+ OneLetter(char),
+ /// A binary property, general category or script. The string may be
+ /// empty.
+ Named(String),
+ /// A property name and an associated value.
+ ///
+ /// `ClassUnicode::is_negated` inspects `op` here: a `NotEqual` op inverts
+ /// the class's `negated` field.
+ NamedValue {
+ /// The type of Unicode op used to associate `name` with `value`.
+ op: ClassUnicodeOpKind,
+ /// The property name (which may be empty).
+ name: String,
+ /// The property value (which may be empty).
+ value: String,
+ },
+}
+
+/// The type of op used in a Unicode character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassUnicodeOpKind {
+    /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
+    Equal,
+    /// A property set to a specific value using a colon, e.g.,
+    /// `\p{scx:Katakana}`.
+    Colon,
+    /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
+    NotEqual,
+}
+
+impl ClassUnicodeOpKind {
+    /// Returns true for the two equality spellings (`=` and `:`) and false
+    /// for `!=`.
+    pub fn is_equal(&self) -> bool {
+        match self {
+            ClassUnicodeOpKind::NotEqual => false,
+            ClassUnicodeOpKind::Equal | ClassUnicodeOpKind::Colon => true,
+        }
+    }
+}
+
+/// A bracketed character class, e.g., `[a-z0-9]`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBracketed {
+ /// The span of this class.
+ pub span: Span,
+ /// Whether this class is negated or not. e.g., `[a]` is not negated but
+ /// `[^a]` is.
+ pub negated: bool,
+ /// The type of this set. A set is either a normal union of things, e.g.,
+ /// `[abc]` (a `ClassSet::Item`) or a result of applying set operations,
+ /// e.g., `[\pL--c]` (a `ClassSet::BinaryOp`).
+ pub kind: ClassSet,
+}
+
+ /// A character class set.
+ ///
+ /// This type corresponds to the internal structure of a bracketed character
+ /// class. That is, every bracketed character class is one of two types: a
+ /// union of items (literals, ranges, other bracketed classes) or a tree of
+ /// binary set operations.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum ClassSet {
+ /// An item, which can be a single literal, range, nested character class
+ /// or a union of items.
+ Item(ClassSetItem),
+ /// A single binary operation (i.e., `&&`, `--` or `~~`).
+ BinaryOp(ClassSetBinaryOp),
+ }
+
+ impl ClassSet {
+     /// Wrap the given union of items in a `ClassSet`.
+     pub fn union(ast: ClassSetUnion) -> ClassSet {
+         let item = ClassSetItem::Union(ast);
+         ClassSet::Item(item)
+     }
+
+     /// Borrow the span covered by this character class set.
+     pub fn span(&self) -> &Span {
+         match self {
+             ClassSet::BinaryOp(op) => &op.span,
+             ClassSet::Item(item) => item.span(),
+         }
+     }
+
+     /// Reports whether this set is exactly an `Empty` item.
+     fn is_empty(&self) -> bool {
+         matches!(*self, ClassSet::Item(ClassSetItem::Empty(_)))
+     }
+ }
+
+ /// A single component of a character class set.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum ClassSetItem {
+ /// An empty item.
+ ///
+ /// Note that a bracketed character class cannot contain a single empty
+ /// item. Empty items can appear when using one of the binary operators.
+ /// For example, `[&&]` is the intersection of two empty classes.
+ Empty(Span),
+ /// A single literal.
+ Literal(Literal),
+ /// A range between two literals.
+ Range(ClassSetRange),
+ /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
+ Ascii(ClassAscii),
+ /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
+ Unicode(ClassUnicode),
+ /// A Perl character class, e.g., `\d` or `\W`.
+ Perl(ClassPerl),
+ /// A bracketed character class set, which may contain zero or more
+ /// character ranges and/or zero or more nested classes, e.g.,
+ /// `[a-zA-Z\pL]`.
+ Bracketed(Box<ClassBracketed>),
+ /// A union of items.
+ Union(ClassSetUnion),
+ }
+
+ impl ClassSetItem {
+     /// Borrow the span stored in this item, whichever variant it is.
+     pub fn span(&self) -> &Span {
+         match self {
+             ClassSetItem::Empty(span) => span,
+             ClassSetItem::Literal(lit) => &lit.span,
+             ClassSetItem::Range(range) => &range.span,
+             ClassSetItem::Ascii(cls) => &cls.span,
+             ClassSetItem::Unicode(cls) => &cls.span,
+             ClassSetItem::Perl(cls) => &cls.span,
+             ClassSetItem::Bracketed(cls) => &cls.span,
+             ClassSetItem::Union(union) => &union.span,
+         }
+     }
+ }
+
+ /// A single character class range in a set, e.g., the `a-z` in `[a-z]`.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct ClassSetRange {
+ /// The span of this range.
+ pub span: Span,
+ /// The literal at the start of this range.
+ pub start: Literal,
+ /// The literal at the end of this range.
+ pub end: Literal,
+ }
+
+ impl ClassSetRange {
+     /// Reports whether this range is well formed.
+     ///
+     /// A range is rejected only when its start literal compares greater
+     /// than its end literal; a range whose two ends are equal is valid.
+     pub fn is_valid(&self) -> bool {
+         let (lo, hi) = (self.start.c, self.end.c);
+         hi >= lo
+     }
+ }
+
+ /// A union of items inside a character class set.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct ClassSetUnion {
+ /// The span of the items in this operation, e.g., the `a-z0-9` in
+ /// `[^a-z0-9]`.
+ pub span: Span,
+ /// The sequence of items that make up this union.
+ pub items: Vec<ClassSetItem>,
+ }
+
+ impl ClassSetUnion {
+     /// Append an item to this union and grow the union's span to cover it.
+     ///
+     /// The end of the union's span always becomes the end of the pushed
+     /// item's span. When the union has no items yet, the start of its span
+     /// is also taken from the item.
+     ///
+     /// Consequently, a union that is only ever built through this method,
+     /// from items with correct spans, never needs its span adjusted by hand.
+     pub fn push(&mut self, item: ClassSetItem) {
+         let item_span = *item.span();
+         if self.items.is_empty() {
+             self.span.start = item_span.start;
+         }
+         self.span.end = item_span.end;
+         self.items.push(item);
+     }
+
+     /// Convert this union into a single character class set item.
+     ///
+     /// A union with no items becomes `ClassSetItem::Empty` carrying the
+     /// union's span. A union with exactly one item becomes that item.
+     /// Anything larger is wrapped as `ClassSetItem::Union`.
+     pub fn into_item(mut self) -> ClassSetItem {
+         if self.items.len() > 1 {
+             return ClassSetItem::Union(self);
+         }
+         match self.items.pop() {
+             Some(only) => only,
+             None => ClassSetItem::Empty(self.span),
+         }
+     }
+ }
+
+ /// A Unicode character class set operation.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct ClassSetBinaryOp {
+ /// The span of this operation, e.g., the `a-z--h-p` in `[a-z--h-p]`.
+ pub span: Span,
+ /// The type of this set operation.
+ pub kind: ClassSetBinaryOpKind,
+ /// The left hand side of the operation.
+ pub lhs: Box<ClassSet>,
+ /// The right hand side of the operation.
+ pub rhs: Box<ClassSet>,
+ }
+
+ /// The type of a Unicode character class set operation.
+ ///
+ /// Note that this doesn't explicitly represent union since there is no
+ /// explicit union operator. Concatenation inside a character class corresponds
+ /// to the union operation.
+ #[derive(Clone, Copy, Debug, Eq, PartialEq)]
+ pub enum ClassSetBinaryOpKind {
+ /// The intersection of two sets, e.g., `\pN&&[a-z]`.
+ Intersection,
+ /// The difference of two sets, e.g., `\pN--[0-9]`.
+ Difference,
+ /// The symmetric difference of two sets, i.e., the set of elements
+ /// belonging to one but not both sets,
+ /// e.g., `[\pL~~[:ascii:]]`.
+ SymmetricDifference,
+ }
+
+ /// A single zero-width assertion.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct Assertion {
+ /// The span of this assertion.
+ pub span: Span,
+ /// The kind of this assertion, e.g., `\b` or `^`.
+ pub kind: AssertionKind,
+ }
+
+ /// The kind of an assertion; each variant is documented by its syntax.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum AssertionKind {
+ /// `^`
+ StartLine,
+ /// `$`
+ EndLine,
+ /// `\A`
+ StartText,
+ /// `\z`
+ EndText,
+ /// `\b`
+ WordBoundary,
+ /// `\B`
+ NotWordBoundary,
+ }
+
+ /// A repetition operation applied to a regular expression.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct Repetition {
+ /// The span of this operation.
+ pub span: Span,
+ /// The repetition operator that was applied.
+ pub op: RepetitionOp,
+ /// Whether this operation was applied greedily or not.
+ pub greedy: bool,
+ /// The regular expression under repetition.
+ pub ast: Box<Ast>,
+ }
+
+ /// The repetition operator itself.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct RepetitionOp {
+ /// The span of this operator. This includes things like `+`, `*?` and
+ /// `{m,n}`.
+ pub span: Span,
+ /// The kind of this operator.
+ pub kind: RepetitionKind,
+ }
+
+ /// The kind of a repetition operator; variants are documented by syntax.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum RepetitionKind {
+ /// `?`
+ ZeroOrOne,
+ /// `*`
+ ZeroOrMore,
+ /// `+`
+ OneOrMore,
+ /// `{m,n}`
+ Range(RepetitionRange),
+ }
+
+ /// A range repetition operator, i.e., one of the forms written in braces.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum RepetitionRange {
+ /// `{m}`
+ Exactly(u32),
+ /// `{m,}`
+ AtLeast(u32),
+ /// `{m,n}`
+ Bounded(u32, u32),
+ }
+
+ impl RepetitionRange {
+     /// Reports whether this repetition range is well formed.
+     ///
+     /// Only a `Bounded` range can be invalid, and only when its first
+     /// bound is greater than its second.
+     pub fn is_valid(&self) -> bool {
+         match *self {
+             RepetitionRange::Bounded(lo, hi) => lo <= hi,
+             RepetitionRange::Exactly(_) | RepetitionRange::AtLeast(_) => true,
+         }
+     }
+ }
+
+ /// A grouped regular expression.
+ ///
+ /// This includes both capturing and non-capturing groups. This does **not**
+ /// include flag-only groups like `(?is)`, but does include any group that
+ /// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
+ /// `(?is:a)`.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct Group {
+ /// The span of this group.
+ pub span: Span,
+ /// The kind of this group.
+ pub kind: GroupKind,
+ /// The regular expression in this group.
+ pub ast: Box<Ast>,
+ }
+
+ impl Group {
+     /// Borrow the (possibly empty) flags of a non-capturing group.
+     ///
+     /// Capturing groups carry no flags, so `None` is returned for them.
+     pub fn flags(&self) -> Option<&Flags> {
+         if let GroupKind::NonCapturing(ref flags) = self.kind {
+             Some(flags)
+         } else {
+             None
+         }
+     }
+
+     /// Reports whether this group is capturing, i.e., anything other than
+     /// `GroupKind::NonCapturing`.
+     pub fn is_capturing(&self) -> bool {
+         !matches!(self.kind, GroupKind::NonCapturing(_))
+     }
+
+     /// Returns the capture index of this group, if it has one.
+     ///
+     /// The result is `Some` exactly when `is_capturing` returns `true`. For
+     /// a named group, the index stored in its `CaptureName` is returned.
+     pub fn capture_index(&self) -> Option<u32> {
+         match &self.kind {
+             GroupKind::NonCapturing(_) => None,
+             GroupKind::CaptureIndex(index) => Some(*index),
+             GroupKind::CaptureName(name) => Some(name.index),
+         }
+     }
+ }
+
+ /// The kind of a group; each variant is documented by example syntax.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum GroupKind {
+ /// `(a)`
+ CaptureIndex(u32),
+ /// `(?P<name>a)`
+ CaptureName(CaptureName),
+ /// `(?:a)` and `(?i:a)`
+ NonCapturing(Flags),
+ }
+
+ /// A capture name.
+ ///
+ /// This corresponds to the name itself between the angle brackets in, e.g.,
+ /// `(?P<foo>expr)`.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct CaptureName {
+ /// The span of this capture name.
+ pub span: Span,
+ /// The capture name.
+ pub name: String,
+ /// The capture index of the group that carries this name.
+ pub index: u32,
+ }
+
+ /// A group of flags that is not applied to a particular regular expression.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct SetFlags {
+ /// The span of these flags, including the grouping parentheses.
+ pub span: Span,
+ /// The sequence of flags inside the parentheses.
+ pub flags: Flags,
+ }
+
+ /// A group of flags.
+ ///
+ /// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct Flags {
+ /// The span of this group of flags.
+ pub span: Span,
+ /// A sequence of flag items, in order. Each item is either a flag or a
+ /// negation operator.
+ pub items: Vec<FlagsItem>,
+ }
+
+ impl Flags {
+     /// Append the given item to this sequence of flags unless an item of
+     /// the same kind is already present.
+     ///
+     /// Returns `None` when the item was appended. When an existing item has
+     /// the same kind, nothing is appended and `Some(i)` is returned, where
+     /// `items[i].kind == item.kind`.
+     pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
+         let existing = self.items.iter().position(|x| x.kind == item.kind);
+         if existing.is_none() {
+             self.items.push(item);
+         }
+         existing
+     }
+
+     /// Looks up the state of the given flag in this set.
+     ///
+     /// * `Some(true)` if the flag is present and no negation operator
+     ///   precedes it.
+     /// * `Some(false)` if the flag is present and a negation operator
+     ///   precedes it.
+     /// * `None` if the flag is not present.
+     pub fn flag_state(&self, flag: Flag) -> Option<bool> {
+         let mut seen_negation = false;
+         for item in self.items.iter() {
+             match item.kind {
+                 FlagsItemKind::Negation => seen_negation = true,
+                 FlagsItemKind::Flag(found) => {
+                     if found == flag {
+                         return Some(!seen_negation);
+                     }
+                 }
+             }
+         }
+         None
+     }
+ }
+
+ /// A single item in a group of flags.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct FlagsItem {
+ /// The span of this item.
+ pub span: Span,
+ /// The kind of this item: a flag or a negation operator.
+ pub kind: FlagsItemKind,
+ }
+
+ /// The kind of an item in a group of flags.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum FlagsItemKind {
+ /// A negation operator applied to all subsequent flags in the enclosing
+ /// group, e.g., the `-` in `is-u`.
+ Negation,
+ /// A single flag in a group.
+ Flag(Flag),
+ }
+
+ impl FlagsItemKind {
+     /// Reports whether this item is the negation operator rather than a
+     /// flag.
+     pub fn is_negation(&self) -> bool {
+         matches!(*self, FlagsItemKind::Negation)
+     }
+ }
+
+ /// A single flag; each variant is documented by its flag letter.
+ #[derive(Clone, Copy, Debug, Eq, PartialEq)]
+ pub enum Flag {
+ /// `i`
+ CaseInsensitive,
+ /// `m`
+ MultiLine,
+ /// `s`
+ DotMatchesNewLine,
+ /// `U`
+ SwapGreed,
+ /// `u`
+ Unicode,
+ /// `x`
+ IgnoreWhitespace,
+ }
+
+ /// A custom `Drop` impl is used for `Ast` such that it uses constant stack
+ /// space but heap space proportional to the depth of the `Ast`.
+ impl Drop for Ast {
+ fn drop(&mut self) {
+ use std::mem;
+
+ // Fast path: return before allocating the explicit stack below when
+ // this node holds no nested `Ast` that needs to be unwound.
+ match *self {
+ Ast::Empty(_)
+ | Ast::Flags(_)
+ | Ast::Literal(_)
+ | Ast::Dot(_)
+ | Ast::Assertion(_)
+ // Classes are recursive, so they get their own Drop impl.
+ | Ast::Class(_) => return,
+ Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
+ Ast::Group(ref x) if !x.ast.has_subexprs() => return,
+ Ast::Alternation(ref x) if x.asts.is_empty() => return,
+ Ast::Concat(ref x) if x.asts.is_empty() => return,
+ _ => {}
+ }
+
+ // Placeholder values that are swapped in wherever a child is moved out.
+ let empty_span = || Span::splat(Position::new(0, 0, 0));
+ let empty_ast = || Ast::Empty(empty_span());
+ // Move this node onto a heap-allocated stack, then repeatedly pop a
+ // node and move its children onto the same stack. Each popped node is
+ // dropped at the end of its loop iteration, after its children have
+ // been moved out of it.
+ let mut stack = vec![mem::replace(self, empty_ast())];
+ while let Some(mut ast) = stack.pop() {
+ match ast {
+ Ast::Empty(_)
+ | Ast::Flags(_)
+ | Ast::Literal(_)
+ | Ast::Dot(_)
+ | Ast::Assertion(_)
+ // Classes are recursive, so they get their own Drop impl.
+ | Ast::Class(_) => {}
+ Ast::Repetition(ref mut x) => {
+ stack.push(mem::replace(&mut x.ast, empty_ast()));
+ }
+ Ast::Group(ref mut x) => {
+ stack.push(mem::replace(&mut x.ast, empty_ast()));
+ }
+ Ast::Alternation(ref mut x) => {
+ stack.extend(x.asts.drain(..));
+ }
+ Ast::Concat(ref mut x) => {
+ stack.extend(x.asts.drain(..));
+ }
+ }
+ }
+ }
+ }
+
+ /// A custom `Drop` impl is used for `ClassSet` such that it uses constant
+ /// stack space but heap space proportional to the depth of the `ClassSet`.
+ impl Drop for ClassSet {
+ fn drop(&mut self) {
+ use std::mem;
+
+ // Fast path: return before allocating the explicit stack below when
+ // this set is a leaf item, wraps an empty bracketed class or an empty
+ // union, or is a binary op whose two sides are both empty.
+ match *self {
+ ClassSet::Item(ref item) => match *item {
+ ClassSetItem::Empty(_)
+ | ClassSetItem::Literal(_)
+ | ClassSetItem::Range(_)
+ | ClassSetItem::Ascii(_)
+ | ClassSetItem::Unicode(_)
+ | ClassSetItem::Perl(_) => return,
+ ClassSetItem::Bracketed(ref x) => {
+ if x.kind.is_empty() {
+ return;
+ }
+ }
+ ClassSetItem::Union(ref x) => {
+ if x.items.is_empty() {
+ return;
+ }
+ }
+ },
+ ClassSet::BinaryOp(ref op) => {
+ if op.lhs.is_empty() && op.rhs.is_empty() {
+ return;
+ }
+ }
+ }
+
+ // Placeholder values that are swapped in wherever a child is moved out.
+ let empty_span = || Span::splat(Position::new(0, 0, 0));
+ let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
+ // Move this set onto a heap-allocated stack, then repeatedly pop a set
+ // and move its nested sets onto the same stack. Each popped set is
+ // dropped at the end of its loop iteration, after its nested sets have
+ // been moved out of it.
+ let mut stack = vec![mem::replace(self, empty_set())];
+ while let Some(mut set) = stack.pop() {
+ match set {
+ ClassSet::Item(ref mut item) => match *item {
+ ClassSetItem::Empty(_)
+ | ClassSetItem::Literal(_)
+ | ClassSetItem::Range(_)
+ | ClassSetItem::Ascii(_)
+ | ClassSetItem::Unicode(_)
+ | ClassSetItem::Perl(_) => {}
+ ClassSetItem::Bracketed(ref mut x) => {
+ stack.push(mem::replace(&mut x.kind, empty_set()));
+ }
+ ClassSetItem::Union(ref mut x) => {
+ // Union members are items, so wrap each one as a set.
+ stack.extend(x.items.drain(..).map(ClassSet::Item));
+ }
+ },
+ ClassSet::BinaryOp(ref mut op) => {
+ stack.push(mem::replace(&mut op.lhs, empty_set()));
+ stack.push(mem::replace(&mut op.rhs, empty_set()));
+ }
+ }
+ }
+ }
+ }
+
+ #[cfg(test)]
+ mod tests {
+ use super::*;
+
+ // We use a thread with an explicit stack size to test that our destructor
+ // for Ast can handle arbitrarily sized expressions in constant stack
+ // space. In case we run on a platform without threads (WASM?), we limit
+ // this test to Windows/Unix.
+ #[test]
+ #[cfg(any(unix, windows))]
+ fn no_stack_overflow_on_drop() {
+ use std::thread;
+
+ let run = || {
+ let span = || Span::splat(Position::new(0, 0, 0));
+ // Build 200 groups nested inside one another, innermost first.
+ let mut ast = Ast::Empty(span());
+ for i in 0..200 {
+ ast = Ast::Group(Group {
+ span: span(),
+ kind: GroupKind::CaptureIndex(i),
+ ast: Box::new(ast),
+ });
+ }
+ assert!(!ast.is_empty());
+ // `ast` is dropped when this closure returns, i.e., on the
+ // small-stack thread that runs it.
+ };
+
+ // We run our test on a thread with a small stack size so we can
+ // force the issue more easily.
+ thread::Builder::new()
+ .stack_size(1 << 10)
+ .spawn(run)
+ .unwrap()
+ .join()
+ .unwrap();
+ }
+ }
diff --git a/vendor/regex-syntax/src/ast/parse.rs b/vendor/regex-syntax/src/ast/parse.rs
new file mode 100644
index 000000000..9824661c9
--- /dev/null
+++ b/vendor/regex-syntax/src/ast/parse.rs
@@ -0,0 +1,5944 @@
+/*!
+This module provides a regular expression parser.
+*/
+
+use std::borrow::Borrow;
+use std::cell::{Cell, RefCell};
+use std::mem;
+use std::result;
+
+use crate::ast::{self, Ast, Position, Span};
+use crate::either::Either;
+
+use crate::is_meta_character;
+
+type Result<T> = result::Result<T, ast::Error>;
+
+ /// A primitive is an expression with no sub-expressions. This includes
+ /// literals, assertions and non-set character classes. This representation
+ /// is used as intermediate state in the parser.
+ ///
+ /// This does not include ASCII character classes, since they can only appear
+ /// within a set character class.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ enum Primitive {
+ /// A literal.
+ Literal(ast::Literal),
+ /// An assertion.
+ Assertion(ast::Assertion),
+ /// A dot, represented only by its span.
+ Dot(Span),
+ /// A Perl character class.
+ Perl(ast::ClassPerl),
+ /// A Unicode character class.
+ Unicode(ast::ClassUnicode),
+ }
+
+ impl Primitive {
+     /// Borrow the span stored in this primitive.
+     fn span(&self) -> &Span {
+         match self {
+             Primitive::Dot(span) => span,
+             Primitive::Literal(lit) => &lit.span,
+             Primitive::Assertion(assertion) => &assertion.span,
+             Primitive::Perl(cls) => &cls.span,
+             Primitive::Unicode(cls) => &cls.span,
+         }
+     }
+
+     /// Convert this primitive into the corresponding `Ast` node.
+     ///
+     /// The two class variants are wrapped in `Ast::Class`; every other
+     /// variant maps to the `Ast` variant of the same name.
+     fn into_ast(self) -> Ast {
+         let class = match self {
+             Primitive::Literal(lit) => return Ast::Literal(lit),
+             Primitive::Assertion(assertion) => {
+                 return Ast::Assertion(assertion)
+             }
+             Primitive::Dot(span) => return Ast::Dot(span),
+             Primitive::Perl(cls) => ast::Class::Perl(cls),
+             Primitive::Unicode(cls) => ast::Class::Unicode(cls),
+         };
+         Ast::Class(class)
+     }
+
+     /// Convert this primitive into an item of a character class set.
+     ///
+     /// Literals and Perl/Unicode classes are accepted. Anything else (an
+     /// assertion or a dot) yields a `ClassEscapeInvalid` error located at
+     /// the primitive's span.
+     fn into_class_set_item<P: Borrow<Parser>>(
+         self,
+         p: &ParserI<'_, P>,
+     ) -> Result<ast::ClassSetItem> {
+         let item = match self {
+             Primitive::Literal(lit) => ast::ClassSetItem::Literal(lit),
+             Primitive::Perl(cls) => ast::ClassSetItem::Perl(cls),
+             Primitive::Unicode(cls) => ast::ClassSetItem::Unicode(cls),
+             other => {
+                 let span = *other.span();
+                 return Err(p.error(span, ast::ErrorKind::ClassEscapeInvalid));
+             }
+         };
+         Ok(item)
+     }
+
+     /// Convert this primitive into a literal for use in a character class.
+     /// Literals are the only primitives that may appear as the ends of a
+     /// range.
+     ///
+     /// Anything other than a literal (a class, an assertion or a dot)
+     /// yields a `ClassRangeLiteral` error located at the primitive's span.
+     fn into_class_literal<P: Borrow<Parser>>(
+         self,
+         p: &ParserI<'_, P>,
+     ) -> Result<ast::Literal> {
+         match self {
+             Primitive::Literal(lit) => Ok(lit),
+             other => {
+                 let span = *other.span();
+                 Err(p.error(span, ast::ErrorKind::ClassRangeLiteral))
+             }
+         }
+     }
+ }
+
+ /// Reports whether `c` is an ASCII hexadecimal digit, i.e., one of `0-9`,
+ /// `a-f` or `A-F`.
+ fn is_hex(c: char) -> bool {
+     c.is_ascii_hexdigit()
+ }
+
+ /// Reports whether `c` may appear in a capture group name.
+ ///
+ /// ASCII letters and `_` are always accepted. ASCII digits, `.`, `[` and
+ /// `]` are accepted only when `first` is false, i.e., when `c` is not the
+ /// first character of the name.
+ fn is_capture_char(c: char, first: bool) -> bool {
+     match c {
+         '_' | 'A'..='Z' | 'a'..='z' => true,
+         '0'..='9' | '.' | '[' | ']' => !first,
+         _ => false,
+     }
+ }
+
+ /// A builder for a regular expression parser.
+ ///
+ /// This builder permits modifying configuration options for the parser.
+ #[derive(Clone, Debug)]
+ pub struct ParserBuilder {
+ // Whether the built parser starts with verbose mode enabled.
+ ignore_whitespace: bool,
+ // The nesting limit handed to the built parser.
+ nest_limit: u32,
+ // Whether the built parser supports octal syntax.
+ octal: bool,
+ }
+
+ impl Default for ParserBuilder {
+     /// Equivalent to `ParserBuilder::new()`.
+     fn default() -> Self {
+         Self::new()
+     }
+ }
+
+ impl ParserBuilder {
+     /// Create a new parser builder with a default configuration: verbose
+     /// mode disabled, octal syntax disabled and a nest limit of `250`.
+     pub fn new() -> ParserBuilder {
+         ParserBuilder {
+             octal: false,
+             nest_limit: 250,
+             ignore_whitespace: false,
+         }
+     }
+
+     /// Build a parser from this configuration.
+     ///
+     /// No pattern is supplied here; the pattern is given to the returned
+     /// parser's `parse` or `parse_with_comments` method.
+     pub fn build(&self) -> Parser {
+         let start = Position { offset: 0, line: 1, column: 1 };
+         Parser {
+             pos: Cell::new(start),
+             capture_index: Cell::new(0),
+             nest_limit: self.nest_limit,
+             octal: self.octal,
+             initial_ignore_whitespace: self.ignore_whitespace,
+             ignore_whitespace: Cell::new(self.ignore_whitespace),
+             comments: RefCell::new(Vec::new()),
+             stack_group: RefCell::new(Vec::new()),
+             stack_class: RefCell::new(Vec::new()),
+             capture_names: RefCell::new(Vec::new()),
+             scratch: RefCell::new(String::new()),
+         }
+     }
+
+     /// Set the nesting limit for this parser.
+     ///
+     /// The nesting limit bounds how deep the abstract syntax tree may be.
+     /// When the AST exceeds the limit (e.g., because of too many nested
+     /// groups), the parser returns an error.
+     ///
+     /// The limit is a heuristic that protects consumers which walk an
+     /// `Ast` with explicit recursion from overflowing their stack. This
+     /// crate itself never recurses that way (it uses constant stack space
+     /// and moves the call stack to the heap), but other crates may.
+     ///
+     /// The limit is only checked once the entire `Ast` has been parsed.
+     /// Callers that want to bound heap usage should therefore limit the
+     /// length, in bytes, of the pattern string instead. That works because
+     /// this parser limits itself to heap space proportional to the length
+     /// of the pattern string.
+     ///
+     /// A nest limit of `0` produces a nest limit error for most patterns
+     /// but not all. For example, it permits `a` but not `ab`, since `ab`
+     /// requires a concatenation, which has a nest depth of `1`. In general,
+     /// nest depth does not manifest in an obvious way in the concrete
+     /// syntax, so the limit should not be used in a granular way.
+     pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+         self.nest_limit = limit;
+         self
+     }
+
+     /// Whether to support octal syntax or not.
+     ///
+     /// Octal syntax is a little-known way of writing Unicode codepoints in
+     /// a regular expression. For example, `a`, `\x61`, `\u0061` and `\141`
+     /// are all equivalent regular expressions, where the last one uses
+     /// octal syntax.
+     ///
+     /// Supporting octal syntax is not a problem in itself, but it makes
+     /// good error messages harder. In PCRE based regex engines, syntax like
+     /// `\0` invokes a backreference, which is explicitly unsupported in
+     /// Rust's regex engine even though many users expect it to work.
+     /// Therefore, when octal support is disabled, the error message
+     /// explicitly mentions that backreferences aren't supported.
+     ///
+     /// Octal syntax is disabled by default.
+     pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+         self.octal = yes;
+         self
+     }
+
+     /// Enable verbose mode in the regular expression.
+     ///
+     /// When enabled, verbose mode permits insignificant whitespace in many
+     /// places in the regular expression, as well as comments. Comments
+     /// start with `#` and continue until the end of the line.
+     ///
+     /// This is disabled by default. Regardless of this setting, it may be
+     /// selectively enabled inside the regular expression with the `x` flag.
+     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+         self.ignore_whitespace = yes;
+         self
+     }
+ }
+
+ /// A regular expression parser.
+ ///
+ /// This parses a string representation of a regular expression into an
+ /// abstract syntax tree. The size of the tree is proportional to the length
+ /// of the regular expression pattern.
+ ///
+ /// A `Parser` can be configured in more detail via a
+ /// [`ParserBuilder`](struct.ParserBuilder.html).
+ #[derive(Clone, Debug)]
+ pub struct Parser {
+ /// The current position of the parser.
+ pos: Cell<Position>,
+ /// The current capture index.
+ capture_index: Cell<u32>,
+ /// The maximum number of open parens/brackets allowed. If the parser
+ /// exceeds this number, then an error is returned.
+ /// NOTE(review): `ParserBuilder::nest_limit` describes this as a limit on
+ /// the depth of the AST; confirm which wording is accurate.
+ nest_limit: u32,
+ /// Whether to support octal syntax or not. When `false`, the parser will
+ /// return an error helpfully pointing out that backreferences are not
+ /// supported.
+ octal: bool,
+ /// The initial setting for `ignore_whitespace` as provided by
+ /// `ParserBuilder`. It is used when resetting the parser's state.
+ initial_ignore_whitespace: bool,
+ /// Whether whitespace should be ignored. When enabled, comments are
+ /// also permitted.
+ ignore_whitespace: Cell<bool>,
+ /// A list of comments, in order of appearance.
+ comments: RefCell<Vec<ast::Comment>>,
+ /// A stack of grouped sub-expressions, including alternations.
+ stack_group: RefCell<Vec<GroupState>>,
+ /// A stack of nested character classes. This is only non-empty when
+ /// parsing a class.
+ stack_class: RefCell<Vec<ClassState>>,
+ /// A sequence of capture names, kept sorted by name. This is used to
+ /// detect duplicate capture names and report an error if one is detected.
+ capture_names: RefCell<Vec<ast::CaptureName>>,
+ /// A scratch buffer used in various places. Mostly this is used to
+ /// accumulate relevant characters from parts of a pattern.
+ scratch: RefCell<String>,
+ }
+
+ /// ParserI is the internal parser implementation.
+ ///
+ /// We use this separate type so that we can carry the provided pattern string
+ /// along with us. In particular, a `Parser`'s internal state is not tied to
+ /// any one pattern, but a `ParserI` is.
+ ///
+ /// This type also lets us use `ParserI<&Parser>` in production code while
+ /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
+ /// work against the internal interface of the parser.
+ #[derive(Clone, Debug)]
+ struct ParserI<'s, P> {
+ /// The parser state/configuration.
+ parser: P,
+ /// The full regular expression provided by the user.
+ pattern: &'s str,
+ }
+
+ /// GroupState represents a single stack frame while parsing nested groups
+ /// and alternations. Each frame records the state up to an opening parenthesis
+ /// or an alternation bar `|`.
+ #[derive(Clone, Debug)]
+ enum GroupState {
+ /// This state is pushed whenever an opening group is found.
+ Group {
+ /// The concatenation immediately preceding the opening group.
+ concat: ast::Concat,
+ /// The group that has been opened. Its sub-AST is always empty.
+ group: ast::Group,
+ /// Whether this group has the `x` flag enabled or not.
+ ignore_whitespace: bool,
+ },
+ /// This state is pushed whenever a new alternation branch is found. If
+ /// an alternation branch is found and this state is at the top of the
+ /// stack, then this state should be modified to include the new
+ /// alternation.
+ Alternation(ast::Alternation),
+ }
+
+ /// ClassState represents a single stack frame while parsing character classes.
+ /// Each frame records the state up to an intersection, difference, symmetric
+ /// difference or nested class.
+ ///
+ /// Note that a parser's character class stack is only non-empty when parsing
+ /// a character class. In all other cases, it is empty.
+ #[derive(Clone, Debug)]
+ enum ClassState {
+ /// This state is pushed whenever an opening bracket is found.
+ Open {
+ /// The union of class items immediately preceding this class.
+ union: ast::ClassSetUnion,
+ /// The class that has been opened. Typically this just corresponds
+ /// to the `[`, but it can also include `[^` since `^` indicates
+ /// negation of the class.
+ set: ast::ClassBracketed,
+ },
+ /// This state is pushed when an operator is seen. When popped, the stored
+ /// set becomes the left hand side of the operator.
+ Op {
+ /// The type of the operation, i.e., `&&`, `--` or `~~`.
+ kind: ast::ClassSetBinaryOpKind,
+ /// The left-hand side of the operator.
+ lhs: ast::ClassSet,
+ },
+ }
+
+ impl Parser {
+     /// Create a new parser with a default configuration.
+     ///
+     /// The parser can be run with either the `parse` or `parse_with_comments`
+     /// methods. The parse methods return an abstract syntax tree.
+     ///
+     /// To set configuration options on the parser, use
+     /// [`ParserBuilder`](struct.ParserBuilder.html).
+     pub fn new() -> Parser {
+         ParserBuilder::new().build()
+     }
+
+     /// Parse the regular expression into an abstract syntax tree.
+     pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
+         ParserI::new(self, pattern).parse()
+     }
+
+     /// Parse the regular expression and return an abstract syntax tree with
+     /// all of the comments found in the pattern.
+     pub fn parse_with_comments(
+         &mut self,
+         pattern: &str,
+     ) -> Result<ast::WithComments> {
+         ParserI::new(self, pattern).parse_with_comments()
+     }
+
+     /// Reset the internal state of a parser.
+     ///
+     /// This is called at the beginning of every parse. This prevents the
+     /// parser from running with inconsistent state (say, if a previous
+     /// invocation returned an error and the parser is reused).
+     ///
+     /// Every piece of mutable state is restored to the value that
+     /// `ParserBuilder::build` gives it. The configuration fields
+     /// (`nest_limit`, `octal` and `initial_ignore_whitespace`) are left
+     /// untouched.
+     fn reset(&self) {
+         // These settings should be in line with the construction
+         // in `ParserBuilder::build`.
+         self.pos.set(Position { offset: 0, line: 1, column: 1 });
+         // Restart capture numbering; otherwise a reused parser would keep
+         // counting from where the previous parse stopped.
+         self.capture_index.set(0);
+         self.ignore_whitespace.set(self.initial_ignore_whitespace);
+         self.comments.borrow_mut().clear();
+         self.stack_group.borrow_mut().clear();
+         self.stack_class.borrow_mut().clear();
+         // Forget names recorded by a previous parse; otherwise they would
+         // be reported as duplicates in an unrelated pattern.
+         self.capture_names.borrow_mut().clear();
+         self.scratch.borrow_mut().clear();
+     }
+ }
+
+impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
+ /// Build an internal parser from a parser configuration and a pattern.
+ fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
+     ParserI { parser, pattern }
+ }
+
+ /// Return a reference to the parser state.
+ fn parser(&self) -> &Parser {
+     // `P` is only known to be borrowable as a `Parser`.
+     <P as Borrow<Parser>>::borrow(&self.parser)
+ }
+
+ /// Return a reference to the pattern being parsed.
+ fn pattern(&self) -> &str {
+     // The field is already a string slice, so hand it out directly.
+     self.pattern
+ }
+
+ /// Create a new error with the given span and error type.
+ fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
+     // Each error carries its own copy of the pattern being parsed.
+     let pattern = self.pattern().to_string();
+     ast::Error { kind, pattern, span }
+ }
+
+ /// Return the current offset of the parser.
+ ///
+ /// The offset starts at `0` from the beginning of the regular expression
+ /// pattern string.
+ fn offset(&self) -> usize {
+     let pos = self.parser().pos.get();
+     pos.offset
+ }
+
+ /// Return the current line number of the parser.
+ ///
+ /// The line number starts at `1`.
+ fn line(&self) -> usize {
+     let pos = self.parser().pos.get();
+     pos.line
+ }
+
+ /// Return the current column of the parser.
+ ///
+ /// The column number starts at `1` and is reset whenever a `\n` is seen.
+ fn column(&self) -> usize {
+     let pos = self.parser().pos.get();
+     pos.column
+ }
+
+ /// Return the next capturing index. Each subsequent call increments the
+ /// internal index.
+ ///
+ /// The span given should correspond to the location of the opening
+ /// parenthesis.
+ ///
+ /// If the capture limit is exceeded, then an error is returned.
+ fn next_capture_index(&self, span: Span) -> Result<u32> {
+     let counter = &self.parser().capture_index;
+     match counter.get().checked_add(1) {
+         Some(next) => {
+             counter.set(next);
+             Ok(next)
+         }
+         // The counter is left unchanged when it cannot be incremented.
+         None => Err(self.error(span, ast::ErrorKind::CaptureLimitExceeded)),
+     }
+ }
+
+ /// Adds the given capture name to this parser. If this capture name has
+ /// already been used, then an error is returned.
+ fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
+     let mut names = self.parser().capture_names.borrow_mut();
+     // The list is kept sorted by name, so a binary search either finds
+     // the earlier use of this name or the position to insert it at.
+     let lookup =
+         names.binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str());
+     match lookup {
+         Ok(existing) => {
+             let original = names[existing].span;
+             Err(self.error(
+                 cap.span,
+                 ast::ErrorKind::GroupNameDuplicate { original },
+             ))
+         }
+         Err(slot) => {
+             names.insert(slot, cap.clone());
+             Ok(())
+         }
+     }
+ }
+
+ /// Return whether the parser should ignore whitespace or not.
+ fn ignore_whitespace(&self) -> bool {
+     let flag = &self.parser().ignore_whitespace;
+     flag.get()
+ }
+
+ /// Return the character at the current position of the parser.
+ ///
+ /// This panics if the current position does not point to a valid char.
+ fn char(&self) -> char {
+     let here = self.offset();
+     self.char_at(here)
+ }
+
+ /// Return the character at the given position.
+ ///
+ /// This panics if the given position does not point to a valid char.
+ fn char_at(&self, i: usize) -> char {
+     let rest = &self.pattern()[i..];
+     match rest.chars().next() {
+         Some(c) => c,
+         None => panic!("expected char at offset {}", i),
+     }
+ }
+
+    /// Advance the parser by one Unicode scalar value, updating the offset,
+    /// line and column of the current position.
+    ///
+    /// Returns `false` if the parser was already at the end of the input
+    /// (in which case nothing changes) or if this bump reached the end of
+    /// the input. Otherwise, `true` is returned.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the line or column counter would overflow.
+    fn bump(&self) -> bool {
+        if self.is_eof() {
+            return false;
+        }
+        let here = self.pos();
+        let c = self.char();
+        // A line feed starts a new line at column 1; anything else just
+        // moves one column to the right.
+        let (line, column) = if c == '\n' {
+            (here.line.checked_add(1).unwrap(), 1)
+        } else {
+            (here.line, here.column.checked_add(1).unwrap())
+        };
+        let offset = here.offset + c.len_utf8();
+        self.parser().pos.set(Position {
+            offset: offset,
+            line: line,
+            column: column,
+        });
+        !self.is_eof()
+    }
+
+    /// Consume `prefix` if the unparsed remainder of the pattern begins
+    /// with it.
+    ///
+    /// On a match, the parser is bumped once per character of `prefix` and
+    /// `true` is returned. Otherwise, the parser is left untouched and
+    /// `false` is returned.
+    fn bump_if(&self, prefix: &str) -> bool {
+        let rest = &self.pattern()[self.offset()..];
+        if !rest.starts_with(prefix) {
+            return false;
+        }
+        // Go through `bump` for each char so that line and column tracking
+        // stays correct.
+        for _ in prefix.chars() {
+            self.bump();
+        }
+        true
+    }
+
+    /// Returns true if and only if the parser is positioned at a look-around
+    /// prefix, i.e., one of `?=`, `?!`, `?<=` or `?<!`. The conditions under
+    /// which this returns true must always correspond to a regular
+    /// expression that would otherwise be considered invalid.
+    ///
+    /// Note that when a prefix matches, it is also consumed (via
+    /// `bump_if`). The prefixes are tried in the order listed above.
+    ///
+    /// This should only be called immediately after parsing the opening of
+    /// a group or a set of flags.
+    fn is_lookaround_prefix(&self) -> bool {
+        const PREFIXES: [&str; 4] = ["?=", "?!", "?<=", "?<!"];
+        PREFIXES.iter().any(|prefix| self.bump_if(prefix))
+    }
+
+    /// Advance past the current character and then, via `bump_space`, past
+    /// any whitespace and comments that follow (which only has an effect
+    /// when the `x` flag is enabled).
+    ///
+    /// Returns true if and only if the parser is not at EOF afterwards.
+    fn bump_and_bump_space(&self) -> bool {
+        if self.bump() {
+            self.bump_space();
+            !self.is_eof()
+        } else {
+            false
+        }
+    }
+
+    /// When the `x` flag is enabled (whitespace insensitivity with
+    /// comments), advance the parser past all whitespace and `#` comments
+    /// until it reaches a character that is neither, or EOF. Every comment
+    /// skipped this way is recorded in the parser's list of comments.
+    ///
+    /// When the `x` flag is disabled, this does nothing.
+    ///
+    /// Call this at the points in the grammar where arbitrary whitespace is
+    /// permitted under the `x` flag. For example, `{ 5 , 6}` is equivalent
+    /// to `{5,6}`.
+    fn bump_space(&self) {
+        if !self.ignore_whitespace() {
+            return;
+        }
+        loop {
+            if self.is_eof() {
+                break;
+            }
+            let c = self.char();
+            if c.is_whitespace() {
+                self.bump();
+                continue;
+            }
+            if c != '#' {
+                break;
+            }
+            // A comment runs from `#` through the next line feed (or EOF).
+            // Its text excludes both the `#` and the line feed, while its
+            // span includes them.
+            let comment_start = self.pos();
+            self.bump();
+            let mut text = String::new();
+            while !self.is_eof() {
+                let c = self.char();
+                self.bump();
+                if c == '\n' {
+                    break;
+                }
+                text.push(c);
+            }
+            self.parser().comments.borrow_mut().push(ast::Comment {
+                span: Span::new(comment_start, self.pos()),
+                comment: text,
+            });
+        }
+    }
+
+    /// Look at the character immediately after the current one without
+    /// moving the parser.
+    ///
+    /// Returns `None` if the parser is at EOF or if the current character
+    /// is the last one in the pattern.
+    fn peek(&self) -> Option<char> {
+        if self.is_eof() {
+            return None;
+        }
+        let next_offset = self.offset() + self.char().len_utf8();
+        self.pattern()[next_offset..].chars().next()
+    }
+
+    /// Like `peek`, but when the parser is in whitespace insensitive mode,
+    /// whitespace and `#` comments following the current character are
+    /// skipped first.
+    ///
+    /// Returns the first character after the current one that is neither
+    /// whitespace nor part of a comment, or `None` if no such character
+    /// exists (including when the parser is already at EOF). When
+    /// whitespace insensitive mode is disabled, this is exactly `peek`.
+    ///
+    /// The parser is never advanced.
+    fn peek_space(&self) -> Option<char> {
+        if !self.ignore_whitespace() {
+            return self.peek();
+        }
+        if self.is_eof() {
+            return None;
+        }
+        let start = self.offset() + self.char().len_utf8();
+        let mut in_comment = false;
+        for c in self.pattern()[start..].chars() {
+            if in_comment {
+                // A comment extends through the next line feed, mirroring
+                // `bump_space`. This has to be tested before the whitespace
+                // check because `\n` is itself whitespace; otherwise the
+                // end of a comment would never be noticed.
+                if c == '\n' {
+                    in_comment = false;
+                }
+            } else if c == '#' {
+                in_comment = true;
+            } else if !c.is_whitespace() {
+                return Some(c);
+            }
+        }
+        // Only whitespace and/or comments remain, so there is nothing
+        // meaningful left to peek at.
+        None
+    }
+
+    /// Report whether the parser has consumed the whole pattern, i.e.,
+    /// whether its offset equals the pattern's length in bytes. When this is
+    /// true, the next call to `bump` returns false.
+    fn is_eof(&self) -> bool {
+        let end = self.pattern().len();
+        end == self.offset()
+    }
+
+    /// Fetch a copy of the parser's current position (offset, line and
+    /// column).
+    fn pos(&self) -> Position {
+        let parser = self.parser();
+        parser.pos.get()
+    }
+
+    /// Build an empty span whose start and end are both the parser's
+    /// current position.
+    fn span(&self) -> Span {
+        let here = self.pos();
+        Span::splat(here)
+    }
+
+    /// Build a span that starts at the current position and ends at the
+    /// position immediately following the current character.
+    ///
+    /// If the current character is a `\n`, then the end of the span is at
+    /// column `1` of the next line.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the parser is not positioned at a valid char, or if the
+    /// offset or column would overflow.
+    fn span_char(&self) -> Span {
+        let c = self.char();
+        let end_offset = self.offset().checked_add(c.len_utf8()).unwrap();
+        let next_column = self.column().checked_add(1).unwrap();
+        let (end_line, end_column) = if c == '\n' {
+            (self.line() + 1, 1)
+        } else {
+            (self.line(), next_column)
+        };
+        let end = Position {
+            offset: end_offset,
+            line: end_line,
+            column: end_column,
+        };
+        Span::new(self.pos(), end)
+    }
+
+    /// Finish the alternation branch `concat` and record it on the parser's
+    /// internal stack. If the top of the stack is already an alternation,
+    /// then the branch is added to it; otherwise a new alternation is
+    /// pushed.
+    ///
+    /// The concatenation returned is empty and begins the next branch.
+    ///
+    /// This asserts that the parser is currently positioned at `|` and
+    /// advances the parser to the character following `|`.
+    #[inline(never)]
+    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), '|');
+        // The finished branch ends just before the `|`.
+        concat.span.end = self.pos();
+        self.push_or_add_alternation(concat);
+        self.bump();
+        let next_branch = ast::Concat { span: self.span(), asts: Vec::new() };
+        Ok(next_branch)
+    }
+
+    /// Record `concat` as a branch of an alternation on the parser's group
+    /// stack.
+    ///
+    /// If the top of the stack is an alternation, then the branch is
+    /// appended to it. Otherwise, a new alternation containing only this
+    /// branch is pushed, spanning from the start of the branch to the
+    /// parser's current position.
+    fn push_or_add_alternation(&self, concat: ast::Concat) {
+        let mut stack = self.parser().stack_group.borrow_mut();
+        match stack.last_mut() {
+            Some(&mut GroupState::Alternation(ref mut alts)) => {
+                alts.asts.push(concat.into_ast());
+            }
+            _ => {
+                let span = Span::new(concat.span.start, self.pos());
+                stack.push(GroupState::Alternation(ast::Alternation {
+                    span: span,
+                    asts: vec![concat.into_ast()],
+                }));
+            }
+        }
+    }
+
+    /// Parse the opening of a group, or a set of flags, beginning at the
+    /// current `(`.
+    ///
+    /// If a group was opened, then the group and its parent concatenation
+    /// (`concat`) are pushed on to the parser's internal stack along with
+    /// the whitespace-insensitivity setting in effect before the group. A
+    /// fresh, empty concatenation for the group's sub-AST is returned.
+    ///
+    /// If a set of flags was found instead (with no group), then `concat`
+    /// is returned with that set of flags appended.
+    ///
+    /// In both cases, if the flags mention the `x` flag, then the parser's
+    /// whitespace-insensitivity setting is updated accordingly.
+    ///
+    /// This asserts that the parser is currently positioned on the opening
+    /// parenthesis.
+    ///
+    /// # Errors
+    ///
+    /// If there was a problem parsing the start of the group, then an error
+    /// is returned.
+    #[inline(never)]
+    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), '(');
+        let group = match self.parse_group()? {
+            Either::Right(group) => group,
+            Either::Left(set) => {
+                let state = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
+                if let Some(enabled) = state {
+                    self.parser().ignore_whitespace.set(enabled);
+                }
+                concat.asts.push(Ast::Flags(set));
+                return Ok(concat);
+            }
+        };
+        // Remember the setting outside the group so that it can be restored
+        // once the group is closed.
+        let outer = self.ignore_whitespace();
+        let inner = match group.flags() {
+            Some(flags) => flags
+                .flag_state(ast::Flag::IgnoreWhitespace)
+                .unwrap_or(outer),
+            None => outer,
+        };
+        self.parser().stack_group.borrow_mut().push(GroupState::Group {
+            concat: concat,
+            group: group,
+            ignore_whitespace: outer,
+        });
+        self.parser().ignore_whitespace.set(inner);
+        Ok(ast::Concat { span: self.span(), asts: vec![] })
+    }
+
+ /// Close the group that is currently open: pop its saved state from the
+ /// parser's internal stack, install the given concatenation (together with
+ /// any pending alternation) as the group's sub-AST, and append the finished
+ /// group to the concatenation that was being built before the group was
+ /// opened. That prior concatenation is returned.
+ ///
+ /// This asserts that the parser is currently positioned on the closing
+ /// parenthesis and, on success, advances the parser to the character
+ /// following the `)`. It also restores the whitespace-insensitivity
+ /// setting that was saved when the group was pushed.
+ ///
+ /// # Errors
+ ///
+ /// If no such group could be popped, then an unopened group error is
+ /// returned, with a span covering the `)`.
+ #[inline(never)]
+ fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
+ use self::GroupState::*;
+
+ assert_eq!(self.char(), ')');
+ let mut stack = self.parser().stack_group.borrow_mut();
+ // The top of the stack is either the group itself, or an alternation
+ // that sits directly on top of the group.
+ let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
+ .pop()
+ {
+ Some(Group { concat, group, ignore_whitespace }) => {
+ (concat, group, ignore_whitespace, None)
+ }
+ Some(Alternation(alt)) => match stack.pop() {
+ Some(Group { concat, group, ignore_whitespace }) => {
+ (concat, group, ignore_whitespace, Some(alt))
+ }
+ // An alternation with no group beneath it means this `)` has
+ // no matching `(`.
+ None | Some(Alternation(_)) => {
+ return Err(self.error(
+ self.span_char(),
+ ast::ErrorKind::GroupUnopened,
+ ));
+ }
+ },
+ None => {
+ return Err(self
+ .error(self.span_char(), ast::ErrorKind::GroupUnopened));
+ }
+ };
+ self.parser().ignore_whitespace.set(ignore_whitespace);
+ // The sub-expression ends just before the `)`, while the group itself
+ // ends just after it. Hence the bump between the two assignments.
+ group_concat.span.end = self.pos();
+ self.bump();
+ group.span.end = self.pos();
+ match alt {
+ Some(mut alt) => {
+ // The given concatenation is the last branch of the alternation.
+ alt.span.end = group_concat.span.end;
+ alt.asts.push(group_concat.into_ast());
+ group.ast = Box::new(alt.into_ast());
+ }
+ None => {
+ group.ast = Box::new(group_concat.into_ast());
+ }
+ }
+ prior_concat.asts.push(Ast::Group(group));
+ Ok(prior_concat)
+ }
+
+    /// Finish parsing by combining `concat` with whatever state remains on
+    /// the parser's internal stack, producing the final AST.
+    ///
+    /// The stack must either be empty, in which case `concat` itself
+    /// becomes the AST, or contain exactly one alternation, in which case
+    /// `concat` becomes its last branch.
+    ///
+    /// This assumes that the parser has advanced to the end.
+    ///
+    /// # Errors
+    ///
+    /// If a group is still on the stack, then a `GroupUnclosed` error
+    /// pointing at that group is returned.
+    #[inline(never)]
+    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
+        concat.span.end = self.pos();
+        let mut stack = self.parser().stack_group.borrow_mut();
+        let finished = match stack.pop() {
+            Some(GroupState::Group { group, .. }) => {
+                return Err(
+                    self.error(group.span, ast::ErrorKind::GroupUnclosed)
+                );
+            }
+            Some(GroupState::Alternation(mut alt)) => {
+                alt.span.end = self.pos();
+                alt.asts.push(concat.into_ast());
+                Ast::Alternation(alt)
+            }
+            None => concat.into_ast(),
+        };
+        // Nothing may be left on the stack at this point.
+        match stack.pop() {
+            None => Ok(finished),
+            Some(GroupState::Group { group, .. }) => {
+                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
+            }
+            Some(GroupState::Alternation(_)) => {
+                // Two adjacent `GroupState::Alternation`s can never be on
+                // the stack, since an alternation is never pushed when one
+                // is already at the top of the stack.
+                unreachable!()
+            }
+        }
+    }
+
+    /// Parse the opening of a (possibly nested) character class and save
+    /// the enclosing context on the parser's class stack.
+    ///
+    /// `parent_union` should be the union of set items built up before the
+    /// `[` was seen; it is stored on the stack together with the newly
+    /// opened set.
+    ///
+    /// This asserts that the parser is positioned at an opening `[`.
+    ///
+    /// # Errors
+    ///
+    /// If there was a problem parsing the opening of the class, then an
+    /// error is returned. Otherwise, a new union of set items for the class
+    /// is returned (which may be populated with either a `]` or a `-`).
+    #[inline(never)]
+    fn push_class_open(
+        &self,
+        parent_union: ast::ClassSetUnion,
+    ) -> Result<ast::ClassSetUnion> {
+        assert_eq!(self.char(), '[');
+
+        let (nested_set, nested_union) = self.parse_set_class_open()?;
+        let state = ClassState::Open { union: parent_union, set: nested_set };
+        self.parser().stack_class.borrow_mut().push(state);
+        Ok(nested_union)
+    }
+
+    /// Close the character class that is currently open and pop its state
+    /// from the class parser stack.
+    ///
+    /// `nested_union` is the last union built before the closing `]` was
+    /// seen. If the class being closed is nested inside another class, then
+    /// the parent's union is returned with the closed class added to it.
+    /// If the stack is empty after popping, then the closed class is the
+    /// "top-level" class (one not nested inside any other class) and is
+    /// returned as the final class AST.
+    ///
+    /// This asserts that the parser is positioned at a `]` and advances the
+    /// parser to the position immediately following the `]`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the stack holds no open class to close. This is expected
+    /// to be impossible: the class parser only starts after seeing a `[`
+    /// and terminates once the stack becomes empty, and the stack never
+    /// holds two consecutive `Op` states.
+    #[inline(never)]
+    fn pop_class(
+        &self,
+        nested_union: ast::ClassSetUnion,
+    ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
+        assert_eq!(self.char(), ']');
+
+        // Fold the final union into any pending binary operation first, so
+        // that the top of the stack is the `Open` state of this class.
+        let rhs = ast::ClassSet::Item(nested_union.into_item());
+        let kind = self.pop_class_op(rhs);
+        let mut stack = self.parser().stack_class.borrow_mut();
+        let (mut union, mut set) = match stack.pop() {
+            Some(ClassState::Open { union, set }) => (union, set),
+            Some(ClassState::Op { .. }) => {
+                panic!("unexpected ClassState::Op")
+            }
+            None => panic!("unexpected empty character class stack"),
+        };
+        self.bump();
+        set.span.end = self.pos();
+        set.kind = kind;
+        if stack.is_empty() {
+            Ok(Either::Right(ast::Class::Bracketed(set)))
+        } else {
+            union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
+            Ok(Either::Left(union))
+        }
+    }
+
+ /// Return an "unclosed class" error whose span points to the most
+ /// recently opened class.
+ ///
+ /// This should only be called while parsing a character class.
+ #[inline(never)]
+ fn unclosed_class_error(&self) -> ast::Error {
+ for state in self.parser().stack_class.borrow().iter().rev() {
+ match *state {
+ ClassState::Open { ref set, .. } => {
+ return self
+ .error(set.span, ast::ErrorKind::ClassUnclosed);
+ }
+ _ => {}
+ }
+ }
+ // We are guaranteed to have a non-empty stack with at least
+ // one open bracket, so we should never get here.
+ panic!("no open character class found")
+ }
+
+    /// Save the class items parsed so far (`next_union`) on the class
+    /// parser's stack as the left hand side of the operator `next_kind`.
+    ///
+    /// If an operation is already pending at the top of the stack, then it
+    /// is completed with `next_union` first and the result becomes the left
+    /// hand side.
+    ///
+    /// An empty set union is returned, which should be used to build the
+    /// right hand side of this operator.
+    #[inline(never)]
+    fn push_class_op(
+        &self,
+        next_kind: ast::ClassSetBinaryOpKind,
+        next_union: ast::ClassSetUnion,
+    ) -> ast::ClassSetUnion {
+        let rhs = ast::ClassSet::Item(next_union.into_item());
+        let lhs = self.pop_class_op(rhs);
+        let pending = ClassState::Op { kind: next_kind, lhs: lhs };
+        self.parser().stack_class.borrow_mut().push(pending);
+        ast::ClassSetUnion { span: self.span(), items: Vec::new() }
+    }
+
+    /// Complete the binary operation pending at the top of the class parser
+    /// stack, if there is one.
+    ///
+    /// If the top of the stack is an operation, then it is popped and
+    /// combined with `rhs` as its right hand side; the resulting binary
+    /// operation is returned as a set spanning both operands. If the top of
+    /// the stack is an open class instead, then the stack is left as it was
+    /// and `rhs` is returned unchanged.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the stack is empty.
+    #[inline(never)]
+    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
+        let mut stack = self.parser().stack_class.borrow_mut();
+        match stack.pop() {
+            Some(ClassState::Op { kind, lhs }) => {
+                let span = Span::new(lhs.span().start, rhs.span().end);
+                ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                    span: span,
+                    kind: kind,
+                    lhs: Box::new(lhs),
+                    rhs: Box::new(rhs),
+                })
+            }
+            Some(open @ ClassState::Open { .. }) => {
+                // Nothing is pending, so put the state back.
+                stack.push(open);
+                rhs
+            }
+            None => unreachable!(),
+        }
+    }
+}
+
+impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
+    /// Parse the pattern into an abstract syntax tree, discarding any
+    /// comments found along the way.
+    fn parse(&self) -> Result<Ast> {
+        let with_comments = self.parse_with_comments()?;
+        Ok(with_comments.ast)
+    }
+
+    /// Parse the whole pattern and return its abstract syntax tree together
+    /// with all of the comments found in the pattern.
+    ///
+    /// The comments are moved out of the parser, leaving its list of
+    /// comments empty.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any part of the pattern fails to parse, if a
+    /// group is left unclosed, or if the nest limit check on the resulting
+    /// AST fails.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the parser's offset is not `0` on entry.
+    fn parse_with_comments(&self) -> Result<ast::WithComments> {
+        assert_eq!(self.offset(), 0, "parser can only be used once");
+        self.parser().reset();
+        let mut concat = ast::Concat { span: self.span(), asts: vec![] };
+        loop {
+            self.bump_space();
+            if self.is_eof() {
+                break;
+            }
+            // Every arm consumes at least the current character and yields
+            // the concatenation to keep building on.
+            concat = match self.char() {
+                '(' => self.push_group(concat)?,
+                ')' => self.pop_group(concat)?,
+                '|' => self.push_alternate(concat)?,
+                '{' => self.parse_counted_repetition(concat)?,
+                '?' => self.parse_uncounted_repetition(
+                    concat,
+                    ast::RepetitionKind::ZeroOrOne,
+                )?,
+                '*' => self.parse_uncounted_repetition(
+                    concat,
+                    ast::RepetitionKind::ZeroOrMore,
+                )?,
+                '+' => self.parse_uncounted_repetition(
+                    concat,
+                    ast::RepetitionKind::OneOrMore,
+                )?,
+                '[' => {
+                    let class = self.parse_set_class()?;
+                    concat.asts.push(Ast::Class(class));
+                    concat
+                }
+                _ => {
+                    let primitive = self.parse_primitive()?;
+                    concat.asts.push(primitive.into_ast());
+                    concat
+                }
+            };
+        }
+        let ast = self.pop_group_end(concat)?;
+        NestLimiter::new(self).check(&ast)?;
+        let comments =
+            mem::replace(&mut *self.parser().comments.borrow_mut(), vec![]);
+        Ok(ast::WithComments { ast: ast, comments: comments })
+    }
+
+    /// Parse one of the uncounted repetition operators (`?`, `*` or `+`,
+    /// but not the `{m,n}` syntax) and apply it to the last expression in
+    /// `concat`. `kind` should correspond to the operator observed by the
+    /// caller.
+    ///
+    /// This asserts that the parser is currently positioned at the
+    /// repetition operator and advances the parser to the first character
+    /// after the operator. The operator may include a single additional
+    /// `?`, which makes it ungreedy.
+    ///
+    /// The concatenation returned is `concat` with its last expression
+    /// replaced by the repetition of that expression.
+    ///
+    /// # Errors
+    ///
+    /// If `concat` is empty, or if its last expression is an empty AST or a
+    /// set of flags, then a `RepetitionMissing` error is returned.
+    #[inline(never)]
+    fn parse_uncounted_repetition(
+        &self,
+        mut concat: ast::Concat,
+        kind: ast::RepetitionKind,
+    ) -> Result<ast::Concat> {
+        assert!(
+            self.char() == '?' || self.char() == '*' || self.char() == '+'
+        );
+        let op_start = self.pos();
+        let ast = match concat.asts.pop() {
+            None | Some(Ast::Empty(_)) | Some(Ast::Flags(_)) => {
+                return Err(
+                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+                );
+            }
+            Some(ast) => ast,
+        };
+        // Consume the operator, then an optional trailing `?`.
+        let greedy = !(self.bump() && self.char() == '?');
+        if !greedy {
+            self.bump();
+        }
+        let end = self.pos();
+        let span = ast.span().with_end(end);
+        let op = ast::RepetitionOp {
+            span: Span::new(op_start, end),
+            kind: kind,
+        };
+        concat.asts.push(Ast::Repetition(ast::Repetition {
+            span: span,
+            op: op,
+            greedy: greedy,
+            ast: Box::new(ast),
+        }));
+        Ok(concat)
+    }
+
+ /// Parses a counted repetition operation. A counted repetition operator
+ /// corresponds to the {m,n} syntax, and does not include the ?, * or +
+ /// operators. The forms recognized here are `{m}` (exactly), `{m,}` (at
+ /// least) and `{m,n}` (bounded).
+ ///
+ /// This asserts that the parser is currently positioned at the opening `{`
+ /// and advances the parser to the first character after the operator.
+ /// (Note that the operator may include a single additional `?`, which
+ /// makes the operator ungreedy.)
+ ///
+ /// The caller should include the concatenation that is being built. The
+ /// concatenation returned includes the repetition operator applied to the
+ /// last expression in the given concatenation.
+ ///
+ /// # Errors
+ ///
+ /// If the given concatenation is empty, or if its last expression is an
+ /// empty AST or a set of flags, then a `RepetitionMissing` error is
+ /// returned.
+ ///
+ /// If the end of the pattern is reached before the closing `}`, or if
+ /// something other than `}` follows the counts, then a
+ /// `RepetitionCountUnclosed` error is returned.
+ ///
+ /// If a count fails to parse, then the error from `parse_decimal` is
+ /// returned after passing through `specialize_err`.
+ ///
+ /// If the parsed range is rejected by `RepetitionRange::is_valid`, then a
+ /// `RepetitionCountInvalid` error is returned.
+ #[inline(never)]
+ fn parse_counted_repetition(
+ &self,
+ mut concat: ast::Concat,
+ ) -> Result<ast::Concat> {
+ assert!(self.char() == '{');
+ let start = self.pos();
+ // The operator applies to the most recently parsed expression.
+ let ast = match concat.asts.pop() {
+ Some(ast) => ast,
+ None => {
+ return Err(
+ self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+ )
+ }
+ };
+ match ast {
+ Ast::Empty(_) | Ast::Flags(_) => {
+ return Err(
+ self.error(self.span(), ast::ErrorKind::RepetitionMissing)
+ )
+ }
+ _ => {}
+ }
+ // Move past the `{` (and any whitespace, if the `x` flag is enabled).
+ if !self.bump_and_bump_space() {
+ return Err(self.error(
+ Span::new(start, self.pos()),
+ ast::ErrorKind::RepetitionCountUnclosed,
+ ));
+ }
+ // NOTE(review): `specialize_err` is defined outside this view; it
+ // presumably rewrites a `DecimalEmpty` error into
+ // `RepetitionCountDecimalEmpty` -- confirm against its definition.
+ let count_start = specialize_err(
+ self.parse_decimal(),
+ ast::ErrorKind::DecimalEmpty,
+ ast::ErrorKind::RepetitionCountDecimalEmpty,
+ )?;
+ // Assume `{m}` until a comma says otherwise.
+ let mut range = ast::RepetitionRange::Exactly(count_start);
+ if self.is_eof() {
+ return Err(self.error(
+ Span::new(start, self.pos()),
+ ast::ErrorKind::RepetitionCountUnclosed,
+ ));
+ }
+ if self.char() == ',' {
+ if !self.bump_and_bump_space() {
+ return Err(self.error(
+ Span::new(start, self.pos()),
+ ast::ErrorKind::RepetitionCountUnclosed,
+ ));
+ }
+ // A `}` directly after the comma means `{m,}`; otherwise an upper
+ // bound must follow.
+ if self.char() != '}' {
+ let count_end = specialize_err(
+ self.parse_decimal(),
+ ast::ErrorKind::DecimalEmpty,
+ ast::ErrorKind::RepetitionCountDecimalEmpty,
+ )?;
+ range = ast::RepetitionRange::Bounded(count_start, count_end);
+ } else {
+ range = ast::RepetitionRange::AtLeast(count_start);
+ }
+ }
+ if self.is_eof() || self.char() != '}' {
+ return Err(self.error(
+ Span::new(start, self.pos()),
+ ast::ErrorKind::RepetitionCountUnclosed,
+ ));
+ }
+
+ // Consume the `}` and then an optional `?` that makes the operator
+ // ungreedy.
+ let mut greedy = true;
+ if self.bump_and_bump_space() && self.char() == '?' {
+ greedy = false;
+ self.bump();
+ }
+
+ // The range is validated only after the entire operator has been
+ // consumed, so the error span covers the whole operator.
+ let op_span = Span::new(start, self.pos());
+ if !range.is_valid() {
+ return Err(
+ self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
+ );
+ }
+ concat.asts.push(Ast::Repetition(ast::Repetition {
+ span: ast.span().with_end(self.pos()),
+ op: ast::RepetitionOp {
+ span: op_span,
+ kind: ast::RepetitionKind::Range(range),
+ },
+ greedy: greedy,
+ ast: Box::new(ast),
+ }));
+ Ok(concat)
+ }
+
+ /// Parse a group (which contains a sub-expression) or a set of flags.
+ ///
+ /// If a group was found, then it is returned with an empty AST. If a set
+ /// of flags is found, then that set is returned.
+ ///
+ /// Three kinds of groups are recognized: named capturing groups
+ /// (`(?P<name>`), non-capturing groups with optional flags (`(?flags:`)
+ /// and plain capturing groups (`(`).
+ ///
+ /// This asserts that the parser is positioned at the opening parenthesis.
+ ///
+ /// In the case of a group, this advances the parser past the opening of
+ /// the group, i.e., to the position where the sub-expression begins. In
+ /// the case of a set of flags, this advances the parser past the closing
+ /// parenthesis immediately following the flags.
+ ///
+ /// # Errors
+ ///
+ /// If a look-around prefix follows the opening parenthesis, then an
+ /// `UnsupportedLookAround` error is returned.
+ ///
+ /// If the pattern ends right after `(?`, then a `GroupUnclosed` error is
+ /// returned.
+ ///
+ /// If flags are given and incorrectly specified, then a corresponding
+ /// error is returned. An empty set of flags, e.g., `(?)`, is reported as
+ /// `RepetitionMissing`.
+ ///
+ /// If a capture name is given and it is incorrectly specified, then a
+ /// corresponding error is returned.
+ ///
+ /// If the capture limit is exceeded, then an error is returned.
+ #[inline(never)]
+ fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
+ assert_eq!(self.char(), '(');
+ let open_span = self.span_char();
+ self.bump();
+ self.bump_space();
+ // Note that this check consumes the prefix when it matches, which is
+ // why the error span can end at the current position.
+ if self.is_lookaround_prefix() {
+ return Err(self.error(
+ Span::new(open_span.start, self.span().end),
+ ast::ErrorKind::UnsupportedLookAround,
+ ));
+ }
+ let inner_span = self.span();
+ if self.bump_if("?P<") {
+ let capture_index = self.next_capture_index(open_span)?;
+ let cap = self.parse_capture_name(capture_index)?;
+ Ok(Either::Right(ast::Group {
+ span: open_span,
+ kind: ast::GroupKind::CaptureName(cap),
+ ast: Box::new(Ast::Empty(self.span())),
+ }))
+ } else if self.bump_if("?") {
+ if self.is_eof() {
+ return Err(
+ self.error(open_span, ast::ErrorKind::GroupUnclosed)
+ );
+ }
+ let flags = self.parse_flags()?;
+ // `parse_flags` only returns successfully when positioned at `:` or
+ // `)`. Remember which one it was and move past it.
+ let char_end = self.char();
+ self.bump();
+ if char_end == ')' {
+ // We don't allow empty flags, e.g., `(?)`. We instead
+ // interpret it as a repetition operator missing its argument.
+ if flags.items.is_empty() {
+ return Err(self.error(
+ inner_span,
+ ast::ErrorKind::RepetitionMissing,
+ ));
+ }
+ Ok(Either::Left(ast::SetFlags {
+ span: Span { end: self.pos(), ..open_span },
+ flags: flags,
+ }))
+ } else {
+ assert_eq!(char_end, ':');
+ Ok(Either::Right(ast::Group {
+ span: open_span,
+ kind: ast::GroupKind::NonCapturing(flags),
+ ast: Box::new(Ast::Empty(self.span())),
+ }))
+ }
+ } else {
+ // No `?` after the parenthesis: a plain capturing group.
+ let capture_index = self.next_capture_index(open_span)?;
+ Ok(Either::Right(ast::Group {
+ span: open_span,
+ kind: ast::GroupKind::CaptureIndex(capture_index),
+ ast: Box::new(Ast::Empty(self.span())),
+ }))
+ }
+ }
+
+    /// Parse the name of a capture group and register it with the parser.
+    ///
+    /// This assumes that the parser is positioned at the first character of
+    /// the name following the opening `<` (which may be EOF). On success,
+    /// the parser is advanced to the first character following the closing
+    /// `>`.
+    ///
+    /// The caller must provide the capture index of the group for this
+    /// name.
+    ///
+    /// # Errors
+    ///
+    /// Returns `GroupNameUnexpectedEof` if the pattern ends before the
+    /// closing `>`, `GroupNameInvalid` if the name contains a character
+    /// rejected by `is_capture_char`, `GroupNameEmpty` if the name is empty
+    /// and `GroupNameDuplicate` if the name has already been used.
+    #[inline(never)]
+    fn parse_capture_name(
+        &self,
+        capture_index: u32,
+    ) -> Result<ast::CaptureName> {
+        if self.is_eof() {
+            return Err(self
+                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
+        }
+        let start = self.pos();
+        while self.char() != '>' {
+            let is_first = self.pos() == start;
+            if !is_capture_char(self.char(), is_first) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::GroupNameInvalid,
+                ));
+            }
+            if !self.bump() {
+                break;
+            }
+        }
+        let end = self.pos();
+        if self.is_eof() {
+            return Err(self
+                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
+        }
+        assert_eq!(self.char(), '>');
+        self.bump();
+        let name = &self.pattern()[start.offset..end.offset];
+        if name.is_empty() {
+            return Err(self.error(
+                Span::new(start, start),
+                ast::ErrorKind::GroupNameEmpty,
+            ));
+        }
+        let capname = ast::CaptureName {
+            span: Span::new(start, end),
+            name: name.to_string(),
+            index: capture_index,
+        };
+        self.add_capture_name(&capname)?;
+        Ok(capname)
+    }
+
+    /// Parse a sequence of flags (and at most one negation operator `-`)
+    /// starting at the current character.
+    ///
+    /// On success, the parser is positioned at the character immediately
+    /// following the flags, which is guaranteed to be either `:` or `)`.
+    ///
+    /// # Errors
+    ///
+    /// If any flags are duplicated, then an error is returned.
+    ///
+    /// If the negation operator is used more than once, then an error is
+    /// returned.
+    ///
+    /// If a flag is not recognized, if the pattern ends before a `:` or `)`
+    /// is seen, or if the negation operator is not followed by any flags,
+    /// then an error is returned.
+    #[inline(never)]
+    fn parse_flags(&self) -> Result<ast::Flags> {
+        let mut flags = ast::Flags { span: self.span(), items: vec![] };
+        // The span of the `-`, if it was the most recent item seen.
+        let mut last_was_negation = None;
+        while self.char() != ':' && self.char() != ')' {
+            let here = self.span_char();
+            let (kind, negation) = if self.char() == '-' {
+                (ast::FlagsItemKind::Negation, Some(here))
+            } else {
+                (ast::FlagsItemKind::Flag(self.parse_flag()?), None)
+            };
+            last_was_negation = negation;
+            let item = ast::FlagsItem { span: here, kind: kind };
+            if let Some(i) = flags.add_item(item) {
+                // `add_item` reports the index of an earlier, equal item.
+                let original = flags.items[i].span;
+                let err = if negation.is_some() {
+                    ast::ErrorKind::FlagRepeatedNegation { original: original }
+                } else {
+                    ast::ErrorKind::FlagDuplicate { original: original }
+                };
+                return Err(self.error(here, err));
+            }
+            if !self.bump() {
+                return Err(
+                    self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
+                );
+            }
+        }
+        if let Some(span) = last_was_negation {
+            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
+        }
+        flags.span.end = self.pos();
+        Ok(flags)
+    }
+
+    /// Interpret the current character as a single flag. The parser is not
+    /// advanced.
+    ///
+    /// The recognized flags are `i`, `m`, `s`, `U`, `u` and `x`.
+    ///
+    /// # Errors
+    ///
+    /// If the flag is not recognized, then a `FlagUnrecognized` error
+    /// covering the current character is returned.
+    #[inline(never)]
+    fn parse_flag(&self) -> Result<ast::Flag> {
+        let flag = match self.char() {
+            'i' => ast::Flag::CaseInsensitive,
+            'm' => ast::Flag::MultiLine,
+            's' => ast::Flag::DotMatchesNewLine,
+            'U' => ast::Flag::SwapGreed,
+            'u' => ast::Flag::Unicode,
+            'x' => ast::Flag::IgnoreWhitespace,
+            _ => {
+                return Err(self
+                    .error(self.span_char(), ast::ErrorKind::FlagUnrecognized));
+            }
+        };
+        Ok(flag)
+    }
+
+    /// Parse a primitive AST, e.g., a literal, a non-set character class or
+    /// an assertion.
+    ///
+    /// This assumes that the parser expects a primitive at the current
+    /// location, i.e., all other non-primitive cases have been handled by
+    /// the caller. For example, if the parser's position is at `|`, then
+    /// `|` will be treated as a literal (e.g., inside a character class).
+    ///
+    /// `\` starts an escape sequence, `.` is the dot, `^` and `$` are line
+    /// assertions, and any other character is a verbatim literal.
+    ///
+    /// This advances the parser to the first character immediately
+    /// following the primitive.
+    fn parse_primitive(&self) -> Result<Primitive> {
+        let c = self.char();
+        if c == '\\' {
+            return self.parse_escape();
+        }
+        // Everything else is exactly one character wide.
+        let span = self.span_char();
+        let primitive = match c {
+            '.' => Primitive::Dot(span),
+            '^' => Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::StartLine,
+            }),
+            '$' => Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::EndLine,
+            }),
+            c => Primitive::Literal(ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Verbatim,
+                c: c,
+            }),
+        };
+        self.bump();
+        Ok(primitive)
+    }
+
+ /// Parse an escape sequence as a primitive AST.
+ ///
+ /// This asserts that the parser is positioned at the start of the escape
+ /// sequence, i.e., `\`. It advances the parser to the first position
+ /// immediately following the escape sequence.
+ ///
+ /// Octal, hexadecimal, Unicode class and Perl class escapes are delegated
+ /// to dedicated helpers. All remaining escapes are a single character
+ /// long and are handled here: escaped meta characters, special literals
+ /// such as `\n`, and the assertions `\A`, `\z`, `\b` and `\B`.
+ ///
+ /// # Errors
+ ///
+ /// If the pattern ends right after the `\`, then an `EscapeUnexpectedEof`
+ /// error is returned.
+ ///
+ /// If the `\` is followed by a digit in a case where octal escapes do not
+ /// apply, then an `UnsupportedBackreference` error is returned.
+ ///
+ /// If the escape is not recognized, then an `EscapeUnrecognized` error is
+ /// returned.
+ ///
+ /// Errors from the delegated hexadecimal and Unicode class helpers are
+ /// propagated.
+ #[inline(never)]
+ fn parse_escape(&self) -> Result<Primitive> {
+ assert_eq!(self.char(), '\\');
+ let start = self.pos();
+ if !self.bump() {
+ return Err(self.error(
+ Span::new(start, self.pos()),
+ ast::ErrorKind::EscapeUnexpectedEof,
+ ));
+ }
+ let c = self.char();
+ // Put some of the more complicated routines into helpers.
+ // Each helper starts at the character after the `\`, so the span it
+ // produces is widened afterwards to begin at the `\`.
+ match c {
+ '0'..='7' => {
+ if !self.parser().octal {
+ return Err(self.error(
+ Span::new(start, self.span_char().end),
+ ast::ErrorKind::UnsupportedBackreference,
+ ));
+ }
+ let mut lit = self.parse_octal();
+ lit.span.start = start;
+ return Ok(Primitive::Literal(lit));
+ }
+ // When octal escapes are enabled, `\8` and `\9` fall through to the
+ // single character handling below.
+ '8'..='9' if !self.parser().octal => {
+ return Err(self.error(
+ Span::new(start, self.span_char().end),
+ ast::ErrorKind::UnsupportedBackreference,
+ ));
+ }
+ 'x' | 'u' | 'U' => {
+ let mut lit = self.parse_hex()?;
+ lit.span.start = start;
+ return Ok(Primitive::Literal(lit));
+ }
+ 'p' | 'P' => {
+ let mut cls = self.parse_unicode_class()?;
+ cls.span.start = start;
+ return Ok(Primitive::Unicode(cls));
+ }
+ 'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
+ let mut cls = self.parse_perl_class();
+ cls.span.start = start;
+ return Ok(Primitive::Perl(cls));
+ }
+ _ => {}
+ }
+
+ // Handle all of the one letter sequences inline.
+ // The parser is moved past the escaped character first, so `span`
+ // covers both the `\` and that character.
+ self.bump();
+ let span = Span::new(start, self.pos());
+ if is_meta_character(c) {
+ return Ok(Primitive::Literal(ast::Literal {
+ span: span,
+ kind: ast::LiteralKind::Punctuation,
+ c: c,
+ }));
+ }
+ // Builds a special literal of the given kind for the character that
+ // the escape stands for.
+ let special = |kind, c| {
+ Ok(Primitive::Literal(ast::Literal {
+ span: span,
+ kind: ast::LiteralKind::Special(kind),
+ c: c,
+ }))
+ };
+ match c {
+ 'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
+ 'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
+ 't' => special(ast::SpecialLiteralKind::Tab, '\t'),
+ 'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
+ 'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
+ 'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
+ // An escaped space is only accepted in whitespace insensitive mode.
+ ' ' if self.ignore_whitespace() => {
+ special(ast::SpecialLiteralKind::Space, ' ')
+ }
+ 'A' => Ok(Primitive::Assertion(ast::Assertion {
+ span: span,
+ kind: ast::AssertionKind::StartText,
+ })),
+ 'z' => Ok(Primitive::Assertion(ast::Assertion {
+ span: span,
+ kind: ast::AssertionKind::EndText,
+ })),
+ 'b' => Ok(Primitive::Assertion(ast::Assertion {
+ span: span,
+ kind: ast::AssertionKind::WordBoundary,
+ })),
+ 'B' => Ok(Primitive::Assertion(ast::Assertion {
+ span: span,
+ kind: ast::AssertionKind::NotWordBoundary,
+ })),
+ _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
+ }
+ }
+
+ /// Parse an octal representation of a Unicode codepoint up to 3 digits
+ /// long. This expects the parser to be positioned at the first octal
+ /// digit and advances the parser to the first character immediately
+ /// following the octal number. This also assumes that parsing octal
+ /// escapes is enabled.
+ ///
+ /// The span of the literal returned covers only the digits.
+ ///
+ /// Assuming the preconditions are met, this routine can never fail.
+ ///
+ /// # Panics
+ ///
+ /// Panics if octal escapes are disabled or if the current character is
+ /// not an octal digit.
+ #[inline(never)]
+ fn parse_octal(&self) -> ast::Literal {
+ use std::char;
+ use std::u32;
+
+ assert!(self.parser().octal);
+ assert!('0' <= self.char() && self.char() <= '7');
+ let start = self.pos();
+ // Parse up to two more digits.
+ // Each iteration first consumes the current digit via `bump`, then
+ // checks that another octal digit follows and that no more than two
+ // bytes have been consumed so far. All of the work happens in the
+ // condition, which is why the loop body is empty.
+ while self.bump()
+ && '0' <= self.char()
+ && self.char() <= '7'
+ && self.pos().offset - start.offset <= 2
+ {}
+ let end = self.pos();
+ let octal = &self.pattern()[start.offset..end.offset];
+ // Parsing the octal should never fail since the above guarantees a
+ // valid number.
+ let codepoint =
+ u32::from_str_radix(octal, 8).expect("valid octal number");
+ // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
+ // invalid Unicode scalar values.
+ let c = char::from_u32(codepoint).expect("Unicode scalar value");
+ ast::Literal {
+ span: Span::new(start, end),
+ kind: ast::LiteralKind::Octal,
+ c: c,
+ }
+ }
+
+ /// Parses a hexadecimal escape for a Unicode codepoint, in either the
+ /// fixed-width notation (e.g. `\xFF`) or the braced notation (e.g.
+ /// `\x{FFFF}`).
+ ///
+ /// The parser must be positioned at the `x`, `u` or `U` prefix (this is
+ /// asserted). On success the parser is left on the first character after
+ /// the hexadecimal literal. If the pattern ends right after the prefix, an
+ /// `EscapeUnexpectedEof` error is returned.
+ #[inline(never)]
+ fn parse_hex(&self) -> Result<ast::Literal> {
+     assert!(
+         self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
+     );
+
+     let prefix = self.char();
+     let hex_kind = if prefix == 'x' {
+         ast::HexLiteralKind::X
+     } else if prefix == 'u' {
+         ast::HexLiteralKind::UnicodeShort
+     } else {
+         ast::HexLiteralKind::UnicodeLong
+     };
+     if self.bump_and_bump_space() {
+         // A `{` right after the prefix selects the braced notation.
+         return if self.char() == '{' {
+             self.parse_hex_brace(hex_kind)
+         } else {
+             self.parse_hex_digits(hex_kind)
+         };
+     }
+     Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof))
+ }
+
+ /// Parses a fixed-width hexadecimal escape for a Unicode codepoint.
+ ///
+ /// The parser must be positioned at the first digit. On success it is
+ /// advanced to the first character following the escape sequence.
+ ///
+ /// Exactly `kind.digits()` digits are read: 2 for `\xNN`, 4 for `\uNNNN`
+ /// and 8 for `\UNNNNNNNN`. An error is returned if the pattern ends early,
+ /// if a non-hex character is found, or if the digits do not denote a
+ /// Unicode scalar value.
+ #[inline(never)]
+ fn parse_hex_digits(
+     &self,
+     kind: ast::HexLiteralKind,
+ ) -> Result<ast::Literal> {
+     use std::char;
+     use std::u32;
+
+     let mut digits = self.parser().scratch.borrow_mut();
+     digits.clear();
+
+     let first = self.pos();
+     for idx in 0..kind.digits() {
+         // The parser already sits on the first digit; every later digit
+         // requires advancing first.
+         if idx > 0 {
+             if !self.bump_and_bump_space() {
+                 return Err(self.error(
+                     self.span(),
+                     ast::ErrorKind::EscapeUnexpectedEof,
+                 ));
+             }
+         }
+         let c = self.char();
+         if !is_hex(c) {
+             return Err(self.error(
+                 self.span_char(),
+                 ast::ErrorKind::EscapeHexInvalidDigit,
+             ));
+         }
+         digits.push(c);
+     }
+     // Step past the final digit. Reaching EOF here is fine, so the result
+     // of the bump is deliberately ignored.
+     self.bump_and_bump_space();
+     let span = Span::new(first, self.pos());
+     let parsed = u32::from_str_radix(digits.as_str(), 16)
+         .ok()
+         .and_then(char::from_u32);
+     if let Some(c) = parsed {
+         Ok(ast::Literal { span, kind: ast::LiteralKind::HexFixed(kind), c })
+     } else {
+         Err(self.error(span, ast::ErrorKind::EscapeHexInvalid))
+     }
+ }
+
+ /// Parses a braced hexadecimal escape, e.g. the `{FFFF}` in `\x{FFFF}`.
+ ///
+ /// The parser must be positioned at the opening brace `{`. On success it
+ /// is advanced to the first character after the closing brace `}`.
+ ///
+ /// An error is returned if a non-hex character appears before `}`, if the
+ /// pattern ends before `}`, if the braces are empty, or if the digits do
+ /// not denote a Unicode scalar value.
+ #[inline(never)]
+ fn parse_hex_brace(
+     &self,
+     kind: ast::HexLiteralKind,
+ ) -> Result<ast::Literal> {
+     use std::char;
+     use std::u32;
+
+     let mut digits = self.parser().scratch.borrow_mut();
+     digits.clear();
+
+     let brace_pos = self.pos();
+     let digits_start = self.span_char().end;
+     // Collect everything up to (but excluding) the closing brace.
+     loop {
+         if !self.bump_and_bump_space() {
+             break;
+         }
+         let c = self.char();
+         if c == '}' {
+             break;
+         }
+         if !is_hex(c) {
+             return Err(self.error(
+                 self.span_char(),
+                 ast::ErrorKind::EscapeHexInvalidDigit,
+             ));
+         }
+         digits.push(c);
+     }
+     if self.is_eof() {
+         return Err(self.error(
+             Span::new(brace_pos, self.pos()),
+             ast::ErrorKind::EscapeUnexpectedEof,
+         ));
+     }
+     let digits_end = self.pos();
+     assert_eq!(self.char(), '}');
+     self.bump_and_bump_space();
+
+     if digits.is_empty() {
+         return Err(self.error(
+             Span::new(brace_pos, self.pos()),
+             ast::ErrorKind::EscapeHexEmpty,
+         ));
+     }
+     let c = u32::from_str_radix(digits.as_str(), 16)
+         .ok()
+         .and_then(char::from_u32)
+         .ok_or_else(|| {
+             self.error(
+                 Span::new(digits_start, digits_end),
+                 ast::ErrorKind::EscapeHexInvalid,
+             )
+         })?;
+     Ok(ast::Literal {
+         span: Span::new(digits_start, self.pos()),
+         kind: ast::LiteralKind::HexBrace(kind),
+         c,
+     })
+ }
+
+ /// Parses a decimal number into a `u32`, skipping whitespace on both
+ /// sides of it.
+ ///
+ /// The parser must be positioned where a decimal number (optionally
+ /// preceded by whitespace) may start. The contiguous run of ASCII digits
+ /// found there is consumed, along with any whitespace that follows it.
+ ///
+ /// Returns `DecimalEmpty` when no digit is found and `DecimalInvalid`
+ /// when the digits cannot be represented as a `u32`. Both errors carry
+ /// the span of the digits only.
+ fn parse_decimal(&self) -> Result<u32> {
+     let mut digits = self.parser().scratch.borrow_mut();
+     digits.clear();
+
+     // Leading whitespace.
+     while !self.is_eof() && self.char().is_whitespace() {
+         self.bump();
+     }
+     // The digits themselves.
+     let start = self.pos();
+     loop {
+         if self.is_eof() {
+             break;
+         }
+         let c = self.char();
+         if c < '0' || c > '9' {
+             break;
+         }
+         digits.push(c);
+         self.bump_and_bump_space();
+     }
+     let span = Span::new(start, self.pos());
+     // Trailing whitespace.
+     while !self.is_eof() && self.char().is_whitespace() {
+         self.bump_and_bump_space();
+     }
+     if digits.is_empty() {
+         return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
+     }
+     u32::from_str_radix(digits.as_str(), 10)
+         .map_err(|_| self.error(span, ast::ErrorKind::DecimalInvalid))
+ }
+
+ /// Parses a bracketed character class: mostly characters and character
+ /// ranges, but possibly also nested classes of any kind (other than `.`)
+ /// combined with the set operators `&&`, `--` and `~~`.
+ ///
+ /// The parser must be positioned at the opening `[` (this is asserted).
+ /// On success, the parser is left immediately after the closing `]`.
+ #[inline(never)]
+ fn parse_set_class(&self) -> Result<ast::Class> {
+     assert_eq!(self.char(), '[');
+
+     let mut union =
+         ast::ClassSetUnion { span: self.span(), items: Vec::new() };
+     loop {
+         self.bump_space();
+         if self.is_eof() {
+             return Err(self.unclosed_class_error());
+         }
+         let c = self.char();
+         if c == '[' {
+             // Once at least one class is open, a `[` may begin an ASCII
+             // class such as `[:alnum:]`. If that attempt fails, the
+             // parser is back at `[` and a nested class is opened instead.
+             if !self.parser().stack_class.borrow().is_empty() {
+                 if let Some(cls) = self.maybe_parse_ascii_class() {
+                     union.push(ast::ClassSetItem::Ascii(cls));
+                     continue;
+                 }
+             }
+             union = self.push_class_open(union)?;
+         } else if c == ']' {
+             match self.pop_class(union)? {
+                 // A nested class was closed; continue with its parent.
+                 Either::Left(nested_union) => union = nested_union,
+                 // The outermost class was closed; parsing is complete.
+                 Either::Right(class) => return Ok(class),
+             }
+         } else if c == '&' && self.peek() == Some('&') {
+             assert!(self.bump_if("&&"));
+             union = self.push_class_op(
+                 ast::ClassSetBinaryOpKind::Intersection,
+                 union,
+             );
+         } else if c == '-' && self.peek() == Some('-') {
+             assert!(self.bump_if("--"));
+             union = self.push_class_op(
+                 ast::ClassSetBinaryOpKind::Difference,
+                 union,
+             );
+         } else if c == '~' && self.peek() == Some('~') {
+             assert!(self.bump_if("~~"));
+             union = self.push_class_op(
+                 ast::ClassSetBinaryOpKind::SymmetricDifference,
+                 union,
+             );
+         } else {
+             union.push(self.parse_set_class_range()?);
+         }
+     }
+ }
+
+ /// Parses one item of a character class set: a simple literal character,
+ /// a range between two simple literal characters, or a "primitive" class
+ /// such as `\w` or `\p{Greek}`.
+ ///
+ /// An error is returned for an invalid escape, for a class appearing
+ /// where a simple literal is required (e.g., as a range endpoint), for an
+ /// invalid range, or when the pattern ends inside the class.
+ #[inline(never)]
+ fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
+     let lower = self.parse_set_class_item()?;
+     self.bump_space();
+     if self.is_eof() {
+         return Err(self.unclosed_class_error());
+     }
+     // Only a `-` can turn the item into a range, with two exceptions:
+     // `-` followed by `]` is a literal `-`, and `-` followed by another
+     // `-` is the "difference" operator `--`.
+     let starts_range = self.char() == '-'
+         && self.peek_space() != Some(']')
+         && self.peek_space() != Some('-');
+     if !starts_range {
+         return lower.into_class_set_item(self);
+     }
+     // Skip the `-` and parse the upper end of the range.
+     if !self.bump_and_bump_space() {
+         return Err(self.unclosed_class_error());
+     }
+     let upper = self.parse_set_class_item()?;
+     let span = Span::new(lower.span().start, upper.span().end);
+     let start = lower.into_class_literal(self)?;
+     let end = upper.into_class_literal(self)?;
+     let range = ast::ClassSetRange { span, start, end };
+     if range.is_valid() {
+         Ok(ast::ClassSetItem::Range(range))
+     } else {
+         Err(self.error(range.span, ast::ErrorKind::ClassRangeInvalid))
+     }
+ }
+
+ /// Parses a single character class item as a primitive: either one
+ /// escape sequence or one verbatim literal character.
+ ///
+ /// The parser must be positioned at the start of the primitive. On
+ /// success it is advanced to the first position after the primitive.
+ ///
+ /// Rejecting a primitive that is not legal in the caller's context is the
+ /// caller's responsibility.
+ #[inline(never)]
+ fn parse_set_class_item(&self) -> Result<Primitive> {
+     if self.char() == '\\' {
+         return self.parse_escape();
+     }
+     // Anything other than a backslash stands for itself.
+     let literal = ast::Literal {
+         span: self.span_char(),
+         kind: ast::LiteralKind::Verbatim,
+         c: self.char(),
+     };
+     self.bump();
+     Ok(Primitive::Literal(literal))
+ }
+
+ /// Parses the opening of a bracketed character class: the `[`, an
+ /// optional `^` for negation, and the characters that are special only at
+ /// the very start of a class. Any number of leading `-` are taken as
+ /// literal `-`, and a `]` that would otherwise be the first item is taken
+ /// as a literal `]` (so `[^]]` is every character except `]`).
+ ///
+ /// Two values are returned. The `ast::ClassBracketed` always contains an
+ /// empty union, which is meant to be replaced with the real contents when
+ /// the class is popped from the parser's stack. The `ast::ClassSetUnion`
+ /// holds the literals collected here.
+ ///
+ /// The parser must be positioned at the opening `[` (this is asserted)
+ /// and is advanced to the first non-special character of the class.
+ ///
+ /// A `ClassUnclosed` error is returned if the pattern ends first.
+ #[inline(never)]
+ fn parse_set_class_open(
+     &self,
+ ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
+     assert_eq!(self.char(), '[');
+     let start = self.pos();
+     // Every premature end of input below is reported the same way: an
+     // unclosed class spanning from `[` to wherever the parser stopped.
+     let unclosed = || {
+         self.error(
+             Span::new(start, self.pos()),
+             ast::ErrorKind::ClassUnclosed,
+         )
+     };
+     // Builds a verbatim literal item for the character under the cursor.
+     let verbatim_here = |c: char| {
+         ast::ClassSetItem::Literal(ast::Literal {
+             span: self.span_char(),
+             kind: ast::LiteralKind::Verbatim,
+             c,
+         })
+     };
+
+     if !self.bump_and_bump_space() {
+         return Err(unclosed());
+     }
+     let negated = self.char() == '^';
+     if negated && !self.bump_and_bump_space() {
+         return Err(unclosed());
+     }
+
+     let mut union =
+         ast::ClassSetUnion { span: self.span(), items: Vec::new() };
+     // Any run of leading `-` is a run of literal `-`.
+     while self.char() == '-' {
+         union.push(verbatim_here('-'));
+         if !self.bump_and_bump_space() {
+             return Err(unclosed());
+         }
+     }
+     // A `]` in first position is a literal `]`, which means an empty
+     // class cannot be written.
+     if union.items.is_empty() && self.char() == ']' {
+         union.push(verbatim_here(']'));
+         if !self.bump_and_bump_space() {
+             return Err(unclosed());
+         }
+     }
+     let set = ast::ClassBracketed {
+         span: Span::new(start, self.pos()),
+         negated,
+         kind: ast::ClassSet::union(ast::ClassSetUnion {
+             span: Span::new(union.span.start, union.span.start),
+             items: Vec::new(),
+         }),
+     };
+     Ok((set, union))
+ }
+
+ /// Tries to parse an ASCII character class such as `[:alnum:]` or its
+ /// negated form `[:^alnum:]`.
+ ///
+ /// The parser must be positioned at the opening `[` (this is asserted).
+ ///
+ /// When no valid ASCII class is found, the parser is restored to the
+ /// position it had on entry and `None` is returned. Otherwise the parser
+ /// is left just past the closing `]` and the class is returned.
+ #[inline(never)]
+ fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
+     // ASCII classes are unusual in that parsing them never produces an
+     // interesting error. They have the syntax `[:NAME:]` and may only
+     // appear inside a bracketed class, e.g. `[[:alnum:]]` or
+     // `[[:lower:]A]`.
+     //
+     // A misspelled class such as `[[:loower:]]` is not reported. It is
+     // simply parsed as an ordinary nested class containing the characters
+     // `:elorw`. An error might seem friendlier, since the two colons hint
+     // at the intent, but then what about `[[:lower]]`? There is no way to
+     // tell whether that was meant as an ASCII class or as a normal nested
+     // class.
+     //
+     // For better or worse, the semantics chosen here never fail, at the
+     // cost of less helpful failure modes.
+     assert_eq!(self.char(), '[');
+     let start = self.pos();
+     // Rewinds the parser to where it began and reports "not an ASCII
+     // class".
+     let give_up = || -> Option<ast::ClassAscii> {
+         self.parser().pos.set(start);
+         None
+     };
+
+     if !self.bump() || self.char() != ':' {
+         return give_up();
+     }
+     if !self.bump() {
+         return give_up();
+     }
+     let negated = self.char() == '^';
+     if negated && !self.bump() {
+         return give_up();
+     }
+     // The name is everything up to the next `:`.
+     let name_start = self.offset();
+     while self.char() != ':' && self.bump() {}
+     if self.is_eof() {
+         return give_up();
+     }
+     let name = &self.pattern()[name_start..self.offset()];
+     if !self.bump_if(":]") {
+         return give_up();
+     }
+     match ast::ClassAsciiKind::from_name(name) {
+         Some(kind) => Some(ast::ClassAscii {
+             span: Span::new(start, self.pos()),
+             kind,
+             negated,
+         }),
+         None => give_up(),
+     }
+ }
+
+ /// Parses a Unicode class written either in the one-letter notation
+ /// (`\pN`) or in the bracketed notation (`\p{Greek}`).
+ ///
+ /// The parser must be positioned at the `p` (or at `P`, which negates the
+ /// class); this is asserted. On success the parser is advanced to the
+ /// character immediately following the class.
+ ///
+ /// Inside braces, the text is split at the first `!=`, else the first
+ /// `:`, else the first `=`, giving a name/value pair. Without any of
+ /// these, the whole text is the class name.
+ ///
+ /// Whether the class name is actually valid is not checked here.
+ #[inline(never)]
+ fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
+     assert!(self.char() == 'p' || self.char() == 'P');
+
+     let mut buf = self.parser().scratch.borrow_mut();
+     buf.clear();
+
+     let negated = self.char() == 'P';
+     if !self.bump_and_bump_space() {
+         return Err(
+             self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
+         );
+     }
+
+     if self.char() != '{' {
+         // One-letter notation: the next character is the class, but a
+         // backslash can never be one.
+         let start = self.pos();
+         let letter = self.char();
+         if letter == '\\' {
+             return Err(self.error(
+                 self.span_char(),
+                 ast::ErrorKind::UnicodeClassInvalid,
+             ));
+         }
+         self.bump_and_bump_space();
+         return Ok(ast::ClassUnicode {
+             span: Span::new(start, self.pos()),
+             negated,
+             kind: ast::ClassUnicodeKind::OneLetter(letter),
+         });
+     }
+
+     // Bracketed notation: gather everything between `{` and `}`.
+     let start = self.span_char().end;
+     loop {
+         if !self.bump_and_bump_space() {
+             break;
+         }
+         let c = self.char();
+         if c == '}' {
+             break;
+         }
+         buf.push(c);
+     }
+     if self.is_eof() {
+         return Err(
+             self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
+         );
+     }
+     assert_eq!(self.char(), '}');
+     self.bump();
+
+     let name = buf.as_str();
+     // Each candidate is (operator, index of operator, operator length).
+     // `!=` is tried first so that its `=` is not mistaken for `Equal`.
+     let split = name
+         .find("!=")
+         .map(|i| (ast::ClassUnicodeOpKind::NotEqual, i, 2))
+         .or_else(|| {
+             name.find(':').map(|i| (ast::ClassUnicodeOpKind::Colon, i, 1))
+         })
+         .or_else(|| {
+             name.find('=').map(|i| (ast::ClassUnicodeOpKind::Equal, i, 1))
+         });
+     let kind = match split {
+         Some((op, at, width)) => ast::ClassUnicodeKind::NamedValue {
+             op,
+             name: name[..at].to_string(),
+             value: name[at + width..].to_string(),
+         },
+         None => ast::ClassUnicodeKind::Named(name.to_string()),
+     };
+     Ok(ast::ClassUnicode {
+         span: Span::new(start, self.pos()),
+         negated,
+         kind,
+     })
+ }
+
+ /// Parses a Perl character class such as `\d` or `\W`.
+ ///
+ /// The parser must be positioned at the class letter (one of `d`, `D`,
+ /// `s`, `S`, `w` or `W`) and is advanced to the character immediately
+ /// following it. An uppercase letter denotes the negated class.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the current character is not one of the letters above.
+ #[inline(never)]
+ fn parse_perl_class(&self) -> ast::ClassPerl {
+     let letter = self.char();
+     let span = self.span_char();
+     self.bump();
+     let kind = match letter {
+         'd' | 'D' => ast::ClassPerlKind::Digit,
+         's' | 'S' => ast::ClassPerlKind::Space,
+         'w' | 'W' => ast::ClassPerlKind::Word,
+         c => panic!("expected valid Perl class but got '{}'", c),
+     };
+     let negated = letter == 'D' || letter == 'S' || letter == 'W';
+     ast::ClassPerl { span, kind, negated }
+ }
+}
+
+/// A visitor over a fully parsed `Ast` that tracks how deeply nodes are nested
+/// and reports an error once the depth exceeds the parser's nest limit.
+#[derive(Debug)]
+struct NestLimiter<'p, 's, P> {
+ /// The parser being checked; it supplies the nest limit and builds errors.
+ p: &'p ParserI<'s, P>,
+ /// The nesting depth at the current point of the walk; starts at 0.
+ depth: u32,
+}
+
+impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
+    /// Creates a limiter for the given parser with the depth set to zero.
+    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
+        NestLimiter { p, depth: 0 }
+    }
+
+    /// Walks `ast` with this limiter as the visitor, returning an error if
+    /// the nest limit is exceeded anywhere in it.
+    #[inline(never)]
+    fn check(self, ast: &Ast) -> Result<()> {
+        ast::visit(ast, self)
+    }
+
+    /// Descends one level. `span` is the span blamed in the error that is
+    /// returned when the new depth would overflow a `u32` or would exceed
+    /// the parser's nest limit. The depth is left unchanged on error.
+    fn increment_depth(&mut self, span: &Span) -> Result<()> {
+        let deeper = match self.depth.checked_add(1) {
+            Some(d) => d,
+            None => {
+                return Err(self.p.error(
+                    span.clone(),
+                    ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
+                ));
+            }
+        };
+        let limit = self.p.parser().nest_limit;
+        if deeper <= limit {
+            self.depth = deeper;
+            Ok(())
+        } else {
+            Err(self.p.error(
+                span.clone(),
+                ast::ErrorKind::NestLimitExceeded(limit),
+            ))
+        }
+    }
+
+    /// Ascends one level.
+    fn decrement_depth(&mut self) {
+        // Provided the visitor pairs every "pre" call with a "post" call,
+        // the depth can never drop below 0 here.
+        let shallower = self.depth.checked_sub(1).unwrap();
+        self.depth = shallower;
+    }
+}
+
+impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
+    type Output = ();
+    type Err = ast::Error;
+
+    /// Nothing is accumulated during the walk, so finishing always succeeds.
+    fn finish(self) -> Result<()> {
+        Ok(())
+    }
+
+    /// Entering a node that can contain other nodes adds one level of depth.
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Class(ast::Class::Bracketed(ref x)) => {
+                self.increment_depth(&x.span)
+            }
+            Ast::Repetition(ref x) => self.increment_depth(&x.span),
+            Ast::Group(ref x) => self.increment_depth(&x.span),
+            Ast::Alternation(ref x) => self.increment_depth(&x.span),
+            Ast::Concat(ref x) => self.increment_depth(&x.span),
+            // Everything else is a base case and leaves the depth alone.
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            | Ast::Class(ast::Class::Unicode(_))
+            | Ast::Class(ast::Class::Perl(_)) => Ok(()),
+        }
+    }
+
+    /// Leaving a node undoes whatever `visit_pre` did for it.
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Class(ast::Class::Bracketed(_))
+            | Ast::Repetition(_)
+            | Ast::Group(_)
+            | Ast::Alternation(_)
+            | Ast::Concat(_) => self.decrement_depth(),
+            // Base cases never incremented the depth.
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            | Ast::Class(ast::Class::Unicode(_))
+            | Ast::Class(ast::Class::Perl(_)) => {}
+        }
+        Ok(())
+    }
+
+    /// Inside a class, only nested bracketed classes and unions add depth.
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Bracketed(ref x) => {
+                self.increment_depth(&x.span)
+            }
+            ast::ClassSetItem::Union(ref x) => self.increment_depth(&x.span),
+            // Everything else is a base case and leaves the depth alone.
+            ast::ClassSetItem::Empty(_)
+            | ast::ClassSetItem::Literal(_)
+            | ast::ClassSetItem::Range(_)
+            | ast::ClassSetItem::Ascii(_)
+            | ast::ClassSetItem::Unicode(_)
+            | ast::ClassSetItem::Perl(_) => Ok(()),
+        }
+    }
+
+    /// Leaving a class item undoes whatever the "pre" visit did for it.
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
+                self.decrement_depth()
+            }
+            // Base cases never incremented the depth.
+            ast::ClassSetItem::Empty(_)
+            | ast::ClassSetItem::Literal(_)
+            | ast::ClassSetItem::Range(_)
+            | ast::ClassSetItem::Ascii(_)
+            | ast::ClassSetItem::Unicode(_)
+            | ast::ClassSetItem::Perl(_) => {}
+        }
+        Ok(())
+    }
+
+    /// A binary set operation always adds one level of depth.
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        ast: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        let op_span = &ast.span;
+        self.increment_depth(op_span)
+    }
+
+    /// Leaving a binary set operation removes the level it added.
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        self.decrement_depth();
+        Ok(())
+    }
+}
+
+/// When the result is an error, transforms the ast::ErrorKind from the source
+/// Result into another one. This function is used to return clearer error
+/// messages when possible.
+fn specialize_err<T>(
+ result: Result<T>,
+ from: ast::ErrorKind,
+ to: ast::ErrorKind,
+) -> Result<T> {
+ if let Err(e) = result {
+ if e.kind == from {
+ Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
+ } else {
+ Err(e)
+ }
+ } else {
+ result
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::ops::Range;
+
+ use super::{Parser, ParserBuilder, ParserI, Primitive};
+ use crate::ast::{self, Ast, Position, Span};
+
+ // A local replacement for the standard `assert_eq!`. On a mismatch it panics
+ // with the `Debug` output of each side (formatting is still fairly basic).
+ macro_rules! assert_eq {
+ ($left:expr, $right:expr) => {{
+ // Borrow both operands once and compare them through the references.
+ match (&$left, &$right) {
+ (left_val, right_val) => {
+ if !(*left_val == *right_val) {
+ panic!(
+ "assertion failed: `(left == right)`\n\n\
+ left: `{:?}`\nright: `{:?}`\n\n",
+ left_val, right_val
+ )
+ }
+ }
+ }
+ }};
+ }
+
+ // The expected-error type used by the tests in place of a real ast::Error.
+ // Equality between TestError and ast::Error (see the PartialEq impls) only
+ // looks at the span and the kind, so tests never have to spell out the
+ // pattern string that a real ast::Error carries.
+ #[derive(Clone, Debug)]
+ struct TestError {
+ // The span the error is expected to point at.
+ span: Span,
+ // The kind of error that is expected.
+ kind: ast::ErrorKind,
+ }
+
+ impl PartialEq<ast::Error> for TestError {
+ fn eq(&self, other: &ast::Error) -> bool {
+ self.span == other.span && self.kind == other.kind
+ }
+ }
+
+ impl PartialEq<TestError> for ast::Error {
+ fn eq(&self, other: &TestError) -> bool {
+ self.span == other.span && self.kind == other.kind
+ }
+ }
+
+ /// Shorthand for turning a string slice into an owned `String`.
+ fn s(str: &str) -> String {
+     String::from(str)
+ }
+
+ /// Builds a `ParserI` over `pattern`, backed by a `Parser` with the
+ /// default configuration.
+ fn parser(pattern: &str) -> ParserI<'_, Parser> {
+     let default_parser = Parser::new();
+     ParserI::new(default_parser, pattern)
+ }
+
+ /// Builds a `ParserI` over `pattern` with octal escapes enabled.
+ fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
+     ParserI::new(ParserBuilder::new().octal(true).build(), pattern)
+ }
+
+ /// Builds a `ParserI` over `pattern` whose nest limit is `nest_limit`.
+ fn parser_nest_limit(
+     pattern: &str,
+     nest_limit: u32,
+ ) -> ParserI<'_, Parser> {
+     let mut builder = ParserBuilder::new();
+     let limited = builder.nest_limit(nest_limit).build();
+     ParserI::new(limited, pattern)
+ }
+
+ /// Builds a `ParserI` over `pattern` with whitespace-insensitive mode
+ /// switched on from the start.
+ fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
+     ParserI::new(
+         ParserBuilder::new().ignore_whitespace(true).build(),
+         pattern,
+     )
+ }
+
+ /// Shorthand for building a span out of its two end positions.
+ fn nspan(start: Position, end: Position) -> Span {
+     let (from, to) = (start, end);
+     Span::new(from, to)
+ }
+
+ /// Shorthand for building a position from an offset, a line number and
+ /// a column number.
+ fn npos(offset: usize, line: usize, column: usize) -> Position {
+     let position = Position::new(offset, line, column);
+     position
+ }
+
+ /// Builds a span covering the given offset range, assuming everything is
+ /// on line 1 and that each column is simply its offset plus one. That only
+ /// holds out of the box for single-line ASCII patterns, which is good
+ /// enough for most tests.
+ fn span(range: Range<usize>) -> Span {
+     let on_first_line = |offset: usize| Position::new(offset, 1, offset + 1);
+     Span::new(on_first_line(range.start), on_first_line(range.end))
+ }
+
+ /// Create a new span for the corresponding byte range in the given string.
+ ///
+ /// Unlike `span`, this handles multi-line subjects. The line of each end
+ /// point is one more than the number of `\n` before it. The column is one
+ /// more than the number of characters between it and the closest
+ /// preceding `\n`, or the start of the subject if there is none.
+ ///
+ /// Panics if either end of `range` is not a valid slicing index into
+ /// `subject`.
+ fn span_range(subject: &str, range: Range<usize>) -> Span {
+     // Both end points are located the same way, so the computation lives
+     // in one place instead of being written out twice.
+     let position_at = |offset: usize| {
+         let prefix = &subject[..offset];
+         let line = 1 + prefix.matches('\n').count();
+         let column = 1 + prefix
+             .chars()
+             .rev()
+             .position(|c| c == '\n')
+             // No newline before the offset: the column counts from the
+             // start. Computed lazily, since it is rarely the case for
+             // multi-line subjects.
+             .unwrap_or_else(|| prefix.chars().count());
+         Position { offset, line, column }
+     };
+     let start = position_at(range.start);
+     let end = position_at(range.end);
+     Span::new(start, end)
+ }
+
+ /// Builds a verbatim literal for `c` that begins at offset `start` and
+ /// extends over the UTF-8 encoded length of `c`.
+ fn lit(c: char, start: usize) -> Ast {
+     let end = start + c.len_utf8();
+     lit_with(c, span(start..end))
+ }
+
+ /// Builds a punctuation literal for `c` with the given span.
+ fn punct_lit(c: char, span: Span) -> Ast {
+     let kind = ast::LiteralKind::Punctuation;
+     Ast::Literal(ast::Literal { span, kind, c })
+ }
+
+ /// Builds a verbatim literal for `c` with the given span.
+ fn lit_with(c: char, span: Span) -> Ast {
+     let kind = ast::LiteralKind::Verbatim;
+     Ast::Literal(ast::Literal { span, kind, c })
+ }
+
+ /// Builds a concatenation of `asts` covering the given offset range.
+ fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
+     let whole = span(range);
+     concat_with(whole, asts)
+ }
+
+ /// Builds a concatenation of `asts` with the given span.
+ fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
+     let node = ast::Concat { span, asts };
+     Ast::Concat(node)
+ }
+
+ /// Builds an alternation of `asts` covering the given offset range.
+ fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
+     let node = ast::Alternation { span: span(range), asts };
+     Ast::Alternation(node)
+ }
+
+ /// Builds a capturing group with capture index `index` around `ast`,
+ /// covering the given offset range.
+ fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
+     let node = ast::Group {
+         span: span(range),
+         kind: ast::GroupKind::CaptureIndex(index),
+         ast: Box::new(ast),
+     };
+     Ast::Group(node)
+ }
+
+ /// Builds an `ast::SetFlags` node holding a single flag.
+ ///
+ /// `pat` should be the full pattern string and `range` the byte offsets
+ /// at which the flag set occurs within it. The flag itself is placed on
+ /// the second-to-last byte of `range`.
+ ///
+ /// When `negated` is true, a negation item is placed in front of the
+ /// flag, i.e., the set is treated as beginning with a negation.
+ fn flag_set(
+     pat: &str,
+     range: Range<usize>,
+     flag: ast::Flag,
+     negated: bool,
+ ) -> Ast {
+     let flag_item = ast::FlagsItem {
+         span: span_range(pat, (range.end - 2)..(range.end - 1)),
+         kind: ast::FlagsItemKind::Flag(flag),
+     };
+     let items = if negated {
+         let negation = ast::FlagsItem {
+             span: span_range(pat, (range.start + 2)..(range.end - 2)),
+             kind: ast::FlagsItemKind::Negation,
+         };
+         vec![negation, flag_item]
+     } else {
+         vec![flag_item]
+     };
+     Ast::Flags(ast::SetFlags {
+         span: span_range(pat, range.clone()),
+         flags: ast::Flags {
+             span: span_range(pat, (range.start + 2)..(range.end - 1)),
+             items,
+         },
+     })
+ }
+
+ #[test]
+ fn parse_nest_limit() {
+     // Parses `pat` under the given nest limit and returns the resulting
+     // error (panicking if parsing unexpectedly succeeds).
+     let parse_err = |pat: &str, limit: u32| {
+         parser_nest_limit(pat, limit).parse().unwrap_err()
+     };
+     // The error expected when `limit` is exceeded at `range`.
+     let exceeded = |range: Range<usize>, limit: u32| TestError {
+         span: span(range),
+         kind: ast::ErrorKind::NestLimitExceeded(limit),
+     };
+     // The AST for `a+` located at the very start of a pattern.
+     let a_plus = || {
+         Ast::Repetition(ast::Repetition {
+             span: span(0..2),
+             op: ast::RepetitionOp {
+                 span: span(1..2),
+                 kind: ast::RepetitionKind::OneOrMore,
+             },
+             greedy: true,
+             ast: Box::new(lit('a', 0)),
+         })
+     };
+
+     // A nest limit of 0 still allows some types of regexes.
+     assert_eq!(
+         parser_nest_limit("", 0).parse(),
+         Ok(Ast::Empty(span(0..0)))
+     );
+     assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
+
+     // Test repetition operations, which require one level of nesting.
+     assert_eq!(parse_err("a+", 0), exceeded(0..2, 0));
+     assert_eq!(parser_nest_limit("a+", 1).parse(), Ok(a_plus()));
+     assert_eq!(parse_err("(a)+", 1), exceeded(0..3, 1));
+     assert_eq!(parse_err("a+*", 1), exceeded(0..2, 1));
+     assert_eq!(
+         parser_nest_limit("a+*", 2).parse(),
+         Ok(Ast::Repetition(ast::Repetition {
+             span: span(0..3),
+             op: ast::RepetitionOp {
+                 span: span(2..3),
+                 kind: ast::RepetitionKind::ZeroOrMore,
+             },
+             greedy: true,
+             ast: Box::new(a_plus()),
+         }))
+     );
+
+     // Test concatenations. A concatenation requires one level of nesting.
+     assert_eq!(parse_err("ab", 0), exceeded(0..2, 0));
+     assert_eq!(
+         parser_nest_limit("ab", 1).parse(),
+         Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
+     );
+     assert_eq!(
+         parser_nest_limit("abc", 1).parse(),
+         Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
+     );
+
+     // Test alternations. An alternation requires one level of nesting.
+     assert_eq!(parse_err("a|b", 0), exceeded(0..3, 0));
+     assert_eq!(
+         parser_nest_limit("a|b", 1).parse(),
+         Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
+     );
+     assert_eq!(
+         parser_nest_limit("a|b|c", 1).parse(),
+         Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
+     );
+
+     // Test character classes. Classes form their own mini-recursive
+     // syntax!
+     assert_eq!(parse_err("[a]", 0), exceeded(0..3, 0));
+     assert_eq!(
+         parser_nest_limit("[a]", 1).parse(),
+         Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+             span: span(0..3),
+             negated: false,
+             kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
+                 ast::Literal {
+                     span: span(1..2),
+                     kind: ast::LiteralKind::Verbatim,
+                     c: 'a',
+                 }
+             )),
+         })))
+     );
+     assert_eq!(parse_err("[ab]", 1), exceeded(1..3, 1));
+     assert_eq!(parse_err("[ab[cd]]", 2), exceeded(3..7, 2));
+     assert_eq!(parse_err("[ab[cd]]", 3), exceeded(4..6, 3));
+     assert_eq!(parse_err("[a--b]", 1), exceeded(1..5, 1));
+     assert_eq!(parse_err("[a--bc]", 2), exceeded(4..6, 2));
+ }
+
+ /// Checks that, in whitespace-insensitive mode, comments are left out of
+ /// the AST and are instead collected by `parse_with_comments`, each with
+ /// its own span and with its text excluding the leading `#`.
+ #[test]
+ fn parse_comments() {
+     // NOTE: whitespace inside this literal is significant. In particular,
+     // the third comment must be preceded by exactly two spaces: its span
+     // below starts at byte 53, and every span after it (as well as
+     // `pat.len()`) depends on that.
+     let pat = "(?x)
+# This is comment 1.
+foo # This is comment 2.
+  # This is comment 3.
+bar
+# This is comment 4.";
+     let astc = parser(pat).parse_with_comments().unwrap();
+     // Only the flag group and the six literal characters end up in the AST.
+     assert_eq!(
+         astc.ast,
+         concat_with(
+             span_range(pat, 0..pat.len()),
+             vec![
+                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                 lit_with('f', span_range(pat, 26..27)),
+                 lit_with('o', span_range(pat, 27..28)),
+                 lit_with('o', span_range(pat, 28..29)),
+                 lit_with('b', span_range(pat, 74..75)),
+                 lit_with('a', span_range(pat, 75..76)),
+                 lit_with('r', span_range(pat, 76..77)),
+             ]
+         )
+     );
+     // The first three comment spans run through the newline that ends
+     // them; the last comment is ended by the end of the pattern instead.
+     assert_eq!(
+         astc.comments,
+         vec![
+             ast::Comment {
+                 span: span_range(pat, 5..26),
+                 comment: s(" This is comment 1."),
+             },
+             ast::Comment {
+                 span: span_range(pat, 30..51),
+                 comment: s(" This is comment 2."),
+             },
+             ast::Comment {
+                 span: span_range(pat, 53..74),
+                 comment: s(" This is comment 3."),
+             },
+             ast::Comment {
+                 span: span_range(pat, 78..98),
+                 comment: s(" This is comment 4."),
+             },
+         ]
+     );
+ }
+
/// Checks that a lone `]` parses as a literal and that every escaped
/// punctuation character parses as a two-byte punctuation literal.
#[test]
fn parse_holistic() {
    assert_eq!(parser("]").parse(), Ok(lit(']', 0)));

    // The characters escaped in the pattern below, in order of appearance.
    let escaped = [
        '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^',
        '$', '#', '&', '-', '~',
    ];
    // Each escape is a backslash plus one character, so the i-th literal
    // covers bytes 2*i..2*i+2.
    let expected: Vec<Ast> = escaped
        .iter()
        .enumerate()
        .map(|(i, &c)| punct_lit(c, span((2 * i)..(2 * i + 2))))
        .collect();

    assert_eq!(
        parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
        Ok(concat(0..36, expected))
    );
}
+
/// Exercises whitespace-insensitive (`x`) mode: basic use, toggling it on
/// and off, scoping it to a group, whitespace inside groups and `\x{..}`
/// escapes, and the `\ ` escape that is only recognized while `x` is on.
#[test]
fn parse_ignore_whitespace() {
    // Test that basic whitespace insensitivity works.
    let pat = "(?x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(7, 1, 8)),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
            ]
        ))
    );

    // Test that we can toggle whitespace insensitivity.
    let pat = "(?x)a b(?-x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(15, 1, 16)),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
                lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
                lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
            ]
        ))
    );

    // Test that nesting whitespace insensitive flags works.
    let pat = "a (?x:a )a ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..11),
            vec![
                lit_with('a', span_range(pat, 0..1)),
                lit_with(' ', span_range(pat, 1..2)),
                Ast::Group(ast::Group {
                    span: span_range(pat, 2..9),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 4..5),
                        items: vec![ast::FlagsItem {
                            span: span_range(pat, 4..5),
                            kind: ast::FlagsItemKind::Flag(
                                ast::Flag::IgnoreWhitespace
                            ),
                        }],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 6..7))),
                }),
                lit_with('a', span_range(pat, 9..10)),
                lit_with(' ', span_range(pat, 10..11)),
            ]
        ))
    );

    // Test that whitespace after an opening paren is insignificant.
    let pat = "(?x)( ?P<foo> a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureName(ast::CaptureName {
                        span: span_range(pat, 9..12),
                        name: s("foo"),
                        index: 1,
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 14..15))),
                }),
            ]
        ))
    );
    // NOTE: exactly TWO spaces follow the opening paren; the literal `a` is
    // asserted at byte 7, i.e. after `(?x)(` (5 bytes) plus two spaces.
    let pat = "(?x)(  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureIndex(1),
                    ast: Box::new(lit_with('a', span_range(pat, 7..8))),
                }),
            ]
        ))
    );
    // NOTE: exactly TWO spaces follow the opening paren and TWO more follow
    // `?:`. That places `:` at byte 8 (the empty flag span 8..8) and the
    // literal `a` at byte 11.
    let pat = "(?x)(  ?:  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 8..8),
                        items: vec![],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 11..12))),
                }),
            ]
        ))
    );
    // Whitespace is also skipped inside a braced hex escape; the literal's
    // span still covers the whole escape, spaces included.
    let pat = r"(?x)\x { 53 }";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::Literal(ast::Literal {
                    span: span(4..13),
                    kind: ast::LiteralKind::HexBrace(
                        ast::HexLiteralKind::X
                    ),
                    c: 'S',
                }),
            ]
        ))
    );

    // Test that whitespace after an escape is OK.
    let pat = r"(?x)\ ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::Literal(ast::Literal {
                    span: span_range(pat, 4..6),
                    kind: ast::LiteralKind::Special(
                        ast::SpecialLiteralKind::Space
                    ),
                    c: ' ',
                }),
            ]
        ))
    );
    // ... but only when `x` mode is enabled.
    let pat = r"\ ";
    assert_eq!(
        parser(pat).parse().unwrap_err(),
        TestError {
            span: span_range(pat, 0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
+
/// Checks that literal newlines in a pattern parse as `\n` literals and that
/// line/column positions advance correctly across them.
#[test]
fn parse_newlines() {
    let pat = ".\n.";
    let first_dot = Ast::Dot(span_range(pat, 0..1));
    let newline = lit_with('\n', span_range(pat, 1..2));
    let second_dot = Ast::Dot(span_range(pat, 2..3));
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..3),
            vec![first_dot, newline, second_dot]
        ))
    );

    let pat = "foobar\nbaz\nquux\n";
    // Walk the (ASCII-only) pattern one character at a time, tracking the
    // byte offset plus the 1-based line and column. A newline literal starts
    // on its own line and ends at column 1 of the next one.
    let mut expected = Vec::new();
    let (mut offset, mut line, mut column) = (0, 1, 1);
    for c in pat.chars() {
        let start = npos(offset, line, column);
        offset += 1;
        if c == '\n' {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
        let end = npos(offset, line, column);
        expected.push(lit_with(c, nspan(start, end)));
    }
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(span_range(pat, 0..pat.len()), expected))
    );
}
+
/// Covers the `*`, `+` and `?` repetition operators (greedy and lazy forms,
/// alone and inside concatenations, groups and alternations) and the
/// `RepetitionMissing` error raised when an operator has nothing to repeat.
#[test]
fn parse_uncounted_repetition() {
    // Builds the expected repetition node: `whole` is the span of operand
    // plus operator, `op` the span of the operator alone.
    let rep = |whole, op, kind, greedy, inner| {
        Ast::Repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp { span: span(op), kind },
            greedy,
            ast: Box::new(inner),
        })
    };

    assert_eq!(
        parser(r"a*").parse(),
        Ok(rep(
            0..2,
            1..2,
            ast::RepetitionKind::ZeroOrMore,
            true,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a+").parse(),
        Ok(rep(
            0..2,
            1..2,
            ast::RepetitionKind::OneOrMore,
            true,
            lit('a', 0)
        ))
    );

    assert_eq!(
        parser(r"a?").parse(),
        Ok(rep(
            0..2,
            1..2,
            ast::RepetitionKind::ZeroOrOne,
            true,
            lit('a', 0)
        ))
    );
    // A trailing `?` makes the operator lazy and is part of the op span.
    assert_eq!(
        parser(r"a??").parse(),
        Ok(rep(
            0..3,
            1..3,
            ast::RepetitionKind::ZeroOrOne,
            false,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a?").parse(),
        Ok(rep(
            0..2,
            1..2,
            ast::RepetitionKind::ZeroOrOne,
            true,
            lit('a', 0)
        ))
    );

    // Repetition binds to the single preceding element of a concatenation.
    let optional_a =
        rep(0..2, 1..2, ast::RepetitionKind::ZeroOrOne, true, lit('a', 0));
    assert_eq!(
        parser(r"a?b").parse(),
        Ok(concat(0..3, vec![optional_a, lit('b', 2)]))
    );
    let lazy_optional_a =
        rep(0..3, 1..3, ast::RepetitionKind::ZeroOrOne, false, lit('a', 0));
    assert_eq!(
        parser(r"a??b").parse(),
        Ok(concat(0..4, vec![lazy_optional_a, lit('b', 3)]))
    );
    let optional_b =
        rep(1..3, 2..3, ast::RepetitionKind::ZeroOrOne, true, lit('b', 1));
    assert_eq!(
        parser(r"ab?").parse(),
        Ok(concat(0..3, vec![lit('a', 0), optional_b]))
    );

    // A group is repeated as a whole.
    let ab_group = group(0..4, 1, concat(1..3, vec![lit('a', 1), lit('b', 2)]));
    assert_eq!(
        parser(r"(ab)?").parse(),
        Ok(rep(0..5, 4..5, ast::RepetitionKind::ZeroOrOne, true, ab_group))
    );

    // An empty first alternative does not affect the second one.
    let optional_a =
        rep(1..3, 2..3, ast::RepetitionKind::ZeroOrOne, true, lit('a', 1));
    assert_eq!(
        parser(r"|a?").parse(),
        Ok(alt(0..3, vec![Ast::Empty(span(0..0)), optional_a]))
    );

    // Operators with no operand: the error is an empty span located at the
    // given offset.
    let missing = vec![
        (r"*", 0),
        (r"(?i)*", 4),
        (r"(*)", 1),
        (r"(?:?)", 3),
        (r"+", 0),
        (r"?", 0),
        (r"(?)", 1),
        (r"|*", 1),
        (r"|+", 1),
        (r"|?", 1),
    ];
    for (pat, at) in missing {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(at..at),
                kind: ast::ErrorKind::RepetitionMissing,
            },
            "pattern: {:?}",
            pat
        );
    }
}
+
/// Covers counted repetitions (`{m}`, `{m,}`, `{m,n}`), their lazy form,
/// whitespace inside the braces, and the errors produced by malformed or
/// operand-less counted repetitions.
#[test]
fn parse_counted_repetition() {
    // Builds the expected counted-repetition node: `whole` spans operand
    // plus operator, `op` spans the `{..}` operator (and a trailing `?`).
    let counted = |whole, op, range, greedy, inner| {
        Ast::Repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp {
                span: span(op),
                kind: ast::RepetitionKind::Range(range),
            },
            greedy,
            ast: Box::new(inner),
        })
    };

    assert_eq!(
        parser(r"a{5}").parse(),
        Ok(counted(
            0..4,
            1..4,
            ast::RepetitionRange::Exactly(5),
            true,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a{5,}").parse(),
        Ok(counted(
            0..5,
            1..5,
            ast::RepetitionRange::AtLeast(5),
            true,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a{5,9}").parse(),
        Ok(counted(
            0..6,
            1..6,
            ast::RepetitionRange::Bounded(5, 9),
            true,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a{5}?").parse(),
        Ok(counted(
            0..5,
            1..5,
            ast::RepetitionRange::Exactly(5),
            false,
            lit('a', 0)
        ))
    );

    // Inside a concatenation only the preceding element is repeated.
    let five_b =
        counted(1..5, 2..5, ast::RepetitionRange::Exactly(5), true, lit('b', 1));
    assert_eq!(
        parser(r"ab{5}").parse(),
        Ok(concat(0..5, vec![lit('a', 0), five_b]))
    );
    let five_b =
        counted(1..5, 2..5, ast::RepetitionRange::Exactly(5), true, lit('b', 1));
    assert_eq!(
        parser(r"ab{5}c").parse(),
        Ok(concat(0..6, vec![lit('a', 0), five_b, lit('c', 5)]))
    );

    // Whitespace inside the braces is accepted even without `x` mode.
    assert_eq!(
        parser(r"a{ 5 }").parse(),
        Ok(counted(
            0..6,
            1..6,
            ast::RepetitionRange::Exactly(5),
            true,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a{ 5 , 9 }").parse(),
        Ok(counted(
            0..10,
            1..10,
            ast::RepetitionRange::Bounded(5, 9),
            true,
            lit('a', 0)
        ))
    );
    // With whitespace ignored, a `?` separated by a space still makes the
    // repetition lazy and is included in the operator span.
    assert_eq!(
        parser_ignore_whitespace(r"a{5,9} ?").parse(),
        Ok(counted(
            0..8,
            1..8,
            ast::RepetitionRange::Bounded(5, 9),
            false,
            lit('a', 0)
        ))
    );

    // Malformed input: (pattern, expected error span, expected error kind).
    let failures = vec![
        (r"(?i){0}", 4..4, ast::ErrorKind::RepetitionMissing),
        (r"(?m){1,1}", 4..4, ast::ErrorKind::RepetitionMissing),
        (r"a{]}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{1,]}", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{", 1..2, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{a", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{9999999999}", 2..12, ast::ErrorKind::DecimalInvalid),
        (r"a{9", 1..3, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{9,a", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{9,9999999999}", 4..14, ast::ErrorKind::DecimalInvalid),
        (r"a{9,", 1..4, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{9,11", 1..6, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{2,1}", 1..6, ast::ErrorKind::RepetitionCountInvalid),
        (r"{5}", 0..0, ast::ErrorKind::RepetitionMissing),
        (r"|{5}", 1..1, ast::ErrorKind::RepetitionMissing),
    ];
    for (pat, range, kind) in failures {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(range), kind },
            "pattern: {:?}",
            pat
        );
    }
}
+
/// Covers alternation: flat and grouped alternatives, nested groups, empty
/// branches, and unbalanced parentheses around an alternation.
#[test]
fn parse_alternate() {
    // Expected alternation node over the given byte range.
    let alternation = |range, asts| {
        Ast::Alternation(ast::Alternation { span: span(range), asts })
    };
    // Expected concatenation of two adjacent single-byte literals starting
    // at byte `start`.
    let pair = |start, first, second| {
        concat(
            start..start + 2,
            vec![lit(first, start), lit(second, start + 1)],
        )
    };
    // Expected empty node at byte `at`.
    let empty = |at| Ast::Empty(span(at..at));

    assert_eq!(
        parser(r"a|b").parse(),
        Ok(alternation(0..3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser(r"(a|b)").parse(),
        Ok(group(
            0..5,
            1,
            alternation(1..4, vec![lit('a', 1), lit('b', 3)])
        ))
    );

    assert_eq!(
        parser(r"a|b|c").parse(),
        Ok(alternation(
            0..5,
            vec![lit('a', 0), lit('b', 2), lit('c', 4)]
        ))
    );
    assert_eq!(
        parser(r"ax|by|cz").parse(),
        Ok(alternation(
            0..8,
            vec![pair(0, 'a', 'x'), pair(3, 'b', 'y'), pair(6, 'c', 'z')]
        ))
    );
    assert_eq!(
        parser(r"(ax|by|cz)").parse(),
        Ok(group(
            0..10,
            1,
            alternation(
                1..9,
                vec![pair(1, 'a', 'x'), pair(4, 'b', 'y'), pair(7, 'c', 'z')]
            )
        ))
    );

    // Nested groups, assembled from the innermost one outwards.
    let cz_group = group(8..12, 3, pair(9, 'c', 'z'));
    let by_group = group(4..13, 2, alt(5..12, vec![pair(5, 'b', 'y'), cz_group]));
    assert_eq!(
        parser(r"(ax|(by|(cz)))").parse(),
        Ok(group(
            0..14,
            1,
            alt(1..13, vec![pair(1, 'a', 'x'), by_group])
        ))
    );

    // Empty branches are represented by zero-width `Empty` nodes.
    assert_eq!(
        parser(r"|").parse(),
        Ok(alt(0..1, vec![empty(0), empty(1)]))
    );
    assert_eq!(
        parser(r"||").parse(),
        Ok(alt(0..2, vec![empty(0), empty(1), empty(2)]))
    );
    assert_eq!(
        parser(r"a|").parse(),
        Ok(alt(0..2, vec![lit('a', 0), empty(2)]))
    );
    assert_eq!(
        parser(r"|a").parse(),
        Ok(alt(0..2, vec![empty(0), lit('a', 1)]))
    );

    assert_eq!(
        parser(r"(|)").parse(),
        Ok(group(0..3, 1, alt(1..2, vec![empty(1), empty(2)])))
    );
    assert_eq!(
        parser(r"(a|)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![lit('a', 1), empty(3)])))
    );
    assert_eq!(
        parser(r"(|a)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![empty(1), lit('a', 2)])))
    );

    // Unbalanced parentheses: the error points at the offending paren.
    let unopened = parser(r"a|b)").parse().unwrap_err();
    assert_eq!(
        unopened,
        TestError { span: span(3..4), kind: ast::ErrorKind::GroupUnopened }
    );
    let unclosed = parser(r"(a|b").parse().unwrap_err();
    assert_eq!(
        unclosed,
        TestError { span: span(0..1), kind: ast::ErrorKind::GroupUnclosed }
    );
}
+
/// Checks that each look-around opener is rejected with
/// `UnsupportedLookAround`, with the error span covering the opener itself.
#[test]
fn parse_unsupported_lookaround() {
    // (pattern, byte length of the look-around opener)
    let cases = vec![
        (r"(?=a)", 3),
        (r"(?!a)", 3),
        (r"(?<=a)", 4),
        (r"(?<!a)", 4),
    ];
    for (pat, opener_len) in cases {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(0..opener_len),
                kind: ast::ErrorKind::UnsupportedLookAround,
            },
            "pattern: {:?}",
            pat
        );
    }
}
+
/// Covers flag-setting groups (`(?i)`), capturing groups, non-capturing
/// groups with and without flags, and the errors for unterminated or
/// unopened groups.
#[test]
fn parse_group() {
    // Small builders for the expected nodes; every range is in bytes.
    let flag = |range, which| ast::FlagsItem {
        span: span(range),
        kind: ast::FlagsItemKind::Flag(which),
    };
    let negation = |range| ast::FlagsItem {
        span: span(range),
        kind: ast::FlagsItemKind::Negation,
    };
    let flags = |range, items| ast::Flags { span: span(range), items };
    let set_flags = |range, inner| {
        Ast::Flags(ast::SetFlags { span: span(range), flags: inner })
    };
    let grp = |range, kind, inner| {
        Ast::Group(ast::Group {
            span: span(range),
            kind,
            ast: Box::new(inner),
        })
    };

    // Flag-only groups.
    assert_eq!(
        parser("(?i)").parse(),
        Ok(set_flags(
            0..4,
            flags(2..3, vec![flag(2..3, ast::Flag::CaseInsensitive)])
        ))
    );
    assert_eq!(
        parser("(?iU)").parse(),
        Ok(set_flags(
            0..5,
            flags(
                2..4,
                vec![
                    flag(2..3, ast::Flag::CaseInsensitive),
                    flag(3..4, ast::Flag::SwapGreed),
                ]
            )
        ))
    );
    assert_eq!(
        parser("(?i-U)").parse(),
        Ok(set_flags(
            0..6,
            flags(
                2..5,
                vec![
                    flag(2..3, ast::Flag::CaseInsensitive),
                    negation(3..4),
                    flag(4..5, ast::Flag::SwapGreed),
                ]
            )
        ))
    );

    // Capturing groups are numbered from 1 in order of their opening paren.
    assert_eq!(
        parser("()").parse(),
        Ok(grp(
            0..2,
            ast::GroupKind::CaptureIndex(1),
            Ast::Empty(span(1..1))
        ))
    );
    assert_eq!(
        parser("(a)").parse(),
        Ok(grp(0..3, ast::GroupKind::CaptureIndex(1), lit('a', 1)))
    );
    let inner_group = grp(
        1..3,
        ast::GroupKind::CaptureIndex(2),
        Ast::Empty(span(2..2)),
    );
    assert_eq!(
        parser("(())").parse(),
        Ok(grp(0..4, ast::GroupKind::CaptureIndex(1), inner_group))
    );

    // Non-capturing groups; `(?:` carries an empty, zero-width flag set.
    assert_eq!(
        parser("(?:a)").parse(),
        Ok(grp(
            0..5,
            ast::GroupKind::NonCapturing(flags(2..2, vec![])),
            lit('a', 3)
        ))
    );

    assert_eq!(
        parser("(?i:a)").parse(),
        Ok(grp(
            0..6,
            ast::GroupKind::NonCapturing(flags(
                2..3,
                vec![flag(2..3, ast::Flag::CaseInsensitive)]
            )),
            lit('a', 4)
        ))
    );
    assert_eq!(
        parser("(?i-U:a)").parse(),
        Ok(grp(
            0..8,
            ast::GroupKind::NonCapturing(flags(
                2..5,
                vec![
                    flag(2..3, ast::Flag::CaseInsensitive),
                    negation(3..4),
                    flag(4..5, ast::Flag::SwapGreed),
                ]
            )),
            lit('a', 6)
        ))
    );

    // Malformed groups: (pattern, expected error span, expected error kind).
    let failures = vec![
        ("(", 0..1, ast::ErrorKind::GroupUnclosed),
        ("(?", 0..1, ast::ErrorKind::GroupUnclosed),
        ("(?P", 2..3, ast::ErrorKind::FlagUnrecognized),
        ("(?P<", 4..4, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(a", 0..1, ast::ErrorKind::GroupUnclosed),
        ("(()", 0..1, ast::ErrorKind::GroupUnclosed),
        (")", 0..1, ast::ErrorKind::GroupUnopened),
        ("a)", 1..2, ast::ErrorKind::GroupUnopened),
    ];
    for (pat, range, kind) in failures {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(range), kind },
            "pattern: {:?}",
            pat
        );
    }
}
+
/// Covers named capture groups (`(?P<name>...)`): accepted name characters,
/// and the errors for empty, truncated, invalid and duplicate names.
#[test]
fn parse_capture_name() {
    // Every accepted name is wrapped as `(?P<name>z)`. The name therefore
    // starts at byte 4 and the literal `z` is the second-to-last byte.
    let names = vec!["a", "abc", "a_1", "a.1", "a[1]"];
    for name in names {
        let pat = format!("(?P<{}>z)", name);
        let expected = Ast::Group(ast::Group {
            span: span(0..pat.len()),
            kind: ast::GroupKind::CaptureName(ast::CaptureName {
                span: span(4..4 + name.len()),
                name: s(name),
                index: 1,
            }),
            ast: Box::new(lit('z', pat.len() - 2)),
        });
        assert_eq!(parser(&pat).parse(), Ok(expected), "pattern: {:?}", pat);
    }

    // Malformed names: (pattern, expected error span, expected error kind).
    let failures = vec![
        ("(?P<", 4..4, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(?P<>z)", 4..4, ast::ErrorKind::GroupNameEmpty),
        ("(?P<a", 5..5, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(?P<ab", 6..6, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(?P<0a", 4..5, ast::ErrorKind::GroupNameInvalid),
        ("(?P<~", 4..5, ast::ErrorKind::GroupNameInvalid),
        ("(?P<abc~", 7..8, ast::ErrorKind::GroupNameInvalid),
        // The duplicate error points at the second name and records the
        // span of the first.
        (
            "(?P<a>y)(?P<a>z)",
            12..13,
            ast::ErrorKind::GroupNameDuplicate { original: span(4..5) },
        ),
    ];
    for (pat, range, kind) in failures {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(range), kind },
            "pattern: {:?}",
            pat
        );
    }
}
+
/// Exercises `parse_flags` directly: flag lists terminated by `:` or `)`,
/// negation, and the errors for truncated, unknown, duplicated or dangling
/// flag items.
#[test]
fn parse_flags() {
    // Every item in these inputs is one byte wide and the list starts at
    // byte 0, so the i-th item spans i..i+1 and the whole list 0..len.
    let flags_of = |kinds: Vec<ast::FlagsItemKind>| {
        let len = kinds.len();
        let items = kinds
            .into_iter()
            .enumerate()
            .map(|(i, kind)| ast::FlagsItem { span: span(i..i + 1), kind })
            .collect();
        ast::Flags { span: span(0..len), items }
    };
    let on = |which| ast::FlagsItemKind::Flag(which);
    let negate = || ast::FlagsItemKind::Negation;

    // The terminator (`:` or `)`) is not part of the flags span.
    assert_eq!(
        parser("i:").parse_flags(),
        Ok(flags_of(vec![on(ast::Flag::CaseInsensitive)]))
    );
    assert_eq!(
        parser("i)").parse_flags(),
        Ok(flags_of(vec![on(ast::Flag::CaseInsensitive)]))
    );

    assert_eq!(
        parser("isU:").parse_flags(),
        Ok(flags_of(vec![
            on(ast::Flag::CaseInsensitive),
            on(ast::Flag::DotMatchesNewLine),
            on(ast::Flag::SwapGreed),
        ]))
    );

    assert_eq!(
        parser("-isU:").parse_flags(),
        Ok(flags_of(vec![
            negate(),
            on(ast::Flag::CaseInsensitive),
            on(ast::Flag::DotMatchesNewLine),
            on(ast::Flag::SwapGreed),
        ]))
    );
    assert_eq!(
        parser("i-sU:").parse_flags(),
        Ok(flags_of(vec![
            on(ast::Flag::CaseInsensitive),
            negate(),
            on(ast::Flag::DotMatchesNewLine),
            on(ast::Flag::SwapGreed),
        ]))
    );

    // Malformed lists: (input, expected error span, expected error kind).
    let failures = vec![
        ("isU", 3..3, ast::ErrorKind::FlagUnexpectedEof),
        ("isUa:", 3..4, ast::ErrorKind::FlagUnrecognized),
        (
            "isUi:",
            3..4,
            ast::ErrorKind::FlagDuplicate { original: span(0..1) },
        ),
        (
            "i-sU-i:",
            4..5,
            ast::ErrorKind::FlagRepeatedNegation { original: span(1..2) },
        ),
        ("-)", 0..1, ast::ErrorKind::FlagDanglingNegation),
        ("i-)", 1..2, ast::ErrorKind::FlagDanglingNegation),
        ("iU-)", 2..3, ast::ErrorKind::FlagDanglingNegation),
    ];
    for (pat, range, kind) in failures {
        assert_eq!(
            parser(pat).parse_flags().unwrap_err(),
            TestError { span: span(range), kind },
            "input: {:?}",
            pat
        );
    }
}
+
/// Exercises `parse_flag` on each recognized flag letter and on two
/// unrecognized characters (one ASCII, one multi-byte).
#[test]
fn parse_flag() {
    let recognized = vec![
        ("i", ast::Flag::CaseInsensitive),
        ("m", ast::Flag::MultiLine),
        ("s", ast::Flag::DotMatchesNewLine),
        ("U", ast::Flag::SwapGreed),
        ("u", ast::Flag::Unicode),
        ("x", ast::Flag::IgnoreWhitespace),
    ];
    for (letter, flag) in recognized {
        assert_eq!(
            parser(letter).parse_flag(),
            Ok(flag),
            "flag: {:?}",
            letter
        );
    }

    let ascii_err = parser("a").parse_flag().unwrap_err();
    assert_eq!(
        ascii_err,
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
    // The snowman is three bytes long; the error span covers all of them.
    let snowman_err = parser("☃").parse_flag().unwrap_err();
    assert_eq!(
        snowman_err,
        TestError {
            span: span_range("☃", 0..3),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
}
+
/// Exercises `parse_primitive` on unescaped input: `.`, the `^`/`$` line
/// anchors, and verbatim literals (including `|` and a multi-byte char).
#[test]
fn parse_primitive_non_escape() {
    // Expected one-byte assertion of the given kind.
    let anchor = |kind| {
        Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
    };
    // Expected verbatim literal `c` covering `at`.
    let verbatim = |at, c| {
        Primitive::Literal(ast::Literal {
            span: at,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    };

    assert_eq!(
        parser(r".").parse_primitive(),
        Ok(Primitive::Dot(span(0..1)))
    );
    assert_eq!(
        parser(r"^").parse_primitive(),
        Ok(anchor(ast::AssertionKind::StartLine))
    );
    assert_eq!(
        parser(r"$").parse_primitive(),
        Ok(anchor(ast::AssertionKind::EndLine))
    );

    assert_eq!(
        parser(r"a").parse_primitive(),
        Ok(verbatim(span(0..1), 'a'))
    );
    // `parse_primitive` itself treats `|` as an ordinary literal.
    assert_eq!(
        parser(r"|").parse_primitive(),
        Ok(verbatim(span(0..1), '|'))
    );
    assert_eq!(
        parser(r"☃").parse_primitive(),
        Ok(verbatim(span_range("☃", 0..3), '☃'))
    );
}
+
/// Covers two-byte escapes: escaped punctuation, the special literals
/// (`\a`, `\f`, `\t`, `\n`, `\r`, `\v`), the escape-based assertions, and
/// the errors for a dangling backslash and an unknown escape.
#[test]
fn parse_escape() {
    assert_eq!(
        parser(r"\|").parse_primitive(),
        Ok(Primitive::Literal(ast::Literal {
            span: span(0..2),
            kind: ast::LiteralKind::Punctuation,
            c: '|',
        }))
    );

    // (escape, character it denotes, expected special-literal kind)
    let specials = vec![
        (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
        (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
        (r"\t", '\t', ast::SpecialLiteralKind::Tab),
        (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
        (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
        (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
    ];
    for (pat, c, kind) in specials {
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..2),
            kind: ast::LiteralKind::Special(kind),
            c,
        });
        assert_eq!(parser(pat).parse_primitive(), Ok(expected));
    }

    // (escape, expected assertion kind)
    let assertions = vec![
        (r"\A", ast::AssertionKind::StartText),
        (r"\z", ast::AssertionKind::EndText),
        (r"\b", ast::AssertionKind::WordBoundary),
        (r"\B", ast::AssertionKind::NotWordBoundary),
    ];
    for (pat, kind) in assertions {
        let expected =
            Primitive::Assertion(ast::Assertion { span: span(0..2), kind });
        assert_eq!(parser(pat).parse_primitive(), Ok(expected));
    }

    // A backslash at the end of input, and a backslash followed by a
    // character that is not a known escape.
    let dangling = parser(r"\").parse_escape().unwrap_err();
    assert_eq!(
        dangling,
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    let unknown = parser(r"\y").parse_escape().unwrap_err();
    assert_eq!(
        unknown,
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
+
/// Checks that `\0` and `\9` are rejected by `parse_escape` with
/// `UnsupportedBackreference`, the error covering both bytes of the escape.
#[test]
fn parse_unsupported_backreference() {
    let backrefs = vec![r"\0", r"\9"];
    for pat in backrefs {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::UnsupportedBackreference,
            },
            "pattern: {:?}",
            pat
        );
    }
}
+
+ // Octal escapes under `parser_octal`: every value in `0..511` round-trips,
+ // an escape stops at the first non-octal digit or after three digits, and
+ // `\8` is not an octal escape at all.
+ #[test]
+ fn parse_octal() {
+     // Every octal literal in this test starts at offset 0.
+     let octal = |end, c| ast::Literal {
+         span: span(0..end),
+         kind: ast::LiteralKind::Octal,
+         c,
+     };
+
+     for code in 0..511 {
+         let pat = format!(r"\{:o}", code);
+         let c = ::std::char::from_u32(code).unwrap();
+         assert_eq!(
+             parser_octal(&pat).parse_escape(),
+             Ok(Primitive::Literal(octal(pat.len(), c)))
+         );
+     }
+
+     // `parse_escape` leaves whatever follows the escape unconsumed.
+     assert_eq!(
+         parser_octal(r"\778").parse_escape(),
+         Ok(Primitive::Literal(octal(3, '?')))
+     );
+     assert_eq!(
+         parser_octal(r"\7777").parse_escape(),
+         Ok(Primitive::Literal(octal(4, '\u{01FF}')))
+     );
+
+     // A full parse turns the trailing character into its own literal.
+     // (pattern, end of the escape, escaped character, trailing character)
+     for &(pat, end, c, rest) in
+         &[(r"\778", 3, '?', '8'), (r"\7777", 4, '\u{01FF}', '7')]
+     {
+         assert_eq!(
+             parser_octal(pat).parse(),
+             Ok(Ast::Concat(ast::Concat {
+                 span: span(0..end + 1),
+                 asts: vec![
+                     Ast::Literal(octal(end, c)),
+                     Ast::Literal(ast::Literal {
+                         span: span(end..end + 1),
+                         kind: ast::LiteralKind::Verbatim,
+                         c: rest,
+                     }),
+                 ],
+             }))
+         );
+     }
+
+     assert_eq!(
+         parser_octal(r"\8").parse_escape().unwrap_err(),
+         TestError {
+             span: span(0..2),
+             kind: ast::ErrorKind::EscapeUnrecognized,
+         }
+     );
+ }
+
+ // Fixed-width `\xNN` escapes: all 256 two-digit values parse, while a
+ // missing or non-hex digit is reported at the offending position.
+ #[test]
+ fn parse_hex_two() {
+     for code in 0..256 {
+         let pat = format!(r"\x{:02x}", code);
+         let expected = ast::Literal {
+             span: span(0..pat.len()),
+             kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
+             c: ::std::char::from_u32(code).unwrap(),
+         };
+         assert_eq!(
+             parser(&pat).parse_escape(),
+             Ok(Primitive::Literal(expected))
+         );
+     }
+
+     // (pattern, error span, error kind)
+     let failures = vec![
+         (r"\xF", span(3..3), ast::ErrorKind::EscapeUnexpectedEof),
+         (r"\xG", span(2..3), ast::ErrorKind::EscapeHexInvalidDigit),
+         (r"\xFG", span(3..4), ast::ErrorKind::EscapeHexInvalidDigit),
+     ];
+     for (pat, at, kind) in failures {
+         assert_eq!(
+             parser(pat).parse_escape().unwrap_err(),
+             TestError { span: at, kind }
+         );
+     }
+ }
+
+ // Fixed-width `\uNNNN` escapes: every value below 65536 that is a valid
+ // `char` parses; truncated input, a non-hex digit in any of the four
+ // positions, and a surrogate code point are all rejected.
+ #[test]
+ fn parse_hex_four() {
+     // Values for which `from_u32` returns `None` are skipped here; one of
+     // them (`\uD800`) is checked as an error at the end.
+     let scalars = (0..65536)
+         .filter_map(|code| ::std::char::from_u32(code).map(|c| (code, c)));
+     for (code, c) in scalars {
+         let pat = format!(r"\u{:04x}", code);
+         let expected = ast::Literal {
+             span: span(0..pat.len()),
+             kind: ast::LiteralKind::HexFixed(
+                 ast::HexLiteralKind::UnicodeShort,
+             ),
+             c,
+         };
+         assert_eq!(
+             parser(&pat).parse_escape(),
+             Ok(Primitive::Literal(expected))
+         );
+     }
+
+     assert_eq!(
+         parser(r"\uF").parse_escape().unwrap_err(),
+         TestError {
+             span: span(3..3),
+             kind: ast::ErrorKind::EscapeUnexpectedEof,
+         }
+     );
+
+     // `\uG`, `\uFG`, `\uFFG`, `\uFFFG`: the error points at the `G`.
+     for filled in 0..4 {
+         let pat = format!(r"\u{}G", "F".repeat(filled));
+         assert_eq!(
+             parser(&pat).parse_escape().unwrap_err(),
+             TestError {
+                 span: span(2 + filled..3 + filled),
+                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
+             }
+         );
+     }
+
+     assert_eq!(
+         parser(r"\uD800").parse_escape().unwrap_err(),
+         TestError {
+             span: span(2..6),
+             kind: ast::ErrorKind::EscapeHexInvalid,
+         }
+     );
+ }
+
+ // Fixed-width `\UNNNNNNNN` escapes: every value below 65536 that is a valid
+ // `char` parses; truncated input and a non-hex digit in any of the eight
+ // positions are rejected.
+ #[test]
+ fn parse_hex_eight() {
+     let scalars = (0..65536)
+         .filter_map(|code| ::std::char::from_u32(code).map(|c| (code, c)));
+     for (code, c) in scalars {
+         let pat = format!(r"\U{:08x}", code);
+         let expected = ast::Literal {
+             span: span(0..pat.len()),
+             kind: ast::LiteralKind::HexFixed(
+                 ast::HexLiteralKind::UnicodeLong,
+             ),
+             c,
+         };
+         assert_eq!(
+             parser(&pat).parse_escape(),
+             Ok(Primitive::Literal(expected))
+         );
+     }
+
+     assert_eq!(
+         parser(r"\UF").parse_escape().unwrap_err(),
+         TestError {
+             span: span(3..3),
+             kind: ast::ErrorKind::EscapeUnexpectedEof,
+         }
+     );
+
+     // `\UG` through `\UFFFFFFFG`: the error points at the `G`.
+     for filled in 0..8 {
+         let pat = format!(r"\U{}G", "F".repeat(filled));
+         assert_eq!(
+             parser(&pat).parse_escape().unwrap_err(),
+             TestError {
+                 span: span(2 + filled..3 + filled),
+                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
+             }
+         );
+     }
+ }
+
+ // Braced hex escapes (`\x{..}`, `\u{..}`, `\U{..}`): digits may be upper,
+ // lower or mixed case and reach up to U+10FFFF; unterminated, empty,
+ // non-hex and out-of-range forms are rejected.
+ #[test]
+ fn parse_hex_brace() {
+     // (pattern, hex kind, decoded character); each literal spans its whole
+     // pattern.
+     let literals = vec![
+         (r"\u{26c4}", ast::HexLiteralKind::UnicodeShort, '⛄'),
+         (r"\U{26c4}", ast::HexLiteralKind::UnicodeLong, '⛄'),
+         (r"\x{26c4}", ast::HexLiteralKind::X, '⛄'),
+         (r"\x{26C4}", ast::HexLiteralKind::X, '⛄'),
+         (r"\x{10fFfF}", ast::HexLiteralKind::X, '\u{10FFFF}'),
+     ];
+     for (pat, hex, c) in literals {
+         let expected = ast::Literal {
+             span: span(0..pat.len()),
+             kind: ast::LiteralKind::HexBrace(hex),
+             c,
+         };
+         assert_eq!(
+             parser(pat).parse_escape(),
+             Ok(Primitive::Literal(expected))
+         );
+     }
+
+     // (pattern, error span, error kind)
+     let failures = vec![
+         (r"\x", span(2..2), ast::ErrorKind::EscapeUnexpectedEof),
+         (r"\x{", span(2..3), ast::ErrorKind::EscapeUnexpectedEof),
+         (r"\x{FF", span(2..5), ast::ErrorKind::EscapeUnexpectedEof),
+         (r"\x{}", span(2..4), ast::ErrorKind::EscapeHexEmpty),
+         (r"\x{FGF}", span(4..5), ast::ErrorKind::EscapeHexInvalidDigit),
+         (r"\x{FFFFFF}", span(3..9), ast::ErrorKind::EscapeHexInvalid),
+         (r"\x{D800}", span(3..7), ast::ErrorKind::EscapeHexInvalid),
+         (r"\x{FFFFFFFFF}", span(3..12), ast::ErrorKind::EscapeHexInvalid),
+     ];
+     for (pat, at, kind) in failures {
+         assert_eq!(
+             parser(pat).parse_escape().unwrap_err(),
+             TestError { span: at, kind }
+         );
+     }
+ }
+
+ // `parse_decimal` accepts plain digit runs (leading zeros included) and
+ // rejects input that does not start with a digit or whose value is too
+ // large.
+ #[test]
+ fn parse_decimal() {
+     for &(digits, value) in &[("123", 123), ("0", 0), ("01", 1)] {
+         assert_eq!(parser(digits).parse_decimal(), Ok(value));
+     }
+
+     // (input, error span, error kind)
+     let failures = vec![
+         ("-1", span(0..0), ast::ErrorKind::DecimalEmpty),
+         ("", span(0..0), ast::ErrorKind::DecimalEmpty),
+         ("9999999999", span(0..10), ast::ErrorKind::DecimalInvalid),
+     ];
+     for (input, at, kind) in failures {
+         assert_eq!(
+             parser(input).parse_decimal().unwrap_err(),
+             TestError { span: at, kind }
+         );
+     }
+ }
+
+ // End-to-end checks for bracketed character classes: POSIX classes,
+ // nesting, the binary set operators, literals and escapes, ranges, the
+ // special handling of a leading `]`, and the errors for malformed classes.
+ #[test]
+ fn parse_set_class() {
+     // Builders for the expected values. Every class built here is
+     // non-negated.
+
+     fn bracketed(at: Span, kind: ast::ClassSet) -> ast::ClassBracketed {
+         ast::ClassBracketed { span: at, negated: false, kind }
+     }
+
+     fn class(at: Span, kind: ast::ClassSet) -> Ast {
+         Ast::Class(ast::Class::Bracketed(bracketed(at, kind)))
+     }
+
+     fn union(at: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
+         ast::ClassSet::union(ast::ClassSetUnion { span: at, items })
+     }
+
+     fn binop(
+         kind: ast::ClassSetBinaryOpKind,
+         at: Span,
+         lhs: ast::ClassSet,
+         rhs: ast::ClassSet,
+     ) -> ast::ClassSet {
+         ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+             span: at,
+             kind,
+             lhs: Box::new(lhs),
+             rhs: Box::new(rhs),
+         })
+     }
+
+     fn intersection(
+         at: Span,
+         lhs: ast::ClassSet,
+         rhs: ast::ClassSet,
+     ) -> ast::ClassSet {
+         binop(ast::ClassSetBinaryOpKind::Intersection, at, lhs, rhs)
+     }
+
+     fn difference(
+         at: Span,
+         lhs: ast::ClassSet,
+         rhs: ast::ClassSet,
+     ) -> ast::ClassSet {
+         binop(ast::ClassSetBinaryOpKind::Difference, at, lhs, rhs)
+     }
+
+     fn symdifference(
+         at: Span,
+         lhs: ast::ClassSet,
+         rhs: ast::ClassSet,
+     ) -> ast::ClassSet {
+         binop(ast::ClassSetBinaryOpKind::SymmetricDifference, at, lhs, rhs)
+     }
+
+     fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
+         ast::ClassSet::Item(item)
+     }
+
+     // A non-negated ASCII class, already wrapped as a single-item set.
+     fn ascii(at: Span, kind: ast::ClassAsciiKind) -> ast::ClassSet {
+         itemset(ast::ClassSetItem::Ascii(ast::ClassAscii {
+             span: at,
+             kind,
+             negated: false,
+         }))
+     }
+
+     fn alnum(at: Span) -> ast::ClassSet {
+         ascii(at, ast::ClassAsciiKind::Alnum)
+     }
+
+     fn lower(at: Span) -> ast::ClassSet {
+         ascii(at, ast::ClassAsciiKind::Lower)
+     }
+
+     // A non-negated `\w`.
+     fn word(at: Span) -> ast::ClassSetItem {
+         ast::ClassSetItem::Perl(ast::ClassPerl {
+             span: at,
+             kind: ast::ClassPerlKind::Word,
+             negated: false,
+         })
+     }
+
+     fn literal(
+         at: Span,
+         kind: ast::LiteralKind,
+         c: char,
+     ) -> ast::ClassSetItem {
+         ast::ClassSetItem::Literal(ast::Literal { span: at, kind, c })
+     }
+
+     // A character written as itself.
+     fn lit(at: Span, c: char) -> ast::ClassSetItem {
+         literal(at, ast::LiteralKind::Verbatim, c)
+     }
+
+     // A backslash-escaped punctuation character.
+     fn punct(at: Span, c: char) -> ast::ClassSetItem {
+         literal(at, ast::LiteralKind::Punctuation, c)
+     }
+
+     fn empty(at: Span) -> ast::ClassSetItem {
+         ast::ClassSetItem::Empty(at)
+     }
+
+     // A range between two verbatim literals. The start literal occupies
+     // the first character of `at` and the end literal the last one.
+     fn range(at: Span, start: char, end: char) -> ast::ClassSetItem {
+         let mut first = at;
+         first.end = at.start;
+         first.end.offset += start.len_utf8();
+         first.end.column += 1;
+
+         let mut last = at;
+         last.start = at.end;
+         last.start.offset -= end.len_utf8();
+         last.start.column -= 1;
+
+         ast::ClassSetItem::Range(ast::ClassSetRange {
+             span: at,
+             start: ast::Literal {
+                 span: first,
+                 kind: ast::LiteralKind::Verbatim,
+                 c: start,
+             },
+             end: ast::Literal {
+                 span: last,
+                 kind: ast::LiteralKind::Verbatim,
+                 c: end,
+             },
+         })
+     }
+
+     // POSIX classes, nesting, and one binary operator between two classes.
+     assert_eq!(
+         parser("[[:alnum:]]").parse(),
+         Ok(class(span(0..11), alnum(span(1..10))))
+     );
+     assert_eq!(
+         parser("[[[:alnum:]]]").parse(),
+         Ok(class(
+             span(0..13),
+             itemset(ast::ClassSetItem::Bracketed(Box::new(bracketed(
+                 span(1..12),
+                 alnum(span(2..11)),
+             )))),
+         ))
+     );
+     let binary_ops = vec![
+         ("[[:alnum:]&&[:lower:]]", ast::ClassSetBinaryOpKind::Intersection),
+         ("[[:alnum:]--[:lower:]]", ast::ClassSetBinaryOpKind::Difference),
+         (
+             "[[:alnum:]~~[:lower:]]",
+             ast::ClassSetBinaryOpKind::SymmetricDifference,
+         ),
+     ];
+     for (pat, kind) in binary_ops {
+         assert_eq!(
+             parser(pat).parse(),
+             Ok(class(
+                 span(0..22),
+                 binop(
+                     kind,
+                     span(1..21),
+                     alnum(span(1..10)),
+                     lower(span(12..21)),
+                 ),
+             ))
+         );
+     }
+
+     // Literals, escapes, and `-` in positions where it is not a range.
+     assert_eq!(
+         parser("[a]").parse(),
+         Ok(class(span(0..3), itemset(lit(span(1..2), 'a'))))
+     );
+     assert_eq!(
+         parser(r"[a\]]").parse(),
+         Ok(class(
+             span(0..5),
+             union(
+                 span(1..4),
+                 vec![lit(span(1..2), 'a'), punct(span(2..4), ']')],
+             ),
+         ))
+     );
+     assert_eq!(
+         parser(r"[a\-z]").parse(),
+         Ok(class(
+             span(0..6),
+             union(
+                 span(1..5),
+                 vec![
+                     lit(span(1..2), 'a'),
+                     punct(span(2..4), '-'),
+                     lit(span(4..5), 'z'),
+                 ],
+             ),
+         ))
+     );
+     assert_eq!(
+         parser("[ab]").parse(),
+         Ok(class(
+             span(0..4),
+             union(
+                 span(1..3),
+                 vec![lit(span(1..2), 'a'), lit(span(2..3), 'b')],
+             ),
+         ))
+     );
+     assert_eq!(
+         parser("[a-]").parse(),
+         Ok(class(
+             span(0..4),
+             union(
+                 span(1..3),
+                 vec![lit(span(1..2), 'a'), lit(span(2..3), '-')],
+             ),
+         ))
+     );
+     assert_eq!(
+         parser("[-a]").parse(),
+         Ok(class(
+             span(0..4),
+             union(
+                 span(1..3),
+                 vec![lit(span(1..2), '-'), lit(span(2..3), 'a')],
+             ),
+         ))
+     );
+
+     // Unicode and Perl classes as set items.
+     assert_eq!(
+         parser(r"[\pL]").parse(),
+         Ok(class(
+             span(0..5),
+             itemset(ast::ClassSetItem::Unicode(ast::ClassUnicode {
+                 span: span(1..4),
+                 negated: false,
+                 kind: ast::ClassUnicodeKind::OneLetter('L'),
+             })),
+         ))
+     );
+     assert_eq!(
+         parser(r"[\w]").parse(),
+         Ok(class(span(0..4), itemset(word(span(1..3)))))
+     );
+     assert_eq!(
+         parser(r"[a\wz]").parse(),
+         Ok(class(
+             span(0..6),
+             union(
+                 span(1..5),
+                 vec![
+                     lit(span(1..2), 'a'),
+                     word(span(2..4)),
+                     lit(span(4..5), 'z'),
+                 ],
+             ),
+         ))
+     );
+
+     // Ranges, alone and combined with set operators.
+     assert_eq!(
+         parser("[a-z]").parse(),
+         Ok(class(span(0..5), itemset(range(span(1..4), 'a', 'z'))))
+     );
+     assert_eq!(
+         parser("[a-cx-z]").parse(),
+         Ok(class(
+             span(0..8),
+             union(
+                 span(1..7),
+                 vec![
+                     range(span(1..4), 'a', 'c'),
+                     range(span(4..7), 'x', 'z'),
+                 ],
+             ),
+         ))
+     );
+     assert_eq!(
+         parser(r"[\w&&a-cx-z]").parse(),
+         Ok(class(
+             span(0..12),
+             intersection(
+                 span(1..11),
+                 itemset(word(span(1..3))),
+                 union(
+                     span(5..11),
+                     vec![
+                         range(span(5..8), 'a', 'c'),
+                         range(span(8..11), 'x', 'z'),
+                     ],
+                 ),
+             ),
+         ))
+     );
+     assert_eq!(
+         parser(r"[a-cx-z&&\w]").parse(),
+         Ok(class(
+             span(0..12),
+             intersection(
+                 span(1..11),
+                 union(
+                     span(1..7),
+                     vec![
+                         range(span(1..4), 'a', 'c'),
+                         range(span(4..7), 'x', 'z'),
+                     ],
+                 ),
+                 itemset(word(span(9..11))),
+             ),
+         ))
+     );
+
+     // Chained operators nest to the left.
+     assert_eq!(
+         parser(r"[a--b--c]").parse(),
+         Ok(class(
+             span(0..9),
+             difference(
+                 span(1..8),
+                 difference(
+                     span(1..5),
+                     itemset(lit(span(1..2), 'a')),
+                     itemset(lit(span(4..5), 'b')),
+                 ),
+                 itemset(lit(span(7..8), 'c')),
+             ),
+         ))
+     );
+     assert_eq!(
+         parser(r"[a~~b~~c]").parse(),
+         Ok(class(
+             span(0..9),
+             symdifference(
+                 span(1..8),
+                 symdifference(
+                     span(1..5),
+                     itemset(lit(span(1..2), 'a')),
+                     itemset(lit(span(4..5), 'b')),
+                 ),
+                 itemset(lit(span(7..8), 'c')),
+             ),
+         ))
+     );
+
+     // `^` and `&` as operands of `&&`, and `&&` with empty operands.
+     assert_eq!(
+         parser(r"[\^&&^]").parse(),
+         Ok(class(
+             span(0..7),
+             intersection(
+                 span(1..6),
+                 itemset(punct(span(1..3), '^')),
+                 itemset(lit(span(5..6), '^')),
+             ),
+         ))
+     );
+     assert_eq!(
+         parser(r"[\&&&&]").parse(),
+         Ok(class(
+             span(0..7),
+             intersection(
+                 span(1..6),
+                 itemset(punct(span(1..3), '&')),
+                 itemset(lit(span(5..6), '&')),
+             ),
+         ))
+     );
+     assert_eq!(
+         parser(r"[&&&&]").parse(),
+         Ok(class(
+             span(0..6),
+             intersection(
+                 span(1..5),
+                 intersection(
+                     span(1..3),
+                     itemset(empty(span(1..1))),
+                     itemset(empty(span(3..3))),
+                 ),
+                 itemset(empty(span(5..5))),
+             ),
+         ))
+     );
+
+     // A range between multi-byte characters; spans are derived from the
+     // pattern text via `span_range`.
+     let pat = "[☃-⛄]";
+     assert_eq!(
+         parser(pat).parse(),
+         Ok(class(
+             span_range(pat, 0..9),
+             itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
+                 span: span_range(pat, 1..8),
+                 start: ast::Literal {
+                     span: span_range(pat, 1..4),
+                     kind: ast::LiteralKind::Verbatim,
+                     c: '☃',
+                 },
+                 end: ast::Literal {
+                     span: span_range(pat, 5..8),
+                     kind: ast::LiteralKind::Verbatim,
+                     c: '⛄',
+                 },
+             })),
+         ))
+     );
+
+     // A `]` directly after the opening bracket is a literal.
+     assert_eq!(
+         parser(r"[]]").parse(),
+         Ok(class(span(0..3), itemset(lit(span(1..2), ']'))))
+     );
+     assert_eq!(
+         parser(r"[]\[]").parse(),
+         Ok(class(
+             span(0..5),
+             union(
+                 span(1..4),
+                 vec![lit(span(1..2), ']'), punct(span(2..4), '[')],
+             ),
+         ))
+     );
+     assert_eq!(
+         parser(r"[\[]]").parse(),
+         Ok(concat(
+             0..5,
+             vec![
+                 class(span(0..4), itemset(punct(span(1..3), '['))),
+                 Ast::Literal(ast::Literal {
+                     span: span(4..5),
+                     kind: ast::LiteralKind::Verbatim,
+                     c: ']',
+                 }),
+             ],
+         ))
+     );
+
+     // Malformed classes. (pattern, error span, error kind)
+     let failures = vec![
+         ("[", span(0..1), ast::ErrorKind::ClassUnclosed),
+         ("[[", span(1..2), ast::ErrorKind::ClassUnclosed),
+         ("[[-]", span(0..1), ast::ErrorKind::ClassUnclosed),
+         ("[[[:alnum:]", span(1..2), ast::ErrorKind::ClassUnclosed),
+         (r"[\b]", span(1..3), ast::ErrorKind::ClassEscapeInvalid),
+         (r"[\w-a]", span(1..3), ast::ErrorKind::ClassRangeLiteral),
+         (r"[a-\w]", span(3..5), ast::ErrorKind::ClassRangeLiteral),
+         (r"[z-a]", span(1..4), ast::ErrorKind::ClassRangeInvalid),
+     ];
+     for (pat, at, kind) in failures {
+         assert_eq!(
+             parser(pat).parse().unwrap_err(),
+             TestError { span: at, kind }
+         );
+     }
+
+     // Unclosed classes with trailing whitespace in whitespace-insensitive
+     // mode.
+     for &pat in &["[a ", "[a- "] {
+         assert_eq!(
+             parser_ignore_whitespace(pat).parse().unwrap_err(),
+             TestError {
+                 span: span(0..1),
+                 kind: ast::ErrorKind::ClassUnclosed,
+             }
+         );
+     }
+ }
+
+ // Checks what `parse_set_class_open` returns right after the opening of a
+ // bracketed class: the partially built class (its span so far, the
+ // negation flag and an empty union) together with the union that has
+ // already absorbed any leading `-` or `]` literals.
+ #[test]
+ fn parse_set_class_open() {
+     // A verbatim literal item.
+     fn lit(at: Span, c: char) -> ast::ClassSetItem {
+         ast::ClassSetItem::Literal(ast::Literal {
+             span: at,
+             kind: ast::LiteralKind::Verbatim,
+             c,
+         })
+     }
+
+     // The expected `(class, union)` pair. `empty_at` is the span of the
+     // empty union stored inside the class; `union_at` and `items` describe
+     // the union returned next to it.
+     fn opened(
+         class_at: Span,
+         negated: bool,
+         empty_at: Span,
+         union_at: Span,
+         items: Vec<ast::ClassSetItem>,
+     ) -> (ast::ClassBracketed, ast::ClassSetUnion) {
+         let set = ast::ClassBracketed {
+             span: class_at,
+             negated,
+             kind: ast::ClassSet::union(ast::ClassSetUnion {
+                 span: empty_at,
+                 items: vec![],
+             }),
+         };
+         (set, ast::ClassSetUnion { span: union_at, items })
+     }
+
+     // Plain and negated openings.
+     assert_eq!(
+         parser("[a]").parse_set_class_open(),
+         Ok(opened(span(0..1), false, span(1..1), span(1..1), vec![]))
+     );
+     // Three spaces after `[`: the asserted spans put the union at offset 4.
+     assert_eq!(
+         parser_ignore_whitespace("[   a]").parse_set_class_open(),
+         Ok(opened(span(0..4), false, span(4..4), span(4..4), vec![]))
+     );
+     assert_eq!(
+         parser("[^a]").parse_set_class_open(),
+         Ok(opened(span(0..2), true, span(2..2), span(2..2), vec![]))
+     );
+     assert_eq!(
+         parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
+         Ok(opened(span(0..4), true, span(4..4), span(4..4), vec![]))
+     );
+
+     // Leading `-` characters are collected as literals.
+     assert_eq!(
+         parser("[-a]").parse_set_class_open(),
+         Ok(opened(
+             span(0..2),
+             false,
+             span(1..1),
+             span(1..2),
+             vec![lit(span(1..2), '-')],
+         ))
+     );
+     assert_eq!(
+         parser_ignore_whitespace("[ - a]").parse_set_class_open(),
+         Ok(opened(
+             span(0..4),
+             false,
+             span(2..2),
+             span(2..3),
+             vec![lit(span(2..3), '-')],
+         ))
+     );
+     assert_eq!(
+         parser("[^-a]").parse_set_class_open(),
+         Ok(opened(
+             span(0..3),
+             true,
+             span(2..2),
+             span(2..3),
+             vec![lit(span(2..3), '-')],
+         ))
+     );
+     assert_eq!(
+         parser("[--a]").parse_set_class_open(),
+         Ok(opened(
+             span(0..3),
+             false,
+             span(1..1),
+             span(1..3),
+             vec![lit(span(1..2), '-'), lit(span(2..3), '-')],
+         ))
+     );
+
+     // A leading `]` is collected as a literal.
+     assert_eq!(
+         parser("[]a]").parse_set_class_open(),
+         Ok(opened(
+             span(0..2),
+             false,
+             span(1..1),
+             span(1..2),
+             vec![lit(span(1..2), ']')],
+         ))
+     );
+     assert_eq!(
+         parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
+         Ok(opened(
+             span(0..4),
+             false,
+             span(2..2),
+             span(2..3),
+             vec![lit(span(2..3), ']')],
+         ))
+     );
+     assert_eq!(
+         parser("[^]a]").parse_set_class_open(),
+         Ok(opened(
+             span(0..3),
+             true,
+             span(2..2),
+             span(2..3),
+             vec![lit(span(2..3), ']')],
+         ))
+     );
+     // After a leading `-`, a `]` is not collected by the opening step.
+     assert_eq!(
+         parser("[-]a]").parse_set_class_open(),
+         Ok(opened(
+             span(0..2),
+             false,
+             span(1..1),
+             span(1..2),
+             vec![lit(span(1..2), '-')],
+         ))
+     );
+
+     // Input that ends during the opening is an unclosed class whose span
+     // covers everything consumed. (pattern, end of the error span)
+     for &(pat, end) in &[("[", 1), ("[^", 2), ("[]", 2), ("[-", 2), ("[--", 3)]
+     {
+         assert_eq!(
+             parser(pat).parse_set_class_open().unwrap_err(),
+             TestError {
+                 span: span(0..end),
+                 kind: ast::ErrorKind::ClassUnclosed,
+             }
+         );
+     }
+     // Four spaces after `[`: the asserted error span is `0..5`.
+     assert_eq!(
+         parser_ignore_whitespace("[    ")
+             .parse_set_class_open()
+             .unwrap_err(),
+         TestError {
+             span: span(0..5),
+             kind: ast::ErrorKind::ClassUnclosed,
+         }
+     );
+ }
+
+ // `maybe_parse_ascii_class` recognizes `[:name:]` and `[:^name:]` at the
+ // current position; on anything else it returns `None` and leaves the
+ // parser offset at 0.
+ #[test]
+ fn maybe_parse_ascii_class() {
+     // (pattern, end of the class span, negated)
+     for &(pat, end, negated) in &[
+         (r"[:alnum:]", 9, false),
+         (r"[:alnum:]A", 9, false),
+         (r"[:^alnum:]", 10, true),
+     ] {
+         assert_eq!(
+             parser(pat).maybe_parse_ascii_class(),
+             Some(ast::ClassAscii {
+                 span: span(0..end),
+                 kind: ast::ClassAsciiKind::Alnum,
+                 negated,
+             })
+         );
+     }
+
+     for &pat in &[
+         r"[:",
+         r"[:^",
+         r"[^:alnum:]",
+         r"[:alnnum:]",
+         r"[:alnum]",
+         r"[:alnum:",
+     ] {
+         let p = parser(pat);
+         assert_eq!(p.maybe_parse_ascii_class(), None);
+         assert_eq!(p.offset(), 0);
+     }
+ }
+
+ // Unicode class escapes: one-letter and braced names, the `:`, `=` and
+ // `!=` name/value forms (including empty names and values), unterminated
+ // input, and how a class combines with a following literal.
+ #[test]
+ fn parse_unicode_class() {
+     fn named_value(
+         op: ast::ClassUnicodeOpKind,
+         name: &'static str,
+         value: &'static str,
+     ) -> ast::ClassUnicodeKind {
+         ast::ClassUnicodeKind::NamedValue {
+             op,
+             name: s(name),
+             value: s(value),
+         }
+     }
+
+     // (pattern, negated, kind); each class spans its whole pattern.
+     let classes = vec![
+         (r"\pN", false, ast::ClassUnicodeKind::OneLetter('N')),
+         (r"\PN", true, ast::ClassUnicodeKind::OneLetter('N')),
+         (r"\p{N}", false, ast::ClassUnicodeKind::Named(s("N"))),
+         (r"\P{N}", true, ast::ClassUnicodeKind::Named(s("N"))),
+         (r"\p{Greek}", false, ast::ClassUnicodeKind::Named(s("Greek"))),
+         (
+             r"\p{scx:Katakana}",
+             false,
+             named_value(ast::ClassUnicodeOpKind::Colon, "scx", "Katakana"),
+         ),
+         (
+             r"\p{scx=Katakana}",
+             false,
+             named_value(ast::ClassUnicodeOpKind::Equal, "scx", "Katakana"),
+         ),
+         (
+             r"\p{scx!=Katakana}",
+             false,
+             named_value(ast::ClassUnicodeOpKind::NotEqual, "scx", "Katakana"),
+         ),
+         (
+             r"\p{:}",
+             false,
+             named_value(ast::ClassUnicodeOpKind::Colon, "", ""),
+         ),
+         (
+             r"\p{=}",
+             false,
+             named_value(ast::ClassUnicodeOpKind::Equal, "", ""),
+         ),
+         (
+             r"\p{!=}",
+             false,
+             named_value(ast::ClassUnicodeOpKind::NotEqual, "", ""),
+         ),
+     ];
+     for (pat, negated, kind) in classes {
+         assert_eq!(
+             parser(pat).parse_escape(),
+             Ok(Primitive::Unicode(ast::ClassUnicode {
+                 span: span(0..pat.len()),
+                 negated,
+                 kind,
+             }))
+         );
+     }
+
+     // Input that ends inside the escape; the error is an empty span at the
+     // given offset.
+     for &(pat, at) in
+         &[(r"\p", 2), (r"\p{", 3), (r"\p{N", 4), (r"\p{Greek", 8)]
+     {
+         assert_eq!(
+             parser(pat).parse_escape().unwrap_err(),
+             TestError {
+                 span: span(at..at),
+                 kind: ast::ErrorKind::EscapeUnexpectedEof,
+             }
+         );
+     }
+
+     // A class followed by `z`. (pattern, end of the class, class kind)
+     let followed_by_z = vec![
+         (r"\pNz", 3, ast::ClassUnicodeKind::OneLetter('N')),
+         (r"\p{Greek}z", 9, ast::ClassUnicodeKind::Named(s("Greek"))),
+     ];
+     for (pat, split, kind) in followed_by_z {
+         assert_eq!(
+             parser(pat).parse(),
+             Ok(Ast::Concat(ast::Concat {
+                 span: span(0..pat.len()),
+                 asts: vec![
+                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+                         span: span(0..split),
+                         negated: false,
+                         kind,
+                     })),
+                     Ast::Literal(ast::Literal {
+                         span: span(split..pat.len()),
+                         kind: ast::LiteralKind::Verbatim,
+                         c: 'z',
+                     }),
+                 ],
+             }))
+         );
+     }
+
+     // A backslash is not accepted as a one-letter class name.
+     for &pat in &[r"\p\{", r"\P\{"] {
+         assert_eq!(
+             parser(pat).parse().unwrap_err(),
+             TestError {
+                 span: span(2..3),
+                 kind: ast::ErrorKind::UnicodeClassInvalid,
+             }
+         );
+     }
+ }
+
+ // Perl classes (`\d`, `\s`, `\w` and their negated upper-case forms), both
+ // as escapes and as part of a full parse.
+ #[test]
+ fn parse_perl_class() {
+     // Every Perl class in this test sits at the start of its pattern and is
+     // two bytes long.
+     fn perl(kind: ast::ClassPerlKind, negated: bool) -> ast::ClassPerl {
+         ast::ClassPerl { span: span(0..2), kind, negated }
+     }
+
+     let escapes = vec![
+         (r"\d", perl(ast::ClassPerlKind::Digit, false)),
+         (r"\D", perl(ast::ClassPerlKind::Digit, true)),
+         (r"\s", perl(ast::ClassPerlKind::Space, false)),
+         (r"\S", perl(ast::ClassPerlKind::Space, true)),
+         (r"\w", perl(ast::ClassPerlKind::Word, false)),
+         (r"\W", perl(ast::ClassPerlKind::Word, true)),
+     ];
+     for (pat, expected) in escapes {
+         assert_eq!(
+             parser(pat).parse_escape(),
+             Ok(Primitive::Perl(expected))
+         );
+     }
+
+     let digit = perl(ast::ClassPerlKind::Digit, false);
+     assert_eq!(
+         parser(r"\d").parse(),
+         Ok(Ast::Class(ast::Class::Perl(digit)))
+     );
+
+     let digit = perl(ast::ClassPerlKind::Digit, false);
+     let z = ast::Literal {
+         span: span(2..3),
+         kind: ast::LiteralKind::Verbatim,
+         c: 'z',
+     };
+     assert_eq!(
+         parser(r"\dz").parse(),
+         Ok(Ast::Concat(ast::Concat {
+             span: span(0..3),
+             asts: vec![
+                 Ast::Class(ast::Class::Perl(digit)),
+                 Ast::Literal(z),
+             ],
+         }))
+     );
+ }
+
+ // Regression test: the nest limit checker wasn't decrementing its depth
+ // during post-traversal, which caused long regexes to trip the limit too
+ // aggressively. The pattern below must parse with a nest limit of 50.
+ #[test]
+ fn regression_454_nest_too_big() {
+ let pattern = r#"
+ 2(?:
+ [45]\d{3}|
+ 7(?:
+ 1[0-267]|
+ 2[0-289]|
+ 3[0-29]|
+ 4[01]|
+ 5[1-3]|
+ 6[013]|
+ 7[0178]|
+ 91
+ )|
+ 8(?:
+ 0[125]|
+ [139][1-6]|
+ 2[0157-9]|
+ 41|
+ 6[1-35]|
+ 7[1-5]|
+ 8[1-8]|
+ 90
+ )|
+ 9(?:
+ 0[0-2]|
+ 1[0-4]|
+ 2[568]|
+ 3[3-6]|
+ 5[5-7]|
+ 6[0167]|
+ 7[15]|
+ 8[0146-9]
+ )
+ )\d{4}
+ "#;
+ assert!(parser_nest_limit(pattern, 50).parse().is_ok());
+ }
+
+ // Regression test: a trailing `-` in a character class is a literal `-`
+ // even when whitespace-insensitive mode (`(?x)`) is enabled and whitespace
+ // (or a comment) follows the trailing `-`.
+ #[test]
+ fn regression_455_trailing_dash_ignore_whitespace() {
+ assert!(parser("(?x)[ / - ]").parse().is_ok());
+ assert!(parser("(?x)[ a - ]").parse().is_ok());
+ assert!(parser(
+ "(?x)[
+ a
+ - ]
+ "
+ )
+ .parse()
+ .is_ok());
+ assert!(parser(
+ "(?x)[
+ a # wat
+ - ]
+ "
+ )
+ .parse()
+ .is_ok());
+ // The same classes without a closing `]` must still be rejected.
+ assert!(parser("(?x)[ / -").parse().is_err());
+ assert!(parser("(?x)[ / - ").parse().is_err());
+ assert!(parser(
+ "(?x)[
+ / -
+ "
+ )
+ .parse()
+ .is_err());
+ assert!(parser(
+ "(?x)[
+ / - # wat
+ "
+ )
+ .parse()
+ .is_err());
+ }
+}
diff --git a/vendor/regex-syntax/src/ast/print.rs b/vendor/regex-syntax/src/ast/print.rs
new file mode 100644
index 000000000..045de2eaf
--- /dev/null
+++ b/vendor/regex-syntax/src/ast/print.rs
@@ -0,0 +1,568 @@
+/*!
+This module provides a regular expression printer for `Ast`.
+*/
+
+use std::fmt;
+
+use crate::ast::visitor::{self, Visitor};
+use crate::ast::{self, Ast};
+
+/// A builder for constructing a printer.
+///
+/// Note that since a printer doesn't have any configuration knobs, this type
+/// remains unexported.
+#[derive(Clone, Debug)]
+struct PrinterBuilder {
+ // Unit placeholder: the builder currently carries no settings.
+ _priv: (),
+}
+
+impl Default for PrinterBuilder {
+ /// The default builder is identical to the one produced by `new`.
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl PrinterBuilder {
+ /// Create a builder; there is nothing to configure.
+ fn new() -> Self {
+ Self { _priv: () }
+ }
+
+ /// Produce a `Printer`. The builder is only borrowed, so it can be
+ /// reused afterwards.
+ fn build(&self) -> Printer {
+ let printer = Printer { _priv: () };
+ printer
+ }
+}
+
+/// A printer for a regular expression abstract syntax tree.
+///
+/// A printer converts an abstract syntax tree (AST) to a regular expression
+/// pattern string. This particular printer uses constant stack space and heap
+/// space proportional to the size of the AST.
+///
+/// This printer will not necessarily preserve the original formatting of the
+/// regular expression pattern string. For example, all whitespace and comments
+/// are ignored.
+#[derive(Debug)]
+pub struct Printer {
+ // Unit placeholder: the printer holds no state of its own.
+ _priv: (),
+}
+
+impl Printer {
+ /// Create a new printer.
+ pub fn new() -> Printer {
+ let builder = PrinterBuilder::new();
+ builder.build()
+ }
+
+ /// Write the pattern string for `ast` into `wtr`.
+ ///
+ /// Any `fmt::Write` implementation is accepted as the destination; common
+ /// choices are a `fmt::Formatter` (as found inside a `fmt::Display`
+ /// implementation) or a `&mut String`. An error reported by the writer
+ /// is returned to the caller.
+ pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
+ let sink = Writer { wtr };
+ visitor::visit(ast, sink)
+ }
+}
+
+// The AST visitor that does the actual printing: every callback appends
+// the concrete syntax for the visited node to `wtr`.
+#[derive(Debug)]
+struct Writer<W> {
+ // Destination for the printed pattern.
+ wtr: W,
+}
+
+impl<W: fmt::Write> Visitor for Writer<W> {
+ type Output = ();
+ type Err = fmt::Error;
+
+ /// Nothing is accumulated while printing, so finishing always succeeds.
+ fn finish(self) -> fmt::Result {
+ Ok(())
+ }
+
+ /// Emit the opening syntax of nodes that wrap their children: groups
+ /// and bracketed classes. Every other node prints in `visit_post`.
+ fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
+ match ast {
+ Ast::Group(group) => self.fmt_group_pre(group),
+ Ast::Class(ast::Class::Bracketed(class)) => {
+ self.fmt_class_bracketed_pre(class)
+ }
+ _ => Ok(()),
+ }
+ }
+
+ /// Emit leaf nodes in full, and the closing/suffix syntax of nodes whose
+ /// children have just been printed.
+ fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
+ match ast {
+ // These contribute no text of their own at this point.
+ Ast::Empty(_) | Ast::Alternation(_) | Ast::Concat(_) => Ok(()),
+ Ast::Flags(flags) => self.fmt_set_flags(flags),
+ Ast::Literal(lit) => self.fmt_literal(lit),
+ Ast::Dot(_) => self.wtr.write_str("."),
+ Ast::Assertion(assertion) => self.fmt_assertion(assertion),
+ Ast::Class(ast::Class::Perl(class)) => self.fmt_class_perl(class),
+ Ast::Class(ast::Class::Unicode(class)) => {
+ self.fmt_class_unicode(class)
+ }
+ Ast::Class(ast::Class::Bracketed(class)) => {
+ self.fmt_class_bracketed_post(class)
+ }
+ Ast::Repetition(rep) => self.fmt_repetition(rep),
+ Ast::Group(group) => self.fmt_group_post(group),
+ }
+ }
+
+ /// Separate the branches of an alternation.
+ fn visit_alternation_in(&mut self) -> fmt::Result {
+ self.wtr.write_str("|")
+ }
+
+ /// Open a nested bracketed class before its items are visited.
+ fn visit_class_set_item_pre(
+ &mut self,
+ ast: &ast::ClassSetItem,
+ ) -> Result<(), Self::Err> {
+ if let ast::ClassSetItem::Bracketed(class) = ast {
+ return self.fmt_class_bracketed_pre(class);
+ }
+ Ok(())
+ }
+
+ /// Print a class set item, or close a nested bracketed class.
+ fn visit_class_set_item_post(
+ &mut self,
+ ast: &ast::ClassSetItem,
+ ) -> Result<(), Self::Err> {
+ match ast {
+ ast::ClassSetItem::Empty(_) | ast::ClassSetItem::Union(_) => Ok(()),
+ ast::ClassSetItem::Literal(lit) => self.fmt_literal(lit),
+ ast::ClassSetItem::Range(range) => {
+ self.fmt_literal(&range.start)?;
+ self.wtr.write_str("-")?;
+ self.fmt_literal(&range.end)
+ }
+ ast::ClassSetItem::Ascii(class) => self.fmt_class_ascii(class),
+ ast::ClassSetItem::Unicode(class) => self.fmt_class_unicode(class),
+ ast::ClassSetItem::Perl(class) => self.fmt_class_perl(class),
+ ast::ClassSetItem::Bracketed(class) => {
+ self.fmt_class_bracketed_post(class)
+ }
+ }
+ }
+
+ /// Print the operator between the two operands of a class set operation.
+ fn visit_class_set_binary_op_in(
+ &mut self,
+ ast: &ast::ClassSetBinaryOp,
+ ) -> Result<(), Self::Err> {
+ let kind = &ast.kind;
+ self.fmt_class_set_binary_op_kind(kind)
+ }
+}
+
+impl<W: fmt::Write> Writer<W> {
+ /// Write the opening syntax of a group: `(`, `(?P<name>` or `(?flags:`.
+ fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
+ match &ast.kind {
+ ast::GroupKind::CaptureIndex(_) => self.wtr.write_str("("),
+ ast::GroupKind::CaptureName(cap) => {
+ self.wtr.write_str("(?P<")?;
+ self.wtr.write_str(&cap.name)?;
+ self.wtr.write_str(">")
+ }
+ ast::GroupKind::NonCapturing(flags) => {
+ self.wtr.write_str("(?")?;
+ self.fmt_flags(flags)?;
+ self.wtr.write_str(":")
+ }
+ }
+ }
+
+ /// Write the closing parenthesis of a group.
+ fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
+ self.wtr.write_str(")")
+ }
+
+ /// Write a repetition operator, followed by `?` when it is not greedy.
+ fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
+ use crate::ast::RepetitionKind;
+
+ let greedy = ast.greedy;
+ match &ast.op.kind {
+ RepetitionKind::ZeroOrOne => {
+ self.wtr.write_str(if greedy { "?" } else { "??" })
+ }
+ RepetitionKind::ZeroOrMore => {
+ self.wtr.write_str(if greedy { "*" } else { "*?" })
+ }
+ RepetitionKind::OneOrMore => {
+ self.wtr.write_str(if greedy { "+" } else { "+?" })
+ }
+ RepetitionKind::Range(range) => {
+ self.fmt_repetition_range(range)?;
+ if greedy {
+ Ok(())
+ } else {
+ self.wtr.write_str("?")
+ }
+ }
+ }
+ }
+
+ /// Write a counted repetition: `{n}`, `{n,}` or `{n,m}`.
+ fn fmt_repetition_range(
+ &mut self,
+ ast: &ast::RepetitionRange,
+ ) -> fmt::Result {
+ match ast {
+ ast::RepetitionRange::Exactly(n) => {
+ write!(self.wtr, "{{{}}}", n)
+ }
+ ast::RepetitionRange::AtLeast(min) => {
+ write!(self.wtr, "{{{},}}", min)
+ }
+ ast::RepetitionRange::Bounded(min, max) => {
+ write!(self.wtr, "{{{},{}}}", min, max)
+ }
+ }
+ }
+
+ /// Write a literal using the escape form recorded in its kind.
+ fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
+ use crate::ast::{HexLiteralKind, LiteralKind, SpecialLiteralKind};
+
+ // Scalar value of the character, for the numeric escape forms.
+ let code = ast.c as u32;
+ match &ast.kind {
+ LiteralKind::Verbatim => self.wtr.write_char(ast.c),
+ LiteralKind::Punctuation => write!(self.wtr, r"\{}", ast.c),
+ LiteralKind::Octal => write!(self.wtr, r"\{:o}", code),
+ LiteralKind::HexFixed(hex) => match hex {
+ HexLiteralKind::X => write!(self.wtr, r"\x{:02X}", code),
+ HexLiteralKind::UnicodeShort => {
+ write!(self.wtr, r"\u{:04X}", code)
+ }
+ HexLiteralKind::UnicodeLong => {
+ write!(self.wtr, r"\U{:08X}", code)
+ }
+ },
+ LiteralKind::HexBrace(hex) => match hex {
+ HexLiteralKind::X => write!(self.wtr, r"\x{{{:X}}}", code),
+ HexLiteralKind::UnicodeShort => {
+ write!(self.wtr, r"\u{{{:X}}}", code)
+ }
+ HexLiteralKind::UnicodeLong => {
+ write!(self.wtr, r"\U{{{:X}}}", code)
+ }
+ },
+ LiteralKind::Special(special) => {
+ let escape = match special {
+ SpecialLiteralKind::Bell => r"\a",
+ SpecialLiteralKind::FormFeed => r"\f",
+ SpecialLiteralKind::Tab => r"\t",
+ SpecialLiteralKind::LineFeed => r"\n",
+ SpecialLiteralKind::CarriageReturn => r"\r",
+ SpecialLiteralKind::VerticalTab => r"\v",
+ SpecialLiteralKind::Space => r"\ ",
+ };
+ self.wtr.write_str(escape)
+ }
+ }
+ }
+
+ /// Write the concrete syntax of a zero-width assertion.
+ fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
+ use crate::ast::AssertionKind;
+
+ let text = match &ast.kind {
+ AssertionKind::StartLine => "^",
+ AssertionKind::EndLine => "$",
+ AssertionKind::StartText => r"\A",
+ AssertionKind::EndText => r"\z",
+ AssertionKind::WordBoundary => r"\b",
+ AssertionKind::NotWordBoundary => r"\B",
+ };
+ self.wtr.write_str(text)
+ }
+
+ /// Write a standalone flag directive such as `(?i)`.
+ fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
+ self.wtr.write_str("(?")?;
+ self.fmt_flags(&ast.flags)?;
+ self.wtr.write_str(")")
+ }
+
+ /// Write each flag item in order: one letter per flag, `-` for negation.
+ fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
+ use crate::ast::{Flag, FlagsItemKind};
+
+ for item in ast.items.iter() {
+ let symbol = match &item.kind {
+ FlagsItemKind::Negation => "-",
+ FlagsItemKind::Flag(flag) => match flag {
+ Flag::CaseInsensitive => "i",
+ Flag::MultiLine => "m",
+ Flag::DotMatchesNewLine => "s",
+ Flag::SwapGreed => "U",
+ Flag::Unicode => "u",
+ Flag::IgnoreWhitespace => "x",
+ },
+ };
+ self.wtr.write_str(symbol)?;
+ }
+ Ok(())
+ }
+
+ /// Write the opening bracket of a class, with `^` when it is negated.
+ fn fmt_class_bracketed_pre(
+ &mut self,
+ ast: &ast::ClassBracketed,
+ ) -> fmt::Result {
+ let open = if ast.negated { "[^" } else { "[" };
+ self.wtr.write_str(open)
+ }
+
+ /// Write the closing bracket of a class.
+ fn fmt_class_bracketed_post(
+ &mut self,
+ _ast: &ast::ClassBracketed,
+ ) -> fmt::Result {
+ self.wtr.write_str("]")
+ }
+
+ /// Write a class set operator: `&&`, `--` or `~~`.
+ fn fmt_class_set_binary_op_kind(
+ &mut self,
+ ast: &ast::ClassSetBinaryOpKind,
+ ) -> fmt::Result {
+ let operator = match ast {
+ ast::ClassSetBinaryOpKind::Intersection => "&&",
+ ast::ClassSetBinaryOpKind::Difference => "--",
+ ast::ClassSetBinaryOpKind::SymmetricDifference => "~~",
+ };
+ self.wtr.write_str(operator)
+ }
+
+ /// Write a Perl class; the upper-case letter is the negated form.
+ fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
+ use crate::ast::ClassPerlKind;
+
+ let negated = ast.negated;
+ let text = match &ast.kind {
+ ClassPerlKind::Digit => {
+ if negated {
+ r"\D"
+ } else {
+ r"\d"
+ }
+ }
+ ClassPerlKind::Space => {
+ if negated {
+ r"\S"
+ } else {
+ r"\s"
+ }
+ }
+ ClassPerlKind::Word => {
+ if negated {
+ r"\W"
+ } else {
+ r"\w"
+ }
+ }
+ };
+ self.wtr.write_str(text)
+ }
+
+ /// Write an ASCII class such as `[:alpha:]`, or `[:^alpha:]` when negated.
+ fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
+ use crate::ast::ClassAsciiKind;
+
+ // (plain form, negated form) for every class kind.
+ let (plain, inverted) = match &ast.kind {
+ ClassAsciiKind::Alnum => ("[:alnum:]", "[:^alnum:]"),
+ ClassAsciiKind::Alpha => ("[:alpha:]", "[:^alpha:]"),
+ ClassAsciiKind::Ascii => ("[:ascii:]", "[:^ascii:]"),
+ ClassAsciiKind::Blank => ("[:blank:]", "[:^blank:]"),
+ ClassAsciiKind::Cntrl => ("[:cntrl:]", "[:^cntrl:]"),
+ ClassAsciiKind::Digit => ("[:digit:]", "[:^digit:]"),
+ ClassAsciiKind::Graph => ("[:graph:]", "[:^graph:]"),
+ ClassAsciiKind::Lower => ("[:lower:]", "[:^lower:]"),
+ ClassAsciiKind::Print => ("[:print:]", "[:^print:]"),
+ ClassAsciiKind::Punct => ("[:punct:]", "[:^punct:]"),
+ ClassAsciiKind::Space => ("[:space:]", "[:^space:]"),
+ ClassAsciiKind::Upper => ("[:upper:]", "[:^upper:]"),
+ ClassAsciiKind::Word => ("[:word:]", "[:^word:]"),
+ ClassAsciiKind::Xdigit => ("[:xdigit:]", "[:^xdigit:]"),
+ };
+ self.wtr.write_str(if ast.negated { inverted } else { plain })
+ }
+
+ /// Write a Unicode class: `\p` (or `\P` when negated) followed by a
+ /// single letter or a braced name / name-value pair.
+ fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
+ use crate::ast::ClassUnicodeKind as Kind;
+ use crate::ast::ClassUnicodeOpKind as Op;
+
+ let prefix = if ast.negated { r"\P" } else { r"\p" };
+ self.wtr.write_str(prefix)?;
+ match &ast.kind {
+ Kind::OneLetter(letter) => self.wtr.write_char(*letter),
+ Kind::Named(name) => write!(self.wtr, "{{{}}}", name),
+ Kind::NamedValue { op: Op::Equal, name, value } => {
+ write!(self.wtr, "{{{}={}}}", name, value)
+ }
+ Kind::NamedValue { op: Op::Colon, name, value } => {
+ write!(self.wtr, "{{{}:{}}}", name, value)
+ }
+ Kind::NamedValue { op: Op::NotEqual, name, value } => {
+ write!(self.wtr, "{{{}!={}}}", name, value)
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::Printer;
+ use crate::ast::parse::ParserBuilder;
+
+ // Parse `given` with a default parser, print the resulting AST, and
+ // require the printed text to equal the input exactly.
+ fn roundtrip(given: &str) {
+ roundtrip_with(|b| b, given);
+ }
+
+ // Same as `roundtrip`, but `f` may configure the parser builder first
+ // (for example to enable octal escapes).
+ fn roundtrip_with<F>(mut f: F, given: &str)
+ where
+ F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
+ {
+ let mut builder = ParserBuilder::new();
+ f(&mut builder);
+ let ast = builder.build().parse(given).unwrap();
+
+ let mut printer = Printer::new();
+ let mut dst = String::new();
+ printer.print(&ast, &mut dst).unwrap();
+ assert_eq!(given, dst);
+ }
+
+ #[test]
+ fn print_literal() {
+ roundtrip("a");
+ roundtrip(r"\[");
+ roundtrip_with(|b| b.octal(true), r"\141");
+ roundtrip(r"\x61");
+ roundtrip(r"\x7F");
+ roundtrip(r"\u0061");
+ roundtrip(r"\U00000061");
+ roundtrip(r"\x{61}");
+ roundtrip(r"\x{7F}");
+ roundtrip(r"\u{61}");
+ roundtrip(r"\U{61}");
+
+ roundtrip(r"\a");
+ roundtrip(r"\f");
+ roundtrip(r"\t");
+ roundtrip(r"\n");
+ roundtrip(r"\r");
+ roundtrip(r"\v");
+ roundtrip(r"(?x)\ ");
+ }
+
+ #[test]
+ fn print_dot() {
+ roundtrip(".");
+ }
+
+ #[test]
+ fn print_concat() {
+ roundtrip("ab");
+ roundtrip("abcde");
+ roundtrip("a(bcd)ef");
+ }
+
+ #[test]
+ fn print_alternation() {
+ roundtrip("a|b");
+ roundtrip("a|b|c|d|e");
+ roundtrip("|a|b|c|d|e");
+ roundtrip("|a|b|c|d|e|");
+ roundtrip("a(b|c|d)|e|f");
+ }
+
+ #[test]
+ fn print_assertion() {
+ roundtrip(r"^");
+ roundtrip(r"$");
+ roundtrip(r"\A");
+ roundtrip(r"\z");
+ roundtrip(r"\b");
+ roundtrip(r"\B");
+ }
+
+ #[test]
+ fn print_repetition() {
+ roundtrip("a?");
+ roundtrip("a??");
+ roundtrip("a*");
+ roundtrip("a*?");
+ roundtrip("a+");
+ roundtrip("a+?");
+ roundtrip("a{5}");
+ roundtrip("a{5}?");
+ roundtrip("a{5,}");
+ roundtrip("a{5,}?");
+ roundtrip("a{5,10}");
+ roundtrip("a{5,10}?");
+ }
+
+ #[test]
+ fn print_flags() {
+ roundtrip("(?i)");
+ roundtrip("(?-i)");
+ roundtrip("(?s-i)");
+ roundtrip("(?-si)");
+ roundtrip("(?siUmux)");
+ }
+
+ #[test]
+ fn print_group() {
+ roundtrip("(?i:a)");
+ roundtrip("(?P<foo>a)");
+ roundtrip("(a)");
+ }
+
+ #[test]
+ fn print_class() {
+ roundtrip(r"[abc]");
+ roundtrip(r"[a-z]");
+ roundtrip(r"[^a-z]");
+ roundtrip(r"[a-z0-9]");
+ // A literal dash may lead or trail the class items.
+ roundtrip(r"[-a-z0-9]");
+ roundtrip(r"[a-z0-9-]");
+ roundtrip(r"[a-z0-9---]");
+ roundtrip(r"[a-z&&m-n]");
+ roundtrip(r"[[a-z&&m-n]]");
+ roundtrip(r"[a-z--m-n]");
+ roundtrip(r"[a-z~~m-n]");
+ roundtrip(r"[a-z[0-9]]");
+ roundtrip(r"[a-z[^0-9]]");
+
+ roundtrip(r"\d");
+ roundtrip(r"\D");
+ roundtrip(r"\s");
+ roundtrip(r"\S");
+ roundtrip(r"\w");
+ roundtrip(r"\W");
+
+ roundtrip(r"[[:alnum:]]");
+ roundtrip(r"[[:^alnum:]]");
+ roundtrip(r"[[:alpha:]]");
+ roundtrip(r"[[:^alpha:]]");
+ roundtrip(r"[[:ascii:]]");
+ roundtrip(r"[[:^ascii:]]");
+ roundtrip(r"[[:blank:]]");
+ roundtrip(r"[[:^blank:]]");
+ roundtrip(r"[[:cntrl:]]");
+ roundtrip(r"[[:^cntrl:]]");
+ roundtrip(r"[[:digit:]]");
+ roundtrip(r"[[:^digit:]]");
+ roundtrip(r"[[:graph:]]");
+ roundtrip(r"[[:^graph:]]");
+ roundtrip(r"[[:lower:]]");
+ roundtrip(r"[[:^lower:]]");
+ roundtrip(r"[[:print:]]");
+ roundtrip(r"[[:^print:]]");
+ roundtrip(r"[[:punct:]]");
+ roundtrip(r"[[:^punct:]]");
+ roundtrip(r"[[:space:]]");
+ roundtrip(r"[[:^space:]]");
+ roundtrip(r"[[:upper:]]");
+ roundtrip(r"[[:^upper:]]");
+ roundtrip(r"[[:word:]]");
+ roundtrip(r"[[:^word:]]");
+ roundtrip(r"[[:xdigit:]]");
+ roundtrip(r"[[:^xdigit:]]");
+
+ roundtrip(r"\pL");
+ roundtrip(r"\PL");
+ roundtrip(r"\p{L}");
+ roundtrip(r"\P{L}");
+ roundtrip(r"\p{X=Y}");
+ roundtrip(r"\P{X=Y}");
+ roundtrip(r"\p{X:Y}");
+ roundtrip(r"\P{X:Y}");
+ roundtrip(r"\p{X!=Y}");
+ roundtrip(r"\P{X!=Y}");
+ }
+}
diff --git a/vendor/regex-syntax/src/ast/visitor.rs b/vendor/regex-syntax/src/ast/visitor.rs
new file mode 100644
index 000000000..a0d1e7dd5
--- /dev/null
+++ b/vendor/regex-syntax/src/ast/visitor.rs
@@ -0,0 +1,519 @@
+use std::fmt;
+
+use crate::ast::{self, Ast};
+
+/// A trait for visiting an abstract syntax tree (AST) in depth first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on an abstract syntax tree without necessarily using recursion.
+/// In particular, this permits callers to do case analysis with constant stack
+/// usage, which can be important since the size of an abstract syntax tree
+/// may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+///
+/// Note that the abstract syntax tree for a regular expression is quite
+/// complex. Unless you specifically need it, you might be able to use the
+/// much simpler
+/// [high-level intermediate representation](../hir/struct.Hir.html)
+/// and its
+/// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
+/// instead.
+pub trait Visitor {
+ /// The result of visiting an AST.
+ type Output;
+ /// An error that visiting an AST might return.
+ type Err;
+
+ /// All implementors of `Visitor` must provide a `finish` method, which
+ /// yields the result of visiting the AST or an error.
+ fn finish(self) -> Result<Self::Output, Self::Err>;
+
+ /// This method is called before beginning traversal of the AST.
+ fn start(&mut self) {}
+
+ /// This method is called on an `Ast` before descending into child `Ast`
+ /// nodes.
+ fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on an `Ast` after descending all of its child
+ /// `Ast` nodes.
+ fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called between child nodes of an
+ /// [`Alternation`](struct.Alternation.html).
+ fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on every
+ /// [`ClassSetItem`](enum.ClassSetItem.html)
+ /// before descending into child nodes.
+ fn visit_class_set_item_pre(
+ &mut self,
+ _ast: &ast::ClassSetItem,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on every
+ /// [`ClassSetItem`](enum.ClassSetItem.html)
+ /// after descending into child nodes.
+ fn visit_class_set_item_post(
+ &mut self,
+ _ast: &ast::ClassSetItem,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on every
+ /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+ /// before descending into child nodes.
+ fn visit_class_set_binary_op_pre(
+ &mut self,
+ _ast: &ast::ClassSetBinaryOp,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on every
+ /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+ /// after descending into child nodes.
+ fn visit_class_set_binary_op_post(
+ &mut self,
+ _ast: &ast::ClassSetBinaryOp,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called between the left hand and right hand child nodes
+ /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
+ fn visit_class_set_binary_op_in(
+ &mut self,
+ _ast: &ast::ClassSetBinaryOp,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+}
+
+/// Runs a `Visitor` implementation over an `Ast` in constant stack space.
+///
+/// Every node of the given `Ast` is visited, and the matching methods of the
+/// [`Visitor`](trait.Visitor.html) trait are called along the way.
+///
+/// Use this when case analysis over an `Ast` must not consume stack in
+/// proportion to the depth of the tree. The traversal keeps its bookkeeping
+/// on the heap instead, using space proportional to the size of the `Ast`.
+/// That trade is usually the right one when the size of the `Ast` is
+/// proportional to end user input.
+///
+/// Visiting stops at the first error returned by the visitor, and that error
+/// is returned to the caller.
+pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
+ let mut walker = HeapVisitor::new();
+ walker.visit(ast, visitor)
+}
+
+/// HeapVisitor visits every node in an `Ast` iteratively, using constant
+/// stack size and a heap size proportional to the size of the `Ast`.
+///
+/// The explicit stacks below take the place of the call stack that a
+/// recursive traversal would use.
+struct HeapVisitor<'a> {
+ /// A stack of `Ast` nodes. This is roughly analogous to the call stack
+ /// used in a typical recursive visitor. Each entry pairs a parent node
+ /// with the frame describing which of its children is being visited.
+ stack: Vec<(&'a Ast, Frame<'a>)>,
+ /// Similar to the `Ast` stack above, but is used only for character
+ /// classes. In particular, character classes embed their own mini
+ /// recursive syntax.
+ stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// an `Ast`.
+///
+/// A frame records which child of a parent node is currently being visited
+/// and, for sequences, which children are still to come.
+enum Frame<'a> {
+ /// A stack frame allocated just before descending into a repetition
+ /// operator's child node.
+ Repetition(&'a ast::Repetition),
+ /// A stack frame allocated just before descending into a group's child
+ /// node.
+ Group(&'a ast::Group),
+ /// The stack frame used while visiting every child node of a concatenation
+ /// of expressions.
+ Concat {
+ /// The child node we are currently visiting.
+ head: &'a Ast,
+ /// The remaining child nodes to visit (which may be empty).
+ tail: &'a [Ast],
+ },
+ /// The stack frame used while visiting every child node of an alternation
+ /// of expressions.
+ Alternation {
+ /// The child node we are currently visiting.
+ head: &'a Ast,
+ /// The remaining child nodes to visit (which may be empty).
+ tail: &'a [Ast],
+ },
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// a character class.
+enum ClassFrame<'a> {
+ /// The stack frame used while visiting every child node of a union of
+ /// character class items.
+ Union {
+ /// The child node we are currently visiting.
+ head: &'a ast::ClassSetItem,
+ /// The remaining child nodes to visit (which may be empty).
+ tail: &'a [ast::ClassSetItem],
+ },
+ /// The stack frame used while visiting a binary class operation that is
+ /// the content of a bracketed class item.
+ Binary { op: &'a ast::ClassSetBinaryOp },
+ /// A stack frame allocated just before descending into a binary operator's
+ /// left hand child node.
+ BinaryLHS {
+ op: &'a ast::ClassSetBinaryOp,
+ lhs: &'a ast::ClassSet,
+ rhs: &'a ast::ClassSet,
+ },
+ /// A stack frame allocated just before descending into a binary operator's
+ /// right hand child node.
+ BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
+}
+
+/// A representation of the inductive step when performing structural induction
+/// over a character class.
+///
+/// Note that there is no analogous explicit type for the inductive step for
+/// `Ast` nodes because the inductive step is just an `Ast`. For character
+/// classes, the inductive step can produce one of two possible child nodes:
+/// an item or a binary operation. (An item cannot be a binary operation
+/// because that would imply binary operations can be unioned in the concrete
+/// syntax, which is not possible.)
+enum ClassInduct<'a> {
+ /// The next node to visit is a class set item.
+ Item(&'a ast::ClassSetItem),
+ /// The next node to visit is a binary class set operation.
+ BinaryOp(&'a ast::ClassSetBinaryOp),
+}
+
+impl<'a> HeapVisitor<'a> {
+ /// Create a visitor with both explicit stacks empty.
+ fn new() -> HeapVisitor<'a> {
+ HeapVisitor { stack: vec![], stack_class: vec![] }
+ }
+
+ /// Traverse `ast` depth first, calling the `visitor` callbacks, and
+ /// return whatever `visitor.finish()` yields once the whole tree has
+ /// been visited.
+ ///
+ /// Both stacks are cleared first. The first error returned by any
+ /// callback stops the traversal and is returned.
+ fn visit<V: Visitor>(
+ &mut self,
+ mut ast: &'a Ast,
+ mut visitor: V,
+ ) -> Result<V::Output, V::Err> {
+ self.stack.clear();
+ self.stack_class.clear();
+
+ visitor.start();
+ loop {
+ visitor.visit_pre(ast)?;
+ if let Some(x) = self.induct(ast, &mut visitor)? {
+ let child = x.child();
+ self.stack.push((ast, x));
+ ast = child;
+ continue;
+ }
+ // No induction means we have a base case, so we can post visit
+ // it now.
+ visitor.visit_post(ast)?;
+
+ // At this point, we now try to pop our call stack until it is
+ // either empty or we hit another inductive case.
+ loop {
+ let (post_ast, frame) = match self.stack.pop() {
+ None => return visitor.finish(),
+ Some((post_ast, frame)) => (post_ast, frame),
+ };
+ // If this is a concat/alternate, then we might have additional
+ // inductive steps to process.
+ if let Some(x) = self.pop(frame) {
+ if let Frame::Alternation { .. } = x {
+ visitor.visit_alternation_in()?;
+ }
+ ast = x.child();
+ self.stack.push((post_ast, x));
+ break;
+ }
+ // Otherwise, we've finished visiting all the child nodes for
+ // this AST, so we can post visit it now.
+ visitor.visit_post(post_ast)?;
+ }
+ }
+ }
+
+ /// Build a stack frame for the given AST if one is needed (which occurs if
+ /// and only if there are child nodes in the AST). Otherwise, return None.
+ ///
+ /// If this visits a class, then the underlying visitor implementation may
+ /// return an error which will be passed on here.
+ fn induct<V: Visitor>(
+ &mut self,
+ ast: &'a Ast,
+ visitor: &mut V,
+ ) -> Result<Option<Frame<'a>>, V::Err> {
+ Ok(match *ast {
+ // A bracketed class is traversed completely right here, on its
+ // own stack, so no `Ast` frame is produced for it.
+ Ast::Class(ast::Class::Bracketed(ref x)) => {
+ self.visit_class(x, visitor)?;
+ None
+ }
+ Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
+ Ast::Group(ref x) => Some(Frame::Group(x)),
+ Ast::Concat(ref x) if x.asts.is_empty() => None,
+ Ast::Concat(ref x) => {
+ Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
+ }
+ Ast::Alternation(ref x) if x.asts.is_empty() => None,
+ Ast::Alternation(ref x) => Some(Frame::Alternation {
+ head: &x.asts[0],
+ tail: &x.asts[1..],
+ }),
+ _ => None,
+ })
+ }
+
+ /// Pops the given frame. If the frame has an additional inductive step,
+ /// then return it, otherwise return `None`.
+ fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+ match induct {
+ Frame::Repetition(_) => None,
+ Frame::Group(_) => None,
+ Frame::Concat { tail, .. } => {
+ if tail.is_empty() {
+ None
+ } else {
+ Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
+ }
+ }
+ Frame::Alternation { tail, .. } => {
+ if tail.is_empty() {
+ None
+ } else {
+ Some(Frame::Alternation {
+ head: &tail[0],
+ tail: &tail[1..],
+ })
+ }
+ }
+ }
+ }
+
+ /// Traverse the contents of a bracketed character class, calling the
+ /// class-specific `visitor` callbacks.
+ ///
+ /// This mirrors `visit`, but works on `stack_class` and returns once
+ /// that stack has been emptied. The first callback error is returned.
+ fn visit_class<V: Visitor>(
+ &mut self,
+ ast: &'a ast::ClassBracketed,
+ visitor: &mut V,
+ ) -> Result<(), V::Err> {
+ let mut ast = ClassInduct::from_bracketed(ast);
+ loop {
+ self.visit_class_pre(&ast, visitor)?;
+ if let Some(x) = self.induct_class(&ast) {
+ let child = x.child();
+ self.stack_class.push((ast, x));
+ ast = child;
+ continue;
+ }
+ self.visit_class_post(&ast, visitor)?;
+
+ // At this point, we now try to pop our call stack until it is
+ // either empty or we hit another inductive case.
+ loop {
+ let (post_ast, frame) = match self.stack_class.pop() {
+ None => return Ok(()),
+ Some((post_ast, frame)) => (post_ast, frame),
+ };
+ // If this is a union or a binary op, then we might have
+ // additional inductive steps to process.
+ if let Some(x) = self.pop_class(frame) {
+ // Moving on to the right hand side means the left hand
+ // side is done: report the position between the two.
+ if let ClassFrame::BinaryRHS { ref op, .. } = x {
+ visitor.visit_class_set_binary_op_in(op)?;
+ }
+ ast = x.child();
+ self.stack_class.push((post_ast, x));
+ break;
+ }
+ // Otherwise, we've finished visiting all the child nodes for
+ // this class node, so we can post visit it now.
+ self.visit_class_post(&post_ast, visitor)?;
+ }
+ }
+ }
+
+ /// Call the appropriate `Visitor` methods given an inductive step.
+ fn visit_class_pre<V: Visitor>(
+ &self,
+ ast: &ClassInduct<'a>,
+ visitor: &mut V,
+ ) -> Result<(), V::Err> {
+ match *ast {
+ ClassInduct::Item(item) => {
+ visitor.visit_class_set_item_pre(item)?;
+ }
+ ClassInduct::BinaryOp(op) => {
+ visitor.visit_class_set_binary_op_pre(op)?;
+ }
+ }
+ Ok(())
+ }
+
+ /// Call the appropriate `Visitor` methods given an inductive step.
+ fn visit_class_post<V: Visitor>(
+ &self,
+ ast: &ClassInduct<'a>,
+ visitor: &mut V,
+ ) -> Result<(), V::Err> {
+ match *ast {
+ ClassInduct::Item(item) => {
+ visitor.visit_class_set_item_post(item)?;
+ }
+ ClassInduct::BinaryOp(op) => {
+ visitor.visit_class_set_binary_op_post(op)?;
+ }
+ }
+ Ok(())
+ }
+
+ /// Build a stack frame for the given class node if one is needed (which
+ /// occurs if and only if there are child nodes). Otherwise, return None.
+ fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
+ match *ast {
+ ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
+ match x.kind {
+ ast::ClassSet::Item(ref item) => {
+ Some(ClassFrame::Union { head: item, tail: &[] })
+ }
+ ast::ClassSet::BinaryOp(ref op) => {
+ Some(ClassFrame::Binary { op })
+ }
+ }
+ }
+ ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
+ if x.items.is_empty() {
+ None
+ } else {
+ Some(ClassFrame::Union {
+ head: &x.items[0],
+ tail: &x.items[1..],
+ })
+ }
+ }
+ ClassInduct::BinaryOp(op) => Some(ClassFrame::BinaryLHS {
+ op,
+ lhs: &op.lhs,
+ rhs: &op.rhs,
+ }),
+ _ => None,
+ }
+ }
+
+ /// Pops the given frame. If the frame has an additional inductive step,
+ /// then return it, otherwise return `None`.
+ fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
+ match induct {
+ ClassFrame::Union { tail, .. } => {
+ if tail.is_empty() {
+ None
+ } else {
+ Some(ClassFrame::Union {
+ head: &tail[0],
+ tail: &tail[1..],
+ })
+ }
+ }
+ ClassFrame::Binary { .. } => None,
+ // The left hand side is finished; the right hand side is next.
+ ClassFrame::BinaryLHS { op, rhs, .. } => {
+ Some(ClassFrame::BinaryRHS { op, rhs })
+ }
+ ClassFrame::BinaryRHS { .. } => None,
+ }
+ }
+}
+
+impl<'a> Frame<'a> {
+ /// Return the child AST node that this frame says to visit next.
+ fn child(&self) -> &'a Ast {
+ match *self {
+ Frame::Repetition(rep) => &rep.ast,
+ Frame::Group(group) => &group.ast,
+ // Sequences always continue with their current head.
+ Frame::Concat { head, .. } | Frame::Alternation { head, .. } => {
+ head
+ }
+ }
+ }
+}
+
+impl<'a> ClassFrame<'a> {
+ /// Return the child class node that this frame says to visit next.
+ fn child(&self) -> ClassInduct<'a> {
+ match *self {
+ ClassFrame::Union { head, .. } => ClassInduct::Item(head),
+ ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
+ // Either operand of a binary operation is an arbitrary set.
+ ClassFrame::BinaryLHS { lhs: set, .. }
+ | ClassFrame::BinaryRHS { rhs: set, .. } => {
+ ClassInduct::from_set(set)
+ }
+ }
+ }
+}
+
+impl<'a> ClassInduct<'a> {
+ /// The inductive step for a bracketed class is that of the set it holds.
+ fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
+ let set = &ast.kind;
+ ClassInduct::from_set(set)
+ }
+
+ /// Wrap the item or binary operation contained in a class set.
+ fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
+ match ast {
+ ast::ClassSet::Item(item) => ClassInduct::Item(item),
+ ast::ClassSet::BinaryOp(op) => ClassInduct::BinaryOp(op),
+ }
+ }
+}
+
+impl<'a> fmt::Debug for ClassFrame<'a> {
+ /// Prints only the variant name; the referenced AST nodes are omitted.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let label = match self {
+ ClassFrame::Union { .. } => "Union",
+ ClassFrame::Binary { .. } => "Binary",
+ ClassFrame::BinaryLHS { .. } => "BinaryLHS",
+ ClassFrame::BinaryRHS { .. } => "BinaryRHS",
+ };
+ f.write_str(label)
+ }
+}
+
+impl<'a> fmt::Debug for ClassInduct<'a> {
+ /// Prints the variant together with the kind of node it refers to, but
+ /// none of that node's contents.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ use crate::ast::{ClassSetBinaryOpKind, ClassSetItem};
+
+ let label = match *self {
+ ClassInduct::Item(item) => match *item {
+ ClassSetItem::Empty(_) => "Item(Empty)",
+ ClassSetItem::Literal(_) => "Item(Literal)",
+ ClassSetItem::Range(_) => "Item(Range)",
+ ClassSetItem::Ascii(_) => "Item(Ascii)",
+ ClassSetItem::Perl(_) => "Item(Perl)",
+ ClassSetItem::Unicode(_) => "Item(Unicode)",
+ ClassSetItem::Bracketed(_) => "Item(Bracketed)",
+ ClassSetItem::Union(_) => "Item(Union)",
+ },
+ ClassInduct::BinaryOp(op) => match op.kind {
+ ClassSetBinaryOpKind::Intersection => "BinaryOp(Intersection)",
+ ClassSetBinaryOpKind::Difference => "BinaryOp(Difference)",
+ ClassSetBinaryOpKind::SymmetricDifference => {
+ "BinaryOp(SymmetricDifference)"
+ }
+ },
+ };
+ f.write_str(label)
+ }
+}
diff --git a/vendor/regex-syntax/src/either.rs b/vendor/regex-syntax/src/either.rs
new file mode 100644
index 000000000..7ae41e4ce
--- /dev/null
+++ b/vendor/regex-syntax/src/either.rs
@@ -0,0 +1,8 @@
+/// A simple binary sum type.
+///
+/// This is occasionally useful in an ad hoc fashion.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Either<Left, Right> {
+ /// The first alternative, holding a `Left` value.
+ Left(Left),
+ /// The second alternative, holding a `Right` value.
+ Right(Right),
+}
diff --git a/vendor/regex-syntax/src/error.rs b/vendor/regex-syntax/src/error.rs
new file mode 100644
index 000000000..71cfa426a
--- /dev/null
+++ b/vendor/regex-syntax/src/error.rs
@@ -0,0 +1,324 @@
+use std::cmp;
+use std::error;
+use std::fmt;
+use std::result;
+
+use crate::ast;
+use crate::hir;
+
+/// A type alias for dealing with errors returned by this crate.
+///
+/// It is `std::result::Result` with the error type fixed to this module's
+/// `Error`.
+pub type Result<T> = result::Result<T, Error>;
+
+/// This error type encompasses any error that can be returned by this crate.
+///
+/// `From` conversions are provided for both wrapped error types, so either
+/// one can be propagated into this type with `?`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Error {
+    /// An error that occurred while translating concrete syntax into abstract
+    /// syntax (AST).
+    Parse(ast::Error),
+    /// An error that occurred while translating abstract syntax into a high
+    /// level intermediate representation (HIR).
+    Translate(hir::Error),
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl From<ast::Error> for Error {
+    /// Wrap an AST parse error in the crate-level error type.
+    fn from(err: ast::Error) -> Error {
+        Self::Parse(err)
+    }
+}
+
+impl From<hir::Error> for Error {
+    /// Wrap an HIR translation error in the crate-level error type.
+    fn from(err: hir::Error) -> Error {
+        Self::Translate(err)
+    }
+}
+
+impl error::Error for Error {
+    // TODO: Remove this method entirely on the next breaking semver release.
+    //
+    // Forwards to the deprecated `description` of the wrapped error.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        match self {
+            Error::Translate(hir_err) => hir_err.description(),
+            Error::Parse(ast_err) => ast_err.description(),
+            // The hidden marker variant is never meant to be constructed.
+            Error::__Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    /// Delegate formatting to whichever underlying error is wrapped.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Error::Translate(hir_err) => hir_err.fmt(f),
+            Error::Parse(ast_err) => ast_err.fmt(f),
+            // The hidden marker variant is never meant to be constructed.
+            Error::__Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+/// A helper type for formatting nice error messages.
+///
+/// This type is responsible for reporting regex parse errors in a nice human
+/// readable format. Most of its complexity is from interspersing notational
+/// markers pointing out the position where an error occurred.
+///
+/// Every field is a borrow with lifetime `'e`, and `E` is the type of the
+/// error kind being reported.
+#[derive(Debug)]
+pub struct Formatter<'e, E> {
+    /// The original regex pattern in which the error occurred.
+    pattern: &'e str,
+    /// The error kind. It must impl fmt::Display.
+    err: &'e E,
+    /// The primary span of the error.
+    span: &'e ast::Span,
+    /// An auxiliary and optional span, in case the error needs to point to
+    /// two locations (e.g., when reporting a duplicate capture group name).
+    aux_span: Option<&'e ast::Span>,
+}
+
+impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
+    /// Borrow everything needed to render an AST error, including its
+    /// optional auxiliary span.
+    fn from(err: &'e ast::Error) -> Self {
+        let pattern = err.pattern();
+        let kind = err.kind();
+        let span = err.span();
+        let aux_span = err.auxiliary_span();
+        Formatter { pattern, err: kind, span, aux_span }
+    }
+}
+
+impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
+    /// Borrow everything needed to render an HIR error. No auxiliary span is
+    /// attached to this kind of error.
+    fn from(err: &'e hir::Error) -> Self {
+        let pattern = err.pattern();
+        let kind = err.kind();
+        let span = err.span();
+        Formatter { pattern, err: kind, span, aux_span: None }
+    }
+}
+
+impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
+    /// Render the error as a header, the annotated pattern and the message.
+    ///
+    /// Patterns containing a line break are framed by `~` dividers and are
+    /// followed by one note per span that covers more than one line. Patterns
+    /// on a single line are printed without dividers. In both layouts the
+    /// output ends with `error: <kind>` and no trailing newline.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Built once and shared by both layouts below.
+        let spans = Spans::from_formatter(self);
+        if self.pattern.contains('\n') {
+            let divider = repeat_char('~', 79);
+
+            writeln!(f, "regex parse error:")?;
+            writeln!(f, "{}", divider)?;
+            write!(f, "{}", spans.notate())?;
+            writeln!(f, "{}", divider)?;
+            // If we have error spans that cover multiple lines, then we just
+            // note the line numbers.
+            for span in &spans.multi_line {
+                writeln!(
+                    f,
+                    "on line {} (column {}) through line {} (column {})",
+                    span.start.line,
+                    span.start.column,
+                    span.end.line,
+                    span.end.column - 1
+                )?;
+            }
+        } else {
+            writeln!(f, "regex parse error:")?;
+            write!(f, "{}", spans.notate())?;
+        }
+        write!(f, "error: {}", self.err)
+    }
+}
+
+/// This type represents an arbitrary number of error spans in a way that makes
+/// it convenient to notate the regex pattern. ("Notate" means "point out
+/// exactly where the error occurred in the regex pattern.")
+///
+/// Technically, we can only ever have two spans given our current error
+/// structure. However, after toiling with a specific algorithm for handling
+/// two spans, it became obvious that an algorithm to handle an arbitrary
+/// number of spans was actually much simpler.
+///
+/// The lifetime `'p` is that of the borrowed pattern string.
+struct Spans<'p> {
+    /// The original regex pattern string.
+    pattern: &'p str,
+    /// The total width that should be used for line numbers. The width is
+    /// used for left padding the line numbers for alignment.
+    ///
+    /// A value of `0` means line numbers should not be displayed. That is,
+    /// the pattern is itself only one line.
+    line_number_width: usize,
+    /// All error spans that occur on a single line. This sequence always has
+    /// length equivalent to the number of lines in `pattern`, where the index
+    /// of the sequence represents a line number, starting at `0`. The spans
+    /// in each line are sorted in ascending order.
+    by_line: Vec<Vec<ast::Span>>,
+    /// All error spans that occur over one or more lines. That is, the start
+    /// and end position of the span have different line numbers. The spans are
+    /// sorted in ascending order.
+    multi_line: Vec<ast::Span>,
+}
+
+impl<'p> Spans<'p> {
+    /// Collect the primary span of a formatter, and its auxiliary span when
+    /// there is one, into a `Spans` sized for the formatter's pattern.
+    fn from_formatter<'e, E: fmt::Display>(
+        fmter: &'p Formatter<'e, E>,
+    ) -> Spans<'p> {
+        // If the pattern ends with a `\n` literal, then the plain line count
+        // is off by one, since a span can occur immediately after the last
+        // `\n`, which is considered to be an additional line.
+        let trailing_line = if fmter.pattern.ends_with('\n') { 1 } else { 0 };
+        let line_count = fmter.pattern.lines().count() + trailing_line;
+        // A single line (or none) gets no line numbers at all.
+        let line_number_width = match line_count {
+            0 | 1 => 0,
+            n => n.to_string().len(),
+        };
+        let mut spans = Spans {
+            pattern: &fmter.pattern,
+            line_number_width,
+            by_line: vec![vec![]; line_count],
+            multi_line: vec![],
+        };
+        spans.add(fmter.span.clone());
+        if let Some(aux) = fmter.aux_span {
+            spans.add(aux.clone());
+        }
+        spans
+    }
+
+    /// Insert a span into the bucket it belongs to and keep that bucket
+    /// sorted.
+    ///
+    /// Spans confined to one line go into the `by_line` entry for that line.
+    /// All other spans go into `multi_line`.
+    fn add(&mut self, span: ast::Span) {
+        // Sorting after every insertion is grossly inefficient, but right
+        // now at most two spans are ever added.
+        let bucket = if span.is_one_line() {
+            // Lines are 1-indexed while `by_line` is 0-indexed.
+            &mut self.by_line[span.start.line - 1]
+        } else {
+            &mut self.multi_line
+        };
+        bucket.push(span);
+        bucket.sort();
+    }
+
+    /// Notate the pattern string with carets (`^`) pointing at each span
+    /// location. This only applies to spans that occur within a single line.
+    ///
+    /// Every pattern line is preceded by a gutter that is exactly
+    /// `line_number_padding()` columns wide: either the padded line number
+    /// followed by `": "`, or plain spaces when line numbers are not shown.
+    /// `notate_line` pads its carets by the same amount, which is what keeps
+    /// the carets underneath the characters they point at.
+    fn notate(&self) -> String {
+        let mut notated = String::new();
+        for (i, line) in self.pattern.lines().enumerate() {
+            if self.line_number_width > 0 {
+                notated.push_str(&self.left_pad_line_number(i + 1));
+                notated.push_str(": ");
+            } else {
+                // Derived from the same helper as the caret padding so the
+                // two can never drift apart.
+                notated
+                    .push_str(&repeat_char(' ', self.line_number_padding()));
+            }
+            notated.push_str(line);
+            notated.push('\n');
+            if let Some(notes) = self.notate_line(i) {
+                notated.push_str(&notes);
+                notated.push('\n');
+            }
+        }
+        notated
+    }
+
+    /// Build the caret line for the pattern line at index `i` (zero-based).
+    ///
+    /// Returns `None` when no span is recorded for that line. Otherwise the
+    /// result starts with the line number padding, followed by spaces up to
+    /// each span's starting column and then at least one `^` per span.
+    fn notate_line(&self, i: usize) -> Option<String> {
+        let line_spans = &self.by_line[i];
+        if line_spans.is_empty() {
+            return None;
+        }
+        let mut marks = repeat_char(' ', self.line_number_padding());
+        // Number of columns already emitted after the padding.
+        let mut cursor = 0;
+        for span in line_spans {
+            // Columns are 1-indexed.
+            let start = span.start.column - 1;
+            while cursor < start {
+                marks.push(' ');
+                cursor += 1;
+            }
+            // An empty span still gets a single caret.
+            let width = cmp::max(
+                1,
+                span.end.column.saturating_sub(span.start.column),
+            );
+            marks.push_str(&repeat_char('^', width));
+            cursor += width;
+        }
+        Some(marks)
+    }
+
+    /// Render the line number `n` right-aligned in a field that is
+    /// `line_number_width` characters wide.
+    ///
+    /// Panics if `n` has more digits than the reserved width.
+    fn left_pad_line_number(&self, n: usize) -> String {
+        let digits = n.to_string();
+        let fill = self.line_number_width.checked_sub(digits.len()).unwrap();
+        repeat_char(' ', fill) + digits.as_str()
+    }
+
+    /// Return the width of the gutter that precedes every line of the
+    /// pattern.
+    ///
+    /// Without line numbers this is a fixed indent of four columns. With
+    /// line numbers it is the number width plus two more columns.
+    fn line_number_padding(&self) -> usize {
+        match self.line_number_width {
+            0 => 4,
+            width => 2 + width,
+        }
+    }
+}
+
+/// Build a string made of `count` copies of the character `c`.
+fn repeat_char(c: char, count: usize) -> String {
+    (0..count).map(|_| c).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::ast::parse::Parser;
+
+    // Parses `pattern`, which must be rejected, and checks that the rendered
+    // error equals `expected_msg` with surrounding whitespace trimmed.
+    // Despite the name, the only panic here is the one raised when the
+    // pattern unexpectedly parses.
+    fn assert_panic_message(pattern: &str, expected_msg: &str) -> () {
+        let result = Parser::new().parse(pattern);
+        match result {
+            Ok(_) => {
+                panic!("regex should not have parsed");
+            }
+            Err(err) => {
+                assert_eq!(err.to_string(), expected_msg.trim());
+            }
+        }
+    }
+
+    // See: https://github.com/rust-lang/regex/issues/464
+    #[test]
+    fn regression_464() {
+        let err = Parser::new().parse("a{\n").unwrap_err();
+        // This test checks that the error formatter doesn't panic.
+        assert!(!err.to_string().is_empty());
+    }
+
+    // See: https://github.com/rust-lang/regex/issues/545
+    //
+    // The expected text below is compared verbatim after trimming, so the
+    // whitespace inside the raw string is significant.
+    #[test]
+    fn repetition_quantifier_expects_a_valid_decimal() {
+        assert_panic_message(
+            r"\\u{[^}]*}",
+            r#"
+regex parse error:
+ \\u{[^}]*}
+ ^
+error: repetition quantifier expects a valid decimal
+"#,
+        );
+    }
+}
diff --git a/vendor/regex-syntax/src/hir/interval.rs b/vendor/regex-syntax/src/hir/interval.rs
new file mode 100644
index 000000000..cfaa2cb45
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/interval.rs
@@ -0,0 +1,520 @@
+use std::char;
+use std::cmp;
+use std::fmt::Debug;
+use std::slice;
+use std::u8;
+
+use crate::unicode;
+
+// This module contains an *internal* implementation of interval sets.
+//
+// The primary invariant that interval sets guard is canonical ordering. That
+// is, every interval set contains an ordered sequence of intervals where
+// no two intervals are overlapping or adjacent. While this invariant is
+// occasionally broken within the implementation, it should be impossible for
+// callers to observe it.
+//
+// Since case folding (as implemented below) breaks that invariant, we roll
+// that into this API even though it is a little out of place in an otherwise
+// generic interval set. (Hence the reason why the `unicode` module is imported
+// here.)
+//
+// Some of the implementation complexity here is a result of me wanting to
+// preserve the sequential representation without using additional memory.
+// In many cases, we do use linear extra memory, but it is at most 2x and it
+// is amortized. If we relaxed the memory requirements, this implementation
+// could become much simpler. The extra memory is honestly probably OK, but
+// character classes (especially of the Unicode variety) can become quite
+// large, and it would be nice to keep regex compilation snappy even in debug
+// builds. (In the past, I have been careless with this area of code and it has
+// caused slow regex compilations in debug mode, so this isn't entirely
+// unwarranted.)
+//
+// Tests on this are relegated to the public API of HIR in src/hir/mod.rs.
+
+/// A set of intervals stored as a plain vector.
+///
+/// The methods on this type keep the vector in canonical ordering: sorted,
+/// with no two intervals overlapping or adjacent.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct IntervalSet<I> {
+    // The intervals of the set, in canonical ordering between method calls.
+    ranges: Vec<I>,
+}
+
+impl<I: Interval> IntervalSet<I> {
+    /// Build a set out of any sequence of intervals, where each interval has
+    /// inclusive bounds.
+    ///
+    /// The input may be unordered and may contain overlapping ranges; the
+    /// set is canonicalized before it is returned.
+    pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> {
+        let ranges: Vec<I> = intervals.into_iter().collect();
+        let mut set = IntervalSet { ranges };
+        set.canonicalize();
+        set
+    }
+
+    /// Add a new interval to this set.
+    ///
+    /// The interval is appended and the whole set is then canonicalized
+    /// again, so it may end up merged with intervals already present.
+    pub fn push(&mut self, interval: I) {
+        // TODO: This could be faster. e.g., Push the interval such that
+        // it preserves canonicalization.
+        self.ranges.push(interval);
+        self.canonicalize();
+    }
+
+    /// Return an iterator over all intervals in this set.
+    ///
+    /// The iterator yields intervals in ascending order. It is a thin wrapper
+    /// around the slice iterator of the underlying vector.
+    pub fn iter(&self) -> IntervalSetIter<'_, I> {
+        IntervalSetIter(self.ranges.iter())
+    }
+
+    /// Return an immutable slice of intervals in this set.
+    ///
+    /// The sequence returned is in canonical ordering. The slice borrows
+    /// directly from the set; nothing is copied.
+    pub fn intervals(&self) -> &[I] {
+        &self.ranges
+    }
+
+    /// Expand this interval set such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    ///
+    /// This returns an error if the necessary case mapping data is not
+    /// available. The set is canonicalized again before returning, whether or
+    /// not folding succeeded.
+    pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> {
+        // Only the ranges present on entry are folded. Each one is handed the
+        // whole vector, so whatever it adds lands past `original_len` and is
+        // not folded again.
+        let original_len = self.ranges.len();
+        let mut outcome = Ok(());
+        for idx in 0..original_len {
+            let current = self.ranges[idx];
+            outcome = current.case_fold_simple(&mut self.ranges);
+            if outcome.is_err() {
+                break;
+            }
+        }
+        self.canonicalize();
+        outcome
+    }
+
+    /// Add every interval of `other` to this set, in place.
+    pub fn union(&mut self, other: &IntervalSet<I>) {
+        // This could almost certainly be done more efficiently: the copied
+        // ranges are simply appended and the set is canonicalized afterwards.
+        self.ranges.extend(other.ranges.iter().copied());
+        self.canonicalize();
+    }
+
+    /// Keep only the parts of this set that are also in `other`, in place.
+    pub fn intersect(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() {
+            return;
+        }
+        if other.ranges.is_empty() {
+            self.ranges.clear();
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but no simple way is known. Instead, the intersection is appended
+        // after the existing ranges, which are drained off at the end.
+        let original_len = self.ranges.len();
+        let other_len = other.ranges.len();
+
+        // Cursors into the original ranges of `self` and into `other`.
+        let (mut a, mut b) = (0, 0);
+        loop {
+            let (lhs, rhs) = (self.ranges[a], other.ranges[b]);
+            if let Some(common) = lhs.intersect(&rhs) {
+                self.ranges.push(common);
+            }
+            // Advance whichever side ends first, and stop as soon as that
+            // side has no ranges left.
+            if lhs.upper() < rhs.upper() {
+                a += 1;
+                if a == original_len {
+                    break;
+                }
+            } else {
+                b += 1;
+                if b == other_len {
+                    break;
+                }
+            }
+        }
+        self.ranges.drain(..original_len);
+    }
+
+    /// Subtract the given set from this set, in place.
+    ///
+    /// The result is appended after the existing ranges, which are drained
+    /// off once every one of them has been processed.
+    pub fn difference(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() || other.ranges.is_empty() {
+            return;
+        }
+
+        // This algorithm is (to me) surprisingly complex. A search of the
+        // interwebs indicates that this is a potentially interesting problem.
+        // Folks seem to suggest interval or segment trees, but I'd like to
+        // avoid the overhead (both runtime and conceptual) of that.
+        //
+        // The following is basically my Shitty First Draft. Therefore, in
+        // order to grok it, you probably need to read each line carefully.
+        // Simplifications are most welcome!
+        //
+        // Remember, we can assume the canonical format invariant here, which
+        // says that all ranges are sorted, not overlapping and not adjacent in
+        // each class.
+        let drain_end = self.ranges.len();
+        // `a` indexes the original ranges of `self`, `b` indexes `other`.
+        let (mut a, mut b) = (0, 0);
+        'LOOP: while a < drain_end && b < other.ranges.len() {
+            // Basically, the easy cases are when neither range overlaps with
+            // each other. If the `b` range is less than our current `a`
+            // range, then we can skip it and move on.
+            if other.ranges[b].upper() < self.ranges[a].lower() {
+                b += 1;
+                continue;
+            }
+            // ... similarly for the `a` range. If it's less than the smallest
+            // `b` range, then we can add it as-is.
+            if self.ranges[a].upper() < other.ranges[b].lower() {
+                let range = self.ranges[a];
+                self.ranges.push(range);
+                a += 1;
+                continue;
+            }
+            // Otherwise, we have overlapping ranges.
+            assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b]));
+
+            // This part is tricky and was non-obvious to me without looking
+            // at explicit examples (see the tests). The trickiness stems from
+            // two things: 1) subtracting a range from another range could
+            // yield two ranges and 2) after subtracting a range, it's possible
+            // that future ranges can have an impact. The loop below advances
+            // the `b` ranges until they can't possibly impact the current
+            // range.
+            //
+            // For example, if our `a` range is `a-t` and our next `b` ranges
+            // are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
+            // subtraction three times before moving on to the next `a` range.
+            let mut range = self.ranges[a];
+            while b < other.ranges.len()
+                && !range.is_intersection_empty(&other.ranges[b])
+            {
+                let old_range = range;
+                range = match range.difference(&other.ranges[b]) {
+                    (None, None) => {
+                        // We lost the entire range, so move on to the next
+                        // without adding this one.
+                        a += 1;
+                        continue 'LOOP;
+                    }
+                    (Some(range1), None) | (None, Some(range1)) => range1,
+                    (Some(range1), Some(range2)) => {
+                        // The lower piece is final; keep subtracting from
+                        // the upper piece.
+                        self.ranges.push(range1);
+                        range2
+                    }
+                };
+                // It's possible that the `b` range has more to contribute
+                // here. In particular, if it is greater than the original
+                // range, then it might impact the next `a` range *and* it
+                // has impacted the current `a` range as much as possible,
+                // so we can quit. We don't bump `b` so that the next `a`
+                // range can apply it.
+                if other.ranges[b].upper() > old_range.upper() {
+                    break;
+                }
+                // Otherwise, the next `b` range might apply to the current
+                // `a` range.
+                b += 1;
+            }
+            self.ranges.push(range);
+            a += 1;
+        }
+        // Any `a` ranges left once `other` is exhausted are kept unchanged.
+        while a < drain_end {
+            let range = self.ranges[a];
+            self.ranges.push(range);
+            a += 1;
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Replace this set with its symmetric difference against `other`, in
+    /// place.
+    ///
+    /// Afterwards the set contains every element that is in exactly one of
+    /// the two sets: elements shared by both are removed, and elements found
+    /// only in `other` are added.
+    pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) {
+        // TODO(burntsushi): Fix this so that it amortizes allocation.
+        //
+        // Computed as (self ∪ other) − (self ∩ other). The intersection has
+        // to be taken before `self` is overwritten by the union.
+        let shared = {
+            let mut common = self.clone();
+            common.intersect(other);
+            common
+        };
+        self.union(other);
+        self.difference(&shared);
+    }
+
+    /// Negate this interval set.
+    ///
+    /// For all `x` where `x` is any element, if `x` was in this set, then it
+    /// will not be in this set after negation.
+    ///
+    /// An empty set becomes the single interval from the minimum bound to the
+    /// maximum bound.
+    pub fn negate(&mut self) {
+        if self.ranges.is_empty() {
+            let (min, max) = (I::Bound::min_value(), I::Bound::max_value());
+            self.ranges.push(I::create(min, max));
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the negation to the end of this range, and then drain it before
+        // we're done.
+        let drain_end = self.ranges.len();
+
+        // We do checked arithmetic below because of the canonical ordering
+        // invariant.
+        //
+        // First, the gap below the lowest range, if there is one.
+        if self.ranges[0].lower() > I::Bound::min_value() {
+            let upper = self.ranges[0].lower().decrement();
+            self.ranges.push(I::create(I::Bound::min_value(), upper));
+        }
+        // Then the gap between each pair of neighbouring ranges.
+        for i in 1..drain_end {
+            let lower = self.ranges[i - 1].upper().increment();
+            let upper = self.ranges[i].lower().decrement();
+            self.ranges.push(I::create(lower, upper));
+        }
+        // Finally, the gap above the highest range, if there is one.
+        if self.ranges[drain_end - 1].upper() < I::Bound::max_value() {
+            let lower = self.ranges[drain_end - 1].upper().increment();
+            self.ranges.push(I::create(lower, I::Bound::max_value()));
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Converts this set into a canonical ordering.
+    ///
+    /// Does nothing when the set is already canonical. Otherwise the ranges
+    /// are sorted and every run of overlapping or adjacent ranges is merged
+    /// into a single range.
+    fn canonicalize(&mut self) {
+        if self.is_canonical() {
+            return;
+        }
+        self.ranges.sort();
+        assert!(!self.ranges.is_empty());
+
+        // Is there a way to do this in-place with constant memory? I couldn't
+        // figure out a way to do it. So just append the canonicalization to
+        // the end of this range, and then drain it before we're done.
+        let drain_end = self.ranges.len();
+        for oldi in 0..drain_end {
+            // If we've added at least one new range, then check if we can
+            // merge this range in the previously added range.
+            if self.ranges.len() > drain_end {
+                // `last` is the most recently appended range; `rest` still
+                // holds the sorted input at its original indices.
+                let (last, rest) = self.ranges.split_last_mut().unwrap();
+                if let Some(union) = last.union(&rest[oldi]) {
+                    *last = union;
+                    continue;
+                }
+            }
+            let range = self.ranges[oldi];
+            self.ranges.push(range);
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Report whether the ranges are already in canonical ordering.
+    ///
+    /// That is the case when every range is strictly less than its successor
+    /// and no range is contiguous with its successor.
+    fn is_canonical(&self) -> bool {
+        !self.ranges.windows(2).any(|pair| {
+            pair[0] >= pair[1] || pair[0].is_contiguous(&pair[1])
+        })
+    }
+}
+
+/// An iterator over intervals.
+///
+/// This is a newtype around a slice iterator and yields borrowed intervals.
+#[derive(Debug)]
+pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>);
+
+impl<'a, I> Iterator for IntervalSetIter<'a, I> {
+    type Item = &'a I;
+
+    // Forwards directly to the wrapped slice iterator.
+    fn next(&mut self) -> Option<&'a I> {
+        self.0.next()
+    }
+}
+
+pub trait Interval:
+ Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord
+{
+ type Bound: Bound;
+
+ fn lower(&self) -> Self::Bound;
+ fn upper(&self) -> Self::Bound;
+ fn set_lower(&mut self, bound: Self::Bound);
+ fn set_upper(&mut self, bound: Self::Bound);
+ fn case_fold_simple(
+ &self,
+ intervals: &mut Vec<Self>,
+ ) -> Result<(), unicode::CaseFoldError>;
+
+    /// Create a new interval from two bounds given in either order.
+    ///
+    /// The smaller bound becomes the lower end and the larger bound becomes
+    /// the upper end.
+    fn create(lower: Self::Bound, upper: Self::Bound) -> Self {
+        let (lo, hi) =
+            if lower <= upper { (lower, upper) } else { (upper, lower) };
+        let mut int = Self::default();
+        int.set_lower(lo);
+        int.set_upper(hi);
+        int
+    }
+
+    /// Merge this range with another range that overlaps it or is adjacent
+    /// to it.
+    ///
+    /// If the two ranges aren't contiguous, then this returns `None`.
+    fn union(&self, other: &Self) -> Option<Self> {
+        if self.is_contiguous(other) {
+            let lo = cmp::min(self.lower(), other.lower());
+            let hi = cmp::max(self.upper(), other.upper());
+            Some(Self::create(lo, hi))
+        } else {
+            None
+        }
+    }
+
+    /// Return the part of this range that is shared with the given range.
+    ///
+    /// If the intersection is empty, then this returns `None`.
+    fn intersect(&self, other: &Self) -> Option<Self> {
+        let lo = cmp::max(self.lower(), other.lower());
+        let hi = cmp::min(self.upper(), other.upper());
+        if lo > hi {
+            return None;
+        }
+        Some(Self::create(lo, hi))
+    }
+
+    /// Subtract the given range from this range and return the resulting
+    /// ranges.
+    ///
+    /// If subtraction would result in an empty range, then no ranges are
+    /// returned. When exactly one range remains it is always placed in the
+    /// first slot of the tuple.
+    fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) {
+        if self.is_subset(other) {
+            return (None, None);
+        }
+        if self.is_intersection_empty(other) {
+            return (Some(self.clone()), None);
+        }
+        // The piece of `self` that lies below `other`, if any.
+        let below = if other.lower() > self.lower() {
+            Some(Self::create(self.lower(), other.lower().decrement()))
+        } else {
+            None
+        };
+        // The piece of `self` that lies above `other`, if any.
+        let above = if other.upper() < self.upper() {
+            Some(Self::create(other.upper().increment(), self.upper()))
+        } else {
+            None
+        };
+        // We know this because !self.is_subset(other) and the ranges have
+        // a non-empty intersection.
+        assert!(below.is_some() || above.is_some());
+        match (below, above) {
+            (None, only) => (only, None),
+            pieces => pieces,
+        }
+    }
+
+    /// Compute the symmetric difference of the given range and this range.
+    /// This returns the union of the two ranges minus their intersection.
+    ///
+    /// When the ranges have no union or no intersection, both ranges are
+    /// returned unchanged.
+    fn symmetric_difference(
+        &self,
+        other: &Self,
+    ) -> (Option<Self>, Option<Self>) {
+        match (self.union(other), self.intersect(other)) {
+            (Some(merged), Some(shared)) => merged.difference(&shared),
+            _ => (Some(self.clone()), Some(other.clone())),
+        }
+    }
+
+    /// Returns true if and only if the two ranges are contiguous. Two ranges
+    /// are contiguous if and only if the ranges are either overlapping or
+    /// adjacent.
+    fn is_contiguous(&self, other: &Self) -> bool {
+        // Bounds are compared as `u32` so that "adjacent" can be expressed
+        // as "one past the end"; the addition saturates rather than wraps.
+        let start = cmp::max(self.lower().as_u32(), other.lower().as_u32());
+        let end = cmp::min(self.upper().as_u32(), other.upper().as_u32());
+        start <= end.saturating_add(1)
+    }
+
+    /// Returns true if and only if this range and the other range have no
+    /// element in common.
+    fn is_intersection_empty(&self, other: &Self) -> bool {
+        let start = cmp::max(self.lower(), other.lower());
+        let end = cmp::min(self.upper(), other.upper());
+        start > end
+    }
+
+    /// Returns true if and only if both ends of this range lie inside the
+    /// other range.
+    fn is_subset(&self, other: &Self) -> bool {
+        let (outer_lo, outer_hi) = (other.lower(), other.upper());
+        let within =
+            |bound: Self::Bound| outer_lo <= bound && bound <= outer_hi;
+        within(self.lower()) && within(self.upper())
+    }
+}
+
+/// The endpoint type of an interval.
+pub trait Bound:
+    Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord
+{
+    /// The smallest value of this type.
+    fn min_value() -> Self;
+    /// The largest value of this type.
+    fn max_value() -> Self;
+    /// This value converted to a `u32`.
+    fn as_u32(self) -> u32;
+    /// The value that follows this one. The implementations below panic
+    /// when called on the maximum value.
+    fn increment(self) -> Self;
+    /// The value that precedes this one. The implementations below panic
+    /// when called on the minimum value.
+    fn decrement(self) -> Self;
+}
+
+impl Bound for u8 {
+    fn min_value() -> Self {
+        Self::MIN
+    }
+    fn max_value() -> Self {
+        Self::MAX
+    }
+    fn as_u32(self) -> u32 {
+        u32::from(self)
+    }
+    fn increment(self) -> Self {
+        let next = self.checked_add(1);
+        next.unwrap()
+    }
+    fn decrement(self) -> Self {
+        let prev = self.checked_sub(1);
+        prev.unwrap()
+    }
+}
+
+impl Bound for char {
+    fn min_value() -> Self {
+        '\x00'
+    }
+    fn max_value() -> Self {
+        '\u{10FFFF}'
+    }
+    fn as_u32(self) -> u32 {
+        u32::from(self)
+    }
+
+    fn increment(self) -> Self {
+        // Step over the surrogate range, which holds no valid `char`.
+        if self == '\u{D7FF}' {
+            return '\u{E000}';
+        }
+        let next = (self as u32).checked_add(1).unwrap();
+        char::from_u32(next).unwrap()
+    }
+
+    fn decrement(self) -> Self {
+        // Step over the surrogate range, which holds no valid `char`.
+        if self == '\u{E000}' {
+            return '\u{D7FF}';
+        }
+        let prev = (self as u32).checked_sub(1).unwrap();
+        char::from_u32(prev).unwrap()
+    }
+}
+
+// Tests for interval sets are written in src/hir/mod.rs against the public API.
diff --git a/vendor/regex-syntax/src/hir/literal/mod.rs b/vendor/regex-syntax/src/hir/literal/mod.rs
new file mode 100644
index 000000000..1e66d2cc3
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/literal/mod.rs
@@ -0,0 +1,1690 @@
+/*!
+Provides routines for extracting literal prefixes and suffixes from an `Hir`.
+*/
+
+use std::cmp;
+use std::fmt;
+use std::iter;
+use std::mem;
+use std::ops;
+
+use crate::hir::{self, Hir, HirKind};
+
+/// A set of literal byte strings extracted from a regular expression.
+///
+/// Every member of the set is a `Literal`, which is represented by a
+/// `Vec<u8>`. (Notably, it may contain invalid UTF-8.) Every member is
+/// said to be either *complete* or *cut*. A complete literal means that
+/// it extends until the beginning (or end) of the regular expression. In
+/// some circumstances, this can be used to indicate a match in the regular
+/// expression.
+///
+/// A key aspect of literal extraction is knowing when to stop. It is not
+/// feasible to blindly extract all literals from a regular expression, even if
+/// there are finitely many. For example, the regular expression `[0-9]{10}`
+/// has `10^10` distinct literals. For this reason, literal extraction is
+/// bounded to some low number by default using heuristics, but the limits can
+/// be tweaked.
+///
+/// **WARNING**: Literal extraction uses stack space proportional to the size
+/// of the `Hir` expression. At some point, this drawback will be eliminated.
+/// To protect yourself, set a reasonable
+/// [`nest_limit` on your `Parser`](../../struct.ParserBuilder.html#method.nest_limit).
+/// This is done for you by default.
+#[derive(Clone, Eq, PartialEq)]
+pub struct Literals {
+ // The members of the set, in insertion order unless a method sorts them.
+ lits: Vec<Literal>,
+ // Approximate cap, in bytes, on the summed length of all members.
+ limit_size: usize,
+ // Largest character/byte class (counted in items) that will be expanded.
+ limit_class: usize,
+}
+
+/// A single member of a set of literals extracted from a regular expression.
+///
+/// This type has `Deref` and `DerefMut` impls to `Vec<u8>` so that all slice
+/// and `Vec` operations are available.
+// NOTE(review): `Ord` is derived here, so `cmp` also looks at `cut`, while
+// the hand-written `PartialEq`/`PartialOrd` impls in this file compare only
+// `v`. Confirm that this asymmetry is intended before relying on it.
+#[derive(Clone, Eq, Ord)]
+pub struct Literal {
+ // The bytes of the literal (not necessarily valid UTF-8).
+ v: Vec<u8>,
+ // True once the literal has been "cut".
+ cut: bool,
+}
+
+impl Literals {
+    /// Creates a set with no members and the default limits (`limit_size`
+    /// of 250 and `limit_class` of 10).
+    pub fn empty() -> Literals {
+        Literals { lits: Vec::new(), limit_size: 250, limit_class: 10 }
+    }
+
+    /// Builds a default-limited set and fills it with the prefix literals
+    /// that can be extracted from `expr`.
+    pub fn prefixes(expr: &Hir) -> Literals {
+        let mut set = Literals::empty();
+        set.union_prefixes(expr);
+        set
+    }
+
+    /// Builds a default-limited set and fills it with the suffix literals
+    /// that can be extracted from `expr`.
+    pub fn suffixes(expr: &Hir) -> Literals {
+        let mut set = Literals::empty();
+        set.union_suffixes(expr);
+        set
+    }
+
+    /// Returns the approximate cap, in bytes, on the total size of this set.
+    pub fn limit_size(&self) -> usize {
+        self.limit_size
+    }
+
+    /// Changes the approximate cap, in bytes, on the total size of this set
+    /// and returns `self` so calls can be chained.
+    ///
+    /// Extraction stops when adding a literal would push the set past the
+    /// cap.
+    ///
+    /// Only later additions are checked against the new cap; members that
+    /// are already present are kept even if they exceed it.
+    pub fn set_limit_size(&mut self, size: usize) -> &mut Literals {
+        self.limit_size = size;
+        self
+    }
+
+    /// Returns the largest character (or byte) class size this set will
+    /// expand.
+    pub fn limit_class(&self) -> usize {
+        self.limit_class
+    }
+
+    /// Limits the size of the character (or byte) classes that are
+    /// considered, and returns `self` so calls can be chained.
+    ///
+    /// Setting the limit to `0` stops every character class from being
+    /// considered.
+    ///
+    /// Case insensitive literals are subject to this limit as well, because
+    /// each of their characters is turned into a class and then case folded.
+    ///
+    /// Only later additions are checked against the new limit; members that
+    /// are already present are kept even if they exceed it.
+    pub fn set_limit_class(&mut self, size: usize) -> &mut Literals {
+        self.limit_class = size;
+        self
+    }
+
+    /// Borrows the members of this set as a slice, in unspecified order.
+    pub fn literals(&self) -> &[Literal] {
+        self.lits.as_slice()
+    }
+
+    /// Returns the length of the shortest literal in the set.
+    ///
+    /// Returns `None` if there are no literals in the set.
+    pub fn min_len(&self) -> Option<usize> {
+        self.lits.iter().map(|lit| lit.len()).min()
+    }
+
+    /// Returns true if the set is non-empty and none of its members is cut.
+    pub fn all_complete(&self) -> bool {
+        if self.lits.is_empty() {
+            return false;
+        }
+        !self.lits.iter().any(|lit| lit.is_cut())
+    }
+
+    /// Returns true if at least one member of this set has not been cut.
+    pub fn any_complete(&self) -> bool {
+        !self.lits.iter().all(|lit| lit.is_cut())
+    }
+
+    /// Returns true if some member of this set has zero length.
+    pub fn contains_empty(&self) -> bool {
+        self.lits.iter().map(|lit| lit.len()).any(|len| len == 0)
+    }
+
+    /// Returns true if this set has no members or if all of its members
+    /// are empty.
+    pub fn is_empty(&self) -> bool {
+        // `all` is vacuously true for a set without members.
+        self.lits.iter().all(|lit| lit.is_empty())
+    }
+
+    /// Creates a set with no members that carries over this set's size and
+    /// class limits.
+    pub fn to_empty(&self) -> Literals {
+        let mut fresh = Literals::empty();
+        fresh.set_limit_size(self.limit_size);
+        fresh.set_limit_class(self.limit_class);
+        fresh
+    }
+
+    /// Returns the longest byte string that every member of this set starts
+    /// with. The result is empty when `is_empty()` holds.
+    pub fn longest_common_prefix(&self) -> &[u8] {
+        if self.is_empty() {
+            return &[];
+        }
+        let first: &[u8] = &self.lits[0];
+        // Shrink the candidate length to the shortest agreement with any
+        // other member.
+        let shared = self.lits[1..]
+            .iter()
+            .map(|lit| {
+                lit.iter().zip(first).take_while(|&(a, b)| a == b).count()
+            })
+            .fold(first.len(), cmp::min);
+        &first[..shared]
+    }
+
+    /// Returns the longest byte string that every member of this set ends
+    /// with. The result is empty when `is_empty()` holds.
+    pub fn longest_common_suffix(&self) -> &[u8] {
+        if self.is_empty() {
+            return &[];
+        }
+        let first: &[u8] = &self.lits[0];
+        // Compare back to front and keep the shortest agreement with any
+        // other member.
+        let shared = self.lits[1..]
+            .iter()
+            .map(|lit| {
+                lit.iter()
+                    .rev()
+                    .zip(first.iter().rev())
+                    .take_while(|&(a, b)| a == b)
+                    .count()
+            })
+            .fold(first.len(), cmp::min);
+        &first[first.len() - shared..]
+    }
+
+    /// Returns a copy of this set in which the last `num_bytes` bytes of
+    /// every literal have been removed and every literal is cut.
+    ///
+    /// Returns `None` if the set has no members or if trimming would leave
+    /// some literal with no bytes at all.
+    ///
+    /// The result is sorted, and duplicates produced by the trimming are
+    /// removed.
+    pub fn trim_suffix(&self, num_bytes: usize) -> Option<Literals> {
+        match self.min_len() {
+            Some(shortest) if shortest > num_bytes => {}
+            _ => return None,
+        }
+        let mut trimmed = self.to_empty();
+        trimmed.lits = self
+            .lits
+            .iter()
+            .map(|lit| {
+                let mut short = lit.clone();
+                let keep = short.len() - num_bytes;
+                short.truncate(keep);
+                short.cut();
+                short
+            })
+            .collect();
+        trimmed.lits.sort();
+        trimmed.lits.dedup();
+        Some(trimmed)
+    }
+
+ /// Returns a new set of prefixes of this set of literals that are
+ /// guaranteed to be unambiguous.
+ ///
+ /// Any substring match with a member of the returned set is guaranteed
+ /// to never overlap with a substring match of another member of the set
+ /// at the same starting position.
+ ///
+ /// Given any two members of the returned set, neither is a substring of
+ /// the other.
+ pub fn unambiguous_prefixes(&self) -> Literals {
+ if self.lits.is_empty() {
+ return self.to_empty();
+ }
+ // `old` is the work list of literals still to be placed (taken from
+ // the back); `new` accumulates the result.
+ let mut old: Vec<Literal> = self.lits.iter().cloned().collect();
+ let mut new = self.to_empty();
+ 'OUTER: while let Some(mut candidate) = old.pop() {
+ if candidate.is_empty() {
+ continue;
+ }
+ if new.lits.is_empty() {
+ new.lits.push(candidate);
+ continue;
+ }
+ for lit2 in &mut new.lits {
+ // Cleared entries are tombstones; they are removed at the end.
+ if lit2.is_empty() {
+ continue;
+ }
+ if &candidate == lit2 {
+ // If the literal is already in the set, then we can
+ // just drop it. But make sure that cut literals are
+ // infectious!
+ candidate.cut = candidate.cut || lit2.cut;
+ lit2.cut = candidate.cut;
+ continue 'OUTER;
+ }
+ if candidate.len() < lit2.len() {
+ // The shorter candidate occurs inside `lit2` at offset `i`:
+ // cut the candidate, queue the first `i` bytes of `lit2`
+ // as a new cut literal and clear `lit2` itself.
+ if let Some(i) = position(&candidate, &lit2) {
+ candidate.cut();
+ let mut lit3 = lit2.clone();
+ lit3.truncate(i);
+ lit3.cut();
+ old.push(lit3);
+ lit2.clear();
+ }
+ } else {
+ // Mirror case: `lit2` occurs inside the candidate at offset
+ // `i`, so `lit2` is cut, the first `i` bytes of the
+ // candidate are queued and the candidate is cleared.
+ if let Some(i) = position(&lit2, &candidate) {
+ lit2.cut();
+ let mut new_candidate = candidate.clone();
+ new_candidate.truncate(i);
+ new_candidate.cut();
+ old.push(new_candidate);
+ candidate.clear();
+ }
+ }
+ // Oops, the candidate is already represented in the set.
+ if candidate.is_empty() {
+ continue 'OUTER;
+ }
+ }
+ new.lits.push(candidate);
+ }
+ // Drop the tombstones, then sort and remove duplicates.
+ new.lits.retain(|lit| !lit.is_empty());
+ new.lits.sort();
+ new.lits.dedup();
+ new
+ }
+
+    /// Returns a new set of suffixes of this set of literals that are
+    /// guaranteed to be unambiguous.
+    ///
+    /// Any substring match with a member of the returned set is guaranteed
+    /// to never overlap with a substring match of another member of the set
+    /// at the same ending position.
+    ///
+    /// Given any two members of the returned set, neither is a substring of
+    /// the other.
+    pub fn unambiguous_suffixes(&self) -> Literals {
+        // Reverse a copy, reuse the prefix algorithm, and reverse the
+        // result back. Somewhat wasteful, but simple.
+        let mut flipped = self.clone();
+        flipped.reverse();
+        let mut result = flipped.unambiguous_prefixes();
+        result.reverse();
+        result
+    }
+
+    /// Extracts the prefixes of `expr` and unions them into this set.
+    ///
+    /// Returns false, leaving this set untouched, if nothing usable was
+    /// extracted (the extracted set is empty or contains the empty string)
+    /// or if the union would exceed this set's size limit.
+    ///
+    /// Note that prefix literals extracted from `expr` are said to be
+    /// complete if and only if the literal extends from the beginning of
+    /// `expr` to the end of `expr`.
+    pub fn union_prefixes(&mut self, expr: &Hir) -> bool {
+        let mut found = self.to_empty();
+        prefixes(expr, &mut found);
+        if found.is_empty() || found.contains_empty() {
+            return false;
+        }
+        self.union(found)
+    }
+
+    /// Extracts the suffixes of `expr` and unions them into this set.
+    ///
+    /// Returns false, leaving this set untouched, if nothing usable was
+    /// extracted (the extracted set is empty or contains the empty string)
+    /// or if the union would exceed this set's size limit.
+    ///
+    /// Note that suffix literals extracted from `expr` are said to be
+    /// complete if and only if the literal extends from the end of `expr`
+    /// to the beginning of `expr`.
+    pub fn union_suffixes(&mut self, expr: &Hir) -> bool {
+        let mut found = self.to_empty();
+        suffixes(expr, &mut found);
+        // The extraction writes each literal back to front; undo that.
+        found.reverse();
+        if found.is_empty() || found.contains_empty() {
+            return false;
+        }
+        self.union(found)
+    }
+
+    /// Adds the members of `lits` to this set.
+    ///
+    /// Returns false and changes nothing if the combined byte count would
+    /// be larger than this set's size limit; returns true otherwise.
+    pub fn union(&mut self, lits: Literals) -> bool {
+        let combined = self.num_bytes() + lits.num_bytes();
+        if combined > self.limit_size {
+            return false;
+        }
+        if !lits.is_empty() {
+            self.lits.extend(lits.lits);
+        } else {
+            // An empty `lits` is recorded as a single empty literal.
+            self.lits.push(Literal::empty());
+        }
+        true
+    }
+
+ /// Extends this set with another set.
+ ///
+ /// The set of literals is extended via a cross product: every complete
+ /// member of this set is replaced by one copy per member of `lits`, with
+ /// that member's bytes appended. Cut members of this set are left as is.
+ ///
+ /// If a cross product would cause this set to exceed its limits, then the
+ /// cross product is skipped and it returns false. Otherwise, if the cross
+ /// product succeeds, it returns true.
+ pub fn cross_product(&mut self, lits: &Literals) -> bool {
+ // Nothing to append.
+ if lits.is_empty() {
+ return true;
+ }
+ // Check that we make sure we stay in our limits.
+ let mut size_after;
+ if self.is_empty() || !self.any_complete() {
+ // No complete member to extend: the members of `lits` are simply
+ // added on top of what is already here.
+ size_after = self.num_bytes();
+ for lits_lit in lits.literals() {
+ size_after += lits_lit.len();
+ }
+ } else {
+ // Cut members keep their size; every complete member is counted
+ // once per member of `lits`, at the combined length.
+ size_after = self.lits.iter().fold(0, |accum, lit| {
+ accum + if lit.is_cut() { lit.len() } else { 0 }
+ });
+ for lits_lit in lits.literals() {
+ for self_lit in self.literals() {
+ if !self_lit.is_cut() {
+ size_after += self_lit.len() + lits_lit.len();
+ }
+ }
+ }
+ }
+ if size_after > self.limit_size {
+ return false;
+ }
+
+ // Take the complete members out; only cut members stay in `self.lits`.
+ let mut base = self.remove_complete();
+ if base.is_empty() {
+ base = vec![Literal::empty()];
+ }
+ for lits_lit in lits.literals() {
+ for mut self_lit in base.clone() {
+ self_lit.extend(&**lits_lit);
+ // The combined literal takes its cut flag from the appended one.
+ self_lit.cut = lits_lit.cut;
+ self.lits.push(self_lit);
+ }
+ }
+ true
+ }
+
+ /// Extends each literal in this set with the bytes given.
+ ///
+ /// If the set is empty, then the given literal is added to the set.
+ ///
+ /// If adding any number of bytes to all members of this set causes a limit
+ /// to be exceeded, then no bytes are added and false is returned. If a
+ /// prefix of `bytes` can be fit into this set, then it is used and all
+ /// resulting literals are cut.
+ pub fn cross_add(&mut self, bytes: &[u8]) -> bool {
+ // N.B. This could be implemented by simply calling cross_product with
+ // a literal set containing just `bytes`, but we can be smarter about
+ // taking shorter prefixes of `bytes` if they'll fit.
+ if bytes.is_empty() {
+ return true;
+ }
+ if self.lits.is_empty() {
+ // Seed the set with at most `limit_size` bytes of `bytes`; the
+ // literal is cut, and false is returned, if it had to be shortened.
+ let i = cmp::min(self.limit_size, bytes.len());
+ self.lits.push(Literal::new(bytes[..i].to_owned()));
+ self.lits[0].cut = i < bytes.len();
+ return !self.lits[0].is_cut();
+ }
+ let size = self.num_bytes();
+ // Give up if one more byte per member would reach the limit.
+ if size + self.lits.len() >= self.limit_size {
+ return false;
+ }
+ // `i` becomes the number of leading bytes of `bytes` to append.
+ let mut i = 1;
+ while size + (i * self.lits.len()) <= self.limit_size
+ && i < bytes.len()
+ {
+ i += 1;
+ }
+ for lit in &mut self.lits {
+ // Members that are already cut are never extended.
+ if !lit.is_cut() {
+ lit.extend(&bytes[..i]);
+ if i < bytes.len() {
+ lit.cut();
+ }
+ }
+ }
+ true
+ }
+
+    /// Appends `lit` to this set.
+    ///
+    /// Returns false, without adding anything, if the literal would make
+    /// the set larger than its size limit.
+    pub fn add(&mut self, lit: Literal) -> bool {
+        let size_after = self.num_bytes() + lit.len();
+        if size_after > self.limit_size {
+            return false;
+        }
+        self.lits.push(lit);
+        true
+    }
+
+    /// Extends every complete literal in this set with each character of
+    /// `cls`, appending the character's bytes in their normal order.
+    ///
+    /// Returns false if the class is too big to add.
+    pub fn add_char_class(&mut self, cls: &hir::ClassUnicode) -> bool {
+        let reverse = false;
+        self._add_char_class(cls, reverse)
+    }
+
+    /// Extends every complete literal in this set with each character of
+    /// `cls`, appending the character's bytes in reverse order.
+    ///
+    /// Returns false if the class is too big to add.
+    fn add_char_class_reverse(&mut self, cls: &hir::ClassUnicode) -> bool {
+        let reverse = true;
+        self._add_char_class(cls, reverse)
+    }
+
+    /// Shared implementation of `add_char_class` and
+    /// `add_char_class_reverse`.
+    ///
+    /// Every complete literal in this set is replaced by one copy per
+    /// character of `cls`, with the character's UTF-8 bytes appended (in
+    /// reverse byte order when `reverse` is true). Cut literals are left
+    /// untouched. If the set has no complete literal, the characters extend
+    /// a single empty literal instead.
+    ///
+    /// Returns false, changing nothing, if `class_exceeds_limits` rejects
+    /// the class.
+    fn _add_char_class(
+        &mut self,
+        cls: &hir::ClassUnicode,
+        reverse: bool,
+    ) -> bool {
+        use std::char;
+
+        if self.class_exceeds_limits(cls_char_count(cls)) {
+            return false;
+        }
+        let mut base = self.remove_complete();
+        if base.is_empty() {
+            base = vec![Literal::empty()];
+        }
+        for r in cls.iter() {
+            let (s, e) = (r.start as u32, r.end as u32 + 1);
+            for c in (s..e).filter_map(char::from_u32) {
+                // Encode the character once, on the stack, instead of
+                // allocating a fresh string for every base literal.
+                let mut buf = [0u8; 4];
+                let len = c.encode_utf8(&mut buf).len();
+                let bytes = &mut buf[..len];
+                if reverse {
+                    bytes.reverse();
+                }
+                for mut lit in base.clone() {
+                    lit.extend(&*bytes);
+                    self.lits.push(lit);
+                }
+            }
+        }
+        true
+    }
+
+    /// Extends every complete literal in this set with each byte of `cls`.
+    ///
+    /// Cut literals are left untouched. If the set has no complete literal,
+    /// the bytes extend a single empty literal instead.
+    ///
+    /// Returns false if the byte class is too big to add.
+    pub fn add_byte_class(&mut self, cls: &hir::ClassBytes) -> bool {
+        if self.class_exceeds_limits(cls_byte_count(cls)) {
+            return false;
+        }
+        let mut seeds = self.remove_complete();
+        if seeds.is_empty() {
+            seeds.push(Literal::empty());
+        }
+        for range in cls.iter() {
+            let (first, last) = (range.start as u32, range.end as u32);
+            for code in first..=last {
+                let byte = code as u8;
+                for mut lit in seeds.clone() {
+                    lit.push(byte);
+                    self.lits.push(lit);
+                }
+            }
+        }
+        true
+    }
+
+    /// Marks every member of this set as cut. A cut member can never be
+    /// extended.
+    pub fn cut(&mut self) {
+        self.lits.iter_mut().for_each(|lit| lit.cut());
+    }
+
+    /// Reverses the bytes of every member in place.
+    pub fn reverse(&mut self) {
+        self.lits.iter_mut().for_each(|lit| lit.reverse());
+    }
+
+    /// Removes every member from this set; the limits are kept.
+    pub fn clear(&mut self) {
+        self.lits.truncate(0);
+    }
+
+    /// Removes the complete (not cut) literals from this set and returns
+    /// them; the cut literals stay behind. Relative order is preserved in
+    /// both groups.
+    fn remove_complete(&mut self) -> Vec<Literal> {
+        let members = mem::replace(&mut self.lits, vec![]);
+        let (still_cut, complete): (Vec<Literal>, Vec<Literal>) =
+            members.into_iter().partition(|lit| lit.is_cut());
+        self.lits = still_cut;
+        complete
+    }
+
+    /// Returns the summed length, in bytes, of all members of this set.
+    fn num_bytes(&self) -> usize {
+        self.lits.iter().map(|lit| lit.len()).sum()
+    }
+
+    /// Returns true if expanding a class with `size` items would break
+    /// either limit of this set.
+    ///
+    /// `size` should be the number of items in the class.
+    fn class_exceeds_limits(&self, size: usize) -> bool {
+        if size > self.limit_class {
+            return true;
+        }
+        // The byte estimate is approximate: a codepoint in a char class can
+        // encode to 1-4 bytes, but each item is counted as one byte here.
+        let projected = if self.lits.is_empty() {
+            size
+        } else {
+            // Cut literals are never extended, so only complete ones
+            // contribute to the estimate.
+            self.lits
+                .iter()
+                .filter(|lit| !lit.is_cut())
+                .map(|lit| (lit.len() + 1) * size)
+                .sum::<usize>()
+        };
+        projected > self.limit_size
+    }
+}
+
+/// Recursively extends `lits` with the prefix literals of `expr`.
+///
+/// Literals and classes are appended to the complete members of `lits`;
+/// repetitions and alternations are delegated to the helper functions with
+/// this function as the callback; concatenations are walked left to right.
+/// When no further literal can be extracted, the members of `lits` are cut.
+fn prefixes(expr: &Hir, lits: &mut Literals) {
+ match *expr.kind() {
+ HirKind::Literal(hir::Literal::Unicode(c)) => {
+ // Append the UTF-8 encoding of the character.
+ let mut buf = [0; 4];
+ lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
+ }
+ HirKind::Literal(hir::Literal::Byte(b)) => {
+ lits.cross_add(&[b]);
+ }
+ HirKind::Class(hir::Class::Unicode(ref cls)) => {
+ // A class that is too big to expand ends extraction here.
+ if !lits.add_char_class(cls) {
+ lits.cut();
+ }
+ }
+ HirKind::Class(hir::Class::Bytes(ref cls)) => {
+ if !lits.add_byte_class(cls) {
+ lits.cut();
+ }
+ }
+ HirKind::Group(hir::Group { ref hir, .. }) => {
+ prefixes(&**hir, lits);
+ }
+ HirKind::Repetition(ref x) => match x.kind {
+ hir::RepetitionKind::ZeroOrOne => {
+ repeat_zero_or_one_literals(&x.hir, lits, prefixes);
+ }
+ hir::RepetitionKind::ZeroOrMore => {
+ repeat_zero_or_more_literals(&x.hir, lits, prefixes);
+ }
+ hir::RepetitionKind::OneOrMore => {
+ repeat_one_or_more_literals(&x.hir, lits, prefixes);
+ }
+ hir::RepetitionKind::Range(ref rng) => {
+ // Normalize the range to (min, optional max).
+ let (min, max) = match *rng {
+ hir::RepetitionRange::Exactly(m) => (m, Some(m)),
+ hir::RepetitionRange::AtLeast(m) => (m, None),
+ hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
+ };
+ repeat_range_literals(
+ &x.hir, min, max, x.greedy, lits, prefixes,
+ )
+ }
+ },
+ HirKind::Concat(ref es) if es.is_empty() => {}
+ HirKind::Concat(ref es) if es.len() == 1 => prefixes(&es[0], lits),
+ HirKind::Concat(ref es) => {
+ for e in es {
+ if let HirKind::Anchor(hir::Anchor::StartText) = *e.kind() {
+ // A start anchor after literals were already gathered
+ // stops extraction; otherwise it contributes an empty
+ // literal and the walk continues.
+ if !lits.is_empty() {
+ lits.cut();
+ break;
+ }
+ lits.add(Literal::empty());
+ continue;
+ }
+ // Extract from the sub-expression into a scratch set, then
+ // combine it with what has been gathered so far.
+ let mut lits2 = lits.to_empty();
+ prefixes(e, &mut lits2);
+ if !lits.cross_product(&lits2) || !lits2.any_complete() {
+ // If this expression couldn't yield any literal that
+ // could be extended, then we need to quit. Since we're
+ // short-circuiting, we also need to freeze every member.
+ lits.cut();
+ break;
+ }
+ }
+ }
+ HirKind::Alternation(ref es) => {
+ alternate_literals(es, lits, prefixes);
+ }
+ // Every other kind of expression yields no literal: freeze the set.
+ _ => lits.cut(),
+ }
+}
+
+/// Recursively extends `lits` with the suffix literals of `expr`.
+///
+/// This mirrors `prefixes`, but concatenations are walked right to left and
+/// the bytes of each character are appended in reverse order, so the
+/// literals are built back to front (`Literals::union_suffixes` reverses
+/// them afterwards).
+fn suffixes(expr: &Hir, lits: &mut Literals) {
+ match *expr.kind() {
+ HirKind::Literal(hir::Literal::Unicode(c)) => {
+ // Append the UTF-8 encoding of the character, last byte first.
+ let mut buf = [0u8; 4];
+ let i = c.encode_utf8(&mut buf).len();
+ let buf = &mut buf[..i];
+ buf.reverse();
+ lits.cross_add(buf);
+ }
+ HirKind::Literal(hir::Literal::Byte(b)) => {
+ lits.cross_add(&[b]);
+ }
+ HirKind::Class(hir::Class::Unicode(ref cls)) => {
+ // A class that is too big to expand ends extraction here.
+ if !lits.add_char_class_reverse(cls) {
+ lits.cut();
+ }
+ }
+ HirKind::Class(hir::Class::Bytes(ref cls)) => {
+ if !lits.add_byte_class(cls) {
+ lits.cut();
+ }
+ }
+ HirKind::Group(hir::Group { ref hir, .. }) => {
+ suffixes(&**hir, lits);
+ }
+ HirKind::Repetition(ref x) => match x.kind {
+ hir::RepetitionKind::ZeroOrOne => {
+ repeat_zero_or_one_literals(&x.hir, lits, suffixes);
+ }
+ hir::RepetitionKind::ZeroOrMore => {
+ repeat_zero_or_more_literals(&x.hir, lits, suffixes);
+ }
+ hir::RepetitionKind::OneOrMore => {
+ repeat_one_or_more_literals(&x.hir, lits, suffixes);
+ }
+ hir::RepetitionKind::Range(ref rng) => {
+ // Normalize the range to (min, optional max).
+ let (min, max) = match *rng {
+ hir::RepetitionRange::Exactly(m) => (m, Some(m)),
+ hir::RepetitionRange::AtLeast(m) => (m, None),
+ hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
+ };
+ repeat_range_literals(
+ &x.hir, min, max, x.greedy, lits, suffixes,
+ )
+ }
+ },
+ HirKind::Concat(ref es) if es.is_empty() => {}
+ HirKind::Concat(ref es) if es.len() == 1 => suffixes(&es[0], lits),
+ HirKind::Concat(ref es) => {
+ for e in es.iter().rev() {
+ if let HirKind::Anchor(hir::Anchor::EndText) = *e.kind() {
+ // An end anchor after literals were already gathered
+ // stops extraction; otherwise it contributes an empty
+ // literal and the walk continues.
+ if !lits.is_empty() {
+ lits.cut();
+ break;
+ }
+ lits.add(Literal::empty());
+ continue;
+ }
+ // Extract from the sub-expression into a scratch set, then
+ // combine it with what has been gathered so far.
+ let mut lits2 = lits.to_empty();
+ suffixes(e, &mut lits2);
+ if !lits.cross_product(&lits2) || !lits2.any_complete() {
+ // If this expression couldn't yield any literal that
+ // could be extended, then we need to quit. Since we're
+ // short-circuiting, we also need to freeze every member.
+ lits.cut();
+ break;
+ }
+ }
+ }
+ HirKind::Alternation(ref es) => {
+ alternate_literals(es, lits, suffixes);
+ }
+ // Every other kind of expression yields no literal: freeze the set.
+ _ => lits.cut(),
+ }
+}
+
+/// Extracts literals for `e?` by rewriting it as a greedy `e*` and handing
+/// that expression to `f`.
+fn repeat_zero_or_one_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    // FIXME: Our literal extraction doesn't care about greediness.
+    // Which is partially why we're treating 'e?' as 'e*'. Namely,
+    // 'ab??' yields [Complete(ab), Complete(a)], but it should yield
+    // [Complete(a), Complete(ab)] because of the non-greediness.
+    let as_zero_or_more = Hir::repetition(hir::Repetition {
+        kind: hir::RepetitionKind::ZeroOrMore,
+        greedy: true,
+        hir: Box::new(e.clone()),
+    });
+    f(&as_zero_or_more, lits);
+}
+
+/// Extracts literals for `e*`.
+///
+/// The literals of `e` are extracted (with half of the size limit) and
+/// appended to a copy of `lits`; that copy is cut, given an extra empty
+/// literal, and unioned back into `lits`. If any step fails, `lits` is cut
+/// instead.
+fn repeat_zero_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    let mut extended = lits.clone();
+    let mut inner = lits.to_empty();
+    inner.set_limit_size(lits.limit_size() / 2);
+    f(e, &mut inner);
+
+    let usable = !inner.is_empty() && extended.cross_product(&inner);
+    if !usable {
+        lits.cut();
+        return;
+    }
+    extended.cut();
+    extended.add(Literal::empty());
+    let merged = lits.union(extended);
+    if !merged {
+        lits.cut();
+    }
+}
+
+/// Extracts literals for `e+`: the literals of a single `e` are extracted
+/// with `f`, and then every member of `lits` is cut.
+fn repeat_one_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut extract: F,
+) {
+    extract(e, lits);
+    lits.cut();
+}
+
+/// Extracts literals for a counted repetition of `e` with at least `min`
+/// and at most `max` iterations (`None` meaning unbounded), using `f` as
+/// the extraction callback.
+///
+/// * `min == 0`: the repetition is treated as `e*`.
+/// * otherwise: `e` is concatenated `min` times (capped at
+///   `lits.limit_size` copies) and handed to `f`. The set is then cut if
+///   the cap applied, if it contains an empty literal, or if the repetition
+///   may match more than `min` times.
+fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    min: u32,
+    max: Option<u32>,
+    greedy: bool,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    if min == 0 {
+        // This is a bit conservative. If `max` is set, then we could
+        // treat this as a finite set of alternations. For now, we
+        // just treat it as `e*`.
+        f(
+            &Hir::repetition(hir::Repetition {
+                kind: hir::RepetitionKind::ZeroOrMore,
+                greedy,
+                hir: Box::new(e.clone()),
+            }),
+            lits,
+        );
+        return;
+    }
+    // From here on `min >= 1`.
+    let n = cmp::min(lits.limit_size, min as usize);
+    let es = iter::repeat(e.clone()).take(n).collect();
+    f(&Hir::concat(es), lits);
+    if n < min as usize || lits.contains_empty() {
+        lits.cut();
+    }
+    // More iterations than `min` are possible, so nothing is complete.
+    if max.map_or(true, |max| min < max) {
+        lits.cut();
+    }
+}
+
+/// Extracts literals for an alternation of the expressions in `es`.
+///
+/// Each branch is extracted separately with a fifth of the size limit and
+/// unioned into a scratch set, which is finally appended to `lits` via a
+/// cross product.
+fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
+    es: &[Hir],
+    lits: &mut Literals,
+    mut f: F,
+) {
+    let mut merged = lits.to_empty();
+    for branch in es {
+        let mut branch_lits = lits.to_empty();
+        branch_lits.set_limit_size(lits.limit_size() / 5);
+        f(branch, &mut branch_lits);
+        // If a branch yields no literals, the whole alternation has to be
+        // thrown away and the existing members frozen. The same applies
+        // when the union cannot be completed.
+        if branch_lits.is_empty() || !merged.union(branch_lits) {
+            lits.cut();
+            return;
+        }
+    }
+    let combined = lits.cross_product(&merged);
+    if !combined {
+        lits.cut();
+    }
+}
+
+/// Shows the members and both limits as a struct named `Literals`.
+impl fmt::Debug for Literals {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut out = f.debug_struct("Literals");
+        out.field("lits", &self.lits);
+        out.field("limit_size", &self.limit_size);
+        out.field("limit_class", &self.limit_class);
+        out.finish()
+    }
+}
+
+impl Literal {
+    /// Wraps `bytes` in a literal that is complete (not cut).
+    pub fn new(bytes: Vec<u8>) -> Literal {
+        Literal { v: bytes, cut: false }
+    }
+
+    /// Creates a complete literal that holds no bytes.
+    pub fn empty() -> Literal {
+        Literal::new(Vec::new())
+    }
+
+    /// Reports whether this literal has been "cut."
+    pub fn is_cut(&self) -> bool {
+        self.cut
+    }
+
+    /// Marks this literal as cut.
+    pub fn cut(&mut self) {
+        self.cut = true;
+    }
+}
+
+/// Literals are equal when their bytes are equal; the cut flag is ignored.
+impl PartialEq for Literal {
+    fn eq(&self, other: &Literal) -> bool {
+        self.v.eq(&other.v)
+    }
+}
+
+/// Literals are ordered by their bytes alone; the cut flag is ignored.
+impl PartialOrd for Literal {
+    fn partial_cmp(&self, other: &Literal) -> Option<cmp::Ordering> {
+        PartialOrd::partial_cmp(&self.v, &other.v)
+    }
+}
+
+/// Shows the escaped bytes wrapped in `Cut(..)` or `Complete(..)`.
+impl fmt::Debug for Literal {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let escaped = escape_unicode(&self.v);
+        if !self.is_cut() {
+            write!(f, "Complete({})", escaped)
+        } else {
+            write!(f, "Cut({})", escaped)
+        }
+    }
+}
+
+/// Borrows the bytes of the literal.
+impl AsRef<[u8]> for Literal {
+    fn as_ref(&self) -> &[u8] {
+        self.v.as_slice()
+    }
+}
+
+/// Gives read access to the underlying byte vector, so slice and `Vec`
+/// methods can be called on a `Literal` directly.
+impl ops::Deref for Literal {
+    type Target = Vec<u8>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.v
+    }
+}
+
+/// Gives write access to the underlying byte vector.
+impl ops::DerefMut for Literal {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.v
+    }
+}
+
+/// Returns the offset of the first occurrence of `needle` in `haystack`,
+/// or `None` if it does not occur. An empty needle is found at offset 0.
+fn position(needle: &[u8], haystack: &[u8]) -> Option<usize> {
+    // `windows` panics on a zero width, so handle the empty needle first.
+    if needle.is_empty() {
+        return Some(0);
+    }
+    haystack.windows(needle.len()).position(|window| window == needle)
+}
+
+/// Renders `bytes` for display.
+///
+/// Valid UTF-8 is shown as text and anything else is byte-escaped. In
+/// either case, whitespace characters in the result are replaced by escape
+/// sequences.
+fn escape_unicode(bytes: &[u8]) -> String {
+    let show = match ::std::str::from_utf8(bytes) {
+        Ok(v) => v.to_string(),
+        Err(_) => escape_bytes(bytes),
+    };
+    let mut out = String::new();
+    for c in show.chars() {
+        if !c.is_whitespace() {
+            out.push(c);
+            continue;
+        }
+        let code = c as u32;
+        let escaped = if code <= 0x7F {
+            escape_byte(c as u8)
+        } else if code <= 0xFFFF {
+            format!(r"\u{{{:04x}}}", code)
+        } else {
+            format!(r"\U{{{:08x}}}", code)
+        };
+        out.push_str(&escaped);
+    }
+    out
+}
+
+/// Concatenates the escaped form of every byte in `bytes`.
+fn escape_bytes(bytes: &[u8]) -> String {
+    bytes.iter().map(|&b| escape_byte(b)).collect()
+}
+
+/// Escapes a single byte with `std::ascii::escape_default` and returns the
+/// result as a string.
+fn escape_byte(byte: u8) -> String {
+    // `escape_default` only ever yields ASCII bytes, so each one maps
+    // directly to a `char`.
+    ::std::ascii::escape_default(byte).map(|b| b as char).collect()
+}
+
+/// Sums, over all ranges of `cls`, the distance between the range's end
+/// and start plus one.
+fn cls_char_count(cls: &hir::ClassUnicode) -> usize {
+    let total: u32 =
+        cls.iter().map(|r| 1 + (r.end as u32) - (r.start as u32)).sum();
+    total as usize
+}
+
+/// Sums, over all ranges of `cls`, the distance between the range's end
+/// and start plus one.
+fn cls_byte_count(cls: &hir::ClassBytes) -> usize {
+    let total: u32 =
+        cls.iter().map(|r| 1 + (r.end as u32) - (r.start as u32)).sum();
+    total as usize
+}
+
+#[cfg(test)]
+mod tests {
+ use std::fmt;
+
+ use super::{escape_bytes, Literal, Literals};
+ use crate::hir::Hir;
+ use crate::ParserBuilder;
+
+ // To make test failures easier to read: the expected and actual literal
+ // lists are wrapped in one of these types, so a failing assertion names
+ // whether the byte-oriented or the Unicode parser configuration was used.
+ #[derive(Debug, Eq, PartialEq)]
+ struct Bytes(Vec<ULiteral>);
+ #[derive(Debug, Eq, PartialEq)]
+ struct Unicode(Vec<ULiteral>);
+
+    /// Converts extracted literals into their escaped, printable test form,
+    /// keeping each literal's cut flag.
+    fn escape_lits(blits: &[Literal]) -> Vec<ULiteral> {
+        blits
+            .iter()
+            .map(|blit| ULiteral {
+                v: escape_bytes(blit),
+                cut: blit.is_cut(),
+            })
+            .collect()
+    }
+
+    /// Builds a set directly from the given literals, with both limits set
+    /// to zero.
+    fn create_lits<I: IntoIterator<Item = Literal>>(it: I) -> Literals {
+        let members: Vec<Literal> = it.into_iter().collect();
+        Literals { lits: members, limit_size: 0, limit_class: 0 }
+    }
+
+ // A literal in printable form, as written in test expectations: `v` is
+ // the escaped text of the bytes and `cut` mirrors `Literal::is_cut`.
+ // Needs to be pub for 1.3?
+ #[derive(Clone, Eq, PartialEq)]
+ pub struct ULiteral {
+ v: String,
+ cut: bool,
+ }
+
+    impl ULiteral {
+        /// Reports whether this expected literal is marked as cut.
+        fn is_cut(&self) -> bool {
+            self.cut
+        }
+    }
+
+    /// Shows the text wrapped in `Cut(..)` or `Complete(..)`.
+    impl fmt::Debug for ULiteral {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            if !self.is_cut() {
+                write!(f, "Complete({})", self.v)
+            } else {
+                write!(f, "Cut({})", self.v)
+            }
+        }
+    }
+
+    /// Equal when both the bytes and the cut flags agree.
+    impl PartialEq<Literal> for ULiteral {
+        fn eq(&self, other: &Literal) -> bool {
+            let same_bytes = self.v.as_bytes() == other.v.as_slice();
+            let same_cut = self.is_cut() == other.is_cut();
+            same_bytes && same_cut
+        }
+    }
+
+    /// Equal when both the bytes and the cut flags agree.
+    impl PartialEq<ULiteral> for Literal {
+        fn eq(&self, other: &ULiteral) -> bool {
+            let same_bytes = self.v.as_slice() == other.v.as_bytes();
+            let same_cut = self.is_cut() == other.is_cut();
+            same_bytes && same_cut
+        }
+    }
+
+    /// Shorthand for an expected literal that is cut.
+    #[allow(non_snake_case)]
+    fn C(s: &'static str) -> ULiteral {
+        ULiteral { v: String::from(s), cut: true }
+    }
+    /// Shorthand for an expected literal that is complete (not cut).
+    #[allow(non_snake_case)]
+    fn M(s: &'static str) -> ULiteral {
+        ULiteral { v: String::from(s), cut: false }
+    }
+
+    /// Unions the prefixes of `expr` into `lits`, ignoring whether the
+    /// union succeeded.
+    fn prefixes(lits: &mut Literals, expr: &Hir) {
+        let _ = lits.union_prefixes(expr);
+    }
+
+    /// Unions the suffixes of `expr` into `lits`, ignoring whether the
+    /// union succeeded.
+    fn suffixes(lits: &mut Literals, expr: &Hir) {
+        let _ = lits.union_suffixes(expr);
+    }
+
+ // Asserts that `$got_lits` holds exactly the expected literals, in order,
+ // and that `all_complete`/`any_complete` agree with the expected cut flags.
+ // `$which` is the wrapper type (`Bytes` or `Unicode`) used when comparing.
+ macro_rules! assert_lit_eq {
+ ($which:ident, $got_lits:expr, $($expected_lit:expr),*) => {{
+ let expected: Vec<ULiteral> = vec![$($expected_lit),*];
+ let lits = $got_lits;
+ assert_eq!(
+ $which(expected.clone()),
+ $which(escape_lits(lits.literals())));
+ assert_eq!(
+ !expected.is_empty() && expected.iter().all(|l| !l.is_cut()),
+ lits.all_complete());
+ assert_eq!(
+ expected.iter().any(|l| !l.is_cut()),
+ lits.any_complete());
+ }};
+ }
+
+ // Defines a `#[test]` named `$name` that parses `$re` twice, once with the
+ // default parser and once with Unicode disabled and invalid UTF-8 allowed,
+ // and checks that `Literals::$which` yields the listed literals both times.
+ // The first arm accepts an invocation with no expected literals.
+ macro_rules! test_lit {
+ ($name:ident, $which:ident, $re:expr) => {
+ test_lit!($name, $which, $re,);
+ };
+ ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
+ #[test]
+ fn $name() {
+ let expr = ParserBuilder::new()
+ .build()
+ .parse($re)
+ .unwrap();
+ let lits = Literals::$which(&expr);
+ assert_lit_eq!(Unicode, lits, $($lit),*);
+
+ let expr = ParserBuilder::new()
+ .allow_invalid_utf8(true)
+ .unicode(false)
+ .build()
+ .parse($re)
+ .unwrap();
+ let lits = Literals::$which(&expr);
+ assert_lit_eq!(Bytes, lits, $($lit),*);
+ }
+ };
+ }
+
+ // ************************************************************************
+ // Tests for prefix literal extraction.
+ // ************************************************************************
+
+ // Elementary tests.
+ test_lit!(pfx_one_lit1, prefixes, "a", M("a"));
+ test_lit!(pfx_one_lit2, prefixes, "abc", M("abc"));
+ test_lit!(pfx_one_lit3, prefixes, "(?u)☃", M("\\xe2\\x98\\x83"));
+ #[cfg(feature = "unicode-case")]
+ test_lit!(pfx_one_lit4, prefixes, "(?ui)☃", M("\\xe2\\x98\\x83"));
+ test_lit!(pfx_class1, prefixes, "[1-4]", M("1"), M("2"), M("3"), M("4"));
+ test_lit!(
+ pfx_class2,
+ prefixes,
+ "(?u)[☃Ⅰ]",
+ M("\\xe2\\x85\\xa0"),
+ M("\\xe2\\x98\\x83")
+ );
+ #[cfg(feature = "unicode-case")]
+ test_lit!(
+ pfx_class3,
+ prefixes,
+ "(?ui)[☃Ⅰ]",
+ M("\\xe2\\x85\\xa0"),
+ M("\\xe2\\x85\\xb0"),
+ M("\\xe2\\x98\\x83")
+ );
+ test_lit!(pfx_one_lit_casei1, prefixes, "(?i-u)a", M("A"), M("a"));
+ test_lit!(
+ pfx_one_lit_casei2,
+ prefixes,
+ "(?i-u)abc",
+ M("ABC"),
+ M("aBC"),
+ M("AbC"),
+ M("abC"),
+ M("ABc"),
+ M("aBc"),
+ M("Abc"),
+ M("abc")
+ );
+ test_lit!(pfx_group1, prefixes, "(a)", M("a"));
+ test_lit!(pfx_rep_zero_or_one1, prefixes, "a?");
+ test_lit!(pfx_rep_zero_or_one2, prefixes, "(?:abc)?");
+ test_lit!(pfx_rep_zero_or_one_cat1, prefixes, "ab?", C("ab"), M("a"));
+ // FIXME: This should return [M("a"), M("ab")] because of the non-greedy
+ // repetition. As a work-around, we rewrite ab?? as ab*?, and thus we get
+ // a cut literal.
+ test_lit!(pfx_rep_zero_or_one_cat2, prefixes, "ab??", C("ab"), M("a"));
+ test_lit!(pfx_rep_zero_or_more1, prefixes, "a*");
+ test_lit!(pfx_rep_zero_or_more2, prefixes, "(?:abc)*");
+ test_lit!(pfx_rep_one_or_more1, prefixes, "a+", C("a"));
+ test_lit!(pfx_rep_one_or_more2, prefixes, "(?:abc)+", C("abc"));
+ test_lit!(pfx_rep_nested_one_or_more, prefixes, "(?:a+)+", C("a"));
+ test_lit!(pfx_rep_range1, prefixes, "a{0}");
+ test_lit!(pfx_rep_range2, prefixes, "a{0,}");
+ test_lit!(pfx_rep_range3, prefixes, "a{0,1}");
+ test_lit!(pfx_rep_range4, prefixes, "a{1}", M("a"));
+ test_lit!(pfx_rep_range5, prefixes, "a{2}", M("aa"));
+ test_lit!(pfx_rep_range6, prefixes, "a{1,2}", C("a"));
+ test_lit!(pfx_rep_range7, prefixes, "a{2,3}", C("aa"));
+
+ // Test prefix extraction for regexes with concatenations.
+ test_lit!(pfx_cat1, prefixes, "(?:a)(?:b)", M("ab"));
+ test_lit!(pfx_cat2, prefixes, "[ab]z", M("az"), M("bz"));
+ test_lit!(
+ pfx_cat3,
+ prefixes,
+ "(?i-u)[ab]z",
+ M("AZ"),
+ M("BZ"),
+ M("aZ"),
+ M("bZ"),
+ M("Az"),
+ M("Bz"),
+ M("az"),
+ M("bz")
+ );
+ test_lit!(
+ pfx_cat4,
+ prefixes,
+ "[ab][yz]",
+ M("ay"),
+ M("by"),
+ M("az"),
+ M("bz")
+ );
+ test_lit!(pfx_cat5, prefixes, "a*b", C("a"), M("b"));
+ test_lit!(pfx_cat6, prefixes, "a*b*c", C("a"), C("b"), M("c"));
+ test_lit!(pfx_cat7, prefixes, "a*b*c+", C("a"), C("b"), C("c"));
+ test_lit!(pfx_cat8, prefixes, "a*b+c", C("a"), C("b"));
+ test_lit!(pfx_cat9, prefixes, "a*b+c*", C("a"), C("b"));
+ test_lit!(pfx_cat10, prefixes, "ab*", C("ab"), M("a"));
+ test_lit!(pfx_cat11, prefixes, "ab*c", C("ab"), M("ac"));
+ test_lit!(pfx_cat12, prefixes, "ab+", C("ab"));
+ test_lit!(pfx_cat13, prefixes, "ab+c", C("ab"));
+ test_lit!(pfx_cat14, prefixes, "a^", C("a"));
+ test_lit!(pfx_cat15, prefixes, "$a");
+ test_lit!(pfx_cat16, prefixes, r"ab*c", C("ab"), M("ac")); // same pattern and expectation as pfx_cat11
+ test_lit!(pfx_cat17, prefixes, r"ab+c", C("ab")); // same pattern and expectation as pfx_cat13
+ test_lit!(pfx_cat18, prefixes, r"z*azb", C("z"), M("azb"));
+ test_lit!(pfx_cat19, prefixes, "a.z", C("a"));
+
+ // Test prefix extraction for regexes with alternations.
+ test_lit!(pfx_alt1, prefixes, "a|b", M("a"), M("b"));
+ test_lit!(pfx_alt2, prefixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
+ test_lit!(pfx_alt3, prefixes, "y(?:a|b)z", M("yaz"), M("ybz"));
+ test_lit!(pfx_alt4, prefixes, "a|b*"); // no literals expected
+ test_lit!(pfx_alt5, prefixes, "a|b+", M("a"), C("b"));
+ test_lit!(pfx_alt6, prefixes, "a|(?:b|c*)"); // no literals expected
+ test_lit!(
+ pfx_alt7,
+ prefixes,
+ "(a|b)*c|(a|ab)*c",
+ C("a"),
+ C("b"),
+ M("c"),
+ C("a"),
+ C("ab"),
+ M("c")
+ );
+ test_lit!(pfx_alt8, prefixes, "a*b|c", C("a"), M("b"), M("c"));
+
+ // Test prefix extraction for regexes with empty assertions (anchors).
+ test_lit!(pfx_empty1, prefixes, "^a", M("a"));
+ test_lit!(pfx_empty2, prefixes, "a${2}", C("a"));
+ test_lit!(pfx_empty3, prefixes, "^abc", M("abc"));
+ test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));
+
+ // Make sure some curious regexes yield no prefix literals at all.
+ test_lit!(pfx_nothing1, prefixes, ".");
+ test_lit!(pfx_nothing2, prefixes, "(?s).");
+ test_lit!(pfx_nothing3, prefixes, "^");
+ test_lit!(pfx_nothing4, prefixes, "$");
+ test_lit!(pfx_nothing6, prefixes, "(?m)$");
+ test_lit!(pfx_nothing7, prefixes, r"\b");
+ test_lit!(pfx_nothing8, prefixes, r"\B");
+
+ // Test a few regexes that defeat any prefix literal detection.
+ test_lit!(pfx_defeated1, prefixes, ".a");
+ test_lit!(pfx_defeated2, prefixes, "(?s).a");
+ test_lit!(pfx_defeated3, prefixes, "a*b*c*");
+ test_lit!(pfx_defeated4, prefixes, "a|.");
+ test_lit!(pfx_defeated5, prefixes, ".|a");
+ test_lit!(pfx_defeated6, prefixes, "a|^");
+ test_lit!(pfx_defeated7, prefixes, ".(?:a(?:b)(?:c))");
+ test_lit!(pfx_defeated8, prefixes, "$a"); // same pattern and expectation as pfx_cat15
+ test_lit!(pfx_defeated9, prefixes, "(?m)$a");
+ test_lit!(pfx_defeated10, prefixes, r"\ba");
+ test_lit!(pfx_defeated11, prefixes, r"\Ba");
+ test_lit!(pfx_defeated12, prefixes, "^*a");
+ test_lit!(pfx_defeated13, prefixes, "^+a");
+
+ test_lit!(
+ pfx_crazy1, // a long real-world-style pattern; only cut prefixes are expected
+ prefixes,
+ r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+ C("Mo\\'"),
+ C("Mu\\'"),
+ C("Moam"),
+ C("Muam")
+ );
+
+ // ************************************************************************
+ // Tests for quitting prefix literal search.
+ // ************************************************************************
+
+ // Generates a `#[test]` that runs the `$extract` literal extractor with
+ // very small limits (size 20, class 10) over two HIRs of the same pattern:
+ // one built by a default parser, and one built by a parser with Unicode
+ // disabled and invalid UTF-8 allowed. Both results are checked against the
+ // same list of expected literals.
+ macro_rules! test_exhausted {
+     // No expected literals given: forward to the general arm with an
+     // empty list.
+     ($test:ident, $extract:ident, $pattern:expr) => {
+         test_exhausted!($test, $extract, $pattern,);
+     };
+     ($test:ident, $extract:ident, $pattern:expr, $($want:expr),*) => {
+         #[test]
+         fn $test() {
+             // First pass: default parser configuration.
+             let unicode_hir =
+                 ParserBuilder::new().build().parse($pattern).unwrap();
+             let mut unicode_lits = Literals::empty();
+             unicode_lits.set_limit_size(20).set_limit_class(10);
+             $extract(&mut unicode_lits, &unicode_hir);
+             assert_lit_eq!(Unicode, unicode_lits, $($want),*);
+
+             // Second pass: Unicode off, invalid UTF-8 permitted.
+             let bytes_hir = ParserBuilder::new()
+                 .allow_invalid_utf8(true)
+                 .unicode(false)
+                 .build()
+                 .parse($pattern)
+                 .unwrap();
+             let mut bytes_lits = Literals::empty();
+             bytes_lits.set_limit_size(20).set_limit_class(10);
+             $extract(&mut bytes_lits, &bytes_hir);
+             assert_lit_eq!(Bytes, bytes_lits, $($want),*);
+         }
+     };
+ }
+
+ // These tests use a much lower limit than the default so that the test
+ // cases can stay a reasonable size.
+ test_exhausted!(pfx_exhausted1, prefixes, "[a-z]");
+ test_exhausted!(pfx_exhausted2, prefixes, "[a-z]*A");
+ test_exhausted!(pfx_exhausted3, prefixes, "A[a-z]Z", C("A"));
+ test_exhausted!(
+     pfx_exhausted4,
+     prefixes,
+     "(?i-u)foobar",
+     C("FO"),
+     C("fO"),
+     C("Fo"),
+     C("fo")
+ );
+ test_exhausted!(
+     pfx_exhausted5,
+     prefixes,
+     "(?:ab){100}",
+     C("abababababababababab")
+ );
+ test_exhausted!(
+     pfx_exhausted6,
+     prefixes,
+     "(?:(?:ab){100})*cd",
+     C("ababababab"),
+     M("cd")
+ );
+ test_exhausted!(
+     pfx_exhausted7,
+     prefixes,
+     "z(?:(?:ab){100})*cd",
+     C("zababababab"),
+     M("zcd")
+ );
+ test_exhausted!(
+     pfx_exhausted8,
+     prefixes,
+     "aaaaaaaaaaaaaaaaaaaaz",
+     C("aaaaaaaaaaaaaaaaaaaa")
+ );
+
+ // ************************************************************************
+ // Tests for suffix literal extraction.
+ // ************************************************************************
+
+ // Elementary suffix tests: single literals, classes, groups, repetitions.
+ test_lit!(sfx_one_lit1, suffixes, "a", M("a"));
+ test_lit!(sfx_one_lit2, suffixes, "abc", M("abc"));
+ test_lit!(sfx_one_lit3, suffixes, "(?u)☃", M("\\xe2\\x98\\x83"));
+ #[cfg(feature = "unicode-case")]
+ test_lit!(sfx_one_lit4, suffixes, "(?ui)☃", M("\\xe2\\x98\\x83"));
+ test_lit!(sfx_class1, suffixes, "[1-4]", M("1"), M("2"), M("3"), M("4"));
+ test_lit!(
+ sfx_class2,
+ suffixes,
+ "(?u)[☃Ⅰ]",
+ M("\\xe2\\x85\\xa0"),
+ M("\\xe2\\x98\\x83")
+ );
+ #[cfg(feature = "unicode-case")]
+ test_lit!(
+ sfx_class3,
+ suffixes,
+ "(?ui)[☃Ⅰ]",
+ M("\\xe2\\x85\\xa0"),
+ M("\\xe2\\x85\\xb0"),
+ M("\\xe2\\x98\\x83")
+ );
+ test_lit!(sfx_one_lit_casei1, suffixes, "(?i-u)a", M("A"), M("a"));
+ test_lit!(
+ sfx_one_lit_casei2,
+ suffixes,
+ "(?i-u)abc",
+ M("ABC"),
+ M("ABc"),
+ M("AbC"),
+ M("Abc"),
+ M("aBC"),
+ M("aBc"),
+ M("abC"),
+ M("abc")
+ );
+ test_lit!(sfx_group1, suffixes, "(a)", M("a"));
+ test_lit!(sfx_rep_zero_or_one1, suffixes, "a?"); // no literals expected
+ test_lit!(sfx_rep_zero_or_one2, suffixes, "(?:abc)?"); // no literals expected
+ test_lit!(sfx_rep_zero_or_more1, suffixes, "a*");
+ test_lit!(sfx_rep_zero_or_more2, suffixes, "(?:abc)*");
+ test_lit!(sfx_rep_one_or_more1, suffixes, "a+", C("a"));
+ test_lit!(sfx_rep_one_or_more2, suffixes, "(?:abc)+", C("abc"));
+ test_lit!(sfx_rep_nested_one_or_more, suffixes, "(?:a+)+", C("a"));
+ test_lit!(sfx_rep_range1, suffixes, "a{0}"); // zero repetitions: no literals expected
+ test_lit!(sfx_rep_range2, suffixes, "a{0,}");
+ test_lit!(sfx_rep_range3, suffixes, "a{0,1}");
+ test_lit!(sfx_rep_range4, suffixes, "a{1}", M("a"));
+ test_lit!(sfx_rep_range5, suffixes, "a{2}", M("aa"));
+ test_lit!(sfx_rep_range6, suffixes, "a{1,2}", C("a"));
+ test_lit!(sfx_rep_range7, suffixes, "a{2,3}", C("aa"));
+
+ // Test suffix extraction for regexes with concatenations.
+ test_lit!(sfx_cat1, suffixes, "(?:a)(?:b)", M("ab"));
+ test_lit!(sfx_cat2, suffixes, "[ab]z", M("az"), M("bz"));
+ test_lit!(
+ sfx_cat3,
+ suffixes,
+ "(?i-u)[ab]z",
+ M("AZ"),
+ M("Az"),
+ M("BZ"),
+ M("Bz"),
+ M("aZ"),
+ M("az"),
+ M("bZ"),
+ M("bz")
+ );
+ test_lit!(
+ sfx_cat4,
+ suffixes,
+ "[ab][yz]",
+ M("ay"),
+ M("az"),
+ M("by"),
+ M("bz")
+ );
+ test_lit!(sfx_cat5, suffixes, "a*b", C("ab"), M("b"));
+ test_lit!(sfx_cat6, suffixes, "a*b*c", C("bc"), C("ac"), M("c"));
+ test_lit!(sfx_cat7, suffixes, "a*b*c+", C("c"));
+ test_lit!(sfx_cat8, suffixes, "a*b+c", C("bc"));
+ test_lit!(sfx_cat9, suffixes, "a*b+c*", C("c"), C("b"));
+ test_lit!(sfx_cat10, suffixes, "ab*", C("b"), M("a"));
+ test_lit!(sfx_cat11, suffixes, "ab*c", C("bc"), M("ac"));
+ test_lit!(sfx_cat12, suffixes, "ab+", C("b"));
+ test_lit!(sfx_cat13, suffixes, "ab+c", C("bc"));
+ test_lit!(sfx_cat14, suffixes, "a^");
+ test_lit!(sfx_cat15, suffixes, "$a", C("a"));
+ test_lit!(sfx_cat16, suffixes, r"ab*c", C("bc"), M("ac")); // same pattern and expectation as sfx_cat11
+ test_lit!(sfx_cat17, suffixes, r"ab+c", C("bc")); // same pattern and expectation as sfx_cat13
+ test_lit!(sfx_cat18, suffixes, r"z*azb", C("zazb"), M("azb"));
+ test_lit!(sfx_cat19, suffixes, "a.z", C("z"));
+
+ // Test suffix extraction for regexes with alternations.
+ test_lit!(sfx_alt1, suffixes, "a|b", M("a"), M("b"));
+ test_lit!(sfx_alt2, suffixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
+ test_lit!(sfx_alt3, suffixes, "y(?:a|b)z", M("yaz"), M("ybz"));
+ test_lit!(sfx_alt4, suffixes, "a|b*"); // no literals expected
+ test_lit!(sfx_alt5, suffixes, "a|b+", M("a"), C("b"));
+ test_lit!(sfx_alt6, suffixes, "a|(?:b|c*)"); // no literals expected
+ test_lit!(
+ sfx_alt7,
+ suffixes,
+ "(a|b)*c|(a|ab)*c",
+ C("ac"),
+ C("bc"),
+ M("c"),
+ C("ac"),
+ C("abc"),
+ M("c")
+ );
+ test_lit!(sfx_alt8, suffixes, "a*b|c", C("ab"), M("b"), M("c"));
+
+ // Test suffix extraction for regexes with empty assertions (anchors).
+ test_lit!(sfx_empty1, suffixes, "a$", M("a"));
+ test_lit!(sfx_empty2, suffixes, "${2}a", C("a"));
+
+ // Make sure some curious regexes yield no suffix literals at all.
+ test_lit!(sfx_nothing1, suffixes, ".");
+ test_lit!(sfx_nothing2, suffixes, "(?s).");
+ test_lit!(sfx_nothing3, suffixes, "^");
+ test_lit!(sfx_nothing4, suffixes, "$");
+ test_lit!(sfx_nothing6, suffixes, "(?m)$");
+ test_lit!(sfx_nothing7, suffixes, r"\b");
+ test_lit!(sfx_nothing8, suffixes, r"\B");
+
+ // Test a few regexes that defeat any suffix literal detection.
+ test_lit!(sfx_defeated1, suffixes, "a.");
+ test_lit!(sfx_defeated2, suffixes, "(?s)a.");
+ test_lit!(sfx_defeated3, suffixes, "a*b*c*");
+ test_lit!(sfx_defeated4, suffixes, "a|.");
+ test_lit!(sfx_defeated5, suffixes, ".|a");
+ test_lit!(sfx_defeated6, suffixes, "a|^");
+ test_lit!(sfx_defeated7, suffixes, "(?:a(?:b)(?:c)).");
+ test_lit!(sfx_defeated8, suffixes, "a^"); // same pattern and expectation as sfx_cat14
+ test_lit!(sfx_defeated9, suffixes, "(?m)a$");
+ test_lit!(sfx_defeated10, suffixes, r"a\b");
+ test_lit!(sfx_defeated11, suffixes, r"a\B");
+ test_lit!(sfx_defeated12, suffixes, "a^*");
+ test_lit!(sfx_defeated13, suffixes, "a^+");
+
+ // These tests use a much lower limit than the default so that we can
+ // write test cases of reasonable size.
+ test_exhausted!(sfx_exhausted1, suffixes, "[a-z]");
+ test_exhausted!(sfx_exhausted2, suffixes, "A[a-z]*");
+ test_exhausted!(sfx_exhausted3, suffixes, "A[a-z]Z", C("Z"));
+ test_exhausted!(
+ sfx_exhausted4,
+ suffixes,
+ "(?i-u)foobar",
+ C("AR"),
+ C("Ar"),
+ C("aR"),
+ C("ar")
+ );
+ test_exhausted!(
+ sfx_exhausted5,
+ suffixes,
+ "(?:ab){100}",
+ C("abababababababababab")
+ );
+ test_exhausted!(
+ sfx_exhausted6,
+ suffixes,
+ "cd(?:(?:ab){100})*",
+ C("ababababab"),
+ M("cd")
+ );
+ test_exhausted!(
+ sfx_exhausted7,
+ suffixes,
+ "cd(?:(?:ab){100})*z",
+ C("abababababz"),
+ M("cdz")
+ );
+ test_exhausted!(
+ sfx_exhausted8,
+ suffixes,
+ "zaaaaaaaaaaaaaaaaaaaa",
+ C("aaaaaaaaaaaaaaaaaaaa")
+ );
+
+ // ************************************************************************
+ // Tests for generating unambiguous literal sets.
+ // ************************************************************************
+
+ // Generates a `#[test]` that converts the given literals to byte
+ // literals (keeping each one's cut flag), computes
+ // `unambiguous_prefixes()` on the resulting set and compares the escaped
+ // result with `$want`.
+ macro_rules! test_unamb {
+     ($test:ident, $input:expr, $want:expr) => {
+         #[test]
+         fn $test() {
+             let mut byte_lits: Vec<Literal> = Vec::new();
+             for ul in $input {
+                 // Read the cut flag before the string is moved out.
+                 byte_lits.push(Literal {
+                     cut: ul.is_cut(),
+                     v: ul.v.into_bytes(),
+                 });
+             }
+             let set = create_lits(byte_lits);
+             let unambiguous = set.unambiguous_prefixes();
+             assert_eq!($want, escape_lits(unambiguous.literals()));
+         }
+     };
+ }
+
+ test_unamb!(unambiguous1, vec![M("z"), M("azb")], vec![C("a"), C("z")]);
+ test_unamb!(
+     unambiguous2,
+     vec![M("zaaaaaa"), M("aa")],
+     vec![C("aa"), C("z")]
+ );
+ test_unamb!(
+     unambiguous3,
+     vec![M("Sherlock"), M("Watson")],
+     vec![M("Sherlock"), M("Watson")]
+ );
+ test_unamb!(unambiguous4, vec![M("abc"), M("bc")], vec![C("a"), C("bc")]);
+ test_unamb!(unambiguous5, vec![M("bc"), M("abc")], vec![C("a"), C("bc")]);
+ test_unamb!(unambiguous6, vec![M("a"), M("aa")], vec![C("a")]);
+ test_unamb!(unambiguous7, vec![M("aa"), M("a")], vec![C("a")]);
+ test_unamb!(unambiguous8, vec![M("ab"), M("a")], vec![C("a")]);
+ test_unamb!(
+     unambiguous9,
+     vec![M("ac"), M("bc"), M("c"), M("ac"), M("abc"), M("c")],
+     vec![C("a"), C("b"), C("c")]
+ );
+ test_unamb!(
+     unambiguous10,
+     vec![M("Mo'"), M("Mu'"), M("Mo"), M("Mu")],
+     vec![C("Mo"), C("Mu")]
+ );
+ test_unamb!(
+     unambiguous11,
+     vec![M("zazb"), M("azb")],
+     vec![C("a"), C("z")]
+ );
+ test_unamb!(unambiguous12, vec![M("foo"), C("foo")], vec![C("foo")]);
+ test_unamb!(
+     unambiguous13,
+     vec![M("ABCX"), M("CDAX"), M("BCX")],
+     vec![C("A"), C("BCX"), C("CD")]
+ );
+ test_unamb!(
+     unambiguous14,
+     vec![M("IMGX"), M("MVIX"), M("MGX"), M("DSX")],
+     vec![M("DSX"), C("I"), C("MGX"), C("MV")]
+ );
+ test_unamb!(
+     unambiguous15,
+     vec![M("IMG_"), M("MG_"), M("CIMG")],
+     vec![C("C"), C("I"), C("MG_")]
+ );
+
+ // ************************************************************************
+ // Tests for suffix trimming.
+ // ************************************************************************
+ // Generates a `#[test]` that converts the given literals to byte
+ // literals (keeping each one's cut flag), calls `trim_suffix($count)` on
+ // the resulting set (which must return `Some`) and compares the escaped
+ // result with `$want`.
+ macro_rules! test_trim {
+     ($test:ident, $count:expr, $input:expr, $want:expr) => {
+         #[test]
+         fn $test() {
+             let mut byte_lits: Vec<Literal> = Vec::new();
+             for ul in $input {
+                 // Read the cut flag before the string is moved out.
+                 byte_lits.push(Literal {
+                     cut: ul.is_cut(),
+                     v: ul.v.into_bytes(),
+                 });
+             }
+             let set = create_lits(byte_lits);
+             let trimmed = set.trim_suffix($count).unwrap();
+             assert_eq!($want, escape_lits(trimmed.literals()));
+         }
+     };
+ }
+
+ test_trim!(trim1, 1, vec![M("ab"), M("yz")], vec![C("a"), C("y")]);
+ test_trim!(trim2, 1, vec![M("abc"), M("abd")], vec![C("ab")]);
+ test_trim!(trim3, 2, vec![M("abc"), M("abd")], vec![C("a")]);
+ test_trim!(trim4, 2, vec![M("abc"), M("ghij")], vec![C("a"), C("gh")]);
+
+ // ************************************************************************
+ // Tests for longest common prefix.
+ // ************************************************************************
+
+ // Generates a `#[test]` that turns each given `&str` into a non-cut byte
+ // literal and checks that the escaped `longest_common_prefix()` of the
+ // resulting set equals `$want`.
+ macro_rules! test_lcp {
+     ($test:ident, $input:expr, $want:expr) => {
+         #[test]
+         fn $test() {
+             let byte_lits = $input
+                 .into_iter()
+                 .map(|s: &str| Literal { v: s.as_bytes().to_vec(), cut: false })
+                 .collect::<Vec<Literal>>();
+             let set = create_lits(byte_lits);
+             let prefix = set.longest_common_prefix();
+             assert_eq!($want, escape_bytes(prefix));
+         }
+     };
+ }
+
+ test_lcp!(lcp1, vec!["a"], "a");
+ test_lcp!(lcp2, vec![], "");
+ test_lcp!(lcp3, vec!["a", "b"], "");
+ test_lcp!(lcp4, vec!["ab", "ab"], "ab");
+ test_lcp!(lcp5, vec!["ab", "a"], "a");
+ test_lcp!(lcp6, vec!["a", "ab"], "a");
+ test_lcp!(lcp7, vec!["ab", "b"], "");
+ test_lcp!(lcp8, vec!["b", "ab"], "");
+ test_lcp!(lcp9, vec!["foobar", "foobaz"], "fooba");
+ test_lcp!(lcp10, vec!["foobar", "foobaz", "a"], "");
+ test_lcp!(lcp11, vec!["a", "foobar", "foobaz"], "");
+ test_lcp!(lcp12, vec!["foo", "flub", "flab", "floo"], "f");
+
+ // ************************************************************************
+ // Tests for longest common suffix.
+ // ************************************************************************
+
+ // Generates a `#[test]` that turns each given `&str` into a non-cut byte
+ // literal and checks that the escaped `longest_common_suffix()` of the
+ // resulting set equals `$want`.
+ macro_rules! test_lcs {
+     ($test:ident, $input:expr, $want:expr) => {
+         #[test]
+         fn $test() {
+             let byte_lits = $input
+                 .into_iter()
+                 .map(|s: &str| Literal { v: s.as_bytes().to_vec(), cut: false })
+                 .collect::<Vec<Literal>>();
+             let set = create_lits(byte_lits);
+             let suffix = set.longest_common_suffix();
+             assert_eq!($want, escape_bytes(suffix));
+         }
+     };
+ }
+
+ test_lcs!(lcs1, vec!["a"], "a");
+ test_lcs!(lcs2, vec![], "");
+ test_lcs!(lcs3, vec!["a", "b"], "");
+ test_lcs!(lcs4, vec!["ab", "ab"], "ab");
+ test_lcs!(lcs5, vec!["ab", "a"], "");
+ test_lcs!(lcs6, vec!["a", "ab"], "");
+ test_lcs!(lcs7, vec!["ab", "b"], "b");
+ test_lcs!(lcs8, vec!["b", "ab"], "b");
+ test_lcs!(lcs9, vec!["barfoo", "bazfoo"], "foo");
+ test_lcs!(lcs10, vec!["barfoo", "bazfoo", "a"], "");
+ test_lcs!(lcs11, vec!["a", "barfoo", "bazfoo"], "");
+ test_lcs!(lcs12, vec!["flub", "bub", "boob", "dub"], "b");
+}
diff --git a/vendor/regex-syntax/src/hir/mod.rs b/vendor/regex-syntax/src/hir/mod.rs
new file mode 100644
index 000000000..f5cf992e5
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/mod.rs
@@ -0,0 +1,2296 @@
+/*!
+Defines a high-level intermediate representation for regular expressions.
+*/
+use std::char;
+use std::cmp;
+use std::error;
+use std::fmt;
+use std::result;
+use std::u8;
+
+use crate::ast::Span;
+use crate::hir::interval::{Interval, IntervalSet, IntervalSetIter};
+use crate::unicode;
+
+pub use crate::hir::visitor::{visit, Visitor};
+pub use crate::unicode::CaseFoldError;
+
+mod interval;
+pub mod literal;
+pub mod print;
+pub mod translate;
+mod visitor;
+
+ /// An error that can occur while translating an `Ast` to a `Hir`. It records what went wrong, in which pattern, and where.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct Error {
+ /// The kind of error (returned by `Error::kind`).
+ kind: ErrorKind,
+ /// The original pattern that the translator's Ast was parsed from. Every
+ /// span in an error is a valid range into this string.
+ pattern: String,
+ /// The span of this error, derived from the Ast given to the translator.
+ span: Span,
+ }
+
+ impl Error {
+ /// Return the kind of this error, by reference.
+ pub fn kind(&self) -> &ErrorKind {
+ &self.kind
+ }
+
+ /// The original pattern string in which this error occurred.
+ ///
+ /// Every span reported by this error is a range into this string.
+ pub fn pattern(&self) -> &str {
+ &self.pattern
+ }
+
+ /// Return the span (within `pattern`) at which this error occurred.
+ pub fn span(&self) -> &Span {
+ &self.span
+ }
+ }
+
+ /// The type of an error that occurred while building an `Hir`.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum ErrorKind {
+ /// This error occurs when a Unicode feature is used while Unicode
+ /// support is disabled. For example `(?-u:\pL)` would trigger this error.
+ UnicodeNotAllowed,
+ /// This error occurs when translating a pattern that could match a byte
+ /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled.
+ InvalidUtf8,
+ /// This occurs when a Unicode property name is not recognized, i.e., it
+ /// could not be found.
+ UnicodePropertyNotFound,
+ /// This occurs when a Unicode property value is not recognized, i.e., it
+ /// could not be found.
+ UnicodePropertyValueNotFound,
+ /// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or
+ /// `\d`) could not be found. This can occur when the `unicode-perl`
+ /// crate feature is not enabled.
+ UnicodePerlClassNotFound,
+ /// This occurs when the Unicode simple case mapping tables are not
+ /// available, and the regular expression required Unicode-aware case
+ /// insensitivity.
+ UnicodeCaseUnavailable,
+ /// This occurs when the translator attempts to construct a character class
+ /// that is empty.
+ ///
+ /// Note that this restriction in the translator may be removed in the
+ /// future.
+ EmptyClassNotAllowed,
+ /// Hints that destructuring should not be exhaustive.
+ ///
+ /// This enum may grow additional variants, so this makes sure clients
+ /// don't count on exhaustive matching. (Otherwise, adding a new variant
+ /// could break existing code.)
+ #[doc(hidden)]
+ __Nonexhaustive,
+ }
+
+ impl ErrorKind {
+     // TODO: Remove this method entirely on the next breaking semver release.
+     /// Returns a fixed, human-readable message for this error kind.
+     ///
+     /// The hidden `__Nonexhaustive` variant is treated as unreachable.
+     #[allow(deprecated)]
+     fn description(&self) -> &str {
+         match self {
+             ErrorKind::UnicodeNotAllowed => "Unicode not allowed here",
+             ErrorKind::InvalidUtf8 => "pattern can match invalid UTF-8",
+             ErrorKind::UnicodePropertyNotFound => {
+                 "Unicode property not found"
+             }
+             ErrorKind::UnicodePropertyValueNotFound => {
+                 "Unicode property value not found"
+             }
+             ErrorKind::UnicodePerlClassNotFound => {
+                 "Unicode-aware Perl class not found \
+                  (make sure the unicode-perl feature is enabled)"
+             }
+             ErrorKind::UnicodeCaseUnavailable => {
+                 "Unicode-aware case insensitivity matching is not available \
+                  (make sure the unicode-case feature is enabled)"
+             }
+             ErrorKind::EmptyClassNotAllowed => {
+                 "empty character classes are not allowed"
+             }
+             ErrorKind::__Nonexhaustive => unreachable!(),
+         }
+     }
+ }
+
+impl error::Error for Error {
+ // TODO: Remove this method entirely on the next breaking semver release.
+ #[allow(deprecated)]
+ fn description(&self) -> &str {
+ self.kind.description()
+ }
+}
+
+ impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ crate::error::Formatter::from(self).fmt(f) // rendering is delegated to the crate's shared error formatter
+ }
+ }
+
+ impl fmt::Display for ErrorKind {
+     /// Writes this kind's fixed message to the formatter.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         // TODO: Remove this on the next breaking semver release.
+         #[allow(deprecated)]
+         let message = self.description();
+         f.write_str(message)
+     }
+ }
+
+ /// A high-level intermediate representation (HIR) for a regular expression.
+ ///
+ /// The HIR of a regular expression represents an intermediate step between its
+ /// abstract syntax (a structured description of the concrete syntax) and
+ /// compiled byte codes. The purpose of HIR is to make regular expressions
+ /// easier to analyze. In particular, the AST is much more complex than the
+ /// HIR. For example, while an AST supports arbitrarily nested character
+ /// classes, the HIR will flatten all nested classes into a single set. The HIR
+ /// will also "compile away" every flag present in the concrete syntax. For
+ /// example, users of HIR expressions never need to worry about case folding;
+ /// it is handled automatically by the translator (e.g., by translating `(?i)A`
+ /// to `[aA]`).
+ ///
+ /// If the HIR was produced by a translator that disallows invalid UTF-8, then
+ /// the HIR is guaranteed to match UTF-8 exclusively.
+ ///
+ /// This type defines its own destructor that uses constant stack space and
+ /// heap space proportional to the size of the HIR.
+ ///
+ /// The specific type of an HIR expression can be accessed via its `kind`
+ /// or `into_kind` methods. This extra level of indirection exists for two
+ /// reasons:
+ ///
+ /// 1. Construction of an HIR expression *must* use the constructor methods
+ /// on this `Hir` type instead of building the `HirKind` values directly.
+ /// This permits construction to enforce invariants like "concatenations
+ /// always consist of two or more sub-expressions."
+ /// 2. Every HIR expression contains attributes that are defined inductively,
+ /// and can be computed cheaply during the construction process. For
+ /// example, one such attribute is whether the expression must match at the
+ /// beginning of the text.
+ ///
+ /// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
+ /// expression pattern string, and uses constant stack space and heap space
+ /// proportional to the size of the `Hir`.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct Hir {
+ /// The underlying HIR kind, i.e., which sort of expression this node is.
+ kind: HirKind,
+ /// Analysis info (the inductive attributes) about this HIR, computed during construction.
+ info: HirInfo,
+ }
+
+ /// The kind of an arbitrary `Hir` expression.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub enum HirKind {
+ /// The empty regular expression, which matches everything, including the
+ /// empty string.
+ Empty,
+ /// A single literal character that matches exactly this character.
+ Literal(Literal),
+ /// A single character class that matches any of the characters in the
+ /// class. A class can either consist of Unicode scalar values as
+ /// characters, or it can use bytes.
+ Class(Class),
+ /// An anchor assertion. An anchor assertion match always has zero length.
+ Anchor(Anchor),
+ /// A word boundary assertion, which may or may not be Unicode aware. A
+ /// word boundary assertion match always has zero length.
+ WordBoundary(WordBoundary),
+ /// A repetition operation applied to a child expression.
+ Repetition(Repetition),
+ /// A possibly capturing group, which contains a child expression.
+ Group(Group),
+ /// A concatenation of expressions. A concatenation always has at least two
+ /// child expressions.
+ ///
+ /// A concatenation matches only if each of its child expressions matches,
+ /// one after the other.
+ Concat(Vec<Hir>),
+ /// An alternation of expressions. An alternation always has at least two
+ /// child expressions.
+ ///
+ /// An alternation matches only if at least one of its child expressions
+ /// matches. If multiple expressions match, then the leftmost is preferred.
+ Alternation(Vec<Hir>),
+ }
+
+impl Hir {
+ /// Returns a reference to the underlying `HirKind` of this expression.
+ pub fn kind(&self) -> &HirKind {
+ &self.kind
+ }
+
+ /// Consumes this HIR expression and hands back its underlying `HirKind`.
+ ///
+ /// The kind is swapped out of `self` (leaving `HirKind::Empty` behind)
+ /// rather than moved out of the struct directly.
+ pub fn into_kind(mut self) -> HirKind {
+     std::mem::replace(&mut self.kind, HirKind::Empty)
+ }
+
+ /// Returns an empty HIR expression.
+ ///
+ /// An empty HIR expression always matches, including the empty string.
+ /// It is flagged as always-UTF-8, all-assertions and empty-matching; it
+ /// is not anchored in any way and is not a literal.
+ pub fn empty() -> Hir {
+     let mut attrs = HirInfo::new();
+     attrs.set_always_utf8(true);
+     attrs.set_all_assertions(true);
+     // No anchoring of any flavour.
+     attrs.set_anchored_start(false);
+     attrs.set_anchored_end(false);
+     attrs.set_line_anchored_start(false);
+     attrs.set_line_anchored_end(false);
+     attrs.set_any_anchored_start(false);
+     attrs.set_any_anchored_end(false);
+     attrs.set_match_empty(true);
+     attrs.set_literal(false);
+     attrs.set_alternation_literal(false);
+     Hir { kind: HirKind::Empty, info: attrs }
+ }
+
+ /// Creates a literal HIR expression.
+ ///
+ /// The result is flagged as a literal (and an alternation literal) that
+ /// cannot match the empty string and is not anchored. It is flagged as
+ /// always-UTF-8 only when `lit.is_unicode()` holds.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the given literal is a `Byte` variant holding an ASCII byte
+ /// (`<= 0x7F`). This enforces the invariant that `Byte` variants are
+ /// only used to express matching of invalid UTF-8.
+ pub fn literal(lit: Literal) -> Hir {
+     match lit {
+         Literal::Byte(b) => assert!(b > 0x7F),
+         _ => {}
+     }
+
+     let mut attrs = HirInfo::new();
+     attrs.set_always_utf8(lit.is_unicode());
+     attrs.set_all_assertions(false);
+     // A lone literal carries no anchors.
+     attrs.set_anchored_start(false);
+     attrs.set_anchored_end(false);
+     attrs.set_line_anchored_start(false);
+     attrs.set_line_anchored_end(false);
+     attrs.set_any_anchored_start(false);
+     attrs.set_any_anchored_end(false);
+     attrs.set_match_empty(false);
+     attrs.set_literal(true);
+     attrs.set_alternation_literal(true);
+     Hir { kind: HirKind::Literal(lit), info: attrs }
+ }
+
+ /// Creates a class HIR expression.
+ ///
+ /// The result inherits its always-UTF-8 flag from
+ /// `class.is_always_utf8()`. Every other attribute is false: a class is
+ /// not an assertion, not anchored, not empty-matching and not a literal.
+ pub fn class(class: Class) -> Hir {
+     let mut attrs = HirInfo::new();
+     attrs.set_always_utf8(class.is_always_utf8());
+     attrs.set_all_assertions(false);
+     attrs.set_anchored_start(false);
+     attrs.set_anchored_end(false);
+     attrs.set_line_anchored_start(false);
+     attrs.set_line_anchored_end(false);
+     attrs.set_any_anchored_start(false);
+     attrs.set_any_anchored_end(false);
+     attrs.set_match_empty(false);
+     attrs.set_literal(false);
+     attrs.set_alternation_literal(false);
+     Hir { kind: HirKind::Class(class), info: attrs }
+ }
+
+ /// Creates an anchor assertion HIR expression.
+ ///
+ /// All anchoring attributes start out false and are then switched on
+ /// according to the anchor: text anchors set the text, line and "any"
+ /// attributes for their side, while line anchors set only the line
+ /// attribute for their side.
+ pub fn anchor(anchor: Anchor) -> Hir {
+     let mut attrs = HirInfo::new();
+     attrs.set_always_utf8(true);
+     attrs.set_all_assertions(true);
+     attrs.set_anchored_start(false);
+     attrs.set_anchored_end(false);
+     attrs.set_line_anchored_start(false);
+     attrs.set_line_anchored_end(false);
+     attrs.set_any_anchored_start(false);
+     attrs.set_any_anchored_end(false);
+     attrs.set_match_empty(true);
+     attrs.set_literal(false);
+     attrs.set_alternation_literal(false);
+     match anchor {
+         Anchor::StartText => {
+             attrs.set_anchored_start(true);
+             attrs.set_line_anchored_start(true);
+             attrs.set_any_anchored_start(true);
+         }
+         Anchor::EndText => {
+             attrs.set_anchored_end(true);
+             attrs.set_line_anchored_end(true);
+             attrs.set_any_anchored_end(true);
+         }
+         Anchor::StartLine => attrs.set_line_anchored_start(true),
+         Anchor::EndLine => attrs.set_line_anchored_end(true),
+     }
+     Hir { kind: HirKind::Anchor(anchor), info: attrs }
+ }
+
+ /// Creates a word boundary assertion HIR expression.
+ ///
+ /// The result is an assertion that is flagged as able to match the
+ /// empty string and carries no anchoring. It is flagged as always-UTF-8
+ /// unless it is a negated ASCII word boundary.
+ pub fn word_boundary(word_boundary: WordBoundary) -> Hir {
+     let mut attrs = HirInfo::new();
+     attrs.set_always_utf8(true);
+     attrs.set_all_assertions(true);
+     attrs.set_anchored_start(false);
+     attrs.set_anchored_end(false);
+     attrs.set_line_anchored_start(false);
+     attrs.set_line_anchored_end(false);
+     attrs.set_any_anchored_start(false);
+     attrs.set_any_anchored_end(false);
+     attrs.set_literal(false);
+     attrs.set_alternation_literal(false);
+     // Why flag word boundaries as matching the empty string? Searching
+     // for \b in 'a' reports the matches [0, 0) and [1, 1), and both of
+     // those spans are empty strings. So although only *certain* empty
+     // strings satisfy \b, what it matches is always an empty string, and
+     // the same reasoning applies to \B.
+     attrs.set_match_empty(true);
+     // Negated ASCII word boundaries can match invalid UTF-8.
+     match word_boundary {
+         WordBoundary::AsciiNegate => attrs.set_always_utf8(false),
+         _ => {}
+     }
+     Hir { kind: HirKind::WordBoundary(word_boundary), info: attrs }
+ }
+
+ /// Creates a repetition HIR expression.
+ ///
+ /// The attributes are derived from the repeated sub-expression `rep.hir`:
+ ///
+ /// * always-UTF-8, all-assertions and the "any anchored" attributes are
+ ///   inherited as-is;
+ /// * the text and line anchoring attributes are inherited only when the
+ ///   repetition operator itself cannot match the empty string;
+ /// * the result can match the empty string if either the operator or the
+ ///   sub-expression can;
+ /// * a repetition is never a literal.
+ pub fn repetition(rep: Repetition) -> Hir {
+     let mut info = HirInfo::new();
+     info.set_always_utf8(rep.hir.is_always_utf8());
+     info.set_all_assertions(rep.hir.is_all_assertions());
+     // If this operator can match the empty string, then it can never
+     // be anchored.
+     let must_match_once = !rep.is_match_empty();
+     info.set_anchored_start(
+         must_match_once && rep.hir.is_anchored_start(),
+     );
+     info.set_anchored_end(must_match_once && rep.hir.is_anchored_end());
+     // Line anchoring is taken from the sub-expression's *line* anchoring
+     // attributes (as `group` does), not from its text anchoring ones.
+     // Deriving it from the text anchors would report e.g. `(?m:^a)+` as
+     // not line-anchored.
+     info.set_line_anchored_start(
+         must_match_once && rep.hir.is_line_anchored_start(),
+     );
+     info.set_line_anchored_end(
+         must_match_once && rep.hir.is_line_anchored_end(),
+     );
+     info.set_any_anchored_start(rep.hir.is_any_anchored_start());
+     info.set_any_anchored_end(rep.hir.is_any_anchored_end());
+     info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty());
+     info.set_literal(false);
+     info.set_alternation_literal(false);
+     Hir { kind: HirKind::Repetition(rep), info: info }
+ }
+
+ /// Creates a group HIR expression.
+ ///
+ /// Every attribute except the two literal flags is copied from the
+ /// grouped sub-expression; a group is never flagged as a literal.
+ pub fn group(group: Group) -> Hir {
+     let mut attrs = HirInfo::new();
+     {
+         let inner = &group.hir;
+         attrs.set_always_utf8(inner.is_always_utf8());
+         attrs.set_all_assertions(inner.is_all_assertions());
+         attrs.set_anchored_start(inner.is_anchored_start());
+         attrs.set_anchored_end(inner.is_anchored_end());
+         attrs.set_line_anchored_start(inner.is_line_anchored_start());
+         attrs.set_line_anchored_end(inner.is_line_anchored_end());
+         attrs.set_any_anchored_start(inner.is_any_anchored_start());
+         attrs.set_any_anchored_end(inner.is_any_anchored_end());
+         attrs.set_match_empty(inner.is_match_empty());
+     }
+     attrs.set_literal(false);
+     attrs.set_alternation_literal(false);
+     Hir { kind: HirKind::Group(group), info: attrs }
+ }
+
+ /// Returns the concatenation of the given expressions.
+ ///
+ /// No concatenation node is built for fewer than two sub-expressions: an
+ /// empty list yields `Hir::empty()` and a single sub-expression is
+ /// returned unchanged. Otherwise the attributes of the result are
+ /// computed from the attributes of all sub-expressions.
+ pub fn concat(mut exprs: Vec<Hir>) -> Hir {
+     if exprs.is_empty() {
+         return Hir::empty();
+     }
+     if exprs.len() == 1 {
+         return exprs.pop().unwrap();
+     }
+
+     let mut attrs = HirInfo::new();
+     // Attributes folded with `&&` start out true, those folded with `||`
+     // start out false.
+     attrs.set_always_utf8(true);
+     attrs.set_all_assertions(true);
+     attrs.set_any_anchored_start(false);
+     attrs.set_any_anchored_end(false);
+     attrs.set_match_empty(true);
+     attrs.set_literal(true);
+     attrs.set_alternation_literal(true);
+
+     // Fold each sub-expression into the running attributes.
+     for sub in exprs.iter() {
+         attrs.set_always_utf8(attrs.is_always_utf8() && sub.is_always_utf8());
+         attrs.set_all_assertions(
+             attrs.is_all_assertions() && sub.is_all_assertions(),
+         );
+         attrs.set_any_anchored_start(
+             attrs.is_any_anchored_start() || sub.is_any_anchored_start(),
+         );
+         attrs.set_any_anchored_end(
+             attrs.is_any_anchored_end() || sub.is_any_anchored_end(),
+         );
+         attrs.set_match_empty(attrs.is_match_empty() && sub.is_match_empty());
+         attrs.set_literal(attrs.is_literal() && sub.is_literal());
+         attrs.set_alternation_literal(
+             attrs.is_alternation_literal() && sub.is_alternation_literal(),
+         );
+     }
+
+     // Anchoring needs more than a look at the first (or last)
+     // sub-expression. Something like `$\b^` is anchored to the start even
+     // though its first sub-expression is not. So skip over leading
+     // sub-expressions that are pure assertions and ask whether any of the
+     // visited ones is itself anchored to the start.
+     let text_start = exprs
+         .iter()
+         .take_while(|e| e.is_anchored_start() || e.is_all_assertions())
+         .any(|e| e.is_anchored_start());
+     attrs.set_anchored_start(text_start);
+
+     // The end anchor works the same way, scanning from the back.
+     let text_end = exprs
+         .iter()
+         .rev()
+         .take_while(|e| e.is_anchored_end() || e.is_all_assertions())
+         .any(|e| e.is_anchored_end());
+     attrs.set_anchored_end(text_end);
+
+     // And once more for the line anchors.
+     let line_start = exprs
+         .iter()
+         .take_while(|e| e.is_line_anchored_start() || e.is_all_assertions())
+         .any(|e| e.is_line_anchored_start());
+     attrs.set_line_anchored_start(line_start);
+
+     let line_end = exprs
+         .iter()
+         .rev()
+         .take_while(|e| e.is_line_anchored_end() || e.is_all_assertions())
+         .any(|e| e.is_line_anchored_end());
+     attrs.set_line_anchored_end(line_end);
+
+     Hir { kind: HirKind::Concat(exprs), info: attrs }
+ }
+
+ /// Returns the alternation of the given expressions.
+ ///
+ /// No alternation node is built for fewer than two branches: an empty
+ /// list yields `Hir::empty()` and a single branch is returned unchanged.
+ /// Otherwise the attributes of the result are computed from the
+ /// attributes of all branches.
+ pub fn alternation(mut exprs: Vec<Hir>) -> Hir {
+     if exprs.is_empty() {
+         return Hir::empty();
+     }
+     if exprs.len() == 1 {
+         return exprs.pop().unwrap();
+     }
+
+     let mut attrs = HirInfo::new();
+     // Attributes folded with `&&` start out true, those folded with `||`
+     // start out false.
+     attrs.set_always_utf8(true);
+     attrs.set_all_assertions(true);
+     attrs.set_anchored_start(true);
+     attrs.set_anchored_end(true);
+     attrs.set_line_anchored_start(true);
+     attrs.set_line_anchored_end(true);
+     attrs.set_any_anchored_start(false);
+     attrs.set_any_anchored_end(false);
+     attrs.set_match_empty(false);
+     attrs.set_literal(false);
+     attrs.set_alternation_literal(true);
+
+     // Fold each branch into the running attributes.
+     for branch in exprs.iter() {
+         attrs.set_always_utf8(
+             attrs.is_always_utf8() && branch.is_always_utf8(),
+         );
+         attrs.set_all_assertions(
+             attrs.is_all_assertions() && branch.is_all_assertions(),
+         );
+         attrs.set_anchored_start(
+             attrs.is_anchored_start() && branch.is_anchored_start(),
+         );
+         attrs.set_anchored_end(
+             attrs.is_anchored_end() && branch.is_anchored_end(),
+         );
+         attrs.set_line_anchored_start(
+             attrs.is_line_anchored_start()
+                 && branch.is_line_anchored_start(),
+         );
+         attrs.set_line_anchored_end(
+             attrs.is_line_anchored_end() && branch.is_line_anchored_end(),
+         );
+         attrs.set_any_anchored_start(
+             attrs.is_any_anchored_start()
+                 || branch.is_any_anchored_start(),
+         );
+         attrs.set_any_anchored_end(
+             attrs.is_any_anchored_end() || branch.is_any_anchored_end(),
+         );
+         attrs.set_match_empty(
+             attrs.is_match_empty() || branch.is_match_empty(),
+         );
+         // An alternation literal requires every branch to be a literal.
+         attrs.set_alternation_literal(
+             attrs.is_alternation_literal() && branch.is_literal(),
+         );
+     }
+     Hir { kind: HirKind::Alternation(exprs), info: attrs }
+ }
+
+ /// Build an HIR expression for `.`.
+ ///
+ /// A `.` expression matches any character except for `\n`. To build an
+ /// expression that matches any character, including `\n`, use the `any`
+ /// method.
+ ///
+ /// If `bytes` is `true`, a byte class (`Class::Bytes`) is built, i.e. characters
+ /// are limited to a single byte. Otherwise a `Class::Unicode` class is built.
+ pub fn dot(bytes: bool) -> Hir {
+ if bytes {
+ let mut cls = ClassBytes::empty();
+ cls.push(ClassBytesRange::new(b'\0', b'\x09')); // everything below `\n` (0x0A)
+ cls.push(ClassBytesRange::new(b'\x0B', b'\xFF')); // everything above `\n`
+ Hir::class(Class::Bytes(cls))
+ } else {
+ let mut cls = ClassUnicode::empty();
+ cls.push(ClassUnicodeRange::new('\0', '\x09')); // everything below `\n` (U+000A)
+ cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}')); // everything above `\n`
+ Hir::class(Class::Unicode(cls))
+ }
+ }
+
+ /// Build an HIR expression for `(?s).`.
+ ///
+ /// A `(?s).` expression matches any character, including `\n`. To build an
+ /// expression that matches any character except for `\n`, use the
+ /// `dot` method instead.
+ ///
+ /// If `bytes` is `true`, a byte class (`Class::Bytes`) is built, i.e. characters
+ /// are limited to a single byte. Otherwise a `Class::Unicode` class is built.
+ pub fn any(bytes: bool) -> Hir {
+ if bytes {
+ let mut cls = ClassBytes::empty();
+ cls.push(ClassBytesRange::new(b'\0', b'\xFF')); // the full byte range
+ Hir::class(Class::Bytes(cls))
+ } else {
+ let mut cls = ClassUnicode::empty();
+ cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}')); // the full `char` range
+ Hir::class(Class::Unicode(cls))
+ }
+ }
+
+ /// Return true if and only if this HIR will always match valid UTF-8.
+ ///
+ /// When this returns false, it is possible for this HIR expression
+ /// to match invalid UTF-8.
+ pub fn is_always_utf8(&self) -> bool {
+ self.info.is_always_utf8() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Returns true if and only if this entire HIR expression is made up of
+ /// zero-width assertions.
+ ///
+ /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but
+ /// not `^a` (which also contains the non-assertion `a`).
+ pub fn is_all_assertions(&self) -> bool {
+ self.info.is_all_assertions() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if this HIR is required to match from the
+ /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`,
+ /// `^foo|^bar` but not `^foo|bar` (every alternation branch must be anchored).
+ pub fn is_anchored_start(&self) -> bool {
+ self.info.is_anchored_start() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if this HIR is required to match at the end
+ /// of text. This includes expressions like `foo$`, `(foo|bar)$`,
+ /// `foo$|bar$` but not `foo$|bar` (every alternation branch must be anchored).
+ pub fn is_anchored_end(&self) -> bool {
+ self.info.is_anchored_end() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if this HIR is required to match from the
+ /// beginning of text or the beginning of a line. This includes expressions
+ /// like `^foo`, `(?m)^foo`, `^(foo|bar)`, `(?m)^(foo|bar)`, `(?m)^foo|^bar`
+ /// but not `^foo|bar` or `(?m)^foo|bar`.
+ ///
+ /// Note that if `is_anchored_start` is `true`, then
+ /// `is_line_anchored_start` will also be `true`. The reverse implication
+ /// does not hold. For example, `(?m)^foo` is line anchored, but not
+ /// `is_anchored_start`.
+ pub fn is_line_anchored_start(&self) -> bool {
+ self.info.is_line_anchored_start() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if this HIR is required to match at the
+ /// end of text or the end of a line. This includes expressions like
+ /// `foo$`, `(?m)foo$`, `(foo|bar)$`, `(?m)(foo|bar)$`, `foo$|bar$`,
+ /// `(?m)foo$|bar$`, but not `foo$|bar` or `(?m)foo$|bar`.
+ ///
+ /// Note that if `is_anchored_end` is `true`, then
+ /// `is_line_anchored_end` will also be `true`. The reverse implication
+ /// does not hold. For example, `(?m)foo$` is line anchored, but not
+ /// `is_anchored_end`.
+ pub fn is_line_anchored_end(&self) -> bool {
+ self.info.is_line_anchored_end() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if this HIR contains any sub-expression that
+ /// is required to match at the beginning of text. Specifically, this
+ /// returns true if the `^` symbol (when multiline mode is disabled) or the
+ /// `\A` escape appears anywhere in the regex.
+ pub fn is_any_anchored_start(&self) -> bool {
+ self.info.is_any_anchored_start() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if this HIR contains any sub-expression that is
+ /// required to match at the end of text. Specifically, this returns true
+ /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
+ /// appears anywhere in the regex.
+ pub fn is_any_anchored_end(&self) -> bool {
+ self.info.is_any_anchored_end() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if the empty string is part of the language
+ /// matched by this regular expression.
+ ///
+ /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\b`
+ /// and `\B`, but not `a` or `a+`, which need at least one character.
+ pub fn is_match_empty(&self) -> bool {
+ self.info.is_match_empty() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if this HIR is a simple literal. This is only
+ /// true when this HIR expression is either itself a `Literal` or a
+ /// concatenation of only `Literal`s.
+ ///
+ /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()` and the
+ /// empty pattern are not (even though some contain sub-expressions that are literals).
+ pub fn is_literal(&self) -> bool {
+ self.info.is_literal() // flag stored in this expression's `HirInfo`
+ }
+
+ /// Return true if and only if this HIR is either a simple literal or an
+ /// alternation of simple literals. This is only
+ /// true when this HIR expression is either itself a `Literal`, a
+ /// concatenation of only `Literal`s, or an alternation whose branches are all simple literals.
+ ///
+ /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
+ /// literals, but `f+`, `(foo)`, `foo()` and the empty pattern
+ /// are not (even though some contain sub-expressions that are literals).
+ pub fn is_alternation_literal(&self) -> bool {
+ self.info.is_alternation_literal() // flag stored in this expression's `HirInfo`
+ }
+}
+
+impl HirKind {
+ /// Return true if and only if this HIR is the empty regular expression.
+ ///
+ /// Note that this is not defined inductively. That is, it only tests whether
+ /// this kind is the `Empty` variant. To get the inductive definition,
+ /// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
+ pub fn is_empty(&self) -> bool {
+ match *self {
+ HirKind::Empty => true,
+ _ => false,
+ }
+ }
+
+ /// Returns true if and only if this kind has any (including possibly
+ /// empty) subexpressions, i.e. it is a group, repetition, concat or alternation.
+ pub fn has_subexprs(&self) -> bool {
+ match *self {
+ HirKind::Empty
+ | HirKind::Literal(_)
+ | HirKind::Class(_)
+ | HirKind::Anchor(_)
+ | HirKind::WordBoundary(_) => false, // leaf kinds
+ HirKind::Group(_)
+ | HirKind::Repetition(_)
+ | HirKind::Concat(_)
+ | HirKind::Alternation(_) => true, // kinds that wrap other `Hir` values
+ }
+ }
+}
+
+/// Print a display representation of this `Hir`.
+///
+/// The result of this is a valid regular expression pattern string.
+///
+/// This implementation uses constant stack space and heap space proportional
+/// to the size of the `Hir`.
+impl fmt::Display for Hir {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ use crate::hir::print::Printer;
+ Printer::new().print(self, f) // all formatting is delegated to the HIR printer
+ }
+}
+
+/// The high-level intermediate representation of a literal.
+///
+/// A literal corresponds to a single character, where a character is either
+/// defined by a Unicode scalar value or an arbitrary byte. Unicode characters
+/// are preferred whenever possible. In particular, a `Byte` variant is only
+/// ever produced when it could match invalid UTF-8.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Literal {
+ /// A single character represented by a Unicode scalar value (a `char`).
+ Unicode(char),
+ /// A single character represented by an arbitrary byte (a `u8`).
+ Byte(u8),
+}
+
+impl Literal {
+ /// Returns true if and only if this literal corresponds to a Unicode
+ /// scalar value. A `Byte` literal counts as Unicode when it is ASCII (`<= 0x7F`).
+ pub fn is_unicode(&self) -> bool {
+ match *self {
+ Literal::Unicode(_) => true,
+ Literal::Byte(b) if b <= 0x7F => true, // an ASCII byte is also a Unicode scalar value
+ Literal::Byte(_) => false, // bytes 0x80..=0xFF
+ }
+ }
+}
+
+/// The high-level intermediate representation of a character class.
+///
+/// A character class corresponds to a set of characters. A character is either
+/// defined by a Unicode scalar value or a byte. Unicode characters are used
+/// by default, while bytes are used when Unicode mode (via the `u` flag) is
+/// disabled.
+///
+/// A character class, regardless of its character type, is represented by a
+/// sequence of non-overlapping, non-adjacent ranges of characters.
+///
+/// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may
+/// be produced even when it exclusively matches valid UTF-8. This is because
+/// a `Bytes` variant represents an intention by the author of the regular
+/// expression to disable Unicode mode, which in turn impacts the semantics of
+/// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
+/// match the same set of strings.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Class {
+ /// A set of characters represented by Unicode scalar values.
+ Unicode(ClassUnicode),
+ /// A set of characters represented by arbitrary bytes (one byte per
+ /// character).
+ Bytes(ClassBytes),
+}
+
+impl Class {
+ /// Apply Unicode simple case folding to this character class, in place.
+ /// The character class will be expanded to include all simple case folded
+ /// character variants.
+ ///
+ /// If this is a byte oriented character class, then this will be limited
+ /// to the ASCII ranges `A-Z` and `a-z`.
+ pub fn case_fold_simple(&mut self) {
+ match *self {
+ Class::Unicode(ref mut x) => x.case_fold_simple(), // panics if case mapping data is unavailable
+ Class::Bytes(ref mut x) => x.case_fold_simple(),
+ }
+ }
+
+ /// Negate this character class in place.
+ ///
+ /// After completion, this character class will contain precisely the
+ /// characters that weren't previously in the class.
+ pub fn negate(&mut self) {
+ match *self {
+ Class::Unicode(ref mut x) => x.negate(),
+ Class::Bytes(ref mut x) => x.negate(),
+ }
+ }
+
+ /// Returns true if and only if this character class will only ever match
+ /// valid UTF-8.
+ ///
+ /// A character class can match invalid UTF-8 only when the following
+ /// conditions are met:
+ ///
+ /// 1. The translator was configured to permit generating an expression
+ /// that can match invalid UTF-8. (By default, this is disabled.)
+ /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
+ /// syntax or in the parser builder. By default, Unicode mode is
+ /// enabled.
+ pub fn is_always_utf8(&self) -> bool {
+ match *self {
+ Class::Unicode(_) => true,
+ Class::Bytes(ref x) => x.is_all_ascii(), // a byte class qualifies only if it has no byte above 0x7F
+ }
+ }
+}
+
+/// A set of characters represented by Unicode scalar values.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+ set: IntervalSet<ClassUnicodeRange>, // backing interval set; the methods of this type delegate to it
+}
+
+impl ClassUnicode {
+ /// Create a new class from a sequence of ranges.
+ ///
+ /// The given ranges do not need to be in any specific order, and ranges
+ /// may overlap.
+ pub fn new<I>(ranges: I) -> ClassUnicode
+ where
+ I: IntoIterator<Item = ClassUnicodeRange>,
+ {
+ ClassUnicode { set: IntervalSet::new(ranges) }
+ }
+
+ /// Create a new class with no ranges.
+ pub fn empty() -> ClassUnicode {
+ ClassUnicode::new(vec![])
+ }
+
+ /// Add a new range to this set.
+ pub fn push(&mut self, range: ClassUnicodeRange) {
+ self.set.push(range);
+ }
+
+ /// Return an iterator over all ranges in this class.
+ ///
+ /// The iterator yields ranges in ascending order.
+ pub fn iter(&self) -> ClassUnicodeIter<'_> {
+ ClassUnicodeIter(self.set.iter())
+ }
+
+ /// Return the underlying ranges as a slice.
+ pub fn ranges(&self) -> &[ClassUnicodeRange] {
+ self.set.intervals()
+ }
+
+ /// Expand this character class such that it contains all case folded
+ /// characters, according to Unicode's "simple" mapping. For example, if
+ /// this class consists of the range `a-z`, then applying case folding will
+ /// result in the class containing both the ranges `a-z` and `A-Z`.
+ ///
+ /// # Panics
+ ///
+ /// This routine panics when the case mapping data necessary for this
+ /// routine to complete is unavailable. This occurs when the `unicode-case`
+ /// feature is not enabled.
+ ///
+ /// Callers should prefer using `try_case_fold_simple` instead, which will
+ /// return an error instead of panicking.
+ pub fn case_fold_simple(&mut self) {
+ self.set
+ .case_fold_simple()
+ .expect("unicode-case feature must be enabled");
+ }
+
+ /// Expand this character class such that it contains all case folded
+ /// characters, according to Unicode's "simple" mapping. For example, if
+ /// this class consists of the range `a-z`, then applying case folding will
+ /// result in the class containing both the ranges `a-z` and `A-Z`.
+ ///
+ /// # Errors
+ ///
+ /// This routine returns an error when the case mapping data necessary
+ /// for this routine to complete is unavailable. This occurs when the
+ /// `unicode-case` feature is not enabled.
+ pub fn try_case_fold_simple(
+ &mut self,
+ ) -> result::Result<(), CaseFoldError> {
+ self.set.case_fold_simple()
+ }
+
+ /// Negate this character class.
+ ///
+ /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
+ /// set, then it will not be in this set after negation.
+ pub fn negate(&mut self) {
+ self.set.negate();
+ }
+
+ /// Union this character class with the given character class, in place.
+ pub fn union(&mut self, other: &ClassUnicode) {
+ self.set.union(&other.set);
+ }
+
+ /// Intersect this character class with the given character class, in
+ /// place.
+ pub fn intersect(&mut self, other: &ClassUnicode) {
+ self.set.intersect(&other.set);
+ }
+
+ /// Subtract the given character class from this character class, in place.
+ pub fn difference(&mut self, other: &ClassUnicode) {
+ self.set.difference(&other.set);
+ }
+
+ /// Compute the symmetric difference of the given character classes, in
+ /// place.
+ ///
+ /// This computes the symmetric difference of two character classes. This
+ /// removes all elements in this class that are also in the given class,
+ /// but also adds all elements from the given class that aren't in this
+ /// class. That is, the class will contain all elements in either class,
+ /// but will not contain any elements that are in both classes.
+ pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
+ self.set.symmetric_difference(&other.set);
+ }
+
+ /// Returns true if and only if this character class will either match
+ /// nothing or only ASCII bytes. Stated differently, this returns false
+ /// if and only if this class contains a non-ASCII codepoint.
+ pub fn is_all_ascii(&self) -> bool {
+ self.set.intervals().last().map_or(true, |r| r.end <= '\x7F') // only the last range is checked; assumes ascending order
+ }
+}
+
+/// An iterator over all ranges in a Unicode character class.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class. Values
+/// of this type are returned by `ClassUnicode::iter`.
+#[derive(Debug)]
+pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
+
+impl<'a> Iterator for ClassUnicodeIter<'a> {
+ type Item = &'a ClassUnicodeRange; // ranges are borrowed from the class, not copied
+
+ fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
+ self.0.next() // forwards to the wrapped interval-set iterator
+ }
+}
+
+/// A single range of characters represented by Unicode scalar values.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassUnicodeRange {
+ start: char, // inclusive lower bound
+ end: char, // inclusive upper bound
+}
+
+impl fmt::Debug for ClassUnicodeRange {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let start = if !self.start.is_whitespace() && !self.start.is_control() // printable: show the char itself
+ {
+ self.start.to_string()
+ } else {
+ format!("0x{:X}", self.start as u32) // whitespace/control: show the code point in hex
+ };
+ let end = if !self.end.is_whitespace() && !self.end.is_control() {
+ self.end.to_string()
+ } else {
+ format!("0x{:X}", self.end as u32) // same hex fallback as for `start`
+ };
+ f.debug_struct("ClassUnicodeRange")
+ .field("start", &start)
+ .field("end", &end)
+ .finish()
+ }
+}
+
+impl Interval for ClassUnicodeRange {
+ type Bound = char;
+
+ #[inline]
+ fn lower(&self) -> char {
+ self.start
+ }
+ #[inline]
+ fn upper(&self) -> char {
+ self.end
+ }
+ #[inline]
+ fn set_lower(&mut self, bound: char) {
+ self.start = bound;
+ }
+ #[inline]
+ fn set_upper(&mut self, bound: char) {
+ self.end = bound;
+ }
+
+ /// Apply simple case folding to this Unicode scalar value range.
+ ///
+ /// One single-character range per folded code point is appended to the given
+ /// vector. Canonical ordering is *not* maintained in the given vector.
+ fn case_fold_simple(
+ &self,
+ ranges: &mut Vec<ClassUnicodeRange>,
+ ) -> Result<(), unicode::CaseFoldError> {
+ if !unicode::contains_simple_case_mapping(self.start, self.end)? { // nothing in this range folds
+ return Ok(());
+ }
+ let start = self.start as u32;
+ let end = (self.end as u32).saturating_add(1); // exclusive upper bound for the `u32` range below
+ let mut next_simple_cp = None; // skip hint from `simple_fold`; presumably the next code point that has a fold
+ for cp in (start..end).filter_map(char::from_u32) { // `from_u32` drops values that are not valid `char`s
+ if next_simple_cp.map_or(false, |next| cp < next) { // still below the hinted code point: skip
+ continue;
+ }
+ let it = match unicode::simple_fold(cp)? {
+ Ok(it) => it,
+ Err(next) => {
+ next_simple_cp = next; // no fold for `cp`; remember the hint and move on
+ continue;
+ }
+ };
+ for cp_folded in it {
+ ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded));
+ }
+ }
+ Ok(())
+ }
+}
+
+impl ClassUnicodeRange {
+ /// Create a new Unicode scalar value range for a character class.
+ ///
+ /// The returned range is always in a canonical form. That is, the range
+ /// returned always satisfies the invariant that `start <= end`.
+ pub fn new(start: char, end: char) -> ClassUnicodeRange {
+ ClassUnicodeRange::create(start, end) // NOTE(review): `create` is not defined in this view; presumably it orders the bounds
+ }
+
+ /// Return the start of this range (inclusive).
+ ///
+ /// The start of a range is always less than or equal to the end of the
+ /// range.
+ pub fn start(&self) -> char {
+ self.start
+ }
+
+ /// Return the end of this range (inclusive).
+ ///
+ /// The end of a range is always greater than or equal to the start of the
+ /// range.
+ pub fn end(&self) -> char {
+ self.end
+ }
+}
+
+/// A set of characters represented by arbitrary bytes (where one byte
+/// corresponds to one character).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBytes {
+ set: IntervalSet<ClassBytesRange>, // backing interval set; the methods of this type delegate to it
+}
+
+impl ClassBytes {
+ /// Create a new class from a sequence of ranges.
+ ///
+ /// The given ranges do not need to be in any specific order, and ranges
+ /// may overlap.
+ pub fn new<I>(ranges: I) -> ClassBytes
+ where
+ I: IntoIterator<Item = ClassBytesRange>,
+ {
+ ClassBytes { set: IntervalSet::new(ranges) }
+ }
+
+ /// Create a new class with no ranges.
+ pub fn empty() -> ClassBytes {
+ ClassBytes::new(vec![])
+ }
+
+ /// Add a new range to this set.
+ pub fn push(&mut self, range: ClassBytesRange) {
+ self.set.push(range);
+ }
+
+ /// Return an iterator over all ranges in this class.
+ ///
+ /// The iterator yields ranges in ascending order.
+ pub fn iter(&self) -> ClassBytesIter<'_> {
+ ClassBytesIter(self.set.iter())
+ }
+
+ /// Return the underlying ranges as a slice.
+ pub fn ranges(&self) -> &[ClassBytesRange] {
+ self.set.intervals()
+ }
+
+ /// Expand this character class such that it contains all case folded
+ /// characters. For example, if this class consists of the range `a-z`,
+ /// then applying case folding will result in the class containing both the
+ /// ranges `a-z` and `A-Z`.
+ ///
+ /// Note that this only applies ASCII case folding, which is limited to the
+ /// characters `a-z` and `A-Z`.
+ pub fn case_fold_simple(&mut self) {
+ self.set.case_fold_simple().expect("ASCII case folding never fails");
+ }
+
+ /// Negate this byte class.
+ ///
+ /// For all `b` where `b` is any byte, if `b` was in this set, then it
+ /// will not be in this set after negation.
+ pub fn negate(&mut self) {
+ self.set.negate();
+ }
+
+ /// Union this byte class with the given byte class, in place.
+ pub fn union(&mut self, other: &ClassBytes) {
+ self.set.union(&other.set);
+ }
+
+ /// Intersect this byte class with the given byte class, in place.
+ pub fn intersect(&mut self, other: &ClassBytes) {
+ self.set.intersect(&other.set);
+ }
+
+ /// Subtract the given byte class from this byte class, in place.
+ pub fn difference(&mut self, other: &ClassBytes) {
+ self.set.difference(&other.set);
+ }
+
+ /// Compute the symmetric difference of the given byte classes, in place.
+ ///
+ /// This computes the symmetric difference of two byte classes. This
+ /// removes all elements in this class that are also in the given class,
+ /// but also adds all elements from the given class that aren't in this
+ /// class. That is, the class will contain all elements in either class,
+ /// but will not contain any elements that are in both classes.
+ pub fn symmetric_difference(&mut self, other: &ClassBytes) {
+ self.set.symmetric_difference(&other.set);
+ }
+
+ /// Returns true if and only if this character class will either match
+ /// nothing or only ASCII bytes. Stated differently, this returns false
+ /// if and only if this class contains a non-ASCII byte.
+ pub fn is_all_ascii(&self) -> bool {
+ self.set.intervals().last().map_or(true, |r| r.end <= 0x7F) // only the last range is checked; assumes ascending order
+ }
+}
+
+/// An iterator over all ranges in a byte character class.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class. Values
+/// of this type are returned by `ClassBytes::iter`.
+#[derive(Debug)]
+pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
+
+impl<'a> Iterator for ClassBytesIter<'a> {
+ type Item = &'a ClassBytesRange; // ranges are borrowed from the class, not copied
+
+ fn next(&mut self) -> Option<&'a ClassBytesRange> {
+ self.0.next() // forwards to the wrapped interval-set iterator
+ }
+}
+
+/// A single range of characters represented by arbitrary bytes.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassBytesRange {
+ start: u8, // inclusive lower bound
+ end: u8, // inclusive upper bound
+}
+
+impl Interval for ClassBytesRange {
+ type Bound = u8;
+
+ #[inline]
+ fn lower(&self) -> u8 {
+ self.start
+ }
+ #[inline]
+ fn upper(&self) -> u8 {
+ self.end
+ }
+ #[inline]
+ fn set_lower(&mut self, bound: u8) {
+ self.start = bound;
+ }
+ #[inline]
+ fn set_upper(&mut self, bound: u8) {
+ self.end = bound;
+ }
+
+ /// Apply simple case folding to this byte range. Only ASCII case mappings
+ /// (between `a-z` and `A-Z`, in both directions) are applied.
+ ///
+ /// Additional ranges are appended to the given vector. Canonical ordering
+ /// is *not* maintained in the given vector. This implementation always returns `Ok(())`.
+ fn case_fold_simple(
+ &self,
+ ranges: &mut Vec<ClassBytesRange>,
+ ) -> Result<(), unicode::CaseFoldError> {
+ if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
+ let lower = cmp::max(self.start, b'a'); // clamp to the part of this range inside `a-z`
+ let upper = cmp::min(self.end, b'z');
+ ranges.push(ClassBytesRange::new(lower - 32, upper - 32)); // ASCII uppercase is 32 below lowercase
+ }
+ if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
+ let lower = cmp::max(self.start, b'A'); // clamp to the part of this range inside `A-Z`
+ let upper = cmp::min(self.end, b'Z');
+ ranges.push(ClassBytesRange::new(lower + 32, upper + 32)); // ASCII lowercase is 32 above uppercase
+ }
+ Ok(())
+ }
+}
+
+impl ClassBytesRange {
+ /// Create a new byte range for a character class.
+ ///
+ /// The returned range is always in a canonical form. That is, the range
+ /// returned always satisfies the invariant that `start <= end`.
+ pub fn new(start: u8, end: u8) -> ClassBytesRange {
+ ClassBytesRange::create(start, end) // NOTE(review): `create` is not defined in this view; presumably it orders the bounds
+ }
+
+ /// Return the start of this range (inclusive).
+ ///
+ /// The start of a range is always less than or equal to the end of the
+ /// range.
+ pub fn start(&self) -> u8 {
+ self.start
+ }
+
+ /// Return the end of this range (inclusive).
+ ///
+ /// The end of a range is always greater than or equal to the start of the
+ /// range.
+ pub fn end(&self) -> u8 {
+ self.end
+ }
+}
+
+impl fmt::Debug for ClassBytesRange {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let mut debug = f.debug_struct("ClassBytesRange");
+ if self.start <= 0x7F {
+ debug.field("start", &(self.start as char)); // ASCII byte: shown as a `char`
+ } else {
+ debug.field("start", &self.start); // non-ASCII byte: shown as a number
+ }
+ if self.end <= 0x7F {
+ debug.field("end", &(self.end as char)); // ASCII byte: shown as a `char`
+ } else {
+ debug.field("end", &self.end); // non-ASCII byte: shown as a number
+ }
+ debug.finish()
+ }
+}
+
+/// The high-level intermediate representation for an anchor assertion.
+///
+/// A matching anchor assertion is always zero-length.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Anchor {
+ /// Match the beginning of a line or the beginning of text. Specifically,
+ /// this matches at the starting position of the input, or at the position
+ /// immediately following a `\n` character.
+ StartLine,
+ /// Match the end of a line or the end of text. Specifically,
+ /// this matches at the ending position of the input, or at the position
+ /// immediately preceding a `\n` character.
+ EndLine,
+ /// Match the beginning of text. Specifically, this matches only at the starting
+ /// position of the input.
+ StartText,
+ /// Match the end of text. Specifically, this matches only at the ending
+ /// position of the input.
+ EndText,
+}
+
+/// The high-level intermediate representation for a word-boundary assertion.
+///
+/// A matching word boundary assertion is always zero-length.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum WordBoundary {
+ /// Match a Unicode-aware word boundary. That is, this matches a position
+ /// where the left adjacent character and right adjacent character
+ /// correspond to a word and non-word or a non-word and word character.
+ Unicode,
+ /// Match a Unicode-aware negation of a word boundary (a position where `Unicode` does not match).
+ UnicodeNegate,
+ /// Match an ASCII-only word boundary. That is, this matches a position
+ /// where the left adjacent character and right adjacent character
+ /// correspond to a word and non-word or a non-word and word character.
+ Ascii,
+ /// Match an ASCII-only negation of a word boundary (a position where `Ascii` does not match).
+ AsciiNegate,
+}
+
+impl WordBoundary {
+ /// Returns true if and only if this word boundary assertion is negated,
+ /// i.e. it is `UnicodeNegate` or `AsciiNegate`.
+ pub fn is_negated(&self) -> bool {
+ match *self {
+ WordBoundary::Unicode | WordBoundary::Ascii => false,
+ WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true,
+ }
+ }
+}
+
+/// The high-level intermediate representation for a group.
+///
+/// This represents one of three possible group types:
+///
+/// 1. A non-capturing group (e.g., `(?:expr)`).
+/// 2. A capturing group (e.g., `(expr)`).
+/// 3. A named capturing group (e.g., `(?P<name>expr)`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Group {
+ /// The kind of this group. If it is a capturing group, then the kind
+ /// contains the capture group index (and the name, if it is a named
+ /// group).
+ pub kind: GroupKind,
+ /// The expression inside the group (capturing or not), which may be empty.
+ pub hir: Box<Hir>,
+}
+
+/// The kind of a group.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum GroupKind {
+ /// A normal unnamed capturing group (e.g., `(expr)`).
+ ///
+ /// The value is the capture index of the group.
+ CaptureIndex(u32),
+ /// A named capturing group (e.g., `(?P<name>expr)`).
+ CaptureName {
+ /// The name of the group.
+ name: String,
+ /// The capture index of the group.
+ index: u32,
+ },
+ /// A non-capturing group (e.g., `(?:expr)`).
+ NonCapturing,
+}
+
+/// The high-level intermediate representation of a repetition operator.
+///
+/// A repetition operator permits the repetition of an arbitrary
+/// sub-expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Repetition {
+ /// The kind of this repetition operator, i.e. how many times `hir` may repeat.
+ pub kind: RepetitionKind,
+ /// Whether this repetition operator is greedy or not. A greedy operator
+ /// will match as much as it can. A non-greedy operator will match as
+ /// little as it can.
+ ///
+ /// Typically, operators are greedy by default and are only non-greedy when
+ /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
+ /// not. However, this can be inverted via the `U` "ungreedy" flag.
+ pub greedy: bool,
+ /// The sub-expression being repeated.
+ pub hir: Box<Hir>,
+}
+
+impl Repetition {
+ /// Returns true if and only if this repetition operator makes it possible
+ /// to match the empty string.
+ ///
+ /// Note that this is not defined inductively. For example, while `a*`
+ /// will report `true`, `()+` will not, even though `()` matches the empty
+ /// string and one or more occurrences of something that matches the empty
+ /// string will always match the empty string. In order to get the
+ /// inductive definition, see the corresponding method on
+ /// [`Hir`](struct.Hir.html). Only `self.kind` is inspected here; `self.hir` is ignored.
+ pub fn is_match_empty(&self) -> bool {
+ match self.kind {
+ RepetitionKind::ZeroOrOne => true,
+ RepetitionKind::ZeroOrMore => true,
+ RepetitionKind::OneOrMore => false,
+ RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0, // counted forms: true only for a minimum of zero
+ RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0,
+ RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0, // the upper bound is irrelevant
+ }
+ }
+}
+
+/// The kind of a repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionKind {
+ /// Matches a sub-expression zero or one times.
+ ZeroOrOne,
+ /// Matches a sub-expression zero or more times.
+ ZeroOrMore,
+ /// Matches a sub-expression one or more times.
+ OneOrMore,
+ /// Matches a sub-expression a counted number of times; see `RepetitionRange` (may be unbounded above).
+ Range(RepetitionRange),
+}
+
+/// The kind of a counted repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionRange {
+ /// Matches a sub-expression exactly `m` times.
+ Exactly(u32),
+ /// Matches a sub-expression at least `m` times, with no upper bound.
+ AtLeast(u32),
+ /// Matches a sub-expression at least `m` times and at most `n` times.
+ Bounded(u32, u32),
+}
+
+/// A custom `Drop` impl is used for `Hir` such that it uses constant stack
+/// space but heap space proportional to the depth of the total `Hir`.
+impl Drop for Hir {
+ fn drop(&mut self) {
+ use std::mem;
+
+ match *self.kind() { // fast path: return early when there is no nested tree to dismantle
+ HirKind::Empty
+ | HirKind::Literal(_)
+ | HirKind::Class(_)
+ | HirKind::Anchor(_)
+ | HirKind::WordBoundary(_) => return,
+ HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
+ HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
+ HirKind::Concat(ref x) if x.is_empty() => return,
+ HirKind::Alternation(ref x) if x.is_empty() => return,
+ _ => {}
+ }
+
+ let mut stack = vec![mem::replace(self, Hir::empty())]; // move `self` out, leaving an empty `Hir` behind
+ while let Some(mut expr) = stack.pop() { // `expr` is dropped at the end of each iteration, after its children were detached
+ match expr.kind {
+ HirKind::Empty
+ | HirKind::Literal(_)
+ | HirKind::Class(_)
+ | HirKind::Anchor(_)
+ | HirKind::WordBoundary(_) => {}
+ HirKind::Group(ref mut x) => {
+ stack.push(mem::replace(&mut x.hir, Hir::empty())); // detach the child onto the heap-allocated stack
+ }
+ HirKind::Repetition(ref mut x) => {
+ stack.push(mem::replace(&mut x.hir, Hir::empty())); // detach the child onto the heap-allocated stack
+ }
+ HirKind::Concat(ref mut x) => {
+ stack.extend(x.drain(..)); // detach all children at once
+ }
+ HirKind::Alternation(ref mut x) => {
+ stack.extend(x.drain(..)); // detach all children at once
+ }
+ }
+ }
+ }
+}
+
+/// A type that documents various attributes of an HIR expression.
+///
+/// These attributes are typically defined inductively on the HIR.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct HirInfo {
+ /// Represent yes/no questions by a bitfield to conserve space, since
+ /// this is included in every HIR expression.
+ ///
+ /// If more attributes need to be added, it is OK to increase the size of
+ /// this as appropriate. Each attribute occupies one bit (see `define_bool!`).
+ bools: u16,
+}
+
+// Generates a getter/setter pair for one bit of a `bools` bitfield.
+//
+// `$bit` is the bit position, `$is_fn_name` names the getter and
+// `$set_fn_name` names the setter.
+macro_rules! define_bool {
+    ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
+        fn $is_fn_name(&self) -> bool {
+            let mask = 1 << $bit;
+            self.bools & mask != 0
+        }
+
+        fn $set_fn_name(&mut self, yes: bool) {
+            let mask = 1 << $bit;
+            self.bools = if yes {
+                self.bools | mask
+            } else {
+                self.bools & !mask
+            };
+        }
+    };
+}
+
+impl HirInfo {
+ /// Creates an info value with every attribute bit cleared.
+ fn new() -> HirInfo {
+ HirInfo { bools: 0 }
+ }
+
+ // Bit assignments. Each invocation generates an `is_*` getter and a
+ // `set_*` setter for the given bit position of `bools`.
+ define_bool!(0, is_always_utf8, set_always_utf8);
+ define_bool!(1, is_all_assertions, set_all_assertions);
+ define_bool!(2, is_anchored_start, set_anchored_start);
+ define_bool!(3, is_anchored_end, set_anchored_end);
+ define_bool!(4, is_line_anchored_start, set_line_anchored_start);
+ define_bool!(5, is_line_anchored_end, set_line_anchored_end);
+ define_bool!(6, is_any_anchored_start, set_any_anchored_start);
+ define_bool!(7, is_any_anchored_end, set_any_anchored_end);
+ define_bool!(8, is_match_empty, set_match_empty);
+ define_bool!(9, is_literal, set_literal);
+ define_bool!(10, is_alternation_literal, set_alternation_literal);
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+    /// Builds a `ClassUnicode` from `(start, end)` pairs.
+    fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
+        let mut built: Vec<ClassUnicodeRange> =
+            Vec::with_capacity(ranges.len());
+        for &(lo, hi) in ranges {
+            built.push(ClassUnicodeRange::new(lo, hi));
+        }
+        ClassUnicode::new(built)
+    }
+
+    /// Builds a `ClassBytes` from `(start, end)` pairs.
+    fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
+        let mut built: Vec<ClassBytesRange> =
+            Vec::with_capacity(ranges.len());
+        for &(lo, hi) in ranges {
+            built.push(ClassBytesRange::new(lo, hi));
+        }
+        ClassBytes::new(built)
+    }
+
+    /// Lists the ranges of `cls` as `(start, end)` pairs, in iteration order.
+    fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
+        let mut pairs = Vec::new();
+        for range in cls.iter() {
+            pairs.push((range.start(), range.end()));
+        }
+        pairs
+    }
+
+    /// Returns a copy of `cls` after `case_fold_simple` has been applied.
+    #[cfg(feature = "unicode-case")]
+    fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
+        let mut folded = cls.clone();
+        folded.case_fold_simple();
+        folded
+    }
+
+    /// Returns a copy of `cls1` after `union(cls2)` has been applied.
+    fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut merged = cls1.clone();
+        merged.union(cls2);
+        merged
+    }
+
+    /// Returns a copy of `cls1` after `intersect(cls2)` has been applied.
+    fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut common = cls1.clone();
+        common.intersect(cls2);
+        common
+    }
+
+    /// Returns a copy of `cls1` after `difference(cls2)` has been applied.
+    fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut remaining = cls1.clone();
+        remaining.difference(cls2);
+        remaining
+    }
+
+    /// Returns a copy of `cls1` after `symmetric_difference(cls2)` has been
+    /// applied.
+    fn usymdifference(
+        cls1: &ClassUnicode,
+        cls2: &ClassUnicode,
+    ) -> ClassUnicode {
+        let mut exclusive = cls1.clone();
+        exclusive.symmetric_difference(cls2);
+        exclusive
+    }
+
+    /// Returns a copy of `cls` after `negate` has been applied.
+    fn unegate(cls: &ClassUnicode) -> ClassUnicode {
+        let mut negated = cls.clone();
+        negated.negate();
+        negated
+    }
+
+    /// Lists the ranges of `cls` as `(start, end)` pairs, in iteration order.
+    fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
+        let mut pairs = Vec::new();
+        for range in cls.iter() {
+            pairs.push((range.start(), range.end()));
+        }
+        pairs
+    }
+
+    /// Returns a copy of `cls` after `case_fold_simple` has been applied.
+    fn bcasefold(cls: &ClassBytes) -> ClassBytes {
+        let mut folded = cls.clone();
+        folded.case_fold_simple();
+        folded
+    }
+
+    /// Returns a copy of `cls1` after `union(cls2)` has been applied.
+    fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut merged = cls1.clone();
+        merged.union(cls2);
+        merged
+    }
+
+    /// Returns a copy of `cls1` after `intersect(cls2)` has been applied.
+    fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut common = cls1.clone();
+        common.intersect(cls2);
+        common
+    }
+
+    /// Returns a copy of `cls1` after `difference(cls2)` has been applied.
+    fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut remaining = cls1.clone();
+        remaining.difference(cls2);
+        remaining
+    }
+
+    /// Returns a copy of `cls1` after `symmetric_difference(cls2)` has been
+    /// applied.
+    fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut exclusive = cls1.clone();
+        exclusive.symmetric_difference(cls2);
+        exclusive
+    }
+
+    /// Returns a copy of `cls` after `negate` has been applied.
+    fn bnegate(cls: &ClassBytes) -> ClassBytes {
+        let mut negated = cls.clone();
+        negated.negate();
+        negated
+    }
+
+    #[test]
+    fn class_range_canonical_unicode() {
+        // The endpoints are passed in reverse order; the range is expected
+        // to report them with the smaller one as its start.
+        let (lo, hi) = ('\0', '\u{00FF}');
+        let reversed = ClassUnicodeRange::new(hi, lo);
+        assert_eq!(lo, reversed.start());
+        assert_eq!(hi, reversed.end());
+    }
+
+    #[test]
+    fn class_range_canonical_bytes() {
+        // The endpoints are passed in reverse order; the range is expected
+        // to report them with the smaller one as its start.
+        let (lo, hi) = (b'\0', b'\xFF');
+        let reversed = ClassBytesRange::new(hi, lo);
+        assert_eq!(lo, reversed.start());
+        assert_eq!(hi, reversed.end());
+    }
+
+    #[test]
+    fn class_canonicalize_unicode() {
+        // Each row is `(ranges given to the class, ranges it should report)`.
+        let rows: &[(&[(char, char)], &[(char, char)])] = &[
+            (&[('a', 'c'), ('x', 'z')], &[('a', 'c'), ('x', 'z')]),
+            (&[('x', 'z'), ('a', 'c')], &[('a', 'c'), ('x', 'z')]),
+            (&[('x', 'z'), ('w', 'y')], &[('w', 'z')]),
+            (
+                &[
+                    ('c', 'f'),
+                    ('a', 'g'),
+                    ('d', 'j'),
+                    ('a', 'c'),
+                    ('m', 'p'),
+                    ('l', 's'),
+                ],
+                &[('a', 'j'), ('l', 's')],
+            ),
+            (&[('x', 'z'), ('u', 'w')], &[('u', 'z')]),
+            (
+                &[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')],
+                &[('\x00', '\u{10FFFF}')],
+            ),
+            (&[('a', 'a'), ('b', 'b')], &[('a', 'b')]),
+        ];
+        for &(given, want) in rows {
+            assert_eq!(want.to_vec(), uranges(&uclass(given)));
+        }
+    }
+
+    #[test]
+    fn class_canonicalize_bytes() {
+        // Each row is `(ranges given to the class, ranges it should report)`.
+        let rows: &[(&[(u8, u8)], &[(u8, u8)])] = &[
+            (&[(b'a', b'c'), (b'x', b'z')], &[(b'a', b'c'), (b'x', b'z')]),
+            (&[(b'x', b'z'), (b'a', b'c')], &[(b'a', b'c'), (b'x', b'z')]),
+            (&[(b'x', b'z'), (b'w', b'y')], &[(b'w', b'z')]),
+            (
+                &[
+                    (b'c', b'f'),
+                    (b'a', b'g'),
+                    (b'd', b'j'),
+                    (b'a', b'c'),
+                    (b'm', b'p'),
+                    (b'l', b's'),
+                ],
+                &[(b'a', b'j'), (b'l', b's')],
+            ),
+            (&[(b'x', b'z'), (b'u', b'w')], &[(b'u', b'z')]),
+            (
+                &[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')],
+                &[(b'\x00', b'\xFF')],
+            ),
+            (&[(b'a', b'a'), (b'b', b'b')], &[(b'a', b'b')]),
+        ];
+        for &(given, want) in rows {
+            assert_eq!(want.to_vec(), branges(&bclass(given)));
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn class_case_fold_unicode() {
+        // Each row is `(class before folding, class expected after folding)`.
+        // Rows whose two halves are identical assert that folding leaves the
+        // class unchanged.
+        let rows: &[(&[(char, char)], &[(char, char)])] = &[
+            (
+                &[
+                    ('C', 'F'),
+                    ('A', 'G'),
+                    ('D', 'J'),
+                    ('A', 'C'),
+                    ('M', 'P'),
+                    ('L', 'S'),
+                    ('c', 'f'),
+                ],
+                &[
+                    ('A', 'J'),
+                    ('L', 'S'),
+                    ('a', 'j'),
+                    ('l', 's'),
+                    ('\u{17F}', '\u{17F}'),
+                ],
+            ),
+            (
+                &[('A', 'Z')],
+                &[
+                    ('A', 'Z'),
+                    ('a', 'z'),
+                    ('\u{17F}', '\u{17F}'),
+                    ('\u{212A}', '\u{212A}'),
+                ],
+            ),
+            (
+                &[('a', 'z')],
+                &[
+                    ('A', 'Z'),
+                    ('a', 'z'),
+                    ('\u{17F}', '\u{17F}'),
+                    ('\u{212A}', '\u{212A}'),
+                ],
+            ),
+            (
+                &[('A', 'A'), ('_', '_')],
+                &[('A', 'A'), ('_', '_'), ('a', 'a')],
+            ),
+            (
+                &[('A', 'A'), ('=', '=')],
+                &[('=', '='), ('A', 'A'), ('a', 'a')],
+            ),
+            (&[('\x00', '\x10')], &[('\x00', '\x10')]),
+            (
+                &[('k', 'k')],
+                &[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')],
+            ),
+            (&[('@', '@')], &[('@', '@')]),
+        ];
+        for &(given, want) in rows {
+            assert_eq!(uclass(want), ucasefold(&uclass(given)));
+        }
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-case"))]
+    fn class_case_fold_unicode_disabled() {
+        // With the `unicode-case` feature off, the fallible folding call is
+        // expected to return an error.
+        let ranges = [
+            ('C', 'F'),
+            ('A', 'G'),
+            ('D', 'J'),
+            ('A', 'C'),
+            ('M', 'P'),
+            ('L', 'S'),
+            ('c', 'f'),
+        ];
+        let mut cls = uclass(&ranges);
+        let outcome = cls.try_case_fold_simple();
+        assert!(outcome.is_err());
+    }
+
+    #[test]
+    #[should_panic]
+    #[cfg(not(feature = "unicode-case"))]
+    fn class_case_fold_unicode_disabled_panics() {
+        // With the `unicode-case` feature off, the infallible folding call is
+        // expected to panic.
+        let ranges = [
+            ('C', 'F'),
+            ('A', 'G'),
+            ('D', 'J'),
+            ('A', 'C'),
+            ('M', 'P'),
+            ('L', 'S'),
+            ('c', 'f'),
+        ];
+        let mut cls = uclass(&ranges);
+        cls.case_fold_simple();
+    }
+
+    #[test]
+    fn class_case_fold_bytes() {
+        // Each row is `(class before folding, class expected after folding)`.
+        // Rows whose two halves are identical assert that folding leaves the
+        // class unchanged.
+        let rows: &[(&[(u8, u8)], &[(u8, u8)])] = &[
+            (
+                &[
+                    (b'C', b'F'),
+                    (b'A', b'G'),
+                    (b'D', b'J'),
+                    (b'A', b'C'),
+                    (b'M', b'P'),
+                    (b'L', b'S'),
+                    (b'c', b'f'),
+                ],
+                &[(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')],
+            ),
+            (&[(b'A', b'Z')], &[(b'A', b'Z'), (b'a', b'z')]),
+            (&[(b'a', b'z')], &[(b'A', b'Z'), (b'a', b'z')]),
+            (
+                &[(b'A', b'A'), (b'_', b'_')],
+                &[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')],
+            ),
+            (
+                &[(b'A', b'A'), (b'=', b'=')],
+                &[(b'=', b'='), (b'A', b'A'), (b'a', b'a')],
+            ),
+            (&[(b'\x00', b'\x10')], &[(b'\x00', b'\x10')]),
+            (&[(b'k', b'k')], &[(b'K', b'K'), (b'k', b'k')]),
+            (&[(b'@', b'@')], &[(b'@', b'@')]),
+        ];
+        for &(given, want) in rows {
+            assert_eq!(bclass(want), bcasefold(&bclass(given)));
+        }
+    }
+
+    #[test]
+    fn class_negate_unicode() {
+        // Each row is `(class, expected negation)`.
+        let rows: &[(&[(char, char)], &[(char, char)])] = &[
+            (&[('a', 'a')], &[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]),
+            (
+                &[('a', 'a'), ('b', 'b')],
+                &[('\x00', '\x60'), ('\x63', '\u{10FFFF}')],
+            ),
+            (
+                &[('a', 'c'), ('x', 'z')],
+                &[
+                    ('\x00', '\x60'),
+                    ('\x64', '\x77'),
+                    ('\x7B', '\u{10FFFF}'),
+                ],
+            ),
+            (&[('\x00', 'a')], &[('\x62', '\u{10FFFF}')]),
+            (&[('a', '\u{10FFFF}')], &[('\x00', '\x60')]),
+            (&[('\x00', '\u{10FFFF}')], &[]),
+            (&[], &[('\x00', '\u{10FFFF}')]),
+            (
+                &[('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')],
+                &[('\u{10FFFE}', '\u{10FFFE}')],
+            ),
+            (&[('\x00', '\u{D7FF}')], &[('\u{E000}', '\u{10FFFF}')]),
+            (&[('\x00', '\u{D7FE}')], &[('\u{D7FF}', '\u{10FFFF}')]),
+            (&[('\u{E000}', '\u{10FFFF}')], &[('\x00', '\u{D7FF}')]),
+            (&[('\u{E001}', '\u{10FFFF}')], &[('\x00', '\u{E000}')]),
+        ];
+        for &(given, want) in rows {
+            assert_eq!(uclass(want), unegate(&uclass(given)));
+        }
+    }
+
+    #[test]
+    fn class_negate_bytes() {
+        // Each row is `(class, expected negation)`.
+        let rows: &[(&[(u8, u8)], &[(u8, u8)])] = &[
+            (&[(b'a', b'a')], &[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]),
+            (
+                &[(b'a', b'a'), (b'b', b'b')],
+                &[(b'\x00', b'\x60'), (b'\x63', b'\xFF')],
+            ),
+            (
+                &[(b'a', b'c'), (b'x', b'z')],
+                &[
+                    (b'\x00', b'\x60'),
+                    (b'\x64', b'\x77'),
+                    (b'\x7B', b'\xFF'),
+                ],
+            ),
+            (&[(b'\x00', b'a')], &[(b'\x62', b'\xFF')]),
+            (&[(b'a', b'\xFF')], &[(b'\x00', b'\x60')]),
+            (&[(b'\x00', b'\xFF')], &[]),
+            (&[], &[(b'\x00', b'\xFF')]),
+            (
+                &[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')],
+                &[(b'\xFE', b'\xFE')],
+            ),
+        ];
+        for &(given, want) in rows {
+            assert_eq!(bclass(want), bnegate(&bclass(given)));
+        }
+    }
+
+    #[test]
+    fn class_union_unicode() {
+        let lhs = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
+        let rhs = uclass(&[('a', 'z')]);
+        let merged = uunion(&lhs, &rhs);
+        assert_eq!(uclass(&[('a', 'z'), ('A', 'C')]), merged);
+    }
+
+    #[test]
+    fn class_union_bytes() {
+        let lhs = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
+        let rhs = bclass(&[(b'a', b'z')]);
+        let merged = bunion(&lhs, &rhs);
+        assert_eq!(bclass(&[(b'a', b'z'), (b'A', b'C')]), merged);
+    }
+
+    #[test]
+    fn class_intersect_unicode() {
+        // Each row is `(lhs, rhs, expected intersection)`.
+        let rows: &[(&[(char, char)], &[(char, char)], &[(char, char)])] = &[
+            (&[], &[('a', 'a')], &[]),
+            (&[('a', 'a')], &[('a', 'a')], &[('a', 'a')]),
+            (&[('a', 'a')], &[('b', 'b')], &[]),
+            (&[('a', 'a')], &[('a', 'c')], &[('a', 'a')]),
+            (&[('a', 'b')], &[('a', 'c')], &[('a', 'b')]),
+            (&[('a', 'b')], &[('b', 'c')], &[('b', 'b')]),
+            (&[('a', 'b')], &[('c', 'd')], &[]),
+            (&[('b', 'c')], &[('a', 'd')], &[('b', 'c')]),
+            (
+                &[('a', 'b'), ('d', 'e'), ('g', 'h')],
+                &[('a', 'h')],
+                &[('a', 'b'), ('d', 'e'), ('g', 'h')],
+            ),
+            (
+                &[('a', 'b'), ('d', 'e'), ('g', 'h')],
+                &[('a', 'b'), ('d', 'e'), ('g', 'h')],
+                &[('a', 'b'), ('d', 'e'), ('g', 'h')],
+            ),
+            (&[('a', 'b'), ('g', 'h')], &[('d', 'e'), ('k', 'l')], &[]),
+            (
+                &[('a', 'b'), ('d', 'e'), ('g', 'h')],
+                &[('h', 'h')],
+                &[('h', 'h')],
+            ),
+            (
+                &[('a', 'b'), ('e', 'f'), ('i', 'j')],
+                &[('c', 'd'), ('g', 'h'), ('k', 'l')],
+                &[],
+            ),
+            (
+                &[('a', 'b'), ('c', 'd'), ('e', 'f')],
+                &[('b', 'c'), ('d', 'e'), ('f', 'g')],
+                &[('b', 'f')],
+            ),
+        ];
+        for &(lhs, rhs, want) in rows {
+            assert_eq!(uclass(want), uintersect(&uclass(lhs), &uclass(rhs)));
+        }
+    }
+
+    #[test]
+    fn class_intersect_bytes() {
+        // Each row is `(lhs, rhs, expected intersection)`.
+        let rows: &[(&[(u8, u8)], &[(u8, u8)], &[(u8, u8)])] = &[
+            (&[], &[(b'a', b'a')], &[]),
+            (&[(b'a', b'a')], &[(b'a', b'a')], &[(b'a', b'a')]),
+            (&[(b'a', b'a')], &[(b'b', b'b')], &[]),
+            (&[(b'a', b'a')], &[(b'a', b'c')], &[(b'a', b'a')]),
+            (&[(b'a', b'b')], &[(b'a', b'c')], &[(b'a', b'b')]),
+            (&[(b'a', b'b')], &[(b'b', b'c')], &[(b'b', b'b')]),
+            (&[(b'a', b'b')], &[(b'c', b'd')], &[]),
+            (&[(b'b', b'c')], &[(b'a', b'd')], &[(b'b', b'c')]),
+            (
+                &[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+                &[(b'a', b'h')],
+                &[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+            ),
+            (
+                &[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+                &[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+                &[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+            ),
+            (
+                &[(b'a', b'b'), (b'g', b'h')],
+                &[(b'd', b'e'), (b'k', b'l')],
+                &[],
+            ),
+            (
+                &[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+                &[(b'h', b'h')],
+                &[(b'h', b'h')],
+            ),
+            (
+                &[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')],
+                &[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')],
+                &[],
+            ),
+            (
+                &[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')],
+                &[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')],
+                &[(b'b', b'f')],
+            ),
+        ];
+        for &(lhs, rhs, want) in rows {
+            assert_eq!(bclass(want), bintersect(&bclass(lhs), &bclass(rhs)));
+        }
+    }
+
+    #[test]
+    fn class_difference_unicode() {
+        // Each row is `(lhs, rhs, expected lhs minus rhs)`.
+        let rows: &[(&[(char, char)], &[(char, char)], &[(char, char)])] = &[
+            (&[('a', 'a')], &[('a', 'a')], &[]),
+            (&[('a', 'a')], &[], &[('a', 'a')]),
+            (&[], &[('a', 'a')], &[]),
+            (&[('a', 'z')], &[('a', 'a')], &[('b', 'z')]),
+            (&[('a', 'z')], &[('z', 'z')], &[('a', 'y')]),
+            (&[('a', 'z')], &[('m', 'm')], &[('a', 'l'), ('n', 'z')]),
+            (&[('a', 'c'), ('g', 'i'), ('r', 't')], &[('a', 'z')], &[]),
+            (
+                &[('a', 'c'), ('g', 'i'), ('r', 't')],
+                &[('d', 'v')],
+                &[('a', 'c')],
+            ),
+            (
+                &[('a', 'c'), ('g', 'i'), ('r', 't')],
+                &[('b', 'g'), ('s', 'u')],
+                &[('a', 'a'), ('h', 'i'), ('r', 'r')],
+            ),
+            (
+                &[('a', 'c'), ('g', 'i'), ('r', 't')],
+                &[('b', 'd'), ('e', 'g'), ('s', 'u')],
+                &[('a', 'a'), ('h', 'i'), ('r', 'r')],
+            ),
+            (
+                &[('x', 'z')],
+                &[('a', 'c'), ('e', 'g'), ('s', 'u')],
+                &[('x', 'z')],
+            ),
+            (
+                &[('a', 'z')],
+                &[('a', 'c'), ('e', 'g'), ('s', 'u')],
+                &[('d', 'd'), ('h', 'r'), ('v', 'z')],
+            ),
+        ];
+        for &(lhs, rhs, want) in rows {
+            assert_eq!(uclass(want), udifference(&uclass(lhs), &uclass(rhs)));
+        }
+    }
+
+    #[test]
+    fn class_difference_bytes() {
+        // Each row is `(lhs, rhs, expected lhs minus rhs)`.
+        let rows: &[(&[(u8, u8)], &[(u8, u8)], &[(u8, u8)])] = &[
+            (&[(b'a', b'a')], &[(b'a', b'a')], &[]),
+            (&[(b'a', b'a')], &[], &[(b'a', b'a')]),
+            (&[], &[(b'a', b'a')], &[]),
+            (&[(b'a', b'z')], &[(b'a', b'a')], &[(b'b', b'z')]),
+            (&[(b'a', b'z')], &[(b'z', b'z')], &[(b'a', b'y')]),
+            (
+                &[(b'a', b'z')],
+                &[(b'm', b'm')],
+                &[(b'a', b'l'), (b'n', b'z')],
+            ),
+            (
+                &[(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
+                &[(b'a', b'z')],
+                &[],
+            ),
+            (
+                &[(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
+                &[(b'd', b'v')],
+                &[(b'a', b'c')],
+            ),
+            (
+                &[(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
+                &[(b'b', b'g'), (b's', b'u')],
+                &[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')],
+            ),
+            (
+                &[(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
+                &[(b'b', b'd'), (b'e', b'g'), (b's', b'u')],
+                &[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')],
+            ),
+            (
+                &[(b'x', b'z')],
+                &[(b'a', b'c'), (b'e', b'g'), (b's', b'u')],
+                &[(b'x', b'z')],
+            ),
+            (
+                &[(b'a', b'z')],
+                &[(b'a', b'c'), (b'e', b'g'), (b's', b'u')],
+                &[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')],
+            ),
+        ];
+        for &(lhs, rhs, want) in rows {
+            assert_eq!(bclass(want), bdifference(&bclass(lhs), &bclass(rhs)));
+        }
+    }
+
+    #[test]
+    fn class_symmetric_difference_unicode() {
+        let lhs = uclass(&[('a', 'm')]);
+        let rhs = uclass(&[('g', 't')]);
+        let exclusive = usymdifference(&lhs, &rhs);
+        assert_eq!(uclass(&[('a', 'f'), ('n', 't')]), exclusive);
+    }
+
+    #[test]
+    fn class_symmetric_difference_bytes() {
+        let lhs = bclass(&[(b'a', b'm')]);
+        let rhs = bclass(&[(b'g', b't')]);
+        let exclusive = bsymdifference(&lhs, &rhs);
+        assert_eq!(bclass(&[(b'a', b'f'), (b'n', b't')]), exclusive);
+    }
+
+    #[test]
+    #[should_panic]
+    fn hir_byte_literal_non_ascii() {
+        // An ASCII byte is wrapped in `Literal::Byte` here, and the test
+        // expects `Hir::literal` to panic on it.
+        // NOTE(review): presumably byte literals are reserved for non-ASCII
+        // bytes; confirm against `Hir::literal`.
+        let ascii = Literal::Byte(b'a');
+        Hir::literal(ascii);
+    }
+
+    // Checks that the destructor for `Hir` copes with a deeply nested
+    // expression in constant stack space. The expression is built and dropped
+    // on a thread whose stack size is requested explicitly. In case we run on
+    // a platform without threads (WASM?), this test is limited to
+    // Windows/Unix.
+    #[test]
+    #[cfg(any(unix, windows))]
+    fn no_stack_overflow_on_drop() {
+        use std::thread;
+
+        let build_and_drop = || {
+            let mut nested = Hir::empty();
+            for _ in 0..100 {
+                // Wrap the expression once in each kind that owns children.
+                let group = Hir::group(Group {
+                    kind: GroupKind::NonCapturing,
+                    hir: Box::new(nested),
+                });
+                let repetition = Hir::repetition(Repetition {
+                    kind: RepetitionKind::ZeroOrOne,
+                    greedy: true,
+                    hir: Box::new(group),
+                });
+                let concat = Hir {
+                    kind: HirKind::Concat(vec![repetition]),
+                    info: HirInfo::new(),
+                };
+                nested = Hir {
+                    kind: HirKind::Alternation(vec![concat]),
+                    info: HirInfo::new(),
+                };
+            }
+            assert!(!nested.kind.is_empty());
+        };
+
+        // A small stack size is requested so that recursive dropping would
+        // show up more easily.
+        let worker = thread::Builder::new()
+            .stack_size(1 << 10)
+            .spawn(build_and_drop)
+            .unwrap();
+        worker.join().unwrap();
+    }
+}
diff --git a/vendor/regex-syntax/src/hir/print.rs b/vendor/regex-syntax/src/hir/print.rs
new file mode 100644
index 000000000..b71f3897c
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/print.rs
@@ -0,0 +1,367 @@
+/*!
+This module provides a regular expression printer for `Hir`.
+*/
+
+use std::fmt;
+
+use crate::hir::visitor::{self, Visitor};
+use crate::hir::{self, Hir, HirKind};
+use crate::is_meta_character;
+
+/// A builder for constructing a printer.
+///
+/// Note that since a printer doesn't have any configuration knobs, this type
+/// remains unexported.
+#[derive(Clone, Debug)]
+struct PrinterBuilder {
+ /// Zero-sized placeholder; the builder currently carries no settings.
+ _priv: (),
+}
+
+impl Default for PrinterBuilder {
+    /// Same as `PrinterBuilder::new`.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl PrinterBuilder {
+    /// Creates a builder. There is nothing to configure.
+    fn new() -> Self {
+        Self { _priv: () }
+    }
+
+    /// Produces a `Printer`.
+    fn build(&self) -> Printer {
+        let _priv = ();
+        Printer { _priv }
+    }
+}
+
+/// A printer for a regular expression's high-level intermediate
+/// representation.
+///
+/// A printer converts a high-level intermediate representation (HIR) to a
+/// regular expression pattern string. This particular printer uses constant
+/// stack space and heap space proportional to the size of the HIR.
+///
+/// Since this printer is only using the HIR, the pattern it prints will likely
+/// not resemble the original pattern at all. For example, a pattern like
+/// `\pL` will have its entire class written out.
+///
+/// The purpose of this printer is to provide a means to mutate an HIR and then
+/// build a regular expression from the result of that mutation. (A regex
+/// library could provide a constructor from this HIR explicitly, but that
+/// creates an unnecessary public coupling between the regex library and this
+/// specific HIR representation.)
+#[derive(Debug)]
+pub struct Printer {
+ /// Private zero-sized field: code outside this module cannot build a
+ /// `Printer` with a struct literal and goes through `Printer::new`.
+ _priv: (),
+}
+
+impl Printer {
+    /// Create a new printer.
+    pub fn new() -> Printer {
+        let builder = PrinterBuilder::new();
+        builder.build()
+    }
+
+    /// Print the given `Hir` to the given writer. The writer must implement
+    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
+    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
+    /// implementations) or a `&mut String`.
+    pub fn print<W: fmt::Write>(&mut self, hir: &Hir, wtr: W) -> fmt::Result {
+        let writer = Writer { wtr };
+        visitor::visit(hir, writer)
+    }
+}
+
+/// The visitor that `Printer::print` hands to `visitor::visit`. Its
+/// `Visitor` callbacks emit pattern text into `wtr`.
+#[derive(Debug)]
+struct Writer<W> {
+ /// Destination for the printed pattern.
+ wtr: W,
+}
+
+impl<W: fmt::Write> Visitor for Writer<W> {
+    type Output = ();
+    type Err = fmt::Error;
+
+    /// Nothing is accumulated besides what was already written to `wtr`.
+    fn finish(self) -> fmt::Result {
+        Ok(())
+    }
+
+    /// Emits everything that precedes an expression's children: leaf
+    /// expressions in full, and the opening delimiter of a group.
+    fn visit_pre(&mut self, hir: &Hir) -> fmt::Result {
+        match *hir.kind() {
+            // Nothing to write before the children of these kinds.
+            HirKind::Empty
+            | HirKind::Repetition(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => Ok(()),
+            HirKind::Literal(ref lit) => match *lit {
+                hir::Literal::Unicode(c) => self.write_literal_char(c),
+                hir::Literal::Byte(b) => self.write_literal_byte(b),
+            },
+            HirKind::Class(ref class) => match *class {
+                hir::Class::Unicode(ref cls) => {
+                    self.wtr.write_str("[")?;
+                    for range in cls.iter() {
+                        let (lo, hi) = (range.start(), range.end());
+                        self.write_literal_char(lo)?;
+                        // A single-element range is written as one item.
+                        if lo != hi {
+                            self.wtr.write_str("-")?;
+                            self.write_literal_char(hi)?;
+                        }
+                    }
+                    self.wtr.write_str("]")
+                }
+                hir::Class::Bytes(ref cls) => {
+                    self.wtr.write_str("(?-u:[")?;
+                    for range in cls.iter() {
+                        let (lo, hi) = (range.start(), range.end());
+                        self.write_literal_class_byte(lo)?;
+                        // A single-element range is written as one item.
+                        if lo != hi {
+                            self.wtr.write_str("-")?;
+                            self.write_literal_class_byte(hi)?;
+                        }
+                    }
+                    self.wtr.write_str("])")
+                }
+            },
+            HirKind::Anchor(ref anchor) => {
+                let text = match *anchor {
+                    hir::Anchor::StartLine => "(?m:^)",
+                    hir::Anchor::EndLine => "(?m:$)",
+                    hir::Anchor::StartText => r"\A",
+                    hir::Anchor::EndText => r"\z",
+                };
+                self.wtr.write_str(text)
+            }
+            HirKind::WordBoundary(ref boundary) => {
+                let text = match *boundary {
+                    hir::WordBoundary::Unicode => r"\b",
+                    hir::WordBoundary::UnicodeNegate => r"\B",
+                    hir::WordBoundary::Ascii => r"(?-u:\b)",
+                    hir::WordBoundary::AsciiNegate => r"(?-u:\B)",
+                };
+                self.wtr.write_str(text)
+            }
+            HirKind::Group(ref group) => match group.kind {
+                hir::GroupKind::CaptureIndex(_) => self.wtr.write_str("("),
+                hir::GroupKind::CaptureName { ref name, .. } => {
+                    write!(self.wtr, "(?P<{}>", name)
+                }
+                hir::GroupKind::NonCapturing => self.wtr.write_str("(?:"),
+            },
+        }
+    }
+
+    /// Emits everything that follows an expression's children: the
+    /// repetition operator or the closing parenthesis of a group.
+    fn visit_post(&mut self, hir: &Hir) -> fmt::Result {
+        match *hir.kind() {
+            HirKind::Repetition(ref rep) => {
+                match rep.kind {
+                    hir::RepetitionKind::ZeroOrOne => {
+                        self.wtr.write_str("?")?;
+                    }
+                    hir::RepetitionKind::ZeroOrMore => {
+                        self.wtr.write_str("*")?;
+                    }
+                    hir::RepetitionKind::OneOrMore => {
+                        self.wtr.write_str("+")?;
+                    }
+                    hir::RepetitionKind::Range(ref range) => match *range {
+                        hir::RepetitionRange::Exactly(m) => {
+                            write!(self.wtr, "{{{}}}", m)?;
+                        }
+                        hir::RepetitionRange::AtLeast(m) => {
+                            write!(self.wtr, "{{{},}}", m)?;
+                        }
+                        hir::RepetitionRange::Bounded(m, n) => {
+                            write!(self.wtr, "{{{},{}}}", m, n)?;
+                        }
+                    },
+                }
+                // A non-greedy operator gets a trailing `?`.
+                if rep.greedy {
+                    Ok(())
+                } else {
+                    self.wtr.write_str("?")
+                }
+            }
+            HirKind::Group(_) => self.wtr.write_str(")"),
+            // Handled during visit_pre
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Anchor(_)
+            | HirKind::WordBoundary(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => Ok(()),
+        }
+    }
+
+    /// Separates the branches of an alternation.
+    fn visit_alternation_in(&mut self) -> fmt::Result {
+        self.wtr.write_str("|")
+    }
+}
+
+impl<W: fmt::Write> Writer<W> {
+    /// Reports whether `b` is an ASCII byte that is neither a control
+    /// character nor whitespace, i.e. one that is written as a plain
+    /// character rather than as a `\x` escape.
+    fn is_plain_ascii(b: u8) -> bool {
+        let c = b as char;
+        b <= 0x7F && !(c.is_control() || c.is_whitespace())
+    }
+
+    /// Writes `c`, preceded by a backslash when `is_meta_character(c)`
+    /// holds.
+    fn write_literal_char(&mut self, c: char) -> fmt::Result {
+        let needs_escape = is_meta_character(c);
+        if needs_escape {
+            self.wtr.write_str("\\")?;
+        }
+        self.wtr.write_char(c)
+    }
+
+    /// Writes a byte literal that appears outside a class. Bytes that are
+    /// not plain ASCII are written as a hex escape wrapped in `(?-u:...)`.
+    fn write_literal_byte(&mut self, b: u8) -> fmt::Result {
+        if Self::is_plain_ascii(b) {
+            self.write_literal_char(b as char)
+        } else {
+            write!(self.wtr, "(?-u:\\x{:02X})", b)
+        }
+    }
+
+    /// Writes a byte that appears inside a byte class. Bytes that are not
+    /// plain ASCII are written as a bare hex escape.
+    fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result {
+        if Self::is_plain_ascii(b) {
+            self.write_literal_char(b as char)
+        } else {
+            write!(self.wtr, "\\x{:02X}", b)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::Printer;
+ use crate::ParserBuilder;
+
+    /// Runs `roundtrip_with` on an unmodified `ParserBuilder`.
+    fn roundtrip(given: &str, expected: &str) {
+        roundtrip_with(|builder| builder, given, expected);
+    }
+
+    /// Runs `roundtrip_with` on a `ParserBuilder` that has
+    /// `allow_invalid_utf8(true)` set.
+    fn roundtrip_bytes(given: &str, expected: &str) {
+        roundtrip_with(
+            |builder| builder.allow_invalid_utf8(true),
+            given,
+            expected,
+        );
+    }
+
+    /// Parses `given` with a parser configured by `f`, prints the resulting
+    /// HIR, checks that the printed text parses again with the same
+    /// configuration, and finally compares it with `expected`.
+    fn roundtrip_with<F>(mut f: F, given: &str, expected: &str)
+    where
+        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
+    {
+        let mut builder = ParserBuilder::new();
+        f(&mut builder);
+
+        let parsed = builder.build().parse(given).unwrap();
+        let mut printed = String::new();
+        Printer::new().print(&parsed, &mut printed).unwrap();
+
+        // Check that the result is actually valid.
+        builder.build().parse(&printed).unwrap();
+
+        assert_eq!(expected, printed);
+    }
+
+    #[test]
+    fn print_literal() {
+        // Each row is `(use roundtrip_bytes, given, expected)`.
+        let rows: &[(bool, &str, &str)] = &[
+            (false, "a", "a"),
+            (false, r"\xff", "\u{FF}"),
+            (true, r"\xff", "\u{FF}"),
+            (true, r"(?-u)\xff", r"(?-u:\xFF)"),
+            (false, "☃", "☃"),
+        ];
+        for &(bytes, given, expected) in rows {
+            if bytes {
+                roundtrip_bytes(given, expected);
+            } else {
+                roundtrip(given, expected);
+            }
+        }
+    }
+
+    #[test]
+    fn print_class() {
+        // Each row is `(use roundtrip_bytes, given, expected)`.
+        let rows: &[(bool, &str, &str)] = &[
+            (false, r"[a]", r"[a]"),
+            (false, r"[a-z]", r"[a-z]"),
+            (false, r"[a-z--b-c--x-y]", r"[ad-wz]"),
+            (false, r"[^\x01-\u{10FFFF}]", "[\u{0}]"),
+            (false, r"[-]", r"[\-]"),
+            (false, r"[☃-⛄]", r"[☃-⛄]"),
+            (false, r"(?-u)[a]", r"(?-u:[a])"),
+            (false, r"(?-u)[a-z]", r"(?-u:[a-z])"),
+            (true, r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])"),
+            // The following test that the printer escapes meta characters
+            // in character classes.
+            (false, r"[\[]", r"[\[]"),
+            (false, r"[Z-_]", r"[Z-_]"),
+            (false, r"[Z-_--Z]", r"[\[-_]"),
+            // The following test that the printer escapes meta characters
+            // in byte oriented character classes.
+            (true, r"(?-u)[\[]", r"(?-u:[\[])"),
+            (true, r"(?-u)[Z-_]", r"(?-u:[Z-_])"),
+            (true, r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])"),
+        ];
+        for &(bytes, given, expected) in rows {
+            if bytes {
+                roundtrip_bytes(given, expected);
+            } else {
+                roundtrip(given, expected);
+            }
+        }
+    }
+
+    #[test]
+    fn print_anchor() {
+        // Each row is `(given, expected)`.
+        let rows: &[(&str, &str)] = &[
+            (r"^", r"\A"),
+            (r"$", r"\z"),
+            (r"(?m)^", r"(?m:^)"),
+            (r"(?m)$", r"(?m:$)"),
+        ];
+        for &(given, expected) in rows {
+            roundtrip(given, expected);
+        }
+    }
+
+    #[test]
+    fn print_word_boundary() {
+        // Only the negated ASCII boundary goes through `roundtrip_bytes`.
+        let unicode_mode: &[(&str, &str)] = &[
+            (r"\b", r"\b"),
+            (r"\B", r"\B"),
+            (r"(?-u)\b", r"(?-u:\b)"),
+        ];
+        for &(given, expected) in unicode_mode {
+            roundtrip(given, expected);
+        }
+        roundtrip_bytes(r"(?-u)\B", r"(?-u:\B)");
+    }
+
+    #[test]
+    fn print_repetition() {
+        // Each row is `(given, expected)`.
+        let rows: &[(&str, &str)] = &[
+            ("a?", "a?"),
+            ("a??", "a??"),
+            ("(?U)a?", "a??"),
+            ("a*", "a*"),
+            ("a*?", "a*?"),
+            ("(?U)a*", "a*?"),
+            ("a+", "a+"),
+            ("a+?", "a+?"),
+            ("(?U)a+", "a+?"),
+            ("a{1}", "a{1}"),
+            ("a{1,}", "a{1,}"),
+            ("a{1,5}", "a{1,5}"),
+            ("a{1}?", "a{1}?"),
+            ("a{1,}?", "a{1,}?"),
+            ("a{1,5}?", "a{1,5}?"),
+            ("(?U)a{1}", "a{1}?"),
+            ("(?U)a{1,}", "a{1,}?"),
+            ("(?U)a{1,5}", "a{1,5}?"),
+        ];
+        for &(given, expected) in rows {
+            roundtrip(given, expected);
+        }
+    }
+
+    #[test]
+    fn print_group() {
+        // Every pattern here is expected to print back unchanged.
+        let patterns: &[&str] = &[
+            "()",
+            "(?P<foo>)",
+            "(?:)",
+            "(a)",
+            "(?P<foo>a)",
+            "(?:a)",
+            "((((a))))",
+        ];
+        for &pattern in patterns {
+            roundtrip(pattern, pattern);
+        }
+    }
+
+    #[test]
+    fn print_alternation() {
+        // Every pattern here is expected to print back unchanged.
+        let patterns: &[&str] =
+            &["|", "||", "a|b", "a|b|c", "foo|bar|quux"];
+        for &pattern in patterns {
+            roundtrip(pattern, pattern);
+        }
+    }
+}
diff --git a/vendor/regex-syntax/src/hir/translate.rs b/vendor/regex-syntax/src/hir/translate.rs
new file mode 100644
index 000000000..56afbbed8
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/translate.rs
@@ -0,0 +1,3211 @@
+/*!
+Defines a translator that converts an `Ast` to an `Hir`.
+*/
+
+use std::cell::{Cell, RefCell};
+use std::result;
+
+use crate::ast::{self, Ast, Span, Visitor};
+use crate::hir::{self, Error, ErrorKind, Hir};
+use crate::unicode::{self, ClassQuery};
+
+type Result<T> = result::Result<T, Error>;
+
+/// A builder for constructing an AST->HIR translator.
+///
+/// The builder only stores configuration; `build` copies it into a fresh
+/// `Translator`.
+#[derive(Clone, Debug)]
+pub struct TranslatorBuilder {
+ // Whether built translators may produce HIR that matches invalid UTF-8.
+ // `new` initializes this to `false`.
+ allow_invalid_utf8: bool,
+ // The flag settings every built translator starts with.
+ flags: Flags,
+}
+
+impl Default for TranslatorBuilder {
+    /// The default builder is exactly what `TranslatorBuilder::new` returns.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl TranslatorBuilder {
+    /// Create a new translator builder with a default configuration.
+    ///
+    /// By default, invalid UTF-8 is not allowed and no flag is set.
+    pub fn new() -> TranslatorBuilder {
+        let flags = Flags::default();
+        TranslatorBuilder { allow_invalid_utf8: false, flags }
+    }
+
+    /// Build a translator using the current configuration.
+    ///
+    /// The translator starts with an empty stack and a copy of this
+    /// builder's flags.
+    pub fn build(&self) -> Translator {
+        let stack = RefCell::new(Vec::new());
+        let flags = Cell::new(self.flags);
+        Translator { stack, flags, allow_invalid_utf8: self.allow_invalid_utf8 }
+    }
+
+    /// When enabled, translation will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When disabled (the default), the translator is guaranteed to produce
+    /// an expression that will only ever match valid UTF-8 (otherwise, the
+    /// translator will return an error).
+    ///
+    /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
+    /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
+    /// the translator to return an error. Namely, a negated ASCII word
+    /// boundary can result in matching positions that aren't valid UTF-8
+    /// boundaries.
+    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.allow_invalid_utf8 = yes;
+        self
+    }
+
+    /// Enable or disable the case insensitive flag (`i`) by default.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.case_insensitive = Self::opt_in(yes);
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag (`m`) by default.
+    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.multi_line = Self::opt_in(yes);
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag (`s`) by
+    /// default.
+    pub fn dot_matches_new_line(
+        &mut self,
+        yes: bool,
+    ) -> &mut TranslatorBuilder {
+        self.flags.dot_matches_new_line = Self::opt_in(yes);
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag (`U`) by default.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.swap_greed = Self::opt_in(yes);
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    ///
+    /// Unicode mode is on unless explicitly disabled, so enabling it leaves
+    /// the flag unset and disabling it records an explicit `false`.
+    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.unicode = if !yes { Some(false) } else { None };
+        self
+    }
+
+    /// Stored form of an opt-in flag: `Some(true)` when enabled, otherwise
+    /// unset (`None`) rather than an explicit `Some(false)`.
+    fn opt_in(yes: bool) -> Option<bool> {
+        if yes {
+            Some(true)
+        } else {
+            None
+        }
+    }
+}
+
+/// A translator maps abstract syntax to a high level intermediate
+/// representation.
+///
+/// A translator may benefit from reuse. That is, a translator can translate
+/// many abstract syntax trees.
+///
+/// A `Translator` can be configured in more detail via a
+/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Translator {
+ /// Our call stack, but on the heap.
+ ///
+ /// Wrapped in a `RefCell` so frames can be pushed and popped through a
+ /// shared reference.
+ stack: RefCell<Vec<HirFrame>>,
+ /// The current flag settings.
+ ///
+ /// Wrapped in a `Cell` so the active flags can be replaced through a
+ /// shared reference.
+ flags: Cell<Flags>,
+ /// Whether we're allowed to produce HIR that can match arbitrary bytes.
+ allow_invalid_utf8: bool,
+}
+
+impl Translator {
+    /// Create a new translator using the default configuration.
+    pub fn new() -> Translator {
+        TranslatorBuilder::new().build()
+    }
+
+    /// Translate the given abstract syntax tree (AST) into a high level
+    /// intermediate representation (HIR).
+    ///
+    /// If there was a problem doing the translation, then an HIR-specific
+    /// error is returned.
+    ///
+    /// The original pattern string used to produce the `Ast` *must* also be
+    /// provided. The translator does not use the pattern string during any
+    /// correct translation, but is used for error reporting.
+    ///
+    /// Each call starts from the flags this translator was configured with
+    /// and from an empty stack, so a translator can be reused safely even
+    /// after a translation that set top-level flags or returned an error.
+    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
+        // A top-level flags item such as `(?i)` is written straight into
+        // `self.flags` and is never restored by a group frame. Snapshot the
+        // configured flags so they cannot leak into the next translation.
+        let configured = self.flags.get();
+        // Frames left behind by an earlier failed translation would break
+        // the "exactly one frame" invariant checked when visiting finishes.
+        self.stack.borrow_mut().clear();
+
+        let result = ast::visit(ast, TranslatorI::new(self, pattern));
+
+        self.flags.set(configured);
+        // A successful translation pops its only frame; an error can leave
+        // partial frames behind, so drop them eagerly.
+        self.stack.borrow_mut().clear();
+        result
+    }
+}
+
+/// An HirFrame is a single stack frame, represented explicitly, which is
+/// created for each item in the Ast that we traverse.
+///
+/// Note that technically, this type doesn't represent our entire stack
+/// frame. In particular, the Ast visitor represents any state associated with
+/// traversing the Ast itself.
+#[derive(Clone, Debug)]
+enum HirFrame {
+ /// An arbitrary HIR expression. These get pushed whenever we hit a base
+ /// case in the Ast. They get popped after an inductive (i.e., recursive)
+ /// step is complete.
+ Expr(Hir),
+ /// A Unicode character class. This frame is mutated as we descend into
+ /// the Ast of a character class (which is itself its own mini recursive
+ /// structure).
+ ClassUnicode(hir::ClassUnicode),
+ /// A byte-oriented character class. This frame is mutated as we descend
+ /// into the Ast of a character class (which is itself its own mini
+ /// recursive structure).
+ ///
+ /// Byte character classes are created when Unicode mode (`u`) is disabled.
+ /// If `allow_invalid_utf8` is disabled (the default), then a byte
+ /// character is only permitted to match ASCII text.
+ ClassBytes(hir::ClassBytes),
+ /// This is pushed on to the stack upon first seeing any kind of group,
+ /// indicated by parentheses (including non-capturing groups). It is popped
+ /// upon leaving a group.
+ Group {
+ /// The old active flags when this group was opened.
+ ///
+ /// If this group sets flags, then the new active flags are set to the
+ /// result of merging the old flags with the flags introduced by this
+ /// group. If the group doesn't set any flags, then this is simply
+ /// equivalent to whatever flags were set when the group was opened.
+ ///
+ /// When this group is popped, the active flags should be restored to
+ /// the flags set here.
+ ///
+ /// The "active" flags correspond to whatever flags are set in the
+ /// Translator.
+ old_flags: Flags,
+ },
+ /// This is pushed whenever a concatenation is observed. After visiting
+ /// every sub-expression in the concatenation, the translator's stack is
+ /// popped until it sees a Concat frame.
+ ///
+ /// This frame is a marker only; it carries no data.
+ Concat,
+ /// This is pushed whenever an alternation is observed. After visiting
+ /// every sub-expression in the alternation, the translator's stack is
+ /// popped until it sees an Alternation frame.
+ ///
+ /// This frame is a marker only; it carries no data.
+ Alternation,
+}
+
+impl HirFrame {
+    /// Consume this frame and return the HIR expression it holds.
+    ///
+    /// Panics if the frame is not an `Expr`.
+    fn unwrap_expr(self) -> Hir {
+        match self {
+            HirFrame::Expr(hir) => hir,
+            other => {
+                panic!("tried to unwrap expr from HirFrame, got: {:?}", other)
+            }
+        }
+    }
+
+    /// Consume this frame and return the Unicode class it holds.
+    ///
+    /// Panics if the frame is not a `ClassUnicode`.
+    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
+        match self {
+            HirFrame::ClassUnicode(class) => class,
+            other => panic!(
+                "tried to unwrap Unicode class \
+                 from HirFrame, got: {:?}",
+                other
+            ),
+        }
+    }
+
+    /// Consume this frame and return the byte class it holds.
+    ///
+    /// Panics if the frame is not a `ClassBytes`.
+    fn unwrap_class_bytes(self) -> hir::ClassBytes {
+        match self {
+            HirFrame::ClassBytes(class) => class,
+            other => panic!(
+                "tried to unwrap byte class \
+                 from HirFrame, got: {:?}",
+                other
+            ),
+        }
+    }
+
+    /// Consume this frame and return the flags that were active when the
+    /// group it marks was entered.
+    ///
+    /// Panics if the frame is not a `Group`.
+    fn unwrap_group(self) -> Flags {
+        match self {
+            HirFrame::Group { old_flags } => old_flags,
+            other => {
+                panic!("tried to unwrap group from HirFrame, got: {:?}", other)
+            }
+        }
+    }
+}
+
+impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
+ type Output = Hir;
+ type Err = Error;
+
+    /// Produce the final HIR once the whole Ast has been visited.
+    ///
+    /// Panics unless exactly one frame remains on the stack and that frame
+    /// is an expression.
+    fn finish(self) -> Result<Hir> {
+        let depth = self.trans().stack.borrow().len();
+        assert_eq!(depth, 1);
+        let frame = self.pop().unwrap();
+        Ok(frame.unwrap_expr())
+    }
+
+    /// Push the frame (if any) that must be on the stack before the
+    /// children of `ast` are visited.
+    ///
+    /// Bracketed classes get an empty class frame matching the current
+    /// Unicode mode, groups record the flags to restore on exit, and
+    /// non-empty concatenations/alternations get their marker frame.
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Class(ast::Class::Bracketed(_)) => {
+                let frame = if self.flags().unicode() {
+                    HirFrame::ClassUnicode(hir::ClassUnicode::empty())
+                } else {
+                    HirFrame::ClassBytes(hir::ClassBytes::empty())
+                };
+                self.push(frame);
+            }
+            Ast::Group(ref group) => {
+                // Applying the group's own flags (if it has any) yields the
+                // flags that were active before; otherwise the active flags
+                // are simply remembered as they are.
+                let old_flags = match group.flags() {
+                    Some(group_flags) => self.set_flags(group_flags),
+                    None => self.flags(),
+                };
+                self.push(HirFrame::Group { old_flags });
+            }
+            Ast::Concat(ref concat) => {
+                if !concat.asts.is_empty() {
+                    self.push(HirFrame::Concat);
+                }
+            }
+            Ast::Alternation(ref alternation) => {
+                if !alternation.asts.is_empty() {
+                    self.push(HirFrame::Alternation);
+                }
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+ /// Post-order hook: turn `ast` into HIR.
+ ///
+ /// Every arm ends by pushing exactly one `HirFrame::Expr` for this node,
+ /// after popping whatever frames its children (and `visit_pre`) left on
+ /// the stack. Translation errors are returned to the caller.
+ fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+ match *ast {
+ Ast::Empty(_) => {
+ self.push(HirFrame::Expr(Hir::empty()));
+ }
+ Ast::Flags(ref x) => {
+ self.set_flags(&x.flags);
+ // Flags in the AST are generally considered directives and
+ // not actual sub-expressions. However, they can be used in
+ // the concrete syntax like `((?i))`, and we need some kind of
+ // indication of an expression there, and Empty is the correct
+ // choice.
+ //
+ // There can also be things like `(?i)+`, but we rule those out
+ // in the parser. In the future, we might allow them for
+ // consistency sake.
+ self.push(HirFrame::Expr(Hir::empty()));
+ }
+ Ast::Literal(ref x) => {
+ self.push(HirFrame::Expr(self.hir_literal(x)?));
+ }
+ Ast::Dot(span) => {
+ self.push(HirFrame::Expr(self.hir_dot(span)?));
+ }
+ Ast::Assertion(ref x) => {
+ self.push(HirFrame::Expr(self.hir_assertion(x)?));
+ }
+ Ast::Class(ast::Class::Perl(ref x)) => {
+ if self.flags().unicode() {
+ let cls = self.hir_perl_unicode_class(x)?;
+ let hcls = hir::Class::Unicode(cls);
+ self.push(HirFrame::Expr(Hir::class(hcls)));
+ } else {
+ let cls = self.hir_perl_byte_class(x);
+ let hcls = hir::Class::Bytes(cls);
+ self.push(HirFrame::Expr(Hir::class(hcls)));
+ }
+ }
+ Ast::Class(ast::Class::Unicode(ref x)) => {
+ let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
+ self.push(HirFrame::Expr(Hir::class(cls)));
+ }
+ Ast::Class(ast::Class::Bracketed(ref ast)) => {
+ // The class frame popped here is the one pushed by `visit_pre`
+ // and filled in while visiting the class set items.
+ if self.flags().unicode() {
+ let mut cls = self.pop().unwrap().unwrap_class_unicode();
+ self.unicode_fold_and_negate(
+ &ast.span,
+ ast.negated,
+ &mut cls,
+ )?;
+ // Checked after folding/negation: e.g. negating a class that
+ // covers everything leaves nothing to match.
+ if cls.ranges().is_empty() {
+ return Err(self.error(
+ ast.span,
+ ErrorKind::EmptyClassNotAllowed,
+ ));
+ }
+ let expr = Hir::class(hir::Class::Unicode(cls));
+ self.push(HirFrame::Expr(expr));
+ } else {
+ let mut cls = self.pop().unwrap().unwrap_class_bytes();
+ self.bytes_fold_and_negate(
+ &ast.span,
+ ast.negated,
+ &mut cls,
+ )?;
+ if cls.ranges().is_empty() {
+ return Err(self.error(
+ ast.span,
+ ErrorKind::EmptyClassNotAllowed,
+ ));
+ }
+
+ let expr = Hir::class(hir::Class::Bytes(cls));
+ self.push(HirFrame::Expr(expr));
+ }
+ }
+ Ast::Repetition(ref x) => {
+ let expr = self.pop().unwrap().unwrap_expr();
+ self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
+ }
+ Ast::Group(ref x) => {
+ // Pop order matters: the group's sub-expression sits on top of
+ // the `Group` frame pushed by `visit_pre`.
+ let expr = self.pop().unwrap().unwrap_expr();
+ let old_flags = self.pop().unwrap().unwrap_group();
+ // Leaving the group ends the scope of any flags it set.
+ self.trans().flags.set(old_flags);
+ self.push(HirFrame::Expr(self.hir_group(x, expr)));
+ }
+ Ast::Concat(_) => {
+ // Pop expressions until a non-expression frame is hit; that
+ // frame (the `Concat` marker) is consumed by the failed match.
+ let mut exprs = vec![];
+ while let Some(HirFrame::Expr(expr)) = self.pop() {
+ // Empty sub-expressions contribute nothing to a concatenation.
+ if !expr.kind().is_empty() {
+ exprs.push(expr);
+ }
+ }
+ // Frames were popped last-to-first; restore source order.
+ exprs.reverse();
+ self.push(HirFrame::Expr(Hir::concat(exprs)));
+ }
+ Ast::Alternation(_) => {
+ // Same popping scheme as `Concat`, but empty branches are kept.
+ let mut exprs = vec![];
+ while let Some(HirFrame::Expr(expr)) = self.pop() {
+ exprs.push(expr);
+ }
+ exprs.reverse();
+ self.push(HirFrame::Expr(Hir::alternation(exprs)));
+ }
+ }
+ Ok(())
+ }
+
+    /// Before descending into a nested bracketed class, push a fresh empty
+    /// class frame (Unicode or bytes, depending on the active flags) for it
+    /// to accumulate into. All other items need no preparation; unions are
+    /// handled by the visitor itself.
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        if let ast::ClassSetItem::Bracketed(_) = *ast {
+            let frame = if self.flags().unicode() {
+                HirFrame::ClassUnicode(hir::ClassUnicode::empty())
+            } else {
+                HirFrame::ClassBytes(hir::ClassBytes::empty())
+            };
+            self.push(frame);
+        }
+        Ok(())
+    }
+
+ /// Post-order hook for one item of a character class set.
+ ///
+ /// Each non-trivial arm pops the class frame currently being built, adds
+ /// the item to it, and pushes the frame back. Which frame kind is expected
+ /// (Unicode or bytes) follows the active Unicode flag.
+ fn visit_class_set_item_post(
+ &mut self,
+ ast: &ast::ClassSetItem,
+ ) -> Result<()> {
+ match *ast {
+ ast::ClassSetItem::Empty(_) => {}
+ ast::ClassSetItem::Literal(ref x) => {
+ if self.flags().unicode() {
+ let mut cls = self.pop().unwrap().unwrap_class_unicode();
+ cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
+ self.push(HirFrame::ClassUnicode(cls));
+ } else {
+ let mut cls = self.pop().unwrap().unwrap_class_bytes();
+ let byte = self.class_literal_byte(x)?;
+ cls.push(hir::ClassBytesRange::new(byte, byte));
+ self.push(HirFrame::ClassBytes(cls));
+ }
+ }
+ ast::ClassSetItem::Range(ref x) => {
+ if self.flags().unicode() {
+ let mut cls = self.pop().unwrap().unwrap_class_unicode();
+ cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
+ self.push(HirFrame::ClassUnicode(cls));
+ } else {
+ let mut cls = self.pop().unwrap().unwrap_class_bytes();
+ let start = self.class_literal_byte(&x.start)?;
+ let end = self.class_literal_byte(&x.end)?;
+ cls.push(hir::ClassBytesRange::new(start, end));
+ self.push(HirFrame::ClassBytes(cls));
+ }
+ }
+ ast::ClassSetItem::Ascii(ref x) => {
+ if self.flags().unicode() {
+ let xcls = self.hir_ascii_unicode_class(x)?;
+ let mut cls = self.pop().unwrap().unwrap_class_unicode();
+ cls.union(&xcls);
+ self.push(HirFrame::ClassUnicode(cls));
+ } else {
+ let xcls = self.hir_ascii_byte_class(x)?;
+ let mut cls = self.pop().unwrap().unwrap_class_bytes();
+ cls.union(&xcls);
+ self.push(HirFrame::ClassBytes(cls));
+ }
+ }
+ ast::ClassSetItem::Unicode(ref x) => {
+ // `hir_unicode_class` returns an error when Unicode mode is
+ // disabled, so the Unicode frame is only popped in Unicode mode.
+ let xcls = self.hir_unicode_class(x)?;
+ let mut cls = self.pop().unwrap().unwrap_class_unicode();
+ cls.union(&xcls);
+ self.push(HirFrame::ClassUnicode(cls));
+ }
+ ast::ClassSetItem::Perl(ref x) => {
+ if self.flags().unicode() {
+ let xcls = self.hir_perl_unicode_class(x)?;
+ let mut cls = self.pop().unwrap().unwrap_class_unicode();
+ cls.union(&xcls);
+ self.push(HirFrame::ClassUnicode(cls));
+ } else {
+ let xcls = self.hir_perl_byte_class(x);
+ let mut cls = self.pop().unwrap().unwrap_class_bytes();
+ cls.union(&xcls);
+ self.push(HirFrame::ClassBytes(cls));
+ }
+ }
+ ast::ClassSetItem::Bracketed(ref ast) => {
+ // Two frames are involved: `cls1` is the nested class (pushed by
+ // `visit_class_set_item_pre`), `cls2` is the enclosing class it
+ // gets merged into.
+ if self.flags().unicode() {
+ let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
+ self.unicode_fold_and_negate(
+ &ast.span,
+ ast.negated,
+ &mut cls1,
+ )?;
+
+ let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
+ cls2.union(&cls1);
+ self.push(HirFrame::ClassUnicode(cls2));
+ } else {
+ let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
+ self.bytes_fold_and_negate(
+ &ast.span,
+ ast.negated,
+ &mut cls1,
+ )?;
+
+ let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
+ cls2.union(&cls1);
+ self.push(HirFrame::ClassBytes(cls2));
+ }
+ }
+ // This is handled automatically by the visitor.
+ ast::ClassSetItem::Union(_) => {}
+ }
+ Ok(())
+ }
+
+    /// Before visiting a binary class set operation, push an empty class
+    /// frame (Unicode or bytes, per the active flags) for the operands
+    /// visited next to accumulate into.
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        let frame = if self.flags().unicode() {
+            HirFrame::ClassUnicode(hir::ClassUnicode::empty())
+        } else {
+            HirFrame::ClassBytes(hir::ClassBytes::empty())
+        };
+        self.push(frame);
+        Ok(())
+    }
+
+    /// Between the two operands of a binary class set operation, push a
+    /// second empty class frame (Unicode or bytes, per the active flags) so
+    /// the following operand accumulates separately from the previous one.
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        let frame = if self.flags().unicode() {
+            HirFrame::ClassUnicode(hir::ClassUnicode::empty())
+        } else {
+            HirFrame::ClassBytes(hir::ClassBytes::empty())
+        };
+        self.push(frame);
+        Ok(())
+    }
+
+ /// Post-order hook for a binary class set operation (intersection,
+ /// difference or symmetric difference).
+ ///
+ /// Pops three class frames, top first: the right operand (frame pushed by
+ /// `visit_class_set_binary_op_in`), the left operand (frame pushed by
+ /// `visit_class_set_binary_op_pre`) and the enclosing class. The result of
+ /// the operation is unioned into the enclosing class, which is pushed
+ /// back.
+ fn visit_class_set_binary_op_post(
+ &mut self,
+ op: &ast::ClassSetBinaryOp,
+ ) -> Result<()> {
+ use crate::ast::ClassSetBinaryOpKind::*;
+
+ if self.flags().unicode() {
+ let mut rhs = self.pop().unwrap().unwrap_class_unicode();
+ let mut lhs = self.pop().unwrap().unwrap_class_unicode();
+ let mut cls = self.pop().unwrap().unwrap_class_unicode();
+ // Both operands are case folded before the set operation is applied.
+ if self.flags().case_insensitive() {
+ rhs.try_case_fold_simple().map_err(|_| {
+ self.error(
+ op.rhs.span().clone(),
+ ErrorKind::UnicodeCaseUnavailable,
+ )
+ })?;
+ lhs.try_case_fold_simple().map_err(|_| {
+ self.error(
+ op.lhs.span().clone(),
+ ErrorKind::UnicodeCaseUnavailable,
+ )
+ })?;
+ }
+ // The operation is applied in place to `lhs`.
+ match op.kind {
+ Intersection => lhs.intersect(&rhs),
+ Difference => lhs.difference(&rhs),
+ SymmetricDifference => lhs.symmetric_difference(&rhs),
+ }
+ cls.union(&lhs);
+ self.push(HirFrame::ClassUnicode(cls));
+ } else {
+ let mut rhs = self.pop().unwrap().unwrap_class_bytes();
+ let mut lhs = self.pop().unwrap().unwrap_class_bytes();
+ let mut cls = self.pop().unwrap().unwrap_class_bytes();
+ // Byte class case folding cannot fail, unlike the Unicode variant.
+ if self.flags().case_insensitive() {
+ rhs.case_fold_simple();
+ lhs.case_fold_simple();
+ }
+ match op.kind {
+ Intersection => lhs.intersect(&rhs),
+ Difference => lhs.difference(&rhs),
+ SymmetricDifference => lhs.symmetric_difference(&rhs),
+ }
+ cls.union(&lhs);
+ self.push(HirFrame::ClassBytes(cls));
+ }
+ Ok(())
+ }
+}
+
+/// The internal implementation of a translator.
+///
+/// This type is responsible for carrying around the original pattern string,
+/// which is not tied to the internal state of a translator.
+///
+/// A TranslatorI exists for the time it takes to translate a single Ast.
+#[derive(Clone, Debug)]
+struct TranslatorI<'t, 'p> {
+ // The translator whose stack and flags are read and updated during the
+ // traversal (both use interior mutability, hence the shared reference).
+ trans: &'t Translator,
+ // The pattern the Ast was parsed from; copied into every error created
+ // by `error`.
+ pattern: &'p str,
+}
+
+impl<'t, 'p> TranslatorI<'t, 'p> {
+    /// Build a new internal translator borrowing the given translator and
+    /// pattern.
+    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
+        TranslatorI { trans, pattern }
+    }
+
+    /// Borrow the underlying translator.
+    fn trans(&self) -> &Translator {
+        self.trans
+    }
+
+    /// Push the given frame on top of the translator's stack.
+    fn push(&self, frame: HirFrame) {
+        let mut stack = self.trans().stack.borrow_mut();
+        stack.push(frame);
+    }
+
+    /// Remove and return the top frame of the translator's stack, or `None`
+    /// if the stack is empty.
+    fn pop(&self) -> Option<HirFrame> {
+        let mut stack = self.trans().stack.borrow_mut();
+        stack.pop()
+    }
+
+ /// Create a new error with the given span and error type.
+ fn error(&self, span: Span, kind: ErrorKind) -> Error {
+ Error { kind: kind, pattern: self.pattern.to_string(), span: span }
+ }
+
+    /// Return a copy of the flags currently active in the translator.
+    fn flags(&self) -> Flags {
+        let trans = self.trans();
+        trans.flags.get()
+    }
+
+    /// Make the flags given in the AST active and return the flags that
+    /// were active before.
+    ///
+    /// Flags not mentioned in `ast_flags` keep their previous setting.
+    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
+        let previous = self.flags();
+        let mut merged = Flags::from_ast(ast_flags);
+        merged.merge(&previous);
+        self.trans().flags.set(merged);
+        previous
+    }
+
+    /// Translate a literal.
+    ///
+    /// Raw (non-ASCII) bytes become byte literals directly; everything else
+    /// is translated as a character, taking case insensitivity into account.
+    fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
+        match self.literal_to_char(lit)? {
+            byte @ hir::Literal::Byte(_) => Ok(Hir::literal(byte)),
+            hir::Literal::Unicode(ch) => {
+                if self.flags().case_insensitive() {
+                    self.hir_from_char_case_insensitive(lit.span, ch)
+                } else {
+                    self.hir_from_char(lit.span, ch)
+                }
+            }
+        }
+    }
+
+    /// Convert an Ast literal to its scalar representation.
+    ///
+    /// In Unicode mode this always succeeds with the literal's `char`.
+    ///
+    /// Outside Unicode mode, a literal that does not denote a byte is still
+    /// returned as its `char`, and an ASCII byte is returned as the
+    /// corresponding `char`. A non-ASCII byte is returned as a raw byte if
+    /// invalid UTF-8 is allowed, and is an error otherwise.
+    fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
+        if self.flags().unicode() {
+            return Ok(hir::Literal::Unicode(lit.c));
+        }
+        match lit.byte() {
+            None => Ok(hir::Literal::Unicode(lit.c)),
+            Some(byte) if byte <= 0x7F => {
+                Ok(hir::Literal::Unicode(byte as char))
+            }
+            Some(_) if !self.trans().allow_invalid_utf8 => {
+                Err(self.error(lit.span, ErrorKind::InvalidUtf8))
+            }
+            Some(byte) => Ok(hir::Literal::Byte(byte)),
+        }
+    }
+
+    /// Translate a single character into a literal.
+    ///
+    /// Outside Unicode mode, a character that needs more than one byte in
+    /// UTF-8 is rejected.
+    fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
+        let multi_byte = c.len_utf8() > 1;
+        if multi_byte && !self.flags().unicode() {
+            Err(self.error(span, ErrorKind::UnicodeNotAllowed))
+        } else {
+            Ok(Hir::literal(hir::Literal::Unicode(c)))
+        }
+    }
+
+    /// Translate a single character under case insensitive matching.
+    ///
+    /// A character that case folding cannot affect becomes a plain literal;
+    /// otherwise the result is a class holding the character and its case
+    /// variants (a Unicode class in Unicode mode, a byte class otherwise).
+    fn hir_from_char_case_insensitive(
+        &self,
+        span: Span,
+        c: char,
+    ) -> Result<Hir> {
+        if !self.flags().unicode() {
+            if c.len_utf8() > 1 {
+                return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+            }
+            return match c {
+                // Only ASCII letters are affected by byte-level folding.
+                'A'..='Z' | 'a'..='z' => {
+                    let single = hir::ClassBytesRange::new(c as u8, c as u8);
+                    let mut cls = hir::ClassBytes::new(vec![single]);
+                    cls.case_fold_simple();
+                    Ok(Hir::class(hir::Class::Bytes(cls)))
+                }
+                _ => self.hir_from_char(span, c),
+            };
+        }
+
+        // If case folding won't do anything, then don't bother trying.
+        let has_mapping = unicode::contains_simple_case_mapping(c, c)
+            .map_err(|_| self.error(span, ErrorKind::UnicodeCaseUnavailable))?;
+        if !has_mapping {
+            return self.hir_from_char(span, c);
+        }
+        let single = hir::ClassUnicodeRange::new(c, c);
+        let mut cls = hir::ClassUnicode::new(vec![single]);
+        cls.try_case_fold_simple()
+            .map_err(|_| self.error(span, ErrorKind::UnicodeCaseUnavailable))?;
+        Ok(Hir::class(hir::Class::Unicode(cls)))
+    }
+
+    /// Translate `.`.
+    ///
+    /// Outside Unicode mode the dot is byte oriented, which is an error
+    /// unless invalid UTF-8 is allowed. The `s` flag selects between the
+    /// "any" and the "dot" expression.
+    fn hir_dot(&self, span: Span) -> Result<Hir> {
+        let bytes = !self.flags().unicode();
+        if bytes && !self.trans().allow_invalid_utf8 {
+            return Err(self.error(span, ErrorKind::InvalidUtf8));
+        }
+        if self.flags().dot_matches_new_line() {
+            Ok(Hir::any(bytes))
+        } else {
+            Ok(Hir::dot(bytes))
+        }
+    }
+
+    /// Translate an anchor or word boundary assertion.
+    ///
+    /// `^`/`$` are line anchors only in multi-line mode and text anchors
+    /// otherwise. Word boundaries are Unicode-aware in Unicode mode and
+    /// ASCII otherwise; the negated ASCII boundary is an error unless
+    /// invalid UTF-8 is allowed.
+    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
+        let flags = self.flags();
+        let hir = match asst.kind {
+            ast::AssertionKind::StartLine if flags.multi_line() => {
+                Hir::anchor(hir::Anchor::StartLine)
+            }
+            ast::AssertionKind::StartLine => {
+                Hir::anchor(hir::Anchor::StartText)
+            }
+            ast::AssertionKind::EndLine if flags.multi_line() => {
+                Hir::anchor(hir::Anchor::EndLine)
+            }
+            ast::AssertionKind::EndLine => Hir::anchor(hir::Anchor::EndText),
+            ast::AssertionKind::StartText => {
+                Hir::anchor(hir::Anchor::StartText)
+            }
+            ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
+            ast::AssertionKind::WordBoundary if flags.unicode() => {
+                Hir::word_boundary(hir::WordBoundary::Unicode)
+            }
+            ast::AssertionKind::WordBoundary => {
+                Hir::word_boundary(hir::WordBoundary::Ascii)
+            }
+            ast::AssertionKind::NotWordBoundary if flags.unicode() => {
+                Hir::word_boundary(hir::WordBoundary::UnicodeNegate)
+            }
+            ast::AssertionKind::NotWordBoundary => {
+                // It is possible for negated ASCII word boundaries to
+                // match at invalid UTF-8 boundaries, even when searching
+                // valid UTF-8.
+                if !self.trans().allow_invalid_utf8 {
+                    return Err(self.error(asst.span, ErrorKind::InvalidUtf8));
+                }
+                Hir::word_boundary(hir::WordBoundary::AsciiNegate)
+            }
+        };
+        Ok(hir)
+    }
+
+    /// Wrap an already translated sub-expression in the HIR group that
+    /// corresponds to the given AST group (indexed, named or non-capturing).
+    fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
+        let hir = Box::new(expr);
+        let kind = match group.kind {
+            ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
+            ast::GroupKind::CaptureIndex(index) => {
+                hir::GroupKind::CaptureIndex(index)
+            }
+            ast::GroupKind::CaptureName(ref capture) => {
+                let name = capture.name.clone();
+                hir::GroupKind::CaptureName { name, index: capture.index }
+            }
+        };
+        Hir::group(hir::Group { kind, hir })
+    }
+
+    /// Wrap an already translated sub-expression in a repetition.
+    ///
+    /// The repetition kind is carried over from the AST; greediness is
+    /// inverted when the swap-greed flag is active.
+    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
+        let kind = match rep.op.kind {
+            ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
+            ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
+            ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
+            ast::RepetitionKind::Range(ref range) => {
+                let range = match *range {
+                    ast::RepetitionRange::Exactly(m) => {
+                        hir::RepetitionRange::Exactly(m)
+                    }
+                    ast::RepetitionRange::AtLeast(m) => {
+                        hir::RepetitionRange::AtLeast(m)
+                    }
+                    ast::RepetitionRange::Bounded(m, n) => {
+                        hir::RepetitionRange::Bounded(m, n)
+                    }
+                };
+                hir::RepetitionKind::Range(range)
+            }
+        };
+        // Swapping greed flips the greediness written in the pattern.
+        let greedy = rep.greedy != self.flags().swap_greed();
+        Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) })
+    }
+
+    /// Translate a Unicode class such as `\pL` or `\p{scx=Greek}`.
+    ///
+    /// Fails when Unicode mode is disabled, when the property lookup fails,
+    /// when case folding is needed but unavailable, or when the resulting
+    /// class (after folding and negation) is empty.
+    fn hir_unicode_class(
+        &self,
+        ast_class: &ast::ClassUnicode,
+    ) -> Result<hir::ClassUnicode> {
+        use crate::ast::ClassUnicodeKind::*;
+
+        let span = ast_class.span;
+        if !self.flags().unicode() {
+            return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+        }
+        let query = match ast_class.kind {
+            OneLetter(name) => ClassQuery::OneLetter(name),
+            Named(ref name) => ClassQuery::Binary(name),
+            NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
+                property_name: name,
+                property_value: value,
+            },
+        };
+        let looked_up = unicode::class(query);
+        let mut class = self.convert_unicode_class_error(&span, looked_up)?;
+        self.unicode_fold_and_negate(&span, ast_class.negated, &mut class)?;
+        if class.ranges().is_empty() {
+            return Err(self.error(span, ErrorKind::EmptyClassNotAllowed));
+        }
+        Ok(class)
+    }
+
+    /// Translate an ASCII class (e.g. `[:alpha:]`) into a Unicode class,
+    /// applying case folding and negation as required.
+    fn hir_ascii_unicode_class(
+        &self,
+        ast: &ast::ClassAscii,
+    ) -> Result<hir::ClassUnicode> {
+        let ranges = ascii_class(&ast.kind)
+            .iter()
+            .map(|&(start, end)| hir::ClassUnicodeRange::new(start, end));
+        let mut class = hir::ClassUnicode::new(ranges);
+        self.unicode_fold_and_negate(&ast.span, ast.negated, &mut class)?;
+        Ok(class)
+    }
+
+    /// Translate an ASCII class (e.g. `[:alpha:]`) into a byte class,
+    /// applying case folding and negation as required.
+    fn hir_ascii_byte_class(
+        &self,
+        ast: &ast::ClassAscii,
+    ) -> Result<hir::ClassBytes> {
+        let ranges = ascii_class(&ast.kind).iter().map(|&(start, end)| {
+            hir::ClassBytesRange::new(start as u8, end as u8)
+        });
+        let mut class = hir::ClassBytes::new(ranges);
+        self.bytes_fold_and_negate(&ast.span, ast.negated, &mut class)?;
+        Ok(class)
+    }
+
+    /// Translate a Perl class (`\d`, `\s`, `\w`) into a Unicode class.
+    ///
+    /// Panics if Unicode mode is disabled.
+    fn hir_perl_unicode_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> Result<hir::ClassUnicode> {
+        use crate::ast::ClassPerlKind::*;
+
+        assert!(self.flags().unicode());
+        let looked_up = match ast_class.kind {
+            Digit => unicode::perl_digit(),
+            Space => unicode::perl_space(),
+            Word => unicode::perl_word(),
+        };
+        let span = &ast_class.span;
+        let mut class = self.convert_unicode_class_error(span, looked_up)?;
+        // We needn't apply case folding here because the Perl Unicode classes
+        // are already closed under Unicode simple case folding.
+        if ast_class.negated {
+            class.negate();
+        }
+        Ok(class)
+    }
+
+    /// Translate a Perl class (`\d`, `\s`, `\w`) into the byte class of the
+    /// equally named ASCII class.
+    ///
+    /// Panics if Unicode mode is enabled.
+    fn hir_perl_byte_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> hir::ClassBytes {
+        use crate::ast::ClassPerlKind::*;
+
+        assert!(!self.flags().unicode());
+        let ascii_kind = match ast_class.kind {
+            Digit => ast::ClassAsciiKind::Digit,
+            Space => ast::ClassAsciiKind::Space,
+            Word => ast::ClassAsciiKind::Word,
+        };
+        let mut class = hir_ascii_class_bytes(&ascii_kind);
+        // We needn't apply case folding here because the Perl ASCII classes
+        // are already closed (under ASCII case folding).
+        if ast_class.negated {
+            class.negate();
+        }
+        class
+    }
+
+    /// Converts the given Unicode specific error to an HIR translation error.
+    ///
+    /// The span given should approximate the position at which an error would
+    /// occur. A successful result is passed through unchanged.
+    fn convert_unicode_class_error(
+        &self,
+        span: &Span,
+        result: unicode::Result<hir::ClassUnicode>,
+    ) -> Result<hir::ClassUnicode> {
+        result.map_err(|err| {
+            let kind = match err {
+                unicode::Error::PropertyNotFound => {
+                    ErrorKind::UnicodePropertyNotFound
+                }
+                unicode::Error::PropertyValueNotFound => {
+                    ErrorKind::UnicodePropertyValueNotFound
+                }
+                unicode::Error::PerlClassNotFound => {
+                    ErrorKind::UnicodePerlClassNotFound
+                }
+            };
+            self.error(span.clone(), kind)
+        })
+    }
+
+    /// Apply case folding (when the `i` flag is active) and then negation
+    /// (when requested) to a Unicode class, in place.
+    ///
+    /// Fails if case folding is required but unavailable.
+    fn unicode_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassUnicode,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // Unicode scalar value.
+        let fold = self.flags().case_insensitive();
+        if fold {
+            class.try_case_fold_simple().map_err(|_| {
+                self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
+            })?;
+        }
+        if negated {
+            class.negate();
+        }
+        Ok(())
+    }
+
+    /// Apply case folding (when the `i` flag is active) and then negation
+    /// (when requested) to a byte class, in place.
+    ///
+    /// Fails if the resulting class contains non-ASCII bytes while invalid
+    /// UTF-8 is not allowed.
+    fn bytes_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassBytes,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // byte.
+        let fold = self.flags().case_insensitive();
+        if fold {
+            class.case_fold_simple();
+        }
+        if negated {
+            class.negate();
+        }
+        let ascii_required = !self.trans().allow_invalid_utf8;
+        if ascii_required && !class.is_all_ascii() {
+            Err(self.error(span.clone(), ErrorKind::InvalidUtf8))
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Return a scalar byte value suitable for use as a literal in a byte
+    /// character class.
+    ///
+    /// Raw bytes are returned as they are and ASCII characters as their
+    /// byte value; any other character is an error.
+    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
+        match self.literal_to_char(ast)? {
+            hir::Literal::Byte(byte) => Ok(byte),
+            hir::Literal::Unicode(ch) if ch <= 0x7F as char => Ok(ch as u8),
+            // We can't feasibly support Unicode in
+            // byte oriented classes. Byte classes don't
+            // do Unicode case folding.
+            hir::Literal::Unicode(_) => {
+                Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
+            }
+        }
+    }
+}
+
+/// A translator's representation of a regular expression's flags at any given
+/// moment in time.
+///
+/// Each flag can be in one of three states: absent, present but disabled or
+/// present but enabled.
+///
+/// An absent flag (`None`) reads as disabled through the accessor methods,
+/// except for `unicode`, which reads as enabled when absent.
+#[derive(Clone, Copy, Debug, Default)]
+struct Flags {
+ // The `i` flag.
+ case_insensitive: Option<bool>,
+ // The `m` flag.
+ multi_line: Option<bool>,
+ // The `s` flag.
+ dot_matches_new_line: Option<bool>,
+ // The `U` flag.
+ swap_greed: Option<bool>,
+ // The `u` flag.
+ unicode: Option<bool>,
+ // Note that `ignore_whitespace` is omitted here because it is handled
+ // entirely in the parser.
+}
+
+impl Flags {
+    /// Build flags from the items of an AST flag group such as `(?is-u)`.
+    ///
+    /// Flags listed before a negation are recorded as enabled, flags listed
+    /// after it as disabled; flags not listed stay absent.
+    fn from_ast(ast: &ast::Flags) -> Flags {
+        let mut flags = Flags::default();
+        let mut enable = true;
+        for item in &ast.items {
+            let flag = match item.kind {
+                ast::FlagsItemKind::Negation => {
+                    enable = false;
+                    continue;
+                }
+                ast::FlagsItemKind::Flag(ref flag) => flag,
+            };
+            let setting = Some(enable);
+            match *flag {
+                ast::Flag::CaseInsensitive => flags.case_insensitive = setting,
+                ast::Flag::MultiLine => flags.multi_line = setting,
+                ast::Flag::DotMatchesNewLine => {
+                    flags.dot_matches_new_line = setting
+                }
+                ast::Flag::SwapGreed => flags.swap_greed = setting,
+                ast::Flag::Unicode => flags.unicode = setting,
+                // Handled entirely by the parser.
+                ast::Flag::IgnoreWhitespace => {}
+            }
+        }
+        flags
+    }
+
+    /// Fill every flag that is absent in `self` with its value from
+    /// `previous`; flags already present in `self` win.
+    fn merge(&mut self, previous: &Flags) {
+        self.case_insensitive =
+            self.case_insensitive.or(previous.case_insensitive);
+        self.multi_line = self.multi_line.or(previous.multi_line);
+        self.dot_matches_new_line =
+            self.dot_matches_new_line.or(previous.dot_matches_new_line);
+        self.swap_greed = self.swap_greed.or(previous.swap_greed);
+        self.unicode = self.unicode.or(previous.unicode);
+    }
+
+    /// Whether the `i` flag is in effect (off when absent).
+    fn case_insensitive(&self) -> bool {
+        self.case_insensitive == Some(true)
+    }
+
+    /// Whether the `m` flag is in effect (off when absent).
+    fn multi_line(&self) -> bool {
+        self.multi_line == Some(true)
+    }
+
+    /// Whether the `s` flag is in effect (off when absent).
+    fn dot_matches_new_line(&self) -> bool {
+        self.dot_matches_new_line == Some(true)
+    }
+
+    /// Whether the `U` flag is in effect (off when absent).
+    fn swap_greed(&self) -> bool {
+        self.swap_greed == Some(true)
+    }
+
+    /// Whether the `u` flag is in effect (on when absent).
+    fn unicode(&self) -> bool {
+        self.unicode != Some(false)
+    }
+}
+
+/// Build the byte class for the given ASCII class kind from the ranges
+/// listed by `ascii_class`.
+fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
+    let mut ranges = Vec::new();
+    for &(start, end) in ascii_class(kind) {
+        ranges.push(hir::ClassBytesRange::new(start as u8, end as u8));
+    }
+    hir::ClassBytes::new(ranges)
+}
+
+/// Return the character ranges that make up the given ASCII class.
+///
+/// Each entry is a `(start, end)` pair with both ends included (single
+/// characters are written with `start == end`). Every range lies within
+/// `\x00-\x7F`, and the ranges of a class are listed in ascending order.
+fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
+ use crate::ast::ClassAsciiKind::*;
+ match *kind {
+ Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
+ Alpha => &[('A', 'Z'), ('a', 'z')],
+ Ascii => &[('\x00', '\x7F')],
+ Blank => &[('\t', '\t'), (' ', ' ')],
+ Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
+ Digit => &[('0', '9')],
+ Graph => &[('!', '~')],
+ Lower => &[('a', 'z')],
+ Print => &[(' ', '~')],
+ Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
+ // Tab, line feed, vertical tab, form feed, carriage return, space.
+ Space => &[
+ ('\t', '\t'),
+ ('\n', '\n'),
+ ('\x0B', '\x0B'),
+ ('\x0C', '\x0C'),
+ ('\r', '\r'),
+ (' ', ' '),
+ ],
+ Upper => &[('A', 'Z')],
+ Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
+ Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::ast::parse::ParserBuilder;
+ use crate::ast::{self, Ast, Position, Span};
+ use crate::hir::{self, Hir, HirKind};
+ use crate::unicode::{self, ClassQuery};
+
+ use super::{ascii_class, TranslatorBuilder};
+
+ // We create these errors to compare with real hir::Errors in the tests.
+ // We define equality between TestError and hir::Error to disregard the
+ // pattern string in hir::Error, which is annoying to provide in tests.
+ #[derive(Clone, Debug)]
+ struct TestError {
+ span: Span,
+ kind: hir::ErrorKind,
+ }
+
+ /// Lets `assert_eq!(test_error, real_error)` compile; only span and kind
+ /// take part in the comparison.
+ impl PartialEq<hir::Error> for TestError {
+     fn eq(&self, other: &hir::Error) -> bool {
+         if self.span != other.span {
+             return false;
+         }
+         self.kind == other.kind
+     }
+ }
+
+ /// Lets `assert_eq!(real_error, test_error)` compile; only span and kind
+ /// take part in the comparison.
+ impl PartialEq<TestError> for hir::Error {
+     fn eq(&self, other: &TestError) -> bool {
+         if self.span != other.span {
+             return false;
+         }
+         self.kind == other.kind
+     }
+ }
+
+ /// Parses `pattern` into an `Ast` with octal syntax enabled, panicking if
+ /// the pattern does not parse.
+ fn parse(pattern: &str) -> Ast {
+     let parsed = ParserBuilder::new().octal(true).build().parse(pattern);
+     parsed.unwrap()
+ }
+
+ /// Translates `pattern` with invalid UTF-8 disallowed and returns the
+ /// resulting `Hir`, panicking if translation fails.
+ fn t(pattern: &str) -> Hir {
+     let ast = parse(pattern);
+     let outcome = TranslatorBuilder::new()
+         .allow_invalid_utf8(false)
+         .build()
+         .translate(pattern, &ast);
+     outcome.unwrap()
+ }
+
+ /// Translates `pattern` with invalid UTF-8 disallowed and returns the
+ /// translation error, panicking if translation unexpectedly succeeds.
+ fn t_err(pattern: &str) -> hir::Error {
+     let ast = parse(pattern);
+     let outcome = TranslatorBuilder::new()
+         .allow_invalid_utf8(false)
+         .build()
+         .translate(pattern, &ast);
+     outcome.unwrap_err()
+ }
+
+ /// Translates `pattern` with invalid UTF-8 allowed and returns the
+ /// resulting `Hir`, panicking if translation fails.
+ fn t_bytes(pattern: &str) -> Hir {
+     let ast = parse(pattern);
+     let outcome = TranslatorBuilder::new()
+         .allow_invalid_utf8(true)
+         .build()
+         .translate(pattern, &ast);
+     outcome.unwrap()
+ }
+
+ /// Builds the expected `Hir` for a Unicode literal string: the empty
+ /// expression for `""`, otherwise the concatenation of one Unicode literal
+ /// per `char` of `s`.
+ fn hir_lit(s: &str) -> Hir {
+     if s.is_empty() {
+         return Hir::empty();
+     }
+     let mut pieces = Vec::new();
+     for ch in s.chars() {
+         pieces.push(Hir::literal(hir::Literal::Unicode(ch)));
+     }
+     Hir::concat(pieces)
+ }
+
+ /// Builds the expected `Hir` for a byte literal string: the empty
+ /// expression for no bytes, a single byte literal for one byte, and a
+ /// concatenation of byte literals otherwise.
+ fn hir_blit(s: &[u8]) -> Hir {
+     match s {
+         [] => Hir::empty(),
+         [only] => Hir::literal(hir::Literal::Byte(*only)),
+         _ => {
+             let mut pieces = Vec::new();
+             for &byte in s {
+                 pieces.push(Hir::literal(hir::Literal::Byte(byte)));
+             }
+             Hir::concat(pieces)
+         }
+     }
+ }
+
+ /// Wraps `expr` in a capturing group with capture index `i`.
+ fn hir_group(i: u32, expr: Hir) -> Hir {
+     let kind = hir::GroupKind::CaptureIndex(i);
+     let group = hir::Group { kind, hir: Box::new(expr) };
+     Hir::group(group)
+ }
+
+ /// Wraps `expr` in a named capturing group with capture index `i` and the
+ /// given `name`.
+ fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
+     let kind =
+         hir::GroupKind::CaptureName { name: String::from(name), index: i };
+     let group = hir::Group { kind, hir: Box::new(expr) };
+     Hir::group(group)
+ }
+
+ /// Wraps `expr` in a non-capturing group.
+ fn hir_group_nocap(expr: Hir) -> Hir {
+     let kind = hir::GroupKind::NonCapturing;
+     let group = hir::Group { kind, hir: Box::new(expr) };
+     Hir::group(group)
+ }
+
+ /// Builds a zero-or-one repetition of `expr` with the given greediness.
+ fn hir_quest(greedy: bool, expr: Hir) -> Hir {
+     let kind = hir::RepetitionKind::ZeroOrOne;
+     let rep = hir::Repetition { kind, greedy, hir: Box::new(expr) };
+     Hir::repetition(rep)
+ }
+
+ /// Builds a zero-or-more repetition of `expr` with the given greediness.
+ fn hir_star(greedy: bool, expr: Hir) -> Hir {
+     let kind = hir::RepetitionKind::ZeroOrMore;
+     let rep = hir::Repetition { kind, greedy, hir: Box::new(expr) };
+     Hir::repetition(rep)
+ }
+
+ /// Builds a one-or-more repetition of `expr` with the given greediness.
+ fn hir_plus(greedy: bool, expr: Hir) -> Hir {
+     let kind = hir::RepetitionKind::OneOrMore;
+     let rep = hir::Repetition { kind, greedy, hir: Box::new(expr) };
+     Hir::repetition(rep)
+ }
+
+ /// Builds a counted repetition of `expr` over `range` with the given
+ /// greediness.
+ fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
+     let kind = hir::RepetitionKind::Range(range);
+     let rep = hir::Repetition { kind, greedy, hir: Box::new(expr) };
+     Hir::repetition(rep)
+ }
+
+ /// Shorthand for `Hir::alternation` over the given branches.
+ fn hir_alt(alts: Vec<Hir>) -> Hir {
+     Hir::alternation(alts)
+ }
+
+ /// Shorthand for `Hir::concat` over the given sub-expressions.
+ fn hir_cat(exprs: Vec<Hir>) -> Hir {
+     Hir::concat(exprs)
+ }
+
+ /// Looks up the Unicode class for `query` and wraps it as a class `Hir`,
+ /// panicking if the lookup fails.
+ // Only referenced from feature-gated tests, hence the lint allowance.
+ #[allow(dead_code)]
+ fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
+     let cls = unicode::class(query).unwrap();
+     Hir::class(hir::Class::Unicode(cls))
+ }
+
+ /// Wraps the class returned by `unicode::perl_word` as a class `Hir`,
+ /// panicking if that class is unavailable.
+ // Only referenced from feature-gated tests, hence the lint allowance.
+ #[allow(dead_code)]
+ fn hir_uclass_perl_word() -> Hir {
+     let cls = unicode::perl_word().unwrap();
+     Hir::class(hir::Class::Unicode(cls))
+ }
+
+ /// Builds a Unicode class `Hir` from inclusive `(start, end)` character
+ /// ranges.
+ fn hir_uclass(ranges: &[(char, char)]) -> Hir {
+     let mut class_ranges: Vec<hir::ClassUnicodeRange> =
+         Vec::with_capacity(ranges.len());
+     for &(lo, hi) in ranges {
+         class_ranges.push(hir::ClassUnicodeRange::new(lo, hi));
+     }
+     let cls = hir::ClassUnicode::new(class_ranges);
+     Hir::class(hir::Class::Unicode(cls))
+ }
+
+ /// Builds a byte class `Hir` from inclusive `(start, end)` byte ranges.
+ fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
+     let mut class_ranges: Vec<hir::ClassBytesRange> =
+         Vec::with_capacity(ranges.len());
+     for &(lo, hi) in ranges {
+         class_ranges.push(hir::ClassBytesRange::new(lo, hi));
+     }
+     let cls = hir::ClassBytes::new(class_ranges);
+     Hir::class(hir::Class::Bytes(cls))
+ }
+
+ /// Builds a byte class `Hir` from `(char, char)` ranges, asserting that
+ /// both endpoints of every range are ASCII before narrowing them to bytes.
+ fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
+     let mut class_ranges: Vec<hir::ClassBytesRange> =
+         Vec::with_capacity(ranges.len());
+     for &(s, e) in ranges {
+         // The `as u8` casts below would silently truncate non-ASCII input.
+         assert!(s as u32 <= 0x7F);
+         assert!(e as u32 <= 0x7F);
+         class_ranges.push(hir::ClassBytesRange::new(s as u8, e as u8));
+     }
+     let cls = hir::ClassBytes::new(class_ranges);
+     Hir::class(hir::Class::Bytes(cls))
+ }
+
+ /// Applies simple case folding to a class expression and returns the
+ /// folded class; panics if `expr` is not a class.
+ fn hir_case_fold(expr: Hir) -> Hir {
+     if let HirKind::Class(mut folded) = expr.into_kind() {
+         folded.case_fold_simple();
+         Hir::class(folded)
+     } else {
+         panic!("cannot case fold non-class Hir expr")
+     }
+ }
+
+ /// Negates a class expression and returns the negated class; panics if
+ /// `expr` is not a class.
+ fn hir_negate(expr: Hir) -> Hir {
+     if let HirKind::Class(mut negated) = expr.into_kind() {
+         negated.negate();
+         Hir::class(negated)
+     } else {
+         panic!("cannot negate non-class Hir expr")
+     }
+ }
+
+ /// Returns the union of two class expressions of the same flavour (both
+ /// Unicode or both bytes); panics on any other combination.
+ // Only referenced from feature-gated tests, hence the lint allowance.
+ #[allow(dead_code)]
+ fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
+     let merged = match (expr1.into_kind(), expr2.into_kind()) {
+         (
+             HirKind::Class(hir::Class::Unicode(mut lhs)),
+             HirKind::Class(hir::Class::Unicode(rhs)),
+         ) => {
+             lhs.union(&rhs);
+             hir::Class::Unicode(lhs)
+         }
+         (
+             HirKind::Class(hir::Class::Bytes(mut lhs)),
+             HirKind::Class(hir::Class::Bytes(rhs)),
+         ) => {
+             lhs.union(&rhs);
+             hir::Class::Bytes(lhs)
+         }
+         _ => panic!("cannot union non-class Hir exprs"),
+     };
+     Hir::class(merged)
+ }
+
+ /// Returns `expr1` minus `expr2` for two class expressions of the same
+ /// flavour (both Unicode or both bytes); panics on any other combination.
+ // Only referenced from feature-gated tests, hence the lint allowance.
+ #[allow(dead_code)]
+ fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
+     let remaining = match (expr1.into_kind(), expr2.into_kind()) {
+         (
+             HirKind::Class(hir::Class::Unicode(mut lhs)),
+             HirKind::Class(hir::Class::Unicode(rhs)),
+         ) => {
+             lhs.difference(&rhs);
+             hir::Class::Unicode(lhs)
+         }
+         (
+             HirKind::Class(hir::Class::Bytes(mut lhs)),
+             HirKind::Class(hir::Class::Bytes(rhs)),
+         ) => {
+             lhs.difference(&rhs);
+             hir::Class::Bytes(lhs)
+         }
+         _ => panic!("cannot difference non-class Hir exprs"),
+     };
+     Hir::class(remaining)
+ }
+
+ /// Shorthand for `Hir::anchor`.
+ fn hir_anchor(anchor: hir::Anchor) -> Hir {
+     Hir::anchor(anchor)
+ }
+
+ /// Shorthand for `Hir::word_boundary`.
+ fn hir_word(wb: hir::WordBoundary) -> Hir {
+     Hir::word_boundary(wb)
+ }
+
+ // Empty patterns, flag-only patterns, empty groups and empty alternation
+ // branches must each translate to `Hir::empty()` in the matching position.
+ #[test]
+ fn empty() {
+ assert_eq!(t(""), Hir::empty());
+ assert_eq!(t("(?i)"), Hir::empty());
+ assert_eq!(t("()"), hir_group(1, Hir::empty()));
+ assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
+ assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
+ assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
+ assert_eq!(
+ t("()|()"),
+ hir_alt(vec![
+ hir_group(1, Hir::empty()),
+ hir_group(2, Hir::empty()),
+ ])
+ );
+ assert_eq!(
+ t("(|b)"),
+ hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
+ );
+ assert_eq!(
+ t("(a|)"),
+ hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
+ );
+ assert_eq!(
+ t("(a||c)"),
+ hir_group(
+ 1,
+ hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
+ )
+ );
+ assert_eq!(
+ t("(||)"),
+ hir_group(
+ 1,
+ hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
+ )
+ );
+ }
+
+ // Literal translation: ASCII literals stay Unicode literals even under
+ // `(?-u)`, a raw `\xFF` becomes a byte literal only when invalid UTF-8 is
+ // allowed, and non-ASCII text or `\xFF` under `(?-u)` is rejected when it
+ // is not.
+ #[test]
+ fn literal() {
+ assert_eq!(t("a"), hir_lit("a"));
+ assert_eq!(t("(?-u)a"), hir_lit("a"));
+ assert_eq!(t("☃"), hir_lit("☃"));
+ assert_eq!(t("abcd"), hir_lit("abcd"));
+
+ assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
+ assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
+ assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
+ assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
+
+ assert_eq!(
+ t_err("(?-u)☃"),
+ TestError {
+ kind: hir::ErrorKind::UnicodeNotAllowed,
+ span: Span::new(
+ Position::new(5, 1, 6),
+ Position::new(8, 1, 7)
+ ),
+ }
+ );
+ assert_eq!(
+ t_err(r"(?-u)\xFF"),
+ TestError {
+ kind: hir::ErrorKind::InvalidUtf8,
+ span: Span::new(
+ Position::new(5, 1, 6),
+ Position::new(9, 1, 10)
+ ),
+ }
+ );
+ }
+
+ #[test]
+ fn literal_case_insensitive() {
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i:a)"),
+ hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("a(?i)a(?-i)a"),
+ hir_cat(vec![
+ hir_lit("a"),
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ hir_lit("a"),
+ ])
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)ab@c"),
+ hir_cat(vec![
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ hir_uclass(&[('B', 'B'), ('b', 'b')]),
+ hir_lit("@"),
+ hir_uclass(&[('C', 'C'), ('c', 'c')]),
+ ])
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)β"),
+ hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('Ī', 'Ī'),])
+ );
+
+ assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?-u)a(?i)a(?-i)a"),
+ hir_cat(vec![
+ hir_lit("a"),
+ hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+ hir_lit("a"),
+ ])
+ );
+ assert_eq!(
+ t("(?i-u)ab@c"),
+ hir_cat(vec![
+ hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+ hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
+ hir_lit("@"),
+ hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
+ ])
+ );
+
+ assert_eq!(
+ t_bytes("(?i-u)a"),
+ hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+ );
+ assert_eq!(
+ t_bytes("(?i-u)\x61"),
+ hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+ );
+ assert_eq!(
+ t_bytes(r"(?i-u)\x61"),
+ hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+ );
+ assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
+
+ assert_eq!(
+ t_err("(?i-u)β"),
+ TestError {
+ kind: hir::ErrorKind::UnicodeNotAllowed,
+ span: Span::new(
+ Position::new(6, 1, 7),
+ Position::new(8, 1, 8),
+ ),
+ }
+ );
+ }
+
+ // `.` excludes `\n` unless the `s` flag is set, in both Unicode and byte
+ // mode; byte-mode `.` is an error when invalid UTF-8 is not allowed.
+ #[test]
+ fn dot() {
+ assert_eq!(
+ t("."),
+ hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
+ );
+ assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
+ assert_eq!(
+ t_bytes("(?-u)."),
+ hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
+ );
+ assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
+
+ // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
+ assert_eq!(
+ t_err("(?-u)."),
+ TestError {
+ kind: hir::ErrorKind::InvalidUtf8,
+ span: Span::new(
+ Position::new(5, 1, 6),
+ Position::new(6, 1, 7)
+ ),
+ }
+ );
+ assert_eq!(
+ t_err("(?s-u)."),
+ TestError {
+ kind: hir::ErrorKind::InvalidUtf8,
+ span: Span::new(
+ Position::new(6, 1, 7),
+ Position::new(7, 1, 8)
+ ),
+ }
+ );
+ }
+
+ // Anchors and word boundaries: `^`/`$` become line anchors only under
+ // `(?m)`, `\A`/`\z` are always text anchors, and `\b`/`\B` switch to their
+ // ASCII variants under `(?-u)`. `(?-u)\B` is an error when invalid UTF-8
+ // is not allowed.
+ #[test]
+ fn assertions() {
+ assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
+ assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
+ assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
+ assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
+ assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
+ assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
+ assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
+ assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));
+
+ assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
+ assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
+ assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
+ assert_eq!(
+ t_bytes(r"(?-u)\B"),
+ hir_word(hir::WordBoundary::AsciiNegate)
+ );
+
+ assert_eq!(
+ t_err(r"(?-u)\B"),
+ TestError {
+ kind: hir::ErrorKind::InvalidUtf8,
+ span: Span::new(
+ Position::new(5, 1, 6),
+ Position::new(7, 1, 8)
+ ),
+ }
+ );
+ }
+
+ // Group translation: capturing groups (named or not) receive consecutive
+ // indices, non-capturing groups do not consume an index, and groups that
+ // contain only flags translate to groups around `Hir::empty()`.
+ #[test]
+ fn group() {
+ assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
+ assert_eq!(
+ t("(a)(b)"),
+ hir_cat(vec![
+ hir_group(1, hir_lit("a")),
+ hir_group(2, hir_lit("b")),
+ ])
+ );
+ assert_eq!(
+ t("(a)|(b)"),
+ hir_alt(vec![
+ hir_group(1, hir_lit("a")),
+ hir_group(2, hir_lit("b")),
+ ])
+ );
+ assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
+ assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
+ assert_eq!(
+ t("(?P<foo>a)(?P<bar>b)"),
+ hir_cat(vec![
+ hir_group_name(1, "foo", hir_lit("a")),
+ hir_group_name(2, "bar", hir_lit("b")),
+ ])
+ );
+ assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
+ assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
+ assert_eq!(
+ t("(?:a)(b)"),
+ hir_cat(vec![
+ hir_group_nocap(hir_lit("a")),
+ hir_group(1, hir_lit("b")),
+ ])
+ );
+ assert_eq!(
+ t("(a)(?:b)(c)"),
+ hir_cat(vec![
+ hir_group(1, hir_lit("a")),
+ hir_group_nocap(hir_lit("b")),
+ hir_group(2, hir_lit("c")),
+ ])
+ );
+ assert_eq!(
+ t("(a)(?P<foo>b)(c)"),
+ hir_cat(vec![
+ hir_group(1, hir_lit("a")),
+ hir_group_name(2, "foo", hir_lit("b")),
+ hir_group(3, hir_lit("c")),
+ ])
+ );
+ assert_eq!(t("()"), hir_group(1, Hir::empty()));
+ assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
+ assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
+ assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
+ }
+
+ // Flag scoping: flags set inside a group (inline or via `(?flags:...)`)
+ // stop applying once the group closes, later flag items override earlier
+ // ones, and `U` swaps the greediness of repetition operators.
+ #[test]
+ fn flags() {
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i:a)a"),
+ hir_cat(vec![
+ hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
+ hir_lit("a"),
+ ])
+ );
+ assert_eq!(
+ t("(?i-u:a)β"),
+ hir_cat(vec![
+ hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+ hir_lit("β"),
+ ])
+ );
+ assert_eq!(
+ t("(?:(?i-u)a)b"),
+ hir_cat(vec![
+ hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+ hir_lit("b"),
+ ])
+ );
+ assert_eq!(
+ t("((?i-u)a)b"),
+ hir_cat(vec![
+ hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+ hir_lit("b"),
+ ])
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)(?-i:a)a"),
+ hir_cat(vec![
+ hir_group_nocap(hir_lit("a")),
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ ])
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?im)a^"),
+ hir_cat(vec![
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ hir_anchor(hir::Anchor::StartLine),
+ ])
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?im)a^(?i-m)a^"),
+ hir_cat(vec![
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ hir_anchor(hir::Anchor::StartLine),
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ hir_anchor(hir::Anchor::StartText),
+ ])
+ );
+ assert_eq!(
+ t("(?U)a*a*?(?-U)a*a*?"),
+ hir_cat(vec![
+ hir_star(false, hir_lit("a")),
+ hir_star(true, hir_lit("a")),
+ hir_star(true, hir_lit("a")),
+ hir_star(false, hir_lit("a")),
+ ])
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?:a(?i)a)a"),
+ hir_cat(vec![
+ hir_group_nocap(hir_cat(vec![
+ hir_lit("a"),
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ ])),
+ hir_lit("a"),
+ ])
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)(?:a(?-i)a)a"),
+ hir_cat(vec![
+ hir_group_nocap(hir_cat(vec![
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ hir_lit("a"),
+ ])),
+ hir_uclass(&[('A', 'A'), ('a', 'a')]),
+ ])
+ );
+ }
+
+ // Backslash-escaped meta characters translate to the plain literal
+ // characters.
+ #[test]
+ fn escape() {
+ assert_eq!(
+ t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
+ hir_lit(r"\.+*?()|[]{}^$#")
+ );
+ }
+
+ // Repetition operators: `?`, `*`, `+` and counted ranges in greedy and
+ // lazy (`?`-suffixed) form, plus how a repetition binds relative to
+ // concatenation, grouping and alternation.
+ #[test]
+ fn repetition() {
+ assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
+ assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
+ assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
+ assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
+ assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
+ assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
+
+ assert_eq!(
+ t("a{1}"),
+ hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
+ );
+ assert_eq!(
+ t("a{1,}"),
+ hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
+ );
+ assert_eq!(
+ t("a{1,2}"),
+ hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
+ );
+ assert_eq!(
+ t("a{1}?"),
+ hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
+ );
+ assert_eq!(
+ t("a{1,}?"),
+ hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
+ );
+ assert_eq!(
+ t("a{1,2}?"),
+ hir_range(
+ false,
+ hir::RepetitionRange::Bounded(1, 2),
+ hir_lit("a"),
+ )
+ );
+
+ assert_eq!(
+ t("ab?"),
+ hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
+ );
+ assert_eq!(
+ t("(ab)?"),
+ hir_quest(
+ true,
+ hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
+ )
+ );
+ assert_eq!(
+ t("a|b?"),
+ hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
+ );
+ }
+
+ // Concatenation and alternation, at top level and nested inside
+ // capturing groups.
+ #[test]
+ fn cat_alt() {
+ assert_eq!(
+ t("(ab)"),
+ hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
+ );
+ assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
+ assert_eq!(
+ t("a|b|c"),
+ hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
+ );
+ assert_eq!(
+ t("ab|bc|cd"),
+ hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
+ );
+ assert_eq!(
+ t("(a|b)"),
+ hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
+ );
+ assert_eq!(
+ t("(a|b|c)"),
+ hir_group(
+ 1,
+ hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
+ )
+ );
+ assert_eq!(
+ t("(ab|bc|cd)"),
+ hir_group(
+ 1,
+ hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
+ )
+ );
+ assert_eq!(
+ t("(ab|(bc|(cd)))"),
+ hir_group(
+ 1,
+ hir_alt(vec![
+ hir_lit("ab"),
+ hir_group(
+ 2,
+ hir_alt(vec![
+ hir_lit("bc"),
+ hir_group(3, hir_lit("cd")),
+ ])
+ ),
+ ])
+ )
+ );
+ }
+
+ // Every `[[:name:]]` ASCII class translates to the ranges listed in
+ // `ascii_class`: as a Unicode class by default and a byte class under
+ // `(?-u)`. Also covers negation, case folding, and the error raised for a
+ // negated byte class when invalid UTF-8 is not allowed.
+ #[test]
+ fn class_ascii() {
+ assert_eq!(
+ t("[[:alnum:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
+ );
+ assert_eq!(
+ t("[[:alpha:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
+ );
+ assert_eq!(
+ t("[[:ascii:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
+ );
+ assert_eq!(
+ t("[[:blank:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
+ );
+ assert_eq!(
+ t("[[:cntrl:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
+ );
+ assert_eq!(
+ t("[[:digit:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
+ );
+ assert_eq!(
+ t("[[:graph:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
+ );
+ assert_eq!(
+ t("[[:lower:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
+ );
+ assert_eq!(
+ t("[[:print:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
+ );
+ assert_eq!(
+ t("[[:punct:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
+ );
+ assert_eq!(
+ t("[[:space:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
+ );
+ assert_eq!(
+ t("[[:upper:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
+ );
+ assert_eq!(
+ t("[[:word:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
+ );
+ assert_eq!(
+ t("[[:xdigit:]]"),
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
+ );
+
+ assert_eq!(
+ t("[[:^lower:]]"),
+ hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[[:lower:]]"),
+ hir_uclass(&[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('\u{17F}', '\u{17F}'),
+ ('\u{212A}', '\u{212A}'),
+ ])
+ );
+
+ assert_eq!(
+ t("(?-u)[[:lower:]]"),
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
+ );
+ assert_eq!(
+ t("(?i-u)[[:lower:]]"),
+ hir_case_fold(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Lower
+ )))
+ );
+
+ assert_eq!(
+ t_err("(?-u)[[:^lower:]]"),
+ TestError {
+ kind: hir::ErrorKind::InvalidUtf8,
+ span: Span::new(
+ Position::new(6, 1, 7),
+ Position::new(16, 1, 17)
+ ),
+ }
+ );
+ assert_eq!(
+ t_err("(?i-u)[[:^lower:]]"),
+ TestError {
+ kind: hir::ErrorKind::InvalidUtf8,
+ span: Span::new(
+ Position::new(7, 1, 8),
+ Position::new(17, 1, 18)
+ ),
+ }
+ );
+ }
+
+ // Several ASCII classes inside one bracketed class, one of them negated,
+ // must union correctly in both Unicode and byte mode.
+ #[test]
+ fn class_ascii_multiple() {
+ // See: https://github.com/rust-lang/regex/issues/680
+ assert_eq!(
+ t("[[:alnum:][:^ascii:]]"),
+ hir_union(
+ hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)),
+ hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
+ ),
+ );
+ assert_eq!(
+ t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
+ hir_union(
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)),
+ hir_bclass(&[(0x80, 0xFF)]),
+ ),
+ );
+ }
+
+ // Perl classes `\d`, `\s`, `\w` and their negations: Unicode tables by
+ // default, the ASCII tables from `ascii_class` under `(?-u)`. The expected
+ // values are the same with and without the `i` flag.
+ #[test]
+ #[cfg(feature = "unicode-perl")]
+ fn class_perl() {
+ // Unicode
+ assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
+ assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
+ assert_eq!(t(r"\w"), hir_uclass_perl_word());
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)\d"),
+ hir_uclass_query(ClassQuery::Binary("digit"))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)\s"),
+ hir_uclass_query(ClassQuery::Binary("space"))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
+
+ // Unicode, negated
+ assert_eq!(
+ t(r"\D"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+ );
+ assert_eq!(
+ t(r"\S"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
+ );
+ assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)\D"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)\S"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
+
+ // ASCII only
+ assert_eq!(
+ t(r"(?-u)\d"),
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
+ );
+ assert_eq!(
+ t(r"(?-u)\s"),
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
+ );
+ assert_eq!(
+ t(r"(?-u)\w"),
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
+ );
+ assert_eq!(
+ t(r"(?i-u)\d"),
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
+ );
+ assert_eq!(
+ t(r"(?i-u)\s"),
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
+ );
+ assert_eq!(
+ t(r"(?i-u)\w"),
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
+ );
+
+ // ASCII only, negated
+ assert_eq!(
+ t(r"(?-u)\D"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Digit
+ )))
+ );
+ assert_eq!(
+ t(r"(?-u)\S"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Space
+ )))
+ );
+ assert_eq!(
+ t(r"(?-u)\W"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Word
+ )))
+ );
+ assert_eq!(
+ t(r"(?i-u)\D"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Digit
+ )))
+ );
+ assert_eq!(
+ t(r"(?i-u)\S"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Space
+ )))
+ );
+ assert_eq!(
+ t(r"(?i-u)\W"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Word
+ )))
+ );
+ }
+
+ // Without the `unicode-perl` feature, Unicode `\w` must fail with
+ // `UnicodePerlClassNotFound` spanning the whole escape.
+ #[test]
+ #[cfg(not(feature = "unicode-perl"))]
+ fn class_perl_word_disabled() {
+ assert_eq!(
+ t_err(r"\w"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePerlClassNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(2, 1, 3)
+ ),
+ }
+ );
+ }
+
+ // Without both `unicode-perl` and `unicode-bool`, Unicode `\s` must fail
+ // with `UnicodePerlClassNotFound` spanning the whole escape.
+ #[test]
+ #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
+ fn class_perl_space_disabled() {
+ assert_eq!(
+ t_err(r"\s"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePerlClassNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(2, 1, 3)
+ ),
+ }
+ );
+ }
+
+ // Without both `unicode-perl` and `unicode-gencat`, Unicode `\d` must fail
+ // with `UnicodePerlClassNotFound` spanning the whole escape.
+ #[test]
+ #[cfg(all(
+ not(feature = "unicode-perl"),
+ not(feature = "unicode-gencat")
+ ))]
+ fn class_perl_digit_disabled() {
+ assert_eq!(
+ t_err(r"\d"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePerlClassNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(2, 1, 3)
+ ),
+ }
+ );
+ }
+
+ // General-category classes: the short, long, loosely spelled and
+ // `gc:`/`gc=` forms all resolve to the same class, `\P` and `gc!=`
+ // negate, and `any`/`assigned`/`ascii` are accepted. Under `(?-u)` the
+ // classes are rejected, and unknown names or values yield the matching
+ // not-found error.
+ #[test]
+ #[cfg(feature = "unicode-gencat")]
+ fn class_unicode_gencat() {
+ assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
+ assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
+ assert_eq!(
+ t(r"\p{Separator}"),
+ hir_uclass_query(ClassQuery::Binary("Z"))
+ );
+ assert_eq!(
+ t(r"\p{se PaRa ToR}"),
+ hir_uclass_query(ClassQuery::Binary("Z"))
+ );
+ assert_eq!(
+ t(r"\p{gc:Separator}"),
+ hir_uclass_query(ClassQuery::Binary("Z"))
+ );
+ assert_eq!(
+ t(r"\p{gc=Separator}"),
+ hir_uclass_query(ClassQuery::Binary("Z"))
+ );
+ assert_eq!(
+ t(r"\p{Other}"),
+ hir_uclass_query(ClassQuery::Binary("Other"))
+ );
+ assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
+
+ assert_eq!(
+ t(r"\PZ"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+ );
+ assert_eq!(
+ t(r"\P{separator}"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+ );
+ assert_eq!(
+ t(r"\P{gc!=separator}"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+ );
+
+ assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
+ assert_eq!(
+ t(r"\p{assigned}"),
+ hir_uclass_query(ClassQuery::Binary("Assigned"))
+ );
+ assert_eq!(
+ t(r"\p{ascii}"),
+ hir_uclass_query(ClassQuery::Binary("ASCII"))
+ );
+ assert_eq!(
+ t(r"\p{gc:any}"),
+ hir_uclass_query(ClassQuery::Binary("Any"))
+ );
+ assert_eq!(
+ t(r"\p{gc:assigned}"),
+ hir_uclass_query(ClassQuery::Binary("Assigned"))
+ );
+ assert_eq!(
+ t(r"\p{gc:ascii}"),
+ hir_uclass_query(ClassQuery::Binary("ASCII"))
+ );
+
+ assert_eq!(
+ t_err(r"(?-u)\pZ"),
+ TestError {
+ kind: hir::ErrorKind::UnicodeNotAllowed,
+ span: Span::new(
+ Position::new(5, 1, 6),
+ Position::new(8, 1, 9)
+ ),
+ }
+ );
+ assert_eq!(
+ t_err(r"(?-u)\p{Separator}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodeNotAllowed,
+ span: Span::new(
+ Position::new(5, 1, 6),
+ Position::new(18, 1, 19)
+ ),
+ }
+ );
+ assert_eq!(
+ t_err(r"\pE"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(3, 1, 4)
+ ),
+ }
+ );
+ assert_eq!(
+ t_err(r"\p{Foo}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(7, 1, 8)
+ ),
+ }
+ );
+ assert_eq!(
+ t_err(r"\p{gc:Foo}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(10, 1, 11)
+ ),
+ }
+ );
+ }
+
+ // Without the `unicode-gencat` feature, general-category queries
+ // (including `Any`) must fail with `UnicodePropertyNotFound`.
+ #[test]
+ #[cfg(not(feature = "unicode-gencat"))]
+ fn class_unicode_gencat_disabled() {
+ assert_eq!(
+ t_err(r"\p{Separator}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(13, 1, 14)
+ ),
+ }
+ );
+
+ assert_eq!(
+ t_err(r"\p{Any}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(7, 1, 8)
+ ),
+ }
+ );
+ }
+
+ // Script classes: plain lookup, case folding under `(?i)` (applied before
+ // negation for `\P`), and `UnicodePropertyValueNotFound` for unknown
+ // `sc:`/`scx:` values.
+ #[test]
+ #[cfg(feature = "unicode-script")]
+ fn class_unicode_script() {
+ assert_eq!(
+ t(r"\p{Greek}"),
+ hir_uclass_query(ClassQuery::Binary("Greek"))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)\p{Greek}"),
+ hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)\P{Greek}"),
+ hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+ "Greek"
+ ))))
+ );
+
+ assert_eq!(
+ t_err(r"\p{sc:Foo}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(10, 1, 11)
+ ),
+ }
+ );
+ assert_eq!(
+ t_err(r"\p{scx:Foo}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(11, 1, 12)
+ ),
+ }
+ );
+ }
+
+ // Without the `unicode-script` feature, script queries must fail with
+ // `UnicodePropertyNotFound`.
+ #[test]
+ #[cfg(not(feature = "unicode-script"))]
+ fn class_unicode_script_disabled() {
+ assert_eq!(
+ t_err(r"\p{Greek}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(9, 1, 10)
+ ),
+ }
+ );
+
+ assert_eq!(
+ t_err(r"\p{scx:Greek}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(13, 1, 14)
+ ),
+ }
+ );
+ }
+
+ // With the `unicode-age` feature, an unknown age value must fail with
+ // `UnicodePropertyValueNotFound`.
+ #[test]
+ #[cfg(feature = "unicode-age")]
+ fn class_unicode_age() {
+ assert_eq!(
+ t_err(r"\p{age:Foo}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(11, 1, 12)
+ ),
+ }
+ );
+ }
+
+ // Negating `any` leaves nothing to match, so `\P{any}` must be rejected
+ // with `EmptyClassNotAllowed`.
+ #[test]
+ #[cfg(feature = "unicode-gencat")]
+ fn class_unicode_any_empty() {
+ assert_eq!(
+ t_err(r"\P{any}"),
+ TestError {
+ kind: hir::ErrorKind::EmptyClassNotAllowed,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(7, 1, 8)
+ ),
+ }
+ );
+ }
+
+ // Without the `unicode-age` feature, age queries must fail with
+ // `UnicodePropertyNotFound`.
+ #[test]
+ #[cfg(not(feature = "unicode-age"))]
+ fn class_unicode_age_disabled() {
+ assert_eq!(
+ t_err(r"\p{age:3.0}"),
+ TestError {
+ kind: hir::ErrorKind::UnicodePropertyNotFound,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(11, 1, 12)
+ ),
+ }
+ );
+ }
+
+ #[test]
+ fn class_bracketed() {
+ assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
+ assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
+ assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
+ assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
+ assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
+ assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
+ assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
+ assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
+ #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+ assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"[\pZ]"),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ );
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"[\p{separator}]"),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ );
+ #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+ assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"[^\PZ]"),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ );
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"[^\P{separator}]"),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ );
+ #[cfg(all(
+ feature = "unicode-case",
+ any(feature = "unicode-perl", feature = "unicode-gencat")
+ ))]
+ assert_eq!(
+ t(r"(?i)[^\D]"),
+ hir_uclass_query(ClassQuery::Binary("digit"))
+ );
+ #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+ assert_eq!(
+ t(r"(?i)[^\P{greek}]"),
+ hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
+ );
+
+ assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
+ assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
+ assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
+
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[k]"),
+ hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[β]"),
+ hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('Ī', 'Ī'),])
+ );
+ assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
+
+ assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
+ assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
+ assert_eq!(
+ t_bytes("(?-u)[^a]"),
+ hir_negate(hir_bclass(&[(b'a', b'a')]))
+ );
+ #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+ assert_eq!(
+ t(r"[^\d]"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+ );
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"[^\pZ]"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
+ );
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"[^\p{separator}]"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
+ );
+ #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+ assert_eq!(
+ t(r"(?i)[^\p{greek}]"),
+ hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+ "greek"
+ ))))
+ );
+ #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+ assert_eq!(
+ t(r"(?i)[\P{greek}]"),
+ hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+ "greek"
+ ))))
+ );
+
+ // Test some weird cases.
+ assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
+
+ assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
+ assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
+ assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
+ assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
+ assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
+
+ assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
+ assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
+ assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
+ assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
+ assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
+
+ assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
+ assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
+ assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
+ assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
+ assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
+
+ assert_eq!(
+ t_err("(?-u)[^a]"),
+ TestError {
+ kind: hir::ErrorKind::InvalidUtf8,
+ span: Span::new(
+ Position::new(5, 1, 6),
+ Position::new(9, 1, 10)
+ ),
+ }
+ );
+ #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
+ assert_eq!(
+ t_err(r"[^\s\S]"),
+ TestError {
+ kind: hir::ErrorKind::EmptyClassNotAllowed,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(7, 1, 8)
+ ),
+ }
+ );
+ #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
+ assert_eq!(
+ t_err(r"(?-u)[^\s\S]"),
+ TestError {
+ kind: hir::ErrorKind::EmptyClassNotAllowed,
+ span: Span::new(
+ Position::new(5, 1, 6),
+ Position::new(12, 1, 13)
+ ),
+ }
+ );
+ }
+
+ /// Checks the HIR produced for bracketed classes whose items form a union:
+ /// plain ranges, Unicode property classes, nested brackets, and the
+ /// case-insensitive (`(?i)`) and negated (`[^...]`) variants.
+ ///
+ /// Assertions that mention Unicode properties are compiled only when the
+ /// crate features named in their `#[cfg(...)]` attribute are enabled.
+ #[test]
+ fn class_bracketed_union() {
+ assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"[a\pZb]"),
+ hir_union(
+ hir_uclass(&[('a', 'b')]),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ )
+ );
+ #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
+ assert_eq!(
+ t(r"[\pZ\p{Greek}]"),
+ hir_union(
+ hir_uclass_query(ClassQuery::Binary("greek")),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ )
+ );
+ #[cfg(all(
+ feature = "unicode-age",
+ feature = "unicode-gencat",
+ feature = "unicode-script"
+ ))]
+ assert_eq!(
+ t(r"[\p{age:3.0}\pZ\p{Greek}]"),
+ hir_union(
+ hir_uclass_query(ClassQuery::ByValue {
+ property_name: "age",
+ property_value: "3.0",
+ }),
+ hir_union(
+ hir_uclass_query(ClassQuery::Binary("greek")),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ )
+ )
+ );
+ // Nested brackets: the expected value is simply the union of all four
+ // property classes.
+ #[cfg(all(
+ feature = "unicode-age",
+ feature = "unicode-gencat",
+ feature = "unicode-script"
+ ))]
+ assert_eq!(
+ t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
+ hir_union(
+ hir_uclass_query(ClassQuery::ByValue {
+ property_name: "age",
+ property_value: "3.0",
+ }),
+ hir_union(
+ hir_uclass_query(ClassQuery::Binary("cyrillic")),
+ hir_union(
+ hir_uclass_query(ClassQuery::Binary("greek")),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ )
+ )
+ )
+ );
+
+ // Case-insensitive union: case folding is applied to the whole union.
+ #[cfg(all(
+ feature = "unicode-age",
+ feature = "unicode-case",
+ feature = "unicode-gencat",
+ feature = "unicode-script"
+ ))]
+ assert_eq!(
+ t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
+ hir_case_fold(hir_union(
+ hir_uclass_query(ClassQuery::ByValue {
+ property_name: "age",
+ property_value: "3.0",
+ }),
+ hir_union(
+ hir_uclass_query(ClassQuery::Binary("greek")),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ )
+ ))
+ );
+ // Negated union.
+ #[cfg(all(
+ feature = "unicode-age",
+ feature = "unicode-gencat",
+ feature = "unicode-script"
+ ))]
+ assert_eq!(
+ t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
+ hir_negate(hir_union(
+ hir_uclass_query(ClassQuery::ByValue {
+ property_name: "age",
+ property_value: "3.0",
+ }),
+ hir_union(
+ hir_uclass_query(ClassQuery::Binary("greek")),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ )
+ ))
+ );
+ // Negated and case-insensitive: the expected value folds first and then
+ // negates.
+ #[cfg(all(
+ feature = "unicode-age",
+ feature = "unicode-case",
+ feature = "unicode-gencat",
+ feature = "unicode-script"
+ ))]
+ assert_eq!(
+ t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
+ hir_negate(hir_case_fold(hir_union(
+ hir_uclass_query(ClassQuery::ByValue {
+ property_name: "age",
+ property_value: "3.0",
+ }),
+ hir_union(
+ hir_uclass_query(ClassQuery::Binary("greek")),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ )
+ )))
+ );
+ }
+
+ /// Checks bracketed classes that contain a nested negated class, such as
+ /// `[a[^c]]`, in plain, outer-negated and case-insensitive forms, plus the
+ /// patterns that are expected to be rejected with `EmptyClassNotAllowed`.
+ #[test]
+ fn class_bracketed_nested() {
+ assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
+ assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
+ assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
+
+ assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
+ assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
+
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)[a[^c]]"),
+ hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)[a-b[^c]]"),
+ hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
+ );
+
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t(r"(?i)[^a-b[^c]]"),
+ hir_uclass(&[('C', 'C'), ('c', 'c')])
+ );
+
+ // The error span is expected to cover the whole bracketed class.
+ assert_eq!(
+ t_err(r"[^a-c[^c]]"),
+ TestError {
+ kind: hir::ErrorKind::EmptyClassNotAllowed,
+ span: Span::new(
+ Position::new(0, 1, 1),
+ Position::new(10, 1, 11)
+ ),
+ }
+ );
+ // Same class after a 4-byte `(?i)` prefix: the span starts at offset 4.
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t_err(r"(?i)[^a-c[^c]]"),
+ TestError {
+ kind: hir::ErrorKind::EmptyClassNotAllowed,
+ span: Span::new(
+ Position::new(4, 1, 5),
+ Position::new(14, 1, 15)
+ ),
+ }
+ );
+ }
+
+ /// Checks the `&&` (intersection) operator inside bracketed classes for
+ /// Unicode classes, byte classes (`(?-u)`), their case-insensitive variants,
+ /// a handful of literal-punctuation corner cases, and operator precedence.
+ #[test]
+ fn class_bracketed_intersect() {
+ assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
+ assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+ assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+ assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
+ assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
+ assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
+ assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
+ assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
+ assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+
+ // The same intersections with Unicode mode disabled yield byte classes.
+ assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
+ assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+ assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+ assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
+ assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
+ assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
+
+ // Case-insensitive Unicode classes.
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[abc&&b-c]"),
+ hir_case_fold(hir_uclass(&[('b', 'c')]))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[abc&&[b-c]]"),
+ hir_case_fold(hir_uclass(&[('b', 'c')]))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[[abc]&&[b-c]]"),
+ hir_case_fold(hir_uclass(&[('b', 'c')]))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[a-z&&b-y&&c-x]"),
+ hir_case_fold(hir_uclass(&[('c', 'x')]))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[c-da-b&&a-d]"),
+ hir_case_fold(hir_uclass(&[('a', 'd')]))
+ );
+ #[cfg(feature = "unicode-case")]
+ assert_eq!(
+ t("(?i)[a-d&&c-da-b]"),
+ hir_case_fold(hir_uclass(&[('a', 'd')]))
+ );
+
+ // Case-insensitive byte classes; these are not feature gated.
+ assert_eq!(
+ t("(?i-u)[abc&&b-c]"),
+ hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+ );
+ assert_eq!(
+ t("(?i-u)[abc&&[b-c]]"),
+ hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+ );
+ assert_eq!(
+ t("(?i-u)[[abc]&&[b-c]]"),
+ hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+ );
+ assert_eq!(
+ t("(?i-u)[a-z&&b-y&&c-x]"),
+ hir_case_fold(hir_bclass(&[(b'c', b'x')]))
+ );
+ assert_eq!(
+ t("(?i-u)[c-da-b&&a-d]"),
+ hir_case_fold(hir_bclass(&[(b'a', b'd')]))
+ );
+ assert_eq!(
+ t("(?i-u)[a-d&&c-da-b]"),
+ hir_case_fold(hir_bclass(&[(b'a', b'd')]))
+ );
+
+ // In `[a^]`, `^` does not need to be escaped, so it makes sense that
+ // `^` is also allowed to be unescaped after `&&`.
+ assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
+ // `]` needs to be escaped after `&&` since it's not at start of class.
+ assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
+ assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
+ assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
+ assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
+ // Test precedence.
+ assert_eq!(
+ t(r"[a-w&&[^c-g]z]"),
+ hir_uclass(&[('a', 'b'), ('h', 'w')])
+ );
+ }
+
+ /// Checks how negation combines with the `&&` (intersection) operator in
+ /// bracketed classes, both for Unicode classes and for byte classes
+ /// (`(?-u)`).
+ #[test]
+ fn class_bracketed_intersect_negate() {
+ // Unicode mode: `\w` and `\d` need the Unicode Perl class tables, so these
+ // assertions are gated on the `unicode-perl` feature.
+ #[cfg(feature = "unicode-perl")]
+ assert_eq!(
+ t(r"[^\w&&\d]"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+ );
+ assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+ #[cfg(feature = "unicode-perl")]
+ assert_eq!(
+ t(r"[^[\w&&\d]]"),
+ hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+ );
+ #[cfg(feature = "unicode-perl")]
+ assert_eq!(
+ t(r"[^[^\w&&\d]]"),
+ hir_uclass_query(ClassQuery::Binary("digit"))
+ );
+ #[cfg(feature = "unicode-perl")]
+ assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
+
+ // Byte mode: the expected values are built from ASCII classes only, so
+ // none of these assertions is feature gated.
+ assert_eq!(
+ t_bytes(r"(?-u)[^\w&&\d]"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Digit
+ )))
+ );
+ assert_eq!(
+ t_bytes(r"(?-u)[^[a-z&&a-c]]"),
+ hir_negate(hir_bclass(&[(b'a', b'c')]))
+ );
+ assert_eq!(
+ t_bytes(r"(?-u)[^[\w&&\d]]"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Digit
+ )))
+ );
+ assert_eq!(
+ t_bytes(r"(?-u)[^[^\w&&\d]]"),
+ hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
+ );
+ assert_eq!(
+ t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
+ hir_negate(hir_bclass_from_char(ascii_class(
+ &ast::ClassAsciiKind::Word
+ )))
+ );
+ }
+
+ /// Checks the `--` (difference) operator inside bracketed classes, once for a
+ /// Unicode class and once for a byte class (`(?-u)`).
+ #[test]
+ fn class_bracketed_difference() {
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"[\pL--[:ascii:]]"),
+ hir_difference(
+ hir_uclass_query(ClassQuery::Binary("letter")),
+ hir_uclass(&[('\0', '\x7F')])
+ )
+ );
+
+ // ASCII letters minus ASCII lowercase letters leaves `A-Z`.
+ assert_eq!(
+ t(r"(?-u)[[:alpha:]--[:lower:]]"),
+ hir_bclass(&[(b'A', b'Z')])
+ );
+ }
+
+ /// Checks the `~~` (symmetric difference) operator inside bracketed classes
+ /// for Unicode classes and for byte classes (`(?-u)`).
+ #[test]
+ fn class_bracketed_symmetric_difference() {
+ #[cfg(feature = "unicode-script")]
+ assert_eq!(
+ t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
+ hir_uclass(&[
+ ('\u{0342}', '\u{0342}'),
+ ('\u{0345}', '\u{0345}'),
+ ('\u{1DC0}', '\u{1DC1}'),
+ ])
+ );
+ // `a-g` and `c-j` overlap in `c-g`; only the non-shared ends remain.
+ assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
+
+ assert_eq!(
+ t(r"(?-u)[a-g~~c-j]"),
+ hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
+ );
+ }
+
+ /// Checks translation of patterns written with the `x` flag, where
+ /// whitespace and `# ...` comments are interleaved with escapes, Unicode
+ /// class names and counted repetitions.
+ ///
+ /// Several patterns are multi-line raw strings, so the line breaks below are
+ /// part of the pattern text.
+ #[test]
+ fn ignore_whitespace() {
+ assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
+ assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
+ assert_eq!(
+ t(r"(?x)\x # comment
+{ # comment
+ 53 # comment
+} #comment"),
+ hir_lit("S")
+ );
+
+ assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
+ assert_eq!(
+ t(r"(?x)\x # comment
+ 53 # comment"),
+ hir_lit("S")
+ );
+ assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
+
+ #[cfg(feature = "unicode-gencat")]
+ assert_eq!(
+ t(r"(?x)\p # comment
+{ # comment
+ Separator # comment
+} # comment"),
+ hir_uclass_query(ClassQuery::Binary("separator"))
+ );
+
+ assert_eq!(
+ t(r"(?x)a # comment
+{ # comment
+ 5 # comment
+ , # comment
+ 10 # comment
+} # comment"),
+ hir_range(
+ true,
+ hir::RepetitionRange::Bounded(5, 10),
+ hir_lit("a")
+ )
+ );
+
+ // An escaped space is kept as a literal space even with `x` enabled.
+ assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
+ }
+
+ /// Checks the `is_always_utf8` analysis on translated patterns: it is
+ /// expected to hold for the first group of patterns and to be false for the
+ /// second group, which all disable Unicode mode with `(?-u)`.
+ #[test]
+ fn analysis_is_always_utf8() {
+ // Positive examples.
+ assert!(t_bytes(r"a").is_always_utf8());
+ assert!(t_bytes(r"ab").is_always_utf8());
+ assert!(t_bytes(r"(?-u)a").is_always_utf8());
+ assert!(t_bytes(r"(?-u)ab").is_always_utf8());
+ assert!(t_bytes(r"\xFF").is_always_utf8());
+ assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
+ assert!(t_bytes(r"[^a]").is_always_utf8());
+ assert!(t_bytes(r"[^a][^a]").is_always_utf8());
+ assert!(t_bytes(r"\b").is_always_utf8());
+ assert!(t_bytes(r"\B").is_always_utf8());
+ assert!(t_bytes(r"(?-u)\b").is_always_utf8());
+
+ // Negative examples.
+ assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
+ assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
+ assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
+ assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
+ assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
+ }
+
+ /// Checks the `is_all_assertions` analysis: it is expected to hold for
+ /// patterns built only from anchors and word-boundary assertions (including
+ /// concatenations, alternations, groups and repetitions of them) and to be
+ /// false once a literal is present.
+ #[test]
+ fn analysis_is_all_assertions() {
+ // Positive examples.
+ assert!(t(r"\b").is_all_assertions());
+ assert!(t(r"\B").is_all_assertions());
+ assert!(t(r"^").is_all_assertions());
+ assert!(t(r"$").is_all_assertions());
+ assert!(t(r"\A").is_all_assertions());
+ assert!(t(r"\z").is_all_assertions());
+ assert!(t(r"$^\z\A\b\B").is_all_assertions());
+ assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
+ assert!(t(r"^$|$^").is_all_assertions());
+ assert!(t(r"((\b)+())*^").is_all_assertions());
+
+ // Negative examples.
+ assert!(!t(r"^a").is_all_assertions());
+ }
+
+ /// Checks the four anchoring analyses (`is_anchored_start`,
+ /// `is_anchored_end`, `is_line_anchored_start`, `is_line_anchored_end`) on a
+ /// range of patterns.
+ ///
+ /// Positive groups assert the properties in the order start, end,
+ /// line-start, line-end.
+ #[test]
+ fn analysis_is_anchored() {
+ // Positive examples.
+ assert!(t(r"^").is_anchored_start());
+ assert!(t(r"$").is_anchored_end());
+ assert!(t(r"^").is_line_anchored_start());
+ assert!(t(r"$").is_line_anchored_end());
+
+ assert!(t(r"^^").is_anchored_start());
+ assert!(t(r"$$").is_anchored_end());
+ assert!(t(r"^^").is_line_anchored_start());
+ assert!(t(r"$$").is_line_anchored_end());
+
+ assert!(t(r"^$").is_anchored_start());
+ assert!(t(r"^$").is_anchored_end());
+ assert!(t(r"^$").is_line_anchored_start());
+ assert!(t(r"^$").is_line_anchored_end());
+
+ assert!(t(r"^foo").is_anchored_start());
+ assert!(t(r"foo$").is_anchored_end());
+ assert!(t(r"^foo").is_line_anchored_start());
+ assert!(t(r"foo$").is_line_anchored_end());
+
+ assert!(t(r"^foo|^bar").is_anchored_start());
+ assert!(t(r"foo$|bar$").is_anchored_end());
+ assert!(t(r"^foo|^bar").is_line_anchored_start());
+ assert!(t(r"foo$|bar$").is_line_anchored_end());
+
+ assert!(t(r"^(foo|bar)").is_anchored_start());
+ assert!(t(r"(foo|bar)$").is_anchored_end());
+ assert!(t(r"^(foo|bar)").is_line_anchored_start());
+ assert!(t(r"(foo|bar)$").is_line_anchored_end());
+
+ assert!(t(r"^+").is_anchored_start());
+ assert!(t(r"$+").is_anchored_end());
+ assert!(t(r"^+").is_line_anchored_start());
+ assert!(t(r"$+").is_line_anchored_end());
+ assert!(t(r"^++").is_anchored_start());
+ assert!(t(r"$++").is_anchored_end());
+ assert!(t(r"^++").is_line_anchored_start());
+ assert!(t(r"$++").is_line_anchored_end());
+ assert!(t(r"(^)+").is_anchored_start());
+ assert!(t(r"($)+").is_anchored_end());
+ assert!(t(r"(^)+").is_line_anchored_start());
+ assert!(t(r"($)+").is_line_anchored_end());
+
+ // `$^` is anchored at both ends: check all four properties, as is done
+ // for `$^|^$` right below.
+ assert!(t(r"$^").is_anchored_start());
+ assert!(t(r"$^").is_anchored_end());
+ assert!(t(r"$^").is_line_anchored_start());
+ assert!(t(r"$^").is_line_anchored_end());
+ assert!(t(r"$^|^$").is_anchored_start());
+ assert!(t(r"$^|^$").is_anchored_end());
+ assert!(t(r"$^|^$").is_line_anchored_start());
+ assert!(t(r"$^|^$").is_line_anchored_end());
+
+ assert!(t(r"\b^").is_anchored_start());
+ assert!(t(r"$\b").is_anchored_end());
+ assert!(t(r"\b^").is_line_anchored_start());
+ assert!(t(r"$\b").is_line_anchored_end());
+ assert!(t(r"^(?m:^)").is_anchored_start());
+ assert!(t(r"(?m:$)$").is_anchored_end());
+ assert!(t(r"^(?m:^)").is_line_anchored_start());
+ assert!(t(r"(?m:$)$").is_line_anchored_end());
+ assert!(t(r"(?m:^)^").is_anchored_start());
+ assert!(t(r"$(?m:$)").is_anchored_end());
+ assert!(t(r"(?m:^)^").is_line_anchored_start());
+ assert!(t(r"$(?m:$)").is_line_anchored_end());
+
+ // Negative examples.
+ assert!(!t(r"(?m)^").is_anchored_start());
+ assert!(!t(r"(?m)$").is_anchored_end());
+ assert!(!t(r"(?m:^$)|$^").is_anchored_start());
+ assert!(!t(r"(?m:^$)|$^").is_anchored_end());
+ assert!(!t(r"$^|(?m:^$)").is_anchored_start());
+ assert!(!t(r"$^|(?m:^$)").is_anchored_end());
+
+ assert!(!t(r"a^").is_anchored_start());
+ assert!(!t(r"$a").is_anchored_start());
+ assert!(!t(r"a^").is_line_anchored_start());
+ assert!(!t(r"$a").is_line_anchored_start());
+
+ assert!(!t(r"a^").is_anchored_end());
+ assert!(!t(r"$a").is_anchored_end());
+ assert!(!t(r"a^").is_line_anchored_end());
+ assert!(!t(r"$a").is_line_anchored_end());
+
+ assert!(!t(r"^foo|bar").is_anchored_start());
+ assert!(!t(r"foo|bar$").is_anchored_end());
+ assert!(!t(r"^foo|bar").is_line_anchored_start());
+ assert!(!t(r"foo|bar$").is_line_anchored_end());
+
+ // A repetition that may match zero times does not keep the anchor.
+ assert!(!t(r"^*").is_anchored_start());
+ assert!(!t(r"$*").is_anchored_end());
+ assert!(!t(r"^*").is_line_anchored_start());
+ assert!(!t(r"$*").is_line_anchored_end());
+ assert!(!t(r"^*+").is_anchored_start());
+ assert!(!t(r"$*+").is_anchored_end());
+ assert!(!t(r"^*+").is_line_anchored_start());
+ assert!(!t(r"$*+").is_line_anchored_end());
+ assert!(!t(r"^+*").is_anchored_start());
+ assert!(!t(r"$+*").is_anchored_end());
+ assert!(!t(r"^+*").is_line_anchored_start());
+ assert!(!t(r"$+*").is_line_anchored_end());
+ assert!(!t(r"(^)*").is_anchored_start());
+ assert!(!t(r"($)*").is_anchored_end());
+ assert!(!t(r"(^)*").is_line_anchored_start());
+ assert!(!t(r"($)*").is_line_anchored_end());
+ }
+
+ /// Checks that patterns anchored with multi-line anchors (`(?m)^`, `(?m)$`)
+ /// are reported as line anchored, including alternations that mix
+ /// multi-line and non-multi-line anchors.
+ #[test]
+ fn analysis_is_line_anchored() {
+ assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
+ assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
+
+ assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
+ assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
+
+ assert!(t(r"(?m)^").is_line_anchored_start());
+ assert!(t(r"(?m)$").is_line_anchored_end());
+
+ assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
+ assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
+
+ assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
+ assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
+ }
+
+ /// Checks the `is_any_anchored_start` / `is_any_anchored_end` analyses: they
+ /// are expected to hold for `^`/`\A` and `$`/`\z` respectively, and to be
+ /// false for the multi-line anchors and for the anchor of the opposite end.
+ #[test]
+ fn analysis_is_any_anchored() {
+ // Positive examples.
+ assert!(t(r"^").is_any_anchored_start());
+ assert!(t(r"$").is_any_anchored_end());
+ assert!(t(r"\A").is_any_anchored_start());
+ assert!(t(r"\z").is_any_anchored_end());
+
+ // Negative examples.
+ assert!(!t(r"(?m)^").is_any_anchored_start());
+ assert!(!t(r"(?m)$").is_any_anchored_end());
+ assert!(!t(r"$").is_any_anchored_start());
+ assert!(!t(r"^").is_any_anchored_end());
+ }
+
+ /// Checks the `is_match_empty` analysis: it is expected to hold for patterns
+ /// that can match the empty string (empty patterns and groups, repetitions
+ /// with a zero lower bound, alternations with such a branch, anchors and
+ /// word boundaries) and to be false for patterns that must consume input.
+ #[test]
+ fn analysis_is_match_empty() {
+ // Positive examples.
+ assert!(t(r"").is_match_empty());
+ assert!(t(r"()").is_match_empty());
+ assert!(t(r"()*").is_match_empty());
+ assert!(t(r"()+").is_match_empty());
+ assert!(t(r"()?").is_match_empty());
+ assert!(t(r"a*").is_match_empty());
+ assert!(t(r"a?").is_match_empty());
+ assert!(t(r"a{0}").is_match_empty());
+ assert!(t(r"a{0,}").is_match_empty());
+ assert!(t(r"a{0,1}").is_match_empty());
+ assert!(t(r"a{0,10}").is_match_empty());
+ #[cfg(feature = "unicode-gencat")]
+ assert!(t(r"\pL*").is_match_empty());
+ assert!(t(r"a*|b").is_match_empty());
+ assert!(t(r"b|a*").is_match_empty());
+ assert!(t(r"a|").is_match_empty());
+ assert!(t(r"|a").is_match_empty());
+ assert!(t(r"a||b").is_match_empty());
+ assert!(t(r"a*a?(abcd)*").is_match_empty());
+ assert!(t(r"^").is_match_empty());
+ assert!(t(r"$").is_match_empty());
+ assert!(t(r"(?m)^").is_match_empty());
+ assert!(t(r"(?m)$").is_match_empty());
+ assert!(t(r"\A").is_match_empty());
+ assert!(t(r"\z").is_match_empty());
+ assert!(t(r"\B").is_match_empty());
+ assert!(t_bytes(r"(?-u)\B").is_match_empty());
+ assert!(t(r"\b").is_match_empty());
+ assert!(t(r"(?-u)\b").is_match_empty());
+
+ // Negative examples.
+ assert!(!t(r"a+").is_match_empty());
+ assert!(!t(r"a{1}").is_match_empty());
+ assert!(!t(r"a{1,}").is_match_empty());
+ assert!(!t(r"a{1,2}").is_match_empty());
+ assert!(!t(r"a{1,10}").is_match_empty());
+ assert!(!t(r"b|a").is_match_empty());
+ assert!(!t(r"a*a+(abcd)*").is_match_empty());
+ }
+
+ /// Checks the `is_literal` analysis: it is expected to hold for plain
+ /// literal strings (flags such as `(?m)` do not matter) and to be false for
+ /// the empty pattern, anchors, alternations, groups, repetitions and
+ /// bracketed classes.
+ #[test]
+ fn analysis_is_literal() {
+ // Positive examples.
+ assert!(t(r"a").is_literal());
+ assert!(t(r"ab").is_literal());
+ assert!(t(r"abc").is_literal());
+ assert!(t(r"(?m)abc").is_literal());
+
+ // Negative examples.
+ assert!(!t(r"").is_literal());
+ assert!(!t(r"^").is_literal());
+ assert!(!t(r"a|b").is_literal());
+ assert!(!t(r"(a)").is_literal());
+ assert!(!t(r"a+").is_literal());
+ assert!(!t(r"foo(a)").is_literal());
+ assert!(!t(r"(a)foo").is_literal());
+ assert!(!t(r"[a]").is_literal());
+ }
+
+ /// Checks the `is_alternation_literal` analysis: it is expected to hold for
+ /// a single literal or an alternation of literals, and to be false as soon
+ /// as the pattern (or any branch) is empty, an anchor, a group, a repetition
+ /// or a bracketed class.
+ #[test]
+ fn analysis_is_alternation_literal() {
+ // Positive examples.
+ assert!(t(r"a").is_alternation_literal());
+ assert!(t(r"ab").is_alternation_literal());
+ assert!(t(r"abc").is_alternation_literal());
+ assert!(t(r"(?m)abc").is_alternation_literal());
+ assert!(t(r"a|b").is_alternation_literal());
+ assert!(t(r"a|b|c").is_alternation_literal());
+ assert!(t(r"foo|bar").is_alternation_literal());
+ assert!(t(r"foo|bar|baz").is_alternation_literal());
+
+ // Negative examples.
+ assert!(!t(r"").is_alternation_literal());
+ assert!(!t(r"^").is_alternation_literal());
+ assert!(!t(r"(a)").is_alternation_literal());
+ assert!(!t(r"a+").is_alternation_literal());
+ assert!(!t(r"foo(a)").is_alternation_literal());
+ assert!(!t(r"(a)foo").is_alternation_literal());
+ assert!(!t(r"[a]").is_alternation_literal());
+ assert!(!t(r"[a]|b").is_alternation_literal());
+ assert!(!t(r"a|[b]").is_alternation_literal());
+ assert!(!t(r"(a)|b").is_alternation_literal());
+ assert!(!t(r"a|(b)").is_alternation_literal());
+ }
+}
diff --git a/vendor/regex-syntax/src/hir/visitor.rs b/vendor/regex-syntax/src/hir/visitor.rs
new file mode 100644
index 000000000..4f5a70909
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/visitor.rs
@@ -0,0 +1,203 @@
+use crate::hir::{self, Hir, HirKind};
+
+/// A trait for visiting the high-level IR (HIR) in depth first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on a high-level intermediate representation of a regular
+/// expression without necessarily using recursion. In particular, this permits
+/// callers to do case analysis with constant stack usage, which can be
+/// important since the size of an HIR may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+///
+/// Only `finish` must be implemented; every other method has a default
+/// implementation that does nothing.
+pub trait Visitor {
+ /// The result of visiting an HIR.
+ type Output;
+ /// An error that visiting an HIR might return.
+ type Err;
+
+ /// All implementors of `Visitor` must provide a `finish` method, which
+ /// yields the result of visiting the HIR or an error.
+ ///
+ /// This consumes the visitor and is called once traversal has completed
+ /// without an error.
+ fn finish(self) -> Result<Self::Output, Self::Err>;
+
+ /// This method is called before beginning traversal of the HIR.
+ ///
+ /// The default implementation does nothing.
+ fn start(&mut self) {}
+
+ /// This method is called on an `Hir` before descending into child `Hir`
+ /// nodes.
+ ///
+ /// The default implementation does nothing and returns `Ok(())`.
+ fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on an `Hir` after descending all of its child
+ /// `Hir` nodes.
+ ///
+ /// The default implementation does nothing and returns `Ok(())`.
+ fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called between child nodes of an alternation.
+ ///
+ /// The default implementation does nothing and returns `Ok(())`.
+ fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+ Ok(())
+ }
+}
+
+/// Runs a [`Visitor`](trait.Visitor.html) over an `Hir` without recursion.
+///
+/// Every node of the given `Hir` is visited, and the matching `Visitor`
+/// callbacks are invoked along the way.
+///
+/// This is meant for case analysis over an `Hir` whose depth should not
+/// dictate how much stack is consumed: the traversal keeps its bookkeeping
+/// on the heap (proportional to the size of the `Hir`) and uses constant
+/// stack space. That matters when the size of the `Hir` is proportional to
+/// end user input.
+///
+/// As soon as the visitor reports an error, traversal stops and that error
+/// is returned.
+pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
+    let mut walker = HeapVisitor::new();
+    walker.visit(hir, visitor)
+}
+
+/// HeapVisitor visits every item in an `Hir` recursively using constant stack
+/// size and a heap size proportional to the size of the `Hir`.
+struct HeapVisitor<'a> {
+ /// A stack of `Hir` nodes. This is roughly analogous to the call stack
+ /// used in a typical recursive visitor.
+ ///
+ /// Each entry pairs a node whose children are currently being visited with
+ /// the frame that records which child is being visited.
+ stack: Vec<(&'a Hir, Frame<'a>)>,
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// an `Hir`.
+///
+/// A frame only exists for nodes that have children: repetitions, groups and
+/// non-empty concatenations/alternations.
+enum Frame<'a> {
+ /// A stack frame allocated just before descending into a repetition
+ /// operator's child node.
+ Repetition(&'a hir::Repetition),
+ /// A stack frame allocated just before descending into a group's child
+ /// node.
+ Group(&'a hir::Group),
+ /// The stack frame used while visiting every child node of a concatenation
+ /// of expressions.
+ Concat {
+ /// The child node we are currently visiting.
+ head: &'a Hir,
+ /// The remaining child nodes to visit (which may be empty).
+ tail: &'a [Hir],
+ },
+ /// The stack frame used while visiting every child node of an alternation
+ /// of expressions.
+ Alternation {
+ /// The child node we are currently visiting.
+ head: &'a Hir,
+ /// The remaining child nodes to visit (which may be empty).
+ tail: &'a [Hir],
+ },
+}
+
+impl<'a> HeapVisitor<'a> {
+    /// Creates a visitor with an empty explicit stack.
+    fn new() -> HeapVisitor<'a> {
+        HeapVisitor { stack: Vec::new() }
+    }
+
+    /// Walks `hir` depth first, driving the callbacks of `visitor`.
+    ///
+    /// `start` is called once up front, `visit_pre` before a node's children,
+    /// `visit_post` after them, and `visit_alternation_in` between two
+    /// branches of an alternation. The first error returned by a callback
+    /// aborts the walk; otherwise the result of `finish` is returned.
+    fn visit<V: Visitor>(
+        &mut self,
+        mut hir: &'a Hir,
+        mut visitor: V,
+    ) -> Result<V::Output, V::Err> {
+        self.stack.clear();
+
+        visitor.start();
+        'descend: loop {
+            visitor.visit_pre(hir)?;
+            match self.induct(hir) {
+                // Inductive case: remember where we are and move on to the
+                // first child.
+                Some(frame) => {
+                    let next = frame.child();
+                    self.stack.push((hir, frame));
+                    hir = next;
+                }
+                // Base case: the node has no children, so it is finished
+                // right away.
+                None => {
+                    visitor.visit_post(hir)?;
+
+                    // Unwind the explicit stack until a parent still has a
+                    // sibling left to visit.
+                    while let Some((parent, frame)) = self.stack.pop() {
+                        match self.pop(frame) {
+                            Some(next_frame) => {
+                                if let Frame::Alternation { .. } = next_frame {
+                                    visitor.visit_alternation_in()?;
+                                }
+                                hir = next_frame.child();
+                                self.stack.push((parent, next_frame));
+                                continue 'descend;
+                            }
+                            // All children of `parent` are done.
+                            None => visitor.visit_post(parent)?,
+                        }
+                    }
+                    // Nothing left on the stack: the walk is complete.
+                    return visitor.finish();
+                }
+            }
+        }
+    }
+
+    /// Returns the initial stack frame for `hir` when it has child nodes,
+    /// and `None` when it has none (which includes empty concatenations and
+    /// alternations).
+    fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
+        match *hir.kind() {
+            HirKind::Repetition(ref rep) => Some(Frame::Repetition(rep)),
+            HirKind::Group(ref group) => Some(Frame::Group(group)),
+            HirKind::Concat(ref exprs) => exprs
+                .split_first()
+                .map(|(head, tail)| Frame::Concat { head, tail }),
+            HirKind::Alternation(ref exprs) => exprs
+                .split_first()
+                .map(|(head, tail)| Frame::Alternation { head, tail }),
+            _ => None,
+        }
+    }
+
+    /// Advances a finished frame to its next child. Returns the follow-up
+    /// frame when one more child remains, and `None` when the frame is
+    /// exhausted.
+    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+        match induct {
+            // Repetitions and groups have exactly one child.
+            Frame::Repetition(_) | Frame::Group(_) => None,
+            Frame::Concat { tail, .. } => tail
+                .split_first()
+                .map(|(head, tail)| Frame::Concat { head, tail }),
+            Frame::Alternation { tail, .. } => tail
+                .split_first()
+                .map(|(head, tail)| Frame::Alternation { head, tail }),
+        }
+    }
+}
+
+impl<'a> Frame<'a> {
+    /// Returns the child HIR node this frame currently points at, i.e. the
+    /// node to visit next for this frame.
+    fn child(&self) -> &'a Hir {
+        match *self {
+            Frame::Repetition(rep) => &rep.hir,
+            Frame::Group(group) => &group.hir,
+            Frame::Concat { head, .. }
+            | Frame::Alternation { head, .. } => head,
+        }
+    }
+}
diff --git a/vendor/regex-syntax/src/lib.rs b/vendor/regex-syntax/src/lib.rs
new file mode 100644
index 000000000..9e9af756a
--- /dev/null
+++ b/vendor/regex-syntax/src/lib.rs
@@ -0,0 +1,312 @@
+/*!
+This crate provides a robust regular expression parser.
+
+This crate defines two primary types:
+
+* [`Ast`](ast/enum.Ast.html) is the abstract syntax of a regular expression.
+ An abstract syntax corresponds to a *structured representation* of the
+ concrete syntax of a regular expression, where the concrete syntax is the
+ pattern string itself (e.g., `foo(bar)+`). Given some abstract syntax, it
+ can be converted back to the original concrete syntax (modulo some details,
+ like whitespace). To a first approximation, the abstract syntax is complex
+ and difficult to analyze.
+* [`Hir`](hir/struct.Hir.html) is the high-level intermediate representation
+ ("HIR" or "high-level IR" for short) of a regular expression. It corresponds to
+ an intermediate state of a regular expression that sits between the abstract
+ syntax and the low level compiled opcodes that are eventually responsible for
+ executing a regular expression search. Given some high-level IR, it is not
+ possible to produce the original concrete syntax (although it is possible to
+ produce an equivalent concrete syntax, but it will likely scarcely resemble
+ the original pattern). To a first approximation, the high-level IR is simple
+ and easy to analyze.
+
+These two types come with conversion routines:
+
+* An [`ast::parse::Parser`](ast/parse/struct.Parser.html) converts concrete
+ syntax (a `&str`) to an [`Ast`](ast/enum.Ast.html).
+* A [`hir::translate::Translator`](hir/translate/struct.Translator.html)
+ converts an [`Ast`](ast/enum.Ast.html) to a [`Hir`](hir/struct.Hir.html).
+
+As a convenience, the above two conversion routines are combined into one via
+the top-level [`Parser`](struct.Parser.html) type. This `Parser` will first
+convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`.
+
+
+# Example
+
+This example shows how to parse a pattern string into its HIR:
+
+```
+use regex_syntax::Parser;
+use regex_syntax::hir::{self, Hir};
+
+let hir = Parser::new().parse("a|b").unwrap();
+assert_eq!(hir, Hir::alternation(vec![
+ Hir::literal(hir::Literal::Unicode('a')),
+ Hir::literal(hir::Literal::Unicode('b')),
+]));
+```
+
+
+# Concrete syntax supported
+
+The concrete syntax is documented as part of the public API of the
+[`regex` crate](https://docs.rs/regex/%2A/regex/#syntax).
+
+
+# Input safety
+
+A key feature of this library is that it is safe to use with end user facing
+input. This plays a significant role in the internal implementation. In
+particular:
+
+1. Parsers provide a `nest_limit` option that permits callers to control how
+ deeply nested a regular expression is allowed to be. This makes it possible
+ to do case analysis over an `Ast` or an `Hir` using recursion without
+ worrying about stack overflow.
+2. Since relying on a particular stack size is brittle, this crate goes to
+ great lengths to ensure that all interactions with both the `Ast` and the
+ `Hir` do not use recursion. Namely, they use constant stack space and heap
+ space proportional to the size of the original pattern string (in bytes).
+ This includes the type's corresponding destructors. (One exception to this
+ is literal extraction, but this will eventually get fixed.)
+
+
+# Error reporting
+
+The `Display` implementations on all `Error` types exposed in this library
+provide nice human readable errors that are suitable for showing to end users
+in a monospace font.
+
+
+# Literal extraction
+
+This crate provides limited support for
+[literal extraction from `Hir` values](hir/literal/struct.Literals.html).
+Be warned that literal extraction currently uses recursion, and therefore,
+stack size proportional to the size of the `Hir`.
+
+The purpose of literal extraction is to speed up searches. That is, if you
+know a regular expression must match a prefix or suffix literal, then it is
+often quicker to search for instances of that literal, and then confirm or deny
+the match using the full regular expression engine. These optimizations are
+done automatically in the `regex` crate.
+
+
+# Crate features
+
+An important feature provided by this crate is its Unicode support. This
+includes things like case folding, boolean properties, general categories,
+scripts and Unicode-aware support for the Perl classes `\w`, `\s` and `\d`.
+However, a downside of this support is that it requires bundling several
+Unicode data tables that are substantial in size.
+
+A fair number of use cases do not require full Unicode support. For this
+reason, this crate exposes a number of features to control which Unicode
+data is available.
+
+If a regular expression attempts to use a Unicode feature that is not available
+because the corresponding crate feature was disabled, then translating that
+regular expression to an `Hir` will return an error. (It is still possible
+to construct an `Ast` for such a regular expression, since Unicode data is not
+used until translation to an `Hir`.) Stated differently, enabling or disabling
+any of the features below can only add or subtract from the total set of valid
+regular expressions. Enabling or disabling a feature will never modify the
+match semantics of a regular expression.
+
+The following features are available:
+
+* **unicode** -
+ Enables all Unicode features. This feature is enabled by default, and will
+ always cover all Unicode features, even if more are added in the future.
+* **unicode-age** -
+ Provide the data for the
+ [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age).
+ This makes it possible to use classes like `\p{Age:6.0}` to refer to all
+ codepoints first introduced in Unicode 6.0.
+* **unicode-bool** -
+ Provide the data for numerous Unicode boolean properties. The full list
+ is not included here, but contains properties like `Alphabetic`, `Emoji`,
+ `Lowercase`, `Math`, `Uppercase` and `White_Space`.
+* **unicode-case** -
+ Provide the data for case insensitive matching using
+ [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
+* **unicode-gencat** -
+ Provide the data for
+ [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
+ This includes, but is not limited to, `Decimal_Number`, `Letter`,
+ `Math_Symbol`, `Number` and `Punctuation`.
+* **unicode-perl** -
+ Provide the data for supporting the Unicode-aware Perl character classes,
+ corresponding to `\w`, `\s` and `\d`. This is also necessary for using
+ Unicode-aware word boundary assertions. Note that if this feature is
+ disabled, the `\s` and `\d` character classes are still available if the
+ `unicode-bool` and `unicode-gencat` features are enabled, respectively.
+* **unicode-script** -
+ Provide the data for
+ [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/).
+ This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`,
+ `Latin` and `Thai`.
+* **unicode-segment** -
+ Provide the data necessary to provide the properties used to implement the
+ [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/).
+ This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and
+ `\p{sb=ATerm}`.
+*/
+
+#![deny(missing_docs)]
+#![warn(missing_debug_implementations)]
+#![forbid(unsafe_code)]
+
+pub use crate::error::{Error, Result};
+pub use crate::parser::{Parser, ParserBuilder};
+pub use crate::unicode::UnicodeWordError;
+
+pub mod ast;
+mod either;
+mod error;
+pub mod hir;
+mod parser;
+mod unicode;
+mod unicode_tables;
+pub mod utf8;
+
+/// Escapes all regular expression meta characters in `text`.
+///
+/// The string returned may be safely used as a literal in a regular
+/// expression.
+pub fn escape(text: &str) -> String {
+ let mut quoted = String::new();
+ escape_into(text, &mut quoted);
+ quoted
+}
+
+/// Escapes all meta characters in `text` and writes the result into `buf`.
+///
+/// This will append escape characters into the given buffer. The characters
+/// that are appended are safe to use as a literal in a regular expression.
+pub fn escape_into(text: &str, buf: &mut String) {
+ buf.reserve(text.len());
+ for c in text.chars() {
+ if is_meta_character(c) {
+ buf.push('\\');
+ }
+ buf.push(c);
+ }
+}
+
+/// Returns true if the given character has significance in a regex.
+///
+/// These are the only characters that are allowed to be escaped, with one
+/// exception: an ASCII space character may be escaped when extended mode (with
+/// the `x` flag) is enabled. In particular, `is_meta_character(' ')` returns
+/// `false`.
+///
+/// Note that the set of characters for which this function returns `true` or
+/// `false` is fixed and won't change in a semver compatible release.
+pub fn is_meta_character(c: char) -> bool {
+ match c {
+ '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{'
+ | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
+ _ => false,
+ }
+}
+
+/// Returns true if and only if the given character is a Unicode word
+/// character.
+///
+/// A Unicode word character is defined by
+/// [UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
+/// In particular, a character
+/// is considered a word character if it is in either of the `Alphabetic` or
+/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
+/// or `Connector_Punctuation` general categories.
+///
+/// # Panics
+///
+/// If the `unicode-perl` feature is not enabled, then this function panics.
+/// For this reason, it is recommended that callers use
+/// [`try_is_word_character`](fn.try_is_word_character.html)
+/// instead.
+pub fn is_word_character(c: char) -> bool {
+ try_is_word_character(c).expect("unicode-perl feature must be enabled")
+}
+
+/// Returns true if and only if the given character is a Unicode word
+/// character.
+///
+/// A Unicode word character is defined by
+/// [UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
+/// In particular, a character
+/// is considered a word character if it is in either of the `Alphabetic` or
+/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
+/// or `Connector_Punctuation` general categories.
+///
+/// # Errors
+///
+/// If the `unicode-perl` feature is not enabled, then this function always
+/// returns an error.
+pub fn try_is_word_character(
+ c: char,
+) -> std::result::Result<bool, UnicodeWordError> {
+ unicode::is_word_character(c)
+}
+
+/// Returns true if and only if the given character is an ASCII word character.
+///
+/// An ASCII word character is defined by the following character class:
+/// `[_0-9a-zA-Z]`.
+pub fn is_word_byte(c: u8) -> bool {
+ match c {
+ b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true,
+ _ => false,
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn escape_meta() {
+ assert_eq!(
+ escape(r"\.+*?()|[]{}^$#&-~"),
+ r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~".to_string()
+ );
+ }
+
+ #[test]
+ fn word_byte() {
+ assert!(is_word_byte(b'a'));
+ assert!(!is_word_byte(b'-'));
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-perl")]
+ fn word_char() {
+ assert!(is_word_character('a'), "ASCII");
+ assert!(is_word_character('à'), "Latin-1");
+ assert!(is_word_character('β'), "Greek");
+ assert!(is_word_character('\u{11011}'), "Brahmi (Unicode 6.0)");
+ assert!(is_word_character('\u{11611}'), "Modi (Unicode 7.0)");
+ assert!(is_word_character('\u{11711}'), "Ahom (Unicode 8.0)");
+ assert!(is_word_character('\u{17828}'), "Tangut (Unicode 9.0)");
+ assert!(is_word_character('\u{1B1B1}'), "Nushu (Unicode 10.0)");
+ assert!(is_word_character('\u{16E40}'), "Medefaidrin (Unicode 11.0)");
+ assert!(!is_word_character('-'));
+ assert!(!is_word_character('☃'));
+ }
+
+ #[test]
+ #[should_panic]
+ #[cfg(not(feature = "unicode-perl"))]
+ fn word_char_disabled_panic() {
+ assert!(is_word_character('a'));
+ }
+
+ #[test]
+ #[cfg(not(feature = "unicode-perl"))]
+ fn word_char_disabled_error() {
+ assert!(try_is_word_character('a').is_err());
+ }
+}
diff --git a/vendor/regex-syntax/src/parser.rs b/vendor/regex-syntax/src/parser.rs
new file mode 100644
index 000000000..a5ee524a8
--- /dev/null
+++ b/vendor/regex-syntax/src/parser.rs
@@ -0,0 +1,200 @@
+use crate::ast;
+use crate::hir;
+
+use crate::Result;
+
+/// A builder for a regular expression parser.
+///
+/// This builder permits modifying configuration options for the parser.
+///
+/// This type combines the builder options for both the
+/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html)
+/// and the
+/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html).
+#[derive(Clone, Debug, Default)]
+pub struct ParserBuilder {
+ ast: ast::parse::ParserBuilder,
+ hir: hir::translate::TranslatorBuilder,
+}
+
+impl ParserBuilder {
+ /// Create a new parser builder with a default configuration.
+ pub fn new() -> ParserBuilder {
+ ParserBuilder::default()
+ }
+
+ /// Build a parser from this configuration.
+ pub fn build(&self) -> Parser {
+ Parser { ast: self.ast.build(), hir: self.hir.build() }
+ }
+
+ /// Set the nesting limit for this parser.
+ ///
+ /// The nesting limit controls how deep the abstract syntax tree is allowed
+ /// to be. If the AST exceeds the given limit (e.g., with too many nested
+ /// groups), then an error is returned by the parser.
+ ///
+ /// The purpose of this limit is to act as a heuristic to prevent stack
+ /// overflow for consumers that do structural induction on an `Ast` using
+ /// explicit recursion. While this crate never does this (instead using
+ /// constant stack space and moving the call stack to the heap), other
+ /// crates may.
+ ///
+ /// This limit is not checked until the entire Ast is parsed. Therefore,
+ /// if callers want to put a limit on the amount of heap space used, then
+ /// they should impose a limit on the length, in bytes, of the concrete
+ /// pattern string. In particular, this is viable since this parser
+ /// implementation will limit itself to heap space proportional to the
+ /// length of the pattern string.
+ ///
+ /// Note that a nest limit of `0` will return a nest limit error for most
+ /// patterns but not all. For example, a nest limit of `0` permits `a` but
+ /// not `ab`, since `ab` requires a concatenation, which results in a nest
+ /// depth of `1`. In general, a nest limit is not something that manifests
+ /// in an obvious way in the concrete syntax, therefore, it should not be
+ /// used in a granular way.
+ pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+ self.ast.nest_limit(limit);
+ self
+ }
+
+ /// Whether to support octal syntax or not.
+ ///
+ /// Octal syntax is a little-known way of uttering Unicode codepoints in
+ /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+ /// `\141` are all equivalent regular expressions, where the last example
+ /// shows octal syntax.
+ ///
+ /// While supporting octal syntax isn't in and of itself a problem, it does
+ /// make good error messages harder. That is, in PCRE based regex engines,
+ /// syntax like `\0` invokes a backreference, which is explicitly
+ /// unsupported in Rust's regex engine. However, many users expect it to
+ /// be supported. Therefore, when octal support is disabled, the error
+ /// message will explicitly mention that backreferences aren't supported.
+ ///
+ /// Octal syntax is disabled by default.
+ pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.ast.octal(yes);
+ self
+ }
+
+ /// When enabled, the parser will permit the construction of a regular
+ /// expression that may match invalid UTF-8.
+ ///
+ /// When disabled (the default), the parser is guaranteed to produce
+ /// an expression that will only ever match valid UTF-8 (otherwise, the
+ /// parser will return an error).
+ ///
+ /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
+ /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
+ /// the parser to return an error. Namely, a negated ASCII word boundary
+ /// can result in matching positions that aren't valid UTF-8 boundaries.
+ pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.hir.allow_invalid_utf8(yes);
+ self
+ }
+
+ /// Enable verbose mode in the regular expression.
+ ///
+ /// When enabled, verbose mode permits insignificant whitespace in many
+ /// places in the regular expression, as well as comments. Comments are
+ /// started using `#` and continue until the end of the line.
+ ///
+ /// By default, this is disabled. It may be selectively enabled in the
+ /// regular expression by using the `x` flag regardless of this setting.
+ pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.ast.ignore_whitespace(yes);
+ self
+ }
+
+ /// Enable or disable the case insensitive flag by default.
+ ///
+ /// By default this is disabled. It may alternatively be selectively
+ /// enabled in the regular expression itself via the `i` flag.
+ pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.hir.case_insensitive(yes);
+ self
+ }
+
+ /// Enable or disable the multi-line matching flag by default.
+ ///
+ /// By default this is disabled. It may alternatively be selectively
+ /// enabled in the regular expression itself via the `m` flag.
+ pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.hir.multi_line(yes);
+ self
+ }
+
+ /// Enable or disable the "dot matches any character" flag by default.
+ ///
+ /// By default this is disabled. It may alternatively be selectively
+ /// enabled in the regular expression itself via the `s` flag.
+ pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.hir.dot_matches_new_line(yes);
+ self
+ }
+
+ /// Enable or disable the "swap greed" flag by default.
+ ///
+ /// By default this is disabled. It may alternatively be selectively
+ /// enabled in the regular expression itself via the `U` flag.
+ pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.hir.swap_greed(yes);
+ self
+ }
+
+ /// Enable or disable the Unicode flag (`u`) by default.
+ ///
+ /// By default this is **enabled**. It may alternatively be selectively
+ /// disabled in the regular expression itself via the `u` flag.
+ ///
+ /// Note that unless `allow_invalid_utf8` is enabled (it's disabled by
+ /// default), a regular expression will fail to parse if Unicode mode is
+ /// disabled and a sub-expression could possibly match invalid UTF-8.
+ pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.hir.unicode(yes);
+ self
+ }
+}
+
+/// A convenience parser for regular expressions.
+///
+/// This parser takes as input a regular expression pattern string (the
+/// "concrete syntax") and returns a high-level intermediate representation
+/// (the HIR) suitable for most types of analysis. In particular, this parser
+/// hides the intermediate state of producing an AST (the "abstract syntax").
+/// The AST is itself far more complex than the HIR, so this parser serves as a
+/// convenience for never having to deal with it at all.
+///
+/// If callers have more fine grained use cases that need an AST, then please
+/// see the [`ast::parse`](ast/parse/index.html) module.
+///
+/// A `Parser` can be configured in more detail via a
+/// [`ParserBuilder`](struct.ParserBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Parser {
+ ast: ast::parse::Parser,
+ hir: hir::translate::Translator,
+}
+
+impl Parser {
+ /// Create a new parser with a default configuration.
+ ///
+ /// The parser can be run with the `parse` method. The parse method returns
+ /// a high level intermediate representation of the given regular
+ /// expression.
+ ///
+ /// To set configuration options on the parser, use
+ /// [`ParserBuilder`](struct.ParserBuilder.html).
+ pub fn new() -> Parser {
+ ParserBuilder::new().build()
+ }
+
+ /// Parse the regular expression into a high level intermediate
+ /// representation.
+ pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir> {
+ let ast = self.ast.parse(pattern)?;
+ let hir = self.hir.translate(pattern, &ast)?;
+ Ok(hir)
+ }
+}
diff --git a/vendor/regex-syntax/src/unicode.rs b/vendor/regex-syntax/src/unicode.rs
new file mode 100644
index 000000000..b894c7db2
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode.rs
@@ -0,0 +1,999 @@
+use std::error;
+use std::fmt;
+use std::result;
+
+use crate::hir;
+
+/// A type alias for errors specific to Unicode handling of classes.
+pub type Result<T> = result::Result<T, Error>;
+
+/// An inclusive range of codepoints from a generated file (hence the static
+/// lifetime).
+type Range = &'static [(char, char)];
+
+/// An error that occurs when dealing with Unicode.
+///
+/// We don't impl the Error trait here because these always get converted
+/// into other public errors. (This error type isn't exported.)
+#[derive(Debug)]
+pub enum Error {
+ PropertyNotFound,
+ PropertyValueNotFound,
+ // Not used when unicode-perl is enabled.
+ #[allow(dead_code)]
+ PerlClassNotFound,
+}
+
+/// A type alias for errors specific to Unicode case folding.
+pub type FoldResult<T> = result::Result<T, CaseFoldError>;
+
+/// An error that occurs when Unicode-aware simple case folding fails.
+///
+/// This error can occur when the case mapping tables necessary for Unicode
+/// aware case folding are unavailable. This only occurs when the
+/// `unicode-case` feature is disabled. (The feature is enabled by default.)
+#[derive(Debug)]
+pub struct CaseFoldError(());
+
+impl error::Error for CaseFoldError {}
+
+impl fmt::Display for CaseFoldError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(
+ f,
+ "Unicode-aware case folding is not available \
+ (probably because the unicode-case feature is not enabled)"
+ )
+ }
+}
+
+/// An error that occurs when the Unicode-aware `\w` class is unavailable.
+///
+/// This error can occur when the data tables necessary for the Unicode aware
+/// Perl character class `\w` are unavailable. This only occurs when the
+/// `unicode-perl` feature is disabled. (The feature is enabled by default.)
+#[derive(Debug)]
+pub struct UnicodeWordError(());
+
+impl error::Error for UnicodeWordError {}
+
+impl fmt::Display for UnicodeWordError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(
+ f,
+ "Unicode-aware \\w class is not available \
+ (probably because the unicode-perl feature is not enabled)"
+ )
+ }
+}
+
+/// Return an iterator over the equivalence class of simple case mappings
+/// for the given codepoint. The equivalence class does not include the
+/// given codepoint.
+///
+/// If the equivalence class is empty, then this returns the next scalar
+/// value that has a non-empty equivalence class, if it exists. If no such
+/// scalar value exists, then `None` is returned. The point of this behavior
+/// is to permit callers to avoid calling `simple_fold` more than they need
+/// to, since there is some cost to fetching the equivalence class.
+///
+/// This returns an error if the Unicode case folding tables are not available.
+pub fn simple_fold(
+ c: char,
+) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>> {
+ #[cfg(not(feature = "unicode-case"))]
+ fn imp(
+ _: char,
+ ) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
+ {
+ use std::option::IntoIter;
+ Err::<result::Result<IntoIter<char>, _>, _>(CaseFoldError(()))
+ }
+
+ #[cfg(feature = "unicode-case")]
+ fn imp(
+ c: char,
+ ) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
+ {
+ use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
+
+ Ok(CASE_FOLDING_SIMPLE
+ .binary_search_by_key(&c, |&(c1, _)| c1)
+ .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().map(|&c| c))
+ .map_err(|i| {
+ if i >= CASE_FOLDING_SIMPLE.len() {
+ None
+ } else {
+ Some(CASE_FOLDING_SIMPLE[i].0)
+ }
+ }))
+ }
+
+ imp(c)
+}
+
+/// Returns true if and only if the given (inclusive) range contains at least
+/// one Unicode scalar value that has a non-empty non-trivial simple case
+/// mapping.
+///
+/// This function panics if `end < start`.
+///
+/// This returns an error if the Unicode case folding tables are not available.
+pub fn contains_simple_case_mapping(
+ start: char,
+ end: char,
+) -> FoldResult<bool> {
+ #[cfg(not(feature = "unicode-case"))]
+ fn imp(_: char, _: char) -> FoldResult<bool> {
+ Err(CaseFoldError(()))
+ }
+
+ #[cfg(feature = "unicode-case")]
+ fn imp(start: char, end: char) -> FoldResult<bool> {
+ use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
+ use std::cmp::Ordering;
+
+ assert!(start <= end);
+ Ok(CASE_FOLDING_SIMPLE
+ .binary_search_by(|&(c, _)| {
+ if start <= c && c <= end {
+ Ordering::Equal
+ } else if c > end {
+ Ordering::Greater
+ } else {
+ Ordering::Less
+ }
+ })
+ .is_ok())
+ }
+
+ imp(start, end)
+}
+
+/// A query for finding a character class defined by Unicode. This supports
+/// either use of a property name directly, or lookup by property value. The
+/// former generally refers to Binary properties (see UTS#44, Table 8), but
+/// as a special exception (see UTS#18, Section 1.2) both general categories
+/// (an enumeration) and scripts (a catalog) are supported as if each of their
+/// possible values were a binary property.
+///
+/// In all circumstances, property names and values are normalized and
+/// canonicalized. That is, `GC == gc == GeneralCategory == general_category`.
+///
+/// The lifetime `'a` refers to the shorter of the lifetimes of property name
+/// and property value.
+#[derive(Debug)]
+pub enum ClassQuery<'a> {
+ /// Return a class corresponding to a Unicode binary property, named by
+ /// a single letter.
+ OneLetter(char),
+ /// Return a class corresponding to a Unicode binary property.
+ ///
+ /// Note that, by special exception (see UTS#18, Section 1.2), both
+ /// general category values and script values are permitted here as if
+ /// they were a binary property.
+ Binary(&'a str),
+ /// Return a class corresponding to all codepoints whose property
+ /// (identified by `property_name`) corresponds to the given value
+ /// (identified by `property_value`).
+ ByValue {
+ /// A property name.
+ property_name: &'a str,
+ /// A property value.
+ property_value: &'a str,
+ },
+}
+
+impl<'a> ClassQuery<'a> {
+ fn canonicalize(&self) -> Result<CanonicalClassQuery> {
+ match *self {
+ ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
+ ClassQuery::Binary(name) => self.canonical_binary(name),
+ ClassQuery::ByValue { property_name, property_value } => {
+ let property_name = symbolic_name_normalize(property_name);
+ let property_value = symbolic_name_normalize(property_value);
+
+ let canon_name = match canonical_prop(&property_name)? {
+ None => return Err(Error::PropertyNotFound),
+ Some(canon_name) => canon_name,
+ };
+ Ok(match canon_name {
+ "General_Category" => {
+ let canon = match canonical_gencat(&property_value)? {
+ None => return Err(Error::PropertyValueNotFound),
+ Some(canon) => canon,
+ };
+ CanonicalClassQuery::GeneralCategory(canon)
+ }
+ "Script" => {
+ let canon = match canonical_script(&property_value)? {
+ None => return Err(Error::PropertyValueNotFound),
+ Some(canon) => canon,
+ };
+ CanonicalClassQuery::Script(canon)
+ }
+ _ => {
+ let vals = match property_values(canon_name)? {
+ None => return Err(Error::PropertyValueNotFound),
+ Some(vals) => vals,
+ };
+ let canon_val =
+ match canonical_value(vals, &property_value) {
+ None => {
+ return Err(Error::PropertyValueNotFound)
+ }
+ Some(canon_val) => canon_val,
+ };
+ CanonicalClassQuery::ByValue {
+ property_name: canon_name,
+ property_value: canon_val,
+ }
+ }
+ })
+ }
+ }
+ }
+
+ fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
+ let norm = symbolic_name_normalize(name);
+
+ // This is a special case where 'cf' refers to the 'Format' general
+ // category, but where the 'cf' abbreviation is also an abbreviation
+ // for the 'Case_Folding' property. But we want to treat it as
+ // a general category. (Currently, we don't even support the
+ // 'Case_Folding' property. But if we do in the future, users will be
+ // required to spell it out.)
+ if norm != "cf" {
+ if let Some(canon) = canonical_prop(&norm)? {
+ return Ok(CanonicalClassQuery::Binary(canon));
+ }
+ }
+ if let Some(canon) = canonical_gencat(&norm)? {
+ return Ok(CanonicalClassQuery::GeneralCategory(canon));
+ }
+ if let Some(canon) = canonical_script(&norm)? {
+ return Ok(CanonicalClassQuery::Script(canon));
+ }
+ Err(Error::PropertyNotFound)
+ }
+}
+
+/// Like ClassQuery, but its parameters have been canonicalized. This also
+/// differentiates binary properties from flattened general categories and
+/// scripts.
+#[derive(Debug, Eq, PartialEq)]
+enum CanonicalClassQuery {
+ /// The canonical binary property name.
+ Binary(&'static str),
+ /// The canonical general category name.
+ GeneralCategory(&'static str),
+ /// The canonical script name.
+ Script(&'static str),
+ /// An arbitrary association between property and value, both of which
+ /// have been canonicalized.
+ ///
+ /// Note that by construction, the property name of ByValue will never
+ /// be General_Category or Script. Those two cases are subsumed by the
+ /// eponymous variants.
+ ByValue {
+ /// The canonical property name.
+ property_name: &'static str,
+ /// The canonical property value.
+ property_value: &'static str,
+ },
+}
+
+/// Looks up a Unicode class given a query. If one doesn't exist, then
+/// an error is returned.
+pub fn class(query: ClassQuery<'_>) -> Result<hir::ClassUnicode> {
+ use self::CanonicalClassQuery::*;
+
+ match query.canonicalize()? {
+ Binary(name) => bool_property(name),
+ GeneralCategory(name) => gencat(name),
+ Script(name) => script(name),
+ ByValue { property_name: "Age", property_value } => {
+ let mut class = hir::ClassUnicode::empty();
+ for set in ages(property_value)? {
+ class.union(&hir_class(set));
+ }
+ Ok(class)
+ }
+ ByValue { property_name: "Script_Extensions", property_value } => {
+ script_extension(property_value)
+ }
+ ByValue {
+ property_name: "Grapheme_Cluster_Break",
+ property_value,
+ } => gcb(property_value),
+ ByValue { property_name: "Sentence_Break", property_value } => {
+ sb(property_value)
+ }
+ ByValue { property_name: "Word_Break", property_value } => {
+ wb(property_value)
+ }
+ _ => {
+ // What else should we support?
+ Err(Error::PropertyNotFound)
+ }
+ }
+}
+
+/// Returns a Unicode aware class for \w.
+///
+/// This returns an error if the data is not available for \w.
+pub fn perl_word() -> Result<hir::ClassUnicode> {
+ #[cfg(not(feature = "unicode-perl"))]
+ fn imp() -> Result<hir::ClassUnicode> {
+ Err(Error::PerlClassNotFound)
+ }
+
+ #[cfg(feature = "unicode-perl")]
+ fn imp() -> Result<hir::ClassUnicode> {
+ use crate::unicode_tables::perl_word::PERL_WORD;
+ Ok(hir_class(PERL_WORD))
+ }
+
+ imp()
+}
+
+/// Returns a Unicode aware class for \s.
+///
+/// This returns an error if the data is not available for \s.
+pub fn perl_space() -> Result<hir::ClassUnicode> {
+ #[cfg(not(any(feature = "unicode-perl", feature = "unicode-bool")))]
+ fn imp() -> Result<hir::ClassUnicode> {
+ Err(Error::PerlClassNotFound)
+ }
+
+ #[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
+ fn imp() -> Result<hir::ClassUnicode> {
+ use crate::unicode_tables::perl_space::WHITE_SPACE;
+ Ok(hir_class(WHITE_SPACE))
+ }
+
+ #[cfg(feature = "unicode-bool")]
+ fn imp() -> Result<hir::ClassUnicode> {
+ use crate::unicode_tables::property_bool::WHITE_SPACE;
+ Ok(hir_class(WHITE_SPACE))
+ }
+
+ imp()
+}
+
+/// Builds the Unicode-aware character class matched by `\d`.
+///
+/// The `DECIMAL_NUMBER` table is taken from the general category tables
+/// when `unicode-gencat` is enabled, and from the Perl tables when only
+/// `unicode-perl` is enabled. With neither feature enabled,
+/// `Error::PerlClassNotFound` is returned.
+pub fn perl_digit() -> Result<hir::ClassUnicode> {
+    #[cfg(feature = "unicode-gencat")]
+    fn digit_class() -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::general_category::DECIMAL_NUMBER;
+        let class = hir_class(DECIMAL_NUMBER);
+        Ok(class)
+    }
+
+    #[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
+    fn digit_class() -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::perl_decimal::DECIMAL_NUMBER;
+        let class = hir_class(DECIMAL_NUMBER);
+        Ok(class)
+    }
+
+    #[cfg(not(any(feature = "unicode-perl", feature = "unicode-gencat")))]
+    fn digit_class() -> Result<hir::ClassUnicode> {
+        Err(Error::PerlClassNotFound)
+    }
+
+    digit_class()
+}
+
+/// Converts a table of inclusive `(start, end)` scalar value pairs into a
+/// Unicode HIR class containing one `ClassUnicodeRange` per pair.
+pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
+    let mut converted: Vec<hir::ClassUnicodeRange> =
+        Vec::with_capacity(ranges.len());
+    for &(start, end) in ranges {
+        converted.push(hir::ClassUnicodeRange::new(start, end));
+    }
+    hir::ClassUnicode::new(converted)
+}
+
+/// Reports whether the given codepoint belongs to the `\w` character class.
+///
+/// Without the `unicode-perl` feature the table is unavailable and a
+/// `UnicodeWordError` is returned.
+pub fn is_word_character(c: char) -> result::Result<bool, UnicodeWordError> {
+    #[cfg(feature = "unicode-perl")]
+    fn lookup(ch: char) -> result::Result<bool, UnicodeWordError> {
+        use crate::is_word_byte;
+        use crate::unicode_tables::perl_word::PERL_WORD;
+        use std::cmp::Ordering;
+
+        // ASCII word bytes are answered without searching the table.
+        if ch <= 0x7F as char && is_word_byte(ch as u8) {
+            return Ok(true);
+        }
+        // Each table entry is an inclusive range; report how the range
+        // sits relative to `ch` so the binary search can home in on it.
+        let found = PERL_WORD.binary_search_by(|&(lo, hi)| {
+            if lo > ch {
+                Ordering::Greater
+            } else if ch > hi {
+                Ordering::Less
+            } else {
+                Ordering::Equal
+            }
+        });
+        Ok(found.is_ok())
+    }
+
+    #[cfg(not(feature = "unicode-perl"))]
+    fn lookup(_: char) -> result::Result<bool, UnicodeWordError> {
+        Err(UnicodeWordError(()))
+    }
+
+    lookup(c)
+}
+
+/// A mapping of property values for a specific property.
+///
+/// The first element of each tuple is a normalized property value while the
+/// second element of each tuple is the corresponding canonical property
+/// value.
+type PropertyValues = &'static [(&'static str, &'static str)];
+
+/// Find the canonical general category name for the given normalized value.
+///
+/// The names `any`, `assigned` and `ascii` are resolved directly. Every
+/// other value is looked up in the `General_Category` property value table.
+/// An error is returned if that table is not available.
+fn canonical_gencat(normalized_value: &str) -> Result<Option<&'static str>> {
+    let special = match normalized_value {
+        "any" => Some("Any"),
+        "assigned" => Some("Assigned"),
+        "ascii" => Some("ASCII"),
+        _ => None,
+    };
+    if special.is_some() {
+        return Ok(special);
+    }
+    let table = property_values("General_Category")?.unwrap();
+    Ok(canonical_value(table, normalized_value))
+}
+
+/// Find the canonical script name for the given normalized value by
+/// consulting the `Script` property value table.
+///
+/// An error is returned if the property value data is not available.
+fn canonical_script(normalized_value: &str) -> Result<Option<&'static str>> {
+    let table = property_values("Script")?;
+    let canonical = canonical_value(table.unwrap(), normalized_value);
+    Ok(canonical)
+}
+
+/// Look up the canonical property name for a normalized property name.
+///
+/// `None` is returned when no property has the given name.
+///
+/// The name must already be normalized according to UAX44 LM3 (see
+/// `symbolic_name_normalize`).
+///
+/// An error is returned when none of the Unicode table features is enabled,
+/// since the property name data is then unavailable.
+fn canonical_prop(normalized_name: &str) -> Result<Option<&'static str>> {
+    #[cfg(any(
+        feature = "unicode-age",
+        feature = "unicode-bool",
+        feature = "unicode-gencat",
+        feature = "unicode-perl",
+        feature = "unicode-script",
+        feature = "unicode-segment",
+    ))]
+    fn lookup(name: &str) -> Result<Option<&'static str>> {
+        use crate::unicode_tables::property_names::PROPERTY_NAMES;
+
+        let hit = PROPERTY_NAMES.binary_search_by_key(&name, |&(key, _)| key);
+        Ok(match hit {
+            Ok(idx) => Some(PROPERTY_NAMES[idx].1),
+            Err(_) => None,
+        })
+    }
+
+    #[cfg(not(any(
+        feature = "unicode-age",
+        feature = "unicode-bool",
+        feature = "unicode-gencat",
+        feature = "unicode-perl",
+        feature = "unicode-script",
+        feature = "unicode-segment",
+    )))]
+    fn lookup(_: &str) -> Result<Option<&'static str>> {
+        Err(Error::PropertyNotFound)
+    }
+
+    lookup(normalized_name)
+}
+
+/// Look up the canonical property value for a normalized property value.
+///
+/// `vals` should be the value table of the property in question, as
+/// returned by `property_values`. It is binary searched by its first tuple
+/// element.
+///
+/// `None` is returned when the table has no such value.
+///
+/// The value must already be normalized according to UAX44 LM3 (see
+/// `symbolic_name_normalize`).
+fn canonical_value(
+    vals: PropertyValues,
+    normalized_value: &str,
+) -> Option<&'static str> {
+    match vals.binary_search_by_key(&normalized_value, |&(key, _)| key) {
+        Ok(idx) => Some(vals[idx].1),
+        Err(_) => None,
+    }
+}
+
+/// Fetch the table of property values belonging to the given canonical
+/// property name, or `None` if the property has no entry.
+///
+/// An error is returned when none of the Unicode table features is enabled,
+/// since the property value data is then unavailable.
+fn property_values(
+    canonical_property_name: &'static str,
+) -> Result<Option<PropertyValues>> {
+    #[cfg(any(
+        feature = "unicode-age",
+        feature = "unicode-bool",
+        feature = "unicode-gencat",
+        feature = "unicode-perl",
+        feature = "unicode-script",
+        feature = "unicode-segment",
+    ))]
+    fn lookup(name: &'static str) -> Result<Option<PropertyValues>> {
+        use crate::unicode_tables::property_values::PROPERTY_VALUES;
+
+        let hit = PROPERTY_VALUES.binary_search_by_key(&name, |&(key, _)| key);
+        Ok(match hit {
+            Ok(idx) => Some(PROPERTY_VALUES[idx].1),
+            Err(_) => None,
+        })
+    }
+
+    #[cfg(not(any(
+        feature = "unicode-age",
+        feature = "unicode-bool",
+        feature = "unicode-gencat",
+        feature = "unicode-perl",
+        feature = "unicode-script",
+        feature = "unicode-segment",
+    )))]
+    fn lookup(_: &'static str) -> Result<Option<PropertyValues>> {
+        Err(Error::PropertyValueNotFound)
+    }
+
+    lookup(canonical_property_name)
+}
+
+// Binary searches `name_map` by name and returns the matching range table.
+//
+// Only some feature combinations use this helper. It is small enough to be
+// left as dead code rather than tracking (and maintaining) the exact set of
+// features that need it.
+#[allow(dead_code)]
+fn property_set(
+    name_map: &'static [(&'static str, Range)],
+    canonical: &'static str,
+) -> Option<Range> {
+    let idx = name_map
+        .binary_search_by_key(&canonical, |entry| entry.0)
+        .ok()?;
+    Some(name_map[idx].1)
+}
+
+/// Returns an iterator over Unicode Age sets, where each item is the set of
+/// codepoints added in one revision of Unicode.
+///
+/// Items are yielded in chronological order, starting at the first known
+/// revision and ending with (and including) `canonical_age`.
+///
+/// An error is returned if the age value is not recognised or if the age
+/// data is not available.
+fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
+    #[cfg(feature = "unicode-age")]
+    fn select(wanted: &str) -> Result<impl Iterator<Item = Range>> {
+        use crate::unicode_tables::age;
+
+        // Kept in chronological order (unlike `age::BY_NAME`, which is
+        // sorted by name) so that a prefix of this table is exactly "all
+        // revisions up to X".
+        const AGES: &[(&str, Range)] = &[
+            ("V1_1", age::V1_1),
+            ("V2_0", age::V2_0),
+            ("V2_1", age::V2_1),
+            ("V3_0", age::V3_0),
+            ("V3_1", age::V3_1),
+            ("V3_2", age::V3_2),
+            ("V4_0", age::V4_0),
+            ("V4_1", age::V4_1),
+            ("V5_0", age::V5_0),
+            ("V5_1", age::V5_1),
+            ("V5_2", age::V5_2),
+            ("V6_0", age::V6_0),
+            ("V6_1", age::V6_1),
+            ("V6_2", age::V6_2),
+            ("V6_3", age::V6_3),
+            ("V7_0", age::V7_0),
+            ("V8_0", age::V8_0),
+            ("V9_0", age::V9_0),
+            ("V10_0", age::V10_0),
+            ("V11_0", age::V11_0),
+            ("V12_0", age::V12_0),
+            ("V12_1", age::V12_1),
+            ("V13_0", age::V13_0),
+        ];
+        assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
+
+        for (idx, &(name, _)) in AGES.iter().enumerate() {
+            if wanted == name {
+                let upto = &AGES[..=idx];
+                return Ok(upto.iter().map(|&(_, ranges)| ranges));
+            }
+        }
+        Err(Error::PropertyValueNotFound)
+    }
+
+    #[cfg(not(feature = "unicode-age"))]
+    fn select(_: &str) -> Result<impl Iterator<Item = Range>> {
+        use std::option::IntoIter;
+        // A concrete iterator type must be named even though only the
+        // error variant is ever produced here.
+        Err::<IntoIter<Range>, _>(Error::PropertyNotFound)
+    }
+
+    select(canonical_age)
+}
+
+/// Builds the Unicode HIR class for the given general category.
+///
+/// The caller is expected to have canonicalized the name already.
+///
+/// `Decimal_Number` is delegated to `perl_digit`. `ASCII` and `Any` are
+/// built from fixed ranges, and `Assigned` is the negation of `Unassigned`.
+/// Every other name is looked up in the general category tables.
+///
+/// An error is returned if the category is unknown or if the general
+/// category data is not available.
+fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(feature = "unicode-gencat")]
+    fn from_tables(name: &'static str) -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::general_category::BY_NAME;
+
+        if name == "ASCII" {
+            return Ok(hir_class(&[('\0', '\x7F')]));
+        }
+        if name == "Any" {
+            return Ok(hir_class(&[('\0', '\u{10FFFF}')]));
+        }
+        if name == "Assigned" {
+            let mut cls = gencat("Unassigned")?;
+            cls.negate();
+            return Ok(cls);
+        }
+        match property_set(BY_NAME, name) {
+            Some(ranges) => Ok(hir_class(ranges)),
+            None => Err(Error::PropertyValueNotFound),
+        }
+    }
+
+    #[cfg(not(feature = "unicode-gencat"))]
+    fn from_tables(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    if canonical_name == "Decimal_Number" {
+        perl_digit()
+    } else {
+        from_tables(canonical_name)
+    }
+}
+
+/// Builds the Unicode HIR class for the given script.
+///
+/// The caller is expected to have canonicalized the name already.
+///
+/// An error is returned if the script is unknown or if the script data is
+/// not available.
+fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(feature = "unicode-script")]
+    fn from_tables(name: &'static str) -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::script::BY_NAME;
+        match property_set(BY_NAME, name) {
+            Some(ranges) => Ok(hir_class(ranges)),
+            None => Err(Error::PropertyValueNotFound),
+        }
+    }
+
+    #[cfg(not(feature = "unicode-script"))]
+    fn from_tables(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    from_tables(canonical_name)
+}
+
+/// Builds the Unicode HIR class for the given script extension.
+///
+/// The caller is expected to have canonicalized the name already.
+///
+/// An error is returned if the script extension is unknown or if the
+/// script data is not available.
+fn script_extension(
+    canonical_name: &'static str,
+) -> Result<hir::ClassUnicode> {
+    #[cfg(feature = "unicode-script")]
+    fn from_tables(name: &'static str) -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::script_extension::BY_NAME;
+        match property_set(BY_NAME, name) {
+            Some(ranges) => Ok(hir_class(ranges)),
+            None => Err(Error::PropertyValueNotFound),
+        }
+    }
+
+    #[cfg(not(feature = "unicode-script"))]
+    fn from_tables(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    from_tables(canonical_name)
+}
+
+/// Builds the Unicode HIR class for the given Unicode boolean property.
+///
+/// The caller is expected to have canonicalized the name already.
+///
+/// `Decimal_Number` and `White_Space` are delegated to `perl_digit` and
+/// `perl_space` respectively. Every other name is looked up in the boolean
+/// property tables.
+///
+/// An error is returned if the property is unknown or if the boolean
+/// property data is not available.
+fn bool_property(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(feature = "unicode-bool")]
+    fn from_tables(name: &'static str) -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::property_bool::BY_NAME;
+        match property_set(BY_NAME, name) {
+            Some(ranges) => Ok(hir_class(ranges)),
+            None => Err(Error::PropertyNotFound),
+        }
+    }
+
+    #[cfg(not(feature = "unicode-bool"))]
+    fn from_tables(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    if canonical_name == "Decimal_Number" {
+        perl_digit()
+    } else if canonical_name == "White_Space" {
+        perl_space()
+    } else {
+        from_tables(canonical_name)
+    }
+}
+
+/// Builds the Unicode HIR class for the given grapheme cluster break
+/// property value.
+///
+/// The caller is expected to have canonicalized the name already.
+///
+/// An error is returned if the value is unknown or if the segmentation
+/// data is not available.
+fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(feature = "unicode-segment")]
+    fn from_tables(name: &'static str) -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::grapheme_cluster_break::BY_NAME;
+        match property_set(BY_NAME, name) {
+            Some(ranges) => Ok(hir_class(ranges)),
+            None => Err(Error::PropertyValueNotFound),
+        }
+    }
+
+    #[cfg(not(feature = "unicode-segment"))]
+    fn from_tables(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    from_tables(canonical_name)
+}
+
+/// Builds the Unicode HIR class for the given word break property value.
+///
+/// The caller is expected to have canonicalized the name already.
+///
+/// An error is returned if the value is unknown or if the segmentation
+/// data is not available.
+fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(feature = "unicode-segment")]
+    fn from_tables(name: &'static str) -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::word_break::BY_NAME;
+        match property_set(BY_NAME, name) {
+            Some(ranges) => Ok(hir_class(ranges)),
+            None => Err(Error::PropertyValueNotFound),
+        }
+    }
+
+    #[cfg(not(feature = "unicode-segment"))]
+    fn from_tables(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    from_tables(canonical_name)
+}
+
+/// Builds the Unicode HIR class for the given sentence break property
+/// value.
+///
+/// The caller is expected to have canonicalized the name already.
+///
+/// An error is returned if the value is unknown or if the segmentation
+/// data is not available.
+fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
+    #[cfg(feature = "unicode-segment")]
+    fn from_tables(name: &'static str) -> Result<hir::ClassUnicode> {
+        use crate::unicode_tables::sentence_break::BY_NAME;
+        match property_set(BY_NAME, name) {
+            Some(ranges) => Ok(hir_class(ranges)),
+            None => Err(Error::PropertyValueNotFound),
+        }
+    }
+
+    #[cfg(not(feature = "unicode-segment"))]
+    fn from_tables(_: &'static str) -> Result<hir::ClassUnicode> {
+        Err(Error::PropertyNotFound)
+    }
+
+    from_tables(canonical_name)
+}
+
+/// String-based wrapper around `symbolic_name_normalize_bytes`: normalizes
+/// a copy of `x` and returns the result as an owned `String`.
+fn symbolic_name_normalize(x: &str) -> String {
+    let mut bytes = x.to_owned().into_bytes();
+    let kept = symbolic_name_normalize_bytes(&mut bytes).len();
+    bytes.truncate(kept);
+    // `symbolic_name_normalize_bytes` guarantees that its returned prefix is
+    // valid UTF-8, so this conversion is not expected to fail.
+    //
+    // N.B. The UTF-8 re-validation could be skipped, but the extra safety
+    // check is cheap. A benchmark should justify removing it first.
+    String::from_utf8(bytes).unwrap()
+}
+
+/// Normalize the given symbolic name in place according to UAX44-LM3 and
+/// return the normalized prefix of `slice`.
+///
+/// A "symbolic name" is typically a property name or a property value
+/// alias. This must not be applied to property string values.
+///
+/// Normalization drops a leading case-insensitive "is", removes spaces,
+/// underscores and hyphens, lowercases ASCII letters and discards every
+/// non-ASCII byte. As only ASCII bytes are kept, the returned slice is valid
+/// UTF-8 for every possible input.
+///
+/// See: https://unicode.org/reports/tr44/#UAX44-LM3
+fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
+    // No part of the standard was found that pins down the structure of
+    // property names/aliases (unlike character names), so they are assumed
+    // to be ASCII only and anything else is dropped.
+    //
+    // Any "is" prefix, in any letter case, is ignored.
+    let starts_with_is = slice.len() >= 2
+        && slice[0].eq_ignore_ascii_case(&b'i')
+        && slice[1].eq_ignore_ascii_case(&b's');
+    let start = if starts_with_is { 2 } else { 0 };
+
+    let mut next_write = 0;
+    for i in start..slice.len() {
+        // VALIDITY ARGUMENT: only ASCII bytes are ever written back, which
+        // is what keeps the resulting prefix valid UTF-8.
+        let kept = match slice[i] {
+            b' ' | b'_' | b'-' => continue,
+            upper @ b'A'..=b'Z' => upper.to_ascii_lowercase(),
+            ascii @ 0x00..=0x7F => ascii,
+            _ => continue,
+        };
+        slice[next_write] = kept;
+        next_write += 1;
+    }
+    // Special case: ISO_Comment has an 'isc' abbreviation. Because 'is'
+    // prefixes are stripped, 'isc' would collapse to 'c', which is actually
+    // an alias for the 'Other' general category. Restore 'isc' so the two
+    // stay distinct.
+    if starts_with_is && next_write == 1 && slice[0] == b'c' {
+        slice[..3].copy_from_slice(b"isc");
+        next_write = 3;
+    }
+    &mut slice[..next_write]
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{
+ contains_simple_case_mapping, simple_fold, symbolic_name_normalize,
+ symbolic_name_normalize_bytes,
+ };
+
+ #[cfg(feature = "unicode-case")]
+ fn simple_fold_ok(c: char) -> impl Iterator<Item = char> {
+ simple_fold(c).unwrap().unwrap()
+ }
+
+ #[cfg(feature = "unicode-case")]
+ fn simple_fold_err(c: char) -> Option<char> {
+ match simple_fold(c).unwrap() {
+ Ok(_) => unreachable!("simple_fold returned Ok iterator"),
+ Err(next) => next,
+ }
+ }
+
+ #[cfg(feature = "unicode-case")]
+ fn contains_case_map(start: char, end: char) -> bool {
+ contains_simple_case_mapping(start, end).unwrap()
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-case")]
+ fn simple_fold_k() {
+ let xs: Vec<char> = simple_fold_ok('k').collect();
+ assert_eq!(xs, vec!['K', 'â„Ē']);
+
+ let xs: Vec<char> = simple_fold_ok('K').collect();
+ assert_eq!(xs, vec!['k', 'â„Ē']);
+
+ let xs: Vec<char> = simple_fold_ok('â„Ē').collect();
+ assert_eq!(xs, vec!['K', 'k']);
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-case")]
+ fn simple_fold_a() {
+ let xs: Vec<char> = simple_fold_ok('a').collect();
+ assert_eq!(xs, vec!['A']);
+
+ let xs: Vec<char> = simple_fold_ok('A').collect();
+ assert_eq!(xs, vec!['a']);
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-case")]
+ fn simple_fold_empty() {
+ assert_eq!(Some('A'), simple_fold_err('?'));
+ assert_eq!(Some('A'), simple_fold_err('@'));
+ assert_eq!(Some('a'), simple_fold_err('['));
+ assert_eq!(Some('Ⰰ'), simple_fold_err('☃'));
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-case")]
+ fn simple_fold_max() {
+ assert_eq!(None, simple_fold_err('\u{10FFFE}'));
+ assert_eq!(None, simple_fold_err('\u{10FFFF}'));
+ }
+
+ #[test]
+ #[cfg(not(feature = "unicode-case"))]
+ fn simple_fold_disabled() {
+ assert!(simple_fold('a').is_err());
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-case")]
+ fn range_contains() {
+ assert!(contains_case_map('A', 'A'));
+ assert!(contains_case_map('Z', 'Z'));
+ assert!(contains_case_map('A', 'Z'));
+ assert!(contains_case_map('@', 'A'));
+ assert!(contains_case_map('Z', '['));
+ assert!(contains_case_map('☃', 'Ⰰ'));
+
+ assert!(!contains_case_map('[', '['));
+ assert!(!contains_case_map('[', '`'));
+
+ assert!(!contains_case_map('☃', '☃'));
+ }
+
+ #[test]
+ #[cfg(not(feature = "unicode-case"))]
+ fn range_contains_disabled() {
+ assert!(contains_simple_case_mapping('a', 'a').is_err());
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-gencat")]
+ fn regression_466() {
+ use super::{CanonicalClassQuery, ClassQuery};
+
+ let q = ClassQuery::OneLetter('C');
+ assert_eq!(
+ q.canonicalize().unwrap(),
+ CanonicalClassQuery::GeneralCategory("Other")
+ );
+ }
+
+ #[test]
+ fn sym_normalize() {
+ let sym_norm = symbolic_name_normalize;
+
+ assert_eq!(sym_norm("Line_Break"), "linebreak");
+ assert_eq!(sym_norm("Line-break"), "linebreak");
+ assert_eq!(sym_norm("linebreak"), "linebreak");
+ assert_eq!(sym_norm("BA"), "ba");
+ assert_eq!(sym_norm("ba"), "ba");
+ assert_eq!(sym_norm("Greek"), "greek");
+ assert_eq!(sym_norm("isGreek"), "greek");
+ assert_eq!(sym_norm("IS_Greek"), "greek");
+ assert_eq!(sym_norm("isc"), "isc");
+ assert_eq!(sym_norm("is c"), "isc");
+ assert_eq!(sym_norm("is_c"), "isc");
+ }
+
+ #[test]
+ fn valid_utf8_symbolic() {
+ let mut x = b"abc\xFFxyz".to_vec();
+ let y = symbolic_name_normalize_bytes(&mut x);
+ assert_eq!(y, b"abcxyz");
+ }
+}
diff --git a/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE b/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE
new file mode 100644
index 000000000..b82826bdb
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE
@@ -0,0 +1,57 @@
+UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
+
+Unicode Data Files include all data files under the directories
+http://www.unicode.org/Public/, http://www.unicode.org/reports/,
+http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
+http://www.unicode.org/utility/trac/browser/.
+
+Unicode Data Files do not include PDF online code charts under the
+directory http://www.unicode.org/Public/.
+
+Software includes any source code published in the Unicode Standard
+or under the directories
+http://www.unicode.org/Public/, http://www.unicode.org/reports/,
+http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
+http://www.unicode.org/utility/trac/browser/.
+
+NOTICE TO USER: Carefully read the following legal agreement.
+BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
+DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
+YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT.
+IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
+THE DATA FILES OR SOFTWARE.
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2018 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
diff --git a/vendor/regex-syntax/src/unicode_tables/age.rs b/vendor/regex-syntax/src/unicode_tables/age.rs
new file mode 100644
index 000000000..7772919eb
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/age.rs
@@ -0,0 +1,1673 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate age ucd-13.0.0 --chars
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+ ("V10_0", V10_0),
+ ("V11_0", V11_0),
+ ("V12_0", V12_0),
+ ("V12_1", V12_1),
+ ("V13_0", V13_0),
+ ("V1_1", V1_1),
+ ("V2_0", V2_0),
+ ("V2_1", V2_1),
+ ("V3_0", V3_0),
+ ("V3_1", V3_1),
+ ("V3_2", V3_2),
+ ("V4_0", V4_0),
+ ("V4_1", V4_1),
+ ("V5_0", V5_0),
+ ("V5_1", V5_1),
+ ("V5_2", V5_2),
+ ("V6_0", V6_0),
+ ("V6_1", V6_1),
+ ("V6_2", V6_2),
+ ("V6_3", V6_3),
+ ("V7_0", V7_0),
+ ("V8_0", V8_0),
+ ("V9_0", V9_0),
+];
+
+pub const V10_0: &'static [(char, char)] = &[
+ ('āĄ ', 'āĄĒ'),
+ ('ā§ŧ', 'ā§Ŋ'),
+ ('\u{afa}', '\u{aff}'),
+ ('\u{d00}', '\u{d00}'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('áŗˇ', 'áŗˇ'),
+ ('\u{1df6}', '\u{1df9}'),
+ ('â‚ŋ', 'â‚ŋ'),
+ ('âŋ', 'âŋ'),
+ ('⯒', '⯒'),
+ ('⹅', '⹉'),
+ ('ã„Ž', 'ã„Ž'),
+ ('éŋ–', 'éŋĒ'),
+ ('𐌭', 'đŒ¯'),
+ ('𑨀', '\u{11a47}'),
+ ('𑩐', 'đ‘Ēƒ'),
+ ('đ‘Ē†', 'đ‘Ēœ'),
+ ('đ‘Ēž', 'đ‘Ēĸ'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d47}'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ–ŋĄ', 'đ–ŋĄ'),
+ ('𛀂', '𛄞'),
+ ('𛅰', 'đ›‹ģ'),
+ ('🉠', 'đŸ‰Ĩ'),
+ ('🛓', '🛔'),
+ ('🛷', '🛸'),
+ ('🤀', '🤋'),
+ ('🤟', '🤟'),
+ ('🤨', 'đŸ¤¯'),
+ ('🤱', '🤲'),
+ ('đŸĨŒ', 'đŸĨŒ'),
+ ('đŸĨŸ', 'đŸĨĢ'),
+ ('đŸĻ’', 'đŸĻ—'),
+ ('🧐', 'đŸ§Ļ'),
+ ('đŦē°', 'đŽ¯ '),
+];
+
+pub const V11_0: &'static [(char, char)] = &[
+ ('Õ ', 'Õ '),
+ ('ֈ', 'ֈ'),
+ ('ׯ', 'ׯ'),
+ ('\u{7fd}', 'ßŋ'),
+ ('\u{8d3}', '\u{8d3}'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('āŠļ', 'āŠļ'),
+ ('\u{c04}', '\u{c04}'),
+ ('ā˛„', 'ā˛„'),
+ ('᥸', '᥸'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('âŽē', 'âŽŧ'),
+ ('⯓', 'â¯Ģ'),
+ ('⯰', '⯞'),
+ ('⹊', '⹎'),
+ ('ㄯ', 'ㄯ'),
+ ('éŋĢ', 'éŋ¯'),
+ ('ęž¯', 'ęž¯'),
+ ('Ꞹ', 'ꞹ'),
+ ('ęŖž', '\u{a8ff}'),
+ ('𐨴', 'đ¨ĩ'),
+ ('𐊈', '𐊈'),
+ ('𐴀', '\u{10d27}'),
+ ('𐴰', '𐴚'),
+ ('đŧ€', 'đŧ§'),
+ ('đŧ°', 'đŊ™'),
+ ('\u{110cd}', '\u{110cd}'),
+ ('𑅄', '𑅆'),
+ ('\u{1133b}', '\u{1133b}'),
+ ('\u{1145e}', '\u{1145e}'),
+ ('𑜚', '𑜚'),
+ ('𑠀', 'đ‘ ģ'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', 'đ‘ļ˜'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('đ‘ģ ', 'đ‘ģ¸'),
+ ('𖹀', 'đ–ēš'),
+ ('𘟭', '𘟱'),
+ ('𝋠', 'đ‹ŗ'),
+ ('𝍲', '𝍸'),
+ ('𞱱', '𞲴'),
+ ('đŸ„¯', 'đŸ„¯'),
+ ('🛹', '🛹'),
+ ('🟕', '🟘'),
+ ('đŸĨ', 'đŸĨ'),
+ ('đŸĨŦ', 'đŸĨ°'),
+ ('đŸĨŗ', 'đŸĨļ'),
+ ('đŸĨē', 'đŸĨē'),
+ ('đŸĨŧ', 'đŸĨŋ'),
+ ('đŸĻ˜', 'đŸĻĸ'),
+ ('đŸĻ°', 'đŸĻš'),
+ ('🧁', '🧂'),
+ ('🧧', 'đŸ§ŋ'),
+ ('🩠', '🩭'),
+];
+
+pub const V12_0: &'static [(char, char)] = &[
+ ('āąˇ', 'āąˇ'),
+ ('āē†', 'āē†'),
+ ('āē‰', 'āē‰'),
+ ('āēŒ', 'āēŒ'),
+ ('āēŽ', 'āē“'),
+ ('āē˜', 'āē˜'),
+ ('āē ', 'āē '),
+ ('āē¨', 'āēŠ'),
+ ('āēŦ', 'āēŦ'),
+ ('\u{eba}', '\u{eba}'),
+ ('áŗē', 'áŗē'),
+ ('⯉', '⯉'),
+ ('â¯ŋ', 'â¯ŋ'),
+ ('⚏', '⚏'),
+ ('ęžē', 'ęžŋ'),
+ ('Ꟃ', 'Ᶎ'),
+ ('ę­Ļ', 'ę­§'),
+ ('đŋ ', 'đŋļ'),
+ ('𑑟', '𑑟'),
+ ('𑚸', '𑚸'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '\u{119d7}'),
+ ('\u{119da}', '𑧤'),
+ ('đ‘Ē„', 'đ‘Ē…'),
+ ('đ‘ŋ€', 'đ‘ŋą'),
+ ('đ‘ŋŋ', 'đ‘ŋŋ'),
+ ('\u{13430}', '\u{13438}'),
+ ('đ–Ŋ…', 'đ–ŊŠ'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('đ–Ŋŋ', '𖾇'),
+ ('đ–ŋĸ', 'đ–ŋŖ'),
+ ('𘟲', '𘟷'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𞄀', 'đž„Ŧ'),
+ ('\u{1e130}', 'đž„Ŋ'),
+ ('𞅀', '𞅉'),
+ ('𞅎', '𞅏'),
+ ('𞋀', '𞋹'),
+ ('đž‹ŋ', 'đž‹ŋ'),
+ ('đžĨ‹', 'đžĨ‹'),
+ ('𞴁', 'đž´Ŋ'),
+ ('đŸ…Ŧ', 'đŸ…Ŧ'),
+ ('🛕', '🛕'),
+ ('đŸ›ē', 'đŸ›ē'),
+ ('🟠', 'đŸŸĢ'),
+ ('🤍', '🤏'),
+ ('đŸ¤ŋ', 'đŸ¤ŋ'),
+ ('đŸĨą', 'đŸĨą'),
+ ('đŸĨģ', 'đŸĨģ'),
+ ('đŸĻĨ', 'đŸĻĒ'),
+ ('đŸĻŽ', 'đŸĻ¯'),
+ ('đŸĻē', 'đŸĻŋ'),
+ ('🧃', '🧊'),
+ ('🧍', '🧏'),
+ ('🨀', '🩓'),
+ ('🩰', 'đŸŠŗ'),
+ ('🩸', 'đŸŠē'),
+ ('đŸĒ€', 'đŸĒ‚'),
+ ('đŸĒ', 'đŸĒ•'),
+];
+
+pub const V12_1: &'static [(char, char)] = &[('ã‹ŋ', 'ã‹ŋ')];
+
+pub const V13_0: &'static [(char, char)] = &[
+ ('\u{8be}', '\u{8c7}'),
+ ('\u{b55}', '\u{b55}'),
+ ('\u{d04}', '\u{d04}'),
+ ('\u{d81}', '\u{d81}'),
+ ('\u{1abf}', '\u{1ac0}'),
+ ('\u{2b97}', '\u{2b97}'),
+ ('\u{2e50}', '\u{2e52}'),
+ ('\u{31bb}', '\u{31bf}'),
+ ('\u{4db6}', '\u{4dbf}'),
+ ('\u{9ff0}', '\u{9ffc}'),
+ ('\u{a7c7}', '\u{a7ca}'),
+ ('\u{a7f5}', '\u{a7f6}'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('\u{ab68}', '\u{ab6b}'),
+ ('\u{1019c}', '\u{1019c}'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eab}', '\u{10ead}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('\u{10fb0}', '\u{10fcb}'),
+ ('\u{11147}', '\u{11147}'),
+ ('\u{111ce}', '\u{111cf}'),
+ ('\u{1145a}', '\u{1145a}'),
+ ('\u{11460}', '\u{11461}'),
+ ('\u{11900}', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{11946}'),
+ ('\u{11950}', '\u{11959}'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('\u{16fe4}', '\u{16fe4}'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('\u{18af3}', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('\u{1f10d}', '\u{1f10f}'),
+ ('\u{1f16d}', '\u{1f16f}'),
+ ('\u{1f1ad}', '\u{1f1ad}'),
+ ('\u{1f6d6}', '\u{1f6d7}'),
+ ('\u{1f6fb}', '\u{1f6fc}'),
+ ('\u{1f8b0}', '\u{1f8b1}'),
+ ('\u{1f90c}', '\u{1f90c}'),
+ ('\u{1f972}', '\u{1f972}'),
+ ('\u{1f977}', '\u{1f978}'),
+ ('\u{1f9a3}', '\u{1f9a4}'),
+ ('\u{1f9ab}', '\u{1f9ad}'),
+ ('\u{1f9cb}', '\u{1f9cb}'),
+ ('\u{1fa74}', '\u{1fa74}'),
+ ('\u{1fa83}', '\u{1fa86}'),
+ ('\u{1fa96}', '\u{1faa8}'),
+ ('\u{1fab0}', '\u{1fab6}'),
+ ('\u{1fac0}', '\u{1fac2}'),
+ ('\u{1fad0}', '\u{1fad6}'),
+ ('\u{1fb00}', '\u{1fb92}'),
+ ('\u{1fb94}', '\u{1fbca}'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+ ('\u{2a6d7}', '\u{2a6dd}'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const V1_1: &'static [(char, char)] = &[
+ ('\u{0}', 'Įĩ'),
+ ('Įē', 'ȗ'),
+ ('ɐ', 'ʨ'),
+ ('ʰ', '˞'),
+ ('Ë ', 'ËŠ'),
+ ('\u{300}', '\u{345}'),
+ ('\u{360}', '\u{361}'),
+ ('Í´', 'Íĩ'),
+ ('Íē', 'Íē'),
+ ('Íž', 'Íž'),
+ ('΄', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'ĪŽ'),
+ ('Ī', 'Ī–'),
+ ('Īš', 'Īš'),
+ ('Īœ', 'Īœ'),
+ ('Īž', 'Īž'),
+ ('Ī ', 'Ī '),
+ ('Īĸ', 'Īŗ'),
+ ('Ё', 'Ќ'),
+ ('Ў', 'Ņ'),
+ ('Ņ‘', 'Ņœ'),
+ ('Ņž', '\u{486}'),
+ ('Ō', 'Ķ„'),
+ ('Ķ‡', 'Ķˆ'),
+ ('Ķ‹', 'ĶŒ'),
+ ('Ķ', 'ĶĢ'),
+ ('ĶŽ', 'Ķĩ'),
+ ('Ķ¸', 'Ķš'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', '՟'),
+ ('ÕĄ', 'և'),
+ ('։', '։'),
+ ('\u{5b0}', '\u{5b9}'),
+ ('\u{5bb}', '׃'),
+ ('א', '×Ē'),
+ ('×°', '×´'),
+ ('،', '،'),
+ ('؛', '؛'),
+ ('؟', '؟'),
+ ('ØĄ', 'Øē'),
+ ('Ų€', '\u{652}'),
+ ('Ų ', 'Ų­'),
+ ('\u{670}', 'Úˇ'),
+ ('Úē', 'Úž'),
+ ('ۀ', 'ێ'),
+ ('ې', '\u{6ed}'),
+ ('Û°', 'Ûš'),
+ ('\u{901}', 'ā¤ƒ'),
+ ('ā¤…', 'ā¤š'),
+ ('\u{93c}', '\u{94d}'),
+ ('āĨ', '\u{954}'),
+ ('āĨ˜', 'āĨ°'),
+ ('\u{981}', 'āĻƒ'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9be}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', '\u{9cd}'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', '\u{9e3}'),
+ ('ā§Ļ', 'ā§ē'),
+ ('\u{a02}', '\u{a02}'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠĻ', 'āŠ´'),
+ ('\u{a81}', 'āĒƒ'),
+ ('āĒ…', 'āĒ‹'),
+ ('āĒ', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('\u{abc}', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', 'āĢ '),
+ ('āĢĻ', 'āĢ¯'),
+ ('\u{b01}', 'āŦƒ'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦļ', 'āŦš'),
+ ('\u{b3c}', '\u{b43}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b56}', '\u{b57}'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', 'ā­Ą'),
+ ('ā­Ļ', 'ā­°'),
+ ('\u{b82}', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽĩ'),
+ ('āŽˇ', 'āŽš'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('ā¯§', 'ā¯˛'),
+ ('ā°', 'ā°ƒ'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°ŗ'),
+ ('ā°ĩ', 'ā°š'),
+ ('\u{c3e}', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('āą ', 'āąĄ'),
+ ('āąĻ', 'āą¯'),
+ ('ā˛‚', 'ā˛ƒ'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛ž', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', 'āŗĄ'),
+ ('āŗĻ', 'āŗ¯'),
+ ('ā´‚', 'ā´ƒ'),
+ ('ā´…', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´¨'),
+ ('ā´Ē', 'ā´š'),
+ ('\u{d3e}', '\u{d43}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', '\u{d4d}'),
+ ('\u{d57}', '\u{d57}'),
+ ('āĩ ', 'āĩĄ'),
+ ('āĩĻ', 'āĩ¯'),
+ ('ā¸', '\u{e3a}'),
+ ('ā¸ŋ', 'āš›'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē‡', 'āēˆ'),
+ ('āēŠ', 'āēŠ'),
+ ('āē', 'āē'),
+ ('āē”', 'āē—'),
+ ('āē™', 'āēŸ'),
+ ('āēĄ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āē§'),
+ ('āēĒ', 'āēĢ'),
+ ('āē­', '\u{eb9}'),
+ ('\u{ebb}', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('āģ', 'āģ™'),
+ ('āģœ', 'āģ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('ა', 'áƒļ'),
+ ('áƒģ', 'áƒģ'),
+ ('ᄀ', 'ᅙ'),
+ ('ᅟ', 'á†ĸ'),
+ ('ᆨ', 'ᇹ'),
+ ('Ḁ', 'áēš'),
+ ('áē ', 'áģš'),
+ ('áŧ€', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'áŋ„'),
+ ('áŋ†', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ', 'áŋ¯'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋž'),
+ ('\u{2000}', '\u{202e}'),
+ ('‰', '⁆'),
+ ('\u{206a}', '⁰'),
+ ('⁴', '₎'),
+ ('₠', 'â‚Ē'),
+ ('\u{20d0}', '\u{20e1}'),
+ ('℀', 'ℸ'),
+ ('⅓', 'ↂ'),
+ ('←', 'â‡Ē'),
+ ('∀', '⋱'),
+ ('⌀', '⌀'),
+ ('⌂', 'âē'),
+ ('␀', '␤'),
+ ('⑀', '⑊'),
+ ('①', 'â“Ē'),
+ ('─', '▕'),
+ ('■', '◯'),
+ ('☀', '☓'),
+ ('☚', '♯'),
+ ('✁', '✄'),
+ ('✆', '✉'),
+ ('✌', '✧'),
+ ('✩', '❋'),
+ ('❍', '❍'),
+ ('❏', '❒'),
+ ('❖', '❖'),
+ ('❘', '❞'),
+ ('❡', '❧'),
+ ('âļ', '➔'),
+ ('➘', '➯'),
+ ('➱', '➾'),
+ ('\u{3000}', 'ã€ˇ'),
+ ('ã€ŋ', 'ã€ŋ'),
+ ('ぁ', 'ゔ'),
+ ('\u{3099}', 'ゞ'),
+ ('ã‚Ą', 'マ'),
+ ('ㄅ', 'ã„Ŧ'),
+ ('ã„ą', 'ㆎ'),
+ ('㆐', '㆟'),
+ ('㈀', '㈜'),
+ ('㈠', '㉃'),
+ ('㉠', 'ã‰ģ'),
+ ('ã‰ŋ', '㊰'),
+ ('㋀', '㋋'),
+ ('㋐', 'ã‹ž'),
+ ('㌀', 'ãļ'),
+ ('ãģ', '㏝'),
+ ('㏠', '㏞'),
+ ('一', 'éžĨ'),
+ ('\u{e000}', 'ī¨­'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('\u{fb1e}', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'ī´ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('\u{fe20}', '\u{fe23}'),
+ ('ī¸°', 'īš„'),
+ ('īš‰', 'īš’'),
+ ('īš”', 'īšĻ'),
+ ('īš¨', 'īšĢ'),
+ ('īš°', 'īš˛'),
+ ('īš´', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('\u{feff}', '\u{feff}'),
+ ('īŧ', 'īŊž'),
+ ('īŊĄ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('īŋ ', 'īŋĻ'),
+ ('īŋ¨', 'īŋŽ'),
+ ('īŋŊ', '\u{ffff}'),
+];
+
+pub const V2_0: &'static [(char, char)] = &[
+ ('\u{591}', '\u{5a1}'),
+ ('\u{5a3}', '\u{5af}'),
+ ('\u{5c4}', '\u{5c4}'),
+ ('āŧ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŠ'),
+ ('\u{f71}', 'āž‹'),
+ ('\u{f90}', '\u{f95}'),
+ ('\u{f97}', '\u{f97}'),
+ ('\u{f99}', '\u{fad}'),
+ ('\u{fb1}', '\u{fb7}'),
+ ('\u{fb9}', '\u{fb9}'),
+ ('áē›', 'áē›'),
+ ('â‚Ģ', 'â‚Ģ'),
+ ('가', 'ížŖ'),
+ ('\u{1fffe}', '\u{1ffff}'),
+ ('\u{2fffe}', '\u{2ffff}'),
+ ('\u{3fffe}', '\u{3ffff}'),
+ ('\u{4fffe}', '\u{4ffff}'),
+ ('\u{5fffe}', '\u{5ffff}'),
+ ('\u{6fffe}', '\u{6ffff}'),
+ ('\u{7fffe}', '\u{7ffff}'),
+ ('\u{8fffe}', '\u{8ffff}'),
+ ('\u{9fffe}', '\u{9ffff}'),
+ ('\u{afffe}', '\u{affff}'),
+ ('\u{bfffe}', '\u{bffff}'),
+ ('\u{cfffe}', '\u{cffff}'),
+ ('\u{dfffe}', '\u{dffff}'),
+ ('\u{efffe}', '\u{10ffff}'),
+];
+
+pub const V2_1: &'static [(char, char)] = &[('â‚Ŧ', 'â‚Ŧ'), ('īŋŧ', 'īŋŧ')];
+
+pub const V3_0: &'static [(char, char)] = &[
+ ('Įļ', 'Įš'),
+ ('Ș', 'ȟ'),
+ ('Čĸ', 'Čŗ'),
+ ('ĘŠ', 'Ę­'),
+ ('˟', '˟'),
+ ('ËĒ', 'ËŽ'),
+ ('\u{346}', '\u{34e}'),
+ ('\u{362}', '\u{362}'),
+ ('Ī—', 'Ī—'),
+ ('Ī›', 'Ī›'),
+ ('Ī', 'Ī'),
+ ('ĪŸ', 'ĪŸ'),
+ ('ĪĄ', 'ĪĄ'),
+ ('Ѐ', 'Ѐ'),
+ ('Ѝ', 'Ѝ'),
+ ('Ņ', 'Ņ'),
+ ('Ņ', 'Ņ'),
+ ('\u{488}', '\u{489}'),
+ ('ŌŒ', 'Ō'),
+ ('ĶŦ', 'Ķ­'),
+ ('֊', '֊'),
+ ('\u{653}', '\u{655}'),
+ ('Ú¸', 'Úš'),
+ ('Úŋ', 'Úŋ'),
+ ('ۏ', 'ۏ'),
+ ('Ûē', 'Ûž'),
+ ('܀', '܍'),
+ ('\u{70f}', 'ÜŦ'),
+ ('\u{730}', '\u{74a}'),
+ ('Ū€', '\u{7b0}'),
+ ('āļ‚', 'āļƒ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇ˛', 'āˇ´'),
+ ('āŊĒ', 'āŊĒ'),
+ ('\u{f96}', '\u{f96}'),
+ ('\u{fae}', '\u{fb0}'),
+ ('\u{fb8}', '\u{fb8}'),
+ ('\u{fba}', '\u{fbc}'),
+ ('āžž', 'āŋŒ'),
+ ('āŋ', 'āŋ'),
+ ('က', 'အ'),
+ ('á€Ŗ', 'ဧ'),
+ ('ဩ', 'á€Ē'),
+ ('á€Ŧ', '\u{1032}'),
+ ('\u{1036}', '\u{1039}'),
+ ('၀', '\u{1059}'),
+ ('ሀ', 'ሆ'),
+ ('ለ', 'ቆ'),
+ ('ቈ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኆ'),
+ ('ኈ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኮ'),
+ ('ኰ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዎ'),
+ ('ዐ', 'ዖ'),
+ ('ዘ', 'ዮ'),
+ ('ደ', 'ጎ'),
+ ('ጐ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ጞ'),
+ ('ጠ', 'ፆ'),
+ ('ፈ', 'ፚ'),
+ ('፡', 'áŧ'),
+ ('Ꭰ', 'Ᏼ'),
+ ('ᐁ', 'á™ļ'),
+ ('\u{1680}', '᚜'),
+ ('ᚠ', 'ᛰ'),
+ ('ក', 'ៜ'),
+ ('០', '៩'),
+ ('᠀', '\u{180e}'),
+ ('᠐', '᠙'),
+ ('ᠠ', 'ᥡ'),
+ ('áĸ€', '\u{18a9}'),
+ ('\u{202f}', '\u{202f}'),
+ ('⁈', '⁍'),
+ ('₭', '₯'),
+ ('\u{20e2}', '\u{20e3}'),
+ ('ℹ', 'â„ē'),
+ ('Ↄ', 'Ↄ'),
+ ('â‡Ģ', 'â‡ŗ'),
+ ('⌁', '⌁'),
+ ('âģ', 'âģ'),
+ ('âŊ', '⎚'),
+ ('âĨ', 'âĻ'),
+ ('◰', '◷'),
+ ('☙', '☙'),
+ ('♰', '♱'),
+ ('⠀', 'âŖŋ'),
+ ('âē€', 'âē™'),
+ ('âē›', 'âģŗ'),
+ ('âŧ€', 'âŋ•'),
+ ('âŋ°', 'âŋģ'),
+ ('〸', 'ã€ē'),
+ ('〞', '〞'),
+ ('ㆠ', 'ã†ˇ'),
+ ('㐀', 'äļĩ'),
+ ('ꀀ', 'ꒌ'),
+ ('꒐', '꒡'),
+ ('꒤', 'ę’ŗ'),
+ ('ę’ĩ', '꓀'),
+ ('꓂', '꓄'),
+ ('꓆', '꓆'),
+ ('īŦ', 'īŦ'),
+ ('\u{fff9}', '\u{fffb}'),
+];
+
+pub const V3_1: &'static [(char, char)] = &[
+ ('Ī´', 'Īĩ'),
+ ('\u{fdd0}', '\u{fdef}'),
+ ('𐌀', '𐌞'),
+ ('𐌠', 'đŒŖ'),
+ ('𐌰', '𐍊'),
+ ('𐐀', 'đĨ'),
+ ('𐐨', '𐑍'),
+ ('𝀀', 'đƒĩ'),
+ ('𝄀', 'đ„Ļ'),
+ ('đ„Ē', '𝇝'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓀'),
+ ('𝓂', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšŖ'),
+ ('𝚨', '𝟉'),
+ ('𝟎', 'đŸŋ'),
+ ('𠀀', 'đĒ›–'),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{e0001}', '\u{e0001}'),
+ ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const V3_2: &'static [(char, char)] = &[
+ ('Č ', 'Č '),
+ ('\u{34f}', '\u{34f}'),
+ ('\u{363}', '\u{36f}'),
+ ('Ī˜', 'Ī™'),
+ ('Īļ', 'Īļ'),
+ ('ŌŠ', 'Ō‹'),
+ ('Ķ…', 'Ķ†'),
+ ('Ķ‰', 'ĶŠ'),
+ ('Ķ', 'ĶŽ'),
+ ('Ԁ', 'ԏ'),
+ ('ŲŽ', 'Ų¯'),
+ ('Ūą', 'Ūą'),
+ ('ს', 'ჸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', '\u{1714}'),
+ ('ᜠ', 'áœļ'),
+ ('ᝀ', '\u{1753}'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('\u{1772}', '\u{1773}'),
+ ('⁇', '⁇'),
+ ('⁎', '⁒'),
+ ('⁗', '⁗'),
+ ('\u{205f}', '\u{2063}'),
+ ('⁹', '⁹'),
+ ('₰', '₱'),
+ ('\u{20e4}', '\u{20ea}'),
+ ('â„Ŋ', '⅋'),
+ ('⇴', 'â‡ŋ'),
+ ('⋲', 'â‹ŋ'),
+ ('âŧ', 'âŧ'),
+ ('⎛', '⏎'),
+ ('â“Ģ', '⓾'),
+ ('▖', '▟'),
+ ('◸', 'â—ŋ'),
+ ('☖', '☗'),
+ ('♲', 'â™Ŋ'),
+ ('⚀', '⚉'),
+ ('❨', 'âĩ'),
+ ('⟐', 'âŸĢ'),
+ ('⟰', 'âŸŋ'),
+ ('⤀', 'âĢŋ'),
+ ('ã€ģ', 'ã€Ŋ'),
+ ('ゕ', 'ゖ'),
+ ('ゟ', '゠'),
+ ('ãƒŋ', 'ãƒŋ'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㉑', '㉟'),
+ ('ãŠą', 'ãŠŋ'),
+ ('ę’ĸ', 'ę’Ŗ'),
+ ('꒴', '꒴'),
+ ('꓁', '꓁'),
+ ('꓅', '꓅'),
+ ('ī¨°', 'īŠĒ'),
+ ('īˇŧ', 'īˇŧ'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('īš…', 'īš†'),
+ ('īšŗ', 'īšŗ'),
+ ('īŊŸ', 'īŊ '),
+];
+
+pub const V4_0: &'static [(char, char)] = &[
+ ('ČĄ', 'ČĄ'),
+ ('Č´', 'Čļ'),
+ ('ĘŽ', 'Ę¯'),
+ ('˯', 'Ëŋ'),
+ ('\u{350}', '\u{357}'),
+ ('\u{35d}', '\u{35f}'),
+ ('Īˇ', 'Īģ'),
+ ('\u{600}', '\u{603}'),
+ ('؍', '\u{615}'),
+ ('\u{656}', '\u{658}'),
+ ('ÛŽ', 'Û¯'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܭ', 'ܯ'),
+ ('Ũ', 'Ũ'),
+ ('ā¤„', 'ā¤„'),
+ ('āĻŊ', 'āĻŊ'),
+ ('\u{a01}', '\u{a01}'),
+ ('ā¨ƒ', 'ā¨ƒ'),
+ ('āĒŒ', 'āĒŒ'),
+ ('āĢĄ', '\u{ae3}'),
+ ('āĢą', 'āĢą'),
+ ('āŦĩ', 'āŦĩ'),
+ ('ā­ą', 'ā­ą'),
+ ('ā¯ŗ', 'ā¯ē'),
+ ('\u{cbc}', 'ā˛Ŋ'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('៰', '៹'),
+ ('ᤀ', 'ᤜ'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('áĨ€', 'áĨ€'),
+ ('áĨ„', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('᧠', 'á§ŋ'),
+ ('ᴀ', 'áĩĢ'),
+ ('⁓', '⁔'),
+ ('â„ģ', 'â„ģ'),
+ ('⏏', '⏐'),
+ ('â“ŋ', 'â“ŋ'),
+ ('☔', '☕'),
+ ('⚊', '⚑'),
+ ('⚠', '⚡'),
+ ('âŦ€', 'âŦ'),
+ ('㈝', '㈞'),
+ ('㉐', '㉐'),
+ ('ã‰ŧ', 'ã‰Ŋ'),
+ ('㋌', '㋏'),
+ ('ãˇ', 'ãē'),
+ ('㏞', '㏟'),
+ ('ãŋ', 'ãŋ'),
+ ('䷀', 'äˇŋ'),
+ ('īˇŊ', 'īˇŊ'),
+ ('īš‡', 'īšˆ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐄀', '𐄂'),
+ ('𐄇', 'đ„ŗ'),
+ ('𐄷', 'đ„ŋ'),
+ ('𐎀', '𐎝'),
+ ('𐎟', '𐎟'),
+ ('đĻ', '𐐧'),
+ ('𐑎', '𐒝'),
+ ('𐒠', '𐒩'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', 'đ ŋ'),
+ ('𝌀', '𝍖'),
+ ('𝓁', '𝓁'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const V4_1: &'static [(char, char)] = &[
+ ('ȡ', 'Ɂ'),
+ ('\u{358}', '\u{35c}'),
+ ('Īŧ', 'Īŋ'),
+ ('Ķļ', 'Ķˇ'),
+ ('\u{5a2}', '\u{5a2}'),
+ ('\u{5c5}', '\u{5c7}'),
+ ('؋', '؋'),
+ ('؞', '؞'),
+ ('\u{659}', '\u{65e}'),
+ ('Ũ', 'Ũ­'),
+ ('āĨŊ', 'āĨŊ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('āŽļ', 'āŽļ'),
+ ('ā¯Ļ', 'ā¯Ļ'),
+ ('āŋ', 'āŋ‘'),
+ ('ლ', 'áƒē'),
+ ('áƒŧ', 'áƒŧ'),
+ ('ሇ', 'ሇ'),
+ ('ቇ', 'ቇ'),
+ ('ኇ', 'ኇ'),
+ ('ኯ', 'ኯ'),
+ ('ዏ', 'ዏ'),
+ ('ዯ', 'ዯ'),
+ ('ጏ', 'ጏ'),
+ ('ጟ', 'ጟ'),
+ ('ፇ', 'ፇ'),
+ ('\u{135f}', '፠'),
+ ('ᎀ', '᎙'),
+ ('áĻ€', 'áĻŠ'),
+ ('áĻ°', 'ᧉ'),
+ ('᧐', '᧙'),
+ ('᧞', '᧟'),
+ ('ᨀ', '\u{1a1b}'),
+ ('᨞', '᨟'),
+ ('áĩŦ', '\u{1dc3}'),
+ ('⁕', '⁖'),
+ ('⁘', '⁞'),
+ ('ₐ', 'ₔ'),
+ ('₲', 'â‚ĩ'),
+ ('\u{20eb}', '\u{20eb}'),
+ ('â„ŧ', 'â„ŧ'),
+ ('⅌', '⅌'),
+ ('⏑', '⏛'),
+ ('☘', '☘'),
+ ('♾', 'â™ŋ'),
+ ('⚒', '⚜'),
+ ('âšĸ', '⚱'),
+ ('⟀', '⟆'),
+ ('âŦŽ', 'âŦ“'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('Ⲁ', 'âŗĒ'),
+ ('âŗš', 'â´Ĩ'),
+ ('â´°', 'âĩĨ'),
+ ('âĩ¯', 'âĩ¯'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('⸀', '⸗'),
+ ('⸜', '⸝'),
+ ('㇀', '㇏'),
+ ('㉞', '㉞'),
+ ('éžĻ', 'éžģ'),
+ ('꜀', '꜖'),
+ ('ꠀ', 'ę Ģ'),
+ ('īŠ°', 'īĢ™'),
+ ('ī¸', 'ī¸™'),
+ ('𐅀', '𐆊'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏕'),
+ ('𐨀', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ŗ'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '𐩇'),
+ ('𐊐', '𐊘'),
+ ('𝈀', '𝉅'),
+ ('𝚤', 'đšĨ'),
+];
+
+pub const V5_0: &'static [(char, char)] = &[
+ ('ɂ', 'ɏ'),
+ ('Íģ', 'ÍŊ'),
+ ('Ķ', 'Ķ'),
+ ('Ķē', 'Ķŋ'),
+ ('Ԑ', 'ԓ'),
+ ('\u{5ba}', '\u{5ba}'),
+ ('߀', 'ßē'),
+ ('āĨģ', 'āĨŧ'),
+ ('āĨž', 'āĨŋ'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{1b00}', 'ᭋ'),
+ ('᭐', 'á­ŧ'),
+ ('\u{1dc4}', '\u{1dca}'),
+ ('\u{1dfe}', '\u{1dff}'),
+ ('\u{20ec}', '\u{20ef}'),
+ ('⅍', 'ⅎ'),
+ ('ↄ', 'ↄ'),
+ ('⏜', '⏧'),
+ ('⚲', '⚲'),
+ ('⟇', '⟊'),
+ ('âŦ”', 'âŦš'),
+ ('âŦ ', 'âŦŖ'),
+ ('âą ', 'âąŦ'),
+ ('⹴', '⹡'),
+ ('ꜗ', 'ꜚ'),
+ ('꜠', '꜡'),
+ ('ꡀ', '꡷'),
+ ('𐤀', '𐤙'),
+ ('𐤟', '𐤟'),
+ ('𒀀', '𒍮'),
+ ('𒐀', 'đ’‘ĸ'),
+ ('𒑰', 'đ’‘ŗ'),
+ ('𝍠', '𝍱'),
+ ('𝟊', '𝟋'),
+];
+
+pub const V5_1: &'static [(char, char)] = &[
+ ('Í°', 'Íŗ'),
+ ('Íļ', '͡'),
+ ('Ī', 'Ī'),
+ ('\u{487}', '\u{487}'),
+ ('Ԕ', 'ÔŖ'),
+ ('؆', '؊'),
+ ('\u{616}', '\u{61a}'),
+ ('Øģ', 'Øŋ'),
+ ('ŨŽ', 'Ũŋ'),
+ ('āĨą', 'āĨ˛'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{b44}', '\u{b44}'),
+ ('\u{b62}', '\u{b63}'),
+ ('ā¯', 'ā¯'),
+ ('ā°Ŋ', 'ā°Ŋ'),
+ ('āą˜', 'āą™'),
+ ('\u{c62}', '\u{c63}'),
+ ('āą¸', 'āąŋ'),
+ ('ā´Ŋ', 'ā´Ŋ'),
+ ('\u{d44}', '\u{d44}'),
+ ('\u{d62}', '\u{d63}'),
+ ('āĩ°', 'āĩĩ'),
+ ('āĩš', 'āĩŋ'),
+ ('āŊĢ', 'āŊŦ'),
+ ('āŋŽ', 'āŋŽ'),
+ ('āŋ’', 'āŋ”'),
+ ('á€ĸ', 'á€ĸ'),
+ ('ဨ', 'ဨ'),
+ ('á€Ģ', 'á€Ģ'),
+ ('\u{1033}', '\u{1035}'),
+ ('\u{103a}', 'á€ŋ'),
+ ('ၚ', '႙'),
+ ('႞', '႟'),
+ ('áĸĒ', 'áĸĒ'),
+ ('\u{1b80}', 'áŽĒ'),
+ ('ᎎ', '᎚'),
+ ('ᰀ', '\u{1c37}'),
+ ('á°ģ', '᱉'),
+ ('ᱍ', 'áąŋ'),
+ ('\u{1dcb}', '\u{1de6}'),
+ ('áēœ', 'áēŸ'),
+ ('áģē', 'áģŋ'),
+ ('\u{2064}', '\u{2064}'),
+ ('\u{20f0}', '\u{20f0}'),
+ ('⅏', '⅏'),
+ ('ↅ', 'ↈ'),
+ ('⚝', '⚝'),
+ ('âšŗ', 'âšŧ'),
+ ('⛀', '⛃'),
+ ('⟌', '⟌'),
+ ('âŸŦ', '⟯'),
+ ('âŦ›', 'âŦŸ'),
+ ('âŦ¤', '⭌'),
+ ('⭐', '⭔'),
+ ('âą­', 'âą¯'),
+ ('âąą', 'âąŗ'),
+ ('⹸', 'âąŊ'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('⸘', '⸛'),
+ ('⸞', '⸰'),
+ ('ㄭ', 'ㄭ'),
+ ('㇐', 'ã‡Ŗ'),
+ ('éžŧ', 'éŋƒ'),
+ ('ꔀ', 'ę˜Ģ'),
+ ('Ꙁ', 'ꙟ'),
+ ('ę™ĸ', 'ę™ŗ'),
+ ('\u{a67c}', 'ꚗ'),
+ ('ꜛ', 'ꜟ'),
+ ('ęœĸ', 'ꞌ'),
+ ('ęŸģ', 'ęŸŋ'),
+ ('ęĸ€', '\u{a8c4}'),
+ ('ęŖŽ', 'ęŖ™'),
+ ('꤀', 'ęĨ“'),
+ ('ęĨŸ', 'ęĨŸ'),
+ ('ꨀ', '\u{aa36}'),
+ ('ꩀ', 'ꩍ'),
+ ('꩐', '꩙'),
+ ('꩜', '꩟'),
+ ('\u{fe24}', '\u{fe26}'),
+ ('𐆐', '𐆛'),
+ ('𐇐', '\u{101fd}'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐤠', '𐤚'),
+ ('đ¤ŋ', 'đ¤ŋ'),
+ ('𝄩', '𝄩'),
+ ('🀀', 'đŸ€Ģ'),
+ ('🀰', '🂓'),
+];
+
+pub const V5_2: &'static [(char, char)] = &[
+ ('Ô¤', 'ÔĨ'),
+ ('ā €', '\u{82d}'),
+ ('ā °', 'ā ž'),
+ ('\u{900}', '\u{900}'),
+ ('āĨŽ', 'āĨŽ'),
+ ('\u{955}', '\u{955}'),
+ ('āĨš', 'āĨē'),
+ ('ā§ģ', 'ā§ģ'),
+ ('āŋ•', 'āŋ˜'),
+ ('ႚ', '\u{109d}'),
+ ('ᅚ', 'ᅞ'),
+ ('á†Ŗ', 'ᆧ'),
+ ('á‡ē', 'á‡ŋ'),
+ ('᐀', '᐀'),
+ ('ᙷ', 'á™ŋ'),
+ ('áĸ°', 'áŖĩ'),
+ ('áĻĒ', 'áĻĢ'),
+ ('᧚', '᧚'),
+ ('ᨠ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('áĒ ', 'áĒ­'),
+ ('\u{1cd0}', 'áŗ˛'),
+ ('\u{1dfd}', '\u{1dfd}'),
+ ('â‚ļ', '₸'),
+ ('⅐', '⅒'),
+ ('↉', '↉'),
+ ('⏨', '⏨'),
+ ('⚞', '⚟'),
+ ('âšŊ', 'âšŋ'),
+ ('⛄', '⛍'),
+ ('⛏', '⛡'),
+ ('â›Ŗ', 'â›Ŗ'),
+ ('⛨', 'â›ŋ'),
+ ('❗', '❗'),
+ ('⭕', '⭙'),
+ ('âą°', 'âą°'),
+ ('âąž', 'âąŋ'),
+ ('âŗĢ', '\u{2cf1}'),
+ ('⸹', '⸹'),
+ ('㉄', '㉏'),
+ ('éŋ„', 'éŋ‹'),
+ ('ꓐ', 'ę“ŋ'),
+ ('ꚠ', '꛷'),
+ ('ę °', 'ę š'),
+ ('\u{a8e0}', 'ęŖģ'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('\u{a980}', '꧍'),
+ ('ꧏ', '꧙'),
+ ('꧞', '꧟'),
+ ('ꊠ', 'ęŠģ'),
+ ('ęĒ€', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢŸ'),
+ ('ę¯€', '\u{abed}'),
+ ('ę¯°', 'ę¯š'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('īŠĢ', 'īŠ­'),
+ ('𐡀', '𐡕'),
+ ('𐡗', '𐡟'),
+ ('𐤚', '𐤛'),
+ ('𐊠', 'đŠŋ'),
+ ('đŦ€', 'đŦĩ'),
+ ('đŦš', '𐭕'),
+ ('𐭘', '𐭲'),
+ ('𐭸', 'đ­ŋ'),
+ ('𐰀', '𐱈'),
+ ('𐚠', '𐚞'),
+ ('\u{11080}', '𑃁'),
+ ('𓀀', '𓐮'),
+ ('🄀', '🄊'),
+ ('🄐', '🄮'),
+ ('🄱', '🄱'),
+ ('đŸ„Ŋ', 'đŸ„Ŋ'),
+ ('đŸ„ŋ', 'đŸ„ŋ'),
+ ('🅂', '🅂'),
+ ('🅆', '🅆'),
+ ('🅊', '🅎'),
+ ('🅗', '🅗'),
+ ('🅟', '🅟'),
+ ('🅹', '🅹'),
+ ('đŸ…ģ', 'đŸ…ŧ'),
+ ('đŸ…ŋ', 'đŸ…ŋ'),
+ ('🆊', '🆍'),
+ ('🆐', '🆐'),
+ ('🈀', '🈀'),
+ ('🈐', '🈱'),
+ ('🉀', '🉈'),
+ ('đĒœ€', 'đĢœ´'),
+];
+
+pub const V6_0: &'static [(char, char)] = &[
+ ('ÔĻ', 'Ô§'),
+ ('Ø ', 'Ø '),
+ ('\u{65f}', '\u{65f}'),
+ ('āĄ€', '\u{85b}'),
+ ('āĄž', 'āĄž'),
+ ('\u{93a}', 'ā¤ģ'),
+ ('āĨ', 'āĨ'),
+ ('\u{956}', '\u{957}'),
+ ('āĨŗ', 'āĨˇ'),
+ ('ā­˛', 'ā­ˇ'),
+ ('ā´Š', 'ā´Š'),
+ ('ā´ē', 'ā´ē'),
+ ('āĩŽ', 'āĩŽ'),
+ ('āžŒ', '\u{f8f}'),
+ ('āŋ™', 'āŋš'),
+ ('\u{135d}', '\u{135e}'),
+ ('ᯀ', 'á¯ŗ'),
+ ('á¯ŧ', 'á¯ŋ'),
+ ('\u{1dfc}', '\u{1dfc}'),
+ ('ₕ', 'ₜ'),
+ ('₹', '₹'),
+ ('⏊', 'âŗ'),
+ ('⛎', '⛎'),
+ ('â›ĸ', 'â›ĸ'),
+ ('⛤', '⛧'),
+ ('✅', '✅'),
+ ('✊', '✋'),
+ ('✨', '✨'),
+ ('❌', '❌'),
+ ('❎', '❎'),
+ ('❓', '❕'),
+ ('❟', '❠'),
+ ('➕', '➗'),
+ ('➰', '➰'),
+ ('âžŋ', 'âžŋ'),
+ ('⟎', '⟏'),
+ ('âĩ°', 'âĩ°'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('ㆸ', 'ã†ē'),
+ ('Ꙡ', 'ꙡ'),
+ ('Ɥ', 'ꞎ'),
+ ('Ꞑ', 'ꞑ'),
+ ('Ꞡ', 'ꞩ'),
+ ('ęŸē', 'ęŸē'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('īŽ˛', 'ī¯'),
+ ('𑀀', '𑁍'),
+ ('𑁒', 'đ‘¯'),
+ ('𖠀', '𖨸'),
+ ('𛀀', '𛀁'),
+ ('🂠', '🂮'),
+ ('🂱', '🂾'),
+ ('🃁', '🃏'),
+ ('🃑', '🃟'),
+ ('🄰', '🄰'),
+ ('🄲', 'đŸ„ŧ'),
+ ('🄾', '🄾'),
+ ('🅀', '🅁'),
+ ('🅃', '🅅'),
+ ('🅇', '🅉'),
+ ('🅏', '🅖'),
+ ('🅘', '🅞'),
+ ('🅠', '🅩'),
+ ('🅰', '🅸'),
+ ('đŸ…ē', 'đŸ…ē'),
+ ('đŸ…Ŋ', '🅾'),
+ ('🆀', '🆉'),
+ ('🆎', '🆏'),
+ ('🆑', '🆚'),
+ ('đŸ‡Ļ', 'đŸ‡ŋ'),
+ ('🈁', '🈂'),
+ ('🈲', 'đŸˆē'),
+ ('🉐', '🉑'),
+ ('🌀', '🌠'),
+ ('🌰', 'đŸŒĩ'),
+ ('🌷', 'đŸŧ'),
+ ('🎀', '🎓'),
+ ('🎠', '🏄'),
+ ('🏆', '🏊'),
+ ('🏠', '🏰'),
+ ('🐀', '🐾'),
+ ('👀', '👀'),
+ ('👂', '📷'),
+ ('📹', 'đŸ“ŧ'),
+ ('🔀', 'đŸ”Ŋ'),
+ ('🕐', '🕧'),
+ ('đŸ—ģ', 'đŸ—ŋ'),
+ ('😁', '😐'),
+ ('😒', '😔'),
+ ('😖', '😖'),
+ ('😘', '😘'),
+ ('😚', '😚'),
+ ('😜', '😞'),
+ ('😠', 'đŸ˜Ĩ'),
+ ('😨', 'đŸ˜Ģ'),
+ ('😭', '😭'),
+ ('😰', 'đŸ˜ŗ'),
+ ('đŸ˜ĩ', '🙀'),
+ ('🙅', '🙏'),
+ ('🚀', '🛅'),
+ ('🜀', 'đŸŗ'),
+ ('đĢ€', 'đĢ '),
+];
+
+pub const V6_1: &'static [(char, char)] = &[
+ ('֏', '֏'),
+ ('\u{604}', '\u{604}'),
+ ('āĸ ', 'āĸ '),
+ ('āĸĸ', 'āĸŦ'),
+ ('\u{8e4}', '\u{8fe}'),
+ ('āĢ°', 'āĢ°'),
+ ('āģž', 'āģŸ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('áƒŊ', 'áƒŋ'),
+ ('\u{1bab}', '\u{1bad}'),
+ ('áŽē', 'áŽŋ'),
+ ('áŗ€', 'áŗ‡'),
+ ('áŗŗ', 'áŗļ'),
+ ('⟋', '⟋'),
+ ('⟍', '⟍'),
+ ('âŗ˛', 'âŗŗ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('âĩĻ', 'âĩ§'),
+ ('⸲', 'â¸ģ'),
+ ('éŋŒ', 'éŋŒ'),
+ ('\u{a674}', '\u{a67b}'),
+ ('\u{a69f}', '\u{a69f}'),
+ ('Ꞓ', 'ꞓ'),
+ ('ęžĒ', 'ęžĒ'),
+ ('ꟸ', 'ꟹ'),
+ ('ęĢ ', '\u{aaf6}'),
+ ('ī¨Ž', 'ī¨¯'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𑃐', '𑃨'),
+ ('𑃰', '𑃹'),
+ ('\u{11100}', '\u{11134}'),
+ ('đ‘„ļ', '𑅃'),
+ ('\u{11180}', '𑇈'),
+ ('𑇐', '𑇙'),
+ ('𑚀', '\u{116b7}'),
+ ('𑛀', '𑛉'),
+ ('đ–ŧ€', 'đ–Ŋ„'),
+ ('đ–Ŋ', 'đ–Ŋž'),
+ ('\u{16f8f}', '𖾟'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('đžģ°', 'đžģą'),
+ ('đŸ…Ē', 'đŸ…Ģ'),
+ ('🕀', '🕃'),
+ ('😀', '😀'),
+ ('😑', '😑'),
+ ('😕', '😕'),
+ ('😗', '😗'),
+ ('😙', '😙'),
+ ('😛', '😛'),
+ ('😟', '😟'),
+ ('đŸ˜Ļ', '😧'),
+ ('đŸ˜Ŧ', 'đŸ˜Ŧ'),
+ ('😮', 'đŸ˜¯'),
+ ('😴', '😴'),
+];
+
+pub const V6_2: &'static [(char, char)] = &[('â‚ē', 'â‚ē')];
+
+pub const V6_3: &'static [(char, char)] =
+ &[('\u{61c}', '\u{61c}'), ('\u{2066}', '\u{2069}')];
+
+pub const V7_0: &'static [(char, char)] = &[
+ ('Íŋ', 'Íŋ'),
+ ('Ô¨', 'Ô¯'),
+ ('֍', '֎'),
+ ('\u{605}', '\u{605}'),
+ ('āĸĄ', 'āĸĄ'),
+ ('āĸ­', 'āĸ˛'),
+ ('\u{8ff}', '\u{8ff}'),
+ ('āĨ¸', 'āĨ¸'),
+ ('āĻ€', 'āĻ€'),
+ ('\u{c00}', '\u{c00}'),
+ ('ā°´', 'ā°´'),
+ ('\u{c81}', '\u{c81}'),
+ ('\u{d01}', '\u{d01}'),
+ ('āˇĻ', 'āˇ¯'),
+ ('ᛱ', 'ᛸ'),
+ ('ᤝ', 'ᤞ'),
+ ('\u{1ab0}', '\u{1abe}'),
+ ('\u{1cf8}', '\u{1cf9}'),
+ ('\u{1de7}', '\u{1df5}'),
+ ('â‚ģ', 'â‚Ŋ'),
+ ('⏴', 'âē'),
+ ('✀', '✀'),
+ ('⭍', '⭏'),
+ ('⭚', 'â­ŗ'),
+ ('â­ļ', '⮕'),
+ ('⎘', '⎚'),
+ ('âŽŊ', '⯈'),
+ ('⯊', '⯑'),
+ ('â¸ŧ', '⹂'),
+ ('Ꚙ', 'ꚝ'),
+ ('ꞔ', 'ꞟ'),
+ ('ęžĢ', 'Ɬ'),
+ ('Ʞ', 'Ʇ'),
+ ('ꟷ', 'ꟷ'),
+ ('ꧠ', '꧞'),
+ ('\u{aa7c}', 'ęŠŋ'),
+ ('ęŦ°', 'ꭟ'),
+ ('ę­¤', 'ę­Ĩ'),
+ ('\u{fe27}', '\u{fe2d}'),
+ ('𐆋', '𐆌'),
+ ('𐆠', '𐆠'),
+ ('\u{102e0}', 'đ‹ģ'),
+ ('𐌟', '𐌟'),
+ ('𐍐', '\u{1037a}'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('đ•¯', 'đ•¯'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐥠', 'đĸž'),
+ ('đĸ§', 'đĸ¯'),
+ ('đĒ€', 'đĒŸ'),
+ ('đĢ€', '\u{10ae6}'),
+ ('đĢĢ', 'đĢļ'),
+ ('𐮀', '𐮑'),
+ ('𐮙', '𐮜'),
+ ('𐎊', 'đŽ¯'),
+ ('\u{1107f}', '\u{1107f}'),
+ ('𑅐', 'đ‘…ļ'),
+ ('𑇍', '𑇍'),
+ ('𑇚', '𑇚'),
+ ('𑇡', '𑇴'),
+ ('𑈀', '𑈑'),
+ ('𑈓', 'đ‘ˆŊ'),
+ ('𑊰', '\u{112ea}'),
+ ('𑋰', '𑋹'),
+ ('\u{11301}', '𑌃'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('\u{1133c}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('\u{11357}', '\u{11357}'),
+ ('𑍝', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('𑒀', '𑓇'),
+ ('𑓐', '𑓙'),
+ ('𑖀', '\u{115b5}'),
+ ('𑖸', '𑗉'),
+ ('𑘀', '𑙄'),
+ ('𑙐', '𑙙'),
+ ('đ‘ĸ ', 'đ‘Ŗ˛'),
+ ('đ‘Ŗŋ', 'đ‘Ŗŋ'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('đ’¯', '𒎘'),
+ ('đ’‘Ŗ', '𒑮'),
+ ('𒑴', '𒑴'),
+ ('𖩀', '𖩞'),
+ ('𖩠', '𖩩'),
+ ('𖩮', 'đ–Š¯'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('\u{16af0}', 'đ–Ģĩ'),
+ ('đ–Ŧ€', '𖭅'),
+ ('𖭐', '𖭙'),
+ ('𖭛', '𖭡'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('𛲜', '\u{1bca3}'),
+ ('𞠀', 'đžŖ„'),
+ ('đžŖ‡', '\u{1e8d6}'),
+ ('đŸ‚ŋ', 'đŸ‚ŋ'),
+ ('🃠', 'đŸƒĩ'),
+ ('🄋', '🄌'),
+ ('🌡', 'đŸŒŦ'),
+ ('đŸŒļ', 'đŸŒļ'),
+ ('đŸŊ', 'đŸŊ'),
+ ('🎔', '🎟'),
+ ('🏅', '🏅'),
+ ('🏋', '🏎'),
+ ('🏔', '🏟'),
+ ('🏱', '🏷'),
+ ('đŸŋ', 'đŸŋ'),
+ ('👁', '👁'),
+ ('📸', '📸'),
+ ('đŸ“Ŋ', '📾'),
+ ('🔾', 'đŸ”ŋ'),
+ ('🕄', '🕊'),
+ ('🕨', '🕹'),
+ ('đŸ•ģ', 'đŸ–Ŗ'),
+ ('đŸ–Ĩ', 'đŸ—ē'),
+ ('🙁', '🙂'),
+ ('🙐', 'đŸ™ŋ'),
+ ('🛆', '🛏'),
+ ('🛠', 'đŸ›Ŧ'),
+ ('🛰', 'đŸ›ŗ'),
+ ('🞀', '🟔'),
+ ('🠀', '🠋'),
+ ('🠐', '🡇'),
+ ('🡐', '🡙'),
+ ('🡠', 'đŸĸ‡'),
+ ('đŸĸ', 'đŸĸ­'),
+];
+
+pub const V8_0: &'static [(char, char)] = &[
+ ('āĸŗ', 'āĸ´'),
+ ('\u{8e3}', '\u{8e3}'),
+ ('āĢš', 'āĢš'),
+ ('āąš', 'āąš'),
+ ('āĩŸ', 'āĩŸ'),
+ ('áĩ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('₾', '₾'),
+ ('↊', '↋'),
+ ('â¯Ŧ', '⯯'),
+ ('éŋ', 'éŋ•'),
+ ('\u{a69e}', '\u{a69e}'),
+ ('ꞏ', 'ꞏ'),
+ ('Ʝ', 'ꞷ'),
+ ('ęŖŧ', 'ęŖŊ'),
+ ('ę­ ', 'ę­Ŗ'),
+ ('ę­°', 'ęŽŋ'),
+ ('\u{fe2e}', '\u{fe2f}'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('đŖģ', 'đŖŋ'),
+ ('đĻŧ', 'đĻŊ'),
+ ('𐧀', '𐧏'),
+ ('𐧒', 'đ§ŋ'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('đŗē', 'đŗŋ'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('𑇛', '𑇟'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊩'),
+ ('\u{11300}', '\u{11300}'),
+ ('𑍐', '𑍐'),
+ ('𑗊', '\u{115dd}'),
+ ('𑜀', '𑜙'),
+ ('\u{1171d}', '\u{1172b}'),
+ ('𑜰', 'đ‘œŋ'),
+ ('𒎙', '𒎙'),
+ ('𒒀', '𒕃'),
+ ('𔐀', '𔙆'),
+ ('𝇞', '𝇨'),
+ ('𝠀', 'đĒ‹'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('🌭', 'đŸŒ¯'),
+ ('🍾', 'đŸŋ'),
+ ('🏏', '🏓'),
+ ('🏸', 'đŸŋ'),
+ ('đŸ“ŋ', 'đŸ“ŋ'),
+ ('🕋', '🕏'),
+ ('🙃', '🙄'),
+ ('🛐', '🛐'),
+ ('🤐', '🤘'),
+ ('đŸĻ€', 'đŸĻ„'),
+ ('🧀', '🧀'),
+ ('đĢ  ', 'đŦēĄ'),
+];
+
+pub const V9_0: &'static [(char, char)] = &[
+ ('āĸļ', 'āĸŊ'),
+ ('\u{8d4}', '\u{8e2}'),
+ ('ā˛€', 'ā˛€'),
+ ('āĩ', 'āĩ'),
+ ('āĩ”', 'āĩ–'),
+ ('āĩ˜', 'āĩž'),
+ ('āĩļ', 'āĩ¸'),
+ ('ᲀ', 'ᲈ'),
+ ('\u{1dfb}', '\u{1dfb}'),
+ ('âģ', '⏞'),
+ ('⹃', '⹄'),
+ ('Ɪ', 'Ɪ'),
+ ('\u{a8c5}', '\u{a8c5}'),
+ ('𐆍', '𐆎'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('𑐀', '𑑙'),
+ ('𑑛', '𑑛'),
+ ('𑑝', '𑑝'),
+ ('𑙠', 'đ‘™Ŧ'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '\u{11c36}'),
+ ('\u{11c38}', '𑱅'),
+ ('𑱐', 'đ‘ąŦ'),
+ ('𑱰', '𑲏'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('đ–ŋ ', 'đ–ŋ '),
+ ('𗀀', 'đ˜ŸŦ'),
+ ('𘠀', 'đ˜Ģ˛'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('𞤀', '\u{1e94a}'),
+ ('đžĨ', 'đžĨ™'),
+ ('đžĨž', 'đžĨŸ'),
+ ('🆛', 'đŸ†Ŧ'),
+ ('đŸˆģ', 'đŸˆģ'),
+ ('đŸ•ē', 'đŸ•ē'),
+ ('🖤', '🖤'),
+ ('🛑', '🛒'),
+ ('🛴', 'đŸ›ļ'),
+ ('🤙', '🤞'),
+ ('🤠', '🤧'),
+ ('🤰', '🤰'),
+ ('đŸ¤ŗ', '🤾'),
+ ('đŸĨ€', 'đŸĨ‹'),
+ ('đŸĨ', 'đŸĨž'),
+ ('đŸĻ…', 'đŸĻ‘'),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs b/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs
new file mode 100644
index 000000000..cfb83f363
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs
@@ -0,0 +1,2808 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate case-folding-simple ucd-13.0.0 --chars --all-pairs
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
+ ('A', &['a']),
+ ('B', &['b']),
+ ('C', &['c']),
+ ('D', &['d']),
+ ('E', &['e']),
+ ('F', &['f']),
+ ('G', &['g']),
+ ('H', &['h']),
+ ('I', &['i']),
+ ('J', &['j']),
+ ('K', &['k', 'â„Ē']),
+ ('L', &['l']),
+ ('M', &['m']),
+ ('N', &['n']),
+ ('O', &['o']),
+ ('P', &['p']),
+ ('Q', &['q']),
+ ('R', &['r']),
+ ('S', &['s', 'Åŋ']),
+ ('T', &['t']),
+ ('U', &['u']),
+ ('V', &['v']),
+ ('W', &['w']),
+ ('X', &['x']),
+ ('Y', &['y']),
+ ('Z', &['z']),
+ ('a', &['A']),
+ ('b', &['B']),
+ ('c', &['C']),
+ ('d', &['D']),
+ ('e', &['E']),
+ ('f', &['F']),
+ ('g', &['G']),
+ ('h', &['H']),
+ ('i', &['I']),
+ ('j', &['J']),
+ ('k', &['K', 'â„Ē']),
+ ('l', &['L']),
+ ('m', &['M']),
+ ('n', &['N']),
+ ('o', &['O']),
+ ('p', &['P']),
+ ('q', &['Q']),
+ ('r', &['R']),
+ ('s', &['S', 'Åŋ']),
+ ('t', &['T']),
+ ('u', &['U']),
+ ('v', &['V']),
+ ('w', &['W']),
+ ('x', &['X']),
+ ('y', &['Y']),
+ ('z', &['Z']),
+ ('Âĩ', &['Μ', 'Îŧ']),
+ ('À', &['à']),
+ ('Á', &['ÃĄ']),
+ ('Â', &['Ãĸ']),
+ ('Ã', &['ÃŖ']),
+ ('Ä', &['ä']),
+ ('Å', &['ÃĨ', 'â„Ģ']),
+ ('Æ', &['ÃĻ']),
+ ('Ç', &['ç']),
+ ('È', &['è']),
+ ('É', &['Ê']),
+ ('Ê', &['ÃĒ']),
+ ('Ë', &['ÃĢ']),
+ ('Ì', &['ÃŦ']),
+ ('Í', &['í']),
+ ('Î', &['ÃŽ']),
+ ('Ï', &['ï']),
+ ('Ð', &['ð']),
+ ('Ñ', &['Ãą']),
+ ('Ò', &['Ã˛']),
+ ('Ó', &['Ãŗ']),
+ ('Ô', &['ô']),
+ ('Õ', &['Ãĩ']),
+ ('Ö', &['Ãļ']),
+ ('Ø', &['ø']),
+ ('Ù', &['Ú']),
+ ('Ú', &['Ãē']),
+ ('Û', &['Ãģ']),
+ ('Ü', &['Ãŧ']),
+ ('Ý', &['ÃŊ']),
+ ('Þ', &['Þ']),
+ ('ß', &['áēž']),
+ ('à', &['À']),
+ ('ÃĄ', &['Á']),
+ ('Ãĸ', &['Â']),
+ ('ÃŖ', &['Ã']),
+ ('ä', &['Ä']),
+ ('ÃĨ', &['Å', 'â„Ģ']),
+ ('ÃĻ', &['Æ']),
+ ('ç', &['Ç']),
+ ('è', &['È']),
+ ('Ê', &['É']),
+ ('ÃĒ', &['Ê']),
+ ('ÃĢ', &['Ë']),
+ ('ÃŦ', &['Ì']),
+ ('í', &['Í']),
+ ('ÃŽ', &['Î']),
+ ('ï', &['Ï']),
+ ('ð', &['Ð']),
+ ('Ãą', &['Ñ']),
+ ('Ã˛', &['Ò']),
+ ('Ãŗ', &['Ó']),
+ ('ô', &['Ô']),
+ ('Ãĩ', &['Õ']),
+ ('Ãļ', &['Ö']),
+ ('ø', &['Ø']),
+ ('Ú', &['Ù']),
+ ('Ãē', &['Ú']),
+ ('Ãģ', &['Û']),
+ ('Ãŧ', &['Ü']),
+ ('ÃŊ', &['Ý']),
+ ('Þ', &['Þ']),
+ ('Ãŋ', &['Ÿ']),
+ ('Ā', &['ā']),
+ ('ā', &['Ā']),
+ ('Ă', &['ă']),
+ ('ă', &['Ă']),
+ ('Ą', &['ą']),
+ ('ą', &['Ą']),
+ ('Ć', &['ć']),
+ ('ć', &['Ć']),
+ ('Ĉ', &['ĉ']),
+ ('ĉ', &['Ĉ']),
+ ('Ċ', &['ċ']),
+ ('ċ', &['Ċ']),
+ ('Č', &['č']),
+ ('č', &['Č']),
+ ('Ď', &['ď']),
+ ('ď', &['Ď']),
+ ('Đ', &['đ']),
+ ('đ', &['Đ']),
+ ('Ē', &['ē']),
+ ('ē', &['Ē']),
+ ('Ĕ', &['ĕ']),
+ ('ĕ', &['Ĕ']),
+ ('Ė', &['ė']),
+ ('ė', &['Ė']),
+ ('Ę', &['ę']),
+ ('ę', &['Ę']),
+ ('Ě', &['ě']),
+ ('ě', &['Ě']),
+ ('Ĝ', &['ĝ']),
+ ('ĝ', &['Ĝ']),
+ ('Ğ', &['ğ']),
+ ('ğ', &['Ğ']),
+ ('Ä ', &['ÄĄ']),
+ ('ÄĄ', &['Ä ']),
+ ('Äĸ', &['ÄŖ']),
+ ('ÄŖ', &['Äĸ']),
+ ('Ĥ', &['ÄĨ']),
+ ('ÄĨ', &['Ĥ']),
+ ('ÄĻ', &['ħ']),
+ ('ħ', &['ÄĻ']),
+ ('Ĩ', &['Ċ']),
+ ('Ċ', &['Ĩ']),
+ ('ÄĒ', &['ÄĢ']),
+ ('ÄĢ', &['ÄĒ']),
+ ('ÄŦ', &['Ä­']),
+ ('Ä­', &['ÄŦ']),
+ ('Ď', &['į']),
+ ('į', &['Ď']),
+ ('IJ', &['Äŗ']),
+ ('Äŗ', &['IJ']),
+ ('Ä´', &['Äĩ']),
+ ('Äĩ', &['Ä´']),
+ ('Äļ', &['ġ']),
+ ('ġ', &['Äļ']),
+ ('Äš', &['Äē']),
+ ('Äē', &['Äš']),
+ ('Äģ', &['Äŧ']),
+ ('Äŧ', &['Äģ']),
+ ('ÄŊ', &['Äž']),
+ ('Äž', &['ÄŊ']),
+ ('Äŋ', &['ŀ']),
+ ('ŀ', &['Äŋ']),
+ ('Ł', &['ł']),
+ ('ł', &['Ł']),
+ ('Ń', &['ń']),
+ ('ń', &['Ń']),
+ ('Ņ', &['ņ']),
+ ('ņ', &['Ņ']),
+ ('Ň', &['ň']),
+ ('ň', &['Ň']),
+ ('Ŋ', &['ŋ']),
+ ('ŋ', &['Ŋ']),
+ ('Ō', &['ō']),
+ ('ō', &['Ō']),
+ ('Ŏ', &['ŏ']),
+ ('ŏ', &['Ŏ']),
+ ('Ő', &['ő']),
+ ('ő', &['Ő']),
+ ('Œ', &['œ']),
+ ('œ', &['Œ']),
+ ('Ŕ', &['ŕ']),
+ ('ŕ', &['Ŕ']),
+ ('Ŗ', &['ŗ']),
+ ('ŗ', &['Ŗ']),
+ ('Ř', &['ř']),
+ ('ř', &['Ř']),
+ ('Ś', &['ś']),
+ ('ś', &['Ś']),
+ ('Ŝ', &['ŝ']),
+ ('ŝ', &['Ŝ']),
+ ('Ş', &['ş']),
+ ('ş', &['Ş']),
+ ('Å ', &['ÅĄ']),
+ ('ÅĄ', &['Å ']),
+ ('Åĸ', &['ÅŖ']),
+ ('ÅŖ', &['Åĸ']),
+ ('Ť', &['ÅĨ']),
+ ('ÅĨ', &['Ť']),
+ ('ÅĻ', &['ŧ']),
+ ('ŧ', &['ÅĻ']),
+ ('Ũ', &['Ŋ']),
+ ('Ŋ', &['Ũ']),
+ ('ÅĒ', &['ÅĢ']),
+ ('ÅĢ', &['ÅĒ']),
+ ('ÅŦ', &['Å­']),
+ ('Å­', &['ÅŦ']),
+ ('Ŏ', &['ů']),
+ ('ů', &['Ŏ']),
+ ('Å°', &['Åą']),
+ ('Åą', &['Å°']),
+ ('Å˛', &['Åŗ']),
+ ('Åŗ', &['Å˛']),
+ ('Å´', &['Åĩ']),
+ ('Åĩ', &['Å´']),
+ ('Åļ', &['Åˇ']),
+ ('Åˇ', &['Åļ']),
+ ('Ÿ', &['Ãŋ']),
+ ('Åš', &['Åē']),
+ ('Åē', &['Åš']),
+ ('Åģ', &['Åŧ']),
+ ('Åŧ', &['Åģ']),
+ ('ÅŊ', &['Åž']),
+ ('Åž', &['ÅŊ']),
+ ('Åŋ', &['S', 's']),
+ ('ƀ', &['Ƀ']),
+ ('Ɓ', &['ɓ']),
+ ('Ƃ', &['ƃ']),
+ ('ƃ', &['Ƃ']),
+ ('Ƅ', &['ƅ']),
+ ('ƅ', &['Ƅ']),
+ ('Ɔ', &['ɔ']),
+ ('Ƈ', &['ƈ']),
+ ('ƈ', &['Ƈ']),
+ ('Ɖ', &['ɖ']),
+ ('Ɗ', &['ɗ']),
+ ('Ƌ', &['ƌ']),
+ ('ƌ', &['Ƌ']),
+ ('Ǝ', &['Į']),
+ ('Ə', &['ə']),
+ ('Ɛ', &['ɛ']),
+ ('Ƒ', &['ƒ']),
+ ('ƒ', &['Ƒ']),
+ ('Ɠ', &['ɠ']),
+ ('Ɣ', &['ÉŖ']),
+ ('ƕ', &['Įļ']),
+ ('Ɩ', &['ÉŠ']),
+ ('Ɨ', &['ɨ']),
+ ('Ƙ', &['ƙ']),
+ ('ƙ', &['Ƙ']),
+ ('ƚ', &['ČŊ']),
+ ('Ɯ', &['ɯ']),
+ ('Ɲ', &['ɲ']),
+ ('ƞ', &['Č ']),
+ ('Ɵ', &['Éĩ']),
+ ('Æ ', &['ÆĄ']),
+ ('ÆĄ', &['Æ ']),
+ ('Æĸ', &['ÆŖ']),
+ ('ÆŖ', &['Æĸ']),
+ ('Ƥ', &['ÆĨ']),
+ ('ÆĨ', &['Ƥ']),
+ ('ÆĻ', &['ʀ']),
+ ('Ƨ', &['ƨ']),
+ ('ƨ', &['Ƨ']),
+ ('ÆŠ', &['ʃ']),
+ ('ÆŦ', &['Æ­']),
+ ('Æ­', &['ÆŦ']),
+ ('ÆŽ', &['ʈ']),
+ ('Ư', &['ư']),
+ ('ư', &['Ư']),
+ ('Æą', &['ʊ']),
+ ('Æ˛', &['ʋ']),
+ ('Æŗ', &['Æ´']),
+ ('Æ´', &['Æŗ']),
+ ('Æĩ', &['Æļ']),
+ ('Æļ', &['Æĩ']),
+ ('Æˇ', &['ʒ']),
+ ('Ƹ', &['ƚ']),
+ ('ƚ', &['Ƹ']),
+ ('Æŧ', &['ÆŊ']),
+ ('ÆŊ', &['Æŧ']),
+ ('Æŋ', &['Įˇ']),
+ ('Į„', &['Į…', 'Į†']),
+ ('Į…', &['Į„', 'Į†']),
+ ('Į†', &['Į„', 'Į…']),
+ ('Į‡', &['Įˆ', 'Į‰']),
+ ('Įˆ', &['Į‡', 'Į‰']),
+ ('Į‰', &['Į‡', 'Įˆ']),
+ ('ĮŠ', &['Į‹', 'ĮŒ']),
+ ('Į‹', &['ĮŠ', 'ĮŒ']),
+ ('ĮŒ', &['ĮŠ', 'Į‹']),
+ ('Į', &['ĮŽ']),
+ ('ĮŽ', &['Į']),
+ ('Į', &['Į']),
+ ('Į', &['Į']),
+ ('Į‘', &['Į’']),
+ ('Į’', &['Į‘']),
+ ('Į“', &['Į”']),
+ ('Į”', &['Į“']),
+ ('Į•', &['Į–']),
+ ('Į–', &['Į•']),
+ ('Į—', &['Į˜']),
+ ('Į˜', &['Į—']),
+ ('Į™', &['Įš']),
+ ('Įš', &['Į™']),
+ ('Į›', &['Įœ']),
+ ('Įœ', &['Į›']),
+ ('Į', &['Ǝ']),
+ ('Įž', &['ĮŸ']),
+ ('ĮŸ', &['Įž']),
+ ('Į ', &['ĮĄ']),
+ ('ĮĄ', &['Į ']),
+ ('Įĸ', &['ĮŖ']),
+ ('ĮŖ', &['Įĸ']),
+ ('Į¤', &['ĮĨ']),
+ ('ĮĨ', &['Į¤']),
+ ('ĮĻ', &['Į§']),
+ ('Į§', &['ĮĻ']),
+ ('Į¨', &['ĮŠ']),
+ ('ĮŠ', &['Į¨']),
+ ('ĮĒ', &['ĮĢ']),
+ ('ĮĢ', &['ĮĒ']),
+ ('ĮŦ', &['Į­']),
+ ('Į­', &['ĮŦ']),
+ ('ĮŽ', &['Į¯']),
+ ('Į¯', &['ĮŽ']),
+ ('Įą', &['Į˛', 'Įŗ']),
+ ('Į˛', &['Įą', 'Įŗ']),
+ ('Įŗ', &['Įą', 'Į˛']),
+ ('Į´', &['Įĩ']),
+ ('Įĩ', &['Į´']),
+ ('Įļ', &['ƕ']),
+ ('Įˇ', &['Æŋ']),
+ ('Į¸', &['Įš']),
+ ('Įš', &['Į¸']),
+ ('Įē', &['Įģ']),
+ ('Įģ', &['Įē']),
+ ('Įŧ', &['ĮŊ']),
+ ('ĮŊ', &['Įŧ']),
+ ('Įž', &['Įŋ']),
+ ('Įŋ', &['Įž']),
+ ('Ȁ', &['ȁ']),
+ ('ȁ', &['Ȁ']),
+ ('Ȃ', &['ȃ']),
+ ('ȃ', &['Ȃ']),
+ ('Ȅ', &['ȅ']),
+ ('ȅ', &['Ȅ']),
+ ('Ȇ', &['ȇ']),
+ ('ȇ', &['Ȇ']),
+ ('Ȉ', &['ȉ']),
+ ('ȉ', &['Ȉ']),
+ ('Ȋ', &['ȋ']),
+ ('ȋ', &['Ȋ']),
+ ('Ȍ', &['ȍ']),
+ ('ȍ', &['Ȍ']),
+ ('Ȏ', &['ȏ']),
+ ('ȏ', &['Ȏ']),
+ ('Ȑ', &['ȑ']),
+ ('ȑ', &['Ȑ']),
+ ('Ȓ', &['ȓ']),
+ ('ȓ', &['Ȓ']),
+ ('Ȕ', &['ȕ']),
+ ('ȕ', &['Ȕ']),
+ ('Ȗ', &['ȗ']),
+ ('ȗ', &['Ȗ']),
+ ('Ș', &['ș']),
+ ('ș', &['Ș']),
+ ('Ț', &['ț']),
+ ('ț', &['Ț']),
+ ('Ȝ', &['ȝ']),
+ ('ȝ', &['Ȝ']),
+ ('Ȟ', &['ȟ']),
+ ('ȟ', &['Ȟ']),
+ ('Č ', &['ƞ']),
+ ('Čĸ', &['ČŖ']),
+ ('ČŖ', &['Čĸ']),
+ ('Ȥ', &['ČĨ']),
+ ('ČĨ', &['Ȥ']),
+ ('ČĻ', &['ȧ']),
+ ('ȧ', &['ČĻ']),
+ ('Ȩ', &['Ȋ']),
+ ('Ȋ', &['Ȩ']),
+ ('ČĒ', &['ČĢ']),
+ ('ČĢ', &['ČĒ']),
+ ('ČŦ', &['Č­']),
+ ('Č­', &['ČŦ']),
+ ('ČŽ', &['Č¯']),
+ ('Č¯', &['ČŽ']),
+ ('Č°', &['Čą']),
+ ('Čą', &['Č°']),
+ ('Ȳ', &['Čŗ']),
+ ('Čŗ', &['Ȳ']),
+ ('Čē', &['âąĨ']),
+ ('Čģ', &['Čŧ']),
+ ('Čŧ', &['Čģ']),
+ ('ČŊ', &['ƚ']),
+ ('Čž', &['âąĻ']),
+ ('Čŋ', &['âąž']),
+ ('ɀ', &['âąŋ']),
+ ('Ɂ', &['ɂ']),
+ ('ɂ', &['Ɂ']),
+ ('Ƀ', &['ƀ']),
+ ('Ʉ', &['ʉ']),
+ ('Ʌ', &['ʌ']),
+ ('Ɇ', &['ɇ']),
+ ('ɇ', &['Ɇ']),
+ ('Ɉ', &['ɉ']),
+ ('ɉ', &['Ɉ']),
+ ('Ɋ', &['ɋ']),
+ ('ɋ', &['Ɋ']),
+ ('Ɍ', &['ɍ']),
+ ('ɍ', &['Ɍ']),
+ ('Ɏ', &['ɏ']),
+ ('ɏ', &['Ɏ']),
+ ('ɐ', &['âą¯']),
+ ('ɑ', &['Ɑ']),
+ ('ɒ', &['Ɒ']),
+ ('ɓ', &['Ɓ']),
+ ('ɔ', &['Ɔ']),
+ ('ɖ', &['Ɖ']),
+ ('ɗ', &['Ɗ']),
+ ('ə', &['Ə']),
+ ('ɛ', &['Ɛ']),
+ ('ɜ', &['ęžĢ']),
+ ('ɠ', &['Ɠ']),
+ ('ÉĄ', &['ęžŦ']),
+ ('ÉŖ', &['Ɣ']),
+ ('ÉĨ', &['Ɥ']),
+ ('ÉĻ', &['ęžĒ']),
+ ('ɨ', &['Ɨ']),
+ ('ÉŠ', &['Ɩ']),
+ ('ÉĒ', &['Ɪ']),
+ ('ÉĢ', &['âąĸ']),
+ ('ÉŦ', &['Ɬ']),
+ ('ɯ', &['Ɯ']),
+ ('ɹ', &['⹎']),
+ ('ɲ', &['Ɲ']),
+ ('Éĩ', &['Ɵ']),
+ ('ÉŊ', &['⹤']),
+ ('ʀ', &['ÆĻ']),
+ ('ʂ', &['Ʂ']),
+ ('ʃ', &['ÆŠ']),
+ ('ʇ', &['Ʇ']),
+ ('ʈ', &['ÆŽ']),
+ ('ʉ', &['Ʉ']),
+ ('ʊ', &['Æą']),
+ ('ʋ', &['Æ˛']),
+ ('ʌ', &['Ʌ']),
+ ('ʒ', &['Æˇ']),
+ ('ʝ', &['Ʝ']),
+ ('ʞ', &['Ʞ']),
+ ('\u{345}', &['Ι', 'ι', 'ι']),
+ ('Í°', &['Íą']),
+ ('Íą', &['Í°']),
+ ('Ͳ', &['Íŗ']),
+ ('Íŗ', &['Ͳ']),
+ ('Íļ', &['͡']),
+ ('͡', &['Íļ']),
+ ('Íģ', &['ĪŊ']),
+ ('Íŧ', &['Īž']),
+ ('ÍŊ', &['Īŋ']),
+ ('Íŋ', &['Īŗ']),
+ ('Ά', &['ÎŦ']),
+ ('Έ', &['έ']),
+ ('Ή', &['ή']),
+ ('Ί', &['ί']),
+ ('Ό', &['ĪŒ']),
+ ('Ύ', &['Ī']),
+ ('Ώ', &['ĪŽ']),
+ ('Α', &['α']),
+ ('Β', &['β', 'Ī']),
+ ('Γ', &['Îŗ']),
+ ('Δ', &['δ']),
+ ('Ε', &['Îĩ', 'Īĩ']),
+ ('Ζ', &['Îļ']),
+ ('Η', &['η']),
+ ('Θ', &['θ', 'Ī‘', 'Ī´']),
+ ('Ι', &['\u{345}', 'ι', 'ι']),
+ ('Κ', &['Îē', 'Ī°']),
+ ('Λ', &['Îģ']),
+ ('Μ', &['Âĩ', 'Îŧ']),
+ ('Ν', &['ÎŊ']),
+ ('Ξ', &['ξ']),
+ ('Ο', &['Îŋ']),
+ ('Π', &['Ī€', 'Ī–']),
+ ('ÎĄ', &['Ī', 'Īą']),
+ ('ÎŖ', &['Ī‚', 'Īƒ']),
+ ('Τ', &['Ī„']),
+ ('ÎĨ', &['Ī…']),
+ ('ÎĻ', &['Ī†', 'Ī•']),
+ ('Χ', &['Ī‡']),
+ ('Ψ', &['Īˆ']),
+ ('Ί', &['Ī‰', 'â„Ļ']),
+ ('ÎĒ', &['ĪŠ']),
+ ('ÎĢ', &['Ī‹']),
+ ('ÎŦ', &['Ά']),
+ ('έ', &['Έ']),
+ ('ή', &['Ή']),
+ ('ί', &['Ί']),
+ ('α', &['Α']),
+ ('β', &['Β', 'Ī']),
+ ('Îŗ', &['Γ']),
+ ('δ', &['Δ']),
+ ('Îĩ', &['Ε', 'Īĩ']),
+ ('Îļ', &['Ζ']),
+ ('η', &['Η']),
+ ('θ', &['Θ', 'Ī‘', 'Ī´']),
+ ('ι', &['\u{345}', 'Ι', 'ι']),
+ ('Îē', &['Κ', 'Ī°']),
+ ('Îģ', &['Λ']),
+ ('Îŧ', &['Âĩ', 'Μ']),
+ ('ÎŊ', &['Ν']),
+ ('ξ', &['Ξ']),
+ ('Îŋ', &['Ο']),
+ ('Ī€', &['Π', 'Ī–']),
+ ('Ī', &['ÎĄ', 'Īą']),
+ ('Ī‚', &['ÎŖ', 'Īƒ']),
+ ('Īƒ', &['ÎŖ', 'Ī‚']),
+ ('Ī„', &['Τ']),
+ ('Ī…', &['ÎĨ']),
+ ('Ī†', &['ÎĻ', 'Ī•']),
+ ('Ī‡', &['Χ']),
+ ('Īˆ', &['Ψ']),
+ ('Ī‰', &['Ί', 'â„Ļ']),
+ ('ĪŠ', &['ÎĒ']),
+ ('Ī‹', &['ÎĢ']),
+ ('ĪŒ', &['Ό']),
+ ('Ī', &['Ύ']),
+ ('ĪŽ', &['Ώ']),
+ ('Ī', &['Ī—']),
+ ('Ī', &['Β', 'β']),
+ ('Ī‘', &['Θ', 'θ', 'Ī´']),
+ ('Ī•', &['ÎĻ', 'Ī†']),
+ ('Ī–', &['Π', 'Ī€']),
+ ('Ī—', &['Ī']),
+ ('Ī˜', &['Ī™']),
+ ('Ī™', &['Ī˜']),
+ ('Īš', &['Ī›']),
+ ('Ī›', &['Īš']),
+ ('Īœ', &['Ī']),
+ ('Ī', &['Īœ']),
+ ('Īž', &['ĪŸ']),
+ ('ĪŸ', &['Īž']),
+ ('Ī ', &['ĪĄ']),
+ ('ĪĄ', &['Ī ']),
+ ('Īĸ', &['ĪŖ']),
+ ('ĪŖ', &['Īĸ']),
+ ('Ī¤', &['ĪĨ']),
+ ('ĪĨ', &['Ī¤']),
+ ('ĪĻ', &['Ī§']),
+ ('Ī§', &['ĪĻ']),
+ ('Ī¨', &['ĪŠ']),
+ ('ĪŠ', &['Ī¨']),
+ ('ĪĒ', &['ĪĢ']),
+ ('ĪĢ', &['ĪĒ']),
+ ('ĪŦ', &['Ī­']),
+ ('Ī­', &['ĪŦ']),
+ ('ĪŽ', &['Ī¯']),
+ ('Ī¯', &['ĪŽ']),
+ ('Ī°', &['Κ', 'Îē']),
+ ('Īą', &['ÎĄ', 'Ī']),
+ ('Ī˛', &['Īš']),
+ ('Īŗ', &['Íŋ']),
+ ('Ī´', &['Θ', 'θ', 'Ī‘']),
+ ('Īĩ', &['Ε', 'Îĩ']),
+ ('Īˇ', &['Ī¸']),
+ ('Ī¸', &['Īˇ']),
+ ('Īš', &['Ī˛']),
+ ('Īē', &['Īģ']),
+ ('Īģ', &['Īē']),
+ ('ĪŊ', &['Íģ']),
+ ('Īž', &['Íŧ']),
+ ('Īŋ', &['ÍŊ']),
+ ('Ѐ', &['Ņ']),
+ ('Ё', &['Ņ‘']),
+ ('Ђ', &['Ņ’']),
+ ('Ѓ', &['Ņ“']),
+ ('Є', &['Ņ”']),
+ ('Ѕ', &['Ņ•']),
+ ('І', &['Ņ–']),
+ ('Ї', &['Ņ—']),
+ ('Ј', &['Ņ˜']),
+ ('Љ', &['Ņ™']),
+ ('Њ', &['Ņš']),
+ ('Ћ', &['Ņ›']),
+ ('Ќ', &['Ņœ']),
+ ('Ѝ', &['Ņ']),
+ ('Ў', &['Ņž']),
+ ('Џ', &['ŅŸ']),
+ ('А', &['а']),
+ ('Б', &['б']),
+ ('В', &['в', 'ᲀ']),
+ ('Г', &['Đŗ']),
+ ('Д', &['д', 'ᲁ']),
+ ('Е', &['Đĩ']),
+ ('Ж', &['Đļ']),
+ ('З', &['з']),
+ ('И', &['и']),
+ ('Й', &['й']),
+ ('К', &['Đē']),
+ ('Л', &['Đģ']),
+ ('М', &['Đŧ']),
+ ('Н', &['ĐŊ']),
+ ('О', &['о', 'ᲂ']),
+ ('П', &['Đŋ']),
+ ('Đ ', &['Ņ€']),
+ ('ĐĄ', &['Ņ', 'ᲃ']),
+ ('Đĸ', &['Ņ‚', 'ᲄ', 'ᲅ']),
+ ('ĐŖ', &['Ņƒ']),
+ ('Ф', &['Ņ„']),
+ ('ĐĨ', &['Ņ…']),
+ ('ĐĻ', &['Ņ†']),
+ ('Ч', &['Ņ‡']),
+ ('Ш', &['Ņˆ']),
+ ('ĐŠ', &['Ņ‰']),
+ ('ĐĒ', &['ŅŠ', 'ᲆ']),
+ ('ĐĢ', &['Ņ‹']),
+ ('ĐŦ', &['ŅŒ']),
+ ('Đ­', &['Ņ']),
+ ('ĐŽ', &['ŅŽ']),
+ ('Đ¯', &['Ņ']),
+ ('а', &['А']),
+ ('б', &['Б']),
+ ('в', &['В', 'ᲀ']),
+ ('Đŗ', &['Г']),
+ ('д', &['Д', 'ᲁ']),
+ ('Đĩ', &['Е']),
+ ('Đļ', &['Ж']),
+ ('з', &['З']),
+ ('и', &['И']),
+ ('й', &['Й']),
+ ('Đē', &['К']),
+ ('Đģ', &['Л']),
+ ('Đŧ', &['М']),
+ ('ĐŊ', &['Н']),
+ ('о', &['О', 'ᲂ']),
+ ('Đŋ', &['П']),
+ ('Ņ€', &['Đ ']),
+ ('Ņ', &['ĐĄ', 'ᲃ']),
+ ('Ņ‚', &['Đĸ', 'ᲄ', 'ᲅ']),
+ ('Ņƒ', &['ĐŖ']),
+ ('Ņ„', &['Ф']),
+ ('Ņ…', &['ĐĨ']),
+ ('Ņ†', &['ĐĻ']),
+ ('Ņ‡', &['Ч']),
+ ('Ņˆ', &['Ш']),
+ ('Ņ‰', &['ĐŠ']),
+ ('ŅŠ', &['ĐĒ', 'ᲆ']),
+ ('Ņ‹', &['ĐĢ']),
+ ('ŅŒ', &['ĐŦ']),
+ ('Ņ', &['Đ­']),
+ ('ŅŽ', &['ĐŽ']),
+ ('Ņ', &['Đ¯']),
+ ('Ņ', &['Ѐ']),
+ ('Ņ‘', &['Ё']),
+ ('Ņ’', &['Ђ']),
+ ('Ņ“', &['Ѓ']),
+ ('Ņ”', &['Є']),
+ ('Ņ•', &['Ѕ']),
+ ('Ņ–', &['І']),
+ ('Ņ—', &['Ї']),
+ ('Ņ˜', &['Ј']),
+ ('Ņ™', &['Љ']),
+ ('Ņš', &['Њ']),
+ ('Ņ›', &['Ћ']),
+ ('Ņœ', &['Ќ']),
+ ('Ņ', &['Ѝ']),
+ ('Ņž', &['Ў']),
+ ('ŅŸ', &['Џ']),
+ ('Ņ ', &['ŅĄ']),
+ ('ŅĄ', &['Ņ ']),
+ ('Ņĸ', &['ŅŖ', 'ᲇ']),
+ ('ŅŖ', &['Ņĸ', 'ᲇ']),
+ ('Ņ¤', &['ŅĨ']),
+ ('ŅĨ', &['Ņ¤']),
+ ('ŅĻ', &['Ņ§']),
+ ('Ņ§', &['ŅĻ']),
+ ('Ņ¨', &['ŅŠ']),
+ ('ŅŠ', &['Ņ¨']),
+ ('ŅĒ', &['ŅĢ']),
+ ('ŅĢ', &['ŅĒ']),
+ ('ŅŦ', &['Ņ­']),
+ ('Ņ­', &['ŅŦ']),
+ ('ŅŽ', &['Ņ¯']),
+ ('Ņ¯', &['ŅŽ']),
+ ('Ņ°', &['Ņą']),
+ ('Ņą', &['Ņ°']),
+ ('Ņ˛', &['Ņŗ']),
+ ('Ņŗ', &['Ņ˛']),
+ ('Ņ´', &['Ņĩ']),
+ ('Ņĩ', &['Ņ´']),
+ ('Ņļ', &['Ņˇ']),
+ ('Ņˇ', &['Ņļ']),
+ ('Ņ¸', &['Ņš']),
+ ('Ņš', &['Ņ¸']),
+ ('Ņē', &['Ņģ']),
+ ('Ņģ', &['Ņē']),
+ ('Ņŧ', &['ŅŊ']),
+ ('ŅŊ', &['Ņŧ']),
+ ('Ņž', &['Ņŋ']),
+ ('Ņŋ', &['Ņž']),
+ ('Ō€', &['Ō']),
+ ('Ō', &['Ō€']),
+ ('ŌŠ', &['Ō‹']),
+ ('Ō‹', &['ŌŠ']),
+ ('ŌŒ', &['Ō']),
+ ('Ō', &['ŌŒ']),
+ ('ŌŽ', &['Ō']),
+ ('Ō', &['ŌŽ']),
+ ('Ō', &['Ō‘']),
+ ('Ō‘', &['Ō']),
+ ('Ō’', &['Ō“']),
+ ('Ō“', &['Ō’']),
+ ('Ō”', &['Ō•']),
+ ('Ō•', &['Ō”']),
+ ('Ō–', &['Ō—']),
+ ('Ō—', &['Ō–']),
+ ('Ō˜', &['Ō™']),
+ ('Ō™', &['Ō˜']),
+ ('Ōš', &['Ō›']),
+ ('Ō›', &['Ōš']),
+ ('Ōœ', &['Ō']),
+ ('Ō', &['Ōœ']),
+ ('Ōž', &['ŌŸ']),
+ ('ŌŸ', &['Ōž']),
+ ('Ō ', &['ŌĄ']),
+ ('ŌĄ', &['Ō ']),
+ ('Ōĸ', &['ŌŖ']),
+ ('ŌŖ', &['Ōĸ']),
+ ('Ō¤', &['ŌĨ']),
+ ('ŌĨ', &['Ō¤']),
+ ('ŌĻ', &['Ō§']),
+ ('Ō§', &['ŌĻ']),
+ ('Ō¨', &['ŌŠ']),
+ ('ŌŠ', &['Ō¨']),
+ ('ŌĒ', &['ŌĢ']),
+ ('ŌĢ', &['ŌĒ']),
+ ('ŌŦ', &['Ō­']),
+ ('Ō­', &['ŌŦ']),
+ ('ŌŽ', &['Ō¯']),
+ ('Ō¯', &['ŌŽ']),
+ ('Ō°', &['Ōą']),
+ ('Ōą', &['Ō°']),
+ ('Ō˛', &['Ōŗ']),
+ ('Ōŗ', &['Ō˛']),
+ ('Ō´', &['Ōĩ']),
+ ('Ōĩ', &['Ō´']),
+ ('Ōļ', &['Ōˇ']),
+ ('Ōˇ', &['Ōļ']),
+ ('Ō¸', &['Ōš']),
+ ('Ōš', &['Ō¸']),
+ ('Ōē', &['Ōģ']),
+ ('Ōģ', &['Ōē']),
+ ('Ōŧ', &['ŌŊ']),
+ ('ŌŊ', &['Ōŧ']),
+ ('Ōž', &['Ōŋ']),
+ ('Ōŋ', &['Ōž']),
+ ('Ķ€', &['Ķ']),
+ ('Ķ', &['Ķ‚']),
+ ('Ķ‚', &['Ķ']),
+ ('Ķƒ', &['Ķ„']),
+ ('Ķ„', &['Ķƒ']),
+ ('Ķ…', &['Ķ†']),
+ ('Ķ†', &['Ķ…']),
+ ('Ķ‡', &['Ķˆ']),
+ ('Ķˆ', &['Ķ‡']),
+ ('Ķ‰', &['ĶŠ']),
+ ('ĶŠ', &['Ķ‰']),
+ ('Ķ‹', &['ĶŒ']),
+ ('ĶŒ', &['Ķ‹']),
+ ('Ķ', &['ĶŽ']),
+ ('ĶŽ', &['Ķ']),
+ ('Ķ', &['Ķ€']),
+ ('Ķ', &['Ķ‘']),
+ ('Ķ‘', &['Ķ']),
+ ('Ķ’', &['Ķ“']),
+ ('Ķ“', &['Ķ’']),
+ ('Ķ”', &['Ķ•']),
+ ('Ķ•', &['Ķ”']),
+ ('Ķ–', &['Ķ—']),
+ ('Ķ—', &['Ķ–']),
+ ('Ķ˜', &['Ķ™']),
+ ('Ķ™', &['Ķ˜']),
+ ('Ķš', &['Ķ›']),
+ ('Ķ›', &['Ķš']),
+ ('Ķœ', &['Ķ']),
+ ('Ķ', &['Ķœ']),
+ ('Ķž', &['ĶŸ']),
+ ('ĶŸ', &['Ķž']),
+ ('Ķ ', &['ĶĄ']),
+ ('ĶĄ', &['Ķ ']),
+ ('Ķĸ', &['ĶŖ']),
+ ('ĶŖ', &['Ķĸ']),
+ ('Ķ¤', &['ĶĨ']),
+ ('ĶĨ', &['Ķ¤']),
+ ('ĶĻ', &['Ķ§']),
+ ('Ķ§', &['ĶĻ']),
+ ('Ķ¨', &['ĶŠ']),
+ ('ĶŠ', &['Ķ¨']),
+ ('ĶĒ', &['ĶĢ']),
+ ('ĶĢ', &['ĶĒ']),
+ ('ĶŦ', &['Ķ­']),
+ ('Ķ­', &['ĶŦ']),
+ ('ĶŽ', &['Ķ¯']),
+ ('Ķ¯', &['ĶŽ']),
+ ('Ķ°', &['Ķą']),
+ ('Ķą', &['Ķ°']),
+ ('Ķ˛', &['Ķŗ']),
+ ('Ķŗ', &['Ķ˛']),
+ ('Ķ´', &['Ķĩ']),
+ ('Ķĩ', &['Ķ´']),
+ ('Ķļ', &['Ķˇ']),
+ ('Ķˇ', &['Ķļ']),
+ ('Ķ¸', &['Ķš']),
+ ('Ķš', &['Ķ¸']),
+ ('Ķē', &['Ķģ']),
+ ('Ķģ', &['Ķē']),
+ ('Ķŧ', &['ĶŊ']),
+ ('ĶŊ', &['Ķŧ']),
+ ('Ķž', &['Ķŋ']),
+ ('Ķŋ', &['Ķž']),
+ ('Ԁ', &['ԁ']),
+ ('ԁ', &['Ԁ']),
+ ('Ԃ', &['ԃ']),
+ ('ԃ', &['Ԃ']),
+ ('Ԅ', &['ԅ']),
+ ('ԅ', &['Ԅ']),
+ ('Ԇ', &['ԇ']),
+ ('ԇ', &['Ԇ']),
+ ('Ԉ', &['ԉ']),
+ ('ԉ', &['Ԉ']),
+ ('Ԋ', &['ԋ']),
+ ('ԋ', &['Ԋ']),
+ ('Ԍ', &['ԍ']),
+ ('ԍ', &['Ԍ']),
+ ('Ԏ', &['ԏ']),
+ ('ԏ', &['Ԏ']),
+ ('Ԑ', &['ԑ']),
+ ('ԑ', &['Ԑ']),
+ ('Ԓ', &['ԓ']),
+ ('ԓ', &['Ԓ']),
+ ('Ԕ', &['ԕ']),
+ ('ԕ', &['Ԕ']),
+ ('Ԗ', &['ԗ']),
+ ('ԗ', &['Ԗ']),
+ ('Ԙ', &['ԙ']),
+ ('ԙ', &['Ԙ']),
+ ('Ԛ', &['ԛ']),
+ ('ԛ', &['Ԛ']),
+ ('Ԝ', &['ԝ']),
+ ('ԝ', &['Ԝ']),
+ ('Ԟ', &['ԟ']),
+ ('ԟ', &['Ԟ']),
+ ('Ô ', &['ÔĄ']),
+ ('ÔĄ', &['Ô ']),
+ ('Ôĸ', &['ÔŖ']),
+ ('ÔŖ', &['Ôĸ']),
+ ('Ô¤', &['ÔĨ']),
+ ('ÔĨ', &['Ô¤']),
+ ('ÔĻ', &['Ô§']),
+ ('Ô§', &['ÔĻ']),
+ ('Ô¨', &['ÔŠ']),
+ ('ÔŠ', &['Ô¨']),
+ ('ÔĒ', &['ÔĢ']),
+ ('ÔĢ', &['ÔĒ']),
+ ('ÔŦ', &['Ô­']),
+ ('Ô­', &['ÔŦ']),
+ ('ÔŽ', &['Ô¯']),
+ ('Ô¯', &['ÔŽ']),
+ ('Ôą', &['ÕĄ']),
+ ('Ô˛', &['Õĸ']),
+ ('Ôŗ', &['ÕŖ']),
+ ('Ô´', &['Õ¤']),
+ ('Ôĩ', &['ÕĨ']),
+ ('Ôļ', &['ÕĻ']),
+ ('Ôˇ', &['Õ§']),
+ ('Ô¸', &['Õ¨']),
+ ('Ôš', &['ÕŠ']),
+ ('Ôē', &['ÕĒ']),
+ ('Ôģ', &['ÕĢ']),
+ ('Ôŧ', &['ÕŦ']),
+ ('ÔŊ', &['Õ­']),
+ ('Ôž', &['ÕŽ']),
+ ('Ôŋ', &['Õ¯']),
+ ('Հ', &['հ']),
+ ('Ձ', &['Õą']),
+ ('Ղ', &['Õ˛']),
+ ('Ճ', &['Õŗ']),
+ ('Մ', &['մ']),
+ ('Յ', &['Õĩ']),
+ ('Ն', &['Õļ']),
+ ('Շ', &['Õˇ']),
+ ('Ո', &['ո']),
+ ('Չ', &['Õš']),
+ ('Պ', &['Õē']),
+ ('Ջ', &['Õģ']),
+ ('Ռ', &['Õŧ']),
+ ('Ս', &['ÕŊ']),
+ ('Վ', &['Õž']),
+ ('Տ', &['Õŋ']),
+ ('Ր', &['ր']),
+ ('Ց', &['ց']),
+ ('Ւ', &['ւ']),
+ ('Փ', &['փ']),
+ ('Ք', &['ք']),
+ ('Օ', &['օ']),
+ ('Ֆ', &['ֆ']),
+ ('ÕĄ', &['Ôą']),
+ ('Õĸ', &['Ô˛']),
+ ('ÕŖ', &['Ôŗ']),
+ ('Õ¤', &['Ô´']),
+ ('ÕĨ', &['Ôĩ']),
+ ('ÕĻ', &['Ôļ']),
+ ('Õ§', &['Ôˇ']),
+ ('Õ¨', &['Ô¸']),
+ ('ÕŠ', &['Ôš']),
+ ('ÕĒ', &['Ôē']),
+ ('ÕĢ', &['Ôģ']),
+ ('ÕŦ', &['Ôŧ']),
+ ('Õ­', &['ÔŊ']),
+ ('ÕŽ', &['Ôž']),
+ ('Õ¯', &['Ôŋ']),
+ ('հ', &['Հ']),
+ ('Õą', &['Ձ']),
+ ('Õ˛', &['Ղ']),
+ ('Õŗ', &['Ճ']),
+ ('մ', &['Մ']),
+ ('Õĩ', &['Յ']),
+ ('Õļ', &['Ն']),
+ ('Õˇ', &['Շ']),
+ ('ո', &['Ո']),
+ ('Õš', &['Չ']),
+ ('Õē', &['Պ']),
+ ('Õģ', &['Ջ']),
+ ('Õŧ', &['Ռ']),
+ ('ÕŊ', &['Ս']),
+ ('Õž', &['Վ']),
+ ('Õŋ', &['Տ']),
+ ('ր', &['Ր']),
+ ('ց', &['Ց']),
+ ('ւ', &['Ւ']),
+ ('փ', &['Փ']),
+ ('ք', &['Ք']),
+ ('օ', &['Օ']),
+ ('ֆ', &['Ֆ']),
+ ('Ⴀ', &['ⴀ']),
+ ('Ⴁ', &['ⴁ']),
+ ('á‚ĸ', &['ⴂ']),
+ ('á‚Ŗ', &['ⴃ']),
+ ('Ⴄ', &['ⴄ']),
+ ('á‚Ĩ', &['ⴅ']),
+ ('á‚Ļ', &['ⴆ']),
+ ('Ⴇ', &['ⴇ']),
+ ('Ⴈ', &['ⴈ']),
+ ('Ⴉ', &['ⴉ']),
+ ('á‚Ē', &['ⴊ']),
+ ('á‚Ģ', &['ⴋ']),
+ ('á‚Ŧ', &['ⴌ']),
+ ('Ⴍ', &['ⴍ']),
+ ('Ⴎ', &['ⴎ']),
+ ('Ⴏ', &['ⴏ']),
+ ('Ⴐ', &['ⴐ']),
+ ('Ⴑ', &['ⴑ']),
+ ('Ⴒ', &['ⴒ']),
+ ('á‚ŗ', &['ⴓ']),
+ ('Ⴔ', &['ⴔ']),
+ ('á‚ĩ', &['ⴕ']),
+ ('á‚ļ', &['ⴖ']),
+ ('Ⴗ', &['ⴗ']),
+ ('Ⴘ', &['ⴘ']),
+ ('Ⴙ', &['ⴙ']),
+ ('á‚ē', &['ⴚ']),
+ ('á‚ģ', &['ⴛ']),
+ ('á‚ŧ', &['ⴜ']),
+ ('á‚Ŋ', &['ⴝ']),
+ ('Ⴞ', &['ⴞ']),
+ ('á‚ŋ', &['ⴟ']),
+ ('Ⴠ', &['ⴠ']),
+ ('Ⴡ', &['ⴥ']),
+ ('Ⴢ', &['â´ĸ']),
+ ('Ⴣ', &['â´Ŗ']),
+ ('Ⴤ', &['ⴤ']),
+ ('Ⴥ', &['â´Ĩ']),
+ ('Ⴧ', &['ⴧ']),
+ ('Ⴭ', &['ⴭ']),
+ ('ა', &['Ა']),
+ ('ბ', &['Ბ']),
+ ('გ', &['Გ']),
+ ('დ', &['Დ']),
+ ('ე', &['Ე']),
+ ('ვ', &['Ვ']),
+ ('ზ', &['Ზ']),
+ ('თ', &['Თ']),
+ ('ი', &['Ი']),
+ ('კ', &['Კ']),
+ ('ლ', &['Ლ']),
+ ('მ', &['Მ']),
+ ('ნ', &['Ნ']),
+ ('ო', &['Ო']),
+ ('პ', &['Პ']),
+ ('ჟ', &['Ჟ']),
+ ('რ', &['Რ']),
+ ('ქ', &['Ქ']),
+ ('áƒĸ', &['á˛ĸ']),
+ ('áƒŖ', &['á˛Ŗ']),
+ ('ფ', &['Ფ']),
+ ('áƒĨ', &['á˛Ĩ']),
+ ('áƒĻ', &['á˛Ļ']),
+ ('ყ', &['Ყ']),
+ ('შ', &['Შ']),
+ ('჊', &['ᲊ']),
+ ('áƒĒ', &['á˛Ē']),
+ ('áƒĢ', &['á˛Ģ']),
+ ('áƒŦ', &['á˛Ŧ']),
+ ('ჭ', &['Ჭ']),
+ ('჎', &['᲎']),
+ ('ჯ', &['á˛¯']),
+ ('ჰ', &['Ჰ']),
+ ('ჹ', &['Ჹ']),
+ ('ჲ', &['Ჲ']),
+ ('áƒŗ', &['á˛ŗ']),
+ ('ჴ', &['Ჴ']),
+ ('áƒĩ', &['á˛ĩ']),
+ ('áƒļ', &['á˛ļ']),
+ ('ს', &['Ს']),
+ ('ჸ', &['Ჸ']),
+ ('ლ', &['Ლ']),
+ ('áƒē', &['á˛ē']),
+ ('áƒŊ', &['á˛Ŋ']),
+ ('პ', &['Პ']),
+ ('áƒŋ', &['á˛ŋ']),
+ ('Ꭰ', &['ꭰ']),
+ ('Ꭱ', &['ꭱ']),
+ ('áŽĸ', &['ę­˛']),
+ ('áŽŖ', &['ę­ŗ']),
+ ('Ꭴ', &['ꭴ']),
+ ('áŽĨ', &['ę­ĩ']),
+ ('áŽĻ', &['ę­ļ']),
+ ('Ꭷ', &['ꭷ']),
+ ('Ꭸ', &['ꭸ']),
+ ('Ꭹ', &['ꭹ']),
+ ('áŽĒ', &['ę­ē']),
+ ('áŽĢ', &['ę­ģ']),
+ ('áŽŦ', &['ę­ŧ']),
+ ('Ꭽ', &['ę­Ŋ']),
+ ('Ꭾ', &['ꭾ']),
+ ('Ꭿ', &['ę­ŋ']),
+ ('Ꮀ', &['ꮀ']),
+ ('Ꮁ', &['ꮁ']),
+ ('Ꮂ', &['ꮂ']),
+ ('áŽŗ', &['ꎃ']),
+ ('Ꮄ', &['ꮄ']),
+ ('áŽĩ', &['ꮅ']),
+ ('áŽļ', &['ꮆ']),
+ ('Ꮇ', &['ꮇ']),
+ ('Ꮈ', &['ꮈ']),
+ ('Ꮉ', &['ꮉ']),
+ ('áŽē', &['ꮊ']),
+ ('áŽģ', &['ꮋ']),
+ ('áŽŧ', &['ꮌ']),
+ ('áŽŊ', &['ꮍ']),
+ ('Ꮎ', &['ꮎ']),
+ ('áŽŋ', &['ꮏ']),
+ ('Ꮐ', &['ꮐ']),
+ ('Ꮑ', &['ꮑ']),
+ ('Ꮒ', &['ꮒ']),
+ ('Ꮓ', &['ꮓ']),
+ ('Ꮔ', &['ꮔ']),
+ ('Ꮕ', &['ꮕ']),
+ ('Ꮖ', &['ꮖ']),
+ ('Ꮗ', &['ꮗ']),
+ ('Ꮘ', &['ꮘ']),
+ ('Ꮙ', &['ꮙ']),
+ ('Ꮚ', &['ꮚ']),
+ ('Ꮛ', &['ꮛ']),
+ ('Ꮜ', &['ꮜ']),
+ ('Ꮝ', &['ꮝ']),
+ ('Ꮞ', &['ꮞ']),
+ ('Ꮟ', &['ꮟ']),
+ ('Ꮠ', &['ꮠ']),
+ ('Ꮡ', &['ꮡ']),
+ ('Ꮢ', &['ęŽĸ']),
+ ('Ꮣ', &['ęŽŖ']),
+ ('Ꮤ', &['ꮤ']),
+ ('Ꮥ', &['ęŽĨ']),
+ ('Ꮦ', &['ęŽĻ']),
+ ('Ꮧ', &['ꮧ']),
+ ('Ꮨ', &['ꮨ']),
+ ('Ꮩ', &['ꮩ']),
+ ('Ꮪ', &['ęŽĒ']),
+ ('Ꮫ', &['ęŽĢ']),
+ ('Ꮬ', &['ęŽŦ']),
+ ('Ꮭ', &['ꮭ']),
+ ('Ꮮ', &['ꮮ']),
+ ('Ꮯ', &['ęŽ¯']),
+ ('Ꮰ', &['ꮰ']),
+ ('Ꮱ', &['ꮱ']),
+ ('áĸ', &['ꎲ']),
+ ('áŖ', &['ęŽŗ']),
+ ('Ꮴ', &['ꮴ']),
+ ('áĨ', &['ęŽĩ']),
+ ('áĻ', &['ęŽļ']),
+ ('Ꮷ', &['ꮷ']),
+ ('Ꮸ', &['ꮸ']),
+ ('Ꮹ', &['ꮹ']),
+ ('áĒ', &['ęŽē']),
+ ('áĢ', &['ęŽģ']),
+ ('áŦ', &['ęŽŧ']),
+ ('Ꮽ', &['ęŽŊ']),
+ ('Ꮾ', &['ꮾ']),
+ ('Ꮿ', &['ęŽŋ']),
+ ('Ᏸ', &['ᏸ']),
+ ('Ᏹ', &['ᏹ']),
+ ('Ᏺ', &['áē']),
+ ('áŗ', &['áģ']),
+ ('Ᏼ', &['áŧ']),
+ ('áĩ', &['áŊ']),
+ ('ᏸ', &['Ᏸ']),
+ ('ᏹ', &['Ᏹ']),
+ ('áē', &['Ᏺ']),
+ ('áģ', &['áŗ']),
+ ('áŧ', &['Ᏼ']),
+ ('áŊ', &['áĩ']),
+ ('ᲀ', &['В', 'в']),
+ ('ᲁ', &['Д', 'д']),
+ ('ᲂ', &['О', 'о']),
+ ('ᲃ', &['ĐĄ', 'Ņ']),
+ ('ᲄ', &['Đĸ', 'Ņ‚', 'ᲅ']),
+ ('ᲅ', &['Đĸ', 'Ņ‚', 'ᲄ']),
+ ('ᲆ', &['ĐĒ', 'ŅŠ']),
+ ('ᲇ', &['Ņĸ', 'ŅŖ']),
+ ('ᲈ', &['Ꙋ', 'ꙋ']),
+ ('Ა', &['ა']),
+ ('Ბ', &['ბ']),
+ ('Გ', &['გ']),
+ ('Დ', &['დ']),
+ ('Ე', &['ე']),
+ ('Ვ', &['ვ']),
+ ('Ზ', &['ზ']),
+ ('Თ', &['თ']),
+ ('Ი', &['ი']),
+ ('Კ', &['კ']),
+ ('Ლ', &['ლ']),
+ ('Მ', &['მ']),
+ ('Ნ', &['ნ']),
+ ('Ო', &['ო']),
+ ('Პ', &['პ']),
+ ('Ჟ', &['ჟ']),
+ ('Რ', &['რ']),
+ ('Ქ', &['ქ']),
+ ('á˛ĸ', &['áƒĸ']),
+ ('á˛Ŗ', &['áƒŖ']),
+ ('Ფ', &['ფ']),
+ ('á˛Ĩ', &['áƒĨ']),
+ ('á˛Ļ', &['áƒĻ']),
+ ('Ყ', &['ყ']),
+ ('Შ', &['შ']),
+ ('ᲊ', &['჊']),
+ ('á˛Ē', &['áƒĒ']),
+ ('á˛Ģ', &['áƒĢ']),
+ ('á˛Ŧ', &['áƒŦ']),
+ ('Ჭ', &['ჭ']),
+ ('᲎', &['჎']),
+ ('á˛¯', &['ჯ']),
+ ('Ჰ', &['ჰ']),
+ ('Ჹ', &['ჹ']),
+ ('Ჲ', &['ჲ']),
+ ('á˛ŗ', &['áƒŗ']),
+ ('Ჴ', &['ჴ']),
+ ('á˛ĩ', &['áƒĩ']),
+ ('á˛ļ', &['áƒļ']),
+ ('Ს', &['ს']),
+ ('Ჸ', &['ჸ']),
+ ('Ლ', &['ლ']),
+ ('á˛ē', &['áƒē']),
+ ('á˛Ŋ', &['áƒŊ']),
+ ('Პ', &['პ']),
+ ('á˛ŋ', &['áƒŋ']),
+ ('áĩš', &['ęŊ']),
+ ('áĩŊ', &['âąŖ']),
+ ('áļŽ', &['Ᶎ']),
+ ('Ḁ', &['ḁ']),
+ ('ḁ', &['Ḁ']),
+ ('Ḃ', &['ḃ']),
+ ('ḃ', &['Ḃ']),
+ ('Ḅ', &['ḅ']),
+ ('ḅ', &['Ḅ']),
+ ('Ḇ', &['ḇ']),
+ ('ḇ', &['Ḇ']),
+ ('Ḉ', &['ḉ']),
+ ('ḉ', &['Ḉ']),
+ ('Ḋ', &['ḋ']),
+ ('ḋ', &['Ḋ']),
+ ('Ḍ', &['ḍ']),
+ ('ḍ', &['Ḍ']),
+ ('Ḏ', &['ḏ']),
+ ('ḏ', &['Ḏ']),
+ ('Ḑ', &['ḑ']),
+ ('ḑ', &['Ḑ']),
+ ('Ḓ', &['ḓ']),
+ ('ḓ', &['Ḓ']),
+ ('Ḕ', &['ḕ']),
+ ('ḕ', &['Ḕ']),
+ ('Ḗ', &['ḗ']),
+ ('ḗ', &['Ḗ']),
+ ('Ḙ', &['ḙ']),
+ ('ḙ', &['Ḙ']),
+ ('Ḛ', &['ḛ']),
+ ('ḛ', &['Ḛ']),
+ ('Ḝ', &['ḝ']),
+ ('ḝ', &['Ḝ']),
+ ('Ḟ', &['ḟ']),
+ ('ḟ', &['Ḟ']),
+ ('Ḡ', &['ḥ']),
+ ('ḥ', &['Ḡ']),
+ ('á¸ĸ', &['á¸Ŗ']),
+ ('á¸Ŗ', &['á¸ĸ']),
+ ('Ḥ', &['á¸Ĩ']),
+ ('á¸Ĩ', &['Ḥ']),
+ ('á¸Ļ', &['ḧ']),
+ ('ḧ', &['á¸Ļ']),
+ ('Ḩ', &['Ḋ']),
+ ('Ḋ', &['Ḩ']),
+ ('á¸Ē', &['á¸Ģ']),
+ ('á¸Ģ', &['á¸Ē']),
+ ('á¸Ŧ', &['ḭ']),
+ ('ḭ', &['á¸Ŧ']),
+ ('Ḏ', &['ḯ']),
+ ('ḯ', &['Ḏ']),
+ ('Ḱ', &['ḹ']),
+ ('ḹ', &['Ḱ']),
+ ('Ḳ', &['á¸ŗ']),
+ ('á¸ŗ', &['Ḳ']),
+ ('Ḵ', &['á¸ĩ']),
+ ('á¸ĩ', &['Ḵ']),
+ ('á¸ļ', &['ḡ']),
+ ('ḡ', &['á¸ļ']),
+ ('Ḹ', &['Ḛ']),
+ ('Ḛ', &['Ḹ']),
+ ('á¸ē', &['á¸ģ']),
+ ('á¸ģ', &['á¸ē']),
+ ('á¸ŧ', &['á¸Ŋ']),
+ ('á¸Ŋ', &['á¸ŧ']),
+ ('Ḟ', &['á¸ŋ']),
+ ('á¸ŋ', &['Ḟ']),
+ ('Ṁ', &['ṁ']),
+ ('ṁ', &['Ṁ']),
+ ('Ṃ', &['ṃ']),
+ ('ṃ', &['Ṃ']),
+ ('Ṅ', &['ṅ']),
+ ('ṅ', &['Ṅ']),
+ ('Ṇ', &['ṇ']),
+ ('ṇ', &['Ṇ']),
+ ('Ṉ', &['ṉ']),
+ ('ṉ', &['Ṉ']),
+ ('Ṋ', &['ṋ']),
+ ('ṋ', &['Ṋ']),
+ ('Ṍ', &['ṍ']),
+ ('ṍ', &['Ṍ']),
+ ('ᚎ', &['ᚏ']),
+ ('ᚏ', &['ᚎ']),
+ ('Ṑ', &['ṑ']),
+ ('ṑ', &['Ṑ']),
+ ('Ṓ', &['ṓ']),
+ ('ṓ', &['Ṓ']),
+ ('Ṕ', &['ṕ']),
+ ('ṕ', &['Ṕ']),
+ ('Ṗ', &['ṗ']),
+ ('ṗ', &['Ṗ']),
+ ('Ṙ', &['ṙ']),
+ ('ṙ', &['Ṙ']),
+ ('Ṛ', &['ṛ']),
+ ('ṛ', &['Ṛ']),
+ ('Ṝ', &['ṝ']),
+ ('ṝ', &['Ṝ']),
+ ('Ṟ', &['ṟ']),
+ ('ṟ', &['Ṟ']),
+ ('áš ', &['ᚥ', 'áē›']),
+ ('ᚥ', &['áš ', 'áē›']),
+ ('ášĸ', &['ášŖ']),
+ ('ášŖ', &['ášĸ']),
+ ('ᚤ', &['ášĨ']),
+ ('ášĨ', &['ᚤ']),
+ ('ášĻ', &['ᚧ']),
+ ('ᚧ', &['ášĻ']),
+ ('ᚨ', &['ᚊ']),
+ ('ᚊ', &['ᚨ']),
+ ('ášĒ', &['ášĢ']),
+ ('ášĢ', &['ášĒ']),
+ ('ášŦ', &['áš­']),
+ ('áš­', &['ášŦ']),
+ ('ᚎ', &['ᚯ']),
+ ('ᚯ', &['ᚎ']),
+ ('áš°', &['ášą']),
+ ('ášą', &['áš°']),
+ ('ᚲ', &['ášŗ']),
+ ('ášŗ', &['ᚲ']),
+ ('áš´', &['ášĩ']),
+ ('ášĩ', &['áš´']),
+ ('ášļ', &['ᚡ']),
+ ('ᚡ', &['ášļ']),
+ ('ᚸ', &['ᚚ']),
+ ('ᚚ', &['ᚸ']),
+ ('ášē', &['ášģ']),
+ ('ášģ', &['ášē']),
+ ('ášŧ', &['ášŊ']),
+ ('ášŊ', &['ášŧ']),
+ ('ášž', &['ášŋ']),
+ ('ášŋ', &['ášž']),
+ ('áē€', &['áē']),
+ ('áē', &['áē€']),
+ ('áē‚', &['áēƒ']),
+ ('áēƒ', &['áē‚']),
+ ('áē„', &['áē…']),
+ ('áē…', &['áē„']),
+ ('áē†', &['áē‡']),
+ ('áē‡', &['áē†']),
+ ('áēˆ', &['áē‰']),
+ ('áē‰', &['áēˆ']),
+ ('áēŠ', &['áē‹']),
+ ('áē‹', &['áēŠ']),
+ ('áēŒ', &['áē']),
+ ('áē', &['áēŒ']),
+ ('áēŽ', &['áē']),
+ ('áē', &['áēŽ']),
+ ('áē', &['áē‘']),
+ ('áē‘', &['áē']),
+ ('áē’', &['áē“']),
+ ('áē“', &['áē’']),
+ ('áē”', &['áē•']),
+ ('áē•', &['áē”']),
+ ('áē›', &['áš ', 'ᚥ']),
+ ('áēž', &['ß']),
+ ('áē ', &['áēĄ']),
+ ('áēĄ', &['áē ']),
+ ('áēĸ', &['áēŖ']),
+ ('áēŖ', &['áēĸ']),
+ ('áē¤', &['áēĨ']),
+ ('áēĨ', &['áē¤']),
+ ('áēĻ', &['áē§']),
+ ('áē§', &['áēĻ']),
+ ('áē¨', &['áēŠ']),
+ ('áēŠ', &['áē¨']),
+ ('áēĒ', &['áēĢ']),
+ ('áēĢ', &['áēĒ']),
+ ('áēŦ', &['áē­']),
+ ('áē­', &['áēŦ']),
+ ('áēŽ', &['áē¯']),
+ ('áē¯', &['áēŽ']),
+ ('áē°', &['áēą']),
+ ('áēą', &['áē°']),
+ ('áē˛', &['áēŗ']),
+ ('áēŗ', &['áē˛']),
+ ('áē´', &['áēĩ']),
+ ('áēĩ', &['áē´']),
+ ('áēļ', &['áēˇ']),
+ ('áēˇ', &['áēļ']),
+ ('áē¸', &['áēš']),
+ ('áēš', &['áē¸']),
+ ('áēē', &['áēģ']),
+ ('áēģ', &['áēē']),
+ ('áēŧ', &['áēŊ']),
+ ('áēŊ', &['áēŧ']),
+ ('áēž', &['áēŋ']),
+ ('áēŋ', &['áēž']),
+ ('áģ€', &['áģ']),
+ ('áģ', &['áģ€']),
+ ('áģ‚', &['áģƒ']),
+ ('áģƒ', &['áģ‚']),
+ ('áģ„', &['áģ…']),
+ ('áģ…', &['áģ„']),
+ ('áģ†', &['áģ‡']),
+ ('áģ‡', &['áģ†']),
+ ('áģˆ', &['áģ‰']),
+ ('áģ‰', &['áģˆ']),
+ ('áģŠ', &['áģ‹']),
+ ('áģ‹', &['áģŠ']),
+ ('áģŒ', &['áģ']),
+ ('áģ', &['áģŒ']),
+ ('áģŽ', &['áģ']),
+ ('áģ', &['áģŽ']),
+ ('áģ', &['áģ‘']),
+ ('áģ‘', &['áģ']),
+ ('áģ’', &['áģ“']),
+ ('áģ“', &['áģ’']),
+ ('áģ”', &['áģ•']),
+ ('áģ•', &['áģ”']),
+ ('áģ–', &['áģ—']),
+ ('áģ—', &['áģ–']),
+ ('áģ˜', &['áģ™']),
+ ('áģ™', &['áģ˜']),
+ ('áģš', &['áģ›']),
+ ('áģ›', &['áģš']),
+ ('áģœ', &['áģ']),
+ ('áģ', &['áģœ']),
+ ('áģž', &['áģŸ']),
+ ('áģŸ', &['áģž']),
+ ('áģ ', &['áģĄ']),
+ ('áģĄ', &['áģ ']),
+ ('áģĸ', &['áģŖ']),
+ ('áģŖ', &['áģĸ']),
+ ('áģ¤', &['áģĨ']),
+ ('áģĨ', &['áģ¤']),
+ ('áģĻ', &['áģ§']),
+ ('áģ§', &['áģĻ']),
+ ('áģ¨', &['áģŠ']),
+ ('áģŠ', &['áģ¨']),
+ ('áģĒ', &['áģĢ']),
+ ('áģĢ', &['áģĒ']),
+ ('áģŦ', &['áģ­']),
+ ('áģ­', &['áģŦ']),
+ ('áģŽ', &['áģ¯']),
+ ('áģ¯', &['áģŽ']),
+ ('áģ°', &['áģą']),
+ ('áģą', &['áģ°']),
+ ('áģ˛', &['áģŗ']),
+ ('áģŗ', &['áģ˛']),
+ ('áģ´', &['áģĩ']),
+ ('áģĩ', &['áģ´']),
+ ('áģļ', &['áģˇ']),
+ ('áģˇ', &['áģļ']),
+ ('áģ¸', &['áģš']),
+ ('áģš', &['áģ¸']),
+ ('áģē', &['áģģ']),
+ ('áģģ', &['áģē']),
+ ('áģŧ', &['áģŊ']),
+ ('áģŊ', &['áģŧ']),
+ ('áģž', &['áģŋ']),
+ ('áģŋ', &['áģž']),
+ ('áŧ€', &['áŧˆ']),
+ ('áŧ', &['áŧ‰']),
+ ('áŧ‚', &['áŧŠ']),
+ ('áŧƒ', &['áŧ‹']),
+ ('áŧ„', &['áŧŒ']),
+ ('áŧ…', &['áŧ']),
+ ('áŧ†', &['áŧŽ']),
+ ('áŧ‡', &['áŧ']),
+ ('áŧˆ', &['áŧ€']),
+ ('áŧ‰', &['áŧ']),
+ ('áŧŠ', &['áŧ‚']),
+ ('áŧ‹', &['áŧƒ']),
+ ('áŧŒ', &['áŧ„']),
+ ('áŧ', &['áŧ…']),
+ ('áŧŽ', &['áŧ†']),
+ ('áŧ', &['áŧ‡']),
+ ('áŧ', &['áŧ˜']),
+ ('áŧ‘', &['áŧ™']),
+ ('áŧ’', &['áŧš']),
+ ('áŧ“', &['áŧ›']),
+ ('áŧ”', &['áŧœ']),
+ ('áŧ•', &['áŧ']),
+ ('áŧ˜', &['áŧ']),
+ ('áŧ™', &['áŧ‘']),
+ ('áŧš', &['áŧ’']),
+ ('áŧ›', &['áŧ“']),
+ ('áŧœ', &['áŧ”']),
+ ('áŧ', &['áŧ•']),
+ ('áŧ ', &['áŧ¨']),
+ ('áŧĄ', &['áŧŠ']),
+ ('áŧĸ', &['áŧĒ']),
+ ('áŧŖ', &['áŧĢ']),
+ ('áŧ¤', &['áŧŦ']),
+ ('áŧĨ', &['áŧ­']),
+ ('áŧĻ', &['áŧŽ']),
+ ('áŧ§', &['áŧ¯']),
+ ('áŧ¨', &['áŧ ']),
+ ('áŧŠ', &['áŧĄ']),
+ ('áŧĒ', &['áŧĸ']),
+ ('áŧĢ', &['áŧŖ']),
+ ('áŧŦ', &['áŧ¤']),
+ ('áŧ­', &['áŧĨ']),
+ ('áŧŽ', &['áŧĻ']),
+ ('áŧ¯', &['áŧ§']),
+ ('áŧ°', &['áŧ¸']),
+ ('áŧą', &['áŧš']),
+ ('áŧ˛', &['áŧē']),
+ ('áŧŗ', &['áŧģ']),
+ ('áŧ´', &['áŧŧ']),
+ ('áŧĩ', &['áŧŊ']),
+ ('áŧļ', &['áŧž']),
+ ('áŧˇ', &['áŧŋ']),
+ ('áŧ¸', &['áŧ°']),
+ ('áŧš', &['áŧą']),
+ ('áŧē', &['áŧ˛']),
+ ('áŧģ', &['áŧŗ']),
+ ('áŧŧ', &['áŧ´']),
+ ('áŧŊ', &['áŧĩ']),
+ ('áŧž', &['áŧļ']),
+ ('áŧŋ', &['áŧˇ']),
+ ('áŊ€', &['áŊˆ']),
+ ('áŊ', &['áŊ‰']),
+ ('áŊ‚', &['áŊŠ']),
+ ('áŊƒ', &['áŊ‹']),
+ ('áŊ„', &['áŊŒ']),
+ ('áŊ…', &['áŊ']),
+ ('áŊˆ', &['áŊ€']),
+ ('áŊ‰', &['áŊ']),
+ ('áŊŠ', &['áŊ‚']),
+ ('áŊ‹', &['áŊƒ']),
+ ('áŊŒ', &['áŊ„']),
+ ('áŊ', &['áŊ…']),
+ ('áŊ‘', &['áŊ™']),
+ ('áŊ“', &['áŊ›']),
+ ('áŊ•', &['áŊ']),
+ ('áŊ—', &['áŊŸ']),
+ ('áŊ™', &['áŊ‘']),
+ ('áŊ›', &['áŊ“']),
+ ('áŊ', &['áŊ•']),
+ ('áŊŸ', &['áŊ—']),
+ ('áŊ ', &['áŊ¨']),
+ ('áŊĄ', &['áŊŠ']),
+ ('áŊĸ', &['áŊĒ']),
+ ('áŊŖ', &['áŊĢ']),
+ ('áŊ¤', &['áŊŦ']),
+ ('áŊĨ', &['áŊ­']),
+ ('áŊĻ', &['áŊŽ']),
+ ('áŊ§', &['áŊ¯']),
+ ('áŊ¨', &['áŊ ']),
+ ('áŊŠ', &['áŊĄ']),
+ ('áŊĒ', &['áŊĸ']),
+ ('áŊĢ', &['áŊŖ']),
+ ('áŊŦ', &['áŊ¤']),
+ ('áŊ­', &['áŊĨ']),
+ ('áŊŽ', &['áŊĻ']),
+ ('áŊ¯', &['áŊ§']),
+ ('áŊ°', &['ážē']),
+ ('áŊą', &['ážģ']),
+ ('áŊ˛', &['áŋˆ']),
+ ('áŊŗ', &['áŋ‰']),
+ ('áŊ´', &['áŋŠ']),
+ ('áŊĩ', &['áŋ‹']),
+ ('áŊļ', &['áŋš']),
+ ('áŊˇ', &['áŋ›']),
+ ('áŊ¸', &['áŋ¸']),
+ ('áŊš', &['áŋš']),
+ ('áŊē', &['áŋĒ']),
+ ('áŊģ', &['áŋĢ']),
+ ('áŊŧ', &['áŋē']),
+ ('áŊŊ', &['áŋģ']),
+ ('ᾀ', &['ᾈ']),
+ ('ᾁ', &['ᾉ']),
+ ('ᾂ', &['ᾊ']),
+ ('ᾃ', &['ᾋ']),
+ ('ᾄ', &['ᾌ']),
+ ('ᾅ', &['ᾍ']),
+ ('ᾆ', &['ᾎ']),
+ ('ᾇ', &['ᾏ']),
+ ('ᾈ', &['ᾀ']),
+ ('ᾉ', &['ᾁ']),
+ ('ᾊ', &['ᾂ']),
+ ('ᾋ', &['ᾃ']),
+ ('ᾌ', &['ᾄ']),
+ ('ᾍ', &['ᾅ']),
+ ('ᾎ', &['ᾆ']),
+ ('ᾏ', &['ᾇ']),
+ ('ថ', &['ម']),
+ ('ᾑ', &['ᾙ']),
+ ('ᾒ', &['ᾚ']),
+ ('ᾓ', &['ᾛ']),
+ ('ᾔ', &['ᾜ']),
+ ('ᾕ', &['ᾝ']),
+ ('ᾖ', &['ᾞ']),
+ ('ᾗ', &['ᾟ']),
+ ('ម', &['ថ']),
+ ('ᾙ', &['ᾑ']),
+ ('ᾚ', &['ᾒ']),
+ ('ᾛ', &['ᾓ']),
+ ('ᾜ', &['ᾔ']),
+ ('ᾝ', &['ᾕ']),
+ ('ᾞ', &['ᾖ']),
+ ('ᾟ', &['ᾗ']),
+ ('ហ', &['ឨ']),
+ ('ឥ', &['ដ']),
+ ('ážĸ', &['ážĒ']),
+ ('ážŖ', &['ážĢ']),
+ ('ឤ', &['ážŦ']),
+ ('ážĨ', &['áž­']),
+ ('ážĻ', &['ណ']),
+ ('ឧ', &['ឯ']),
+ ('ឨ', &['ហ']),
+ ('ដ', &['ឥ']),
+ ('ážĒ', &['ážĸ']),
+ ('ážĢ', &['ážŖ']),
+ ('ážŦ', &['ឤ']),
+ ('áž­', &['ážĨ']),
+ ('ណ', &['ážĻ']),
+ ('ឯ', &['ឧ']),
+ ('ឰ', &['ី']),
+ ('ážą', &['ážš']),
+ ('ážŗ', &['ážŧ']),
+ ('ី', &['ឰ']),
+ ('ážš', &['ážą']),
+ ('ážē', &['áŊ°']),
+ ('ážģ', &['áŊą']),
+ ('ážŧ', &['ážŗ']),
+ ('ι', &['\u{345}', 'Ι', 'ι']),
+ ('áŋƒ', &['áŋŒ']),
+ ('áŋˆ', &['áŊ˛']),
+ ('áŋ‰', &['áŊŗ']),
+ ('áŋŠ', &['áŊ´']),
+ ('áŋ‹', &['áŊĩ']),
+ ('áŋŒ', &['áŋƒ']),
+ ('áŋ', &['áŋ˜']),
+ ('áŋ‘', &['áŋ™']),
+ ('áŋ˜', &['áŋ']),
+ ('áŋ™', &['áŋ‘']),
+ ('áŋš', &['áŊļ']),
+ ('áŋ›', &['áŊˇ']),
+ ('áŋ ', &['áŋ¨']),
+ ('áŋĄ', &['áŋŠ']),
+ ('áŋĨ', &['áŋŦ']),
+ ('áŋ¨', &['áŋ ']),
+ ('áŋŠ', &['áŋĄ']),
+ ('áŋĒ', &['áŊē']),
+ ('áŋĢ', &['áŊģ']),
+ ('áŋŦ', &['áŋĨ']),
+ ('áŋŗ', &['áŋŧ']),
+ ('áŋ¸', &['áŊ¸']),
+ ('áŋš', &['áŊš']),
+ ('áŋē', &['áŊŧ']),
+ ('áŋģ', &['áŊŊ']),
+ ('áŋŧ', &['áŋŗ']),
+ ('â„Ļ', &['Ί', 'Ī‰']),
+ ('â„Ē', &['K', 'k']),
+ ('â„Ģ', &['Å', 'ÃĨ']),
+ ('Ⅎ', &['ⅎ']),
+ ('ⅎ', &['Ⅎ']),
+ ('Ⅰ', &['ⅰ']),
+ ('Ⅱ', &['ⅱ']),
+ ('â…ĸ', &['ⅲ']),
+ ('â…Ŗ', &['â…ŗ']),
+ ('Ⅴ', &['ⅴ']),
+ ('â…Ĩ', &['â…ĩ']),
+ ('â…Ļ', &['â…ļ']),
+ ('Ⅷ', &['ⅷ']),
+ ('Ⅸ', &['ⅸ']),
+ ('Ⅹ', &['ⅹ']),
+ ('â…Ē', &['â…ē']),
+ ('â…Ģ', &['â…ģ']),
+ ('â…Ŧ', &['â…ŧ']),
+ ('Ⅽ', &['â…Ŋ']),
+ ('Ⅾ', &['ⅾ']),
+ ('Ⅿ', &['â…ŋ']),
+ ('ⅰ', &['Ⅰ']),
+ ('ⅱ', &['Ⅱ']),
+ ('ⅲ', &['â…ĸ']),
+ ('â…ŗ', &['â…Ŗ']),
+ ('ⅴ', &['Ⅴ']),
+ ('â…ĩ', &['â…Ĩ']),
+ ('â…ļ', &['â…Ļ']),
+ ('ⅷ', &['Ⅷ']),
+ ('ⅸ', &['Ⅸ']),
+ ('ⅹ', &['Ⅹ']),
+ ('â…ē', &['â…Ē']),
+ ('â…ģ', &['â…Ģ']),
+ ('â…ŧ', &['â…Ŧ']),
+ ('â…Ŋ', &['Ⅽ']),
+ ('ⅾ', &['Ⅾ']),
+ ('â…ŋ', &['Ⅿ']),
+ ('Ↄ', &['ↄ']),
+ ('ↄ', &['Ↄ']),
+ ('â’ļ', &['ⓐ']),
+ ('Ⓑ', &['ⓑ']),
+ ('Ⓒ', &['ⓒ']),
+ ('Ⓓ', &['ⓓ']),
+ ('â’ē', &['ⓔ']),
+ ('â’ģ', &['ⓕ']),
+ ('â’ŧ', &['ⓖ']),
+ ('â’Ŋ', &['ⓗ']),
+ ('Ⓘ', &['ⓘ']),
+ ('â’ŋ', &['ⓙ']),
+ ('Ⓚ', &['ⓚ']),
+ ('Ⓛ', &['ⓛ']),
+ ('Ⓜ', &['ⓜ']),
+ ('Ⓝ', &['ⓝ']),
+ ('Ⓞ', &['ⓞ']),
+ ('Ⓟ', &['ⓟ']),
+ ('Ⓠ', &['ⓠ']),
+ ('Ⓡ', &['ⓡ']),
+ ('Ⓢ', &['â“ĸ']),
+ ('Ⓣ', &['â“Ŗ']),
+ ('Ⓤ', &['ⓤ']),
+ ('Ⓥ', &['â“Ĩ']),
+ ('Ⓦ', &['â“Ļ']),
+ ('Ⓧ', &['ⓧ']),
+ ('Ⓨ', &['ⓨ']),
+ ('Ⓩ', &['ⓩ']),
+ ('ⓐ', &['â’ļ']),
+ ('ⓑ', &['Ⓑ']),
+ ('ⓒ', &['Ⓒ']),
+ ('ⓓ', &['Ⓓ']),
+ ('ⓔ', &['â’ē']),
+ ('ⓕ', &['â’ģ']),
+ ('ⓖ', &['â’ŧ']),
+ ('ⓗ', &['â’Ŋ']),
+ ('ⓘ', &['Ⓘ']),
+ ('ⓙ', &['â’ŋ']),
+ ('ⓚ', &['Ⓚ']),
+ ('ⓛ', &['Ⓛ']),
+ ('ⓜ', &['Ⓜ']),
+ ('ⓝ', &['Ⓝ']),
+ ('ⓞ', &['Ⓞ']),
+ ('ⓟ', &['Ⓟ']),
+ ('ⓠ', &['Ⓠ']),
+ ('ⓡ', &['Ⓡ']),
+ ('â“ĸ', &['Ⓢ']),
+ ('â“Ŗ', &['Ⓣ']),
+ ('ⓤ', &['Ⓤ']),
+ ('â“Ĩ', &['Ⓥ']),
+ ('â“Ļ', &['Ⓦ']),
+ ('ⓧ', &['Ⓧ']),
+ ('ⓨ', &['Ⓨ']),
+ ('ⓩ', &['Ⓩ']),
+ ('Ⰰ', &['ⰰ']),
+ ('Ⰱ', &['ⰹ']),
+ ('Ⰲ', &['ⰲ']),
+ ('Ⰳ', &['â°ŗ']),
+ ('Ⰴ', &['ⰴ']),
+ ('Ⰵ', &['â°ĩ']),
+ ('Ⰶ', &['â°ļ']),
+ ('Ⰷ', &['ⰷ']),
+ ('Ⰸ', &['ⰸ']),
+ ('Ⰹ', &['ⰹ']),
+ ('Ⰺ', &['â°ē']),
+ ('Ⰻ', &['â°ģ']),
+ ('Ⰼ', &['â°ŧ']),
+ ('Ⰽ', &['â°Ŋ']),
+ ('Ⰾ', &['ⰾ']),
+ ('Ⰿ', &['â°ŋ']),
+ ('Ⱀ', &['ⱀ']),
+ ('Ⱁ', &['ⱁ']),
+ ('Ⱂ', &['ⱂ']),
+ ('Ⱃ', &['ⱃ']),
+ ('Ⱄ', &['ⱄ']),
+ ('Ⱅ', &['ⱅ']),
+ ('Ⱆ', &['ⱆ']),
+ ('Ⱇ', &['ⱇ']),
+ ('Ⱈ', &['⹈']),
+ ('Ⱉ', &['ⱉ']),
+ ('Ⱊ', &['ⱊ']),
+ ('Ⱋ', &['ⱋ']),
+ ('Ⱌ', &['ⱌ']),
+ ('Ⱍ', &['ⱍ']),
+ ('Ⱎ', &['ⱎ']),
+ ('Ⱏ', &['ⱏ']),
+ ('Ⱐ', &['⹐']),
+ ('Ⱑ', &['ⱑ']),
+ ('â°ĸ', &['ⱒ']),
+ ('â°Ŗ', &['ⱓ']),
+ ('Ⱔ', &['ⱔ']),
+ ('â°Ĩ', &['ⱕ']),
+ ('â°Ļ', &['ⱖ']),
+ ('Ⱗ', &['ⱗ']),
+ ('Ⱘ', &['⹘']),
+ ('Ⱙ', &['ⱙ']),
+ ('â°Ē', &['ⱚ']),
+ ('â°Ģ', &['ⱛ']),
+ ('â°Ŧ', &['ⱜ']),
+ ('Ⱝ', &['ⱝ']),
+ ('Ⱞ', &['ⱞ']),
+ ('ⰰ', &['Ⰰ']),
+ ('ⰹ', &['Ⰱ']),
+ ('ⰲ', &['Ⰲ']),
+ ('â°ŗ', &['Ⰳ']),
+ ('ⰴ', &['Ⰴ']),
+ ('â°ĩ', &['Ⰵ']),
+ ('â°ļ', &['Ⰶ']),
+ ('ⰷ', &['Ⰷ']),
+ ('ⰸ', &['Ⰸ']),
+ ('ⰹ', &['Ⰹ']),
+ ('â°ē', &['Ⰺ']),
+ ('â°ģ', &['Ⰻ']),
+ ('â°ŧ', &['Ⰼ']),
+ ('â°Ŋ', &['Ⰽ']),
+ ('ⰾ', &['Ⰾ']),
+ ('â°ŋ', &['Ⰿ']),
+ ('ⱀ', &['Ⱀ']),
+ ('ⱁ', &['Ⱁ']),
+ ('ⱂ', &['Ⱂ']),
+ ('ⱃ', &['Ⱃ']),
+ ('ⱄ', &['Ⱄ']),
+ ('ⱅ', &['Ⱅ']),
+ ('ⱆ', &['Ⱆ']),
+ ('ⱇ', &['Ⱇ']),
+ ('⹈', &['Ⱈ']),
+ ('ⱉ', &['Ⱉ']),
+ ('ⱊ', &['Ⱊ']),
+ ('ⱋ', &['Ⱋ']),
+ ('ⱌ', &['Ⱌ']),
+ ('ⱍ', &['Ⱍ']),
+ ('ⱎ', &['Ⱎ']),
+ ('ⱏ', &['Ⱏ']),
+ ('⹐', &['Ⱐ']),
+ ('ⱑ', &['Ⱑ']),
+ ('ⱒ', &['â°ĸ']),
+ ('ⱓ', &['â°Ŗ']),
+ ('ⱔ', &['Ⱔ']),
+ ('ⱕ', &['â°Ĩ']),
+ ('ⱖ', &['â°Ļ']),
+ ('ⱗ', &['Ⱗ']),
+ ('⹘', &['Ⱘ']),
+ ('ⱙ', &['Ⱙ']),
+ ('ⱚ', &['â°Ē']),
+ ('ⱛ', &['â°Ģ']),
+ ('ⱜ', &['â°Ŧ']),
+ ('ⱝ', &['Ⱝ']),
+ ('ⱞ', &['Ⱞ']),
+ ('⹠', &['⹥']),
+ ('⹥', &['⹠']),
+ ('âąĸ', &['ÉĢ']),
+ ('âąŖ', &['áĩŊ']),
+ ('⹤', &['ÉŊ']),
+ ('âąĨ', &['Čē']),
+ ('âąĻ', &['Čž']),
+ ('⹧', &['⹨']),
+ ('⹨', &['⹧']),
+ ('⹊', &['âąĒ']),
+ ('âąĒ', &['⹊']),
+ ('âąĢ', &['âąŦ']),
+ ('âąŦ', &['âąĢ']),
+ ('Ɑ', &['ɑ']),
+ ('⹎', &['ɹ']),
+ ('âą¯', &['ɐ']),
+ ('Ɒ', &['ɒ']),
+ ('⹲', &['âąŗ']),
+ ('âąŗ', &['⹲']),
+ ('âąĩ', &['âąļ']),
+ ('âąļ', &['âąĩ']),
+ ('âąž', &['Čŋ']),
+ ('âąŋ', &['ɀ']),
+ ('Ⲁ', &['ⲁ']),
+ ('ⲁ', &['Ⲁ']),
+ ('Ⲃ', &['ⲃ']),
+ ('ⲃ', &['Ⲃ']),
+ ('Ⲅ', &['ⲅ']),
+ ('ⲅ', &['Ⲅ']),
+ ('Ⲇ', &['ⲇ']),
+ ('ⲇ', &['Ⲇ']),
+ ('Ⲉ', &['ⲉ']),
+ ('ⲉ', &['Ⲉ']),
+ ('Ⲋ', &['ⲋ']),
+ ('ⲋ', &['Ⲋ']),
+ ('Ⲍ', &['ⲍ']),
+ ('ⲍ', &['Ⲍ']),
+ ('Ⲏ', &['ⲏ']),
+ ('ⲏ', &['Ⲏ']),
+ ('Ⲑ', &['ⲑ']),
+ ('ⲑ', &['Ⲑ']),
+ ('Ⲓ', &['ⲓ']),
+ ('ⲓ', &['Ⲓ']),
+ ('Ⲕ', &['ⲕ']),
+ ('ⲕ', &['Ⲕ']),
+ ('Ⲗ', &['ⲗ']),
+ ('ⲗ', &['Ⲗ']),
+ ('Ⲙ', &['ⲙ']),
+ ('ⲙ', &['Ⲙ']),
+ ('Ⲛ', &['ⲛ']),
+ ('ⲛ', &['Ⲛ']),
+ ('Ⲝ', &['ⲝ']),
+ ('ⲝ', &['Ⲝ']),
+ ('Ⲟ', &['ⲟ']),
+ ('ⲟ', &['Ⲟ']),
+ ('Ⲡ', &['ⲥ']),
+ ('ⲥ', &['Ⲡ']),
+ ('â˛ĸ', &['â˛Ŗ']),
+ ('â˛Ŗ', &['â˛ĸ']),
+ ('Ⲥ', &['â˛Ĩ']),
+ ('â˛Ĩ', &['Ⲥ']),
+ ('â˛Ļ', &['ⲧ']),
+ ('ⲧ', &['â˛Ļ']),
+ ('Ⲩ', &['Ⲋ']),
+ ('Ⲋ', &['Ⲩ']),
+ ('â˛Ē', &['â˛Ģ']),
+ ('â˛Ģ', &['â˛Ē']),
+ ('â˛Ŧ', &['ⲭ']),
+ ('ⲭ', &['â˛Ŧ']),
+ ('Ⲏ', &['â˛¯']),
+ ('â˛¯', &['Ⲏ']),
+ ('Ⲱ', &['ⲹ']),
+ ('ⲹ', &['Ⲱ']),
+ ('Ⲳ', &['â˛ŗ']),
+ ('â˛ŗ', &['Ⲳ']),
+ ('Ⲵ', &['â˛ĩ']),
+ ('â˛ĩ', &['Ⲵ']),
+ ('â˛ļ', &['ⲡ']),
+ ('ⲡ', &['â˛ļ']),
+ ('Ⲹ', &['Ⲛ']),
+ ('Ⲛ', &['Ⲹ']),
+ ('â˛ē', &['â˛ģ']),
+ ('â˛ģ', &['â˛ē']),
+ ('â˛ŧ', &['â˛Ŋ']),
+ ('â˛Ŋ', &['â˛ŧ']),
+ ('Ⲟ', &['â˛ŋ']),
+ ('â˛ŋ', &['Ⲟ']),
+ ('âŗ€', &['âŗ']),
+ ('âŗ', &['âŗ€']),
+ ('âŗ‚', &['âŗƒ']),
+ ('âŗƒ', &['âŗ‚']),
+ ('âŗ„', &['âŗ…']),
+ ('âŗ…', &['âŗ„']),
+ ('âŗ†', &['âŗ‡']),
+ ('âŗ‡', &['âŗ†']),
+ ('âŗˆ', &['âŗ‰']),
+ ('âŗ‰', &['âŗˆ']),
+ ('âŗŠ', &['âŗ‹']),
+ ('âŗ‹', &['âŗŠ']),
+ ('âŗŒ', &['âŗ']),
+ ('âŗ', &['âŗŒ']),
+ ('âŗŽ', &['âŗ']),
+ ('âŗ', &['âŗŽ']),
+ ('âŗ', &['âŗ‘']),
+ ('âŗ‘', &['âŗ']),
+ ('âŗ’', &['âŗ“']),
+ ('âŗ“', &['âŗ’']),
+ ('âŗ”', &['âŗ•']),
+ ('âŗ•', &['âŗ”']),
+ ('âŗ–', &['âŗ—']),
+ ('âŗ—', &['âŗ–']),
+ ('âŗ˜', &['âŗ™']),
+ ('âŗ™', &['âŗ˜']),
+ ('âŗš', &['âŗ›']),
+ ('âŗ›', &['âŗš']),
+ ('âŗœ', &['âŗ']),
+ ('âŗ', &['âŗœ']),
+ ('âŗž', &['âŗŸ']),
+ ('âŗŸ', &['âŗž']),
+ ('âŗ ', &['âŗĄ']),
+ ('âŗĄ', &['âŗ ']),
+ ('âŗĸ', &['âŗŖ']),
+ ('âŗŖ', &['âŗĸ']),
+ ('âŗĢ', &['âŗŦ']),
+ ('âŗŦ', &['âŗĢ']),
+ ('âŗ­', &['âŗŽ']),
+ ('âŗŽ', &['âŗ­']),
+ ('âŗ˛', &['âŗŗ']),
+ ('âŗŗ', &['âŗ˛']),
+ ('ⴀ', &['Ⴀ']),
+ ('ⴁ', &['Ⴁ']),
+ ('ⴂ', &['á‚ĸ']),
+ ('ⴃ', &['á‚Ŗ']),
+ ('ⴄ', &['Ⴄ']),
+ ('ⴅ', &['á‚Ĩ']),
+ ('ⴆ', &['á‚Ļ']),
+ ('ⴇ', &['Ⴇ']),
+ ('ⴈ', &['Ⴈ']),
+ ('ⴉ', &['Ⴉ']),
+ ('ⴊ', &['á‚Ē']),
+ ('ⴋ', &['á‚Ģ']),
+ ('ⴌ', &['á‚Ŧ']),
+ ('ⴍ', &['Ⴍ']),
+ ('ⴎ', &['Ⴎ']),
+ ('ⴏ', &['Ⴏ']),
+ ('ⴐ', &['Ⴐ']),
+ ('ⴑ', &['Ⴑ']),
+ ('ⴒ', &['Ⴒ']),
+ ('ⴓ', &['á‚ŗ']),
+ ('ⴔ', &['Ⴔ']),
+ ('ⴕ', &['á‚ĩ']),
+ ('ⴖ', &['á‚ļ']),
+ ('ⴗ', &['Ⴗ']),
+ ('ⴘ', &['Ⴘ']),
+ ('ⴙ', &['Ⴙ']),
+ ('ⴚ', &['á‚ē']),
+ ('ⴛ', &['á‚ģ']),
+ ('ⴜ', &['á‚ŧ']),
+ ('ⴝ', &['á‚Ŋ']),
+ ('ⴞ', &['Ⴞ']),
+ ('ⴟ', &['á‚ŋ']),
+ ('ⴠ', &['Ⴠ']),
+ ('ⴥ', &['Ⴡ']),
+ ('â´ĸ', &['Ⴢ']),
+ ('â´Ŗ', &['Ⴣ']),
+ ('ⴤ', &['Ⴤ']),
+ ('â´Ĩ', &['Ⴥ']),
+ ('ⴧ', &['Ⴧ']),
+ ('ⴭ', &['Ⴭ']),
+ ('Ꙁ', &['ꙁ']),
+ ('ꙁ', &['Ꙁ']),
+ ('Ꙃ', &['ꙃ']),
+ ('ꙃ', &['Ꙃ']),
+ ('Ꙅ', &['ꙅ']),
+ ('ꙅ', &['Ꙅ']),
+ ('Ꙇ', &['ꙇ']),
+ ('ꙇ', &['Ꙇ']),
+ ('Ꙉ', &['ꙉ']),
+ ('ꙉ', &['Ꙉ']),
+ ('Ꙋ', &['ᲈ', 'ꙋ']),
+ ('ꙋ', &['ᲈ', 'Ꙋ']),
+ ('Ꙍ', &['ꙍ']),
+ ('ꙍ', &['Ꙍ']),
+ ('Ꙏ', &['ꙏ']),
+ ('ꙏ', &['Ꙏ']),
+ ('Ꙑ', &['ꙑ']),
+ ('ꙑ', &['Ꙑ']),
+ ('Ꙓ', &['ꙓ']),
+ ('ꙓ', &['Ꙓ']),
+ ('Ꙕ', &['ꙕ']),
+ ('ꙕ', &['Ꙕ']),
+ ('Ꙗ', &['ꙗ']),
+ ('ꙗ', &['Ꙗ']),
+ ('Ꙙ', &['ꙙ']),
+ ('ꙙ', &['Ꙙ']),
+ ('Ꙛ', &['ꙛ']),
+ ('ꙛ', &['Ꙛ']),
+ ('Ꙝ', &['ꙝ']),
+ ('ꙝ', &['Ꙝ']),
+ ('Ꙟ', &['ꙟ']),
+ ('ꙟ', &['Ꙟ']),
+ ('Ꙡ', &['ꙡ']),
+ ('ꙡ', &['Ꙡ']),
+ ('ę™ĸ', &['ę™Ŗ']),
+ ('ę™Ŗ', &['ę™ĸ']),
+ ('Ꙥ', &['ę™Ĩ']),
+ ('ę™Ĩ', &['Ꙥ']),
+ ('ę™Ļ', &['ꙧ']),
+ ('ꙧ', &['ę™Ļ']),
+ ('Ꙩ', &['ꙩ']),
+ ('ꙩ', &['Ꙩ']),
+ ('ę™Ē', &['ę™Ģ']),
+ ('ę™Ģ', &['ę™Ē']),
+ ('ę™Ŧ', &['ꙭ']),
+ ('ꙭ', &['ę™Ŧ']),
+ ('Ꚁ', &['ꚁ']),
+ ('ꚁ', &['Ꚁ']),
+ ('Ꚃ', &['ꚃ']),
+ ('ꚃ', &['Ꚃ']),
+ ('Ꚅ', &['ꚅ']),
+ ('ꚅ', &['Ꚅ']),
+ ('Ꚇ', &['ꚇ']),
+ ('ꚇ', &['Ꚇ']),
+ ('Ꚉ', &['ꚉ']),
+ ('ꚉ', &['Ꚉ']),
+ ('Ꚋ', &['ꚋ']),
+ ('ꚋ', &['Ꚋ']),
+ ('Ꚍ', &['ꚍ']),
+ ('ꚍ', &['Ꚍ']),
+ ('Ꚏ', &['ꚏ']),
+ ('ꚏ', &['Ꚏ']),
+ ('Ꚑ', &['ꚑ']),
+ ('ꚑ', &['Ꚑ']),
+ ('Ꚓ', &['ꚓ']),
+ ('ꚓ', &['Ꚓ']),
+ ('Ꚕ', &['ꚕ']),
+ ('ꚕ', &['Ꚕ']),
+ ('Ꚗ', &['ꚗ']),
+ ('ꚗ', &['Ꚗ']),
+ ('Ꚙ', &['ꚙ']),
+ ('ꚙ', &['Ꚙ']),
+ ('Ꚛ', &['ꚛ']),
+ ('ꚛ', &['Ꚛ']),
+ ('ęœĸ', &['ęœŖ']),
+ ('ęœŖ', &['ęœĸ']),
+ ('Ꜥ', &['ęœĨ']),
+ ('ęœĨ', &['Ꜥ']),
+ ('ęœĻ', &['ꜧ']),
+ ('ꜧ', &['ęœĻ']),
+ ('Ꜩ', &['ꜩ']),
+ ('ꜩ', &['Ꜩ']),
+ ('ęœĒ', &['ęœĢ']),
+ ('ęœĢ', &['ęœĒ']),
+ ('ęœŦ', &['ꜭ']),
+ ('ꜭ', &['ęœŦ']),
+ ('Ꜯ', &['ęœ¯']),
+ ('ęœ¯', &['Ꜯ']),
+ ('Ꜳ', &['ęœŗ']),
+ ('ęœŗ', &['Ꜳ']),
+ ('Ꜵ', &['ęœĩ']),
+ ('ęœĩ', &['Ꜵ']),
+ ('ęœļ', &['ꜷ']),
+ ('ꜷ', &['ęœļ']),
+ ('Ꜹ', &['ꜹ']),
+ ('ꜹ', &['Ꜹ']),
+ ('ęœē', &['ęœģ']),
+ ('ęœģ', &['ęœē']),
+ ('ęœŧ', &['ęœŊ']),
+ ('ęœŊ', &['ęœŧ']),
+ ('Ꜿ', &['ęœŋ']),
+ ('ęœŋ', &['Ꜿ']),
+ ('Ꝁ', &['ꝁ']),
+ ('ꝁ', &['Ꝁ']),
+ ('Ꝃ', &['ꝃ']),
+ ('ꝃ', &['Ꝃ']),
+ ('Ꝅ', &['ꝅ']),
+ ('ꝅ', &['Ꝅ']),
+ ('Ꝇ', &['ꝇ']),
+ ('ꝇ', &['Ꝇ']),
+ ('Ꝉ', &['ꝉ']),
+ ('ꝉ', &['Ꝉ']),
+ ('Ꝋ', &['ꝋ']),
+ ('ꝋ', &['Ꝋ']),
+ ('Ꝍ', &['ꝍ']),
+ ('ꝍ', &['Ꝍ']),
+ ('Ꝏ', &['ꝏ']),
+ ('ꝏ', &['Ꝏ']),
+ ('Ꝑ', &['ꝑ']),
+ ('ꝑ', &['Ꝑ']),
+ ('Ꝓ', &['ꝓ']),
+ ('ꝓ', &['Ꝓ']),
+ ('Ꝕ', &['ꝕ']),
+ ('ꝕ', &['Ꝕ']),
+ ('Ꝗ', &['ꝗ']),
+ ('ꝗ', &['Ꝗ']),
+ ('Ꝙ', &['ꝙ']),
+ ('ꝙ', &['Ꝙ']),
+ ('Ꝛ', &['ꝛ']),
+ ('ꝛ', &['Ꝛ']),
+ ('Ꝝ', &['ꝝ']),
+ ('ꝝ', &['Ꝝ']),
+ ('Ꝟ', &['ꝟ']),
+ ('ꝟ', &['Ꝟ']),
+ ('Ꝡ', &['ꝡ']),
+ ('ꝡ', &['Ꝡ']),
+ ('ęĸ', &['ęŖ']),
+ ('ęŖ', &['ęĸ']),
+ ('Ꝥ', &['ęĨ']),
+ ('ęĨ', &['Ꝥ']),
+ ('ęĻ', &['ꝧ']),
+ ('ꝧ', &['ęĻ']),
+ ('Ꝩ', &['ꝩ']),
+ ('ꝩ', &['Ꝩ']),
+ ('ęĒ', &['ęĢ']),
+ ('ęĢ', &['ęĒ']),
+ ('ęŦ', &['ꝭ']),
+ ('ꝭ', &['ęŦ']),
+ ('Ꝯ', &['ę¯']),
+ ('ę¯', &['Ꝯ']),
+ ('Ꝺ', &['ęē']),
+ ('ęē', &['Ꝺ']),
+ ('ęģ', &['ęŧ']),
+ ('ęŧ', &['ęģ']),
+ ('ęŊ', &['áĩš']),
+ ('Ꝿ', &['ęŋ']),
+ ('ęŋ', &['Ꝿ']),
+ ('Ꞁ', &['ꞁ']),
+ ('ꞁ', &['Ꞁ']),
+ ('Ꞃ', &['ꞃ']),
+ ('ꞃ', &['Ꞃ']),
+ ('Ꞅ', &['ꞅ']),
+ ('ꞅ', &['Ꞅ']),
+ ('Ꞇ', &['ꞇ']),
+ ('ꞇ', &['Ꞇ']),
+ ('Ꞌ', &['ꞌ']),
+ ('ꞌ', &['Ꞌ']),
+ ('Ɥ', &['ÉĨ']),
+ ('Ꞑ', &['ꞑ']),
+ ('ꞑ', &['Ꞑ']),
+ ('Ꞓ', &['ꞓ']),
+ ('ꞓ', &['Ꞓ']),
+ ('ꞔ', &['Ꞔ']),
+ ('Ꞗ', &['ꞗ']),
+ ('ꞗ', &['Ꞗ']),
+ ('Ꞙ', &['ꞙ']),
+ ('ꞙ', &['Ꞙ']),
+ ('Ꞛ', &['ꞛ']),
+ ('ꞛ', &['Ꞛ']),
+ ('Ꞝ', &['ꞝ']),
+ ('ꞝ', &['Ꞝ']),
+ ('Ꞟ', &['ꞟ']),
+ ('ꞟ', &['Ꞟ']),
+ ('Ꞡ', &['ꞡ']),
+ ('ꞡ', &['Ꞡ']),
+ ('ęžĸ', &['ęžŖ']),
+ ('ęžŖ', &['ęžĸ']),
+ ('Ꞥ', &['ęžĨ']),
+ ('ęžĨ', &['Ꞥ']),
+ ('ęžĻ', &['ꞧ']),
+ ('ꞧ', &['ęžĻ']),
+ ('Ꞩ', &['ꞩ']),
+ ('ꞩ', &['Ꞩ']),
+ ('ęžĒ', &['ÉĻ']),
+ ('ęžĢ', &['ɜ']),
+ ('ęžŦ', &['ÉĄ']),
+ ('Ɬ', &['ÉŦ']),
+ ('Ɪ', &['ÉĒ']),
+ ('Ʞ', &['ʞ']),
+ ('Ʇ', &['ʇ']),
+ ('Ʝ', &['ʝ']),
+ ('ęžŗ', &['ꭓ']),
+ ('Ꞵ', &['ęžĩ']),
+ ('ęžĩ', &['Ꞵ']),
+ ('ęžļ', &['ꞷ']),
+ ('ꞷ', &['ęžļ']),
+ ('Ꞹ', &['ꞹ']),
+ ('ꞹ', &['Ꞹ']),
+ ('ęžē', &['ęžģ']),
+ ('ęžģ', &['ęžē']),
+ ('ęžŧ', &['ęžŊ']),
+ ('ęžŊ', &['ęžŧ']),
+ ('Ꞿ', &['ęžŋ']),
+ ('ęžŋ', &['Ꞿ']),
+ ('Ꟃ', &['ꟃ']),
+ ('ꟃ', &['Ꟃ']),
+ ('Ꞔ', &['ꞔ']),
+ ('Ʂ', &['ʂ']),
+ ('Ᶎ', &['áļŽ']),
+ ('\u{a7c7}', &['\u{a7c8}']),
+ ('\u{a7c8}', &['\u{a7c7}']),
+ ('\u{a7c9}', &['\u{a7ca}']),
+ ('\u{a7ca}', &['\u{a7c9}']),
+ ('\u{a7f5}', &['\u{a7f6}']),
+ ('\u{a7f6}', &['\u{a7f5}']),
+ ('ꭓ', &['ęžŗ']),
+ ('ꭰ', &['Ꭰ']),
+ ('ꭱ', &['Ꭱ']),
+ ('ę­˛', &['áŽĸ']),
+ ('ę­ŗ', &['áŽŖ']),
+ ('ꭴ', &['Ꭴ']),
+ ('ę­ĩ', &['áŽĨ']),
+ ('ę­ļ', &['áŽĻ']),
+ ('ꭷ', &['Ꭷ']),
+ ('ꭸ', &['Ꭸ']),
+ ('ꭹ', &['Ꭹ']),
+ ('ę­ē', &['áŽĒ']),
+ ('ę­ģ', &['áŽĢ']),
+ ('ę­ŧ', &['áŽŦ']),
+ ('ę­Ŋ', &['Ꭽ']),
+ ('ꭾ', &['Ꭾ']),
+ ('ę­ŋ', &['Ꭿ']),
+ ('ꮀ', &['Ꮀ']),
+ ('ꮁ', &['Ꮁ']),
+ ('ꮂ', &['Ꮂ']),
+ ('ꎃ', &['áŽŗ']),
+ ('ꮄ', &['Ꮄ']),
+ ('ꮅ', &['áŽĩ']),
+ ('ꮆ', &['áŽļ']),
+ ('ꮇ', &['Ꮇ']),
+ ('ꮈ', &['Ꮈ']),
+ ('ꮉ', &['Ꮉ']),
+ ('ꮊ', &['áŽē']),
+ ('ꮋ', &['áŽģ']),
+ ('ꮌ', &['áŽŧ']),
+ ('ꮍ', &['áŽŊ']),
+ ('ꮎ', &['Ꮎ']),
+ ('ꮏ', &['áŽŋ']),
+ ('ꮐ', &['Ꮐ']),
+ ('ꮑ', &['Ꮑ']),
+ ('ꮒ', &['Ꮒ']),
+ ('ꮓ', &['Ꮓ']),
+ ('ꮔ', &['Ꮔ']),
+ ('ꮕ', &['Ꮕ']),
+ ('ꮖ', &['Ꮖ']),
+ ('ꮗ', &['Ꮗ']),
+ ('ꮘ', &['Ꮘ']),
+ ('ꮙ', &['Ꮙ']),
+ ('ꮚ', &['Ꮚ']),
+ ('ꮛ', &['Ꮛ']),
+ ('ꮜ', &['Ꮜ']),
+ ('ꮝ', &['Ꮝ']),
+ ('ꮞ', &['Ꮞ']),
+ ('ꮟ', &['Ꮟ']),
+ ('ꮠ', &['Ꮠ']),
+ ('ꮡ', &['Ꮡ']),
+ ('ęŽĸ', &['Ꮢ']),
+ ('ęŽŖ', &['Ꮣ']),
+ ('ꮤ', &['Ꮤ']),
+ ('ęŽĨ', &['Ꮥ']),
+ ('ęŽĻ', &['Ꮦ']),
+ ('ꮧ', &['Ꮧ']),
+ ('ꮨ', &['Ꮨ']),
+ ('ꮩ', &['Ꮩ']),
+ ('ęŽĒ', &['Ꮪ']),
+ ('ęŽĢ', &['Ꮫ']),
+ ('ęŽŦ', &['Ꮬ']),
+ ('ꮭ', &['Ꮭ']),
+ ('ꮮ', &['Ꮮ']),
+ ('ęŽ¯', &['Ꮯ']),
+ ('ꮰ', &['Ꮰ']),
+ ('ꮱ', &['Ꮱ']),
+ ('ꎲ', &['áĸ']),
+ ('ęŽŗ', &['áŖ']),
+ ('ꮴ', &['Ꮴ']),
+ ('ęŽĩ', &['áĨ']),
+ ('ęŽļ', &['áĻ']),
+ ('ꮷ', &['Ꮷ']),
+ ('ꮸ', &['Ꮸ']),
+ ('ꮹ', &['Ꮹ']),
+ ('ęŽē', &['áĒ']),
+ ('ęŽģ', &['áĢ']),
+ ('ęŽŧ', &['áŦ']),
+ ('ęŽŊ', &['Ꮽ']),
+ ('ꮾ', &['Ꮾ']),
+ ('ęŽŋ', &['Ꮿ']),
+ ('īŧĄ', &['īŊ']),
+ ('īŧĸ', &['īŊ‚']),
+ ('īŧŖ', &['īŊƒ']),
+ ('īŧ¤', &['īŊ„']),
+ ('īŧĨ', &['īŊ…']),
+ ('īŧĻ', &['īŊ†']),
+ ('īŧ§', &['īŊ‡']),
+ ('īŧ¨', &['īŊˆ']),
+ ('īŧŠ', &['īŊ‰']),
+ ('īŧĒ', &['īŊŠ']),
+ ('īŧĢ', &['īŊ‹']),
+ ('īŧŦ', &['īŊŒ']),
+ ('īŧ­', &['īŊ']),
+ ('īŧŽ', &['īŊŽ']),
+ ('īŧ¯', &['īŊ']),
+ ('īŧ°', &['īŊ']),
+ ('īŧą', &['īŊ‘']),
+ ('īŧ˛', &['īŊ’']),
+ ('īŧŗ', &['īŊ“']),
+ ('īŧ´', &['īŊ”']),
+ ('īŧĩ', &['īŊ•']),
+ ('īŧļ', &['īŊ–']),
+ ('īŧˇ', &['īŊ—']),
+ ('īŧ¸', &['īŊ˜']),
+ ('īŧš', &['īŊ™']),
+ ('īŧē', &['īŊš']),
+ ('īŊ', &['īŧĄ']),
+ ('īŊ‚', &['īŧĸ']),
+ ('īŊƒ', &['īŧŖ']),
+ ('īŊ„', &['īŧ¤']),
+ ('īŊ…', &['īŧĨ']),
+ ('īŊ†', &['īŧĻ']),
+ ('īŊ‡', &['īŧ§']),
+ ('īŊˆ', &['īŧ¨']),
+ ('īŊ‰', &['īŧŠ']),
+ ('īŊŠ', &['īŧĒ']),
+ ('īŊ‹', &['īŧĢ']),
+ ('īŊŒ', &['īŧŦ']),
+ ('īŊ', &['īŧ­']),
+ ('īŊŽ', &['īŧŽ']),
+ ('īŊ', &['īŧ¯']),
+ ('īŊ', &['īŧ°']),
+ ('īŊ‘', &['īŧą']),
+ ('īŊ’', &['īŧ˛']),
+ ('īŊ“', &['īŧŗ']),
+ ('īŊ”', &['īŧ´']),
+ ('īŊ•', &['īŧĩ']),
+ ('īŊ–', &['īŧļ']),
+ ('īŊ—', &['īŧˇ']),
+ ('īŊ˜', &['īŧ¸']),
+ ('īŊ™', &['īŧš']),
+ ('īŊš', &['īŧē']),
+ ('𐐀', &['𐐨']),
+ ('𐐁', &['𐐊']),
+ ('𐐂', &['đĒ']),
+ ('𐐃', &['đĢ']),
+ ('𐐄', &['đŦ']),
+ ('𐐅', &['𐐭']),
+ ('𐐆', &['𐐮']),
+ ('𐐇', &['đ¯']),
+ ('𐐈', &['𐐰']),
+ ('𐐉', &['𐐱']),
+ ('𐐊', &['𐐲']),
+ ('𐐋', &['đŗ']),
+ ('𐐌', &['𐐴']),
+ ('𐐍', &['đĩ']),
+ ('𐐎', &['đļ']),
+ ('𐐏', &['𐐷']),
+ ('𐐐', &['𐐸']),
+ ('𐐑', &['𐐹']),
+ ('𐐒', &['đē']),
+ ('𐐓', &['đģ']),
+ ('𐐔', &['đŧ']),
+ ('𐐕', &['đŊ']),
+ ('𐐖', &['𐐾']),
+ ('𐐗', &['đŋ']),
+ ('𐐘', &['𐑀']),
+ ('𐐙', &['𐑁']),
+ ('𐐚', &['𐑂']),
+ ('𐐛', &['𐑃']),
+ ('𐐜', &['𐑄']),
+ ('𐐝', &['𐑅']),
+ ('𐐞', &['𐑆']),
+ ('𐐟', &['𐑇']),
+ ('𐐠', &['𐑈']),
+ ('𐐡', &['𐑉']),
+ ('đĸ', &['𐑊']),
+ ('đŖ', &['𐑋']),
+ ('𐐤', &['𐑌']),
+ ('đĨ', &['𐑍']),
+ ('đĻ', &['𐑎']),
+ ('𐐧', &['𐑏']),
+ ('𐐨', &['𐐀']),
+ ('𐐊', &['𐐁']),
+ ('đĒ', &['𐐂']),
+ ('đĢ', &['𐐃']),
+ ('đŦ', &['𐐄']),
+ ('𐐭', &['𐐅']),
+ ('𐐮', &['𐐆']),
+ ('đ¯', &['𐐇']),
+ ('𐐰', &['𐐈']),
+ ('𐐱', &['𐐉']),
+ ('𐐲', &['𐐊']),
+ ('đŗ', &['𐐋']),
+ ('𐐴', &['𐐌']),
+ ('đĩ', &['𐐍']),
+ ('đļ', &['𐐎']),
+ ('𐐷', &['𐐏']),
+ ('𐐸', &['𐐐']),
+ ('𐐹', &['𐐑']),
+ ('đē', &['𐐒']),
+ ('đģ', &['𐐓']),
+ ('đŧ', &['𐐔']),
+ ('đŊ', &['𐐕']),
+ ('𐐾', &['𐐖']),
+ ('đŋ', &['𐐗']),
+ ('𐑀', &['𐐘']),
+ ('𐑁', &['𐐙']),
+ ('𐑂', &['𐐚']),
+ ('𐑃', &['𐐛']),
+ ('𐑄', &['𐐜']),
+ ('𐑅', &['𐐝']),
+ ('𐑆', &['𐐞']),
+ ('𐑇', &['𐐟']),
+ ('𐑈', &['𐐠']),
+ ('𐑉', &['𐐡']),
+ ('𐑊', &['đĸ']),
+ ('𐑋', &['đŖ']),
+ ('𐑌', &['𐐤']),
+ ('𐑍', &['đĨ']),
+ ('𐑎', &['đĻ']),
+ ('𐑏', &['𐐧']),
+ ('𐒰', &['𐓘']),
+ ('𐒱', &['𐓙']),
+ ('𐒲', &['𐓚']),
+ ('đ’ŗ', &['𐓛']),
+ ('𐒴', &['𐓜']),
+ ('đ’ĩ', &['𐓝']),
+ ('đ’ļ', &['𐓞']),
+ ('𐒷', &['𐓟']),
+ ('𐒸', &['𐓠']),
+ ('𐒹', &['𐓡']),
+ ('đ’ē', &['đ“ĸ']),
+ ('đ’ģ', &['đ“Ŗ']),
+ ('đ’ŧ', &['𐓤']),
+ ('đ’Ŋ', &['đ“Ĩ']),
+ ('𐒾', &['đ“Ļ']),
+ ('đ’ŋ', &['𐓧']),
+ ('𐓀', &['𐓨']),
+ ('𐓁', &['𐓩']),
+ ('𐓂', &['đ“Ē']),
+ ('𐓃', &['đ“Ģ']),
+ ('𐓄', &['đ“Ŧ']),
+ ('𐓅', &['𐓭']),
+ ('𐓆', &['𐓮']),
+ ('𐓇', &['đ“¯']),
+ ('𐓈', &['𐓰']),
+ ('𐓉', &['𐓱']),
+ ('𐓊', &['𐓲']),
+ ('𐓋', &['đ“ŗ']),
+ ('𐓌', &['𐓴']),
+ ('𐓍', &['đ“ĩ']),
+ ('𐓎', &['đ“ļ']),
+ ('𐓏', &['𐓷']),
+ ('𐓐', &['𐓸']),
+ ('𐓑', &['𐓹']),
+ ('𐓒', &['đ“ē']),
+ ('𐓓', &['đ“ģ']),
+ ('𐓘', &['𐒰']),
+ ('𐓙', &['𐒱']),
+ ('𐓚', &['𐒲']),
+ ('𐓛', &['đ’ŗ']),
+ ('𐓜', &['𐒴']),
+ ('𐓝', &['đ’ĩ']),
+ ('𐓞', &['đ’ļ']),
+ ('𐓟', &['𐒷']),
+ ('𐓠', &['𐒸']),
+ ('𐓡', &['𐒹']),
+ ('đ“ĸ', &['đ’ē']),
+ ('đ“Ŗ', &['đ’ģ']),
+ ('𐓤', &['đ’ŧ']),
+ ('đ“Ĩ', &['đ’Ŋ']),
+ ('đ“Ļ', &['𐒾']),
+ ('𐓧', &['đ’ŋ']),
+ ('𐓨', &['𐓀']),
+ ('𐓩', &['𐓁']),
+ ('đ“Ē', &['𐓂']),
+ ('đ“Ģ', &['𐓃']),
+ ('đ“Ŧ', &['𐓄']),
+ ('𐓭', &['𐓅']),
+ ('𐓮', &['𐓆']),
+ ('đ“¯', &['𐓇']),
+ ('𐓰', &['𐓈']),
+ ('𐓱', &['𐓉']),
+ ('𐓲', &['𐓊']),
+ ('đ“ŗ', &['𐓋']),
+ ('𐓴', &['𐓌']),
+ ('đ“ĩ', &['𐓍']),
+ ('đ“ļ', &['𐓎']),
+ ('𐓷', &['𐓏']),
+ ('𐓸', &['𐓐']),
+ ('𐓹', &['𐓑']),
+ ('đ“ē', &['𐓒']),
+ ('đ“ģ', &['𐓓']),
+ ('𐲀', &['đŗ€']),
+ ('𐲁', &['đŗ']),
+ ('𐲂', &['đŗ‚']),
+ ('𐲃', &['đŗƒ']),
+ ('𐲄', &['đŗ„']),
+ ('𐲅', &['đŗ…']),
+ ('𐲆', &['đŗ†']),
+ ('𐲇', &['đŗ‡']),
+ ('𐲈', &['đŗˆ']),
+ ('𐲉', &['đŗ‰']),
+ ('𐲊', &['đŗŠ']),
+ ('𐲋', &['đŗ‹']),
+ ('𐲌', &['đŗŒ']),
+ ('𐲍', &['đŗ']),
+ ('𐲎', &['đŗŽ']),
+ ('𐲏', &['đŗ']),
+ ('𐲐', &['đŗ']),
+ ('𐲑', &['đŗ‘']),
+ ('𐲒', &['đŗ’']),
+ ('𐲓', &['đŗ“']),
+ ('𐲔', &['đŗ”']),
+ ('𐲕', &['đŗ•']),
+ ('𐲖', &['đŗ–']),
+ ('𐲗', &['đŗ—']),
+ ('𐲘', &['đŗ˜']),
+ ('𐲙', &['đŗ™']),
+ ('𐲚', &['đŗš']),
+ ('𐲛', &['đŗ›']),
+ ('𐲜', &['đŗœ']),
+ ('𐲝', &['đŗ']),
+ ('𐲞', &['đŗž']),
+ ('𐲟', &['đŗŸ']),
+ ('𐲠', &['đŗ ']),
+ ('𐲥', &['đŗĄ']),
+ ('đ˛ĸ', &['đŗĸ']),
+ ('đ˛Ŗ', &['đŗŖ']),
+ ('𐲤', &['đŗ¤']),
+ ('đ˛Ĩ', &['đŗĨ']),
+ ('đ˛Ļ', &['đŗĻ']),
+ ('𐲧', &['đŗ§']),
+ ('𐲨', &['đŗ¨']),
+ ('𐲊', &['đŗŠ']),
+ ('đ˛Ē', &['đŗĒ']),
+ ('đ˛Ģ', &['đŗĢ']),
+ ('đ˛Ŧ', &['đŗŦ']),
+ ('𐲭', &['đŗ­']),
+ ('𐲎', &['đŗŽ']),
+ ('đ˛¯', &['đŗ¯']),
+ ('𐲰', &['đŗ°']),
+ ('𐲹', &['đŗą']),
+ ('𐲲', &['đŗ˛']),
+ ('đŗ€', &['𐲀']),
+ ('đŗ', &['𐲁']),
+ ('đŗ‚', &['𐲂']),
+ ('đŗƒ', &['𐲃']),
+ ('đŗ„', &['𐲄']),
+ ('đŗ…', &['𐲅']),
+ ('đŗ†', &['𐲆']),
+ ('đŗ‡', &['𐲇']),
+ ('đŗˆ', &['𐲈']),
+ ('đŗ‰', &['𐲉']),
+ ('đŗŠ', &['𐲊']),
+ ('đŗ‹', &['𐲋']),
+ ('đŗŒ', &['𐲌']),
+ ('đŗ', &['𐲍']),
+ ('đŗŽ', &['𐲎']),
+ ('đŗ', &['𐲏']),
+ ('đŗ', &['𐲐']),
+ ('đŗ‘', &['𐲑']),
+ ('đŗ’', &['𐲒']),
+ ('đŗ“', &['𐲓']),
+ ('đŗ”', &['𐲔']),
+ ('đŗ•', &['𐲕']),
+ ('đŗ–', &['𐲖']),
+ ('đŗ—', &['𐲗']),
+ ('đŗ˜', &['𐲘']),
+ ('đŗ™', &['𐲙']),
+ ('đŗš', &['𐲚']),
+ ('đŗ›', &['𐲛']),
+ ('đŗœ', &['𐲜']),
+ ('đŗ', &['𐲝']),
+ ('đŗž', &['𐲞']),
+ ('đŗŸ', &['𐲟']),
+ ('đŗ ', &['𐲠']),
+ ('đŗĄ', &['𐲥']),
+ ('đŗĸ', &['đ˛ĸ']),
+ ('đŗŖ', &['đ˛Ŗ']),
+ ('đŗ¤', &['𐲤']),
+ ('đŗĨ', &['đ˛Ĩ']),
+ ('đŗĻ', &['đ˛Ļ']),
+ ('đŗ§', &['𐲧']),
+ ('đŗ¨', &['𐲨']),
+ ('đŗŠ', &['𐲊']),
+ ('đŗĒ', &['đ˛Ē']),
+ ('đŗĢ', &['đ˛Ģ']),
+ ('đŗŦ', &['đ˛Ŧ']),
+ ('đŗ­', &['𐲭']),
+ ('đŗŽ', &['𐲎']),
+ ('đŗ¯', &['đ˛¯']),
+ ('đŗ°', &['𐲰']),
+ ('đŗą', &['𐲹']),
+ ('đŗ˛', &['𐲲']),
+ ('đ‘ĸ ', &['đ‘Ŗ€']),
+ ('đ‘ĸĄ', &['đ‘Ŗ']),
+ ('đ‘ĸĸ', &['đ‘Ŗ‚']),
+ ('đ‘ĸŖ', &['đ‘Ŗƒ']),
+ ('đ‘ĸ¤', &['đ‘Ŗ„']),
+ ('đ‘ĸĨ', &['đ‘Ŗ…']),
+ ('đ‘ĸĻ', &['đ‘Ŗ†']),
+ ('đ‘ĸ§', &['đ‘Ŗ‡']),
+ ('đ‘ĸ¨', &['đ‘Ŗˆ']),
+ ('đ‘ĸŠ', &['đ‘Ŗ‰']),
+ ('đ‘ĸĒ', &['đ‘ŖŠ']),
+ ('đ‘ĸĢ', &['đ‘Ŗ‹']),
+ ('đ‘ĸŦ', &['đ‘ŖŒ']),
+ ('đ‘ĸ­', &['đ‘Ŗ']),
+ ('đ‘ĸŽ', &['đ‘ŖŽ']),
+ ('đ‘ĸ¯', &['đ‘Ŗ']),
+ ('đ‘ĸ°', &['đ‘Ŗ']),
+ ('đ‘ĸą', &['đ‘Ŗ‘']),
+ ('đ‘ĸ˛', &['đ‘Ŗ’']),
+ ('đ‘ĸŗ', &['đ‘Ŗ“']),
+ ('đ‘ĸ´', &['đ‘Ŗ”']),
+ ('đ‘ĸĩ', &['đ‘Ŗ•']),
+ ('đ‘ĸļ', &['đ‘Ŗ–']),
+ ('đ‘ĸˇ', &['đ‘Ŗ—']),
+ ('đ‘ĸ¸', &['đ‘Ŗ˜']),
+ ('đ‘ĸš', &['đ‘Ŗ™']),
+ ('đ‘ĸē', &['đ‘Ŗš']),
+ ('đ‘ĸģ', &['đ‘Ŗ›']),
+ ('đ‘ĸŧ', &['đ‘Ŗœ']),
+ ('đ‘ĸŊ', &['đ‘Ŗ']),
+ ('đ‘ĸž', &['đ‘Ŗž']),
+ ('đ‘ĸŋ', &['đ‘ŖŸ']),
+ ('đ‘Ŗ€', &['đ‘ĸ ']),
+ ('đ‘Ŗ', &['đ‘ĸĄ']),
+ ('đ‘Ŗ‚', &['đ‘ĸĸ']),
+ ('đ‘Ŗƒ', &['đ‘ĸŖ']),
+ ('đ‘Ŗ„', &['đ‘ĸ¤']),
+ ('đ‘Ŗ…', &['đ‘ĸĨ']),
+ ('đ‘Ŗ†', &['đ‘ĸĻ']),
+ ('đ‘Ŗ‡', &['đ‘ĸ§']),
+ ('đ‘Ŗˆ', &['đ‘ĸ¨']),
+ ('đ‘Ŗ‰', &['đ‘ĸŠ']),
+ ('đ‘ŖŠ', &['đ‘ĸĒ']),
+ ('đ‘Ŗ‹', &['đ‘ĸĢ']),
+ ('đ‘ŖŒ', &['đ‘ĸŦ']),
+ ('đ‘Ŗ', &['đ‘ĸ­']),
+ ('đ‘ŖŽ', &['đ‘ĸŽ']),
+ ('đ‘Ŗ', &['đ‘ĸ¯']),
+ ('đ‘Ŗ', &['đ‘ĸ°']),
+ ('đ‘Ŗ‘', &['đ‘ĸą']),
+ ('đ‘Ŗ’', &['đ‘ĸ˛']),
+ ('đ‘Ŗ“', &['đ‘ĸŗ']),
+ ('đ‘Ŗ”', &['đ‘ĸ´']),
+ ('đ‘Ŗ•', &['đ‘ĸĩ']),
+ ('đ‘Ŗ–', &['đ‘ĸļ']),
+ ('đ‘Ŗ—', &['đ‘ĸˇ']),
+ ('đ‘Ŗ˜', &['đ‘ĸ¸']),
+ ('đ‘Ŗ™', &['đ‘ĸš']),
+ ('đ‘Ŗš', &['đ‘ĸē']),
+ ('đ‘Ŗ›', &['đ‘ĸģ']),
+ ('đ‘Ŗœ', &['đ‘ĸŧ']),
+ ('đ‘Ŗ', &['đ‘ĸŊ']),
+ ('đ‘Ŗž', &['đ‘ĸž']),
+ ('đ‘ŖŸ', &['đ‘ĸŋ']),
+ ('𖹀', &['𖹠']),
+ ('𖹁', &['𖹡']),
+ ('𖹂', &['đ–šĸ']),
+ ('𖹃', &['đ–šŖ']),
+ ('𖹄', &['𖹤']),
+ ('𖹅', &['đ–šĨ']),
+ ('𖹆', &['đ–šĻ']),
+ ('𖹇', &['𖹧']),
+ ('𖹈', &['𖹨']),
+ ('𖹉', &['𖹩']),
+ ('𖹊', &['đ–šĒ']),
+ ('𖹋', &['đ–šĢ']),
+ ('𖹌', &['đ–šŦ']),
+ ('𖹍', &['𖹭']),
+ ('𖹎', &['𖹮']),
+ ('𖹏', &['đ–š¯']),
+ ('𖹐', &['𖹰']),
+ ('𖹑', &['𖹱']),
+ ('𖹒', &['𖹲']),
+ ('𖹓', &['đ–šŗ']),
+ ('𖹔', &['𖹴']),
+ ('𖹕', &['đ–šĩ']),
+ ('𖹖', &['đ–šļ']),
+ ('𖹗', &['𖹷']),
+ ('𖹘', &['𖹸']),
+ ('𖹙', &['𖹹']),
+ ('𖹚', &['đ–šē']),
+ ('𖹛', &['đ–šģ']),
+ ('𖹜', &['đ–šŧ']),
+ ('𖹝', &['đ–šŊ']),
+ ('𖹞', &['𖹾']),
+ ('𖹟', &['đ–šŋ']),
+ ('𖹠', &['𖹀']),
+ ('𖹡', &['𖹁']),
+ ('đ–šĸ', &['𖹂']),
+ ('đ–šŖ', &['𖹃']),
+ ('𖹤', &['𖹄']),
+ ('đ–šĨ', &['𖹅']),
+ ('đ–šĻ', &['𖹆']),
+ ('𖹧', &['𖹇']),
+ ('𖹨', &['𖹈']),
+ ('𖹩', &['𖹉']),
+ ('đ–šĒ', &['𖹊']),
+ ('đ–šĢ', &['𖹋']),
+ ('đ–šŦ', &['𖹌']),
+ ('𖹭', &['𖹍']),
+ ('𖹮', &['𖹎']),
+ ('đ–š¯', &['𖹏']),
+ ('𖹰', &['𖹐']),
+ ('𖹱', &['𖹑']),
+ ('𖹲', &['𖹒']),
+ ('đ–šŗ', &['𖹓']),
+ ('𖹴', &['𖹔']),
+ ('đ–šĩ', &['𖹕']),
+ ('đ–šļ', &['𖹖']),
+ ('𖹷', &['𖹗']),
+ ('𖹸', &['𖹘']),
+ ('𖹹', &['𖹙']),
+ ('đ–šē', &['𖹚']),
+ ('đ–šģ', &['𖹛']),
+ ('đ–šŧ', &['𖹜']),
+ ('đ–šŊ', &['𖹝']),
+ ('𖹾', &['𖹞']),
+ ('đ–šŋ', &['𖹟']),
+ ('𞤀', &['đž¤ĸ']),
+ ('𞤁', &['đž¤Ŗ']),
+ ('𞤂', &['𞤤']),
+ ('𞤃', &['đž¤Ĩ']),
+ ('𞤄', &['đž¤Ļ']),
+ ('𞤅', &['𞤧']),
+ ('𞤆', &['𞤨']),
+ ('𞤇', &['𞤩']),
+ ('𞤈', &['đž¤Ē']),
+ ('𞤉', &['đž¤Ģ']),
+ ('𞤊', &['đž¤Ŧ']),
+ ('𞤋', &['𞤭']),
+ ('𞤌', &['𞤮']),
+ ('𞤍', &['đž¤¯']),
+ ('𞤎', &['𞤰']),
+ ('𞤏', &['𞤱']),
+ ('𞤐', &['𞤲']),
+ ('𞤑', &['đž¤ŗ']),
+ ('𞤒', &['𞤴']),
+ ('𞤓', &['đž¤ĩ']),
+ ('𞤔', &['đž¤ļ']),
+ ('𞤕', &['𞤷']),
+ ('𞤖', &['𞤸']),
+ ('𞤗', &['𞤹']),
+ ('𞤘', &['đž¤ē']),
+ ('𞤙', &['đž¤ģ']),
+ ('𞤚', &['đž¤ŧ']),
+ ('𞤛', &['đž¤Ŋ']),
+ ('𞤜', &['𞤾']),
+ ('𞤝', &['đž¤ŋ']),
+ ('𞤞', &['đžĨ€']),
+ ('𞤟', &['đžĨ']),
+ ('𞤠', &['đžĨ‚']),
+ ('𞤡', &['đžĨƒ']),
+ ('đž¤ĸ', &['𞤀']),
+ ('đž¤Ŗ', &['𞤁']),
+ ('𞤤', &['𞤂']),
+ ('đž¤Ĩ', &['𞤃']),
+ ('đž¤Ļ', &['𞤄']),
+ ('𞤧', &['𞤅']),
+ ('𞤨', &['𞤆']),
+ ('𞤩', &['𞤇']),
+ ('đž¤Ē', &['𞤈']),
+ ('đž¤Ģ', &['𞤉']),
+ ('đž¤Ŧ', &['𞤊']),
+ ('𞤭', &['𞤋']),
+ ('𞤮', &['𞤌']),
+ ('đž¤¯', &['𞤍']),
+ ('𞤰', &['𞤎']),
+ ('𞤱', &['𞤏']),
+ ('𞤲', &['𞤐']),
+ ('đž¤ŗ', &['𞤑']),
+ ('𞤴', &['𞤒']),
+ ('đž¤ĩ', &['𞤓']),
+ ('đž¤ļ', &['𞤔']),
+ ('𞤷', &['𞤕']),
+ ('𞤸', &['𞤖']),
+ ('𞤹', &['𞤗']),
+ ('đž¤ē', &['𞤘']),
+ ('đž¤ģ', &['𞤙']),
+ ('đž¤ŧ', &['𞤚']),
+ ('đž¤Ŋ', &['𞤛']),
+ ('𞤾', &['𞤜']),
+ ('đž¤ŋ', &['𞤝']),
+ ('đžĨ€', &['𞤞']),
+ ('đžĨ', &['𞤟']),
+ ('đžĨ‚', &['𞤠']),
+ ('đžĨƒ', &['𞤡']),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/general_category.rs b/vendor/regex-syntax/src/unicode_tables/general_category.rs
new file mode 100644
index 000000000..33b7b7e6e
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/general_category.rs
@@ -0,0 +1,6307 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate general-category ucd-13.0.0 --chars --exclude surrogate
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+ ("Cased_Letter", CASED_LETTER),
+ ("Close_Punctuation", CLOSE_PUNCTUATION),
+ ("Connector_Punctuation", CONNECTOR_PUNCTUATION),
+ ("Control", CONTROL),
+ ("Currency_Symbol", CURRENCY_SYMBOL),
+ ("Dash_Punctuation", DASH_PUNCTUATION),
+ ("Decimal_Number", DECIMAL_NUMBER),
+ ("Enclosing_Mark", ENCLOSING_MARK),
+ ("Final_Punctuation", FINAL_PUNCTUATION),
+ ("Format", FORMAT),
+ ("Initial_Punctuation", INITIAL_PUNCTUATION),
+ ("Letter", LETTER),
+ ("Letter_Number", LETTER_NUMBER),
+ ("Line_Separator", LINE_SEPARATOR),
+ ("Lowercase_Letter", LOWERCASE_LETTER),
+ ("Mark", MARK),
+ ("Math_Symbol", MATH_SYMBOL),
+ ("Modifier_Letter", MODIFIER_LETTER),
+ ("Modifier_Symbol", MODIFIER_SYMBOL),
+ ("Nonspacing_Mark", NONSPACING_MARK),
+ ("Number", NUMBER),
+ ("Open_Punctuation", OPEN_PUNCTUATION),
+ ("Other", OTHER),
+ ("Other_Letter", OTHER_LETTER),
+ ("Other_Number", OTHER_NUMBER),
+ ("Other_Punctuation", OTHER_PUNCTUATION),
+ ("Other_Symbol", OTHER_SYMBOL),
+ ("Paragraph_Separator", PARAGRAPH_SEPARATOR),
+ ("Private_Use", PRIVATE_USE),
+ ("Punctuation", PUNCTUATION),
+ ("Separator", SEPARATOR),
+ ("Space_Separator", SPACE_SEPARATOR),
+ ("Spacing_Mark", SPACING_MARK),
+ ("Symbol", SYMBOL),
+ ("Titlecase_Letter", TITLECASE_LETTER),
+ ("Unassigned", UNASSIGNED),
+ ("Uppercase_Letter", UPPERCASE_LETTER),
+];
+
+pub const CASED_LETTER: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('Âĩ', 'Âĩ'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'Æē'),
+ ('Æŧ', 'Æŋ'),
+ ('Į„', 'ʓ'),
+ ('ʕ', 'Ę¯'),
+ ('Í°', 'Íŗ'),
+ ('Íļ', '͡'),
+ ('Íģ', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՠ', 'ֈ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŊ', 'áƒŋ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('ᴀ', 'á´Ģ'),
+ ('áĩĢ', 'áĩˇ'),
+ ('áĩš', 'áļš'),
+ ('Ḁ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℯ', 'ℴ'),
+ ('ℹ', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ↄ', 'ↄ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âąģ'),
+ ('âąž', 'âŗ¤'),
+ ('âŗĢ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('Ꙁ', 'ꙭ'),
+ ('Ꚁ', 'ꚛ'),
+ ('ęœĸ', 'ę¯'),
+ ('ꝱ', 'ꞇ'),
+ ('Ꞌ', 'ꞎ'),
+ ('Ꞑ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', '\u{a7f6}'),
+ ('ęŸē', 'ęŸē'),
+ ('ęŦ°', 'ꭚ'),
+ ('ę­ ', '\u{ab68}'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+ ('𐐀', '𐑏'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('đ‘ĸ ', 'đ‘ŖŸ'),
+ ('𖹀', 'đ–šŋ'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𞤀', 'đžĨƒ'),
+];
+
+pub const CLOSE_PUNCTUATION: &'static [(char, char)] = &[
+ (')', ')'),
+ (']', ']'),
+ ('}', '}'),
+ ('āŧģ', 'āŧģ'),
+ ('āŧŊ', 'āŧŊ'),
+ ('᚜', '᚜'),
+ ('⁆', '⁆'),
+ ('⁞', '⁞'),
+ ('₎', '₎'),
+ ('⌉', '⌉'),
+ ('⌋', '⌋'),
+ ('âŒĒ', 'âŒĒ'),
+ ('❊', '❊'),
+ ('âĢ', 'âĢ'),
+ ('❭', '❭'),
+ ('❯', '❯'),
+ ('❱', '❱'),
+ ('âŗ', 'âŗ'),
+ ('âĩ', 'âĩ'),
+ ('⟆', '⟆'),
+ ('⟧', '⟧'),
+ ('⟩', '⟩'),
+ ('âŸĢ', 'âŸĢ'),
+ ('⟭', '⟭'),
+ ('⟯', '⟯'),
+ ('âĻ„', 'âĻ„'),
+ ('âĻ†', 'âĻ†'),
+ ('âĻˆ', 'âĻˆ'),
+ ('âĻŠ', 'âĻŠ'),
+ ('âĻŒ', 'âĻŒ'),
+ ('âĻŽ', 'âĻŽ'),
+ ('âĻ', 'âĻ'),
+ ('âĻ’', 'âĻ’'),
+ ('âĻ”', 'âĻ”'),
+ ('âĻ–', 'âĻ–'),
+ ('âĻ˜', 'âĻ˜'),
+ ('⧙', '⧙'),
+ ('⧛', '⧛'),
+ ('â§Ŋ', 'â§Ŋ'),
+ ('â¸Ŗ', 'â¸Ŗ'),
+ ('â¸Ĩ', 'â¸Ĩ'),
+ ('⸧', '⸧'),
+ ('⸊', '⸊'),
+ ('〉', '〉'),
+ ('》', '》'),
+ ('」', '」'),
+ ('』', '』'),
+ ('】', '】'),
+ ('〕', '〕'),
+ ('〗', '〗'),
+ ('〙', '〙'),
+ ('〛', '〛'),
+ ('〞', '〟'),
+ ('ī´ž', 'ī´ž'),
+ ('ī¸˜', 'ī¸˜'),
+ ('ī¸ļ', 'ī¸ļ'),
+ ('ī¸¸', 'ī¸¸'),
+ ('ī¸ē', 'ī¸ē'),
+ ('ī¸ŧ', 'ī¸ŧ'),
+ ('ī¸ž', 'ī¸ž'),
+ ('īš€', 'īš€'),
+ ('īš‚', 'īš‚'),
+ ('īš„', 'īš„'),
+ ('īšˆ', 'īšˆ'),
+ ('īšš', 'īšš'),
+ ('īšœ', 'īšœ'),
+ ('īšž', 'īšž'),
+ ('īŧ‰', 'īŧ‰'),
+ ('īŧŊ', 'īŧŊ'),
+ ('īŊ', 'īŊ'),
+ ('īŊ ', 'īŊ '),
+ ('īŊŖ', 'īŊŖ'),
+];
+
+pub const CONNECTOR_PUNCTUATION: &'static [(char, char)] = &[
+ ('_', '_'),
+ ('â€ŋ', '⁀'),
+ ('⁔', '⁔'),
+ ('ī¸ŗ', 'ī¸´'),
+ ('īš', 'īš'),
+ ('īŧŋ', 'īŧŋ'),
+];
+
+pub const CONTROL: &'static [(char, char)] =
+ &[('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}')];
+
+pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[
+ ('$', '$'),
+ ('Âĸ', 'ÂĨ'),
+ ('֏', '֏'),
+ ('؋', '؋'),
+ ('ßž', 'ßŋ'),
+ ('ā§˛', 'ā§ŗ'),
+ ('ā§ģ', 'ā§ģ'),
+ ('āĢą', 'āĢą'),
+ ('ā¯š', 'ā¯š'),
+ ('ā¸ŋ', 'ā¸ŋ'),
+ ('៛', '៛'),
+ ('₠', 'â‚ŋ'),
+ ('ę ¸', 'ę ¸'),
+ ('īˇŧ', 'īˇŧ'),
+ ('īšŠ', 'īšŠ'),
+ ('īŧ„', 'īŧ„'),
+ ('īŋ ', 'īŋĄ'),
+ ('īŋĨ', 'īŋĻ'),
+ ('đ‘ŋ', 'đ‘ŋ '),
+ ('đž‹ŋ', 'đž‹ŋ'),
+ ('𞲰', '𞲰'),
+];
+
+pub const DASH_PUNCTUATION: &'static [(char, char)] = &[
+ ('-', '-'),
+ ('֊', '֊'),
+ ('Öž', 'Öž'),
+ ('᐀', '᐀'),
+ ('᠆', '᠆'),
+ ('‐', '―'),
+ ('⸗', '⸗'),
+ ('⸚', '⸚'),
+ ('â¸ē', 'â¸ģ'),
+ ('⹀', '⹀'),
+ ('〜', '〜'),
+ ('〰', '〰'),
+ ('゠', '゠'),
+ ('ī¸ą', 'ī¸˛'),
+ ('īš˜', 'īš˜'),
+ ('īšŖ', 'īšŖ'),
+ ('īŧ', 'īŧ'),
+ ('\u{10ead}', '\u{10ead}'),
+];
+
+pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('Ų ', 'ŲŠ'),
+ ('Û°', 'Ûš'),
+ ('߀', '߉'),
+ ('āĨĻ', 'āĨ¯'),
+ ('ā§Ļ', 'ā§¯'),
+ ('āŠĻ', 'āŠ¯'),
+ ('āĢĻ', 'āĢ¯'),
+ ('ā­Ļ', 'ā­¯'),
+ ('ā¯Ļ', 'ā¯¯'),
+ ('āąĻ', 'āą¯'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āĩĻ', 'āĩ¯'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āš', 'āš™'),
+ ('āģ', 'āģ™'),
+ ('āŧ ', 'āŧŠ'),
+ ('၀', '၉'),
+ ('႐', '႙'),
+ ('០', '៩'),
+ ('᠐', '᠙'),
+ ('áĨ†', 'áĨ'),
+ ('᧐', '᧙'),
+ ('áĒ€', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('᭐', '᭙'),
+ ('Ꮀ', '᎚'),
+ ('᱀', '᱉'),
+ ('᱐', '᱙'),
+ ('꘠', 'ꘊ'),
+ ('ęŖ', 'ęŖ™'),
+ ('꤀', '꤉'),
+ ('꧐', '꧙'),
+ ('꧰', '꧚'),
+ ('꩐', '꩙'),
+ ('ę¯°', 'ę¯š'),
+ ('īŧ', 'īŧ™'),
+ ('𐒠', '𐒩'),
+ ('𐴰', '𐴚'),
+ ('đ‘Ļ', 'đ‘¯'),
+ ('𑃰', '𑃹'),
+ ('đ‘„ļ', 'đ‘„ŋ'),
+ ('𑇐', '𑇙'),
+ ('𑋰', '𑋹'),
+ ('𑑐', '𑑙'),
+ ('𑓐', '𑓙'),
+ ('𑙐', '𑙙'),
+ ('𑛀', '𑛉'),
+ ('𑜰', '𑜹'),
+ ('đ‘Ŗ ', 'đ‘ŖŠ'),
+ ('\u{11950}', '\u{11959}'),
+ ('𑱐', '𑱙'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('𖩠', '𖩩'),
+ ('𖭐', '𖭙'),
+ ('𝟎', 'đŸŋ'),
+ ('𞅀', '𞅉'),
+ ('𞋰', '𞋹'),
+ ('đžĨ', 'đžĨ™'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+];
+
+pub const ENCLOSING_MARK: &'static [(char, char)] = &[
+ ('\u{488}', '\u{489}'),
+ ('\u{1abe}', '\u{1abe}'),
+ ('\u{20dd}', '\u{20e0}'),
+ ('\u{20e2}', '\u{20e4}'),
+ ('\u{a670}', '\u{a672}'),
+];
+
+pub const FINAL_PUNCTUATION: &'static [(char, char)] = &[
+ ('Âģ', 'Âģ'),
+ ('’', '’'),
+ ('”', '”'),
+ ('â€ē', 'â€ē'),
+ ('⸃', '⸃'),
+ ('⸅', '⸅'),
+ ('⸊', '⸊'),
+ ('⸍', '⸍'),
+ ('⸝', '⸝'),
+ ('⸥', '⸥'),
+];
+
+pub const FORMAT: &'static [(char, char)] = &[
+ ('\u{ad}', '\u{ad}'),
+ ('\u{600}', '\u{605}'),
+ ('\u{61c}', '\u{61c}'),
+ ('\u{6dd}', '\u{6dd}'),
+ ('\u{70f}', '\u{70f}'),
+ ('\u{8e2}', '\u{8e2}'),
+ ('\u{180e}', '\u{180e}'),
+ ('\u{200b}', '\u{200f}'),
+ ('\u{202a}', '\u{202e}'),
+ ('\u{2060}', '\u{2064}'),
+ ('\u{2066}', '\u{206f}'),
+ ('\u{feff}', '\u{feff}'),
+ ('\u{fff9}', '\u{fffb}'),
+ ('\u{110bd}', '\u{110bd}'),
+ ('\u{110cd}', '\u{110cd}'),
+ ('\u{13430}', '\u{13438}'),
+ ('\u{1bca0}', '\u{1bca3}'),
+ ('\u{1d173}', '\u{1d17a}'),
+ ('\u{e0001}', '\u{e0001}'),
+ ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const INITIAL_PUNCTUATION: &'static [(char, char)] = &[
+ ('ÂĢ', 'ÂĢ'),
+ ('‘', '‘'),
+ ('‛', '“'),
+ ('‟', '‟'),
+ ('‹', '‹'),
+ ('⸂', '⸂'),
+ ('⸄', '⸄'),
+ ('⸉', '⸉'),
+ ('⸌', '⸌'),
+ ('⸜', '⸜'),
+ ('⸠', '⸠'),
+];
+
+pub const LETTER: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ˁ'),
+ ('ˆ', 'ˑ'),
+ ('ˠ', 'ˤ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('Í°', 'Í´'),
+ ('Íļ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', 'ՙ'),
+ ('ՠ', 'ֈ'),
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('Ø ', 'ŲŠ'),
+ ('ŲŽ', 'Ų¯'),
+ ('Ųą', 'ۓ'),
+ ('ە', 'ە'),
+ ('ÛĨ', 'ÛĻ'),
+ ('ÛŽ', 'Û¯'),
+ ('Ûē', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', 'ܐ'),
+ ('ܒ', 'ܯ'),
+ ('Ũ', 'ŪĨ'),
+ ('Ūą', 'Ūą'),
+ ('ߊ', 'ßĒ'),
+ ('ß´', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('ā €', 'ā •'),
+ ('ā š', 'ā š'),
+ ('ā ¤', 'ā ¤'),
+ ('ā ¨', 'ā ¨'),
+ ('āĄ€', 'āĄ˜'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('ā¤„', 'ā¤š'),
+ ('ā¤Ŋ', 'ā¤Ŋ'),
+ ('āĨ', 'āĨ'),
+ ('āĨ˜', 'āĨĄ'),
+ ('āĨą', 'āĻ€'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('āĻŊ', 'āĻŊ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', 'ā§Ą'),
+ ('ā§°', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠ˛', 'āŠ´'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('āĒŊ', 'āĒŊ'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', 'āĢĄ'),
+ ('āĢš', 'āĢš'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('āŦŊ', 'āŦŊ'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', 'ā­Ą'),
+ ('ā­ą', 'ā­ą'),
+ ('āŽƒ', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('ā¯', 'ā¯'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'ā°Ŋ'),
+ ('āą˜', 'āąš'),
+ ('āą ', 'āąĄ'),
+ ('ā˛€', 'ā˛€'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛Ŋ', 'ā˛Ŋ'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', 'āŗĄ'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d04}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´ē'),
+ ('ā´Ŋ', 'ā´Ŋ'),
+ ('āĩŽ', 'āĩŽ'),
+ ('āĩ”', 'āĩ–'),
+ ('āĩŸ', 'āĩĄ'),
+ ('āĩē', 'āĩŋ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('ā¸', 'ā¸°'),
+ ('ā¸˛', 'ā¸ŗ'),
+ ('āš€', 'āš†'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āē°'),
+ ('āē˛', 'āēŗ'),
+ ('āēŊ', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('āŊ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('āžˆ', 'āžŒ'),
+ ('က', 'á€Ē'),
+ ('á€ŋ', 'á€ŋ'),
+ ('ၐ', 'ၕ'),
+ ('ၚ', 'ၝ'),
+ ('ၥ', 'ၥ'),
+ ('áĨ', 'áĻ'),
+ ('၎', 'ၰ'),
+ ('áĩ', 'ႁ'),
+ ('ႎ', 'ႎ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('ᎀ', 'ᎏ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛱ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', 'ᜑ'),
+ ('ᜠ', 'ᜱ'),
+ ('ᝀ', 'ᝑ'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('ក', 'ážŗ'),
+ ('ៗ', 'ៗ'),
+ ('ៜ', 'ៜ'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸ„'),
+ ('áĸ‡', 'áĸ¨'),
+ ('áĸĒ', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('áĨ', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('ᨀ', 'ᨖ'),
+ ('ᨠ', 'ᩔ'),
+ ('áĒ§', 'áĒ§'),
+ ('áŦ…', 'áŦŗ'),
+ ('ᭅ', 'ᭋ'),
+ ('ᎃ', 'Ꭰ'),
+ ('ᎎ', 'Ꭿ'),
+ ('áŽē', 'á¯Ĩ'),
+ ('ᰀ', 'á°Ŗ'),
+ ('ᱍ', 'ᱏ'),
+ ('ᱚ', 'áąŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗļ'),
+ ('áŗē', 'áŗē'),
+ ('ᴀ', 'áļŋ'),
+ ('Ḁ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℯ', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ↄ', 'ↄ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('ⸯ', 'ⸯ'),
+ ('々', '〆'),
+ ('ã€ą', 'ã€ĩ'),
+ ('ã€ģ', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('ゝ', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŧ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ꘟ'),
+ ('ę˜Ē', 'ę˜Ģ'),
+ ('Ꙁ', 'ꙮ'),
+ ('ę™ŋ', 'ꚝ'),
+ ('ꚠ', 'ę›Ĩ'),
+ ('ꜗ', 'ꜟ'),
+ ('ęœĸ', 'ꞈ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ꠁ'),
+ ('ꠃ', 'ꠅ'),
+ ('ꠇ', 'ꠊ'),
+ ('ꠌ', 'ę ĸ'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ‚', 'ęĸŗ'),
+ ('ęŖ˛', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', 'ęŖž'),
+ ('ꤊ', 'ę¤Ĩ'),
+ ('ꤰ', 'ęĨ†'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('ęĻ„', 'ęĻ˛'),
+ ('ꧏ', 'ꧏ'),
+ ('ꧠ', 'ꧤ'),
+ ('ę§Ļ', 'ę§¯'),
+ ('ę§ē', '꧞'),
+ ('ꨀ', 'ꨨ'),
+ ('ꩀ', 'ꩂ'),
+ ('ꩄ', 'ꩋ'),
+ ('ꊠ', 'ęŠļ'),
+ ('ęŠē', 'ęŠē'),
+ ('ꊞ', 'ęĒ¯'),
+ ('ęĒą', 'ęĒą'),
+ ('ęĒĩ', 'ęĒļ'),
+ ('ęĒš', 'ęĒŊ'),
+ ('ęĢ€', 'ęĢ€'),
+ ('ęĢ‚', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢ'),
+ ('ęĢ ', 'ęĢĒ'),
+ ('ęĢ˛', 'ęĢ´'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab69}'),
+ ('ę­°', 'ę¯ĸ'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŦ', 'īŦ'),
+ ('īŦŸ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+ ('īŊĻ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍀'),
+ ('𐍂', '𐍉'),
+ ('𐍐', 'đĩ'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐐀', '𐒝'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '𐨀'),
+ ('𐨐', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', 'đĢ¤'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('𐴀', 'đ´Ŗ'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', 'đŊ…'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀃', '𑀷'),
+ ('𑂃', 'đ‘‚¯'),
+ ('𑃐', '𑃨'),
+ ('𑄃', 'đ‘„Ļ'),
+ ('𑅄', '𑅄'),
+ ('\u{11147}', '\u{11147}'),
+ ('𑅐', '𑅲'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('𑆃', '𑆲'),
+ ('𑇁', '𑇄'),
+ ('𑇚', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', 'đ‘ˆĢ'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '𑋞'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('đ‘ŒŊ', 'đ‘ŒŊ'),
+ ('𑍐', '𑍐'),
+ ('𑍝', '𑍡'),
+ ('𑐀', '𑐴'),
+ ('𑑇', '𑑊'),
+ ('𑑟', '\u{11461}'),
+ ('𑒀', 'đ‘’¯'),
+ ('𑓄', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑖀', '𑖮'),
+ ('𑗘', '𑗛'),
+ ('𑘀', 'đ‘˜¯'),
+ ('𑙄', '𑙄'),
+ ('𑚀', 'đ‘šĒ'),
+ ('𑚸', '𑚸'),
+ ('𑜀', '𑜚'),
+ ('𑠀', 'đ‘ Ģ'),
+ ('đ‘ĸ ', 'đ‘ŖŸ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{1192f}'),
+ ('\u{1193f}', '\u{1193f}'),
+ ('\u{11941}', '\u{11941}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '𑧐'),
+ ('𑧡', '𑧡'),
+ ('đ‘§Ŗ', 'đ‘§Ŗ'),
+ ('𑨀', '𑨀'),
+ ('𑨋', '𑨲'),
+ ('đ‘¨ē', 'đ‘¨ē'),
+ ('𑩐', '𑩐'),
+ ('𑩜', 'đ‘Ē‰'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '𑰮'),
+ ('𑱀', '𑱀'),
+ ('𑱲', '𑲏'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '𑴰'),
+ ('đ‘ĩ†', 'đ‘ĩ†'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļ‰'),
+ ('đ‘ļ˜', 'đ‘ļ˜'),
+ ('đ‘ģ ', 'đ‘ģ˛'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('đ–Ŧ€', 'đ–Ŧ¯'),
+ ('𖭀', '𖭃'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–šŋ'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('đ–Ŋ', 'đ–Ŋ'),
+ ('𖾓', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', 'đ–ŋŖ'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𞄀', 'đž„Ŧ'),
+ ('𞄷', 'đž„Ŋ'),
+ ('𞅎', '𞅎'),
+ ('𞋀', 'đž‹Ģ'),
+ ('𞠀', 'đžŖ„'),
+ ('𞤀', 'đžĨƒ'),
+ ('đžĨ‹', 'đžĨ‹'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const LETTER_NUMBER: &'static [(char, char)] = &[
+ ('ᛮ', 'ᛰ'),
+ ('Ⅰ', 'ↂ'),
+ ('ↅ', 'ↈ'),
+ ('〇', '〇'),
+ ('ã€Ą', '《'),
+ ('〸', 'ã€ē'),
+ ('ę›Ļ', 'ę›¯'),
+ ('𐅀', '𐅴'),
+ ('𐍁', '𐍁'),
+ ('𐍊', '𐍊'),
+ ('𐏑', '𐏕'),
+ ('𒐀', '𒑮'),
+];
+
+pub const LINE_SEPARATOR: &'static [(char, char)] =
+ &[('\u{2028}', '\u{2028}')];
+
+pub const LOWERCASE_LETTER: &'static [(char, char)] = &[
+ ('a', 'z'),
+ ('Âĩ', 'Âĩ'),
+ ('ß', 'Ãļ'),
+ ('ø', 'Ãŋ'),
+ ('ā', 'ā'),
+ ('ă', 'ă'),
+ ('ą', 'ą'),
+ ('ć', 'ć'),
+ ('ĉ', 'ĉ'),
+ ('ċ', 'ċ'),
+ ('č', 'č'),
+ ('ď', 'ď'),
+ ('đ', 'đ'),
+ ('ē', 'ē'),
+ ('ĕ', 'ĕ'),
+ ('ė', 'ė'),
+ ('ę', 'ę'),
+ ('ě', 'ě'),
+ ('ĝ', 'ĝ'),
+ ('ğ', 'ğ'),
+ ('ÄĄ', 'ÄĄ'),
+ ('ÄŖ', 'ÄŖ'),
+ ('ÄĨ', 'ÄĨ'),
+ ('ħ', 'ħ'),
+ ('ÄŠ', 'ÄŠ'),
+ ('ÄĢ', 'ÄĢ'),
+ ('Ä­', 'Ä­'),
+ ('į', 'į'),
+ ('Äą', 'Äą'),
+ ('Äŗ', 'Äŗ'),
+ ('Äĩ', 'Äĩ'),
+ ('ġ', 'ĸ'),
+ ('Äē', 'Äē'),
+ ('Äŧ', 'Äŧ'),
+ ('Äž', 'Äž'),
+ ('ŀ', 'ŀ'),
+ ('ł', 'ł'),
+ ('ń', 'ń'),
+ ('ņ', 'ņ'),
+ ('ň', 'ʼn'),
+ ('ŋ', 'ŋ'),
+ ('ō', 'ō'),
+ ('ŏ', 'ŏ'),
+ ('ő', 'ő'),
+ ('œ', 'œ'),
+ ('ŕ', 'ŕ'),
+ ('ŗ', 'ŗ'),
+ ('ř', 'ř'),
+ ('ś', 'ś'),
+ ('ŝ', 'ŝ'),
+ ('ş', 'ş'),
+ ('ÅĄ', 'ÅĄ'),
+ ('ÅŖ', 'ÅŖ'),
+ ('ÅĨ', 'ÅĨ'),
+ ('ŧ', 'ŧ'),
+ ('ÅŠ', 'ÅŠ'),
+ ('ÅĢ', 'ÅĢ'),
+ ('Å­', 'Å­'),
+ ('ů', 'ů'),
+ ('Åą', 'Åą'),
+ ('Åŗ', 'Åŗ'),
+ ('Åĩ', 'Åĩ'),
+ ('Åˇ', 'Åˇ'),
+ ('Åē', 'Åē'),
+ ('Åŧ', 'Åŧ'),
+ ('Åž', 'ƀ'),
+ ('ƃ', 'ƃ'),
+ ('ƅ', 'ƅ'),
+ ('ƈ', 'ƈ'),
+ ('ƌ', 'ƍ'),
+ ('ƒ', 'ƒ'),
+ ('ƕ', 'ƕ'),
+ ('ƙ', 'ƛ'),
+ ('ƞ', 'ƞ'),
+ ('ÆĄ', 'ÆĄ'),
+ ('ÆŖ', 'ÆŖ'),
+ ('ÆĨ', 'ÆĨ'),
+ ('ƨ', 'ƨ'),
+ ('ÆĒ', 'ÆĢ'),
+ ('Æ­', 'Æ­'),
+ ('Æ°', 'Æ°'),
+ ('Æ´', 'Æ´'),
+ ('Æļ', 'Æļ'),
+ ('Æš', 'Æē'),
+ ('ÆŊ', 'Æŋ'),
+ ('Į†', 'Į†'),
+ ('Į‰', 'Į‰'),
+ ('ĮŒ', 'ĮŒ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Į', 'Į'),
+ ('Į’', 'Į’'),
+ ('Į”', 'Į”'),
+ ('Į–', 'Į–'),
+ ('Į˜', 'Į˜'),
+ ('Įš', 'Įš'),
+ ('Įœ', 'Į'),
+ ('ĮŸ', 'ĮŸ'),
+ ('ĮĄ', 'ĮĄ'),
+ ('ĮŖ', 'ĮŖ'),
+ ('ĮĨ', 'ĮĨ'),
+ ('Į§', 'Į§'),
+ ('ĮŠ', 'ĮŠ'),
+ ('ĮĢ', 'ĮĢ'),
+ ('Į­', 'Į­'),
+ ('Į¯', 'Į°'),
+ ('Įŗ', 'Įŗ'),
+ ('Įĩ', 'Įĩ'),
+ ('Įš', 'Įš'),
+ ('Įģ', 'Įģ'),
+ ('ĮŊ', 'ĮŊ'),
+ ('Įŋ', 'Įŋ'),
+ ('ȁ', 'ȁ'),
+ ('ȃ', 'ȃ'),
+ ('ȅ', 'ȅ'),
+ ('ȇ', 'ȇ'),
+ ('ȉ', 'ȉ'),
+ ('ȋ', 'ȋ'),
+ ('ȍ', 'ȍ'),
+ ('ȏ', 'ȏ'),
+ ('ȑ', 'ȑ'),
+ ('ȓ', 'ȓ'),
+ ('ȕ', 'ȕ'),
+ ('ȗ', 'ȗ'),
+ ('ș', 'ș'),
+ ('ț', 'ț'),
+ ('ȝ', 'ȝ'),
+ ('ȟ', 'ȟ'),
+ ('ČĄ', 'ČĄ'),
+ ('ČŖ', 'ČŖ'),
+ ('ČĨ', 'ČĨ'),
+ ('ȧ', 'ȧ'),
+ ('ČŠ', 'ČŠ'),
+ ('ČĢ', 'ČĢ'),
+ ('Č­', 'Č­'),
+ ('Č¯', 'Č¯'),
+ ('Čą', 'Čą'),
+ ('Čŗ', 'Čš'),
+ ('Čŧ', 'Čŧ'),
+ ('Čŋ', 'ɀ'),
+ ('ɂ', 'ɂ'),
+ ('ɇ', 'ɇ'),
+ ('ɉ', 'ɉ'),
+ ('ɋ', 'ɋ'),
+ ('ɍ', 'ɍ'),
+ ('ɏ', 'ʓ'),
+ ('ʕ', 'Ę¯'),
+ ('Íą', 'Íą'),
+ ('Íŗ', 'Íŗ'),
+ ('͡', '͡'),
+ ('Íģ', 'ÍŊ'),
+ ('ΐ', 'ΐ'),
+ ('ÎŦ', 'ĪŽ'),
+ ('Ī', 'Ī‘'),
+ ('Ī•', 'Ī—'),
+ ('Ī™', 'Ī™'),
+ ('Ī›', 'Ī›'),
+ ('Ī', 'Ī'),
+ ('ĪŸ', 'ĪŸ'),
+ ('ĪĄ', 'ĪĄ'),
+ ('ĪŖ', 'ĪŖ'),
+ ('ĪĨ', 'ĪĨ'),
+ ('Ī§', 'Ī§'),
+ ('ĪŠ', 'ĪŠ'),
+ ('ĪĢ', 'ĪĢ'),
+ ('Ī­', 'Ī­'),
+ ('Ī¯', 'Īŗ'),
+ ('Īĩ', 'Īĩ'),
+ ('Ī¸', 'Ī¸'),
+ ('Īģ', 'Īŧ'),
+ ('Đ°', 'ŅŸ'),
+ ('ŅĄ', 'ŅĄ'),
+ ('ŅŖ', 'ŅŖ'),
+ ('ŅĨ', 'ŅĨ'),
+ ('Ņ§', 'Ņ§'),
+ ('ŅŠ', 'ŅŠ'),
+ ('ŅĢ', 'ŅĢ'),
+ ('Ņ­', 'Ņ­'),
+ ('Ņ¯', 'Ņ¯'),
+ ('Ņą', 'Ņą'),
+ ('Ņŗ', 'Ņŗ'),
+ ('Ņĩ', 'Ņĩ'),
+ ('Ņˇ', 'Ņˇ'),
+ ('Ņš', 'Ņš'),
+ ('Ņģ', 'Ņģ'),
+ ('ŅŊ', 'ŅŊ'),
+ ('Ņŋ', 'Ņŋ'),
+ ('Ō', 'Ō'),
+ ('Ō‹', 'Ō‹'),
+ ('Ō', 'Ō'),
+ ('Ō', 'Ō'),
+ ('Ō‘', 'Ō‘'),
+ ('Ō“', 'Ō“'),
+ ('Ō•', 'Ō•'),
+ ('Ō—', 'Ō—'),
+ ('Ō™', 'Ō™'),
+ ('Ō›', 'Ō›'),
+ ('Ō', 'Ō'),
+ ('ŌŸ', 'ŌŸ'),
+ ('ŌĄ', 'ŌĄ'),
+ ('ŌŖ', 'ŌŖ'),
+ ('ŌĨ', 'ŌĨ'),
+ ('Ō§', 'Ō§'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌĢ', 'ŌĢ'),
+ ('Ō­', 'Ō­'),
+ ('Ō¯', 'Ō¯'),
+ ('Ōą', 'Ōą'),
+ ('Ōŗ', 'Ōŗ'),
+ ('Ōĩ', 'Ōĩ'),
+ ('Ōˇ', 'Ōˇ'),
+ ('Ōš', 'Ōš'),
+ ('Ōģ', 'Ōģ'),
+ ('ŌŊ', 'ŌŊ'),
+ ('Ōŋ', 'Ōŋ'),
+ ('Ķ‚', 'Ķ‚'),
+ ('Ķ„', 'Ķ„'),
+ ('Ķ†', 'Ķ†'),
+ ('Ķˆ', 'Ķˆ'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶŒ', 'ĶŒ'),
+ ('ĶŽ', 'Ķ'),
+ ('Ķ‘', 'Ķ‘'),
+ ('Ķ“', 'Ķ“'),
+ ('Ķ•', 'Ķ•'),
+ ('Ķ—', 'Ķ—'),
+ ('Ķ™', 'Ķ™'),
+ ('Ķ›', 'Ķ›'),
+ ('Ķ', 'Ķ'),
+ ('ĶŸ', 'ĶŸ'),
+ ('ĶĄ', 'ĶĄ'),
+ ('ĶŖ', 'ĶŖ'),
+ ('ĶĨ', 'ĶĨ'),
+ ('Ķ§', 'Ķ§'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶĢ', 'ĶĢ'),
+ ('Ķ­', 'Ķ­'),
+ ('Ķ¯', 'Ķ¯'),
+ ('Ķą', 'Ķą'),
+ ('Ķŗ', 'Ķŗ'),
+ ('Ķĩ', 'Ķĩ'),
+ ('Ķˇ', 'Ķˇ'),
+ ('Ķš', 'Ķš'),
+ ('Ķģ', 'Ķģ'),
+ ('ĶŊ', 'ĶŊ'),
+ ('Ķŋ', 'Ķŋ'),
+ ('ԁ', 'ԁ'),
+ ('ԃ', 'ԃ'),
+ ('ԅ', 'ԅ'),
+ ('ԇ', 'ԇ'),
+ ('ԉ', 'ԉ'),
+ ('ԋ', 'ԋ'),
+ ('ԍ', 'ԍ'),
+ ('ԏ', 'ԏ'),
+ ('ԑ', 'ԑ'),
+ ('ԓ', 'ԓ'),
+ ('ԕ', 'ԕ'),
+ ('ԗ', 'ԗ'),
+ ('ԙ', 'ԙ'),
+ ('ԛ', 'ԛ'),
+ ('ԝ', 'ԝ'),
+ ('ԟ', 'ԟ'),
+ ('ÔĄ', 'ÔĄ'),
+ ('ÔŖ', 'ÔŖ'),
+ ('ÔĨ', 'ÔĨ'),
+ ('Ô§', 'Ô§'),
+ ('ÔŠ', 'ÔŠ'),
+ ('ÔĢ', 'ÔĢ'),
+ ('Ô­', 'Ô­'),
+ ('Ô¯', 'Ô¯'),
+ ('ՠ', 'ֈ'),
+ ('ა', 'áƒē'),
+ ('áƒŊ', 'áƒŋ'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('ᴀ', 'á´Ģ'),
+ ('áĩĢ', 'áĩˇ'),
+ ('áĩš', 'áļš'),
+ ('ḁ', 'ḁ'),
+ ('ḃ', 'ḃ'),
+ ('ḅ', 'ḅ'),
+ ('ḇ', 'ḇ'),
+ ('ḉ', 'ḉ'),
+ ('ḋ', 'ḋ'),
+ ('ḍ', 'ḍ'),
+ ('ḏ', 'ḏ'),
+ ('ḑ', 'ḑ'),
+ ('ḓ', 'ḓ'),
+ ('ḕ', 'ḕ'),
+ ('ḗ', 'ḗ'),
+ ('ḙ', 'ḙ'),
+ ('ḛ', 'ḛ'),
+ ('ḝ', 'ḝ'),
+ ('ḟ', 'ḟ'),
+ ('ḥ', 'ḥ'),
+ ('á¸Ŗ', 'á¸Ŗ'),
+ ('á¸Ĩ', 'á¸Ĩ'),
+ ('ḧ', 'ḧ'),
+ ('Ḋ', 'Ḋ'),
+ ('á¸Ģ', 'á¸Ģ'),
+ ('ḭ', 'ḭ'),
+ ('ḯ', 'ḯ'),
+ ('ḹ', 'ḹ'),
+ ('á¸ŗ', 'á¸ŗ'),
+ ('á¸ĩ', 'á¸ĩ'),
+ ('ḡ', 'ḡ'),
+ ('Ḛ', 'Ḛ'),
+ ('á¸ģ', 'á¸ģ'),
+ ('á¸Ŋ', 'á¸Ŋ'),
+ ('á¸ŋ', 'á¸ŋ'),
+ ('ᚁ', 'ᚁ'),
+ ('ᚃ', 'ᚃ'),
+ ('ṅ', 'ṅ'),
+ ('ṇ', 'ṇ'),
+ ('ṉ', 'ṉ'),
+ ('ṋ', 'ṋ'),
+ ('ᚍ', 'ᚍ'),
+ ('ᚏ', 'ᚏ'),
+ ('ṑ', 'ṑ'),
+ ('ṓ', 'ṓ'),
+ ('ṕ', 'ṕ'),
+ ('ṗ', 'ṗ'),
+ ('ṙ', 'ṙ'),
+ ('ṛ', 'ṛ'),
+ ('᚝', '᚝'),
+ ('ṟ', 'ṟ'),
+ ('ᚥ', 'ᚥ'),
+ ('ášŖ', 'ášŖ'),
+ ('ášĨ', 'ášĨ'),
+ ('ᚧ', 'ᚧ'),
+ ('ᚊ', 'ᚊ'),
+ ('ášĢ', 'ášĢ'),
+ ('áš­', 'áš­'),
+ ('ᚯ', 'ᚯ'),
+ ('ášą', 'ášą'),
+ ('ášŗ', 'ášŗ'),
+ ('ášĩ', 'ášĩ'),
+ ('ᚡ', 'ᚡ'),
+ ('ášš', 'ášš'),
+ ('ášģ', 'ášģ'),
+ ('ášŊ', 'ášŊ'),
+ ('ášŋ', 'ášŋ'),
+ ('áē', 'áē'),
+ ('áēƒ', 'áēƒ'),
+ ('áē…', 'áē…'),
+ ('áē‡', 'áē‡'),
+ ('áē‰', 'áē‰'),
+ ('áē‹', 'áē‹'),
+ ('áē', 'áē'),
+ ('áē', 'áē'),
+ ('áē‘', 'áē‘'),
+ ('áē“', 'áē“'),
+ ('áē•', 'áē'),
+ ('áēŸ', 'áēŸ'),
+ ('áēĄ', 'áēĄ'),
+ ('áēŖ', 'áēŖ'),
+ ('áēĨ', 'áēĨ'),
+ ('áē§', 'áē§'),
+ ('áēŠ', 'áēŠ'),
+ ('áēĢ', 'áēĢ'),
+ ('áē­', 'áē­'),
+ ('áē¯', 'áē¯'),
+ ('áēą', 'áēą'),
+ ('áēŗ', 'áēŗ'),
+ ('áēĩ', 'áēĩ'),
+ ('áēˇ', 'áēˇ'),
+ ('áēš', 'áēš'),
+ ('áēģ', 'áēģ'),
+ ('áēŊ', 'áēŊ'),
+ ('áēŋ', 'áēŋ'),
+ ('áģ', 'áģ'),
+ ('áģƒ', 'áģƒ'),
+ ('áģ…', 'áģ…'),
+ ('áģ‡', 'áģ‡'),
+ ('áģ‰', 'áģ‰'),
+ ('áģ‹', 'áģ‹'),
+ ('áģ', 'áģ'),
+ ('áģ', 'áģ'),
+ ('áģ‘', 'áģ‘'),
+ ('áģ“', 'áģ“'),
+ ('áģ•', 'áģ•'),
+ ('áģ—', 'áģ—'),
+ ('áģ™', 'áģ™'),
+ ('áģ›', 'áģ›'),
+ ('áģ', 'áģ'),
+ ('áģŸ', 'áģŸ'),
+ ('áģĄ', 'áģĄ'),
+ ('áģŖ', 'áģŖ'),
+ ('áģĨ', 'áģĨ'),
+ ('áģ§', 'áģ§'),
+ ('áģŠ', 'áģŠ'),
+ ('áģĢ', 'áģĢ'),
+ ('áģ­', 'áģ­'),
+ ('áģ¯', 'áģ¯'),
+ ('áģą', 'áģą'),
+ ('áģŗ', 'áģŗ'),
+ ('áģĩ', 'áģĩ'),
+ ('áģˇ', 'áģˇ'),
+ ('áģš', 'áģš'),
+ ('áģģ', 'áģģ'),
+ ('áģŊ', 'áģŊ'),
+ ('áģŋ', 'áŧ‡'),
+ ('áŧ', 'áŧ•'),
+ ('áŧ ', 'áŧ§'),
+ ('áŧ°', 'áŧˇ'),
+ ('áŊ€', 'áŊ…'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ ', 'áŊ§'),
+ ('áŊ°', 'áŊŊ'),
+ ('ᾀ', 'ᾇ'),
+ ('ᾐ', 'ᾗ'),
+ ('ហ', 'ឧ'),
+ ('áž°', 'áž´'),
+ ('ážļ', 'ឡ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋ‡'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ—'),
+ ('áŋ ', 'áŋ§'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋˇ'),
+ ('ℊ', 'ℊ'),
+ ('ℎ', 'ℏ'),
+ ('ℓ', 'ℓ'),
+ ('ℯ', 'ℯ'),
+ ('ℴ', 'ℴ'),
+ ('ℹ', 'ℹ'),
+ ('â„ŧ', 'â„Ŋ'),
+ ('ⅆ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('ↄ', 'ↄ'),
+ ('ⰰ', 'ⱞ'),
+ ('⹥', '⹥'),
+ ('âąĨ', 'âąĻ'),
+ ('⹨', '⹨'),
+ ('âąĒ', 'âąĒ'),
+ ('âąŦ', 'âąŦ'),
+ ('âąą', 'âąą'),
+ ('âąŗ', 'âą´'),
+ ('âąļ', 'âąģ'),
+ ('ⲁ', 'ⲁ'),
+ ('ⲃ', 'ⲃ'),
+ ('ⲅ', 'ⲅ'),
+ ('ⲇ', 'ⲇ'),
+ ('ⲉ', 'ⲉ'),
+ ('ⲋ', 'ⲋ'),
+ ('ⲍ', 'ⲍ'),
+ ('ⲏ', 'ⲏ'),
+ ('ⲑ', 'ⲑ'),
+ ('ⲓ', 'ⲓ'),
+ ('ⲕ', 'ⲕ'),
+ ('ⲗ', 'ⲗ'),
+ ('ⲙ', 'ⲙ'),
+ ('ⲛ', 'ⲛ'),
+ ('ⲝ', 'ⲝ'),
+ ('ⲟ', 'ⲟ'),
+ ('ⲥ', 'ⲥ'),
+ ('â˛Ŗ', 'â˛Ŗ'),
+ ('â˛Ĩ', 'â˛Ĩ'),
+ ('ⲧ', 'ⲧ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('â˛Ģ', 'â˛Ģ'),
+ ('ⲭ', 'ⲭ'),
+ ('â˛¯', 'â˛¯'),
+ ('ⲹ', 'ⲹ'),
+ ('â˛ŗ', 'â˛ŗ'),
+ ('â˛ĩ', 'â˛ĩ'),
+ ('ⲡ', 'ⲡ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('â˛ģ', 'â˛ģ'),
+ ('â˛Ŋ', 'â˛Ŋ'),
+ ('â˛ŋ', 'â˛ŋ'),
+ ('âŗ', 'âŗ'),
+ ('âŗƒ', 'âŗƒ'),
+ ('âŗ…', 'âŗ…'),
+ ('âŗ‡', 'âŗ‡'),
+ ('âŗ‰', 'âŗ‰'),
+ ('âŗ‹', 'âŗ‹'),
+ ('âŗ', 'âŗ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ‘', 'âŗ‘'),
+ ('âŗ“', 'âŗ“'),
+ ('âŗ•', 'âŗ•'),
+ ('âŗ—', 'âŗ—'),
+ ('âŗ™', 'âŗ™'),
+ ('âŗ›', 'âŗ›'),
+ ('âŗ', 'âŗ'),
+ ('âŗŸ', 'âŗŸ'),
+ ('âŗĄ', 'âŗĄ'),
+ ('âŗŖ', 'âŗ¤'),
+ ('âŗŦ', 'âŗŦ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗŗ', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('ꙁ', 'ꙁ'),
+ ('ꙃ', 'ꙃ'),
+ ('ꙅ', 'ꙅ'),
+ ('ꙇ', 'ꙇ'),
+ ('ꙉ', 'ꙉ'),
+ ('ꙋ', 'ꙋ'),
+ ('ꙍ', 'ꙍ'),
+ ('ꙏ', 'ꙏ'),
+ ('ꙑ', 'ꙑ'),
+ ('ꙓ', 'ꙓ'),
+ ('ꙕ', 'ꙕ'),
+ ('ꙗ', 'ꙗ'),
+ ('ꙙ', 'ꙙ'),
+ ('ꙛ', 'ꙛ'),
+ ('ꙝ', 'ꙝ'),
+ ('ꙟ', 'ꙟ'),
+ ('ꙡ', 'ꙡ'),
+ ('ę™Ŗ', 'ę™Ŗ'),
+ ('ę™Ĩ', 'ę™Ĩ'),
+ ('ꙧ', 'ꙧ'),
+ ('ꙩ', 'ꙩ'),
+ ('ę™Ģ', 'ę™Ģ'),
+ ('ꙭ', 'ꙭ'),
+ ('ꚁ', 'ꚁ'),
+ ('ꚃ', 'ꚃ'),
+ ('ꚅ', 'ꚅ'),
+ ('ꚇ', 'ꚇ'),
+ ('ꚉ', 'ꚉ'),
+ ('ꚋ', 'ꚋ'),
+ ('ꚍ', 'ꚍ'),
+ ('ꚏ', 'ꚏ'),
+ ('ꚑ', 'ꚑ'),
+ ('ꚓ', 'ꚓ'),
+ ('ꚕ', 'ꚕ'),
+ ('ꚗ', 'ꚗ'),
+ ('ꚙ', 'ꚙ'),
+ ('ꚛ', 'ꚛ'),
+ ('ęœŖ', 'ęœŖ'),
+ ('ęœĨ', 'ęœĨ'),
+ ('ꜧ', 'ꜧ'),
+ ('ꜩ', 'ꜩ'),
+ ('ęœĢ', 'ęœĢ'),
+ ('ꜭ', 'ꜭ'),
+ ('ęœ¯', 'ꜱ'),
+ ('ęœŗ', 'ęœŗ'),
+ ('ęœĩ', 'ęœĩ'),
+ ('ꜷ', 'ꜷ'),
+ ('ꜹ', 'ꜹ'),
+ ('ęœģ', 'ęœģ'),
+ ('ęœŊ', 'ęœŊ'),
+ ('ęœŋ', 'ęœŋ'),
+ ('ꝁ', 'ꝁ'),
+ ('ꝃ', 'ꝃ'),
+ ('ꝅ', 'ꝅ'),
+ ('ꝇ', 'ꝇ'),
+ ('ꝉ', 'ꝉ'),
+ ('ꝋ', 'ꝋ'),
+ ('ꝍ', 'ꝍ'),
+ ('ꝏ', 'ꝏ'),
+ ('ꝑ', 'ꝑ'),
+ ('ꝓ', 'ꝓ'),
+ ('ꝕ', 'ꝕ'),
+ ('ꝗ', 'ꝗ'),
+ ('ꝙ', 'ꝙ'),
+ ('ꝛ', 'ꝛ'),
+ ('ꝝ', 'ꝝ'),
+ ('ꝟ', 'ꝟ'),
+ ('ꝡ', 'ꝡ'),
+ ('ęŖ', 'ęŖ'),
+ ('ęĨ', 'ęĨ'),
+ ('ꝧ', 'ꝧ'),
+ ('ꝩ', 'ꝩ'),
+ ('ęĢ', 'ęĢ'),
+ ('ꝭ', 'ꝭ'),
+ ('ę¯', 'ę¯'),
+ ('ꝱ', 'ꝸ'),
+ ('ęē', 'ęē'),
+ ('ęŧ', 'ęŧ'),
+ ('ęŋ', 'ęŋ'),
+ ('ꞁ', 'ꞁ'),
+ ('ꞃ', 'ꞃ'),
+ ('ꞅ', 'ꞅ'),
+ ('ꞇ', 'ꞇ'),
+ ('ꞌ', 'ꞌ'),
+ ('ꞎ', 'ꞎ'),
+ ('ꞑ', 'ꞑ'),
+ ('ꞓ', 'ꞕ'),
+ ('ꞗ', 'ꞗ'),
+ ('ꞙ', 'ꞙ'),
+ ('ꞛ', 'ꞛ'),
+ ('ꞝ', 'ꞝ'),
+ ('ꞟ', 'ꞟ'),
+ ('ꞡ', 'ꞡ'),
+ ('ęžŖ', 'ęžŖ'),
+ ('ęžĨ', 'ęžĨ'),
+ ('ꞧ', 'ꞧ'),
+ ('ꞩ', 'ꞩ'),
+ ('ęž¯', 'ęž¯'),
+ ('ęžĩ', 'ęžĩ'),
+ ('ꞷ', 'ꞷ'),
+ ('ꞹ', 'ꞹ'),
+ ('ęžģ', 'ęžģ'),
+ ('ęžŊ', 'ęžŊ'),
+ ('ęžŋ', 'ęžŋ'),
+ ('ꟃ', 'ꟃ'),
+ ('\u{a7c8}', '\u{a7c8}'),
+ ('\u{a7ca}', '\u{a7ca}'),
+ ('\u{a7f6}', '\u{a7f6}'),
+ ('ęŸē', 'ęŸē'),
+ ('ęŦ°', 'ꭚ'),
+ ('ę­ ', '\u{ab68}'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŊ', 'īŊš'),
+ ('𐐨', '𐑏'),
+ ('𐓘', 'đ“ģ'),
+ ('đŗ€', 'đŗ˛'),
+ ('đ‘Ŗ€', 'đ‘ŖŸ'),
+ ('𖹠', 'đ–šŋ'),
+ ('𝐚', 'đŗ'),
+ ('𝑎', '𝑔'),
+ ('𝑖', '𝑧'),
+ ('𝒂', '𝒛'),
+ ('đ’ļ', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝓏'),
+ ('đ“Ē', '𝔃'),
+ ('𝔞', '𝔷'),
+ ('𝕒', 'đ•Ģ'),
+ ('𝖆', '𝖟'),
+ ('đ–ē', '𝗓'),
+ ('𝗮', '𝘇'),
+ ('đ˜ĸ', 'đ˜ģ'),
+ ('𝙖', 'đ™¯'),
+ ('𝚊', 'đšĨ'),
+ ('𝛂', '𝛚'),
+ ('𝛜', '𝛡'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜛'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝕'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞏'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟉'),
+ ('𝟋', '𝟋'),
+ ('đž¤ĸ', 'đžĨƒ'),
+];
+
+pub const MARK: &'static [(char, char)] = &[
+ ('\u{300}', '\u{36f}'),
+ ('\u{483}', '\u{489}'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('\u{610}', '\u{61a}'),
+ ('\u{64b}', '\u{65f}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{6d6}', '\u{6dc}'),
+ ('\u{6df}', '\u{6e4}'),
+ ('\u{6e7}', '\u{6e8}'),
+ ('\u{6ea}', '\u{6ed}'),
+ ('\u{711}', '\u{711}'),
+ ('\u{730}', '\u{74a}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{7eb}', '\u{7f3}'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('\u{816}', '\u{819}'),
+ ('\u{81b}', '\u{823}'),
+ ('\u{825}', '\u{827}'),
+ ('\u{829}', '\u{82d}'),
+ ('\u{859}', '\u{85b}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', 'ā¤ƒ'),
+ ('\u{93a}', '\u{93c}'),
+ ('ā¤ž', 'āĨ'),
+ ('\u{951}', '\u{957}'),
+ ('\u{962}', '\u{963}'),
+ ('\u{981}', 'āĻƒ'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9be}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', '\u{9cd}'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('\u{9e2}', '\u{9e3}'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a70}', '\u{a71}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{a81}', 'āĒƒ'),
+ ('\u{abc}', '\u{abc}'),
+ ('āĒž', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('\u{ae2}', '\u{ae3}'),
+ ('\u{afa}', '\u{aff}'),
+ ('\u{b01}', 'āŦƒ'),
+ ('\u{b3c}', '\u{b3c}'),
+ ('\u{b3e}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('\u{b62}', '\u{b63}'),
+ ('\u{b82}', '\u{b82}'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('\u{c00}', '\u{c04}'),
+ ('\u{c3e}', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('\u{c62}', '\u{c63}'),
+ ('\u{c81}', 'ā˛ƒ'),
+ ('\u{cbc}', '\u{cbc}'),
+ ('ā˛ž', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('\u{d00}', 'ā´ƒ'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d3e}', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', '\u{d4d}'),
+ ('\u{d57}', '\u{d57}'),
+ ('\u{d62}', '\u{d63}'),
+ ('\u{d81}', 'āļƒ'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇ˛', 'āˇŗ'),
+ ('\u{e31}', '\u{e31}'),
+ ('\u{e34}', '\u{e3a}'),
+ ('\u{e47}', '\u{e4e}'),
+ ('\u{eb1}', '\u{eb1}'),
+ ('\u{eb4}', '\u{ebc}'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('\u{f18}', '\u{f19}'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('āŧž', 'āŧŋ'),
+ ('\u{f71}', '\u{f84}'),
+ ('\u{f86}', '\u{f87}'),
+ ('\u{f8d}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('á€Ģ', '\u{103e}'),
+ ('ၖ', '\u{1059}'),
+ ('\u{105e}', '\u{1060}'),
+ ('áĸ', 'ၤ'),
+ ('ၧ', 'ၭ'),
+ ('\u{1071}', '\u{1074}'),
+ ('\u{1082}', '\u{108d}'),
+ ('ႏ', 'ႏ'),
+ ('ႚ', '\u{109d}'),
+ ('\u{135d}', '\u{135f}'),
+ ('\u{1712}', '\u{1714}'),
+ ('\u{1732}', '\u{1734}'),
+ ('\u{1752}', '\u{1753}'),
+ ('\u{1772}', '\u{1773}'),
+ ('\u{17b4}', '\u{17d3}'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('\u{180b}', '\u{180d}'),
+ ('\u{1885}', '\u{1886}'),
+ ('\u{18a9}', '\u{18a9}'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('\u{1a17}', '\u{1a1b}'),
+ ('ᩕ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', '\u{1a7f}'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1b00}', 'áŦ„'),
+ ('\u{1b34}', '᭄'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', 'ᮂ'),
+ ('Ꭵ', '\u{1bad}'),
+ ('\u{1be6}', 'á¯ŗ'),
+ ('á°¤', '\u{1c37}'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('áŗˇ', '\u{1cf9}'),
+ ('\u{1dc0}', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('\u{20d0}', '\u{20f0}'),
+ ('\u{2cef}', '\u{2cf1}'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('\u{302a}', '\u{302f}'),
+ ('\u{3099}', '\u{309a}'),
+ ('\u{a66f}', '\u{a672}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('\u{a69e}', '\u{a69f}'),
+ ('\u{a6f0}', '\u{a6f1}'),
+ ('\u{a802}', '\u{a802}'),
+ ('\u{a806}', '\u{a806}'),
+ ('\u{a80b}', '\u{a80b}'),
+ ('ę Ŗ', 'ę §'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('ęĸ€', 'ęĸ'),
+ ('ęĸ´', '\u{a8c5}'),
+ ('\u{a8e0}', '\u{a8f1}'),
+ ('\u{a8ff}', '\u{a8ff}'),
+ ('\u{a926}', '\u{a92d}'),
+ ('\u{a947}', 'ęĨ“'),
+ ('\u{a980}', 'ęĻƒ'),
+ ('\u{a9b3}', '꧀'),
+ ('\u{a9e5}', '\u{a9e5}'),
+ ('\u{aa29}', '\u{aa36}'),
+ ('\u{aa43}', '\u{aa43}'),
+ ('\u{aa4c}', 'ꩍ'),
+ ('ęŠģ', 'ęŠŊ'),
+ ('\u{aab0}', '\u{aab0}'),
+ ('\u{aab2}', '\u{aab4}'),
+ ('\u{aab7}', '\u{aab8}'),
+ ('\u{aabe}', '\u{aabf}'),
+ ('\u{aac1}', '\u{aac1}'),
+ ('ęĢĢ', 'ęĢ¯'),
+ ('ęĢĩ', '\u{aaf6}'),
+ ('ę¯Ŗ', 'ę¯Ē'),
+ ('ę¯Ŧ', '\u{abed}'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{10376}', '\u{1037a}'),
+ ('\u{10a01}', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '\u{10a0f}'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('\u{10ae5}', '\u{10ae6}'),
+ ('\u{10d24}', '\u{10d27}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10f46}', '\u{10f50}'),
+ ('𑀀', '𑀂'),
+ ('\u{11038}', '\u{11046}'),
+ ('\u{1107f}', '𑂂'),
+ ('𑂰', '\u{110ba}'),
+ ('\u{11100}', '\u{11102}'),
+ ('\u{11127}', '\u{11134}'),
+ ('𑅅', '𑅆'),
+ ('\u{11173}', '\u{11173}'),
+ ('\u{11180}', '𑆂'),
+ ('đ‘†ŗ', '𑇀'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111ce}', '\u{111cf}'),
+ ('đ‘ˆŦ', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('\u{112df}', '\u{112ea}'),
+ ('\u{11300}', '𑌃'),
+ ('\u{1133b}', '\u{1133c}'),
+ ('\u{1133e}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('\u{11357}', '\u{11357}'),
+ ('đ‘ĸ', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('đ‘ĩ', '\u{11446}'),
+ ('\u{1145e}', '\u{1145e}'),
+ ('\u{114b0}', '\u{114c3}'),
+ ('\u{115af}', '\u{115b5}'),
+ ('𑖸', '\u{115c0}'),
+ ('\u{115dc}', '\u{115dd}'),
+ ('𑘰', '\u{11640}'),
+ ('\u{116ab}', '\u{116b7}'),
+ ('\u{1171d}', '\u{1172b}'),
+ ('đ‘ Ŧ', '\u{1183a}'),
+ ('\u{11930}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{1193e}'),
+ ('\u{11940}', '\u{11940}'),
+ ('\u{11942}', '\u{11943}'),
+ ('𑧑', '\u{119d7}'),
+ ('\u{119da}', '\u{119e0}'),
+ ('𑧤', '𑧤'),
+ ('\u{11a01}', '\u{11a0a}'),
+ ('\u{11a33}', '𑨹'),
+ ('\u{11a3b}', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a51}', '\u{11a5b}'),
+ ('\u{11a8a}', '\u{11a99}'),
+ ('đ‘°¯', '\u{11c36}'),
+ ('\u{11c38}', '\u{11c3f}'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('\u{11d31}', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d45}'),
+ ('\u{11d47}', '\u{11d47}'),
+ ('đ‘ļŠ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', '\u{11d97}'),
+ ('\u{11ef3}', 'đ‘ģļ'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('\u{16b30}', '\u{16b36}'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('đ–Ŋ‘', '𖾇'),
+ ('\u{16f8f}', '\u{16f92}'),
+ ('\u{16fe4}', '\u{16fe4}'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d165}', '\u{1d169}'),
+ ('𝅭', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('\u{1e130}', '\u{1e136}'),
+ ('\u{1e2ec}', '\u{1e2ef}'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('\u{1e944}', '\u{1e94a}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const MATH_SYMBOL: &'static [(char, char)] = &[
+ ('+', '+'),
+ ('<', '>'),
+ ('|', '|'),
+ ('~', '~'),
+ ('ÂŦ', 'ÂŦ'),
+ ('Âą', 'Âą'),
+ ('×', '×'),
+ ('Ãˇ', 'Ãˇ'),
+ ('Īļ', 'Īļ'),
+ ('؆', '؈'),
+ ('⁄', '⁄'),
+ ('⁒', '⁒'),
+ ('âē', 'âŧ'),
+ ('₊', '₌'),
+ ('℘', '℘'),
+ ('⅀', '⅄'),
+ ('⅋', '⅋'),
+ ('←', '↔'),
+ ('↚', '↛'),
+ ('↠', '↠'),
+ ('â†Ŗ', 'â†Ŗ'),
+ ('â†Ļ', 'â†Ļ'),
+ ('↮', '↮'),
+ ('⇎', '⇏'),
+ ('⇒', '⇒'),
+ ('⇔', '⇔'),
+ ('⇴', 'â‹ŋ'),
+ ('⌠', '⌡'),
+ ('âŧ', 'âŧ'),
+ ('⎛', 'âŽŗ'),
+ ('⏜', '⏡'),
+ ('▷', '▷'),
+ ('◁', '◁'),
+ ('◸', 'â—ŋ'),
+ ('♯', '♯'),
+ ('⟀', '⟄'),
+ ('⟇', 'âŸĨ'),
+ ('⟰', 'âŸŋ'),
+ ('⤀', 'âĻ‚'),
+ ('âĻ™', '⧗'),
+ ('⧜', 'â§ģ'),
+ ('⧞', 'âĢŋ'),
+ ('âŦ°', '⭄'),
+ ('⭇', '⭌'),
+ ('īŦŠ', 'īŦŠ'),
+ ('īšĸ', 'īšĸ'),
+ ('īš¤', 'īšĻ'),
+ ('īŧ‹', 'īŧ‹'),
+ ('īŧœ', 'īŧž'),
+ ('īŊœ', 'īŊœ'),
+ ('īŊž', 'īŊž'),
+ ('īŋĸ', 'īŋĸ'),
+ ('īŋŠ', 'īŋŦ'),
+ ('𝛁', '𝛁'),
+ ('𝛛', '𝛛'),
+ ('đ›ģ', 'đ›ģ'),
+ ('𝜕', '𝜕'),
+ ('đœĩ', 'đœĩ'),
+ ('𝝏', '𝝏'),
+ ('đ¯', 'đ¯'),
+ ('𝞉', '𝞉'),
+ ('𝞩', '𝞩'),
+ ('𝟃', '𝟃'),
+ ('đžģ°', 'đžģą'),
+];
+
+pub const MODIFIER_LETTER: &'static [(char, char)] = &[
+ ('ʰ', 'ˁ'),
+ ('ˆ', 'ˑ'),
+ ('ˠ', 'ˤ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('Í´', 'Í´'),
+ ('Íē', 'Íē'),
+ ('ՙ', 'ՙ'),
+ ('Ų€', 'Ų€'),
+ ('ÛĨ', 'ÛĻ'),
+ ('ß´', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('ā š', 'ā š'),
+ ('ā ¤', 'ā ¤'),
+ ('ā ¨', 'ā ¨'),
+ ('āĨą', 'āĨą'),
+ ('āš†', 'āš†'),
+ ('āģ†', 'āģ†'),
+ ('áƒŧ', 'áƒŧ'),
+ ('ៗ', 'ៗ'),
+ ('᥃', '᥃'),
+ ('áĒ§', 'áĒ§'),
+ ('Ṹ', 'áąŊ'),
+ ('á´Ŧ', 'áĩĒ'),
+ ('áĩ¸', 'áĩ¸'),
+ ('áļ›', 'áļŋ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('âąŧ', 'âąŊ'),
+ ('âĩ¯', 'âĩ¯'),
+ ('ⸯ', 'ⸯ'),
+ ('々', '々'),
+ ('ã€ą', 'ã€ĩ'),
+ ('ã€ģ', 'ã€ģ'),
+ ('ゝ', 'ゞ'),
+ ('ãƒŧ', 'マ'),
+ ('ꀕ', 'ꀕ'),
+ ('ꓸ', 'ę“Ŋ'),
+ ('ꘌ', 'ꘌ'),
+ ('ę™ŋ', 'ę™ŋ'),
+ ('ꚜ', 'ꚝ'),
+ ('ꜗ', 'ꜟ'),
+ ('ꝰ', 'ꝰ'),
+ ('ꞈ', 'ꞈ'),
+ ('ꟸ', 'ꟹ'),
+ ('ꧏ', 'ꧏ'),
+ ('ę§Ļ', 'ę§Ļ'),
+ ('ꊰ', 'ꊰ'),
+ ('ęĢ', 'ęĢ'),
+ ('ęĢŗ', 'ęĢ´'),
+ ('ꭜ', 'ꭟ'),
+ ('\u{ab69}', '\u{ab69}'),
+ ('īŊ°', 'īŊ°'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('𖭀', '𖭃'),
+ ('𖾓', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', 'đ–ŋŖ'),
+ ('𞄷', 'đž„Ŋ'),
+ ('đžĨ‹', 'đžĨ‹'),
+];
+
+pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[
+ ('^', '^'),
+ ('`', '`'),
+ ('¨', '¨'),
+ ('¯', '¯'),
+ ('´', '´'),
+ ('¸', '¸'),
+ ('˂', '˅'),
+ ('˒', '˟'),
+ ('ËĨ', 'ËĢ'),
+ ('Ë­', 'Ë­'),
+ ('˯', 'Ëŋ'),
+ ('Íĩ', 'Íĩ'),
+ ('΄', '΅'),
+ ('ážŊ', 'ážŊ'),
+ ('ážŋ', 'áŋ'),
+ ('áŋ', 'áŋ'),
+ ('áŋ', 'áŋŸ'),
+ ('áŋ­', 'áŋ¯'),
+ ('áŋŊ', 'áŋž'),
+ ('゛', '゜'),
+ ('꜀', '꜖'),
+ ('꜠', '꜡'),
+ ('꞉', '꞊'),
+ ('꭛', '꭛'),
+ ('\u{ab6a}', '\u{ab6b}'),
+ ('īŽ˛', 'ī¯'),
+ ('īŧž', 'īŧž'),
+ ('īŊ€', 'īŊ€'),
+ ('īŋŖ', 'īŋŖ'),
+ ('đŸģ', 'đŸŋ'),
+];
+
+pub const NONSPACING_MARK: &'static [(char, char)] = &[
+ ('\u{300}', '\u{36f}'),
+ ('\u{483}', '\u{487}'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('\u{610}', '\u{61a}'),
+ ('\u{64b}', '\u{65f}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{6d6}', '\u{6dc}'),
+ ('\u{6df}', '\u{6e4}'),
+ ('\u{6e7}', '\u{6e8}'),
+ ('\u{6ea}', '\u{6ed}'),
+ ('\u{711}', '\u{711}'),
+ ('\u{730}', '\u{74a}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{7eb}', '\u{7f3}'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('\u{816}', '\u{819}'),
+ ('\u{81b}', '\u{823}'),
+ ('\u{825}', '\u{827}'),
+ ('\u{829}', '\u{82d}'),
+ ('\u{859}', '\u{85b}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', '\u{902}'),
+ ('\u{93a}', '\u{93a}'),
+ ('\u{93c}', '\u{93c}'),
+ ('\u{941}', '\u{948}'),
+ ('\u{94d}', '\u{94d}'),
+ ('\u{951}', '\u{957}'),
+ ('\u{962}', '\u{963}'),
+ ('\u{981}', '\u{981}'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9c1}', '\u{9c4}'),
+ ('\u{9cd}', '\u{9cd}'),
+ ('\u{9e2}', '\u{9e3}'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', '\u{a02}'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('\u{a41}', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a70}', '\u{a71}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{a81}', '\u{a82}'),
+ ('\u{abc}', '\u{abc}'),
+ ('\u{ac1}', '\u{ac5}'),
+ ('\u{ac7}', '\u{ac8}'),
+ ('\u{acd}', '\u{acd}'),
+ ('\u{ae2}', '\u{ae3}'),
+ ('\u{afa}', '\u{aff}'),
+ ('\u{b01}', '\u{b01}'),
+ ('\u{b3c}', '\u{b3c}'),
+ ('\u{b3f}', '\u{b3f}'),
+ ('\u{b41}', '\u{b44}'),
+ ('\u{b4d}', '\u{b4d}'),
+ ('\u{b55}', '\u{b56}'),
+ ('\u{b62}', '\u{b63}'),
+ ('\u{b82}', '\u{b82}'),
+ ('\u{bc0}', '\u{bc0}'),
+ ('\u{bcd}', '\u{bcd}'),
+ ('\u{c00}', '\u{c00}'),
+ ('\u{c04}', '\u{c04}'),
+ ('\u{c3e}', '\u{c40}'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('\u{c62}', '\u{c63}'),
+ ('\u{c81}', '\u{c81}'),
+ ('\u{cbc}', '\u{cbc}'),
+ ('\u{cbf}', '\u{cbf}'),
+ ('\u{cc6}', '\u{cc6}'),
+ ('\u{ccc}', '\u{ccd}'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('\u{d00}', '\u{d01}'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d41}', '\u{d44}'),
+ ('\u{d4d}', '\u{d4d}'),
+ ('\u{d62}', '\u{d63}'),
+ ('\u{d81}', '\u{d81}'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dd2}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('\u{e31}', '\u{e31}'),
+ ('\u{e34}', '\u{e3a}'),
+ ('\u{e47}', '\u{e4e}'),
+ ('\u{eb1}', '\u{eb1}'),
+ ('\u{eb4}', '\u{ebc}'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('\u{f18}', '\u{f19}'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('\u{f71}', '\u{f7e}'),
+ ('\u{f80}', '\u{f84}'),
+ ('\u{f86}', '\u{f87}'),
+ ('\u{f8d}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('\u{102d}', '\u{1030}'),
+ ('\u{1032}', '\u{1037}'),
+ ('\u{1039}', '\u{103a}'),
+ ('\u{103d}', '\u{103e}'),
+ ('\u{1058}', '\u{1059}'),
+ ('\u{105e}', '\u{1060}'),
+ ('\u{1071}', '\u{1074}'),
+ ('\u{1082}', '\u{1082}'),
+ ('\u{1085}', '\u{1086}'),
+ ('\u{108d}', '\u{108d}'),
+ ('\u{109d}', '\u{109d}'),
+ ('\u{135d}', '\u{135f}'),
+ ('\u{1712}', '\u{1714}'),
+ ('\u{1732}', '\u{1734}'),
+ ('\u{1752}', '\u{1753}'),
+ ('\u{1772}', '\u{1773}'),
+ ('\u{17b4}', '\u{17b5}'),
+ ('\u{17b7}', '\u{17bd}'),
+ ('\u{17c6}', '\u{17c6}'),
+ ('\u{17c9}', '\u{17d3}'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('\u{180b}', '\u{180d}'),
+ ('\u{1885}', '\u{1886}'),
+ ('\u{18a9}', '\u{18a9}'),
+ ('\u{1920}', '\u{1922}'),
+ ('\u{1927}', '\u{1928}'),
+ ('\u{1932}', '\u{1932}'),
+ ('\u{1939}', '\u{193b}'),
+ ('\u{1a17}', '\u{1a18}'),
+ ('\u{1a1b}', '\u{1a1b}'),
+ ('\u{1a56}', '\u{1a56}'),
+ ('\u{1a58}', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a60}'),
+ ('\u{1a62}', '\u{1a62}'),
+ ('\u{1a65}', '\u{1a6c}'),
+ ('\u{1a73}', '\u{1a7c}'),
+ ('\u{1a7f}', '\u{1a7f}'),
+ ('\u{1ab0}', '\u{1abd}'),
+ ('\u{1abf}', '\u{1ac0}'),
+ ('\u{1b00}', '\u{1b03}'),
+ ('\u{1b34}', '\u{1b34}'),
+ ('\u{1b36}', '\u{1b3a}'),
+ ('\u{1b3c}', '\u{1b3c}'),
+ ('\u{1b42}', '\u{1b42}'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', '\u{1b81}'),
+ ('\u{1ba2}', '\u{1ba5}'),
+ ('\u{1ba8}', '\u{1ba9}'),
+ ('\u{1bab}', '\u{1bad}'),
+ ('\u{1be6}', '\u{1be6}'),
+ ('\u{1be8}', '\u{1be9}'),
+ ('\u{1bed}', '\u{1bed}'),
+ ('\u{1bef}', '\u{1bf1}'),
+ ('\u{1c2c}', '\u{1c33}'),
+ ('\u{1c36}', '\u{1c37}'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', '\u{1ce0}'),
+ ('\u{1ce2}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('\u{1cf8}', '\u{1cf9}'),
+ ('\u{1dc0}', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('\u{20d0}', '\u{20dc}'),
+ ('\u{20e1}', '\u{20e1}'),
+ ('\u{20e5}', '\u{20f0}'),
+ ('\u{2cef}', '\u{2cf1}'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('\u{302a}', '\u{302d}'),
+ ('\u{3099}', '\u{309a}'),
+ ('\u{a66f}', '\u{a66f}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('\u{a69e}', '\u{a69f}'),
+ ('\u{a6f0}', '\u{a6f1}'),
+ ('\u{a802}', '\u{a802}'),
+ ('\u{a806}', '\u{a806}'),
+ ('\u{a80b}', '\u{a80b}'),
+ ('\u{a825}', '\u{a826}'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('\u{a8c4}', '\u{a8c5}'),
+ ('\u{a8e0}', '\u{a8f1}'),
+ ('\u{a8ff}', '\u{a8ff}'),
+ ('\u{a926}', '\u{a92d}'),
+ ('\u{a947}', '\u{a951}'),
+ ('\u{a980}', '\u{a982}'),
+ ('\u{a9b3}', '\u{a9b3}'),
+ ('\u{a9b6}', '\u{a9b9}'),
+ ('\u{a9bc}', '\u{a9bd}'),
+ ('\u{a9e5}', '\u{a9e5}'),
+ ('\u{aa29}', '\u{aa2e}'),
+ ('\u{aa31}', '\u{aa32}'),
+ ('\u{aa35}', '\u{aa36}'),
+ ('\u{aa43}', '\u{aa43}'),
+ ('\u{aa4c}', '\u{aa4c}'),
+ ('\u{aa7c}', '\u{aa7c}'),
+ ('\u{aab0}', '\u{aab0}'),
+ ('\u{aab2}', '\u{aab4}'),
+ ('\u{aab7}', '\u{aab8}'),
+ ('\u{aabe}', '\u{aabf}'),
+ ('\u{aac1}', '\u{aac1}'),
+ ('\u{aaec}', '\u{aaed}'),
+ ('\u{aaf6}', '\u{aaf6}'),
+ ('\u{abe5}', '\u{abe5}'),
+ ('\u{abe8}', '\u{abe8}'),
+ ('\u{abed}', '\u{abed}'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{10376}', '\u{1037a}'),
+ ('\u{10a01}', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '\u{10a0f}'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('\u{10ae5}', '\u{10ae6}'),
+ ('\u{10d24}', '\u{10d27}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10f46}', '\u{10f50}'),
+ ('\u{11001}', '\u{11001}'),
+ ('\u{11038}', '\u{11046}'),
+ ('\u{1107f}', '\u{11081}'),
+ ('\u{110b3}', '\u{110b6}'),
+ ('\u{110b9}', '\u{110ba}'),
+ ('\u{11100}', '\u{11102}'),
+ ('\u{11127}', '\u{1112b}'),
+ ('\u{1112d}', '\u{11134}'),
+ ('\u{11173}', '\u{11173}'),
+ ('\u{11180}', '\u{11181}'),
+ ('\u{111b6}', '\u{111be}'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111cf}', '\u{111cf}'),
+ ('\u{1122f}', '\u{11231}'),
+ ('\u{11234}', '\u{11234}'),
+ ('\u{11236}', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('\u{112df}', '\u{112df}'),
+ ('\u{112e3}', '\u{112ea}'),
+ ('\u{11300}', '\u{11301}'),
+ ('\u{1133b}', '\u{1133c}'),
+ ('\u{11340}', '\u{11340}'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('\u{11438}', '\u{1143f}'),
+ ('\u{11442}', '\u{11444}'),
+ ('\u{11446}', '\u{11446}'),
+ ('\u{1145e}', '\u{1145e}'),
+ ('\u{114b3}', '\u{114b8}'),
+ ('\u{114ba}', '\u{114ba}'),
+ ('\u{114bf}', '\u{114c0}'),
+ ('\u{114c2}', '\u{114c3}'),
+ ('\u{115b2}', '\u{115b5}'),
+ ('\u{115bc}', '\u{115bd}'),
+ ('\u{115bf}', '\u{115c0}'),
+ ('\u{115dc}', '\u{115dd}'),
+ ('\u{11633}', '\u{1163a}'),
+ ('\u{1163d}', '\u{1163d}'),
+ ('\u{1163f}', '\u{11640}'),
+ ('\u{116ab}', '\u{116ab}'),
+ ('\u{116ad}', '\u{116ad}'),
+ ('\u{116b0}', '\u{116b5}'),
+ ('\u{116b7}', '\u{116b7}'),
+ ('\u{1171d}', '\u{1171f}'),
+ ('\u{11722}', '\u{11725}'),
+ ('\u{11727}', '\u{1172b}'),
+ ('\u{1182f}', '\u{11837}'),
+ ('\u{11839}', '\u{1183a}'),
+ ('\u{1193b}', '\u{1193c}'),
+ ('\u{1193e}', '\u{1193e}'),
+ ('\u{11943}', '\u{11943}'),
+ ('\u{119d4}', '\u{119d7}'),
+ ('\u{119da}', '\u{119db}'),
+ ('\u{119e0}', '\u{119e0}'),
+ ('\u{11a01}', '\u{11a0a}'),
+ ('\u{11a33}', '\u{11a38}'),
+ ('\u{11a3b}', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a51}', '\u{11a56}'),
+ ('\u{11a59}', '\u{11a5b}'),
+ ('\u{11a8a}', '\u{11a96}'),
+ ('\u{11a98}', '\u{11a99}'),
+ ('\u{11c30}', '\u{11c36}'),
+ ('\u{11c38}', '\u{11c3d}'),
+ ('\u{11c3f}', '\u{11c3f}'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('\u{11caa}', '\u{11cb0}'),
+ ('\u{11cb2}', '\u{11cb3}'),
+ ('\u{11cb5}', '\u{11cb6}'),
+ ('\u{11d31}', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d45}'),
+ ('\u{11d47}', '\u{11d47}'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('\u{11d95}', '\u{11d95}'),
+ ('\u{11d97}', '\u{11d97}'),
+ ('\u{11ef3}', '\u{11ef4}'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('\u{16b30}', '\u{16b36}'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('\u{16f8f}', '\u{16f92}'),
+ ('\u{16fe4}', '\u{16fe4}'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d167}', '\u{1d169}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('\u{1e130}', '\u{1e136}'),
+ ('\u{1e2ec}', '\u{1e2ef}'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('\u{1e944}', '\u{1e94a}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const NUMBER: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('²', 'Âŗ'),
+ ('š', 'š'),
+ ('Âŧ', 'ž'),
+ ('Ų ', 'ŲŠ'),
+ ('Û°', 'Ûš'),
+ ('߀', '߉'),
+ ('āĨĻ', 'āĨ¯'),
+ ('ā§Ļ', 'ā§¯'),
+ ('ā§´', 'ā§š'),
+ ('āŠĻ', 'āŠ¯'),
+ ('āĢĻ', 'āĢ¯'),
+ ('ā­Ļ', 'ā­¯'),
+ ('ā­˛', 'ā­ˇ'),
+ ('ā¯Ļ', 'ā¯˛'),
+ ('āąĻ', 'āą¯'),
+ ('āą¸', 'āąž'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āĩ˜', 'āĩž'),
+ ('āĩĻ', 'āĩ¸'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āš', 'āš™'),
+ ('āģ', 'āģ™'),
+ ('āŧ ', 'āŧŗ'),
+ ('၀', '၉'),
+ ('႐', '႙'),
+ ('ፊ', 'áŧ'),
+ ('ᛮ', 'ᛰ'),
+ ('០', '៩'),
+ ('៰', '៹'),
+ ('᠐', '᠙'),
+ ('áĨ†', 'áĨ'),
+ ('᧐', '᧚'),
+ ('áĒ€', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('᭐', '᭙'),
+ ('Ꮀ', '᎚'),
+ ('᱀', '᱉'),
+ ('᱐', '᱙'),
+ ('⁰', '⁰'),
+ ('⁴', '⁚'),
+ ('₀', '₉'),
+ ('⅐', 'ↂ'),
+ ('ↅ', '↉'),
+ ('①', '⒛'),
+ ('â“Ē', 'â“ŋ'),
+ ('âļ', '➓'),
+ ('âŗŊ', 'âŗŊ'),
+ ('〇', '〇'),
+ ('ã€Ą', '《'),
+ ('〸', 'ã€ē'),
+ ('㆒', '㆕'),
+ ('㈠', '㈊'),
+ ('㉈', '㉏'),
+ ('㉑', '㉟'),
+ ('㊀', '㊉'),
+ ('ãŠą', 'ãŠŋ'),
+ ('꘠', 'ꘊ'),
+ ('ę›Ļ', 'ę›¯'),
+ ('ę °', 'ę ĩ'),
+ ('ęŖ', 'ęŖ™'),
+ ('꤀', '꤉'),
+ ('꧐', '꧙'),
+ ('꧰', '꧚'),
+ ('꩐', '꩙'),
+ ('ę¯°', 'ę¯š'),
+ ('īŧ', 'īŧ™'),
+ ('𐄇', 'đ„ŗ'),
+ ('𐅀', '𐅸'),
+ ('𐆊', '𐆋'),
+ ('𐋡', 'đ‹ģ'),
+ ('𐌠', 'đŒŖ'),
+ ('𐍁', '𐍁'),
+ ('𐍊', '𐍊'),
+ ('𐏑', '𐏕'),
+ ('𐒠', '𐒩'),
+ ('𐡘', '𐡟'),
+ ('𐥚', 'đĄŋ'),
+ ('đĸ§', 'đĸ¯'),
+ ('đŖģ', 'đŖŋ'),
+ ('𐤖', '𐤛'),
+ ('đĻŧ', 'đĻŊ'),
+ ('𐧀', '𐧏'),
+ ('𐧒', 'đ§ŋ'),
+ ('𐩀', '𐩈'),
+ ('đŠŊ', '𐊞'),
+ ('đĒ', 'đĒŸ'),
+ ('đĢĢ', 'đĢ¯'),
+ ('𐭘', '𐭟'),
+ ('𐭸', 'đ­ŋ'),
+ ('𐎊', 'đŽ¯'),
+ ('đŗē', 'đŗŋ'),
+ ('𐴰', '𐴚'),
+ ('𐚠', '𐚞'),
+ ('đŧ', 'đŧĻ'),
+ ('đŊ‘', 'đŊ”'),
+ ('\u{10fc5}', '\u{10fcb}'),
+ ('𑁒', 'đ‘¯'),
+ ('𑃰', '𑃹'),
+ ('đ‘„ļ', 'đ‘„ŋ'),
+ ('𑇐', '𑇙'),
+ ('𑇡', '𑇴'),
+ ('𑋰', '𑋹'),
+ ('𑑐', '𑑙'),
+ ('𑓐', '𑓙'),
+ ('𑙐', '𑙙'),
+ ('𑛀', '𑛉'),
+ ('𑜰', 'đ‘œģ'),
+ ('đ‘Ŗ ', 'đ‘Ŗ˛'),
+ ('\u{11950}', '\u{11959}'),
+ ('𑱐', 'đ‘ąŦ'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('đ‘ŋ€', 'đ‘ŋ”'),
+ ('𒐀', '𒑮'),
+ ('𖩠', '𖩩'),
+ ('𖭐', '𖭙'),
+ ('𖭛', '𖭡'),
+ ('đ–ē€', 'đ–ē–'),
+ ('𝋠', 'đ‹ŗ'),
+ ('𝍠', '𝍸'),
+ ('𝟎', 'đŸŋ'),
+ ('𞅀', '𞅉'),
+ ('𞋰', '𞋹'),
+ ('đžŖ‡', 'đžŖ'),
+ ('đžĨ', 'đžĨ™'),
+ ('𞱱', 'đž˛Ģ'),
+ ('𞲭', 'đž˛¯'),
+ ('𞲱', '𞲴'),
+ ('𞴁', '𞴭'),
+ ('đž´¯', 'đž´Ŋ'),
+ ('🄀', '🄌'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+];
+
+pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[
+ ('(', '('),
+ ('[', '['),
+ ('{', '{'),
+ ('āŧē', 'āŧē'),
+ ('āŧŧ', 'āŧŧ'),
+ ('᚛', '᚛'),
+ ('‚', '‚'),
+ ('„', '„'),
+ ('⁅', '⁅'),
+ ('âŊ', 'âŊ'),
+ ('₍', '₍'),
+ ('⌈', '⌈'),
+ ('⌊', '⌊'),
+ ('〈', '〈'),
+ ('❨', '❨'),
+ ('âĒ', 'âĒ'),
+ ('âŦ', 'âŦ'),
+ ('❎', '❎'),
+ ('❰', '❰'),
+ ('❲', '❲'),
+ ('❴', '❴'),
+ ('⟅', '⟅'),
+ ('âŸĻ', 'âŸĻ'),
+ ('⟨', '⟨'),
+ ('âŸĒ', 'âŸĒ'),
+ ('âŸŦ', 'âŸŦ'),
+ ('⟮', '⟮'),
+ ('âĻƒ', 'âĻƒ'),
+ ('âĻ…', 'âĻ…'),
+ ('âĻ‡', 'âĻ‡'),
+ ('âĻ‰', 'âĻ‰'),
+ ('âĻ‹', 'âĻ‹'),
+ ('âĻ', 'âĻ'),
+ ('âĻ', 'âĻ'),
+ ('âĻ‘', 'âĻ‘'),
+ ('âĻ“', 'âĻ“'),
+ ('âĻ•', 'âĻ•'),
+ ('âĻ—', 'âĻ—'),
+ ('⧘', '⧘'),
+ ('⧚', '⧚'),
+ ('â§ŧ', 'â§ŧ'),
+ ('â¸ĸ', 'â¸ĸ'),
+ ('⸤', '⸤'),
+ ('â¸Ļ', 'â¸Ļ'),
+ ('⸨', '⸨'),
+ ('⹂', '⹂'),
+ ('〈', '〈'),
+ ('《', '《'),
+ ('「', '「'),
+ ('『', '『'),
+ ('【', '【'),
+ ('〔', '〔'),
+ ('〖', '〖'),
+ ('〘', '〘'),
+ ('〚', '〚'),
+ ('〝', '〝'),
+ ('ī´ŋ', 'ī´ŋ'),
+ ('ī¸—', 'ī¸—'),
+ ('ī¸ĩ', 'ī¸ĩ'),
+ ('ī¸ˇ', 'ī¸ˇ'),
+ ('ī¸š', 'ī¸š'),
+ ('ī¸ģ', 'ī¸ģ'),
+ ('ī¸Ŋ', 'ī¸Ŋ'),
+ ('ī¸ŋ', 'ī¸ŋ'),
+ ('īš', 'īš'),
+ ('īšƒ', 'īšƒ'),
+ ('īš‡', 'īš‡'),
+ ('īš™', 'īš™'),
+ ('īš›', 'īš›'),
+ ('īš', 'īš'),
+ ('īŧˆ', 'īŧˆ'),
+ ('īŧģ', 'īŧģ'),
+ ('īŊ›', 'īŊ›'),
+ ('īŊŸ', 'īŊŸ'),
+ ('īŊĸ', 'īŊĸ'),
+];
+
+pub const OTHER: &'static [(char, char)] = &[
+ ('\u{0}', '\u{1f}'),
+ ('\u{7f}', '\u{9f}'),
+ ('\u{ad}', '\u{ad}'),
+ ('\u{378}', '\u{379}'),
+ ('\u{380}', '\u{383}'),
+ ('\u{38b}', '\u{38b}'),
+ ('\u{38d}', '\u{38d}'),
+ ('\u{3a2}', '\u{3a2}'),
+ ('\u{530}', '\u{530}'),
+ ('\u{557}', '\u{558}'),
+ ('\u{58b}', '\u{58c}'),
+ ('\u{590}', '\u{590}'),
+ ('\u{5c8}', '\u{5cf}'),
+ ('\u{5eb}', '\u{5ee}'),
+ ('\u{5f5}', '\u{605}'),
+ ('\u{61c}', '\u{61d}'),
+ ('\u{6dd}', '\u{6dd}'),
+ ('\u{70e}', '\u{70f}'),
+ ('\u{74b}', '\u{74c}'),
+ ('\u{7b2}', '\u{7bf}'),
+ ('\u{7fb}', '\u{7fc}'),
+ ('\u{82e}', '\u{82f}'),
+ ('\u{83f}', '\u{83f}'),
+ ('\u{85c}', '\u{85d}'),
+ ('\u{85f}', '\u{85f}'),
+ ('\u{86b}', '\u{89f}'),
+ ('\u{8b5}', '\u{8b5}'),
+ ('\u{8c8}', '\u{8d2}'),
+ ('\u{8e2}', '\u{8e2}'),
+ ('\u{984}', '\u{984}'),
+ ('\u{98d}', '\u{98e}'),
+ ('\u{991}', '\u{992}'),
+ ('\u{9a9}', '\u{9a9}'),
+ ('\u{9b1}', '\u{9b1}'),
+ ('\u{9b3}', '\u{9b5}'),
+ ('\u{9ba}', '\u{9bb}'),
+ ('\u{9c5}', '\u{9c6}'),
+ ('\u{9c9}', '\u{9ca}'),
+ ('\u{9cf}', '\u{9d6}'),
+ ('\u{9d8}', '\u{9db}'),
+ ('\u{9de}', '\u{9de}'),
+ ('\u{9e4}', '\u{9e5}'),
+ ('\u{9ff}', '\u{a00}'),
+ ('\u{a04}', '\u{a04}'),
+ ('\u{a0b}', '\u{a0e}'),
+ ('\u{a11}', '\u{a12}'),
+ ('\u{a29}', '\u{a29}'),
+ ('\u{a31}', '\u{a31}'),
+ ('\u{a34}', '\u{a34}'),
+ ('\u{a37}', '\u{a37}'),
+ ('\u{a3a}', '\u{a3b}'),
+ ('\u{a3d}', '\u{a3d}'),
+ ('\u{a43}', '\u{a46}'),
+ ('\u{a49}', '\u{a4a}'),
+ ('\u{a4e}', '\u{a50}'),
+ ('\u{a52}', '\u{a58}'),
+ ('\u{a5d}', '\u{a5d}'),
+ ('\u{a5f}', '\u{a65}'),
+ ('\u{a77}', '\u{a80}'),
+ ('\u{a84}', '\u{a84}'),
+ ('\u{a8e}', '\u{a8e}'),
+ ('\u{a92}', '\u{a92}'),
+ ('\u{aa9}', '\u{aa9}'),
+ ('\u{ab1}', '\u{ab1}'),
+ ('\u{ab4}', '\u{ab4}'),
+ ('\u{aba}', '\u{abb}'),
+ ('\u{ac6}', '\u{ac6}'),
+ ('\u{aca}', '\u{aca}'),
+ ('\u{ace}', '\u{acf}'),
+ ('\u{ad1}', '\u{adf}'),
+ ('\u{ae4}', '\u{ae5}'),
+ ('\u{af2}', '\u{af8}'),
+ ('\u{b00}', '\u{b00}'),
+ ('\u{b04}', '\u{b04}'),
+ ('\u{b0d}', '\u{b0e}'),
+ ('\u{b11}', '\u{b12}'),
+ ('\u{b29}', '\u{b29}'),
+ ('\u{b31}', '\u{b31}'),
+ ('\u{b34}', '\u{b34}'),
+ ('\u{b3a}', '\u{b3b}'),
+ ('\u{b45}', '\u{b46}'),
+ ('\u{b49}', '\u{b4a}'),
+ ('\u{b4e}', '\u{b54}'),
+ ('\u{b58}', '\u{b5b}'),
+ ('\u{b5e}', '\u{b5e}'),
+ ('\u{b64}', '\u{b65}'),
+ ('\u{b78}', '\u{b81}'),
+ ('\u{b84}', '\u{b84}'),
+ ('\u{b8b}', '\u{b8d}'),
+ ('\u{b91}', '\u{b91}'),
+ ('\u{b96}', '\u{b98}'),
+ ('\u{b9b}', '\u{b9b}'),
+ ('\u{b9d}', '\u{b9d}'),
+ ('\u{ba0}', '\u{ba2}'),
+ ('\u{ba5}', '\u{ba7}'),
+ ('\u{bab}', '\u{bad}'),
+ ('\u{bba}', '\u{bbd}'),
+ ('\u{bc3}', '\u{bc5}'),
+ ('\u{bc9}', '\u{bc9}'),
+ ('\u{bce}', '\u{bcf}'),
+ ('\u{bd1}', '\u{bd6}'),
+ ('\u{bd8}', '\u{be5}'),
+ ('\u{bfb}', '\u{bff}'),
+ ('\u{c0d}', '\u{c0d}'),
+ ('\u{c11}', '\u{c11}'),
+ ('\u{c29}', '\u{c29}'),
+ ('\u{c3a}', '\u{c3c}'),
+ ('\u{c45}', '\u{c45}'),
+ ('\u{c49}', '\u{c49}'),
+ ('\u{c4e}', '\u{c54}'),
+ ('\u{c57}', '\u{c57}'),
+ ('\u{c5b}', '\u{c5f}'),
+ ('\u{c64}', '\u{c65}'),
+ ('\u{c70}', '\u{c76}'),
+ ('\u{c8d}', '\u{c8d}'),
+ ('\u{c91}', '\u{c91}'),
+ ('\u{ca9}', '\u{ca9}'),
+ ('\u{cb4}', '\u{cb4}'),
+ ('\u{cba}', '\u{cbb}'),
+ ('\u{cc5}', '\u{cc5}'),
+ ('\u{cc9}', '\u{cc9}'),
+ ('\u{cce}', '\u{cd4}'),
+ ('\u{cd7}', '\u{cdd}'),
+ ('\u{cdf}', '\u{cdf}'),
+ ('\u{ce4}', '\u{ce5}'),
+ ('\u{cf0}', '\u{cf0}'),
+ ('\u{cf3}', '\u{cff}'),
+ ('\u{d0d}', '\u{d0d}'),
+ ('\u{d11}', '\u{d11}'),
+ ('\u{d45}', '\u{d45}'),
+ ('\u{d49}', '\u{d49}'),
+ ('\u{d50}', '\u{d53}'),
+ ('\u{d64}', '\u{d65}'),
+ ('\u{d80}', '\u{d80}'),
+ ('\u{d84}', '\u{d84}'),
+ ('\u{d97}', '\u{d99}'),
+ ('\u{db2}', '\u{db2}'),
+ ('\u{dbc}', '\u{dbc}'),
+ ('\u{dbe}', '\u{dbf}'),
+ ('\u{dc7}', '\u{dc9}'),
+ ('\u{dcb}', '\u{dce}'),
+ ('\u{dd5}', '\u{dd5}'),
+ ('\u{dd7}', '\u{dd7}'),
+ ('\u{de0}', '\u{de5}'),
+ ('\u{df0}', '\u{df1}'),
+ ('\u{df5}', '\u{e00}'),
+ ('\u{e3b}', '\u{e3e}'),
+ ('\u{e5c}', '\u{e80}'),
+ ('\u{e83}', '\u{e83}'),
+ ('\u{e85}', '\u{e85}'),
+ ('\u{e8b}', '\u{e8b}'),
+ ('\u{ea4}', '\u{ea4}'),
+ ('\u{ea6}', '\u{ea6}'),
+ ('\u{ebe}', '\u{ebf}'),
+ ('\u{ec5}', '\u{ec5}'),
+ ('\u{ec7}', '\u{ec7}'),
+ ('\u{ece}', '\u{ecf}'),
+ ('\u{eda}', '\u{edb}'),
+ ('\u{ee0}', '\u{eff}'),
+ ('\u{f48}', '\u{f48}'),
+ ('\u{f6d}', '\u{f70}'),
+ ('\u{f98}', '\u{f98}'),
+ ('\u{fbd}', '\u{fbd}'),
+ ('\u{fcd}', '\u{fcd}'),
+ ('\u{fdb}', '\u{fff}'),
+ ('\u{10c6}', '\u{10c6}'),
+ ('\u{10c8}', '\u{10cc}'),
+ ('\u{10ce}', '\u{10cf}'),
+ ('\u{1249}', '\u{1249}'),
+ ('\u{124e}', '\u{124f}'),
+ ('\u{1257}', '\u{1257}'),
+ ('\u{1259}', '\u{1259}'),
+ ('\u{125e}', '\u{125f}'),
+ ('\u{1289}', '\u{1289}'),
+ ('\u{128e}', '\u{128f}'),
+ ('\u{12b1}', '\u{12b1}'),
+ ('\u{12b6}', '\u{12b7}'),
+ ('\u{12bf}', '\u{12bf}'),
+ ('\u{12c1}', '\u{12c1}'),
+ ('\u{12c6}', '\u{12c7}'),
+ ('\u{12d7}', '\u{12d7}'),
+ ('\u{1311}', '\u{1311}'),
+ ('\u{1316}', '\u{1317}'),
+ ('\u{135b}', '\u{135c}'),
+ ('\u{137d}', '\u{137f}'),
+ ('\u{139a}', '\u{139f}'),
+ ('\u{13f6}', '\u{13f7}'),
+ ('\u{13fe}', '\u{13ff}'),
+ ('\u{169d}', '\u{169f}'),
+ ('\u{16f9}', '\u{16ff}'),
+ ('\u{170d}', '\u{170d}'),
+ ('\u{1715}', '\u{171f}'),
+ ('\u{1737}', '\u{173f}'),
+ ('\u{1754}', '\u{175f}'),
+ ('\u{176d}', '\u{176d}'),
+ ('\u{1771}', '\u{1771}'),
+ ('\u{1774}', '\u{177f}'),
+ ('\u{17de}', '\u{17df}'),
+ ('\u{17ea}', '\u{17ef}'),
+ ('\u{17fa}', '\u{17ff}'),
+ ('\u{180e}', '\u{180f}'),
+ ('\u{181a}', '\u{181f}'),
+ ('\u{1879}', '\u{187f}'),
+ ('\u{18ab}', '\u{18af}'),
+ ('\u{18f6}', '\u{18ff}'),
+ ('\u{191f}', '\u{191f}'),
+ ('\u{192c}', '\u{192f}'),
+ ('\u{193c}', '\u{193f}'),
+ ('\u{1941}', '\u{1943}'),
+ ('\u{196e}', '\u{196f}'),
+ ('\u{1975}', '\u{197f}'),
+ ('\u{19ac}', '\u{19af}'),
+ ('\u{19ca}', '\u{19cf}'),
+ ('\u{19db}', '\u{19dd}'),
+ ('\u{1a1c}', '\u{1a1d}'),
+ ('\u{1a5f}', '\u{1a5f}'),
+ ('\u{1a7d}', '\u{1a7e}'),
+ ('\u{1a8a}', '\u{1a8f}'),
+ ('\u{1a9a}', '\u{1a9f}'),
+ ('\u{1aae}', '\u{1aaf}'),
+ ('\u{1ac1}', '\u{1aff}'),
+ ('\u{1b4c}', '\u{1b4f}'),
+ ('\u{1b7d}', '\u{1b7f}'),
+ ('\u{1bf4}', '\u{1bfb}'),
+ ('\u{1c38}', '\u{1c3a}'),
+ ('\u{1c4a}', '\u{1c4c}'),
+ ('\u{1c89}', '\u{1c8f}'),
+ ('\u{1cbb}', '\u{1cbc}'),
+ ('\u{1cc8}', '\u{1ccf}'),
+ ('\u{1cfb}', '\u{1cff}'),
+ ('\u{1dfa}', '\u{1dfa}'),
+ ('\u{1f16}', '\u{1f17}'),
+ ('\u{1f1e}', '\u{1f1f}'),
+ ('\u{1f46}', '\u{1f47}'),
+ ('\u{1f4e}', '\u{1f4f}'),
+ ('\u{1f58}', '\u{1f58}'),
+ ('\u{1f5a}', '\u{1f5a}'),
+ ('\u{1f5c}', '\u{1f5c}'),
+ ('\u{1f5e}', '\u{1f5e}'),
+ ('\u{1f7e}', '\u{1f7f}'),
+ ('\u{1fb5}', '\u{1fb5}'),
+ ('\u{1fc5}', '\u{1fc5}'),
+ ('\u{1fd4}', '\u{1fd5}'),
+ ('\u{1fdc}', '\u{1fdc}'),
+ ('\u{1ff0}', '\u{1ff1}'),
+ ('\u{1ff5}', '\u{1ff5}'),
+ ('\u{1fff}', '\u{1fff}'),
+ ('\u{200b}', '\u{200f}'),
+ ('\u{202a}', '\u{202e}'),
+ ('\u{2060}', '\u{206f}'),
+ ('\u{2072}', '\u{2073}'),
+ ('\u{208f}', '\u{208f}'),
+ ('\u{209d}', '\u{209f}'),
+ ('\u{20c0}', '\u{20cf}'),
+ ('\u{20f1}', '\u{20ff}'),
+ ('\u{218c}', '\u{218f}'),
+ ('\u{2427}', '\u{243f}'),
+ ('\u{244b}', '\u{245f}'),
+ ('\u{2b74}', '\u{2b75}'),
+ ('\u{2b96}', '\u{2b96}'),
+ ('\u{2c2f}', '\u{2c2f}'),
+ ('\u{2c5f}', '\u{2c5f}'),
+ ('\u{2cf4}', '\u{2cf8}'),
+ ('\u{2d26}', '\u{2d26}'),
+ ('\u{2d28}', '\u{2d2c}'),
+ ('\u{2d2e}', '\u{2d2f}'),
+ ('\u{2d68}', '\u{2d6e}'),
+ ('\u{2d71}', '\u{2d7e}'),
+ ('\u{2d97}', '\u{2d9f}'),
+ ('\u{2da7}', '\u{2da7}'),
+ ('\u{2daf}', '\u{2daf}'),
+ ('\u{2db7}', '\u{2db7}'),
+ ('\u{2dbf}', '\u{2dbf}'),
+ ('\u{2dc7}', '\u{2dc7}'),
+ ('\u{2dcf}', '\u{2dcf}'),
+ ('\u{2dd7}', '\u{2dd7}'),
+ ('\u{2ddf}', '\u{2ddf}'),
+ ('\u{2e53}', '\u{2e7f}'),
+ ('\u{2e9a}', '\u{2e9a}'),
+ ('\u{2ef4}', '\u{2eff}'),
+ ('\u{2fd6}', '\u{2fef}'),
+ ('\u{2ffc}', '\u{2fff}'),
+ ('\u{3040}', '\u{3040}'),
+ ('\u{3097}', '\u{3098}'),
+ ('\u{3100}', '\u{3104}'),
+ ('\u{3130}', '\u{3130}'),
+ ('\u{318f}', '\u{318f}'),
+ ('\u{31e4}', '\u{31ef}'),
+ ('\u{321f}', '\u{321f}'),
+ ('\u{9ffd}', '\u{9fff}'),
+ ('\u{a48d}', '\u{a48f}'),
+ ('\u{a4c7}', '\u{a4cf}'),
+ ('\u{a62c}', '\u{a63f}'),
+ ('\u{a6f8}', '\u{a6ff}'),
+ ('\u{a7c0}', '\u{a7c1}'),
+ ('\u{a7cb}', '\u{a7f4}'),
+ ('\u{a82d}', '\u{a82f}'),
+ ('\u{a83a}', '\u{a83f}'),
+ ('\u{a878}', '\u{a87f}'),
+ ('\u{a8c6}', '\u{a8cd}'),
+ ('\u{a8da}', '\u{a8df}'),
+ ('\u{a954}', '\u{a95e}'),
+ ('\u{a97d}', '\u{a97f}'),
+ ('\u{a9ce}', '\u{a9ce}'),
+ ('\u{a9da}', '\u{a9dd}'),
+ ('\u{a9ff}', '\u{a9ff}'),
+ ('\u{aa37}', '\u{aa3f}'),
+ ('\u{aa4e}', '\u{aa4f}'),
+ ('\u{aa5a}', '\u{aa5b}'),
+ ('\u{aac3}', '\u{aada}'),
+ ('\u{aaf7}', '\u{ab00}'),
+ ('\u{ab07}', '\u{ab08}'),
+ ('\u{ab0f}', '\u{ab10}'),
+ ('\u{ab17}', '\u{ab1f}'),
+ ('\u{ab27}', '\u{ab27}'),
+ ('\u{ab2f}', '\u{ab2f}'),
+ ('\u{ab6c}', '\u{ab6f}'),
+ ('\u{abee}', '\u{abef}'),
+ ('\u{abfa}', '\u{abff}'),
+ ('\u{d7a4}', '\u{d7af}'),
+ ('\u{d7c7}', '\u{d7ca}'),
+ ('\u{d7fc}', '\u{f8ff}'),
+ ('\u{fa6e}', '\u{fa6f}'),
+ ('\u{fada}', '\u{faff}'),
+ ('\u{fb07}', '\u{fb12}'),
+ ('\u{fb18}', '\u{fb1c}'),
+ ('\u{fb37}', '\u{fb37}'),
+ ('\u{fb3d}', '\u{fb3d}'),
+ ('\u{fb3f}', '\u{fb3f}'),
+ ('\u{fb42}', '\u{fb42}'),
+ ('\u{fb45}', '\u{fb45}'),
+ ('\u{fbc2}', '\u{fbd2}'),
+ ('\u{fd40}', '\u{fd4f}'),
+ ('\u{fd90}', '\u{fd91}'),
+ ('\u{fdc8}', '\u{fdef}'),
+ ('\u{fdfe}', '\u{fdff}'),
+ ('\u{fe1a}', '\u{fe1f}'),
+ ('\u{fe53}', '\u{fe53}'),
+ ('\u{fe67}', '\u{fe67}'),
+ ('\u{fe6c}', '\u{fe6f}'),
+ ('\u{fe75}', '\u{fe75}'),
+ ('\u{fefd}', '\u{ff00}'),
+ ('\u{ffbf}', '\u{ffc1}'),
+ ('\u{ffc8}', '\u{ffc9}'),
+ ('\u{ffd0}', '\u{ffd1}'),
+ ('\u{ffd8}', '\u{ffd9}'),
+ ('\u{ffdd}', '\u{ffdf}'),
+ ('\u{ffe7}', '\u{ffe7}'),
+ ('\u{ffef}', '\u{fffb}'),
+ ('\u{fffe}', '\u{ffff}'),
+ ('\u{1000c}', '\u{1000c}'),
+ ('\u{10027}', '\u{10027}'),
+ ('\u{1003b}', '\u{1003b}'),
+ ('\u{1003e}', '\u{1003e}'),
+ ('\u{1004e}', '\u{1004f}'),
+ ('\u{1005e}', '\u{1007f}'),
+ ('\u{100fb}', '\u{100ff}'),
+ ('\u{10103}', '\u{10106}'),
+ ('\u{10134}', '\u{10136}'),
+ ('\u{1018f}', '\u{1018f}'),
+ ('\u{1019d}', '\u{1019f}'),
+ ('\u{101a1}', '\u{101cf}'),
+ ('\u{101fe}', '\u{1027f}'),
+ ('\u{1029d}', '\u{1029f}'),
+ ('\u{102d1}', '\u{102df}'),
+ ('\u{102fc}', '\u{102ff}'),
+ ('\u{10324}', '\u{1032c}'),
+ ('\u{1034b}', '\u{1034f}'),
+ ('\u{1037b}', '\u{1037f}'),
+ ('\u{1039e}', '\u{1039e}'),
+ ('\u{103c4}', '\u{103c7}'),
+ ('\u{103d6}', '\u{103ff}'),
+ ('\u{1049e}', '\u{1049f}'),
+ ('\u{104aa}', '\u{104af}'),
+ ('\u{104d4}', '\u{104d7}'),
+ ('\u{104fc}', '\u{104ff}'),
+ ('\u{10528}', '\u{1052f}'),
+ ('\u{10564}', '\u{1056e}'),
+ ('\u{10570}', '\u{105ff}'),
+ ('\u{10737}', '\u{1073f}'),
+ ('\u{10756}', '\u{1075f}'),
+ ('\u{10768}', '\u{107ff}'),
+ ('\u{10806}', '\u{10807}'),
+ ('\u{10809}', '\u{10809}'),
+ ('\u{10836}', '\u{10836}'),
+ ('\u{10839}', '\u{1083b}'),
+ ('\u{1083d}', '\u{1083e}'),
+ ('\u{10856}', '\u{10856}'),
+ ('\u{1089f}', '\u{108a6}'),
+ ('\u{108b0}', '\u{108df}'),
+ ('\u{108f3}', '\u{108f3}'),
+ ('\u{108f6}', '\u{108fa}'),
+ ('\u{1091c}', '\u{1091e}'),
+ ('\u{1093a}', '\u{1093e}'),
+ ('\u{10940}', '\u{1097f}'),
+ ('\u{109b8}', '\u{109bb}'),
+ ('\u{109d0}', '\u{109d1}'),
+ ('\u{10a04}', '\u{10a04}'),
+ ('\u{10a07}', '\u{10a0b}'),
+ ('\u{10a14}', '\u{10a14}'),
+ ('\u{10a18}', '\u{10a18}'),
+ ('\u{10a36}', '\u{10a37}'),
+ ('\u{10a3b}', '\u{10a3e}'),
+ ('\u{10a49}', '\u{10a4f}'),
+ ('\u{10a59}', '\u{10a5f}'),
+ ('\u{10aa0}', '\u{10abf}'),
+ ('\u{10ae7}', '\u{10aea}'),
+ ('\u{10af7}', '\u{10aff}'),
+ ('\u{10b36}', '\u{10b38}'),
+ ('\u{10b56}', '\u{10b57}'),
+ ('\u{10b73}', '\u{10b77}'),
+ ('\u{10b92}', '\u{10b98}'),
+ ('\u{10b9d}', '\u{10ba8}'),
+ ('\u{10bb0}', '\u{10bff}'),
+ ('\u{10c49}', '\u{10c7f}'),
+ ('\u{10cb3}', '\u{10cbf}'),
+ ('\u{10cf3}', '\u{10cf9}'),
+ ('\u{10d28}', '\u{10d2f}'),
+ ('\u{10d3a}', '\u{10e5f}'),
+ ('\u{10e7f}', '\u{10e7f}'),
+ ('\u{10eaa}', '\u{10eaa}'),
+ ('\u{10eae}', '\u{10eaf}'),
+ ('\u{10eb2}', '\u{10eff}'),
+ ('\u{10f28}', '\u{10f2f}'),
+ ('\u{10f5a}', '\u{10faf}'),
+ ('\u{10fcc}', '\u{10fdf}'),
+ ('\u{10ff7}', '\u{10fff}'),
+ ('\u{1104e}', '\u{11051}'),
+ ('\u{11070}', '\u{1107e}'),
+ ('\u{110bd}', '\u{110bd}'),
+ ('\u{110c2}', '\u{110cf}'),
+ ('\u{110e9}', '\u{110ef}'),
+ ('\u{110fa}', '\u{110ff}'),
+ ('\u{11135}', '\u{11135}'),
+ ('\u{11148}', '\u{1114f}'),
+ ('\u{11177}', '\u{1117f}'),
+ ('\u{111e0}', '\u{111e0}'),
+ ('\u{111f5}', '\u{111ff}'),
+ ('\u{11212}', '\u{11212}'),
+ ('\u{1123f}', '\u{1127f}'),
+ ('\u{11287}', '\u{11287}'),
+ ('\u{11289}', '\u{11289}'),
+ ('\u{1128e}', '\u{1128e}'),
+ ('\u{1129e}', '\u{1129e}'),
+ ('\u{112aa}', '\u{112af}'),
+ ('\u{112eb}', '\u{112ef}'),
+ ('\u{112fa}', '\u{112ff}'),
+ ('\u{11304}', '\u{11304}'),
+ ('\u{1130d}', '\u{1130e}'),
+ ('\u{11311}', '\u{11312}'),
+ ('\u{11329}', '\u{11329}'),
+ ('\u{11331}', '\u{11331}'),
+ ('\u{11334}', '\u{11334}'),
+ ('\u{1133a}', '\u{1133a}'),
+ ('\u{11345}', '\u{11346}'),
+ ('\u{11349}', '\u{1134a}'),
+ ('\u{1134e}', '\u{1134f}'),
+ ('\u{11351}', '\u{11356}'),
+ ('\u{11358}', '\u{1135c}'),
+ ('\u{11364}', '\u{11365}'),
+ ('\u{1136d}', '\u{1136f}'),
+ ('\u{11375}', '\u{113ff}'),
+ ('\u{1145c}', '\u{1145c}'),
+ ('\u{11462}', '\u{1147f}'),
+ ('\u{114c8}', '\u{114cf}'),
+ ('\u{114da}', '\u{1157f}'),
+ ('\u{115b6}', '\u{115b7}'),
+ ('\u{115de}', '\u{115ff}'),
+ ('\u{11645}', '\u{1164f}'),
+ ('\u{1165a}', '\u{1165f}'),
+ ('\u{1166d}', '\u{1167f}'),
+ ('\u{116b9}', '\u{116bf}'),
+ ('\u{116ca}', '\u{116ff}'),
+ ('\u{1171b}', '\u{1171c}'),
+ ('\u{1172c}', '\u{1172f}'),
+ ('\u{11740}', '\u{117ff}'),
+ ('\u{1183c}', '\u{1189f}'),
+ ('\u{118f3}', '\u{118fe}'),
+ ('\u{11907}', '\u{11908}'),
+ ('\u{1190a}', '\u{1190b}'),
+ ('\u{11914}', '\u{11914}'),
+ ('\u{11917}', '\u{11917}'),
+ ('\u{11936}', '\u{11936}'),
+ ('\u{11939}', '\u{1193a}'),
+ ('\u{11947}', '\u{1194f}'),
+ ('\u{1195a}', '\u{1199f}'),
+ ('\u{119a8}', '\u{119a9}'),
+ ('\u{119d8}', '\u{119d9}'),
+ ('\u{119e5}', '\u{119ff}'),
+ ('\u{11a48}', '\u{11a4f}'),
+ ('\u{11aa3}', '\u{11abf}'),
+ ('\u{11af9}', '\u{11bff}'),
+ ('\u{11c09}', '\u{11c09}'),
+ ('\u{11c37}', '\u{11c37}'),
+ ('\u{11c46}', '\u{11c4f}'),
+ ('\u{11c6d}', '\u{11c6f}'),
+ ('\u{11c90}', '\u{11c91}'),
+ ('\u{11ca8}', '\u{11ca8}'),
+ ('\u{11cb7}', '\u{11cff}'),
+ ('\u{11d07}', '\u{11d07}'),
+ ('\u{11d0a}', '\u{11d0a}'),
+ ('\u{11d37}', '\u{11d39}'),
+ ('\u{11d3b}', '\u{11d3b}'),
+ ('\u{11d3e}', '\u{11d3e}'),
+ ('\u{11d48}', '\u{11d4f}'),
+ ('\u{11d5a}', '\u{11d5f}'),
+ ('\u{11d66}', '\u{11d66}'),
+ ('\u{11d69}', '\u{11d69}'),
+ ('\u{11d8f}', '\u{11d8f}'),
+ ('\u{11d92}', '\u{11d92}'),
+ ('\u{11d99}', '\u{11d9f}'),
+ ('\u{11daa}', '\u{11edf}'),
+ ('\u{11ef9}', '\u{11faf}'),
+ ('\u{11fb1}', '\u{11fbf}'),
+ ('\u{11ff2}', '\u{11ffe}'),
+ ('\u{1239a}', '\u{123ff}'),
+ ('\u{1246f}', '\u{1246f}'),
+ ('\u{12475}', '\u{1247f}'),
+ ('\u{12544}', '\u{12fff}'),
+ ('\u{1342f}', '\u{143ff}'),
+ ('\u{14647}', '\u{167ff}'),
+ ('\u{16a39}', '\u{16a3f}'),
+ ('\u{16a5f}', '\u{16a5f}'),
+ ('\u{16a6a}', '\u{16a6d}'),
+ ('\u{16a70}', '\u{16acf}'),
+ ('\u{16aee}', '\u{16aef}'),
+ ('\u{16af6}', '\u{16aff}'),
+ ('\u{16b46}', '\u{16b4f}'),
+ ('\u{16b5a}', '\u{16b5a}'),
+ ('\u{16b62}', '\u{16b62}'),
+ ('\u{16b78}', '\u{16b7c}'),
+ ('\u{16b90}', '\u{16e3f}'),
+ ('\u{16e9b}', '\u{16eff}'),
+ ('\u{16f4b}', '\u{16f4e}'),
+ ('\u{16f88}', '\u{16f8e}'),
+ ('\u{16fa0}', '\u{16fdf}'),
+ ('\u{16fe5}', '\u{16fef}'),
+ ('\u{16ff2}', '\u{16fff}'),
+ ('\u{187f8}', '\u{187ff}'),
+ ('\u{18cd6}', '\u{18cff}'),
+ ('\u{18d09}', '\u{1afff}'),
+ ('\u{1b11f}', '\u{1b14f}'),
+ ('\u{1b153}', '\u{1b163}'),
+ ('\u{1b168}', '\u{1b16f}'),
+ ('\u{1b2fc}', '\u{1bbff}'),
+ ('\u{1bc6b}', '\u{1bc6f}'),
+ ('\u{1bc7d}', '\u{1bc7f}'),
+ ('\u{1bc89}', '\u{1bc8f}'),
+ ('\u{1bc9a}', '\u{1bc9b}'),
+ ('\u{1bca0}', '\u{1cfff}'),
+ ('\u{1d0f6}', '\u{1d0ff}'),
+ ('\u{1d127}', '\u{1d128}'),
+ ('\u{1d173}', '\u{1d17a}'),
+ ('\u{1d1e9}', '\u{1d1ff}'),
+ ('\u{1d246}', '\u{1d2df}'),
+ ('\u{1d2f4}', '\u{1d2ff}'),
+ ('\u{1d357}', '\u{1d35f}'),
+ ('\u{1d379}', '\u{1d3ff}'),
+ ('\u{1d455}', '\u{1d455}'),
+ ('\u{1d49d}', '\u{1d49d}'),
+ ('\u{1d4a0}', '\u{1d4a1}'),
+ ('\u{1d4a3}', '\u{1d4a4}'),
+ ('\u{1d4a7}', '\u{1d4a8}'),
+ ('\u{1d4ad}', '\u{1d4ad}'),
+ ('\u{1d4ba}', '\u{1d4ba}'),
+ ('\u{1d4bc}', '\u{1d4bc}'),
+ ('\u{1d4c4}', '\u{1d4c4}'),
+ ('\u{1d506}', '\u{1d506}'),
+ ('\u{1d50b}', '\u{1d50c}'),
+ ('\u{1d515}', '\u{1d515}'),
+ ('\u{1d51d}', '\u{1d51d}'),
+ ('\u{1d53a}', '\u{1d53a}'),
+ ('\u{1d53f}', '\u{1d53f}'),
+ ('\u{1d545}', '\u{1d545}'),
+ ('\u{1d547}', '\u{1d549}'),
+ ('\u{1d551}', '\u{1d551}'),
+ ('\u{1d6a6}', '\u{1d6a7}'),
+ ('\u{1d7cc}', '\u{1d7cd}'),
+ ('\u{1da8c}', '\u{1da9a}'),
+ ('\u{1daa0}', '\u{1daa0}'),
+ ('\u{1dab0}', '\u{1dfff}'),
+ ('\u{1e007}', '\u{1e007}'),
+ ('\u{1e019}', '\u{1e01a}'),
+ ('\u{1e022}', '\u{1e022}'),
+ ('\u{1e025}', '\u{1e025}'),
+ ('\u{1e02b}', '\u{1e0ff}'),
+ ('\u{1e12d}', '\u{1e12f}'),
+ ('\u{1e13e}', '\u{1e13f}'),
+ ('\u{1e14a}', '\u{1e14d}'),
+ ('\u{1e150}', '\u{1e2bf}'),
+ ('\u{1e2fa}', '\u{1e2fe}'),
+ ('\u{1e300}', '\u{1e7ff}'),
+ ('\u{1e8c5}', '\u{1e8c6}'),
+ ('\u{1e8d7}', '\u{1e8ff}'),
+ ('\u{1e94c}', '\u{1e94f}'),
+ ('\u{1e95a}', '\u{1e95d}'),
+ ('\u{1e960}', '\u{1ec70}'),
+ ('\u{1ecb5}', '\u{1ed00}'),
+ ('\u{1ed3e}', '\u{1edff}'),
+ ('\u{1ee04}', '\u{1ee04}'),
+ ('\u{1ee20}', '\u{1ee20}'),
+ ('\u{1ee23}', '\u{1ee23}'),
+ ('\u{1ee25}', '\u{1ee26}'),
+ ('\u{1ee28}', '\u{1ee28}'),
+ ('\u{1ee33}', '\u{1ee33}'),
+ ('\u{1ee38}', '\u{1ee38}'),
+ ('\u{1ee3a}', '\u{1ee3a}'),
+ ('\u{1ee3c}', '\u{1ee41}'),
+ ('\u{1ee43}', '\u{1ee46}'),
+ ('\u{1ee48}', '\u{1ee48}'),
+ ('\u{1ee4a}', '\u{1ee4a}'),
+ ('\u{1ee4c}', '\u{1ee4c}'),
+ ('\u{1ee50}', '\u{1ee50}'),
+ ('\u{1ee53}', '\u{1ee53}'),
+ ('\u{1ee55}', '\u{1ee56}'),
+ ('\u{1ee58}', '\u{1ee58}'),
+ ('\u{1ee5a}', '\u{1ee5a}'),
+ ('\u{1ee5c}', '\u{1ee5c}'),
+ ('\u{1ee5e}', '\u{1ee5e}'),
+ ('\u{1ee60}', '\u{1ee60}'),
+ ('\u{1ee63}', '\u{1ee63}'),
+ ('\u{1ee65}', '\u{1ee66}'),
+ ('\u{1ee6b}', '\u{1ee6b}'),
+ ('\u{1ee73}', '\u{1ee73}'),
+ ('\u{1ee78}', '\u{1ee78}'),
+ ('\u{1ee7d}', '\u{1ee7d}'),
+ ('\u{1ee7f}', '\u{1ee7f}'),
+ ('\u{1ee8a}', '\u{1ee8a}'),
+ ('\u{1ee9c}', '\u{1eea0}'),
+ ('\u{1eea4}', '\u{1eea4}'),
+ ('\u{1eeaa}', '\u{1eeaa}'),
+ ('\u{1eebc}', '\u{1eeef}'),
+ ('\u{1eef2}', '\u{1efff}'),
+ ('\u{1f02c}', '\u{1f02f}'),
+ ('\u{1f094}', '\u{1f09f}'),
+ ('\u{1f0af}', '\u{1f0b0}'),
+ ('\u{1f0c0}', '\u{1f0c0}'),
+ ('\u{1f0d0}', '\u{1f0d0}'),
+ ('\u{1f0f6}', '\u{1f0ff}'),
+ ('\u{1f1ae}', '\u{1f1e5}'),
+ ('\u{1f203}', '\u{1f20f}'),
+ ('\u{1f23c}', '\u{1f23f}'),
+ ('\u{1f249}', '\u{1f24f}'),
+ ('\u{1f252}', '\u{1f25f}'),
+ ('\u{1f266}', '\u{1f2ff}'),
+ ('\u{1f6d8}', '\u{1f6df}'),
+ ('\u{1f6ed}', '\u{1f6ef}'),
+ ('\u{1f6fd}', '\u{1f6ff}'),
+ ('\u{1f774}', '\u{1f77f}'),
+ ('\u{1f7d9}', '\u{1f7df}'),
+ ('\u{1f7ec}', '\u{1f7ff}'),
+ ('\u{1f80c}', '\u{1f80f}'),
+ ('\u{1f848}', '\u{1f84f}'),
+ ('\u{1f85a}', '\u{1f85f}'),
+ ('\u{1f888}', '\u{1f88f}'),
+ ('\u{1f8ae}', '\u{1f8af}'),
+ ('\u{1f8b2}', '\u{1f8ff}'),
+ ('\u{1f979}', '\u{1f979}'),
+ ('\u{1f9cc}', '\u{1f9cc}'),
+ ('\u{1fa54}', '\u{1fa5f}'),
+ ('\u{1fa6e}', '\u{1fa6f}'),
+ ('\u{1fa75}', '\u{1fa77}'),
+ ('\u{1fa7b}', '\u{1fa7f}'),
+ ('\u{1fa87}', '\u{1fa8f}'),
+ ('\u{1faa9}', '\u{1faaf}'),
+ ('\u{1fab7}', '\u{1fabf}'),
+ ('\u{1fac3}', '\u{1facf}'),
+ ('\u{1fad7}', '\u{1faff}'),
+ ('\u{1fb93}', '\u{1fb93}'),
+ ('\u{1fbcb}', '\u{1fbef}'),
+ ('\u{1fbfa}', '\u{1ffff}'),
+ ('\u{2a6de}', '\u{2a6ff}'),
+ ('\u{2b735}', '\u{2b73f}'),
+ ('\u{2b81e}', '\u{2b81f}'),
+ ('\u{2cea2}', '\u{2ceaf}'),
+ ('\u{2ebe1}', '\u{2f7ff}'),
+ ('\u{2fa1e}', '\u{2ffff}'),
+ ('\u{3134b}', '\u{e00ff}'),
+ ('\u{e01f0}', '\u{10ffff}'),
+];
+
+pub const OTHER_LETTER: &'static [(char, char)] = &[
+ ('ÂĒ', 'ÂĒ'),
+ ('Âē', 'Âē'),
+ ('Æģ', 'Æģ'),
+ ('Į€', 'Įƒ'),
+ ('ʔ', 'ʔ'),
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('Ø ', 'Øŋ'),
+ ('Ų', 'ŲŠ'),
+ ('ŲŽ', 'Ų¯'),
+ ('Ųą', 'ۓ'),
+ ('ە', 'ە'),
+ ('ÛŽ', 'Û¯'),
+ ('Ûē', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', 'ܐ'),
+ ('ܒ', 'ܯ'),
+ ('Ũ', 'ŪĨ'),
+ ('Ūą', 'Ūą'),
+ ('ߊ', 'ßĒ'),
+ ('ā €', 'ā •'),
+ ('āĄ€', 'āĄ˜'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('ā¤„', 'ā¤š'),
+ ('ā¤Ŋ', 'ā¤Ŋ'),
+ ('āĨ', 'āĨ'),
+ ('āĨ˜', 'āĨĄ'),
+ ('āĨ˛', 'āĻ€'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('āĻŊ', 'āĻŊ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', 'ā§Ą'),
+ ('ā§°', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠ˛', 'āŠ´'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('āĒŊ', 'āĒŊ'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', 'āĢĄ'),
+ ('āĢš', 'āĢš'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('āŦŊ', 'āŦŊ'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', 'ā­Ą'),
+ ('ā­ą', 'ā­ą'),
+ ('āŽƒ', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('ā¯', 'ā¯'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'ā°Ŋ'),
+ ('āą˜', 'āąš'),
+ ('āą ', 'āąĄ'),
+ ('ā˛€', 'ā˛€'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛Ŋ', 'ā˛Ŋ'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', 'āŗĄ'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d04}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´ē'),
+ ('ā´Ŋ', 'ā´Ŋ'),
+ ('āĩŽ', 'āĩŽ'),
+ ('āĩ”', 'āĩ–'),
+ ('āĩŸ', 'āĩĄ'),
+ ('āĩē', 'āĩŋ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('ā¸', 'ā¸°'),
+ ('ā¸˛', 'ā¸ŗ'),
+ ('āš€', 'āš…'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āē°'),
+ ('āē˛', 'āēŗ'),
+ ('āēŊ', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('āŊ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('āžˆ', 'āžŒ'),
+ ('က', 'á€Ē'),
+ ('á€ŋ', 'á€ŋ'),
+ ('ၐ', 'ၕ'),
+ ('ၚ', 'ၝ'),
+ ('ၥ', 'ၥ'),
+ ('áĨ', 'áĻ'),
+ ('၎', 'ၰ'),
+ ('áĩ', 'ႁ'),
+ ('ႎ', 'ႎ'),
+ ('ᄀ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('ᎀ', 'ᎏ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛱ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', 'ᜑ'),
+ ('ᜠ', 'ᜱ'),
+ ('ᝀ', 'ᝑ'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('ក', 'ážŗ'),
+ ('ៜ', 'ៜ'),
+ ('ᠠ', 'ᡂ'),
+ ('ᡄ', 'ᡸ'),
+ ('áĸ€', 'áĸ„'),
+ ('áĸ‡', 'áĸ¨'),
+ ('áĸĒ', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('áĨ', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('ᨀ', 'ᨖ'),
+ ('ᨠ', 'ᩔ'),
+ ('áŦ…', 'áŦŗ'),
+ ('ᭅ', 'ᭋ'),
+ ('ᎃ', 'Ꭰ'),
+ ('ᎎ', 'Ꭿ'),
+ ('áŽē', 'á¯Ĩ'),
+ ('ᰀ', 'á°Ŗ'),
+ ('ᱍ', 'ᱏ'),
+ ('ᱚ', 'ᱷ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗļ'),
+ ('áŗē', 'áŗē'),
+ ('â„ĩ', 'ℸ'),
+ ('â´°', 'âĩ§'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('〆', '〆'),
+ ('ã€ŧ', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('ゟ', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŋ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꀔ'),
+ ('ꀖ', 'ꒌ'),
+ ('ꓐ', 'ꓷ'),
+ ('ꔀ', 'ꘋ'),
+ ('ꘐ', 'ꘟ'),
+ ('ę˜Ē', 'ę˜Ģ'),
+ ('ꙮ', 'ꙮ'),
+ ('ꚠ', 'ę›Ĩ'),
+ ('ꞏ', 'ꞏ'),
+ ('ꟷ', 'ꟷ'),
+ ('ęŸģ', 'ꠁ'),
+ ('ꠃ', 'ꠅ'),
+ ('ꠇ', 'ꠊ'),
+ ('ꠌ', 'ę ĸ'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ‚', 'ęĸŗ'),
+ ('ęŖ˛', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', 'ęŖž'),
+ ('ꤊ', 'ę¤Ĩ'),
+ ('ꤰ', 'ęĨ†'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('ęĻ„', 'ęĻ˛'),
+ ('ꧠ', 'ꧤ'),
+ ('ꧧ', 'ę§¯'),
+ ('ę§ē', '꧞'),
+ ('ꨀ', 'ꨨ'),
+ ('ꩀ', 'ꩂ'),
+ ('ꩄ', 'ꩋ'),
+ ('ꊠ', 'ęŠ¯'),
+ ('ꊹ', 'ęŠļ'),
+ ('ęŠē', 'ęŠē'),
+ ('ꊞ', 'ęĒ¯'),
+ ('ęĒą', 'ęĒą'),
+ ('ęĒĩ', 'ęĒļ'),
+ ('ęĒš', 'ęĒŊ'),
+ ('ęĢ€', 'ęĢ€'),
+ ('ęĢ‚', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢœ'),
+ ('ęĢ ', 'ęĢĒ'),
+ ('ęĢ˛', 'ęĢ˛'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ę¯€', 'ę¯ĸ'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ', 'īŦ'),
+ ('īŦŸ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŊĻ', 'īŊ¯'),
+ ('īŊą', 'īž'),
+ ('īž ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍀'),
+ ('𐍂', '𐍉'),
+ ('𐍐', 'đĩ'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐑐', '𐒝'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '𐨀'),
+ ('𐨐', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', 'đĢ¤'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐴀', 'đ´Ŗ'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', 'đŊ…'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀃', '𑀷'),
+ ('𑂃', 'đ‘‚¯'),
+ ('𑃐', '𑃨'),
+ ('𑄃', 'đ‘„Ļ'),
+ ('𑅄', '𑅄'),
+ ('\u{11147}', '\u{11147}'),
+ ('𑅐', '𑅲'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('𑆃', '𑆲'),
+ ('𑇁', '𑇄'),
+ ('𑇚', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', 'đ‘ˆĢ'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '𑋞'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('đ‘ŒŊ', 'đ‘ŒŊ'),
+ ('𑍐', '𑍐'),
+ ('𑍝', '𑍡'),
+ ('𑐀', '𑐴'),
+ ('𑑇', '𑑊'),
+ ('𑑟', '\u{11461}'),
+ ('𑒀', 'đ‘’¯'),
+ ('𑓄', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑖀', '𑖮'),
+ ('𑗘', '𑗛'),
+ ('𑘀', 'đ‘˜¯'),
+ ('𑙄', '𑙄'),
+ ('𑚀', 'đ‘šĒ'),
+ ('𑚸', '𑚸'),
+ ('𑜀', '𑜚'),
+ ('𑠀', 'đ‘ Ģ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{1192f}'),
+ ('\u{1193f}', '\u{1193f}'),
+ ('\u{11941}', '\u{11941}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '𑧐'),
+ ('𑧡', '𑧡'),
+ ('đ‘§Ŗ', 'đ‘§Ŗ'),
+ ('𑨀', '𑨀'),
+ ('𑨋', '𑨲'),
+ ('đ‘¨ē', 'đ‘¨ē'),
+ ('𑩐', '𑩐'),
+ ('𑩜', 'đ‘Ē‰'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '𑰮'),
+ ('𑱀', '𑱀'),
+ ('𑱲', '𑲏'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '𑴰'),
+ ('đ‘ĩ†', 'đ‘ĩ†'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļ‰'),
+ ('đ‘ļ˜', 'đ‘ļ˜'),
+ ('đ‘ģ ', 'đ‘ģ˛'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('đ–Ŧ€', 'đ–Ŧ¯'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('đ–Ŋ', 'đ–Ŋ'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('𞄀', 'đž„Ŧ'),
+ ('𞅎', '𞅎'),
+ ('𞋀', 'đž‹Ģ'),
+ ('𞠀', 'đžŖ„'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const OTHER_NUMBER: &'static [(char, char)] = &[
+ ('²', 'Âŗ'),
+ ('š', 'š'),
+ ('Âŧ', 'ž'),
+ ('ā§´', 'ā§š'),
+ ('ā­˛', 'ā­ˇ'),
+ ('ā¯°', 'ā¯˛'),
+ ('āą¸', 'āąž'),
+ ('āĩ˜', 'āĩž'),
+ ('āĩ°', 'āĩ¸'),
+ ('āŧĒ', 'āŧŗ'),
+ ('ፊ', 'áŧ'),
+ ('៰', '៹'),
+ ('᧚', '᧚'),
+ ('⁰', '⁰'),
+ ('⁴', '⁚'),
+ ('₀', '₉'),
+ ('⅐', '⅟'),
+ ('↉', '↉'),
+ ('①', '⒛'),
+ ('â“Ē', 'â“ŋ'),
+ ('âļ', '➓'),
+ ('âŗŊ', 'âŗŊ'),
+ ('㆒', '㆕'),
+ ('㈠', '㈊'),
+ ('㉈', '㉏'),
+ ('㉑', '㉟'),
+ ('㊀', '㊉'),
+ ('ãŠą', 'ãŠŋ'),
+ ('ę °', 'ę ĩ'),
+ ('𐄇', 'đ„ŗ'),
+ ('đ…ĩ', '𐅸'),
+ ('𐆊', '𐆋'),
+ ('𐋡', 'đ‹ģ'),
+ ('𐌠', 'đŒŖ'),
+ ('𐡘', '𐡟'),
+ ('𐥚', 'đĄŋ'),
+ ('đĸ§', 'đĸ¯'),
+ ('đŖģ', 'đŖŋ'),
+ ('𐤖', '𐤛'),
+ ('đĻŧ', 'đĻŊ'),
+ ('𐧀', '𐧏'),
+ ('𐧒', 'đ§ŋ'),
+ ('𐩀', '𐩈'),
+ ('đŠŊ', '𐊞'),
+ ('đĒ', 'đĒŸ'),
+ ('đĢĢ', 'đĢ¯'),
+ ('𐭘', '𐭟'),
+ ('𐭸', 'đ­ŋ'),
+ ('𐎊', 'đŽ¯'),
+ ('đŗē', 'đŗŋ'),
+ ('𐚠', '𐚞'),
+ ('đŧ', 'đŧĻ'),
+ ('đŊ‘', 'đŊ”'),
+ ('\u{10fc5}', '\u{10fcb}'),
+ ('𑁒', 'đ‘Ĩ'),
+ ('𑇡', '𑇴'),
+ ('đ‘œē', 'đ‘œģ'),
+ ('đ‘ŖĒ', 'đ‘Ŗ˛'),
+ ('𑱚', 'đ‘ąŦ'),
+ ('đ‘ŋ€', 'đ‘ŋ”'),
+ ('𖭛', '𖭡'),
+ ('đ–ē€', 'đ–ē–'),
+ ('𝋠', 'đ‹ŗ'),
+ ('𝍠', '𝍸'),
+ ('đžŖ‡', 'đžŖ'),
+ ('𞱱', 'đž˛Ģ'),
+ ('𞲭', 'đž˛¯'),
+ ('𞲱', '𞲴'),
+ ('𞴁', '𞴭'),
+ ('đž´¯', 'đž´Ŋ'),
+ ('🄀', '🄌'),
+];
+
+pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[
+ ('!', '#'),
+ ('%', '\''),
+ ('*', '*'),
+ (',', ','),
+ ('.', '/'),
+ (':', ';'),
+ ('?', '@'),
+ ('\\', '\\'),
+ ('ÂĄ', 'ÂĄ'),
+ ('§', '§'),
+ ('Âļ', '¡'),
+ ('Âŋ', 'Âŋ'),
+ ('Íž', 'Íž'),
+ ('·', '·'),
+ ('՚', '՟'),
+ ('։', '։'),
+ ('׀', '׀'),
+ ('׃', '׃'),
+ ('׆', '׆'),
+ ('×ŗ', '×´'),
+ ('؉', '؊'),
+ ('،', '؍'),
+ ('؛', '؛'),
+ ('؞', '؟'),
+ ('ŲĒ', 'Ų­'),
+ ('۔', '۔'),
+ ('܀', '܍'),
+ ('ߡ', 'ߚ'),
+ ('ā °', 'ā ž'),
+ ('āĄž', 'āĄž'),
+ ('āĨ¤', 'āĨĨ'),
+ ('āĨ°', 'āĨ°'),
+ ('ā§Ŋ', 'ā§Ŋ'),
+ ('āŠļ', 'āŠļ'),
+ ('āĢ°', 'āĢ°'),
+ ('āąˇ', 'āąˇ'),
+ ('ā˛„', 'ā˛„'),
+ ('āˇ´', 'āˇ´'),
+ ('āš', 'āš'),
+ ('āšš', 'āš›'),
+ ('āŧ„', 'āŧ’'),
+ ('āŧ”', 'āŧ”'),
+ ('āž…', 'āž…'),
+ ('āŋ', 'āŋ”'),
+ ('āŋ™', 'āŋš'),
+ ('၊', '၏'),
+ ('áƒģ', 'áƒģ'),
+ ('፠', '፨'),
+ ('᙮', '᙮'),
+ ('á›Ģ', '᛭'),
+ ('áœĩ', 'áœļ'),
+ ('។', '៖'),
+ ('៘', '៚'),
+ ('᠀', '᠅'),
+ ('᠇', '᠊'),
+ ('áĨ„', 'áĨ…'),
+ ('᨞', '᨟'),
+ ('áĒ ', 'áĒĻ'),
+ ('áĒ¨', 'áĒ­'),
+ ('᭚', '᭠'),
+ ('á¯ŧ', 'á¯ŋ'),
+ ('á°ģ', 'á°ŋ'),
+ ('áąž', 'áąŋ'),
+ ('áŗ€', 'áŗ‡'),
+ ('áŗ“', 'áŗ“'),
+ ('‖', '‗'),
+ ('†', '‧'),
+ ('‰', '‸'),
+ ('â€ģ', '‾'),
+ ('⁁', '⁃'),
+ ('⁇', '⁑'),
+ ('⁓', '⁓'),
+ ('⁕', '⁞'),
+ ('âŗš', 'âŗŧ'),
+ ('âŗž', 'âŗŋ'),
+ ('âĩ°', 'âĩ°'),
+ ('⸀', '⸁'),
+ ('⸆', '⸈'),
+ ('⸋', '⸋'),
+ ('⸎', '⸖'),
+ ('⸘', '⸙'),
+ ('⸛', '⸛'),
+ ('⸞', '⸟'),
+ ('â¸Ē', '⸎'),
+ ('⸰', '⸚'),
+ ('â¸ŧ', 'â¸ŋ'),
+ ('⚁', '⚁'),
+ ('⹃', '⹏'),
+ ('\u{2e52}', '\u{2e52}'),
+ ('、', '〃'),
+ ('ã€Ŋ', 'ã€Ŋ'),
+ ('ãƒģ', 'ãƒģ'),
+ ('꓾', 'ę“ŋ'),
+ ('꘍', '꘏'),
+ ('ę™ŗ', 'ę™ŗ'),
+ ('꙾', '꙾'),
+ ('꛲', '꛷'),
+ ('ꥴ', 'ꥡ'),
+ ('ęŖŽ', 'ęŖ'),
+ ('ęŖ¸', 'ęŖē'),
+ ('ęŖŧ', 'ęŖŧ'),
+ ('ꤎ', 'ę¤¯'),
+ ('ęĨŸ', 'ęĨŸ'),
+ ('꧁', '꧍'),
+ ('꧞', '꧟'),
+ ('꩜', '꩟'),
+ ('ęĢž', 'ęĢŸ'),
+ ('ęĢ°', 'ęĢą'),
+ ('ę¯Ģ', 'ę¯Ģ'),
+ ('ī¸', 'ī¸–'),
+ ('ī¸™', 'ī¸™'),
+ ('ī¸°', 'ī¸°'),
+ ('īš…', 'īš†'),
+ ('īš‰', 'īšŒ'),
+ ('īš', 'īš’'),
+ ('īš”', 'īš—'),
+ ('īšŸ', 'īšĄ'),
+ ('īš¨', 'īš¨'),
+ ('īšĒ', 'īšĢ'),
+ ('īŧ', 'īŧƒ'),
+ ('īŧ…', 'īŧ‡'),
+ ('īŧŠ', 'īŧŠ'),
+ ('īŧŒ', 'īŧŒ'),
+ ('īŧŽ', 'īŧ'),
+ ('īŧš', 'īŧ›'),
+ ('īŧŸ', 'īŧ '),
+ ('īŧŧ', 'īŧŧ'),
+ ('īŊĄ', 'īŊĄ'),
+ ('īŊ¤', 'īŊĨ'),
+ ('𐄀', '𐄂'),
+ ('𐎟', '𐎟'),
+ ('𐏐', '𐏐'),
+ ('đ•¯', 'đ•¯'),
+ ('𐡗', '𐡗'),
+ ('𐤟', '𐤟'),
+ ('đ¤ŋ', 'đ¤ŋ'),
+ ('𐊐', '𐊘'),
+ ('đŠŋ', 'đŠŋ'),
+ ('đĢ°', 'đĢļ'),
+ ('đŦš', 'đŦŋ'),
+ ('𐮙', '𐮜'),
+ ('đŊ•', 'đŊ™'),
+ ('𑁇', '𑁍'),
+ ('đ‘‚ģ', 'đ‘‚ŧ'),
+ ('𑂾', '𑃁'),
+ ('𑅀', '𑅃'),
+ ('𑅴', 'đ‘…ĩ'),
+ ('𑇅', '𑇈'),
+ ('𑇍', '𑇍'),
+ ('𑇛', '𑇛'),
+ ('𑇝', '𑇟'),
+ ('𑈸', 'đ‘ˆŊ'),
+ ('𑊩', '𑊩'),
+ ('𑑋', '𑑏'),
+ ('\u{1145a}', '𑑛'),
+ ('𑑝', '𑑝'),
+ ('𑓆', '𑓆'),
+ ('𑗁', '𑗗'),
+ ('𑙁', '𑙃'),
+ ('𑙠', 'đ‘™Ŧ'),
+ ('đ‘œŧ', '𑜾'),
+ ('đ‘ ģ', 'đ‘ ģ'),
+ ('\u{11944}', '\u{11946}'),
+ ('đ‘§ĸ', 'đ‘§ĸ'),
+ ('đ‘¨ŋ', '𑩆'),
+ ('đ‘Ēš', 'đ‘Ēœ'),
+ ('đ‘Ēž', 'đ‘Ēĸ'),
+ ('𑱁', '𑱅'),
+ ('𑱰', '𑱱'),
+ ('đ‘ģˇ', 'đ‘ģ¸'),
+ ('đ‘ŋŋ', 'đ‘ŋŋ'),
+ ('𒑰', '𒑴'),
+ ('𖩮', 'đ–Š¯'),
+ ('đ–Ģĩ', 'đ–Ģĩ'),
+ ('đ–Ŧˇ', 'đ–Ŧģ'),
+ ('𖭄', '𖭄'),
+ ('đ–ē—', 'đ–ēš'),
+ ('đ–ŋĸ', 'đ–ŋĸ'),
+ ('𛲟', '𛲟'),
+ ('đĒ‡', 'đĒ‹'),
+ ('đžĨž', 'đžĨŸ'),
+];
+
+pub const OTHER_SYMBOL: &'static [(char, char)] = &[
+ ('ÂĻ', 'ÂĻ'),
+ ('Š', 'Š'),
+ ('ÂŽ', 'ÂŽ'),
+ ('°', '°'),
+ ('Ō‚', 'Ō‚'),
+ ('֍', '֎'),
+ ('؎', '؏'),
+ ('۞', '۞'),
+ ('ÛŠ', 'ÛŠ'),
+ ('ÛŊ', 'Ûž'),
+ ('ßļ', 'ßļ'),
+ ('ā§ē', 'ā§ē'),
+ ('ā­°', 'ā­°'),
+ ('ā¯ŗ', 'ā¯¸'),
+ ('ā¯ē', 'ā¯ē'),
+ ('āąŋ', 'āąŋ'),
+ ('āĩ', 'āĩ'),
+ ('āĩš', 'āĩš'),
+ ('āŧ', 'āŧƒ'),
+ ('āŧ“', 'āŧ“'),
+ ('āŧ•', 'āŧ—'),
+ ('āŧš', 'āŧŸ'),
+ ('āŧ´', 'āŧ´'),
+ ('āŧļ', 'āŧļ'),
+ ('āŧ¸', 'āŧ¸'),
+ ('āžž', 'āŋ…'),
+ ('āŋ‡', 'āŋŒ'),
+ ('āŋŽ', 'āŋ'),
+ ('āŋ•', 'āŋ˜'),
+ ('႞', '႟'),
+ ('᎐', '᎙'),
+ ('᙭', '᙭'),
+ ('áĨ€', 'áĨ€'),
+ ('᧞', 'á§ŋ'),
+ ('á­Ą', 'á­Ē'),
+ ('á­´', 'á­ŧ'),
+ ('℀', '℁'),
+ ('℃', '℆'),
+ ('℈', '℉'),
+ ('℔', '℔'),
+ ('№', '℗'),
+ ('℞', 'â„Ŗ'),
+ ('â„Ĩ', 'â„Ĩ'),
+ ('℧', '℧'),
+ ('℩', '℩'),
+ ('℮', '℮'),
+ ('â„ē', 'â„ģ'),
+ ('⅊', '⅊'),
+ ('⅌', '⅍'),
+ ('⅏', '⅏'),
+ ('↊', '↋'),
+ ('↕', '↙'),
+ ('↜', '↟'),
+ ('↡', 'â†ĸ'),
+ ('↤', 'â†Ĩ'),
+ ('↧', '↭'),
+ ('↯', '⇍'),
+ ('⇐', '⇑'),
+ ('⇓', '⇓'),
+ ('⇕', 'â‡ŗ'),
+ ('⌀', '⌇'),
+ ('⌌', '⌟'),
+ ('âŒĸ', '⌨'),
+ ('âŒĢ', 'âģ'),
+ ('âŊ', '⎚'),
+ ('⎴', '⏛'),
+ ('âĸ', 'âĻ'),
+ ('⑀', '⑊'),
+ ('⒜', 'ⓩ'),
+ ('─', 'â–ļ'),
+ ('▸', '◀'),
+ ('◂', '◷'),
+ ('☀', '♮'),
+ ('♰', '❧'),
+ ('➔', 'âžŋ'),
+ ('⠀', 'âŖŋ'),
+ ('âŦ€', 'âŦ¯'),
+ ('⭅', '⭆'),
+ ('⭍', 'â­ŗ'),
+ ('â­ļ', '⮕'),
+ ('\u{2b97}', 'â¯ŋ'),
+ ('âŗĨ', 'âŗĒ'),
+ ('\u{2e50}', '\u{2e51}'),
+ ('âē€', 'âē™'),
+ ('âē›', 'âģŗ'),
+ ('âŧ€', 'âŋ•'),
+ ('âŋ°', 'âŋģ'),
+ ('〄', '〄'),
+ ('〒', '〓'),
+ ('〠', '〠'),
+ ('ã€ļ', 'ã€ˇ'),
+ ('〞', 'ã€ŋ'),
+ ('㆐', '㆑'),
+ ('㆖', '㆟'),
+ ('㇀', 'ã‡Ŗ'),
+ ('㈀', '㈞'),
+ ('ãˆĒ', '㉇'),
+ ('㉐', '㉐'),
+ ('㉠', 'ã‰ŋ'),
+ ('㊊', '㊰'),
+ ('㋀', 'ãŋ'),
+ ('䷀', 'äˇŋ'),
+ ('꒐', '꓆'),
+ ('ę ¨', 'ę Ģ'),
+ ('ę ļ', 'ę ˇ'),
+ ('ę š', 'ę š'),
+ ('ꊡ', 'ꊚ'),
+ ('īˇŊ', 'īˇŊ'),
+ ('īŋ¤', 'īŋ¤'),
+ ('īŋ¨', 'īŋ¨'),
+ ('īŋ­', 'īŋŽ'),
+ ('īŋŧ', 'īŋŊ'),
+ ('𐄷', 'đ„ŋ'),
+ ('𐅹', '𐆉'),
+ ('𐆌', '𐆎'),
+ ('𐆐', '\u{1019c}'),
+ ('𐆠', '𐆠'),
+ ('𐇐', 'đ‡ŧ'),
+ ('𐥡', '𐥸'),
+ ('đĢˆ', 'đĢˆ'),
+ ('đ‘œŋ', 'đ‘œŋ'),
+ ('đ‘ŋ•', 'đ‘ŋœ'),
+ ('đ‘ŋĄ', 'đ‘ŋą'),
+ ('đ–Ŧŧ', 'đ–Ŧŋ'),
+ ('𖭅', '𖭅'),
+ ('𛲜', '𛲜'),
+ ('𝀀', 'đƒĩ'),
+ ('𝄀', 'đ„Ļ'),
+ ('𝄩', '𝅘𝅥𝅲'),
+ ('đ…Ē', 'đ…Ŧ'),
+ ('𝆃', '𝆄'),
+ ('𝆌', '𝆩'),
+ ('𝆮', '𝇨'),
+ ('𝈀', '𝉁'),
+ ('𝉅', '𝉅'),
+ ('𝌀', '𝍖'),
+ ('𝠀', 'đ§ŋ'),
+ ('𝨷', 'đ¨ē'),
+ ('𝩭', '𝩴'),
+ ('đŠļ', 'đĒƒ'),
+ ('đĒ…', 'đĒ†'),
+ ('𞅏', '𞅏'),
+ ('đž˛Ŧ', 'đž˛Ŧ'),
+ ('𞴮', '𞴮'),
+ ('🀀', 'đŸ€Ģ'),
+ ('🀰', '🂓'),
+ ('🂠', '🂮'),
+ ('🂱', 'đŸ‚ŋ'),
+ ('🃁', '🃏'),
+ ('🃑', 'đŸƒĩ'),
+ ('\u{1f10d}', '\u{1f1ad}'),
+ ('đŸ‡Ļ', '🈂'),
+ ('🈐', 'đŸˆģ'),
+ ('🉀', '🉈'),
+ ('🉐', '🉑'),
+ ('🉠', 'đŸ‰Ĩ'),
+ ('🌀', 'đŸē'),
+ ('🐀', '\u{1f6d7}'),
+ ('🛠', 'đŸ›Ŧ'),
+ ('🛰', '\u{1f6fc}'),
+ ('🜀', 'đŸŗ'),
+ ('🞀', '🟘'),
+ ('🟠', 'đŸŸĢ'),
+ ('🠀', '🠋'),
+ ('🠐', '🡇'),
+ ('🡐', '🡙'),
+ ('🡠', 'đŸĸ‡'),
+ ('đŸĸ', 'đŸĸ­'),
+ ('\u{1f8b0}', '\u{1f8b1}'),
+ ('🤀', '\u{1f978}'),
+ ('đŸĨē', '\u{1f9cb}'),
+ ('🧍', '🩓'),
+ ('🩠', '🩭'),
+ ('🩰', '\u{1fa74}'),
+ ('🩸', 'đŸŠē'),
+ ('đŸĒ€', '\u{1fa86}'),
+ ('đŸĒ', '\u{1faa8}'),
+ ('\u{1fab0}', '\u{1fab6}'),
+ ('\u{1fac0}', '\u{1fac2}'),
+ ('\u{1fad0}', '\u{1fad6}'),
+ ('\u{1fb00}', '\u{1fb92}'),
+ ('\u{1fb94}', '\u{1fbca}'),
+];
+
+pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] =
+ &[('\u{2029}', '\u{2029}')];
+
+pub const PRIVATE_USE: &'static [(char, char)] = &[
+ ('\u{e000}', '\u{f8ff}'),
+ ('\u{f0000}', '\u{ffffd}'),
+ ('\u{100000}', '\u{10fffd}'),
+];
+
+pub const PUNCTUATION: &'static [(char, char)] = &[
+ ('!', '#'),
+ ('%', '*'),
+ (',', '/'),
+ (':', ';'),
+ ('?', '@'),
+ ('[', ']'),
+ ('_', '_'),
+ ('{', '{'),
+ ('}', '}'),
+ ('ÂĄ', 'ÂĄ'),
+ ('§', '§'),
+ ('ÂĢ', 'ÂĢ'),
+ ('Âļ', '¡'),
+ ('Âģ', 'Âģ'),
+ ('Âŋ', 'Âŋ'),
+ ('Íž', 'Íž'),
+ ('·', '·'),
+ ('՚', '՟'),
+ ('։', '֊'),
+ ('Öž', 'Öž'),
+ ('׀', '׀'),
+ ('׃', '׃'),
+ ('׆', '׆'),
+ ('×ŗ', '×´'),
+ ('؉', '؊'),
+ ('،', '؍'),
+ ('؛', '؛'),
+ ('؞', '؟'),
+ ('ŲĒ', 'Ų­'),
+ ('۔', '۔'),
+ ('܀', '܍'),
+ ('ߡ', 'ߚ'),
+ ('ā °', 'ā ž'),
+ ('āĄž', 'āĄž'),
+ ('āĨ¤', 'āĨĨ'),
+ ('āĨ°', 'āĨ°'),
+ ('ā§Ŋ', 'ā§Ŋ'),
+ ('āŠļ', 'āŠļ'),
+ ('āĢ°', 'āĢ°'),
+ ('āąˇ', 'āąˇ'),
+ ('ā˛„', 'ā˛„'),
+ ('āˇ´', 'āˇ´'),
+ ('āš', 'āš'),
+ ('āšš', 'āš›'),
+ ('āŧ„', 'āŧ’'),
+ ('āŧ”', 'āŧ”'),
+ ('āŧē', 'āŧŊ'),
+ ('āž…', 'āž…'),
+ ('āŋ', 'āŋ”'),
+ ('āŋ™', 'āŋš'),
+ ('၊', '၏'),
+ ('áƒģ', 'áƒģ'),
+ ('፠', '፨'),
+ ('᐀', '᐀'),
+ ('᙮', '᙮'),
+ ('᚛', '᚜'),
+ ('á›Ģ', '᛭'),
+ ('áœĩ', 'áœļ'),
+ ('។', '៖'),
+ ('៘', '៚'),
+ ('᠀', '᠊'),
+ ('áĨ„', 'áĨ…'),
+ ('᨞', '᨟'),
+ ('áĒ ', 'áĒĻ'),
+ ('áĒ¨', 'áĒ­'),
+ ('᭚', '᭠'),
+ ('á¯ŧ', 'á¯ŋ'),
+ ('á°ģ', 'á°ŋ'),
+ ('áąž', 'áąŋ'),
+ ('áŗ€', 'áŗ‡'),
+ ('áŗ“', 'áŗ“'),
+ ('‐', '‧'),
+ ('‰', '⁃'),
+ ('⁅', '⁑'),
+ ('⁓', '⁞'),
+ ('âŊ', '⁞'),
+ ('₍', '₎'),
+ ('⌈', '⌋'),
+ ('〈', 'âŒĒ'),
+ ('❨', 'âĩ'),
+ ('⟅', '⟆'),
+ ('âŸĻ', '⟯'),
+ ('âĻƒ', 'âĻ˜'),
+ ('⧘', '⧛'),
+ ('â§ŧ', 'â§Ŋ'),
+ ('âŗš', 'âŗŧ'),
+ ('âŗž', 'âŗŋ'),
+ ('âĩ°', 'âĩ°'),
+ ('⸀', '⸮'),
+ ('⸰', '⚏'),
+ ('\u{2e52}', '\u{2e52}'),
+ ('、', '〃'),
+ ('〈', '】'),
+ ('〔', '〟'),
+ ('〰', '〰'),
+ ('ã€Ŋ', 'ã€Ŋ'),
+ ('゠', '゠'),
+ ('ãƒģ', 'ãƒģ'),
+ ('꓾', 'ę“ŋ'),
+ ('꘍', '꘏'),
+ ('ę™ŗ', 'ę™ŗ'),
+ ('꙾', '꙾'),
+ ('꛲', '꛷'),
+ ('ꥴ', 'ꥡ'),
+ ('ęŖŽ', 'ęŖ'),
+ ('ęŖ¸', 'ęŖē'),
+ ('ęŖŧ', 'ęŖŧ'),
+ ('ꤎ', 'ę¤¯'),
+ ('ęĨŸ', 'ęĨŸ'),
+ ('꧁', '꧍'),
+ ('꧞', '꧟'),
+ ('꩜', '꩟'),
+ ('ęĢž', 'ęĢŸ'),
+ ('ęĢ°', 'ęĢą'),
+ ('ę¯Ģ', 'ę¯Ģ'),
+ ('ī´ž', 'ī´ŋ'),
+ ('ī¸', 'ī¸™'),
+ ('ī¸°', 'īš’'),
+ ('īš”', 'īšĄ'),
+ ('īšŖ', 'īšŖ'),
+ ('īš¨', 'īš¨'),
+ ('īšĒ', 'īšĢ'),
+ ('īŧ', 'īŧƒ'),
+ ('īŧ…', 'īŧŠ'),
+ ('īŧŒ', 'īŧ'),
+ ('īŧš', 'īŧ›'),
+ ('īŧŸ', 'īŧ '),
+ ('īŧģ', 'īŧŊ'),
+ ('īŧŋ', 'īŧŋ'),
+ ('īŊ›', 'īŊ›'),
+ ('īŊ', 'īŊ'),
+ ('īŊŸ', 'īŊĨ'),
+ ('𐄀', '𐄂'),
+ ('𐎟', '𐎟'),
+ ('𐏐', '𐏐'),
+ ('đ•¯', 'đ•¯'),
+ ('𐡗', '𐡗'),
+ ('𐤟', '𐤟'),
+ ('đ¤ŋ', 'đ¤ŋ'),
+ ('𐊐', '𐊘'),
+ ('đŠŋ', 'đŠŋ'),
+ ('đĢ°', 'đĢļ'),
+ ('đŦš', 'đŦŋ'),
+ ('𐮙', '𐮜'),
+ ('\u{10ead}', '\u{10ead}'),
+ ('đŊ•', 'đŊ™'),
+ ('𑁇', '𑁍'),
+ ('đ‘‚ģ', 'đ‘‚ŧ'),
+ ('𑂾', '𑃁'),
+ ('𑅀', '𑅃'),
+ ('𑅴', 'đ‘…ĩ'),
+ ('𑇅', '𑇈'),
+ ('𑇍', '𑇍'),
+ ('𑇛', '𑇛'),
+ ('𑇝', '𑇟'),
+ ('𑈸', 'đ‘ˆŊ'),
+ ('𑊩', '𑊩'),
+ ('𑑋', '𑑏'),
+ ('\u{1145a}', '𑑛'),
+ ('𑑝', '𑑝'),
+ ('𑓆', '𑓆'),
+ ('𑗁', '𑗗'),
+ ('𑙁', '𑙃'),
+ ('𑙠', 'đ‘™Ŧ'),
+ ('đ‘œŧ', '𑜾'),
+ ('đ‘ ģ', 'đ‘ ģ'),
+ ('\u{11944}', '\u{11946}'),
+ ('đ‘§ĸ', 'đ‘§ĸ'),
+ ('đ‘¨ŋ', '𑩆'),
+ ('đ‘Ēš', 'đ‘Ēœ'),
+ ('đ‘Ēž', 'đ‘Ēĸ'),
+ ('𑱁', '𑱅'),
+ ('𑱰', '𑱱'),
+ ('đ‘ģˇ', 'đ‘ģ¸'),
+ ('đ‘ŋŋ', 'đ‘ŋŋ'),
+ ('𒑰', '𒑴'),
+ ('𖩮', 'đ–Š¯'),
+ ('đ–Ģĩ', 'đ–Ģĩ'),
+ ('đ–Ŧˇ', 'đ–Ŧģ'),
+ ('𖭄', '𖭄'),
+ ('đ–ē—', 'đ–ēš'),
+ ('đ–ŋĸ', 'đ–ŋĸ'),
+ ('𛲟', '𛲟'),
+ ('đĒ‡', 'đĒ‹'),
+ ('đžĨž', 'đžĨŸ'),
+];
+
+pub const SEPARATOR: &'static [(char, char)] = &[
+ (' ', ' '),
+ ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'),
+ ('\u{2000}', '\u{200a}'),
+ ('\u{2028}', '\u{2029}'),
+ ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'),
+ ('\u{3000}', '\u{3000}'),
+];
+
+pub const SPACE_SEPARATOR: &'static [(char, char)] = &[
+ (' ', ' '),
+ ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'),
+ ('\u{2000}', '\u{200a}'),
+ ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'),
+ ('\u{3000}', '\u{3000}'),
+];
+
+pub const SPACING_MARK: &'static [(char, char)] = &[
+ ('ā¤ƒ', 'ā¤ƒ'),
+ ('ā¤ģ', 'ā¤ģ'),
+ ('ā¤ž', 'āĨ€'),
+ ('āĨ‰', 'āĨŒ'),
+ ('āĨŽ', 'āĨ'),
+ ('āĻ‚', 'āĻƒ'),
+ ('\u{9be}', 'ā§€'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Œ'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('ā¨ƒ', 'ā¨ƒ'),
+ ('ā¨ž', 'āŠ€'),
+ ('āĒƒ', 'āĒƒ'),
+ ('āĒž', 'āĢ€'),
+ ('āĢ‰', 'āĢ‰'),
+ ('āĢ‹', 'āĢŒ'),
+ ('āŦ‚', 'āŦƒ'),
+ ('\u{b3e}', '\u{b3e}'),
+ ('ā­€', 'ā­€'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', 'ā­Œ'),
+ ('\u{b57}', '\u{b57}'),
+ ('\u{bbe}', 'āŽŋ'),
+ ('ā¯', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', 'ā¯Œ'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('ā°', 'ā°ƒ'),
+ ('āą', 'āą„'),
+ ('ā˛‚', 'ā˛ƒ'),
+ ('ā˛ž', 'ā˛ž'),
+ ('āŗ€', 'āŗ„'),
+ ('āŗ‡', 'āŗˆ'),
+ ('āŗŠ', 'āŗ‹'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('ā´‚', 'ā´ƒ'),
+ ('\u{d3e}', 'āĩ€'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩŒ'),
+ ('\u{d57}', '\u{d57}'),
+ ('āļ‚', 'āļƒ'),
+ ('\u{dcf}', 'āˇ‘'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇ˛', 'āˇŗ'),
+ ('āŧž', 'āŧŋ'),
+ ('āŊŋ', 'āŊŋ'),
+ ('á€Ģ', 'á€Ŧ'),
+ ('ေ', 'ေ'),
+ ('း', 'း'),
+ ('á€ģ', 'á€ŧ'),
+ ('ၖ', 'ၗ'),
+ ('áĸ', 'ၤ'),
+ ('ၧ', 'ၭ'),
+ ('ႃ', 'ႄ'),
+ ('ႇ', 'ႌ'),
+ ('ႏ', 'ႏ'),
+ ('ႚ', 'ႜ'),
+ ('ážļ', 'ážļ'),
+ ('ើ', 'ៅ'),
+ ('ះ', 'ៈ'),
+ ('á¤Ŗ', 'á¤Ļ'),
+ ('ᤊ', 'á¤Ģ'),
+ ('ᤰ', '᤹'),
+ ('á¤ŗ', 'ᤸ'),
+ ('ᨙ', 'ᨚ'),
+ ('ᩕ', 'ᩕ'),
+ ('ᩗ', 'ᩗ'),
+ ('እ', 'እ'),
+ ('áŠŖ', 'ኤ'),
+ ('ክ', 'ኲ'),
+ ('áŦ„', 'áŦ„'),
+ ('\u{1b35}', '\u{1b35}'),
+ ('áŦģ', 'áŦģ'),
+ ('áŦŊ', 'ᭁ'),
+ ('ᭃ', '᭄'),
+ ('ᮂ', 'ᮂ'),
+ ('Ꭵ', 'Ꭵ'),
+ ('áŽĻ', 'Ꭷ'),
+ ('áŽĒ', 'áŽĒ'),
+ ('ᯧ', 'ᯧ'),
+ ('á¯Ē', 'á¯Ŧ'),
+ ('ᯎ', 'ᯎ'),
+ ('á¯˛', 'á¯ŗ'),
+ ('á°¤', 'á°Ģ'),
+ ('á°´', 'á°ĩ'),
+ ('áŗĄ', 'áŗĄ'),
+ ('áŗˇ', 'áŗˇ'),
+ ('\u{302e}', '\u{302f}'),
+ ('ę Ŗ', 'ę ¤'),
+ ('ę §', 'ę §'),
+ ('ęĸ€', 'ęĸ'),
+ ('ęĸ´', 'ęŖƒ'),
+ ('ęĨ’', 'ęĨ“'),
+ ('ęĻƒ', 'ęĻƒ'),
+ ('ęĻ´', 'ęĻĩ'),
+ ('ęĻē', 'ęĻģ'),
+ ('ęĻž', '꧀'),
+ ('ę¨¯', 'ꨰ'),
+ ('ę¨ŗ', 'ꨴ'),
+ ('ꩍ', 'ꩍ'),
+ ('ęŠģ', 'ęŠģ'),
+ ('ęŠŊ', 'ęŠŊ'),
+ ('ęĢĢ', 'ęĢĢ'),
+ ('ęĢŽ', 'ęĢ¯'),
+ ('ęĢĩ', 'ęĢĩ'),
+ ('ę¯Ŗ', 'ę¯¤'),
+ ('ę¯Ļ', 'ę¯§'),
+ ('ę¯Š', 'ę¯Ē'),
+ ('ę¯Ŧ', 'ę¯Ŧ'),
+ ('𑀀', '𑀀'),
+ ('𑀂', '𑀂'),
+ ('𑂂', '𑂂'),
+ ('𑂰', '𑂲'),
+ ('𑂷', '𑂸'),
+ ('đ‘„Ŧ', 'đ‘„Ŧ'),
+ ('𑅅', '𑅆'),
+ ('𑆂', '𑆂'),
+ ('đ‘†ŗ', 'đ‘†ĩ'),
+ ('đ‘†ŋ', '𑇀'),
+ ('\u{111ce}', '\u{111ce}'),
+ ('đ‘ˆŦ', '𑈮'),
+ ('𑈲', 'đ‘ˆŗ'),
+ ('đ‘ˆĩ', 'đ‘ˆĩ'),
+ ('𑋠', 'đ‘‹ĸ'),
+ ('𑌂', '𑌃'),
+ ('\u{1133e}', 'đ‘Œŋ'),
+ ('𑍁', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('\u{11357}', '\u{11357}'),
+ ('đ‘ĸ', 'đ‘Ŗ'),
+ ('đ‘ĩ', '𑐷'),
+ ('𑑀', '𑑁'),
+ ('𑑅', '𑑅'),
+ ('\u{114b0}', '𑒲'),
+ ('𑒹', '𑒹'),
+ ('đ‘’ģ', '𑒾'),
+ ('𑓁', '𑓁'),
+ ('\u{115af}', '𑖱'),
+ ('𑖸', 'đ‘–ģ'),
+ ('𑖾', '𑖾'),
+ ('𑘰', '𑘲'),
+ ('đ‘˜ģ', 'đ‘˜ŧ'),
+ ('𑘾', '𑘾'),
+ ('đ‘šŦ', 'đ‘šŦ'),
+ ('𑚮', 'đ‘š¯'),
+ ('đ‘šļ', 'đ‘šļ'),
+ ('𑜠', '𑜡'),
+ ('đ‘œĻ', 'đ‘œĻ'),
+ ('đ‘ Ŧ', '𑠮'),
+ ('𑠸', '𑠸'),
+ ('\u{11930}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193d}', '\u{1193d}'),
+ ('\u{11940}', '\u{11940}'),
+ ('\u{11942}', '\u{11942}'),
+ ('𑧑', '𑧓'),
+ ('𑧜', '𑧟'),
+ ('𑧤', '𑧤'),
+ ('𑨹', '𑨹'),
+ ('𑩗', '𑩘'),
+ ('đ‘Ē—', 'đ‘Ē—'),
+ ('đ‘°¯', 'đ‘°¯'),
+ ('𑰾', '𑰾'),
+ ('𑲩', '𑲩'),
+ ('𑲱', '𑲱'),
+ ('𑲴', '𑲴'),
+ ('đ‘ļŠ', 'đ‘ļŽ'),
+ ('đ‘ļ“', 'đ‘ļ”'),
+ ('đ‘ļ–', 'đ‘ļ–'),
+ ('đ‘ģĩ', 'đ‘ģļ'),
+ ('đ–Ŋ‘', '𖾇'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('\u{1d165}', 'đ…Ļ'),
+ ('𝅭', '\u{1d172}'),
+];
+
+pub const SYMBOL: &'static [(char, char)] = &[
+ ('$', '$'),
+ ('+', '+'),
+ ('<', '>'),
+ ('^', '^'),
+ ('`', '`'),
+ ('|', '|'),
+ ('~', '~'),
+ ('Âĸ', 'ÂĻ'),
+ ('¨', 'Š'),
+ ('ÂŦ', 'ÂŦ'),
+ ('ÂŽ', 'Âą'),
+ ('´', '´'),
+ ('¸', '¸'),
+ ('×', '×'),
+ ('Ãˇ', 'Ãˇ'),
+ ('˂', '˅'),
+ ('˒', '˟'),
+ ('ËĨ', 'ËĢ'),
+ ('Ë­', 'Ë­'),
+ ('˯', 'Ëŋ'),
+ ('Íĩ', 'Íĩ'),
+ ('΄', '΅'),
+ ('Īļ', 'Īļ'),
+ ('Ō‚', 'Ō‚'),
+ ('֍', '֏'),
+ ('؆', '؈'),
+ ('؋', '؋'),
+ ('؎', '؏'),
+ ('۞', '۞'),
+ ('ÛŠ', 'ÛŠ'),
+ ('ÛŊ', 'Ûž'),
+ ('ßļ', 'ßļ'),
+ ('ßž', 'ßŋ'),
+ ('ā§˛', 'ā§ŗ'),
+ ('ā§ē', 'ā§ģ'),
+ ('āĢą', 'āĢą'),
+ ('ā­°', 'ā­°'),
+ ('ā¯ŗ', 'ā¯ē'),
+ ('āąŋ', 'āąŋ'),
+ ('āĩ', 'āĩ'),
+ ('āĩš', 'āĩš'),
+ ('ā¸ŋ', 'ā¸ŋ'),
+ ('āŧ', 'āŧƒ'),
+ ('āŧ“', 'āŧ“'),
+ ('āŧ•', 'āŧ—'),
+ ('āŧš', 'āŧŸ'),
+ ('āŧ´', 'āŧ´'),
+ ('āŧļ', 'āŧļ'),
+ ('āŧ¸', 'āŧ¸'),
+ ('āžž', 'āŋ…'),
+ ('āŋ‡', 'āŋŒ'),
+ ('āŋŽ', 'āŋ'),
+ ('āŋ•', 'āŋ˜'),
+ ('႞', '႟'),
+ ('᎐', '᎙'),
+ ('᙭', '᙭'),
+ ('៛', '៛'),
+ ('áĨ€', 'áĨ€'),
+ ('᧞', 'á§ŋ'),
+ ('á­Ą', 'á­Ē'),
+ ('á­´', 'á­ŧ'),
+ ('ážŊ', 'ážŊ'),
+ ('ážŋ', 'áŋ'),
+ ('áŋ', 'áŋ'),
+ ('áŋ', 'áŋŸ'),
+ ('áŋ­', 'áŋ¯'),
+ ('áŋŊ', 'áŋž'),
+ ('⁄', '⁄'),
+ ('⁒', '⁒'),
+ ('âē', 'âŧ'),
+ ('₊', '₌'),
+ ('₠', 'â‚ŋ'),
+ ('℀', '℁'),
+ ('℃', '℆'),
+ ('℈', '℉'),
+ ('℔', '℔'),
+ ('№', '℘'),
+ ('℞', 'â„Ŗ'),
+ ('â„Ĩ', 'â„Ĩ'),
+ ('℧', '℧'),
+ ('℩', '℩'),
+ ('℮', '℮'),
+ ('â„ē', 'â„ģ'),
+ ('⅀', '⅄'),
+ ('⅊', '⅍'),
+ ('⅏', '⅏'),
+ ('↊', '↋'),
+ ('←', '⌇'),
+ ('⌌', '⌨'),
+ ('âŒĢ', 'âĻ'),
+ ('⑀', '⑊'),
+ ('⒜', 'ⓩ'),
+ ('─', '❧'),
+ ('➔', '⟄'),
+ ('⟇', 'âŸĨ'),
+ ('⟰', 'âĻ‚'),
+ ('âĻ™', '⧗'),
+ ('⧜', 'â§ģ'),
+ ('⧞', 'â­ŗ'),
+ ('â­ļ', '⮕'),
+ ('\u{2b97}', 'â¯ŋ'),
+ ('âŗĨ', 'âŗĒ'),
+ ('\u{2e50}', '\u{2e51}'),
+ ('âē€', 'âē™'),
+ ('âē›', 'âģŗ'),
+ ('âŧ€', 'âŋ•'),
+ ('âŋ°', 'âŋģ'),
+ ('〄', '〄'),
+ ('〒', '〓'),
+ ('〠', '〠'),
+ ('ã€ļ', 'ã€ˇ'),
+ ('〞', 'ã€ŋ'),
+ ('゛', '゜'),
+ ('㆐', '㆑'),
+ ('㆖', '㆟'),
+ ('㇀', 'ã‡Ŗ'),
+ ('㈀', '㈞'),
+ ('ãˆĒ', '㉇'),
+ ('㉐', '㉐'),
+ ('㉠', 'ã‰ŋ'),
+ ('㊊', '㊰'),
+ ('㋀', 'ãŋ'),
+ ('䷀', 'äˇŋ'),
+ ('꒐', '꓆'),
+ ('꜀', '꜖'),
+ ('꜠', '꜡'),
+ ('꞉', '꞊'),
+ ('ę ¨', 'ę Ģ'),
+ ('ę ļ', 'ę š'),
+ ('ꊡ', 'ꊚ'),
+ ('꭛', '꭛'),
+ ('\u{ab6a}', '\u{ab6b}'),
+ ('īŦŠ', 'īŦŠ'),
+ ('īŽ˛', 'ī¯'),
+ ('īˇŧ', 'īˇŊ'),
+ ('īšĸ', 'īšĸ'),
+ ('īš¤', 'īšĻ'),
+ ('īšŠ', 'īšŠ'),
+ ('īŧ„', 'īŧ„'),
+ ('īŧ‹', 'īŧ‹'),
+ ('īŧœ', 'īŧž'),
+ ('īŧž', 'īŧž'),
+ ('īŊ€', 'īŊ€'),
+ ('īŊœ', 'īŊœ'),
+ ('īŊž', 'īŊž'),
+ ('īŋ ', 'īŋĻ'),
+ ('īŋ¨', 'īŋŽ'),
+ ('īŋŧ', 'īŋŊ'),
+ ('𐄷', 'đ„ŋ'),
+ ('𐅹', '𐆉'),
+ ('𐆌', '𐆎'),
+ ('𐆐', '\u{1019c}'),
+ ('𐆠', '𐆠'),
+ ('𐇐', 'đ‡ŧ'),
+ ('𐥡', '𐥸'),
+ ('đĢˆ', 'đĢˆ'),
+ ('đ‘œŋ', 'đ‘œŋ'),
+ ('đ‘ŋ•', 'đ‘ŋą'),
+ ('đ–Ŧŧ', 'đ–Ŧŋ'),
+ ('𖭅', '𖭅'),
+ ('𛲜', '𛲜'),
+ ('𝀀', 'đƒĩ'),
+ ('𝄀', 'đ„Ļ'),
+ ('𝄩', '𝅘𝅥𝅲'),
+ ('đ…Ē', 'đ…Ŧ'),
+ ('𝆃', '𝆄'),
+ ('𝆌', '𝆩'),
+ ('𝆮', '𝇨'),
+ ('𝈀', '𝉁'),
+ ('𝉅', '𝉅'),
+ ('𝌀', '𝍖'),
+ ('𝛁', '𝛁'),
+ ('𝛛', '𝛛'),
+ ('đ›ģ', 'đ›ģ'),
+ ('𝜕', '𝜕'),
+ ('đœĩ', 'đœĩ'),
+ ('𝝏', '𝝏'),
+ ('đ¯', 'đ¯'),
+ ('𝞉', '𝞉'),
+ ('𝞩', '𝞩'),
+ ('𝟃', '𝟃'),
+ ('𝠀', 'đ§ŋ'),
+ ('𝨷', 'đ¨ē'),
+ ('𝩭', '𝩴'),
+ ('đŠļ', 'đĒƒ'),
+ ('đĒ…', 'đĒ†'),
+ ('𞅏', '𞅏'),
+ ('đž‹ŋ', 'đž‹ŋ'),
+ ('đž˛Ŧ', 'đž˛Ŧ'),
+ ('𞲰', '𞲰'),
+ ('𞴮', '𞴮'),
+ ('đžģ°', 'đžģą'),
+ ('🀀', 'đŸ€Ģ'),
+ ('🀰', '🂓'),
+ ('🂠', '🂮'),
+ ('🂱', 'đŸ‚ŋ'),
+ ('🃁', '🃏'),
+ ('🃑', 'đŸƒĩ'),
+ ('\u{1f10d}', '\u{1f1ad}'),
+ ('đŸ‡Ļ', '🈂'),
+ ('🈐', 'đŸˆģ'),
+ ('🉀', '🉈'),
+ ('🉐', '🉑'),
+ ('🉠', 'đŸ‰Ĩ'),
+ ('🌀', '\u{1f6d7}'),
+ ('🛠', 'đŸ›Ŧ'),
+ ('🛰', '\u{1f6fc}'),
+ ('🜀', 'đŸŗ'),
+ ('🞀', '🟘'),
+ ('🟠', 'đŸŸĢ'),
+ ('🠀', '🠋'),
+ ('🠐', '🡇'),
+ ('🡐', '🡙'),
+ ('🡠', 'đŸĸ‡'),
+ ('đŸĸ', 'đŸĸ­'),
+ ('\u{1f8b0}', '\u{1f8b1}'),
+ ('🤀', '\u{1f978}'),
+ ('đŸĨē', '\u{1f9cb}'),
+ ('🧍', '🩓'),
+ ('🩠', '🩭'),
+ ('🩰', '\u{1fa74}'),
+ ('🩸', 'đŸŠē'),
+ ('đŸĒ€', '\u{1fa86}'),
+ ('đŸĒ', '\u{1faa8}'),
+ ('\u{1fab0}', '\u{1fab6}'),
+ ('\u{1fac0}', '\u{1fac2}'),
+ ('\u{1fad0}', '\u{1fad6}'),
+ ('\u{1fb00}', '\u{1fb92}'),
+ ('\u{1fb94}', '\u{1fbca}'),
+];
+
+pub const TITLECASE_LETTER: &'static [(char, char)] = &[
+ ('Į…', 'Į…'),
+ ('Įˆ', 'Įˆ'),
+ ('Į‹', 'Į‹'),
+ ('Į˛', 'Į˛'),
+ ('ᾈ', 'ᾏ'),
+ ('ᾘ', 'ᾟ'),
+ ('ឨ', 'ឯ'),
+ ('ážŧ', 'ážŧ'),
+ ('áŋŒ', 'áŋŒ'),
+ ('áŋŧ', 'áŋŧ'),
+];
+
+pub const UNASSIGNED: &'static [(char, char)] = &[
+ ('\u{378}', '\u{379}'),
+ ('\u{380}', '\u{383}'),
+ ('\u{38b}', '\u{38b}'),
+ ('\u{38d}', '\u{38d}'),
+ ('\u{3a2}', '\u{3a2}'),
+ ('\u{530}', '\u{530}'),
+ ('\u{557}', '\u{558}'),
+ ('\u{58b}', '\u{58c}'),
+ ('\u{590}', '\u{590}'),
+ ('\u{5c8}', '\u{5cf}'),
+ ('\u{5eb}', '\u{5ee}'),
+ ('\u{5f5}', '\u{5ff}'),
+ ('\u{61d}', '\u{61d}'),
+ ('\u{70e}', '\u{70e}'),
+ ('\u{74b}', '\u{74c}'),
+ ('\u{7b2}', '\u{7bf}'),
+ ('\u{7fb}', '\u{7fc}'),
+ ('\u{82e}', '\u{82f}'),
+ ('\u{83f}', '\u{83f}'),
+ ('\u{85c}', '\u{85d}'),
+ ('\u{85f}', '\u{85f}'),
+ ('\u{86b}', '\u{89f}'),
+ ('\u{8b5}', '\u{8b5}'),
+ ('\u{8c8}', '\u{8d2}'),
+ ('\u{984}', '\u{984}'),
+ ('\u{98d}', '\u{98e}'),
+ ('\u{991}', '\u{992}'),
+ ('\u{9a9}', '\u{9a9}'),
+ ('\u{9b1}', '\u{9b1}'),
+ ('\u{9b3}', '\u{9b5}'),
+ ('\u{9ba}', '\u{9bb}'),
+ ('\u{9c5}', '\u{9c6}'),
+ ('\u{9c9}', '\u{9ca}'),
+ ('\u{9cf}', '\u{9d6}'),
+ ('\u{9d8}', '\u{9db}'),
+ ('\u{9de}', '\u{9de}'),
+ ('\u{9e4}', '\u{9e5}'),
+ ('\u{9ff}', '\u{a00}'),
+ ('\u{a04}', '\u{a04}'),
+ ('\u{a0b}', '\u{a0e}'),
+ ('\u{a11}', '\u{a12}'),
+ ('\u{a29}', '\u{a29}'),
+ ('\u{a31}', '\u{a31}'),
+ ('\u{a34}', '\u{a34}'),
+ ('\u{a37}', '\u{a37}'),
+ ('\u{a3a}', '\u{a3b}'),
+ ('\u{a3d}', '\u{a3d}'),
+ ('\u{a43}', '\u{a46}'),
+ ('\u{a49}', '\u{a4a}'),
+ ('\u{a4e}', '\u{a50}'),
+ ('\u{a52}', '\u{a58}'),
+ ('\u{a5d}', '\u{a5d}'),
+ ('\u{a5f}', '\u{a65}'),
+ ('\u{a77}', '\u{a80}'),
+ ('\u{a84}', '\u{a84}'),
+ ('\u{a8e}', '\u{a8e}'),
+ ('\u{a92}', '\u{a92}'),
+ ('\u{aa9}', '\u{aa9}'),
+ ('\u{ab1}', '\u{ab1}'),
+ ('\u{ab4}', '\u{ab4}'),
+ ('\u{aba}', '\u{abb}'),
+ ('\u{ac6}', '\u{ac6}'),
+ ('\u{aca}', '\u{aca}'),
+ ('\u{ace}', '\u{acf}'),
+ ('\u{ad1}', '\u{adf}'),
+ ('\u{ae4}', '\u{ae5}'),
+ ('\u{af2}', '\u{af8}'),
+ ('\u{b00}', '\u{b00}'),
+ ('\u{b04}', '\u{b04}'),
+ ('\u{b0d}', '\u{b0e}'),
+ ('\u{b11}', '\u{b12}'),
+ ('\u{b29}', '\u{b29}'),
+ ('\u{b31}', '\u{b31}'),
+ ('\u{b34}', '\u{b34}'),
+ ('\u{b3a}', '\u{b3b}'),
+ ('\u{b45}', '\u{b46}'),
+ ('\u{b49}', '\u{b4a}'),
+ ('\u{b4e}', '\u{b54}'),
+ ('\u{b58}', '\u{b5b}'),
+ ('\u{b5e}', '\u{b5e}'),
+ ('\u{b64}', '\u{b65}'),
+ ('\u{b78}', '\u{b81}'),
+ ('\u{b84}', '\u{b84}'),
+ ('\u{b8b}', '\u{b8d}'),
+ ('\u{b91}', '\u{b91}'),
+ ('\u{b96}', '\u{b98}'),
+ ('\u{b9b}', '\u{b9b}'),
+ ('\u{b9d}', '\u{b9d}'),
+ ('\u{ba0}', '\u{ba2}'),
+ ('\u{ba5}', '\u{ba7}'),
+ ('\u{bab}', '\u{bad}'),
+ ('\u{bba}', '\u{bbd}'),
+ ('\u{bc3}', '\u{bc5}'),
+ ('\u{bc9}', '\u{bc9}'),
+ ('\u{bce}', '\u{bcf}'),
+ ('\u{bd1}', '\u{bd6}'),
+ ('\u{bd8}', '\u{be5}'),
+ ('\u{bfb}', '\u{bff}'),
+ ('\u{c0d}', '\u{c0d}'),
+ ('\u{c11}', '\u{c11}'),
+ ('\u{c29}', '\u{c29}'),
+ ('\u{c3a}', '\u{c3c}'),
+ ('\u{c45}', '\u{c45}'),
+ ('\u{c49}', '\u{c49}'),
+ ('\u{c4e}', '\u{c54}'),
+ ('\u{c57}', '\u{c57}'),
+ ('\u{c5b}', '\u{c5f}'),
+ ('\u{c64}', '\u{c65}'),
+ ('\u{c70}', '\u{c76}'),
+ ('\u{c8d}', '\u{c8d}'),
+ ('\u{c91}', '\u{c91}'),
+ ('\u{ca9}', '\u{ca9}'),
+ ('\u{cb4}', '\u{cb4}'),
+ ('\u{cba}', '\u{cbb}'),
+ ('\u{cc5}', '\u{cc5}'),
+ ('\u{cc9}', '\u{cc9}'),
+ ('\u{cce}', '\u{cd4}'),
+ ('\u{cd7}', '\u{cdd}'),
+ ('\u{cdf}', '\u{cdf}'),
+ ('\u{ce4}', '\u{ce5}'),
+ ('\u{cf0}', '\u{cf0}'),
+ ('\u{cf3}', '\u{cff}'),
+ ('\u{d0d}', '\u{d0d}'),
+ ('\u{d11}', '\u{d11}'),
+ ('\u{d45}', '\u{d45}'),
+ ('\u{d49}', '\u{d49}'),
+ ('\u{d50}', '\u{d53}'),
+ ('\u{d64}', '\u{d65}'),
+ ('\u{d80}', '\u{d80}'),
+ ('\u{d84}', '\u{d84}'),
+ ('\u{d97}', '\u{d99}'),
+ ('\u{db2}', '\u{db2}'),
+ ('\u{dbc}', '\u{dbc}'),
+ ('\u{dbe}', '\u{dbf}'),
+ ('\u{dc7}', '\u{dc9}'),
+ ('\u{dcb}', '\u{dce}'),
+ ('\u{dd5}', '\u{dd5}'),
+ ('\u{dd7}', '\u{dd7}'),
+ ('\u{de0}', '\u{de5}'),
+ ('\u{df0}', '\u{df1}'),
+ ('\u{df5}', '\u{e00}'),
+ ('\u{e3b}', '\u{e3e}'),
+ ('\u{e5c}', '\u{e80}'),
+ ('\u{e83}', '\u{e83}'),
+ ('\u{e85}', '\u{e85}'),
+ ('\u{e8b}', '\u{e8b}'),
+ ('\u{ea4}', '\u{ea4}'),
+ ('\u{ea6}', '\u{ea6}'),
+ ('\u{ebe}', '\u{ebf}'),
+ ('\u{ec5}', '\u{ec5}'),
+ ('\u{ec7}', '\u{ec7}'),
+ ('\u{ece}', '\u{ecf}'),
+ ('\u{eda}', '\u{edb}'),
+ ('\u{ee0}', '\u{eff}'),
+ ('\u{f48}', '\u{f48}'),
+ ('\u{f6d}', '\u{f70}'),
+ ('\u{f98}', '\u{f98}'),
+ ('\u{fbd}', '\u{fbd}'),
+ ('\u{fcd}', '\u{fcd}'),
+ ('\u{fdb}', '\u{fff}'),
+ ('\u{10c6}', '\u{10c6}'),
+ ('\u{10c8}', '\u{10cc}'),
+ ('\u{10ce}', '\u{10cf}'),
+ ('\u{1249}', '\u{1249}'),
+ ('\u{124e}', '\u{124f}'),
+ ('\u{1257}', '\u{1257}'),
+ ('\u{1259}', '\u{1259}'),
+ ('\u{125e}', '\u{125f}'),
+ ('\u{1289}', '\u{1289}'),
+ ('\u{128e}', '\u{128f}'),
+ ('\u{12b1}', '\u{12b1}'),
+ ('\u{12b6}', '\u{12b7}'),
+ ('\u{12bf}', '\u{12bf}'),
+ ('\u{12c1}', '\u{12c1}'),
+ ('\u{12c6}', '\u{12c7}'),
+ ('\u{12d7}', '\u{12d7}'),
+ ('\u{1311}', '\u{1311}'),
+ ('\u{1316}', '\u{1317}'),
+ ('\u{135b}', '\u{135c}'),
+ ('\u{137d}', '\u{137f}'),
+ ('\u{139a}', '\u{139f}'),
+ ('\u{13f6}', '\u{13f7}'),
+ ('\u{13fe}', '\u{13ff}'),
+ ('\u{169d}', '\u{169f}'),
+ ('\u{16f9}', '\u{16ff}'),
+ ('\u{170d}', '\u{170d}'),
+ ('\u{1715}', '\u{171f}'),
+ ('\u{1737}', '\u{173f}'),
+ ('\u{1754}', '\u{175f}'),
+ ('\u{176d}', '\u{176d}'),
+ ('\u{1771}', '\u{1771}'),
+ ('\u{1774}', '\u{177f}'),
+ ('\u{17de}', '\u{17df}'),
+ ('\u{17ea}', '\u{17ef}'),
+ ('\u{17fa}', '\u{17ff}'),
+ ('\u{180f}', '\u{180f}'),
+ ('\u{181a}', '\u{181f}'),
+ ('\u{1879}', '\u{187f}'),
+ ('\u{18ab}', '\u{18af}'),
+ ('\u{18f6}', '\u{18ff}'),
+ ('\u{191f}', '\u{191f}'),
+ ('\u{192c}', '\u{192f}'),
+ ('\u{193c}', '\u{193f}'),
+ ('\u{1941}', '\u{1943}'),
+ ('\u{196e}', '\u{196f}'),
+ ('\u{1975}', '\u{197f}'),
+ ('\u{19ac}', '\u{19af}'),
+ ('\u{19ca}', '\u{19cf}'),
+ ('\u{19db}', '\u{19dd}'),
+ ('\u{1a1c}', '\u{1a1d}'),
+ ('\u{1a5f}', '\u{1a5f}'),
+ ('\u{1a7d}', '\u{1a7e}'),
+ ('\u{1a8a}', '\u{1a8f}'),
+ ('\u{1a9a}', '\u{1a9f}'),
+ ('\u{1aae}', '\u{1aaf}'),
+ ('\u{1ac1}', '\u{1aff}'),
+ ('\u{1b4c}', '\u{1b4f}'),
+ ('\u{1b7d}', '\u{1b7f}'),
+ ('\u{1bf4}', '\u{1bfb}'),
+ ('\u{1c38}', '\u{1c3a}'),
+ ('\u{1c4a}', '\u{1c4c}'),
+ ('\u{1c89}', '\u{1c8f}'),
+ ('\u{1cbb}', '\u{1cbc}'),
+ ('\u{1cc8}', '\u{1ccf}'),
+ ('\u{1cfb}', '\u{1cff}'),
+ ('\u{1dfa}', '\u{1dfa}'),
+ ('\u{1f16}', '\u{1f17}'),
+ ('\u{1f1e}', '\u{1f1f}'),
+ ('\u{1f46}', '\u{1f47}'),
+ ('\u{1f4e}', '\u{1f4f}'),
+ ('\u{1f58}', '\u{1f58}'),
+ ('\u{1f5a}', '\u{1f5a}'),
+ ('\u{1f5c}', '\u{1f5c}'),
+ ('\u{1f5e}', '\u{1f5e}'),
+ ('\u{1f7e}', '\u{1f7f}'),
+ ('\u{1fb5}', '\u{1fb5}'),
+ ('\u{1fc5}', '\u{1fc5}'),
+ ('\u{1fd4}', '\u{1fd5}'),
+ ('\u{1fdc}', '\u{1fdc}'),
+ ('\u{1ff0}', '\u{1ff1}'),
+ ('\u{1ff5}', '\u{1ff5}'),
+ ('\u{1fff}', '\u{1fff}'),
+ ('\u{2065}', '\u{2065}'),
+ ('\u{2072}', '\u{2073}'),
+ ('\u{208f}', '\u{208f}'),
+ ('\u{209d}', '\u{209f}'),
+ ('\u{20c0}', '\u{20cf}'),
+ ('\u{20f1}', '\u{20ff}'),
+ ('\u{218c}', '\u{218f}'),
+ ('\u{2427}', '\u{243f}'),
+ ('\u{244b}', '\u{245f}'),
+ ('\u{2b74}', '\u{2b75}'),
+ ('\u{2b96}', '\u{2b96}'),
+ ('\u{2c2f}', '\u{2c2f}'),
+ ('\u{2c5f}', '\u{2c5f}'),
+ ('\u{2cf4}', '\u{2cf8}'),
+ ('\u{2d26}', '\u{2d26}'),
+ ('\u{2d28}', '\u{2d2c}'),
+ ('\u{2d2e}', '\u{2d2f}'),
+ ('\u{2d68}', '\u{2d6e}'),
+ ('\u{2d71}', '\u{2d7e}'),
+ ('\u{2d97}', '\u{2d9f}'),
+ ('\u{2da7}', '\u{2da7}'),
+ ('\u{2daf}', '\u{2daf}'),
+ ('\u{2db7}', '\u{2db7}'),
+ ('\u{2dbf}', '\u{2dbf}'),
+ ('\u{2dc7}', '\u{2dc7}'),
+ ('\u{2dcf}', '\u{2dcf}'),
+ ('\u{2dd7}', '\u{2dd7}'),
+ ('\u{2ddf}', '\u{2ddf}'),
+ ('\u{2e53}', '\u{2e7f}'),
+ ('\u{2e9a}', '\u{2e9a}'),
+ ('\u{2ef4}', '\u{2eff}'),
+ ('\u{2fd6}', '\u{2fef}'),
+ ('\u{2ffc}', '\u{2fff}'),
+ ('\u{3040}', '\u{3040}'),
+ ('\u{3097}', '\u{3098}'),
+ ('\u{3100}', '\u{3104}'),
+ ('\u{3130}', '\u{3130}'),
+ ('\u{318f}', '\u{318f}'),
+ ('\u{31e4}', '\u{31ef}'),
+ ('\u{321f}', '\u{321f}'),
+ ('\u{9ffd}', '\u{9fff}'),
+ ('\u{a48d}', '\u{a48f}'),
+ ('\u{a4c7}', '\u{a4cf}'),
+ ('\u{a62c}', '\u{a63f}'),
+ ('\u{a6f8}', '\u{a6ff}'),
+ ('\u{a7c0}', '\u{a7c1}'),
+ ('\u{a7cb}', '\u{a7f4}'),
+ ('\u{a82d}', '\u{a82f}'),
+ ('\u{a83a}', '\u{a83f}'),
+ ('\u{a878}', '\u{a87f}'),
+ ('\u{a8c6}', '\u{a8cd}'),
+ ('\u{a8da}', '\u{a8df}'),
+ ('\u{a954}', '\u{a95e}'),
+ ('\u{a97d}', '\u{a97f}'),
+ ('\u{a9ce}', '\u{a9ce}'),
+ ('\u{a9da}', '\u{a9dd}'),
+ ('\u{a9ff}', '\u{a9ff}'),
+ ('\u{aa37}', '\u{aa3f}'),
+ ('\u{aa4e}', '\u{aa4f}'),
+ ('\u{aa5a}', '\u{aa5b}'),
+ ('\u{aac3}', '\u{aada}'),
+ ('\u{aaf7}', '\u{ab00}'),
+ ('\u{ab07}', '\u{ab08}'),
+ ('\u{ab0f}', '\u{ab10}'),
+ ('\u{ab17}', '\u{ab1f}'),
+ ('\u{ab27}', '\u{ab27}'),
+ ('\u{ab2f}', '\u{ab2f}'),
+ ('\u{ab6c}', '\u{ab6f}'),
+ ('\u{abee}', '\u{abef}'),
+ ('\u{abfa}', '\u{abff}'),
+ ('\u{d7a4}', '\u{d7af}'),
+ ('\u{d7c7}', '\u{d7ca}'),
+ ('\u{d7fc}', '\u{d7ff}'),
+ ('\u{fa6e}', '\u{fa6f}'),
+ ('\u{fada}', '\u{faff}'),
+ ('\u{fb07}', '\u{fb12}'),
+ ('\u{fb18}', '\u{fb1c}'),
+ ('\u{fb37}', '\u{fb37}'),
+ ('\u{fb3d}', '\u{fb3d}'),
+ ('\u{fb3f}', '\u{fb3f}'),
+ ('\u{fb42}', '\u{fb42}'),
+ ('\u{fb45}', '\u{fb45}'),
+ ('\u{fbc2}', '\u{fbd2}'),
+ ('\u{fd40}', '\u{fd4f}'),
+ ('\u{fd90}', '\u{fd91}'),
+ ('\u{fdc8}', '\u{fdef}'),
+ ('\u{fdfe}', '\u{fdff}'),
+ ('\u{fe1a}', '\u{fe1f}'),
+ ('\u{fe53}', '\u{fe53}'),
+ ('\u{fe67}', '\u{fe67}'),
+ ('\u{fe6c}', '\u{fe6f}'),
+ ('\u{fe75}', '\u{fe75}'),
+ ('\u{fefd}', '\u{fefe}'),
+ ('\u{ff00}', '\u{ff00}'),
+ ('\u{ffbf}', '\u{ffc1}'),
+ ('\u{ffc8}', '\u{ffc9}'),
+ ('\u{ffd0}', '\u{ffd1}'),
+ ('\u{ffd8}', '\u{ffd9}'),
+ ('\u{ffdd}', '\u{ffdf}'),
+ ('\u{ffe7}', '\u{ffe7}'),
+ ('\u{ffef}', '\u{fff8}'),
+ ('\u{fffe}', '\u{ffff}'),
+ ('\u{1000c}', '\u{1000c}'),
+ ('\u{10027}', '\u{10027}'),
+ ('\u{1003b}', '\u{1003b}'),
+ ('\u{1003e}', '\u{1003e}'),
+ ('\u{1004e}', '\u{1004f}'),
+ ('\u{1005e}', '\u{1007f}'),
+ ('\u{100fb}', '\u{100ff}'),
+ ('\u{10103}', '\u{10106}'),
+ ('\u{10134}', '\u{10136}'),
+ ('\u{1018f}', '\u{1018f}'),
+ ('\u{1019d}', '\u{1019f}'),
+ ('\u{101a1}', '\u{101cf}'),
+ ('\u{101fe}', '\u{1027f}'),
+ ('\u{1029d}', '\u{1029f}'),
+ ('\u{102d1}', '\u{102df}'),
+ ('\u{102fc}', '\u{102ff}'),
+ ('\u{10324}', '\u{1032c}'),
+ ('\u{1034b}', '\u{1034f}'),
+ ('\u{1037b}', '\u{1037f}'),
+ ('\u{1039e}', '\u{1039e}'),
+ ('\u{103c4}', '\u{103c7}'),
+ ('\u{103d6}', '\u{103ff}'),
+ ('\u{1049e}', '\u{1049f}'),
+ ('\u{104aa}', '\u{104af}'),
+ ('\u{104d4}', '\u{104d7}'),
+ ('\u{104fc}', '\u{104ff}'),
+ ('\u{10528}', '\u{1052f}'),
+ ('\u{10564}', '\u{1056e}'),
+ ('\u{10570}', '\u{105ff}'),
+ ('\u{10737}', '\u{1073f}'),
+ ('\u{10756}', '\u{1075f}'),
+ ('\u{10768}', '\u{107ff}'),
+ ('\u{10806}', '\u{10807}'),
+ ('\u{10809}', '\u{10809}'),
+ ('\u{10836}', '\u{10836}'),
+ ('\u{10839}', '\u{1083b}'),
+ ('\u{1083d}', '\u{1083e}'),
+ ('\u{10856}', '\u{10856}'),
+ ('\u{1089f}', '\u{108a6}'),
+ ('\u{108b0}', '\u{108df}'),
+ ('\u{108f3}', '\u{108f3}'),
+ ('\u{108f6}', '\u{108fa}'),
+ ('\u{1091c}', '\u{1091e}'),
+ ('\u{1093a}', '\u{1093e}'),
+ ('\u{10940}', '\u{1097f}'),
+ ('\u{109b8}', '\u{109bb}'),
+ ('\u{109d0}', '\u{109d1}'),
+ ('\u{10a04}', '\u{10a04}'),
+ ('\u{10a07}', '\u{10a0b}'),
+ ('\u{10a14}', '\u{10a14}'),
+ ('\u{10a18}', '\u{10a18}'),
+ ('\u{10a36}', '\u{10a37}'),
+ ('\u{10a3b}', '\u{10a3e}'),
+ ('\u{10a49}', '\u{10a4f}'),
+ ('\u{10a59}', '\u{10a5f}'),
+ ('\u{10aa0}', '\u{10abf}'),
+ ('\u{10ae7}', '\u{10aea}'),
+ ('\u{10af7}', '\u{10aff}'),
+ ('\u{10b36}', '\u{10b38}'),
+ ('\u{10b56}', '\u{10b57}'),
+ ('\u{10b73}', '\u{10b77}'),
+ ('\u{10b92}', '\u{10b98}'),
+ ('\u{10b9d}', '\u{10ba8}'),
+ ('\u{10bb0}', '\u{10bff}'),
+ ('\u{10c49}', '\u{10c7f}'),
+ ('\u{10cb3}', '\u{10cbf}'),
+ ('\u{10cf3}', '\u{10cf9}'),
+ ('\u{10d28}', '\u{10d2f}'),
+ ('\u{10d3a}', '\u{10e5f}'),
+ ('\u{10e7f}', '\u{10e7f}'),
+ ('\u{10eaa}', '\u{10eaa}'),
+ ('\u{10eae}', '\u{10eaf}'),
+ ('\u{10eb2}', '\u{10eff}'),
+ ('\u{10f28}', '\u{10f2f}'),
+ ('\u{10f5a}', '\u{10faf}'),
+ ('\u{10fcc}', '\u{10fdf}'),
+ ('\u{10ff7}', '\u{10fff}'),
+ ('\u{1104e}', '\u{11051}'),
+ ('\u{11070}', '\u{1107e}'),
+ ('\u{110c2}', '\u{110cc}'),
+ ('\u{110ce}', '\u{110cf}'),
+ ('\u{110e9}', '\u{110ef}'),
+ ('\u{110fa}', '\u{110ff}'),
+ ('\u{11135}', '\u{11135}'),
+ ('\u{11148}', '\u{1114f}'),
+ ('\u{11177}', '\u{1117f}'),
+ ('\u{111e0}', '\u{111e0}'),
+ ('\u{111f5}', '\u{111ff}'),
+ ('\u{11212}', '\u{11212}'),
+ ('\u{1123f}', '\u{1127f}'),
+ ('\u{11287}', '\u{11287}'),
+ ('\u{11289}', '\u{11289}'),
+ ('\u{1128e}', '\u{1128e}'),
+ ('\u{1129e}', '\u{1129e}'),
+ ('\u{112aa}', '\u{112af}'),
+ ('\u{112eb}', '\u{112ef}'),
+ ('\u{112fa}', '\u{112ff}'),
+ ('\u{11304}', '\u{11304}'),
+ ('\u{1130d}', '\u{1130e}'),
+ ('\u{11311}', '\u{11312}'),
+ ('\u{11329}', '\u{11329}'),
+ ('\u{11331}', '\u{11331}'),
+ ('\u{11334}', '\u{11334}'),
+ ('\u{1133a}', '\u{1133a}'),
+ ('\u{11345}', '\u{11346}'),
+ ('\u{11349}', '\u{1134a}'),
+ ('\u{1134e}', '\u{1134f}'),
+ ('\u{11351}', '\u{11356}'),
+ ('\u{11358}', '\u{1135c}'),
+ ('\u{11364}', '\u{11365}'),
+ ('\u{1136d}', '\u{1136f}'),
+ ('\u{11375}', '\u{113ff}'),
+ ('\u{1145c}', '\u{1145c}'),
+ ('\u{11462}', '\u{1147f}'),
+ ('\u{114c8}', '\u{114cf}'),
+ ('\u{114da}', '\u{1157f}'),
+ ('\u{115b6}', '\u{115b7}'),
+ ('\u{115de}', '\u{115ff}'),
+ ('\u{11645}', '\u{1164f}'),
+ ('\u{1165a}', '\u{1165f}'),
+ ('\u{1166d}', '\u{1167f}'),
+ ('\u{116b9}', '\u{116bf}'),
+ ('\u{116ca}', '\u{116ff}'),
+ ('\u{1171b}', '\u{1171c}'),
+ ('\u{1172c}', '\u{1172f}'),
+ ('\u{11740}', '\u{117ff}'),
+ ('\u{1183c}', '\u{1189f}'),
+ ('\u{118f3}', '\u{118fe}'),
+ ('\u{11907}', '\u{11908}'),
+ ('\u{1190a}', '\u{1190b}'),
+ ('\u{11914}', '\u{11914}'),
+ ('\u{11917}', '\u{11917}'),
+ ('\u{11936}', '\u{11936}'),
+ ('\u{11939}', '\u{1193a}'),
+ ('\u{11947}', '\u{1194f}'),
+ ('\u{1195a}', '\u{1199f}'),
+ ('\u{119a8}', '\u{119a9}'),
+ ('\u{119d8}', '\u{119d9}'),
+ ('\u{119e5}', '\u{119ff}'),
+ ('\u{11a48}', '\u{11a4f}'),
+ ('\u{11aa3}', '\u{11abf}'),
+ ('\u{11af9}', '\u{11bff}'),
+ ('\u{11c09}', '\u{11c09}'),
+ ('\u{11c37}', '\u{11c37}'),
+ ('\u{11c46}', '\u{11c4f}'),
+ ('\u{11c6d}', '\u{11c6f}'),
+ ('\u{11c90}', '\u{11c91}'),
+ ('\u{11ca8}', '\u{11ca8}'),
+ ('\u{11cb7}', '\u{11cff}'),
+ ('\u{11d07}', '\u{11d07}'),
+ ('\u{11d0a}', '\u{11d0a}'),
+ ('\u{11d37}', '\u{11d39}'),
+ ('\u{11d3b}', '\u{11d3b}'),
+ ('\u{11d3e}', '\u{11d3e}'),
+ ('\u{11d48}', '\u{11d4f}'),
+ ('\u{11d5a}', '\u{11d5f}'),
+ ('\u{11d66}', '\u{11d66}'),
+ ('\u{11d69}', '\u{11d69}'),
+ ('\u{11d8f}', '\u{11d8f}'),
+ ('\u{11d92}', '\u{11d92}'),
+ ('\u{11d99}', '\u{11d9f}'),
+ ('\u{11daa}', '\u{11edf}'),
+ ('\u{11ef9}', '\u{11faf}'),
+ ('\u{11fb1}', '\u{11fbf}'),
+ ('\u{11ff2}', '\u{11ffe}'),
+ ('\u{1239a}', '\u{123ff}'),
+ ('\u{1246f}', '\u{1246f}'),
+ ('\u{12475}', '\u{1247f}'),
+ ('\u{12544}', '\u{12fff}'),
+ ('\u{1342f}', '\u{1342f}'),
+ ('\u{13439}', '\u{143ff}'),
+ ('\u{14647}', '\u{167ff}'),
+ ('\u{16a39}', '\u{16a3f}'),
+ ('\u{16a5f}', '\u{16a5f}'),
+ ('\u{16a6a}', '\u{16a6d}'),
+ ('\u{16a70}', '\u{16acf}'),
+ ('\u{16aee}', '\u{16aef}'),
+ ('\u{16af6}', '\u{16aff}'),
+ ('\u{16b46}', '\u{16b4f}'),
+ ('\u{16b5a}', '\u{16b5a}'),
+ ('\u{16b62}', '\u{16b62}'),
+ ('\u{16b78}', '\u{16b7c}'),
+ ('\u{16b90}', '\u{16e3f}'),
+ ('\u{16e9b}', '\u{16eff}'),
+ ('\u{16f4b}', '\u{16f4e}'),
+ ('\u{16f88}', '\u{16f8e}'),
+ ('\u{16fa0}', '\u{16fdf}'),
+ ('\u{16fe5}', '\u{16fef}'),
+ ('\u{16ff2}', '\u{16fff}'),
+ ('\u{187f8}', '\u{187ff}'),
+ ('\u{18cd6}', '\u{18cff}'),
+ ('\u{18d09}', '\u{1afff}'),
+ ('\u{1b11f}', '\u{1b14f}'),
+ ('\u{1b153}', '\u{1b163}'),
+ ('\u{1b168}', '\u{1b16f}'),
+ ('\u{1b2fc}', '\u{1bbff}'),
+ ('\u{1bc6b}', '\u{1bc6f}'),
+ ('\u{1bc7d}', '\u{1bc7f}'),
+ ('\u{1bc89}', '\u{1bc8f}'),
+ ('\u{1bc9a}', '\u{1bc9b}'),
+ ('\u{1bca4}', '\u{1cfff}'),
+ ('\u{1d0f6}', '\u{1d0ff}'),
+ ('\u{1d127}', '\u{1d128}'),
+ ('\u{1d1e9}', '\u{1d1ff}'),
+ ('\u{1d246}', '\u{1d2df}'),
+ ('\u{1d2f4}', '\u{1d2ff}'),
+ ('\u{1d357}', '\u{1d35f}'),
+ ('\u{1d379}', '\u{1d3ff}'),
+ ('\u{1d455}', '\u{1d455}'),
+ ('\u{1d49d}', '\u{1d49d}'),
+ ('\u{1d4a0}', '\u{1d4a1}'),
+ ('\u{1d4a3}', '\u{1d4a4}'),
+ ('\u{1d4a7}', '\u{1d4a8}'),
+ ('\u{1d4ad}', '\u{1d4ad}'),
+ ('\u{1d4ba}', '\u{1d4ba}'),
+ ('\u{1d4bc}', '\u{1d4bc}'),
+ ('\u{1d4c4}', '\u{1d4c4}'),
+ ('\u{1d506}', '\u{1d506}'),
+ ('\u{1d50b}', '\u{1d50c}'),
+ ('\u{1d515}', '\u{1d515}'),
+ ('\u{1d51d}', '\u{1d51d}'),
+ ('\u{1d53a}', '\u{1d53a}'),
+ ('\u{1d53f}', '\u{1d53f}'),
+ ('\u{1d545}', '\u{1d545}'),
+ ('\u{1d547}', '\u{1d549}'),
+ ('\u{1d551}', '\u{1d551}'),
+ ('\u{1d6a6}', '\u{1d6a7}'),
+ ('\u{1d7cc}', '\u{1d7cd}'),
+ ('\u{1da8c}', '\u{1da9a}'),
+ ('\u{1daa0}', '\u{1daa0}'),
+ ('\u{1dab0}', '\u{1dfff}'),
+ ('\u{1e007}', '\u{1e007}'),
+ ('\u{1e019}', '\u{1e01a}'),
+ ('\u{1e022}', '\u{1e022}'),
+ ('\u{1e025}', '\u{1e025}'),
+ ('\u{1e02b}', '\u{1e0ff}'),
+ ('\u{1e12d}', '\u{1e12f}'),
+ ('\u{1e13e}', '\u{1e13f}'),
+ ('\u{1e14a}', '\u{1e14d}'),
+ ('\u{1e150}', '\u{1e2bf}'),
+ ('\u{1e2fa}', '\u{1e2fe}'),
+ ('\u{1e300}', '\u{1e7ff}'),
+ ('\u{1e8c5}', '\u{1e8c6}'),
+ ('\u{1e8d7}', '\u{1e8ff}'),
+ ('\u{1e94c}', '\u{1e94f}'),
+ ('\u{1e95a}', '\u{1e95d}'),
+ ('\u{1e960}', '\u{1ec70}'),
+ ('\u{1ecb5}', '\u{1ed00}'),
+ ('\u{1ed3e}', '\u{1edff}'),
+ ('\u{1ee04}', '\u{1ee04}'),
+ ('\u{1ee20}', '\u{1ee20}'),
+ ('\u{1ee23}', '\u{1ee23}'),
+ ('\u{1ee25}', '\u{1ee26}'),
+ ('\u{1ee28}', '\u{1ee28}'),
+ ('\u{1ee33}', '\u{1ee33}'),
+ ('\u{1ee38}', '\u{1ee38}'),
+ ('\u{1ee3a}', '\u{1ee3a}'),
+ ('\u{1ee3c}', '\u{1ee41}'),
+ ('\u{1ee43}', '\u{1ee46}'),
+ ('\u{1ee48}', '\u{1ee48}'),
+ ('\u{1ee4a}', '\u{1ee4a}'),
+ ('\u{1ee4c}', '\u{1ee4c}'),
+ ('\u{1ee50}', '\u{1ee50}'),
+ ('\u{1ee53}', '\u{1ee53}'),
+ ('\u{1ee55}', '\u{1ee56}'),
+ ('\u{1ee58}', '\u{1ee58}'),
+ ('\u{1ee5a}', '\u{1ee5a}'),
+ ('\u{1ee5c}', '\u{1ee5c}'),
+ ('\u{1ee5e}', '\u{1ee5e}'),
+ ('\u{1ee60}', '\u{1ee60}'),
+ ('\u{1ee63}', '\u{1ee63}'),
+ ('\u{1ee65}', '\u{1ee66}'),
+ ('\u{1ee6b}', '\u{1ee6b}'),
+ ('\u{1ee73}', '\u{1ee73}'),
+ ('\u{1ee78}', '\u{1ee78}'),
+ ('\u{1ee7d}', '\u{1ee7d}'),
+ ('\u{1ee7f}', '\u{1ee7f}'),
+ ('\u{1ee8a}', '\u{1ee8a}'),
+ ('\u{1ee9c}', '\u{1eea0}'),
+ ('\u{1eea4}', '\u{1eea4}'),
+ ('\u{1eeaa}', '\u{1eeaa}'),
+ ('\u{1eebc}', '\u{1eeef}'),
+ ('\u{1eef2}', '\u{1efff}'),
+ ('\u{1f02c}', '\u{1f02f}'),
+ ('\u{1f094}', '\u{1f09f}'),
+ ('\u{1f0af}', '\u{1f0b0}'),
+ ('\u{1f0c0}', '\u{1f0c0}'),
+ ('\u{1f0d0}', '\u{1f0d0}'),
+ ('\u{1f0f6}', '\u{1f0ff}'),
+ ('\u{1f1ae}', '\u{1f1e5}'),
+ ('\u{1f203}', '\u{1f20f}'),
+ ('\u{1f23c}', '\u{1f23f}'),
+ ('\u{1f249}', '\u{1f24f}'),
+ ('\u{1f252}', '\u{1f25f}'),
+ ('\u{1f266}', '\u{1f2ff}'),
+ ('\u{1f6d8}', '\u{1f6df}'),
+ ('\u{1f6ed}', '\u{1f6ef}'),
+ ('\u{1f6fd}', '\u{1f6ff}'),
+ ('\u{1f774}', '\u{1f77f}'),
+ ('\u{1f7d9}', '\u{1f7df}'),
+ ('\u{1f7ec}', '\u{1f7ff}'),
+ ('\u{1f80c}', '\u{1f80f}'),
+ ('\u{1f848}', '\u{1f84f}'),
+ ('\u{1f85a}', '\u{1f85f}'),
+ ('\u{1f888}', '\u{1f88f}'),
+ ('\u{1f8ae}', '\u{1f8af}'),
+ ('\u{1f8b2}', '\u{1f8ff}'),
+ ('\u{1f979}', '\u{1f979}'),
+ ('\u{1f9cc}', '\u{1f9cc}'),
+ ('\u{1fa54}', '\u{1fa5f}'),
+ ('\u{1fa6e}', '\u{1fa6f}'),
+ ('\u{1fa75}', '\u{1fa77}'),
+ ('\u{1fa7b}', '\u{1fa7f}'),
+ ('\u{1fa87}', '\u{1fa8f}'),
+ ('\u{1faa9}', '\u{1faaf}'),
+ ('\u{1fab7}', '\u{1fabf}'),
+ ('\u{1fac3}', '\u{1facf}'),
+ ('\u{1fad7}', '\u{1faff}'),
+ ('\u{1fb93}', '\u{1fb93}'),
+ ('\u{1fbcb}', '\u{1fbef}'),
+ ('\u{1fbfa}', '\u{1ffff}'),
+ ('\u{2a6de}', '\u{2a6ff}'),
+ ('\u{2b735}', '\u{2b73f}'),
+ ('\u{2b81e}', '\u{2b81f}'),
+ ('\u{2cea2}', '\u{2ceaf}'),
+ ('\u{2ebe1}', '\u{2f7ff}'),
+ ('\u{2fa1e}', '\u{2ffff}'),
+ ('\u{3134b}', '\u{e0000}'),
+ ('\u{e0002}', '\u{e001f}'),
+ ('\u{e0080}', '\u{e00ff}'),
+ ('\u{e01f0}', '\u{effff}'),
+ ('\u{ffffe}', '\u{fffff}'),
+ ('\u{10fffe}', '\u{10ffff}'),
+];
+
+pub const UPPERCASE_LETTER: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('À', 'Ö'),
+ ('Ø', 'Þ'),
+ ('Ā', 'Ā'),
+ ('Ă', 'Ă'),
+ ('Ą', 'Ą'),
+ ('Ć', 'Ć'),
+ ('Ĉ', 'Ĉ'),
+ ('Ċ', 'Ċ'),
+ ('Č', 'Č'),
+ ('Ď', 'Ď'),
+ ('Đ', 'Đ'),
+ ('Ē', 'Ē'),
+ ('Ĕ', 'Ĕ'),
+ ('Ė', 'Ė'),
+ ('Ę', 'Ę'),
+ ('Ě', 'Ě'),
+ ('Ĝ', 'Ĝ'),
+ ('Ğ', 'Ğ'),
+ ('Ä ', 'Ä '),
+ ('Äĸ', 'Äĸ'),
+ ('Ĥ', 'Ĥ'),
+ ('ÄĻ', 'ÄĻ'),
+ ('Ĩ', 'Ĩ'),
+ ('ÄĒ', 'ÄĒ'),
+ ('ÄŦ', 'ÄŦ'),
+ ('ÄŽ', 'ÄŽ'),
+ ('Ä°', 'Ä°'),
+ ('IJ', 'IJ'),
+ ('Ä´', 'Ä´'),
+ ('Äļ', 'Äļ'),
+ ('Äš', 'Äš'),
+ ('Äģ', 'Äģ'),
+ ('ÄŊ', 'ÄŊ'),
+ ('Äŋ', 'Äŋ'),
+ ('Ł', 'Ł'),
+ ('Ń', 'Ń'),
+ ('Ņ', 'Ņ'),
+ ('Ň', 'Ň'),
+ ('Ŋ', 'Ŋ'),
+ ('Ō', 'Ō'),
+ ('Ŏ', 'Ŏ'),
+ ('Ő', 'Ő'),
+ ('Œ', 'Œ'),
+ ('Ŕ', 'Ŕ'),
+ ('Ŗ', 'Ŗ'),
+ ('Ř', 'Ř'),
+ ('Ś', 'Ś'),
+ ('Ŝ', 'Ŝ'),
+ ('Ş', 'Ş'),
+ ('Å ', 'Å '),
+ ('Åĸ', 'Åĸ'),
+ ('Ť', 'Ť'),
+ ('ÅĻ', 'ÅĻ'),
+ ('Ũ', 'Ũ'),
+ ('ÅĒ', 'ÅĒ'),
+ ('ÅŦ', 'ÅŦ'),
+ ('ÅŽ', 'ÅŽ'),
+ ('Å°', 'Å°'),
+ ('Å˛', 'Å˛'),
+ ('Å´', 'Å´'),
+ ('Åļ', 'Åļ'),
+ ('Ÿ', 'Ś'),
+ ('Åģ', 'Åģ'),
+ ('ÅŊ', 'ÅŊ'),
+ ('Ɓ', 'Ƃ'),
+ ('Ƅ', 'Ƅ'),
+ ('Ɔ', 'Ƈ'),
+ ('Ɖ', 'Ƌ'),
+ ('Ǝ', 'Ƒ'),
+ ('Ɠ', 'Ɣ'),
+ ('Ɩ', 'Ƙ'),
+ ('Ɯ', 'Ɲ'),
+ ('Ɵ', 'Ơ'),
+ ('Æĸ', 'Æĸ'),
+ ('Ƥ', 'Ƥ'),
+ ('ÆĻ', 'Ƨ'),
+ ('ÆŠ', 'ÆŠ'),
+ ('ÆŦ', 'ÆŦ'),
+ ('Ǝ', 'Ư'),
+ ('Æą', 'Æŗ'),
+ ('Æĩ', 'Æĩ'),
+ ('Æˇ', 'Ƹ'),
+ ('Æŧ', 'Æŧ'),
+ ('Į„', 'Į„'),
+ ('Į‡', 'Į‡'),
+ ('ĮŠ', 'ĮŠ'),
+ ('Į', 'Į'),
+ ('Į', 'Į'),
+ ('Į‘', 'Į‘'),
+ ('Į“', 'Į“'),
+ ('Į•', 'Į•'),
+ ('Į—', 'Į—'),
+ ('Į™', 'Į™'),
+ ('Į›', 'Į›'),
+ ('Įž', 'Įž'),
+ ('Į ', 'Į '),
+ ('Įĸ', 'Įĸ'),
+ ('Į¤', 'Į¤'),
+ ('ĮĻ', 'ĮĻ'),
+ ('Į¨', 'Į¨'),
+ ('ĮĒ', 'ĮĒ'),
+ ('ĮŦ', 'ĮŦ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Įą', 'Įą'),
+ ('Į´', 'Į´'),
+ ('Įļ', 'Į¸'),
+ ('Įē', 'Įē'),
+ ('Įŧ', 'Įŧ'),
+ ('Įž', 'Įž'),
+ ('Ȁ', 'Ȁ'),
+ ('Ȃ', 'Ȃ'),
+ ('Ȅ', 'Ȅ'),
+ ('Ȇ', 'Ȇ'),
+ ('Ȉ', 'Ȉ'),
+ ('Ȋ', 'Ȋ'),
+ ('Ȍ', 'Ȍ'),
+ ('Ȏ', 'Ȏ'),
+ ('Ȑ', 'Ȑ'),
+ ('Ȓ', 'Ȓ'),
+ ('Ȕ', 'Ȕ'),
+ ('Ȗ', 'Ȗ'),
+ ('Ș', 'Ș'),
+ ('Ț', 'Ț'),
+ ('Ȝ', 'Ȝ'),
+ ('Ȟ', 'Ȟ'),
+ ('Č ', 'Č '),
+ ('Čĸ', 'Čĸ'),
+ ('Ȥ', 'Ȥ'),
+ ('ČĻ', 'ČĻ'),
+ ('Ȩ', 'Ȩ'),
+ ('ČĒ', 'ČĒ'),
+ ('ČŦ', 'ČŦ'),
+ ('ČŽ', 'ČŽ'),
+ ('Č°', 'Č°'),
+ ('Ȳ', 'Ȳ'),
+ ('Čē', 'Čģ'),
+ ('ČŊ', 'Čž'),
+ ('Ɂ', 'Ɂ'),
+ ('Ƀ', 'Ɇ'),
+ ('Ɉ', 'Ɉ'),
+ ('Ɋ', 'Ɋ'),
+ ('Ɍ', 'Ɍ'),
+ ('Ɏ', 'Ɏ'),
+ ('Í°', 'Í°'),
+ ('Ͳ', 'Ͳ'),
+ ('Íļ', 'Íļ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ώ'),
+ ('Α', 'Ρ'),
+ ('ÎŖ', 'ÎĢ'),
+ ('Ī', 'Ī'),
+ ('Ī’', 'Ī”'),
+ ('Ī˜', 'Ī˜'),
+ ('Īš', 'Īš'),
+ ('Īœ', 'Īœ'),
+ ('Īž', 'Īž'),
+ ('Ī ', 'Ī '),
+ ('Īĸ', 'Īĸ'),
+ ('Ī¤', 'Ī¤'),
+ ('ĪĻ', 'ĪĻ'),
+ ('Ī¨', 'Ī¨'),
+ ('ĪĒ', 'ĪĒ'),
+ ('ĪŦ', 'ĪŦ'),
+ ('ĪŽ', 'ĪŽ'),
+ ('Ī´', 'Ī´'),
+ ('Īˇ', 'Īˇ'),
+ ('Īš', 'Īē'),
+ ('ĪŊ', 'Đ¯'),
+ ('Ņ ', 'Ņ '),
+ ('Ņĸ', 'Ņĸ'),
+ ('Ņ¤', 'Ņ¤'),
+ ('ŅĻ', 'ŅĻ'),
+ ('Ņ¨', 'Ņ¨'),
+ ('ŅĒ', 'ŅĒ'),
+ ('ŅŦ', 'ŅŦ'),
+ ('ŅŽ', 'ŅŽ'),
+ ('Ņ°', 'Ņ°'),
+ ('Ņ˛', 'Ņ˛'),
+ ('Ņ´', 'Ņ´'),
+ ('Ņļ', 'Ņļ'),
+ ('Ņ¸', 'Ņ¸'),
+ ('Ņē', 'Ņē'),
+ ('Ņŧ', 'Ņŧ'),
+ ('Ņž', 'Ņž'),
+ ('Ō€', 'Ō€'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌŒ', 'ŌŒ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō', 'Ō'),
+ ('Ō’', 'Ō’'),
+ ('Ō”', 'Ō”'),
+ ('Ō–', 'Ō–'),
+ ('Ō˜', 'Ō˜'),
+ ('Ōš', 'Ōš'),
+ ('Ōœ', 'Ōœ'),
+ ('Ōž', 'Ōž'),
+ ('Ō ', 'Ō '),
+ ('Ōĸ', 'Ōĸ'),
+ ('Ō¤', 'Ō¤'),
+ ('ŌĻ', 'ŌĻ'),
+ ('Ō¨', 'Ō¨'),
+ ('ŌĒ', 'ŌĒ'),
+ ('ŌŦ', 'ŌŦ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō°', 'Ō°'),
+ ('Ō˛', 'Ō˛'),
+ ('Ō´', 'Ō´'),
+ ('Ōļ', 'Ōļ'),
+ ('Ō¸', 'Ō¸'),
+ ('Ōē', 'Ōē'),
+ ('Ōŧ', 'Ōŧ'),
+ ('Ōž', 'Ōž'),
+ ('Ķ€', 'Ķ'),
+ ('Ķƒ', 'Ķƒ'),
+ ('Ķ…', 'Ķ…'),
+ ('Ķ‡', 'Ķ‡'),
+ ('Ķ‰', 'Ķ‰'),
+ ('Ķ‹', 'Ķ‹'),
+ ('Ķ', 'Ķ'),
+ ('Ķ', 'Ķ'),
+ ('Ķ’', 'Ķ’'),
+ ('Ķ”', 'Ķ”'),
+ ('Ķ–', 'Ķ–'),
+ ('Ķ˜', 'Ķ˜'),
+ ('Ķš', 'Ķš'),
+ ('Ķœ', 'Ķœ'),
+ ('Ķž', 'Ķž'),
+ ('Ķ ', 'Ķ '),
+ ('Ķĸ', 'Ķĸ'),
+ ('Ķ¤', 'Ķ¤'),
+ ('ĶĻ', 'ĶĻ'),
+ ('Ķ¨', 'Ķ¨'),
+ ('ĶĒ', 'ĶĒ'),
+ ('ĶŦ', 'ĶŦ'),
+ ('ĶŽ', 'ĶŽ'),
+ ('Ķ°', 'Ķ°'),
+ ('Ķ˛', 'Ķ˛'),
+ ('Ķ´', 'Ķ´'),
+ ('Ķļ', 'Ķļ'),
+ ('Ķ¸', 'Ķ¸'),
+ ('Ķē', 'Ķē'),
+ ('Ķŧ', 'Ķŧ'),
+ ('Ķž', 'Ķž'),
+ ('Ԁ', 'Ԁ'),
+ ('Ԃ', 'Ԃ'),
+ ('Ԅ', 'Ԅ'),
+ ('Ԇ', 'Ԇ'),
+ ('Ԉ', 'Ԉ'),
+ ('Ԋ', 'Ԋ'),
+ ('Ԍ', 'Ԍ'),
+ ('Ԏ', 'Ԏ'),
+ ('Ԑ', 'Ԑ'),
+ ('Ԓ', 'Ԓ'),
+ ('Ԕ', 'Ԕ'),
+ ('Ԗ', 'Ԗ'),
+ ('Ԙ', 'Ԙ'),
+ ('Ԛ', 'Ԛ'),
+ ('Ԝ', 'Ԝ'),
+ ('Ԟ', 'Ԟ'),
+ ('Ô ', 'Ô '),
+ ('Ôĸ', 'Ôĸ'),
+ ('Ô¤', 'Ô¤'),
+ ('ÔĻ', 'ÔĻ'),
+ ('Ô¨', 'Ô¨'),
+ ('ÔĒ', 'ÔĒ'),
+ ('ÔŦ', 'ÔŦ'),
+ ('ÔŽ', 'ÔŽ'),
+ ('Ôą', 'Ֆ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('Ꭰ', 'áĩ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('Ḁ', 'Ḁ'),
+ ('Ḃ', 'Ḃ'),
+ ('Ḅ', 'Ḅ'),
+ ('Ḇ', 'Ḇ'),
+ ('Ḉ', 'Ḉ'),
+ ('Ḋ', 'Ḋ'),
+ ('Ḍ', 'Ḍ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḑ', 'Ḑ'),
+ ('Ḓ', 'Ḓ'),
+ ('Ḕ', 'Ḕ'),
+ ('Ḗ', 'Ḗ'),
+ ('Ḙ', 'Ḙ'),
+ ('Ḛ', 'Ḛ'),
+ ('Ḝ', 'Ḝ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ḡ', 'Ḡ'),
+ ('á¸ĸ', 'á¸ĸ'),
+ ('Ḥ', 'Ḥ'),
+ ('á¸Ļ', 'á¸Ļ'),
+ ('Ḩ', 'Ḩ'),
+ ('á¸Ē', 'á¸Ē'),
+ ('á¸Ŧ', 'á¸Ŧ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḱ', 'Ḱ'),
+ ('Ḳ', 'Ḳ'),
+ ('Ḵ', 'Ḵ'),
+ ('á¸ļ', 'á¸ļ'),
+ ('Ḹ', 'Ḹ'),
+ ('á¸ē', 'á¸ē'),
+ ('á¸ŧ', 'á¸ŧ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ṁ', 'Ṁ'),
+ ('Ṃ', 'Ṃ'),
+ ('Ṅ', 'Ṅ'),
+ ('Ṇ', 'Ṇ'),
+ ('ᚈ', 'ᚈ'),
+ ('Ṋ', 'Ṋ'),
+ ('Ṍ', 'Ṍ'),
+ ('ᚎ', 'ᚎ'),
+ ('ᚐ', 'ᚐ'),
+ ('Ṓ', 'Ṓ'),
+ ('Ṕ', 'Ṕ'),
+ ('Ṗ', 'Ṗ'),
+ ('ᚘ', 'ᚘ'),
+ ('Ṛ', 'Ṛ'),
+ ('Ṝ', 'Ṝ'),
+ ('᚞', '᚞'),
+ ('áš ', 'áš '),
+ ('ášĸ', 'ášĸ'),
+ ('ᚤ', 'ᚤ'),
+ ('ášĻ', 'ášĻ'),
+ ('ᚨ', 'ᚨ'),
+ ('ášĒ', 'ášĒ'),
+ ('ášŦ', 'ášŦ'),
+ ('ᚎ', 'ᚎ'),
+ ('áš°', 'áš°'),
+ ('ᚲ', 'ᚲ'),
+ ('áš´', 'áš´'),
+ ('ášļ', 'ášļ'),
+ ('ᚸ', 'ᚸ'),
+ ('ášē', 'ášē'),
+ ('ášŧ', 'ášŧ'),
+ ('ášž', 'ášž'),
+ ('áē€', 'áē€'),
+ ('áē‚', 'áē‚'),
+ ('áē„', 'áē„'),
+ ('áē†', 'áē†'),
+ ('áēˆ', 'áēˆ'),
+ ('áēŠ', 'áēŠ'),
+ ('áēŒ', 'áēŒ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē', 'áē'),
+ ('áē’', 'áē’'),
+ ('áē”', 'áē”'),
+ ('áēž', 'áēž'),
+ ('áē ', 'áē '),
+ ('áēĸ', 'áēĸ'),
+ ('áē¤', 'áē¤'),
+ ('áēĻ', 'áēĻ'),
+ ('áē¨', 'áē¨'),
+ ('áēĒ', 'áēĒ'),
+ ('áēŦ', 'áēŦ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē°', 'áē°'),
+ ('áē˛', 'áē˛'),
+ ('áē´', 'áē´'),
+ ('áēļ', 'áēļ'),
+ ('áē¸', 'áē¸'),
+ ('áēē', 'áēē'),
+ ('áēŧ', 'áēŧ'),
+ ('áēž', 'áēž'),
+ ('áģ€', 'áģ€'),
+ ('áģ‚', 'áģ‚'),
+ ('áģ„', 'áģ„'),
+ ('áģ†', 'áģ†'),
+ ('áģˆ', 'áģˆ'),
+ ('áģŠ', 'áģŠ'),
+ ('áģŒ', 'áģŒ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ', 'áģ'),
+ ('áģ’', 'áģ’'),
+ ('áģ”', 'áģ”'),
+ ('áģ–', 'áģ–'),
+ ('áģ˜', 'áģ˜'),
+ ('áģš', 'áģš'),
+ ('áģœ', 'áģœ'),
+ ('áģž', 'áģž'),
+ ('áģ ', 'áģ '),
+ ('áģĸ', 'áģĸ'),
+ ('áģ¤', 'áģ¤'),
+ ('áģĻ', 'áģĻ'),
+ ('áģ¨', 'áģ¨'),
+ ('áģĒ', 'áģĒ'),
+ ('áģŦ', 'áģŦ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ°', 'áģ°'),
+ ('áģ˛', 'áģ˛'),
+ ('áģ´', 'áģ´'),
+ ('áģļ', 'áģļ'),
+ ('áģ¸', 'áģ¸'),
+ ('áģē', 'áģē'),
+ ('áģŧ', 'áģŧ'),
+ ('áģž', 'áģž'),
+ ('áŧˆ', 'áŧ'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ¨', 'áŧ¯'),
+ ('áŧ¸', 'áŧŋ'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŸ'),
+ ('áŊ¨', 'áŊ¯'),
+ ('ី', 'ážģ'),
+ ('áŋˆ', 'áŋ‹'),
+ ('áŋ˜', 'áŋ›'),
+ ('áŋ¨', 'áŋŦ'),
+ ('áŋ¸', 'áŋģ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℋ', 'ℍ'),
+ ('ℐ', 'ℒ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℰ', 'â„ŗ'),
+ ('ℾ', 'â„ŋ'),
+ ('ⅅ', 'ⅅ'),
+ ('Ↄ', 'Ↄ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('âą ', 'âą '),
+ ('âąĸ', '⹤'),
+ ('⹧', '⹧'),
+ ('⹊', '⹊'),
+ ('âąĢ', 'âąĢ'),
+ ('âą­', 'âą°'),
+ ('⹲', '⹲'),
+ ('âąĩ', 'âąĩ'),
+ ('Ȿ', 'Ⲁ'),
+ ('Ⲃ', 'Ⲃ'),
+ ('Ⲅ', 'Ⲅ'),
+ ('Ⲇ', 'Ⲇ'),
+ ('Ⲉ', 'Ⲉ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('Ⲍ', 'Ⲍ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲑ', 'Ⲑ'),
+ ('Ⲓ', 'Ⲓ'),
+ ('Ⲕ', 'Ⲕ'),
+ ('Ⲗ', 'Ⲗ'),
+ ('Ⲙ', 'Ⲙ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('Ⲝ', 'Ⲝ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('Ⲡ', 'Ⲡ'),
+ ('â˛ĸ', 'â˛ĸ'),
+ ('Ⲥ', 'Ⲥ'),
+ ('â˛Ļ', 'â˛Ļ'),
+ ('Ⲩ', 'Ⲩ'),
+ ('â˛Ē', 'â˛Ē'),
+ ('â˛Ŧ', 'â˛Ŧ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲱ', 'Ⲱ'),
+ ('Ⲳ', 'Ⲳ'),
+ ('Ⲵ', 'Ⲵ'),
+ ('â˛ļ', 'â˛ļ'),
+ ('Ⲹ', 'Ⲹ'),
+ ('â˛ē', 'â˛ē'),
+ ('â˛ŧ', 'â˛ŧ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('âŗ€', 'âŗ€'),
+ ('âŗ‚', 'âŗ‚'),
+ ('âŗ„', 'âŗ„'),
+ ('âŗ†', 'âŗ†'),
+ ('âŗˆ', 'âŗˆ'),
+ ('âŗŠ', 'âŗŠ'),
+ ('âŗŒ', 'âŗŒ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ’', 'âŗ’'),
+ ('âŗ”', 'âŗ”'),
+ ('âŗ–', 'âŗ–'),
+ ('âŗ˜', 'âŗ˜'),
+ ('âŗš', 'âŗš'),
+ ('âŗœ', 'âŗœ'),
+ ('âŗž', 'âŗž'),
+ ('âŗ ', 'âŗ '),
+ ('âŗĸ', 'âŗĸ'),
+ ('âŗĢ', 'âŗĢ'),
+ ('âŗ­', 'âŗ­'),
+ ('âŗ˛', 'âŗ˛'),
+ ('Ꙁ', 'Ꙁ'),
+ ('Ꙃ', 'Ꙃ'),
+ ('Ꙅ', 'Ꙅ'),
+ ('Ꙇ', 'Ꙇ'),
+ ('Ꙉ', 'Ꙉ'),
+ ('Ꙋ', 'Ꙋ'),
+ ('Ꙍ', 'Ꙍ'),
+ ('Ꙏ', 'Ꙏ'),
+ ('Ꙑ', 'Ꙑ'),
+ ('Ꙓ', 'Ꙓ'),
+ ('Ꙕ', 'Ꙕ'),
+ ('Ꙗ', 'Ꙗ'),
+ ('Ꙙ', 'Ꙙ'),
+ ('Ꙛ', 'Ꙛ'),
+ ('Ꙝ', 'Ꙝ'),
+ ('Ꙟ', 'Ꙟ'),
+ ('Ꙡ', 'Ꙡ'),
+ ('ę™ĸ', 'ę™ĸ'),
+ ('Ꙥ', 'Ꙥ'),
+ ('ę™Ļ', 'ę™Ļ'),
+ ('Ꙩ', 'Ꙩ'),
+ ('ę™Ē', 'ę™Ē'),
+ ('ę™Ŧ', 'ę™Ŧ'),
+ ('Ꚁ', 'Ꚁ'),
+ ('Ꚃ', 'Ꚃ'),
+ ('Ꚅ', 'Ꚅ'),
+ ('Ꚇ', 'Ꚇ'),
+ ('Ꚉ', 'Ꚉ'),
+ ('Ꚋ', 'Ꚋ'),
+ ('Ꚍ', 'Ꚍ'),
+ ('Ꚏ', 'Ꚏ'),
+ ('Ꚑ', 'Ꚑ'),
+ ('Ꚓ', 'Ꚓ'),
+ ('Ꚕ', 'Ꚕ'),
+ ('Ꚗ', 'Ꚗ'),
+ ('Ꚙ', 'Ꚙ'),
+ ('Ꚛ', 'Ꚛ'),
+ ('ęœĸ', 'ęœĸ'),
+ ('Ꜥ', 'Ꜥ'),
+ ('ęœĻ', 'ęœĻ'),
+ ('Ꜩ', 'Ꜩ'),
+ ('ęœĒ', 'ęœĒ'),
+ ('ęœŦ', 'ęœŦ'),
+ ('Ꜯ', 'Ꜯ'),
+ ('Ꜳ', 'Ꜳ'),
+ ('Ꜵ', 'Ꜵ'),
+ ('ęœļ', 'ęœļ'),
+ ('Ꜹ', 'Ꜹ'),
+ ('ęœē', 'ęœē'),
+ ('ęœŧ', 'ęœŧ'),
+ ('Ꜿ', 'Ꜿ'),
+ ('Ꝁ', 'Ꝁ'),
+ ('Ꝃ', 'Ꝃ'),
+ ('Ꝅ', 'Ꝅ'),
+ ('Ꝇ', 'Ꝇ'),
+ ('Ꝉ', 'Ꝉ'),
+ ('Ꝋ', 'Ꝋ'),
+ ('Ꝍ', 'Ꝍ'),
+ ('Ꝏ', 'Ꝏ'),
+ ('Ꝑ', 'Ꝑ'),
+ ('Ꝓ', 'Ꝓ'),
+ ('Ꝕ', 'Ꝕ'),
+ ('Ꝗ', 'Ꝗ'),
+ ('Ꝙ', 'Ꝙ'),
+ ('Ꝛ', 'Ꝛ'),
+ ('Ꝝ', 'Ꝝ'),
+ ('Ꝟ', 'Ꝟ'),
+ ('Ꝡ', 'Ꝡ'),
+ ('ęĸ', 'ęĸ'),
+ ('Ꝥ', 'Ꝥ'),
+ ('ęĻ', 'ęĻ'),
+ ('Ꝩ', 'Ꝩ'),
+ ('ęĒ', 'ęĒ'),
+ ('ęŦ', 'ęŦ'),
+ ('Ꝯ', 'Ꝯ'),
+ ('Ꝺ', 'Ꝺ'),
+ ('ęģ', 'ęģ'),
+ ('ęŊ', 'Ꝿ'),
+ ('Ꞁ', 'Ꞁ'),
+ ('Ꞃ', 'Ꞃ'),
+ ('Ꞅ', 'Ꞅ'),
+ ('Ꞇ', 'Ꞇ'),
+ ('Ꞌ', 'Ꞌ'),
+ ('Ɥ', 'Ɥ'),
+ ('Ꞑ', 'Ꞑ'),
+ ('Ꞓ', 'Ꞓ'),
+ ('Ꞗ', 'Ꞗ'),
+ ('Ꞙ', 'Ꞙ'),
+ ('Ꞛ', 'Ꞛ'),
+ ('Ꞝ', 'Ꞝ'),
+ ('Ꞟ', 'Ꞟ'),
+ ('Ꞡ', 'Ꞡ'),
+ ('ęžĸ', 'ęžĸ'),
+ ('Ꞥ', 'Ꞥ'),
+ ('ęžĻ', 'ęžĻ'),
+ ('Ꞩ', 'Ꞩ'),
+ ('ęžĒ', 'Ɪ'),
+ ('Ʞ', 'Ꞵ'),
+ ('ęžļ', 'ęžļ'),
+ ('Ꞹ', 'Ꞹ'),
+ ('ęžē', 'ęžē'),
+ ('ęžŧ', 'ęžŧ'),
+ ('Ꞿ', 'Ꞿ'),
+ ('Ꟃ', 'Ꟃ'),
+ ('Ꞔ', '\u{a7c7}'),
+ ('\u{a7c9}', '\u{a7c9}'),
+ ('\u{a7f5}', '\u{a7f5}'),
+ ('īŧĄ', 'īŧē'),
+ ('𐐀', '𐐧'),
+ ('𐒰', '𐓓'),
+ ('𐲀', '𐲲'),
+ ('đ‘ĸ ', 'đ‘ĸŋ'),
+ ('𖹀', '𖹟'),
+ ('𝐀', '𝐙'),
+ ('𝐴', '𝑍'),
+ ('𝑨', '𝒁'),
+ ('𝒜', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', 'đ’ĩ'),
+ ('𝓐', '𝓩'),
+ ('𝔄', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔸', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('đ•Ŧ', '𝖅'),
+ ('𝖠', '𝖹'),
+ ('𝗔', '𝗭'),
+ ('𝘈', '𝘡'),
+ ('đ˜ŧ', '𝙕'),
+ ('𝙰', '𝚉'),
+ ('𝚨', '𝛀'),
+ ('đ›ĸ', 'đ›ē'),
+ ('𝜜', '𝜴'),
+ ('𝝖', '𝝮'),
+ ('𝞐', '𝞨'),
+ ('𝟊', '𝟊'),
+ ('𞤀', '𞤡'),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs b/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs
new file mode 100644
index 000000000..7df9d2b93
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs
@@ -0,0 +1,1389 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate grapheme-cluster-break ucd-13.0.0 --chars
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+ ("CR", CR),
+ ("Control", CONTROL),
+ ("Extend", EXTEND),
+ ("L", L),
+ ("LF", LF),
+ ("LV", LV),
+ ("LVT", LVT),
+ ("Prepend", PREPEND),
+ ("Regional_Indicator", REGIONAL_INDICATOR),
+ ("SpacingMark", SPACINGMARK),
+ ("T", T),
+ ("V", V),
+ ("ZWJ", ZWJ),
+];
+
+pub const CR: &'static [(char, char)] = &[('\r', '\r')];
+
+pub const CONTROL: &'static [(char, char)] = &[
+ ('\u{0}', '\t'),
+ ('\u{b}', '\u{c}'),
+ ('\u{e}', '\u{1f}'),
+ ('\u{7f}', '\u{9f}'),
+ ('\u{ad}', '\u{ad}'),
+ ('\u{61c}', '\u{61c}'),
+ ('\u{180e}', '\u{180e}'),
+ ('\u{200b}', '\u{200b}'),
+ ('\u{200e}', '\u{200f}'),
+ ('\u{2028}', '\u{202e}'),
+ ('\u{2060}', '\u{206f}'),
+ ('\u{feff}', '\u{feff}'),
+ ('\u{fff0}', '\u{fffb}'),
+ ('\u{13430}', '\u{13438}'),
+ ('\u{1bca0}', '\u{1bca3}'),
+ ('\u{1d173}', '\u{1d17a}'),
+ ('\u{e0000}', '\u{e001f}'),
+ ('\u{e0080}', '\u{e00ff}'),
+ ('\u{e01f0}', '\u{e0fff}'),
+];
+
+pub const EXTEND: &'static [(char, char)] = &[
+ ('\u{300}', '\u{36f}'),
+ ('\u{483}', '\u{489}'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('\u{610}', '\u{61a}'),
+ ('\u{64b}', '\u{65f}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{6d6}', '\u{6dc}'),
+ ('\u{6df}', '\u{6e4}'),
+ ('\u{6e7}', '\u{6e8}'),
+ ('\u{6ea}', '\u{6ed}'),
+ ('\u{711}', '\u{711}'),
+ ('\u{730}', '\u{74a}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{7eb}', '\u{7f3}'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('\u{816}', '\u{819}'),
+ ('\u{81b}', '\u{823}'),
+ ('\u{825}', '\u{827}'),
+ ('\u{829}', '\u{82d}'),
+ ('\u{859}', '\u{85b}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', '\u{902}'),
+ ('\u{93a}', '\u{93a}'),
+ ('\u{93c}', '\u{93c}'),
+ ('\u{941}', '\u{948}'),
+ ('\u{94d}', '\u{94d}'),
+ ('\u{951}', '\u{957}'),
+ ('\u{962}', '\u{963}'),
+ ('\u{981}', '\u{981}'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9be}', '\u{9be}'),
+ ('\u{9c1}', '\u{9c4}'),
+ ('\u{9cd}', '\u{9cd}'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('\u{9e2}', '\u{9e3}'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', '\u{a02}'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('\u{a41}', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a70}', '\u{a71}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{a81}', '\u{a82}'),
+ ('\u{abc}', '\u{abc}'),
+ ('\u{ac1}', '\u{ac5}'),
+ ('\u{ac7}', '\u{ac8}'),
+ ('\u{acd}', '\u{acd}'),
+ ('\u{ae2}', '\u{ae3}'),
+ ('\u{afa}', '\u{aff}'),
+ ('\u{b01}', '\u{b01}'),
+ ('\u{b3c}', '\u{b3c}'),
+ ('\u{b3e}', '\u{b3f}'),
+ ('\u{b41}', '\u{b44}'),
+ ('\u{b4d}', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('\u{b62}', '\u{b63}'),
+ ('\u{b82}', '\u{b82}'),
+ ('\u{bbe}', '\u{bbe}'),
+ ('\u{bc0}', '\u{bc0}'),
+ ('\u{bcd}', '\u{bcd}'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('\u{c00}', '\u{c00}'),
+ ('\u{c04}', '\u{c04}'),
+ ('\u{c3e}', '\u{c40}'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('\u{c62}', '\u{c63}'),
+ ('\u{c81}', '\u{c81}'),
+ ('\u{cbc}', '\u{cbc}'),
+ ('\u{cbf}', '\u{cbf}'),
+ ('\u{cc2}', '\u{cc2}'),
+ ('\u{cc6}', '\u{cc6}'),
+ ('\u{ccc}', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('\u{d00}', '\u{d01}'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d3e}', '\u{d3e}'),
+ ('\u{d41}', '\u{d44}'),
+ ('\u{d4d}', '\u{d4d}'),
+ ('\u{d57}', '\u{d57}'),
+ ('\u{d62}', '\u{d63}'),
+ ('\u{d81}', '\u{d81}'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dcf}'),
+ ('\u{dd2}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('\u{ddf}', '\u{ddf}'),
+ ('\u{e31}', '\u{e31}'),
+ ('\u{e34}', '\u{e3a}'),
+ ('\u{e47}', '\u{e4e}'),
+ ('\u{eb1}', '\u{eb1}'),
+ ('\u{eb4}', '\u{ebc}'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('\u{f18}', '\u{f19}'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('\u{f71}', '\u{f7e}'),
+ ('\u{f80}', '\u{f84}'),
+ ('\u{f86}', '\u{f87}'),
+ ('\u{f8d}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('\u{102d}', '\u{1030}'),
+ ('\u{1032}', '\u{1037}'),
+ ('\u{1039}', '\u{103a}'),
+ ('\u{103d}', '\u{103e}'),
+ ('\u{1058}', '\u{1059}'),
+ ('\u{105e}', '\u{1060}'),
+ ('\u{1071}', '\u{1074}'),
+ ('\u{1082}', '\u{1082}'),
+ ('\u{1085}', '\u{1086}'),
+ ('\u{108d}', '\u{108d}'),
+ ('\u{109d}', '\u{109d}'),
+ ('\u{135d}', '\u{135f}'),
+ ('\u{1712}', '\u{1714}'),
+ ('\u{1732}', '\u{1734}'),
+ ('\u{1752}', '\u{1753}'),
+ ('\u{1772}', '\u{1773}'),
+ ('\u{17b4}', '\u{17b5}'),
+ ('\u{17b7}', '\u{17bd}'),
+ ('\u{17c6}', '\u{17c6}'),
+ ('\u{17c9}', '\u{17d3}'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('\u{180b}', '\u{180d}'),
+ ('\u{1885}', '\u{1886}'),
+ ('\u{18a9}', '\u{18a9}'),
+ ('\u{1920}', '\u{1922}'),
+ ('\u{1927}', '\u{1928}'),
+ ('\u{1932}', '\u{1932}'),
+ ('\u{1939}', '\u{193b}'),
+ ('\u{1a17}', '\u{1a18}'),
+ ('\u{1a1b}', '\u{1a1b}'),
+ ('\u{1a56}', '\u{1a56}'),
+ ('\u{1a58}', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a60}'),
+ ('\u{1a62}', '\u{1a62}'),
+ ('\u{1a65}', '\u{1a6c}'),
+ ('\u{1a73}', '\u{1a7c}'),
+ ('\u{1a7f}', '\u{1a7f}'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1b00}', '\u{1b03}'),
+ ('\u{1b34}', '\u{1b3a}'),
+ ('\u{1b3c}', '\u{1b3c}'),
+ ('\u{1b42}', '\u{1b42}'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', '\u{1b81}'),
+ ('\u{1ba2}', '\u{1ba5}'),
+ ('\u{1ba8}', '\u{1ba9}'),
+ ('\u{1bab}', '\u{1bad}'),
+ ('\u{1be6}', '\u{1be6}'),
+ ('\u{1be8}', '\u{1be9}'),
+ ('\u{1bed}', '\u{1bed}'),
+ ('\u{1bef}', '\u{1bf1}'),
+ ('\u{1c2c}', '\u{1c33}'),
+ ('\u{1c36}', '\u{1c37}'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', '\u{1ce0}'),
+ ('\u{1ce2}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('\u{1cf8}', '\u{1cf9}'),
+ ('\u{1dc0}', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('\u{200c}', '\u{200c}'),
+ ('\u{20d0}', '\u{20f0}'),
+ ('\u{2cef}', '\u{2cf1}'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('\u{302a}', '\u{302f}'),
+ ('\u{3099}', '\u{309a}'),
+ ('\u{a66f}', '\u{a672}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('\u{a69e}', '\u{a69f}'),
+ ('\u{a6f0}', '\u{a6f1}'),
+ ('\u{a802}', '\u{a802}'),
+ ('\u{a806}', '\u{a806}'),
+ ('\u{a80b}', '\u{a80b}'),
+ ('\u{a825}', '\u{a826}'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('\u{a8c4}', '\u{a8c5}'),
+ ('\u{a8e0}', '\u{a8f1}'),
+ ('\u{a8ff}', '\u{a8ff}'),
+ ('\u{a926}', '\u{a92d}'),
+ ('\u{a947}', '\u{a951}'),
+ ('\u{a980}', '\u{a982}'),
+ ('\u{a9b3}', '\u{a9b3}'),
+ ('\u{a9b6}', '\u{a9b9}'),
+ ('\u{a9bc}', '\u{a9bd}'),
+ ('\u{a9e5}', '\u{a9e5}'),
+ ('\u{aa29}', '\u{aa2e}'),
+ ('\u{aa31}', '\u{aa32}'),
+ ('\u{aa35}', '\u{aa36}'),
+ ('\u{aa43}', '\u{aa43}'),
+ ('\u{aa4c}', '\u{aa4c}'),
+ ('\u{aa7c}', '\u{aa7c}'),
+ ('\u{aab0}', '\u{aab0}'),
+ ('\u{aab2}', '\u{aab4}'),
+ ('\u{aab7}', '\u{aab8}'),
+ ('\u{aabe}', '\u{aabf}'),
+ ('\u{aac1}', '\u{aac1}'),
+ ('\u{aaec}', '\u{aaed}'),
+ ('\u{aaf6}', '\u{aaf6}'),
+ ('\u{abe5}', '\u{abe5}'),
+ ('\u{abe8}', '\u{abe8}'),
+ ('\u{abed}', '\u{abed}'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{10376}', '\u{1037a}'),
+ ('\u{10a01}', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '\u{10a0f}'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('\u{10ae5}', '\u{10ae6}'),
+ ('\u{10d24}', '\u{10d27}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10f46}', '\u{10f50}'),
+ ('\u{11001}', '\u{11001}'),
+ ('\u{11038}', '\u{11046}'),
+ ('\u{1107f}', '\u{11081}'),
+ ('\u{110b3}', '\u{110b6}'),
+ ('\u{110b9}', '\u{110ba}'),
+ ('\u{11100}', '\u{11102}'),
+ ('\u{11127}', '\u{1112b}'),
+ ('\u{1112d}', '\u{11134}'),
+ ('\u{11173}', '\u{11173}'),
+ ('\u{11180}', '\u{11181}'),
+ ('\u{111b6}', '\u{111be}'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111cf}', '\u{111cf}'),
+ ('\u{1122f}', '\u{11231}'),
+ ('\u{11234}', '\u{11234}'),
+ ('\u{11236}', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('\u{112df}', '\u{112df}'),
+ ('\u{112e3}', '\u{112ea}'),
+ ('\u{11300}', '\u{11301}'),
+ ('\u{1133b}', '\u{1133c}'),
+ ('\u{1133e}', '\u{1133e}'),
+ ('\u{11340}', '\u{11340}'),
+ ('\u{11357}', '\u{11357}'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('\u{11438}', '\u{1143f}'),
+ ('\u{11442}', '\u{11444}'),
+ ('\u{11446}', '\u{11446}'),
+ ('\u{1145e}', '\u{1145e}'),
+ ('\u{114b0}', '\u{114b0}'),
+ ('\u{114b3}', '\u{114b8}'),
+ ('\u{114ba}', '\u{114ba}'),
+ ('\u{114bd}', '\u{114bd}'),
+ ('\u{114bf}', '\u{114c0}'),
+ ('\u{114c2}', '\u{114c3}'),
+ ('\u{115af}', '\u{115af}'),
+ ('\u{115b2}', '\u{115b5}'),
+ ('\u{115bc}', '\u{115bd}'),
+ ('\u{115bf}', '\u{115c0}'),
+ ('\u{115dc}', '\u{115dd}'),
+ ('\u{11633}', '\u{1163a}'),
+ ('\u{1163d}', '\u{1163d}'),
+ ('\u{1163f}', '\u{11640}'),
+ ('\u{116ab}', '\u{116ab}'),
+ ('\u{116ad}', '\u{116ad}'),
+ ('\u{116b0}', '\u{116b5}'),
+ ('\u{116b7}', '\u{116b7}'),
+ ('\u{1171d}', '\u{1171f}'),
+ ('\u{11722}', '\u{11725}'),
+ ('\u{11727}', '\u{1172b}'),
+ ('\u{1182f}', '\u{11837}'),
+ ('\u{11839}', '\u{1183a}'),
+ ('\u{11930}', '\u{11930}'),
+ ('\u{1193b}', '\u{1193c}'),
+ ('\u{1193e}', '\u{1193e}'),
+ ('\u{11943}', '\u{11943}'),
+ ('\u{119d4}', '\u{119d7}'),
+ ('\u{119da}', '\u{119db}'),
+ ('\u{119e0}', '\u{119e0}'),
+ ('\u{11a01}', '\u{11a0a}'),
+ ('\u{11a33}', '\u{11a38}'),
+ ('\u{11a3b}', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a51}', '\u{11a56}'),
+ ('\u{11a59}', '\u{11a5b}'),
+ ('\u{11a8a}', '\u{11a96}'),
+ ('\u{11a98}', '\u{11a99}'),
+ ('\u{11c30}', '\u{11c36}'),
+ ('\u{11c38}', '\u{11c3d}'),
+ ('\u{11c3f}', '\u{11c3f}'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('\u{11caa}', '\u{11cb0}'),
+ ('\u{11cb2}', '\u{11cb3}'),
+ ('\u{11cb5}', '\u{11cb6}'),
+ ('\u{11d31}', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d45}'),
+ ('\u{11d47}', '\u{11d47}'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('\u{11d95}', '\u{11d95}'),
+ ('\u{11d97}', '\u{11d97}'),
+ ('\u{11ef3}', '\u{11ef4}'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('\u{16b30}', '\u{16b36}'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('\u{16f8f}', '\u{16f92}'),
+ ('\u{16fe4}', '\u{16fe4}'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d165}', '\u{1d165}'),
+ ('\u{1d167}', '\u{1d169}'),
+ ('\u{1d16e}', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('\u{1e130}', '\u{1e136}'),
+ ('\u{1e2ec}', '\u{1e2ef}'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('\u{1e944}', '\u{1e94a}'),
+ ('đŸģ', 'đŸŋ'),
+ ('\u{e0020}', '\u{e007f}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const L: &'static [(char, char)] = &[('ᄀ', 'ᅟ'), ('ęĨ ', 'ęĨŧ')];
+
+pub const LF: &'static [(char, char)] = &[('\n', '\n')];
+
+pub const LV: &'static [(char, char)] = &[
+ ('가', '가'),
+ ('개', '개'),
+ ('ę°¸', 'ę°¸'),
+ ('걔', '걔'),
+ ('ęą°', 'ęą°'),
+ ('게', '게'),
+ ('겨', '겨'),
+ ('ęŗ„', 'ęŗ„'),
+ ('ęŗ ', 'ęŗ '),
+ ('ęŗŧ', 'ęŗŧ'),
+ ('괘', '괘'),
+ ('ę´´', 'ę´´'),
+ ('ęĩ', 'ęĩ'),
+ ('ęĩŦ', 'ęĩŦ'),
+ ('ęļˆ', 'ęļˆ'),
+ ('ęļ¤', 'ęļ¤'),
+ ('귀', '귀'),
+ ('규', '규'),
+ ('꡸', '꡸'),
+ ('긔', '긔'),
+ ('기', '기'),
+ ('까', '까'),
+ ('ꚨ', 'ꚨ'),
+ ('ęē„', 'ęē„'),
+ ('ęē ', 'ęē '),
+ ('ęēŧ', 'ęēŧ'),
+ ('ęģ˜', 'ęģ˜'),
+ ('ęģ´', 'ęģ´'),
+ ('ęŧ', 'ęŧ'),
+ ('ęŧŦ', 'ęŧŦ'),
+ ('ęŊˆ', 'ęŊˆ'),
+ ('ęŊ¤', 'ęŊ¤'),
+ ('꾀', '꾀'),
+ ('꾜', '꾜'),
+ ('Ꞹ', 'Ꞹ'),
+ ('ęŋ”', 'ęŋ”'),
+ ('ęŋ°', 'ęŋ°'),
+ ('뀌', '뀌'),
+ ('뀨', '뀨'),
+ ('끄', '끄'),
+ ('끠', '끠'),
+ ('ëŧ', 'ëŧ'),
+ ('나', '나'),
+ ('내', '내'),
+ ('냐', '냐'),
+ ('ëƒŦ', 'ëƒŦ'),
+ ('너', '너'),
+ ('네', '네'),
+ ('녀', '녀'),
+ ('녜', '녜'),
+ ('노', '노'),
+ ('놔', '놔'),
+ ('놰', '놰'),
+ ('뇌', '뇌'),
+ ('뇨', '뇨'),
+ ('누', '누'),
+ ('눠', '눠'),
+ ('ëˆŧ', 'ëˆŧ'),
+ ('뉘', '뉘'),
+ ('뉴', '뉴'),
+ ('느', '느'),
+ ('ëŠŦ', 'ëŠŦ'),
+ ('니', '니'),
+ ('다', '다'),
+ ('대', '대'),
+ ('댜', '댜'),
+ ('댸', '댸'),
+ ('더', '더'),
+ ('데', '데'),
+ ('뎌', '뎌'),
+ ('뎨', '뎨'),
+ ('도', '도'),
+ ('돠', '돠'),
+ ('ëŧ', 'ëŧ'),
+ ('되', '되'),
+ ('됴', '됴'),
+ ('두', '두'),
+ ('ë‘Ŧ', 'ë‘Ŧ'),
+ ('뒈', '뒈'),
+ ('뒤', '뒤'),
+ ('듀', '듀'),
+ ('드', '드'),
+ ('듸', '듸'),
+ ('디', '디'),
+ ('따', '따'),
+ ('때', '때'),
+ ('땨', '땨'),
+ ('떄', '떄'),
+ ('떠', '떠'),
+ ('ë–ŧ', 'ë–ŧ'),
+ ('뗘', '뗘'),
+ ('뗴', '뗴'),
+ ('또', '또'),
+ ('ë˜Ŧ', 'ë˜Ŧ'),
+ ('뙈', '뙈'),
+ ('뙤', '뙤'),
+ ('뚀', '뚀'),
+ ('뚜', '뚜'),
+ ('뚸', '뚸'),
+ ('뛔', '뛔'),
+ ('뛰', '뛰'),
+ ('뜌', '뜌'),
+ ('뜨', '뜨'),
+ ('띄', '띄'),
+ ('띠', '띠'),
+ ('ëŧ', 'ëŧ'),
+ ('래', '래'),
+ ('랴', '랴'),
+ ('럐', '럐'),
+ ('ëŸŦ', 'ëŸŦ'),
+ ('레', '레'),
+ ('ë ¤', 'ë ¤'),
+ ('례', '례'),
+ ('로', '로'),
+ ('른', '른'),
+ ('ëĸ”', 'ëĸ”'),
+ ('ëĸ°', 'ëĸ°'),
+ ('ëŖŒ', 'ëŖŒ'),
+ ('ëŖ¨', 'ëŖ¨'),
+ ('뤄', '뤄'),
+ ('뤠', '뤠'),
+ ('ë¤ŧ', 'ë¤ŧ'),
+ ('ëĨ˜', 'ëĨ˜'),
+ ('ëĨ´', 'ëĨ´'),
+ ('ëĻ', 'ëĻ'),
+ ('ëĻŦ', 'ëĻŦ'),
+ ('마', '마'),
+ ('매', '매'),
+ ('먀', '먀'),
+ ('먜', '먜'),
+ ('머', '머'),
+ ('메', '메'),
+ ('늰', '늰'),
+ ('ëĒŒ', 'ëĒŒ'),
+ ('ëĒ¨', 'ëĒ¨'),
+ ('ëĢ„', 'ëĢ„'),
+ ('ëĢ ', 'ëĢ '),
+ ('ëĢŧ', 'ëĢŧ'),
+ ('ëŦ˜', 'ëŦ˜'),
+ ('ëŦ´', 'ëŦ´'),
+ ('뭐', '뭐'),
+ ('ë­Ŧ', 'ë­Ŧ'),
+ ('뎈', '뎈'),
+ ('뎤', '뎤'),
+ ('므', '므'),
+ ('믜', '믜'),
+ ('미', '미'),
+ ('바', '바'),
+ ('ë°°', 'ë°°'),
+ ('뱌', '뱌'),
+ ('빨', '빨'),
+ ('버', '버'),
+ ('베', '베'),
+ ('ë˛ŧ', 'ë˛ŧ'),
+ ('ëŗ˜', 'ëŗ˜'),
+ ('ëŗ´', 'ëŗ´'),
+ ('봐', '봐'),
+ ('ë´Ŧ', 'ë´Ŧ'),
+ ('ëĩˆ', 'ëĩˆ'),
+ ('ëĩ¤', 'ëĩ¤'),
+ ('ëļ€', 'ëļ€'),
+ ('ëļœ', 'ëļœ'),
+ ('ëļ¸', 'ëļ¸'),
+ ('뷔', '뷔'),
+ ('롰', '롰'),
+ ('브', '브'),
+ ('븨', '븨'),
+ ('비', '비'),
+ ('ëš ', 'ëš '),
+ ('ëšŧ', 'ëšŧ'),
+ ('ëē˜', 'ëē˜'),
+ ('ëē´', 'ëē´'),
+ ('ëģ', 'ëģ'),
+ ('ëģŦ', 'ëģŦ'),
+ ('ëŧˆ', 'ëŧˆ'),
+ ('ëŧ¤', 'ëŧ¤'),
+ ('ëŊ€', 'ëŊ€'),
+ ('ëŊœ', 'ëŊœ'),
+ ('ëŊ¸', 'ëŊ¸'),
+ ('뾔', '뾔'),
+ ('ëž°', 'ëž°'),
+ ('ëŋŒ', 'ëŋŒ'),
+ ('ëŋ¨', 'ëŋ¨'),
+ ('ė€„', 'ė€„'),
+ ('ė€ ', 'ė€ '),
+ ('ė€ŧ', 'ė€ŧ'),
+ ('ė˜', 'ė˜'),
+ ('ė´', 'ė´'),
+ ('ė‚', 'ė‚'),
+ ('ė‚Ŧ', 'ė‚Ŧ'),
+ ('ėƒˆ', 'ėƒˆ'),
+ ('ėƒ¤', 'ėƒ¤'),
+ ('ė„€', 'ė„€'),
+ ('ė„œ', 'ė„œ'),
+ ('ė„¸', 'ė„¸'),
+ ('ė…”', 'ė…”'),
+ ('ė…°', 'ė…°'),
+ ('ė†Œ', 'ė†Œ'),
+ ('ė†¨', 'ė†¨'),
+ ('ė‡„', 'ė‡„'),
+ ('ė‡ ', 'ė‡ '),
+ ('ė‡ŧ', 'ė‡ŧ'),
+ ('ėˆ˜', 'ėˆ˜'),
+ ('ėˆ´', 'ėˆ´'),
+ ('ė‰', 'ė‰'),
+ ('ė‰Ŧ', 'ė‰Ŧ'),
+ ('ėŠˆ', 'ėŠˆ'),
+ ('ėŠ¤', 'ėŠ¤'),
+ ('ė‹€', 'ė‹€'),
+ ('ė‹œ', 'ė‹œ'),
+ ('ė‹¸', 'ė‹¸'),
+ ('ėŒ”', 'ėŒ”'),
+ ('ėŒ°', 'ėŒ°'),
+ ('ėŒ', 'ėŒ'),
+ ('ė¨', 'ė¨'),
+ ('ėŽ„', 'ėŽ„'),
+ ('ėŽ ', 'ėŽ '),
+ ('ėŽŧ', 'ėŽŧ'),
+ ('ė˜', 'ė˜'),
+ ('ė´', 'ė´'),
+ ('ė', 'ė'),
+ ('ėŦ', 'ėŦ'),
+ ('ė‘ˆ', 'ė‘ˆ'),
+ ('ė‘¤', 'ė‘¤'),
+ ('ė’€', 'ė’€'),
+ ('ė’œ', 'ė’œ'),
+ ('ė’¸', 'ė’¸'),
+ ('ė“”', 'ė“”'),
+ ('ė“°', 'ė“°'),
+ ('ė”Œ', 'ė”Œ'),
+ ('ė”¨', 'ė”¨'),
+ ('ė•„', 'ė•„'),
+ ('ė• ', 'ė• '),
+ ('ė•ŧ', 'ė•ŧ'),
+ ('ė–˜', 'ė–˜'),
+ ('ė–´', 'ė–´'),
+ ('ė—', 'ė—'),
+ ('ė—Ŧ', 'ė—Ŧ'),
+ ('ė˜ˆ', 'ė˜ˆ'),
+ ('ė˜¤', 'ė˜¤'),
+ ('ė™€', 'ė™€'),
+ ('ė™œ', 'ė™œ'),
+ ('ė™¸', 'ė™¸'),
+ ('ėš”', 'ėš”'),
+ ('ėš°', 'ėš°'),
+ ('ė›Œ', 'ė›Œ'),
+ ('ė›¨', 'ė›¨'),
+ ('ėœ„', 'ėœ„'),
+ ('ėœ ', 'ėœ '),
+ ('ėœŧ', 'ėœŧ'),
+ ('ė˜', 'ė˜'),
+ ('ė´', 'ė´'),
+ ('ėž', 'ėž'),
+ ('ėžŦ', 'ėžŦ'),
+ ('ėŸˆ', 'ėŸˆ'),
+ ('ėŸ¤', 'ėŸ¤'),
+ ('ė €', 'ė €'),
+ ('ė œ', 'ė œ'),
+ ('ė ¸', 'ė ¸'),
+ ('ėĄ”', 'ėĄ”'),
+ ('ėĄ°', 'ėĄ°'),
+ ('ėĸŒ', 'ėĸŒ'),
+ ('ėĸ¨', 'ėĸ¨'),
+ ('ėŖ„', 'ėŖ„'),
+ ('ėŖ ', 'ėŖ '),
+ ('ėŖŧ', 'ėŖŧ'),
+ ('ė¤˜', 'ė¤˜'),
+ ('ė¤´', 'ė¤´'),
+ ('ėĨ', 'ėĨ'),
+ ('ėĨŦ', 'ėĨŦ'),
+ ('ėĻˆ', 'ėĻˆ'),
+ ('ėĻ¤', 'ėĻ¤'),
+ ('ė§€', 'ė§€'),
+ ('ė§œ', 'ė§œ'),
+ ('ė§¸', 'ė§¸'),
+ ('ė¨”', 'ė¨”'),
+ ('ė¨°', 'ė¨°'),
+ ('ėŠŒ', 'ėŠŒ'),
+ ('ėŠ¨', 'ėŠ¨'),
+ ('ėĒ„', 'ėĒ„'),
+ ('ėĒ ', 'ėĒ '),
+ ('ėĒŧ', 'ėĒŧ'),
+ ('ėĢ˜', 'ėĢ˜'),
+ ('ėĢ´', 'ėĢ´'),
+ ('ėŦ', 'ėŦ'),
+ ('ėŦŦ', 'ėŦŦ'),
+ ('ė­ˆ', 'ė­ˆ'),
+ ('ė­¤', 'ė­¤'),
+ ('ėŽ€', 'ėŽ€'),
+ ('ėŽœ', 'ėŽœ'),
+ ('ėŽ¸', 'ėŽ¸'),
+ ('ė¯”', 'ė¯”'),
+ ('ė¯°', 'ė¯°'),
+ ('ė°Œ', 'ė°Œ'),
+ ('ė°¨', 'ė°¨'),
+ ('ėą„', 'ėą„'),
+ ('ėą ', 'ėą '),
+ ('ėąŧ', 'ėąŧ'),
+ ('ė˛˜', 'ė˛˜'),
+ ('ė˛´', 'ė˛´'),
+ ('ėŗ', 'ėŗ'),
+ ('ėŗŦ', 'ėŗŦ'),
+ ('ė´ˆ', 'ė´ˆ'),
+ ('ė´¤', 'ė´¤'),
+ ('ėĩ€', 'ėĩ€'),
+ ('ėĩœ', 'ėĩœ'),
+ ('ėĩ¸', 'ėĩ¸'),
+ ('ėļ”', 'ėļ”'),
+ ('ėļ°', 'ėļ°'),
+ ('ėˇŒ', 'ėˇŒ'),
+ ('ėˇ¨', 'ėˇ¨'),
+ ('ė¸„', 'ė¸„'),
+ ('ė¸ ', 'ė¸ '),
+ ('ė¸ŧ', 'ė¸ŧ'),
+ ('ėš˜', 'ėš˜'),
+ ('ėš´', 'ėš´'),
+ ('ėē', 'ėē'),
+ ('ėēŦ', 'ėēŦ'),
+ ('ėģˆ', 'ėģˆ'),
+ ('ėģ¤', 'ėģ¤'),
+ ('ėŧ€', 'ėŧ€'),
+ ('ėŧœ', 'ėŧœ'),
+ ('ėŧ¸', 'ėŧ¸'),
+ ('ėŊ”', 'ėŊ”'),
+ ('ėŊ°', 'ėŊ°'),
+ ('ėžŒ', 'ėžŒ'),
+ ('ėž¨', 'ėž¨'),
+ ('ėŋ„', 'ėŋ„'),
+ ('ėŋ ', 'ėŋ '),
+ ('ėŋŧ', 'ėŋŧ'),
+ ('퀘', '퀘'),
+ ('퀴', '퀴'),
+ ('큐', '큐'),
+ ('íŦ', 'íŦ'),
+ ('킈', '킈'),
+ ('키', '키'),
+ ('타', '타'),
+ ('태', '태'),
+ ('탸', '탸'),
+ ('턔', '턔'),
+ ('터', '터'),
+ ('테', '테'),
+ ('텨', '텨'),
+ ('톄', '톄'),
+ ('토', '토'),
+ ('í†ŧ', 'í†ŧ'),
+ ('퇘', '퇘'),
+ ('퇴', '퇴'),
+ ('툐', '툐'),
+ ('íˆŦ', 'íˆŦ'),
+ ('퉈', '퉈'),
+ ('퉤', '퉤'),
+ ('튀', '튀'),
+ ('튜', '튜'),
+ ('트', '트'),
+ ('틔', '틔'),
+ ('티', '티'),
+ ('파', '파'),
+ ('패', '패'),
+ ('퍄', '퍄'),
+ ('퍠', '퍠'),
+ ('íŧ', 'íŧ'),
+ ('페', '페'),
+ ('펴', '펴'),
+ ('폐', '폐'),
+ ('íŦ', 'íŦ'),
+ ('퐈', '퐈'),
+ ('퐤', '퐤'),
+ ('푀', '푀'),
+ ('표', '표'),
+ ('푸', '푸'),
+ ('풔', '풔'),
+ ('풰', '풰'),
+ ('퓌', '퓌'),
+ ('퓨', '퓨'),
+ ('프', '프'),
+ ('픠', '픠'),
+ ('í”ŧ', 'í”ŧ'),
+ ('하', '하'),
+ ('해', '해'),
+ ('햐', '햐'),
+ ('í–Ŧ', 'í–Ŧ'),
+ ('허', '허'),
+ ('헤', '헤'),
+ ('혀', '혀'),
+ ('혜', '혜'),
+ ('호', '호'),
+ ('화', '화'),
+ ('홰', '홰'),
+ ('회', '회'),
+ ('효', '효'),
+ ('후', '후'),
+ ('훠', '훠'),
+ ('í›ŧ', 'í›ŧ'),
+ ('휘', '휘'),
+ ('휴', '휴'),
+ ('흐', '흐'),
+ ('íŦ', 'íŦ'),
+ ('히', '히'),
+];
+
+pub const LVT: &'static [(char, char)] = &[
+ ('각', '갛'),
+ ('객', '갷'),
+ ('갹', '걓'),
+ ('걕', 'ęą¯'),
+ ('걱', '겋'),
+ ('겍', '겧'),
+ ('겊', 'ęŗƒ'),
+ ('ęŗ…', 'ęŗŸ'),
+ ('ęŗĄ', 'ęŗģ'),
+ ('ęŗŊ', '괗'),
+ ('괙', 'ę´ŗ'),
+ ('ę´ĩ', 'ęĩ'),
+ ('ęĩ‘', 'ęĩĢ'),
+ ('ęĩ­', 'ęļ‡'),
+ ('ęļ‰', 'ęļŖ'),
+ ('ęļĨ', 'ęļŋ'),
+ ('귁', '귛'),
+ ('귝', '귷'),
+ ('극', '긓'),
+ ('긕', 'ę¸¯'),
+ ('긱', '깋'),
+ ('깍', '깧'),
+ ('Ꚋ', 'ęēƒ'),
+ ('ęē…', 'ęēŸ'),
+ ('ęēĄ', 'ęēģ'),
+ ('ęēŊ', 'ęģ—'),
+ ('ęģ™', 'ęģŗ'),
+ ('ęģĩ', 'ęŧ'),
+ ('ęŧ‘', 'ęŧĢ'),
+ ('ęŧ­', 'ęŊ‡'),
+ ('ęŊ‰', 'ęŊŖ'),
+ ('ęŊĨ', 'ęŊŋ'),
+ ('꾁', '꾛'),
+ ('꾝', '꾷'),
+ ('ęžš', 'ęŋ“'),
+ ('ęŋ•', 'ęŋ¯'),
+ ('ęŋą', '뀋'),
+ ('뀍', '뀧'),
+ ('뀩', '끃'),
+ ('끅', '끟'),
+ ('끥', 'ëģ'),
+ ('ëŊ', '낗'),
+ ('낙', 'ë‚ŗ'),
+ ('ë‚ĩ', '냏'),
+ ('냑', 'ëƒĢ'),
+ ('냭', '넇'),
+ ('넉', 'ë„Ŗ'),
+ ('ë„Ĩ', 'ë„ŋ'),
+ ('녁', '녛'),
+ ('녝', '녷'),
+ ('녹', '놓'),
+ ('놕', '놯'),
+ ('놱', '뇋'),
+ ('뇍', '뇧'),
+ ('뇩', '눃'),
+ ('눅', '눟'),
+ ('눥', 'ëˆģ'),
+ ('ëˆŊ', '뉗'),
+ ('뉙', 'ë‰ŗ'),
+ ('ë‰ĩ', '늏'),
+ ('늑', 'ëŠĢ'),
+ ('늭', '닇'),
+ ('닉', 'ë‹Ŗ'),
+ ('ë‹Ĩ', 'ë‹ŋ'),
+ ('댁', '댛'),
+ ('댝', '댷'),
+ ('댹', '덓'),
+ ('덕', '덯'),
+ ('덱', '뎋'),
+ ('뎍', '뎧'),
+ ('뎩', '돃'),
+ ('독', '돟'),
+ ('돡', 'ëģ'),
+ ('ëŊ', '됗'),
+ ('됙', 'ëŗ'),
+ ('ëĩ', '둏'),
+ ('둑', 'ë‘Ģ'),
+ ('둭', '뒇'),
+ ('뒉', 'ë’Ŗ'),
+ ('ë’Ĩ', 'ë’ŋ'),
+ ('듁', '듛'),
+ ('득', '듷'),
+ ('듹', '딓'),
+ ('딕', '딯'),
+ ('딱', '땋'),
+ ('땍', '땧'),
+ ('땩', '떃'),
+ ('떅', '떟'),
+ ('떡', 'ë–ģ'),
+ ('ë–Ŋ', '뗗'),
+ ('뗙', 'ë—ŗ'),
+ ('ë—ĩ', '똏'),
+ ('똑', 'ë˜Ģ'),
+ ('똭', '뙇'),
+ ('뙉', 'ë™Ŗ'),
+ ('ë™Ĩ', 'ë™ŋ'),
+ ('뚁', '뚛'),
+ ('뚝', '뚷'),
+ ('뚹', '뛓'),
+ ('뛕', '뛯'),
+ ('뛱', '뜋'),
+ ('뜍', '뜧'),
+ ('뜩', '띃'),
+ ('띅', '띟'),
+ ('띡', 'ëģ'),
+ ('ëŊ', '랗'),
+ ('랙', 'ëžŗ'),
+ ('ëžĩ', '럏'),
+ ('럑', 'ëŸĢ'),
+ ('럭', '렇'),
+ ('렉', 'ë Ŗ'),
+ ('ë Ĩ', 'ë ŋ'),
+ ('롁', '롛'),
+ ('록', '롷'),
+ ('륚', 'ëĸ“'),
+ ('ëĸ•', 'ëĸ¯'),
+ ('ëĸą', 'ëŖ‹'),
+ ('ëŖ', 'ëŖ§'),
+ ('ëŖŠ', '뤃'),
+ ('뤅', '뤟'),
+ ('뤥', 'ë¤ģ'),
+ ('ë¤Ŋ', 'ëĨ—'),
+ ('ëĨ™', 'ëĨŗ'),
+ ('ëĨĩ', 'ëĻ'),
+ ('ëĻ‘', 'ëĻĢ'),
+ ('ëĻ­', '맇'),
+ ('막', 'ë§Ŗ'),
+ ('ë§Ĩ', 'ë§ŋ'),
+ ('먁', '먛'),
+ ('먝', '먷'),
+ ('먹', '멓'),
+ ('멕', '늯'),
+ ('늹', 'ëĒ‹'),
+ ('ëĒ', 'ëĒ§'),
+ ('ëĒŠ', 'ëĢƒ'),
+ ('ëĢ…', 'ëĢŸ'),
+ ('ëĢĄ', 'ëĢģ'),
+ ('ëĢŊ', 'ëŦ—'),
+ ('ëŦ™', 'ëŦŗ'),
+ ('ëŦĩ', '뭏'),
+ ('뭑', 'ë­Ģ'),
+ ('뭭', '뮇'),
+ ('뮉', 'ëŽŖ'),
+ ('ëŽĨ', 'ëŽŋ'),
+ ('믁', '믛'),
+ ('믝', 'ë¯ˇ'),
+ ('믚', '밓'),
+ ('박', '밯'),
+ ('백', '뱋'),
+ ('뱍', '뱧'),
+ ('빊', '벃'),
+ ('벅', '벟'),
+ ('벥', 'ë˛ģ'),
+ ('ë˛Ŋ', 'ëŗ—'),
+ ('ëŗ™', 'ëŗŗ'),
+ ('ëŗĩ', '봏'),
+ ('봑', 'ë´Ģ'),
+ ('ë´­', 'ëĩ‡'),
+ ('ëĩ‰', 'ëĩŖ'),
+ ('ëĩĨ', 'ëĩŋ'),
+ ('ëļ', 'ëļ›'),
+ ('ëļ', 'ëļˇ'),
+ ('ëļš', '뷓'),
+ ('뷕', 'ëˇ¯'),
+ ('뷱', '븋'),
+ ('븍', '븧'),
+ ('븊', '뚃'),
+ ('빅', '빟'),
+ ('뚥', 'ëšģ'),
+ ('ëšŊ', 'ëē—'),
+ ('ëē™', 'ëēŗ'),
+ ('ëēĩ', 'ëģ'),
+ ('ëģ‘', 'ëģĢ'),
+ ('ëģ­', 'ëŧ‡'),
+ ('ëŧ‰', 'ëŧŖ'),
+ ('ëŧĨ', 'ëŧŋ'),
+ ('ëŊ', 'ëŊ›'),
+ ('ëŊ', 'ëŊˇ'),
+ ('ëŊš', '뾓'),
+ ('뾕', '랯'),
+ ('ëžą', 'ëŋ‹'),
+ ('ëŋ', 'ëŋ§'),
+ ('ëŋŠ', 'ė€ƒ'),
+ ('ė€…', 'ė€Ÿ'),
+ ('ė€Ą', 'ė€ģ'),
+ ('ė€Ŋ', 'ė—'),
+ ('ė™', 'ėŗ'),
+ ('ėĩ', 'ė‚'),
+ ('ė‚‘', 'ė‚Ģ'),
+ ('ė‚­', 'ėƒ‡'),
+ ('ėƒ‰', 'ėƒŖ'),
+ ('ėƒĨ', 'ėƒŋ'),
+ ('ė„', 'ė„›'),
+ ('ė„', 'ė„ˇ'),
+ ('ė„š', 'ė…“'),
+ ('ė…•', 'ė…¯'),
+ ('ė…ą', 'ė†‹'),
+ ('ė†', 'ė†§'),
+ ('ė†Š', 'ė‡ƒ'),
+ ('ė‡…', 'ė‡Ÿ'),
+ ('ė‡Ą', 'ė‡ģ'),
+ ('ė‡Ŋ', 'ėˆ—'),
+ ('ėˆ™', 'ėˆŗ'),
+ ('ėˆĩ', 'ė‰'),
+ ('ė‰‘', 'ė‰Ģ'),
+ ('ė‰­', 'ėŠ‡'),
+ ('ėŠ‰', 'ėŠŖ'),
+ ('ėŠĨ', 'ėŠŋ'),
+ ('ė‹', 'ė‹›'),
+ ('ė‹', 'ė‹ˇ'),
+ ('ė‹š', 'ėŒ“'),
+ ('ėŒ•', 'ėŒ¯'),
+ ('ėŒą', 'ė‹'),
+ ('ė', 'ė§'),
+ ('ėŠ', 'ėŽƒ'),
+ ('ėŽ…', 'ėŽŸ'),
+ ('ėŽĄ', 'ėŽģ'),
+ ('ėŽŊ', 'ė—'),
+ ('ė™', 'ėŗ'),
+ ('ėĩ', 'ė'),
+ ('ė‘', 'ėĢ'),
+ ('ė­', 'ė‘‡'),
+ ('ė‘‰', 'ė‘Ŗ'),
+ ('ė‘Ĩ', 'ė‘ŋ'),
+ ('ė’', 'ė’›'),
+ ('ė’', 'ė’ˇ'),
+ ('ė’š', 'ė““'),
+ ('ė“•', 'ė“¯'),
+ ('ė“ą', 'ė”‹'),
+ ('ė”', 'ė”§'),
+ ('ė”Š', 'ė•ƒ'),
+ ('ė•…', 'ė•Ÿ'),
+ ('ė•Ą', 'ė•ģ'),
+ ('ė•Ŋ', 'ė–—'),
+ ('ė–™', 'ė–ŗ'),
+ ('ė–ĩ', 'ė—'),
+ ('ė—‘', 'ė—Ģ'),
+ ('ė—­', 'ė˜‡'),
+ ('ė˜‰', 'ė˜Ŗ'),
+ ('ė˜Ĩ', 'ė˜ŋ'),
+ ('ė™', 'ė™›'),
+ ('ė™', 'ė™ˇ'),
+ ('ė™š', 'ėš“'),
+ ('ėš•', 'ėš¯'),
+ ('ėšą', 'ė›‹'),
+ ('ė›', 'ė›§'),
+ ('ė›Š', 'ėœƒ'),
+ ('ėœ…', 'ėœŸ'),
+ ('ėœĄ', 'ėœģ'),
+ ('ėœŊ', 'ė—'),
+ ('ė™', 'ėŗ'),
+ ('ėĩ', 'ėž'),
+ ('ėž‘', 'ėžĢ'),
+ ('ėž­', 'ėŸ‡'),
+ ('ėŸ‰', 'ėŸŖ'),
+ ('ėŸĨ', 'ėŸŋ'),
+ ('ė ', 'ė ›'),
+ ('ė ', 'ė ˇ'),
+ ('ė š', 'ėĄ“'),
+ ('ėĄ•', 'ėĄ¯'),
+ ('ėĄą', 'ėĸ‹'),
+ ('ėĸ', 'ėĸ§'),
+ ('ėĸŠ', 'ėŖƒ'),
+ ('ėŖ…', 'ėŖŸ'),
+ ('ėŖĄ', 'ėŖģ'),
+ ('ėŖŊ', 'ė¤—'),
+ ('ė¤™', 'ė¤ŗ'),
+ ('ė¤ĩ', 'ėĨ'),
+ ('ėĨ‘', 'ėĨĢ'),
+ ('ėĨ­', 'ėĻ‡'),
+ ('ėĻ‰', 'ėĻŖ'),
+ ('ėĻĨ', 'ėĻŋ'),
+ ('ė§', 'ė§›'),
+ ('ė§', 'ė§ˇ'),
+ ('ė§š', 'ė¨“'),
+ ('ė¨•', 'ė¨¯'),
+ ('ė¨ą', 'ėŠ‹'),
+ ('ėŠ', 'ėŠ§'),
+ ('ėŠŠ', 'ėĒƒ'),
+ ('ėĒ…', 'ėĒŸ'),
+ ('ėĒĄ', 'ėĒģ'),
+ ('ėĒŊ', 'ėĢ—'),
+ ('ėĢ™', 'ėĢŗ'),
+ ('ėĢĩ', 'ėŦ'),
+ ('ėŦ‘', 'ėŦĢ'),
+ ('ėŦ­', 'ė­‡'),
+ ('ė­‰', 'ė­Ŗ'),
+ ('ė­Ĩ', 'ė­ŋ'),
+ ('ėŽ', 'ėŽ›'),
+ ('ėŽ', 'ėŽˇ'),
+ ('ėŽš', 'ė¯“'),
+ ('ė¯•', 'ė¯¯'),
+ ('ė¯ą', 'ė°‹'),
+ ('ė°', 'ė°§'),
+ ('ė°Š', 'ėąƒ'),
+ ('ėą…', 'ėąŸ'),
+ ('ėąĄ', 'ėąģ'),
+ ('ėąŊ', 'ė˛—'),
+ ('ė˛™', 'ė˛ŗ'),
+ ('ė˛ĩ', 'ėŗ'),
+ ('ėŗ‘', 'ėŗĢ'),
+ ('ėŗ­', 'ė´‡'),
+ ('ė´‰', 'ė´Ŗ'),
+ ('ė´Ĩ', 'ė´ŋ'),
+ ('ėĩ', 'ėĩ›'),
+ ('ėĩ', 'ėĩˇ'),
+ ('ėĩš', 'ėļ“'),
+ ('ėļ•', 'ėļ¯'),
+ ('ėļą', 'ėˇ‹'),
+ ('ėˇ', 'ėˇ§'),
+ ('ėˇŠ', 'ė¸ƒ'),
+ ('ė¸…', 'ė¸Ÿ'),
+ ('ė¸Ą', 'ė¸ģ'),
+ ('ė¸Ŋ', 'ėš—'),
+ ('ėš™', 'ėšŗ'),
+ ('ėšĩ', 'ėē'),
+ ('ėē‘', 'ėēĢ'),
+ ('ėē­', 'ėģ‡'),
+ ('ėģ‰', 'ėģŖ'),
+ ('ėģĨ', 'ėģŋ'),
+ ('ėŧ', 'ėŧ›'),
+ ('ėŧ', 'ėŧˇ'),
+ ('ėŧš', 'ėŊ“'),
+ ('ėŊ•', 'ėŊ¯'),
+ ('ėŊą', 'ėž‹'),
+ ('ėž', 'ėž§'),
+ ('ėžŠ', 'ėŋƒ'),
+ ('ėŋ…', 'ėŋŸ'),
+ ('ėŋĄ', 'ėŋģ'),
+ ('ėŋŊ', '퀗'),
+ ('퀙', 'í€ŗ'),
+ ('í€ĩ', '큏'),
+ ('큑', 'íĢ'),
+ ('큭', '킇'),
+ ('킉', 'í‚Ŗ'),
+ ('í‚Ĩ', 'í‚ŋ'),
+ ('탁', '탛'),
+ ('택', '탷'),
+ ('탹', '턓'),
+ ('턕', '턯'),
+ ('턱', '텋'),
+ ('텍', '텧'),
+ ('텩', '톃'),
+ ('톅', '톟'),
+ ('톡', 'í†ģ'),
+ ('í†Ŋ', '퇗'),
+ ('퇙', 'í‡ŗ'),
+ ('í‡ĩ', '툏'),
+ ('툑', 'íˆĢ'),
+ ('툭', '퉇'),
+ ('퉉', 'í‰Ŗ'),
+ ('í‰Ĩ', 'í‰ŋ'),
+ ('튁', '튛'),
+ ('튝', '튷'),
+ ('특', '틓'),
+ ('틕', '틯'),
+ ('틱', '팋'),
+ ('팍', '팧'),
+ ('팩', '퍃'),
+ ('퍅', '퍟'),
+ ('퍡', 'íģ'),
+ ('íŊ', '펗'),
+ ('펙', 'íŽŗ'),
+ ('íŽĩ', '폏'),
+ ('폑', 'íĢ'),
+ ('폭', '퐇'),
+ ('퐉', 'íŖ'),
+ ('íĨ', 'íŋ'),
+ ('푁', '푛'),
+ ('푝', '푷'),
+ ('푹', '풓'),
+ ('풕', '풯'),
+ ('풱', '퓋'),
+ ('퓍', '퓧'),
+ ('퓩', '픃'),
+ ('픅', '픟'),
+ ('픡', 'í”ģ'),
+ ('í”Ŋ', '핗'),
+ ('학', 'í•ŗ'),
+ ('í•ĩ', '햏'),
+ ('햑', 'í–Ģ'),
+ ('햭', '헇'),
+ ('헉', 'í—Ŗ'),
+ ('í—Ĩ', 'í—ŋ'),
+ ('혁', '혛'),
+ ('혝', '혷'),
+ ('혹', '홓'),
+ ('확', '홯'),
+ ('홱', '횋'),
+ ('획', '횧'),
+ ('횩', '훃'),
+ ('훅', '훟'),
+ ('훡', 'í›ģ'),
+ ('í›Ŋ', '휗'),
+ ('휙', 'íœŗ'),
+ ('íœĩ', '흏'),
+ ('흑', 'íĢ'),
+ ('흭', '힇'),
+ ('힉', 'ížŖ'),
+];
+
+pub const PREPEND: &'static [(char, char)] = &[
+ ('\u{600}', '\u{605}'),
+ ('\u{6dd}', '\u{6dd}'),
+ ('\u{70f}', '\u{70f}'),
+ ('\u{8e2}', '\u{8e2}'),
+ ('āĩŽ', 'āĩŽ'),
+ ('\u{110bd}', '\u{110bd}'),
+ ('\u{110cd}', '\u{110cd}'),
+ ('𑇂', '𑇃'),
+ ('\u{1193f}', '\u{1193f}'),
+ ('\u{11941}', '\u{11941}'),
+ ('đ‘¨ē', 'đ‘¨ē'),
+ ('đ‘Ē„', 'đ‘Ē‰'),
+ ('đ‘ĩ†', 'đ‘ĩ†'),
+];
+
+pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('đŸ‡Ļ', 'đŸ‡ŋ')];
+
+pub const SPACINGMARK: &'static [(char, char)] = &[
+ ('ā¤ƒ', 'ā¤ƒ'),
+ ('ā¤ģ', 'ā¤ģ'),
+ ('ā¤ž', 'āĨ€'),
+ ('āĨ‰', 'āĨŒ'),
+ ('āĨŽ', 'āĨ'),
+ ('āĻ‚', 'āĻƒ'),
+ ('āĻŋ', 'ā§€'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Œ'),
+ ('ā¨ƒ', 'ā¨ƒ'),
+ ('ā¨ž', 'āŠ€'),
+ ('āĒƒ', 'āĒƒ'),
+ ('āĒž', 'āĢ€'),
+ ('āĢ‰', 'āĢ‰'),
+ ('āĢ‹', 'āĢŒ'),
+ ('āŦ‚', 'āŦƒ'),
+ ('ā­€', 'ā­€'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', 'ā­Œ'),
+ ('āŽŋ', 'āŽŋ'),
+ ('ā¯', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', 'ā¯Œ'),
+ ('ā°', 'ā°ƒ'),
+ ('āą', 'āą„'),
+ ('ā˛‚', 'ā˛ƒ'),
+ ('ā˛ž', 'ā˛ž'),
+ ('āŗ€', 'āŗ'),
+ ('āŗƒ', 'āŗ„'),
+ ('āŗ‡', 'āŗˆ'),
+ ('āŗŠ', 'āŗ‹'),
+ ('ā´‚', 'ā´ƒ'),
+ ('ā´ŋ', 'āĩ€'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩŒ'),
+ ('āļ‚', 'āļƒ'),
+ ('āˇ', 'āˇ‘'),
+ ('āˇ˜', 'āˇž'),
+ ('āˇ˛', 'āˇŗ'),
+ ('ā¸ŗ', 'ā¸ŗ'),
+ ('āēŗ', 'āēŗ'),
+ ('āŧž', 'āŧŋ'),
+ ('āŊŋ', 'āŊŋ'),
+ ('ေ', 'ေ'),
+ ('á€ģ', 'á€ŧ'),
+ ('ၖ', 'ၗ'),
+ ('ႄ', 'ႄ'),
+ ('ážļ', 'ážļ'),
+ ('ើ', 'ៅ'),
+ ('ះ', 'ៈ'),
+ ('á¤Ŗ', 'á¤Ļ'),
+ ('ᤊ', 'á¤Ģ'),
+ ('ᤰ', '᤹'),
+ ('á¤ŗ', 'ᤸ'),
+ ('ᨙ', 'ᨚ'),
+ ('ᩕ', 'ᩕ'),
+ ('ᩗ', 'ᩗ'),
+ ('ክ', 'ኲ'),
+ ('áŦ„', 'áŦ„'),
+ ('áŦģ', 'áŦģ'),
+ ('áŦŊ', 'ᭁ'),
+ ('ᭃ', '᭄'),
+ ('ᮂ', 'ᮂ'),
+ ('Ꭵ', 'Ꭵ'),
+ ('áŽĻ', 'Ꭷ'),
+ ('áŽĒ', 'áŽĒ'),
+ ('ᯧ', 'ᯧ'),
+ ('á¯Ē', 'á¯Ŧ'),
+ ('ᯎ', 'ᯎ'),
+ ('á¯˛', 'á¯ŗ'),
+ ('á°¤', 'á°Ģ'),
+ ('á°´', 'á°ĩ'),
+ ('áŗĄ', 'áŗĄ'),
+ ('áŗˇ', 'áŗˇ'),
+ ('ę Ŗ', 'ę ¤'),
+ ('ę §', 'ę §'),
+ ('ęĸ€', 'ęĸ'),
+ ('ęĸ´', 'ęŖƒ'),
+ ('ęĨ’', 'ęĨ“'),
+ ('ęĻƒ', 'ęĻƒ'),
+ ('ęĻ´', 'ęĻĩ'),
+ ('ęĻē', 'ęĻģ'),
+ ('ęĻž', '꧀'),
+ ('ę¨¯', 'ꨰ'),
+ ('ę¨ŗ', 'ꨴ'),
+ ('ꩍ', 'ꩍ'),
+ ('ęĢĢ', 'ęĢĢ'),
+ ('ęĢŽ', 'ęĢ¯'),
+ ('ęĢĩ', 'ęĢĩ'),
+ ('ę¯Ŗ', 'ę¯¤'),
+ ('ę¯Ļ', 'ę¯§'),
+ ('ę¯Š', 'ę¯Ē'),
+ ('ę¯Ŧ', 'ę¯Ŧ'),
+ ('𑀀', '𑀀'),
+ ('𑀂', '𑀂'),
+ ('𑂂', '𑂂'),
+ ('𑂰', '𑂲'),
+ ('𑂷', '𑂸'),
+ ('đ‘„Ŧ', 'đ‘„Ŧ'),
+ ('𑅅', '𑅆'),
+ ('𑆂', '𑆂'),
+ ('đ‘†ŗ', 'đ‘†ĩ'),
+ ('đ‘†ŋ', '𑇀'),
+ ('\u{111ce}', '\u{111ce}'),
+ ('đ‘ˆŦ', '𑈮'),
+ ('𑈲', 'đ‘ˆŗ'),
+ ('đ‘ˆĩ', 'đ‘ˆĩ'),
+ ('𑋠', 'đ‘‹ĸ'),
+ ('𑌂', '𑌃'),
+ ('đ‘Œŋ', 'đ‘Œŋ'),
+ ('𑍁', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('đ‘ĸ', 'đ‘Ŗ'),
+ ('đ‘ĩ', '𑐷'),
+ ('𑑀', '𑑁'),
+ ('𑑅', '𑑅'),
+ ('𑒱', '𑒲'),
+ ('𑒹', '𑒹'),
+ ('đ‘’ģ', 'đ‘’ŧ'),
+ ('𑒾', '𑒾'),
+ ('𑓁', '𑓁'),
+ ('𑖰', '𑖱'),
+ ('𑖸', 'đ‘–ģ'),
+ ('𑖾', '𑖾'),
+ ('𑘰', '𑘲'),
+ ('đ‘˜ģ', 'đ‘˜ŧ'),
+ ('𑘾', '𑘾'),
+ ('đ‘šŦ', 'đ‘šŦ'),
+ ('𑚮', 'đ‘š¯'),
+ ('đ‘šļ', 'đ‘šļ'),
+ ('𑜠', '𑜡'),
+ ('đ‘œĻ', 'đ‘œĻ'),
+ ('đ‘ Ŧ', '𑠮'),
+ ('𑠸', '𑠸'),
+ ('\u{11931}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193d}', '\u{1193d}'),
+ ('\u{11940}', '\u{11940}'),
+ ('\u{11942}', '\u{11942}'),
+ ('𑧑', '𑧓'),
+ ('𑧜', '𑧟'),
+ ('𑧤', '𑧤'),
+ ('𑨹', '𑨹'),
+ ('𑩗', '𑩘'),
+ ('đ‘Ē—', 'đ‘Ē—'),
+ ('đ‘°¯', 'đ‘°¯'),
+ ('𑰾', '𑰾'),
+ ('𑲩', '𑲩'),
+ ('𑲱', '𑲱'),
+ ('𑲴', '𑲴'),
+ ('đ‘ļŠ', 'đ‘ļŽ'),
+ ('đ‘ļ“', 'đ‘ļ”'),
+ ('đ‘ļ–', 'đ‘ļ–'),
+ ('đ‘ģĩ', 'đ‘ģļ'),
+ ('đ–Ŋ‘', '𖾇'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('đ…Ļ', 'đ…Ļ'),
+ ('𝅭', '𝅭'),
+];
+
+pub const T: &'static [(char, char)] = &[('ᆨ', 'á‡ŋ'), ('ퟋ', 'íŸģ')];
+
+pub const V: &'static [(char, char)] = &[('ᅠ', 'ᆧ'), ('ힰ', 'ퟆ')];
+
+pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')];
diff --git a/vendor/regex-syntax/src/unicode_tables/mod.rs b/vendor/regex-syntax/src/unicode_tables/mod.rs
new file mode 100644
index 000000000..20736c7ac
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/mod.rs
@@ -0,0 +1,57 @@
+#[cfg(feature = "unicode-age")]
+pub mod age;
+
+#[cfg(feature = "unicode-case")]
+pub mod case_folding_simple;
+
+#[cfg(feature = "unicode-gencat")]
+pub mod general_category;
+
+#[cfg(feature = "unicode-segment")]
+pub mod grapheme_cluster_break;
+
+#[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
+#[allow(dead_code)]
+pub mod perl_decimal;
+
+#[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
+#[allow(dead_code)]
+pub mod perl_space;
+
+#[cfg(feature = "unicode-perl")]
+pub mod perl_word;
+
+#[cfg(feature = "unicode-bool")]
+pub mod property_bool;
+
+#[cfg(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+))]
+pub mod property_names;
+
+#[cfg(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+))]
+pub mod property_values;
+
+#[cfg(feature = "unicode-script")]
+pub mod script;
+
+#[cfg(feature = "unicode-script")]
+pub mod script_extension;
+
+#[cfg(feature = "unicode-segment")]
+pub mod sentence_break;
+
+#[cfg(feature = "unicode-segment")]
+pub mod word_break;
diff --git a/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs b/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs
new file mode 100644
index 000000000..2a09259fc
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs
@@ -0,0 +1,74 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate general-category ucd-13.0.0 --chars --include decimalnumber
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] =
+ &[("Decimal_Number", DECIMAL_NUMBER)];
+
+pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('Ų ', 'ŲŠ'),
+ ('Û°', 'Ûš'),
+ ('߀', '߉'),
+ ('āĨĻ', 'āĨ¯'),
+ ('ā§Ļ', 'ā§¯'),
+ ('āŠĻ', 'āŠ¯'),
+ ('āĢĻ', 'āĢ¯'),
+ ('ā­Ļ', 'ā­¯'),
+ ('ā¯Ļ', 'ā¯¯'),
+ ('āąĻ', 'āą¯'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āĩĻ', 'āĩ¯'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āš', 'āš™'),
+ ('āģ', 'āģ™'),
+ ('āŧ ', 'āŧŠ'),
+ ('၀', '၉'),
+ ('႐', '႙'),
+ ('០', '៩'),
+ ('᠐', '᠙'),
+ ('áĨ†', 'áĨ'),
+ ('᧐', '᧙'),
+ ('áĒ€', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('᭐', '᭙'),
+ ('Ꮀ', '᎚'),
+ ('᱀', '᱉'),
+ ('᱐', '᱙'),
+ ('꘠', 'ꘊ'),
+ ('ęŖ', 'ęŖ™'),
+ ('꤀', '꤉'),
+ ('꧐', '꧙'),
+ ('꧰', '꧚'),
+ ('꩐', '꩙'),
+ ('ę¯°', 'ę¯š'),
+ ('īŧ', 'īŧ™'),
+ ('𐒠', '𐒩'),
+ ('𐴰', '𐴚'),
+ ('đ‘Ļ', 'đ‘¯'),
+ ('𑃰', '𑃹'),
+ ('đ‘„ļ', 'đ‘„ŋ'),
+ ('𑇐', '𑇙'),
+ ('𑋰', '𑋹'),
+ ('𑑐', '𑑙'),
+ ('𑓐', '𑓙'),
+ ('𑙐', '𑙙'),
+ ('𑛀', '𑛉'),
+ ('𑜰', '𑜹'),
+ ('đ‘Ŗ ', 'đ‘ŖŠ'),
+ ('\u{11950}', '\u{11959}'),
+ ('𑱐', '𑱙'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('𖩠', '𖩩'),
+ ('𖭐', '𖭙'),
+ ('𝟎', 'đŸŋ'),
+ ('𞅀', '𞅉'),
+ ('𞋰', '𞋹'),
+ ('đžĨ', 'đžĨ™'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/perl_space.rs b/vendor/regex-syntax/src/unicode_tables/perl_space.rs
new file mode 100644
index 000000000..c112dd126
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/perl_space.rs
@@ -0,0 +1,23 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate property-bool ucd-13.0.0 --chars --include whitespace
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] =
+ &[("White_Space", WHITE_SPACE)];
+
+pub const WHITE_SPACE: &'static [(char, char)] = &[
+ ('\t', '\r'),
+ (' ', ' '),
+ ('\u{85}', '\u{85}'),
+ ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'),
+ ('\u{2000}', '\u{200a}'),
+ ('\u{2028}', '\u{2029}'),
+ ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'),
+ ('\u{3000}', '\u{3000}'),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/perl_word.rs b/vendor/regex-syntax/src/unicode_tables/perl_word.rs
new file mode 100644
index 000000000..df9eac7d7
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/perl_word.rs
@@ -0,0 +1,743 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate perl-word ucd-13.0.0 --chars
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const PERL_WORD: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('A', 'Z'),
+ ('_', '_'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ˁ'),
+ ('ˆ', 'ˑ'),
+ ('ˠ', 'ˤ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('\u{300}', 'Í´'),
+ ('Íļ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('\u{483}', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', 'ՙ'),
+ ('ՠ', 'ֈ'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('\u{610}', '\u{61a}'),
+ ('Ø ', 'ŲŠ'),
+ ('ŲŽ', 'ۓ'),
+ ('ە', '\u{6dc}'),
+ ('\u{6df}', '\u{6e8}'),
+ ('\u{6ea}', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', '\u{74a}'),
+ ('Ũ', 'Ūą'),
+ ('߀', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('ā €', '\u{82d}'),
+ ('āĄ€', '\u{85b}'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', '\u{963}'),
+ ('āĨĻ', 'āĨ¯'),
+ ('āĨą', 'āĻƒ'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('\u{9bc}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Ž'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', '\u{9e3}'),
+ ('ā§Ļ', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠĻ', '\u{a75}'),
+ ('\u{a81}', 'āĒƒ'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('\u{abc}', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', '\u{ae3}'),
+ ('āĢĻ', 'āĢ¯'),
+ ('āĢš', '\u{aff}'),
+ ('\u{b01}', 'āŦƒ'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('\u{b3c}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', '\u{b63}'),
+ ('ā­Ļ', 'ā­¯'),
+ ('ā­ą', 'ā­ą'),
+ ('\u{b82}', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('ā¯', 'ā¯'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('ā¯Ļ', 'ā¯¯'),
+ ('\u{c00}', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('āą˜', 'āąš'),
+ ('āą ', '\u{c63}'),
+ ('āąĻ', 'āą¯'),
+ ('ā˛€', 'ā˛ƒ'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('\u{cbc}', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', '\u{ce3}'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d00}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩŽ'),
+ ('āĩ”', '\u{d57}'),
+ ('āĩŸ', '\u{d63}'),
+ ('āĩĻ', 'āĩ¯'),
+ ('āĩē', 'āĩŋ'),
+ ('\u{d81}', 'āļƒ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āˇ˛', 'āˇŗ'),
+ ('ā¸', '\u{e3a}'),
+ ('āš€', '\u{e4e}'),
+ ('āš', 'āš™'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('āģ', 'āģ™'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('\u{f18}', '\u{f19}'),
+ ('āŧ ', 'āŧŠ'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('āŧž', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('\u{f71}', '\u{f84}'),
+ ('\u{f86}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('က', '၉'),
+ ('ၐ', '\u{109d}'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('\u{135d}', '\u{135f}'),
+ ('ᎀ', 'ᎏ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛮ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', '\u{1714}'),
+ ('ᜠ', '\u{1734}'),
+ ('ᝀ', '\u{1753}'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('\u{1772}', '\u{1773}'),
+ ('ក', '\u{17d3}'),
+ ('ៗ', 'ៗ'),
+ ('ៜ', '\u{17dd}'),
+ ('០', '៩'),
+ ('\u{180b}', '\u{180d}'),
+ ('᠐', '᠙'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('áĨ†', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('᧐', '᧙'),
+ ('ᨀ', '\u{1a1b}'),
+ ('ᨠ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('áĒ§', 'áĒ§'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1b00}', 'ᭋ'),
+ ('᭐', '᭙'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', 'á¯ŗ'),
+ ('ᰀ', '\u{1c37}'),
+ ('᱀', '᱉'),
+ ('ᱍ', 'áąŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', 'áŗē'),
+ ('ᴀ', '\u{1df9}'),
+ ('\u{1dfb}', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('\u{200c}', '\u{200d}'),
+ ('â€ŋ', '⁀'),
+ ('⁔', '⁔'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('\u{20d0}', '\u{20f0}'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℯ', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('â’ļ', 'ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('\u{2d7f}', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('ⸯ', 'ⸯ'),
+ ('々', '〇'),
+ ('ã€Ą', '\u{302f}'),
+ ('ã€ą', 'ã€ĩ'),
+ ('〸', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('\u{3099}', '\u{309a}'),
+ ('ゝ', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŧ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ę˜Ģ'),
+ ('Ꙁ', '\u{a672}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('ę™ŋ', '\u{a6f1}'),
+ ('ꜗ', 'ꜟ'),
+ ('ęœĸ', 'ꞈ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ę §'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ€', '\u{a8c5}'),
+ ('ęŖ', 'ęŖ™'),
+ ('\u{a8e0}', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', '\u{a92d}'),
+ ('ꤰ', 'ęĨ“'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('\u{a980}', '꧀'),
+ ('ꧏ', '꧙'),
+ ('ꧠ', '꧞'),
+ ('ꨀ', '\u{aa36}'),
+ ('ꩀ', 'ꩍ'),
+ ('꩐', '꩙'),
+ ('ꊠ', 'ęŠļ'),
+ ('ęŠē', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢ'),
+ ('ęĢ ', 'ęĢ¯'),
+ ('ęĢ˛', '\u{aaf6}'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab69}'),
+ ('ę­°', 'ę¯Ē'),
+ ('ę¯Ŧ', '\u{abed}'),
+ ('ę¯°', 'ę¯š'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŦ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('ī¸ŗ', 'ī¸´'),
+ ('īš', 'īš'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŧ', 'īŧ™'),
+ ('īŧĄ', 'īŧē'),
+ ('īŧŋ', 'īŧŋ'),
+ ('īŊ', 'īŊš'),
+ ('īŊĻ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐅀', '𐅴'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍊'),
+ ('𐍐', '\u{1037a}'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐏑', '𐏕'),
+ ('𐐀', '𐒝'),
+ ('𐒠', '𐒩'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', '\u{10ae6}'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('𐴀', '\u{10d27}'),
+ ('𐴰', '𐴚'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', '\u{10f50}'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀀', '\u{11046}'),
+ ('đ‘Ļ', 'đ‘¯'),
+ ('\u{1107f}', '\u{110ba}'),
+ ('𑃐', '𑃨'),
+ ('𑃰', '𑃹'),
+ ('\u{11100}', '\u{11134}'),
+ ('đ‘„ļ', 'đ‘„ŋ'),
+ ('𑅄', '\u{11147}'),
+ ('𑅐', '\u{11173}'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('\u{11180}', '𑇄'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111ce}', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '\u{112ea}'),
+ ('𑋰', '𑋹'),
+ ('\u{11300}', '𑌃'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('\u{1133b}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('𑍐', '𑍐'),
+ ('\u{11357}', '\u{11357}'),
+ ('𑍝', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('𑐀', '𑑊'),
+ ('𑑐', '𑑙'),
+ ('\u{1145e}', '\u{11461}'),
+ ('𑒀', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑓐', '𑓙'),
+ ('𑖀', '\u{115b5}'),
+ ('𑖸', '\u{115c0}'),
+ ('𑗘', '\u{115dd}'),
+ ('𑘀', '\u{11640}'),
+ ('𑙄', '𑙄'),
+ ('𑙐', '𑙙'),
+ ('𑚀', '𑚸'),
+ ('𑛀', '𑛉'),
+ ('𑜀', '𑜚'),
+ ('\u{1171d}', '\u{1172b}'),
+ ('𑜰', '𑜹'),
+ ('𑠀', '\u{1183a}'),
+ ('đ‘ĸ ', 'đ‘ŖŠ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{11943}'),
+ ('\u{11950}', '\u{11959}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '\u{119d7}'),
+ ('\u{119da}', '𑧡'),
+ ('đ‘§Ŗ', '𑧤'),
+ ('𑨀', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('𑩐', '\u{11a99}'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '\u{11c36}'),
+ ('\u{11c38}', '𑱀'),
+ ('𑱐', '𑱙'),
+ ('𑱲', '𑲏'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d47}'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', 'đ‘ļ˜'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('đ‘ģ ', 'đ‘ģļ'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('𖩠', '𖩩'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('đ–Ŧ€', '\u{16b36}'),
+ ('𖭀', '𖭃'),
+ ('𖭐', '𖭙'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–šŋ'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('\u{16f4f}', '𖾇'),
+ ('\u{16f8f}', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', '\u{16fe4}'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d165}', '\u{1d169}'),
+ ('𝅭', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𝟎', 'đŸŋ'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('𞄀', 'đž„Ŧ'),
+ ('\u{1e130}', 'đž„Ŋ'),
+ ('𞅀', '𞅉'),
+ ('𞅎', '𞅎'),
+ ('𞋀', '𞋹'),
+ ('𞠀', 'đžŖ„'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('𞤀', 'đžĨ‹'),
+ ('đžĨ', 'đžĨ™'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('🄰', '🅉'),
+ ('🅐', '🅩'),
+ ('🅰', '🆉'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/property_bool.rs b/vendor/regex-syntax/src/unicode_tables/property_bool.rs
new file mode 100644
index 000000000..21cbaf9ae
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/property_bool.rs
@@ -0,0 +1,10953 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate property-bool ucd-13.0.0 --chars
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+ ("ASCII_Hex_Digit", ASCII_HEX_DIGIT),
+ ("Alphabetic", ALPHABETIC),
+ ("Bidi_Control", BIDI_CONTROL),
+ ("Bidi_Mirrored", BIDI_MIRRORED),
+ ("Case_Ignorable", CASE_IGNORABLE),
+ ("Cased", CASED),
+ ("Changes_When_Casefolded", CHANGES_WHEN_CASEFOLDED),
+ ("Changes_When_Casemapped", CHANGES_WHEN_CASEMAPPED),
+ ("Changes_When_Lowercased", CHANGES_WHEN_LOWERCASED),
+ ("Changes_When_Titlecased", CHANGES_WHEN_TITLECASED),
+ ("Changes_When_Uppercased", CHANGES_WHEN_UPPERCASED),
+ ("Dash", DASH),
+ ("Default_Ignorable_Code_Point", DEFAULT_IGNORABLE_CODE_POINT),
+ ("Deprecated", DEPRECATED),
+ ("Diacritic", DIACRITIC),
+ ("Emoji", EMOJI),
+ ("Emoji_Component", EMOJI_COMPONENT),
+ ("Emoji_Modifier", EMOJI_MODIFIER),
+ ("Emoji_Modifier_Base", EMOJI_MODIFIER_BASE),
+ ("Emoji_Presentation", EMOJI_PRESENTATION),
+ ("Extended_Pictographic", EXTENDED_PICTOGRAPHIC),
+ ("Extender", EXTENDER),
+ ("Grapheme_Base", GRAPHEME_BASE),
+ ("Grapheme_Extend", GRAPHEME_EXTEND),
+ ("Grapheme_Link", GRAPHEME_LINK),
+ ("Hex_Digit", HEX_DIGIT),
+ ("Hyphen", HYPHEN),
+ ("IDS_Binary_Operator", IDS_BINARY_OPERATOR),
+ ("IDS_Trinary_Operator", IDS_TRINARY_OPERATOR),
+ ("ID_Continue", ID_CONTINUE),
+ ("ID_Start", ID_START),
+ ("Ideographic", IDEOGRAPHIC),
+ ("Join_Control", JOIN_CONTROL),
+ ("Logical_Order_Exception", LOGICAL_ORDER_EXCEPTION),
+ ("Lowercase", LOWERCASE),
+ ("Math", MATH),
+ ("Noncharacter_Code_Point", NONCHARACTER_CODE_POINT),
+ ("Other_Alphabetic", OTHER_ALPHABETIC),
+ ("Other_Default_Ignorable_Code_Point", OTHER_DEFAULT_IGNORABLE_CODE_POINT),
+ ("Other_Grapheme_Extend", OTHER_GRAPHEME_EXTEND),
+ ("Other_ID_Continue", OTHER_ID_CONTINUE),
+ ("Other_ID_Start", OTHER_ID_START),
+ ("Other_Lowercase", OTHER_LOWERCASE),
+ ("Other_Math", OTHER_MATH),
+ ("Other_Uppercase", OTHER_UPPERCASE),
+ ("Pattern_Syntax", PATTERN_SYNTAX),
+ ("Pattern_White_Space", PATTERN_WHITE_SPACE),
+ ("Prepended_Concatenation_Mark", PREPENDED_CONCATENATION_MARK),
+ ("Quotation_Mark", QUOTATION_MARK),
+ ("Radical", RADICAL),
+ ("Regional_Indicator", REGIONAL_INDICATOR),
+ ("Sentence_Terminal", SENTENCE_TERMINAL),
+ ("Soft_Dotted", SOFT_DOTTED),
+ ("Terminal_Punctuation", TERMINAL_PUNCTUATION),
+ ("Unified_Ideograph", UNIFIED_IDEOGRAPH),
+ ("Uppercase", UPPERCASE),
+ ("Variation_Selector", VARIATION_SELECTOR),
+ ("White_Space", WHITE_SPACE),
+ ("XID_Continue", XID_CONTINUE),
+ ("XID_Start", XID_START),
+];
+
+pub const ASCII_HEX_DIGIT: &'static [(char, char)] =
+ &[('0', '9'), ('A', 'F'), ('a', 'f')];
+
+pub const ALPHABETIC: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ˁ'),
+ ('ˆ', 'ˑ'),
+ ('ˠ', 'ˤ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('\u{345}', '\u{345}'),
+ ('Í°', 'Í´'),
+ ('Íļ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', 'ՙ'),
+ ('ՠ', 'ֈ'),
+ ('\u{5b0}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('\u{610}', '\u{61a}'),
+ ('Ø ', '\u{657}'),
+ ('\u{659}', '\u{65f}'),
+ ('ŲŽ', 'ۓ'),
+ ('ە', '\u{6dc}'),
+ ('\u{6e1}', '\u{6e8}'),
+ ('\u{6ed}', 'Û¯'),
+ ('Ûē', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', '\u{73f}'),
+ ('Ũ', 'Ūą'),
+ ('ߊ', 'ßĒ'),
+ ('ß´', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('ā €', '\u{817}'),
+ ('ā š', '\u{82c}'),
+ ('āĄ€', 'āĄ˜'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('\u{8d4}', '\u{8df}'),
+ ('\u{8e3}', '\u{8e9}'),
+ ('\u{8f0}', 'ā¤ģ'),
+ ('ā¤Ŋ', 'āĨŒ'),
+ ('āĨŽ', 'āĨ'),
+ ('\u{955}', '\u{963}'),
+ ('āĨą', 'āĻƒ'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('āĻŊ', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Œ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', '\u{9e3}'),
+ ('ā§°', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4c}'),
+ ('\u{a51}', '\u{a51}'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('\u{a70}', '\u{a75}'),
+ ('\u{a81}', 'āĒƒ'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('āĒŊ', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', 'āĢŒ'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', '\u{ae3}'),
+ ('āĢš', '\u{afc}'),
+ ('\u{b01}', 'āŦƒ'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('āŦŊ', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', 'ā­Œ'),
+ ('\u{b56}', '\u{b57}'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', '\u{b63}'),
+ ('ā­ą', 'ā­ą'),
+ ('\u{b82}', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', 'ā¯Œ'),
+ ('ā¯', 'ā¯'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('\u{c00}', 'ā°ƒ'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4c}'),
+ ('\u{c55}', '\u{c56}'),
+ ('āą˜', 'āąš'),
+ ('āą ', '\u{c63}'),
+ ('ā˛€', 'ā˛ƒ'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛Ŋ', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccc}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', '\u{ce3}'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d00}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´ē'),
+ ('ā´Ŋ', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩŒ'),
+ ('āĩŽ', 'āĩŽ'),
+ ('āĩ”', '\u{d57}'),
+ ('āĩŸ', '\u{d63}'),
+ ('āĩē', 'āĩŋ'),
+ ('\u{d81}', 'āļƒ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇ˛', 'āˇŗ'),
+ ('ā¸', '\u{e3a}'),
+ ('āš€', 'āš†'),
+ ('\u{e4d}', '\u{e4d}'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', '\u{eb9}'),
+ ('\u{ebb}', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('\u{ecd}', '\u{ecd}'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('āŊ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('\u{f71}', '\u{f81}'),
+ ('āžˆ', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('က', '\u{1036}'),
+ ('း', 'း'),
+ ('á€ģ', 'á€ŋ'),
+ ('ၐ', 'ႏ'),
+ ('ႚ', '\u{109d}'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('ᎀ', 'ᎏ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛮ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', '\u{1713}'),
+ ('ᜠ', '\u{1733}'),
+ ('ᝀ', '\u{1753}'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('\u{1772}', '\u{1773}'),
+ ('ក', 'ážŗ'),
+ ('ážļ', 'ៈ'),
+ ('ៗ', 'ៗ'),
+ ('ៜ', 'ៜ'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', 'ᤸ'),
+ ('áĨ', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('ᨀ', '\u{1a1b}'),
+ ('ᨠ', '\u{1a5e}'),
+ ('እ', '\u{1a74}'),
+ ('áĒ§', 'áĒ§'),
+ ('\u{1abf}', '\u{1ac0}'),
+ ('\u{1b00}', 'áŦŗ'),
+ ('\u{1b35}', 'ᭃ'),
+ ('ᭅ', 'ᭋ'),
+ ('\u{1b80}', '\u{1ba9}'),
+ ('\u{1bac}', 'Ꭿ'),
+ ('áŽē', 'á¯Ĩ'),
+ ('ᯧ', '\u{1bf1}'),
+ ('ᰀ', '\u{1c36}'),
+ ('ᱍ', 'ᱏ'),
+ ('ᱚ', 'áąŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗļ'),
+ ('áŗē', 'áŗē'),
+ ('ᴀ', 'áļŋ'),
+ ('\u{1de7}', '\u{1df4}'),
+ ('Ḁ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℯ', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('â’ļ', 'ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('ⸯ', 'ⸯ'),
+ ('々', '〇'),
+ ('ã€Ą', '《'),
+ ('ã€ą', 'ã€ĩ'),
+ ('〸', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('ゝ', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŧ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ꘟ'),
+ ('ę˜Ē', 'ę˜Ģ'),
+ ('Ꙁ', 'ꙮ'),
+ ('\u{a674}', '\u{a67b}'),
+ ('ę™ŋ', 'ę›¯'),
+ ('ꜗ', 'ꜟ'),
+ ('ęœĸ', 'ꞈ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ꠅ'),
+ ('ꠇ', 'ꠧ'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ€', 'ęŖƒ'),
+ ('\u{a8c5}', '\u{a8c5}'),
+ ('ęŖ˛', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', '\u{a8ff}'),
+ ('ꤊ', '\u{a92a}'),
+ ('ꤰ', 'ęĨ’'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('\u{a980}', 'ęĻ˛'),
+ ('ęĻ´', 'ęĻŋ'),
+ ('ꧏ', 'ꧏ'),
+ ('ꧠ', 'ę§¯'),
+ ('ę§ē', '꧞'),
+ ('ꨀ', '\u{aa36}'),
+ ('ꩀ', 'ꩍ'),
+ ('ꊠ', 'ęŠļ'),
+ ('ęŠē', '\u{aabe}'),
+ ('ęĢ€', 'ęĢ€'),
+ ('ęĢ‚', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢ'),
+ ('ęĢ ', 'ęĢ¯'),
+ ('ęĢ˛', 'ęĢĩ'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab69}'),
+ ('ę­°', 'ę¯Ē'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŦ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+ ('īŊĻ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐅀', '𐅴'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍊'),
+ ('𐍐', '\u{1037a}'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐏑', '𐏕'),
+ ('𐐀', '𐒝'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', 'đĢ¤'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('𐴀', '\u{10d27}'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', 'đŊ…'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀀', '\u{11045}'),
+ ('𑂂', '𑂸'),
+ ('𑃐', '𑃨'),
+ ('\u{11100}', '\u{11132}'),
+ ('𑅄', '\u{11147}'),
+ ('𑅐', '𑅲'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('\u{11180}', 'đ‘†ŋ'),
+ ('𑇁', '𑇄'),
+ ('\u{111ce}', '\u{111cf}'),
+ ('𑇚', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', '\u{11234}'),
+ ('\u{11237}', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '\u{112e8}'),
+ ('\u{11300}', '𑌃'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('đ‘ŒŊ', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍌'),
+ ('𑍐', '𑍐'),
+ ('\u{11357}', '\u{11357}'),
+ ('𑍝', 'đ‘Ŗ'),
+ ('𑐀', '𑑁'),
+ ('\u{11443}', '𑑅'),
+ ('𑑇', '𑑊'),
+ ('𑑟', '\u{11461}'),
+ ('𑒀', '𑓁'),
+ ('𑓄', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑖀', '\u{115b5}'),
+ ('𑖸', '𑖾'),
+ ('𑗘', '\u{115dd}'),
+ ('𑘀', '𑘾'),
+ ('\u{11640}', '\u{11640}'),
+ ('𑙄', '𑙄'),
+ ('𑚀', '\u{116b5}'),
+ ('𑚸', '𑚸'),
+ ('𑜀', '𑜚'),
+ ('\u{1171d}', '\u{1172a}'),
+ ('𑠀', '𑠸'),
+ ('đ‘ĸ ', 'đ‘ŖŸ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{1193c}'),
+ ('\u{1193f}', '\u{11942}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '\u{119d7}'),
+ ('\u{119da}', '𑧟'),
+ ('𑧡', '𑧡'),
+ ('đ‘§Ŗ', '𑧤'),
+ ('𑨀', '𑨲'),
+ ('\u{11a35}', '\u{11a3e}'),
+ ('𑩐', 'đ‘Ē—'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '\u{11c36}'),
+ ('\u{11c38}', '𑰾'),
+ ('𑱀', '𑱀'),
+ ('𑱲', '𑲏'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d41}'),
+ ('\u{11d43}', '\u{11d43}'),
+ ('đ‘ĩ†', '\u{11d47}'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', 'đ‘ļ–'),
+ ('đ‘ļ˜', 'đ‘ļ˜'),
+ ('đ‘ģ ', 'đ‘ģļ'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('đ–Ŧ€', 'đ–Ŧ¯'),
+ ('𖭀', '𖭃'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–šŋ'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('\u{16f4f}', '𖾇'),
+ ('\u{16f8f}', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', 'đ–ŋŖ'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('\u{1bc9e}', '\u{1bc9e}'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('𞄀', 'đž„Ŧ'),
+ ('𞄷', 'đž„Ŋ'),
+ ('𞅎', '𞅎'),
+ ('𞋀', 'đž‹Ģ'),
+ ('𞠀', 'đžŖ„'),
+ ('𞤀', 'đžĨƒ'),
+ ('\u{1e947}', '\u{1e947}'),
+ ('đžĨ‹', 'đžĨ‹'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('🄰', '🅉'),
+ ('🅐', '🅩'),
+ ('🅰', '🆉'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const BIDI_CONTROL: &'static [(char, char)] = &[
+ ('\u{61c}', '\u{61c}'),
+ ('\u{200e}', '\u{200f}'),
+ ('\u{202a}', '\u{202e}'),
+ ('\u{2066}', '\u{2069}'),
+];
+
+pub const BIDI_MIRRORED: &'static [(char, char)] = &[
+ ('(', ')'),
+ ('<', '<'),
+ ('>', '>'),
+ ('[', '['),
+ (']', ']'),
+ ('{', '{'),
+ ('}', '}'),
+ ('ÂĢ', 'ÂĢ'),
+ ('Âģ', 'Âģ'),
+ ('āŧē', 'āŧŊ'),
+ ('᚛', '᚜'),
+ ('‹', 'â€ē'),
+ ('⁅', '⁆'),
+ ('âŊ', '⁞'),
+ ('₍', '₎'),
+ ('⅀', '⅀'),
+ ('∁', '∄'),
+ ('∈', '∍'),
+ ('∑', '∑'),
+ ('∕', '∖'),
+ ('√', '∝'),
+ ('∟', 'âˆĸ'),
+ ('∤', '∤'),
+ ('âˆĻ', 'âˆĻ'),
+ ('âˆĢ', 'âˆŗ'),
+ ('√', '√'),
+ ('âˆģ', '≌'),
+ ('≒', '≕'),
+ ('≟', '≠'),
+ ('â‰ĸ', 'â‰ĸ'),
+ ('≤', 'â‰Ģ'),
+ ('≮', '⊌'),
+ ('⊏', '⊒'),
+ ('⊘', '⊘'),
+ ('âŠĸ', 'âŠŖ'),
+ ('âŠĻ', '⊸'),
+ ('⊾', 'âŠŋ'),
+ ('⋉', '⋍'),
+ ('⋐', '⋑'),
+ ('⋖', '⋭'),
+ ('⋰', 'â‹ŋ'),
+ ('⌈', '⌋'),
+ ('⌠', '⌡'),
+ ('〈', 'âŒĒ'),
+ ('❨', 'âĩ'),
+ ('⟀', '⟀'),
+ ('⟃', '⟆'),
+ ('⟈', '⟉'),
+ ('⟋', '⟍'),
+ ('⟓', '⟖'),
+ ('⟜', '⟞'),
+ ('âŸĸ', '⟯'),
+ ('âĻƒ', 'âĻ˜'),
+ ('âĻ›', 'âĻ '),
+ ('âĻĸ', 'âĻ¯'),
+ ('âĻ¸', 'âĻ¸'),
+ ('⧀', '⧅'),
+ ('⧉', '⧉'),
+ ('⧎', '⧒'),
+ ('⧔', '⧕'),
+ ('⧘', '⧜'),
+ ('⧥', '⧥'),
+ ('â§Ŗ', 'â§Ĩ'),
+ ('⧨', '⧊'),
+ ('⧴', '⧚'),
+ ('â§ŧ', 'â§Ŋ'),
+ ('⨊', '⨜'),
+ ('⨞', '⨡'),
+ ('⨤', '⨤'),
+ ('â¨Ļ', 'â¨Ļ'),
+ ('⨊', '⨊'),
+ ('â¨Ģ', '⨎'),
+ ('⨴', 'â¨ĩ'),
+ ('â¨ŧ', '⨞'),
+ ('⩗', '⩘'),
+ ('⊤', 'âŠĨ'),
+ ('âŠĒ', '⊭'),
+ ('⊯', '⊰'),
+ ('âŠŗ', '⊴'),
+ ('⊚', 'âĒŖ'),
+ ('âĒĻ', 'âĒ­'),
+ ('âĒ¯', 'âĢ–'),
+ ('âĢœ', 'âĢœ'),
+ ('âĢž', 'âĢž'),
+ ('âĢĸ', 'âĢĻ'),
+ ('âĢŦ', 'âĢŽ'),
+ ('âĢŗ', 'âĢŗ'),
+ ('âĢˇ', 'âĢģ'),
+ ('âĢŊ', 'âĢŊ'),
+ ('⯞', '⯞'),
+ ('⸂', '⸅'),
+ ('⸉', '⸊'),
+ ('⸌', '⸍'),
+ ('⸜', '⸝'),
+ ('⸠', '⸊'),
+ ('〈', '】'),
+ ('〔', '〛'),
+ ('īš™', 'īšž'),
+ ('īš¤', 'īšĨ'),
+ ('īŧˆ', 'īŧ‰'),
+ ('īŧœ', 'īŧœ'),
+ ('īŧž', 'īŧž'),
+ ('īŧģ', 'īŧģ'),
+ ('īŧŊ', 'īŧŊ'),
+ ('īŊ›', 'īŊ›'),
+ ('īŊ', 'īŊ'),
+ ('īŊŸ', 'īŊ '),
+ ('īŊĸ', 'īŊŖ'),
+ ('𝛛', '𝛛'),
+ ('𝜕', '𝜕'),
+ ('𝝏', '𝝏'),
+ ('𝞉', '𝞉'),
+ ('𝟃', '𝟃'),
+];
+
+pub const CASE_IGNORABLE: &'static [(char, char)] = &[
+ ('\'', '\''),
+ ('.', '.'),
+ (':', ':'),
+ ('^', '^'),
+ ('`', '`'),
+ ('¨', '¨'),
+ ('\u{ad}', '\u{ad}'),
+ ('¯', '¯'),
+ ('´', '´'),
+ ('¡', '¸'),
+ ('Ę°', '\u{36f}'),
+ ('Í´', 'Íĩ'),
+ ('Íē', 'Íē'),
+ ('΄', '΅'),
+ ('·', '·'),
+ ('\u{483}', '\u{489}'),
+ ('ՙ', 'ՙ'),
+ ('՟', '՟'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('×´', '×´'),
+ ('\u{600}', '\u{605}'),
+ ('\u{610}', '\u{61a}'),
+ ('\u{61c}', '\u{61c}'),
+ ('Ų€', 'Ų€'),
+ ('\u{64b}', '\u{65f}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{6d6}', '\u{6dd}'),
+ ('\u{6df}', '\u{6e8}'),
+ ('\u{6ea}', '\u{6ed}'),
+ ('\u{70f}', '\u{70f}'),
+ ('\u{711}', '\u{711}'),
+ ('\u{730}', '\u{74a}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{7eb}', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('\u{816}', '\u{82d}'),
+ ('\u{859}', '\u{85b}'),
+ ('\u{8d3}', '\u{902}'),
+ ('\u{93a}', '\u{93a}'),
+ ('\u{93c}', '\u{93c}'),
+ ('\u{941}', '\u{948}'),
+ ('\u{94d}', '\u{94d}'),
+ ('\u{951}', '\u{957}'),
+ ('\u{962}', '\u{963}'),
+ ('āĨą', 'āĨą'),
+ ('\u{981}', '\u{981}'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9c1}', '\u{9c4}'),
+ ('\u{9cd}', '\u{9cd}'),
+ ('\u{9e2}', '\u{9e3}'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', '\u{a02}'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('\u{a41}', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a70}', '\u{a71}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{a81}', '\u{a82}'),
+ ('\u{abc}', '\u{abc}'),
+ ('\u{ac1}', '\u{ac5}'),
+ ('\u{ac7}', '\u{ac8}'),
+ ('\u{acd}', '\u{acd}'),
+ ('\u{ae2}', '\u{ae3}'),
+ ('\u{afa}', '\u{aff}'),
+ ('\u{b01}', '\u{b01}'),
+ ('\u{b3c}', '\u{b3c}'),
+ ('\u{b3f}', '\u{b3f}'),
+ ('\u{b41}', '\u{b44}'),
+ ('\u{b4d}', '\u{b4d}'),
+ ('\u{b55}', '\u{b56}'),
+ ('\u{b62}', '\u{b63}'),
+ ('\u{b82}', '\u{b82}'),
+ ('\u{bc0}', '\u{bc0}'),
+ ('\u{bcd}', '\u{bcd}'),
+ ('\u{c00}', '\u{c00}'),
+ ('\u{c04}', '\u{c04}'),
+ ('\u{c3e}', '\u{c40}'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('\u{c62}', '\u{c63}'),
+ ('\u{c81}', '\u{c81}'),
+ ('\u{cbc}', '\u{cbc}'),
+ ('\u{cbf}', '\u{cbf}'),
+ ('\u{cc6}', '\u{cc6}'),
+ ('\u{ccc}', '\u{ccd}'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('\u{d00}', '\u{d01}'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d41}', '\u{d44}'),
+ ('\u{d4d}', '\u{d4d}'),
+ ('\u{d62}', '\u{d63}'),
+ ('\u{d81}', '\u{d81}'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dd2}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('\u{e31}', '\u{e31}'),
+ ('\u{e34}', '\u{e3a}'),
+ ('āš†', '\u{e4e}'),
+ ('\u{eb1}', '\u{eb1}'),
+ ('\u{eb4}', '\u{ebc}'),
+ ('āģ†', 'āģ†'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('\u{f18}', '\u{f19}'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('\u{f71}', '\u{f7e}'),
+ ('\u{f80}', '\u{f84}'),
+ ('\u{f86}', '\u{f87}'),
+ ('\u{f8d}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('\u{102d}', '\u{1030}'),
+ ('\u{1032}', '\u{1037}'),
+ ('\u{1039}', '\u{103a}'),
+ ('\u{103d}', '\u{103e}'),
+ ('\u{1058}', '\u{1059}'),
+ ('\u{105e}', '\u{1060}'),
+ ('\u{1071}', '\u{1074}'),
+ ('\u{1082}', '\u{1082}'),
+ ('\u{1085}', '\u{1086}'),
+ ('\u{108d}', '\u{108d}'),
+ ('\u{109d}', '\u{109d}'),
+ ('áƒŧ', 'áƒŧ'),
+ ('\u{135d}', '\u{135f}'),
+ ('\u{1712}', '\u{1714}'),
+ ('\u{1732}', '\u{1734}'),
+ ('\u{1752}', '\u{1753}'),
+ ('\u{1772}', '\u{1773}'),
+ ('\u{17b4}', '\u{17b5}'),
+ ('\u{17b7}', '\u{17bd}'),
+ ('\u{17c6}', '\u{17c6}'),
+ ('\u{17c9}', '\u{17d3}'),
+ ('ៗ', 'ៗ'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('\u{180b}', '\u{180e}'),
+ ('᥃', '᥃'),
+ ('\u{1885}', '\u{1886}'),
+ ('\u{18a9}', '\u{18a9}'),
+ ('\u{1920}', '\u{1922}'),
+ ('\u{1927}', '\u{1928}'),
+ ('\u{1932}', '\u{1932}'),
+ ('\u{1939}', '\u{193b}'),
+ ('\u{1a17}', '\u{1a18}'),
+ ('\u{1a1b}', '\u{1a1b}'),
+ ('\u{1a56}', '\u{1a56}'),
+ ('\u{1a58}', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a60}'),
+ ('\u{1a62}', '\u{1a62}'),
+ ('\u{1a65}', '\u{1a6c}'),
+ ('\u{1a73}', '\u{1a7c}'),
+ ('\u{1a7f}', '\u{1a7f}'),
+ ('áĒ§', 'áĒ§'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1b00}', '\u{1b03}'),
+ ('\u{1b34}', '\u{1b34}'),
+ ('\u{1b36}', '\u{1b3a}'),
+ ('\u{1b3c}', '\u{1b3c}'),
+ ('\u{1b42}', '\u{1b42}'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', '\u{1b81}'),
+ ('\u{1ba2}', '\u{1ba5}'),
+ ('\u{1ba8}', '\u{1ba9}'),
+ ('\u{1bab}', '\u{1bad}'),
+ ('\u{1be6}', '\u{1be6}'),
+ ('\u{1be8}', '\u{1be9}'),
+ ('\u{1bed}', '\u{1bed}'),
+ ('\u{1bef}', '\u{1bf1}'),
+ ('\u{1c2c}', '\u{1c33}'),
+ ('\u{1c36}', '\u{1c37}'),
+ ('Ṹ', 'áąŊ'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', '\u{1ce0}'),
+ ('\u{1ce2}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('\u{1cf8}', '\u{1cf9}'),
+ ('á´Ŧ', 'áĩĒ'),
+ ('áĩ¸', 'áĩ¸'),
+ ('áļ›', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('ážŊ', 'ážŊ'),
+ ('ážŋ', 'áŋ'),
+ ('áŋ', 'áŋ'),
+ ('áŋ', 'áŋŸ'),
+ ('áŋ­', 'áŋ¯'),
+ ('áŋŊ', 'áŋž'),
+ ('\u{200b}', '\u{200f}'),
+ ('‘', '’'),
+ ('․', '․'),
+ ('‧', '‧'),
+ ('\u{202a}', '\u{202e}'),
+ ('\u{2060}', '\u{2064}'),
+ ('\u{2066}', '\u{206f}'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('\u{20d0}', '\u{20f0}'),
+ ('âąŧ', 'âąŊ'),
+ ('\u{2cef}', '\u{2cf1}'),
+ ('âĩ¯', 'âĩ¯'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('ⸯ', 'ⸯ'),
+ ('々', '々'),
+ ('\u{302a}', '\u{302d}'),
+ ('ã€ą', 'ã€ĩ'),
+ ('ã€ģ', 'ã€ģ'),
+ ('\u{3099}', 'ゞ'),
+ ('ãƒŧ', 'マ'),
+ ('ꀕ', 'ꀕ'),
+ ('ꓸ', 'ę“Ŋ'),
+ ('ꘌ', 'ꘌ'),
+ ('\u{a66f}', '\u{a672}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('ę™ŋ', 'ę™ŋ'),
+ ('ꚜ', '\u{a69f}'),
+ ('\u{a6f0}', '\u{a6f1}'),
+ ('꜀', '꜡'),
+ ('ꝰ', 'ꝰ'),
+ ('ꞈ', '꞊'),
+ ('ꟸ', 'ꟹ'),
+ ('\u{a802}', '\u{a802}'),
+ ('\u{a806}', '\u{a806}'),
+ ('\u{a80b}', '\u{a80b}'),
+ ('\u{a825}', '\u{a826}'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('\u{a8c4}', '\u{a8c5}'),
+ ('\u{a8e0}', '\u{a8f1}'),
+ ('\u{a8ff}', '\u{a8ff}'),
+ ('\u{a926}', '\u{a92d}'),
+ ('\u{a947}', '\u{a951}'),
+ ('\u{a980}', '\u{a982}'),
+ ('\u{a9b3}', '\u{a9b3}'),
+ ('\u{a9b6}', '\u{a9b9}'),
+ ('\u{a9bc}', '\u{a9bd}'),
+ ('ꧏ', 'ꧏ'),
+ ('\u{a9e5}', 'ę§Ļ'),
+ ('\u{aa29}', '\u{aa2e}'),
+ ('\u{aa31}', '\u{aa32}'),
+ ('\u{aa35}', '\u{aa36}'),
+ ('\u{aa43}', '\u{aa43}'),
+ ('\u{aa4c}', '\u{aa4c}'),
+ ('ꊰ', 'ꊰ'),
+ ('\u{aa7c}', '\u{aa7c}'),
+ ('\u{aab0}', '\u{aab0}'),
+ ('\u{aab2}', '\u{aab4}'),
+ ('\u{aab7}', '\u{aab8}'),
+ ('\u{aabe}', '\u{aabf}'),
+ ('\u{aac1}', '\u{aac1}'),
+ ('ęĢ', 'ęĢ'),
+ ('\u{aaec}', '\u{aaed}'),
+ ('ęĢŗ', 'ęĢ´'),
+ ('\u{aaf6}', '\u{aaf6}'),
+ ('꭛', 'ꭟ'),
+ ('\u{ab69}', '\u{ab6b}'),
+ ('\u{abe5}', '\u{abe5}'),
+ ('\u{abe8}', '\u{abe8}'),
+ ('\u{abed}', '\u{abed}'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('īŽ˛', 'ī¯'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('ī¸“', 'ī¸“'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('īš’', 'īš’'),
+ ('īš•', 'īš•'),
+ ('\u{feff}', '\u{feff}'),
+ ('īŧ‡', 'īŧ‡'),
+ ('īŧŽ', 'īŧŽ'),
+ ('īŧš', 'īŧš'),
+ ('īŧž', 'īŧž'),
+ ('īŊ€', 'īŊ€'),
+ ('īŊ°', 'īŊ°'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('īŋŖ', 'īŋŖ'),
+ ('\u{fff9}', '\u{fffb}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{10376}', '\u{1037a}'),
+ ('\u{10a01}', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '\u{10a0f}'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('\u{10ae5}', '\u{10ae6}'),
+ ('\u{10d24}', '\u{10d27}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10f46}', '\u{10f50}'),
+ ('\u{11001}', '\u{11001}'),
+ ('\u{11038}', '\u{11046}'),
+ ('\u{1107f}', '\u{11081}'),
+ ('\u{110b3}', '\u{110b6}'),
+ ('\u{110b9}', '\u{110ba}'),
+ ('\u{110bd}', '\u{110bd}'),
+ ('\u{110cd}', '\u{110cd}'),
+ ('\u{11100}', '\u{11102}'),
+ ('\u{11127}', '\u{1112b}'),
+ ('\u{1112d}', '\u{11134}'),
+ ('\u{11173}', '\u{11173}'),
+ ('\u{11180}', '\u{11181}'),
+ ('\u{111b6}', '\u{111be}'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111cf}', '\u{111cf}'),
+ ('\u{1122f}', '\u{11231}'),
+ ('\u{11234}', '\u{11234}'),
+ ('\u{11236}', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('\u{112df}', '\u{112df}'),
+ ('\u{112e3}', '\u{112ea}'),
+ ('\u{11300}', '\u{11301}'),
+ ('\u{1133b}', '\u{1133c}'),
+ ('\u{11340}', '\u{11340}'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('\u{11438}', '\u{1143f}'),
+ ('\u{11442}', '\u{11444}'),
+ ('\u{11446}', '\u{11446}'),
+ ('\u{1145e}', '\u{1145e}'),
+ ('\u{114b3}', '\u{114b8}'),
+ ('\u{114ba}', '\u{114ba}'),
+ ('\u{114bf}', '\u{114c0}'),
+ ('\u{114c2}', '\u{114c3}'),
+ ('\u{115b2}', '\u{115b5}'),
+ ('\u{115bc}', '\u{115bd}'),
+ ('\u{115bf}', '\u{115c0}'),
+ ('\u{115dc}', '\u{115dd}'),
+ ('\u{11633}', '\u{1163a}'),
+ ('\u{1163d}', '\u{1163d}'),
+ ('\u{1163f}', '\u{11640}'),
+ ('\u{116ab}', '\u{116ab}'),
+ ('\u{116ad}', '\u{116ad}'),
+ ('\u{116b0}', '\u{116b5}'),
+ ('\u{116b7}', '\u{116b7}'),
+ ('\u{1171d}', '\u{1171f}'),
+ ('\u{11722}', '\u{11725}'),
+ ('\u{11727}', '\u{1172b}'),
+ ('\u{1182f}', '\u{11837}'),
+ ('\u{11839}', '\u{1183a}'),
+ ('\u{1193b}', '\u{1193c}'),
+ ('\u{1193e}', '\u{1193e}'),
+ ('\u{11943}', '\u{11943}'),
+ ('\u{119d4}', '\u{119d7}'),
+ ('\u{119da}', '\u{119db}'),
+ ('\u{119e0}', '\u{119e0}'),
+ ('\u{11a01}', '\u{11a0a}'),
+ ('\u{11a33}', '\u{11a38}'),
+ ('\u{11a3b}', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a51}', '\u{11a56}'),
+ ('\u{11a59}', '\u{11a5b}'),
+ ('\u{11a8a}', '\u{11a96}'),
+ ('\u{11a98}', '\u{11a99}'),
+ ('\u{11c30}', '\u{11c36}'),
+ ('\u{11c38}', '\u{11c3d}'),
+ ('\u{11c3f}', '\u{11c3f}'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('\u{11caa}', '\u{11cb0}'),
+ ('\u{11cb2}', '\u{11cb3}'),
+ ('\u{11cb5}', '\u{11cb6}'),
+ ('\u{11d31}', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d45}'),
+ ('\u{11d47}', '\u{11d47}'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('\u{11d95}', '\u{11d95}'),
+ ('\u{11d97}', '\u{11d97}'),
+ ('\u{11ef3}', '\u{11ef4}'),
+ ('\u{13430}', '\u{13438}'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('\u{16b30}', '\u{16b36}'),
+ ('𖭀', '𖭃'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('\u{16f8f}', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', '\u{16fe4}'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1bca0}', '\u{1bca3}'),
+ ('\u{1d167}', '\u{1d169}'),
+ ('\u{1d173}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('\u{1e130}', 'đž„Ŋ'),
+ ('\u{1e2ec}', '\u{1e2ef}'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('\u{1e944}', 'đžĨ‹'),
+ ('đŸģ', 'đŸŋ'),
+ ('\u{e0001}', '\u{e0001}'),
+ ('\u{e0020}', '\u{e007f}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const CASED: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'Æē'),
+ ('Æŧ', 'Æŋ'),
+ ('Į„', 'ʓ'),
+ ('ʕ', 'ʸ'),
+ ('ˀ', 'ˁ'),
+ ('ˠ', 'ˤ'),
+ ('\u{345}', '\u{345}'),
+ ('Í°', 'Íŗ'),
+ ('Íļ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՠ', 'ֈ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŊ', 'áƒŋ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('ᴀ', 'áļŋ'),
+ ('Ḁ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℯ', 'ℴ'),
+ ('ℹ', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'â…ŋ'),
+ ('Ↄ', 'ↄ'),
+ ('â’ļ', 'ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('Ꙁ', 'ꙭ'),
+ ('Ꚁ', 'ꚝ'),
+ ('ęœĸ', 'ꞇ'),
+ ('Ꞌ', 'ꞎ'),
+ ('Ꞑ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', '\u{a7f6}'),
+ ('ꟸ', 'ęŸē'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab68}'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+ ('𐐀', '𐑏'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('đ‘ĸ ', 'đ‘ŖŸ'),
+ ('𖹀', 'đ–šŋ'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𞤀', 'đžĨƒ'),
+ ('🄰', '🅉'),
+ ('🅐', '🅩'),
+ ('🅰', '🆉'),
+];
+
+pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('Âĩ', 'Âĩ'),
+ ('À', 'Ö'),
+ ('Ø', 'ß'),
+ ('Ā', 'Ā'),
+ ('Ă', 'Ă'),
+ ('Ą', 'Ą'),
+ ('Ć', 'Ć'),
+ ('Ĉ', 'Ĉ'),
+ ('Ċ', 'Ċ'),
+ ('Č', 'Č'),
+ ('Ď', 'Ď'),
+ ('Đ', 'Đ'),
+ ('Ē', 'Ē'),
+ ('Ĕ', 'Ĕ'),
+ ('Ė', 'Ė'),
+ ('Ę', 'Ę'),
+ ('Ě', 'Ě'),
+ ('Ĝ', 'Ĝ'),
+ ('Ğ', 'Ğ'),
+ ('Ä ', 'Ä '),
+ ('Äĸ', 'Äĸ'),
+ ('Ĥ', 'Ĥ'),
+ ('ÄĻ', 'ÄĻ'),
+ ('Ĩ', 'Ĩ'),
+ ('ÄĒ', 'ÄĒ'),
+ ('ÄŦ', 'ÄŦ'),
+ ('ÄŽ', 'ÄŽ'),
+ ('Ä°', 'Ä°'),
+ ('IJ', 'IJ'),
+ ('Ä´', 'Ä´'),
+ ('Äļ', 'Äļ'),
+ ('Äš', 'Äš'),
+ ('Äģ', 'Äģ'),
+ ('ÄŊ', 'ÄŊ'),
+ ('Äŋ', 'Äŋ'),
+ ('Ł', 'Ł'),
+ ('Ń', 'Ń'),
+ ('Ņ', 'Ņ'),
+ ('Ň', 'Ň'),
+ ('ʼn', 'Ŋ'),
+ ('Ō', 'Ō'),
+ ('Ŏ', 'Ŏ'),
+ ('Ő', 'Ő'),
+ ('Œ', 'Œ'),
+ ('Ŕ', 'Ŕ'),
+ ('Ŗ', 'Ŗ'),
+ ('Ř', 'Ř'),
+ ('Ś', 'Ś'),
+ ('Ŝ', 'Ŝ'),
+ ('Ş', 'Ş'),
+ ('Å ', 'Å '),
+ ('Åĸ', 'Åĸ'),
+ ('Ť', 'Ť'),
+ ('ÅĻ', 'ÅĻ'),
+ ('Ũ', 'Ũ'),
+ ('ÅĒ', 'ÅĒ'),
+ ('ÅŦ', 'ÅŦ'),
+ ('ÅŽ', 'ÅŽ'),
+ ('Å°', 'Å°'),
+ ('Å˛', 'Å˛'),
+ ('Å´', 'Å´'),
+ ('Åļ', 'Åļ'),
+ ('Ÿ', 'Ś'),
+ ('Åģ', 'Åģ'),
+ ('ÅŊ', 'ÅŊ'),
+ ('Åŋ', 'Åŋ'),
+ ('Ɓ', 'Ƃ'),
+ ('Ƅ', 'Ƅ'),
+ ('Ɔ', 'Ƈ'),
+ ('Ɖ', 'Ƌ'),
+ ('Ǝ', 'Ƒ'),
+ ('Ɠ', 'Ɣ'),
+ ('Ɩ', 'Ƙ'),
+ ('Ɯ', 'Ɲ'),
+ ('Ɵ', 'Ơ'),
+ ('Æĸ', 'Æĸ'),
+ ('Ƥ', 'Ƥ'),
+ ('ÆĻ', 'Ƨ'),
+ ('ÆŠ', 'ÆŠ'),
+ ('ÆŦ', 'ÆŦ'),
+ ('Ǝ', 'Ư'),
+ ('Æą', 'Æŗ'),
+ ('Æĩ', 'Æĩ'),
+ ('Æˇ', 'Ƹ'),
+ ('Æŧ', 'Æŧ'),
+ ('Į„', 'Į…'),
+ ('Į‡', 'Įˆ'),
+ ('ĮŠ', 'Į‹'),
+ ('Į', 'Į'),
+ ('Į', 'Į'),
+ ('Į‘', 'Į‘'),
+ ('Į“', 'Į“'),
+ ('Į•', 'Į•'),
+ ('Į—', 'Į—'),
+ ('Į™', 'Į™'),
+ ('Į›', 'Į›'),
+ ('Įž', 'Įž'),
+ ('Į ', 'Į '),
+ ('Įĸ', 'Įĸ'),
+ ('Į¤', 'Į¤'),
+ ('ĮĻ', 'ĮĻ'),
+ ('Į¨', 'Į¨'),
+ ('ĮĒ', 'ĮĒ'),
+ ('ĮŦ', 'ĮŦ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Įą', 'Į˛'),
+ ('Į´', 'Į´'),
+ ('Įļ', 'Į¸'),
+ ('Įē', 'Įē'),
+ ('Įŧ', 'Įŧ'),
+ ('Įž', 'Įž'),
+ ('Ȁ', 'Ȁ'),
+ ('Ȃ', 'Ȃ'),
+ ('Ȅ', 'Ȅ'),
+ ('Ȇ', 'Ȇ'),
+ ('Ȉ', 'Ȉ'),
+ ('Ȋ', 'Ȋ'),
+ ('Ȍ', 'Ȍ'),
+ ('Ȏ', 'Ȏ'),
+ ('Ȑ', 'Ȑ'),
+ ('Ȓ', 'Ȓ'),
+ ('Ȕ', 'Ȕ'),
+ ('Ȗ', 'Ȗ'),
+ ('Ș', 'Ș'),
+ ('Ț', 'Ț'),
+ ('Ȝ', 'Ȝ'),
+ ('Ȟ', 'Ȟ'),
+ ('Č ', 'Č '),
+ ('Čĸ', 'Čĸ'),
+ ('Ȥ', 'Ȥ'),
+ ('ČĻ', 'ČĻ'),
+ ('Ȩ', 'Ȩ'),
+ ('ČĒ', 'ČĒ'),
+ ('ČŦ', 'ČŦ'),
+ ('ČŽ', 'ČŽ'),
+ ('Č°', 'Č°'),
+ ('Ȳ', 'Ȳ'),
+ ('Čē', 'Čģ'),
+ ('ČŊ', 'Čž'),
+ ('Ɂ', 'Ɂ'),
+ ('Ƀ', 'Ɇ'),
+ ('Ɉ', 'Ɉ'),
+ ('Ɋ', 'Ɋ'),
+ ('Ɍ', 'Ɍ'),
+ ('Ɏ', 'Ɏ'),
+ ('\u{345}', '\u{345}'),
+ ('Í°', 'Í°'),
+ ('Ͳ', 'Ͳ'),
+ ('Íļ', 'Íļ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ώ'),
+ ('Α', 'Ρ'),
+ ('ÎŖ', 'ÎĢ'),
+ ('Ī‚', 'Ī‚'),
+ ('Ī', 'Ī‘'),
+ ('Ī•', 'Ī–'),
+ ('Ī˜', 'Ī˜'),
+ ('Īš', 'Īš'),
+ ('Īœ', 'Īœ'),
+ ('Īž', 'Īž'),
+ ('Ī ', 'Ī '),
+ ('Īĸ', 'Īĸ'),
+ ('Ī¤', 'Ī¤'),
+ ('ĪĻ', 'ĪĻ'),
+ ('Ī¨', 'Ī¨'),
+ ('ĪĒ', 'ĪĒ'),
+ ('ĪŦ', 'ĪŦ'),
+ ('ĪŽ', 'ĪŽ'),
+ ('Ī°', 'Īą'),
+ ('Ī´', 'Īĩ'),
+ ('Īˇ', 'Īˇ'),
+ ('Īš', 'Īē'),
+ ('ĪŊ', 'Đ¯'),
+ ('Ņ ', 'Ņ '),
+ ('Ņĸ', 'Ņĸ'),
+ ('Ņ¤', 'Ņ¤'),
+ ('ŅĻ', 'ŅĻ'),
+ ('Ņ¨', 'Ņ¨'),
+ ('ŅĒ', 'ŅĒ'),
+ ('ŅŦ', 'ŅŦ'),
+ ('ŅŽ', 'ŅŽ'),
+ ('Ņ°', 'Ņ°'),
+ ('Ņ˛', 'Ņ˛'),
+ ('Ņ´', 'Ņ´'),
+ ('Ņļ', 'Ņļ'),
+ ('Ņ¸', 'Ņ¸'),
+ ('Ņē', 'Ņē'),
+ ('Ņŧ', 'Ņŧ'),
+ ('Ņž', 'Ņž'),
+ ('Ō€', 'Ō€'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌŒ', 'ŌŒ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō', 'Ō'),
+ ('Ō’', 'Ō’'),
+ ('Ō”', 'Ō”'),
+ ('Ō–', 'Ō–'),
+ ('Ō˜', 'Ō˜'),
+ ('Ōš', 'Ōš'),
+ ('Ōœ', 'Ōœ'),
+ ('Ōž', 'Ōž'),
+ ('Ō ', 'Ō '),
+ ('Ōĸ', 'Ōĸ'),
+ ('Ō¤', 'Ō¤'),
+ ('ŌĻ', 'ŌĻ'),
+ ('Ō¨', 'Ō¨'),
+ ('ŌĒ', 'ŌĒ'),
+ ('ŌŦ', 'ŌŦ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō°', 'Ō°'),
+ ('Ō˛', 'Ō˛'),
+ ('Ō´', 'Ō´'),
+ ('Ōļ', 'Ōļ'),
+ ('Ō¸', 'Ō¸'),
+ ('Ōē', 'Ōē'),
+ ('Ōŧ', 'Ōŧ'),
+ ('Ōž', 'Ōž'),
+ ('Ķ€', 'Ķ'),
+ ('Ķƒ', 'Ķƒ'),
+ ('Ķ…', 'Ķ…'),
+ ('Ķ‡', 'Ķ‡'),
+ ('Ķ‰', 'Ķ‰'),
+ ('Ķ‹', 'Ķ‹'),
+ ('Ķ', 'Ķ'),
+ ('Ķ', 'Ķ'),
+ ('Ķ’', 'Ķ’'),
+ ('Ķ”', 'Ķ”'),
+ ('Ķ–', 'Ķ–'),
+ ('Ķ˜', 'Ķ˜'),
+ ('Ķš', 'Ķš'),
+ ('Ķœ', 'Ķœ'),
+ ('Ķž', 'Ķž'),
+ ('Ķ ', 'Ķ '),
+ ('Ķĸ', 'Ķĸ'),
+ ('Ķ¤', 'Ķ¤'),
+ ('ĶĻ', 'ĶĻ'),
+ ('Ķ¨', 'Ķ¨'),
+ ('ĶĒ', 'ĶĒ'),
+ ('ĶŦ', 'ĶŦ'),
+ ('ĶŽ', 'ĶŽ'),
+ ('Ķ°', 'Ķ°'),
+ ('Ķ˛', 'Ķ˛'),
+ ('Ķ´', 'Ķ´'),
+ ('Ķļ', 'Ķļ'),
+ ('Ķ¸', 'Ķ¸'),
+ ('Ķē', 'Ķē'),
+ ('Ķŧ', 'Ķŧ'),
+ ('Ķž', 'Ķž'),
+ ('Ԁ', 'Ԁ'),
+ ('Ԃ', 'Ԃ'),
+ ('Ԅ', 'Ԅ'),
+ ('Ԇ', 'Ԇ'),
+ ('Ԉ', 'Ԉ'),
+ ('Ԋ', 'Ԋ'),
+ ('Ԍ', 'Ԍ'),
+ ('Ԏ', 'Ԏ'),
+ ('Ԑ', 'Ԑ'),
+ ('Ԓ', 'Ԓ'),
+ ('Ԕ', 'Ԕ'),
+ ('Ԗ', 'Ԗ'),
+ ('Ԙ', 'Ԙ'),
+ ('Ԛ', 'Ԛ'),
+ ('Ԝ', 'Ԝ'),
+ ('Ԟ', 'Ԟ'),
+ ('Ô ', 'Ô '),
+ ('Ôĸ', 'Ôĸ'),
+ ('Ô¤', 'Ô¤'),
+ ('ÔĻ', 'ÔĻ'),
+ ('Ô¨', 'Ô¨'),
+ ('ÔĒ', 'ÔĒ'),
+ ('ÔŦ', 'ÔŦ'),
+ ('ÔŽ', 'ÔŽ'),
+ ('Ôą', 'Ֆ'),
+ ('և', 'և'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('Ḁ', 'Ḁ'),
+ ('Ḃ', 'Ḃ'),
+ ('Ḅ', 'Ḅ'),
+ ('Ḇ', 'Ḇ'),
+ ('Ḉ', 'Ḉ'),
+ ('Ḋ', 'Ḋ'),
+ ('Ḍ', 'Ḍ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḑ', 'Ḑ'),
+ ('Ḓ', 'Ḓ'),
+ ('Ḕ', 'Ḕ'),
+ ('Ḗ', 'Ḗ'),
+ ('Ḙ', 'Ḙ'),
+ ('Ḛ', 'Ḛ'),
+ ('Ḝ', 'Ḝ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ḡ', 'Ḡ'),
+ ('á¸ĸ', 'á¸ĸ'),
+ ('Ḥ', 'Ḥ'),
+ ('á¸Ļ', 'á¸Ļ'),
+ ('Ḩ', 'Ḩ'),
+ ('á¸Ē', 'á¸Ē'),
+ ('á¸Ŧ', 'á¸Ŧ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḱ', 'Ḱ'),
+ ('Ḳ', 'Ḳ'),
+ ('Ḵ', 'Ḵ'),
+ ('á¸ļ', 'á¸ļ'),
+ ('Ḹ', 'Ḹ'),
+ ('á¸ē', 'á¸ē'),
+ ('á¸ŧ', 'á¸ŧ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ṁ', 'Ṁ'),
+ ('Ṃ', 'Ṃ'),
+ ('Ṅ', 'Ṅ'),
+ ('Ṇ', 'Ṇ'),
+ ('ᚈ', 'ᚈ'),
+ ('Ṋ', 'Ṋ'),
+ ('Ṍ', 'Ṍ'),
+ ('ᚎ', 'ᚎ'),
+ ('ᚐ', 'ᚐ'),
+ ('Ṓ', 'Ṓ'),
+ ('Ṕ', 'Ṕ'),
+ ('Ṗ', 'Ṗ'),
+ ('ᚘ', 'ᚘ'),
+ ('Ṛ', 'Ṛ'),
+ ('Ṝ', 'Ṝ'),
+ ('᚞', '᚞'),
+ ('áš ', 'áš '),
+ ('ášĸ', 'ášĸ'),
+ ('ᚤ', 'ᚤ'),
+ ('ášĻ', 'ášĻ'),
+ ('ᚨ', 'ᚨ'),
+ ('ášĒ', 'ášĒ'),
+ ('ášŦ', 'ášŦ'),
+ ('ᚎ', 'ᚎ'),
+ ('áš°', 'áš°'),
+ ('ᚲ', 'ᚲ'),
+ ('áš´', 'áš´'),
+ ('ášļ', 'ášļ'),
+ ('ᚸ', 'ᚸ'),
+ ('ášē', 'ášē'),
+ ('ášŧ', 'ášŧ'),
+ ('ášž', 'ášž'),
+ ('áē€', 'áē€'),
+ ('áē‚', 'áē‚'),
+ ('áē„', 'áē„'),
+ ('áē†', 'áē†'),
+ ('áēˆ', 'áēˆ'),
+ ('áēŠ', 'áēŠ'),
+ ('áēŒ', 'áēŒ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē', 'áē'),
+ ('áē’', 'áē’'),
+ ('áē”', 'áē”'),
+ ('áēš', 'áē›'),
+ ('áēž', 'áēž'),
+ ('áē ', 'áē '),
+ ('áēĸ', 'áēĸ'),
+ ('áē¤', 'áē¤'),
+ ('áēĻ', 'áēĻ'),
+ ('áē¨', 'áē¨'),
+ ('áēĒ', 'áēĒ'),
+ ('áēŦ', 'áēŦ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē°', 'áē°'),
+ ('áē˛', 'áē˛'),
+ ('áē´', 'áē´'),
+ ('áēļ', 'áēļ'),
+ ('áē¸', 'áē¸'),
+ ('áēē', 'áēē'),
+ ('áēŧ', 'áēŧ'),
+ ('áēž', 'áēž'),
+ ('áģ€', 'áģ€'),
+ ('áģ‚', 'áģ‚'),
+ ('áģ„', 'áģ„'),
+ ('áģ†', 'áģ†'),
+ ('áģˆ', 'áģˆ'),
+ ('áģŠ', 'áģŠ'),
+ ('áģŒ', 'áģŒ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ', 'áģ'),
+ ('áģ’', 'áģ’'),
+ ('áģ”', 'áģ”'),
+ ('áģ–', 'áģ–'),
+ ('áģ˜', 'áģ˜'),
+ ('áģš', 'áģš'),
+ ('áģœ', 'áģœ'),
+ ('áģž', 'áģž'),
+ ('áģ ', 'áģ '),
+ ('áģĸ', 'áģĸ'),
+ ('áģ¤', 'áģ¤'),
+ ('áģĻ', 'áģĻ'),
+ ('áģ¨', 'áģ¨'),
+ ('áģĒ', 'áģĒ'),
+ ('áģŦ', 'áģŦ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ°', 'áģ°'),
+ ('áģ˛', 'áģ˛'),
+ ('áģ´', 'áģ´'),
+ ('áģļ', 'áģļ'),
+ ('áģ¸', 'áģ¸'),
+ ('áģē', 'áģē'),
+ ('áģŧ', 'áģŧ'),
+ ('áģž', 'áģž'),
+ ('áŧˆ', 'áŧ'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ¨', 'áŧ¯'),
+ ('áŧ¸', 'áŧŋ'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŸ'),
+ ('áŊ¨', 'áŊ¯'),
+ ('ᾀ', 'ឯ'),
+ ('ឲ', '឴'),
+ ('ឡ', 'ážŧ'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ‡', 'áŋŒ'),
+ ('áŋ˜', 'áŋ›'),
+ ('áŋ¨', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋˇ', 'áŋŧ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('â„Ē', 'â„Ģ'),
+ ('Ⅎ', 'Ⅎ'),
+ ('Ⅰ', 'Ⅿ'),
+ ('Ↄ', 'Ↄ'),
+ ('â’ļ', 'Ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('âą ', 'âą '),
+ ('âąĸ', '⹤'),
+ ('⹧', '⹧'),
+ ('⹊', '⹊'),
+ ('âąĢ', 'âąĢ'),
+ ('âą­', 'âą°'),
+ ('⹲', '⹲'),
+ ('âąĩ', 'âąĩ'),
+ ('Ȿ', 'Ⲁ'),
+ ('Ⲃ', 'Ⲃ'),
+ ('Ⲅ', 'Ⲅ'),
+ ('Ⲇ', 'Ⲇ'),
+ ('Ⲉ', 'Ⲉ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('Ⲍ', 'Ⲍ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲑ', 'Ⲑ'),
+ ('Ⲓ', 'Ⲓ'),
+ ('Ⲕ', 'Ⲕ'),
+ ('Ⲗ', 'Ⲗ'),
+ ('Ⲙ', 'Ⲙ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('Ⲝ', 'Ⲝ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('Ⲡ', 'Ⲡ'),
+ ('â˛ĸ', 'â˛ĸ'),
+ ('Ⲥ', 'Ⲥ'),
+ ('â˛Ļ', 'â˛Ļ'),
+ ('Ⲩ', 'Ⲩ'),
+ ('â˛Ē', 'â˛Ē'),
+ ('â˛Ŧ', 'â˛Ŧ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲱ', 'Ⲱ'),
+ ('Ⲳ', 'Ⲳ'),
+ ('Ⲵ', 'Ⲵ'),
+ ('â˛ļ', 'â˛ļ'),
+ ('Ⲹ', 'Ⲹ'),
+ ('â˛ē', 'â˛ē'),
+ ('â˛ŧ', 'â˛ŧ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('âŗ€', 'âŗ€'),
+ ('âŗ‚', 'âŗ‚'),
+ ('âŗ„', 'âŗ„'),
+ ('âŗ†', 'âŗ†'),
+ ('âŗˆ', 'âŗˆ'),
+ ('âŗŠ', 'âŗŠ'),
+ ('âŗŒ', 'âŗŒ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ’', 'âŗ’'),
+ ('âŗ”', 'âŗ”'),
+ ('âŗ–', 'âŗ–'),
+ ('âŗ˜', 'âŗ˜'),
+ ('âŗš', 'âŗš'),
+ ('âŗœ', 'âŗœ'),
+ ('âŗž', 'âŗž'),
+ ('âŗ ', 'âŗ '),
+ ('âŗĸ', 'âŗĸ'),
+ ('âŗĢ', 'âŗĢ'),
+ ('âŗ­', 'âŗ­'),
+ ('âŗ˛', 'âŗ˛'),
+ ('Ꙁ', 'Ꙁ'),
+ ('Ꙃ', 'Ꙃ'),
+ ('Ꙅ', 'Ꙅ'),
+ ('Ꙇ', 'Ꙇ'),
+ ('Ꙉ', 'Ꙉ'),
+ ('Ꙋ', 'Ꙋ'),
+ ('Ꙍ', 'Ꙍ'),
+ ('Ꙏ', 'Ꙏ'),
+ ('Ꙑ', 'Ꙑ'),
+ ('Ꙓ', 'Ꙓ'),
+ ('Ꙕ', 'Ꙕ'),
+ ('Ꙗ', 'Ꙗ'),
+ ('Ꙙ', 'Ꙙ'),
+ ('Ꙛ', 'Ꙛ'),
+ ('Ꙝ', 'Ꙝ'),
+ ('Ꙟ', 'Ꙟ'),
+ ('Ꙡ', 'Ꙡ'),
+ ('ę™ĸ', 'ę™ĸ'),
+ ('Ꙥ', 'Ꙥ'),
+ ('ę™Ļ', 'ę™Ļ'),
+ ('Ꙩ', 'Ꙩ'),
+ ('ę™Ē', 'ę™Ē'),
+ ('ę™Ŧ', 'ę™Ŧ'),
+ ('Ꚁ', 'Ꚁ'),
+ ('Ꚃ', 'Ꚃ'),
+ ('Ꚅ', 'Ꚅ'),
+ ('Ꚇ', 'Ꚇ'),
+ ('Ꚉ', 'Ꚉ'),
+ ('Ꚋ', 'Ꚋ'),
+ ('Ꚍ', 'Ꚍ'),
+ ('Ꚏ', 'Ꚏ'),
+ ('Ꚑ', 'Ꚑ'),
+ ('Ꚓ', 'Ꚓ'),
+ ('Ꚕ', 'Ꚕ'),
+ ('Ꚗ', 'Ꚗ'),
+ ('Ꚙ', 'Ꚙ'),
+ ('Ꚛ', 'Ꚛ'),
+ ('ęœĸ', 'ęœĸ'),
+ ('Ꜥ', 'Ꜥ'),
+ ('ęœĻ', 'ęœĻ'),
+ ('Ꜩ', 'Ꜩ'),
+ ('ęœĒ', 'ęœĒ'),
+ ('ęœŦ', 'ęœŦ'),
+ ('Ꜯ', 'Ꜯ'),
+ ('Ꜳ', 'Ꜳ'),
+ ('Ꜵ', 'Ꜵ'),
+ ('ęœļ', 'ęœļ'),
+ ('Ꜹ', 'Ꜹ'),
+ ('ęœē', 'ęœē'),
+ ('ęœŧ', 'ęœŧ'),
+ ('Ꜿ', 'Ꜿ'),
+ ('Ꝁ', 'Ꝁ'),
+ ('Ꝃ', 'Ꝃ'),
+ ('Ꝅ', 'Ꝅ'),
+ ('Ꝇ', 'Ꝇ'),
+ ('Ꝉ', 'Ꝉ'),
+ ('Ꝋ', 'Ꝋ'),
+ ('Ꝍ', 'Ꝍ'),
+ ('Ꝏ', 'Ꝏ'),
+ ('Ꝑ', 'Ꝑ'),
+ ('Ꝓ', 'Ꝓ'),
+ ('Ꝕ', 'Ꝕ'),
+ ('Ꝗ', 'Ꝗ'),
+ ('Ꝙ', 'Ꝙ'),
+ ('Ꝛ', 'Ꝛ'),
+ ('Ꝝ', 'Ꝝ'),
+ ('Ꝟ', 'Ꝟ'),
+ ('Ꝡ', 'Ꝡ'),
+ ('ęĸ', 'ęĸ'),
+ ('Ꝥ', 'Ꝥ'),
+ ('ęĻ', 'ęĻ'),
+ ('Ꝩ', 'Ꝩ'),
+ ('ęĒ', 'ęĒ'),
+ ('ęŦ', 'ęŦ'),
+ ('Ꝯ', 'Ꝯ'),
+ ('Ꝺ', 'Ꝺ'),
+ ('ęģ', 'ęģ'),
+ ('ęŊ', 'Ꝿ'),
+ ('Ꞁ', 'Ꞁ'),
+ ('Ꞃ', 'Ꞃ'),
+ ('Ꞅ', 'Ꞅ'),
+ ('Ꞇ', 'Ꞇ'),
+ ('Ꞌ', 'Ꞌ'),
+ ('Ɥ', 'Ɥ'),
+ ('Ꞑ', 'Ꞑ'),
+ ('Ꞓ', 'Ꞓ'),
+ ('Ꞗ', 'Ꞗ'),
+ ('Ꞙ', 'Ꞙ'),
+ ('Ꞛ', 'Ꞛ'),
+ ('Ꞝ', 'Ꞝ'),
+ ('Ꞟ', 'Ꞟ'),
+ ('Ꞡ', 'Ꞡ'),
+ ('ęžĸ', 'ęžĸ'),
+ ('Ꞥ', 'Ꞥ'),
+ ('ęžĻ', 'ęžĻ'),
+ ('Ꞩ', 'Ꞩ'),
+ ('ęžĒ', 'Ɪ'),
+ ('Ʞ', 'Ꞵ'),
+ ('ęžļ', 'ęžļ'),
+ ('Ꞹ', 'Ꞹ'),
+ ('ęžē', 'ęžē'),
+ ('ęžŧ', 'ęžŧ'),
+ ('Ꞿ', 'Ꞿ'),
+ ('Ꟃ', 'Ꟃ'),
+ ('Ꞔ', '\u{a7c7}'),
+ ('\u{a7c9}', '\u{a7c9}'),
+ ('\u{a7f5}', '\u{a7f5}'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŧĄ', 'īŧē'),
+ ('𐐀', '𐐧'),
+ ('𐒰', '𐓓'),
+ ('𐲀', '𐲲'),
+ ('đ‘ĸ ', 'đ‘ĸŋ'),
+ ('𖹀', '𖹟'),
+ ('𞤀', '𞤡'),
+];
+
+pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('Âĩ', 'Âĩ'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ġ'),
+ ('Äš', 'ƌ'),
+ ('Ǝ', 'ƚ'),
+ ('Ɯ', 'ÆŠ'),
+ ('ÆŦ', 'Æš'),
+ ('Æŧ', 'ÆŊ'),
+ ('Æŋ', 'Æŋ'),
+ ('Į„', 'Č '),
+ ('Čĸ', 'Čŗ'),
+ ('Čē', 'ɔ'),
+ ('ɖ', 'ɗ'),
+ ('ə', 'ə'),
+ ('ɛ', 'ɜ'),
+ ('É ', 'ÉĄ'),
+ ('ÉŖ', 'ÉŖ'),
+ ('ÉĨ', 'ÉĻ'),
+ ('ɨ', 'ÉŦ'),
+ ('ɯ', 'ɯ'),
+ ('ɹ', 'ɲ'),
+ ('Éĩ', 'Éĩ'),
+ ('ÉŊ', 'ÉŊ'),
+ ('ʀ', 'ʀ'),
+ ('ʂ', 'ʃ'),
+ ('ʇ', 'ʌ'),
+ ('ʒ', 'ʒ'),
+ ('ʝ', 'ʞ'),
+ ('\u{345}', '\u{345}'),
+ ('Í°', 'Íŗ'),
+ ('Íļ', '͡'),
+ ('Íģ', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Ī‘'),
+ ('Ī•', 'Īĩ'),
+ ('Īˇ', 'Īģ'),
+ ('ĪŊ', 'Ō'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ÕĄ', 'և'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŊ', 'áƒŋ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('áĩš', 'áĩš'),
+ ('áĩŊ', 'áĩŊ'),
+ ('áļŽ', 'áļŽ'),
+ ('Ḁ', 'áē›'),
+ ('áēž', 'áēž'),
+ ('áē ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('â„Ē', 'â„Ģ'),
+ ('Ⅎ', 'Ⅎ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'â…ŋ'),
+ ('Ↄ', 'ↄ'),
+ ('â’ļ', 'ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âą°'),
+ ('⹲', 'âąŗ'),
+ ('âąĩ', 'âąļ'),
+ ('âąž', 'âŗŖ'),
+ ('âŗĢ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('Ꙁ', 'ꙭ'),
+ ('Ꚁ', 'ꚛ'),
+ ('ęœĸ', 'ęœ¯'),
+ ('Ꜳ', 'ę¯'),
+ ('Ꝺ', 'ꞇ'),
+ ('Ꞌ', 'Ɥ'),
+ ('Ꞑ', 'ꞔ'),
+ ('Ꞗ', 'Ɪ'),
+ ('Ʞ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', '\u{a7f6}'),
+ ('ꭓ', 'ꭓ'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+ ('𐐀', '𐑏'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('đ‘ĸ ', 'đ‘ŖŸ'),
+ ('𖹀', 'đ–šŋ'),
+ ('𞤀', 'đžĨƒ'),
+];
+
+pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('À', 'Ö'),
+ ('Ø', 'Þ'),
+ ('Ā', 'Ā'),
+ ('Ă', 'Ă'),
+ ('Ą', 'Ą'),
+ ('Ć', 'Ć'),
+ ('Ĉ', 'Ĉ'),
+ ('Ċ', 'Ċ'),
+ ('Č', 'Č'),
+ ('Ď', 'Ď'),
+ ('Đ', 'Đ'),
+ ('Ē', 'Ē'),
+ ('Ĕ', 'Ĕ'),
+ ('Ė', 'Ė'),
+ ('Ę', 'Ę'),
+ ('Ě', 'Ě'),
+ ('Ĝ', 'Ĝ'),
+ ('Ğ', 'Ğ'),
+ ('Ä ', 'Ä '),
+ ('Äĸ', 'Äĸ'),
+ ('Ĥ', 'Ĥ'),
+ ('ÄĻ', 'ÄĻ'),
+ ('Ĩ', 'Ĩ'),
+ ('ÄĒ', 'ÄĒ'),
+ ('ÄŦ', 'ÄŦ'),
+ ('ÄŽ', 'ÄŽ'),
+ ('Ä°', 'Ä°'),
+ ('IJ', 'IJ'),
+ ('Ä´', 'Ä´'),
+ ('Äļ', 'Äļ'),
+ ('Äš', 'Äš'),
+ ('Äģ', 'Äģ'),
+ ('ÄŊ', 'ÄŊ'),
+ ('Äŋ', 'Äŋ'),
+ ('Ł', 'Ł'),
+ ('Ń', 'Ń'),
+ ('Ņ', 'Ņ'),
+ ('Ň', 'Ň'),
+ ('Ŋ', 'Ŋ'),
+ ('Ō', 'Ō'),
+ ('Ŏ', 'Ŏ'),
+ ('Ő', 'Ő'),
+ ('Œ', 'Œ'),
+ ('Ŕ', 'Ŕ'),
+ ('Ŗ', 'Ŗ'),
+ ('Ř', 'Ř'),
+ ('Ś', 'Ś'),
+ ('Ŝ', 'Ŝ'),
+ ('Ş', 'Ş'),
+ ('Å ', 'Å '),
+ ('Åĸ', 'Åĸ'),
+ ('Ť', 'Ť'),
+ ('ÅĻ', 'ÅĻ'),
+ ('Ũ', 'Ũ'),
+ ('ÅĒ', 'ÅĒ'),
+ ('ÅŦ', 'ÅŦ'),
+ ('ÅŽ', 'ÅŽ'),
+ ('Å°', 'Å°'),
+ ('Å˛', 'Å˛'),
+ ('Å´', 'Å´'),
+ ('Åļ', 'Åļ'),
+ ('Ÿ', 'Ś'),
+ ('Åģ', 'Åģ'),
+ ('ÅŊ', 'ÅŊ'),
+ ('Ɓ', 'Ƃ'),
+ ('Ƅ', 'Ƅ'),
+ ('Ɔ', 'Ƈ'),
+ ('Ɖ', 'Ƌ'),
+ ('Ǝ', 'Ƒ'),
+ ('Ɠ', 'Ɣ'),
+ ('Ɩ', 'Ƙ'),
+ ('Ɯ', 'Ɲ'),
+ ('Ɵ', 'Ơ'),
+ ('Æĸ', 'Æĸ'),
+ ('Ƥ', 'Ƥ'),
+ ('ÆĻ', 'Ƨ'),
+ ('ÆŠ', 'ÆŠ'),
+ ('ÆŦ', 'ÆŦ'),
+ ('Ǝ', 'Ư'),
+ ('Æą', 'Æŗ'),
+ ('Æĩ', 'Æĩ'),
+ ('Æˇ', 'Ƹ'),
+ ('Æŧ', 'Æŧ'),
+ ('Į„', 'Į…'),
+ ('Į‡', 'Įˆ'),
+ ('ĮŠ', 'Į‹'),
+ ('Į', 'Į'),
+ ('Į', 'Į'),
+ ('Į‘', 'Į‘'),
+ ('Į“', 'Į“'),
+ ('Į•', 'Į•'),
+ ('Į—', 'Į—'),
+ ('Į™', 'Į™'),
+ ('Į›', 'Į›'),
+ ('Įž', 'Įž'),
+ ('Į ', 'Į '),
+ ('Įĸ', 'Įĸ'),
+ ('Į¤', 'Į¤'),
+ ('ĮĻ', 'ĮĻ'),
+ ('Į¨', 'Į¨'),
+ ('ĮĒ', 'ĮĒ'),
+ ('ĮŦ', 'ĮŦ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Įą', 'Į˛'),
+ ('Į´', 'Į´'),
+ ('Įļ', 'Į¸'),
+ ('Įē', 'Įē'),
+ ('Įŧ', 'Įŧ'),
+ ('Įž', 'Įž'),
+ ('Ȁ', 'Ȁ'),
+ ('Ȃ', 'Ȃ'),
+ ('Ȅ', 'Ȅ'),
+ ('Ȇ', 'Ȇ'),
+ ('Ȉ', 'Ȉ'),
+ ('Ȋ', 'Ȋ'),
+ ('Ȍ', 'Ȍ'),
+ ('Ȏ', 'Ȏ'),
+ ('Ȑ', 'Ȑ'),
+ ('Ȓ', 'Ȓ'),
+ ('Ȕ', 'Ȕ'),
+ ('Ȗ', 'Ȗ'),
+ ('Ș', 'Ș'),
+ ('Ț', 'Ț'),
+ ('Ȝ', 'Ȝ'),
+ ('Ȟ', 'Ȟ'),
+ ('Č ', 'Č '),
+ ('Čĸ', 'Čĸ'),
+ ('Ȥ', 'Ȥ'),
+ ('ČĻ', 'ČĻ'),
+ ('Ȩ', 'Ȩ'),
+ ('ČĒ', 'ČĒ'),
+ ('ČŦ', 'ČŦ'),
+ ('ČŽ', 'ČŽ'),
+ ('Č°', 'Č°'),
+ ('Ȳ', 'Ȳ'),
+ ('Čē', 'Čģ'),
+ ('ČŊ', 'Čž'),
+ ('Ɂ', 'Ɂ'),
+ ('Ƀ', 'Ɇ'),
+ ('Ɉ', 'Ɉ'),
+ ('Ɋ', 'Ɋ'),
+ ('Ɍ', 'Ɍ'),
+ ('Ɏ', 'Ɏ'),
+ ('Í°', 'Í°'),
+ ('Ͳ', 'Ͳ'),
+ ('Íļ', 'Íļ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ώ'),
+ ('Α', 'Ρ'),
+ ('ÎŖ', 'ÎĢ'),
+ ('Ī', 'Ī'),
+ ('Ī˜', 'Ī˜'),
+ ('Īš', 'Īš'),
+ ('Īœ', 'Īœ'),
+ ('Īž', 'Īž'),
+ ('Ī ', 'Ī '),
+ ('Īĸ', 'Īĸ'),
+ ('Ī¤', 'Ī¤'),
+ ('ĪĻ', 'ĪĻ'),
+ ('Ī¨', 'Ī¨'),
+ ('ĪĒ', 'ĪĒ'),
+ ('ĪŦ', 'ĪŦ'),
+ ('ĪŽ', 'ĪŽ'),
+ ('Ī´', 'Ī´'),
+ ('Īˇ', 'Īˇ'),
+ ('Īš', 'Īē'),
+ ('ĪŊ', 'Đ¯'),
+ ('Ņ ', 'Ņ '),
+ ('Ņĸ', 'Ņĸ'),
+ ('Ņ¤', 'Ņ¤'),
+ ('ŅĻ', 'ŅĻ'),
+ ('Ņ¨', 'Ņ¨'),
+ ('ŅĒ', 'ŅĒ'),
+ ('ŅŦ', 'ŅŦ'),
+ ('ŅŽ', 'ŅŽ'),
+ ('Ņ°', 'Ņ°'),
+ ('Ņ˛', 'Ņ˛'),
+ ('Ņ´', 'Ņ´'),
+ ('Ņļ', 'Ņļ'),
+ ('Ņ¸', 'Ņ¸'),
+ ('Ņē', 'Ņē'),
+ ('Ņŧ', 'Ņŧ'),
+ ('Ņž', 'Ņž'),
+ ('Ō€', 'Ō€'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌŒ', 'ŌŒ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō', 'Ō'),
+ ('Ō’', 'Ō’'),
+ ('Ō”', 'Ō”'),
+ ('Ō–', 'Ō–'),
+ ('Ō˜', 'Ō˜'),
+ ('Ōš', 'Ōš'),
+ ('Ōœ', 'Ōœ'),
+ ('Ōž', 'Ōž'),
+ ('Ō ', 'Ō '),
+ ('Ōĸ', 'Ōĸ'),
+ ('Ō¤', 'Ō¤'),
+ ('ŌĻ', 'ŌĻ'),
+ ('Ō¨', 'Ō¨'),
+ ('ŌĒ', 'ŌĒ'),
+ ('ŌŦ', 'ŌŦ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō°', 'Ō°'),
+ ('Ō˛', 'Ō˛'),
+ ('Ō´', 'Ō´'),
+ ('Ōļ', 'Ōļ'),
+ ('Ō¸', 'Ō¸'),
+ ('Ōē', 'Ōē'),
+ ('Ōŧ', 'Ōŧ'),
+ ('Ōž', 'Ōž'),
+ ('Ķ€', 'Ķ'),
+ ('Ķƒ', 'Ķƒ'),
+ ('Ķ…', 'Ķ…'),
+ ('Ķ‡', 'Ķ‡'),
+ ('Ķ‰', 'Ķ‰'),
+ ('Ķ‹', 'Ķ‹'),
+ ('Ķ', 'Ķ'),
+ ('Ķ', 'Ķ'),
+ ('Ķ’', 'Ķ’'),
+ ('Ķ”', 'Ķ”'),
+ ('Ķ–', 'Ķ–'),
+ ('Ķ˜', 'Ķ˜'),
+ ('Ķš', 'Ķš'),
+ ('Ķœ', 'Ķœ'),
+ ('Ķž', 'Ķž'),
+ ('Ķ ', 'Ķ '),
+ ('Ķĸ', 'Ķĸ'),
+ ('Ķ¤', 'Ķ¤'),
+ ('ĶĻ', 'ĶĻ'),
+ ('Ķ¨', 'Ķ¨'),
+ ('ĶĒ', 'ĶĒ'),
+ ('ĶŦ', 'ĶŦ'),
+ ('ĶŽ', 'ĶŽ'),
+ ('Ķ°', 'Ķ°'),
+ ('Ķ˛', 'Ķ˛'),
+ ('Ķ´', 'Ķ´'),
+ ('Ķļ', 'Ķļ'),
+ ('Ķ¸', 'Ķ¸'),
+ ('Ķē', 'Ķē'),
+ ('Ķŧ', 'Ķŧ'),
+ ('Ķž', 'Ķž'),
+ ('Ԁ', 'Ԁ'),
+ ('Ԃ', 'Ԃ'),
+ ('Ԅ', 'Ԅ'),
+ ('Ԇ', 'Ԇ'),
+ ('Ԉ', 'Ԉ'),
+ ('Ԋ', 'Ԋ'),
+ ('Ԍ', 'Ԍ'),
+ ('Ԏ', 'Ԏ'),
+ ('Ԑ', 'Ԑ'),
+ ('Ԓ', 'Ԓ'),
+ ('Ԕ', 'Ԕ'),
+ ('Ԗ', 'Ԗ'),
+ ('Ԙ', 'Ԙ'),
+ ('Ԛ', 'Ԛ'),
+ ('Ԝ', 'Ԝ'),
+ ('Ԟ', 'Ԟ'),
+ ('Ô ', 'Ô '),
+ ('Ôĸ', 'Ôĸ'),
+ ('Ô¤', 'Ô¤'),
+ ('ÔĻ', 'ÔĻ'),
+ ('Ô¨', 'Ô¨'),
+ ('ÔĒ', 'ÔĒ'),
+ ('ÔŦ', 'ÔŦ'),
+ ('ÔŽ', 'ÔŽ'),
+ ('Ôą', 'Ֆ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('Ꭰ', 'áĩ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('Ḁ', 'Ḁ'),
+ ('Ḃ', 'Ḃ'),
+ ('Ḅ', 'Ḅ'),
+ ('Ḇ', 'Ḇ'),
+ ('Ḉ', 'Ḉ'),
+ ('Ḋ', 'Ḋ'),
+ ('Ḍ', 'Ḍ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḑ', 'Ḑ'),
+ ('Ḓ', 'Ḓ'),
+ ('Ḕ', 'Ḕ'),
+ ('Ḗ', 'Ḗ'),
+ ('Ḙ', 'Ḙ'),
+ ('Ḛ', 'Ḛ'),
+ ('Ḝ', 'Ḝ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ḡ', 'Ḡ'),
+ ('á¸ĸ', 'á¸ĸ'),
+ ('Ḥ', 'Ḥ'),
+ ('á¸Ļ', 'á¸Ļ'),
+ ('Ḩ', 'Ḩ'),
+ ('á¸Ē', 'á¸Ē'),
+ ('á¸Ŧ', 'á¸Ŧ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḱ', 'Ḱ'),
+ ('Ḳ', 'Ḳ'),
+ ('Ḵ', 'Ḵ'),
+ ('á¸ļ', 'á¸ļ'),
+ ('Ḹ', 'Ḹ'),
+ ('á¸ē', 'á¸ē'),
+ ('á¸ŧ', 'á¸ŧ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ṁ', 'Ṁ'),
+ ('Ṃ', 'Ṃ'),
+ ('Ṅ', 'Ṅ'),
+ ('Ṇ', 'Ṇ'),
+ ('ᚈ', 'ᚈ'),
+ ('Ṋ', 'Ṋ'),
+ ('Ṍ', 'Ṍ'),
+ ('ᚎ', 'ᚎ'),
+ ('ᚐ', 'ᚐ'),
+ ('Ṓ', 'Ṓ'),
+ ('Ṕ', 'Ṕ'),
+ ('Ṗ', 'Ṗ'),
+ ('ᚘ', 'ᚘ'),
+ ('Ṛ', 'Ṛ'),
+ ('Ṝ', 'Ṝ'),
+ ('᚞', '᚞'),
+ ('áš ', 'áš '),
+ ('ášĸ', 'ášĸ'),
+ ('ᚤ', 'ᚤ'),
+ ('ášĻ', 'ášĻ'),
+ ('ᚨ', 'ᚨ'),
+ ('ášĒ', 'ášĒ'),
+ ('ášŦ', 'ášŦ'),
+ ('ᚎ', 'ᚎ'),
+ ('áš°', 'áš°'),
+ ('ᚲ', 'ᚲ'),
+ ('áš´', 'áš´'),
+ ('ášļ', 'ášļ'),
+ ('ᚸ', 'ᚸ'),
+ ('ášē', 'ášē'),
+ ('ášŧ', 'ášŧ'),
+ ('ášž', 'ášž'),
+ ('áē€', 'áē€'),
+ ('áē‚', 'áē‚'),
+ ('áē„', 'áē„'),
+ ('áē†', 'áē†'),
+ ('áēˆ', 'áēˆ'),
+ ('áēŠ', 'áēŠ'),
+ ('áēŒ', 'áēŒ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē', 'áē'),
+ ('áē’', 'áē’'),
+ ('áē”', 'áē”'),
+ ('áēž', 'áēž'),
+ ('áē ', 'áē '),
+ ('áēĸ', 'áēĸ'),
+ ('áē¤', 'áē¤'),
+ ('áēĻ', 'áēĻ'),
+ ('áē¨', 'áē¨'),
+ ('áēĒ', 'áēĒ'),
+ ('áēŦ', 'áēŦ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē°', 'áē°'),
+ ('áē˛', 'áē˛'),
+ ('áē´', 'áē´'),
+ ('áēļ', 'áēļ'),
+ ('áē¸', 'áē¸'),
+ ('áēē', 'áēē'),
+ ('áēŧ', 'áēŧ'),
+ ('áēž', 'áēž'),
+ ('áģ€', 'áģ€'),
+ ('áģ‚', 'áģ‚'),
+ ('áģ„', 'áģ„'),
+ ('áģ†', 'áģ†'),
+ ('áģˆ', 'áģˆ'),
+ ('áģŠ', 'áģŠ'),
+ ('áģŒ', 'áģŒ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ', 'áģ'),
+ ('áģ’', 'áģ’'),
+ ('áģ”', 'áģ”'),
+ ('áģ–', 'áģ–'),
+ ('áģ˜', 'áģ˜'),
+ ('áģš', 'áģš'),
+ ('áģœ', 'áģœ'),
+ ('áģž', 'áģž'),
+ ('áģ ', 'áģ '),
+ ('áģĸ', 'áģĸ'),
+ ('áģ¤', 'áģ¤'),
+ ('áģĻ', 'áģĻ'),
+ ('áģ¨', 'áģ¨'),
+ ('áģĒ', 'áģĒ'),
+ ('áģŦ', 'áģŦ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ°', 'áģ°'),
+ ('áģ˛', 'áģ˛'),
+ ('áģ´', 'áģ´'),
+ ('áģļ', 'áģļ'),
+ ('áģ¸', 'áģ¸'),
+ ('áģē', 'áģē'),
+ ('áģŧ', 'áģŧ'),
+ ('áģž', 'áģž'),
+ ('áŧˆ', 'áŧ'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ¨', 'áŧ¯'),
+ ('áŧ¸', 'áŧŋ'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŸ'),
+ ('áŊ¨', 'áŊ¯'),
+ ('ᾈ', 'ᾏ'),
+ ('ᾘ', 'ᾟ'),
+ ('ឨ', 'ឯ'),
+ ('ី', 'ážŧ'),
+ ('áŋˆ', 'áŋŒ'),
+ ('áŋ˜', 'áŋ›'),
+ ('áŋ¨', 'áŋŦ'),
+ ('áŋ¸', 'áŋŧ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('â„Ē', 'â„Ģ'),
+ ('Ⅎ', 'Ⅎ'),
+ ('Ⅰ', 'Ⅿ'),
+ ('Ↄ', 'Ↄ'),
+ ('â’ļ', 'Ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('âą ', 'âą '),
+ ('âąĸ', '⹤'),
+ ('⹧', '⹧'),
+ ('⹊', '⹊'),
+ ('âąĢ', 'âąĢ'),
+ ('âą­', 'âą°'),
+ ('⹲', '⹲'),
+ ('âąĩ', 'âąĩ'),
+ ('Ȿ', 'Ⲁ'),
+ ('Ⲃ', 'Ⲃ'),
+ ('Ⲅ', 'Ⲅ'),
+ ('Ⲇ', 'Ⲇ'),
+ ('Ⲉ', 'Ⲉ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('Ⲍ', 'Ⲍ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲑ', 'Ⲑ'),
+ ('Ⲓ', 'Ⲓ'),
+ ('Ⲕ', 'Ⲕ'),
+ ('Ⲗ', 'Ⲗ'),
+ ('Ⲙ', 'Ⲙ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('Ⲝ', 'Ⲝ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('Ⲡ', 'Ⲡ'),
+ ('â˛ĸ', 'â˛ĸ'),
+ ('Ⲥ', 'Ⲥ'),
+ ('â˛Ļ', 'â˛Ļ'),
+ ('Ⲩ', 'Ⲩ'),
+ ('â˛Ē', 'â˛Ē'),
+ ('â˛Ŧ', 'â˛Ŧ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲱ', 'Ⲱ'),
+ ('Ⲳ', 'Ⲳ'),
+ ('Ⲵ', 'Ⲵ'),
+ ('â˛ļ', 'â˛ļ'),
+ ('Ⲹ', 'Ⲹ'),
+ ('â˛ē', 'â˛ē'),
+ ('â˛ŧ', 'â˛ŧ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('âŗ€', 'âŗ€'),
+ ('âŗ‚', 'âŗ‚'),
+ ('âŗ„', 'âŗ„'),
+ ('âŗ†', 'âŗ†'),
+ ('âŗˆ', 'âŗˆ'),
+ ('âŗŠ', 'âŗŠ'),
+ ('âŗŒ', 'âŗŒ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ’', 'âŗ’'),
+ ('âŗ”', 'âŗ”'),
+ ('âŗ–', 'âŗ–'),
+ ('âŗ˜', 'âŗ˜'),
+ ('âŗš', 'âŗš'),
+ ('âŗœ', 'âŗœ'),
+ ('âŗž', 'âŗž'),
+ ('âŗ ', 'âŗ '),
+ ('âŗĸ', 'âŗĸ'),
+ ('âŗĢ', 'âŗĢ'),
+ ('âŗ­', 'âŗ­'),
+ ('âŗ˛', 'âŗ˛'),
+ ('Ꙁ', 'Ꙁ'),
+ ('Ꙃ', 'Ꙃ'),
+ ('Ꙅ', 'Ꙅ'),
+ ('Ꙇ', 'Ꙇ'),
+ ('Ꙉ', 'Ꙉ'),
+ ('Ꙋ', 'Ꙋ'),
+ ('Ꙍ', 'Ꙍ'),
+ ('Ꙏ', 'Ꙏ'),
+ ('Ꙑ', 'Ꙑ'),
+ ('Ꙓ', 'Ꙓ'),
+ ('Ꙕ', 'Ꙕ'),
+ ('Ꙗ', 'Ꙗ'),
+ ('Ꙙ', 'Ꙙ'),
+ ('Ꙛ', 'Ꙛ'),
+ ('Ꙝ', 'Ꙝ'),
+ ('Ꙟ', 'Ꙟ'),
+ ('Ꙡ', 'Ꙡ'),
+ ('ę™ĸ', 'ę™ĸ'),
+ ('Ꙥ', 'Ꙥ'),
+ ('ę™Ļ', 'ę™Ļ'),
+ ('Ꙩ', 'Ꙩ'),
+ ('ę™Ē', 'ę™Ē'),
+ ('ę™Ŧ', 'ę™Ŧ'),
+ ('Ꚁ', 'Ꚁ'),
+ ('Ꚃ', 'Ꚃ'),
+ ('Ꚅ', 'Ꚅ'),
+ ('Ꚇ', 'Ꚇ'),
+ ('Ꚉ', 'Ꚉ'),
+ ('Ꚋ', 'Ꚋ'),
+ ('Ꚍ', 'Ꚍ'),
+ ('Ꚏ', 'Ꚏ'),
+ ('Ꚑ', 'Ꚑ'),
+ ('Ꚓ', 'Ꚓ'),
+ ('Ꚕ', 'Ꚕ'),
+ ('Ꚗ', 'Ꚗ'),
+ ('Ꚙ', 'Ꚙ'),
+ ('Ꚛ', 'Ꚛ'),
+ ('ęœĸ', 'ęœĸ'),
+ ('Ꜥ', 'Ꜥ'),
+ ('ęœĻ', 'ęœĻ'),
+ ('Ꜩ', 'Ꜩ'),
+ ('ęœĒ', 'ęœĒ'),
+ ('ęœŦ', 'ęœŦ'),
+ ('Ꜯ', 'Ꜯ'),
+ ('Ꜳ', 'Ꜳ'),
+ ('Ꜵ', 'Ꜵ'),
+ ('ęœļ', 'ęœļ'),
+ ('Ꜹ', 'Ꜹ'),
+ ('ęœē', 'ęœē'),
+ ('ęœŧ', 'ęœŧ'),
+ ('Ꜿ', 'Ꜿ'),
+ ('Ꝁ', 'Ꝁ'),
+ ('Ꝃ', 'Ꝃ'),
+ ('Ꝅ', 'Ꝅ'),
+ ('Ꝇ', 'Ꝇ'),
+ ('Ꝉ', 'Ꝉ'),
+ ('Ꝋ', 'Ꝋ'),
+ ('Ꝍ', 'Ꝍ'),
+ ('Ꝏ', 'Ꝏ'),
+ ('Ꝑ', 'Ꝑ'),
+ ('Ꝓ', 'Ꝓ'),
+ ('Ꝕ', 'Ꝕ'),
+ ('Ꝗ', 'Ꝗ'),
+ ('Ꝙ', 'Ꝙ'),
+ ('Ꝛ', 'Ꝛ'),
+ ('Ꝝ', 'Ꝝ'),
+ ('Ꝟ', 'Ꝟ'),
+ ('Ꝡ', 'Ꝡ'),
+ ('ęĸ', 'ęĸ'),
+ ('Ꝥ', 'Ꝥ'),
+ ('ęĻ', 'ęĻ'),
+ ('Ꝩ', 'Ꝩ'),
+ ('ęĒ', 'ęĒ'),
+ ('ęŦ', 'ęŦ'),
+ ('Ꝯ', 'Ꝯ'),
+ ('Ꝺ', 'Ꝺ'),
+ ('ęģ', 'ęģ'),
+ ('ęŊ', 'Ꝿ'),
+ ('Ꞁ', 'Ꞁ'),
+ ('Ꞃ', 'Ꞃ'),
+ ('Ꞅ', 'Ꞅ'),
+ ('Ꞇ', 'Ꞇ'),
+ ('Ꞌ', 'Ꞌ'),
+ ('Ɥ', 'Ɥ'),
+ ('Ꞑ', 'Ꞑ'),
+ ('Ꞓ', 'Ꞓ'),
+ ('Ꞗ', 'Ꞗ'),
+ ('Ꞙ', 'Ꞙ'),
+ ('Ꞛ', 'Ꞛ'),
+ ('Ꞝ', 'Ꞝ'),
+ ('Ꞟ', 'Ꞟ'),
+ ('Ꞡ', 'Ꞡ'),
+ ('ęžĸ', 'ęžĸ'),
+ ('Ꞥ', 'Ꞥ'),
+ ('ęžĻ', 'ęžĻ'),
+ ('Ꞩ', 'Ꞩ'),
+ ('ęžĒ', 'Ɪ'),
+ ('Ʞ', 'Ꞵ'),
+ ('ęžļ', 'ęžļ'),
+ ('Ꞹ', 'Ꞹ'),
+ ('ęžē', 'ęžē'),
+ ('ęžŧ', 'ęžŧ'),
+ ('Ꞿ', 'Ꞿ'),
+ ('Ꟃ', 'Ꟃ'),
+ ('Ꞔ', '\u{a7c7}'),
+ ('\u{a7c9}', '\u{a7c9}'),
+ ('\u{a7f5}', '\u{a7f5}'),
+ ('īŧĄ', 'īŧē'),
+ ('𐐀', '𐐧'),
+ ('𐒰', '𐓓'),
+ ('𐲀', '𐲲'),
+ ('đ‘ĸ ', 'đ‘ĸŋ'),
+ ('𖹀', '𖹟'),
+ ('𞤀', '𞤡'),
+];
+
+pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[
+ ('a', 'z'),
+ ('Âĩ', 'Âĩ'),
+ ('ß', 'Ãļ'),
+ ('ø', 'Ãŋ'),
+ ('ā', 'ā'),
+ ('ă', 'ă'),
+ ('ą', 'ą'),
+ ('ć', 'ć'),
+ ('ĉ', 'ĉ'),
+ ('ċ', 'ċ'),
+ ('č', 'č'),
+ ('ď', 'ď'),
+ ('đ', 'đ'),
+ ('ē', 'ē'),
+ ('ĕ', 'ĕ'),
+ ('ė', 'ė'),
+ ('ę', 'ę'),
+ ('ě', 'ě'),
+ ('ĝ', 'ĝ'),
+ ('ğ', 'ğ'),
+ ('ÄĄ', 'ÄĄ'),
+ ('ÄŖ', 'ÄŖ'),
+ ('ÄĨ', 'ÄĨ'),
+ ('ħ', 'ħ'),
+ ('ÄŠ', 'ÄŠ'),
+ ('ÄĢ', 'ÄĢ'),
+ ('Ä­', 'Ä­'),
+ ('į', 'į'),
+ ('Äą', 'Äą'),
+ ('Äŗ', 'Äŗ'),
+ ('Äĩ', 'Äĩ'),
+ ('ġ', 'ġ'),
+ ('Äē', 'Äē'),
+ ('Äŧ', 'Äŧ'),
+ ('Äž', 'Äž'),
+ ('ŀ', 'ŀ'),
+ ('ł', 'ł'),
+ ('ń', 'ń'),
+ ('ņ', 'ņ'),
+ ('ň', 'ʼn'),
+ ('ŋ', 'ŋ'),
+ ('ō', 'ō'),
+ ('ŏ', 'ŏ'),
+ ('ő', 'ő'),
+ ('œ', 'œ'),
+ ('ŕ', 'ŕ'),
+ ('ŗ', 'ŗ'),
+ ('ř', 'ř'),
+ ('ś', 'ś'),
+ ('ŝ', 'ŝ'),
+ ('ş', 'ş'),
+ ('ÅĄ', 'ÅĄ'),
+ ('ÅŖ', 'ÅŖ'),
+ ('ÅĨ', 'ÅĨ'),
+ ('ŧ', 'ŧ'),
+ ('ÅŠ', 'ÅŠ'),
+ ('ÅĢ', 'ÅĢ'),
+ ('Å­', 'Å­'),
+ ('ů', 'ů'),
+ ('Åą', 'Åą'),
+ ('Åŗ', 'Åŗ'),
+ ('Åĩ', 'Åĩ'),
+ ('Åˇ', 'Åˇ'),
+ ('Åē', 'Åē'),
+ ('Åŧ', 'Åŧ'),
+ ('Åž', 'ƀ'),
+ ('ƃ', 'ƃ'),
+ ('ƅ', 'ƅ'),
+ ('ƈ', 'ƈ'),
+ ('ƌ', 'ƌ'),
+ ('ƒ', 'ƒ'),
+ ('ƕ', 'ƕ'),
+ ('ƙ', 'ƚ'),
+ ('ƞ', 'ƞ'),
+ ('ÆĄ', 'ÆĄ'),
+ ('ÆŖ', 'ÆŖ'),
+ ('ÆĨ', 'ÆĨ'),
+ ('ƨ', 'ƨ'),
+ ('Æ­', 'Æ­'),
+ ('Æ°', 'Æ°'),
+ ('Æ´', 'Æ´'),
+ ('Æļ', 'Æļ'),
+ ('Æš', 'Æš'),
+ ('ÆŊ', 'ÆŊ'),
+ ('Æŋ', 'Æŋ'),
+ ('Į„', 'Į„'),
+ ('Į†', 'Į‡'),
+ ('Į‰', 'ĮŠ'),
+ ('ĮŒ', 'ĮŒ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Į', 'Į'),
+ ('Į’', 'Į’'),
+ ('Į”', 'Į”'),
+ ('Į–', 'Į–'),
+ ('Į˜', 'Į˜'),
+ ('Įš', 'Įš'),
+ ('Įœ', 'Į'),
+ ('ĮŸ', 'ĮŸ'),
+ ('ĮĄ', 'ĮĄ'),
+ ('ĮŖ', 'ĮŖ'),
+ ('ĮĨ', 'ĮĨ'),
+ ('Į§', 'Į§'),
+ ('ĮŠ', 'ĮŠ'),
+ ('ĮĢ', 'ĮĢ'),
+ ('Į­', 'Į­'),
+ ('Į¯', 'Įą'),
+ ('Įŗ', 'Įŗ'),
+ ('Įĩ', 'Įĩ'),
+ ('Įš', 'Įš'),
+ ('Įģ', 'Įģ'),
+ ('ĮŊ', 'ĮŊ'),
+ ('Įŋ', 'Įŋ'),
+ ('ȁ', 'ȁ'),
+ ('ȃ', 'ȃ'),
+ ('ȅ', 'ȅ'),
+ ('ȇ', 'ȇ'),
+ ('ȉ', 'ȉ'),
+ ('ȋ', 'ȋ'),
+ ('ȍ', 'ȍ'),
+ ('ȏ', 'ȏ'),
+ ('ȑ', 'ȑ'),
+ ('ȓ', 'ȓ'),
+ ('ȕ', 'ȕ'),
+ ('ȗ', 'ȗ'),
+ ('ș', 'ș'),
+ ('ț', 'ț'),
+ ('ȝ', 'ȝ'),
+ ('ȟ', 'ȟ'),
+ ('ČŖ', 'ČŖ'),
+ ('ČĨ', 'ČĨ'),
+ ('ȧ', 'ȧ'),
+ ('ČŠ', 'ČŠ'),
+ ('ČĢ', 'ČĢ'),
+ ('Č­', 'Č­'),
+ ('Č¯', 'Č¯'),
+ ('Čą', 'Čą'),
+ ('Čŗ', 'Čŗ'),
+ ('Čŧ', 'Čŧ'),
+ ('Čŋ', 'ɀ'),
+ ('ɂ', 'ɂ'),
+ ('ɇ', 'ɇ'),
+ ('ɉ', 'ɉ'),
+ ('ɋ', 'ɋ'),
+ ('ɍ', 'ɍ'),
+ ('ɏ', 'ɔ'),
+ ('ɖ', 'ɗ'),
+ ('ə', 'ə'),
+ ('ɛ', 'ɜ'),
+ ('É ', 'ÉĄ'),
+ ('ÉŖ', 'ÉŖ'),
+ ('ÉĨ', 'ÉĻ'),
+ ('ɨ', 'ÉŦ'),
+ ('ɯ', 'ɯ'),
+ ('ɹ', 'ɲ'),
+ ('Éĩ', 'Éĩ'),
+ ('ÉŊ', 'ÉŊ'),
+ ('ʀ', 'ʀ'),
+ ('ʂ', 'ʃ'),
+ ('ʇ', 'ʌ'),
+ ('ʒ', 'ʒ'),
+ ('ʝ', 'ʞ'),
+ ('\u{345}', '\u{345}'),
+ ('Íą', 'Íą'),
+ ('Íŗ', 'Íŗ'),
+ ('͡', '͡'),
+ ('Íģ', 'ÍŊ'),
+ ('ΐ', 'ΐ'),
+ ('ÎŦ', 'ĪŽ'),
+ ('Ī', 'Ī‘'),
+ ('Ī•', 'Ī—'),
+ ('Ī™', 'Ī™'),
+ ('Ī›', 'Ī›'),
+ ('Ī', 'Ī'),
+ ('ĪŸ', 'ĪŸ'),
+ ('ĪĄ', 'ĪĄ'),
+ ('ĪŖ', 'ĪŖ'),
+ ('ĪĨ', 'ĪĨ'),
+ ('Ī§', 'Ī§'),
+ ('ĪŠ', 'ĪŠ'),
+ ('ĪĢ', 'ĪĢ'),
+ ('Ī­', 'Ī­'),
+ ('Ī¯', 'Īŗ'),
+ ('Īĩ', 'Īĩ'),
+ ('Ī¸', 'Ī¸'),
+ ('Īģ', 'Īģ'),
+ ('Đ°', 'ŅŸ'),
+ ('ŅĄ', 'ŅĄ'),
+ ('ŅŖ', 'ŅŖ'),
+ ('ŅĨ', 'ŅĨ'),
+ ('Ņ§', 'Ņ§'),
+ ('ŅŠ', 'ŅŠ'),
+ ('ŅĢ', 'ŅĢ'),
+ ('Ņ­', 'Ņ­'),
+ ('Ņ¯', 'Ņ¯'),
+ ('Ņą', 'Ņą'),
+ ('Ņŗ', 'Ņŗ'),
+ ('Ņĩ', 'Ņĩ'),
+ ('Ņˇ', 'Ņˇ'),
+ ('Ņš', 'Ņš'),
+ ('Ņģ', 'Ņģ'),
+ ('ŅŊ', 'ŅŊ'),
+ ('Ņŋ', 'Ņŋ'),
+ ('Ō', 'Ō'),
+ ('Ō‹', 'Ō‹'),
+ ('Ō', 'Ō'),
+ ('Ō', 'Ō'),
+ ('Ō‘', 'Ō‘'),
+ ('Ō“', 'Ō“'),
+ ('Ō•', 'Ō•'),
+ ('Ō—', 'Ō—'),
+ ('Ō™', 'Ō™'),
+ ('Ō›', 'Ō›'),
+ ('Ō', 'Ō'),
+ ('ŌŸ', 'ŌŸ'),
+ ('ŌĄ', 'ŌĄ'),
+ ('ŌŖ', 'ŌŖ'),
+ ('ŌĨ', 'ŌĨ'),
+ ('Ō§', 'Ō§'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌĢ', 'ŌĢ'),
+ ('Ō­', 'Ō­'),
+ ('Ō¯', 'Ō¯'),
+ ('Ōą', 'Ōą'),
+ ('Ōŗ', 'Ōŗ'),
+ ('Ōĩ', 'Ōĩ'),
+ ('Ōˇ', 'Ōˇ'),
+ ('Ōš', 'Ōš'),
+ ('Ōģ', 'Ōģ'),
+ ('ŌŊ', 'ŌŊ'),
+ ('Ōŋ', 'Ōŋ'),
+ ('Ķ‚', 'Ķ‚'),
+ ('Ķ„', 'Ķ„'),
+ ('Ķ†', 'Ķ†'),
+ ('Ķˆ', 'Ķˆ'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶŒ', 'ĶŒ'),
+ ('ĶŽ', 'Ķ'),
+ ('Ķ‘', 'Ķ‘'),
+ ('Ķ“', 'Ķ“'),
+ ('Ķ•', 'Ķ•'),
+ ('Ķ—', 'Ķ—'),
+ ('Ķ™', 'Ķ™'),
+ ('Ķ›', 'Ķ›'),
+ ('Ķ', 'Ķ'),
+ ('ĶŸ', 'ĶŸ'),
+ ('ĶĄ', 'ĶĄ'),
+ ('ĶŖ', 'ĶŖ'),
+ ('ĶĨ', 'ĶĨ'),
+ ('Ķ§', 'Ķ§'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶĢ', 'ĶĢ'),
+ ('Ķ­', 'Ķ­'),
+ ('Ķ¯', 'Ķ¯'),
+ ('Ķą', 'Ķą'),
+ ('Ķŗ', 'Ķŗ'),
+ ('Ķĩ', 'Ķĩ'),
+ ('Ķˇ', 'Ķˇ'),
+ ('Ķš', 'Ķš'),
+ ('Ķģ', 'Ķģ'),
+ ('ĶŊ', 'ĶŊ'),
+ ('Ķŋ', 'Ķŋ'),
+ ('ԁ', 'ԁ'),
+ ('ԃ', 'ԃ'),
+ ('ԅ', 'ԅ'),
+ ('ԇ', 'ԇ'),
+ ('ԉ', 'ԉ'),
+ ('ԋ', 'ԋ'),
+ ('ԍ', 'ԍ'),
+ ('ԏ', 'ԏ'),
+ ('ԑ', 'ԑ'),
+ ('ԓ', 'ԓ'),
+ ('ԕ', 'ԕ'),
+ ('ԗ', 'ԗ'),
+ ('ԙ', 'ԙ'),
+ ('ԛ', 'ԛ'),
+ ('ԝ', 'ԝ'),
+ ('ԟ', 'ԟ'),
+ ('ÔĄ', 'ÔĄ'),
+ ('ÔŖ', 'ÔŖ'),
+ ('ÔĨ', 'ÔĨ'),
+ ('Ô§', 'Ô§'),
+ ('ÔŠ', 'ÔŠ'),
+ ('ÔĢ', 'ÔĢ'),
+ ('Ô­', 'Ô­'),
+ ('Ô¯', 'Ô¯'),
+ ('ÕĄ', 'և'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('áĩš', 'áĩš'),
+ ('áĩŊ', 'áĩŊ'),
+ ('áļŽ', 'áļŽ'),
+ ('ḁ', 'ḁ'),
+ ('ḃ', 'ḃ'),
+ ('ḅ', 'ḅ'),
+ ('ḇ', 'ḇ'),
+ ('ḉ', 'ḉ'),
+ ('ḋ', 'ḋ'),
+ ('ḍ', 'ḍ'),
+ ('ḏ', 'ḏ'),
+ ('ḑ', 'ḑ'),
+ ('ḓ', 'ḓ'),
+ ('ḕ', 'ḕ'),
+ ('ḗ', 'ḗ'),
+ ('ḙ', 'ḙ'),
+ ('ḛ', 'ḛ'),
+ ('ḝ', 'ḝ'),
+ ('ḟ', 'ḟ'),
+ ('ḥ', 'ḥ'),
+ ('á¸Ŗ', 'á¸Ŗ'),
+ ('á¸Ĩ', 'á¸Ĩ'),
+ ('ḧ', 'ḧ'),
+ ('Ḋ', 'Ḋ'),
+ ('á¸Ģ', 'á¸Ģ'),
+ ('ḭ', 'ḭ'),
+ ('ḯ', 'ḯ'),
+ ('ḹ', 'ḹ'),
+ ('á¸ŗ', 'á¸ŗ'),
+ ('á¸ĩ', 'á¸ĩ'),
+ ('ḡ', 'ḡ'),
+ ('Ḛ', 'Ḛ'),
+ ('á¸ģ', 'á¸ģ'),
+ ('á¸Ŋ', 'á¸Ŋ'),
+ ('á¸ŋ', 'á¸ŋ'),
+ ('ᚁ', 'ᚁ'),
+ ('ᚃ', 'ᚃ'),
+ ('ṅ', 'ṅ'),
+ ('ṇ', 'ṇ'),
+ ('ṉ', 'ṉ'),
+ ('ṋ', 'ṋ'),
+ ('ᚍ', 'ᚍ'),
+ ('ᚏ', 'ᚏ'),
+ ('ṑ', 'ṑ'),
+ ('ṓ', 'ṓ'),
+ ('ṕ', 'ṕ'),
+ ('ṗ', 'ṗ'),
+ ('ṙ', 'ṙ'),
+ ('ṛ', 'ṛ'),
+ ('᚝', '᚝'),
+ ('ṟ', 'ṟ'),
+ ('ᚥ', 'ᚥ'),
+ ('ášŖ', 'ášŖ'),
+ ('ášĨ', 'ášĨ'),
+ ('ᚧ', 'ᚧ'),
+ ('ᚊ', 'ᚊ'),
+ ('ášĢ', 'ášĢ'),
+ ('áš­', 'áš­'),
+ ('ᚯ', 'ᚯ'),
+ ('ášą', 'ášą'),
+ ('ášŗ', 'ášŗ'),
+ ('ášĩ', 'ášĩ'),
+ ('ᚡ', 'ᚡ'),
+ ('ášš', 'ášš'),
+ ('ášģ', 'ášģ'),
+ ('ášŊ', 'ášŊ'),
+ ('ášŋ', 'ášŋ'),
+ ('áē', 'áē'),
+ ('áēƒ', 'áēƒ'),
+ ('áē…', 'áē…'),
+ ('áē‡', 'áē‡'),
+ ('áē‰', 'áē‰'),
+ ('áē‹', 'áē‹'),
+ ('áē', 'áē'),
+ ('áē', 'áē'),
+ ('áē‘', 'áē‘'),
+ ('áē“', 'áē“'),
+ ('áē•', 'áē›'),
+ ('áēĄ', 'áēĄ'),
+ ('áēŖ', 'áēŖ'),
+ ('áēĨ', 'áēĨ'),
+ ('áē§', 'áē§'),
+ ('áēŠ', 'áēŠ'),
+ ('áēĢ', 'áēĢ'),
+ ('áē­', 'áē­'),
+ ('áē¯', 'áē¯'),
+ ('áēą', 'áēą'),
+ ('áēŗ', 'áēŗ'),
+ ('áēĩ', 'áēĩ'),
+ ('áēˇ', 'áēˇ'),
+ ('áēš', 'áēš'),
+ ('áēģ', 'áēģ'),
+ ('áēŊ', 'áēŊ'),
+ ('áēŋ', 'áēŋ'),
+ ('áģ', 'áģ'),
+ ('áģƒ', 'áģƒ'),
+ ('áģ…', 'áģ…'),
+ ('áģ‡', 'áģ‡'),
+ ('áģ‰', 'áģ‰'),
+ ('áģ‹', 'áģ‹'),
+ ('áģ', 'áģ'),
+ ('áģ', 'áģ'),
+ ('áģ‘', 'áģ‘'),
+ ('áģ“', 'áģ“'),
+ ('áģ•', 'áģ•'),
+ ('áģ—', 'áģ—'),
+ ('áģ™', 'áģ™'),
+ ('áģ›', 'áģ›'),
+ ('áģ', 'áģ'),
+ ('áģŸ', 'áģŸ'),
+ ('áģĄ', 'áģĄ'),
+ ('áģŖ', 'áģŖ'),
+ ('áģĨ', 'áģĨ'),
+ ('áģ§', 'áģ§'),
+ ('áģŠ', 'áģŠ'),
+ ('áģĢ', 'áģĢ'),
+ ('áģ­', 'áģ­'),
+ ('áģ¯', 'áģ¯'),
+ ('áģą', 'áģą'),
+ ('áģŗ', 'áģŗ'),
+ ('áģĩ', 'áģĩ'),
+ ('áģˇ', 'áģˇ'),
+ ('áģš', 'áģš'),
+ ('áģģ', 'áģģ'),
+ ('áģŊ', 'áģŊ'),
+ ('áģŋ', 'áŧ‡'),
+ ('áŧ', 'áŧ•'),
+ ('áŧ ', 'áŧ§'),
+ ('áŧ°', 'áŧˇ'),
+ ('áŊ€', 'áŊ…'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ ', 'áŊ§'),
+ ('áŊ°', 'áŊŊ'),
+ ('ᾀ', 'ᾇ'),
+ ('ᾐ', 'ᾗ'),
+ ('ហ', 'ឧ'),
+ ('áž°', 'áž´'),
+ ('ážļ', 'ឡ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋ‡'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ—'),
+ ('áŋ ', 'áŋ§'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋˇ'),
+ ('ⅎ', 'ⅎ'),
+ ('ⅰ', 'â…ŋ'),
+ ('ↄ', 'ↄ'),
+ ('ⓐ', 'ⓩ'),
+ ('ⰰ', 'ⱞ'),
+ ('⹥', '⹥'),
+ ('âąĨ', 'âąĻ'),
+ ('⹨', '⹨'),
+ ('âąĒ', 'âąĒ'),
+ ('âąŦ', 'âąŦ'),
+ ('âąŗ', 'âąŗ'),
+ ('âąļ', 'âąļ'),
+ ('ⲁ', 'ⲁ'),
+ ('ⲃ', 'ⲃ'),
+ ('ⲅ', 'ⲅ'),
+ ('ⲇ', 'ⲇ'),
+ ('ⲉ', 'ⲉ'),
+ ('ⲋ', 'ⲋ'),
+ ('ⲍ', 'ⲍ'),
+ ('ⲏ', 'ⲏ'),
+ ('ⲑ', 'ⲑ'),
+ ('ⲓ', 'ⲓ'),
+ ('ⲕ', 'ⲕ'),
+ ('ⲗ', 'ⲗ'),
+ ('ⲙ', 'ⲙ'),
+ ('ⲛ', 'ⲛ'),
+ ('ⲝ', 'ⲝ'),
+ ('ⲟ', 'ⲟ'),
+ ('ⲥ', 'ⲥ'),
+ ('â˛Ŗ', 'â˛Ŗ'),
+ ('â˛Ĩ', 'â˛Ĩ'),
+ ('ⲧ', 'ⲧ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('â˛Ģ', 'â˛Ģ'),
+ ('ⲭ', 'ⲭ'),
+ ('â˛¯', 'â˛¯'),
+ ('ⲹ', 'ⲹ'),
+ ('â˛ŗ', 'â˛ŗ'),
+ ('â˛ĩ', 'â˛ĩ'),
+ ('ⲡ', 'ⲡ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('â˛ģ', 'â˛ģ'),
+ ('â˛Ŋ', 'â˛Ŋ'),
+ ('â˛ŋ', 'â˛ŋ'),
+ ('âŗ', 'âŗ'),
+ ('âŗƒ', 'âŗƒ'),
+ ('âŗ…', 'âŗ…'),
+ ('âŗ‡', 'âŗ‡'),
+ ('âŗ‰', 'âŗ‰'),
+ ('âŗ‹', 'âŗ‹'),
+ ('âŗ', 'âŗ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ‘', 'âŗ‘'),
+ ('âŗ“', 'âŗ“'),
+ ('âŗ•', 'âŗ•'),
+ ('âŗ—', 'âŗ—'),
+ ('âŗ™', 'âŗ™'),
+ ('âŗ›', 'âŗ›'),
+ ('âŗ', 'âŗ'),
+ ('âŗŸ', 'âŗŸ'),
+ ('âŗĄ', 'âŗĄ'),
+ ('âŗŖ', 'âŗŖ'),
+ ('âŗŦ', 'âŗŦ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗŗ', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('ꙁ', 'ꙁ'),
+ ('ꙃ', 'ꙃ'),
+ ('ꙅ', 'ꙅ'),
+ ('ꙇ', 'ꙇ'),
+ ('ꙉ', 'ꙉ'),
+ ('ꙋ', 'ꙋ'),
+ ('ꙍ', 'ꙍ'),
+ ('ꙏ', 'ꙏ'),
+ ('ꙑ', 'ꙑ'),
+ ('ꙓ', 'ꙓ'),
+ ('ꙕ', 'ꙕ'),
+ ('ꙗ', 'ꙗ'),
+ ('ꙙ', 'ꙙ'),
+ ('ꙛ', 'ꙛ'),
+ ('ꙝ', 'ꙝ'),
+ ('ꙟ', 'ꙟ'),
+ ('ꙡ', 'ꙡ'),
+ ('ę™Ŗ', 'ę™Ŗ'),
+ ('ę™Ĩ', 'ę™Ĩ'),
+ ('ꙧ', 'ꙧ'),
+ ('ꙩ', 'ꙩ'),
+ ('ę™Ģ', 'ę™Ģ'),
+ ('ꙭ', 'ꙭ'),
+ ('ꚁ', 'ꚁ'),
+ ('ꚃ', 'ꚃ'),
+ ('ꚅ', 'ꚅ'),
+ ('ꚇ', 'ꚇ'),
+ ('ꚉ', 'ꚉ'),
+ ('ꚋ', 'ꚋ'),
+ ('ꚍ', 'ꚍ'),
+ ('ꚏ', 'ꚏ'),
+ ('ꚑ', 'ꚑ'),
+ ('ꚓ', 'ꚓ'),
+ ('ꚕ', 'ꚕ'),
+ ('ꚗ', 'ꚗ'),
+ ('ꚙ', 'ꚙ'),
+ ('ꚛ', 'ꚛ'),
+ ('ęœŖ', 'ęœŖ'),
+ ('ęœĨ', 'ęœĨ'),
+ ('ꜧ', 'ꜧ'),
+ ('ꜩ', 'ꜩ'),
+ ('ęœĢ', 'ęœĢ'),
+ ('ꜭ', 'ꜭ'),
+ ('ęœ¯', 'ęœ¯'),
+ ('ęœŗ', 'ęœŗ'),
+ ('ęœĩ', 'ęœĩ'),
+ ('ꜷ', 'ꜷ'),
+ ('ꜹ', 'ꜹ'),
+ ('ęœģ', 'ęœģ'),
+ ('ęœŊ', 'ęœŊ'),
+ ('ęœŋ', 'ęœŋ'),
+ ('ꝁ', 'ꝁ'),
+ ('ꝃ', 'ꝃ'),
+ ('ꝅ', 'ꝅ'),
+ ('ꝇ', 'ꝇ'),
+ ('ꝉ', 'ꝉ'),
+ ('ꝋ', 'ꝋ'),
+ ('ꝍ', 'ꝍ'),
+ ('ꝏ', 'ꝏ'),
+ ('ꝑ', 'ꝑ'),
+ ('ꝓ', 'ꝓ'),
+ ('ꝕ', 'ꝕ'),
+ ('ꝗ', 'ꝗ'),
+ ('ꝙ', 'ꝙ'),
+ ('ꝛ', 'ꝛ'),
+ ('ꝝ', 'ꝝ'),
+ ('ꝟ', 'ꝟ'),
+ ('ꝡ', 'ꝡ'),
+ ('ęŖ', 'ęŖ'),
+ ('ęĨ', 'ęĨ'),
+ ('ꝧ', 'ꝧ'),
+ ('ꝩ', 'ꝩ'),
+ ('ęĢ', 'ęĢ'),
+ ('ꝭ', 'ꝭ'),
+ ('ę¯', 'ę¯'),
+ ('ęē', 'ęē'),
+ ('ęŧ', 'ęŧ'),
+ ('ęŋ', 'ęŋ'),
+ ('ꞁ', 'ꞁ'),
+ ('ꞃ', 'ꞃ'),
+ ('ꞅ', 'ꞅ'),
+ ('ꞇ', 'ꞇ'),
+ ('ꞌ', 'ꞌ'),
+ ('ꞑ', 'ꞑ'),
+ ('ꞓ', 'ꞔ'),
+ ('ꞗ', 'ꞗ'),
+ ('ꞙ', 'ꞙ'),
+ ('ꞛ', 'ꞛ'),
+ ('ꞝ', 'ꞝ'),
+ ('ꞟ', 'ꞟ'),
+ ('ꞡ', 'ꞡ'),
+ ('ęžŖ', 'ęžŖ'),
+ ('ęžĨ', 'ęžĨ'),
+ ('ꞧ', 'ꞧ'),
+ ('ꞩ', 'ꞩ'),
+ ('ęžĩ', 'ęžĩ'),
+ ('ꞷ', 'ꞷ'),
+ ('ꞹ', 'ꞹ'),
+ ('ęžģ', 'ęžģ'),
+ ('ęžŊ', 'ęžŊ'),
+ ('ęžŋ', 'ęžŋ'),
+ ('ꟃ', 'ꟃ'),
+ ('\u{a7c8}', '\u{a7c8}'),
+ ('\u{a7ca}', '\u{a7ca}'),
+ ('\u{a7f6}', '\u{a7f6}'),
+ ('ꭓ', 'ꭓ'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŊ', 'īŊš'),
+ ('𐐨', '𐑏'),
+ ('𐓘', 'đ“ģ'),
+ ('đŗ€', 'đŗ˛'),
+ ('đ‘Ŗ€', 'đ‘ŖŸ'),
+ ('𖹠', 'đ–šŋ'),
+ ('đž¤ĸ', 'đžĨƒ'),
+];
+
+pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[
+ ('a', 'z'),
+ ('Âĩ', 'Âĩ'),
+ ('ß', 'Ãļ'),
+ ('ø', 'Ãŋ'),
+ ('ā', 'ā'),
+ ('ă', 'ă'),
+ ('ą', 'ą'),
+ ('ć', 'ć'),
+ ('ĉ', 'ĉ'),
+ ('ċ', 'ċ'),
+ ('č', 'č'),
+ ('ď', 'ď'),
+ ('đ', 'đ'),
+ ('ē', 'ē'),
+ ('ĕ', 'ĕ'),
+ ('ė', 'ė'),
+ ('ę', 'ę'),
+ ('ě', 'ě'),
+ ('ĝ', 'ĝ'),
+ ('ğ', 'ğ'),
+ ('ÄĄ', 'ÄĄ'),
+ ('ÄŖ', 'ÄŖ'),
+ ('ÄĨ', 'ÄĨ'),
+ ('ħ', 'ħ'),
+ ('ÄŠ', 'ÄŠ'),
+ ('ÄĢ', 'ÄĢ'),
+ ('Ä­', 'Ä­'),
+ ('į', 'į'),
+ ('Äą', 'Äą'),
+ ('Äŗ', 'Äŗ'),
+ ('Äĩ', 'Äĩ'),
+ ('ġ', 'ġ'),
+ ('Äē', 'Äē'),
+ ('Äŧ', 'Äŧ'),
+ ('Äž', 'Äž'),
+ ('ŀ', 'ŀ'),
+ ('ł', 'ł'),
+ ('ń', 'ń'),
+ ('ņ', 'ņ'),
+ ('ň', 'ʼn'),
+ ('ŋ', 'ŋ'),
+ ('ō', 'ō'),
+ ('ŏ', 'ŏ'),
+ ('ő', 'ő'),
+ ('œ', 'œ'),
+ ('ŕ', 'ŕ'),
+ ('ŗ', 'ŗ'),
+ ('ř', 'ř'),
+ ('ś', 'ś'),
+ ('ŝ', 'ŝ'),
+ ('ş', 'ş'),
+ ('ÅĄ', 'ÅĄ'),
+ ('ÅŖ', 'ÅŖ'),
+ ('ÅĨ', 'ÅĨ'),
+ ('ŧ', 'ŧ'),
+ ('ÅŠ', 'ÅŠ'),
+ ('ÅĢ', 'ÅĢ'),
+ ('Å­', 'Å­'),
+ ('ů', 'ů'),
+ ('Åą', 'Åą'),
+ ('Åŗ', 'Åŗ'),
+ ('Åĩ', 'Åĩ'),
+ ('Åˇ', 'Åˇ'),
+ ('Åē', 'Åē'),
+ ('Åŧ', 'Åŧ'),
+ ('Åž', 'ƀ'),
+ ('ƃ', 'ƃ'),
+ ('ƅ', 'ƅ'),
+ ('ƈ', 'ƈ'),
+ ('ƌ', 'ƌ'),
+ ('ƒ', 'ƒ'),
+ ('ƕ', 'ƕ'),
+ ('ƙ', 'ƚ'),
+ ('ƞ', 'ƞ'),
+ ('ÆĄ', 'ÆĄ'),
+ ('ÆŖ', 'ÆŖ'),
+ ('ÆĨ', 'ÆĨ'),
+ ('ƨ', 'ƨ'),
+ ('Æ­', 'Æ­'),
+ ('Æ°', 'Æ°'),
+ ('Æ´', 'Æ´'),
+ ('Æļ', 'Æļ'),
+ ('Æš', 'Æš'),
+ ('ÆŊ', 'ÆŊ'),
+ ('Æŋ', 'Æŋ'),
+ ('Į…', 'Į†'),
+ ('Įˆ', 'Į‰'),
+ ('Į‹', 'ĮŒ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Į', 'Į'),
+ ('Į’', 'Į’'),
+ ('Į”', 'Į”'),
+ ('Į–', 'Į–'),
+ ('Į˜', 'Į˜'),
+ ('Įš', 'Įš'),
+ ('Įœ', 'Į'),
+ ('ĮŸ', 'ĮŸ'),
+ ('ĮĄ', 'ĮĄ'),
+ ('ĮŖ', 'ĮŖ'),
+ ('ĮĨ', 'ĮĨ'),
+ ('Į§', 'Į§'),
+ ('ĮŠ', 'ĮŠ'),
+ ('ĮĢ', 'ĮĢ'),
+ ('Į­', 'Į­'),
+ ('Į¯', 'Į°'),
+ ('Į˛', 'Įŗ'),
+ ('Įĩ', 'Įĩ'),
+ ('Įš', 'Įš'),
+ ('Įģ', 'Įģ'),
+ ('ĮŊ', 'ĮŊ'),
+ ('Įŋ', 'Įŋ'),
+ ('ȁ', 'ȁ'),
+ ('ȃ', 'ȃ'),
+ ('ȅ', 'ȅ'),
+ ('ȇ', 'ȇ'),
+ ('ȉ', 'ȉ'),
+ ('ȋ', 'ȋ'),
+ ('ȍ', 'ȍ'),
+ ('ȏ', 'ȏ'),
+ ('ȑ', 'ȑ'),
+ ('ȓ', 'ȓ'),
+ ('ȕ', 'ȕ'),
+ ('ȗ', 'ȗ'),
+ ('ș', 'ș'),
+ ('ț', 'ț'),
+ ('ȝ', 'ȝ'),
+ ('ȟ', 'ȟ'),
+ ('ČŖ', 'ČŖ'),
+ ('ČĨ', 'ČĨ'),
+ ('ȧ', 'ȧ'),
+ ('ČŠ', 'ČŠ'),
+ ('ČĢ', 'ČĢ'),
+ ('Č­', 'Č­'),
+ ('Č¯', 'Č¯'),
+ ('Čą', 'Čą'),
+ ('Čŗ', 'Čŗ'),
+ ('Čŧ', 'Čŧ'),
+ ('Čŋ', 'ɀ'),
+ ('ɂ', 'ɂ'),
+ ('ɇ', 'ɇ'),
+ ('ɉ', 'ɉ'),
+ ('ɋ', 'ɋ'),
+ ('ɍ', 'ɍ'),
+ ('ɏ', 'ɔ'),
+ ('ɖ', 'ɗ'),
+ ('ə', 'ə'),
+ ('ɛ', 'ɜ'),
+ ('É ', 'ÉĄ'),
+ ('ÉŖ', 'ÉŖ'),
+ ('ÉĨ', 'ÉĻ'),
+ ('ɨ', 'ÉŦ'),
+ ('ɯ', 'ɯ'),
+ ('ɹ', 'ɲ'),
+ ('Éĩ', 'Éĩ'),
+ ('ÉŊ', 'ÉŊ'),
+ ('ʀ', 'ʀ'),
+ ('ʂ', 'ʃ'),
+ ('ʇ', 'ʌ'),
+ ('ʒ', 'ʒ'),
+ ('ʝ', 'ʞ'),
+ ('\u{345}', '\u{345}'),
+ ('Íą', 'Íą'),
+ ('Íŗ', 'Íŗ'),
+ ('͡', '͡'),
+ ('Íģ', 'ÍŊ'),
+ ('ΐ', 'ΐ'),
+ ('ÎŦ', 'ĪŽ'),
+ ('Ī', 'Ī‘'),
+ ('Ī•', 'Ī—'),
+ ('Ī™', 'Ī™'),
+ ('Ī›', 'Ī›'),
+ ('Ī', 'Ī'),
+ ('ĪŸ', 'ĪŸ'),
+ ('ĪĄ', 'ĪĄ'),
+ ('ĪŖ', 'ĪŖ'),
+ ('ĪĨ', 'ĪĨ'),
+ ('Ī§', 'Ī§'),
+ ('ĪŠ', 'ĪŠ'),
+ ('ĪĢ', 'ĪĢ'),
+ ('Ī­', 'Ī­'),
+ ('Ī¯', 'Īŗ'),
+ ('Īĩ', 'Īĩ'),
+ ('Ī¸', 'Ī¸'),
+ ('Īģ', 'Īģ'),
+ ('Đ°', 'ŅŸ'),
+ ('ŅĄ', 'ŅĄ'),
+ ('ŅŖ', 'ŅŖ'),
+ ('ŅĨ', 'ŅĨ'),
+ ('Ņ§', 'Ņ§'),
+ ('ŅŠ', 'ŅŠ'),
+ ('ŅĢ', 'ŅĢ'),
+ ('Ņ­', 'Ņ­'),
+ ('Ņ¯', 'Ņ¯'),
+ ('Ņą', 'Ņą'),
+ ('Ņŗ', 'Ņŗ'),
+ ('Ņĩ', 'Ņĩ'),
+ ('Ņˇ', 'Ņˇ'),
+ ('Ņš', 'Ņš'),
+ ('Ņģ', 'Ņģ'),
+ ('ŅŊ', 'ŅŊ'),
+ ('Ņŋ', 'Ņŋ'),
+ ('Ō', 'Ō'),
+ ('Ō‹', 'Ō‹'),
+ ('Ō', 'Ō'),
+ ('Ō', 'Ō'),
+ ('Ō‘', 'Ō‘'),
+ ('Ō“', 'Ō“'),
+ ('Ō•', 'Ō•'),
+ ('Ō—', 'Ō—'),
+ ('Ō™', 'Ō™'),
+ ('Ō›', 'Ō›'),
+ ('Ō', 'Ō'),
+ ('ŌŸ', 'ŌŸ'),
+ ('ŌĄ', 'ŌĄ'),
+ ('ŌŖ', 'ŌŖ'),
+ ('ŌĨ', 'ŌĨ'),
+ ('Ō§', 'Ō§'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌĢ', 'ŌĢ'),
+ ('Ō­', 'Ō­'),
+ ('Ō¯', 'Ō¯'),
+ ('Ōą', 'Ōą'),
+ ('Ōŗ', 'Ōŗ'),
+ ('Ōĩ', 'Ōĩ'),
+ ('Ōˇ', 'Ōˇ'),
+ ('Ōš', 'Ōš'),
+ ('Ōģ', 'Ōģ'),
+ ('ŌŊ', 'ŌŊ'),
+ ('Ōŋ', 'Ōŋ'),
+ ('Ķ‚', 'Ķ‚'),
+ ('Ķ„', 'Ķ„'),
+ ('Ķ†', 'Ķ†'),
+ ('Ķˆ', 'Ķˆ'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶŒ', 'ĶŒ'),
+ ('ĶŽ', 'Ķ'),
+ ('Ķ‘', 'Ķ‘'),
+ ('Ķ“', 'Ķ“'),
+ ('Ķ•', 'Ķ•'),
+ ('Ķ—', 'Ķ—'),
+ ('Ķ™', 'Ķ™'),
+ ('Ķ›', 'Ķ›'),
+ ('Ķ', 'Ķ'),
+ ('ĶŸ', 'ĶŸ'),
+ ('ĶĄ', 'ĶĄ'),
+ ('ĶŖ', 'ĶŖ'),
+ ('ĶĨ', 'ĶĨ'),
+ ('Ķ§', 'Ķ§'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶĢ', 'ĶĢ'),
+ ('Ķ­', 'Ķ­'),
+ ('Ķ¯', 'Ķ¯'),
+ ('Ķą', 'Ķą'),
+ ('Ķŗ', 'Ķŗ'),
+ ('Ķĩ', 'Ķĩ'),
+ ('Ķˇ', 'Ķˇ'),
+ ('Ķš', 'Ķš'),
+ ('Ķģ', 'Ķģ'),
+ ('ĶŊ', 'ĶŊ'),
+ ('Ķŋ', 'Ķŋ'),
+ ('ԁ', 'ԁ'),
+ ('ԃ', 'ԃ'),
+ ('ԅ', 'ԅ'),
+ ('ԇ', 'ԇ'),
+ ('ԉ', 'ԉ'),
+ ('ԋ', 'ԋ'),
+ ('ԍ', 'ԍ'),
+ ('ԏ', 'ԏ'),
+ ('ԑ', 'ԑ'),
+ ('ԓ', 'ԓ'),
+ ('ԕ', 'ԕ'),
+ ('ԗ', 'ԗ'),
+ ('ԙ', 'ԙ'),
+ ('ԛ', 'ԛ'),
+ ('ԝ', 'ԝ'),
+ ('ԟ', 'ԟ'),
+ ('ÔĄ', 'ÔĄ'),
+ ('ÔŖ', 'ÔŖ'),
+ ('ÔĨ', 'ÔĨ'),
+ ('Ô§', 'Ô§'),
+ ('ÔŠ', 'ÔŠ'),
+ ('ÔĢ', 'ÔĢ'),
+ ('Ô­', 'Ô­'),
+ ('Ô¯', 'Ô¯'),
+ ('ÕĄ', 'և'),
+ ('ა', 'áƒē'),
+ ('áƒŊ', 'áƒŋ'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('áĩš', 'áĩš'),
+ ('áĩŊ', 'áĩŊ'),
+ ('áļŽ', 'áļŽ'),
+ ('ḁ', 'ḁ'),
+ ('ḃ', 'ḃ'),
+ ('ḅ', 'ḅ'),
+ ('ḇ', 'ḇ'),
+ ('ḉ', 'ḉ'),
+ ('ḋ', 'ḋ'),
+ ('ḍ', 'ḍ'),
+ ('ḏ', 'ḏ'),
+ ('ḑ', 'ḑ'),
+ ('ḓ', 'ḓ'),
+ ('ḕ', 'ḕ'),
+ ('ḗ', 'ḗ'),
+ ('ḙ', 'ḙ'),
+ ('ḛ', 'ḛ'),
+ ('ḝ', 'ḝ'),
+ ('ḟ', 'ḟ'),
+ ('ḥ', 'ḥ'),
+ ('á¸Ŗ', 'á¸Ŗ'),
+ ('á¸Ĩ', 'á¸Ĩ'),
+ ('ḧ', 'ḧ'),
+ ('Ḋ', 'Ḋ'),
+ ('á¸Ģ', 'á¸Ģ'),
+ ('ḭ', 'ḭ'),
+ ('ḯ', 'ḯ'),
+ ('ḹ', 'ḹ'),
+ ('á¸ŗ', 'á¸ŗ'),
+ ('á¸ĩ', 'á¸ĩ'),
+ ('ḡ', 'ḡ'),
+ ('Ḛ', 'Ḛ'),
+ ('á¸ģ', 'á¸ģ'),
+ ('á¸Ŋ', 'á¸Ŋ'),
+ ('á¸ŋ', 'á¸ŋ'),
+ ('ᚁ', 'ᚁ'),
+ ('ᚃ', 'ᚃ'),
+ ('ṅ', 'ṅ'),
+ ('ṇ', 'ṇ'),
+ ('ṉ', 'ṉ'),
+ ('ṋ', 'ṋ'),
+ ('ᚍ', 'ᚍ'),
+ ('ᚏ', 'ᚏ'),
+ ('ṑ', 'ṑ'),
+ ('ṓ', 'ṓ'),
+ ('ṕ', 'ṕ'),
+ ('ṗ', 'ṗ'),
+ ('ṙ', 'ṙ'),
+ ('ṛ', 'ṛ'),
+ ('᚝', '᚝'),
+ ('ṟ', 'ṟ'),
+ ('ᚥ', 'ᚥ'),
+ ('ášŖ', 'ášŖ'),
+ ('ášĨ', 'ášĨ'),
+ ('ᚧ', 'ᚧ'),
+ ('ᚊ', 'ᚊ'),
+ ('ášĢ', 'ášĢ'),
+ ('áš­', 'áš­'),
+ ('ᚯ', 'ᚯ'),
+ ('ášą', 'ášą'),
+ ('ášŗ', 'ášŗ'),
+ ('ášĩ', 'ášĩ'),
+ ('ᚡ', 'ᚡ'),
+ ('ášš', 'ášš'),
+ ('ášģ', 'ášģ'),
+ ('ášŊ', 'ášŊ'),
+ ('ášŋ', 'ášŋ'),
+ ('áē', 'áē'),
+ ('áēƒ', 'áēƒ'),
+ ('áē…', 'áē…'),
+ ('áē‡', 'áē‡'),
+ ('áē‰', 'áē‰'),
+ ('áē‹', 'áē‹'),
+ ('áē', 'áē'),
+ ('áē', 'áē'),
+ ('áē‘', 'áē‘'),
+ ('áē“', 'áē“'),
+ ('áē•', 'áē›'),
+ ('áēĄ', 'áēĄ'),
+ ('áēŖ', 'áēŖ'),
+ ('áēĨ', 'áēĨ'),
+ ('áē§', 'áē§'),
+ ('áēŠ', 'áēŠ'),
+ ('áēĢ', 'áēĢ'),
+ ('áē­', 'áē­'),
+ ('áē¯', 'áē¯'),
+ ('áēą', 'áēą'),
+ ('áēŗ', 'áēŗ'),
+ ('áēĩ', 'áēĩ'),
+ ('áēˇ', 'áēˇ'),
+ ('áēš', 'áēš'),
+ ('áēģ', 'áēģ'),
+ ('áēŊ', 'áēŊ'),
+ ('áēŋ', 'áēŋ'),
+ ('áģ', 'áģ'),
+ ('áģƒ', 'áģƒ'),
+ ('áģ…', 'áģ…'),
+ ('áģ‡', 'áģ‡'),
+ ('áģ‰', 'áģ‰'),
+ ('áģ‹', 'áģ‹'),
+ ('áģ', 'áģ'),
+ ('áģ', 'áģ'),
+ ('áģ‘', 'áģ‘'),
+ ('áģ“', 'áģ“'),
+ ('áģ•', 'áģ•'),
+ ('áģ—', 'áģ—'),
+ ('áģ™', 'áģ™'),
+ ('áģ›', 'áģ›'),
+ ('áģ', 'áģ'),
+ ('áģŸ', 'áģŸ'),
+ ('áģĄ', 'áģĄ'),
+ ('áģŖ', 'áģŖ'),
+ ('áģĨ', 'áģĨ'),
+ ('áģ§', 'áģ§'),
+ ('áģŠ', 'áģŠ'),
+ ('áģĢ', 'áģĢ'),
+ ('áģ­', 'áģ­'),
+ ('áģ¯', 'áģ¯'),
+ ('áģą', 'áģą'),
+ ('áģŗ', 'áģŗ'),
+ ('áģĩ', 'áģĩ'),
+ ('áģˇ', 'áģˇ'),
+ ('áģš', 'áģš'),
+ ('áģģ', 'áģģ'),
+ ('áģŊ', 'áģŊ'),
+ ('áģŋ', 'áŧ‡'),
+ ('áŧ', 'áŧ•'),
+ ('áŧ ', 'áŧ§'),
+ ('áŧ°', 'áŧˇ'),
+ ('áŊ€', 'áŊ…'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ ', 'áŊ§'),
+ ('áŊ°', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ឡ'),
+ ('ážŧ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋ‡'),
+ ('áŋŒ', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ—'),
+ ('áŋ ', 'áŋ§'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋˇ'),
+ ('áŋŧ', 'áŋŧ'),
+ ('ⅎ', 'ⅎ'),
+ ('ⅰ', 'â…ŋ'),
+ ('ↄ', 'ↄ'),
+ ('ⓐ', 'ⓩ'),
+ ('ⰰ', 'ⱞ'),
+ ('⹥', '⹥'),
+ ('âąĨ', 'âąĻ'),
+ ('⹨', '⹨'),
+ ('âąĒ', 'âąĒ'),
+ ('âąŦ', 'âąŦ'),
+ ('âąŗ', 'âąŗ'),
+ ('âąļ', 'âąļ'),
+ ('ⲁ', 'ⲁ'),
+ ('ⲃ', 'ⲃ'),
+ ('ⲅ', 'ⲅ'),
+ ('ⲇ', 'ⲇ'),
+ ('ⲉ', 'ⲉ'),
+ ('ⲋ', 'ⲋ'),
+ ('ⲍ', 'ⲍ'),
+ ('ⲏ', 'ⲏ'),
+ ('ⲑ', 'ⲑ'),
+ ('ⲓ', 'ⲓ'),
+ ('ⲕ', 'ⲕ'),
+ ('ⲗ', 'ⲗ'),
+ ('ⲙ', 'ⲙ'),
+ ('ⲛ', 'ⲛ'),
+ ('ⲝ', 'ⲝ'),
+ ('ⲟ', 'ⲟ'),
+ ('ⲥ', 'ⲥ'),
+ ('â˛Ŗ', 'â˛Ŗ'),
+ ('â˛Ĩ', 'â˛Ĩ'),
+ ('ⲧ', 'ⲧ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('â˛Ģ', 'â˛Ģ'),
+ ('ⲭ', 'ⲭ'),
+ ('â˛¯', 'â˛¯'),
+ ('ⲹ', 'ⲹ'),
+ ('â˛ŗ', 'â˛ŗ'),
+ ('â˛ĩ', 'â˛ĩ'),
+ ('ⲡ', 'ⲡ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('â˛ģ', 'â˛ģ'),
+ ('â˛Ŋ', 'â˛Ŋ'),
+ ('â˛ŋ', 'â˛ŋ'),
+ ('âŗ', 'âŗ'),
+ ('âŗƒ', 'âŗƒ'),
+ ('âŗ…', 'âŗ…'),
+ ('âŗ‡', 'âŗ‡'),
+ ('âŗ‰', 'âŗ‰'),
+ ('âŗ‹', 'âŗ‹'),
+ ('âŗ', 'âŗ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ‘', 'âŗ‘'),
+ ('âŗ“', 'âŗ“'),
+ ('âŗ•', 'âŗ•'),
+ ('âŗ—', 'âŗ—'),
+ ('âŗ™', 'âŗ™'),
+ ('âŗ›', 'âŗ›'),
+ ('âŗ', 'âŗ'),
+ ('âŗŸ', 'âŗŸ'),
+ ('âŗĄ', 'âŗĄ'),
+ ('âŗŖ', 'âŗŖ'),
+ ('âŗŦ', 'âŗŦ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗŗ', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('ꙁ', 'ꙁ'),
+ ('ꙃ', 'ꙃ'),
+ ('ꙅ', 'ꙅ'),
+ ('ꙇ', 'ꙇ'),
+ ('ꙉ', 'ꙉ'),
+ ('ꙋ', 'ꙋ'),
+ ('ꙍ', 'ꙍ'),
+ ('ꙏ', 'ꙏ'),
+ ('ꙑ', 'ꙑ'),
+ ('ꙓ', 'ꙓ'),
+ ('ꙕ', 'ꙕ'),
+ ('ꙗ', 'ꙗ'),
+ ('ꙙ', 'ꙙ'),
+ ('ꙛ', 'ꙛ'),
+ ('ꙝ', 'ꙝ'),
+ ('ꙟ', 'ꙟ'),
+ ('ꙡ', 'ꙡ'),
+ ('ę™Ŗ', 'ę™Ŗ'),
+ ('ę™Ĩ', 'ę™Ĩ'),
+ ('ꙧ', 'ꙧ'),
+ ('ꙩ', 'ꙩ'),
+ ('ę™Ģ', 'ę™Ģ'),
+ ('ꙭ', 'ꙭ'),
+ ('ꚁ', 'ꚁ'),
+ ('ꚃ', 'ꚃ'),
+ ('ꚅ', 'ꚅ'),
+ ('ꚇ', 'ꚇ'),
+ ('ꚉ', 'ꚉ'),
+ ('ꚋ', 'ꚋ'),
+ ('ꚍ', 'ꚍ'),
+ ('ꚏ', 'ꚏ'),
+ ('ꚑ', 'ꚑ'),
+ ('ꚓ', 'ꚓ'),
+ ('ꚕ', 'ꚕ'),
+ ('ꚗ', 'ꚗ'),
+ ('ꚙ', 'ꚙ'),
+ ('ꚛ', 'ꚛ'),
+ ('ęœŖ', 'ęœŖ'),
+ ('ęœĨ', 'ęœĨ'),
+ ('ꜧ', 'ꜧ'),
+ ('ꜩ', 'ꜩ'),
+ ('ęœĢ', 'ęœĢ'),
+ ('ꜭ', 'ꜭ'),
+ ('ęœ¯', 'ęœ¯'),
+ ('ęœŗ', 'ęœŗ'),
+ ('ęœĩ', 'ęœĩ'),
+ ('ꜷ', 'ꜷ'),
+ ('ꜹ', 'ꜹ'),
+ ('ęœģ', 'ęœģ'),
+ ('ęœŊ', 'ęœŊ'),
+ ('ęœŋ', 'ęœŋ'),
+ ('ꝁ', 'ꝁ'),
+ ('ꝃ', 'ꝃ'),
+ ('ꝅ', 'ꝅ'),
+ ('ꝇ', 'ꝇ'),
+ ('ꝉ', 'ꝉ'),
+ ('ꝋ', 'ꝋ'),
+ ('ꝍ', 'ꝍ'),
+ ('ꝏ', 'ꝏ'),
+ ('ꝑ', 'ꝑ'),
+ ('ꝓ', 'ꝓ'),
+ ('ꝕ', 'ꝕ'),
+ ('ꝗ', 'ꝗ'),
+ ('ꝙ', 'ꝙ'),
+ ('ꝛ', 'ꝛ'),
+ ('ꝝ', 'ꝝ'),
+ ('ꝟ', 'ꝟ'),
+ ('ꝡ', 'ꝡ'),
+ ('ęŖ', 'ęŖ'),
+ ('ęĨ', 'ęĨ'),
+ ('ꝧ', 'ꝧ'),
+ ('ꝩ', 'ꝩ'),
+ ('ęĢ', 'ęĢ'),
+ ('ꝭ', 'ꝭ'),
+ ('ę¯', 'ę¯'),
+ ('ęē', 'ęē'),
+ ('ęŧ', 'ęŧ'),
+ ('ęŋ', 'ęŋ'),
+ ('ꞁ', 'ꞁ'),
+ ('ꞃ', 'ꞃ'),
+ ('ꞅ', 'ꞅ'),
+ ('ꞇ', 'ꞇ'),
+ ('ꞌ', 'ꞌ'),
+ ('ꞑ', 'ꞑ'),
+ ('ꞓ', 'ꞔ'),
+ ('ꞗ', 'ꞗ'),
+ ('ꞙ', 'ꞙ'),
+ ('ꞛ', 'ꞛ'),
+ ('ꞝ', 'ꞝ'),
+ ('ꞟ', 'ꞟ'),
+ ('ꞡ', 'ꞡ'),
+ ('ęžŖ', 'ęžŖ'),
+ ('ęžĨ', 'ęžĨ'),
+ ('ꞧ', 'ꞧ'),
+ ('ꞩ', 'ꞩ'),
+ ('ęžĩ', 'ęžĩ'),
+ ('ꞷ', 'ꞷ'),
+ ('ꞹ', 'ꞹ'),
+ ('ęžģ', 'ęžģ'),
+ ('ęžŊ', 'ęžŊ'),
+ ('ęžŋ', 'ęžŋ'),
+ ('ꟃ', 'ꟃ'),
+ ('\u{a7c8}', '\u{a7c8}'),
+ ('\u{a7ca}', '\u{a7ca}'),
+ ('\u{a7f6}', '\u{a7f6}'),
+ ('ꭓ', 'ꭓ'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŊ', 'īŊš'),
+ ('𐐨', '𐑏'),
+ ('𐓘', 'đ“ģ'),
+ ('đŗ€', 'đŗ˛'),
+ ('đ‘Ŗ€', 'đ‘ŖŸ'),
+ ('𖹠', 'đ–šŋ'),
+ ('đž¤ĸ', 'đžĨƒ'),
+];
+
+pub const DASH: &'static [(char, char)] = &[
+ ('-', '-'),
+ ('֊', '֊'),
+ ('Öž', 'Öž'),
+ ('᐀', '᐀'),
+ ('᠆', '᠆'),
+ ('‐', '―'),
+ ('⁓', '⁓'),
+ ('âģ', 'âģ'),
+ ('₋', '₋'),
+ ('−', '−'),
+ ('⸗', '⸗'),
+ ('⸚', '⸚'),
+ ('â¸ē', 'â¸ģ'),
+ ('⹀', '⹀'),
+ ('〜', '〜'),
+ ('〰', '〰'),
+ ('゠', '゠'),
+ ('ī¸ą', 'ī¸˛'),
+ ('īš˜', 'īš˜'),
+ ('īšŖ', 'īšŖ'),
+ ('īŧ', 'īŧ'),
+ ('\u{10ead}', '\u{10ead}'),
+];
+
+pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
+ ('\u{ad}', '\u{ad}'),
+ ('\u{34f}', '\u{34f}'),
+ ('\u{61c}', '\u{61c}'),
+ ('ᅟ', 'ᅠ'),
+ ('\u{17b4}', '\u{17b5}'),
+ ('\u{180b}', '\u{180e}'),
+ ('\u{200b}', '\u{200f}'),
+ ('\u{202a}', '\u{202e}'),
+ ('\u{2060}', '\u{206f}'),
+ ('ㅤ', 'ㅤ'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{feff}', '\u{feff}'),
+ ('īž ', 'īž '),
+ ('\u{fff0}', '\u{fff8}'),
+ ('\u{1bca0}', '\u{1bca3}'),
+ ('\u{1d173}', '\u{1d17a}'),
+ ('\u{e0000}', '\u{e0fff}'),
+];
+
+pub const DEPRECATED: &'static [(char, char)] = &[
+ ('ʼn', 'ʼn'),
+ ('Ųŗ', 'Ųŗ'),
+ ('\u{f77}', '\u{f77}'),
+ ('\u{f79}', '\u{f79}'),
+ ('ážŖ', 'ឤ'),
+ ('\u{206a}', '\u{206f}'),
+ ('〈', 'âŒĒ'),
+ ('\u{e0001}', '\u{e0001}'),
+];
+
+pub const DIACRITIC: &'static [(char, char)] = &[
+ ('^', '^'),
+ ('`', '`'),
+ ('¨', '¨'),
+ ('¯', '¯'),
+ ('´', '´'),
+ ('¡', '¸'),
+ ('Ę°', '\u{34e}'),
+ ('\u{350}', '\u{357}'),
+ ('\u{35d}', '\u{362}'),
+ ('Í´', 'Íĩ'),
+ ('Íē', 'Íē'),
+ ('΄', '΅'),
+ ('\u{483}', '\u{487}'),
+ ('ՙ', 'ՙ'),
+ ('\u{591}', '\u{5a1}'),
+ ('\u{5a3}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c4}'),
+ ('\u{64b}', '\u{652}'),
+ ('\u{657}', '\u{658}'),
+ ('\u{6df}', '\u{6e0}'),
+ ('ÛĨ', 'ÛĻ'),
+ ('\u{6ea}', '\u{6ec}'),
+ ('\u{730}', '\u{74a}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{7eb}', 'ßĩ'),
+ ('\u{818}', '\u{819}'),
+ ('\u{8e3}', '\u{8fe}'),
+ ('\u{93c}', '\u{93c}'),
+ ('\u{94d}', '\u{94d}'),
+ ('\u{951}', '\u{954}'),
+ ('āĨą', 'āĨą'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9cd}', '\u{9cd}'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('\u{a4d}', '\u{a4d}'),
+ ('\u{abc}', '\u{abc}'),
+ ('\u{acd}', '\u{acd}'),
+ ('\u{afd}', '\u{aff}'),
+ ('\u{b3c}', '\u{b3c}'),
+ ('\u{b4d}', '\u{b4d}'),
+ ('\u{b55}', '\u{b55}'),
+ ('\u{bcd}', '\u{bcd}'),
+ ('\u{c4d}', '\u{c4d}'),
+ ('\u{cbc}', '\u{cbc}'),
+ ('\u{ccd}', '\u{ccd}'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d4d}', '\u{d4d}'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{e47}', '\u{e4c}'),
+ ('\u{e4e}', '\u{e4e}'),
+ ('\u{eba}', '\u{eba}'),
+ ('\u{ec8}', '\u{ecc}'),
+ ('\u{f18}', '\u{f19}'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('āŧž', 'āŧŋ'),
+ ('\u{f82}', '\u{f84}'),
+ ('\u{f86}', '\u{f87}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('\u{1037}', '\u{1037}'),
+ ('\u{1039}', '\u{103a}'),
+ ('áŖ', 'ၤ'),
+ ('၊', 'ၭ'),
+ ('ႇ', '\u{108d}'),
+ ('ႏ', 'ႏ'),
+ ('ႚ', 'ႛ'),
+ ('\u{135d}', '\u{135f}'),
+ ('\u{17c9}', '\u{17d3}'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('\u{1939}', '\u{193b}'),
+ ('\u{1a75}', '\u{1a7c}'),
+ ('\u{1a7f}', '\u{1a7f}'),
+ ('\u{1ab0}', '\u{1abd}'),
+ ('\u{1b34}', '\u{1b34}'),
+ ('᭄', '᭄'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('áŽĒ', '\u{1bab}'),
+ ('\u{1c36}', '\u{1c37}'),
+ ('Ṹ', 'áąŊ'),
+ ('\u{1cd0}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('áŗˇ', '\u{1cf9}'),
+ ('á´Ŧ', 'áĩĒ'),
+ ('\u{1dc4}', '\u{1dcf}'),
+ ('\u{1df5}', '\u{1df9}'),
+ ('\u{1dfd}', '\u{1dff}'),
+ ('ážŊ', 'ážŊ'),
+ ('ážŋ', 'áŋ'),
+ ('áŋ', 'áŋ'),
+ ('áŋ', 'áŋŸ'),
+ ('áŋ­', 'áŋ¯'),
+ ('áŋŊ', 'áŋž'),
+ ('\u{2cef}', '\u{2cf1}'),
+ ('ⸯ', 'ⸯ'),
+ ('\u{302a}', '\u{302f}'),
+ ('\u{3099}', '゜'),
+ ('ãƒŧ', 'ãƒŧ'),
+ ('\u{a66f}', '\u{a66f}'),
+ ('\u{a67c}', '\u{a67d}'),
+ ('ę™ŋ', 'ę™ŋ'),
+ ('ꚜ', 'ꚝ'),
+ ('\u{a6f0}', '\u{a6f1}'),
+ ('꜀', '꜡'),
+ ('ꞈ', '꞊'),
+ ('ꟸ', 'ꟹ'),
+ ('\u{a8c4}', '\u{a8c4}'),
+ ('\u{a8e0}', '\u{a8f1}'),
+ ('\u{a92b}', 'ꤎ'),
+ ('ęĨ“', 'ęĨ“'),
+ ('\u{a9b3}', '\u{a9b3}'),
+ ('꧀', '꧀'),
+ ('\u{a9e5}', '\u{a9e5}'),
+ ('ęŠģ', 'ęŠŊ'),
+ ('\u{aabf}', 'ęĢ‚'),
+ ('\u{aaf6}', '\u{aaf6}'),
+ ('꭛', 'ꭟ'),
+ ('\u{ab69}', '\u{ab6b}'),
+ ('ę¯Ŧ', '\u{abed}'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('īŧž', 'īŧž'),
+ ('īŊ€', 'īŊ€'),
+ ('īŊ°', 'īŊ°'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('īŋŖ', 'īŋŖ'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{10ae5}', '\u{10ae6}'),
+ ('đ´ĸ', '\u{10d27}'),
+ ('\u{10f46}', '\u{10f50}'),
+ ('\u{110b9}', '\u{110ba}'),
+ ('\u{11133}', '\u{11134}'),
+ ('\u{11173}', '\u{11173}'),
+ ('𑇀', '𑇀'),
+ ('\u{111ca}', '\u{111cc}'),
+ ('đ‘ˆĩ', '\u{11236}'),
+ ('\u{112e9}', '\u{112ea}'),
+ ('\u{1133c}', '\u{1133c}'),
+ ('𑍍', '𑍍'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('\u{11442}', '\u{11442}'),
+ ('\u{11446}', '\u{11446}'),
+ ('\u{114c2}', '\u{114c3}'),
+ ('\u{115bf}', '\u{115c0}'),
+ ('\u{1163f}', '\u{1163f}'),
+ ('đ‘šļ', '\u{116b7}'),
+ ('\u{1172b}', '\u{1172b}'),
+ ('\u{11839}', '\u{1183a}'),
+ ('\u{1193d}', '\u{1193e}'),
+ ('\u{11943}', '\u{11943}'),
+ ('\u{119e0}', '\u{119e0}'),
+ ('\u{11a34}', '\u{11a34}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a99}', '\u{11a99}'),
+ ('\u{11c3f}', '\u{11c3f}'),
+ ('\u{11d42}', '\u{11d42}'),
+ ('\u{11d44}', '\u{11d45}'),
+ ('\u{11d97}', '\u{11d97}'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('\u{16b30}', '\u{16b36}'),
+ ('\u{16f8f}', '𖾟'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('\u{1d167}', '\u{1d169}'),
+ ('𝅭', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1e130}', '\u{1e136}'),
+ ('\u{1e2ec}', '\u{1e2ef}'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('\u{1e944}', '\u{1e946}'),
+ ('\u{1e948}', '\u{1e94a}'),
+];
+
+pub const EMOJI: &'static [(char, char)] = &[
+ ('#', '#'),
+ ('*', '*'),
+ ('0', '9'),
+ ('Š', 'Š'),
+ ('ÂŽ', 'ÂŽ'),
+ ('â€ŧ', 'â€ŧ'),
+ ('⁉', '⁉'),
+ ('â„ĸ', 'â„ĸ'),
+ ('ℹ', 'ℹ'),
+ ('↔', '↙'),
+ ('↩', 'â†Ē'),
+ ('⌚', '⌛'),
+ ('⌨', '⌨'),
+ ('⏏', '⏏'),
+ ('⏊', 'âŗ'),
+ ('⏸', 'âē'),
+ ('Ⓜ', 'Ⓜ'),
+ ('â–Ē', 'â–Ģ'),
+ ('â–ļ', 'â–ļ'),
+ ('◀', '◀'),
+ ('â—ģ', '◾'),
+ ('☀', '☄'),
+ ('☎', '☎'),
+ ('☑', '☑'),
+ ('☔', '☕'),
+ ('☘', '☘'),
+ ('☝', '☝'),
+ ('☠', '☠'),
+ ('â˜ĸ', 'â˜Ŗ'),
+ ('â˜Ļ', 'â˜Ļ'),
+ ('â˜Ē', 'â˜Ē'),
+ ('☎', '☯'),
+ ('☸', 'â˜ē'),
+ ('♀', '♀'),
+ ('♂', '♂'),
+ ('♈', '♓'),
+ ('♟', '♠'),
+ ('â™Ŗ', 'â™Ŗ'),
+ ('â™Ĩ', 'â™Ļ'),
+ ('♨', '♨'),
+ ('â™ģ', 'â™ģ'),
+ ('♾', 'â™ŋ'),
+ ('⚒', '⚗'),
+ ('⚙', '⚙'),
+ ('⚛', '⚜'),
+ ('⚠', '⚡'),
+ ('⚧', '⚧'),
+ ('âšĒ', 'âšĢ'),
+ ('⚰', '⚱'),
+ ('âšŊ', '⚾'),
+ ('⛄', '⛅'),
+ ('⛈', '⛈'),
+ ('⛎', '⛏'),
+ ('⛑', '⛑'),
+ ('⛓', '⛔'),
+ ('⛩', 'â›Ē'),
+ ('⛰', 'â›ĩ'),
+ ('⛷', 'â›ē'),
+ ('â›Ŋ', 'â›Ŋ'),
+ ('✂', '✂'),
+ ('✅', '✅'),
+ ('✈', '✍'),
+ ('✏', '✏'),
+ ('✒', '✒'),
+ ('✔', '✔'),
+ ('✖', '✖'),
+ ('✝', '✝'),
+ ('✡', '✡'),
+ ('✨', '✨'),
+ ('âœŗ', '✴'),
+ ('❄', '❄'),
+ ('❇', '❇'),
+ ('❌', '❌'),
+ ('❎', '❎'),
+ ('❓', '❕'),
+ ('❗', '❗'),
+ ('âŖ', '❤'),
+ ('➕', '➗'),
+ ('➡', '➡'),
+ ('➰', '➰'),
+ ('âžŋ', 'âžŋ'),
+ ('⤴', 'â¤ĩ'),
+ ('âŦ…', 'âŦ‡'),
+ ('âŦ›', 'âŦœ'),
+ ('⭐', '⭐'),
+ ('⭕', '⭕'),
+ ('〰', '〰'),
+ ('ã€Ŋ', 'ã€Ŋ'),
+ ('㊗', '㊗'),
+ ('㊙', '㊙'),
+ ('🀄', '🀄'),
+ ('🃏', '🃏'),
+ ('🅰', '🅱'),
+ ('🅾', 'đŸ…ŋ'),
+ ('🆎', '🆎'),
+ ('🆑', '🆚'),
+ ('đŸ‡Ļ', 'đŸ‡ŋ'),
+ ('🈁', '🈂'),
+ ('🈚', '🈚'),
+ ('đŸˆ¯', 'đŸˆ¯'),
+ ('🈲', 'đŸˆē'),
+ ('🉐', '🉑'),
+ ('🌀', '🌡'),
+ ('🌤', '🎓'),
+ ('🎖', '🎗'),
+ ('🎙', '🎛'),
+ ('🎞', '🏰'),
+ ('đŸŗ', 'đŸĩ'),
+ ('🏷', 'đŸ“Ŋ'),
+ ('đŸ“ŋ', 'đŸ”Ŋ'),
+ ('🕉', '🕎'),
+ ('🕐', '🕧'),
+ ('đŸ•¯', '🕰'),
+ ('đŸ•ŗ', 'đŸ•ē'),
+ ('🖇', '🖇'),
+ ('🖊', '🖍'),
+ ('🖐', '🖐'),
+ ('🖕', '🖖'),
+ ('🖤', 'đŸ–Ĩ'),
+ ('🖨', '🖨'),
+ ('🖱', '🖲'),
+ ('đŸ–ŧ', 'đŸ–ŧ'),
+ ('🗂', '🗄'),
+ ('🗑', '🗓'),
+ ('🗜', '🗞'),
+ ('🗡', '🗡'),
+ ('đŸ—Ŗ', 'đŸ—Ŗ'),
+ ('🗨', '🗨'),
+ ('đŸ—¯', 'đŸ—¯'),
+ ('đŸ—ŗ', 'đŸ—ŗ'),
+ ('đŸ—ē', '🙏'),
+ ('🚀', '🛅'),
+ ('🛋', '🛒'),
+ ('🛕', '\u{1f6d7}'),
+ ('🛠', 'đŸ›Ĩ'),
+ ('🛩', '🛩'),
+ ('đŸ›Ģ', 'đŸ›Ŧ'),
+ ('🛰', '🛰'),
+ ('đŸ›ŗ', '\u{1f6fc}'),
+ ('🟠', 'đŸŸĢ'),
+ ('\u{1f90c}', 'đŸ¤ē'),
+ ('đŸ¤ŧ', 'đŸĨ…'),
+ ('đŸĨ‡', '\u{1f978}'),
+ ('đŸĨē', '\u{1f9cb}'),
+ ('🧍', 'đŸ§ŋ'),
+ ('🩰', '\u{1fa74}'),
+ ('🩸', 'đŸŠē'),
+ ('đŸĒ€', '\u{1fa86}'),
+ ('đŸĒ', '\u{1faa8}'),
+ ('\u{1fab0}', '\u{1fab6}'),
+ ('\u{1fac0}', '\u{1fac2}'),
+ ('\u{1fad0}', '\u{1fad6}'),
+];
+
+pub const EMOJI_COMPONENT: &'static [(char, char)] = &[
+ ('#', '#'),
+ ('*', '*'),
+ ('0', '9'),
+ ('\u{200d}', '\u{200d}'),
+ ('\u{20e3}', '\u{20e3}'),
+ ('\u{fe0f}', '\u{fe0f}'),
+ ('đŸ‡Ļ', 'đŸ‡ŋ'),
+ ('đŸģ', 'đŸŋ'),
+ ('đŸĻ°', 'đŸĻŗ'),
+ ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const EMOJI_MODIFIER: &'static [(char, char)] = &[('đŸģ', 'đŸŋ')];
+
+pub const EMOJI_MODIFIER_BASE: &'static [(char, char)] = &[
+ ('☝', '☝'),
+ ('⛹', '⛹'),
+ ('✊', '✍'),
+ ('🎅', '🎅'),
+ ('🏂', '🏄'),
+ ('🏇', '🏇'),
+ ('🏊', '🏌'),
+ ('👂', '👃'),
+ ('👆', '👐'),
+ ('đŸ‘Ļ', '👸'),
+ ('đŸ‘ŧ', 'đŸ‘ŧ'),
+ ('💁', '💃'),
+ ('💅', '💇'),
+ ('💏', '💏'),
+ ('💑', '💑'),
+ ('đŸ’Ē', 'đŸ’Ē'),
+ ('🕴', 'đŸ•ĩ'),
+ ('đŸ•ē', 'đŸ•ē'),
+ ('🖐', '🖐'),
+ ('🖕', '🖖'),
+ ('🙅', '🙇'),
+ ('🙋', '🙏'),
+ ('đŸšŖ', 'đŸšŖ'),
+ ('🚴', 'đŸšļ'),
+ ('🛀', '🛀'),
+ ('🛌', '🛌'),
+ ('\u{1f90c}', '\u{1f90c}'),
+ ('🤏', '🤏'),
+ ('🤘', '🤟'),
+ ('đŸ¤Ļ', 'đŸ¤Ļ'),
+ ('🤰', '🤹'),
+ ('đŸ¤ŧ', '🤾'),
+ ('\u{1f977}', '\u{1f977}'),
+ ('đŸĻĩ', 'đŸĻļ'),
+ ('đŸĻ¸', 'đŸĻš'),
+ ('đŸĻģ', 'đŸĻģ'),
+ ('🧍', '🧏'),
+ ('🧑', '🧝'),
+];
+
+pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[
+ ('⌚', '⌛'),
+ ('⏊', 'âŦ'),
+ ('⏰', '⏰'),
+ ('âŗ', 'âŗ'),
+ ('â—Ŋ', '◾'),
+ ('☔', '☕'),
+ ('♈', '♓'),
+ ('â™ŋ', 'â™ŋ'),
+ ('⚓', '⚓'),
+ ('⚡', '⚡'),
+ ('âšĒ', 'âšĢ'),
+ ('âšŊ', '⚾'),
+ ('⛄', '⛅'),
+ ('⛎', '⛎'),
+ ('⛔', '⛔'),
+ ('â›Ē', 'â›Ē'),
+ ('⛲', 'â›ŗ'),
+ ('â›ĩ', 'â›ĩ'),
+ ('â›ē', 'â›ē'),
+ ('â›Ŋ', 'â›Ŋ'),
+ ('✅', '✅'),
+ ('✊', '✋'),
+ ('✨', '✨'),
+ ('❌', '❌'),
+ ('❎', '❎'),
+ ('❓', '❕'),
+ ('❗', '❗'),
+ ('➕', '➗'),
+ ('➰', '➰'),
+ ('âžŋ', 'âžŋ'),
+ ('âŦ›', 'âŦœ'),
+ ('⭐', '⭐'),
+ ('⭕', '⭕'),
+ ('🀄', '🀄'),
+ ('🃏', '🃏'),
+ ('🆎', '🆎'),
+ ('🆑', '🆚'),
+ ('đŸ‡Ļ', 'đŸ‡ŋ'),
+ ('🈁', '🈁'),
+ ('🈚', '🈚'),
+ ('đŸˆ¯', 'đŸˆ¯'),
+ ('🈲', 'đŸˆļ'),
+ ('🈸', 'đŸˆē'),
+ ('🉐', '🉑'),
+ ('🌀', '🌠'),
+ ('🌭', 'đŸŒĩ'),
+ ('🌷', 'đŸŧ'),
+ ('🍾', '🎓'),
+ ('🎠', '🏊'),
+ ('🏏', '🏓'),
+ ('🏠', '🏰'),
+ ('🏴', '🏴'),
+ ('🏸', '🐾'),
+ ('👀', '👀'),
+ ('👂', 'đŸ“ŧ'),
+ ('đŸ“ŋ', 'đŸ”Ŋ'),
+ ('🕋', '🕎'),
+ ('🕐', '🕧'),
+ ('đŸ•ē', 'đŸ•ē'),
+ ('🖕', '🖖'),
+ ('🖤', '🖤'),
+ ('đŸ—ģ', '🙏'),
+ ('🚀', '🛅'),
+ ('🛌', '🛌'),
+ ('🛐', '🛒'),
+ ('🛕', '\u{1f6d7}'),
+ ('đŸ›Ģ', 'đŸ›Ŧ'),
+ ('🛴', '\u{1f6fc}'),
+ ('🟠', 'đŸŸĢ'),
+ ('\u{1f90c}', 'đŸ¤ē'),
+ ('đŸ¤ŧ', 'đŸĨ…'),
+ ('đŸĨ‡', '\u{1f978}'),
+ ('đŸĨē', '\u{1f9cb}'),
+ ('🧍', 'đŸ§ŋ'),
+ ('🩰', '\u{1fa74}'),
+ ('🩸', 'đŸŠē'),
+ ('đŸĒ€', '\u{1fa86}'),
+ ('đŸĒ', '\u{1faa8}'),
+ ('\u{1fab0}', '\u{1fab6}'),
+ ('\u{1fac0}', '\u{1fac2}'),
+ ('\u{1fad0}', '\u{1fad6}'),
+];
+
+pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[
+ ('Š', 'Š'),
+ ('ÂŽ', 'ÂŽ'),
+ ('â€ŧ', 'â€ŧ'),
+ ('⁉', '⁉'),
+ ('â„ĸ', 'â„ĸ'),
+ ('ℹ', 'ℹ'),
+ ('↔', '↙'),
+ ('↩', 'â†Ē'),
+ ('⌚', '⌛'),
+ ('⌨', '⌨'),
+ ('⎈', '⎈'),
+ ('⏏', '⏏'),
+ ('⏊', 'âŗ'),
+ ('⏸', 'âē'),
+ ('Ⓜ', 'Ⓜ'),
+ ('â–Ē', 'â–Ģ'),
+ ('â–ļ', 'â–ļ'),
+ ('◀', '◀'),
+ ('â—ģ', '◾'),
+ ('☀', '★'),
+ ('☇', '☒'),
+ ('☔', '⚅'),
+ ('⚐', '✅'),
+ ('✈', '✒'),
+ ('✔', '✔'),
+ ('✖', '✖'),
+ ('✝', '✝'),
+ ('✡', '✡'),
+ ('✨', '✨'),
+ ('âœŗ', '✴'),
+ ('❄', '❄'),
+ ('❇', '❇'),
+ ('❌', '❌'),
+ ('❎', '❎'),
+ ('❓', '❕'),
+ ('❗', '❗'),
+ ('âŖ', '❧'),
+ ('➕', '➗'),
+ ('➡', '➡'),
+ ('➰', '➰'),
+ ('âžŋ', 'âžŋ'),
+ ('⤴', 'â¤ĩ'),
+ ('âŦ…', 'âŦ‡'),
+ ('âŦ›', 'âŦœ'),
+ ('⭐', '⭐'),
+ ('⭕', '⭕'),
+ ('〰', '〰'),
+ ('ã€Ŋ', 'ã€Ŋ'),
+ ('㊗', '㊗'),
+ ('㊙', '㊙'),
+ ('🀀', '\u{1f0ff}'),
+ ('\u{1f10d}', '\u{1f10f}'),
+ ('đŸ„¯', 'đŸ„¯'),
+ ('đŸ…Ŧ', '🅱'),
+ ('🅾', 'đŸ…ŋ'),
+ ('🆎', '🆎'),
+ ('🆑', '🆚'),
+ ('\u{1f1ad}', '\u{1f1e5}'),
+ ('🈁', '\u{1f20f}'),
+ ('🈚', '🈚'),
+ ('đŸˆ¯', 'đŸˆ¯'),
+ ('🈲', 'đŸˆē'),
+ ('\u{1f23c}', '\u{1f23f}'),
+ ('\u{1f249}', 'đŸē'),
+ ('🐀', 'đŸ”Ŋ'),
+ ('🕆', '🙏'),
+ ('🚀', '\u{1f6ff}'),
+ ('\u{1f774}', '\u{1f77f}'),
+ ('🟕', '\u{1f7ff}'),
+ ('\u{1f80c}', '\u{1f80f}'),
+ ('\u{1f848}', '\u{1f84f}'),
+ ('\u{1f85a}', '\u{1f85f}'),
+ ('\u{1f888}', '\u{1f88f}'),
+ ('\u{1f8ae}', '\u{1f8ff}'),
+ ('\u{1f90c}', 'đŸ¤ē'),
+ ('đŸ¤ŧ', 'đŸĨ…'),
+ ('đŸĨ‡', '\u{1faff}'),
+ ('\u{1fc00}', '\u{1fffd}'),
+];
+
+pub const EXTENDER: &'static [(char, char)] = &[
+ ('¡', '¡'),
+ ('ː', 'ˑ'),
+ ('Ų€', 'Ų€'),
+ ('ßē', 'ßē'),
+ ('\u{b55}', '\u{b55}'),
+ ('āš†', 'āš†'),
+ ('āģ†', 'āģ†'),
+ ('᠊', '᠊'),
+ ('᥃', '᥃'),
+ ('áĒ§', 'áĒ§'),
+ ('\u{1c36}', '\u{1c36}'),
+ ('áąģ', 'áąģ'),
+ ('々', '々'),
+ ('ã€ą', 'ã€ĩ'),
+ ('ゝ', 'ゞ'),
+ ('ãƒŧ', 'マ'),
+ ('ꀕ', 'ꀕ'),
+ ('ꘌ', 'ꘌ'),
+ ('ꧏ', 'ꧏ'),
+ ('ę§Ļ', 'ę§Ļ'),
+ ('ꊰ', 'ꊰ'),
+ ('ęĢ', 'ęĢ'),
+ ('ęĢŗ', 'ęĢ´'),
+ ('īŊ°', 'īŊ°'),
+ ('𑍝', '𑍝'),
+ ('𑗆', '𑗈'),
+ ('\u{11a98}', '\u{11a98}'),
+ ('𖭂', '𖭃'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', 'đ–ŋŖ'),
+ ('đž„ŧ', 'đž„Ŋ'),
+ ('\u{1e944}', '\u{1e946}'),
+];
+
+pub const GRAPHEME_BASE: &'static [(char, char)] = &[
+ (' ', '~'),
+ ('\u{a0}', 'ÂŦ'),
+ ('ÂŽ', 'Ëŋ'),
+ ('Ͱ', '͡'),
+ ('Íē', 'Íŋ'),
+ ('΄', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Ō‚'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', '֊'),
+ ('֍', '֏'),
+ ('Öž', 'Öž'),
+ ('׀', '׀'),
+ ('׃', '׃'),
+ ('׆', '׆'),
+ ('א', '×Ē'),
+ ('ׯ', '״'),
+ ('؆', '؏'),
+ ('؛', '؛'),
+ ('؞', 'ŲŠ'),
+ ('Ų ', 'Ų¯'),
+ ('Ųą', 'ە'),
+ ('۞', '۞'),
+ ('ÛĨ', 'ÛĻ'),
+ ('ÛŠ', 'ÛŠ'),
+ ('ێ', '܍'),
+ ('ܐ', 'ܐ'),
+ ('ܒ', 'ܯ'),
+ ('Ũ', 'ŪĨ'),
+ ('Ūą', 'Ūą'),
+ ('߀', 'ßĒ'),
+ ('ß´', 'ßē'),
+ ('ßž', 'ā •'),
+ ('ā š', 'ā š'),
+ ('ā ¤', 'ā ¤'),
+ ('ā ¨', 'ā ¨'),
+ ('ā °', 'ā ž'),
+ ('āĄ€', 'āĄ˜'),
+ ('āĄž', 'āĄž'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('ā¤ƒ', 'ā¤š'),
+ ('ā¤ģ', 'ā¤ģ'),
+ ('ā¤Ŋ', 'āĨ€'),
+ ('āĨ‰', 'āĨŒ'),
+ ('āĨŽ', 'āĨ'),
+ ('āĨ˜', 'āĨĄ'),
+ ('āĨ¤', 'āĻ€'),
+ ('āĻ‚', 'āĻƒ'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('āĻŊ', 'āĻŊ'),
+ ('āĻŋ', 'ā§€'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Œ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', 'ā§Ą'),
+ ('ā§Ļ', 'ā§Ŋ'),
+ ('ā¨ƒ', 'ā¨ƒ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('ā¨ž', 'āŠ€'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠĻ', 'āŠ¯'),
+ ('āŠ˛', 'āŠ´'),
+ ('āŠļ', 'āŠļ'),
+ ('āĒƒ', 'āĒƒ'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('āĒŊ', 'āĢ€'),
+ ('āĢ‰', 'āĢ‰'),
+ ('āĢ‹', 'āĢŒ'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', 'āĢĄ'),
+ ('āĢĻ', 'āĢą'),
+ ('āĢš', 'āĢš'),
+ ('āŦ‚', 'āŦƒ'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('āŦŊ', 'āŦŊ'),
+ ('ā­€', 'ā­€'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', 'ā­Œ'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', 'ā­Ą'),
+ ('ā­Ļ', 'ā­ˇ'),
+ ('āŽƒ', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('āŽŋ', 'āŽŋ'),
+ ('ā¯', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', 'ā¯Œ'),
+ ('ā¯', 'ā¯'),
+ ('ā¯Ļ', 'ā¯ē'),
+ ('ā°', 'ā°ƒ'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'ā°Ŋ'),
+ ('āą', 'āą„'),
+ ('āą˜', 'āąš'),
+ ('āą ', 'āąĄ'),
+ ('āąĻ', 'āą¯'),
+ ('āąˇ', 'ā˛€'),
+ ('ā˛‚', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛Ŋ', 'ā˛ž'),
+ ('āŗ€', 'āŗ'),
+ ('āŗƒ', 'āŗ„'),
+ ('āŗ‡', 'āŗˆ'),
+ ('āŗŠ', 'āŗ‹'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', 'āŗĄ'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āŗą', 'āŗ˛'),
+ ('ā´‚', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´ē'),
+ ('ā´Ŋ', 'ā´Ŋ'),
+ ('ā´ŋ', 'āĩ€'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩŒ'),
+ ('āĩŽ', 'āĩ'),
+ ('āĩ”', 'āĩ–'),
+ ('āĩ˜', 'āĩĄ'),
+ ('āĩĻ', 'āĩŋ'),
+ ('āļ‚', 'āļƒ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('āˇ', 'āˇ‘'),
+ ('āˇ˜', 'āˇž'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āˇ˛', 'āˇ´'),
+ ('ā¸', 'ā¸°'),
+ ('ā¸˛', 'ā¸ŗ'),
+ ('ā¸ŋ', 'āš†'),
+ ('āš', 'āš›'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āē°'),
+ ('āē˛', 'āēŗ'),
+ ('āēŊ', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('āģ', 'āģ™'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ—'),
+ ('āŧš', 'āŧ´'),
+ ('āŧļ', 'āŧļ'),
+ ('āŧ¸', 'āŧ¸'),
+ ('āŧē', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('āŊŋ', 'āŊŋ'),
+ ('āž…', 'āž…'),
+ ('āžˆ', 'āžŒ'),
+ ('āžž', 'āŋ…'),
+ ('āŋ‡', 'āŋŒ'),
+ ('āŋŽ', 'āŋš'),
+ ('က', 'á€Ŧ'),
+ ('ေ', 'ေ'),
+ ('း', 'း'),
+ ('á€ģ', 'á€ŧ'),
+ ('á€ŋ', 'ၗ'),
+ ('ၚ', 'ၝ'),
+ ('ၥ', 'ၰ'),
+ ('áĩ', 'ႁ'),
+ ('ႃ', 'ႄ'),
+ ('ႇ', 'ႌ'),
+ ('ႎ', 'ႜ'),
+ ('႞', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('፠', 'áŧ'),
+ ('ᎀ', '᎙'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('᐀', '᚜'),
+ ('ᚠ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', 'ᜑ'),
+ ('ᜠ', 'ᜱ'),
+ ('áœĩ', 'áœļ'),
+ ('ᝀ', 'ᝑ'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('ក', 'ážŗ'),
+ ('ážļ', 'ážļ'),
+ ('ើ', 'ៅ'),
+ ('ះ', 'ៈ'),
+ ('។', 'ៜ'),
+ ('០', '៩'),
+ ('៰', '៹'),
+ ('᠀', '᠊'),
+ ('᠐', '᠙'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸ„'),
+ ('áĸ‡', 'áĸ¨'),
+ ('áĸĒ', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('á¤Ŗ', 'á¤Ļ'),
+ ('ᤊ', 'á¤Ģ'),
+ ('ᤰ', '᤹'),
+ ('á¤ŗ', 'ᤸ'),
+ ('áĨ€', 'áĨ€'),
+ ('áĨ„', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('᧐', '᧚'),
+ ('᧞', 'ᨖ'),
+ ('ᨙ', 'ᨚ'),
+ ('᨞', 'ᩕ'),
+ ('ᩗ', 'ᩗ'),
+ ('እ', 'እ'),
+ ('áŠŖ', 'ኤ'),
+ ('ክ', 'ኲ'),
+ ('áĒ€', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('áĒ ', 'áĒ­'),
+ ('áŦ„', 'áŦŗ'),
+ ('áŦģ', 'áŦģ'),
+ ('áŦŊ', 'ᭁ'),
+ ('ᭃ', 'ᭋ'),
+ ('᭐', 'á­Ē'),
+ ('á­´', 'á­ŧ'),
+ ('ᮂ', 'ᮡ'),
+ ('áŽĻ', 'Ꭷ'),
+ ('áŽĒ', 'áŽĒ'),
+ ('ᎎ', 'á¯Ĩ'),
+ ('ᯧ', 'ᯧ'),
+ ('á¯Ē', 'á¯Ŧ'),
+ ('ᯎ', 'ᯎ'),
+ ('á¯˛', 'á¯ŗ'),
+ ('á¯ŧ', 'á°Ģ'),
+ ('á°´', 'á°ĩ'),
+ ('á°ģ', '᱉'),
+ ('ᱍ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'áŗ‡'),
+ ('áŗ“', 'áŗ“'),
+ ('áŗĄ', 'áŗĄ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗˇ'),
+ ('áŗē', 'áŗē'),
+ ('ᴀ', 'áļŋ'),
+ ('Ḁ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'áŋ„'),
+ ('áŋ†', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ', 'áŋ¯'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋž'),
+ ('\u{2000}', '\u{200a}'),
+ ('‐', '‧'),
+ ('\u{202f}', '\u{205f}'),
+ ('⁰', '⁹'),
+ ('⁴', '₎'),
+ ('ₐ', 'ₜ'),
+ ('₠', 'â‚ŋ'),
+ ('℀', '↋'),
+ ('←', 'âĻ'),
+ ('⑀', '⑊'),
+ ('①', 'â­ŗ'),
+ ('â­ļ', '⮕'),
+ ('\u{2b97}', 'â°Ž'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('âŗš', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ°'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('⸀', '\u{2e52}'),
+ ('âē€', 'âē™'),
+ ('âē›', 'âģŗ'),
+ ('âŧ€', 'âŋ•'),
+ ('âŋ°', 'âŋģ'),
+ ('\u{3000}', '《'),
+ ('〰', 'ã€ŋ'),
+ ('ぁ', 'ゖ'),
+ ('゛', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('㆐', 'ã‡Ŗ'),
+ ('ㇰ', '㈞'),
+ ('㈠', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('꒐', '꓆'),
+ ('ꓐ', 'ę˜Ģ'),
+ ('Ꙁ', 'ꙮ'),
+ ('ę™ŗ', 'ę™ŗ'),
+ ('꙾', 'ꚝ'),
+ ('ꚠ', 'ę›¯'),
+ ('꛲', '꛷'),
+ ('꜀', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ꠁ'),
+ ('ꠃ', 'ꠅ'),
+ ('ꠇ', 'ꠊ'),
+ ('ꠌ', 'ꠤ'),
+ ('ę §', 'ę Ģ'),
+ ('ę °', 'ę š'),
+ ('ꡀ', '꡷'),
+ ('ęĸ€', 'ęŖƒ'),
+ ('ęŖŽ', 'ęŖ™'),
+ ('ęŖ˛', 'ęŖž'),
+ ('꤀', 'ę¤Ĩ'),
+ ('ꤎ', 'ęĨ†'),
+ ('ęĨ’', 'ęĨ“'),
+ ('ęĨŸ', 'ęĨŧ'),
+ ('ęĻƒ', 'ęĻ˛'),
+ ('ęĻ´', 'ęĻĩ'),
+ ('ęĻē', 'ęĻģ'),
+ ('ęĻž', '꧍'),
+ ('ꧏ', '꧙'),
+ ('꧞', 'ꧤ'),
+ ('ę§Ļ', '꧞'),
+ ('ꨀ', 'ꨨ'),
+ ('ę¨¯', 'ꨰ'),
+ ('ę¨ŗ', 'ꨴ'),
+ ('ꩀ', 'ꩂ'),
+ ('ꩄ', 'ꩋ'),
+ ('ꩍ', 'ꩍ'),
+ ('꩐', '꩙'),
+ ('꩜', 'ęŠģ'),
+ ('ęŠŊ', 'ęĒ¯'),
+ ('ęĒą', 'ęĒą'),
+ ('ęĒĩ', 'ęĒļ'),
+ ('ęĒš', 'ęĒŊ'),
+ ('ęĢ€', 'ęĢ€'),
+ ('ęĢ‚', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢĢ'),
+ ('ęĢŽ', 'ęĢĩ'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', '\u{ab6b}'),
+ ('ę­°', 'ę¯¤'),
+ ('ę¯Ļ', 'ę¯§'),
+ ('ę¯Š', 'ę¯Ŧ'),
+ ('ę¯°', 'ę¯š'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŦ', 'īŦ'),
+ ('īŦŸ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'ī¯'),
+ ('ī¯“', 'ī´ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇŊ'),
+ ('ī¸', 'ī¸™'),
+ ('ī¸°', 'īš’'),
+ ('īš”', 'īšĻ'),
+ ('īš¨', 'īšĢ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŧ', 'īž'),
+ ('īž ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('īŋ ', 'īŋĻ'),
+ ('īŋ¨', 'īŋŽ'),
+ ('īŋŧ', 'īŋŊ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐄀', '𐄂'),
+ ('𐄇', 'đ„ŗ'),
+ ('𐄷', '𐆎'),
+ ('𐆐', '\u{1019c}'),
+ ('𐆠', '𐆠'),
+ ('𐇐', 'đ‡ŧ'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐋡', 'đ‹ģ'),
+ ('𐌀', 'đŒŖ'),
+ ('𐌭', '𐍊'),
+ ('𐍐', 'đĩ'),
+ ('𐎀', '𐎝'),
+ ('𐎟', '𐏃'),
+ ('𐏈', '𐏕'),
+ ('𐐀', '𐒝'),
+ ('𐒠', '𐒩'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('đ•¯', 'đ•¯'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐡗', 'đĸž'),
+ ('đĸ§', 'đĸ¯'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('đŖģ', '𐤛'),
+ ('𐤟', '𐤹'),
+ ('đ¤ŋ', 'đ¤ŋ'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻŧ', '𐧏'),
+ ('𐧒', '𐨀'),
+ ('𐨐', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('𐩀', '𐩈'),
+ ('𐊐', '𐊘'),
+ ('𐊠', 'đĒŸ'),
+ ('đĢ€', 'đĢ¤'),
+ ('đĢĢ', 'đĢļ'),
+ ('đŦ€', 'đŦĩ'),
+ ('đŦš', '𐭕'),
+ ('𐭘', '𐭲'),
+ ('𐭸', '𐮑'),
+ ('𐮙', '𐮜'),
+ ('𐎊', 'đŽ¯'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('đŗē', 'đ´Ŗ'),
+ ('𐴰', '𐴚'),
+ ('𐚠', '𐚞'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10ead}', '\u{10ead}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧ§'),
+ ('đŧ°', 'đŊ…'),
+ ('đŊ‘', 'đŊ™'),
+ ('\u{10fb0}', '\u{10fcb}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀀', '𑀀'),
+ ('𑀂', '𑀷'),
+ ('𑁇', '𑁍'),
+ ('𑁒', 'đ‘¯'),
+ ('𑂂', '𑂲'),
+ ('𑂷', '𑂸'),
+ ('đ‘‚ģ', 'đ‘‚ŧ'),
+ ('𑂾', '𑃁'),
+ ('𑃐', '𑃨'),
+ ('𑃰', '𑃹'),
+ ('𑄃', 'đ‘„Ļ'),
+ ('đ‘„Ŧ', 'đ‘„Ŧ'),
+ ('đ‘„ļ', '\u{11147}'),
+ ('𑅐', '𑅲'),
+ ('𑅴', 'đ‘…ļ'),
+ ('𑆂', 'đ‘†ĩ'),
+ ('đ‘†ŋ', '𑇈'),
+ ('𑇍', '\u{111ce}'),
+ ('𑇐', '𑇟'),
+ ('𑇡', '𑇴'),
+ ('𑈀', '𑈑'),
+ ('𑈓', '𑈮'),
+ ('𑈲', 'đ‘ˆŗ'),
+ ('đ‘ˆĩ', 'đ‘ˆĩ'),
+ ('𑈸', 'đ‘ˆŊ'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊩'),
+ ('𑊰', '𑋞'),
+ ('𑋠', 'đ‘‹ĸ'),
+ ('𑋰', '𑋹'),
+ ('𑌂', '𑌃'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('đ‘ŒŊ', 'đ‘ŒŊ'),
+ ('đ‘Œŋ', 'đ‘Œŋ'),
+ ('𑍁', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('𑍐', '𑍐'),
+ ('𑍝', 'đ‘Ŗ'),
+ ('𑐀', '𑐷'),
+ ('𑑀', '𑑁'),
+ ('𑑅', '𑑅'),
+ ('𑑇', '𑑛'),
+ ('𑑝', '𑑝'),
+ ('𑑟', '\u{11461}'),
+ ('𑒀', 'đ‘’¯'),
+ ('𑒱', '𑒲'),
+ ('𑒹', '𑒹'),
+ ('đ‘’ģ', 'đ‘’ŧ'),
+ ('𑒾', '𑒾'),
+ ('𑓁', '𑓁'),
+ ('𑓄', '𑓇'),
+ ('𑓐', '𑓙'),
+ ('𑖀', '𑖮'),
+ ('𑖰', '𑖱'),
+ ('𑖸', 'đ‘–ģ'),
+ ('𑖾', '𑖾'),
+ ('𑗁', '𑗛'),
+ ('𑘀', '𑘲'),
+ ('đ‘˜ģ', 'đ‘˜ŧ'),
+ ('𑘾', '𑘾'),
+ ('𑙁', '𑙄'),
+ ('𑙐', '𑙙'),
+ ('𑙠', 'đ‘™Ŧ'),
+ ('𑚀', 'đ‘šĒ'),
+ ('đ‘šŦ', 'đ‘šŦ'),
+ ('𑚮', 'đ‘š¯'),
+ ('đ‘šļ', 'đ‘šļ'),
+ ('𑚸', '𑚸'),
+ ('𑛀', '𑛉'),
+ ('𑜀', '𑜚'),
+ ('𑜠', '𑜡'),
+ ('đ‘œĻ', 'đ‘œĻ'),
+ ('𑜰', 'đ‘œŋ'),
+ ('𑠀', '𑠮'),
+ ('𑠸', '𑠸'),
+ ('đ‘ ģ', 'đ‘ ģ'),
+ ('đ‘ĸ ', 'đ‘Ŗ˛'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{1192f}'),
+ ('\u{11931}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193d}', '\u{1193d}'),
+ ('\u{1193f}', '\u{11942}'),
+ ('\u{11944}', '\u{11946}'),
+ ('\u{11950}', '\u{11959}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '𑧓'),
+ ('𑧜', '𑧟'),
+ ('𑧡', '𑧤'),
+ ('𑨀', '𑨀'),
+ ('𑨋', '𑨲'),
+ ('𑨹', 'đ‘¨ē'),
+ ('đ‘¨ŋ', '𑩆'),
+ ('𑩐', '𑩐'),
+ ('𑩗', '𑩘'),
+ ('𑩜', 'đ‘Ē‰'),
+ ('đ‘Ē—', 'đ‘Ē—'),
+ ('đ‘Ēš', 'đ‘Ēĸ'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', 'đ‘°¯'),
+ ('𑰾', '𑰾'),
+ ('𑱀', '𑱅'),
+ ('𑱐', 'đ‘ąŦ'),
+ ('𑱰', '𑲏'),
+ ('𑲩', '𑲩'),
+ ('𑲱', '𑲱'),
+ ('𑲴', '𑲴'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '𑴰'),
+ ('đ‘ĩ†', 'đ‘ĩ†'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļŽ'),
+ ('đ‘ļ“', 'đ‘ļ”'),
+ ('đ‘ļ–', 'đ‘ļ–'),
+ ('đ‘ļ˜', 'đ‘ļ˜'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('đ‘ģ ', 'đ‘ģ˛'),
+ ('đ‘ģĩ', 'đ‘ģ¸'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('đ‘ŋ€', 'đ‘ŋą'),
+ ('đ‘ŋŋ', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒑰', '𒑴'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('𖩠', '𖩩'),
+ ('𖩮', 'đ–Š¯'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('đ–Ģĩ', 'đ–Ģĩ'),
+ ('đ–Ŧ€', 'đ–Ŧ¯'),
+ ('đ–Ŧˇ', '𖭅'),
+ ('𖭐', '𖭙'),
+ ('𖭛', '𖭡'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–ēš'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('đ–Ŋ', '𖾇'),
+ ('𖾓', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋŖ'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('𛲜', '𛲜'),
+ ('𛲟', '𛲟'),
+ ('𝀀', 'đƒĩ'),
+ ('𝄀', 'đ„Ļ'),
+ ('𝄩', '𝅘𝅥𝅲'),
+ ('đ…Ļ', 'đ…Ļ'),
+ ('đ…Ē', '𝅭'),
+ ('𝆃', '𝆄'),
+ ('𝆌', '𝆩'),
+ ('𝆮', '𝇨'),
+ ('𝈀', '𝉁'),
+ ('𝉅', '𝉅'),
+ ('𝋠', 'đ‹ŗ'),
+ ('𝌀', '𝍖'),
+ ('𝍠', '𝍸'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝟋'),
+ ('𝟎', 'đ§ŋ'),
+ ('𝨷', 'đ¨ē'),
+ ('𝩭', '𝩴'),
+ ('đŠļ', 'đĒƒ'),
+ ('đĒ…', 'đĒ‹'),
+ ('𞄀', 'đž„Ŧ'),
+ ('𞄷', 'đž„Ŋ'),
+ ('𞅀', '𞅉'),
+ ('𞅎', '𞅏'),
+ ('𞋀', 'đž‹Ģ'),
+ ('𞋰', '𞋹'),
+ ('đž‹ŋ', 'đž‹ŋ'),
+ ('𞠀', 'đžŖ„'),
+ ('đžŖ‡', 'đžŖ'),
+ ('𞤀', 'đžĨƒ'),
+ ('đžĨ‹', 'đžĨ‹'),
+ ('đžĨ', 'đžĨ™'),
+ ('đžĨž', 'đžĨŸ'),
+ ('𞱱', '𞲴'),
+ ('𞴁', 'đž´Ŋ'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('đžģ°', 'đžģą'),
+ ('🀀', 'đŸ€Ģ'),
+ ('🀰', '🂓'),
+ ('🂠', '🂮'),
+ ('🂱', 'đŸ‚ŋ'),
+ ('🃁', '🃏'),
+ ('🃑', 'đŸƒĩ'),
+ ('🄀', '\u{1f1ad}'),
+ ('đŸ‡Ļ', '🈂'),
+ ('🈐', 'đŸˆģ'),
+ ('🉀', '🉈'),
+ ('🉐', '🉑'),
+ ('🉠', 'đŸ‰Ĩ'),
+ ('🌀', '\u{1f6d7}'),
+ ('🛠', 'đŸ›Ŧ'),
+ ('🛰', '\u{1f6fc}'),
+ ('🜀', 'đŸŗ'),
+ ('🞀', '🟘'),
+ ('🟠', 'đŸŸĢ'),
+ ('🠀', '🠋'),
+ ('🠐', '🡇'),
+ ('🡐', '🡙'),
+ ('🡠', 'đŸĸ‡'),
+ ('đŸĸ', 'đŸĸ­'),
+ ('\u{1f8b0}', '\u{1f8b1}'),
+ ('🤀', '\u{1f978}'),
+ ('đŸĨē', '\u{1f9cb}'),
+ ('🧍', '🩓'),
+ ('🩠', '🩭'),
+ ('🩰', '\u{1fa74}'),
+ ('🩸', 'đŸŠē'),
+ ('đŸĒ€', '\u{1fa86}'),
+ ('đŸĒ', '\u{1faa8}'),
+ ('\u{1fab0}', '\u{1fab6}'),
+ ('\u{1fac0}', '\u{1fac2}'),
+ ('\u{1fad0}', '\u{1fad6}'),
+ ('\u{1fb00}', '\u{1fb92}'),
+ ('\u{1fb94}', '\u{1fbca}'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[
+ ('\u{300}', '\u{36f}'),
+ ('\u{483}', '\u{489}'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('\u{610}', '\u{61a}'),
+ ('\u{64b}', '\u{65f}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{6d6}', '\u{6dc}'),
+ ('\u{6df}', '\u{6e4}'),
+ ('\u{6e7}', '\u{6e8}'),
+ ('\u{6ea}', '\u{6ed}'),
+ ('\u{711}', '\u{711}'),
+ ('\u{730}', '\u{74a}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{7eb}', '\u{7f3}'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('\u{816}', '\u{819}'),
+ ('\u{81b}', '\u{823}'),
+ ('\u{825}', '\u{827}'),
+ ('\u{829}', '\u{82d}'),
+ ('\u{859}', '\u{85b}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', '\u{902}'),
+ ('\u{93a}', '\u{93a}'),
+ ('\u{93c}', '\u{93c}'),
+ ('\u{941}', '\u{948}'),
+ ('\u{94d}', '\u{94d}'),
+ ('\u{951}', '\u{957}'),
+ ('\u{962}', '\u{963}'),
+ ('\u{981}', '\u{981}'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9be}', '\u{9be}'),
+ ('\u{9c1}', '\u{9c4}'),
+ ('\u{9cd}', '\u{9cd}'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('\u{9e2}', '\u{9e3}'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', '\u{a02}'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('\u{a41}', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a70}', '\u{a71}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{a81}', '\u{a82}'),
+ ('\u{abc}', '\u{abc}'),
+ ('\u{ac1}', '\u{ac5}'),
+ ('\u{ac7}', '\u{ac8}'),
+ ('\u{acd}', '\u{acd}'),
+ ('\u{ae2}', '\u{ae3}'),
+ ('\u{afa}', '\u{aff}'),
+ ('\u{b01}', '\u{b01}'),
+ ('\u{b3c}', '\u{b3c}'),
+ ('\u{b3e}', '\u{b3f}'),
+ ('\u{b41}', '\u{b44}'),
+ ('\u{b4d}', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('\u{b62}', '\u{b63}'),
+ ('\u{b82}', '\u{b82}'),
+ ('\u{bbe}', '\u{bbe}'),
+ ('\u{bc0}', '\u{bc0}'),
+ ('\u{bcd}', '\u{bcd}'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('\u{c00}', '\u{c00}'),
+ ('\u{c04}', '\u{c04}'),
+ ('\u{c3e}', '\u{c40}'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('\u{c62}', '\u{c63}'),
+ ('\u{c81}', '\u{c81}'),
+ ('\u{cbc}', '\u{cbc}'),
+ ('\u{cbf}', '\u{cbf}'),
+ ('\u{cc2}', '\u{cc2}'),
+ ('\u{cc6}', '\u{cc6}'),
+ ('\u{ccc}', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('\u{d00}', '\u{d01}'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d3e}', '\u{d3e}'),
+ ('\u{d41}', '\u{d44}'),
+ ('\u{d4d}', '\u{d4d}'),
+ ('\u{d57}', '\u{d57}'),
+ ('\u{d62}', '\u{d63}'),
+ ('\u{d81}', '\u{d81}'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dcf}'),
+ ('\u{dd2}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('\u{ddf}', '\u{ddf}'),
+ ('\u{e31}', '\u{e31}'),
+ ('\u{e34}', '\u{e3a}'),
+ ('\u{e47}', '\u{e4e}'),
+ ('\u{eb1}', '\u{eb1}'),
+ ('\u{eb4}', '\u{ebc}'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('\u{f18}', '\u{f19}'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('\u{f71}', '\u{f7e}'),
+ ('\u{f80}', '\u{f84}'),
+ ('\u{f86}', '\u{f87}'),
+ ('\u{f8d}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('\u{102d}', '\u{1030}'),
+ ('\u{1032}', '\u{1037}'),
+ ('\u{1039}', '\u{103a}'),
+ ('\u{103d}', '\u{103e}'),
+ ('\u{1058}', '\u{1059}'),
+ ('\u{105e}', '\u{1060}'),
+ ('\u{1071}', '\u{1074}'),
+ ('\u{1082}', '\u{1082}'),
+ ('\u{1085}', '\u{1086}'),
+ ('\u{108d}', '\u{108d}'),
+ ('\u{109d}', '\u{109d}'),
+ ('\u{135d}', '\u{135f}'),
+ ('\u{1712}', '\u{1714}'),
+ ('\u{1732}', '\u{1734}'),
+ ('\u{1752}', '\u{1753}'),
+ ('\u{1772}', '\u{1773}'),
+ ('\u{17b4}', '\u{17b5}'),
+ ('\u{17b7}', '\u{17bd}'),
+ ('\u{17c6}', '\u{17c6}'),
+ ('\u{17c9}', '\u{17d3}'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('\u{180b}', '\u{180d}'),
+ ('\u{1885}', '\u{1886}'),
+ ('\u{18a9}', '\u{18a9}'),
+ ('\u{1920}', '\u{1922}'),
+ ('\u{1927}', '\u{1928}'),
+ ('\u{1932}', '\u{1932}'),
+ ('\u{1939}', '\u{193b}'),
+ ('\u{1a17}', '\u{1a18}'),
+ ('\u{1a1b}', '\u{1a1b}'),
+ ('\u{1a56}', '\u{1a56}'),
+ ('\u{1a58}', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a60}'),
+ ('\u{1a62}', '\u{1a62}'),
+ ('\u{1a65}', '\u{1a6c}'),
+ ('\u{1a73}', '\u{1a7c}'),
+ ('\u{1a7f}', '\u{1a7f}'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1b00}', '\u{1b03}'),
+ ('\u{1b34}', '\u{1b3a}'),
+ ('\u{1b3c}', '\u{1b3c}'),
+ ('\u{1b42}', '\u{1b42}'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', '\u{1b81}'),
+ ('\u{1ba2}', '\u{1ba5}'),
+ ('\u{1ba8}', '\u{1ba9}'),
+ ('\u{1bab}', '\u{1bad}'),
+ ('\u{1be6}', '\u{1be6}'),
+ ('\u{1be8}', '\u{1be9}'),
+ ('\u{1bed}', '\u{1bed}'),
+ ('\u{1bef}', '\u{1bf1}'),
+ ('\u{1c2c}', '\u{1c33}'),
+ ('\u{1c36}', '\u{1c37}'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', '\u{1ce0}'),
+ ('\u{1ce2}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('\u{1cf8}', '\u{1cf9}'),
+ ('\u{1dc0}', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('\u{200c}', '\u{200c}'),
+ ('\u{20d0}', '\u{20f0}'),
+ ('\u{2cef}', '\u{2cf1}'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('\u{302a}', '\u{302f}'),
+ ('\u{3099}', '\u{309a}'),
+ ('\u{a66f}', '\u{a672}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('\u{a69e}', '\u{a69f}'),
+ ('\u{a6f0}', '\u{a6f1}'),
+ ('\u{a802}', '\u{a802}'),
+ ('\u{a806}', '\u{a806}'),
+ ('\u{a80b}', '\u{a80b}'),
+ ('\u{a825}', '\u{a826}'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('\u{a8c4}', '\u{a8c5}'),
+ ('\u{a8e0}', '\u{a8f1}'),
+ ('\u{a8ff}', '\u{a8ff}'),
+ ('\u{a926}', '\u{a92d}'),
+ ('\u{a947}', '\u{a951}'),
+ ('\u{a980}', '\u{a982}'),
+ ('\u{a9b3}', '\u{a9b3}'),
+ ('\u{a9b6}', '\u{a9b9}'),
+ ('\u{a9bc}', '\u{a9bd}'),
+ ('\u{a9e5}', '\u{a9e5}'),
+ ('\u{aa29}', '\u{aa2e}'),
+ ('\u{aa31}', '\u{aa32}'),
+ ('\u{aa35}', '\u{aa36}'),
+ ('\u{aa43}', '\u{aa43}'),
+ ('\u{aa4c}', '\u{aa4c}'),
+ ('\u{aa7c}', '\u{aa7c}'),
+ ('\u{aab0}', '\u{aab0}'),
+ ('\u{aab2}', '\u{aab4}'),
+ ('\u{aab7}', '\u{aab8}'),
+ ('\u{aabe}', '\u{aabf}'),
+ ('\u{aac1}', '\u{aac1}'),
+ ('\u{aaec}', '\u{aaed}'),
+ ('\u{aaf6}', '\u{aaf6}'),
+ ('\u{abe5}', '\u{abe5}'),
+ ('\u{abe8}', '\u{abe8}'),
+ ('\u{abed}', '\u{abed}'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{10376}', '\u{1037a}'),
+ ('\u{10a01}', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '\u{10a0f}'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('\u{10ae5}', '\u{10ae6}'),
+ ('\u{10d24}', '\u{10d27}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10f46}', '\u{10f50}'),
+ ('\u{11001}', '\u{11001}'),
+ ('\u{11038}', '\u{11046}'),
+ ('\u{1107f}', '\u{11081}'),
+ ('\u{110b3}', '\u{110b6}'),
+ ('\u{110b9}', '\u{110ba}'),
+ ('\u{11100}', '\u{11102}'),
+ ('\u{11127}', '\u{1112b}'),
+ ('\u{1112d}', '\u{11134}'),
+ ('\u{11173}', '\u{11173}'),
+ ('\u{11180}', '\u{11181}'),
+ ('\u{111b6}', '\u{111be}'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111cf}', '\u{111cf}'),
+ ('\u{1122f}', '\u{11231}'),
+ ('\u{11234}', '\u{11234}'),
+ ('\u{11236}', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('\u{112df}', '\u{112df}'),
+ ('\u{112e3}', '\u{112ea}'),
+ ('\u{11300}', '\u{11301}'),
+ ('\u{1133b}', '\u{1133c}'),
+ ('\u{1133e}', '\u{1133e}'),
+ ('\u{11340}', '\u{11340}'),
+ ('\u{11357}', '\u{11357}'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('\u{11438}', '\u{1143f}'),
+ ('\u{11442}', '\u{11444}'),
+ ('\u{11446}', '\u{11446}'),
+ ('\u{1145e}', '\u{1145e}'),
+ ('\u{114b0}', '\u{114b0}'),
+ ('\u{114b3}', '\u{114b8}'),
+ ('\u{114ba}', '\u{114ba}'),
+ ('\u{114bd}', '\u{114bd}'),
+ ('\u{114bf}', '\u{114c0}'),
+ ('\u{114c2}', '\u{114c3}'),
+ ('\u{115af}', '\u{115af}'),
+ ('\u{115b2}', '\u{115b5}'),
+ ('\u{115bc}', '\u{115bd}'),
+ ('\u{115bf}', '\u{115c0}'),
+ ('\u{115dc}', '\u{115dd}'),
+ ('\u{11633}', '\u{1163a}'),
+ ('\u{1163d}', '\u{1163d}'),
+ ('\u{1163f}', '\u{11640}'),
+ ('\u{116ab}', '\u{116ab}'),
+ ('\u{116ad}', '\u{116ad}'),
+ ('\u{116b0}', '\u{116b5}'),
+ ('\u{116b7}', '\u{116b7}'),
+ ('\u{1171d}', '\u{1171f}'),
+ ('\u{11722}', '\u{11725}'),
+ ('\u{11727}', '\u{1172b}'),
+ ('\u{1182f}', '\u{11837}'),
+ ('\u{11839}', '\u{1183a}'),
+ ('\u{11930}', '\u{11930}'),
+ ('\u{1193b}', '\u{1193c}'),
+ ('\u{1193e}', '\u{1193e}'),
+ ('\u{11943}', '\u{11943}'),
+ ('\u{119d4}', '\u{119d7}'),
+ ('\u{119da}', '\u{119db}'),
+ ('\u{119e0}', '\u{119e0}'),
+ ('\u{11a01}', '\u{11a0a}'),
+ ('\u{11a33}', '\u{11a38}'),
+ ('\u{11a3b}', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a51}', '\u{11a56}'),
+ ('\u{11a59}', '\u{11a5b}'),
+ ('\u{11a8a}', '\u{11a96}'),
+ ('\u{11a98}', '\u{11a99}'),
+ ('\u{11c30}', '\u{11c36}'),
+ ('\u{11c38}', '\u{11c3d}'),
+ ('\u{11c3f}', '\u{11c3f}'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('\u{11caa}', '\u{11cb0}'),
+ ('\u{11cb2}', '\u{11cb3}'),
+ ('\u{11cb5}', '\u{11cb6}'),
+ ('\u{11d31}', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d45}'),
+ ('\u{11d47}', '\u{11d47}'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('\u{11d95}', '\u{11d95}'),
+ ('\u{11d97}', '\u{11d97}'),
+ ('\u{11ef3}', '\u{11ef4}'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('\u{16b30}', '\u{16b36}'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('\u{16f8f}', '\u{16f92}'),
+ ('\u{16fe4}', '\u{16fe4}'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d165}', '\u{1d165}'),
+ ('\u{1d167}', '\u{1d169}'),
+ ('\u{1d16e}', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('\u{1e130}', '\u{1e136}'),
+ ('\u{1e2ec}', '\u{1e2ef}'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('\u{1e944}', '\u{1e94a}'),
+ ('\u{e0020}', '\u{e007f}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const GRAPHEME_LINK: &'static [(char, char)] = &[
+ ('\u{94d}', '\u{94d}'),
+ ('\u{9cd}', '\u{9cd}'),
+ ('\u{a4d}', '\u{a4d}'),
+ ('\u{acd}', '\u{acd}'),
+ ('\u{b4d}', '\u{b4d}'),
+ ('\u{bcd}', '\u{bcd}'),
+ ('\u{c4d}', '\u{c4d}'),
+ ('\u{ccd}', '\u{ccd}'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d4d}', '\u{d4d}'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{e3a}', '\u{e3a}'),
+ ('\u{eba}', '\u{eba}'),
+ ('\u{f84}', '\u{f84}'),
+ ('\u{1039}', '\u{103a}'),
+ ('\u{1714}', '\u{1714}'),
+ ('\u{1734}', '\u{1734}'),
+ ('\u{17d2}', '\u{17d2}'),
+ ('\u{1a60}', '\u{1a60}'),
+ ('᭄', '᭄'),
+ ('áŽĒ', '\u{1bab}'),
+ ('á¯˛', 'á¯ŗ'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('\u{a806}', '\u{a806}'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('\u{a8c4}', '\u{a8c4}'),
+ ('ęĨ“', 'ęĨ“'),
+ ('꧀', '꧀'),
+ ('\u{aaf6}', '\u{aaf6}'),
+ ('\u{abed}', '\u{abed}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('\u{11046}', '\u{11046}'),
+ ('\u{1107f}', '\u{1107f}'),
+ ('\u{110b9}', '\u{110b9}'),
+ ('\u{11133}', '\u{11134}'),
+ ('𑇀', '𑇀'),
+ ('đ‘ˆĩ', 'đ‘ˆĩ'),
+ ('\u{112ea}', '\u{112ea}'),
+ ('𑍍', '𑍍'),
+ ('\u{11442}', '\u{11442}'),
+ ('\u{114c2}', '\u{114c2}'),
+ ('\u{115bf}', '\u{115bf}'),
+ ('\u{1163f}', '\u{1163f}'),
+ ('đ‘šļ', 'đ‘šļ'),
+ ('\u{1172b}', '\u{1172b}'),
+ ('\u{11839}', '\u{11839}'),
+ ('\u{1193d}', '\u{1193e}'),
+ ('\u{119e0}', '\u{119e0}'),
+ ('\u{11a34}', '\u{11a34}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a99}', '\u{11a99}'),
+ ('\u{11c3f}', '\u{11c3f}'),
+ ('\u{11d44}', '\u{11d45}'),
+ ('\u{11d97}', '\u{11d97}'),
+];
+
+pub const HEX_DIGIT: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('A', 'F'),
+ ('a', 'f'),
+ ('īŧ', 'īŧ™'),
+ ('īŧĄ', 'īŧĻ'),
+ ('īŊ', 'īŊ†'),
+];
+
+pub const HYPHEN: &'static [(char, char)] = &[
+ ('-', '-'),
+ ('\u{ad}', '\u{ad}'),
+ ('֊', '֊'),
+ ('᠆', '᠆'),
+ ('‐', '‑'),
+ ('⸗', '⸗'),
+ ('ãƒģ', 'ãƒģ'),
+ ('īšŖ', 'īšŖ'),
+ ('īŧ', 'īŧ'),
+ ('īŊĨ', 'īŊĨ'),
+];
+
+pub const IDS_BINARY_OPERATOR: &'static [(char, char)] =
+ &[('âŋ°', 'âŋą'), ('âŋ´', 'âŋģ')];
+
+pub const IDS_TRINARY_OPERATOR: &'static [(char, char)] = &[('âŋ˛', 'âŋŗ')];
+
+pub const ID_CONTINUE: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('A', 'Z'),
+ ('_', '_'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('¡', '¡'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ˁ'),
+ ('ˆ', 'ˑ'),
+ ('ˠ', 'ˤ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('\u{300}', 'Í´'),
+ ('Íļ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('\u{483}', '\u{487}'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', 'ՙ'),
+ ('ՠ', 'ֈ'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('\u{610}', '\u{61a}'),
+ ('Ø ', 'ŲŠ'),
+ ('ŲŽ', 'ۓ'),
+ ('ە', '\u{6dc}'),
+ ('\u{6df}', '\u{6e8}'),
+ ('\u{6ea}', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', '\u{74a}'),
+ ('Ũ', 'Ūą'),
+ ('߀', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('ā €', '\u{82d}'),
+ ('āĄ€', '\u{85b}'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', '\u{963}'),
+ ('āĨĻ', 'āĨ¯'),
+ ('āĨą', 'āĻƒ'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('\u{9bc}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Ž'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', '\u{9e3}'),
+ ('ā§Ļ', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠĻ', '\u{a75}'),
+ ('\u{a81}', 'āĒƒ'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('\u{abc}', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', '\u{ae3}'),
+ ('āĢĻ', 'āĢ¯'),
+ ('āĢš', '\u{aff}'),
+ ('\u{b01}', 'āŦƒ'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('\u{b3c}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', '\u{b63}'),
+ ('ā­Ļ', 'ā­¯'),
+ ('ā­ą', 'ā­ą'),
+ ('\u{b82}', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('ā¯', 'ā¯'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('ā¯Ļ', 'ā¯¯'),
+ ('\u{c00}', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('āą˜', 'āąš'),
+ ('āą ', '\u{c63}'),
+ ('āąĻ', 'āą¯'),
+ ('ā˛€', 'ā˛ƒ'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('\u{cbc}', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', '\u{ce3}'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d00}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩŽ'),
+ ('āĩ”', '\u{d57}'),
+ ('āĩŸ', '\u{d63}'),
+ ('āĩĻ', 'āĩ¯'),
+ ('āĩē', 'āĩŋ'),
+ ('\u{d81}', 'āļƒ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āˇ˛', 'āˇŗ'),
+ ('ā¸', '\u{e3a}'),
+ ('āš€', '\u{e4e}'),
+ ('āš', 'āš™'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('āģ', 'āģ™'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('\u{f18}', '\u{f19}'),
+ ('āŧ ', 'āŧŠ'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('āŧž', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('\u{f71}', '\u{f84}'),
+ ('\u{f86}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('က', '၉'),
+ ('ၐ', '\u{109d}'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('\u{135d}', '\u{135f}'),
+ ('፩', '፱'),
+ ('ᎀ', 'ᎏ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛮ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', '\u{1714}'),
+ ('ᜠ', '\u{1734}'),
+ ('ᝀ', '\u{1753}'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('\u{1772}', '\u{1773}'),
+ ('ក', '\u{17d3}'),
+ ('ៗ', 'ៗ'),
+ ('ៜ', '\u{17dd}'),
+ ('០', '៩'),
+ ('\u{180b}', '\u{180d}'),
+ ('᠐', '᠙'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('áĨ†', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('᧐', '᧚'),
+ ('ᨀ', '\u{1a1b}'),
+ ('ᨠ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('áĒ§', 'áĒ§'),
+ ('\u{1ab0}', '\u{1abd}'),
+ ('\u{1abf}', '\u{1ac0}'),
+ ('\u{1b00}', 'ᭋ'),
+ ('᭐', '᭙'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', 'á¯ŗ'),
+ ('ᰀ', '\u{1c37}'),
+ ('᱀', '᱉'),
+ ('ᱍ', 'áąŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', 'áŗē'),
+ ('ᴀ', '\u{1df9}'),
+ ('\u{1dfb}', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('â€ŋ', '⁀'),
+ ('⁔', '⁔'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('\u{20d0}', '\u{20dc}'),
+ ('\u{20e1}', '\u{20e1}'),
+ ('\u{20e5}', '\u{20f0}'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('℘', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('\u{2d7f}', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('々', '〇'),
+ ('ã€Ą', '\u{302f}'),
+ ('ã€ą', 'ã€ĩ'),
+ ('〸', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('\u{3099}', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŧ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ę˜Ģ'),
+ ('Ꙁ', '\u{a66f}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('ę™ŋ', '\u{a6f1}'),
+ ('ꜗ', 'ꜟ'),
+ ('ęœĸ', 'ꞈ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ę §'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ€', '\u{a8c5}'),
+ ('ęŖ', 'ęŖ™'),
+ ('\u{a8e0}', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', '\u{a92d}'),
+ ('ꤰ', 'ęĨ“'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('\u{a980}', '꧀'),
+ ('ꧏ', '꧙'),
+ ('ꧠ', '꧞'),
+ ('ꨀ', '\u{aa36}'),
+ ('ꩀ', 'ꩍ'),
+ ('꩐', '꩙'),
+ ('ꊠ', 'ęŠļ'),
+ ('ęŠē', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢ'),
+ ('ęĢ ', 'ęĢ¯'),
+ ('ęĢ˛', '\u{aaf6}'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab69}'),
+ ('ę­°', 'ę¯Ē'),
+ ('ę¯Ŧ', '\u{abed}'),
+ ('ę¯°', 'ę¯š'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŦ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('ī¸ŗ', 'ī¸´'),
+ ('īš', 'īš'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŧ', 'īŧ™'),
+ ('īŧĄ', 'īŧē'),
+ ('īŧŋ', 'īŧŋ'),
+ ('īŊ', 'īŊš'),
+ ('īŊĻ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐅀', '𐅴'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍊'),
+ ('𐍐', '\u{1037a}'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐏑', '𐏕'),
+ ('𐐀', '𐒝'),
+ ('𐒠', '𐒩'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', '\u{10ae6}'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('𐴀', '\u{10d27}'),
+ ('𐴰', '𐴚'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', '\u{10f50}'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀀', '\u{11046}'),
+ ('đ‘Ļ', 'đ‘¯'),
+ ('\u{1107f}', '\u{110ba}'),
+ ('𑃐', '𑃨'),
+ ('𑃰', '𑃹'),
+ ('\u{11100}', '\u{11134}'),
+ ('đ‘„ļ', 'đ‘„ŋ'),
+ ('𑅄', '\u{11147}'),
+ ('𑅐', '\u{11173}'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('\u{11180}', '𑇄'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111ce}', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '\u{112ea}'),
+ ('𑋰', '𑋹'),
+ ('\u{11300}', '𑌃'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('\u{1133b}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('𑍐', '𑍐'),
+ ('\u{11357}', '\u{11357}'),
+ ('𑍝', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('𑐀', '𑑊'),
+ ('𑑐', '𑑙'),
+ ('\u{1145e}', '\u{11461}'),
+ ('𑒀', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑓐', '𑓙'),
+ ('𑖀', '\u{115b5}'),
+ ('𑖸', '\u{115c0}'),
+ ('𑗘', '\u{115dd}'),
+ ('𑘀', '\u{11640}'),
+ ('𑙄', '𑙄'),
+ ('𑙐', '𑙙'),
+ ('𑚀', '𑚸'),
+ ('𑛀', '𑛉'),
+ ('𑜀', '𑜚'),
+ ('\u{1171d}', '\u{1172b}'),
+ ('𑜰', '𑜹'),
+ ('𑠀', '\u{1183a}'),
+ ('đ‘ĸ ', 'đ‘ŖŠ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{11943}'),
+ ('\u{11950}', '\u{11959}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '\u{119d7}'),
+ ('\u{119da}', '𑧡'),
+ ('đ‘§Ŗ', '𑧤'),
+ ('𑨀', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('𑩐', '\u{11a99}'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '\u{11c36}'),
+ ('\u{11c38}', '𑱀'),
+ ('𑱐', '𑱙'),
+ ('𑱲', '𑲏'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d47}'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', 'đ‘ļ˜'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('đ‘ģ ', 'đ‘ģļ'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('𖩠', '𖩩'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('đ–Ŧ€', '\u{16b36}'),
+ ('𖭀', '𖭃'),
+ ('𖭐', '𖭙'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–šŋ'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('\u{16f4f}', '𖾇'),
+ ('\u{16f8f}', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', '\u{16fe4}'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d165}', '\u{1d169}'),
+ ('𝅭', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𝟎', 'đŸŋ'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('𞄀', 'đž„Ŧ'),
+ ('\u{1e130}', 'đž„Ŋ'),
+ ('𞅀', '𞅉'),
+ ('𞅎', '𞅎'),
+ ('𞋀', '𞋹'),
+ ('𞠀', 'đžŖ„'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('𞤀', 'đžĨ‹'),
+ ('đžĨ', 'đžĨ™'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const ID_START: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ˁ'),
+ ('ˆ', 'ˑ'),
+ ('ˠ', 'ˤ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('Í°', 'Í´'),
+ ('Íļ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', 'ՙ'),
+ ('ՠ', 'ֈ'),
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('Ø ', 'ŲŠ'),
+ ('ŲŽ', 'Ų¯'),
+ ('Ųą', 'ۓ'),
+ ('ە', 'ە'),
+ ('ÛĨ', 'ÛĻ'),
+ ('ÛŽ', 'Û¯'),
+ ('Ûē', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', 'ܐ'),
+ ('ܒ', 'ܯ'),
+ ('Ũ', 'ŪĨ'),
+ ('Ūą', 'Ūą'),
+ ('ߊ', 'ßĒ'),
+ ('ß´', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('ā €', 'ā •'),
+ ('ā š', 'ā š'),
+ ('ā ¤', 'ā ¤'),
+ ('ā ¨', 'ā ¨'),
+ ('āĄ€', 'āĄ˜'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('ā¤„', 'ā¤š'),
+ ('ā¤Ŋ', 'ā¤Ŋ'),
+ ('āĨ', 'āĨ'),
+ ('āĨ˜', 'āĨĄ'),
+ ('āĨą', 'āĻ€'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('āĻŊ', 'āĻŊ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', 'ā§Ą'),
+ ('ā§°', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠ˛', 'āŠ´'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('āĒŊ', 'āĒŊ'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', 'āĢĄ'),
+ ('āĢš', 'āĢš'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('āŦŊ', 'āŦŊ'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', 'ā­Ą'),
+ ('ā­ą', 'ā­ą'),
+ ('āŽƒ', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('ā¯', 'ā¯'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'ā°Ŋ'),
+ ('āą˜', 'āąš'),
+ ('āą ', 'āąĄ'),
+ ('ā˛€', 'ā˛€'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛Ŋ', 'ā˛Ŋ'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', 'āŗĄ'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d04}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´ē'),
+ ('ā´Ŋ', 'ā´Ŋ'),
+ ('āĩŽ', 'āĩŽ'),
+ ('āĩ”', 'āĩ–'),
+ ('āĩŸ', 'āĩĄ'),
+ ('āĩē', 'āĩŋ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('ā¸', 'ā¸°'),
+ ('ā¸˛', 'ā¸ŗ'),
+ ('āš€', 'āš†'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āē°'),
+ ('āē˛', 'āēŗ'),
+ ('āēŊ', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('āŊ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('āžˆ', 'āžŒ'),
+ ('က', 'á€Ē'),
+ ('á€ŋ', 'á€ŋ'),
+ ('ၐ', 'ၕ'),
+ ('ၚ', 'ၝ'),
+ ('ၥ', 'ၥ'),
+ ('áĨ', 'áĻ'),
+ ('၎', 'ၰ'),
+ ('áĩ', 'ႁ'),
+ ('ႎ', 'ႎ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('ᎀ', 'ᎏ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛮ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', 'ᜑ'),
+ ('ᜠ', 'ᜱ'),
+ ('ᝀ', 'ᝑ'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('ក', 'ážŗ'),
+ ('ៗ', 'ៗ'),
+ ('ៜ', 'ៜ'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸ¨'),
+ ('áĸĒ', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('áĨ', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('ᨀ', 'ᨖ'),
+ ('ᨠ', 'ᩔ'),
+ ('áĒ§', 'áĒ§'),
+ ('áŦ…', 'áŦŗ'),
+ ('ᭅ', 'ᭋ'),
+ ('ᎃ', 'Ꭰ'),
+ ('ᎎ', 'Ꭿ'),
+ ('áŽē', 'á¯Ĩ'),
+ ('ᰀ', 'á°Ŗ'),
+ ('ᱍ', 'ᱏ'),
+ ('ᱚ', 'áąŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗļ'),
+ ('áŗē', 'áŗē'),
+ ('ᴀ', 'áļŋ'),
+ ('Ḁ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('℘', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('々', '〇'),
+ ('ã€Ą', '《'),
+ ('ã€ą', 'ã€ĩ'),
+ ('〸', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('゛', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŧ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ꘟ'),
+ ('ę˜Ē', 'ę˜Ģ'),
+ ('Ꙁ', 'ꙮ'),
+ ('ę™ŋ', 'ꚝ'),
+ ('ꚠ', 'ę›¯'),
+ ('ꜗ', 'ꜟ'),
+ ('ęœĸ', 'ꞈ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ꠁ'),
+ ('ꠃ', 'ꠅ'),
+ ('ꠇ', 'ꠊ'),
+ ('ꠌ', 'ę ĸ'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ‚', 'ęĸŗ'),
+ ('ęŖ˛', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', 'ęŖž'),
+ ('ꤊ', 'ę¤Ĩ'),
+ ('ꤰ', 'ęĨ†'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('ęĻ„', 'ęĻ˛'),
+ ('ꧏ', 'ꧏ'),
+ ('ꧠ', 'ꧤ'),
+ ('ę§Ļ', 'ę§¯'),
+ ('ę§ē', '꧞'),
+ ('ꨀ', 'ꨨ'),
+ ('ꩀ', 'ꩂ'),
+ ('ꩄ', 'ꩋ'),
+ ('ꊠ', 'ęŠļ'),
+ ('ęŠē', 'ęŠē'),
+ ('ꊞ', 'ęĒ¯'),
+ ('ęĒą', 'ęĒą'),
+ ('ęĒĩ', 'ęĒļ'),
+ ('ęĒš', 'ęĒŊ'),
+ ('ęĢ€', 'ęĢ€'),
+ ('ęĢ‚', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢ'),
+ ('ęĢ ', 'ęĢĒ'),
+ ('ęĢ˛', 'ęĢ´'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab69}'),
+ ('ę­°', 'ę¯ĸ'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŦ', 'īŦ'),
+ ('īŦŸ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+ ('īŊĻ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐅀', '𐅴'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍊'),
+ ('𐍐', 'đĩ'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐏑', '𐏕'),
+ ('𐐀', '𐒝'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '𐨀'),
+ ('𐨐', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', 'đĢ¤'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('𐴀', 'đ´Ŗ'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', 'đŊ…'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀃', '𑀷'),
+ ('𑂃', 'đ‘‚¯'),
+ ('𑃐', '𑃨'),
+ ('𑄃', 'đ‘„Ļ'),
+ ('𑅄', '𑅄'),
+ ('\u{11147}', '\u{11147}'),
+ ('𑅐', '𑅲'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('𑆃', '𑆲'),
+ ('𑇁', '𑇄'),
+ ('𑇚', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', 'đ‘ˆĢ'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '𑋞'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('đ‘ŒŊ', 'đ‘ŒŊ'),
+ ('𑍐', '𑍐'),
+ ('𑍝', '𑍡'),
+ ('𑐀', '𑐴'),
+ ('𑑇', '𑑊'),
+ ('𑑟', '\u{11461}'),
+ ('𑒀', 'đ‘’¯'),
+ ('𑓄', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑖀', '𑖮'),
+ ('𑗘', '𑗛'),
+ ('𑘀', 'đ‘˜¯'),
+ ('𑙄', '𑙄'),
+ ('𑚀', 'đ‘šĒ'),
+ ('𑚸', '𑚸'),
+ ('𑜀', '𑜚'),
+ ('𑠀', 'đ‘ Ģ'),
+ ('đ‘ĸ ', 'đ‘ŖŸ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{1192f}'),
+ ('\u{1193f}', '\u{1193f}'),
+ ('\u{11941}', '\u{11941}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '𑧐'),
+ ('𑧡', '𑧡'),
+ ('đ‘§Ŗ', 'đ‘§Ŗ'),
+ ('𑨀', '𑨀'),
+ ('𑨋', '𑨲'),
+ ('đ‘¨ē', 'đ‘¨ē'),
+ ('𑩐', '𑩐'),
+ ('𑩜', 'đ‘Ē‰'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '𑰮'),
+ ('𑱀', '𑱀'),
+ ('𑱲', '𑲏'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '𑴰'),
+ ('đ‘ĩ†', 'đ‘ĩ†'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļ‰'),
+ ('đ‘ļ˜', 'đ‘ļ˜'),
+ ('đ‘ģ ', 'đ‘ģ˛'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('đ–Ŧ€', 'đ–Ŧ¯'),
+ ('𖭀', '𖭃'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–šŋ'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('đ–Ŋ', 'đ–Ŋ'),
+ ('𖾓', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', 'đ–ŋŖ'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𞄀', 'đž„Ŧ'),
+ ('𞄷', 'đž„Ŋ'),
+ ('𞅎', '𞅎'),
+ ('𞋀', 'đž‹Ģ'),
+ ('𞠀', 'đžŖ„'),
+ ('𞤀', 'đžĨƒ'),
+ ('đžĨ‹', 'đžĨ‹'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const IDEOGRAPHIC: &'static [(char, char)] = &[
+ ('〆', '〇'),
+ ('ã€Ą', '《'),
+ ('〸', 'ã€ē'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('\u{16fe4}', '\u{16fe4}'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const JOIN_CONTROL: &'static [(char, char)] = &[('\u{200c}', '\u{200d}')];
+
+pub const LOGICAL_ORDER_EXCEPTION: &'static [(char, char)] = &[
+ ('āš€', 'āš„'),
+ ('āģ€', 'āģ„'),
+ ('áĻĩ', 'áĻˇ'),
+ ('áĻē', 'áĻē'),
+ ('ęĒĩ', 'ęĒļ'),
+ ('ęĒš', 'ęĒš'),
+ ('ęĒģ', 'ęĒŧ'),
+];
+
+pub const LOWERCASE: &'static [(char, char)] = &[
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('ß', 'Ãļ'),
+ ('ø', 'Ãŋ'),
+ ('ā', 'ā'),
+ ('ă', 'ă'),
+ ('ą', 'ą'),
+ ('ć', 'ć'),
+ ('ĉ', 'ĉ'),
+ ('ċ', 'ċ'),
+ ('č', 'č'),
+ ('ď', 'ď'),
+ ('đ', 'đ'),
+ ('ē', 'ē'),
+ ('ĕ', 'ĕ'),
+ ('ė', 'ė'),
+ ('ę', 'ę'),
+ ('ě', 'ě'),
+ ('ĝ', 'ĝ'),
+ ('ğ', 'ğ'),
+ ('ÄĄ', 'ÄĄ'),
+ ('ÄŖ', 'ÄŖ'),
+ ('ÄĨ', 'ÄĨ'),
+ ('ħ', 'ħ'),
+ ('ÄŠ', 'ÄŠ'),
+ ('ÄĢ', 'ÄĢ'),
+ ('Ä­', 'Ä­'),
+ ('į', 'į'),
+ ('Äą', 'Äą'),
+ ('Äŗ', 'Äŗ'),
+ ('Äĩ', 'Äĩ'),
+ ('ġ', 'ĸ'),
+ ('Äē', 'Äē'),
+ ('Äŧ', 'Äŧ'),
+ ('Äž', 'Äž'),
+ ('ŀ', 'ŀ'),
+ ('ł', 'ł'),
+ ('ń', 'ń'),
+ ('ņ', 'ņ'),
+ ('ň', 'ʼn'),
+ ('ŋ', 'ŋ'),
+ ('ō', 'ō'),
+ ('ŏ', 'ŏ'),
+ ('ő', 'ő'),
+ ('œ', 'œ'),
+ ('ŕ', 'ŕ'),
+ ('ŗ', 'ŗ'),
+ ('ř', 'ř'),
+ ('ś', 'ś'),
+ ('ŝ', 'ŝ'),
+ ('ş', 'ş'),
+ ('ÅĄ', 'ÅĄ'),
+ ('ÅŖ', 'ÅŖ'),
+ ('ÅĨ', 'ÅĨ'),
+ ('ŧ', 'ŧ'),
+ ('ÅŠ', 'ÅŠ'),
+ ('ÅĢ', 'ÅĢ'),
+ ('Å­', 'Å­'),
+ ('ů', 'ů'),
+ ('Åą', 'Åą'),
+ ('Åŗ', 'Åŗ'),
+ ('Åĩ', 'Åĩ'),
+ ('Åˇ', 'Åˇ'),
+ ('Åē', 'Åē'),
+ ('Åŧ', 'Åŧ'),
+ ('Åž', 'ƀ'),
+ ('ƃ', 'ƃ'),
+ ('ƅ', 'ƅ'),
+ ('ƈ', 'ƈ'),
+ ('ƌ', 'ƍ'),
+ ('ƒ', 'ƒ'),
+ ('ƕ', 'ƕ'),
+ ('ƙ', 'ƛ'),
+ ('ƞ', 'ƞ'),
+ ('ÆĄ', 'ÆĄ'),
+ ('ÆŖ', 'ÆŖ'),
+ ('ÆĨ', 'ÆĨ'),
+ ('ƨ', 'ƨ'),
+ ('ÆĒ', 'ÆĢ'),
+ ('Æ­', 'Æ­'),
+ ('Æ°', 'Æ°'),
+ ('Æ´', 'Æ´'),
+ ('Æļ', 'Æļ'),
+ ('Æš', 'Æē'),
+ ('ÆŊ', 'Æŋ'),
+ ('Į†', 'Į†'),
+ ('Į‰', 'Į‰'),
+ ('ĮŒ', 'ĮŒ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Į', 'Į'),
+ ('Į’', 'Į’'),
+ ('Į”', 'Į”'),
+ ('Į–', 'Į–'),
+ ('Į˜', 'Į˜'),
+ ('Įš', 'Įš'),
+ ('Įœ', 'Į'),
+ ('ĮŸ', 'ĮŸ'),
+ ('ĮĄ', 'ĮĄ'),
+ ('ĮŖ', 'ĮŖ'),
+ ('ĮĨ', 'ĮĨ'),
+ ('Į§', 'Į§'),
+ ('ĮŠ', 'ĮŠ'),
+ ('ĮĢ', 'ĮĢ'),
+ ('Į­', 'Į­'),
+ ('Į¯', 'Į°'),
+ ('Įŗ', 'Įŗ'),
+ ('Įĩ', 'Įĩ'),
+ ('Įš', 'Įš'),
+ ('Įģ', 'Įģ'),
+ ('ĮŊ', 'ĮŊ'),
+ ('Įŋ', 'Įŋ'),
+ ('ȁ', 'ȁ'),
+ ('ȃ', 'ȃ'),
+ ('ȅ', 'ȅ'),
+ ('ȇ', 'ȇ'),
+ ('ȉ', 'ȉ'),
+ ('ȋ', 'ȋ'),
+ ('ȍ', 'ȍ'),
+ ('ȏ', 'ȏ'),
+ ('ȑ', 'ȑ'),
+ ('ȓ', 'ȓ'),
+ ('ȕ', 'ȕ'),
+ ('ȗ', 'ȗ'),
+ ('ș', 'ș'),
+ ('ț', 'ț'),
+ ('ȝ', 'ȝ'),
+ ('ȟ', 'ȟ'),
+ ('ČĄ', 'ČĄ'),
+ ('ČŖ', 'ČŖ'),
+ ('ČĨ', 'ČĨ'),
+ ('ȧ', 'ȧ'),
+ ('ČŠ', 'ČŠ'),
+ ('ČĢ', 'ČĢ'),
+ ('Č­', 'Č­'),
+ ('Č¯', 'Č¯'),
+ ('Čą', 'Čą'),
+ ('Čŗ', 'Čš'),
+ ('Čŧ', 'Čŧ'),
+ ('Čŋ', 'ɀ'),
+ ('ɂ', 'ɂ'),
+ ('ɇ', 'ɇ'),
+ ('ɉ', 'ɉ'),
+ ('ɋ', 'ɋ'),
+ ('ɍ', 'ɍ'),
+ ('ɏ', 'ʓ'),
+ ('ʕ', 'ʸ'),
+ ('ˀ', 'ˁ'),
+ ('ˠ', 'ˤ'),
+ ('\u{345}', '\u{345}'),
+ ('Íą', 'Íą'),
+ ('Íŗ', 'Íŗ'),
+ ('͡', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('ΐ', 'ΐ'),
+ ('ÎŦ', 'ĪŽ'),
+ ('Ī', 'Ī‘'),
+ ('Ī•', 'Ī—'),
+ ('Ī™', 'Ī™'),
+ ('Ī›', 'Ī›'),
+ ('Ī', 'Ī'),
+ ('ĪŸ', 'ĪŸ'),
+ ('ĪĄ', 'ĪĄ'),
+ ('ĪŖ', 'ĪŖ'),
+ ('ĪĨ', 'ĪĨ'),
+ ('Ī§', 'Ī§'),
+ ('ĪŠ', 'ĪŠ'),
+ ('ĪĢ', 'ĪĢ'),
+ ('Ī­', 'Ī­'),
+ ('Ī¯', 'Īŗ'),
+ ('Īĩ', 'Īĩ'),
+ ('Ī¸', 'Ī¸'),
+ ('Īģ', 'Īŧ'),
+ ('Đ°', 'ŅŸ'),
+ ('ŅĄ', 'ŅĄ'),
+ ('ŅŖ', 'ŅŖ'),
+ ('ŅĨ', 'ŅĨ'),
+ ('Ņ§', 'Ņ§'),
+ ('ŅŠ', 'ŅŠ'),
+ ('ŅĢ', 'ŅĢ'),
+ ('Ņ­', 'Ņ­'),
+ ('Ņ¯', 'Ņ¯'),
+ ('Ņą', 'Ņą'),
+ ('Ņŗ', 'Ņŗ'),
+ ('Ņĩ', 'Ņĩ'),
+ ('Ņˇ', 'Ņˇ'),
+ ('Ņš', 'Ņš'),
+ ('Ņģ', 'Ņģ'),
+ ('ŅŊ', 'ŅŊ'),
+ ('Ņŋ', 'Ņŋ'),
+ ('Ō', 'Ō'),
+ ('Ō‹', 'Ō‹'),
+ ('Ō', 'Ō'),
+ ('Ō', 'Ō'),
+ ('Ō‘', 'Ō‘'),
+ ('Ō“', 'Ō“'),
+ ('Ō•', 'Ō•'),
+ ('Ō—', 'Ō—'),
+ ('Ō™', 'Ō™'),
+ ('Ō›', 'Ō›'),
+ ('Ō', 'Ō'),
+ ('ŌŸ', 'ŌŸ'),
+ ('ŌĄ', 'ŌĄ'),
+ ('ŌŖ', 'ŌŖ'),
+ ('ŌĨ', 'ŌĨ'),
+ ('Ō§', 'Ō§'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌĢ', 'ŌĢ'),
+ ('Ō­', 'Ō­'),
+ ('Ō¯', 'Ō¯'),
+ ('Ōą', 'Ōą'),
+ ('Ōŗ', 'Ōŗ'),
+ ('Ōĩ', 'Ōĩ'),
+ ('Ōˇ', 'Ōˇ'),
+ ('Ōš', 'Ōš'),
+ ('Ōģ', 'Ōģ'),
+ ('ŌŊ', 'ŌŊ'),
+ ('Ōŋ', 'Ōŋ'),
+ ('Ķ‚', 'Ķ‚'),
+ ('Ķ„', 'Ķ„'),
+ ('Ķ†', 'Ķ†'),
+ ('Ķˆ', 'Ķˆ'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶŒ', 'ĶŒ'),
+ ('ĶŽ', 'Ķ'),
+ ('Ķ‘', 'Ķ‘'),
+ ('Ķ“', 'Ķ“'),
+ ('Ķ•', 'Ķ•'),
+ ('Ķ—', 'Ķ—'),
+ ('Ķ™', 'Ķ™'),
+ ('Ķ›', 'Ķ›'),
+ ('Ķ', 'Ķ'),
+ ('ĶŸ', 'ĶŸ'),
+ ('ĶĄ', 'ĶĄ'),
+ ('ĶŖ', 'ĶŖ'),
+ ('ĶĨ', 'ĶĨ'),
+ ('Ķ§', 'Ķ§'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶĢ', 'ĶĢ'),
+ ('Ķ­', 'Ķ­'),
+ ('Ķ¯', 'Ķ¯'),
+ ('Ķą', 'Ķą'),
+ ('Ķŗ', 'Ķŗ'),
+ ('Ķĩ', 'Ķĩ'),
+ ('Ķˇ', 'Ķˇ'),
+ ('Ķš', 'Ķš'),
+ ('Ķģ', 'Ķģ'),
+ ('ĶŊ', 'ĶŊ'),
+ ('Ķŋ', 'Ķŋ'),
+ ('ԁ', 'ԁ'),
+ ('ԃ', 'ԃ'),
+ ('ԅ', 'ԅ'),
+ ('ԇ', 'ԇ'),
+ ('ԉ', 'ԉ'),
+ ('ԋ', 'ԋ'),
+ ('ԍ', 'ԍ'),
+ ('ԏ', 'ԏ'),
+ ('ԑ', 'ԑ'),
+ ('ԓ', 'ԓ'),
+ ('ԕ', 'ԕ'),
+ ('ԗ', 'ԗ'),
+ ('ԙ', 'ԙ'),
+ ('ԛ', 'ԛ'),
+ ('ԝ', 'ԝ'),
+ ('ԟ', 'ԟ'),
+ ('ÔĄ', 'ÔĄ'),
+ ('ÔŖ', 'ÔŖ'),
+ ('ÔĨ', 'ÔĨ'),
+ ('Ô§', 'Ô§'),
+ ('ÔŠ', 'ÔŠ'),
+ ('ÔĢ', 'ÔĢ'),
+ ('Ô­', 'Ô­'),
+ ('Ô¯', 'Ô¯'),
+ ('ՠ', 'ֈ'),
+ ('ა', 'áƒē'),
+ ('áƒŊ', 'áƒŋ'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('ᴀ', 'áļŋ'),
+ ('ḁ', 'ḁ'),
+ ('ḃ', 'ḃ'),
+ ('ḅ', 'ḅ'),
+ ('ḇ', 'ḇ'),
+ ('ḉ', 'ḉ'),
+ ('ḋ', 'ḋ'),
+ ('ḍ', 'ḍ'),
+ ('ḏ', 'ḏ'),
+ ('ḑ', 'ḑ'),
+ ('ḓ', 'ḓ'),
+ ('ḕ', 'ḕ'),
+ ('ḗ', 'ḗ'),
+ ('ḙ', 'ḙ'),
+ ('ḛ', 'ḛ'),
+ ('ḝ', 'ḝ'),
+ ('ḟ', 'ḟ'),
+ ('ḥ', 'ḥ'),
+ ('á¸Ŗ', 'á¸Ŗ'),
+ ('á¸Ĩ', 'á¸Ĩ'),
+ ('ḧ', 'ḧ'),
+ ('Ḋ', 'Ḋ'),
+ ('á¸Ģ', 'á¸Ģ'),
+ ('ḭ', 'ḭ'),
+ ('ḯ', 'ḯ'),
+ ('ḹ', 'ḹ'),
+ ('á¸ŗ', 'á¸ŗ'),
+ ('á¸ĩ', 'á¸ĩ'),
+ ('ḡ', 'ḡ'),
+ ('Ḛ', 'Ḛ'),
+ ('á¸ģ', 'á¸ģ'),
+ ('á¸Ŋ', 'á¸Ŋ'),
+ ('á¸ŋ', 'á¸ŋ'),
+ ('ᚁ', 'ᚁ'),
+ ('ᚃ', 'ᚃ'),
+ ('ṅ', 'ṅ'),
+ ('ṇ', 'ṇ'),
+ ('ṉ', 'ṉ'),
+ ('ṋ', 'ṋ'),
+ ('ᚍ', 'ᚍ'),
+ ('ᚏ', 'ᚏ'),
+ ('ṑ', 'ṑ'),
+ ('ṓ', 'ṓ'),
+ ('ṕ', 'ṕ'),
+ ('ṗ', 'ṗ'),
+ ('ṙ', 'ṙ'),
+ ('ṛ', 'ṛ'),
+ ('᚝', '᚝'),
+ ('ṟ', 'ṟ'),
+ ('ᚥ', 'ᚥ'),
+ ('ášŖ', 'ášŖ'),
+ ('ášĨ', 'ášĨ'),
+ ('ᚧ', 'ᚧ'),
+ ('ᚊ', 'ᚊ'),
+ ('ášĢ', 'ášĢ'),
+ ('áš­', 'áš­'),
+ ('ᚯ', 'ᚯ'),
+ ('ášą', 'ášą'),
+ ('ášŗ', 'ášŗ'),
+ ('ášĩ', 'ášĩ'),
+ ('ᚡ', 'ᚡ'),
+ ('ášš', 'ášš'),
+ ('ášģ', 'ášģ'),
+ ('ášŊ', 'ášŊ'),
+ ('ášŋ', 'ášŋ'),
+ ('áē', 'áē'),
+ ('áēƒ', 'áēƒ'),
+ ('áē…', 'áē…'),
+ ('áē‡', 'áē‡'),
+ ('áē‰', 'áē‰'),
+ ('áē‹', 'áē‹'),
+ ('áē', 'áē'),
+ ('áē', 'áē'),
+ ('áē‘', 'áē‘'),
+ ('áē“', 'áē“'),
+ ('áē•', 'áē'),
+ ('áēŸ', 'áēŸ'),
+ ('áēĄ', 'áēĄ'),
+ ('áēŖ', 'áēŖ'),
+ ('áēĨ', 'áēĨ'),
+ ('áē§', 'áē§'),
+ ('áēŠ', 'áēŠ'),
+ ('áēĢ', 'áēĢ'),
+ ('áē­', 'áē­'),
+ ('áē¯', 'áē¯'),
+ ('áēą', 'áēą'),
+ ('áēŗ', 'áēŗ'),
+ ('áēĩ', 'áēĩ'),
+ ('áēˇ', 'áēˇ'),
+ ('áēš', 'áēš'),
+ ('áēģ', 'áēģ'),
+ ('áēŊ', 'áēŊ'),
+ ('áēŋ', 'áēŋ'),
+ ('áģ', 'áģ'),
+ ('áģƒ', 'áģƒ'),
+ ('áģ…', 'áģ…'),
+ ('áģ‡', 'áģ‡'),
+ ('áģ‰', 'áģ‰'),
+ ('áģ‹', 'áģ‹'),
+ ('áģ', 'áģ'),
+ ('áģ', 'áģ'),
+ ('áģ‘', 'áģ‘'),
+ ('áģ“', 'áģ“'),
+ ('áģ•', 'áģ•'),
+ ('áģ—', 'áģ—'),
+ ('áģ™', 'áģ™'),
+ ('áģ›', 'áģ›'),
+ ('áģ', 'áģ'),
+ ('áģŸ', 'áģŸ'),
+ ('áģĄ', 'áģĄ'),
+ ('áģŖ', 'áģŖ'),
+ ('áģĨ', 'áģĨ'),
+ ('áģ§', 'áģ§'),
+ ('áģŠ', 'áģŠ'),
+ ('áģĢ', 'áģĢ'),
+ ('áģ­', 'áģ­'),
+ ('áģ¯', 'áģ¯'),
+ ('áģą', 'áģą'),
+ ('áģŗ', 'áģŗ'),
+ ('áģĩ', 'áģĩ'),
+ ('áģˇ', 'áģˇ'),
+ ('áģš', 'áģš'),
+ ('áģģ', 'áģģ'),
+ ('áģŊ', 'áģŊ'),
+ ('áģŋ', 'áŧ‡'),
+ ('áŧ', 'áŧ•'),
+ ('áŧ ', 'áŧ§'),
+ ('áŧ°', 'áŧˇ'),
+ ('áŊ€', 'áŊ…'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ ', 'áŊ§'),
+ ('áŊ°', 'áŊŊ'),
+ ('ᾀ', 'ᾇ'),
+ ('ᾐ', 'ᾗ'),
+ ('ហ', 'ឧ'),
+ ('áž°', 'áž´'),
+ ('ážļ', 'ឡ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋ‡'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ—'),
+ ('áŋ ', 'áŋ§'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋˇ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ℊ', 'ℊ'),
+ ('ℎ', 'ℏ'),
+ ('ℓ', 'ℓ'),
+ ('ℯ', 'ℯ'),
+ ('ℴ', 'ℴ'),
+ ('ℹ', 'ℹ'),
+ ('â„ŧ', 'â„Ŋ'),
+ ('ⅆ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('ⅰ', 'â…ŋ'),
+ ('ↄ', 'ↄ'),
+ ('ⓐ', 'ⓩ'),
+ ('ⰰ', 'ⱞ'),
+ ('⹥', '⹥'),
+ ('âąĨ', 'âąĻ'),
+ ('⹨', '⹨'),
+ ('âąĒ', 'âąĒ'),
+ ('âąŦ', 'âąŦ'),
+ ('âąą', 'âąą'),
+ ('âąŗ', 'âą´'),
+ ('âąļ', 'âąŊ'),
+ ('ⲁ', 'ⲁ'),
+ ('ⲃ', 'ⲃ'),
+ ('ⲅ', 'ⲅ'),
+ ('ⲇ', 'ⲇ'),
+ ('ⲉ', 'ⲉ'),
+ ('ⲋ', 'ⲋ'),
+ ('ⲍ', 'ⲍ'),
+ ('ⲏ', 'ⲏ'),
+ ('ⲑ', 'ⲑ'),
+ ('ⲓ', 'ⲓ'),
+ ('ⲕ', 'ⲕ'),
+ ('ⲗ', 'ⲗ'),
+ ('ⲙ', 'ⲙ'),
+ ('ⲛ', 'ⲛ'),
+ ('ⲝ', 'ⲝ'),
+ ('ⲟ', 'ⲟ'),
+ ('ⲥ', 'ⲥ'),
+ ('â˛Ŗ', 'â˛Ŗ'),
+ ('â˛Ĩ', 'â˛Ĩ'),
+ ('ⲧ', 'ⲧ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('â˛Ģ', 'â˛Ģ'),
+ ('ⲭ', 'ⲭ'),
+ ('â˛¯', 'â˛¯'),
+ ('ⲹ', 'ⲹ'),
+ ('â˛ŗ', 'â˛ŗ'),
+ ('â˛ĩ', 'â˛ĩ'),
+ ('ⲡ', 'ⲡ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('â˛ģ', 'â˛ģ'),
+ ('â˛Ŋ', 'â˛Ŋ'),
+ ('â˛ŋ', 'â˛ŋ'),
+ ('âŗ', 'âŗ'),
+ ('âŗƒ', 'âŗƒ'),
+ ('âŗ…', 'âŗ…'),
+ ('âŗ‡', 'âŗ‡'),
+ ('âŗ‰', 'âŗ‰'),
+ ('âŗ‹', 'âŗ‹'),
+ ('âŗ', 'âŗ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ‘', 'âŗ‘'),
+ ('âŗ“', 'âŗ“'),
+ ('âŗ•', 'âŗ•'),
+ ('âŗ—', 'âŗ—'),
+ ('âŗ™', 'âŗ™'),
+ ('âŗ›', 'âŗ›'),
+ ('âŗ', 'âŗ'),
+ ('âŗŸ', 'âŗŸ'),
+ ('âŗĄ', 'âŗĄ'),
+ ('âŗŖ', 'âŗ¤'),
+ ('âŗŦ', 'âŗŦ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗŗ', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('ꙁ', 'ꙁ'),
+ ('ꙃ', 'ꙃ'),
+ ('ꙅ', 'ꙅ'),
+ ('ꙇ', 'ꙇ'),
+ ('ꙉ', 'ꙉ'),
+ ('ꙋ', 'ꙋ'),
+ ('ꙍ', 'ꙍ'),
+ ('ꙏ', 'ꙏ'),
+ ('ꙑ', 'ꙑ'),
+ ('ꙓ', 'ꙓ'),
+ ('ꙕ', 'ꙕ'),
+ ('ꙗ', 'ꙗ'),
+ ('ꙙ', 'ꙙ'),
+ ('ꙛ', 'ꙛ'),
+ ('ꙝ', 'ꙝ'),
+ ('ꙟ', 'ꙟ'),
+ ('ꙡ', 'ꙡ'),
+ ('ę™Ŗ', 'ę™Ŗ'),
+ ('ę™Ĩ', 'ę™Ĩ'),
+ ('ꙧ', 'ꙧ'),
+ ('ꙩ', 'ꙩ'),
+ ('ę™Ģ', 'ę™Ģ'),
+ ('ꙭ', 'ꙭ'),
+ ('ꚁ', 'ꚁ'),
+ ('ꚃ', 'ꚃ'),
+ ('ꚅ', 'ꚅ'),
+ ('ꚇ', 'ꚇ'),
+ ('ꚉ', 'ꚉ'),
+ ('ꚋ', 'ꚋ'),
+ ('ꚍ', 'ꚍ'),
+ ('ꚏ', 'ꚏ'),
+ ('ꚑ', 'ꚑ'),
+ ('ꚓ', 'ꚓ'),
+ ('ꚕ', 'ꚕ'),
+ ('ꚗ', 'ꚗ'),
+ ('ꚙ', 'ꚙ'),
+ ('ꚛ', 'ꚝ'),
+ ('ęœŖ', 'ęœŖ'),
+ ('ęœĨ', 'ęœĨ'),
+ ('ꜧ', 'ꜧ'),
+ ('ꜩ', 'ꜩ'),
+ ('ęœĢ', 'ęœĢ'),
+ ('ꜭ', 'ꜭ'),
+ ('ęœ¯', 'ꜱ'),
+ ('ęœŗ', 'ęœŗ'),
+ ('ęœĩ', 'ęœĩ'),
+ ('ꜷ', 'ꜷ'),
+ ('ꜹ', 'ꜹ'),
+ ('ęœģ', 'ęœģ'),
+ ('ęœŊ', 'ęœŊ'),
+ ('ęœŋ', 'ęœŋ'),
+ ('ꝁ', 'ꝁ'),
+ ('ꝃ', 'ꝃ'),
+ ('ꝅ', 'ꝅ'),
+ ('ꝇ', 'ꝇ'),
+ ('ꝉ', 'ꝉ'),
+ ('ꝋ', 'ꝋ'),
+ ('ꝍ', 'ꝍ'),
+ ('ꝏ', 'ꝏ'),
+ ('ꝑ', 'ꝑ'),
+ ('ꝓ', 'ꝓ'),
+ ('ꝕ', 'ꝕ'),
+ ('ꝗ', 'ꝗ'),
+ ('ꝙ', 'ꝙ'),
+ ('ꝛ', 'ꝛ'),
+ ('ꝝ', 'ꝝ'),
+ ('ꝟ', 'ꝟ'),
+ ('ꝡ', 'ꝡ'),
+ ('ęŖ', 'ęŖ'),
+ ('ęĨ', 'ęĨ'),
+ ('ꝧ', 'ꝧ'),
+ ('ꝩ', 'ꝩ'),
+ ('ęĢ', 'ęĢ'),
+ ('ꝭ', 'ꝭ'),
+ ('ę¯', 'ꝸ'),
+ ('ęē', 'ęē'),
+ ('ęŧ', 'ęŧ'),
+ ('ęŋ', 'ęŋ'),
+ ('ꞁ', 'ꞁ'),
+ ('ꞃ', 'ꞃ'),
+ ('ꞅ', 'ꞅ'),
+ ('ꞇ', 'ꞇ'),
+ ('ꞌ', 'ꞌ'),
+ ('ꞎ', 'ꞎ'),
+ ('ꞑ', 'ꞑ'),
+ ('ꞓ', 'ꞕ'),
+ ('ꞗ', 'ꞗ'),
+ ('ꞙ', 'ꞙ'),
+ ('ꞛ', 'ꞛ'),
+ ('ꞝ', 'ꞝ'),
+ ('ꞟ', 'ꞟ'),
+ ('ꞡ', 'ꞡ'),
+ ('ęžŖ', 'ęžŖ'),
+ ('ęžĨ', 'ęžĨ'),
+ ('ꞧ', 'ꞧ'),
+ ('ꞩ', 'ꞩ'),
+ ('ęž¯', 'ęž¯'),
+ ('ęžĩ', 'ęžĩ'),
+ ('ꞷ', 'ꞷ'),
+ ('ꞹ', 'ꞹ'),
+ ('ęžģ', 'ęžģ'),
+ ('ęžŊ', 'ęžŊ'),
+ ('ęžŋ', 'ęžŋ'),
+ ('ꟃ', 'ꟃ'),
+ ('\u{a7c8}', '\u{a7c8}'),
+ ('\u{a7ca}', '\u{a7ca}'),
+ ('\u{a7f6}', '\u{a7f6}'),
+ ('ꟸ', 'ęŸē'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab68}'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŊ', 'īŊš'),
+ ('𐐨', '𐑏'),
+ ('𐓘', 'đ“ģ'),
+ ('đŗ€', 'đŗ˛'),
+ ('đ‘Ŗ€', 'đ‘ŖŸ'),
+ ('𖹠', 'đ–šŋ'),
+ ('𝐚', 'đŗ'),
+ ('𝑎', '𝑔'),
+ ('𝑖', '𝑧'),
+ ('𝒂', '𝒛'),
+ ('đ’ļ', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝓏'),
+ ('đ“Ē', '𝔃'),
+ ('𝔞', '𝔷'),
+ ('𝕒', 'đ•Ģ'),
+ ('𝖆', '𝖟'),
+ ('đ–ē', '𝗓'),
+ ('𝗮', '𝘇'),
+ ('đ˜ĸ', 'đ˜ģ'),
+ ('𝙖', 'đ™¯'),
+ ('𝚊', 'đšĨ'),
+ ('𝛂', '𝛚'),
+ ('𝛜', '𝛡'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜛'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝕'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞏'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟉'),
+ ('𝟋', '𝟋'),
+ ('đž¤ĸ', 'đžĨƒ'),
+];
+
+pub const MATH: &'static [(char, char)] = &[
+ ('+', '+'),
+ ('<', '>'),
+ ('^', '^'),
+ ('|', '|'),
+ ('~', '~'),
+ ('ÂŦ', 'ÂŦ'),
+ ('Âą', 'Âą'),
+ ('×', '×'),
+ ('Ãˇ', 'Ãˇ'),
+ ('Ī', 'Ī’'),
+ ('Ī•', 'Ī•'),
+ ('Ī°', 'Īą'),
+ ('Ī´', 'Īļ'),
+ ('؆', '؈'),
+ ('‖', '‖'),
+ ('′', '‴'),
+ ('⁀', '⁀'),
+ ('⁄', '⁄'),
+ ('⁒', '⁒'),
+ ('\u{2061}', '\u{2064}'),
+ ('âē', '⁞'),
+ ('₊', '₎'),
+ ('\u{20d0}', '\u{20dc}'),
+ ('\u{20e1}', '\u{20e1}'),
+ ('\u{20e5}', '\u{20e6}'),
+ ('\u{20eb}', '\u{20ef}'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('℘', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('ℨ', '℩'),
+ ('â„Ŧ', 'ℭ'),
+ ('ℯ', 'ℱ'),
+ ('â„ŗ', 'ℸ'),
+ ('â„ŧ', 'ⅉ'),
+ ('⅋', '⅋'),
+ ('←', '↧'),
+ ('↩', '↮'),
+ ('↰', '↱'),
+ ('â†ļ', '↷'),
+ ('â†ŧ', '⇛'),
+ ('⇝', '⇝'),
+ ('⇤', 'â‡Ĩ'),
+ ('⇴', 'â‹ŋ'),
+ ('⌈', '⌋'),
+ ('⌠', '⌡'),
+ ('âŧ', 'âŧ'),
+ ('⎛', 'âŽĩ'),
+ ('⎷', '⎷'),
+ ('⏐', '⏐'),
+ ('⏜', 'âĸ'),
+ ('■', '□'),
+ ('▮', '▷'),
+ ('â–ŧ', '◁'),
+ ('◆', '◇'),
+ ('◊', '○'),
+ ('●', '◓'),
+ ('â—ĸ', 'â—ĸ'),
+ ('◤', '◤'),
+ ('◧', 'â—Ŧ'),
+ ('◸', 'â—ŋ'),
+ ('★', '☆'),
+ ('♀', '♀'),
+ ('♂', '♂'),
+ ('♠', 'â™Ŗ'),
+ ('♭', '♯'),
+ ('⟀', 'âŸŋ'),
+ ('⤀', 'âĢŋ'),
+ ('âŦ°', '⭄'),
+ ('⭇', '⭌'),
+ ('īŦŠ', 'īŦŠ'),
+ ('īšĄ', 'īšĻ'),
+ ('īš¨', 'īš¨'),
+ ('īŧ‹', 'īŧ‹'),
+ ('īŧœ', 'īŧž'),
+ ('īŧŧ', 'īŧŧ'),
+ ('īŧž', 'īŧž'),
+ ('īŊœ', 'īŊœ'),
+ ('īŊž', 'īŊž'),
+ ('īŋĸ', 'īŋĸ'),
+ ('īŋŠ', 'īŋŦ'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝟋'),
+ ('𝟎', 'đŸŋ'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('đžģ°', 'đžģą'),
+];
+
+pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[
+ ('\u{fdd0}', '\u{fdef}'),
+ ('\u{fffe}', '\u{ffff}'),
+ ('\u{1fffe}', '\u{1ffff}'),
+ ('\u{2fffe}', '\u{2ffff}'),
+ ('\u{3fffe}', '\u{3ffff}'),
+ ('\u{4fffe}', '\u{4ffff}'),
+ ('\u{5fffe}', '\u{5ffff}'),
+ ('\u{6fffe}', '\u{6ffff}'),
+ ('\u{7fffe}', '\u{7ffff}'),
+ ('\u{8fffe}', '\u{8ffff}'),
+ ('\u{9fffe}', '\u{9ffff}'),
+ ('\u{afffe}', '\u{affff}'),
+ ('\u{bfffe}', '\u{bffff}'),
+ ('\u{cfffe}', '\u{cffff}'),
+ ('\u{dfffe}', '\u{dffff}'),
+ ('\u{efffe}', '\u{effff}'),
+ ('\u{ffffe}', '\u{fffff}'),
+ ('\u{10fffe}', '\u{10ffff}'),
+];
+
+pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[
+ ('\u{345}', '\u{345}'),
+ ('\u{5b0}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('\u{610}', '\u{61a}'),
+ ('\u{64b}', '\u{657}'),
+ ('\u{659}', '\u{65f}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{6d6}', '\u{6dc}'),
+ ('\u{6e1}', '\u{6e4}'),
+ ('\u{6e7}', '\u{6e8}'),
+ ('\u{6ed}', '\u{6ed}'),
+ ('\u{711}', '\u{711}'),
+ ('\u{730}', '\u{73f}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{816}', '\u{817}'),
+ ('\u{81b}', '\u{823}'),
+ ('\u{825}', '\u{827}'),
+ ('\u{829}', '\u{82c}'),
+ ('\u{8d4}', '\u{8df}'),
+ ('\u{8e3}', '\u{8e9}'),
+ ('\u{8f0}', 'ā¤ƒ'),
+ ('\u{93a}', 'ā¤ģ'),
+ ('ā¤ž', 'āĨŒ'),
+ ('āĨŽ', 'āĨ'),
+ ('\u{955}', '\u{957}'),
+ ('\u{962}', '\u{963}'),
+ ('\u{981}', 'āĻƒ'),
+ ('\u{9be}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Œ'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('\u{9e2}', '\u{9e3}'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4c}'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a70}', '\u{a71}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{a81}', 'āĒƒ'),
+ ('āĒž', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', 'āĢŒ'),
+ ('\u{ae2}', '\u{ae3}'),
+ ('\u{afa}', '\u{afc}'),
+ ('\u{b01}', 'āŦƒ'),
+ ('\u{b3e}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', 'ā­Œ'),
+ ('\u{b56}', '\u{b57}'),
+ ('\u{b62}', '\u{b63}'),
+ ('\u{b82}', '\u{b82}'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', 'ā¯Œ'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('\u{c00}', 'ā°ƒ'),
+ ('\u{c3e}', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4c}'),
+ ('\u{c55}', '\u{c56}'),
+ ('\u{c62}', '\u{c63}'),
+ ('\u{c81}', 'ā˛ƒ'),
+ ('ā˛ž', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccc}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('\u{d00}', 'ā´ƒ'),
+ ('\u{d3e}', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩŒ'),
+ ('\u{d57}', '\u{d57}'),
+ ('\u{d62}', '\u{d63}'),
+ ('\u{d81}', 'āļƒ'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇ˛', 'āˇŗ'),
+ ('\u{e31}', '\u{e31}'),
+ ('\u{e34}', '\u{e3a}'),
+ ('\u{e4d}', '\u{e4d}'),
+ ('\u{eb1}', '\u{eb1}'),
+ ('\u{eb4}', '\u{eb9}'),
+ ('\u{ebb}', '\u{ebc}'),
+ ('\u{ecd}', '\u{ecd}'),
+ ('\u{f71}', '\u{f81}'),
+ ('\u{f8d}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('á€Ģ', '\u{1036}'),
+ ('း', 'း'),
+ ('á€ģ', '\u{103e}'),
+ ('ၖ', '\u{1059}'),
+ ('\u{105e}', '\u{1060}'),
+ ('áĸ', 'ၤ'),
+ ('ၧ', 'ၭ'),
+ ('\u{1071}', '\u{1074}'),
+ ('\u{1082}', '\u{108d}'),
+ ('ႏ', 'ႏ'),
+ ('ႚ', '\u{109d}'),
+ ('\u{1712}', '\u{1713}'),
+ ('\u{1732}', '\u{1733}'),
+ ('\u{1752}', '\u{1753}'),
+ ('\u{1772}', '\u{1773}'),
+ ('ážļ', 'ៈ'),
+ ('\u{1885}', '\u{1886}'),
+ ('\u{18a9}', '\u{18a9}'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', 'ᤸ'),
+ ('\u{1a17}', '\u{1a1b}'),
+ ('ᩕ', '\u{1a5e}'),
+ ('እ', '\u{1a74}'),
+ ('\u{1abf}', '\u{1ac0}'),
+ ('\u{1b00}', 'áŦ„'),
+ ('\u{1b35}', 'ᭃ'),
+ ('\u{1b80}', 'ᮂ'),
+ ('Ꭵ', '\u{1ba9}'),
+ ('\u{1bac}', '\u{1bad}'),
+ ('ᯧ', '\u{1bf1}'),
+ ('á°¤', '\u{1c36}'),
+ ('\u{1de7}', '\u{1df4}'),
+ ('â’ļ', 'ⓩ'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('\u{a674}', '\u{a67b}'),
+ ('\u{a69e}', '\u{a69f}'),
+ ('\u{a802}', '\u{a802}'),
+ ('\u{a80b}', '\u{a80b}'),
+ ('ę Ŗ', 'ę §'),
+ ('ęĸ€', 'ęĸ'),
+ ('ęĸ´', 'ęŖƒ'),
+ ('\u{a8c5}', '\u{a8c5}'),
+ ('\u{a8ff}', '\u{a8ff}'),
+ ('\u{a926}', '\u{a92a}'),
+ ('\u{a947}', 'ęĨ’'),
+ ('\u{a980}', 'ęĻƒ'),
+ ('ęĻ´', 'ęĻŋ'),
+ ('\u{a9e5}', '\u{a9e5}'),
+ ('\u{aa29}', '\u{aa36}'),
+ ('\u{aa43}', '\u{aa43}'),
+ ('\u{aa4c}', 'ꩍ'),
+ ('ęŠģ', 'ęŠŊ'),
+ ('\u{aab0}', '\u{aab0}'),
+ ('\u{aab2}', '\u{aab4}'),
+ ('\u{aab7}', '\u{aab8}'),
+ ('\u{aabe}', '\u{aabe}'),
+ ('ęĢĢ', 'ęĢ¯'),
+ ('ęĢĩ', 'ęĢĩ'),
+ ('ę¯Ŗ', 'ę¯Ē'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('\u{10376}', '\u{1037a}'),
+ ('\u{10a01}', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '\u{10a0f}'),
+ ('\u{10d24}', '\u{10d27}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('𑀀', '𑀂'),
+ ('\u{11038}', '\u{11045}'),
+ ('𑂂', '𑂂'),
+ ('𑂰', '𑂸'),
+ ('\u{11100}', '\u{11102}'),
+ ('\u{11127}', '\u{11132}'),
+ ('𑅅', '𑅆'),
+ ('\u{11180}', '𑆂'),
+ ('đ‘†ŗ', 'đ‘†ŋ'),
+ ('\u{111ce}', '\u{111cf}'),
+ ('đ‘ˆŦ', '\u{11234}'),
+ ('\u{11237}', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('\u{112df}', '\u{112e8}'),
+ ('\u{11300}', '𑌃'),
+ ('\u{1133e}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍌'),
+ ('\u{11357}', '\u{11357}'),
+ ('đ‘ĸ', 'đ‘Ŗ'),
+ ('đ‘ĩ', '𑑁'),
+ ('\u{11443}', '𑑅'),
+ ('\u{114b0}', '𑓁'),
+ ('\u{115af}', '\u{115b5}'),
+ ('𑖸', '𑖾'),
+ ('\u{115dc}', '\u{115dd}'),
+ ('𑘰', '𑘾'),
+ ('\u{11640}', '\u{11640}'),
+ ('\u{116ab}', '\u{116b5}'),
+ ('\u{1171d}', '\u{1172a}'),
+ ('đ‘ Ŧ', '𑠸'),
+ ('\u{11930}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{1193c}'),
+ ('\u{11940}', '\u{11940}'),
+ ('\u{11942}', '\u{11942}'),
+ ('𑧑', '\u{119d7}'),
+ ('\u{119da}', '𑧟'),
+ ('𑧤', '𑧤'),
+ ('\u{11a01}', '\u{11a0a}'),
+ ('\u{11a35}', '𑨹'),
+ ('\u{11a3b}', '\u{11a3e}'),
+ ('\u{11a51}', '\u{11a5b}'),
+ ('\u{11a8a}', 'đ‘Ē—'),
+ ('đ‘°¯', '\u{11c36}'),
+ ('\u{11c38}', '𑰾'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('\u{11d31}', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d41}'),
+ ('\u{11d43}', '\u{11d43}'),
+ ('\u{11d47}', '\u{11d47}'),
+ ('đ‘ļŠ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', 'đ‘ļ–'),
+ ('\u{11ef3}', 'đ‘ģļ'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('đ–Ŋ‘', '𖾇'),
+ ('\u{16f8f}', '\u{16f92}'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('\u{1bc9e}', '\u{1bc9e}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('\u{1e947}', '\u{1e947}'),
+ ('🄰', '🅉'),
+ ('🅐', '🅩'),
+ ('🅰', '🆉'),
+];
+
+pub const OTHER_DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
+ ('\u{34f}', '\u{34f}'),
+ ('ᅟ', 'ᅠ'),
+ ('\u{17b4}', '\u{17b5}'),
+ ('\u{2065}', '\u{2065}'),
+ ('ㅤ', 'ㅤ'),
+ ('īž ', 'īž '),
+ ('\u{fff0}', '\u{fff8}'),
+ ('\u{e0000}', '\u{e0000}'),
+ ('\u{e0002}', '\u{e001f}'),
+ ('\u{e0080}', '\u{e00ff}'),
+ ('\u{e01f0}', '\u{e0fff}'),
+];
+
+pub const OTHER_GRAPHEME_EXTEND: &'static [(char, char)] = &[
+ ('\u{9be}', '\u{9be}'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('\u{b3e}', '\u{b3e}'),
+ ('\u{b57}', '\u{b57}'),
+ ('\u{bbe}', '\u{bbe}'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('\u{cc2}', '\u{cc2}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('\u{d3e}', '\u{d3e}'),
+ ('\u{d57}', '\u{d57}'),
+ ('\u{dcf}', '\u{dcf}'),
+ ('\u{ddf}', '\u{ddf}'),
+ ('\u{1b35}', '\u{1b35}'),
+ ('\u{200c}', '\u{200c}'),
+ ('\u{302e}', '\u{302f}'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('\u{1133e}', '\u{1133e}'),
+ ('\u{11357}', '\u{11357}'),
+ ('\u{114b0}', '\u{114b0}'),
+ ('\u{114bd}', '\u{114bd}'),
+ ('\u{115af}', '\u{115af}'),
+ ('\u{11930}', '\u{11930}'),
+ ('\u{1d165}', '\u{1d165}'),
+ ('\u{1d16e}', '\u{1d172}'),
+ ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const OTHER_ID_CONTINUE: &'static [(char, char)] =
+ &[('·', '·'), ('·', '·'), ('፩', '፱'), ('᧚', '᧚')];
+
+pub const OTHER_ID_START: &'static [(char, char)] =
+ &[('\u{1885}', '\u{1886}'), ('℘', '℘'), ('℮', '℮'), ('゛', '゜')];
+
+pub const OTHER_LOWERCASE: &'static [(char, char)] = &[
+ ('ÂĒ', 'ÂĒ'),
+ ('Âē', 'Âē'),
+ ('ʰ', 'ʸ'),
+ ('ˀ', 'ˁ'),
+ ('ˠ', 'ˤ'),
+ ('\u{345}', '\u{345}'),
+ ('Íē', 'Íē'),
+ ('á´Ŧ', 'áĩĒ'),
+ ('áĩ¸', 'áĩ¸'),
+ ('áļ›', 'áļŋ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ⅰ', 'â…ŋ'),
+ ('ⓐ', 'ⓩ'),
+ ('âąŧ', 'âąŊ'),
+ ('ꚜ', 'ꚝ'),
+ ('ꝰ', 'ꝰ'),
+ ('ꟸ', 'ꟹ'),
+ ('ꭜ', 'ꭟ'),
+];
+
+pub const OTHER_MATH: &'static [(char, char)] = &[
+ ('^', '^'),
+ ('Ī', 'Ī’'),
+ ('Ī•', 'Ī•'),
+ ('Ī°', 'Īą'),
+ ('Ī´', 'Īĩ'),
+ ('‖', '‖'),
+ ('′', '‴'),
+ ('⁀', '⁀'),
+ ('\u{2061}', '\u{2064}'),
+ ('âŊ', '⁞'),
+ ('₍', '₎'),
+ ('\u{20d0}', '\u{20dc}'),
+ ('\u{20e1}', '\u{20e1}'),
+ ('\u{20e5}', '\u{20e6}'),
+ ('\u{20eb}', '\u{20ef}'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('ℨ', '℩'),
+ ('â„Ŧ', 'ℭ'),
+ ('ℯ', 'ℱ'),
+ ('â„ŗ', 'ℸ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('↕', '↙'),
+ ('↜', '↟'),
+ ('↡', 'â†ĸ'),
+ ('↤', 'â†Ĩ'),
+ ('↧', '↧'),
+ ('↩', '↭'),
+ ('↰', '↱'),
+ ('â†ļ', '↷'),
+ ('â†ŧ', '⇍'),
+ ('⇐', '⇑'),
+ ('⇓', '⇓'),
+ ('⇕', '⇛'),
+ ('⇝', '⇝'),
+ ('⇤', 'â‡Ĩ'),
+ ('⌈', '⌋'),
+ ('⎴', 'âŽĩ'),
+ ('⎷', '⎷'),
+ ('⏐', '⏐'),
+ ('âĸ', 'âĸ'),
+ ('■', '□'),
+ ('▮', 'â–ļ'),
+ ('â–ŧ', '◀'),
+ ('◆', '◇'),
+ ('◊', '○'),
+ ('●', '◓'),
+ ('â—ĸ', 'â—ĸ'),
+ ('◤', '◤'),
+ ('◧', 'â—Ŧ'),
+ ('★', '☆'),
+ ('♀', '♀'),
+ ('♂', '♂'),
+ ('♠', 'â™Ŗ'),
+ ('♭', '♮'),
+ ('⟅', '⟆'),
+ ('âŸĻ', '⟯'),
+ ('âĻƒ', 'âĻ˜'),
+ ('⧘', '⧛'),
+ ('â§ŧ', 'â§Ŋ'),
+ ('īšĄ', 'īšĄ'),
+ ('īšŖ', 'īšŖ'),
+ ('īš¨', 'īš¨'),
+ ('īŧŧ', 'īŧŧ'),
+ ('īŧž', 'īŧž'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𝟎', 'đŸŋ'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+];
+
+pub const OTHER_UPPERCASE: &'static [(char, char)] =
+ &[('Ⅰ', 'Ⅿ'), ('â’ļ', 'Ⓩ'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉')];
+
+pub const PATTERN_SYNTAX: &'static [(char, char)] = &[
+ ('!', '/'),
+ (':', '@'),
+ ('[', '^'),
+ ('`', '`'),
+ ('{', '~'),
+ ('¥', '§'),
+ ('Š', 'Š'),
+ ('ÂĢ', 'ÂŦ'),
+ ('ÂŽ', 'ÂŽ'),
+ ('°', '¹'),
+ ('Âļ', 'Âļ'),
+ ('Âģ', 'Âģ'),
+ ('Âŋ', 'Âŋ'),
+ ('×', '×'),
+ ('Ãˇ', 'Ãˇ'),
+ ('‐', '‧'),
+ ('‰', '‾'),
+ ('⁁', '⁓'),
+ ('⁕', '⁞'),
+ ('←', '\u{245f}'),
+ ('─', 'âĩ'),
+ ('➔', 'â¯ŋ'),
+ ('⸀', '\u{2e7f}'),
+ ('、', '〃'),
+ ('〈', '〠'),
+ ('〰', '〰'),
+ ('ī´ž', 'ī´ŋ'),
+ ('īš…', 'īš†'),
+];
+
+pub const PATTERN_WHITE_SPACE: &'static [(char, char)] = &[
+ ('\t', '\r'),
+ (' ', ' '),
+ ('\u{85}', '\u{85}'),
+ ('\u{200e}', '\u{200f}'),
+ ('\u{2028}', '\u{2029}'),
+];
+
+pub const PREPENDED_CONCATENATION_MARK: &'static [(char, char)] = &[
+ ('\u{600}', '\u{605}'),
+ ('\u{6dd}', '\u{6dd}'),
+ ('\u{70f}', '\u{70f}'),
+ ('\u{8e2}', '\u{8e2}'),
+ ('\u{110bd}', '\u{110bd}'),
+ ('\u{110cd}', '\u{110cd}'),
+];
+
+pub const QUOTATION_MARK: &'static [(char, char)] = &[
+ ('\"', '\"'),
+ ('\'', '\''),
+ ('ÂĢ', 'ÂĢ'),
+ ('Âģ', 'Âģ'),
+ ('‘', '‟'),
+ ('‹', 'â€ē'),
+ ('⹂', '⹂'),
+ ('「', '』'),
+ ('〝', '〟'),
+ ('īš', 'īš„'),
+ ('īŧ‚', 'īŧ‚'),
+ ('īŧ‡', 'īŧ‡'),
+ ('īŊĸ', 'īŊŖ'),
+];
+
+pub const RADICAL: &'static [(char, char)] =
+ &[('âē€', 'âē™'), ('âē›', 'âģŗ'), ('âŧ€', 'âŋ•')];
+
+pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('đŸ‡Ļ', 'đŸ‡ŋ')];
+
+pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[
+ ('!', '!'),
+ ('.', '.'),
+ ('?', '?'),
+ ('։', '։'),
+ ('؞', '؟'),
+ ('۔', '۔'),
+ ('܀', '܂'),
+ ('ßš', 'ßš'),
+ ('ā ˇ', 'ā ˇ'),
+ ('ā š', 'ā š'),
+ ('ā Ŋ', 'ā ž'),
+ ('āĨ¤', 'āĨĨ'),
+ ('၊', '။'),
+ ('áĸ', 'áĸ'),
+ ('፧', '፨'),
+ ('᙮', '᙮'),
+ ('áœĩ', 'áœļ'),
+ ('᠃', '᠃'),
+ ('᠉', '᠉'),
+ ('áĨ„', 'áĨ…'),
+ ('áĒ¨', 'áĒĢ'),
+ ('᭚', '᭛'),
+ ('᭞', '᭟'),
+ ('á°ģ', 'á°ŧ'),
+ ('áąž', 'áąŋ'),
+ ('â€ŧ', 'â€Ŋ'),
+ ('⁇', '⁉'),
+ ('⸎', '⸎'),
+ ('â¸ŧ', 'â¸ŧ'),
+ ('。', '。'),
+ ('ę“ŋ', 'ę“ŋ'),
+ ('꘎', '꘏'),
+ ('ę›ŗ', 'ę›ŗ'),
+ ('꛷', '꛷'),
+ ('ęĄļ', 'ꥡ'),
+ ('ęŖŽ', 'ęŖ'),
+ ('ę¤¯', 'ę¤¯'),
+ ('꧈', '꧉'),
+ ('꩝', '꩟'),
+ ('ęĢ°', 'ęĢą'),
+ ('ę¯Ģ', 'ę¯Ģ'),
+ ('īš’', 'īš’'),
+ ('īš–', 'īš—'),
+ ('īŧ', 'īŧ'),
+ ('īŧŽ', 'īŧŽ'),
+ ('īŧŸ', 'īŧŸ'),
+ ('īŊĄ', 'īŊĄ'),
+ ('𐩖', '𐩗'),
+ ('đŊ•', 'đŊ™'),
+ ('𑁇', '𑁈'),
+ ('𑂾', '𑃁'),
+ ('𑅁', '𑅃'),
+ ('𑇅', '𑇆'),
+ ('𑇍', '𑇍'),
+ ('𑇞', '𑇟'),
+ ('𑈸', '𑈹'),
+ ('đ‘ˆģ', 'đ‘ˆŧ'),
+ ('𑊩', '𑊩'),
+ ('𑑋', '𑑌'),
+ ('𑗂', '𑗃'),
+ ('𑗉', '𑗗'),
+ ('𑙁', '𑙂'),
+ ('đ‘œŧ', '𑜾'),
+ ('\u{11944}', '\u{11944}'),
+ ('\u{11946}', '\u{11946}'),
+ ('𑩂', '𑩃'),
+ ('đ‘Ē›', 'đ‘Ēœ'),
+ ('𑱁', '𑱂'),
+ ('đ‘ģˇ', 'đ‘ģ¸'),
+ ('𖩮', 'đ–Š¯'),
+ ('đ–Ģĩ', 'đ–Ģĩ'),
+ ('đ–Ŧˇ', 'đ–Ŧ¸'),
+ ('𖭄', '𖭄'),
+ ('đ–ē˜', 'đ–ē˜'),
+ ('𛲟', '𛲟'),
+ ('đĒˆ', 'đĒˆ'),
+];
+
+pub const SOFT_DOTTED: &'static [(char, char)] = &[
+ ('i', 'j'),
+ ('į', 'į'),
+ ('ɉ', 'ɉ'),
+ ('ɨ', 'ɨ'),
+ ('ʝ', 'ʝ'),
+ ('ʲ', 'ʲ'),
+ ('Īŗ', 'Īŗ'),
+ ('Ņ–', 'Ņ–'),
+ ('Ņ˜', 'Ņ˜'),
+ ('áĩĸ', 'áĩĸ'),
+ ('áļ–', 'áļ–'),
+ ('áļ¤', 'áļ¤'),
+ ('áļ¨', 'áļ¨'),
+ ('ḭ', 'ḭ'),
+ ('áģ‹', 'áģ‹'),
+ ('⁹', '⁹'),
+ ('ⅈ', 'ⅉ'),
+ ('âąŧ', 'âąŧ'),
+ ('đĸ', 'đŖ'),
+ ('𝑖', '𝑗'),
+ ('𝒊', '𝒋'),
+ ('𝒾', 'đ’ŋ'),
+ ('𝓲', 'đ“ŗ'),
+ ('đ”Ļ', '𝔧'),
+ ('𝕚', '𝕛'),
+ ('𝖎', '𝖏'),
+ ('𝗂', '𝗃'),
+ ('đ—ļ', '𝗷'),
+ ('đ˜Ē', 'đ˜Ģ'),
+ ('𝙞', '𝙟'),
+ ('𝚒', '𝚓'),
+];
+
+pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[
+ ('!', '!'),
+ (',', ','),
+ ('.', '.'),
+ (':', ';'),
+ ('?', '?'),
+ ('Íž', 'Íž'),
+ ('·', '·'),
+ ('։', '։'),
+ ('׃', '׃'),
+ ('،', '،'),
+ ('؛', '؛'),
+ ('؞', '؟'),
+ ('۔', '۔'),
+ ('܀', '܊'),
+ ('܌', '܌'),
+ ('߸', 'ߚ'),
+ ('ā °', 'ā ž'),
+ ('āĄž', 'āĄž'),
+ ('āĨ¤', 'āĨĨ'),
+ ('āšš', 'āš›'),
+ ('āŧˆ', 'āŧˆ'),
+ ('āŧ', 'āŧ’'),
+ ('၊', '။'),
+ ('፡', '፨'),
+ ('᙮', '᙮'),
+ ('á›Ģ', '᛭'),
+ ('áœĩ', 'áœļ'),
+ ('។', '៖'),
+ ('៚', '៚'),
+ ('᠂', '᠅'),
+ ('᠈', '᠉'),
+ ('áĨ„', 'áĨ…'),
+ ('áĒ¨', 'áĒĢ'),
+ ('᭚', '᭛'),
+ ('᭝', '᭟'),
+ ('á°ģ', 'á°ŋ'),
+ ('áąž', 'áąŋ'),
+ ('â€ŧ', 'â€Ŋ'),
+ ('⁇', '⁉'),
+ ('⸎', '⸎'),
+ ('â¸ŧ', 'â¸ŧ'),
+ ('⚁', '⚁'),
+ ('⹌', '⹌'),
+ ('⚎', '⚏'),
+ ('、', '。'),
+ ('꓾', 'ę“ŋ'),
+ ('꘍', '꘏'),
+ ('ę›ŗ', '꛷'),
+ ('ęĄļ', 'ꥡ'),
+ ('ęŖŽ', 'ęŖ'),
+ ('ę¤¯', 'ę¤¯'),
+ ('꧇', '꧉'),
+ ('꩝', '꩟'),
+ ('ęĢŸ', 'ęĢŸ'),
+ ('ęĢ°', 'ęĢą'),
+ ('ę¯Ģ', 'ę¯Ģ'),
+ ('īš', 'īš’'),
+ ('īš”', 'īš—'),
+ ('īŧ', 'īŧ'),
+ ('īŧŒ', 'īŧŒ'),
+ ('īŧŽ', 'īŧŽ'),
+ ('īŧš', 'īŧ›'),
+ ('īŧŸ', 'īŧŸ'),
+ ('īŊĄ', 'īŊĄ'),
+ ('īŊ¤', 'īŊ¤'),
+ ('𐎟', '𐎟'),
+ ('𐏐', '𐏐'),
+ ('𐡗', '𐡗'),
+ ('𐤟', '𐤟'),
+ ('𐩖', '𐩗'),
+ ('đĢ°', 'đĢĩ'),
+ ('đŦē', 'đŦŋ'),
+ ('𐮙', '𐮜'),
+ ('đŊ•', 'đŊ™'),
+ ('𑁇', '𑁍'),
+ ('𑂾', '𑃁'),
+ ('𑅁', '𑅃'),
+ ('𑇅', '𑇆'),
+ ('𑇍', '𑇍'),
+ ('𑇞', '𑇟'),
+ ('𑈸', 'đ‘ˆŧ'),
+ ('𑊩', '𑊩'),
+ ('𑑋', '𑑍'),
+ ('\u{1145a}', '𑑛'),
+ ('𑗂', '𑗅'),
+ ('𑗉', '𑗗'),
+ ('𑙁', '𑙂'),
+ ('đ‘œŧ', '𑜾'),
+ ('\u{11944}', '\u{11944}'),
+ ('\u{11946}', '\u{11946}'),
+ ('𑩂', '𑩃'),
+ ('đ‘Ē›', 'đ‘Ēœ'),
+ ('đ‘ĒĄ', 'đ‘Ēĸ'),
+ ('𑱁', '𑱃'),
+ ('𑱱', '𑱱'),
+ ('đ‘ģˇ', 'đ‘ģ¸'),
+ ('𒑰', '𒑴'),
+ ('𖩮', 'đ–Š¯'),
+ ('đ–Ģĩ', 'đ–Ģĩ'),
+ ('đ–Ŧˇ', 'đ–Ŧš'),
+ ('𖭄', '𖭄'),
+ ('đ–ē—', 'đ–ē˜'),
+ ('𛲟', '𛲟'),
+ ('đĒ‡', 'đĒŠ'),
+];
+
+pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ī¨Ž', 'ī¨'),
+ ('ī¨‘', 'ī¨‘'),
+ ('ī¨“', 'ī¨”'),
+ ('ī¨Ÿ', 'ī¨Ÿ'),
+ ('ī¨Ą', 'ī¨Ą'),
+ ('ī¨Ŗ', 'ī¨¤'),
+ ('ī¨§', 'ī¨Š'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const UPPERCASE: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('À', 'Ö'),
+ ('Ø', 'Þ'),
+ ('Ā', 'Ā'),
+ ('Ă', 'Ă'),
+ ('Ą', 'Ą'),
+ ('Ć', 'Ć'),
+ ('Ĉ', 'Ĉ'),
+ ('Ċ', 'Ċ'),
+ ('Č', 'Č'),
+ ('Ď', 'Ď'),
+ ('Đ', 'Đ'),
+ ('Ē', 'Ē'),
+ ('Ĕ', 'Ĕ'),
+ ('Ė', 'Ė'),
+ ('Ę', 'Ę'),
+ ('Ě', 'Ě'),
+ ('Ĝ', 'Ĝ'),
+ ('Ğ', 'Ğ'),
+ ('Ä ', 'Ä '),
+ ('Äĸ', 'Äĸ'),
+ ('Ĥ', 'Ĥ'),
+ ('ÄĻ', 'ÄĻ'),
+ ('Ĩ', 'Ĩ'),
+ ('ÄĒ', 'ÄĒ'),
+ ('ÄŦ', 'ÄŦ'),
+ ('ÄŽ', 'ÄŽ'),
+ ('Ä°', 'Ä°'),
+ ('IJ', 'IJ'),
+ ('Ä´', 'Ä´'),
+ ('Äļ', 'Äļ'),
+ ('Äš', 'Äš'),
+ ('Äģ', 'Äģ'),
+ ('ÄŊ', 'ÄŊ'),
+ ('Äŋ', 'Äŋ'),
+ ('Ł', 'Ł'),
+ ('Ń', 'Ń'),
+ ('Ņ', 'Ņ'),
+ ('Ň', 'Ň'),
+ ('Ŋ', 'Ŋ'),
+ ('Ō', 'Ō'),
+ ('Ŏ', 'Ŏ'),
+ ('Ő', 'Ő'),
+ ('Œ', 'Œ'),
+ ('Ŕ', 'Ŕ'),
+ ('Ŗ', 'Ŗ'),
+ ('Ř', 'Ř'),
+ ('Ś', 'Ś'),
+ ('Ŝ', 'Ŝ'),
+ ('Ş', 'Ş'),
+ ('Å ', 'Å '),
+ ('Åĸ', 'Åĸ'),
+ ('Ť', 'Ť'),
+ ('ÅĻ', 'ÅĻ'),
+ ('Ũ', 'Ũ'),
+ ('ÅĒ', 'ÅĒ'),
+ ('ÅŦ', 'ÅŦ'),
+ ('ÅŽ', 'ÅŽ'),
+ ('Å°', 'Å°'),
+ ('Å˛', 'Å˛'),
+ ('Å´', 'Å´'),
+ ('Åļ', 'Åļ'),
+ ('Ÿ', 'Ś'),
+ ('Åģ', 'Åģ'),
+ ('ÅŊ', 'ÅŊ'),
+ ('Ɓ', 'Ƃ'),
+ ('Ƅ', 'Ƅ'),
+ ('Ɔ', 'Ƈ'),
+ ('Ɖ', 'Ƌ'),
+ ('Ǝ', 'Ƒ'),
+ ('Ɠ', 'Ɣ'),
+ ('Ɩ', 'Ƙ'),
+ ('Ɯ', 'Ɲ'),
+ ('Ɵ', 'Ơ'),
+ ('Æĸ', 'Æĸ'),
+ ('Ƥ', 'Ƥ'),
+ ('ÆĻ', 'Ƨ'),
+ ('ÆŠ', 'ÆŠ'),
+ ('ÆŦ', 'ÆŦ'),
+ ('Ǝ', 'Ư'),
+ ('Æą', 'Æŗ'),
+ ('Æĩ', 'Æĩ'),
+ ('Æˇ', 'Ƹ'),
+ ('Æŧ', 'Æŧ'),
+ ('Į„', 'Į„'),
+ ('Į‡', 'Į‡'),
+ ('ĮŠ', 'ĮŠ'),
+ ('Į', 'Į'),
+ ('Į', 'Į'),
+ ('Į‘', 'Į‘'),
+ ('Į“', 'Į“'),
+ ('Į•', 'Į•'),
+ ('Į—', 'Į—'),
+ ('Į™', 'Į™'),
+ ('Į›', 'Į›'),
+ ('Įž', 'Įž'),
+ ('Į ', 'Į '),
+ ('Įĸ', 'Įĸ'),
+ ('Į¤', 'Į¤'),
+ ('ĮĻ', 'ĮĻ'),
+ ('Į¨', 'Į¨'),
+ ('ĮĒ', 'ĮĒ'),
+ ('ĮŦ', 'ĮŦ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Įą', 'Įą'),
+ ('Į´', 'Į´'),
+ ('Įļ', 'Į¸'),
+ ('Įē', 'Įē'),
+ ('Įŧ', 'Įŧ'),
+ ('Įž', 'Įž'),
+ ('Ȁ', 'Ȁ'),
+ ('Ȃ', 'Ȃ'),
+ ('Ȅ', 'Ȅ'),
+ ('Ȇ', 'Ȇ'),
+ ('Ȉ', 'Ȉ'),
+ ('Ȋ', 'Ȋ'),
+ ('Ȍ', 'Ȍ'),
+ ('Ȏ', 'Ȏ'),
+ ('Ȑ', 'Ȑ'),
+ ('Ȓ', 'Ȓ'),
+ ('Ȕ', 'Ȕ'),
+ ('Ȗ', 'Ȗ'),
+ ('Ș', 'Ș'),
+ ('Ț', 'Ț'),
+ ('Ȝ', 'Ȝ'),
+ ('Ȟ', 'Ȟ'),
+ ('Č ', 'Č '),
+ ('Čĸ', 'Čĸ'),
+ ('Ȥ', 'Ȥ'),
+ ('ČĻ', 'ČĻ'),
+ ('Ȩ', 'Ȩ'),
+ ('ČĒ', 'ČĒ'),
+ ('ČŦ', 'ČŦ'),
+ ('ČŽ', 'ČŽ'),
+ ('Č°', 'Č°'),
+ ('Ȳ', 'Ȳ'),
+ ('Čē', 'Čģ'),
+ ('ČŊ', 'Čž'),
+ ('Ɂ', 'Ɂ'),
+ ('Ƀ', 'Ɇ'),
+ ('Ɉ', 'Ɉ'),
+ ('Ɋ', 'Ɋ'),
+ ('Ɍ', 'Ɍ'),
+ ('Ɏ', 'Ɏ'),
+ ('Í°', 'Í°'),
+ ('Ͳ', 'Ͳ'),
+ ('Íļ', 'Íļ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ώ'),
+ ('Α', 'Ρ'),
+ ('ÎŖ', 'ÎĢ'),
+ ('Ī', 'Ī'),
+ ('Ī’', 'Ī”'),
+ ('Ī˜', 'Ī˜'),
+ ('Īš', 'Īš'),
+ ('Īœ', 'Īœ'),
+ ('Īž', 'Īž'),
+ ('Ī ', 'Ī '),
+ ('Īĸ', 'Īĸ'),
+ ('Ī¤', 'Ī¤'),
+ ('ĪĻ', 'ĪĻ'),
+ ('Ī¨', 'Ī¨'),
+ ('ĪĒ', 'ĪĒ'),
+ ('ĪŦ', 'ĪŦ'),
+ ('ĪŽ', 'ĪŽ'),
+ ('Ī´', 'Ī´'),
+ ('Īˇ', 'Īˇ'),
+ ('Īš', 'Īē'),
+ ('ĪŊ', 'Đ¯'),
+ ('Ņ ', 'Ņ '),
+ ('Ņĸ', 'Ņĸ'),
+ ('Ņ¤', 'Ņ¤'),
+ ('ŅĻ', 'ŅĻ'),
+ ('Ņ¨', 'Ņ¨'),
+ ('ŅĒ', 'ŅĒ'),
+ ('ŅŦ', 'ŅŦ'),
+ ('ŅŽ', 'ŅŽ'),
+ ('Ņ°', 'Ņ°'),
+ ('Ņ˛', 'Ņ˛'),
+ ('Ņ´', 'Ņ´'),
+ ('Ņļ', 'Ņļ'),
+ ('Ņ¸', 'Ņ¸'),
+ ('Ņē', 'Ņē'),
+ ('Ņŧ', 'Ņŧ'),
+ ('Ņž', 'Ņž'),
+ ('Ō€', 'Ō€'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌŒ', 'ŌŒ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō', 'Ō'),
+ ('Ō’', 'Ō’'),
+ ('Ō”', 'Ō”'),
+ ('Ō–', 'Ō–'),
+ ('Ō˜', 'Ō˜'),
+ ('Ōš', 'Ōš'),
+ ('Ōœ', 'Ōœ'),
+ ('Ōž', 'Ōž'),
+ ('Ō ', 'Ō '),
+ ('Ōĸ', 'Ōĸ'),
+ ('Ō¤', 'Ō¤'),
+ ('ŌĻ', 'ŌĻ'),
+ ('Ō¨', 'Ō¨'),
+ ('ŌĒ', 'ŌĒ'),
+ ('ŌŦ', 'ŌŦ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō°', 'Ō°'),
+ ('Ō˛', 'Ō˛'),
+ ('Ō´', 'Ō´'),
+ ('Ōļ', 'Ōļ'),
+ ('Ō¸', 'Ō¸'),
+ ('Ōē', 'Ōē'),
+ ('Ōŧ', 'Ōŧ'),
+ ('Ōž', 'Ōž'),
+ ('Ķ€', 'Ķ'),
+ ('Ķƒ', 'Ķƒ'),
+ ('Ķ…', 'Ķ…'),
+ ('Ķ‡', 'Ķ‡'),
+ ('Ķ‰', 'Ķ‰'),
+ ('Ķ‹', 'Ķ‹'),
+ ('Ķ', 'Ķ'),
+ ('Ķ', 'Ķ'),
+ ('Ķ’', 'Ķ’'),
+ ('Ķ”', 'Ķ”'),
+ ('Ķ–', 'Ķ–'),
+ ('Ķ˜', 'Ķ˜'),
+ ('Ķš', 'Ķš'),
+ ('Ķœ', 'Ķœ'),
+ ('Ķž', 'Ķž'),
+ ('Ķ ', 'Ķ '),
+ ('Ķĸ', 'Ķĸ'),
+ ('Ķ¤', 'Ķ¤'),
+ ('ĶĻ', 'ĶĻ'),
+ ('Ķ¨', 'Ķ¨'),
+ ('ĶĒ', 'ĶĒ'),
+ ('ĶŦ', 'ĶŦ'),
+ ('ĶŽ', 'ĶŽ'),
+ ('Ķ°', 'Ķ°'),
+ ('Ķ˛', 'Ķ˛'),
+ ('Ķ´', 'Ķ´'),
+ ('Ķļ', 'Ķļ'),
+ ('Ķ¸', 'Ķ¸'),
+ ('Ķē', 'Ķē'),
+ ('Ķŧ', 'Ķŧ'),
+ ('Ķž', 'Ķž'),
+ ('Ԁ', 'Ԁ'),
+ ('Ԃ', 'Ԃ'),
+ ('Ԅ', 'Ԅ'),
+ ('Ԇ', 'Ԇ'),
+ ('Ԉ', 'Ԉ'),
+ ('Ԋ', 'Ԋ'),
+ ('Ԍ', 'Ԍ'),
+ ('Ԏ', 'Ԏ'),
+ ('Ԑ', 'Ԑ'),
+ ('Ԓ', 'Ԓ'),
+ ('Ԕ', 'Ԕ'),
+ ('Ԗ', 'Ԗ'),
+ ('Ԙ', 'Ԙ'),
+ ('Ԛ', 'Ԛ'),
+ ('Ԝ', 'Ԝ'),
+ ('Ԟ', 'Ԟ'),
+ ('Ô ', 'Ô '),
+ ('Ôĸ', 'Ôĸ'),
+ ('Ô¤', 'Ô¤'),
+ ('ÔĻ', 'ÔĻ'),
+ ('Ô¨', 'Ô¨'),
+ ('ÔĒ', 'ÔĒ'),
+ ('ÔŦ', 'ÔŦ'),
+ ('ÔŽ', 'ÔŽ'),
+ ('Ôą', 'Ֆ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('Ꭰ', 'áĩ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('Ḁ', 'Ḁ'),
+ ('Ḃ', 'Ḃ'),
+ ('Ḅ', 'Ḅ'),
+ ('Ḇ', 'Ḇ'),
+ ('Ḉ', 'Ḉ'),
+ ('Ḋ', 'Ḋ'),
+ ('Ḍ', 'Ḍ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḑ', 'Ḑ'),
+ ('Ḓ', 'Ḓ'),
+ ('Ḕ', 'Ḕ'),
+ ('Ḗ', 'Ḗ'),
+ ('Ḙ', 'Ḙ'),
+ ('Ḛ', 'Ḛ'),
+ ('Ḝ', 'Ḝ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ḡ', 'Ḡ'),
+ ('á¸ĸ', 'á¸ĸ'),
+ ('Ḥ', 'Ḥ'),
+ ('á¸Ļ', 'á¸Ļ'),
+ ('Ḩ', 'Ḩ'),
+ ('á¸Ē', 'á¸Ē'),
+ ('á¸Ŧ', 'á¸Ŧ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḱ', 'Ḱ'),
+ ('Ḳ', 'Ḳ'),
+ ('Ḵ', 'Ḵ'),
+ ('á¸ļ', 'á¸ļ'),
+ ('Ḹ', 'Ḹ'),
+ ('á¸ē', 'á¸ē'),
+ ('á¸ŧ', 'á¸ŧ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ṁ', 'Ṁ'),
+ ('Ṃ', 'Ṃ'),
+ ('Ṅ', 'Ṅ'),
+ ('Ṇ', 'Ṇ'),
+ ('ᚈ', 'ᚈ'),
+ ('Ṋ', 'Ṋ'),
+ ('Ṍ', 'Ṍ'),
+ ('ᚎ', 'ᚎ'),
+ ('ᚐ', 'ᚐ'),
+ ('Ṓ', 'Ṓ'),
+ ('Ṕ', 'Ṕ'),
+ ('Ṗ', 'Ṗ'),
+ ('ᚘ', 'ᚘ'),
+ ('Ṛ', 'Ṛ'),
+ ('Ṝ', 'Ṝ'),
+ ('᚞', '᚞'),
+ ('áš ', 'áš '),
+ ('ášĸ', 'ášĸ'),
+ ('ᚤ', 'ᚤ'),
+ ('ášĻ', 'ášĻ'),
+ ('ᚨ', 'ᚨ'),
+ ('ášĒ', 'ášĒ'),
+ ('ášŦ', 'ášŦ'),
+ ('ᚎ', 'ᚎ'),
+ ('áš°', 'áš°'),
+ ('ᚲ', 'ᚲ'),
+ ('áš´', 'áš´'),
+ ('ášļ', 'ášļ'),
+ ('ᚸ', 'ᚸ'),
+ ('ášē', 'ášē'),
+ ('ášŧ', 'ášŧ'),
+ ('ášž', 'ášž'),
+ ('áē€', 'áē€'),
+ ('áē‚', 'áē‚'),
+ ('áē„', 'áē„'),
+ ('áē†', 'áē†'),
+ ('áēˆ', 'áēˆ'),
+ ('áēŠ', 'áēŠ'),
+ ('áēŒ', 'áēŒ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē', 'áē'),
+ ('áē’', 'áē’'),
+ ('áē”', 'áē”'),
+ ('áēž', 'áēž'),
+ ('áē ', 'áē '),
+ ('áēĸ', 'áēĸ'),
+ ('áē¤', 'áē¤'),
+ ('áēĻ', 'áēĻ'),
+ ('áē¨', 'áē¨'),
+ ('áēĒ', 'áēĒ'),
+ ('áēŦ', 'áēŦ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē°', 'áē°'),
+ ('áē˛', 'áē˛'),
+ ('áē´', 'áē´'),
+ ('áēļ', 'áēļ'),
+ ('áē¸', 'áē¸'),
+ ('áēē', 'áēē'),
+ ('áēŧ', 'áēŧ'),
+ ('áēž', 'áēž'),
+ ('áģ€', 'áģ€'),
+ ('áģ‚', 'áģ‚'),
+ ('áģ„', 'áģ„'),
+ ('áģ†', 'áģ†'),
+ ('áģˆ', 'áģˆ'),
+ ('áģŠ', 'áģŠ'),
+ ('áģŒ', 'áģŒ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ', 'áģ'),
+ ('áģ’', 'áģ’'),
+ ('áģ”', 'áģ”'),
+ ('áģ–', 'áģ–'),
+ ('áģ˜', 'áģ˜'),
+ ('áģš', 'áģš'),
+ ('áģœ', 'áģœ'),
+ ('áģž', 'áģž'),
+ ('áģ ', 'áģ '),
+ ('áģĸ', 'áģĸ'),
+ ('áģ¤', 'áģ¤'),
+ ('áģĻ', 'áģĻ'),
+ ('áģ¨', 'áģ¨'),
+ ('áģĒ', 'áģĒ'),
+ ('áģŦ', 'áģŦ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ°', 'áģ°'),
+ ('áģ˛', 'áģ˛'),
+ ('áģ´', 'áģ´'),
+ ('áģļ', 'áģļ'),
+ ('áģ¸', 'áģ¸'),
+ ('áģē', 'áģē'),
+ ('áģŧ', 'áģŧ'),
+ ('áģž', 'áģž'),
+ ('áŧˆ', 'áŧ'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ¨', 'áŧ¯'),
+ ('áŧ¸', 'áŧŋ'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŸ'),
+ ('áŊ¨', 'áŊ¯'),
+ ('ី', 'ážģ'),
+ ('áŋˆ', 'áŋ‹'),
+ ('áŋ˜', 'áŋ›'),
+ ('áŋ¨', 'áŋŦ'),
+ ('áŋ¸', 'áŋģ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℋ', 'ℍ'),
+ ('ℐ', 'ℒ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℰ', 'â„ŗ'),
+ ('ℾ', 'â„ŋ'),
+ ('ⅅ', 'ⅅ'),
+ ('Ⅰ', 'Ⅿ'),
+ ('Ↄ', 'Ↄ'),
+ ('â’ļ', 'Ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('âą ', 'âą '),
+ ('âąĸ', '⹤'),
+ ('⹧', '⹧'),
+ ('⹊', '⹊'),
+ ('âąĢ', 'âąĢ'),
+ ('âą­', 'âą°'),
+ ('⹲', '⹲'),
+ ('âąĩ', 'âąĩ'),
+ ('Ȿ', 'Ⲁ'),
+ ('Ⲃ', 'Ⲃ'),
+ ('Ⲅ', 'Ⲅ'),
+ ('Ⲇ', 'Ⲇ'),
+ ('Ⲉ', 'Ⲉ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('Ⲍ', 'Ⲍ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲑ', 'Ⲑ'),
+ ('Ⲓ', 'Ⲓ'),
+ ('Ⲕ', 'Ⲕ'),
+ ('Ⲗ', 'Ⲗ'),
+ ('Ⲙ', 'Ⲙ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('Ⲝ', 'Ⲝ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('Ⲡ', 'Ⲡ'),
+ ('â˛ĸ', 'â˛ĸ'),
+ ('Ⲥ', 'Ⲥ'),
+ ('â˛Ļ', 'â˛Ļ'),
+ ('Ⲩ', 'Ⲩ'),
+ ('â˛Ē', 'â˛Ē'),
+ ('â˛Ŧ', 'â˛Ŧ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲱ', 'Ⲱ'),
+ ('Ⲳ', 'Ⲳ'),
+ ('Ⲵ', 'Ⲵ'),
+ ('â˛ļ', 'â˛ļ'),
+ ('Ⲹ', 'Ⲹ'),
+ ('â˛ē', 'â˛ē'),
+ ('â˛ŧ', 'â˛ŧ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('âŗ€', 'âŗ€'),
+ ('âŗ‚', 'âŗ‚'),
+ ('âŗ„', 'âŗ„'),
+ ('âŗ†', 'âŗ†'),
+ ('âŗˆ', 'âŗˆ'),
+ ('âŗŠ', 'âŗŠ'),
+ ('âŗŒ', 'âŗŒ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ’', 'âŗ’'),
+ ('âŗ”', 'âŗ”'),
+ ('âŗ–', 'âŗ–'),
+ ('âŗ˜', 'âŗ˜'),
+ ('âŗš', 'âŗš'),
+ ('âŗœ', 'âŗœ'),
+ ('âŗž', 'âŗž'),
+ ('âŗ ', 'âŗ '),
+ ('âŗĸ', 'âŗĸ'),
+ ('âŗĢ', 'âŗĢ'),
+ ('âŗ­', 'âŗ­'),
+ ('âŗ˛', 'âŗ˛'),
+ ('Ꙁ', 'Ꙁ'),
+ ('Ꙃ', 'Ꙃ'),
+ ('Ꙅ', 'Ꙅ'),
+ ('Ꙇ', 'Ꙇ'),
+ ('Ꙉ', 'Ꙉ'),
+ ('Ꙋ', 'Ꙋ'),
+ ('Ꙍ', 'Ꙍ'),
+ ('Ꙏ', 'Ꙏ'),
+ ('Ꙑ', 'Ꙑ'),
+ ('Ꙓ', 'Ꙓ'),
+ ('Ꙕ', 'Ꙕ'),
+ ('Ꙗ', 'Ꙗ'),
+ ('Ꙙ', 'Ꙙ'),
+ ('Ꙛ', 'Ꙛ'),
+ ('Ꙝ', 'Ꙝ'),
+ ('Ꙟ', 'Ꙟ'),
+ ('Ꙡ', 'Ꙡ'),
+ ('ę™ĸ', 'ę™ĸ'),
+ ('Ꙥ', 'Ꙥ'),
+ ('ę™Ļ', 'ę™Ļ'),
+ ('Ꙩ', 'Ꙩ'),
+ ('ę™Ē', 'ę™Ē'),
+ ('ę™Ŧ', 'ę™Ŧ'),
+ ('Ꚁ', 'Ꚁ'),
+ ('Ꚃ', 'Ꚃ'),
+ ('Ꚅ', 'Ꚅ'),
+ ('Ꚇ', 'Ꚇ'),
+ ('Ꚉ', 'Ꚉ'),
+ ('Ꚋ', 'Ꚋ'),
+ ('Ꚍ', 'Ꚍ'),
+ ('Ꚏ', 'Ꚏ'),
+ ('Ꚑ', 'Ꚑ'),
+ ('Ꚓ', 'Ꚓ'),
+ ('Ꚕ', 'Ꚕ'),
+ ('Ꚗ', 'Ꚗ'),
+ ('Ꚙ', 'Ꚙ'),
+ ('Ꚛ', 'Ꚛ'),
+ ('ęœĸ', 'ęœĸ'),
+ ('Ꜥ', 'Ꜥ'),
+ ('ęœĻ', 'ęœĻ'),
+ ('Ꜩ', 'Ꜩ'),
+ ('ęœĒ', 'ęœĒ'),
+ ('ęœŦ', 'ęœŦ'),
+ ('Ꜯ', 'Ꜯ'),
+ ('Ꜳ', 'Ꜳ'),
+ ('Ꜵ', 'Ꜵ'),
+ ('ęœļ', 'ęœļ'),
+ ('Ꜹ', 'Ꜹ'),
+ ('ęœē', 'ęœē'),
+ ('ęœŧ', 'ęœŧ'),
+ ('Ꜿ', 'Ꜿ'),
+ ('Ꝁ', 'Ꝁ'),
+ ('Ꝃ', 'Ꝃ'),
+ ('Ꝅ', 'Ꝅ'),
+ ('Ꝇ', 'Ꝇ'),
+ ('Ꝉ', 'Ꝉ'),
+ ('Ꝋ', 'Ꝋ'),
+ ('Ꝍ', 'Ꝍ'),
+ ('Ꝏ', 'Ꝏ'),
+ ('Ꝑ', 'Ꝑ'),
+ ('Ꝓ', 'Ꝓ'),
+ ('Ꝕ', 'Ꝕ'),
+ ('Ꝗ', 'Ꝗ'),
+ ('Ꝙ', 'Ꝙ'),
+ ('Ꝛ', 'Ꝛ'),
+ ('Ꝝ', 'Ꝝ'),
+ ('Ꝟ', 'Ꝟ'),
+ ('Ꝡ', 'Ꝡ'),
+ ('ęĸ', 'ęĸ'),
+ ('Ꝥ', 'Ꝥ'),
+ ('ęĻ', 'ęĻ'),
+ ('Ꝩ', 'Ꝩ'),
+ ('ęĒ', 'ęĒ'),
+ ('ęŦ', 'ęŦ'),
+ ('Ꝯ', 'Ꝯ'),
+ ('Ꝺ', 'Ꝺ'),
+ ('ęģ', 'ęģ'),
+ ('ęŊ', 'Ꝿ'),
+ ('Ꞁ', 'Ꞁ'),
+ ('Ꞃ', 'Ꞃ'),
+ ('Ꞅ', 'Ꞅ'),
+ ('Ꞇ', 'Ꞇ'),
+ ('Ꞌ', 'Ꞌ'),
+ ('Ɥ', 'Ɥ'),
+ ('Ꞑ', 'Ꞑ'),
+ ('Ꞓ', 'Ꞓ'),
+ ('Ꞗ', 'Ꞗ'),
+ ('Ꞙ', 'Ꞙ'),
+ ('Ꞛ', 'Ꞛ'),
+ ('Ꞝ', 'Ꞝ'),
+ ('Ꞟ', 'Ꞟ'),
+ ('Ꞡ', 'Ꞡ'),
+ ('ęžĸ', 'ęžĸ'),
+ ('Ꞥ', 'Ꞥ'),
+ ('ęžĻ', 'ęžĻ'),
+ ('Ꞩ', 'Ꞩ'),
+ ('ęžĒ', 'Ɪ'),
+ ('Ʞ', 'Ꞵ'),
+ ('ęžļ', 'ęžļ'),
+ ('Ꞹ', 'Ꞹ'),
+ ('ęžē', 'ęžē'),
+ ('ęžŧ', 'ęžŧ'),
+ ('Ꞿ', 'Ꞿ'),
+ ('Ꟃ', 'Ꟃ'),
+ ('Ꞔ', '\u{a7c7}'),
+ ('\u{a7c9}', '\u{a7c9}'),
+ ('\u{a7f5}', '\u{a7f5}'),
+ ('īŧĄ', 'īŧē'),
+ ('𐐀', '𐐧'),
+ ('𐒰', '𐓓'),
+ ('𐲀', '𐲲'),
+ ('đ‘ĸ ', 'đ‘ĸŋ'),
+ ('𖹀', '𖹟'),
+ ('𝐀', '𝐙'),
+ ('𝐴', '𝑍'),
+ ('𝑨', '𝒁'),
+ ('𝒜', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', 'đ’ĩ'),
+ ('𝓐', '𝓩'),
+ ('𝔄', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔸', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('đ•Ŧ', '𝖅'),
+ ('𝖠', '𝖹'),
+ ('𝗔', '𝗭'),
+ ('𝘈', '𝘡'),
+ ('đ˜ŧ', '𝙕'),
+ ('𝙰', '𝚉'),
+ ('𝚨', '𝛀'),
+ ('đ›ĸ', 'đ›ē'),
+ ('𝜜', '𝜴'),
+ ('𝝖', '𝝮'),
+ ('𝞐', '𝞨'),
+ ('𝟊', '𝟊'),
+ ('𞤀', '𞤡'),
+ ('🄰', '🅉'),
+ ('🅐', '🅩'),
+ ('🅰', '🆉'),
+];
+
+pub const VARIATION_SELECTOR: &'static [(char, char)] = &[
+ ('\u{180b}', '\u{180d}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const WHITE_SPACE: &'static [(char, char)] = &[
+ ('\t', '\r'),
+ (' ', ' '),
+ ('\u{85}', '\u{85}'),
+ ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'),
+ ('\u{2000}', '\u{200a}'),
+ ('\u{2028}', '\u{2029}'),
+ ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'),
+ ('\u{3000}', '\u{3000}'),
+];
+
+pub const XID_CONTINUE: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('A', 'Z'),
+ ('_', '_'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('¡', '¡'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ˁ'),
+ ('ˆ', 'ˑ'),
+ ('ˠ', 'ˤ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('\u{300}', 'Í´'),
+ ('Íļ', '͡'),
+ ('Íģ', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('\u{483}', '\u{487}'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', 'ՙ'),
+ ('ՠ', 'ֈ'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('\u{610}', '\u{61a}'),
+ ('Ø ', 'ŲŠ'),
+ ('ŲŽ', 'ۓ'),
+ ('ە', '\u{6dc}'),
+ ('\u{6df}', '\u{6e8}'),
+ ('\u{6ea}', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', '\u{74a}'),
+ ('Ũ', 'Ūą'),
+ ('߀', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('ā €', '\u{82d}'),
+ ('āĄ€', '\u{85b}'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', '\u{963}'),
+ ('āĨĻ', 'āĨ¯'),
+ ('āĨą', 'āĻƒ'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('\u{9bc}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Ž'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', '\u{9e3}'),
+ ('ā§Ļ', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠĻ', '\u{a75}'),
+ ('\u{a81}', 'āĒƒ'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('\u{abc}', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', '\u{ae3}'),
+ ('āĢĻ', 'āĢ¯'),
+ ('āĢš', '\u{aff}'),
+ ('\u{b01}', 'āŦƒ'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('\u{b3c}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', '\u{b63}'),
+ ('ā­Ļ', 'ā­¯'),
+ ('ā­ą', 'ā­ą'),
+ ('\u{b82}', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('ā¯', 'ā¯'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('ā¯Ļ', 'ā¯¯'),
+ ('\u{c00}', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('āą˜', 'āąš'),
+ ('āą ', '\u{c63}'),
+ ('āąĻ', 'āą¯'),
+ ('ā˛€', 'ā˛ƒ'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('\u{cbc}', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', '\u{ce3}'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d00}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩŽ'),
+ ('āĩ”', '\u{d57}'),
+ ('āĩŸ', '\u{d63}'),
+ ('āĩĻ', 'āĩ¯'),
+ ('āĩē', 'āĩŋ'),
+ ('\u{d81}', 'āļƒ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āˇ˛', 'āˇŗ'),
+ ('ā¸', '\u{e3a}'),
+ ('āš€', '\u{e4e}'),
+ ('āš', 'āš™'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('āģ', 'āģ™'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('\u{f18}', '\u{f19}'),
+ ('āŧ ', 'āŧŠ'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('āŧž', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('\u{f71}', '\u{f84}'),
+ ('\u{f86}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('က', '၉'),
+ ('ၐ', '\u{109d}'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('\u{135d}', '\u{135f}'),
+ ('፩', '፱'),
+ ('ᎀ', 'ᎏ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛮ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', '\u{1714}'),
+ ('ᜠ', '\u{1734}'),
+ ('ᝀ', '\u{1753}'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('\u{1772}', '\u{1773}'),
+ ('ក', '\u{17d3}'),
+ ('ៗ', 'ៗ'),
+ ('ៜ', '\u{17dd}'),
+ ('០', '៩'),
+ ('\u{180b}', '\u{180d}'),
+ ('᠐', '᠙'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('áĨ†', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('᧐', '᧚'),
+ ('ᨀ', '\u{1a1b}'),
+ ('ᨠ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('áĒ§', 'áĒ§'),
+ ('\u{1ab0}', '\u{1abd}'),
+ ('\u{1abf}', '\u{1ac0}'),
+ ('\u{1b00}', 'ᭋ'),
+ ('᭐', '᭙'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', 'á¯ŗ'),
+ ('ᰀ', '\u{1c37}'),
+ ('᱀', '᱉'),
+ ('ᱍ', 'áąŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', 'áŗē'),
+ ('ᴀ', '\u{1df9}'),
+ ('\u{1dfb}', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('â€ŋ', '⁀'),
+ ('⁔', '⁔'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('\u{20d0}', '\u{20dc}'),
+ ('\u{20e1}', '\u{20e1}'),
+ ('\u{20e5}', '\u{20f0}'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('℘', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('\u{2d7f}', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('々', '〇'),
+ ('ã€Ą', '\u{302f}'),
+ ('ã€ą', 'ã€ĩ'),
+ ('〸', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('\u{3099}', '\u{309a}'),
+ ('ゝ', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŧ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ę˜Ģ'),
+ ('Ꙁ', '\u{a66f}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('ę™ŋ', '\u{a6f1}'),
+ ('ꜗ', 'ꜟ'),
+ ('ęœĸ', 'ꞈ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ę §'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ€', '\u{a8c5}'),
+ ('ęŖ', 'ęŖ™'),
+ ('\u{a8e0}', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', '\u{a92d}'),
+ ('ꤰ', 'ęĨ“'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('\u{a980}', '꧀'),
+ ('ꧏ', '꧙'),
+ ('ꧠ', '꧞'),
+ ('ꨀ', '\u{aa36}'),
+ ('ꩀ', 'ꩍ'),
+ ('꩐', '꩙'),
+ ('ꊠ', 'ęŠļ'),
+ ('ęŠē', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢ'),
+ ('ęĢ ', 'ęĢ¯'),
+ ('ęĢ˛', '\u{aaf6}'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab69}'),
+ ('ę­°', 'ę¯Ē'),
+ ('ę¯Ŧ', '\u{abed}'),
+ ('ę¯°', 'ę¯š'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŦ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'īą'),
+ ('īą¤', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇš'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('ī¸ŗ', 'ī¸´'),
+ ('īš', 'īš'),
+ ('īšą', 'īšą'),
+ ('īšŗ', 'īšŗ'),
+ ('īšˇ', 'īšˇ'),
+ ('īšš', 'īšš'),
+ ('īšģ', 'īšģ'),
+ ('īšŊ', 'īšŊ'),
+ ('īšŋ', 'īģŧ'),
+ ('īŧ', 'īŧ™'),
+ ('īŧĄ', 'īŧē'),
+ ('īŧŋ', 'īŧŋ'),
+ ('īŊ', 'īŊš'),
+ ('īŊĻ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐅀', '𐅴'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍊'),
+ ('𐍐', '\u{1037a}'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐏑', '𐏕'),
+ ('𐐀', '𐒝'),
+ ('𐒠', '𐒩'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', '\u{10ae6}'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('𐴀', '\u{10d27}'),
+ ('𐴰', '𐴚'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', '\u{10f50}'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀀', '\u{11046}'),
+ ('đ‘Ļ', 'đ‘¯'),
+ ('\u{1107f}', '\u{110ba}'),
+ ('𑃐', '𑃨'),
+ ('𑃰', '𑃹'),
+ ('\u{11100}', '\u{11134}'),
+ ('đ‘„ļ', 'đ‘„ŋ'),
+ ('𑅄', '\u{11147}'),
+ ('𑅐', '\u{11173}'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('\u{11180}', '𑇄'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111ce}', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '\u{112ea}'),
+ ('𑋰', '𑋹'),
+ ('\u{11300}', '𑌃'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('\u{1133b}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('𑍐', '𑍐'),
+ ('\u{11357}', '\u{11357}'),
+ ('𑍝', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('𑐀', '𑑊'),
+ ('𑑐', '𑑙'),
+ ('\u{1145e}', '\u{11461}'),
+ ('𑒀', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑓐', '𑓙'),
+ ('𑖀', '\u{115b5}'),
+ ('𑖸', '\u{115c0}'),
+ ('𑗘', '\u{115dd}'),
+ ('𑘀', '\u{11640}'),
+ ('𑙄', '𑙄'),
+ ('𑙐', '𑙙'),
+ ('𑚀', '𑚸'),
+ ('𑛀', '𑛉'),
+ ('𑜀', '𑜚'),
+ ('\u{1171d}', '\u{1172b}'),
+ ('𑜰', '𑜹'),
+ ('𑠀', '\u{1183a}'),
+ ('đ‘ĸ ', 'đ‘ŖŠ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{11943}'),
+ ('\u{11950}', '\u{11959}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '\u{119d7}'),
+ ('\u{119da}', '𑧡'),
+ ('đ‘§Ŗ', '𑧤'),
+ ('𑨀', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('𑩐', '\u{11a99}'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '\u{11c36}'),
+ ('\u{11c38}', '𑱀'),
+ ('𑱐', '𑱙'),
+ ('𑱲', '𑲏'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d47}'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', 'đ‘ļ˜'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('đ‘ģ ', 'đ‘ģļ'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('𖩠', '𖩩'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('đ–Ŧ€', '\u{16b36}'),
+ ('𖭀', '𖭃'),
+ ('𖭐', '𖭙'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–šŋ'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('\u{16f4f}', '𖾇'),
+ ('\u{16f8f}', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', '\u{16fe4}'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d165}', '\u{1d169}'),
+ ('𝅭', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𝟎', 'đŸŋ'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('𞄀', 'đž„Ŧ'),
+ ('\u{1e130}', 'đž„Ŋ'),
+ ('𞅀', '𞅉'),
+ ('𞅎', '𞅎'),
+ ('𞋀', '𞋹'),
+ ('𞠀', 'đžŖ„'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('𞤀', 'đžĨ‹'),
+ ('đžĨ', 'đžĨ™'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const XID_START: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ˁ'),
+ ('ˆ', 'ˑ'),
+ ('ˠ', 'ˤ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('Í°', 'Í´'),
+ ('Íļ', '͡'),
+ ('Íģ', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', 'ՙ'),
+ ('ՠ', 'ֈ'),
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('Ø ', 'ŲŠ'),
+ ('ŲŽ', 'Ų¯'),
+ ('Ųą', 'ۓ'),
+ ('ە', 'ە'),
+ ('ÛĨ', 'ÛĻ'),
+ ('ÛŽ', 'Û¯'),
+ ('Ûē', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', 'ܐ'),
+ ('ܒ', 'ܯ'),
+ ('Ũ', 'ŪĨ'),
+ ('Ūą', 'Ūą'),
+ ('ߊ', 'ßĒ'),
+ ('ß´', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('ā €', 'ā •'),
+ ('ā š', 'ā š'),
+ ('ā ¤', 'ā ¤'),
+ ('ā ¨', 'ā ¨'),
+ ('āĄ€', 'āĄ˜'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('ā¤„', 'ā¤š'),
+ ('ā¤Ŋ', 'ā¤Ŋ'),
+ ('āĨ', 'āĨ'),
+ ('āĨ˜', 'āĨĄ'),
+ ('āĨą', 'āĻ€'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('āĻŊ', 'āĻŊ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', 'ā§Ą'),
+ ('ā§°', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠ˛', 'āŠ´'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('āĒŊ', 'āĒŊ'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', 'āĢĄ'),
+ ('āĢš', 'āĢš'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('āŦŊ', 'āŦŊ'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', 'ā­Ą'),
+ ('ā­ą', 'ā­ą'),
+ ('āŽƒ', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('ā¯', 'ā¯'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'ā°Ŋ'),
+ ('āą˜', 'āąš'),
+ ('āą ', 'āąĄ'),
+ ('ā˛€', 'ā˛€'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛Ŋ', 'ā˛Ŋ'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', 'āŗĄ'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d04}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´ē'),
+ ('ā´Ŋ', 'ā´Ŋ'),
+ ('āĩŽ', 'āĩŽ'),
+ ('āĩ”', 'āĩ–'),
+ ('āĩŸ', 'āĩĄ'),
+ ('āĩē', 'āĩŋ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('ā¸', 'ā¸°'),
+ ('ā¸˛', 'ā¸˛'),
+ ('āš€', 'āš†'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āē°'),
+ ('āē˛', 'āē˛'),
+ ('āēŊ', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('āŊ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('āžˆ', 'āžŒ'),
+ ('က', 'á€Ē'),
+ ('á€ŋ', 'á€ŋ'),
+ ('ၐ', 'ၕ'),
+ ('ၚ', 'ၝ'),
+ ('ၥ', 'ၥ'),
+ ('áĨ', 'áĻ'),
+ ('၎', 'ၰ'),
+ ('áĩ', 'ႁ'),
+ ('ႎ', 'ႎ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('ᎀ', 'ᎏ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛮ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', 'ᜑ'),
+ ('ᜠ', 'ᜱ'),
+ ('ᝀ', 'ᝑ'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('ក', 'ážŗ'),
+ ('ៗ', 'ៗ'),
+ ('ៜ', 'ៜ'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸ¨'),
+ ('áĸĒ', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('áĨ', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('ᨀ', 'ᨖ'),
+ ('ᨠ', 'ᩔ'),
+ ('áĒ§', 'áĒ§'),
+ ('áŦ…', 'áŦŗ'),
+ ('ᭅ', 'ᭋ'),
+ ('ᎃ', 'Ꭰ'),
+ ('ᎎ', 'Ꭿ'),
+ ('áŽē', 'á¯Ĩ'),
+ ('ᰀ', 'á°Ŗ'),
+ ('ᱍ', 'ᱏ'),
+ ('ᱚ', 'áąŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗļ'),
+ ('áŗē', 'áŗē'),
+ ('ᴀ', 'áļŋ'),
+ ('Ḁ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('℘', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('々', '〇'),
+ ('ã€Ą', '《'),
+ ('ã€ą', 'ã€ĩ'),
+ ('〸', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('ゝ', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŧ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ꘟ'),
+ ('ę˜Ē', 'ę˜Ģ'),
+ ('Ꙁ', 'ꙮ'),
+ ('ę™ŋ', 'ꚝ'),
+ ('ꚠ', 'ę›¯'),
+ ('ꜗ', 'ꜟ'),
+ ('ęœĸ', 'ꞈ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ꠁ'),
+ ('ꠃ', 'ꠅ'),
+ ('ꠇ', 'ꠊ'),
+ ('ꠌ', 'ę ĸ'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ‚', 'ęĸŗ'),
+ ('ęŖ˛', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', 'ęŖž'),
+ ('ꤊ', 'ę¤Ĩ'),
+ ('ꤰ', 'ęĨ†'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('ęĻ„', 'ęĻ˛'),
+ ('ꧏ', 'ꧏ'),
+ ('ꧠ', 'ꧤ'),
+ ('ę§Ļ', 'ę§¯'),
+ ('ę§ē', '꧞'),
+ ('ꨀ', 'ꨨ'),
+ ('ꩀ', 'ꩂ'),
+ ('ꩄ', 'ꩋ'),
+ ('ꊠ', 'ęŠļ'),
+ ('ęŠē', 'ęŠē'),
+ ('ꊞ', 'ęĒ¯'),
+ ('ęĒą', 'ęĒą'),
+ ('ęĒĩ', 'ęĒļ'),
+ ('ęĒš', 'ęĒŊ'),
+ ('ęĢ€', 'ęĢ€'),
+ ('ęĢ‚', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢ'),
+ ('ęĢ ', 'ęĢĒ'),
+ ('ęĢ˛', 'ęĢ´'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab69}'),
+ ('ę­°', 'ę¯ĸ'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŦ', 'īŦ'),
+ ('īŦŸ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'īą'),
+ ('īą¤', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇš'),
+ ('īšą', 'īšą'),
+ ('īšŗ', 'īšŗ'),
+ ('īšˇ', 'īšˇ'),
+ ('īšš', 'īšš'),
+ ('īšģ', 'īšģ'),
+ ('īšŊ', 'īšŊ'),
+ ('īšŋ', 'īģŧ'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+ ('īŊĻ', 'īž'),
+ ('īž ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐅀', '𐅴'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍊'),
+ ('𐍐', 'đĩ'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐏑', '𐏕'),
+ ('𐐀', '𐒝'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '𐨀'),
+ ('𐨐', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', 'đĢ¤'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('𐴀', 'đ´Ŗ'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', 'đŊ…'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀃', '𑀷'),
+ ('𑂃', 'đ‘‚¯'),
+ ('𑃐', '𑃨'),
+ ('𑄃', 'đ‘„Ļ'),
+ ('𑅄', '𑅄'),
+ ('\u{11147}', '\u{11147}'),
+ ('𑅐', '𑅲'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('𑆃', '𑆲'),
+ ('𑇁', '𑇄'),
+ ('𑇚', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', 'đ‘ˆĢ'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '𑋞'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('đ‘ŒŊ', 'đ‘ŒŊ'),
+ ('𑍐', '𑍐'),
+ ('𑍝', '𑍡'),
+ ('𑐀', '𑐴'),
+ ('𑑇', '𑑊'),
+ ('𑑟', '\u{11461}'),
+ ('𑒀', 'đ‘’¯'),
+ ('𑓄', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑖀', '𑖮'),
+ ('𑗘', '𑗛'),
+ ('𑘀', 'đ‘˜¯'),
+ ('𑙄', '𑙄'),
+ ('𑚀', 'đ‘šĒ'),
+ ('𑚸', '𑚸'),
+ ('𑜀', '𑜚'),
+ ('𑠀', 'đ‘ Ģ'),
+ ('đ‘ĸ ', 'đ‘ŖŸ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{1192f}'),
+ ('\u{1193f}', '\u{1193f}'),
+ ('\u{11941}', '\u{11941}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '𑧐'),
+ ('𑧡', '𑧡'),
+ ('đ‘§Ŗ', 'đ‘§Ŗ'),
+ ('𑨀', '𑨀'),
+ ('𑨋', '𑨲'),
+ ('đ‘¨ē', 'đ‘¨ē'),
+ ('𑩐', '𑩐'),
+ ('𑩜', 'đ‘Ē‰'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '𑰮'),
+ ('𑱀', '𑱀'),
+ ('𑱲', '𑲏'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '𑴰'),
+ ('đ‘ĩ†', 'đ‘ĩ†'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļ‰'),
+ ('đ‘ļ˜', 'đ‘ļ˜'),
+ ('đ‘ģ ', 'đ‘ģ˛'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('đ–Ŧ€', 'đ–Ŧ¯'),
+ ('𖭀', '𖭃'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–šŋ'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('đ–Ŋ', 'đ–Ŋ'),
+ ('𖾓', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', 'đ–ŋŖ'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𞄀', 'đž„Ŧ'),
+ ('𞄷', 'đž„Ŋ'),
+ ('𞅎', '𞅎'),
+ ('𞋀', 'đž‹Ģ'),
+ ('𞠀', 'đžŖ„'),
+ ('𞤀', 'đžĨƒ'),
+ ('đžĨ‹', 'đžĨ‹'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/property_names.rs b/vendor/regex-syntax/src/unicode_tables/property_names.rs
new file mode 100644
index 000000000..6393df2f8
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/property_names.rs
@@ -0,0 +1,264 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate property-names ucd-13.0.0
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
+ ("age", "Age"),
+ ("ahex", "ASCII_Hex_Digit"),
+ ("alpha", "Alphabetic"),
+ ("alphabetic", "Alphabetic"),
+ ("asciihexdigit", "ASCII_Hex_Digit"),
+ ("bc", "Bidi_Class"),
+ ("bidic", "Bidi_Control"),
+ ("bidiclass", "Bidi_Class"),
+ ("bidicontrol", "Bidi_Control"),
+ ("bidim", "Bidi_Mirrored"),
+ ("bidimirrored", "Bidi_Mirrored"),
+ ("bidimirroringglyph", "Bidi_Mirroring_Glyph"),
+ ("bidipairedbracket", "Bidi_Paired_Bracket"),
+ ("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"),
+ ("blk", "Block"),
+ ("block", "Block"),
+ ("bmg", "Bidi_Mirroring_Glyph"),
+ ("bpb", "Bidi_Paired_Bracket"),
+ ("bpt", "Bidi_Paired_Bracket_Type"),
+ ("canonicalcombiningclass", "Canonical_Combining_Class"),
+ ("cased", "Cased"),
+ ("casefolding", "Case_Folding"),
+ ("caseignorable", "Case_Ignorable"),
+ ("ccc", "Canonical_Combining_Class"),
+ ("ce", "Composition_Exclusion"),
+ ("cf", "Case_Folding"),
+ ("changeswhencasefolded", "Changes_When_Casefolded"),
+ ("changeswhencasemapped", "Changes_When_Casemapped"),
+ ("changeswhenlowercased", "Changes_When_Lowercased"),
+ ("changeswhennfkccasefolded", "Changes_When_NFKC_Casefolded"),
+ ("changeswhentitlecased", "Changes_When_Titlecased"),
+ ("changeswhenuppercased", "Changes_When_Uppercased"),
+ ("ci", "Case_Ignorable"),
+ ("cjkaccountingnumeric", "kAccountingNumeric"),
+ ("cjkcompatibilityvariant", "kCompatibilityVariant"),
+ ("cjkiicore", "kIICore"),
+ ("cjkirggsource", "kIRG_GSource"),
+ ("cjkirghsource", "kIRG_HSource"),
+ ("cjkirgjsource", "kIRG_JSource"),
+ ("cjkirgkpsource", "kIRG_KPSource"),
+ ("cjkirgksource", "kIRG_KSource"),
+ ("cjkirgmsource", "kIRG_MSource"),
+ ("cjkirgssource", "kIRG_SSource"),
+ ("cjkirgtsource", "kIRG_TSource"),
+ ("cjkirguksource", "kIRG_UKSource"),
+ ("cjkirgusource", "kIRG_USource"),
+ ("cjkirgvsource", "kIRG_VSource"),
+ ("cjkothernumeric", "kOtherNumeric"),
+ ("cjkprimarynumeric", "kPrimaryNumeric"),
+ ("cjkrsunicode", "kRSUnicode"),
+ ("compex", "Full_Composition_Exclusion"),
+ ("compositionexclusion", "Composition_Exclusion"),
+ ("cwcf", "Changes_When_Casefolded"),
+ ("cwcm", "Changes_When_Casemapped"),
+ ("cwkcf", "Changes_When_NFKC_Casefolded"),
+ ("cwl", "Changes_When_Lowercased"),
+ ("cwt", "Changes_When_Titlecased"),
+ ("cwu", "Changes_When_Uppercased"),
+ ("dash", "Dash"),
+ ("decompositionmapping", "Decomposition_Mapping"),
+ ("decompositiontype", "Decomposition_Type"),
+ ("defaultignorablecodepoint", "Default_Ignorable_Code_Point"),
+ ("dep", "Deprecated"),
+ ("deprecated", "Deprecated"),
+ ("di", "Default_Ignorable_Code_Point"),
+ ("dia", "Diacritic"),
+ ("diacritic", "Diacritic"),
+ ("dm", "Decomposition_Mapping"),
+ ("dt", "Decomposition_Type"),
+ ("ea", "East_Asian_Width"),
+ ("eastasianwidth", "East_Asian_Width"),
+ ("ebase", "Emoji_Modifier_Base"),
+ ("ecomp", "Emoji_Component"),
+ ("emod", "Emoji_Modifier"),
+ ("emoji", "Emoji"),
+ ("emojicomponent", "Emoji_Component"),
+ ("emojimodifier", "Emoji_Modifier"),
+ ("emojimodifierbase", "Emoji_Modifier_Base"),
+ ("emojipresentation", "Emoji_Presentation"),
+ ("epres", "Emoji_Presentation"),
+ ("equideo", "Equivalent_Unified_Ideograph"),
+ ("equivalentunifiedideograph", "Equivalent_Unified_Ideograph"),
+ ("expandsonnfc", "Expands_On_NFC"),
+ ("expandsonnfd", "Expands_On_NFD"),
+ ("expandsonnfkc", "Expands_On_NFKC"),
+ ("expandsonnfkd", "Expands_On_NFKD"),
+ ("ext", "Extender"),
+ ("extendedpictographic", "Extended_Pictographic"),
+ ("extender", "Extender"),
+ ("extpict", "Extended_Pictographic"),
+ ("fcnfkc", "FC_NFKC_Closure"),
+ ("fcnfkcclosure", "FC_NFKC_Closure"),
+ ("fullcompositionexclusion", "Full_Composition_Exclusion"),
+ ("gc", "General_Category"),
+ ("gcb", "Grapheme_Cluster_Break"),
+ ("generalcategory", "General_Category"),
+ ("graphemebase", "Grapheme_Base"),
+ ("graphemeclusterbreak", "Grapheme_Cluster_Break"),
+ ("graphemeextend", "Grapheme_Extend"),
+ ("graphemelink", "Grapheme_Link"),
+ ("grbase", "Grapheme_Base"),
+ ("grext", "Grapheme_Extend"),
+ ("grlink", "Grapheme_Link"),
+ ("hangulsyllabletype", "Hangul_Syllable_Type"),
+ ("hex", "Hex_Digit"),
+ ("hexdigit", "Hex_Digit"),
+ ("hst", "Hangul_Syllable_Type"),
+ ("hyphen", "Hyphen"),
+ ("idc", "ID_Continue"),
+ ("idcontinue", "ID_Continue"),
+ ("ideo", "Ideographic"),
+ ("ideographic", "Ideographic"),
+ ("ids", "ID_Start"),
+ ("idsb", "IDS_Binary_Operator"),
+ ("idsbinaryoperator", "IDS_Binary_Operator"),
+ ("idst", "IDS_Trinary_Operator"),
+ ("idstart", "ID_Start"),
+ ("idstrinaryoperator", "IDS_Trinary_Operator"),
+ ("indicpositionalcategory", "Indic_Positional_Category"),
+ ("indicsyllabiccategory", "Indic_Syllabic_Category"),
+ ("inpc", "Indic_Positional_Category"),
+ ("insc", "Indic_Syllabic_Category"),
+ ("isc", "ISO_Comment"),
+ ("jamoshortname", "Jamo_Short_Name"),
+ ("jg", "Joining_Group"),
+ ("joinc", "Join_Control"),
+ ("joincontrol", "Join_Control"),
+ ("joininggroup", "Joining_Group"),
+ ("joiningtype", "Joining_Type"),
+ ("jsn", "Jamo_Short_Name"),
+ ("jt", "Joining_Type"),
+ ("kaccountingnumeric", "kAccountingNumeric"),
+ ("kcompatibilityvariant", "kCompatibilityVariant"),
+ ("kiicore", "kIICore"),
+ ("kirggsource", "kIRG_GSource"),
+ ("kirghsource", "kIRG_HSource"),
+ ("kirgjsource", "kIRG_JSource"),
+ ("kirgkpsource", "kIRG_KPSource"),
+ ("kirgksource", "kIRG_KSource"),
+ ("kirgmsource", "kIRG_MSource"),
+ ("kirgssource", "kIRG_SSource"),
+ ("kirgtsource", "kIRG_TSource"),
+ ("kirguksource", "kIRG_UKSource"),
+ ("kirgusource", "kIRG_USource"),
+ ("kirgvsource", "kIRG_VSource"),
+ ("kothernumeric", "kOtherNumeric"),
+ ("kprimarynumeric", "kPrimaryNumeric"),
+ ("krsunicode", "kRSUnicode"),
+ ("lb", "Line_Break"),
+ ("lc", "Lowercase_Mapping"),
+ ("linebreak", "Line_Break"),
+ ("loe", "Logical_Order_Exception"),
+ ("logicalorderexception", "Logical_Order_Exception"),
+ ("lower", "Lowercase"),
+ ("lowercase", "Lowercase"),
+ ("lowercasemapping", "Lowercase_Mapping"),
+ ("math", "Math"),
+ ("na", "Name"),
+ ("na1", "Unicode_1_Name"),
+ ("name", "Name"),
+ ("namealias", "Name_Alias"),
+ ("nchar", "Noncharacter_Code_Point"),
+ ("nfcqc", "NFC_Quick_Check"),
+ ("nfcquickcheck", "NFC_Quick_Check"),
+ ("nfdqc", "NFD_Quick_Check"),
+ ("nfdquickcheck", "NFD_Quick_Check"),
+ ("nfkccasefold", "NFKC_Casefold"),
+ ("nfkccf", "NFKC_Casefold"),
+ ("nfkcqc", "NFKC_Quick_Check"),
+ ("nfkcquickcheck", "NFKC_Quick_Check"),
+ ("nfkdqc", "NFKD_Quick_Check"),
+ ("nfkdquickcheck", "NFKD_Quick_Check"),
+ ("noncharactercodepoint", "Noncharacter_Code_Point"),
+ ("nt", "Numeric_Type"),
+ ("numerictype", "Numeric_Type"),
+ ("numericvalue", "Numeric_Value"),
+ ("nv", "Numeric_Value"),
+ ("oalpha", "Other_Alphabetic"),
+ ("ocomment", "ISO_Comment"),
+ ("odi", "Other_Default_Ignorable_Code_Point"),
+ ("ogrext", "Other_Grapheme_Extend"),
+ ("oidc", "Other_ID_Continue"),
+ ("oids", "Other_ID_Start"),
+ ("olower", "Other_Lowercase"),
+ ("omath", "Other_Math"),
+ ("otheralphabetic", "Other_Alphabetic"),
+ ("otherdefaultignorablecodepoint", "Other_Default_Ignorable_Code_Point"),
+ ("othergraphemeextend", "Other_Grapheme_Extend"),
+ ("otheridcontinue", "Other_ID_Continue"),
+ ("otheridstart", "Other_ID_Start"),
+ ("otherlowercase", "Other_Lowercase"),
+ ("othermath", "Other_Math"),
+ ("otheruppercase", "Other_Uppercase"),
+ ("oupper", "Other_Uppercase"),
+ ("patsyn", "Pattern_Syntax"),
+ ("patternsyntax", "Pattern_Syntax"),
+ ("patternwhitespace", "Pattern_White_Space"),
+ ("patws", "Pattern_White_Space"),
+ ("pcm", "Prepended_Concatenation_Mark"),
+ ("prependedconcatenationmark", "Prepended_Concatenation_Mark"),
+ ("qmark", "Quotation_Mark"),
+ ("quotationmark", "Quotation_Mark"),
+ ("radical", "Radical"),
+ ("regionalindicator", "Regional_Indicator"),
+ ("ri", "Regional_Indicator"),
+ ("sb", "Sentence_Break"),
+ ("sc", "Script"),
+ ("scf", "Simple_Case_Folding"),
+ ("script", "Script"),
+ ("scriptextensions", "Script_Extensions"),
+ ("scx", "Script_Extensions"),
+ ("sd", "Soft_Dotted"),
+ ("sentencebreak", "Sentence_Break"),
+ ("sentenceterminal", "Sentence_Terminal"),
+ ("sfc", "Simple_Case_Folding"),
+ ("simplecasefolding", "Simple_Case_Folding"),
+ ("simplelowercasemapping", "Simple_Lowercase_Mapping"),
+ ("simpletitlecasemapping", "Simple_Titlecase_Mapping"),
+ ("simpleuppercasemapping", "Simple_Uppercase_Mapping"),
+ ("slc", "Simple_Lowercase_Mapping"),
+ ("softdotted", "Soft_Dotted"),
+ ("space", "White_Space"),
+ ("stc", "Simple_Titlecase_Mapping"),
+ ("sterm", "Sentence_Terminal"),
+ ("suc", "Simple_Uppercase_Mapping"),
+ ("tc", "Titlecase_Mapping"),
+ ("term", "Terminal_Punctuation"),
+ ("terminalpunctuation", "Terminal_Punctuation"),
+ ("titlecasemapping", "Titlecase_Mapping"),
+ ("uc", "Uppercase_Mapping"),
+ ("uideo", "Unified_Ideograph"),
+ ("unicode1name", "Unicode_1_Name"),
+ ("unicoderadicalstroke", "kRSUnicode"),
+ ("unifiedideograph", "Unified_Ideograph"),
+ ("upper", "Uppercase"),
+ ("uppercase", "Uppercase"),
+ ("uppercasemapping", "Uppercase_Mapping"),
+ ("urs", "kRSUnicode"),
+ ("variationselector", "Variation_Selector"),
+ ("verticalorientation", "Vertical_Orientation"),
+ ("vo", "Vertical_Orientation"),
+ ("vs", "Variation_Selector"),
+ ("wb", "Word_Break"),
+ ("whitespace", "White_Space"),
+ ("wordbreak", "Word_Break"),
+ ("wspace", "White_Space"),
+ ("xidc", "XID_Continue"),
+ ("xidcontinue", "XID_Continue"),
+ ("xids", "XID_Start"),
+ ("xidstart", "XID_Start"),
+ ("xonfc", "Expands_On_NFC"),
+ ("xonfd", "Expands_On_NFD"),
+ ("xonfkc", "Expands_On_NFKC"),
+ ("xonfkd", "Expands_On_NFKD"),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/property_values.rs b/vendor/regex-syntax/src/unicode_tables/property_values.rs
new file mode 100644
index 000000000..c46653a7b
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/property_values.rs
@@ -0,0 +1,896 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate property-values ucd-13.0.0 --include gc,script,scx,age,gcb,wb,sb
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const PROPERTY_VALUES: &'static [(
+ &'static str,
+ &'static [(&'static str, &'static str)],
+)] = &[
+ (
+ "Age",
+ &[
+ ("1.1", "V1_1"),
+ ("10.0", "V10_0"),
+ ("11.0", "V11_0"),
+ ("12.0", "V12_0"),
+ ("12.1", "V12_1"),
+ ("13.0", "V13_0"),
+ ("2.0", "V2_0"),
+ ("2.1", "V2_1"),
+ ("3.0", "V3_0"),
+ ("3.1", "V3_1"),
+ ("3.2", "V3_2"),
+ ("4.0", "V4_0"),
+ ("4.1", "V4_1"),
+ ("5.0", "V5_0"),
+ ("5.1", "V5_1"),
+ ("5.2", "V5_2"),
+ ("6.0", "V6_0"),
+ ("6.1", "V6_1"),
+ ("6.2", "V6_2"),
+ ("6.3", "V6_3"),
+ ("7.0", "V7_0"),
+ ("8.0", "V8_0"),
+ ("9.0", "V9_0"),
+ ("na", "Unassigned"),
+ ("unassigned", "Unassigned"),
+ ("v100", "V10_0"),
+ ("v11", "V1_1"),
+ ("v110", "V11_0"),
+ ("v120", "V12_0"),
+ ("v121", "V12_1"),
+ ("v130", "V13_0"),
+ ("v20", "V2_0"),
+ ("v21", "V2_1"),
+ ("v30", "V3_0"),
+ ("v31", "V3_1"),
+ ("v32", "V3_2"),
+ ("v40", "V4_0"),
+ ("v41", "V4_1"),
+ ("v50", "V5_0"),
+ ("v51", "V5_1"),
+ ("v52", "V5_2"),
+ ("v60", "V6_0"),
+ ("v61", "V6_1"),
+ ("v62", "V6_2"),
+ ("v63", "V6_3"),
+ ("v70", "V7_0"),
+ ("v80", "V8_0"),
+ ("v90", "V9_0"),
+ ],
+ ),
+ (
+ "General_Category",
+ &[
+ ("c", "Other"),
+ ("casedletter", "Cased_Letter"),
+ ("cc", "Control"),
+ ("cf", "Format"),
+ ("closepunctuation", "Close_Punctuation"),
+ ("cn", "Unassigned"),
+ ("cntrl", "Control"),
+ ("co", "Private_Use"),
+ ("combiningmark", "Mark"),
+ ("connectorpunctuation", "Connector_Punctuation"),
+ ("control", "Control"),
+ ("cs", "Surrogate"),
+ ("currencysymbol", "Currency_Symbol"),
+ ("dashpunctuation", "Dash_Punctuation"),
+ ("decimalnumber", "Decimal_Number"),
+ ("digit", "Decimal_Number"),
+ ("enclosingmark", "Enclosing_Mark"),
+ ("finalpunctuation", "Final_Punctuation"),
+ ("format", "Format"),
+ ("initialpunctuation", "Initial_Punctuation"),
+ ("l", "Letter"),
+ ("lc", "Cased_Letter"),
+ ("letter", "Letter"),
+ ("letternumber", "Letter_Number"),
+ ("lineseparator", "Line_Separator"),
+ ("ll", "Lowercase_Letter"),
+ ("lm", "Modifier_Letter"),
+ ("lo", "Other_Letter"),
+ ("lowercaseletter", "Lowercase_Letter"),
+ ("lt", "Titlecase_Letter"),
+ ("lu", "Uppercase_Letter"),
+ ("m", "Mark"),
+ ("mark", "Mark"),
+ ("mathsymbol", "Math_Symbol"),
+ ("mc", "Spacing_Mark"),
+ ("me", "Enclosing_Mark"),
+ ("mn", "Nonspacing_Mark"),
+ ("modifierletter", "Modifier_Letter"),
+ ("modifiersymbol", "Modifier_Symbol"),
+ ("n", "Number"),
+ ("nd", "Decimal_Number"),
+ ("nl", "Letter_Number"),
+ ("no", "Other_Number"),
+ ("nonspacingmark", "Nonspacing_Mark"),
+ ("number", "Number"),
+ ("openpunctuation", "Open_Punctuation"),
+ ("other", "Other"),
+ ("otherletter", "Other_Letter"),
+ ("othernumber", "Other_Number"),
+ ("otherpunctuation", "Other_Punctuation"),
+ ("othersymbol", "Other_Symbol"),
+ ("p", "Punctuation"),
+ ("paragraphseparator", "Paragraph_Separator"),
+ ("pc", "Connector_Punctuation"),
+ ("pd", "Dash_Punctuation"),
+ ("pe", "Close_Punctuation"),
+ ("pf", "Final_Punctuation"),
+ ("pi", "Initial_Punctuation"),
+ ("po", "Other_Punctuation"),
+ ("privateuse", "Private_Use"),
+ ("ps", "Open_Punctuation"),
+ ("punct", "Punctuation"),
+ ("punctuation", "Punctuation"),
+ ("s", "Symbol"),
+ ("sc", "Currency_Symbol"),
+ ("separator", "Separator"),
+ ("sk", "Modifier_Symbol"),
+ ("sm", "Math_Symbol"),
+ ("so", "Other_Symbol"),
+ ("spaceseparator", "Space_Separator"),
+ ("spacingmark", "Spacing_Mark"),
+ ("surrogate", "Surrogate"),
+ ("symbol", "Symbol"),
+ ("titlecaseletter", "Titlecase_Letter"),
+ ("unassigned", "Unassigned"),
+ ("uppercaseletter", "Uppercase_Letter"),
+ ("z", "Separator"),
+ ("zl", "Line_Separator"),
+ ("zp", "Paragraph_Separator"),
+ ("zs", "Space_Separator"),
+ ],
+ ),
+ (
+ "Grapheme_Cluster_Break",
+ &[
+ ("cn", "Control"),
+ ("control", "Control"),
+ ("cr", "CR"),
+ ("eb", "E_Base"),
+ ("ebase", "E_Base"),
+ ("ebasegaz", "E_Base_GAZ"),
+ ("ebg", "E_Base_GAZ"),
+ ("em", "E_Modifier"),
+ ("emodifier", "E_Modifier"),
+ ("ex", "Extend"),
+ ("extend", "Extend"),
+ ("gaz", "Glue_After_Zwj"),
+ ("glueafterzwj", "Glue_After_Zwj"),
+ ("l", "L"),
+ ("lf", "LF"),
+ ("lv", "LV"),
+ ("lvt", "LVT"),
+ ("other", "Other"),
+ ("pp", "Prepend"),
+ ("prepend", "Prepend"),
+ ("regionalindicator", "Regional_Indicator"),
+ ("ri", "Regional_Indicator"),
+ ("sm", "SpacingMark"),
+ ("spacingmark", "SpacingMark"),
+ ("t", "T"),
+ ("v", "V"),
+ ("xx", "Other"),
+ ("zwj", "ZWJ"),
+ ],
+ ),
+ (
+ "Script",
+ &[
+ ("adlam", "Adlam"),
+ ("adlm", "Adlam"),
+ ("aghb", "Caucasian_Albanian"),
+ ("ahom", "Ahom"),
+ ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"),
+ ("arab", "Arabic"),
+ ("arabic", "Arabic"),
+ ("armenian", "Armenian"),
+ ("armi", "Imperial_Aramaic"),
+ ("armn", "Armenian"),
+ ("avestan", "Avestan"),
+ ("avst", "Avestan"),
+ ("bali", "Balinese"),
+ ("balinese", "Balinese"),
+ ("bamu", "Bamum"),
+ ("bamum", "Bamum"),
+ ("bass", "Bassa_Vah"),
+ ("bassavah", "Bassa_Vah"),
+ ("batak", "Batak"),
+ ("batk", "Batak"),
+ ("beng", "Bengali"),
+ ("bengali", "Bengali"),
+ ("bhaiksuki", "Bhaiksuki"),
+ ("bhks", "Bhaiksuki"),
+ ("bopo", "Bopomofo"),
+ ("bopomofo", "Bopomofo"),
+ ("brah", "Brahmi"),
+ ("brahmi", "Brahmi"),
+ ("brai", "Braille"),
+ ("braille", "Braille"),
+ ("bugi", "Buginese"),
+ ("buginese", "Buginese"),
+ ("buhd", "Buhid"),
+ ("buhid", "Buhid"),
+ ("cakm", "Chakma"),
+ ("canadianaboriginal", "Canadian_Aboriginal"),
+ ("cans", "Canadian_Aboriginal"),
+ ("cari", "Carian"),
+ ("carian", "Carian"),
+ ("caucasianalbanian", "Caucasian_Albanian"),
+ ("chakma", "Chakma"),
+ ("cham", "Cham"),
+ ("cher", "Cherokee"),
+ ("cherokee", "Cherokee"),
+ ("chorasmian", "Chorasmian"),
+ ("chrs", "Chorasmian"),
+ ("common", "Common"),
+ ("copt", "Coptic"),
+ ("coptic", "Coptic"),
+ ("cprt", "Cypriot"),
+ ("cuneiform", "Cuneiform"),
+ ("cypriot", "Cypriot"),
+ ("cyrillic", "Cyrillic"),
+ ("cyrl", "Cyrillic"),
+ ("deseret", "Deseret"),
+ ("deva", "Devanagari"),
+ ("devanagari", "Devanagari"),
+ ("diak", "Dives_Akuru"),
+ ("divesakuru", "Dives_Akuru"),
+ ("dogr", "Dogra"),
+ ("dogra", "Dogra"),
+ ("dsrt", "Deseret"),
+ ("dupl", "Duployan"),
+ ("duployan", "Duployan"),
+ ("egyp", "Egyptian_Hieroglyphs"),
+ ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"),
+ ("elba", "Elbasan"),
+ ("elbasan", "Elbasan"),
+ ("elym", "Elymaic"),
+ ("elymaic", "Elymaic"),
+ ("ethi", "Ethiopic"),
+ ("ethiopic", "Ethiopic"),
+ ("geor", "Georgian"),
+ ("georgian", "Georgian"),
+ ("glag", "Glagolitic"),
+ ("glagolitic", "Glagolitic"),
+ ("gong", "Gunjala_Gondi"),
+ ("gonm", "Masaram_Gondi"),
+ ("goth", "Gothic"),
+ ("gothic", "Gothic"),
+ ("gran", "Grantha"),
+ ("grantha", "Grantha"),
+ ("greek", "Greek"),
+ ("grek", "Greek"),
+ ("gujarati", "Gujarati"),
+ ("gujr", "Gujarati"),
+ ("gunjalagondi", "Gunjala_Gondi"),
+ ("gurmukhi", "Gurmukhi"),
+ ("guru", "Gurmukhi"),
+ ("han", "Han"),
+ ("hang", "Hangul"),
+ ("hangul", "Hangul"),
+ ("hani", "Han"),
+ ("hanifirohingya", "Hanifi_Rohingya"),
+ ("hano", "Hanunoo"),
+ ("hanunoo", "Hanunoo"),
+ ("hatr", "Hatran"),
+ ("hatran", "Hatran"),
+ ("hebr", "Hebrew"),
+ ("hebrew", "Hebrew"),
+ ("hira", "Hiragana"),
+ ("hiragana", "Hiragana"),
+ ("hluw", "Anatolian_Hieroglyphs"),
+ ("hmng", "Pahawh_Hmong"),
+ ("hmnp", "Nyiakeng_Puachue_Hmong"),
+ ("hrkt", "Katakana_Or_Hiragana"),
+ ("hung", "Old_Hungarian"),
+ ("imperialaramaic", "Imperial_Aramaic"),
+ ("inherited", "Inherited"),
+ ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+ ("inscriptionalparthian", "Inscriptional_Parthian"),
+ ("ital", "Old_Italic"),
+ ("java", "Javanese"),
+ ("javanese", "Javanese"),
+ ("kaithi", "Kaithi"),
+ ("kali", "Kayah_Li"),
+ ("kana", "Katakana"),
+ ("kannada", "Kannada"),
+ ("katakana", "Katakana"),
+ ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+ ("kayahli", "Kayah_Li"),
+ ("khar", "Kharoshthi"),
+ ("kharoshthi", "Kharoshthi"),
+ ("khitansmallscript", "Khitan_Small_Script"),
+ ("khmer", "Khmer"),
+ ("khmr", "Khmer"),
+ ("khoj", "Khojki"),
+ ("khojki", "Khojki"),
+ ("khudawadi", "Khudawadi"),
+ ("kits", "Khitan_Small_Script"),
+ ("knda", "Kannada"),
+ ("kthi", "Kaithi"),
+ ("lana", "Tai_Tham"),
+ ("lao", "Lao"),
+ ("laoo", "Lao"),
+ ("latin", "Latin"),
+ ("latn", "Latin"),
+ ("lepc", "Lepcha"),
+ ("lepcha", "Lepcha"),
+ ("limb", "Limbu"),
+ ("limbu", "Limbu"),
+ ("lina", "Linear_A"),
+ ("linb", "Linear_B"),
+ ("lineara", "Linear_A"),
+ ("linearb", "Linear_B"),
+ ("lisu", "Lisu"),
+ ("lyci", "Lycian"),
+ ("lycian", "Lycian"),
+ ("lydi", "Lydian"),
+ ("lydian", "Lydian"),
+ ("mahajani", "Mahajani"),
+ ("mahj", "Mahajani"),
+ ("maka", "Makasar"),
+ ("makasar", "Makasar"),
+ ("malayalam", "Malayalam"),
+ ("mand", "Mandaic"),
+ ("mandaic", "Mandaic"),
+ ("mani", "Manichaean"),
+ ("manichaean", "Manichaean"),
+ ("marc", "Marchen"),
+ ("marchen", "Marchen"),
+ ("masaramgondi", "Masaram_Gondi"),
+ ("medefaidrin", "Medefaidrin"),
+ ("medf", "Medefaidrin"),
+ ("meeteimayek", "Meetei_Mayek"),
+ ("mend", "Mende_Kikakui"),
+ ("mendekikakui", "Mende_Kikakui"),
+ ("merc", "Meroitic_Cursive"),
+ ("mero", "Meroitic_Hieroglyphs"),
+ ("meroiticcursive", "Meroitic_Cursive"),
+ ("meroitichieroglyphs", "Meroitic_Hieroglyphs"),
+ ("miao", "Miao"),
+ ("mlym", "Malayalam"),
+ ("modi", "Modi"),
+ ("mong", "Mongolian"),
+ ("mongolian", "Mongolian"),
+ ("mro", "Mro"),
+ ("mroo", "Mro"),
+ ("mtei", "Meetei_Mayek"),
+ ("mult", "Multani"),
+ ("multani", "Multani"),
+ ("myanmar", "Myanmar"),
+ ("mymr", "Myanmar"),
+ ("nabataean", "Nabataean"),
+ ("nand", "Nandinagari"),
+ ("nandinagari", "Nandinagari"),
+ ("narb", "Old_North_Arabian"),
+ ("nbat", "Nabataean"),
+ ("newa", "Newa"),
+ ("newtailue", "New_Tai_Lue"),
+ ("nko", "Nko"),
+ ("nkoo", "Nko"),
+ ("nshu", "Nushu"),
+ ("nushu", "Nushu"),
+ ("nyiakengpuachuehmong", "Nyiakeng_Puachue_Hmong"),
+ ("ogam", "Ogham"),
+ ("ogham", "Ogham"),
+ ("olchiki", "Ol_Chiki"),
+ ("olck", "Ol_Chiki"),
+ ("oldhungarian", "Old_Hungarian"),
+ ("olditalic", "Old_Italic"),
+ ("oldnortharabian", "Old_North_Arabian"),
+ ("oldpermic", "Old_Permic"),
+ ("oldpersian", "Old_Persian"),
+ ("oldsogdian", "Old_Sogdian"),
+ ("oldsoutharabian", "Old_South_Arabian"),
+ ("oldturkic", "Old_Turkic"),
+ ("oriya", "Oriya"),
+ ("orkh", "Old_Turkic"),
+ ("orya", "Oriya"),
+ ("osage", "Osage"),
+ ("osge", "Osage"),
+ ("osma", "Osmanya"),
+ ("osmanya", "Osmanya"),
+ ("pahawhhmong", "Pahawh_Hmong"),
+ ("palm", "Palmyrene"),
+ ("palmyrene", "Palmyrene"),
+ ("pauc", "Pau_Cin_Hau"),
+ ("paucinhau", "Pau_Cin_Hau"),
+ ("perm", "Old_Permic"),
+ ("phag", "Phags_Pa"),
+ ("phagspa", "Phags_Pa"),
+ ("phli", "Inscriptional_Pahlavi"),
+ ("phlp", "Psalter_Pahlavi"),
+ ("phnx", "Phoenician"),
+ ("phoenician", "Phoenician"),
+ ("plrd", "Miao"),
+ ("prti", "Inscriptional_Parthian"),
+ ("psalterpahlavi", "Psalter_Pahlavi"),
+ ("qaac", "Coptic"),
+ ("qaai", "Inherited"),
+ ("rejang", "Rejang"),
+ ("rjng", "Rejang"),
+ ("rohg", "Hanifi_Rohingya"),
+ ("runic", "Runic"),
+ ("runr", "Runic"),
+ ("samaritan", "Samaritan"),
+ ("samr", "Samaritan"),
+ ("sarb", "Old_South_Arabian"),
+ ("saur", "Saurashtra"),
+ ("saurashtra", "Saurashtra"),
+ ("sgnw", "SignWriting"),
+ ("sharada", "Sharada"),
+ ("shavian", "Shavian"),
+ ("shaw", "Shavian"),
+ ("shrd", "Sharada"),
+ ("sidd", "Siddham"),
+ ("siddham", "Siddham"),
+ ("signwriting", "SignWriting"),
+ ("sind", "Khudawadi"),
+ ("sinh", "Sinhala"),
+ ("sinhala", "Sinhala"),
+ ("sogd", "Sogdian"),
+ ("sogdian", "Sogdian"),
+ ("sogo", "Old_Sogdian"),
+ ("sora", "Sora_Sompeng"),
+ ("sorasompeng", "Sora_Sompeng"),
+ ("soyo", "Soyombo"),
+ ("soyombo", "Soyombo"),
+ ("sund", "Sundanese"),
+ ("sundanese", "Sundanese"),
+ ("sylo", "Syloti_Nagri"),
+ ("sylotinagri", "Syloti_Nagri"),
+ ("syrc", "Syriac"),
+ ("syriac", "Syriac"),
+ ("tagalog", "Tagalog"),
+ ("tagb", "Tagbanwa"),
+ ("tagbanwa", "Tagbanwa"),
+ ("taile", "Tai_Le"),
+ ("taitham", "Tai_Tham"),
+ ("taiviet", "Tai_Viet"),
+ ("takr", "Takri"),
+ ("takri", "Takri"),
+ ("tale", "Tai_Le"),
+ ("talu", "New_Tai_Lue"),
+ ("tamil", "Tamil"),
+ ("taml", "Tamil"),
+ ("tang", "Tangut"),
+ ("tangut", "Tangut"),
+ ("tavt", "Tai_Viet"),
+ ("telu", "Telugu"),
+ ("telugu", "Telugu"),
+ ("tfng", "Tifinagh"),
+ ("tglg", "Tagalog"),
+ ("thaa", "Thaana"),
+ ("thaana", "Thaana"),
+ ("thai", "Thai"),
+ ("tibetan", "Tibetan"),
+ ("tibt", "Tibetan"),
+ ("tifinagh", "Tifinagh"),
+ ("tirh", "Tirhuta"),
+ ("tirhuta", "Tirhuta"),
+ ("ugar", "Ugaritic"),
+ ("ugaritic", "Ugaritic"),
+ ("unknown", "Unknown"),
+ ("vai", "Vai"),
+ ("vaii", "Vai"),
+ ("wancho", "Wancho"),
+ ("wara", "Warang_Citi"),
+ ("warangciti", "Warang_Citi"),
+ ("wcho", "Wancho"),
+ ("xpeo", "Old_Persian"),
+ ("xsux", "Cuneiform"),
+ ("yezi", "Yezidi"),
+ ("yezidi", "Yezidi"),
+ ("yi", "Yi"),
+ ("yiii", "Yi"),
+ ("zanabazarsquare", "Zanabazar_Square"),
+ ("zanb", "Zanabazar_Square"),
+ ("zinh", "Inherited"),
+ ("zyyy", "Common"),
+ ("zzzz", "Unknown"),
+ ],
+ ),
+ (
+ "Script_Extensions",
+ &[
+ ("adlam", "Adlam"),
+ ("adlm", "Adlam"),
+ ("aghb", "Caucasian_Albanian"),
+ ("ahom", "Ahom"),
+ ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"),
+ ("arab", "Arabic"),
+ ("arabic", "Arabic"),
+ ("armenian", "Armenian"),
+ ("armi", "Imperial_Aramaic"),
+ ("armn", "Armenian"),
+ ("avestan", "Avestan"),
+ ("avst", "Avestan"),
+ ("bali", "Balinese"),
+ ("balinese", "Balinese"),
+ ("bamu", "Bamum"),
+ ("bamum", "Bamum"),
+ ("bass", "Bassa_Vah"),
+ ("bassavah", "Bassa_Vah"),
+ ("batak", "Batak"),
+ ("batk", "Batak"),
+ ("beng", "Bengali"),
+ ("bengali", "Bengali"),
+ ("bhaiksuki", "Bhaiksuki"),
+ ("bhks", "Bhaiksuki"),
+ ("bopo", "Bopomofo"),
+ ("bopomofo", "Bopomofo"),
+ ("brah", "Brahmi"),
+ ("brahmi", "Brahmi"),
+ ("brai", "Braille"),
+ ("braille", "Braille"),
+ ("bugi", "Buginese"),
+ ("buginese", "Buginese"),
+ ("buhd", "Buhid"),
+ ("buhid", "Buhid"),
+ ("cakm", "Chakma"),
+ ("canadianaboriginal", "Canadian_Aboriginal"),
+ ("cans", "Canadian_Aboriginal"),
+ ("cari", "Carian"),
+ ("carian", "Carian"),
+ ("caucasianalbanian", "Caucasian_Albanian"),
+ ("chakma", "Chakma"),
+ ("cham", "Cham"),
+ ("cher", "Cherokee"),
+ ("cherokee", "Cherokee"),
+ ("chorasmian", "Chorasmian"),
+ ("chrs", "Chorasmian"),
+ ("common", "Common"),
+ ("copt", "Coptic"),
+ ("coptic", "Coptic"),
+ ("cprt", "Cypriot"),
+ ("cuneiform", "Cuneiform"),
+ ("cypriot", "Cypriot"),
+ ("cyrillic", "Cyrillic"),
+ ("cyrl", "Cyrillic"),
+ ("deseret", "Deseret"),
+ ("deva", "Devanagari"),
+ ("devanagari", "Devanagari"),
+ ("diak", "Dives_Akuru"),
+ ("divesakuru", "Dives_Akuru"),
+ ("dogr", "Dogra"),
+ ("dogra", "Dogra"),
+ ("dsrt", "Deseret"),
+ ("dupl", "Duployan"),
+ ("duployan", "Duployan"),
+ ("egyp", "Egyptian_Hieroglyphs"),
+ ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"),
+ ("elba", "Elbasan"),
+ ("elbasan", "Elbasan"),
+ ("elym", "Elymaic"),
+ ("elymaic", "Elymaic"),
+ ("ethi", "Ethiopic"),
+ ("ethiopic", "Ethiopic"),
+ ("geor", "Georgian"),
+ ("georgian", "Georgian"),
+ ("glag", "Glagolitic"),
+ ("glagolitic", "Glagolitic"),
+ ("gong", "Gunjala_Gondi"),
+ ("gonm", "Masaram_Gondi"),
+ ("goth", "Gothic"),
+ ("gothic", "Gothic"),
+ ("gran", "Grantha"),
+ ("grantha", "Grantha"),
+ ("greek", "Greek"),
+ ("grek", "Greek"),
+ ("gujarati", "Gujarati"),
+ ("gujr", "Gujarati"),
+ ("gunjalagondi", "Gunjala_Gondi"),
+ ("gurmukhi", "Gurmukhi"),
+ ("guru", "Gurmukhi"),
+ ("han", "Han"),
+ ("hang", "Hangul"),
+ ("hangul", "Hangul"),
+ ("hani", "Han"),
+ ("hanifirohingya", "Hanifi_Rohingya"),
+ ("hano", "Hanunoo"),
+ ("hanunoo", "Hanunoo"),
+ ("hatr", "Hatran"),
+ ("hatran", "Hatran"),
+ ("hebr", "Hebrew"),
+ ("hebrew", "Hebrew"),
+ ("hira", "Hiragana"),
+ ("hiragana", "Hiragana"),
+ ("hluw", "Anatolian_Hieroglyphs"),
+ ("hmng", "Pahawh_Hmong"),
+ ("hmnp", "Nyiakeng_Puachue_Hmong"),
+ ("hrkt", "Katakana_Or_Hiragana"),
+ ("hung", "Old_Hungarian"),
+ ("imperialaramaic", "Imperial_Aramaic"),
+ ("inherited", "Inherited"),
+ ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+ ("inscriptionalparthian", "Inscriptional_Parthian"),
+ ("ital", "Old_Italic"),
+ ("java", "Javanese"),
+ ("javanese", "Javanese"),
+ ("kaithi", "Kaithi"),
+ ("kali", "Kayah_Li"),
+ ("kana", "Katakana"),
+ ("kannada", "Kannada"),
+ ("katakana", "Katakana"),
+ ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+ ("kayahli", "Kayah_Li"),
+ ("khar", "Kharoshthi"),
+ ("kharoshthi", "Kharoshthi"),
+ ("khitansmallscript", "Khitan_Small_Script"),
+ ("khmer", "Khmer"),
+ ("khmr", "Khmer"),
+ ("khoj", "Khojki"),
+ ("khojki", "Khojki"),
+ ("khudawadi", "Khudawadi"),
+ ("kits", "Khitan_Small_Script"),
+ ("knda", "Kannada"),
+ ("kthi", "Kaithi"),
+ ("lana", "Tai_Tham"),
+ ("lao", "Lao"),
+ ("laoo", "Lao"),
+ ("latin", "Latin"),
+ ("latn", "Latin"),
+ ("lepc", "Lepcha"),
+ ("lepcha", "Lepcha"),
+ ("limb", "Limbu"),
+ ("limbu", "Limbu"),
+ ("lina", "Linear_A"),
+ ("linb", "Linear_B"),
+ ("lineara", "Linear_A"),
+ ("linearb", "Linear_B"),
+ ("lisu", "Lisu"),
+ ("lyci", "Lycian"),
+ ("lycian", "Lycian"),
+ ("lydi", "Lydian"),
+ ("lydian", "Lydian"),
+ ("mahajani", "Mahajani"),
+ ("mahj", "Mahajani"),
+ ("maka", "Makasar"),
+ ("makasar", "Makasar"),
+ ("malayalam", "Malayalam"),
+ ("mand", "Mandaic"),
+ ("mandaic", "Mandaic"),
+ ("mani", "Manichaean"),
+ ("manichaean", "Manichaean"),
+ ("marc", "Marchen"),
+ ("marchen", "Marchen"),
+ ("masaramgondi", "Masaram_Gondi"),
+ ("medefaidrin", "Medefaidrin"),
+ ("medf", "Medefaidrin"),
+ ("meeteimayek", "Meetei_Mayek"),
+ ("mend", "Mende_Kikakui"),
+ ("mendekikakui", "Mende_Kikakui"),
+ ("merc", "Meroitic_Cursive"),
+ ("mero", "Meroitic_Hieroglyphs"),
+ ("meroiticcursive", "Meroitic_Cursive"),
+ ("meroitichieroglyphs", "Meroitic_Hieroglyphs"),
+ ("miao", "Miao"),
+ ("mlym", "Malayalam"),
+ ("modi", "Modi"),
+ ("mong", "Mongolian"),
+ ("mongolian", "Mongolian"),
+ ("mro", "Mro"),
+ ("mroo", "Mro"),
+ ("mtei", "Meetei_Mayek"),
+ ("mult", "Multani"),
+ ("multani", "Multani"),
+ ("myanmar", "Myanmar"),
+ ("mymr", "Myanmar"),
+ ("nabataean", "Nabataean"),
+ ("nand", "Nandinagari"),
+ ("nandinagari", "Nandinagari"),
+ ("narb", "Old_North_Arabian"),
+ ("nbat", "Nabataean"),
+ ("newa", "Newa"),
+ ("newtailue", "New_Tai_Lue"),
+ ("nko", "Nko"),
+ ("nkoo", "Nko"),
+ ("nshu", "Nushu"),
+ ("nushu", "Nushu"),
+ ("nyiakengpuachuehmong", "Nyiakeng_Puachue_Hmong"),
+ ("ogam", "Ogham"),
+ ("ogham", "Ogham"),
+ ("olchiki", "Ol_Chiki"),
+ ("olck", "Ol_Chiki"),
+ ("oldhungarian", "Old_Hungarian"),
+ ("olditalic", "Old_Italic"),
+ ("oldnortharabian", "Old_North_Arabian"),
+ ("oldpermic", "Old_Permic"),
+ ("oldpersian", "Old_Persian"),
+ ("oldsogdian", "Old_Sogdian"),
+ ("oldsoutharabian", "Old_South_Arabian"),
+ ("oldturkic", "Old_Turkic"),
+ ("oriya", "Oriya"),
+ ("orkh", "Old_Turkic"),
+ ("orya", "Oriya"),
+ ("osage", "Osage"),
+ ("osge", "Osage"),
+ ("osma", "Osmanya"),
+ ("osmanya", "Osmanya"),
+ ("pahawhhmong", "Pahawh_Hmong"),
+ ("palm", "Palmyrene"),
+ ("palmyrene", "Palmyrene"),
+ ("pauc", "Pau_Cin_Hau"),
+ ("paucinhau", "Pau_Cin_Hau"),
+ ("perm", "Old_Permic"),
+ ("phag", "Phags_Pa"),
+ ("phagspa", "Phags_Pa"),
+ ("phli", "Inscriptional_Pahlavi"),
+ ("phlp", "Psalter_Pahlavi"),
+ ("phnx", "Phoenician"),
+ ("phoenician", "Phoenician"),
+ ("plrd", "Miao"),
+ ("prti", "Inscriptional_Parthian"),
+ ("psalterpahlavi", "Psalter_Pahlavi"),
+ ("qaac", "Coptic"),
+ ("qaai", "Inherited"),
+ ("rejang", "Rejang"),
+ ("rjng", "Rejang"),
+ ("rohg", "Hanifi_Rohingya"),
+ ("runic", "Runic"),
+ ("runr", "Runic"),
+ ("samaritan", "Samaritan"),
+ ("samr", "Samaritan"),
+ ("sarb", "Old_South_Arabian"),
+ ("saur", "Saurashtra"),
+ ("saurashtra", "Saurashtra"),
+ ("sgnw", "SignWriting"),
+ ("sharada", "Sharada"),
+ ("shavian", "Shavian"),
+ ("shaw", "Shavian"),
+ ("shrd", "Sharada"),
+ ("sidd", "Siddham"),
+ ("siddham", "Siddham"),
+ ("signwriting", "SignWriting"),
+ ("sind", "Khudawadi"),
+ ("sinh", "Sinhala"),
+ ("sinhala", "Sinhala"),
+ ("sogd", "Sogdian"),
+ ("sogdian", "Sogdian"),
+ ("sogo", "Old_Sogdian"),
+ ("sora", "Sora_Sompeng"),
+ ("sorasompeng", "Sora_Sompeng"),
+ ("soyo", "Soyombo"),
+ ("soyombo", "Soyombo"),
+ ("sund", "Sundanese"),
+ ("sundanese", "Sundanese"),
+ ("sylo", "Syloti_Nagri"),
+ ("sylotinagri", "Syloti_Nagri"),
+ ("syrc", "Syriac"),
+ ("syriac", "Syriac"),
+ ("tagalog", "Tagalog"),
+ ("tagb", "Tagbanwa"),
+ ("tagbanwa", "Tagbanwa"),
+ ("taile", "Tai_Le"),
+ ("taitham", "Tai_Tham"),
+ ("taiviet", "Tai_Viet"),
+ ("takr", "Takri"),
+ ("takri", "Takri"),
+ ("tale", "Tai_Le"),
+ ("talu", "New_Tai_Lue"),
+ ("tamil", "Tamil"),
+ ("taml", "Tamil"),
+ ("tang", "Tangut"),
+ ("tangut", "Tangut"),
+ ("tavt", "Tai_Viet"),
+ ("telu", "Telugu"),
+ ("telugu", "Telugu"),
+ ("tfng", "Tifinagh"),
+ ("tglg", "Tagalog"),
+ ("thaa", "Thaana"),
+ ("thaana", "Thaana"),
+ ("thai", "Thai"),
+ ("tibetan", "Tibetan"),
+ ("tibt", "Tibetan"),
+ ("tifinagh", "Tifinagh"),
+ ("tirh", "Tirhuta"),
+ ("tirhuta", "Tirhuta"),
+ ("ugar", "Ugaritic"),
+ ("ugaritic", "Ugaritic"),
+ ("unknown", "Unknown"),
+ ("vai", "Vai"),
+ ("vaii", "Vai"),
+ ("wancho", "Wancho"),
+ ("wara", "Warang_Citi"),
+ ("warangciti", "Warang_Citi"),
+ ("wcho", "Wancho"),
+ ("xpeo", "Old_Persian"),
+ ("xsux", "Cuneiform"),
+ ("yezi", "Yezidi"),
+ ("yezidi", "Yezidi"),
+ ("yi", "Yi"),
+ ("yiii", "Yi"),
+ ("zanabazarsquare", "Zanabazar_Square"),
+ ("zanb", "Zanabazar_Square"),
+ ("zinh", "Inherited"),
+ ("zyyy", "Common"),
+ ("zzzz", "Unknown"),
+ ],
+ ),
+ (
+ "Sentence_Break",
+ &[
+ ("at", "ATerm"),
+ ("aterm", "ATerm"),
+ ("cl", "Close"),
+ ("close", "Close"),
+ ("cr", "CR"),
+ ("ex", "Extend"),
+ ("extend", "Extend"),
+ ("fo", "Format"),
+ ("format", "Format"),
+ ("le", "OLetter"),
+ ("lf", "LF"),
+ ("lo", "Lower"),
+ ("lower", "Lower"),
+ ("nu", "Numeric"),
+ ("numeric", "Numeric"),
+ ("oletter", "OLetter"),
+ ("other", "Other"),
+ ("sc", "SContinue"),
+ ("scontinue", "SContinue"),
+ ("se", "Sep"),
+ ("sep", "Sep"),
+ ("sp", "Sp"),
+ ("st", "STerm"),
+ ("sterm", "STerm"),
+ ("up", "Upper"),
+ ("upper", "Upper"),
+ ("xx", "Other"),
+ ],
+ ),
+ (
+ "Word_Break",
+ &[
+ ("aletter", "ALetter"),
+ ("cr", "CR"),
+ ("doublequote", "Double_Quote"),
+ ("dq", "Double_Quote"),
+ ("eb", "E_Base"),
+ ("ebase", "E_Base"),
+ ("ebasegaz", "E_Base_GAZ"),
+ ("ebg", "E_Base_GAZ"),
+ ("em", "E_Modifier"),
+ ("emodifier", "E_Modifier"),
+ ("ex", "ExtendNumLet"),
+ ("extend", "Extend"),
+ ("extendnumlet", "ExtendNumLet"),
+ ("fo", "Format"),
+ ("format", "Format"),
+ ("gaz", "Glue_After_Zwj"),
+ ("glueafterzwj", "Glue_After_Zwj"),
+ ("hebrewletter", "Hebrew_Letter"),
+ ("hl", "Hebrew_Letter"),
+ ("ka", "Katakana"),
+ ("katakana", "Katakana"),
+ ("le", "ALetter"),
+ ("lf", "LF"),
+ ("mb", "MidNumLet"),
+ ("midletter", "MidLetter"),
+ ("midnum", "MidNum"),
+ ("midnumlet", "MidNumLet"),
+ ("ml", "MidLetter"),
+ ("mn", "MidNum"),
+ ("newline", "Newline"),
+ ("nl", "Newline"),
+ ("nu", "Numeric"),
+ ("numeric", "Numeric"),
+ ("other", "Other"),
+ ("regionalindicator", "Regional_Indicator"),
+ ("ri", "Regional_Indicator"),
+ ("singlequote", "Single_Quote"),
+ ("sq", "Single_Quote"),
+ ("wsegspace", "WSegSpace"),
+ ("xx", "Other"),
+ ("zwj", "ZWJ"),
+ ],
+ ),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/script.rs b/vendor/regex-syntax/src/unicode_tables/script.rs
new file mode 100644
index 000000000..cd86cba0d
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/script.rs
@@ -0,0 +1,1218 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate script ucd-13.0.0 --chars
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+ ("Adlam", ADLAM),
+ ("Ahom", AHOM),
+ ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS),
+ ("Arabic", ARABIC),
+ ("Armenian", ARMENIAN),
+ ("Avestan", AVESTAN),
+ ("Balinese", BALINESE),
+ ("Bamum", BAMUM),
+ ("Bassa_Vah", BASSA_VAH),
+ ("Batak", BATAK),
+ ("Bengali", BENGALI),
+ ("Bhaiksuki", BHAIKSUKI),
+ ("Bopomofo", BOPOMOFO),
+ ("Brahmi", BRAHMI),
+ ("Braille", BRAILLE),
+ ("Buginese", BUGINESE),
+ ("Buhid", BUHID),
+ ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+ ("Carian", CARIAN),
+ ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+ ("Chakma", CHAKMA),
+ ("Cham", CHAM),
+ ("Cherokee", CHEROKEE),
+ ("Chorasmian", CHORASMIAN),
+ ("Common", COMMON),
+ ("Coptic", COPTIC),
+ ("Cuneiform", CUNEIFORM),
+ ("Cypriot", CYPRIOT),
+ ("Cyrillic", CYRILLIC),
+ ("Deseret", DESERET),
+ ("Devanagari", DEVANAGARI),
+ ("Dives_Akuru", DIVES_AKURU),
+ ("Dogra", DOGRA),
+ ("Duployan", DUPLOYAN),
+ ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
+ ("Elbasan", ELBASAN),
+ ("Elymaic", ELYMAIC),
+ ("Ethiopic", ETHIOPIC),
+ ("Georgian", GEORGIAN),
+ ("Glagolitic", GLAGOLITIC),
+ ("Gothic", GOTHIC),
+ ("Grantha", GRANTHA),
+ ("Greek", GREEK),
+ ("Gujarati", GUJARATI),
+ ("Gunjala_Gondi", GUNJALA_GONDI),
+ ("Gurmukhi", GURMUKHI),
+ ("Han", HAN),
+ ("Hangul", HANGUL),
+ ("Hanifi_Rohingya", HANIFI_ROHINGYA),
+ ("Hanunoo", HANUNOO),
+ ("Hatran", HATRAN),
+ ("Hebrew", HEBREW),
+ ("Hiragana", HIRAGANA),
+ ("Imperial_Aramaic", IMPERIAL_ARAMAIC),
+ ("Inherited", INHERITED),
+ ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+ ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN),
+ ("Javanese", JAVANESE),
+ ("Kaithi", KAITHI),
+ ("Kannada", KANNADA),
+ ("Katakana", KATAKANA),
+ ("Kayah_Li", KAYAH_LI),
+ ("Kharoshthi", KHAROSHTHI),
+ ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
+ ("Khmer", KHMER),
+ ("Khojki", KHOJKI),
+ ("Khudawadi", KHUDAWADI),
+ ("Lao", LAO),
+ ("Latin", LATIN),
+ ("Lepcha", LEPCHA),
+ ("Limbu", LIMBU),
+ ("Linear_A", LINEAR_A),
+ ("Linear_B", LINEAR_B),
+ ("Lisu", LISU),
+ ("Lycian", LYCIAN),
+ ("Lydian", LYDIAN),
+ ("Mahajani", MAHAJANI),
+ ("Makasar", MAKASAR),
+ ("Malayalam", MALAYALAM),
+ ("Mandaic", MANDAIC),
+ ("Manichaean", MANICHAEAN),
+ ("Marchen", MARCHEN),
+ ("Masaram_Gondi", MASARAM_GONDI),
+ ("Medefaidrin", MEDEFAIDRIN),
+ ("Meetei_Mayek", MEETEI_MAYEK),
+ ("Mende_Kikakui", MENDE_KIKAKUI),
+ ("Meroitic_Cursive", MEROITIC_CURSIVE),
+ ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS),
+ ("Miao", MIAO),
+ ("Modi", MODI),
+ ("Mongolian", MONGOLIAN),
+ ("Mro", MRO),
+ ("Multani", MULTANI),
+ ("Myanmar", MYANMAR),
+ ("Nabataean", NABATAEAN),
+ ("Nandinagari", NANDINAGARI),
+ ("New_Tai_Lue", NEW_TAI_LUE),
+ ("Newa", NEWA),
+ ("Nko", NKO),
+ ("Nushu", NUSHU),
+ ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG),
+ ("Ogham", OGHAM),
+ ("Ol_Chiki", OL_CHIKI),
+ ("Old_Hungarian", OLD_HUNGARIAN),
+ ("Old_Italic", OLD_ITALIC),
+ ("Old_North_Arabian", OLD_NORTH_ARABIAN),
+ ("Old_Permic", OLD_PERMIC),
+ ("Old_Persian", OLD_PERSIAN),
+ ("Old_Sogdian", OLD_SOGDIAN),
+ ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+ ("Old_Turkic", OLD_TURKIC),
+ ("Oriya", ORIYA),
+ ("Osage", OSAGE),
+ ("Osmanya", OSMANYA),
+ ("Pahawh_Hmong", PAHAWH_HMONG),
+ ("Palmyrene", PALMYRENE),
+ ("Pau_Cin_Hau", PAU_CIN_HAU),
+ ("Phags_Pa", PHAGS_PA),
+ ("Phoenician", PHOENICIAN),
+ ("Psalter_Pahlavi", PSALTER_PAHLAVI),
+ ("Rejang", REJANG),
+ ("Runic", RUNIC),
+ ("Samaritan", SAMARITAN),
+ ("Saurashtra", SAURASHTRA),
+ ("Sharada", SHARADA),
+ ("Shavian", SHAVIAN),
+ ("Siddham", SIDDHAM),
+ ("SignWriting", SIGNWRITING),
+ ("Sinhala", SINHALA),
+ ("Sogdian", SOGDIAN),
+ ("Sora_Sompeng", SORA_SOMPENG),
+ ("Soyombo", SOYOMBO),
+ ("Sundanese", SUNDANESE),
+ ("Syloti_Nagri", SYLOTI_NAGRI),
+ ("Syriac", SYRIAC),
+ ("Tagalog", TAGALOG),
+ ("Tagbanwa", TAGBANWA),
+ ("Tai_Le", TAI_LE),
+ ("Tai_Tham", TAI_THAM),
+ ("Tai_Viet", TAI_VIET),
+ ("Takri", TAKRI),
+ ("Tamil", TAMIL),
+ ("Tangut", TANGUT),
+ ("Telugu", TELUGU),
+ ("Thaana", THAANA),
+ ("Thai", THAI),
+ ("Tibetan", TIBETAN),
+ ("Tifinagh", TIFINAGH),
+ ("Tirhuta", TIRHUTA),
+ ("Ugaritic", UGARITIC),
+ ("Vai", VAI),
+ ("Wancho", WANCHO),
+ ("Warang_Citi", WARANG_CITI),
+ ("Yezidi", YEZIDI),
+ ("Yi", YI),
+ ("Zanabazar_Square", ZANABAZAR_SQUARE),
+];
+
+pub const ADLAM: &'static [(char, char)] =
+ &[('𞤀', 'đžĨ‹'), ('đžĨ', 'đžĨ™'), ('đžĨž', 'đžĨŸ')];
+
+pub const AHOM: &'static [(char, char)] =
+ &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', 'đ‘œŋ')];
+
+pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')];
+
+pub const ARABIC: &'static [(char, char)] = &[
+ ('\u{600}', '\u{604}'),
+ ('؆', '؋'),
+ ('؍', '\u{61a}'),
+ ('\u{61c}', '\u{61c}'),
+ ('؞', '؞'),
+ ('Ø ', 'Øŋ'),
+ ('Ų', 'ŲŠ'),
+ ('\u{656}', 'Ų¯'),
+ ('Ųą', '\u{6dc}'),
+ ('۞', 'Ûŋ'),
+ ('Ũ', 'Ũŋ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', '\u{8ff}'),
+ ('ī­', 'ī¯'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇŊ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('𐚠', '𐚞'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('đžģ°', 'đžģą'),
+];
+
+pub const ARMENIAN: &'static [(char, char)] =
+ &[('Ôą', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('īŦ“', 'īŦ—')];
+
+pub const AVESTAN: &'static [(char, char)] = &[('đŦ€', 'đŦĩ'), ('đŦš', 'đŦŋ')];
+
+pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭋ'), ('᭐', 'á­ŧ')];
+
+pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')];
+
+pub const BASSA_VAH: &'static [(char, char)] =
+ &[('đ–Ģ', 'đ–Ģ­'), ('\u{16af0}', 'đ–Ģĩ')];
+
+pub const BATAK: &'static [(char, char)] = &[('ᯀ', 'á¯ŗ'), ('á¯ŧ', 'á¯ŋ')];
+
+pub const BENGALI: &'static [(char, char)] = &[
+ ('āĻ€', 'āĻƒ'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('\u{9bc}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Ž'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', '\u{9e3}'),
+ ('ā§Ļ', '\u{9fe}'),
+];
+
+pub const BHAIKSUKI: &'static [(char, char)] =
+ &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', 'đ‘ąŦ')];
+
+pub const BOPOMOFO: &'static [(char, char)] =
+ &[('ËĒ', 'ËĢ'), ('ㄅ', 'ㄯ'), ('ㆠ', '\u{31bf}')];
+
+pub const BRAHMI: &'static [(char, char)] =
+ &[('𑀀', '𑁍'), ('𑁒', 'đ‘¯'), ('\u{1107f}', '\u{1107f}')];
+
+pub const BRAILLE: &'static [(char, char)] = &[('⠀', 'âŖŋ')];
+
+pub const BUGINESE: &'static [(char, char)] = &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟')];
+
+pub const BUHID: &'static [(char, char)] = &[('ᝀ', '\u{1753}')];
+
+pub const CANADIAN_ABORIGINAL: &'static [(char, char)] =
+ &[('᐀', 'á™ŋ'), ('áĸ°', 'áŖĩ')];
+
+pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')];
+
+pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] =
+ &[('𐔰', 'đ•Ŗ'), ('đ•¯', 'đ•¯')];
+
+pub const CHAKMA: &'static [(char, char)] =
+ &[('\u{11100}', '\u{11134}'), ('đ‘„ļ', '\u{11147}')];
+
+pub const CHAM: &'static [(char, char)] =
+ &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')];
+
+pub const CHEROKEE: &'static [(char, char)] =
+ &[('Ꭰ', 'áĩ'), ('ᏸ', 'áŊ'), ('ę­°', 'ęŽŋ')];
+
+pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')];
+
+pub const COMMON: &'static [(char, char)] = &[
+ ('\u{0}', '@'),
+ ('[', '`'),
+ ('{', 'Š'),
+ ('ÂĢ', 'š'),
+ ('Âģ', 'Âŋ'),
+ ('×', '×'),
+ ('Ãˇ', 'Ãˇ'),
+ ('ʹ', '˟'),
+ ('ËĨ', 'ËŠ'),
+ ('ËŦ', 'Ëŋ'),
+ ('Í´', 'Í´'),
+ ('Íž', 'Íž'),
+ ('΅', '΅'),
+ ('·', '·'),
+ ('\u{605}', '\u{605}'),
+ ('،', '،'),
+ ('؛', '؛'),
+ ('؟', '؟'),
+ ('Ų€', 'Ų€'),
+ ('\u{6dd}', '\u{6dd}'),
+ ('\u{8e2}', '\u{8e2}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('ā¸ŋ', 'ā¸ŋ'),
+ ('āŋ•', 'āŋ˜'),
+ ('áƒģ', 'áƒģ'),
+ ('á›Ģ', '᛭'),
+ ('áœĩ', 'áœļ'),
+ ('᠂', '᠃'),
+ ('᠅', '᠅'),
+ ('áŗ“', 'áŗ“'),
+ ('áŗĄ', 'áŗĄ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗˇ'),
+ ('áŗē', 'áŗē'),
+ ('\u{2000}', '\u{200b}'),
+ ('\u{200e}', '\u{2064}'),
+ ('\u{2066}', '⁰'),
+ ('⁴', '⁞'),
+ ('₀', '₎'),
+ ('₠', 'â‚ŋ'),
+ ('℀', 'â„Ĩ'),
+ ('℧', '℩'),
+ ('â„Ŧ', 'ℱ'),
+ ('â„ŗ', '⅍'),
+ ('⅏', '⅟'),
+ ('↉', '↋'),
+ ('←', 'âĻ'),
+ ('⑀', '⑊'),
+ ('①', 'âŸŋ'),
+ ('⤀', 'â­ŗ'),
+ ('â­ļ', '⮕'),
+ ('\u{2b97}', 'â¯ŋ'),
+ ('⸀', '\u{2e52}'),
+ ('âŋ°', 'âŋģ'),
+ ('\u{3000}', '〄'),
+ ('〆', '〆'),
+ ('〈', '〠'),
+ ('〰', 'ã€ˇ'),
+ ('ã€ŧ', 'ã€ŋ'),
+ ('゛', '゜'),
+ ('゠', '゠'),
+ ('ãƒģ', 'ãƒŧ'),
+ ('㆐', '㆟'),
+ ('㇀', 'ã‡Ŗ'),
+ ('㈠', '㉟'),
+ ('ã‰ŋ', '㋏'),
+ ('ã‹ŋ', 'ã‹ŋ'),
+ ('㍘', 'ãŋ'),
+ ('䷀', 'äˇŋ'),
+ ('꜀', '꜡'),
+ ('ꞈ', '꞊'),
+ ('ę °', 'ę š'),
+ ('ꤎ', 'ꤎ'),
+ ('ꧏ', 'ꧏ'),
+ ('꭛', '꭛'),
+ ('\u{ab6a}', '\u{ab6b}'),
+ ('ī´ž', 'ī´ŋ'),
+ ('ī¸', 'ī¸™'),
+ ('ī¸°', 'īš’'),
+ ('īš”', 'īšĻ'),
+ ('īš¨', 'īšĢ'),
+ ('\u{feff}', '\u{feff}'),
+ ('īŧ', 'īŧ '),
+ ('īŧģ', 'īŊ€'),
+ ('īŊ›', 'īŊĨ'),
+ ('īŊ°', 'īŊ°'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('īŋ ', 'īŋĻ'),
+ ('īŋ¨', 'īŋŽ'),
+ ('\u{fff9}', 'īŋŊ'),
+ ('𐄀', '𐄂'),
+ ('𐄇', 'đ„ŗ'),
+ ('𐄷', 'đ„ŋ'),
+ ('𐆐', '\u{1019c}'),
+ ('𐇐', 'đ‡ŧ'),
+ ('𐋡', 'đ‹ģ'),
+ ('đ–ŋĸ', 'đ–ŋŖ'),
+ ('\u{1bca0}', '\u{1bca3}'),
+ ('𝀀', 'đƒĩ'),
+ ('𝄀', 'đ„Ļ'),
+ ('𝄩', 'đ…Ļ'),
+ ('đ…Ē', '\u{1d17a}'),
+ ('𝆃', '𝆄'),
+ ('𝆌', '𝆩'),
+ ('𝆮', '𝇨'),
+ ('𝋠', 'đ‹ŗ'),
+ ('𝌀', '𝍖'),
+ ('𝍠', '𝍸'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝟋'),
+ ('𝟎', 'đŸŋ'),
+ ('𞱱', '𞲴'),
+ ('𞴁', 'đž´Ŋ'),
+ ('🀀', 'đŸ€Ģ'),
+ ('🀰', '🂓'),
+ ('🂠', '🂮'),
+ ('🂱', 'đŸ‚ŋ'),
+ ('🃁', '🃏'),
+ ('🃑', 'đŸƒĩ'),
+ ('🄀', '\u{1f1ad}'),
+ ('đŸ‡Ļ', 'đŸ‡ŋ'),
+ ('🈁', '🈂'),
+ ('🈐', 'đŸˆģ'),
+ ('🉀', '🉈'),
+ ('🉐', '🉑'),
+ ('🉠', 'đŸ‰Ĩ'),
+ ('🌀', '\u{1f6d7}'),
+ ('🛠', 'đŸ›Ŧ'),
+ ('🛰', '\u{1f6fc}'),
+ ('🜀', 'đŸŗ'),
+ ('🞀', '🟘'),
+ ('🟠', 'đŸŸĢ'),
+ ('🠀', '🠋'),
+ ('🠐', '🡇'),
+ ('🡐', '🡙'),
+ ('🡠', 'đŸĸ‡'),
+ ('đŸĸ', 'đŸĸ­'),
+ ('\u{1f8b0}', '\u{1f8b1}'),
+ ('🤀', '\u{1f978}'),
+ ('đŸĨē', '\u{1f9cb}'),
+ ('🧍', '🩓'),
+ ('🩠', '🩭'),
+ ('🩰', '\u{1fa74}'),
+ ('🩸', 'đŸŠē'),
+ ('đŸĒ€', '\u{1fa86}'),
+ ('đŸĒ', '\u{1faa8}'),
+ ('\u{1fab0}', '\u{1fab6}'),
+ ('\u{1fac0}', '\u{1fac2}'),
+ ('\u{1fad0}', '\u{1fad6}'),
+ ('\u{1fb00}', '\u{1fb92}'),
+ ('\u{1fb94}', '\u{1fbca}'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+ ('\u{e0001}', '\u{e0001}'),
+ ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const COPTIC: &'static [(char, char)] =
+ &[('Īĸ', 'Ī¯'), ('Ⲁ', 'âŗŗ'), ('âŗš', 'âŗŋ')];
+
+pub const CUNEIFORM: &'static [(char, char)] =
+ &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')];
+
+pub const CYPRIOT: &'static [(char, char)] =
+ &[('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', 'đ ĩ'), ('𐠡', '𐠸'), ('đ ŧ', 'đ ŧ'), ('đ ŋ', 'đ ŋ')];
+
+pub const CYRILLIC: &'static [(char, char)] = &[
+ ('Ѐ', '\u{484}'),
+ ('\u{487}', 'Ô¯'),
+ ('ᲀ', 'ᲈ'),
+ ('á´Ģ', 'á´Ģ'),
+ ('áĩ¸', 'áĩ¸'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('Ꙁ', '\u{a69f}'),
+ ('\u{fe2e}', '\u{fe2f}'),
+];
+
+pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')];
+
+pub const DEVANAGARI: &'static [(char, char)] = &[
+ ('\u{900}', 'āĨ'),
+ ('\u{955}', '\u{963}'),
+ ('āĨĻ', 'āĨŋ'),
+ ('\u{a8e0}', '\u{a8ff}'),
+];
+
+pub const DIVES_AKURU: &'static [(char, char)] = &[
+ ('\u{11900}', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{11946}'),
+ ('\u{11950}', '\u{11959}'),
+];
+
+pub const DOGRA: &'static [(char, char)] = &[('𑠀', 'đ‘ ģ')];
+
+pub const DUPLOYAN: &'static [(char, char)] =
+ &[('𛰀', 'đ›ąĒ'), ('𛱰', 'đ›ąŧ'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '𛲟')];
+
+pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] =
+ &[('𓀀', '𓐮'), ('\u{13430}', '\u{13438}')];
+
+pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')];
+
+pub const ELYMAIC: &'static [(char, char)] = &[('đŋ ', 'đŋļ')];
+
+pub const ETHIOPIC: &'static [(char, char)] = &[
+ ('ሀ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('\u{135d}', 'áŧ'),
+ ('ᎀ', '᎙'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+];
+
+pub const GEORGIAN: &'static [(char, char)] = &[
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'áƒŋ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+];
+
+pub const GLAGOLITIC: &'static [(char, char)] = &[
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+];
+
+pub const GOTHIC: &'static [(char, char)] = &[('𐌰', '𐍊')];
+
+pub const GRANTHA: &'static [(char, char)] = &[
+ ('\u{11300}', '𑌃'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('\u{1133c}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('𑍐', '𑍐'),
+ ('\u{11357}', '\u{11357}'),
+ ('𑍝', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+];
+
+pub const GREEK: &'static [(char, char)] = &[
+ ('Í°', 'Íŗ'),
+ ('Íĩ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('΄', '΄'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'ĪĄ'),
+ ('Ī°', 'Īŋ'),
+ ('á´Ļ', 'á´Ē'),
+ ('áĩ', 'áĩĄ'),
+ ('áĩĻ', 'áĩĒ'),
+ ('áļŋ', 'áļŋ'),
+ ('áŧ€', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'áŋ„'),
+ ('áŋ†', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ', 'áŋ¯'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋž'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ę­Ĩ', 'ę­Ĩ'),
+ ('𐅀', '𐆎'),
+ ('𐆠', '𐆠'),
+ ('𝈀', '𝉅'),
+];
+
+pub const GUJARATI: &'static [(char, char)] = &[
+ ('\u{a81}', 'āĒƒ'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('\u{abc}', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', '\u{ae3}'),
+ ('āĢĻ', 'āĢą'),
+ ('āĢš', '\u{aff}'),
+];
+
+pub const GUNJALA_GONDI: &'static [(char, char)] = &[
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', 'đ‘ļ˜'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+];
+
+pub const GURMUKHI: &'static [(char, char)] = &[
+ ('\u{a01}', 'ā¨ƒ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠĻ', 'āŠļ'),
+];
+
+pub const HAN: &'static [(char, char)] = &[
+ ('âē€', 'âē™'),
+ ('âē›', 'âģŗ'),
+ ('âŧ€', 'âŋ•'),
+ ('々', '々'),
+ ('〇', '〇'),
+ ('ã€Ą', '《'),
+ ('〸', 'ã€ģ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const HANGUL: &'static [(char, char)] = &[
+ ('ᄀ', 'á‡ŋ'),
+ ('\u{302e}', '\u{302f}'),
+ ('ã„ą', 'ㆎ'),
+ ('㈀', '㈞'),
+ ('㉠', '㉞'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('īž ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+];
+
+pub const HANIFI_ROHINGYA: &'static [(char, char)] =
+ &[('𐴀', '\u{10d27}'), ('𐴰', '𐴹')];
+
+pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '\u{1734}')];
+
+pub const HATRAN: &'static [(char, char)] =
+ &[('đŖ ', 'đŖ˛'), ('đŖ´', 'đŖĩ'), ('đŖģ', 'đŖŋ')];
+
+pub const HEBREW: &'static [(char, char)] = &[
+ ('\u{591}', '\u{5c7}'),
+ ('א', '×Ē'),
+ ('ׯ', '״'),
+ ('īŦ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'ī­'),
+];
+
+pub const HIRAGANA: &'static [(char, char)] =
+ &[('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('𛀁', '𛄞'), ('𛅐', '𛅒'), ('🈀', '🈀')];
+
+pub const IMPERIAL_ARAMAIC: &'static [(char, char)] =
+ &[('𐡀', '𐡕'), ('𐡗', '𐡟')];
+
+pub const INHERITED: &'static [(char, char)] = &[
+ ('\u{300}', '\u{36f}'),
+ ('\u{485}', '\u{486}'),
+ ('\u{64b}', '\u{655}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{951}', '\u{954}'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', '\u{1ce0}'),
+ ('\u{1ce2}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('\u{1cf8}', '\u{1cf9}'),
+ ('\u{1dc0}', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('\u{200c}', '\u{200d}'),
+ ('\u{20d0}', '\u{20f0}'),
+ ('\u{302a}', '\u{302d}'),
+ ('\u{3099}', '\u{309a}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2d}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{1133b}', '\u{1133b}'),
+ ('\u{1d167}', '\u{1d169}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] =
+ &[('𐭠', '𐭲'), ('𐭸', 'đ­ŋ')];
+
+pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] =
+ &[('𐭀', '𐭕'), ('𐭘', '𐭟')];
+
+pub const JAVANESE: &'static [(char, char)] =
+ &[('\u{a980}', '꧍'), ('꧐', '꧙'), ('꧞', '꧟')];
+
+pub const KAITHI: &'static [(char, char)] =
+ &[('\u{11080}', '𑃁'), ('\u{110cd}', '\u{110cd}')];
+
+pub const KANNADA: &'static [(char, char)] = &[
+ ('ā˛€', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('\u{cbc}', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', '\u{ce3}'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āŗą', 'āŗ˛'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŊ', 'ãƒŋ'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㋐', 'ã‹ž'),
+ ('㌀', '㍗'),
+ ('īŊĻ', 'īŊ¯'),
+ ('īŊą', 'īž'),
+ ('𛀀', '𛀀'),
+ ('𛅤', '𛅧'),
+];
+
+pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '\u{a92d}'), ('ę¤¯', 'ę¤¯')];
+
+pub const KHAROSHTHI: &'static [(char, char)] = &[
+ ('𐨀', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '𐊈'),
+ ('𐊐', '𐊘'),
+];
+
+pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] =
+ &[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')];
+
+pub const KHMER: &'static [(char, char)] =
+ &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', 'á§ŋ')];
+
+pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{1123e}')];
+
+pub const KHUDAWADI: &'static [(char, char)] =
+ &[('𑊰', '\u{112ea}'), ('𑋰', '𑋹')];
+
+pub const LAO: &'static [(char, char)] = &[
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('āģ', 'āģ™'),
+ ('āģœ', 'āģŸ'),
+];
+
+pub const LATIN: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ʸ'),
+ ('ˠ', 'ˤ'),
+ ('ᴀ', 'á´Ĩ'),
+ ('á´Ŧ', 'áĩœ'),
+ ('áĩĸ', 'áĩĨ'),
+ ('áĩĢ', 'áĩˇ'),
+ ('áĩš', 'áļž'),
+ ('Ḁ', 'áģŋ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('â„Ē', 'â„Ģ'),
+ ('Ⅎ', 'Ⅎ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('âą ', 'âąŋ'),
+ ('ęœĸ', 'ꞇ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ęŸŋ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', 'ꭤ'),
+ ('ę­Ļ', '\u{ab69}'),
+ ('īŦ€', 'īŦ†'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+];
+
+pub const LEPCHA: &'static [(char, char)] =
+ &[('ᰀ', '\u{1c37}'), ('á°ģ', '᱉'), ('ᱍ', 'ᱏ')];
+
+pub const LIMBU: &'static [(char, char)] = &[
+ ('ᤀ', 'ᤞ'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('áĨ€', 'áĨ€'),
+ ('áĨ„', 'áĨ'),
+];
+
+pub const LINEAR_A: &'static [(char, char)] =
+ &[('𐘀', 'đœļ'), ('𐝀', '𐝕'), ('𐝠', '𐝧')];
+
+pub const LINEAR_B: &'static [(char, char)] = &[
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+];
+
+pub const LISU: &'static [(char, char)] =
+ &[('ꓐ', 'ę“ŋ'), ('\u{11fb0}', '\u{11fb0}')];
+
+pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')];
+
+pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤚'), ('đ¤ŋ', 'đ¤ŋ')];
+
+pub const MAHAJANI: &'static [(char, char)] = &[('𑅐', 'đ‘…ļ')];
+
+pub const MAKASAR: &'static [(char, char)] = &[('đ‘ģ ', 'đ‘ģ¸')];
+
+pub const MALAYALAM: &'static [(char, char)] = &[
+ ('\u{d00}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩ'),
+ ('āĩ”', '\u{d63}'),
+ ('āĩĻ', 'āĩŋ'),
+];
+
+pub const MANDAIC: &'static [(char, char)] = &[('āĄ€', '\u{85b}'), ('āĄž', 'āĄž')];
+
+pub const MANICHAEAN: &'static [(char, char)] =
+ &[('đĢ€', '\u{10ae6}'), ('đĢĢ', 'đĢļ')];
+
+pub const MARCHEN: &'static [(char, char)] =
+ &[('𑱰', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}')];
+
+pub const MASARAM_GONDI: &'static [(char, char)] = &[
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d47}'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+];
+
+pub const MEDEFAIDRIN: &'static [(char, char)] = &[('𖹀', 'đ–ēš')];
+
+pub const MEETEI_MAYEK: &'static [(char, char)] =
+ &[('ęĢ ', '\u{aaf6}'), ('ę¯€', '\u{abed}'), ('ę¯°', 'ę¯š')];
+
+pub const MENDE_KIKAKUI: &'static [(char, char)] =
+ &[('𞠀', 'đžŖ„'), ('đžŖ‡', '\u{1e8d6}')];
+
+pub const MEROITIC_CURSIVE: &'static [(char, char)] =
+ &[('đĻ ', 'đĻˇ'), ('đĻŧ', '𐧏'), ('𐧒', 'đ§ŋ')];
+
+pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('đĻ€', 'đĻŸ')];
+
+pub const MIAO: &'static [(char, char)] =
+ &[('đ–ŧ€', 'đ–ŊŠ'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')];
+
+pub const MODI: &'static [(char, char)] = &[('𑘀', '𑙄'), ('𑙐', '𑙙')];
+
+pub const MONGOLIAN: &'static [(char, char)] = &[
+ ('᠀', '᠁'),
+ ('᠄', '᠄'),
+ ('᠆', '\u{180e}'),
+ ('᠐', '᠙'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸĒ'),
+ ('𑙠', 'đ‘™Ŧ'),
+];
+
+pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', 'đ–Š¯')];
+
+pub const MULTANI: &'static [(char, char)] =
+ &[('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')];
+
+pub const MYANMAR: &'static [(char, char)] =
+ &[('က', '႟'), ('ꧠ', '꧞'), ('ꊠ', 'ęŠŋ')];
+
+pub const NABATAEAN: &'static [(char, char)] = &[('đĸ€', 'đĸž'), ('đĸ§', 'đĸ¯')];
+
+pub const NANDINAGARI: &'static [(char, char)] =
+ &[('đ‘Ļ ', 'đ‘Ļ§'), ('đ‘ĻĒ', '\u{119d7}'), ('\u{119da}', '𑧤')];
+
+pub const NEW_TAI_LUE: &'static [(char, char)] =
+ &[('áĻ€', 'áĻĢ'), ('áĻ°', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')];
+
+pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '\u{11461}')];
+
+pub const NKO: &'static [(char, char)] = &[('߀', 'ßē'), ('\u{7fd}', 'ßŋ')];
+
+pub const NUSHU: &'static [(char, char)] = &[('đ–ŋĄ', 'đ–ŋĄ'), ('𛅰', 'đ›‹ģ')];
+
+pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] =
+ &[('𞄀', 'đž„Ŧ'), ('\u{1e130}', 'đž„Ŋ'), ('𞅀', '𞅉'), ('𞅎', '𞅏')];
+
+pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')];
+
+pub const OL_CHIKI: &'static [(char, char)] = &[('Ṑ', 'áąŋ')];
+
+pub const OLD_HUNGARIAN: &'static [(char, char)] =
+ &[('𐲀', '𐲲'), ('đŗ€', 'đŗ˛'), ('đŗē', 'đŗŋ')];
+
+pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', 'đŒŖ'), ('𐌭', 'đŒ¯')];
+
+pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('đĒ€', 'đĒŸ')];
+
+pub const OLD_PERMIC: &'static [(char, char)] = &[('𐍐', '\u{1037a}')];
+
+pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')];
+
+pub const OLD_SOGDIAN: &'static [(char, char)] = &[('đŧ€', 'đŧ§')];
+
+pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐊠', 'đŠŋ')];
+
+pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')];
+
+pub const ORIYA: &'static [(char, char)] = &[
+ ('\u{b01}', 'āŦƒ'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('\u{b3c}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', '\u{b63}'),
+ ('ā­Ļ', 'ā­ˇ'),
+];
+
+pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', 'đ“ģ')];
+
+pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')];
+
+pub const PAHAWH_HMONG: &'static [(char, char)] =
+ &[('đ–Ŧ€', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('đ–­Ŗ', '𖭷'), ('đ–­Ŋ', '𖮏')];
+
+pub const PALMYRENE: &'static [(char, char)] = &[('𐥠', 'đĄŋ')];
+
+pub const PAU_CIN_HAU: &'static [(char, char)] = &[('đ‘Ģ€', 'đ‘Ģ¸')];
+
+pub const PHAGS_PA: &'static [(char, char)] = &[('ꡀ', '꡷')];
+
+pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')];
+
+pub const PSALTER_PAHLAVI: &'static [(char, char)] =
+ &[('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐎊', 'đŽ¯')];
+
+pub const REJANG: &'static [(char, char)] = &[('ꤰ', 'ęĨ“'), ('ęĨŸ', 'ęĨŸ')];
+
+pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'á›Ē'), ('ᛮ', 'ᛸ')];
+
+pub const SAMARITAN: &'static [(char, char)] = &[('ā €', '\u{82d}'), ('ā °', 'ā ž')];
+
+pub const SAURASHTRA: &'static [(char, char)] =
+ &[('ęĸ€', '\u{a8c5}'), ('ęŖŽ', 'ęŖ™')];
+
+pub const SHARADA: &'static [(char, char)] = &[('\u{11180}', '𑇟')];
+
+pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', 'đ‘ŋ')];
+
+pub const SIDDHAM: &'static [(char, char)] =
+ &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')];
+
+pub const SIGNWRITING: &'static [(char, char)] =
+ &[('𝠀', 'đĒ‹'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')];
+
+pub const SINHALA: &'static [(char, char)] = &[
+ ('\u{d81}', 'āļƒ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āˇ˛', 'āˇ´'),
+ ('𑇡', '𑇴'),
+];
+
+pub const SOGDIAN: &'static [(char, char)] = &[('đŧ°', 'đŊ™')];
+
+pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')];
+
+pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', 'đ‘Ēĸ')];
+
+pub const SUNDANESE: &'static [(char, char)] =
+ &[('\u{1b80}', 'áŽŋ'), ('áŗ€', 'áŗ‡')];
+
+pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '\u{a82c}')];
+
+pub const SYRIAC: &'static [(char, char)] =
+ &[('܀', '܍'), ('\u{70f}', '\u{74a}'), ('Ũ', 'Ũ'), ('āĄ ', 'āĄĒ')];
+
+pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}')];
+
+pub const TAGBANWA: &'static [(char, char)] =
+ &[('ᝠ', 'áŦ'), ('ᝎ', 'ᝰ'), ('\u{1772}', '\u{1773}')];
+
+pub const TAI_LE: &'static [(char, char)] = &[('áĨ', 'áĨ­'), ('áĨ°', 'áĨ´')];
+
+pub const TAI_THAM: &'static [(char, char)] = &[
+ ('ᨠ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('áĒ ', 'áĒ­'),
+];
+
+pub const TAI_VIET: &'static [(char, char)] = &[('ęĒ€', 'ęĢ‚'), ('ęĢ›', 'ęĢŸ')];
+
+pub const TAKRI: &'static [(char, char)] = &[('𑚀', '𑚸'), ('𑛀', '𑛉')];
+
+pub const TAMIL: &'static [(char, char)] = &[
+ ('\u{b82}', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('ā¯', 'ā¯'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('ā¯Ļ', 'ā¯ē'),
+ ('đ‘ŋ€', 'đ‘ŋą'),
+ ('đ‘ŋŋ', 'đ‘ŋŋ'),
+];
+
+pub const TANGUT: &'static [(char, char)] = &[
+ ('đ–ŋ ', 'đ–ŋ '),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18aff}'),
+ ('\u{18d00}', '\u{18d08}'),
+];
+
+pub const TELUGU: &'static [(char, char)] = &[
+ ('\u{c00}', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('āą˜', 'āąš'),
+ ('āą ', '\u{c63}'),
+ ('āąĻ', 'āą¯'),
+ ('āąˇ', 'āąŋ'),
+];
+
+pub const THAANA: &'static [(char, char)] = &[('Ū€', 'Ūą')];
+
+pub const THAI: &'static [(char, char)] = &[('ā¸', '\u{e3a}'), ('āš€', 'āš›')];
+
+pub const TIBETAN: &'static [(char, char)] = &[
+ ('āŧ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('\u{f71}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('āžž', 'āŋŒ'),
+ ('āŋŽ', 'āŋ”'),
+ ('āŋ™', 'āŋš'),
+];
+
+pub const TIFINAGH: &'static [(char, char)] =
+ &[('â´°', 'âĩ§'), ('âĩ¯', 'âĩ°'), ('\u{2d7f}', '\u{2d7f}')];
+
+pub const TIRHUTA: &'static [(char, char)] = &[('𑒀', '𑓇'), ('𑓐', '𑓙')];
+
+pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')];
+
+pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ę˜Ģ')];
+
+pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('đž‹ŋ', 'đž‹ŋ')];
+
+pub const WARANG_CITI: &'static [(char, char)] = &[('đ‘ĸ ', 'đ‘Ŗ˛'), ('đ‘Ŗŋ', 'đ‘Ŗŋ')];
+
+pub const YEZIDI: &'static [(char, char)] = &[
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eab}', '\u{10ead}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+];
+
+pub const YI: &'static [(char, char)] = &[('ꀀ', 'ꒌ'), ('꒐', '꓆')];
+
+pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[('𑨀', '\u{11a47}')];
diff --git a/vendor/regex-syntax/src/unicode_tables/script_extension.rs b/vendor/regex-syntax/src/unicode_tables/script_extension.rs
new file mode 100644
index 000000000..7fca2af9d
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/script_extension.rs
@@ -0,0 +1,1396 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate script-extension ucd-13.0.0 --chars
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+ ("Adlam", ADLAM),
+ ("Ahom", AHOM),
+ ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS),
+ ("Arabic", ARABIC),
+ ("Armenian", ARMENIAN),
+ ("Avestan", AVESTAN),
+ ("Balinese", BALINESE),
+ ("Bamum", BAMUM),
+ ("Bassa_Vah", BASSA_VAH),
+ ("Batak", BATAK),
+ ("Bengali", BENGALI),
+ ("Bhaiksuki", BHAIKSUKI),
+ ("Bopomofo", BOPOMOFO),
+ ("Brahmi", BRAHMI),
+ ("Braille", BRAILLE),
+ ("Buginese", BUGINESE),
+ ("Buhid", BUHID),
+ ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+ ("Carian", CARIAN),
+ ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+ ("Chakma", CHAKMA),
+ ("Cham", CHAM),
+ ("Cherokee", CHEROKEE),
+ ("Chorasmian", CHORASMIAN),
+ ("Common", COMMON),
+ ("Coptic", COPTIC),
+ ("Cuneiform", CUNEIFORM),
+ ("Cypriot", CYPRIOT),
+ ("Cyrillic", CYRILLIC),
+ ("Deseret", DESERET),
+ ("Devanagari", DEVANAGARI),
+ ("Dives_Akuru", DIVES_AKURU),
+ ("Dogra", DOGRA),
+ ("Duployan", DUPLOYAN),
+ ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
+ ("Elbasan", ELBASAN),
+ ("Elymaic", ELYMAIC),
+ ("Ethiopic", ETHIOPIC),
+ ("Georgian", GEORGIAN),
+ ("Glagolitic", GLAGOLITIC),
+ ("Gothic", GOTHIC),
+ ("Grantha", GRANTHA),
+ ("Greek", GREEK),
+ ("Gujarati", GUJARATI),
+ ("Gunjala_Gondi", GUNJALA_GONDI),
+ ("Gurmukhi", GURMUKHI),
+ ("Han", HAN),
+ ("Hangul", HANGUL),
+ ("Hanifi_Rohingya", HANIFI_ROHINGYA),
+ ("Hanunoo", HANUNOO),
+ ("Hatran", HATRAN),
+ ("Hebrew", HEBREW),
+ ("Hiragana", HIRAGANA),
+ ("Imperial_Aramaic", IMPERIAL_ARAMAIC),
+ ("Inherited", INHERITED),
+ ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+ ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN),
+ ("Javanese", JAVANESE),
+ ("Kaithi", KAITHI),
+ ("Kannada", KANNADA),
+ ("Katakana", KATAKANA),
+ ("Kayah_Li", KAYAH_LI),
+ ("Kharoshthi", KHAROSHTHI),
+ ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
+ ("Khmer", KHMER),
+ ("Khojki", KHOJKI),
+ ("Khudawadi", KHUDAWADI),
+ ("Lao", LAO),
+ ("Latin", LATIN),
+ ("Lepcha", LEPCHA),
+ ("Limbu", LIMBU),
+ ("Linear_A", LINEAR_A),
+ ("Linear_B", LINEAR_B),
+ ("Lisu", LISU),
+ ("Lycian", LYCIAN),
+ ("Lydian", LYDIAN),
+ ("Mahajani", MAHAJANI),
+ ("Makasar", MAKASAR),
+ ("Malayalam", MALAYALAM),
+ ("Mandaic", MANDAIC),
+ ("Manichaean", MANICHAEAN),
+ ("Marchen", MARCHEN),
+ ("Masaram_Gondi", MASARAM_GONDI),
+ ("Medefaidrin", MEDEFAIDRIN),
+ ("Meetei_Mayek", MEETEI_MAYEK),
+ ("Mende_Kikakui", MENDE_KIKAKUI),
+ ("Meroitic_Cursive", MEROITIC_CURSIVE),
+ ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS),
+ ("Miao", MIAO),
+ ("Modi", MODI),
+ ("Mongolian", MONGOLIAN),
+ ("Mro", MRO),
+ ("Multani", MULTANI),
+ ("Myanmar", MYANMAR),
+ ("Nabataean", NABATAEAN),
+ ("Nandinagari", NANDINAGARI),
+ ("New_Tai_Lue", NEW_TAI_LUE),
+ ("Newa", NEWA),
+ ("Nko", NKO),
+ ("Nushu", NUSHU),
+ ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG),
+ ("Ogham", OGHAM),
+ ("Ol_Chiki", OL_CHIKI),
+ ("Old_Hungarian", OLD_HUNGARIAN),
+ ("Old_Italic", OLD_ITALIC),
+ ("Old_North_Arabian", OLD_NORTH_ARABIAN),
+ ("Old_Permic", OLD_PERMIC),
+ ("Old_Persian", OLD_PERSIAN),
+ ("Old_Sogdian", OLD_SOGDIAN),
+ ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+ ("Old_Turkic", OLD_TURKIC),
+ ("Oriya", ORIYA),
+ ("Osage", OSAGE),
+ ("Osmanya", OSMANYA),
+ ("Pahawh_Hmong", PAHAWH_HMONG),
+ ("Palmyrene", PALMYRENE),
+ ("Pau_Cin_Hau", PAU_CIN_HAU),
+ ("Phags_Pa", PHAGS_PA),
+ ("Phoenician", PHOENICIAN),
+ ("Psalter_Pahlavi", PSALTER_PAHLAVI),
+ ("Rejang", REJANG),
+ ("Runic", RUNIC),
+ ("Samaritan", SAMARITAN),
+ ("Saurashtra", SAURASHTRA),
+ ("Sharada", SHARADA),
+ ("Shavian", SHAVIAN),
+ ("Siddham", SIDDHAM),
+ ("SignWriting", SIGNWRITING),
+ ("Sinhala", SINHALA),
+ ("Sogdian", SOGDIAN),
+ ("Sora_Sompeng", SORA_SOMPENG),
+ ("Soyombo", SOYOMBO),
+ ("Sundanese", SUNDANESE),
+ ("Syloti_Nagri", SYLOTI_NAGRI),
+ ("Syriac", SYRIAC),
+ ("Tagalog", TAGALOG),
+ ("Tagbanwa", TAGBANWA),
+ ("Tai_Le", TAI_LE),
+ ("Tai_Tham", TAI_THAM),
+ ("Tai_Viet", TAI_VIET),
+ ("Takri", TAKRI),
+ ("Tamil", TAMIL),
+ ("Tangut", TANGUT),
+ ("Telugu", TELUGU),
+ ("Thaana", THAANA),
+ ("Thai", THAI),
+ ("Tibetan", TIBETAN),
+ ("Tifinagh", TIFINAGH),
+ ("Tirhuta", TIRHUTA),
+ ("Ugaritic", UGARITIC),
+ ("Vai", VAI),
+ ("Wancho", WANCHO),
+ ("Warang_Citi", WARANG_CITI),
+ ("Yezidi", YEZIDI),
+ ("Yi", YI),
+ ("Zanabazar_Square", ZANABAZAR_SQUARE),
+];
+
+pub const ADLAM: &'static [(char, char)] =
+ &[('Ų€', 'Ų€'), ('𞤀', 'đžĨ‹'), ('đžĨ', 'đžĨ™'), ('đžĨž', 'đžĨŸ')];
+
+pub const AHOM: &'static [(char, char)] =
+ &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', 'đ‘œŋ')];
+
+pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')];
+
+pub const ARABIC: &'static [(char, char)] = &[
+ ('\u{600}', '\u{604}'),
+ ('؆', '\u{61c}'),
+ ('؞', '\u{6dc}'),
+ ('۞', 'Ûŋ'),
+ ('Ũ', 'Ũŋ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', '\u{8ff}'),
+ ('ī­', 'ī¯'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇŊ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('\u{102e0}', 'đ‹ģ'),
+ ('𐚠', '𐚞'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('đžģ°', 'đžģą'),
+];
+
+pub const ARMENIAN: &'static [(char, char)] =
+ &[('Ôą', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('īŦ“', 'īŦ—')];
+
+pub const AVESTAN: &'static [(char, char)] = &[('đŦ€', 'đŦĩ'), ('đŦš', 'đŦŋ')];
+
+pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭋ'), ('᭐', 'á­ŧ')];
+
+pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')];
+
+pub const BASSA_VAH: &'static [(char, char)] =
+ &[('đ–Ģ', 'đ–Ģ­'), ('\u{16af0}', 'đ–Ģĩ')];
+
+pub const BATAK: &'static [(char, char)] = &[('ᯀ', 'á¯ŗ'), ('á¯ŧ', 'á¯ŋ')];
+
+pub const BENGALI: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('āĻ€', 'āĻƒ'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('\u{9bc}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', 'ā§Ž'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', '\u{9e3}'),
+ ('ā§Ļ', '\u{9fe}'),
+ ('\u{1cd0}', '\u{1cd0}'),
+ ('\u{1cd2}', '\u{1cd2}'),
+ ('\u{1cd5}', '\u{1cd6}'),
+ ('\u{1cd8}', '\u{1cd8}'),
+ ('áŗĄ', 'áŗĄ'),
+ ('áŗĒ', 'áŗĒ'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('áŗ˛', 'áŗ˛'),
+ ('áŗĩ', 'áŗˇ'),
+ ('\u{a8f1}', '\u{a8f1}'),
+];
+
+pub const BHAIKSUKI: &'static [(char, char)] =
+ &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', 'đ‘ąŦ')];
+
+pub const BOPOMOFO: &'static [(char, char)] = &[
+ ('ËĒ', 'ËĢ'),
+ ('、', '〃'),
+ ('〈', '】'),
+ ('〓', '〟'),
+ ('\u{302a}', '\u{302d}'),
+ ('〰', '〰'),
+ ('ã€ˇ', 'ã€ˇ'),
+ ('ãƒģ', 'ãƒģ'),
+ ('ㄅ', 'ㄯ'),
+ ('ㆠ', '\u{31bf}'),
+ ('īš…', 'īš†'),
+ ('īŊĄ', 'īŊĨ'),
+];
+
+pub const BRAHMI: &'static [(char, char)] =
+ &[('𑀀', '𑁍'), ('𑁒', 'đ‘¯'), ('\u{1107f}', '\u{1107f}')];
+
+pub const BRAILLE: &'static [(char, char)] = &[('⠀', 'âŖŋ')];
+
+pub const BUGINESE: &'static [(char, char)] =
+ &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟'), ('ꧏ', 'ꧏ')];
+
+pub const BUHID: &'static [(char, char)] = &[('áœĩ', 'áœļ'), ('ᝀ', '\u{1753}')];
+
+pub const CANADIAN_ABORIGINAL: &'static [(char, char)] =
+ &[('᐀', 'á™ŋ'), ('áĸ°', 'áŖĩ')];
+
+pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')];
+
+pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] =
+ &[('𐔰', 'đ•Ŗ'), ('đ•¯', 'đ•¯')];
+
+pub const CHAKMA: &'static [(char, char)] =
+ &[('ā§Ļ', 'ā§¯'), ('၀', '၉'), ('\u{11100}', '\u{11134}'), ('đ‘„ļ', '\u{11147}')];
+
+pub const CHAM: &'static [(char, char)] =
+ &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')];
+
+pub const CHEROKEE: &'static [(char, char)] =
+ &[('Ꭰ', 'áĩ'), ('ᏸ', 'áŊ'), ('ę­°', 'ęŽŋ')];
+
+pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')];
+
+pub const COMMON: &'static [(char, char)] = &[
+ ('\u{0}', '@'),
+ ('[', '`'),
+ ('{', 'Š'),
+ ('ÂĢ', 'š'),
+ ('Âģ', 'Âŋ'),
+ ('×', '×'),
+ ('Ãˇ', 'Ãˇ'),
+ ('ʹ', '˟'),
+ ('ËĨ', 'ËŠ'),
+ ('ËŦ', 'Ëŋ'),
+ ('Í´', 'Í´'),
+ ('Íž', 'Íž'),
+ ('΅', '΅'),
+ ('·', '·'),
+ ('\u{605}', '\u{605}'),
+ ('\u{6dd}', '\u{6dd}'),
+ ('\u{8e2}', '\u{8e2}'),
+ ('ā¸ŋ', 'ā¸ŋ'),
+ ('āŋ•', 'āŋ˜'),
+ ('á›Ģ', '᛭'),
+ ('\u{2000}', '\u{200b}'),
+ ('\u{200e}', '\u{202e}'),
+ ('‰', '\u{2064}'),
+ ('\u{2066}', '⁰'),
+ ('⁴', '⁞'),
+ ('₀', '₎'),
+ ('₠', 'â‚ŋ'),
+ ('℀', 'â„Ĩ'),
+ ('℧', '℩'),
+ ('â„Ŧ', 'ℱ'),
+ ('â„ŗ', '⅍'),
+ ('⅏', '⅟'),
+ ('↉', '↋'),
+ ('←', 'âĻ'),
+ ('⑀', '⑊'),
+ ('①', 'âŸŋ'),
+ ('⤀', 'â­ŗ'),
+ ('â­ļ', '⮕'),
+ ('\u{2b97}', 'â¯ŋ'),
+ ('⸀', '⹂'),
+ ('⹄', '\u{2e52}'),
+ ('âŋ°', 'âŋģ'),
+ ('\u{3000}', '\u{3000}'),
+ ('〄', '〄'),
+ ('〒', '〒'),
+ ('〠', '〠'),
+ ('ã€ļ', 'ã€ļ'),
+ ('㉈', '㉟'),
+ ('ã‰ŋ', 'ã‰ŋ'),
+ ('ãŠą', 'ãŠŋ'),
+ ('㋌', '㋏'),
+ ('ãą', 'ãē'),
+ ('㎀', '㏟'),
+ ('ãŋ', 'ãŋ'),
+ ('䷀', 'äˇŋ'),
+ ('꜈', '꜡'),
+ ('ꞈ', '꞊'),
+ ('꭛', '꭛'),
+ ('\u{ab6a}', '\u{ab6b}'),
+ ('ī´ž', 'ī´ŋ'),
+ ('ī¸', 'ī¸™'),
+ ('ī¸°', 'īš„'),
+ ('īš‡', 'īš’'),
+ ('īš”', 'īšĻ'),
+ ('īš¨', 'īšĢ'),
+ ('\u{feff}', '\u{feff}'),
+ ('īŧ', 'īŧ '),
+ ('īŧģ', 'īŊ€'),
+ ('īŊ›', 'īŊ '),
+ ('īŋ ', 'īŋĻ'),
+ ('īŋ¨', 'īŋŽ'),
+ ('\u{fff9}', 'īŋŊ'),
+ ('𐆐', '\u{1019c}'),
+ ('𐇐', 'đ‡ŧ'),
+ ('đ–ŋĸ', 'đ–ŋŖ'),
+ ('𝀀', 'đƒĩ'),
+ ('𝄀', 'đ„Ļ'),
+ ('𝄩', 'đ…Ļ'),
+ ('đ…Ē', '\u{1d17a}'),
+ ('𝆃', '𝆄'),
+ ('𝆌', '𝆩'),
+ ('𝆮', '𝇨'),
+ ('𝋠', 'đ‹ŗ'),
+ ('𝌀', '𝍖'),
+ ('𝍲', '𝍸'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝟋'),
+ ('𝟎', 'đŸŋ'),
+ ('𞱱', '𞲴'),
+ ('𞴁', 'đž´Ŋ'),
+ ('🀀', 'đŸ€Ģ'),
+ ('🀰', '🂓'),
+ ('🂠', '🂮'),
+ ('🂱', 'đŸ‚ŋ'),
+ ('🃁', '🃏'),
+ ('🃑', 'đŸƒĩ'),
+ ('🄀', '\u{1f1ad}'),
+ ('đŸ‡Ļ', 'đŸ‡ŋ'),
+ ('🈁', '🈂'),
+ ('🈐', 'đŸˆģ'),
+ ('🉀', '🉈'),
+ ('🉠', 'đŸ‰Ĩ'),
+ ('🌀', '\u{1f6d7}'),
+ ('🛠', 'đŸ›Ŧ'),
+ ('🛰', '\u{1f6fc}'),
+ ('🜀', 'đŸŗ'),
+ ('🞀', '🟘'),
+ ('🟠', 'đŸŸĢ'),
+ ('🠀', '🠋'),
+ ('🠐', '🡇'),
+ ('🡐', '🡙'),
+ ('🡠', 'đŸĸ‡'),
+ ('đŸĸ', 'đŸĸ­'),
+ ('\u{1f8b0}', '\u{1f8b1}'),
+ ('🤀', '\u{1f978}'),
+ ('đŸĨē', '\u{1f9cb}'),
+ ('🧍', '🩓'),
+ ('🩠', '🩭'),
+ ('🩰', '\u{1fa74}'),
+ ('🩸', 'đŸŠē'),
+ ('đŸĒ€', '\u{1fa86}'),
+ ('đŸĒ', '\u{1faa8}'),
+ ('\u{1fab0}', '\u{1fab6}'),
+ ('\u{1fac0}', '\u{1fac2}'),
+ ('\u{1fad0}', '\u{1fad6}'),
+ ('\u{1fb00}', '\u{1fb92}'),
+ ('\u{1fb94}', '\u{1fbca}'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+ ('\u{e0001}', '\u{e0001}'),
+ ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const COPTIC: &'static [(char, char)] =
+ &[('Īĸ', 'Ī¯'), ('Ⲁ', 'âŗŗ'), ('âŗš', 'âŗŋ'), ('\u{102e0}', 'đ‹ģ')];
+
+pub const CUNEIFORM: &'static [(char, char)] =
+ &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')];
+
+pub const CYPRIOT: &'static [(char, char)] = &[
+ ('𐄀', '𐄂'),
+ ('𐄇', 'đ„ŗ'),
+ ('𐄷', 'đ„ŋ'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', 'đ ŋ'),
+];
+
+pub const CYRILLIC: &'static [(char, char)] = &[
+ ('Ѐ', 'Ô¯'),
+ ('ᲀ', 'ᲈ'),
+ ('á´Ģ', 'á´Ģ'),
+ ('áĩ¸', 'áĩ¸'),
+ ('\u{1df8}', '\u{1df8}'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('⚃', '⚃'),
+ ('Ꙁ', '\u{a69f}'),
+ ('\u{fe2e}', '\u{fe2f}'),
+];
+
+pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')];
+
+pub const DEVANAGARI: &'static [(char, char)] = &[
+ ('\u{900}', '\u{952}'),
+ ('\u{955}', 'āĨŋ'),
+ ('\u{1cd0}', 'áŗļ'),
+ ('\u{1cf8}', '\u{1cf9}'),
+ ('\u{20f0}', '\u{20f0}'),
+ ('ę °', 'ę š'),
+ ('\u{a8e0}', '\u{a8ff}'),
+];
+
+pub const DIVES_AKURU: &'static [(char, char)] = &[
+ ('\u{11900}', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{11946}'),
+ ('\u{11950}', '\u{11959}'),
+];
+
+pub const DOGRA: &'static [(char, char)] =
+ &[('āĨ¤', 'āĨ¯'), ('ę °', 'ę š'), ('𑠀', 'đ‘ ģ')];
+
+pub const DUPLOYAN: &'static [(char, char)] =
+ &[('𛰀', 'đ›ąĒ'), ('𛱰', 'đ›ąŧ'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '\u{1bca3}')];
+
+pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] =
+ &[('𓀀', '𓐮'), ('\u{13430}', '\u{13438}')];
+
+pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')];
+
+pub const ELYMAIC: &'static [(char, char)] = &[('đŋ ', 'đŋļ')];
+
+pub const ETHIOPIC: &'static [(char, char)] = &[
+ ('ሀ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('\u{135d}', 'áŧ'),
+ ('ᎀ', '᎙'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+];
+
+pub const GEORGIAN: &'static [(char, char)] = &[
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒŋ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+];
+
+pub const GLAGOLITIC: &'static [(char, char)] = &[
+ ('\u{484}', '\u{484}'),
+ ('\u{487}', '\u{487}'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('⚃', '⚃'),
+ ('\u{a66f}', '\u{a66f}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+];
+
+pub const GOTHIC: &'static [(char, char)] = &[('𐌰', '𐍊')];
+
+pub const GRANTHA: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('ā¯Ļ', 'ā¯ŗ'),
+ ('\u{1cd0}', '\u{1cd0}'),
+ ('\u{1cd2}', 'áŗ“'),
+ ('áŗ˛', '\u{1cf4}'),
+ ('\u{1cf8}', '\u{1cf9}'),
+ ('\u{20f0}', '\u{20f0}'),
+ ('\u{11300}', '𑌃'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('\u{1133b}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('𑍐', '𑍐'),
+ ('\u{11357}', '\u{11357}'),
+ ('𑍝', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('đ‘ŋ', 'đ‘ŋ‘'),
+ ('đ‘ŋ“', 'đ‘ŋ“'),
+];
+
+pub const GREEK: &'static [(char, char)] = &[
+ ('\u{342}', '\u{342}'),
+ ('\u{345}', '\u{345}'),
+ ('Í°', 'Íŗ'),
+ ('Íĩ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('΄', '΄'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'ĪĄ'),
+ ('Ī°', 'Īŋ'),
+ ('á´Ļ', 'á´Ē'),
+ ('áĩ', 'áĩĄ'),
+ ('áĩĻ', 'áĩĒ'),
+ ('áļŋ', '\u{1dc1}'),
+ ('áŧ€', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'áŋ„'),
+ ('áŋ†', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ', 'áŋ¯'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋž'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ę­Ĩ', 'ę­Ĩ'),
+ ('𐅀', '𐆎'),
+ ('𐆠', '𐆠'),
+ ('𝈀', '𝉅'),
+];
+
+pub const GUJARATI: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('\u{a81}', 'āĒƒ'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('\u{abc}', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', '\u{ae3}'),
+ ('āĢĻ', 'āĢą'),
+ ('āĢš', '\u{aff}'),
+ ('ę °', 'ę š'),
+];
+
+pub const GUNJALA_GONDI: &'static [(char, char)] = &[
+ ('āĨ¤', 'āĨĨ'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', 'đ‘ļ˜'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+];
+
+pub const GURMUKHI: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠĻ', 'āŠļ'),
+ ('ę °', 'ę š'),
+];
+
+pub const HAN: &'static [(char, char)] = &[
+ ('âē€', 'âē™'),
+ ('âē›', 'âģŗ'),
+ ('âŧ€', 'âŋ•'),
+ ('、', '〃'),
+ ('々', '】'),
+ ('〓', '〟'),
+ ('ã€Ą', '\u{302d}'),
+ ('〰', '〰'),
+ ('ã€ˇ', 'ã€ŋ'),
+ ('ãƒģ', 'ãƒģ'),
+ ('㆐', '㆟'),
+ ('㇀', 'ã‡Ŗ'),
+ ('㈠', '㉇'),
+ ('㊀', '㊰'),
+ ('㋀', '㋋'),
+ ('ã‹ŋ', 'ã‹ŋ'),
+ ('㍘', '㍰'),
+ ('ãģ', 'ãŋ'),
+ ('㏠', '㏞'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('꜀', '꜇'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īš…', 'īš†'),
+ ('īŊĄ', 'īŊĨ'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('𝍠', '𝍱'),
+ ('🉐', '🉑'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const HANGUL: &'static [(char, char)] = &[
+ ('ᄀ', 'á‡ŋ'),
+ ('、', '〃'),
+ ('〈', '】'),
+ ('〓', '〟'),
+ ('\u{302e}', '〰'),
+ ('ã€ˇ', 'ã€ˇ'),
+ ('ãƒģ', 'ãƒģ'),
+ ('ã„ą', 'ㆎ'),
+ ('㈀', '㈞'),
+ ('㉠', '㉞'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('īš…', 'īš†'),
+ ('īŊĄ', 'īŊĨ'),
+ ('īž ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+];
+
+pub const HANIFI_ROHINGYA: &'static [(char, char)] = &[
+ ('،', '،'),
+ ('؛', '؛'),
+ ('؟', '؟'),
+ ('Ų€', 'Ų€'),
+ ('۔', '۔'),
+ ('𐴀', '\u{10d27}'),
+ ('𐴰', '𐴚'),
+];
+
+pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', 'áœļ')];
+
+pub const HATRAN: &'static [(char, char)] =
+ &[('đŖ ', 'đŖ˛'), ('đŖ´', 'đŖĩ'), ('đŖģ', 'đŖŋ')];
+
+pub const HEBREW: &'static [(char, char)] = &[
+ ('\u{591}', '\u{5c7}'),
+ ('א', '×Ē'),
+ ('ׯ', '״'),
+ ('īŦ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'ī­'),
+];
+
+pub const HIRAGANA: &'static [(char, char)] = &[
+ ('、', '〃'),
+ ('〈', '】'),
+ ('〓', '〟'),
+ ('〰', 'ã€ĩ'),
+ ('ã€ˇ', 'ã€ˇ'),
+ ('ã€ŧ', 'ã€Ŋ'),
+ ('ぁ', 'ゖ'),
+ ('\u{3099}', '゠'),
+ ('ãƒģ', 'ãƒŧ'),
+ ('īš…', 'īš†'),
+ ('īŊĄ', 'īŊĨ'),
+ ('īŊ°', 'īŊ°'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('𛀁', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('🈀', '🈀'),
+];
+
+pub const IMPERIAL_ARAMAIC: &'static [(char, char)] =
+ &[('𐡀', '𐡕'), ('𐡗', '𐡟')];
+
+pub const INHERITED: &'static [(char, char)] = &[
+ ('\u{300}', '\u{341}'),
+ ('\u{343}', '\u{344}'),
+ ('\u{346}', '\u{362}'),
+ ('\u{953}', '\u{954}'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1dc2}', '\u{1df7}'),
+ ('\u{1df9}', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('\u{200c}', '\u{200d}'),
+ ('\u{20d0}', '\u{20ef}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2d}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{1d167}', '\u{1d169}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] =
+ &[('𐭠', '𐭲'), ('𐭸', 'đ­ŋ')];
+
+pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] =
+ &[('𐭀', '𐭕'), ('𐭘', '𐭟')];
+
+pub const JAVANESE: &'static [(char, char)] =
+ &[('\u{a980}', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟')];
+
+pub const KAITHI: &'static [(char, char)] =
+ &[('āĨĻ', 'āĨ¯'), ('ę °', 'ę š'), ('\u{11080}', '𑃁'), ('\u{110cd}', '\u{110cd}')];
+
+pub const KANNADA: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('ā˛€', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('\u{cbc}', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', '\u{ce3}'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{1cd0}', '\u{1cd0}'),
+ ('\u{1cd2}', '\u{1cd2}'),
+ ('\u{1cda}', '\u{1cda}'),
+ ('áŗ˛', 'áŗ˛'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('ę °', 'ę ĩ'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+ ('、', '〃'),
+ ('〈', '】'),
+ ('〓', '〟'),
+ ('〰', 'ã€ĩ'),
+ ('ã€ˇ', 'ã€ˇ'),
+ ('ã€ŧ', 'ã€Ŋ'),
+ ('\u{3099}', '゜'),
+ ('゠', 'ãƒŋ'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㋐', 'ã‹ž'),
+ ('㌀', '㍗'),
+ ('īš…', 'īš†'),
+ ('īŊĄ', '\u{ff9f}'),
+ ('𛀀', '𛀀'),
+ ('𛅤', '𛅧'),
+];
+
+pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', 'ę¤¯')];
+
+pub const KHAROSHTHI: &'static [(char, char)] = &[
+ ('𐨀', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '𐊈'),
+ ('𐊐', '𐊘'),
+];
+
+pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] =
+ &[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')];
+
+pub const KHMER: &'static [(char, char)] =
+ &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', 'á§ŋ')];
+
+pub const KHOJKI: &'static [(char, char)] =
+ &[('āĢĻ', 'āĢ¯'), ('ę °', 'ę š'), ('𑈀', '𑈑'), ('𑈓', '\u{1123e}')];
+
+pub const KHUDAWADI: &'static [(char, char)] =
+ &[('āĨ¤', 'āĨĨ'), ('ę °', 'ę š'), ('𑊰', '\u{112ea}'), ('𑋰', '𑋹')];
+
+pub const LAO: &'static [(char, char)] = &[
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('āģ', 'āģ™'),
+ ('āģœ', 'āģŸ'),
+];
+
+pub const LATIN: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', 'ʸ'),
+ ('ˠ', 'ˤ'),
+ ('\u{363}', '\u{36f}'),
+ ('\u{485}', '\u{486}'),
+ ('\u{951}', '\u{952}'),
+ ('áƒģ', 'áƒģ'),
+ ('ᴀ', 'á´Ĩ'),
+ ('á´Ŧ', 'áĩœ'),
+ ('áĩĸ', 'áĩĨ'),
+ ('áĩĢ', 'áĩˇ'),
+ ('áĩš', 'áļž'),
+ ('Ḁ', 'áģŋ'),
+ ('\u{202f}', '\u{202f}'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('\u{20f0}', '\u{20f0}'),
+ ('â„Ē', 'â„Ģ'),
+ ('Ⅎ', 'Ⅎ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('âą ', 'âąŋ'),
+ ('꜀', '꜇'),
+ ('ęœĸ', 'ꞇ'),
+ ('Ꞌ', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ęŸŋ'),
+ ('ꤎ', 'ꤎ'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', 'ꭤ'),
+ ('ę­Ļ', '\u{ab69}'),
+ ('īŦ€', 'īŦ†'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+];
+
+pub const LEPCHA: &'static [(char, char)] =
+ &[('ᰀ', '\u{1c37}'), ('á°ģ', '᱉'), ('ᱍ', 'ᱏ')];
+
+pub const LIMBU: &'static [(char, char)] = &[
+ ('āĨĨ', 'āĨĨ'),
+ ('ᤀ', 'ᤞ'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('áĨ€', 'áĨ€'),
+ ('áĨ„', 'áĨ'),
+];
+
+pub const LINEAR_A: &'static [(char, char)] =
+ &[('𐄇', 'đ„ŗ'), ('𐘀', 'đœļ'), ('𐝀', '𐝕'), ('𐝠', '𐝧')];
+
+pub const LINEAR_B: &'static [(char, char)] = &[
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐄀', '𐄂'),
+ ('𐄇', 'đ„ŗ'),
+ ('𐄷', 'đ„ŋ'),
+];
+
+pub const LISU: &'static [(char, char)] =
+ &[('ꓐ', 'ę“ŋ'), ('\u{11fb0}', '\u{11fb0}')];
+
+pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')];
+
+pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤚'), ('đ¤ŋ', 'đ¤ŋ')];
+
+pub const MAHAJANI: &'static [(char, char)] =
+ &[('āĨ¤', 'āĨ¯'), ('ę °', 'ę š'), ('𑅐', 'đ‘…ļ')];
+
+pub const MAKASAR: &'static [(char, char)] = &[('đ‘ģ ', 'đ‘ģ¸')];
+
+pub const MALAYALAM: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('\u{d00}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', 'āĩ'),
+ ('āĩ”', '\u{d63}'),
+ ('āĩĻ', 'āĩŋ'),
+ ('\u{1cda}', '\u{1cda}'),
+ ('ę °', 'ę ˛'),
+];
+
+pub const MANDAIC: &'static [(char, char)] =
+ &[('Ų€', 'Ų€'), ('āĄ€', '\u{85b}'), ('āĄž', 'āĄž')];
+
+pub const MANICHAEAN: &'static [(char, char)] =
+ &[('Ų€', 'Ų€'), ('đĢ€', '\u{10ae6}'), ('đĢĢ', 'đĢļ')];
+
+pub const MARCHEN: &'static [(char, char)] =
+ &[('𑱰', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}')];
+
+pub const MASARAM_GONDI: &'static [(char, char)] = &[
+ ('āĨ¤', 'āĨĨ'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d47}'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+];
+
+pub const MEDEFAIDRIN: &'static [(char, char)] = &[('𖹀', 'đ–ēš')];
+
+pub const MEETEI_MAYEK: &'static [(char, char)] =
+ &[('ęĢ ', '\u{aaf6}'), ('ę¯€', '\u{abed}'), ('ę¯°', 'ę¯š')];
+
+pub const MENDE_KIKAKUI: &'static [(char, char)] =
+ &[('𞠀', 'đžŖ„'), ('đžŖ‡', '\u{1e8d6}')];
+
+pub const MEROITIC_CURSIVE: &'static [(char, char)] =
+ &[('đĻ ', 'đĻˇ'), ('đĻŧ', '𐧏'), ('𐧒', 'đ§ŋ')];
+
+pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('đĻ€', 'đĻŸ')];
+
+pub const MIAO: &'static [(char, char)] =
+ &[('đ–ŧ€', 'đ–ŊŠ'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')];
+
+pub const MODI: &'static [(char, char)] =
+ &[('꠰', '꠹'), ('𑘀', '𑙄'), ('𑙐', '𑙙')];
+
+pub const MONGOLIAN: &'static [(char, char)] = &[
+ ('᠀', '\u{180e}'),
+ ('᠐', '᠙'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸĒ'),
+ ('\u{202f}', '\u{202f}'),
+ ('𑙠', 'đ‘™Ŧ'),
+];
+
+pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', 'đ–Š¯')];
+
+pub const MULTANI: &'static [(char, char)] =
+ &[('āŠĻ', 'āŠ¯'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')];
+
+pub const MYANMAR: &'static [(char, char)] =
+ &[('က', '႟'), ('ꤎ', 'ꤎ'), ('ꧠ', '꧞'), ('ꊠ', 'ęŠŋ')];
+
+pub const NABATAEAN: &'static [(char, char)] = &[('đĸ€', 'đĸž'), ('đĸ§', 'đĸ¯')];
+
+pub const NANDINAGARI: &'static [(char, char)] = &[
+ ('āĨ¤', 'āĨĨ'),
+ ('āŗĻ', 'āŗ¯'),
+ ('áŗŠ', 'áŗŠ'),
+ ('áŗ˛', 'áŗ˛'),
+ ('áŗē', 'áŗē'),
+ ('ę °', 'ę ĩ'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '\u{119d7}'),
+ ('\u{119da}', '𑧤'),
+];
+
+pub const NEW_TAI_LUE: &'static [(char, char)] =
+ &[('áĻ€', 'áĻĢ'), ('áĻ°', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')];
+
+pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '\u{11461}')];
+
+pub const NKO: &'static [(char, char)] = &[('߀', 'ßē'), ('\u{7fd}', 'ßŋ')];
+
+pub const NUSHU: &'static [(char, char)] = &[('đ–ŋĄ', 'đ–ŋĄ'), ('𛅰', 'đ›‹ģ')];
+
+pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] =
+ &[('𞄀', 'đž„Ŧ'), ('\u{1e130}', 'đž„Ŋ'), ('𞅀', '𞅉'), ('𞅎', '𞅏')];
+
+pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')];
+
+pub const OL_CHIKI: &'static [(char, char)] = &[('Ṑ', 'áąŋ')];
+
+pub const OLD_HUNGARIAN: &'static [(char, char)] =
+ &[('𐲀', '𐲲'), ('đŗ€', 'đŗ˛'), ('đŗē', 'đŗŋ')];
+
+pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', 'đŒŖ'), ('𐌭', 'đŒ¯')];
+
+pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('đĒ€', 'đĒŸ')];
+
+pub const OLD_PERMIC: &'static [(char, char)] =
+ &[('\u{483}', '\u{483}'), ('𐍐', '\u{1037a}')];
+
+pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')];
+
+pub const OLD_SOGDIAN: &'static [(char, char)] = &[('đŧ€', 'đŧ§')];
+
+pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐊠', 'đŠŋ')];
+
+pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')];
+
+pub const ORIYA: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('\u{b01}', 'āŦƒ'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('\u{b3c}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', '\u{b63}'),
+ ('ā­Ļ', 'ā­ˇ'),
+ ('\u{1cda}', '\u{1cda}'),
+ ('áŗ˛', 'áŗ˛'),
+];
+
+pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', 'đ“ģ')];
+
+pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')];
+
+pub const PAHAWH_HMONG: &'static [(char, char)] =
+ &[('đ–Ŧ€', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('đ–­Ŗ', '𖭷'), ('đ–­Ŋ', '𖮏')];
+
+pub const PALMYRENE: &'static [(char, char)] = &[('𐥠', 'đĄŋ')];
+
+pub const PAU_CIN_HAU: &'static [(char, char)] = &[('đ‘Ģ€', 'đ‘Ģ¸')];
+
+pub const PHAGS_PA: &'static [(char, char)] =
+ &[('᠂', '᠃'), ('᠅', '᠅'), ('ꡀ', '꡷')];
+
+pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')];
+
+pub const PSALTER_PAHLAVI: &'static [(char, char)] =
+ &[('Ų€', 'Ų€'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐎊', 'đŽ¯')];
+
+pub const REJANG: &'static [(char, char)] = &[('ꤰ', 'ęĨ“'), ('ęĨŸ', 'ęĨŸ')];
+
+pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'á›Ē'), ('ᛮ', 'ᛸ')];
+
+pub const SAMARITAN: &'static [(char, char)] = &[('ā €', '\u{82d}'), ('ā °', 'ā ž')];
+
+pub const SAURASHTRA: &'static [(char, char)] =
+ &[('ęĸ€', '\u{a8c5}'), ('ęŖŽ', 'ęŖ™')];
+
+pub const SHARADA: &'static [(char, char)] = &[
+ ('\u{951}', '\u{951}'),
+ ('\u{1cd7}', '\u{1cd7}'),
+ ('\u{1cd9}', '\u{1cd9}'),
+ ('\u{1cdc}', '\u{1cdd}'),
+ ('\u{1ce0}', '\u{1ce0}'),
+ ('\u{11180}', '𑇟'),
+];
+
+pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', 'đ‘ŋ')];
+
+pub const SIDDHAM: &'static [(char, char)] =
+ &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')];
+
+pub const SIGNWRITING: &'static [(char, char)] =
+ &[('𝠀', 'đĒ‹'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')];
+
+pub const SINHALA: &'static [(char, char)] = &[
+ ('āĨ¤', 'āĨĨ'),
+ ('\u{d81}', 'āļƒ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āˇ˛', 'āˇ´'),
+ ('𑇡', '𑇴'),
+];
+
+pub const SOGDIAN: &'static [(char, char)] = &[('Ų€', 'Ų€'), ('đŧ°', 'đŊ™')];
+
+pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')];
+
+pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', 'đ‘Ēĸ')];
+
+pub const SUNDANESE: &'static [(char, char)] =
+ &[('\u{1b80}', 'áŽŋ'), ('áŗ€', 'áŗ‡')];
+
+pub const SYLOTI_NAGRI: &'static [(char, char)] =
+ &[('āĨ¤', 'āĨĨ'), ('ā§Ļ', 'ā§¯'), ('ꠀ', '\u{a82c}')];
+
+pub const SYRIAC: &'static [(char, char)] = &[
+ ('،', '،'),
+ ('؛', '\u{61c}'),
+ ('؟', '؟'),
+ ('Ų€', 'Ų€'),
+ ('\u{64b}', '\u{655}'),
+ ('\u{670}', '\u{670}'),
+ ('܀', '܍'),
+ ('\u{70f}', '\u{74a}'),
+ ('Ũ', 'Ũ'),
+ ('āĄ ', 'āĄĒ'),
+ ('\u{1df8}', '\u{1df8}'),
+];
+
+pub const TAGALOG: &'static [(char, char)] =
+ &[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}'), ('áœĩ', 'áœļ')];
+
+pub const TAGBANWA: &'static [(char, char)] =
+ &[('áœĩ', 'áœļ'), ('ᝠ', 'áŦ'), ('ᝎ', 'ᝰ'), ('\u{1772}', '\u{1773}')];
+
+pub const TAI_LE: &'static [(char, char)] =
+ &[('၀', '၉'), ('áĨ', 'áĨ­'), ('áĨ°', 'áĨ´')];
+
+pub const TAI_THAM: &'static [(char, char)] = &[
+ ('ᨠ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('áĒ ', 'áĒ­'),
+];
+
+pub const TAI_VIET: &'static [(char, char)] = &[('ęĒ€', 'ęĢ‚'), ('ęĢ›', 'ęĢŸ')];
+
+pub const TAKRI: &'static [(char, char)] =
+ &[('āĨ¤', 'āĨĨ'), ('ę °', 'ę š'), ('𑚀', '𑚸'), ('𑛀', '𑛉')];
+
+pub const TAMIL: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('\u{b82}', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('ā¯', 'ā¯'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('ā¯Ļ', 'ā¯ē'),
+ ('\u{1cda}', '\u{1cda}'),
+ ('ęŖŗ', 'ęŖŗ'),
+ ('\u{11301}', '\u{11301}'),
+ ('𑌃', '𑌃'),
+ ('\u{1133b}', '\u{1133c}'),
+ ('đ‘ŋ€', 'đ‘ŋą'),
+ ('đ‘ŋŋ', 'đ‘ŋŋ'),
+];
+
+pub const TANGUT: &'static [(char, char)] = &[
+ ('đ–ŋ ', 'đ–ŋ '),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18aff}'),
+ ('\u{18d00}', '\u{18d08}'),
+];
+
+pub const TELUGU: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('\u{c00}', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('āą˜', 'āąš'),
+ ('āą ', '\u{c63}'),
+ ('āąĻ', 'āą¯'),
+ ('āąˇ', 'āąŋ'),
+ ('\u{1cda}', '\u{1cda}'),
+ ('áŗ˛', 'áŗ˛'),
+];
+
+pub const THAANA: &'static [(char, char)] = &[
+ ('،', '،'),
+ ('؛', '\u{61c}'),
+ ('؟', '؟'),
+ ('Ų ', 'ŲŠ'),
+ ('Ū€', 'Ūą'),
+ ('īˇ˛', 'īˇ˛'),
+ ('īˇŊ', 'īˇŊ'),
+];
+
+pub const THAI: &'static [(char, char)] = &[('ā¸', '\u{e3a}'), ('āš€', 'āš›')];
+
+pub const TIBETAN: &'static [(char, char)] = &[
+ ('āŧ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('\u{f71}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('āžž', 'āŋŒ'),
+ ('āŋŽ', 'āŋ”'),
+ ('āŋ™', 'āŋš'),
+];
+
+pub const TIFINAGH: &'static [(char, char)] =
+ &[('â´°', 'âĩ§'), ('âĩ¯', 'âĩ°'), ('\u{2d7f}', '\u{2d7f}')];
+
+pub const TIRHUTA: &'static [(char, char)] = &[
+ ('\u{951}', '\u{952}'),
+ ('āĨ¤', 'āĨĨ'),
+ ('áŗ˛', 'áŗ˛'),
+ ('ę °', 'ę š'),
+ ('𑒀', '𑓇'),
+ ('𑓐', '𑓙'),
+];
+
+pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')];
+
+pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ę˜Ģ')];
+
+pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('đž‹ŋ', 'đž‹ŋ')];
+
+pub const WARANG_CITI: &'static [(char, char)] = &[('đ‘ĸ ', 'đ‘Ŗ˛'), ('đ‘Ŗŋ', 'đ‘Ŗŋ')];
+
+pub const YEZIDI: &'static [(char, char)] = &[
+ ('،', '،'),
+ ('؛', '؛'),
+ ('؟', '؟'),
+ ('Ų ', 'ŲŠ'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eab}', '\u{10ead}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+];
+
+pub const YI: &'static [(char, char)] = &[
+ ('、', '。'),
+ ('〈', '】'),
+ ('〔', '〛'),
+ ('ãƒģ', 'ãƒģ'),
+ ('ꀀ', 'ꒌ'),
+ ('꒐', '꓆'),
+ ('īŊĄ', 'īŊĨ'),
+];
+
+pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[('𑨀', '\u{11a47}')];
diff --git a/vendor/regex-syntax/src/unicode_tables/sentence_break.rs b/vendor/regex-syntax/src/unicode_tables/sentence_break.rs
new file mode 100644
index 000000000..67d830f74
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/sentence_break.rs
@@ -0,0 +1,2396 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate sentence-break ucd-13.0.0 --chars
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+ ("ATerm", ATERM),
+ ("CR", CR),
+ ("Close", CLOSE),
+ ("Extend", EXTEND),
+ ("Format", FORMAT),
+ ("LF", LF),
+ ("Lower", LOWER),
+ ("Numeric", NUMERIC),
+ ("OLetter", OLETTER),
+ ("SContinue", SCONTINUE),
+ ("STerm", STERM),
+ ("Sep", SEP),
+ ("Sp", SP),
+ ("Upper", UPPER),
+];
+
+pub const ATERM: &'static [(char, char)] =
+ &[('.', '.'), ('․', '․'), ('īš’', 'īš’'), ('īŧŽ', 'īŧŽ')];
+
+pub const CR: &'static [(char, char)] = &[('\r', '\r')];
+
+pub const CLOSE: &'static [(char, char)] = &[
+ ('\"', '\"'),
+ ('\'', ')'),
+ ('[', '['),
+ (']', ']'),
+ ('{', '{'),
+ ('}', '}'),
+ ('ÂĢ', 'ÂĢ'),
+ ('Âģ', 'Âģ'),
+ ('āŧē', 'āŧŊ'),
+ ('᚛', '᚜'),
+ ('‘', '‟'),
+ ('‹', 'â€ē'),
+ ('⁅', '⁆'),
+ ('âŊ', '⁞'),
+ ('₍', '₎'),
+ ('⌈', '⌋'),
+ ('〈', 'âŒĒ'),
+ ('❛', '❠'),
+ ('❨', 'âĩ'),
+ ('⟅', '⟆'),
+ ('âŸĻ', '⟯'),
+ ('âĻƒ', 'âĻ˜'),
+ ('⧘', '⧛'),
+ ('â§ŧ', 'â§Ŋ'),
+ ('⸀', '⸍'),
+ ('⸜', '⸝'),
+ ('⸠', '⸊'),
+ ('⹂', '⹂'),
+ ('〈', '】'),
+ ('〔', '〛'),
+ ('〝', '〟'),
+ ('ī´ž', 'ī´ŋ'),
+ ('ī¸—', 'ī¸˜'),
+ ('ī¸ĩ', 'īš„'),
+ ('īš‡', 'īšˆ'),
+ ('īš™', 'īšž'),
+ ('īŧˆ', 'īŧ‰'),
+ ('īŧģ', 'īŧģ'),
+ ('īŧŊ', 'īŧŊ'),
+ ('īŊ›', 'īŊ›'),
+ ('īŊ', 'īŊ'),
+ ('īŊŸ', 'īŊ '),
+ ('īŊĸ', 'īŊŖ'),
+ ('đŸ™ļ', '🙸'),
+];
+
+pub const EXTEND: &'static [(char, char)] = &[
+ ('\u{300}', '\u{36f}'),
+ ('\u{483}', '\u{489}'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('\u{610}', '\u{61a}'),
+ ('\u{64b}', '\u{65f}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{6d6}', '\u{6dc}'),
+ ('\u{6df}', '\u{6e4}'),
+ ('\u{6e7}', '\u{6e8}'),
+ ('\u{6ea}', '\u{6ed}'),
+ ('\u{711}', '\u{711}'),
+ ('\u{730}', '\u{74a}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{7eb}', '\u{7f3}'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('\u{816}', '\u{819}'),
+ ('\u{81b}', '\u{823}'),
+ ('\u{825}', '\u{827}'),
+ ('\u{829}', '\u{82d}'),
+ ('\u{859}', '\u{85b}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', 'ā¤ƒ'),
+ ('\u{93a}', '\u{93c}'),
+ ('ā¤ž', 'āĨ'),
+ ('\u{951}', '\u{957}'),
+ ('\u{962}', '\u{963}'),
+ ('\u{981}', 'āĻƒ'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9be}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', '\u{9cd}'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('\u{9e2}', '\u{9e3}'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a70}', '\u{a71}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{a81}', 'āĒƒ'),
+ ('\u{abc}', '\u{abc}'),
+ ('āĒž', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('\u{ae2}', '\u{ae3}'),
+ ('\u{afa}', '\u{aff}'),
+ ('\u{b01}', 'āŦƒ'),
+ ('\u{b3c}', '\u{b3c}'),
+ ('\u{b3e}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('\u{b62}', '\u{b63}'),
+ ('\u{b82}', '\u{b82}'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('\u{c00}', '\u{c04}'),
+ ('\u{c3e}', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('\u{c62}', '\u{c63}'),
+ ('\u{c81}', 'ā˛ƒ'),
+ ('\u{cbc}', '\u{cbc}'),
+ ('ā˛ž', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('\u{d00}', 'ā´ƒ'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d3e}', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', '\u{d4d}'),
+ ('\u{d57}', '\u{d57}'),
+ ('\u{d62}', '\u{d63}'),
+ ('\u{d81}', 'āļƒ'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇ˛', 'āˇŗ'),
+ ('\u{e31}', '\u{e31}'),
+ ('\u{e34}', '\u{e3a}'),
+ ('\u{e47}', '\u{e4e}'),
+ ('\u{eb1}', '\u{eb1}'),
+ ('\u{eb4}', '\u{ebc}'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('\u{f18}', '\u{f19}'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('āŧž', 'āŧŋ'),
+ ('\u{f71}', '\u{f84}'),
+ ('\u{f86}', '\u{f87}'),
+ ('\u{f8d}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('á€Ģ', '\u{103e}'),
+ ('ၖ', '\u{1059}'),
+ ('\u{105e}', '\u{1060}'),
+ ('áĸ', 'ၤ'),
+ ('ၧ', 'ၭ'),
+ ('\u{1071}', '\u{1074}'),
+ ('\u{1082}', '\u{108d}'),
+ ('ႏ', 'ႏ'),
+ ('ႚ', '\u{109d}'),
+ ('\u{135d}', '\u{135f}'),
+ ('\u{1712}', '\u{1714}'),
+ ('\u{1732}', '\u{1734}'),
+ ('\u{1752}', '\u{1753}'),
+ ('\u{1772}', '\u{1773}'),
+ ('\u{17b4}', '\u{17d3}'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('\u{180b}', '\u{180d}'),
+ ('\u{1885}', '\u{1886}'),
+ ('\u{18a9}', '\u{18a9}'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('\u{1a17}', '\u{1a1b}'),
+ ('ᩕ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', '\u{1a7f}'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1b00}', 'áŦ„'),
+ ('\u{1b34}', '᭄'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', 'ᮂ'),
+ ('Ꭵ', '\u{1bad}'),
+ ('\u{1be6}', 'á¯ŗ'),
+ ('á°¤', '\u{1c37}'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('áŗˇ', '\u{1cf9}'),
+ ('\u{1dc0}', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('\u{200c}', '\u{200d}'),
+ ('\u{20d0}', '\u{20f0}'),
+ ('\u{2cef}', '\u{2cf1}'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('\u{302a}', '\u{302f}'),
+ ('\u{3099}', '\u{309a}'),
+ ('\u{a66f}', '\u{a672}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('\u{a69e}', '\u{a69f}'),
+ ('\u{a6f0}', '\u{a6f1}'),
+ ('\u{a802}', '\u{a802}'),
+ ('\u{a806}', '\u{a806}'),
+ ('\u{a80b}', '\u{a80b}'),
+ ('ę Ŗ', 'ę §'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('ęĸ€', 'ęĸ'),
+ ('ęĸ´', '\u{a8c5}'),
+ ('\u{a8e0}', '\u{a8f1}'),
+ ('\u{a8ff}', '\u{a8ff}'),
+ ('\u{a926}', '\u{a92d}'),
+ ('\u{a947}', 'ęĨ“'),
+ ('\u{a980}', 'ęĻƒ'),
+ ('\u{a9b3}', '꧀'),
+ ('\u{a9e5}', '\u{a9e5}'),
+ ('\u{aa29}', '\u{aa36}'),
+ ('\u{aa43}', '\u{aa43}'),
+ ('\u{aa4c}', 'ꩍ'),
+ ('ęŠģ', 'ęŠŊ'),
+ ('\u{aab0}', '\u{aab0}'),
+ ('\u{aab2}', '\u{aab4}'),
+ ('\u{aab7}', '\u{aab8}'),
+ ('\u{aabe}', '\u{aabf}'),
+ ('\u{aac1}', '\u{aac1}'),
+ ('ęĢĢ', 'ęĢ¯'),
+ ('ęĢĩ', '\u{aaf6}'),
+ ('ę¯Ŗ', 'ę¯Ē'),
+ ('ę¯Ŧ', '\u{abed}'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{10376}', '\u{1037a}'),
+ ('\u{10a01}', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '\u{10a0f}'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('\u{10ae5}', '\u{10ae6}'),
+ ('\u{10d24}', '\u{10d27}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10f46}', '\u{10f50}'),
+ ('𑀀', '𑀂'),
+ ('\u{11038}', '\u{11046}'),
+ ('\u{1107f}', '𑂂'),
+ ('𑂰', '\u{110ba}'),
+ ('\u{11100}', '\u{11102}'),
+ ('\u{11127}', '\u{11134}'),
+ ('𑅅', '𑅆'),
+ ('\u{11173}', '\u{11173}'),
+ ('\u{11180}', '𑆂'),
+ ('đ‘†ŗ', '𑇀'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111ce}', '\u{111cf}'),
+ ('đ‘ˆŦ', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('\u{112df}', '\u{112ea}'),
+ ('\u{11300}', '𑌃'),
+ ('\u{1133b}', '\u{1133c}'),
+ ('\u{1133e}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('\u{11357}', '\u{11357}'),
+ ('đ‘ĸ', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('đ‘ĩ', '\u{11446}'),
+ ('\u{1145e}', '\u{1145e}'),
+ ('\u{114b0}', '\u{114c3}'),
+ ('\u{115af}', '\u{115b5}'),
+ ('𑖸', '\u{115c0}'),
+ ('\u{115dc}', '\u{115dd}'),
+ ('𑘰', '\u{11640}'),
+ ('\u{116ab}', '\u{116b7}'),
+ ('\u{1171d}', '\u{1172b}'),
+ ('đ‘ Ŧ', '\u{1183a}'),
+ ('\u{11930}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{1193e}'),
+ ('\u{11940}', '\u{11940}'),
+ ('\u{11942}', '\u{11943}'),
+ ('𑧑', '\u{119d7}'),
+ ('\u{119da}', '\u{119e0}'),
+ ('𑧤', '𑧤'),
+ ('\u{11a01}', '\u{11a0a}'),
+ ('\u{11a33}', '𑨹'),
+ ('\u{11a3b}', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a51}', '\u{11a5b}'),
+ ('\u{11a8a}', '\u{11a99}'),
+ ('đ‘°¯', '\u{11c36}'),
+ ('\u{11c38}', '\u{11c3f}'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('\u{11d31}', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d45}'),
+ ('\u{11d47}', '\u{11d47}'),
+ ('đ‘ļŠ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', '\u{11d97}'),
+ ('\u{11ef3}', 'đ‘ģļ'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('\u{16b30}', '\u{16b36}'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('đ–Ŋ‘', '𖾇'),
+ ('\u{16f8f}', '\u{16f92}'),
+ ('\u{16fe4}', '\u{16fe4}'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d165}', '\u{1d169}'),
+ ('𝅭', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('\u{1e130}', '\u{1e136}'),
+ ('\u{1e2ec}', '\u{1e2ef}'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('\u{1e944}', '\u{1e94a}'),
+ ('\u{e0020}', '\u{e007f}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const FORMAT: &'static [(char, char)] = &[
+ ('\u{ad}', '\u{ad}'),
+ ('\u{600}', '\u{605}'),
+ ('\u{61c}', '\u{61c}'),
+ ('\u{6dd}', '\u{6dd}'),
+ ('\u{70f}', '\u{70f}'),
+ ('\u{8e2}', '\u{8e2}'),
+ ('\u{180e}', '\u{180e}'),
+ ('\u{200b}', '\u{200b}'),
+ ('\u{200e}', '\u{200f}'),
+ ('\u{202a}', '\u{202e}'),
+ ('\u{2060}', '\u{2064}'),
+ ('\u{2066}', '\u{206f}'),
+ ('\u{feff}', '\u{feff}'),
+ ('\u{fff9}', '\u{fffb}'),
+ ('\u{110bd}', '\u{110bd}'),
+ ('\u{110cd}', '\u{110cd}'),
+ ('\u{13430}', '\u{13438}'),
+ ('\u{1bca0}', '\u{1bca3}'),
+ ('\u{1d173}', '\u{1d17a}'),
+ ('\u{e0001}', '\u{e0001}'),
+];
+
+pub const LF: &'static [(char, char)] = &[('\n', '\n')];
+
+pub const LOWER: &'static [(char, char)] = &[
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('ß', 'Ãļ'),
+ ('ø', 'Ãŋ'),
+ ('ā', 'ā'),
+ ('ă', 'ă'),
+ ('ą', 'ą'),
+ ('ć', 'ć'),
+ ('ĉ', 'ĉ'),
+ ('ċ', 'ċ'),
+ ('č', 'č'),
+ ('ď', 'ď'),
+ ('đ', 'đ'),
+ ('ē', 'ē'),
+ ('ĕ', 'ĕ'),
+ ('ė', 'ė'),
+ ('ę', 'ę'),
+ ('ě', 'ě'),
+ ('ĝ', 'ĝ'),
+ ('ğ', 'ğ'),
+ ('ÄĄ', 'ÄĄ'),
+ ('ÄŖ', 'ÄŖ'),
+ ('ÄĨ', 'ÄĨ'),
+ ('ħ', 'ħ'),
+ ('ÄŠ', 'ÄŠ'),
+ ('ÄĢ', 'ÄĢ'),
+ ('Ä­', 'Ä­'),
+ ('į', 'į'),
+ ('Äą', 'Äą'),
+ ('Äŗ', 'Äŗ'),
+ ('Äĩ', 'Äĩ'),
+ ('ġ', 'ĸ'),
+ ('Äē', 'Äē'),
+ ('Äŧ', 'Äŧ'),
+ ('Äž', 'Äž'),
+ ('ŀ', 'ŀ'),
+ ('ł', 'ł'),
+ ('ń', 'ń'),
+ ('ņ', 'ņ'),
+ ('ň', 'ʼn'),
+ ('ŋ', 'ŋ'),
+ ('ō', 'ō'),
+ ('ŏ', 'ŏ'),
+ ('ő', 'ő'),
+ ('œ', 'œ'),
+ ('ŕ', 'ŕ'),
+ ('ŗ', 'ŗ'),
+ ('ř', 'ř'),
+ ('ś', 'ś'),
+ ('ŝ', 'ŝ'),
+ ('ş', 'ş'),
+ ('ÅĄ', 'ÅĄ'),
+ ('ÅŖ', 'ÅŖ'),
+ ('ÅĨ', 'ÅĨ'),
+ ('ŧ', 'ŧ'),
+ ('ÅŠ', 'ÅŠ'),
+ ('ÅĢ', 'ÅĢ'),
+ ('Å­', 'Å­'),
+ ('ů', 'ů'),
+ ('Åą', 'Åą'),
+ ('Åŗ', 'Åŗ'),
+ ('Åĩ', 'Åĩ'),
+ ('Åˇ', 'Åˇ'),
+ ('Åē', 'Åē'),
+ ('Åŧ', 'Åŧ'),
+ ('Åž', 'ƀ'),
+ ('ƃ', 'ƃ'),
+ ('ƅ', 'ƅ'),
+ ('ƈ', 'ƈ'),
+ ('ƌ', 'ƍ'),
+ ('ƒ', 'ƒ'),
+ ('ƕ', 'ƕ'),
+ ('ƙ', 'ƛ'),
+ ('ƞ', 'ƞ'),
+ ('ÆĄ', 'ÆĄ'),
+ ('ÆŖ', 'ÆŖ'),
+ ('ÆĨ', 'ÆĨ'),
+ ('ƨ', 'ƨ'),
+ ('ÆĒ', 'ÆĢ'),
+ ('Æ­', 'Æ­'),
+ ('Æ°', 'Æ°'),
+ ('Æ´', 'Æ´'),
+ ('Æļ', 'Æļ'),
+ ('Æš', 'Æē'),
+ ('ÆŊ', 'Æŋ'),
+ ('Į†', 'Į†'),
+ ('Į‰', 'Į‰'),
+ ('ĮŒ', 'ĮŒ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Į', 'Į'),
+ ('Į’', 'Į’'),
+ ('Į”', 'Į”'),
+ ('Į–', 'Į–'),
+ ('Į˜', 'Į˜'),
+ ('Įš', 'Įš'),
+ ('Įœ', 'Į'),
+ ('ĮŸ', 'ĮŸ'),
+ ('ĮĄ', 'ĮĄ'),
+ ('ĮŖ', 'ĮŖ'),
+ ('ĮĨ', 'ĮĨ'),
+ ('Į§', 'Į§'),
+ ('ĮŠ', 'ĮŠ'),
+ ('ĮĢ', 'ĮĢ'),
+ ('Į­', 'Į­'),
+ ('Į¯', 'Į°'),
+ ('Įŗ', 'Įŗ'),
+ ('Įĩ', 'Įĩ'),
+ ('Įš', 'Įš'),
+ ('Įģ', 'Įģ'),
+ ('ĮŊ', 'ĮŊ'),
+ ('Įŋ', 'Įŋ'),
+ ('ȁ', 'ȁ'),
+ ('ȃ', 'ȃ'),
+ ('ȅ', 'ȅ'),
+ ('ȇ', 'ȇ'),
+ ('ȉ', 'ȉ'),
+ ('ȋ', 'ȋ'),
+ ('ȍ', 'ȍ'),
+ ('ȏ', 'ȏ'),
+ ('ȑ', 'ȑ'),
+ ('ȓ', 'ȓ'),
+ ('ȕ', 'ȕ'),
+ ('ȗ', 'ȗ'),
+ ('ș', 'ș'),
+ ('ț', 'ț'),
+ ('ȝ', 'ȝ'),
+ ('ȟ', 'ȟ'),
+ ('ČĄ', 'ČĄ'),
+ ('ČŖ', 'ČŖ'),
+ ('ČĨ', 'ČĨ'),
+ ('ȧ', 'ȧ'),
+ ('ČŠ', 'ČŠ'),
+ ('ČĢ', 'ČĢ'),
+ ('Č­', 'Č­'),
+ ('Č¯', 'Č¯'),
+ ('Čą', 'Čą'),
+ ('Čŗ', 'Čš'),
+ ('Čŧ', 'Čŧ'),
+ ('Čŋ', 'ɀ'),
+ ('ɂ', 'ɂ'),
+ ('ɇ', 'ɇ'),
+ ('ɉ', 'ɉ'),
+ ('ɋ', 'ɋ'),
+ ('ɍ', 'ɍ'),
+ ('ɏ', 'ʓ'),
+ ('ʕ', 'ʸ'),
+ ('ˀ', 'ˁ'),
+ ('ˠ', 'ˤ'),
+ ('Íą', 'Íą'),
+ ('Íŗ', 'Íŗ'),
+ ('͡', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('ΐ', 'ΐ'),
+ ('ÎŦ', 'ĪŽ'),
+ ('Ī', 'Ī‘'),
+ ('Ī•', 'Ī—'),
+ ('Ī™', 'Ī™'),
+ ('Ī›', 'Ī›'),
+ ('Ī', 'Ī'),
+ ('ĪŸ', 'ĪŸ'),
+ ('ĪĄ', 'ĪĄ'),
+ ('ĪŖ', 'ĪŖ'),
+ ('ĪĨ', 'ĪĨ'),
+ ('Ī§', 'Ī§'),
+ ('ĪŠ', 'ĪŠ'),
+ ('ĪĢ', 'ĪĢ'),
+ ('Ī­', 'Ī­'),
+ ('Ī¯', 'Īŗ'),
+ ('Īĩ', 'Īĩ'),
+ ('Ī¸', 'Ī¸'),
+ ('Īģ', 'Īŧ'),
+ ('Đ°', 'ŅŸ'),
+ ('ŅĄ', 'ŅĄ'),
+ ('ŅŖ', 'ŅŖ'),
+ ('ŅĨ', 'ŅĨ'),
+ ('Ņ§', 'Ņ§'),
+ ('ŅŠ', 'ŅŠ'),
+ ('ŅĢ', 'ŅĢ'),
+ ('Ņ­', 'Ņ­'),
+ ('Ņ¯', 'Ņ¯'),
+ ('Ņą', 'Ņą'),
+ ('Ņŗ', 'Ņŗ'),
+ ('Ņĩ', 'Ņĩ'),
+ ('Ņˇ', 'Ņˇ'),
+ ('Ņš', 'Ņš'),
+ ('Ņģ', 'Ņģ'),
+ ('ŅŊ', 'ŅŊ'),
+ ('Ņŋ', 'Ņŋ'),
+ ('Ō', 'Ō'),
+ ('Ō‹', 'Ō‹'),
+ ('Ō', 'Ō'),
+ ('Ō', 'Ō'),
+ ('Ō‘', 'Ō‘'),
+ ('Ō“', 'Ō“'),
+ ('Ō•', 'Ō•'),
+ ('Ō—', 'Ō—'),
+ ('Ō™', 'Ō™'),
+ ('Ō›', 'Ō›'),
+ ('Ō', 'Ō'),
+ ('ŌŸ', 'ŌŸ'),
+ ('ŌĄ', 'ŌĄ'),
+ ('ŌŖ', 'ŌŖ'),
+ ('ŌĨ', 'ŌĨ'),
+ ('Ō§', 'Ō§'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌĢ', 'ŌĢ'),
+ ('Ō­', 'Ō­'),
+ ('Ō¯', 'Ō¯'),
+ ('Ōą', 'Ōą'),
+ ('Ōŗ', 'Ōŗ'),
+ ('Ōĩ', 'Ōĩ'),
+ ('Ōˇ', 'Ōˇ'),
+ ('Ōš', 'Ōš'),
+ ('Ōģ', 'Ōģ'),
+ ('ŌŊ', 'ŌŊ'),
+ ('Ōŋ', 'Ōŋ'),
+ ('Ķ‚', 'Ķ‚'),
+ ('Ķ„', 'Ķ„'),
+ ('Ķ†', 'Ķ†'),
+ ('Ķˆ', 'Ķˆ'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶŒ', 'ĶŒ'),
+ ('ĶŽ', 'Ķ'),
+ ('Ķ‘', 'Ķ‘'),
+ ('Ķ“', 'Ķ“'),
+ ('Ķ•', 'Ķ•'),
+ ('Ķ—', 'Ķ—'),
+ ('Ķ™', 'Ķ™'),
+ ('Ķ›', 'Ķ›'),
+ ('Ķ', 'Ķ'),
+ ('ĶŸ', 'ĶŸ'),
+ ('ĶĄ', 'ĶĄ'),
+ ('ĶŖ', 'ĶŖ'),
+ ('ĶĨ', 'ĶĨ'),
+ ('Ķ§', 'Ķ§'),
+ ('ĶŠ', 'ĶŠ'),
+ ('ĶĢ', 'ĶĢ'),
+ ('Ķ­', 'Ķ­'),
+ ('Ķ¯', 'Ķ¯'),
+ ('Ķą', 'Ķą'),
+ ('Ķŗ', 'Ķŗ'),
+ ('Ķĩ', 'Ķĩ'),
+ ('Ķˇ', 'Ķˇ'),
+ ('Ķš', 'Ķš'),
+ ('Ķģ', 'Ķģ'),
+ ('ĶŊ', 'ĶŊ'),
+ ('Ķŋ', 'Ķŋ'),
+ ('ԁ', 'ԁ'),
+ ('ԃ', 'ԃ'),
+ ('ԅ', 'ԅ'),
+ ('ԇ', 'ԇ'),
+ ('ԉ', 'ԉ'),
+ ('ԋ', 'ԋ'),
+ ('ԍ', 'ԍ'),
+ ('ԏ', 'ԏ'),
+ ('ԑ', 'ԑ'),
+ ('ԓ', 'ԓ'),
+ ('ԕ', 'ԕ'),
+ ('ԗ', 'ԗ'),
+ ('ԙ', 'ԙ'),
+ ('ԛ', 'ԛ'),
+ ('ԝ', 'ԝ'),
+ ('ԟ', 'ԟ'),
+ ('ÔĄ', 'ÔĄ'),
+ ('ÔŖ', 'ÔŖ'),
+ ('ÔĨ', 'ÔĨ'),
+ ('Ô§', 'Ô§'),
+ ('ÔŠ', 'ÔŠ'),
+ ('ÔĢ', 'ÔĢ'),
+ ('Ô­', 'Ô­'),
+ ('Ô¯', 'Ô¯'),
+ ('ՠ', 'ֈ'),
+ ('ᏸ', 'áŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('ᴀ', 'áļŋ'),
+ ('ḁ', 'ḁ'),
+ ('ḃ', 'ḃ'),
+ ('ḅ', 'ḅ'),
+ ('ḇ', 'ḇ'),
+ ('ḉ', 'ḉ'),
+ ('ḋ', 'ḋ'),
+ ('ḍ', 'ḍ'),
+ ('ḏ', 'ḏ'),
+ ('ḑ', 'ḑ'),
+ ('ḓ', 'ḓ'),
+ ('ḕ', 'ḕ'),
+ ('ḗ', 'ḗ'),
+ ('ḙ', 'ḙ'),
+ ('ḛ', 'ḛ'),
+ ('ḝ', 'ḝ'),
+ ('ḟ', 'ḟ'),
+ ('ḥ', 'ḥ'),
+ ('á¸Ŗ', 'á¸Ŗ'),
+ ('á¸Ĩ', 'á¸Ĩ'),
+ ('ḧ', 'ḧ'),
+ ('Ḋ', 'Ḋ'),
+ ('á¸Ģ', 'á¸Ģ'),
+ ('ḭ', 'ḭ'),
+ ('ḯ', 'ḯ'),
+ ('ḹ', 'ḹ'),
+ ('á¸ŗ', 'á¸ŗ'),
+ ('á¸ĩ', 'á¸ĩ'),
+ ('ḡ', 'ḡ'),
+ ('Ḛ', 'Ḛ'),
+ ('á¸ģ', 'á¸ģ'),
+ ('á¸Ŋ', 'á¸Ŋ'),
+ ('á¸ŋ', 'á¸ŋ'),
+ ('ᚁ', 'ᚁ'),
+ ('ᚃ', 'ᚃ'),
+ ('ṅ', 'ṅ'),
+ ('ṇ', 'ṇ'),
+ ('ṉ', 'ṉ'),
+ ('ṋ', 'ṋ'),
+ ('ᚍ', 'ᚍ'),
+ ('ᚏ', 'ᚏ'),
+ ('ṑ', 'ṑ'),
+ ('ṓ', 'ṓ'),
+ ('ṕ', 'ṕ'),
+ ('ṗ', 'ṗ'),
+ ('ṙ', 'ṙ'),
+ ('ṛ', 'ṛ'),
+ ('᚝', '᚝'),
+ ('ṟ', 'ṟ'),
+ ('ᚥ', 'ᚥ'),
+ ('ášŖ', 'ášŖ'),
+ ('ášĨ', 'ášĨ'),
+ ('ᚧ', 'ᚧ'),
+ ('ᚊ', 'ᚊ'),
+ ('ášĢ', 'ášĢ'),
+ ('áš­', 'áš­'),
+ ('ᚯ', 'ᚯ'),
+ ('ášą', 'ášą'),
+ ('ášŗ', 'ášŗ'),
+ ('ášĩ', 'ášĩ'),
+ ('ᚡ', 'ᚡ'),
+ ('ášš', 'ášš'),
+ ('ášģ', 'ášģ'),
+ ('ášŊ', 'ášŊ'),
+ ('ášŋ', 'ášŋ'),
+ ('áē', 'áē'),
+ ('áēƒ', 'áēƒ'),
+ ('áē…', 'áē…'),
+ ('áē‡', 'áē‡'),
+ ('áē‰', 'áē‰'),
+ ('áē‹', 'áē‹'),
+ ('áē', 'áē'),
+ ('áē', 'áē'),
+ ('áē‘', 'áē‘'),
+ ('áē“', 'áē“'),
+ ('áē•', 'áē'),
+ ('áēŸ', 'áēŸ'),
+ ('áēĄ', 'áēĄ'),
+ ('áēŖ', 'áēŖ'),
+ ('áēĨ', 'áēĨ'),
+ ('áē§', 'áē§'),
+ ('áēŠ', 'áēŠ'),
+ ('áēĢ', 'áēĢ'),
+ ('áē­', 'áē­'),
+ ('áē¯', 'áē¯'),
+ ('áēą', 'áēą'),
+ ('áēŗ', 'áēŗ'),
+ ('áēĩ', 'áēĩ'),
+ ('áēˇ', 'áēˇ'),
+ ('áēš', 'áēš'),
+ ('áēģ', 'áēģ'),
+ ('áēŊ', 'áēŊ'),
+ ('áēŋ', 'áēŋ'),
+ ('áģ', 'áģ'),
+ ('áģƒ', 'áģƒ'),
+ ('áģ…', 'áģ…'),
+ ('áģ‡', 'áģ‡'),
+ ('áģ‰', 'áģ‰'),
+ ('áģ‹', 'áģ‹'),
+ ('áģ', 'áģ'),
+ ('áģ', 'áģ'),
+ ('áģ‘', 'áģ‘'),
+ ('áģ“', 'áģ“'),
+ ('áģ•', 'áģ•'),
+ ('áģ—', 'áģ—'),
+ ('áģ™', 'áģ™'),
+ ('áģ›', 'áģ›'),
+ ('áģ', 'áģ'),
+ ('áģŸ', 'áģŸ'),
+ ('áģĄ', 'áģĄ'),
+ ('áģŖ', 'áģŖ'),
+ ('áģĨ', 'áģĨ'),
+ ('áģ§', 'áģ§'),
+ ('áģŠ', 'áģŠ'),
+ ('áģĢ', 'áģĢ'),
+ ('áģ­', 'áģ­'),
+ ('áģ¯', 'áģ¯'),
+ ('áģą', 'áģą'),
+ ('áģŗ', 'áģŗ'),
+ ('áģĩ', 'áģĩ'),
+ ('áģˇ', 'áģˇ'),
+ ('áģš', 'áģš'),
+ ('áģģ', 'áģģ'),
+ ('áģŊ', 'áģŊ'),
+ ('áģŋ', 'áŧ‡'),
+ ('áŧ', 'áŧ•'),
+ ('áŧ ', 'áŧ§'),
+ ('áŧ°', 'áŧˇ'),
+ ('áŊ€', 'áŊ…'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ ', 'áŊ§'),
+ ('áŊ°', 'áŊŊ'),
+ ('ᾀ', 'ᾇ'),
+ ('ᾐ', 'ᾗ'),
+ ('ហ', 'ឧ'),
+ ('áž°', 'áž´'),
+ ('ážļ', 'ឡ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋ‡'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ—'),
+ ('áŋ ', 'áŋ§'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋˇ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ℊ', 'ℊ'),
+ ('ℎ', 'ℏ'),
+ ('ℓ', 'ℓ'),
+ ('ℯ', 'ℯ'),
+ ('ℴ', 'ℴ'),
+ ('ℹ', 'ℹ'),
+ ('â„ŧ', 'â„Ŋ'),
+ ('ⅆ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('ⅰ', 'â…ŋ'),
+ ('ↄ', 'ↄ'),
+ ('ⓐ', 'ⓩ'),
+ ('ⰰ', 'ⱞ'),
+ ('⹥', '⹥'),
+ ('âąĨ', 'âąĻ'),
+ ('⹨', '⹨'),
+ ('âąĒ', 'âąĒ'),
+ ('âąŦ', 'âąŦ'),
+ ('âąą', 'âąą'),
+ ('âąŗ', 'âą´'),
+ ('âąļ', 'âąŊ'),
+ ('ⲁ', 'ⲁ'),
+ ('ⲃ', 'ⲃ'),
+ ('ⲅ', 'ⲅ'),
+ ('ⲇ', 'ⲇ'),
+ ('ⲉ', 'ⲉ'),
+ ('ⲋ', 'ⲋ'),
+ ('ⲍ', 'ⲍ'),
+ ('ⲏ', 'ⲏ'),
+ ('ⲑ', 'ⲑ'),
+ ('ⲓ', 'ⲓ'),
+ ('ⲕ', 'ⲕ'),
+ ('ⲗ', 'ⲗ'),
+ ('ⲙ', 'ⲙ'),
+ ('ⲛ', 'ⲛ'),
+ ('ⲝ', 'ⲝ'),
+ ('ⲟ', 'ⲟ'),
+ ('ⲥ', 'ⲥ'),
+ ('â˛Ŗ', 'â˛Ŗ'),
+ ('â˛Ĩ', 'â˛Ĩ'),
+ ('ⲧ', 'ⲧ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('â˛Ģ', 'â˛Ģ'),
+ ('ⲭ', 'ⲭ'),
+ ('â˛¯', 'â˛¯'),
+ ('ⲹ', 'ⲹ'),
+ ('â˛ŗ', 'â˛ŗ'),
+ ('â˛ĩ', 'â˛ĩ'),
+ ('ⲡ', 'ⲡ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('â˛ģ', 'â˛ģ'),
+ ('â˛Ŋ', 'â˛Ŋ'),
+ ('â˛ŋ', 'â˛ŋ'),
+ ('âŗ', 'âŗ'),
+ ('âŗƒ', 'âŗƒ'),
+ ('âŗ…', 'âŗ…'),
+ ('âŗ‡', 'âŗ‡'),
+ ('âŗ‰', 'âŗ‰'),
+ ('âŗ‹', 'âŗ‹'),
+ ('âŗ', 'âŗ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ‘', 'âŗ‘'),
+ ('âŗ“', 'âŗ“'),
+ ('âŗ•', 'âŗ•'),
+ ('âŗ—', 'âŗ—'),
+ ('âŗ™', 'âŗ™'),
+ ('âŗ›', 'âŗ›'),
+ ('âŗ', 'âŗ'),
+ ('âŗŸ', 'âŗŸ'),
+ ('âŗĄ', 'âŗĄ'),
+ ('âŗŖ', 'âŗ¤'),
+ ('âŗŦ', 'âŗŦ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗŗ', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('ꙁ', 'ꙁ'),
+ ('ꙃ', 'ꙃ'),
+ ('ꙅ', 'ꙅ'),
+ ('ꙇ', 'ꙇ'),
+ ('ꙉ', 'ꙉ'),
+ ('ꙋ', 'ꙋ'),
+ ('ꙍ', 'ꙍ'),
+ ('ꙏ', 'ꙏ'),
+ ('ꙑ', 'ꙑ'),
+ ('ꙓ', 'ꙓ'),
+ ('ꙕ', 'ꙕ'),
+ ('ꙗ', 'ꙗ'),
+ ('ꙙ', 'ꙙ'),
+ ('ꙛ', 'ꙛ'),
+ ('ꙝ', 'ꙝ'),
+ ('ꙟ', 'ꙟ'),
+ ('ꙡ', 'ꙡ'),
+ ('ę™Ŗ', 'ę™Ŗ'),
+ ('ę™Ĩ', 'ę™Ĩ'),
+ ('ꙧ', 'ꙧ'),
+ ('ꙩ', 'ꙩ'),
+ ('ę™Ģ', 'ę™Ģ'),
+ ('ꙭ', 'ꙭ'),
+ ('ꚁ', 'ꚁ'),
+ ('ꚃ', 'ꚃ'),
+ ('ꚅ', 'ꚅ'),
+ ('ꚇ', 'ꚇ'),
+ ('ꚉ', 'ꚉ'),
+ ('ꚋ', 'ꚋ'),
+ ('ꚍ', 'ꚍ'),
+ ('ꚏ', 'ꚏ'),
+ ('ꚑ', 'ꚑ'),
+ ('ꚓ', 'ꚓ'),
+ ('ꚕ', 'ꚕ'),
+ ('ꚗ', 'ꚗ'),
+ ('ꚙ', 'ꚙ'),
+ ('ꚛ', 'ꚝ'),
+ ('ęœŖ', 'ęœŖ'),
+ ('ęœĨ', 'ęœĨ'),
+ ('ꜧ', 'ꜧ'),
+ ('ꜩ', 'ꜩ'),
+ ('ęœĢ', 'ęœĢ'),
+ ('ꜭ', 'ꜭ'),
+ ('ęœ¯', 'ꜱ'),
+ ('ęœŗ', 'ęœŗ'),
+ ('ęœĩ', 'ęœĩ'),
+ ('ꜷ', 'ꜷ'),
+ ('ꜹ', 'ꜹ'),
+ ('ęœģ', 'ęœģ'),
+ ('ęœŊ', 'ęœŊ'),
+ ('ęœŋ', 'ęœŋ'),
+ ('ꝁ', 'ꝁ'),
+ ('ꝃ', 'ꝃ'),
+ ('ꝅ', 'ꝅ'),
+ ('ꝇ', 'ꝇ'),
+ ('ꝉ', 'ꝉ'),
+ ('ꝋ', 'ꝋ'),
+ ('ꝍ', 'ꝍ'),
+ ('ꝏ', 'ꝏ'),
+ ('ꝑ', 'ꝑ'),
+ ('ꝓ', 'ꝓ'),
+ ('ꝕ', 'ꝕ'),
+ ('ꝗ', 'ꝗ'),
+ ('ꝙ', 'ꝙ'),
+ ('ꝛ', 'ꝛ'),
+ ('ꝝ', 'ꝝ'),
+ ('ꝟ', 'ꝟ'),
+ ('ꝡ', 'ꝡ'),
+ ('ęŖ', 'ęŖ'),
+ ('ęĨ', 'ęĨ'),
+ ('ꝧ', 'ꝧ'),
+ ('ꝩ', 'ꝩ'),
+ ('ęĢ', 'ęĢ'),
+ ('ꝭ', 'ꝭ'),
+ ('ę¯', 'ꝸ'),
+ ('ęē', 'ęē'),
+ ('ęŧ', 'ęŧ'),
+ ('ęŋ', 'ęŋ'),
+ ('ꞁ', 'ꞁ'),
+ ('ꞃ', 'ꞃ'),
+ ('ꞅ', 'ꞅ'),
+ ('ꞇ', 'ꞇ'),
+ ('ꞌ', 'ꞌ'),
+ ('ꞎ', 'ꞎ'),
+ ('ꞑ', 'ꞑ'),
+ ('ꞓ', 'ꞕ'),
+ ('ꞗ', 'ꞗ'),
+ ('ꞙ', 'ꞙ'),
+ ('ꞛ', 'ꞛ'),
+ ('ꞝ', 'ꞝ'),
+ ('ꞟ', 'ꞟ'),
+ ('ꞡ', 'ꞡ'),
+ ('ęžŖ', 'ęžŖ'),
+ ('ęžĨ', 'ęžĨ'),
+ ('ꞧ', 'ꞧ'),
+ ('ꞩ', 'ꞩ'),
+ ('ęž¯', 'ęž¯'),
+ ('ęžĩ', 'ęžĩ'),
+ ('ꞷ', 'ꞷ'),
+ ('ꞹ', 'ꞹ'),
+ ('ęžģ', 'ęžģ'),
+ ('ęžŊ', 'ęžŊ'),
+ ('ęžŋ', 'ęžŋ'),
+ ('ꟃ', 'ꟃ'),
+ ('\u{a7c8}', '\u{a7c8}'),
+ ('\u{a7ca}', '\u{a7ca}'),
+ ('\u{a7f6}', '\u{a7f6}'),
+ ('ꟸ', 'ęŸē'),
+ ('ęŦ°', 'ꭚ'),
+ ('ꭜ', '\u{ab68}'),
+ ('ę­°', 'ęŽŋ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('īŊ', 'īŊš'),
+ ('𐐨', '𐑏'),
+ ('𐓘', 'đ“ģ'),
+ ('đŗ€', 'đŗ˛'),
+ ('đ‘Ŗ€', 'đ‘ŖŸ'),
+ ('𖹠', 'đ–šŋ'),
+ ('𝐚', 'đŗ'),
+ ('𝑎', '𝑔'),
+ ('𝑖', '𝑧'),
+ ('𝒂', '𝒛'),
+ ('đ’ļ', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝓏'),
+ ('đ“Ē', '𝔃'),
+ ('𝔞', '𝔷'),
+ ('𝕒', 'đ•Ģ'),
+ ('𝖆', '𝖟'),
+ ('đ–ē', '𝗓'),
+ ('𝗮', '𝘇'),
+ ('đ˜ĸ', 'đ˜ģ'),
+ ('𝙖', 'đ™¯'),
+ ('𝚊', 'đšĨ'),
+ ('𝛂', '𝛚'),
+ ('𝛜', '𝛡'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜛'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝕'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞏'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟉'),
+ ('𝟋', '𝟋'),
+ ('đž¤ĸ', 'đžĨƒ'),
+];
+
+pub const NUMERIC: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('Ų ', 'ŲŠ'),
+ ('ŲĢ', 'ŲŦ'),
+ ('Û°', 'Ûš'),
+ ('߀', '߉'),
+ ('āĨĻ', 'āĨ¯'),
+ ('ā§Ļ', 'ā§¯'),
+ ('āŠĻ', 'āŠ¯'),
+ ('āĢĻ', 'āĢ¯'),
+ ('ā­Ļ', 'ā­¯'),
+ ('ā¯Ļ', 'ā¯¯'),
+ ('āąĻ', 'āą¯'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āĩĻ', 'āĩ¯'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āš', 'āš™'),
+ ('āģ', 'āģ™'),
+ ('āŧ ', 'āŧŠ'),
+ ('၀', '၉'),
+ ('႐', '႙'),
+ ('០', '៩'),
+ ('᠐', '᠙'),
+ ('áĨ†', 'áĨ'),
+ ('᧐', '᧙'),
+ ('áĒ€', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('᭐', '᭙'),
+ ('Ꮀ', '᎚'),
+ ('᱀', '᱉'),
+ ('᱐', '᱙'),
+ ('꘠', 'ꘊ'),
+ ('ęŖ', 'ęŖ™'),
+ ('꤀', '꤉'),
+ ('꧐', '꧙'),
+ ('꧰', '꧚'),
+ ('꩐', '꩙'),
+ ('ę¯°', 'ę¯š'),
+ ('īŧ', 'īŧ™'),
+ ('𐒠', '𐒩'),
+ ('𐴰', '𐴚'),
+ ('đ‘Ļ', 'đ‘¯'),
+ ('𑃰', '𑃹'),
+ ('đ‘„ļ', 'đ‘„ŋ'),
+ ('𑇐', '𑇙'),
+ ('𑋰', '𑋹'),
+ ('𑑐', '𑑙'),
+ ('𑓐', '𑓙'),
+ ('𑙐', '𑙙'),
+ ('𑛀', '𑛉'),
+ ('𑜰', '𑜹'),
+ ('đ‘Ŗ ', 'đ‘ŖŠ'),
+ ('\u{11950}', '\u{11959}'),
+ ('𑱐', '𑱙'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('𖩠', '𖩩'),
+ ('𖭐', '𖭙'),
+ ('𝟎', 'đŸŋ'),
+ ('𞅀', '𞅉'),
+ ('𞋰', '𞋹'),
+ ('đžĨ', 'đžĨ™'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+];
+
+pub const OLETTER: &'static [(char, char)] = &[
+ ('Æģ', 'Æģ'),
+ ('Į€', 'Įƒ'),
+ ('ʔ', 'ʔ'),
+ ('Ęš', 'Ęŋ'),
+ ('ˆ', 'ˑ'),
+ ('ËŦ', 'ËŦ'),
+ ('ËŽ', 'ËŽ'),
+ ('Í´', 'Í´'),
+ ('ՙ', 'ՙ'),
+ ('א', '×Ē'),
+ ('ׯ', '×ŗ'),
+ ('Ø ', 'ŲŠ'),
+ ('ŲŽ', 'Ų¯'),
+ ('Ųą', 'ۓ'),
+ ('ە', 'ە'),
+ ('ÛĨ', 'ÛĻ'),
+ ('ÛŽ', 'Û¯'),
+ ('Ûē', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', 'ܐ'),
+ ('ܒ', 'ܯ'),
+ ('Ũ', 'ŪĨ'),
+ ('Ūą', 'Ūą'),
+ ('ߊ', 'ßĒ'),
+ ('ß´', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('ā €', 'ā •'),
+ ('ā š', 'ā š'),
+ ('ā ¤', 'ā ¤'),
+ ('ā ¨', 'ā ¨'),
+ ('āĄ€', 'āĄ˜'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('ā¤„', 'ā¤š'),
+ ('ā¤Ŋ', 'ā¤Ŋ'),
+ ('āĨ', 'āĨ'),
+ ('āĨ˜', 'āĨĄ'),
+ ('āĨą', 'āĻ€'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('āĻŊ', 'āĻŊ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', 'ā§Ą'),
+ ('ā§°', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠ˛', 'āŠ´'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('āĒŊ', 'āĒŊ'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', 'āĢĄ'),
+ ('āĢš', 'āĢš'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('āŦŊ', 'āŦŊ'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', 'ā­Ą'),
+ ('ā­ą', 'ā­ą'),
+ ('āŽƒ', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('ā¯', 'ā¯'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'ā°Ŋ'),
+ ('āą˜', 'āąš'),
+ ('āą ', 'āąĄ'),
+ ('ā˛€', 'ā˛€'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛Ŋ', 'ā˛Ŋ'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', 'āŗĄ'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d04}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´ē'),
+ ('ā´Ŋ', 'ā´Ŋ'),
+ ('āĩŽ', 'āĩŽ'),
+ ('āĩ”', 'āĩ–'),
+ ('āĩŸ', 'āĩĄ'),
+ ('āĩē', 'āĩŋ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('ā¸', 'ā¸°'),
+ ('ā¸˛', 'ā¸ŗ'),
+ ('āš€', 'āš†'),
+ ('āē', 'āē‚'),
+ ('āē„', 'āē„'),
+ ('āē†', 'āēŠ'),
+ ('āēŒ', 'āēŖ'),
+ ('āēĨ', 'āēĨ'),
+ ('āē§', 'āē°'),
+ ('āē˛', 'āēŗ'),
+ ('āēŊ', 'āēŊ'),
+ ('āģ€', 'āģ„'),
+ ('āģ†', 'āģ†'),
+ ('āģœ', 'āģŸ'),
+ ('āŧ€', 'āŧ€'),
+ ('āŊ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('āžˆ', 'āžŒ'),
+ ('က', 'á€Ē'),
+ ('á€ŋ', 'á€ŋ'),
+ ('ၐ', 'ၕ'),
+ ('ၚ', 'ၝ'),
+ ('ၥ', 'ၥ'),
+ ('áĨ', 'áĻ'),
+ ('၎', 'ၰ'),
+ ('áĩ', 'ႁ'),
+ ('ႎ', 'ႎ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('ᎀ', 'ᎏ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛮ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', 'ᜑ'),
+ ('ᜠ', 'ᜱ'),
+ ('ᝀ', 'ᝑ'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('ក', 'ážŗ'),
+ ('ៗ', 'ៗ'),
+ ('ៜ', 'ៜ'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸ„'),
+ ('áĸ‡', 'áĸ¨'),
+ ('áĸĒ', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('áĨ', 'áĨ­'),
+ ('áĨ°', 'áĨ´'),
+ ('áĻ€', 'áĻĢ'),
+ ('áĻ°', 'ᧉ'),
+ ('ᨀ', 'ᨖ'),
+ ('ᨠ', 'ᩔ'),
+ ('áĒ§', 'áĒ§'),
+ ('áŦ…', 'áŦŗ'),
+ ('ᭅ', 'ᭋ'),
+ ('ᎃ', 'Ꭰ'),
+ ('ᎎ', 'Ꭿ'),
+ ('áŽē', 'á¯Ĩ'),
+ ('ᰀ', 'á°Ŗ'),
+ ('ᱍ', 'ᱏ'),
+ ('ᱚ', 'áąŊ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗļ'),
+ ('áŗē', 'áŗē'),
+ ('â„ĩ', 'ℸ'),
+ ('ↀ', 'ↂ'),
+ ('ↅ', 'ↈ'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('ⸯ', 'ⸯ'),
+ ('々', '〇'),
+ ('ã€Ą', '《'),
+ ('ã€ą', 'ã€ĩ'),
+ ('〸', 'ã€ŧ'),
+ ('ぁ', 'ゖ'),
+ ('ゝ', 'ゟ'),
+ ('ã‚Ą', 'ãƒē'),
+ ('ãƒŧ', 'ãƒŋ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ㇰ', 'ã‡ŋ'),
+ ('㐀', '\u{4dbf}'),
+ ('一', '\u{9ffc}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ꘟ'),
+ ('ę˜Ē', 'ę˜Ģ'),
+ ('ꙮ', 'ꙮ'),
+ ('ę™ŋ', 'ę™ŋ'),
+ ('ꚠ', 'ę›¯'),
+ ('ꜗ', 'ꜟ'),
+ ('ꞈ', 'ꞈ'),
+ ('ꞏ', 'ꞏ'),
+ ('ꟷ', 'ꟷ'),
+ ('ęŸģ', 'ꠁ'),
+ ('ꠃ', 'ꠅ'),
+ ('ꠇ', 'ꠊ'),
+ ('ꠌ', 'ę ĸ'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ‚', 'ęĸŗ'),
+ ('ęŖ˛', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', 'ęŖž'),
+ ('ꤊ', 'ę¤Ĩ'),
+ ('ꤰ', 'ęĨ†'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('ęĻ„', 'ęĻ˛'),
+ ('ꧏ', 'ꧏ'),
+ ('ꧠ', 'ꧤ'),
+ ('ę§Ļ', 'ę§¯'),
+ ('ę§ē', '꧞'),
+ ('ꨀ', 'ꨨ'),
+ ('ꩀ', 'ꩂ'),
+ ('ꩄ', 'ꩋ'),
+ ('ꊠ', 'ęŠļ'),
+ ('ęŠē', 'ęŠē'),
+ ('ꊞ', 'ęĒ¯'),
+ ('ęĒą', 'ęĒą'),
+ ('ęĒĩ', 'ęĒļ'),
+ ('ęĒš', 'ęĒŊ'),
+ ('ęĢ€', 'ęĢ€'),
+ ('ęĢ‚', 'ęĢ‚'),
+ ('ęĢ›', 'ęĢ'),
+ ('ęĢ ', 'ęĢĒ'),
+ ('ęĢ˛', 'ęĢ´'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('\u{ab69}', '\u{ab69}'),
+ ('ę¯€', 'ę¯ĸ'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('ī¤€', 'īŠ­'),
+ ('īŠ°', 'īĢ™'),
+ ('īŦ', 'īŦ'),
+ ('īŦŸ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'īŽą'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŊĻ', 'īž'),
+ ('īž ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐅀', '𐅴'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍊'),
+ ('𐍐', 'đĩ'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐏑', '𐏕'),
+ ('𐑐', '𐒝'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '𐨀'),
+ ('𐨐', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', 'đĢ¤'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐴀', 'đ´Ŗ'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', 'đŊ…'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀃', '𑀷'),
+ ('𑂃', 'đ‘‚¯'),
+ ('𑃐', '𑃨'),
+ ('𑄃', 'đ‘„Ļ'),
+ ('𑅄', '𑅄'),
+ ('\u{11147}', '\u{11147}'),
+ ('𑅐', '𑅲'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('𑆃', '𑆲'),
+ ('𑇁', '𑇄'),
+ ('𑇚', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', 'đ‘ˆĢ'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '𑋞'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('đ‘ŒŊ', 'đ‘ŒŊ'),
+ ('𑍐', '𑍐'),
+ ('𑍝', '𑍡'),
+ ('𑐀', '𑐴'),
+ ('𑑇', '𑑊'),
+ ('𑑟', '\u{11461}'),
+ ('𑒀', 'đ‘’¯'),
+ ('𑓄', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑖀', '𑖮'),
+ ('𑗘', '𑗛'),
+ ('𑘀', 'đ‘˜¯'),
+ ('𑙄', '𑙄'),
+ ('𑚀', 'đ‘šĒ'),
+ ('𑚸', '𑚸'),
+ ('𑜀', '𑜚'),
+ ('𑠀', 'đ‘ Ģ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{1192f}'),
+ ('\u{1193f}', '\u{1193f}'),
+ ('\u{11941}', '\u{11941}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '𑧐'),
+ ('𑧡', '𑧡'),
+ ('đ‘§Ŗ', 'đ‘§Ŗ'),
+ ('𑨀', '𑨀'),
+ ('𑨋', '𑨲'),
+ ('đ‘¨ē', 'đ‘¨ē'),
+ ('𑩐', '𑩐'),
+ ('𑩜', 'đ‘Ē‰'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '𑰮'),
+ ('𑱀', '𑱀'),
+ ('𑱲', '𑲏'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '𑴰'),
+ ('đ‘ĩ†', 'đ‘ĩ†'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļ‰'),
+ ('đ‘ļ˜', 'đ‘ļ˜'),
+ ('đ‘ģ ', 'đ‘ģ˛'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('đ–Ŧ€', 'đ–Ŧ¯'),
+ ('𖭀', '𖭃'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('đ–Ŋ', 'đ–Ŋ'),
+ ('𖾓', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', 'đ–ŋŖ'),
+ ('𗀀', '𘟷'),
+ ('𘠀', '\u{18cd5}'),
+ ('\u{18d00}', '\u{18d08}'),
+ ('𛀀', '𛄞'),
+ ('𛅐', '𛅒'),
+ ('𛅤', '𛅧'),
+ ('𛅰', 'đ›‹ģ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('𞄀', 'đž„Ŧ'),
+ ('𞄷', 'đž„Ŋ'),
+ ('𞅎', '𞅎'),
+ ('𞋀', 'đž‹Ģ'),
+ ('𞠀', 'đžŖ„'),
+ ('đžĨ‹', 'đžĨ‹'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('𠀀', '\u{2a6dd}'),
+ ('đĒœ€', 'đĢœ´'),
+ ('đĢ€', 'đĢ '),
+ ('đĢ  ', 'đŦēĄ'),
+ ('đŦē°', 'đŽ¯ '),
+ ('đ¯ €', 'đ¯¨'),
+ ('\u{30000}', '\u{3134a}'),
+];
+
+pub const SCONTINUE: &'static [(char, char)] = &[
+ (',', '-'),
+ (':', ':'),
+ ('՝', '՝'),
+ ('،', '؍'),
+ ('߸', '߸'),
+ ('᠂', '᠂'),
+ ('᠈', '᠈'),
+ ('–', '—'),
+ ('、', '、'),
+ ('ī¸', 'ī¸‘'),
+ ('ī¸“', 'ī¸“'),
+ ('ī¸ą', 'ī¸˛'),
+ ('īš', 'īš‘'),
+ ('īš•', 'īš•'),
+ ('īš˜', 'īš˜'),
+ ('īšŖ', 'īšŖ'),
+ ('īŧŒ', 'īŧ'),
+ ('īŧš', 'īŧš'),
+ ('īŊ¤', 'īŊ¤'),
+];
+
+pub const STERM: &'static [(char, char)] = &[
+ ('!', '!'),
+ ('?', '?'),
+ ('։', '։'),
+ ('؞', '؟'),
+ ('۔', '۔'),
+ ('܀', '܂'),
+ ('ßš', 'ßš'),
+ ('ā ˇ', 'ā ˇ'),
+ ('ā š', 'ā š'),
+ ('ā Ŋ', 'ā ž'),
+ ('āĨ¤', 'āĨĨ'),
+ ('၊', '။'),
+ ('áĸ', 'áĸ'),
+ ('፧', '፨'),
+ ('᙮', '᙮'),
+ ('áœĩ', 'áœļ'),
+ ('᠃', '᠃'),
+ ('᠉', '᠉'),
+ ('áĨ„', 'áĨ…'),
+ ('áĒ¨', 'áĒĢ'),
+ ('᭚', '᭛'),
+ ('᭞', '᭟'),
+ ('á°ģ', 'á°ŧ'),
+ ('áąž', 'áąŋ'),
+ ('â€ŧ', 'â€Ŋ'),
+ ('⁇', '⁉'),
+ ('⸎', '⸎'),
+ ('â¸ŧ', 'â¸ŧ'),
+ ('。', '。'),
+ ('ę“ŋ', 'ę“ŋ'),
+ ('꘎', '꘏'),
+ ('ę›ŗ', 'ę›ŗ'),
+ ('꛷', '꛷'),
+ ('ęĄļ', 'ꥡ'),
+ ('ęŖŽ', 'ęŖ'),
+ ('ę¤¯', 'ę¤¯'),
+ ('꧈', '꧉'),
+ ('꩝', '꩟'),
+ ('ęĢ°', 'ęĢą'),
+ ('ę¯Ģ', 'ę¯Ģ'),
+ ('īš–', 'īš—'),
+ ('īŧ', 'īŧ'),
+ ('īŧŸ', 'īŧŸ'),
+ ('īŊĄ', 'īŊĄ'),
+ ('𐩖', '𐩗'),
+ ('đŊ•', 'đŊ™'),
+ ('𑁇', '𑁈'),
+ ('𑂾', '𑃁'),
+ ('𑅁', '𑅃'),
+ ('𑇅', '𑇆'),
+ ('𑇍', '𑇍'),
+ ('𑇞', '𑇟'),
+ ('𑈸', '𑈹'),
+ ('đ‘ˆģ', 'đ‘ˆŧ'),
+ ('𑊩', '𑊩'),
+ ('𑑋', '𑑌'),
+ ('𑗂', '𑗃'),
+ ('𑗉', '𑗗'),
+ ('𑙁', '𑙂'),
+ ('đ‘œŧ', '𑜾'),
+ ('\u{11944}', '\u{11944}'),
+ ('\u{11946}', '\u{11946}'),
+ ('𑩂', '𑩃'),
+ ('đ‘Ē›', 'đ‘Ēœ'),
+ ('𑱁', '𑱂'),
+ ('đ‘ģˇ', 'đ‘ģ¸'),
+ ('𖩮', 'đ–Š¯'),
+ ('đ–Ģĩ', 'đ–Ģĩ'),
+ ('đ–Ŧˇ', 'đ–Ŧ¸'),
+ ('𖭄', '𖭄'),
+ ('đ–ē˜', 'đ–ē˜'),
+ ('𛲟', '𛲟'),
+ ('đĒˆ', 'đĒˆ'),
+];
+
+pub const SEP: &'static [(char, char)] =
+ &[('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}')];
+
+pub const SP: &'static [(char, char)] = &[
+ ('\t', '\t'),
+ ('\u{b}', '\u{c}'),
+ (' ', ' '),
+ ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'),
+ ('\u{2000}', '\u{200a}'),
+ ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'),
+ ('\u{3000}', '\u{3000}'),
+];
+
+pub const UPPER: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('À', 'Ö'),
+ ('Ø', 'Þ'),
+ ('Ā', 'Ā'),
+ ('Ă', 'Ă'),
+ ('Ą', 'Ą'),
+ ('Ć', 'Ć'),
+ ('Ĉ', 'Ĉ'),
+ ('Ċ', 'Ċ'),
+ ('Č', 'Č'),
+ ('Ď', 'Ď'),
+ ('Đ', 'Đ'),
+ ('Ē', 'Ē'),
+ ('Ĕ', 'Ĕ'),
+ ('Ė', 'Ė'),
+ ('Ę', 'Ę'),
+ ('Ě', 'Ě'),
+ ('Ĝ', 'Ĝ'),
+ ('Ğ', 'Ğ'),
+ ('Ä ', 'Ä '),
+ ('Äĸ', 'Äĸ'),
+ ('Ĥ', 'Ĥ'),
+ ('ÄĻ', 'ÄĻ'),
+ ('Ĩ', 'Ĩ'),
+ ('ÄĒ', 'ÄĒ'),
+ ('ÄŦ', 'ÄŦ'),
+ ('ÄŽ', 'ÄŽ'),
+ ('Ä°', 'Ä°'),
+ ('IJ', 'IJ'),
+ ('Ä´', 'Ä´'),
+ ('Äļ', 'Äļ'),
+ ('Äš', 'Äš'),
+ ('Äģ', 'Äģ'),
+ ('ÄŊ', 'ÄŊ'),
+ ('Äŋ', 'Äŋ'),
+ ('Ł', 'Ł'),
+ ('Ń', 'Ń'),
+ ('Ņ', 'Ņ'),
+ ('Ň', 'Ň'),
+ ('Ŋ', 'Ŋ'),
+ ('Ō', 'Ō'),
+ ('Ŏ', 'Ŏ'),
+ ('Ő', 'Ő'),
+ ('Œ', 'Œ'),
+ ('Ŕ', 'Ŕ'),
+ ('Ŗ', 'Ŗ'),
+ ('Ř', 'Ř'),
+ ('Ś', 'Ś'),
+ ('Ŝ', 'Ŝ'),
+ ('Ş', 'Ş'),
+ ('Å ', 'Å '),
+ ('Åĸ', 'Åĸ'),
+ ('Ť', 'Ť'),
+ ('ÅĻ', 'ÅĻ'),
+ ('Ũ', 'Ũ'),
+ ('ÅĒ', 'ÅĒ'),
+ ('ÅŦ', 'ÅŦ'),
+ ('ÅŽ', 'ÅŽ'),
+ ('Å°', 'Å°'),
+ ('Å˛', 'Å˛'),
+ ('Å´', 'Å´'),
+ ('Åļ', 'Åļ'),
+ ('Ÿ', 'Ś'),
+ ('Åģ', 'Åģ'),
+ ('ÅŊ', 'ÅŊ'),
+ ('Ɓ', 'Ƃ'),
+ ('Ƅ', 'Ƅ'),
+ ('Ɔ', 'Ƈ'),
+ ('Ɖ', 'Ƌ'),
+ ('Ǝ', 'Ƒ'),
+ ('Ɠ', 'Ɣ'),
+ ('Ɩ', 'Ƙ'),
+ ('Ɯ', 'Ɲ'),
+ ('Ɵ', 'Ơ'),
+ ('Æĸ', 'Æĸ'),
+ ('Ƥ', 'Ƥ'),
+ ('ÆĻ', 'Ƨ'),
+ ('ÆŠ', 'ÆŠ'),
+ ('ÆŦ', 'ÆŦ'),
+ ('Ǝ', 'Ư'),
+ ('Æą', 'Æŗ'),
+ ('Æĩ', 'Æĩ'),
+ ('Æˇ', 'Ƹ'),
+ ('Æŧ', 'Æŧ'),
+ ('Į„', 'Į…'),
+ ('Į‡', 'Įˆ'),
+ ('ĮŠ', 'Į‹'),
+ ('Į', 'Į'),
+ ('Į', 'Į'),
+ ('Į‘', 'Į‘'),
+ ('Į“', 'Į“'),
+ ('Į•', 'Į•'),
+ ('Į—', 'Į—'),
+ ('Į™', 'Į™'),
+ ('Į›', 'Į›'),
+ ('Įž', 'Įž'),
+ ('Į ', 'Į '),
+ ('Įĸ', 'Įĸ'),
+ ('Į¤', 'Į¤'),
+ ('ĮĻ', 'ĮĻ'),
+ ('Į¨', 'Į¨'),
+ ('ĮĒ', 'ĮĒ'),
+ ('ĮŦ', 'ĮŦ'),
+ ('ĮŽ', 'ĮŽ'),
+ ('Įą', 'Į˛'),
+ ('Į´', 'Į´'),
+ ('Įļ', 'Į¸'),
+ ('Įē', 'Įē'),
+ ('Įŧ', 'Įŧ'),
+ ('Įž', 'Įž'),
+ ('Ȁ', 'Ȁ'),
+ ('Ȃ', 'Ȃ'),
+ ('Ȅ', 'Ȅ'),
+ ('Ȇ', 'Ȇ'),
+ ('Ȉ', 'Ȉ'),
+ ('Ȋ', 'Ȋ'),
+ ('Ȍ', 'Ȍ'),
+ ('Ȏ', 'Ȏ'),
+ ('Ȑ', 'Ȑ'),
+ ('Ȓ', 'Ȓ'),
+ ('Ȕ', 'Ȕ'),
+ ('Ȗ', 'Ȗ'),
+ ('Ș', 'Ș'),
+ ('Ț', 'Ț'),
+ ('Ȝ', 'Ȝ'),
+ ('Ȟ', 'Ȟ'),
+ ('Č ', 'Č '),
+ ('Čĸ', 'Čĸ'),
+ ('Ȥ', 'Ȥ'),
+ ('ČĻ', 'ČĻ'),
+ ('Ȩ', 'Ȩ'),
+ ('ČĒ', 'ČĒ'),
+ ('ČŦ', 'ČŦ'),
+ ('ČŽ', 'ČŽ'),
+ ('Č°', 'Č°'),
+ ('Ȳ', 'Ȳ'),
+ ('Čē', 'Čģ'),
+ ('ČŊ', 'Čž'),
+ ('Ɂ', 'Ɂ'),
+ ('Ƀ', 'Ɇ'),
+ ('Ɉ', 'Ɉ'),
+ ('Ɋ', 'Ɋ'),
+ ('Ɍ', 'Ɍ'),
+ ('Ɏ', 'Ɏ'),
+ ('Í°', 'Í°'),
+ ('Ͳ', 'Ͳ'),
+ ('Íļ', 'Íļ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ώ'),
+ ('Α', 'Ρ'),
+ ('ÎŖ', 'ÎĢ'),
+ ('Ī', 'Ī'),
+ ('Ī’', 'Ī”'),
+ ('Ī˜', 'Ī˜'),
+ ('Īš', 'Īš'),
+ ('Īœ', 'Īœ'),
+ ('Īž', 'Īž'),
+ ('Ī ', 'Ī '),
+ ('Īĸ', 'Īĸ'),
+ ('Ī¤', 'Ī¤'),
+ ('ĪĻ', 'ĪĻ'),
+ ('Ī¨', 'Ī¨'),
+ ('ĪĒ', 'ĪĒ'),
+ ('ĪŦ', 'ĪŦ'),
+ ('ĪŽ', 'ĪŽ'),
+ ('Ī´', 'Ī´'),
+ ('Īˇ', 'Īˇ'),
+ ('Īš', 'Īē'),
+ ('ĪŊ', 'Đ¯'),
+ ('Ņ ', 'Ņ '),
+ ('Ņĸ', 'Ņĸ'),
+ ('Ņ¤', 'Ņ¤'),
+ ('ŅĻ', 'ŅĻ'),
+ ('Ņ¨', 'Ņ¨'),
+ ('ŅĒ', 'ŅĒ'),
+ ('ŅŦ', 'ŅŦ'),
+ ('ŅŽ', 'ŅŽ'),
+ ('Ņ°', 'Ņ°'),
+ ('Ņ˛', 'Ņ˛'),
+ ('Ņ´', 'Ņ´'),
+ ('Ņļ', 'Ņļ'),
+ ('Ņ¸', 'Ņ¸'),
+ ('Ņē', 'Ņē'),
+ ('Ņŧ', 'Ņŧ'),
+ ('Ņž', 'Ņž'),
+ ('Ō€', 'Ō€'),
+ ('ŌŠ', 'ŌŠ'),
+ ('ŌŒ', 'ŌŒ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō', 'Ō'),
+ ('Ō’', 'Ō’'),
+ ('Ō”', 'Ō”'),
+ ('Ō–', 'Ō–'),
+ ('Ō˜', 'Ō˜'),
+ ('Ōš', 'Ōš'),
+ ('Ōœ', 'Ōœ'),
+ ('Ōž', 'Ōž'),
+ ('Ō ', 'Ō '),
+ ('Ōĸ', 'Ōĸ'),
+ ('Ō¤', 'Ō¤'),
+ ('ŌĻ', 'ŌĻ'),
+ ('Ō¨', 'Ō¨'),
+ ('ŌĒ', 'ŌĒ'),
+ ('ŌŦ', 'ŌŦ'),
+ ('ŌŽ', 'ŌŽ'),
+ ('Ō°', 'Ō°'),
+ ('Ō˛', 'Ō˛'),
+ ('Ō´', 'Ō´'),
+ ('Ōļ', 'Ōļ'),
+ ('Ō¸', 'Ō¸'),
+ ('Ōē', 'Ōē'),
+ ('Ōŧ', 'Ōŧ'),
+ ('Ōž', 'Ōž'),
+ ('Ķ€', 'Ķ'),
+ ('Ķƒ', 'Ķƒ'),
+ ('Ķ…', 'Ķ…'),
+ ('Ķ‡', 'Ķ‡'),
+ ('Ķ‰', 'Ķ‰'),
+ ('Ķ‹', 'Ķ‹'),
+ ('Ķ', 'Ķ'),
+ ('Ķ', 'Ķ'),
+ ('Ķ’', 'Ķ’'),
+ ('Ķ”', 'Ķ”'),
+ ('Ķ–', 'Ķ–'),
+ ('Ķ˜', 'Ķ˜'),
+ ('Ķš', 'Ķš'),
+ ('Ķœ', 'Ķœ'),
+ ('Ķž', 'Ķž'),
+ ('Ķ ', 'Ķ '),
+ ('Ķĸ', 'Ķĸ'),
+ ('Ķ¤', 'Ķ¤'),
+ ('ĶĻ', 'ĶĻ'),
+ ('Ķ¨', 'Ķ¨'),
+ ('ĶĒ', 'ĶĒ'),
+ ('ĶŦ', 'ĶŦ'),
+ ('ĶŽ', 'ĶŽ'),
+ ('Ķ°', 'Ķ°'),
+ ('Ķ˛', 'Ķ˛'),
+ ('Ķ´', 'Ķ´'),
+ ('Ķļ', 'Ķļ'),
+ ('Ķ¸', 'Ķ¸'),
+ ('Ķē', 'Ķē'),
+ ('Ķŧ', 'Ķŧ'),
+ ('Ķž', 'Ķž'),
+ ('Ԁ', 'Ԁ'),
+ ('Ԃ', 'Ԃ'),
+ ('Ԅ', 'Ԅ'),
+ ('Ԇ', 'Ԇ'),
+ ('Ԉ', 'Ԉ'),
+ ('Ԋ', 'Ԋ'),
+ ('Ԍ', 'Ԍ'),
+ ('Ԏ', 'Ԏ'),
+ ('Ԑ', 'Ԑ'),
+ ('Ԓ', 'Ԓ'),
+ ('Ԕ', 'Ԕ'),
+ ('Ԗ', 'Ԗ'),
+ ('Ԙ', 'Ԙ'),
+ ('Ԛ', 'Ԛ'),
+ ('Ԝ', 'Ԝ'),
+ ('Ԟ', 'Ԟ'),
+ ('Ô ', 'Ô '),
+ ('Ôĸ', 'Ôĸ'),
+ ('Ô¤', 'Ô¤'),
+ ('ÔĻ', 'ÔĻ'),
+ ('Ô¨', 'Ô¨'),
+ ('ÔĒ', 'ÔĒ'),
+ ('ÔŦ', 'ÔŦ'),
+ ('ÔŽ', 'ÔŽ'),
+ ('Ôą', 'Ֆ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('Ꭰ', 'áĩ'),
+ ('Ḁ', 'Ḁ'),
+ ('Ḃ', 'Ḃ'),
+ ('Ḅ', 'Ḅ'),
+ ('Ḇ', 'Ḇ'),
+ ('Ḉ', 'Ḉ'),
+ ('Ḋ', 'Ḋ'),
+ ('Ḍ', 'Ḍ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḑ', 'Ḑ'),
+ ('Ḓ', 'Ḓ'),
+ ('Ḕ', 'Ḕ'),
+ ('Ḗ', 'Ḗ'),
+ ('Ḙ', 'Ḙ'),
+ ('Ḛ', 'Ḛ'),
+ ('Ḝ', 'Ḝ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ḡ', 'Ḡ'),
+ ('á¸ĸ', 'á¸ĸ'),
+ ('Ḥ', 'Ḥ'),
+ ('á¸Ļ', 'á¸Ļ'),
+ ('Ḩ', 'Ḩ'),
+ ('á¸Ē', 'á¸Ē'),
+ ('á¸Ŧ', 'á¸Ŧ'),
+ ('Ḏ', 'Ḏ'),
+ ('Ḱ', 'Ḱ'),
+ ('Ḳ', 'Ḳ'),
+ ('Ḵ', 'Ḵ'),
+ ('á¸ļ', 'á¸ļ'),
+ ('Ḹ', 'Ḹ'),
+ ('á¸ē', 'á¸ē'),
+ ('á¸ŧ', 'á¸ŧ'),
+ ('Ḟ', 'Ḟ'),
+ ('Ṁ', 'Ṁ'),
+ ('Ṃ', 'Ṃ'),
+ ('Ṅ', 'Ṅ'),
+ ('Ṇ', 'Ṇ'),
+ ('ᚈ', 'ᚈ'),
+ ('Ṋ', 'Ṋ'),
+ ('Ṍ', 'Ṍ'),
+ ('ᚎ', 'ᚎ'),
+ ('ᚐ', 'ᚐ'),
+ ('Ṓ', 'Ṓ'),
+ ('Ṕ', 'Ṕ'),
+ ('Ṗ', 'Ṗ'),
+ ('ᚘ', 'ᚘ'),
+ ('Ṛ', 'Ṛ'),
+ ('Ṝ', 'Ṝ'),
+ ('᚞', '᚞'),
+ ('áš ', 'áš '),
+ ('ášĸ', 'ášĸ'),
+ ('ᚤ', 'ᚤ'),
+ ('ášĻ', 'ášĻ'),
+ ('ᚨ', 'ᚨ'),
+ ('ášĒ', 'ášĒ'),
+ ('ášŦ', 'ášŦ'),
+ ('ᚎ', 'ᚎ'),
+ ('áš°', 'áš°'),
+ ('ᚲ', 'ᚲ'),
+ ('áš´', 'áš´'),
+ ('ášļ', 'ášļ'),
+ ('ᚸ', 'ᚸ'),
+ ('ášē', 'ášē'),
+ ('ášŧ', 'ášŧ'),
+ ('ášž', 'ášž'),
+ ('áē€', 'áē€'),
+ ('áē‚', 'áē‚'),
+ ('áē„', 'áē„'),
+ ('áē†', 'áē†'),
+ ('áēˆ', 'áēˆ'),
+ ('áēŠ', 'áēŠ'),
+ ('áēŒ', 'áēŒ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē', 'áē'),
+ ('áē’', 'áē’'),
+ ('áē”', 'áē”'),
+ ('áēž', 'áēž'),
+ ('áē ', 'áē '),
+ ('áēĸ', 'áēĸ'),
+ ('áē¤', 'áē¤'),
+ ('áēĻ', 'áēĻ'),
+ ('áē¨', 'áē¨'),
+ ('áēĒ', 'áēĒ'),
+ ('áēŦ', 'áēŦ'),
+ ('áēŽ', 'áēŽ'),
+ ('áē°', 'áē°'),
+ ('áē˛', 'áē˛'),
+ ('áē´', 'áē´'),
+ ('áēļ', 'áēļ'),
+ ('áē¸', 'áē¸'),
+ ('áēē', 'áēē'),
+ ('áēŧ', 'áēŧ'),
+ ('áēž', 'áēž'),
+ ('áģ€', 'áģ€'),
+ ('áģ‚', 'áģ‚'),
+ ('áģ„', 'áģ„'),
+ ('áģ†', 'áģ†'),
+ ('áģˆ', 'áģˆ'),
+ ('áģŠ', 'áģŠ'),
+ ('áģŒ', 'áģŒ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ', 'áģ'),
+ ('áģ’', 'áģ’'),
+ ('áģ”', 'áģ”'),
+ ('áģ–', 'áģ–'),
+ ('áģ˜', 'áģ˜'),
+ ('áģš', 'áģš'),
+ ('áģœ', 'áģœ'),
+ ('áģž', 'áģž'),
+ ('áģ ', 'áģ '),
+ ('áģĸ', 'áģĸ'),
+ ('áģ¤', 'áģ¤'),
+ ('áģĻ', 'áģĻ'),
+ ('áģ¨', 'áģ¨'),
+ ('áģĒ', 'áģĒ'),
+ ('áģŦ', 'áģŦ'),
+ ('áģŽ', 'áģŽ'),
+ ('áģ°', 'áģ°'),
+ ('áģ˛', 'áģ˛'),
+ ('áģ´', 'áģ´'),
+ ('áģļ', 'áģļ'),
+ ('áģ¸', 'áģ¸'),
+ ('áģē', 'áģē'),
+ ('áģŧ', 'áģŧ'),
+ ('áģž', 'áģž'),
+ ('áŧˆ', 'áŧ'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ¨', 'áŧ¯'),
+ ('áŧ¸', 'áŧŋ'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŸ'),
+ ('áŊ¨', 'áŊ¯'),
+ ('ᾈ', 'ᾏ'),
+ ('ᾘ', 'ᾟ'),
+ ('ឨ', 'ឯ'),
+ ('ី', 'ážŧ'),
+ ('áŋˆ', 'áŋŒ'),
+ ('áŋ˜', 'áŋ›'),
+ ('áŋ¨', 'áŋŦ'),
+ ('áŋ¸', 'áŋŧ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℋ', 'ℍ'),
+ ('ℐ', 'ℒ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℰ', 'â„ŗ'),
+ ('ℾ', 'â„ŋ'),
+ ('ⅅ', 'ⅅ'),
+ ('Ⅰ', 'Ⅿ'),
+ ('Ↄ', 'Ↄ'),
+ ('â’ļ', 'Ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('âą ', 'âą '),
+ ('âąĸ', '⹤'),
+ ('⹧', '⹧'),
+ ('⹊', '⹊'),
+ ('âąĢ', 'âąĢ'),
+ ('âą­', 'âą°'),
+ ('⹲', '⹲'),
+ ('âąĩ', 'âąĩ'),
+ ('Ȿ', 'Ⲁ'),
+ ('Ⲃ', 'Ⲃ'),
+ ('Ⲅ', 'Ⲅ'),
+ ('Ⲇ', 'Ⲇ'),
+ ('Ⲉ', 'Ⲉ'),
+ ('Ⲋ', 'Ⲋ'),
+ ('Ⲍ', 'Ⲍ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲑ', 'Ⲑ'),
+ ('Ⲓ', 'Ⲓ'),
+ ('Ⲕ', 'Ⲕ'),
+ ('Ⲗ', 'Ⲗ'),
+ ('Ⲙ', 'Ⲙ'),
+ ('Ⲛ', 'Ⲛ'),
+ ('Ⲝ', 'Ⲝ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('Ⲡ', 'Ⲡ'),
+ ('â˛ĸ', 'â˛ĸ'),
+ ('Ⲥ', 'Ⲥ'),
+ ('â˛Ļ', 'â˛Ļ'),
+ ('Ⲩ', 'Ⲩ'),
+ ('â˛Ē', 'â˛Ē'),
+ ('â˛Ŧ', 'â˛Ŧ'),
+ ('Ⲏ', 'Ⲏ'),
+ ('Ⲱ', 'Ⲱ'),
+ ('Ⲳ', 'Ⲳ'),
+ ('Ⲵ', 'Ⲵ'),
+ ('â˛ļ', 'â˛ļ'),
+ ('Ⲹ', 'Ⲹ'),
+ ('â˛ē', 'â˛ē'),
+ ('â˛ŧ', 'â˛ŧ'),
+ ('Ⲟ', 'Ⲟ'),
+ ('âŗ€', 'âŗ€'),
+ ('âŗ‚', 'âŗ‚'),
+ ('âŗ„', 'âŗ„'),
+ ('âŗ†', 'âŗ†'),
+ ('âŗˆ', 'âŗˆ'),
+ ('âŗŠ', 'âŗŠ'),
+ ('âŗŒ', 'âŗŒ'),
+ ('âŗŽ', 'âŗŽ'),
+ ('âŗ', 'âŗ'),
+ ('âŗ’', 'âŗ’'),
+ ('âŗ”', 'âŗ”'),
+ ('âŗ–', 'âŗ–'),
+ ('âŗ˜', 'âŗ˜'),
+ ('âŗš', 'âŗš'),
+ ('âŗœ', 'âŗœ'),
+ ('âŗž', 'âŗž'),
+ ('âŗ ', 'âŗ '),
+ ('âŗĸ', 'âŗĸ'),
+ ('âŗĢ', 'âŗĢ'),
+ ('âŗ­', 'âŗ­'),
+ ('âŗ˛', 'âŗ˛'),
+ ('Ꙁ', 'Ꙁ'),
+ ('Ꙃ', 'Ꙃ'),
+ ('Ꙅ', 'Ꙅ'),
+ ('Ꙇ', 'Ꙇ'),
+ ('Ꙉ', 'Ꙉ'),
+ ('Ꙋ', 'Ꙋ'),
+ ('Ꙍ', 'Ꙍ'),
+ ('Ꙏ', 'Ꙏ'),
+ ('Ꙑ', 'Ꙑ'),
+ ('Ꙓ', 'Ꙓ'),
+ ('Ꙕ', 'Ꙕ'),
+ ('Ꙗ', 'Ꙗ'),
+ ('Ꙙ', 'Ꙙ'),
+ ('Ꙛ', 'Ꙛ'),
+ ('Ꙝ', 'Ꙝ'),
+ ('Ꙟ', 'Ꙟ'),
+ ('Ꙡ', 'Ꙡ'),
+ ('ę™ĸ', 'ę™ĸ'),
+ ('Ꙥ', 'Ꙥ'),
+ ('ę™Ļ', 'ę™Ļ'),
+ ('Ꙩ', 'Ꙩ'),
+ ('ę™Ē', 'ę™Ē'),
+ ('ę™Ŧ', 'ę™Ŧ'),
+ ('Ꚁ', 'Ꚁ'),
+ ('Ꚃ', 'Ꚃ'),
+ ('Ꚅ', 'Ꚅ'),
+ ('Ꚇ', 'Ꚇ'),
+ ('Ꚉ', 'Ꚉ'),
+ ('Ꚋ', 'Ꚋ'),
+ ('Ꚍ', 'Ꚍ'),
+ ('Ꚏ', 'Ꚏ'),
+ ('Ꚑ', 'Ꚑ'),
+ ('Ꚓ', 'Ꚓ'),
+ ('Ꚕ', 'Ꚕ'),
+ ('Ꚗ', 'Ꚗ'),
+ ('Ꚙ', 'Ꚙ'),
+ ('Ꚛ', 'Ꚛ'),
+ ('ęœĸ', 'ęœĸ'),
+ ('Ꜥ', 'Ꜥ'),
+ ('ęœĻ', 'ęœĻ'),
+ ('Ꜩ', 'Ꜩ'),
+ ('ęœĒ', 'ęœĒ'),
+ ('ęœŦ', 'ęœŦ'),
+ ('Ꜯ', 'Ꜯ'),
+ ('Ꜳ', 'Ꜳ'),
+ ('Ꜵ', 'Ꜵ'),
+ ('ęœļ', 'ęœļ'),
+ ('Ꜹ', 'Ꜹ'),
+ ('ęœē', 'ęœē'),
+ ('ęœŧ', 'ęœŧ'),
+ ('Ꜿ', 'Ꜿ'),
+ ('Ꝁ', 'Ꝁ'),
+ ('Ꝃ', 'Ꝃ'),
+ ('Ꝅ', 'Ꝅ'),
+ ('Ꝇ', 'Ꝇ'),
+ ('Ꝉ', 'Ꝉ'),
+ ('Ꝋ', 'Ꝋ'),
+ ('Ꝍ', 'Ꝍ'),
+ ('Ꝏ', 'Ꝏ'),
+ ('Ꝑ', 'Ꝑ'),
+ ('Ꝓ', 'Ꝓ'),
+ ('Ꝕ', 'Ꝕ'),
+ ('Ꝗ', 'Ꝗ'),
+ ('Ꝙ', 'Ꝙ'),
+ ('Ꝛ', 'Ꝛ'),
+ ('Ꝝ', 'Ꝝ'),
+ ('Ꝟ', 'Ꝟ'),
+ ('Ꝡ', 'Ꝡ'),
+ ('ęĸ', 'ęĸ'),
+ ('Ꝥ', 'Ꝥ'),
+ ('ęĻ', 'ęĻ'),
+ ('Ꝩ', 'Ꝩ'),
+ ('ęĒ', 'ęĒ'),
+ ('ęŦ', 'ęŦ'),
+ ('Ꝯ', 'Ꝯ'),
+ ('Ꝺ', 'Ꝺ'),
+ ('ęģ', 'ęģ'),
+ ('ęŊ', 'Ꝿ'),
+ ('Ꞁ', 'Ꞁ'),
+ ('Ꞃ', 'Ꞃ'),
+ ('Ꞅ', 'Ꞅ'),
+ ('Ꞇ', 'Ꞇ'),
+ ('Ꞌ', 'Ꞌ'),
+ ('Ɥ', 'Ɥ'),
+ ('Ꞑ', 'Ꞑ'),
+ ('Ꞓ', 'Ꞓ'),
+ ('Ꞗ', 'Ꞗ'),
+ ('Ꞙ', 'Ꞙ'),
+ ('Ꞛ', 'Ꞛ'),
+ ('Ꞝ', 'Ꞝ'),
+ ('Ꞟ', 'Ꞟ'),
+ ('Ꞡ', 'Ꞡ'),
+ ('ęžĸ', 'ęžĸ'),
+ ('Ꞥ', 'Ꞥ'),
+ ('ęžĻ', 'ęžĻ'),
+ ('Ꞩ', 'Ꞩ'),
+ ('ęžĒ', 'Ɪ'),
+ ('Ʞ', 'Ꞵ'),
+ ('ęžļ', 'ęžļ'),
+ ('Ꞹ', 'Ꞹ'),
+ ('ęžē', 'ęžē'),
+ ('ęžŧ', 'ęžŧ'),
+ ('Ꞿ', 'Ꞿ'),
+ ('Ꟃ', 'Ꟃ'),
+ ('Ꞔ', '\u{a7c7}'),
+ ('\u{a7c9}', '\u{a7c9}'),
+ ('\u{a7f5}', '\u{a7f5}'),
+ ('īŧĄ', 'īŧē'),
+ ('𐐀', '𐐧'),
+ ('𐒰', '𐓓'),
+ ('𐲀', '𐲲'),
+ ('đ‘ĸ ', 'đ‘ĸŋ'),
+ ('𖹀', '𖹟'),
+ ('𝐀', '𝐙'),
+ ('𝐴', '𝑍'),
+ ('𝑨', '𝒁'),
+ ('𝒜', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', 'đ’ĩ'),
+ ('𝓐', '𝓩'),
+ ('𝔄', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔸', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('đ•Ŧ', '𝖅'),
+ ('𝖠', '𝖹'),
+ ('𝗔', '𝗭'),
+ ('𝘈', '𝘡'),
+ ('đ˜ŧ', '𝙕'),
+ ('𝙰', '𝚉'),
+ ('𝚨', '𝛀'),
+ ('đ›ĸ', 'đ›ē'),
+ ('𝜜', '𝜴'),
+ ('𝝖', '𝝮'),
+ ('𝞐', '𝞨'),
+ ('𝟊', '𝟊'),
+ ('𞤀', '𞤡'),
+ ('🄰', '🅉'),
+ ('🅐', '🅩'),
+ ('🅰', '🆉'),
+];
diff --git a/vendor/regex-syntax/src/unicode_tables/word_break.rs b/vendor/regex-syntax/src/unicode_tables/word_break.rs
new file mode 100644
index 000000000..bd23e00a8
--- /dev/null
+++ b/vendor/regex-syntax/src/unicode_tables/word_break.rs
@@ -0,0 +1,1060 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate word-break ucd-13.0.0 --chars
+//
+// Unicode version: 13.0.0.
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+ ("ALetter", ALETTER),
+ ("CR", CR),
+ ("Double_Quote", DOUBLE_QUOTE),
+ ("Extend", EXTEND),
+ ("ExtendNumLet", EXTENDNUMLET),
+ ("Format", FORMAT),
+ ("Hebrew_Letter", HEBREW_LETTER),
+ ("Katakana", KATAKANA),
+ ("LF", LF),
+ ("MidLetter", MIDLETTER),
+ ("MidNum", MIDNUM),
+ ("MidNumLet", MIDNUMLET),
+ ("Newline", NEWLINE),
+ ("Numeric", NUMERIC),
+ ("Regional_Indicator", REGIONAL_INDICATOR),
+ ("Single_Quote", SINGLE_QUOTE),
+ ("WSegSpace", WSEGSPACE),
+ ("ZWJ", ZWJ),
+];
+
+pub const ALETTER: &'static [(char, char)] = &[
+ ('A', 'Z'),
+ ('a', 'z'),
+ ('ÂĒ', 'ÂĒ'),
+ ('Âĩ', 'Âĩ'),
+ ('Âē', 'Âē'),
+ ('À', 'Ö'),
+ ('Ø', 'Ãļ'),
+ ('ø', '˗'),
+ ('˞', 'Ëŋ'),
+ ('Í°', 'Í´'),
+ ('Íļ', '͡'),
+ ('Íē', 'ÍŊ'),
+ ('Íŋ', 'Íŋ'),
+ ('Ά', 'Ά'),
+ ('Έ', 'Ί'),
+ ('Ό', 'Ό'),
+ ('Ύ', 'Ρ'),
+ ('ÎŖ', 'Īĩ'),
+ ('Īˇ', 'Ō'),
+ ('ŌŠ', 'Ô¯'),
+ ('Ôą', 'Ֆ'),
+ ('ՙ', '՜'),
+ ('՞', '՞'),
+ ('ՠ', 'ֈ'),
+ ('֊', '֊'),
+ ('×ŗ', '×ŗ'),
+ ('Ø ', 'ŲŠ'),
+ ('ŲŽ', 'Ų¯'),
+ ('Ųą', 'ۓ'),
+ ('ە', 'ە'),
+ ('ÛĨ', 'ÛĻ'),
+ ('ÛŽ', 'Û¯'),
+ ('Ûē', 'Ûŧ'),
+ ('Ûŋ', 'Ûŋ'),
+ ('ܐ', 'ܐ'),
+ ('ܒ', 'ܯ'),
+ ('Ũ', 'ŪĨ'),
+ ('Ūą', 'Ūą'),
+ ('ߊ', 'ßĒ'),
+ ('ß´', 'ßĩ'),
+ ('ßē', 'ßē'),
+ ('ā €', 'ā •'),
+ ('ā š', 'ā š'),
+ ('ā ¤', 'ā ¤'),
+ ('ā ¨', 'ā ¨'),
+ ('āĄ€', 'āĄ˜'),
+ ('āĄ ', 'āĄĒ'),
+ ('āĸ ', 'āĸ´'),
+ ('āĸļ', '\u{8c7}'),
+ ('ā¤„', 'ā¤š'),
+ ('ā¤Ŋ', 'ā¤Ŋ'),
+ ('āĨ', 'āĨ'),
+ ('āĨ˜', 'āĨĄ'),
+ ('āĨą', 'āĻ€'),
+ ('āĻ…', 'āĻŒ'),
+ ('āĻ', 'āĻ'),
+ ('āĻ“', 'āĻ¨'),
+ ('āĻĒ', 'āĻ°'),
+ ('āĻ˛', 'āĻ˛'),
+ ('āĻļ', 'āĻš'),
+ ('āĻŊ', 'āĻŊ'),
+ ('ā§Ž', 'ā§Ž'),
+ ('ā§œ', 'ā§'),
+ ('ā§Ÿ', 'ā§Ą'),
+ ('ā§°', 'ā§ą'),
+ ('ā§ŧ', 'ā§ŧ'),
+ ('ā¨…', 'ā¨Š'),
+ ('ā¨', 'ā¨'),
+ ('ā¨“', 'ā¨¨'),
+ ('ā¨Ē', 'ā¨°'),
+ ('ā¨˛', 'ā¨ŗ'),
+ ('ā¨ĩ', 'ā¨ļ'),
+ ('ā¨¸', 'ā¨š'),
+ ('āŠ™', 'āŠœ'),
+ ('āŠž', 'āŠž'),
+ ('āŠ˛', 'āŠ´'),
+ ('āĒ…', 'āĒ'),
+ ('āĒ', 'āĒ‘'),
+ ('āĒ“', 'āĒ¨'),
+ ('āĒĒ', 'āĒ°'),
+ ('āĒ˛', 'āĒŗ'),
+ ('āĒĩ', 'āĒš'),
+ ('āĒŊ', 'āĒŊ'),
+ ('āĢ', 'āĢ'),
+ ('āĢ ', 'āĢĄ'),
+ ('āĢš', 'āĢš'),
+ ('āŦ…', 'āŦŒ'),
+ ('āŦ', 'āŦ'),
+ ('āŦ“', 'āŦ¨'),
+ ('āŦĒ', 'āŦ°'),
+ ('āŦ˛', 'āŦŗ'),
+ ('āŦĩ', 'āŦš'),
+ ('āŦŊ', 'āŦŊ'),
+ ('ā­œ', 'ā­'),
+ ('ā­Ÿ', 'ā­Ą'),
+ ('ā­ą', 'ā­ą'),
+ ('āŽƒ', 'āŽƒ'),
+ ('āŽ…', 'āŽŠ'),
+ ('āŽŽ', 'āŽ'),
+ ('āŽ’', 'āŽ•'),
+ ('āŽ™', 'āŽš'),
+ ('āŽœ', 'āŽœ'),
+ ('āŽž', 'āŽŸ'),
+ ('āŽŖ', 'āŽ¤'),
+ ('āŽ¨', 'āŽĒ'),
+ ('āŽŽ', 'āŽš'),
+ ('ā¯', 'ā¯'),
+ ('ā°…', 'ā°Œ'),
+ ('ā°Ž', 'ā°'),
+ ('ā°’', 'ā°¨'),
+ ('ā°Ē', 'ā°š'),
+ ('ā°Ŋ', 'ā°Ŋ'),
+ ('āą˜', 'āąš'),
+ ('āą ', 'āąĄ'),
+ ('ā˛€', 'ā˛€'),
+ ('ā˛…', 'ā˛Œ'),
+ ('ā˛Ž', 'ā˛'),
+ ('ā˛’', 'ā˛¨'),
+ ('ā˛Ē', 'ā˛ŗ'),
+ ('ā˛ĩ', 'ā˛š'),
+ ('ā˛Ŋ', 'ā˛Ŋ'),
+ ('āŗž', 'āŗž'),
+ ('āŗ ', 'āŗĄ'),
+ ('āŗą', 'āŗ˛'),
+ ('\u{d04}', 'ā´Œ'),
+ ('ā´Ž', 'ā´'),
+ ('ā´’', 'ā´ē'),
+ ('ā´Ŋ', 'ā´Ŋ'),
+ ('āĩŽ', 'āĩŽ'),
+ ('āĩ”', 'āĩ–'),
+ ('āĩŸ', 'āĩĄ'),
+ ('āĩē', 'āĩŋ'),
+ ('āļ…', 'āļ–'),
+ ('āļš', 'āļą'),
+ ('āļŗ', 'āļģ'),
+ ('āļŊ', 'āļŊ'),
+ ('āˇ€', 'āˇ†'),
+ ('āŧ€', 'āŧ€'),
+ ('āŊ€', 'āŊ‡'),
+ ('āŊ‰', 'āŊŦ'),
+ ('āžˆ', 'āžŒ'),
+ ('Ⴀ', 'Ⴥ'),
+ ('Ⴧ', 'Ⴧ'),
+ ('Ⴭ', 'Ⴭ'),
+ ('ა', 'áƒē'),
+ ('áƒŧ', 'ቈ'),
+ ('ቊ', 'ቍ'),
+ ('ቐ', 'ቖ'),
+ ('ቘ', 'ቘ'),
+ ('ቚ', 'ቝ'),
+ ('በ', 'ኈ'),
+ ('ኊ', 'ኍ'),
+ ('ነ', 'ኰ'),
+ ('ኲ', 'áŠĩ'),
+ ('ኸ', 'ኾ'),
+ ('ዀ', 'ዀ'),
+ ('ዂ', 'ዅ'),
+ ('ወ', 'ዖ'),
+ ('ዘ', 'ጐ'),
+ ('ጒ', 'ጕ'),
+ ('ጘ', 'ፚ'),
+ ('ᎀ', 'ᎏ'),
+ ('Ꭰ', 'áĩ'),
+ ('ᏸ', 'áŊ'),
+ ('ᐁ', 'á™Ŧ'),
+ ('ᙯ', 'á™ŋ'),
+ ('ᚁ', 'ᚚ'),
+ ('ᚠ', 'á›Ē'),
+ ('ᛮ', 'ᛸ'),
+ ('ᜀ', 'ᜌ'),
+ ('ᜎ', 'ᜑ'),
+ ('ᜠ', 'ᜱ'),
+ ('ᝀ', 'ᝑ'),
+ ('ᝠ', 'áŦ'),
+ ('ᝎ', 'ᝰ'),
+ ('ᠠ', '᥸'),
+ ('áĸ€', 'áĸ„'),
+ ('áĸ‡', 'áĸ¨'),
+ ('áĸĒ', 'áĸĒ'),
+ ('áĸ°', 'áŖĩ'),
+ ('ᤀ', 'ᤞ'),
+ ('ᨀ', 'ᨖ'),
+ ('áŦ…', 'áŦŗ'),
+ ('ᭅ', 'ᭋ'),
+ ('ᎃ', 'Ꭰ'),
+ ('ᎎ', 'Ꭿ'),
+ ('áŽē', 'á¯Ĩ'),
+ ('ᰀ', 'á°Ŗ'),
+ ('ᱍ', 'ᱏ'),
+ ('ᱚ', 'áąŊ'),
+ ('ᲀ', 'ᲈ'),
+ ('Ა', 'á˛ē'),
+ ('á˛Ŋ', 'á˛ŋ'),
+ ('áŗŠ', 'áŗŦ'),
+ ('áŗŽ', 'áŗŗ'),
+ ('áŗĩ', 'áŗļ'),
+ ('áŗē', 'áŗē'),
+ ('ᴀ', 'áļŋ'),
+ ('Ḁ', 'áŧ•'),
+ ('áŧ˜', 'áŧ'),
+ ('áŧ ', 'áŊ…'),
+ ('áŊˆ', 'áŊ'),
+ ('áŊ', 'áŊ—'),
+ ('áŊ™', 'áŊ™'),
+ ('áŊ›', 'áŊ›'),
+ ('áŊ', 'áŊ'),
+ ('áŊŸ', 'áŊŊ'),
+ ('ᾀ', 'ᾴ'),
+ ('ážļ', 'ážŧ'),
+ ('ážž', 'ážž'),
+ ('áŋ‚', 'áŋ„'),
+ ('áŋ†', 'áŋŒ'),
+ ('áŋ', 'áŋ“'),
+ ('áŋ–', 'áŋ›'),
+ ('áŋ ', 'áŋŦ'),
+ ('áŋ˛', 'áŋ´'),
+ ('áŋļ', 'áŋŧ'),
+ ('⁹', '⁹'),
+ ('âŋ', 'âŋ'),
+ ('ₐ', 'ₜ'),
+ ('ℂ', 'ℂ'),
+ ('ℇ', 'ℇ'),
+ ('ℊ', 'ℓ'),
+ ('ℕ', 'ℕ'),
+ ('ℙ', 'ℝ'),
+ ('ℤ', 'ℤ'),
+ ('â„Ļ', 'â„Ļ'),
+ ('ℨ', 'ℨ'),
+ ('â„Ē', 'ℭ'),
+ ('ℯ', 'ℹ'),
+ ('â„ŧ', 'â„ŋ'),
+ ('ⅅ', 'ⅉ'),
+ ('ⅎ', 'ⅎ'),
+ ('Ⅰ', 'ↈ'),
+ ('â’ļ', 'ⓩ'),
+ ('Ⰰ', 'Ⱞ'),
+ ('ⰰ', 'ⱞ'),
+ ('âą ', 'âŗ¤'),
+ ('âŗĢ', 'âŗŽ'),
+ ('âŗ˛', 'âŗŗ'),
+ ('ⴀ', 'â´Ĩ'),
+ ('â´§', 'â´§'),
+ ('â´­', 'â´­'),
+ ('â´°', 'âĩ§'),
+ ('âĩ¯', 'âĩ¯'),
+ ('âļ€', 'âļ–'),
+ ('âļ ', 'âļĻ'),
+ ('âļ¨', 'âļŽ'),
+ ('âļ°', 'âļļ'),
+ ('âļ¸', 'âļž'),
+ ('ⷀ', 'ⷆ'),
+ ('ⷈ', 'ⷎ'),
+ ('ⷐ', 'ⷖ'),
+ ('ⷘ', 'ⷞ'),
+ ('ⸯ', 'ⸯ'),
+ ('々', '々'),
+ ('ã€ģ', 'ã€ŧ'),
+ ('ㄅ', 'ㄯ'),
+ ('ã„ą', 'ㆎ'),
+ ('ㆠ', '\u{31bf}'),
+ ('ꀀ', 'ꒌ'),
+ ('ꓐ', 'ę“Ŋ'),
+ ('ꔀ', 'ꘌ'),
+ ('ꘐ', 'ꘟ'),
+ ('ę˜Ē', 'ę˜Ģ'),
+ ('Ꙁ', 'ꙮ'),
+ ('ę™ŋ', 'ꚝ'),
+ ('ꚠ', 'ę›¯'),
+ ('꜈', 'ęžŋ'),
+ ('Ꟃ', '\u{a7ca}'),
+ ('\u{a7f5}', 'ꠁ'),
+ ('ꠃ', 'ꠅ'),
+ ('ꠇ', 'ꠊ'),
+ ('ꠌ', 'ę ĸ'),
+ ('ꡀ', 'ęĄŗ'),
+ ('ęĸ‚', 'ęĸŗ'),
+ ('ęŖ˛', 'ęŖˇ'),
+ ('ęŖģ', 'ęŖģ'),
+ ('ęŖŊ', 'ęŖž'),
+ ('ꤊ', 'ę¤Ĩ'),
+ ('ꤰ', 'ęĨ†'),
+ ('ęĨ ', 'ęĨŧ'),
+ ('ęĻ„', 'ęĻ˛'),
+ ('ꧏ', 'ꧏ'),
+ ('ꨀ', 'ꨨ'),
+ ('ꩀ', 'ꩂ'),
+ ('ꩄ', 'ꩋ'),
+ ('ęĢ ', 'ęĢĒ'),
+ ('ęĢ˛', 'ęĢ´'),
+ ('ęŦ', 'ęŦ†'),
+ ('ęŦ‰', 'ęŦŽ'),
+ ('ęŦ‘', 'ęŦ–'),
+ ('ęŦ ', 'ęŦĻ'),
+ ('ęŦ¨', 'ęŦŽ'),
+ ('ęŦ°', '\u{ab69}'),
+ ('ę­°', 'ę¯ĸ'),
+ ('가', 'ížŖ'),
+ ('ힰ', 'ퟆ'),
+ ('ퟋ', 'íŸģ'),
+ ('īŦ€', 'īŦ†'),
+ ('īŦ“', 'īŦ—'),
+ ('ī­', 'īŽą'),
+ ('ī¯“', 'ī´Ŋ'),
+ ('īĩ', 'īļ'),
+ ('īļ’', 'īˇ‡'),
+ ('īˇ°', 'īˇģ'),
+ ('īš°', 'īš´'),
+ ('īšļ', 'īģŧ'),
+ ('īŧĄ', 'īŧē'),
+ ('īŊ', 'īŊš'),
+ ('īž ', 'īžž'),
+ ('īŋ‚', 'īŋ‡'),
+ ('īŋŠ', 'īŋ'),
+ ('īŋ’', 'īŋ—'),
+ ('īŋš', 'īŋœ'),
+ ('𐀀', '𐀋'),
+ ('𐀍', 'đ€Ļ'),
+ ('𐀨', 'đ€ē'),
+ ('đ€ŧ', 'đ€Ŋ'),
+ ('đ€ŋ', '𐁍'),
+ ('𐁐', '𐁝'),
+ ('𐂀', 'đƒē'),
+ ('𐅀', '𐅴'),
+ ('𐊀', '𐊜'),
+ ('𐊠', '𐋐'),
+ ('𐌀', '𐌟'),
+ ('𐌭', '𐍊'),
+ ('𐍐', 'đĩ'),
+ ('𐎀', '𐎝'),
+ ('𐎠', '𐏃'),
+ ('𐏈', '𐏏'),
+ ('𐏑', '𐏕'),
+ ('𐐀', '𐒝'),
+ ('𐒰', '𐓓'),
+ ('𐓘', 'đ“ģ'),
+ ('𐔀', '𐔧'),
+ ('𐔰', 'đ•Ŗ'),
+ ('𐘀', 'đœļ'),
+ ('𐝀', '𐝕'),
+ ('𐝠', '𐝧'),
+ ('𐠀', '𐠅'),
+ ('𐠈', '𐠈'),
+ ('𐠊', 'đ ĩ'),
+ ('𐠡', '𐠸'),
+ ('đ ŧ', 'đ ŧ'),
+ ('đ ŋ', '𐡕'),
+ ('𐥠', 'đĄļ'),
+ ('đĸ€', 'đĸž'),
+ ('đŖ ', 'đŖ˛'),
+ ('đŖ´', 'đŖĩ'),
+ ('𐤀', '𐤕'),
+ ('𐤠', '𐤚'),
+ ('đĻ€', 'đĻˇ'),
+ ('đĻž', 'đĻŋ'),
+ ('𐨀', '𐨀'),
+ ('𐨐', '𐨓'),
+ ('𐨕', '𐨗'),
+ ('𐨙', 'đ¨ĩ'),
+ ('𐊠', 'đŠŧ'),
+ ('đĒ€', 'đĒœ'),
+ ('đĢ€', 'đĢ‡'),
+ ('đĢ‰', 'đĢ¤'),
+ ('đŦ€', 'đŦĩ'),
+ ('𐭀', '𐭕'),
+ ('𐭠', '𐭲'),
+ ('𐮀', '𐮑'),
+ ('𐰀', '𐱈'),
+ ('𐲀', '𐲲'),
+ ('đŗ€', 'đŗ˛'),
+ ('𐴀', 'đ´Ŗ'),
+ ('\u{10e80}', '\u{10ea9}'),
+ ('\u{10eb0}', '\u{10eb1}'),
+ ('đŧ€', 'đŧœ'),
+ ('đŧ§', 'đŧ§'),
+ ('đŧ°', 'đŊ…'),
+ ('\u{10fb0}', '\u{10fc4}'),
+ ('đŋ ', 'đŋļ'),
+ ('𑀃', '𑀷'),
+ ('𑂃', 'đ‘‚¯'),
+ ('𑃐', '𑃨'),
+ ('𑄃', 'đ‘„Ļ'),
+ ('𑅄', '𑅄'),
+ ('\u{11147}', '\u{11147}'),
+ ('𑅐', '𑅲'),
+ ('đ‘…ļ', 'đ‘…ļ'),
+ ('𑆃', '𑆲'),
+ ('𑇁', '𑇄'),
+ ('𑇚', '𑇚'),
+ ('𑇜', '𑇜'),
+ ('𑈀', '𑈑'),
+ ('𑈓', 'đ‘ˆĢ'),
+ ('𑊀', '𑊆'),
+ ('𑊈', '𑊈'),
+ ('𑊊', '𑊍'),
+ ('𑊏', '𑊝'),
+ ('𑊟', '𑊨'),
+ ('𑊰', '𑋞'),
+ ('𑌅', '𑌌'),
+ ('𑌏', '𑌐'),
+ ('𑌓', '𑌨'),
+ ('đ‘ŒĒ', '𑌰'),
+ ('𑌲', 'đ‘Œŗ'),
+ ('đ‘Œĩ', '𑌹'),
+ ('đ‘ŒŊ', 'đ‘ŒŊ'),
+ ('𑍐', '𑍐'),
+ ('𑍝', '𑍡'),
+ ('𑐀', '𑐴'),
+ ('𑑇', '𑑊'),
+ ('𑑟', '\u{11461}'),
+ ('𑒀', 'đ‘’¯'),
+ ('𑓄', '𑓅'),
+ ('𑓇', '𑓇'),
+ ('𑖀', '𑖮'),
+ ('𑗘', '𑗛'),
+ ('𑘀', 'đ‘˜¯'),
+ ('𑙄', '𑙄'),
+ ('𑚀', 'đ‘šĒ'),
+ ('𑚸', '𑚸'),
+ ('𑠀', 'đ‘ Ģ'),
+ ('đ‘ĸ ', 'đ‘ŖŸ'),
+ ('đ‘Ŗŋ', '\u{11906}'),
+ ('\u{11909}', '\u{11909}'),
+ ('\u{1190c}', '\u{11913}'),
+ ('\u{11915}', '\u{11916}'),
+ ('\u{11918}', '\u{1192f}'),
+ ('\u{1193f}', '\u{1193f}'),
+ ('\u{11941}', '\u{11941}'),
+ ('đ‘Ļ ', 'đ‘Ļ§'),
+ ('đ‘ĻĒ', '𑧐'),
+ ('𑧡', '𑧡'),
+ ('đ‘§Ŗ', 'đ‘§Ŗ'),
+ ('𑨀', '𑨀'),
+ ('𑨋', '𑨲'),
+ ('đ‘¨ē', 'đ‘¨ē'),
+ ('𑩐', '𑩐'),
+ ('𑩜', 'đ‘Ē‰'),
+ ('đ‘Ē', 'đ‘Ē'),
+ ('đ‘Ģ€', 'đ‘Ģ¸'),
+ ('𑰀', '𑰈'),
+ ('𑰊', '𑰮'),
+ ('𑱀', '𑱀'),
+ ('𑱲', '𑲏'),
+ ('𑴀', '𑴆'),
+ ('𑴈', '𑴉'),
+ ('𑴋', '𑴰'),
+ ('đ‘ĩ†', 'đ‘ĩ†'),
+ ('đ‘ĩ ', 'đ‘ĩĨ'),
+ ('đ‘ĩ§', 'đ‘ĩ¨'),
+ ('đ‘ĩĒ', 'đ‘ļ‰'),
+ ('đ‘ļ˜', 'đ‘ļ˜'),
+ ('đ‘ģ ', 'đ‘ģ˛'),
+ ('\u{11fb0}', '\u{11fb0}'),
+ ('𒀀', '𒎙'),
+ ('𒐀', '𒑮'),
+ ('𒒀', '𒕃'),
+ ('𓀀', '𓐮'),
+ ('𔐀', '𔙆'),
+ ('𖠀', '𖨸'),
+ ('𖩀', '𖩞'),
+ ('đ–Ģ', 'đ–Ģ­'),
+ ('đ–Ŧ€', 'đ–Ŧ¯'),
+ ('𖭀', '𖭃'),
+ ('đ–­Ŗ', '𖭷'),
+ ('đ–­Ŋ', '𖮏'),
+ ('𖹀', 'đ–šŋ'),
+ ('đ–ŧ€', 'đ–ŊŠ'),
+ ('đ–Ŋ', 'đ–Ŋ'),
+ ('𖾓', '𖾟'),
+ ('đ–ŋ ', 'đ–ŋĄ'),
+ ('đ–ŋŖ', 'đ–ŋŖ'),
+ ('𛰀', 'đ›ąĒ'),
+ ('𛱰', 'đ›ąŧ'),
+ ('𛲀', '𛲈'),
+ ('𛲐', '𛲙'),
+ ('𝐀', '𝑔'),
+ ('𝑖', '𝒜'),
+ ('𝒞', '𝒟'),
+ ('đ’ĸ', 'đ’ĸ'),
+ ('đ’Ĩ', 'đ’Ļ'),
+ ('𝒩', 'đ’Ŧ'),
+ ('𝒮', '𝒹'),
+ ('đ’ģ', 'đ’ģ'),
+ ('đ’Ŋ', '𝓃'),
+ ('𝓅', '𝔅'),
+ ('𝔇', '𝔊'),
+ ('𝔍', '𝔔'),
+ ('𝔖', '𝔜'),
+ ('𝔞', '𝔹'),
+ ('đ”ģ', '𝔾'),
+ ('𝕀', '𝕄'),
+ ('𝕆', '𝕆'),
+ ('𝕊', '𝕐'),
+ ('𝕒', 'đšĨ'),
+ ('𝚨', '𝛀'),
+ ('𝛂', '𝛚'),
+ ('𝛜', 'đ›ē'),
+ ('đ›ŧ', '𝜔'),
+ ('𝜖', '𝜴'),
+ ('đœļ', '𝝎'),
+ ('𝝐', '𝝮'),
+ ('𝝰', '𝞈'),
+ ('𝞊', '𝞨'),
+ ('đžĒ', '𝟂'),
+ ('𝟄', '𝟋'),
+ ('𞄀', 'đž„Ŧ'),
+ ('𞄷', 'đž„Ŋ'),
+ ('𞅎', '𞅎'),
+ ('𞋀', 'đž‹Ģ'),
+ ('𞠀', 'đžŖ„'),
+ ('𞤀', 'đžĨƒ'),
+ ('đžĨ‹', 'đžĨ‹'),
+ ('𞸀', '𞸃'),
+ ('𞸅', '𞸟'),
+ ('𞸡', 'đž¸ĸ'),
+ ('𞸤', '𞸤'),
+ ('𞸧', '𞸧'),
+ ('𞸩', '𞸲'),
+ ('𞸴', '𞸷'),
+ ('𞸹', '𞸹'),
+ ('đž¸ģ', 'đž¸ģ'),
+ ('𞹂', '𞹂'),
+ ('𞹇', '𞹇'),
+ ('𞹉', '𞹉'),
+ ('𞹋', '𞹋'),
+ ('𞹍', '𞹏'),
+ ('𞹑', '𞹒'),
+ ('𞹔', '𞹔'),
+ ('𞹗', '𞹗'),
+ ('𞹙', '𞹙'),
+ ('𞹛', '𞹛'),
+ ('𞹝', '𞹝'),
+ ('𞹟', '𞹟'),
+ ('𞹡', 'đžšĸ'),
+ ('𞹤', '𞹤'),
+ ('𞹧', 'đžšĒ'),
+ ('đžšŦ', '𞹲'),
+ ('𞹴', '𞹷'),
+ ('𞹹', 'đžšŧ'),
+ ('𞹾', '𞹾'),
+ ('đžē€', 'đžē‰'),
+ ('đžē‹', 'đžē›'),
+ ('đžēĄ', 'đžēŖ'),
+ ('đžēĨ', 'đžēŠ'),
+ ('đžēĢ', 'đžēģ'),
+ ('🄰', '🅉'),
+ ('🅐', '🅩'),
+ ('🅰', '🆉'),
+];
+
+pub const CR: &'static [(char, char)] = &[('\r', '\r')];
+
+pub const DOUBLE_QUOTE: &'static [(char, char)] = &[('\"', '\"')];
+
+pub const EXTEND: &'static [(char, char)] = &[
+ ('\u{300}', '\u{36f}'),
+ ('\u{483}', '\u{489}'),
+ ('\u{591}', '\u{5bd}'),
+ ('\u{5bf}', '\u{5bf}'),
+ ('\u{5c1}', '\u{5c2}'),
+ ('\u{5c4}', '\u{5c5}'),
+ ('\u{5c7}', '\u{5c7}'),
+ ('\u{610}', '\u{61a}'),
+ ('\u{64b}', '\u{65f}'),
+ ('\u{670}', '\u{670}'),
+ ('\u{6d6}', '\u{6dc}'),
+ ('\u{6df}', '\u{6e4}'),
+ ('\u{6e7}', '\u{6e8}'),
+ ('\u{6ea}', '\u{6ed}'),
+ ('\u{711}', '\u{711}'),
+ ('\u{730}', '\u{74a}'),
+ ('\u{7a6}', '\u{7b0}'),
+ ('\u{7eb}', '\u{7f3}'),
+ ('\u{7fd}', '\u{7fd}'),
+ ('\u{816}', '\u{819}'),
+ ('\u{81b}', '\u{823}'),
+ ('\u{825}', '\u{827}'),
+ ('\u{829}', '\u{82d}'),
+ ('\u{859}', '\u{85b}'),
+ ('\u{8d3}', '\u{8e1}'),
+ ('\u{8e3}', 'ā¤ƒ'),
+ ('\u{93a}', '\u{93c}'),
+ ('ā¤ž', 'āĨ'),
+ ('\u{951}', '\u{957}'),
+ ('\u{962}', '\u{963}'),
+ ('\u{981}', 'āĻƒ'),
+ ('\u{9bc}', '\u{9bc}'),
+ ('\u{9be}', '\u{9c4}'),
+ ('ā§‡', 'ā§ˆ'),
+ ('ā§‹', '\u{9cd}'),
+ ('\u{9d7}', '\u{9d7}'),
+ ('\u{9e2}', '\u{9e3}'),
+ ('\u{9fe}', '\u{9fe}'),
+ ('\u{a01}', 'ā¨ƒ'),
+ ('\u{a3c}', '\u{a3c}'),
+ ('ā¨ž', '\u{a42}'),
+ ('\u{a47}', '\u{a48}'),
+ ('\u{a4b}', '\u{a4d}'),
+ ('\u{a51}', '\u{a51}'),
+ ('\u{a70}', '\u{a71}'),
+ ('\u{a75}', '\u{a75}'),
+ ('\u{a81}', 'āĒƒ'),
+ ('\u{abc}', '\u{abc}'),
+ ('āĒž', '\u{ac5}'),
+ ('\u{ac7}', 'āĢ‰'),
+ ('āĢ‹', '\u{acd}'),
+ ('\u{ae2}', '\u{ae3}'),
+ ('\u{afa}', '\u{aff}'),
+ ('\u{b01}', 'āŦƒ'),
+ ('\u{b3c}', '\u{b3c}'),
+ ('\u{b3e}', '\u{b44}'),
+ ('ā­‡', 'ā­ˆ'),
+ ('ā­‹', '\u{b4d}'),
+ ('\u{b55}', '\u{b57}'),
+ ('\u{b62}', '\u{b63}'),
+ ('\u{b82}', '\u{b82}'),
+ ('\u{bbe}', 'ā¯‚'),
+ ('ā¯†', 'ā¯ˆ'),
+ ('ā¯Š', '\u{bcd}'),
+ ('\u{bd7}', '\u{bd7}'),
+ ('\u{c00}', '\u{c04}'),
+ ('\u{c3e}', 'āą„'),
+ ('\u{c46}', '\u{c48}'),
+ ('\u{c4a}', '\u{c4d}'),
+ ('\u{c55}', '\u{c56}'),
+ ('\u{c62}', '\u{c63}'),
+ ('\u{c81}', 'ā˛ƒ'),
+ ('\u{cbc}', '\u{cbc}'),
+ ('ā˛ž', 'āŗ„'),
+ ('\u{cc6}', 'āŗˆ'),
+ ('āŗŠ', '\u{ccd}'),
+ ('\u{cd5}', '\u{cd6}'),
+ ('\u{ce2}', '\u{ce3}'),
+ ('\u{d00}', 'ā´ƒ'),
+ ('\u{d3b}', '\u{d3c}'),
+ ('\u{d3e}', '\u{d44}'),
+ ('āĩ†', 'āĩˆ'),
+ ('āĩŠ', '\u{d4d}'),
+ ('\u{d57}', '\u{d57}'),
+ ('\u{d62}', '\u{d63}'),
+ ('\u{d81}', 'āļƒ'),
+ ('\u{dca}', '\u{dca}'),
+ ('\u{dcf}', '\u{dd4}'),
+ ('\u{dd6}', '\u{dd6}'),
+ ('āˇ˜', '\u{ddf}'),
+ ('āˇ˛', 'āˇŗ'),
+ ('\u{e31}', '\u{e31}'),
+ ('\u{e34}', '\u{e3a}'),
+ ('\u{e47}', '\u{e4e}'),
+ ('\u{eb1}', '\u{eb1}'),
+ ('\u{eb4}', '\u{ebc}'),
+ ('\u{ec8}', '\u{ecd}'),
+ ('\u{f18}', '\u{f19}'),
+ ('\u{f35}', '\u{f35}'),
+ ('\u{f37}', '\u{f37}'),
+ ('\u{f39}', '\u{f39}'),
+ ('āŧž', 'āŧŋ'),
+ ('\u{f71}', '\u{f84}'),
+ ('\u{f86}', '\u{f87}'),
+ ('\u{f8d}', '\u{f97}'),
+ ('\u{f99}', '\u{fbc}'),
+ ('\u{fc6}', '\u{fc6}'),
+ ('á€Ģ', '\u{103e}'),
+ ('ၖ', '\u{1059}'),
+ ('\u{105e}', '\u{1060}'),
+ ('áĸ', 'ၤ'),
+ ('ၧ', 'ၭ'),
+ ('\u{1071}', '\u{1074}'),
+ ('\u{1082}', '\u{108d}'),
+ ('ႏ', 'ႏ'),
+ ('ႚ', '\u{109d}'),
+ ('\u{135d}', '\u{135f}'),
+ ('\u{1712}', '\u{1714}'),
+ ('\u{1732}', '\u{1734}'),
+ ('\u{1752}', '\u{1753}'),
+ ('\u{1772}', '\u{1773}'),
+ ('\u{17b4}', '\u{17d3}'),
+ ('\u{17dd}', '\u{17dd}'),
+ ('\u{180b}', '\u{180d}'),
+ ('\u{1885}', '\u{1886}'),
+ ('\u{18a9}', '\u{18a9}'),
+ ('\u{1920}', 'á¤Ģ'),
+ ('ᤰ', '\u{193b}'),
+ ('\u{1a17}', '\u{1a1b}'),
+ ('ᩕ', '\u{1a5e}'),
+ ('\u{1a60}', '\u{1a7c}'),
+ ('\u{1a7f}', '\u{1a7f}'),
+ ('\u{1ab0}', '\u{1ac0}'),
+ ('\u{1b00}', 'áŦ„'),
+ ('\u{1b34}', '᭄'),
+ ('\u{1b6b}', '\u{1b73}'),
+ ('\u{1b80}', 'ᮂ'),
+ ('Ꭵ', '\u{1bad}'),
+ ('\u{1be6}', 'á¯ŗ'),
+ ('á°¤', '\u{1c37}'),
+ ('\u{1cd0}', '\u{1cd2}'),
+ ('\u{1cd4}', '\u{1ce8}'),
+ ('\u{1ced}', '\u{1ced}'),
+ ('\u{1cf4}', '\u{1cf4}'),
+ ('áŗˇ', '\u{1cf9}'),
+ ('\u{1dc0}', '\u{1df9}'),
+ ('\u{1dfb}', '\u{1dff}'),
+ ('\u{200c}', '\u{200c}'),
+ ('\u{20d0}', '\u{20f0}'),
+ ('\u{2cef}', '\u{2cf1}'),
+ ('\u{2d7f}', '\u{2d7f}'),
+ ('\u{2de0}', '\u{2dff}'),
+ ('\u{302a}', '\u{302f}'),
+ ('\u{3099}', '\u{309a}'),
+ ('\u{a66f}', '\u{a672}'),
+ ('\u{a674}', '\u{a67d}'),
+ ('\u{a69e}', '\u{a69f}'),
+ ('\u{a6f0}', '\u{a6f1}'),
+ ('\u{a802}', '\u{a802}'),
+ ('\u{a806}', '\u{a806}'),
+ ('\u{a80b}', '\u{a80b}'),
+ ('ę Ŗ', 'ę §'),
+ ('\u{a82c}', '\u{a82c}'),
+ ('ęĸ€', 'ęĸ'),
+ ('ęĸ´', '\u{a8c5}'),
+ ('\u{a8e0}', '\u{a8f1}'),
+ ('\u{a8ff}', '\u{a8ff}'),
+ ('\u{a926}', '\u{a92d}'),
+ ('\u{a947}', 'ęĨ“'),
+ ('\u{a980}', 'ęĻƒ'),
+ ('\u{a9b3}', '꧀'),
+ ('\u{a9e5}', '\u{a9e5}'),
+ ('\u{aa29}', '\u{aa36}'),
+ ('\u{aa43}', '\u{aa43}'),
+ ('\u{aa4c}', 'ꩍ'),
+ ('ęŠģ', 'ęŠŊ'),
+ ('\u{aab0}', '\u{aab0}'),
+ ('\u{aab2}', '\u{aab4}'),
+ ('\u{aab7}', '\u{aab8}'),
+ ('\u{aabe}', '\u{aabf}'),
+ ('\u{aac1}', '\u{aac1}'),
+ ('ęĢĢ', 'ęĢ¯'),
+ ('ęĢĩ', '\u{aaf6}'),
+ ('ę¯Ŗ', 'ę¯Ē'),
+ ('ę¯Ŧ', '\u{abed}'),
+ ('\u{fb1e}', '\u{fb1e}'),
+ ('\u{fe00}', '\u{fe0f}'),
+ ('\u{fe20}', '\u{fe2f}'),
+ ('\u{ff9e}', '\u{ff9f}'),
+ ('\u{101fd}', '\u{101fd}'),
+ ('\u{102e0}', '\u{102e0}'),
+ ('\u{10376}', '\u{1037a}'),
+ ('\u{10a01}', '\u{10a03}'),
+ ('\u{10a05}', '\u{10a06}'),
+ ('\u{10a0c}', '\u{10a0f}'),
+ ('\u{10a38}', '\u{10a3a}'),
+ ('\u{10a3f}', '\u{10a3f}'),
+ ('\u{10ae5}', '\u{10ae6}'),
+ ('\u{10d24}', '\u{10d27}'),
+ ('\u{10eab}', '\u{10eac}'),
+ ('\u{10f46}', '\u{10f50}'),
+ ('𑀀', '𑀂'),
+ ('\u{11038}', '\u{11046}'),
+ ('\u{1107f}', '𑂂'),
+ ('𑂰', '\u{110ba}'),
+ ('\u{11100}', '\u{11102}'),
+ ('\u{11127}', '\u{11134}'),
+ ('𑅅', '𑅆'),
+ ('\u{11173}', '\u{11173}'),
+ ('\u{11180}', '𑆂'),
+ ('đ‘†ŗ', '𑇀'),
+ ('\u{111c9}', '\u{111cc}'),
+ ('\u{111ce}', '\u{111cf}'),
+ ('đ‘ˆŦ', '\u{11237}'),
+ ('\u{1123e}', '\u{1123e}'),
+ ('\u{112df}', '\u{112ea}'),
+ ('\u{11300}', '𑌃'),
+ ('\u{1133b}', '\u{1133c}'),
+ ('\u{1133e}', '𑍄'),
+ ('𑍇', '𑍈'),
+ ('𑍋', '𑍍'),
+ ('\u{11357}', '\u{11357}'),
+ ('đ‘ĸ', 'đ‘Ŗ'),
+ ('\u{11366}', '\u{1136c}'),
+ ('\u{11370}', '\u{11374}'),
+ ('đ‘ĩ', '\u{11446}'),
+ ('\u{1145e}', '\u{1145e}'),
+ ('\u{114b0}', '\u{114c3}'),
+ ('\u{115af}', '\u{115b5}'),
+ ('𑖸', '\u{115c0}'),
+ ('\u{115dc}', '\u{115dd}'),
+ ('𑘰', '\u{11640}'),
+ ('\u{116ab}', '\u{116b7}'),
+ ('\u{1171d}', '\u{1172b}'),
+ ('đ‘ Ŧ', '\u{1183a}'),
+ ('\u{11930}', '\u{11935}'),
+ ('\u{11937}', '\u{11938}'),
+ ('\u{1193b}', '\u{1193e}'),
+ ('\u{11940}', '\u{11940}'),
+ ('\u{11942}', '\u{11943}'),
+ ('𑧑', '\u{119d7}'),
+ ('\u{119da}', '\u{119e0}'),
+ ('𑧤', '𑧤'),
+ ('\u{11a01}', '\u{11a0a}'),
+ ('\u{11a33}', '𑨹'),
+ ('\u{11a3b}', '\u{11a3e}'),
+ ('\u{11a47}', '\u{11a47}'),
+ ('\u{11a51}', '\u{11a5b}'),
+ ('\u{11a8a}', '\u{11a99}'),
+ ('đ‘°¯', '\u{11c36}'),
+ ('\u{11c38}', '\u{11c3f}'),
+ ('\u{11c92}', '\u{11ca7}'),
+ ('𑲩', '\u{11cb6}'),
+ ('\u{11d31}', '\u{11d36}'),
+ ('\u{11d3a}', '\u{11d3a}'),
+ ('\u{11d3c}', '\u{11d3d}'),
+ ('\u{11d3f}', '\u{11d45}'),
+ ('\u{11d47}', '\u{11d47}'),
+ ('đ‘ļŠ', 'đ‘ļŽ'),
+ ('\u{11d90}', '\u{11d91}'),
+ ('đ‘ļ“', '\u{11d97}'),
+ ('\u{11ef3}', 'đ‘ģļ'),
+ ('\u{16af0}', '\u{16af4}'),
+ ('\u{16b30}', '\u{16b36}'),
+ ('\u{16f4f}', '\u{16f4f}'),
+ ('đ–Ŋ‘', '𖾇'),
+ ('\u{16f8f}', '\u{16f92}'),
+ ('\u{16fe4}', '\u{16fe4}'),
+ ('\u{16ff0}', '\u{16ff1}'),
+ ('\u{1bc9d}', '\u{1bc9e}'),
+ ('\u{1d165}', '\u{1d169}'),
+ ('𝅭', '\u{1d172}'),
+ ('\u{1d17b}', '\u{1d182}'),
+ ('\u{1d185}', '\u{1d18b}'),
+ ('\u{1d1aa}', '\u{1d1ad}'),
+ ('\u{1d242}', '\u{1d244}'),
+ ('\u{1da00}', '\u{1da36}'),
+ ('\u{1da3b}', '\u{1da6c}'),
+ ('\u{1da75}', '\u{1da75}'),
+ ('\u{1da84}', '\u{1da84}'),
+ ('\u{1da9b}', '\u{1da9f}'),
+ ('\u{1daa1}', '\u{1daaf}'),
+ ('\u{1e000}', '\u{1e006}'),
+ ('\u{1e008}', '\u{1e018}'),
+ ('\u{1e01b}', '\u{1e021}'),
+ ('\u{1e023}', '\u{1e024}'),
+ ('\u{1e026}', '\u{1e02a}'),
+ ('\u{1e130}', '\u{1e136}'),
+ ('\u{1e2ec}', '\u{1e2ef}'),
+ ('\u{1e8d0}', '\u{1e8d6}'),
+ ('\u{1e944}', '\u{1e94a}'),
+ ('đŸģ', 'đŸŋ'),
+ ('\u{e0020}', '\u{e007f}'),
+ ('\u{e0100}', '\u{e01ef}'),
+];
+
+pub const EXTENDNUMLET: &'static [(char, char)] = &[
+ ('_', '_'),
+ ('\u{202f}', '\u{202f}'),
+ ('‿', '⁀'),
+ ('⁔', '⁔'),
+ ('︳', '︴'),
+ ('﹍', '﹏'),
+ ('＿', '＿'),
+];
+
+pub const FORMAT: &'static [(char, char)] = &[
+ ('\u{ad}', '\u{ad}'),
+ ('\u{600}', '\u{605}'),
+ ('\u{61c}', '\u{61c}'),
+ ('\u{6dd}', '\u{6dd}'),
+ ('\u{70f}', '\u{70f}'),
+ ('\u{8e2}', '\u{8e2}'),
+ ('\u{180e}', '\u{180e}'),
+ ('\u{200e}', '\u{200f}'),
+ ('\u{202a}', '\u{202e}'),
+ ('\u{2060}', '\u{2064}'),
+ ('\u{2066}', '\u{206f}'),
+ ('\u{feff}', '\u{feff}'),
+ ('\u{fff9}', '\u{fffb}'),
+ ('\u{110bd}', '\u{110bd}'),
+ ('\u{110cd}', '\u{110cd}'),
+ ('\u{13430}', '\u{13438}'),
+ ('\u{1bca0}', '\u{1bca3}'),
+ ('\u{1d173}', '\u{1d17a}'),
+ ('\u{e0001}', '\u{e0001}'),
+];
+
+pub const HEBREW_LETTER: &'static [(char, char)] = &[
+ ('א', '×Ē'),
+ ('ׯ', 'ײ'),
+ ('īŦ', 'īŦ'),
+ ('īŦŸ', 'īŦ¨'),
+ ('īŦĒ', 'īŦļ'),
+ ('īŦ¸', 'īŦŧ'),
+ ('īŦž', 'īŦž'),
+ ('ī­€', 'ī­'),
+ ('ī­ƒ', 'ī­„'),
+ ('ī­†', 'ī­'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+ ('〱', '〵'),
+ ('゛', '゜'),
+ ('゠', 'ヺ'),
+ ('ー', 'ヿ'),
+ ('ㇰ', 'ㇿ'),
+ ('㋐', '㋾'),
+ ('㌀', '㍗'),
+ ('ｦ', 'ﾝ'),
+ ('𛀀', '𛀀'),
+ ('𛅤', '𛅧'),
+];
+
+pub const LF: &'static [(char, char)] = &[('\n', '\n')];
+
+pub const MIDLETTER: &'static [(char, char)] = &[
+ (':', ':'),
+ ('¡', '¡'),
+ ('·', '·'),
+ ('՟', '՟'),
+ ('×´', '×´'),
+ ('‧', '‧'),
+ ('ī¸“', 'ī¸“'),
+ ('īš•', 'īš•'),
+ ('īŧš', 'īŧš'),
+];
+
+pub const MIDNUM: &'static [(char, char)] = &[
+ (',', ','),
+ (';', ';'),
+ ('Íž', 'Íž'),
+ ('։', '։'),
+ ('،', '؍'),
+ ('ŲŦ', 'ŲŦ'),
+ ('߸', '߸'),
+ ('⁄', '⁄'),
+ ('ī¸', 'ī¸'),
+ ('ī¸”', 'ī¸”'),
+ ('īš', 'īš'),
+ ('īš”', 'īš”'),
+ ('īŧŒ', 'īŧŒ'),
+ ('īŧ›', 'īŧ›'),
+];
+
+pub const MIDNUMLET: &'static [(char, char)] = &[
+ ('.', '.'),
+ ('‘', '’'),
+ ('․', '․'),
+ ('﹒', '﹒'),
+ ('＇', '＇'),
+ ('．', '．'),
+];
+
+pub const NEWLINE: &'static [(char, char)] =
+ &[('\u{b}', '\u{c}'), ('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}')];
+
+pub const NUMERIC: &'static [(char, char)] = &[
+ ('0', '9'),
+ ('Ų ', 'ŲŠ'),
+ ('ŲĢ', 'ŲĢ'),
+ ('Û°', 'Ûš'),
+ ('߀', '߉'),
+ ('āĨĻ', 'āĨ¯'),
+ ('ā§Ļ', 'ā§¯'),
+ ('āŠĻ', 'āŠ¯'),
+ ('āĢĻ', 'āĢ¯'),
+ ('ā­Ļ', 'ā­¯'),
+ ('ā¯Ļ', 'ā¯¯'),
+ ('āąĻ', 'āą¯'),
+ ('āŗĻ', 'āŗ¯'),
+ ('āĩĻ', 'āĩ¯'),
+ ('āˇĻ', 'āˇ¯'),
+ ('āš', 'āš™'),
+ ('āģ', 'āģ™'),
+ ('āŧ ', 'āŧŠ'),
+ ('၀', '၉'),
+ ('႐', '႙'),
+ ('០', '៩'),
+ ('᠐', '᠙'),
+ ('áĨ†', 'áĨ'),
+ ('᧐', '᧙'),
+ ('áĒ€', 'áĒ‰'),
+ ('áĒ', 'áĒ™'),
+ ('᭐', '᭙'),
+ ('Ꮀ', '᎚'),
+ ('᱀', '᱉'),
+ ('᱐', '᱙'),
+ ('꘠', 'ꘊ'),
+ ('ęŖ', 'ęŖ™'),
+ ('꤀', '꤉'),
+ ('꧐', '꧙'),
+ ('꧰', '꧚'),
+ ('꩐', '꩙'),
+ ('ę¯°', 'ę¯š'),
+ ('īŧ', 'īŧ™'),
+ ('𐒠', '𐒩'),
+ ('𐴰', '𐴚'),
+ ('đ‘Ļ', 'đ‘¯'),
+ ('𑃰', '𑃹'),
+ ('đ‘„ļ', 'đ‘„ŋ'),
+ ('𑇐', '𑇙'),
+ ('𑋰', '𑋹'),
+ ('𑑐', '𑑙'),
+ ('𑓐', '𑓙'),
+ ('𑙐', '𑙙'),
+ ('𑛀', '𑛉'),
+ ('𑜰', '𑜹'),
+ ('đ‘Ŗ ', 'đ‘ŖŠ'),
+ ('\u{11950}', '\u{11959}'),
+ ('𑱐', '𑱙'),
+ ('đ‘ĩ', 'đ‘ĩ™'),
+ ('đ‘ļ ', 'đ‘ļŠ'),
+ ('𖩠', '𖩩'),
+ ('𖭐', '𖭙'),
+ ('𝟎', 'đŸŋ'),
+ ('𞅀', '𞅉'),
+ ('𞋰', '𞋹'),
+ ('đžĨ', 'đžĨ™'),
+ ('\u{1fbf0}', '\u{1fbf9}'),
+];
+
+pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')];
+
+pub const SINGLE_QUOTE: &'static [(char, char)] = &[('\'', '\'')];
+
+pub const WSEGSPACE: &'static [(char, char)] = &[
+ (' ', ' '),
+ ('\u{1680}', '\u{1680}'),
+ ('\u{2000}', '\u{2006}'),
+ ('\u{2008}', '\u{200a}'),
+ ('\u{205f}', '\u{205f}'),
+ ('\u{3000}', '\u{3000}'),
+];
+
+pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')];
diff --git a/vendor/regex-syntax/src/utf8.rs b/vendor/regex-syntax/src/utf8.rs
new file mode 100644
index 000000000..dc055033e
--- /dev/null
+++ b/vendor/regex-syntax/src/utf8.rs
@@ -0,0 +1,587 @@
+/*!
+Converts ranges of Unicode scalar values to equivalent ranges of UTF-8 bytes.
+
+This sub-module is useful for constructing byte based automatons that need
+to embed UTF-8 decoding. The most common use of this module is in conjunction
+with the [`hir::ClassUnicodeRange`](../hir/struct.ClassUnicodeRange.html) type.
+
+See the documentation on the `Utf8Sequences` iterator for more details and
+an example.
+
+# Wait, what is this?
+
+This is simplest to explain with an example. Let's say you wanted to test
+whether a particular byte sequence was a Cyrillic character. One possible
+scalar value range is `[0400-04FF]`. The set of allowed bytes for this
+range can be expressed as a sequence of byte ranges:
+
+```text
+[D0-D3][80-BF]
+```
+
+This is simple enough: simply encode the boundaries, `0400` encodes to
+`D0 80` and `04FF` encodes to `D3 BF`, and create ranges from each
+corresponding pair of bytes: `D0` to `D3` and `80` to `BF`.
+
+However, what if you wanted to add the Cyrillic Supplementary characters to
+your range? Your range might then become `[0400-052F]`. The same procedure
+as above doesn't quite work because `052F` encodes to `D4 AF`. The byte ranges
+you'd get from the previous transformation would be `[D0-D4][80-AF]`. However,
+this isn't quite correct because this range doesn't capture many characters,
+for example, `04FF` (because its last byte, `BF` isn't in the range `80-AF`).
+
+Instead, you need multiple sequences of byte ranges:
+
+```text
+[D0-D3][80-BF] # matches codepoints 0400-04FF
+[D4][80-AF] # matches codepoints 0500-052F
+```
+
+This gets even more complicated if you want bigger ranges, particularly if
+they naively contain surrogate codepoints. For example, the sequence of byte
+ranges for the basic multilingual plane (`[0000-FFFF]`) looks like this:
+
+```text
+[0-7F]
+[C2-DF][80-BF]
+[E0][A0-BF][80-BF]
+[E1-EC][80-BF][80-BF]
+[ED][80-9F][80-BF]
+[EE-EF][80-BF][80-BF]
+```
+
+Note that the byte ranges above will *not* match any erroneous encoding of
+UTF-8, including encodings of surrogate codepoints.
+
+And, of course, for all of Unicode (`[000000-10FFFF]`):
+
+```text
+[0-7F]
+[C2-DF][80-BF]
+[E0][A0-BF][80-BF]
+[E1-EC][80-BF][80-BF]
+[ED][80-9F][80-BF]
+[EE-EF][80-BF][80-BF]
+[F0][90-BF][80-BF][80-BF]
+[F1-F3][80-BF][80-BF][80-BF]
+[F4][80-8F][80-BF][80-BF]
+```
+
+This module automates the process of creating these byte ranges from ranges of
+Unicode scalar values.
+
+# Lineage
+
+I got the idea and general implementation strategy from Russ Cox in his
+[article on regexps](https://web.archive.org/web/20160404141123/https://swtch.com/~rsc/regexp/regexp3.html) and RE2.
+Russ Cox got it from Ken Thompson's `grep` (no source, folk lore?).
+I also got the idea from
+[Lucene](https://github.com/apache/lucene-solr/blob/ae93f4e7ac6a3908046391de35d4f50a0d3c59ca/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java),
+which uses it for executing automata on their term index.
+*/
+
+#![deny(missing_docs)]
+
+use std::char;
+use std::fmt;
+use std::iter::FusedIterator;
+use std::slice;
+
+const MAX_UTF8_BYTES: usize = 4;
+
+/// Utf8Sequence represents a sequence of byte ranges.
+///
+/// To match a Utf8Sequence, a candidate byte sequence must match each
+/// successive range.
+///
+/// For example, if there are two ranges, `[C2-DF][80-BF]`, then the byte
+/// sequence `\xDD\x61` would not match because `0x61 < 0x80`.
+#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
+pub enum Utf8Sequence {
+ /// One byte range.
+ One(Utf8Range),
+ /// Two successive byte ranges.
+ Two([Utf8Range; 2]),
+ /// Three successive byte ranges.
+ Three([Utf8Range; 3]),
+ /// Four successive byte ranges.
+ Four([Utf8Range; 4]),
+}
+
+impl Utf8Sequence {
+ /// Creates a new UTF-8 sequence from the encoded bytes of a scalar value
+ /// range.
+ ///
+ /// `start` and `end` must have the same length, which must be 2, 3 or 4.
+ fn from_encoded_range(start: &[u8], end: &[u8]) -> Self {
+ assert_eq!(start.len(), end.len());
+ match start.len() {
+ 2 => Utf8Sequence::Two([
+ Utf8Range::new(start[0], end[0]),
+ Utf8Range::new(start[1], end[1]),
+ ]),
+ 3 => Utf8Sequence::Three([
+ Utf8Range::new(start[0], end[0]),
+ Utf8Range::new(start[1], end[1]),
+ Utf8Range::new(start[2], end[2]),
+ ]),
+ 4 => Utf8Sequence::Four([
+ Utf8Range::new(start[0], end[0]),
+ Utf8Range::new(start[1], end[1]),
+ Utf8Range::new(start[2], end[2]),
+ Utf8Range::new(start[3], end[3]),
+ ]),
+ n => unreachable!("invalid encoded length: {}", n),
+ }
+ }
+
+ /// Returns the underlying sequence of byte ranges as a slice.
+ pub fn as_slice(&self) -> &[Utf8Range] {
+ use self::Utf8Sequence::*;
+ match *self {
+ One(ref r) => slice::from_ref(r),
+ Two(ref r) => &r[..],
+ Three(ref r) => &r[..],
+ Four(ref r) => &r[..],
+ }
+ }
+
+ /// Returns the number of byte ranges in this sequence.
+ ///
+ /// The length is guaranteed to be in the closed interval `[1, 4]`.
+ pub fn len(&self) -> usize {
+ self.as_slice().len()
+ }
+
+ /// Reverses the ranges in this sequence.
+ ///
+ /// For example, if this corresponds to the following sequence:
+ ///
+ /// ```text
+ /// [D0-D3][80-BF]
+ /// ```
+ ///
+ /// Then after reversal, it will be
+ ///
+ /// ```text
+ /// [80-BF][D0-D3]
+ /// ```
+ ///
+ /// This is useful when one is constructing a UTF-8 automaton to match
+ /// character classes in reverse.
+ pub fn reverse(&mut self) {
+ match *self {
+ Utf8Sequence::One(_) => {}
+ Utf8Sequence::Two(ref mut x) => x.reverse(),
+ Utf8Sequence::Three(ref mut x) => x.reverse(),
+ Utf8Sequence::Four(ref mut x) => x.reverse(),
+ }
+ }
+
+ /// Returns true if and only if a prefix of `bytes` matches this sequence
+ /// of byte ranges.
+ pub fn matches(&self, bytes: &[u8]) -> bool {
+ if bytes.len() < self.len() {
+ return false;
+ }
+ for (&b, r) in bytes.iter().zip(self) {
+ if !r.matches(b) {
+ return false;
+ }
+ }
+ true
+ }
+}
+
+impl<'a> IntoIterator for &'a Utf8Sequence {
+ type IntoIter = slice::Iter<'a, Utf8Range>;
+ type Item = &'a Utf8Range;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.as_slice().into_iter()
+ }
+}
+
+impl fmt::Debug for Utf8Sequence {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ use self::Utf8Sequence::*;
+ match *self {
+ One(ref r) => write!(f, "{:?}", r),
+ Two(ref r) => write!(f, "{:?}{:?}", r[0], r[1]),
+ Three(ref r) => write!(f, "{:?}{:?}{:?}", r[0], r[1], r[2]),
+ Four(ref r) => {
+ write!(f, "{:?}{:?}{:?}{:?}", r[0], r[1], r[2], r[3])
+ }
+ }
+ }
+}
+
+/// A single inclusive range of UTF-8 bytes.
+#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
+pub struct Utf8Range {
+ /// Start of byte range (inclusive).
+ pub start: u8,
+ /// End of byte range (inclusive).
+ pub end: u8,
+}
+
+impl Utf8Range {
+ fn new(start: u8, end: u8) -> Self {
+ Utf8Range { start, end }
+ }
+
+ /// Returns true if and only if the given byte is in this range.
+ pub fn matches(&self, b: u8) -> bool {
+ self.start <= b && b <= self.end
+ }
+}
+
+impl fmt::Debug for Utf8Range {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ if self.start == self.end {
+ write!(f, "[{:X}]", self.start)
+ } else {
+ write!(f, "[{:X}-{:X}]", self.start, self.end)
+ }
+ }
+}
+
+/// An iterator over ranges of matching UTF-8 byte sequences.
+///
+/// The iteration represents an alternation of comprehensive byte sequences
+/// that match precisely the set of UTF-8 encoded scalar values.
+///
+/// A byte sequence corresponds to one of the scalar values in the range given
+/// if and only if it completely matches exactly one of the sequences of byte
+/// ranges produced by this iterator.
+///
+/// Each sequence of byte ranges matches a unique set of bytes. That is, no two
+/// sequences will match the same bytes.
+///
+/// # Example
+///
+/// This shows how to match an arbitrary byte sequence against a range of
+/// scalar values.
+///
+/// ```rust
+/// use regex_syntax::utf8::{Utf8Sequences, Utf8Sequence};
+///
+/// fn matches(seqs: &[Utf8Sequence], bytes: &[u8]) -> bool {
+/// for range in seqs {
+/// if range.matches(bytes) {
+/// return true;
+/// }
+/// }
+/// false
+/// }
+///
+/// // Test the basic multilingual plane.
+/// let seqs: Vec<_> = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect();
+///
+/// // UTF-8 encoding of 'a'.
+/// assert!(matches(&seqs, &[0x61]));
+/// // UTF-8 encoding of '☃' (`\u{2603}`).
+/// assert!(matches(&seqs, &[0xE2, 0x98, 0x83]));
+/// // UTF-8 encoding of `\u{10348}` (outside the BMP).
+/// assert!(!matches(&seqs, &[0xF0, 0x90, 0x8D, 0x88]));
+/// // Tries to match against a UTF-8 encoding of a surrogate codepoint,
+/// // which is invalid UTF-8, and therefore fails, despite the fact that
+/// // the corresponding codepoint (0xD800) falls in the range given.
+/// assert!(!matches(&seqs, &[0xED, 0xA0, 0x80]));
+/// // And fails against plain old invalid UTF-8.
+/// assert!(!matches(&seqs, &[0xFF, 0xFF]));
+/// ```
+///
+/// If this example seems circuitous, that's because it is! It's meant to be
+/// illustrative. In practice, you could just try to decode your byte sequence
+/// and compare it with the scalar value range directly. However, this is not
+/// always possible (for example, in a byte based automaton).
+#[derive(Debug)]
+pub struct Utf8Sequences {
+ range_stack: Vec<ScalarRange>,
+}
+
+impl Utf8Sequences {
+ /// Create a new iterator over UTF-8 byte ranges for the scalar value range
+ /// given.
+ pub fn new(start: char, end: char) -> Self {
+ let mut it = Utf8Sequences { range_stack: vec![] };
+ it.push(start as u32, end as u32);
+ it
+ }
+
+ /// reset resets the scalar value range.
+ /// Any existing state is cleared, but resources may be reused.
+ ///
+ /// N.B. Benchmarks say that this method is dubious.
+ #[doc(hidden)]
+ pub fn reset(&mut self, start: char, end: char) {
+ self.range_stack.clear();
+ self.push(start as u32, end as u32);
+ }
+
+ fn push(&mut self, start: u32, end: u32) {
+ self.range_stack.push(ScalarRange { start, end });
+ }
+}
+
+struct ScalarRange {
+ start: u32,
+ end: u32,
+}
+
+impl fmt::Debug for ScalarRange {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "ScalarRange({:X}, {:X})", self.start, self.end)
+ }
+}
+
+impl Iterator for Utf8Sequences {
+ type Item = Utf8Sequence;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ 'TOP: while let Some(mut r) = self.range_stack.pop() {
+ 'INNER: loop {
+ if let Some((r1, r2)) = r.split() {
+ self.push(r2.start, r2.end);
+ r.start = r1.start;
+ r.end = r1.end;
+ continue 'INNER;
+ }
+ if !r.is_valid() {
+ continue 'TOP;
+ }
+ for i in 1..MAX_UTF8_BYTES {
+ let max = max_scalar_value(i);
+ if r.start <= max && max < r.end {
+ self.push(max + 1, r.end);
+ r.end = max;
+ continue 'INNER;
+ }
+ }
+ if let Some(ascii_range) = r.as_ascii() {
+ return Some(Utf8Sequence::One(ascii_range));
+ }
+ for i in 1..MAX_UTF8_BYTES {
+ let m = (1 << (6 * i)) - 1;
+ if (r.start & !m) != (r.end & !m) {
+ if (r.start & m) != 0 {
+ self.push((r.start | m) + 1, r.end);
+ r.end = r.start | m;
+ continue 'INNER;
+ }
+ if (r.end & m) != m {
+ self.push(r.end & !m, r.end);
+ r.end = (r.end & !m) - 1;
+ continue 'INNER;
+ }
+ }
+ }
+ let mut start = [0; MAX_UTF8_BYTES];
+ let mut end = [0; MAX_UTF8_BYTES];
+ let n = r.encode(&mut start, &mut end);
+ return Some(Utf8Sequence::from_encoded_range(
+ &start[0..n],
+ &end[0..n],
+ ));
+ }
+ }
+ None
+ }
+}
+
+impl FusedIterator for Utf8Sequences {}
+
+impl ScalarRange {
+ /// split splits this range if it overlaps with a surrogate codepoint.
+ ///
+ /// Either or both ranges may be invalid.
+ fn split(&self) -> Option<(ScalarRange, ScalarRange)> {
+ if self.start < 0xE000 && self.end > 0xD7FF {
+ Some((
+ ScalarRange { start: self.start, end: 0xD7FF },
+ ScalarRange { start: 0xE000, end: self.end },
+ ))
+ } else {
+ None
+ }
+ }
+
+ /// is_valid returns true if and only if start <= end.
+ fn is_valid(&self) -> bool {
+ self.start <= self.end
+ }
+
+ /// as_ascii returns this range as a Utf8Range if and only if all scalar
+ /// values in this range can be encoded as a single byte.
+ fn as_ascii(&self) -> Option<Utf8Range> {
+ if self.is_ascii() {
+ Some(Utf8Range::new(self.start as u8, self.end as u8))
+ } else {
+ None
+ }
+ }
+
+ /// is_ascii returns true if the range is ASCII only (i.e., takes a single
+ /// byte to encode any scalar value).
+ fn is_ascii(&self) -> bool {
+ self.is_valid() && self.end <= 0x7f
+ }
+
+ /// encode writes the UTF-8 encoding of the start and end of this range
+ /// to the corresponding destination slices, and returns the number of
+ /// bytes written.
+ ///
+ /// The slices should have room for at least `MAX_UTF8_BYTES` bytes.
+ fn encode(&self, start: &mut [u8], end: &mut [u8]) -> usize {
+ let cs = char::from_u32(self.start).unwrap();
+ let ce = char::from_u32(self.end).unwrap();
+ let ss = cs.encode_utf8(start);
+ let se = ce.encode_utf8(end);
+ assert_eq!(ss.len(), se.len());
+ ss.len()
+ }
+}
+
+fn max_scalar_value(nbytes: usize) -> u32 {
+ match nbytes {
+ 1 => 0x007F,
+ 2 => 0x07FF,
+ 3 => 0xFFFF,
+ 4 => 0x10FFFF,
+ _ => unreachable!("invalid UTF-8 byte sequence size"),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::char;
+
+ use crate::utf8::{Utf8Range, Utf8Sequences};
+
+ fn rutf8(s: u8, e: u8) -> Utf8Range {
+ Utf8Range::new(s, e)
+ }
+
+ fn never_accepts_surrogate_codepoints(start: char, end: char) {
+ for cp in 0xD800..0xE000 {
+ let buf = encode_surrogate(cp);
+ for r in Utf8Sequences::new(start, end) {
+ if r.matches(&buf) {
+ panic!(
+ "Sequence ({:X}, {:X}) contains range {:?}, \
+ which matches surrogate code point {:X} \
+ with encoded bytes {:?}",
+ start as u32, end as u32, r, cp, buf,
+ );
+ }
+ }
+ }
+ }
+
+ #[test]
+ fn codepoints_no_surrogates() {
+ never_accepts_surrogate_codepoints('\u{0}', '\u{FFFF}');
+ never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFF}');
+ never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFE}');
+ never_accepts_surrogate_codepoints('\u{80}', '\u{10FFFF}');
+ never_accepts_surrogate_codepoints('\u{D7FF}', '\u{E000}');
+ }
+
+ #[test]
+ fn single_codepoint_one_sequence() {
+ // Tests that every range of scalar values that contains a single
+ // scalar value is recognized by one sequence of byte ranges.
+ for i in 0x0..(0x10FFFF + 1) {
+ let c = match char::from_u32(i) {
+ None => continue,
+ Some(c) => c,
+ };
+ let seqs: Vec<_> = Utf8Sequences::new(c, c).collect();
+ assert_eq!(seqs.len(), 1);
+ }
+ }
+
+ #[test]
+ fn bmp() {
+ use crate::utf8::Utf8Sequence::*;
+
+ let seqs = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect::<Vec<_>>();
+ assert_eq!(
+ seqs,
+ vec![
+ One(rutf8(0x0, 0x7F)),
+ Two([rutf8(0xC2, 0xDF), rutf8(0x80, 0xBF)]),
+ Three([
+ rutf8(0xE0, 0xE0),
+ rutf8(0xA0, 0xBF),
+ rutf8(0x80, 0xBF)
+ ]),
+ Three([
+ rutf8(0xE1, 0xEC),
+ rutf8(0x80, 0xBF),
+ rutf8(0x80, 0xBF)
+ ]),
+ Three([
+ rutf8(0xED, 0xED),
+ rutf8(0x80, 0x9F),
+ rutf8(0x80, 0xBF)
+ ]),
+ Three([
+ rutf8(0xEE, 0xEF),
+ rutf8(0x80, 0xBF),
+ rutf8(0x80, 0xBF)
+ ]),
+ ]
+ );
+ }
+
+ #[test]
+ fn reverse() {
+ use crate::utf8::Utf8Sequence::*;
+
+ let mut s = One(rutf8(0xA, 0xB));
+ s.reverse();
+ assert_eq!(s.as_slice(), &[rutf8(0xA, 0xB)]);
+
+ let mut s = Two([rutf8(0xA, 0xB), rutf8(0xB, 0xC)]);
+ s.reverse();
+ assert_eq!(s.as_slice(), &[rutf8(0xB, 0xC), rutf8(0xA, 0xB)]);
+
+ let mut s = Three([rutf8(0xA, 0xB), rutf8(0xB, 0xC), rutf8(0xC, 0xD)]);
+ s.reverse();
+ assert_eq!(
+ s.as_slice(),
+ &[rutf8(0xC, 0xD), rutf8(0xB, 0xC), rutf8(0xA, 0xB)]
+ );
+
+ let mut s = Four([
+ rutf8(0xA, 0xB),
+ rutf8(0xB, 0xC),
+ rutf8(0xC, 0xD),
+ rutf8(0xD, 0xE),
+ ]);
+ s.reverse();
+ assert_eq!(
+ s.as_slice(),
+ &[
+ rutf8(0xD, 0xE),
+ rutf8(0xC, 0xD),
+ rutf8(0xB, 0xC),
+ rutf8(0xA, 0xB)
+ ]
+ );
+ }
+
+ fn encode_surrogate(cp: u32) -> [u8; 3] {
+ const TAG_CONT: u8 = 0b1000_0000;
+ const TAG_THREE_B: u8 = 0b1110_0000;
+
+ assert!(0xD800 <= cp && cp < 0xE000);
+ let mut dst = [0; 3];
+ dst[0] = (cp >> 12 & 0x0F) as u8 | TAG_THREE_B;
+ dst[1] = (cp >> 6 & 0x3F) as u8 | TAG_CONT;
+ dst[2] = (cp & 0x3F) as u8 | TAG_CONT;
+ dst
+ }
+}