diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 17:47:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 17:47:50 +0000 |
commit | bbe35a6e1b54ef5cd7c1c471886c30ba85c0804e (patch) | |
tree | 985a31e8c860c690d9f20e6621ce5fcc05ccd244 /lib/unicase | |
parent | Initial commit. (diff) | |
download | wget-bbe35a6e1b54ef5cd7c1c471886c30ba85c0804e.tar.xz wget-bbe35a6e1b54ef5cd7c1c471886c30ba85c0804e.zip |
Adding upstream version 1.21.upstream/1.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | lib/unicase.in.h | 465 | ||||
-rw-r--r-- | lib/unicase/cased.c | 53 | ||||
-rw-r--r-- | lib/unicase/cased.h | 367 | ||||
-rw-r--r-- | lib/unicase/caseprop.h | 34 | ||||
-rw-r--r-- | lib/unicase/context.h | 65 | ||||
-rw-r--r-- | lib/unicase/empty-prefix-context.c | 27 | ||||
-rw-r--r-- | lib/unicase/empty-suffix-context.c | 27 | ||||
-rw-r--r-- | lib/unicase/ignorable.c | 71 | ||||
-rw-r--r-- | lib/unicase/ignorable.h | 600 | ||||
-rw-r--r-- | lib/unicase/simple-mapping.h | 39 | ||||
-rw-r--r-- | lib/unicase/special-casing-table.gperf | 136 | ||||
-rw-r--r-- | lib/unicase/special-casing-table.h | 333 | ||||
-rw-r--r-- | lib/unicase/special-casing.c | 25 | ||||
-rw-r--r-- | lib/unicase/special-casing.in.h | 61 | ||||
-rw-r--r-- | lib/unicase/tolower.c | 27 | ||||
-rw-r--r-- | lib/unicase/tolower.h | 647 | ||||
-rw-r--r-- | lib/unicase/u-casemap.h | 416 | ||||
-rw-r--r-- | lib/unicase/u8-casemap.c | 40 | ||||
-rw-r--r-- | lib/unicase/u8-tolower.c | 120 | ||||
-rw-r--r-- | lib/unicase/unicasemap.h | 52 |
20 files changed, 3605 insertions, 0 deletions
diff --git a/lib/unicase.in.h b/lib/unicase.in.h new file mode 100644 index 0000000..c11533a --- /dev/null +++ b/lib/unicase.in.h @@ -0,0 +1,465 @@ +/* Unicode character case mappings. + Copyright (C) 2002, 2009-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#ifndef _UNICASE_H +#define _UNICASE_H + +#include "unitypes.h" + +/* Get bool. */ +#include <stdbool.h> + +/* Get size_t. */ +#include <stddef.h> + +/* Get uninorm_t. */ +#include "uninorm.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* ========================================================================= */ + +/* Character case mappings. + These mappings are locale and context independent. + WARNING! These functions are not sufficient for languages such as German. + Better use the functions below that treat an entire string at once and are + language aware. */ + +/* Return the uppercase mapping of a Unicode character. */ +extern ucs4_t + uc_toupper (ucs4_t uc) + _UC_ATTRIBUTE_CONST; + +/* Return the lowercase mapping of a Unicode character. */ +extern ucs4_t + uc_tolower (ucs4_t uc) + _UC_ATTRIBUTE_CONST; + +/* Return the titlecase mapping of a Unicode character. */ +extern ucs4_t + uc_totitle (ucs4_t uc) + _UC_ATTRIBUTE_CONST; + +/* ========================================================================= */ + +/* String case mappings. */ + +/* These functions are locale dependent. The iso639_language argument + identifies the language (e.g. "tr" for Turkish). NULL means to use + locale independent case mappings. */ + +/* Return the ISO 639 language code of the current locale. + Return "" if it is unknown, or in the "C" locale. */ +extern const char * + uc_locale_language (void) + _UC_ATTRIBUTE_PURE; + +/* Conventions: + + All functions prefixed with u8_ operate on UTF-8 encoded strings. + Their unit is an uint8_t (1 byte). + + All functions prefixed with u16_ operate on UTF-16 encoded strings. + Their unit is an uint16_t (a 2-byte word). + + All functions prefixed with u32_ operate on UCS-4 encoded strings. + Their unit is an uint32_t (a 4-byte word). + + All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly + n units. + + Functions returning a string result take a (resultbuf, lengthp) argument + pair. If resultbuf is not NULL and the result fits into *lengthp units, + it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly + allocated string is returned. In both cases, *lengthp is set to the + length (number of units) of the returned string. In case of error, + NULL is returned and errno is set. */ + +/* Return the uppercase mapping of a string. + The nf argument identifies the normalization form to apply after the + case-mapping. It can also be NULL, for no normalization. */ +extern uint8_t * + u8_toupper (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint16_t * + u16_toupper (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint32_t * + u32_toupper (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); + +/* Return the lowercase mapping of a string. + The nf argument identifies the normalization form to apply after the + case-mapping. It can also be NULL, for no normalization. */ +extern uint8_t * + u8_tolower (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint16_t * + u16_tolower (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint32_t * + u32_tolower (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); + +/* Return the titlecase mapping of a string. + The nf argument identifies the normalization form to apply after the + case-mapping. It can also be NULL, for no normalization. */ +extern uint8_t * + u8_totitle (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint16_t * + u16_totitle (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint32_t * + u32_totitle (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); + +/* The case-mapping context given by a prefix string. */ +typedef struct casing_prefix_context + { + /* These fields are private, undocumented. */ + uint32_t last_char_except_ignorable; + uint32_t last_char_normal_or_above; + } + casing_prefix_context_t; +/* The case-mapping context of the empty prefix string. */ +extern const casing_prefix_context_t unicase_empty_prefix_context; +/* Return the case-mapping context of a given prefix string. */ +extern casing_prefix_context_t + u8_casing_prefix_context (const uint8_t *s, size_t n); +extern casing_prefix_context_t + u16_casing_prefix_context (const uint16_t *s, size_t n); +extern casing_prefix_context_t + u32_casing_prefix_context (const uint32_t *s, size_t n); +/* Return the case-mapping context of the prefix concat(A, S), given the + case-mapping context of the prefix A. */ +extern casing_prefix_context_t + u8_casing_prefixes_context (const uint8_t *s, size_t n, + casing_prefix_context_t a_context); +extern casing_prefix_context_t + u16_casing_prefixes_context (const uint16_t *s, size_t n, + casing_prefix_context_t a_context); +extern casing_prefix_context_t + u32_casing_prefixes_context (const uint32_t *s, size_t n, + casing_prefix_context_t a_context); + +/* The case-mapping context given by a suffix string. */ +typedef struct casing_suffix_context + { + /* These fields are private, undocumented. */ + uint32_t first_char_except_ignorable; + uint32_t bits; + } + casing_suffix_context_t; +/* The case-mapping context of the empty suffix string. */ +extern const casing_suffix_context_t unicase_empty_suffix_context; +/* Return the case-mapping context of a given suffix string. */ +extern casing_suffix_context_t + u8_casing_suffix_context (const uint8_t *s, size_t n); +extern casing_suffix_context_t + u16_casing_suffix_context (const uint16_t *s, size_t n); +extern casing_suffix_context_t + u32_casing_suffix_context (const uint32_t *s, size_t n); +/* Return the case-mapping context of the suffix concat(S, A), given the + case-mapping context of the suffix A. */ +extern casing_suffix_context_t + u8_casing_suffixes_context (const uint8_t *s, size_t n, + casing_suffix_context_t a_context); +extern casing_suffix_context_t + u16_casing_suffixes_context (const uint16_t *s, size_t n, + casing_suffix_context_t a_context); +extern casing_suffix_context_t + u32_casing_suffixes_context (const uint32_t *s, size_t n, + casing_suffix_context_t a_context); + +/* Return the uppercase mapping of a string that is surrounded by a prefix + and a suffix. */ +extern uint8_t * + u8_ct_toupper (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint16_t * + u16_ct_toupper (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint32_t * + u32_ct_toupper (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); + +/* Return the lowercase mapping of a string that is surrounded by a prefix + and a suffix. */ +extern uint8_t * + u8_ct_tolower (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint16_t * + u16_ct_tolower (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint32_t * + u32_ct_tolower (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); + +/* Return the titlecase mapping of a string that is surrounded by a prefix + and a suffix. */ +extern uint8_t * + u8_ct_totitle (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint16_t * + u16_ct_totitle (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint32_t * + u32_ct_totitle (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); + +/* Return the case folded string. + Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent + to comparing S1 and S2 with uN_casecmp(). + The nf argument identifies the normalization form to apply after the + case-mapping. It can also be NULL, for no normalization. */ +extern uint8_t * + u8_casefold (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint16_t * + u16_casefold (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint32_t * + u32_casefold (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); +/* Likewise, for a string that is surrounded by a prefix and a suffix. */ +extern uint8_t * + u8_ct_casefold (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint16_t * + u16_ct_casefold (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); +extern uint32_t * + u32_ct_casefold (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); + +/* Compare S1 and S2, ignoring differences in case and normalization. + The nf argument identifies the normalization form to apply after the + case-mapping. It can also be NULL, for no normalization. + If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and + return 0. Upon failure, return -1 with errno set. */ +extern int + u8_casecmp (const uint8_t *s1, size_t n1, + const uint8_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + u16_casecmp (const uint16_t *s1, size_t n1, + const uint16_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + u32_casecmp (const uint32_t *s1, size_t n1, + const uint32_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + ulc_casecmp (const char *s1, size_t n1, + const char *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); + +/* Convert the string S of length N to a NUL-terminated byte sequence, in such + a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib + function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll(). + NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */ +extern char * + u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + char *_UC_RESTRICT resultbuf, size_t *lengthp); +extern char * + u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + char *_UC_RESTRICT resultbuf, size_t *lengthp); +extern char * + u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + char *_UC_RESTRICT resultbuf, size_t *lengthp); +extern char * + ulc_casexfrm (const char *s, size_t n, const char *iso639_language, + uninorm_t nf, + char *_UC_RESTRICT resultbuf, size_t *lengthp); + +/* Compare S1 and S2, ignoring differences in case and normalization, using the + collation rules of the current locale. + The nf argument identifies the normalization form to apply after the + case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC. It can also + be NULL, for no normalization. + If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and + return 0. Upon failure, return -1 with errno set. */ +extern int + u8_casecoll (const uint8_t *s1, size_t n1, + const uint8_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + u16_casecoll (const uint16_t *s1, size_t n1, + const uint16_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + u32_casecoll (const uint32_t *s1, size_t n1, + const uint32_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + ulc_casecoll (const char *s1, size_t n1, + const char *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); + + +/* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false + otherwise, and return 0. Upon failure, return -1 with errno set. */ +extern int + u8_is_uppercase (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_uppercase (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_uppercase (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + +/* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false + otherwise, and return 0. Upon failure, return -1 with errno set. */ +extern int + u8_is_lowercase (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_lowercase (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_lowercase (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + +/* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false + otherwise, and return 0. Upon failure, return -1 with errno set. */ +extern int + u8_is_titlecase (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_titlecase (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_titlecase (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + +/* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to + false otherwise, and return 0. Upon failure, return -1 with errno set. */ +extern int + u8_is_casefolded (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_casefolded (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_casefolded (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + +/* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to + either upper case or lower case or title case is not a no-op. + Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping, + under the lower case mapping, and under the title case mapping; in other + words, when NFD(S) consists entirely of caseless characters. + Upon failure, return -1 with errno set. */ +extern int + u8_is_cased (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_cased (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_cased (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + + +/* ========================================================================= */ + +#ifdef __cplusplus +} +#endif + +#endif /* _UNICASE_H */ diff --git a/lib/unicase/cased.c b/lib/unicase/cased.c new file mode 100644 index 0000000..e6d9231 --- /dev/null +++ b/lib/unicase/cased.c @@ -0,0 +1,53 @@ +/* Test whether a Unicode character is cased. + Copyright (C) 2002, 2006-2007, 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "caseprop.h" + +/* Quoting the Unicode standard: + Definition: A character is defined to be "cased" if it has the Lowercase + or Uppercase property or has a General_Category value of + Titlecase_Letter. */ + +#if 0 + +#include "unictype.h" + +bool +uc_is_cased (ucs4_t uc) +{ + return (uc_is_property_lowercase (uc) + || uc_is_property_uppercase (uc) + || uc_is_general_category (uc, UC_TITLECASE_LETTER)); +} + +#else + +#include "unictype/bitmap.h" + +/* Define u_casing_property_cased table. */ +#include "cased.h" + +bool +uc_is_cased (ucs4_t uc) +{ + return bitmap_lookup (&u_casing_property_cased, uc); +} + +#endif diff --git a/lib/unicase/cased.h b/lib/unicase/cased.h new file mode 100644 index 0000000..a488ec4 --- /dev/null +++ b/lib/unicase/cased.h @@ -0,0 +1,367 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Casing Properties of Unicode characters. */ +/* Generated automatically by gen-uni-tables.c for Unicode 9.0.0. */ +#define header_0 16 +#define header_2 9 +#define header_3 127 +#define header_4 15 +static const +struct + { + int header[1]; + int level1[2]; + short level2[2 << 7]; + unsigned int level3[21 << 4]; + } +u_casing_property_cased = +{ + { 2 }, + { + 3 * sizeof (int) / sizeof (short) + 0, + 3 * sizeof (int) / sizeof (short) + 128 + }, + { + 3 + 256 * sizeof (short) / sizeof (int) + 0, + 3 + 256 * sizeof (short) / sizeof (int) + 16, + 3 + 256 * sizeof (short) / sizeof (int) + 32, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 48, + 3 + 256 * sizeof (short) / sizeof (int) + 64, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 80, + 3 + 256 * sizeof (short) / sizeof (int) + 96, + 3 + 256 * sizeof (short) / sizeof (int) + 112, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 128, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 144, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 160, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 176, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 192, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 208, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 224, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 240, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 256, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 272, + 3 + 256 * sizeof (short) / sizeof (int) + 288, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 304, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 320, + -1, + -1, + -1, + -1, + -1, + -1, + -1 + }, + { + 0x00000000U, 0x00000000U, 0x07FFFFFEU, 0x07FFFFFEU, + 0x00000000U, 0x04200400U, 0xFF7FFFFFU, 0xFF7FFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xF7FFFFFFU, 0xFFFFFFF0U, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFEFFFFFU, 0x01FFFFFFU, 0x00000003U, 0x0000001FU, + 0x00000000U, 0x00000000U, 0x00000020U, 0xBCCF0000U, + 0xFFFFD740U, 0xFFFFFFFBU, 0xFFFFFFFFU, 0xFFBFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFC03U, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFEFFFFU, 0x007FFFFFU, 0xFFFFFFFEU, + 0x000000FFU, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFFFFFFFFU, 0x000020BFU, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x3F3FFFFFU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x000001FFU, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0x3F3FFFFFU, 0xFFFFFFFFU, 0xAAFF3F3FU, 0x3FFFFFFFU, + 0xFFFFFFFFU, 0x5FDFFFFFU, 0x0FCF1FDCU, 0x1FDC1FFFU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x80020000U, + 0x1FFF0000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x3E2FFC84U, 0xF21FBD50U, 0x000043E0U, 0xFFFFFFFFU, + 0x00000018U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFFC00000U, 0xFFFFFFFFU, 0x000003FFU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0xFFFF7FFFU, 0x7FFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x000C781FU, + 0xFFFFFFFFU, 0x000020BFU, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0xFFFFFFFFU, 0x00003FFFU, + 0x3FFFFFFFU, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFFFFFFFCU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFF78FFU, 0x00FF7FFFU, 0x00000000U, 0x07000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFFFF0000U, 0xF7FFFFFFU, 0xFFFF003FU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00F8007FU, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x07FFFFFEU, 0x07FFFFFEU, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0x0000FFFFU, 0x00000000U, + 0x00000000U, 0xFFFF0000U, 0xFF0FFFFFU, 0x0FFFFFFFU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0x0007FFFFU, 0xFFFFFFFFU, 0x0007FFFFU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFDFFFFFU, 0xFFFFFFFFU, + 0xDFFFFFFFU, 0xEBFFDE64U, 0xFFFFFFEFU, 0xFFFFFFFFU, + 0xDFDFE7BFU, 0x7BFFFFFFU, 0xFFFDFC5FU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFF3FU, 0xF7FFFFFDU, 0xF7FFFFFFU, + 0xFFDFFFFFU, 0xFFDFFFFFU, 0xFFFF7FFFU, 0xFFFF7FFFU, + 0xFFFFFDFFU, 0xFFFFFDFFU, 0x00000FF7U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0x0000000FU, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFFFF0000U, 0xFFFF03FFU, 0xFFFF03FFU, + 0x000003FFU, 0x00000000U, 0x00000000U, 0x00000000U + } +}; diff --git a/lib/unicase/caseprop.h b/lib/unicase/caseprop.h new file mode 100644 index 0000000..52ce099 --- /dev/null +++ b/lib/unicase/caseprop.h @@ -0,0 +1,34 @@ +/* Case related properties of Unicode characters. + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <stdbool.h> +#include "unitypes.h" + +/* Determine whether a character is "cased" according to the Unicode Standard, + <https://www.unicode.org/versions/Unicode5.0.0/ch03.pdf>, section 3.13, + definition D120. */ +extern bool + uc_is_cased (ucs4_t uc) + _UC_ATTRIBUTE_CONST; + +/* Determine whether a character is "case-ignorable" + according to the Unicode Standard, + <https://www.unicode.org/versions/Unicode5.0.0/ch03.pdf>, section 3.13, + definition D121. */ +extern bool + uc_is_case_ignorable (ucs4_t uc) + _UC_ATTRIBUTE_CONST; diff --git a/lib/unicase/context.h b/lib/unicase/context.h new file mode 100644 index 0000000..b5baef7 --- /dev/null +++ b/lib/unicase/context.h @@ -0,0 +1,65 @@ +/* Case-mapping contexts of UTF-8/UTF-16/UTF-32 substring. + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + + +/* The context of a prefix string combines the information of the "Before C" + conditions of the Unicode Standard, + <https://www.unicode.org/versions/Unicode5.0.0/ch03.pdf>, section 3.13, + table 3-14 "Context Specification for Casing". + + casing_prefix_context_t contains the following fields: + + // Helper for evaluating the FINAL_SIGMA condition: + // Last character that was not case-ignorable. + ucs4_t last_char_except_ignorable; + + // Helper for evaluating the AFTER_SOFT_DOTTED and AFTER_I conditions: + // Last character that was of combining class 230 ("Above") or 0. + ucs4_t last_char_normal_or_above; + + Three bits would be sufficient to carry the context information, but + that would require to invoke uc_is_cased and uc_is_property_soft_dotted + ahead of time, more often than actually needed. */ + + +/* The context of a suffix string combines the information of the "After C" + conditions of the Unicode Standard, + <https://www.unicode.org/versions/Unicode5.0.0/ch03.pdf>, section 3.13, + table 3-14 "Context Specification for Casing". + + casing_suffix_context_t contains the following fields: + + // For evaluating the FINAL_SIGMA condition: + // First character that was not case-ignorable. + ucs4_t first_char_except_ignorable; + + // For evaluating the MORE_ABOVE condition: + // Bit 0 is set if the suffix contains a character of combining class + // 230 (Above) with no character of combining class 0 or 230 (Above) + // before it. + // + // For evaluating the BEFORE_DOT condition: + // Bit 1 is set if the suffix contains a COMBINING DOT ABOVE (U+0307) + // with no character of combining class 0 or 230 (Above) before it. + // + uint32_t bits; + + Three bits would be sufficient to carry the context information, but + that would require to invoke uc_is_cased ahead of time, more often than + actually needed. */ +#define SCC_MORE_ABOVE_MASK 1 +#define SCC_BEFORE_DOT_MASK 2 diff --git a/lib/unicase/empty-prefix-context.c b/lib/unicase/empty-prefix-context.c new file mode 100644 index 0000000..36dded6 --- /dev/null +++ b/lib/unicase/empty-prefix-context.c @@ -0,0 +1,27 @@ +/* Case-mapping context of empty prefix string. + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unicase.h" + +const casing_prefix_context_t unicase_empty_prefix_context = + { + 0xFFFD /* last_char_except_ignorable */, + 0xFFFD /* last_char_normal_or_above */ + }; diff --git a/lib/unicase/empty-suffix-context.c b/lib/unicase/empty-suffix-context.c new file mode 100644 index 0000000..3fa49c6 --- /dev/null +++ b/lib/unicase/empty-suffix-context.c @@ -0,0 +1,27 @@ +/* Case-mapping context of empty suffix string. + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unicase.h" + +const casing_suffix_context_t unicase_empty_suffix_context = + { + 0xFFFD /* first_char_except_ignorable */, + 0 /* bits */ + }; diff --git a/lib/unicase/ignorable.c b/lib/unicase/ignorable.c new file mode 100644 index 0000000..a07497b --- /dev/null +++ b/lib/unicase/ignorable.c @@ -0,0 +1,71 @@ +/* Test whether a Unicode character is case-ignorable. + Copyright (C) 2002, 2006-2007, 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "caseprop.h" + +/* Quoting the Unicode standard: + Definition: A character is defined to be "case-ignorable" if it has the + value MidLetter {or the value MidNumLet} for the Word_Break property or + its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), + Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). + The text marked in braces was added in Unicode 5.1.0, see + <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of + Definition of case-ignorable". */ +/* Since this predicate is only used for the "Before C" and "After C" + conditions of FINAL_SIGMA, we exclude the "cased" characters here. + This simplifies the evaluation of the regular expressions + \p{cased} (\p{case-ignorable})* C + and + C (\p{case-ignorable})* \p{cased} + */ + +#if 0 + +#include "unictype.h" +#include "uniwbrk.h" + +bool +uc_is_case_ignorable (ucs4_t uc) +{ + int wbp = uc_wordbreak_property (uc); + + return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET + || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn + | UC_CATEGORY_MASK_Me + | UC_CATEGORY_MASK_Cf + | UC_CATEGORY_MASK_Lm + | UC_CATEGORY_MASK_Sk)) + && !uc_is_cased (uc); +} + +#else + +#include "unictype/bitmap.h" + +/* Define u_casing_property_case_ignorable table. */ +#include "ignorable.h" + +bool +uc_is_case_ignorable (ucs4_t uc) +{ + return bitmap_lookup (&u_casing_property_case_ignorable, uc); +} + +#endif diff --git a/lib/unicase/ignorable.h b/lib/unicase/ignorable.h new file mode 100644 index 0000000..2bb8286 --- /dev/null +++ b/lib/unicase/ignorable.h @@ -0,0 +1,600 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Casing Properties of Unicode characters. */ +/* Generated automatically by gen-uni-tables.c for Unicode 9.0.0. */ +#define header_0 16 +#define header_2 9 +#define header_3 127 +#define header_4 15 +static const +struct + { + int header[1]; + int level1[15]; + short level2[3 << 7]; + unsigned int level3[44 << 4]; + } +u_casing_property_case_ignorable = +{ + { 15 }, + { + 16 * sizeof (int) / sizeof (short) + 0, + 16 * sizeof (int) / sizeof (short) + 128, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 * sizeof (int) / sizeof (short) + 256 + }, + { + 16 + 384 * sizeof (short) / sizeof (int) + 0, + 16 + 384 * sizeof (short) / sizeof (int) + 16, + 16 + 384 * sizeof (short) / sizeof (int) + 32, + 16 + 384 * sizeof (short) / sizeof (int) + 48, + 16 + 384 * sizeof (short) / sizeof (int) + 64, + 16 + 384 * sizeof (short) / sizeof (int) + 80, + 16 + 384 * sizeof (short) / sizeof (int) + 96, + 16 + 384 * sizeof (short) / sizeof (int) + 112, + 16 + 384 * sizeof (short) / sizeof (int) + 128, + 16 + 384 * sizeof (short) / sizeof (int) + 144, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 160, + 16 + 384 * sizeof (short) / sizeof (int) + 176, + 16 + 384 * sizeof (short) / sizeof (int) + 192, + 16 + 384 * sizeof (short) / sizeof (int) + 208, + 16 + 384 * sizeof (short) / sizeof (int) + 224, + 16 + 384 * sizeof (short) / sizeof (int) + 240, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 256, + 16 + 384 * sizeof (short) / sizeof (int) + 272, + 16 + 384 * sizeof (short) / sizeof (int) + 288, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 304, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 320, + 16 + 384 * sizeof (short) / sizeof (int) + 336, + 16 + 384 * sizeof (short) / sizeof (int) + 352, + 16 + 384 * sizeof (short) / sizeof (int) + 368, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 384, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 400, + 16 + 384 * sizeof (short) / sizeof (int) + 416, + 16 + 384 * sizeof (short) / sizeof (int) + 432, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 448, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 464, + 16 + 384 * sizeof (short) / sizeof (int) + 480, + 16 + 384 * sizeof (short) / sizeof (int) + 496, + 16 + 384 * sizeof (short) / sizeof (int) + 512, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 528, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 544, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 560, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 576, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 592, + 16 + 384 * sizeof (short) / sizeof (int) + 608, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 624, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 640, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 656, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 672, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 688, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1 + }, + { + 0x00000000U, 0x04004000U, 0x40000000U, 0x00000001U, + 0x00000000U, 0x0190A100U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFE000000U, 0xFFFFFFFCU, 0xFFFFFFE0U, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFDFU, 0x0030FFFFU, + 0x000000B0U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x000003F8U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x02000000U, 0x00000000U, + 0xFFFE0000U, 0xBFFFFFFFU, 0x000000B6U, 0x00100000U, + 0x17FF003FU, 0x00000000U, 0xFFFFF801U, 0x00010000U, + 0x00000000U, 0x00000000U, 0xBFC00000U, 0x00003DFFU, + 0x00028000U, 0xFFFF0000U, 0x000007FFU, 0x00000000U, + 0x00000000U, 0x0001FFC0U, 0x00000000U, 0x043FF800U, + 0xFFC00000U, 0x00003FFFU, 0x0E000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0xFFF00000U, 0xFFFFFFFFU, + 0x00000007U, 0x14000000U, 0x00FE21FEU, 0x0002000CU, + 0x00000002U, 0x10000000U, 0x0000201EU, 0x0000000CU, + 0x00000006U, 0x10000000U, 0x00023986U, 0x00230000U, + 0x00000006U, 0x10000000U, 0x000021BEU, 0x0000000CU, + 0x00000002U, 0x90000000U, 0x0040201EU, 0x0000000CU, + 0x00000004U, 0x00000000U, 0x00002001U, 0x00000000U, + 0x00000001U, 0xC0000000U, 0x00603DC1U, 0x0000000CU, + 0x00000002U, 0x90000000U, 0x00003040U, 0x0000000CU, + 0x00000002U, 0x00000000U, 0x0000201EU, 0x0000000CU, + 0x00000000U, 0x00000000U, 0x005C0400U, 0x00000000U, + 0x00000000U, 0x07F20000U, 0x00007FC0U, 0x00000000U, + 0x00000000U, 0x1BF20000U, 0x00003F40U, 0x00000000U, + 0x03000000U, 0x02A00000U, 0x00000000U, 0x7FFE0000U, + 0xFEFFE0DFU, 0x1FFFFFFFU, 0x00000040U, 0x00000000U, + 0x00000000U, 0x66FDE000U, 0xC3000000U, 0x001E0001U, + 0x20002064U, 0x00000000U, 0x00000000U, 0x10000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0xE0000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x001C0000U, 0x001C0000U, 0x000C0000U, 0x000C0000U, + 0x00000000U, 0x3FB00000U, 0x208FFE40U, 0x00000000U, + 0x00007800U, 0x00000000U, 0x00000008U, 0x00000000U, + 0x00000060U, 0x00000200U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x0E040187U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x09800000U, 0x00000000U, 0x7F400000U, 0x9FF81FE5U, + 0x00000000U, 0x7FFF0080U, 0x00000000U, 0x00000000U, + 0x0000000FU, 0x17D00000U, 0x00000004U, 0x000FF800U, + 0x00000003U, 0x00003B3CU, 0x00000000U, 0x0003A340U, + 0x00000000U, 0x00CFF000U, 0x00000000U, 0x3F000000U, + 0x00000000U, 0x00000000U, 0xFFF70000U, 0x031021FDU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0xFFFFFFFFU, 0xF83FFFFFU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xA0000000U, 0xE000E003U, 0x6000E000U, + 0x0300F800U, 0x00007C90U, 0x00000000U, 0x0000FFDFU, + 0x00000000U, 0x00000000U, 0xFFFF0000U, 0x0001FFFFU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00038000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x80008000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0xFFFFFFFFU, + 0x00000000U, 0x00008000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000020U, 0x083E3C00U, 0x00000000U, 0x00000000U, + 0x7E000000U, 0x00000000U, 0x00000000U, 0x70000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00200000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x3F000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00001000U, 0x00000000U, 0x00000000U, 0xBFF78000U, + 0xC0000000U, 0x00000000U, 0x00000000U, 0x00030000U, + 0xFFFFFFFFU, 0x00000003U, 0x00000000U, 0x00000000U, + 0x00000700U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000844U, 0x00000060U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000030U, 0x0003FFFFU, + 0x00000000U, 0x00003FC0U, 0x0003FF80U, 0x00000000U, + 0x00000007U, 0x13C80000U, 0x00008000U, 0x00000060U, + 0x00000000U, 0x00667E00U, 0x00001008U, 0x10010000U, + 0x00000000U, 0xC19D0000U, 0x20000002U, 0x00583000U, + 0x00000000U, 0x00000000U, 0x08000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00002120U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x40000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFFFC0000U, 0x00000003U, 0x00000000U, + 0x0008FFFFU, 0x0000FFFFU, 0x00240000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x80000000U, + 0x04004080U, 0x40000000U, 0x00000001U, 0x00010000U, + 0xC0000000U, 0x00000000U, 0x00000000U, 0x0E000008U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x20000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000001U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x07C00000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x0000F06EU, 0x87000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000060U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000002U, 0xFF000000U, 0x0000007FU, 0x80000000U, + 0x00000003U, 0x26780000U, 0x00000000U, 0x00000000U, + 0x00000007U, 0x001FEF80U, 0x00000000U, 0x00080000U, + 0x00000003U, 0x7FC00000U, 0x00001C00U, 0x00000000U, + 0x00000000U, 0x40D38000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x80000000U, 0x000007F8U, + 0x00000003U, 0x10000000U, 0x00000001U, 0x001F1FC0U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xFF000000U, 0x0000005CU, 0x00000000U, + 0x00000000U, 0x85F80000U, 0x0000000DU, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xB03C0000U, 0x30000001U, 0x00000000U, + 0x00000000U, 0xA7F80000U, 0x00000001U, 0x00000000U, + 0x00000000U, 0x00BF2800U, 0x00000000U, 0x00000000U, + 0xE0000000U, 0x00000FBCU, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0xBF7F0000U, 0x00000000U, 0x00000000U, + 0xFFFC0000U, 0x006DFCFFU, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x001F0000U, + 0x00000000U, 0x007F0000U, 0x0000000FU, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFF8000U, 0x00000000U, 0x00000000U, 0x00000001U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x60000000U, 0x0000000FU, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0xFFF80380U, + 0x00000FE7U, 0x00003C00U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x0000001CU, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0xF87FFFFFU, 0xFFFFFFFFU, 0x00201FFFU, + 0xF8000010U, 0x0000FFFEU, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xF9FFFF7FU, 0x000007DBU, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x007F0000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x000007F0U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000000U, 0xF8000000U, + 0x00000002U, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x0000FFFFU + } +}; diff --git a/lib/unicase/simple-mapping.h b/lib/unicase/simple-mapping.h new file mode 100644 index 0000000..626a24c --- /dev/null +++ b/lib/unicase/simple-mapping.h @@ -0,0 +1,39 @@ +/* Simple case mapping for Unicode characters. + Copyright (C) 2002, 2006, 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +ucs4_t +FUNC (ucs4_t uc) +{ + unsigned int index1 = uc >> mapping_header_0; + if (index1 < mapping_header_1) + { + int lookup1 = u_mapping.level1[index1]; + if (lookup1 >= 0) + { + unsigned int index2 = (uc >> mapping_header_2) & mapping_header_3; + int lookup2 = u_mapping.level2[lookup1 + index2]; + if (lookup2 >= 0) + { + unsigned int index3 = (uc & mapping_header_4); + int lookup3 = u_mapping.level3[lookup2 + index3]; + + return uc + lookup3; + } + } + } + return uc; +} diff --git a/lib/unicase/special-casing-table.gperf b/lib/unicase/special-casing-table.gperf new file mode 100644 index 0000000..287f49a --- /dev/null +++ b/lib/unicase/special-casing-table.gperf @@ -0,0 +1,136 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Special casing rules of Unicode characters. */ +/* Generated automatically by gen-uni-tables.c for Unicode 9.0.0. */ +struct special_casing_rule { char code[3]; }; +%struct-type +%language=ANSI-C +%define slot-name code +%define hash-function-name gl_unicase_special_hash +%define lookup-function-name gl_unicase_special_lookup +%compare-lengths +%compare-strncmp +%readonly-tables +%omit-struct-type +%% +"\x00\x49\x00", 1, SCC_MORE_ABOVE , { 'l', 't' }, { 0x0049, 0, 0 }, { 0x0069, 0x0307, 0 }, { 0x0049, 0, 0 }, { 0x0069, 0, 0 } +"\x00\x49\x01", 1, -SCC_BEFORE_DOT , { 't', 'r' }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 } +"\x00\x49\x02", 1, -SCC_BEFORE_DOT , { 'a', 'z' }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 } +"\x00\x49\x03", 1, SCC_ALWAYS , { 't', 'r' }, { 0x0049, 0, 0 }, { 0x0069, 0, 0 }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 } +"\x00\x49\x04", 0, SCC_ALWAYS , { 'a', 'z' }, { 0x0049, 0, 0 }, { 0x0069, 0, 0 }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 } +"\x00\x4a\x00", 0, SCC_MORE_ABOVE , { 'l', 't' }, { 0x004A, 0, 0 }, { 0x006A, 0x0307, 0 }, { 0x004A, 0, 0 }, { 0x006A, 0, 0 } +"\x00\x69\x00", 1, SCC_ALWAYS , { 't', 'r' }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 } +"\x00\x69\x01", 0, SCC_ALWAYS , { 'a', 'z' }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 } +"\x00\xcc\x00", 0, SCC_ALWAYS , { 'l', 't' }, { 0x00CC, 0, 0 }, { 0x0069, 0x0307, 0x0300 }, { 0x00CC, 0, 0 }, { 0x00EC, 0, 0 } +"\x00\xcd\x00", 0, SCC_ALWAYS , { 'l', 't' }, { 0x00CD, 0, 0 }, { 0x0069, 0x0307, 0x0301 }, { 0x00CD, 0, 0 }, { 0x00ED, 0, 0 } +"\x00\xdf\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0053, 0x0053, 0 }, { 0x00DF, 0, 0 }, { 0x0053, 0x0073, 0 }, { 0x0073, 0x0073, 0 } +"\x01\x28\x00", 0, SCC_ALWAYS , { 'l', 't' }, { 0x0128, 0, 0 }, { 0x0069, 0x0307, 0x0303 }, { 0x0128, 0, 0 }, { 0x0129, 0, 0 } +"\x01\x2e\x00", 0, SCC_MORE_ABOVE , { 'l', 't' }, { 0x012E, 0, 0 }, { 0x012F, 0x0307, 0 }, { 0x012E, 0, 0 }, { 0x012F, 0, 0 } +"\x01\x30\x00", 1, SCC_ALWAYS , { 't', 'r' }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 } +"\x01\x30\x01", 1, SCC_ALWAYS , { 'a', 'z' }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 } +"\x01\x30\x02", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0130, 0, 0 }, { 0x0069, 0x0307, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0x0307, 0 } +"\x01\x49\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x02BC, 0x004E, 0 }, { 0x0149, 0, 0 }, { 0x02BC, 0x004E, 0 }, { 0x02BC, 0x006E, 0 } +"\x01\xf0\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x004A, 0x030C, 0 }, { 0x01F0, 0, 0 }, { 0x004A, 0x030C, 0 }, { 0x006A, 0x030C, 0 } +"\x03\x07\x00", 1, SCC_AFTER_SOFT_DOTTED, { 'l', 't' }, { 0, 0, 0 }, { 0x0307, 0, 0 }, { 0, 0, 0 }, { 0x0307, 0, 0 } +"\x03\x07\x01", 1, SCC_AFTER_I , { 't', 'r' }, { 0x0307, 0, 0 }, { 0, 0, 0 }, { 0x0307, 0, 0 }, { 0x0307, 0, 0 } +"\x03\x07\x02", 0, SCC_AFTER_I , { 'a', 'z' }, { 0x0307, 0, 0 }, { 0, 0, 0 }, { 0x0307, 0, 0 }, { 0x0307, 0, 0 } +"\x03\x90\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0308, 0x0301 }, { 0x0390, 0, 0 }, { 0x0399, 0x0308, 0x0301 }, { 0x03B9, 0x0308, 0x0301 } +"\x03\xa3\x00", 0, SCC_FINAL_SIGMA , { '\0', '\0' }, { 0x03A3, 0, 0 }, { 0x03C2, 0, 0 }, { 0x03A3, 0, 0 }, { 0x03C3, 0, 0 } +"\x03\xb0\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0308, 0x0301 }, { 0x03B0, 0, 0 }, { 0x03A5, 0x0308, 0x0301 }, { 0x03C5, 0x0308, 0x0301 } +"\x05\x87\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0535, 0x0552, 0 }, { 0x0587, 0, 0 }, { 0x0535, 0x0582, 0 }, { 0x0565, 0x0582, 0 } +"\x1e\x96\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0048, 0x0331, 0 }, { 0x1E96, 0, 0 }, { 0x0048, 0x0331, 0 }, { 0x0068, 0x0331, 0 } +"\x1e\x97\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0054, 0x0308, 0 }, { 0x1E97, 0, 0 }, { 0x0054, 0x0308, 0 }, { 0x0074, 0x0308, 0 } +"\x1e\x98\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0057, 0x030A, 0 }, { 0x1E98, 0, 0 }, { 0x0057, 0x030A, 0 }, { 0x0077, 0x030A, 0 } +"\x1e\x99\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0059, 0x030A, 0 }, { 0x1E99, 0, 0 }, { 0x0059, 0x030A, 0 }, { 0x0079, 0x030A, 0 } +"\x1e\x9a\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0041, 0x02BE, 0 }, { 0x1E9A, 0, 0 }, { 0x0041, 0x02BE, 0 }, { 0x0061, 0x02BE, 0 } +"\x1e\x9e\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1E9E, 0, 0 }, { 0x00DF, 0, 0 }, { 0x1E9E, 0, 0 }, { 0x0073, 0x0073, 0 } +"\x1f\x50\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0313, 0 }, { 0x1F50, 0, 0 }, { 0x03A5, 0x0313, 0 }, { 0x03C5, 0x0313, 0 } +"\x1f\x52\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0313, 0x0300 }, { 0x1F52, 0, 0 }, { 0x03A5, 0x0313, 0x0300 }, { 0x03C5, 0x0313, 0x0300 } +"\x1f\x54\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0313, 0x0301 }, { 0x1F54, 0, 0 }, { 0x03A5, 0x0313, 0x0301 }, { 0x03C5, 0x0313, 0x0301 } +"\x1f\x56\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0313, 0x0342 }, { 0x1F56, 0, 0 }, { 0x03A5, 0x0313, 0x0342 }, { 0x03C5, 0x0313, 0x0342 } +"\x1f\x80\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F08, 0x0399, 0 }, { 0x1F80, 0, 0 }, { 0x1F88, 0, 0 }, { 0x1F00, 0x03B9, 0 } +"\x1f\x81\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F09, 0x0399, 0 }, { 0x1F81, 0, 0 }, { 0x1F89, 0, 0 }, { 0x1F01, 0x03B9, 0 } +"\x1f\x82\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0A, 0x0399, 0 }, { 0x1F82, 0, 0 }, { 0x1F8A, 0, 0 }, { 0x1F02, 0x03B9, 0 } +"\x1f\x83\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0B, 0x0399, 0 }, { 0x1F83, 0, 0 }, { 0x1F8B, 0, 0 }, { 0x1F03, 0x03B9, 0 } +"\x1f\x84\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0C, 0x0399, 0 }, { 0x1F84, 0, 0 }, { 0x1F8C, 0, 0 }, { 0x1F04, 0x03B9, 0 } +"\x1f\x85\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0D, 0x0399, 0 }, { 0x1F85, 0, 0 }, { 0x1F8D, 0, 0 }, { 0x1F05, 0x03B9, 0 } +"\x1f\x86\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0E, 0x0399, 0 }, { 0x1F86, 0, 0 }, { 0x1F8E, 0, 0 }, { 0x1F06, 0x03B9, 0 } +"\x1f\x87\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0F, 0x0399, 0 }, { 0x1F87, 0, 0 }, { 0x1F8F, 0, 0 }, { 0x1F07, 0x03B9, 0 } +"\x1f\x88\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F08, 0x0399, 0 }, { 0x1F80, 0, 0 }, { 0x1F88, 0, 0 }, { 0x1F00, 0x03B9, 0 } +"\x1f\x89\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F09, 0x0399, 0 }, { 0x1F81, 0, 0 }, { 0x1F89, 0, 0 }, { 0x1F01, 0x03B9, 0 } +"\x1f\x8a\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0A, 0x0399, 0 }, { 0x1F82, 0, 0 }, { 0x1F8A, 0, 0 }, { 0x1F02, 0x03B9, 0 } +"\x1f\x8b\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0B, 0x0399, 0 }, { 0x1F83, 0, 0 }, { 0x1F8B, 0, 0 }, { 0x1F03, 0x03B9, 0 } +"\x1f\x8c\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0C, 0x0399, 0 }, { 0x1F84, 0, 0 }, { 0x1F8C, 0, 0 }, { 0x1F04, 0x03B9, 0 } +"\x1f\x8d\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0D, 0x0399, 0 }, { 0x1F85, 0, 0 }, { 0x1F8D, 0, 0 }, { 0x1F05, 0x03B9, 0 } +"\x1f\x8e\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0E, 0x0399, 0 }, { 0x1F86, 0, 0 }, { 0x1F8E, 0, 0 }, { 0x1F06, 0x03B9, 0 } +"\x1f\x8f\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0F, 0x0399, 0 }, { 0x1F87, 0, 0 }, { 0x1F8F, 0, 0 }, { 0x1F07, 0x03B9, 0 } +"\x1f\x90\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F28, 0x0399, 0 }, { 0x1F90, 0, 0 }, { 0x1F98, 0, 0 }, { 0x1F20, 0x03B9, 0 } +"\x1f\x91\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F29, 0x0399, 0 }, { 0x1F91, 0, 0 }, { 0x1F99, 0, 0 }, { 0x1F21, 0x03B9, 0 } +"\x1f\x92\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2A, 0x0399, 0 }, { 0x1F92, 0, 0 }, { 0x1F9A, 0, 0 }, { 0x1F22, 0x03B9, 0 } +"\x1f\x93\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2B, 0x0399, 0 }, { 0x1F93, 0, 0 }, { 0x1F9B, 0, 0 }, { 0x1F23, 0x03B9, 0 } +"\x1f\x94\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2C, 0x0399, 0 }, { 0x1F94, 0, 0 }, { 0x1F9C, 0, 0 }, { 0x1F24, 0x03B9, 0 } +"\x1f\x95\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2D, 0x0399, 0 }, { 0x1F95, 0, 0 }, { 0x1F9D, 0, 0 }, { 0x1F25, 0x03B9, 0 } +"\x1f\x96\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2E, 0x0399, 0 }, { 0x1F96, 0, 0 }, { 0x1F9E, 0, 0 }, { 0x1F26, 0x03B9, 0 } +"\x1f\x97\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2F, 0x0399, 0 }, { 0x1F97, 0, 0 }, { 0x1F9F, 0, 0 }, { 0x1F27, 0x03B9, 0 } +"\x1f\x98\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F28, 0x0399, 0 }, { 0x1F90, 0, 0 }, { 0x1F98, 0, 0 }, { 0x1F20, 0x03B9, 0 } +"\x1f\x99\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F29, 0x0399, 0 }, { 0x1F91, 0, 0 }, { 0x1F99, 0, 0 }, { 0x1F21, 0x03B9, 0 } +"\x1f\x9a\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2A, 0x0399, 0 }, { 0x1F92, 0, 0 }, { 0x1F9A, 0, 0 }, { 0x1F22, 0x03B9, 0 } +"\x1f\x9b\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2B, 0x0399, 0 }, { 0x1F93, 0, 0 }, { 0x1F9B, 0, 0 }, { 0x1F23, 0x03B9, 0 } +"\x1f\x9c\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2C, 0x0399, 0 }, { 0x1F94, 0, 0 }, { 0x1F9C, 0, 0 }, { 0x1F24, 0x03B9, 0 } +"\x1f\x9d\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2D, 0x0399, 0 }, { 0x1F95, 0, 0 }, { 0x1F9D, 0, 0 }, { 0x1F25, 0x03B9, 0 } +"\x1f\x9e\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2E, 0x0399, 0 }, { 0x1F96, 0, 0 }, { 0x1F9E, 0, 0 }, { 0x1F26, 0x03B9, 0 } +"\x1f\x9f\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2F, 0x0399, 0 }, { 0x1F97, 0, 0 }, { 0x1F9F, 0, 0 }, { 0x1F27, 0x03B9, 0 } +"\x1f\xa0\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F68, 0x0399, 0 }, { 0x1FA0, 0, 0 }, { 0x1FA8, 0, 0 }, { 0x1F60, 0x03B9, 0 } +"\x1f\xa1\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F69, 0x0399, 0 }, { 0x1FA1, 0, 0 }, { 0x1FA9, 0, 0 }, { 0x1F61, 0x03B9, 0 } +"\x1f\xa2\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6A, 0x0399, 0 }, { 0x1FA2, 0, 0 }, { 0x1FAA, 0, 0 }, { 0x1F62, 0x03B9, 0 } +"\x1f\xa3\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6B, 0x0399, 0 }, { 0x1FA3, 0, 0 }, { 0x1FAB, 0, 0 }, { 0x1F63, 0x03B9, 0 } +"\x1f\xa4\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6C, 0x0399, 0 }, { 0x1FA4, 0, 0 }, { 0x1FAC, 0, 0 }, { 0x1F64, 0x03B9, 0 } +"\x1f\xa5\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6D, 0x0399, 0 }, { 0x1FA5, 0, 0 }, { 0x1FAD, 0, 0 }, { 0x1F65, 0x03B9, 0 } +"\x1f\xa6\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6E, 0x0399, 0 }, { 0x1FA6, 0, 0 }, { 0x1FAE, 0, 0 }, { 0x1F66, 0x03B9, 0 } +"\x1f\xa7\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6F, 0x0399, 0 }, { 0x1FA7, 0, 0 }, { 0x1FAF, 0, 0 }, { 0x1F67, 0x03B9, 0 } +"\x1f\xa8\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F68, 0x0399, 0 }, { 0x1FA0, 0, 0 }, { 0x1FA8, 0, 0 }, { 0x1F60, 0x03B9, 0 } +"\x1f\xa9\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F69, 0x0399, 0 }, { 0x1FA1, 0, 0 }, { 0x1FA9, 0, 0 }, { 0x1F61, 0x03B9, 0 } +"\x1f\xaa\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6A, 0x0399, 0 }, { 0x1FA2, 0, 0 }, { 0x1FAA, 0, 0 }, { 0x1F62, 0x03B9, 0 } +"\x1f\xab\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6B, 0x0399, 0 }, { 0x1FA3, 0, 0 }, { 0x1FAB, 0, 0 }, { 0x1F63, 0x03B9, 0 } +"\x1f\xac\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6C, 0x0399, 0 }, { 0x1FA4, 0, 0 }, { 0x1FAC, 0, 0 }, { 0x1F64, 0x03B9, 0 } +"\x1f\xad\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6D, 0x0399, 0 }, { 0x1FA5, 0, 0 }, { 0x1FAD, 0, 0 }, { 0x1F65, 0x03B9, 0 } +"\x1f\xae\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6E, 0x0399, 0 }, { 0x1FA6, 0, 0 }, { 0x1FAE, 0, 0 }, { 0x1F66, 0x03B9, 0 } +"\x1f\xaf\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6F, 0x0399, 0 }, { 0x1FA7, 0, 0 }, { 0x1FAF, 0, 0 }, { 0x1F67, 0x03B9, 0 } +"\x1f\xb2\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1FBA, 0x0399, 0 }, { 0x1FB2, 0, 0 }, { 0x1FBA, 0x0345, 0 }, { 0x1F70, 0x03B9, 0 } +"\x1f\xb3\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0391, 0x0399, 0 }, { 0x1FB3, 0, 0 }, { 0x1FBC, 0, 0 }, { 0x03B1, 0x03B9, 0 } +"\x1f\xb4\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0386, 0x0399, 0 }, { 0x1FB4, 0, 0 }, { 0x0386, 0x0345, 0 }, { 0x03AC, 0x03B9, 0 } +"\x1f\xb6\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0391, 0x0342, 0 }, { 0x1FB6, 0, 0 }, { 0x0391, 0x0342, 0 }, { 0x03B1, 0x0342, 0 } +"\x1f\xb7\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0391, 0x0342, 0x0399 }, { 0x1FB7, 0, 0 }, { 0x0391, 0x0342, 0x0345 }, { 0x03B1, 0x0342, 0x03B9 } +"\x1f\xbc\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0391, 0x0399, 0 }, { 0x1FB3, 0, 0 }, { 0x1FBC, 0, 0 }, { 0x03B1, 0x03B9, 0 } +"\x1f\xc2\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1FCA, 0x0399, 0 }, { 0x1FC2, 0, 0 }, { 0x1FCA, 0x0345, 0 }, { 0x1F74, 0x03B9, 0 } +"\x1f\xc3\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0397, 0x0399, 0 }, { 0x1FC3, 0, 0 }, { 0x1FCC, 0, 0 }, { 0x03B7, 0x03B9, 0 } +"\x1f\xc4\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0389, 0x0399, 0 }, { 0x1FC4, 0, 0 }, { 0x0389, 0x0345, 0 }, { 0x03AE, 0x03B9, 0 } +"\x1f\xc6\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0397, 0x0342, 0 }, { 0x1FC6, 0, 0 }, { 0x0397, 0x0342, 0 }, { 0x03B7, 0x0342, 0 } +"\x1f\xc7\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0397, 0x0342, 0x0399 }, { 0x1FC7, 0, 0 }, { 0x0397, 0x0342, 0x0345 }, { 0x03B7, 0x0342, 0x03B9 } +"\x1f\xcc\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0397, 0x0399, 0 }, { 0x1FC3, 0, 0 }, { 0x1FCC, 0, 0 }, { 0x03B7, 0x03B9, 0 } +"\x1f\xd2\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0308, 0x0300 }, { 0x1FD2, 0, 0 }, { 0x0399, 0x0308, 0x0300 }, { 0x03B9, 0x0308, 0x0300 } +"\x1f\xd3\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0308, 0x0301 }, { 0x1FD3, 0, 0 }, { 0x0399, 0x0308, 0x0301 }, { 0x03B9, 0x0308, 0x0301 } +"\x1f\xd6\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0342, 0 }, { 0x1FD6, 0, 0 }, { 0x0399, 0x0342, 0 }, { 0x03B9, 0x0342, 0 } +"\x1f\xd7\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0308, 0x0342 }, { 0x1FD7, 0, 0 }, { 0x0399, 0x0308, 0x0342 }, { 0x03B9, 0x0308, 0x0342 } +"\x1f\xe2\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0308, 0x0300 }, { 0x1FE2, 0, 0 }, { 0x03A5, 0x0308, 0x0300 }, { 0x03C5, 0x0308, 0x0300 } +"\x1f\xe3\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0308, 0x0301 }, { 0x1FE3, 0, 0 }, { 0x03A5, 0x0308, 0x0301 }, { 0x03C5, 0x0308, 0x0301 } +"\x1f\xe4\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A1, 0x0313, 0 }, { 0x1FE4, 0, 0 }, { 0x03A1, 0x0313, 0 }, { 0x03C1, 0x0313, 0 } +"\x1f\xe6\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0342, 0 }, { 0x1FE6, 0, 0 }, { 0x03A5, 0x0342, 0 }, { 0x03C5, 0x0342, 0 } +"\x1f\xe7\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0308, 0x0342 }, { 0x1FE7, 0, 0 }, { 0x03A5, 0x0308, 0x0342 }, { 0x03C5, 0x0308, 0x0342 } +"\x1f\xf2\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1FFA, 0x0399, 0 }, { 0x1FF2, 0, 0 }, { 0x1FFA, 0x0345, 0 }, { 0x1F7C, 0x03B9, 0 } +"\x1f\xf3\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A9, 0x0399, 0 }, { 0x1FF3, 0, 0 }, { 0x1FFC, 0, 0 }, { 0x03C9, 0x03B9, 0 } +"\x1f\xf4\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x038F, 0x0399, 0 }, { 0x1FF4, 0, 0 }, { 0x038F, 0x0345, 0 }, { 0x03CE, 0x03B9, 0 } +"\x1f\xf6\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A9, 0x0342, 0 }, { 0x1FF6, 0, 0 }, { 0x03A9, 0x0342, 0 }, { 0x03C9, 0x0342, 0 } +"\x1f\xf7\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A9, 0x0342, 0x0399 }, { 0x1FF7, 0, 0 }, { 0x03A9, 0x0342, 0x0345 }, { 0x03C9, 0x0342, 0x03B9 } +"\x1f\xfc\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A9, 0x0399, 0 }, { 0x1FF3, 0, 0 }, { 0x1FFC, 0, 0 }, { 0x03C9, 0x03B9, 0 } +"\xfb\x00\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x0046, 0 }, { 0xFB00, 0, 0 }, { 0x0046, 0x0066, 0 }, { 0x0066, 0x0066, 0 } +"\xfb\x01\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x0049, 0 }, { 0xFB01, 0, 0 }, { 0x0046, 0x0069, 0 }, { 0x0066, 0x0069, 0 } +"\xfb\x02\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x004C, 0 }, { 0xFB02, 0, 0 }, { 0x0046, 0x006C, 0 }, { 0x0066, 0x006C, 0 } +"\xfb\x03\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x0046, 0x0049 }, { 0xFB03, 0, 0 }, { 0x0046, 0x0066, 0x0069 }, { 0x0066, 0x0066, 0x0069 } +"\xfb\x04\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x0046, 0x004C }, { 0xFB04, 0, 0 }, { 0x0046, 0x0066, 0x006C }, { 0x0066, 0x0066, 0x006C } +"\xfb\x05\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0053, 0x0054, 0 }, { 0xFB05, 0, 0 }, { 0x0053, 0x0074, 0 }, { 0x0073, 0x0074, 0 } +"\xfb\x06\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0053, 0x0054, 0 }, { 0xFB06, 0, 0 }, { 0x0053, 0x0074, 0 }, { 0x0073, 0x0074, 0 } +"\xfb\x13\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0544, 0x0546, 0 }, { 0xFB13, 0, 0 }, { 0x0544, 0x0576, 0 }, { 0x0574, 0x0576, 0 } +"\xfb\x14\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0544, 0x0535, 0 }, { 0xFB14, 0, 0 }, { 0x0544, 0x0565, 0 }, { 0x0574, 0x0565, 0 } +"\xfb\x15\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0544, 0x053B, 0 }, { 0xFB15, 0, 0 }, { 0x0544, 0x056B, 0 }, { 0x0574, 0x056B, 0 } +"\xfb\x16\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x054E, 0x0546, 0 }, { 0xFB16, 0, 0 }, { 0x054E, 0x0576, 0 }, { 0x057E, 0x0576, 0 } +"\xfb\x17\x00", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0544, 0x053D, 0 }, { 0xFB17, 0, 0 }, { 0x0544, 0x056D, 0 }, { 0x0574, 0x056D, 0 } diff --git a/lib/unicase/special-casing-table.h b/lib/unicase/special-casing-table.h new file mode 100644 index 0000000..4943541 --- /dev/null +++ b/lib/unicase/special-casing-table.h @@ -0,0 +1,333 @@ +/* ANSI-C code produced by gperf version 3.1 */ +/* Command-line: gperf -m 10 ./unicase/special-casing-table.gperf */ +/* Computed positions: -k'1-3' */ + + +#define TOTAL_KEYWORDS 122 +#define MIN_WORD_LENGTH 3 +#define MAX_WORD_LENGTH 3 +#define MIN_HASH_VALUE 0 +#define MAX_HASH_VALUE 121 +/* maximum key range = 122, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +/*ARGSUSED*/ +static unsigned int +gl_unicase_special_hash (register const char *str, register size_t len) +{ + static const unsigned char asso_values[] = + { + 2, 0, 4, 5, 37, 12, 121, 4, 122, 122, + 122, 122, 122, 122, 122, 122, 122, 122, 122, 120, + 119, 118, 117, 116, 122, 122, 122, 122, 122, 122, + 5, 3, 122, 122, 122, 122, 122, 122, 122, 122, + 115, 122, 122, 122, 122, 122, 114, 122, 6, 122, + 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, + 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, + 122, 122, 122, 1, 111, 122, 122, 122, 122, 122, + 109, 122, 108, 122, 107, 122, 106, 122, 122, 122, + 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, + 122, 122, 122, 122, 122, 33, 122, 122, 122, 122, + 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, + 122, 122, 122, 122, 122, 122, 122, 122, 105, 104, + 103, 102, 101, 100, 99, 31, 98, 97, 96, 95, + 94, 93, 92, 91, 28, 90, 89, 88, 87, 86, + 27, 24, 23, 20, 19, 85, 84, 83, 16, 82, + 81, 80, 79, 15, 78, 77, 76, 75, 74, 73, + 72, 71, 70, 69, 68, 67, 64, 122, 65, 64, + 63, 122, 62, 61, 122, 122, 122, 122, 60, 122, + 122, 122, 122, 122, 59, 58, 57, 122, 56, 55, + 122, 122, 122, 122, 14, 55, 122, 122, 122, 122, + 53, 52, 122, 122, 51, 50, 122, 122, 122, 122, + 122, 122, 122, 50, 122, 122, 48, 47, 46, 122, + 45, 44, 122, 122, 122, 122, 122, 122, 122, 122, + 46, 122, 42, 41, 39, 122, 38, 35, 122, 122, + 122, 0, 33, 122, 122, 122, 122 + }; + return asso_values[(unsigned char)str[2]+1] + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]]; +} + +const struct special_casing_rule * +gl_unicase_special_lookup (register const char *str, register size_t len) +{ + static const unsigned char lengthtable[] = + { + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + }; + static const struct special_casing_rule wordlist[] = + { +#line 126 "./unicase/special-casing-table.gperf" + {"\373\001\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x0049, 0 }, { 0xFB01, 0, 0 }, { 0x0046, 0x0069, 0 }, { 0x0066, 0x0069, 0 }}, +#line 31 "./unicase/special-casing-table.gperf" + {"\001I\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x02BC, 0x004E, 0 }, { 0x0149, 0, 0 }, { 0x02BC, 0x004E, 0 }, { 0x02BC, 0x006E, 0 }}, +#line 125 "./unicase/special-casing-table.gperf" + {"\373\000\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x0046, 0 }, { 0xFB00, 0, 0 }, { 0x0046, 0x0066, 0 }, { 0x0066, 0x0066, 0 }}, +#line 15 "./unicase/special-casing-table.gperf" + {"\000I\000", 1, SCC_MORE_ABOVE , { 'l', 't' }, { 0x0049, 0, 0 }, { 0x0069, 0x0307, 0 }, { 0x0049, 0, 0 }, { 0x0069, 0, 0 }}, +#line 127 "./unicase/special-casing-table.gperf" + {"\373\002\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x004C, 0 }, { 0xFB02, 0, 0 }, { 0x0046, 0x006C, 0 }, { 0x0066, 0x006C, 0 }}, +#line 128 "./unicase/special-casing-table.gperf" + {"\373\003\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x0046, 0x0049 }, { 0xFB03, 0, 0 }, { 0x0046, 0x0066, 0x0069 }, { 0x0066, 0x0066, 0x0069 }}, +#line 28 "./unicase/special-casing-table.gperf" + {"\0010\000", 1, SCC_ALWAYS , { 't', 'r' }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }}, +#line 16 "./unicase/special-casing-table.gperf" + {"\000I\001", 1, -SCC_BEFORE_DOT , { 't', 'r' }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 }}, +#line 17 "./unicase/special-casing-table.gperf" + {"\000I\002", 1, -SCC_BEFORE_DOT , { 'a', 'z' }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 }}, +#line 33 "./unicase/special-casing-table.gperf" + {"\003\007\000", 1, SCC_AFTER_SOFT_DOTTED, { 'l', 't' }, { 0, 0, 0 }, { 0x0307, 0, 0 }, { 0, 0, 0 }, { 0x0307, 0, 0 }}, +#line 29 "./unicase/special-casing-table.gperf" + {"\0010\001", 1, SCC_ALWAYS , { 'a', 'z' }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }}, +#line 30 "./unicase/special-casing-table.gperf" + {"\0010\002", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0130, 0, 0 }, { 0x0069, 0x0307, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0x0307, 0 }}, +#line 130 "./unicase/special-casing-table.gperf" + {"\373\005\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0053, 0x0054, 0 }, { 0xFB05, 0, 0 }, { 0x0053, 0x0074, 0 }, { 0x0073, 0x0074, 0 }}, +#line 34 "./unicase/special-casing-table.gperf" + {"\003\007\001", 1, SCC_AFTER_I , { 't', 'r' }, { 0x0307, 0, 0 }, { 0, 0, 0 }, { 0x0307, 0, 0 }, { 0x0307, 0, 0 }}, +#line 35 "./unicase/special-casing-table.gperf" + {"\003\007\002", 0, SCC_AFTER_I , { 'a', 'z' }, { 0x0307, 0, 0 }, { 0, 0, 0 }, { 0x0307, 0, 0 }, { 0x0307, 0, 0 }}, +#line 19 "./unicase/special-casing-table.gperf" + {"\000I\004", 0, SCC_ALWAYS , { 'a', 'z' }, { 0x0049, 0, 0 }, { 0x0069, 0, 0 }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 }}, +#line 23 "./unicase/special-casing-table.gperf" + {"\000\314\000", 0, SCC_ALWAYS , { 'l', 't' }, { 0x00CC, 0, 0 }, { 0x0069, 0x0307, 0x0300 }, { 0x00CC, 0, 0 }, { 0x00EC, 0, 0 }}, +#line 109 "./unicase/special-casing-table.gperf" + {"\037\314\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0397, 0x0399, 0 }, { 0x1FC3, 0, 0 }, { 0x1FCC, 0, 0 }, { 0x03B7, 0x03B9, 0 }}, +#line 85 "./unicase/special-casing-table.gperf" + {"\037\243\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6B, 0x0399, 0 }, { 0x1FA3, 0, 0 }, { 0x1FAB, 0, 0 }, { 0x1F63, 0x03B9, 0 }}, +#line 80 "./unicase/special-casing-table.gperf" + {"\037\236\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2E, 0x0399, 0 }, { 0x1F96, 0, 0 }, { 0x1F9E, 0, 0 }, { 0x1F26, 0x03B9, 0 }}, +#line 37 "./unicase/special-casing-table.gperf" + {"\003\243\000", 0, SCC_FINAL_SIGMA , { '\0', '\0' }, { 0x03A3, 0, 0 }, { 0x03C2, 0, 0 }, { 0x03A3, 0, 0 }, { 0x03C3, 0, 0 }}, +#line 45 "./unicase/special-casing-table.gperf" + {"\036\236\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1E9E, 0, 0 }, { 0x00DF, 0, 0 }, { 0x1E9E, 0, 0 }, { 0x0073, 0x0073, 0 }}, +#line 76 "./unicase/special-casing-table.gperf" + {"\037\232\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2A, 0x0399, 0 }, { 0x1F92, 0, 0 }, { 0x1F9A, 0, 0 }, { 0x1F22, 0x03B9, 0 }}, +#line 75 "./unicase/special-casing-table.gperf" + {"\037\231\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F29, 0x0399, 0 }, { 0x1F91, 0, 0 }, { 0x1F99, 0, 0 }, { 0x1F21, 0x03B9, 0 }}, +#line 44 "./unicase/special-casing-table.gperf" + {"\036\232\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0041, 0x02BE, 0 }, { 0x1E9A, 0, 0 }, { 0x0041, 0x02BE, 0 }, { 0x0061, 0x02BE, 0 }}, +#line 43 "./unicase/special-casing-table.gperf" + {"\036\231\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0059, 0x030A, 0 }, { 0x1E99, 0, 0 }, { 0x0059, 0x030A, 0 }, { 0x0079, 0x030A, 0 }}, +#line 74 "./unicase/special-casing-table.gperf" + {"\037\230\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F28, 0x0399, 0 }, { 0x1F90, 0, 0 }, { 0x1F98, 0, 0 }, { 0x1F20, 0x03B9, 0 }}, +#line 73 "./unicase/special-casing-table.gperf" + {"\037\227\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2F, 0x0399, 0 }, { 0x1F97, 0, 0 }, { 0x1F9F, 0, 0 }, { 0x1F27, 0x03B9, 0 }}, +#line 42 "./unicase/special-casing-table.gperf" + {"\036\230\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0057, 0x030A, 0 }, { 0x1E98, 0, 0 }, { 0x0057, 0x030A, 0 }, { 0x0077, 0x030A, 0 }}, +#line 41 "./unicase/special-casing-table.gperf" + {"\036\227\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0054, 0x0308, 0 }, { 0x1E97, 0, 0 }, { 0x0054, 0x0308, 0 }, { 0x0074, 0x0308, 0 }}, +#line 72 "./unicase/special-casing-table.gperf" + {"\037\226\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2E, 0x0399, 0 }, { 0x1F96, 0, 0 }, { 0x1F9E, 0, 0 }, { 0x1F26, 0x03B9, 0 }}, +#line 66 "./unicase/special-casing-table.gperf" + {"\037\220\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F28, 0x0399, 0 }, { 0x1F90, 0, 0 }, { 0x1F98, 0, 0 }, { 0x1F20, 0x03B9, 0 }}, +#line 40 "./unicase/special-casing-table.gperf" + {"\036\226\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0048, 0x0331, 0 }, { 0x1E96, 0, 0 }, { 0x0048, 0x0331, 0 }, { 0x0068, 0x0331, 0 }}, +#line 36 "./unicase/special-casing-table.gperf" + {"\003\220\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0308, 0x0301 }, { 0x0390, 0, 0 }, { 0x0399, 0x0308, 0x0301 }, { 0x03B9, 0x0308, 0x0301 }}, +#line 57 "./unicase/special-casing-table.gperf" + {"\037\207\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0F, 0x0399, 0 }, { 0x1F87, 0, 0 }, { 0x1F8F, 0, 0 }, { 0x1F07, 0x03B9, 0 }}, +#line 21 "./unicase/special-casing-table.gperf" + {"\000i\000", 1, SCC_ALWAYS , { 't', 'r' }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }}, +#line 124 "./unicase/special-casing-table.gperf" + {"\037\374\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A9, 0x0399, 0 }, { 0x1FF3, 0, 0 }, { 0x1FFC, 0, 0 }, { 0x03C9, 0x03B9, 0 }}, +#line 129 "./unicase/special-casing-table.gperf" + {"\373\004\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0046, 0x0046, 0x004C }, { 0xFB04, 0, 0 }, { 0x0046, 0x0066, 0x006C }, { 0x0066, 0x0066, 0x006C }}, +#line 123 "./unicase/special-casing-table.gperf" + {"\037\367\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A9, 0x0342, 0x0399 }, { 0x1FF7, 0, 0 }, { 0x03A9, 0x0342, 0x0345 }, { 0x03C9, 0x0342, 0x03B9 }}, +#line 22 "./unicase/special-casing-table.gperf" + {"\000i\001", 0, SCC_ALWAYS , { 'a', 'z' }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }, { 0x0130, 0, 0 }, { 0x0069, 0, 0 }}, +#line 18 "./unicase/special-casing-table.gperf" + {"\000I\003", 1, SCC_ALWAYS , { 't', 'r' }, { 0x0049, 0, 0 }, { 0x0069, 0, 0 }, { 0x0049, 0, 0 }, { 0x0131, 0, 0 }}, +#line 122 "./unicase/special-casing-table.gperf" + {"\037\366\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A9, 0x0342, 0 }, { 0x1FF6, 0, 0 }, { 0x03A9, 0x0342, 0 }, { 0x03C9, 0x0342, 0 }}, +#line 121 "./unicase/special-casing-table.gperf" + {"\037\364\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x038F, 0x0399, 0 }, { 0x1FF4, 0, 0 }, { 0x038F, 0x0345, 0 }, { 0x03CE, 0x03B9, 0 }}, +#line 39 "./unicase/special-casing-table.gperf" + {"\005\207\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0535, 0x0552, 0 }, { 0x0587, 0, 0 }, { 0x0535, 0x0582, 0 }, { 0x0565, 0x0582, 0 }}, +#line 120 "./unicase/special-casing-table.gperf" + {"\037\363\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A9, 0x0399, 0 }, { 0x1FF3, 0, 0 }, { 0x1FFC, 0, 0 }, { 0x03C9, 0x03B9, 0 }}, +#line 119 "./unicase/special-casing-table.gperf" + {"\037\362\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1FFA, 0x0399, 0 }, { 0x1FF2, 0, 0 }, { 0x1FFA, 0x0345, 0 }, { 0x1F7C, 0x03B9, 0 }}, +#line 32 "./unicase/special-casing-table.gperf" + {"\001\360\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x004A, 0x030C, 0 }, { 0x01F0, 0, 0 }, { 0x004A, 0x030C, 0 }, { 0x006A, 0x030C, 0 }}, +#line 118 "./unicase/special-casing-table.gperf" + {"\037\347\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0308, 0x0342 }, { 0x1FE7, 0, 0 }, { 0x03A5, 0x0308, 0x0342 }, { 0x03C5, 0x0308, 0x0342 }}, +#line 117 "./unicase/special-casing-table.gperf" + {"\037\346\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0342, 0 }, { 0x1FE6, 0, 0 }, { 0x03A5, 0x0342, 0 }, { 0x03C5, 0x0342, 0 }}, +#line 116 "./unicase/special-casing-table.gperf" + {"\037\344\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A1, 0x0313, 0 }, { 0x1FE4, 0, 0 }, { 0x03A1, 0x0313, 0 }, { 0x03C1, 0x0313, 0 }}, +#line 115 "./unicase/special-casing-table.gperf" + {"\037\343\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0308, 0x0301 }, { 0x1FE3, 0, 0 }, { 0x03A5, 0x0308, 0x0301 }, { 0x03C5, 0x0308, 0x0301 }}, +#line 114 "./unicase/special-casing-table.gperf" + {"\037\342\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0308, 0x0300 }, { 0x1FE2, 0, 0 }, { 0x03A5, 0x0308, 0x0300 }, { 0x03C5, 0x0308, 0x0300 }}, +#line 25 "./unicase/special-casing-table.gperf" + {"\000\337\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0053, 0x0053, 0 }, { 0x00DF, 0, 0 }, { 0x0053, 0x0073, 0 }, { 0x0073, 0x0073, 0 }}, +#line 113 "./unicase/special-casing-table.gperf" + {"\037\327\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0308, 0x0342 }, { 0x1FD7, 0, 0 }, { 0x0399, 0x0308, 0x0342 }, { 0x03B9, 0x0308, 0x0342 }}, +#line 112 "./unicase/special-casing-table.gperf" + {"\037\326\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0342, 0 }, { 0x1FD6, 0, 0 }, { 0x0399, 0x0342, 0 }, { 0x03B9, 0x0342, 0 }}, +#line 111 "./unicase/special-casing-table.gperf" + {"\037\323\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0308, 0x0301 }, { 0x1FD3, 0, 0 }, { 0x0399, 0x0308, 0x0301 }, { 0x03B9, 0x0308, 0x0301 }}, +#line 110 "./unicase/special-casing-table.gperf" + {"\037\322\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0399, 0x0308, 0x0300 }, { 0x1FD2, 0, 0 }, { 0x0399, 0x0308, 0x0300 }, { 0x03B9, 0x0308, 0x0300 }}, +#line 24 "./unicase/special-casing-table.gperf" + {"\000\315\000", 0, SCC_ALWAYS , { 'l', 't' }, { 0x00CD, 0, 0 }, { 0x0069, 0x0307, 0x0301 }, { 0x00CD, 0, 0 }, { 0x00ED, 0, 0 }}, +#line 108 "./unicase/special-casing-table.gperf" + {"\037\307\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0397, 0x0342, 0x0399 }, { 0x1FC7, 0, 0 }, { 0x0397, 0x0342, 0x0345 }, { 0x03B7, 0x0342, 0x03B9 }}, +#line 107 "./unicase/special-casing-table.gperf" + {"\037\306\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0397, 0x0342, 0 }, { 0x1FC6, 0, 0 }, { 0x0397, 0x0342, 0 }, { 0x03B7, 0x0342, 0 }}, +#line 106 "./unicase/special-casing-table.gperf" + {"\037\304\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0389, 0x0399, 0 }, { 0x1FC4, 0, 0 }, { 0x0389, 0x0345, 0 }, { 0x03AE, 0x03B9, 0 }}, +#line 105 "./unicase/special-casing-table.gperf" + {"\037\303\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0397, 0x0399, 0 }, { 0x1FC3, 0, 0 }, { 0x1FCC, 0, 0 }, { 0x03B7, 0x03B9, 0 }}, +#line 104 "./unicase/special-casing-table.gperf" + {"\037\302\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1FCA, 0x0399, 0 }, { 0x1FC2, 0, 0 }, { 0x1FCA, 0x0345, 0 }, { 0x1F74, 0x03B9, 0 }}, +#line 103 "./unicase/special-casing-table.gperf" + {"\037\274\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0391, 0x0399, 0 }, { 0x1FB3, 0, 0 }, { 0x1FBC, 0, 0 }, { 0x03B1, 0x03B9, 0 }}, +#line 102 "./unicase/special-casing-table.gperf" + {"\037\267\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0391, 0x0342, 0x0399 }, { 0x1FB7, 0, 0 }, { 0x0391, 0x0342, 0x0345 }, { 0x03B1, 0x0342, 0x03B9 }}, +#line 101 "./unicase/special-casing-table.gperf" + {"\037\266\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0391, 0x0342, 0 }, { 0x1FB6, 0, 0 }, { 0x0391, 0x0342, 0 }, { 0x03B1, 0x0342, 0 }}, +#line 100 "./unicase/special-casing-table.gperf" + {"\037\264\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0386, 0x0399, 0 }, { 0x1FB4, 0, 0 }, { 0x0386, 0x0345, 0 }, { 0x03AC, 0x03B9, 0 }}, +#line 99 "./unicase/special-casing-table.gperf" + {"\037\263\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0391, 0x0399, 0 }, { 0x1FB3, 0, 0 }, { 0x1FBC, 0, 0 }, { 0x03B1, 0x03B9, 0 }}, +#line 98 "./unicase/special-casing-table.gperf" + {"\037\262\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1FBA, 0x0399, 0 }, { 0x1FB2, 0, 0 }, { 0x1FBA, 0x0345, 0 }, { 0x1F70, 0x03B9, 0 }}, +#line 38 "./unicase/special-casing-table.gperf" + {"\003\260\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0308, 0x0301 }, { 0x03B0, 0, 0 }, { 0x03A5, 0x0308, 0x0301 }, { 0x03C5, 0x0308, 0x0301 }}, +#line 97 "./unicase/special-casing-table.gperf" + {"\037\257\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6F, 0x0399, 0 }, { 0x1FA7, 0, 0 }, { 0x1FAF, 0, 0 }, { 0x1F67, 0x03B9, 0 }}, +#line 96 "./unicase/special-casing-table.gperf" + {"\037\256\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6E, 0x0399, 0 }, { 0x1FA6, 0, 0 }, { 0x1FAE, 0, 0 }, { 0x1F66, 0x03B9, 0 }}, +#line 95 "./unicase/special-casing-table.gperf" + {"\037\255\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6D, 0x0399, 0 }, { 0x1FA5, 0, 0 }, { 0x1FAD, 0, 0 }, { 0x1F65, 0x03B9, 0 }}, +#line 94 "./unicase/special-casing-table.gperf" + {"\037\254\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6C, 0x0399, 0 }, { 0x1FA4, 0, 0 }, { 0x1FAC, 0, 0 }, { 0x1F64, 0x03B9, 0 }}, +#line 93 "./unicase/special-casing-table.gperf" + {"\037\253\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6B, 0x0399, 0 }, { 0x1FA3, 0, 0 }, { 0x1FAB, 0, 0 }, { 0x1F63, 0x03B9, 0 }}, +#line 92 "./unicase/special-casing-table.gperf" + {"\037\252\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6A, 0x0399, 0 }, { 0x1FA2, 0, 0 }, { 0x1FAA, 0, 0 }, { 0x1F62, 0x03B9, 0 }}, +#line 91 "./unicase/special-casing-table.gperf" + {"\037\251\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F69, 0x0399, 0 }, { 0x1FA1, 0, 0 }, { 0x1FA9, 0, 0 }, { 0x1F61, 0x03B9, 0 }}, +#line 90 "./unicase/special-casing-table.gperf" + {"\037\250\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F68, 0x0399, 0 }, { 0x1FA0, 0, 0 }, { 0x1FA8, 0, 0 }, { 0x1F60, 0x03B9, 0 }}, +#line 89 "./unicase/special-casing-table.gperf" + {"\037\247\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6F, 0x0399, 0 }, { 0x1FA7, 0, 0 }, { 0x1FAF, 0, 0 }, { 0x1F67, 0x03B9, 0 }}, +#line 88 "./unicase/special-casing-table.gperf" + {"\037\246\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6E, 0x0399, 0 }, { 0x1FA6, 0, 0 }, { 0x1FAE, 0, 0 }, { 0x1F66, 0x03B9, 0 }}, +#line 87 "./unicase/special-casing-table.gperf" + {"\037\245\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6D, 0x0399, 0 }, { 0x1FA5, 0, 0 }, { 0x1FAD, 0, 0 }, { 0x1F65, 0x03B9, 0 }}, +#line 86 "./unicase/special-casing-table.gperf" + {"\037\244\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6C, 0x0399, 0 }, { 0x1FA4, 0, 0 }, { 0x1FAC, 0, 0 }, { 0x1F64, 0x03B9, 0 }}, +#line 84 "./unicase/special-casing-table.gperf" + {"\037\242\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F6A, 0x0399, 0 }, { 0x1FA2, 0, 0 }, { 0x1FAA, 0, 0 }, { 0x1F62, 0x03B9, 0 }}, +#line 83 "./unicase/special-casing-table.gperf" + {"\037\241\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F69, 0x0399, 0 }, { 0x1FA1, 0, 0 }, { 0x1FA9, 0, 0 }, { 0x1F61, 0x03B9, 0 }}, +#line 82 "./unicase/special-casing-table.gperf" + {"\037\240\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F68, 0x0399, 0 }, { 0x1FA0, 0, 0 }, { 0x1FA8, 0, 0 }, { 0x1F60, 0x03B9, 0 }}, +#line 81 "./unicase/special-casing-table.gperf" + {"\037\237\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2F, 0x0399, 0 }, { 0x1F97, 0, 0 }, { 0x1F9F, 0, 0 }, { 0x1F27, 0x03B9, 0 }}, +#line 79 "./unicase/special-casing-table.gperf" + {"\037\235\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2D, 0x0399, 0 }, { 0x1F95, 0, 0 }, { 0x1F9D, 0, 0 }, { 0x1F25, 0x03B9, 0 }}, +#line 78 "./unicase/special-casing-table.gperf" + {"\037\234\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2C, 0x0399, 0 }, { 0x1F94, 0, 0 }, { 0x1F9C, 0, 0 }, { 0x1F24, 0x03B9, 0 }}, +#line 77 "./unicase/special-casing-table.gperf" + {"\037\233\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2B, 0x0399, 0 }, { 0x1F93, 0, 0 }, { 0x1F9B, 0, 0 }, { 0x1F23, 0x03B9, 0 }}, +#line 71 "./unicase/special-casing-table.gperf" + {"\037\225\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2D, 0x0399, 0 }, { 0x1F95, 0, 0 }, { 0x1F9D, 0, 0 }, { 0x1F25, 0x03B9, 0 }}, +#line 70 "./unicase/special-casing-table.gperf" + {"\037\224\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2C, 0x0399, 0 }, { 0x1F94, 0, 0 }, { 0x1F9C, 0, 0 }, { 0x1F24, 0x03B9, 0 }}, +#line 69 "./unicase/special-casing-table.gperf" + {"\037\223\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2B, 0x0399, 0 }, { 0x1F93, 0, 0 }, { 0x1F9B, 0, 0 }, { 0x1F23, 0x03B9, 0 }}, +#line 68 "./unicase/special-casing-table.gperf" + {"\037\222\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F2A, 0x0399, 0 }, { 0x1F92, 0, 0 }, { 0x1F9A, 0, 0 }, { 0x1F22, 0x03B9, 0 }}, +#line 67 "./unicase/special-casing-table.gperf" + {"\037\221\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F29, 0x0399, 0 }, { 0x1F91, 0, 0 }, { 0x1F99, 0, 0 }, { 0x1F21, 0x03B9, 0 }}, +#line 65 "./unicase/special-casing-table.gperf" + {"\037\217\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0F, 0x0399, 0 }, { 0x1F87, 0, 0 }, { 0x1F8F, 0, 0 }, { 0x1F07, 0x03B9, 0 }}, +#line 64 "./unicase/special-casing-table.gperf" + {"\037\216\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0E, 0x0399, 0 }, { 0x1F86, 0, 0 }, { 0x1F8E, 0, 0 }, { 0x1F06, 0x03B9, 0 }}, +#line 63 "./unicase/special-casing-table.gperf" + {"\037\215\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0D, 0x0399, 0 }, { 0x1F85, 0, 0 }, { 0x1F8D, 0, 0 }, { 0x1F05, 0x03B9, 0 }}, +#line 62 "./unicase/special-casing-table.gperf" + {"\037\214\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0C, 0x0399, 0 }, { 0x1F84, 0, 0 }, { 0x1F8C, 0, 0 }, { 0x1F04, 0x03B9, 0 }}, +#line 61 "./unicase/special-casing-table.gperf" + {"\037\213\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0B, 0x0399, 0 }, { 0x1F83, 0, 0 }, { 0x1F8B, 0, 0 }, { 0x1F03, 0x03B9, 0 }}, +#line 60 "./unicase/special-casing-table.gperf" + {"\037\212\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0A, 0x0399, 0 }, { 0x1F82, 0, 0 }, { 0x1F8A, 0, 0 }, { 0x1F02, 0x03B9, 0 }}, +#line 59 "./unicase/special-casing-table.gperf" + {"\037\211\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F09, 0x0399, 0 }, { 0x1F81, 0, 0 }, { 0x1F89, 0, 0 }, { 0x1F01, 0x03B9, 0 }}, +#line 58 "./unicase/special-casing-table.gperf" + {"\037\210\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F08, 0x0399, 0 }, { 0x1F80, 0, 0 }, { 0x1F88, 0, 0 }, { 0x1F00, 0x03B9, 0 }}, +#line 56 "./unicase/special-casing-table.gperf" + {"\037\206\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0E, 0x0399, 0 }, { 0x1F86, 0, 0 }, { 0x1F8E, 0, 0 }, { 0x1F06, 0x03B9, 0 }}, +#line 55 "./unicase/special-casing-table.gperf" + {"\037\205\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0D, 0x0399, 0 }, { 0x1F85, 0, 0 }, { 0x1F8D, 0, 0 }, { 0x1F05, 0x03B9, 0 }}, +#line 54 "./unicase/special-casing-table.gperf" + {"\037\204\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0C, 0x0399, 0 }, { 0x1F84, 0, 0 }, { 0x1F8C, 0, 0 }, { 0x1F04, 0x03B9, 0 }}, +#line 53 "./unicase/special-casing-table.gperf" + {"\037\203\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0B, 0x0399, 0 }, { 0x1F83, 0, 0 }, { 0x1F8B, 0, 0 }, { 0x1F03, 0x03B9, 0 }}, +#line 52 "./unicase/special-casing-table.gperf" + {"\037\202\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F0A, 0x0399, 0 }, { 0x1F82, 0, 0 }, { 0x1F8A, 0, 0 }, { 0x1F02, 0x03B9, 0 }}, +#line 51 "./unicase/special-casing-table.gperf" + {"\037\201\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F09, 0x0399, 0 }, { 0x1F81, 0, 0 }, { 0x1F89, 0, 0 }, { 0x1F01, 0x03B9, 0 }}, +#line 50 "./unicase/special-casing-table.gperf" + {"\037\200\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x1F08, 0x0399, 0 }, { 0x1F80, 0, 0 }, { 0x1F88, 0, 0 }, { 0x1F00, 0x03B9, 0 }}, +#line 49 "./unicase/special-casing-table.gperf" + {"\037V\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0313, 0x0342 }, { 0x1F56, 0, 0 }, { 0x03A5, 0x0313, 0x0342 }, { 0x03C5, 0x0313, 0x0342 }}, +#line 48 "./unicase/special-casing-table.gperf" + {"\037T\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0313, 0x0301 }, { 0x1F54, 0, 0 }, { 0x03A5, 0x0313, 0x0301 }, { 0x03C5, 0x0313, 0x0301 }}, +#line 47 "./unicase/special-casing-table.gperf" + {"\037R\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0313, 0x0300 }, { 0x1F52, 0, 0 }, { 0x03A5, 0x0313, 0x0300 }, { 0x03C5, 0x0313, 0x0300 }}, +#line 46 "./unicase/special-casing-table.gperf" + {"\037P\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x03A5, 0x0313, 0 }, { 0x1F50, 0, 0 }, { 0x03A5, 0x0313, 0 }, { 0x03C5, 0x0313, 0 }}, +#line 20 "./unicase/special-casing-table.gperf" + {"\000J\000", 0, SCC_MORE_ABOVE , { 'l', 't' }, { 0x004A, 0, 0 }, { 0x006A, 0x0307, 0 }, { 0x004A, 0, 0 }, { 0x006A, 0, 0 }}, +#line 27 "./unicase/special-casing-table.gperf" + {"\001.\000", 0, SCC_MORE_ABOVE , { 'l', 't' }, { 0x012E, 0, 0 }, { 0x012F, 0x0307, 0 }, { 0x012E, 0, 0 }, { 0x012F, 0, 0 }}, +#line 26 "./unicase/special-casing-table.gperf" + {"\001(\000", 0, SCC_ALWAYS , { 'l', 't' }, { 0x0128, 0, 0 }, { 0x0069, 0x0307, 0x0303 }, { 0x0128, 0, 0 }, { 0x0129, 0, 0 }}, +#line 136 "./unicase/special-casing-table.gperf" + {"\373\027\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0544, 0x053D, 0 }, { 0xFB17, 0, 0 }, { 0x0544, 0x056D, 0 }, { 0x0574, 0x056D, 0 }}, +#line 135 "./unicase/special-casing-table.gperf" + {"\373\026\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x054E, 0x0546, 0 }, { 0xFB16, 0, 0 }, { 0x054E, 0x0576, 0 }, { 0x057E, 0x0576, 0 }}, +#line 134 "./unicase/special-casing-table.gperf" + {"\373\025\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0544, 0x053B, 0 }, { 0xFB15, 0, 0 }, { 0x0544, 0x056B, 0 }, { 0x0574, 0x056B, 0 }}, +#line 133 "./unicase/special-casing-table.gperf" + {"\373\024\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0544, 0x0535, 0 }, { 0xFB14, 0, 0 }, { 0x0544, 0x0565, 0 }, { 0x0574, 0x0565, 0 }}, +#line 132 "./unicase/special-casing-table.gperf" + {"\373\023\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0544, 0x0546, 0 }, { 0xFB13, 0, 0 }, { 0x0544, 0x0576, 0 }, { 0x0574, 0x0576, 0 }}, +#line 131 "./unicase/special-casing-table.gperf" + {"\373\006\000", 0, SCC_ALWAYS , { '\0', '\0' }, { 0x0053, 0x0054, 0 }, { 0xFB06, 0, 0 }, { 0x0053, 0x0074, 0 }, { 0x0073, 0x0074, 0 }} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register unsigned int key = gl_unicase_special_hash (str, len); + + if (key <= MAX_HASH_VALUE) + if (len == lengthtable[key]) + { + register const char *s = wordlist[key].code; + + if (*str == *s && !memcmp (str + 1, s + 1, len - 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/lib/unicase/special-casing.c b/lib/unicase/special-casing.c new file mode 100644 index 0000000..fa1f21b --- /dev/null +++ b/lib/unicase/special-casing.c @@ -0,0 +1,25 @@ +/* Special casing table. + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unicase/special-casing.h" + +#include <string.h> + +#include "unicase/special-casing-table.h" diff --git a/lib/unicase/special-casing.in.h b/lib/unicase/special-casing.in.h new file mode 100644 index 0000000..b28888f --- /dev/null +++ b/lib/unicase/special-casing.in.h @@ -0,0 +1,61 @@ +/* Special casing table. + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <stddef.h> + +/* A special casing context. + A context is negated through x -> -x. */ +enum +{ + SCC_ALWAYS = 0, + SCC_FINAL_SIGMA = 1, + SCC_AFTER_SOFT_DOTTED = 2, + SCC_MORE_ABOVE = 3, + SCC_BEFORE_DOT = 4, + SCC_AFTER_I = 5 +}; + +struct special_casing_rule +{ + /* The first two bytes are the code, in big-endian order. The third byte + only distinguishes different rules pertaining to the same code. */ + /*unsigned*/ char code[3]; + + /* True when this rule is not the last one for the given code. */ + /*bool*/ unsigned int has_next : 1; + + /* Context. */ + signed int context : 7; + + /* Language, or an empty string. */ + char language[2]; + + /* Mapping to upper case. Between 0 and 3 characters. Filled with 0s. */ + unsigned short upper[3]; + + /* Mapping to lower case. Between 0 and 3 characters. Filled with 0s. */ + unsigned short lower[3]; + + /* Mapping to title case. Between 0 and 3 characters. Filled with 0s. */ + unsigned short title[3]; + + /* Casefolding mapping. Between 0 and 3 characters. Filled with 0s. */ + unsigned short casefold[3]; +}; + +extern const struct special_casing_rule * + gl_unicase_special_lookup (const char *str, size_t len); diff --git a/lib/unicase/tolower.c b/lib/unicase/tolower.c new file mode 100644 index 0000000..edf257d --- /dev/null +++ b/lib/unicase/tolower.c @@ -0,0 +1,27 @@ +/* Lowercase mapping for Unicode characters (locale and context independent). + Copyright (C) 2002, 2006, 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unicase.h" + +/* Define u_mapping table. */ +#include "tolower.h" + +#define FUNC uc_tolower +#include "simple-mapping.h" diff --git a/lib/unicase/tolower.h b/lib/unicase/tolower.h new file mode 100644 index 0000000..6795557 --- /dev/null +++ b/lib/unicase/tolower.h @@ -0,0 +1,647 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Simple character mapping of Unicode characters. */ +/* Generated automatically by gen-uni-tables.c for Unicode 9.0.0. */ +#define mapping_header_0 16 +#define mapping_header_1 2 +#define mapping_header_2 7 +#define mapping_header_3 511 +#define mapping_header_4 127 +static const +struct + { + int level1[2]; + short level2[2 << 9]; + int level3[31 << 7]; + } +u_mapping = +{ + { 0, 512 }, + { + 0, 128, 256, 384, 512, -1, 640, 768, + 896, 1024, 1152, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 1280, -1, -1, -1, -1, -1, 1408, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 1536, 1664, 1792, 1920, + -1, -1, 2048, 2176, -1, -1, -1, -1, + -1, 2304, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 2432, 2560, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 2688, 2816, 2944, 3072, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 3200, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 3328, 3456, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 3584, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 3712, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 3840, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 + }, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, + 32, 32, 32, 32, 32, 32, 32, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + -199, 0, 1, 0, 1, 0, 1, 0, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + -121, 1, 0, 1, 0, 1, 0, 0, + 0, 210, 1, 0, 1, 0, 206, 1, + 0, 205, 205, 1, 0, 0, 79, 202, + 203, 1, 0, 205, 207, 0, 211, 209, + 1, 0, 0, 0, 211, 213, 0, 214, + 1, 0, 1, 0, 1, 0, 218, 1, + 0, 218, 0, 0, 1, 0, 218, 1, + 0, 217, 217, 1, 0, 1, 0, 219, + 1, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 2, 1, 0, 2, + 1, 0, 2, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 0, 2, 1, 0, 1, 0, -97, -56, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + -130, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 10795, 1, 0, -163, 10792, 0, + 0, 1, 0, -195, 69, 71, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 116, + 0, 0, 0, 0, 0, 0, 38, 0, + 37, 37, 37, 0, 64, 0, 63, 63, + 0, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 0, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, -60, 0, 0, 1, + 0, -7, 1, 0, 0, -130, -130, -130, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 15, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 0, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 7264, 7264, 7264, 7264, 7264, 7264, 7264, 7264, + 7264, 7264, 7264, 7264, 7264, 7264, 7264, 7264, + 7264, 7264, 7264, 7264, 7264, 7264, 7264, 7264, + 7264, 7264, 7264, 7264, 7264, 7264, 7264, 7264, + 7264, 7264, 7264, 7264, 7264, 7264, 0, 7264, + 0, 0, 0, 0, 0, 7264, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 38864, 38864, 38864, 38864, 38864, 38864, 38864, 38864, + 8, 8, 8, 8, 8, 8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, -7615, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, -8, -8, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, -8, -8, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, -8, -8, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, -8, 0, -8, 0, -8, 0, -8, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, -8, -8, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, -8, -8, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, -8, -8, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -8, -8, -8, -8, -8, -8, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -74, -74, -9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -86, -86, -86, -86, -9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -100, -100, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -8, -8, -112, -112, -7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -128, -128, -126, -126, -9, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, -7517, 0, + 0, 0, -8383, -8262, 0, 0, 0, 0, + 0, 0, 28, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, -10743, -3814, -10727, 0, 0, 1, + 0, 1, 0, 1, 0, -10780, -10749, -10783, + -10782, 0, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, -10815, -10815, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 0, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 0, -35332, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 0, 0, 0, 1, 0, -42280, 0, 0, + 1, 0, 1, 0, 0, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, -42308, -42319, -42315, -42305, -42308, 0, + -42258, -42282, -42261, 928, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + } +}; diff --git a/lib/unicase/u-casemap.h b/lib/unicase/u-casemap.h new file mode 100644 index 0000000..ecfcb45 --- /dev/null +++ b/lib/unicase/u-casemap.h @@ -0,0 +1,416 @@ +/* Case mapping for UTF-8/UTF-16/UTF-32 strings (locale dependent). + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +UNIT * +FUNC (const UNIT *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + ucs4_t (*single_character_map) (ucs4_t), + size_t offset_in_rule, /* offset in 'struct special_casing_rule' */ + uninorm_t nf, + UNIT *resultbuf, size_t *lengthp) +{ + /* The result being accumulated. */ + UNIT *result; + size_t length; + size_t allocated; + + /* Initialize the accumulator. */ + if (nf != NULL || resultbuf == NULL) + { + result = NULL; + allocated = 0; + } + else + { + result = resultbuf; + allocated = *lengthp; + } + length = 0; + + { + const UNIT *s_end = s + n; + + /* Helper for evaluating the FINAL_SIGMA condition: + Last character that was not case-ignorable. */ + ucs4_t last_char_except_ignorable = + prefix_context.last_char_except_ignorable; + + /* Helper for evaluating the AFTER_SOFT_DOTTED and AFTER_I conditions: + Last character that was of combining class 230 ("Above") or 0. */ + ucs4_t last_char_normal_or_above = + prefix_context.last_char_normal_or_above; + + while (s < s_end) + { + ucs4_t uc; + int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s); + + ucs4_t mapped_uc[3]; + unsigned int mapped_count; + + if (uc < 0x10000) + { + /* Look first in the special-casing table. */ + char code[3]; + + code[0] = (uc >> 8) & 0xff; + code[1] = uc & 0xff; + + for (code[2] = 0; ; code[2]++) + { + const struct special_casing_rule *rule = + gl_unicase_special_lookup (code, 3); + + if (rule == NULL) + break; + + /* Test if the condition applies. */ + /* Does the language apply? */ + if (rule->language[0] == '\0' + || (iso639_language != NULL + && iso639_language[0] == rule->language[0] + && iso639_language[1] == rule->language[1])) + { + /* Does the context apply? */ + int context = rule->context; + bool applies; + + if (context < 0) + context = - context; + switch (context) + { + case SCC_ALWAYS: + applies = true; + break; + + case SCC_FINAL_SIGMA: + /* "Before" condition: preceded by a sequence + consisting of a cased letter and a case-ignorable + sequence. + "After" condition: not followed by a sequence + consisting of a case-ignorable sequence and then a + cased letter. */ + /* Test the "before" condition. */ + applies = uc_is_cased (last_char_except_ignorable); + /* Test the "after" condition. */ + if (applies) + { + const UNIT *s2 = s + count; + for (;;) + { + if (s2 < s_end) + { + ucs4_t uc2; + int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); + /* Our uc_is_case_ignorable function is + known to return false for all cased + characters. So we can call + uc_is_case_ignorable first. */ + if (!uc_is_case_ignorable (uc2)) + { + applies = ! uc_is_cased (uc2); + break; + } + s2 += count2; + } + else + { + applies = ! uc_is_cased (suffix_context.first_char_except_ignorable); + break; + } + } + } + break; + + case SCC_AFTER_SOFT_DOTTED: + /* "Before" condition: There is a Soft_Dotted character + before it, with no intervening character of + combining class 0 or 230 (Above). */ + /* Test the "before" condition. */ + applies = uc_is_property_soft_dotted (last_char_normal_or_above); + break; + + case SCC_MORE_ABOVE: + /* "After" condition: followed by a character of + combining class 230 (Above) with no intervening + character of combining class 0 or 230 (Above). */ + /* Test the "after" condition. */ + { + const UNIT *s2 = s + count; + applies = false; + for (;;) + { + if (s2 < s_end) + { + ucs4_t uc2; + int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); + int ccc = uc_combining_class (uc2); + if (ccc == UC_CCC_A) + { + applies = true; + break; + } + if (ccc == UC_CCC_NR) + break; + s2 += count2; + } + else + { + applies = ((suffix_context.bits & SCC_MORE_ABOVE_MASK) != 0); + break; + } + } + } + break; + + case SCC_BEFORE_DOT: + /* "After" condition: followed by COMBINING DOT ABOVE + (U+0307). Any sequence of characters with a + combining class that is neither 0 nor 230 may + intervene between the current character and the + combining dot above. */ + /* Test the "after" condition. */ + { + const UNIT *s2 = s + count; + applies = false; + for (;;) + { + if (s2 < s_end) + { + ucs4_t uc2; + int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); + if (uc2 == 0x0307) /* COMBINING DOT ABOVE */ + { + applies = true; + break; + } + { + int ccc = uc_combining_class (uc2); + if (ccc == UC_CCC_A || ccc == UC_CCC_NR) + break; + } + s2 += count2; + } + else + { + applies = ((suffix_context.bits & SCC_BEFORE_DOT_MASK) != 0); + break; + } + } + } + break; + + case SCC_AFTER_I: + /* "Before" condition: There is an uppercase I before + it, and there is no intervening character of + combining class 0 or 230 (Above). */ + /* Test the "before" condition. */ + applies = (last_char_normal_or_above == 'I'); + break; + + default: + abort (); + } + if (rule->context < 0) + applies = !applies; + + if (applies) + { + /* The rule applies. + Look up the mapping (0 to 3 characters). */ + const unsigned short *mapped_in_rule = + (const unsigned short *)((const char *)rule + offset_in_rule); + + if (mapped_in_rule[0] == 0) + mapped_count = 0; + else + { + mapped_uc[0] = mapped_in_rule[0]; + if (mapped_in_rule[1] == 0) + mapped_count = 1; + else + { + mapped_uc[1] = mapped_in_rule[1]; + if (mapped_in_rule[2] == 0) + mapped_count = 2; + else + { + mapped_uc[2] = mapped_in_rule[2]; + mapped_count = 3; + } + } + } + goto found_mapping; + } + } + + /* Optimization: Save a hash table lookup in the next round. */ + if (!rule->has_next) + break; + } + } + + /* No special-cased mapping. So use the locale and context independent + mapping. */ + mapped_uc[0] = single_character_map (uc); + mapped_count = 1; + + found_mapping: + /* Found the mapping: uc maps to mapped_uc[0..mapped_count-1]. */ + { + unsigned int i; + + for (i = 0; i < mapped_count; i++) + { + ucs4_t muc = mapped_uc[i]; + + /* Append muc to the result accumulator. */ + if (length < allocated) + { + int ret = U_UCTOMB (result + length, muc, allocated - length); + if (ret == -1) + { + errno = EINVAL; + goto fail; + } + if (ret >= 0) + { + length += ret; + goto done_appending; + } + } + { + size_t old_allocated = allocated; + size_t new_allocated = 2 * old_allocated; + if (new_allocated < 64) + new_allocated = 64; + if (new_allocated < old_allocated) /* integer overflow? */ + abort (); + { + UNIT *larger_result; + if (result == NULL) + { + larger_result = (UNIT *) malloc (new_allocated * sizeof (UNIT)); + if (larger_result == NULL) + { + errno = ENOMEM; + goto fail; + } + } + else if (result == resultbuf) + { + larger_result = (UNIT *) malloc (new_allocated * sizeof (UNIT)); + if (larger_result == NULL) + { + errno = ENOMEM; + goto fail; + } + U_CPY (larger_result, resultbuf, length); + } + else + { + larger_result = + (UNIT *) realloc (result, new_allocated * sizeof (UNIT)); + if (larger_result == NULL) + { + errno = ENOMEM; + goto fail; + } + } + result = larger_result; + allocated = new_allocated; + { + int ret = U_UCTOMB (result + length, muc, allocated - length); + if (ret == -1) + { + errno = EINVAL; + goto fail; + } + if (ret < 0) + abort (); + length += ret; + goto done_appending; + } + } + } + done_appending: ; + } + } + + if (!uc_is_case_ignorable (uc)) + last_char_except_ignorable = uc; + + { + int ccc = uc_combining_class (uc); + if (ccc == UC_CCC_A || ccc == UC_CCC_NR) + last_char_normal_or_above = uc; + } + + s += count; + } + } + + if (nf != NULL) + { + /* Finally, normalize the result. */ + UNIT *normalized_result; + + normalized_result = U_NORMALIZE (nf, result, length, resultbuf, lengthp); + if (normalized_result == NULL) + goto fail; + + free (result); + return normalized_result; + } + + if (length == 0) + { + if (result == NULL) + { + /* Return a non-NULL value. NULL means error. */ + result = (UNIT *) malloc (1); + if (result == NULL) + { + errno = ENOMEM; + goto fail; + } + } + } + else if (result != resultbuf && length < allocated) + { + /* Shrink the allocated memory if possible. */ + UNIT *memory; + + memory = (UNIT *) realloc (result, length * sizeof (UNIT)); + if (memory != NULL) + result = memory; + } + + *lengthp = length; + return result; + + fail: + if (result != resultbuf) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return NULL; +} diff --git a/lib/unicase/u8-casemap.c b/lib/unicase/u8-casemap.c new file mode 100644 index 0000000..020f4c3 --- /dev/null +++ b/lib/unicase/u8-casemap.c @@ -0,0 +1,40 @@ +/* Case mapping for UTF-8 strings (locale dependent). + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unicasemap.h" + +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> + +#include "unistr.h" +#include "unictype.h" +#include "uninorm.h" +#include "unicase/caseprop.h" +#include "context.h" +#include "unicase/special-casing.h" + +#define FUNC u8_casemap +#define UNIT uint8_t +#define U_MBTOUC_UNSAFE u8_mbtouc_unsafe +#define U_UCTOMB u8_uctomb +#define U_CPY u8_cpy +#define U_NORMALIZE u8_normalize +#include "u-casemap.h" diff --git a/lib/unicase/u8-tolower.c b/lib/unicase/u8-tolower.c new file mode 100644 index 0000000..bbb3501 --- /dev/null +++ b/lib/unicase/u8-tolower.c @@ -0,0 +1,120 @@ +/* Lowercase mapping for UTF-8 strings (locale dependent). + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unicase.h" + +#include <stddef.h> + +#include "unicase/unicasemap.h" +#include "unicase/special-casing.h" + +uint8_t * +u8_tolower (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp) +{ + return u8_casemap (s, n, + unicase_empty_prefix_context, unicase_empty_suffix_context, + iso639_language, + uc_tolower, offsetof (struct special_casing_rule, lower[0]), + nf, + resultbuf, lengthp); +} + + +#ifdef TEST + +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Read the contents of an input stream, and return it, terminated with a NUL + byte. */ +char * +read_file (FILE *stream) +{ +#define BUFSIZE 4096 + char *buf = NULL; + int alloc = 0; + int size = 0; + int count; + + while (! feof (stream)) + { + if (size + BUFSIZE > alloc) + { + alloc = alloc + alloc / 2; + if (alloc < size + BUFSIZE) + alloc = size + BUFSIZE; + buf = realloc (buf, alloc); + if (buf == NULL) + { + fprintf (stderr, "out of memory\n"); + exit (1); + } + } + count = fread (buf + size, 1, BUFSIZE, stream); + if (count == 0) + { + if (ferror (stream)) + { + perror ("fread"); + exit (1); + } + } + else + size += count; + } + buf = realloc (buf, size + 1); + if (buf == NULL) + { + fprintf (stderr, "out of memory\n"); + exit (1); + } + buf[size] = '\0'; + return buf; +#undef BUFSIZE +} + +int +main (int argc, char * argv[]) +{ + setlocale (LC_ALL, ""); + if (argc == 1) + { + /* Display the lower case of the input string. */ + char *input = read_file (stdin); + int length = strlen (input); + size_t output_length; + uint8_t *output = + u8_tolower ((uint8_t *) input, length, uc_locale_language (), + NULL, + NULL, &output_length); + + fwrite (output, 1, output_length, stdout); + + return 0; + } + else + return 1; +} + +#endif /* TEST */ diff --git a/lib/unicase/unicasemap.h b/lib/unicase/unicasemap.h new file mode 100644 index 0000000..be47166 --- /dev/null +++ b/lib/unicase/unicasemap.h @@ -0,0 +1,52 @@ +/* Case mapping for UTF-8/UTF-16/UTF-32 strings (locale dependent). + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <stddef.h> + +#include "unitypes.h" +#include "unicase.h" +#include "uninorm.h" + +extern uint8_t * + u8_casemap (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + ucs4_t (*single_character_map) (ucs4_t), + size_t offset_in_rule, /* offset in 'struct special_casing_rule' */ + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); + +extern uint16_t * + u16_casemap (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + ucs4_t (*single_character_map) (ucs4_t), + size_t offset_in_rule, /* offset in 'struct special_casing_rule' */ + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); + +extern uint32_t * + u32_casemap (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + ucs4_t (*single_character_map) (ucs4_t), + size_t offset_in_rule, /* offset in 'struct special_casing_rule' */ + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); |