diff options
Diffstat (limited to 'intl/icu/source/common/uscript_props.cpp')
-rw-r--r-- | intl/icu/source/common/uscript_props.cpp | 309 |
1 files changed, 309 insertions, 0 deletions
diff --git a/intl/icu/source/common/uscript_props.cpp b/intl/icu/source/common/uscript_props.cpp new file mode 100644 index 0000000000..b26164408a --- /dev/null +++ b/intl/icu/source/common/uscript_props.cpp @@ -0,0 +1,309 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2013-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: uscript_props.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2013feb16 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/uscript.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "cmemory.h" + +namespace { + +// Script metadata (script properties). +// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt + +// 0 = NOT_ENCODED, no sample character, default false script properties. +// Bits 20.. 0: sample character + +// Bits 23..21: usage +const int32_t UNKNOWN = 1 << 21; +const int32_t EXCLUSION = 2 << 21; +const int32_t LIMITED_USE = 3 << 21; +// st int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10 +const int32_t RECOMMENDED = 5 << 21; + +// Bits 31..24: Single-bit flags +const int32_t RTL = 1 << 24; +const int32_t LB_LETTERS = 1 << 25; +const int32_t CASED = 1 << 26; + +const int32_t SCRIPT_PROPS[] = { + // Begin copy-paste output from + // tools/trunk/unicode/py/parsescriptmetadata.py + 0x0040 | RECOMMENDED, // Zyyy + 0x0308 | RECOMMENDED, // Zinh + 0x0628 | RECOMMENDED | RTL, // Arab + 0x0531 | RECOMMENDED | CASED, // Armn + 0x0995 | RECOMMENDED, // Beng + 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo + 0x13C4 | LIMITED_USE | CASED, // Cher + 0x03E2 | EXCLUSION | CASED, // Copt + 0x042F | RECOMMENDED | CASED, // Cyrl + 0x10414 | EXCLUSION | CASED, // Dsrt + 0x0905 | RECOMMENDED, // Deva + 0x12A0 | RECOMMENDED, // Ethi + 0x10D3 | RECOMMENDED, // Geor + 0x10330 | EXCLUSION, // Goth + 0x03A9 | RECOMMENDED | CASED, // Grek + 0x0A95 | RECOMMENDED, // Gujr + 0x0A15 | RECOMMENDED, // Guru + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani + 0xAC00 | RECOMMENDED, // Hang + 0x05D0 | RECOMMENDED | RTL, // Hebr + 0x304B | RECOMMENDED | LB_LETTERS, // Hira + 0x0C95 | RECOMMENDED, // Knda + 0x30AB | RECOMMENDED | LB_LETTERS, // Kana + 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr + 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo + 0x004C | RECOMMENDED | CASED, // Latn + 0x0D15 | RECOMMENDED, // Mlym + 0x1826 | EXCLUSION, // Mong + 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr + 0x168F | EXCLUSION, // Ogam + 0x10300 | EXCLUSION, // Ital + 0x0B15 | RECOMMENDED, // Orya + 0x16A0 | EXCLUSION, // Runr + 0x0D85 | RECOMMENDED, // Sinh + 0x0710 | LIMITED_USE | RTL, // Syrc + 0x0B95 | RECOMMENDED, // Taml + 0x0C15 | RECOMMENDED, // Telu + 0x078C | RECOMMENDED | RTL, // Thaa + 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai + 0x0F40 | RECOMMENDED, // Tibt + 0x14C0 | LIMITED_USE, // Cans + 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii + 0x1703 | EXCLUSION, // Tglg + 0x1723 | EXCLUSION, // Hano + 0x1743 | EXCLUSION, // Buhd + 0x1763 | EXCLUSION, // Tagb + 0x280E | UNKNOWN, // Brai + 0x10800 | EXCLUSION | RTL, // Cprt + 0x1900 | LIMITED_USE, // Limb + 0x10000 | EXCLUSION, // Linb + 0x10480 | EXCLUSION, // Osma + 0x10450 | EXCLUSION, // Shaw + 0x1950 | LIMITED_USE | LB_LETTERS, // Tale + 0x10380 | EXCLUSION, // Ugar + 0, + 0x1A00 | EXCLUSION, // Bugi + 0x2C00 | EXCLUSION | CASED, // Glag + 0x10A00 | EXCLUSION | RTL, // Khar + 0xA800 | LIMITED_USE, // Sylo + 0x1980 | LIMITED_USE | LB_LETTERS, // Talu + 0x2D30 | LIMITED_USE, // Tfng + 0x103A0 | EXCLUSION, // Xpeo + 0x1B05 | LIMITED_USE, // Bali + 0x1BC0 | LIMITED_USE, // Batk + 0, + 0x11005 | EXCLUSION, // Brah + 0xAA00 | LIMITED_USE, // Cham + 0, + 0, + 0, + 0, + 0x13153 | EXCLUSION, // Egyp + 0, + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant + 0x16B1C | EXCLUSION, // Hmng + 0x10CA1 | EXCLUSION | RTL | CASED, // Hung + 0, + 0xA984 | LIMITED_USE, // Java + 0xA90A | LIMITED_USE, // Kali + 0, + 0, + 0x1C00 | LIMITED_USE, // Lepc + 0x10647 | EXCLUSION, // Lina + 0x0840 | LIMITED_USE | RTL, // Mand + 0, + 0x10980 | EXCLUSION | RTL, // Mero + 0x07CA | LIMITED_USE | RTL, // Nkoo + 0x10C00 | EXCLUSION | RTL, // Orkh + 0x1036B | EXCLUSION, // Perm + 0xA840 | EXCLUSION, // Phag + 0x10900 | EXCLUSION | RTL, // Phnx + 0x16F00 | LIMITED_USE, // Plrd + 0, + 0, + 0, + 0, + 0, + 0, + 0xA549 | LIMITED_USE, // Vaii + 0, + 0x12000 | EXCLUSION, // Xsux + 0, + 0xFDD0 | UNKNOWN, // Zzzz + 0x102A0 | EXCLUSION, // Cari + 0x304B | RECOMMENDED | LB_LETTERS, // Jpan + 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana + 0x10280 | EXCLUSION, // Lyci + 0x10920 | EXCLUSION | RTL, // Lydi + 0x1C5A | LIMITED_USE, // Olck + 0xA930 | EXCLUSION, // Rjng + 0xA882 | LIMITED_USE, // Saur + 0x1D850 | EXCLUSION, // Sgnw + 0x1B83 | LIMITED_USE, // Sund + 0, + 0xABC0 | LIMITED_USE, // Mtei + 0x10840 | EXCLUSION | RTL, // Armi + 0x10B00 | EXCLUSION | RTL, // Avst + 0x11103 | LIMITED_USE, // Cakm + 0xAC00 | RECOMMENDED, // Kore + 0x11083 | EXCLUSION, // Kthi + 0x10AD8 | EXCLUSION | RTL, // Mani + 0x10B60 | EXCLUSION | RTL, // Phli + 0x10B8F | EXCLUSION | RTL, // Phlp + 0, + 0x10B40 | EXCLUSION | RTL, // Prti + 0x0800 | EXCLUSION | RTL, // Samr + 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt + 0, + 0, + 0xA6A0 | LIMITED_USE, // Bamu + 0xA4D0 | LIMITED_USE, // Lisu + 0, + 0x10A60 | EXCLUSION | RTL, // Sarb + 0x16AE6 | EXCLUSION, // Bass + 0x1BC20 | EXCLUSION, // Dupl + 0x10500 | EXCLUSION, // Elba + 0x11315 | EXCLUSION, // Gran + 0, + 0, + 0x1E802 | EXCLUSION | RTL, // Mend + 0x109A0 | EXCLUSION | RTL, // Merc + 0x10A95 | EXCLUSION | RTL, // Narb + 0x10896 | EXCLUSION | RTL, // Nbat + 0x10873 | EXCLUSION | RTL, // Palm + 0x112BE | EXCLUSION, // Sind + 0x118B4 | EXCLUSION | CASED, // Wara + 0, + 0, + 0x16A4F | EXCLUSION, // Mroo + 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu + 0x11183 | EXCLUSION, // Shrd + 0x110D0 | EXCLUSION, // Sora + 0x11680 | EXCLUSION, // Takr + 0x18229 | EXCLUSION | LB_LETTERS, // Tang + 0, + 0x14400 | EXCLUSION, // Hluw + 0x11208 | EXCLUSION, // Khoj + 0x11484 | EXCLUSION, // Tirh + 0x10537 | EXCLUSION, // Aghb + 0x11152 | EXCLUSION, // Mahj + 0x11717 | EXCLUSION | LB_LETTERS, // Ahom + 0x108F4 | EXCLUSION | RTL, // Hatr + 0x1160E | EXCLUSION, // Modi + 0x1128F | EXCLUSION, // Mult + 0x11AC0 | EXCLUSION, // Pauc + 0x1158E | EXCLUSION, // Sidd + 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm + 0x11C0E | EXCLUSION, // Bhks + 0x11C72 | EXCLUSION, // Marc + 0x11412 | LIMITED_USE, // Newa + 0x104B5 | LIMITED_USE | CASED, // Osge + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb + 0x1112 | RECOMMENDED, // Jamo + 0, + 0x11D10 | EXCLUSION, // Gonm + 0x11A5C | EXCLUSION, // Soyo + 0x11A0B | EXCLUSION, // Zanb + 0x1180B | EXCLUSION, // Dogr + 0x11D71 | LIMITED_USE, // Gong + 0x11EE5 | EXCLUSION, // Maka + 0x16E40 | EXCLUSION | CASED, // Medf + 0x10D12 | LIMITED_USE | RTL, // Rohg + 0x10F42 | EXCLUSION | RTL, // Sogd + 0x10F19 | EXCLUSION | RTL, // Sogo + 0x10FF1 | EXCLUSION | RTL, // Elym + 0x1E108 | LIMITED_USE, // Hmnp + 0x119CE | EXCLUSION, // Nand + 0x1E2E1 | LIMITED_USE, // Wcho + 0x10FBF | EXCLUSION | RTL, // Chrs + 0x1190C | EXCLUSION, // Diak + 0x18C65 | EXCLUSION | LB_LETTERS, // Kits + 0x10E88 | EXCLUSION | RTL, // Yezi + 0x12FE5 | EXCLUSION, // Cpmn + 0x10F7C | EXCLUSION | RTL, // Ougr + 0x16ABC | EXCLUSION, // Tnsa + 0x1E290 | EXCLUSION, // Toto + 0x10582 | EXCLUSION | CASED, // Vith + 0x11F1B | EXCLUSION | LB_LETTERS, // Kawi + 0x1E4E6 | EXCLUSION, // Nagm + // End copy-paste from parsescriptmetadata.py +}; + +int32_t getScriptProps(UScriptCode script) { + if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { + return SCRIPT_PROPS[script]; + } else { + return 0; + } +} + +} // namespace + +U_CAPI int32_t U_EXPORT2 +uscript_getSampleString(UScriptCode script, char16_t *dest, int32_t capacity, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { return 0; } + if(capacity < 0 || (capacity > 0 && dest == nullptr)) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + int32_t sampleChar = getScriptProps(script) & 0x1fffff; + int32_t length; + if(sampleChar == 0) { + length = 0; + } else { + length = U16_LENGTH(sampleChar); + if(length <= capacity) { + int32_t i = 0; + U16_APPEND_UNSAFE(dest, i, sampleChar); + } + } + return u_terminateUChars(dest, capacity, length, pErrorCode); +} + +U_COMMON_API icu::UnicodeString U_EXPORT2 +uscript_getSampleUnicodeString(UScriptCode script) { + icu::UnicodeString sample; + int32_t sampleChar = getScriptProps(script) & 0x1fffff; + if(sampleChar != 0) { + sample.append(sampleChar); + } + return sample; +} + +U_CAPI UScriptUsage U_EXPORT2 +uscript_getUsage(UScriptCode script) { + return (UScriptUsage)((getScriptProps(script) >> 21) & 7); +} + +U_CAPI UBool U_EXPORT2 +uscript_isRightToLeft(UScriptCode script) { + return (getScriptProps(script) & RTL) != 0; +} + +U_CAPI UBool U_EXPORT2 +uscript_breaksBetweenLetters(UScriptCode script) { + return (getScriptProps(script) & LB_LETTERS) != 0; +} + +U_CAPI UBool U_EXPORT2 +uscript_isCased(UScriptCode script) { + return (getScriptProps(script) & CASED) != 0; +} |