summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/uscript_props.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/common/uscript_props.cpp')
-rw-r--r--intl/icu/source/common/uscript_props.cpp302
1 files changed, 302 insertions, 0 deletions
diff --git a/intl/icu/source/common/uscript_props.cpp b/intl/icu/source/common/uscript_props.cpp
new file mode 100644
index 0000000000..25d287b57a
--- /dev/null
+++ b/intl/icu/source/common/uscript_props.cpp
@@ -0,0 +1,302 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2013-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: uscript_props.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2013feb16
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uscript.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+#include "cmemory.h"
+
+namespace {
+
+// Script metadata (script properties).
+// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
+
+// One script-properties word; 0 = NOT_ENCODED (no sample character, all properties false).
+// Bits 20..0 hold the sample character; bits 23..21 hold the usage value.
+const int32_t USAGE_SHIFT = 21;
+const int32_t UNKNOWN = 1 << USAGE_SHIFT;
+const int32_t EXCLUSION = 2 << USAGE_SHIFT;
+const int32_t LIMITED_USE = 3 << USAGE_SHIFT;
+// const int32_t ASPIRATIONAL = 4 << USAGE_SHIFT; -- not used any more since Unicode 10
+const int32_t RECOMMENDED = 5 << USAGE_SHIFT;
+
+// Bits 31..24 are reserved for single-bit flags; three are assigned so far.
+const int32_t FLAGS_SHIFT = 24;
+const int32_t RTL = 1 << FLAGS_SHIFT;
+const int32_t LB_LETTERS = 1 << (FLAGS_SHIFT + 1);
+const int32_t CASED = 1 << (FLAGS_SHIFT + 2);
+
+const int32_t SCRIPT_PROPS[] = {  // indexed by UScriptCode value (see getScriptProps)
+ // Each entry: sample character | usage | flags; a bare 0 is NOT_ENCODED.
+ // Begin copy-paste output from tools/trunk/unicode/py/parsescriptmetadata.py
+ 0x0040 | RECOMMENDED, // Zyyy
+ 0x0308 | RECOMMENDED, // Zinh
+ 0x0628 | RECOMMENDED | RTL, // Arab
+ 0x0531 | RECOMMENDED | CASED, // Armn
+ 0x0995 | RECOMMENDED, // Beng
+ 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
+ 0x13C4 | LIMITED_USE | CASED, // Cher
+ 0x03E2 | EXCLUSION | CASED, // Copt
+ 0x042F | RECOMMENDED | CASED, // Cyrl
+ 0x10414 | EXCLUSION | CASED, // Dsrt
+ 0x0905 | RECOMMENDED, // Deva
+ 0x12A0 | RECOMMENDED, // Ethi
+ 0x10D3 | RECOMMENDED, // Geor
+ 0x10330 | EXCLUSION, // Goth
+ 0x03A9 | RECOMMENDED | CASED, // Grek
+ 0x0A95 | RECOMMENDED, // Gujr
+ 0x0A15 | RECOMMENDED, // Guru
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
+ 0xAC00 | RECOMMENDED, // Hang
+ 0x05D0 | RECOMMENDED | RTL, // Hebr
+ 0x304B | RECOMMENDED | LB_LETTERS, // Hira
+ 0x0C95 | RECOMMENDED, // Knda
+ 0x30AB | RECOMMENDED | LB_LETTERS, // Kana
+ 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
+ 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
+ 0x004C | RECOMMENDED | CASED, // Latn
+ 0x0D15 | RECOMMENDED, // Mlym
+ 0x1826 | EXCLUSION, // Mong
+ 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
+ 0x168F | EXCLUSION, // Ogam
+ 0x10300 | EXCLUSION, // Ital
+ 0x0B15 | RECOMMENDED, // Orya
+ 0x16A0 | EXCLUSION, // Runr
+ 0x0D85 | RECOMMENDED, // Sinh
+ 0x0710 | LIMITED_USE | RTL, // Syrc
+ 0x0B95 | RECOMMENDED, // Taml
+ 0x0C15 | RECOMMENDED, // Telu
+ 0x078C | RECOMMENDED | RTL, // Thaa
+ 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
+ 0x0F40 | RECOMMENDED, // Tibt
+ 0x14C0 | LIMITED_USE, // Cans
+ 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii
+ 0x1703 | EXCLUSION, // Tglg
+ 0x1723 | EXCLUSION, // Hano
+ 0x1743 | EXCLUSION, // Buhd
+ 0x1763 | EXCLUSION, // Tagb
+ 0x280E | UNKNOWN, // Brai
+ 0x10800 | EXCLUSION | RTL, // Cprt
+ 0x1900 | LIMITED_USE, // Limb
+ 0x10000 | EXCLUSION, // Linb
+ 0x10480 | EXCLUSION, // Osma
+ 0x10450 | EXCLUSION, // Shaw
+ 0x1950 | LIMITED_USE | LB_LETTERS, // Tale
+ 0x10380 | EXCLUSION, // Ugar
+ 0,
+ 0x1A00 | EXCLUSION, // Bugi
+ 0x2C00 | EXCLUSION | CASED, // Glag
+ 0x10A00 | EXCLUSION | RTL, // Khar
+ 0xA800 | LIMITED_USE, // Sylo
+ 0x1980 | LIMITED_USE | LB_LETTERS, // Talu
+ 0x2D30 | LIMITED_USE, // Tfng
+ 0x103A0 | EXCLUSION, // Xpeo
+ 0x1B05 | LIMITED_USE, // Bali
+ 0x1BC0 | LIMITED_USE, // Batk
+ 0,
+ 0x11005 | EXCLUSION, // Brah
+ 0xAA00 | LIMITED_USE, // Cham
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x13153 | EXCLUSION, // Egyp
+ 0,
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
+ 0x16B1C | EXCLUSION, // Hmng
+ 0x10CA1 | EXCLUSION | RTL | CASED, // Hung
+ 0,
+ 0xA984 | LIMITED_USE, // Java
+ 0xA90A | LIMITED_USE, // Kali
+ 0,
+ 0,
+ 0x1C00 | LIMITED_USE, // Lepc
+ 0x10647 | EXCLUSION, // Lina
+ 0x0840 | LIMITED_USE | RTL, // Mand
+ 0,
+ 0x10980 | EXCLUSION | RTL, // Mero
+ 0x07CA | LIMITED_USE | RTL, // Nkoo
+ 0x10C00 | EXCLUSION | RTL, // Orkh
+ 0x1036B | EXCLUSION, // Perm
+ 0xA840 | EXCLUSION, // Phag
+ 0x10900 | EXCLUSION | RTL, // Phnx
+ 0x16F00 | LIMITED_USE, // Plrd
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0xA549 | LIMITED_USE, // Vaii
+ 0,
+ 0x12000 | EXCLUSION, // Xsux
+ 0,
+ 0xFDD0 | UNKNOWN, // Zzzz
+ 0x102A0 | EXCLUSION, // Cari
+ 0x304B | RECOMMENDED | LB_LETTERS, // Jpan
+ 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
+ 0x10280 | EXCLUSION, // Lyci
+ 0x10920 | EXCLUSION | RTL, // Lydi
+ 0x1C5A | LIMITED_USE, // Olck
+ 0xA930 | EXCLUSION, // Rjng
+ 0xA882 | LIMITED_USE, // Saur
+ 0x1D850 | EXCLUSION, // Sgnw
+ 0x1B83 | LIMITED_USE, // Sund
+ 0,
+ 0xABC0 | LIMITED_USE, // Mtei
+ 0x10840 | EXCLUSION | RTL, // Armi
+ 0x10B00 | EXCLUSION | RTL, // Avst
+ 0x11103 | LIMITED_USE, // Cakm
+ 0xAC00 | RECOMMENDED, // Kore
+ 0x11083 | EXCLUSION, // Kthi
+ 0x10AD8 | EXCLUSION | RTL, // Mani
+ 0x10B60 | EXCLUSION | RTL, // Phli
+ 0x10B8F | EXCLUSION | RTL, // Phlp
+ 0,
+ 0x10B40 | EXCLUSION | RTL, // Prti
+ 0x0800 | EXCLUSION | RTL, // Samr
+ 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
+ 0,
+ 0,
+ 0xA6A0 | LIMITED_USE, // Bamu
+ 0xA4D0 | LIMITED_USE, // Lisu
+ 0,
+ 0x10A60 | EXCLUSION | RTL, // Sarb
+ 0x16AE6 | EXCLUSION, // Bass
+ 0x1BC20 | EXCLUSION, // Dupl
+ 0x10500 | EXCLUSION, // Elba
+ 0x11315 | EXCLUSION, // Gran
+ 0,
+ 0,
+ 0x1E802 | EXCLUSION | RTL, // Mend
+ 0x109A0 | EXCLUSION | RTL, // Merc
+ 0x10A95 | EXCLUSION | RTL, // Narb
+ 0x10896 | EXCLUSION | RTL, // Nbat
+ 0x10873 | EXCLUSION | RTL, // Palm
+ 0x112BE | EXCLUSION, // Sind
+ 0x118B4 | EXCLUSION | CASED, // Wara
+ 0,
+ 0,
+ 0x16A4F | EXCLUSION, // Mroo
+ 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu
+ 0x11183 | EXCLUSION, // Shrd
+ 0x110D0 | EXCLUSION, // Sora
+ 0x11680 | EXCLUSION, // Takr
+ 0x18229 | EXCLUSION | LB_LETTERS, // Tang
+ 0,
+ 0x14400 | EXCLUSION, // Hluw
+ 0x11208 | EXCLUSION, // Khoj
+ 0x11484 | EXCLUSION, // Tirh
+ 0x10537 | EXCLUSION, // Aghb
+ 0x11152 | EXCLUSION, // Mahj
+ 0x11717 | EXCLUSION | LB_LETTERS, // Ahom
+ 0x108F4 | EXCLUSION | RTL, // Hatr
+ 0x1160E | EXCLUSION, // Modi
+ 0x1128F | EXCLUSION, // Mult
+ 0x11AC0 | EXCLUSION, // Pauc
+ 0x1158E | EXCLUSION, // Sidd
+ 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm
+ 0x11C0E | EXCLUSION, // Bhks
+ 0x11C72 | EXCLUSION, // Marc
+ 0x11412 | LIMITED_USE, // Newa
+ 0x104B5 | LIMITED_USE | CASED, // Osge
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb
+ 0x1112 | RECOMMENDED, // Jamo
+ 0,
+ 0x11D10 | EXCLUSION, // Gonm
+ 0x11A5C | EXCLUSION, // Soyo
+ 0x11A0B | EXCLUSION, // Zanb
+ 0x1180B | EXCLUSION, // Dogr
+ 0x11D71 | LIMITED_USE, // Gong
+ 0x11EE5 | EXCLUSION, // Maka
+ 0x16E40 | EXCLUSION | CASED, // Medf
+ 0x10D12 | LIMITED_USE | RTL, // Rohg
+ 0x10F42 | EXCLUSION | RTL, // Sogd
+ 0x10F19 | EXCLUSION | RTL, // Sogo
+ 0x10FF1 | EXCLUSION | RTL, // Elym
+ 0x1E108 | LIMITED_USE, // Hmnp
+ 0x119CE | EXCLUSION, // Nand
+ 0x1E2E1 | LIMITED_USE, // Wcho
+ 0x10FBF | EXCLUSION | RTL, // Chrs
+ 0x1190C | EXCLUSION, // Diak
+ 0x18C65 | EXCLUSION | LB_LETTERS, // Kits
+ 0x10E88 | EXCLUSION | RTL, // Yezi
+ // End copy-paste from parsescriptmetadata.py
+};
+
+// Returns the packed SCRIPT_PROPS word for script, or 0 if script is outside the table.
+int32_t getScriptProps(UScriptCode script) {
+    if (script < 0 || script >= UPRV_LENGTHOF(SCRIPT_PROPS)) {
+        return 0;
+    }
+    return SCRIPT_PROPS[script];
+}
+
+} // namespace
+
+// Writes the script's sample character to dest as UTF-16 when it fits in capacity.
+// The result is whatever u_terminateUChars() returns for the computed length.
+U_CAPI int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;  // incoming error code already signals failure
+    }
+    const bool badBuffer = capacity < 0 || (dest == nullptr && capacity > 0);
+    if(badBuffer) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const int32_t cp = getScriptProps(script) & 0x1fffff;  // bits 20..0: sample character
+    const int32_t units = (cp == 0) ? 0 : U16_LENGTH(cp);
+    if(cp != 0 && units <= capacity) {
+        int32_t pos = 0;
+        U16_APPEND_UNSAFE(dest, pos, cp);  // skipped when the buffer is too small
+    }
+    return u_terminateUChars(dest, capacity, units, pErrorCode);
+}
+
+// Returns a UnicodeString holding the script's sample character, or an
+// empty string when the property word carries no sample character.
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script) {
+    icu::UnicodeString result;
+    int32_t cp = getScriptProps(script) & 0x1fffff;  // bits 20..0
+    if(cp != 0) { result.append(cp); }
+    return result;
+}
+
+// Decodes the usage value stored in bits 23..21 of the script's property word.
+U_CAPI UScriptUsage U_EXPORT2 uscript_getUsage(UScriptCode script) {
+    return static_cast<UScriptUsage>((getScriptProps(script) >> 21) & 7);
+}
+
+// True when the RTL flag bit is set in the script's property word.
+U_CAPI UBool U_EXPORT2 uscript_isRightToLeft(UScriptCode script) {
+    return static_cast<UBool>((RTL & getScriptProps(script)) != 0);
+}
+
+// True when the LB_LETTERS flag bit is set in the script's property word.
+U_CAPI UBool U_EXPORT2 uscript_breaksBetweenLetters(UScriptCode script) {
+    return static_cast<UBool>((LB_LETTERS & getScriptProps(script)) != 0);
+}
+
+// True when the CASED flag bit is set in the script's property word.
+U_CAPI UBool U_EXPORT2 uscript_isCased(UScriptCode script) {
+    return static_cast<UBool>((CASED & getScriptProps(script)) != 0);
+}