summaryrefslogtreecommitdiffstats
path: root/intl/unicharutil/util/is_combining_diacritic.py
diff options
context:
space:
mode:
Diffstat (limited to 'intl/unicharutil/util/is_combining_diacritic.py')
-rw-r--r--intl/unicharutil/util/is_combining_diacritic.py102
1 files changed, 102 insertions, 0 deletions
diff --git a/intl/unicharutil/util/is_combining_diacritic.py b/intl/unicharutil/util/is_combining_diacritic.py
new file mode 100644
index 0000000000..d8c3c4ba35
--- /dev/null
+++ b/intl/unicharutil/util/is_combining_diacritic.py
@@ -0,0 +1,102 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from unicodedata import combining
+
+UNICODE_LIMIT = 0x110000
+
+UNICODE_COMBINING_CLASS_NOT_REORDERED = 0
+UNICODE_COMBINING_CLASS_KANA_VOICING = 8
+UNICODE_COMBINING_CLASS_VIRAMA = 9
+
+
+# Keep this function in sync with IsCombiningDiacritic in nsUnicodeProperties.h.
+def is_combining_diacritic(char):
+ return combining(char) not in (
+ UNICODE_COMBINING_CLASS_NOT_REORDERED,
+ UNICODE_COMBINING_CLASS_KANA_VOICING,
+ UNICODE_COMBINING_CLASS_VIRAMA,
+ 91,
+ 129,
+ 130,
+ 132,
+ )
+
+
+# See gfxFontUtils.h for the SharedBitSet that we're creating a const instance of here.
+BLOCK_SIZE = 32
+BLOCK_SIZE_BITS = BLOCK_SIZE * 8
+
+
+def main(header):
+ blockIndex = []
+ blocks = []
+
+ # Figure out the contents of each 256-char block, and see if it is unique
+ # or can share an already-allocated block.
+ block = [0] * BLOCK_SIZE
+ byte = 0
+ bit = 0x01
+ for char in range(UNICODE_LIMIT):
+ if is_combining_diacritic(chr(char)):
+ block[byte] |= bit
+ bit <<= 1
+ if bit == 0x100:
+ bit = 0x01
+ byte += 1
+ if byte == BLOCK_SIZE:
+ found = False
+ for b in range(len(blocks)):
+ if block == blocks[b]:
+ blockIndex.append(b)
+ found = True
+ break
+ if not found:
+ blockIndex.append(len(blocks))
+ blocks.append(block)
+ byte = 0
+ block = [0] * BLOCK_SIZE
+
+ # Strip trailing empty blocks from the index.
+ while blockIndex[len(blockIndex) - 1] == 0:
+ del blockIndex[len(blockIndex) - 1]
+
+ # Write the SharedBitSet as data in a C++ header file.
+ header.write("/* !GENERATED DATA -- DO NOT EDIT! */\n")
+ header.write("/* (see is_combining_diacritic.py) */\n")
+ header.write("\n")
+ header.write('#include "gfxFontUtils.h"\n')
+ header.write("\n")
+
+ header.write("typedef struct {\n")
+ header.write(" uint16_t mBlockIndexCount;\n")
+ header.write(" uint16_t mBlockCount;\n")
+ header.write(" uint16_t mBlockIndex[" + str(len(blockIndex)) + "];\n")
+ header.write(" uint8_t mBlockData[" + str(len(blocks) * BLOCK_SIZE) + "];\n")
+ header.write("} CombiningDiacriticsBitset_t;\n")
+ header.write("\n")
+
+ header.write(
+ "static const CombiningDiacriticsBitset_t COMBINING_DIACRITICS_BITSET_DATA = {\n"
+ )
+ header.write(" " + str(len(blockIndex)) + ",\n")
+ header.write(" " + str(len(blocks)) + ",\n")
+ header.write(" {\n")
+ for b in blockIndex:
+ header.write(" " + str(b) + ",\n")
+ header.write(" },\n")
+ header.write(" {\n")
+ for b in blocks:
+ header.write(" ")
+ for i in b:
+ header.write(str(i) + ",")
+ header.write("\n")
+ header.write(" },\n")
+ header.write("};\n")
+ header.write("\n")
+ header.write("static const SharedBitSet* sCombiningDiacriticsSet =\n")
+ header.write(
+ " reinterpret_cast<const SharedBitSet*>(&COMBINING_DIACRITICS_BITSET_DATA);\n"
+ )
+ header.write("\n")