# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Generate a C++ header containing a bitset of combining diacritics.

Every Unicode code point is classified with is_combining_diacritic(); the
results are packed into 256-code-point blocks, identical blocks are shared,
and the resulting tables are written out as a constant C++ struct.
"""

from unicodedata import combining

UNICODE_LIMIT = 0x110000

UNICODE_COMBINING_CLASS_NOT_REORDERED = 0
UNICODE_COMBINING_CLASS_KANA_VOICING = 8
UNICODE_COMBINING_CLASS_VIRAMA = 9

# Canonical combining classes whose characters are NOT reported as combining
# diacritics. The bare numeric classes are excluded to mirror the C++
# implementation named below.
_NON_DIACRITIC_COMBINING_CLASSES = frozenset(
    (
        UNICODE_COMBINING_CLASS_NOT_REORDERED,
        UNICODE_COMBINING_CLASS_KANA_VOICING,
        UNICODE_COMBINING_CLASS_VIRAMA,
        91,
        129,
        130,
        132,
    )
)


# Keep this function in sync with IsCombiningDiacritic in nsUnicodeProperties.h.
def is_combining_diacritic(char: str) -> bool:
    """Return True if *char* counts as a combining diacritic.

    *char* is a single-character string. It is a combining diacritic when its
    canonical combining class is not one of the excluded classes (0, 8, 9,
    91, 129, 130, 132).
    """
    return combining(char) not in _NON_DIACRITIC_COMBINING_CLASSES


# See gfxFontUtils.h for the SharedBitSet that we're creating a const instance of here.
BLOCK_SIZE = 32
BLOCK_SIZE_BITS = BLOCK_SIZE * 8


def _pack_byte(first_code_point):
    """Pack the flags of eight consecutive code points into one byte, LSB first."""
    value = 0
    for offset in range(8):
        if is_combining_diacritic(chr(first_code_point + offset)):
            value |= 1 << offset
    return value


def _build_block_table():
    """Return (block_index, blocks) describing the combining-diacritic bitset.

    blocks is a list of unique BLOCK_SIZE-byte tuples, each covering
    BLOCK_SIZE_BITS consecutive code points. block_index maps each successive
    run of BLOCK_SIZE_BITS code points to a position in blocks, with trailing
    references to all-zero blocks removed.
    """
    block_index = []
    blocks = []
    slot_of_block = {}

    # Figure out the contents of each 256-char block, and see if it is unique
    # or can share an already-allocated block. UNICODE_LIMIT is a multiple of
    # BLOCK_SIZE_BITS, so the final block never reaches past the last valid
    # code point.
    for block_start in range(0, UNICODE_LIMIT, BLOCK_SIZE_BITS):
        block = tuple(
            _pack_byte(byte_start)
            for byte_start in range(block_start, block_start + BLOCK_SIZE_BITS, 8)
        )
        slot = slot_of_block.get(block)
        if slot is None:
            slot = len(blocks)
            slot_of_block[block] = slot
            blocks.append(block)
        block_index.append(slot)

    # Strip trailing empty blocks from the index. The referenced block's
    # contents are checked (rather than assuming slot 0 is the empty block),
    # and the loop stops safely if the index becomes empty.
    # NOTE(review): presumably SharedBitSet treats code points past the end of
    # the index as unset -- confirm against gfxFontUtils.h.
    while block_index and not any(blocks[block_index[-1]]):
        block_index.pop()

    return block_index, blocks


def _render_header(block_index, blocks):
    """Return the C++ header text defining the bitset data for the given tables."""
    parts = [
        "/* !GENERATED DATA -- DO NOT EDIT! */\n",
        "/* (see is_combining_diacritic.py) */\n",
        "\n",
        '#include "gfxFontUtils.h"\n',
        "\n",
        "typedef struct {\n",
        " uint16_t mBlockIndexCount;\n",
        " uint16_t mBlockCount;\n",
        " uint16_t mBlockIndex[" + str(len(block_index)) + "];\n",
        " uint8_t mBlockData[" + str(len(blocks) * BLOCK_SIZE) + "];\n",
        "} CombiningDiacriticsBitset_t;\n",
        "\n",
        "static const CombiningDiacriticsBitset_t COMBINING_DIACRITICS_BITSET_DATA = {\n",
        " " + str(len(block_index)) + ",\n",
        " " + str(len(blocks)) + ",\n",
        " {\n",
    ]
    parts.extend(" " + str(slot) + ",\n" for slot in block_index)
    parts.append(" },\n")
    parts.append(" {\n")
    # One output line per unique block, bytes separated by commas.
    for block in blocks:
        parts.append(" " + "".join(str(value) + "," for value in block) + "\n")
    parts.append(" },\n")
    parts.append("};\n")
    parts.append("\n")
    parts.append("static const SharedBitSet* sCombiningDiacriticsSet =\n")
    # The cast needs an explicit target type to be valid C++; it matches the
    # type of the pointer declared on the previous output line.
    parts.append(
        " reinterpret_cast<const SharedBitSet*>(&COMBINING_DIACRITICS_BITSET_DATA);\n"
    )
    parts.append("\n")
    return "".join(parts)


def main(header) -> None:
    """Write the combining-diacritics bitset as a C++ header to *header*.

    *header* is a writable text file-like object; only its write() method is
    used, and it is called once with the complete header text.
    """
    block_index, blocks = _build_block_table()
    # Write the SharedBitSet as data in a C++ header file.
    header.write(_render_header(block_index, blocks))