From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- intl/icu/source/common/utrie2_impl.h | 175 +++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 intl/icu/source/common/utrie2_impl.h (limited to 'intl/icu/source/common/utrie2_impl.h') diff --git a/intl/icu/source/common/utrie2_impl.h b/intl/icu/source/common/utrie2_impl.h new file mode 100644 index 0000000000..2a14db3a6b --- /dev/null +++ b/intl/icu/source/common/utrie2_impl.h @@ -0,0 +1,175 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2008, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utrie2_impl.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2008sep26 (split off from utrie2.c) +* created by: Markus W. Scherer +* +* Definitions needed for both runtime and builder code for UTrie2, +* used by utrie2.c and utrie2_builder.c. +*/ + +#ifndef __UTRIE2_IMPL_H__ +#define __UTRIE2_IMPL_H__ + +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif +#include "utrie2.h" + +/* Public UTrie2 API implementation ----------------------------------------- */ + +/* + * These definitions are mostly needed by utrie2.cpp, + * but also by utrie2_serialize() and utrie2_swap(). + */ + +// UTrie2 signature values, in platform endianness and opposite endianness. +// The UTrie2 signature ASCII byte values spell "Tri2". 
+#define UTRIE2_SIG 0x54726932 /* bytes 0x54 0x72 0x69 0x32 */
+#define UTRIE2_OE_SIG 0x32697254 /* the same four bytes in reverse order */
+
+/**
+ * Trie data structure in serialized form:
+ *
+ *     UTrie2Header header;
+ *     uint16_t index[header.indexLength];
+ *     uint16_t data[header.shiftedDataLength<<2];  -- or uint32_t data[...]
+ *
+ * The header fields total 16 bytes: one uint32_t followed by six uint16_t.
+ * @internal
+ */
+typedef struct UTrie2Header {
+    /** "Tri2" in big-endian US-ASCII (0x54726932) */
+    uint32_t signature;
+
+    /**
+     * options bit field:
+     * 15.. 4   reserved (0)
+     *  3.. 0   UTrie2ValueBits valueBits
+     *
+     * Extract valueBits with (options & UTRIE2_OPTIONS_VALUE_BITS_MASK).
+     */
+    uint16_t options;
+
+    /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */
+    uint16_t indexLength;
+
+    /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */
+    uint16_t shiftedDataLength;
+
+    /** Offsets of the null index-2 block and the null data block, not shifted. */
+    uint16_t index2NullOffset, dataNullOffset;
+
+    /**
+     * First code point of the single-value range ending with U+10ffff,
+     * rounded up and then shifted right by UTRIE2_SHIFT_1.
+     */
+    uint16_t shiftedHighStart;
+} UTrie2Header;
+
+/**
+ * Constants for use with UTrie2Header.options.
+ * @internal
+ */
+enum {
+    /**
+     * Mask to get the UTrie2ValueBits valueBits from options.
+     * 0xf selects bits 3..0 of the options bit field.
+     */
+    UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf
+};
+
+/* Building a trie ---------------------------------------------------------- */
+
+/*
+ * These definitions are mostly needed by utrie2_builder.c, but also by
+ * utrie2_get32() and utrie2_enum().
+ */
+
+enum {
+    /**
+     * At build time, leave a gap in the index-2 table,
+     * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table
+     * and the supplementary index-1 table.
+     * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting.
+     *
+     * The gap starts at UTRIE2_INDEX_2_BMP_LENGTH.
+     * Its length is computed as (x+mask)&~mask, which rounds x up to a multiple
+     * of mask+1 when mask is one less than a power of two.
+     * NOTE(review): UTRIE2_INDEX_2_MASK and UTRIE2_INDEX_2_BLOCK_LENGTH are defined
+     * in utrie2.h (not shown here); confirm that the block length equals mask+1.
+     */
+    UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
+    UNEWTRIE2_INDEX_GAP_LENGTH=
+        ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)&
+        ~UTRIE2_INDEX_2_MASK,
+
+    /**
+     * Maximum length of the build-time index-2 array.
+     * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
+     * plus the part of the index-2 table for lead surrogate code points,
+     * plus the build-time index gap,
+     * plus the null index-2 block.
+     */
+    UNEWTRIE2_MAX_INDEX_2_LENGTH=
+        (0x110000>>UTRIE2_SHIFT_2)+
+        UTRIE2_LSCP_INDEX_2_LENGTH+
+        UNEWTRIE2_INDEX_GAP_LENGTH+
+        UTRIE2_INDEX_2_BLOCK_LENGTH,
+
+    /**
+     * Length of the build-time index-1 array (UNewTrie2.index1):
+     * the number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_1.
+     */
+    UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1
+};
+
+/**
+ * Maximum length of the build-time data array.
+ * One entry for each of the 0x110000 code points,
+ * plus the illegal-UTF-8 block and the null block (the two 0x40 terms),
+ * plus values for the 0x400 surrogate code units.
+ */
+#define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400)
+
+/*
+ * Build-time trie structure.
+ *
+ * Just using a boolean flag for "repeat use" could lead to data array overflow
+ * because we would not be able to detect when a data block becomes unused.
+ * It also leads to orphan data blocks that are kept through serialization.
+ *
+ * Need to use reference counting for data blocks,
+ * and allocDataBlock() needs to look for a free block before increasing dataLength.
+ *
+ * This scheme seems like overkill for index-2 blocks since the whole index array is
+ * preallocated anyway (unlike the growable data array).
+ * Just allocating multiple index-2 blocks as needed.
+ *
+ * Note that index1 and index2 are embedded fixed-size arrays sized for the
+ * maximum case, and map is likewise embedded, so sizeof(struct UNewTrie2) is large;
+ * only data is a separate, pointed-to array.
+ */
+struct UNewTrie2 {
+    /* Build-time index-1 and index-2 tables, preallocated at their maximum lengths. */
+    int32_t index1[UNEWTRIE2_INDEX_1_LENGTH];
+    int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH];
+    /* The growable data array. NOTE(review): allocation and ownership are not visible here. */
+    uint32_t *data;
+#ifdef UCPTRIE_DEBUG
+    /* Present only when UCPTRIE_DEBUG is defined. NOTE(review): purpose not visible here. */
+    UMutableCPTrie *t3;
+#endif
+
+    /*
+     * NOTE(review): the meanings below are inferred from the field names and from
+     * the comments in this header; confirm against the builder code.
+     * - index2Length: presumably the used length of index2.
+     * - dataCapacity, dataLength: presumably the allocated and used lengths of data;
+     *   dataLength is what allocDataBlock() increases when no free block is found.
+     * - firstFreeBlock: presumably the head of the free-block list kept in map.
+     * - index2NullOffset, dataNullOffset: presumably the build-time counterparts of
+     *   the UTrie2Header fields of the same names.
+     * - highStart: presumably the unshifted counterpart of UTrie2Header.shiftedHighStart.
+     */
+    uint32_t initialValue, errorValue;
+    int32_t index2Length, dataCapacity, dataLength;
+    int32_t firstFreeBlock;
+    int32_t index2NullOffset, dataNullOffset;
+    UChar32 highStart;
+    UBool isCompacted;
+
+    /**
+     * Multi-purpose per-data-block table.
+     *
+     * Before compacting:
+     *
+     * Per-data-block reference counters/free-block list.
+     *  0: unused
+     * >0: reference counter (number of index-2 entries pointing here)
+     * <0: next free data block in free-block list
+     *
+     * While compacting:
+     *
+     * Map of adjusted indexes, used in compactData() and compactIndex2().
+     * Maps from original indexes to new ones.
+     *
+     * Sized as the maximum data length shifted right by UTRIE2_SHIFT_2, i.e. one
+     * entry per data block assuming a block holds 1<<UTRIE2_SHIFT_2 values
+     * (NOTE(review): block length is defined in utrie2.h; confirm).
+     */
+    int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2];
+};
+
+#endif
-- 
cgit v1.2.3