diff options
Diffstat (limited to '')
-rw-r--r-- | intl/icu/source/common/utrie2_builder.cpp | 1483 |
1 files changed, 1483 insertions, 0 deletions
diff --git a/intl/icu/source/common/utrie2_builder.cpp b/intl/icu/source/common/utrie2_builder.cpp new file mode 100644 index 0000000000..8de824cc3d --- /dev/null +++ b/intl/icu/source/common/utrie2_builder.cpp @@ -0,0 +1,1483 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utrie2_builder.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2008sep26 (split off from utrie2.c) +* created by: Markus W. Scherer +* +* This is a common implementation of a Unicode trie. +* It is a kind of compressed, serializable table of 16- or 32-bit values associated with +* Unicode code points (0..0x10ffff). +* This is the second common version of a Unicode trie (hence the name UTrie2). +* See utrie2.h for a comparison. +* +* This file contains only the builder code. +* See utrie2.c for the runtime and enumeration code. +*/ +// #define UTRIE2_DEBUG +#ifdef UTRIE2_DEBUG +# include <stdio.h> +#endif +// #define UCPTRIE_DEBUG + +#include "unicode/utypes.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "ucptrie_impl.h" +#endif +#include "cmemory.h" +#include "utrie2.h" +#include "utrie2_impl.h" + +#include "utrie.h" // for utrie2_fromUTrie() + +/* Implementation notes ----------------------------------------------------- */ + +/* + * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values + * have been chosen to minimize trie sizes overall. + * Most of the code is flexible enough to work with a range of values, + * within certain limits. + * + * Exception: Support for separate values for lead surrogate code _units_ + * vs. code _points_ was added after the constants were fixed, + * and has not been tested nor particularly designed for different constant values. + * (Especially the utrie2_enum() code that jumps to the special LSCP index-2 + * part and back.) + * + * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data + * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH + * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction + * remapping) stops working. + * + * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate() + * assumes that a single index-2 block is used for 0x400 code points + * corresponding to one lead surrogate. + * + * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains + * more than one Unicode plane, and the split of the index-2 table into a BMP + * part and a supplementary part, with a gap in between, would not work. + * + * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because + * there is data with more than 64k distinct values, + * for example for Unihan collation with a separate collation weight per + * Han character. + */ + +/* Building a trie ----------------------------------------------------------*/ + +enum { + /** The null index-2 block, following the gap in the index-2 table. */ + UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH, + + /** The start of allocated index-2 blocks. */ + UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH, + + /** + * The null data block. + * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller, + * to work with 6-bit trail bytes from 2-byte UTF-8. + */ + UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET, + + /** The start of allocated data blocks. */ + UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40, + + /** + * The start of data blocks for U+0800 and above. + * Below, compaction uses a block length of 64 for 2-byte UTF-8. + * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH. + * Data values for 0x780 code points beyond ASCII. + */ + UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780 +}; + +/* Start with allocation of 16k data entries. */ +#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14) + +/* Grow about 8x each time. */ +#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17) + +static int32_t +allocIndex2Block(UNewTrie2 *trie); + +U_CAPI UTrie2 * U_EXPORT2 +utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { + UTrie2 *trie; + UNewTrie2 *newTrie; + uint32_t *data; + int32_t i, j; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + + trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); + newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); + data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4); + if(trie==NULL || newTrie==NULL || data==NULL) { + uprv_free(trie); + uprv_free(newTrie); + uprv_free(data); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + uprv_memset(trie, 0, sizeof(UTrie2)); + trie->initialValue=initialValue; + trie->errorValue=errorValue; + trie->highStart=0x110000; + trie->newTrie=newTrie; +#ifdef UTRIE2_DEBUG + trie->name="open"; +#endif + + newTrie->data=data; +#ifdef UCPTRIE_DEBUG + newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode); +#endif + newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH; + newTrie->initialValue=initialValue; + newTrie->errorValue=errorValue; + newTrie->highStart=0x110000; + newTrie->firstFreeBlock=0; /* no free block in the list */ + newTrie->isCompacted=FALSE; + + /* + * preallocate and reset + * - ASCII + * - the bad-UTF-8-data block + * - the null data block + */ + for(i=0; i<0x80; ++i) { + newTrie->data[i]=initialValue; + } + for(; i<0xc0; ++i) { + newTrie->data[i]=errorValue; + } + for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) { + newTrie->data[i]=initialValue; + } + newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET; + newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET; + + /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */ + for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { + newTrie->index2[i]=j; + newTrie->map[i]=1; + } + /* reference counts for the bad-UTF-8-data block */ + for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { + newTrie->map[i]=0; + } + /* + * Reference counts for the null data block: all blocks except for the ASCII blocks. + * Plus 1 so that we don't drop this block during compaction. + * Plus as many as needed for lead surrogate code points. + */ + /* i==newTrie->dataNullOffset */ + newTrie->map[i++]= + (0x110000>>UTRIE2_SHIFT_2)- + (0x80>>UTRIE2_SHIFT_2)+ + 1+ + UTRIE2_LSCP_INDEX_2_LENGTH; + j+=UTRIE2_DATA_BLOCK_LENGTH; + for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { + newTrie->map[i]=0; + } + + /* + * set the remaining indexes in the BMP index-2 block + * to the null data block + */ + for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { + newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET; + } + + /* + * Fill the index gap with impossible values so that compaction + * does not overlap other index-2 blocks with the gap. + */ + for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) { + newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1; + } + + /* set the indexes in the null index-2 block */ + for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) { + newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET; + } + newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET; + newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET; + + /* set the index-1 indexes for the linear index-2 block */ + for(i=0, j=0; + i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; + ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH + ) { + newTrie->index1[i]=j; + } + + /* set the remaining index-1 indexes to the null index-2 block */ + for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { + newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET; + } + + /* + * Preallocate and reset data for U+0080..U+07ff, + * for 2-byte UTF-8 which will be compacted in 64-blocks + * even if UTRIE2_DATA_BLOCK_LENGTH is smaller. + */ + for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) { + utrie2_set32(trie, i, initialValue, pErrorCode); + } + + return trie; +} + +static UNewTrie2 * +cloneBuilder(const UNewTrie2 *other) { + UNewTrie2 *trie; + + trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); + if(trie==NULL) { + return NULL; + } + + trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4); + if(trie->data==NULL) { + uprv_free(trie); + return NULL; + } +#ifdef UCPTRIE_DEBUG + if(other->t3==nullptr) { + trie->t3=nullptr; + } else { + UErrorCode errorCode=U_ZERO_ERROR; + trie->t3=umutablecptrie_clone(other->t3, &errorCode); + } +#endif + trie->dataCapacity=other->dataCapacity; + + /* clone data */ + uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1)); + uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4); + trie->index2NullOffset=other->index2NullOffset; + trie->index2Length=other->index2Length; + + uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4); + trie->dataNullOffset=other->dataNullOffset; + trie->dataLength=other->dataLength; + + /* reference counters */ + if(other->isCompacted) { + trie->firstFreeBlock=0; + } else { + uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4); + trie->firstFreeBlock=other->firstFreeBlock; + } + + trie->initialValue=other->initialValue; + trie->errorValue=other->errorValue; + trie->highStart=other->highStart; + trie->isCompacted=other->isCompacted; + + return trie; +} + +U_CAPI UTrie2 * U_EXPORT2 +utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) { + UTrie2 *trie; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); + if(trie==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(trie, other, sizeof(UTrie2)); + + if(other->memory!=NULL) { + trie->memory=uprv_malloc(other->length); + if(trie->memory!=NULL) { + trie->isMemoryOwned=TRUE; + uprv_memcpy(trie->memory, other->memory, other->length); + + /* make the clone's pointers point to its own memory */ + trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory); + if(other->data16!=NULL) { + trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory); + } + if(other->data32!=NULL) { + trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory); + } + } + } else /* other->newTrie!=NULL */ { + trie->newTrie=cloneBuilder(other->newTrie); + } + + if(trie->memory==NULL && trie->newTrie==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + uprv_free(trie); + trie=NULL; + } + return trie; +} + +typedef struct NewTrieAndStatus { + UTrie2 *trie; + UErrorCode errorCode; + UBool exclusiveLimit; /* rather than inclusive range end */ +} NewTrieAndStatus; + +static UBool U_CALLCONV +copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { + NewTrieAndStatus *nt=(NewTrieAndStatus *)context; + if(value!=nt->trie->initialValue) { + if(nt->exclusiveLimit) { + --end; + } + if(start==end) { + utrie2_set32(nt->trie, start, value, &nt->errorCode); + } else { + utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode); + } + return U_SUCCESS(nt->errorCode); + } else { + return TRUE; + } +} + +#ifdef UTRIE2_DEBUG +static long countInitial(const UTrie2 *trie) { + uint32_t initialValue=trie->initialValue; + int32_t length=trie->dataLength; + long count=0; + if(trie->data16!=nullptr) { + for(int32_t i=0; i<length; ++i) { + if(trie->data16[i]==initialValue) { ++count; } + } + } else { + for(int32_t i=0; i<length; ++i) { + if(trie->data32[i]==initialValue) { ++count; } + } + } + return count; +} + +static void +utrie_printLengths(const UTrie *trie) { + long indexLength=trie->indexLength; + long dataLength=(long)trie->dataLength; + long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); + printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n", + indexLength, dataLength, totalLength); +} + +static void +utrie2_printLengths(const UTrie2 *trie, const char *which) { + long indexLength=trie->indexLength; + long dataLength=(long)trie->dataLength; + long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); + printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n", + which, trie->name, indexLength, dataLength, countInitial(trie), totalLength); +} +#endif + +U_CAPI UTrie2 * U_EXPORT2 +utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) { + NewTrieAndStatus context; + UChar lead; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if(other->newTrie!=NULL && !other->newTrie->isCompacted) { + return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */ + } + + /* Clone the frozen trie by enumerating it and building a new one. */ + context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + context.exclusiveLimit=FALSE; + context.errorCode=*pErrorCode; + utrie2_enum(other, NULL, copyEnumRange, &context); + *pErrorCode=context.errorCode; + for(lead=0xd800; lead<0xdc00; ++lead) { + uint32_t value; + if(other->data32==NULL) { + value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead); + } else { + value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead); + } + if(value!=other->initialValue) { + utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); + } + } + if(U_FAILURE(*pErrorCode)) { + utrie2_close(context.trie); + context.trie=NULL; + } + return context.trie; +} + +/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */ +U_CAPI UTrie2 * U_EXPORT2 +utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) { + NewTrieAndStatus context; + UChar lead; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(trie1==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + context.exclusiveLimit=TRUE; + context.errorCode=*pErrorCode; + utrie_enum(trie1, NULL, copyEnumRange, &context); + *pErrorCode=context.errorCode; + for(lead=0xd800; lead<0xdc00; ++lead) { + uint32_t value; + if(trie1->data32==NULL) { + value=UTRIE_GET16_FROM_LEAD(trie1, lead); + } else { + value=UTRIE_GET32_FROM_LEAD(trie1, lead); + } + if(value!=trie1->initialValue) { + utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); + } + } + if(U_SUCCESS(*pErrorCode)) { + utrie2_freeze(context.trie, + trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS, + pErrorCode); + } +#ifdef UTRIE2_DEBUG + if(U_SUCCESS(*pErrorCode)) { + utrie_printLengths(trie1); + utrie2_printLengths(context.trie, "fromUTrie"); + } +#endif + if(U_FAILURE(*pErrorCode)) { + utrie2_close(context.trie); + context.trie=NULL; + } + return context.trie; +} + +static inline UBool +isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { + int32_t i2, block; + + if(U_IS_LEAD(c) && forLSCP) { + i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ + (c>>UTRIE2_SHIFT_2); + } else { + i2=trie->index1[c>>UTRIE2_SHIFT_1]+ + ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); + } + block=trie->index2[i2]; + return (UBool)(block==trie->dataNullOffset); +} + +static int32_t +allocIndex2Block(UNewTrie2 *trie) { + int32_t newBlock, newTop; + + newBlock=trie->index2Length; + newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH; + if(newTop>UPRV_LENGTHOF(trie->index2)) { + /* + * Should never occur. + * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect, + * or the code writes more values than should be possible. + */ + return -1; + } + trie->index2Length=newTop; + uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4); + return newBlock; +} + +static int32_t +getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { + int32_t i1, i2; + + if(U_IS_LEAD(c) && forLSCP) { + return UTRIE2_LSCP_INDEX_2_OFFSET; + } + + i1=c>>UTRIE2_SHIFT_1; + i2=trie->index1[i1]; + if(i2==trie->index2NullOffset) { + i2=allocIndex2Block(trie); + if(i2<0) { + return -1; /* program error */ + } + trie->index1[i1]=i2; + } + return i2; +} + +static int32_t +allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) { + int32_t newBlock, newTop; + + if(trie->firstFreeBlock!=0) { + /* get the first free block */ + newBlock=trie->firstFreeBlock; + trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2]; + } else { + /* get a new block from the high end */ + newBlock=trie->dataLength; + newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH; + if(newTop>trie->dataCapacity) { + /* out of memory in the data array */ + int32_t capacity; + uint32_t *data; + + if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) { + capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH; + } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) { + capacity=UNEWTRIE2_MAX_DATA_LENGTH; + } else { + /* + * Should never occur. + * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect, + * or the code writes more values than should be possible. + */ + return -1; + } + data=(uint32_t *)uprv_malloc(capacity*4); + if(data==NULL) { + return -1; + } + uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4); + uprv_free(trie->data); + trie->data=data; + trie->dataCapacity=capacity; + } + trie->dataLength=newTop; + } + uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4); + trie->map[newBlock>>UTRIE2_SHIFT_2]=0; + return newBlock; +} + +/* call when the block's reference counter reaches 0 */ +static void +releaseDataBlock(UNewTrie2 *trie, int32_t block) { + /* put this block at the front of the free-block chain */ + trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock; + trie->firstFreeBlock=block; +} + +static inline UBool +isWritableBlock(UNewTrie2 *trie, int32_t block) { + return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]); +} + +static inline void +setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) { + int32_t oldBlock; + ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */ + oldBlock=trie->index2[i2]; + if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) { + releaseDataBlock(trie, oldBlock); + } + trie->index2[i2]=block; +} + +/** + * No error checking for illegal arguments. + * + * @return -1 if no new data block available (out of memory in data array) + * @internal + */ +static int32_t +getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { + int32_t i2, oldBlock, newBlock; + + i2=getIndex2Block(trie, c, forLSCP); + if(i2<0) { + return -1; /* program error */ + } + + i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; + oldBlock=trie->index2[i2]; + if(isWritableBlock(trie, oldBlock)) { + return oldBlock; + } + + /* allocate a new data block */ + newBlock=allocDataBlock(trie, oldBlock); + if(newBlock<0) { + /* out of memory in the data array */ + return -1; + } + setIndex2Entry(trie, i2, newBlock); + return newBlock; +} + +/** + * @return TRUE if the value was successfully set + */ +static void +set32(UNewTrie2 *trie, + UChar32 c, UBool forLSCP, uint32_t value, + UErrorCode *pErrorCode) { + int32_t block; + + if(trie==NULL || trie->isCompacted) { + *pErrorCode=U_NO_WRITE_PERMISSION; + return; + } +#ifdef UCPTRIE_DEBUG + umutablecptrie_set(trie->t3, c, value, pErrorCode); +#endif + + block=getDataBlock(trie, c, forLSCP); + if(block<0) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + trie->data[block+(c&UTRIE2_DATA_MASK)]=value; +} + +U_CAPI void U_EXPORT2 +utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + if((uint32_t)c>0x10ffff) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + set32(trie->newTrie, c, TRUE, value, pErrorCode); +} + +U_CAPI void U_EXPORT2 +utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, + UChar32 c, uint32_t value, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + if(!U_IS_LEAD(c)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + set32(trie->newTrie, c, FALSE, value, pErrorCode); +} + +static void +writeBlock(uint32_t *block, uint32_t value) { + uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH; + while(block<limit) { + *block++=value; + } +} + +/** + * initialValue is ignored if overwrite=TRUE + * @internal + */ +static void +fillBlock(uint32_t *block, UChar32 start, UChar32 limit, + uint32_t value, uint32_t initialValue, UBool overwrite) { + uint32_t *pLimit; + + pLimit=block+limit; + block+=start; + if(overwrite) { + while(block<pLimit) { + *block++=value; + } + } else { + while(block<pLimit) { + if(*block==initialValue) { + *block=value; + } + ++block; + } + } +} + +U_CAPI void U_EXPORT2 +utrie2_setRange32(UTrie2 *trie, + UChar32 start, UChar32 end, + uint32_t value, UBool overwrite, + UErrorCode *pErrorCode) { + /* + * repeat value in [start..end] + * mark index values for repeat-data blocks by setting bit 31 of the index values + * fill around existing values if any, if(overwrite) + */ + UNewTrie2 *newTrie; + int32_t block, rest, repeatBlock; + UChar32 limit; + + if(U_FAILURE(*pErrorCode)) { + return; + } + if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + newTrie=trie->newTrie; + if(newTrie==NULL || newTrie->isCompacted) { + *pErrorCode=U_NO_WRITE_PERMISSION; + return; + } +#ifdef UCPTRIE_DEBUG + umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode); +#endif + if(!overwrite && value==newTrie->initialValue) { + return; /* nothing to do */ + } + + limit=end+1; + if(start&UTRIE2_DATA_MASK) { + UChar32 nextStart; + + /* set partial block at [start..following block boundary[ */ + block=getDataBlock(newTrie, start, TRUE); + if(block<0) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK; + if(nextStart<=limit) { + fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, + value, newTrie->initialValue, overwrite); + start=nextStart; + } else { + fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK, + value, newTrie->initialValue, overwrite); + return; + } + } + + /* number of positions in the last, partial block */ + rest=limit&UTRIE2_DATA_MASK; + + /* round down limit to a block boundary */ + limit&=~UTRIE2_DATA_MASK; + + /* iterate over all-value blocks */ + if(value==newTrie->initialValue) { + repeatBlock=newTrie->dataNullOffset; + } else { + repeatBlock=-1; + } + + while(start<limit) { + int32_t i2; + UBool setRepeatBlock=FALSE; + + if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) { + start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */ + continue; + } + + /* get index value */ + i2=getIndex2Block(newTrie, start, TRUE); + if(i2<0) { + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + return; + } + i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; + block=newTrie->index2[i2]; + if(isWritableBlock(newTrie, block)) { + /* already allocated */ + if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { + /* + * We overwrite all values, and it's not a + * protected (ASCII-linear or 2-byte UTF-8) block: + * replace with the repeatBlock. + */ + setRepeatBlock=TRUE; + } else { + /* !overwrite, or protected block: just write the values into this block */ + fillBlock(newTrie->data+block, + 0, UTRIE2_DATA_BLOCK_LENGTH, + value, newTrie->initialValue, overwrite); + } + } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) { + /* + * Set the repeatBlock instead of the null block or previous repeat block: + * + * If !isWritableBlock() then all entries in the block have the same value + * because it's the null block or a range block (the repeatBlock from a previous + * call to utrie2_setRange32()). + * No other blocks are used multiple times before compacting. + * + * The null block is the only non-writable block with the initialValue because + * of the repeatBlock initialization above. (If value==initialValue, then + * the repeatBlock will be the null data block.) + * + * We set our repeatBlock if the desired value differs from the block's value, + * and if we overwrite any data or if the data is all initial values + * (which is the same as the block being the null block, see above). + */ + setRepeatBlock=TRUE; + } + if(setRepeatBlock) { + if(repeatBlock>=0) { + setIndex2Entry(newTrie, i2, repeatBlock); + } else { + /* create and set and fill the repeatBlock */ + repeatBlock=getDataBlock(newTrie, start, TRUE); + if(repeatBlock<0) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + writeBlock(newTrie->data+repeatBlock, value); + } + } + + start+=UTRIE2_DATA_BLOCK_LENGTH; + } + + if(rest>0) { + /* set partial block at [last block boundary..limit[ */ + block=getDataBlock(newTrie, start, TRUE); + if(block<0) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite); + } + + return; +} + +/* compaction --------------------------------------------------------------- */ + +static inline UBool +equal_int32(const int32_t *s, const int32_t *t, int32_t length) { + while(length>0 && *s==*t) { + ++s; + ++t; + --length; + } + return (UBool)(length==0); +} + +static inline UBool +equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { + while(length>0 && *s==*t) { + ++s; + ++t; + --length; + } + return (UBool)(length==0); +} + +static int32_t +findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) { + int32_t block; + + /* ensure that we do not even partially get past index2Length */ + index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH; + + for(block=0; block<=index2Length; ++block) { + if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) { + return block; + } + } + return -1; +} + +static int32_t +findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) { + int32_t block; + + /* ensure that we do not even partially get past dataLength */ + dataLength-=blockLength; + + for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) { + if(equal_uint32(data+block, data+otherBlock, blockLength)) { + return block; + } + } + return -1; +} + +/* + * Find the start of the last range in the trie by enumerating backward. + * Indexes for supplementary code points higher than this will be omitted. + */ +static UChar32 +findHighStart(UNewTrie2 *trie, uint32_t highValue) { + const uint32_t *data32; + + uint32_t value, initialValue; + UChar32 c, prev; + int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; + + data32=trie->data; + initialValue=trie->initialValue; + + index2NullOffset=trie->index2NullOffset; + nullBlock=trie->dataNullOffset; + + /* set variables for previous range */ + if(highValue==initialValue) { + prevI2Block=index2NullOffset; + prevBlock=nullBlock; + } else { + prevI2Block=-1; + prevBlock=-1; + } + prev=0x110000; + + /* enumerate index-2 blocks */ + i1=UNEWTRIE2_INDEX_1_LENGTH; + c=prev; + while(c>0) { + i2Block=trie->index1[--i1]; + if(i2Block==prevI2Block) { + /* the index-2 block is the same as the previous one, and filled with highValue */ + c-=UTRIE2_CP_PER_INDEX_1_ENTRY; + continue; + } + prevI2Block=i2Block; + if(i2Block==index2NullOffset) { + /* this is the null index-2 block */ + if(highValue!=initialValue) { + return c; + } + c-=UTRIE2_CP_PER_INDEX_1_ENTRY; + } else { + /* enumerate data blocks for one index-2 block */ + for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { + block=trie->index2[i2Block+ --i2]; + if(block==prevBlock) { + /* the block is the same as the previous one, and filled with highValue */ + c-=UTRIE2_DATA_BLOCK_LENGTH; + continue; + } + prevBlock=block; + if(block==nullBlock) { + /* this is the null data block */ + if(highValue!=initialValue) { + return c; + } + c-=UTRIE2_DATA_BLOCK_LENGTH; + } else { + for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { + value=data32[block+ --j]; + if(value!=highValue) { + return c; + } + --c; + } + } + } + } + } + + /* deliver last range */ + return 0; +} + +/* + * Compact a build-time trie. + * + * The compaction + * - removes blocks that are identical with earlier ones + * - overlaps adjacent blocks as much as possible (if overlap==TRUE) + * - moves blocks in steps of the data granularity + * - moves and overlaps blocks that overlap with multiple values in the overlap region + * + * It does not + * - try to move and overlap blocks that are not already adjacent + */ +static void +compactData(UNewTrie2 *trie) { +#ifdef UTRIE2_DEBUG + int32_t countSame=0, sumOverlaps=0; +#endif + + int32_t start, newStart, movedStart; + int32_t blockLength, overlap; + int32_t i, mapIndex, blockCount; + + /* do not compact linear-ASCII data */ + newStart=UTRIE2_DATA_START_OFFSET; + for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) { + trie->map[i]=start; + } + + /* + * Start with a block length of 64 for 2-byte UTF-8, + * then switch to UTRIE2_DATA_BLOCK_LENGTH. + */ + blockLength=64; + blockCount=blockLength>>UTRIE2_SHIFT_2; + for(start=newStart; start<trie->dataLength;) { + /* + * start: index of first entry of current block + * newStart: index where the current block is to be moved + * (right after current end of already-compacted data) + */ + if(start==UNEWTRIE2_DATA_0800_OFFSET) { + blockLength=UTRIE2_DATA_BLOCK_LENGTH; + blockCount=1; + } + + /* skip blocks that are not used */ + if(trie->map[start>>UTRIE2_SHIFT_2]<=0) { + /* advance start to the next block */ + start+=blockLength; + + /* leave newStart with the previous block! */ + continue; + } + + /* search for an identical block */ + if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength)) + >=0 + ) { +#ifdef UTRIE2_DEBUG + ++countSame; +#endif + /* found an identical block, set the other block's index value for the current block */ + for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { + trie->map[mapIndex++]=movedStart; + movedStart+=UTRIE2_DATA_BLOCK_LENGTH; + } + + /* advance start to the next block */ + start+=blockLength; + + /* leave newStart with the previous block! */ + continue; + } + + /* see if the beginning of this block can be overlapped with the end of the previous block */ + /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ + for(overlap=blockLength-UTRIE2_DATA_GRANULARITY; + overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap); + overlap-=UTRIE2_DATA_GRANULARITY) {} + +#ifdef UTRIE2_DEBUG + sumOverlaps+=overlap; +#endif + if(overlap>0 || newStart<start) { + /* some overlap, or just move the whole block */ + movedStart=newStart-overlap; + for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { + trie->map[mapIndex++]=movedStart; + movedStart+=UTRIE2_DATA_BLOCK_LENGTH; + } + + /* move the non-overlapping indexes to their new positions */ + start+=overlap; + for(i=blockLength-overlap; i>0; --i) { + trie->data[newStart++]=trie->data[start++]; + } + } else /* no overlap && newStart==start */ { + for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { + trie->map[mapIndex++]=start; + start+=UTRIE2_DATA_BLOCK_LENGTH; + } + newStart=start; + } + } + + /* now adjust the index-2 table */ + for(i=0; i<trie->index2Length; ++i) { + if(i==UNEWTRIE2_INDEX_GAP_OFFSET) { + /* Gap indexes are invalid (-1). Skip over the gap. */ + i+=UNEWTRIE2_INDEX_GAP_LENGTH; + } + trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2]; + } + trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2]; + + /* ensure dataLength alignment */ + while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { + trie->data[newStart++]=trie->initialValue; + } + +#ifdef UTRIE2_DEBUG + /* we saved some space */ + printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", + (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps); +#endif + + trie->dataLength=newStart; +} + +static void +compactIndex2(UNewTrie2 *trie) { + int32_t i, start, newStart, movedStart, overlap; + + /* do not compact linear-BMP index-2 blocks */ + newStart=UTRIE2_INDEX_2_BMP_LENGTH; + for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) { + trie->map[i]=start; + } + + /* Reduce the index table gap to what will be needed at runtime. */ + newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1); + + for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) { + /* + * start: index of first entry of current block + * newStart: index where the current block is to be moved + * (right after current end of already-compacted data) + */ + + /* search for an identical block */ + if( (movedStart=findSameIndex2Block(trie->index2, newStart, start)) + >=0 + ) { + /* found an identical block, set the other block's index value for the current block */ + trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart; + + /* advance start to the next block */ + start+=UTRIE2_INDEX_2_BLOCK_LENGTH; + + /* leave newStart with the previous block! */ + continue; + } + + /* see if the beginning of this block can be overlapped with the end of the previous block */ + /* look for maximum overlap with the previous, adjacent block */ + for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1; + overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap); + --overlap) {} + + if(overlap>0 || newStart<start) { + /* some overlap, or just move the whole block */ + trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap; + + /* move the non-overlapping indexes to their new positions */ + start+=overlap; + for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { + trie->index2[newStart++]=trie->index2[start++]; + } + } else /* no overlap && newStart==start */ { + trie->map[start>>UTRIE2_SHIFT_1_2]=start; + start+=UTRIE2_INDEX_2_BLOCK_LENGTH; + newStart=start; + } + } + + /* now adjust the index-1 table */ + for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { + trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2]; + } + trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2]; + + /* + * Ensure data table alignment: + * Needs to be granularity-aligned for 16-bit trie + * (so that dataMove will be down-shiftable), + * and 2-aligned for uint32_t data. + */ + while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { + /* Arbitrary value: 0x3fffc not possible for real data. */ + trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT; + } + +#ifdef UTRIE2_DEBUG + /* we saved some space */ + printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n", + (long)trie->index2Length, (long)newStart); +#endif + + trie->index2Length=newStart; +} + +static void +compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { + UNewTrie2 *newTrie; + UChar32 highStart, suppHighStart; + uint32_t highValue; + + newTrie=trie->newTrie; + + /* find highStart and round it up */ + highValue=utrie2_get32(trie, 0x10ffff); + highStart=findHighStart(newTrie, highValue); + highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1); + if(highStart==0x110000) { + highValue=trie->errorValue; + } + + /* + * Set trie->highStart only after utrie2_get32(trie, highStart). + * Otherwise utrie2_get32(trie, highStart) would try to read the highValue. + */ + trie->highStart=newTrie->highStart=highStart; + +#ifdef UTRIE2_DEBUG + printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", + (long)highStart, (long)highValue, (long)trie->initialValue); +#endif + + if(highStart<0x110000) { + /* Blank out [highStart..10ffff] to release associated data blocks. */ + suppHighStart= highStart<=0x10000 ? 0x10000 : highStart; + utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + compactData(newTrie); + if(highStart>0x10000) { + compactIndex2(newTrie); +#ifdef UTRIE2_DEBUG + } else { + printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n", + (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET); +#endif + } + + /* + * Store the highValue in the data array and round up the dataLength. + * Must be done after compactData() because that assumes that dataLength + * is a multiple of UTRIE2_DATA_BLOCK_LENGTH. + */ + newTrie->data[newTrie->dataLength++]=highValue; + while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { + newTrie->data[newTrie->dataLength++]=trie->initialValue; + } + + newTrie->isCompacted=TRUE; +} + +/* serialization ------------------------------------------------------------ */ + +/** + * Maximum length of the runtime index array. + * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength. + * (The actual maximum length is lower, + * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.) + */ +#define UTRIE2_MAX_INDEX_LENGTH 0xffff + +/** + * Maximum length of the runtime data array. + * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT, + * and by uint16_t UTrie2Header.shiftedDataLength. + */ +#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT) + +/* Compact and internally serialize the trie. */ +U_CAPI void U_EXPORT2 +utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { + UNewTrie2 *newTrie; + UTrie2Header *header; + uint32_t *p; + uint16_t *dest16; + int32_t i, length; + int32_t allIndexesLength; + int32_t dataMove; /* >0 if the data is moved to the end of the index array */ + UChar32 highStart; + + /* argument check */ + if(U_FAILURE(*pErrorCode)) { + return; + } + if( trie==NULL || + valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + newTrie=trie->newTrie; + if(newTrie==NULL) { + /* already frozen */ + UTrie2ValueBits frozenValueBits= + trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS; + if(valueBits!=frozenValueBits) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } + return; + } + + /* compact if necessary */ + if(!newTrie->isCompacted) { + compactTrie(trie, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + highStart=trie->highStart; + + if(highStart<=0x10000) { + allIndexesLength=UTRIE2_INDEX_1_OFFSET; + } else { + allIndexesLength=newTrie->index2Length; + } + if(valueBits==UTRIE2_16_VALUE_BITS) { + dataMove=allIndexesLength; + } else { + dataMove=0; + } + + /* are indexLength and dataLength within limits? */ + if( /* for unshifted indexLength */ + allIndexesLength>UTRIE2_MAX_INDEX_LENGTH || + /* for unshifted dataNullOffset */ + (dataMove+newTrie->dataNullOffset)>0xffff || + /* for unshifted 2-byte UTF-8 index-2 values */ + (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff || + /* for shiftedDataLength */ + (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH + ) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + /* calculate the total serialized length */ + length=sizeof(UTrie2Header)+allIndexesLength*2; + if(valueBits==UTRIE2_16_VALUE_BITS) { + length+=newTrie->dataLength*2; + } else { + length+=newTrie->dataLength*4; + } + + trie->memory=uprv_malloc(length); + if(trie->memory==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + trie->length=length; + trie->isMemoryOwned=TRUE; + + trie->indexLength=allIndexesLength; + trie->dataLength=newTrie->dataLength; + if(highStart<=0x10000) { + trie->index2NullOffset=0xffff; + } else { + trie->index2NullOffset=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset); + } + trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset); + trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY; + + /* set the header fields */ + header=(UTrie2Header *)trie->memory; + + header->signature=UTRIE2_SIG; /* "Tri2" */ + header->options=(uint16_t)valueBits; + + header->indexLength=(uint16_t)trie->indexLength; + header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT); + header->index2NullOffset=trie->index2NullOffset; + header->dataNullOffset=trie->dataNullOffset; + header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1); + + /* fill the index and data arrays */ + dest16=(uint16_t *)(header+1); + trie->index=dest16; + + /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */ + p=(uint32_t *)newTrie->index2; + for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) { + *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); + } + + /* write UTF-8 2-byte index-2 values, not right-shifted */ + for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ + *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); + } + for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ + *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]); + } + + if(highStart>0x10000) { + int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1; + int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length; + + /* write 16-bit index-1 values for supplementary code points */ + p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; + for(i=index1Length; i>0; --i) { + *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++); + } + + /* + * write the index-2 array values for supplementary code points, + * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove + */ + p=(uint32_t *)newTrie->index2+index2Offset; + for(i=newTrie->index2Length-index2Offset; i>0; --i) { + *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); + } + } + + /* write the 16/32-bit data array */ + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + /* write 16-bit data values */ + trie->data16=dest16; + trie->data32=NULL; + p=newTrie->data; + for(i=newTrie->dataLength; i>0; --i) { + *dest16++=(uint16_t)*p++; + } + break; + case UTRIE2_32_VALUE_BITS: + /* write 32-bit data values */ + trie->data16=NULL; + trie->data32=(uint32_t *)dest16; + uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4); + break; + default: + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + +#ifdef UTRIE2_DEBUG + utrie2_printLengths(trie, ""); +#endif + +#ifdef UCPTRIE_DEBUG + umutablecptrie_setName(newTrie->t3, trie->name); + ucptrie_close( + umutablecptrie_buildImmutable( + newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode)); +#endif + /* Delete the UNewTrie2. */ + uprv_free(newTrie->data); + uprv_free(newTrie); + trie->newTrie=NULL; +} |