diff options
Diffstat (limited to 'intl/icu/source/common/utrie_swap.cpp')
-rw-r--r-- | intl/icu/source/common/utrie_swap.cpp | 348 |
1 files changed, 348 insertions, 0 deletions
diff --git a/intl/icu/source/common/utrie_swap.cpp b/intl/icu/source/common/utrie_swap.cpp new file mode 100644 index 0000000000..83d183699d --- /dev/null +++ b/intl/icu/source/common/utrie_swap.cpp @@ -0,0 +1,348 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// utrie_swap.cpp +// created: 2018aug08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "ucptrie_impl.h" +#include "udataswp.h" +#include "utrie.h" +#include "utrie2_impl.h" + +// These functions for swapping different generations of ICU code point tries are here +// so that their implementation files need not depend on swapper code, +// need not depend on each other, and so that other swapper code +// need not depend on other trie code. + +namespace { + +constexpr int32_t ASCII_LIMIT = 0x80; + +} // namespace + +U_CAPI int32_t U_EXPORT2 +utrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UTrieHeader *inTrie; + UTrieHeader trie; + int32_t size; + UBool dataIs32; + + if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UTrieHeader *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt32(inTrie->options); + trie.indexLength=udata_readInt32(ds, inTrie->indexLength); + trie.dataLength=udata_readInt32(ds, inTrie->dataLength); + + if( trie.signature!=0x54726965 || + (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || + ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || + trie.indexLength<UTRIE_BMP_INDEX_LENGTH || + (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || + trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || + (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || + ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ + return 0; + } + + dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0); + size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); + + if(length>=0) { + UTrieHeader *outTrie; + + if(length<size) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + outTrie=(UTrieHeader *)outData; + + /* swap the header */ + ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); + + /* swap the index and the data */ + if(dataIs32) { + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + } else { + ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); + } + } + + return size; +} + +U_CAPI int32_t U_EXPORT2 +utrie2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UTrie2Header *inTrie; + UTrie2Header trie; + int32_t dataLength, size; + UTrie2ValueBits valueBits; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UTrie2Header *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt16(inTrie->options); + trie.indexLength=ds->readUInt16(inTrie->indexLength); + trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); + + valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); + dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; + + if( trie.signature!=UTRIE2_SIG || + valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || + trie.indexLength<UTRIE2_INDEX_1_OFFSET || + dataLength<UTRIE2_DATA_START_OFFSET + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ + return 0; + } + + size=sizeof(UTrie2Header)+trie.indexLength*2; + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + size+=dataLength*2; + break; + case UTRIE2_32_VALUE_BITS: + size+=dataLength*4; + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + if(length>=0) { + UTrie2Header *outTrie; + + if(length<size) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + outTrie=(UTrie2Header *)outData; + + /* swap the header */ + ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); + + /* swap the index and the data */ + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); + break; + case UTRIE2_32_VALUE_BITS: + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + } + + return size; +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UCPTrieHeader *inTrie; + UCPTrieHeader trie; + int32_t dataLength, size; + UCPTrieValueWidth valueWidth; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UCPTrieHeader *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt16(inTrie->options); + trie.indexLength=ds->readUInt16(inTrie->indexLength); + trie.dataLength = ds->readUInt16(inTrie->dataLength); + + UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); + valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); + dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; + + int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? + UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; + if( trie.signature!=UCPTRIE_SIG || + type > UCPTRIE_TYPE_SMALL || + (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || + valueWidth > UCPTRIE_VALUE_BITS_8 || + trie.indexLength < minIndexLength || + dataLength < ASCII_LIMIT + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ + return 0; + } + + size=sizeof(UCPTrieHeader)+trie.indexLength*2; + switch(valueWidth) { + case UCPTRIE_VALUE_BITS_16: + size+=dataLength*2; + break; + case UCPTRIE_VALUE_BITS_32: + size+=dataLength*4; + break; + case UCPTRIE_VALUE_BITS_8: + size+=dataLength; + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + if(length>=0) { + UCPTrieHeader *outTrie; + + if(length<size) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + outTrie=(UCPTrieHeader *)outData; + + /* swap the header */ + ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); + + /* swap the index */ + const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1); + uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1); + ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode); + + /* swap the data */ + const uint16_t *inData=inIndex+trie.indexLength; + uint16_t *outData=outIndex+trie.indexLength; + switch(valueWidth) { + case UCPTRIE_VALUE_BITS_16: + ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode); + break; + case UCPTRIE_VALUE_BITS_32: + ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode); + break; + case UCPTRIE_VALUE_BITS_8: + if(inTrie!=outTrie) { + uprv_memmove(outData, inData, dataLength); + } + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + } + + return size; +} + +namespace { + +/** + * Gets the trie version from 32-bit-aligned memory containing the serialized form + * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). + * + * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie + * @param length the number of bytes available at data; + * can be more than necessary (see return value) + * @param anyEndianOk If false, only platform-endian serialized forms are recognized. + * If true, opposite-endian serialized forms are recognized as well. + * @return the trie version of the serialized form, or 0 if it is not + * recognized as a serialized trie + */ +int32_t +getVersion(const void *data, int32_t length, UBool anyEndianOk) { + uint32_t signature; + if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { + return 0; + } + signature=*(const uint32_t *)data; + if(signature==UCPTRIE_SIG) { + return 3; + } + if(anyEndianOk && signature==UCPTRIE_OE_SIG) { + return 3; + } + if(signature==UTRIE2_SIG) { + return 2; + } + if(anyEndianOk && signature==UTRIE2_OE_SIG) { + return 2; + } + if(signature==UTRIE_SIG) { + return 1; + } + if(anyEndianOk && signature==UTRIE_OE_SIG) { + return 1; + } + return 0; +} + +} // namespace + +U_CAPI int32_t U_EXPORT2 +utrie_swapAnyVersion(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { return 0; } + switch(getVersion(inData, length, true)) { + case 1: + return utrie_swap(ds, inData, length, outData, pErrorCode); + case 2: + return utrie2_swap(ds, inData, length, outData, pErrorCode); + case 3: + return ucptrie_swap(ds, inData, length, outData, pErrorCode); + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } +} |