summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/utrie_swap.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/common/utrie_swap.cpp')
-rw-r--r--intl/icu/source/common/utrie_swap.cpp348
1 files changed, 348 insertions, 0 deletions
diff --git a/intl/icu/source/common/utrie_swap.cpp b/intl/icu/source/common/utrie_swap.cpp
new file mode 100644
index 0000000000..83d183699d
--- /dev/null
+++ b/intl/icu/source/common/utrie_swap.cpp
@@ -0,0 +1,348 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// utrie_swap.cpp
+// created: 2018aug08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "ucptrie_impl.h"
+#include "udataswp.h"
+#include "utrie.h"
+#include "utrie2_impl.h"
+
+// These functions for swapping different generations of ICU code point tries are here
+// so that their implementation files need not depend on swapper code,
+// need not depend on each other, and so that other swapper code
+// need not depend on other trie code.
+
+namespace {
+
+constexpr int32_t ASCII_LIMIT = 0x80;
+
+} // namespace
+
+U_CAPI int32_t U_EXPORT2
+utrie_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const UTrieHeader *inTrie;
+ UTrieHeader trie;
+ int32_t size;
+ UBool dataIs32;
+
+ if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ inTrie=(const UTrieHeader *)inData;
+ trie.signature=ds->readUInt32(inTrie->signature);
+ trie.options=ds->readUInt32(inTrie->options);
+ trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
+ trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
+
+ if( trie.signature!=0x54726965 ||
+ (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
+ ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
+ trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
+ (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
+ trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
+ (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
+ ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
+ ) {
+ *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
+ return 0;
+ }
+
+ dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
+ size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
+
+ if(length>=0) {
+ UTrieHeader *outTrie;
+
+ if(length<size) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ outTrie=(UTrieHeader *)outData;
+
+ /* swap the header */
+ ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
+
+ /* swap the index and the data */
+ if(dataIs32) {
+ ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
+ ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
+ (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
+ } else {
+ ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
+ }
+ }
+
+ return size;
+}
+
+U_CAPI int32_t U_EXPORT2
+utrie2_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const UTrie2Header *inTrie;
+ UTrie2Header trie;
+ int32_t dataLength, size;
+ UTrie2ValueBits valueBits;
+
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ inTrie=(const UTrie2Header *)inData;
+ trie.signature=ds->readUInt32(inTrie->signature);
+ trie.options=ds->readUInt16(inTrie->options);
+ trie.indexLength=ds->readUInt16(inTrie->indexLength);
+ trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
+
+ valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
+ dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
+
+ if( trie.signature!=UTRIE2_SIG ||
+ valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
+ trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
+ dataLength<UTRIE2_DATA_START_OFFSET
+ ) {
+ *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
+ return 0;
+ }
+
+ size=sizeof(UTrie2Header)+trie.indexLength*2;
+ switch(valueBits) {
+ case UTRIE2_16_VALUE_BITS:
+ size+=dataLength*2;
+ break;
+ case UTRIE2_32_VALUE_BITS:
+ size+=dataLength*4;
+ break;
+ default:
+ *pErrorCode=U_INVALID_FORMAT_ERROR;
+ return 0;
+ }
+
+ if(length>=0) {
+ UTrie2Header *outTrie;
+
+ if(length<size) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ outTrie=(UTrie2Header *)outData;
+
+ /* swap the header */
+ ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
+ ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
+
+ /* swap the index and the data */
+ switch(valueBits) {
+ case UTRIE2_16_VALUE_BITS:
+ ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
+ break;
+ case UTRIE2_32_VALUE_BITS:
+ ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
+ ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
+ (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
+ break;
+ default:
+ *pErrorCode=U_INVALID_FORMAT_ERROR;
+ return 0;
+ }
+ }
+
+ return size;
+}
+
+U_CAPI int32_t U_EXPORT2
+ucptrie_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const UCPTrieHeader *inTrie;
+ UCPTrieHeader trie;
+ int32_t dataLength, size;
+ UCPTrieValueWidth valueWidth;
+
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ inTrie=(const UCPTrieHeader *)inData;
+ trie.signature=ds->readUInt32(inTrie->signature);
+ trie.options=ds->readUInt16(inTrie->options);
+ trie.indexLength=ds->readUInt16(inTrie->indexLength);
+ trie.dataLength = ds->readUInt16(inTrie->dataLength);
+
+ UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
+ valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
+ dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
+
+ int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
+ UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
+ if( trie.signature!=UCPTRIE_SIG ||
+ type > UCPTRIE_TYPE_SMALL ||
+ (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
+ valueWidth > UCPTRIE_VALUE_BITS_8 ||
+ trie.indexLength < minIndexLength ||
+ dataLength < ASCII_LIMIT
+ ) {
+ *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
+ return 0;
+ }
+
+ size=sizeof(UCPTrieHeader)+trie.indexLength*2;
+ switch(valueWidth) {
+ case UCPTRIE_VALUE_BITS_16:
+ size+=dataLength*2;
+ break;
+ case UCPTRIE_VALUE_BITS_32:
+ size+=dataLength*4;
+ break;
+ case UCPTRIE_VALUE_BITS_8:
+ size+=dataLength;
+ break;
+ default:
+ *pErrorCode=U_INVALID_FORMAT_ERROR;
+ return 0;
+ }
+
+ if(length>=0) {
+ UCPTrieHeader *outTrie;
+
+ if(length<size) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ outTrie=(UCPTrieHeader *)outData;
+
+ /* swap the header */
+ ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
+ ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
+
+ /* swap the index */
+ const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1);
+ uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1);
+ ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode);
+
+ /* swap the data */
+ const uint16_t *inData=inIndex+trie.indexLength;
+ uint16_t *outData=outIndex+trie.indexLength;
+ switch(valueWidth) {
+ case UCPTRIE_VALUE_BITS_16:
+ ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode);
+ break;
+ case UCPTRIE_VALUE_BITS_32:
+ ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode);
+ break;
+ case UCPTRIE_VALUE_BITS_8:
+ if(inTrie!=outTrie) {
+ uprv_memmove(outData, inData, dataLength);
+ }
+ break;
+ default:
+ *pErrorCode=U_INVALID_FORMAT_ERROR;
+ return 0;
+ }
+ }
+
+ return size;
+}
+
+namespace {
+
+/**
+ * Gets the trie version from 32-bit-aligned memory containing the serialized form
+ * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
+ *
+ * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
+ * @param length the number of bytes available at data;
+ * can be more than necessary (see return value)
+ * @param anyEndianOk If false, only platform-endian serialized forms are recognized.
+ * If true, opposite-endian serialized forms are recognized as well.
+ * @return the trie version of the serialized form, or 0 if it is not
+ * recognized as a serialized trie
+ */
+int32_t
+getVersion(const void *data, int32_t length, UBool anyEndianOk) {
+ uint32_t signature;
+ if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
+ return 0;
+ }
+ signature=*(const uint32_t *)data;
+ if(signature==UCPTRIE_SIG) {
+ return 3;
+ }
+ if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
+ return 3;
+ }
+ if(signature==UTRIE2_SIG) {
+ return 2;
+ }
+ if(anyEndianOk && signature==UTRIE2_OE_SIG) {
+ return 2;
+ }
+ if(signature==UTRIE_SIG) {
+ return 1;
+ }
+ if(anyEndianOk && signature==UTRIE_OE_SIG) {
+ return 1;
+ }
+ return 0;
+}
+
+} // namespace
+
+U_CAPI int32_t U_EXPORT2
+utrie_swapAnyVersion(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ if(U_FAILURE(*pErrorCode)) { return 0; }
+ switch(getVersion(inData, length, true)) {
+ case 1:
+ return utrie_swap(ds, inData, length, outData, pErrorCode);
+ case 2:
+ return utrie2_swap(ds, inData, length, outData, pErrorCode);
+ case 3:
+ return ucptrie_swap(ds, inData, length, outData, pErrorCode);
+ default:
+ *pErrorCode=U_INVALID_FORMAT_ERROR;
+ return 0;
+ }
+}