diff options
Diffstat (limited to 'intl/icu/source/i18n/sortkey.cpp')
-rw-r--r-- | intl/icu/source/i18n/sortkey.cpp | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/sortkey.cpp b/intl/icu/source/i18n/sortkey.cpp new file mode 100644 index 0000000000..1fd066cc20 --- /dev/null +++ b/intl/icu/source/i18n/sortkey.cpp @@ -0,0 +1,287 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1996-2012, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ +//=============================================================================== +// +// File sortkey.cpp +// +// +// +// Created by: Helena Shih +// +// Modification History: +// +// Date Name Description +// +// 6/20/97 helena Java class name change. +// 6/23/97 helena Added comments to make code more readable. +// 6/26/98 erm Changed to use byte arrays instead of UnicodeString +// 7/31/98 erm hashCode: minimum inc should be 2 not 1, +// Cleaned up operator= +// 07/12/99 helena HPUX 11 CC port. +// 03/06/01 synwee Modified compareTo, to handle the result of +// 2 string similar in contents, but one is longer +// than the other +//=============================================================================== + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "unicode/sortkey.h" +#include "cmemory.h" +#include "uelement.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +// A hash code of kInvalidHashCode indicates that the hash code needs +// to be computed. A hash code of kEmptyHashCode is used for empty keys +// and for any key whose computed hash code is kInvalidHashCode. +static const int32_t kInvalidHashCode = 0; +static const int32_t kEmptyHashCode = 1; +// The "bogus hash code" replaces a separate fBogus flag. +static const int32_t kBogusHashCode = 2; + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) + +CollationKey::CollationKey() + : UObject(), fFlagAndLength(0), + fHashCode(kEmptyHashCode) +{ +} + +// Create a collation key from a bit array. +CollationKey::CollationKey(const uint8_t* newValues, int32_t count) + : UObject(), fFlagAndLength(count), + fHashCode(kInvalidHashCode) +{ + if (count < 0 || (newValues == nullptr && count != 0) || + (count > getCapacity() && reallocate(count, 0) == nullptr)) { + setToBogus(); + return; + } + + if (count > 0) { + uprv_memcpy(getBytes(), newValues, count); + } +} + +CollationKey::CollationKey(const CollationKey& other) + : UObject(other), fFlagAndLength(other.getLength()), + fHashCode(other.fHashCode) +{ + if (other.isBogus()) + { + setToBogus(); + return; + } + + int32_t length = fFlagAndLength; + if (length > getCapacity() && reallocate(length, 0) == nullptr) { + setToBogus(); + return; + } + + if (length > 0) { + uprv_memcpy(getBytes(), other.getBytes(), length); + } +} + +CollationKey::~CollationKey() +{ + if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } +} + +uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { + uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); + if(newBytes == nullptr) { return nullptr; } + if(length > 0) { + uprv_memcpy(newBytes, getBytes(), length); + } + if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } + fUnion.fFields.fBytes = newBytes; + fUnion.fFields.fCapacity = newCapacity; + fFlagAndLength |= 0x80000000; + return newBytes; +} + +void CollationKey::setLength(int32_t newLength) { + // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); + fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; + fHashCode = kInvalidHashCode; +} + +// set the key to an empty state +CollationKey& +CollationKey::reset() +{ + fFlagAndLength &= 0x80000000; + fHashCode = kEmptyHashCode; + + return *this; +} + +// set the key to a "bogus" or invalid state +CollationKey& +CollationKey::setToBogus() +{ + fFlagAndLength &= 0x80000000; + fHashCode = kBogusHashCode; + + return *this; +} + +bool +CollationKey::operator==(const CollationKey& source) const +{ + return getLength() == source.getLength() && + (this == &source || + uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); +} + +const CollationKey& +CollationKey::operator=(const CollationKey& other) +{ + if (this != &other) + { + if (other.isBogus()) + { + return setToBogus(); + } + + int32_t length = other.getLength(); + if (length > getCapacity() && reallocate(length, 0) == nullptr) { + return setToBogus(); + } + if (length > 0) { + uprv_memcpy(getBytes(), other.getBytes(), length); + } + fFlagAndLength = (fFlagAndLength & 0x80000000) | length; + fHashCode = other.fHashCode; + } + + return *this; +} + +// Bitwise comparison for the collation keys. +Collator::EComparisonResult +CollationKey::compareTo(const CollationKey& target) const +{ + UErrorCode errorCode = U_ZERO_ERROR; + return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); +} + +// Bitwise comparison for the collation keys. +UCollationResult +CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const +{ + if(U_SUCCESS(status)) { + const uint8_t *src = getBytes(); + const uint8_t *tgt = target.getBytes(); + + // are we comparing the same string + if (src == tgt) + return UCOL_EQUAL; + + UCollationResult result; + + // are we comparing different lengths? + int32_t minLength = getLength(); + int32_t targetLength = target.getLength(); + if (minLength < targetLength) { + result = UCOL_LESS; + } else if (minLength == targetLength) { + result = UCOL_EQUAL; + } else { + minLength = targetLength; + result = UCOL_GREATER; + } + + if (minLength > 0) { + int diff = uprv_memcmp(src, tgt, minLength); + if (diff > 0) { + return UCOL_GREATER; + } + else + if (diff < 0) { + return UCOL_LESS; + } + } + + return result; + } else { + return UCOL_EQUAL; + } +} + +#ifdef U_USE_COLLATION_KEY_DEPRECATES +// Create a copy of the byte array. +uint8_t* +CollationKey::toByteArray(int32_t& count) const +{ + uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); + + if (result == nullptr) + { + count = 0; + } + else + { + count = fCount; + if (count > 0) { + uprv_memcpy(result, fBytes, fCount); + } + } + + return result; +} +#endif + +static int32_t +computeHashCode(const uint8_t *key, int32_t length) { + const char *s = reinterpret_cast<const char *>(key); + int32_t hash; + if (s == nullptr || length == 0) { + hash = kEmptyHashCode; + } else { + hash = ustr_hashCharsN(s, length); + if (hash == kInvalidHashCode || hash == kBogusHashCode) { + hash = kEmptyHashCode; + } + } + return hash; +} + +int32_t +CollationKey::hashCode() const +{ + // (Cribbed from UnicodeString) + // We cache the hashCode; when it becomes invalid, due to any change to the + // string, we note this by setting it to kInvalidHashCode. [LIU] + + // Note: This method is semantically const, but physically non-const. + + if (fHashCode == kInvalidHashCode) + { + fHashCode = computeHashCode(getBytes(), getLength()); + } + + return fHashCode; +} + +U_NAMESPACE_END + +U_CAPI int32_t U_EXPORT2 +ucol_keyHashCode(const uint8_t *key, + int32_t length) +{ + return icu::computeHashCode(key, length); +} + +#endif /* #if !UCONFIG_NO_COLLATION */ |