summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/sortkey.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/sortkey.cpp')
-rw-r--r--intl/icu/source/i18n/sortkey.cpp287
1 files changed, 287 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/sortkey.cpp b/intl/icu/source/i18n/sortkey.cpp
new file mode 100644
index 0000000000..1fd066cc20
--- /dev/null
+++ b/intl/icu/source/i18n/sortkey.cpp
@@ -0,0 +1,287 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1996-2012, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+//===============================================================================
+//
+// File sortkey.cpp
+//
+//
+//
+// Created by: Helena Shih
+//
+// Modification History:
+//
+// Date Name Description
+//
+// 6/20/97 helena Java class name change.
+// 6/23/97 helena Added comments to make code more readable.
+// 6/26/98 erm Changed to use byte arrays instead of UnicodeString
+// 7/31/98 erm hashCode: minimum inc should be 2 not 1,
+// Cleaned up operator=
+// 07/12/99 helena HPUX 11 CC port.
+// 03/06/01 synwee Modified compareTo, to handle the result of
+// 2 string similar in contents, but one is longer
+// than the other
+//===============================================================================
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/sortkey.h"
+#include "cmemory.h"
+#include "uelement.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+// Sentinel values stored in CollationKey::fHashCode.
+//
+// kInvalidHashCode: the hash code has not been computed yet and must be
+//                   computed on demand.
+// kEmptyHashCode:   used for empty keys, and substituted by computeHashCode()
+//                   for any key whose computed hash code would collide with
+//                   kInvalidHashCode or kBogusHashCode.
+// kBogusHashCode:   marks a "bogus" key; it replaces a separate fBogus flag.
+static constexpr int32_t kInvalidHashCode = 0;
+static constexpr int32_t kEmptyHashCode = 1;
+static constexpr int32_t kBogusHashCode = 2;
+
+// Emit the RTTI support definitions for CollationKey.
+// NOTE(review): the macro is defined outside this file; presumably it
+// provides the class-ID accessors for this class -- confirm against its
+// definition.
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
+
+// Default constructor: an empty key (length 0, heap flag clear) whose cached
+// hash code is already the empty-key value.
+CollationKey::CollationKey()
+        : UObject(),
+          fFlagAndLength(0),
+          fHashCode(kEmptyHashCode)
+{}
+
+// Create a collation key from a byte array.
+//
+// newValues: the bytes to copy into the key; may be nullptr only if count is 0.
+// count:     the number of bytes to copy; must not be negative.
+//
+// If the arguments are invalid, or a buffer of count bytes cannot be
+// allocated, the key is set to the bogus state instead.
+CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
+        : UObject(), fFlagAndLength(0),
+          fHashCode(kInvalidHashCode)
+{
+    // fFlagAndLength deliberately starts at 0 instead of count: its sign bit
+    // is the "heap buffer is owned" flag that the destructor tests. Storing a
+    // negative count there would set that flag without any allocation,
+    // setToBogus() would preserve it, and the destructor would then free an
+    // uninitialized pointer.
+    if (count < 0 || (newValues == nullptr && count != 0) ||
+            (count > getCapacity() && reallocate(count, 0) == nullptr)) {
+        setToBogus();
+        return;
+    }
+
+    if (count > 0) {
+        uprv_memcpy(getBytes(), newValues, count);
+    }
+
+    // Record the validated length, keeping the heap flag that reallocate()
+    // may have set. This also leaves the hash code marked as not yet computed.
+    setLength(count);
+}
+
+// Copy constructor: duplicates the bytes and the cached hash code of other.
+// The new key becomes bogus if other is bogus or if a large enough buffer
+// cannot be allocated.
+CollationKey::CollationKey(const CollationKey& other)
+        : UObject(other), fFlagAndLength(other.getLength()),
+          fHashCode(other.fHashCode)
+{
+    if (other.isBogus()) {
+        setToBogus();
+        return;
+    }
+
+    // Capture the length before reallocate() can set the heap flag bit.
+    const int32_t byteCount = fFlagAndLength;
+    const bool needsLargerBuffer = byteCount > getCapacity();
+    if (needsLargerBuffer && reallocate(byteCount, 0) == nullptr) {
+        setToBogus();
+        return;
+    }
+
+    if (byteCount > 0) {
+        uprv_memcpy(getBytes(), other.getBytes(), byteCount);
+    }
+}
+
+// Destructor: frees the heap buffer if one is owned. A negative
+// fFlagAndLength (sign bit set by reallocate()) marks heap ownership.
+CollationKey::~CollationKey()
+{
+    const bool ownsHeapBuffer = fFlagAndLength < 0;
+    if (ownsHeapBuffer) {
+        uprv_free(fUnion.fFields.fBytes);
+    }
+}
+
+// Switch the key to a newly allocated heap buffer.
+//
+// newCapacity: size in bytes of the buffer to allocate.
+// length:      number of leading bytes to carry over from the current buffer.
+//
+// Returns the new buffer, or nullptr if allocation failed; on failure the key
+// is left untouched. On success any previously owned heap buffer is freed and
+// the heap flag (sign bit of fFlagAndLength) is set.
+uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
+    uint8_t *const fresh = static_cast<uint8_t *>(uprv_malloc(newCapacity));
+    if (fresh != nullptr) {
+        if (length > 0) {
+            uprv_memcpy(fresh, getBytes(), length);
+        }
+        if (fFlagAndLength < 0) {
+            uprv_free(fUnion.fFields.fBytes);
+        }
+        fUnion.fFields.fBytes = fresh;
+        fUnion.fFields.fCapacity = newCapacity;
+        fFlagAndLength |= 0x80000000;
+    }
+    return fresh;
+}
+
+// Store a new key length while keeping the heap flag bit, and mark the cached
+// hash code as needing recomputation.
+void CollationKey::setLength(int32_t newLength) {
+    // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
+    fHashCode = kInvalidHashCode;
+    fFlagAndLength = newLength | (fFlagAndLength & 0x80000000);
+}
+
+// Put the key into the empty state: the length becomes zero (only the heap
+// flag bit is kept) and the hash code becomes the empty-key value.
+// Returns *this.
+CollationKey&
+CollationKey::reset()
+{
+    fHashCode = kEmptyHashCode;
+    fFlagAndLength = fFlagAndLength & 0x80000000;
+    return *this;
+}
+
+// Put the key into the "bogus" (invalid) state: the length becomes zero (only
+// the heap flag bit is kept) and the hash code is set to the bogus marker.
+// Returns *this.
+CollationKey&
+CollationKey::setToBogus()
+{
+    fHashCode = kBogusHashCode;
+    fFlagAndLength = fFlagAndLength & 0x80000000;
+    return *this;
+}
+
+// Equality: two keys are equal when they have the same length and the same
+// bytes. Comparing a key with itself skips the byte comparison.
+bool
+CollationKey::operator==(const CollationKey& source) const
+{
+    const int32_t length = getLength();
+    if (length != source.getLength()) {
+        return false;
+    }
+    if (this == &source) {
+        return true;
+    }
+    return uprv_memcmp(getBytes(), source.getBytes(), length) == 0;
+}
+
+// Assignment: copies the bytes and cached hash code of other into this key.
+// This key becomes bogus if other is bogus or if a large enough buffer cannot
+// be allocated. Self-assignment is a no-op. Returns *this.
+const CollationKey&
+CollationKey::operator=(const CollationKey& other)
+{
+    if (this == &other) {
+        return *this;
+    }
+    if (other.isBogus()) {
+        return setToBogus();
+    }
+
+    const int32_t byteCount = other.getLength();
+    if (byteCount > getCapacity() && reallocate(byteCount, 0) == nullptr) {
+        return setToBogus();
+    }
+    if (byteCount > 0) {
+        uprv_memcpy(getBytes(), other.getBytes(), byteCount);
+    }
+
+    // Keep the heap flag bit, replace only the length part.
+    fFlagAndLength = (fFlagAndLength & 0x80000000) | byteCount;
+    fHashCode = other.fHashCode;
+    return *this;
+}
+
+// Bitwise comparison of two collation keys, returning the result as a
+// Collator::EComparisonResult. Delegates to the UErrorCode overload with a
+// fresh success status.
+Collator::EComparisonResult
+CollationKey::compareTo(const CollationKey& target) const
+{
+    UErrorCode status = U_ZERO_ERROR;
+    const UCollationResult result = compareTo(target, status);
+    return static_cast<Collator::EComparisonResult>(result);
+}
+
+// Bitwise comparison of two collation keys.
+//
+// target: the key to compare against.
+// status: if it already holds a failure code, nothing is compared and
+//         UCOL_EQUAL is returned.
+//
+// The common prefix of both keys is compared byte-wise; if it is identical,
+// the shorter key orders first.
+UCollationResult
+CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
+{
+    if (!U_SUCCESS(status)) {
+        return UCOL_EQUAL;
+    }
+
+    const uint8_t *const lhs = getBytes();
+    const uint8_t *const rhs = target.getBytes();
+
+    // Same underlying buffer: nothing to compare.
+    if (lhs == rhs) {
+        return UCOL_EQUAL;
+    }
+
+    const int32_t lhsLength = getLength();
+    const int32_t rhsLength = target.getLength();
+    const int32_t commonLength = lhsLength < rhsLength ? lhsLength : rhsLength;
+
+    if (commonLength > 0) {
+        const int order = uprv_memcmp(lhs, rhs, commonLength);
+        if (order != 0) {
+            return order > 0 ? UCOL_GREATER : UCOL_LESS;
+        }
+    }
+
+    // Identical common prefix: the length difference decides.
+    if (lhsLength < rhsLength) {
+        return UCOL_LESS;
+    }
+    if (lhsLength > rhsLength) {
+        return UCOL_GREATER;
+    }
+    return UCOL_EQUAL;
+}
+
+#ifdef U_USE_COLLATION_KEY_DEPRECATES
+// Create a copy of the byte array.
+//
+// count: receives the number of bytes in the returned buffer, or 0 if the
+//        allocation failed.
+//
+// Returns a buffer obtained from uprv_malloc() holding a copy of the key
+// bytes, or nullptr if the allocation failed.
+// NOTE(review): presumably the caller must release the buffer with
+// uprv_free() -- confirm against the public API contract.
+uint8_t*
+CollationKey::toByteArray(int32_t& count) const
+{
+    // Use the same accessors as the rest of this file: the length lives in
+    // fFlagAndLength and the bytes are reached through getBytes().
+    const int32_t length = getLength();
+    uint8_t *result = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * length));
+
+    if (result == nullptr) {
+        count = 0;
+        return nullptr;
+    }
+
+    count = length;
+    if (length > 0) {
+        uprv_memcpy(result, getBytes(), length);
+    }
+    return result;
+}
+#endif
+
+// Compute the hash code for a run of key bytes.
+//
+// key:    the key bytes; may be nullptr.
+// length: the number of bytes in key.
+//
+// Returns kEmptyHashCode for a null or zero-length key. Otherwise returns the
+// ustr_hashCharsN() value, except that a result equal to one of the reserved
+// markers (kInvalidHashCode, kBogusHashCode) is replaced by kEmptyHashCode.
+static int32_t
+computeHashCode(const uint8_t *key, int32_t length) {
+    if (key == nullptr || length == 0) {
+        return kEmptyHashCode;
+    }
+    const int32_t hash = ustr_hashCharsN(reinterpret_cast<const char *>(key), length);
+    const bool isReserved = (hash == kInvalidHashCode || hash == kBogusHashCode);
+    return isReserved ? kEmptyHashCode : hash;
+}
+
+// Return the hash code of this key, computing and caching it on first use.
+int32_t
+CollationKey::hashCode() const
+{
+    // (Approach borrowed from UnicodeString.)
+    // The hash code is cached. Whenever the key changes, the cache is
+    // invalidated by storing kInvalidHashCode. [LIU]
+
+    // Note: semantically const, but physically non-const because the cache
+    // field is written here.
+
+    if (fHashCode != kInvalidHashCode) {
+        return fHashCode;
+    }
+
+    fHashCode = computeHashCode(getBytes(), getLength());
+    return fHashCode;
+}
+
+U_NAMESPACE_END
+
+// C API: hash code for a raw sort key, computed by the same routine that
+// CollationKey::hashCode() uses.
+//
+// key:    the sort key bytes; may be nullptr.
+// length: the number of bytes in key.
+U_CAPI int32_t U_EXPORT2
+ucol_keyHashCode(const uint8_t *key,
+                 int32_t length)
+{
+    const int32_t hash = icu::computeHashCode(key, length);
+    return hash;
+}
+
+#endif /* #if !UCONFIG_NO_COLLATION */