diff options
Diffstat (limited to 'intl/icu/source/i18n/collationkeys.h')
-rw-r--r-- | intl/icu/source/i18n/collationkeys.h | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/collationkeys.h b/intl/icu/source/i18n/collationkeys.h new file mode 100644 index 0000000000..60d9e50c0d --- /dev/null +++ b/intl/icu/source/i18n/collationkeys.h @@ -0,0 +1,169 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2012-2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* collationkeys.h +* +* created on: 2012sep02 +* created by: Markus W. Scherer +*/ + +#ifndef __COLLATIONKEYS_H__ +#define __COLLATIONKEYS_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "unicode/bytestream.h" +#include "unicode/ucol.h" +#include "charstr.h" +#include "collation.h" + +U_NAMESPACE_BEGIN + +class CollationIterator; +struct CollationDataReader; +struct CollationSettings; + +class SortKeyByteSink : public ByteSink { +public: + SortKeyByteSink(char *dest, int32_t destCapacity) + : buffer_(dest), capacity_(destCapacity), + appended_(0), ignore_(0) {} + virtual ~SortKeyByteSink(); + + void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; } + + virtual void Append(const char *bytes, int32_t n); + void Append(uint32_t b) { + if (ignore_ > 0) { + --ignore_; + } else { + if (appended_ < capacity_ || Resize(1, appended_)) { + buffer_[appended_] = (char)b; + } + ++appended_; + } + } + virtual char *GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char *scratch, int32_t scratch_capacity, + int32_t *result_capacity); + int32_t NumberOfBytesAppended() const { return appended_; } + + /** + * @return how many bytes can be appended (including ignored ones) + * without reallocation + */ + int32_t GetRemainingCapacity() const { + // Either ignore_ or appended_ should be 0. + return ignore_ + capacity_ - appended_; + } + + UBool Overflowed() const { return appended_ > capacity_; } + /** @return FALSE if memory allocation failed */ + UBool IsOk() const { return buffer_ != NULL; } + +protected: + virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0; + virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0; + + void SetNotOk() { + buffer_ = NULL; + capacity_ = 0; + } + + char *buffer_; + int32_t capacity_; + int32_t appended_; + int32_t ignore_; + +private: + SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented + SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented +}; + +class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ { +public: + class LevelCallback : public UMemory { + public: + virtual ~LevelCallback(); + /** + * @param level The next level about to be written to the ByteSink. + * @return TRUE if the level is to be written + * (the base class implementation always returns TRUE) + */ + virtual UBool needToWrite(Collation::Level level); + }; + + /** + * Writes the sort key bytes for minLevel up to the iterator data's strength. + * Optionally writes the case level. + * Stops writing levels when callback.needToWrite(level) returns FALSE. + * Separates levels with the LEVEL_SEPARATOR_BYTE + * but does not write a TERMINATOR_BYTE. + */ + static void writeSortKeyUpToQuaternary(CollationIterator &iter, + const UBool *compressibleBytes, + const CollationSettings &settings, + SortKeyByteSink &sink, + Collation::Level minLevel, LevelCallback &callback, + UBool preflight, UErrorCode &errorCode); +private: + friend struct CollationDataReader; + + CollationKeys(); // no instantiation + + // Secondary level: Compress up to 33 common weights as 05..25 or 25..45. + static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE; + static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; + static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; + static const int32_t SEC_COMMON_MAX_COUNT = 0x21; + + // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13. + static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1; + static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7; + static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13; + static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; + + // Case level, upperFirst: Compress up to 13 common weights as 3..15. + static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3; + static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15; + static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; + + // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5. + static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE; + static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; + static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; + static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61; + + // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45. + static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE; + static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; + static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; + static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; + + // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5. + static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80; + static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; + static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; + static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; + + // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC. + static const uint32_t QUAT_COMMON_LOW = 0x1c; + static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; + static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; + static const int32_t QUAT_COMMON_MAX_COUNT = 0x71; + // Primary weights shifted to quaternary level must be encoded with + // a lead byte below the common-weight compression range. + static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_COLLATION +#endif // __COLLATIONKEYS_H__ |