summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/collationkeys.h
blob: 60d9e50c0d9106bb7ae0bbcb26c3d12e772b120d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationkeys.h
*
* created on: 2012sep02
* created by: Markus W. Scherer
*/

#ifndef __COLLATIONKEYS_H__
#define __COLLATIONKEYS_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/bytestream.h"
#include "unicode/ucol.h"
#include "charstr.h"
#include "collation.h"

U_NAMESPACE_BEGIN

class CollationIterator;
struct CollationDataReader;
struct CollationSettings;

class SortKeyByteSink : public ByteSink {
public:
    SortKeyByteSink(char *dest, int32_t destCapacity)
            : buffer_(dest), capacity_(destCapacity),
              appended_(0), ignore_(0) {}
    virtual ~SortKeyByteSink();

    void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }

    virtual void Append(const char *bytes, int32_t n);
    void Append(uint32_t b) {
        if (ignore_ > 0) {
            --ignore_;
        } else {
            if (appended_ < capacity_ || Resize(1, appended_)) {
                buffer_[appended_] = (char)b;
            }
            ++appended_;
        }
    }
    virtual char *GetAppendBuffer(int32_t min_capacity,
                                  int32_t desired_capacity_hint,
                                  char *scratch, int32_t scratch_capacity,
                                  int32_t *result_capacity);
    int32_t NumberOfBytesAppended() const { return appended_; }

    /**
     * @return how many bytes can be appended (including ignored ones)
     *         without reallocation
     */
    int32_t GetRemainingCapacity() const {
        // Either ignore_ or appended_ should be 0.
        return ignore_ + capacity_ - appended_;
    }

    UBool Overflowed() const { return appended_ > capacity_; }
    /** @return FALSE if memory allocation failed */
    UBool IsOk() const { return buffer_ != NULL; }

protected:
    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
    virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;

    void SetNotOk() {
        buffer_ = NULL;
        capacity_ = 0;
    }

    char *buffer_;
    int32_t capacity_;
    int32_t appended_;
    int32_t ignore_;

private:
    SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
    SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
};

class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
public:
    class LevelCallback : public UMemory {
    public:
        virtual ~LevelCallback();
        /**
         * @param level The next level about to be written to the ByteSink.
         * @return TRUE if the level is to be written
         *         (the base class implementation always returns TRUE)
         */
        virtual UBool needToWrite(Collation::Level level);
    };

    /**
     * Writes the sort key bytes for minLevel up to the iterator data's strength.
     * Optionally writes the case level.
     * Stops writing levels when callback.needToWrite(level) returns FALSE.
     * Separates levels with the LEVEL_SEPARATOR_BYTE
     * but does not write a TERMINATOR_BYTE.
     */
    static void writeSortKeyUpToQuaternary(CollationIterator &iter,
                                           const UBool *compressibleBytes,
                                           const CollationSettings &settings,
                                           SortKeyByteSink &sink,
                                           Collation::Level minLevel, LevelCallback &callback,
                                           UBool preflight, UErrorCode &errorCode);
private:
    friend struct CollationDataReader;

    CollationKeys();  // no instantiation

    // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
    static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
    static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
    static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
    static const int32_t SEC_COMMON_MAX_COUNT = 0x21;

    // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
    static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
    static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
    static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
    static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;

    // Case level, upperFirst: Compress up to 13 common weights as 3..15.
    static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
    static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
    static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;

    // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
    static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
    static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
    static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
    static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;

    // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
    static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
    static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
    static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
    static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;

    // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
    static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
    static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
    static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
    static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;

    // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
    static const uint32_t QUAT_COMMON_LOW = 0x1c;
    static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
    static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
    static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
    // Primary weights shifted to quaternary level must be encoded with
    // a lead byte below the common-weight compression range.
    static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b
};

U_NAMESPACE_END

#endif  // !UCONFIG_NO_COLLATION
#endif  // __COLLATIONKEYS_H__