summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/string_segment.h
blob: b581f7e57599eb9525970a4783f084876d6d92c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING
#ifndef __NUMPARSE_STRINGSEGMENT_H__
#define __NUMPARSE_STRINGSEGMENT_H__

#include "unicode/unistr.h"
#include "unicode/uniset.h"

U_NAMESPACE_BEGIN


/**
 * A mutable UnicodeString wrapper with a variable offset and length and
 * support for case folding. The charAt, length, and subSequence methods all
 * operate relative to the fixed offset into the UnicodeString.
 *
 * Intended to be useful for parsing.
 *
 * CAUTION: Since this class is mutable, it must not be used anywhere that an
 * immutable object is required, like in a cache or as the key of a hash map.
 *
 * @author sffc (Shane Carr)
 */
// Exported as U_I18N_API for tests
class U_I18N_API StringSegment : public UMemory {
  public:
    StringSegment(const UnicodeString& str, bool ignoreCase);

    int32_t getOffset() const;

    void setOffset(int32_t start);

    /**
     * Equivalent to <code>setOffset(getOffset()+delta)</code>.
     *
     * <p>
     * This method is usually called by a Matcher to register that a char was consumed. If the char is
     * strong (it usually is, except for things like whitespace), follow this with a call to
     * {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
     */
    void adjustOffset(int32_t delta);

    /**
     * Adjusts the offset by the width of the current code point, either 1 or 2 chars.
     */
    void adjustOffsetByCodePoint();

    void setLength(int32_t length);

    void resetLength();

    int32_t length() const;

    char16_t charAt(int32_t index) const;

    UChar32 codePointAt(int32_t index) const;

    UnicodeString toUnicodeString() const;

    const UnicodeString toTempUnicodeString() const;

    /**
     * Returns the first code point in the string segment, or -1 if the string starts with an invalid
     * code point.
     *
     * <p>
     * <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles case
     * folding logic, instead of this method.
     */
    UChar32 getCodePoint() const;

    /**
     * Returns true if the first code point of this StringSegment equals the given code point.
     *
     * <p>
     * This method will perform case folding if case folding is enabled for the parser.
     */
    bool startsWith(UChar32 otherCp) const;

    /**
     * Returns true if the first code point of this StringSegment is in the given UnicodeSet.
     */
    bool startsWith(const UnicodeSet& uniset) const;

    /**
     * Returns true if there is at least one code point of overlap between this StringSegment and the
     * given UnicodeString.
     */
    bool startsWith(const UnicodeString& other) const;

    /**
     * Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For
     * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
     * since the first 2 characters are the same.
     *
     * <p>
     * This method only returns offsets along code point boundaries.
     *
     * <p>
     * This method will perform case folding if case folding was enabled in the constructor.
     *
     * <p>
     * IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
     */
    int32_t getCommonPrefixLength(const UnicodeString& other);

    /**
     * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
     * enabled for the parser.
     */
    int32_t getCaseSensitivePrefixLength(const UnicodeString& other);

    bool operator==(const UnicodeString& other) const;

  private:
    const UnicodeString& fStr;
    int32_t fStart;
    int32_t fEnd;
    bool fFoldCase;

    int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase);

    static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase);
};


U_NAMESPACE_END

#endif //__NUMPARSE_STRINGSEGMENT_H__
#endif /* #if !UCONFIG_NO_FORMATTING */