summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/numparse_affixes.h
blob: 81b633c2626c38ed1a742d842245035dd7c1c48c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING
#ifndef __NUMPARSE_AFFIXES_H__
#define __NUMPARSE_AFFIXES_H__

#include "cmemory.h"

#include "numparse_types.h"
#include "numparse_symbols.h"
#include "numparse_currency.h"
#include "number_affixutils.h"
#include "number_currencysymbols.h"

U_NAMESPACE_BEGIN
namespace numparse {
namespace impl {

// Forward-declaration of implementation classes for friending
class AffixPatternMatcherBuilder;
class AffixPatternMatcher;

using ::icu::number::impl::AffixPatternProvider;
using ::icu::number::impl::TokenConsumer;
using ::icu::number::impl::CurrencySymbols;


class U_I18N_API CodePointMatcher : public NumberParseMatcher, public UMemory {
  public:
    CodePointMatcher() = default;  // WARNING: Leaves the object in an unusable state

    CodePointMatcher(UChar32 cp);

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

    bool smokeTest(const StringSegment& segment) const override;

    UnicodeString toString() const override;

  private:
    UChar32 fCp;
};

} // namespace impl
} // namespace numparse

// Export a explicit template instantiations of MaybeStackArray, MemoryPool and CompactUnicodeString.
// When building DLLs for Windows this is required even though no direct access leaks out of the i18n library.
// (See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.)
// Note: These need to be outside of the numparse::impl namespace, or Clang will generate a compile error.
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
template class U_I18N_API MaybeStackArray<numparse::impl::CodePointMatcher*, 8>; 
template class U_I18N_API MaybeStackArray<char16_t, 4>;
template class U_I18N_API MemoryPool<numparse::impl::CodePointMatcher, 8>;
template class U_I18N_API numparse::impl::CompactUnicodeString<4>;
#endif

namespace numparse {
namespace impl {

struct AffixTokenMatcherSetupData {
    const CurrencySymbols& currencySymbols;
    const DecimalFormatSymbols& dfs;
    IgnorablesMatcher& ignorables;
    const Locale& locale;
    parse_flags_t parseFlags;
};


/**
 * Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
 *
 * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a
 * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The
 * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from
 * the warehouse.
 *
 * @author sffc
 */
// Exported as U_I18N_API for tests
class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
  public:
    AffixTokenMatcherWarehouse() = default;  // WARNING: Leaves the object in an unusable state

    AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData);

    NumberParseMatcher& minusSign();

    NumberParseMatcher& plusSign();

    NumberParseMatcher& percent();

    NumberParseMatcher& permille();

    NumberParseMatcher& currency(UErrorCode& status);

    IgnorablesMatcher& ignorables();

    NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status);

    bool hasEmptyCurrencySymbol() const;

  private:
    // NOTE: The following field may be unsafe to access after construction is done!
    const AffixTokenMatcherSetupData* fSetupData;

    // NOTE: These are default-constructed and should not be used until initialized.
    MinusSignMatcher fMinusSign;
    PlusSignMatcher fPlusSign;
    PercentMatcher fPercent;
    PermilleMatcher fPermille;
    CombinedCurrencyMatcher fCurrency;

    // Use a child class for code point matchers, since it requires non-default operators.
    MemoryPool<CodePointMatcher> fCodePoints;

    friend class AffixPatternMatcherBuilder;
    friend class AffixPatternMatcher;
};


class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection {
  public:
    AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
                               IgnorablesMatcher* ignorables);

    void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override;

    /** NOTE: You can build only once! */
    AffixPatternMatcher build(UErrorCode& status);

  private:
    ArraySeriesMatcher::MatcherArray fMatchers;
    int32_t fMatchersLen;
    int32_t fLastTypeOrCp;

    const UnicodeString& fPattern;
    AffixTokenMatcherWarehouse& fWarehouse;
    IgnorablesMatcher* fIgnorables;

    void addMatcher(NumberParseMatcher& matcher) override;
};


// Exported as U_I18N_API for tests
class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher {
  public:
    AffixPatternMatcher() = default;  // WARNING: Leaves the object in an unusable state

    static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
                                                AffixTokenMatcherWarehouse& warehouse,
                                                parse_flags_t parseFlags, bool* success,
                                                UErrorCode& status);

    UnicodeString getPattern() const;

    bool operator==(const AffixPatternMatcher& other) const;

  private:
    CompactUnicodeString<4> fPattern;

    AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern,
                        UErrorCode& status);

    friend class AffixPatternMatcherBuilder;
};


class AffixMatcher : public NumberParseMatcher, public UMemory {
  public:
    AffixMatcher() = default;  // WARNING: Leaves the object in an unusable state

    AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags);

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

    void postProcess(ParsedNumber& result) const override;

    bool smokeTest(const StringSegment& segment) const override;

    int8_t compareTo(const AffixMatcher& rhs) const;

    UnicodeString toString() const override;

  private:
    AffixPatternMatcher* fPrefix;
    AffixPatternMatcher* fSuffix;
    result_flags_t fFlags;
};


/**
 * A C++-only class to retain ownership of the AffixMatchers needed for parsing.
 */
class AffixMatcherWarehouse {
  public:
    AffixMatcherWarehouse() = default;  // WARNING: Leaves the object in an unusable state

    AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse);

    void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output,
                             const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
                             UErrorCode& status);

  private:
    // 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix,
    // and doubled since there may be an empty currency symbol
    AffixMatcher fAffixMatchers[18];
    // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each,
    // and doubled since there may be an empty currency symbol
    AffixPatternMatcher fAffixPatternMatchers[12];
    // Reference to the warehouse for tokens used by the AffixPatternMatchers
    AffixTokenMatcherWarehouse* fTokenWarehouse;

    friend class AffixMatcher;

    static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables,
                              parse_flags_t parseFlags, UErrorCode& status);
};


} // namespace impl
} // namespace numparse
U_NAMESPACE_END

#endif //__NUMPARSE_AFFIXES_H__
#endif /* #if !UCONFIG_NO_FORMATTING */