summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/plurrule_impl.h
blob: 4de6d6460aaa190fe0fdeaa95438c4f4ae72e1e1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2007-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File PLURRULE_IMPL.H
*
*******************************************************************************
*/


#ifndef PLURRULE_IMPL
#define PLURRULE_IMPL

// Internal definitions for the PluralRules implementation.

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "unicode/format.h"
#include "unicode/locid.h"
#include "unicode/parseerr.h"
#include "unicode/strenum.h"
#include "unicode/ures.h"
#include "uvector.h"
#include "hash.h"
#include "uassert.h"

/**
 * A FixedDecimal version of UPLRULES_NO_UNIQUE_VALUE used in PluralRulesTest
 * for parsing of samples.
 */
#define UPLRULES_NO_UNIQUE_VALUE_DECIMAL(ERROR_CODE) (DecimalQuantity::fromExponentString(u"-0.00123456777", ERROR_CODE))

class PluralRulesTest;

U_NAMESPACE_BEGIN

class AndConstraint;
class RuleChain;
class DigitInterval;
class PluralRules;
class VisibleDigits;

namespace pluralimpl {

// TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility.

static const char16_t DOT = ((char16_t) 0x002E);
static const char16_t SINGLE_QUOTE = ((char16_t) 0x0027);
static const char16_t SLASH = ((char16_t) 0x002F);
static const char16_t BACKSLASH = ((char16_t) 0x005C);
static const char16_t SPACE = ((char16_t) 0x0020);
static const char16_t EXCLAMATION = ((char16_t) 0x0021);
static const char16_t QUOTATION_MARK = ((char16_t) 0x0022);
static const char16_t NUMBER_SIGN = ((char16_t) 0x0023);
static const char16_t PERCENT_SIGN = ((char16_t) 0x0025);
static const char16_t ASTERISK = ((char16_t) 0x002A);
static const char16_t COMMA = ((char16_t) 0x002C);
static const char16_t HYPHEN = ((char16_t) 0x002D);
static const char16_t U_ZERO = ((char16_t) 0x0030);
static const char16_t U_ONE = ((char16_t) 0x0031);
static const char16_t U_TWO = ((char16_t) 0x0032);
static const char16_t U_THREE = ((char16_t) 0x0033);
static const char16_t U_FOUR = ((char16_t) 0x0034);
static const char16_t U_FIVE = ((char16_t) 0x0035);
static const char16_t U_SIX = ((char16_t) 0x0036);
static const char16_t U_SEVEN = ((char16_t) 0x0037);
static const char16_t U_EIGHT = ((char16_t) 0x0038);
static const char16_t U_NINE = ((char16_t) 0x0039);
static const char16_t COLON = ((char16_t) 0x003A);
static const char16_t SEMI_COLON = ((char16_t) 0x003B);
static const char16_t EQUALS = ((char16_t) 0x003D);
static const char16_t AT = ((char16_t) 0x0040);
static const char16_t CAP_A = ((char16_t) 0x0041);
static const char16_t CAP_B = ((char16_t) 0x0042);
static const char16_t CAP_R = ((char16_t) 0x0052);
static const char16_t CAP_Z = ((char16_t) 0x005A);
static const char16_t LOWLINE = ((char16_t) 0x005F);
static const char16_t LEFTBRACE = ((char16_t) 0x007B);
static const char16_t RIGHTBRACE = ((char16_t) 0x007D);
static const char16_t TILDE = ((char16_t) 0x007E);
static const char16_t ELLIPSIS = ((char16_t) 0x2026);

static const char16_t LOW_A = ((char16_t) 0x0061);
static const char16_t LOW_B = ((char16_t) 0x0062);
static const char16_t LOW_C = ((char16_t) 0x0063);
static const char16_t LOW_D = ((char16_t) 0x0064);
static const char16_t LOW_E = ((char16_t) 0x0065);
static const char16_t LOW_F = ((char16_t) 0x0066);
static const char16_t LOW_G = ((char16_t) 0x0067);
static const char16_t LOW_H = ((char16_t) 0x0068);
static const char16_t LOW_I = ((char16_t) 0x0069);
static const char16_t LOW_J = ((char16_t) 0x006a);
static const char16_t LOW_K = ((char16_t) 0x006B);
static const char16_t LOW_L = ((char16_t) 0x006C);
static const char16_t LOW_M = ((char16_t) 0x006D);
static const char16_t LOW_N = ((char16_t) 0x006E);
static const char16_t LOW_O = ((char16_t) 0x006F);
static const char16_t LOW_P = ((char16_t) 0x0070);
static const char16_t LOW_Q = ((char16_t) 0x0071);
static const char16_t LOW_R = ((char16_t) 0x0072);
static const char16_t LOW_S = ((char16_t) 0x0073);
static const char16_t LOW_T = ((char16_t) 0x0074);
static const char16_t LOW_U = ((char16_t) 0x0075);
static const char16_t LOW_V = ((char16_t) 0x0076);
static const char16_t LOW_W = ((char16_t) 0x0077);
static const char16_t LOW_Y = ((char16_t) 0x0079);
static const char16_t LOW_Z = ((char16_t) 0x007A);

}


static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff;

enum tokenType {
  none,
  tNumber,
  tComma,
  tSemiColon,
  tSpace,
  tColon,
  tAt,           // '@'
  tDot,
  tDot2,
  tEllipsis,
  tKeyword,
  tAnd,
  tOr,
  tMod,          // 'mod' or '%'
  tNot,          //  'not' only.
  tIn,           //  'in'  only.
  tEqual,        //  '='   only.
  tNotEqual,     //  '!='
  tTilde,
  tWithin,
  tIs,
  tVariableN,
  tVariableI,
  tVariableF,
  tVariableV,
  tVariableT,
  tVariableE,
  tVariableC,
  tDecimal,
  tInteger,
  tEOF
};


class PluralRuleParser: public UMemory {
public:
    PluralRuleParser();
    virtual ~PluralRuleParser();

    void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status);
    void getNextToken(UErrorCode &status);
    void checkSyntax(UErrorCode &status);
    static int32_t getNumberValue(const UnicodeString &token);

private:
    static tokenType getKeyType(const UnicodeString& token, tokenType type);
    static tokenType charType(char16_t ch);
    static UBool isValidKeyword(const UnicodeString& token);

    const UnicodeString  *ruleSrc;  // The rules string.
    int32_t        ruleIndex;       // String index in the input rules, the current parse position.
    UnicodeString  token;           // Token most recently scanned.
    tokenType      type;
    tokenType      prevType;

                                    // The items currently being parsed & built.
                                    // Note: currentChain may not be the last RuleChain in the
                                    //       list because the "other" chain is forced to the end.
    AndConstraint *curAndConstraint;
    RuleChain     *currentChain;

    int32_t        rangeLowIdx;     // Indices in the UVector of ranges of the
    int32_t        rangeHiIdx;      //    low and hi values currently being parsed.

    enum EParseState {
       kKeyword,
       kExpr,
       kValue,
       kRangeList,
       kSamples
    };
};

enum PluralOperand {
    /**
    * The double value of the entire number.
    */
    PLURAL_OPERAND_N,

    /**
     * The integer value, with the fraction digits truncated off.
     */
    PLURAL_OPERAND_I,

    /**
     * All visible fraction digits as an integer, including trailing zeros.
     */
    PLURAL_OPERAND_F,

    /**
     * Visible fraction digits as an integer, not including trailing zeros.
     */
    PLURAL_OPERAND_T,

    /**
     * Number of visible fraction digits.
     */
    PLURAL_OPERAND_V,

    /**
     * Number of visible fraction digits, not including trailing zeros.
     */
    PLURAL_OPERAND_W,

    /**
     * Suppressed exponent for scientific notation (exponent needed in
     * scientific notation to approximate i).
     */
    PLURAL_OPERAND_E,

    /**
     * This operand is currently treated as an alias for `PLURAL_OPERAND_E`.
     * In the future, it will represent:
     *
     * Suppressed exponent for compact notation (exponent needed in
     * compact notation to approximate i).
     */
    PLURAL_OPERAND_C,

    /**
     * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC.
     *
     * <p>Returns the integer value, but will fail if the number has fraction digits.
     * That is, using "j" instead of "i" is like implicitly adding "v is 0".
     *
     * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches
     * "3" but not "3.1" or "3.0".
     */
    PLURAL_OPERAND_J
};

/**
 * Converts from the tokenType enum to PluralOperand. Asserts that the given
 * tokenType can be mapped to a PluralOperand.
 */
PluralOperand tokenTypeToPluralOperand(tokenType tt);

/**
 * An interface to FixedDecimal, allowing for other implementations.
 * @internal
 */
class U_I18N_API IFixedDecimal {
  public:
    virtual ~IFixedDecimal();

    /**
     * Returns the value corresponding to the specified operand (n, i, f, t, v, or w).
     * If the operand is 'n', returns a double; otherwise, returns an integer.
     */
    virtual double getPluralOperand(PluralOperand operand) const = 0;

    virtual bool isNaN() const = 0;

    virtual bool isInfinite() const = 0;

    /** Whether the number has no nonzero fraction digits. */
    virtual bool hasIntegerValue() const = 0;
};

/**
 * class FixedDecimal serves to communicate the properties
 * of a formatted number from a decimal formatter to PluralRules::select()
 *
 * see DecimalFormat::getFixedDecimal()
 * @internal
 */
class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject {
  public:
    /**
      * @param n   the number, e.g. 12.345
      * @param v   The number of visible fraction digits, e.g. 3
      * @param f   The fraction digits, e.g. 345
      * @param e   The exponent, e.g. 7 in 1.2e7, for scientific notation
      * @param c   Currently: an alias for param `e`.
      */
    FixedDecimal(double  n, int32_t v, int64_t f, int32_t e, int32_t c);
    FixedDecimal(double  n, int32_t v, int64_t f, int32_t e);
    FixedDecimal(double  n, int32_t v, int64_t f);
    FixedDecimal(double n, int32_t);
    explicit FixedDecimal(double n);
    FixedDecimal();
    ~FixedDecimal() override;
    FixedDecimal(const UnicodeString &s, UErrorCode &ec);
    FixedDecimal(const FixedDecimal &other);

    static FixedDecimal createWithExponent(double n, int32_t v, int32_t e);

    double getPluralOperand(PluralOperand operand) const override;
    bool isNaN() const override;
    bool isInfinite() const override;
    bool hasIntegerValue() const override;

    bool isNanOrInfinity() const;  // used in decimfmtimpl.cpp

    int32_t getVisibleFractionDigitCount() const;

    void init(double n, int32_t v, int64_t f, int32_t e, int32_t c);
    void init(double n, int32_t v, int64_t f, int32_t e);
    void init(double n, int32_t v, int64_t f);
    void init(double n);
    UBool quickInit(double n);  // Try a fast-path only initialization,
                                //    return true if successful.
    void adjustForMinFractionDigits(int32_t min);
    static int64_t getFractionalDigits(double n, int32_t v);
    static int32_t decimals(double n);

    FixedDecimal& operator=(const FixedDecimal& other) = default;
    bool operator==(const FixedDecimal &other) const;

    UnicodeString toString() const;

    double doubleValue() const;
    int64_t longValue() const;

    double      source;
    int32_t     visibleDecimalDigitCount;
    int64_t     decimalDigits;
    int64_t     decimalDigitsWithoutTrailingZeros;
    int64_t     intValue;
    int32_t     exponent;
    UBool       _hasIntegerValue;
    UBool       isNegative;
    UBool       _isNaN;
    UBool       _isInfinite;
};

class AndConstraint : public UMemory  {
public:
    typedef enum RuleOp {
        NONE,
        MOD
    } RuleOp;
    RuleOp op = AndConstraint::NONE;
    int32_t opNum = -1;             // for mod expressions, the right operand of the mod.
    int32_t value = -1;             // valid for 'is' rules only.
    UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise.
    UBool negated = false;          // true for negated rules.
    UBool integerOnly = false;      // true for 'within' rules.
    tokenType digitsType = none;    // n | i | v | f constraint.
    AndConstraint *next = nullptr;
    // Internal error status, used for errors that occur during the copy constructor.
    UErrorCode fInternalStatus = U_ZERO_ERROR;    

    AndConstraint() = default;
    AndConstraint(const AndConstraint& other);
    virtual ~AndConstraint();
    AndConstraint* add(UErrorCode& status);
    // UBool isFulfilled(double number);
    UBool isFulfilled(const IFixedDecimal &number);
};

class OrConstraint : public UMemory  {
public:
    AndConstraint *childNode = nullptr;
    OrConstraint *next = nullptr;
    // Internal error status, used for errors that occur during the copy constructor.
    UErrorCode fInternalStatus = U_ZERO_ERROR;

    OrConstraint() = default;
    OrConstraint(const OrConstraint& other);
    virtual ~OrConstraint();
    AndConstraint* add(UErrorCode& status);
    // UBool isFulfilled(double number);
    UBool isFulfilled(const IFixedDecimal &number);
};

class RuleChain : public UMemory  {
public:
    UnicodeString   fKeyword;
    RuleChain      *fNext = nullptr;
    OrConstraint   *ruleHeader = nullptr;
    UnicodeString   fDecimalSamples;  // Samples strings from rule source
    UnicodeString   fIntegerSamples;  //   without @decimal or @integer, otherwise unprocessed.
    UBool           fDecimalSamplesUnbounded = false;
    UBool           fIntegerSamplesUnbounded = false;
    // Internal error status, used for errors that occur during the copy constructor.
    UErrorCode      fInternalStatus = U_ZERO_ERROR;

    RuleChain() = default;
    RuleChain(const RuleChain& other);
    virtual ~RuleChain();

    UnicodeString select(const IFixedDecimal &number) const;
    void          dumpRules(UnicodeString& result);
    UErrorCode    getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const;
    UBool         isKeyword(const UnicodeString& keyword) const;
};

class PluralKeywordEnumeration : public StringEnumeration {
public:
    PluralKeywordEnumeration(RuleChain *header, UErrorCode& status);
    virtual ~PluralKeywordEnumeration();
    static UClassID U_EXPORT2 getStaticClassID();
    virtual UClassID getDynamicClassID() const override;
    virtual const UnicodeString* snext(UErrorCode& status) override;
    virtual void reset(UErrorCode& status) override;
    virtual int32_t count(UErrorCode& status) const override;
private:
    int32_t         pos;
    UVector         fKeywordNames;
};


class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration {
  public:
    PluralAvailableLocalesEnumeration(UErrorCode &status);
    virtual ~PluralAvailableLocalesEnumeration();
    virtual const char* next(int32_t *resultLength, UErrorCode& status) override;
    virtual void reset(UErrorCode& status) override;
    virtual int32_t count(UErrorCode& status) const override;
  private:
    UErrorCode      fOpenStatus;
    UResourceBundle *fLocales = nullptr;
    UResourceBundle *fRes = nullptr;
};

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_FORMATTING */

#endif // _PLURRULE_IMPL
//eof