summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/numparse_scientific.cpp
blob: 4b88cd998fee09a8d10680144db204bc5effe46f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT

#include "numparse_types.h"
#include "numparse_scientific.h"
#include "static_unicode_sets.h"
#include "string_segment.h"

using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;


namespace {

inline const UnicodeSet& minusSignSet() {
    return *unisets::get(unisets::MINUS_SIGN);
}

inline const UnicodeSet& plusSignSet() {
    return *unisets::get(unisets::PLUS_SIGN);
}

} // namespace


ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
          fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {

    const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
    if (minusSignSet().contains(minusSign)) {
        fCustomMinusSign.setToBogus();
    } else {
        fCustomMinusSign = minusSign;
    }

    const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
    if (plusSignSet().contains(plusSign)) {
        fCustomPlusSign.setToBogus();
    } else {
        fCustomPlusSign = plusSign;
    }
}

bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    // Only accept scientific notation after the mantissa.
    if (!result.seenNumber()) {
        return false;
    }

    // Only accept one exponent per string.
    if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
        return false;
    }

    // First match the scientific separator, and then match another number after it.
    // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
    int32_t initialOffset = segment.getOffset();
    int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
    if (overlap == fExponentSeparatorString.length()) {
        // Full exponent separator match.

        // First attempt to get a code point, returning true if we can't get one.
        if (segment.length() == overlap) {
            return true;
        }
        segment.adjustOffset(overlap);

        // Allow ignorables before the sign.
        // Note: call site is guarded by the segment.length() check above.
        // Note: the ignorables matcher should not touch the result.
        fIgnorablesMatcher.match(segment, result, status);
        if (segment.length() == 0) {
            segment.setOffset(initialOffset);
            return true;
        }

        // Allow a sign, and then try to match digits.
        int8_t exponentSign = 1;
        if (segment.startsWith(minusSignSet())) {
            exponentSign = -1;
            segment.adjustOffsetByCodePoint();
        } else if (segment.startsWith(plusSignSet())) {
            segment.adjustOffsetByCodePoint();
        } else if (segment.startsWith(fCustomMinusSign)) {
            overlap = segment.getCommonPrefixLength(fCustomMinusSign);
            if (overlap != fCustomMinusSign.length()) {
                // Partial custom sign match
                segment.setOffset(initialOffset);
                return true;
            }
            exponentSign = -1;
            segment.adjustOffset(overlap);
        } else if (segment.startsWith(fCustomPlusSign)) {
            overlap = segment.getCommonPrefixLength(fCustomPlusSign);
            if (overlap != fCustomPlusSign.length()) {
                // Partial custom sign match
                segment.setOffset(initialOffset);
                return true;
            }
            segment.adjustOffset(overlap);
        }

        // Return true if the segment is empty.
        if (segment.length() == 0) {
            segment.setOffset(initialOffset);
            return true;
        }

        // Allow ignorables after the sign.
        // Note: call site is guarded by the segment.length() check above.
        // Note: the ignorables matcher should not touch the result.
        fIgnorablesMatcher.match(segment, result, status);
        if (segment.length() == 0) {
            segment.setOffset(initialOffset);
            return true;
        }

        // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
        bool wasBogus = result.quantity.bogus;
        result.quantity.bogus = false;
        int digitsOffset = segment.getOffset();
        bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
        result.quantity.bogus = wasBogus;

        if (segment.getOffset() != digitsOffset) {
            // At least one exponent digit was matched.
            result.flags |= FLAG_HAS_EXPONENT;
        } else {
            // No exponent digits were matched
            segment.setOffset(initialOffset);
        }
        return digitsReturnValue;

    } else if (overlap == segment.length()) {
        // Partial exponent separator match
        return true;
    }

    // No match
    return false;
}

bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
    return segment.startsWith(fExponentSeparatorString);
}

UnicodeString ScientificMatcher::toString() const {
    return u"<Scientific>";
}


#endif /* #if !UCONFIG_NO_FORMATTING */