1 files changed, 163 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/numparse_scientific.cpp b/intl/icu/source/i18n/numparse_scientific.cpp
new file mode 100644
index 0000000000..4b88cd998f
--- /dev/null
+++ b/intl/icu/source/i18n/numparse_scientific.cpp
@@ -0,0 +1,163 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "numparse_types.h"
+#include "numparse_scientific.h"
+#include "static_unicode_sets.h"
+#include "string_segment.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+namespace {
+
+// Dereferences the UnicodeSet pointer that unisets::get() returns for the
+// MINUS_SIGN key.
+inline const UnicodeSet& minusSignSet() { return *unisets::get(unisets::MINUS_SIGN); }
+
+// Dereferences the UnicodeSet pointer that unisets::get() returns for the
+// PLUS_SIGN key.
+inline const UnicodeSet& plusSignSet() { return *unisets::get(unisets::PLUS_SIGN); }
+
+} // namespace
+
+
+ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
+        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
+          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
+          fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
+    // Store a symbol as a custom sign only if the unisets sign set lacks it; else mark it bogus.
+    const UnicodeString& localeMinus = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
+    if (!minusSignSet().contains(localeMinus)) {
+        fCustomMinusSign = localeMinus;
+    } else {
+        fCustomMinusSign.setToBogus();
+    }
+
+    const UnicodeString& localePlus = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
+    if (!plusSignSet().contains(localePlus)) {
+        fCustomPlusSign = localePlus;
+    } else {
+        fCustomPlusSign.setToBogus();
+    }
+}
+
+// Attempts to match an exponent -- separator, optional sign, digits -- at the segment's offset.
+//
+// Returns false if no number has been seen yet, if FLAG_HAS_EXPONENT is already set, or if the
+// separator neither matches in full nor overlaps the entire segment. Returns true, with the
+// offset left at its starting position, whenever the segment runs out before the digits begin
+// or a custom sign matches only partially. Otherwise returns the exponent digit matcher's
+// result; FLAG_HAS_EXPONENT is set only if that match advanced the offset, else it is restored.
+bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+    // Scientific notation is only accepted after a mantissa, and only one exponent
+    // is accepted per string.
+    if (!result.seenNumber() || 0 != (result.flags & FLAG_HAS_EXPONENT)) {
+        return false;
+    }
+
+    // Match the exponent separator first; the exponent number is matched after it.
+    // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
+    const int32_t startOffset = segment.getOffset();
+    const int32_t separatorLength = fExponentSeparatorString.length();
+    const int32_t separatorOverlap = segment.getCommonPrefixLength(fExponentSeparatorString);
+    if (separatorOverlap != separatorLength) {
+        // Partial exponent separator match when the overlap covers the whole
+        // segment (true); otherwise no match (false).
+        return separatorOverlap == segment.length();
+    }
+
+    // Full exponent separator match.
+    // If no code point follows the separator, return true with the offset untouched.
+    if (segment.length() == separatorOverlap) {
+        return true;
+    }
+    segment.adjustOffset(separatorOverlap);
+
+    // Allow ignorables before the sign.
+    // Note: the ignorables matcher should not touch the result.
+    fIgnorablesMatcher.match(segment, result, status);
+    if (segment.length() == 0) {
+        segment.setOffset(startOffset);
+        return true;
+    }
+
+    // Allow a sign. The unisets sign sets are tried first and consume one code point;
+    // a custom sign string is only recorded here and consumed further down.
+    int8_t signOfExponent = 1;
+    const UnicodeString* customSign = nullptr;
+    if (segment.startsWith(minusSignSet())) {
+        signOfExponent = -1;
+        segment.adjustOffsetByCodePoint();
+    } else if (segment.startsWith(plusSignSet())) {
+        segment.adjustOffsetByCodePoint();
+    } else if (segment.startsWith(fCustomMinusSign)) {
+        signOfExponent = -1;
+        customSign = &fCustomMinusSign;
+    } else if (segment.startsWith(fCustomPlusSign)) {
+        customSign = &fCustomPlusSign;
+    }
+
+    // A custom sign must match in full before it is consumed.
+    if (customSign != nullptr) {
+        const int32_t signOverlap = segment.getCommonPrefixLength(*customSign);
+        if (signOverlap != customSign->length()) {
+            // Partial custom sign match
+            segment.setOffset(startOffset);
+            return true;
+        }
+        segment.adjustOffset(signOverlap);
+    }
+
+    // Return true if the segment is empty.
+    if (segment.length() == 0) {
+        segment.setOffset(startOffset);
+        return true;
+    }
+
+    // Allow ignorables after the sign.
+    // Note: the ignorables matcher should not touch the result.
+    fIgnorablesMatcher.match(segment, result, status);
+    if (segment.length() == 0) {
+        segment.setOffset(startOffset);
+        return true;
+    }
+
+    // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
+    // The bogus flag is cleared only for the duration of the digit match, then restored.
+    const bool savedBogus = result.quantity.bogus;
+    result.quantity.bogus = false;
+    const int32_t offsetBeforeDigits = segment.getOffset();
+    const bool digitsMatchResult = fExponentMatcher.match(segment, result, signOfExponent, status);
+    result.quantity.bogus = savedBogus;
+
+    if (segment.getOffset() == offsetBeforeDigits) {
+        // No exponent digits were matched
+        segment.setOffset(startOffset);
+    } else {
+        // At least one exponent digit was matched.
+        result.flags |= FLAG_HAS_EXPONENT;
+    }
+    return digitsMatchResult;
+}
+
+bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
+    return segment.startsWith(fExponentSeparatorString);  // pre-check that match() relies on
+}
+
+// Returns the fixed label "<Scientific>" for this matcher.
+UnicodeString ScientificMatcher::toString() const {
+    return UnicodeString(u"<Scientific>");
+}
+
+#endif /* #if !UCONFIG_NO_FORMATTING */