summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/numparse_scientific.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/i18n/numparse_scientific.cpp
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/i18n/numparse_scientific.cpp')
-rw-r--r--intl/icu/source/i18n/numparse_scientific.cpp163
1 files changed, 163 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/numparse_scientific.cpp b/intl/icu/source/i18n/numparse_scientific.cpp
new file mode 100644
index 0000000000..4b88cd998f
--- /dev/null
+++ b/intl/icu/source/i18n/numparse_scientific.cpp
@@ -0,0 +1,163 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "numparse_types.h"
+#include "numparse_scientific.h"
+#include "static_unicode_sets.h"
+#include "string_segment.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+namespace {
+
+inline const UnicodeSet& minusSignSet() {
+ return *unisets::get(unisets::MINUS_SIGN);
+}
+
+inline const UnicodeSet& plusSignSet() {
+ return *unisets::get(unisets::PLUS_SIGN);
+}
+
+} // namespace
+
+
+ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
+ : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
+ fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
+ fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
+
+ const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
+ if (minusSignSet().contains(minusSign)) {
+ fCustomMinusSign.setToBogus();
+ } else {
+ fCustomMinusSign = minusSign;
+ }
+
+ const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
+ if (plusSignSet().contains(plusSign)) {
+ fCustomPlusSign.setToBogus();
+ } else {
+ fCustomPlusSign = plusSign;
+ }
+}
+
+bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+ // Only accept scientific notation after the mantissa.
+ if (!result.seenNumber()) {
+ return false;
+ }
+
+ // Only accept one exponent per string.
+ if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
+ return false;
+ }
+
+ // First match the scientific separator, and then match another number after it.
+ // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
+ int32_t initialOffset = segment.getOffset();
+ int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
+ if (overlap == fExponentSeparatorString.length()) {
+ // Full exponent separator match.
+
+ // First attempt to get a code point, returning true if we can't get one.
+ if (segment.length() == overlap) {
+ return true;
+ }
+ segment.adjustOffset(overlap);
+
+ // Allow ignorables before the sign.
+ // Note: call site is guarded by the segment.length() check above.
+ // Note: the ignorables matcher should not touch the result.
+ fIgnorablesMatcher.match(segment, result, status);
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
+ }
+
+ // Allow a sign, and then try to match digits.
+ int8_t exponentSign = 1;
+ if (segment.startsWith(minusSignSet())) {
+ exponentSign = -1;
+ segment.adjustOffsetByCodePoint();
+ } else if (segment.startsWith(plusSignSet())) {
+ segment.adjustOffsetByCodePoint();
+ } else if (segment.startsWith(fCustomMinusSign)) {
+ overlap = segment.getCommonPrefixLength(fCustomMinusSign);
+ if (overlap != fCustomMinusSign.length()) {
+ // Partial custom sign match
+ segment.setOffset(initialOffset);
+ return true;
+ }
+ exponentSign = -1;
+ segment.adjustOffset(overlap);
+ } else if (segment.startsWith(fCustomPlusSign)) {
+ overlap = segment.getCommonPrefixLength(fCustomPlusSign);
+ if (overlap != fCustomPlusSign.length()) {
+ // Partial custom sign match
+ segment.setOffset(initialOffset);
+ return true;
+ }
+ segment.adjustOffset(overlap);
+ }
+
+ // Return true if the segment is empty.
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
+ }
+
+ // Allow ignorables after the sign.
+ // Note: call site is guarded by the segment.length() check above.
+ // Note: the ignorables matcher should not touch the result.
+ fIgnorablesMatcher.match(segment, result, status);
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
+ }
+
+ // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
+ bool wasBogus = result.quantity.bogus;
+ result.quantity.bogus = false;
+ int digitsOffset = segment.getOffset();
+ bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
+ result.quantity.bogus = wasBogus;
+
+ if (segment.getOffset() != digitsOffset) {
+ // At least one exponent digit was matched.
+ result.flags |= FLAG_HAS_EXPONENT;
+ } else {
+ // No exponent digits were matched
+ segment.setOffset(initialOffset);
+ }
+ return digitsReturnValue;
+
+ } else if (overlap == segment.length()) {
+ // Partial exponent separator match
+ return true;
+ }
+
+ // No match
+ return false;
+}
+
+bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
+ return segment.startsWith(fExponentSeparatorString);
+}
+
+UnicodeString ScientificMatcher::toString() const {
+ return u"<Scientific>";
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */