From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- intl/icu/source/i18n/string_segment.cpp | 145 ++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 intl/icu/source/i18n/string_segment.cpp (limited to 'intl/icu/source/i18n/string_segment.cpp') diff --git a/intl/icu/source/i18n/string_segment.cpp b/intl/icu/source/i18n/string_segment.cpp new file mode 100644 index 0000000000..2ddb738f4d --- /dev/null +++ b/intl/icu/source/i18n/string_segment.cpp @@ -0,0 +1,145 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "numparse_types.h" +#include "string_segment.h" +#include "putilimp.h" +#include "unicode/utf16.h" +#include "unicode/uniset.h" + +U_NAMESPACE_BEGIN + + +StringSegment::StringSegment(const UnicodeString& str, bool ignoreCase) + : fStr(str), fStart(0), fEnd(str.length()), + fFoldCase(ignoreCase) {} + +int32_t StringSegment::getOffset() const { + return fStart; +} + +void StringSegment::setOffset(int32_t start) { + fStart = start; +} + +void StringSegment::adjustOffset(int32_t delta) { + fStart += delta; +} + +void StringSegment::adjustOffsetByCodePoint() { + fStart += U16_LENGTH(getCodePoint()); +} + +void StringSegment::setLength(int32_t length) { + fEnd = fStart + length; +} + +void StringSegment::resetLength() { + fEnd = fStr.length(); +} + +int32_t StringSegment::length() const { + return fEnd - fStart; +} + +char16_t StringSegment::charAt(int32_t index) const { + return fStr.charAt(index + fStart); +} + +UChar32 StringSegment::codePointAt(int32_t index) const { + return fStr.char32At(index + fStart); +} + +UnicodeString StringSegment::toUnicodeString() const { + return UnicodeString(fStr.getBuffer() + fStart, fEnd - fStart); +} + +const UnicodeString StringSegment::toTempUnicodeString() const { + // Use the readonly-aliasing constructor for efficiency. + return UnicodeString(false, fStr.getBuffer() + fStart, fEnd - fStart); +} + +UChar32 StringSegment::getCodePoint() const { + char16_t lead = fStr.charAt(fStart); + if (U16_IS_LEAD(lead) && fStart + 1 < fEnd) { + return fStr.char32At(fStart); + } else if (U16_IS_SURROGATE(lead)) { + return -1; + } else { + return lead; + } +} + +bool StringSegment::startsWith(UChar32 otherCp) const { + return codePointsEqual(getCodePoint(), otherCp, fFoldCase); +} + +bool StringSegment::startsWith(const UnicodeSet& uniset) const { + // TODO: Move UnicodeSet case-folding logic here. + // TODO: Handle string matches here instead of separately. + UChar32 cp = getCodePoint(); + if (cp == -1) { + return false; + } + return uniset.contains(cp); +} + +bool StringSegment::startsWith(const UnicodeString& other) const { + if (other.isBogus() || other.length() == 0 || length() == 0) { + return false; + } + int cp1 = getCodePoint(); + int cp2 = other.char32At(0); + return codePointsEqual(cp1, cp2, fFoldCase); +} + +int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) { + return getPrefixLengthInternal(other, fFoldCase); +} + +int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other) { + return getPrefixLengthInternal(other, false); +} + +int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) { + U_ASSERT(other.length() > 0); + int32_t offset = 0; + for (; offset < uprv_min(length(), other.length());) { + // TODO: case-fold code points, not chars + char16_t c1 = charAt(offset); + char16_t c2 = other.charAt(offset); + if (!codePointsEqual(c1, c2, foldCase)) { + break; + } + offset++; + } + return offset; +} + +bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) { + if (cp1 == cp2) { + return true; + } + if (!foldCase) { + return false; + } + cp1 = u_foldCase(cp1, true); + cp2 = u_foldCase(cp2, true); + return cp1 == cp2; +} + +bool StringSegment::operator==(const UnicodeString& other) const { + return toTempUnicodeString() == other; +} + + +U_NAMESPACE_END +#endif /* #if !UCONFIG_NO_FORMATTING */ -- cgit v1.2.3