/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace comphelper::string { namespace { template T tmpl_stripStart(const T &rIn, const C cRemove) { if (rIn.empty()) return rIn; typename T::size_type i = 0; while (i < rIn.size()) { if (rIn[i] != cRemove) break; ++i; } return rIn.substr(i); } template T tmpl_stripStartString(const T &rIn, const C cRemove) { if (rIn.isEmpty()) return rIn; sal_Int32 i = 0; while (i < rIn.getLength()) { if (rIn[i] != cRemove) break; ++i; } return rIn.copy(i); } } OString stripStart(const OString& rIn, char c) { return tmpl_stripStartString(rIn, c); } std::string_view stripStart(std::string_view rIn, char c) { return tmpl_stripStart(rIn, c); } OUString stripStart(const OUString& rIn, sal_Unicode c) { return tmpl_stripStartString(rIn, c); } std::u16string_view stripStart(std::u16string_view rIn, sal_Unicode c) { return tmpl_stripStart(rIn, c); } namespace { template T tmpl_stripEnd(const T &rIn, const C cRemove) { if (rIn.empty()) return rIn; typename T::size_type i = rIn.size(); while (i > 0) { if (rIn[i-1] != cRemove) break; --i; } return rIn.substr(0, i); } template T tmpl_stripEndString(const T &rIn, const C cRemove) { if (rIn.isEmpty()) return rIn; sal_Int32 i = rIn.getLength(); while (i > 0) { if (rIn[i-1] != cRemove) break; --i; } return rIn.copy(0, i); } } OString stripEnd(const OString& rIn, char c) { return tmpl_stripEndString(rIn, c); } std::string_view stripEnd(std::string_view rIn, char c) { return tmpl_stripEnd(rIn, c); } OUString stripEnd(const OUString& rIn, sal_Unicode c) { return tmpl_stripEndString(rIn, c); } std::u16string_view stripEnd(std::u16string_view rIn, sal_Unicode c) { return tmpl_stripEnd(rIn, c); } namespace { template T tmpl_strip(const T &rIn, const C cRemove) { if (rIn.empty()) return rIn; typename T::size_type end = rIn.size(); while (end > 0) { if (rIn[end-1] != cRemove) break; --end; } typename T::size_type start = 0; while (start < end) { if (rIn[start] != cRemove) break; ++start; } return rIn.substr(start, end - start); } template T tmpl_stripString(const T &rIn, const C cRemove) { if (rIn.isEmpty()) return rIn; sal_Int32 end = rIn.getLength(); while (end > 0) { if (rIn[end-1] != cRemove) break; --end; } sal_Int32 start = 0; while (start < end) { if (rIn[start] != cRemove) break; ++start; } return rIn.copy(start, end - start); } } OString strip(const OString& rIn, char c) { return tmpl_stripString(rIn, c); } std::string_view strip(std::string_view rIn, char c) { return tmpl_strip(rIn, c); } OUString strip(const OUString& rIn, sal_Unicode c) { return tmpl_stripString(rIn, c); } std::u16string_view strip(std::u16string_view rIn, sal_Unicode c) { return tmpl_strip(rIn, c); } namespace { template sal_Int32 tmpl_getTokenCount( T rIn, C cTok) { // Empty String: TokenCount by Definition is 0 if (rIn.empty()) return 0; sal_Int32 nTokCount = 1; for (typename T::size_type i = 0; i < rIn.size(); ++i) { if (rIn[i] == cTok) ++nTokCount; } return nTokCount; } } sal_Int32 getTokenCount(std::string_view rIn, char cTok) { return tmpl_getTokenCount(rIn, cTok); } sal_Int32 getTokenCount(std::u16string_view rIn, sal_Unicode cTok) { return tmpl_getTokenCount(rIn, cTok); } sal_uInt32 decimalStringToNumber(std::u16string_view str) { sal_uInt32 result = 0; for( sal_Int32 i = 0; i < static_cast(str.size()); ) { sal_uInt32 c = o3tl::iterateCodePoints(str, &i); sal_uInt32 value = 0; if( c <= 0x0039) // ASCII decimal digits, most common value = c - 0x0030; else if( c >= 0x1D7F6 ) // mathematical monospace digits value = c - 0x1D7F6; else if( c >= 0x1D7EC ) // mathematical sans-serif bold digits value = c - 0x1D7EC; else if( c >= 0x1D7E2 ) // mathematical sans-serif digits value = c - 0x1D7E2; else if( c >= 0x1D7D8 ) // mathematical double-struck digits value = c - 0x1D7D8; else if( c >= 0x1D7CE ) // mathematical bold digits value = c - 0x1D7CE; else if( c >= 0x11066 ) // brahmi digits value = c - 0x11066; else if( c >= 0x104A0 ) // osmanya digits value = c - 0x104A0; else if( c >= 0xFF10 ) // fullwidth digits value = c - 0xFF10; else if( c >= 0xABF0 ) // meetei mayek digits value = c - 0xABF0; else if( c >= 0xAA50 ) // cham digits value = c - 0xAA50; else if( c >= 0xA9D0 ) // javanese digits value = c - 0xA9D0; else if( c >= 0xA900 ) // kayah li digits value = c - 0xA900; else if( c >= 0xA8D0 ) // saurashtra digits value = c - 0xA8D0; else if( c >= 0xA620 ) // vai digits value = c - 0xA620; else if( c >= 0x1C50 ) // ol chiki digits value = c - 0x1C50; else if( c >= 0x1C40 ) // lepcha digits value = c - 0x1C40; else if( c >= 0x1BB0 ) // sundanese digits value = c - 0x1BB0; else if( c >= 0x1B50 ) // balinese digits value = c - 0x1B50; else if( c >= 0x1A90 ) // tai tham tham digits value = c - 0x1A90; else if( c >= 0x1A80 ) // tai tham hora digits value = c - 0x1A80; else if( c >= 0x19D0 ) // new tai lue digits value = c - 0x19D0; else if( c >= 0x1946 ) // limbu digits value = c - 0x1946; else if( c >= 0x1810 ) // mongolian digits value = c - 0x1810; else if( c >= 0x17E0 ) // khmer digits value = c - 0x17E0; else if( c >= 0x1090 ) // myanmar shan digits value = c - 0x1090; else if( c >= 0x1040 ) // myanmar digits value = c - 0x1040; else if( c >= 0x0F20 ) // tibetan digits value = c - 0x0F20; else if( c >= 0x0ED0 ) // lao digits value = c - 0x0ED0; else if( c >= 0x0E50 ) // thai digits value = c - 0x0E50; else if( c >= 0x0D66 ) // malayalam digits value = c - 0x0D66; else if( c >= 0x0CE6 ) // kannada digits value = c - 0x0CE6; else if( c >= 0x0C66 ) // telugu digits value = c - 0x0C66; else if( c >= 0x0BE6 ) // tamil digits value = c - 0x0BE6; else if( c >= 0x0B66 ) // odia digits value = c - 0x0B66; else if( c >= 0x0AE6 ) // gujarati digits value = c - 0x0AE6; else if( c >= 0x0A66 ) // gurmukhi digits value = c - 0x0A66; else if( c >= 0x09E6 ) // bengali digits value = c - 0x09E6; else if( c >= 0x0966 ) // devanagari digit value = c - 0x0966; else if( c >= 0x07C0 ) // nko digits value = c - 0x07C0; else if( c >= 0x06F0 ) // extended arabic-indic digits value = c - 0x06F0; else if( c >= 0x0660 ) // arabic-indic digits value = c - 0x0660; result = result * 10 + value; } return result; } using namespace ::com::sun::star; // convert between sequence of string and comma separated string OUString convertCommaSeparated( uno::Sequence< OUString > const& i_rSeq) { OUStringBuffer buf; ::comphelper::intersperse( i_rSeq.begin(), i_rSeq.end(), ::comphelper::OUStringBufferAppender(buf), OUString( ", " )); return buf.makeStringAndClear(); } std::vector split(std::u16string_view rStr, sal_Unicode cSeparator) { std::vector< OUString > vec; std::size_t idx = 0; do { std::u16string_view kw = o3tl::getToken(rStr, cSeparator, idx); kw = o3tl::trim(kw); if (!kw.empty()) { vec.push_back(OUString(kw)); } } while (idx != std::u16string_view::npos); return vec; } uno::Sequence< OUString > convertCommaSeparated( std::u16string_view i_rString ) { std::vector< OUString > vec = split(i_rString, ','); return comphelper::containerToSequence(vec); } OString join(std::string_view rSeparator, const std::vector& rSequence) { OStringBuffer aBuffer; for (size_t i = 0; i < rSequence.size(); ++i) { if (i != 0) aBuffer.append(rSeparator); aBuffer.append(rSequence[i]); } return aBuffer.makeStringAndClear(); } sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS, const uno::Reference< i18n::XCollator > &rCollator, const uno::Reference< i18n::XBreakIterator > &rBI, const lang::Locale &rLocale ) { sal_Int32 nRet = 0; sal_Int32 nLHSLastNonDigitPos = 0; sal_Int32 nRHSLastNonDigitPos = 0; sal_Int32 nLHSFirstDigitPos = 0; sal_Int32 nRHSFirstDigitPos = 0; // Check if the string starts with a digit sal_Int32 nStartsDigitLHS = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); sal_Int32 nStartsDigitRHS = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); if (nStartsDigitLHS > 0 && nStartsDigitRHS > 0) { sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(0, nStartsDigitLHS)); sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.subView(0, nStartsDigitRHS)); if (nLHS != nRHS) return nLHS < nRHS ? -1 : 1; nLHSLastNonDigitPos = nStartsDigitLHS; nRHSLastNonDigitPos = nStartsDigitRHS; } else if (nStartsDigitLHS > 0) return -1; else if (nStartsDigitRHS > 0) return 1; while (nLHSFirstDigitPos < rLHS.getLength() || nRHSFirstDigitPos < rRHS.getLength()) { sal_Int32 nLHSChunkLen; sal_Int32 nRHSChunkLen; //Compare non digit block as normal strings nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); if (nLHSFirstDigitPos == -1) nLHSFirstDigitPos = rLHS.getLength(); if (nRHSFirstDigitPos == -1) nRHSFirstDigitPos = rRHS.getLength(); nLHSChunkLen = nLHSFirstDigitPos - nLHSLastNonDigitPos; nRHSChunkLen = nRHSFirstDigitPos - nRHSLastNonDigitPos; nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos, nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen); if (nRet != 0) break; //Compare digit block as one number vs another nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); if (nLHSLastNonDigitPos == -1) nLHSLastNonDigitPos = rLHS.getLength(); if (nRHSLastNonDigitPos == -1) nRHSLastNonDigitPos = rRHS.getLength(); nLHSChunkLen = nLHSLastNonDigitPos - nLHSFirstDigitPos; nRHSChunkLen = nRHSLastNonDigitPos - nRHSFirstDigitPos; //To-Do: Possibly scale down those unicode codepoints that relate to //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in //vcl sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(nLHSFirstDigitPos, nLHSChunkLen)); sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.subView(nRHSFirstDigitPos, nRHSChunkLen)); if (nLHS != nRHS) { nRet = (nLHS < nRHS) ? -1 : 1; break; } } return nRet; } NaturalStringSorter::NaturalStringSorter( const uno::Reference< uno::XComponentContext > &rContext, lang::Locale aLocale) : m_aLocale(std::move(aLocale)) { m_xCollator = i18n::Collator::create( rContext ); m_xCollator->loadDefaultCollator(m_aLocale, 0); m_xBI = i18n::BreakIterator::create( rContext ); } bool isdigitAsciiString(std::string_view rString) { return std::all_of( rString.data(), rString.data() + rString.size(), [](unsigned char c){ return rtl::isAsciiDigit(c); }); } bool isdigitAsciiString(std::u16string_view rString) { return std::all_of( rString.data(), rString.data() + rString.size(), [](sal_Unicode c){ return rtl::isAsciiDigit(c); }); } OUString reverseString(std::u16string_view rStr) { if (rStr.empty()) return OUString(); std::size_t i = rStr.size(); OUStringBuffer sBuf(static_cast(i)); while (i) sBuf.append(rStr[--i]); return sBuf.makeStringAndClear(); } OUString reverseCodePoints(OUString const & str) { auto const len = str.getLength(); OUStringBuffer buf(len); for (auto i = len; i != 0;) { buf.appendUtf32(str.iterateCodePoints(&i, -1)); } return buf.makeStringAndClear(); } sal_Int32 indexOfAny(std::u16string_view rIn, sal_Unicode const*const pChars, sal_Int32 const nPos) { for (std::u16string_view::size_type i = nPos; i < rIn.size(); ++i) { sal_Unicode const c = rIn[i]; for (sal_Unicode const* pChar = pChars; *pChar; ++pChar) { if (c == *pChar) { return i; } } } return -1; } OUString removeAny(std::u16string_view rIn, sal_Unicode const*const pChars) { OUStringBuffer buf; bool isFound(false); for (std::u16string_view::size_type i = 0; i < rIn.size(); ++i) { sal_Unicode const c = rIn[i]; bool removeC(false); for (sal_Unicode const* pChar = pChars; *pChar; ++pChar) { if (c == *pChar) { removeC = true; break; } } if (removeC) { if (!isFound) { if (i > 0) { buf.append(rIn.substr(0, i)); } isFound = true; } } else if (isFound) { buf.append(c); } } return isFound ? buf.makeStringAndClear() : OUString(rIn); } OUString setToken(const OUString& rIn, sal_Int32 nToken, sal_Unicode cTok, std::u16string_view rNewToken) { sal_Int32 nLen = rIn.getLength(); sal_Int32 nTok = 0; sal_Int32 nFirstChar = 0; sal_Int32 i = 0; // Determine token position and length while ( i < nLen ) { // Increase token count if match if (rIn[i] == cTok) { ++nTok; if (nTok == nToken) nFirstChar = i+1; else if (nTok > nToken) break; } ++i; } if (nTok >= nToken) return rIn.replaceAt(nFirstChar, i-nFirstChar, rNewToken); return rIn; } /** Similar to OUString::replaceAt, but for an OUStringBuffer. Replace n = count characters from position index in this string with newStr. */ void replaceAt(OUStringBuffer& rIn, sal_Int32 nIndex, sal_Int32 nCount, std::u16string_view newStr ) { assert(nIndex >= 0 && nIndex <= rIn.getLength()); assert(nCount >= 0); assert(nCount <= rIn.getLength() - nIndex); /* Append? */ const sal_Int32 nOldLength = rIn.getLength(); if ( nIndex == nOldLength ) { rIn.append(newStr); return; } sal_Int32 nNewLength = nOldLength + newStr.size() - nCount; if (newStr.size() > o3tl::make_unsigned(nCount)) rIn.ensureCapacity(nOldLength + newStr.size() - nCount); sal_Unicode* pStr = const_cast(rIn.getStr()); memmove(pStr + nIndex + newStr.size(), pStr + nIndex + nCount, nOldLength - nIndex + nCount); memcpy(pStr + nIndex, newStr.data(), newStr.size()); rIn.setLength(nNewLength); } OUString sanitizeStringSurrogates(const OUString& rString) { sal_Int32 i=0; while (i < rString.getLength()) { sal_Unicode c = rString[i]; if (rtl::isHighSurrogate(c)) { if (i+1 == rString.getLength() || !rtl::isLowSurrogate(rString[i+1])) { SAL_WARN("comphelper", "Surrogate error: high without low"); return rString.copy(0, i); } ++i; //skip correct low } if (rtl::isLowSurrogate(c)) //bare low without preceding high { SAL_WARN("comphelper", "Surrogate error: low without high"); return rString.copy(0, i); } ++i; } return rString; } } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */