summaryrefslogtreecommitdiffstats
path: root/unotools/source/i18n/textsearch.cxx
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--unotools/source/i18n/textsearch.cxx398
1 files changed, 398 insertions, 0 deletions
diff --git a/unotools/source/i18n/textsearch.cxx b/unotools/source/i18n/textsearch.cxx
new file mode 100644
index 000000000..79d5cc68f
--- /dev/null
+++ b/unotools/source/i18n/textsearch.cxx
@@ -0,0 +1,398 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/config.h>
+
+#include <cstdlib>
+#include <string_view>
+
+#include <i18nlangtag/languagetag.hxx>
+#include <i18nutil/searchopt.hxx>
+#include <i18nutil/transliteration.hxx>
+#include <com/sun/star/util/TextSearch2.hpp>
+#include <com/sun/star/util/SearchAlgorithms2.hpp>
+#include <com/sun/star/util/SearchFlags.hpp>
+#include <unotools/charclass.hxx>
+#include <comphelper/processfactory.hxx>
+#include <unotools/textsearch.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <tools/diagnose_ex.h>
+#include <mutex>
+
+using namespace ::com::sun::star::util;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+
+namespace utl
+{
+
+SearchParam::SearchParam( const OUString &rText,
+ SearchType eType,
+ bool bCaseSensitive,
+ sal_uInt32 cWildEscChar,
+ bool bWildMatchSel )
+{
+ sSrchStr = rText;
+ m_eSrchType = eType;
+
+ m_cWildEscChar = cWildEscChar;
+
+ m_bCaseSense = bCaseSensitive;
+ m_bWildMatchSel = bWildMatchSel;
+}
+
+SearchParam::SearchParam( const SearchParam& rParam )
+{
+ sSrchStr = rParam.sSrchStr;
+ m_eSrchType = rParam.m_eSrchType;
+
+ m_cWildEscChar = rParam.m_cWildEscChar;
+
+ m_bCaseSense = rParam.m_bCaseSense;
+ m_bWildMatchSel = rParam.m_bWildMatchSel;
+}
+
+SearchParam::~SearchParam() {}
+
+static bool lcl_Equals( const i18nutil::SearchOptions2& rSO1, const i18nutil::SearchOptions2& rSO2 )
+{
+ return
+ rSO1.AlgorithmType2 == rSO2.AlgorithmType2 &&
+ rSO1.WildcardEscapeCharacter == rSO2.WildcardEscapeCharacter &&
+ rSO1.algorithmType == rSO2.algorithmType &&
+ rSO1.searchFlag == rSO2.searchFlag &&
+ rSO1.searchString == rSO2.searchString &&
+ rSO1.replaceString == rSO2.replaceString &&
+ rSO1.changedChars == rSO2.changedChars &&
+ rSO1.deletedChars == rSO2.deletedChars &&
+ rSO1.insertedChars == rSO2.insertedChars &&
+ rSO1.Locale.Language == rSO2.Locale.Language &&
+ rSO1.Locale.Country == rSO2.Locale.Country &&
+ rSO1.Locale.Variant == rSO2.Locale.Variant &&
+ rSO1.transliterateFlags == rSO2.transliterateFlags;
+}
+
+namespace
+{
+ struct CachedTextSearch
+ {
+ std::mutex mutex;
+ i18nutil::SearchOptions2 Options;
+ css::uno::Reference< css::util::XTextSearch2 > xTextSearch;
+ };
+}
+
+Reference<XTextSearch2> TextSearch::getXTextSearch( const i18nutil::SearchOptions2& rPara )
+{
+ static CachedTextSearch theCachedTextSearch;
+
+ std::scoped_lock aGuard(theCachedTextSearch.mutex);
+
+ if ( lcl_Equals(theCachedTextSearch.Options, rPara) )
+ return theCachedTextSearch.xTextSearch;
+
+ Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
+ theCachedTextSearch.xTextSearch.set( ::TextSearch2::create(xContext) );
+ theCachedTextSearch.xTextSearch->setOptions2( rPara.toUnoSearchOptions2() );
+ theCachedTextSearch.Options = rPara;
+
+ return theCachedTextSearch.xTextSearch;
+}
+
+TextSearch::TextSearch(const SearchParam & rParam, LanguageType eLang )
+{
+ if( LANGUAGE_NONE == eLang )
+ eLang = LANGUAGE_SYSTEM;
+ css::lang::Locale aLocale( LanguageTag::convertToLocale( eLang ) );
+
+ Init( rParam, aLocale);
+}
+
+TextSearch::TextSearch(const SearchParam & rParam, const CharClass& rCClass )
+{
+ Init( rParam, rCClass.getLanguageTag().getLocale() );
+}
+
+TextSearch::TextSearch( const i18nutil::SearchOptions2& rPara )
+{
+ xTextSearch = getXTextSearch( rPara );
+}
+
+i18nutil::SearchOptions2 TextSearch::UpgradeToSearchOptions2( const i18nutil::SearchOptions& rOptions )
+{
+ sal_Int16 nAlgorithmType2;
+ switch (rOptions.algorithmType)
+ {
+ case SearchAlgorithms_REGEXP:
+ nAlgorithmType2 = SearchAlgorithms2::REGEXP;
+ break;
+ case SearchAlgorithms_APPROXIMATE:
+ nAlgorithmType2 = SearchAlgorithms2::APPROXIMATE;
+ break;
+ case SearchAlgorithms_ABSOLUTE:
+ nAlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
+ break;
+ default:
+ for (;;) std::abort();
+ }
+ // It would be nice if an inherited struct had a ctor that takes an
+ // instance of the object the struct derived from...
+ i18nutil::SearchOptions2 aOptions2(
+ rOptions.algorithmType,
+ rOptions.searchFlag,
+ rOptions.searchString,
+ rOptions.replaceString,
+ rOptions.Locale,
+ rOptions.changedChars,
+ rOptions.deletedChars,
+ rOptions.insertedChars,
+ rOptions.transliterateFlags,
+ nAlgorithmType2,
+ 0 // no wildcard search, no escape character...
+ );
+ return aOptions2;
+}
+
+void TextSearch::Init( const SearchParam & rParam,
+ const css::lang::Locale& rLocale )
+{
+ // convert SearchParam to the UNO SearchOptions2
+ i18nutil::SearchOptions2 aSOpt;
+
+ switch( rParam.GetSrchType() )
+ {
+ case SearchParam::SearchType::Wildcard:
+ aSOpt.AlgorithmType2 = SearchAlgorithms2::WILDCARD;
+ aSOpt.algorithmType = SearchAlgorithms::SearchAlgorithms_MAKE_FIXED_SIZE; // no old enum for that
+ aSOpt.WildcardEscapeCharacter = rParam.GetWildEscChar();
+ if (rParam.IsWildMatchSel())
+ aSOpt.searchFlag |= SearchFlags::WILD_MATCH_SELECTION;
+ break;
+
+ case SearchParam::SearchType::Regexp:
+ aSOpt.AlgorithmType2 = SearchAlgorithms2::REGEXP;
+ aSOpt.algorithmType = SearchAlgorithms_REGEXP;
+ break;
+
+ case SearchParam::SearchType::Normal:
+ aSOpt.AlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
+ aSOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
+ break;
+
+ default:
+ for (;;) std::abort();
+ }
+ aSOpt.searchString = rParam.GetSrchStr();
+ aSOpt.replaceString = "";
+ aSOpt.Locale = rLocale;
+ aSOpt.transliterateFlags = TransliterationFlags::NONE;
+ if( !rParam.IsCaseSensitive() )
+ {
+ aSOpt.searchFlag |= SearchFlags::ALL_IGNORE_CASE;
+ aSOpt.transliterateFlags |= TransliterationFlags::IGNORE_CASE;
+ }
+
+ xTextSearch = getXTextSearch( aSOpt );
+}
+
+void TextSearch::SetLocale( const i18nutil::SearchOptions2& rOptions,
+ const css::lang::Locale& rLocale )
+{
+ i18nutil::SearchOptions2 aSOpt( rOptions );
+ aSOpt.Locale = rLocale;
+
+ xTextSearch = getXTextSearch( aSOpt );
+}
+
+TextSearch::~TextSearch()
+{
+}
+
+/*
+ * General search methods. These methods will call the respective
+ * methods, such as ordinary string searching or regular expression
+ * matching, using the method pointer.
+ */
+bool TextSearch::SearchForward( const OUString &rStr,
+ sal_Int32* pStart, sal_Int32* pEnd,
+ css::util::SearchResult* pRes)
+{
+ bool bRet = false;
+ try
+ {
+ if( xTextSearch.is() )
+ {
+ SearchResult aRet( xTextSearch->searchForward( rStr, *pStart, *pEnd ));
+ if( aRet.subRegExpressions > 0 )
+ {
+ bRet = true;
+ // the XTextsearch returns in startOffset the higher position
+ // and the endposition is always exclusive.
+ // The caller of this function will have in startPos the
+ // lower pos. and end
+ *pStart = aRet.startOffset[ 0 ];
+ *pEnd = aRet.endOffset[ 0 ];
+ if( pRes )
+ *pRes = aRet;
+ }
+ }
+ }
+ catch ( Exception& )
+ {
+ TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
+ }
+ return bRet;
+}
+
+bool TextSearch::searchForward( const OUString &rStr )
+{
+ sal_Int32 pStart = 0;
+ sal_Int32 pEnd = rStr.getLength();
+
+ bool bResult = SearchForward(rStr, &pStart, &pEnd);
+
+ return bResult;
+}
+
+bool TextSearch::SearchBackward( const OUString & rStr, sal_Int32* pStart,
+ sal_Int32* pEnd, SearchResult* pRes )
+{
+ bool bRet = false;
+ try
+ {
+ if( xTextSearch.is() )
+ {
+ SearchResult aRet( xTextSearch->searchBackward( rStr, *pStart, *pEnd ));
+ if( aRet.subRegExpressions )
+ {
+ bRet = true;
+ // the XTextsearch returns in startOffset the higher position
+ // and the endposition is always exclusive.
+ // The caller of this function will have in startPos the
+ // lower pos. and end
+ *pEnd = aRet.startOffset[ 0 ];
+ *pStart = aRet.endOffset[ 0 ];
+ if( pRes )
+ *pRes = aRet;
+ }
+ }
+ }
+ catch ( Exception& )
+ {
+ TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
+ }
+ return bRet;
+}
+
+void TextSearch::ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const SearchResult& rResult ) const
+{
+ if( rResult.subRegExpressions <= 0 )
+ return;
+
+ sal_Unicode sFndChar;
+ sal_Int32 i;
+ OUStringBuffer sBuff(rReplaceStr.getLength()*4);
+ for(i = 0; i < rReplaceStr.getLength(); i++)
+ {
+ if( rReplaceStr[i] == '&')
+ {
+ sal_Int32 nStart = rResult.startOffset[0];
+ sal_Int32 nLength = rResult.endOffset[0] - rResult.startOffset[0];
+ sBuff.append(rStr.substr(nStart, nLength));
+ }
+ else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '$')
+ {
+ sFndChar = rReplaceStr[ i + 1 ];
+ switch(sFndChar)
+ { // placeholder for a backward reference?
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ {
+ int j = sFndChar - '0'; // index
+ if(j < rResult.subRegExpressions)
+ {
+ sal_Int32 nSttReg = rResult.startOffset[j];
+ sal_Int32 nRegLen = rResult.endOffset[j];
+ if (nSttReg < 0 || nRegLen < 0) // A "not found" optional capture
+ {
+ nSttReg = nRegLen = 0; // Copy empty string
+ }
+ else if (nRegLen >= nSttReg)
+ {
+ nRegLen = nRegLen - nSttReg;
+ }
+ else
+ {
+ nRegLen = nSttReg - nRegLen;
+ nSttReg = rResult.endOffset[j];
+ }
+ // Copy reference from found string
+ sBuff.append(rStr.substr(nSttReg, nRegLen));
+ }
+ i += 1;
+ }
+ break;
+ default:
+ sBuff.append(rReplaceStr[i]);
+ sBuff.append(rReplaceStr[i+1]);
+ i += 1;
+ break;
+ }
+ }
+ else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '\\')
+ {
+ sFndChar = rReplaceStr[ i+1 ];
+ switch(sFndChar)
+ {
+ case '\\':
+ case '&':
+ case '$':
+ sBuff.append(sFndChar);
+ i+=1;
+ break;
+ case 't':
+ sBuff.append('\t');
+ i += 1;
+ break;
+ default:
+ sBuff.append(rReplaceStr[i]);
+ sBuff.append(rReplaceStr[i+1]);
+ i += 1;
+ break;
+ }
+ }
+ else
+ {
+ sBuff.append(rReplaceStr[i]);
+ }
+ }
+ rReplaceStr = sBuff.makeStringAndClear();
+}
+
+} // namespace utl
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */