summaryrefslogtreecommitdiffstats
path: root/include/unotools/textsearch.hxx
blob: 04a25a1a3c46d1f8d30b633a706a7029a12b2eeb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#ifndef INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
#define INCLUDED_UNOTOOLS_TEXTSEARCH_HXX

#include <unotools/unotoolsdllapi.h>
#include <i18nlangtag/lang.h>
#include <rtl/ustring.hxx>
#include <com/sun/star/uno/Reference.h>

#include <ostream>

class CharClass;

namespace com::sun::star::lang { struct Locale; }
namespace com::sun::star::util { class XTextSearch2; }

namespace com {
    namespace sun {
        namespace star {
            namespace util {
                struct SearchResult;
            }
        }
    }
}
namespace i18nutil {
    struct SearchOptions;
    struct SearchOptions2;
}
enum class TransliterationFlags;

namespace utl
{

// Utility class for searching
class UNOTOOLS_DLLPUBLIC SearchParam
{
public:
    enum class SearchType { Normal, Regexp, Wildcard, Unknown = -1 };

    /** Convert configuration and document boolean settings to SearchType.
        If bWildcard is true it takes precedence over rbRegExp.
        @param  rbRegExp
                If true and bWildcard is also true, rbRegExp is set to false to
                adapt the caller's settings.
     */
    static SearchType ConvertToSearchType( bool bWildcard, bool & rbRegExp )
    {
        if (bWildcard)
        {
            if (rbRegExp)
                rbRegExp = false;
            return SearchType::Wildcard;
        }
        return rbRegExp ? SearchType::Regexp : SearchType::Normal;
    }

    /** Convert SearchType to configuration and document boolean settings.
     */
    static void ConvertToBool( const SearchType eSearchType, bool& rbWildcard, bool& rbRegExp )
    {
        switch (eSearchType)
        {
            case SearchType::Wildcard:
                rbWildcard = true;
                rbRegExp = false;
                break;
            case SearchType::Regexp:
                rbWildcard = false;
                rbRegExp = true;
                break;
            default:
                rbWildcard = false;
                rbRegExp = false;
                break;
        }
    }

private:
    OUString sSrchStr;            // the search string

    SearchType m_eSrchType;       // search normal/regular/LevDist

    sal_uInt32 m_cWildEscChar;      // wildcard escape character

    bool m_bCaseSense  : 1;
    bool m_bWildMatchSel : 1;       // wildcard pattern must match entire selection

public:
    SearchParam( const OUString &rText,
                    SearchType eSrchType,
                    bool bCaseSensitive = true,
                    sal_uInt32 cWildEscChar = '\\',
                    bool bWildMatchSel = false );

    SearchParam( const SearchParam& );

    ~SearchParam();

    const OUString& GetSrchStr() const          { return sSrchStr; }
    SearchType      GetSrchType() const         { return m_eSrchType; }

    bool            IsCaseSensitive() const     { return m_bCaseSense; }
    bool            IsWildMatchSel() const      { return m_bWildMatchSel; }

    // signed return for API use
    sal_Int32       GetWildEscChar() const      { return static_cast<sal_Int32>(m_cWildEscChar); }
};

// For use in SAL_DEBUG etc. Output format not guaranteed to be stable.
template<typename charT, typename traits>
inline std::basic_ostream<charT, traits> & operator <<(std::basic_ostream<charT, traits> & stream, const SearchParam::SearchType& eType)
{
    switch (eType)
    {
    case SearchParam::SearchType::Normal:
        stream << "N";
        break;
    case SearchParam::SearchType::Regexp:
        stream << "RE";
        break;
    case SearchParam::SearchType::Wildcard:
        stream << "WC";
        break;
    case SearchParam::SearchType::Unknown:
        stream << "UNK";
        break;
    default:
        stream << static_cast<int>(eType) << '?';
        break;
    }

    return stream;
}

//  Utility class for searching a substring in a string.
//  The following metrics are supported
//      - ordinary text (Bayer/Moore)
//      - regular expressions
//      - weighted Levenshtein distance
//      - wildcards '*' and '?'

//  This class allows forward and backward searching!

class UNOTOOLS_DLLPUBLIC TextSearch
{
    static css::uno::Reference< css::util::XTextSearch2 >
        getXTextSearch( const i18nutil::SearchOptions2& rPara );

    css::uno::Reference < css::util::XTextSearch2 >
            xTextSearch;

    void Init( const SearchParam & rParam,
               const css::lang::Locale& rLocale );

public:
    // rText is the string being searched for
    // this first two CTORs are deprecated!
    TextSearch( const SearchParam & rPara, LanguageType nLanguage );
    TextSearch( const SearchParam & rPara, const CharClass& rCClass );

    TextSearch( const i18nutil::SearchOptions2& rPara );
    ~TextSearch();

    /* search in the (selected) text the search string:
        rScrTxt - the text, in which we search
        pStart  - start position for the search
        pEnd    - end position for the search

        RETURN values   ==  true: something is found
                        - pStart start pos of the found text,
                        - pStart end pos of the found text,
                        - pSrchResult - the search result with all found
                             positions. Is only filled with more positions
                             if the regular expression handles groups.

                        == false: nothing found, pStart, pEnd unchanged.

        Definitions: start pos always inclusive, end pos always exclusive!
                     The position must always in the right direction!
                    search forward: start <= end
                    search backward: end <= start
    */
    bool SearchForward( const OUString &rStr,
                        sal_Int32* pStart, sal_Int32* pEnd,
                        css::util::SearchResult* pRes = nullptr );
    /**
     * @brief searchForward Search forward beginning from the start to the end
     *        of the given text
     * @param rStr The text in which we search
     * @return True if the search term is found in the text
     */
    bool searchForward( const OUString &rStr );
    bool SearchBackward( const OUString &rStr,
                        sal_Int32* pStart, sal_Int32* pEnd,
                        css::util::SearchResult* pRes = nullptr );

    void SetLocale( const i18nutil::SearchOptions2& rOpt,
                    const css::lang::Locale& rLocale );

    /* replace back references in the replace string by the sub expressions from the search result */
    void ReplaceBackReferences( OUString& rReplaceStr, const OUString &rStr, const css::util::SearchResult& rResult ) const;

    /** Upgrade SearchOptions to SearchOptions2 for places that don't handle
        SearchOptions2 yet. Better fix your module if you want to support
        wildcard search.
     */
    static i18nutil::SearchOptions2 UpgradeToSearchOptions2( const i18nutil::SearchOptions& rOptions );

};

}   // namespace utl

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */