summaryrefslogtreecommitdiffstats
path: root/include/rtl/stringutils.hxx
blob: 622542c7dab851f0142dff983ffe0fdd1c18f396 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

/*
 * This file is part of LibreOffice published API.
 */

#ifndef INCLUDED_RTL_STRINGUTILS_HXX
#define INCLUDED_RTL_STRINGUTILS_HXX

#include "sal/config.h"

#include <cassert>
#include <cstddef>

#if defined LIBO_INTERNAL_ONLY
#include <type_traits>
#endif

#include "sal/types.h"

// The unittest uses slightly different code to help check that the proper
// calls are made. The class is put into a different namespace to make
// sure the compiler generates a different (if generating also non-inline)
// copy of the function and does not merge them together. The class
// is "brought" into the proper rtl namespace by a typedef below.
#ifdef RTL_STRING_UNITTEST
#define rtl rtlunittest
#endif

namespace rtl
{

#ifdef RTL_STRING_UNITTEST
#undef rtl
#endif

#if defined LIBO_INTERNAL_ONLY
/// @cond INTERNAL

// A simple wrapper around a single char.  Can be useful in string concatenation contexts, like in
//
//  OString s = ...;
//  char c = ...;
//  s += OStringChar(c);
//
struct SAL_WARN_UNUSED OStringChar {
    constexpr OStringChar(char theC): c(theC) {}
    template<typename T> OStringChar(
        T, std::enable_if_t<std::is_arithmetic_v<T> || std::is_enum_v<T>, int> = 0) = delete;
    constexpr operator std::string_view() const { return {&c, 1}; }
    char const c;
};

/** A simple wrapper around a single sal_Unicode character.

    Can be useful to pass a sal_Unicode constant into an OUString-related
    function that is optimized for UTF-16 string literal arguments.  That is,
    instead of

      sal_Unicode const WILDCARD = '%';
      ...
      if (s[i] == WILDCARD) ...
      ...
      if (s.endsWith(OUString(WILDCARD))) ...

    use

      sal_Unicode const WILDCARD = '%';
      ...
      if (s[i] == WILDCARD) ...
      ...
      if (s.endsWith(OUStringChar(WILDCARD))) ...

    to avoid creating a temporary OUString instance, and instead pick the
    endsWith overload actually designed to take an argument of type
    sal_Unicode const[N].

    (Because of the above use case,
    instances of OUStringChar need to be const, as those literal-optimized
    functions take the literal argument by non-const lvalue reference, for
    technical reasons.

    For actual arrays, it is important to distinguish string literals from other char or sal_Unicode
    arrays, which may contain junk after the first NUL character or may be non-ASCII in the case of
    char arrays.  This is not so much a concern for single char and sal_Unicode values, where NUL is
    assumed to always be meant as an actual character.)

    Can also be useful in string concatenation contexts, like in

      sal_Unicode const * s = ...;
      sal_Unicode c = ...;
      OUString t = s + OUStringChar(c);

    @since LibreOffice 5.0
*/
struct SAL_WARN_UNUSED OUStringChar_ {
    constexpr OUStringChar_(sal_Unicode theC): c(theC) {}
    constexpr OUStringChar_(char theC): c(theC) { assert(c <= 0x7F); }
    template<typename T> OUStringChar_(
        T, std::enable_if_t<std::is_arithmetic_v<T> || std::is_enum_v<T>, int> = 0) = delete;
    constexpr operator std::u16string_view() const { return {&c, 1}; }
    sal_Unicode const c;
};
using OUStringChar = OUStringChar_ const;

/// @endcond
#endif

namespace libreoffice_internal
{
/*
These templates use SFINAE (Substitution failure is not an error) to help distinguish the various
plain C string types: char*, const char*, char[N], const char[N], char[] and const char[].
There are 2 cases:
1) Only string literal (i.e. const char[N]) is wanted, not any of the others.
    In this case it is necessary to distinguish between const char[N] and char[N], as the latter
    would be automatically converted to the const variant, which is not wanted (not a string literal
    with known size of the content). In this case ConstCharArrayDetector is used to ensure the function
    is called only with const char[N] arguments. There's no other plain C string type overload.
    (Note that OUStringChar is also covered by ConstCharArrayDetector's TypeUtf16 check, but
    provides a pointer to a string that is not NUL-terminated, unlike the char16_t const[N] arrays
    normally covered by that check, and which are assumed to represent NUL-terminated string
    literals.)
2) All plain C string types are wanted, and const char[N] needs to be handled differently.
    In this case const char[N] would match const char* argument type (not exactly sure why, but it's
    consistent in all of gcc, clang and msvc). Using a template with a reference to const of the type
    avoids this problem, and CharPtrDetector ensures that the function is called only with char pointer
    arguments. The const in the argument is necessary to handle the case when something is explicitly
    cast to const char*. Additionally (non-const) char[N] needs to be handled, but with the reference
    being const, it would also match const char[N], so another overload with a reference to non-const
    and NonConstCharArrayDetector are used to ensure the function is called only with (non-const) char[N].
Additionally, char[] and const char[] (i.e. size unknown) are rather tricky. Their usage with 'T&' would
mean it would be 'char(&)[]', which seems to be invalid. But gcc and clang somehow manage when it is
a template, while msvc complains about no conversion from char[] to char[1]. And the reference cannot
be avoided, because 'const char[]' as argument type would also match 'const char[N]'.
So char[] and const char[] should always be used with their contents specified (which automatically
turns them into char[N] or const char[N]), or char* and const char* should be used.
*/
// Empty placeholder type.  Within this header it is only used as the Type member of the primary
// ExceptConstCharArrayDetector and ExceptCharArrayDetector templates.
struct Dummy {};
// Detects plain char pointer types.  The primary template handles every type without a
// specialization below: ok is false and there is no nested Type, so naming
// CharPtrDetector< X, R >::Type for such an X is a substitution failure.
template< typename Candidate, typename Result = void >
struct CharPtrDetector
{
    static const bool ok = false;
};
// char const *: accepted; Type is the requested result type.
template< typename Result >
struct CharPtrDetector< char const *, Result >
{
    static const bool ok = true;
    typedef Result Type;
};
// char *: accepted; Type is the requested result type.
template< typename Result >
struct CharPtrDetector< char *, Result >
{
    static const bool ok = true;
    typedef Result Type;
};
#if defined LIBO_INTERNAL_ONLY
// sal_Unicode pointers and arrays of unknown bound expose only TypeUtf16 (neither Type nor ok).
template< typename Result >
struct CharPtrDetector< sal_Unicode *, Result >
{
    using TypeUtf16 = Result;
};
template< typename Result >
struct CharPtrDetector< sal_Unicode const *, Result >
{
    using TypeUtf16 = Result;
};
template< typename Result >
struct CharPtrDetector< sal_Unicode[], Result >
{
    using TypeUtf16 = Result;
};
template< typename Result >
struct CharPtrDetector< sal_Unicode const[], Result >
{
    using TypeUtf16 = Result;
};
#endif

template< typename T1, typename T2 >
struct NonConstCharArrayDetector
{
};
template< typename T, int N >
struct NonConstCharArrayDetector< char[ N ], T >
{
    typedef T Type;
};
#ifdef RTL_STRING_UNITTEST
// never use, until all compilers handle this
template< typename T >
struct NonConstCharArrayDetector< char[], T >
{
    typedef T Type;
};
template< typename T >
struct NonConstCharArrayDetector< const char[], T >
{
    typedef T Type;
};
#endif
#if defined LIBO_INTERNAL_ONLY
template<typename T, std::size_t N> struct NonConstCharArrayDetector<sal_Unicode[N], T> {
    using TypeUtf16 = T;
};
#endif

// Detects string literals, i.e. arrays of type char const[N].  Every type without a
// specialization ends up in the primary template, which only reports ok == false and has no
// nested Type.
template< typename Candidate, typename Result = void >
struct ConstCharArrayDetector
{
    static const bool ok = false;
};
// char const[N]: Type is the requested result type.
template< std::size_t N, typename Result >
struct ConstCharArrayDetector< char const[ N ], Result >
{
    typedef Result Type;
    static const bool ok = true;
    // Number of characters in front of the final array element.
    static const std::size_t length = N - 1;
    // Returns true iff none of the first N - 1 elements is NUL and the last element is NUL.
#if defined LIBO_INTERNAL_ONLY
    constexpr
#endif
    static bool isValid(char const (& literal)[N]) {
        // Advance to the first NUL or to the last element, whichever comes first.
        std::size_t pos = 0;
        while (pos != N - 1 && literal[pos] != '\0') {
            ++pos;
        }
        return pos == N - 1 && literal[N - 1] == '\0';
    }
    // Returns the address of the first array element.
#if defined LIBO_INTERNAL_ONLY
    constexpr
#endif
    static char const * toPointer(char const (& literal)[N]) { return literal; }
};

#if defined(__COVERITY__)
// Coverity-only specialization for one-element arrays: isValid is written without a loop to
// silence overzealous warnings that the general loop is logically dead in the single char case.
template< typename Result >
struct ConstCharArrayDetector< char const[ 1 ], Result >
{
    typedef Result Type;
    static const bool ok = true;
    static const std::size_t length = 0;
    // Valid iff the single element is the NUL terminator.
#if defined LIBO_INTERNAL_ONLY
    constexpr
#endif
    static bool isValid(char const (& literal)[1]) {
        return *literal == '\0';
    }
    // Returns the address of the single element.
#if defined LIBO_INTERNAL_ONLY
    constexpr
#endif
    static char const * toPointer(char const (& literal)[1]) { return &literal[0]; }
};
#endif

#if defined LIBO_INTERNAL_ONLY \
    && !(defined _MSC_VER && _MSC_VER >= 1930 && _MSC_VER <= 1939 && defined _MANAGED)
// char8_t const[N] (UTF-8 literals): validated like char const[N], and handed out as a
// char const pointer.
// NOTE(review): the reason for excluding _MANAGED builds with _MSC_VER 1930..1939 is not visible
// in this header -- confirm before touching the condition.
template<std::size_t N, typename T>
struct ConstCharArrayDetector<char8_t const [N], T> {
    using Type = T;
    static constexpr bool const ok = true;
    static constexpr std::size_t const length = N - 1;
    // Returns true iff none of the first N - 1 elements is NUL and the last element is NUL.
    static constexpr bool isValid(char8_t const (& literal)[N]) {
        // Advance to the first NUL or to the last element, whichever comes first.
        std::size_t pos = 0;
        while (pos != N - 1 && literal[pos] != u8'\0') {
            ++pos;
        }
        return pos == N - 1 && literal[N - 1] == u8'\0';
    }
    // Reinterprets the start of the char8_t array as a pointer to char.
    static constexpr char const * toPointer(char8_t const (& literal)[N])
    { return reinterpret_cast<char const *>(literal); }
};
#endif

#if defined LIBO_INTERNAL_ONLY
// sal_Unicode const[N] (UTF-16 literals): exposes TypeUtf16 instead of Type.
template<std::size_t N, typename T>
struct ConstCharArrayDetector<sal_Unicode const [N], T> {
    using TypeUtf16 = T;
    static constexpr bool const ok = true;
    static constexpr std::size_t const length = N - 1;
    // Returns true iff none of the first N - 1 elements is NUL and the last element is NUL.
    static constexpr bool isValid(sal_Unicode const (& literal)[N]) {
        // Advance to the first NUL or to the last element, whichever comes first.
        std::size_t pos = 0;
        while (pos != N - 1 && literal[pos] != '\0') {
            ++pos;
        }
        return pos == N - 1 && literal[N - 1] == '\0';
    }
    // Returns the address of the first array element.
    static constexpr sal_Unicode const * toPointer(sal_Unicode const (& literal)[N])
    { return literal; }
};

#if defined(__COVERITY__)
// Coverity-only specialization for one-element arrays: isValid is written without a loop to
// silence overzealous warnings that the general loop is logically dead in the single char case.
template<typename T>
struct ConstCharArrayDetector<sal_Unicode const [1], T> {
    using TypeUtf16 = T;
    static constexpr bool const ok = true;
    static constexpr std::size_t const length = 0;
    // Valid iff the single element is the NUL terminator.
    static constexpr bool isValid(sal_Unicode const (& literal)[1]) {
        return *literal == '\0';
    }
    // Returns the address of the single element.
    static constexpr sal_Unicode const * toPointer(sal_Unicode const (& literal)[1])
    { return &literal[0]; }
};
#endif

// OUStringChar: handled like a UTF-16 literal of length 1.  toPointer hands out the address of
// the wrapped code unit itself, so no NUL terminator follows the pointed-to character.
template<typename T>
struct ConstCharArrayDetector<OUStringChar, T>
{
    using TypeUtf16 = T;
    static constexpr bool const ok = true;
    static constexpr std::size_t const length = 1;
    // Nothing to check: every wrapped value is accepted.
    static constexpr bool isValid(OUStringChar) { return true; }
    // Returns the address of the wrapped code unit.
    static constexpr sal_Unicode const * toPointer(OUStringChar_ const & literal)
    { return &literal.c; }
};
#endif

#if defined LIBO_INTERNAL_ONLY && defined RTL_STRING_UNITTEST

// this one is used to rule out only const char[N] (and, likewise, sal_Unicode const[N] and
// OUStringChar): the primary template provides Type (as Dummy) for every type, while the
// specializations for the ruled-out types are empty, so naming Type for them is a substitution
// failure
template< typename T >
struct ExceptConstCharArrayDetector
{
    typedef Dummy Type;
};
// the bound is a std::size_t, like in the sal_Unicode const[N] specialization below
template< std::size_t N >
struct ExceptConstCharArrayDetector< const char[ N ] >
{
};
template<std::size_t N>
struct ExceptConstCharArrayDetector<sal_Unicode const[N]> {};
template<> struct ExceptConstCharArrayDetector<
    OUStringChar
    >
{};

// this one is used to rule out both char[N] and const char[N] (and, likewise, the
// sal_Unicode arrays and OUStringChar_): the primary template provides Type (as Dummy),
// the specializations for the ruled-out types are empty
// (const will be brought in by 'const T&' in the function call)
// msvc needs const char[N] here (not sure whether gcc or msvc
// are right, it doesn't matter).
template< typename T >
struct ExceptCharArrayDetector
{
    typedef Dummy Type;
};
// the bounds are std::size_t, like in the sal_Unicode specializations below
template< std::size_t N >
struct ExceptCharArrayDetector< char[ N ] >
{
};
template< std::size_t N >
struct ExceptCharArrayDetector< const char[ N ] >
{
};
template<std::size_t N> struct ExceptCharArrayDetector<sal_Unicode[N]> {};
template<std::size_t N> struct ExceptCharArrayDetector<sal_Unicode const[N]> {};
template<> struct ExceptCharArrayDetector<OUStringChar_> {};

#endif

// Detects sal_Unicode pointer types.  The primary template handles every other type: ok is
// false and there is no nested Type.
template< typename Candidate, typename Result = void >
struct SalUnicodePtrDetector
{
    static const bool ok = false;
};
// sal_Unicode const *: accepted; Type is the requested result type.
template< typename Result >
struct SalUnicodePtrDetector< sal_Unicode const *, Result >
{
    static const bool ok = true;
    typedef Result Type;
};
// sal_Unicode *: accepted; Type is the requested result type.
template< typename Result >
struct SalUnicodePtrDetector< sal_Unicode *, Result >
{
    static const bool ok = true;
    typedef Result Type;
};

// SFINAE helper class: Enable< T, true >::Type is T, whereas Enable< T, false > has no nested
// Type at all.
template< typename T, bool Condition >
struct Enable
{
};

template< typename T >
struct Enable< T, true >
{
    typedef T Type;
};


} /* Namespace */

} /* Namespace */

#endif // INCLUDED_RTL_STRINGUTILS_HXX

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */