1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
|
/*
* Copyright (C) 2005-2018 Team Kodi
* This file is part of Kodi - https://kodi.tv
*
* SPDX-License-Identifier: GPL-2.0-or-later
* See LICENSES/README.md for more information.
*/
#pragma once
//-----------------------------------------------------------------------
//
// File: StringUtils.h
//
// Purpose: ATL split string utility
// Author: Paul J. Weiss
//
// Modified to support J O'Leary's std::string class by kraqh3d
//
//------------------------------------------------------------------------
#include <stdarg.h>
#include <stdint.h>
#include <locale>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
// workaround for broken [[deprecated]] in coverity
#if defined(__COVERITY__)
#undef FMT_DEPRECATED
#define FMT_DEPRECATED
#endif
#include "utils/TimeFormat.h"
#include "utils/params_check_macros.h"
#include <fmt/format.h>
#if FMT_VERSION >= 80000
#include <fmt/xchar.h>
#endif
/*! \brief C preprocessor token stringification
The following macros can be used to stringify definitions to
C style strings.
DEF_TO_STR_NAME applies the # operator directly to its argument, so the
argument is turned into a string without being macro-expanded.
DEF_TO_STR_VALUE forwards its argument to DEF_TO_STR_NAME; the extra level
of expansion lets the argument be macro-expanded before it is stringified.
Example:
#define foo 4
DEF_TO_STR_NAME(foo) // outputs "foo"
DEF_TO_STR_VALUE(foo) // outputs "4"
*/
#define DEF_TO_STR_NAME(x) #x
#define DEF_TO_STR_VALUE(x) DEF_TO_STR_NAME(x)
/*! \brief Pass-through overload of EnumToInt for arguments that are not enumerations.
 The enum check is done on the type with its reference removed: for an lvalue
 argument T is deduced as a reference type, and std::is_enum is false for
 every reference type, so checking T itself would misclassify lvalue enums.
 \param arg the value to pass through
 \return an lvalue reference to arg (arg is a named parameter, hence an lvalue)
 */
template<typename T,
         std::enable_if_t<!std::is_enum<std::remove_reference_t<T>>::value, int> = 0>
constexpr auto&& EnumToInt(T&& arg) noexcept
{
  return arg;
}

/*! \brief Converting overload of EnumToInt for enumeration arguments.
 Selected for lvalue and rvalue enumerations alike (scoped or unscoped,
 const or not) because the check ignores the reference part of T.
 \param arg the enumeration value to convert
 \return arg converted with static_cast<int>
 */
template<typename T,
         std::enable_if_t<std::is_enum<std::remove_reference_t<T>>::value, int> = 0>
constexpr auto EnumToInt(T&& arg) noexcept
{
  return static_cast<int>(arg);
}
/*! \brief Collection of static string helper functions.
 Every member is static; the class only serves as a scope for the helpers.
 Members that are merely declared here are defined outside this header.
 */
class StringUtils
{
public:
  /*! \brief Get a formatted string similar to sprintf
  The formatting itself is done by ::fmt::format. Every argument is passed
  through EnumToInt() before it is handed to the formatter.
  \param fmt Format of the resulting string
  \param ... variable number of value type arguments
  \return Formatted string
  */
  template<typename... Args>
  static std::string Format(const std::string& fmt, Args&&... args)
  {
    // The parameter is named fmt as well, hence the fully qualified ::fmt namespace.
    // coverity[fun_call_w_exception : FALSE]
    return ::fmt::format(fmt, EnumToInt(std::forward<Args>(args))...);
  }

  /*! \brief Wide string variant of Format()
  \param fmt Format of the resulting string
  \param ... variable number of value type arguments
  \return Formatted wide string
  */
  template<typename... Args>
  static std::wstring Format(const std::wstring& fmt, Args&&... args)
  {
    // coverity[fun_call_w_exception : FALSE]
    return ::fmt::format(fmt, EnumToInt(std::forward<Args>(args))...);
  }

  static std::string FormatV(PRINTF_FORMAT_STRING const char* fmt, va_list args);
  static std::wstring FormatV(PRINTF_FORMAT_STRING const wchar_t* fmt, va_list args);

  static std::string ToUpper(const std::string& str);
  static std::wstring ToUpper(const std::wstring& str);
  static void ToUpper(std::string& str);
  static void ToUpper(std::wstring& str);
  static std::string ToLower(const std::string& str);
  static std::wstring ToLower(const std::wstring& str);
  static void ToLower(std::string& str);
  static void ToLower(std::wstring& str);
  static void ToCapitalize(std::string& str);
  static void ToCapitalize(std::wstring& str);

  static bool EqualsNoCase(const std::string& str1, const std::string& str2);
  static bool EqualsNoCase(const std::string& str1, const char* s2);
  static bool EqualsNoCase(const char* s1, const char* s2);
  static int CompareNoCase(const std::string& str1, const std::string& str2, size_t n = 0);
  static int CompareNoCase(const char* s1, const char* s2, size_t n = 0);

  static int ReturnDigits(const std::string& str);
  static std::string Left(const std::string& str, size_t count);
  static std::string Mid(const std::string& str, size_t first, size_t count = std::string::npos);
  static std::string Right(const std::string& str, size_t count);

  static std::string& Trim(std::string& str);
  static std::string& Trim(std::string& str, const char* const chars);
  static std::string& TrimLeft(std::string& str);
  static std::string& TrimLeft(std::string& str, const char* const chars);
  static std::string& TrimRight(std::string& str);
  static std::string& TrimRight(std::string& str, const char* const chars);
  static std::string& RemoveDuplicatedSpacesAndTabs(std::string& str);

  static int Replace(std::string& str, char oldChar, char newChar);
  static int Replace(std::string& str, const std::string& oldStr, const std::string& newStr);
  static int Replace(std::wstring& str, const std::wstring& oldStr, const std::wstring& newStr);

  static bool StartsWith(const std::string& str1, const std::string& str2);
  static bool StartsWith(const std::string& str1, const char* s2);
  static bool StartsWith(const char* s1, const char* s2);
  static bool StartsWithNoCase(const std::string& str1, const std::string& str2);
  static bool StartsWithNoCase(const std::string& str1, const char* s2);
  static bool StartsWithNoCase(const char* s1, const char* s2);
  static bool EndsWith(const std::string& str1, const std::string& str2);
  static bool EndsWith(const std::string& str1, const char* s2);
  static bool EndsWithNoCase(const std::string& str1, const std::string& str2);
  static bool EndsWithNoCase(const std::string& str1, const char* s2);

  /*! \brief Concatenates all elements of a container, separated by the given delimiter.
  \param strings Container whose elements are appended in iteration order
  \param delimiter Text placed between two consecutive elements
  \return The joined string; empty if the container is empty
  */
  template<typename CONTAINER>
  static std::string Join(const CONTAINER& strings, const std::string& delimiter)
  {
    std::string result;
    for (const auto& str : strings)
      result += str + delimiter;

    // Every element was followed by a delimiter, drop the trailing one
    if (!result.empty())
      result.erase(result.size() - delimiter.size());

    return result;
  }

  /*! \brief Splits the given input string using the given delimiter into separate strings.
  If the given input string is empty the result will be an empty array (not
  an array containing an empty string).
  \param input Input string to be split
  \param delimiter Delimiter to be used to split the input string
  \param iMaxStrings (optional) Maximum number of splitted strings
  */
  static std::vector<std::string> Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0);
  static std::vector<std::string> Split(const std::string& input, const char delimiter, size_t iMaxStrings = 0);
  static std::vector<std::string> Split(const std::string& input, const std::vector<std::string>& delimiters);

  /*! \brief Splits the given input string using the given delimiter into separate strings.
  If the given input string is empty nothing will be put into the target iterator.
  If the delimiter is empty the whole input is written as a single element.
  \param d_first the beginning of the destination range
  \param input Input string to be split
  \param delimiter Delimiter to be used to split the input string
  \param iMaxStrings (optional) Maximum number of splitted strings; once it is
         reached the unsplit remainder of the input becomes the last element.
         0 means no limit.
  \return output iterator to the element in the destination range, one past the last element
          that was put there
  */
  template<typename OutputIt>
  static OutputIt SplitTo(OutputIt d_first, const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0)
  {
    OutputIt dest = d_first;

    if (input.empty())
      return dest;

    if (delimiter.empty())
    {
      // Write through and advance dest itself so that the returned iterator
      // really points one past the element that was just stored.
      *dest++ = input;
      return dest;
    }

    const size_t delimLen = delimiter.length();
    size_t nextDelim;
    size_t textPos = 0;
    do
    {
      // For iMaxStrings == 0 the unsigned decrement wraps around, so the limit
      // is not hit before the input has been consumed.
      if (--iMaxStrings == 0)
      {
        *dest++ = input.substr(textPos);
        break;
      }
      nextDelim = input.find(delimiter, textPos);
      *dest++ = input.substr(textPos, nextDelim - textPos);
      textPos = nextDelim + delimLen;
    } while (nextDelim != std::string::npos);

    return dest;
  }

  /*! \brief Single character delimiter variant of SplitTo().
  Forwards to the string delimiter overload; iMaxStrings is converted to the
  unsigned int parameter of that overload.
  \param d_first the beginning of the destination range
  \param input Input string to be split
  \param delimiter Delimiter character to be used to split the input string
  \param iMaxStrings (optional) Maximum number of splitted strings
  \return output iterator one past the last element that was put into the destination range
  */
  template<typename OutputIt>
  static OutputIt SplitTo(OutputIt d_first, const std::string& input, const char delimiter, size_t iMaxStrings = 0)
  {
    return SplitTo(d_first, input, std::string(1, delimiter), iMaxStrings);
  }

  /*! \brief Splits the given input string using any of the given delimiters.
  If the given input string is empty nothing will be put into the target iterator.
  If no delimiter is given the whole input is written as a single element.
  \param d_first the beginning of the destination range
  \param input Input string to be split
  \param delimiters Delimiters to be used to split the input string
  \return output iterator one past the last element that was put into the destination range
  */
  template<typename OutputIt>
  static OutputIt SplitTo(OutputIt d_first, const std::string& input, const std::vector<std::string>& delimiters)
  {
    OutputIt dest = d_first;
    if (input.empty())
      return dest;

    if (delimiters.empty())
    {
      *dest++ = input;
      return dest;
    }

    // Replace every further delimiter by the first one in a copy of the input,
    // then split that copy by the first delimiter only.
    std::string str = input;
    for (size_t di = 1; di < delimiters.size(); di++)
      StringUtils::Replace(str, delimiters[di], delimiters[0]);

    return SplitTo(dest, str, delimiters[0]);
  }

  /*! \brief Splits the given input strings using the given delimiters into further separate strings.
  If the given input string vector is empty the result will be an empty array (not
  an array containing an empty string).
  Delimiter strings are applied in order, so once the (optional) maximum number of
  items is produced no other delimiters are applied. This produces different results
  to applying all delimiters at once e.g. "a/b#c/d" becomes "a", "b#c", "d" rather
  than "a", "b", "c/d"
  \param input Input vector of strings each to be split
  \param delimiters Delimiter strings to be used to split the input strings
  \param iMaxStrings (optional) Maximum number of resulting split strings
  */
  static std::vector<std::string> SplitMulti(const std::vector<std::string>& input,
                                             const std::vector<std::string>& delimiters,
                                             size_t iMaxStrings = 0);

  static int FindNumber(const std::string& strInput, const std::string& strFind);
  static int64_t AlphaNumericCompare(const wchar_t* left, const wchar_t* right);
  static int AlphaNumericCollation(int nKey1, const void* pKey1, int nKey2, const void* pKey2);
  static long TimeStringToSeconds(const std::string& timeString);
  static void RemoveCRLF(std::string& strLine);

  /*! \brief utf8 version of strlen - skips any non-starting bytes in the count, thus returning the number of utf8 characters
  \param s c-string to find the length of.
  \return the number of utf8 characters in the string.
  */
  static size_t utf8_strlen(const char* s);

  /*! \brief convert a time in seconds to a string based on the given time format
  \param seconds time in seconds
  \param format the format we want the time in.
  \return the formatted time
  \sa TIME_FORMAT
  */
  static std::string SecondsToTimeString(long seconds, TIME_FORMAT format = TIME_FORMAT_GUESS);

  /*! \brief check whether a string is a natural number.
  Matches [ \t]*[0-9]+[ \t]*
  \param str the string to check
  \return true if the string is a natural number, false otherwise.
  */
  static bool IsNaturalNumber(const std::string& str);

  /*! \brief check whether a string is an integer.
  Matches [ \t]*[\-]*[0-9]+[ \t]*
  \param str the string to check
  \return true if the string is an integer, false otherwise.
  */
  static bool IsInteger(const std::string& str);

  /* The next several isasciiXX and asciiXXvalue functions are locale independent (US-ASCII only),
   * as opposed to standard ::isXX (::isalpha, ::isdigit...) which are locale dependent.
   * Next functions get parameter as char and don't need double cast ((int)(unsigned char) is required for standard functions). */

  /*! \brief check whether a character is one of '0'..'9' */
  inline static bool isasciidigit(char chr) // locale independent
  {
    return chr >= '0' && chr <= '9';
  }

  /*! \brief check whether a character is one of '0'..'9', 'a'..'f' or 'A'..'F' */
  inline static bool isasciixdigit(char chr) // locale independent
  {
    return (chr >= '0' && chr <= '9') || (chr >= 'a' && chr <= 'f') || (chr >= 'A' && chr <= 'F');
  }

  static int asciidigitvalue(char chr); // locale independent
  static int asciixdigitvalue(char chr); // locale independent

  /*! \brief check whether a character is one of 'A'..'Z' */
  inline static bool isasciiuppercaseletter(char chr) // locale independent
  {
    return (chr >= 'A' && chr <= 'Z');
  }

  /*! \brief check whether a character is one of 'a'..'z' */
  inline static bool isasciilowercaseletter(char chr) // locale independent
  {
    return (chr >= 'a' && chr <= 'z');
  }

  /*! \brief check whether a character is an ASCII letter (either case) or an ASCII digit */
  inline static bool isasciialphanum(char chr) // locale independent
  {
    return isasciiuppercaseletter(chr) || isasciilowercaseletter(chr) || isasciidigit(chr);
  }

  static std::string SizeToString(int64_t size);
  static const std::string Empty;
  static size_t FindWords(const char* str, const char* wordLowerCase);
  static int FindEndBracket(const std::string& str, char opener, char closer, int startPos = 0);
  static int DateStringToYYYYMMDD(const std::string& dateString);
  static std::string ISODateToLocalizedDate(const std::string& strIsoDate);
  static void WordToDigits(std::string& word);
  static std::string CreateUUID();
  static bool ValidateUUID(const std::string& uuid); // NB only validates syntax
  static double CompareFuzzy(const std::string& left, const std::string& right);
  static int FindBestMatch(const std::string& str, const std::vector<std::string>& strings, double& matchscore);
  static bool ContainsKeyword(const std::string& str, const std::vector<std::string>& keywords);

  /*! \brief Convert the string of binary chars to the actual string.
  Convert the string representation of binary chars to the actual string.
  For example \1\2\3 is converted to a string with binary char \1, \2 and \3
  \param in String to convert
  \return Converted string
  */
  static std::string BinaryStringToString(const std::string& in);

  /**
   * Convert each character in the string to its hexadecimal
   * representation and return the concatenated result
   *
   * example: "abc\n" -> "6162630a"
   */
  static std::string ToHexadecimal(const std::string& in);

  /*! \brief Format the number with locale separators.
  The number is streamed in fixed notation with a precision of 1, so a floating
  point value is written with exactly one digit after the decimal point.
  For example 10000.57 in en-us is '10,000.6' but in italian is '10.000,6'
  \param num Number to format
  \return Formatted string
  */
  template<typename T>
  static std::string FormatNumber(T num)
  {
    std::stringstream ss;
// ifdef is needed because when you set _ITERATOR_DEBUG_LEVEL=0 and you use custom numpunct you will get runtime error in debug mode
// for more info https://connect.microsoft.com/VisualStudio/feedback/details/2655363
#if !(defined(_DEBUG) && defined(TARGET_WINDOWS))
    ss.imbue(GetOriginalLocale());
#endif
    ss.precision(1);
    ss << std::fixed << num;
    return ss.str();
  }

  /*! \brief Escapes the given string to be able to be used as a parameter.
  Escapes backslashes and double-quotes with an additional backslash and
  adds double-quotes around the whole string.
  \param param String to escape/paramify
  \return Escaped/Paramified string
  */
  static std::string Paramify(const std::string& param);

  /*! \brief Unescapes the given string.
  Unescapes backslashes and double-quotes and removes double-quotes around the whole string.
  \param param String to unescape/deparamify
  \return Unescaped/Deparamified string
  */
  static std::string DeParamify(const std::string& param);

  /*! \brief Split a string by the specified delimiters.
  Splits a string using one or more delimiting characters, ignoring empty tokens.
  Differs from Split() in two ways:
  1. The delimiters are treated as individual characters, rather than a single delimiting string.
  2. Empty tokens are ignored.
  \return a vector of tokens
  */
  static std::vector<std::string> Tokenize(const std::string& input, const std::string& delimiters);
  static void Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters);
  static std::vector<std::string> Tokenize(const std::string& input, const char delimiter);
  static void Tokenize(const std::string& input, std::vector<std::string>& tokens, const char delimiter);

  /*!
   * \brief Converts a string to a unsigned int number.
   * \param str The string to convert
   * \param fallback [OPT] The number to return when the conversion fails
   * \return The converted number, otherwise fallback if conversion fails
   */
  static uint32_t ToUint32(std::string_view str, uint32_t fallback = 0) noexcept;

  /*!
   * \brief Converts a string to a unsigned long long number.
   * \param str The string to convert
   * \param fallback [OPT] The number to return when the conversion fails
   * \return The converted number, otherwise fallback if conversion fails
   */
  static uint64_t ToUint64(std::string_view str, uint64_t fallback = 0) noexcept;

  /*!
   * \brief Converts a string to a float number.
   * \param str The string to convert
   * \param fallback [OPT] The number to return when the conversion fails
   * \return The converted number, otherwise fallback if conversion fails
   */
  static float ToFloat(std::string_view str, float fallback = 0.0f) noexcept;

  /*!
   * Returns bytes in a human readable format using the smallest unit that will fit `bytes` in at
   * most three digits. The number of decimals are adjusted with significance such that 'small'
   * numbers will have more decimals than larger ones.
   *
   * For example: 1024 bytes will be formatted as "1.00kB", 10240 bytes as "10.0kB" and
   * 102400 bytes as "100kB". See TestStringUtils for more examples.
   */
  static std::string FormatFileSize(uint64_t bytes);

  /*! \brief Converts a cstring pointer (const char*) to a std::string.
  In case nullptr is passed the result is an empty string.
  \param cstr the const pointer to char
  \return the resulting std::string or ""
  */
  static std::string CreateFromCString(const char* cstr);

private:
  /*!
   * Wrapper for CLangInfo::GetOriginalLocale() which allows us to
   * avoid including LangInfo.h from this header.
   */
  static const std::locale& GetOriginalLocale() noexcept;
};
/*! \brief Comparison functor ordering two strings by the result of StringUtils::CompareNoCase.
 Usable wherever a "less than" predicate on std::string is expected.
 */
struct sortstringbyname
{
  /*! \brief Tells whether the first string sorts before the second one.
  \param strItem1 left hand side of the comparison
  \param strItem2 right hand side of the comparison
  \return true if StringUtils::CompareNoCase reports a negative result, false otherwise
  */
  bool operator()(const std::string& strItem1, const std::string& strItem2) const
  {
    const int order = StringUtils::CompareNoCase(strItem1, strItem2);
    return order < 0;
  }
};
|