summaryrefslogtreecommitdiffstats
path: root/xpcom/string/nsTStringRepr.h
blob: 7e7fa5338404001b0e46322c41c57e6f5da81d98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsTStringRepr_h
#define nsTStringRepr_h

#include <limits>
#include <string_view>
#include <type_traits>  // std::enable_if

#include "mozilla/Char16.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/fallible.h"
#include "nsStringBuffer.h"
#include "nsStringFlags.h"
#include "nsStringFwd.h"
#include "nsStringIterator.h"
#include "nsCharTraits.h"

template <typename T>
class nsTSubstringTuple;

namespace mozilla {

// This is mainly intended to be used in the context of nsTStrings where
// we want to enable a specific function only for a given character class. In
// order for this technique to work the member function needs to be templated
// on something other than `T`. We keep this in the `mozilla` namespace rather
// than `nsTStringRepr` as it's intentionally not dependent on `T`.
//
// The 'T' at the end of `Char[16]OnlyT` is refering to the `::type` portion
// which will only be defined if the character class is correct. This is similar
// to `std::enable_if_t` which is available in C++14, but not C++11.
//
// `CharType` is generally going to be a shadowed type of `T`.
//
// Example usage of a function that will only be defined if `T` == `char`:
//
// template <typename T>
// class nsTSubstring : public nsTStringRepr<T> {
//   template <typename Q = T, typename EnableForChar = typename CharOnlyT<Q>>
//   int Foo() { return 42; }
// };
//
// Please note that we had to use a separate type `Q` for this to work. You
// will get a semi-decent compiler error if you use `T` directly.

template <typename CharType>
using CharOnlyT =
    typename std::enable_if<std::is_same<char, CharType>::value>::type;

template <typename CharType>
using Char16OnlyT =
    typename std::enable_if<std::is_same<char16_t, CharType>::value>::type;

namespace detail {

// nsTStringLengthStorage is a helper class which holds the string's length and
// provides getters and setters for converting to and from `size_t`. This is
// done to allow the length to be stored in a `uint32_t` using assertions.
template <typename T>
class nsTStringLengthStorage {
 public:
  // The maximum byte capacity for a `nsTString` must fit within an `int32_t`,
  // with enough room for a trailing null, as consumers often cast `Length()`
  // and `Capacity()` to smaller types like `int32_t`.
  static constexpr size_t kMax =
      size_t{std::numeric_limits<int32_t>::max()} / sizeof(T) - 1;
  static_assert(
      (kMax + 1) * sizeof(T) <= std::numeric_limits<int32_t>::max(),
      "nsTString's maximum length, including the trailing null, must fit "
      "within `int32_t`, as callers will cast to `int32_t` occasionally");
  static_assert(((CheckedInt<uint32_t>{kMax} + 1) * sizeof(T) +
                 sizeof(nsStringBuffer))
                    .isValid(),
                "Math required to allocate a nsStringBuffer for a "
                "maximum-capacity string must not overflow uint32_t");

  // Implicit conversion and assignment from `size_t` which assert that the
  // value is in-range.
  MOZ_IMPLICIT constexpr nsTStringLengthStorage(size_t aLength)
      : mLength(static_cast<uint32_t>(aLength)) {
    MOZ_RELEASE_ASSERT(aLength <= kMax, "string is too large");
  }
  constexpr nsTStringLengthStorage& operator=(size_t aLength) {
    MOZ_RELEASE_ASSERT(aLength <= kMax, "string is too large");
    mLength = static_cast<uint32_t>(aLength);
    return *this;
  }
  MOZ_IMPLICIT constexpr operator size_t() const { return mLength; }

 private:
  uint32_t mLength = 0;
};

// nsTStringRepr defines a string's memory layout and some accessor methods.
// This class exists so that nsTLiteralString can avoid inheriting
// nsTSubstring's destructor. All methods on this class must be const because
// literal strings are not writable.
//
// This class is an implementation detail and should not be instantiated
// directly, nor used in any way outside of the string code itself. It is
// buried in a namespace to discourage its use in function parameters.
// If you need to take a parameter, use [const] ns[C]Substring&.
// If you need to instantiate a string, use ns[C]String or descendents.
//
// NAMES:
//   nsStringRepr for wide characters
//   nsCStringRepr for narrow characters
template <typename T>
class nsTStringRepr {
 public:
  typedef mozilla::fallible_t fallible_t;

  typedef T char_type;

  typedef nsCharTraits<char_type> char_traits;
  typedef typename char_traits::incompatible_char_type incompatible_char_type;

  typedef nsTStringRepr<T> self_type;
  typedef self_type base_string_type;

  typedef nsTSubstring<T> substring_type;
  typedef nsTSubstringTuple<T> substring_tuple_type;

  typedef nsReadingIterator<char_type> const_iterator;
  typedef char_type* iterator;

  typedef nsTStringComparator<char_type> comparator_type;

  typedef const char_type* const_char_iterator;

  typedef std::basic_string_view<char_type> string_view;

  typedef size_t index_type;
  typedef size_t size_type;

  // These are only for internal use within the string classes:
  typedef StringDataFlags DataFlags;
  typedef StringClassFlags ClassFlags;
  typedef nsTStringLengthStorage<T> LengthStorage;

  // Reading iterators.
  constexpr const_char_iterator BeginReading() const { return mData; }
  constexpr const_char_iterator EndReading() const { return mData + mLength; }

  // Deprecated reading iterators.
  const_iterator& BeginReading(const_iterator& aIter) const {
    aIter.mStart = mData;
    aIter.mEnd = mData + mLength;
    aIter.mPosition = aIter.mStart;
    return aIter;
  }

  const_iterator& EndReading(const_iterator& aIter) const {
    aIter.mStart = mData;
    aIter.mEnd = mData + mLength;
    aIter.mPosition = aIter.mEnd;
    return aIter;
  }

  const_char_iterator& BeginReading(const_char_iterator& aIter) const {
    return aIter = mData;
  }

  const_char_iterator& EndReading(const_char_iterator& aIter) const {
    return aIter = mData + mLength;
  }

  // Accessors.
  template <typename U, typename Dummy>
  struct raw_type {
    typedef const U* type;
  };
#if defined(MOZ_USE_CHAR16_WRAPPER)
  template <typename Dummy>
  struct raw_type<char16_t, Dummy> {
    typedef char16ptr_t type;
  };
#endif

  // Returns pointer to string data (not necessarily null-terminated)
  constexpr typename raw_type<T, int>::type Data() const { return mData; }

  constexpr size_type Length() const { return static_cast<size_type>(mLength); }

  constexpr string_view View() const { return string_view(Data(), Length()); }

  constexpr operator string_view() const { return View(); }

  constexpr DataFlags GetDataFlags() const { return mDataFlags; }

  constexpr bool IsEmpty() const { return mLength == 0; }

  constexpr bool IsLiteral() const {
    return !!(mDataFlags & DataFlags::LITERAL);
  }

  constexpr bool IsVoid() const { return !!(mDataFlags & DataFlags::VOIDED); }

  constexpr bool IsTerminated() const {
    return !!(mDataFlags & DataFlags::TERMINATED);
  }

  constexpr char_type CharAt(index_type aIndex) const {
    NS_ASSERTION(aIndex < Length(), "index exceeds allowable range");
    return mData[aIndex];
  }

  constexpr char_type operator[](index_type aIndex) const {
    return CharAt(aIndex);
  }

  char_type First() const;

  char_type Last() const;

  // Equality.
  bool NS_FASTCALL Equals(const self_type&) const;
  bool NS_FASTCALL Equals(const self_type&, comparator_type) const;

  bool NS_FASTCALL Equals(const substring_tuple_type& aTuple) const;
  bool NS_FASTCALL Equals(const substring_tuple_type& aTuple,
                          comparator_type) const;

  bool NS_FASTCALL Equals(const char_type* aData) const;
  bool NS_FASTCALL Equals(const char_type* aData, comparator_type) const;

  /**
   * Compare this string and another ASCII-case-insensitively.
   *
   * This method is similar to `LowerCaseEqualsASCII` however both strings are
   * lowercased, meaning that `aString` need not be all lowercase.
   *
   * @param   aString is the string to check
   * @return  boolean
   */
  bool EqualsIgnoreCase(const std::string_view& aString) const;

#if defined(MOZ_USE_CHAR16_WRAPPER)
  template <typename Q = T, typename EnableIfChar16 = Char16OnlyT<Q>>
  bool NS_FASTCALL Equals(char16ptr_t aData) const {
    return Equals(static_cast<const char16_t*>(aData));
  }
  template <typename Q = T, typename EnableIfChar16 = Char16OnlyT<Q>>
  bool NS_FASTCALL Equals(char16ptr_t aData, comparator_type aComp) const {
    return Equals(static_cast<const char16_t*>(aData), aComp);
  }
#endif

  // An efficient comparison with ASCII that can be used even
  // for wide strings. Call this version when you know the
  // length of 'data'.
  bool NS_FASTCALL EqualsASCII(const char* aData, size_type aLen) const;
  // An efficient comparison with ASCII that can be used even
  // for wide strings. Call this version when 'data' is
  // null-terminated.
  bool NS_FASTCALL EqualsASCII(const char* aData) const;

  // An efficient comparison with Latin1 characters that can be used even for
  // wide strings.
  bool EqualsLatin1(const char* aData, size_type aLength) const;

  // EqualsLiteral must ONLY be called with an actual literal string, or
  // a char array *constant* declared without an explicit size and with an
  // initializer that is a string literal or is otherwise null-terminated.
  // Use EqualsASCII for other char array variables.
  // (Although this method may happen to produce expected results for other
  // char arrays that have bound one greater than the sequence of interest,
  // such use is discouraged for reasons of readability and maintainability.)
  // The template trick to acquire the array bound at compile time without
  // using a macro is due to Corey Kosak, with much thanks.
  template <int N>
  inline bool EqualsLiteral(const char (&aStr)[N]) const {
    return EqualsASCII(aStr, N - 1);
  }

  // EqualsLiteral must ONLY be called with an actual literal string, or
  // a char array *constant* declared without an explicit size and with an
  // initializer that is a string literal or is otherwise null-terminated.
  // Use EqualsASCII for other char array variables.
  // (Although this method may happen to produce expected results for other
  // char arrays that have bound one greater than the sequence of interest,
  // such use is discouraged for reasons of readability and maintainability.)
  // The template trick to acquire the array bound at compile time without
  // using a macro is due to Corey Kosak, with much thanks.
  template <size_t N, typename = std::enable_if_t<!std::is_same_v<
                          const char (&)[N], const char_type (&)[N]>>>
  inline bool EqualsLiteral(const char_type (&aStr)[N]) const {
    return *this == nsTLiteralString<char_type>(aStr);
  }

  // The LowerCaseEquals methods compare the ASCII-lowercase version of
  // this string (lowercasing only ASCII uppercase characters) to some
  // ASCII/Literal string. The ASCII string is *not* lowercased for
  // you. If you compare to an ASCII or literal string that contains an
  // uppercase character, it is guaranteed to return false. We will
  // throw assertions too.
  bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData,
                                        size_type aLen) const;
  bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData) const;

  // LowerCaseEqualsLiteral must ONLY be called with an actual literal string,
  // or a char array *constant* declared without an explicit size and with an
  // initializer that is a string literal or is otherwise null-terminated.
  // Use LowerCaseEqualsASCII for other char array variables.
  // (Although this method may happen to produce expected results for other
  // char arrays that have bound one greater than the sequence of interest,
  // such use is discouraged for reasons of readability and maintainability.)
  template <int N>
  bool LowerCaseEqualsLiteral(const char (&aStr)[N]) const {
    return LowerCaseEqualsASCII(aStr, N - 1);
  }

  // Returns true if this string overlaps with the given string fragment.
  bool IsDependentOn(const char_type* aStart, const char_type* aEnd) const {
    // If it _isn't_ the case that one fragment starts after the other ends,
    // or ends before the other starts, then, they conflict:
    //
    //   !(f2.begin >= f1.aEnd || f2.aEnd <= f1.begin)
    //
    // Simplified, that gives us (To avoid relying on Undefined Behavior
    // from comparing pointers from different allocations (which in
    // principle gives the optimizer the permission to assume elsewhere
    // that the pointers are from the same allocation), the comparisons
    // are done on integers, which merely relies on implementation-defined
    // behavior of converting pointers to integers. std::less and
    // std::greater implementations don't actually provide the guarantees
    // that they should.):
    return (reinterpret_cast<uintptr_t>(aStart) <
                reinterpret_cast<uintptr_t>(mData + mLength) &&
            reinterpret_cast<uintptr_t>(aEnd) >
                reinterpret_cast<uintptr_t>(mData));
  }

  /**
   * Search for the given substring within this string.
   *
   * @param   aString is substring to be sought in this
   * @param   aOffset tells us where in this string to start searching
   * @return  offset in string, or kNotFound
   */
  int32_t Find(const string_view& aString, index_type aOffset = 0) const;

  // Previously there was an overload of `Find()` which took a bool second
  // argument.  Avoid issues by explicitly preventing that overload.
  // TODO: Remove this at some point.
  template <typename I,
            typename = std::enable_if_t<!std::is_same_v<I, index_type> &&
                                        std::is_convertible_v<I, index_type>>>
  int32_t Find(const string_view& aString, I aOffset) const {
    static_assert(!std::is_same_v<I, bool>, "offset must not be `bool`");
    return Find(aString, static_cast<index_type>(aOffset));
  }

  /**
   * Search for the given ASCII substring within this string, ignoring case.
   *
   * @param   aString is substring to be sought in this
   * @param   aOffset tells us where in this string to start searching
   * @return  offset in string, or kNotFound
   */
  int32_t LowerCaseFindASCII(const std::string_view& aString,
                             index_type aOffset = 0) const;

  /**
   * Scan the string backwards, looking for the given substring.
   *
   * @param   aString is substring to be sought in this
   * @return  offset in string, or kNotFound
   */
  int32_t RFind(const string_view& aString) const;

  size_type CountChar(char_type) const;

  bool Contains(char_type aChar) const { return FindChar(aChar) != kNotFound; }

  /**
   * Search for the first instance of a given char within this string
   *
   * @param   aChar is the character to search for
   * @param   aOffset tells us where in this string to start searching
   * @return  offset in string, or kNotFound
   */
  int32_t FindChar(char_type aChar, index_type aOffset = 0) const;

  /**
   * Search for the last instance of a given char within this string
   *
   * @param   aChar is the character to search for
   * @param   aOffset tells us where in this string to start searching
   * @return  offset in string, or kNotFound
   */
  int32_t RFindChar(char_type aChar, int32_t aOffset = -1) const;

  /**
   * This method searches this string for the first character found in
   * the given string.
   *
   * @param aSet contains set of chars to be found
   * @param aOffset tells us where in this string to start searching
   *        (counting from left)
   * @return offset in string, or kNotFound
   */

  int32_t FindCharInSet(const string_view& aSet, index_type aOffset = 0) const;

  /**
   * This method searches this string for the last character found in
   * the given string.
   *
   * @param aSet contains set of chars to be found
   * @param aOffset tells us where in this string to start searching
   *        (counting from left)
   * @return offset in string, or kNotFound
   */

  int32_t RFindCharInSet(const string_view& aSet, int32_t aOffset = -1) const;

  /**
   * Perform locale-independent string to double-precision float conversion.
   *
   * Leading spaces in the string will be ignored. The returned value will be
   * finite unless aErrorCode is set to a failed status.
   *
   * @param   aErrorCode will contain error if one occurs
   * @return  double-precision float rep of string value
   */
  double ToDouble(nsresult* aErrorCode) const;

  /**
   * Perform locale-independent string to single-precision float conversion.
   *
   * Leading spaces in the string will be ignored. The returned value will be
   * finite unless aErrorCode is set to a failed status.
   *
   * @param   aErrorCode will contain error if one occurs
   * @return  single-precision float rep of string value
   */
  float ToFloat(nsresult* aErrorCode) const;

  /**
   * Similar to above ToDouble and ToFloat but allows trailing characters that
   * are not converted.
   */
  double ToDoubleAllowTrailingChars(nsresult* aErrorCode) const;
  float ToFloatAllowTrailingChars(nsresult* aErrorCode) const;

 protected:
  nsTStringRepr() = delete;  // Never instantiate directly

  constexpr nsTStringRepr(char_type* aData, size_type aLength,
                          DataFlags aDataFlags, ClassFlags aClassFlags)
      : mData(aData),
        mLength(aLength),
        mDataFlags(aDataFlags),
        mClassFlags(aClassFlags) {}

  static constexpr size_type kMaxCapacity = LengthStorage::kMax;

  /**
   * Checks if the given capacity is valid for this string type.
   */
  [[nodiscard]] static constexpr bool CheckCapacity(size_type aCapacity) {
    return aCapacity <= kMaxCapacity;
  }

  char_type* mData;
  LengthStorage mLength;
  DataFlags mDataFlags;
  ClassFlags const mClassFlags;
};

extern template class nsTStringRepr<char>;
extern template class nsTStringRepr<char16_t>;

}  // namespace detail
}  // namespace mozilla

template <typename T>
int NS_FASTCALL Compare(const mozilla::detail::nsTStringRepr<T>& aLhs,
                        const mozilla::detail::nsTStringRepr<T>& aRhs,
                        nsTStringComparator<T> = nsTDefaultStringComparator<T>);

extern template int NS_FASTCALL Compare<char>(
    const mozilla::detail::nsTStringRepr<char>&,
    const mozilla::detail::nsTStringRepr<char>&, nsTStringComparator<char>);

extern template int NS_FASTCALL
Compare<char16_t>(const mozilla::detail::nsTStringRepr<char16_t>&,
                  const mozilla::detail::nsTStringRepr<char16_t>&,
                  nsTStringComparator<char16_t>);

template <typename T>
inline constexpr bool operator!=(
    const mozilla::detail::nsTStringRepr<T>& aLhs,
    const mozilla::detail::nsTStringRepr<T>& aRhs) {
  return !aLhs.Equals(aRhs);
}

template <typename T>
inline constexpr bool operator!=(const mozilla::detail::nsTStringRepr<T>& aLhs,
                                 const T* aRhs) {
  return !aLhs.Equals(aRhs);
}

template <typename T>
inline bool operator<(const mozilla::detail::nsTStringRepr<T>& aLhs,
                      const mozilla::detail::nsTStringRepr<T>& aRhs) {
  return Compare(aLhs, aRhs) < 0;
}

template <typename T>
inline bool operator<=(const mozilla::detail::nsTStringRepr<T>& aLhs,
                       const mozilla::detail::nsTStringRepr<T>& aRhs) {
  return Compare(aLhs, aRhs) <= 0;
}

template <typename T>
inline bool operator==(const mozilla::detail::nsTStringRepr<T>& aLhs,
                       const mozilla::detail::nsTStringRepr<T>& aRhs) {
  return aLhs.Equals(aRhs);
}

template <typename T>
inline bool operator==(const mozilla::detail::nsTStringRepr<T>& aLhs,
                       const T* aRhs) {
  return aLhs.Equals(aRhs);
}

template <typename T>
inline bool operator>=(const mozilla::detail::nsTStringRepr<T>& aLhs,
                       const mozilla::detail::nsTStringRepr<T>& aRhs) {
  return Compare(aLhs, aRhs) >= 0;
}

template <typename T>
inline bool operator>(const mozilla::detail::nsTStringRepr<T>& aLhs,
                      const mozilla::detail::nsTStringRepr<T>& aRhs) {
  return Compare(aLhs, aRhs) > 0;
}

#endif