diff options
Diffstat (limited to 'xpcom/string')
51 files changed, 11079 insertions, 0 deletions
diff --git a/xpcom/string/README.html b/xpcom/string/README.html new file mode 100644 index 0000000000..4a0927c65c --- /dev/null +++ b/xpcom/string/README.html @@ -0,0 +1,11 @@ +<html> +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> +<body> + <h1><span class="LXRSHORTDESC">managing sequences of characters</span></h1> +<p> + <span class="LXRLONGDESC"></span> +</p> +</body> +</html> diff --git a/xpcom/string/crashtests/1113005-frame.html b/xpcom/string/crashtests/1113005-frame.html new file mode 100644 index 0000000000..505fc22f1e --- /dev/null +++ b/xpcom/string/crashtests/1113005-frame.html @@ -0,0 +1,5 @@ +<form method=post enctype=multipart/form-data action="data:text/html,"><textarea name='file"; filename="filename.ext + '></textarea> +<script> +document.forms[0].submit(); +</script> diff --git a/xpcom/string/crashtests/1113005.html b/xpcom/string/crashtests/1113005.html new file mode 100644 index 0000000000..e377bb637f --- /dev/null +++ b/xpcom/string/crashtests/1113005.html @@ -0,0 +1,2 @@ +<!DOCTYPE html> +<iframe src="1113005-frame.html"></iframe> diff --git a/xpcom/string/crashtests/394275-1.html b/xpcom/string/crashtests/394275-1.html new file mode 100644 index 0000000000..b589c4d359 --- /dev/null +++ b/xpcom/string/crashtests/394275-1.html @@ -0,0 +1,9 @@ +<html> +<body> +<script> +style = document.createElement("style"); // eslint-disable-line no-undef +document.documentElement.appendChild(style); // eslint-disable-line no-undef +style.textContent = "tz\uDAB2 "; // eslint-disable-line no-undef +</script> +</body> +</html> diff --git a/xpcom/string/crashtests/395651-1.html b/xpcom/string/crashtests/395651-1.html new file mode 100644 index 0000000000..bbed371fd6 --- /dev/null +++ b/xpcom/string/crashtests/395651-1.html @@ -0,0 +1,30 @@ +<html> +<head> +<script> + +function X() { dump("X\n"); } 
+function Y() { dump("Y\n"); } + +function boom() { + dump("Start9\n"); + + var div = document.getElementById("v"); + + var textNode = document.createTextNode(String.fromCharCode(0xDAAF)); // high surrogate + div.appendChild(textNode); + + document.addEventListener("DOMCharacterDataModified", X, true); + textNode.data += "B"; + document.removeEventListener("DOMCharacterDataModified", X, true); + + document.addEventListener("DOMAttrModified", Y, true); + textNode.data += String.fromCharCode(0xDF53); // low surrogate + document.removeEventListener("DOMAttrModified", Y, true); +} + +</script> +</head> + +<body onload="boom();"><div id="v"></div></body> + +</html> diff --git a/xpcom/string/crashtests/crashtests.list b/xpcom/string/crashtests/crashtests.list new file mode 100644 index 0000000000..8562f1ad84 --- /dev/null +++ b/xpcom/string/crashtests/crashtests.list @@ -0,0 +1,3 @@ +load 394275-1.html +load 395651-1.html +load 1113005.html diff --git a/xpcom/string/moz.build b/xpcom/string/moz.build new file mode 100644 index 0000000000..01d8294faf --- /dev/null +++ b/xpcom/string/moz.build @@ -0,0 +1,53 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +with Files("**"): + BUG_COMPONENT = ("Core", "String") + +EXPORTS += [ + "nsASCIIMask.h", + "nsAString.h", + "nsCharTraits.h", + "nsDependentString.h", + "nsDependentSubstring.h", + "nsLiteralString.h", + "nsPrintfCString.h", + "nsPromiseFlatString.h", + "nsReadableUtils.h", + "nsString.h", + "nsStringBuffer.h", + "nsStringFlags.h", + "nsStringFwd.h", + "nsStringIterator.h", + "nsTDependentString.h", + "nsTDependentSubstring.h", + "nsTextFormatter.h", + "nsTLiteralString.h", + "nsTPromiseFlatString.h", + "nsTString.h", + "nsTStringHasher.h", + "nsTStringRepr.h", + "nsTSubstring.h", + "nsTSubstringTuple.h", + "nsUTF8Utils.h", +] + +UNIFIED_SOURCES += [ + "nsASCIIMask.cpp", + "nsDependentString.cpp", + "nsDependentSubstring.cpp", + "nsPromiseFlatString.cpp", + "nsReadableUtils.cpp", + "nsString.cpp", + "nsStringComparator.cpp", + "nsStringObsolete.cpp", + "nsSubstring.cpp", + "nsTextFormatter.cpp", + "nsTSubstringTuple.cpp", + "precompiled_templates.cpp", +] + +FINAL_LIBRARY = "xul" diff --git a/xpcom/string/nsASCIIMask.cpp b/xpcom/string/nsASCIIMask.cpp new file mode 100644 index 0000000000..abcff70306 --- /dev/null +++ b/xpcom/string/nsASCIIMask.cpp @@ -0,0 +1,38 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsASCIIMask.h" + +namespace mozilla { + +constexpr bool TestWhitespace(char c) { + return c == '\f' || c == '\t' || c == '\r' || c == '\n' || c == ' '; +} +constexpr ASCIIMaskArray sWhitespaceMask = CreateASCIIMask(TestWhitespace); + +constexpr bool TestCRLF(char c) { return c == '\r' || c == '\n'; } +constexpr ASCIIMaskArray sCRLFMask = CreateASCIIMask(TestCRLF); + +constexpr bool TestCRLFTab(char c) { + return c == '\r' || c == '\n' || c == '\t'; +} +constexpr ASCIIMaskArray sCRLFTabMask = CreateASCIIMask(TestCRLFTab); + +constexpr bool TestZeroToNine(char c) { + return c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || + c == '6' || c == '7' || c == '8' || c == '9'; +} +constexpr ASCIIMaskArray sZeroToNineMask = CreateASCIIMask(TestZeroToNine); + +const ASCIIMaskArray& ASCIIMask::MaskWhitespace() { return sWhitespaceMask; } + +const ASCIIMaskArray& ASCIIMask::MaskCRLF() { return sCRLFMask; } + +const ASCIIMaskArray& ASCIIMask::MaskCRLFTab() { return sCRLFTabMask; } + +const ASCIIMaskArray& ASCIIMask::Mask0to9() { return sZeroToNineMask; } + +} // namespace mozilla diff --git a/xpcom/string/nsASCIIMask.h b/xpcom/string/nsASCIIMask.h new file mode 100644 index 0000000000..be38b728e8 --- /dev/null +++ b/xpcom/string/nsASCIIMask.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsASCIIMask_h_ +#define nsASCIIMask_h_ + +#include <array> +#include <utility> + +#include "mozilla/Attributes.h" + +typedef std::array<bool, 128> ASCIIMaskArray; + +namespace mozilla { + +// Boolean arrays, fixed size and filled in at compile time, meant to +// record something about each of the (standard) ASCII characters. 
+// No extended ASCII for now, there has been no use case. +// If you have loops that go through a string character by character +// and test for equality to a certain set of characters before deciding +// on a course of action, chances are building up one of these arrays +// and using it is going to be faster, especially if the set of +// characters is more than one long, and known at compile time. +class ASCIIMask { + public: + // Preset masks for some common character groups + // When testing, you must check if the index is < 128 or use IsMasked() + // + // if (someChar < 128 && MaskCRLF()[someChar]) this is \r or \n + + static const ASCIIMaskArray& MaskCRLF(); + static const ASCIIMaskArray& Mask0to9(); + static const ASCIIMaskArray& MaskCRLFTab(); + static const ASCIIMaskArray& MaskWhitespace(); + + static MOZ_ALWAYS_INLINE bool IsMasked(const ASCIIMaskArray& aMask, + uint32_t aChar) { + return aChar < 128 && aMask[aChar]; + } +}; + +// Outside of the preset ones, use these templates to create more masks. +// +// The example creation will look like this: +// +// constexpr bool TestABC(char c) { return c == 'A' || c == 'B' || c == 'C'; } +// constexpr std::array<bool, 128> sABCMask = CreateASCIIMask(TestABC); +// ... +// if (someChar < 128 && sABCMask[someChar]) this is A or B or C + +namespace details { +template <typename F, size_t... 
Indices> +constexpr std::array<bool, 128> CreateASCIIMask( + F fun, std::index_sequence<Indices...>) { + return {{fun(Indices)...}}; +} +} // namespace details + +template <typename F> +constexpr std::array<bool, 128> CreateASCIIMask(F fun) { + return details::CreateASCIIMask(fun, std::make_index_sequence<128>{}); +} + +} // namespace mozilla + +#endif // nsASCIIMask_h_ diff --git a/xpcom/string/nsAString.h b/xpcom/string/nsAString.h new file mode 100644 index 0000000000..ca0875b5d3 --- /dev/null +++ b/xpcom/string/nsAString.h @@ -0,0 +1,40 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsAString_h___ +#define nsAString_h___ + +#include "nsStringFwd.h" +#include "nsStringIterator.h" +#include "mozilla/TypedEnumBits.h" + +#include <string.h> +#include <stdarg.h> + +#define kNotFound -1 + +#include "nsStringFlags.h" +#include "nsTStringRepr.h" +#include "nsTSubstring.h" +#include "nsTSubstringTuple.h" + +/** + * ASCII case-insensitive comparator. 
(for Unicode case-insensitive + * comparison, see nsUnicharUtils.h) + */ +int nsCaseInsensitiveCStringComparator(const char*, const char*, uint32_t, + uint32_t); + +class nsCaseInsensitiveCStringArrayComparator { + public: + template <class A, class B> + bool Equals(const A& aStrA, const B& aStrB) const { + return aStrA.Equals(aStrB, nsCaseInsensitiveCStringComparator); + } +}; + +#endif // !defined(nsAString_h___) diff --git a/xpcom/string/nsCharTraits.h b/xpcom/string/nsCharTraits.h new file mode 100644 index 0000000000..9941d4996d --- /dev/null +++ b/xpcom/string/nsCharTraits.h @@ -0,0 +1,487 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsCharTraits_h___ +#define nsCharTraits_h___ + +#include <ctype.h> // for |EOF|, |WEOF| +#include <stdint.h> // for |uint32_t| +#include <string.h> // for |memcpy|, et al +#include "mozilla/MemoryChecking.h" + +// This file may be used (through nsUTF8Utils.h) from non-XPCOM code, in +// particular the standalone software updater. In that case stub out +// the macros provided by nsDebug.h which are only usable when linking XPCOM + +#ifdef NS_NO_XPCOM +# define NS_WARNING(msg) +# define NS_ASSERTION(cond, msg) +# define NS_ERROR(msg) +#else +# include "nsDebug.h" // for NS_ASSERTION +#endif + +/* + * Some macros for converting char16_t (UTF-16) to and from Unicode scalar + * values. + * + * Note that UTF-16 represents all Unicode scalar values up to U+10FFFF by + * using "surrogate pairs". These consist of a high surrogate, i.e. a code + * point in the range U+D800 - U+DBFF, and a low surrogate, i.e. a code point + * in the range U+DC00 - U+DFFF, like this: + * + * U+D800 U+DC00 = U+10000 + * U+D800 U+DC01 = U+10001 + * ... 
+ * U+DBFF U+DFFE = U+10FFFE + * U+DBFF U+DFFF = U+10FFFF + * + * These surrogate code points U+D800 - U+DFFF are not themselves valid Unicode + * scalar values and are not well-formed UTF-16 except as high-surrogate / + * low-surrogate pairs. + */ + +#define PLANE1_BASE uint32_t(0x00010000) +// High surrogates are in the range 0xD800 -- 0xDBFF +#define NS_IS_HIGH_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xD800) +// Low surrogates are in the range 0xDC00 -- 0xDFFF +#define NS_IS_LOW_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xDC00) +// Easier to type than NS_IS_HIGH_SURROGATE && NS_IS_LOW_SURROGATE +#define NS_IS_SURROGATE_PAIR(h, l) \ + (NS_IS_HIGH_SURROGATE(h) && NS_IS_LOW_SURROGATE(l)) +// Faster than testing NS_IS_HIGH_SURROGATE || NS_IS_LOW_SURROGATE +#define IS_SURROGATE(u) ((uint32_t(u) & 0xFFFFF800) == 0xD800) + +// Everything else is not a surrogate: 0x0000 -- 0xD7FF, 0xE000 -- 0xFFFF + +// N = (H - 0xD800) * 0x400 + 0x10000 + (L - 0xDC00) +// I wonder whether we could somehow assert that H is a high surrogate +// and L is a low surrogate +#define SURROGATE_TO_UCS4(h, l) \ + (((uint32_t(h) & 0x03FF) << 10) + (uint32_t(l) & 0x03FF) + PLANE1_BASE) + +// Extract surrogates from a UCS4 char +// Reference: the Unicode standard 4.0, section 3.9 +// Since (c - 0x10000) >> 10 == (c >> 10) - 0x0080 and +// 0xD7C0 == 0xD800 - 0x0080, +// ((c - 0x10000) >> 10) + 0xD800 can be simplified to +#define H_SURROGATE(c) char16_t(char16_t(uint32_t(c) >> 10) + char16_t(0xD7C0)) +// where it's to be noted that 0xD7C0 is not bitwise-OR'd +// but added. 
+ +// Since 0x10000 & 0x03FF == 0, +// (c - 0x10000) & 0x03FF == c & 0x03FF so that +// ((c - 0x10000) & 0x03FF) | 0xDC00 is equivalent to +#define L_SURROGATE(c) \ + char16_t(char16_t(uint32_t(c) & uint32_t(0x03FF)) | char16_t(0xDC00)) + +#define IS_IN_BMP(ucs) (uint32_t(ucs) < PLANE1_BASE) +#define UCS2_REPLACEMENT_CHAR char16_t(0xFFFD) + +#define UCS_END uint32_t(0x00110000) +#define IS_VALID_CHAR(c) ((uint32_t(c) < UCS_END) && !IS_SURROGATE(c)) +#define ENSURE_VALID_CHAR(c) (IS_VALID_CHAR(c) ? (c) : UCS2_REPLACEMENT_CHAR) + +template <class CharT> +struct nsCharTraits {}; + +template <> +struct nsCharTraits<char16_t> { + typedef char16_t char_type; + typedef uint16_t unsigned_char_type; + typedef char incompatible_char_type; + + static char_type* const sEmptyBuffer; + + // integer representation of characters: + typedef int int_type; + + static char_type to_char_type(int_type aChar) { return char_type(aChar); } + + static int_type to_int_type(char_type aChar) { + return int_type(static_cast<unsigned_char_type>(aChar)); + } + + static bool eq_int_type(int_type aLhs, int_type aRhs) { return aLhs == aRhs; } + + // |char_type| comparisons: + + static bool eq(char_type aLhs, char_type aRhs) { return aLhs == aRhs; } + + static bool lt(char_type aLhs, char_type aRhs) { return aLhs < aRhs; } + + // operations on s[n] arrays: + + static char_type* move(char_type* aStr1, const char_type* aStr2, size_t aN) { + return static_cast<char_type*>( + memmove(aStr1, aStr2, aN * sizeof(char_type))); + } + + static char_type* copy(char_type* aStr1, const char_type* aStr2, size_t aN) { + return static_cast<char_type*>( + memcpy(aStr1, aStr2, aN * sizeof(char_type))); + } + + static void uninitialize(char_type* aStr, size_t aN) { +#ifdef DEBUG + memset(aStr, 0xE4, aN * sizeof(char_type)); +#endif + MOZ_MAKE_MEM_UNDEFINED(aStr, aN * sizeof(char_type)); + } + + static char_type* copyASCII(char_type* aStr1, const char* aStr2, size_t aN) { + for (char_type* s = aStr1; aN--; ++s, ++aStr2) 
{ + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + *s = static_cast<char_type>(*aStr2); + } + return aStr1; + } + + static int compare(const char_type* aStr1, const char_type* aStr2, + size_t aN) { + for (; aN--; ++aStr1, ++aStr2) { + if (!eq(*aStr1, *aStr2)) { + return to_int_type(*aStr1) - to_int_type(*aStr2); + } + } + + return 0; + } + + static int compareASCII(const char_type* aStr1, const char* aStr2, + size_t aN) { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (!eq_int_type(to_int_type(*aStr1), + to_int_type(static_cast<char_type>(*aStr2)))) { + return to_int_type(*aStr1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + return 0; + } + + static bool equalsLatin1(const char_type* aStr1, const char* aStr2, + const size_t aN) { + for (size_t i = aN; i > 0; --i, ++aStr1, ++aStr2) { + if (*aStr1 != static_cast<char_type>(*aStr2)) { + return false; + } + } + + return true; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. + static int compareASCIINullTerminated(const char_type* aStr1, size_t aN, + const char* aStr2) { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (!eq_int_type(to_int_type(*aStr1), + to_int_type(static_cast<char_type>(*aStr2)))) { + return to_int_type(*aStr1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + /** + * Convert c to its lower-case form, but only if c is in the ASCII + * range. Otherwise leave it alone. 
+ */ + static char_type ASCIIToLower(char_type aChar) { + if (aChar >= 'A' && aChar <= 'Z') { + return char_type(aChar + ('a' - 'A')); + } + + return aChar; + } + + static int compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, + size_t aN) { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != static_cast<char_type>(*aStr2)) { + return to_int_type(lower_s1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + return 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. + static int compareLowerCaseToASCIINullTerminated(const char_type* aStr1, + size_t aN, + const char* aStr2) { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != static_cast<char_type>(*aStr2)) { + return to_int_type(lower_s1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + static size_t length(const char_type* aStr) { + size_t result = 0; + while (!eq(*aStr++, char_type(0))) { + ++result; + } + return result; + } + + static const char_type* find(const char_type* aStr, size_t aN, + char_type aChar) { + while (aN--) { + if (eq(*aStr, aChar)) { + return aStr; + } + ++aStr; + } + + return 0; + } +}; + +template <> +struct nsCharTraits<char> { + typedef char char_type; + typedef unsigned char unsigned_char_type; + typedef char16_t incompatible_char_type; + + static char_type* const sEmptyBuffer; + + // integer representation of characters: + + typedef int int_type; + + 
static char_type to_char_type(int_type aChar) { return char_type(aChar); } + + static int_type to_int_type(char_type aChar) { + return int_type(static_cast<unsigned_char_type>(aChar)); + } + + static bool eq_int_type(int_type aLhs, int_type aRhs) { return aLhs == aRhs; } + + // |char_type| comparisons: + + static bool eq(char_type aLhs, char_type aRhs) { return aLhs == aRhs; } + + static bool lt(char_type aLhs, char_type aRhs) { return aLhs < aRhs; } + + // operations on s[n] arrays: + + static char_type* move(char_type* aStr1, const char_type* aStr2, size_t aN) { + return static_cast<char_type*>( + memmove(aStr1, aStr2, aN * sizeof(char_type))); + } + + static char_type* copy(char_type* aStr1, const char_type* aStr2, size_t aN) { + return static_cast<char_type*>( + memcpy(aStr1, aStr2, aN * sizeof(char_type))); + } + + static void uninitialize(char_type* aStr, size_t aN) { +#ifdef DEBUG + memset(aStr, 0xE4, aN * sizeof(char_type)); +#endif + MOZ_MAKE_MEM_UNDEFINED(aStr, aN * sizeof(char_type)); + } + + static char_type* copyASCII(char_type* aStr1, const char* aStr2, size_t aN) { + return copy(aStr1, aStr2, aN); + } + + static int compare(const char_type* aStr1, const char_type* aStr2, + size_t aN) { + return memcmp(aStr1, aStr2, aN); + } + + static int compareASCII(const char_type* aStr1, const char* aStr2, + size_t aN) { +#ifdef DEBUG + for (size_t i = 0; i < aN; ++i) { + NS_ASSERTION(!(aStr2[i] & ~0x7F), "Unexpected non-ASCII character"); + } +#endif + return compare(aStr1, aStr2, aN); + } + + static bool equalsLatin1(const char_type* aStr1, const char* aStr2, + size_t aN) { + return memcmp(aStr1, aStr2, aN) == 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. 
+ static int compareASCIINullTerminated(const char_type* aStr1, size_t aN, + const char* aStr2) { + // can't use strcmp here because we don't want to stop when aStr1 + // contains a null + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (*aStr1 != *aStr2) { + return to_int_type(*aStr1) - to_int_type(*aStr2); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + /** + * Convert c to its lower-case form, but only if c is ASCII. + */ + static char_type ASCIIToLower(char_type aChar) { + if (aChar >= 'A' && aChar <= 'Z') { + return char_type(aChar + ('a' - 'A')); + } + + return aChar; + } + + static int compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, + size_t aN) { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != *aStr2) { + return to_int_type(lower_s1) - to_int_type(*aStr2); + } + } + return 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. 
+ static int compareLowerCaseToASCIINullTerminated(const char_type* aStr1, + size_t aN, + const char* aStr2) { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != *aStr2) { + return to_int_type(lower_s1) - to_int_type(*aStr2); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + static size_t length(const char_type* aStr) { return strlen(aStr); } + + static const char_type* find(const char_type* aStr, size_t aN, + char_type aChar) { + return reinterpret_cast<const char_type*>( + memchr(aStr, to_int_type(aChar), aN)); + } +}; + +template <class InputIterator> +struct nsCharSourceTraits { + typedef typename InputIterator::difference_type difference_type; + + static uint32_t readable_distance(const InputIterator& aFirst, + const InputIterator& aLast) { + // assumes single fragment + return uint32_t(aLast.get() - aFirst.get()); + } + + static const typename InputIterator::value_type* read( + const InputIterator& aIter) { + return aIter.get(); + } + + static void advance(InputIterator& aStr, difference_type aN) { + aStr.advance(aN); + } +}; + +template <class CharT> +struct nsCharSourceTraits<CharT*> { + typedef ptrdiff_t difference_type; + + static uint32_t readable_distance(CharT* aStr) { + return uint32_t(nsCharTraits<CharT>::length(aStr)); + // return numeric_limits<uint32_t>::max(); + } + + static uint32_t readable_distance(CharT* aFirst, CharT* aLast) { + return uint32_t(aLast - aFirst); + } + + static const CharT* read(CharT* aStr) { return aStr; } + + static void advance(CharT*& aStr, difference_type aN) { aStr += aN; } +}; + +template <class OutputIterator> +struct nsCharSinkTraits { + static void write(OutputIterator& aIter, + const typename OutputIterator::value_type* aStr, + uint32_t aN) { + aIter.write(aStr, aN); + 
} +}; + +template <class CharT> +struct nsCharSinkTraits<CharT*> { + static void write(CharT*& aIter, const CharT* aStr, uint32_t aN) { + nsCharTraits<CharT>::move(aIter, aStr, aN); + aIter += aN; + } +}; + +#endif // !defined(nsCharTraits_h___) diff --git a/xpcom/string/nsDependentString.cpp b/xpcom/string/nsDependentString.cpp new file mode 100644 index 0000000000..d9707a0356 --- /dev/null +++ b/xpcom/string/nsDependentString.cpp @@ -0,0 +1,10 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsDependentString.h" +#include "nsAlgorithm.h" + +#include "nsTDependentString.cpp" diff --git a/xpcom/string/nsDependentString.h b/xpcom/string/nsDependentString.h new file mode 100644 index 0000000000..4896c8d086 --- /dev/null +++ b/xpcom/string/nsDependentString.h @@ -0,0 +1,15 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsDependentString_h___ +#define nsDependentString_h___ + +#include "nsString.h" +#include "nsDebug.h" + +#include "nsTDependentString.h" + +#endif /* !defined(nsDependentString_h___) */ diff --git a/xpcom/string/nsDependentSubstring.cpp b/xpcom/string/nsDependentSubstring.cpp new file mode 100644 index 0000000000..3057c0692a --- /dev/null +++ b/xpcom/string/nsDependentSubstring.cpp @@ -0,0 +1,10 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsDependentSubstring.h" +#include "nsAlgorithm.h" + +#include "nsTDependentSubstring.cpp" diff --git a/xpcom/string/nsDependentSubstring.h b/xpcom/string/nsDependentSubstring.h new file mode 100644 index 0000000000..cb6cef5d77 --- /dev/null +++ b/xpcom/string/nsDependentSubstring.h @@ -0,0 +1,13 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsDependentSubstring_h___ +#define nsDependentSubstring_h___ + +#include "nsAString.h" +#include "nsTDependentSubstring.h" + +#endif /* !defined(nsDependentSubstring_h___) */ diff --git a/xpcom/string/nsLiteralString.h b/xpcom/string/nsLiteralString.h new file mode 100644 index 0000000000..f982724ce4 --- /dev/null +++ b/xpcom/string/nsLiteralString.h @@ -0,0 +1,31 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsLiteralString_h___ +#define nsLiteralString_h___ + +#include "nscore.h" +#include "nsString.h" + +#include "nsTLiteralString.h" + +#include "mozilla/Char16.h" + +#define NS_CSTRING_LITERAL_AS_STRING_LITERAL(s) u"" s + +#define NS_LITERAL_STRING_FROM_CSTRING(s) \ + static_cast<const nsLiteralString&>( \ + nsLiteralString(NS_CSTRING_LITERAL_AS_STRING_LITERAL(s))) + +constexpr auto operator""_ns(const char* aStr, std::size_t aLen) { + return nsLiteralCString{aStr, aLen}; +} + +constexpr auto operator""_ns(const char16_t* aStr, std::size_t aLen) { + return nsLiteralString{aStr, aLen}; +} + +#endif /* !defined(nsLiteralString_h___) */ diff --git a/xpcom/string/nsPrintfCString.h b/xpcom/string/nsPrintfCString.h new file mode 100644 index 0000000000..f722888705 --- /dev/null +++ b/xpcom/string/nsPrintfCString.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsPrintfCString_h___ +#define nsPrintfCString_h___ + +#include "nsString.h" + +/** + * nsPrintfCString lets you create a nsCString using a printf-style format + * string. For example: + * + * NS_WARNING(nsPrintfCString("Unexpected value: %f", 13.917).get()); + * + * nsPrintfCString has a small built-in auto-buffer. For larger strings, it + * will allocate on the heap. + * + * See also nsCString::AppendPrintf(). + */ +class nsPrintfCString : public nsAutoCStringN<16> { + typedef nsCString string_type; + + public: + explicit nsPrintfCString(const char_type* aFormat, ...) 
+ MOZ_FORMAT_PRINTF(2, 3) { + va_list ap; + va_start(ap, aFormat); + AppendVprintf(aFormat, ap); + va_end(ap); + } +}; + +/** + * + * + * nsVprintfCString is like nsPrintfCString but is created using vprintf style + * args. This is useful for functions that have already received variadic + * arguments and want to create a nsVprintfCString. For example: + * + * void LogToSeveralLocations(const char* aFormat,...) { + * va_list ap; + * va_start(ap, aFormat); + * nsVprintfCString logString(aFormat, ap); + * va_end(ap); + * // Use logString + * } + * + * See also nsCString::AppendVprintf(). + */ + +class nsVprintfCString : public nsAutoCStringN<16> { + typedef nsCString string_type; + + public: + nsVprintfCString(const char_type* aFormat, va_list aArgs) + MOZ_FORMAT_PRINTF(2, 0) { + AppendVprintf(aFormat, aArgs); + } +}; + +#endif // !defined(nsPrintfCString_h___) diff --git a/xpcom/string/nsPromiseFlatString.cpp b/xpcom/string/nsPromiseFlatString.cpp new file mode 100644 index 0000000000..e21812061f --- /dev/null +++ b/xpcom/string/nsPromiseFlatString.cpp @@ -0,0 +1,9 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsPromiseFlatString.h" + +#include "nsTPromiseFlatString.cpp" diff --git a/xpcom/string/nsPromiseFlatString.h b/xpcom/string/nsPromiseFlatString.h new file mode 100644 index 0000000000..98541ceb4a --- /dev/null +++ b/xpcom/string/nsPromiseFlatString.h @@ -0,0 +1,14 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsPromiseFlatString_h___ +#define nsPromiseFlatString_h___ + +#include "nsString.h" + +#include "nsTPromiseFlatString.h" + +#endif /* !defined(nsPromiseFlatString_h___) */ diff --git a/xpcom/string/nsReadableUtils.cpp b/xpcom/string/nsReadableUtils.cpp new file mode 100644 index 0000000000..937888c408 --- /dev/null +++ b/xpcom/string/nsReadableUtils.cpp @@ -0,0 +1,631 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsReadableUtils.h" + +#include <algorithm> + +#include "mozilla/CheckedInt.h" +#include "mozilla/Utf8.h" + +#include "nscore.h" +#include "nsMemory.h" +#include "nsString.h" +#include "nsTArray.h" +#include "nsUTF8Utils.h" + +using mozilla::Span; + +/** + * A helper function that allocates a buffer of the desired character type big + * enough to hold a copy of the supplied string (plus a zero terminator). + * + * @param aSource a string you will eventually be making a copy of + * @return a new buffer which you must free with |free|. 
+ * + */ +template <class FromStringT, class CharT> +inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) { + return static_cast<CharT*>( + malloc((size_t(aSource.Length()) + 1) * sizeof(CharT))); +} + +char* ToNewCString(const nsAString& aSource) { + char* str = ToNewCString(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsAString& aSource, + const mozilla::fallible_t& aFallible) { + char* dest = AllocateStringCopy(aSource, (char*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + LossyConvertUtf16toLatin1(aSource, Span(dest, len)); + dest[len] = 0; + return dest; +} + +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count, + const mozilla::fallible_t& aFallible) { + auto len = aSource.Length(); + // The uses of this function seem temporary enough that it's not + // worthwhile to be fancy about the allocation size. Let's just use + // the worst case. + // Times 3 plus 1, because ConvertUTF16toUTF8 requires times 3 and + // then we have the terminator. + // Using CheckedInt<uint32_t>, because aUTF8Count is uint32_t* for + // historical reasons. 
+ mozilla::CheckedInt<uint32_t> destLen(len); + destLen *= 3; + destLen += 1; + if (!destLen.isValid()) { + return nullptr; + } + size_t destLenVal = destLen.value(); + char* dest = static_cast<char*>(malloc(destLenVal)); + if (!dest) { + return nullptr; + } + + size_t written = ConvertUtf16toUtf8(aSource, Span(dest, destLenVal)); + dest[written] = 0; + + if (aUTF8Count) { + *aUTF8Count = written; + } + + return dest; +} + +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) { + char* str = ToNewUTF8String(aSource, aUTF8Count, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsACString& aSource) { + char* str = ToNewCString(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsACString& aSource, + const mozilla::fallible_t& aFallible) { + // no conversion needed, just allocate a buffer of the correct length and copy + // into it + + char* dest = AllocateStringCopy(aSource, (char*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + memcpy(dest, aSource.BeginReading(), len * sizeof(char)); + dest[len] = 0; + return dest; +} + +char16_t* ToNewUnicode(const nsAString& aSource) { + char16_t* str = ToNewUnicode(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* ToNewUnicode(const nsAString& aSource, + const mozilla::fallible_t& aFallible) { + // no conversion needed, just allocate a buffer of the correct length and copy + // into it + + char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t)); + dest[len] = 0; + return dest; +} + +char16_t* ToNewUnicode(const nsACString& aSource) { + char16_t* str = ToNewUnicode(aSource, mozilla::fallible); + if (!str) { + 
MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* ToNewUnicode(const nsACString& aSource, + const mozilla::fallible_t& aFallible) { + char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + ConvertLatin1toUtf16(aSource, Span(dest, len)); + dest[len] = 0; + return dest; +} + +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count, + const mozilla::fallible_t& aFallible) { + // Compute length plus one as required by ConvertUTF8toUTF16 + uint32_t lengthPlusOne = aSource.Length() + 1; // Can't overflow + + mozilla::CheckedInt<size_t> allocLength(lengthPlusOne); + // Add space for zero-termination + allocLength += 1; + // We need UTF-16 units + allocLength *= sizeof(char16_t); + + if (!allocLength.isValid()) { + return nullptr; + } + + char16_t* dest = (char16_t*)malloc(allocLength.value()); + if (!dest) { + return nullptr; + } + + size_t written = ConvertUtf8toUtf16(aSource, Span(dest, lengthPlusOne)); + dest[written] = 0; + + if (aUTF16Count) { + *aUTF16Count = written; + } + + return dest; +} + +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) { + char16_t* str = UTF8ToNewUnicode(aSource, aUTF16Count, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, + char16_t* aDest, uint32_t aLength) { + MOZ_ASSERT(aSrcOffset + aLength <= aSource.Length()); + memcpy(aDest, aSource.BeginReading() + aSrcOffset, + size_t(aLength) * sizeof(char16_t)); + return aDest; +} + +void ToUpperCase(nsACString& aCString) { + char* cp = aCString.BeginWriting(); + char* end = cp + aCString.Length(); + while (cp != end) { + char ch = *cp; + if (ch >= 'a' && ch <= 'z') { + *cp = ch - ('a' - 'A'); + } + ++cp; + } +} + +void ToUpperCase(const nsACString& aSource, nsACString& aDest) { + aDest.SetLength(aSource.Length()); + const 
char* src = aSource.BeginReading(); + const char* end = src + aSource.Length(); + char* dst = aDest.BeginWriting(); + while (src != end) { + char ch = *src; + if (ch >= 'a' && ch <= 'z') { + *dst = ch - ('a' - 'A'); + } else { + *dst = ch; + } + ++src; + ++dst; + } +} + +void ToLowerCase(nsACString& aCString) { + char* cp = aCString.BeginWriting(); + char* end = cp + aCString.Length(); + while (cp != end) { + char ch = *cp; + if (ch >= 'A' && ch <= 'Z') { + *cp = ch + ('a' - 'A'); + } + ++cp; + } +} + +void ToLowerCase(const nsACString& aSource, nsACString& aDest) { + aDest.SetLength(aSource.Length()); + const char* src = aSource.BeginReading(); + const char* end = src + aSource.Length(); + char* dst = aDest.BeginWriting(); + while (src != end) { + char ch = *src; + if (ch >= 'A' && ch <= 'Z') { + *dst = ch + ('a' - 'A'); + } else { + *dst = ch; + } + ++src; + ++dst; + } +} + +void ParseString(const nsACString& aSource, char aDelimiter, + nsTArray<nsCString>& aArray) { + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + + for (;;) { + nsACString::const_iterator delimiter = start; + FindCharInReadable(aDelimiter, delimiter, end); + + if (delimiter != start) { + aArray.AppendElement(Substring(start, delimiter)); + } + + if (delimiter == end) { + break; + } + start = ++delimiter; + if (start == end) { + break; + } + } +} + +template <class StringT, class IteratorT> +bool FindInReadable_Impl( + const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, + nsTStringComparator<typename StringT::char_type> aCompare) { + bool found_it = false; + + // only bother searching at all if we're given a non-empty range to search + if (aSearchStart != aSearchEnd) { + IteratorT aPatternStart, aPatternEnd; + aPattern.BeginReading(aPatternStart); + aPattern.EndReading(aPatternEnd); + + // outer loop keeps searching till we find it or run out of string to search + while (!found_it) { + // fast inner loop (that's what it's 
called, not what it is) looks for a + // potential match + while (aSearchStart != aSearchEnd && + aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) { + ++aSearchStart; + } + + // if we broke out of the `fast' loop because we're out of string ... + // we're done: no match + if (aSearchStart == aSearchEnd) { + break; + } + + // otherwise, we're at a potential match, let's see if we really hit one + IteratorT testPattern(aPatternStart); + IteratorT testSearch(aSearchStart); + + // slow inner loop verifies the potential match (found by the `fast' loop) + // at the current position + for (;;) { + // we already compared the first character in the outer loop, + // so we'll advance before the next comparison + ++testPattern; + ++testSearch; + + // if we verified all the way to the end of the pattern, then we found + // it! + if (testPattern == aPatternEnd) { + found_it = true; + aSearchEnd = testSearch; // return the exact found range through the + // parameters + break; + } + + // if we got to end of the string we're searching before we hit the end + // of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchEnd) { + aSearchStart = aSearchEnd; + break; + } + + // else if we mismatched ... it's time to advance to the next search + // position + // and get back into the `fast' loop + if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) { + ++aSearchStart; + break; + } + } + } + } + + return found_it; +} + +/** + * This searches the entire string from right to left, and returns the first + * match found, if any. 
+ */ +template <class StringT, class IteratorT> +bool RFindInReadable_Impl( + const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, + nsTStringComparator<typename StringT::char_type> aCompare) { + IteratorT patternStart, patternEnd, searchEnd = aSearchEnd; + aPattern.BeginReading(patternStart); + aPattern.EndReading(patternEnd); + + // Point to the last character in the pattern + --patternEnd; + // outer loop keeps searching till we run out of string to search + while (aSearchStart != searchEnd) { + // Point to the end position of the next possible match + --searchEnd; + + // Check last character, if a match, explore further from here + if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) { + // We're at a potential match, let's see if we really hit one + IteratorT testPattern(patternEnd); + IteratorT testSearch(searchEnd); + + // inner loop verifies the potential match at the current position + do { + // if we verified all the way to the end of the pattern, then we found + // it! 
+ if (testPattern == patternStart) { + aSearchStart = testSearch; // point to start of match + aSearchEnd = ++searchEnd; // point to end of match + return true; + } + + // if we got to end of the string we're searching before we hit the end + // of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchStart) { + aSearchStart = aSearchEnd; + return false; + } + + // test previous character for a match + --testPattern; + --testSearch; + } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0); + } + } + + aSearchStart = aSearchEnd; + return false; +} + +bool FindInReadable(const nsAString& aPattern, + nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd, + nsStringComparator aComparator) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool FindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd, + nsCStringComparator aComparator) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool CaseInsensitiveFindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, + nsCaseInsensitiveCStringComparator); +} + +bool RFindInReadable(const nsAString& aPattern, + nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd, + const nsStringComparator aComparator) { + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool RFindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd, + const nsCStringComparator aComparator) { + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, + const 
nsAString::const_iterator& aSearchEnd) { + int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char16_t* charFoundAt = + nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd) { + int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char* charFoundAt = + nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator aComparator) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); +} + +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator aComparator) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, 
sub_len).Equals(aSubstring, aComparator); +} + +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator aComparator) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len) + .Equals(aSubstring, aComparator); +} + +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator aComparator) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len) + .Equals(aSubstring, aComparator); +} + +static const char16_t empty_buffer[1] = {'\0'}; + +const nsString& EmptyString() { + static const nsDependentString sEmpty(empty_buffer); + + return sEmpty; +} + +const nsCString& EmptyCString() { + static const nsDependentCString sEmpty((const char*)empty_buffer); + + return sEmpty; +} + +const nsString& VoidString() { + static const nsString sNull(mozilla::detail::StringDataFlags::VOIDED); + + return sNull; +} + +const nsCString& VoidCString() { + static const nsCString sNull(mozilla::detail::StringDataFlags::VOIDED); + + return sNull; +} + +int32_t CompareUTF8toUTF16(const nsACString& aUTF8String, + const nsAString& aUTF16String, bool* aErr) { + const char* u8; + const 
char* u8end; + aUTF8String.BeginReading(u8); + aUTF8String.EndReading(u8end); + + const char16_t* u16; + const char16_t* u16end; + aUTF16String.BeginReading(u16); + aUTF16String.EndReading(u16end); + + for (;;) { + if (u8 == u8end) { + if (u16 == u16end) { + return 0; + } + return -1; + } + if (u16 == u16end) { + return 1; + } + // No need for ASCII optimization, since both NextChar() + // calls get inlined. + uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr); + uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr); + if (scalar16 == scalar8) { + continue; + } + if (scalar8 < scalar16) { + return -1; + } + return 1; + } +} + +void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) { + NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); + if (IS_IN_BMP(aSource)) { + aDest.Append(char16_t(aSource)); + } else { + aDest.Append(H_SURROGATE(aSource)); + aDest.Append(L_SURROGATE(aSource)); + } +} diff --git a/xpcom/string/nsReadableUtils.h b/xpcom/string/nsReadableUtils.h new file mode 100644 index 0000000000..219b32d0ee --- /dev/null +++ b/xpcom/string/nsReadableUtils.h @@ -0,0 +1,610 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsReadableUtils_h___ +#define nsReadableUtils_h___ + +/** + * I guess all the routines in this file are all mis-named. + * According to our conventions, they should be |NS_xxx|. 
+ */ + +#include "mozilla/Assertions.h" +#include "nsAString.h" +#include "mozilla/TextUtils.h" + +#include "nsTArrayForwardDeclare.h" + +// From the nsstring crate +extern "C" { +bool nsstring_fallible_append_utf8_impl(nsAString* aThis, const char* aOther, + size_t aOtherLen, size_t aOldLen); + +bool nsstring_fallible_append_latin1_impl(nsAString* aThis, const char* aOther, + size_t aOtherLen, size_t aOldLen, + bool aAllowShrinking); + +bool nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis, + const char16_t*, + size_t aOtherLen, + size_t aOldLen); + +bool nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis, + const char16_t*, + size_t aOtherLen, + size_t aOldLen, + bool aAllowShrinking); + +bool nscstring_fallible_append_utf8_to_latin1_lossy_check( + nsACString* aThis, const nsACString* aOther, size_t aOldLen); + +bool nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis, + const nsACString* aOther, + size_t aOldLen); +} + +inline size_t Distance(const nsReadingIterator<char16_t>& aStart, + const nsReadingIterator<char16_t>& aEnd) { + MOZ_ASSERT(aStart.get() <= aEnd.get()); + return static_cast<size_t>(aEnd.get() - aStart.get()); +} + +inline size_t Distance(const nsReadingIterator<char>& aStart, + const nsReadingIterator<char>& aEnd) { + MOZ_ASSERT(aStart.get() <= aEnd.get()); + return static_cast<size_t>(aEnd.get() - aStart.get()); +} + +// NOTE: Operations that don't need an operand to be an XPCOM string +// are in mozilla/TextUtils.h and mozilla/Utf8.h. + +// UTF-8 to UTF-16 +// Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER. 
+ +[[nodiscard]] inline bool CopyUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(), + aSource.Length(), 0); +} + +inline void CopyUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(), + aSource.Length(), aDest.Length()); +} + +inline void AppendUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// Latin1 to UTF-16 +// Interpret each incoming unsigned byte value as a Unicode scalar value (not +// windows-1252!). The function names say "ASCII" instead of "Latin1" for +// legacy reasons. 
+ +[[nodiscard]] inline bool CopyASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_latin1_impl(&aDest, aSource.Elements(), + aSource.Length(), 0, true); +} + +inline void CopyASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_latin1_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false); +} + +inline void AppendASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-16 to UTF-8 +// Unpaired surrogates are replaced with the REPLACEMENT CHARACTER. 
+ +[[nodiscard]] inline bool CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_utf8_impl( + &aDest, aSource.Elements(), aSource.Length(), 0); +} + +inline void CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendUTF16toUTF8( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_utf8_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length()); +} + +inline void AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-16 to Latin1 +// If all code points in the input are below U+0100, represents each scalar +// value as an unsigned byte. (This is not windows-1252!) If there are code +// points above U+00FF, memory-safely produces garbage and will likely start +// asserting in future debug builds. The nature of the garbage may differ +// based on CPU architecture and must not be relied upon. The names say +// "ASCII" instead of "Latin1" for legacy reasons. 
+ +[[nodiscard]] inline bool LossyCopyUTF16toASCII( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_latin1_lossy_impl( + &aDest, aSource.Elements(), aSource.Length(), 0, true); +} + +inline void LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool LossyAppendUTF16toASCII( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_latin1_lossy_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false); +} + +inline void LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY( + !LossyAppendUTF16toASCII(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// Latin1 to UTF-8 +// Interpret each incoming unsigned byte value as a Unicode scalar value (not +// windows-1252!). +// If the input is ASCII, the heap-allocated nsStringBuffer is shared if +// possible. 
+ +[[nodiscard]] inline bool CopyLatin1toUTF8(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, 0); +} + +inline void CopyLatin1toUTF8(const nsACString& aSource, nsACString& aDest) { + if (MOZ_UNLIKELY(!CopyLatin1toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendLatin1toUTF8(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, + aDest.Length()); +} + +inline void AppendLatin1toUTF8(const nsACString& aSource, nsACString& aDest) { + if (MOZ_UNLIKELY(!AppendLatin1toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-8 to Latin1 +// If all code points in the input are below U+0100, represents each scalar +// value as an unsigned byte. (This is not windows-1252!) If there are code +// points above U+00FF, memory-safely produces garbage in release builds and +// asserts in debug builds. The nature of the garbage may differ +// based on CPU architecture and must not be relied upon. +// If the input is ASCII, the heap-allocated nsStringBuffer is shared if +// possible. 
+ +[[nodiscard]] inline bool LossyCopyUTF8toLatin1(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource, + 0); +} + +inline void LossyCopyUTF8toLatin1(const nsACString& aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!LossyCopyUTF8toLatin1(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool LossyAppendUTF8toLatin1(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource, + aDest.Length()); +} + +inline void LossyAppendUTF8toLatin1(const nsACString& aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY( + !LossyAppendUTF8toLatin1(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * Performs a conversion with LossyConvertUtf16toLatin1() writing into the + * newly-allocated buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a 16-bit wide string + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewCString(const nsAString& aSource); + +/* A fallible version of ToNewCString. Returns nullptr on failure. */ +char* ToNewCString(const nsAString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. 
+ * + * @param aSource an 8-bit wide string + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewCString(const nsACString& aSource); + +/* A fallible version of ToNewCString. Returns nullptr on failure. */ +char* ToNewCString(const nsACString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * Performs an encoding conversion from a UTF-16 string to a UTF-8 string with + * unpaired surrogates replaced with the REPLACEMENT CHARACTER copying + * |aSource| to your new buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a UTF-16 string (made of char16_t's) + * @param aUTF8Count the number of 8-bit units that was returned + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr); + +/* A fallible version of ToNewUTF8String. Returns nullptr on failure. */ +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. + * + * Infallibly allocates and returns a new |char16_t| buffer which you must + * free with |free|. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a UTF-16 string + * @return a new |char16_t| buffer you must free with |free|. + */ +char16_t* ToNewUnicode(const nsAString& aSource); + +/* A fallible version of ToNewUnicode. Returns nullptr on failure. */ +char16_t* ToNewUnicode(const nsAString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. 
+ * + * Infallibly allocates and returns a new |char16_t| buffer which you must + * free with |free|. + * + * Performs an encoding conversion by 0-padding 8-bit wide characters up to + * 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource| + * to your new buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a Latin1 string + * @return a new |char16_t| buffer you must free with |free|. + */ +char16_t* ToNewUnicode(const nsACString& aSource); + +/* A fallible version of ToNewUnicode. Returns nullptr on failure. */ +char16_t* ToNewUnicode(const nsACString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. + * + * Infallibly allocates and returns a new |char16_t| buffer which you must + * free with |free|. Performs an encoding conversion from UTF-8 to UTF-16 + * while copying |aSource| to your new buffer. Malformed byte sequences + * are replaced with the REPLACEMENT CHARACTER. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource an 8-bit wide string, UTF-8 encoded + * @param aUTF16Count the number of 16-bit units that were returned + * @return a new |char16_t| buffer you must free with |free|. + * (UTF-16 encoded) + */ +char16_t* UTF8ToNewUnicode(const nsACString& aSource, + uint32_t* aUTF16Count = nullptr); + +/* A fallible version of UTF8ToNewUnicode. Returns nullptr on failure. */ +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count, + const mozilla::fallible_t& aFallible); + +/** + * Copies |aLength| 16-bit code units, starting at offset |aSrcOffset| of + * |aSource|, to the |char16_t| buffer |aDest|. + * + * After this operation |aDest| is not null terminated. 
+ * + * @param aSource a UTF-16 string + * @param aSrcOffset start offset in the source string + * @param aDest a |char16_t| buffer + * @param aLength the number of 16-bit code units to copy + * @return pointer to destination buffer - identical to |aDest| + */ +char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, + char16_t* aDest, uint32_t aLength); + +/** + * Replaces unpaired surrogates with U+FFFD in the argument. + * + * Copies a shared string buffer or an otherwise read-only + * buffer only if there are unpaired surrogates. + */ +[[nodiscard]] inline bool EnsureUTF16Validity(nsAString& aString) { + uint32_t upTo = mozilla::Utf16ValidUpTo(aString); + uint32_t len = aString.Length(); + if (upTo == len) { + return true; + } + char16_t* ptr = aString.BeginWriting(mozilla::fallible); + if (!ptr) { + return false; + } + auto span = mozilla::Span(ptr, len); + span[upTo] = 0xFFFD; + mozilla::EnsureUtf16ValiditySpan(span.From(upTo + 1)); + return true; +} + +void ParseString(const nsACString& aSource, char aDelimiter, + nsTArray<nsCString>& aArray); + +namespace mozilla::detail { + +constexpr auto kStringJoinAppendDefault = + [](auto& aResult, const auto& aValue) { aResult.Append(aValue); }; + +} // namespace mozilla::detail + +/** + * Join a sequence of items, each optionally transformed to a string, with a + * given separator, appending to a given string. 
+ * + * \tparam CharType char or char16_t + * \tparam InputRange a range usable with range-based for + * \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and + * an item of InputRange which appends the latter to the former + */ +template < + typename CharType, typename InputRange, + typename Func = const decltype(mozilla::detail::kStringJoinAppendDefault)&> +void StringJoinAppend( + nsTSubstring<CharType>& aOutput, + const nsTLiteralString<CharType>& aSeparator, const InputRange& aInputRange, + Func&& aFunc = mozilla::detail::kStringJoinAppendDefault) { + bool first = true; + for (const auto& item : aInputRange) { + if (first) { + first = false; + } else { + aOutput.Append(aSeparator); + } + + aFunc(aOutput, item); + } +} + +/** + * Join a sequence of items, each optionally transformed to a string, with a + * given separator, returning a new string. + * + * \tparam CharType char or char16_t + * \tparam InputRange a range usable with range-based for + * \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and + * an item of InputRange which appends the latter to the former + + */ +template < + typename CharType, typename InputRange, + typename Func = const decltype(mozilla::detail::kStringJoinAppendDefault)&> +auto StringJoin(const nsTLiteralString<CharType>& aSeparator, + const InputRange& aInputRange, + Func&& aFunc = mozilla::detail::kStringJoinAppendDefault) { + nsTAutoString<CharType> res; + StringJoinAppend(res, aSeparator, aInputRange, std::forward<Func>(aFunc)); + return res; +} + +/** + * Converts case in place in the argument string. + */ +void ToUpperCase(nsACString&); + +void ToLowerCase(nsACString&); + +void ToUpperCase(nsACString&); + +void ToLowerCase(nsACString&); + +/** + * Converts case from string aSource to aDest. 
+ */ +void ToUpperCase(const nsACString& aSource, nsACString& aDest); + +void ToLowerCase(const nsACString& aSource, nsACString& aDest); + +/** + * Finds the leftmost occurrence of |aPattern|, if any in the range + * |aSearchStart|..|aSearchEnd|. + * + * Returns |true| if a match was found, and adjusts |aSearchStart| and + * |aSearchEnd| to point to the match. If no match was found, returns |false| + * and makes |aSearchStart == aSearchEnd|. + * + * Currently, this is equivalent to the O(m*n) implementation previously on + * |ns[C]String|. + * + * If we need something faster, then we can implement that later. + */ + +bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&, + nsAString::const_iterator&, + nsStringComparator = nsTDefaultStringComparator); +bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&, + nsACString::const_iterator&, + nsCStringComparator = nsTDefaultStringComparator); + +/* sometimes we don't care about where the string was, just that we + * found it or not */ +inline bool FindInReadable( + const nsAString& aPattern, const nsAString& aSource, + nsStringComparator aCompare = nsTDefaultStringComparator) { + nsAString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + return FindInReadable(aPattern, start, end, aCompare); +} + +inline bool FindInReadable( + const nsACString& aPattern, const nsACString& aSource, + nsCStringComparator aCompare = nsTDefaultStringComparator) { + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + return FindInReadable(aPattern, start, end, aCompare); +} + +bool CaseInsensitiveFindInReadable(const nsACString& aPattern, + nsACString::const_iterator&, + nsACString::const_iterator&); + +/** + * Finds the rightmost occurrence of |aPattern| + * Returns |true| if a match was found, and adjusts |aSearchStart| and + * |aSearchEnd| to point to the match. 
If no match was found, returns |false| + * and makes |aSearchStart == aSearchEnd|. + */ +bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&, + nsAString::const_iterator&, + nsStringComparator = nsTDefaultStringComparator); +bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&, + nsACString::const_iterator&, + nsCStringComparator = nsTDefaultStringComparator); + +/** + * Finds the leftmost occurrence of |aChar|, if any in the range + * |aSearchStart|..|aSearchEnd|. + * + * Returns |true| if a match was found, and adjusts |aSearchStart| to + * point to the match. If no match was found, returns |false| and + * makes |aSearchStart == aSearchEnd|. + */ +bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, + const nsAString::const_iterator& aSearchEnd); +bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd); + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring); +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator); +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring); +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator); +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring); +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator); +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring); +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator); + +const nsString& EmptyString(); +const nsCString& EmptyCString(); + +const nsString& VoidString(); +const nsCString& VoidCString(); + +/** + * Compare a UTF-8 string to an UTF-16 string. + * + * Returns 0 if the strings are equal, -1 if aUTF8String is less + * than aUTF16String, and 1 in the reverse case. 
Errors are replaced + * with U+FFFD and then the U+FFFD is compared as if it had occurred + * in the input. If aErr is not nullptr, *aErr is set to true if + * either string had malformed sequences. + */ +int32_t CompareUTF8toUTF16(const nsACString& aUTF8String, + const nsAString& aUTF16String, bool* aErr = nullptr); + +void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest); + +#endif // !defined(nsReadableUtils_h___) diff --git a/xpcom/string/nsString.cpp b/xpcom/string/nsString.cpp new file mode 100644 index 0000000000..d85eb4b641 --- /dev/null +++ b/xpcom/string/nsString.cpp @@ -0,0 +1,9 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" + +#include "nsTString.cpp" diff --git a/xpcom/string/nsString.h b/xpcom/string/nsString.h new file mode 100644 index 0000000000..934bddc720 --- /dev/null +++ b/xpcom/string/nsString.h @@ -0,0 +1,168 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsString_h___ +#define nsString_h___ + +#include <ostream> + +#include "mozilla/Attributes.h" + +#include "nsStringFwd.h" + +#include "nsAString.h" +#include "nsDependentSubstring.h" +#include "nsReadableUtils.h" + +// enable support for the obsolete string API if not explicitly disabled +#ifndef MOZ_STRING_WITH_OBSOLETE_API +# define MOZ_STRING_WITH_OBSOLETE_API 1 +#endif + +#include "nsTString.h" + +static_assert(sizeof(char16_t) == 2, "size of char16_t must be 2"); +static_assert(sizeof(nsString::char_type) == 2, + "size of nsString::char_type must be 2"); +static_assert(nsString::char_type(-1) > nsString::char_type(0), + "nsString::char_type must be unsigned"); +static_assert(sizeof(nsCString::char_type) == 1, + "size of nsCString::char_type must be 1"); + +static_assert(sizeof(nsTLiteralString<char>) == sizeof(nsTString<char>), + "nsLiteralCString can masquerade as nsCString, " + "so they must have identical layout"); + +static_assert(sizeof(nsTLiteralString<char16_t>) == sizeof(nsTString<char16_t>), + "nsTLiteralString can masquerade as nsString, " + "so they must have identical layout"); + +/** + * A helper class that converts a UTF-16 string to ASCII in a lossy manner + */ +class NS_LossyConvertUTF16toASCII : public nsAutoCString { + public: + explicit NS_LossyConvertUTF16toASCII(const char16ptr_t aString) { + LossyAppendUTF16toASCII(mozilla::MakeStringSpan(aString), *this); + } + + NS_LossyConvertUTF16toASCII(const char16ptr_t aString, uint32_t aLength) { + LossyAppendUTF16toASCII( + Substring(static_cast<const char16_t*>(aString), aLength), *this); + } + + explicit NS_LossyConvertUTF16toASCII(const nsAString& aString) { + LossyAppendUTF16toASCII(aString, *this); + } + + private: + // NOT TO BE IMPLEMENTED + NS_LossyConvertUTF16toASCII(char) = delete; +}; + +class NS_ConvertASCIItoUTF16 : public nsAutoString { + public: + explicit NS_ConvertASCIItoUTF16(const char* aCString) { + AppendASCIItoUTF16(mozilla::MakeStringSpan(aCString), *this); + 
} + + NS_ConvertASCIItoUTF16(const char* aCString, uint32_t aLength) { + AppendASCIItoUTF16(Substring(aCString, aLength), *this); + } + + explicit NS_ConvertASCIItoUTF16(const nsACString& aCString) { + AppendASCIItoUTF16(aCString, *this); + } + + explicit NS_ConvertASCIItoUTF16(mozilla::Span<const char> aCString) { + AppendASCIItoUTF16(aCString, *this); + } + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertASCIItoUTF16(char16_t) = delete; +}; + +/** + * A helper class that converts a UTF-16 string to UTF-8 + */ +class NS_ConvertUTF16toUTF8 : public nsAutoCString { + public: + explicit NS_ConvertUTF16toUTF8(const char16ptr_t aString) { + AppendUTF16toUTF8(mozilla::MakeStringSpan(aString), *this); + } + + NS_ConvertUTF16toUTF8(const char16ptr_t aString, uint32_t aLength) { + AppendUTF16toUTF8(Substring(static_cast<const char16_t*>(aString), aLength), + *this); + } + + explicit NS_ConvertUTF16toUTF8(const nsAString& aString) { + AppendUTF16toUTF8(aString, *this); + } + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertUTF16toUTF8(char) = delete; +}; + +class NS_ConvertUTF8toUTF16 : public nsAutoString { + public: + explicit NS_ConvertUTF8toUTF16(const char* aCString) { + AppendUTF8toUTF16(mozilla::MakeStringSpan(aCString), *this); + } + + NS_ConvertUTF8toUTF16(const char* aCString, uint32_t aLength) { + AppendUTF8toUTF16(Substring(aCString, aLength), *this); + } + + explicit NS_ConvertUTF8toUTF16(const nsACString& aCString) { + AppendUTF8toUTF16(aCString, *this); + } + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertUTF8toUTF16(char16_t) = delete; +}; + +/** + * Converts an integer (signed/unsigned, 32/64bit) to its string representation + * in radix |aRadix| (default 10), returned as an nsAutoCString/nsAutoString. 
+ */ +template <typename T, typename U> +nsTAutoString<T> IntToTString(const U aInt, const int aRadix = 10) { + nsTAutoString<T> string; + string.AppendInt(aInt, aRadix); + return string; +} + +template <typename U> +nsAutoCString IntToCString(const U aInt, const int aRadix = 10) { + return IntToTString<char>(aInt, aRadix); +} + +template <typename U> +nsAutoString IntToString(const U aInt, const int aRadix = 10) { + return IntToTString<char16_t>(aInt, aRadix); +} + +// MOZ_DBG support + +inline std::ostream& operator<<(std::ostream& aOut, const nsACString& aString) { + aOut.write(aString.Data(), aString.Length()); + return aOut; +} + +inline std::ostream& operator<<(std::ostream& aOut, const nsAString& aString) { + return aOut << NS_ConvertUTF16toUTF8(aString); +} + +// the following are included/declared for backwards compatibility +#include "nsDependentString.h" +#include "nsLiteralString.h" +#include "nsPromiseFlatString.h" + +#endif // !defined(nsString_h___) diff --git a/xpcom/string/nsStringBuffer.h b/xpcom/string/nsStringBuffer.h new file mode 100644 index 0000000000..9dd848f6bd --- /dev/null +++ b/xpcom/string/nsStringBuffer.h @@ -0,0 +1,183 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsStringBuffer_h__ +#define nsStringBuffer_h__ + +#include <atomic> +#include "mozilla/MemoryReporting.h" + +template <class T> +struct already_AddRefed; + +/** + * This structure precedes the string buffers "we" allocate. It may be the + * case that nsTAString::mData does not point to one of these special + * buffers. The mDataFlags member variable distinguishes the buffer type. + * + * When this header is in use, it enables reference counting, and capacity + * tracking. 
NOTE: A string buffer can be modified only if its reference + * count is 1. + */ +class nsStringBuffer { + private: + friend class CheckStaticAtomSizes; + + std::atomic<uint32_t> mRefCount; + uint32_t mStorageSize; + + public: + /** + * Allocates a new string buffer, with given size in bytes and a + * reference count of one. When the string buffer is no longer needed, + * it should be released via Release. + * + * It is up to the caller to set the bytes corresponding to the string + * buffer by calling the Data method to fetch the raw data pointer. Care + * must be taken to properly null terminate the character array. The + * storage size can be greater than the length of the actual string + * (i.e., it is not required that the null terminator appear in the last + * storage unit of the string buffer's data). + * + * @return new string buffer or null if out of memory. + */ + static already_AddRefed<nsStringBuffer> Alloc(size_t aStorageSize); + + /** + * Resizes the given string buffer to the specified storage size. This + * method must not be called on a readonly string buffer. Use this API + * carefully!! + * + * This method behaves like the ANSI-C realloc function. (i.e., If the + * allocation fails, null will be returned and the given string buffer + * will remain unmodified.) + * + * @see IsReadonly + */ + static nsStringBuffer* Realloc(nsStringBuffer* aBuf, size_t aStorageSize); + + /** + * Increment the reference count on this string buffer. + */ + void NS_FASTCALL AddRef(); + + /** + * Decrement the reference count on this string buffer. The string + * buffer will be destroyed when its reference count reaches zero. + */ + void NS_FASTCALL Release(); + + /** + * This method returns the string buffer corresponding to the given data + * pointer. The data pointer must have been returned previously by a + * call to the nsStringBuffer::Data method. 
+ */ + static nsStringBuffer* FromData(void* aData) { + return reinterpret_cast<nsStringBuffer*>(aData) - 1; + } + + /** + * This method returns the data pointer for this string buffer. + */ + void* Data() const { + return const_cast<char*>(reinterpret_cast<const char*>(this + 1)); + } + + /** + * This function returns the storage size of a string buffer in bytes. + * This value is the same value that was originally passed to Alloc (or + * Realloc). + */ + uint32_t StorageSize() const { return mStorageSize; } + + /** + * If this method returns false, then the caller can be sure that their + * reference to the string buffer is the only reference to the string + * buffer, and therefore it has exclusive access to the string buffer and + * associated data. However, if this function returns true, then other + * consumers may rely on the data in this buffer being immutable and + * other threads may access this buffer simultaneously. + */ + bool IsReadonly() const { + // This doesn't lead to the destruction of the buffer, so we don't + // need to perform acquire memory synchronization for the normal + // reason that a reference count needs acquire synchronization + // (ensuring that all writes to the object made on other threads are + // visible to the thread destroying the object). + // + // We then need to consider the possibility that there were prior + // writes to the buffer on a different thread: one that has either + // since released its reference count, or one that also has access + // to this buffer through the same reference. There are two ways + // for that to happen: either the buffer pointer or a data structure + // (e.g., string object) pointing to the buffer was transferred from + // one thread to another, or the data structure pointing to the + // buffer was already visible on both threads. In the first case + // (transfer), the transfer of data from one thread to another would + // have handled the memory synchronization. 
In the latter case + // (data structure visible on both threads), the caller needed some + // sort of higher level memory synchronization to protect against + // the string object being mutated at the same time on multiple + // threads. + + // See bug 1603504. TSan might complain about a race when using + // memory_order_relaxed, so use memory_order_acquire for making TSan + // happy. +#if defined(MOZ_TSAN) + return mRefCount.load(std::memory_order_acquire) > 1; +#else + return mRefCount.load(std::memory_order_relaxed) > 1; +#endif + } + + /** + * The FromString methods return a string buffer for the given string + * object or null if the string object does not have a string buffer. + * The reference count of the string buffer is NOT incremented by these + * methods. If the caller wishes to hold onto the returned value, then + * the returned string buffer must have its reference count incremented + * via a call to the AddRef method. + */ + static nsStringBuffer* FromString(const nsAString& aStr); + static nsStringBuffer* FromString(const nsACString& aStr); + + /** + * The ToString methods assign this string buffer to a given string + * object. If the string object does not support sharable string + * buffers, then its value will be set to a copy of the given string + * buffer. Otherwise, these methods increment the reference count of the + * given string buffer. It is important to specify the length (in + * storage units) of the string contained in the string buffer since the + * length of the string may be less than its storage size. The string + * must have a null terminator at the offset specified by |len|. + * + * NOTE: storage size is measured in bytes even for wide strings; + * however, string length is always measured in storage units + * (2-byte units for wide strings). 
+ */ + void ToString(uint32_t aLen, nsAString& aStr, bool aMoveOwnership = false); + void ToString(uint32_t aLen, nsACString& aStr, bool aMoveOwnership = false); + + /** + * This measures the size only if the StringBuffer is unshared. + */ + size_t SizeOfIncludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const; + + /** + * This measures the size regardless of whether the StringBuffer is + * unshared. + * + * WARNING: Only use this if you really know what you are doing, because + * it can easily lead to double-counting strings. If you do use it, + * please explain clearly in a comment why it's safe and won't lead to + * double-counting. + */ + size_t SizeOfIncludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const; +}; + +#endif /* !defined(nsStringBuffer_h__) */ diff --git a/xpcom/string/nsStringComparator.cpp b/xpcom/string/nsStringComparator.cpp new file mode 100644 index 0000000000..839858dd76 --- /dev/null +++ b/xpcom/string/nsStringComparator.cpp @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ctype.h> +#include "nsAString.h" +#include "plstr.h" + +#include "nsTStringComparator.cpp" + +int nsCaseInsensitiveCStringComparator(const char* aLhs, const char* aRhs, + uint32_t aLhsLength, + uint32_t aRhsLength) { + if (aLhsLength != aRhsLength) { + return (aLhsLength > aRhsLength) ? 1 : -1; + } + int32_t result = int32_t(PL_strncasecmp(aLhs, aRhs, aLhsLength)); + // Egads. PL_strncasecmp is returning *very* negative numbers. + // Some folks expect -1,0,1, so let's temper its enthusiasm. 
+ if (result < 0) { + result = -1; + } + return result; +} diff --git a/xpcom/string/nsStringFlags.h b/xpcom/string/nsStringFlags.h new file mode 100644 index 0000000000..00f103c284 --- /dev/null +++ b/xpcom/string/nsStringFlags.h @@ -0,0 +1,90 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsStringFlags_h +#define nsStringFlags_h + +#include <stdint.h> +#include "mozilla/TypedEnumBits.h" + +namespace mozilla { +namespace detail { +// NOTE: these flags are declared public _only_ for convenience inside +// the string implementation. And they are outside of the string +// class so that the type is the same for both narrow and wide +// strings. + +// bits for mDataFlags +enum class StringDataFlags : uint16_t { + // Some terminology: + // + // "dependent buffer" A dependent buffer is one that the string class + // does not own. The string class relies on some + // external code to ensure the lifetime of the + // dependent buffer. + // + // "refcounted buffer" A refcounted buffer is one that the string class + // allocates. When it allocates a refcounted string + // buffer, it allocates some additional space at + // the beginning of the buffer for additional + // fields, including a reference count and a + // buffer length. See nsStringHeader. + // + // "adopted buffer" An adopted buffer is a raw string buffer + // allocated on the heap (using moz_xmalloc) + // of which the string class subsumes ownership. + // + // Some comments about the string data flags: + // + // REFCOUNTED, OWNED, and INLINE are all mutually exclusive. They + // indicate the allocation type of mData. If none of these flags + // are set, then the string buffer is dependent. 
+ // + // REFCOUNTED, OWNED, or INLINE imply TERMINATED. This is because + // the string classes always allocate null-terminated buffers, and + // non-terminated substrings are always dependent. + // + // VOIDED implies TERMINATED, and moreover it implies that mData + // points to char_traits::sEmptyBuffer. Therefore, VOIDED is + // mutually exclusive with REFCOUNTED, OWNED, and INLINE. + // + // INLINE requires StringClassFlags::INLINE to be set on the type. + + // IsTerminated returns true + TERMINATED = 1 << 0, + + // IsVoid returns true + VOIDED = 1 << 1, + + // mData points to a heap-allocated, shareable, refcounted buffer + REFCOUNTED = 1 << 2, + + // mData points to a heap-allocated, raw buffer + OWNED = 1 << 3, + + // mData points to a writable, inline buffer + INLINE = 1 << 4, + + // mData points to a string literal; DataFlags::TERMINATED will also be set + LITERAL = 1 << 5 +}; + +// bits for mClassFlags +enum class StringClassFlags : uint16_t { + // |this|'s buffer is inline, and requires the type to be binary-compatible + // with nsTAutoStringN + INLINE = 1 << 0, + // |this| requires its buffer is null-terminated + NULL_TERMINATED = 1 << 1 +}; + +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(StringDataFlags) +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(StringClassFlags) + +} // namespace detail +} // namespace mozilla + +#endif diff --git a/xpcom/string/nsStringFwd.h b/xpcom/string/nsStringFwd.h new file mode 100644 index 0000000000..e941855c9c --- /dev/null +++ b/xpcom/string/nsStringFwd.h @@ -0,0 +1,90 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* nsStringFwd.h --- forward declarations for string classes */ + +#ifndef nsStringFwd_h +#define nsStringFwd_h + +#include "nscore.h" + +namespace mozilla { +namespace detail { + +template <typename T> +class nsTStringRepr; + +using nsStringRepr = nsTStringRepr<char16_t>; +using nsCStringRepr = nsTStringRepr<char>; + +} // namespace detail +} // namespace mozilla + +static const size_t AutoStringDefaultStorageSize = 64; + +template <typename T> +class nsTSubstring; +template <typename T> +class nsTSubstringTuple; +template <typename T> +class nsTString; +template <typename T, size_t N> +class nsTAutoStringN; +template <typename T> +class nsTDependentString; +template <typename T> +class nsTDependentSubstring; +template <typename T> +class nsTPromiseFlatString; +template <typename T> +class nsTLiteralString; +template <typename T> +class nsTSubstringSplitter; + +template <typename T> +using nsTStringComparator = int (*)(const T*, const T*, uint32_t, uint32_t); + +// The default string comparator (case-sensitive comparison) +template <typename T> +int nsTDefaultStringComparator(const T*, const T*, uint32_t, uint32_t); + +// We define this version without a size param instead of providing a +// default value for N so that there is a default typename that doesn't +// require angle brackets. +template <typename T> +using nsTAutoString = nsTAutoStringN<T, AutoStringDefaultStorageSize>; + +// Double-byte (char16_t) string types. 
+ +using nsAString = nsTSubstring<char16_t>; +using nsSubstringTuple = nsTSubstringTuple<char16_t>; +using nsString = nsTString<char16_t>; +using nsAutoString = nsTAutoString<char16_t>; +template <size_t N> +using nsAutoStringN = nsTAutoStringN<char16_t, N>; +using nsDependentString = nsTDependentString<char16_t>; +using nsDependentSubstring = nsTDependentSubstring<char16_t>; +using nsPromiseFlatString = nsTPromiseFlatString<char16_t>; +using nsStringComparator = nsTStringComparator<char16_t>; +using nsLiteralString = nsTLiteralString<char16_t>; +using nsSubstringSplitter = nsTSubstringSplitter<char16_t>; + +// Single-byte (char) string types. + +using nsACString = nsTSubstring<char>; +using nsCSubstringTuple = nsTSubstringTuple<char>; +using nsCString = nsTString<char>; +using nsAutoCString = nsTAutoString<char>; +template <size_t N> +using nsAutoCStringN = nsTAutoStringN<char, N>; +using nsDependentCString = nsTDependentString<char>; +using nsDependentCSubstring = nsTDependentSubstring<char>; +using nsPromiseFlatCString = nsTPromiseFlatString<char>; +using nsCStringComparator = nsTStringComparator<char>; +using nsLiteralCString = nsTLiteralString<char>; +using nsCSubstringSplitter = nsTSubstringSplitter<char>; + +#endif /* !defined(nsStringFwd_h) */ diff --git a/xpcom/string/nsStringIterator.h b/xpcom/string/nsStringIterator.h new file mode 100644 index 0000000000..db14efdaca --- /dev/null +++ b/xpcom/string/nsStringIterator.h @@ -0,0 +1,117 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsStringIterator_h___ +#define nsStringIterator_h___ + +#include "nsCharTraits.h" +#include "nsAlgorithm.h" +#include "nsDebug.h" + +/** + * @see nsTAString + */ + +template <class CharT> +class nsReadingIterator { + public: + typedef nsReadingIterator<CharT> self_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef CharT value_type; + typedef const CharT* pointer; + typedef const CharT& reference; + + private: + friend class mozilla::detail::nsTStringRepr<CharT>; + + // unfortunately, the API for nsReadingIterator requires that the + // iterator know its start and end positions. this was needed when + // we supported multi-fragment strings, but now it is really just + // extra baggage. we should remove mStart and mEnd at some point. + + const CharT* mStart; + const CharT* mEnd; + const CharT* mPosition; + + public: + nsReadingIterator() : mStart(nullptr), mEnd(nullptr), mPosition(nullptr) {} + // clang-format off + // nsReadingIterator( const nsReadingIterator<CharT>& ); // auto-generated copy-constructor OK + // nsReadingIterator<CharT>& operator=( const nsReadingIterator<CharT>& ); // auto-generated copy-assignment operator OK + // clang-format on + + pointer get() const { return mPosition; } + + CharT operator*() const { return *get(); } + + self_type& operator++() { + ++mPosition; + return *this; + } + + self_type operator++(int) { + self_type result(*this); + ++mPosition; + return result; + } + + self_type& operator--() { + --mPosition; + return *this; + } + + self_type operator--(int) { + self_type result(*this); + --mPosition; + return result; + } + + self_type& advance(difference_type aN) { + if (aN > 0) { + difference_type step = XPCOM_MIN(aN, mEnd - mPosition); + + NS_ASSERTION( + step > 0, + "can't advance a reading iterator beyond the end of a string"); + + mPosition += step; + } else if (aN < 0) { + difference_type step = XPCOM_MAX(aN, -(mPosition - mStart)); + + NS_ASSERTION(step < 0, + "can't advance 
(backward) a reading iterator beyond the end " + "of a string"); + + mPosition += step; + } + return *this; + } + + // We return an unsigned type here (with corresponding assert) rather than + // the more usual difference_type because we want to make this class go + // away in favor of mozilla::RangedPtr. Since RangedPtr has the same + // requirement we are enforcing here, the transition ought to be much + // smoother. + size_type operator-(const self_type& aOther) const { + MOZ_ASSERT(mPosition >= aOther.mPosition); + return mPosition - aOther.mPosition; + } +}; + +template <class CharT> +inline bool operator==(const nsReadingIterator<CharT>& aLhs, + const nsReadingIterator<CharT>& aRhs) { + return aLhs.get() == aRhs.get(); +} + +template <class CharT> +inline bool operator!=(const nsReadingIterator<CharT>& aLhs, + const nsReadingIterator<CharT>& aRhs) { + return aLhs.get() != aRhs.get(); +} + +#endif /* !defined(nsStringIterator_h___) */ diff --git a/xpcom/string/nsStringObsolete.cpp b/xpcom/string/nsStringObsolete.cpp new file mode 100644 index 0000000000..23d9dad1f9 --- /dev/null +++ b/xpcom/string/nsStringObsolete.cpp @@ -0,0 +1,1002 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" + +/** + * nsTString obsolete API support + */ + +#if MOZ_STRING_WITH_OBSOLETE_API + +# include "nsDependentString.h" +# include "nsDependentSubstring.h" +# include "nsReadableUtils.h" +# include "nsCRT.h" +# include "nsUTF8Utils.h" +# include "prdtoa.h" + +/* ***** BEGIN RICKG BLOCK ***** + * + * NOTE: This section of code was extracted from rickg's bufferRoutines.h file. + * For the most part it remains unmodified. 
We want to eliminate (or at + * least clean up) this code at some point. If you find the formatting + * in this section somewhat inconsistent, don't blame me! ;-) + */ + +// avoid STDC's tolower since it may do weird things with non-ASCII bytes +inline char ascii_tolower(char aChar) { + if (aChar >= 'A' && aChar <= 'Z') return aChar + ('a' - 'A'); + return aChar; +} + +//----------------------------------------------------------------------------- +// +// This set of methods is used to search a buffer looking for a char. +// + +/** + * This method scans the given buffer for the given char + * + * @update gess 02/17/00 + * @param aDest is the buffer to be searched + * @param aDestLength is the size (in char-units, not bytes) of the buffer + * @param anOffset is the start pos to begin searching + * @param aChar is the target character we're looking for + * @param aCount tells us how many characters to iterate through (which may + * be different than aDestLength); -1 means use full length. + * @return index of pos if found, else -1 (kNotFound) + */ +static int32_t FindChar1(const char* aDest, uint32_t aDestLength, + int32_t anOffset, const char16_t aChar, + int32_t aCount) { + if (anOffset < 0) anOffset = 0; + + if (aCount < 0) aCount = (int32_t)aDestLength; + + if ((aChar < 256) && (0 < aDestLength) && + ((uint32_t)anOffset < aDestLength)) { + // We'll only search if the given aChar is within the normal ascii a range, + //(Since this string is definitely within the ascii range). + + if (0 < aCount) { + const char* left = aDest + anOffset; + const char* last = left + aCount; + const char* max = aDest + aDestLength; + const char* end = (last < max) ? 
last : max; + + int32_t theMax = end - left; + if (0 < theMax) { + unsigned char theChar = (unsigned char)aChar; + const char* result = (const char*)memchr(left, (int)theChar, theMax); + + if (result) return result - aDest; + } + } + } + + return kNotFound; +} + +/** + * This methods cans the given buffer for the given char + * + * @update gess 3/25/98 + * @param aDest is the buffer to be searched + * @param aDestLength is the size (in char-units, not bytes) of the buffer + * @param anOffset is the start pos to begin searching + * @param aChar is the target character we're looking for + * @param aCount tells us how many characters to iterate through (which may + * be different than aLength); -1 means use full length. + * @return index of pos if found, else -1 (kNotFound) + */ +static int32_t FindChar2(const char16_t* aDest, uint32_t aDestLength, + int32_t anOffset, const char16_t aChar, + int32_t aCount) { + if (anOffset < 0) anOffset = 0; + + if (aCount < 0) aCount = (int32_t)aDestLength; + + if ((0 < aDestLength) && ((uint32_t)anOffset < aDestLength)) { + if (0 < aCount) { + const char16_t* root = aDest; + const char16_t* left = root + anOffset; + const char16_t* last = left + aCount; + const char16_t* max = root + aDestLength; + const char16_t* end = (last < max) ? last : max; + + while (left < end) { + if (*left == aChar) return (left - root); + + ++left; + } + } + } + + return kNotFound; +} + +/** + * This methods cans the given buffer (in reverse) for the given char + * + * @update gess 02/17/00 + * @param aDest is the buffer to be searched + * @param aDestLength is the size (in char-units, not bytes) of the buffer + * @param anOffset is the start pos to begin searching + * @param aChar is the target character we're looking for + * @param aCount tells us how many characters to iterate through (which may + * be different than aLength); -1 means use full length. 
+ * @return index of pos if found, else -1 (kNotFound) + */ + +static int32_t RFindChar1(const char* aDest, uint32_t aDestLength, + int32_t anOffset, const char16_t aChar, + int32_t aCount) { + if (anOffset < 0) anOffset = (int32_t)aDestLength - 1; + + if (aCount < 0) aCount = int32_t(aDestLength); + + if ((aChar < 256) && (0 < aDestLength) && + ((uint32_t)anOffset < aDestLength)) { + // We'll only search if the given aChar is within the normal ascii a range, + //(Since this string is definitely within the ascii range). + + if (0 < aCount) { + const char* rightmost = aDest + anOffset; + const char* min = rightmost - aCount + 1; + const char* leftmost = (min < aDest) ? aDest : min; + + char theChar = (char)aChar; + while (leftmost <= rightmost) { + if ((*rightmost) == theChar) return rightmost - aDest; + + --rightmost; + } + } + } + + return kNotFound; +} + +/** + * This methods cans the given buffer for the given char + * + * @update gess 3/25/98 + * @param aDest is the buffer to be searched + * @param aDestLength is the size (in char-units, not bytes) of the buffer + * @param anOffset is the start pos to begin searching + * @param aChar is the target character we're looking for + * @param aCount tells us how many characters to iterate through (which may + * be different than aLength); -1 means use full length. + * @return index of pos if found, else -1 (kNotFound) + */ +static int32_t RFindChar2(const char16_t* aDest, uint32_t aDestLength, + int32_t anOffset, const char16_t aChar, + int32_t aCount) { + if (anOffset < 0) anOffset = (int32_t)aDestLength - 1; + + if (aCount < 0) aCount = int32_t(aDestLength); + + if ((0 < aDestLength) && ((uint32_t)anOffset < aDestLength)) { + if (0 < aCount) { + const char16_t* root = aDest; + const char16_t* rightmost = root + anOffset; + const char16_t* min = rightmost - aCount + 1; + const char16_t* leftmost = (min < root) ? 
root : min; + + while (leftmost <= rightmost) { + if ((*rightmost) == aChar) return rightmost - root; + + --rightmost; + } + } + } + + return kNotFound; +} + +//----------------------------------------------------------------------------- +// +// This set of methods is used to compare one buffer onto another. The +// functions are differentiated by the size of source and dest character +// sizes. WARNING: Your destination buffer MUST be big enough to hold all the +// source bytes. We don't validate these ranges here (this should be done in +// higher level routines). +// + +/** + * This method compares the data in one buffer with another + * @update gess 01/04/99 + * @param aStr1 is the first buffer to be compared + * @param aStr2 is the 2nd buffer to be compared + * @param aCount is the number of chars to compare + * @param aIgnoreCase tells us whether to use a case-sensitive comparison + * @return -1,0,1 depending on <,==,> + */ +static +# ifdef __SUNPRO_CC + inline +# endif /* __SUNPRO_CC */ + int32_t + Compare1To1(const char* aStr1, const char* aStr2, uint32_t aCount, + bool aIgnoreCase) { + int32_t result = 0; + if (aIgnoreCase) + result = int32_t(PL_strncasecmp(aStr1, aStr2, aCount)); + else + result = nsCharTraits<char>::compare(aStr1, aStr2, aCount); + + // alien comparisons may return out-of-bound answers + // instead of the -1, 0, 1 expected by most clients + if (result < -1) + result = -1; + else if (result > 1) + result = 1; + return result; +} + +/** + * This method compares the data in one buffer with another + * @update gess 01/04/99 + * @param aStr1 is the first buffer to be compared + * @param aStr2 is the 2nd buffer to be compared + * @param aCount is the number of chars to compare + * @param aIgnoreCase tells us whether to use a case-sensitive comparison + * @return -1,0,1 depending on <,==,> + */ +static +# ifdef __SUNPRO_CC + inline +# endif /* __SUNPRO_CC */ + int32_t + Compare2To2(const char16_t* aStr1, const char16_t* aStr2, uint32_t aCount) 
{ + int32_t result; + + if (aStr1 && aStr2) + result = nsCharTraits<char16_t>::compare(aStr1, aStr2, aCount); + + // The following cases are rare and survivable caller errors. + // Two null pointers are equal, but any string, even 0 length + // is greater than a null pointer. It might not really matter, + // but we pick something reasonable anyway. + else if (!aStr1 && !aStr2) + result = 0; + else if (aStr1) + result = 1; + else + result = -1; + + // alien comparisons may give answers outside the -1, 0, 1 expected by callers + if (result < -1) + result = -1; + else if (result > 1) + result = 1; + return result; +} + +/** + * This method compares the data in one buffer with another + * @update gess 01/04/99 + * @param aStr1 is the first buffer to be compared + * @param aStr2 is the 2nd buffer to be compared + * @param aCount is the number of chars to compare + * @param aIgnoreCase tells us whether to use a case-sensitive comparison + * @return -1,0,1 depending on <,==,> + */ +static +# ifdef __SUNPRO_CC + inline +# endif /* __SUNPRO_CC */ + int32_t + Compare2To1(const char16_t* aStr1, const char* aStr2, uint32_t aCount, + bool aIgnoreCase) { + const char16_t* s1 = aStr1; + const char* s2 = aStr2; + + if (aStr1 && aStr2) { + if (aCount != 0) { + do { + char16_t c1 = *s1++; + char16_t c2 = char16_t((unsigned char)*s2++); + + if (c1 != c2) { +# ifdef DEBUG + // we won't warn on c1>=128 (the 2-byte value) because often + // it is just fine to compare an constant, ascii value (i.e. "body") + // against some non-ascii value (i.e. 
a unicode string that + // was downloaded from a web page) + if (aIgnoreCase && c2 >= 128) + NS_WARNING( + "got a non-ASCII string, but we can't do an accurate case " + "conversion!"); +# endif + + // can't do case conversion on characters out of our range + if (aIgnoreCase && c1 < 128 && c2 < 128) { + c1 = ascii_tolower(char(c1)); + c2 = ascii_tolower(char(c2)); + + if (c1 == c2) continue; + } + + if (c1 < c2) return -1; + return 1; + } + } while (--aCount); + } + } + return 0; +} + +/** + * This method compares the data in one buffer with another + * @update gess 01/04/99 + * @param aStr1 is the first buffer to be compared + * @param aStr2 is the 2nd buffer to be compared + * @param aCount is the number of chars to compare + * @param aIgnoreCase tells us whether to use a case-sensitive comparison + * @return -1,0,1 depending on <,==,> + */ +inline int32_t Compare1To2(const char* aStr1, const char16_t* aStr2, + uint32_t aCount, bool aIgnoreCase) { + return Compare2To1(aStr2, aStr1, aCount, aIgnoreCase) * -1; +} + +//----------------------------------------------------------------------------- +// +// This set of methods is used compress char sequences in a buffer... 
+// + +/** + * This method compresses duplicate runs of a given char from the given buffer + * + * @update rickg 03.23.2000 + * @param aString is the buffer to be manipulated + * @param aLength is the length of the buffer + * @param aSet tells us which chars to compress from given buffer + * @param aEliminateLeading tells us whether to strip chars from the start of + * the buffer + * @param aEliminateTrailing tells us whether to strip chars from the start + * of the buffer + * @return the new length of the given buffer + */ +static int32_t CompressChars1(char* aString, uint32_t aLength, + const char* aSet) { + char* from = aString; + char* end = aString + aLength; + char* to = from; + + // this code converts /n, /t, /r into normal space ' '; + // it also compresses runs of whitespace down to a single char... + if (aSet && aString && (0 < aLength)) { + uint32_t aSetLen = strlen(aSet); + + while (from < end) { + char theChar = *from++; + + *to++ = theChar; // always copy this char... + + if ((kNotFound != FindChar1(aSet, aSetLen, 0, theChar, aSetLen))) { + while (from < end) { + theChar = *from++; + if (kNotFound == FindChar1(aSet, aSetLen, 0, theChar, aSetLen)) { + *to++ = theChar; + break; + } + } // while + } // if + } // if + *to = 0; + } + return to - aString; +} + +/** + * This method compresses duplicate runs of a given char from the given buffer + * + * @update rickg 03.23.2000 + * @param aString is the buffer to be manipulated + * @param aLength is the length of the buffer + * @param aSet tells us which chars to compress from given buffer + * @param aEliminateLeading tells us whether to strip chars from the start of + * the buffer + * @param aEliminateTrailing tells us whether to strip chars from the start + * of the buffer + * @return the new length of the given buffer + */ +static int32_t CompressChars2(char16_t* aString, uint32_t aLength, + const char* aSet) { + char16_t* from = aString; + char16_t* end = from + aLength; + char16_t* to = from; + + // this 
code converts /n, /t, /r into normal space ' '; + // it also compresses runs of whitespace down to a single char... + if (aSet && aString && (0 < aLength)) { + uint32_t aSetLen = strlen(aSet); + + while (from < end) { + char16_t theChar = *from++; + + *to++ = theChar; // always copy this char... + + if ((theChar < 256) && + (kNotFound != FindChar1(aSet, aSetLen, 0, theChar, aSetLen))) { + while (from < end) { + theChar = *from++; + if (kNotFound == FindChar1(aSet, aSetLen, 0, theChar, aSetLen)) { + *to++ = theChar; + break; + } + } // while + } // if + } // if + *to = 0; + } + return to - (char16_t*)aString; +} + +/** + * This method strips chars in a given set from the given buffer + * + * @update gess 01/04/99 + * @param aString is the buffer to be manipulated + * @param aLength is the length of the buffer + * @param aSet tells us which chars to compress from given buffer + * @param aEliminateLeading tells us whether to strip chars from the start of + * the buffer + * @param aEliminateTrailing tells us whether to strip chars from the start + * of the buffer + * @return the new length of the given buffer + */ +static int32_t StripChars1(char* aString, uint32_t aLength, const char* aSet) { + // XXX(darin): this code should defer writing until necessary. 
+ + char* to = aString; + char* from = aString - 1; + char* end = aString + aLength; + + if (aSet && aString && (0 < aLength)) { + uint32_t aSetLen = strlen(aSet); + while (++from < end) { + char theChar = *from; + if (kNotFound == FindChar1(aSet, aSetLen, 0, theChar, aSetLen)) { + *to++ = theChar; + } + } + *to = 0; + } + return to - (char*)aString; +} + +/** + * This method strips chars in a given set from the given buffer + * + * @update gess 01/04/99 + * @param aString is the buffer to be manipulated + * @param aLength is the length of the buffer + * @param aSet tells us which chars to compress from given buffer + * @param aEliminateLeading tells us whether to strip chars from the start of + * the buffer + * @param aEliminateTrailing tells us whether to strip chars from the start + * of the buffer + * @return the new length of the given buffer + */ +static int32_t StripChars2(char16_t* aString, uint32_t aLength, + const char* aSet) { + // XXX(darin): this code should defer writing until necessary. + + char16_t* to = aString; + char16_t* from = aString - 1; + char16_t* end = to + aLength; + + if (aSet && aString && (0 < aLength)) { + uint32_t aSetLen = strlen(aSet); + while (++from < end) { + char16_t theChar = *from; + // Note the test for ascii range below. If you have a real unicode char, + // and you're searching for chars in the (given) ascii string, there's no + // point in doing the real search since it's out of the ascii range. 
// Builds the quick-reject mask used to implement FindCharInSet and friends.
// A bit is set in the result exactly when no character of the
// null-terminated |set| has that bit set, so a character sharing any bit
// with the mask cannot be a member of the set.
template <class CharT>
# ifndef __SUNPRO_CC
static
# endif /* !__SUNPRO_CC */
    CharT
    GetFindInSetFilter(const CharT* set) {
  CharT usedBits = CharT(0);  // union of the bits of every set member
  for (const CharT* member = set; *member; ++member) {
    usedBits |= *member;
  }
  return CharT(~usedBits);
}
find_char(const char16_t* s, uint32_t max, int32_t offset, + const char16_t c, int32_t count) { + return FindChar2(s, max, offset, c, count); + } + + static int32_t rfind_char(const char16_t* s, uint32_t max, int32_t offset, + const char16_t c, int32_t count) { + return RFindChar2(s, max, offset, c, count); + } + + static char16_t get_find_in_set_filter(const char16_t* set) { + return GetFindInSetFilter(set); + } + + static char16_t get_find_in_set_filter(const char* set) { + return (~char16_t(0) ^ ~char(0)) | GetFindInSetFilter(set); + } + + static int32_t strip_chars(char16_t* s, uint32_t max, const char* set) { + return StripChars2(s, max, set); + } + + static int32_t compress_chars(char16_t* s, uint32_t len, const char* set) { + return CompressChars2(s, len, set); + } +}; + +//----------------------------------------------------------------------------- + +template <class L, class R> +# ifndef __SUNPRO_CC +static +# endif /* !__SUNPRO_CC */ + int32_t + FindSubstring(const L* big, uint32_t bigLen, const R* little, + uint32_t littleLen, bool ignoreCase) { + if (littleLen > bigLen) return kNotFound; + + int32_t i, max = int32_t(bigLen - littleLen); + for (i = 0; i <= max; ++i, ++big) { + if (nsBufferRoutines<L>::compare(big, little, littleLen, ignoreCase) == 0) + return i; + } + + return kNotFound; +} + +template <class L, class R> +# ifndef __SUNPRO_CC +static +# endif /* !__SUNPRO_CC */ + int32_t + RFindSubstring(const L* big, uint32_t bigLen, const R* little, + uint32_t littleLen, bool ignoreCase) { + if (littleLen > bigLen) return kNotFound; + + int32_t i, max = int32_t(bigLen - littleLen); + + const L* iter = big + max; + for (i = max; iter >= big; --i, --iter) { + if (nsBufferRoutines<L>::compare(iter, little, littleLen, ignoreCase) == 0) + return i; + } + + return kNotFound; +} + +template <class CharT, class SetCharT> +# ifndef __SUNPRO_CC +static +# endif /* !__SUNPRO_CC */ + int32_t + FindCharInSet(const CharT* data, uint32_t dataLen, const SetCharT* 
set) { + CharT filter = nsBufferRoutines<CharT>::get_find_in_set_filter(set); + + const CharT* end = data + dataLen; + for (const CharT* iter = data; iter < end; ++iter) { + CharT currentChar = *iter; + if (currentChar & filter) + continue; // char is not in filter set; go on with next char. + + // test all chars + const SetCharT* charInSet = set; + CharT setChar = CharT(*charInSet); + while (setChar) { + if (setChar == currentChar) + return iter - data; // found it! return index of the found char. + + setChar = CharT(*(++charInSet)); + } + } + return kNotFound; +} + +template <class CharT, class SetCharT> +# ifndef __SUNPRO_CC +static +# endif /* !__SUNPRO_CC */ + int32_t + RFindCharInSet(const CharT* data, uint32_t dataLen, const SetCharT* set) { + CharT filter = nsBufferRoutines<CharT>::get_find_in_set_filter(set); + + for (const CharT* iter = data + dataLen - 1; iter >= data; --iter) { + CharT currentChar = *iter; + if (currentChar & filter) + continue; // char is not in filter set; go on with next char. + + // test all chars + const CharT* charInSet = set; + CharT setChar = *charInSet; + while (setChar) { + if (setChar == currentChar) + return iter - data; // found it! return index of the found char. 
+ + setChar = *(++charInSet); + } + } + return kNotFound; +} + +/** + * this method changes the meaning of |offset| and |count|: + * + * upon return, + * |offset| specifies start of search range + * |count| specifies length of search range + */ +static void Find_ComputeSearchRange(uint32_t bigLen, uint32_t littleLen, + int32_t& offset, int32_t& count) { + // |count| specifies how many iterations to make from |offset| + + if (offset < 0) { + offset = 0; + } else if (uint32_t(offset) > bigLen) { + count = 0; + return; + } + + int32_t maxCount = bigLen - offset; + if (count < 0 || count > maxCount) { + count = maxCount; + } else { + count += littleLen; + if (count > maxCount) count = maxCount; + } +} + +/** + * this method changes the meaning of |offset| and |count|: + * + * upon entry, + * |offset| specifies the end point from which to search backwards + * |count| specifies the number of iterations from |offset| + * + * upon return, + * |offset| specifies start of search range + * |count| specifies length of search range + * + * + * EXAMPLE + * + * + -- littleLen=4 -- + + * : : + * |____|____|____|____|____|____|____|____|____|____|____|____| + * : : + * offset=5 bigLen=12 + * + * if count = 4, then we expect this function to return offset = 2 and + * count = 7. 
/**
 * Converts a backwards-search request into the buffer window to scan.
 *
 * upon entry,
 *   |offset| specifies the end point from which to search backwards
 *            (negative means "the last position a match could start")
 *   |count|  specifies the number of iterations from |offset|
 *            (negative means "all the way to the beginning")
 *
 * upon return,
 *   |offset| specifies start of search range
 *   |count|  specifies length of search range
 *
 * Example: bigLen=12, littleLen=4, offset=5, count=4 yields offset=2 and
 * count=7.
 *
 * The window never extends past |bigLen|. When the incoming |offset| is
 * larger than bigLen - littleLen, the end of the window is clamped to
 * |bigLen|; for every other input the result is what the unclamped
 * computation (end = offset + littleLen) produces.
 */
static void RFind_ComputeSearchRange(uint32_t bigLen, uint32_t littleLen,
                                     int32_t& offset, int32_t& count) {
  if (littleLen > bigLen) {
    offset = 0;
    count = 0;
    return;
  }

  // 64-bit intermediates: |offset + 1| and |offset + littleLen| can exceed
  // the int32_t range for extreme arguments.
  const int64_t total = int64_t(bigLen);
  const int64_t needle = int64_t(littleLen);

  const int64_t from = (offset < 0) ? total - needle : int64_t(offset);
  const int64_t iterations = (count < 0) ? from + 1 : int64_t(count);

  int64_t start = from - iterations + 1;
  if (start < 0) start = 0;

  // A match starting at |from| ends at |from + needle|; never let the window
  // reach beyond the end of the buffer being searched.
  int64_t end = from + needle;
  if (end > total) end = total;

  count = int32_t(end - start);
  offset = int32_t(start);
}
+ const char_type* str = aString.get(); + int32_t result = FindSubstring(this->mData + aOffset, aCount, str, + aString.Length(), false); + if (result != kNotFound) result += aOffset; + return result; +} + +template int32_t nsTString<char16_t>::Find(const self_type&, int32_t, + int32_t) const; + +template <typename T> +template <typename Q, typename EnableIfChar16> +int32_t nsTString<T>::Find(const char_type* aString, int32_t aOffset, + int32_t aCount) const { + return Find(nsTDependentString<T>(aString), aOffset, aCount); +} + +template int32_t nsTString<char16_t>::Find(const char_type*, int32_t, + int32_t) const; + +template <typename T> +template <typename Q, typename EnableIfChar16> +int32_t nsTString<T>::RFind(const self_type& aString, int32_t aOffset, + int32_t aCount) const { + // this method changes the meaning of aOffset and aCount: + RFind_ComputeSearchRange(this->mLength, aString.Length(), aOffset, aCount); + + // Capture the raw buffer locally to help msvc deduce the type. + const char_type* str = aString.get(); + int32_t result = RFindSubstring(this->mData + aOffset, aCount, str, + aString.Length(), false); + if (result != kNotFound) result += aOffset; + return result; +} + +template int32_t nsTString<char16_t>::RFind(const self_type&, int32_t, + int32_t) const; + +template <typename T> +template <typename Q, typename EnableIfChar16> +int32_t nsTString<T>::RFind(const char_type* aString, int32_t aOffset, + int32_t aCount) const { + return RFind(nsTDependentString<T>(aString), aOffset, aCount); +} + +template int32_t nsTString<char16_t>::RFind(const char_type*, int32_t, + int32_t) const; + +template <typename T> +template <typename Q, typename EnableIfChar16> +int32_t nsTString<T>::FindCharInSet(const char* aSet, int32_t aOffset) const { + if (aOffset < 0) + aOffset = 0; + else if (aOffset >= int32_t(this->mLength)) + return kNotFound; + + int32_t result = + ::FindCharInSet(this->mData + aOffset, this->mLength - aOffset, aSet); + if (result != kNotFound) 
result += aOffset; + return result; +} + +template int32_t nsTString<char16_t>::FindCharInSet(const char*, int32_t) const; + +template <typename T> +template <typename Q, typename EnableIfChar16> +void nsTString<T>::ReplaceChar(const char* aSet, char16_t aNewChar) { + if (!this->EnsureMutable()) // XXX do this lazily? + this->AllocFailed(this->mLength); + + char16_t* data = this->mData; + uint32_t lenRemaining = this->mLength; + + while (lenRemaining) { + int32_t i = ::FindCharInSet(data, lenRemaining, aSet); + if (i == kNotFound) break; + + data[i++] = aNewChar; + data += i; + lenRemaining -= i; + } +} + +namespace mozilla { +namespace detail { + +template <typename T> +template <typename Q, typename EnableIfChar> +int32_t nsTStringRepr<T>::Compare(const char_type* aString, bool aIgnoreCase, + int32_t aCount) const { + uint32_t strLen = char_traits::length(aString); + + int32_t maxCount = int32_t(XPCOM_MIN(this->mLength, strLen)); + + int32_t compareCount; + if (aCount < 0 || aCount > maxCount) + compareCount = maxCount; + else + compareCount = aCount; + + int32_t result = nsBufferRoutines<T>::compare(this->mData, aString, + compareCount, aIgnoreCase); + + if (result == 0 && (aCount < 0 || strLen < uint32_t(aCount) || + this->mLength < uint32_t(aCount))) { + // Since the caller didn't give us a length to test, or strings shorter + // than aCount, and compareCount characters matched, we have to assume + // that the longer string is greater. + + if (this->mLength != strLen) result = (this->mLength < strLen) ? 
-1 : 1; + } + return result; +} + +template int32_t nsTStringRepr<char>::Compare(const char_type*, bool, + int32_t) const; + +template <typename T> +template <typename Q, typename EnableIfChar16> +bool nsTStringRepr<T>::EqualsIgnoreCase(const incompatible_char_type* aString, + int32_t aCount) const { + uint32_t strLen = nsCharTraits<char>::length(aString); + + int32_t maxCount = int32_t(XPCOM_MIN(this->mLength, strLen)); + + int32_t compareCount; + if (aCount < 0 || aCount > maxCount) + compareCount = maxCount; + else + compareCount = aCount; + + int32_t result = + nsBufferRoutines<T>::compare(this->mData, aString, compareCount, true); + + if (result == 0 && (aCount < 0 || strLen < uint32_t(aCount) || + this->mLength < uint32_t(aCount))) { + // Since the caller didn't give us a length to test, or strings shorter + // than aCount, and compareCount characters matched, we have to assume + // that the longer string is greater. + + if (this->mLength != strLen) + result = 1; // Arbitrarily using any number != 0 + } + return result == 0; +} + +template bool nsTStringRepr<char16_t>::EqualsIgnoreCase( + const incompatible_char_type*, int32_t) const; + +} // namespace detail +} // namespace mozilla + +/** + * nsTString::ToDouble + */ + +template <> +double nsTString<char>::ToDouble(TrailingCharsPolicy aTrailingCharsPolicy, + nsresult* aErrorCode) const { + double res = 0.0; + if (this->mLength > 0) { + char* conv_stopped; + const char* str = this->mData; + // Use PR_strtod, not strtod, since we don't want locale involved. 
+ res = PR_strtod(str, &conv_stopped); + if (aTrailingCharsPolicy == TrailingCharsPolicy::Allow && + conv_stopped != str) { + *aErrorCode = NS_OK; + } else if (aTrailingCharsPolicy == TrailingCharsPolicy::Disallow && + conv_stopped == str + this->mLength) { + *aErrorCode = NS_OK; + } else { + *aErrorCode = NS_ERROR_ILLEGAL_VALUE; + } + } else { + // The string was too short (0 characters) + *aErrorCode = NS_ERROR_ILLEGAL_VALUE; + } + return res; +} + +template <> +double nsTString<char>::ToDouble(nsresult* aErrorCode) const { + return ToDouble(TrailingCharsPolicy::Disallow, aErrorCode); +} + +template <> +double nsTString<char16_t>::ToDouble(nsresult* aErrorCode) const { + return NS_LossyConvertUTF16toASCII(*this).ToDouble(aErrorCode); +} + +template <typename T> +float nsTString<T>::ToFloat(nsresult* aErrorCode) const { + return (float)ToDouble(aErrorCode); +} + +template <> +double nsTString<char>::ToDoubleAllowTrailingChars(nsresult* aErrorCode) const { + return ToDouble(TrailingCharsPolicy::Allow, aErrorCode); +} + +template <> +double nsTString<char16_t>::ToDoubleAllowTrailingChars( + nsresult* aErrorCode) const { + return NS_LossyConvertUTF16toASCII(*this).ToDoubleAllowTrailingChars( + aErrorCode); +} + +template <typename T> +float nsTString<T>::ToFloatAllowTrailingChars(nsresult* aErrorCode) const { + return (float)ToDoubleAllowTrailingChars(aErrorCode); +} + +template class nsTString<char>; +template class nsTString<char16_t>; + +#endif // !MOZ_STRING_WITH_OBSOLETE_API diff --git a/xpcom/string/nsSubstring.cpp b/xpcom/string/nsSubstring.cpp new file mode 100644 index 0000000000..6c19463802 --- /dev/null +++ b/xpcom/string/nsSubstring.cpp @@ -0,0 +1,424 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef DEBUG +# define ENABLE_STRING_STATS +#endif + +#include "mozilla/Atomics.h" +#include "mozilla/MemoryReporting.h" + +#ifdef ENABLE_STRING_STATS +# include <stdio.h> +#endif + +#include <stdlib.h> +#include "nsAString.h" +#include "nsString.h" +#include "nsStringBuffer.h" +#include "nsDependentString.h" +#include "nsMemory.h" +#include "prprf.h" +#include "nsCOMPtr.h" + +#include "mozilla/IntegerPrintfMacros.h" +#ifdef XP_WIN +# include <windows.h> +# include <process.h> +# define getpid() _getpid() +# define pthread_self() GetCurrentThreadId() +#else +# include <pthread.h> +# include <unistd.h> +#endif + +using mozilla::Atomic; + +// --------------------------------------------------------------------------- + +static const char16_t gNullChar = 0; + +char* const nsCharTraits<char>::sEmptyBuffer = + (char*)const_cast<char16_t*>(&gNullChar); +char16_t* const nsCharTraits<char16_t>::sEmptyBuffer = + const_cast<char16_t*>(&gNullChar); + +// --------------------------------------------------------------------------- + +#ifdef ENABLE_STRING_STATS +class nsStringStats { + public: + nsStringStats() + : mAllocCount(0), mReallocCount(0), mFreeCount(0), mShareCount(0) {} + + ~nsStringStats() { + // this is a hack to suppress duplicate string stats printing + // in seamonkey as a result of the string code being linked + // into seamonkey and libxpcom! :-( + if (!mAllocCount && !mAdoptCount) { + return; + } + + // Only print the stats if we detect a leak. 
+ if (mAllocCount <= mFreeCount && mAdoptCount <= mAdoptFreeCount) { + return; + } + + printf("nsStringStats\n"); + printf(" => mAllocCount: % 10d\n", int(mAllocCount)); + printf(" => mReallocCount: % 10d\n", int(mReallocCount)); + printf(" => mFreeCount: % 10d", int(mFreeCount)); + if (mAllocCount > mFreeCount) { + printf(" -- LEAKED %d !!!\n", mAllocCount - mFreeCount); + } else { + printf("\n"); + } + printf(" => mShareCount: % 10d\n", int(mShareCount)); + printf(" => mAdoptCount: % 10d\n", int(mAdoptCount)); + printf(" => mAdoptFreeCount: % 10d", int(mAdoptFreeCount)); + if (mAdoptCount > mAdoptFreeCount) { + printf(" -- LEAKED %d !!!\n", mAdoptCount - mAdoptFreeCount); + } else { + printf("\n"); + } + printf(" => Process ID: %" PRIuPTR ", Thread ID: %" PRIuPTR "\n", + uintptr_t(getpid()), uintptr_t(pthread_self())); + } + + typedef Atomic<int32_t, mozilla::SequentiallyConsistent> AtomicInt; + + AtomicInt mAllocCount; + AtomicInt mReallocCount; + AtomicInt mFreeCount; + AtomicInt mShareCount; + AtomicInt mAdoptCount; + AtomicInt mAdoptFreeCount; +}; +static nsStringStats gStringStats; +# define STRING_STAT_INCREMENT(_s) (gStringStats.m##_s##Count)++ +#else +# define STRING_STAT_INCREMENT(_s) +#endif + +// --------------------------------------------------------------------------- + +void ReleaseData(void* aData, nsAString::DataFlags aFlags) { + if (aFlags & nsAString::DataFlags::REFCOUNTED) { + nsStringBuffer::FromData(aData)->Release(); + } else if (aFlags & nsAString::DataFlags::OWNED) { + free(aData); + STRING_STAT_INCREMENT(AdoptFree); + // Treat this as destruction of a "StringAdopt" object for leak + // tracking purposes. + MOZ_LOG_DTOR(aData, "StringAdopt", 1); + } + // otherwise, nothing to do. 
+} + +// --------------------------------------------------------------------------- + +// XXX or we could make nsStringBuffer be a friend of nsTAString + +class nsAStringAccessor : public nsAString { + private: + nsAStringAccessor(); // NOT IMPLEMENTED + + public: + char_type* data() const { return mData; } + size_type length() const { return mLength; } + DataFlags flags() const { return mDataFlags; } + + void set(char_type* aData, size_type aLen, DataFlags aDataFlags) { + ReleaseData(mData, mDataFlags); + SetData(aData, aLen, aDataFlags); + } +}; + +class nsACStringAccessor : public nsACString { + private: + nsACStringAccessor(); // NOT IMPLEMENTED + + public: + char_type* data() const { return mData; } + size_type length() const { return mLength; } + DataFlags flags() const { return mDataFlags; } + + void set(char_type* aData, size_type aLen, DataFlags aDataFlags) { + ReleaseData(mData, mDataFlags); + SetData(aData, aLen, aDataFlags); + } +}; + +// --------------------------------------------------------------------------- + +void nsStringBuffer::AddRef() { + // Memory synchronization is not required when incrementing a + // reference count. The first increment of a reference count on a + // thread is not important, since the first use of the object on a + // thread can happen before it. What is important is the transfer + // of the pointer to that thread, which may happen prior to the + // first increment on that thread. The necessary memory + // synchronization is done by the mechanism that transfers the + // pointer between threads. 
+#ifdef NS_BUILD_REFCNT_LOGGING + uint32_t count = +#endif + mRefCount.fetch_add(1, std::memory_order_relaxed) +#ifdef NS_BUILD_REFCNT_LOGGING + + 1 +#endif + ; + STRING_STAT_INCREMENT(Share); + NS_LOG_ADDREF(this, count, "nsStringBuffer", sizeof(*this)); +} + +void nsStringBuffer::Release() { + // Since this may be the last release on this thread, we need + // release semantics so that prior writes on this thread are visible + // to the thread that destroys the object when it reads mValue with + // acquire semantics. + uint32_t count = mRefCount.fetch_sub(1, std::memory_order_release) - 1; + NS_LOG_RELEASE(this, count, "nsStringBuffer"); + if (count == 0) { + // We're going to destroy the object on this thread, so we need + // acquire semantics to synchronize with the memory released by + // the last release on other threads, that is, to ensure that + // writes prior to that release are now visible on this thread. + count = mRefCount.load(std::memory_order_acquire); + + STRING_STAT_INCREMENT(Free); + free(this); // we were allocated with |malloc| + } +} + +/** + * Alloc returns a pointer to a new string header with set capacity. 
+ *
+ * The header and aSize bytes of storage come from a single malloc() call. On
+ * success the reference count is set to 1 and mStorageSize to aSize. If
+ * malloc() fails, the returned already_AddRefed wraps a null pointer.
+ */
+already_AddRefed<nsStringBuffer> nsStringBuffer::Alloc(size_t aSize) {
+  NS_ASSERTION(aSize != 0, "zero capacity allocation not allowed");
+  NS_ASSERTION(sizeof(nsStringBuffer) + aSize <= size_t(uint32_t(-1)) &&
+                   sizeof(nsStringBuffer) + aSize > aSize,
+               "mStorageSize will truncate");
+
+  nsStringBuffer* hdr = (nsStringBuffer*)malloc(sizeof(nsStringBuffer) + aSize);
+  if (hdr) {
+    STRING_STAT_INCREMENT(Alloc);
+
+    hdr->mRefCount = 1;
+    hdr->mStorageSize = aSize;
+    NS_LOG_ADDREF(hdr, 1, "nsStringBuffer", sizeof(*hdr));
+  }
+  return dont_AddRef(hdr);
+}
+/**
+ * Realloc resizes the allocation that holds aHdr to
+ * sizeof(nsStringBuffer) + aSize bytes via realloc(). On success it returns
+ * the resulting header (which may differ from aHdr) with mStorageSize set to
+ * aSize; on failure it returns null.
+ *
+ * NOTE(review): NS_LOG_RELEASE is issued for aHdr before realloc() is
+ * attempted, and no matching NS_LOG_ADDREF follows when realloc() fails.
+ * The original block stays allocated in that case, so refcount logging
+ * presumably goes out of balance on this out-of-memory path -- confirm
+ * against the NS_LOG_* macros and the callers.
+ */
+nsStringBuffer* nsStringBuffer::Realloc(nsStringBuffer* aHdr, size_t aSize) {
+  STRING_STAT_INCREMENT(Realloc);
+
+  NS_ASSERTION(aSize != 0, "zero capacity allocation not allowed");
+  NS_ASSERTION(sizeof(nsStringBuffer) + aSize <= size_t(uint32_t(-1)) &&
+                   sizeof(nsStringBuffer) + aSize > aSize,
+               "mStorageSize will truncate");
+
+  // no point in trying to save ourselves if we hit this assertion
+  NS_ASSERTION(!aHdr->IsReadonly(), "|Realloc| attempted on readonly string");
+
+  // Treat this as a release and addref for refcounting purposes, since we
+  // just asserted that the refcount is 1. If we don't do that, refcount
+  // logging will claim we've leaked all sorts of stuff.
+  NS_LOG_RELEASE(aHdr, 0, "nsStringBuffer");
+
+  aHdr = (nsStringBuffer*)realloc(aHdr, sizeof(nsStringBuffer) + aSize);
+  if (aHdr) {
+    NS_LOG_ADDREF(aHdr, 1, "nsStringBuffer", sizeof(*aHdr));
+    aHdr->mStorageSize = aSize;
+  }
+
+  return aHdr;
+}
+/**
+ * Returns FromData() of aStr's data pointer when aStr's data flags include
+ * REFCOUNTED, and nullptr otherwise. The cast to nsAStringAccessor is only
+ * a way to read the string's data pointer and flags.
+ */
+nsStringBuffer* nsStringBuffer::FromString(const nsAString& aStr) {
+  const nsAStringAccessor* accessor =
+      static_cast<const nsAStringAccessor*>(&aStr);
+
+  if (!(accessor->flags() & nsAString::DataFlags::REFCOUNTED)) {
+    return nullptr;
+  }
+
+  return FromData(accessor->data());
+}
+/** Narrow-character overload; same logic as the nsAString version above. */
+nsStringBuffer* nsStringBuffer::FromString(const nsACString& aStr) {
+  const nsACStringAccessor* accessor =
+      static_cast<const nsACStringAccessor*>(&aStr);
+
+  if (!(accessor->flags() & nsACString::DataFlags::REFCOUNTED)) {
+    return nullptr;
+  }
+
+  return FromData(accessor->data());
+}
+/**
+ * Makes aStr refer to this buffer's Data() with length aLen and the flags
+ * REFCOUNTED | TERMINATED. A reference is added first unless aMoveOwnership
+ * is true. data[aLen] is expected to be the null terminator (checked with
+ * MOZ_DIAGNOSTIC_ASSERT).
+ */
+void nsStringBuffer::ToString(uint32_t aLen, nsAString& aStr,
+                              bool aMoveOwnership) {
+  char16_t* data = static_cast<char16_t*>(Data());
+
+  nsAStringAccessor* accessor = static_cast<nsAStringAccessor*>(&aStr);
+  MOZ_DIAGNOSTIC_ASSERT(data[aLen] == char16_t(0),
+                        "data should be null terminated");
+
+  nsAString::DataFlags flags =
+      nsAString::DataFlags::REFCOUNTED | nsAString::DataFlags::TERMINATED;
+
+  if (!aMoveOwnership) {
+    AddRef();
+  }
+  accessor->set(data, aLen, flags);
+}
+/** Narrow-character overload; same logic as the nsAString version above. */
+void nsStringBuffer::ToString(uint32_t aLen, nsACString& aStr,
+                              bool aMoveOwnership) {
+  char* data = static_cast<char*>(Data());
+
+  nsACStringAccessor* accessor = static_cast<nsACStringAccessor*>(&aStr);
+  MOZ_DIAGNOSTIC_ASSERT(data[aLen] == char(0),
+                        "data should be null terminated");
+
+  nsACString::DataFlags flags =
+      nsACString::DataFlags::REFCOUNTED | nsACString::DataFlags::TERMINATED;
+
+  if (!aMoveOwnership) {
+    AddRef();
+  }
+  accessor->set(data, aLen, flags);
+}
+/** Returns 0 when IsReadonly() is true, otherwise aMallocSizeOf(this). */
+size_t nsStringBuffer::SizeOfIncludingThisIfUnshared(
+    mozilla::MallocSizeOf aMallocSizeOf) const {
+  return IsReadonly() ? 0 : aMallocSizeOf(this);
+}
+/** Returns aMallocSizeOf(this) unconditionally. */
+size_t nsStringBuffer::SizeOfIncludingThisEvenIfShared(
+    mozilla::MallocSizeOf aMallocSizeOf) const {
+  return aMallocSizeOf(this);
+}
+
+// ---------------------------------------------------------------------------
+
+// define nsAString
+#include "nsTSubstring.cpp"
+
+// Provide rust bindings to the nsA[C]String types
+extern "C" {
+
+// This is a no-op on release, so we ifdef it out such that using it in release
+// results in a linker error.
+#ifdef DEBUG
+void Gecko_IncrementStringAdoptCount(void* aData) {
+  MOZ_LOG_CTOR(aData, "StringAdopt", 1);
+}
+#elif defined(MOZ_DEBUG_RUST)
+void Gecko_IncrementStringAdoptCount(void* aData) {}
+#endif
+/* Each wrapper below forwards to the nsACString member of the same name. */
+void Gecko_FinalizeCString(nsACString* aThis) { aThis->~nsACString(); }
+
+void Gecko_AssignCString(nsACString* aThis, const nsACString* aOther) {
+  aThis->Assign(*aOther);
+}
+
+void Gecko_TakeFromCString(nsACString* aThis, nsACString* aOther) {
+  aThis->Assign(std::move(*aOther));
+}
+
+void Gecko_AppendCString(nsACString* aThis, const nsACString* aOther) {
+  aThis->Append(*aOther);
+}
+
+void Gecko_SetLengthCString(nsACString* aThis, uint32_t aLength) {
+  aThis->SetLength(aLength);
+}
+
+bool Gecko_FallibleAssignCString(nsACString* aThis, const nsACString* aOther) {
+  return aThis->Assign(*aOther, mozilla::fallible);
+}
+
+bool Gecko_FallibleTakeFromCString(nsACString* aThis, nsACString* aOther) {
+  return aThis->Assign(std::move(*aOther), mozilla::fallible);
+}
+
+bool Gecko_FallibleAppendCString(nsACString* aThis, const nsACString* aOther) {
+  return aThis->Append(*aOther, mozilla::fallible);
+}
+
+bool Gecko_FallibleSetLengthCString(nsACString* aThis, uint32_t aLength) {
+  return aThis->SetLength(aLength, mozilla::fallible);
+}
+
+char* Gecko_BeginWritingCString(nsACString* aThis) {
+  return aThis->BeginWriting();
+}
+
+char* Gecko_FallibleBeginWritingCString(nsACString* aThis) {
+  return aThis->BeginWriting(mozilla::fallible);
+}
+/**
+ * Forwards to StartBulkWriteImpl(). unwrapOr(UINT32_MAX) supplies UINT32_MAX
+ * when the call yields no success value, so callers presumably treat
+ * UINT32_MAX as the failure indicator -- confirm on the Rust side.
+ */
+uint32_t Gecko_StartBulkWriteCString(nsACString* aThis, uint32_t aCapacity,
+                                     uint32_t aUnitsToPreserve,
+                                     bool aAllowShrinking) {
+  return aThis->StartBulkWriteImpl(aCapacity, aUnitsToPreserve, aAllowShrinking)
+      .unwrapOr(UINT32_MAX);
+}
+/* Wide-character (nsAString) counterparts of the wrappers above. */
+void Gecko_FinalizeString(nsAString* aThis) { aThis->~nsAString(); }
+
+void Gecko_AssignString(nsAString* aThis, const nsAString* aOther) {
+  aThis->Assign(*aOther);
+}
+
+void Gecko_TakeFromString(nsAString* aThis, nsAString* aOther) {
+  aThis->Assign(std::move(*aOther));
+}
+
+void Gecko_AppendString(nsAString* aThis, const nsAString* aOther) {
+  aThis->Append(*aOther);
+}
+
+void Gecko_SetLengthString(nsAString* aThis, uint32_t aLength) {
+  aThis->SetLength(aLength);
+}
+
+bool Gecko_FallibleAssignString(nsAString* aThis, const nsAString* aOther) {
+  return aThis->Assign(*aOther, mozilla::fallible);
+}
+
+bool Gecko_FallibleTakeFromString(nsAString* aThis, nsAString* aOther) {
+  return aThis->Assign(std::move(*aOther), mozilla::fallible);
+}
+
+bool Gecko_FallibleAppendString(nsAString* aThis, const nsAString* aOther) {
+  return aThis->Append(*aOther, mozilla::fallible);
+}
+
+bool Gecko_FallibleSetLengthString(nsAString* aThis, uint32_t aLength) {
+  return aThis->SetLength(aLength, mozilla::fallible);
+}
+
+char16_t* Gecko_BeginWritingString(nsAString* aThis) {
+  return aThis->BeginWriting();
+}
+
+char16_t* Gecko_FallibleBeginWritingString(nsAString* aThis) {
+  return aThis->BeginWriting(mozilla::fallible);
+}
+
+uint32_t Gecko_StartBulkWriteString(nsAString* aThis, uint32_t aCapacity,
+                                    uint32_t aUnitsToPreserve,
+                                    bool aAllowShrinking) {
+  return aThis->StartBulkWriteImpl(aCapacity, aUnitsToPreserve, aAllowShrinking)
+      .unwrapOr(UINT32_MAX);
+}
+
+}  // extern "C"
diff --git a/xpcom/string/nsTDependentString.cpp b/xpcom/string/nsTDependentString.cpp
new file mode 100644
index 0000000000..f8a374367f
--- /dev/null
+++ b/xpcom/string/nsTDependentString.cpp
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8
sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/**
+ * Constructs a dependent string over [aStart, aEnd) with the TERMINATED data
+ * flag. The length is the pointer difference narrowed to uint32_t; it is
+ * computed in the initializer list, i.e. before the release assert in the
+ * body checks aStart <= aEnd.
+ */
+template <typename T>
+nsTDependentString<T>::nsTDependentString(const char_type* aStart,
+                                          const char_type* aEnd)
+    : string_type(const_cast<char_type*>(aStart), uint32_t(aEnd - aStart),
+                  DataFlags::TERMINATED, ClassFlags(0)) {
+  MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!");
+  this->AssertValidDependentString();
+}
+/**
+ * Rebinds this string to the part of str that starts at startPos and runs to
+ * str's end. A startPos greater than str.Length() is clamped to the length,
+ * which yields a zero-length result. Of str's data flags only TERMINATED and
+ * LITERAL are carried over.
+ *
+ * NOTE(review): Finalize() runs before str is read, so this presumably
+ * requires that str is not *this -- confirm.
+ */
+template <typename T>
+void nsTDependentString<T>::Rebind(const string_type& str, uint32_t startPos) {
+  MOZ_ASSERT(str.GetDataFlags() & DataFlags::TERMINATED,
+             "Unterminated flat string");
+
+  // If we currently own a buffer, release it.
+  this->Finalize();
+
+  size_type strLength = str.Length();
+
+  if (startPos > strLength) {
+    startPos = strLength;
+  }
+
+  char_type* newData =
+      const_cast<char_type*>(static_cast<const char_type*>(str.Data())) +
+      startPos;
+  size_type newLen = strLength - startPos;
+  DataFlags newDataFlags =
+      str.GetDataFlags() & (DataFlags::TERMINATED | DataFlags::LITERAL);
+  this->SetData(newData, newLen, newDataFlags);
+}
+/**
+ * Rebinds to [aStart, aEnd) by forwarding to the (pointer, length) Rebind
+ * overload; release-asserts that aStart <= aEnd first.
+ */
+template <typename T>
+void nsTDependentString<T>::Rebind(const char_type* aStart,
+                                   const char_type* aEnd) {
+  MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!");
+  this->Rebind(aStart, uint32_t(aEnd - aStart));
+}
diff --git a/xpcom/string/nsTDependentString.h b/xpcom/string/nsTDependentString.h
new file mode 100644
index 0000000000..342f4c70bf
--- /dev/null
+++ b/xpcom/string/nsTDependentString.h
@@ -0,0 +1,125 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef nsTDependentString_h
+#define nsTDependentString_h
+
+#include "nsTString.h"
+
+/**
+ * nsTDependentString
+ *
+ * Stores a null-terminated, immutable sequence of characters.
+ *
+ * Subclass of nsTString that restricts string value to an immutable
+ * character sequence. This class does not own its data, so the creator
+ * of objects of this type must take care to ensure that a
+ * nsTDependentString continues to reference valid memory for the
+ * duration of its use.
+ */
+template <typename T>
+class nsTDependentString : public nsTString<T> {
+ public:
+  typedef nsTDependentString<T> self_type;
+  typedef nsTString<T> base_string_type;
+  typedef typename base_string_type::string_type string_type;
+
+  typedef typename base_string_type::fallible_t fallible_t;
+
+  typedef typename base_string_type::char_type char_type;
+  typedef typename base_string_type::char_traits char_traits;
+  typedef
+      typename base_string_type::incompatible_char_type incompatible_char_type;
+
+  typedef typename base_string_type::substring_tuple_type substring_tuple_type;
+
+  typedef typename base_string_type::const_iterator const_iterator;
+  typedef typename base_string_type::iterator iterator;
+
+  typedef typename base_string_type::comparator_type comparator_type;
+
+  typedef typename base_string_type::const_char_iterator const_char_iterator;
+
+  typedef typename base_string_type::index_type index_type;
+  typedef typename base_string_type::size_type size_type;
+
+  // These are only for internal use within the string classes:
+  typedef typename base_string_type::DataFlags DataFlags;
+  typedef typename base_string_type::ClassFlags ClassFlags;
+
+ public:
+  /**
+   * constructors
+   *
+   * The pointer-based forms hand the caller's pointer and a length to the
+   * base constructor with DataFlags::TERMINATED and then call
+   * AssertValidDependentString(). The single-pointer form measures the
+   * length with char_traits::length().
+   */
+
+  nsTDependentString(const char_type* aStart, const char_type* aEnd);
+
+  nsTDependentString(const char_type* aData, uint32_t aLength)
+      : string_type(const_cast<char_type*>(aData), aLength,
+                    DataFlags::TERMINATED, ClassFlags(0)) {
+    this->AssertValidDependentString();
+  }
+
+#if defined(MOZ_USE_CHAR16_WRAPPER)
+  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
+  nsTDependentString(char16ptr_t aData, uint32_t aLength)
+      : nsTDependentString(static_cast<const char16_t*>(aData), aLength) {}
+#endif
+
+  explicit nsTDependentString(const char_type* aData)
+      : string_type(const_cast<char_type*>(aData),
+                    uint32_t(char_traits::length(aData)), DataFlags::TERMINATED,
+                    ClassFlags(0)) {
+    string_type::AssertValidDependentString();
+  }
+
+#if defined(MOZ_USE_CHAR16_WRAPPER)
+  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
+  explicit nsTDependentString(char16ptr_t aData)
+      : nsTDependentString(static_cast<const char16_t*>(aData)) {}
+#endif
+
+  nsTDependentString(const string_type& aStr, uint32_t aStartPos)
+      : string_type() {
+    Rebind(aStr, aStartPos);
+  }
+
+  // Create a nsTDependentString to be bound later
+  nsTDependentString() : string_type() {}
+
+  // auto-generated destructor OK
+  /* Move: rebind to aStr's data, then reset aStr via SetToEmptyBuffer(). */
+  nsTDependentString(self_type&& aStr) : string_type() {
+    Rebind(aStr, /* aStartPos = */ 0);
+    aStr.SetToEmptyBuffer();
+  }
+
+  explicit nsTDependentString(const self_type& aStr) : string_type() {
+    Rebind(aStr, /* aStartPos = */ 0);
+  }
+
+  /**
+   * allow this class to be bound to a different string...
+   */
+
+  using nsTString<T>::Rebind;
+  void Rebind(const char_type* aData) {
+    Rebind(aData, uint32_t(char_traits::length(aData)));
+  }
+
+  void Rebind(const char_type* aStart, const char_type* aEnd);
+  void Rebind(const string_type&, uint32_t aStartPos);
+
+ private:
+  // NOT USED
+  nsTDependentString(const substring_tuple_type&) = delete;
+  self_type& operator=(const self_type& aStr) = delete;
+};
+
+extern template class nsTDependentString<char>;
+extern template class nsTDependentString<char16_t>;
+
+#endif
diff --git a/xpcom/string/nsTDependentSubstring.cpp b/xpcom/string/nsTDependentSubstring.cpp
new file mode 100644
index 0000000000..4bf1e532df
--- /dev/null
+++ b/xpcom/string/nsTDependentSubstring.cpp
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsTDependentSubstring.h"
+/**
+ * Rebinds this substring to at most `length` characters of str starting at
+ * startPos. startPos is clamped to str.Length() and the length to what
+ * remains after startPos. The new data flags are DataFlags(0), so the result
+ * is not marked TERMINATED.
+ */
+template <typename T>
+void nsTDependentSubstring<T>::Rebind(const substring_type& str,
+                                      uint32_t startPos, uint32_t length) {
+  // If we currently own a buffer, release it.
+  this->Finalize();
+
+  size_type strLength = str.Length();
+
+  if (startPos > strLength) {
+    startPos = strLength;
+  }
+
+  char_type* newData =
+      const_cast<char_type*>(static_cast<const char_type*>(str.Data())) +
+      startPos;
+  size_type newLength = XPCOM_MIN(length, strLength - startPos);
+  DataFlags newDataFlags = DataFlags(0);
+  this->SetData(newData, newLength, newDataFlags);
+}
+/** Rebinds to the caller's buffer [data, data + length) with DataFlags(0). */
+template <typename T>
+void nsTDependentSubstring<T>::Rebind(const char_type* data, size_type length) {
+  NS_ASSERTION(data, "nsTDependentSubstring must wrap a non-NULL buffer");
+
+  // If we currently own a buffer, release it.
+  this->Finalize();
+
+  char_type* newData =
+      const_cast<char_type*>(static_cast<const char_type*>(data));
+  size_type newLength = length;
+  DataFlags newDataFlags = DataFlags(0);
+  this->SetData(newData, newLength, newDataFlags);
+}
+
+template <typename T>
+void nsTDependentSubstring<T>::Rebind(const char_type* aStart,
+                                      const char_type* aEnd) {
+  MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!");
+  this->Rebind(aStart, size_type(aEnd - aStart));
+}
+/**
+ * As with nsTDependentString, the length is computed (and narrowed to
+ * uint32_t) in the initializer list before the release assert runs.
+ */
+template <typename T>
+nsTDependentSubstring<T>::nsTDependentSubstring(const char_type* aStart,
+                                                const char_type* aEnd)
+    : substring_type(const_cast<char_type*>(aStart), uint32_t(aEnd - aStart),
+                     DataFlags(0), ClassFlags(0)) {
+  MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!");
+}
+
+#if defined(MOZ_USE_CHAR16_WRAPPER)
+template <typename T>
+template <typename Q, typename EnableIfChar16>
+nsTDependentSubstring<T>::nsTDependentSubstring(char16ptr_t aStart,
+                                                char16ptr_t aEnd)
+    : substring_type(static_cast<const char16_t*>(aStart),
+                     static_cast<const char16_t*>(aEnd)) {
+  MOZ_RELEASE_ASSERT(static_cast<const char16_t*>(aStart) <=
+                         static_cast<const char16_t*>(aEnd),
+                     "Overflow!");
+}
+#endif
+
+template <typename T>
+nsTDependentSubstring<T>::nsTDependentSubstring(const const_iterator& aStart,
+                                                const const_iterator& aEnd)
+    : substring_type(const_cast<char_type*>(aStart.get()),
+                     uint32_t(aEnd.get() - aStart.get()), DataFlags(0),
+                     ClassFlags(0)) {
+  MOZ_RELEASE_ASSERT(aStart.get() <= aEnd.get(), "Overflow!");
+}
+
+template <typename T>
+const nsTDependentSubstring<T> Substring(const T* aStart, const T* aEnd) {
+  MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!");
+  return nsTDependentSubstring<T>(aStart, aEnd);
+}
+/* Explicit instantiations matching the extern templates in the header. */
+template nsTDependentSubstring<char> const Substring<char>(char const*,
+                                                           char const*);
+template nsTDependentSubstring<char16_t> const Substring<char16_t>(
+    char16_t const*, char16_t const*);
+
+#if defined(MOZ_USE_CHAR16_WRAPPER)
+const nsTDependentSubstring<char16_t> Substring(char16ptr_t aData,
+                                                uint32_t aLength) {
+  return nsTDependentSubstring<char16_t>(aData, aLength);
+}
+
+const nsTDependentSubstring<char16_t> Substring(char16ptr_t aStart,
+                                                char16ptr_t aEnd) {
+  return Substring(static_cast<const char16_t*>(aStart),
+                   static_cast<const char16_t*>(aEnd));
+}
+#endif
diff --git a/xpcom/string/nsTDependentSubstring.h b/xpcom/string/nsTDependentSubstring.h
new file mode 100644
index 0000000000..8ccb0d9fce
--- /dev/null
+++ b/xpcom/string/nsTDependentSubstring.h
@@ -0,0 +1,159 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+// IWYU pragma: private, include "nsString.h"
+
+#ifndef nsTDependentSubstring_h
+#define nsTDependentSubstring_h
+
+#include "nsTSubstring.h"
+#include "nsTLiteralString.h"
+#include "mozilla/Span.h"
+
+/**
+ * nsTDependentSubstring_CharT
+ *
+ * A string class which wraps an external array of string characters. It
+ * is the client code's responsibility to ensure that the external buffer
+ * remains valid for as long as the string is alive.
+ *
+ * NAMES:
+ *   nsDependentSubstring for wide characters
+ *   nsDependentCSubstring for narrow characters
+ *
+ * Every constructor and Rebind() overload visible here sets the data flags
+ * to DataFlags(0) or forwards to one that does, so instances are not marked
+ * as null-terminated.
+ */
+template <typename T>
+class nsTDependentSubstring : public nsTSubstring<T> {
+ public:
+  typedef nsTDependentSubstring<T> self_type;
+  typedef nsTSubstring<T> substring_type;
+  typedef typename substring_type::fallible_t fallible_t;
+
+  typedef typename substring_type::char_type char_type;
+  typedef typename substring_type::char_traits char_traits;
+  typedef
+      typename substring_type::incompatible_char_type incompatible_char_type;
+
+  typedef typename substring_type::substring_tuple_type substring_tuple_type;
+
+  typedef typename substring_type::const_iterator const_iterator;
+  typedef typename substring_type::iterator iterator;
+
+  typedef typename substring_type::comparator_type comparator_type;
+
+  typedef typename substring_type::const_char_iterator const_char_iterator;
+
+  typedef typename substring_type::index_type index_type;
+  typedef typename substring_type::size_type size_type;
+
+  // These are only for internal use within the string classes:
+  typedef typename substring_type::DataFlags DataFlags;
+  typedef typename substring_type::ClassFlags ClassFlags;
+
+ public:
+  void Rebind(const substring_type&, uint32_t aStartPos,
+              uint32_t aLength = size_type(-1));
+
+  void Rebind(const char_type* aData, size_type aLength);
+
+  void Rebind(const char_type* aStart, const char_type* aEnd);
+
+  nsTDependentSubstring(const substring_type& aStr, uint32_t aStartPos,
+                        uint32_t aLength = size_type(-1))
+      : substring_type() {
+    Rebind(aStr, aStartPos, aLength);
+  }
+
+  nsTDependentSubstring(const char_type* aData, size_type aLength)
+      : substring_type(const_cast<char_type*>(aData), aLength, DataFlags(0),
+                       ClassFlags(0)) {}
+
+  explicit nsTDependentSubstring(mozilla::Span<const char_type> aData)
+      : nsTDependentSubstring(aData.Elements(), aData.Length()) {}
+
+  nsTDependentSubstring(const char_type* aStart, const char_type* aEnd);
+
+#if defined(MOZ_USE_CHAR16_WRAPPER)
+  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
+  nsTDependentSubstring(char16ptr_t aData, size_type aLength)
+      : nsTDependentSubstring(static_cast<const char16_t*>(aData), aLength) {}
+
+  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
+  nsTDependentSubstring(char16ptr_t aStart, char16ptr_t aEnd);
+#endif
+
+  nsTDependentSubstring(const const_iterator& aStart,
+                        const const_iterator& aEnd);
+
+  // Create a nsTDependentSubstring to be bound later
+  nsTDependentSubstring() : substring_type() {}
+
+  // auto-generated copy-constructor OK (XXX really?? what about base class
+  // copy-ctor?)
+
+ private:
+  // NOT USED
+  void operator=(const self_type&) =
+      delete;  // we're immutable, you can't assign into a substring
+};
+
+extern template class nsTDependentSubstring<char>;
+extern template class nsTDependentSubstring<char16_t>;
+/* Free-function helpers: each simply constructs an nsTDependentSubstring. */
+template <typename T>
+inline const nsTDependentSubstring<T> Substring(
+    const nsTSubstring<T>& aStr, uint32_t aStartPos,
+    uint32_t aLength = uint32_t(-1)) {
+  return nsTDependentSubstring<T>(aStr, aStartPos, aLength);
+}
+
+template <typename T>
+inline const nsTDependentSubstring<T> Substring(
+    const nsTLiteralString<T>& aStr, uint32_t aStartPos,
+    uint32_t aLength = uint32_t(-1)) {
+  return nsTDependentSubstring<T>(aStr, aStartPos, aLength);
+}
+
+template <typename T>
+inline const nsTDependentSubstring<T> Substring(
+    const nsReadingIterator<T>& aStart, const nsReadingIterator<T>& aEnd) {
+  return nsTDependentSubstring<T>(aStart.get(), aEnd.get());
+}
+
+template <typename T>
+inline const nsTDependentSubstring<T> Substring(const T* aData,
+                                                uint32_t aLength) {
+  return nsTDependentSubstring<T>(aData, aLength);
+}
+
+template <typename T>
+const nsTDependentSubstring<T> Substring(const T* aStart, const T* aEnd);
+
+extern template const nsTDependentSubstring<char> Substring(const char* aStart,
+                                                            const char* aEnd);
+
+extern template const nsTDependentSubstring<char16_t> Substring(
+    const char16_t* aStart, const char16_t* aEnd);
+/*
+ * NOTE(review): the (char16ptr_t, uint32_t) overload below is declared
+ * inline, but its definition in nsTDependentSubstring.cpp is out of line --
+ * confirm that other translation units can actually call it.
+ */
+#if defined(MOZ_USE_CHAR16_WRAPPER)
+inline const nsTDependentSubstring<char16_t> Substring(char16ptr_t aData,
+                                                       uint32_t aLength);
+
+const nsTDependentSubstring<char16_t> Substring(char16ptr_t aStart,
+                                                char16ptr_t aEnd);
+#endif
+/** Returns a dependent substring over (at most) the first aCount characters. */
+template <typename T>
+inline const nsTDependentSubstring<T> StringHead(const nsTSubstring<T>& aStr,
+                                                 uint32_t aCount) {
+  return nsTDependentSubstring<T>(aStr, 0, aCount);
+}
+/**
+ * Returns a dependent substring over the last aCount characters of aStr.
+ *
+ * NOTE(review): when aCount > aStr.Length() the start position
+ * aStr.Length() - aCount presumably wraps around (unsigned arithmetic), and
+ * Rebind() then clamps it to the string length, so the result looks like an
+ * empty substring rather than the whole string -- confirm this is intended.
+ */
+template <typename T>
+inline const nsTDependentSubstring<T> StringTail(const nsTSubstring<T>& aStr,
+                                                 uint32_t aCount) {
+  return nsTDependentSubstring<T>(aStr, aStr.Length() - aCount, aCount);
+}
+
+#endif
diff --git a/xpcom/string/nsTLiteralString.h b/xpcom/string/nsTLiteralString.h
new file mode 100644
index 0000000000..d7c9213a5c
--- /dev/null
+++ b/xpcom/string/nsTLiteralString.h
@@ -0,0 +1,111 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsTLiteralString_h
+#define nsTLiteralString_h
+
+#include "nsTStringRepr.h"
+
+/**
+ * nsTLiteralString_CharT
+ *
+ * Stores a null-terminated, immutable sequence of characters.
+ *
+ * nsTString-lookalike that restricts its string value to a literal character
+ * sequence. Can be implicitly cast to const nsTString& (the const is
+ * essential, since this class's data are not writable). The data are assumed
+ * to be static (permanent) and therefore, as an optimization, this class
+ * does not have a destructor.
+ */
+template <typename T>
+class nsTLiteralString : public mozilla::detail::nsTStringRepr<T> {
+ public:
+  typedef nsTLiteralString<T> self_type;
+
+#ifdef __clang__
+  // bindgen w/ clang 3.9 at least chokes on a typedef, but using is okay.
+  using typename mozilla::detail::nsTStringRepr<T>::base_string_type;
+#else
+  // On the other hand msvc chokes on the using statement. It seems others
+  // don't care either way so we lump them in here.
+  typedef typename mozilla::detail::nsTStringRepr<T>::base_string_type
+      base_string_type;
+#endif
+
+  typedef typename base_string_type::char_type char_type;
+  typedef typename base_string_type::size_type size_type;
+  typedef typename base_string_type::DataFlags DataFlags;
+  typedef typename base_string_type::ClassFlags ClassFlags;
+
+ public:
+  /**
+   * constructor
+   *
+   * Takes a reference to a const character array of N elements and
+   * delegates to the (pointer, length) constructor with length N - 1, i.e.
+   * the array without its last element (the terminator of a string
+   * literal).
+   */
+
+  template <size_type N>
+  explicit constexpr nsTLiteralString(const char_type (&aStr)[N])
+      : nsTLiteralString(aStr, N - 1) {}
+
+  /**
+   * For compatibility with existing code that requires const ns[C]String*.
+   * Use sparingly. If possible, rewrite code to use const ns[C]String&
+   * and the implicit cast will just work.
+   *
+   * NOTE(review): implemented as a reinterpret_cast of |this|, which
+   * assumes nsTLiteralString<T> and nsTString<T> are layout-compatible --
+   * confirm against nsTStringRepr.h.
+   */
+  MOZ_LIFETIME_BOUND const nsTString<T>& AsString() const {
+    return *reinterpret_cast<const nsTString<T>*>(this);
+  }
+
+  MOZ_LIFETIME_BOUND operator const nsTString<T>&() const { return AsString(); }
+  /* raw_type selects the return type of get(): N* in the general case. */
+  template <typename N, typename Dummy>
+  struct raw_type {
+    typedef N* type;
+  };
+
+#ifdef MOZ_USE_CHAR16_WRAPPER
+  template <typename Dummy>
+  struct raw_type<char16_t, Dummy> {
+    typedef char16ptr_t type;
+  };
+#endif
+
+  /**
+   * Prohibit get() on temporaries as in "x"_ns.get().
+   * These should be written as just "x", using a string literal directly.
+   */
+  const typename raw_type<T, int>::type get() const&& = delete;
+  const typename raw_type<T, int>::type get() const& { return this->mData; }
+
+// At least older gcc versions do not accept these friend declarations,
+// complaining about an "invalid argument list" here, but not where the actual
+// operators are defined or used. We make the supposed-to-be-private constructor
+// public when building with gcc, relying on the default clang builds to fail if
+// any non-private use of that constructor would get into the codebase.
+#if defined(__clang__)
+ private:
+  friend constexpr auto operator"" _ns(const char* aStr, std::size_t aLen);
+  friend constexpr auto operator"" _ns(const char16_t* aStr, std::size_t aLen);
+#else
+ public:
+#endif
+  // Only for use by operator""
+  constexpr nsTLiteralString(const char_type* aStr, size_t aLen)
+      : base_string_type(const_cast<char_type*>(aStr), aLen,
+                         DataFlags::TERMINATED | DataFlags::LITERAL,
+                         ClassFlags::NULL_TERMINATED) {}
+
+ public:
+  // NOT TO BE IMPLEMENTED
+  template <size_type N>
+  nsTLiteralString(char_type (&aStr)[N]) = delete;
+
+  self_type& operator=(const self_type&) = delete;
+};
+
+extern template class nsTLiteralString<char>;
+extern template class nsTLiteralString<char16_t>;
+
+#endif
diff --git a/xpcom/string/nsTPromiseFlatString.cpp b/xpcom/string/nsTPromiseFlatString.cpp
new file mode 100644
index 0000000000..1e0fde7ec6
--- /dev/null
+++ b/xpcom/string/nsTPromiseFlatString.cpp
@@ -0,0 +1,21 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+/**
+ * Init makes this string present str's characters as a flat string. If
+ * str.IsTerminated(), str's own buffer is referenced through SetData(),
+ * carrying over only the TERMINATED and LITERAL data flags. Otherwise the
+ * value is taken over with Assign().
+ */
+template <typename T>
+void nsTPromiseFlatString<T>::Init(const substring_type& str) {
+  if (str.IsTerminated()) {
+    char_type* newData =
+        const_cast<char_type*>(static_cast<const char_type*>(str.Data()));
+    size_type newLength = str.Length();
+    DataFlags newDataFlags =
+        str.GetDataFlags() & (DataFlags::TERMINATED | DataFlags::LITERAL);
+    // does not promote DataFlags::VOIDED
+
+    this->SetData(newData, newLength, newDataFlags);
+  } else {
+    this->Assign(str);
+  }
+}
diff --git a/xpcom/string/nsTPromiseFlatString.h b/xpcom/string/nsTPromiseFlatString.h
new file mode 100644
index 0000000000..126362ec9c
--- /dev/null
+++ b/xpcom/string/nsTPromiseFlatString.h
@@ -0,0 +1,136 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsTPromiseFlatString_h
+#define nsTPromiseFlatString_h
+
+#include "nsTString.h"
+
+/**
+ * NOTE:
+ *
+ * Try to avoid flat strings. |PromiseFlat[C]String| will help you as a last
+ * resort, and this may be necessary when dealing with legacy or OS calls,
+ * but in general, requiring a null-terminated array of characters kills many
+ * of the performance wins the string classes offer. Write your own code to
+ * use |nsA[C]String&|s for parameters. Write your string processing
+ * algorithms to exploit iterators. If you do this, you will benefit from
+ * being able to chain operations without copying or allocating and your code
+ * will be significantly more efficient. Remember, a function that takes a
+ * |const nsA[C]String&| can always be passed a raw character pointer by
+ * wrapping it (for free) in a |nsDependent[C]String|. But a function that
+ * takes a character pointer always has the potential to force allocation and
+ * copying.
+ *
+ *
+ * How to use it:
+ *
+ * A |nsPromiseFlat[C]String| doesn't necessarily own the characters it
+ * promises. You must never use it to promise characters out of a string
+ * with a shorter lifespan. The typical use will be something like this:
+ *
+ *   SomeOSFunction( PromiseFlatCString(aCSubstring).get() ); // GOOD
+ *
+ * Here's a BAD use:
+ *
+ *   const char* buffer = PromiseFlatCString(aCSubstring).get();
+ *   SomeOSFunction(buffer); // BAD!! |buffer| is a dangling pointer
+ *
+ * The only way to make one is with the function |PromiseFlat[C]String|,
+ * which produces a |const| instance. ``What if I need to keep a promise
+ * around for a little while?'' you might ask. In that case, you can keep a
+ * reference, like so:
+ *
+ *   const nsCString& flat = PromiseFlatCString(aCSubstring);
+ *   // Temporaries usually die after the full expression containing the
+ *   // expression that created the temporary is evaluated. But when a
+ *   // temporary is bound to a local reference, the temporary's lifetime
+ *   // is extended to the reference's lifetime (C++11 [class.temporary]p5).
+ *   //
+ *   // This reference keeps the anonymous temporary alive. But remember: it
+ *   // must _still_ have a lifetime shorter than that of |aCSubstring|, and
+ *   // |aCSubstring| must not be changed while the PromiseFlatCString lives.
+ *
+ *   SomeOSFunction(flat.get());
+ *   SomeOtherOSFunction(flat.get());
+ *
+ *
+ * How does it work?
+ *
+ * A |nsPromiseFlat[C]String| is just a wrapper for another string. If you
+ * apply it to a string that happens to be flat, your promise is just a
+ * dependent reference to the string's data. If you apply it to a non-flat
+ * string, then a temporary flat string is created for you, by allocating and
+ * copying. In the event that you end up assigning the result into a sharing
+ * string (e.g., |nsTString|), the right thing happens.
+ *
+ *
+ * Class layout:
+ *
+ * Construction from a substring goes through the private Init(); construction
+ * from a substring tuple assigns the tuple's value. The default constructor,
+ * assignment, and construction from |string_type| are deleted.
+ */
+
+template <typename T>
+class nsTPromiseFlatString : public nsTString<T> {
+ public:
+  typedef nsTPromiseFlatString<T> self_type;
+  typedef nsTString<T> base_string_type;
+  typedef typename base_string_type::substring_type substring_type;
+  typedef typename base_string_type::string_type string_type;
+  typedef typename base_string_type::substring_tuple_type substring_tuple_type;
+  typedef typename base_string_type::char_type char_type;
+  typedef typename base_string_type::size_type size_type;
+
+  // These are only for internal use within the string classes:
+  typedef typename base_string_type::DataFlags DataFlags;
+  typedef typename base_string_type::ClassFlags ClassFlags;
+
+ private:
+  void Init(const substring_type&);
+
+  // NOT TO BE IMPLEMENTED
+  void operator=(const self_type&) = delete;
+
+  // NOT TO BE IMPLEMENTED
+  nsTPromiseFlatString() = delete;
+
+  // NOT TO BE IMPLEMENTED
+  nsTPromiseFlatString(const string_type& aStr) = delete;
+
+ public:
+  explicit nsTPromiseFlatString(const substring_type& aStr) : string_type() {
+    Init(aStr);
+  }
+
+  explicit nsTPromiseFlatString(const substring_tuple_type& aTuple)
+      : string_type() {
+    // nothing else to do here except assign the value of the tuple
+    // into ourselves.
+    this->Assign(aTuple);
+  }
+};
+
+extern template class nsTPromiseFlatString<char>;
+extern template class nsTPromiseFlatString<char16_t>;
+
+// We template this so that the constructor is chosen based on the type of the
+// parameter. This allows us to reject attempts to promise a flat flat string.
// Builds a promise from a substring. The result is returned as a |const|
// value.
template <class T>
const nsTPromiseFlatString<T> TPromiseFlatString(
    const typename nsTPromiseFlatString<T>::substring_type& aString) {
  return nsTPromiseFlatString<T>(aString);
}

// Builds a promise from a substring tuple (a pending concatenation).
template <class T>
const nsTPromiseFlatString<T> TPromiseFlatString(
    const typename nsTPromiseFlatString<T>::substring_tuple_type& aString) {
  return nsTPromiseFlatString<T>(aString);
}

// Narrow-character spelling of the factory; only defined if not already
// provided elsewhere.
#ifndef PromiseFlatCString
#  define PromiseFlatCString TPromiseFlatString<char>
#endif

// Wide-character (char16_t) spelling of the factory.
#ifndef PromiseFlatString
#  define PromiseFlatString TPromiseFlatString<char16_t>
#endif

#endif
diff --git a/xpcom/string/nsTString.cpp b/xpcom/string/nsTString.cpp
new file mode 100644
index 0000000000..0af6116a7f
--- /dev/null
+++ b/xpcom/string/nsTString.cpp
@@ -0,0 +1,14 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Points this string at the caller-provided buffer |data| of |length|
// characters. The data is flagged TERMINATED, and the dependent-string
// assertions are run on the result.
template <typename T>
void nsTString<T>::Rebind(const char_type* data, size_type length) {
  // If we currently own a buffer, release it.
  this->Finalize();

  // SetData takes a mutable pointer, hence the const_cast.
  this->SetData(const_cast<char_type*>(data), length, DataFlags::TERMINATED);
  this->AssertValidDependentString();
}
diff --git a/xpcom/string/nsTString.h b/xpcom/string/nsTString.h
new file mode 100644
index 0000000000..19e5bbc99d
--- /dev/null
+++ b/xpcom/string/nsTString.h
@@ -0,0 +1,694 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
// IWYU pragma: private, include "nsString.h"

#ifndef nsTString_h
#define nsTString_h

#include "nsTSubstring.h"

/**
 * This is the canonical null-terminated string class. All subclasses
 * promise null-terminated storage. Instances of this class allocate
 * strings on the heap.
 *
 * NAMES:
 *   nsString for wide characters
 *   nsCString for narrow characters
 *
 * This class is also known as nsAFlat[C]String, where "flat" is used
 * to denote a null-terminated string.
 */
template <typename T>
class nsTString : public nsTSubstring<T> {
 public:
  typedef nsTString<T> self_type;

#ifdef __clang__
  // bindgen w/ clang 3.9 at least chokes on a typedef, but using is okay.
  using typename nsTSubstring<T>::substring_type;
#else
  // On the other hand msvc chokes on the using statement. It seems others
  // don't care either way so we lump them in here.
  typedef typename nsTSubstring<T>::substring_type substring_type;
#endif

  typedef typename substring_type::fallible_t fallible_t;

  typedef typename substring_type::char_type char_type;
  typedef typename substring_type::char_traits char_traits;
  typedef
      typename substring_type::incompatible_char_type incompatible_char_type;

  typedef typename substring_type::substring_tuple_type substring_tuple_type;

  typedef typename substring_type::const_iterator const_iterator;
  typedef typename substring_type::iterator iterator;

  typedef typename substring_type::comparator_type comparator_type;

  typedef typename substring_type::const_char_iterator const_char_iterator;

  typedef typename substring_type::index_type index_type;
  typedef typename substring_type::size_type size_type;

  // These are only for internal use within the string classes:
  typedef typename substring_type::DataFlags DataFlags;
  typedef typename substring_type::ClassFlags ClassFlags;

 public:
  /**
   * constructors
   *
   * Every constructor tags the base with ClassFlags::NULL_TERMINATED and
   * then assigns from its argument (if any).
   */

  nsTString() : substring_type(ClassFlags::NULL_TERMINATED) {}

  explicit nsTString(const char_type* aData, size_type aLength = size_type(-1))
      : substring_type(ClassFlags::NULL_TERMINATED) {
    this->Assign(aData, aLength);
  }

  explicit nsTString(mozilla::Span<const char_type> aData)
      : nsTString(aData.Elements(), aData.Length()) {}

#if defined(MOZ_USE_CHAR16_WRAPPER)
  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  explicit nsTString(char16ptr_t aStr, size_type aLength = size_type(-1))
      : substring_type(ClassFlags::NULL_TERMINATED) {
    this->Assign(static_cast<const char16_t*>(aStr), aLength);
  }
#endif

  nsTString(const self_type& aStr)
      : substring_type(ClassFlags::NULL_TERMINATED) {
    this->Assign(aStr);
  }

  nsTString(self_type&& aStr) : substring_type(ClassFlags::NULL_TERMINATED) {
    this->Assign(std::move(aStr));
  }

  MOZ_IMPLICIT nsTString(const substring_tuple_type& aTuple)
      : substring_type(ClassFlags::NULL_TERMINATED) {
    this->Assign(aTuple);
  }

  explicit nsTString(const substring_type& aReadable)
      : substring_type(ClassFlags::NULL_TERMINATED) {
    this->Assign(aReadable);
  }

  explicit nsTString(substring_type&& aReadable)
      : substring_type(ClassFlags::NULL_TERMINATED) {
    this->Assign(std::move(aReadable));
  }

  // |operator=| does not inherit, so we must define our own
  self_type& operator=(char_type aChar) {
    this->Assign(aChar);
    return *this;
  }
  self_type& operator=(const char_type* aData) {
    this->Assign(aData);
    return *this;
  }
  self_type& operator=(const self_type& aStr) {
    this->Assign(aStr);
    return *this;
  }
  self_type& operator=(self_type&& aStr) {
    this->Assign(std::move(aStr));
    return *this;
  }
#if defined(MOZ_USE_CHAR16_WRAPPER)
  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  self_type& operator=(const char16ptr_t aStr) {
    this->Assign(static_cast<const char16_t*>(aStr));
    return *this;
  }
#endif
  self_type& operator=(const substring_type& aStr) {
    this->Assign(aStr);
    return *this;
  }
  self_type& operator=(substring_type&& aStr) {
    this->Assign(std::move(aStr));
    return *this;
  }
  self_type& operator=(const substring_tuple_type& aTuple) {
    this->Assign(aTuple);
    return *this;
  }

  /**
   * returns the null-terminated string
   */

  // Selects the return type of get(): |const U*| in general, |char16ptr_t|
  // for char16_t when MOZ_USE_CHAR16_WRAPPER is defined.
  template <typename U, typename Dummy>
  struct raw_type {
    typedef const U* type;
  };
#if defined(MOZ_USE_CHAR16_WRAPPER)
  template <typename Dummy>
  struct raw_type<char16_t, Dummy> {
    typedef char16ptr_t type;
  };
#endif

  MOZ_NO_DANGLING_ON_TEMPORARIES typename raw_type<T, int>::type get() const {
    return this->mData;
  }

  /**
   * returns character at specified index.
   *
   * NOTE: unlike nsTSubstring::CharAt, this function allows you to index
   *       the null terminator character.
   */

  char_type CharAt(index_type aIndex) const {
    NS_ASSERTION(aIndex <= this->mLength, "index exceeds allowable range");
    return this->mData[aIndex];
  }

  char_type operator[](index_type aIndex) const { return CharAt(aIndex); }

#if MOZ_STRING_WITH_OBSOLETE_API

  /**
   * Search for the given substring within this string.
   *
   * @param aString is substring to be sought in this
   * @param aIgnoreCase selects case sensitivity
   * @param aOffset tells us where in this string to start searching
   * @param aCount tells us how far from the offset we are to search. Use
   *        -1 to search the whole string.
   * @return offset in string, or kNotFound
   */

  int32_t Find(const nsTString<char>& aString, bool aIgnoreCase = false,
               int32_t aOffset = 0, int32_t aCount = -1) const;
  int32_t Find(const char* aString, bool aIgnoreCase = false,
               int32_t aOffset = 0, int32_t aCount = -1) const;

  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  int32_t Find(const self_type& aString, int32_t aOffset = 0,
               int32_t aCount = -1) const;
  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  int32_t Find(const char_type* aString, int32_t aOffset = 0,
               int32_t aCount = -1) const;
#  ifdef MOZ_USE_CHAR16_WRAPPER
  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  int32_t Find(char16ptr_t aString, int32_t aOffset = 0,
               int32_t aCount = -1) const {
    return Find(static_cast<const char16_t*>(aString), aOffset, aCount);
  }
#  endif

  /**
   * This method scans the string backwards, looking for the given string
   *
   * @param aString is substring to be sought in this
   * @param aIgnoreCase tells us whether or not to do caseless compare
   * @param aOffset tells us where in this string to start searching.
   *        Use -1 to search from the end of the string.
   * @param aCount tells us how many iterations to make starting at the
   *        given offset.
   * @return offset in string, or kNotFound
   */

  // The aIgnoreCase option is only available on the char versions
  int32_t RFind(const nsTString<char>& aString, bool aIgnoreCase = false,
                int32_t aOffset = -1, int32_t aCount = -1) const;
  int32_t RFind(const char* aCString, bool aIgnoreCase = false,
                int32_t aOffset = -1, int32_t aCount = -1) const;

  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  int32_t RFind(const self_type& aString, int32_t aOffset = -1,
                int32_t aCount = -1) const;
  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  int32_t RFind(const char_type* aString, int32_t aOffset = -1,
                int32_t aCount = -1) const;

  /**
   * Search for given char within this string
   *
   * @param aChar is the character to search for
   * @param aOffset tells us where in this string to start searching
   * @param aCount tells us how far from the offset we are to search.
   *        Use -1 to search the whole string.
   * @return offset in string, or kNotFound
   */

  // int32_t FindChar( char16_t aChar, int32_t aOffset=0,
  //                   int32_t aCount=-1 ) const;
  int32_t RFindChar(char16_t aChar, int32_t aOffset = -1,
                    int32_t aCount = -1) const;

  /**
   * This method searches this string for the first character found in
   * the given string.
   *
   * @param aString contains set of chars to be found
   * @param aOffset tells us where in this string to start searching
   *        (counting from left)
   * @return offset in string, or kNotFound
   */

  int32_t FindCharInSet(const char_type* aString, int32_t aOffset = 0) const;
  int32_t FindCharInSet(const self_type& aString, int32_t aOffset = 0) const {
    return FindCharInSet(aString.get(), aOffset);
  }

  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  int32_t FindCharInSet(const char* aSet, int32_t aOffset = 0) const;

  /**
   * This method searches this string for the last character found in
   * the given string.
   *
   * @param aString contains set of chars to be found
   * @param aOffset tells us where in this string to start searching
   *        (counting from left)
   * @return offset in string, or kNotFound
   */

  int32_t RFindCharInSet(const char_type* aString, int32_t aOffset = -1) const;
  int32_t RFindCharInSet(const self_type& aString, int32_t aOffset = -1) const {
    return RFindCharInSet(aString.get(), aOffset);
  }

  /**
   * Perform string to double-precision float conversion.
   *
   * @param aErrorCode will contain error if one occurs
   * @return double-precision float rep of string value
   */
  double ToDouble(nsresult* aErrorCode) const;

  /**
   * Perform string to single-precision float conversion.
   *
   * @param aErrorCode will contain error if one occurs
   * @return single-precision float rep of string value
   */
  float ToFloat(nsresult* aErrorCode) const;

  /**
   * Similar to above ToDouble and ToFloat but allows trailing characters that
   * are not converted.
   */
  double ToDoubleAllowTrailingChars(nsresult* aErrorCode) const;
  float ToFloatAllowTrailingChars(nsresult* aErrorCode) const;

  /**
   * |Left|, |Mid|, and |Right| are annoying signatures that seem better almost
   * any _other_ way than they are now. Consider these alternatives
   *
   *   // ...a member function that returns a |Substring|
   *   aWritable = aReadable.Left(17);
   *   // ...a global function that returns a |Substring|
   *   aWritable = Left(aReadable, 17);
   *   // ...a global function that does the assignment
   *   Left(aReadable, 17, aWritable);
   *
   * as opposed to the current signature
   *
   *   // ...a member function that does the assignment
   *   aReadable.Left(aWritable, 17);
   *
   * or maybe just stamping them out in favor of |Substring|, they are just
   * duplicate functionality
   *
   *   aWritable = Substring(aReadable, 0, 17);
   */

  size_type Mid(self_type& aResult, index_type aStartPos,
                size_type aCount) const;

  size_type Left(self_type& aResult, size_type aCount) const {
    return Mid(aResult, 0, aCount);
  }

  size_type Right(self_type& aResult, size_type aCount) const {
    // Clamp so that the start position below cannot underflow.
    aCount = XPCOM_MIN(this->mLength, aCount);
    return Mid(aResult, this->mLength - aCount, aCount);
  }

  /**
   * Set a char inside this string at given index
   *
   * @param aChar is the char you want to write into this string
   * @param aIndex is the offset at which to write the given char
   * @return TRUE if successful
   */

  bool SetCharAt(char16_t aChar, uint32_t aIndex);

  /**
   * These methods are used to remove all occurrences of the
   * characters found in aSet from this string.
   *
   * @param aSet -- characters to be cut from this
   */
  void StripChars(const char_type* aSet);

  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  bool StripChars(const incompatible_char_type* aSet, const fallible_t&);

  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  void StripChars(const incompatible_char_type* aSet);

  /**
   * This method strips whitespace throughout the string.
   */
  void StripWhitespace();
  bool StripWhitespace(const fallible_t&);

  /**
   * Replaces every occurrence of aOldChar (or of any character found in
   * aSet) with aNewChar.
   */

  void ReplaceChar(char_type aOldChar, char_type aNewChar);
  void ReplaceChar(const char_type* aSet, char_type aNewChar);

  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  void ReplaceChar(const char* aSet, char16_t aNewChar);

  /**
   * Replace all occurrences of aTarget with aNewValue.
   * The complexity of this function is O(n+m), n being the length of the string
   * and m being the length of aNewValue.
   */
  void ReplaceSubstring(const self_type& aTarget, const self_type& aNewValue);
  void ReplaceSubstring(const char_type* aTarget, const char_type* aNewValue);
  [[nodiscard]] bool ReplaceSubstring(const self_type& aTarget,
                                      const self_type& aNewValue,
                                      const fallible_t&);
  [[nodiscard]] bool ReplaceSubstring(const char_type* aTarget,
                                      const char_type* aNewValue,
                                      const fallible_t&);

  /**
   * This method trims characters found in aSet from
   * either end of the underlying string.
   *
   * @param aSet -- contains chars to be trimmed from both ends
   * @param aEliminateLeading
   * @param aEliminateTrailing
   * @param aIgnoreQuotes -- if true, causes surrounding quotes to be ignored
   */
  void Trim(const char* aSet, bool aEliminateLeading = true,
            bool aEliminateTrailing = true, bool aIgnoreQuotes = false);

  /**
   * This method strips whitespace from string.
   * You can control whether whitespace is yanked from start and end of
   * string as well.
   *
   * @param aEliminateLeading controls stripping of leading ws
   * @param aEliminateTrailing controls stripping of trailing ws
   */
  void CompressWhitespace(bool aEliminateLeading = true,
                          bool aEliminateTrailing = true);

#endif  // MOZ_STRING_WITH_OBSOLETE_API

  /**
   * Allow this string to be bound to a character buffer
   * until the string is rebound or mutated; the caller
   * must ensure that the buffer outlives the string.
   */
  void Rebind(const char_type* aData, size_type aLength);

  /**
   * verify restrictions for dependent strings
   */
  void AssertValidDependentString() {
    MOZ_ASSERT(this->mData, "nsTDependentString must wrap a non-NULL buffer");
    MOZ_ASSERT(this->mLength != size_type(-1),
               "nsTDependentString has bogus length");
    MOZ_DIAGNOSTIC_ASSERT(this->mData[substring_type::mLength] == 0,
                          "nsTDependentString must wrap only null-terminated "
                          "strings. You are probably looking for "
                          "nsTDependentSubstring.");
  }

 protected:
  // allow subclasses to initialize fields directly
  nsTString(char_type* aData, size_type aLength, DataFlags aDataFlags,
            ClassFlags aClassFlags)
      : substring_type(aData, aLength, aDataFlags,
                       aClassFlags | ClassFlags::NULL_TERMINATED) {}

  friend const nsTString<char>& VoidCString();
  friend const nsTString<char16_t>& VoidString();

  // Used by Null[C]String.
  explicit nsTString(DataFlags aDataFlags)
      : substring_type(char_traits::sEmptyBuffer, 0,
                       aDataFlags | DataFlags::TERMINATED,
                       ClassFlags::NULL_TERMINATED) {}

  enum class TrailingCharsPolicy {
    Disallow,
    Allow,
  };
  // Utility function for ToDouble and ToDoubleAllowTrailingChars.
  double ToDouble(TrailingCharsPolicy aTrailingCharsPolicy,
                  nsresult* aErrorCode) const;

  // A (begin offset, length) pair; ReplaceSubstring uses it to record the
  // non-matching parts of the string.
  struct Segment {
    uint32_t mBegin, mLength;
    Segment(uint32_t aBegin, uint32_t aLength)
        : mBegin(aBegin), mLength(aLength) {}
  };
};

// TODO(erahm): Do something with ToDouble so that we can extern the
// nsTString templates.
// extern template class nsTString<char>;
// extern template class nsTString<char16_t>;

/**
 * nsTAutoStringN
 *
 * Subclass of nsTString that adds support for stack-based string
 * allocation. It is normally not a good idea to use this class on the
 * heap, because it will allocate space which may be wasted if the string
 * it contains is significantly smaller or any larger than the inline
 * buffer (N characters including the null terminator; 64 for the common
 * instantiations below).
 *
 * NAMES:
 *   nsAutoStringN / nsTAutoString for wide characters
 *   nsAutoCStringN / nsTAutoCString for narrow characters
 */
template <typename T, size_t N>
class MOZ_NON_MEMMOVABLE nsTAutoStringN : public nsTString<T> {
 public:
  typedef nsTAutoStringN<T, N> self_type;

  typedef nsTString<T> base_string_type;
  typedef typename base_string_type::string_type string_type;
  typedef typename base_string_type::char_type char_type;
  typedef typename base_string_type::char_traits char_traits;
  typedef typename base_string_type::substring_type substring_type;
  typedef typename base_string_type::size_type size_type;
  typedef typename base_string_type::substring_tuple_type substring_tuple_type;

  // These are only for internal use within the string classes:
  typedef typename base_string_type::DataFlags DataFlags;
  typedef typename base_string_type::ClassFlags ClassFlags;

 public:
  /**
   * constructors
   *
   * The default constructor points the string at the inline buffer; every
   * other constructor delegates to it and then assigns.
   */

  nsTAutoStringN()
      : string_type(mStorage, 0, DataFlags::TERMINATED | DataFlags::INLINE,
                    ClassFlags::INLINE),
        mInlineCapacity(N - 1) {  // one slot is reserved for the terminator
    // null-terminate
    mStorage[0] = char_type(0);
  }

  explicit nsTAutoStringN(char_type aChar) : self_type() {
    this->Assign(aChar);
  }

  explicit nsTAutoStringN(const char_type* aData,
                          size_type aLength = size_type(-1))
      : self_type() {
    this->Assign(aData, aLength);
  }

#if defined(MOZ_USE_CHAR16_WRAPPER)
  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  explicit nsTAutoStringN(char16ptr_t aData, size_type aLength = size_type(-1))
      : self_type(static_cast<const char16_t*>(aData), aLength) {}
#endif

  nsTAutoStringN(const self_type& aStr) : self_type() { this->Assign(aStr); }

  nsTAutoStringN(self_type&& aStr) : self_type() {
    this->Assign(std::move(aStr));
  }

  explicit nsTAutoStringN(const substring_type& aStr) : self_type() {
    this->Assign(aStr);
  }

  explicit nsTAutoStringN(substring_type&& aStr) : self_type() {
    this->Assign(std::move(aStr));
  }

  MOZ_IMPLICIT nsTAutoStringN(const substring_tuple_type& aTuple)
      : self_type() {
    this->Assign(aTuple);
  }

  // |operator=| does not inherit, so we must define our own
  self_type& operator=(char_type aChar) {
    this->Assign(aChar);
    return *this;
  }
  self_type& operator=(const char_type* aData) {
    this->Assign(aData);
    return *this;
  }
#if defined(MOZ_USE_CHAR16_WRAPPER)
  template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>>
  self_type& operator=(char16ptr_t aStr) {
    this->Assign(aStr);
    return *this;
  }
#endif
  self_type& operator=(const self_type& aStr) {
    this->Assign(aStr);
    return *this;
  }
  self_type& operator=(self_type&& aStr) {
    this->Assign(std::move(aStr));
    return *this;
  }
  self_type& operator=(const substring_type& aStr) {
    this->Assign(aStr);
    return *this;
  }
  self_type& operator=(substring_type&& aStr) {
    this->Assign(std::move(aStr));
    return *this;
  }
  self_type& operator=(const substring_tuple_type& aTuple) {
    this->Assign(aTuple);
    return *this;
  }

  // Size of the inline buffer in characters, terminator included.
  static const size_t kStorageSize = N;

 protected:
  friend class nsTSubstring<T>;

  // Set to N - 1 by the default constructor.
  const size_type mInlineCapacity;

 private:
  // The inline buffer itself.
  char_type mStorage[N];
};

// Externs for the most common nsTAutoStringN variations.
extern template class nsTAutoStringN<char, 64>;
extern template class nsTAutoStringN<char16_t, 64>;

//
// nsAutoString stores pointers into itself which are invalidated when an
// nsTArray is resized, so nsTArray must not be instantiated with nsAutoString
// elements!
//
template <class E>
class nsTArrayElementTraits;
// Trap specialization: Construct/Destruct are declared in terms of the
// incomplete marker types below, whose names spell out the diagnostic
// (use nsTArray<nsTString<T>> instead of nsTArray<nsTAutoString<T>>).
template <typename T>
class nsTArrayElementTraits<nsTAutoString<T>> {
 public:
  template <class A>
  struct Dont_Instantiate_nsTArray_of;
  template <class A>
  struct Instead_Use_nsTArray_of;

  static Dont_Instantiate_nsTArray_of<nsTAutoString<T>>* Construct(
      Instead_Use_nsTArray_of<nsTString<T>>* aE) {
    return 0;
  }
  template <class A>
  static Dont_Instantiate_nsTArray_of<nsTAutoString<T>>* Construct(
      Instead_Use_nsTArray_of<nsTString<T>>* aE, const A& aArg) {
    return 0;
  }
  template <class... Args>
  static Dont_Instantiate_nsTArray_of<nsTAutoString<T>>* Construct(
      Instead_Use_nsTArray_of<nsTString<T>>* aE, Args&&... aArgs) {
    return 0;
  }
  static Dont_Instantiate_nsTArray_of<nsTAutoString<T>>* Destruct(
      Instead_Use_nsTArray_of<nsTString<T>>* aE) {
    return 0;
  }
};

/**
 * getter_Copies support for adopting raw string out params that are
 * heap-allocated, e.g.:
 *
 *    char* gStr;
 *    void GetBlah(char** aStr)
 *    {
 *      *aStr = strdup(gStr);
 *    }
 *
 *    // This works, but is clumsy.
 *    void Inelegant()
 *    {
 *      char* buf;
 *      GetBlah(&buf);
 *      nsCString str;
 *      str.Adopt(buf);
 *      // ...
 *    }
 *
 *    // This is nicer.
 *    void Elegant()
 *    {
 *      nsCString str;
 *      GetBlah(getter_Copies(str));
 *      // ...
 *    }
 */
template <typename T>
class MOZ_STACK_CLASS nsTGetterCopies {
 public:
  typedef T char_type;

  explicit nsTGetterCopies(nsTSubstring<T>& aStr)
      : mString(aStr), mData(nullptr) {}

  // Hands whatever the callee stored in mData over to the target string.
  ~nsTGetterCopies() {
    mString.Adopt(mData);  // OK if mData is null
  }

  // Lets the helper be passed where a |char_type**| out param is expected.
  operator char_type**() { return &mData; }

 private:
  nsTSubstring<T>& mString;  // string that adopts the buffer on destruction
  char_type* mData;          // out-param slot handed to the callee
};

// See the comment above nsTGetterCopies for how to use this.
+template <typename T> +inline nsTGetterCopies<T> getter_Copies(nsTSubstring<T>& aString) { + return nsTGetterCopies<T>(aString); +} + +#endif diff --git a/xpcom/string/nsTStringComparator.cpp b/xpcom/string/nsTStringComparator.cpp new file mode 100644 index 0000000000..02933c6769 --- /dev/null +++ b/xpcom/string/nsTStringComparator.cpp @@ -0,0 +1,63 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" + +template <typename T> +int NS_FASTCALL Compare(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs, + const nsTStringComparator<T> comp) { + typedef typename nsTSubstring<T>::size_type size_type; + typedef typename nsTSubstring<T>::const_iterator const_iterator; + + if (&aLhs == &aRhs) { + return 0; + } + + const_iterator leftIter, rightIter; + aLhs.BeginReading(leftIter); + aRhs.BeginReading(rightIter); + + size_type lLength = aLhs.Length(); + size_type rLength = aRhs.Length(); + size_type lengthToCompare = XPCOM_MIN(lLength, rLength); + + int result; + if ((result = comp(leftIter.get(), rightIter.get(), lengthToCompare, + lengthToCompare)) == 0) { + if (lLength < rLength) { + result = -1; + } else if (rLength < lLength) { + result = 1; + } else { + result = 0; + } + } + + return result; +} + +template int NS_FASTCALL Compare<char>( + mozilla::detail::nsTStringRepr<char> const&, + mozilla::detail::nsTStringRepr<char> const&, nsTStringComparator<char>); + +template int NS_FASTCALL +Compare<char16_t>(mozilla::detail::nsTStringRepr<char16_t> const&, + mozilla::detail::nsTStringRepr<char16_t> const&, + nsTStringComparator<char16_t>); + +template <typename T> +int nsTDefaultStringComparator(const T* aLhs, const T* aRhs, 
uint32_t aLLength, + uint32_t aRLength) { + return aLLength == aRLength ? nsCharTraits<T>::compare(aLhs, aRhs, aLLength) + : (aLLength > aRLength) ? 1 + : -1; +} + +template int nsTDefaultStringComparator(const char*, const char*, uint32_t, + uint32_t); +template int nsTDefaultStringComparator(const char16_t*, const char16_t*, + uint32_t, uint32_t); diff --git a/xpcom/string/nsTStringHasher.h b/xpcom/string/nsTStringHasher.h new file mode 100644 index 0000000000..7b3f42ba58 --- /dev/null +++ b/xpcom/string/nsTStringHasher.h @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsTStringHasher_h___ +#define nsTStringHasher_h___ + +#include "mozilla/HashTable.h" // mozilla::{DefaultHasher, HashNumber, HashString} + +namespace mozilla { + +template <typename T> +struct DefaultHasher<nsTString<T>> { + using Key = nsTString<T>; + using Lookup = nsTString<T>; + + static mozilla::HashNumber hash(const Lookup& aLookup) { + return mozilla::HashString(aLookup.get()); + } + + static bool match(const Key& aKey, const Lookup& aLookup) { + return aKey.Equals(aLookup); + } +}; + +} // namespace mozilla + +#endif // !defined(nsTStringHasher_h___) diff --git a/xpcom/string/nsTStringObsolete.cpp b/xpcom/string/nsTStringObsolete.cpp new file mode 100644 index 0000000000..0e3501ac67 --- /dev/null +++ b/xpcom/string/nsTStringObsolete.cpp @@ -0,0 +1,442 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsTArray.h" +#include "nsASCIIMask.h" +#include "mozilla/CheckedInt.h" + +/** + * nsTString::Find + * + * aOffset specifies starting index + * aCount specifies number of string compares (iterations) + */ +template <typename T> +int32_t nsTString<T>::Find(const nsTString<char>& aString, bool aIgnoreCase, + int32_t aOffset, int32_t aCount) const { + // this method changes the meaning of aOffset and aCount: + Find_ComputeSearchRange(this->mLength, aString.Length(), aOffset, aCount); + + int32_t result = FindSubstring(this->mData + aOffset, aCount, aString.get(), + aString.Length(), aIgnoreCase); + if (result != kNotFound) result += aOffset; + return result; +} + +template <typename T> +int32_t nsTString<T>::Find(const char* aString, bool aIgnoreCase, + int32_t aOffset, int32_t aCount) const { + return Find(nsTDependentString<char>(aString), aIgnoreCase, aOffset, aCount); +} + +/** + * nsTString::RFind + * + * aOffset specifies starting index + * aCount specifies number of string compares (iterations) + */ +template <typename T> +int32_t nsTString<T>::RFind(const nsTString<char>& aString, bool aIgnoreCase, + int32_t aOffset, int32_t aCount) const { + // this method changes the meaning of aOffset and aCount: + RFind_ComputeSearchRange(this->mLength, aString.Length(), aOffset, aCount); + + int32_t result = RFindSubstring(this->mData + aOffset, aCount, aString.get(), + aString.Length(), aIgnoreCase); + if (result != kNotFound) result += aOffset; + return result; +} + +template <typename T> +int32_t nsTString<T>::RFind(const char* aString, bool aIgnoreCase, + int32_t aOffset, int32_t aCount) const { + return RFind(nsTDependentString<char>(aString), aIgnoreCase, aOffset, aCount); +} + +/** + * nsTString::RFindChar + */ +template <typename T> +int32_t nsTString<T>::RFindChar(char16_t aChar, int32_t aOffset, + int32_t aCount) const { + return nsBufferRoutines<T>::rfind_char(this->mData, this->mLength, aOffset, + aChar, aCount); +} + +/** + * 
nsTString::FindCharInSet + */ + +template <typename T> +int32_t nsTString<T>::FindCharInSet(const char_type* aSet, + int32_t aOffset) const { + if (aOffset < 0) + aOffset = 0; + else if (aOffset >= int32_t(this->mLength)) + return kNotFound; + + int32_t result = + ::FindCharInSet(this->mData + aOffset, this->mLength - aOffset, aSet); + if (result != kNotFound) result += aOffset; + return result; +} + +/** + * nsTString::RFindCharInSet + */ + +template <typename T> +int32_t nsTString<T>::RFindCharInSet(const char_type* aSet, + int32_t aOffset) const { + // We want to pass a "data length" to ::RFindCharInSet + if (aOffset < 0 || aOffset > int32_t(this->mLength)) + aOffset = this->mLength; + else + ++aOffset; + + return ::RFindCharInSet(this->mData, aOffset, aSet); +} + +/** + * nsTString::Mid + */ + +template <typename T> +typename nsTString<T>::size_type nsTString<T>::Mid( + self_type& aResult, index_type aStartPos, size_type aLengthToCopy) const { + if (aStartPos == 0 && aLengthToCopy >= this->mLength) + aResult = *this; + else + aResult = Substring(*this, aStartPos, aLengthToCopy); + + return aResult.mLength; +} + +/** + * nsTString::SetCharAt + */ + +template <typename T> +bool nsTString<T>::SetCharAt(char16_t aChar, uint32_t aIndex) { + if (aIndex >= this->mLength) return false; + + if (!this->EnsureMutable()) this->AllocFailed(this->mLength); + + this->mData[aIndex] = char_type(aChar); + return true; +} + +/** + * nsTString::StripChars,StripChar,StripWhitespace + */ + +template <typename T> +template <typename Q, typename EnableIfChar16> +void nsTString<T>::StripChars(const incompatible_char_type* aSet) { + if (!StripChars(aSet, mozilla::fallible)) { + this->AllocFailed(this->mLength); + } +} + +template void nsTString<char16_t>::StripChars(const incompatible_char_type*); + +template <typename T> +template <typename Q, typename EnableIfChar16> +bool nsTString<T>::StripChars(const incompatible_char_type* aSet, + const fallible_t&) { + if (!this->EnsureMutable()) 
{ + return false; + } + + this->mLength = + nsBufferRoutines<T>::strip_chars(this->mData, this->mLength, aSet); + return true; +} + +template bool nsTString<char16_t>::StripChars(const incompatible_char_type*, + const fallible_t&); + +template <typename T> +void nsTString<T>::StripChars(const char_type* aSet) { + nsTSubstring<T>::StripChars(aSet); +} + +template <typename T> +void nsTString<T>::StripWhitespace() { + if (!StripWhitespace(mozilla::fallible)) { + this->AllocFailed(this->mLength); + } +} + +template <typename T> +bool nsTString<T>::StripWhitespace(const fallible_t&) { + if (!this->EnsureMutable()) { + return false; + } + + this->StripTaggedASCII(mozilla::ASCIIMask::MaskWhitespace()); + return true; +} + +/** + * nsTString::ReplaceChar,ReplaceSubstring + */ + +template <typename T> +void nsTString<T>::ReplaceChar(char_type aOldChar, char_type aNewChar) { + if (!this->EnsureMutable()) // XXX do this lazily? + this->AllocFailed(this->mLength); + + for (uint32_t i = 0; i < this->mLength; ++i) { + if (this->mData[i] == aOldChar) this->mData[i] = aNewChar; + } +} + +template <typename T> +void nsTString<T>::ReplaceChar(const char_type* aSet, char_type aNewChar) { + if (!this->EnsureMutable()) // XXX do this lazily? 
+ this->AllocFailed(this->mLength); + + char_type* data = this->mData; + uint32_t lenRemaining = this->mLength; + + while (lenRemaining) { + int32_t i = ::FindCharInSet(data, lenRemaining, aSet); + if (i == kNotFound) break; + + data[i++] = aNewChar; + data += i; + lenRemaining -= i; + } +} + +template void nsTString<char16_t>::ReplaceChar(const char*, char16_t); + +void ReleaseData(void* aData, nsAString::DataFlags aFlags); + +template <typename T> +void nsTString<T>::ReplaceSubstring(const char_type* aTarget, + const char_type* aNewValue) { + ReplaceSubstring(nsTDependentString<T>(aTarget), + nsTDependentString<T>(aNewValue)); +} + +template <typename T> +bool nsTString<T>::ReplaceSubstring(const char_type* aTarget, + const char_type* aNewValue, + const fallible_t& aFallible) { + return ReplaceSubstring(nsTDependentString<T>(aTarget), + nsTDependentString<T>(aNewValue), aFallible); +} + +template <typename T> +void nsTString<T>::ReplaceSubstring(const self_type& aTarget, + const self_type& aNewValue) { + if (!ReplaceSubstring(aTarget, aNewValue, mozilla::fallible)) { + // Note that this may wildly underestimate the allocation that failed, as + // we could have been replacing multiple copies of aTarget. + this->AllocFailed(this->mLength + (aNewValue.Length() - aTarget.Length())); + } +} + +template <typename T> +bool nsTString<T>::ReplaceSubstring(const self_type& aTarget, + const self_type& aNewValue, + const fallible_t&) { + if (aTarget.Length() == 0) return true; + + // Remember all of the non-matching parts. + AutoTArray<Segment, 16> nonMatching; + uint32_t i = 0; + mozilla::CheckedUint32 newLength; + while (true) { + int32_t r = FindSubstring(this->mData + i, this->mLength - i, + static_cast<const char_type*>(aTarget.Data()), + aTarget.Length(), false); + int32_t until = (r == kNotFound) ? 
this->mLength - i : r; + nonMatching.AppendElement(Segment(i, until)); + newLength += until; + if (r == kNotFound) { + break; + } + + newLength += aNewValue.Length(); + i += r + aTarget.Length(); + if (i >= this->mLength) { + // Add an auxiliary entry at the end of the list to help as an edge case + // for the algorithms below. + nonMatching.AppendElement(Segment(this->mLength, 0)); + break; + } + } + + if (!newLength.isValid()) { + return false; + } + + // If there's only one non-matching segment, then the target string was not + // found, and there's nothing to do. + if (nonMatching.Length() == 1) { + MOZ_ASSERT( + nonMatching[0].mBegin == 0 && nonMatching[0].mLength == this->mLength, + "We should have the correct non-matching segment."); + return true; + } + + // Make sure that we can mutate our buffer. + // Note that we always allocate at least an this->mLength sized buffer, + // because the rest of the algorithm relies on having access to all of the + // original string. In other words, we over-allocate in the shrinking case. + uint32_t oldLen = this->mLength; + mozilla::Result<uint32_t, nsresult> r = + this->StartBulkWriteImpl(XPCOM_MAX(oldLen, newLength.value()), oldLen); + if (r.isErr()) { + return false; + } + + if (aTarget.Length() >= aNewValue.Length()) { + // In the shrinking case, start filling the buffer from the beginning. + const uint32_t delta = (aTarget.Length() - aNewValue.Length()); + for (i = 1; i < nonMatching.Length(); ++i) { + // When we move the i'th non-matching segment into position, we need to + // account for the characters deleted by the previous |i| replacements by + // subtracting |i * delta|. + const char_type* sourceSegmentPtr = this->mData + nonMatching[i].mBegin; + char_type* destinationSegmentPtr = + this->mData + nonMatching[i].mBegin - i * delta; + // Write the i'th replacement immediately before the new i'th non-matching + // segment. 
+ char_traits::copy(destinationSegmentPtr - aNewValue.Length(), + aNewValue.Data(), aNewValue.Length()); + char_traits::move(destinationSegmentPtr, sourceSegmentPtr, + nonMatching[i].mLength); + } + } else { + // In the growing case, start filling the buffer from the end. + const uint32_t delta = (aNewValue.Length() - aTarget.Length()); + for (i = nonMatching.Length() - 1; i > 0; --i) { + // When we move the i'th non-matching segment into position, we need to + // account for the characters added by the previous |i| replacements by + // adding |i * delta|. + const char_type* sourceSegmentPtr = this->mData + nonMatching[i].mBegin; + char_type* destinationSegmentPtr = + this->mData + nonMatching[i].mBegin + i * delta; + char_traits::move(destinationSegmentPtr, sourceSegmentPtr, + nonMatching[i].mLength); + // Write the i'th replacement immediately before the new i'th non-matching + // segment. + char_traits::copy(destinationSegmentPtr - aNewValue.Length(), + aNewValue.Data(), aNewValue.Length()); + } + } + + // Adjust the length and make sure the string is null terminated. 
+ this->FinishBulkWriteImpl(newLength.value()); + + return true; +} + +/** + * nsTString::Trim + */ + +template <typename T> +void nsTString<T>::Trim(const char* aSet, bool aTrimLeading, bool aTrimTrailing, + bool aIgnoreQuotes) { + // the old implementation worried about aSet being null :-/ + if (!aSet) return; + + char_type* start = this->mData; + char_type* end = this->mData + this->mLength; + + // skip over quotes if requested + if (aIgnoreQuotes && this->mLength > 2 && + this->mData[0] == this->mData[this->mLength - 1] && + (this->mData[0] == '\'' || this->mData[0] == '"')) { + ++start; + --end; + } + + uint32_t setLen = nsCharTraits<char>::length(aSet); + + if (aTrimLeading) { + uint32_t cutStart = start - this->mData; + uint32_t cutLength = 0; + + // walk forward from start to end + for (; start != end; ++start, ++cutLength) { + int32_t pos = FindChar1(aSet, setLen, 0, *start, setLen); + if (pos == kNotFound) break; + } + + if (cutLength) { + this->Cut(cutStart, cutLength); + + // reset iterators + start = this->mData + cutStart; + end = this->mData + this->mLength - cutStart; + } + } + + if (aTrimTrailing) { + uint32_t cutEnd = end - this->mData; + uint32_t cutLength = 0; + + // walk backward from end to start + --end; + for (; end >= start; --end, ++cutLength) { + int32_t pos = FindChar1(aSet, setLen, 0, *end, setLen); + if (pos == kNotFound) break; + } + + if (cutLength) this->Cut(cutEnd - cutLength, cutLength); + } +} + +/** + * nsTString::CompressWhitespace. 
+ */ + +template <typename T> +void nsTString<T>::CompressWhitespace(bool aTrimLeading, bool aTrimTrailing) { + // Quick exit + if (this->mLength == 0) { + return; + } + + if (!this->EnsureMutable()) this->AllocFailed(this->mLength); + + const ASCIIMaskArray& mask = mozilla::ASCIIMask::MaskWhitespace(); + + char_type* to = this->mData; + char_type* from = this->mData; + char_type* end = this->mData + this->mLength; + + // Compresses runs of whitespace down to a normal space ' ' and convert + // any whitespace to a normal space. This assumes that whitespace is + // all standard 7-bit ASCII. + bool skipWS = aTrimLeading; + while (from < end) { + uint32_t theChar = *from++; + if (mozilla::ASCIIMask::IsMasked(mask, theChar)) { + if (!skipWS) { + *to++ = ' '; + skipWS = true; + } + } else { + *to++ = theChar; + skipWS = false; + } + } + + // If we need to trim the trailing whitespace, back up one character. + if (aTrimTrailing && skipWS && to > this->mData) { + to--; + } + + *to = char_type(0); // add the null + this->mLength = to - this->mData; +} diff --git a/xpcom/string/nsTStringRepr.h b/xpcom/string/nsTStringRepr.h new file mode 100644 index 0000000000..f0a775c70d --- /dev/null +++ b/xpcom/string/nsTStringRepr.h @@ -0,0 +1,397 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsTStringRepr_h +#define nsTStringRepr_h + +#include <type_traits> // std::enable_if + +#include "mozilla/Char16.h" +#include "mozilla/fallible.h" +#include "nsStringFlags.h" +#include "nsStringIterator.h" +#include "nsCharTraits.h" + +template <typename T> +class nsTSubstringTuple; + +namespace mozilla { + +// This is mainly intended to be used in the context of nsTStrings where +// we want to enable a specific function only for a given character class. In +// order for this technique to work the member function needs to be templated +// on something other than `T`. We keep this in the `mozilla` namespace rather +// than `nsTStringRepr` as it's intentionally not dependent on `T`. +// +// The 'T' at the end of `Char[16]OnlyT` is refering to the `::type` portion +// which will only be defined if the character class is correct. This is similar +// to `std::enable_if_t` which is available in C++14, but not C++11. +// +// `CharType` is generally going to be a shadowed type of `T`. +// +// Example usage of a function that will only be defined if `T` == `char`: +// +// template <typename T> +// class nsTSubstring : public nsTStringRepr<T> { +// template <typename Q = T, typename EnableForChar = typename CharOnlyT<Q>> +// int Foo() { return 42; } +// }; +// +// Please note that we had to use a separate type `Q` for this to work. You +// will get a semi-decent compiler error if you use `T` directly. + +template <typename CharType> +using CharOnlyT = + typename std::enable_if<std::is_same<char, CharType>::value>::type; + +template <typename CharType> +using Char16OnlyT = + typename std::enable_if<std::is_same<char16_t, CharType>::value>::type; + +namespace detail { + +// nsTStringRepr defines a string's memory layout and some accessor methods. +// This class exists so that nsTLiteralString can avoid inheriting +// nsTSubstring's destructor. All methods on this class must be const because +// literal strings are not writable. 
+// +// This class is an implementation detail and should not be instantiated +// directly, nor used in any way outside of the string code itself. It is +// buried in a namespace to discourage its use in function parameters. +// If you need to take a parameter, use [const] ns[C]Substring&. +// If you need to instantiate a string, use ns[C]String or descendents. +// +// NAMES: +// nsStringRepr for wide characters +// nsCStringRepr for narrow characters +template <typename T> +class nsTStringRepr { + public: + typedef mozilla::fallible_t fallible_t; + + typedef T char_type; + + typedef nsCharTraits<char_type> char_traits; + typedef typename char_traits::incompatible_char_type incompatible_char_type; + + typedef nsTStringRepr<T> self_type; + typedef self_type base_string_type; + + typedef nsTSubstring<T> substring_type; + typedef nsTSubstringTuple<T> substring_tuple_type; + + typedef nsReadingIterator<char_type> const_iterator; + typedef char_type* iterator; + + typedef nsTStringComparator<char_type> comparator_type; + + typedef const char_type* const_char_iterator; + + typedef uint32_t index_type; + typedef uint32_t size_type; + + // These are only for internal use within the string classes: + typedef StringDataFlags DataFlags; + typedef StringClassFlags ClassFlags; + + // Reading iterators. + constexpr const_char_iterator BeginReading() const { return mData; } + constexpr const_char_iterator EndReading() const { return mData + mLength; } + + // Deprecated reading iterators. 
+ const_iterator& BeginReading(const_iterator& aIter) const { + aIter.mStart = mData; + aIter.mEnd = mData + mLength; + aIter.mPosition = aIter.mStart; + return aIter; + } + + const_iterator& EndReading(const_iterator& aIter) const { + aIter.mStart = mData; + aIter.mEnd = mData + mLength; + aIter.mPosition = aIter.mEnd; + return aIter; + } + + const_char_iterator& BeginReading(const_char_iterator& aIter) const { + return aIter = mData; + } + + const_char_iterator& EndReading(const_char_iterator& aIter) const { + return aIter = mData + mLength; + } + + // Accessors. + template <typename U, typename Dummy> + struct raw_type { + typedef const U* type; + }; +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Dummy> + struct raw_type<char16_t, Dummy> { + typedef char16ptr_t type; + }; +#endif + + // Returns pointer to string data (not necessarily null-terminated) + constexpr const typename raw_type<T, int>::type Data() const { return mData; } + + constexpr size_type Length() const { return mLength; } + + constexpr DataFlags GetDataFlags() const { return mDataFlags; } + + constexpr bool IsEmpty() const { return mLength == 0; } + + constexpr bool IsLiteral() const { + return !!(mDataFlags & DataFlags::LITERAL); + } + + constexpr bool IsVoid() const { return !!(mDataFlags & DataFlags::VOIDED); } + + constexpr bool IsTerminated() const { + return !!(mDataFlags & DataFlags::TERMINATED); + } + + constexpr char_type CharAt(index_type aIndex) const { + NS_ASSERTION(aIndex < mLength, "index exceeds allowable range"); + return mData[aIndex]; + } + + constexpr char_type operator[](index_type aIndex) const { + return CharAt(aIndex); + } + + char_type First() const; + + char_type Last() const; + + size_type NS_FASTCALL CountChar(char_type) const; + int32_t NS_FASTCALL FindChar(char_type, index_type aOffset = 0) const; + + bool Contains(char_type aChar) const; + + // Equality. 
+ bool NS_FASTCALL Equals(const self_type&) const; + bool NS_FASTCALL Equals(const self_type&, comparator_type) const; + + bool NS_FASTCALL Equals(const substring_tuple_type& aTuple) const; + bool NS_FASTCALL Equals(const substring_tuple_type& aTuple, + comparator_type) const; + + bool NS_FASTCALL Equals(const char_type* aData) const; + bool NS_FASTCALL Equals(const char_type* aData, comparator_type) const; + + /** + * Compares a given string to this string. + * + * @param aString is the string to be compared + * @param aIgnoreCase tells us how to treat case + * @param aCount tells us how many chars to compare + * @return -1,0,1 + */ + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + int32_t Compare(const char_type* aString, bool aIgnoreCase = false, + int32_t aCount = -1) const; + + /** + * Equality check between given string and this string. + * + * @param aString is the string to check + * @param aIgnoreCase tells us how to treat case + * @param aCount tells us how many chars to compare + * @return boolean + */ + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + bool EqualsIgnoreCase(const char_type* aString, int32_t aCount = -1) const { + return Compare(aString, true, aCount) == 0; + } + + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + bool EqualsIgnoreCase(const incompatible_char_type* aString, + int32_t aCount = -1) const; + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = Char16OnlyT<Q>> + bool NS_FASTCALL Equals(char16ptr_t aData) const { + return Equals(static_cast<const char16_t*>(aData)); + } + template <typename Q = T, typename EnableIfChar16 = Char16OnlyT<Q>> + bool NS_FASTCALL Equals(char16ptr_t aData, comparator_type aComp) const { + return Equals(static_cast<const char16_t*>(aData), aComp); + } +#endif + + // An efficient comparison with ASCII that can be used even + // for wide strings. 
Call this version when you know the + // length of 'data'. + bool NS_FASTCALL EqualsASCII(const char* aData, size_type aLen) const; + // An efficient comparison with ASCII that can be used even + // for wide strings. Call this version when 'data' is + // null-terminated. + bool NS_FASTCALL EqualsASCII(const char* aData) const; + + // An efficient comparison with Latin1 characters that can be used even for + // wide strings. + bool EqualsLatin1(const char* aData, size_type aLength) const; + + // EqualsLiteral must ONLY be called with an actual literal string, or + // a char array *constant* declared without an explicit size and with an + // initializer that is a string literal or is otherwise null-terminated. + // Use EqualsASCII for other char array variables. + // (Although this method may happen to produce expected results for other + // char arrays that have bound one greater than the sequence of interest, + // such use is discouraged for reasons of readability and maintainability.) + // The template trick to acquire the array bound at compile time without + // using a macro is due to Corey Kosak, with much thanks. + template <int N> + inline bool EqualsLiteral(const char (&aStr)[N]) const { + return EqualsASCII(aStr, N - 1); + } + + // EqualsLiteral must ONLY be called with an actual literal string, or + // a char array *constant* declared without an explicit size and with an + // initializer that is a string literal or is otherwise null-terminated. + // Use EqualsASCII for other char array variables. + // (Although this method may happen to produce expected results for other + // char arrays that have bound one greater than the sequence of interest, + // such use is discouraged for reasons of readability and maintainability.) + // The template trick to acquire the array bound at compile time without + // using a macro is due to Corey Kosak, with much thanks. 
+ template <size_t N, typename = std::enable_if_t<!std::is_same_v< + const char (&)[N], const char_type (&)[N]>>> + inline bool EqualsLiteral(const char_type (&aStr)[N]) const { + return *this == nsTLiteralString<char_type>(aStr); + } + + // The LowerCaseEquals methods compare the ASCII-lowercase version of + // this string (lowercasing only ASCII uppercase characters) to some + // ASCII/Literal string. The ASCII string is *not* lowercased for + // you. If you compare to an ASCII or literal string that contains an + // uppercase character, it is guaranteed to return false. We will + // throw assertions too. + bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData, + size_type aLen) const; + bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData) const; + + // LowerCaseEqualsLiteral must ONLY be called with an actual literal string, + // or a char array *constant* declared without an explicit size and with an + // initializer that is a string literal or is otherwise null-terminated. + // Use LowerCaseEqualsASCII for other char array variables. + // (Although this method may happen to produce expected results for other + // char arrays that have bound one greater than the sequence of interest, + // such use is discouraged for reasons of readability and maintainability.) + template <int N> + bool LowerCaseEqualsLiteral(const char (&aStr)[N]) const { + return LowerCaseEqualsASCII(aStr, N - 1); + } + + // Returns true if this string overlaps with the given string fragment. 
+ bool IsDependentOn(const char_type* aStart, const char_type* aEnd) const { + // If it _isn't_ the case that one fragment starts after the other ends, + // or ends before the other starts, then, they conflict: + // + // !(f2.begin >= f1.aEnd || f2.aEnd <= f1.begin) + // + // Simplified, that gives us (To avoid relying on Undefined Behavior + // from comparing pointers from different allocations (which in + // principle gives the optimizer the permission to assume elsewhere + // that the pointers are from the same allocation), the comparisons + // are done on integers, which merely relies on implementation-defined + // behavior of converting pointers to integers. std::less and + // std::greater implementations don't actually provide the guarantees + // that they should.): + return (reinterpret_cast<uintptr_t>(aStart) < + reinterpret_cast<uintptr_t>(mData + mLength) && + reinterpret_cast<uintptr_t>(aEnd) > + reinterpret_cast<uintptr_t>(mData)); + } + + protected: + nsTStringRepr() = delete; // Never instantiate directly + + constexpr nsTStringRepr(char_type* aData, size_type aLength, + DataFlags aDataFlags, ClassFlags aClassFlags) + : mData(aData), + mLength(aLength), + mDataFlags(aDataFlags), + mClassFlags(aClassFlags) {} + + char_type* mData; + size_type mLength; + DataFlags mDataFlags; + ClassFlags const mClassFlags; +}; + +extern template class nsTStringRepr<char>; +extern template class nsTStringRepr<char16_t>; + +} // namespace detail +} // namespace mozilla + +template <typename T> +int NS_FASTCALL Compare(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs, + nsTStringComparator<T> = nsTDefaultStringComparator<T>); + +extern template int NS_FASTCALL Compare<char>( + const mozilla::detail::nsTStringRepr<char>&, + const mozilla::detail::nsTStringRepr<char>&, nsTStringComparator<char>); + +extern template int NS_FASTCALL +Compare<char16_t>(const mozilla::detail::nsTStringRepr<char16_t>&, + const 
mozilla::detail::nsTStringRepr<char16_t>&, + nsTStringComparator<char16_t>); + +template <typename T> +inline constexpr bool operator!=( + const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return !aLhs.Equals(aRhs); +} + +template <typename T> +inline constexpr bool operator!=(const mozilla::detail::nsTStringRepr<T>& aLhs, + const T* aRhs) { + return !aLhs.Equals(aRhs); +} + +template <typename T> +inline bool operator<(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return Compare(aLhs, aRhs) < 0; +} + +template <typename T> +inline bool operator<=(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return Compare(aLhs, aRhs) <= 0; +} + +template <typename T> +inline bool operator==(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return aLhs.Equals(aRhs); +} + +template <typename T> +inline bool operator==(const mozilla::detail::nsTStringRepr<T>& aLhs, + const T* aRhs) { + return aLhs.Equals(aRhs); +} + +template <typename T> +inline bool operator>=(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return Compare(aLhs, aRhs) >= 0; +} + +template <typename T> +inline bool operator>(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return Compare(aLhs, aRhs) > 0; +} + +#endif diff --git a/xpcom/string/nsTSubstring.cpp b/xpcom/string/nsTSubstring.cpp new file mode 100644 index 0000000000..fc31e9feb1 --- /dev/null +++ b/xpcom/string/nsTSubstring.cpp @@ -0,0 +1,1543 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "double-conversion/double-conversion.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/Printf.h" +#include "mozilla/ResultExtensions.h" + +#include "nsASCIIMask.h" + +// It's not worthwhile to reallocate the buffer and memcpy the +// contents over when the size difference isn't large. With +// power-of-two allocation buckets and 64 as the typical inline +// capacity, considering that above 1000 there performance aspects +// of realloc and memcpy seem to be absorbed, relative to the old +// code, by the performance benefits of the new code being exact, +// we need to choose which transitions of 256 to 128, 512 to 256 +// and 1024 to 512 to allow. As a guess, let's pick the middle +// one as the the largest potential transition that we forgo. So +// we'll shrink from 1024 bucket to 512 bucket but not from 512 +// bucket to 256 bucket. We'll decide by comparing the difference +// of capacities. As bucket differences, the differences are 256 +// and 512. Since the capacities have various overheads, we +// can't compare with 256 or 512 exactly but it's easier to +// compare to some number that's between the two, so it's +// far away from either to ignore the overheads. 
+const uint32_t kNsStringBufferShrinkingThreshold = 384; + +using double_conversion::DoubleToStringConverter; + +template <typename T> +const typename nsTSubstring<T>::size_type nsTSubstring<T>::kMaxCapacity = + (nsTSubstring<T>::size_type(-1) / 2 - sizeof(nsStringBuffer)) / + sizeof(nsTSubstring<T>::char_type) - + 2; + +#ifdef XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE +template <typename T> +nsTSubstring<T>::nsTSubstring(char_type* aData, size_type aLength, + DataFlags aDataFlags, ClassFlags aClassFlags) + : ::mozilla::detail::nsTStringRepr<T>(aData, aLength, aDataFlags, + aClassFlags) { + AssertValid(); + MOZ_RELEASE_ASSERT(CheckCapacity(aLength), "String is too large."); + + if (aDataFlags & DataFlags::OWNED) { + STRING_STAT_INCREMENT(Adopt); + MOZ_LOG_CTOR(this->mData, "StringAdopt", 1); + } +} +#endif /* XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE */ + +/** + * helper function for down-casting a nsTSubstring to an nsTAutoString. + */ +template <typename T> +inline const nsTAutoString<T>* AsAutoString(const nsTSubstring<T>* aStr) { + return static_cast<const nsTAutoString<T>*>(aStr); +} + +template <typename T> +mozilla::Result<mozilla::BulkWriteHandle<T>, nsresult> +nsTSubstring<T>::BulkWrite(size_type aCapacity, size_type aPrefixToPreserve, + bool aAllowShrinking) { + auto r = StartBulkWriteImpl(aCapacity, aPrefixToPreserve, aAllowShrinking); + if (MOZ_UNLIKELY(r.isErr())) { + return r.propagateErr(); + } + return mozilla::BulkWriteHandle<T>(this, r.unwrap()); +} + +template <typename T> +mozilla::Result<uint32_t, nsresult> nsTSubstring<T>::StartBulkWriteImpl( + size_type aCapacity, size_type aPrefixToPreserve, bool aAllowShrinking, + size_type aSuffixLength, size_type aOldSuffixStart, + size_type aNewSuffixStart) { + // Note! Capacity does not include room for the terminating null char. 
+ + MOZ_ASSERT(aPrefixToPreserve <= aCapacity, + "Requested preservation of an overlong prefix."); + MOZ_ASSERT(aNewSuffixStart + aSuffixLength <= aCapacity, + "Requesed move of suffix to out-of-bounds location."); + // Can't assert aOldSuffixStart, because mLength may not be valid anymore, + // since this method allows itself to be called more than once. + + // If zero capacity is requested, set the string to the special empty + // string. + if (MOZ_UNLIKELY(!aCapacity)) { + ::ReleaseData(this->mData, this->mDataFlags); + SetToEmptyBuffer(); + return 0; + } + + // Note! Capacity() returns 0 when the string is immutable. + const size_type curCapacity = Capacity(); + + bool shrinking = false; + + // We've established that aCapacity > 0. + // |curCapacity == 0| means that the buffer is immutable or 0-sized, so we + // need to allocate a new buffer. We cannot use the existing buffer even + // though it might be large enough. + + if (aCapacity <= curCapacity) { + if (aAllowShrinking) { + shrinking = true; + } else { + char_traits::move(this->mData + aNewSuffixStart, + this->mData + aOldSuffixStart, aSuffixLength); + if (aSuffixLength) { + char_traits::uninitialize( + this->mData + aPrefixToPreserve, + XPCOM_MIN(size_t(aNewSuffixStart - aPrefixToPreserve), + kNsStringBufferMaxPoison)); + char_traits::uninitialize( + this->mData + aNewSuffixStart + aSuffixLength, + XPCOM_MIN(size_t(curCapacity + 1 - aNewSuffixStart - aSuffixLength), + kNsStringBufferMaxPoison)); + } else { + char_traits::uninitialize( + this->mData + aPrefixToPreserve, + XPCOM_MIN(size_t(curCapacity + 1 - aPrefixToPreserve), + kNsStringBufferMaxPoison)); + } + return curCapacity; + } + } + + char_type* oldData = this->mData; + DataFlags oldFlags = this->mDataFlags; + + char_type* newData; + DataFlags newDataFlags; + size_type newCapacity; + + // If this is an nsTAutoStringN, it's possible that we can use the inline + // buffer. 
+ if ((this->mClassFlags & ClassFlags::INLINE) && + (aCapacity <= AsAutoString(this)->mInlineCapacity)) { + newCapacity = AsAutoString(this)->mInlineCapacity; + newData = (char_type*)AsAutoString(this)->mStorage; + newDataFlags = DataFlags::TERMINATED | DataFlags::INLINE; + } else { + // If |aCapacity > kMaxCapacity|, then our doubling algorithm may not be + // able to allocate it. Just bail out in cases like that. We don't want + // to be allocating 2GB+ strings anyway. + static_assert((sizeof(nsStringBuffer) & 0x1) == 0, + "bad size for nsStringBuffer"); + if (MOZ_UNLIKELY(!CheckCapacity(aCapacity))) { + return mozilla::Err(NS_ERROR_OUT_OF_MEMORY); + } + + // We increase our capacity so that the allocated buffer grows + // exponentially, which gives us amortized O(1) appending. Below the + // threshold, we use powers-of-two. Above the threshold, we grow by at + // least 1.125, rounding up to the nearest MiB. + const size_type slowGrowthThreshold = 8 * 1024 * 1024; + + // nsStringBuffer allocates sizeof(nsStringBuffer) + passed size, and + // storageSize below wants extra 1 * sizeof(char_type). + const size_type neededExtraSpace = + sizeof(nsStringBuffer) / sizeof(char_type) + 1; + + size_type temp; + if (aCapacity >= slowGrowthThreshold) { + size_type minNewCapacity = + curCapacity + (curCapacity >> 3); // multiply by 1.125 + temp = XPCOM_MAX(aCapacity, minNewCapacity) + neededExtraSpace; + + // Round up to the next multiple of MiB, but ensure the expected + // capacity doesn't include the extra space required by nsStringBuffer + // and null-termination. + const size_t MiB = 1 << 20; + temp = (MiB * ((temp + MiB - 1) / MiB)) - neededExtraSpace; + } else { + // Round up to the next power of two. 
+ temp = + mozilla::RoundUpPow2(aCapacity + neededExtraSpace) - neededExtraSpace; + } + + newCapacity = XPCOM_MIN(temp, kMaxCapacity); + MOZ_ASSERT(newCapacity >= aCapacity, + "should have hit the early return at the top"); + // Avoid shrinking if the new buffer size is close to the old. Note that + // unsigned underflow is defined behavior. + if ((curCapacity - newCapacity) <= kNsStringBufferShrinkingThreshold && + (this->mDataFlags & DataFlags::REFCOUNTED)) { + MOZ_ASSERT(aAllowShrinking, "How come we didn't return earlier?"); + // We're already close enough to the right size. + newData = oldData; + newCapacity = curCapacity; + } else { + size_type storageSize = (newCapacity + 1) * sizeof(char_type); + // Since we allocate only by powers of 2 we always fit into a full + // mozjemalloc bucket, it's not useful to use realloc, which may spend + // time uselessly copying too much. + nsStringBuffer* newHdr = nsStringBuffer::Alloc(storageSize).take(); + if (newHdr) { + newData = (char_type*)newHdr->Data(); + } else if (shrinking) { + // We're still in a consistent state. + // + // Since shrinking is just a memory footprint optimization, we + // don't propagate OOM if we tried to shrink in order to avoid + // OOM crashes from infallible callers. If we're lucky, soon enough + // a fallible caller reaches OOM and is able to deal or we end up + // disposing of this string before reaching OOM again. 
+ newData = oldData; + newCapacity = curCapacity; + } else { + return mozilla::Err(NS_ERROR_OUT_OF_MEMORY); + } + } + newDataFlags = DataFlags::TERMINATED | DataFlags::REFCOUNTED; + } + + this->mData = newData; + this->mDataFlags = newDataFlags; + + if (oldData == newData) { + char_traits::move(newData + aNewSuffixStart, oldData + aOldSuffixStart, + aSuffixLength); + if (aSuffixLength) { + char_traits::uninitialize( + this->mData + aPrefixToPreserve, + XPCOM_MIN(size_t(aNewSuffixStart - aPrefixToPreserve), + kNsStringBufferMaxPoison)); + char_traits::uninitialize( + this->mData + aNewSuffixStart + aSuffixLength, + XPCOM_MIN(size_t(newCapacity + 1 - aNewSuffixStart - aSuffixLength), + kNsStringBufferMaxPoison)); + } else { + char_traits::uninitialize( + this->mData + aPrefixToPreserve, + XPCOM_MIN(size_t(newCapacity + 1 - aPrefixToPreserve), + kNsStringBufferMaxPoison)); + } + } else { + char_traits::copy(newData, oldData, aPrefixToPreserve); + char_traits::copy(newData + aNewSuffixStart, oldData + aOldSuffixStart, + aSuffixLength); + ::ReleaseData(oldData, oldFlags); + } + + return newCapacity; +} + +template <typename T> +void nsTSubstring<T>::FinishBulkWriteImpl(size_type aLength) { + MOZ_ASSERT(aLength != UINT32_MAX, "OOM magic value passed as length."); + if (aLength) { + FinishBulkWriteImplImpl(aLength); + } else { + ::ReleaseData(this->mData, this->mDataFlags); + SetToEmptyBuffer(); + } + AssertValid(); +} + +template <typename T> +void nsTSubstring<T>::Finalize() { + ::ReleaseData(this->mData, this->mDataFlags); + // this->mData, this->mLength, and this->mDataFlags are purposefully left + // dangling +} + +template <typename T> +bool nsTSubstring<T>::ReplacePrep(index_type aCutStart, size_type aCutLength, + size_type aNewLength) { + aCutLength = XPCOM_MIN(aCutLength, this->mLength - aCutStart); + + mozilla::CheckedInt<size_type> newTotalLen = this->mLength; + newTotalLen += aNewLength; + newTotalLen -= aCutLength; + if (!newTotalLen.isValid()) { + return 
false; + } + + if (aCutStart == this->mLength && Capacity() > newTotalLen.value()) { + this->mDataFlags &= ~DataFlags::VOIDED; + this->mData[newTotalLen.value()] = char_type(0); + this->mLength = newTotalLen.value(); + return true; + } + + return ReplacePrepInternal(aCutStart, aCutLength, aNewLength, + newTotalLen.value()); +} + +template <typename T> +bool nsTSubstring<T>::ReplacePrepInternal(index_type aCutStart, + size_type aCutLen, size_type aFragLen, + size_type aNewLen) { + size_type newSuffixStart = aCutStart + aFragLen; + size_type oldSuffixStart = aCutStart + aCutLen; + size_type suffixLength = this->mLength - oldSuffixStart; + + mozilla::Result<uint32_t, nsresult> r = StartBulkWriteImpl( + aNewLen, aCutStart, false, suffixLength, oldSuffixStart, newSuffixStart); + if (r.isErr()) { + return false; + } + FinishBulkWriteImpl(aNewLen); + return true; +} + +template <typename T> +typename nsTSubstring<T>::size_type nsTSubstring<T>::Capacity() const { + // return 0 to indicate an immutable or 0-sized buffer + + size_type capacity; + if (this->mDataFlags & DataFlags::REFCOUNTED) { + // if the string is readonly, then we pretend that it has no capacity. + nsStringBuffer* hdr = nsStringBuffer::FromData(this->mData); + if (hdr->IsReadonly()) { + capacity = 0; + } else { + capacity = (hdr->StorageSize() / sizeof(char_type)) - 1; + } + } else if (this->mDataFlags & DataFlags::INLINE) { + MOZ_ASSERT(this->mClassFlags & ClassFlags::INLINE); + capacity = AsAutoString(this)->mInlineCapacity; + } else if (this->mDataFlags & DataFlags::OWNED) { + // we don't store the capacity of an adopted buffer because that would + // require an additional member field. the best we can do is base the + // capacity on our length. remains to be seen if this is the right + // trade-off. 
+ capacity = this->mLength; + } else { + capacity = 0; + } + + return capacity; +} + +template <typename T> +bool nsTSubstring<T>::EnsureMutable(size_type aNewLen) { + if (aNewLen == size_type(-1) || aNewLen == this->mLength) { + if (this->mDataFlags & (DataFlags::INLINE | DataFlags::OWNED)) { + return true; + } + if ((this->mDataFlags & DataFlags::REFCOUNTED) && + !nsStringBuffer::FromData(this->mData)->IsReadonly()) { + return true; + } + + aNewLen = this->mLength; + } + return SetLength(aNewLen, mozilla::fallible); +} + +// --------------------------------------------------------------------------- + +// This version of Assign is optimized for single-character assignment. +template <typename T> +void nsTSubstring<T>::Assign(char_type aChar) { + if (MOZ_UNLIKELY(!Assign(aChar, mozilla::fallible))) { + AllocFailed(1); + } +} + +template <typename T> +bool nsTSubstring<T>::Assign(char_type aChar, const fallible_t&) { + auto r = StartBulkWriteImpl(1, 0, true); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + *this->mData = aChar; + FinishBulkWriteImpl(1); + return true; +} + +template <typename T> +void nsTSubstring<T>::Assign(const char_type* aData, size_type aLength) { + if (MOZ_UNLIKELY(!Assign(aData, aLength, mozilla::fallible))) { + AllocFailed(aLength == size_type(-1) ? 
char_traits::length(aData) + : aLength); + } +} + +template <typename T> +bool nsTSubstring<T>::Assign(const char_type* aData, + const fallible_t& aFallible) { + return Assign(aData, size_type(-1), aFallible); +} + +template <typename T> +bool nsTSubstring<T>::Assign(const char_type* aData, size_type aLength, + const fallible_t& aFallible) { + if (!aData || aLength == 0) { + Truncate(); + return true; + } + + if (MOZ_UNLIKELY(aLength == size_type(-1))) { + aLength = char_traits::length(aData); + } + + if (MOZ_UNLIKELY(this->IsDependentOn(aData, aData + aLength))) { + return Assign(string_type(aData, aLength), aFallible); + } + + auto r = StartBulkWriteImpl(aLength, 0, true); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + char_traits::copy(this->mData, aData, aLength); + FinishBulkWriteImpl(aLength); + return true; +} + +template <typename T> +void nsTSubstring<T>::AssignASCII(const char* aData, size_type aLength) { + if (MOZ_UNLIKELY(!AssignASCII(aData, aLength, mozilla::fallible))) { + AllocFailed(aLength); + } +} + +template <typename T> +bool nsTSubstring<T>::AssignASCII(const char* aData, size_type aLength, + const fallible_t& aFallible) { + MOZ_ASSERT(aLength != size_type(-1)); + + // A Unicode string can't depend on an ASCII string buffer, + // so this dependence check only applies to CStrings. 
+#ifdef CharT_is_char + if (this->IsDependentOn(aData, aData + aLength)) { + return Assign(string_type(aData, aLength), aFallible); + } +#endif + + auto r = StartBulkWriteImpl(aLength, 0, true); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + char_traits::copyASCII(this->mData, aData, aLength); + FinishBulkWriteImpl(aLength); + return true; +} + +template <typename T> +void nsTSubstring<T>::AssignLiteral(const char_type* aData, size_type aLength) { + ::ReleaseData(this->mData, this->mDataFlags); + SetData(const_cast<char_type*>(aData), aLength, + DataFlags::TERMINATED | DataFlags::LITERAL); +} + +template <typename T> +void nsTSubstring<T>::Assign(const self_type& aStr) { + if (!Assign(aStr, mozilla::fallible)) { + AllocFailed(aStr.Length()); + } +} + +template <typename T> +bool nsTSubstring<T>::Assign(const self_type& aStr, + const fallible_t& aFallible) { + // |aStr| could be sharable. We need to check its flags to know how to + // deal with it. + + if (&aStr == this) { + return true; + } + + if (!aStr.mLength) { + Truncate(); + this->mDataFlags |= aStr.mDataFlags & DataFlags::VOIDED; + return true; + } + + if (aStr.mDataFlags & DataFlags::REFCOUNTED) { + // nice! we can avoid a string copy :-) + + // |aStr| should be null-terminated + NS_ASSERTION(aStr.mDataFlags & DataFlags::TERMINATED, + "shared, but not terminated"); + + ::ReleaseData(this->mData, this->mDataFlags); + + SetData(aStr.mData, aStr.mLength, + DataFlags::TERMINATED | DataFlags::REFCOUNTED); + + // get an owning reference to the this->mData + nsStringBuffer::FromData(this->mData)->AddRef(); + return true; + } else if (aStr.mDataFlags & DataFlags::LITERAL) { + MOZ_ASSERT(aStr.mDataFlags & DataFlags::TERMINATED, "Unterminated literal"); + + AssignLiteral(aStr.mData, aStr.mLength); + return true; + } + + // else, treat this like an ordinary assignment. 
+ return Assign(aStr.Data(), aStr.Length(), aFallible); +} + +template <typename T> +void nsTSubstring<T>::Assign(self_type&& aStr) { + if (!Assign(std::move(aStr), mozilla::fallible)) { + AllocFailed(aStr.Length()); + } +} + +template <typename T> +void nsTSubstring<T>::AssignOwned(self_type&& aStr) { + NS_ASSERTION(aStr.mDataFlags & (DataFlags::REFCOUNTED | DataFlags::OWNED), + "neither shared nor owned"); + + // If they have a REFCOUNTED or OWNED buffer, we can avoid a copy - so steal + // their buffer and reset them to the empty string. + + // |aStr| should be null-terminated + NS_ASSERTION(aStr.mDataFlags & DataFlags::TERMINATED, + "shared or owned, but not terminated"); + + ::ReleaseData(this->mData, this->mDataFlags); + + SetData(aStr.mData, aStr.mLength, aStr.mDataFlags); + aStr.SetToEmptyBuffer(); +} + +template <typename T> +bool nsTSubstring<T>::Assign(self_type&& aStr, const fallible_t& aFallible) { + // We're moving |aStr| in this method, so we need to try to steal the data, + // and in the fallback perform a copy-assignment followed by a truncation of + // the original string. + + if (&aStr == this) { + NS_WARNING("Move assigning a string to itself?"); + return true; + } + + if (aStr.mDataFlags & (DataFlags::REFCOUNTED | DataFlags::OWNED)) { + AssignOwned(std::move(aStr)); + return true; + } + + // Otherwise treat this as a normal assignment, and truncate the moved string. + // We don't truncate the source string if the allocation failed. 
+ if (!Assign(aStr, aFallible)) { + return false; + } + aStr.Truncate(); + return true; +} + +template <typename T> +void nsTSubstring<T>::Assign(const substring_tuple_type& aTuple) { + if (!Assign(aTuple, mozilla::fallible)) { + AllocFailed(aTuple.Length()); + } +} + +template <typename T> +bool nsTSubstring<T>::AssignNonDependent(const substring_tuple_type& aTuple, + size_type aTupleLength, + const mozilla::fallible_t& aFallible) { + NS_ASSERTION(aTuple.Length() == aTupleLength, "wrong length passed"); + + mozilla::Result<uint32_t, nsresult> r = StartBulkWriteImpl(aTupleLength); + if (r.isErr()) { + return false; + } + + aTuple.WriteTo(this->mData, aTupleLength); + + FinishBulkWriteImpl(aTupleLength); + return true; +} + +template <typename T> +bool nsTSubstring<T>::Assign(const substring_tuple_type& aTuple, + const fallible_t& aFallible) { + const auto [isDependentOnThis, tupleLength] = + aTuple.IsDependentOnWithLength(this->mData, this->mData + this->mLength); + if (isDependentOnThis) { + string_type temp; + self_type& tempSubstring = temp; + if (!tempSubstring.AssignNonDependent(aTuple, tupleLength, aFallible)) { + return false; + } + AssignOwned(std::move(temp)); + return true; + } + + return AssignNonDependent(aTuple, tupleLength, aFallible); +} + +template <typename T> +void nsTSubstring<T>::Adopt(char_type* aData, size_type aLength) { + if (aData) { + ::ReleaseData(this->mData, this->mDataFlags); + + if (aLength == size_type(-1)) { + aLength = char_traits::length(aData); + } + + MOZ_RELEASE_ASSERT(CheckCapacity(aLength), "adopting a too-long string"); + + SetData(aData, aLength, DataFlags::TERMINATED | DataFlags::OWNED); + + STRING_STAT_INCREMENT(Adopt); + // Treat this as construction of a "StringAdopt" object for leak + // tracking purposes. + MOZ_LOG_CTOR(this->mData, "StringAdopt", 1); + } else { + SetIsVoid(true); + } +} + +// This version of Replace is optimized for single-character replacement. 
+template <typename T> +void nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + char_type aChar) { + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + if (ReplacePrep(aCutStart, aCutLength, 1)) { + this->mData[aCutStart] = aChar; + } +} + +template <typename T> +bool nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + char_type aChar, const fallible_t&) { + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + if (!ReplacePrep(aCutStart, aCutLength, 1)) { + return false; + } + + this->mData[aCutStart] = aChar; + + return true; +} + +template <typename T> +void nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength) { + if (!Replace(aCutStart, aCutLength, aData, aLength, mozilla::fallible)) { + AllocFailed(this->Length() - aCutLength + 1); + } +} + +template <typename T> +bool nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength, + const fallible_t& aFallible) { + // unfortunately, some callers pass null :-( + if (!aData) { + aLength = 0; + } else { + if (aLength == size_type(-1)) { + aLength = char_traits::length(aData); + } + + if (this->IsDependentOn(aData, aData + aLength)) { + nsTAutoString<T> temp(aData, aLength); + return Replace(aCutStart, aCutLength, temp, aFallible); + } + } + + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + bool ok = ReplacePrep(aCutStart, aCutLength, aLength); + if (!ok) { + return false; + } + + if (aLength > 0) { + char_traits::copy(this->mData + aCutStart, aData, aLength); + } + + return true; +} + +template <typename T> +void nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + const substring_tuple_type& aTuple) { + const auto [isDependentOnThis, tupleLength] = + aTuple.IsDependentOnWithLength(this->mData, this->mData + this->mLength); + + if (isDependentOnThis) { + nsTAutoString<T> temp; + if (!temp.AssignNonDependent(aTuple, tupleLength, 
mozilla::fallible)) { + AllocFailed(tupleLength); + } + Replace(aCutStart, aCutLength, temp); + return; + } + + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + if (ReplacePrep(aCutStart, aCutLength, tupleLength) && tupleLength > 0) { + aTuple.WriteTo(this->mData + aCutStart, tupleLength); + } +} + +template <typename T> +void nsTSubstring<T>::ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type* aData, + size_type aLength) { + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + if (!aCutStart && aCutLength == this->Length() && + !(this->mDataFlags & DataFlags::REFCOUNTED)) { + // Check for REFCOUNTED above to avoid undoing the effect of + // SetCapacity(). + AssignLiteral(aData, aLength); + } else if (ReplacePrep(aCutStart, aCutLength, aLength) && aLength > 0) { + char_traits::copy(this->mData + aCutStart, aData, aLength); + } +} + +template <typename T> +void nsTSubstring<T>::Append(char_type aChar) { + if (MOZ_UNLIKELY(!Append(aChar, mozilla::fallible))) { + AllocFailed(this->mLength + 1); + } +} + +template <typename T> +bool nsTSubstring<T>::Append(char_type aChar, const fallible_t& aFallible) { + size_type oldLen = this->mLength; + size_type newLen = oldLen + 1; // Can't overflow + auto r = StartBulkWriteImpl(newLen, oldLen, false); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + this->mData[oldLen] = aChar; + FinishBulkWriteImpl(newLen); + return true; +} + +template <typename T> +void nsTSubstring<T>::Append(const char_type* aData, size_type aLength) { + if (MOZ_UNLIKELY(!Append(aData, aLength, mozilla::fallible))) { + AllocFailed(this->mLength + (aLength == size_type(-1) + ? 
char_traits::length(aData) + : aLength)); + } +} + +template <typename T> +bool nsTSubstring<T>::Append(const char_type* aData, size_type aLength, + const fallible_t& aFallible) { + if (MOZ_UNLIKELY(aLength == size_type(-1))) { + aLength = char_traits::length(aData); + } + + if (MOZ_UNLIKELY(!aLength)) { + // Avoid undoing the effect of SetCapacity() if both + // mLength and aLength are zero. + return true; + } + + if (MOZ_UNLIKELY(this->IsDependentOn(aData, aData + aLength))) { + return Append(string_type(aData, aLength), mozilla::fallible); + } + size_type oldLen = this->mLength; + mozilla::CheckedInt<size_type> newLen(oldLen); + newLen += aLength; + if (MOZ_UNLIKELY(!newLen.isValid())) { + return false; + } + auto r = StartBulkWriteImpl(newLen.value(), oldLen, false); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + char_traits::copy(this->mData + oldLen, aData, aLength); + FinishBulkWriteImpl(newLen.value()); + return true; +} + +template <typename T> +void nsTSubstring<T>::AppendASCII(const char* aData, size_type aLength) { + if (MOZ_UNLIKELY(!AppendASCII(aData, aLength, mozilla::fallible))) { + AllocFailed(this->mLength + + (aLength == size_type(-1) ? strlen(aData) : aLength)); + } +} + +template <typename T> +bool nsTSubstring<T>::AppendASCII(const char* aData, + const fallible_t& aFallible) { + return AppendASCII(aData, size_type(-1), aFallible); +} + +template <typename T> +bool nsTSubstring<T>::AppendASCII(const char* aData, size_type aLength, + const fallible_t& aFallible) { + if (MOZ_UNLIKELY(aLength == size_type(-1))) { + aLength = strlen(aData); + } + + if (MOZ_UNLIKELY(!aLength)) { + // Avoid undoing the effect of SetCapacity() if both + // mLength and aLength are zero. 
+ return true; + } + +#ifdef CharT_is_char + // 16-bit string can't depend on an 8-bit buffer + if (MOZ_UNLIKELY(this->IsDependentOn(aData, aData + aLength))) { + return Append(string_type(aData, aLength), mozilla::fallible); + } +#endif + size_type oldLen = this->mLength; + mozilla::CheckedInt<size_type> newLen(oldLen); + newLen += aLength; + if (MOZ_UNLIKELY(!newLen.isValid())) { + return false; + } + auto r = StartBulkWriteImpl(newLen.value(), oldLen, false); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + char_traits::copyASCII(this->mData + oldLen, aData, aLength); + FinishBulkWriteImpl(newLen.value()); + return true; +} + +template <typename T> +void nsTSubstring<T>::Append(const self_type& aStr) { + if (MOZ_UNLIKELY(!Append(aStr, mozilla::fallible))) { + AllocFailed(this->mLength + aStr.Length()); + } +} + +template <typename T> +bool nsTSubstring<T>::Append(const self_type& aStr, + const fallible_t& aFallible) { + // Check refcounted to avoid undoing the effects of SetCapacity(). + if (MOZ_UNLIKELY(!this->mLength && + !(this->mDataFlags & DataFlags::REFCOUNTED))) { + return Assign(aStr, mozilla::fallible); + } + return Append(aStr.BeginReading(), aStr.Length(), mozilla::fallible); +} + +template <typename T> +void nsTSubstring<T>::Append(const substring_tuple_type& aTuple) { + if (MOZ_UNLIKELY(!Append(aTuple, mozilla::fallible))) { + AllocFailed(this->mLength + aTuple.Length()); + } +} + +template <typename T> +bool nsTSubstring<T>::Append(const substring_tuple_type& aTuple, + const fallible_t& aFallible) { + const auto [isDependentOnThis, tupleLength] = + aTuple.IsDependentOnWithLength(this->mData, this->mData + this->mLength); + + if (MOZ_UNLIKELY(!tupleLength)) { + // Avoid undoing the effect of SetCapacity() if both + // mLength and tupleLength are zero. 
+ return true; + } + + if (MOZ_UNLIKELY(isDependentOnThis)) { + return Append(string_type(aTuple), aFallible); + } + + size_type oldLen = this->mLength; + mozilla::CheckedInt<size_type> newLen(oldLen); + newLen += tupleLength; + if (MOZ_UNLIKELY(!newLen.isValid())) { + return false; + } + auto r = StartBulkWriteImpl(newLen.value(), oldLen, false); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + aTuple.WriteTo(this->mData + oldLen, tupleLength); + FinishBulkWriteImpl(newLen.value()); + return true; +} + +template <typename T> +void nsTSubstring<T>::SetCapacity(size_type aCapacity) { + if (!SetCapacity(aCapacity, mozilla::fallible)) { + AllocFailed(aCapacity); + } +} + +template <typename T> +bool nsTSubstring<T>::SetCapacity(size_type aCapacity, const fallible_t&) { + size_type length = this->mLength; + // This method can no longer be used to shorten the + // logical length. + size_type capacity = XPCOM_MAX(aCapacity, length); + + mozilla::Result<uint32_t, nsresult> r = + StartBulkWriteImpl(capacity, length, true); + if (r.isErr()) { + return false; + } + + if (MOZ_UNLIKELY(!capacity)) { + // Zero capacity was requested on a zero-length + // string. In this special case, we are pointing + // to the special empty buffer, which is already + // zero-terminated and not writable, so we must + // not attempt to zero-terminate it. + AssertValid(); + return true; + } + + // FinishBulkWriteImpl with argument zero releases + // the heap-allocated buffer. However, SetCapacity() + // is a special case that allows mLength to be zero + // while a heap-allocated buffer exists. + // By calling FinishBulkWriteImplImpl, we skip the + // zero case handling that's inappropriate in the + // SetCapacity() case. 
+ FinishBulkWriteImplImpl(length); + return true; +} + +template <typename T> +void nsTSubstring<T>::SetLength(size_type aLength) { + if (!SetLength(aLength, mozilla::fallible)) { + AllocFailed(aLength); + } +} + +template <typename T> +bool nsTSubstring<T>::SetLength(size_type aLength, + const fallible_t& aFallible) { + size_type preserve = XPCOM_MIN(aLength, this->mLength); + mozilla::Result<uint32_t, nsresult> r = + StartBulkWriteImpl(aLength, preserve, true); + if (r.isErr()) { + return false; + } + + FinishBulkWriteImpl(aLength); + + return true; +} + +template <typename T> +void nsTSubstring<T>::Truncate() { + ::ReleaseData(this->mData, this->mDataFlags); + SetToEmptyBuffer(); + AssertValid(); +} + +template <typename T> +void nsTSubstring<T>::SetIsVoid(bool aVal) { + if (aVal) { + Truncate(); + this->mDataFlags |= DataFlags::VOIDED; + } else { + this->mDataFlags &= ~DataFlags::VOIDED; + } +} + +namespace mozilla { +namespace detail { + +template <typename T> +typename nsTStringRepr<T>::char_type nsTStringRepr<T>::First() const { + MOZ_RELEASE_ASSERT(this->mLength > 0, "|First()| called on an empty string"); + return this->mData[0]; +} + +template <typename T> +typename nsTStringRepr<T>::char_type nsTStringRepr<T>::Last() const { + MOZ_RELEASE_ASSERT(this->mLength > 0, "|Last()| called on an empty string"); + return this->mData[this->mLength - 1]; +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const self_type& aStr) const { + return this->mLength == aStr.mLength && + char_traits::compare(this->mData, aStr.mData, this->mLength) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const self_type& aStr, + comparator_type aComp) const { + return this->mLength == aStr.mLength && + aComp(this->mData, aStr.mData, this->mLength, aStr.mLength) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const substring_tuple_type& aTuple) const { + return Equals(substring_type(aTuple)); +} + +template <typename T> +bool 
nsTStringRepr<T>::Equals(const substring_tuple_type& aTuple, + comparator_type aComp) const { + return Equals(substring_type(aTuple), aComp); +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const char_type* aData) const { + // unfortunately, some callers pass null :-( + if (!aData) { + MOZ_ASSERT_UNREACHABLE("null data pointer"); + return this->mLength == 0; + } + + // XXX avoid length calculation? + size_type length = char_traits::length(aData); + return this->mLength == length && + char_traits::compare(this->mData, aData, this->mLength) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const char_type* aData, + comparator_type aComp) const { + // unfortunately, some callers pass null :-( + if (!aData) { + MOZ_ASSERT_UNREACHABLE("null data pointer"); + return this->mLength == 0; + } + + // XXX avoid length calculation? + size_type length = char_traits::length(aData); + return this->mLength == length && + aComp(this->mData, aData, this->mLength, length) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::EqualsASCII(const char* aData, size_type aLen) const { + return this->mLength == aLen && + char_traits::compareASCII(this->mData, aData, aLen) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::EqualsASCII(const char* aData) const { + return char_traits::compareASCIINullTerminated(this->mData, this->mLength, + aData) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::EqualsLatin1(const char* aData, + const size_type aLength) const { + return (this->mLength == aLength) && + char_traits::equalsLatin1(this->mData, aData, aLength); +} + +template <typename T> +bool nsTStringRepr<T>::LowerCaseEqualsASCII(const char* aData, + size_type aLen) const { + return this->mLength == aLen && + char_traits::compareLowerCaseToASCII(this->mData, aData, aLen) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::LowerCaseEqualsASCII(const char* aData) const { + return char_traits::compareLowerCaseToASCIINullTerminated( + this->mData, 
this->mLength, aData) == 0; +} + +template <typename T> +typename nsTStringRepr<T>::size_type nsTStringRepr<T>::CountChar( + char_type aChar) const { + const char_type* start = this->mData; + const char_type* end = this->mData + this->mLength; + + return NS_COUNT(start, end, aChar); +} + +template <typename T> +int32_t nsTStringRepr<T>::FindChar(char_type aChar, index_type aOffset) const { + if (aOffset < this->mLength) { + const char_type* result = char_traits::find(this->mData + aOffset, + this->mLength - aOffset, aChar); + if (result) { + return result - this->mData; + } + } + return -1; +} + +template <typename T> +bool nsTStringRepr<T>::Contains(char_type aChar) const { + return FindChar(aChar) != kNotFound; +} + +} // namespace detail +} // namespace mozilla + +template <typename T> +void nsTSubstring<T>::StripChar(char_type aChar) { + if (this->mLength == 0) { + return; + } + + if (!EnsureMutable()) { // XXX do this lazily? + AllocFailed(this->mLength); + } + + // XXX(darin): this code should defer writing until necessary. + + char_type* to = this->mData; + char_type* from = this->mData; + char_type* end = this->mData + this->mLength; + + while (from < end) { + char_type theChar = *from++; + if (aChar != theChar) { + *to++ = theChar; + } + } + *to = char_type(0); // add the null + this->mLength = to - this->mData; +} + +template <typename T> +void nsTSubstring<T>::StripChars(const char_type* aChars) { + if (this->mLength == 0) { + return; + } + + if (!EnsureMutable()) { // XXX do this lazily? + AllocFailed(this->mLength); + } + + // XXX(darin): this code should defer writing until necessary. + + char_type* to = this->mData; + char_type* from = this->mData; + char_type* end = this->mData + this->mLength; + + while (from < end) { + char_type theChar = *from++; + const char_type* test = aChars; + + for (; *test && *test != theChar; ++test) + ; + + if (!*test) { + // Not stripped, copy this char. 
+ *to++ = theChar; + } + } + *to = char_type(0); // add the null + this->mLength = to - this->mData; +} + +template <typename T> +void nsTSubstring<T>::StripTaggedASCII(const ASCIIMaskArray& aToStrip) { + if (this->mLength == 0) { + return; + } + + if (!EnsureMutable()) { + AllocFailed(this->mLength); + } + + char_type* to = this->mData; + char_type* from = this->mData; + char_type* end = this->mData + this->mLength; + + while (from < end) { + uint32_t theChar = (uint32_t)*from++; + // Replacing this with a call to ASCIIMask::IsMasked + // regresses performance somewhat, so leaving it inlined. + if (!mozilla::ASCIIMask::IsMasked(aToStrip, theChar)) { + // Not stripped, copy this char. + *to++ = (char_type)theChar; + } + } + *to = char_type(0); // add the null + this->mLength = to - this->mData; +} + +template <typename T> +void nsTSubstring<T>::StripCRLF() { + // Expanding this call to copy the code from StripTaggedASCII + // instead of just calling it does somewhat help with performance + // but it is not worth it given the duplicated code. + StripTaggedASCII(mozilla::ASCIIMask::MaskCRLF()); +} + +template <typename T> +struct MOZ_STACK_CLASS PrintfAppend : public mozilla::PrintfTarget { + explicit PrintfAppend(nsTSubstring<T>* aString) : mString(aString) {} + + bool append(const char* aStr, size_t aLen) override { + if (aLen == 0) { + return true; + } + + mString->AppendASCII(aStr, aLen); + return true; + } + + private: + nsTSubstring<T>* mString; +}; + +template <typename T> +void nsTSubstring<T>::AppendPrintf(const char* aFormat, ...) 
{ + PrintfAppend<T> appender(this); + va_list ap; + va_start(ap, aFormat); + bool r = appender.vprint(aFormat, ap); + if (!r) { + MOZ_CRASH("Allocation or other failure in PrintfTarget::print"); + } + va_end(ap); +} + +template <typename T> +void nsTSubstring<T>::AppendVprintf(const char* aFormat, va_list aAp) { + PrintfAppend<T> appender(this); + bool r = appender.vprint(aFormat, aAp); + if (!r) { + MOZ_CRASH("Allocation or other failure in PrintfTarget::print"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntDec(int32_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntDec(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntDec(uint32_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntDec(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntOct(uint32_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntOct(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntHex(uint32_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntHex(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntDec(int64_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntDec(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntDec(uint64_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntDec(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or 
other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntOct(uint64_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntOct(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntHex(uint64_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntHex(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +// Returns the length of the formatted aDouble in aBuf. +static int FormatWithoutTrailingZeros(char (&aBuf)[40], double aDouble, + int aPrecision) { + static const DoubleToStringConverter converter( + DoubleToStringConverter::UNIQUE_ZERO | + DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + "Infinity", "NaN", 'e', -6, 21, 6, 1); + double_conversion::StringBuilder builder(aBuf, sizeof(aBuf)); + bool exponential_notation = false; + converter.ToPrecision(aDouble, aPrecision, &exponential_notation, &builder); + int length = builder.position(); + char* formattedDouble = builder.Finalize(); + + // If we have a shorter string than aPrecision, it means we have a special + // value (NaN or Infinity). All other numbers will be formatted with at + // least aPrecision digits. + if (length <= aPrecision) { + return length; + } + + char* end = formattedDouble + length; + char* decimalPoint = strchr(aBuf, '.'); + // No trailing zeros to remove. + if (!decimalPoint) { + return length; + } + + if (MOZ_UNLIKELY(exponential_notation)) { + // We need to check for cases like 1.00000e-10 (yes, this is + // disgusting). 
// Locate the 'e' that begins the exponent by scanning backwards from `end`.
    char* exponent = end - 1;
    for (;; --exponent) {
      if (*exponent == 'e') {
        break;
      }
    }
    char* zerosBeforeExponent = exponent - 1;
    for (; zerosBeforeExponent != decimalPoint; --zerosBeforeExponent) {
      if (*zerosBeforeExponent != '0') {
        break;
      }
    }
    if (zerosBeforeExponent == decimalPoint) {
      --zerosBeforeExponent;
    }
    // Slide the exponent to the left over the trailing zeros. Don't
    // worry about copying the trailing NUL character.
    size_t exponentSize = end - exponent;
    memmove(zerosBeforeExponent + 1, exponent, exponentSize);
    length -= exponent - (zerosBeforeExponent + 1);
  } else {
    char* trailingZeros = end - 1;
    for (; trailingZeros != decimalPoint; --trailingZeros) {
      if (*trailingZeros != '0') {
        break;
      }
    }
    if (trailingZeros == decimalPoint) {
      --trailingZeros;
    }
    length -= end - (trailingZeros + 1);
  }

  return length;
}

/**
 * Appends the text form of a single-precision value to this string.
 *
 * The value is formatted into a 40-byte stack buffer by
 * FormatWithoutTrailingZeros() and the resulting `length` characters are
 * appended with AppendASCII().
 *
 * NOTE(review): the literal 6 is presumably the number of significant digits
 * requested from the formatter -- confirm against FormatWithoutTrailingZeros.
 */
template <typename T>
void nsTSubstring<T>::AppendFloat(float aFloat) {
  char buf[40];
  int length = FormatWithoutTrailingZeros(buf, aFloat, 6);
  AppendASCII(buf, length);
}

/**
 * Appends the text form of a double-precision value to this string.
 *
 * Same flow as the float overload, but 15 is passed to
 * FormatWithoutTrailingZeros() instead of 6.
 *
 * NOTE(review): 15 is presumably the number of significant digits -- confirm
 * against FormatWithoutTrailingZeros.
 */
template <typename T>
void nsTSubstring<T>::AppendFloat(double aFloat) {
  char buf[40];
  int length = FormatWithoutTrailingZeros(buf, aFloat, 15);
  AppendASCII(buf, length);
}

/**
 * Measures the character storage attributed to this string, excluding the
 * string object itself.
 *
 * @param aMallocSizeOf the measuring function applied to the allocation.
 * @return for DataFlags::REFCOUNTED data, whatever
 *         nsStringBuffer::SizeOfIncludingThisIfUnshared() reports for the
 *         backing buffer; for DataFlags::OWNED data, aMallocSizeOf(mData);
 *         otherwise 0.
 */
template <typename T>
size_t nsTSubstring<T>::SizeOfExcludingThisIfUnshared(
    mozilla::MallocSizeOf aMallocSizeOf) const {
  if (this->mDataFlags & DataFlags::REFCOUNTED) {
    return nsStringBuffer::FromData(this->mData)
        ->SizeOfIncludingThisIfUnshared(aMallocSizeOf);
  }
  if (this->mDataFlags & DataFlags::OWNED) {
    return aMallocSizeOf(this->mData);
  }

  // If we reach here, exactly one of the following must be true:
  // - DataFlags::VOIDED is set, and this->mData points to sEmptyBuffer;
  // - DataFlags::INLINE is set, and this->mData points to a buffer within a
  //   string object (e.g. nsAutoString);
  // - None of DataFlags::REFCOUNTED, DataFlags::OWNED, DataFlags::INLINE is
  //   set, and this->mData points to a buffer owned by something else.
  //
  // In all three cases, we don't measure it.
  return 0;
}

/**
 * Variant of SizeOfExcludingThisIfUnshared() that, for DataFlags::REFCOUNTED
 * data, delegates to nsStringBuffer::SizeOfIncludingThisEvenIfShared().
 *
 * @param aMallocSizeOf the measuring function applied to the allocation.
 * @return the measured size of the character storage, or 0 when the data is
 *         neither REFCOUNTED nor OWNED.
 */
template <typename T>
size_t nsTSubstring<T>::SizeOfExcludingThisEvenIfShared(
    mozilla::MallocSizeOf aMallocSizeOf) const {
  // This is identical to SizeOfExcludingThisIfUnshared except for the
  // DataFlags::REFCOUNTED case.
  if (this->mDataFlags & DataFlags::REFCOUNTED) {
    return nsStringBuffer::FromData(this->mData)
        ->SizeOfIncludingThisEvenIfShared(aMallocSizeOf);
  }
  if (this->mDataFlags & DataFlags::OWNED) {
    return aMallocSizeOf(this->mData);
  }
  return 0;
}

/**
 * Size of this string object itself (aMallocSizeOf(this)) plus the result of
 * SizeOfExcludingThisIfUnshared().
 */
template <typename T>
size_t nsTSubstring<T>::SizeOfIncludingThisIfUnshared(
    mozilla::MallocSizeOf aMallocSizeOf) const {
  return aMallocSizeOf(this) + SizeOfExcludingThisIfUnshared(aMallocSizeOf);
}

/**
 * Size of this string object itself (aMallocSizeOf(this)) plus the result of
 * SizeOfExcludingThisEvenIfShared().
 */
template <typename T>
size_t nsTSubstring<T>::SizeOfIncludingThisEvenIfShared(
    mozilla::MallocSizeOf aMallocSizeOf) const {
  return aMallocSizeOf(this) + SizeOfExcludingThisEvenIfShared(aMallocSizeOf);
}

/**
 * Returns a splitter over this string that uses aChar as the separator.
 *
 * The splitter wraps an nsTCharSeparatedTokenizerTemplate configured with
 * NS_TokenizerIgnoreNothing and nsTokenizerFlags::IncludeEmptyTokenAtEnd.
 * The tokenizer is constructed from *this.
 * NOTE(review): callers presumably must keep this string alive and unmodified
 * while the splitter is in use -- confirm against the tokenizer.
 */
template <typename T>
nsTSubstringSplitter<T> nsTSubstring<T>::Split(const char_type aChar) const {
  return nsTSubstringSplitter<T>(
      nsTCharSeparatedTokenizerTemplate<
          NS_TokenizerIgnoreNothing, T,
          nsTokenizerFlags::IncludeEmptyTokenAtEnd>(*this, aChar));
}

// Common logic for nsTSubstring<T>::ToInteger and nsTSubstring<T>::ToInteger64.
//
// aSrc        the string to parse.
// aErrorCode  out-param; set to NS_ERROR_ILLEGAL_VALUE on entry and changed
//             to NS_OK only once a number has been parsed without overflow.
// aRadix      asserted to be 10 or 16.
//
// Returns the parsed value (negated if a '-' was seen while skipping leading
// characters). Returns 0, with *aErrorCode left at NS_ERROR_ILLEGAL_VALUE,
// when no digit is found, when a hex letter is met with aRadix == 10, or when
// the accumulated value overflows int_type.
template <typename T, typename int_type>
int_type ToIntegerCommon(const nsTSubstring<T>& aSrc, nsresult* aErrorCode,
                         uint32_t aRadix) {
  MOZ_ASSERT(aRadix == 10 || aRadix == 16);

  // Initial value, override if we find an integer.
  *aErrorCode = NS_ERROR_ILLEGAL_VALUE;

  // Begin by skipping over leading chars that shouldn't be part of the number.
  auto cp = aSrc.BeginReading();
  auto endcp = aSrc.EndReading();
  bool negate = false;
  bool done = false;

  // NB: For backwards compatibility I'm not going to change this logic but
  //     it seems really odd. Previously there was logic to auto-detect the
  //     radix if kAutoDetect was passed in. In practice this value was never
  //     used, so it pretended to auto detect and skipped some preceding
  //     letters (excluding valid hex digits) but never used the result.
  //
  // For example if you pass in "Get the number: 10", aRadix = 10 we'd
  // skip the 'G', and then fail to parse "et the number: 10". If aRadix =
  // 16 we'd skip the 'G', and parse just 'e' returning 14.
  //
  // Note that the skip stops at hex letters regardless of aRadix, and that a
  // '-' anywhere among the skipped characters marks the result as negative.
  while ((cp < endcp) && (!done)) {
    switch (*cp++) {
      // clang-format off
      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        done = true;
        break;
      // clang-format on
      case '-':
        negate = true;
        break;
      default:
        break;
    }
  }

  if (!done) {
    // No base 16 or base 10 digits were found.
    return 0;
  }

  // Step back. (The switch above post-incremented past the first digit.)
  cp--;

  mozilla::CheckedInt<int_type> result;

  // Now iterate the numeric chars and build our result.
  while (cp < endcp) {
    auto theChar = *cp++;
    if (('0' <= theChar) && (theChar <= '9')) {
      result = (aRadix * result) + (theChar - '0');
    } else if ((theChar >= 'A') && (theChar <= 'F')) {
      if (10 == aRadix) {
        // Invalid base 10 digit, error out.
        return 0;
      } else {
        result = (aRadix * result) + ((theChar - 'A') + 10);
      }
    } else if ((theChar >= 'a') && (theChar <= 'f')) {
      if (10 == aRadix) {
        // Invalid base 10 digit, error out.
        return 0;
      } else {
        result = (aRadix * result) + ((theChar - 'a') + 10);
      }
    } else if ((('X' == theChar) || ('x' == theChar)) && result == 0) {
      // For some reason we support a leading 'x' regardless of radix. For
      // example: "000000x500", aRadix = 10 would be parsed as 500 rather
      // than 0.
      continue;
    } else {
      // We've encountered a char that's not a legal number or sign and we can
      // terminate processing.
      break;
    }

    // Checked after every accumulated digit so overflow is reported as soon
    // as it happens.
    if (!result.isValid()) {
      // Overflow!
      return 0;
    }
  }

  // Integer found.
  *aErrorCode = NS_OK;

  if (negate) {
    result = -result;
  }

  return result.value();
}

/**
 * nsTSubstring::ToInteger
 *
 * 32-bit entry point; forwards to ToIntegerCommon with int_type = int32_t.
 */
template <typename T>
int32_t nsTSubstring<T>::ToInteger(nsresult* aErrorCode,
                                   uint32_t aRadix) const {
  return ToIntegerCommon<T, int32_t>(*this, aErrorCode, aRadix);
}

/**
 * nsTSubstring::ToInteger64
 *
 * 64-bit entry point; forwards to ToIntegerCommon with int_type = int64_t.
 */
template <typename T>
int64_t nsTSubstring<T>::ToInteger64(nsresult* aErrorCode,
                                     uint32_t aRadix) const {
  return ToIntegerCommon<T, int64_t>(*this, aErrorCode, aRadix);
}
diff --git a/xpcom/string/nsTSubstring.h b/xpcom/string/nsTSubstring.h
new file mode 100644
index 0000000000..e7127e5973
--- /dev/null
+++ b/xpcom/string/nsTSubstring.h
@@ -0,0 +1,1388 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// IWYU pragma: private, include "nsString.h"

#ifndef nsTSubstring_h
#define nsTSubstring_h

#include <iterator>
#include <type_traits>

#include "mozilla/Casting.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/IntegerPrintfMacros.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/IntegerTypeTraits.h"
#include "mozilla/Result.h"
#include "mozilla/ResultExtensions.h"
#include "mozilla/Span.h"
#include "mozilla/Unused.h"

#include "nsTStringRepr.h"

#ifndef MOZILLA_INTERNAL_API
#  error "Using XPCOM strings is limited to code linked into libxul."
#endif

// The max number of logically uninitialized code units to
// fill with a marker byte or to mark as uninitialized for
// memory checking. (Limited to avoid quadratic behavior.)
const size_t kNsStringBufferMaxPoison = 16;

template <typename T>
class nsTSubstringSplitter;
template <typename T>
class nsTString;
template <typename T>
class nsTSubstring;

namespace mozilla {

/**
 * This handle represents permission to perform low-level writes
 * to the storage buffer of a string in a manner that's aware of the
 * actual capacity of the storage buffer allocation and that's
 * cache-friendly in the sense that the writing of zero terminator
 * for C compatibility can happen in linear memory access order
 * (i.e. the zero terminator write takes place after writing
 * new content to the string as opposed to the zero terminator
 * write happening first causing a non-linear memory write for
 * cache purposes).
 *
 * If you requested a prefix to be preserved when starting
 * or restarting the bulk write, the prefix is present at the
 * start of the buffer exposed by this handle as Span or
 * as a raw pointer, and it's your responsibility to start
 * writing after the preserved prefix (which you
 * presumably wanted not to overwrite since you asked for
 * it to be preserved).
 *
 * In a success case, you must call Finish() with the new
 * length of the string. In failure cases, it's OK to return
 * early from the function whose local variable this handle is.
 * The destructor of this class takes care of putting the
 * string in a valid and mostly harmless state in that case
 * by setting the value of a non-empty string to a single
 * REPLACEMENT CHARACTER or in the case of nsACString that's
 * too short for a REPLACEMENT CHARACTER to fit, an ASCII
 * SUBSTITUTE.
 *
 * You must not allow this handle to outlive the string you
 * obtained it from.
 *
 * You must not access the string you obtained this handle
 * from in any way other than through this handle until
 * you call Finish() on the handle or the handle goes out
 * of scope.
 *
 * Once you've called Finish(), you must not call any
 * methods on this handle and must not use values previously
 * obtained.
 *
 * Once you call RestartBulkWrite(), you must not use
 * values previously obtained from this handle and must
 * reobtain the new corresponding values.
 */
template <typename T>
class BulkWriteHandle final {
  friend class nsTSubstring<T>;

 public:
  typedef typename mozilla::detail::nsTStringRepr<T> base_string_type;
  typedef typename base_string_type::size_type size_type;

  /**
   * Pointer to the start of the writable buffer. Never nullptr.
   *
   * This pointer is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  T* Elements() const {
    MOZ_ASSERT(mString);
    return mString->mData;
  }

  /**
   * How many code units can be written to the buffer.
   * (Note: This is not the same as the string's Length().)
   *
   * This value is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  size_type Length() const {
    MOZ_ASSERT(mString);
    return mCapacity;
  }

  /**
   * Pointer past the end of the buffer.
   *
   * This pointer is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  T* End() const { return Elements() + Length(); }

  /**
   * The writable buffer as Span.
   *
   * This Span is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  auto AsSpan() const { return mozilla::Span<T>{Elements(), Length()}; }

  /**
   * Autoconvert to the buffer as writable Span.
   *
   * This Span is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  operator mozilla::Span<T>() const { return AsSpan(); }

  /**
   * Restart the bulk write with a different capacity.
   *
   * This method invalidates previous return values
   * of the other methods above.
   *
   * Can fail if out of memory leaving the buffer
   * in the state before this call.
   *
   * @param aCapacity the new requested capacity
   * @param aPrefixToPreserve the number of code units at
   *                          the start of the string to
   *                          copy over to the new buffer
   * @param aAllowShrinking whether the string is
   *                        allowed to attempt to
   *                        allocate a smaller buffer
   *                        for its content and copy
   *                        the data over.
   * @return mozilla::Ok() on success, in which case Length() reports the
   *         capacity returned by StartBulkWriteImpl(); otherwise the
   *         nsresult error from StartBulkWriteImpl().
   */
  mozilla::Result<mozilla::Ok, nsresult> RestartBulkWrite(
      size_type aCapacity, size_type aPrefixToPreserve, bool aAllowShrinking) {
    MOZ_ASSERT(mString);
    MOZ_TRY_VAR(mCapacity, mString->StartBulkWriteImpl(
                               aCapacity, aPrefixToPreserve, aAllowShrinking));
    return mozilla::Ok();
  }

  /**
   * Indicate that the bulk write finished successfully.
   *
   * After this call the handle no longer refers to the string
   * (mString is reset to nullptr), so the destructor does nothing.
   *
   * @param aLength the number of code units written;
   *                must not exceed Length()
   * @param aAllowShrinking whether the string is
   *                        allowed to attempt to
   *                        allocate a smaller buffer
   *                        for its content and copy
   *                        the data over.
   */
  void Finish(size_type aLength, bool aAllowShrinking) {
    MOZ_ASSERT(mString);
    MOZ_ASSERT(aLength <= mCapacity);
    if (!aLength) {
      // Truncate is safe even when the string is in an invalid state
      mString->Truncate();
      mString = nullptr;
      return;
    }
    if (aAllowShrinking) {
      // Shrinking is best-effort: the result of the reallocation attempt is
      // deliberately discarded.
      mozilla::Unused << mString->StartBulkWriteImpl(aLength, aLength, true);
    }
    mString->FinishBulkWriteImpl(aLength);
    mString = nullptr;
  }

  /**
   * Move construction transfers the string pointer and capacity from aOther.
   * aOther.Forget() clears aOther's string pointer, so aOther's destructor
   * returns early; Forget() does not touch aOther.mCapacity, which is read
   * afterwards.
   */
  BulkWriteHandle(BulkWriteHandle&& aOther)
      : mString(aOther.Forget()), mCapacity(aOther.mCapacity) {}

  /**
   * If the handle was neither finished nor moved from (and the capacity is
   * non-zero), overwrites the start of the buffer with a placeholder, sets
   * the string's length to match and writes a zero terminator.
   */
  ~BulkWriteHandle() {
    if (!mString || !mCapacity) {
      return;
    }
    // The old zero terminator may be gone by now, so we need
    // to write a new one somewhere and make length match.
    // We can use a length between 1 and mCapacity.
    // The contents of the string can be partially uninitialized
    // or partially initialized in a way that would be dangerous
    // if parsed by some recipient. It's prudent to write something
    // sane as the contents of the string. U+FFFD is the safest
    // placeholder, but when it doesn't fit, let's use ASCII
    // substitute. Merely truncating the string to a zero-length
    // string might be dangerous in some scenarios. See
    // https://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences
    // for closely related scenario.
    auto ptr = Elements();
    // Cast the pointer below to silence warnings
    if (sizeof(T) == 1) {
      unsigned char* charPtr = reinterpret_cast<unsigned char*>(ptr);
      if (mCapacity >= 3) {
        // EF BF BD is U+FFFD REPLACEMENT CHARACTER encoded as UTF-8.
        *charPtr++ = 0xEF;
        *charPtr++ = 0xBF;
        *charPtr++ = 0xBD;
        mString->mLength = 3;
      } else {
        // 0x1A is the ASCII SUBSTITUTE control character.
        *charPtr++ = 0x1A;
        mString->mLength = 1;
      }
      *charPtr = 0;
    } else if (sizeof(T) == 2) {
      char16_t* charPtr = reinterpret_cast<char16_t*>(ptr);
      *charPtr++ = 0xFFFD;
      *charPtr = 0;
      mString->mLength = 1;
    } else {
      MOZ_ASSERT_UNREACHABLE("Only 8-bit and 16-bit code units supported.");
    }
  }

  BulkWriteHandle() = delete;
  BulkWriteHandle(const BulkWriteHandle&) = delete;
  BulkWriteHandle& operator=(const BulkWriteHandle&) = delete;

 private:
  // Private: together with the friend declaration above, this means handles
  // are created by nsTSubstring<T>.
  BulkWriteHandle(nsTSubstring<T>* aString, size_type aCapacity)
      : mString(aString), mCapacity(aCapacity) {}

  // Returns the string pointer and clears it on this handle, which turns this
  // handle's destructor into a no-op.
  nsTSubstring<T>* Forget() {
    auto string = mString;
    mString = nullptr;
    return string;
  }

  nsTSubstring<T>* mString;  // nullptr upon finish
  size_type mCapacity;       // writable code units; the value Length() returns
};

}  // namespace mozilla

/**
 * nsTSubstring is an abstract string class. From an API perspective, this
 * class is the root of the string class hierarchy. It represents a single
 * contiguous array of characters, which may or may not be null-terminated.
 * This type is not instantiated directly. A sub-class is instantiated
 * instead. For example, see nsTString.
+ * + * NAMES: + * nsAString for wide characters + * nsACString for narrow characters + * + */ +template <typename T> +class nsTSubstring : public mozilla::detail::nsTStringRepr<T> { + friend class mozilla::BulkWriteHandle<T>; + + public: + typedef nsTSubstring<T> self_type; + + typedef nsTString<T> string_type; + + typedef typename mozilla::detail::nsTStringRepr<T> base_string_type; + typedef typename base_string_type::substring_type substring_type; + + typedef typename base_string_type::fallible_t fallible_t; + + typedef typename base_string_type::char_type char_type; + typedef typename base_string_type::char_traits char_traits; + typedef + typename base_string_type::incompatible_char_type incompatible_char_type; + + typedef typename base_string_type::substring_tuple_type substring_tuple_type; + + typedef typename base_string_type::const_iterator const_iterator; + typedef typename base_string_type::iterator iterator; + + typedef typename base_string_type::comparator_type comparator_type; + + typedef typename base_string_type::const_char_iterator const_char_iterator; + + typedef typename base_string_type::index_type index_type; + typedef typename base_string_type::size_type size_type; + + // These are only for internal use within the string classes: + typedef typename base_string_type::DataFlags DataFlags; + typedef typename base_string_type::ClassFlags ClassFlags; + + // this acts like a virtual destructor + ~nsTSubstring() { Finalize(); } + + /** + * writing iterators + * + * BeginWriting() makes the string mutable (if it isn't + * already) and returns (or writes into an outparam) a + * pointer that provides write access to the string's buffer. + * + * Note: Consider if BulkWrite() suits your use case better + * than BeginWriting() combined with SetLength(). + * + * Note: Strings autoconvert into writable mozilla::Span, + * which may suit your use case better than calling + * BeginWriting() directly. 
+ * + * When writing via the pointer obtained from BeginWriting(), + * you are allowed to write at most the number of code units + * indicated by Length() or, alternatively, write up to, but + * not including, the position indicated by EndWriting(). + * + * In particular, calling SetCapacity() does not affect what + * the above paragraph says. + */ + + iterator BeginWriting() { + if (!EnsureMutable()) { + AllocFailed(base_string_type::mLength); + } + + return base_string_type::mData; + } + + iterator BeginWriting(const fallible_t&) { + return EnsureMutable() ? base_string_type::mData : iterator(0); + } + + iterator EndWriting() { + if (!EnsureMutable()) { + AllocFailed(base_string_type::mLength); + } + + return base_string_type::mData + base_string_type::mLength; + } + + iterator EndWriting(const fallible_t&) { + return EnsureMutable() + ? (base_string_type::mData + base_string_type::mLength) + : iterator(0); + } + + /** + * Perform string to int conversion. + * @param aErrorCode will contain error if one occurs + * @param aRadix is the radix to use. Only 10 and 16 are supported. + * @return int rep of string value, and possible (out) error code + */ + int32_t ToInteger(nsresult* aErrorCode, uint32_t aRadix = 10) const; + + /** + * Perform string to 64-bit int conversion. + * @param aErrorCode will contain error if one occurs + * @param aRadix is the radix to use. Only 10 and 16 are supported. 
+ * @return 64-bit int rep of string value, and possible (out) error code + */ + int64_t ToInteger64(nsresult* aErrorCode, uint32_t aRadix = 10) const; + + /** + * assignment + */ + + void NS_FASTCALL Assign(char_type aChar); + [[nodiscard]] bool NS_FASTCALL Assign(char_type aChar, const fallible_t&); + + void NS_FASTCALL Assign(const char_type* aData, + size_type aLength = size_type(-1)); + [[nodiscard]] bool NS_FASTCALL Assign(const char_type* aData, + const fallible_t&); + [[nodiscard]] bool NS_FASTCALL Assign(const char_type* aData, + size_type aLength, const fallible_t&); + + void NS_FASTCALL Assign(const self_type&); + [[nodiscard]] bool NS_FASTCALL Assign(const self_type&, const fallible_t&); + + void NS_FASTCALL Assign(self_type&&); + [[nodiscard]] bool NS_FASTCALL Assign(self_type&&, const fallible_t&); + + void NS_FASTCALL Assign(const substring_tuple_type&); + [[nodiscard]] bool NS_FASTCALL Assign(const substring_tuple_type&, + const fallible_t&); + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void Assign(char16ptr_t aData) { + Assign(static_cast<const char16_t*>(aData)); + } + + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void Assign(char16ptr_t aData, size_type aLength) { + Assign(static_cast<const char16_t*>(aData), aLength); + } + + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + [[nodiscard]] bool Assign(char16ptr_t aData, size_type aLength, + const fallible_t& aFallible) { + return Assign(static_cast<const char16_t*>(aData), aLength, aFallible); + } +#endif + + void NS_FASTCALL AssignASCII(const char* aData, size_type aLength); + [[nodiscard]] bool NS_FASTCALL AssignASCII(const char* aData, + size_type aLength, + const fallible_t&); + + void NS_FASTCALL AssignASCII(const char* aData) { + AssignASCII(aData, mozilla::AssertedCast<size_type, size_t>(strlen(aData))); + } + [[nodiscard]] bool NS_FASTCALL 
AssignASCII(const char* aData, + const fallible_t& aFallible) { + return AssignASCII(aData, + mozilla::AssertedCast<size_type, size_t>(strlen(aData)), + aFallible); + } + + // AssignLiteral must ONLY be called with an actual literal string, or + // a character array *constant* of static storage duration declared + // without an explicit size and with an initializer that is a string + // literal or is otherwise null-terminated. + // Use Assign or AssignASCII for other character array variables. + // + // This method does not need a fallible version, because it uses the + // POD buffer of the literal as the string's buffer without allocating. + // The literal does not need to be ASCII. If this a 16-bit string, this + // method takes a u"" literal. (The overload on 16-bit strings that takes + // a "" literal takes only ASCII.) + template <int N> + void AssignLiteral(const char_type (&aStr)[N]) { + AssignLiteral(aStr, N - 1); + } + + // AssignLiteral must ONLY be called with an actual literal string, or + // a char array *constant* declared without an explicit size and with an + // initializer that is a string literal or is otherwise null-terminated. + // Use AssignASCII for other char array variables. + // + // This method takes an 8-bit (ASCII-only!) string that is expanded + // into a 16-bit string at run time causing a run-time allocation. + // To avoid the run-time allocation (at the cost of the literal + // taking twice the size in the binary), use the above overload that + // takes a u"" string instead. Using the overload that takes a u"" + // literal is generally preferred when working with 16-bit strings. + // + // There is not a fallible version of this method because it only really + // applies to small allocations that we wouldn't want to check anyway. 
+ template <int N, typename Q = T, + typename EnableIfChar16 = typename mozilla::Char16OnlyT<Q>> + void AssignLiteral(const incompatible_char_type (&aStr)[N]) { + AssignASCII(aStr, N - 1); + } + + self_type& operator=(char_type aChar) { + Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aData) { + Assign(aData); + return *this; + } +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + self_type& operator=(char16ptr_t aData) { + Assign(aData); + return *this; + } +#endif + self_type& operator=(const self_type& aStr) { + Assign(aStr); + return *this; + } + self_type& operator=(self_type&& aStr) { + Assign(std::move(aStr)); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) { + Assign(aTuple); + return *this; + } + + // Adopt a heap-allocated char sequence for this string; is Voided if aData + // is null. Useful for e.g. converting an strdup'd C string into an + // nsCString. See also getter_Copies(), which is a useful wrapper. 
+ void NS_FASTCALL Adopt(char_type* aData, size_type aLength = size_type(-1)); + + /** + * buffer manipulation + */ + + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + char_type aChar); + [[nodiscard]] bool NS_FASTCALL Replace(index_type aCutStart, + size_type aCutLength, char_type aChar, + const fallible_t&); + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, + size_type aLength = size_type(-1)); + [[nodiscard]] bool NS_FASTCALL Replace(index_type aCutStart, + size_type aCutLength, + const char_type* aData, + size_type aLength, const fallible_t&); + void Replace(index_type aCutStart, size_type aCutLength, + const self_type& aStr) { + Replace(aCutStart, aCutLength, aStr.Data(), aStr.Length()); + } + [[nodiscard]] bool Replace(index_type aCutStart, size_type aCutLength, + const self_type& aStr, + const fallible_t& aFallible) { + return Replace(aCutStart, aCutLength, aStr.Data(), aStr.Length(), + aFallible); + } + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + const substring_tuple_type& aTuple); + + // ReplaceLiteral must ONLY be called with an actual literal string, or + // a character array *constant* of static storage duration declared + // without an explicit size and with an initializer that is a string + // literal or is otherwise null-terminated. + // Use Replace for other character array variables. 
+ template <int N> + void ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type (&aStr)[N]) { + ReplaceLiteral(aCutStart, aCutLength, aStr, N - 1); + } + + void Append(char_type aChar); + + [[nodiscard]] bool Append(char_type aChar, const fallible_t& aFallible); + + void Append(const char_type* aData, size_type aLength = size_type(-1)); + + [[nodiscard]] bool Append(const char_type* aData, size_type aLength, + const fallible_t& aFallible); + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void Append(char16ptr_t aData, size_type aLength = size_type(-1)) { + Append(static_cast<const char16_t*>(aData), aLength); + } +#endif + + void Append(const self_type& aStr); + + [[nodiscard]] bool Append(const self_type& aStr, const fallible_t& aFallible); + + void Append(const substring_tuple_type& aTuple); + + [[nodiscard]] bool Append(const substring_tuple_type& aTuple, + const fallible_t& aFallible); + + void AppendASCII(const char* aData, size_type aLength = size_type(-1)); + + [[nodiscard]] bool AppendASCII(const char* aData, + const fallible_t& aFallible); + + [[nodiscard]] bool AppendASCII(const char* aData, size_type aLength, + const fallible_t& aFallible); + + // Appends a literal string ("" literal in the 8-bit case and u"" literal + // in the 16-bit case) to the string. + // + // AppendLiteral must ONLY be called with an actual literal string, or + // a character array *constant* of static storage duration declared + // without an explicit size and with an initializer that is a string + // literal or is otherwise null-terminated. + // Use Append or AppendASCII for other character array variables. + template <int N> + void AppendLiteral(const char_type (&aStr)[N]) { + // The case where base_string_type::mLength is zero is intentionally + // left unoptimized (could be optimized as call to AssignLiteral), + // because it's rare/nonexistent. 
If you add that optimization, + // please be sure to also check that + // !(base_string_type::mDataFlags & DataFlags::REFCOUNTED) + // to avoid undoing the effects of SetCapacity(). + Append(aStr, N - 1); + } + + template <int N> + void AppendLiteral(const char_type (&aStr)[N], const fallible_t& aFallible) { + // The case where base_string_type::mLength is zero is intentionally + // left unoptimized (could be optimized as call to AssignLiteral), + // because it's rare/nonexistent. If you add that optimization, + // please be sure to also check that + // !(base_string_type::mDataFlags & DataFlags::REFCOUNTED) + // to avoid undoing the effects of SetCapacity(). + return Append(aStr, N - 1, aFallible); + } + + // Only enable for T = char16_t + // + // Appends an 8-bit literal string ("" literal) to a 16-bit string by + // expanding it. The literal must only contain ASCII. + // + // Using u"" literals with 16-bit strings is generally preferred. + template <int N, typename Q = T, + typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void AppendLiteral(const incompatible_char_type (&aStr)[N]) { + AppendASCII(aStr, N - 1); + } + + // Only enable for T = char16_t + template <int N, typename Q = T, + typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + [[nodiscard]] bool AppendLiteral(const incompatible_char_type (&aStr)[N], + const fallible_t& aFallible) { + return AppendASCII(aStr, N - 1, aFallible); + } + + /** + * Append a formatted string to the current string. Uses the + * standard printf format codes. This uses NSPR formatting, which will be + * locale-aware for floating-point values. You probably don't want to use + * this with floating-point values as a result. + */ + void AppendPrintf(const char* aFormat, ...) 
MOZ_FORMAT_PRINTF(2, 3); + void AppendVprintf(const char* aFormat, va_list aAp) MOZ_FORMAT_PRINTF(2, 0); + void AppendInt(int32_t aInteger) { AppendIntDec(aInteger); } + void AppendInt(int32_t aInteger, int aRadix) { + if (aRadix == 10) { + AppendIntDec(aInteger); + } else if (aRadix == 8) { + AppendIntOct(static_cast<uint32_t>(aInteger)); + } else { + AppendIntHex(static_cast<uint32_t>(aInteger)); + } + } + void AppendInt(uint32_t aInteger) { AppendIntDec(aInteger); } + void AppendInt(uint32_t aInteger, int aRadix) { + if (aRadix == 10) { + AppendIntDec(aInteger); + } else if (aRadix == 8) { + AppendIntOct(aInteger); + } else { + AppendIntHex(aInteger); + } + } + void AppendInt(int64_t aInteger) { AppendIntDec(aInteger); } + void AppendInt(int64_t aInteger, int aRadix) { + if (aRadix == 10) { + AppendIntDec(aInteger); + } else if (aRadix == 8) { + AppendIntOct(static_cast<uint64_t>(aInteger)); + } else { + AppendIntHex(static_cast<uint64_t>(aInteger)); + } + } + void AppendInt(uint64_t aInteger) { AppendIntDec(aInteger); } + void AppendInt(uint64_t aInteger, int aRadix) { + if (aRadix == 10) { + AppendIntDec(aInteger); + } else if (aRadix == 8) { + AppendIntOct(aInteger); + } else { + AppendIntHex(aInteger); + } + } + + private: + void AppendIntDec(int32_t); + void AppendIntDec(uint32_t); + void AppendIntOct(uint32_t); + void AppendIntHex(uint32_t); + void AppendIntDec(int64_t); + void AppendIntDec(uint64_t); + void AppendIntOct(uint64_t); + void AppendIntHex(uint64_t); + + public: + /** + * Append the given float to this string + */ + void NS_FASTCALL AppendFloat(float aFloat); + void NS_FASTCALL AppendFloat(double aFloat); + + self_type& operator+=(char_type aChar) { + Append(aChar); + return *this; + } + self_type& operator+=(const char_type* aData) { + Append(aData); + return *this; + } +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + self_type& operator+=(char16ptr_t aData) { + 
Append(aData); + return *this; + } +#endif + self_type& operator+=(const self_type& aStr) { + Append(aStr); + return *this; + } + self_type& operator+=(const substring_tuple_type& aTuple) { + Append(aTuple); + return *this; + } + + void Insert(char_type aChar, index_type aPos) { Replace(aPos, 0, aChar); } + void Insert(const char_type* aData, index_type aPos, + size_type aLength = size_type(-1)) { + Replace(aPos, 0, aData, aLength); + } +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void Insert(char16ptr_t aData, index_type aPos, + size_type aLength = size_type(-1)) { + Insert(static_cast<const char16_t*>(aData), aPos, aLength); + } +#endif + void Insert(const self_type& aStr, index_type aPos) { + Replace(aPos, 0, aStr); + } + void Insert(const substring_tuple_type& aTuple, index_type aPos) { + Replace(aPos, 0, aTuple); + } + + // InsertLiteral must ONLY be called with an actual literal string, or + // a character array *constant* of static storage duration declared + // without an explicit size and with an initializer that is a string + // literal or is otherwise null-terminated. + // Use Insert for other character array variables. + template <int N> + void InsertLiteral(const char_type (&aStr)[N], index_type aPos) { + ReplaceLiteral(aPos, 0, aStr, N - 1); + } + + void Cut(index_type aCutStart, size_type aCutLength) { + Replace(aCutStart, aCutLength, char_traits::sEmptyBuffer, 0); + } + + nsTSubstringSplitter<T> Split(const char_type aChar) const; + + /** + * buffer sizing + */ + + /** + * Attempts to set the capacity to the given size in number of + * code units without affecting the length of the string in + * order to avoid reallocation during a subsequent sequence of + * appends. 
+ * + * This method is appropriate to use before a sequence of multiple + * operations from the following list (without operations that are + * not on the list between the SetCapacity() call and operations + * from the list): + * + * Append() + * AppendASCII() + * AppendLiteral() (except if the string is empty: bug 1487606) + * AppendPrintf() + * AppendInt() + * AppendFloat() + * LossyAppendUTF16toASCII() + * AppendASCIItoUTF16() + * + * DO NOT call SetCapacity() if the subsequent operations on the + * string do not meet the criteria above. Operations that undo + * the benefits of SetCapacity() include but are not limited to: + * + * SetLength() + * Truncate() + * Assign() + * AssignLiteral() + * Adopt() + * CopyASCIItoUTF16() + * LossyCopyUTF16toASCII() + * AppendUTF16toUTF8() + * AppendUTF8toUTF16() + * CopyUTF16toUTF8() + * CopyUTF8toUTF16() + * + * If your string is an nsAuto[C]String and you are calling + * SetCapacity() with a constant N, please instead declare the + * string as nsAuto[C]StringN<N+1> without calling SetCapacity(). + * + * There is no need to include room for the null terminator: it is + * the job of the string class. + * + * Note: Calling SetCapacity() does not give you permission to + * use the pointer obtained from BeginWriting() to write + * past the current length (as returned by Length()) of the + * string. Please use either BulkWrite() or SetLength() + * instead. + * + * Note: SetCapacity() won't make the string shorter if + * called with an argument smaller than the length of the + * string. + * + * Note: You must not use previously obtained iterators + * or spans after calling SetCapacity(). + */ + void NS_FASTCALL SetCapacity(size_type aNewCapacity); + [[nodiscard]] bool NS_FASTCALL SetCapacity(size_type aNewCapacity, + const fallible_t&); + + /** + * Changes the logical length of the string, potentially + * allocating a differently-sized buffer for the string. 
+ * + * When making the string shorter, this method never + * reports allocation failure. + * + * Exposes uninitialized memory if the string got longer. + * + * If called with the argument 0, releases the + * heap-allocated buffer, if any. (But the no-argument + * overload of Truncate() is a more idiomatic and efficient + * option than SetLength(0).) + * + * Note: You must not use previously obtained iterators + * or spans after calling SetLength(). + */ + void NS_FASTCALL SetLength(size_type aNewLength); + [[nodiscard]] bool NS_FASTCALL SetLength(size_type aNewLength, + const fallible_t&); + + /** + * Like SetLength() but asserts in that the string + * doesn't become longer. Never fails, so doesn't need a + * fallible variant. + * + * Note: You must not use previously obtained iterators + * or spans after calling Truncate(). + */ + void Truncate(size_type aNewLength) { + MOZ_RELEASE_ASSERT(aNewLength <= base_string_type::mLength, + "Truncate cannot make string longer"); + mozilla::DebugOnly<bool> success = SetLength(aNewLength, mozilla::fallible); + MOZ_ASSERT(success); + } + + /** + * A more efficient overload for Truncate(0). Releases the + * heap-allocated buffer if any. + */ + void Truncate(); + + /** + * buffer access + */ + + /** + * Get a const pointer to the string's internal buffer. The caller + * MUST NOT modify the characters at the returned address. + * + * @returns The length of the buffer in characters. + */ + inline size_type GetData(const char_type** aData) const { + *aData = base_string_type::mData; + return base_string_type::mLength; + } + + /** + * Get a pointer to the string's internal buffer, optionally resizing + * the buffer first. If size_type(-1) is passed for newLen, then the + * current length of the string is used. The caller MAY modify the + * characters at the returned address (up to but not exceeding the + * length of the string). 
+ * + * @returns The length of the buffer in characters or 0 if unable to + * satisfy the request due to low-memory conditions. + */ + size_type GetMutableData(char_type** aData, + size_type aNewLen = size_type(-1)) { + if (!EnsureMutable(aNewLen)) { + AllocFailed(aNewLen == size_type(-1) ? base_string_type::mLength + : aNewLen); + } + + *aData = base_string_type::mData; + return base_string_type::mLength; + } + + size_type GetMutableData(char_type** aData, size_type aNewLen, + const fallible_t&) { + if (!EnsureMutable(aNewLen)) { + *aData = nullptr; + return 0; + } + + *aData = base_string_type::mData; + return base_string_type::mLength; + } + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + size_type GetMutableData(wchar_t** aData, size_type aNewLen = size_type(-1)) { + return GetMutableData(reinterpret_cast<char16_t**>(aData), aNewLen); + } + + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + size_type GetMutableData(wchar_t** aData, size_type aNewLen, + const fallible_t& aFallible) { + return GetMutableData(reinterpret_cast<char16_t**>(aData), aNewLen, + aFallible); + } +#endif + + /** + * Span integration + */ + + operator mozilla::Span<char_type>() { + return mozilla::Span{BeginWriting(), base_string_type::Length()}; + } + + operator mozilla::Span<const char_type>() const { + return mozilla::Span{base_string_type::BeginReading(), + base_string_type::Length()}; + } + + void Append(mozilla::Span<const char_type> aSpan) { + auto len = aSpan.Length(); + MOZ_RELEASE_ASSERT(len <= std::numeric_limits<size_type>::max()); + Append(aSpan.Elements(), len); + } + + [[nodiscard]] bool Append(mozilla::Span<const char_type> aSpan, + const fallible_t& aFallible) { + auto len = aSpan.Length(); + if (len > std::numeric_limits<size_type>::max()) { + return false; + } + return Append(aSpan.Elements(), len, aFallible); + } + + void NS_FASTCALL AssignASCII(mozilla::Span<const char> 
aData) { + AssignASCII(aData.Elements(), aData.Length()); + } + [[nodiscard]] bool NS_FASTCALL AssignASCII(mozilla::Span<const char> aData, + const fallible_t& aFallible) { + return AssignASCII(aData.Elements(), aData.Length(), aFallible); + } + + void AppendASCII(mozilla::Span<const char> aData) { + AppendASCII(aData.Elements(), aData.Length()); + } + + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + operator mozilla::Span<uint8_t>() { + return mozilla::Span{reinterpret_cast<uint8_t*>(BeginWriting()), + base_string_type::Length()}; + } + + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + operator mozilla::Span<const uint8_t>() const { + return mozilla::Span{ + reinterpret_cast<const uint8_t*>(base_string_type::BeginReading()), + base_string_type::Length()}; + } + + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + void Append(mozilla::Span<const uint8_t> aSpan) { + auto len = aSpan.Length(); + MOZ_RELEASE_ASSERT(len <= std::numeric_limits<size_type>::max()); + Append(reinterpret_cast<const char*>(aSpan.Elements()), len); + } + + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + [[nodiscard]] bool Append(mozilla::Span<const uint8_t> aSpan, + const fallible_t& aFallible) { + auto len = aSpan.Length(); + if (len > std::numeric_limits<size_type>::max()) { + return false; + } + return Append(reinterpret_cast<const char*>(aSpan.Elements()), len, + aFallible); + } + + /** + * string data is never null, but can be marked void. if true, the + * string will be truncated. @see nsTSubstring::IsVoid + */ + + void NS_FASTCALL SetIsVoid(bool); + + /** + * This method is used to remove all occurrences of aChar from this + * string. + * + * @param aChar -- char to be stripped + */ + + void StripChar(char_type aChar); + + /** + * This method is used to remove all occurrences of aChars from this + * string. 
+ * + * @param aChars -- chars to be stripped + */ + + void StripChars(const char_type* aChars); + + /** + * This method is used to remove all occurrences of some characters + * from this string. The characters removed have the corresponding + * entries in the bool array set to true; we retain all characters + * with code beyond 127. + * THE CALLER IS RESPONSIBLE for making sure the complete boolean + * array, 128 entries, is properly initialized. + * + * See also: ASCIIMask class. + * + * @param aToStrip -- Array where each entry is true if the + * corresponding ASCII character is to be stripped. All + * characters beyond code 127 are retained. Note that this + * parameter is of ASCIIMaskArray type, but we expand the typedef + * to avoid having to include nsASCIIMask.h in this include file + * as it brings other includes. + */ + void StripTaggedASCII(const std::array<bool, 128>& aToStrip); + + /** + * A shortcut to strip \r and \n. + */ + void StripCRLF(); + + /** + * If the string uses a shared buffer, this method + * clears the pointer without releasing the buffer. + */ + void ForgetSharedBuffer() { + if (base_string_type::mDataFlags & DataFlags::REFCOUNTED) { + SetToEmptyBuffer(); + } + } + + protected: + void AssertValid() { + MOZ_ASSERT(!(this->mClassFlags & ClassFlags::NULL_TERMINATED) || + (this->mDataFlags & DataFlags::TERMINATED), + "String classes whose static type guarantees a null-terminated " + "buffer must not be assigned a non-null-terminated buffer."); + } + + public: + /** + * this is public to support automatic conversion of tuple to string + * base type, which helps avoid converting to nsTAString. 
+ */ + MOZ_IMPLICIT nsTSubstring(const substring_tuple_type& aTuple) + : base_string_type(nullptr, 0, DataFlags(0), ClassFlags(0)) { + AssertValid(); + Assign(aTuple); + } + + size_t SizeOfExcludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const; + size_t SizeOfIncludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const; + + /** + * WARNING: Only use these functions if you really know what you are + * doing, because they can easily lead to double-counting strings. If + * you do use them, please explain clearly in a comment why it's safe + * and won't lead to double-counting. + */ + size_t SizeOfExcludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const; + size_t SizeOfIncludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const; + + template <class N> + void NS_ABORT_OOM(T) { + struct never {}; // a compiler-friendly way to do static_assert(false) + static_assert( + std::is_same_v<N, never>, + "In string classes, use AllocFailed to account for sizeof(char_type). 
" + "Use the global ::NS_ABORT_OOM if you really have a count of bytes."); + } + + MOZ_ALWAYS_INLINE void AllocFailed(size_t aLength) { + ::NS_ABORT_OOM(aLength * sizeof(char_type)); + } + + protected: + // default initialization + nsTSubstring() + : base_string_type(char_traits::sEmptyBuffer, 0, DataFlags::TERMINATED, + ClassFlags(0)) { + AssertValid(); + } + + // copy-constructor, constructs as dependent on given object + // (NOTE: this is for internal use only) + nsTSubstring(const self_type& aStr) + : base_string_type(aStr.base_string_type::mData, + aStr.base_string_type::mLength, + aStr.base_string_type::mDataFlags & + (DataFlags::TERMINATED | DataFlags::VOIDED), + ClassFlags(0)) { + AssertValid(); + } + + // initialization with ClassFlags + explicit nsTSubstring(ClassFlags aClassFlags) + : base_string_type(char_traits::sEmptyBuffer, 0, DataFlags::TERMINATED, + aClassFlags) { + AssertValid(); + } + + /** + * allows for direct initialization of a nsTSubstring object. + */ + nsTSubstring(char_type* aData, size_type aLength, DataFlags aDataFlags, + ClassFlags aClassFlags) +// XXXbz or can I just include nscore.h and use NS_BUILD_REFCNT_LOGGING? 
+#if defined(DEBUG) || defined(FORCE_BUILD_REFCNT_LOGGING) +# define XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE + ; +#else +# undef XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE + : base_string_type(aData, aLength, aDataFlags, aClassFlags) { + AssertValid(); + MOZ_RELEASE_ASSERT(CheckCapacity(aLength), "String is too large."); + } +#endif /* DEBUG || FORCE_BUILD_REFCNT_LOGGING */ + + void SetToEmptyBuffer() { + base_string_type::mData = char_traits::sEmptyBuffer; + base_string_type::mLength = 0; + base_string_type::mDataFlags = DataFlags::TERMINATED; + AssertValid(); + } + + void SetData(char_type* aData, size_type aLength, DataFlags aDataFlags) { + base_string_type::mData = aData; + base_string_type::mLength = aLength; + base_string_type::mDataFlags = aDataFlags; + AssertValid(); + } + + /** + * this function releases mData and does not change the value of + * any of its member variables. in other words, this function acts + * like a destructor. + */ + void NS_FASTCALL Finalize(); + + public: + /** + * Starts a low-level write transaction to the string. + * + * Prepares the string for mutation such that the capacity + * of the string is at least aCapacity. The returned handle + * exposes the actual, potentially larger, capacity. + * + * If meeting the capacity or mutability requirement requires + * reallocation, aPrefixToPreserve code units are copied from the + * start of the old buffer to the start of the new buffer. + * aPrefixToPreserve must not be greater than the string's current + * length or greater than aCapacity. + * + * aAllowShrinking indicates whether an allocation may be + * performed when the string is already mutable and the requested + * capacity is smaller than the current capacity. + * + * If this method returns successfully, you must not access + * the string except through the returned BulkWriteHandle + * until either the BulkWriteHandle goes out of scope or + * you call Finish() on the BulkWriteHandle. 
+ * + * Compared to SetLength() and BeginWriting(), this more + * complex API accomplishes two things: + * 1) It exposes the actual capacity which may be larger + * than the requested capacity, which is useful in some + * multi-step write operations that don't allocate for + * the worst case up front. + * 2) It writes the zero terminator after the string + * content has been written, which results in a + * cache-friendly linear write pattern. + */ + mozilla::Result<mozilla::BulkWriteHandle<T>, nsresult> NS_FASTCALL BulkWrite( + size_type aCapacity, size_type aPrefixToPreserve, bool aAllowShrinking); + + /** + * THIS IS NOT REALLY A PUBLIC METHOD! DO NOT CALL FROM OUTSIDE + * THE STRING IMPLEMENTATION. (It's public only because friend + * declarations don't allow extern or static and this needs to + * be called from Rust FFI glue.) + * + * Prepares mData to be mutated such that the capacity of the string + * (not counting the zero-terminator) is at least aCapacity. + * Returns the actual capacity, which may be larger than what was + * requested or Err(NS_ERROR_OUT_OF_MEMORY) on allocation failure. + * + * mLength is ignored by this method. If the buffer is reallocated, + * aUnitsToPreserve specifies how many code units to copy over to + * the new buffer. The old buffer is freed if applicable. + * + * Unless the return value is Err(NS_ERROR_OUT_OF_MEMORY) to signal + * failure or 0 to signal that the string has been set to + * the special empty state, this method leaves the string in an + * invalid state! The caller is responsible for calling + * FinishBulkWrite() (or in Rust calling + * nsA[C]StringBulkWriteHandle::finish()), which put the string + * into a valid state by setting mLength and zero-terminating. + * This method sets the flag to claim that the string is + * zero-terminated before it actually is. 
+ * + * Once this method has been called and before FinishBulkWrite() + * has been called, only accessing mData or calling this method + * again are valid operations. Do not call any other methods or + * access other fields between calling this method and + * FinishBulkWrite(). + * + * @param aCapacity The requested capacity. The return value + * will be greater than or equal to this value. + * @param aPrefixToPreserve The number of code units at the start + * of the old buffer to copy into the + * new buffer. + * @param aAllowShrinking If true, an allocation may be performed + * if the requested capacity is smaller + * than the current capacity. + * @param aSuffixLength The length, in code units, of a suffix + * to move. + * @param aOldSuffixStart The old start index of the suffix to + * move. + * @param aNewSuffixStart The new start index of the suffix to + * move. + * + */ + mozilla::Result<uint32_t, nsresult> NS_FASTCALL StartBulkWriteImpl( + size_type aCapacity, size_type aPrefixToPreserve = 0, + bool aAllowShrinking = true, size_type aSuffixLength = 0, + size_type aOldSuffixStart = 0, size_type aNewSuffixStart = 0); + + private: + void AssignOwned(self_type&& aStr); + bool AssignNonDependent(const substring_tuple_type& aTuple, + size_type aTupleLength, + const mozilla::fallible_t& aFallible); + + /** + * Do not call this except from within FinishBulkWriteImpl() and + * SetCapacity(). + */ + MOZ_ALWAYS_INLINE void NS_FASTCALL + FinishBulkWriteImplImpl(size_type aLength) { + base_string_type::mData[aLength] = char_type(0); + base_string_type::mLength = aLength; +#ifdef DEBUG + // ifdefed in order to avoid the call to Capacity() in non-debug + // builds. + // + // Our string is mutable, so Capacity() doesn't return zero. + // Capacity() doesn't include the space for the zero terminator, + // but we want to uninitialize that slot, too. 
Since we start + // counting after the zero terminator that we just wrote above, + // we end up overwriting the space for terminator not reflected + // in the capacity number. + char_traits::uninitialize( + base_string_type::mData + aLength + 1, + XPCOM_MIN(size_t(Capacity() - aLength), kNsStringBufferMaxPoison)); +#endif + } + + protected: + /** + * Restores the string to a valid state after a call to StartBulkWrite() + * that returned a non-error result. The argument to this method + * must be less than or equal to the value returned by the most recent + * StartBulkWrite() call. + */ + void NS_FASTCALL FinishBulkWriteImpl(size_type aLength); + + /** + * this function prepares a section of mData to be modified. if + * necessary, this function will reallocate mData and possibly move + * existing data to open up the specified section. + * + * @param aCutStart specifies the starting offset of the section + * @param aCutLength specifies the length of the section to be replaced + * @param aNewLength specifies the length of the new section + * + * for example, suppose mData contains the string "abcdef" then + * + * ReplacePrep(2, 3, 4); + * + * would cause mData to look like "ab____f" where the characters + * indicated by '_' have an unspecified value and can be freely + * modified. this function will null-terminate mData upon return. + * + * this function returns false if it is unable to allocate sufficient + * memory. + */ + [[nodiscard]] bool ReplacePrep(index_type aCutStart, size_type aCutLength, + size_type aNewLength); + + [[nodiscard]] bool NS_FASTCALL ReplacePrepInternal(index_type aCutStart, + size_type aCutLength, + size_type aNewFragLength, + size_type aNewTotalLength); + + /** + * returns the number of writable storage units starting at mData. + * the value does not include space for the null-terminator character. + * + * NOTE: this function returns 0 if mData is immutable (or the buffer + * is 0-sized). 
+ */ + size_type NS_FASTCALL Capacity() const; + + /** + * this helper function can be called prior to directly manipulating + * the contents of mData. see, for example, BeginWriting. + */ + [[nodiscard]] bool NS_FASTCALL + EnsureMutable(size_type aNewLen = size_type(-1)); + + /** + * Checks if the given capacity is valid for this string type. + */ + [[nodiscard]] static bool CheckCapacity(size_type aCapacity) { + if (aCapacity > kMaxCapacity) { + // Also assert for |aCapacity| equal to |size_type(-1)|, since we used to + // use that value to flag immutability. + NS_ASSERTION(aCapacity != size_type(-1), "Bogus capacity"); + return false; + } + + return true; + } + + void NS_FASTCALL ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength); + + static const size_type kMaxCapacity; + + public: + // NOTE: this method is declared public _only_ for convenience for + // callers who don't have access to the original nsLiteralString_CharT. + void NS_FASTCALL AssignLiteral(const char_type* aData, size_type aLength); +}; + +extern template class nsTSubstring<char>; +extern template class nsTSubstring<char16_t>; + +static_assert(sizeof(nsTSubstring<char>) == + sizeof(mozilla::detail::nsTStringRepr<char>), + "Don't add new data fields to nsTSubstring_CharT. 
" + "Add to nsTStringRepr<T> instead."); + +#include "nsCharSeparatedTokenizer.h" +#include "nsTDependentSubstring.h" + +/** + * Span integration + */ +namespace mozilla { +Span(nsTSubstring<char>&)->Span<char>; +Span(const nsTSubstring<char>&)->Span<const char>; +Span(nsTSubstring<char16_t>&)->Span<char16_t>; +Span(const nsTSubstring<char16_t>&)->Span<const char16_t>; + +} // namespace mozilla + +#endif diff --git a/xpcom/string/nsTSubstringTuple.cpp b/xpcom/string/nsTSubstringTuple.cpp new file mode 100644 index 0000000000..2521799f75 --- /dev/null +++ b/xpcom/string/nsTSubstringTuple.cpp @@ -0,0 +1,92 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsTSubstringTuple.h" +#include "mozilla/CheckedInt.h" + +/** + * computes the aggregate string length + */ + +template <typename T> +typename nsTSubstringTuple<T>::size_type nsTSubstringTuple<T>::Length() const { + mozilla::CheckedInt<size_type> len; + if (mHead) { + len = mHead->Length(); + } else { + len = mFragA->Length(); + } + + len += mFragB->Length(); + MOZ_RELEASE_ASSERT(len.isValid(), "Substring tuple length is invalid"); + return len.value(); +} + +/** + * writes the aggregate string to the given buffer. aBufLen is assumed + * to be equal to or greater than the value returned by the Length() + * method. the string written to |aBuf| is not null-terminated. 
+ */ + +template <typename T> +void nsTSubstringTuple<T>::WriteTo(char_type* aBuf, uint32_t aBufLen) const { + MOZ_RELEASE_ASSERT(aBufLen >= mFragB->Length(), "buffer too small"); + uint32_t headLen = aBufLen - mFragB->Length(); + if (mHead) { + mHead->WriteTo(aBuf, headLen); + } else { + MOZ_RELEASE_ASSERT(mFragA->Length() == headLen, "buffer incorrectly sized"); + char_traits::copy(aBuf, mFragA->Data(), mFragA->Length()); + } + + char_traits::copy(aBuf + headLen, mFragB->Data(), mFragB->Length()); +} + +/** + * returns true if this tuple is dependent on (i.e., overlapping with) + * the given char sequence. + */ + +template <typename T> +bool nsTSubstringTuple<T>::IsDependentOn(const char_type* aStart, + const char_type* aEnd) const { + // we start with the right-most fragment since it is faster to check. + + if (mFragB->IsDependentOn(aStart, aEnd)) { + return true; + } + + if (mHead) { + return mHead->IsDependentOn(aStart, aEnd); + } + + return mFragA->IsDependentOn(aStart, aEnd); +} + +template <typename T> +auto nsTSubstringTuple<T>::IsDependentOnWithLength(const char_type* aStart, + const char_type* aEnd) const + -> std::pair<bool, size_type> { + // we start with the right-most fragment since it is faster to check for + // dependency. + const bool rightDependentOn = mFragB->IsDependentOn(aStart, aEnd); + + if (rightDependentOn) { + return {true, Length()}; + } + + const auto [leftDependentOn, leftLen] = + mHead ? 
mHead->IsDependentOnWithLength(aStart, aEnd) + : std::pair{mFragA->IsDependentOn(aStart, aEnd), mFragA->Length()}; + + const auto checkedLen = + mozilla::CheckedInt<size_type>{leftLen} + mFragB->Length(); + MOZ_RELEASE_ASSERT(checkedLen.isValid(), "Substring tuple length is invalid"); + return {leftDependentOn, checkedLen.value()}; +} + +template class nsTSubstringTuple<char>; +template class nsTSubstringTuple<char16_t>; diff --git a/xpcom/string/nsTSubstringTuple.h b/xpcom/string/nsTSubstringTuple.h new file mode 100644 index 0000000000..727acd2ef1 --- /dev/null +++ b/xpcom/string/nsTSubstringTuple.h @@ -0,0 +1,90 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsTSubstringTuple_h +#define nsTSubstringTuple_h + +#include "mozilla/Attributes.h" +#include "nsTStringRepr.h" + +/** + * nsTSubstringTuple + * + * Represents a tuple of string fragments. Built as a recursive binary tree. + * It is used to implement the concatenation of two or more string objects. + * + * NOTE: This class is a private implementation detail and should never be + * referenced outside the string code. 
+ */ +template <typename T> +class MOZ_TEMPORARY_CLASS nsTSubstringTuple { + public: + typedef T char_type; + typedef nsCharTraits<char_type> char_traits; + + typedef nsTSubstringTuple<T> self_type; + typedef mozilla::detail::nsTStringRepr<char_type> base_string_type; + typedef uint32_t size_type; + + public: + nsTSubstringTuple(const base_string_type* aStrA, + const base_string_type* aStrB) + : mHead(nullptr), mFragA(aStrA), mFragB(aStrB) {} + + nsTSubstringTuple(const self_type& aHead, const base_string_type* aStrB) + : mHead(&aHead), + mFragA(nullptr) // this fragment is ignored when aHead != nullptr + , + mFragB(aStrB) {} + + /** + * computes the aggregate string length + */ + size_type Length() const; + + /** + * writes the aggregate string to the given buffer. bufLen is assumed + * to be equal to or greater than the value returned by the Length() + * method. the string written to |buf| is not null-terminated. + */ + void WriteTo(char_type* aBuf, uint32_t aBufLen) const; + + /** + * returns true if this tuple is dependent on (i.e., overlapping with) + * the given char sequence. + */ + bool IsDependentOn(const char_type* aStart, const char_type* aEnd) const; + + /** + * returns a pair of the results of IsDependentOn() and Length(). This is more + * efficient than calling both functions subsequently, as this traverses the + * tree only once. 
+ */ + std::pair<bool, size_type> IsDependentOnWithLength( + const char_type* aStart, const char_type* aEnd) const; + + private: + const self_type* const mHead; + const base_string_type* const mFragA; + const base_string_type* const mFragB; +}; + +template <typename T> +inline const nsTSubstringTuple<T> operator+( + const mozilla::detail::nsTStringRepr<T>& aStrA, + const mozilla::detail::nsTStringRepr<T>& aStrB) { + return nsTSubstringTuple<T>(&aStrA, &aStrB); +} + +template <typename T> +inline const nsTSubstringTuple<T> operator+( + const nsTSubstringTuple<T>& aHead, + const mozilla::detail::nsTStringRepr<T>& aStrB) { + return nsTSubstringTuple<T>(aHead, &aStrB); +} + +#endif diff --git a/xpcom/string/nsTextFormatter.cpp b/xpcom/string/nsTextFormatter.cpp new file mode 100644 index 0000000000..c739cd152b --- /dev/null +++ b/xpcom/string/nsTextFormatter.cpp @@ -0,0 +1,906 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Portable safe sprintf code. + * + * Code based on mozilla/nsprpub/src/io/prprf.c rev 3.7 + * + * Contributor(s): + * Kipp E.B. 
Hickman <kipp@netscape.com> (original author) + * Frank Yung-Fong Tang <ftang@netscape.com> + * Daniele Nicolodi <daniele@grinta.net> + */ + +/* + * Copied from xpcom/ds/nsTextFormatter.cpp r1.22 + * Changed to use nsMemory and Frozen linkage + * -- Prasad <prasad@medhas.org> + */ + +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include "prdtoa.h" +#include "mozilla/Logging.h" +#include "mozilla/Sprintf.h" +#include "nsCRTGlue.h" +#include "nsTextFormatter.h" +#include "nsMemory.h" + +struct nsTextFormatter::SprintfStateStr { + int (*stuff)(SprintfStateStr* aState, const char16_t* aStr, uint32_t aLen); + + char16_t* base; + char16_t* cur; + uint32_t maxlen; + + void* stuffclosure; +}; + +#define _LEFT 0x1 +#define _SIGNED 0x2 +#define _SPACED 0x4 +#define _ZEROS 0x8 +#define _NEG 0x10 +#define _UNSIGNED 0x20 + +#define ELEMENTS_OF(array_) (sizeof(array_) / sizeof(array_[0])) + +/* +** Fill into the buffer using the data in src +*/ +int nsTextFormatter::fill2(SprintfStateStr* aState, const char16_t* aSrc, + int aSrcLen, int aWidth, int aFlags) { + char16_t space = ' '; + int rv; + + aWidth -= aSrcLen; + /* Right adjusting */ + if ((aWidth > 0) && ((aFlags & _LEFT) == 0)) { + if (aFlags & _ZEROS) { + space = '0'; + } + while (--aWidth >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + } + + /* Copy out the source data */ + rv = (*aState->stuff)(aState, aSrc, aSrcLen); + if (rv < 0) { + return rv; + } + + /* Left adjusting */ + if ((aWidth > 0) && ((aFlags & _LEFT) != 0)) { + while (--aWidth >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + } + return 0; +} + +/* +** Fill a number. 
The order is: optional-sign zero-filling conversion-digits +*/ +int nsTextFormatter::fill_n(nsTextFormatter::SprintfStateStr* aState, + const char16_t* aSrc, int aSrcLen, int aWidth, + int aPrec, int aFlags) { + int zerowidth = 0; + int precwidth = 0; + int signwidth = 0; + int leftspaces = 0; + int rightspaces = 0; + int cvtwidth; + int rv; + char16_t sign; + char16_t space = ' '; + char16_t zero = '0'; + + if ((aFlags & _UNSIGNED) == 0) { + if (aFlags & _NEG) { + sign = '-'; + signwidth = 1; + } else if (aFlags & _SIGNED) { + sign = '+'; + signwidth = 1; + } else if (aFlags & _SPACED) { + sign = ' '; + signwidth = 1; + } + } + cvtwidth = signwidth + aSrcLen; + + if (aPrec > 0) { + if (aPrec > aSrcLen) { + /* Need zero filling */ + precwidth = aPrec - aSrcLen; + cvtwidth += precwidth; + } + } + + if ((aFlags & _ZEROS) && (aPrec < 0)) { + if (aWidth > cvtwidth) { + /* Zero filling */ + zerowidth = aWidth - cvtwidth; + cvtwidth += zerowidth; + } + } + + if (aFlags & _LEFT) { + if (aWidth > cvtwidth) { + /* Space filling on the right (i.e. left adjusting) */ + rightspaces = aWidth - cvtwidth; + } + } else { + if (aWidth > cvtwidth) { + /* Space filling on the left (i.e. 
right adjusting) */ + leftspaces = aWidth - cvtwidth; + } + } + while (--leftspaces >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + if (signwidth) { + rv = (*aState->stuff)(aState, &sign, 1); + if (rv < 0) { + return rv; + } + } + while (--precwidth >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + while (--zerowidth >= 0) { + rv = (*aState->stuff)(aState, &zero, 1); + if (rv < 0) { + return rv; + } + } + rv = (*aState->stuff)(aState, aSrc, aSrcLen); + if (rv < 0) { + return rv; + } + while (--rightspaces >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + return 0; +} + +/* +** Convert a 64-bit integer into its printable form +*/ +int nsTextFormatter::cvt_ll(SprintfStateStr* aState, uint64_t aNum, int aWidth, + int aPrec, int aRadix, int aFlags, + const char16_t* aHexStr) { + char16_t cvtbuf[100]; + char16_t* cvt; + int digits; + + /* according to the man page this needs to happen */ + if (aPrec == 0 && aNum == 0) { + return 0; + } + + /* + ** Converting decimal is a little tricky. In the unsigned case we + ** need to stop when we hit 10 digits. In the signed case, we can + ** stop when the number is zero. + */ + cvt = &cvtbuf[0] + ELEMENTS_OF(cvtbuf); + digits = 0; + while (aNum != 0) { + uint64_t quot = aNum / aRadix; + uint64_t rem = aNum % aRadix; + *--cvt = aHexStr[rem & 0xf]; + digits++; + aNum = quot; + } + if (digits == 0) { + *--cvt = '0'; + digits++; + } + + /* + ** Now that we have the number converted without its sign, deal with + ** the sign and zero padding. + */ + return fill_n(aState, cvt, digits, aWidth, aPrec, aFlags); +} + +/* +** Convert a double precision floating point number into its printable +** form. 
+*/ +int nsTextFormatter::cvt_f(SprintfStateStr* aState, double aDouble, int aWidth, + int aPrec, const char16_t aType, int aFlags) { + int mode = 2; + int decpt; + int sign; + char buf[256]; + char* bufp = buf; + int bufsz = 256; + char num[256]; + char* nump; + char* endnum; + int numdigits = 0; + char exp = 'e'; + + if (aPrec == -1) { + aPrec = 6; + } else if (aPrec > 50) { + // limit precision to avoid PR_dtoa bug 108335 + // and to prevent buffer overflows + aPrec = 50; + } + + switch (aType) { + case 'f': + numdigits = aPrec; + mode = 3; + break; + case 'E': + exp = 'E'; + [[fallthrough]]; + case 'e': + numdigits = aPrec + 1; + mode = 2; + break; + case 'G': + exp = 'E'; + [[fallthrough]]; + case 'g': + if (aPrec == 0) { + aPrec = 1; + } + numdigits = aPrec; + mode = 2; + break; + default: + NS_ERROR("invalid aType passed to cvt_f"); + } + + if (PR_dtoa(aDouble, mode, numdigits, &decpt, &sign, &endnum, num, bufsz) == + PR_FAILURE) { + buf[0] = '\0'; + return -1; + } + numdigits = endnum - num; + nump = num; + + if (sign) { + *bufp++ = '-'; + } else if (aFlags & _SIGNED) { + *bufp++ = '+'; + } + + if (decpt == 9999) { + while ((*bufp++ = *nump++)) { + } + } else { + switch (aType) { + case 'E': + case 'e': + + *bufp++ = *nump++; + if (aPrec > 0) { + *bufp++ = '.'; + while (*nump) { + *bufp++ = *nump++; + aPrec--; + } + while (aPrec-- > 0) { + *bufp++ = '0'; + } + } + *bufp++ = exp; + + ::snprintf(bufp, bufsz - (bufp - buf), "%+03d", decpt - 1); + break; + + case 'f': + + if (decpt < 1) { + *bufp++ = '0'; + if (aPrec > 0) { + *bufp++ = '.'; + while (decpt++ && aPrec-- > 0) { + *bufp++ = '0'; + } + while (*nump && aPrec-- > 0) { + *bufp++ = *nump++; + } + while (aPrec-- > 0) { + *bufp++ = '0'; + } + } + } else { + while (*nump && decpt-- > 0) { + *bufp++ = *nump++; + } + while (decpt-- > 0) { + *bufp++ = '0'; + } + if (aPrec > 0) { + *bufp++ = '.'; + while (*nump && aPrec-- > 0) { + *bufp++ = *nump++; + } + while (aPrec-- > 0) { + *bufp++ = '0'; + } + } + } + 
*bufp = '\0'; + break; + + case 'G': + case 'g': + + if ((decpt < -3) || ((decpt - 1) >= aPrec)) { + *bufp++ = *nump++; + numdigits--; + if (numdigits > 0) { + *bufp++ = '.'; + while (*nump) { + *bufp++ = *nump++; + } + } + *bufp++ = exp; + ::snprintf(bufp, bufsz - (bufp - buf), "%+03d", decpt - 1); + } else { + if (decpt < 1) { + *bufp++ = '0'; + if (aPrec > 0) { + *bufp++ = '.'; + while (decpt++) { + *bufp++ = '0'; + } + while (*nump) { + *bufp++ = *nump++; + } + } + } else { + while (*nump && decpt-- > 0) { + *bufp++ = *nump++; + numdigits--; + } + while (decpt-- > 0) { + *bufp++ = '0'; + } + if (numdigits > 0) { + *bufp++ = '.'; + while (*nump) { + *bufp++ = *nump++; + } + } + } + *bufp = '\0'; + } + } + } + + char16_t rbuf[256]; + char16_t* rbufp = rbuf; + bufp = buf; + // cast to char16_t + while ((*rbufp++ = *bufp++)) { + } + *rbufp = '\0'; + + return fill2(aState, rbuf, NS_strlen(rbuf), aWidth, aFlags); +} + +/* +** Convert a string into its printable form. |aWidth| is the output +** width. |aPrec| is the maximum number of characters of |aStr| to output, +** where -1 means until NUL. +*/ +int nsTextFormatter::cvt_S(SprintfStateStr* aState, const char16_t* aStr, + int aWidth, int aPrec, int aFlags) { + int slen; + + if (aPrec == 0) { + return 0; + } + + /* Limit string length by precision value */ + slen = aStr ? NS_strlen(aStr) : 6; + if (aPrec > 0) { + if (aPrec < slen) { + slen = aPrec; + } + } + + /* and away we go */ + return fill2(aState, aStr ? aStr : u"(null)", slen, aWidth, aFlags); +} + +/* +** Convert a string into its printable form. |aWidth| is the output +** width. |aPrec| is the maximum number of characters of |aStr| to output, +** where -1 means until NUL. +*/ +int nsTextFormatter::cvt_s(nsTextFormatter::SprintfStateStr* aState, + const char* aStr, int aWidth, int aPrec, + int aFlags) { + // Be sure to handle null the same way as %S. 
+ if (aStr == nullptr) { + return cvt_S(aState, nullptr, aWidth, aPrec, aFlags); + } + NS_ConvertUTF8toUTF16 utf16Val(aStr); + return cvt_S(aState, utf16Val.get(), aWidth, aPrec, aFlags); +} + +/* +** The workhorse sprintf code. +*/ +int nsTextFormatter::dosprintf(SprintfStateStr* aState, const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues) { + static const char16_t space = ' '; + static const char16_t hex[] = u"0123456789abcdef"; + static const char16_t HEX[] = u"0123456789ABCDEF"; + static const BoxedValue emptyString(u""); + + char16_t c; + int flags, width, prec, radix; + + const char16_t* hexp; + + // Next argument for non-numbered arguments. + size_t nextNaturalArg = 0; + // True if we ever saw a numbered argument. + bool sawNumberedArg = false; + + while ((c = *aFmt++) != 0) { + int rv; + + if (c != '%') { + rv = (*aState->stuff)(aState, aFmt - 1, 1); + if (rv < 0) { + return rv; + } + continue; + } + + // Save the location of the "%" in case we decide it isn't a + // format and want to just emit the text from the format string. + const char16_t* percentPointer = aFmt - 1; + + /* + ** Gobble up the % format string. Hopefully we have handled all + ** of the strange cases! + */ + flags = 0; + c = *aFmt++; + if (c == '%') { + /* quoting a % with %% */ + rv = (*aState->stuff)(aState, aFmt - 1, 1); + if (rv < 0) { + return rv; + } + continue; + } + + // Check for a numbered argument. + bool sawWidth = false; + const BoxedValue* thisArg = nullptr; + if (c >= '0' && c <= '9') { + size_t argNumber = 0; + while (c && c >= '0' && c <= '9') { + argNumber = (argNumber * 10) + (c - '0'); + c = *aFmt++; + } + + if (c == '$') { + // Mixing numbered arguments and implicit arguments is + // disallowed. + if (nextNaturalArg > 0) { + return -1; + } + + c = *aFmt++; + + // Numbered arguments start at 1. + --argNumber; + if (argNumber >= aValues.Length()) { + // A correctness issue but not a safety issue. 
+ MOZ_ASSERT(false); + thisArg = &emptyString; + } else { + thisArg = &aValues[argNumber]; + } + sawNumberedArg = true; + } else { + width = argNumber; + sawWidth = true; + } + } + + if (!sawWidth) { + /* + * Examine optional flags. Note that we do not implement the + * '#' flag of sprintf(). The ANSI C spec. of the '#' flag is + * somewhat ambiguous and not ideal, which is perhaps why + * the various sprintf() implementations are inconsistent + * on this feature. + */ + while ((c == '-') || (c == '+') || (c == ' ') || (c == '0')) { + if (c == '-') { + flags |= _LEFT; + } + if (c == '+') { + flags |= _SIGNED; + } + if (c == ' ') { + flags |= _SPACED; + } + if (c == '0') { + flags |= _ZEROS; + } + c = *aFmt++; + } + if (flags & _SIGNED) { + flags &= ~_SPACED; + } + if (flags & _LEFT) { + flags &= ~_ZEROS; + } + + /* width */ + if (c == '*') { + // Not supported with numbered arguments. + if (sawNumberedArg) { + return -1; + } + + if (nextNaturalArg >= aValues.Length() || + !aValues[nextNaturalArg].IntCompatible()) { + // A correctness issue but not a safety issue. + MOZ_ASSERT(false); + width = 0; + } else { + width = aValues[nextNaturalArg++].mValue.mInt; + } + c = *aFmt++; + } else { + width = 0; + while ((c >= '0') && (c <= '9')) { + width = (width * 10) + (c - '0'); + c = *aFmt++; + } + } + } + + /* precision */ + prec = -1; + if (c == '.') { + c = *aFmt++; + if (c == '*') { + // Not supported with numbered arguments. + if (sawNumberedArg) { + return -1; + } + + if (nextNaturalArg >= aValues.Length() || + !aValues[nextNaturalArg].IntCompatible()) { + // A correctness issue but not a safety issue. + MOZ_ASSERT(false); + } else { + prec = aValues[nextNaturalArg++].mValue.mInt; + } + c = *aFmt++; + } else { + prec = 0; + while ((c >= '0') && (c <= '9')) { + prec = (prec * 10) + (c - '0'); + c = *aFmt++; + } + } + } + + // If the argument isn't known yet, find it now. This is done + // after the width and precision code, in case '*' was used. 
+ if (thisArg == nullptr) { + // Mixing numbered arguments and implicit arguments is + // disallowed. + if (sawNumberedArg) { + return -1; + } + + if (nextNaturalArg >= aValues.Length()) { + // A correctness issue but not a safety issue. + MOZ_ASSERT(false); + thisArg = &emptyString; + } else { + thisArg = &aValues[nextNaturalArg++]; + } + } + + /* Size. Defaults to 32 bits. */ + uint64_t mask = UINT32_MAX; + if (c == 'h') { + c = *aFmt++; + mask = UINT16_MAX; + } else if (c == 'L') { + c = *aFmt++; + mask = UINT64_MAX; + } else if (c == 'l') { + c = *aFmt++; + if (c == 'l') { + c = *aFmt++; + mask = UINT64_MAX; + } else { + mask = UINT32_MAX; + } + } + + /* format */ + hexp = hex; + radix = 10; + // Several `MOZ_ASSERT`s below check for argument compatibility + // with the format specifier. These are only debug assertions, + // not release assertions, and exist to catch problems in C++ + // callers of `nsTextFormatter`, as we do not have compile-time + // checking of format strings. In release mode, these assertions + // will be no-ops, and we will fall through to printing the + // argument based on the known type of the argument. + switch (c) { + case 'd': + case 'i': /* decimal/integer */ + MOZ_ASSERT(thisArg->IntCompatible()); + break; + + case 'o': /* octal */ + MOZ_ASSERT(thisArg->IntCompatible()); + radix = 8; + flags |= _UNSIGNED; + break; + + case 'u': /* unsigned decimal */ + MOZ_ASSERT(thisArg->IntCompatible()); + radix = 10; + flags |= _UNSIGNED; + break; + + case 'x': /* unsigned hex */ + MOZ_ASSERT(thisArg->IntCompatible()); + radix = 16; + flags |= _UNSIGNED; + break; + + case 'X': /* unsigned HEX */ + MOZ_ASSERT(thisArg->IntCompatible()); + radix = 16; + hexp = HEX; + flags |= _UNSIGNED; + break; + + case 'e': + case 'E': + case 'f': + case 'g': + case 'G': + MOZ_ASSERT(thisArg->mKind == DOUBLE); + // Type-based printing below. + break; + + case 'S': + MOZ_ASSERT(thisArg->mKind == STRING16); + // Type-based printing below. 
+ break; + + case 's': + MOZ_ASSERT(thisArg->mKind == STRING); + // Type-based printing below. + break; + + case 'c': { + if (!thisArg->IntCompatible()) { + MOZ_ASSERT(false); + // Type-based printing below. + break; + } + + if ((flags & _LEFT) == 0) { + while (width-- > 1) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + } + char16_t ch = thisArg->mValue.mInt; + rv = (*aState->stuff)(aState, &ch, 1); + if (rv < 0) { + return rv; + } + if (flags & _LEFT) { + while (width-- > 1) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + } + } + continue; + + case 'p': + if (!thisArg->PointerCompatible()) { + MOZ_ASSERT(false); + break; + } + static_assert(sizeof(uint64_t) >= sizeof(void*), + "pointers are larger than 64 bits"); + rv = cvt_ll(aState, uintptr_t(thisArg->mValue.mPtr), width, prec, 16, + flags | _UNSIGNED, hexp); + if (rv < 0) { + return rv; + } + continue; + + case 'n': + if (thisArg->mKind != INTPOINTER) { + return -1; + } + + if (thisArg->mValue.mIntPtr != nullptr) { + *thisArg->mValue.mIntPtr = aState->cur - aState->base; + } + continue; + + default: + /* Not a % token after all... skip it */ + rv = (*aState->stuff)(aState, percentPointer, aFmt - percentPointer); + if (rv < 0) { + return rv; + } + continue; + } + + // If we get here, we want to handle the argument according to its + // actual type; modified by the flags as appropriate. + switch (thisArg->mKind) { + case INT: + case UINT: { + int64_t val = thisArg->mValue.mInt; + if ((flags & _UNSIGNED) == 0 && val < 0) { + val = -val; + flags |= _NEG; + } + rv = cvt_ll(aState, uint64_t(val) & mask, width, prec, radix, flags, + hexp); + } break; + case INTPOINTER: + case POINTER: + // Always treat these as unsigned hex, no matter the format. 
+ static_assert(sizeof(uint64_t) >= sizeof(void*), + "pointers are larger than 64 bits"); + rv = cvt_ll(aState, uintptr_t(thisArg->mValue.mPtr), width, prec, 16, + flags | _UNSIGNED, hexp); + break; + case DOUBLE: + if (c != 'f' && c != 'E' && c != 'e' && c != 'G' && c != 'g') { + // Pick some default. + c = 'g'; + } + rv = cvt_f(aState, thisArg->mValue.mDouble, width, prec, c, flags); + break; + case STRING: + rv = cvt_s(aState, thisArg->mValue.mString, width, prec, flags); + break; + case STRING16: + rv = cvt_S(aState, thisArg->mValue.mString16, width, prec, flags); + break; + default: + // Can't happen. + MOZ_ASSERT(0); + } + + if (rv < 0) { + return rv; + } + } + + return 0; +} + +/************************************************************************/ + +int nsTextFormatter::StringStuff(nsTextFormatter::SprintfStateStr* aState, + const char16_t* aStr, uint32_t aLen) { + ptrdiff_t off = aState->cur - aState->base; + + nsAString* str = static_cast<nsAString*>(aState->stuffclosure); + str->Append(aStr, aLen); + + aState->base = str->BeginWriting(); + aState->cur = aState->base + off; + + return 0; +} + +void nsTextFormatter::vssprintf(nsAString& aOut, const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues) { + SprintfStateStr ss; + ss.stuff = StringStuff; + ss.base = 0; + ss.cur = 0; + ss.maxlen = 0; + ss.stuffclosure = &aOut; + + aOut.Truncate(); + dosprintf(&ss, aFmt, aValues); +} + +/* +** Stuff routine that discards overflow data +*/ +int nsTextFormatter::LimitStuff(SprintfStateStr* aState, const char16_t* aStr, + uint32_t aLen) { + uint32_t limit = aState->maxlen - (aState->cur - aState->base); + + if (aLen > limit) { + aLen = limit; + } + while (aLen) { + --aLen; + *aState->cur++ = *aStr++; + } + return 0; +} + +uint32_t nsTextFormatter::vsnprintf(char16_t* aOut, uint32_t aOutLen, + const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues) { + SprintfStateStr ss; + + MOZ_ASSERT((int32_t)aOutLen > 0); + if ((int32_t)aOutLen <= 0) { + return 0; + } + + 
ss.stuff = LimitStuff; + ss.base = aOut; + ss.cur = aOut; + ss.maxlen = aOutLen; + int result = dosprintf(&ss, aFmt, aValues); + + if (ss.cur == ss.base) { + return 0; + } + + // Append a NUL. However, be sure not to count it in the returned + // length. + if (ss.cur - ss.base >= ptrdiff_t(ss.maxlen)) { + --ss.cur; + } + *ss.cur = '\0'; + + // Check the result now, so that an unterminated string can't + // possibly escape. + if (result < 0) { + return -1; + } + + return ss.cur - ss.base; +} diff --git a/xpcom/string/nsTextFormatter.h b/xpcom/string/nsTextFormatter.h new file mode 100644 index 0000000000..f571043da2 --- /dev/null +++ b/xpcom/string/nsTextFormatter.h @@ -0,0 +1,172 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This code was copied from xpcom/ds/nsTextFormatter r1.3 + * Memory model and Frozen linkage changes only. + * -- Prasad <prasad@medhas.org> + */ + +#ifndef nsTextFormatter_h___ +#define nsTextFormatter_h___ + +/* + ** API for PR printf like routines. 
Supports the following formats + ** %d - decimal + ** %u - unsigned decimal + ** %x - unsigned hex + ** %X - unsigned uppercase hex + ** %o - unsigned octal + ** %hd, %hu, %hx, %hX, %ho - 16-bit versions of above + ** %ld, %lu, %lx, %lX, %lo - 32-bit versions of above + ** %lld, %llu, %llx, %llX, %llo - 64 bit versions of above + ** %s - utf8 string + ** %S - char16_t string + ** %c - character + ** %p - pointer (deals with machine dependent pointer size) + ** %f - float + ** %g - float + */ +#include <stdio.h> +#include <stdarg.h> +#include "nscore.h" +#include "nsString.h" +#include "mozilla/Span.h" + +#ifdef XPCOM_GLUE +# error \ + "nsTextFormatter is not available in the standalone glue due to NSPR dependencies." +#endif + +class nsTextFormatter { + public: + /* + * sprintf into a fixed size buffer. Guarantees that the buffer is null + * terminated. Returns the length of the written output, NOT including the + * null terminator, or (uint32_t)-1 if an error occurs. + */ + template <typename... T> + static uint32_t snprintf(char16_t* aOut, uint32_t aOutLen, + const char16_t* aFmt, T... aArgs) { + BoxedValue values[] = {BoxedValue(aArgs)...}; + return vsnprintf(aOut, aOutLen, aFmt, + mozilla::Span(values, sizeof...(aArgs))); + } + + /* + * sprintf into an existing nsAString, overwriting any contents it already + * has. Infallible. + */ + template <typename... T> + static void ssprintf(nsAString& aOut, const char16_t* aFmt, T... 
aArgs) { + BoxedValue values[] = {BoxedValue(aArgs)...}; + vssprintf(aOut, aFmt, mozilla::Span(values, sizeof...(aArgs))); + } + + private: + enum ArgumentKind { + INT, + UINT, + POINTER, + DOUBLE, + STRING, + STRING16, + INTPOINTER, + }; + + union ValueUnion { + int64_t mInt; + uint64_t mUInt; + void const* mPtr; + double mDouble; + char const* mString; + char16_t const* mString16; + int* mIntPtr; + }; + + struct BoxedValue { + ArgumentKind mKind; + ValueUnion mValue; + + explicit BoxedValue(int aArg) : mKind(INT) { mValue.mInt = aArg; } + + explicit BoxedValue(unsigned int aArg) : mKind(UINT) { + mValue.mUInt = aArg; + } + + explicit BoxedValue(long aArg) : mKind(INT) { mValue.mInt = aArg; } + + explicit BoxedValue(unsigned long aArg) : mKind(UINT) { + mValue.mUInt = aArg; + } + + explicit BoxedValue(long long aArg) : mKind(INT) { mValue.mInt = aArg; } + + explicit BoxedValue(unsigned long long aArg) : mKind(UINT) { + mValue.mUInt = aArg; + } + + explicit BoxedValue(const void* aArg) : mKind(POINTER) { + mValue.mPtr = aArg; + } + + explicit BoxedValue(double aArg) : mKind(DOUBLE) { mValue.mDouble = aArg; } + + explicit BoxedValue(const char* aArg) : mKind(STRING) { + mValue.mString = aArg; + } + + explicit BoxedValue(const char16_t* aArg) : mKind(STRING16) { + mValue.mString16 = aArg; + } + +#if defined(MOZ_USE_CHAR16_WRAPPER) + explicit BoxedValue(const char16ptr_t aArg) : mKind(STRING16) { + mValue.mString16 = aArg; + } + +#endif + + explicit BoxedValue(int* aArg) : mKind(INTPOINTER) { + mValue.mIntPtr = aArg; + } + + bool IntCompatible() const { return mKind == INT || mKind == UINT; } + + bool PointerCompatible() const { + return mKind == POINTER || mKind == STRING || mKind == STRING16 || + mKind == INTPOINTER; + } + }; + + struct SprintfStateStr; + + static int fill2(SprintfStateStr* aState, const char16_t* aSrc, int aSrcLen, + int aWidth, int aFlags); + static int fill_n(SprintfStateStr* aState, const char16_t* aSrc, int aSrcLen, + int aWidth, int aPrec, int 
aFlags); + static int cvt_ll(SprintfStateStr* aState, uint64_t aNum, int aWidth, + int aPrec, int aRadix, int aFlags, const char16_t* aHexStr); + static int cvt_f(SprintfStateStr* aState, double aDouble, int aWidth, + int aPrec, const char16_t aType, int aFlags); + static int cvt_S(SprintfStateStr* aState, const char16_t* aStr, int aWidth, + int aPrec, int aFlags); + static int cvt_s(SprintfStateStr* aState, const char* aStr, int aWidth, + int aPrec, int aFlags); + static int dosprintf(SprintfStateStr* aState, const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues); + static int StringStuff(SprintfStateStr* aState, const char16_t* aStr, + uint32_t aLen); + static int LimitStuff(SprintfStateStr* aState, const char16_t* aStr, + uint32_t aLen); + static uint32_t vsnprintf(char16_t* aOut, uint32_t aOutLen, + const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues); + static void vssprintf(nsAString& aOut, const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues); +}; + +#endif /* nsTextFormatter_h___ */ diff --git a/xpcom/string/nsUTF8Utils.h b/xpcom/string/nsUTF8Utils.h new file mode 100644 index 0000000000..0145011ec1 --- /dev/null +++ b/xpcom/string/nsUTF8Utils.h @@ -0,0 +1,247 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef nsUTF8Utils_h_ +#define nsUTF8Utils_h_ + +// NB: This code may be used from non-XPCOM code, in particular, the +// standalone updater executable. That is, this file may be used in +// two ways: if MOZILLA_INTERNAL_API is defined, this file will +// provide signatures for the Mozilla abstract string types. It will +// use XPCOM assertion/debugging macros, etc. 
+ +#include <type_traits> + +#include "mozilla/Assertions.h" +#include "mozilla/EndianUtils.h" + +#include "nsCharTraits.h" + +#ifdef MOZILLA_INTERNAL_API +# define UTF8UTILS_WARNING(msg) NS_WARNING(msg) +#else +# define UTF8UTILS_WARNING(msg) +#endif + +class UTF8traits { + public: + static bool isASCII(char aChar) { return (aChar & 0x80) == 0x00; } + static bool isInSeq(char aChar) { return (aChar & 0xC0) == 0x80; } + static bool is2byte(char aChar) { return (aChar & 0xE0) == 0xC0; } + static bool is3byte(char aChar) { return (aChar & 0xF0) == 0xE0; } + static bool is4byte(char aChar) { return (aChar & 0xF8) == 0xF0; } + static bool is5byte(char aChar) { return (aChar & 0xFC) == 0xF8; } + static bool is6byte(char aChar) { return (aChar & 0xFE) == 0xFC; } + // return the number of bytes in a sequence beginning with aChar + static int bytes(char aChar) { + if (isASCII(aChar)) { + return 1; + } + if (is2byte(aChar)) { + return 2; + } + if (is3byte(aChar)) { + return 3; + } + if (is4byte(aChar)) { + return 4; + } + MOZ_ASSERT_UNREACHABLE("should not be used for in-sequence characters"); + return 1; + } +}; + +/** + * Extract the next Unicode scalar value from the buffer and return it. The + * pointer passed in is advanced to the start of the next character in the + * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced + * over the maximal valid prefix and *aErr is set to true (if aErr is not + * null). + * + * Note: This method never sets *aErr to false to allow error accumulation + * across multiple calls. 
+ * + * Precondition: *aBuffer < aEnd + */ +class UTF8CharEnumerator { + public: + static inline char32_t NextChar(const char** aBuffer, const char* aEnd, + bool* aErr = nullptr) { + MOZ_ASSERT(aBuffer, "null buffer pointer pointer"); + MOZ_ASSERT(aEnd, "null end pointer"); + + const unsigned char* p = reinterpret_cast<const unsigned char*>(*aBuffer); + const unsigned char* end = reinterpret_cast<const unsigned char*>(aEnd); + + MOZ_ASSERT(p, "null buffer"); + MOZ_ASSERT(p < end, "Bogus range"); + + unsigned char first = *p; + ++p; + + if (MOZ_LIKELY(first < 0x80U)) { + *aBuffer = reinterpret_cast<const char*>(p); + return first; + } + + // Unsigned underflow is defined behavior + if (MOZ_UNLIKELY((p == end) || ((first - 0xC2U) >= (0xF5U - 0xC2U)))) { + *aBuffer = reinterpret_cast<const char*>(p); + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } + + unsigned char second = *p; + + if (first < 0xE0U) { + // Two-byte + if (MOZ_LIKELY((second & 0xC0U) == 0x80U)) { + ++p; + *aBuffer = reinterpret_cast<const char*>(p); + return ((uint32_t(first) & 0x1FU) << 6) | (uint32_t(second) & 0x3FU); + } + *aBuffer = reinterpret_cast<const char*>(p); + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } + + if (MOZ_LIKELY(first < 0xF0U)) { + // Three-byte + unsigned char lower = 0x80U; + unsigned char upper = 0xBFU; + if (first == 0xE0U) { + lower = 0xA0U; + } else if (first == 0xEDU) { + upper = 0x9FU; + } + if (MOZ_LIKELY(second >= lower && second <= upper)) { + ++p; + if (MOZ_LIKELY(p != end)) { + unsigned char third = *p; + if (MOZ_LIKELY((third & 0xC0U) == 0x80U)) { + ++p; + *aBuffer = reinterpret_cast<const char*>(p); + return ((uint32_t(first) & 0xFU) << 12) | + ((uint32_t(second) & 0x3FU) << 6) | + (uint32_t(third) & 0x3FU); + } + } + } + *aBuffer = reinterpret_cast<const char*>(p); + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } + + // Four-byte + unsigned char lower = 0x80U; + unsigned char upper = 0xBFU; + if (first == 0xF0U) { + lower = 0x90U; + } else 
if (first == 0xF4U) { + upper = 0x8FU; + } + if (MOZ_LIKELY(second >= lower && second <= upper)) { + ++p; + if (MOZ_LIKELY(p != end)) { + unsigned char third = *p; + if (MOZ_LIKELY((third & 0xC0U) == 0x80U)) { + ++p; + if (MOZ_LIKELY(p != end)) { + unsigned char fourth = *p; + if (MOZ_LIKELY((fourth & 0xC0U) == 0x80U)) { + ++p; + *aBuffer = reinterpret_cast<const char*>(p); + return ((uint32_t(first) & 0x7U) << 18) | + ((uint32_t(second) & 0x3FU) << 12) | + ((uint32_t(third) & 0x3FU) << 6) | + (uint32_t(fourth) & 0x3FU); + } + } + } + } + } + *aBuffer = reinterpret_cast<const char*>(p); + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } +}; + +/** + * Extract the next Unicode scalar value from the buffer and return it. The + * pointer passed in is advanced to the start of the next character in the + * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced over + * the unpaired surrogate and *aErr is set to true (if aErr is not null). + * + * Note: This method never sets *aErr to false to allow error accumulation + * across multiple calls. + * + * Precondition: *aBuffer < aEnd + */ +class UTF16CharEnumerator { + public: + static inline char32_t NextChar(const char16_t** aBuffer, + const char16_t* aEnd, bool* aErr = nullptr) { + MOZ_ASSERT(aBuffer, "null buffer pointer pointer"); + MOZ_ASSERT(aEnd, "null end pointer"); + + const char16_t* p = *aBuffer; + + MOZ_ASSERT(p, "null buffer"); + MOZ_ASSERT(p < aEnd, "Bogus range"); + + char16_t c = *p++; + + // Let's use encoding_rs-style code golf here. 
+ // Unsigned underflow is defined behavior + char16_t cMinusSurrogateStart = c - 0xD800U; + if (MOZ_LIKELY(cMinusSurrogateStart > (0xDFFFU - 0xD800U))) { + *aBuffer = p; + return c; + } + if (MOZ_LIKELY(cMinusSurrogateStart <= (0xDBFFU - 0xD800U))) { + // High surrogate + if (MOZ_LIKELY(p != aEnd)) { + char16_t second = *p; + // Unsigned underflow is defined behavior + if (MOZ_LIKELY((second - 0xDC00U) <= (0xDFFFU - 0xDC00U))) { + *aBuffer = ++p; + return (uint32_t(c) << 10) + uint32_t(second) - + (((0xD800U << 10) - 0x10000U) + 0xDC00U); + } + } + } + // Unpaired surrogate + *aBuffer = p; + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } +}; + +template <typename Char, typename UnsignedT> +inline UnsignedT RewindToPriorUTF8Codepoint(const Char* utf8Chars, + UnsignedT index) { + static_assert(std::is_same_v<Char, char> || + std::is_same_v<Char, unsigned char> || + std::is_same_v<Char, signed char>, + "UTF-8 data must be in 8-bit units"); + static_assert(std::is_unsigned_v<UnsignedT>, "index type must be unsigned"); + while (index > 0 && (utf8Chars[index] & 0xC0) == 0x80) --index; + + return index; +} + +#undef UTF8UTILS_WARNING + +#endif /* !defined(nsUTF8Utils_h_) */ diff --git a/xpcom/string/precompiled_templates.cpp b/xpcom/string/precompiled_templates.cpp new file mode 100644 index 0000000000..1ecac69bf8 --- /dev/null +++ b/xpcom/string/precompiled_templates.cpp @@ -0,0 +1,40 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" + +// This file provides concrete instantiations for externed string template +// classes. 
+ +// ================ +// Template classes +// ================ +template class mozilla::detail::nsTStringRepr<char>; +template class mozilla::detail::nsTStringRepr<char16_t>; + +template class nsTLiteralString<char>; +template class nsTLiteralString<char16_t>; + +template class nsTSubstring<char>; +template class nsTSubstring<char16_t>; + +template class nsTDependentSubstring<char>; +template class nsTDependentSubstring<char16_t>; + +// Note: nsTString is skipped as it's implicitly instantiated by derived +// classes. + +template class nsTAutoStringN<char, 64>; +template class nsTAutoStringN<char16_t, 64>; + +template class nsTDependentString<char>; +template class nsTDependentString<char16_t>; + +template class nsTPromiseFlatString<char>; +template class nsTPromiseFlatString<char16_t>; + +template class nsTSubstringSplitter<char>; +template class nsTSubstringSplitter<char16_t>; |