diff options
Diffstat (limited to '')
49 files changed, 10693 insertions, 0 deletions
diff --git a/xpcom/string/README.html b/xpcom/string/README.html new file mode 100644 index 0000000000..ea81688121 --- /dev/null +++ b/xpcom/string/README.html @@ -0,0 +1,11 @@ +<html> + <!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + <body> + <h1><span class="LXRSHORTDESC">managing sequences of characters</span></h1> + <p> + <span class="LXRLONGDESC"></span> + </p> + </body> +</html> diff --git a/xpcom/string/RustRegex.h b/xpcom/string/RustRegex.h new file mode 100644 index 0000000000..80b8140bb5 --- /dev/null +++ b/xpcom/string/RustRegex.h @@ -0,0 +1,707 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_RustRegex_h +#define mozilla_RustRegex_h + +#include "nsPrintfCString.h" +#include "nsTArray.h" +#include "rure.h" +#include "mozilla/Maybe.h" +#include "mozilla/UniquePtr.h" + +namespace mozilla { + +// This header is a thin wrapper around the `rure.h` header file, which declares +// the C API for interacting with the rust `regex` crate. This is intended to +// make the type more ergonomic to use with mozilla types. + +class RustRegex; +class RustRegexSet; +class RustRegexOptions; +class RustRegexCaptures; +class RustRegexIter; +class RustRegexIterCaptureNames; + +using RustRegexMatch = rure_match; + +/* + * RustRegexCaptures represents storage for sub-capture locations of a match. + * + * Computing the capture groups of a match can carry a significant performance + * penalty, so their use in the API is optional. 
+ * + * A RustRegexCaptures value may outlive its corresponding RustRegex and can be + * freed independently. + * + * It is not safe to use from multiple threads simultaneously. + */ +class RustRegexCaptures final { + public: + RustRegexCaptures() = default; + + // Check if the `RustRegexCaptures` object is valid. + bool IsValid() const { return mPtr != nullptr; } + explicit operator bool() const { return IsValid(); } + + /* + * CaptureAt returns Some if and only if the capturing group at the + * index given was part of the match. If so, the returned RustRegexMatch + * object contains the start and end offsets (in bytes) of the match. + * + * If no capture group with the index aIdx exists, or the group was not part + * of the match, then Nothing is returned. (A capturing group exists if and + * only if aIdx is less than Length().) + * + * Note that index 0 corresponds to the full match. + */ + Maybe<RustRegexMatch> CaptureAt(size_t aIdx) const { + RustRegexMatch match; + if (mPtr && rure_captures_at(mPtr.get(), aIdx, &match)) { + return Some(match); + } + return Nothing(); + } + Maybe<RustRegexMatch> operator[](size_t aIdx) const { + return CaptureAt(aIdx); + } + + /* + * Returns the number of capturing groups in this `RustRegexCaptures`. + */ + size_t Length() const { return mPtr ? rure_captures_len(mPtr.get()) : 0; } + + private: + friend class RustRegex; + friend class RustRegexIter; + + explicit RustRegexCaptures(rure* aRe) + : mPtr(aRe ? rure_captures_new(aRe) : nullptr) {} + + struct Deleter { + void operator()(rure_captures* ptr) const { rure_captures_free(ptr); } + }; + UniquePtr<rure_captures, Deleter> mPtr; +}; + +/* + * RustRegexIterCaptureNames is an iterator over the list of capture group names + * in this particular RustRegex. + * + * A RustRegexIterCaptureNames value may not outlive its corresponding + * RustRegex, and should be destroyed before its corresponding RustRegex is + * destroyed. 
+ * + * It is not safe to use from multiple threads simultaneously. + */ +class RustRegexIterCaptureNames { + public: + RustRegexIterCaptureNames() = delete; + + // Check if the `RustRegexIterCaptureNames` object is valid. + bool IsValid() const { return mPtr != nullptr; } + explicit operator bool() const { return IsValid(); } + + /* + * Advances the iterator and returns Some if and only if another capture group + * name exists. + * + * The capture group name is the value held by the returned Maybe. + */ + mozilla::Maybe<const char*> Next() { + char* next = nullptr; + if (mPtr && rure_iter_capture_names_next(mPtr.get(), &next)) { + return Some(next); + } + return Nothing(); + } + + private: + friend class RustRegex; + + explicit RustRegexIterCaptureNames(rure* aRe) + : mPtr(aRe ? rure_iter_capture_names_new(aRe) : nullptr) {} + + struct Deleter { + void operator()(rure_iter_capture_names* ptr) const { + rure_iter_capture_names_free(ptr); + } + }; + UniquePtr<rure_iter_capture_names, Deleter> mPtr; +}; + +/* + * RustRegexIter is an iterator over successive non-overlapping matches in a + * particular haystack. + * + * A RustRegexIter value may not outlive its corresponding RustRegex and should + * be destroyed before its corresponding RustRegex is destroyed. + * + * It is not safe to use from multiple threads simultaneously. + */ +class RustRegexIter { + public: + RustRegexIter() = delete; + + // Check if the `RustRegexIter` object is valid. + bool IsValid() const { return mPtr != nullptr; } + explicit operator bool() const { return IsValid(); } + + /* + * Next() returns Some if and only if this regex matches anywhere in haystack. + * The returned RustRegexMatch object contains the start and end offsets (in + * bytes) of the match. + * + * If no match is found, then subsequent calls will return Nothing() + * indefinitely. + * + * Next() should be preferred to NextCaptures() since it may be faster. + * + * N.B.
The performance of this search is not impacted by the presence of + * capturing groups in your regular expression. + */ + mozilla::Maybe<RustRegexMatch> Next() { + RustRegexMatch match{}; + if (mPtr && + rure_iter_next(mPtr.get(), mHaystackPtr, mHaystackSize, &match)) { + return Some(match); + } + return Nothing(); + } + + /* + * NextCaptures returns a valid RustRegexCaptures if and only if this regex + * matches anywhere in haystack. If a match is found, then all of its capture + * locations are stored in the returned RustRegexCaptures object. + * + * If no match is found, then subsequent calls will return an invalid + * `RustRegexCaptures` indefinitely. + * + * Only use this function if you specifically need access to capture + * locations. It is not necessary to use this function just because your + * regular expression contains capturing groups. + * + * Capture locations can be accessed using the methods on RustRegexCaptures. + * + * N.B. The performance of this search can be impacted by the number of + * capturing groups. If you're using this function, it may be beneficial to + * use non-capturing groups (e.g., `(?:re)`) where possible. + */ + RustRegexCaptures NextCaptures() { + RustRegexCaptures captures(mRe); + if (mPtr && rure_iter_next_captures(mPtr.get(), mHaystackPtr, mHaystackSize, + captures.mPtr.get())) { + return captures; + } + return {}; + } + + private: + friend class RustRegex; + RustRegexIter(rure* aRe, const std::string_view& aHaystack) + : mRe(aRe), + mHaystackPtr(reinterpret_cast<const uint8_t*>(aHaystack.data())), + mHaystackSize(aHaystack.size()), + mPtr(aRe ? rure_iter_new(aRe) : nullptr) {} + + rure* MOZ_NON_OWNING_REF mRe; + const uint8_t* MOZ_NON_OWNING_REF mHaystackPtr; + size_t mHaystackSize; + + struct Deleter { + void operator()(rure_iter* ptr) const { rure_iter_free(ptr); } + }; + UniquePtr<rure_iter, Deleter> mPtr; +}; + +/* + * RustRegexOptions is the set of configuration options for compiling a regular + * expression. 
+ * + * All flags on this type can be used to set default flags while compiling, and + * can be toggled in the expression itself using standard syntax, e.g. `(?i)` + * turns case-insensitive matching on, and `(?-i)` disables it. + * + * In addition, two non-flag options are available: setting the size limit of + * the compiled program and setting the size limit of the cache of states that + * the DFA uses while searching. + * + * For most uses, the default settings will work fine, and a default-constructed + * RustRegexOptions can be passed. + */ +class RustRegexOptions { + public: + RustRegexOptions() = default; + + /* + * Set the value for the case insensitive (i) flag. + * + * When enabled, letters in the pattern will match both upper case and lower + * case variants. + */ + RustRegexOptions& CaseInsensitive(bool aYes) { + return SetFlag(aYes, RURE_FLAG_CASEI); + } + + /* + * Set the value for the multi-line matching (m) flag. + * + * When enabled, ^ matches the beginning of lines and $ matches the end of + * lines. + * + * By default, they match beginning/end of the input. + */ + RustRegexOptions& MultiLine(bool aYes) { + return SetFlag(aYes, RURE_FLAG_MULTI); + } + + /* + * Set the value for the any character (s) flag, where in . matches anything + * when s is set and matches anything except for new line when it is not set + * (the default). + * + * N.B. “matches anything” means “any byte” when Unicode is disabled and means + * “any valid UTF-8 encoding of any Unicode scalar value” when Unicode is + * enabled. + */ + RustRegexOptions& DotMatchesNewLine(bool aYes) { + return SetFlag(aYes, RURE_FLAG_DOTNL); + } + + /* + * Set the value for the greedy swap (U) flag. + * + * When enabled, a pattern like a* is lazy (tries to find shortest match) and + * a*? is greedy (tries to find longest match). + * + * By default, a* is greedy and a*? is lazy. 
+ */ + RustRegexOptions& SwapGreed(bool aYes) { + return SetFlag(aYes, RURE_FLAG_SWAP_GREED); + } + + /* + * Set the value for the ignore whitespace (x) flag. + * + * When enabled, whitespace such as new lines and spaces will be ignored + * between expressions of the pattern, and # can be used to start a comment + * until the next new line. + */ + RustRegexOptions& IgnoreWhitespace(bool aYes) { + return SetFlag(aYes, RURE_FLAG_SPACE); + } + + /* + * Set the value for the Unicode (u) flag. + * + * Enabled by default. When disabled, character classes such as \w only match + * ASCII word characters instead of all Unicode word characters. + */ + RustRegexOptions& Unicode(bool aYes) { + return SetFlag(aYes, RURE_FLAG_UNICODE); + } + + /* + * SizeLimit sets the approximate size limit of the compiled regular + * expression. + * + * This size limit roughly corresponds to the number of bytes occupied by + * a single compiled program. If the program would exceed this number, + * then an invalid RustRegex will be constructed. + */ + RustRegexOptions& SizeLimit(size_t aLimit) { + mSizeLimit = Some(aLimit); + return *this; + } + + /* + * DFASizeLimit sets the approximate size of the cache used by the DFA during + * search. + * + * This roughly corresponds to the number of bytes that the DFA will use while + * searching. + * + * Note that this is a *per thread* limit. There is no way to set a global + * limit. In particular, if a regular expression is used from multiple threads + * simultaneously, then each thread may use up to the number of bytes + * specified here.
+ */ + RustRegexOptions& DFASizeLimit(size_t aLimit) { + mDFASizeLimit = Some(aLimit); + return *this; + } + + private: + friend class RustRegex; + friend class RustRegexSet; + + struct OptionsDeleter { + void operator()(rure_options* ptr) const { rure_options_free(ptr); } + }; + + UniquePtr<rure_options, OptionsDeleter> GetOptions() const { + UniquePtr<rure_options, OptionsDeleter> options; + if (mSizeLimit || mDFASizeLimit) { + options.reset(rure_options_new()); + if (mSizeLimit) { + rure_options_size_limit(options.get(), *mSizeLimit); + } + if (mDFASizeLimit) { + rure_options_dfa_size_limit(options.get(), *mDFASizeLimit); + } + } + return options; + } + + uint32_t GetFlags() const { return mFlags; } + + RustRegexOptions& SetFlag(bool aYes, uint32_t aFlag) { + if (aYes) { + mFlags |= aFlag; + } else { + mFlags &= ~aFlag; + } + return *this; + } + + uint32_t mFlags = RURE_DEFAULT_FLAGS; + Maybe<size_t> mSizeLimit; + Maybe<size_t> mDFASizeLimit; +}; + +/* + * RustRegex is the type of a compiled regular expression. + * + * A RustRegex can be safely used from multiple threads simultaneously. + * + * When calling the matching methods on this type, they will generally have the + * following parameters: + * + * aHaystack + * may contain arbitrary bytes, but ASCII compatible text is more useful. + * UTF-8 is even more useful. Other text encodings aren't supported. + * + * aStart + * the position in bytes at which to start searching. Note that setting the + * start position is distinct from using a substring for `aHaystack`, since + * the regex engine may look at bytes before the start position to determine + * match information. For example, if the start position is greater than 0, + * then the \A ("begin text") anchor can never match. + */ +class RustRegex final { + public: + // Create a new invalid RustRegex object + RustRegex() = default; + + /* + * Compiles the given pattern into a regular expression. 
The pattern must be + * valid UTF-8 and the length corresponds to the number of bytes in the + * pattern. + * + * If an error occurs, the constructed RustRegex will be `!IsValid()`. + * + * The compiled expression returned may be used from multiple threads + * simultaneously. + */ + explicit RustRegex(const std::string_view& aPattern, + const RustRegexOptions& aOptions = {}) { +#ifdef DEBUG + rure_error* error = rure_error_new(); +#else + rure_error* error = nullptr; +#endif + mPtr.reset(rure_compile(reinterpret_cast<const uint8_t*>(aPattern.data()), + aPattern.size(), aOptions.GetFlags(), + aOptions.GetOptions().get(), error)); +#ifdef DEBUG + if (!mPtr) { + NS_WARNING(nsPrintfCString("RustRegex compile failed: %s", + rure_error_message(error)) + .get()); + } + rure_error_free(error); +#endif + } + + // Check if the compiled `RustRegex` is valid. + bool IsValid() const { return mPtr != nullptr; } + explicit operator bool() const { return IsValid(); } + + /* + * IsMatch returns true if and only if this regex matches anywhere in + * aHaystack. + * + * See the type-level comment for details on aHaystack and aStart. + * + * IsMatch() should be preferred to Find() since it may be faster. + * + * N.B. The performance of this search is not impacted by the presence of + * capturing groups in your regular expression. + */ + bool IsMatch(const std::string_view& aHaystack, size_t aStart = 0) const { + return mPtr && + rure_is_match(mPtr.get(), + reinterpret_cast<const uint8_t*>(aHaystack.data()), + aHaystack.size(), aStart); + } + + /* + * Find returns Some if and only if this regex matches anywhere in + * haystack. The returned RustRegexMatch object contains the start and end + * offsets (in bytes) of the match. + * + * See the type-level comment for details on aHaystack and aStart. + * + * Find() should be preferred to FindCaptures() since it may be faster. + * + * N.B. 
The performance of this search is not impacted by the presence of + * capturing groups in your regular expression. + */ + Maybe<RustRegexMatch> Find(const std::string_view& aHaystack, + size_t aStart = 0) const { + RustRegexMatch match{}; + if (mPtr && rure_find(mPtr.get(), + reinterpret_cast<const uint8_t*>(aHaystack.data()), + aHaystack.size(), aStart, &match)) { + return Some(match); + } + return Nothing(); + } + + /* + * FindCaptures() returns a valid RustRegexCaptures if and only if this + * regex matches anywhere in haystack. If a match is found, then all of its + * capture locations are stored in the returned RustRegexCaptures object. + * + * See the type-level comment for details on aHaystack and aStart. + * + * Only use this function if you specifically need access to capture + * locations. It is not necessary to use this function just because your + * regular expression contains capturing groups. + * + * Capture locations can be accessed using the methods on RustRegexCaptures. + * + * N.B. The performance of this search can be impacted by the number of + * capturing groups. If you're using this function, it may be beneficial to + * use non-capturing groups (e.g., `(?:re)`) where possible. + */ + RustRegexCaptures FindCaptures(const std::string_view& aHaystack, + size_t aStart = 0) const { + RustRegexCaptures captures(mPtr.get()); + if (mPtr && + rure_find_captures(mPtr.get(), + reinterpret_cast<const uint8_t*>(aHaystack.data()), + aHaystack.size(), aStart, captures.mPtr.get())) { + return captures; + } + return {}; + } + + /* + * ShortestMatch() returns Some if and only if this regex matches anywhere + * in haystack. If a match is found, then its end location is the value + * returned. The end location is the place at which the regex engine + * determined that a match exists, but may occur before the end of the + * proper leftmost-first match. + * + * See the type-level comment for details on aHaystack and aStart.
+ * + * ShortestMatch should be preferred to Find since it may be faster. + * + * N.B. The performance of this search is not impacted by the presence of + * capturing groups in your regular expression. + */ + Maybe<size_t> ShortestMatch(const std::string_view& aHaystack, + size_t aStart = 0) const { + size_t end = 0; + if (mPtr && + rure_shortest_match(mPtr.get(), + reinterpret_cast<const uint8_t*>(aHaystack.data()), + aHaystack.size(), aStart, &end)) { + return Some(end); + } + return Nothing(); + } + + /* + * Create an iterator over all successive non-overlapping matches of this + * regex in aHaystack. + * + * See the type-level comment for details on aHaystack. + * + * Both aHaystack and this regex must remain valid until the returned + * `RustRegexIter` is destroyed. + */ + RustRegexIter IterMatches(const std::string_view& aHaystack) const { + return RustRegexIter(mPtr.get(), aHaystack); + } + + /* + * Returns the capture index for the name given. If no such named capturing + * group exists in this regex, then -1 is returned. + * + * The capture index may be used with RustRegexCaptures::CaptureAt. + * + * This function never returns 0 since the first capture group always + * corresponds to the entire match and is always unnamed. + */ + int32_t CaptureNameIndex(const char* aName) const { + return mPtr ? rure_capture_name_index(mPtr.get(), aName) : -1; + } + + /* + * Create an iterator over the list of capture group names in this particular + * regex. + * + * This regex must remain valid until the returned `RustRegexIterCaptureNames` + * is destroyed. + */ + RustRegexIterCaptureNames IterCaptureNames() const { + return RustRegexIterCaptureNames(mPtr.get()); + } + + /* + * Count the number of successive non-overlapping matches of this regex in + * aHaystack. + * + * See the type-level comment for details on aHaystack. 
+ */ + size_t CountMatches(const std::string_view& aHaystack) const { + size_t count = 0; + auto iter = IterMatches(aHaystack); + while (iter.Next()) { + count++; + } + return count; + } + + private: + struct Deleter { + void operator()(rure* ptr) const { rure_free(ptr); } + }; + UniquePtr<rure, Deleter> mPtr; +}; + +/* + * RustRegexSet is the type of a set of compiled regular expressions. + * + * A RustRegexSet can be safely used from multiple threads simultaneously. + * + * When calling the matching methods on this type, they will generally have the + * following parameters: + * + * aHaystack + * may contain arbitrary bytes, but ASCII compatible text is more useful. + * UTF-8 is even more useful. Other text encodings aren't supported. + * + * aStart + * the position in bytes at which to start searching. Note that setting the + * start position is distinct from using a substring for `aHaystack`, since + * the regex engine may look at bytes before the start position to determine + * match information. For example, if the start position is greater than 0, + * then the \A ("begin text") anchor can never match. + */ +class RustRegexSet final { + public: + /* + * Compiles the given range of patterns into a single regular expression which + * can be matched in a linear-scan. Each pattern in aPatterns must be valid + * UTF-8, and implicitly coerce to `std::string_view`. + * + * If an error occurs, the constructed RustRegexSet will be `!IsValid()`. + * + * The compiled expression returned may be used from multiple threads + * simultaneously.
+ */ + template <typename Patterns> + explicit RustRegexSet(Patterns&& aPatterns, + const RustRegexOptions& aOptions = {}) { +#ifdef DEBUG + rure_error* error = rure_error_new(); +#else + rure_error* error = nullptr; +#endif + AutoTArray<const uint8_t*, 4> patternPtrs; + AutoTArray<size_t, 4> patternSizes; + for (auto&& pattern : std::forward<Patterns>(aPatterns)) { + std::string_view view = pattern; + patternPtrs.AppendElement(reinterpret_cast<const uint8_t*>(view.data())); + patternSizes.AppendElement(view.size()); + } + mPtr.reset(rure_compile_set(patternPtrs.Elements(), patternSizes.Elements(), + patternPtrs.Length(), aOptions.GetFlags(), + aOptions.GetOptions().get(), error)); +#ifdef DEBUG + if (!mPtr) { + NS_WARNING(nsPrintfCString("RustRegexSet compile failed: %s", + rure_error_message(error)) + .get()); + } + rure_error_free(error); +#endif + } + + // Check if the `RustRegexSet` object is valid. + bool IsValid() const { return mPtr != nullptr; } + explicit operator bool() const { return IsValid(); } + + /* + * IsMatch returns true if and only if any regexes within the set + * match anywhere in the haystack. Once a match has been located, the + * matching engine will quit immediately. + * + * See the type-level comment for details on aHaystack and aStart. + */ + bool IsMatch(const std::string_view& aHaystack, size_t aStart = 0) const { + return mPtr && + rure_set_is_match(mPtr.get(), + reinterpret_cast<const uint8_t*>(aHaystack.data()), + aHaystack.size(), aStart); + } + + struct SetMatches { + bool matchedAny = false; + nsTArray<bool> matches; + }; + + /* + * Matches() compares each regex in the set against the haystack and + * returns a list with the match result of each pattern. Match results are + * ordered in the same way as the regex set was compiled. For example, index 0 + * of matches corresponds to the first pattern passed to the constructor. + * + * See the type-level comment for details on aHaystack and aStart. 
+ * + * Only use this function if you specifically need to know which regexes + * matched within the set. To determine if any of the regexes matched without + * caring which, use IsMatch. + */ + SetMatches Matches(const std::string_view& aHaystack, + size_t aStart = 0) const { + nsTArray<bool> matches; + matches.SetLength(Length()); + bool any = mPtr && rure_set_matches( + mPtr.get(), + reinterpret_cast<const uint8_t*>(aHaystack.data()), + aHaystack.size(), aStart, matches.Elements()); + return SetMatches{any, std::move(matches)}; + } + + /* + * Returns the number of patterns the regex set was compiled with. + */ + size_t Length() const { return mPtr ? rure_set_len(mPtr.get()) : 0; } + + private: + struct Deleter { + void operator()(rure_set* ptr) const { rure_set_free(ptr); } + }; + UniquePtr<rure_set, Deleter> mPtr; +}; + +} // namespace mozilla + +#endif // mozilla_RustRegex_h diff --git a/xpcom/string/RustStringAPI.cpp b/xpcom/string/RustStringAPI.cpp new file mode 100644 index 0000000000..55ce6b9eeb --- /dev/null +++ b/xpcom/string/RustStringAPI.cpp @@ -0,0 +1,123 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.h" +#include "nsString.h" + +// Extern "C" utilities used by the rust nsString bindings. + +// Provide rust bindings to the nsA[C]String types +extern "C" { + +// This is a no-op on release, so we ifdef it out such that using it in release +// results in a linker error. 
+#ifdef DEBUG +void Gecko_IncrementStringAdoptCount(void* aData) { + MOZ_LOG_CTOR(aData, "StringAdopt", 1); +} +#elif defined(MOZ_DEBUG_RUST) +void Gecko_IncrementStringAdoptCount(void* aData) {} +#endif + +void Gecko_FinalizeCString(nsACString* aThis) { aThis->~nsACString(); } + +void Gecko_AssignCString(nsACString* aThis, const nsACString* aOther) { + aThis->Assign(*aOther); +} + +void Gecko_TakeFromCString(nsACString* aThis, nsACString* aOther) { + aThis->Assign(std::move(*aOther)); +} + +void Gecko_AppendCString(nsACString* aThis, const nsACString* aOther) { + aThis->Append(*aOther); +} + +void Gecko_SetLengthCString(nsACString* aThis, uint32_t aLength) { + aThis->SetLength(aLength); +} + +bool Gecko_FallibleAssignCString(nsACString* aThis, const nsACString* aOther) { + return aThis->Assign(*aOther, mozilla::fallible); +} + +bool Gecko_FallibleTakeFromCString(nsACString* aThis, nsACString* aOther) { + return aThis->Assign(std::move(*aOther), mozilla::fallible); +} + +bool Gecko_FallibleAppendCString(nsACString* aThis, const nsACString* aOther) { + return aThis->Append(*aOther, mozilla::fallible); +} + +bool Gecko_FallibleSetLengthCString(nsACString* aThis, uint32_t aLength) { + return aThis->SetLength(aLength, mozilla::fallible); +} + +char* Gecko_BeginWritingCString(nsACString* aThis) { + return aThis->BeginWriting(); +} + +char* Gecko_FallibleBeginWritingCString(nsACString* aThis) { + return aThis->BeginWriting(mozilla::fallible); +} + +uint32_t Gecko_StartBulkWriteCString(nsACString* aThis, uint32_t aCapacity, + uint32_t aUnitsToPreserve, + bool aAllowShrinking) { + return aThis->StartBulkWriteImpl(aCapacity, aUnitsToPreserve, aAllowShrinking) + .unwrapOr(UINT32_MAX); +} + +void Gecko_FinalizeString(nsAString* aThis) { aThis->~nsAString(); } + +void Gecko_AssignString(nsAString* aThis, const nsAString* aOther) { + aThis->Assign(*aOther); +} + +void Gecko_TakeFromString(nsAString* aThis, nsAString* aOther) { + aThis->Assign(std::move(*aOther)); +} + +void 
Gecko_AppendString(nsAString* aThis, const nsAString* aOther) { + aThis->Append(*aOther); +} + +void Gecko_SetLengthString(nsAString* aThis, uint32_t aLength) { + aThis->SetLength(aLength); +} + +bool Gecko_FallibleAssignString(nsAString* aThis, const nsAString* aOther) { + return aThis->Assign(*aOther, mozilla::fallible); +} + +bool Gecko_FallibleTakeFromString(nsAString* aThis, nsAString* aOther) { + return aThis->Assign(std::move(*aOther), mozilla::fallible); +} + +bool Gecko_FallibleAppendString(nsAString* aThis, const nsAString* aOther) { + return aThis->Append(*aOther, mozilla::fallible); +} + +bool Gecko_FallibleSetLengthString(nsAString* aThis, uint32_t aLength) { + return aThis->SetLength(aLength, mozilla::fallible); +} + +char16_t* Gecko_BeginWritingString(nsAString* aThis) { + return aThis->BeginWriting(); +} + +char16_t* Gecko_FallibleBeginWritingString(nsAString* aThis) { + return aThis->BeginWriting(mozilla::fallible); +} + +uint32_t Gecko_StartBulkWriteString(nsAString* aThis, uint32_t aCapacity, + uint32_t aUnitsToPreserve, + bool aAllowShrinking) { + return aThis->StartBulkWriteImpl(aCapacity, aUnitsToPreserve, aAllowShrinking) + .unwrapOr(UINT32_MAX); +} + +} // extern "C" diff --git a/xpcom/string/crashtests/1113005-frame.html b/xpcom/string/crashtests/1113005-frame.html new file mode 100644 index 0000000000..505fc22f1e --- /dev/null +++ b/xpcom/string/crashtests/1113005-frame.html @@ -0,0 +1,5 @@ +<form method=post enctype=multipart/form-data action="data:text/html,"><textarea name='file"; filename="filename.ext + '></textarea> +<script> +document.forms[0].submit(); +</script> diff --git a/xpcom/string/crashtests/1113005.html b/xpcom/string/crashtests/1113005.html new file mode 100644 index 0000000000..e377bb637f --- /dev/null +++ b/xpcom/string/crashtests/1113005.html @@ -0,0 +1,2 @@ +<!DOCTYPE html> +<iframe src="1113005-frame.html"></iframe> diff --git a/xpcom/string/crashtests/394275-1.html b/xpcom/string/crashtests/394275-1.html new file 
mode 100644 index 0000000000..b589c4d359 --- /dev/null +++ b/xpcom/string/crashtests/394275-1.html @@ -0,0 +1,9 @@ +<html> +<body> +<script> +style = document.createElement("style"); // eslint-disable-line no-undef +document.documentElement.appendChild(style); // eslint-disable-line no-undef +style.textContent = "tz\uDAB2 "; // eslint-disable-line no-undef +</script> +</body> +</html> diff --git a/xpcom/string/crashtests/395651-1.html b/xpcom/string/crashtests/395651-1.html new file mode 100644 index 0000000000..bbed371fd6 --- /dev/null +++ b/xpcom/string/crashtests/395651-1.html @@ -0,0 +1,30 @@ +<html> +<head> +<script> + +function X() { dump("X\n"); } +function Y() { dump("Y\n"); } + +function boom() { + dump("Start9\n"); + + var div = document.getElementById("v"); + + var textNode = document.createTextNode(String.fromCharCode(0xDAAF)); // high surrogate + div.appendChild(textNode); + + document.addEventListener("DOMCharacterDataModified", X, true); + textNode.data += "B"; + document.removeEventListener("DOMCharacterDataModified", X, true); + + document.addEventListener("DOMAttrModified", Y, true); + textNode.data += String.fromCharCode(0xDF53); // low surrogate + document.removeEventListener("DOMAttrModified", Y, true); +} + +</script> +</head> + +<body onload="boom();"><div id="v"></div></body> + +</html> diff --git a/xpcom/string/crashtests/crashtests.list b/xpcom/string/crashtests/crashtests.list new file mode 100644 index 0000000000..d464166e85 --- /dev/null +++ b/xpcom/string/crashtests/crashtests.list @@ -0,0 +1,3 @@ +load 394275-1.html +load 395651-1.html +skip-if(gtkWidget||winWidget) load 1113005.html # Bug 1683062 diff --git a/xpcom/string/moz.build b/xpcom/string/moz.build new file mode 100644 index 0000000000..c0f8091b8f --- /dev/null +++ b/xpcom/string/moz.build @@ -0,0 +1,62 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# 
License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +with Files("**"): + BUG_COMPONENT = ("Core", "String") + +EXPORTS += [ + "nsASCIIMask.h", + "nsAString.h", + "nsCharTraits.h", + "nsDependentString.h", + "nsDependentSubstring.h", + "nsLiteralString.h", + "nsPrintfCString.h", + "nsPromiseFlatString.h", + "nsReadableUtils.h", + "nsString.h", + "nsStringBuffer.h", + "nsStringFlags.h", + "nsStringFwd.h", + "nsStringIterator.h", + "nsTDependentString.h", + "nsTDependentSubstring.h", + "nsTextFormatter.h", + "nsTLiteralString.h", + "nsTPromiseFlatString.h", + "nsTString.h", + "nsTStringHasher.h", + "nsTStringRepr.h", + "nsTSubstring.h", + "nsTSubstringTuple.h", + "nsUTF8Utils.h", +] + +EXPORTS.mozilla += [ + "RustRegex.h", +] + +UNIFIED_SOURCES += [ + "nsASCIIMask.cpp", + "nsReadableUtils.cpp", + "nsStringBuffer.cpp", + "nsTDependentString.cpp", + "nsTDependentSubstring.cpp", + "nsTextFormatter.cpp", + "nsTLiteralString.cpp", + "nsTPromiseFlatString.cpp", + "nsTString.cpp", + "nsTStringComparator.cpp", + "nsTStringRepr.cpp", + "nsTSubstring.cpp", + "nsTSubstringTuple.cpp", + "RustStringAPI.cpp", +] + +if CONFIG["MOZ_DEBUG"]: + UNIFIED_SOURCES += ["nsStringStats.cpp"] + +FINAL_LIBRARY = "xul" diff --git a/xpcom/string/nsASCIIMask.cpp b/xpcom/string/nsASCIIMask.cpp new file mode 100644 index 0000000000..abcff70306 --- /dev/null +++ b/xpcom/string/nsASCIIMask.cpp @@ -0,0 +1,38 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsASCIIMask.h" + +namespace mozilla { + +constexpr bool TestWhitespace(char c) { + return c == '\f' || c == '\t' || c == '\r' || c == '\n' || c == ' '; +} +constexpr ASCIIMaskArray sWhitespaceMask = CreateASCIIMask(TestWhitespace); + +constexpr bool TestCRLF(char c) { return c == '\r' || c == '\n'; } +constexpr ASCIIMaskArray sCRLFMask = CreateASCIIMask(TestCRLF); + +constexpr bool TestCRLFTab(char c) { + return c == '\r' || c == '\n' || c == '\t'; +} +constexpr ASCIIMaskArray sCRLFTabMask = CreateASCIIMask(TestCRLFTab); + +constexpr bool TestZeroToNine(char c) { + return c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || + c == '6' || c == '7' || c == '8' || c == '9'; +} +constexpr ASCIIMaskArray sZeroToNineMask = CreateASCIIMask(TestZeroToNine); + +const ASCIIMaskArray& ASCIIMask::MaskWhitespace() { return sWhitespaceMask; } + +const ASCIIMaskArray& ASCIIMask::MaskCRLF() { return sCRLFMask; } + +const ASCIIMaskArray& ASCIIMask::MaskCRLFTab() { return sCRLFTabMask; } + +const ASCIIMaskArray& ASCIIMask::Mask0to9() { return sZeroToNineMask; } + +} // namespace mozilla diff --git a/xpcom/string/nsASCIIMask.h b/xpcom/string/nsASCIIMask.h new file mode 100644 index 0000000000..54f51d8957 --- /dev/null +++ b/xpcom/string/nsASCIIMask.h @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsASCIIMask_h_ +#define nsASCIIMask_h_ + +#include <array> +#include <utility> + +#include "mozilla/Attributes.h" + +typedef std::array<bool, 128> ASCIIMaskArray; + +namespace mozilla { + +// Boolean arrays, fixed size and filled in at compile time, meant to +// record something about each of the (standard) ASCII characters. 
+// No extended ASCII for now, there has been no use case. +// If you have loops that go through a string character by character +// and test for equality to a certain set of characters before deciding +// on a course of action, chances are building up one of these arrays +// and using it is going to be faster, especially if the set of +// characters is more than one long, and known at compile time. +class ASCIIMask { + public: + // Preset masks for some common character groups + // When testing, you must check if the index is < 128 or use IsMasked() + // + // if (someChar < 128 && MaskCRLF()[someChar]) this is \r or \n + + static const ASCIIMaskArray& MaskCRLF(); + static const ASCIIMaskArray& Mask0to9(); + static const ASCIIMaskArray& MaskCRLFTab(); + static const ASCIIMaskArray& MaskWhitespace(); + + static MOZ_ALWAYS_INLINE bool IsMasked(const ASCIIMaskArray& aMask, + uint32_t aChar) { + return aChar < 128 && aMask[aChar]; + } +}; + +// Outside of the preset ones, use these templates to create more masks. +// +// The example creation will look like this: +// +// constexpr bool TestABC(char c) { return c == 'A' || c == 'B' || c == 'C'; } +// constexpr std::array<bool, 128> sABCMask = CreateASCIIMask(TestABC); +// ... +// if (someChar < 128 && sABCMask[someChar]) this is A or B or C + +namespace asciimask_details { +template <typename F, size_t... 
Indices> +constexpr std::array<bool, 128> CreateASCIIMask( + F fun, std::index_sequence<Indices...>) { + return {{fun(Indices)...}}; +} +} // namespace asciimask_details + +template <typename F> +constexpr std::array<bool, 128> CreateASCIIMask(F fun) { + return asciimask_details::CreateASCIIMask(fun, + std::make_index_sequence<128>{}); +} + +} // namespace mozilla + +#endif // nsASCIIMask_h_ diff --git a/xpcom/string/nsAString.h b/xpcom/string/nsAString.h new file mode 100644 index 0000000000..3893ff8e37 --- /dev/null +++ b/xpcom/string/nsAString.h @@ -0,0 +1,38 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsAString_h___ +#define nsAString_h___ + +#include "nsStringFwd.h" +#include "nsStringIterator.h" +#include "mozilla/TypedEnumBits.h" + +#include <string.h> +#include <stdarg.h> + +#include "nsStringFlags.h" +#include "nsTStringRepr.h" +#include "nsTSubstring.h" +#include "nsTSubstringTuple.h" + +/** + * ASCII case-insensitive comparator. 
(for Unicode case-insensitive + * comparison, see nsUnicharUtils.h) + */ +int nsCaseInsensitiveCStringComparator(const char*, const char*, size_t, + size_t); + +class nsCaseInsensitiveCStringArrayComparator { + public: + template <class A, class B> + bool Equals(const A& aStrA, const B& aStrB) const { + return aStrA.Equals(aStrB, nsCaseInsensitiveCStringComparator); + } +}; + +#endif // !defined(nsAString_h___) diff --git a/xpcom/string/nsCharTraits.h b/xpcom/string/nsCharTraits.h new file mode 100644 index 0000000000..c81c2f5b2d --- /dev/null +++ b/xpcom/string/nsCharTraits.h @@ -0,0 +1,486 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsCharTraits_h___ +#define nsCharTraits_h___ + +#include <ctype.h> // for |EOF|, |WEOF| +#include <stdint.h> // for |uint32_t| +#include <string.h> // for |memcpy|, et al +#include "mozilla/MemoryChecking.h" + +// This file may be used (through nsUTF8Utils.h) from non-XPCOM code, in +// particular the standalone software updater. In that case stub out +// the macros provided by nsDebug.h which are only usable when linking XPCOM + +#ifdef NS_NO_XPCOM +# define NS_WARNING(msg) +# define NS_ASSERTION(cond, msg) +# define NS_ERROR(msg) +#else +# include "nsDebug.h" // for NS_ASSERTION +#endif + +/* + * Some macros for converting char16_t (UTF-16) to and from Unicode scalar + * values. + * + * Note that UTF-16 represents all Unicode scalar values up to U+10FFFF by + * using "surrogate pairs". These consist of a high surrogate, i.e. a code + * point in the range U+D800 - U+DBFF, and a low surrogate, i.e. a code point + * in the range U+DC00 - U+DFFF, like this: + * + * U+D800 U+DC00 = U+10000 + * U+D800 U+DC01 = U+10001 + * ...
+ * U+DBFF U+DFFE = U+10FFFE + * U+DBFF U+DFFF = U+10FFFF + * + * These surrogate code points U+D800 - U+DFFF are not themselves valid Unicode + * scalar values and are not well-formed UTF-16 except as high-surrogate / + * low-surrogate pairs. + */ + +#define PLANE1_BASE uint32_t(0x00010000) +// High surrogates are in the range 0xD800 -- 0xDBFF +#define NS_IS_HIGH_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xD800) +// Low surrogates are in the range 0xDC00 -- 0xDFFF +#define NS_IS_LOW_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xDC00) +// Easier to type than NS_IS_HIGH_SURROGATE && NS_IS_LOW_SURROGATE +#define NS_IS_SURROGATE_PAIR(h, l) \ + (NS_IS_HIGH_SURROGATE(h) && NS_IS_LOW_SURROGATE(l)) +// Faster than testing NS_IS_HIGH_SURROGATE || NS_IS_LOW_SURROGATE +#define IS_SURROGATE(u) ((uint32_t(u) & 0xFFFFF800) == 0xD800) + +// Everything else is not a surrogate: 0x0000 -- 0xD7FF, 0xE000 -- 0xFFFF + +// N = (H - 0xD800) * 0x400 + 0x10000 + (L - 0xDC00) +// I wonder whether we could somehow assert that H is a high surrogate +// and L is a low surrogate +#define SURROGATE_TO_UCS4(h, l) \ + (((uint32_t(h) & 0x03FF) << 10) + (uint32_t(l) & 0x03FF) + PLANE1_BASE) + +// Extract surrogates from a UCS4 char +// Reference: the Unicode standard 4.0, section 3.9 +// Since (c - 0x10000) >> 10 == (c >> 10) - 0x0080 and +// 0xD7C0 == 0xD800 - 0x0080, +// ((c - 0x10000) >> 10) + 0xD800 can be simplified to +#define H_SURROGATE(c) char16_t(char16_t(uint32_t(c) >> 10) + char16_t(0xD7C0)) +// where it's to be noted that 0xD7C0 is not bitwise-OR'd +// but added.
+ +// Since 0x10000 & 0x03FF == 0, +// (c - 0x10000) & 0x03FF == c & 0x03FF so that +// ((c - 0x10000) & 0x03FF) | 0xDC00 is equivalent to +#define L_SURROGATE(c) \ + char16_t(char16_t(uint32_t(c) & uint32_t(0x03FF)) | char16_t(0xDC00)) + +#define IS_IN_BMP(ucs) (uint32_t(ucs) < PLANE1_BASE) +#define UCS2_REPLACEMENT_CHAR char16_t(0xFFFD) + +#define UCS_END uint32_t(0x00110000) +#define IS_VALID_CHAR(c) ((uint32_t(c) < UCS_END) && !IS_SURROGATE(c)) +#define ENSURE_VALID_CHAR(c) (IS_VALID_CHAR(c) ? (c) : UCS2_REPLACEMENT_CHAR) + +template <class CharT> +struct nsCharTraits {}; + +template <> +struct nsCharTraits<char16_t> { + typedef char16_t char_type; + typedef uint16_t unsigned_char_type; + typedef char incompatible_char_type; + + static char_type* const sEmptyBuffer; + + // integer representation of characters: + typedef int int_type; + + static char_type to_char_type(int_type aChar) { return char_type(aChar); } + + static int_type to_int_type(char_type aChar) { + return int_type(static_cast<unsigned_char_type>(aChar)); + } + + static bool eq_int_type(int_type aLhs, int_type aRhs) { return aLhs == aRhs; } + + // |char_type| comparisons: + + static bool eq(char_type aLhs, char_type aRhs) { return aLhs == aRhs; } + + static bool lt(char_type aLhs, char_type aRhs) { return aLhs < aRhs; } + + // operations on s[n] arrays: + + static char_type* move(char_type* aStr1, const char_type* aStr2, size_t aN) { + return static_cast<char_type*>( + memmove(aStr1, aStr2, aN * sizeof(char_type))); + } + + static char_type* copy(char_type* aStr1, const char_type* aStr2, size_t aN) { + return static_cast<char_type*>( + memcpy(aStr1, aStr2, aN * sizeof(char_type))); + } + + static void uninitialize(char_type* aStr, size_t aN) { +#ifdef DEBUG + memset(aStr, 0xE4, aN * sizeof(char_type)); +#endif + MOZ_MAKE_MEM_UNDEFINED(aStr, aN * sizeof(char_type)); + } + + static char_type* copyASCII(char_type* aStr1, const char* aStr2, size_t aN) { + for (char_type* s = aStr1; aN--; ++s, ++aStr2) 
{ + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + *s = static_cast<char_type>(*aStr2); + } + return aStr1; + } + + static int compare(const char_type* aStr1, const char_type* aStr2, + size_t aN) { + for (; aN--; ++aStr1, ++aStr2) { + if (!eq(*aStr1, *aStr2)) { + return to_int_type(*aStr1) - to_int_type(*aStr2); + } + } + + return 0; + } + + static int compareASCII(const char_type* aStr1, const char* aStr2, + size_t aN) { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (!eq_int_type(to_int_type(*aStr1), + to_int_type(static_cast<char_type>(*aStr2)))) { + return to_int_type(*aStr1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + return 0; + } + + static bool equalsLatin1(const char_type* aStr1, const char* aStr2, + const size_t aN) { + for (size_t i = aN; i > 0; --i, ++aStr1, ++aStr2) { + if (*aStr1 != static_cast<char_type>(*aStr2)) { + return false; + } + } + + return true; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. + static int compareASCIINullTerminated(const char_type* aStr1, size_t aN, + const char* aStr2) { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (!eq_int_type(to_int_type(*aStr1), + to_int_type(static_cast<char_type>(*aStr2)))) { + return to_int_type(*aStr1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + /** + * Convert c to its lower-case form, but only if c is in the ASCII + * range. Otherwise leave it alone. 
+ */ + static char_type ASCIIToLower(char_type aChar) { + if (aChar >= 'A' && aChar <= 'Z') { + return char_type(aChar + ('a' - 'A')); + } + + return aChar; + } + + static int compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, + size_t aN) { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != static_cast<char_type>(*aStr2)) { + return to_int_type(lower_s1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + return 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. + static int compareLowerCaseToASCIINullTerminated(const char_type* aStr1, + size_t aN, + const char* aStr2) { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != static_cast<char_type>(*aStr2)) { + return to_int_type(lower_s1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + static size_t length(const char_type* aStr) { + size_t result = 0; + while (!eq(*aStr++, char_type(0))) { + ++result; + } + return result; + } + + static const char_type* find(const char_type* aStr, size_t aN, + char_type aChar) { + while (aN--) { + if (eq(*aStr, aChar)) { + return aStr; + } + ++aStr; + } + + return 0; + } +}; + +template <> +struct nsCharTraits<char> { + typedef char char_type; + typedef unsigned char unsigned_char_type; + typedef char16_t incompatible_char_type; + + static char_type* const sEmptyBuffer; + + // integer representation of characters: + + typedef int int_type; + + 
static char_type to_char_type(int_type aChar) { return char_type(aChar); } + + static int_type to_int_type(char_type aChar) { + return int_type(static_cast<unsigned_char_type>(aChar)); + } + + static bool eq_int_type(int_type aLhs, int_type aRhs) { return aLhs == aRhs; } + + // |char_type| comparisons: + + static bool eq(char_type aLhs, char_type aRhs) { return aLhs == aRhs; } + + static bool lt(char_type aLhs, char_type aRhs) { return aLhs < aRhs; } + + // operations on s[n] arrays: + + static char_type* move(char_type* aStr1, const char_type* aStr2, size_t aN) { + return static_cast<char_type*>( + memmove(aStr1, aStr2, aN * sizeof(char_type))); + } + + static char_type* copy(char_type* aStr1, const char_type* aStr2, size_t aN) { + return static_cast<char_type*>( + memcpy(aStr1, aStr2, aN * sizeof(char_type))); + } + + static void uninitialize(char_type* aStr, size_t aN) { +#ifdef DEBUG + memset(aStr, 0xE4, aN * sizeof(char_type)); +#endif + MOZ_MAKE_MEM_UNDEFINED(aStr, aN * sizeof(char_type)); + } + + static char_type* copyASCII(char_type* aStr1, const char* aStr2, size_t aN) { + return copy(aStr1, aStr2, aN); + } + + static int compare(const char_type* aStr1, const char_type* aStr2, + size_t aN) { + return memcmp(aStr1, aStr2, aN); + } + + static int compareASCII(const char_type* aStr1, const char* aStr2, + size_t aN) { +#ifdef DEBUG + for (size_t i = 0; i < aN; ++i) { + NS_ASSERTION(!(aStr2[i] & ~0x7F), "Unexpected non-ASCII character"); + } +#endif + return compare(aStr1, aStr2, aN); + } + + static bool equalsLatin1(const char_type* aStr1, const char* aStr2, + size_t aN) { + return memcmp(aStr1, aStr2, aN) == 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. 
+ static int compareASCIINullTerminated(const char_type* aStr1, size_t aN, + const char* aStr2) { + // can't use strcmp here because we don't want to stop when aStr1 + // contains a null + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (*aStr1 != *aStr2) { + return to_int_type(*aStr1) - to_int_type(*aStr2); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + /** + * Convert c to its lower-case form, but only if c is ASCII. + */ + static char_type ASCIIToLower(char_type aChar) { + if (aChar >= 'A' && aChar <= 'Z') { + return char_type(aChar + ('a' - 'A')); + } + + return aChar; + } + + static int compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, + size_t aN) { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != *aStr2) { + return to_int_type(lower_s1) - to_int_type(*aStr2); + } + } + return 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. 
+ static int compareLowerCaseToASCIINullTerminated(const char_type* aStr1, + size_t aN, + const char* aStr2) { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != *aStr2) { + return to_int_type(lower_s1) - to_int_type(*aStr2); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + static size_t length(const char_type* aStr) { return strlen(aStr); } + + static const char_type* find(const char_type* aStr, size_t aN, + char_type aChar) { + return reinterpret_cast<const char_type*>( + memchr(aStr, to_int_type(aChar), aN)); + } +}; + +template <class InputIterator> +struct nsCharSourceTraits { + typedef typename InputIterator::difference_type difference_type; + + static difference_type readable_distance(const InputIterator& aFirst, + const InputIterator& aLast) { + // assumes single fragment + return aLast.get() - aFirst.get(); + } + + static const typename InputIterator::value_type* read( + const InputIterator& aIter) { + return aIter.get(); + } + + static void advance(InputIterator& aStr, difference_type aN) { + aStr.advance(aN); + } +}; + +template <class CharT> +struct nsCharSourceTraits<CharT*> { + typedef ptrdiff_t difference_type; + + static difference_type readable_distance(CharT* aStr) { + return nsCharTraits<CharT>::length(aStr); + } + + static difference_type readable_distance(CharT* aFirst, CharT* aLast) { + return aLast - aFirst; + } + + static const CharT* read(CharT* aStr) { return aStr; } + + static void advance(CharT*& aStr, difference_type aN) { aStr += aN; } +}; + +template <class OutputIterator> +struct nsCharSinkTraits { + static void write(OutputIterator& aIter, + const typename OutputIterator::value_type* aStr, + size_t aN) { + aIter.write(aStr, aN); + } +}; + +template <class CharT> +struct 
nsCharSinkTraits<CharT*> { + static void write(CharT*& aIter, const CharT* aStr, size_t aN) { + nsCharTraits<CharT>::move(aIter, aStr, aN); + aIter += aN; + } +}; + +#endif // !defined(nsCharTraits_h___) diff --git a/xpcom/string/nsDependentString.h b/xpcom/string/nsDependentString.h new file mode 100644 index 0000000000..4896c8d086 --- /dev/null +++ b/xpcom/string/nsDependentString.h @@ -0,0 +1,15 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsDependentString_h___ +#define nsDependentString_h___ + +#include "nsString.h" +#include "nsDebug.h" + +#include "nsTDependentString.h" + +#endif /* !defined(nsDependentString_h___) */ diff --git a/xpcom/string/nsDependentSubstring.h b/xpcom/string/nsDependentSubstring.h new file mode 100644 index 0000000000..cb6cef5d77 --- /dev/null +++ b/xpcom/string/nsDependentSubstring.h @@ -0,0 +1,13 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsDependentSubstring_h___ +#define nsDependentSubstring_h___ + +#include "nsAString.h" +#include "nsTDependentSubstring.h" + +#endif /* !defined(nsDependentSubstring_h___) */ diff --git a/xpcom/string/nsLiteralString.h b/xpcom/string/nsLiteralString.h new file mode 100644 index 0000000000..f982724ce4 --- /dev/null +++ b/xpcom/string/nsLiteralString.h @@ -0,0 +1,31 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsLiteralString_h___ +#define nsLiteralString_h___ + +#include "nscore.h" +#include "nsString.h" + +#include "nsTLiteralString.h" + +#include "mozilla/Char16.h" + +#define NS_CSTRING_LITERAL_AS_STRING_LITERAL(s) u"" s + +#define NS_LITERAL_STRING_FROM_CSTRING(s) \ + static_cast<const nsLiteralString&>( \ + nsLiteralString(NS_CSTRING_LITERAL_AS_STRING_LITERAL(s))) + +constexpr auto operator""_ns(const char* aStr, std::size_t aLen) { + return nsLiteralCString{aStr, aLen}; +} + +constexpr auto operator""_ns(const char16_t* aStr, std::size_t aLen) { + return nsLiteralString{aStr, aLen}; +} + +#endif /* !defined(nsLiteralString_h___) */ diff --git a/xpcom/string/nsPrintfCString.h b/xpcom/string/nsPrintfCString.h new file mode 100644 index 0000000000..f722888705 --- /dev/null +++ b/xpcom/string/nsPrintfCString.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsPrintfCString_h___ +#define nsPrintfCString_h___ + +#include "nsString.h" + +/** + * nsPrintfCString lets you create a nsCString using a printf-style format + * string. For example: + * + * NS_WARNING(nsPrintfCString("Unexpected value: %f", 13.917).get()); + * + * nsPrintfCString has a small built-in auto-buffer. For larger strings, it + * will allocate on the heap. + * + * See also nsCString::AppendPrintf(). + */ +class nsPrintfCString : public nsAutoCStringN<16> { + typedef nsCString string_type; + + public: + explicit nsPrintfCString(const char_type* aFormat, ...) + MOZ_FORMAT_PRINTF(2, 3) { + va_list ap; + va_start(ap, aFormat); + AppendVprintf(aFormat, ap); + va_end(ap); + } +}; + +/** + * + * + * nsVprintfCString is like nsPrintfCString but is created using vprintf style + * args. This is useful for functions that have already received variadic + * arguments and want to create a printf-formatted nsCString from them. For example: + * + * void LogToSeveralLocations(const char* aFormat,...) { + * va_list ap; + * va_start(ap, aFormat); + * nsVprintfCString logString(aFormat, ap); + * va_end(ap); + * // Use logString + * } + * + * See also nsCString::AppendVprintf(). + */ + +class nsVprintfCString : public nsAutoCStringN<16> { + typedef nsCString string_type; + + public: + nsVprintfCString(const char_type* aFormat, va_list aArgs) + MOZ_FORMAT_PRINTF(2, 0) { + AppendVprintf(aFormat, aArgs); + } +}; + +#endif // !defined(nsPrintfCString_h___) diff --git a/xpcom/string/nsPromiseFlatString.h b/xpcom/string/nsPromiseFlatString.h new file mode 100644 index 0000000000..98541ceb4a --- /dev/null +++ b/xpcom/string/nsPromiseFlatString.h @@ -0,0 +1,14 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef nsPromiseFlatString_h___ +#define nsPromiseFlatString_h___ + +#include "nsString.h" + +#include "nsTPromiseFlatString.h" + +#endif /* !defined(nsPromiseFlatString_h___) */ diff --git a/xpcom/string/nsReadableUtils.cpp b/xpcom/string/nsReadableUtils.cpp new file mode 100644 index 0000000000..fa4c4bc69b --- /dev/null +++ b/xpcom/string/nsReadableUtils.cpp @@ -0,0 +1,630 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsReadableUtils.h" + +#include <algorithm> + +#include "mozilla/CheckedInt.h" +#include "mozilla/Utf8.h" + +#include "nscore.h" +#include "nsString.h" +#include "nsTArray.h" +#include "nsUTF8Utils.h" + +using mozilla::Span; + +/** + * A helper function that allocates a buffer of the desired character type big + * enough to hold a copy of the supplied string (plus a zero terminator). + * + * @param aSource a string you will eventually be making a copy of + * @return a new buffer which you must free with |free|.
+ * + */ +template <class FromStringT, class CharT> +inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) { + return static_cast<CharT*>( + malloc((size_t(aSource.Length()) + 1) * sizeof(CharT))); +} + +char* ToNewCString(const nsAString& aSource) { + char* str = ToNewCString(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsAString& aSource, + const mozilla::fallible_t& aFallible) { + char* dest = AllocateStringCopy(aSource, (char*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + LossyConvertUtf16toLatin1(aSource, Span(dest, len)); + dest[len] = 0; + return dest; +} + +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count, + const mozilla::fallible_t& aFallible) { + auto len = aSource.Length(); + // The uses of this function seem temporary enough that it's not + // worthwhile to be fancy about the allocation size. Let's just use + // the worst case. + // Times 3 plus 1, because ConvertUTF16toUTF8 requires times 3 and + // then we have the terminator. + // Using CheckedInt<uint32_t>, because aUTF8Count is uint32_t* for + // historical reasons. 
+ mozilla::CheckedInt<uint32_t> destLen(len); + destLen *= 3; + destLen += 1; + if (!destLen.isValid()) { + return nullptr; + } + size_t destLenVal = destLen.value(); + char* dest = static_cast<char*>(malloc(destLenVal)); + if (!dest) { + return nullptr; + } + + size_t written = ConvertUtf16toUtf8(aSource, Span(dest, destLenVal)); + dest[written] = 0; + + if (aUTF8Count) { + *aUTF8Count = written; + } + + return dest; +} + +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) { + char* str = ToNewUTF8String(aSource, aUTF8Count, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsACString& aSource) { + char* str = ToNewCString(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsACString& aSource, + const mozilla::fallible_t& aFallible) { + // no conversion needed, just allocate a buffer of the correct length and copy + // into it + + char* dest = AllocateStringCopy(aSource, (char*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + memcpy(dest, aSource.BeginReading(), len * sizeof(char)); + dest[len] = 0; + return dest; +} + +char16_t* ToNewUnicode(const nsAString& aSource) { + char16_t* str = ToNewUnicode(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* ToNewUnicode(const nsAString& aSource, + const mozilla::fallible_t& aFallible) { + // no conversion needed, just allocate a buffer of the correct length and copy + // into it + + char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t)); + dest[len] = 0; + return dest; +} + +char16_t* ToNewUnicode(const nsACString& aSource) { + char16_t* str = ToNewUnicode(aSource, mozilla::fallible); + if (!str) { + 
MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* ToNewUnicode(const nsACString& aSource, + const mozilla::fallible_t& aFallible) { + char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + ConvertLatin1toUtf16(aSource, Span(dest, len)); + dest[len] = 0; + return dest; +} + +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count, + const mozilla::fallible_t& aFallible) { + // Compute length plus one as required by ConvertUTF8toUTF16 + uint32_t lengthPlusOne = aSource.Length() + 1; // Can't overflow + + mozilla::CheckedInt<size_t> allocLength(lengthPlusOne); + // Add space for zero-termination + allocLength += 1; + // We need UTF-16 units + allocLength *= sizeof(char16_t); + + if (!allocLength.isValid()) { + return nullptr; + } + + char16_t* dest = (char16_t*)malloc(allocLength.value()); + if (!dest) { + return nullptr; + } + + size_t written = ConvertUtf8toUtf16(aSource, Span(dest, lengthPlusOne)); + dest[written] = 0; + + if (aUTF16Count) { + *aUTF16Count = written; + } + + return dest; +} + +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) { + char16_t* str = UTF8ToNewUnicode(aSource, aUTF16Count, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, + char16_t* aDest, uint32_t aLength) { + MOZ_ASSERT(aSrcOffset + aLength <= aSource.Length()); + memcpy(aDest, aSource.BeginReading() + aSrcOffset, + size_t(aLength) * sizeof(char16_t)); + return aDest; +} + +void ToUpperCase(nsACString& aCString) { + char* cp = aCString.BeginWriting(); + char* end = cp + aCString.Length(); + while (cp != end) { + char ch = *cp; + if (ch >= 'a' && ch <= 'z') { + *cp = ch - ('a' - 'A'); + } + ++cp; + } +} + +void ToUpperCase(const nsACString& aSource, nsACString& aDest) { + aDest.SetLength(aSource.Length()); + const 
char* src = aSource.BeginReading(); + const char* end = src + aSource.Length(); + char* dst = aDest.BeginWriting(); + while (src != end) { + char ch = *src; + if (ch >= 'a' && ch <= 'z') { + *dst = ch - ('a' - 'A'); + } else { + *dst = ch; + } + ++src; + ++dst; + } +} + +void ToLowerCase(nsACString& aCString) { + char* cp = aCString.BeginWriting(); + char* end = cp + aCString.Length(); + while (cp != end) { + char ch = *cp; + if (ch >= 'A' && ch <= 'Z') { + *cp = ch + ('a' - 'A'); + } + ++cp; + } +} + +void ToLowerCase(const nsACString& aSource, nsACString& aDest) { + aDest.SetLength(aSource.Length()); + const char* src = aSource.BeginReading(); + const char* end = src + aSource.Length(); + char* dst = aDest.BeginWriting(); + while (src != end) { + char ch = *src; + if (ch >= 'A' && ch <= 'Z') { + *dst = ch + ('a' - 'A'); + } else { + *dst = ch; + } + ++src; + ++dst; + } +} + +void ParseString(const nsACString& aSource, char aDelimiter, + nsTArray<nsCString>& aArray) { + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + + for (;;) { + nsACString::const_iterator delimiter = start; + FindCharInReadable(aDelimiter, delimiter, end); + + if (delimiter != start) { + aArray.AppendElement(Substring(start, delimiter)); + } + + if (delimiter == end) { + break; + } + start = ++delimiter; + if (start == end) { + break; + } + } +} + +template <class StringT, class IteratorT> +bool FindInReadable_Impl( + const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, + nsTStringComparator<typename StringT::char_type> aCompare) { + bool found_it = false; + + // only bother searching at all if we're given a non-empty range to search + if (aSearchStart != aSearchEnd) { + IteratorT aPatternStart, aPatternEnd; + aPattern.BeginReading(aPatternStart); + aPattern.EndReading(aPatternEnd); + + // outer loop keeps searching till we find it or run out of string to search + while (!found_it) { + // fast inner loop (that's what it's 
called, not what it is) looks for a + // potential match + while (aSearchStart != aSearchEnd && + aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) { + ++aSearchStart; + } + + // if we broke out of the `fast' loop because we're out of string ... + // we're done: no match + if (aSearchStart == aSearchEnd) { + break; + } + + // otherwise, we're at a potential match, let's see if we really hit one + IteratorT testPattern(aPatternStart); + IteratorT testSearch(aSearchStart); + + // slow inner loop verifies the potential match (found by the `fast' loop) + // at the current position + for (;;) { + // we already compared the first character in the outer loop, + // so we'll advance before the next comparison + ++testPattern; + ++testSearch; + + // if we verified all the way to the end of the pattern, then we found + // it! + if (testPattern == aPatternEnd) { + found_it = true; + aSearchEnd = testSearch; // return the exact found range through the + // parameters + break; + } + + // if we got to end of the string we're searching before we hit the end + // of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchEnd) { + aSearchStart = aSearchEnd; + break; + } + + // else if we mismatched ... it's time to advance to the next search + // position + // and get back into the `fast' loop + if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) { + ++aSearchStart; + break; + } + } + } + } + + return found_it; +} + +/** + * This searches the entire string from right to left, and returns the first + * match found, if any. 
+ */ +template <class StringT, class IteratorT> +bool RFindInReadable_Impl( + const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, + nsTStringComparator<typename StringT::char_type> aCompare) { + IteratorT patternStart, patternEnd, searchEnd = aSearchEnd; + aPattern.BeginReading(patternStart); + aPattern.EndReading(patternEnd); + + // Point to the last character in the pattern + --patternEnd; + // outer loop keeps searching till we run out of string to search + while (aSearchStart != searchEnd) { + // Point to the end position of the next possible match + --searchEnd; + + // Check last character, if a match, explore further from here + if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) { + // We're at a potential match, let's see if we really hit one + IteratorT testPattern(patternEnd); + IteratorT testSearch(searchEnd); + + // inner loop verifies the potential match at the current position + do { + // if we verified all the way to the end of the pattern, then we found + // it! 
+ if (testPattern == patternStart) { + aSearchStart = testSearch; // point to start of match + aSearchEnd = ++searchEnd; // point to end of match + return true; + } + + // if we got to end of the string we're searching before we hit the end + // of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchStart) { + aSearchStart = aSearchEnd; + return false; + } + + // test previous character for a match + --testPattern; + --testSearch; + } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0); + } + } + + aSearchStart = aSearchEnd; + return false; +} + +bool FindInReadable(const nsAString& aPattern, + nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd, + nsStringComparator aComparator) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool FindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd, + nsCStringComparator aComparator) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool CaseInsensitiveFindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, + nsCaseInsensitiveCStringComparator); +} + +bool RFindInReadable(const nsAString& aPattern, + nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd, + const nsStringComparator aComparator) { + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool RFindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd, + const nsCStringComparator aComparator) { + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, + const 
nsAString::const_iterator& aSearchEnd) { + ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char16_t* charFoundAt = + nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd) { + ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char* charFoundAt = + nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator aComparator) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); +} + +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator aComparator) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, 
sub_len).Equals(aSubstring, aComparator); +} + +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator aComparator) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len) + .Equals(aSubstring, aComparator); +} + +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator aComparator) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len) + .Equals(aSubstring, aComparator); +} + +static const char16_t empty_buffer[1] = {'\0'}; + +const nsString& EmptyString() { + static const nsDependentString sEmpty(empty_buffer); + + return sEmpty; +} + +const nsCString& EmptyCString() { + static const nsDependentCString sEmpty((const char*)empty_buffer); + + return sEmpty; +} + +const nsString& VoidString() { + static const nsString sNull(mozilla::detail::StringDataFlags::VOIDED); + + return sNull; +} + +const nsCString& VoidCString() { + static const nsCString sNull(mozilla::detail::StringDataFlags::VOIDED); + + return sNull; +} + +int32_t CompareUTF8toUTF16(const nsACString& aUTF8String, + const nsAString& aUTF16String, bool* aErr) { + const char* u8; + const 
char* u8end; + aUTF8String.BeginReading(u8); + aUTF8String.EndReading(u8end); + + const char16_t* u16; + const char16_t* u16end; + aUTF16String.BeginReading(u16); + aUTF16String.EndReading(u16end); + + for (;;) { + if (u8 == u8end) { + if (u16 == u16end) { + return 0; + } + return -1; + } + if (u16 == u16end) { + return 1; + } + // No need for ASCII optimization, since both NextChar() + // calls get inlined. + uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr); + uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr); + if (scalar16 == scalar8) { + continue; + } + if (scalar8 < scalar16) { + return -1; + } + return 1; + } +} + +void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) { + NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); + if (IS_IN_BMP(aSource)) { + aDest.Append(char16_t(aSource)); + } else { + aDest.Append(H_SURROGATE(aSource)); + aDest.Append(L_SURROGATE(aSource)); + } +} diff --git a/xpcom/string/nsReadableUtils.h b/xpcom/string/nsReadableUtils.h new file mode 100644 index 0000000000..803c6b5d2f --- /dev/null +++ b/xpcom/string/nsReadableUtils.h @@ -0,0 +1,610 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsReadableUtils_h___ +#define nsReadableUtils_h___ + +/** + * I guess all the routines in this file are all mis-named. + * According to our conventions, they should be |NS_xxx|. 
+ */ + +#include "mozilla/Assertions.h" +#include "nsAString.h" +#include "mozilla/TextUtils.h" + +#include "nsTArrayForwardDeclare.h" + +// From the nsstring crate +extern "C" { +bool nsstring_fallible_append_utf8_impl(nsAString* aThis, const char* aOther, + size_t aOtherLen, size_t aOldLen); + +bool nsstring_fallible_append_latin1_impl(nsAString* aThis, const char* aOther, + size_t aOtherLen, size_t aOldLen, + bool aAllowShrinking); + +bool nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis, + const char16_t*, + size_t aOtherLen, + size_t aOldLen); + +bool nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis, + const char16_t*, + size_t aOtherLen, + size_t aOldLen, + bool aAllowShrinking); + +bool nscstring_fallible_append_utf8_to_latin1_lossy_check( + nsACString* aThis, const nsACString* aOther, size_t aOldLen); + +bool nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis, + const nsACString* aOther, + size_t aOldLen); +} + +inline size_t Distance(const nsReadingIterator<char16_t>& aStart, + const nsReadingIterator<char16_t>& aEnd) { + MOZ_ASSERT(aStart.get() <= aEnd.get()); + return static_cast<size_t>(aEnd.get() - aStart.get()); +} + +inline size_t Distance(const nsReadingIterator<char>& aStart, + const nsReadingIterator<char>& aEnd) { + MOZ_ASSERT(aStart.get() <= aEnd.get()); + return static_cast<size_t>(aEnd.get() - aStart.get()); +} + +// NOTE: Operations that don't need an operand to be an XPCOM string +// are in mozilla/TextUtils.h and mozilla/Utf8.h. + +// UTF-8 to UTF-16 +// Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER. 
+ +[[nodiscard]] inline bool CopyUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(), + aSource.Length(), 0); +} + +inline void CopyUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(), + aSource.Length(), aDest.Length()); +} + +inline void AppendUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// Latin1 to UTF-16 +// Interpret each incoming unsigned byte value as a Unicode scalar value (not +// windows-1252!). The function names say "ASCII" instead of "Latin1" for +// legacy reasons. 
+ +[[nodiscard]] inline bool CopyASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_latin1_impl(&aDest, aSource.Elements(), + aSource.Length(), 0, true); +} + +inline void CopyASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_latin1_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false); +} + +inline void AppendASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-16 to UTF-8 +// Unpaired surrogates are replaced with the REPLACEMENT CHARACTER. 
+ +[[nodiscard]] inline bool CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_utf8_impl( + &aDest, aSource.Elements(), aSource.Length(), 0); +} + +inline void CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendUTF16toUTF8( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_utf8_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length()); +} + +inline void AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-16 to Latin1 +// If all code points in the input are below U+0100, represents each scalar +// value as an unsigned byte. (This is not windows-1252!) If there are code +// points above U+00FF, memory-safely produces garbage and will likely start +// asserting in future debug builds. The nature of the garbage may differ +// based on CPU architecture and must not be relied upon. The names say +// "ASCII" instead of "Latin1" for legacy reasons. 
+ +[[nodiscard]] inline bool LossyCopyUTF16toASCII( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_latin1_lossy_impl( + &aDest, aSource.Elements(), aSource.Length(), 0, true); +} + +inline void LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool LossyAppendUTF16toASCII( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_latin1_lossy_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false); +} + +inline void LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY( + !LossyAppendUTF16toASCII(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// Latin1 to UTF-8 +// Interpret each incoming unsigned byte value as a Unicode scalar value (not +// windows-1252!). +// If the input is ASCII, the heap-allocated nsStringBuffer is shared if +// possible. 
+ +[[nodiscard]] inline bool CopyLatin1toUTF8(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, 0); +} + +inline void CopyLatin1toUTF8(const nsACString& aSource, nsACString& aDest) { + if (MOZ_UNLIKELY(!CopyLatin1toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendLatin1toUTF8(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, + aDest.Length()); +} + +inline void AppendLatin1toUTF8(const nsACString& aSource, nsACString& aDest) { + if (MOZ_UNLIKELY(!AppendLatin1toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-8 to Latin1 +// If all code points in the input are below U+0100, represents each scalar +// value as an unsigned byte. (This is not windows-1252!) If there are code +// points above U+00FF, memory-safely produces garbage in release builds and +// asserts in debug builds. The nature of the garbage may differ +// based on CPU architecture and must not be relied upon. +// If the input is ASCII, the heap-allocated nsStringBuffer is shared if +// possible. 
+ +[[nodiscard]] inline bool LossyCopyUTF8toLatin1(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource, + 0); +} + +inline void LossyCopyUTF8toLatin1(const nsACString& aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!LossyCopyUTF8toLatin1(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool LossyAppendUTF8toLatin1(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource, + aDest.Length()); +} + +inline void LossyAppendUTF8toLatin1(const nsACString& aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY( + !LossyAppendUTF8toLatin1(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * Performs a conversion with LossyConvertUTF16toLatin1() writing into the + * newly-allocated buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a 16-bit wide string + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewCString(const nsAString& aSource); + +/* A fallible version of ToNewCString. Returns nullptr on failure. */ +char* ToNewCString(const nsAString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. 
+ * + * @param aSource an 8-bit wide string + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewCString(const nsACString& aSource); + +/* A fallible version of ToNewCString. Returns nullptr on failure. */ +char* ToNewCString(const nsACString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * Performs an encoding conversion from a UTF-16 string to a UTF-8 string with + * unpaired surrogates replaced with the REPLACEMENT CHARACTER copying + * |aSource| to your new buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a UTF-16 string (made of char16_t's) + * @param aUTF8Count the number of 8-bit units that was returned + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr); + +/* A fallible version of ToNewUTF8String. Returns nullptr on failure. */ +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. + * + * Infallibly allocates and returns a new |char16_t| buffer which you must + * free with |free|. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a UTF-16 string + * @return a new |char16_t| buffer you must free with |free|. + */ +char16_t* ToNewUnicode(const nsAString& aSource); + +/* A fallible version of ToNewUnicode. Returns nullptr on failure. */ +char16_t* ToNewUnicode(const nsAString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. 
+ * + * Infallibly allocates and returns a new |char16_t| buffer which you must + * free with |free|. + * + * Performs an encoding conversion by 0-padding 8-bit wide characters up to + * 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource| + * to your new buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a Latin1 string + * @return a new |char16_t| buffer you must free with |free|. + */ +char16_t* ToNewUnicode(const nsACString& aSource); + +/* A fallible version of ToNewUnicode. Returns nullptr on failure. */ +char16_t* ToNewUnicode(const nsACString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. + * + * Infallibly allocates and returns a new |char16_t| buffer which you must + * free with |free|. Performs an encoding conversion from UTF-8 to UTF-16 + * while copying |aSource| to your new buffer. Malformed byte sequences + * are replaced with the REPLACEMENT CHARACTER. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource an 8-bit wide string, UTF-8 encoded + * @param aUTF16Count if non-null, receives the number of 16-bit units + * written, not counting the zero terminator + * @return a new |char16_t| buffer you must free with |free|. + * (UTF-16 encoded) + */ +char16_t* UTF8ToNewUnicode(const nsACString& aSource, + uint32_t* aUTF16Count = nullptr); + +/* A fallible version of UTF8ToNewUnicode. Returns nullptr on failure. */ +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count, + const mozilla::fallible_t& aFallible); + +/** + * Copies |aLength| 16-bit code units, starting at offset |aSrcOffset| in + * |aSource|, to the |char16_t| buffer |aDest|. + * + * After this operation |aDest| is not null terminated.
+ * + * @param aSource a UTF-16 string + * @param aSrcOffset start offset in the source string + * @param aDest a |char16_t| buffer + * @param aLength the number of 16-bit code units to copy + * @return pointer to destination buffer - identical to |aDest| + */ +char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, + char16_t* aDest, uint32_t aLength); + +/** + * Replaces unpaired surrogates with U+FFFD in the argument. + * + * Copies a shared string buffer or an otherwise read-only + * buffer only if there are unpaired surrogates. + */ +[[nodiscard]] inline bool EnsureUTF16Validity(nsAString& aString) { + size_t upTo = mozilla::Utf16ValidUpTo(aString); + size_t len = aString.Length(); + if (upTo == len) { + return true; + } + char16_t* ptr = aString.BeginWriting(mozilla::fallible); + if (!ptr) { + return false; + } + auto span = mozilla::Span(ptr, len); + span[upTo] = 0xFFFD; + mozilla::EnsureUtf16ValiditySpan(span.From(upTo + 1)); + return true; +} + +void ParseString(const nsACString& aSource, char aDelimiter, + nsTArray<nsCString>& aArray); + +namespace mozilla::detail { + +constexpr auto kStringJoinAppendDefault = + [](auto& aResult, const auto& aValue) { aResult.Append(aValue); }; + +} // namespace mozilla::detail + +/** + * Join a sequence of items, each optionally transformed to a string, with a + * given separator, appending to a given string. 
+ * + * \tparam CharType char or char16_t + * \tparam InputRange a range usable with range-based for + * \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and + * an item of InputRange which appends the latter to the former + */ +template < + typename CharType, typename InputRange, + typename Func = const decltype(mozilla::detail::kStringJoinAppendDefault)&> +void StringJoinAppend( + nsTSubstring<CharType>& aOutput, + const nsTLiteralString<CharType>& aSeparator, const InputRange& aInputRange, + Func&& aFunc = mozilla::detail::kStringJoinAppendDefault) { + bool first = true; + for (const auto& item : aInputRange) { + if (first) { + first = false; + } else { + aOutput.Append(aSeparator); + } + + aFunc(aOutput, item); + } +} + +/** + * Join a sequence of items, each optionally transformed to a string, with a + * given separator, returning a new string. + * + * \tparam CharType char or char16_t + * \tparam InputRange a range usable with range-based for + * \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and + * an item of InputRange which appends the latter to the former + + */ +template < + typename CharType, typename InputRange, + typename Func = const decltype(mozilla::detail::kStringJoinAppendDefault)&> +auto StringJoin(const nsTLiteralString<CharType>& aSeparator, + const InputRange& aInputRange, + Func&& aFunc = mozilla::detail::kStringJoinAppendDefault) { + nsTAutoString<CharType> res; + StringJoinAppend(res, aSeparator, aInputRange, std::forward<Func>(aFunc)); + return res; +} + +/** + * Converts case in place in the argument string. + */ +void ToUpperCase(nsACString&); + +void ToLowerCase(nsACString&); + +void ToUpperCase(nsACString&); + +void ToLowerCase(nsACString&); + +/** + * Converts case from string aSource to aDest. 
+ */ +void ToUpperCase(const nsACString& aSource, nsACString& aDest); + +void ToLowerCase(const nsACString& aSource, nsACString& aDest); + +/** + * Finds the leftmost occurrence of |aPattern|, if any in the range + * |aSearchStart|..|aSearchEnd|. + * + * Returns |true| if a match was found, and adjusts |aSearchStart| and + * |aSearchEnd| to point to the match. If no match was found, returns |false| + * and makes |aSearchStart == aSearchEnd|. + * + * Currently, this is equivalent to the O(m*n) implementation previously on + * |ns[C]String|. + * + * If we need something faster, then we can implement that later. + */ + +bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&, + nsAString::const_iterator&, + nsStringComparator = nsTDefaultStringComparator); +bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&, + nsACString::const_iterator&, + nsCStringComparator = nsTDefaultStringComparator); + +/* sometimes we don't care about where the string was, just that we + * found it or not */ +inline bool FindInReadable( + const nsAString& aPattern, const nsAString& aSource, + nsStringComparator aCompare = nsTDefaultStringComparator) { + nsAString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + return FindInReadable(aPattern, start, end, aCompare); +} + +inline bool FindInReadable( + const nsACString& aPattern, const nsACString& aSource, + nsCStringComparator aCompare = nsTDefaultStringComparator) { + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + return FindInReadable(aPattern, start, end, aCompare); +} + +bool CaseInsensitiveFindInReadable(const nsACString& aPattern, + nsACString::const_iterator&, + nsACString::const_iterator&); + +/** + * Finds the rightmost occurrence of |aPattern| + * Returns |true| if a match was found, and adjusts |aSearchStart| and + * |aSearchEnd| to point to the match. 
If no match was found, returns |false| + * and makes |aSearchStart == aSearchEnd|. + */ +bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&, + nsAString::const_iterator&, + nsStringComparator = nsTDefaultStringComparator); +bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&, + nsACString::const_iterator&, + nsCStringComparator = nsTDefaultStringComparator); + +/** + * Finds the leftmost occurrence of |aChar|, if any in the range + * |aSearchStart|..|aSearchEnd|. + * + * Returns |true| if a match was found, and adjusts |aSearchStart| to + * point to the match. If no match was found, returns |false| and + * makes |aSearchStart == aSearchEnd|. + */ +bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, + const nsAString::const_iterator& aSearchEnd); +bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd); + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring); +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator); +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring); +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator); +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring); +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator); +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring); +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator); + +const nsString& EmptyString(); +const nsCString& EmptyCString(); + +const nsString& VoidString(); +const nsCString& VoidCString(); + +/** + * Compare a UTF-8 string to a UTF-16 string. + * + * Returns 0 if the strings are equal, -1 if aUTF8String is less + * than aUTF16String, and 1 in the reverse case.
Errors are replaced + * with U+FFFD and then the U+FFFD is compared as if it had occurred + * in the input. If aErr is not nullptr, *aErr is set to true if + * either string had malformed sequences. + */ +int32_t CompareUTF8toUTF16(const nsACString& aUTF8String, + const nsAString& aUTF16String, bool* aErr = nullptr); + +void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest); + +#endif // !defined(nsReadableUtils_h___) diff --git a/xpcom/string/nsString.h b/xpcom/string/nsString.h new file mode 100644 index 0000000000..e86ea594ac --- /dev/null +++ b/xpcom/string/nsString.h @@ -0,0 +1,171 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsString_h___ +#define nsString_h___ + +#include <ostream> + +#include "mozilla/Attributes.h" + +#include "nsStringFwd.h" + +#include "nsAString.h" +#include "nsDependentSubstring.h" +#include "nsReadableUtils.h" + +#include "nsTString.h" + +static_assert(sizeof(char16_t) == 2, "size of char16_t must be 2"); +static_assert(sizeof(nsString::char_type) == 2, + "size of nsString::char_type must be 2"); +static_assert(nsString::char_type(-1) > nsString::char_type(0), + "nsString::char_type must be unsigned"); +static_assert(sizeof(nsCString::char_type) == 1, + "size of nsCString::char_type must be 1"); + +static_assert(sizeof(nsTLiteralString<char>) == sizeof(nsTString<char>), + "nsLiteralCString can masquerade as nsCString, " + "so they must have identical layout"); + +static_assert(sizeof(nsTLiteralString<char16_t>) == sizeof(nsTString<char16_t>), + "nsTLiteralString can masquerade as nsString, " + "so they must have identical layout"); + +/** + * A helper class that converts a UTF-16 string to ASCII in a lossy manner + */ +class 
NS_LossyConvertUTF16toASCII : public nsAutoCString { + public: + explicit NS_LossyConvertUTF16toASCII(const char16ptr_t aString) { + LossyAppendUTF16toASCII(mozilla::MakeStringSpan(aString), *this); + } + + NS_LossyConvertUTF16toASCII(const char16ptr_t aString, size_t aLength) { + LossyAppendUTF16toASCII( + Substring(static_cast<const char16_t*>(aString), aLength), *this); + } + + explicit NS_LossyConvertUTF16toASCII(const nsAString& aString) { + LossyAppendUTF16toASCII(aString, *this); + } + + private: + // NOT TO BE IMPLEMENTED + NS_LossyConvertUTF16toASCII(char) = delete; +}; + +class NS_ConvertASCIItoUTF16 : public nsAutoString { + public: + explicit NS_ConvertASCIItoUTF16(const char* aCString) { + AppendASCIItoUTF16(mozilla::MakeStringSpan(aCString), *this); + } + + NS_ConvertASCIItoUTF16(const char* aCString, size_t aLength) { + AppendASCIItoUTF16(Substring(aCString, aLength), *this); + } + + explicit NS_ConvertASCIItoUTF16(const nsACString& aCString) { + AppendASCIItoUTF16(aCString, *this); + } + + explicit NS_ConvertASCIItoUTF16(mozilla::Span<const char> aCString) { + AppendASCIItoUTF16(aCString, *this); + } + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertASCIItoUTF16(char16_t) = delete; +}; + +/** + * A helper class that converts a UTF-16 string to UTF-8 + */ +class NS_ConvertUTF16toUTF8 : public nsAutoCString { + public: + explicit NS_ConvertUTF16toUTF8(const char16ptr_t aString) { + AppendUTF16toUTF8(mozilla::MakeStringSpan(aString), *this); + } + + NS_ConvertUTF16toUTF8(const char16ptr_t aString, size_t aLength) { + AppendUTF16toUTF8(Substring(static_cast<const char16_t*>(aString), aLength), + *this); + } + + explicit NS_ConvertUTF16toUTF8(const nsAString& aString) { + AppendUTF16toUTF8(aString, *this); + } + + explicit NS_ConvertUTF16toUTF8(mozilla::Span<const char16_t> aString) { + AppendUTF16toUTF8(aString, *this); + } + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertUTF16toUTF8(char) = delete; +}; + +class NS_ConvertUTF8toUTF16 : public 
nsAutoString { + public: + explicit NS_ConvertUTF8toUTF16(const char* aCString) { + AppendUTF8toUTF16(mozilla::MakeStringSpan(aCString), *this); + } + + NS_ConvertUTF8toUTF16(const char* aCString, size_t aLength) { + AppendUTF8toUTF16(Substring(aCString, aLength), *this); + } + + explicit NS_ConvertUTF8toUTF16(const nsACString& aCString) { + AppendUTF8toUTF16(aCString, *this); + } + + explicit NS_ConvertUTF8toUTF16(mozilla::Span<const char> aCString) { + AppendUTF8toUTF16(aCString, *this); + } + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertUTF8toUTF16(char16_t) = delete; +}; + +/** + * Converts an integer (signed/unsigned, 32/64bit) to its decimal string + * representation and returns it as an nsAutoCString/nsAutoString. + */ +template <typename T, typename U> +nsTAutoString<T> IntToTString(const U aInt, const int aRadix = 10) { + nsTAutoString<T> string; + string.AppendInt(aInt, aRadix); + return string; +} + +template <typename U> +nsAutoCString IntToCString(const U aInt, const int aRadix = 10) { + return IntToTString<char>(aInt, aRadix); +} + +template <typename U> +nsAutoString IntToString(const U aInt, const int aRadix = 10) { + return IntToTString<char16_t>(aInt, aRadix); +} + +// MOZ_DBG support + +inline std::ostream& operator<<(std::ostream& aOut, const nsACString& aString) { + aOut.write(aString.Data(), aString.Length()); + return aOut; +} + +inline std::ostream& operator<<(std::ostream& aOut, const nsAString& aString) { + return aOut << NS_ConvertUTF16toUTF8(aString); +} + +// the following are included/declared for backwards compatibility +#include "nsDependentString.h" +#include "nsLiteralString.h" +#include "nsPromiseFlatString.h" + +#endif // !defined(nsString_h___) diff --git a/xpcom/string/nsStringBuffer.cpp b/xpcom/string/nsStringBuffer.cpp new file mode 100644 index 0000000000..f32b51d3a6 --- /dev/null +++ b/xpcom/string/nsStringBuffer.cpp @@ -0,0 +1,162 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* 
vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsStringBuffer.h" + +#include "mozilla/MemoryReporting.h" +#include "nsISupportsImpl.h" +#include "nsString.h" + +#ifdef DEBUG +# include "nsStringStats.h" +#else +# define STRING_STAT_INCREMENT(_s) +#endif + +void nsStringBuffer::AddRef() { + // Memory synchronization is not required when incrementing a + // reference count. The first increment of a reference count on a + // thread is not important, since the first use of the object on a + // thread can happen before it. What is important is the transfer + // of the pointer to that thread, which may happen prior to the + // first increment on that thread. The necessary memory + // synchronization is done by the mechanism that transfers the + // pointer between threads. +#ifdef NS_BUILD_REFCNT_LOGGING + uint32_t count = +#endif + mRefCount.fetch_add(1, std::memory_order_relaxed) +#ifdef NS_BUILD_REFCNT_LOGGING + + 1 +#endif + ; + STRING_STAT_INCREMENT(Share); + NS_LOG_ADDREF(this, count, "nsStringBuffer", sizeof(*this)); +} + +void nsStringBuffer::Release() { + // Since this may be the last release on this thread, we need + // release semantics so that prior writes on this thread are visible + // to the thread that destroys the object when it reads mRefCount with + // acquire semantics. + uint32_t count = mRefCount.fetch_sub(1, std::memory_order_release) - 1; + NS_LOG_RELEASE(this, count, "nsStringBuffer"); + if (count == 0) { + // We're going to destroy the object on this thread, so we need + // acquire semantics to synchronize with the memory released by + // the last release on other threads, that is, to ensure that + // writes prior to that release are now visible on this thread. 
+ count = mRefCount.load(std::memory_order_acquire); + + STRING_STAT_INCREMENT(Free); + free(this); // we were allocated with |malloc| + } +} + +/** + * Alloc returns a pointer to a new string header with set capacity. + */ +already_AddRefed<nsStringBuffer> nsStringBuffer::Alloc(size_t aSize) { + NS_ASSERTION(aSize != 0, "zero capacity allocation not allowed"); + NS_ASSERTION(sizeof(nsStringBuffer) + aSize <= size_t(uint32_t(-1)) && + sizeof(nsStringBuffer) + aSize > aSize, + "mStorageSize will truncate"); + + nsStringBuffer* hdr = (nsStringBuffer*)malloc(sizeof(nsStringBuffer) + aSize); + if (hdr) { + STRING_STAT_INCREMENT(Alloc); + + hdr->mRefCount = 1; + hdr->mStorageSize = aSize; + NS_LOG_ADDREF(hdr, 1, "nsStringBuffer", sizeof(*hdr)); + } + return already_AddRefed(hdr); +} + +nsStringBuffer* nsStringBuffer::Realloc(nsStringBuffer* aHdr, size_t aSize) { + STRING_STAT_INCREMENT(Realloc); + + NS_ASSERTION(aSize != 0, "zero capacity allocation not allowed"); + NS_ASSERTION(sizeof(nsStringBuffer) + aSize <= size_t(uint32_t(-1)) && + sizeof(nsStringBuffer) + aSize > aSize, + "mStorageSize will truncate"); + + // no point in trying to save ourselves if we hit this assertion + NS_ASSERTION(!aHdr->IsReadonly(), "|Realloc| attempted on readonly string"); + + // Treat this as a release and addref for refcounting purposes, since we + // just asserted that the refcount is 1. If we don't do that, refcount + // logging will claim we've leaked all sorts of stuff. 
+ NS_LOG_RELEASE(aHdr, 0, "nsStringBuffer"); + + aHdr = (nsStringBuffer*)realloc(aHdr, sizeof(nsStringBuffer) + aSize); + if (aHdr) { + NS_LOG_ADDREF(aHdr, 1, "nsStringBuffer", sizeof(*aHdr)); + aHdr->mStorageSize = aSize; + } + + return aHdr; +} + +nsStringBuffer* nsStringBuffer::FromString(const nsAString& aStr) { + if (!(aStr.mDataFlags & nsAString::DataFlags::REFCOUNTED)) { + return nullptr; + } + + return FromData(aStr.mData); +} + +nsStringBuffer* nsStringBuffer::FromString(const nsACString& aStr) { + if (!(aStr.mDataFlags & nsACString::DataFlags::REFCOUNTED)) { + return nullptr; + } + + return FromData(aStr.mData); +} + +void nsStringBuffer::ToString(uint32_t aLen, nsAString& aStr, + bool aMoveOwnership) { + char16_t* data = static_cast<char16_t*>(Data()); + + MOZ_DIAGNOSTIC_ASSERT(data[aLen] == char16_t(0), + "data should be null terminated"); + + nsAString::DataFlags flags = + nsAString::DataFlags::REFCOUNTED | nsAString::DataFlags::TERMINATED; + + if (!aMoveOwnership) { + AddRef(); + } + aStr.Finalize(); + aStr.SetData(data, aLen, flags); +} + +void nsStringBuffer::ToString(uint32_t aLen, nsACString& aStr, + bool aMoveOwnership) { + char* data = static_cast<char*>(Data()); + + MOZ_DIAGNOSTIC_ASSERT(data[aLen] == char(0), + "data should be null terminated"); + + nsACString::DataFlags flags = + nsACString::DataFlags::REFCOUNTED | nsACString::DataFlags::TERMINATED; + + if (!aMoveOwnership) { + AddRef(); + } + aStr.Finalize(); + aStr.SetData(data, aLen, flags); +} + +size_t nsStringBuffer::SizeOfIncludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const { + return IsReadonly() ? 
0 : aMallocSizeOf(this); +} + +size_t nsStringBuffer::SizeOfIncludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this); +} diff --git a/xpcom/string/nsStringBuffer.h b/xpcom/string/nsStringBuffer.h new file mode 100644 index 0000000000..3c92959932 --- /dev/null +++ b/xpcom/string/nsStringBuffer.h @@ -0,0 +1,184 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsStringBuffer_h__ +#define nsStringBuffer_h__ + +#include <atomic> +#include "mozilla/MemoryReporting.h" +#include "nsStringFwd.h" + +template <class T> +struct already_AddRefed; + +/** + * This structure precedes the string buffers "we" allocate. It may be the + * case that nsTAString::mData does not point to one of these special + * buffers. The mDataFlags member variable distinguishes the buffer type. + * + * When this header is in use, it enables reference counting, and capacity + * tracking. NOTE: A string buffer can be modified only if its reference + * count is 1. + */ +class nsStringBuffer { + private: + friend class CheckStaticAtomSizes; + + std::atomic<uint32_t> mRefCount; + uint32_t mStorageSize; + + public: + /** + * Allocates a new string buffer, with given size in bytes and a + * reference count of one. When the string buffer is no longer needed, + * it should be released via Release. + * + * It is up to the caller to set the bytes corresponding to the string + * buffer by calling the Data method to fetch the raw data pointer. Care + * must be taken to properly null terminate the character array. 
The + * storage size can be greater than the length of the actual string + * (i.e., it is not required that the null terminator appear in the last + * storage unit of the string buffer's data). + * + * @return new string buffer or null if out of memory. + */ + static already_AddRefed<nsStringBuffer> Alloc(size_t aStorageSize); + + /** + * Resizes the given string buffer to the specified storage size. This + * method must not be called on a readonly string buffer. Use this API + * carefully!! + * + * This method behaves like the ANSI-C realloc function. (i.e., If the + * allocation fails, null will be returned and the given string buffer + * will remain unmodified.) + * + * @see IsReadonly + */ + static nsStringBuffer* Realloc(nsStringBuffer* aBuf, size_t aStorageSize); + + /** + * Increment the reference count on this string buffer. + */ + void NS_FASTCALL AddRef(); + + /** + * Decrement the reference count on this string buffer. The string + * buffer will be destroyed when its reference count reaches zero. + */ + void NS_FASTCALL Release(); + + /** + * This method returns the string buffer corresponding to the given data + * pointer. The data pointer must have been returned previously by a + * call to the nsStringBuffer::Data method. + */ + static nsStringBuffer* FromData(void* aData) { + return reinterpret_cast<nsStringBuffer*>(aData) - 1; + } + + /** + * This method returns the data pointer for this string buffer. + */ + void* Data() const { + return const_cast<char*>(reinterpret_cast<const char*>(this + 1)); + } + + /** + * This function returns the storage size of a string buffer in bytes. + * This value is the same value that was originally passed to Alloc (or + * Realloc). 
+ */ + uint32_t StorageSize() const { return mStorageSize; } + + /** + * If this method returns false, then the caller can be sure that their + * reference to the string buffer is the only reference to the string + * buffer, and therefore it has exclusive access to the string buffer and + * associated data. However, if this function returns true, then other + * consumers may rely on the data in this buffer being immutable and + * other threads may access this buffer simultaneously. + */ + bool IsReadonly() const { + // This doesn't lead to the destruction of the buffer, so we don't + // need to perform acquire memory synchronization for the normal + // reason that a reference count needs acquire synchronization + // (ensuring that all writes to the object made on other threads are + // visible to the thread destroying the object). + // + // We then need to consider the possibility that there were prior + // writes to the buffer on a different thread: one that has either + // since released its reference count, or one that also has access + // to this buffer through the same reference. There are two ways + // for that to happen: either the buffer pointer or a data structure + // (e.g., string object) pointing to the buffer was transferred from + // one thread to another, or the data structure pointing to the + // buffer was already visible on both threads. In the first case + // (transfer), the transfer of data from one thread to another would + // have handled the memory synchronization. In the latter case + // (data structure visible on both threads), the caller needed some + // sort of higher level memory synchronization to protect against + // the string object being mutated at the same time on multiple + // threads. + + // See bug 1603504. TSan might complain about a race when using + // memory_order_relaxed, so use memory_order_acquire for making TSan + // happy. 
+#if defined(MOZ_TSAN) + return mRefCount.load(std::memory_order_acquire) > 1; +#else + return mRefCount.load(std::memory_order_relaxed) > 1; +#endif + } + + /** + * The FromString methods return a string buffer for the given string + * object or null if the string object does not have a string buffer. + * The reference count of the string buffer is NOT incremented by these + * methods. If the caller wishes to hold onto the returned value, then + * the returned string buffer must have its reference count incremented + * via a call to the AddRef method. + */ + static nsStringBuffer* FromString(const nsAString& aStr); + static nsStringBuffer* FromString(const nsACString& aStr); + + /** + * The ToString methods assign this string buffer to a given string + * object. If the string object does not support sharable string + * buffers, then its value will be set to a copy of the given string + * buffer. Otherwise, these methods increment the reference count of the + * given string buffer. It is important to specify the length (in + * storage units) of the string contained in the string buffer since the + * length of the string may be less than its storage size. The string + * must have a null terminator at the offset specified by |len|. + * + * NOTE: storage size is measured in bytes even for wide strings; + * however, string length is always measured in storage units + * (2-byte units for wide strings). + */ + void ToString(uint32_t aLen, nsAString& aStr, bool aMoveOwnership = false); + void ToString(uint32_t aLen, nsACString& aStr, bool aMoveOwnership = false); + + /** + * This measures the size only if the StringBuffer is unshared. + */ + size_t SizeOfIncludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const; + + /** + * This measures the size regardless of whether the StringBuffer is + * unshared. + * + * WARNING: Only use this if you really know what you are doing, because + * it can easily lead to double-counting strings. 
If you do use them, + * please explain clearly in a comment why it's safe and won't lead to + * double-counting. + */ + size_t SizeOfIncludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const; +}; + +#endif /* !defined(nsStringBuffer_h__) */ diff --git a/xpcom/string/nsStringFlags.h b/xpcom/string/nsStringFlags.h new file mode 100644 index 0000000000..d0ba05c8db --- /dev/null +++ b/xpcom/string/nsStringFlags.h @@ -0,0 +1,95 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsStringFlags_h +#define nsStringFlags_h + +#include <stdint.h> +#include "mozilla/TypedEnumBits.h" + +namespace mozilla { +namespace detail { +// NOTE: these flags are declared public _only_ for convenience inside +// the string implementation. And they are outside of the string +// class so that the type is the same for both narrow and wide +// strings. + +// bits for mDataFlags +enum class StringDataFlags : uint16_t { + // Some terminology: + // + // "dependent buffer" A dependent buffer is one that the string class + // does not own. The string class relies on some + // external code to ensure the lifetime of the + // dependent buffer. + // + // "refcounted buffer" A refcounted buffer is one that the string class + // allocates. When it allocates a refcounted string + // buffer, it allocates some additional space at + // the beginning of the buffer for additional + // fields, including a reference count and a + // buffer length. See nsStringBuffer. + // + // "adopted buffer" An adopted buffer is a raw string buffer + // allocated on the heap (using moz_xmalloc) + // of which the string class subsumes ownership. 
+ // + // Some comments about the string data flags: + // + // REFCOUNTED, OWNED, and INLINE are all mutually exclusive. They + // indicate the allocation type of mData. If none of these flags + // are set, then the string buffer is dependent. + // + // REFCOUNTED, OWNED, or INLINE imply TERMINATED. This is because + // the string classes always allocate null-terminated buffers, and + // non-terminated substrings are always dependent. + // + // VOIDED implies TERMINATED, and moreover it implies that mData + // points to char_traits::sEmptyBuffer. Therefore, VOIDED is + // mutually exclusive with REFCOUNTED, OWNED, and INLINE. + // + // INLINE requires StringClassFlags::INLINE to be set on the type. + + // IsTerminated returns true + TERMINATED = 1 << 0, + + // IsVoid returns true + VOIDED = 1 << 1, + + // mData points to a heap-allocated, shareable, refcounted buffer + REFCOUNTED = 1 << 2, + + // mData points to a heap-allocated, raw buffer + OWNED = 1 << 3, + + // mData points to a writable, inline buffer + INLINE = 1 << 4, + + // mData points to a string literal; DataFlags::TERMINATED will also be set + LITERAL = 1 << 5, + + // used to check for invalid flags -- all bits above the last item + INVALID_MASK = (uint16_t) ~((LITERAL << 1) - 1) +}; + +// bits for mClassFlags +enum class StringClassFlags : uint16_t { + // |this|'s buffer is inline, and requires the type to be binary-compatible + // with nsTAutoStringN + INLINE = 1 << 0, + // |this| requires its buffer is null-terminated + NULL_TERMINATED = 1 << 1, + // used to check for invalid flags -- all bits above the last item + INVALID_MASK = (uint16_t) ~((NULL_TERMINATED << 1) - 1) +}; + +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(StringDataFlags) +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(StringClassFlags) + +} // namespace detail +} // namespace mozilla + +#endif diff --git a/xpcom/string/nsStringFwd.h b/xpcom/string/nsStringFwd.h new file mode 100644 index 0000000000..f737545163 --- /dev/null +++ 
b/xpcom/string/nsStringFwd.h @@ -0,0 +1,92 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* nsStringFwd.h --- forward declarations for string classes */ + +#ifndef nsStringFwd_h +#define nsStringFwd_h + +#include "nscore.h" + +static constexpr int32_t kNotFound = -1; + +namespace mozilla { +namespace detail { + +template <typename T> +class nsTStringRepr; + +using nsStringRepr = nsTStringRepr<char16_t>; +using nsCStringRepr = nsTStringRepr<char>; + +} // namespace detail +} // namespace mozilla + +static const size_t AutoStringDefaultStorageSize = 64; + +template <typename T> +class nsTSubstring; +template <typename T> +class nsTSubstringTuple; +template <typename T> +class nsTString; +template <typename T, size_t N> +class nsTAutoStringN; +template <typename T> +class nsTDependentString; +template <typename T> +class nsTDependentSubstring; +template <typename T> +class nsTPromiseFlatString; +template <typename T> +class nsTLiteralString; +template <typename T> +class nsTSubstringSplitter; + +template <typename T> +using nsTStringComparator = int (*)(const T*, const T*, size_t, size_t); + +// The default string comparator (case-sensitive comparison) +template <typename T> +int nsTDefaultStringComparator(const T*, const T*, size_t, size_t); + +// We define this version without a size param instead of providing a +// default value for N so that there is a default typename that doesn't +// require angle brackets. +template <typename T> +using nsTAutoString = nsTAutoStringN<T, AutoStringDefaultStorageSize>; + +// Double-byte (char16_t) string types. 
+ +using nsAString = nsTSubstring<char16_t>; +using nsSubstringTuple = nsTSubstringTuple<char16_t>; +using nsString = nsTString<char16_t>; +using nsAutoString = nsTAutoString<char16_t>; +template <size_t N> +using nsAutoStringN = nsTAutoStringN<char16_t, N>; +using nsDependentString = nsTDependentString<char16_t>; +using nsDependentSubstring = nsTDependentSubstring<char16_t>; +using nsPromiseFlatString = nsTPromiseFlatString<char16_t>; +using nsStringComparator = nsTStringComparator<char16_t>; +using nsLiteralString = nsTLiteralString<char16_t>; +using nsSubstringSplitter = nsTSubstringSplitter<char16_t>; + +// Single-byte (char) string types. + +using nsACString = nsTSubstring<char>; +using nsCSubstringTuple = nsTSubstringTuple<char>; +using nsCString = nsTString<char>; +using nsAutoCString = nsTAutoString<char>; +template <size_t N> +using nsAutoCStringN = nsTAutoStringN<char, N>; +using nsDependentCString = nsTDependentString<char>; +using nsDependentCSubstring = nsTDependentSubstring<char>; +using nsPromiseFlatCString = nsTPromiseFlatString<char>; +using nsCStringComparator = nsTStringComparator<char>; +using nsLiteralCString = nsTLiteralString<char>; +using nsCSubstringSplitter = nsTSubstringSplitter<char>; + +#endif /* !defined(nsStringFwd_h) */ diff --git a/xpcom/string/nsStringIterator.h b/xpcom/string/nsStringIterator.h new file mode 100644 index 0000000000..db14efdaca --- /dev/null +++ b/xpcom/string/nsStringIterator.h @@ -0,0 +1,117 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsStringIterator_h___ +#define nsStringIterator_h___ + +#include "nsCharTraits.h" +#include "nsAlgorithm.h" +#include "nsDebug.h" + +/** + * @see nsTAString + */ + +template <class CharT> +class nsReadingIterator { + public: + typedef nsReadingIterator<CharT> self_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef CharT value_type; + typedef const CharT* pointer; + typedef const CharT& reference; + + private: + friend class mozilla::detail::nsTStringRepr<CharT>; + + // unfortunately, the API for nsReadingIterator requires that the + // iterator know its start and end positions. this was needed when + // we supported multi-fragment strings, but now it is really just + // extra baggage. we should remove mStart and mEnd at some point. + + const CharT* mStart; + const CharT* mEnd; + const CharT* mPosition; + + public: + nsReadingIterator() : mStart(nullptr), mEnd(nullptr), mPosition(nullptr) {} + // clang-format off + // nsReadingIterator( const nsReadingIterator<CharT>& ); // auto-generated copy-constructor OK + // nsReadingIterator<CharT>& operator=( const nsReadingIterator<CharT>& ); // auto-generated copy-assignment operator OK + // clang-format on + + pointer get() const { return mPosition; } + + CharT operator*() const { return *get(); } + + self_type& operator++() { + ++mPosition; + return *this; + } + + self_type operator++(int) { + self_type result(*this); + ++mPosition; + return result; + } + + self_type& operator--() { + --mPosition; + return *this; + } + + self_type operator--(int) { + self_type result(*this); + --mPosition; + return result; + } + + self_type& advance(difference_type aN) { + if (aN > 0) { + difference_type step = XPCOM_MIN(aN, mEnd - mPosition); + + NS_ASSERTION( + step > 0, + "can't advance a reading iterator beyond the end of a string"); + + mPosition += step; + } else if (aN < 0) { + difference_type step = XPCOM_MAX(aN, -(mPosition - mStart)); + + NS_ASSERTION(step < 0, + "can't advance 
(backward) a reading iterator beyond the end " + "of a string"); + + mPosition += step; + } + return *this; + } + + // We return an unsigned type here (with corresponding assert) rather than + // the more usual difference_type because we want to make this class go + // away in favor of mozilla::RangedPtr. Since RangedPtr has the same + // requirement we are enforcing here, the transition ought to be much + // smoother. + size_type operator-(const self_type& aOther) const { + MOZ_ASSERT(mPosition >= aOther.mPosition); + return mPosition - aOther.mPosition; + } +}; + +template <class CharT> +inline bool operator==(const nsReadingIterator<CharT>& aLhs, + const nsReadingIterator<CharT>& aRhs) { + return aLhs.get() == aRhs.get(); +} + +template <class CharT> +inline bool operator!=(const nsReadingIterator<CharT>& aLhs, + const nsReadingIterator<CharT>& aRhs) { + return aLhs.get() != aRhs.get(); +} + +#endif /* !defined(nsStringIterator_h___) */ diff --git a/xpcom/string/nsStringStats.cpp b/xpcom/string/nsStringStats.cpp new file mode 100644 index 0000000000..7fc3d82ad5 --- /dev/null +++ b/xpcom/string/nsStringStats.cpp @@ -0,0 +1,66 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsStringStats.h" + +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/MemoryReporting.h" +#include "nsString.h" + +#include <stdint.h> +#include <stdio.h> + +#ifdef XP_WIN +# include <windows.h> +# include <process.h> +#else +# include <unistd.h> +# include <pthread.h> +#endif + +nsStringStats gStringStats; + +nsStringStats::~nsStringStats() { + // this is a hack to suppress duplicate string stats printing + // in seamonkey as a result of the string code being linked + // into seamonkey and libxpcom! :-( + if (!mAllocCount && !mAdoptCount) { + return; + } + + // Only print the stats if we detect a leak. + if (mAllocCount <= mFreeCount && mAdoptCount <= mAdoptFreeCount) { + return; + } + + printf("nsStringStats\n"); + printf(" => mAllocCount: % 10d\n", int(mAllocCount)); + printf(" => mReallocCount: % 10d\n", int(mReallocCount)); + printf(" => mFreeCount: % 10d", int(mFreeCount)); + if (mAllocCount > mFreeCount) { + printf(" -- LEAKED %d !!!\n", mAllocCount - mFreeCount); + } else { + printf("\n"); + } + printf(" => mShareCount: % 10d\n", int(mShareCount)); + printf(" => mAdoptCount: % 10d\n", int(mAdoptCount)); + printf(" => mAdoptFreeCount: % 10d", int(mAdoptFreeCount)); + if (mAdoptCount > mAdoptFreeCount) { + printf(" -- LEAKED %d !!!\n", mAdoptCount - mAdoptFreeCount); + } else { + printf("\n"); + } + +#ifdef XP_WIN + auto pid = uintptr_t(_getpid()); + auto tid = uintptr_t(GetCurrentThreadId()); +#else + auto pid = uintptr_t(getpid()); + auto tid = uintptr_t(pthread_self()); +#endif + + printf(" => Process ID: %" PRIuPTR ", Thread ID: %" PRIuPTR "\n", pid, tid); +} diff --git a/xpcom/string/nsStringStats.h b/xpcom/string/nsStringStats.h new file mode 100644 index 0000000000..a38304c2b7 --- /dev/null +++ b/xpcom/string/nsStringStats.h @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla 
Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsStringStats_h +#define nsStringStats_h + +#include "mozilla/Atomics.h" + +class nsStringStats { + public: + nsStringStats() = default; + + ~nsStringStats(); + + using AtomicInt = mozilla::Atomic<int32_t, mozilla::SequentiallyConsistent>; + + AtomicInt mAllocCount{0}; + AtomicInt mReallocCount{0}; + AtomicInt mFreeCount{0}; + AtomicInt mShareCount{0}; + AtomicInt mAdoptCount{0}; + AtomicInt mAdoptFreeCount{0}; +}; + +extern nsStringStats gStringStats; + +#define STRING_STAT_INCREMENT(_s) (gStringStats.m##_s##Count)++ + +#endif // nsStringStats_h diff --git a/xpcom/string/nsTDependentString.cpp b/xpcom/string/nsTDependentString.cpp new file mode 100644 index 0000000000..83cfa39687 --- /dev/null +++ b/xpcom/string/nsTDependentString.cpp @@ -0,0 +1,50 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsTDependentString.h" + +template <typename T> +nsTDependentString<T>::nsTDependentString(const char_type* aStart, + const char_type* aEnd) + : string_type(const_cast<char_type*>(aStart), aEnd - aStart, + DataFlags::TERMINATED, ClassFlags(0)) { + MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!"); + this->AssertValidDependentString(); +} + +template <typename T> +void nsTDependentString<T>::Rebind(const string_type& str, + index_type startPos) { + MOZ_ASSERT(str.GetDataFlags() & DataFlags::TERMINATED, + "Unterminated flat string"); + + // If we currently own a buffer, release it. 
+ this->Finalize(); + + size_type strLength = str.Length(); + + if (startPos > strLength) { + startPos = strLength; + } + + char_type* newData = + const_cast<char_type*>(static_cast<const char_type*>(str.Data())) + + startPos; + size_type newLen = strLength - startPos; + DataFlags newDataFlags = + str.GetDataFlags() & (DataFlags::TERMINATED | DataFlags::LITERAL); + this->SetData(newData, newLen, newDataFlags); +} + +template <typename T> +void nsTDependentString<T>::Rebind(const char_type* aStart, + const char_type* aEnd) { + MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!"); + this->Rebind(aStart, aEnd - aStart); +} + +template class nsTDependentString<char>; +template class nsTDependentString<char16_t>; diff --git a/xpcom/string/nsTDependentString.h b/xpcom/string/nsTDependentString.h new file mode 100644 index 0000000000..c7194a677f --- /dev/null +++ b/xpcom/string/nsTDependentString.h @@ -0,0 +1,126 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsTDependentString_h +#define nsTDependentString_h + +#include "nsTString.h" + +/** + * nsTDependentString + * + * Stores a null-terminated, immutable sequence of characters. + * + * Subclass of nsTString that restricts string value to an immutable + * character sequence. This class does not own its data, so the creator + * of objects of this type must take care to ensure that a + * nsTDependentString continues to reference valid memory for the + * duration of its use. 
+ */ +template <typename T> +class nsTDependentString : public nsTString<T> { + public: + typedef nsTDependentString<T> self_type; + typedef nsTString<T> base_string_type; + typedef typename base_string_type::string_type string_type; + + typedef typename base_string_type::fallible_t fallible_t; + + typedef typename base_string_type::char_type char_type; + typedef typename base_string_type::char_traits char_traits; + typedef + typename base_string_type::incompatible_char_type incompatible_char_type; + + typedef typename base_string_type::substring_tuple_type substring_tuple_type; + + typedef typename base_string_type::const_iterator const_iterator; + typedef typename base_string_type::iterator iterator; + + typedef typename base_string_type::comparator_type comparator_type; + + typedef typename base_string_type::const_char_iterator const_char_iterator; + + typedef typename base_string_type::string_view string_view; + + typedef typename base_string_type::index_type index_type; + typedef typename base_string_type::size_type size_type; + + // These are only for internal use within the string classes: + typedef typename base_string_type::DataFlags DataFlags; + typedef typename base_string_type::ClassFlags ClassFlags; + + public: + /** + * constructors + */ + + nsTDependentString(const char_type* aStart, const char_type* aEnd); + + nsTDependentString(const char_type* aData, size_type aLength) + : string_type(const_cast<char_type*>(aData), aLength, + DataFlags::TERMINATED, ClassFlags(0)) { + this->AssertValidDependentString(); + } + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + nsTDependentString(char16ptr_t aData, size_type aLength) + : nsTDependentString(static_cast<const char16_t*>(aData), aLength) {} +#endif + + explicit nsTDependentString(const char_type* aData) + : string_type(const_cast<char_type*>(aData), char_traits::length(aData), + DataFlags::TERMINATED, ClassFlags(0)) { + 
string_type::AssertValidDependentString(); + } + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + explicit nsTDependentString(char16ptr_t aData) + : nsTDependentString(static_cast<const char16_t*>(aData)) {} +#endif + + nsTDependentString(const string_type& aStr, index_type aStartPos) + : string_type() { + Rebind(aStr, aStartPos); + } + + // Create a nsTDependentString to be bound later + nsTDependentString() : string_type() {} + + // auto-generated destructor OK + + nsTDependentString(self_type&& aStr) : string_type() { + Rebind(aStr, /* aStartPos = */ 0); + aStr.SetToEmptyBuffer(); + } + + explicit nsTDependentString(const self_type& aStr) : string_type() { + Rebind(aStr, /* aStartPos = */ 0); + } + + /** + * allow this class to be bound to a different string... + */ + + using nsTString<T>::Rebind; + void Rebind(const char_type* aData) { + Rebind(aData, char_traits::length(aData)); + } + + void Rebind(const char_type* aStart, const char_type* aEnd); + void Rebind(const string_type&, index_type aStartPos); + + private: + // NOT USED + nsTDependentString(const substring_tuple_type&) = delete; + self_type& operator=(const self_type& aStr) = delete; +}; + +extern template class nsTDependentString<char>; +extern template class nsTDependentString<char16_t>; + +#endif diff --git a/xpcom/string/nsTDependentSubstring.cpp b/xpcom/string/nsTDependentSubstring.cpp new file mode 100644 index 0000000000..ba1620f98b --- /dev/null +++ b/xpcom/string/nsTDependentSubstring.cpp @@ -0,0 +1,106 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// FIXME: Due to an include cycle, we need to include `nsTSubstring` first.
+#include "nsTSubstring.h" +#include "nsTDependentSubstring.h" + +template <typename T> +void nsTDependentSubstring<T>::Rebind(const substring_type& str, + size_type startPos, size_type length) { + // If we currently own a buffer, release it. + this->Finalize(); + + size_type strLength = str.Length(); + + if (startPos > strLength) { + startPos = strLength; + } + + char_type* newData = + const_cast<char_type*>(static_cast<const char_type*>(str.Data())) + + startPos; + size_type newLength = XPCOM_MIN(length, strLength - startPos); + DataFlags newDataFlags = DataFlags(0); + this->SetData(newData, newLength, newDataFlags); +} + +template <typename T> +void nsTDependentSubstring<T>::Rebind(const char_type* data, size_type length) { + NS_ASSERTION(data, "nsTDependentSubstring must wrap a non-NULL buffer"); + + // If we currently own a buffer, release it. + this->Finalize(); + + char_type* newData = + const_cast<char_type*>(static_cast<const char_type*>(data)); + size_type newLength = length; + DataFlags newDataFlags = DataFlags(0); + this->SetData(newData, newLength, newDataFlags); +} + +template <typename T> +void nsTDependentSubstring<T>::Rebind(const char_type* aStart, + const char_type* aEnd) { + MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!"); + this->Rebind(aStart, size_type(aEnd - aStart)); +} + +template <typename T> +nsTDependentSubstring<T>::nsTDependentSubstring(const char_type* aStart, + const char_type* aEnd) + : substring_type(const_cast<char_type*>(aStart), aEnd - aStart, + DataFlags(0), ClassFlags(0)) { + MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!"); +} + +#if defined(MOZ_USE_CHAR16_WRAPPER) +template <typename T> +template <typename Q, typename EnableIfChar16> +nsTDependentSubstring<T>::nsTDependentSubstring(char16ptr_t aStart, + char16ptr_t aEnd) + : substring_type(static_cast<const char16_t*>(aStart), + static_cast<const char16_t*>(aEnd)) { + MOZ_RELEASE_ASSERT(static_cast<const char16_t*>(aStart) <= + static_cast<const char16_t*>(aEnd), + 
"Overflow!"); +} +#endif + +template <typename T> +nsTDependentSubstring<T>::nsTDependentSubstring(const const_iterator& aStart, + const const_iterator& aEnd) + : substring_type(const_cast<char_type*>(aStart.get()), + aEnd.get() - aStart.get(), DataFlags(0), ClassFlags(0)) { + MOZ_RELEASE_ASSERT(aStart.get() <= aEnd.get(), "Overflow!"); +} + +template <typename T> +const nsTDependentSubstring<T> Substring(const T* aStart, const T* aEnd) { + MOZ_RELEASE_ASSERT(aStart <= aEnd, "Overflow!"); + return nsTDependentSubstring<T>(aStart, aEnd); +} + +template nsTDependentSubstring<char> const Substring<char>(char const*, + char const*); +template nsTDependentSubstring<char16_t> const Substring<char16_t>( + char16_t const*, char16_t const*); + +#if defined(MOZ_USE_CHAR16_WRAPPER) +const nsTDependentSubstring<char16_t> Substring(char16ptr_t aData, + size_t aLength) { + return nsTDependentSubstring<char16_t>(aData, aLength); +} + +const nsTDependentSubstring<char16_t> Substring(char16ptr_t aStart, + char16ptr_t aEnd) { + return Substring(static_cast<const char16_t*>(aStart), + static_cast<const char16_t*>(aEnd)); +} +#endif + +template class nsTDependentSubstring<char>; +template class nsTDependentSubstring<char16_t>; diff --git a/xpcom/string/nsTDependentSubstring.h b/xpcom/string/nsTDependentSubstring.h new file mode 100644 index 0000000000..b5198ff2b5 --- /dev/null +++ b/xpcom/string/nsTDependentSubstring.h @@ -0,0 +1,162 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsTDependentSubstring_h +#define nsTDependentSubstring_h + +#include "nsTSubstring.h" +#include "nsTLiteralString.h" +#include "mozilla/Span.h" + +/** + * nsTDependentSubstring_CharT + * + * A string class which wraps an external array of string characters. It + * is the client code's responsibility to ensure that the external buffer + * remains valid for as long as the string is alive. + * + * NAMES: + * nsDependentSubstring for wide characters + * nsDependentCSubstring for narrow characters + */ +template <typename T> +class nsTDependentSubstring : public nsTSubstring<T> { + public: + typedef nsTDependentSubstring<T> self_type; + typedef nsTSubstring<T> substring_type; + typedef typename substring_type::fallible_t fallible_t; + + typedef typename substring_type::char_type char_type; + typedef typename substring_type::char_traits char_traits; + typedef + typename substring_type::incompatible_char_type incompatible_char_type; + + typedef typename substring_type::substring_tuple_type substring_tuple_type; + + typedef typename substring_type::const_iterator const_iterator; + typedef typename substring_type::iterator iterator; + + typedef typename substring_type::comparator_type comparator_type; + + typedef typename substring_type::const_char_iterator const_char_iterator; + + typedef typename substring_type::string_view string_view; + + typedef typename substring_type::index_type index_type; + typedef typename substring_type::size_type size_type; + + // These are only for internal use within the string classes: + typedef typename substring_type::DataFlags DataFlags; + typedef typename substring_type::ClassFlags ClassFlags; + + public: + void Rebind(const substring_type&, size_type aStartPos, + size_type aLength = size_type(-1)); + + void Rebind(const char_type* aData, size_type aLength); + + void Rebind(const char_type* aStart, const char_type* aEnd); + + nsTDependentSubstring(const substring_type& aStr,
size_type aStartPos, + size_type aLength = size_type(-1)) + : substring_type() { + Rebind(aStr, aStartPos, aLength); + } + + nsTDependentSubstring(const char_type* aData, size_type aLength) + : substring_type(const_cast<char_type*>(aData), aLength, DataFlags(0), + ClassFlags(0)) {} + + explicit nsTDependentSubstring(mozilla::Span<const char_type> aData) + : nsTDependentSubstring(aData.Elements(), aData.Length()) {} + + nsTDependentSubstring(const char_type* aStart, const char_type* aEnd); + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + nsTDependentSubstring(char16ptr_t aData, size_type aLength) + : nsTDependentSubstring(static_cast<const char16_t*>(aData), aLength) {} + + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + nsTDependentSubstring(char16ptr_t aStart, char16ptr_t aEnd); +#endif + + nsTDependentSubstring(const const_iterator& aStart, + const const_iterator& aEnd); + + // Create a nsTDependentSubstring to be bound later + nsTDependentSubstring() : substring_type() {} + + // auto-generated copy-constructor OK (XXX really?? what about base class + // copy-ctor?) 
+ nsTDependentSubstring(const nsTDependentSubstring&) = default; + + private: + // NOT USED + void operator=(const self_type&) = + delete; // we're immutable, you can't assign into a substring +}; + +extern template class nsTDependentSubstring<char>; +extern template class nsTDependentSubstring<char16_t>; + +template <typename T> +inline const nsTDependentSubstring<T> Substring(const nsTSubstring<T>& aStr, + size_t aStartPos, + size_t aLength = size_t(-1)) { + return nsTDependentSubstring<T>(aStr, aStartPos, aLength); +} + +template <typename T> +inline const nsTDependentSubstring<T> Substring(const nsTLiteralString<T>& aStr, + size_t aStartPos, + size_t aLength = size_t(-1)) { + return nsTDependentSubstring<T>(aStr, aStartPos, aLength); +} + +template <typename T> +inline const nsTDependentSubstring<T> Substring( + const nsReadingIterator<T>& aStart, const nsReadingIterator<T>& aEnd) { + return nsTDependentSubstring<T>(aStart.get(), aEnd.get()); +} + +template <typename T> +inline const nsTDependentSubstring<T> Substring(const T* aData, + size_t aLength) { + return nsTDependentSubstring<T>(aData, aLength); +} + +template <typename T> +const nsTDependentSubstring<T> Substring(const T* aStart, const T* aEnd); + +extern template const nsTDependentSubstring<char> Substring(const char* aStart, + const char* aEnd); + +extern template const nsTDependentSubstring<char16_t> Substring( + const char16_t* aStart, const char16_t* aEnd); + +#if defined(MOZ_USE_CHAR16_WRAPPER) +inline const nsTDependentSubstring<char16_t> Substring(char16ptr_t aData, + size_t aLength); + +const nsTDependentSubstring<char16_t> Substring(char16ptr_t aStart, + char16ptr_t aEnd); +#endif + +template <typename T> +inline const nsTDependentSubstring<T> StringHead(const nsTSubstring<T>& aStr, + size_t aCount) { + return nsTDependentSubstring<T>(aStr, 0, aCount); +} + +template <typename T> +inline const nsTDependentSubstring<T> StringTail(const nsTSubstring<T>& aStr, + size_t aCount) { + return 
nsTDependentSubstring<T>(aStr, aStr.Length() - aCount, aCount); +} + +#endif diff --git a/xpcom/string/nsTLiteralString.cpp b/xpcom/string/nsTLiteralString.cpp new file mode 100644 index 0000000000..79454f7783 --- /dev/null +++ b/xpcom/string/nsTLiteralString.cpp @@ -0,0 +1,10 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsTLiteralString.h" + +template class nsTLiteralString<char>; +template class nsTLiteralString<char16_t>; diff --git a/xpcom/string/nsTLiteralString.h b/xpcom/string/nsTLiteralString.h new file mode 100644 index 0000000000..b233183a73 --- /dev/null +++ b/xpcom/string/nsTLiteralString.h @@ -0,0 +1,113 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsTLiteralString_h +#define nsTLiteralString_h + +#include "nsTStringRepr.h" + +/** + * nsTLiteralString_CharT + * + * Stores a null-terminated, immutable sequence of characters. + * + * nsTString-lookalike that restricts its string value to a literal character + * sequence. Can be implicitly cast to const nsTString& (the const is + * essential, since this class's data are not writable). The data are assumed + * to be static (permanent) and therefore, as an optimization, this class + * does not have a destructor. 
+ */ +template <typename T> +class nsTLiteralString : public mozilla::detail::nsTStringRepr<T> { + public: + typedef nsTLiteralString<T> self_type; + +#ifdef __clang__ + // bindgen w/ clang 3.9 at least chokes on a typedef, but using is okay. + using typename mozilla::detail::nsTStringRepr<T>::base_string_type; +#else + // On the other hand msvc chokes on the using statement. It seems others + // don't care either way so we lump them in here. + typedef typename mozilla::detail::nsTStringRepr<T>::base_string_type + base_string_type; +#endif + + typedef typename base_string_type::char_type char_type; + typedef typename base_string_type::size_type size_type; + typedef typename base_string_type::DataFlags DataFlags; + typedef typename base_string_type::ClassFlags ClassFlags; + + public: + /** + * constructor + */ + + template <size_type N> + explicit constexpr nsTLiteralString(const char_type (&aStr)[N]) + : nsTLiteralString(aStr, N - 1) {} + + nsTLiteralString(const nsTLiteralString&) = default; + + /** + * For compatibility with existing code that requires const ns[C]String*. + * Use sparingly. If possible, rewrite code to use const ns[C]String& + * and the implicit cast will just work. + */ + MOZ_LIFETIME_BOUND const nsTString<T>& AsString() const { + return *reinterpret_cast<const nsTString<T>*>(this); + } + + MOZ_LIFETIME_BOUND operator const nsTString<T>&() const { return AsString(); } + + template <typename N, typename Dummy> + struct raw_type { + typedef N* type; + }; + +#ifdef MOZ_USE_CHAR16_WRAPPER + template <typename Dummy> + struct raw_type<char16_t, Dummy> { + typedef char16ptr_t type; + }; +#endif + + /** + * Prohibit get() on temporaries as in "x"_ns.get(). + * These should be written as just "x", using a string literal directly. 
+ */ + const typename raw_type<T, int>::type get() const&& = delete; + const typename raw_type<T, int>::type get() const& { return this->mData; } + +// At least older gcc versions do not accept these friend declarations, +// complaining about an "invalid argument list" here, but not where the actual +// operators are defined or used. We make the supposed-to-be-private constructor +// public when building with gcc, relying on the default clang builds to fail if +// any non-private use of that constructor would get into the codebase. +#if defined(__clang__) + private: + friend constexpr auto operator"" _ns(const char* aStr, std::size_t aLen); + friend constexpr auto operator"" _ns(const char16_t* aStr, std::size_t aLen); +#else + public: +#endif + // Only for use by operator"" + constexpr nsTLiteralString(const char_type* aStr, size_t aLen) + : base_string_type(const_cast<char_type*>(aStr), aLen, + DataFlags::TERMINATED | DataFlags::LITERAL, + ClassFlags::NULL_TERMINATED) {} + + public: + // NOT TO BE IMPLEMENTED + template <size_type N> + nsTLiteralString(char_type (&aStr)[N]) = delete; + + nsTLiteralString& operator=(const nsTLiteralString&) = delete; +}; + +extern template class nsTLiteralString<char>; +extern template class nsTLiteralString<char16_t>; + +#endif diff --git a/xpcom/string/nsTPromiseFlatString.cpp b/xpcom/string/nsTPromiseFlatString.cpp new file mode 100644 index 0000000000..1243300033 --- /dev/null +++ b/xpcom/string/nsTPromiseFlatString.cpp @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsTPromiseFlatString.h" + +template <typename T> +void nsTPromiseFlatString<T>::Init(const substring_type& str) { + if (str.IsTerminated()) { + char_type* newData = + const_cast<char_type*>(static_cast<const char_type*>(str.Data())); + size_type newLength = str.Length(); + DataFlags newDataFlags = + str.GetDataFlags() & (DataFlags::TERMINATED | DataFlags::LITERAL); + // does not promote DataFlags::VOIDED + + this->SetData(newData, newLength, newDataFlags); + } else { + this->Assign(str); + } +} + +template class nsTPromiseFlatString<char>; +template class nsTPromiseFlatString<char16_t>; diff --git a/xpcom/string/nsTPromiseFlatString.h b/xpcom/string/nsTPromiseFlatString.h new file mode 100644 index 0000000000..126362ec9c --- /dev/null +++ b/xpcom/string/nsTPromiseFlatString.h @@ -0,0 +1,136 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsTPromiseFlatString_h +#define nsTPromiseFlatString_h + +#include "nsTString.h" + +/** + * NOTE: + * + * Try to avoid flat strings. |PromiseFlat[C]String| will help you as a last + * resort, and this may be necessary when dealing with legacy or OS calls, + * but in general, requiring a null-terminated array of characters kills many + * of the performance wins the string classes offer. Write your own code to + * use |nsA[C]String&|s for parameters. Write your string processing + * algorithms to exploit iterators. If you do this, you will benefit from + * being able to chain operations without copying or allocating and your code + * will be significantly more efficient.
Remember, a function that takes a + * |const nsA[C]String&| can always be passed a raw character pointer by + * wrapping it (for free) in a |nsDependent[C]String|. But a function that + * takes a character pointer always has the potential to force allocation and + * copying. + * + * + * How to use it: + * + * A |nsPromiseFlat[C]String| doesn't necessarily own the characters it + * promises. You must never use it to promise characters out of a string + * with a shorter lifespan. The typical use will be something like this: + * + * SomeOSFunction( PromiseFlatCString(aCSubstring).get() ); // GOOD + * + * Here's a BAD use: + * + * const char* buffer = PromiseFlatCString(aCSubstring).get(); + * SomeOSFunction(buffer); // BAD!! |buffer| is a dangling pointer + * + * The only way to make one is with the function |PromiseFlat[C]String|, + * which produces a |const| instance. ``What if I need to keep a promise + * around for a little while?'' you might ask. In that case, you can keep a + * reference, like so: + * + * const nsCString& flat = PromiseFlatCString(aCSubstring); + * // Temporaries usually die after the full expression containing the + * // expression that created the temporary is evaluated. But when a + * // temporary is assigned to a local reference, the temporary's lifetime + * // is extended to the reference's lifetime (C++11 [class.temporary]p5). + * // + * // This reference holds the anonymous temporary alive. But remember: it + * // must _still_ have a lifetime shorter than that of |aCSubstring|, and + * // |aCSubstring| must not be changed while the PromiseFlatCString lives. + * + * SomeOSFunction(flat.get()); + * SomeOtherOSFunction(flat.get()); + * + * + * How does it work? + * + * A |nsPromiseFlat[C]String| is just a wrapper for another string. If you + * apply it to a string that happens to be flat, your promise is just a + * dependent reference to the string's data.
If you apply it to a non-flat + * string, then a temporary flat string is created for you, by allocating and + * copying. In the event that you end up assigning the result into a sharing + * string (e.g., |nsTString|), the right thing happens. + */ + +template <typename T> +class nsTPromiseFlatString : public nsTString<T> { + public: + typedef nsTPromiseFlatString<T> self_type; + typedef nsTString<T> base_string_type; + typedef typename base_string_type::substring_type substring_type; + typedef typename base_string_type::string_type string_type; + typedef typename base_string_type::substring_tuple_type substring_tuple_type; + typedef typename base_string_type::char_type char_type; + typedef typename base_string_type::size_type size_type; + + // These are only for internal use within the string classes: + typedef typename base_string_type::DataFlags DataFlags; + typedef typename base_string_type::ClassFlags ClassFlags; + + private: + void Init(const substring_type&); + + // NOT TO BE IMPLEMENTED + void operator=(const self_type&) = delete; + + // NOT TO BE IMPLEMENTED + nsTPromiseFlatString() = delete; + + // NOT TO BE IMPLEMENTED + nsTPromiseFlatString(const string_type& aStr) = delete; + + public: + explicit nsTPromiseFlatString(const substring_type& aStr) : string_type() { + Init(aStr); + } + + explicit nsTPromiseFlatString(const substring_tuple_type& aTuple) + : string_type() { + // nothing else to do here except assign the value of the tuple + // into ourselves. + this->Assign(aTuple); + } +}; + +extern template class nsTPromiseFlatString<char>; +extern template class nsTPromiseFlatString<char16_t>; + +// We template this so that the constructor is chosen based on the type of the +// parameter. This allows us to reject attempts to promise a flat flat string. 
+template <class T> +const nsTPromiseFlatString<T> TPromiseFlatString( + const typename nsTPromiseFlatString<T>::substring_type& aString) { + return nsTPromiseFlatString<T>(aString); +} + +template <class T> +const nsTPromiseFlatString<T> TPromiseFlatString( + const typename nsTPromiseFlatString<T>::substring_tuple_type& aString) { + return nsTPromiseFlatString<T>(aString); +} + +#ifndef PromiseFlatCString +# define PromiseFlatCString TPromiseFlatString<char> +#endif + +#ifndef PromiseFlatString +# define PromiseFlatString TPromiseFlatString<char16_t> +#endif + +#endif diff --git a/xpcom/string/nsTString.cpp b/xpcom/string/nsTString.cpp new file mode 100644 index 0000000000..4e845f62df --- /dev/null +++ b/xpcom/string/nsTString.cpp @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsTString.h" +#include "nsString.h" +#include "prdtoa.h" + +/** + * nsTString::SetCharAt + */ + +template <typename T> +bool nsTString<T>::SetCharAt(char16_t aChar, index_type aIndex) { + if (aIndex >= this->mLength) { + return false; + } + + if (!this->EnsureMutable()) { + this->AllocFailed(this->mLength); + } + + this->mData[aIndex] = char_type(aChar); + return true; +} + +template <typename T> +void nsTString<T>::Rebind(const char_type* data, size_type length) { + // If we currently own a buffer, release it. 
+ this->Finalize(); + + this->SetData(const_cast<char_type*>(data), length, DataFlags::TERMINATED); + this->AssertValidDependentString(); +} + +template class nsTString<char>; +template class nsTString<char16_t>; + +template class nsTAutoStringN<char, 64>; +template class nsTAutoStringN<char16_t, 64>; diff --git a/xpcom/string/nsTString.h b/xpcom/string/nsTString.h new file mode 100644 index 0000000000..9793f70e3b --- /dev/null +++ b/xpcom/string/nsTString.h @@ -0,0 +1,447 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsTString_h +#define nsTString_h + +#include "nsTSubstring.h" + +/** + * This is the canonical null-terminated string class. All subclasses + * promise null-terminated storage. Instances of this class allocate + * strings on the heap. + * + * NAMES: + * nsString for wide characters + * nsCString for narrow characters + * + * This class is also known as nsAFlat[C]String, where "flat" is used + * to denote a null-terminated string. + */ +template <typename T> +class nsTString : public nsTSubstring<T> { + public: + typedef nsTString<T> self_type; + + using repr_type = mozilla::detail::nsTStringRepr<T>; + +#ifdef __clang__ + // bindgen w/ clang 3.9 at least chokes on a typedef, but using is okay. + using typename nsTSubstring<T>::substring_type; +#else + // On the other hand msvc chokes on the using statement. It seems others + // don't care either way so we lump them in here. 
+ typedef typename nsTSubstring<T>::substring_type substring_type; +#endif + + typedef typename substring_type::fallible_t fallible_t; + + typedef typename substring_type::char_type char_type; + typedef typename substring_type::char_traits char_traits; + typedef + typename substring_type::incompatible_char_type incompatible_char_type; + + typedef typename substring_type::substring_tuple_type substring_tuple_type; + + typedef typename substring_type::const_iterator const_iterator; + typedef typename substring_type::iterator iterator; + + typedef typename substring_type::comparator_type comparator_type; + + typedef typename substring_type::const_char_iterator const_char_iterator; + + typedef typename substring_type::string_view string_view; + + typedef typename substring_type::index_type index_type; + typedef typename substring_type::size_type size_type; + + // These are only for internal use within the string classes: + typedef typename substring_type::DataFlags DataFlags; + typedef typename substring_type::ClassFlags ClassFlags; + + public: + /** + * constructors + */ + + nsTString() : substring_type(ClassFlags::NULL_TERMINATED) {} + + explicit nsTString(const char_type* aData, size_type aLength = size_type(-1)) + : substring_type(ClassFlags::NULL_TERMINATED) { + this->Assign(aData, aLength); + } + + explicit nsTString(mozilla::Span<const char_type> aData) + : nsTString(aData.Elements(), aData.Length()) {} + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + explicit nsTString(char16ptr_t aStr, size_type aLength = size_type(-1)) + : substring_type(ClassFlags::NULL_TERMINATED) { + this->Assign(static_cast<const char16_t*>(aStr), aLength); + } +#endif + + nsTString(const self_type& aStr) + : substring_type(ClassFlags::NULL_TERMINATED) { + this->Assign(aStr); + } + + nsTString(self_type&& aStr) : substring_type(ClassFlags::NULL_TERMINATED) { + this->Assign(std::move(aStr)); + } + + MOZ_IMPLICIT 
nsTString(const substring_tuple_type& aTuple) + : substring_type(ClassFlags::NULL_TERMINATED) { + this->Assign(aTuple); + } + + explicit nsTString(const substring_type& aReadable) + : substring_type(ClassFlags::NULL_TERMINATED) { + this->Assign(aReadable); + } + + explicit nsTString(substring_type&& aReadable) + : substring_type(ClassFlags::NULL_TERMINATED) { + this->Assign(std::move(aReadable)); + } + + // |operator=| does not inherit, so we must define our own + self_type& operator=(char_type aChar) { + this->Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aData) { + this->Assign(aData); + return *this; + } + self_type& operator=(const self_type& aStr) { + this->Assign(aStr); + return *this; + } + self_type& operator=(self_type&& aStr) { + this->Assign(std::move(aStr)); + return *this; + } +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + self_type& operator=(const char16ptr_t aStr) { + this->Assign(static_cast<const char16_t*>(aStr)); + return *this; + } +#endif + self_type& operator=(const substring_type& aStr) { + this->Assign(aStr); + return *this; + } + self_type& operator=(substring_type&& aStr) { + this->Assign(std::move(aStr)); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) { + this->Assign(aTuple); + return *this; + } + + /** + * returns the null-terminated string + */ + + template <typename U, typename Dummy> + struct raw_type { + typedef const U* type; + }; +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Dummy> + struct raw_type<char16_t, Dummy> { + typedef char16ptr_t type; + }; +#endif + + MOZ_NO_DANGLING_ON_TEMPORARIES typename raw_type<T, int>::type get() const { + return this->mData; + } + + /** + * returns character at specified index. + * + * NOTE: unlike nsTSubstring::CharAt, this function allows you to index + * the null terminator character. 
+ */ + + char_type CharAt(index_type aIndex) const { + MOZ_ASSERT(aIndex <= this->Length(), "index exceeds allowable range"); + return this->mData[aIndex]; + } + + char_type operator[](index_type aIndex) const { return CharAt(aIndex); } + + /** + * Set a char inside this string at given index + * + * @param aChar is the char you want to write into this string + * @param aIndex is the offset where you want to write the given char + * @return true if successful; false if aIndex is not a valid index + */ + bool SetCharAt(char16_t aChar, index_type aIndex); + + /** + * Allow this string to be bound to a character buffer + * until the string is rebound or mutated; the caller + * must ensure that the buffer outlives the string. + */ + void Rebind(const char_type* aData, size_type aLength); + + /** + * verify restrictions for dependent strings + */ + void AssertValidDependentString() { + MOZ_ASSERT(this->mData, "nsTDependentString must wrap a non-NULL buffer"); + MOZ_ASSERT(this->mData[substring_type::mLength] == 0, + "nsTDependentString must wrap only null-terminated strings. " + "You are probably looking for nsTDependentSubstring."); + } + + protected: + // allow subclasses to initialize fields directly + nsTString(char_type* aData, size_type aLength, DataFlags aDataFlags, + ClassFlags aClassFlags) + : substring_type(aData, aLength, aDataFlags, + aClassFlags | ClassFlags::NULL_TERMINATED) {} + + friend const nsTString<char>& VoidCString(); + friend const nsTString<char16_t>& VoidString(); + + // Used by Null[C]String. + explicit nsTString(DataFlags aDataFlags) + : substring_type(char_traits::sEmptyBuffer, 0, + aDataFlags | DataFlags::TERMINATED, + ClassFlags::NULL_TERMINATED) {} +}; + +extern template class nsTString<char>; +extern template class nsTString<char16_t>; + +/** + * nsTAutoStringN + * + * Subclass of nsTString that adds support for stack-based string + * allocation.
It is normally not a good idea to use this class on the + * heap, because it will allocate space which may be wasted if the string + * it contains is significantly smaller or any larger than 64 characters. + * + * NAMES: + * nsAutoStringN / nsTAutoString for wide characters + * nsAutoCStringN / nsTAutoCString for narrow characters + */ +template <typename T, size_t N> +class MOZ_NON_MEMMOVABLE nsTAutoStringN : public nsTString<T> { + public: + typedef nsTAutoStringN<T, N> self_type; + + typedef nsTString<T> base_string_type; + typedef typename base_string_type::string_type string_type; + typedef typename base_string_type::char_type char_type; + typedef typename base_string_type::char_traits char_traits; + typedef typename base_string_type::substring_type substring_type; + typedef typename base_string_type::size_type size_type; + typedef typename base_string_type::substring_tuple_type substring_tuple_type; + + // These are only for internal use within the string classes: + typedef typename base_string_type::DataFlags DataFlags; + typedef typename base_string_type::ClassFlags ClassFlags; + typedef typename base_string_type::LengthStorage LengthStorage; + + public: + /** + * constructors + */ + + nsTAutoStringN() + : string_type(mStorage, 0, DataFlags::TERMINATED | DataFlags::INLINE, + ClassFlags::INLINE), + mInlineCapacity(N - 1) { + // null-terminate + mStorage[0] = char_type(0); + } + + explicit nsTAutoStringN(char_type aChar) : self_type() { + this->Assign(aChar); + } + + explicit nsTAutoStringN(const char_type* aData, + size_type aLength = size_type(-1)) + : self_type() { + this->Assign(aData, aLength); + } + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + explicit nsTAutoStringN(char16ptr_t aData, size_type aLength = size_type(-1)) + : self_type(static_cast<const char16_t*>(aData), aLength) {} +#endif + // Like the other converting constructors, copy construction delegates to the default constructor (empty inline buffer) and then assigns. + nsTAutoStringN(const self_type& aStr) : self_type() { this->Assign(aStr); } + + 
nsTAutoStringN(self_type&& aStr) : self_type() { + this->Assign(std::move(aStr)); + } + + explicit nsTAutoStringN(const substring_type& aStr) : self_type() { + this->Assign(aStr); + } + + explicit nsTAutoStringN(substring_type&& aStr) : self_type() { + this->Assign(std::move(aStr)); + } + + MOZ_IMPLICIT nsTAutoStringN(const substring_tuple_type& aTuple) + : self_type() { + this->Assign(aTuple); + } + + // |operator=| does not inherit, so we must define our own + self_type& operator=(char_type aChar) { + this->Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aData) { + this->Assign(aData); + return *this; + } +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + self_type& operator=(char16ptr_t aStr) { + this->Assign(aStr); + return *this; + } +#endif + self_type& operator=(const self_type& aStr) { + this->Assign(aStr); + return *this; + } + self_type& operator=(self_type&& aStr) { + this->Assign(std::move(aStr)); + return *this; + } + self_type& operator=(const substring_type& aStr) { + this->Assign(aStr); + return *this; + } + self_type& operator=(substring_type&& aStr) { + this->Assign(std::move(aStr)); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) { + this->Assign(aTuple); + return *this; + } + // Number of elements in the inline mStorage array. + static const size_t kStorageSize = N; + + protected: + friend class nsTSubstring<T>; + // Set to N - 1 by the default constructor: one element of mStorage holds the null terminator. + const LengthStorage mInlineCapacity; + + private: + char_type mStorage[N]; +}; + +// Externs for the most common nsTAutoStringN variations. +extern template class nsTAutoStringN<char, 64>; +extern template class nsTAutoStringN<char16_t, 64>; + +// +// nsAutoString stores pointers into itself which are invalidated when an +// nsTArray is resized, so nsTArray must not be instantiated with nsAutoString +// elements! 
+// +template <class E> +class nsTArrayElementTraits; +template <typename T> +class nsTArrayElementTraits<nsTAutoString<T>> { + public: + template <class A> + struct Dont_Instantiate_nsTArray_of; + template <class A> + struct Instead_Use_nsTArray_of; + + static Dont_Instantiate_nsTArray_of<nsTAutoString<T>>* Construct( + Instead_Use_nsTArray_of<nsTString<T>>* aE) { + return 0; + } + template <class A> + static Dont_Instantiate_nsTArray_of<nsTAutoString<T>>* Construct( + Instead_Use_nsTArray_of<nsTString<T>>* aE, const A& aArg) { + return 0; + } + template <class... Args> + static Dont_Instantiate_nsTArray_of<nsTAutoString<T>>* Construct( + Instead_Use_nsTArray_of<nsTString<T>>* aE, Args&&... aArgs) { + return 0; + } + static Dont_Instantiate_nsTArray_of<nsTAutoString<T>>* Destruct( + Instead_Use_nsTArray_of<nsTString<T>>* aE) { + return 0; + } +}; + +/** + * getter_Copies support for adopting raw string out params that are + * heap-allocated, e.g.: + * + * char* gStr; + * void GetBlah(char** aStr) + * { + * *aStr = strdup(gStr); + * } + * + * // This works, but is clumsy. + * void Inelegant() + * { + * char* buf; + * GetBlah(&buf); + * nsCString str; + * str.Adopt(buf); + * // ... + * } + * + * // This is nicer. + * void Elegant() + * { + * nsCString str; + * GetBlah(getter_Copies(str)); + * // ... + * } + */ +template <typename T> +class MOZ_STACK_CLASS nsTGetterCopies { + public: + typedef T char_type; + + explicit nsTGetterCopies(nsTSubstring<T>& aStr) + : mString(aStr), mData(nullptr) {} + + ~nsTGetterCopies() { + mString.Adopt(mData); // OK if mData is null + } + // Exposes the address of mData so a callee can store its result there; the destructor then hands mData to mString.Adopt(). + operator char_type**() { return &mData; } + + private: + nsTSubstring<T>& mString; + char_type* mData; +}; + +// See the comment above nsTGetterCopies for how to use this. 
+template <typename T> +inline nsTGetterCopies<T> getter_Copies(nsTSubstring<T>& aString) { + return nsTGetterCopies<T>(aString); +} + +#endif diff --git a/xpcom/string/nsTStringComparator.cpp b/xpcom/string/nsTStringComparator.cpp new file mode 100644 index 0000000000..801a3623b9 --- /dev/null +++ b/xpcom/string/nsTStringComparator.cpp @@ -0,0 +1,91 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" +#include "plstr.h" + +template <typename T> +int NS_FASTCALL Compare(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs, + const nsTStringComparator<T> comp) { + typedef typename nsTSubstring<T>::size_type size_type; + typedef typename nsTSubstring<T>::const_iterator const_iterator; + + if (&aLhs == &aRhs) { + return 0; + } + + const_iterator leftIter, rightIter; + aLhs.BeginReading(leftIter); + aRhs.BeginReading(rightIter); + + size_type lLength = aLhs.Length(); + size_type rLength = aRhs.Length(); + size_type lengthToCompare = XPCOM_MIN(lLength, rLength); + + int result; + if ((result = comp(leftIter.get(), rightIter.get(), lengthToCompare, + lengthToCompare)) == 0) { + if (lLength < rLength) { + result = -1; + } else if (rLength < lLength) { + result = 1; + } else { + result = 0; + } + } + + return result; +} + +template int NS_FASTCALL Compare<char>( + mozilla::detail::nsTStringRepr<char> const&, + mozilla::detail::nsTStringRepr<char> const&, nsTStringComparator<char>); + +template int NS_FASTCALL +Compare<char16_t>(mozilla::detail::nsTStringRepr<char16_t> const&, + mozilla::detail::nsTStringRepr<char16_t> const&, + nsTStringComparator<char16_t>); + +template <typename T> +int nsTDefaultStringComparator(const T* aLhs, 
const T* aRhs, size_t aLLength, + size_t aRLength) { + return aLLength == aRLength ? nsCharTraits<T>::compare(aLhs, aRhs, aLLength) + : (aLLength > aRLength) ? 1 + : -1; +} + +template int nsTDefaultStringComparator(const char*, const char*, size_t, + size_t); +template int nsTDefaultStringComparator(const char16_t*, const char16_t*, + size_t, size_t); +// Compares two narrow buffers via PL_strncasecmp (strncasecmp under libFuzzer) and returns exactly -1, 0 or 1. Buffers of different length never compare equal: the longer one orders last. +int nsCaseInsensitiveCStringComparator(const char* aLhs, const char* aRhs, + size_t aLhsLength, size_t aRhsLength) { +#if defined(LIBFUZZER) && defined(LINUX) + // Make sure libFuzzer can see this string compare by calling the POSIX + // native function which is intercepted. We also call this if the lengths + // don't match so libFuzzer can at least see a partial string, but we throw + // away the result afterwards again. + int32_t result = + int32_t(strncasecmp(aLhs, aRhs, std::min(aLhsLength, aRhsLength))); + + if (aLhsLength != aRhsLength) { + return (aLhsLength > aRhsLength) ? 1 : -1; + } +#else + if (aLhsLength != aRhsLength) { + return (aLhsLength > aRhsLength) ? 1 : -1; + } + int32_t result = int32_t(PL_strncasecmp(aLhs, aRhs, aLhsLength)); +#endif + // PL_strncasecmp / strncasecmp may return a difference of any magnitude, in + // either direction; callers expect exactly -1, 0 or 1, so normalize both signs. + if (result < 0) { + return -1; + } + return result > 0 ? 1 : 0; +} diff --git a/xpcom/string/nsTStringHasher.h b/xpcom/string/nsTStringHasher.h new file mode 100644 index 0000000000..7b3f42ba58 --- /dev/null +++ b/xpcom/string/nsTStringHasher.h @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsTStringHasher_h___ +#define nsTStringHasher_h___ + +#include "mozilla/HashTable.h" // mozilla::{DefaultHasher, HashNumber, HashString} + +namespace mozilla { + +template <typename T> +struct DefaultHasher<nsTString<T>> { + using Key = nsTString<T>; + using Lookup = nsTString<T>; + // Hashes the buffer returned by get(). No length is passed, so presumably only the characters before the first null contribute -- TODO confirm against HashString. + static mozilla::HashNumber hash(const Lookup& aLookup) { + return mozilla::HashString(aLookup.get()); + } + // Two keys match when Equals() reports the strings equal. + static bool match(const Key& aKey, const Lookup& aLookup) { + return aKey.Equals(aLookup); + } +}; + +} // namespace mozilla + +#endif // !defined(nsTStringHasher_h___) diff --git a/xpcom/string/nsTStringRepr.cpp b/xpcom/string/nsTStringRepr.cpp new file mode 100644 index 0000000000..405696fd2b --- /dev/null +++ b/xpcom/string/nsTStringRepr.cpp @@ -0,0 +1,273 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsTStringRepr.h" + +#include "double-conversion/string-to-double.h" +#include "mozilla/FloatingPoint.h" +#include "nsError.h" +#include "nsString.h" + +namespace mozilla::detail { + +template <typename T> +typename nsTStringRepr<T>::char_type nsTStringRepr<T>::First() const { + MOZ_RELEASE_ASSERT(this->mLength > 0, "|First()| called on an empty string"); + return this->mData[0]; +} + +template <typename T> +typename nsTStringRepr<T>::char_type nsTStringRepr<T>::Last() const { + MOZ_RELEASE_ASSERT(this->mLength > 0, "|Last()| called on an empty string"); + return this->mData[this->mLength - 1]; +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const self_type& aStr) const { + return this->mLength == aStr.mLength && + char_traits::compare(this->mData, aStr.mData, this->mLength) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const self_type& aStr, + comparator_type aComp) const { + return this->mLength == aStr.mLength && + aComp(this->mData, aStr.mData, this->mLength, aStr.mLength) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const substring_tuple_type& aTuple) const { + return Equals(substring_type(aTuple)); +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const substring_tuple_type& aTuple, + comparator_type aComp) const { + return Equals(substring_type(aTuple), aComp); +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const char_type* aData) const { + // unfortunately, some callers pass null :-( After asserting, a null pointer compares equal only to an empty string. + if (!aData) { + MOZ_ASSERT_UNREACHABLE("null data pointer"); + return this->mLength == 0; + } + + // XXX avoid length calculation? 
+ size_type length = char_traits::length(aData); + return this->mLength == length && + char_traits::compare(this->mData, aData, this->mLength) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::Equals(const char_type* aData, + comparator_type aComp) const { + // unfortunately, some callers pass null :-( After asserting, a null pointer compares equal only to an empty string. + if (!aData) { + MOZ_ASSERT_UNREACHABLE("null data pointer"); + return this->mLength == 0; + } + + // XXX avoid length calculation? + size_type length = char_traits::length(aData); + return this->mLength == length && + aComp(this->mData, aData, this->mLength, length) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::EqualsASCII(const char* aData, size_type aLen) const { + return this->mLength == aLen && + char_traits::compareASCII(this->mData, aData, aLen) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::EqualsASCII(const char* aData) const { + return char_traits::compareASCIINullTerminated(this->mData, this->mLength, + aData) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::EqualsLatin1(const char* aData, + const size_type aLength) const { + return (this->mLength == aLength) && + char_traits::equalsLatin1(this->mData, aData, aLength); +} + +template <typename T> +bool nsTStringRepr<T>::LowerCaseEqualsASCII(const char* aData, + size_type aLen) const { + return this->mLength == aLen && + char_traits::compareLowerCaseToASCII(this->mData, aData, aLen) == 0; +} + +template <typename T> +bool nsTStringRepr<T>::LowerCaseEqualsASCII(const char* aData) const { + return char_traits::compareLowerCaseToASCIINullTerminated( + this->mData, this->mLength, aData) == 0; +} + +template <typename T> +int32_t nsTStringRepr<T>::Find(const string_view& aString, + index_type aOffset) const { + auto idx = View().find(aString, aOffset); + return idx == string_view::npos ? 
kNotFound : idx; +} + +template <typename T> +int32_t nsTStringRepr<T>::LowerCaseFindASCII(const std::string_view& aString, + index_type aOffset) const { + if (aOffset > Length()) { + return kNotFound; + } + auto begin = BeginReading(); + auto end = EndReading(); + auto it = + std::search(begin + aOffset, end, aString.begin(), aString.end(), + [](char_type l, char r) { + MOZ_ASSERT(!(r & ~0x7F), "Unexpected non-ASCII character"); + MOZ_ASSERT(char_traits::ASCIIToLower(r) == char_type(r), + "Search string must be ASCII lowercase"); + return char_traits::ASCIIToLower(l) == char_type(r); + }); + return it == end ? kNotFound : std::distance(begin, it); +} + +template <typename T> +int32_t nsTStringRepr<T>::RFind(const string_view& aString) const { + auto idx = View().rfind(aString); + return idx == string_view::npos ? kNotFound : idx; +} + +template <typename T> +typename nsTStringRepr<T>::size_type nsTStringRepr<T>::CountChar( + char_type aChar) const { + return std::count(BeginReading(), EndReading(), aChar); +} + +template <typename T> +int32_t nsTStringRepr<T>::FindChar(char_type aChar, index_type aOffset) const { + auto idx = View().find(aChar, aOffset); + return idx == string_view::npos ? kNotFound : idx; +} + +template <typename T> +int32_t nsTStringRepr<T>::RFindChar(char_type aChar, int32_t aOffset) const { + auto idx = View().rfind(aChar, aOffset != -1 ? aOffset : string_view::npos); + return idx == string_view::npos ? kNotFound : idx; +} + +template <typename T> +int32_t nsTStringRepr<T>::FindCharInSet(const string_view& aSet, + index_type aOffset) const { + auto idx = View().find_first_of(aSet, aOffset); + return idx == string_view::npos ? kNotFound : idx; +} + +template <typename T> +int32_t nsTStringRepr<T>::RFindCharInSet(const string_view& aSet, + int32_t aOffset) const { + auto idx = + View().find_last_of(aSet, aOffset != -1 ? aOffset : string_view::npos); + return idx == string_view::npos ? 
kNotFound : idx; +} + +template <typename T> +bool nsTStringRepr<T>::EqualsIgnoreCase(const std::string_view& aString) const { + return std::equal(BeginReading(), EndReading(), aString.begin(), + aString.end(), [](char_type l, char r) { + return char_traits::ASCIIToLower(l) == + char_traits::ASCIIToLower(char_type(r)); + }); +} + +// We can't use the method `StringToDoubleConverter::ToDouble` due to linking +// issues on Windows as it's in mozglue. Instead, implement the selection logic +// using an overload set. +// +// StringToFloat is used instead of StringToDouble for floats due to differences +// in rounding behaviour. +static void StringToFP( + const double_conversion::StringToDoubleConverter& aConverter, + const char* aData, size_t aLength, int* aProcessed, float* aResult) { + *aResult = aConverter.StringToFloat(aData, aLength, aProcessed); +} + +static void StringToFP( + const double_conversion::StringToDoubleConverter& aConverter, + const char* aData, size_t aLength, int* aProcessed, double* aResult) { + *aResult = aConverter.StringToDouble(aData, aLength, aProcessed); +} + +static void StringToFP( + const double_conversion::StringToDoubleConverter& aConverter, + const char16_t* aData, size_t aLength, int* aProcessed, float* aResult) { + *aResult = aConverter.StringToFloat(reinterpret_cast<const uc16*>(aData), + aLength, aProcessed); +} + +static void StringToFP( + const double_conversion::StringToDoubleConverter& aConverter, + const char16_t* aData, size_t aLength, int* aProcessed, double* aResult) { + *aResult = aConverter.StringToDouble(reinterpret_cast<const uc16*>(aData), + aLength, aProcessed); +} + +template <typename FloatT, typename CharT> +static FloatT ParseFloatingPoint(const nsTStringRepr<CharT>& aString, + bool aAllowTrailingChars, + nsresult* aErrorCode) { + // Performs conversion to FloatT (float or double) following the "rules for parsing + // floating-point number values" from the HTML standard. 
+ // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-floating-point-number-values + // + // This behaviour allows for leading spaces, and will not generate infinity or + // NaN values except in error conditions. + int flags = double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES; + if (aAllowTrailingChars) { + flags |= double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK; + } + double_conversion::StringToDoubleConverter converter( + flags, mozilla::UnspecifiedNaN<double>(), + mozilla::UnspecifiedNaN<double>(), nullptr, nullptr); + // `processed` is only an out-param of the converter call; its value is not read afterwards. + FloatT result; + int processed; + StringToFP(converter, aString.Data(), aString.Length(), &processed, &result); + // Any non-finite result (infinity or NaN) is reported as NS_ERROR_ILLEGAL_VALUE. + *aErrorCode = std::isfinite(result) ? NS_OK : NS_ERROR_ILLEGAL_VALUE; + return result; +} + +template <typename T> +double nsTStringRepr<T>::ToDouble(nsresult* aErrorCode) const { + return ParseFloatingPoint<double, T>(*this, /* aAllowTrailingChars */ false, + aErrorCode); +} + +template <typename T> +double nsTStringRepr<T>::ToDoubleAllowTrailingChars( + nsresult* aErrorCode) const { + return ParseFloatingPoint<double, T>(*this, /* aAllowTrailingChars */ true, + aErrorCode); +} + +template <typename T> +float nsTStringRepr<T>::ToFloat(nsresult* aErrorCode) const { + return ParseFloatingPoint<float, T>(*this, /* aAllowTrailingChars */ false, + aErrorCode); +} + +template <typename T> +float nsTStringRepr<T>::ToFloatAllowTrailingChars(nsresult* aErrorCode) const { + return ParseFloatingPoint<float, T>(*this, /* aAllowTrailingChars */ true, + aErrorCode); +} + +} // namespace mozilla::detail + +template class mozilla::detail::nsTStringRepr<char>; +template class mozilla::detail::nsTStringRepr<char16_t>; diff --git a/xpcom/string/nsTStringRepr.h b/xpcom/string/nsTStringRepr.h new file mode 100644 index 0000000000..7e7fa53384 --- /dev/null +++ b/xpcom/string/nsTStringRepr.h @@ -0,0 +1,546 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsTStringRepr_h +#define nsTStringRepr_h + +#include <limits> +#include <string_view> +#include <type_traits> // std::enable_if + +#include "mozilla/Char16.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/fallible.h" +#include "nsStringBuffer.h" +#include "nsStringFlags.h" +#include "nsStringFwd.h" +#include "nsStringIterator.h" +#include "nsCharTraits.h" + +template <typename T> +class nsTSubstringTuple; + +namespace mozilla { + +// This is mainly intended to be used in the context of nsTStrings where +// we want to enable a specific function only for a given character class. In +// order for this technique to work the member function needs to be templated +// on something other than `T`. We keep this in the `mozilla` namespace rather +// than `nsTStringRepr` as it's intentionally not dependent on `T`. +// +// The 'T' at the end of `Char[16]OnlyT` is referring to the `::type` portion +// which will only be defined if the character class is correct. This is similar +// to `std::enable_if_t` which is available in C++14, but not C++11. +// +// `CharType` is generally going to be a shadowed type of `T`. +// +// Example usage of a function that will only be defined if `T` == `char`: +// +// template <typename T> +// class nsTSubstring : public nsTStringRepr<T> { +// template <typename Q = T, typename EnableForChar = CharOnlyT<Q>> +// int Foo() { return 42; } +// }; +// +// Please note that we had to use a separate type `Q` for this to work. You +// will get a semi-decent compiler error if you use `T` directly. 
+ +template <typename CharType> +using CharOnlyT = + typename std::enable_if<std::is_same<char, CharType>::value>::type; + +template <typename CharType> +using Char16OnlyT = + typename std::enable_if<std::is_same<char16_t, CharType>::value>::type; + +namespace detail { + +// nsTStringLengthStorage is a helper class which holds the string's length and +// provides getters and setters for converting to and from `size_t`. The length +// is stored in a `uint32_t`; every conversion from `size_t` release-asserts that the value does not exceed `kMax`. +template <typename T> +class nsTStringLengthStorage { + public: + // The maximum byte capacity for a `nsTString` must fit within an `int32_t`, + // with enough room for a trailing null, as consumers often cast `Length()` + // and `Capacity()` to smaller types like `int32_t`. + static constexpr size_t kMax = + size_t{std::numeric_limits<int32_t>::max()} / sizeof(T) - 1; + static_assert( + (kMax + 1) * sizeof(T) <= std::numeric_limits<int32_t>::max(), + "nsTString's maximum length, including the trailing null, must fit " + "within `int32_t`, as callers will cast to `int32_t` occasionally"); + static_assert(((CheckedInt<uint32_t>{kMax} + 1) * sizeof(T) + + sizeof(nsStringBuffer)) + .isValid(), + "Math required to allocate a nsStringBuffer for a " + "maximum-capacity string must not overflow uint32_t"); + + // Implicit conversion and assignment from `size_t` which assert that the + // value is in-range. + MOZ_IMPLICIT constexpr nsTStringLengthStorage(size_t aLength) + : mLength(static_cast<uint32_t>(aLength)) { + MOZ_RELEASE_ASSERT(aLength <= kMax, "string is too large"); + } + constexpr nsTStringLengthStorage& operator=(size_t aLength) { + MOZ_RELEASE_ASSERT(aLength <= kMax, "string is too large"); + mLength = static_cast<uint32_t>(aLength); + return *this; + } + MOZ_IMPLICIT constexpr operator size_t() const { return mLength; } + + private: + uint32_t mLength = 0; +}; + +// nsTStringRepr defines a string's memory layout and some accessor methods. 
+// This class exists so that nsTLiteralString can avoid inheriting +// nsTSubstring's destructor. All methods on this class must be const because +// literal strings are not writable. +// +// This class is an implementation detail and should not be instantiated +// directly, nor used in any way outside of the string code itself. It is +// buried in a namespace to discourage its use in function parameters. +// If you need to take a parameter, use [const] ns[C]Substring&. +// If you need to instantiate a string, use ns[C]String or descendents. +// +// NAMES: +// nsStringRepr for wide characters +// nsCStringRepr for narrow characters +template <typename T> +class nsTStringRepr { + public: + typedef mozilla::fallible_t fallible_t; + + typedef T char_type; + + typedef nsCharTraits<char_type> char_traits; + typedef typename char_traits::incompatible_char_type incompatible_char_type; + + typedef nsTStringRepr<T> self_type; + typedef self_type base_string_type; + + typedef nsTSubstring<T> substring_type; + typedef nsTSubstringTuple<T> substring_tuple_type; + + typedef nsReadingIterator<char_type> const_iterator; + typedef char_type* iterator; + + typedef nsTStringComparator<char_type> comparator_type; + + typedef const char_type* const_char_iterator; + + typedef std::basic_string_view<char_type> string_view; + + typedef size_t index_type; + typedef size_t size_type; + + // These are only for internal use within the string classes: + typedef StringDataFlags DataFlags; + typedef StringClassFlags ClassFlags; + typedef nsTStringLengthStorage<T> LengthStorage; + + // Reading iterators. + constexpr const_char_iterator BeginReading() const { return mData; } + constexpr const_char_iterator EndReading() const { return mData + mLength; } + + // Deprecated reading iterators. 
+ const_iterator& BeginReading(const_iterator& aIter) const { + aIter.mStart = mData; + aIter.mEnd = mData + mLength; + aIter.mPosition = aIter.mStart; + return aIter; + } + + const_iterator& EndReading(const_iterator& aIter) const { + aIter.mStart = mData; + aIter.mEnd = mData + mLength; + aIter.mPosition = aIter.mEnd; + return aIter; + } + + const_char_iterator& BeginReading(const_char_iterator& aIter) const { + return aIter = mData; + } + + const_char_iterator& EndReading(const_char_iterator& aIter) const { + return aIter = mData + mLength; + } + + // Accessors. + template <typename U, typename Dummy> + struct raw_type { + typedef const U* type; + }; +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Dummy> + struct raw_type<char16_t, Dummy> { + typedef char16ptr_t type; + }; +#endif + + // Returns pointer to string data (not necessarily null-terminated) + constexpr typename raw_type<T, int>::type Data() const { return mData; } + + constexpr size_type Length() const { return static_cast<size_type>(mLength); } + + constexpr string_view View() const { return string_view(Data(), Length()); } + + constexpr operator string_view() const { return View(); } + + constexpr DataFlags GetDataFlags() const { return mDataFlags; } + + constexpr bool IsEmpty() const { return mLength == 0; } + + constexpr bool IsLiteral() const { + return !!(mDataFlags & DataFlags::LITERAL); + } + + constexpr bool IsVoid() const { return !!(mDataFlags & DataFlags::VOIDED); } + + constexpr bool IsTerminated() const { + return !!(mDataFlags & DataFlags::TERMINATED); + } + + constexpr char_type CharAt(index_type aIndex) const { + NS_ASSERTION(aIndex < Length(), "index exceeds allowable range"); + return mData[aIndex]; + } + + constexpr char_type operator[](index_type aIndex) const { + return CharAt(aIndex); + } + + char_type First() const; + + char_type Last() const; + + // Equality. 
+ bool NS_FASTCALL Equals(const self_type&) const; + bool NS_FASTCALL Equals(const self_type&, comparator_type) const; + + bool NS_FASTCALL Equals(const substring_tuple_type& aTuple) const; + bool NS_FASTCALL Equals(const substring_tuple_type& aTuple, + comparator_type) const; + + bool NS_FASTCALL Equals(const char_type* aData) const; + bool NS_FASTCALL Equals(const char_type* aData, comparator_type) const; + + /** + * Compare this string and another ASCII-case-insensitively. + * + * This method is similar to `LowerCaseEqualsASCII` however both strings are + * lowercased, meaning that `aString` need not be all lowercase. + * + * @param aString is the string to check + * @return boolean + */ + bool EqualsIgnoreCase(const std::string_view& aString) const; + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = Char16OnlyT<Q>> + bool NS_FASTCALL Equals(char16ptr_t aData) const { + return Equals(static_cast<const char16_t*>(aData)); + } + template <typename Q = T, typename EnableIfChar16 = Char16OnlyT<Q>> + bool NS_FASTCALL Equals(char16ptr_t aData, comparator_type aComp) const { + return Equals(static_cast<const char16_t*>(aData), aComp); + } +#endif + + // An efficient comparison with ASCII that can be used even + // for wide strings. Call this version when you know the + // length of 'data'. + bool NS_FASTCALL EqualsASCII(const char* aData, size_type aLen) const; + // An efficient comparison with ASCII that can be used even + // for wide strings. Call this version when 'data' is + // null-terminated. + bool NS_FASTCALL EqualsASCII(const char* aData) const; + + // An efficient comparison with Latin1 characters that can be used even for + // wide strings. 
+ bool EqualsLatin1(const char* aData, size_type aLength) const; + + // EqualsLiteral must ONLY be called with an actual literal string, or + // a char array *constant* declared without an explicit size and with an + // initializer that is a string literal or is otherwise null-terminated. + // Use EqualsASCII for other char array variables. + // (Although this method may happen to produce expected results for other + // char arrays that have bound one greater than the sequence of interest, + // such use is discouraged for reasons of readability and maintainability.) + // The template trick to acquire the array bound at compile time without + // using a macro is due to Corey Kosak, with much thanks. + template <int N> + inline bool EqualsLiteral(const char (&aStr)[N]) const { + return EqualsASCII(aStr, N - 1); + } + + // EqualsLiteral must ONLY be called with an actual literal string, or + // a char array *constant* declared without an explicit size and with an + // initializer that is a string literal or is otherwise null-terminated. + // Use EqualsASCII for other char array variables. + // (Although this method may happen to produce expected results for other + // char arrays that have bound one greater than the sequence of interest, + // such use is discouraged for reasons of readability and maintainability.) + // The template trick to acquire the array bound at compile time without + // using a macro is due to Corey Kosak, with much thanks. + template <size_t N, typename = std::enable_if_t<!std::is_same_v< + const char (&)[N], const char_type (&)[N]>>> + inline bool EqualsLiteral(const char_type (&aStr)[N]) const { + return *this == nsTLiteralString<char_type>(aStr); + } + + // The LowerCaseEquals methods compare the ASCII-lowercase version of + // this string (lowercasing only ASCII uppercase characters) to some + // ASCII/Literal string. The ASCII string is *not* lowercased for + // you. 
If you compare to an ASCII or literal string that contains an + // uppercase character, it is guaranteed to return false. We will + // throw assertions too. + bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData, + size_type aLen) const; + bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData) const; + + // LowerCaseEqualsLiteral must ONLY be called with an actual literal string, + // or a char array *constant* declared without an explicit size and with an + // initializer that is a string literal or is otherwise null-terminated. + // Use LowerCaseEqualsASCII for other char array variables. + // (Although this method may happen to produce expected results for other + // char arrays that have bound one greater than the sequence of interest, + // such use is discouraged for reasons of readability and maintainability.) + template <int N> + bool LowerCaseEqualsLiteral(const char (&aStr)[N]) const { + return LowerCaseEqualsASCII(aStr, N - 1); + } + + // Returns true if this string overlaps with the given string fragment. + bool IsDependentOn(const char_type* aStart, const char_type* aEnd) const { + // If it _isn't_ the case that one fragment starts after the other ends, + // or ends before the other starts, then, they conflict: + // + // !(f2.begin >= f1.aEnd || f2.aEnd <= f1.begin) + // + // Simplified, that gives us (To avoid relying on Undefined Behavior + // from comparing pointers from different allocations (which in + // principle gives the optimizer the permission to assume elsewhere + // that the pointers are from the same allocation), the comparisons + // are done on integers, which merely relies on implementation-defined + // behavior of converting pointers to integers. 
std::less and + // std::greater implementations don't actually provide the guarantees + // that they should.): + return (reinterpret_cast<uintptr_t>(aStart) < + reinterpret_cast<uintptr_t>(mData + mLength) && + reinterpret_cast<uintptr_t>(aEnd) > + reinterpret_cast<uintptr_t>(mData)); + } + + /** + * Search for the given substring within this string. + * + * @param aString is substring to be sought in this + * @param aOffset tells us where in this string to start searching + * @return offset in string, or kNotFound + */ + int32_t Find(const string_view& aString, index_type aOffset = 0) const; + + // Previously there was an overload of `Find()` which took a bool second + // argument. Avoid issues by explicitly preventing that overload. + // TODO: Remove this at some point. + template <typename I, + typename = std::enable_if_t<!std::is_same_v<I, index_type> && + std::is_convertible_v<I, index_type>>> + int32_t Find(const string_view& aString, I aOffset) const { + static_assert(!std::is_same_v<I, bool>, "offset must not be `bool`"); + return Find(aString, static_cast<index_type>(aOffset)); + } + + /** + * Search for the given ASCII substring within this string, ignoring case. + * + * @param aString is substring to be sought in this + * @param aOffset tells us where in this string to start searching + * @return offset in string, or kNotFound + */ + int32_t LowerCaseFindASCII(const std::string_view& aString, + index_type aOffset = 0) const; + + /** + * Scan the string backwards, looking for the given substring. 
*
* @param aString is substring to be sought in this
* @return offset in string, or kNotFound
*/
int32_t RFind(const string_view& aString) const;

// NOTE(review): presumably returns how many times the given character occurs
// in this string; the definition is not visible here - confirm.
size_type CountChar(char_type) const;

// Returns true when FindChar(aChar), searching from offset 0, reports
// anything other than kNotFound.
bool Contains(char_type aChar) const { return FindChar(aChar) != kNotFound; }

/**
 * Search for the first instance of a given char within this string
 *
 * @param aChar is the character to search for
 * @param aOffset tells us where in this string to start searching
 * @return offset in string, or kNotFound
 */
int32_t FindChar(char_type aChar, index_type aOffset = 0) const;

/**
 * Search for the last instance of a given char within this string
 *
 * @param aChar is the character to search for
 * @param aOffset tells us where in this string to start searching
 *        (NOTE(review): the default of -1 presumably means "start at the
 *        end of the string" - confirm against the definition)
 * @return offset in string, or kNotFound
 */
int32_t RFindChar(char_type aChar, int32_t aOffset = -1) const;

/**
 * This method searches this string for the first character found in
 * the given string.
 *
 * @param aSet contains set of chars to be found
 * @param aOffset tells us where in this string to start searching
 *        (counting from left)
 * @return offset in string, or kNotFound
 */
int32_t FindCharInSet(const string_view& aSet, index_type aOffset = 0) const;

/**
 * This method searches this string for the last character found in
 * the given string.
 *
 * @param aSet contains set of chars to be found
 * @param aOffset tells us where in this string to start searching
 *        (counting from left)
 * @return offset in string, or kNotFound
 */
int32_t RFindCharInSet(const string_view& aSet, int32_t aOffset = -1) const;

/**
 * Perform locale-independent string to double-precision float conversion.
 *
 * Leading spaces in the string will be ignored. The returned value will be
 * finite unless aErrorCode is set to a failed status.
*
* @param aErrorCode will contain error if one occurs
* @return double-precision float rep of string value
*/
double ToDouble(nsresult* aErrorCode) const;

/**
 * Perform locale-independent string to single-precision float conversion.
 *
 * Leading spaces in the string will be ignored. The returned value will be
 * finite unless aErrorCode is set to a failed status.
 *
 * @param aErrorCode will contain error if one occurs
 * @return single-precision float rep of string value
 */
float ToFloat(nsresult* aErrorCode) const;

/**
 * Similar to above ToDouble and ToFloat but allows trailing characters that
 * are not converted.
 *
 * @param aErrorCode will contain error if one occurs
 */
double ToDoubleAllowTrailingChars(nsresult* aErrorCode) const;
float ToFloatAllowTrailingChars(nsresult* aErrorCode) const;

protected:
nsTStringRepr() = delete;  // Never instantiate directly

// Stores the four representation fields exactly as given. The constructor
// body is empty: nothing is validated, copied or allocated here.
constexpr nsTStringRepr(char_type* aData, size_type aLength,
                        DataFlags aDataFlags, ClassFlags aClassFlags)
    : mData(aData),
      mLength(aLength),
      mDataFlags(aDataFlags),
      mClassFlags(aClassFlags) {}

// Upper bound on capacity, taken from the length storage type. This is the
// limit CheckCapacity() compares against.
static constexpr size_type kMaxCapacity = LengthStorage::kMax;

/**
 * Checks if the given capacity is valid for this string type.
+ */ + [[nodiscard]] static constexpr bool CheckCapacity(size_type aCapacity) { + return aCapacity <= kMaxCapacity; + } + + char_type* mData; + LengthStorage mLength; + DataFlags mDataFlags; + ClassFlags const mClassFlags; +}; + +extern template class nsTStringRepr<char>; +extern template class nsTStringRepr<char16_t>; + +} // namespace detail +} // namespace mozilla + +template <typename T> +int NS_FASTCALL Compare(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs, + nsTStringComparator<T> = nsTDefaultStringComparator<T>); + +extern template int NS_FASTCALL Compare<char>( + const mozilla::detail::nsTStringRepr<char>&, + const mozilla::detail::nsTStringRepr<char>&, nsTStringComparator<char>); + +extern template int NS_FASTCALL +Compare<char16_t>(const mozilla::detail::nsTStringRepr<char16_t>&, + const mozilla::detail::nsTStringRepr<char16_t>&, + nsTStringComparator<char16_t>); + +template <typename T> +inline constexpr bool operator!=( + const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return !aLhs.Equals(aRhs); +} + +template <typename T> +inline constexpr bool operator!=(const mozilla::detail::nsTStringRepr<T>& aLhs, + const T* aRhs) { + return !aLhs.Equals(aRhs); +} + +template <typename T> +inline bool operator<(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return Compare(aLhs, aRhs) < 0; +} + +template <typename T> +inline bool operator<=(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return Compare(aLhs, aRhs) <= 0; +} + +template <typename T> +inline bool operator==(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return aLhs.Equals(aRhs); +} + +template <typename T> +inline bool operator==(const mozilla::detail::nsTStringRepr<T>& aLhs, + const T* aRhs) { + return aLhs.Equals(aRhs); +} + +template <typename 
T> +inline bool operator>=(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return Compare(aLhs, aRhs) >= 0; +} + +template <typename T> +inline bool operator>(const mozilla::detail::nsTStringRepr<T>& aLhs, + const mozilla::detail::nsTStringRepr<T>& aRhs) { + return Compare(aLhs, aRhs) > 0; +} + +#endif diff --git a/xpcom/string/nsTSubstring.cpp b/xpcom/string/nsTSubstring.cpp new file mode 100644 index 0000000000..b81b845fee --- /dev/null +++ b/xpcom/string/nsTSubstring.cpp @@ -0,0 +1,1706 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "double-conversion/double-conversion.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/Printf.h" +#include "mozilla/ResultExtensions.h" + +#include "nsASCIIMask.h" +#include "nsCharTraits.h" +#include "nsISupports.h" +#include "nsString.h" +#include "nsTArray.h" + +#ifdef DEBUG +# include "nsStringStats.h" +#else +# define STRING_STAT_INCREMENT(_s) +#endif + +// It's not worthwhile to reallocate the buffer and memcpy the +// contents over when the size difference isn't large. With +// power-of-two allocation buckets and 64 as the typical inline +// capacity, considering that above 1000 the performance aspects +// of realloc and memcpy seem to be absorbed, relative to the old +// code, by the performance benefits of the new code being exact, +// we need to choose which transitions of 256 to 128, 512 to 256 +// and 1024 to 512 to allow. As a guess, let's pick the middle +// one as the largest potential transition that we forgo.
So +// we'll shrink from 1024 bucket to 512 bucket but not from 512 +// bucket to 256 bucket. We'll decide by comparing the difference +// of capacities. As bucket differences, the differences are 256 +// and 512. Since the capacities have various overheads, we +// can't compare with 256 or 512 exactly but it's easier to +// compare to some number that's between the two, so it's +// far away from either to ignore the overheads. +const uint32_t kNsStringBufferShrinkingThreshold = 384; + +using double_conversion::DoubleToStringConverter; + +// --------------------------------------------------------------------------- + +static const char16_t gNullChar = 0; + +char* const nsCharTraits<char>::sEmptyBuffer = + (char*)const_cast<char16_t*>(&gNullChar); +char16_t* const nsCharTraits<char16_t>::sEmptyBuffer = + const_cast<char16_t*>(&gNullChar); + +// --------------------------------------------------------------------------- + +static void ReleaseData(void* aData, nsAString::DataFlags aFlags) { + if (aFlags & nsAString::DataFlags::REFCOUNTED) { + nsStringBuffer::FromData(aData)->Release(); + } else if (aFlags & nsAString::DataFlags::OWNED) { + free(aData); + STRING_STAT_INCREMENT(AdoptFree); + // Treat this as destruction of a "StringAdopt" object for leak + // tracking purposes. + MOZ_LOG_DTOR(aData, "StringAdopt", 1); + } + // otherwise, nothing to do. 
+} + +// --------------------------------------------------------------------------- + +#ifdef XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE +template <typename T> +nsTSubstring<T>::nsTSubstring(char_type* aData, size_type aLength, + DataFlags aDataFlags, ClassFlags aClassFlags) + : ::mozilla::detail::nsTStringRepr<T>(aData, aLength, aDataFlags, + aClassFlags) { + AssertValid(); + + if (aDataFlags & DataFlags::OWNED) { + STRING_STAT_INCREMENT(Adopt); + MOZ_LOG_CTOR(this->mData, "StringAdopt", 1); + } +} +#endif /* XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE */ + +/** + * helper function for down-casting a nsTSubstring to an nsTAutoString. + */ +template <typename T> +inline const nsTAutoString<T>* AsAutoString(const nsTSubstring<T>* aStr) { + return static_cast<const nsTAutoString<T>*>(aStr); +} + +template <typename T> +mozilla::Result<mozilla::BulkWriteHandle<T>, nsresult> +nsTSubstring<T>::BulkWrite(size_type aCapacity, size_type aPrefixToPreserve, + bool aAllowShrinking) { + auto r = StartBulkWriteImpl(aCapacity, aPrefixToPreserve, aAllowShrinking); + if (MOZ_UNLIKELY(r.isErr())) { + return r.propagateErr(); + } + return mozilla::BulkWriteHandle<T>(this, r.unwrap()); +} + +template <typename T> +auto nsTSubstring<T>::StartBulkWriteImpl(size_type aCapacity, + size_type aPrefixToPreserve, + bool aAllowShrinking, + size_type aSuffixLength, + size_type aOldSuffixStart, + size_type aNewSuffixStart) + -> mozilla::Result<size_type, nsresult> { + // Note! Capacity does not include room for the terminating null char. + + MOZ_ASSERT(aPrefixToPreserve <= aCapacity, + "Requested preservation of an overlong prefix."); + MOZ_ASSERT(aNewSuffixStart + aSuffixLength <= aCapacity, + "Requesed move of suffix to out-of-bounds location."); + // Can't assert aOldSuffixStart, because mLength may not be valid anymore, + // since this method allows itself to be called more than once. + + // If zero capacity is requested, set the string to the special empty + // string. 
+ if (MOZ_UNLIKELY(!aCapacity)) { + ReleaseData(this->mData, this->mDataFlags); + SetToEmptyBuffer(); + return 0; + } + + // Note! Capacity() returns 0 when the string is immutable. + const size_type curCapacity = Capacity(); + + bool shrinking = false; + + // We've established that aCapacity > 0. + // |curCapacity == 0| means that the buffer is immutable or 0-sized, so we + // need to allocate a new buffer. We cannot use the existing buffer even + // though it might be large enough. + + if (aCapacity <= curCapacity) { + if (aAllowShrinking) { + shrinking = true; + } else { + char_traits::move(this->mData + aNewSuffixStart, + this->mData + aOldSuffixStart, aSuffixLength); + if (aSuffixLength) { + char_traits::uninitialize(this->mData + aPrefixToPreserve, + XPCOM_MIN(aNewSuffixStart - aPrefixToPreserve, + kNsStringBufferMaxPoison)); + char_traits::uninitialize( + this->mData + aNewSuffixStart + aSuffixLength, + XPCOM_MIN(curCapacity + 1 - aNewSuffixStart - aSuffixLength, + kNsStringBufferMaxPoison)); + } else { + char_traits::uninitialize(this->mData + aPrefixToPreserve, + XPCOM_MIN(curCapacity + 1 - aPrefixToPreserve, + kNsStringBufferMaxPoison)); + } + return curCapacity; + } + } + + char_type* oldData = this->mData; + DataFlags oldFlags = this->mDataFlags; + + char_type* newData; + DataFlags newDataFlags; + size_type newCapacity; + + // If this is an nsTAutoStringN, it's possible that we can use the inline + // buffer. + if ((this->mClassFlags & ClassFlags::INLINE) && + (aCapacity <= AsAutoString(this)->mInlineCapacity)) { + newCapacity = AsAutoString(this)->mInlineCapacity; + newData = (char_type*)AsAutoString(this)->mStorage; + newDataFlags = DataFlags::TERMINATED | DataFlags::INLINE; + } else { + // If |aCapacity > kMaxCapacity|, then our doubling algorithm may not be + // able to allocate it. Just bail out in cases like that. We don't want + // to be allocating 2GB+ strings anyway. 
+ static_assert((sizeof(nsStringBuffer) & 0x1) == 0, + "bad size for nsStringBuffer"); + if (MOZ_UNLIKELY(!this->CheckCapacity(aCapacity))) { + return mozilla::Err(NS_ERROR_OUT_OF_MEMORY); + } + + // We increase our capacity so that the allocated buffer grows + // exponentially, which gives us amortized O(1) appending. Below the + // threshold, we use powers-of-two. Above the threshold, we grow by at + // least 1.125, rounding up to the nearest MiB. + const size_type slowGrowthThreshold = 8 * 1024 * 1024; + + // nsStringBuffer allocates sizeof(nsStringBuffer) + passed size, and + // storageSize below wants extra 1 * sizeof(char_type). + const size_type neededExtraSpace = + sizeof(nsStringBuffer) / sizeof(char_type) + 1; + + size_type temp; + if (aCapacity >= slowGrowthThreshold) { + size_type minNewCapacity = + curCapacity + (curCapacity >> 3); // multiply by 1.125 + temp = XPCOM_MAX(aCapacity, minNewCapacity) + neededExtraSpace; + + // Round up to the next multiple of MiB, but ensure the expected + // capacity doesn't include the extra space required by nsStringBuffer + // and null-termination. + const size_t MiB = 1 << 20; + temp = (MiB * ((temp + MiB - 1) / MiB)) - neededExtraSpace; + } else { + // Round up to the next power of two. + temp = + mozilla::RoundUpPow2(aCapacity + neededExtraSpace) - neededExtraSpace; + } + + newCapacity = XPCOM_MIN(temp, base_string_type::kMaxCapacity); + MOZ_ASSERT(newCapacity >= aCapacity, + "should have hit the early return at the top"); + // Avoid shrinking if the new buffer size is close to the old. Note that + // unsigned underflow is defined behavior. + if ((curCapacity - newCapacity) <= kNsStringBufferShrinkingThreshold && + (this->mDataFlags & DataFlags::REFCOUNTED)) { + MOZ_ASSERT(aAllowShrinking, "How come we didn't return earlier?"); + // We're already close enough to the right size. 
+ newData = oldData; + newCapacity = curCapacity; + } else { + size_type storageSize = (newCapacity + 1) * sizeof(char_type); + // Since we allocate only by powers of 2 we always fit into a full + // mozjemalloc bucket, it's not useful to use realloc, which may spend + // time uselessly copying too much. + nsStringBuffer* newHdr = nsStringBuffer::Alloc(storageSize).take(); + if (newHdr) { + newData = (char_type*)newHdr->Data(); + } else if (shrinking) { + // We're still in a consistent state. + // + // Since shrinking is just a memory footprint optimization, we + // don't propagate OOM if we tried to shrink in order to avoid + // OOM crashes from infallible callers. If we're lucky, soon enough + // a fallible caller reaches OOM and is able to deal or we end up + // disposing of this string before reaching OOM again. + newData = oldData; + newCapacity = curCapacity; + } else { + return mozilla::Err(NS_ERROR_OUT_OF_MEMORY); + } + } + newDataFlags = DataFlags::TERMINATED | DataFlags::REFCOUNTED; + } + + this->mData = newData; + this->mDataFlags = newDataFlags; + + if (oldData == newData) { + char_traits::move(newData + aNewSuffixStart, oldData + aOldSuffixStart, + aSuffixLength); + if (aSuffixLength) { + char_traits::uninitialize(this->mData + aPrefixToPreserve, + XPCOM_MIN(aNewSuffixStart - aPrefixToPreserve, + kNsStringBufferMaxPoison)); + char_traits::uninitialize( + this->mData + aNewSuffixStart + aSuffixLength, + XPCOM_MIN(newCapacity + 1 - aNewSuffixStart - aSuffixLength, + kNsStringBufferMaxPoison)); + } else { + char_traits::uninitialize(this->mData + aPrefixToPreserve, + XPCOM_MIN(newCapacity + 1 - aPrefixToPreserve, + kNsStringBufferMaxPoison)); + } + } else { + char_traits::copy(newData, oldData, aPrefixToPreserve); + char_traits::copy(newData + aNewSuffixStart, oldData + aOldSuffixStart, + aSuffixLength); + ReleaseData(oldData, oldFlags); + } + + return newCapacity; +} + +template <typename T> +void nsTSubstring<T>::FinishBulkWriteImpl(size_type aLength) { 
+ if (aLength) { + FinishBulkWriteImplImpl(aLength); + } else { + ReleaseData(this->mData, this->mDataFlags); + SetToEmptyBuffer(); + } + AssertValid(); +} + +template <typename T> +void nsTSubstring<T>::Finalize() { + ReleaseData(this->mData, this->mDataFlags); + // this->mData, this->mLength, and this->mDataFlags are purposefully left + // dangling +} + +template <typename T> +bool nsTSubstring<T>::ReplacePrep(index_type aCutStart, size_type aCutLength, + size_type aNewLength) { + aCutLength = XPCOM_MIN(aCutLength, this->mLength - aCutStart); + + mozilla::CheckedInt<size_type> newTotalLen = this->Length(); + newTotalLen += aNewLength; + newTotalLen -= aCutLength; + if (!newTotalLen.isValid()) { + return false; + } + + if (aCutStart == this->mLength && Capacity() > newTotalLen.value()) { + this->mDataFlags &= ~DataFlags::VOIDED; + this->mData[newTotalLen.value()] = char_type(0); + this->mLength = newTotalLen.value(); + return true; + } + + return ReplacePrepInternal(aCutStart, aCutLength, aNewLength, + newTotalLen.value()); +} + +template <typename T> +bool nsTSubstring<T>::ReplacePrepInternal(index_type aCutStart, + size_type aCutLen, size_type aFragLen, + size_type aNewLen) { + size_type newSuffixStart = aCutStart + aFragLen; + size_type oldSuffixStart = aCutStart + aCutLen; + size_type suffixLength = this->mLength - oldSuffixStart; + + mozilla::Result<size_type, nsresult> r = StartBulkWriteImpl( + aNewLen, aCutStart, false, suffixLength, oldSuffixStart, newSuffixStart); + if (r.isErr()) { + return false; + } + FinishBulkWriteImpl(aNewLen); + return true; +} + +template <typename T> +typename nsTSubstring<T>::size_type nsTSubstring<T>::Capacity() const { + // return 0 to indicate an immutable or 0-sized buffer + + size_type capacity; + if (this->mDataFlags & DataFlags::REFCOUNTED) { + // if the string is readonly, then we pretend that it has no capacity. 
+ nsStringBuffer* hdr = nsStringBuffer::FromData(this->mData); + if (hdr->IsReadonly()) { + capacity = 0; + } else { + capacity = (size_t(hdr->StorageSize()) / sizeof(char_type)) - 1; + } + } else if (this->mDataFlags & DataFlags::INLINE) { + MOZ_ASSERT(this->mClassFlags & ClassFlags::INLINE); + capacity = AsAutoString(this)->mInlineCapacity; + } else if (this->mDataFlags & DataFlags::OWNED) { + // we don't store the capacity of an adopted buffer because that would + // require an additional member field. the best we can do is base the + // capacity on our length. remains to be seen if this is the right + // trade-off. + capacity = this->mLength; + } else { + capacity = 0; + } + + return capacity; +} + +template <typename T> +bool nsTSubstring<T>::EnsureMutable(size_type aNewLen) { + if (aNewLen == size_type(-1) || aNewLen == this->mLength) { + if (this->mDataFlags & (DataFlags::INLINE | DataFlags::OWNED)) { + return true; + } + if ((this->mDataFlags & DataFlags::REFCOUNTED) && + !nsStringBuffer::FromData(this->mData)->IsReadonly()) { + return true; + } + + aNewLen = this->mLength; + } + return SetLength(aNewLen, mozilla::fallible); +} + +// --------------------------------------------------------------------------- + +// This version of Assign is optimized for single-character assignment. +template <typename T> +void nsTSubstring<T>::Assign(char_type aChar) { + if (MOZ_UNLIKELY(!Assign(aChar, mozilla::fallible))) { + AllocFailed(1); + } +} + +template <typename T> +bool nsTSubstring<T>::Assign(char_type aChar, const fallible_t&) { + auto r = StartBulkWriteImpl(1, 0, true); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + *this->mData = aChar; + FinishBulkWriteImpl(1); + return true; +} + +template <typename T> +void nsTSubstring<T>::Assign(const char_type* aData, size_type aLength) { + if (MOZ_UNLIKELY(!Assign(aData, aLength, mozilla::fallible))) { + AllocFailed(aLength == size_type(-1) ? 
char_traits::length(aData) + : aLength); + } +} + +template <typename T> +bool nsTSubstring<T>::Assign(const char_type* aData, + const fallible_t& aFallible) { + return Assign(aData, size_type(-1), aFallible); +} + +template <typename T> +bool nsTSubstring<T>::Assign(const char_type* aData, size_type aLength, + const fallible_t& aFallible) { + if (!aData || aLength == 0) { + Truncate(); + return true; + } + + if (MOZ_UNLIKELY(aLength == size_type(-1))) { + aLength = char_traits::length(aData); + } + + if (MOZ_UNLIKELY(this->IsDependentOn(aData, aData + aLength))) { + return Assign(string_type(aData, aLength), aFallible); + } + + auto r = StartBulkWriteImpl(aLength, 0, true); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + char_traits::copy(this->mData, aData, aLength); + FinishBulkWriteImpl(aLength); + return true; +} + +template <typename T> +void nsTSubstring<T>::AssignASCII(const char* aData, size_type aLength) { + if (MOZ_UNLIKELY(!AssignASCII(aData, aLength, mozilla::fallible))) { + AllocFailed(aLength); + } +} + +template <typename T> +bool nsTSubstring<T>::AssignASCII(const char* aData, size_type aLength, + const fallible_t& aFallible) { + MOZ_ASSERT(aLength != size_type(-1)); + + // A Unicode string can't depend on an ASCII string buffer, + // so this dependence check only applies to CStrings. 
+ if constexpr (std::is_same_v<T, char>) { + if (this->IsDependentOn(aData, aData + aLength)) { + return Assign(string_type(aData, aLength), aFallible); + } + } + + auto r = StartBulkWriteImpl(aLength, 0, true); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + char_traits::copyASCII(this->mData, aData, aLength); + FinishBulkWriteImpl(aLength); + return true; +} + +template <typename T> +void nsTSubstring<T>::AssignLiteral(const char_type* aData, size_type aLength) { + ReleaseData(this->mData, this->mDataFlags); + SetData(const_cast<char_type*>(aData), aLength, + DataFlags::TERMINATED | DataFlags::LITERAL); +} + +template <typename T> +void nsTSubstring<T>::Assign(const self_type& aStr) { + if (!Assign(aStr, mozilla::fallible)) { + AllocFailed(aStr.Length()); + } +} + +template <typename T> +bool nsTSubstring<T>::Assign(const self_type& aStr, + const fallible_t& aFallible) { + // |aStr| could be sharable. We need to check its flags to know how to + // deal with it. + + if (&aStr == this) { + return true; + } + + if (!aStr.mLength) { + Truncate(); + this->mDataFlags |= aStr.mDataFlags & DataFlags::VOIDED; + return true; + } + + if (aStr.mDataFlags & DataFlags::REFCOUNTED) { + // nice! we can avoid a string copy :-) + + // |aStr| should be null-terminated + NS_ASSERTION(aStr.mDataFlags & DataFlags::TERMINATED, + "shared, but not terminated"); + + ReleaseData(this->mData, this->mDataFlags); + + SetData(aStr.mData, aStr.mLength, + DataFlags::TERMINATED | DataFlags::REFCOUNTED); + + // get an owning reference to the this->mData + nsStringBuffer::FromData(this->mData)->AddRef(); + return true; + } else if (aStr.mDataFlags & DataFlags::LITERAL) { + MOZ_ASSERT(aStr.mDataFlags & DataFlags::TERMINATED, "Unterminated literal"); + + AssignLiteral(aStr.mData, aStr.mLength); + return true; + } + + // else, treat this like an ordinary assignment. 
+ return Assign(aStr.Data(), aStr.Length(), aFallible); +} + +template <typename T> +void nsTSubstring<T>::Assign(self_type&& aStr) { + if (!Assign(std::move(aStr), mozilla::fallible)) { + AllocFailed(aStr.Length()); + } +} + +template <typename T> +void nsTSubstring<T>::AssignOwned(self_type&& aStr) { + MOZ_ASSERT(aStr.mDataFlags & (DataFlags::REFCOUNTED | DataFlags::OWNED), + "neither shared nor owned"); + + // If they have a REFCOUNTED or OWNED buffer, we can avoid a copy - so steal + // their buffer and reset them to the empty string. + + // |aStr| should be null-terminated + MOZ_ASSERT(aStr.mDataFlags & DataFlags::TERMINATED, + "shared or owned, but not terminated"); + + ReleaseData(this->mData, this->mDataFlags); + + SetData(aStr.mData, aStr.mLength, aStr.mDataFlags); + aStr.SetToEmptyBuffer(); +} + +template <typename T> +bool nsTSubstring<T>::Assign(self_type&& aStr, const fallible_t& aFallible) { + // We're moving |aStr| in this method, so we need to try to steal the data, + // and in the fallback perform a copy-assignment followed by a truncation of + // the original string. + + if (&aStr == this) { + NS_WARNING("Move assigning a string to itself?"); + return true; + } + + if (aStr.mDataFlags & (DataFlags::REFCOUNTED | DataFlags::OWNED)) { + AssignOwned(std::move(aStr)); + return true; + } + + // Otherwise treat this as a normal assignment, and truncate the moved string. + // We don't truncate the source string if the allocation failed. 
+ if (!Assign(aStr, aFallible)) { + return false; + } + aStr.Truncate(); + return true; +} + +template <typename T> +void nsTSubstring<T>::Assign(const substring_tuple_type& aTuple) { + if (!Assign(aTuple, mozilla::fallible)) { + AllocFailed(aTuple.Length()); + } +} + +template <typename T> +bool nsTSubstring<T>::AssignNonDependent(const substring_tuple_type& aTuple, + size_type aTupleLength, + const mozilla::fallible_t& aFallible) { + NS_ASSERTION(aTuple.Length() == aTupleLength, "wrong length passed"); + + auto r = StartBulkWriteImpl(aTupleLength); + if (r.isErr()) { + return false; + } + + aTuple.WriteTo(this->mData, aTupleLength); + + FinishBulkWriteImpl(aTupleLength); + return true; +} + +template <typename T> +bool nsTSubstring<T>::Assign(const substring_tuple_type& aTuple, + const fallible_t& aFallible) { + const auto [isDependentOnThis, tupleLength] = + aTuple.IsDependentOnWithLength(this->mData, this->mData + this->mLength); + if (isDependentOnThis) { + string_type temp; + self_type& tempSubstring = temp; + if (!tempSubstring.AssignNonDependent(aTuple, tupleLength, aFallible)) { + return false; + } + AssignOwned(std::move(temp)); + return true; + } + + return AssignNonDependent(aTuple, tupleLength, aFallible); +} + +template <typename T> +void nsTSubstring<T>::Adopt(char_type* aData, size_type aLength) { + if (aData) { + ReleaseData(this->mData, this->mDataFlags); + + if (aLength == size_type(-1)) { + aLength = char_traits::length(aData); + } + + SetData(aData, aLength, DataFlags::TERMINATED | DataFlags::OWNED); + + STRING_STAT_INCREMENT(Adopt); + // Treat this as construction of a "StringAdopt" object for leak + // tracking purposes. + MOZ_LOG_CTOR(this->mData, "StringAdopt", 1); + } else { + SetIsVoid(true); + } +} + +// This version of Replace is optimized for single-character replacement. 
+template <typename T> +void nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + char_type aChar) { + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + if (ReplacePrep(aCutStart, aCutLength, 1)) { + this->mData[aCutStart] = aChar; + } +} + +template <typename T> +bool nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + char_type aChar, const fallible_t&) { + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + if (!ReplacePrep(aCutStart, aCutLength, 1)) { + return false; + } + + this->mData[aCutStart] = aChar; + + return true; +} + +template <typename T> +void nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength) { + if (!Replace(aCutStart, aCutLength, aData, aLength, mozilla::fallible)) { + AllocFailed(this->Length() - aCutLength + 1); + } +} + +template <typename T> +bool nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength, + const fallible_t& aFallible) { + // unfortunately, some callers pass null :-( + if (!aData) { + aLength = 0; + } else { + if (aLength == size_type(-1)) { + aLength = char_traits::length(aData); + } + + if (this->IsDependentOn(aData, aData + aLength)) { + nsTAutoString<T> temp(aData, aLength); + return Replace(aCutStart, aCutLength, temp, aFallible); + } + } + + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + bool ok = ReplacePrep(aCutStart, aCutLength, aLength); + if (!ok) { + return false; + } + + if (aLength > 0) { + char_traits::copy(this->mData + aCutStart, aData, aLength); + } + + return true; +} + +template <typename T> +void nsTSubstring<T>::Replace(index_type aCutStart, size_type aCutLength, + const substring_tuple_type& aTuple) { + const auto [isDependentOnThis, tupleLength] = + aTuple.IsDependentOnWithLength(this->mData, this->mData + this->mLength); + + if (isDependentOnThis) { + nsTAutoString<T> temp; + if (!temp.AssignNonDependent(aTuple, tupleLength, 
mozilla::fallible)) { + AllocFailed(tupleLength); + } + Replace(aCutStart, aCutLength, temp); + return; + } + + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + if (ReplacePrep(aCutStart, aCutLength, tupleLength) && tupleLength > 0) { + aTuple.WriteTo(this->mData + aCutStart, tupleLength); + } +} + +template <typename T> +void nsTSubstring<T>::ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type* aData, + size_type aLength) { + aCutStart = XPCOM_MIN(aCutStart, this->Length()); + + if (!aCutStart && aCutLength == this->Length() && + !(this->mDataFlags & DataFlags::REFCOUNTED)) { + // Check for REFCOUNTED above to avoid undoing the effect of + // SetCapacity(). + AssignLiteral(aData, aLength); + } else if (ReplacePrep(aCutStart, aCutLength, aLength) && aLength > 0) { + char_traits::copy(this->mData + aCutStart, aData, aLength); + } +} + +template <typename T> +void nsTSubstring<T>::Append(char_type aChar) { + if (MOZ_UNLIKELY(!Append(aChar, mozilla::fallible))) { + AllocFailed(this->mLength + 1); + } +} + +template <typename T> +bool nsTSubstring<T>::Append(char_type aChar, const fallible_t& aFallible) { + size_type oldLen = this->mLength; + size_type newLen = oldLen + 1; // Can't overflow + auto r = StartBulkWriteImpl(newLen, oldLen, false); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + this->mData[oldLen] = aChar; + FinishBulkWriteImpl(newLen); + return true; +} + +template <typename T> +void nsTSubstring<T>::Append(const char_type* aData, size_type aLength) { + if (MOZ_UNLIKELY(!Append(aData, aLength, mozilla::fallible))) { + AllocFailed(this->mLength + (aLength == size_type(-1) + ? 
char_traits::length(aData) + : aLength)); + } +} + +template <typename T> +bool nsTSubstring<T>::Append(const char_type* aData, size_type aLength, + const fallible_t& aFallible) { + if (MOZ_UNLIKELY(aLength == size_type(-1))) { + aLength = char_traits::length(aData); + } + + if (MOZ_UNLIKELY(!aLength)) { + // Avoid undoing the effect of SetCapacity() if both + // mLength and aLength are zero. + return true; + } + + if (MOZ_UNLIKELY(this->IsDependentOn(aData, aData + aLength))) { + return Append(string_type(aData, aLength), mozilla::fallible); + } + size_type oldLen = this->mLength; + mozilla::CheckedInt<size_type> newLen(oldLen); + newLen += aLength; + if (MOZ_UNLIKELY(!newLen.isValid())) { + return false; + } + auto r = StartBulkWriteImpl(newLen.value(), oldLen, false); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + char_traits::copy(this->mData + oldLen, aData, aLength); + FinishBulkWriteImpl(newLen.value()); + return true; +} + +template <typename T> +void nsTSubstring<T>::AppendASCII(const char* aData, size_type aLength) { + if (MOZ_UNLIKELY(!AppendASCII(aData, aLength, mozilla::fallible))) { + AllocFailed(this->mLength + + (aLength == size_type(-1) ? strlen(aData) : aLength)); + } +} + +template <typename T> +bool nsTSubstring<T>::AppendASCII(const char* aData, + const fallible_t& aFallible) { + return AppendASCII(aData, size_type(-1), aFallible); +} + +template <typename T> +bool nsTSubstring<T>::AppendASCII(const char* aData, size_type aLength, + const fallible_t& aFallible) { + if (MOZ_UNLIKELY(aLength == size_type(-1))) { + aLength = strlen(aData); + } + + if (MOZ_UNLIKELY(!aLength)) { + // Avoid undoing the effect of SetCapacity() if both + // mLength and aLength are zero. 
+ return true; + } + + if constexpr (std::is_same_v<T, char>) { + // 16-bit string can't depend on an 8-bit buffer + if (MOZ_UNLIKELY(this->IsDependentOn(aData, aData + aLength))) { + return Append(string_type(aData, aLength), mozilla::fallible); + } + } + + size_type oldLen = this->mLength; + mozilla::CheckedInt<size_type> newLen(oldLen); + newLen += aLength; + if (MOZ_UNLIKELY(!newLen.isValid())) { + return false; + } + auto r = StartBulkWriteImpl(newLen.value(), oldLen, false); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + char_traits::copyASCII(this->mData + oldLen, aData, aLength); + FinishBulkWriteImpl(newLen.value()); + return true; +} + +template <typename T> +void nsTSubstring<T>::Append(const self_type& aStr) { + if (MOZ_UNLIKELY(!Append(aStr, mozilla::fallible))) { + AllocFailed(this->mLength + aStr.Length()); + } +} + +template <typename T> +bool nsTSubstring<T>::Append(const self_type& aStr, + const fallible_t& aFallible) { + // Check refcounted to avoid undoing the effects of SetCapacity(). + if (MOZ_UNLIKELY(!this->mLength && + !(this->mDataFlags & DataFlags::REFCOUNTED))) { + return Assign(aStr, mozilla::fallible); + } + return Append(aStr.BeginReading(), aStr.Length(), mozilla::fallible); +} + +template <typename T> +void nsTSubstring<T>::Append(const substring_tuple_type& aTuple) { + if (MOZ_UNLIKELY(!Append(aTuple, mozilla::fallible))) { + AllocFailed(this->mLength + aTuple.Length()); + } +} + +template <typename T> +bool nsTSubstring<T>::Append(const substring_tuple_type& aTuple, + const fallible_t& aFallible) { + const auto [isDependentOnThis, tupleLength] = + aTuple.IsDependentOnWithLength(this->mData, this->mData + this->mLength); + + if (MOZ_UNLIKELY(!tupleLength)) { + // Avoid undoing the effect of SetCapacity() if both + // mLength and tupleLength are zero. 
+ return true; + } + + if (MOZ_UNLIKELY(isDependentOnThis)) { + return Append(string_type(aTuple), aFallible); + } + + size_type oldLen = this->mLength; + mozilla::CheckedInt<size_type> newLen(oldLen); + newLen += tupleLength; + if (MOZ_UNLIKELY(!newLen.isValid())) { + return false; + } + auto r = StartBulkWriteImpl(newLen.value(), oldLen, false); + if (MOZ_UNLIKELY(r.isErr())) { + return false; + } + aTuple.WriteTo(this->mData + oldLen, tupleLength); + FinishBulkWriteImpl(newLen.value()); + return true; +} + +template <typename T> +void nsTSubstring<T>::SetCapacity(size_type aCapacity) { + if (!SetCapacity(aCapacity, mozilla::fallible)) { + AllocFailed(aCapacity); + } +} + +template <typename T> +bool nsTSubstring<T>::SetCapacity(size_type aCapacity, const fallible_t&) { + size_type length = this->mLength; + // This method can no longer be used to shorten the + // logical length. + size_type capacity = XPCOM_MAX(aCapacity, length); + + auto r = StartBulkWriteImpl(capacity, length, true); + if (r.isErr()) { + return false; + } + + if (MOZ_UNLIKELY(!capacity)) { + // Zero capacity was requested on a zero-length + // string. In this special case, we are pointing + // to the special empty buffer, which is already + // zero-terminated and not writable, so we must + // not attempt to zero-terminate it. + AssertValid(); + return true; + } + + // FinishBulkWriteImpl with argument zero releases + // the heap-allocated buffer. However, SetCapacity() + // is a special case that allows mLength to be zero + // while a heap-allocated buffer exists. + // By calling FinishBulkWriteImplImpl, we skip the + // zero case handling that's inappropriate in the + // SetCapacity() case. 
+ FinishBulkWriteImplImpl(length); + return true; +} + +template <typename T> +void nsTSubstring<T>::SetLength(size_type aLength) { + if (!SetLength(aLength, mozilla::fallible)) { + AllocFailed(aLength); + } +} + +template <typename T> +bool nsTSubstring<T>::SetLength(size_type aLength, + const fallible_t& aFallible) { + size_type preserve = XPCOM_MIN(aLength, this->Length()); + auto r = StartBulkWriteImpl(aLength, preserve, true); + if (r.isErr()) { + return false; + } + + FinishBulkWriteImpl(aLength); + + return true; +} + +template <typename T> +void nsTSubstring<T>::Truncate() { + ReleaseData(this->mData, this->mDataFlags); + SetToEmptyBuffer(); + AssertValid(); +} + +template <typename T> +void nsTSubstring<T>::SetIsVoid(bool aVal) { + if (aVal) { + Truncate(); + this->mDataFlags |= DataFlags::VOIDED; + } else { + this->mDataFlags &= ~DataFlags::VOIDED; + } +} + +template <typename T> +void nsTSubstring<T>::StripChar(char_type aChar) { + if (this->mLength == 0) { + return; + } + + if (!EnsureMutable()) { // XXX do this lazily? + AllocFailed(this->mLength); + } + + // XXX(darin): this code should defer writing until necessary. + + char_type* to = this->mData; + char_type* from = this->mData; + char_type* end = this->mData + this->mLength; + + while (from < end) { + char_type theChar = *from++; + if (aChar != theChar) { + *to++ = theChar; + } + } + *to = char_type(0); // add the null + this->mLength = to - this->mData; +} + +template <typename T> +void nsTSubstring<T>::StripChars(const char_type* aChars) { + if (this->mLength == 0) { + return; + } + + if (!EnsureMutable()) { // XXX do this lazily? + AllocFailed(this->mLength); + } + + // XXX(darin): this code should defer writing until necessary. 
+ + char_type* to = this->mData; + char_type* from = this->mData; + char_type* end = this->mData + this->mLength; + + while (from < end) { + char_type theChar = *from++; + const char_type* test = aChars; + + for (; *test && *test != theChar; ++test) + ; + + if (!*test) { + // Not stripped, copy this char. + *to++ = theChar; + } + } + *to = char_type(0); // add the null + this->mLength = to - this->mData; +} + +template <typename T> +void nsTSubstring<T>::StripTaggedASCII(const ASCIIMaskArray& aToStrip) { + if (this->mLength == 0) { + return; + } + + size_t untaggedPrefixLength = 0; + for (; untaggedPrefixLength < this->mLength; ++untaggedPrefixLength) { + uint32_t theChar = (uint32_t)this->mData[untaggedPrefixLength]; + if (mozilla::ASCIIMask::IsMasked(aToStrip, theChar)) { + break; + } + } + + if (untaggedPrefixLength == this->mLength) { + return; + } + + if (!EnsureMutable()) { + AllocFailed(this->mLength); + } + + char_type* to = this->mData + untaggedPrefixLength; + char_type* from = to; + char_type* end = this->mData + this->mLength; + + while (from < end) { + uint32_t theChar = (uint32_t)*from++; + // Replacing this with a call to ASCIIMask::IsMasked + // regresses performance somewhat, so leaving it inlined. + if (!mozilla::ASCIIMask::IsMasked(aToStrip, theChar)) { + // Not stripped, copy this char. + *to++ = (char_type)theChar; + } + } + *to = char_type(0); // add the null + this->mLength = to - this->mData; +} + +template <typename T> +void nsTSubstring<T>::StripCRLF() { + // Expanding this call to copy the code from StripTaggedASCII + // instead of just calling it does somewhat help with performance + // but it is not worth it given the duplicated code. 
+ StripTaggedASCII(mozilla::ASCIIMask::MaskCRLF()); +} + +template <typename T> +struct MOZ_STACK_CLASS PrintfAppend : public mozilla::PrintfTarget { + explicit PrintfAppend(nsTSubstring<T>* aString) : mString(aString) {} + + bool append(const char* aStr, size_t aLen) override { + if (aLen == 0) { + return true; + } + + mString->AppendASCII(aStr, aLen); + return true; + } + + private: + nsTSubstring<T>* mString; +}; + +template <typename T> +void nsTSubstring<T>::AppendPrintf(const char* aFormat, ...) { + PrintfAppend<T> appender(this); + va_list ap; + va_start(ap, aFormat); + bool r = appender.vprint(aFormat, ap); + if (!r) { + MOZ_CRASH("Allocation or other failure in PrintfTarget::print"); + } + va_end(ap); +} + +template <typename T> +void nsTSubstring<T>::AppendVprintf(const char* aFormat, va_list aAp) { + PrintfAppend<T> appender(this); + bool r = appender.vprint(aFormat, aAp); + if (!r) { + MOZ_CRASH("Allocation or other failure in PrintfTarget::print"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntDec(int32_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntDec(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntDec(uint32_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntDec(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntOct(uint32_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntOct(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntHex(uint32_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntHex(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure 
while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntDec(int64_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntDec(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntDec(uint64_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntDec(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntOct(uint64_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntOct(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +template <typename T> +void nsTSubstring<T>::AppendIntHex(uint64_t aInteger) { + PrintfAppend<T> appender(this); + bool r = appender.appendIntHex(aInteger); + if (MOZ_UNLIKELY(!r)) { + MOZ_CRASH("Allocation or other failure while appending integers"); + } +} + +// Returns the length of the formatted aDouble in aBuf. 
+static int FormatWithoutTrailingZeros(char (&aBuf)[40], double aDouble, + int aPrecision) { + static const DoubleToStringConverter converter( + DoubleToStringConverter::UNIQUE_ZERO | + DoubleToStringConverter::NO_TRAILING_ZERO | + DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + "Infinity", "NaN", 'e', -6, 21, 6, 1); + double_conversion::StringBuilder builder(aBuf, sizeof(aBuf)); + converter.ToPrecision(aDouble, aPrecision, &builder); + int length = builder.position(); + builder.Finalize(); + return length; +} + +template <typename T> +void nsTSubstring<T>::AppendFloat(float aFloat) { + char buf[40]; + int length = FormatWithoutTrailingZeros(buf, aFloat, 6); + AppendASCII(buf, length); +} + +template <typename T> +void nsTSubstring<T>::AppendFloat(double aFloat) { + char buf[40]; + int length = FormatWithoutTrailingZeros(buf, aFloat, 15); + AppendASCII(buf, length); +} + +template <typename T> +size_t nsTSubstring<T>::SizeOfExcludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const { + if (this->mDataFlags & DataFlags::REFCOUNTED) { + return nsStringBuffer::FromData(this->mData) + ->SizeOfIncludingThisIfUnshared(aMallocSizeOf); + } + if (this->mDataFlags & DataFlags::OWNED) { + return aMallocSizeOf(this->mData); + } + + // If we reach here, exactly one of the following must be true: + // - DataFlags::VOIDED is set, and this->mData points to sEmptyBuffer; + // - DataFlags::INLINE is set, and this->mData points to a buffer within a + // string object (e.g. nsAutoString); + // - None of DataFlags::REFCOUNTED, DataFlags::OWNED, DataFlags::INLINE is + // set, and this->mData points to a buffer owned by something else. + // + // In all three cases, we don't measure it. + return 0; +} + +template <typename T> +size_t nsTSubstring<T>::SizeOfExcludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const { + // This is identical to SizeOfExcludingThisIfUnshared except for the + // DataFlags::REFCOUNTED case. 
+ if (this->mDataFlags & DataFlags::REFCOUNTED) { + return nsStringBuffer::FromData(this->mData) + ->SizeOfIncludingThisEvenIfShared(aMallocSizeOf); + } + if (this->mDataFlags & DataFlags::OWNED) { + return aMallocSizeOf(this->mData); + } + return 0; +} + +template <typename T> +size_t nsTSubstring<T>::SizeOfIncludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThisIfUnshared(aMallocSizeOf); +} + +template <typename T> +size_t nsTSubstring<T>::SizeOfIncludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThisEvenIfShared(aMallocSizeOf); +} + +template <typename T> +nsTSubstringSplitter<T> nsTSubstring<T>::Split(const char_type aChar) const { + return nsTSubstringSplitter<T>( + nsTCharSeparatedTokenizerTemplate< + NS_TokenizerIgnoreNothing, T, + nsTokenizerFlags::IncludeEmptyTokenAtEnd>(*this, aChar)); +} + +// Common logic for nsTSubstring<T>::ToInteger and nsTSubstring<T>::ToInteger64. +template <typename T, typename int_type> +int_type ToIntegerCommon(const nsTSubstring<T>& aSrc, nsresult* aErrorCode, + uint32_t aRadix) { + MOZ_ASSERT(aRadix == 10 || aRadix == 16); + + // Initial value, override if we find an integer. + *aErrorCode = NS_ERROR_ILLEGAL_VALUE; + + // Begin by skipping over leading chars that shouldn't be part of the number. + auto cp = aSrc.BeginReading(); + auto endcp = aSrc.EndReading(); + bool negate = false; + bool done = false; + + // NB: For backwards compatibility I'm not going to change this logic but + // it seems really odd. Previously there was logic to auto-detect the + // radix if kAutoDetect was passed in. In practice this value was never + // used, so it pretended to auto detect and skipped some preceding + // letters (excluding valid hex digits) but never used the result. + // + // For example if you pass in "Get the number: 10", aRadix = 10 we'd + // skip the 'G', and then fail to parse "et the number: 10". 
If aRadix = + // 16 we'd skip the 'G', and parse just 'e' returning 14. + while ((cp < endcp) && (!done)) { + switch (*cp++) { + // clang-format off + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + done = true; + break; + // clang-format on + case '-': + negate = true; + break; + default: + break; + } + } + + if (!done) { + // No base 16 or base 10 digits were found. + return 0; + } + + // Step back. + cp--; + + mozilla::CheckedInt<int_type> result; + + // Now iterate the numeric chars and build our result. + while (cp < endcp) { + auto theChar = *cp++; + if (('0' <= theChar) && (theChar <= '9')) { + result = (aRadix * result) + (theChar - '0'); + } else if ((theChar >= 'A') && (theChar <= 'F')) { + if (10 == aRadix) { + // Invalid base 10 digit, error out. + return 0; + } else { + result = (aRadix * result) + ((theChar - 'A') + 10); + } + } else if ((theChar >= 'a') && (theChar <= 'f')) { + if (10 == aRadix) { + // Invalid base 10 digit, error out. + return 0; + } else { + result = (aRadix * result) + ((theChar - 'a') + 10); + } + } else if ((('X' == theChar) || ('x' == theChar)) && result == 0) { + // For some reason we support a leading 'x' regardless of radix. For + // example: "000000x500", aRadix = 10 would be parsed as 500 rather + // than 0. + continue; + } else { + // We've encountered a char that's not a legal number or sign and we can + // terminate processing. + break; + } + + if (!result.isValid()) { + // Overflow! + return 0; + } + } + + // Integer found. 
+ *aErrorCode = NS_OK; + + if (negate) { + result = -result; + } + + return result.value(); +} + +template <typename T> +int32_t nsTSubstring<T>::ToInteger(nsresult* aErrorCode, + uint32_t aRadix) const { + return ToIntegerCommon<T, int32_t>(*this, aErrorCode, aRadix); +} + +/** + * nsTSubstring::ToInteger64 + */ +template <typename T> +int64_t nsTSubstring<T>::ToInteger64(nsresult* aErrorCode, + uint32_t aRadix) const { + return ToIntegerCommon<T, int64_t>(*this, aErrorCode, aRadix); +} + +/** + * nsTSubstring::Mid + */ +template <typename T> +typename nsTSubstring<T>::size_type nsTSubstring<T>::Mid( + self_type& aResult, index_type aStartPos, size_type aLengthToCopy) const { + if (aStartPos == 0 && aLengthToCopy >= this->mLength) { + aResult = *this; + } else { + aResult = Substring(*this, aStartPos, aLengthToCopy); + } + + return aResult.mLength; +} + +/** + * nsTSubstring::StripWhitespace + */ + +template <typename T> +void nsTSubstring<T>::StripWhitespace() { + if (!StripWhitespace(mozilla::fallible)) { + this->AllocFailed(this->mLength); + } +} + +template <typename T> +bool nsTSubstring<T>::StripWhitespace(const fallible_t&) { + if (!this->EnsureMutable()) { + return false; + } + + this->StripTaggedASCII(mozilla::ASCIIMask::MaskWhitespace()); + return true; +} + +/** + * nsTSubstring::ReplaceChar,ReplaceSubstring + */ + +template <typename T> +void nsTSubstring<T>::ReplaceChar(char_type aOldChar, char_type aNewChar) { + int32_t i = this->FindChar(aOldChar); + if (i == kNotFound) { + return; + } + + if (!this->EnsureMutable()) { + this->AllocFailed(this->mLength); + } + for (; i != kNotFound; i = this->FindChar(aOldChar, i + 1)) { + this->mData[i] = aNewChar; + } +} + +template <typename T> +void nsTSubstring<T>::ReplaceChar(const string_view& aSet, char_type aNewChar) { + int32_t i = this->FindCharInSet(aSet); + if (i == kNotFound) { + return; + } + + if (!this->EnsureMutable()) { + this->AllocFailed(this->mLength); + } + for (; i != kNotFound; i = 
this->FindCharInSet(aSet, i + 1)) { + this->mData[i] = aNewChar; + } +} + +template <typename T> +void nsTSubstring<T>::ReplaceSubstring(const char_type* aTarget, + const char_type* aNewValue) { + ReplaceSubstring(nsTDependentString<T>(aTarget), + nsTDependentString<T>(aNewValue)); +} + +template <typename T> +bool nsTSubstring<T>::ReplaceSubstring(const char_type* aTarget, + const char_type* aNewValue, + const fallible_t& aFallible) { + return ReplaceSubstring(nsTDependentString<T>(aTarget), + nsTDependentString<T>(aNewValue), aFallible); +} + +template <typename T> +void nsTSubstring<T>::ReplaceSubstring(const self_type& aTarget, + const self_type& aNewValue) { + if (!ReplaceSubstring(aTarget, aNewValue, mozilla::fallible)) { + // Note that this may wildly underestimate the allocation that failed, as + // we could have been replacing multiple copies of aTarget. + this->AllocFailed(this->mLength + (aNewValue.Length() - aTarget.Length())); + } +} + +template <typename T> +bool nsTSubstring<T>::ReplaceSubstring(const self_type& aTarget, + const self_type& aNewValue, + const fallible_t&) { + struct Segment { + uint32_t mBegin, mLength; + Segment(uint32_t aBegin, uint32_t aLength) + : mBegin(aBegin), mLength(aLength) {} + }; + + if (aTarget.Length() == 0) { + return true; + } + + // Remember all of the non-matching parts. + AutoTArray<Segment, 16> nonMatching; + uint32_t i = 0; + mozilla::CheckedUint32 newLength; + while (true) { + int32_t r = this->Find(aTarget, i); + int32_t until = (r == kNotFound) ? this->Length() - i : r - i; + nonMatching.AppendElement(Segment(i, until)); + newLength += until; + if (r == kNotFound) { + break; + } + + newLength += aNewValue.Length(); + i = r + aTarget.Length(); + if (i >= this->Length()) { + // Add an auxiliary entry at the end of the list to help as an edge case + // for the algorithms below. 
+ nonMatching.AppendElement(Segment(this->Length(), 0)); + break; + } + } + + if (!newLength.isValid()) { + return false; + } + + // If there's only one non-matching segment, then the target string was not + // found, and there's nothing to do. + if (nonMatching.Length() == 1) { + MOZ_ASSERT( + nonMatching[0].mBegin == 0 && nonMatching[0].mLength == this->Length(), + "We should have the correct non-matching segment."); + return true; + } + + // Make sure that we can mutate our buffer. + // Note that we always allocate at least an this->mLength sized buffer, + // because the rest of the algorithm relies on having access to all of the + // original string. In other words, we over-allocate in the shrinking case. + uint32_t oldLen = this->Length(); + auto r = + this->StartBulkWriteImpl(XPCOM_MAX(oldLen, newLength.value()), oldLen); + if (r.isErr()) { + return false; + } + + if (aTarget.Length() >= aNewValue.Length()) { + // In the shrinking case, start filling the buffer from the beginning. + const uint32_t delta = (aTarget.Length() - aNewValue.Length()); + for (i = 1; i < nonMatching.Length(); ++i) { + // When we move the i'th non-matching segment into position, we need to + // account for the characters deleted by the previous |i| replacements by + // subtracting |i * delta|. + const char_type* sourceSegmentPtr = this->mData + nonMatching[i].mBegin; + char_type* destinationSegmentPtr = + this->mData + nonMatching[i].mBegin - i * delta; + // Write the i'th replacement immediately before the new i'th non-matching + // segment. + char_traits::copy(destinationSegmentPtr - aNewValue.Length(), + aNewValue.Data(), aNewValue.Length()); + char_traits::move(destinationSegmentPtr, sourceSegmentPtr, + nonMatching[i].mLength); + } + } else { + // In the growing case, start filling the buffer from the end. 
+ const uint32_t delta = (aNewValue.Length() - aTarget.Length()); + for (i = nonMatching.Length() - 1; i > 0; --i) { + // When we move the i'th non-matching segment into position, we need to + // account for the characters added by the previous |i| replacements by + // adding |i * delta|. + const char_type* sourceSegmentPtr = this->mData + nonMatching[i].mBegin; + char_type* destinationSegmentPtr = + this->mData + nonMatching[i].mBegin + i * delta; + char_traits::move(destinationSegmentPtr, sourceSegmentPtr, + nonMatching[i].mLength); + // Write the i'th replacement immediately before the new i'th non-matching + // segment. + char_traits::copy(destinationSegmentPtr - aNewValue.Length(), + aNewValue.Data(), aNewValue.Length()); + } + } + + // Adjust the length and make sure the string is null terminated. + this->FinishBulkWriteImpl(newLength.value()); + + return true; +} + +/** + * nsTSubstring::Trim + */ + +template <typename T> +void nsTSubstring<T>::Trim(const std::string_view& aSet, bool aTrimLeading, + bool aTrimTrailing, bool aIgnoreQuotes) { + char_type* start = this->mData; + char_type* end = this->mData + this->mLength; + + // skip over quotes if requested + if (aIgnoreQuotes && this->mLength > 2 && + this->mData[0] == this->mData[this->mLength - 1] && + (this->mData[0] == '\'' || this->mData[0] == '"')) { + ++start; + --end; + } + + if (aTrimLeading) { + uint32_t cutStart = start - this->mData; + uint32_t cutLength = 0; + + // walk forward from start to end + for (; start != end; ++start, ++cutLength) { + if ((*start & ~0x7F) || // non-ascii + aSet.find(char(*start)) == std::string_view::npos) { + break; + } + } + + if (cutLength) { + this->Cut(cutStart, cutLength); + + // reset iterators + start = this->mData + cutStart; + end = this->mData + this->mLength - cutStart; + } + } + + if (aTrimTrailing) { + uint32_t cutEnd = end - this->mData; + uint32_t cutLength = 0; + + // walk backward from end to start + --end; + for (; end >= start; --end, ++cutLength) { 
+ if ((*end & ~0x7F) || // non-ascii + aSet.find(char(*end)) == std::string_view::npos) { + break; + } + } + + if (cutLength) { + this->Cut(cutEnd - cutLength, cutLength); + } + } +} + +/** + * nsTSubstring::CompressWhitespace. + */ + +template <typename T> +void nsTSubstring<T>::CompressWhitespace(bool aTrimLeading, + bool aTrimTrailing) { + // Quick exit + if (this->mLength == 0) { + return; + } + + if (!this->EnsureMutable()) { + this->AllocFailed(this->mLength); + } + + const ASCIIMaskArray& mask = mozilla::ASCIIMask::MaskWhitespace(); + + char_type* to = this->mData; + char_type* from = this->mData; + char_type* end = this->mData + this->mLength; + + // Compresses runs of whitespace down to a normal space ' ' and convert + // any whitespace to a normal space. This assumes that whitespace is + // all standard 7-bit ASCII. + bool skipWS = aTrimLeading; + while (from < end) { + uint32_t theChar = *from++; + if (mozilla::ASCIIMask::IsMasked(mask, theChar)) { + if (!skipWS) { + *to++ = ' '; + skipWS = true; + } + } else { + *to++ = theChar; + skipWS = false; + } + } + + // If we need to trim the trailing whitespace, back up one character. + if (aTrimTrailing && skipWS && to > this->mData) { + to--; + } + + *to = char_type(0); // add the null + this->mLength = to - this->mData; +} + +template class nsTSubstring<char>; +template class nsTSubstring<char16_t>; diff --git a/xpcom/string/nsTSubstring.h b/xpcom/string/nsTSubstring.h new file mode 100644 index 0000000000..d54dcafd7b --- /dev/null +++ b/xpcom/string/nsTSubstring.h @@ -0,0 +1,1454 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
// IWYU pragma: private, include "nsString.h"

#ifndef nsTSubstring_h
#define nsTSubstring_h

#include <iterator>
#include <type_traits>

#include "mozilla/Casting.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/IntegerPrintfMacros.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Maybe.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/IntegerTypeTraits.h"
#include "mozilla/Result.h"
#include "mozilla/ResultExtensions.h"
#include "mozilla/Span.h"
#include "mozilla/Unused.h"

#include "nsTStringRepr.h"

#ifndef MOZILLA_INTERNAL_API
#  error "Using XPCOM strings is limited to code linked into libxul."
#endif

// The max number of logically uninitialized code units to
// fill with a marker byte or to mark as uninitialized for
// memory checking. (Limited to avoid quadratic behavior.)
const size_t kNsStringBufferMaxPoison = 16;

class nsStringBuffer;
template <typename T>
class nsTSubstringSplitter;
template <typename T>
class nsTString;
template <typename T>
class nsTSubstring;

namespace mozilla {

/**
 * This handle represents permission to perform low-level writes
 * to the storage buffer of a string in a manner that's aware of the
 * actual capacity of the storage buffer allocation and that's
 * cache-friendly in the sense that the writing of zero terminator
 * for C compatibility can happen in linear memory access order
 * (i.e. the zero terminator write takes place after writing
 * new content to the string as opposed to the zero terminator
 * write happening first causing a non-linear memory write for
 * cache purposes).
 *
 * If you requested a prefix to be preserved when starting
 * or restarting the bulk write, the prefix is present at the
 * start of the buffer exposed by this handle as Span or
 * as a raw pointer, and it's your responsibility to start
 * writing after the preserved prefix (which you
 * presumably wanted not to overwrite since you asked for
 * it to be preserved).
 *
 * In a success case, you must call Finish() with the new
 * length of the string. In failure cases, it's OK to return
 * early from the function whose local variable this handle is.
 * The destructor of this class takes care of putting the
 * string in a valid and mostly harmless state in that case
 * by setting the value of a non-empty string to a single
 * REPLACEMENT CHARACTER or in the case of nsACString that's
 * too short for a REPLACEMENT CHARACTER to fit, an ASCII
 * SUBSTITUTE.
 *
 * You must not allow this handle to outlive the string you
 * obtained it from.
 *
 * You must not access the string you obtained this handle
 * from in any way other than through this handle until
 * you call Finish() on the handle or the handle goes out
 * of scope.
 *
 * Once you've called Finish(), you must not call any
 * methods on this handle and must not use values previously
 * obtained.
 *
 * Once you call RestartBulkWrite(), you must not use
 * values previously obtained from this handle and must
 * reobtain the new corresponding values.
 */
template <typename T>
class BulkWriteHandle final {
  friend class nsTSubstring<T>;

 public:
  typedef typename mozilla::detail::nsTStringRepr<T> base_string_type;
  typedef typename base_string_type::size_type size_type;

  /**
   * Pointer to the start of the writable buffer. Never nullptr.
   *
   * This pointer is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  T* Elements() const {
    MOZ_ASSERT(mString);
    return mString->mData;
  }

  /**
   * How many code units can be written to the buffer.
   * (Note: This is not the same as the string's Length().)
   *
   * This value is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  size_type Length() const {
    MOZ_ASSERT(mString);
    return mCapacity;
  }

  /**
   * Pointer past the end of the buffer.
   *
   * This pointer is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  T* End() const { return Elements() + Length(); }

  /**
   * The writable buffer as Span.
   *
   * This Span is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  auto AsSpan() const { return mozilla::Span<T>{Elements(), Length()}; }

  /**
   * Autoconvert to the buffer as writable Span.
   *
   * This Span is valid until whichever of these happens first:
   *  1) Finish() is called
   *  2) RestartBulkWrite() is called
   *  3) BulkWriteHandle goes out of scope
   */
  operator mozilla::Span<T>() const { return AsSpan(); }

  /**
   * Restart the bulk write with a different capacity.
   *
   * This method invalidates previous return values
   * of the other methods above.
   *
   * Can fail if out of memory leaving the buffer
   * in the state before this call.
   *
   * @param aCapacity the new requested capacity
   * @param aPrefixToPreserve the number of code units at
   *                          the start of the string to
   *                          copy over to the new buffer
   * @param aAllowShrinking whether the string is
   *                        allowed to attempt to
   *                        allocate a smaller buffer
   *                        for its content and copy
   *                        the data over.
   */
  mozilla::Result<mozilla::Ok, nsresult> RestartBulkWrite(
      size_type aCapacity, size_type aPrefixToPreserve, bool aAllowShrinking) {
    MOZ_ASSERT(mString);
    // On success mCapacity is updated with the value produced by
    // StartBulkWriteImpl(); on failure the error is returned to the caller.
    MOZ_TRY_VAR(mCapacity, mString->StartBulkWriteImpl(
                               aCapacity, aPrefixToPreserve, aAllowShrinking));
    return mozilla::Ok();
  }

  /**
   * Indicate that the bulk write finished successfully.
   *
   * @param aLength the number of code units written;
   *                must not exceed Length()
   * @param aAllowShrinking whether the string is
   *                        allowed to attempt to
   *                        allocate a smaller buffer
   *                        for its content and copy
   *                        the data over.
   */
  void Finish(size_type aLength, bool aAllowShrinking) {
    MOZ_ASSERT(mString);
    MOZ_ASSERT(aLength <= mCapacity);
    if (!aLength) {
      // Truncate is safe even when the string is in an invalid state
      mString->Truncate();
      mString = nullptr;
      return;
    }
    if (aAllowShrinking) {
      // The result is deliberately ignored: FinishBulkWriteImpl() below runs
      // whether or not this call succeeded.
      mozilla::Unused << mString->StartBulkWriteImpl(aLength, aLength, true);
    }
    mString->FinishBulkWriteImpl(aLength);
    // A null mString marks the handle as finished; the destructor then does
    // nothing.
    mString = nullptr;
  }

  // Move construction takes over aOther's string through Forget(), which
  // nulls aOther.mString so that aOther's destructor does nothing.
  BulkWriteHandle(BulkWriteHandle&& aOther)
      : mString(aOther.Forget()), mCapacity(aOther.mCapacity) {}

  // Runs the recovery path described in the class comment when the handle is
  // destroyed without Finish() having been called.
  ~BulkWriteHandle() {
    if (!mString || !mCapacity) {
      return;
    }
    // The old zero terminator may be gone by now, so we need
    // to write a new one somewhere and make length match.
    // We can use a length between 1 and mCapacity.
    // The contents of the string can be partially uninitialized
    // or partially initialized in a way that would be dangerous
    // if parsed by some recipient. It's prudent to write something
    // sane as the contents of the string. U+FFFD is the safest
    // placeholder, but when it doesn't fit, let's use ASCII
    // substitute. Merely truncating the string to a zero-length
    // string might be dangerous in some scenarios. See
    // https://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences
    // for closely related scenario.
    auto ptr = Elements();
    // Cast the pointer below to silence warnings
    if (sizeof(T) == 1) {
      unsigned char* charPtr = reinterpret_cast<unsigned char*>(ptr);
      if (mCapacity >= 3) {
        // 0xEF 0xBF 0xBD is U+FFFD REPLACEMENT CHARACTER encoded as UTF-8.
        *charPtr++ = 0xEF;
        *charPtr++ = 0xBF;
        *charPtr++ = 0xBD;
        mString->mLength = 3;
      } else {
        // Too short for U+FFFD: 0x1A is the ASCII SUBSTITUTE character.
        *charPtr++ = 0x1A;
        mString->mLength = 1;
      }
      *charPtr = 0;
    } else if (sizeof(T) == 2) {
      char16_t* charPtr = reinterpret_cast<char16_t*>(ptr);
      *charPtr++ = 0xFFFD;
      *charPtr = 0;
      mString->mLength = 1;
    } else {
      MOZ_ASSERT_UNREACHABLE("Only 8-bit and 16-bit code units supported.");
    }
  }

  // A handle can be neither default-constructed nor copied.
  BulkWriteHandle() = delete;
  BulkWriteHandle(const BulkWriteHandle&) = delete;
  BulkWriteHandle& operator=(const BulkWriteHandle&) = delete;

 private:
  // Private constructor; nsTSubstring<T> is a friend of this class.
  BulkWriteHandle(nsTSubstring<T>* aString, size_type aCapacity)
      : mString(aString), mCapacity(aCapacity) {}

  // Returns the string pointer and nulls mString, leaving this handle in the
  // same state as after Finish().
  nsTSubstring<T>* Forget() {
    auto string = mString;
    mString = nullptr;
    return string;
  }

  nsTSubstring<T>* mString;  // nullptr upon finish
  size_type mCapacity;       // value reported by Length()
};

}  // namespace mozilla

/**
 * nsTSubstring is an abstract string class. From an API perspective, this
 * class is the root of the string class hierarchy. It represents a single
 * contiguous array of characters, which may or may not be null-terminated.
 * This type is not instantiated directly. A sub-class is instantiated
 * instead. For example, see nsTString.
 *
 * NAMES:
 *   nsAString for wide characters
 *   nsACString for narrow characters
 *
 */
template <typename T>
class nsTSubstring : public mozilla::detail::nsTStringRepr<T> {
  friend class mozilla::BulkWriteHandle<T>;
  friend class nsStringBuffer;

 public:
  typedef nsTSubstring<T> self_type;

  typedef nsTString<T> string_type;

  typedef typename mozilla::detail::nsTStringRepr<T> base_string_type;
  typedef typename base_string_type::substring_type substring_type;

  typedef typename base_string_type::fallible_t fallible_t;

  typedef typename base_string_type::char_type char_type;
  typedef typename base_string_type::char_traits char_traits;
  typedef
      typename base_string_type::incompatible_char_type incompatible_char_type;

  typedef typename base_string_type::substring_tuple_type substring_tuple_type;

  typedef typename base_string_type::const_iterator const_iterator;
  typedef typename base_string_type::iterator iterator;

  typedef typename base_string_type::comparator_type comparator_type;

  typedef typename base_string_type::const_char_iterator const_char_iterator;

  typedef typename base_string_type::string_view string_view;

  typedef typename base_string_type::index_type index_type;
  typedef typename base_string_type::size_type size_type;

  // These are only for internal use within the string classes:
  typedef typename base_string_type::DataFlags DataFlags;
  typedef typename base_string_type::ClassFlags ClassFlags;
  typedef typename base_string_type::LengthStorage LengthStorage;

  // this acts like a virtual destructor
  ~nsTSubstring() { Finalize(); }

  /**
   * writing iterators
   *
   * BeginWriting() makes the string mutable (if it isn't
   * already) and returns (or writes into an outparam) a
   * pointer that provides write access to the string's buffer.
   *
   * Note: Consider if BulkWrite() suits your use case better
   * than BeginWriting() combined with SetLength().
+ * + * Note: Strings autoconvert into writable mozilla::Span, + * which may suit your use case better than calling + * BeginWriting() directly. + * + * When writing via the pointer obtained from BeginWriting(), + * you are allowed to write at most the number of code units + * indicated by Length() or, alternatively, write up to, but + * not including, the position indicated by EndWriting(). + * + * In particular, calling SetCapacity() does not affect what + * the above paragraph says. + */ + + iterator BeginWriting() { + if (!EnsureMutable()) { + AllocFailed(base_string_type::mLength); + } + + return base_string_type::mData; + } + + iterator BeginWriting(const fallible_t&) { + return EnsureMutable() ? base_string_type::mData : iterator(0); + } + + iterator EndWriting() { + if (!EnsureMutable()) { + AllocFailed(base_string_type::mLength); + } + + return base_string_type::mData + base_string_type::mLength; + } + + iterator EndWriting(const fallible_t&) { + return EnsureMutable() + ? (base_string_type::mData + base_string_type::mLength) + : iterator(0); + } + + /** + * Perform string to int conversion. + * @param aErrorCode will contain error if one occurs + * @param aRadix is the radix to use. Only 10 and 16 are supported. + * @return int rep of string value, and possible (out) error code + */ + int32_t ToInteger(nsresult* aErrorCode, uint32_t aRadix = 10) const; + + /** + * Perform string to 64-bit int conversion. + * @param aErrorCode will contain error if one occurs + * @param aRadix is the radix to use. Only 10 and 16 are supported. 
+ * @return 64-bit int rep of string value, and possible (out) error code + */ + int64_t ToInteger64(nsresult* aErrorCode, uint32_t aRadix = 10) const; + + /** + * assignment + */ + + void NS_FASTCALL Assign(char_type aChar); + [[nodiscard]] bool NS_FASTCALL Assign(char_type aChar, const fallible_t&); + + void NS_FASTCALL Assign(const char_type* aData, + size_type aLength = size_type(-1)); + [[nodiscard]] bool NS_FASTCALL Assign(const char_type* aData, + const fallible_t&); + [[nodiscard]] bool NS_FASTCALL Assign(const char_type* aData, + size_type aLength, const fallible_t&); + + void NS_FASTCALL Assign(const self_type&); + [[nodiscard]] bool NS_FASTCALL Assign(const self_type&, const fallible_t&); + + void NS_FASTCALL Assign(self_type&&); + [[nodiscard]] bool NS_FASTCALL Assign(self_type&&, const fallible_t&); + + void NS_FASTCALL Assign(const substring_tuple_type&); + [[nodiscard]] bool NS_FASTCALL Assign(const substring_tuple_type&, + const fallible_t&); + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void Assign(char16ptr_t aData) { + Assign(static_cast<const char16_t*>(aData)); + } + + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void Assign(char16ptr_t aData, size_type aLength) { + Assign(static_cast<const char16_t*>(aData), aLength); + } + + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + [[nodiscard]] bool Assign(char16ptr_t aData, size_type aLength, + const fallible_t& aFallible) { + return Assign(static_cast<const char16_t*>(aData), aLength, aFallible); + } +#endif + + void NS_FASTCALL AssignASCII(const char* aData, size_type aLength); + [[nodiscard]] bool NS_FASTCALL AssignASCII(const char* aData, + size_type aLength, + const fallible_t&); + + void NS_FASTCALL AssignASCII(const char* aData) { + AssignASCII(aData, strlen(aData)); + } + [[nodiscard]] bool NS_FASTCALL AssignASCII(const char* aData, + const fallible_t& 
aFallible) { + return AssignASCII(aData, strlen(aData), aFallible); + } + + // AssignLiteral must ONLY be called with an actual literal string, or + // a character array *constant* of static storage duration declared + // without an explicit size and with an initializer that is a string + // literal or is otherwise null-terminated. + // Use Assign or AssignASCII for other character array variables. + // + // This method does not need a fallible version, because it uses the + // POD buffer of the literal as the string's buffer without allocating. + // The literal does not need to be ASCII. If this is a 16-bit string, this + // method takes a u"" literal. (The overload on 16-bit strings that takes + // a "" literal takes only ASCII.) + template <int N> + void AssignLiteral(const char_type (&aStr)[N]) { + AssignLiteral(aStr, N - 1); + } + + // AssignLiteral must ONLY be called with an actual literal string, or + // a char array *constant* declared without an explicit size and with an + // initializer that is a string literal or is otherwise null-terminated. + // Use AssignASCII for other char array variables. + // + // This method takes an 8-bit (ASCII-only!) string that is expanded + // into a 16-bit string at run time causing a run-time allocation. + // To avoid the run-time allocation (at the cost of the literal + // taking twice the size in the binary), use the above overload that + // takes a u"" string instead. Using the overload that takes a u"" + // literal is generally preferred when working with 16-bit strings. + // + // There is not a fallible version of this method because it only really + // applies to small allocations that we wouldn't want to check anyway. 
+ template <int N, typename Q = T, + typename EnableIfChar16 = typename mozilla::Char16OnlyT<Q>> + void AssignLiteral(const incompatible_char_type (&aStr)[N]) { + AssignASCII(aStr, N - 1); + } + + self_type& operator=(char_type aChar) { + Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aData) { + Assign(aData); + return *this; + } +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + self_type& operator=(char16ptr_t aData) { + Assign(aData); + return *this; + } +#endif + self_type& operator=(const self_type& aStr) { + Assign(aStr); + return *this; + } + self_type& operator=(self_type&& aStr) { + Assign(std::move(aStr)); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) { + Assign(aTuple); + return *this; + } + + // Adopt a heap-allocated char sequence for this string; is Voided if aData + // is null. Useful for e.g. converting an strdup'd C string into an + // nsCString. See also getter_Copies(), which is a useful wrapper. 
+ void NS_FASTCALL Adopt(char_type* aData, size_type aLength = size_type(-1)); + + /** + * buffer manipulation + */ + + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + char_type aChar); + [[nodiscard]] bool NS_FASTCALL Replace(index_type aCutStart, + size_type aCutLength, char_type aChar, + const fallible_t&); + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, + size_type aLength = size_type(-1)); + [[nodiscard]] bool NS_FASTCALL Replace(index_type aCutStart, + size_type aCutLength, + const char_type* aData, + size_type aLength, const fallible_t&); + void Replace(index_type aCutStart, size_type aCutLength, + const self_type& aStr) { + Replace(aCutStart, aCutLength, aStr.Data(), aStr.Length()); + } + [[nodiscard]] bool Replace(index_type aCutStart, size_type aCutLength, + const self_type& aStr, + const fallible_t& aFallible) { + return Replace(aCutStart, aCutLength, aStr.Data(), aStr.Length(), + aFallible); + } + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + const substring_tuple_type& aTuple); + + // ReplaceLiteral must ONLY be called with an actual literal string, or + // a character array *constant* of static storage duration declared + // without an explicit size and with an initializer that is a string + // literal or is otherwise null-terminated. + // Use Replace for other character array variables. + template <int N> + void ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type (&aStr)[N]) { + ReplaceLiteral(aCutStart, aCutLength, aStr, N - 1); + } + + /** + * |Left|, |Mid|, and |Right| are annoying signatures that seem better almost + * any _other_ way than they are now. 
Consider these alternatives + * + * // ...a member function that returns a |Substring| + * aWritable = aReadable.Left(17); + * // ...a global function that returns a |Substring| + * aWritable = Left(aReadable, 17); + * // ...a global function that does the assignment + * Left(aReadable, 17, aWritable); + * + * as opposed to the current signature + * + * // ...a member function that does the assignment + * aReadable.Left(aWritable, 17); + * + * or maybe just stamping them out in favor of |Substring|, they are just + * duplicate functionality + * + * aWritable = Substring(aReadable, 0, 17); + */ + size_type Mid(self_type& aResult, index_type aStartPos, + size_type aCount) const; + + size_type Left(self_type& aResult, size_type aCount) const { + return Mid(aResult, 0, aCount); + } + + size_type Right(self_type& aResult, size_type aCount) const { + aCount = XPCOM_MIN(this->Length(), aCount); + return Mid(aResult, this->mLength - aCount, aCount); + } + + /** + * This method strips whitespace throughout the string. + */ + void StripWhitespace(); + bool StripWhitespace(const fallible_t&); + + /** + * This method is used to remove all occurrences of aChar from this + * string. + * + * @param aChar -- char to be stripped + */ + void StripChar(char_type aChar); + + /** + * This method is used to remove all occurrences of aChars from this + * string. + * + * @param aChars -- chars to be stripped + */ + void StripChars(const char_type* aChars); + + /** + * This method is used to remove all occurrences of some characters + * from this string. The characters removed have the corresponding + * entries in the bool array set to true; we retain all characters + * with code beyond 127. + * THE CALLER IS RESPONSIBLE for making sure the complete boolean + * array, 128 entries, is properly initialized. + * + * See also: ASCIIMask class. + * + * @param aToStrip -- Array where each entry is true if the + * corresponding ASCII character is to be stripped. 
All + * characters beyond code 127 are retained. Note that this + * parameter is of ASCIIMaskArray type, but we expand the typedef + * to avoid having to include nsASCIIMask.h in this include file + * as it brings other includes. + */ + void StripTaggedASCII(const std::array<bool, 128>& aToStrip); + + /** + * A shortcut to strip \r and \n. + */ + void StripCRLF(); + + /** + * swaps occurrences of one character (or of any character in aSet) + * for another character + */ + void ReplaceChar(char_type aOldChar, char_type aNewChar); + void ReplaceChar(const string_view& aSet, char_type aNewChar); + + /** + * Replace all occurrences of aTarget with aNewValue. + * The complexity of this function is O(n+m), n being the length of the string + * and m being the length of aNewValue. + */ + void ReplaceSubstring(const self_type& aTarget, const self_type& aNewValue); + void ReplaceSubstring(const char_type* aTarget, const char_type* aNewValue); + [[nodiscard]] bool ReplaceSubstring(const self_type& aTarget, + const self_type& aNewValue, + const fallible_t&); + [[nodiscard]] bool ReplaceSubstring(const char_type* aTarget, + const char_type* aNewValue, + const fallible_t&); + + /** + * This method trims characters found in aSet from either end of the + * underlying string. + * + * @param aSet -- contains chars to be trimmed from both ends + * @param aTrimLeading + * @param aTrimTrailing + * @param aIgnoreQuotes -- if true, causes surrounding quotes to be ignored + * @return nothing (the method is declared void) + */ + void Trim(const std::string_view& aSet, bool aTrimLeading = true, + bool aTrimTrailing = true, bool aIgnoreQuotes = false); + + /** + * This method strips whitespace from string. + * You can control whether whitespace is yanked from start and end of + * string as well. 
+ * + * @param aTrimLeading controls stripping of leading ws + * @param aTrimTrailing controls stripping of trailing ws + */ + void CompressWhitespace(bool aTrimLeading = true, bool aTrimTrailing = true); + + void Append(char_type aChar); + + [[nodiscard]] bool Append(char_type aChar, const fallible_t& aFallible); + + void Append(const char_type* aData, size_type aLength = size_type(-1)); + + [[nodiscard]] bool Append(const char_type* aData, size_type aLength, + const fallible_t& aFallible); + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void Append(char16ptr_t aData, size_type aLength = size_type(-1)) { + Append(static_cast<const char16_t*>(aData), aLength); + } +#endif + + void Append(const self_type& aStr); + + [[nodiscard]] bool Append(const self_type& aStr, const fallible_t& aFallible); + + void Append(const substring_tuple_type& aTuple); + + [[nodiscard]] bool Append(const substring_tuple_type& aTuple, + const fallible_t& aFallible); + + void AppendASCII(const char* aData, size_type aLength = size_type(-1)); + + [[nodiscard]] bool AppendASCII(const char* aData, + const fallible_t& aFallible); + + [[nodiscard]] bool AppendASCII(const char* aData, size_type aLength, + const fallible_t& aFallible); + + // Appends a literal string ("" literal in the 8-bit case and u"" literal + // in the 16-bit case) to the string. + // + // AppendLiteral must ONLY be called with an actual literal string, or + // a character array *constant* of static storage duration declared + // without an explicit size and with an initializer that is a string + // literal or is otherwise null-terminated. + // Use Append or AppendASCII for other character array variables. + template <int N> + void AppendLiteral(const char_type (&aStr)[N]) { + // The case where base_string_type::mLength is zero is intentionally + // left unoptimized (could be optimized as call to AssignLiteral), + // because it's rare/nonexistent. 
If you add that optimization, + // please be sure to also check that + // !(base_string_type::mDataFlags & DataFlags::REFCOUNTED) + // to avoid undoing the effects of SetCapacity(). + Append(aStr, N - 1); + } + + // Fallible variant of the overload above: forwards to the fallible + // Append() and returns its bool result. Declared bool (not void) so that + // the `return Append(...)` below is well-formed once instantiated. + template <int N> + [[nodiscard]] bool AppendLiteral(const char_type (&aStr)[N], + const fallible_t& aFallible) { + // The case where base_string_type::mLength is zero is intentionally + // left unoptimized (could be optimized as call to AssignLiteral), + // because it's rare/nonexistent. If you add that optimization, + // please be sure to also check that + // !(base_string_type::mDataFlags & DataFlags::REFCOUNTED) + // to avoid undoing the effects of SetCapacity(). + return Append(aStr, N - 1, aFallible); + } + + // Only enable for T = char16_t + // + // Appends an 8-bit literal string ("" literal) to a 16-bit string by + // expanding it. The literal must only contain ASCII. + // + // Using u"" literals with 16-bit strings is generally preferred. + template <int N, typename Q = T, + typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void AppendLiteral(const incompatible_char_type (&aStr)[N]) { + AppendASCII(aStr, N - 1); + } + + // Only enable for T = char16_t + template <int N, typename Q = T, + typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + [[nodiscard]] bool AppendLiteral(const incompatible_char_type (&aStr)[N], + const fallible_t& aFallible) { + return AppendASCII(aStr, N - 1, aFallible); + } + + /** + * Append a formatted string to the current string. Uses the + * standard printf format codes. This uses NSPR formatting, which will be + * locale-aware for floating-point values. You probably don't want to use + * this with floating-point values as a result. + */ + void AppendPrintf(const char* aFormat, ...) 
MOZ_FORMAT_PRINTF(2, 3); + void AppendVprintf(const char* aFormat, va_list aAp) MOZ_FORMAT_PRINTF(2, 0); + void AppendInt(int32_t aInteger) { AppendIntDec(aInteger); } + void AppendInt(int32_t aInteger, int aRadix) { + if (aRadix == 10) { + AppendIntDec(aInteger); + } else if (aRadix == 8) { + AppendIntOct(static_cast<uint32_t>(aInteger)); + } else { + AppendIntHex(static_cast<uint32_t>(aInteger)); + } + } + void AppendInt(uint32_t aInteger) { AppendIntDec(aInteger); } + void AppendInt(uint32_t aInteger, int aRadix) { + if (aRadix == 10) { + AppendIntDec(aInteger); + } else if (aRadix == 8) { + AppendIntOct(aInteger); + } else { + AppendIntHex(aInteger); + } + } + void AppendInt(int64_t aInteger) { AppendIntDec(aInteger); } + void AppendInt(int64_t aInteger, int aRadix) { + if (aRadix == 10) { + AppendIntDec(aInteger); + } else if (aRadix == 8) { + AppendIntOct(static_cast<uint64_t>(aInteger)); + } else { + AppendIntHex(static_cast<uint64_t>(aInteger)); + } + } + void AppendInt(uint64_t aInteger) { AppendIntDec(aInteger); } + void AppendInt(uint64_t aInteger, int aRadix) { + if (aRadix == 10) { + AppendIntDec(aInteger); + } else if (aRadix == 8) { + AppendIntOct(aInteger); + } else { + AppendIntHex(aInteger); + } + } + + private: + void AppendIntDec(int32_t); + void AppendIntDec(uint32_t); + void AppendIntOct(uint32_t); + void AppendIntHex(uint32_t); + void AppendIntDec(int64_t); + void AppendIntDec(uint64_t); + void AppendIntOct(uint64_t); + void AppendIntHex(uint64_t); + + public: + /** + * Append the given float to this string + */ + void NS_FASTCALL AppendFloat(float aFloat); + void NS_FASTCALL AppendFloat(double aFloat); + + self_type& operator+=(char_type aChar) { + Append(aChar); + return *this; + } + self_type& operator+=(const char_type* aData) { + Append(aData); + return *this; + } +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + self_type& operator+=(char16ptr_t aData) { + 
Append(aData); + return *this; + } +#endif + self_type& operator+=(const self_type& aStr) { + Append(aStr); + return *this; + } + self_type& operator+=(const substring_tuple_type& aTuple) { + Append(aTuple); + return *this; + } + + void Insert(char_type aChar, index_type aPos) { Replace(aPos, 0, aChar); } + void Insert(const char_type* aData, index_type aPos, + size_type aLength = size_type(-1)) { + Replace(aPos, 0, aData, aLength); + } +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + void Insert(char16ptr_t aData, index_type aPos, + size_type aLength = size_type(-1)) { + Insert(static_cast<const char16_t*>(aData), aPos, aLength); + } +#endif + void Insert(const self_type& aStr, index_type aPos) { + Replace(aPos, 0, aStr); + } + void Insert(const substring_tuple_type& aTuple, index_type aPos) { + Replace(aPos, 0, aTuple); + } + + // InsertLiteral must ONLY be called with an actual literal string, or + // a character array *constant* of static storage duration declared + // without an explicit size and with an initializer that is a string + // literal or is otherwise null-terminated. + // Use Insert for other character array variables. + template <int N> + void InsertLiteral(const char_type (&aStr)[N], index_type aPos) { + ReplaceLiteral(aPos, 0, aStr, N - 1); + } + + void Cut(index_type aCutStart, size_type aCutLength) { + Replace(aCutStart, aCutLength, char_traits::sEmptyBuffer, 0); + } + + nsTSubstringSplitter<T> Split(const char_type aChar) const; + + /** + * buffer sizing + */ + + /** + * Attempts to set the capacity to the given size in number of + * code units without affecting the length of the string in + * order to avoid reallocation during a subsequent sequence of + * appends. 
+ * + * This method is appropriate to use before a sequence of multiple + * operations from the following list (without operations that are + * not on the list between the SetCapacity() call and operations + * from the list): + * + * Append() + * AppendASCII() + * AppendLiteral() (except if the string is empty: bug 1487606) + * AppendPrintf() + * AppendInt() + * AppendFloat() + * LossyAppendUTF16toASCII() + * AppendASCIItoUTF16() + * + * DO NOT call SetCapacity() if the subsequent operations on the + * string do not meet the criteria above. Operations that undo + * the benefits of SetCapacity() include but are not limited to: + * + * SetLength() + * Truncate() + * Assign() + * AssignLiteral() + * Adopt() + * CopyASCIItoUTF16() + * LossyCopyUTF16toASCII() + * AppendUTF16toUTF8() + * AppendUTF8toUTF16() + * CopyUTF16toUTF8() + * CopyUTF8toUTF16() + * + * If your string is an nsAuto[C]String and you are calling + * SetCapacity() with a constant N, please instead declare the + * string as nsAuto[C]StringN<N+1> without calling SetCapacity(). + * + * There is no need to include room for the null terminator: it is + * the job of the string class. + * + * Note: Calling SetCapacity() does not give you permission to + * use the pointer obtained from BeginWriting() to write + * past the current length (as returned by Length()) of the + * string. Please use either BulkWrite() or SetLength() + * instead. + * + * Note: SetCapacity() won't make the string shorter if + * called with an argument smaller than the length of the + * string. + * + * Note: You must not use previously obtained iterators + * or spans after calling SetCapacity(). + */ + void NS_FASTCALL SetCapacity(size_type aNewCapacity); + [[nodiscard]] bool NS_FASTCALL SetCapacity(size_type aNewCapacity, + const fallible_t&); + + /** + * Changes the logical length of the string, potentially + * allocating a differently-sized buffer for the string. 
+ * + * When making the string shorter, this method never + * reports allocation failure. + * + * Exposes uninitialized memory if the string got longer. + * + * If called with the argument 0, releases the + * heap-allocated buffer, if any. (But the no-argument + * overload of Truncate() is a more idiomatic and efficient + * option than SetLength(0).) + * + * Note: You must not use previously obtained iterators + * or spans after calling SetLength(). + */ + void NS_FASTCALL SetLength(size_type aNewLength); + [[nodiscard]] bool NS_FASTCALL SetLength(size_type aNewLength, + const fallible_t&); + + /** + * Like SetLength() but asserts that the string + * doesn't become longer. Never fails, so doesn't need a + * fallible variant. + * + * Note: You must not use previously obtained iterators + * or spans after calling Truncate(). + */ + void Truncate(size_type aNewLength) { + MOZ_RELEASE_ASSERT(aNewLength <= base_string_type::mLength, + "Truncate cannot make string longer"); + mozilla::DebugOnly<bool> success = SetLength(aNewLength, mozilla::fallible); + MOZ_ASSERT(success); + } + + /** + * A more efficient overload for Truncate(0). Releases the + * heap-allocated buffer if any. + */ + void Truncate(); + + /** + * buffer access + */ + + /** + * Get a const pointer to the string's internal buffer. The caller + * MUST NOT modify the characters at the returned address. + * + * @returns The length of the buffer in characters. + */ + inline size_type GetData(const char_type** aData) const { + *aData = base_string_type::mData; + return base_string_type::mLength; + } + + /** + * Get a pointer to the string's internal buffer, optionally resizing + * the buffer first. If size_type(-1) is passed for aNewLen, then the + * current length of the string is used. The caller MAY modify the + * characters at the returned address (up to but not exceeding the + * length of the string). 
+ * + * @returns The length of the buffer in characters or 0 if unable to + * satisfy the request due to low-memory conditions. + */ + size_type GetMutableData(char_type** aData, + size_type aNewLen = size_type(-1)) { + if (!EnsureMutable(aNewLen)) { + AllocFailed(aNewLen == size_type(-1) ? base_string_type::Length() + : aNewLen); + } + + *aData = base_string_type::mData; + return base_string_type::Length(); + } + + size_type GetMutableData(char_type** aData, size_type aNewLen, + const fallible_t&) { + if (!EnsureMutable(aNewLen)) { + *aData = nullptr; + return 0; + } + + *aData = base_string_type::mData; + return base_string_type::mLength; + } + +#if defined(MOZ_USE_CHAR16_WRAPPER) + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + size_type GetMutableData(wchar_t** aData, size_type aNewLen = size_type(-1)) { + return GetMutableData(reinterpret_cast<char16_t**>(aData), aNewLen); + } + + template <typename Q = T, typename EnableIfChar16 = mozilla::Char16OnlyT<Q>> + size_type GetMutableData(wchar_t** aData, size_type aNewLen, + const fallible_t& aFallible) { + return GetMutableData(reinterpret_cast<char16_t**>(aData), aNewLen, + aFallible); + } +#endif + + mozilla::Span<char_type> GetMutableData(size_type aNewLen = size_type(-1)) { + if (!EnsureMutable(aNewLen)) { + AllocFailed(aNewLen == size_type(-1) ? 
base_string_type::Length() + : aNewLen); + } + + return mozilla::Span{base_string_type::mData, base_string_type::Length()}; + } + + mozilla::Maybe<mozilla::Span<char_type>> GetMutableData(size_type aNewLen, + const fallible_t&) { + if (!EnsureMutable(aNewLen)) { + return mozilla::Nothing(); + } + return Some( + mozilla::Span{base_string_type::mData, base_string_type::Length()}); + } + + /** + * Span integration + */ + + operator mozilla::Span<const char_type>() const { + return mozilla::Span{base_string_type::BeginReading(), + base_string_type::Length()}; + } + + void Append(mozilla::Span<const char_type> aSpan) { + Append(aSpan.Elements(), aSpan.Length()); + } + + [[nodiscard]] bool Append(mozilla::Span<const char_type> aSpan, + const fallible_t& aFallible) { + return Append(aSpan.Elements(), aSpan.Length(), aFallible); + } + + void NS_FASTCALL AssignASCII(mozilla::Span<const char> aData) { + AssignASCII(aData.Elements(), aData.Length()); + } + [[nodiscard]] bool NS_FASTCALL AssignASCII(mozilla::Span<const char> aData, + const fallible_t& aFallible) { + return AssignASCII(aData.Elements(), aData.Length(), aFallible); + } + + void AppendASCII(mozilla::Span<const char> aData) { + AppendASCII(aData.Elements(), aData.Length()); + } + + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + operator mozilla::Span<const uint8_t>() const { + return mozilla::Span{ + reinterpret_cast<const uint8_t*>(base_string_type::BeginReading()), + base_string_type::Length()}; + } + + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + void Append(mozilla::Span<const uint8_t> aSpan) { + Append(reinterpret_cast<const char*>(aSpan.Elements()), aSpan.Length()); + } + + template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> + [[nodiscard]] bool Append(mozilla::Span<const uint8_t> aSpan, + const fallible_t& aFallible) { + return Append(reinterpret_cast<const char*>(aSpan.Elements()), + aSpan.Length(), aFallible); + } + + void 
Insert(mozilla::Span<const char_type> aSpan, index_type aPos) { + Insert(aSpan.Elements(), aPos, aSpan.Length()); + } + + /** + * string data is never null, but can be marked void. if true, the + * string will be truncated. @see nsTSubstring::IsVoid + */ + + void NS_FASTCALL SetIsVoid(bool); + + /** + * If the string uses a shared buffer, this method + * clears the pointer without releasing the buffer. + */ + void ForgetSharedBuffer() { + if (base_string_type::mDataFlags & DataFlags::REFCOUNTED) { + SetToEmptyBuffer(); + } + } + + protected: + void AssertValid() { + MOZ_DIAGNOSTIC_ASSERT(!(this->mClassFlags & ClassFlags::INVALID_MASK)); + MOZ_DIAGNOSTIC_ASSERT(!(this->mDataFlags & DataFlags::INVALID_MASK)); + MOZ_ASSERT(!(this->mClassFlags & ClassFlags::NULL_TERMINATED) || + (this->mDataFlags & DataFlags::TERMINATED), + "String classes whose static type guarantees a null-terminated " + "buffer must not be assigned a non-null-terminated buffer."); + } + + public: + /** + * this is public to support automatic conversion of tuple to string + * base type, which helps avoid converting to nsTAString. + */ + MOZ_IMPLICIT nsTSubstring(const substring_tuple_type& aTuple) + : base_string_type(nullptr, 0, DataFlags(0), ClassFlags(0)) { + AssertValid(); + Assign(aTuple); + } + + size_t SizeOfExcludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const; + size_t SizeOfIncludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const; + + /** + * WARNING: Only use these functions if you really know what you are + * doing, because they can easily lead to double-counting strings. If + * you do use them, please explain clearly in a comment why it's safe + * and won't lead to double-counting. 
+ */ + size_t SizeOfExcludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const; + size_t SizeOfIncludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const; + + template <class N> + void NS_ABORT_OOM(T) { + struct never {}; // a compiler-friendly way to do static_assert(false) + static_assert( + std::is_same_v<N, never>, + "In string classes, use AllocFailed to account for sizeof(char_type). " + "Use the global ::NS_ABORT_OOM if you really have a count of bytes."); + } + + MOZ_ALWAYS_INLINE void AllocFailed(size_t aLength) { + ::NS_ABORT_OOM(aLength * sizeof(char_type)); + } + + protected: + // default initialization + nsTSubstring() + : base_string_type(char_traits::sEmptyBuffer, 0, DataFlags::TERMINATED, + ClassFlags(0)) { + AssertValid(); + } + + // copy-constructor, constructs as dependent on given object + // (NOTE: this is for internal use only) + nsTSubstring(const self_type& aStr) + : base_string_type(aStr.base_string_type::mData, + aStr.base_string_type::mLength, + aStr.base_string_type::mDataFlags & + (DataFlags::TERMINATED | DataFlags::VOIDED), + ClassFlags(0)) { + AssertValid(); + } + + // initialization with ClassFlags + explicit nsTSubstring(ClassFlags aClassFlags) + : base_string_type(char_traits::sEmptyBuffer, 0, DataFlags::TERMINATED, + aClassFlags) { + AssertValid(); + } + + /** + * allows for direct initialization of a nsTSubstring object. 
+ */ + nsTSubstring(char_type* aData, size_type aLength, DataFlags aDataFlags, + ClassFlags aClassFlags) +#if defined(NS_BUILD_REFCNT_LOGGING) +# define XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE + ; +#else +# undef XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE + : base_string_type(aData, aLength, aDataFlags, aClassFlags) { + AssertValid(); + } +#endif /* NS_BUILD_REFCNT_LOGGING */ + + void SetToEmptyBuffer() { + base_string_type::mData = char_traits::sEmptyBuffer; + base_string_type::mLength = 0; + base_string_type::mDataFlags = DataFlags::TERMINATED; + AssertValid(); + } + + void SetData(char_type* aData, LengthStorage aLength, DataFlags aDataFlags) { + base_string_type::mData = aData; + base_string_type::mLength = aLength; + base_string_type::mDataFlags = aDataFlags; + AssertValid(); + } + + /** + * this function releases mData and does not change the value of + * any of its member variables. in other words, this function acts + * like a destructor. + */ + void NS_FASTCALL Finalize(); + + public: + /** + * Starts a low-level write transaction to the string. + * + * Prepares the string for mutation such that the capacity + * of the string is at least aCapacity. The returned handle + * exposes the actual, potentially larger, capacity. + * + * If meeting the capacity or mutability requirement requires + * reallocation, aPrefixToPreserve code units are copied from the + * start of the old buffer to the start of the new buffer. + * aPrefixToPreserve must not be greater than the string's current + * length or greater than aCapacity. + * + * aAllowShrinking indicates whether an allocation may be + * performed when the string is already mutable and the requested + * capacity is smaller than the current capacity. + * + * If this method returns successfully, you must not access + * the string except through the returned BulkWriteHandle + * until either the BulkWriteHandle goes out of scope or + * you call Finish() on the BulkWriteHandle. 
+ * + * Compared to SetLength() and BeginWriting(), this more + * complex API accomplishes two things: + * 1) It exposes the actual capacity which may be larger + * than the requested capacity, which is useful in some + * multi-step write operations that don't allocate for + * the worst case up front. + * 2) It writes the zero terminator after the string + * content has been written, which results in a + * cache-friendly linear write pattern. + */ + mozilla::Result<mozilla::BulkWriteHandle<T>, nsresult> NS_FASTCALL BulkWrite( + size_type aCapacity, size_type aPrefixToPreserve, bool aAllowShrinking); + + /** + * THIS IS NOT REALLY A PUBLIC METHOD! DO NOT CALL FROM OUTSIDE + * THE STRING IMPLEMENTATION. (It's public only because friend + * declarations don't allow extern or static and this needs to + * be called from Rust FFI glue.) + * + * Prepares mData to be mutated such that the capacity of the string + * (not counting the zero-terminator) is at least aCapacity. + * Returns the actual capacity, which may be larger than what was + * requested or Err(NS_ERROR_OUT_OF_MEMORY) on allocation failure. + * + * mLength is ignored by this method. If the buffer is reallocated, + * aUnitsToPreserve specifies how many code units to copy over to + * the new buffer. The old buffer is freed if applicable. + * + * Unless the return value is Err(NS_ERROR_OUT_OF_MEMORY) to signal + * failure or 0 to signal that the string has been set to + * the special empty state, this method leaves the string in an + * invalid state! The caller is responsible for calling + * FinishBulkWrite() (or in Rust calling + * nsA[C]StringBulkWriteHandle::finish()), which put the string + * into a valid state by setting mLength and zero-terminating. + * This method sets the flag to claim that the string is + * zero-terminated before it actually is. 
   *
   * Once this method has been called and before FinishBulkWrite()
   * has been called, only accessing mData or calling this method
   * again are valid operations. Do not call any other methods or
   * access other fields between calling this method and
   * FinishBulkWrite().
   *
   * @param aCapacity The requested capacity. The return value
   *                  will be greater than or equal to this value.
   * @param aPrefixToPreserve The number of code units at the start
   *                          of the old buffer to copy into the
   *                          new buffer.
   * @param aAllowShrinking If true, an allocation may be performed
   *                        if the requested capacity is smaller
   *                        than the current capacity.
   * @param aSuffixLength The length, in code units, of a suffix
   *                      to move.
   * @param aOldSuffixStart The old start index of the suffix to
   *                        move.
   * @param aNewSuffixStart The new start index of the suffix to
   *                        move.
   *
   */
  mozilla::Result<size_type, nsresult> NS_FASTCALL StartBulkWriteImpl(
      size_type aCapacity, size_type aPrefixToPreserve = 0,
      bool aAllowShrinking = true, size_type aSuffixLength = 0,
      size_type aOldSuffixStart = 0, size_type aNewSuffixStart = 0);

 private:
  void AssignOwned(self_type&& aStr);
  bool AssignNonDependent(const substring_tuple_type& aTuple,
                          size_type aTupleLength,
                          const mozilla::fallible_t& aFallible);

  /**
   * Writes the zero terminator at mData[aLength] and then sets mLength to
   * aLength.
   *
   * Do not call this except from within FinishBulkWriteImpl() and
   * SetCapacity().
   */
  MOZ_ALWAYS_INLINE void NS_FASTCALL
  FinishBulkWriteImplImpl(LengthStorage aLength) {
    base_string_type::mData[aLength] = char_type(0);
    base_string_type::mLength = aLength;
#ifdef DEBUG
    // ifdefed in order to avoid the call to Capacity() in non-debug
    // builds.
    //
    // Our string is mutable, so Capacity() doesn't return zero.
    // Capacity() doesn't include the space for the zero terminator,
    // but we want to uninitialize that slot, too. Since we start
    // counting after the zero terminator that we just wrote above,
    // we end up overwriting the space for terminator not reflected
    // in the capacity number.
    char_traits::uninitialize(
        base_string_type::mData + aLength + 1,
        XPCOM_MIN(size_t(Capacity() - aLength), kNsStringBufferMaxPoison));
#endif
  }

 protected:
  /**
   * Restores the string to a valid state after a call to
   * StartBulkWriteImpl() that returned a non-error result. The argument to
   * this method must be less than or equal to the value returned by the most
   * recent StartBulkWriteImpl() call.
   */
  void NS_FASTCALL FinishBulkWriteImpl(size_type aLength);

  /**
   * this function prepares a section of mData to be modified. if
   * necessary, this function will reallocate mData and possibly move
   * existing data to open up the specified section.
   *
   * @param aCutStart specifies the starting offset of the section
   * @param aCutLength specifies the length of the section to be replaced
   * @param aNewLength specifies the length of the new section
   *
   * for example, suppose mData contains the string "abcdef" then
   *
   * ReplacePrep(2, 3, 4);
   *
   * would cause mData to look like "ab____f" where the characters
   * indicated by '_' have an unspecified value and can be freely
   * modified. this function will null-terminate mData upon return.
   *
   * this function returns false if it is unable to allocate sufficient
   * memory.
   */
  [[nodiscard]] bool ReplacePrep(index_type aCutStart, size_type aCutLength,
                                 size_type aNewLength);

  [[nodiscard]] bool NS_FASTCALL ReplacePrepInternal(index_type aCutStart,
                                                     size_type aCutLength,
                                                     size_type aNewFragLength,
                                                     size_type aNewTotalLength);

  /**
   * returns the number of writable storage units starting at mData.
   * the value does not include space for the null-terminator character.
   *
   * NOTE: this function returns 0 if mData is immutable (or the buffer
   * is 0-sized).
+ */ + size_type NS_FASTCALL Capacity() const; + + /** + * this helper function can be called prior to directly manipulating + * the contents of mData. see, for example, BeginWriting. + */ + [[nodiscard]] bool NS_FASTCALL + EnsureMutable(size_type aNewLen = size_type(-1)); + + void NS_FASTCALL ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength); + + public: + // NOTE: this method is declared public _only_ for convenience for + // callers who don't have access to the original nsLiteralString_CharT. + void NS_FASTCALL AssignLiteral(const char_type* aData, size_type aLength); +}; + +extern template class nsTSubstring<char>; +extern template class nsTSubstring<char16_t>; + +static_assert(sizeof(nsTSubstring<char>) == + sizeof(mozilla::detail::nsTStringRepr<char>), + "Don't add new data fields to nsTSubstring_CharT. " + "Add to nsTStringRepr<T> instead."); + +#include "nsCharSeparatedTokenizer.h" +#include "nsTDependentSubstring.h" + +/** + * Span integration + */ +namespace mozilla { +Span(const nsTSubstring<char>&)->Span<const char>; +Span(const nsTSubstring<char16_t>&)->Span<const char16_t>; + +} // namespace mozilla + +#endif diff --git a/xpcom/string/nsTSubstringTuple.cpp b/xpcom/string/nsTSubstringTuple.cpp new file mode 100644 index 0000000000..3219cb19fa --- /dev/null +++ b/xpcom/string/nsTSubstringTuple.cpp @@ -0,0 +1,92 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsTSubstringTuple.h" +#include "mozilla/CheckedInt.h" + +/** + * computes the aggregate string length + */ + +template <typename T> +typename nsTSubstringTuple<T>::size_type nsTSubstringTuple<T>::Length() const { + mozilla::CheckedInt<size_type> len; + if (mHead) { + len = mHead->Length(); + } else { + len = mFragA->Length(); + } + + len += mFragB->Length(); + MOZ_RELEASE_ASSERT(len.isValid(), "Substring tuple length is invalid"); + return len.value(); +} + +/** + * writes the aggregate string to the given buffer. aBufLen is assumed + * to be equal to or greater than the value returned by the Length() + * method. the string written to |aBuf| is not null-terminated. + */ + +template <typename T> +void nsTSubstringTuple<T>::WriteTo(char_type* aBuf, size_type aBufLen) const { + MOZ_RELEASE_ASSERT(aBufLen >= mFragB->Length(), "buffer too small"); + size_type headLen = aBufLen - mFragB->Length(); + if (mHead) { + mHead->WriteTo(aBuf, headLen); + } else { + MOZ_RELEASE_ASSERT(mFragA->Length() == headLen, "buffer incorrectly sized"); + char_traits::copy(aBuf, mFragA->Data(), mFragA->Length()); + } + + char_traits::copy(aBuf + headLen, mFragB->Data(), mFragB->Length()); +} + +/** + * returns true if this tuple is dependent on (i.e., overlapping with) + * the given char sequence. + */ + +template <typename T> +bool nsTSubstringTuple<T>::IsDependentOn(const char_type* aStart, + const char_type* aEnd) const { + // we start with the right-most fragment since it is faster to check. + + if (mFragB->IsDependentOn(aStart, aEnd)) { + return true; + } + + if (mHead) { + return mHead->IsDependentOn(aStart, aEnd); + } + + return mFragA->IsDependentOn(aStart, aEnd); +} + +template <typename T> +auto nsTSubstringTuple<T>::IsDependentOnWithLength(const char_type* aStart, + const char_type* aEnd) const + -> std::pair<bool, size_type> { + // we start with the right-most fragment since it is faster to check for + // dependency. 
+ const bool rightDependentOn = mFragB->IsDependentOn(aStart, aEnd); + + if (rightDependentOn) { + return {true, Length()}; + } + + const auto [leftDependentOn, leftLen] = + mHead ? mHead->IsDependentOnWithLength(aStart, aEnd) + : std::pair{mFragA->IsDependentOn(aStart, aEnd), mFragA->Length()}; + + const auto checkedLen = + mozilla::CheckedInt<size_type>{leftLen} + mFragB->Length(); + MOZ_RELEASE_ASSERT(checkedLen.isValid(), "Substring tuple length is invalid"); + return {leftDependentOn, checkedLen.value()}; +} + +template class nsTSubstringTuple<char>; +template class nsTSubstringTuple<char16_t>; diff --git a/xpcom/string/nsTSubstringTuple.h b/xpcom/string/nsTSubstringTuple.h new file mode 100644 index 0000000000..dff8dedca9 --- /dev/null +++ b/xpcom/string/nsTSubstringTuple.h @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsTSubstringTuple_h +#define nsTSubstringTuple_h + +#include "mozilla/Attributes.h" +#include "nsTStringRepr.h" + +/** + * nsTSubstringTuple + * + * Represents a tuple of string fragments. Built as a recursive binary tree. + * It is used to implement the concatenation of two or more string objects. + * + * NOTE: This class is a private implementation detail and should never be + * referenced outside the string code. 
 */
template <typename T>
class MOZ_TEMPORARY_CLASS nsTSubstringTuple {
 public:
  typedef T char_type;
  typedef nsCharTraits<char_type> char_traits;

  typedef nsTSubstringTuple<T> self_type;
  typedef mozilla::detail::nsTStringRepr<char_type> base_string_type;
  typedef size_t size_type;

 public:
  // Leaf tuple: two plain fragments, no nested head.  Only the pointers are
  // stored.
  nsTSubstringTuple(const base_string_type* aStrA,
                    const base_string_type* aStrB)
      : mHead(nullptr), mFragA(aStrA), mFragB(aStrB) {}

  // Nested tuple: everything to the left lives in aHead (stored by address),
  // aStrB is the right-most fragment.
  nsTSubstringTuple(const self_type& aHead, const base_string_type* aStrB)
      : mHead(&aHead),
        mFragA(nullptr),  // this fragment is ignored when mHead != nullptr
        mFragB(aStrB) {}

  /**
   * computes the aggregate string length
   */
  size_type Length() const;

  /**
   * writes the aggregate string to the given buffer. aBufLen is assumed
   * to be equal to or greater than the value returned by the Length()
   * method. the string written to |aBuf| is not null-terminated.
   */
  void WriteTo(char_type* aBuf, size_type aBufLen) const;

  /**
   * returns true if this tuple is dependent on (i.e., overlapping with)
   * the given char sequence.
   */
  bool IsDependentOn(const char_type* aStart, const char_type* aEnd) const;

  /**
   * returns a pair of the results of IsDependentOn() and Length(). This is more
   * efficient than calling both functions separately, as this traverses the
   * tree only once.
   */
  std::pair<bool, size_type> IsDependentOnWithLength(
      const char_type* aStart, const char_type* aEnd) const;

 private:
  // Left operand when it is itself a tuple; nullptr for a leaf tuple.
  const self_type* const mHead;
  // Left operand when it is a plain fragment; nullptr when mHead is set.
  const base_string_type* const mFragA;
  // Right-most fragment; set by both constructors.
  const base_string_type* const mFragB;
};

// string + string: starts a new tuple that points at both operands.
template <typename T>
inline const nsTSubstringTuple<T> operator+(
    const mozilla::detail::nsTStringRepr<T>& aStrA,
    const mozilla::detail::nsTStringRepr<T>& aStrB) {
  return nsTSubstringTuple<T>(&aStrA, &aStrB);
}

// tuple + string: chains the existing tuple as the head of a new one.
template <typename T>
inline const nsTSubstringTuple<T> operator+(
    const nsTSubstringTuple<T>& aHead,
    const mozilla::detail::nsTStringRepr<T>& aStrB) {
  return nsTSubstringTuple<T>(aHead, &aStrB);
}

#endif
diff --git a/xpcom/string/nsTextFormatter.cpp b/xpcom/string/nsTextFormatter.cpp new file mode 100644 index 0000000000..4db5338e2b --- /dev/null +++ b/xpcom/string/nsTextFormatter.cpp @@ -0,0 +1,895 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Portable safe sprintf code.
 *
 * Code based on mozilla/nsprpub/src/io/prprf.c rev 3.7
 *
 * Contributor(s):
 *   Kipp E.B.
Hickman <kipp@netscape.com> (original author) + * Frank Yung-Fong Tang <ftang@netscape.com> + * Daniele Nicolodi <daniele@grinta.net> + */ + +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include "prdtoa.h" +#include "mozilla/Logging.h" +#include "mozilla/Sprintf.h" +#include "nsCRTGlue.h" +#include "nsTextFormatter.h" + +struct nsTextFormatter::SprintfStateStr { + int (*stuff)(SprintfStateStr* aState, const char16_t* aStr, uint32_t aLen); + + char16_t* base; + char16_t* cur; + uint32_t maxlen; + + void* stuffclosure; +}; + +#define _LEFT 0x1 +#define _SIGNED 0x2 +#define _SPACED 0x4 +#define _ZEROS 0x8 +#define _NEG 0x10 +#define _UNSIGNED 0x20 + +#define ELEMENTS_OF(array_) (sizeof(array_) / sizeof(array_[0])) + +/* +** Fill into the buffer using the data in src +*/ +int nsTextFormatter::fill2(SprintfStateStr* aState, const char16_t* aSrc, + int aSrcLen, int aWidth, int aFlags) { + char16_t space = ' '; + int rv; + + aWidth -= aSrcLen; + /* Right adjusting */ + if ((aWidth > 0) && ((aFlags & _LEFT) == 0)) { + if (aFlags & _ZEROS) { + space = '0'; + } + while (--aWidth >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + } + + /* Copy out the source data */ + rv = (*aState->stuff)(aState, aSrc, aSrcLen); + if (rv < 0) { + return rv; + } + + /* Left adjusting */ + if ((aWidth > 0) && ((aFlags & _LEFT) != 0)) { + while (--aWidth >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + } + return 0; +} + +/* +** Fill a number. 
The order is: optional-sign zero-filling conversion-digits +*/ +int nsTextFormatter::fill_n(nsTextFormatter::SprintfStateStr* aState, + const char16_t* aSrc, int aSrcLen, int aWidth, + int aPrec, int aFlags) { + int zerowidth = 0; + int precwidth = 0; + int signwidth = 0; + int leftspaces = 0; + int rightspaces = 0; + int cvtwidth; + int rv; + char16_t sign; + char16_t space = ' '; + char16_t zero = '0'; + + if ((aFlags & _UNSIGNED) == 0) { + if (aFlags & _NEG) { + sign = '-'; + signwidth = 1; + } else if (aFlags & _SIGNED) { + sign = '+'; + signwidth = 1; + } else if (aFlags & _SPACED) { + sign = ' '; + signwidth = 1; + } + } + cvtwidth = signwidth + aSrcLen; + + if (aPrec > 0) { + if (aPrec > aSrcLen) { + /* Need zero filling */ + precwidth = aPrec - aSrcLen; + cvtwidth += precwidth; + } + } + + if ((aFlags & _ZEROS) && (aPrec < 0)) { + if (aWidth > cvtwidth) { + /* Zero filling */ + zerowidth = aWidth - cvtwidth; + cvtwidth += zerowidth; + } + } + + if (aFlags & _LEFT) { + if (aWidth > cvtwidth) { + /* Space filling on the right (i.e. left adjusting) */ + rightspaces = aWidth - cvtwidth; + } + } else { + if (aWidth > cvtwidth) { + /* Space filling on the left (i.e. 
right adjusting) */ + leftspaces = aWidth - cvtwidth; + } + } + while (--leftspaces >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + if (signwidth) { + rv = (*aState->stuff)(aState, &sign, 1); + if (rv < 0) { + return rv; + } + } + while (--precwidth >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + while (--zerowidth >= 0) { + rv = (*aState->stuff)(aState, &zero, 1); + if (rv < 0) { + return rv; + } + } + rv = (*aState->stuff)(aState, aSrc, aSrcLen); + if (rv < 0) { + return rv; + } + while (--rightspaces >= 0) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + return 0; +} + +/* +** Convert a 64-bit integer into its printable form +*/ +int nsTextFormatter::cvt_ll(SprintfStateStr* aState, uint64_t aNum, int aWidth, + int aPrec, int aRadix, int aFlags, + const char16_t* aHexStr) { + char16_t cvtbuf[100]; + char16_t* cvt; + int digits; + + /* according to the man page this needs to happen */ + if (aPrec == 0 && aNum == 0) { + return 0; + } + + /* + ** Converting decimal is a little tricky. In the unsigned case we + ** need to stop when we hit 10 digits. In the signed case, we can + ** stop when the number is zero. + */ + cvt = &cvtbuf[0] + ELEMENTS_OF(cvtbuf); + digits = 0; + while (aNum != 0) { + uint64_t quot = aNum / aRadix; + uint64_t rem = aNum % aRadix; + *--cvt = aHexStr[rem & 0xf]; + digits++; + aNum = quot; + } + if (digits == 0) { + *--cvt = '0'; + digits++; + } + + /* + ** Now that we have the number converted without its sign, deal with + ** the sign and zero padding. + */ + return fill_n(aState, cvt, digits, aWidth, aPrec, aFlags); +} + +/* +** Convert a double precision floating point number into its printable +** form. 
+*/ +int nsTextFormatter::cvt_f(SprintfStateStr* aState, double aDouble, int aWidth, + int aPrec, const char16_t aType, int aFlags) { + int mode = 2; + int decpt; + int sign; + char buf[256]; + char* bufp = buf; + int bufsz = 256; + char num[256]; + char* nump; + char* endnum; + int numdigits = 0; + char exp = 'e'; + + if (aPrec == -1) { + aPrec = 6; + } else if (aPrec > 50) { + // limit precision to avoid PR_dtoa bug 108335 + // and to prevent buffers overflows + aPrec = 50; + } + + switch (aType) { + case 'f': + numdigits = aPrec; + mode = 3; + break; + case 'E': + exp = 'E'; + [[fallthrough]]; + case 'e': + numdigits = aPrec + 1; + mode = 2; + break; + case 'G': + exp = 'E'; + [[fallthrough]]; + case 'g': + if (aPrec == 0) { + aPrec = 1; + } + numdigits = aPrec; + mode = 2; + break; + default: + NS_ERROR("invalid aType passed to cvt_f"); + } + + if (PR_dtoa(aDouble, mode, numdigits, &decpt, &sign, &endnum, num, bufsz) == + PR_FAILURE) { + buf[0] = '\0'; + return -1; + } + numdigits = endnum - num; + nump = num; + + if (sign) { + *bufp++ = '-'; + } else if (aFlags & _SIGNED) { + *bufp++ = '+'; + } + + if (decpt == 9999) { + while ((*bufp++ = *nump++)) { + } + } else { + switch (aType) { + case 'E': + case 'e': + + *bufp++ = *nump++; + if (aPrec > 0) { + *bufp++ = '.'; + while (*nump) { + *bufp++ = *nump++; + aPrec--; + } + while (aPrec-- > 0) { + *bufp++ = '0'; + } + } + *bufp++ = exp; + + ::snprintf(bufp, bufsz - (bufp - buf), "%+03d", decpt - 1); + break; + + case 'f': + + if (decpt < 1) { + *bufp++ = '0'; + if (aPrec > 0) { + *bufp++ = '.'; + while (decpt++ && aPrec-- > 0) { + *bufp++ = '0'; + } + while (*nump && aPrec-- > 0) { + *bufp++ = *nump++; + } + while (aPrec-- > 0) { + *bufp++ = '0'; + } + } + } else { + while (*nump && decpt-- > 0) { + *bufp++ = *nump++; + } + while (decpt-- > 0) { + *bufp++ = '0'; + } + if (aPrec > 0) { + *bufp++ = '.'; + while (*nump && aPrec-- > 0) { + *bufp++ = *nump++; + } + while (aPrec-- > 0) { + *bufp++ = '0'; + } + } + } + 
*bufp = '\0'; + break; + + case 'G': + case 'g': + + if ((decpt < -3) || ((decpt - 1) >= aPrec)) { + *bufp++ = *nump++; + numdigits--; + if (numdigits > 0) { + *bufp++ = '.'; + while (*nump) { + *bufp++ = *nump++; + } + } + *bufp++ = exp; + ::snprintf(bufp, bufsz - (bufp - buf), "%+03d", decpt - 1); + } else { + if (decpt < 1) { + *bufp++ = '0'; + if (aPrec > 0) { + *bufp++ = '.'; + while (decpt++) { + *bufp++ = '0'; + } + while (*nump) { + *bufp++ = *nump++; + } + } + } else { + while (*nump && decpt-- > 0) { + *bufp++ = *nump++; + numdigits--; + } + while (decpt-- > 0) { + *bufp++ = '0'; + } + if (numdigits > 0) { + *bufp++ = '.'; + while (*nump) { + *bufp++ = *nump++; + } + } + } + *bufp = '\0'; + } + } + } + + char16_t rbuf[256]; + char16_t* rbufp = rbuf; + bufp = buf; + // cast to char16_t + while ((*rbufp++ = *bufp++)) { + } + *rbufp = '\0'; + + return fill2(aState, rbuf, NS_strlen(rbuf), aWidth, aFlags); +} + +/* +** Convert a string into its printable form. |aWidth| is the output +** width. |aPrec| is the maximum number of characters of |aStr| to output, +** where -1 means until NUL. +*/ +int nsTextFormatter::cvt_S(SprintfStateStr* aState, const char16_t* aStr, + int aWidth, int aPrec, int aFlags) { + int slen; + + if (aPrec == 0) { + return 0; + } + + /* Limit string length by precision value */ + slen = aStr ? NS_strlen(aStr) : 6; + if (aPrec > 0) { + if (aPrec < slen) { + slen = aPrec; + } + } + + /* and away we go */ + return fill2(aState, aStr ? aStr : u"(null)", slen, aWidth, aFlags); +} + +/* +** Convert a string into its printable form. |aWidth| is the output +** width. |aPrec| is the maximum number of characters of |aStr| to output, +** where -1 means until NUL. +*/ +int nsTextFormatter::cvt_s(nsTextFormatter::SprintfStateStr* aState, + const char* aStr, int aWidth, int aPrec, + int aFlags) { + // Be sure to handle null the same way as %S. 
+ if (aStr == nullptr) { + return cvt_S(aState, nullptr, aWidth, aPrec, aFlags); + } + NS_ConvertUTF8toUTF16 utf16Val(aStr); + return cvt_S(aState, utf16Val.get(), aWidth, aPrec, aFlags); +} + +/* +** The workhorse sprintf code. +*/ +int nsTextFormatter::dosprintf(SprintfStateStr* aState, const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues) { + static const char16_t space = ' '; + static const char16_t hex[] = u"0123456789abcdef"; + static const char16_t HEX[] = u"0123456789ABCDEF"; + static const BoxedValue emptyString(u""); + + char16_t c; + int flags, width, prec, radix; + + const char16_t* hexp; + + // Next argument for non-numbered arguments. + size_t nextNaturalArg = 0; + // True if we ever saw a numbered argument. + bool sawNumberedArg = false; + + while ((c = *aFmt++) != 0) { + int rv; + + if (c != '%') { + rv = (*aState->stuff)(aState, aFmt - 1, 1); + if (rv < 0) { + return rv; + } + continue; + } + + // Save the location of the "%" in case we decide it isn't a + // format and want to just emit the text from the format string. + const char16_t* percentPointer = aFmt - 1; + + /* + ** Gobble up the % format string. Hopefully we have handled all + ** of the strange cases! + */ + flags = 0; + c = *aFmt++; + if (c == '%') { + /* quoting a % with %% */ + rv = (*aState->stuff)(aState, aFmt - 1, 1); + if (rv < 0) { + return rv; + } + continue; + } + + // Check for a numbered argument. + bool sawWidth = false; + const BoxedValue* thisArg = nullptr; + if (c >= '0' && c <= '9') { + size_t argNumber = 0; + while (c && c >= '0' && c <= '9') { + argNumber = (argNumber * 10) + (c - '0'); + c = *aFmt++; + } + + if (c == '$') { + // Mixing numbered arguments and implicit arguments is + // disallowed. + if (nextNaturalArg > 0) { + return -1; + } + + c = *aFmt++; + + // Numbered arguments start at 1. + --argNumber; + if (argNumber >= aValues.Length()) { + // A correctness issue but not a safety issue. 
+ MOZ_ASSERT(false); + thisArg = &emptyString; + } else { + thisArg = &aValues[argNumber]; + } + sawNumberedArg = true; + } else { + width = argNumber; + sawWidth = true; + } + } + + if (!sawWidth) { + /* + * Examine optional flags. Note that we do not implement the + * '#' flag of sprintf(). The ANSI C spec. of the '#' flag is + * somewhat ambiguous and not ideal, which is perhaps why + * the various sprintf() implementations are inconsistent + * on this feature. + */ + while ((c == '-') || (c == '+') || (c == ' ') || (c == '0')) { + if (c == '-') { + flags |= _LEFT; + } + if (c == '+') { + flags |= _SIGNED; + } + if (c == ' ') { + flags |= _SPACED; + } + if (c == '0') { + flags |= _ZEROS; + } + c = *aFmt++; + } + if (flags & _SIGNED) { + flags &= ~_SPACED; + } + if (flags & _LEFT) { + flags &= ~_ZEROS; + } + + /* width */ + if (c == '*') { + // Not supported with numbered arguments. + if (sawNumberedArg) { + return -1; + } + + if (nextNaturalArg >= aValues.Length() || + !aValues[nextNaturalArg].IntCompatible()) { + // A correctness issue but not a safety issue. + MOZ_ASSERT(false); + width = 0; + } else { + width = aValues[nextNaturalArg++].mValue.mInt; + } + c = *aFmt++; + } else { + width = 0; + while ((c >= '0') && (c <= '9')) { + width = (width * 10) + (c - '0'); + c = *aFmt++; + } + } + } + + /* precision */ + prec = -1; + if (c == '.') { + c = *aFmt++; + if (c == '*') { + // Not supported with numbered arguments. + if (sawNumberedArg) { + return -1; + } + + if (nextNaturalArg >= aValues.Length() || + !aValues[nextNaturalArg].IntCompatible()) { + // A correctness issue but not a safety issue. + MOZ_ASSERT(false); + } else { + prec = aValues[nextNaturalArg++].mValue.mInt; + } + c = *aFmt++; + } else { + prec = 0; + while ((c >= '0') && (c <= '9')) { + prec = (prec * 10) + (c - '0'); + c = *aFmt++; + } + } + } + + // If the argument isn't known yet, find it now. This is done + // after the width and precision code, in case '*' was used. 
+ if (thisArg == nullptr) { + // Mixing numbered arguments and implicit arguments is + // disallowed. + if (sawNumberedArg) { + return -1; + } + + if (nextNaturalArg >= aValues.Length()) { + // A correctness issue but not a safety issue. + MOZ_ASSERT(false); + thisArg = &emptyString; + } else { + thisArg = &aValues[nextNaturalArg++]; + } + } + + /* Size. Defaults to 32 bits. */ + uint64_t mask = UINT32_MAX; + if (c == 'h') { + c = *aFmt++; + mask = UINT16_MAX; + } else if (c == 'L') { + c = *aFmt++; + mask = UINT64_MAX; + } else if (c == 'l') { + c = *aFmt++; + if (c == 'l') { + c = *aFmt++; + mask = UINT64_MAX; + } else { + mask = UINT32_MAX; + } + } + + /* format */ + hexp = hex; + radix = 10; + // Several `MOZ_ASSERT`s below check for argument compatibility + // with the format specifier. These are only debug assertions, + // not release assertions, and exist to catch problems in C++ + // callers of `nsTextFormatter`, as we do not have compile-time + // checking of format strings. In release mode, these assertions + // will be no-ops, and we will fall through to printing the + // argument based on the known type of the argument. + switch (c) { + case 'd': + case 'i': /* decimal/integer */ + MOZ_ASSERT(thisArg->IntCompatible()); + break; + + case 'o': /* octal */ + MOZ_ASSERT(thisArg->IntCompatible()); + radix = 8; + flags |= _UNSIGNED; + break; + + case 'u': /* unsigned decimal */ + MOZ_ASSERT(thisArg->IntCompatible()); + radix = 10; + flags |= _UNSIGNED; + break; + + case 'x': /* unsigned hex */ + MOZ_ASSERT(thisArg->IntCompatible()); + radix = 16; + flags |= _UNSIGNED; + break; + + case 'X': /* unsigned HEX */ + MOZ_ASSERT(thisArg->IntCompatible()); + radix = 16; + hexp = HEX; + flags |= _UNSIGNED; + break; + + case 'e': + case 'E': + case 'f': + case 'g': + case 'G': + MOZ_ASSERT(thisArg->mKind == DOUBLE); + // Type-based printing below. 
+ break; + + case 'S': + case 's': + MOZ_ASSERT(thisArg->mKind == STRING || thisArg->mKind == STRING16); + // Type-based printing below. + break; + + case 'c': { + if (!thisArg->IntCompatible()) { + MOZ_ASSERT(false); + // Type-based printing below. + break; + } + + if ((flags & _LEFT) == 0) { + while (width-- > 1) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + } + char16_t ch = thisArg->mValue.mInt; + rv = (*aState->stuff)(aState, &ch, 1); + if (rv < 0) { + return rv; + } + if (flags & _LEFT) { + while (width-- > 1) { + rv = (*aState->stuff)(aState, &space, 1); + if (rv < 0) { + return rv; + } + } + } + } + continue; + + case 'p': + if (!thisArg->PointerCompatible()) { + MOZ_ASSERT(false); + break; + } + static_assert(sizeof(uint64_t) >= sizeof(void*), + "pointers are larger than 64 bits"); + rv = cvt_ll(aState, uintptr_t(thisArg->mValue.mPtr), width, prec, 16, + flags | _UNSIGNED, hexp); + if (rv < 0) { + return rv; + } + continue; + + case 'n': + if (thisArg->mKind != INTPOINTER) { + return -1; + } + + if (thisArg->mValue.mIntPtr != nullptr) { + *thisArg->mValue.mIntPtr = aState->cur - aState->base; + } + continue; + + default: + /* Not a % token after all... skip it */ + rv = (*aState->stuff)(aState, percentPointer, aFmt - percentPointer); + if (rv < 0) { + return rv; + } + continue; + } + + // If we get here, we want to handle the argument according to its + // actual type; modified by the flags as appropriate. + switch (thisArg->mKind) { + case INT: + case UINT: { + int64_t val = thisArg->mValue.mInt; + if ((flags & _UNSIGNED) == 0 && val < 0) { + val = -val; + flags |= _NEG; + } + rv = cvt_ll(aState, uint64_t(val) & mask, width, prec, radix, flags, + hexp); + } break; + case INTPOINTER: + case POINTER: + // Always treat these as unsigned hex, no matter the format. 
+ static_assert(sizeof(uint64_t) >= sizeof(void*), + "pointers are larger than 64 bits"); + rv = cvt_ll(aState, uintptr_t(thisArg->mValue.mPtr), width, prec, 16, + flags | _UNSIGNED, hexp); + break; + case DOUBLE: + if (c != 'f' && c != 'E' && c != 'e' && c != 'G' && c != 'g') { + // Pick some default. + c = 'g'; + } + rv = cvt_f(aState, thisArg->mValue.mDouble, width, prec, c, flags); + break; + case STRING: + rv = cvt_s(aState, thisArg->mValue.mString, width, prec, flags); + break; + case STRING16: + rv = cvt_S(aState, thisArg->mValue.mString16, width, prec, flags); + break; + default: + // Can't happen. + MOZ_ASSERT(0); + } + + if (rv < 0) { + return rv; + } + } + + return 0; +} + +/************************************************************************/ + +int nsTextFormatter::StringStuff(nsTextFormatter::SprintfStateStr* aState, + const char16_t* aStr, uint32_t aLen) { + ptrdiff_t off = aState->cur - aState->base; + + nsAString* str = static_cast<nsAString*>(aState->stuffclosure); + str->Append(aStr, aLen); + + aState->base = str->BeginWriting(); + aState->cur = aState->base + off; + + return 0; +} + +void nsTextFormatter::vssprintf(nsAString& aOut, const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues) { + SprintfStateStr ss; + ss.stuff = StringStuff; + ss.base = 0; + ss.cur = 0; + ss.maxlen = 0; + ss.stuffclosure = &aOut; + + aOut.Truncate(); + dosprintf(&ss, aFmt, aValues); +} + +/* +** Stuff routine that discards overflow data +*/ +int nsTextFormatter::LimitStuff(SprintfStateStr* aState, const char16_t* aStr, + uint32_t aLen) { + uint32_t limit = aState->maxlen - (aState->cur - aState->base); + + if (aLen > limit) { + aLen = limit; + } + while (aLen) { + --aLen; + *aState->cur++ = *aStr++; + } + return 0; +} + +uint32_t nsTextFormatter::vsnprintf(char16_t* aOut, uint32_t aOutLen, + const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues) { + SprintfStateStr ss; + + MOZ_ASSERT((int32_t)aOutLen > 0); + if ((int32_t)aOutLen <= 0) { + return 0; + } + + 
ss.stuff = LimitStuff; + ss.base = aOut; + ss.cur = aOut; + ss.maxlen = aOutLen; + int result = dosprintf(&ss, aFmt, aValues); + + if (ss.cur == ss.base) { + return 0; + } + + // Append a NUL. However, be sure not to count it in the returned + // length. + if (ss.cur - ss.base >= ptrdiff_t(ss.maxlen)) { + --ss.cur; + } + *ss.cur = '\0'; + + // Check the result now, so that an unterminated string can't + // possibly escape. + if (result < 0) { + return -1; + } + + return ss.cur - ss.base; +} diff --git a/xpcom/string/nsTextFormatter.h b/xpcom/string/nsTextFormatter.h new file mode 100644 index 0000000000..f571043da2 --- /dev/null +++ b/xpcom/string/nsTextFormatter.h @@ -0,0 +1,172 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This code was copied from xpcom/ds/nsTextFormatter r1.3 + * Memory model and Frozen linkage changes only. + * -- Prasad <prasad@medhas.org> + */ + +#ifndef nsTextFormatter_h___ +#define nsTextFormatter_h___ + +/* + ** API for PR printf like routines. 
Supports the following formats + ** %d - decimal + ** %u - unsigned decimal + ** %x - unsigned hex + ** %X - unsigned uppercase hex + ** %o - unsigned octal + ** %hd, %hu, %hx, %hX, %ho - 16-bit versions of above + ** %ld, %lu, %lx, %lX, %lo - 32-bit versions of above + ** %lld, %llu, %llx, %llX, %llo - 64 bit versions of above + ** %s - utf8 string + ** %S - char16_t string + ** %c - character + ** %p - pointer (deals with machine dependent pointer size) + ** %f - float + ** %g - float + */ +#include <stdio.h> +#include <stdarg.h> +#include "nscore.h" +#include "nsString.h" +#include "mozilla/Span.h" + +#ifdef XPCOM_GLUE +# error \ + "nsTextFormatter is not available in the standalone glue due to NSPR dependencies." +#endif + +class nsTextFormatter { + public: + /* + * sprintf into a fixed size buffer. Guarantees that the buffer is null + * terminated. Returns the length of the written output, NOT including the + * null terminator, or (uint32_t)-1 if an error occurs. + */ + template <typename... T> + static uint32_t snprintf(char16_t* aOut, uint32_t aOutLen, + const char16_t* aFmt, T... aArgs) { + BoxedValue values[] = {BoxedValue(aArgs)...}; + return vsnprintf(aOut, aOutLen, aFmt, + mozilla::Span(values, sizeof...(aArgs))); + } + + /* + * sprintf into an existing nsAString, overwriting any contents it already + * has. Infallible. + */ + template <typename... T> + static void ssprintf(nsAString& aOut, const char16_t* aFmt, T... 
aArgs) { + BoxedValue values[] = {BoxedValue(aArgs)...}; + vssprintf(aOut, aFmt, mozilla::Span(values, sizeof...(aArgs))); + } + + private: + enum ArgumentKind { + INT, + UINT, + POINTER, + DOUBLE, + STRING, + STRING16, + INTPOINTER, + }; + + union ValueUnion { + int64_t mInt; + uint64_t mUInt; + void const* mPtr; + double mDouble; + char const* mString; + char16_t const* mString16; + int* mIntPtr; + }; + + struct BoxedValue { + ArgumentKind mKind; + ValueUnion mValue; + + explicit BoxedValue(int aArg) : mKind(INT) { mValue.mInt = aArg; } + + explicit BoxedValue(unsigned int aArg) : mKind(UINT) { + mValue.mUInt = aArg; + } + + explicit BoxedValue(long aArg) : mKind(INT) { mValue.mInt = aArg; } + + explicit BoxedValue(unsigned long aArg) : mKind(UINT) { + mValue.mUInt = aArg; + } + + explicit BoxedValue(long long aArg) : mKind(INT) { mValue.mInt = aArg; } + + explicit BoxedValue(unsigned long long aArg) : mKind(UINT) { + mValue.mUInt = aArg; + } + + explicit BoxedValue(const void* aArg) : mKind(POINTER) { + mValue.mPtr = aArg; + } + + explicit BoxedValue(double aArg) : mKind(DOUBLE) { mValue.mDouble = aArg; } + + explicit BoxedValue(const char* aArg) : mKind(STRING) { + mValue.mString = aArg; + } + + explicit BoxedValue(const char16_t* aArg) : mKind(STRING16) { + mValue.mString16 = aArg; + } + +#if defined(MOZ_USE_CHAR16_WRAPPER) + explicit BoxedValue(const char16ptr_t aArg) : mKind(STRING16) { + mValue.mString16 = aArg; + } + +#endif + + explicit BoxedValue(int* aArg) : mKind(INTPOINTER) { + mValue.mIntPtr = aArg; + } + + bool IntCompatible() const { return mKind == INT || mKind == UINT; } + + bool PointerCompatible() const { + return mKind == POINTER || mKind == STRING || mKind == STRING16 || + mKind == INTPOINTER; + } + }; + + struct SprintfStateStr; + + static int fill2(SprintfStateStr* aState, const char16_t* aSrc, int aSrcLen, + int aWidth, int aFlags); + static int fill_n(SprintfStateStr* aState, const char16_t* aSrc, int aSrcLen, + int aWidth, int aPrec, int 
aFlags); + static int cvt_ll(SprintfStateStr* aState, uint64_t aNum, int aWidth, + int aPrec, int aRadix, int aFlags, const char16_t* aHexStr); + static int cvt_f(SprintfStateStr* aState, double aDouble, int aWidth, + int aPrec, const char16_t aType, int aFlags); + static int cvt_S(SprintfStateStr* aState, const char16_t* aStr, int aWidth, + int aPrec, int aFlags); + static int cvt_s(SprintfStateStr* aState, const char* aStr, int aWidth, + int aPrec, int aFlags); + static int dosprintf(SprintfStateStr* aState, const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues); + static int StringStuff(SprintfStateStr* aState, const char16_t* aStr, + uint32_t aLen); + static int LimitStuff(SprintfStateStr* aState, const char16_t* aStr, + uint32_t aLen); + static uint32_t vsnprintf(char16_t* aOut, uint32_t aOutLen, + const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues); + static void vssprintf(nsAString& aOut, const char16_t* aFmt, + mozilla::Span<BoxedValue> aValues); +}; + +#endif /* nsTextFormatter_h___ */ diff --git a/xpcom/string/nsUTF8Utils.h b/xpcom/string/nsUTF8Utils.h new file mode 100644 index 0000000000..0145011ec1 --- /dev/null +++ b/xpcom/string/nsUTF8Utils.h @@ -0,0 +1,247 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef nsUTF8Utils_h_ +#define nsUTF8Utils_h_ + +// NB: This code may be used from non-XPCOM code, in particular, the +// standalone updater executable. That is, this file may be used in +// two ways: if MOZILLA_INTERNAL_API is defined, this file will +// provide signatures for the Mozilla abstract string types. It will +// use XPCOM assertion/debugging macros, etc. 
+ +#include <type_traits> + +#include "mozilla/Assertions.h" +#include "mozilla/EndianUtils.h" + +#include "nsCharTraits.h" + +#ifdef MOZILLA_INTERNAL_API +# define UTF8UTILS_WARNING(msg) NS_WARNING(msg) +#else +# define UTF8UTILS_WARNING(msg) +#endif + +class UTF8traits { + public: + static bool isASCII(char aChar) { return (aChar & 0x80) == 0x00; } + static bool isInSeq(char aChar) { return (aChar & 0xC0) == 0x80; } + static bool is2byte(char aChar) { return (aChar & 0xE0) == 0xC0; } + static bool is3byte(char aChar) { return (aChar & 0xF0) == 0xE0; } + static bool is4byte(char aChar) { return (aChar & 0xF8) == 0xF0; } + static bool is5byte(char aChar) { return (aChar & 0xFC) == 0xF8; } + static bool is6byte(char aChar) { return (aChar & 0xFE) == 0xFC; } + // return the number of bytes in a sequence beginning with aChar + static int bytes(char aChar) { + if (isASCII(aChar)) { + return 1; + } + if (is2byte(aChar)) { + return 2; + } + if (is3byte(aChar)) { + return 3; + } + if (is4byte(aChar)) { + return 4; + } + MOZ_ASSERT_UNREACHABLE("should not be used for in-sequence characters"); + return 1; + } +}; + +/** + * Extract the next Unicode scalar value from the buffer and return it. The + * pointer passed in is advanced to the start of the next character in the + * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced + * over the maximal valid prefix and *aErr is set to true (if aErr is not + * null). + * + * Note: This method never sets *aErr to false to allow error accumulation + * across multiple calls. 
+ * + * Precondition: *aBuffer < aEnd + */ +class UTF8CharEnumerator { + public: + static inline char32_t NextChar(const char** aBuffer, const char* aEnd, + bool* aErr = nullptr) { + MOZ_ASSERT(aBuffer, "null buffer pointer pointer"); + MOZ_ASSERT(aEnd, "null end pointer"); + + const unsigned char* p = reinterpret_cast<const unsigned char*>(*aBuffer); + const unsigned char* end = reinterpret_cast<const unsigned char*>(aEnd); + + MOZ_ASSERT(p, "null buffer"); + MOZ_ASSERT(p < end, "Bogus range"); + + unsigned char first = *p; + ++p; + + if (MOZ_LIKELY(first < 0x80U)) { + *aBuffer = reinterpret_cast<const char*>(p); + return first; + } + + // Unsigned underflow is defined behavior + if (MOZ_UNLIKELY((p == end) || ((first - 0xC2U) >= (0xF5U - 0xC2U)))) { + *aBuffer = reinterpret_cast<const char*>(p); + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } + + unsigned char second = *p; + + if (first < 0xE0U) { + // Two-byte + if (MOZ_LIKELY((second & 0xC0U) == 0x80U)) { + ++p; + *aBuffer = reinterpret_cast<const char*>(p); + return ((uint32_t(first) & 0x1FU) << 6) | (uint32_t(second) & 0x3FU); + } + *aBuffer = reinterpret_cast<const char*>(p); + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } + + if (MOZ_LIKELY(first < 0xF0U)) { + // Three-byte + unsigned char lower = 0x80U; + unsigned char upper = 0xBFU; + if (first == 0xE0U) { + lower = 0xA0U; + } else if (first == 0xEDU) { + upper = 0x9FU; + } + if (MOZ_LIKELY(second >= lower && second <= upper)) { + ++p; + if (MOZ_LIKELY(p != end)) { + unsigned char third = *p; + if (MOZ_LIKELY((third & 0xC0U) == 0x80U)) { + ++p; + *aBuffer = reinterpret_cast<const char*>(p); + return ((uint32_t(first) & 0xFU) << 12) | + ((uint32_t(second) & 0x3FU) << 6) | + (uint32_t(third) & 0x3FU); + } + } + } + *aBuffer = reinterpret_cast<const char*>(p); + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } + + // Four-byte + unsigned char lower = 0x80U; + unsigned char upper = 0xBFU; + if (first == 0xF0U) { + lower = 0x90U; + } else 
if (first == 0xF4U) { + upper = 0x8FU; + } + if (MOZ_LIKELY(second >= lower && second <= upper)) { + ++p; + if (MOZ_LIKELY(p != end)) { + unsigned char third = *p; + if (MOZ_LIKELY((third & 0xC0U) == 0x80U)) { + ++p; + if (MOZ_LIKELY(p != end)) { + unsigned char fourth = *p; + if (MOZ_LIKELY((fourth & 0xC0U) == 0x80U)) { + ++p; + *aBuffer = reinterpret_cast<const char*>(p); + return ((uint32_t(first) & 0x7U) << 18) | + ((uint32_t(second) & 0x3FU) << 12) | + ((uint32_t(third) & 0x3FU) << 6) | + (uint32_t(fourth) & 0x3FU); + } + } + } + } + } + *aBuffer = reinterpret_cast<const char*>(p); + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } +}; + +/** + * Extract the next Unicode scalar value from the buffer and return it. The + * pointer passed in is advanced to the start of the next character in the + * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced over + * the unpaired surrogate and *aErr is set to true (if aErr is not null). + * + * Note: This method never sets *aErr to false to allow error accumulation + * across multiple calls. + * + * Precondition: *aBuffer < aEnd + */ +class UTF16CharEnumerator { + public: + static inline char32_t NextChar(const char16_t** aBuffer, + const char16_t* aEnd, bool* aErr = nullptr) { + MOZ_ASSERT(aBuffer, "null buffer pointer pointer"); + MOZ_ASSERT(aEnd, "null end pointer"); + + const char16_t* p = *aBuffer; + + MOZ_ASSERT(p, "null buffer"); + MOZ_ASSERT(p < aEnd, "Bogus range"); + + char16_t c = *p++; + + // Let's use encoding_rs-style code golf here. 
+ // Unsigned underflow is defined behavior + char16_t cMinusSurrogateStart = c - 0xD800U; + if (MOZ_LIKELY(cMinusSurrogateStart > (0xDFFFU - 0xD800U))) { + *aBuffer = p; + return c; + } + if (MOZ_LIKELY(cMinusSurrogateStart <= (0xDBFFU - 0xD800U))) { + // High surrogate + if (MOZ_LIKELY(p != aEnd)) { + char16_t second = *p; + // Unsigned underflow is defined behavior + if (MOZ_LIKELY((second - 0xDC00U) <= (0xDFFFU - 0xDC00U))) { + *aBuffer = ++p; + return (uint32_t(c) << 10) + uint32_t(second) - + (((0xD800U << 10) - 0x10000U) + 0xDC00U); + } + } + } + // Unpaired surrogate + *aBuffer = p; + if (aErr) { + *aErr = true; + } + return 0xFFFDU; + } +}; + +template <typename Char, typename UnsignedT> +inline UnsignedT RewindToPriorUTF8Codepoint(const Char* utf8Chars, + UnsignedT index) { + static_assert(std::is_same_v<Char, char> || + std::is_same_v<Char, unsigned char> || + std::is_same_v<Char, signed char>, + "UTF-8 data must be in 8-bit units"); + static_assert(std::is_unsigned_v<UnsignedT>, "index type must be unsigned"); + while (index > 0 && (utf8Chars[index] & 0xC0) == 0x80) --index; + + return index; +} + +#undef UTF8UTILS_WARNING + +#endif /* !defined(nsUTF8Utils_h_) */ |