diff options
Diffstat (limited to '')
-rw-r--r-- | xpcom/string/nsReadableUtils.h | 610 |
1 files changed, 610 insertions, 0 deletions
diff --git a/xpcom/string/nsReadableUtils.h b/xpcom/string/nsReadableUtils.h new file mode 100644 index 0000000000..803c6b5d2f --- /dev/null +++ b/xpcom/string/nsReadableUtils.h @@ -0,0 +1,610 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsReadableUtils_h___ +#define nsReadableUtils_h___ + +/** + * I guess all the routines in this file are all mis-named. + * According to our conventions, they should be |NS_xxx|. + */ + +#include "mozilla/Assertions.h" +#include "nsAString.h" +#include "mozilla/TextUtils.h" + +#include "nsTArrayForwardDeclare.h" + +// From the nsstring crate +extern "C" { +bool nsstring_fallible_append_utf8_impl(nsAString* aThis, const char* aOther, + size_t aOtherLen, size_t aOldLen); + +bool nsstring_fallible_append_latin1_impl(nsAString* aThis, const char* aOther, + size_t aOtherLen, size_t aOldLen, + bool aAllowShrinking); + +bool nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis, + const char16_t*, + size_t aOtherLen, + size_t aOldLen); + +bool nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis, + const char16_t*, + size_t aOtherLen, + size_t aOldLen, + bool aAllowShrinking); + +bool nscstring_fallible_append_utf8_to_latin1_lossy_check( + nsACString* aThis, const nsACString* aOther, size_t aOldLen); + +bool nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis, + const nsACString* aOther, + size_t aOldLen); +} + +inline size_t Distance(const nsReadingIterator<char16_t>& aStart, + const nsReadingIterator<char16_t>& aEnd) { + MOZ_ASSERT(aStart.get() <= aEnd.get()); + return static_cast<size_t>(aEnd.get() - aStart.get()); +} + +inline size_t Distance(const nsReadingIterator<char>& aStart, + const nsReadingIterator<char>& aEnd) { + MOZ_ASSERT(aStart.get() <= aEnd.get()); + return static_cast<size_t>(aEnd.get() - aStart.get()); +} + +// NOTE: Operations that don't need an operand to be an XPCOM string +// are in mozilla/TextUtils.h and mozilla/Utf8.h. + +// UTF-8 to UTF-16 +// Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER. + +[[nodiscard]] inline bool CopyUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(), + aSource.Length(), 0); +} + +inline void CopyUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_utf8_impl(&aDest, aSource.Elements(), + aSource.Length(), aDest.Length()); +} + +inline void AppendUTF8toUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// Latin1 to UTF-16 +// Interpret each incoming unsigned byte value as a Unicode scalar value (not +// windows-1252!). The function names say "ASCII" instead of "Latin1" for +// legacy reasons. + +[[nodiscard]] inline bool CopyASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_latin1_impl(&aDest, aSource.Elements(), + aSource.Length(), 0, true); +} + +inline void CopyASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest, + const mozilla::fallible_t&) { + return nsstring_fallible_append_latin1_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false); +} + +inline void AppendASCIItoUTF16(mozilla::Span<const char> aSource, + nsAString& aDest) { + if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-16 to UTF-8 +// Unpaired surrogates are replaced with the REPLACEMENT CHARACTER. + +[[nodiscard]] inline bool CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_utf8_impl( + &aDest, aSource.Elements(), aSource.Length(), 0); +} + +inline void CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendUTF16toUTF8( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_utf8_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length()); +} + +inline void AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-16 to Latin1 +// If all code points in the input are below U+0100, represents each scalar +// value as an unsigned byte. (This is not windows-1252!) If there are code +// points above U+00FF, memory-safely produces garbage and will likely start +// asserting in future debug builds. The nature of the garbage may differ +// based on CPU architecture and must not be relied upon. The names say +// "ASCII" instead of "Latin1" for legacy reasons. + +[[nodiscard]] inline bool LossyCopyUTF16toASCII( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_latin1_lossy_impl( + &aDest, aSource.Elements(), aSource.Length(), 0, true); +} + +inline void LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool LossyAppendUTF16toASCII( + mozilla::Span<const char16_t> aSource, nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf16_to_latin1_lossy_impl( + &aDest, aSource.Elements(), aSource.Length(), aDest.Length(), false); +} + +inline void LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY( + !LossyAppendUTF16toASCII(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// Latin1 to UTF-8 +// Interpret each incoming unsigned byte value as a Unicode scalar value (not +// windows-1252!). +// If the input is ASCII, the heap-allocated nsStringBuffer is shared if +// possible. + +[[nodiscard]] inline bool CopyLatin1toUTF8(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, 0); +} + +inline void CopyLatin1toUTF8(const nsACString& aSource, nsACString& aDest) { + if (MOZ_UNLIKELY(!CopyLatin1toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool AppendLatin1toUTF8(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_latin1_to_utf8_check(&aDest, &aSource, + aDest.Length()); +} + +inline void AppendLatin1toUTF8(const nsACString& aSource, nsACString& aDest) { + if (MOZ_UNLIKELY(!AppendLatin1toUTF8(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +// UTF-8 to Latin1 +// If all code points in the input are below U+0100, represents each scalar +// value as an unsigned byte. (This is not windows-1252!) If there are code +// points above U+00FF, memory-safely produces garbage in release builds and +// asserts in debug builds. The nature of the garbage may differ +// based on CPU architecture and must not be relied upon. +// If the input is ASCII, the heap-allocated nsStringBuffer is shared if +// possible. + +[[nodiscard]] inline bool LossyCopyUTF8toLatin1(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource, + 0); +} + +inline void LossyCopyUTF8toLatin1(const nsACString& aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY(!LossyCopyUTF8toLatin1(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aSource.Length()); + } +} + +[[nodiscard]] inline bool LossyAppendUTF8toLatin1(const nsACString& aSource, + nsACString& aDest, + const mozilla::fallible_t&) { + return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest, &aSource, + aDest.Length()); +} + +inline void LossyAppendUTF8toLatin1(const nsACString& aSource, + nsACString& aDest) { + if (MOZ_UNLIKELY( + !LossyAppendUTF8toLatin1(aSource, aDest, mozilla::fallible))) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * Performs a conversion with LossyConvertUTF16toLatin1() writing into the + * newly-allocated buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a 16-bit wide string + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewCString(const nsAString& aSource); + +/* A fallible version of ToNewCString. Returns nullptr on failure. */ +char* ToNewCString(const nsAString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource an 8-bit wide string + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewCString(const nsACString& aSource); + +/* A fallible version of ToNewCString. Returns nullptr on failure. */ +char* ToNewCString(const nsACString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. + * Performs an encoding conversion from a UTF-16 string to a UTF-8 string with + * unpaired surrogates replaced with the REPLACEMENT CHARACTER copying + * |aSource| to your new buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a UTF-16 string (made of char16_t's) + * @param aUTF8Count the number of 8-bit units that was returned + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr); + +/* A fallible version of ToNewUTF8String. Returns nullptr on failure. */ +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. + * + * Infallibly allocates and returns a new |char16_t| buffer which you must + * free with |free|. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a UTF-16 string + * @return a new |char16_t| buffer you must free with |free|. + */ +char16_t* ToNewUnicode(const nsAString& aSource); + +/* A fallible version of ToNewUnicode. Returns nullptr on failure. */ +char16_t* ToNewUnicode(const nsAString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. + * + * Infallibly allocates and returns a new |char16_t| buffer which you must + * free with|free|. + * + * Performs an encoding conversion by 0-padding 8-bit wide characters up to + * 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource| + * to your new buffer. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a Latin1 string + * @return a new |char16_t| buffer you must free with |free|. + */ +char16_t* ToNewUnicode(const nsACString& aSource); + +/* A fallible version of ToNewUnicode. Returns nullptr on failure. */ +char16_t* ToNewUnicode(const nsACString& aSource, + const mozilla::fallible_t& aFallible); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. + * + * Infallibly allocates and returns a new |char| buffer which you must + * free with |free|. Performs an encoding conversion from UTF-8 to UTF-16 + * while copying |aSource| to your new buffer. Malformed byte sequences + * are replaced with the REPLACEMENT CHARACTER. + * + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource an 8-bit wide string, UTF-8 encoded + * @param aUTF16Count the number of 16-bit units that was returned + * @return a new |char16_t| buffer you must free with |free|. + * (UTF-16 encoded) + */ +char16_t* UTF8ToNewUnicode(const nsACString& aSource, + uint32_t* aUTF16Count = nullptr); + +/* A fallible version of UTF8ToNewUnicode. Returns nullptr on failure. */ +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count, + const mozilla::fallible_t& aFallible); + +/** + * Copies |aLength| 16-bit code units from the start of |aSource| to the + * |char16_t| buffer |aDest|. + * + * After this operation |aDest| is not null terminated. + * + * @param aSource a UTF-16 string + * @param aSrcOffset start offset in the source string + * @param aDest a |char16_t| buffer + * @param aLength the number of 16-bit code units to copy + * @return pointer to destination buffer - identical to |aDest| + */ +char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, + char16_t* aDest, uint32_t aLength); + +/** + * Replaces unpaired surrogates with U+FFFD in the argument. + * + * Copies a shared string buffer or an otherwise read-only + * buffer only if there are unpaired surrogates. + */ +[[nodiscard]] inline bool EnsureUTF16Validity(nsAString& aString) { + size_t upTo = mozilla::Utf16ValidUpTo(aString); + size_t len = aString.Length(); + if (upTo == len) { + return true; + } + char16_t* ptr = aString.BeginWriting(mozilla::fallible); + if (!ptr) { + return false; + } + auto span = mozilla::Span(ptr, len); + span[upTo] = 0xFFFD; + mozilla::EnsureUtf16ValiditySpan(span.From(upTo + 1)); + return true; +} + +void ParseString(const nsACString& aSource, char aDelimiter, + nsTArray<nsCString>& aArray); + +namespace mozilla::detail { + +constexpr auto kStringJoinAppendDefault = + [](auto& aResult, const auto& aValue) { aResult.Append(aValue); }; + +} // namespace mozilla::detail + +/** + * Join a sequence of items, each optionally transformed to a string, with a + * given separator, appending to a given string. + * + * \tparam CharType char or char16_t + * \tparam InputRange a range usable with range-based for + * \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and + * an item of InputRange which appends the latter to the former + */ +template < + typename CharType, typename InputRange, + typename Func = const decltype(mozilla::detail::kStringJoinAppendDefault)&> +void StringJoinAppend( + nsTSubstring<CharType>& aOutput, + const nsTLiteralString<CharType>& aSeparator, const InputRange& aInputRange, + Func&& aFunc = mozilla::detail::kStringJoinAppendDefault) { + bool first = true; + for (const auto& item : aInputRange) { + if (first) { + first = false; + } else { + aOutput.Append(aSeparator); + } + + aFunc(aOutput, item); + } +} + +/** + * Join a sequence of items, each optionally transformed to a string, with a + * given separator, returning a new string. + * + * \tparam CharType char or char16_t + * \tparam InputRange a range usable with range-based for + * \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and + * an item of InputRange which appends the latter to the former + + */ +template < + typename CharType, typename InputRange, + typename Func = const decltype(mozilla::detail::kStringJoinAppendDefault)&> +auto StringJoin(const nsTLiteralString<CharType>& aSeparator, + const InputRange& aInputRange, + Func&& aFunc = mozilla::detail::kStringJoinAppendDefault) { + nsTAutoString<CharType> res; + StringJoinAppend(res, aSeparator, aInputRange, std::forward<Func>(aFunc)); + return res; +} + +/** + * Converts case in place in the argument string. + */ +void ToUpperCase(nsACString&); + +void ToLowerCase(nsACString&); + +void ToUpperCase(nsACString&); + +void ToLowerCase(nsACString&); + +/** + * Converts case from string aSource to aDest. + */ +void ToUpperCase(const nsACString& aSource, nsACString& aDest); + +void ToLowerCase(const nsACString& aSource, nsACString& aDest); + +/** + * Finds the leftmost occurrence of |aPattern|, if any in the range + * |aSearchStart|..|aSearchEnd|. + * + * Returns |true| if a match was found, and adjusts |aSearchStart| and + * |aSearchEnd| to point to the match. If no match was found, returns |false| + * and makes |aSearchStart == aSearchEnd|. + * + * Currently, this is equivalent to the O(m*n) implementation previously on + * |ns[C]String|. + * + * If we need something faster, then we can implement that later. + */ + +bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&, + nsAString::const_iterator&, + nsStringComparator = nsTDefaultStringComparator); +bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&, + nsACString::const_iterator&, + nsCStringComparator = nsTDefaultStringComparator); + +/* sometimes we don't care about where the string was, just that we + * found it or not */ +inline bool FindInReadable( + const nsAString& aPattern, const nsAString& aSource, + nsStringComparator aCompare = nsTDefaultStringComparator) { + nsAString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + return FindInReadable(aPattern, start, end, aCompare); +} + +inline bool FindInReadable( + const nsACString& aPattern, const nsACString& aSource, + nsCStringComparator aCompare = nsTDefaultStringComparator) { + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + return FindInReadable(aPattern, start, end, aCompare); +} + +bool CaseInsensitiveFindInReadable(const nsACString& aPattern, + nsACString::const_iterator&, + nsACString::const_iterator&); + +/** + * Finds the rightmost occurrence of |aPattern| + * Returns |true| if a match was found, and adjusts |aSearchStart| and + * |aSearchEnd| to point to the match. If no match was found, returns |false| + * and makes |aSearchStart == aSearchEnd|. + */ +bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&, + nsAString::const_iterator&, + nsStringComparator = nsTDefaultStringComparator); +bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&, + nsACString::const_iterator&, + nsCStringComparator = nsTDefaultStringComparator); + +/** + * Finds the leftmost occurrence of |aChar|, if any in the range + * |aSearchStart|..|aSearchEnd|. + * + * Returns |true| if a match was found, and adjusts |aSearchStart| to + * point to the match. If no match was found, returns |false| and + * makes |aSearchStart == aSearchEnd|. + */ +bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, + const nsAString::const_iterator& aSearchEnd); +bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd); + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring); +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator); +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring); +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator); +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring); +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator); +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring); +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator); + +const nsString& EmptyString(); +const nsCString& EmptyCString(); + +const nsString& VoidString(); +const nsCString& VoidCString(); + +/** + * Compare a UTF-8 string to an UTF-16 string. + * + * Returns 0 if the strings are equal, -1 if aUTF8String is less + * than aUTF16Count, and 1 in the reverse case. Errors are replaced + * with U+FFFD and then the U+FFFD is compared as if it had occurred + * in the input. If aErr is not nullptr, *aErr is set to true if + * either string had malformed sequences. + */ +int32_t CompareUTF8toUTF16(const nsACString& aUTF8String, + const nsAString& aUTF16String, bool* aErr = nullptr); + +void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest); + +#endif // !defined(nsReadableUtils_h___) |