diff options
Diffstat (limited to '')
-rw-r--r-- | xpcom/string/nsReadableUtils.cpp | 630 |
1 files changed, 630 insertions, 0 deletions
diff --git a/xpcom/string/nsReadableUtils.cpp b/xpcom/string/nsReadableUtils.cpp new file mode 100644 index 0000000000..fa4c4bc69b --- /dev/null +++ b/xpcom/string/nsReadableUtils.cpp @@ -0,0 +1,630 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsReadableUtils.h" + +#include <algorithm> + +#include "mozilla/CheckedInt.h" +#include "mozilla/Utf8.h" + +#include "nscore.h" +#include "nsString.h" +#include "nsTArray.h" +#include "nsUTF8Utils.h" + +using mozilla::Span; + +/** + * A helper function that allocates a buffer of the desired character type big + * enough to hold a copy of the supplied string (plus a zero terminator). + * + * @param aSource an string you will eventually be making a copy of + * @return a new buffer which you must free with |free|. + * + */ +template <class FromStringT, class CharT> +inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) { + return static_cast<CharT*>( + malloc((size_t(aSource.Length()) + 1) * sizeof(CharT))); +} + +char* ToNewCString(const nsAString& aSource) { + char* str = ToNewCString(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsAString& aSource, + const mozilla::fallible_t& aFallible) { + char* dest = AllocateStringCopy(aSource, (char*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + LossyConvertUtf16toLatin1(aSource, Span(dest, len)); + dest[len] = 0; + return dest; +} + +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count, + const mozilla::fallible_t& aFallible) { + auto len = aSource.Length(); + // The uses of this function seem temporary enough that it's not + // worthwhile to be fancy about the allocation size. Let's just use + // the worst case. + // Times 3 plus 1, because ConvertUTF16toUTF8 requires times 3 and + // then we have the terminator. + // Using CheckedInt<uint32_t>, because aUTF8Count is uint32_t* for + // historical reasons. + mozilla::CheckedInt<uint32_t> destLen(len); + destLen *= 3; + destLen += 1; + if (!destLen.isValid()) { + return nullptr; + } + size_t destLenVal = destLen.value(); + char* dest = static_cast<char*>(malloc(destLenVal)); + if (!dest) { + return nullptr; + } + + size_t written = ConvertUtf16toUtf8(aSource, Span(dest, destLenVal)); + dest[written] = 0; + + if (aUTF8Count) { + *aUTF8Count = written; + } + + return dest; +} + +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) { + char* str = ToNewUTF8String(aSource, aUTF8Count, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsACString& aSource) { + char* str = ToNewCString(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char* ToNewCString(const nsACString& aSource, + const mozilla::fallible_t& aFallible) { + // no conversion needed, just allocate a buffer of the correct length and copy + // into it + + char* dest = AllocateStringCopy(aSource, (char*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + memcpy(dest, aSource.BeginReading(), len * sizeof(char)); + dest[len] = 0; + return dest; +} + +char16_t* ToNewUnicode(const nsAString& aSource) { + char16_t* str = ToNewUnicode(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* ToNewUnicode(const nsAString& aSource, + const mozilla::fallible_t& aFallible) { + // no conversion needed, just allocate a buffer of the correct length and copy + // into it + + char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t)); + dest[len] = 0; + return dest; +} + +char16_t* ToNewUnicode(const nsACString& aSource) { + char16_t* str = ToNewUnicode(aSource, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* ToNewUnicode(const nsACString& aSource, + const mozilla::fallible_t& aFallible) { + char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr); + if (!dest) { + return nullptr; + } + + auto len = aSource.Length(); + ConvertLatin1toUtf16(aSource, Span(dest, len)); + dest[len] = 0; + return dest; +} + +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count, + const mozilla::fallible_t& aFallible) { + // Compute length plus one as required by ConvertUTF8toUTF16 + uint32_t lengthPlusOne = aSource.Length() + 1; // Can't overflow + + mozilla::CheckedInt<size_t> allocLength(lengthPlusOne); + // Add space for zero-termination + allocLength += 1; + // We need UTF-16 units + allocLength *= sizeof(char16_t); + + if (!allocLength.isValid()) { + return nullptr; + } + + char16_t* dest = (char16_t*)malloc(allocLength.value()); + if (!dest) { + return nullptr; + } + + size_t written = ConvertUtf8toUtf16(aSource, Span(dest, lengthPlusOne)); + dest[written] = 0; + + if (aUTF16Count) { + *aUTF16Count = written; + } + + return dest; +} + +char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) { + char16_t* str = UTF8ToNewUnicode(aSource, aUTF16Count, mozilla::fallible); + if (!str) { + MOZ_CRASH("Unable to allocate memory"); + } + return str; +} + +char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, + char16_t* aDest, uint32_t aLength) { + MOZ_ASSERT(aSrcOffset + aLength <= aSource.Length()); + memcpy(aDest, aSource.BeginReading() + aSrcOffset, + size_t(aLength) * sizeof(char16_t)); + return aDest; +} + +void ToUpperCase(nsACString& aCString) { + char* cp = aCString.BeginWriting(); + char* end = cp + aCString.Length(); + while (cp != end) { + char ch = *cp; + if (ch >= 'a' && ch <= 'z') { + *cp = ch - ('a' - 'A'); + } + ++cp; + } +} + +void ToUpperCase(const nsACString& aSource, nsACString& aDest) { + aDest.SetLength(aSource.Length()); + const char* src = aSource.BeginReading(); + const char* end = src + aSource.Length(); + char* dst = aDest.BeginWriting(); + while (src != end) { + char ch = *src; + if (ch >= 'a' && ch <= 'z') { + *dst = ch - ('a' - 'A'); + } else { + *dst = ch; + } + ++src; + ++dst; + } +} + +void ToLowerCase(nsACString& aCString) { + char* cp = aCString.BeginWriting(); + char* end = cp + aCString.Length(); + while (cp != end) { + char ch = *cp; + if (ch >= 'A' && ch <= 'Z') { + *cp = ch + ('a' - 'A'); + } + ++cp; + } +} + +void ToLowerCase(const nsACString& aSource, nsACString& aDest) { + aDest.SetLength(aSource.Length()); + const char* src = aSource.BeginReading(); + const char* end = src + aSource.Length(); + char* dst = aDest.BeginWriting(); + while (src != end) { + char ch = *src; + if (ch >= 'A' && ch <= 'Z') { + *dst = ch + ('a' - 'A'); + } else { + *dst = ch; + } + ++src; + ++dst; + } +} + +void ParseString(const nsACString& aSource, char aDelimiter, + nsTArray<nsCString>& aArray) { + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + + for (;;) { + nsACString::const_iterator delimiter = start; + FindCharInReadable(aDelimiter, delimiter, end); + + if (delimiter != start) { + aArray.AppendElement(Substring(start, delimiter)); + } + + if (delimiter == end) { + break; + } + start = ++delimiter; + if (start == end) { + break; + } + } +} + +template <class StringT, class IteratorT> +bool FindInReadable_Impl( + const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, + nsTStringComparator<typename StringT::char_type> aCompare) { + bool found_it = false; + + // only bother searching at all if we're given a non-empty range to search + if (aSearchStart != aSearchEnd) { + IteratorT aPatternStart, aPatternEnd; + aPattern.BeginReading(aPatternStart); + aPattern.EndReading(aPatternEnd); + + // outer loop keeps searching till we find it or run out of string to search + while (!found_it) { + // fast inner loop (that's what it's called, not what it is) looks for a + // potential match + while (aSearchStart != aSearchEnd && + aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) { + ++aSearchStart; + } + + // if we broke out of the `fast' loop because we're out of string ... + // we're done: no match + if (aSearchStart == aSearchEnd) { + break; + } + + // otherwise, we're at a potential match, let's see if we really hit one + IteratorT testPattern(aPatternStart); + IteratorT testSearch(aSearchStart); + + // slow inner loop verifies the potential match (found by the `fast' loop) + // at the current position + for (;;) { + // we already compared the first character in the outer loop, + // so we'll advance before the next comparison + ++testPattern; + ++testSearch; + + // if we verified all the way to the end of the pattern, then we found + // it! + if (testPattern == aPatternEnd) { + found_it = true; + aSearchEnd = testSearch; // return the exact found range through the + // parameters + break; + } + + // if we got to end of the string we're searching before we hit the end + // of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchEnd) { + aSearchStart = aSearchEnd; + break; + } + + // else if we mismatched ... it's time to advance to the next search + // position + // and get back into the `fast' loop + if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) { + ++aSearchStart; + break; + } + } + } + } + + return found_it; +} + +/** + * This searches the entire string from right to left, and returns the first + * match found, if any. + */ +template <class StringT, class IteratorT> +bool RFindInReadable_Impl( + const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, + nsTStringComparator<typename StringT::char_type> aCompare) { + IteratorT patternStart, patternEnd, searchEnd = aSearchEnd; + aPattern.BeginReading(patternStart); + aPattern.EndReading(patternEnd); + + // Point to the last character in the pattern + --patternEnd; + // outer loop keeps searching till we run out of string to search + while (aSearchStart != searchEnd) { + // Point to the end position of the next possible match + --searchEnd; + + // Check last character, if a match, explore further from here + if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) { + // We're at a potential match, let's see if we really hit one + IteratorT testPattern(patternEnd); + IteratorT testSearch(searchEnd); + + // inner loop verifies the potential match at the current position + do { + // if we verified all the way to the end of the pattern, then we found + // it! + if (testPattern == patternStart) { + aSearchStart = testSearch; // point to start of match + aSearchEnd = ++searchEnd; // point to end of match + return true; + } + + // if we got to end of the string we're searching before we hit the end + // of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchStart) { + aSearchStart = aSearchEnd; + return false; + } + + // test previous character for a match + --testPattern; + --testSearch; + } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0); + } + } + + aSearchStart = aSearchEnd; + return false; +} + +bool FindInReadable(const nsAString& aPattern, + nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd, + nsStringComparator aComparator) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool FindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd, + nsCStringComparator aComparator) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool CaseInsensitiveFindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd) { + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, + nsCaseInsensitiveCStringComparator); +} + +bool RFindInReadable(const nsAString& aPattern, + nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd, + const nsStringComparator aComparator) { + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool RFindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd, + const nsCStringComparator aComparator) { + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, + const nsAString::const_iterator& aSearchEnd) { + ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char16_t* charFoundAt = + nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd) { + ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char* charFoundAt = + nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator aComparator) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); +} + +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator aComparator) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); +} + +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + nsStringComparator aComparator) { + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len) + .Equals(aSubstring, aComparator); +} + +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + nsCStringComparator aComparator) { + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len) + .Equals(aSubstring, aComparator); +} + +static const char16_t empty_buffer[1] = {'\0'}; + +const nsString& EmptyString() { + static const nsDependentString sEmpty(empty_buffer); + + return sEmpty; +} + +const nsCString& EmptyCString() { + static const nsDependentCString sEmpty((const char*)empty_buffer); + + return sEmpty; +} + +const nsString& VoidString() { + static const nsString sNull(mozilla::detail::StringDataFlags::VOIDED); + + return sNull; +} + +const nsCString& VoidCString() { + static const nsCString sNull(mozilla::detail::StringDataFlags::VOIDED); + + return sNull; +} + +int32_t CompareUTF8toUTF16(const nsACString& aUTF8String, + const nsAString& aUTF16String, bool* aErr) { + const char* u8; + const char* u8end; + aUTF8String.BeginReading(u8); + aUTF8String.EndReading(u8end); + + const char16_t* u16; + const char16_t* u16end; + aUTF16String.BeginReading(u16); + aUTF16String.EndReading(u16end); + + for (;;) { + if (u8 == u8end) { + if (u16 == u16end) { + return 0; + } + return -1; + } + if (u16 == u16end) { + return 1; + } + // No need for ASCII optimization, since both NextChar() + // calls get inlined. + uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr); + uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr); + if (scalar16 == scalar8) { + continue; + } + if (scalar8 < scalar16) { + return -1; + } + return 1; + } +} + +void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) { + NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); + if (IS_IN_BMP(aSource)) { + aDest.Append(char16_t(aSource)); + } else { + aDest.Append(H_SURROGATE(aSource)); + aDest.Append(L_SURROGATE(aSource)); + } +} |