/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsReadableUtils.h"

#include <algorithm>

#include "mozilla/CheckedInt.h"
#include "mozilla/Utf8.h"

#include "nscore.h"
#include "nsString.h"
#include "nsTArray.h"
#include "nsUTF8Utils.h"

using mozilla::Span;

/**
 * A helper function that allocates a buffer of the desired character type big
 * enough to hold a copy of the supplied string (plus a zero terminator).
 *
 * The second (unnamed) parameter is never read; it only selects |CharT|.
 *
 * @param aSource a string you will eventually be making a copy of
 * @return a new buffer which you must free with |free|, or nullptr if the
 *         allocation failed. The buffer is uninitialized.
 */
template <class FromStringT, class CharT>
inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) {
  // Widen to size_t before adding one so the element count cannot wrap.
  return static_cast<CharT*>(
      malloc((size_t(aSource.Length()) + 1) * sizeof(CharT)));
}

/**
 * Infallible variant of the UTF-16 -> 8-bit copy below: crashes instead of
 * returning nullptr.
 */
char* ToNewCString(const nsAString& aSource) {
  char* str = ToNewCString(aSource, mozilla::fallible);
  if (!str) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return str;
}

/**
 * Returns a new zero-terminated |char| buffer holding |aSource| converted with
 * LossyConvertUtf16toLatin1, one output byte per input code unit.
 *
 * @return a buffer to be released with |free|, or nullptr on allocation
 *         failure.
 */
char* ToNewCString(const nsAString& aSource,
                   const mozilla::fallible_t& aFallible) {
  char* dest = AllocateStringCopy(aSource, static_cast<char*>(nullptr));
  if (!dest) {
    return nullptr;
  }

  const auto len = aSource.Length();
  LossyConvertUtf16toLatin1(aSource, Span(dest, len));
  dest[len] = 0;
  return dest;
}

/**
 * Returns a new zero-terminated buffer holding |aSource| converted with
 * ConvertUtf16toUtf8.
 *
 * @param aUTF8Count if non-null, receives the number of bytes written
 *                   (excluding the terminator).
 * @return a buffer to be released with |free|, or nullptr if the size
 *         computation overflowed or the allocation failed.
 */
char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count,
                      const mozilla::fallible_t& aFallible) {
  const auto len = aSource.Length();
  // The uses of this function seem temporary enough that it's not
  // worthwhile to be fancy about the allocation size. Let's just use
  // the worst case.
  // Times 3 plus 1, because ConvertUTF16toUTF8 requires times 3 and
  // then we have the terminator.
  // Using CheckedInt<uint32_t>, because aUTF8Count is uint32_t* for
  // historical reasons.
  mozilla::CheckedInt<uint32_t> destLen(len);
  destLen *= 3;
  destLen += 1;
  if (!destLen.isValid()) {
    return nullptr;
  }
  const size_t destLenVal = destLen.value();
  char* dest = static_cast<char*>(malloc(destLenVal));
  if (!dest) {
    return nullptr;
  }

  const size_t written = ConvertUtf16toUtf8(aSource, Span(dest, destLenVal));
  dest[written] = 0;

  if (aUTF8Count) {
    // The buffer size was validated as a uint32_t above, so the count is
    // expected to fit.
    *aUTF8Count = static_cast<uint32_t>(written);
  }

  return dest;
}

/**
 * Infallible variant of ToNewUTF8String: crashes instead of returning nullptr.
 */
char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) {
  char* str = ToNewUTF8String(aSource, aUTF8Count, mozilla::fallible);
  if (!str) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return str;
}

/**
 * Infallible variant of the 8-bit copy below: crashes instead of returning
 * nullptr.
 */
char* ToNewCString(const nsACString& aSource) {
  char* str = ToNewCString(aSource, mozilla::fallible);
  if (!str) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return str;
}

/**
 * Returns a new zero-terminated byte-for-byte copy of |aSource|.
 *
 * @return a buffer to be released with |free|, or nullptr on allocation
 *         failure.
 */
char* ToNewCString(const nsACString& aSource,
                   const mozilla::fallible_t& aFallible) {
  // no conversion needed, just allocate a buffer of the correct length and copy
  // into it
  char* dest = AllocateStringCopy(aSource, static_cast<char*>(nullptr));
  if (!dest) {
    return nullptr;
  }

  const auto len = aSource.Length();
  memcpy(dest, aSource.BeginReading(), len * sizeof(char));
  dest[len] = 0;
  return dest;
}

/**
 * Infallible variant of the UTF-16 copy below: crashes instead of returning
 * nullptr.
 */
char16_t* ToNewUnicode(const nsAString& aSource) {
  char16_t* str = ToNewUnicode(aSource, mozilla::fallible);
  if (!str) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return str;
}

/**
 * Returns a new zero-terminated unit-for-unit copy of |aSource|.
 *
 * @return a buffer to be released with |free|, or nullptr on allocation
 *         failure.
 */
char16_t* ToNewUnicode(const nsAString& aSource,
                       const mozilla::fallible_t& aFallible) {
  // no conversion needed, just allocate a buffer of the correct length and copy
  // into it
  char16_t* dest = AllocateStringCopy(aSource, static_cast<char16_t*>(nullptr));
  if (!dest) {
    return nullptr;
  }

  const auto len = aSource.Length();
  memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t));
  dest[len] = 0;
  return dest;
}

/**
 * Infallible variant of the 8-bit -> UTF-16 copy below: crashes instead of
 * returning nullptr.
 */
char16_t* ToNewUnicode(const nsACString& aSource) {
  char16_t* str = ToNewUnicode(aSource, mozilla::fallible);
  if (!str) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return str;
}

/**
 * Returns a new zero-terminated |char16_t| buffer holding |aSource| converted
 * with ConvertLatin1toUtf16, one output unit per input byte.
 *
 * @return a buffer to be released with |free|, or nullptr on allocation
 *         failure.
 */
char16_t* ToNewUnicode(const nsACString& aSource,
                       const mozilla::fallible_t& aFallible) {
  char16_t* dest = AllocateStringCopy(aSource, static_cast<char16_t*>(nullptr));
  if (!dest) {
    return nullptr;
  }

  const auto len = aSource.Length();
  ConvertLatin1toUtf16(aSource, Span(dest, len));
  dest[len] = 0;
  return dest;
}

/**
 * Returns a new zero-terminated |char16_t| buffer holding |aSource| converted
 * with ConvertUtf8toUtf16.
 *
 * @param aUTF16Count if non-null, receives the number of UTF-16 code units
 *                    written (excluding the terminator).
 * @return a buffer to be released with |free|, or nullptr if the size
 *         computation overflowed or the allocation failed.
 */
char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count,
                           const mozilla::fallible_t& aFallible) {
  // Compute length plus one as required by ConvertUTF8toUTF16
  const uint32_t lengthPlusOne = aSource.Length() + 1;  // Can't overflow

  mozilla::CheckedInt<size_t> allocLength(lengthPlusOne);
  // Add space for zero-termination
  allocLength += 1;
  // We need UTF-16 units
  allocLength *= sizeof(char16_t);

  if (!allocLength.isValid()) {
    return nullptr;
  }

  char16_t* dest = static_cast<char16_t*>(malloc(allocLength.value()));
  if (!dest) {
    return nullptr;
  }

  // The converter is only handed |lengthPlusOne| units, so the extra unit
  // allocated above is always available for the terminator.
  const size_t written = ConvertUtf8toUtf16(aSource, Span(dest, lengthPlusOne));
  dest[written] = 0;

  if (aUTF16Count) {
    // |written| cannot exceed |lengthPlusOne|, which is a uint32_t.
    *aUTF16Count = static_cast<uint32_t>(written);
  }

  return dest;
}

/**
 * Infallible variant of UTF8ToNewUnicode: crashes instead of returning
 * nullptr.
 */
char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) {
  char16_t* str = UTF8ToNewUnicode(aSource, aUTF16Count, mozilla::fallible);
  if (!str) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return str;
}

/**
 * Copies |aLength| UTF-16 code units of |aSource|, starting at |aSrcOffset|,
 * into |aDest|. No terminator is written.
 *
 * The caller must make sure that the requested range lies within |aSource|
 * (asserted) and that |aDest| has room for |aLength| units.
 *
 * @return |aDest|
 */
char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset,
                        char16_t* aDest, uint32_t aLength) {
  // Written as two comparisons so that the check itself cannot wrap around in
  // 32-bit arithmetic.
  MOZ_ASSERT(aSrcOffset <= aSource.Length() &&
             aLength <= aSource.Length() - aSrcOffset);
  memcpy(aDest, aSource.BeginReading() + aSrcOffset,
         size_t(aLength) * sizeof(char16_t));
  return aDest;
}

/**
 * Converts the ASCII letters 'a'..'z' of |aCString| to upper case in place.
 * All other bytes are left untouched.
 */
void ToUpperCase(nsACString& aCString) {
  char* cp = aCString.BeginWriting();
  char* end = cp + aCString.Length();
  while (cp != end) {
    const char ch = *cp;
    if (ch >= 'a' && ch <= 'z') {
      *cp = ch - ('a' - 'A');
    }
    ++cp;
  }
}

/**
 * Writes a copy of |aSource| to |aDest| with the ASCII letters 'a'..'z'
 * converted to upper case. |aDest| is resized to the length of |aSource|.
 */
void ToUpperCase(const nsACString& aSource, nsACString& aDest) {
  aDest.SetLength(aSource.Length());
  const char* src = aSource.BeginReading();
  const char* end = src + aSource.Length();
  char* dst = aDest.BeginWriting();
  while (src != end) {
    const char ch = *src;
    if (ch >= 'a' && ch <= 'z') {
      *dst = ch - ('a' - 'A');
    } else {
      *dst = ch;
    }
    ++src;
    ++dst;
  }
}

/**
 * Converts the ASCII letters 'A'..'Z' of |aCString| to lower case in place.
 * All other bytes are left untouched.
 */
void ToLowerCase(nsACString& aCString) {
  char* cp = aCString.BeginWriting();
  char* end = cp + aCString.Length();
  while (cp != end) {
    const char ch = *cp;
    if (ch >= 'A' && ch <= 'Z') {
      *cp = ch + ('a' - 'A');
    }
    ++cp;
  }
}

/**
 * Writes a copy of |aSource| to |aDest| with the ASCII letters 'A'..'Z'
 * converted to lower case. |aDest| is resized to the length of |aSource|.
 */
void ToLowerCase(const nsACString& aSource, nsACString& aDest) {
  aDest.SetLength(aSource.Length());
  const char* src = aSource.BeginReading();
  const char* end = src + aSource.Length();
  char* dst = aDest.BeginWriting();
  while (src != end) {
    const char ch = *src;
    if (ch >= 'A' && ch <= 'Z') {
      *dst = ch + ('a' - 'A');
    } else {
      *dst = ch;
    }
    ++src;
    ++dst;
  }
}

/**
 * Splits |aSource| at every occurrence of |aDelimiter| and appends the
 * non-empty pieces to |aArray|. Empty pieces (leading, trailing or between
 * adjacent delimiters) are skipped. Existing elements of |aArray| are kept.
 */
void ParseString(const nsACString& aSource, char aDelimiter,
                 nsTArray<nsCString>& aArray) {
  nsACString::const_iterator start, end;
  aSource.BeginReading(start);
  aSource.EndReading(end);

  for (;;) {
    // FindCharInReadable leaves |delimiter| at the next delimiter, or at |end|
    // if there is none.
    nsACString::const_iterator delimiter = start;
    FindCharInReadable(aDelimiter, delimiter, end);

    if (delimiter != start) {
      aArray.AppendElement(Substring(start, delimiter));
    }

    if (delimiter == end) {
      break;
    }
    start = ++delimiter;
    if (start == end) {
      break;
    }
  }
}

/**
 * Searches [aSearchStart, aSearchEnd) from left to right for |aPattern|.
 *
 * |aCompare| is called on one character at a time and is expected to return
 * zero for a match.
 *
 * On success, returns true and narrows |aSearchStart| / |aSearchEnd| to the
 * exact range of the match. On failure, returns false with |aSearchStart|
 * equal to |aSearchEnd|. An empty pattern is never found.
 */
template <class StringT, class IteratorT>
bool FindInReadable_Impl(
    const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
    nsTStringComparator<typename StringT::char_type> aCompare) {
  bool found_it = false;

  // only bother searching at all if we're given a non-empty range to search
  if (aSearchStart != aSearchEnd) {
    IteratorT patternStart, patternEnd;
    aPattern.BeginReading(patternStart);
    aPattern.EndReading(patternEnd);

    // An empty pattern has no first character to compare against; bail out
    // with the usual "not found" result rather than reading past the pattern.
    if (patternStart == patternEnd) {
      aSearchStart = aSearchEnd;
      return false;
    }

    // outer loop keeps searching till we find it or run out of string to search
    while (!found_it) {
      // fast inner loop (that's what it's called, not what it is) looks for a
      // potential match
      while (aSearchStart != aSearchEnd &&
             aCompare(patternStart.get(), aSearchStart.get(), 1, 1)) {
        ++aSearchStart;
      }

      // if we broke out of the `fast' loop because we're out of string ...
      // we're done: no match
      if (aSearchStart == aSearchEnd) {
        break;
      }

      // otherwise, we're at a potential match, let's see if we really hit one
      IteratorT testPattern(patternStart);
      IteratorT testSearch(aSearchStart);

      // slow inner loop verifies the potential match (found by the `fast' loop)
      // at the current position
      for (;;) {
        // we already compared the first character in the outer loop,
        //  so we'll advance before the next comparison
        ++testPattern;
        ++testSearch;

        // if we verified all the way to the end of the pattern, then we found
        // it!
        if (testPattern == patternEnd) {
          found_it = true;
          aSearchEnd = testSearch;  // return the exact found range through the
                                    // parameters
          break;
        }

        // if we got to end of the string we're searching before we hit the end
        // of the pattern, we'll never find what we're looking for
        if (testSearch == aSearchEnd) {
          aSearchStart = aSearchEnd;
          break;
        }

        // else if we mismatched ... it's time to advance to the next search
        // position and get back into the `fast' loop
        if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
          ++aSearchStart;
          break;
        }
      }
    }
  }

  return found_it;
}

/**
 * This searches the entire string from right to left, and returns the first
 * match found, if any.
 *
 * On success, returns true and narrows |aSearchStart| / |aSearchEnd| to the
 * exact range of the match. On failure, returns false with |aSearchStart|
 * set to |aSearchEnd|. An empty pattern is never found.
 */
template <class StringT, class IteratorT>
bool RFindInReadable_Impl(
    const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
    nsTStringComparator<typename StringT::char_type> aCompare) {
  IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
  aPattern.BeginReading(patternStart);
  aPattern.EndReading(patternEnd);

  // An empty pattern has no last character; decrementing |patternEnd| below
  // would step before the start of the pattern buffer.
  if (patternStart == patternEnd) {
    aSearchStart = aSearchEnd;
    return false;
  }

  // Point to the last character in the pattern
  --patternEnd;
  // outer loop keeps searching till we run out of string to search
  while (aSearchStart != searchEnd) {
    // Point to the end position of the next possible match
    --searchEnd;

    // Check last character, if a match, explore further from here
    if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) {
      // We're at a potential match, let's see if we really hit one
      IteratorT testPattern(patternEnd);
      IteratorT testSearch(searchEnd);

      // inner loop verifies the potential match at the current position
      do {
        // if we verified all the way to the end of the pattern, then we found
        // it!
        if (testPattern == patternStart) {
          aSearchStart = testSearch;  // point to start of match
          aSearchEnd = ++searchEnd;   // point to end of match
          return true;
        }

        // if we got to end of the string we're searching before we hit the end
        // of the pattern, we'll never find what we're looking for
        if (testSearch == aSearchStart) {
          aSearchStart = aSearchEnd;
          return false;
        }

        // test previous character for a match
        --testPattern;
        --testSearch;
      } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
    }
  }

  aSearchStart = aSearchEnd;
  return false;
}

/**
 * Forward search for a UTF-16 pattern using |aComparator|; see
 * FindInReadable_Impl for how the iterators are updated.
 */
bool FindInReadable(const nsAString& aPattern,
                    nsAString::const_iterator& aSearchStart,
                    nsAString::const_iterator& aSearchEnd,
                    nsStringComparator aComparator) {
  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
}

/**
 * Forward search for an 8-bit pattern using |aComparator|; see
 * FindInReadable_Impl for how the iterators are updated.
 */
bool FindInReadable(const nsACString& aPattern,
                    nsACString::const_iterator& aSearchStart,
                    nsACString::const_iterator& aSearchEnd,
                    nsCStringComparator aComparator) {
  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
}

/**
 * Forward search for an 8-bit pattern using
 * nsCaseInsensitiveCStringComparator.
 */
bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
                                   nsACString::const_iterator& aSearchStart,
                                   nsACString::const_iterator& aSearchEnd) {
  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
                             nsCaseInsensitiveCStringComparator);
}

/**
 * Backward search for a UTF-16 pattern using |aComparator|; see
 * RFindInReadable_Impl for how the iterators are updated.
 */
bool RFindInReadable(const nsAString& aPattern,
                     nsAString::const_iterator& aSearchStart,
                     nsAString::const_iterator& aSearchEnd,
                     const nsStringComparator aComparator) {
  return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
}

/**
 * Backward search for an 8-bit pattern using |aComparator|; see
 * RFindInReadable_Impl for how the iterators are updated.
 */
bool RFindInReadable(const nsACString& aPattern,
                     nsACString::const_iterator& aSearchStart,
                     nsACString::const_iterator& aSearchEnd,
                     const nsCStringComparator aComparator) {
  return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
}

/**
 * Looks for |aChar| in [aSearchStart, aSearchEnd).
 *
 * If found, returns true and advances |aSearchStart| to the character. If not
 * found, returns false and advances |aSearchStart| to |aSearchEnd|.
 */
bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
                        const nsAString::const_iterator& aSearchEnd) {
  const ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get();

  const char16_t* charFoundAt =
      nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
  if (charFoundAt) {
    aSearchStart.advance(charFoundAt - aSearchStart.get());
    return true;
  }

  aSearchStart.advance(fragmentLength);
  return false;
}

/**
 * Looks for |aChar| in [aSearchStart, aSearchEnd).
 *
 * If found, returns true and advances |aSearchStart| to the character. If not
 * found, returns false and advances |aSearchStart| to |aSearchEnd|.
 */
bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
                        const nsACString::const_iterator& aSearchEnd) {
  const ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get();

  const char* charFoundAt =
      nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
  if (charFoundAt) {
    aSearchStart.advance(charFoundAt - aSearchStart.get());
    return true;
  }

  aSearchStart.advance(fragmentLength);
  return false;
}

/**
 * Returns true if the first |aSubstring.Length()| units of |aSource| equal
 * |aSubstring|. A substring longer than the source never matches.
 */
bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) {
  nsAString::size_type src_len = aSource.Length(),
                       sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, 0, sub_len).Equals(aSubstring);
}

/**
 * As above, but the prefix is compared with |aComparator|.
 */
bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
                      nsStringComparator aComparator) {
  nsAString::size_type src_len = aSource.Length(),
                       sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
}

/**
 * Returns true if the first |aSubstring.Length()| bytes of |aSource| equal
 * |aSubstring|. A substring longer than the source never matches.
 */
bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) {
  nsACString::size_type src_len = aSource.Length(),
                        sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, 0, sub_len).Equals(aSubstring);
}

/**
 * As above, but the prefix is compared with |aComparator|.
 */
bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
                      nsCStringComparator aComparator) {
  nsACString::size_type src_len = aSource.Length(),
                        sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
}

/**
 * Returns true if the last |aSubstring.Length()| units of |aSource| equal
 * |aSubstring|. A substring longer than the source never matches.
 */
bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) {
  nsAString::size_type src_len = aSource.Length(),
                       sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
}

/**
 * As above, but the suffix is compared with |aComparator|.
 */
bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
                    nsStringComparator aComparator) {
  nsAString::size_type src_len = aSource.Length(),
                       sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, src_len - sub_len, sub_len)
      .Equals(aSubstring, aComparator);
}

/**
 * Returns true if the last |aSubstring.Length()| bytes of |aSource| equal
 * |aSubstring|. A substring longer than the source never matches.
 */
bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) {
  nsACString::size_type src_len = aSource.Length(),
                        sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
}

/**
 * As above, but the suffix is compared with |aComparator|.
 */
bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
                    nsCStringComparator aComparator) {
  nsACString::size_type src_len = aSource.Length(),
                        sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, src_len - sub_len, sub_len)
      .Equals(aSubstring, aComparator);
}

// A single zero code unit shared by EmptyString() and EmptyCString(); its
// first byte is also a valid empty 8-bit string.
static const char16_t empty_buffer[1] = {'\0'};

/** Returns a reference to a shared, function-local static empty string. */
const nsString& EmptyString() {
  static const nsDependentString sEmpty(empty_buffer);

  return sEmpty;
}

/** Returns a reference to a shared, function-local static empty C string. */
const nsCString& EmptyCString() {
  static const nsDependentCString sEmpty(
      reinterpret_cast<const char*>(empty_buffer));

  return sEmpty;
}

/**
 * Returns a reference to a shared string constructed with the VOIDED data
 * flag.
 */
const nsString& VoidString() {
  static const nsString sNull(mozilla::detail::StringDataFlags::VOIDED);

  return sNull;
}

/**
 * Returns a reference to a shared C string constructed with the VOIDED data
 * flag.
 */
const nsCString& VoidCString() {
  static const nsCString sNull(mozilla::detail::StringDataFlags::VOIDED);

  return sNull;
}

/**
 * Compares a UTF-8 string with a UTF-16 string, one decoded scalar value at a
 * time.
 *
 * @param aErr forwarded unchanged to both NextChar() decoders.
 * @return 0 if both strings decode to the same sequence; -1 if |aUTF8String|
 *         ends first or has the smaller scalar at the first difference; 1
 *         otherwise.
 */
int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
                           const nsAString& aUTF16String, bool* aErr) {
  const char* u8;
  const char* u8end;
  aUTF8String.BeginReading(u8);
  aUTF8String.EndReading(u8end);

  const char16_t* u16;
  const char16_t* u16end;
  aUTF16String.BeginReading(u16);
  aUTF16String.EndReading(u16end);

  for (;;) {
    if (u8 == u8end) {
      if (u16 == u16end) {
        return 0;
      }
      return -1;
    }
    if (u16 == u16end) {
      return 1;
    }
    // No need for ASCII optimization, since both NextChar()
    // calls get inlined.
    const uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr);
    const uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr);
    if (scalar16 == scalar8) {
      continue;
    }
    if (scalar8 < scalar16) {
      return -1;
    }
    return 1;
  }
}

/**
 * Appends the code point |aSource| to |aDest|: as a single code unit when it
 * is in the BMP, otherwise as a high/low surrogate pair.
 */
void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) {
  NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
  if (IS_IN_BMP(aSource)) {
    aDest.Append(char16_t(aSource));
  } else {
    aDest.Append(H_SURROGATE(aSource));
    aDest.Append(L_SURROGATE(aSource));
  }
}