/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsEscape.h"

#include "mozilla/ArrayUtils.h"
#include "mozilla/BinarySearch.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/TextUtils.h"
#include "nsTArray.h"
#include "nsCRT.h"
#include "nsASCIIMask.h"

// Digits used when emitting %XX escapes (always upper case).
static const char hexCharsUpper[] = "0123456789ABCDEF";
// Every character accepted as a hex digit when decoding %XX escapes.
static const char hexCharsUpperLower[] = "0123456789ABCDEFabcdef";

// Per-byte classification used by nsEscape() through the IS_OK() macro: a
// byte is copied verbatim when its table entry shares a bit with the
// nsEscapeMask passed in. Entries past the listed rows are zero-initialized,
// i.e. those bytes are escaped under every mask.
static const unsigned char netCharType[256] =
// clang-format off
/*  Bit 0       xalpha      -- the alphas
**  Bit 1       xpalpha     -- as xalpha but
**                             converts spaces to plus and plus to %2B
**  Bit 3 ...   path        -- as xalphas but doesn't escape '/'
**  Bit 4 ...   NSURL-ref   -- extra encoding for Apple NSURL compatibility.
**                             This encoding set is used on encoded URL ref
**                             components before converting a URL to an NSURL
**                             so we don't include '%' to avoid double encoding.
*/
    /*   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
    {  0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,  /* 0x */
       0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,  /* 1x */
    /*       !   "   #   $   %   &   '   (   )   *   +   ,   -   .   / */
       0x0,0x8,0x0,0x0,0x8,0x8,0x8,0x8,0x8,0x8,0xf,0xc,0x8,0xf,0xf,0xc,  /* 2x */
    /*   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ? */
       0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,0x8,0x0,0x8,0x0,0x8,  /* 3x */
    /*   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O */
       0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,  /* 4x */
    /*  bits for '@' changed from 7 to 0 so '@' can be escaped   */
    /*  in usernames and passwords in publishing.                */
    /*   P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _ */
       0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x0,0x0,0x0,0x0,0xf,  /* 5x */
    /*   `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o */
       0x0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,  /* 6x */
    /*   p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  DEL */
       0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x0,0x0,0x0,0x8,0x0,  /* 7x */
       0x0,
    };

/* decode % escaped hex codes into character values */
/* Yields 0 for anything that is not a hex digit. The argument is evaluated */
/* several times, so it must be free of side effects. */
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
     (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
     (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))
// clang-format on

// True when byte C may be copied unescaped under the `aFlags` variable that is
// in scope at the point of use.
#define IS_OK(C) (netCharType[((unsigned char)(C))] & (aFlags))
#define HEX_ESCAPE '%'

// Longest sequence AppendPercentHex() can emit for a single code unit.
static const uint32_t ENCODE_MAX_LEN = 6;  // %uABCD

/**
 * Writes the "%XX" escape for aChar into aBuffer.
 *
 * @param aBuffer Destination; must have room for at least 3 chars.
 * @param aChar   Byte to encode.
 * @return Number of chars written (always 3).
 */
static uint32_t AppendPercentHex(char* aBuffer, unsigned char aChar) {
  uint32_t i = 0;
  aBuffer[i++] = '%';
  aBuffer[i++] = hexCharsUpper[aChar >> 4];   // high nibble
  aBuffer[i++] = hexCharsUpper[aChar & 0xF];  // low nibble
  return i;
}

/**
 * Writes the escape for a UTF-16 code unit into aBuffer: "%XX" when the high
 * byte is zero, "%uXXXX" otherwise.
 *
 * @param aBuffer Destination; must have room for ENCODE_MAX_LEN code units.
 * @param aChar   Code unit to encode.
 * @return Number of code units written (3 or 6).
 */
static uint32_t AppendPercentHex(char16_t* aBuffer, char16_t aChar) {
  uint32_t i = 0;
  aBuffer[i++] = '%';
  if (aChar & 0xff00) {
    aBuffer[i++] = 'u';
    aBuffer[i++] = hexCharsUpper[aChar >> 12];         // high-byte high nibble
    aBuffer[i++] = hexCharsUpper[(aChar >> 8) & 0xF];  // high-byte low nibble
  }
  aBuffer[i++] = hexCharsUpper[(aChar >> 4) & 0xF];  // low-byte high nibble
  aBuffer[i++] = hexCharsUpper[aChar & 0xF];         // low-byte low nibble
  return i;
}

//----------------------------------------------------------------------------------------
/**
 * Returns a newly allocated, NUL-terminated copy of aStr in which every byte
 * not permitted by aFlags (see netCharType) is replaced by "%XX". With
 * url_XPAlphas, a space is written as '+' instead of being hex-escaped.
 *
 * @param aStr          Bytes to escape; nullptr yields nullptr.
 * @param aLength       Number of bytes of aStr to process.
 * @param aOutputLength If non-null, receives the length of the result,
 *                      excluding the terminating NUL.
 * @param aFlags        Selects which netCharType bits mark a byte as safe.
 * @return Buffer obtained from moz_xmalloc, or nullptr when aStr is null, the
 *         size computation overflows, or more than UINT32_MAX bytes would be
 *         required.
 */
char* nsEscape(const char* aStr, size_t aLength, size_t* aOutputLength,
               nsEscapeMask aFlags)
//----------------------------------------------------------------------------------------
{
  if (!aStr) {
    return nullptr;
  }

  size_t charsToEscape = 0;

  const unsigned char* src = (const unsigned char*)aStr;
  for (size_t i = 0; i < aLength; ++i) {
    if (!IS_OK(src[i])) {
      charsToEscape++;
    }
  }

  // calculate how much memory should be allocated
  // original length + 2 bytes for each escaped character + terminating '\0'
  // do the sum in steps to check for overflow
  size_t dstSize = aLength + 1 + charsToEscape;
  if (dstSize <= aLength) {
    return nullptr;
  }
  dstSize += charsToEscape;
  if (dstSize < aLength) {
    return nullptr;
  }

  // fail if we need more than 4GB
  if (dstSize > UINT32_MAX) {
    return nullptr;
  }

  char* result = (char*)moz_xmalloc(dstSize);

  unsigned char* dst = (unsigned char*)result;
  if (aFlags == url_XPAlphas) {
    for (size_t i = 0; i < aLength; ++i) {
      unsigned char c = *src++;
      if (IS_OK(c)) {
        *dst++ = c;
      } else if (c == ' ') {
        *dst++ = '+'; /* convert spaces to pluses */
      } else {
        *dst++ = HEX_ESCAPE;
        *dst++ = hexCharsUpper[c >> 4];   /* high nibble */
        *dst++ = hexCharsUpper[c & 0x0f]; /* low nibble */
      }
    }
  } else {
    for (size_t i = 0; i < aLength; ++i) {
      unsigned char c = *src++;
      if (IS_OK(c)) {
        *dst++ = c;
      } else {
        *dst++ = HEX_ESCAPE;
        *dst++ = hexCharsUpper[c >> 4];   /* high nibble */
        *dst++ = hexCharsUpper[c & 0x0f]; /* low nibble */
      }
    }
  }

  *dst = '\0'; /* tack on eos */
  if (aOutputLength) {
    *aOutputLength = dst - (unsigned char*)result;
  }

  return result;
}

//----------------------------------------------------------------------------------------
/**
 * Decodes "%XX" escapes in aStr in place (see nsUnescapeCount) and returns
 * aStr.
 */
char* nsUnescape(char* aStr)
//----------------------------------------------------------------------------------------
{
  nsUnescapeCount(aStr);
  return aStr;
}

//----------------------------------------------------------------------------------------
/**
 * Decodes "%XX" escapes in the NUL-terminated string aStr in place. A '%' that
 * is not followed by two hex digits is copied through unchanged.
 *
 * @param aStr String to decode; it is overwritten with the decoded text and
 *             re-terminated, except when it is empty, in which case it is not
 *             written to at all.
 * @return Length of the decoded string.
 */
int32_t nsUnescapeCount(char* aStr)
//----------------------------------------------------------------------------------------
{
  char* src = aStr;
  char* dst = aStr;

  // One-character scratch strings holding the two characters that follow the
  // current one, so they can be tested against the hex digit set with
  // strpbrk(). An empty scratch string (end of input) never matches.
  char c1[] = " ";
  char c2[] = " ";
  char* const pc1 = c1;
  char* const pc2 = c2;

  if (!*src) {
    // An empty string was passed in.  Nothing to unescape.
    // Returns early as the string might not actually be mutable with
    // length 0.
    return 0;
  }

  while (*src) {
    c1[0] = *(src + 1);
    if (*(src + 1) == '\0') {
      // Do not read past the terminator.
      c2[0] = '\0';
    } else {
      c2[0] = *(src + 2);
    }

    if (*src != HEX_ESCAPE || strpbrk(pc1, hexCharsUpperLower) == nullptr ||
        strpbrk(pc2, hexCharsUpperLower) == nullptr) {
      *dst++ = *src++;
    } else {
      src++; /* walk over escape */
      if (*src) {
        *dst = UNHEX(*src) << 4;
        src++;
      }
      if (*src) {
        *dst = (*dst + UNHEX(*src));
        src++;
      }
      dst++;
    }
  }

  *dst = 0;
  return (int)(dst - aStr);

} /* NET_UnEscapeCnt */

/**
 * Appends aSrc to aDst, replacing the characters '<', '>', '&', '"' and '\''
 * with their HTML entity / character references.
 */
void nsAppendEscapedHTML(const nsACString& aSrc, nsACString& aDst) {
  // Preparation: aDst's length will increase by at least aSrc's length. If the
  // addition overflows, we skip this, which is fine, and we'll likely abort
  // while (infallibly) appending due to aDst becoming too large.
  mozilla::CheckedInt<nsACString::size_type> newCapacity = aDst.Length();
  newCapacity += aSrc.Length();
  if (newCapacity.isValid()) {
    aDst.SetCapacity(newCapacity.value());
  }

  for (auto cur = aSrc.BeginReading(); cur != aSrc.EndReading(); cur++) {
    if (*cur == '<') {
      aDst.AppendLiteral("&lt;");
    } else if (*cur == '>') {
      aDst.AppendLiteral("&gt;");
    } else if (*cur == '&') {
      aDst.AppendLiteral("&amp;");
    } else if (*cur == '"') {
      aDst.AppendLiteral("&quot;");
    } else if (*cur == '\'') {
      aDst.AppendLiteral("&#39;");
    } else {
      aDst.Append(*cur);
    }
  }
}

//----------------------------------------------------------------------------------------
//
// The following table encodes which characters needs to be escaped for which
// parts of an URL.  The bits are the "url components" in the enum EscapeMask,
// see nsEscape.h.

// ORs aFlags into the table entry of every character in the string literal
// aChars (the terminating NUL is excluded).
template <size_t N>
static constexpr void AddUnescapedChars(const char (&aChars)[N],
                                        uint32_t aFlags,
                                        std::array<uint32_t, 256>& aTable) {
  for (size_t i = 0; i < N - 1; ++i) {
    aTable[static_cast<unsigned char>(aChars[i])] |= aFlags;
  }
}

// Builds the 256-entry table mapping each byte to the set of escape modes in
// which that byte may be left unescaped.
static constexpr std::array<uint32_t, 256> BuildEscapeChars() {
  constexpr uint32_t kAllModes = esc_Scheme | esc_Username | esc_Password |
                                 esc_Host | esc_Directory | esc_FileBaseName |
                                 esc_FileExtension | esc_Param | esc_Query |
                                 esc_Ref | esc_ExtHandler;

  std::array<uint32_t, 256> table{0};

  // Alphanumerics shouldn't be escaped in all escape modes.
  AddUnescapedChars("0123456789", kAllModes, table);
  AddUnescapedChars("ABCDEFGHIJKLMNOPQRSTUVWXYZ", kAllModes, table);
  AddUnescapedChars("abcdefghijklmnopqrstuvwxyz", kAllModes, table);
  AddUnescapedChars("!$&()*+,-_~", kAllModes, table);

  // Extra characters which aren't escaped in particular escape modes.
  AddUnescapedChars(".", esc_Scheme, table);
  // Note that behavior of esc_Username and esc_Password is the same, so these
  // could be merged (in the URL spec, both reference the "userinfo encode set"
  // https://url.spec.whatwg.org/#userinfo-percent-encode-set, so the same
  // behavior is expected.)
  // Leaving separate for now to minimize risk, as these are also IDL-exposed
  // as separate constants.
  AddUnescapedChars("'.", esc_Username, table);
  AddUnescapedChars("'.", esc_Password, table);
  AddUnescapedChars(".", esc_Host, table);  // Same as esc_Scheme
  AddUnescapedChars("'./:;=@[]|", esc_Directory, table);
  AddUnescapedChars("'.:;=@[]|", esc_FileBaseName, table);
  AddUnescapedChars("':;=@[]|", esc_FileExtension, table);
  AddUnescapedChars(".:;=@[\\]^`{|}", esc_Param, table);
  AddUnescapedChars("./:;=?@[\\]^`{|}", esc_Query, table);
  AddUnescapedChars("#'./:;=?@[\\]^{|}", esc_Ref, table);
  AddUnescapedChars("#'./:;=?@[]", esc_ExtHandler, table);

  return table;
}

static constexpr std::array<uint32_t, 256> EscapeChars = BuildEscapeChars();

// True when the byte aChar may stay unescaped in at least one of the escape
// modes set in aFlags.
static bool dontNeedEscape(unsigned char aChar, uint32_t aFlags) {
  return EscapeChars[(size_t)aChar] & aFlags;
}

// UTF-16 variant: code units outside the table always need escaping.
static bool dontNeedEscape(uint16_t aChar, uint32_t aFlags) {
  return aChar < EscapeChars.size() ? (EscapeChars[(size_t)aChar] & aFlags)
                                    : false;
}

//----------------------------------------------------------------------------------------

/**
 * Templated helper for URL escaping a portion of a string.
 *
 * @param aPart The pointer to the beginning of the portion of the string to
 *  escape.
 * @param aPartLen The length of the string to escape.
 * @param aFlags Flags used to configure escaping. @see EscapeMask
 * @param aFilterMask Optional ASCII mask; characters matching it are dropped
 *  from the output instead of being copied or escaped. May be null.
 * @param aResult String that has the URL escaped portion appended to. Only
 *  altered if the string is URL escaped or |esc_AlwaysCopy| is specified.
 * @param aDidAppend Indicates whether or not data was appended to |aResult|.
 *  Only written when NS_OK is returned.
 * @return NS_ERROR_INVALID_ARG, NS_ERROR_OUT_OF_MEMORY on failure.
 */
template <class T>
static nsresult T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
                            uint32_t aFlags, const ASCIIMaskArray* aFilterMask,
                            T& aResult, bool& aDidAppend) {
  typedef nsCharTraits<typename T::char_type> traits;
  typedef typename traits::unsigned_char_type unsigned_char_type;
  static_assert(sizeof(*aPart) == 1 || sizeof(*aPart) == 2,
                "unexpected char type");

  if (!aPart) {
    MOZ_ASSERT_UNREACHABLE("null pointer");
    return NS_ERROR_INVALID_ARG;
  }

  bool forced = !!(aFlags & esc_Forced);
  bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
  bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
  bool writing = !!(aFlags & esc_AlwaysCopy);
  bool colon = !!(aFlags & esc_Colon);
  bool spaces = !!(aFlags & esc_Spaces);

  auto src = reinterpret_cast<const unsigned_char_type*>(aPart);

  // Output is staged here and flushed to aResult in batches.
  typename T::char_type tempBuffer[100];
  unsigned int tempBufferPos = 0;

  for (size_t i = 0; i < aPartLen; ++i) {
    unsigned_char_type c = *src++;

    // If there is a filter, we wish to skip any characters which match it.
    // This is needed so we don't perform an extra pass just to extract the
    // filtered characters.
    if (aFilterMask && mozilla::ASCIIMask::IsMasked(*aFilterMask, c)) {
      if (!writing) {
        // First modification: copy the untouched prefix before dropping c.
        if (!aResult.Append(aPart, i, mozilla::fallible)) {
          return NS_ERROR_OUT_OF_MEMORY;
        }
        writing = true;
      }
      continue;
    }

    // if the char has not to be escaped or whatever follows % is
    // a valid escaped string, just copy the char.
    //
    // Also the % will not be escaped until forced
    // See bugzilla bug 61269 for details why we changed this
    //
    // And, we will not escape non-ascii characters if requested.
    // On special request we will also escape the colon even when
    // not covered by the matrix.
    // ignoreAscii is not honored for control characters (C0 and DEL)
    //
    // 0x20..0x7e are the valid ASCII characters.
    if ((dontNeedEscape(c, aFlags) || (c == HEX_ESCAPE && !forced) ||
         (c > 0x7f && ignoreNonAscii) ||
         (c >= 0x20 && c < 0x7f && ignoreAscii)) &&
        !(c == ':' && colon) && !(c == ' ' && spaces)) {
      if (writing) {
        tempBuffer[tempBufferPos++] = c;
      }
    } else { /* do the escape magic */
      if (!writing) {
        // First escape: copy the untouched prefix into the result.
        if (!aResult.Append(aPart, i, mozilla::fallible)) {
          return NS_ERROR_OUT_OF_MEMORY;
        }
        writing = true;
      }
      uint32_t len = ::AppendPercentHex(tempBuffer + tempBufferPos, c);
      tempBufferPos += len;
      MOZ_ASSERT(len <= ENCODE_MAX_LEN, "potential buffer overflow");
    }

    // Flush the temp buffer if it doesn't have room for another encoded char.
    if (tempBufferPos >= mozilla::ArrayLength(tempBuffer) - ENCODE_MAX_LEN) {
      NS_ASSERTION(writing, "should be writing");
      if (!aResult.Append(tempBuffer, tempBufferPos, mozilla::fallible)) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      tempBufferPos = 0;
    }
  }
  if (writing) {
    if (!aResult.Append(tempBuffer, tempBufferPos, mozilla::fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
  }
  aDidAppend = writing;
  return NS_OK;
}

/**
 * Escapes aPartLen chars of aPart (or the whole NUL-terminated string when
 * aPartLen is negative) by forwarding to NS_EscapeURLSpan.
 *
 * @return Whether anything was appended to aResult.
 */
bool NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
                  nsACString& aResult) {
  size_t partLen;
  if (aPartLen < 0) {
    partLen = strlen(aPart);
  } else {
    partLen = aPartLen;
  }

  return NS_EscapeURLSpan(mozilla::Span(aPart, partLen), aFlags, aResult);
}

/**
 * Escapes aStr according to aFlags, appending to aResult when escaping was
 * needed or esc_AlwaysCopy is set. Aborts via NS_ABORT_OOM if T_EscapeURL
 * reports a failure.
 *
 * @return Whether anything was appended to aResult.
 */
bool NS_EscapeURLSpan(mozilla::Span<const char> aStr, uint32_t aFlags,
                      nsACString& aResult) {
  bool appended = false;
  nsresult rv = T_EscapeURL(aStr.Elements(), aStr.Length(), aFlags, nullptr,
                            aResult, appended);
  if (NS_FAILED(rv)) {
    ::NS_ABORT_OOM(aResult.Length() * sizeof(nsACString::char_type));
  }

  return appended;
}

/**
 * Fallible escaping of aStr. On success aResult holds the escaped text, or a
 * copy of aStr when T_EscapeURL appended nothing. On failure aResult is
 * truncated and the error is returned.
 */
nsresult NS_EscapeURL(const nsACString& aStr, uint32_t aFlags,
                      nsACString& aResult, const mozilla::fallible_t&) {
  bool appended = false;
  nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, nullptr,
                            aResult, appended);
  if (NS_FAILED(rv)) {
    aResult.Truncate();
    return rv;
  }

  if (!appended) {
    // Copy fallibly so this variant reports OOM instead of aborting, matching
    // NS_EscapeAndFilterURL.
    if (!aResult.Assign(aStr, mozilla::fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
  }

  return rv;
}

/**
 * Same as the fallible NS_EscapeURL, but additionally drops every character
 * matched by aFilterMask (when non-null) from the output.
 */
nsresult NS_EscapeAndFilterURL(const nsACString& aStr, uint32_t aFlags,
                               const ASCIIMaskArray* aFilterMask,
                               nsACString& aResult,
                               const mozilla::fallible_t&) {
  bool appended = false;
  nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aFilterMask,
                            aResult, appended);
  if (NS_FAILED(rv)) {
    aResult.Truncate();
    return rv;
  }

  if (!appended) {
    if (!aResult.Assign(aStr, mozilla::fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
  }

  return rv;
}

/**
 * UTF-16 escaping. Returns aResult when anything was appended to it, otherwise
 * returns aStr itself. Aborts via NS_ABORT_OOM if T_EscapeURL reports a
 * failure.
 */
const nsAString& NS_EscapeURL(const nsAString& aStr, uint32_t aFlags,
                              nsAString& aResult) {
  bool result = false;
  nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, nullptr,
                            aResult, result);

  if (NS_FAILED(rv)) {
    ::NS_ABORT_OOM(aResult.Length() * sizeof(nsAString::char_type));
  }

  if (result) {
    return aResult;
  }
  return aStr;
}

// Starting at aStr[aStart] find the first index in aStr that matches any
// character that is forbidden by aFunction. Return false if not found.
// *aIndex is only written when a match is found.
static bool FindFirstMatchFrom(const nsString& aStr, size_t aStart,
                               const std::function<bool(char16_t)>& aFunction,
                               size_t* aIndex) {
  for (size_t j = aStart, l = aStr.Length(); j < l; ++j) {
    if (aFunction(aStr[j])) {
      *aIndex = j;
      return true;
    }
  }
  return false;
}

/**
 * Escapes every code unit of aStr for which aFunction returns true.
 *
 * @param aStr      String to escape.
 * @param aFunction Predicate selecting the code units to escape.
 * @param aResult   Receives the escaped string; left untouched when nothing
 *                  needed escaping.
 * @return aResult when at least one code unit was escaped, otherwise aStr.
 */
const nsAString& NS_EscapeURL(const nsString& aStr,
                              const std::function<bool(char16_t)>& aFunction,
                              nsAString& aResult) {
  bool didEscape = false;
  for (size_t i = 0, strLen = aStr.Length(); i < strLen;) {
    size_t j;
    if (MOZ_UNLIKELY(FindFirstMatchFrom(aStr, i, aFunction, &j))) {
      // i is only 0 on the first pass, so this runs for the first match only.
      if (i == 0) {
        didEscape = true;
        aResult.Truncate();
        aResult.SetCapacity(aStr.Length());
      }
      if (j != i) {
        // The substring from 'i' up to 'j' that needs no escaping.
        aResult.Append(nsDependentSubstring(aStr, i, j - i));
      }
      char16_t buffer[ENCODE_MAX_LEN];
      uint32_t bufferLen = ::AppendPercentHex(buffer, aStr[j]);
      MOZ_ASSERT(bufferLen <= ENCODE_MAX_LEN, "buffer overflow");
      aResult.Append(buffer, bufferLen);
      i = j + 1;
    } else {
      if (MOZ_UNLIKELY(didEscape)) {
        // The tail of the string that needs no escaping.
        aResult.Append(nsDependentSubstring(aStr, i, strLen - i));
      }
      break;
    }
  }
  if (MOZ_UNLIKELY(didEscape)) {
    return aResult;
  }
  return aStr;
}

/**
 * Infallible wrapper around the fallible NS_UnescapeURL: aborts via
 * NS_ABORT_OOM when that call reports NS_ERROR_OUT_OF_MEMORY.
 *
 * @return Whether aResult was written to.
 */
bool NS_UnescapeURL(const char* aStr, int32_t aLen, uint32_t aFlags,
                    nsACString& aResult) {
  bool didAppend = false;
  nsresult rv =
      NS_UnescapeURL(aStr, aLen, aFlags, aResult, didAppend, mozilla::fallible);
  if (rv == NS_ERROR_OUT_OF_MEMORY) {
    // A negative aLen means "use strlen"; report the real size rather than a
    // wrapped-around value. aStr is non-null here because a null aStr yields
    // NS_ERROR_INVALID_ARG, not OOM.
    const size_t len = aLen < 0 ? strlen(aStr) : static_cast<size_t>(aLen);
    ::NS_ABORT_OOM(len * sizeof(nsACString::char_type));
  }

  return didAppend;
}

/**
 * Decodes "%XX" escapes in aStr into aResult, subject to aFlags.
 *
 * @param aStr       Input text; must not be null.
 * @param aLen       Number of chars to process, or negative to use
 *                   strlen(aStr).
 * @param aFlags     esc_OnlyASCII / esc_OnlyNonASCII restrict which escapes
 *                   are decoded, esc_SkipControl leaves %00-%1F and %7F
 *                   encoded, esc_Host leaves escapes of host characters that
 *                   require escaping encoded, and esc_AlwaysCopy forces
 *                   aResult to be written even when nothing is decoded.
 * @param aResult    Output string; expected to be empty on entry.
 * @param aDidAppend Set to whether aResult was written. Only written when
 *                   NS_OK is returned.
 * @return NS_OK, NS_ERROR_INVALID_ARG for a null aStr, or
 *         NS_ERROR_OUT_OF_MEMORY.
 */
nsresult NS_UnescapeURL(const char* aStr, int32_t aLen, uint32_t aFlags,
                        nsACString& aResult, bool& aDidAppend,
                        const mozilla::fallible_t&) {
  if (!aStr) {
    MOZ_ASSERT_UNREACHABLE("null pointer");
    return NS_ERROR_INVALID_ARG;
  }

  MOZ_ASSERT(aResult.IsEmpty(),
             "Passing a non-empty string as an out parameter!");

  uint32_t len;
  if (aLen < 0) {
    size_t stringLength = strlen(aStr);
    if (stringLength >= UINT32_MAX) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    len = stringLength;
  } else {
    len = aLen;
  }

  bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
  bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
  bool writing = !!(aFlags & esc_AlwaysCopy);
  bool skipControl = !!(aFlags & esc_SkipControl);
  bool skipInvalidHostChar = !!(aFlags & esc_Host);

  // Only meaningful once `writing` is true.
  unsigned char* destPtr = nullptr;
  uint32_t destPos = 0;

  if (writing) {
    // The decoded text is never longer than the input, so len is enough.
    if (!aResult.SetLength(len, mozilla::fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    destPos = 0;
    destPtr = reinterpret_cast<unsigned char*>(aResult.BeginWriting());
  }

  // `last` marks the start of the input that has not been copied out yet.
  const char* last = aStr;
  const char* end = aStr + len;

  for (const char* p = aStr; p < end; ++p) {
    if (*p == HEX_ESCAPE && p + 2 < end) {
      unsigned char c1 = *((unsigned char*)p + 1);
      unsigned char c2 = *((unsigned char*)p + 2);
      unsigned char u = (UNHEX(c1) << 4) + UNHEX(c2);
      if (mozilla::IsAsciiHexDigit(c1) && mozilla::IsAsciiHexDigit(c2) &&
          (!skipInvalidHostChar || dontNeedEscape(u, aFlags) || c1 >= '8') &&
          ((c1 < '8' && !ignoreAscii) || (c1 >= '8' && !ignoreNonAscii)) &&
          !(skipControl &&
            (c1 < '2' || (c1 == '7' && (c2 == 'f' || c2 == 'F'))))) {
        if (MOZ_UNLIKELY(!writing)) {
          // First decoded escape: allocate the output lazily.
          writing = true;
          if (!aResult.SetLength(len, mozilla::fallible)) {
            return NS_ERROR_OUT_OF_MEMORY;
          }
          destPos = 0;
          destPtr = reinterpret_cast<unsigned char*>(aResult.BeginWriting());
        }
        if (p > last) {
          // Copy the literal run that precedes this escape.
          auto toCopy = p - last;
          memcpy(destPtr + destPos, last, toCopy);
          destPos += toCopy;
          MOZ_ASSERT(destPos <= len);
          last = p;
        }
        destPtr[destPos] = u;
        destPos += 1;
        MOZ_ASSERT(destPos <= len);
        // Skip the two hex digits; the loop increment skips the '%'.
        p += 2;
        last += 3;
      }
    }
  }
  if (writing && last < end) {
    // Copy whatever follows the last decoded escape.
    auto toCopy = end - last;
    memcpy(destPtr + destPos, last, toCopy);
    destPos += toCopy;
    MOZ_ASSERT(destPos <= len);
  }

  if (writing) {
    aResult.Truncate(destPos);
  }

  aDidAppend = writing;
  return NS_OK;
}