/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "mozilla/ArrayUtils.h" #include "mozilla/TextUtils.h" #include "mozilla/Utf8.h" #include #include "js/CharacterEncoding.h" #include "js/CompilationAndEvaluation.h" // JS::Compile #include "js/Exception.h" #include "js/friend/ErrorMessages.h" // JSMSG_* #include "js/SourceText.h" #include "jsapi-tests/tests.h" #include "util/Text.h" #include "vm/ErrorReporting.h" using mozilla::ArrayEqual; using mozilla::IsAsciiHexDigit; using mozilla::Utf8Unit; BEGIN_TEST(testUtf8BadBytes) { static const char badLeadingUnit[] = "var x = \x80"; CHECK(testBadUtf8( badLeadingUnit, JSMSG_BAD_LEADING_UTF8_UNIT, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(startsWith(chars, "0x80")); CHECK(isBadLeadUnitMessage(chars)); return true; }, "0x80")); static const char badSecondInTwoByte[] = "var x = \xDF\x20"; CHECK(testBadUtf8( badSecondInTwoByte, JSMSG_BAD_TRAILING_UTF8_UNIT, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(isBadTrailingBytesMessage(chars)); CHECK(contains(chars, "0x20")); return true; }, "0xDF 0x20")); static const char badSecondInThreeByte[] = "var x = \xEF\x17\xA7"; CHECK(testBadUtf8( badSecondInThreeByte, JSMSG_BAD_TRAILING_UTF8_UNIT, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(isBadTrailingBytesMessage(chars)); CHECK(contains(chars, "0x17")); return true; }, // Validating stops with the first invalid code unit and // shouldn't go beyond that. "0xEF 0x17")); static const char lengthTwoTooShort[] = "var x = \xDF"; CHECK(testBadUtf8( lengthTwoTooShort, JSMSG_NOT_ENOUGH_CODE_UNITS, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(isNotEnoughUnitsMessage(chars)); CHECK(contains(chars, "0xDF")); CHECK(contains(chars, " 1 byte, but 0 bytes were present")); return true; }, "0xDF")); static const char forbiddenHighSurrogate[] = "var x = \xED\xA2\x87"; CHECK(testBadUtf8( forbiddenHighSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(isSurrogateMessage(chars)); CHECK(contains(chars, "0xD887")); return true; }, "0xED 0xA2 0x87")); static const char forbiddenLowSurrogate[] = "var x = \xED\xB7\xAF"; CHECK(testBadUtf8( forbiddenLowSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(isSurrogateMessage(chars)); CHECK(contains(chars, "0xDDEF")); return true; }, "0xED 0xB7 0xAF")); static const char oneTooBig[] = "var x = \xF4\x90\x80\x80"; CHECK(testBadUtf8( oneTooBig, JSMSG_FORBIDDEN_UTF8_CODE_POINT, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(isTooBigMessage(chars)); CHECK(contains(chars, "0x110000")); return true; }, "0xF4 0x90 0x80 0x80")); static const char notShortestFormZero[] = "var x = \xC0\x80"; CHECK(testBadUtf8( notShortestFormZero, JSMSG_FORBIDDEN_UTF8_CODE_POINT, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(isNotShortestFormMessage(chars)); CHECK(startsWith(chars, "0x0 isn't ")); return true; }, "0xC0 0x80")); static const char notShortestFormNonzero[] = "var x = \xE0\x87\x80"; CHECK(testBadUtf8( notShortestFormNonzero, JSMSG_FORBIDDEN_UTF8_CODE_POINT, [this](JS::ConstUTF8CharsZ message) { const char* chars = message.c_str(); CHECK(isNotShortestFormMessage(chars)); CHECK(startsWith(chars, "0x1C0 isn't ")); return true; }, "0xE0 0x87 0x80")); return true; } static constexpr size_t LengthOfByte = js_strlen("0xFF"); static bool startsWithByte(const char* str) { return str[0] == '0' && str[1] == 'x' && IsAsciiHexDigit(str[2]) && IsAsciiHexDigit(str[3]); } static bool startsWith(const char* str, const char* prefix) { return std::strncmp(prefix, str, strlen(prefix)) == 0; } static bool contains(const char* str, const char* substr) { return std::strstr(str, substr) != nullptr; } static bool equals(const char* str, const char* expected) { return std::strcmp(str, expected) == 0; } static bool isBadLeadUnitMessage(const char* str) { return startsWithByte(str) && equals(str + LengthOfByte, " byte doesn't begin a valid UTF-8 code point"); } static bool isBadTrailingBytesMessage(const char* str) { return startsWith(str, "bad trailing UTF-8 byte "); } static bool isNotEnoughUnitsMessage(const char* str) { return startsWithByte(str) && startsWith(str + LengthOfByte, " byte in UTF-8 must be followed by "); } static bool isForbiddenCodePointMessage(const char* str) { return contains(str, "isn't a valid code point because"); } static bool isSurrogateMessage(const char* str) { return isForbiddenCodePointMessage(str) && contains(str, " it's a UTF-16 surrogate"); } static bool isTooBigMessage(const char* str) { return isForbiddenCodePointMessage(str) && contains(str, "the maximum code point is U+10FFFF"); } static bool isNotShortestFormMessage(const char* str) { return isForbiddenCodePointMessage(str) && contains(str, "it wasn't encoded in shortest possible form"); } template bool testBadUtf8(const char (&chars)[N], unsigned errorNumber, TestMessage testMessage, const char* badBytes) { JS::Rooted script(cx); { JS::CompileOptions options(cx); JS::SourceText srcBuf; CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed)); script = JS::Compile(cx, options, srcBuf); CHECK(!script); } JS::ExceptionStack exnStack(cx); CHECK(JS::StealPendingExceptionStack(cx, &exnStack)); JS::ErrorReportBuilder report(cx); CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects)); const auto* errorReport = report.report(); CHECK(errorReport->errorNumber == errorNumber); CHECK(testMessage(errorReport->message())); { const auto& notes = errorReport->notes; CHECK(notes != nullptr); auto iter = notes->begin(); CHECK(iter != notes->end()); const char* noteMessage = (*iter)->message().c_str(); // The prefix ought always be the same. static constexpr char expectedPrefix[] = "the code units comprising this invalid code point were: "; constexpr size_t expectedPrefixLen = js_strlen(expectedPrefix); CHECK(startsWith(noteMessage, expectedPrefix)); // The end of the prefix is the bad bytes. CHECK(equals(noteMessage + expectedPrefixLen, badBytes)); ++iter; CHECK(iter == notes->end()); } static constexpr char16_t expectedContext[] = u"var x = "; constexpr size_t expectedContextLen = js_strlen(expectedContext); const char16_t* lineOfContext = errorReport->linebuf(); size_t lineOfContextLength = errorReport->linebufLength(); CHECK(lineOfContext[lineOfContextLength] == '\0'); CHECK(lineOfContextLength == expectedContextLen); CHECK(std::memcmp(lineOfContext, expectedContext, expectedContextLen * sizeof(char16_t)) == 0); return true; } END_TEST(testUtf8BadBytes) BEGIN_TEST(testMultiUnitUtf8InWindow) { static const char firstInWindowIsMultiUnit[] = "\xCF\x80\xCF\x80 = 6.283185307; @ bad starts HERE:\x80\xFF\xFF"; CHECK(testContext(firstInWindowIsMultiUnit, u"ππ = 6.283185307; @ bad starts HERE:")); static const char atTokenOffsetIsMulti[] = "var z = 💯"; CHECK(testContext(atTokenOffsetIsMulti, u"var z = 💯")); static const char afterTokenOffsetIsMulti[] = "var z = @💯💯💯X"; CHECK(testContext(afterTokenOffsetIsMulti, u"var z = @💯💯💯X")); static const char atEndIsMulti[] = "var z = @@💯💯💯"; CHECK(testContext(atEndIsMulti, u"var z = @@💯💯💯")); return true; } template bool testContext(const char (&chars)[N], const char16_t (&expectedContext)[ContextLenWithNull]) { JS::Rooted script(cx); { JS::CompileOptions options(cx); JS::SourceText srcBuf; CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed)); script = JS::Compile(cx, options, srcBuf); CHECK(!script); } JS::ExceptionStack exnStack(cx); CHECK(JS::StealPendingExceptionStack(cx, &exnStack)); JS::ErrorReportBuilder report(cx); CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects)); const auto* errorReport = report.report(); CHECK(errorReport->errorNumber == JSMSG_ILLEGAL_CHARACTER); const char16_t* lineOfContext = errorReport->linebuf(); size_t lineOfContextLength = errorReport->linebufLength(); CHECK(lineOfContext[lineOfContextLength] == '\0'); CHECK(lineOfContextLength == ContextLenWithNull - 1); CHECK(ArrayEqual(lineOfContext, expectedContext, ContextLenWithNull)); return true; } END_TEST(testMultiUnitUtf8InWindow)