diff options
Diffstat (limited to '')
-rw-r--r-- | js/src/jsapi-tests/testCompileUtf8.cpp | 300 |
1 files changed, 300 insertions, 0 deletions
diff --git a/js/src/jsapi-tests/testCompileUtf8.cpp b/js/src/jsapi-tests/testCompileUtf8.cpp new file mode 100644 index 0000000000..8799e70201 --- /dev/null +++ b/js/src/jsapi-tests/testCompileUtf8.cpp @@ -0,0 +1,300 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ArrayUtils.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Utf8.h" + +#include <cstring> + +#include "js/CharacterEncoding.h" +#include "js/CompilationAndEvaluation.h" // JS::Compile +#include "js/Exception.h" +#include "js/friend/ErrorMessages.h" // JSMSG_* +#include "js/SourceText.h" +#include "jsapi-tests/tests.h" +#include "util/Text.h" +#include "vm/ErrorReporting.h" + +using mozilla::ArrayEqual; +using mozilla::IsAsciiHexDigit; +using mozilla::Utf8Unit; + +BEGIN_TEST(testUtf8BadBytes) { + static const char badLeadingUnit[] = "var x = \x80"; + CHECK(testBadUtf8( + badLeadingUnit, JSMSG_BAD_LEADING_UTF8_UNIT, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(startsWith(chars, "0x80")); + CHECK(isBadLeadUnitMessage(chars)); + return true; + }, + "0x80")); + + static const char badSecondInTwoByte[] = "var x = \xDF\x20"; + CHECK(testBadUtf8( + badSecondInTwoByte, JSMSG_BAD_TRAILING_UTF8_UNIT, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(isBadTrailingBytesMessage(chars)); + CHECK(contains(chars, "0x20")); + return true; + }, + "0xDF 0x20")); + + static const char badSecondInThreeByte[] = "var x = \xEF\x17\xA7"; + CHECK(testBadUtf8( + badSecondInThreeByte, JSMSG_BAD_TRAILING_UTF8_UNIT, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(isBadTrailingBytesMessage(chars)); + CHECK(contains(chars, "0x17")); + return true; + }, + // Validating stops with the first invalid code unit and + // shouldn't go beyond that. + "0xEF 0x17")); + + static const char lengthTwoTooShort[] = "var x = \xDF"; + CHECK(testBadUtf8( + lengthTwoTooShort, JSMSG_NOT_ENOUGH_CODE_UNITS, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(isNotEnoughUnitsMessage(chars)); + CHECK(contains(chars, "0xDF")); + CHECK(contains(chars, " 1 byte, but 0 bytes were present")); + return true; + }, + "0xDF")); + + static const char forbiddenHighSurrogate[] = "var x = \xED\xA2\x87"; + CHECK(testBadUtf8( + forbiddenHighSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(isSurrogateMessage(chars)); + CHECK(contains(chars, "0xD887")); + return true; + }, + "0xED 0xA2 0x87")); + + static const char forbiddenLowSurrogate[] = "var x = \xED\xB7\xAF"; + CHECK(testBadUtf8( + forbiddenLowSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(isSurrogateMessage(chars)); + CHECK(contains(chars, "0xDDEF")); + return true; + }, + "0xED 0xB7 0xAF")); + + static const char oneTooBig[] = "var x = \xF4\x90\x80\x80"; + CHECK(testBadUtf8( + oneTooBig, JSMSG_FORBIDDEN_UTF8_CODE_POINT, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(isTooBigMessage(chars)); + CHECK(contains(chars, "0x110000")); + return true; + }, + "0xF4 0x90 0x80 0x80")); + + static const char notShortestFormZero[] = "var x = \xC0\x80"; + CHECK(testBadUtf8( + notShortestFormZero, JSMSG_FORBIDDEN_UTF8_CODE_POINT, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(isNotShortestFormMessage(chars)); + CHECK(startsWith(chars, "0x0 isn't ")); + return true; + }, + "0xC0 0x80")); + + static const char notShortestFormNonzero[] = "var x = \xE0\x87\x80"; + CHECK(testBadUtf8( + notShortestFormNonzero, JSMSG_FORBIDDEN_UTF8_CODE_POINT, + [this](JS::ConstUTF8CharsZ message) { + const char* chars = message.c_str(); + CHECK(isNotShortestFormMessage(chars)); + CHECK(startsWith(chars, "0x1C0 isn't ")); + return true; + }, + "0xE0 0x87 0x80")); + + return true; +} + +static constexpr size_t LengthOfByte = js_strlen("0xFF"); + +static bool startsWithByte(const char* str) { + return str[0] == '0' && str[1] == 'x' && IsAsciiHexDigit(str[2]) && + IsAsciiHexDigit(str[3]); +} + +static bool startsWith(const char* str, const char* prefix) { + return std::strncmp(prefix, str, strlen(prefix)) == 0; +} + +static bool contains(const char* str, const char* substr) { + return std::strstr(str, substr) != nullptr; +} + +static bool equals(const char* str, const char* expected) { + return std::strcmp(str, expected) == 0; +} + +static bool isBadLeadUnitMessage(const char* str) { + return startsWithByte(str) && + equals(str + LengthOfByte, + " byte doesn't begin a valid UTF-8 code point"); +} + +static bool isBadTrailingBytesMessage(const char* str) { + return startsWith(str, "bad trailing UTF-8 byte "); +} + +static bool isNotEnoughUnitsMessage(const char* str) { + return startsWithByte(str) && + startsWith(str + LengthOfByte, " byte in UTF-8 must be followed by "); +} + +static bool isForbiddenCodePointMessage(const char* str) { + return contains(str, "isn't a valid code point because"); +} + +static bool isSurrogateMessage(const char* str) { + return isForbiddenCodePointMessage(str) && + contains(str, " it's a UTF-16 surrogate"); +} + +static bool isTooBigMessage(const char* str) { + return isForbiddenCodePointMessage(str) && + contains(str, "the maximum code point is U+10FFFF"); +} + +static bool isNotShortestFormMessage(const char* str) { + return isForbiddenCodePointMessage(str) && + contains(str, "it wasn't encoded in shortest possible form"); +} + +template <size_t N, typename TestMessage> +bool testBadUtf8(const char (&chars)[N], unsigned errorNumber, + TestMessage testMessage, const char* badBytes) { + JS::Rooted<JSScript*> script(cx); + { + JS::CompileOptions options(cx); + + JS::SourceText<mozilla::Utf8Unit> srcBuf; + CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed)); + + script = JS::Compile(cx, options, srcBuf); + CHECK(!script); + } + + JS::ExceptionStack exnStack(cx); + CHECK(JS::StealPendingExceptionStack(cx, &exnStack)); + + JS::ErrorReportBuilder report(cx); + CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects)); + + const auto* errorReport = report.report(); + + CHECK(errorReport->errorNumber == errorNumber); + + CHECK(testMessage(errorReport->message())); + + { + const auto& notes = errorReport->notes; + CHECK(notes != nullptr); + + auto iter = notes->begin(); + CHECK(iter != notes->end()); + + const char* noteMessage = (*iter)->message().c_str(); + + // The prefix ought always be the same. + static constexpr char expectedPrefix[] = + "the code units comprising this invalid code point were: "; + constexpr size_t expectedPrefixLen = js_strlen(expectedPrefix); + + CHECK(startsWith(noteMessage, expectedPrefix)); + + // The end of the prefix is the bad bytes. + CHECK(equals(noteMessage + expectedPrefixLen, badBytes)); + + ++iter; + CHECK(iter == notes->end()); + } + + static constexpr char16_t expectedContext[] = u"var x = "; + constexpr size_t expectedContextLen = js_strlen(expectedContext); + + const char16_t* lineOfContext = errorReport->linebuf(); + size_t lineOfContextLength = errorReport->linebufLength(); + + CHECK(lineOfContext[lineOfContextLength] == '\0'); + CHECK(lineOfContextLength == expectedContextLen); + + CHECK(std::memcmp(lineOfContext, expectedContext, + expectedContextLen * sizeof(char16_t)) == 0); + + return true; +} +END_TEST(testUtf8BadBytes) + +BEGIN_TEST(testMultiUnitUtf8InWindow) { + static const char firstInWindowIsMultiUnit[] = + "\xCF\x80\xCF\x80 = 6.283185307; @ bad starts HERE:\x80\xFF\xFF"; + CHECK(testContext(firstInWindowIsMultiUnit, + u"ππ = 6.283185307; @ bad starts HERE:")); + + static const char atTokenOffsetIsMulti[] = "var z = 💯"; + CHECK(testContext(atTokenOffsetIsMulti, u"var z = 💯")); + + static const char afterTokenOffsetIsMulti[] = "var z = @💯💯💯X"; + CHECK(testContext(afterTokenOffsetIsMulti, u"var z = @💯💯💯X")); + + static const char atEndIsMulti[] = "var z = @@💯💯💯"; + CHECK(testContext(atEndIsMulti, u"var z = @@💯💯💯")); + + return true; +} + +template <size_t N, size_t ContextLenWithNull> +bool testContext(const char (&chars)[N], + const char16_t (&expectedContext)[ContextLenWithNull]) { + JS::Rooted<JSScript*> script(cx); + { + JS::CompileOptions options(cx); + + JS::SourceText<mozilla::Utf8Unit> srcBuf; + CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed)); + + script = JS::Compile(cx, options, srcBuf); + CHECK(!script); + } + + JS::ExceptionStack exnStack(cx); + CHECK(JS::StealPendingExceptionStack(cx, &exnStack)); + + JS::ErrorReportBuilder report(cx); + CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects)); + + const auto* errorReport = report.report(); + + CHECK(errorReport->errorNumber == JSMSG_ILLEGAL_CHARACTER); + + const char16_t* lineOfContext = errorReport->linebuf(); + size_t lineOfContextLength = errorReport->linebufLength(); + + CHECK(lineOfContext[lineOfContextLength] == '\0'); + CHECK(lineOfContextLength == ContextLenWithNull - 1); + + CHECK(ArrayEqual(lineOfContext, expectedContext, ContextLenWithNull)); + + return true; +} +END_TEST(testMultiUnitUtf8InWindow) |