From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Fri, 19 Apr 2024 02:47:55 +0200
Subject: Adding upstream version 124.0.1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 js/src/jsapi-tests/testUTF8.cpp | 231 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 231 insertions(+)
 create mode 100644 js/src/jsapi-tests/testUTF8.cpp

(limited to 'js/src/jsapi-tests/testUTF8.cpp')
diff --git a/js/src/jsapi-tests/testUTF8.cpp b/js/src/jsapi-tests/testUTF8.cpp
new file mode 100644
index 0000000000..b32a6dd4b9
--- /dev/null
+++ b/js/src/jsapi-tests/testUTF8.cpp
@@ -0,0 +1,231 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Range.h"  // mozilla::Range
+#include "mozilla/Span.h"   // mozilla::Span
+#include "mozilla/Utf8.h"   // mozilla::ConvertUtf8toUtf16
+
+#include "js/CharacterEncoding.h"
+#include "jsapi-tests/tests.h"
+
+BEGIN_TEST(testUTF8_badUTF8) {
+  static const char badUTF8[] = "...\xC0...";
+  JSString* str = JS_NewStringCopyZ(cx, badUTF8);
+  CHECK(str);
+  char16_t ch;
+  if (!JS_GetStringCharAt(cx, str, 3, &ch)) {
+    return false;
+  }
+  CHECK(ch == 0x00C0);
+  return true;
+}
+END_TEST(testUTF8_badUTF8)
+
+BEGIN_TEST(testUTF8_bigUTF8) {
+  static const char bigUTF8[] = "...\xFB\xBF\xBF\xBF\xBF...";
+  JSString* str = JS_NewStringCopyZ(cx, bigUTF8);
+  CHECK(str);
+  char16_t ch;
+  if (!JS_GetStringCharAt(cx, str, 3, &ch)) {
+    return false;
+  }
+  CHECK(ch == 0x00FB);
+  return true;
+}
+END_TEST(testUTF8_bigUTF8)
+
+BEGIN_TEST(testUTF8_badSurrogate) {
+  static const char16_t badSurrogate[] = {'A', 'B', 'C', 0xDEEE, 'D', 'E', 0};
+  mozilla::Range<const char16_t> tbchars(badSurrogate, js_strlen(badSurrogate));
+  JS::Latin1CharsZ latin1 = JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars);
+  CHECK(latin1);
+  CHECK(latin1[3] == 0x00EE);
+  return true;
+}
+END_TEST(testUTF8_badSurrogate)
+
+BEGIN_TEST(testUTF8_LossyConversion) {
+  // Maximal subparts of an ill-formed subsequence should be replaced with
+  // single REPLACEMENT CHARACTER.
+
+  // Input ends with partial sequence.
+  // clang-format off
+  const char* inputs1[] = {
+    "\xC2",
+    "\xDF",
+    "\xE0",
+    "\xE0\xA0",
+    "\xF0",
+    "\xF0\x90",
+    "\xF0\x90\x80",
+  };
+  // clang-format on
+
+  char16_t outputBuf[8];
+  mozilla::Span output(outputBuf, 8);
+
+  for (const char* input : inputs1) {
+    size_t len;
+    JS::TwoByteCharsZ utf16 = JS::LossyUTF8CharsToNewTwoByteCharsZ(
+        cx, JS::UTF8Chars(input, js_strlen(input)), &len,
+        js::StringBufferArena);
+    CHECK(utf16);
+    CHECK(len == 1);
+    CHECK(utf16[0] == 0xFFFD);
+
+    // Make sure the behavior matches to encoding_rs.
+    len = mozilla::ConvertUtf8toUtf16(mozilla::Span(input, js_strlen(input)),
+                                      output);
+    CHECK(len == 1);
+    CHECK(outputBuf[0] == 0xFFFD);
+  }
+
+  // Partial sequence followed by ASCII range.
+  // clang-format off
+  const char* inputs2[] = {
+    "\xC2 ",
+    "\xDF ",
+    "\xE0 ",
+    "\xE0\xA0 ",
+    "\xF0 ",
+    "\xF0\x90 ",
+    "\xF0\x90\x80 ",
+  };
+  // clang-format on
+
+  for (const char* input : inputs2) {
+    size_t len;
+    JS::TwoByteCharsZ utf16 = JS::LossyUTF8CharsToNewTwoByteCharsZ(
+        cx, JS::UTF8Chars(input, js_strlen(input)), &len,
+        js::StringBufferArena);
+    CHECK(utf16);
+    CHECK(len == 2);
+    CHECK(utf16[0] == 0xFFFD);
+    CHECK(utf16[1] == 0x20);
+
+    len = mozilla::ConvertUtf8toUtf16(mozilla::Span(input, js_strlen(input)),
+                                      output);
+    CHECK(len == 2);
+    CHECK(outputBuf[0] == 0xFFFD);
+    CHECK(outputBuf[1] == 0x20);
+  }
+
+  // Partial sequence followed by other first code unit.
+  // clang-format off
+  const char* inputs3[] = {
+    "\xC2\xC2\x80",
+    "\xDF\xC2\x80",
+    "\xE0\xC2\x80",
+    "\xE0\xA0\xC2\x80",
+    "\xF0\xC2\x80",
+    "\xF0\x90\xC2\x80",
+    "\xF0\x90\x80\xC2\x80",
+  };
+  // clang-format on
+
+  for (const char* input : inputs3) {
+    size_t len;
+    JS::TwoByteCharsZ utf16 = JS::LossyUTF8CharsToNewTwoByteCharsZ(
+        cx, JS::UTF8Chars(input, js_strlen(input)), &len,
+        js::StringBufferArena);
+    CHECK(utf16);
+    CHECK(len == 2);
+    CHECK(utf16[0] == 0xFFFD);
+    CHECK(utf16[1] == 0x80);
+
+    len = mozilla::ConvertUtf8toUtf16(mozilla::Span(input, js_strlen(input)),
+                                      output);
+    CHECK(len == 2);
+    CHECK(outputBuf[0] == 0xFFFD);
+    CHECK(outputBuf[1] == 0x80);
+  }
+
+  // Invalid second byte.
+  // clang-format off
+  const char* inputs4[] = {
+    "\xE0\x9F\x80\x80",
+    "\xED\xA0\x80\x80",
+    "\xF0\x80\x80\x80",
+    "\xF4\x90\x80\x80",
+  };
+  // clang-format on
+
+  for (const char* input : inputs4) {
+    size_t len;
+    JS::TwoByteCharsZ utf16 = JS::LossyUTF8CharsToNewTwoByteCharsZ(
+        cx, JS::UTF8Chars(input, js_strlen(input)), &len,
+        js::StringBufferArena);
+    CHECK(utf16);
+    CHECK(len == 4);
+    CHECK(utf16[0] == 0xFFFD);
+    CHECK(utf16[1] == 0xFFFD);
+    CHECK(utf16[2] == 0xFFFD);
+    CHECK(utf16[3] == 0xFFFD);
+
+    len = mozilla::ConvertUtf8toUtf16(mozilla::Span(input, js_strlen(input)),
+                                      output);
+    CHECK(len == 4);
+    CHECK(outputBuf[0] == 0xFFFD);
+    CHECK(outputBuf[1] == 0xFFFD);
+    CHECK(outputBuf[2] == 0xFFFD);
+    CHECK(outputBuf[3] == 0xFFFD);
+  }
+
+  // Invalid second byte, with not sufficient number of units.
+  // clang-format off
+  const char* inputs5[] = {
+    "\xE0\x9F\x80",
+    "\xED\xA0\x80",
+    "\xF0\x80\x80",
+    "\xF4\x90\x80",
+  };
+  const char* inputs6[] = {
+    "\xE0\x9F",
+    "\xED\xA0",
+    "\xF0\x80",
+    "\xF4\x90",
+  };
+  // clang-format on
+
+  for (const char* input : inputs5) {
+    size_t len;
+    JS::TwoByteCharsZ utf16 = JS::LossyUTF8CharsToNewTwoByteCharsZ(
+        cx, JS::UTF8Chars(input, js_strlen(input)), &len,
+        js::StringBufferArena);
+    CHECK(utf16);
+    CHECK(len == 3);
+    CHECK(utf16[0] == 0xFFFD);
+    CHECK(utf16[1] == 0xFFFD);
+    CHECK(utf16[2] == 0xFFFD);
+
+    len = mozilla::ConvertUtf8toUtf16(mozilla::Span(input, js_strlen(input)),
+                                      output);
+    CHECK(len == 3);
+    CHECK(outputBuf[0] == 0xFFFD);
+    CHECK(outputBuf[1] == 0xFFFD);
+    CHECK(outputBuf[2] == 0xFFFD);
+  }
+
+  for (const char* input : inputs6) {
+    size_t len;
+    JS::TwoByteCharsZ utf16 = JS::LossyUTF8CharsToNewTwoByteCharsZ(
+        cx, JS::UTF8Chars(input, js_strlen(input)), &len,
+        js::StringBufferArena);
+    CHECK(utf16);
+    CHECK(len == 2);
+    CHECK(utf16[0] == 0xFFFD);
+    CHECK(utf16[1] == 0xFFFD);
+
+    len = mozilla::ConvertUtf8toUtf16(mozilla::Span(input, js_strlen(input)),
+                                      output);
+    CHECK(len == 2);
+    CHECK(outputBuf[0] == 0xFFFD);
+    CHECK(outputBuf[1] == 0xFFFD);
+  }
+  return true;
+}
+END_TEST(testUTF8_LossyConversion)
-- 
cgit v1.2.3