From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Fri, 19 Apr 2024 02:47:55 +0200
Subject: Adding upstream version 124.0.1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 .../components/urlbar/tests/unit/test_tokenizer.js | 449 +++++++++++++++++++++
 1 file changed, 449 insertions(+)
 create mode 100644 browser/components/urlbar/tests/unit/test_tokenizer.js

(limited to 'browser/components/urlbar/tests/unit/test_tokenizer.js')

diff --git a/browser/components/urlbar/tests/unit/test_tokenizer.js b/browser/components/urlbar/tests/unit/test_tokenizer.js
new file mode 100644
index 0000000000..835d1a5909
--- /dev/null
+++ b/browser/components/urlbar/tests/unit/test_tokenizer.js
@@ -0,0 +1,449 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+add_task(async function test_tokenizer() {
+  let testContexts = [
+    { desc: "Empty string", searchString: "", expectedTokens: [] },
+    { desc: "Spaces string", searchString: "      ", expectedTokens: [] },
+    {
+      desc: "Single word string",
+      searchString: "test",
+      expectedTokens: [{ value: "test", type: UrlbarTokenizer.TYPE.TEXT }],
+    },
+    {
+      desc: "Multi word string with mixed whitespace types",
+      searchString: " test1 test2\u1680test3\u2004test4\u1680",
+      expectedTokens: [
+        { value: "test1", type: UrlbarTokenizer.TYPE.TEXT },
+        { value: "test2", type: UrlbarTokenizer.TYPE.TEXT },
+        { value: "test3", type: UrlbarTokenizer.TYPE.TEXT },
+        { value: "test4", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "separate restriction char at beginning",
+      searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK} test`,
+      expectedTokens: [
+        {
+          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
+          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
+        },
+        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "separate restriction char at end",
+      searchString: `test ${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
+      expectedTokens: [
+        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
+        {
+          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
+          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
+        },
+      ],
+    },
+    {
+      desc: "boundary restriction char at end",
+      searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
+      expectedTokens: [
+        {
+          value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+      ],
+    },
+    {
+      desc: "boundary search restriction char at end",
+      searchString: `test${UrlbarTokenizer.RESTRICT.SEARCH}`,
+      expectedTokens: [
+        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
+        {
+          value: UrlbarTokenizer.RESTRICT.SEARCH,
+          type: UrlbarTokenizer.TYPE.RESTRICT_SEARCH,
+        },
+      ],
+    },
+    {
+      desc: "separate restriction char in the middle",
+      searchString: `test ${UrlbarTokenizer.RESTRICT.BOOKMARK} test`,
+      expectedTokens: [
+        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
+        {
+          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "restriction char in the middle",
+      searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}test`,
+      expectedTokens: [
+        {
+          value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}test`,
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+      ],
+    },
+    {
+      desc: "restriction char in the middle 2",
+      searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK} test`,
+      expectedTokens: [
+        {
+          value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+        { value: `test`, type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "double boundary restriction char",
+      searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK}test${UrlbarTokenizer.RESTRICT.TITLE}`,
+      expectedTokens: [
+        {
+          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
+          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
+        },
+        {
+          value: `test${UrlbarTokenizer.RESTRICT.TITLE}`,
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+      ],
+    },
+    {
+      desc: "double non-combinable restriction char, single char string",
+      searchString: `t${UrlbarTokenizer.RESTRICT.BOOKMARK}${UrlbarTokenizer.RESTRICT.SEARCH}`,
+      expectedTokens: [
+        {
+          value: `t${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+        {
+          value: UrlbarTokenizer.RESTRICT.SEARCH,
+          type: UrlbarTokenizer.TYPE.RESTRICT_SEARCH,
+        },
+      ],
+    },
+    {
+      desc: "only boundary restriction chars",
+      searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK}${UrlbarTokenizer.RESTRICT.TITLE}`,
+      expectedTokens: [
+        {
+          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
+          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
+        },
+        {
+          value: UrlbarTokenizer.RESTRICT.TITLE,
+          type: UrlbarTokenizer.TYPE.RESTRICT_TITLE,
+        },
+      ],
+    },
+    {
+      desc: "only the boundary restriction char",
+      searchString: UrlbarTokenizer.RESTRICT.BOOKMARK,
+      expectedTokens: [
+        {
+          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
+          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
+        },
+      ],
+    },
+    // Some restriction chars may be # or ?, that are also valid path parts.
+    // The next 2 tests will check we consider those as part of url paths.
+    {
+      desc: "boundary # char on path",
+      searchString: "test/#",
+      expectedTokens: [
+        { value: "test/#", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "boundary ? char on path",
+      searchString: "test/?",
+      expectedTokens: [
+        { value: "test/?", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "multiple boundary restriction chars suffix",
+      searchString: `test ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG}`,
+      expectedTokens: [
+        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
+        {
+          value: UrlbarTokenizer.RESTRICT.HISTORY,
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+        {
+          value: UrlbarTokenizer.RESTRICT.TAG,
+          type: UrlbarTokenizer.TYPE.RESTRICT_TAG,
+        },
+      ],
+    },
+    {
+      desc: "multiple boundary restriction chars prefix",
+      searchString: `${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG} test`,
+      expectedTokens: [
+        {
+          value: UrlbarTokenizer.RESTRICT.HISTORY,
+          type: UrlbarTokenizer.TYPE.RESTRICT_HISTORY,
+        },
+        {
+          value: UrlbarTokenizer.RESTRICT.TAG,
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "Math with division",
+      searchString: "3.6/1.2",
+      expectedTokens: [{ value: "3.6/1.2", type: UrlbarTokenizer.TYPE.TEXT }],
+    },
+    {
+      desc: "ipv4 in bookmarks",
+      searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK} 192.168.1.1:8`,
+      expectedTokens: [
+        {
+          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
+          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
+        },
+        { value: "192.168.1.1:8", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
+      ],
+    },
+    {
+      desc: "email",
+      searchString: "test@mozilla.com",
+      expectedTokens: [
+        { value: "test@mozilla.com", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "email2",
+      searchString: "test.test@mozilla.co.uk",
+      expectedTokens: [
+        { value: "test.test@mozilla.co.uk", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "protocol",
+      searchString: "http://test",
+      expectedTokens: [
+        { value: "http://test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "bogus protocol with host (we allow visits to http://///example.com)",
+      searchString: "http:///test",
+      expectedTokens: [
+        { value: "http:///test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "file protocol with path",
+      searchString: "file:///home",
+      expectedTokens: [
+        { value: "file:///home", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "almost a protocol",
+      searchString: "http:",
+      expectedTokens: [
+        { value: "http:", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "almost a protocol 2",
+      searchString: "http:/",
+      expectedTokens: [
+        { value: "http:/", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "bogus protocol (we allow visits to http://///example.com)",
+      searchString: "http:///",
+      expectedTokens: [
+        { value: "http:///", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "file protocol",
+      searchString: "file:///",
+      expectedTokens: [
+        { value: "file:///", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "userinfo",
+      searchString: "user:pass@test",
+      expectedTokens: [
+        { value: "user:pass@test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
+      ],
+    },
+    {
+      desc: "domain",
+      searchString: "www.mozilla.org",
+      expectedTokens: [
+        {
+          value: "www.mozilla.org",
+          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN,
+        },
+      ],
+    },
+    {
+      desc: "data uri",
+      searchString: "data:text/plain,Content",
+      expectedTokens: [
+        {
+          value: "data:text/plain,Content",
+          type: UrlbarTokenizer.TYPE.POSSIBLE_URL,
+        },
+      ],
+    },
+    {
+      desc: "ipv6",
+      searchString: "[2001:db8::1]",
+      expectedTokens: [
+        { value: "[2001:db8::1]", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
+      ],
+    },
+    {
+      desc: "numeric domain",
+      searchString: "test1001.com",
+      expectedTokens: [
+        { value: "test1001.com", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
+      ],
+    },
+    {
+      desc: "invalid ip",
+      searchString: "192.2134.1.2",
+      expectedTokens: [
+        { value: "192.2134.1.2", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "ipv4",
+      searchString: "1.2.3.4",
+      expectedTokens: [
+        { value: "1.2.3.4", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
+      ],
+    },
+    {
+      desc: "host/path",
+      searchString: "test/test",
+      expectedTokens: [
+        { value: "test/test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    {
+      desc: "percent encoded string",
+      searchString: "%E6%97%A5%E6%9C%AC",
+      expectedTokens: [
+        { value: "%E6%97%A5%E6%9C%AC", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "Uppercase",
+      searchString: "TEST",
+      expectedTokens: [{ value: "TEST", type: UrlbarTokenizer.TYPE.TEXT }],
+    },
+    {
+      desc: "Mixed case 1",
+      searchString: "TeSt",
+      expectedTokens: [{ value: "TeSt", type: UrlbarTokenizer.TYPE.TEXT }],
+    },
+    {
+      desc: "Mixed case 2",
+      searchString: "tEsT",
+      expectedTokens: [{ value: "tEsT", type: UrlbarTokenizer.TYPE.TEXT }],
+    },
+    {
+      desc: "Uppercase with spaces",
+      searchString: "TEST EXAMPLE",
+      expectedTokens: [
+        { value: "TEST", type: UrlbarTokenizer.TYPE.TEXT },
+        { value: "EXAMPLE", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "Mixed case with spaces",
+      searchString: "TeSt eXaMpLe",
+      expectedTokens: [
+        { value: "TeSt", type: UrlbarTokenizer.TYPE.TEXT },
+        { value: "eXaMpLe", type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
+    {
+      desc: "plain number",
+      searchString: "1001",
+      expectedTokens: [{ value: "1001", type: UrlbarTokenizer.TYPE.TEXT }],
+    },
+    {
+      desc: "data uri with spaces",
+      searchString: "data:text/html,oh hi?",
+      expectedTokens: [
+        {
+          value: "data:text/html,oh hi?",
+          type: UrlbarTokenizer.TYPE.POSSIBLE_URL,
+        },
+      ],
+    },
+    {
+      desc: "data uri with spaces ignored with other tokens",
+      searchString: "hi data:text/html,oh hi?",
+      expectedTokens: [
+        {
+          value: "hi",
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+        {
+          value: "data:text/html,oh",
+          type: UrlbarTokenizer.TYPE.POSSIBLE_URL,
+        },
+        {
+          value: "hi",
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+        {
+          value: UrlbarTokenizer.RESTRICT.SEARCH,
+          type: UrlbarTokenizer.TYPE.RESTRICT_SEARCH,
+        },
+      ],
+    },
+    {
+      desc: "whitelisted host",
+      searchString: "test whitelisted",
+      expectedTokens: [
+        {
+          value: "test",
+          type: UrlbarTokenizer.TYPE.TEXT,
+        },
+        {
+          value: "whitelisted",
+          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN,
+        },
+      ],
+    },
+  ];
+
+  Services.prefs.setBoolPref("browser.fixup.domainwhitelist.whitelisted", true);
+
+  for (let queryContext of testContexts) {
+    info(queryContext.desc);
+    queryContext.trimmedSearchString = queryContext.searchString.trim();
+    for (let token of queryContext.expectedTokens) {
+      token.lowerCaseValue = token.value.toLocaleLowerCase();
+    }
+    let newQueryContext = UrlbarTokenizer.tokenize(queryContext);
+    Assert.equal(
+      queryContext,
+      newQueryContext,
+      "The queryContext object is the same"
+    );
+    Assert.deepEqual(
+      queryContext.tokens,
+      queryContext.expectedTokens,
+      "Check the expected tokens"
+    );
+  }
+});
-- 
cgit v1.2.3