diff options
Diffstat (limited to 'browser/components/urlbar/tests/unit/test_tokenizer.js')
-rw-r--r-- | browser/components/urlbar/tests/unit/test_tokenizer.js | 449 |
1 files changed, 449 insertions, 0 deletions
diff --git a/browser/components/urlbar/tests/unit/test_tokenizer.js b/browser/components/urlbar/tests/unit/test_tokenizer.js new file mode 100644 index 0000000000..835d1a5909 --- /dev/null +++ b/browser/components/urlbar/tests/unit/test_tokenizer.js @@ -0,0 +1,449 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/publicdomain/zero/1.0/ */ + +add_task(async function test_tokenizer() { + let testContexts = [ + { desc: "Empty string", searchString: "", expectedTokens: [] }, + { desc: "Spaces string", searchString: " ", expectedTokens: [] }, + { + desc: "Single word string", + searchString: "test", + expectedTokens: [{ value: "test", type: UrlbarTokenizer.TYPE.TEXT }], + }, + { + desc: "Multi word string with mixed whitespace types", + searchString: " test1 test2\u1680test3\u2004test4\u1680", + expectedTokens: [ + { value: "test1", type: UrlbarTokenizer.TYPE.TEXT }, + { value: "test2", type: UrlbarTokenizer.TYPE.TEXT }, + { value: "test3", type: UrlbarTokenizer.TYPE.TEXT }, + { value: "test4", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "separate restriction char at beginning", + searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK} test`, + expectedTokens: [ + { + value: UrlbarTokenizer.RESTRICT.BOOKMARK, + type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK, + }, + { value: "test", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "separate restriction char at end", + searchString: `test ${UrlbarTokenizer.RESTRICT.BOOKMARK}`, + expectedTokens: [ + { value: "test", type: UrlbarTokenizer.TYPE.TEXT }, + { + value: UrlbarTokenizer.RESTRICT.BOOKMARK, + type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK, + }, + ], + }, + { + desc: "boundary restriction char at end", + searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`, + expectedTokens: [ + { + value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`, + type: UrlbarTokenizer.TYPE.TEXT, + }, + ], + }, + { + desc: "boundary search restriction char at end", + searchString: `test${UrlbarTokenizer.RESTRICT.SEARCH}`, + expectedTokens: [ + { value: "test", type: UrlbarTokenizer.TYPE.TEXT }, + { + value: UrlbarTokenizer.RESTRICT.SEARCH, + type: UrlbarTokenizer.TYPE.RESTRICT_SEARCH, + }, + ], + }, + { + desc: "separate restriction char in the middle", + searchString: `test ${UrlbarTokenizer.RESTRICT.BOOKMARK} test`, + expectedTokens: [ + { value: "test", type: UrlbarTokenizer.TYPE.TEXT }, + { + value: UrlbarTokenizer.RESTRICT.BOOKMARK, + type: UrlbarTokenizer.TYPE.TEXT, + }, + { value: "test", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "restriction char in the middle", + searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}test`, + expectedTokens: [ + { + value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}test`, + type: UrlbarTokenizer.TYPE.TEXT, + }, + ], + }, + { + desc: "restriction char in the middle 2", + searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK} test`, + expectedTokens: [ + { + value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`, + type: UrlbarTokenizer.TYPE.TEXT, + }, + { value: `test`, type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "double boundary restriction char", + searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK}test${UrlbarTokenizer.RESTRICT.TITLE}`, + expectedTokens: [ + { + value: UrlbarTokenizer.RESTRICT.BOOKMARK, + type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK, + }, + { + value: `test${UrlbarTokenizer.RESTRICT.TITLE}`, + type: UrlbarTokenizer.TYPE.TEXT, + }, + ], + }, + { + desc: "double non-combinable restriction char, single char string", + searchString: `t${UrlbarTokenizer.RESTRICT.BOOKMARK}${UrlbarTokenizer.RESTRICT.SEARCH}`, + expectedTokens: [ + { + value: `t${UrlbarTokenizer.RESTRICT.BOOKMARK}`, + type: UrlbarTokenizer.TYPE.TEXT, + }, + { + value: UrlbarTokenizer.RESTRICT.SEARCH, + type: UrlbarTokenizer.TYPE.RESTRICT_SEARCH, + }, + ], + }, + { + desc: "only boundary restriction chars", + searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK}${UrlbarTokenizer.RESTRICT.TITLE}`, + expectedTokens: [ + { + value: UrlbarTokenizer.RESTRICT.BOOKMARK, + type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK, + }, + { + value: UrlbarTokenizer.RESTRICT.TITLE, + type: UrlbarTokenizer.TYPE.RESTRICT_TITLE, + }, + ], + }, + { + desc: "only the boundary restriction char", + searchString: UrlbarTokenizer.RESTRICT.BOOKMARK, + expectedTokens: [ + { + value: UrlbarTokenizer.RESTRICT.BOOKMARK, + type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK, + }, + ], + }, + // Some restriction chars may be # or ?, that are also valid path parts. + // The next 2 tests will check we consider those as part of url paths. + { + desc: "boundary # char on path", + searchString: "test/#", + expectedTokens: [ + { value: "test/#", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "boundary ? char on path", + searchString: "test/?", + expectedTokens: [ + { value: "test/?", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "multiple boundary restriction chars suffix", + searchString: `test ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG}`, + expectedTokens: [ + { value: "test", type: UrlbarTokenizer.TYPE.TEXT }, + { + value: UrlbarTokenizer.RESTRICT.HISTORY, + type: UrlbarTokenizer.TYPE.TEXT, + }, + { + value: UrlbarTokenizer.RESTRICT.TAG, + type: UrlbarTokenizer.TYPE.RESTRICT_TAG, + }, + ], + }, + { + desc: "multiple boundary restriction chars prefix", + searchString: `${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG} test`, + expectedTokens: [ + { + value: UrlbarTokenizer.RESTRICT.HISTORY, + type: UrlbarTokenizer.TYPE.RESTRICT_HISTORY, + }, + { + value: UrlbarTokenizer.RESTRICT.TAG, + type: UrlbarTokenizer.TYPE.TEXT, + }, + { value: "test", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "Math with division", + searchString: "3.6/1.2", + expectedTokens: [{ value: "3.6/1.2", type: UrlbarTokenizer.TYPE.TEXT }], + }, + { + desc: "ipv4 in bookmarks", + searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK} 192.168.1.1:8`, + expectedTokens: [ + { + value: UrlbarTokenizer.RESTRICT.BOOKMARK, + type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK, + }, + { value: "192.168.1.1:8", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN }, + ], + }, + { + desc: "email", + searchString: "test@mozilla.com", + expectedTokens: [ + { value: "test@mozilla.com", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "email2", + searchString: "test.test@mozilla.co.uk", + expectedTokens: [ + { value: "test.test@mozilla.co.uk", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "protocol", + searchString: "http://test", + expectedTokens: [ + { value: "http://test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "bogus protocol with host (we allow visits to http://///example.com)", + searchString: "http:///test", + expectedTokens: [ + { value: "http:///test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "file protocol with path", + searchString: "file:///home", + expectedTokens: [ + { value: "file:///home", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "almost a protocol", + searchString: "http:", + expectedTokens: [ + { value: "http:", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "almost a protocol 2", + searchString: "http:/", + expectedTokens: [ + { value: "http:/", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "bogus protocol (we allow visits to http://///example.com)", + searchString: "http:///", + expectedTokens: [ + { value: "http:///", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "file protocol", + searchString: "file:///", + expectedTokens: [ + { value: "file:///", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "userinfo", + searchString: "user:pass@test", + expectedTokens: [ + { value: "user:pass@test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN }, + ], + }, + { + desc: "domain", + searchString: "www.mozilla.org", + expectedTokens: [ + { + value: "www.mozilla.org", + type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN, + }, + ], + }, + { + desc: "data uri", + searchString: "data:text/plain,Content", + expectedTokens: [ + { + value: "data:text/plain,Content", + type: UrlbarTokenizer.TYPE.POSSIBLE_URL, + }, + ], + }, + { + desc: "ipv6", + searchString: "[2001:db8::1]", + expectedTokens: [ + { value: "[2001:db8::1]", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN }, + ], + }, + { + desc: "numeric domain", + searchString: "test1001.com", + expectedTokens: [ + { value: "test1001.com", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN }, + ], + }, + { + desc: "invalid ip", + searchString: "192.2134.1.2", + expectedTokens: [ + { value: "192.2134.1.2", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "ipv4", + searchString: "1.2.3.4", + expectedTokens: [ + { value: "1.2.3.4", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN }, + ], + }, + { + desc: "host/path", + searchString: "test/test", + expectedTokens: [ + { value: "test/test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL }, + ], + }, + { + desc: "percent encoded string", + searchString: "%E6%97%A5%E6%9C%AC", + expectedTokens: [ + { value: "%E6%97%A5%E6%9C%AC", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "Uppercase", + searchString: "TEST", + expectedTokens: [{ value: "TEST", type: UrlbarTokenizer.TYPE.TEXT }], + }, + { + desc: "Mixed case 1", + searchString: "TeSt", + expectedTokens: [{ value: "TeSt", type: UrlbarTokenizer.TYPE.TEXT }], + }, + { + desc: "Mixed case 2", + searchString: "tEsT", + expectedTokens: [{ value: "tEsT", type: UrlbarTokenizer.TYPE.TEXT }], + }, + { + desc: "Uppercase with spaces", + searchString: "TEST EXAMPLE", + expectedTokens: [ + { value: "TEST", type: UrlbarTokenizer.TYPE.TEXT }, + { value: "EXAMPLE", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "Mixed case with spaces", + searchString: "TeSt eXaMpLe", + expectedTokens: [ + { value: "TeSt", type: UrlbarTokenizer.TYPE.TEXT }, + { value: "eXaMpLe", type: UrlbarTokenizer.TYPE.TEXT }, + ], + }, + { + desc: "plain number", + searchString: "1001", + expectedTokens: [{ value: "1001", type: UrlbarTokenizer.TYPE.TEXT }], + }, + { + desc: "data uri with spaces", + searchString: "data:text/html,oh hi?", + expectedTokens: [ + { + value: "data:text/html,oh hi?", + type: UrlbarTokenizer.TYPE.POSSIBLE_URL, + }, + ], + }, + { + desc: "data uri with spaces ignored with other tokens", + searchString: "hi data:text/html,oh hi?", + expectedTokens: [ + { + value: "hi", + type: UrlbarTokenizer.TYPE.TEXT, + }, + { + value: "data:text/html,oh", + type: UrlbarTokenizer.TYPE.POSSIBLE_URL, + }, + { + value: "hi", + type: UrlbarTokenizer.TYPE.TEXT, + }, + { + value: UrlbarTokenizer.RESTRICT.SEARCH, + type: UrlbarTokenizer.TYPE.RESTRICT_SEARCH, + }, + ], + }, + { + desc: "whitelisted host", + searchString: "test whitelisted", + expectedTokens: [ + { + value: "test", + type: UrlbarTokenizer.TYPE.TEXT, + }, + { + value: "whitelisted", + type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN, + }, + ], + }, + ]; + + Services.prefs.setBoolPref("browser.fixup.domainwhitelist.whitelisted", true); + + for (let queryContext of testContexts) { + info(queryContext.desc); + queryContext.trimmedSearchString = queryContext.searchString.trim(); + for (let token of queryContext.expectedTokens) { + token.lowerCaseValue = token.value.toLocaleLowerCase(); + } + let newQueryContext = UrlbarTokenizer.tokenize(queryContext); + Assert.equal( + queryContext, + newQueryContext, + "The queryContext object is the same" + ); + Assert.deepEqual( + queryContext.tokens, + queryContext.expectedTokens, + "Check the expected tokens" + ); + } +}); |