summary | refs | log | tree | commit | diff | stats
path: root/browser/components/urlbar/UrlbarSearchUtils.sys.mjs
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit: 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree: 105e8c98ddea1c1e4784a60a5a6410fa416be2de /browser/components/urlbar/UrlbarSearchUtils.sys.mjs
parent: Initial commit. (diff)
download: firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr. (upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'browser/components/urlbar/UrlbarSearchUtils.sys.mjs')
-rw-r--r-- browser/components/urlbar/UrlbarSearchUtils.sys.mjs 419
1 files changed, 419 insertions, 0 deletions
diff --git a/browser/components/urlbar/UrlbarSearchUtils.sys.mjs b/browser/components/urlbar/UrlbarSearchUtils.sys.mjs
new file mode 100644
index 0000000000..55606a3130
--- /dev/null
+++ b/browser/components/urlbar/UrlbarSearchUtils.sys.mjs
@@ -0,0 +1,419 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Search service utilities for urlbar. The only reason these functions aren't
+ * a part of UrlbarUtils is that we want O(1) case-insensitive lookup for search
+ * aliases, and to do that we need to observe the search service, persistent
+ * state, and an init method. A separate object is easier.
+ */
+
+import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
+
+// Holder object on which the module getters below are defined, so the urlbar
+// modules are reached as `lazy.UrlbarTokenizer` / `lazy.UrlbarUtils`.
+const lazy = {};
+
+// Registers getters on `lazy` for the listed ES modules.
+ChromeUtils.defineESModuleGetters(lazy, {
+ UrlbarTokenizer: "resource:///modules/UrlbarTokenizer.sys.mjs",
+ UrlbarUtils: "resource:///modules/UrlbarUtils.sys.mjs",
+});
+
+// Observer topic that SearchUtils registers for in _initInternal(); the
+// specific change ("engine-added", "engine-removed", ...) is read from the
+// notification's `data` argument in observe().
+const SEARCH_ENGINE_TOPIC = "browser-search-engine-modified";
+
+/**
+ * Search service utilities for urlbar.
+ */
+class SearchUtils {
+  /**
+   * Sets up an already-resolved alias-refresh promise, the QueryInterface
+   * used when this object is registered as an observer, and lazy getters for
+   * the two "separate private default" search prefs (both default to false).
+   */
+  constructor() {
+    this._refreshEnginesByAliasPromise = Promise.resolve();
+    this.QueryInterface = ChromeUtils.generateQI([
+      "nsIObserver",
+      "nsISupportsWeakReference",
+    ]);
+    XPCOMUtils.defineLazyPreferenceGetter(
+      this,
+      "separatePrivateDefaultUIEnabled",
+      "browser.search.separatePrivateDefault.ui.enabled",
+      false
+    );
+    XPCOMUtils.defineLazyPreferenceGetter(
+      this,
+      "separatePrivateDefault",
+      "browser.search.separatePrivateDefault",
+      false
+    );
+  }
+
+  /**
+   * Initializes the instance and also Services.search. The work is started
+   * only once; every caller awaits the same promise, so a failed
+   * initialization keeps rejecting for later callers as well.
+   *
+   * @returns {Promise<void>}
+   *   Resolves once initialization has finished.
+   */
+  async init() {
+    if (!this._initPromise) {
+      this._initPromise = this._initInternal();
+    }
+    await this._initPromise;
+  }
+
+  /**
+   * Gets the engines whose domains match a given prefix.
+   *
+   * @param {string} prefix
+   *   String containing the first part of the matching domain name(s). It is
+   *   lower-cased before matching.
+   * @param {object} [options]
+   *   Options object.
+   * @param {boolean} [options.matchAllDomainLevels]
+   *   Match at each sub domain, for example "a.b.c.com" will be matched at
+   *   "a.b.c.com", "b.c.com", and "c.com". Partial matches are always returned
+   *   after perfect matches.
+   * @param {boolean} [options.onlyEnabled]
+   *   Match only engines that have not been disabled on the Search Preferences
+   *   list (i.e. whose name is not in "browser.search.hiddenOneOffs").
+   * @returns {Promise<Array<nsISearchEngine>>}
+   *   An array of all matching engines, each included at most once. An empty
+   *   array if there are none or if initialization failed.
+   */
+  async enginesForDomainPrefix(
+    prefix,
+    { matchAllDomainLevels = false, onlyEnabled = false } = {}
+  ) {
+    try {
+      await this.init();
+    } catch {
+      return [];
+    }
+    prefix = prefix.toLowerCase();
+
+    // Names of engines to skip. Kept in a Set so the per-engine check below is
+    // a constant-time lookup.
+    let disabledEngineNames = new Set(
+      onlyEnabled
+        ? Services.prefs
+            .getStringPref("browser.search.hiddenOneOffs", "")
+            .split(",")
+            .filter(name => !!name)
+        : []
+    );
+
+    // Whether `prefix` matches `host` at some sub domain level. Index 0 (the
+    // whole host) is skipped because that case is the "perfect match" check,
+    // and the final dot-separated part is never tested on its own.
+    let matchesSubDomainLevel = host => {
+      let parts = host.split(".");
+      for (let i = 1; i < parts.length - 1; ++i) {
+        if (parts.slice(i).join(".").startsWith(prefix)) {
+          return true;
+        }
+      }
+      return false;
+    };
+
+    let perfectMatchEngines = [];
+    let partialMatchEngines = [];
+    for (let engine of await Services.search.getVisibleEngines()) {
+      if (disabledEngineNames.has(engine.name)) {
+        continue;
+      }
+      let domain = engine.searchUrlDomain;
+      if (domain.startsWith(prefix) || domain.startsWith("www." + prefix)) {
+        perfectMatchEngines.push(engine);
+      }
+
+      if (matchAllDomainLevels) {
+        let domainWithoutSuffix = domain.substring(
+          0,
+          domain.length - engine.searchUrlPublicSuffix.length
+        );
+        // The prefix may or may not contain part of the public suffix. If
+        // it contains a dot, we must match with and without the public suffix,
+        // otherwise it's sufficient to just match without it.
+        if (
+          (prefix.includes(".") && matchesSubDomainLevel(domain)) ||
+          matchesSubDomainLevel(domainWithoutSuffix)
+        ) {
+          partialMatchEngines.push(engine);
+        }
+      }
+    }
+
+    // Build the final list of matching engines. Partial matches come after
+    // perfect matches. An engine can be both a perfect and a partial match, so
+    // be careful not to include the same engine more than once.
+    let engines = perfectMatchEngines;
+    let seenEngines = new Set(perfectMatchEngines);
+    for (let engine of partialMatchEngines) {
+      if (!seenEngines.has(engine)) {
+        seenEngines.add(engine);
+        engines.push(engine);
+      }
+    }
+    return engines;
+  }
+
+  /**
+   * Gets the engine with a given alias.
+   *
+   * @param {string} alias
+   *   A search engine alias. The alias string comparison is case insensitive.
+   * @param {string} [searchString]
+   *   Optional. If provided, we also enforce that there must be a space after
+   *   the alias in the search string.
+   * @returns {Promise<nsISearchEngine>}
+   *   The matching engine, or null if there isn't one or if initialization or
+   *   the latest alias refresh failed.
+   */
+  async engineForAlias(alias, searchString = null) {
+    try {
+      // Also wait for any refresh started by observe() so the lookup below
+      // sees the result of the most recent engine change.
+      await Promise.all([this.init(), this._refreshEnginesByAliasPromise]);
+    } catch {
+      return null;
+    }
+
+    let engine = this._enginesByAlias.get(alias.toLocaleLowerCase());
+    if (engine && searchString) {
+      let query = lazy.UrlbarUtils.substringAfter(searchString, alias);
+      // Match an alias only when it has a space after it. If there's no trailing
+      // space, then continue to treat it as part of the search string.
+      if (!lazy.UrlbarTokenizer.REGEXP_SPACES_START.test(query)) {
+        return null;
+      }
+    }
+    return engine || null;
+  }
+
+  /**
+   * The list of engines with token ("@") aliases.
+   *
+   * @returns {Promise<Array>}
+   *   Array of objects { engine, tokenAliases } for token alias engines. The
+   *   array is empty if there are none or if initialization failed.
+   */
+  async tokenAliasEngines() {
+    try {
+      await this.init();
+    } catch {
+      return [];
+    }
+
+    let tokenAliasEngines = [];
+    for (let engine of await Services.search.getVisibleEngines()) {
+      let tokenAliases = this._aliasesForEngine(engine).filter(a =>
+        a.startsWith("@")
+      );
+      if (tokenAliases.length) {
+        tokenAliasEngines.push({ engine, tokenAliases });
+      }
+    }
+    return tokenAliasEngines;
+  }
+
+  /**
+   * @param {nsISearchEngine} engine
+   *   The engine to get the root domain of
+   * @returns {string}
+   *   The root domain of a search engine. e.g. If `engine` has the domain
+   *   www.subdomain.rootdomain.com, `rootdomain` is returned. Returns the
+   *   engine's domain if the engine's URL does not have a valid TLD.
+   */
+  getRootDomainFromEngine(engine) {
+    let domain = engine.searchUrlDomain;
+    let suffix = engine.searchUrlPublicSuffix;
+    if (!suffix) {
+      // Domains ending in ".test" are handled as if "test" were their suffix.
+      if (!domain.endsWith(".test")) {
+        return domain;
+      }
+      suffix = "test";
+    }
+    // Strip the suffix; the extra -1 removes the dot in front of it.
+    let domainWithoutSuffix = domain.substring(
+      0,
+      domain.length - suffix.length - 1
+    );
+    return domainWithoutSuffix.split(".").pop();
+  }
+
+  /**
+   * @param {boolean} [isPrivate]
+   *   True if in a private context.
+   * @returns {nsISearchEngine}
+   *   The default engine or null if SearchService has not initialized. The
+   *   private default engine is returned only when `isPrivate` is true and
+   *   both "separate private default" prefs are enabled.
+   */
+  getDefaultEngine(isPrivate = false) {
+    if (!Services.search.hasSuccessfullyInitialized) {
+      return null;
+    }
+
+    let usePrivateDefault =
+      this.separatePrivateDefaultUIEnabled &&
+      this.separatePrivateDefault &&
+      isPrivate;
+    return usePrivateDefault
+      ? Services.search.defaultPrivateEngine
+      : Services.search.defaultEngine;
+  }
+
+  /**
+   * To make analysis easier, we sanitize some engine names when
+   * recording telemetry about search mode. This function returns the sanitized
+   * key name to record in telemetry.
+   *
+   * @param {object} searchMode
+   *   A search mode object. See UrlbarInput.setSearchMode.
+   * @returns {string}
+   *   A sanitized scalar key, used to access Telemetry data. Undefined if
+   *   `searchMode` has neither an engine name nor a source.
+   */
+  getSearchModeScalarKey(searchMode) {
+    if (searchMode.engineName) {
+      let engine = Services.search.getEngineByName(searchMode.engineName);
+      // Engines that are not app-provided are all recorded as "other". An
+      // engine that cannot be found by name is grouped with them instead of
+      // throwing on the property accesses below.
+      if (!engine || !engine.isAppProvided) {
+        return "other";
+      }
+      // For built-in engines, sanitize the data in a few special cases to make
+      // analysis easier.
+      let resultDomain = engine.searchUrlDomain;
+      if (resultDomain.includes("amazon.")) {
+        // Group all the localized Amazon sites together.
+        return "Amazon";
+      }
+      if (resultDomain.endsWith("wikipedia.org")) {
+        // Group all the localized Wikipedia sites together.
+        return "Wikipedia";
+      }
+      return searchMode.engineName;
+    }
+
+    if (searchMode.source) {
+      return (
+        lazy.UrlbarUtils.getResultSourceName(searchMode.source) || "other"
+      );
+    }
+
+    return undefined;
+  }
+
+  /**
+   * Does the actual initialization work for init(): initializes the search
+   * service, builds the alias map, then starts observing engine changes.
+   *
+   * @returns {Promise<void>}
+   */
+  async _initInternal() {
+    await Services.search.init();
+    await this._refreshEnginesByAlias();
+    Services.obs.addObserver(this, SEARCH_ENGINE_TOPIC, true);
+  }
+
+  /**
+   * Rebuilds `_enginesByAlias`, the map from lower-cased alias to engine.
+   *
+   * @returns {Promise<void>}
+   */
+  async _refreshEnginesByAlias() {
+    // See the comment at the top of this file. The only reason we need this
+    // class is for O(1) case-insensitive lookup for search aliases, which is
+    // facilitated by _enginesByAlias.
+    //
+    // The map is built locally and only published once complete, so a lookup
+    // never sees a half-populated map and overlapping refreshes cannot write
+    // into each other's map.
+    let enginesByAlias = new Map();
+    for (let engine of await Services.search.getVisibleEngines()) {
+      if (!engine.hidden) {
+        for (let alias of this._aliasesForEngine(engine)) {
+          enginesByAlias.set(alias, engine);
+        }
+      }
+    }
+    this._enginesByAlias = enginesByAlias;
+  }
+
+  /**
+   * Checks if the given uri is constructed by the default search engine.
+   * When passing URI's to check against, it's best to use the "original" URI
+   * that was requested, as the server may have redirected the request.
+   *
+   * @param {nsIURI | string} uri
+   *   The uri to check.
+   * @returns {string}
+   *   The search terms used.
+   *   Will return an empty string if it's not a default SERP,
+   *   if the default engine hasn't been initialized, or if `uri` is a string
+   *   that cannot be turned into a URI.
+   */
+  getSearchTermIfDefaultSerpUri(uri) {
+    if (!Services.search.hasSuccessfullyInitialized || !uri) {
+      return "";
+    }
+
+    if (typeof uri == "string") {
+      // Creating a URI can throw.
+      try {
+        uri = Services.io.newURI(uri);
+      } catch {
+        return "";
+      }
+    }
+
+    return Services.search.defaultEngine.searchTermFromResult(uri);
+  }
+
+  /**
+   * Compares the query parameters of two SERPs to see if one is equivalent to
+   * the other. URL `x` is equivalent to URL `y` if
+   *   (a) `y` contains at least all the query parameters contained in `x`, and
+   *   (b) The values of the query parameters contained in both `x` and `y` are
+   *       the same.
+   *
+   * This function does not compare the SERPs' origins or pathnames.
+   * `historySerp` can have a different origin and/or pathname than
+   * `generatedSerp` and still be considered equivalent.
+   *
+   * Both arguments are parsed with `new URL()`, so a string that is not a
+   * valid absolute URL makes this method throw.
+   *
+   * @param {string} historySerp
+   *   The SERP from history whose params should be contained in
+   *   `generatedSerp`.
+   * @param {string} generatedSerp
+   *   The search URL we would generate for a search result with the same search
+   *   string used in `historySerp`.
+   * @param {Array} [ignoreParams]
+   *   A list of params to ignore in the matching, i.e. params that can be
+   *   contained in `historySerp` but not be in `generatedSerp`.
+   * @returns {boolean} True if `historySerp` can be deduped by `generatedSerp`.
+   */
+  serpsAreEquivalent(historySerp, generatedSerp, ignoreParams = []) {
+    let historyParams = new URL(historySerp).searchParams;
+    let generatedParams = new URL(generatedSerp).searchParams;
+    return Array.from(historyParams.entries()).every(
+      ([key, value]) =>
+        ignoreParams.includes(key) || value === generatedParams.get(key)
+    );
+  }
+
+  /**
+   * Gets the aliases of an engine. For the user's convenience, we recognize
+   * token versions of all non-token aliases. For example, if the user has an
+   * alias of "foo", then we recognize both "foo" and "@foo" as aliases for
+   * foo's engine. The returned list is therefore a superset of
+   * `engine.aliases`. Additionally, the returned aliases will be lower-cased
+   * to make lookups and comparisons easier.
+   *
+   * @param {nsISearchEngine} engine
+   *   The aliases of this search engine will be returned.
+   * @returns {Array}
+   *   An array of lower-cased string aliases as described above.
+   */
+  _aliasesForEngine(engine) {
+    let aliases = [];
+    for (let aliasWithCase of engine.aliases) {
+      // We store lower-cased aliases to make lookups and comparisons easier.
+      let alias = aliasWithCase.toLocaleLowerCase();
+      aliases.push(alias);
+      if (!alias.startsWith("@")) {
+        aliases.push("@" + alias);
+      }
+    }
+    return aliases;
+  }
+
+  /**
+   * @param {string} engineName
+   *   Name of the search engine.
+   * @returns {nsISearchEngine}
+   *   The engine based on engineName or null if SearchService has not
+   *   initialized.
+   */
+  getEngineByName(engineName) {
+    if (!Services.search.hasSuccessfullyInitialized) {
+      return null;
+    }
+
+    return Services.search.getEngineByName(engineName);
+  }
+
+  /**
+   * Observer callback. Starts a rebuild of the alias map whenever an engine
+   * is added, changed, removed, or made default. The rebuild's promise is
+   * stored so engineForAlias() can wait for it.
+   *
+   * @param {nsISupports} subject
+   *   Unused.
+   * @param {string} topic
+   *   Unused; this object only registers for SEARCH_ENGINE_TOPIC.
+   * @param {string} data
+   *   The kind of engine change being notified.
+   */
+  observe(subject, topic, data) {
+    switch (data) {
+      case "engine-added":
+      case "engine-changed":
+      case "engine-removed":
+      case "engine-default":
+        this._refreshEnginesByAliasPromise = this._refreshEnginesByAlias();
+        break;
+    }
+  }
+}
+
+// The single SearchUtils instance exported by this module.
+export var UrlbarSearchUtils = new SearchUtils();