/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
 * vim: sw=2 ts=2 sts=2 expandtab
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * This component handles fixing up URIs, by correcting obvious typos and adding
 * missing schemes.
 * URI references:
 *   http://www.faqs.org/rfcs/rfc1738.html
 *   http://www.faqs.org/rfcs/rfc2396.html
 */

// TODO (Bug 1641220) getFixupURIInfo has a complex logic, that likely could be
// simplified, but the risk of regressing its behavior is high.
/* eslint complexity: ["error", 43] */

import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
import { AppConstants } from "resource://gre/modules/AppConstants.sys.mjs";

// Holder for all the lazily-instantiated services, preferences and regexes
// used by this module.
const lazy = {};

XPCOMUtils.defineLazyServiceGetter(
  lazy,
  "externalProtocolService",
  "@mozilla.org/uriloader/external-protocol-service;1",
  "nsIExternalProtocolService"
);

XPCOMUtils.defineLazyServiceGetter(
  lazy,
  "defaultProtocolHandler",
  "@mozilla.org/network/protocol;1?name=default",
  "nsIProtocolHandler"
);

XPCOMUtils.defineLazyServiceGetter(
  lazy,
  "fileProtocolHandler",
  "@mozilla.org/network/protocol;1?name=file",
  "nsIFileProtocolHandler"
);

XPCOMUtils.defineLazyServiceGetter(
  lazy,
  "handlerService",
  "@mozilla.org/uriloader/handler-service;1",
  "nsIHandlerService"
);

// Whether mistyped schemes (e.g. "ttp") may be corrected.
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "fixupSchemeTypos",
  "browser.fixup.typo.scheme",
  true
);
// When true, isDomainKnown() reports every host as known.
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "dnsFirstForSingleWords",
  "browser.fixup.dns_first_for_single_words",
  false
);
// Master switch for converting input into a keyword search.
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "keywordEnabled",
  "keyword.enabled",
  true
);
// Scheme applied when an alternate URI is generated by updateHostAndScheme().
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "alternateProtocol",
  "browser.fixup.alternate.protocol",
  "https"
);

// When true, checkHost() appends a trailing dot to dotless hostnames before
// resolving them.
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "dnsResolveFullyQualifiedNames",
  "browser.urlbar.dnsResolveFullyQualifiedNames",
  true
);

const {
  FIXUP_FLAG_NONE,
  FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP,
  FIXUP_FLAGS_MAKE_ALTERNATE_URI,
  FIXUP_FLAG_PRIVATE_CONTEXT,
  FIXUP_FLAG_FIX_SCHEME_TYPOS,
} = Ci.nsIURIFixup;

// Schemes that are always considered loadable without querying a handler.
const COMMON_PROTOCOLS = ["http", "https", "file"];

// Regex used to identify user:password tokens in url strings.
// This is not a strict valid characters check, because we try to fixup this
// part of the url too.
ChromeUtils.defineLazyGetter(
  lazy,
  "userPasswordRegex",
  () => /^([a-z+.-]+:\/{0,3})*([^\/@]+@).+/i
);

// Regex used to identify the string that starts with port expression.
ChromeUtils.defineLazyGetter(lazy, "portRegex", () => /^:\d{1,5}([?#/]|$)/);

// Regex used to identify numbers.
ChromeUtils.defineLazyGetter(lazy, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/);

// Regex matching strings that contain at most one tab. A string that does NOT
// match (i.e. it has at least 2 tabs) is considered tab separated content.
ChromeUtils.defineLazyGetter(lazy, "maxOneTabRegex", () => /^[^\t]*\t?[^\t]*$/);

// Regex used to test if a string with a protocol might instead be a url
// without a protocol but with a port:
//
//   <hostname>:<port> or
//   <hostname>:<port>/
//
// Where <hostname> is a string of alphanumeric characters and dashes
// separated by dots,
// and <port> is a 5 or less digits. This actually breaks the rfc2396
// definition of a scheme which allows dots in schemes.
//
// Note:
//   People expecting this to work with
//   <user>:<password>@<host>:<port>/<url-path> will be disappointed!
//
// Note: Parser could be a lot tighter, tossing out silly hostnames
//       such as those containing consecutive dots and so on.
ChromeUtils.defineLazyGetter(
  lazy,
  "possiblyHostPortRegex",
  () => /^[a-z0-9-]+(\.[a-z0-9-]+)*:[0-9]{1,5}([/?#]|$)/i
);

// Regex used to strip newlines.
ChromeUtils.defineLazyGetter(lazy, "newLinesRegex", () => /[\r\n]/g);

// Regex used to match a possible protocol.
// This resembles the logic in Services.io.extractScheme, thus \t is admitted
// and stripped later. We don't use Services.io.extractScheme because of
// performance bottleneck caused by crossing XPConnect.
ChromeUtils.defineLazyGetter(
  lazy,
  "possibleProtocolRegex",
  () => /^([a-z][a-z0-9.+\t-]*)(:|;)?(\/\/)?/i
);

// Regexes that recognize IP-looking strings. They are detectors, not
// validators, and they tolerate (and skip) a leading protocol.
// The IPv4 one also accepts the shorthand notation with only 2 dots.
ChromeUtils.defineLazyGetter(
  lazy,
  "IPv4LikeRegex",
  () => /^(?:[a-z+.-]+:\/*(?!\/))?(?:\d{1,3}\.){2,3}\d{1,3}(?::\d+|\/)?/i
);
ChromeUtils.defineLazyGetter(
  lazy,
  "IPv6LikeRegex",
  () =>
    /^(?:[a-z+.-]+:\/*(?!\/))?\[(?:[0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?(?::\d+|\/)?/i
);

// Matches whitespace appearing before the "@" of the credentials part.
ChromeUtils.defineLazyGetter(
  lazy,
  "DetectSpaceInCredentialsRegex",
  () => /^[^/]*\s[^/]*@/
);

// Set of domains enabled through the "browser.fixup.domainwhitelist." prefs,
// kept up to date by a pref observer.
ChromeUtils.defineLazyGetter(lazy, "knownDomains", () => {
  const branch = "browser.fixup.domainwhitelist.";
  const toDomain = prefName => prefName.substring(branch.length);

  // Seed the cache with every pref of the branch currently set to true.
  const cache = new Set();
  for (const prefName of Services.prefs.getChildList(branch)) {
    if (Services.prefs.getBoolPref(prefName, false)) {
      cache.add(toDomain(prefName));
    }
  }

  const prefObserver = {
    observe(subject, topic, data) {
      const changedDomain = toDomain(data);
      if (!Services.prefs.getBoolPref(data, false)) {
        cache.delete(changedDomain);
        return;
      }
      cache.add(changedDomain);
    },
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
  };
  // The observer is registered weakly, so keep a strong reference on the
  // cache to prevent it from being garbage collected.
  cache._observer = prefObserver;
  Services.prefs.addObserver(branch, prefObserver, true);

  return cache;
});

// Cache of known suffixes.
// This works differently from the known domains, because when we examine a
// domain we can't tell how many dot-separated parts constitute the suffix.
// We create a Map keyed by the last dotted part, containing a Set of
// all the suffixes ending with that part:
//   "two" => ["two"]
//   "three" => ["some.three", "three"]
// When searching we can restrict the linear scan based on the last part.
// The ideal structure for this would be a Directed Acyclic Word Graph, but // since we expect this list to be small it's not worth the complication. ChromeUtils.defineLazyGetter(lazy, "knownSuffixes", () => { const branch = "browser.fixup.domainsuffixwhitelist."; let suffixes = new Map(); let prefs = Services.prefs .getChildList(branch) .filter(p => Services.prefs.getBoolPref(p, false)); for (let pref of prefs) { let suffix = pref.substring(branch.length); let lastPart = suffix.substr(suffix.lastIndexOf(".") + 1); if (lastPart) { let entries = suffixes.get(lastPart); if (!entries) { entries = new Set(); suffixes.set(lastPart, entries); } entries.add(suffix); } } // Hold onto the observer to avoid it being GC-ed. suffixes._observer = { observe(subject, topic, data) { let suffix = data.substring(branch.length); let lastPart = suffix.substr(suffix.lastIndexOf(".") + 1); let entries = suffixes.get(lastPart); if (Services.prefs.getBoolPref(data, false)) { // Add the suffix. if (!entries) { entries = new Set(); suffixes.set(lastPart, entries); } entries.add(suffix); } else if (entries) { // Remove the suffix. entries.delete(suffix); if (!entries.size) { suffixes.delete(lastPart); } } }, QueryInterface: ChromeUtils.generateQI([ "nsIObserver", "nsISupportsWeakReference", ]), }; Services.prefs.addObserver(branch, suffixes._observer, true); return suffixes; }); export function URIFixup() { // There are cases that nsIExternalProtocolService.externalProtocolHandlerExists() does // not work well and returns always true due to flatpak. In this case, in order to // fallback to nsIHandlerService.exits(), we test whether can trust // nsIExternalProtocolService here. 
this._trustExternalProtocolService = !lazy.externalProtocolService.externalProtocolHandlerExists( `__dummy${Date.now()}__` ); } URIFixup.prototype = { get FIXUP_FLAG_NONE() { return FIXUP_FLAG_NONE; }, get FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP() { return FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP; }, get FIXUP_FLAGS_MAKE_ALTERNATE_URI() { return FIXUP_FLAGS_MAKE_ALTERNATE_URI; }, get FIXUP_FLAG_PRIVATE_CONTEXT() { return FIXUP_FLAG_PRIVATE_CONTEXT; }, get FIXUP_FLAG_FIX_SCHEME_TYPOS() { return FIXUP_FLAG_FIX_SCHEME_TYPOS; }, getFixupURIInfo(uriString, fixupFlags = FIXUP_FLAG_NONE) { let isPrivateContext = fixupFlags & FIXUP_FLAG_PRIVATE_CONTEXT; let untrimmedURIString = uriString; // Eliminate embedded newlines, which single-line text fields now allow, // and cleanup the empty spaces and tabs that might be on each end. uriString = uriString.trim().replace(lazy.newLinesRegex, ""); if (!uriString) { throw new Components.Exception( "Should pass a non-null uri", Cr.NS_ERROR_FAILURE ); } let info = new URIFixupInfo(uriString); const { scheme, fixedSchemeUriString, fixupChangedProtocol } = extractScheme(uriString, fixupFlags); uriString = fixedSchemeUriString; info.fixupChangedProtocol = fixupChangedProtocol; if (scheme == "view-source") { let { preferredURI, postData } = fixupViewSource(uriString, fixupFlags); info.preferredURI = info.fixedURI = preferredURI; info.postData = postData; return info; } if (scheme.length < 2) { // Check if it is a file path. We skip most schemes because the only case // where a file path may look like having a scheme is "X:" on Windows. 
let fileURI = fileURIFixup(uriString); if (fileURI) { info.preferredURI = info.fixedURI = fileURI; info.fixupChangedProtocol = true; return info; } } const isCommonProtocol = COMMON_PROTOCOLS.includes(scheme); let canHandleProtocol = scheme && (isCommonProtocol || Services.io.getProtocolHandler(scheme) != lazy.defaultProtocolHandler || this._isKnownExternalProtocol(scheme)); if ( canHandleProtocol || // If it's an unknown handler and the given URL looks like host:port or // has a user:password we can't pass it to the external protocol handler. // We'll instead try fixing it with http later. (!lazy.possiblyHostPortRegex.test(uriString) && !lazy.userPasswordRegex.test(uriString)) ) { // Just try to create an URL out of it. try { info.fixedURI = Services.io.newURI(uriString); } catch (ex) { if (ex.result != Cr.NS_ERROR_MALFORMED_URI) { throw ex; } } } // We're dealing with a theoretically valid URI but we have no idea how to // load it. (e.g. "christmas:humbug") // It's more likely the user wants to search, and so we chuck this over to // their preferred search provider. // TODO (Bug 1588118): Should check FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP // instead of FIXUP_FLAG_FIX_SCHEME_TYPOS. if ( info.fixedURI && lazy.keywordEnabled && fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS && scheme && !canHandleProtocol ) { tryKeywordFixupForURIInfo(uriString, info, isPrivateContext); } if (info.fixedURI) { if (!info.preferredURI) { maybeSetAlternateFixedURI(info, fixupFlags); info.preferredURI = info.fixedURI; } fixupConsecutiveDotsHost(info); return info; } // Fix up protocol string before calling KeywordURIFixup, because // it cares about the hostname of such URIs. // Prune duff protocol schemes: // ://totallybroken.url.com // //shorthand.url.com let inputHadDuffProtocol = uriString.startsWith("://") || uriString.startsWith("//"); if (inputHadDuffProtocol) { uriString = uriString.replace(/^:?\/\//, ""); } // Avoid fixing up content that looks like tab-separated values. 
// Assume that 1 tab is accidental, but more than 1 implies this is // supposed to be tab-separated content. if ( !isCommonProtocol && lazy.maxOneTabRegex.test(uriString) && !lazy.DetectSpaceInCredentialsRegex.test(untrimmedURIString) ) { let uriWithProtocol = fixupURIProtocol(uriString); if (uriWithProtocol) { info.fixedURI = uriWithProtocol; info.fixupChangedProtocol = true; info.wasSchemelessInput = true; maybeSetAlternateFixedURI(info, fixupFlags); info.preferredURI = info.fixedURI; // Check if it's a forced visit. The user can enforce a visit by // appending a slash, but the string must be in a valid uri format. if (uriString.endsWith("/")) { fixupConsecutiveDotsHost(info); return info; } } } // Handle "www." as a URI. const asciiHost = info.fixedURI?.asciiHost; if ( asciiHost?.length > 4 && asciiHost?.startsWith("www.") && asciiHost?.lastIndexOf(".") == 3 ) { return info; } // Memoize the public suffix check, since it may be expensive and should // only run once when necessary. let suffixInfo; function checkSuffix(info) { if (!suffixInfo) { suffixInfo = checkAndFixPublicSuffix(info); } return suffixInfo; } // See if it is a keyword and whether a keyword must be fixed up. if ( lazy.keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP && !inputHadDuffProtocol && !checkSuffix(info).suffix && keywordURIFixup(uriString, info, isPrivateContext) ) { fixupConsecutiveDotsHost(info); return info; } if ( info.fixedURI && (!info.fixupChangedProtocol || !checkSuffix(info).hasUnknownSuffix) ) { fixupConsecutiveDotsHost(info); return info; } // If we still haven't been able to construct a valid URI, try to force a // keyword match. if (lazy.keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP) { tryKeywordFixupForURIInfo(info.originalInput, info, isPrivateContext); } if (!info.preferredURI) { // We couldn't salvage anything. 
throw new Components.Exception( "Couldn't build a valid uri", Cr.NS_ERROR_MALFORMED_URI ); } fixupConsecutiveDotsHost(info); return info; }, webNavigationFlagsToFixupFlags(href, navigationFlags) { try { Services.io.newURI(href); // Remove LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP for valid uris. navigationFlags &= ~Ci.nsIWebNavigation.LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP; } catch (ex) {} let fixupFlags = FIXUP_FLAG_NONE; if ( navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP ) { fixupFlags |= FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP; } if (navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_FIXUP_SCHEME_TYPOS) { fixupFlags |= FIXUP_FLAG_FIX_SCHEME_TYPOS; } return fixupFlags; }, keywordToURI(keyword, isPrivateContext) { if (Services.appinfo.processType == Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT) { // There's no search service in the content process, thus all the calls // from it that care about keywords conversion should go through the // parent process. throw new Components.Exception( "Can't invoke URIFixup in the content process", Cr.NS_ERROR_NOT_AVAILABLE ); } let info = new URIFixupInfo(keyword); // Strip leading "?" and leading/trailing spaces from aKeyword if (keyword.startsWith("?")) { keyword = keyword.substring(1); } keyword = keyword.trim(); if (!Services.search.hasSuccessfullyInitialized) { return info; } // Try falling back to the search service's default search engine // We must use an appropriate search engine depending on the private // context. let engine = isPrivateContext ? Services.search.defaultPrivateEngine : Services.search.defaultEngine; // We allow default search plugins to specify alternate parameters that are // specific to keyword searches. 
let responseType = null; if (engine.supportsResponseType("application/x-moz-keywordsearch")) { responseType = "application/x-moz-keywordsearch"; } let submission = engine.getSubmission(keyword, responseType, "keyword"); if ( !submission || // For security reasons (avoid redirecting to file, data, or other unsafe // protocols) we only allow fixup to http/https search engines. !submission.uri.scheme.startsWith("http") ) { throw new Components.Exception( "Invalid search submission uri", Cr.NS_ERROR_NOT_AVAILABLE ); } let submissionPostDataStream = submission.postData; if (submissionPostDataStream) { info.postData = submissionPostDataStream; } info.keywordProviderName = engine.name; info.keywordAsSent = keyword; info.preferredURI = submission.uri; return info; }, forceHttpFixup(uriString) { if (!uriString) { throw new Components.Exception( "Should pass a non-null uri", Cr.NS_ERROR_FAILURE ); } let info = new URIFixupInfo(uriString); let { scheme, fixedSchemeUriString, fixupChangedProtocol } = extractScheme( uriString, FIXUP_FLAG_FIX_SCHEME_TYPOS ); if (scheme != "http" && scheme != "https") { throw new Components.Exception( "Scheme should be either http or https", Cr.NS_ERROR_FAILURE ); } info.fixupChangedProtocol = fixupChangedProtocol; info.fixedURI = Services.io.newURI(fixedSchemeUriString); let host = info.fixedURI.host; if (host != "http" && host != "https" && host != "localhost") { let modifiedHostname = maybeAddPrefixAndSuffix(host); updateHostAndScheme(info, modifiedHostname); info.preferredURI = info.fixedURI; } return info; }, checkHost(uri, listener, originAttributes) { let { displayHost, asciiHost } = uri; if (!displayHost) { throw new Components.Exception( "URI must have displayHost", Cr.NS_ERROR_FAILURE ); } if (!asciiHost) { throw new Components.Exception( "URI must have asciiHost", Cr.NS_ERROR_FAILURE ); } let isIPv4Address = host => { let parts = host.split("."); if (parts.length != 4) { return false; } return parts.every(part => { let n = 
parseInt(part, 10); return n >= 0 && n <= 255; }); }; // Avoid showing fixup information if we're suggesting an IP. Note that // decimal representations of IPs are normalized to a 'regular' // dot-separated IP address by network code, but that only happens for // numbers that don't overflow. Longer numbers do not get normalized, // but still work to access IP addresses. So for instance, // 1097347366913 (ff7f000001) gets resolved by using the final bytes, // making it the same as 7f000001, which is 127.0.0.1 aka localhost. // While 2130706433 would get normalized by network, 1097347366913 // does not, and we have to deal with both cases here: if (isIPv4Address(asciiHost) || /^(?:\d+|0x[a-f0-9]+)$/i.test(asciiHost)) { return; } // For dotless hostnames, we want to ensure this ends with a '.' but don't // want the . showing up in the UI if we end up notifying the user, so we // use a separate variable. let lookupName = displayHost; if (lazy.dnsResolveFullyQualifiedNames && !lookupName.includes(".")) { lookupName += "."; } Services.obs.notifyObservers(null, "uri-fixup-check-dns"); Services.dns.asyncResolve( lookupName, Ci.nsIDNSService.RESOLVE_TYPE_DEFAULT, 0, null, listener, Services.tm.mainThread, originAttributes ); }, isDomainKnown, _isKnownExternalProtocol(scheme) { if (this._trustExternalProtocolService) { return lazy.externalProtocolService.externalProtocolHandlerExists(scheme); } try { // nsIExternalProtocolService.getProtocolHandlerInfo() on Android throws // error due to not implemented. 
return lazy.handlerService.exists( lazy.externalProtocolService.getProtocolHandlerInfo(scheme) ); } catch (e) { return false; } }, classID: Components.ID("{c6cf88b7-452e-47eb-bdc9-86e3561648ef}"), QueryInterface: ChromeUtils.generateQI(["nsIURIFixup"]), }; export function URIFixupInfo(originalInput = "") { this._originalInput = originalInput; } URIFixupInfo.prototype = { set consumer(consumer) { this._consumer = consumer || null; }, get consumer() { return this._consumer || null; }, set preferredURI(uri) { this._preferredURI = uri; }, get preferredURI() { return this._preferredURI || null; }, set fixedURI(uri) { this._fixedURI = uri; }, get fixedURI() { return this._fixedURI || null; }, set keywordProviderName(name) { this._keywordProviderName = name; }, get keywordProviderName() { return this._keywordProviderName || ""; }, set keywordAsSent(keyword) { this._keywordAsSent = keyword; }, get keywordAsSent() { return this._keywordAsSent || ""; }, set wasSchemelessInput(changed) { this._wasSchemelessInput = changed; }, get wasSchemelessInput() { return !!this._wasSchemelessInput; }, set fixupChangedProtocol(changed) { this._fixupChangedProtocol = changed; }, get fixupChangedProtocol() { return !!this._fixupChangedProtocol; }, set fixupCreatedAlternateURI(changed) { this._fixupCreatedAlternateURI = changed; }, get fixupCreatedAlternateURI() { return !!this._fixupCreatedAlternateURI; }, set originalInput(input) { this._originalInput = input; }, get originalInput() { return this._originalInput || ""; }, set postData(postData) { this._postData = postData; }, get postData() { return this._postData || null; }, classID: Components.ID("{33d75835-722f-42c0-89cc-44f328e56a86}"), QueryInterface: ChromeUtils.generateQI(["nsIURIFixupInfo"]), }; // Helpers /** * Implementation of isDomainKnown, so we don't have to go through the * service. 
* @param {string} asciiHost * @returns {boolean} whether the domain is known */ function isDomainKnown(asciiHost) { if (lazy.dnsFirstForSingleWords) { return true; } // Check if this domain is known as an actual // domain (which will prevent a keyword query) // Note that any processing of the host here should stay in sync with // code in the front-end(s) that set the pref. let lastDotIndex = asciiHost.lastIndexOf("."); if (lastDotIndex == asciiHost.length - 1) { asciiHost = asciiHost.substring(0, asciiHost.length - 1); lastDotIndex = asciiHost.lastIndexOf("."); } if (lazy.knownDomains.has(asciiHost.toLowerCase())) { return true; } // If there's no dot or only a leading dot we are done, otherwise we'll check // against the known suffixes. if (lastDotIndex <= 0) { return false; } // Don't use getPublicSuffix here, since the suffix is not in the PSL, // thus it couldn't tell if the suffix is made up of one or multiple // dot-separated parts. let lastPart = asciiHost.substr(lastDotIndex + 1); let suffixes = lazy.knownSuffixes.get(lastPart); if (suffixes) { return Array.from(suffixes).some(s => asciiHost.endsWith(s)); } return false; } /** * Checks the suffix of info.fixedURI against the Public Suffix List. * If the suffix is unknown due to a typo this will try to fix it up. * @param {URIFixupInfo} info about the uri to check. * @note this may modify the public suffix of info.fixedURI. * @returns {object} result The lookup result. * @returns {string} result.suffix The public suffix if one can be identified. * @returns {boolean} result.hasUnknownSuffix True when the suffix is not in the * Public Suffix List and it's not in knownSuffixes. False in the other cases. 
*/ function checkAndFixPublicSuffix(info) { let uri = info.fixedURI; let asciiHost = uri?.asciiHost; if ( !asciiHost || !asciiHost.includes(".") || asciiHost.endsWith(".") || isDomainKnown(asciiHost) ) { return { suffix: "", hasUnknownSuffix: false }; } // Quick bailouts for most common cases, according to Alexa Top 1 million. if ( /^\w/.test(asciiHost) && (asciiHost.endsWith(".com") || asciiHost.endsWith(".net") || asciiHost.endsWith(".org") || asciiHost.endsWith(".ru") || asciiHost.endsWith(".de")) ) { return { suffix: asciiHost.substring(asciiHost.lastIndexOf(".") + 1), hasUnknownSuffix: false, }; } try { let suffix = Services.eTLD.getKnownPublicSuffix(uri); if (suffix) { return { suffix, hasUnknownSuffix: false }; } } catch (ex) { return { suffix: "", hasUnknownSuffix: false }; } // Suffix is unknown, try to fix most common 3 chars TLDs typos. // .com is the most commonly mistyped tld, so it has more cases. let suffix = Services.eTLD.getPublicSuffix(uri); if (!suffix || lazy.numberRegex.test(suffix)) { return { suffix: "", hasUnknownSuffix: false }; } for (let [typo, fixed] of [ ["ocm", "com"], ["con", "com"], ["cmo", "com"], ["xom", "com"], ["vom", "com"], ["cpm", "com"], ["com'", "com"], ["ent", "net"], ["ner", "net"], ["nte", "net"], ["met", "net"], ["rog", "org"], ["ogr", "org"], ["prg", "org"], ["orh", "org"], ]) { if (suffix == typo) { let host = uri.host.substring(0, uri.host.length - typo.length) + fixed; let updatePreferredURI = info.preferredURI == info.fixedURI; info.fixedURI = uri.mutate().setHost(host).finalize(); if (updatePreferredURI) { info.preferredURI = info.fixedURI; } return { suffix: fixed, hasUnknownSuffix: false }; } } return { suffix: "", hasUnknownSuffix: true }; } function tryKeywordFixupForURIInfo(uriString, fixupInfo, isPrivateContext) { try { let keywordInfo = Services.uriFixup.keywordToURI( uriString, isPrivateContext ); fixupInfo.keywordProviderName = keywordInfo.keywordProviderName; fixupInfo.keywordAsSent = 
keywordInfo.keywordAsSent; fixupInfo.preferredURI = keywordInfo.preferredURI; return true; } catch (ex) {} return false; } /** * This generates an alternate fixedURI, by adding a prefix and a suffix to * the fixedURI host, if and only if the protocol is http. It should _never_ * modify URIs with other protocols. * @param {URIFixupInfo} info an URIInfo object * @param {integer} fixupFlags the fixup flags * @returns {boolean} Whether an alternate uri was generated */ function maybeSetAlternateFixedURI(info, fixupFlags) { let uri = info.fixedURI; if ( !(fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) || // Code only works for http. Not for any other protocol including https! !uri.schemeIs("http") || // Security - URLs with user / password info should NOT be fixed up uri.userPass || // Don't fix up hosts with ports uri.port != -1 ) { return false; } let oldHost = uri.host; // Don't create an alternate uri for localhost, because it would be confusing. // Ditto for 'http' and 'https' as these are frequently the result of typos, e.g. // 'https//foo' (note missing : ). if (oldHost == "localhost" || oldHost == "http" || oldHost == "https") { return false; } // Get the prefix and suffix to stick onto the new hostname. By default these // are www. & .com but they could be any other value, e.g. www. & .org let newHost = maybeAddPrefixAndSuffix(oldHost); if (newHost == oldHost) { return false; } return updateHostAndScheme(info, newHost); } /** * Try to fixup a file URI. * @param {string} uriString The file URI to fix. * @returns {nsIURI} a fixed uri or null. * @note FileURIFixup only returns a URI if it has to add the file: protocol. */ function fileURIFixup(uriString) { let attemptFixup = false; let path = uriString; if (AppConstants.platform == "win") { // Check for "\"" in the url-string, just a drive (e.g. C:), // or 'A:/...' where the "protocol" is also a single letter. 
attemptFixup = uriString.includes("\\") || (uriString[1] == ":" && (uriString.length == 2 || uriString[2] == "/")); if (uriString[1] == ":" && uriString[2] == "/") { path = uriString.replace(/\//g, "\\"); } } else { // UNIX: Check if it starts with "/" or "~". attemptFixup = /^[~/]/.test(uriString); } if (attemptFixup) { try { // Test if this is a valid path by trying to create a local file // object. The URL of that is returned if successful. let file = Cc["@mozilla.org/file/local;1"].createInstance(Ci.nsIFile); file.initWithPath(path); return Services.io.newURI( lazy.fileProtocolHandler.getURLSpecFromActualFile(file) ); } catch (ex) { // Not a file uri. } } return null; } /** * Tries to fixup a string to an nsIURI by adding the default protocol. * * Should fix things like: * no-scheme.com * ftp.no-scheme.com * ftp4.no-scheme.com * no-scheme.com/query?foo=http://www.foo.com * user:pass@no-scheme.com * * @param {string} uriString The string to fixup. * @returns {nsIURI} an nsIURI built adding the default protocol to the string, * or null if fixing was not possible. */ function fixupURIProtocol(uriString) { let schemePos = uriString.indexOf("://"); if (schemePos == -1 || schemePos > uriString.search(/[:\/]/)) { uriString = "http://" + uriString; } try { return Services.io.newURI(uriString); } catch (ex) { // We generated an invalid uri. } return null; } /** * Tries to fixup a string to a search url. * @param {string} uriString the string to fixup. * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place. * @param {boolean} isPrivateContext Whether this happens in a private context. * @param {nsIInputStream} postData optional POST data for the search * @returns {boolean} Whether the keyword fixup was succesful. */ function keywordURIFixup(uriString, fixupInfo, isPrivateContext) { // Here is a few examples of strings that should be searched: // "what is mozilla" // "what is mozilla?" 
// "docshell site:mozilla.org" - has a space in the origin part // "?site:mozilla.org - anything that begins with a question mark // "mozilla'.org" - Things that have a quote before the first dot/colon // "mozilla/test" - unknown host // ".mozilla", "mozilla." - starts or ends with a dot () // "user@nonQualifiedHost" // These other strings should not be searched, because they could be URIs: // "www.blah.com" - Domain with a standard or known suffix // "knowndomain" - known domain // "nonQualifiedHost:8888?something" - has a port // "user:pass@nonQualifiedHost" // "blah.com." // We do keyword lookups if the input starts with a question mark. if (uriString.startsWith("?")) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, isPrivateContext ); } // Check for IPs. const userPassword = lazy.userPasswordRegex.exec(uriString); const ipString = userPassword ? uriString.replace(userPassword[2], "") : uriString; if (lazy.IPv4LikeRegex.test(ipString) || lazy.IPv6LikeRegex.test(ipString)) { return false; } // Avoid keyword lookup if we can identify a host and it's known, or ends // with a dot and has some path. // Note that if dnsFirstForSingleWords is true isDomainKnown will always // return true, so we can avoid checking dnsFirstForSingleWords after this. let asciiHost = fixupInfo.fixedURI?.asciiHost; if ( asciiHost && (isDomainKnown(asciiHost) || (asciiHost.endsWith(".") && asciiHost.indexOf(".") != asciiHost.length - 1)) ) { return false; } // Avoid keyword lookup if the url seems to have password. if (fixupInfo.fixedURI?.password) { return false; } // Even if the host is unknown, avoid keyword lookup if the string has // uri-like characteristics, unless it looks like "user@unknownHost". // Note we already excluded passwords at this point. 
if ( !isURILike(uriString, fixupInfo.fixedURI?.displayHost) || (fixupInfo.fixedURI?.userPass && fixupInfo.fixedURI?.pathQueryRef === "/") ) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, isPrivateContext ); } return false; } /** * Mimics the logic in Services.io.extractScheme, but avoids crossing XPConnect. * This also tries to fixup the scheme if it was clearly mistyped. * @param {string} uriString the string to examine * @param {integer} fixupFlags The original fixup flags * @returns {object} * scheme: a typo fixed scheme or empty string if one could not be identified * fixedSchemeUriString: uri string with a typo fixed scheme * fixupChangedProtocol: true if the scheme is fixed up */ function extractScheme(uriString, fixupFlags = FIXUP_FLAG_NONE) { const matches = uriString.match(lazy.possibleProtocolRegex); const hasColon = matches?.[2] === ":"; const hasSlash2 = matches?.[3] === "//"; const isFixupSchemeTypos = lazy.fixupSchemeTypos && fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS; if ( !matches || (!hasColon && !hasSlash2) || (!hasColon && !isFixupSchemeTypos) ) { return { scheme: "", fixedSchemeUriString: uriString, fixupChangedProtocol: false, }; } let scheme = matches[1].replace("\t", "").toLowerCase(); let fixedSchemeUriString = uriString; if (isFixupSchemeTypos && hasSlash2) { // Fix up typos for string that user would have intented as protocol. const afterProtocol = uriString.substring(matches[0].length); fixedSchemeUriString = `${scheme}://${afterProtocol}`; } let fixupChangedProtocol = false; if (isFixupSchemeTypos) { // Fix up common scheme typos. // TODO: Use levenshtein distance here? 
fixupChangedProtocol = [ ["ttp", "http"], ["htp", "http"], ["ttps", "https"], ["tps", "https"], ["ps", "https"], ["htps", "https"], ["ile", "file"], ["le", "file"], ].some(([typo, fixed]) => { if (scheme === typo) { scheme = fixed; fixedSchemeUriString = scheme + fixedSchemeUriString.substring(typo.length); return true; } return false; }); } return { scheme, fixedSchemeUriString, fixupChangedProtocol, }; } /** * View-source is a pseudo scheme. We're interested in fixing up the stuff * after it. The easiest way to do that is to call this method again with * the "view-source:" lopped off and then prepend it again afterwards. * @param {string} uriString The original string to fixup * @param {integer} fixupFlags The original fixup flags * @param {nsIInputStream} postData Optional POST data for the search * @returns {object} {preferredURI, postData} The fixed URI and relative postData * @throws if it's not possible to fixup the url */ function fixupViewSource(uriString, fixupFlags) { // We disable keyword lookup and alternate URIs so that small typos don't // cause us to look at very different domains. let newFixupFlags = fixupFlags & ~FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP; let innerURIString = uriString.substring(12).trim(); // Prevent recursion. const { scheme: innerScheme } = extractScheme(innerURIString); if (innerScheme == "view-source") { throw new Components.Exception( "Prevent view-source recursion", Cr.NS_ERROR_FAILURE ); } let info = Services.uriFixup.getFixupURIInfo(innerURIString, newFixupFlags); if (!info.preferredURI) { throw new Components.Exception( "Couldn't build a valid uri", Cr.NS_ERROR_MALFORMED_URI ); } return { preferredURI: Services.io.newURI("view-source:" + info.preferredURI.spec), postData: info.postData, }; } /** * Fixup the host of fixedURI if it contains consecutive dots. 
* @param {URIFixupInfo} info an URIInfo object */ function fixupConsecutiveDotsHost(fixupInfo) { const uri = fixupInfo.fixedURI; try { if (!uri?.host.includes("..")) { return; } } catch (e) { return; } try { const isPreferredEqualsToFixed = fixupInfo.preferredURI?.equals(uri); fixupInfo.fixedURI = uri .mutate() .setHost(uri.host.replace(/\.+/g, ".")) .finalize(); if (isPreferredEqualsToFixed) { fixupInfo.preferredURI = fixupInfo.fixedURI; } } catch (e) { if (e.result !== Cr.NS_ERROR_MALFORMED_URI) { throw e; } } } /** * Return whether or not given string is uri like. * This function returns true like following strings. * - ":8080" * - "localhost:8080" (if given host is "localhost") * - "/foo?bar" * - "/foo#bar" * @param {string} uriString. * @param {string} host. * @param {boolean} true if uri like. */ function isURILike(uriString, host) { const indexOfSlash = uriString.indexOf("/"); if ( indexOfSlash >= 0 && (indexOfSlash < uriString.indexOf("?", indexOfSlash) || indexOfSlash < uriString.indexOf("#", indexOfSlash)) ) { return true; } if (uriString.startsWith(host)) { uriString = uriString.substring(host.length); } return lazy.portRegex.test(uriString); } /** * Add prefix and suffix to a hostname if both are missing. * * If the host does not start with the prefix, add the prefix to * the hostname. * * By default the prefix and suffix are www. and .com but they could * be any value e.g. www. and .org as they use the preferences * "browser.fixup.alternate.prefix" and "browser.fixup.alternative.suffix" * * If no changes were made, it returns an empty string. * * @param {string} oldHost. * @return {String} Fixed up hostname or an empty string. */ function maybeAddPrefixAndSuffix(oldHost) { let prefix = Services.prefs.getCharPref( "browser.fixup.alternate.prefix", "www." 
); let suffix = Services.prefs.getCharPref( "browser.fixup.alternate.suffix", ".com" ); let newHost = ""; let numDots = (oldHost.match(/\./g) || []).length; if (numDots == 0) { newHost = prefix + oldHost + suffix; } else if (numDots == 1) { if (prefix && oldHost == prefix) { newHost = oldHost + suffix; } else if (suffix && !oldHost.startsWith(prefix)) { newHost = prefix + oldHost; } } return newHost ? newHost : oldHost; } /** * Given an instance of URIFixupInfo, update its fixedURI. * * First, change the protocol to the one stored in * "browser.fixup.alternate.protocol". * * Then, try to update fixedURI's host to newHost. * * @param {URIFixupInfo} info. * @param {string} newHost. * @return {boolean} * True, if info was updated without any errors. * False, if NS_ERROR_MALFORMED_URI error. * @throws If a non-NS_ERROR_MALFORMED_URI error occurs. */ function updateHostAndScheme(info, newHost) { let oldHost = info.fixedURI.host; let oldScheme = info.fixedURI.scheme; try { info.fixedURI = info.fixedURI .mutate() .setScheme(lazy.alternateProtocol) .setHost(newHost) .finalize(); } catch (ex) { if (ex.result != Cr.NS_ERROR_MALFORMED_URI) { throw ex; } return false; } if (oldScheme != info.fixedURI.scheme) { info.fixupChangedProtocol = true; } if (oldHost != info.fixedURI.host) { info.fixupCreatedAlternateURI = true; } return true; }