1 files changed, 42 insertions, 26 deletions
diff --git a/browser/actors/SearchSERPTelemetryChild.sys.mjs b/browser/actors/SearchSERPTelemetryChild.sys.mjs
index c760f9a19e..b2b78941ad 100644
--- a/browser/actors/SearchSERPTelemetryChild.sys.mjs
+++ b/browser/actors/SearchSERPTelemetryChild.sys.mjs
@@ -13,13 +13,6 @@ ChromeUtils.defineESModuleGetters(lazy, {
 
 XPCOMUtils.defineLazyPreferenceGetter(
   lazy,
-  "serpEventsEnabled",
-  "browser.search.serpEventTelemetry.enabled",
-  true
-);
-
-XPCOMUtils.defineLazyPreferenceGetter(
-  lazy,
   "serpEventTelemetryCategorization",
   "browser.search.serpEventTelemetryCategorization.enabled",
   false
@@ -1077,7 +1070,11 @@ class DomainExtractor {
           break;
         }
         case "textContent": {
-          this.#fromElementsRetrieveTextContent(elements, extractedDomains);
+          this.#fromElementsRetrieveTextContent(
+            elements,
+            extractedDomains,
+            providerName
+          );
           break;
         }
       }
@@ -1197,8 +1194,26 @@ class DomainExtractor {
    *  A list of elements from the page whose text content we want to inspect.
    * @param {Set<string>} extractedDomains
    *  The result set of domains extracted from the page.
+   * @param {string} providerName
+   *  The name of the search provider.
    */
-  #fromElementsRetrieveTextContent(elements, extractedDomains) {
+  #fromElementsRetrieveTextContent(elements, extractedDomains, providerName) {
+    // Not an exhaustive regex, but it fits our purpose for this method.
+    const LOOSE_URL_REGEX =
+      /^(?:https?:\/\/)?(?:www\.)?(?:[\w\-]+\.)+(?:[\w\-]{2,})/i;
+
+    // Known but acceptable limitations to this function, where the return
+    // value won't be correctly fixed up:
+    //   1) A url is embedded within other text. Ex: "xkcd.com is cool."
+    //   2) The url contains legal but unusual characters. Ex: $ ! * '
+    function fixup(textContent) {
+      return textContent
+        .toLowerCase()
+        .replaceAll(" ", "")
+        .replace(/\.$/, "")
+        .concat(".com");
+    }
+
     for (let element of elements) {
       if (this.#exceedsThreshold(extractedDomains.size)) {
         return;
@@ -1209,18 +1224,24 @@ class DomainExtractor {
       }
 
       let domain;
-      try {
-        domain = new URL(textContent).hostname;
-      } catch (e) {
-        domain = textContent.toLowerCase().replaceAll(" ", "");
-        // If the attempt to turn the text content into a URL object only fails
-        // because we're missing a protocol, ".com" may already be present.
-        if (!domain.endsWith(".com")) {
-          domain = domain.concat(".com");
+      if (LOOSE_URL_REGEX.test(textContent)) {
+        // Creating a new URL object will throw if the protocol is missing.
+        if (!/^https?:\/\//.test(textContent)) {
+          textContent = "https://" + textContent;
         }
+
+        try {
+          domain = new URL(textContent).hostname;
+        } catch (e) {
+          domain = fixup(textContent);
+        }
+      } else {
+        domain = fixup(textContent);
       }
-      if (!extractedDomains.has(domain)) {
-        extractedDomains.add(domain);
+
+      let processedDomain = this.#processDomain(domain, providerName);
+      if (processedDomain && !extractedDomains.has(processedDomain)) {
+        extractedDomains.add(processedDomain);
       }
     }
   }
@@ -1368,7 +1389,6 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild {
     }
 
     if (
-      lazy.serpEventsEnabled &&
       providerInfo.components?.length &&
       (eventType == "load" || eventType == "pageshow")
     ) {
@@ -1496,17 +1516,13 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild {
         // so that we remain consistent with the *.in-content:sap* count for the
         // SEARCH_COUNTS histogram.
         if (event.persisted) {
+          this.#checkForPageImpressionComponents();
           this.#check(event.type);
-          if (lazy.serpEventsEnabled) {
-            this.#checkForPageImpressionComponents();
-          }
         }
         break;
       }
       case "DOMContentLoaded": {
-        if (lazy.serpEventsEnabled) {
-          this.#checkForPageImpressionComponents();
-        }
+        this.#checkForPageImpressionComponents();
         this.#check(event.type);
         break;
       }