summaryrefslogtreecommitdiffstats
path: root/browser/components/attribution/AttributionCode.sys.mjs
blob: 354270c13a5f99b0284a9d077abb84b9914376b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * Indirection layer over the IOUtils calls used for attribution data.
 * Each member simply forwards to the IOUtils function of the same name, so a
 * test can replace a member on this object to override that behavior.
 */
export const AttributionIOUtils = {
  async write(path, bytes) {
    return IOUtils.write(path, bytes);
  },

  async read(path) {
    return IOUtils.read(path);
  },

  async readUTF8(path) {
    return IOUtils.readUTF8(path);
  },

  async exists(path) {
    return IOUtils.exists(path);
  },
};

import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
import { AppConstants } from "resource://gre/modules/AppConstants.sys.mjs";

// Holder for dependencies that are registered here and resolved through the
// `lazy` object: the MacAttribution module and the module's logger.
const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  MacAttribution: "resource:///modules/MacAttribution.sys.mjs",
});

XPCOMUtils.defineLazyGetter(lazy, "log", () => {
  const { ConsoleAPI } = ChromeUtils.importESModule(
    "resource://gre/modules/Console.sys.mjs"
  );
  // tip: set maxLogLevel to "debug" and use lazy.log.debug() to create
  // detailed messages during development. See LOG_LEVELS in Console.sys.mjs
  // for details.
  return new ConsoleAPI({
    maxLogLevel: "error",
    maxLogLevelPref: "browser.attribution.loglevel",
    prefix: "AttributionCode",
  });
});

// This maximum length was originally based on how much space we have in the PE
// file header that we store attribution codes in for full and stub installers.
// Windows Store builds instead use a "Campaign ID" passed through URLs to send
// attribution information, which Microsoft's documentation claims must be no
// longer than 100 characters. In our own testing, we've been able to retrieve
// the first 208 characters of the Campaign ID. Either way, the "max" length
// for Microsoft Store builds is much lower than this limit implies.
const ATTR_CODE_MAX_LENGTH = 1010;

// Characters permitted in a (URL-encoded) attribution value: ASCII letters,
// digits, and `_ % - . ( )`.
//
// The pattern is anchored at both ends so that the *whole* value has to consist
// of permitted characters. An unanchored `[...]*` matches the empty string at
// the start of any input, which makes `.test()` return true for everything.
// The escapes use single backslashes because this is a regex literal, not a
// string passed to `new RegExp()`; doubled backslashes inside the class would
// add a literal backslash and drop the hyphen.
const ATTR_CODE_VALUE_REGEX = /^[a-zA-Z0-9_%\-.()]*$/;

const ATTR_CODE_FIELD_SEPARATOR = "%26"; // URL-encoded &
const ATTR_CODE_KEY_VALUE_SEPARATOR = "%3D"; // URL-encoded =

// The only keys accepted in an attribution code. The order of this list is
// also the order in which keys are emitted when attribution data is
// serialized.
const ATTR_CODE_KEYS = [
  "source",
  "medium",
  "campaign",
  "content",
  "experiment",
  "variation",
  "ua",
  "dltoken",
  "msstoresignedin",
  "dlsource",
];

// Module-wide cache of the parsed attribution data; null until it has been
// populated (or after it has been cleared).
let gCachedAttrData = null;

/**
 * Reads, parses, serializes and caches the attribution code for this
 * installation. The parsed data is cached in the module-level
 * `gCachedAttrData` variable.
 */
export var AttributionCode = {
  /**
   * Wrapper to pull campaign IDs from MSIX builds.
   * This function solely exists to make it easy to mock out for tests.
   */
  get msixCampaignId() {
    return Cc["@mozilla.org/windows-package-manager;1"]
      .createInstance(Ci.nsIWindowsPackageManager)
      .getCampaignId();
  },

  /**
   * Returns a platform-specific nsIFile for the file containing the attribution
   * data, or null if the current platform does not support (caching)
   * attribution data.
   *
   * On Windows this is `postSigningData` under the `GreD` directory; on macOS
   * it is `macAttributionData` under the `UpdRootD` directory.
   */
  get attributionFile() {
    if (AppConstants.platform == "win") {
      let file = Services.dirsvc.get("GreD", Ci.nsIFile);
      file.append("postSigningData");
      return file;
    } else if (AppConstants.platform == "macosx") {
      // There's no `UpdRootD` in xpcshell tests.  Some existing tests override
      // it, which is onerous and difficult to share across tests.  When testing,
      // if it's not defined, fallback to a nested subdirectory of the xpcshell
      // temp directory.  Nesting more closely replicates the situation where the
      // update directory does not (yet) exist, testing a scenario witnessed in
      // development.
      let file;
      try {
        file = Services.dirsvc.get("UpdRootD", Ci.nsIFile);
      } catch (ex) {
        // It's most common to test for the profile dir, even though we actually
        // are using the temp dir.
        if (
          ex instanceof Ci.nsIException &&
          ex.result == Cr.NS_ERROR_FAILURE &&
          Services.env.exists("XPCSHELL_TEST_PROFILE_DIR")
        ) {
          let path = Services.env.get("XPCSHELL_TEST_TEMP_DIR");
          file = Cc["@mozilla.org/file/local;1"].createInstance(Ci.nsIFile);
          file.initWithPath(path);
          file.append("nested_UpdRootD_1");
          file.append("nested_UpdRootD_2");
        } else {
          // Any other failure is not the "no UpdRootD under xpcshell" case.
          throw ex;
        }
      }
      file.append("macAttributionData");
      return file;
    }

    return null;
  },

  /**
   * Write the given attribution code to the attribution file, creating the
   * file's parent directory first. Does nothing (beyond logging a console
   * message) on Windows builds that report `hasWinPackageId`.
   *
   * @param {String} code to write.
   * @returns {Promise<void>} resolves once the write has completed.
   */
  async writeAttributionFile(code) {
    // Writing attribution files is only used as part of test code, and Mac
    // attribution, so bailing here for MSIX builds is no big deal.
    if (
      AppConstants.platform === "win" &&
      Services.sysinfo.getProperty("hasWinPackageId")
    ) {
      Services.console.logStringMessage(
        "Attribution code cannot be written for MSIX builds, aborting."
      );
      return;
    }
    let file = AttributionCode.attributionFile;
    await IOUtils.makeDirectory(file.parent.path);
    let bytes = new TextEncoder().encode(code);
    await AttributionIOUtils.write(file.path, bytes);
  },

  /**
   * Returns an array of allowed attribution code keys.
   * The array is a fresh copy, so callers may modify it freely.
   */
  get allowedCodeKeys() {
    return [...ATTR_CODE_KEYS];
  },

  /**
   * Returns an object containing a key-value pair for each piece of attribution
   * data included in the passed-in attribution code string.
   *
   * The code is a list of `key%3Dvalue` fields joined by `%26`. A field whose
   * key is allowed but whose value is empty or fails ATTR_CODE_VALUE_REGEX is
   * skipped. The value of "msstoresignedin" is converted to a boolean.
   *
   * If the string is longer than ATTR_CODE_MAX_LENGTH, returns an empty object.
   * If the string contains a key that is not allowed, records a "decode_error"
   * in the BROWSER_ATTRIBUTION_ERRORS histogram and returns an empty object.
   *
   * @param {String} code the (URL-encoded) attribution code.
   * @returns {Object} the parsed attribution data.
   * @throws {Error} if "msstoresignedin" has a value other than "true" or
   *         "false".
   */
  parseAttributionCode(code) {
    if (code.length > ATTR_CODE_MAX_LENGTH) {
      return {};
    }

    let isValid = true;
    let parsed = {};
    for (let param of code.split(ATTR_CODE_FIELD_SEPARATOR)) {
      let [key, value] = param.split(ATTR_CODE_KEY_VALUE_SEPARATOR, 2);
      if (key && ATTR_CODE_KEYS.includes(key)) {
        if (value && ATTR_CODE_VALUE_REGEX.test(value)) {
          if (key === "msstoresignedin") {
            if (value === "true") {
              parsed[key] = true;
            } else if (value === "false") {
              parsed[key] = false;
            } else {
              throw new Error("Couldn't parse msstoresignedin");
            }
          } else {
            parsed[key] = value;
          }
        }
      } else {
        // A missing or unknown key invalidates the whole code.
        lazy.log.debug(
          `parseAttributionCode: "${code}" => isValid = false: "${key}", "${value}"`
        );
        isValid = false;
        break;
      }
    }

    if (isValid) {
      return parsed;
    }

    Services.telemetry
      .getHistogramById("BROWSER_ATTRIBUTION_ERRORS")
      .add("decode_error");

    return {};
  },

  /**
   * Returns an object containing a key-value pair for each piece of attribution
   * data included in the passed-in URL containing a query string encoding an
   * attribution code.
   *
   * We have less control of the attribution codes on macOS so we accept more
   * URLs than we accept attribution codes on Windows.
   *
   * If the URL is empty, returns an empty object.
   *
   * If the URL doesn't parse, throws.
   *
   * @param {String} url the URL whose query string carries the attribution.
   * @returns {Object} the parsed attribution data; values are URI-encoded.
   */
  parseAttributionCodeFromUrl(url) {
    if (!url) {
      return {};
    }

    let parsed = {};

    let params = new URL(url).searchParams;
    for (let key of ATTR_CODE_KEYS) {
      // We support the key prefixed with utm_ or funnel_, or not prefixed at
      // all. Later entries of this list overwrite earlier ones, so we
      // intentionally choose non-utm params over utm params.
      for (let paramKey of [`utm_${key}`, `funnel_${key}`, key]) {
        if (params.has(paramKey)) {
          // We expect URI-encoded components in our attribution codes.
          let value = encodeURIComponent(params.get(paramKey));
          if (value && ATTR_CODE_VALUE_REGEX.test(value)) {
            parsed[key] = value;
          }
        }
      }
    }

    return parsed;
  },

  /**
   * Returns a string serializing the given attribution data.
   * Only the allowed keys are serialized, in the order of ATTR_CODE_KEYS.
   *
   * It is expected that the given values are already URL-encoded.
   *
   * @param {Object} data the attribution data to serialize.
   * @returns {String} the serialized attribution code ("" if no allowed key
   *          is present in `data`).
   */
  serializeAttributionData(data) {
    // Iterating in this way makes the order deterministic.
    let s = "";
    for (let key of ATTR_CODE_KEYS) {
      if (key in data) {
        let value = data[key];
        if (s) {
          s += ATTR_CODE_FIELD_SEPARATOR; // URL-encoded &
        }
        s += `${key}${ATTR_CODE_KEY_VALUE_SEPARATOR}${value}`; // URL-encoded =
      }
    }
    return s;
  },

  /**
   * Reads the attribution code, either from disk or a cached version.
   * Returns a promise that fulfills with an object containing the parsed
   * attribution data if the code could be read and is valid,
   * or an empty object otherwise.
   *
   * On windows the attribution service converts utm_* keys, removing "utm_".
   * On OSX the attributions are set directly on download and retain "utm_".  We
   * strip "utm_" while retrieving the params.
   *
   * Failures are reported to the BROWSER_ATTRIBUTION_ERRORS histogram as
   * "quarantine_error", "write_error", "read_error" or "decode_error".
   *
   * NOTE: the cache is set to an empty object before the first `await`, so a
   * second call made while the first is still in flight returns that empty
   * object rather than waiting for the real data.
   */
  async getAttrDataAsync() {
    if (gCachedAttrData != null) {
      lazy.log.debug(
        `getAttrDataAsync: attribution is cached: ${JSON.stringify(
          gCachedAttrData
        )}`
      );
      return gCachedAttrData;
    }

    gCachedAttrData = {};
    let attributionFile = this.attributionFile;
    if (!attributionFile) {
      // This platform doesn't support attribution.
      lazy.log.debug(
        `getAttrDataAsync: no attribution (attributionFile is null)`
      );
      return gCachedAttrData;
    }

    if (
      AppConstants.platform == "macosx" &&
      !(await AttributionIOUtils.exists(attributionFile.path))
    ) {
      lazy.log.debug(
        `getAttrDataAsync: macOS && !exists("${attributionFile.path}")`
      );

      // On macOS, we fish the attribution data from the system quarantine DB.
      try {
        let referrer = await lazy.MacAttribution.getReferrerUrl();
        lazy.log.debug(
          `getAttrDataAsync: macOS attribution getReferrerUrl: "${referrer}"`
        );

        gCachedAttrData = this.parseAttributionCodeFromUrl(referrer);
      } catch (ex) {
        // Avoid partial attribution data.
        gCachedAttrData = {};

        // No attributions.  Just `warn` 'cuz this isn't necessarily an error.
        lazy.log.warn("Caught exception fetching macOS attribution codes!", ex);

        if (
          ex instanceof Ci.nsIException &&
          ex.result == Cr.NS_ERROR_UNEXPECTED
        ) {
          // Bad quarantine data.
          Services.telemetry
            .getHistogramById("BROWSER_ATTRIBUTION_ERRORS")
            .add("quarantine_error");
        }
      }

      lazy.log.debug(
        `macOS attribution data is ${JSON.stringify(gCachedAttrData)}`
      );

      // We only want to try to fetch the referrer from the quarantine
      // database once on macOS, so persist whatever we found (even if empty)
      // to the attribution file.
      try {
        let code = this.serializeAttributionData(gCachedAttrData);
        lazy.log.debug(`macOS attribution data serializes as "${code}"`);
        await this.writeAttributionFile(code);
      } catch (ex) {
        lazy.log.debug(
          `Caught exception writing "${attributionFile.path}"`,
          ex
        );
        Services.telemetry
          .getHistogramById("BROWSER_ATTRIBUTION_ERRORS")
          .add("write_error");
        return gCachedAttrData;
      }

      lazy.log.debug(
        `Returning after successfully writing "${attributionFile.path}"`
      );
      return gCachedAttrData;
    }

    // We get here when the branch above was not taken, i.e. when we are not
    // on macOS, or the attribution file already exists.
    lazy.log.debug(
      `getAttrDataAsync: !macOS || exists("${attributionFile.path}")`
    );

    let bytes;
    try {
      if (
        AppConstants.platform === "win" &&
        Services.sysinfo.getProperty("hasWinPackageId")
      ) {
        // This comes out of windows-package-manager _not_ URL encoded or in an ArrayBuffer,
        // but the parsing code wants it that way. It's easier to just provide that
        // than have the parsing code support both.
        lazy.log.debug(
          `winPackageFamilyName is: ${Services.sysinfo.getProperty(
            "winPackageFamilyName"
          )}`
        );
        let encoder = new TextEncoder();
        bytes = encoder.encode(encodeURIComponent(this.msixCampaignId));
      } else {
        bytes = await AttributionIOUtils.read(attributionFile.path);
      }
    } catch (ex) {
      if (DOMException.isInstance(ex) && ex.name == "NotFoundError") {
        // A missing file is not an error: there is simply no attribution.
        lazy.log.debug(
          `getAttrDataAsync: !exists("${
            attributionFile.path
          }"), returning ${JSON.stringify(gCachedAttrData)}`
        );
        return gCachedAttrData;
      }
      lazy.log.debug(
        `other error trying to read attribution data:
          attributionFile.path is: ${attributionFile.path}`
      );
      lazy.log.debug("Full exception is:");
      lazy.log.debug(ex);

      Services.telemetry
        .getHistogramById("BROWSER_ATTRIBUTION_ERRORS")
        .add("read_error");
    }
    if (bytes) {
      try {
        let decoder = new TextDecoder();
        let code = decoder.decode(bytes);
        lazy.log.debug(
          `getAttrDataAsync: attribution bytes deserializes to ${code}`
        );
        if (AppConstants.platform == "macosx" && !code) {
          // On macOS, an empty attribution code is fine.  (On Windows, that
          // means the stub/full installer has been incorrectly attributed,
          // which is an error.)
          return gCachedAttrData;
        }

        gCachedAttrData = this.parseAttributionCode(code);
        lazy.log.debug(
          `getAttrDataAsync: ${code} parses to ${JSON.stringify(
            gCachedAttrData
          )}`
        );
      } catch (ex) {
        // TextDecoder can throw an error, and so can parseAttributionCode.
        // Keep the exception visible in the debug log instead of dropping it.
        lazy.log.debug(
          "Caught exception decoding or parsing attribution code",
          ex
        );
        Services.telemetry
          .getHistogramById("BROWSER_ATTRIBUTION_ERRORS")
          .add("decode_error");
      }
    }

    return gCachedAttrData;
  },

  /**
   * Return the cached attribution data synchronously without hitting
   * the disk.
   * @returns A dictionary with the attribution data if it's available,
   *          null otherwise.
   */
  getCachedAttributionData() {
    return gCachedAttrData;
  },

  /**
   * Deletes the attribution data file.
   * Returns a promise that resolves when the file is deleted,
   * or if the file couldn't be deleted (the promise is never rejected).
   */
  async deleteFileAsync() {
    try {
      await IOUtils.remove(this.attributionFile.path);
    } catch (ex) {
      // The attribution file may already have been deleted,
      // or it may have never been installed at all;
      // failure to delete it isn't an error.
    }
  },

  /**
   * Clears the cached attribution code value, if any.
   * Does nothing if called from outside of an xpcshell test, which is
   * detected through the XPCSHELL_TEST_PROFILE_DIR environment variable.
   */
  _clearCache() {
    if (Services.env.exists("XPCSHELL_TEST_PROFILE_DIR")) {
      gCachedAttrData = null;
    }
  },
};