/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /** * This file has classes to combine New Tab feature events (aggregated from a sqlLite table) into an interest model. */ import { FORMAT, AggregateResultKeys, SPECIAL_FEATURE_CLICK, } from "resource://newtab/lib/InferredModel/InferredConstants.sys.mjs"; export const DAYS_TO_MS = 60 * 60 * 24 * 1000; const MAX_INT_32 = 2 ** 32; /** * Divides numerator fields by the denominator. Value is set to 0 if denominator is missing or 0. * @param {Object.} numerator * @param {Object.} denominator * returns {Object.} */ function divideDict(numerator, denominator) { const result = {}; Object.keys(numerator).forEach(k => { result[k] = denominator[k] ? numerator[k] / denominator[k] : 0; }); return result; } /** * Returns a secure random value between 0 and 1 */ function secureRandomNumber() { const array = new Uint32Array(1); crypto.getRandomValues(array); return array[0] / MAX_INT_32; } /** * Applies laplace noise at a given scale * @param {number} scale value * @returns noisy value */ function laplaceNoise(scale) { const u = secureRandomNumber() - 0.5; return -scale * Math.sign(u) * Math.log(1 - 2 * Math.abs(u)); } /** * Unary encoding with randomized response for differential privacy. * The output must be decoded to back to an integer when aggregating a historgram on a server * @param {number} x - Integer input (0 <= x < N) * @param {number} N - Number of values (see ablove) * @param {number} p - Probability of keeping a 1-bit as 1 (after one-hot encoding the output) * @param {number} q - Probability of flipping a 0-bit to 1 * @returns {string} - Bitstring after unary encoding and randomized response */ export function unaryEncodeDiffPrivacy(x, N, p, q) { const bitstring = []; const randomValues = new Uint32Array(N); crypto.getRandomValues(randomValues); for (let i = 0; i < N; i++) { const trueBit = i === x ? 1 : 0; const rand = randomValues[i] / MAX_INT_32; if (trueBit === 1) { bitstring.push(rand <= p ? "1" : "0"); } else { bitstring.push(rand <= q ? "1" : "0"); } } return bitstring.join(""); } /** * Adds value to all a particular key in a dictionary. If the key is missing it sets the value. * @param {Object} dict - The dictionary to modify. * @param {string} key - The key whose value should be added or set. * @param {number} value - The value to add to the key. */ export function dictAdd(dict, key, value) { if (key in dict) { dict[key] += value; } else { dict[key] = value; } } /** * Apply function to all keys in dictionary, returning new dictionary. * @param {Object} obj - The object whose values should be transformed. * @param {Function} fn - The function to apply to each value. * @returns {Object} A new object with the transformed values. */ export function dictApply(obj, fn) { return Object.fromEntries( Object.entries(obj).map(([key, value]) => [key, fn(value)]) ); } /** * Class for re-scaling events based on time passed. */ export class DayTimeWeighting { /** * Instantiate class based on a series of day periods in the past. * @param {int[]} pastDays Series of number of days, indicating days ago intervals in reverse chonological order. * Intervals are added: If the first value is 1 and the second is 5, then the first inteval is 0-1 and second interval is between 1 and 6. * @param {number[]} relativeWeight Relative weight of each period. Must be same length as pastDays */ constructor(pastDays, relativeWeight) { this.pastDays = pastDays; this.relativeWeight = relativeWeight; } static fromJSON(json) { return new DayTimeWeighting(json.days, json.relative_weight); } /** * Get a series of interval pairs in the past based on the pastDays. * @param {number} curTimeMs Base time time in MS. Usually current time. * @returns */ getDateIntervals(curTimeMs) { let curEndTime = curTimeMs; const res = this.pastDays.map(daysAgo => { const start = new Date(curEndTime - daysAgo * DAYS_TO_MS); const end = new Date(curEndTime); curEndTime = start; return { start, end }; }); return res; } /** * Get relative weight of current index. * @param {int} weightIndex Index * @returns {number} Weight at index, or 0 if index out of range. */ getRelativeWeight(weightIndex) { if (weightIndex >= this.pastDays.length) { return 0; } return this.relativeWeight[weightIndex]; } } /** * Describes the mapping from a set of aggregated events to a single interest feature */ export class InterestFeatures { constructor( name, featureWeights, thresholds = null, diff_p = 0.5, diff_q = 0.5 ) { this.name = name; this.featureWeights = featureWeights; // Thresholds must be in ascending order this.thresholds = thresholds; this.diff_p = diff_p; this.diff_q = diff_q; } static fromJSON(name, json) { return new InterestFeatures( name, json.features, json.thresholds || null, json.diff_p, json.diff_q ); } /** * Quantize a feature value based on the thresholds specified in the class. * @param {number} inValue Value computed by model for the feature. * @returns Quantized value. A value between 0 and number of thresholds specified (inclusive) */ applyThresholds(inValue) { if (!this.thresholds) { return inValue; } for (let k = 0; k < this.thresholds.length; k++) { if (inValue < this.thresholds[k]) { return k; } } return this.thresholds.length; } /** * Applies Differential Privacy Unary Encoding method, outputting a one-hot encoded vector with randomizaiton. * Accurate historgrams of values can be computed with reasonable accuracy. * If the class has no or 0 p/q values set for differential privacy, then response is original number non-encoded. * @param {number} inValue Value to randomize * @returns Bitfield as a string, that is the same as the thresholds length + 1 */ applyDifferentialPrivacy(inValue) { if (!this.thresholds || !this.diff_p) { return inValue; } return unaryEncodeDiffPrivacy( inValue, this.thresholds.length + 1, this.diff_p, this.diff_q ); } } /** * Manages relative tile importance */ export class TileImportance { constructor(tileImportanceMappings) { this.mappings = {}; for (const [formatKey, formatId] of Object.entries(FORMAT)) { if (formatKey in tileImportanceMappings) { this.mappings[formatId] = tileImportanceMappings[formatKey]; } } } getRelativeCTRForTile(tileType) { return this.mappings[tileType] || 1; } static fromJSON(json) { return new TileImportance(json); } } /*** * A simple model for aggregating features */ export class FeatureModel { /** * * @param {string} modelId * @param {Object} dayTimeWeighting Data for day time weighting class * @param {Object} interestVectorModel Data for interest model * @param {Object} tileImportance Data for tile importance * @param {boolean} rescale Whether to rescale to max value * @param {boolean} logScale Whether to apply natural log (ln(x+ 1)) before rescaling */ constructor({ modelId, dayTimeWeighting, interestVectorModel, tileImportance, modelType, rescale = true, logScale = false, noiseScale = 0, laplaceNoiseFn = laplaceNoise, }) { this.modelId = modelId; this.tileImportance = tileImportance; this.dayTimeWeighting = dayTimeWeighting; this.interestVectorModel = interestVectorModel; this.rescale = rescale; this.logScale = logScale; this.modelType = modelType; this.noiseScale = noiseScale; this.laplaceNoiseFn = laplaceNoiseFn; } static fromJSON(json) { const dayTimeWeighting = DayTimeWeighting.fromJSON(json.day_time_weighting); const interestVectorModel = {}; const tileImportance = TileImportance.fromJSON(json.tile_importance || {}); for (const [name, featureJson] of Object.entries(json.interest_vector)) { interestVectorModel[name] = InterestFeatures.fromJSON(name, featureJson); } return new FeatureModel({ dayTimeWeighting, tileImportance, interestVectorModel, normalize: json.normalize, rescale: json.rescale, logScale: json.log_scale, clickScale: json.clickScale, modelType: json.model_type, noiseScale: json.noise_scale, }); } supportsCoarseInterests() { return Object.values(this.interestVectorModel).every( fm => fm.thresholds && fm.thresholds.length ); } supportsCoarsePrivateInterests() { return Object.values(this.interestVectorModel).every( fm => fm.thresholds && fm.thresholds.length && "diff_p" in fm && "diff_q" in fm ); } /** * Return date intervals for the query */ getDateIntervals(curTimeMs) { return this.dayTimeWeighting.getDateIntervals(curTimeMs); } /** * Computes an interest vector or aggregate based on the model and raw sql inout. * @param {Object} config * @param {Array.>} config.dataForIntervals Raw aggregate output from SQL query. Could be clicks or impressions * @param {Object.} config.indexSchema Map of keys to indices in each sub-array in dataForIntervals * @param {boolean} [config.applyThresholding=false] Whether to apply thresholds * @param {boolean} [config.applyDifferntialPrivacy=false] Whether to apply differential privacy. This will be used for sending to telemetry. * @returns */ computeInterestVector({ dataForIntervals, indexSchema, applyThresholding = false, applyDifferentialPrivacy = false, }) { const processedPerTimeInterval = dataForIntervals.map( (intervalData, idx) => { const intervalRawTotal = {}; const perPeriodTotals = {}; intervalData.forEach(aggElement => { const feature = aggElement[indexSchema[AggregateResultKeys.FEATURE]]; let value = aggElement[indexSchema[AggregateResultKeys.VALUE]]; // In the future we could support format here dictAdd(intervalRawTotal, feature, value); }); const weight = this.dayTimeWeighting.getRelativeWeight(idx); // Weight for this time interval Object.values(this.interestVectorModel).forEach(interestFeature => { for (const featureUsed of Object.keys( interestFeature.featureWeights )) { if (featureUsed in intervalRawTotal) { dictAdd( perPeriodTotals, interestFeature.name, intervalRawTotal[featureUsed] * weight * interestFeature.featureWeights[featureUsed] ); } } }); return perPeriodTotals; } ); // Since we are doing linear combinations, it is fine to do the day-time weighting at this step let totalResults = {}; processedPerTimeInterval.forEach(intervalTotals => { for (const key of Object.keys(intervalTotals)) { dictAdd(totalResults, key, intervalTotals[key]); } }); let numClicks = -1; // If clicks is a feature, it's handled as special case if (SPECIAL_FEATURE_CLICK in totalResults) { numClicks = totalResults[SPECIAL_FEATURE_CLICK]; delete totalResults[SPECIAL_FEATURE_CLICK]; } if (this.logScale) { totalResults = dictApply(totalResults, x => Math.log(x + 1)); } if (this.rescale) { let divisor = Math.max(...Object.values(totalResults)); if (divisor <= 0.001) { divisor = 0.001; } totalResults = dictApply(totalResults, x => x / divisor); } if (this.clickScale && numClicks > 0) { totalResults = dictApply(totalResults, x => x / numClicks); } if (numClicks >= 0) { totalResults[SPECIAL_FEATURE_CLICK] = numClicks; } if (applyThresholding) { if (applyDifferentialPrivacy) { // Zero values need to be shown so they can be randomized Object.values(this.interestVectorModel).forEach(interestFeature => { if (!(interestFeature.name in totalResults)) { totalResults[interestFeature.name] = 0; } }); } for (const key of Object.keys(totalResults)) { if (key in this.interestVectorModel) { totalResults[key] = this.interestVectorModel[key].applyThresholds( totalResults[key], applyDifferentialPrivacy ); if (applyDifferentialPrivacy) { totalResults[key] = this.interestVectorModel[ key ].applyDifferentialPrivacy( totalResults[key], applyDifferentialPrivacy ); } } } } return totalResults; } /** * Given pre-computed inferredInterests for clicks and impressions, returns a ctr result with * @param {Object} clickDict clicks dictionary * @param {Object} impressionDict impression dictionary * @param {String} model_id Model ID * @returns model */ computeCTRInterestVectors(clickDict, impressionDict, model_id) { const inferredInterests = divideDict(clickDict, impressionDict); this.applyLaplaceNoise(inferredInterests); return { ...inferredInterests, model_id }; } /** * Applies laplace noise to values in a dictionary if specified in the model * @param {Object} inputDict * @returns */ applyLaplaceNoise(inputDict) { if (!this.noiseScale) { return; } for (const key in inputDict) { if (typeof inputDict[key] === "number") { const noise = this.laplaceNoiseFn(this.noiseScale); inputDict[key] += noise; } } } /** * Computes the interest vector for data intervals, as well as the coarse and privatized (with randomess) */ computeInterestVectors({ dataForIntervals, indexSchema, model_id = "unknown", condensePrivateValues = true, }) { const result = {}; let inferredInterests; let coarseInferredInterests; let coarsePrivateInferredInterests; inferredInterests = this.computeInterestVector({ dataForIntervals, indexSchema, }); const updatedFuzzyInterests = { ...inferredInterests }; this.applyLaplaceNoise(updatedFuzzyInterests); result.inferredInterests = { ...updatedFuzzyInterests, model_id }; if (this.supportsCoarseInterests()) { coarseInferredInterests = this.computeInterestVector({ dataForIntervals, indexSchema, applyThresholding: true, }); if (coarseInferredInterests) { result.coarseInferredInterests = { ...coarseInferredInterests, model_id, }; } } if (this.supportsCoarsePrivateInterests()) { coarsePrivateInferredInterests = this.computeInterestVector({ dataForIntervals, indexSchema, applyThresholding: true, applyDifferentialPrivacy: true, }); if (coarsePrivateInferredInterests) { if (condensePrivateValues) { result.coarsePrivateInferredInterests = { // Key order preserved in Gecko values: Object.values(coarsePrivateInferredInterests), model_id, }; } else { result.coarsePrivateInferredInterests = { ...coarsePrivateInferredInterests, model_id, }; } } } return result; } }